| author | click <click@gonnamakeyou.com> | 2012-04-23 20:23:30 +0200 |
|---|---|---|
| committer | click <click@gonnamakeyou.com> | 2012-04-23 20:23:30 +0200 |
| commit | c4123289916daa7bd1c7feb191e8c647fd17b163 | |
| tree | d6235b61f7ead417757273184acb27a252a96bc5 /dep/jemalloc | |
| parent | 5da5021464c649d84c755a921eae43519eba8567 | |
Revert "DEP: Updated Jemalloc to Version 2.5" - this version of the jemalloc-library is crashy at best, and should not have been pushed.
Further investigations on why this occurs is required before it will be slammed into master.
This reverts commit 126fd13e5d6b57dc0c8830248d44db504c7d103f.
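For anyone reproducing this locally: a revert commit like this one is normally generated with git's built-in revert command, which records the inverse of the named commit as a new commit. A minimal sketch, assuming the offending commit is reachable from the current branch:

    # Create a new commit that undoes the jemalloc 2.5 update.
    git revert 126fd13e5d6b57dc0c8830248d44db504c7d103f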
Diffstat (limited to 'dep/jemalloc')
34 files changed, 804 insertions(+), 2067 deletions(-)
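The per-file breakdown is not shown in this web view, but it can be recovered from the repository itself; a sketch, assuming the parent and commit hashes from the header above:

    # Per-file diffstat for this revert, limited to the jemalloc subtree.
    git diff --stat 5da5021464c649d84c755a921eae43519eba8567 \
        c4123289916daa7bd1c7feb191e8c647fd17b163 -- dep/jemalloc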
diff --git a/dep/jemalloc/VERSION b/dep/jemalloc/VERSION index aa85f5a2acf..585f53edd80 100644 --- a/dep/jemalloc/VERSION +++ b/dep/jemalloc/VERSION @@ -1 +1 @@ -2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760 +2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf diff --git a/dep/jemalloc/include/jemalloc/internal/arena.h b/dep/jemalloc/include/jemalloc/internal/arena.h index b80c118d811..9556c2c68f7 100644 --- a/dep/jemalloc/include/jemalloc/internal/arena.h +++ b/dep/jemalloc/include/jemalloc/internal/arena.h @@ -19,7 +19,6 @@ #ifdef JEMALLOC_TINY /* Smallest size class to support. */ # define LG_TINY_MIN LG_SIZEOF_PTR -# define TINY_MIN (1U << LG_TINY_MIN) #endif /* @@ -46,10 +45,9 @@ * point is implicitly RUN_BFP bits to the left. * * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be - * honored for some/all object sizes, since when heap profiling is enabled - * there is one pointer of header overhead per object (plus a constant). This - * constraint is relaxed (ignored) for runs that are so small that the - * per-region overhead is greater than: + * honored for some/all object sizes, since there is one bit of header overhead + * per object (plus a constant). This constraint is relaxed (ignored) for runs + * that are so small that the per-region overhead is greater than: * * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) */ @@ -58,10 +56,6 @@ #define RUN_MAX_OVRHD 0x0000003dU #define RUN_MAX_OVRHD_RELAX 0x00001800U -/* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS 11 -#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) - /* * The minimum ratio of active:dirty pages per arena is computed as: * @@ -75,7 +69,6 @@ typedef struct arena_chunk_map_s arena_chunk_map_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_run_s arena_run_t; -typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; @@ -112,7 +105,7 @@ struct arena_chunk_map_s { * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): * - * ???????? ???????? ????---- ----dula + * ???????? ???????? ????---- ----dzla * * ? : Unallocated: Run address for first/last pages, unset for internal * pages. @@ -120,7 +113,7 @@ struct arena_chunk_map_s { * Large: Run size for first page, unset for trailing pages. * - : Unused. * d : dirty? - * u : unzeroed? + * z : zeroed? * l : large? * a : allocated? 
* @@ -136,30 +129,30 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-a + * ssssssss ssssssss ssss---- ----du-- * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-a + * ssssssss ssssssss ssss---- ----dU-- * * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--a + * ssssssss ssssssss ssss---- ----D--- * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--a + * ssssssss ssssssss ssss---- ----D--- * * Small: - * pppppppp pppppppp pppp---- ----d--A - * pppppppp pppppppp pppp---- -------A - * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp pppp---- ----d--a + * pppppppp pppppppp pppp---- -------a + * pppppppp pppppppp pppp---- ----d--a * * Large: - * ssssssss ssssssss ssss---- ----D-LA + * ssssssss ssssssss ssss---- ----D-la * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-LA + * -------- -------- -------- ----D-la * * Large (sampled, size <= PAGE_SIZE): - * ssssssss ssssssss sssscccc ccccD-LA + * ssssssss ssssssss sssscccc ccccD-la * * Large (not sampled, size == PAGE_SIZE): - * ssssssss ssssssss ssss---- ----D-LA + * ssssssss ssssssss ssss---- ----D-la */ size_t bits; #ifdef JEMALLOC_PROF @@ -213,52 +206,16 @@ struct arena_run_s { /* Bin this run is associated with. */ arena_bin_t *bin; - /* Index of next region that has never been allocated, or nregs. */ - uint32_t nextind; + /* Stack of available freed regions, or NULL. */ + void *avail; + + /* Next region that has never been allocated, or run boundary. */ + void *next; /* Number of free regions in run. */ unsigned nfree; }; -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - */ -struct arena_bin_info_s { - /* Size of regions in a run for this bin's size class. */ - size_t reg_size; - - /* Total size of a run for this bin's size class. */ - size_t run_size; - - /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; - - /* - * Offset of first bitmap_t element in a run header for this bin's size - * class. - */ - uint32_t bitmap_offset; - - /* - * Metadata used to manipulate bitmaps for runs associated with this - * bin. - */ - bitmap_info_t bitmap_info; - -#ifdef JEMALLOC_PROF - /* - * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (opt_prof == false). - */ - uint32_t ctx0_offset; -#endif - - /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; -}; - struct arena_bin_s { /* * All operations on runcur, runs, and stats require that lock be @@ -283,6 +240,26 @@ struct arena_bin_s { */ arena_run_tree_t runs; + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Total size of a run for this bin's size class. */ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + +#ifdef JEMALLOC_PROF + /* + * Offset of first (prof_ctx_t *) in a run header for this bin's size + * class, or 0 if (opt_prof == false). + */ + uint32_t ctx0_offset; +#endif + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; + #ifdef JEMALLOC_STATS /* Bin statistics. */ malloc_bin_stats_t stats; @@ -299,18 +276,8 @@ struct arena_s { unsigned ind; /* - * Number of threads currently assigned to this arena. 
This field is - * protected by arenas_lock. - */ - unsigned nthreads; - - /* - * There are three classes of arena operations from a locking - * perspective: - * 1) Thread asssignment (modifies nthreads) is protected by - * arenas_lock. - * 2) Bin-related operations are protected by bin locks. - * 3) Chunk- and run-related operations are protected by this mutex. + * All non-bin-related operations on this arena require that lock be + * locked. */ malloc_mutex_t lock; @@ -380,35 +347,45 @@ struct arena_s { /* * bins is used to store trees of free regions of the following sizes, - * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and - * default MALLOC_CONF. + * assuming a 16-byte quantum, 4 KiB page size, and default + * JEMALLOC_OPTIONS. * * bins[i] | size | * --------+--------+ - * 0 | 8 | + * 0 | 2 | + * 1 | 4 | + * 2 | 8 | * --------+--------+ - * 1 | 16 | - * 2 | 32 | - * 3 | 48 | + * 3 | 16 | + * 4 | 32 | + * 5 | 48 | * : : - * 6 | 96 | - * 7 | 112 | - * 8 | 128 | + * 8 | 96 | + * 9 | 112 | + * 10 | 128 | * --------+--------+ - * 9 | 192 | - * 10 | 256 | - * 11 | 320 | - * 12 | 384 | - * 13 | 448 | - * 14 | 512 | + * 11 | 192 | + * 12 | 256 | + * 13 | 320 | + * 14 | 384 | + * 15 | 448 | + * 16 | 512 | + * --------+--------+ + * 17 | 768 | + * 18 | 1024 | + * 19 | 1280 | + * : : + * 27 | 3328 | + * 28 | 3584 | + * 29 | 3840 | * --------+--------+ - * 15 | 768 | - * 16 | 1024 | - * 17 | 1280 | + * 30 | 4 KiB | + * 31 | 6 KiB | + * 33 | 8 KiB | * : : - * 25 | 3328 | - * 26 | 3584 | - * 27 | 3840 | + * 43 | 28 KiB | + * 44 | 30 KiB | + * 45 | 32 KiB | * --------+--------+ */ arena_bin_t bins[1]; /* Dynamically sized. */ @@ -420,16 +397,8 @@ struct arena_s { extern size_t opt_lg_qspace_max; extern size_t opt_lg_cspace_max; -extern ssize_t opt_lg_dirty_mult; -/* - * small_size2bin is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via the SMALL_SIZE2BIN macro. - */ +extern ssize_t opt_lg_dirty_mult; extern uint8_t const *small_size2bin; -#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) - -extern arena_bin_info_t *arena_bin_info; /* Various bin-related settings. */ #ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ @@ -496,9 +465,8 @@ bool arena_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, - const void *ptr); +unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin, + const void *ptr, size_t size); # ifdef JEMALLOC_PROF prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -507,37 +475,21 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) -JEMALLOC_INLINE size_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) -{ - size_t binind = bin - arena->bins; - assert(binind < nbins); - return (binind); -} - JEMALLOC_INLINE unsigned -arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) +arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, + size_t size) { unsigned shift, diff, regind; - size_t size; - dassert(run->magic == ARENA_RUN_MAGIC); - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. 
- */ - assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); + assert(run->magic == ARENA_RUN_MAGIC); /* * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - - bin_info->reg0_offset); + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ - size = bin_info->reg_size; shift = ffs(size) - 1; diff >>= shift; size >>= shift; @@ -560,8 +512,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * divide by 0, and 1 and 2 are both powers of two, which are * handled above. */ -#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) +#define SIZE_INV_SHIFT 21 +#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) static const unsigned size_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), @@ -581,7 +533,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) #undef SIZE_INV_SHIFT } assert(diff == regind * size); - assert(regind < bin_info->nregs); + assert(regind < bin->nregs); return (regind); } @@ -608,14 +560,13 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + arena_bin_t *bin = run->bin; unsigned regind; - dassert(run->magic == ARENA_RUN_MAGIC); - regind = arena_run_regind(run, bin_info, ptr); + assert(run->magic == ARENA_RUN_MAGIC); + regind = arena_run_regind(run, bin, ptr, bin->reg_size); ret = *(prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * + bin->ctx0_offset + (regind * sizeof(prof_ctx_t *))); } } else @@ -643,16 +594,12 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); arena_bin_t *bin = run->bin; - size_t binind; - arena_bin_info_t *bin_info; unsigned regind; - dassert(run->magic == ARENA_RUN_MAGIC); - binind = arena_bin_index(chunk->arena, bin); - bin_info = &arena_bin_info[binind]; - regind = arena_run_regind(run, bin_info, ptr); + assert(run->magic == ARENA_RUN_MAGIC); + regind = arena_run_regind(run, bin, ptr, bin->reg_size); - *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset + (regind * sizeof(prof_ctx_t *)))) = ctx; } else assert((uintptr_t)ctx == (uintptr_t)1U); @@ -668,7 +615,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_chunk_map_t *mapelm; assert(arena != NULL); - dassert(arena->magic == ARENA_MAGIC); + assert(arena->magic == ARENA_MAGIC); assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -691,18 +638,11 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)run->bin->reg0_offset)) % + run->bin->reg_size == 0); bin = run->bin; -#ifdef JEMALLOC_DEBUG - { - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - 
assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_size == 0); - } -#endif malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, ptr, mapelm); malloc_mutex_unlock(&bin->lock); diff --git a/dep/jemalloc/include/jemalloc/internal/atomic.h b/dep/jemalloc/include/jemalloc/internal/atomic.h deleted file mode 100644 index 9a298623f8a..00000000000 --- a/dep/jemalloc/include/jemalloc/internal/atomic.h +++ /dev/null @@ -1,169 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#define atomic_read_uint64(p) atomic_add_uint64(p, 0) -#define atomic_read_uint32(p) atomic_add_uint32(p, 0) - -#if (LG_SIZEOF_PTR == 3) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x) -#elif (LG_SIZEOF_PTR == 2) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x) -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); -uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) -/******************************************************************************/ -/* 64-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); -} -#elif (defined(__amd64_) || defined(__x86_64__)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - x = (uint64_t)(-(int64_t)x); - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} -#else -# if (LG_SIZEOF_PTR == 3) -# error "Missing implementation for 64-bit atomic operations" -# endif -#endif - -/******************************************************************************/ -/* 32-bit operations. 
*/ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); -} -#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - x = (uint32_t)(-(int32_t)x); - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} -#else -# error "Missing implementation for 32-bit atomic operations" -#endif -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/bitmap.h b/dep/jemalloc/include/jemalloc/internal/bitmap.h deleted file mode 100644 index 605ebac58c1..00000000000 --- a/dep/jemalloc/include/jemalloc/internal/bitmap.h +++ /dev/null @@ -1,184 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS - -typedef struct bitmap_level_s bitmap_level_t; -typedef struct bitmap_info_s bitmap_info_t; -typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG - -/* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) - -/* Maximum number of levels possible. */ -#define BITMAP_MAX_LEVELS \ - (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct bitmap_level_s { - /* Offset of this level's groups within the array of groups. */ - size_t group_offset; -}; - -struct bitmap_info_s { - /* Logical number of bits in bitmap (stored at bottom level). */ - size_t nbits; - - /* Number of levels necessary for nbits. */ - unsigned nlevels; - - /* - * Only the first (nlevels+1) elements are used, and levels are ordered - * bottom to top (e.g. the bottom level is stored in levels[0]). 
- */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -size_t bitmap_info_ngroups(const bitmap_info_t *binfo); -size_t bitmap_size(size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) -JEMALLOC_INLINE bool -bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; - bitmap_t rg = bitmap[rgoff]; - /* The bitmap is full iff the root group is 0. */ - return (rg == 0); -} - -JEMALLOC_INLINE bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t g; - - assert(bit < binfo->nbits); - goff = bit >> LG_BITMAP_GROUP_NBITS; - g = bitmap[goff]; - return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); -} - -JEMALLOC_INLINE void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit) == false); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit)); - /* Propagate group state transitions up the tree. */ - if (g == 0) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (g != 0) - break; - } - } -} - -/* sfu: set first unset. */ -JEMALLOC_INLINE size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t bit; - bitmap_t g; - unsigned i; - - assert(bitmap_full(bitmap, binfo) == false); - - i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; - bit = ffsl(g) - 1; - while (i > 0) { - i--; - g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); - } - - bitmap_set(bitmap, binfo, bit); - return (bit); -} - -JEMALLOC_INLINE void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - bool propagate; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit) == false); - /* Propagate group state transitions up the tree. 
*/ - if (propagate) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) - == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (propagate == false) - break; - } - } -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/chunk.h b/dep/jemalloc/include/jemalloc/internal/chunk.h index 54b6a3ec886..a60f0ad7498 100644 --- a/dep/jemalloc/include/jemalloc/internal/chunk.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk.h @@ -50,7 +50,7 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. */ void *chunk_alloc(size_t size, bool base, bool *zero); -void chunk_dealloc(void *chunk, size_t size, bool unmap); +void chunk_dealloc(void *chunk, size_t size); bool chunk_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/dep/jemalloc/include/jemalloc/internal/ckh.h b/dep/jemalloc/include/jemalloc/internal/ckh.h index 3e4ad4c85f9..d4e391b6360 100644 --- a/dep/jemalloc/include/jemalloc/internal/ckh.h +++ b/dep/jemalloc/include/jemalloc/internal/ckh.h @@ -31,7 +31,7 @@ struct ckhc_s { struct ckh_s { #ifdef JEMALLOC_DEBUG -#define CKH_MAGIC 0x3af2489d +#define CKH_MAGIG 0x3af2489d uint32_t magic; #endif diff --git a/dep/jemalloc/include/jemalloc/internal/ctl.h b/dep/jemalloc/include/jemalloc/internal/ctl.h index f1f5eb70a2a..8776ad135a7 100644 --- a/dep/jemalloc/include/jemalloc/internal/ctl.h +++ b/dep/jemalloc/include/jemalloc/internal/ctl.h @@ -29,7 +29,6 @@ struct ctl_node_s { struct ctl_arena_stats_s { bool initialized; - unsigned nthreads; size_t pactive; size_t pdirty; #ifdef JEMALLOC_STATS diff --git a/dep/jemalloc/include/jemalloc/internal/hash.h b/dep/jemalloc/include/jemalloc/internal/hash.h index 8a46ce30803..d12cdb8359f 100644 --- a/dep/jemalloc/include/jemalloc/internal/hash.h +++ b/dep/jemalloc/include/jemalloc/internal/hash.h @@ -17,7 +17,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(HASH_C_)) /* * The following hash function is based on MurmurHash64A(), placed into the * public domain by Austin Appleby. 
See http://murmurhash.googlepages.com/ for @@ -26,7 +26,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); JEMALLOC_INLINE uint64_t hash(const void *key, size_t len, uint64_t seed) { - const uint64_t m = 0xc6a4a7935bd1e995LLU; + const uint64_t m = 0xc6a4a7935bd1e995; const int r = 47; uint64_t h = seed ^ (len * m); const uint64_t *data = (const uint64_t *)key; @@ -62,7 +62,7 @@ hash(const void *key, size_t len, uint64_t seed) h *= m; h ^= h >> r; - return (h); + return h; } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h index cd554bea1b9..611f0c665a1 100644 --- a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -33,12 +33,6 @@ #define JEMALLOC_MANGLE #include "../jemalloc.h" -#include "jemalloc/internal/private_namespace.h" - -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) -#include <libkern/OSAtomic.h> -#endif - #ifdef JEMALLOC_ZONE #include <mach/mach_error.h> #include <mach/mach_init.h> @@ -61,9 +55,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. */ -#ifndef assert -# ifdef JEMALLOC_DEBUG -# define assert(e) do { \ +#ifdef JEMALLOC_DEBUG +# define assert(e) do { \ if (!(e)) { \ char line_buf[UMAX2S_BUFSIZE]; \ malloc_write("<jemalloc>: "); \ @@ -77,15 +70,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); abort(); \ } \ } while (0) -# else -# define assert(e) -# endif -#endif - -#ifdef JEMALLOC_DEBUG -# define dassert(e) assert(e) #else -# define dassert(e) +#define assert(e) #endif /* @@ -160,19 +146,12 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define QUANTUM_CEILING(a) \ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) - -/* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ - (((a) + LONG_MASK) & ~LONG_MASK) - #define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) -/* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ - (((a) + PTR_MASK) & ~PTR_MASK) +/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ +#if (!defined(PIC) && !defined(NO_TLS)) +# define NO_TLS +#endif /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. 
@@ -219,7 +198,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" @@ -228,7 +206,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" @@ -244,14 +221,12 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); /******************************************************************************/ #define JEMALLOC_H_STRUCTS -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" @@ -265,13 +240,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #endif #include "jemalloc/internal/prof.h" -#ifdef JEMALLOC_STATS -typedef struct { - uint64_t allocated; - uint64_t deallocated; -} thread_allocated_t; -#endif - #undef JEMALLOC_H_STRUCTS /******************************************************************************/ #define JEMALLOC_H_EXTERNS @@ -301,7 +269,6 @@ extern size_t lg_pagesize; extern unsigned ncpus; extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. 
*/ -extern pthread_key_t arenas_tsd; #ifndef NO_TLS /* * Map of pthread_self() --> arenas[???], used for selecting an arena to use @@ -311,9 +278,9 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); # define ARENA_GET() arenas_tls # define ARENA_SET(v) do { \ arenas_tls = (v); \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ } while (0) #else +extern pthread_key_t arenas_tsd; # define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) # define ARENA_SET(v) do { \ pthread_setspecific(arenas_tsd, (void *)(v)); \ @@ -328,28 +295,45 @@ extern arena_t **arenas; extern unsigned narenas; #ifdef JEMALLOC_STATS +typedef struct { + uint64_t allocated; + uint64_t deallocated; +} thread_allocated_t; # ifndef NO_TLS extern __thread thread_allocated_t thread_allocated_tls; -# define ALLOCATED_GET() (thread_allocated_tls.allocated) -# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) -# define DEALLOCATED_GET() (thread_allocated_tls.deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated) +# define ALLOCATED_GET() thread_allocated_tls.allocated +# define DEALLOCATED_GET() thread_allocated_tls.deallocated # define ALLOCATED_ADD(a, d) do { \ thread_allocated_tls.allocated += a; \ thread_allocated_tls.deallocated += d; \ } while (0) # else extern pthread_key_t thread_allocated_tsd; -thread_allocated_t *thread_allocated_get_hard(void); - -# define ALLOCATED_GET() (thread_allocated_get()->allocated) -# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) -# define DEALLOCATED_GET() (thread_allocated_get()->deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated) +# define ALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? ((thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd))->allocated : 0) +# define DEALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? 
((thread_allocated_t \ + *)pthread_getspecific(thread_allocated_tsd))->deallocated : \ + 0) # define ALLOCATED_ADD(a, d) do { \ - thread_allocated_t *thread_allocated = thread_allocated_get(); \ - thread_allocated->allocated += (a); \ - thread_allocated->deallocated += (d); \ + thread_allocated_t *thread_allocated = (thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd); \ + if (thread_allocated != NULL) { \ + thread_allocated->allocated += (a); \ + thread_allocated->deallocated += (d); \ + } else { \ + thread_allocated = (thread_allocated_t *) \ + imalloc(sizeof(thread_allocated_t)); \ + if (thread_allocated != NULL) { \ + pthread_setspecific(thread_allocated_tsd, \ + thread_allocated); \ + thread_allocated->allocated = (a); \ + thread_allocated->deallocated = (d); \ + } \ + } \ } while (0) # endif #endif @@ -360,14 +344,12 @@ int buferror(int errnum, char *buf, size_t buflen); void jemalloc_prefork(void); void jemalloc_postfork(void); -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" @@ -385,7 +367,6 @@ void jemalloc_postfork(void); /******************************************************************************/ #define JEMALLOC_H_INLINES -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" @@ -403,9 +384,6 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); void malloc_write(const char *s); arena_t *choose_arena(void); -# if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -thread_allocated_t *thread_allocated_get(void); -# endif #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -436,10 +414,10 @@ s2u(size_t size) { if (size <= small_maxclass) - return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); + return arenas[0]->bins[small_size2bin[size]].reg_size; if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); + return PAGE_CEILING(size); + return CHUNK_CEILING(size); } /* @@ -480,8 +458,10 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { - if (usize <= small_maxclass) - return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); + if (usize <= small_maxclass) { + return + (arenas[0]->bins[small_size2bin[usize]].reg_size); + } return (PAGE_CEILING(usize)); } else { size_t run_size; @@ -564,22 +544,8 @@ choose_arena(void) return (ret); } - -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -JEMALLOC_INLINE thread_allocated_t * -thread_allocated_get(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - pthread_getspecific(thread_allocated_tsd); - - if (thread_allocated == NULL) - return (thread_allocated_get_hard()); - return (thread_allocated); -} -#endif #endif -#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/arena.h" @@ -591,7 +557,7 @@ thread_allocated_get(void) #ifndef JEMALLOC_ENABLE_INLINE void *imalloc(size_t size); void *icalloc(size_t size); -void *ipalloc(size_t usize, size_t alignment, bool zero); +void *ipalloc(size_t size, size_t alignment, bool 
zero); size_t isalloc(const void *ptr); # ifdef JEMALLOC_IVSALLOC size_t ivsalloc(const void *ptr); @@ -625,39 +591,28 @@ icalloc(size_t size) } JEMALLOC_INLINE void * -ipalloc(size_t usize, size_t alignment, bool zero) +ipalloc(size_t size, size_t alignment, bool zero) { void *ret; + size_t usize; + size_t run_size +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; - assert(usize != 0); - assert(usize == sa2u(usize, alignment, NULL)); - + usize = sa2u(size, alignment, &run_size); + if (usize == 0) + return (NULL); if (usize <= arena_maxclass && alignment <= PAGE_SIZE) ret = arena_malloc(usize, zero); - else { - size_t run_size -#ifdef JEMALLOC_CC_SILENCE - = 0 -#endif - ; - - /* - * Ideally we would only ever call sa2u() once per aligned - * allocation request, and the caller of this function has - * already done so once. However, it's rather burdensome to - * require every caller to pass in run_size, especially given - * that it's only relevant to large allocations. Therefore, - * just call it again here in order to get run_size. - */ - sa2u(usize, alignment, &run_size); - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), usize, run_size, - alignment, zero); - } else if (alignment <= chunksize) - ret = huge_malloc(usize, zero); - else - ret = huge_palloc(usize, alignment, zero); - } + else if (run_size <= arena_maxclass) { + ret = arena_palloc(choose_arena(), usize, run_size, alignment, + zero); + } else if (alignment <= chunksize) + ret = huge_malloc(usize, zero); + else + ret = huge_palloc(usize, alignment, zero); assert(((uintptr_t)ret & (alignment - 1)) == 0); return (ret); @@ -674,7 +629,7 @@ isalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); #ifdef JEMALLOC_PROF ret = arena_salloc_demote(ptr); @@ -728,7 +683,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { - size_t usize, copysize; + size_t copysize; /* * Existing object alignment is inadquate; allocate new space @@ -736,18 +691,12 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ if (no_move) return (NULL); - usize = sa2u(size + extra, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipalloc(size + extra, alignment, zero); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, without extra this time. 
*/ - usize = sa2u(size, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipalloc(size, alignment, zero); if (ret == NULL) return (NULL); } diff --git a/dep/jemalloc/include/jemalloc/internal/mb.h b/dep/jemalloc/include/jemalloc/internal/mb.h index dc9f2a54262..1707aa91d68 100644 --- a/dep/jemalloc/include/jemalloc/internal/mb.h +++ b/dep/jemalloc/include/jemalloc/internal/mb.h @@ -17,7 +17,7 @@ void mb_write(void); #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MB_C_)) #ifdef __i386__ /* * According to the Intel Architecture Software Developer's Manual, current diff --git a/dep/jemalloc/include/jemalloc/internal/mutex.h b/dep/jemalloc/include/jemalloc/internal/mutex.h index 62947ced55e..dcca01edd5d 100644 --- a/dep/jemalloc/include/jemalloc/internal/mutex.h +++ b/dep/jemalloc/include/jemalloc/internal/mutex.h @@ -1,11 +1,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#ifdef JEMALLOC_OSSPIN -typedef OSSpinLock malloc_mutex_t; -#else typedef pthread_mutex_t malloc_mutex_t; -#endif #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP # define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP @@ -45,26 +41,17 @@ JEMALLOC_INLINE void malloc_mutex_lock(malloc_mutex_t *mutex) { - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - OSSpinLockLock(mutex); -#else + if (isthreaded) pthread_mutex_lock(mutex); -#endif - } } JEMALLOC_INLINE bool malloc_mutex_trylock(malloc_mutex_t *mutex) { - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - return (OSSpinLockTry(mutex) == false); -#else + if (isthreaded) return (pthread_mutex_trylock(mutex) != 0); -#endif - } else + else return (false); } @@ -72,13 +59,8 @@ JEMALLOC_INLINE void malloc_mutex_unlock(malloc_mutex_t *mutex) { - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - OSSpinLockUnlock(mutex); -#else + if (isthreaded) pthread_mutex_unlock(mutex); -#endif - } } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/private_namespace.h b/dep/jemalloc/include/jemalloc/internal/private_namespace.h deleted file mode 100644 index d4f5f96d7b2..00000000000 --- a/dep/jemalloc/include/jemalloc/internal/private_namespace.h +++ /dev/null @@ -1,195 +0,0 @@ -#define arena_bin_index JEMALLOC_N(arena_bin_index) -#define arena_boot JEMALLOC_N(arena_boot) -#define arena_dalloc JEMALLOC_N(arena_dalloc) -#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) -#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) -#define arena_malloc JEMALLOC_N(arena_malloc) -#define arena_malloc_large JEMALLOC_N(arena_malloc_large) -#define arena_malloc_small JEMALLOC_N(arena_malloc_small) -#define arena_new JEMALLOC_N(arena_new) -#define arena_palloc JEMALLOC_N(arena_palloc) -#define arena_prof_accum JEMALLOC_N(arena_prof_accum) -#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) -#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) -#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) -#define arena_purge_all JEMALLOC_N(arena_purge_all) -#define arena_ralloc JEMALLOC_N(arena_ralloc) -#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) -#define arena_run_regind JEMALLOC_N(arena_run_regind) -#define arena_salloc JEMALLOC_N(arena_salloc) -#define arena_salloc_demote JEMALLOC_N(arena_salloc_demote) -#define arena_stats_merge JEMALLOC_N(arena_stats_merge) -#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) -#define arenas_bin_i_index 
JEMALLOC_N(arenas_bin_i_index) -#define arenas_extend JEMALLOC_N(arenas_extend) -#define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) -#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) -#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) -#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) -#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) -#define base_alloc JEMALLOC_N(base_alloc) -#define base_boot JEMALLOC_N(base_boot) -#define base_node_alloc JEMALLOC_N(base_node_alloc) -#define base_node_dealloc JEMALLOC_N(base_node_dealloc) -#define bitmap_full JEMALLOC_N(bitmap_full) -#define bitmap_get JEMALLOC_N(bitmap_get) -#define bitmap_info_init JEMALLOC_N(bitmap_info_init) -#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups) -#define bitmap_init JEMALLOC_N(bitmap_init) -#define bitmap_set JEMALLOC_N(bitmap_set) -#define bitmap_sfu JEMALLOC_N(bitmap_sfu) -#define bitmap_size JEMALLOC_N(bitmap_size) -#define bitmap_unset JEMALLOC_N(bitmap_unset) -#define bt_init JEMALLOC_N(bt_init) -#define buferror JEMALLOC_N(buferror) -#define choose_arena JEMALLOC_N(choose_arena) -#define choose_arena_hard JEMALLOC_N(choose_arena_hard) -#define chunk_alloc JEMALLOC_N(chunk_alloc) -#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) -#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) -#define chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap) -#define chunk_boot JEMALLOC_N(chunk_boot) -#define chunk_dealloc JEMALLOC_N(chunk_dealloc) -#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) -#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) -#define chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap) -#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) -#define chunk_in_dss JEMALLOC_N(chunk_in_dss) -#define chunk_in_swap JEMALLOC_N(chunk_in_swap) -#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) -#define chunk_swap_boot JEMALLOC_N(chunk_swap_boot) -#define chunk_swap_enable JEMALLOC_N(chunk_swap_enable) -#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) -#define ckh_count JEMALLOC_N(ckh_count) -#define ckh_delete JEMALLOC_N(ckh_delete) -#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert) -#define ckh_insert JEMALLOC_N(ckh_insert) -#define ckh_isearch JEMALLOC_N(ckh_isearch) -#define ckh_iter JEMALLOC_N(ckh_iter) -#define ckh_new JEMALLOC_N(ckh_new) -#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) -#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) -#define ckh_rebuild JEMALLOC_N(ckh_rebuild) -#define ckh_remove JEMALLOC_N(ckh_remove) -#define ckh_search JEMALLOC_N(ckh_search) -#define ckh_string_hash JEMALLOC_N(ckh_string_hash) -#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) -#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) -#define ckh_try_insert JEMALLOC_N(ckh_try_insert) -#define create_zone JEMALLOC_N(create_zone) -#define ctl_boot JEMALLOC_N(ctl_boot) -#define ctl_bymib JEMALLOC_N(ctl_bymib) -#define ctl_byname JEMALLOC_N(ctl_byname) -#define ctl_nametomib JEMALLOC_N(ctl_nametomib) -#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) -#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) -#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) -#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse) -#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start) -#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last) -#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new) 
-#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next) -#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch) -#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev) -#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch) -#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove) -#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter) -#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) -#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) -#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) -#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first) -#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert) -#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter) -#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse) -#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start) -#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last) -#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new) -#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next) -#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch) -#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev) -#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch) -#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove) -#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter) -#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse) -#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) -#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) -#define hash JEMALLOC_N(hash) -#define huge_boot JEMALLOC_N(huge_boot) -#define huge_dalloc JEMALLOC_N(huge_dalloc) -#define huge_malloc JEMALLOC_N(huge_malloc) -#define huge_palloc JEMALLOC_N(huge_palloc) -#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) -#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) -#define huge_ralloc JEMALLOC_N(huge_ralloc) -#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) -#define huge_salloc JEMALLOC_N(huge_salloc) -#define iallocm JEMALLOC_N(iallocm) -#define icalloc JEMALLOC_N(icalloc) -#define idalloc JEMALLOC_N(idalloc) -#define imalloc JEMALLOC_N(imalloc) -#define ipalloc JEMALLOC_N(ipalloc) -#define iralloc JEMALLOC_N(iralloc) -#define isalloc JEMALLOC_N(isalloc) -#define ivsalloc JEMALLOC_N(ivsalloc) -#define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init) -#define jemalloc_postfork JEMALLOC_N(jemalloc_postfork) -#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) -#define malloc_cprintf JEMALLOC_N(malloc_cprintf) -#define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy) -#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) -#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) -#define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) -#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) -#define malloc_printf JEMALLOC_N(malloc_printf) -#define malloc_write JEMALLOC_N(malloc_write) -#define mb_write JEMALLOC_N(mb_write) -#define pow2_ceil JEMALLOC_N(pow2_ceil) -#define prof_backtrace JEMALLOC_N(prof_backtrace) -#define prof_boot0 JEMALLOC_N(prof_boot0) -#define prof_boot1 JEMALLOC_N(prof_boot1) -#define prof_boot2 JEMALLOC_N(prof_boot2) -#define prof_ctx_get JEMALLOC_N(prof_ctx_get) -#define prof_ctx_set 
JEMALLOC_N(prof_ctx_set) -#define prof_free JEMALLOC_N(prof_free) -#define prof_gdump JEMALLOC_N(prof_gdump) -#define prof_idump JEMALLOC_N(prof_idump) -#define prof_lookup JEMALLOC_N(prof_lookup) -#define prof_malloc JEMALLOC_N(prof_malloc) -#define prof_mdump JEMALLOC_N(prof_mdump) -#define prof_realloc JEMALLOC_N(prof_realloc) -#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) -#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) -#define prof_tdata_init JEMALLOC_N(prof_tdata_init) -#define pthread_create JEMALLOC_N(pthread_create) -#define rtree_get JEMALLOC_N(rtree_get) -#define rtree_get_locked JEMALLOC_N(rtree_get_locked) -#define rtree_new JEMALLOC_N(rtree_new) -#define rtree_set JEMALLOC_N(rtree_set) -#define s2u JEMALLOC_N(s2u) -#define sa2u JEMALLOC_N(sa2u) -#define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) -#define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) -#define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) -#define stats_cactive_add JEMALLOC_N(stats_cactive_add) -#define stats_cactive_get JEMALLOC_N(stats_cactive_get) -#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) -#define stats_print JEMALLOC_N(stats_print) -#define szone2ozone JEMALLOC_N(szone2ozone) -#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) -#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) -#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) -#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) -#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) -#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) -#define tcache_boot JEMALLOC_N(tcache_boot) -#define tcache_create JEMALLOC_N(tcache_create) -#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) -#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) -#define tcache_destroy JEMALLOC_N(tcache_destroy) -#define tcache_event JEMALLOC_N(tcache_event) -#define tcache_get JEMALLOC_N(tcache_get) -#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) -#define thread_allocated_get JEMALLOC_N(thread_allocated_get) -#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) -#define u2s JEMALLOC_N(u2s) diff --git a/dep/jemalloc/include/jemalloc/internal/prof.h b/dep/jemalloc/include/jemalloc/internal/prof.h index e9064ba6e73..7864000b88b 100644 --- a/dep/jemalloc/include/jemalloc/internal/prof.h +++ b/dep/jemalloc/include/jemalloc/internal/prof.h @@ -227,60 +227,9 @@ bool prof_boot2(void); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#define PROF_ALLOC_PREP(nignore, size, ret) do { \ - prof_tdata_t *prof_tdata; \ - prof_bt_t bt; \ - \ - assert(size == s2u(size)); \ - \ - prof_tdata = PROF_TCACHE_GET(); \ - if (prof_tdata == NULL) { \ - prof_tdata = prof_tdata_init(); \ - if (prof_tdata == NULL) { \ - ret = NULL; \ - break; \ - } \ - } \ - \ - if (opt_prof_active == false) { \ - /* Sampling is currently inactive, so avoid sampling. */\ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } else if (opt_lg_prof_sample == 0) { \ - /* Don't bother with sampling logic, since sampling */\ - /* interval is 1. */\ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ - ret = prof_lookup(&bt); \ - } else { \ - if (prof_tdata->threshold == 0) { \ - /* Initialize. Seed the prng differently for */\ - /* each thread. 
*/\ - prof_tdata->prn_state = \ - (uint64_t)(uintptr_t)&size; \ - prof_sample_threshold_update(prof_tdata); \ - } \ - \ - /* Determine whether to capture a backtrace based on */\ - /* whether size is enough for prof_accum to reach */\ - /* prof_tdata->threshold. However, delay updating */\ - /* these variables until prof_{m,re}alloc(), because */\ - /* we don't know for sure that the allocation will */\ - /* succeed. */\ - /* */\ - /* Use subtraction rather than addition to avoid */\ - /* potential integer overflow. */\ - if (size >= prof_tdata->threshold - \ - prof_tdata->accum) { \ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ - ret = prof_lookup(&bt); \ - } else \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } \ -} while (0) - #ifndef JEMALLOC_ENABLE_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +prof_thr_cnt_t *prof_alloc_prep(size_t size); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool prof_sample_accum_update(size_t size); @@ -298,22 +247,8 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) double u; /* - * Compute sample threshold as a geometrically distributed random + * Compute prof_sample_threshold as a geometrically distributed random * variable with mean (2^opt_lg_prof_sample). - * - * __ __ - * | log(u) | 1 - * prof_tdata->threshold = | -------- |, where p = ------------------- - * | log(1-p) | opt_lg_prof_sample - * 2 - * - * For more information on the math, see: - * - * Non-Uniform Random Variate Generation - * Luc Devroye - * Springer-Verlag, New York, 1986 - * pp 500 - * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) */ prn64(r, 53, prof_tdata->prn_state, (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); @@ -323,6 +258,71 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) + (uint64_t)1U; } +JEMALLOC_INLINE prof_thr_cnt_t * +prof_alloc_prep(size_t size) +{ +#ifdef JEMALLOC_ENABLE_INLINE + /* This function does not have its own stack frame, because it is inlined. */ +# define NIGNORE 1 +#else +# define NIGNORE 2 +#endif + prof_thr_cnt_t *ret; + prof_tdata_t *prof_tdata; + prof_bt_t bt; + + assert(size == s2u(size)); + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata == NULL) { + prof_tdata = prof_tdata_init(); + if (prof_tdata == NULL) + return (NULL); + } + + if (opt_prof_active == false) { + /* Sampling is currently inactive, so avoid sampling. */ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } else if (opt_lg_prof_sample == 0) { + /* + * Don't bother with sampling logic, since sampling interval is + * 1. + */ + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else { + if (prof_tdata->threshold == 0) { + /* + * Initialize. Seed the prng differently for each + * thread. + */ + prof_tdata->prn_state = (uint64_t)(uintptr_t)&size; + prof_sample_threshold_update(prof_tdata); + } + + /* + * Determine whether to capture a backtrace based on whether + * size is enough for prof_accum to reach + * prof_tdata->threshold. However, delay updating these + * variables until prof_{m,re}alloc(), because we don't know + * for sure that the allocation will succeed. + * + * Use subtraction rather than addition to avoid potential + * integer overflow. 
+ */ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } + + return (ret); +#undef NIGNORE +} + JEMALLOC_INLINE prof_ctx_t * prof_ctx_get(const void *ptr) { @@ -334,7 +334,7 @@ prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); ret = arena_prof_ctx_get(ptr); } else @@ -353,7 +353,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); arena_prof_ctx_set(ptr, ctx); } else @@ -374,7 +374,7 @@ prof_sample_accum_update(size_t size) /* Take care to avoid integer overflow. */ if (size >= prof_tdata->threshold - prof_tdata->accum) { prof_tdata->accum -= (prof_tdata->threshold - size); - /* Compute new sample threshold. */ + /* Compute new prof_sample_threshold. */ prof_sample_threshold_update(prof_tdata); while (prof_tdata->accum >= prof_tdata->threshold) { prof_tdata->accum -= prof_tdata->threshold; @@ -401,7 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) * always possible to tell in advance how large an * object's usable size will be, so there should never * be a difference between the size passed to - * PROF_ALLOC_PREP() and prof_malloc(). + * prof_alloc_prep() and prof_malloc(). */ assert((uintptr_t)cnt == (uintptr_t)1U); } @@ -445,7 +445,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, if (prof_sample_accum_update(size)) { /* * Don't sample. The size passed to - * PROF_ALLOC_PREP() was larger than what + * prof_alloc_prep() was larger than what * actually got allocated, so a backtrace was * captured for this allocation, even though * its actual size was insufficient to cross diff --git a/dep/jemalloc/include/jemalloc/internal/rtree.h b/dep/jemalloc/include/jemalloc/internal/rtree.h index 95d6355a5f4..9d58ebac545 100644 --- a/dep/jemalloc/include/jemalloc/internal/rtree.h +++ b/dep/jemalloc/include/jemalloc/internal/rtree.h @@ -49,7 +49,7 @@ void *rtree_get(rtree_t *rtree, uintptr_t key); bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_)) #define RTREE_GET_GENERATE(f) \ /* The least significant bits of the key are ignored. 
*/ \ JEMALLOC_INLINE void * \ diff --git a/dep/jemalloc/include/jemalloc/internal/stats.h b/dep/jemalloc/include/jemalloc/internal/stats.h index 2a9b31d9ffc..3fc2080a34b 100644 --- a/dep/jemalloc/include/jemalloc/internal/stats.h +++ b/dep/jemalloc/include/jemalloc/internal/stats.h @@ -154,10 +154,6 @@ struct chunk_stats_s { extern bool opt_stats_print; -#ifdef JEMALLOC_STATS -extern size_t stats_cactive; -#endif - char *u2s(uint64_t x, unsigned base, char *s); #ifdef JEMALLOC_STATS void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, @@ -170,38 +166,9 @@ void stats_print(void (*write)(void *, const char *), void *cbopaque, #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ -#ifdef JEMALLOC_H_INLINES #ifdef JEMALLOC_STATS +#ifdef JEMALLOC_H_INLINES -#ifndef JEMALLOC_ENABLE_INLINE -size_t stats_cactive_get(void); -void stats_cactive_add(size_t size); -void stats_cactive_sub(size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) -JEMALLOC_INLINE size_t -stats_cactive_get(void) -{ - - return (atomic_read_z(&stats_cactive)); -} - -JEMALLOC_INLINE void -stats_cactive_add(size_t size) -{ - - atomic_add_z(&stats_cactive, size); -} - -JEMALLOC_INLINE void -stats_cactive_sub(size_t size) -{ - - atomic_sub_z(&stats_cactive, size); -} -#endif - -#endif /* JEMALLOC_STATS */ #endif /* JEMALLOC_H_INLINES */ +#endif /* JEMALLOC_STATS */ /******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/tcache.h b/dep/jemalloc/include/jemalloc/internal/tcache.h index da3c68c5770..1ad91a9b1e0 100644 --- a/dep/jemalloc/include/jemalloc/internal/tcache.h +++ b/dep/jemalloc/include/jemalloc/internal/tcache.h @@ -2,7 +2,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; @@ -33,22 +32,15 @@ typedef struct tcache_s tcache_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - struct tcache_bin_s { # ifdef JEMALLOC_STATS tcache_bin_stats_t tstats; # endif - int low_water; /* Min # cached since last GC. */ - unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ + unsigned low_water; /* Min # cached since last GC. */ + unsigned high_water; /* Max # cached since last GC. */ unsigned ncached; /* # of cached objects. */ - void **avail; /* Stack of available objects. */ + unsigned ncached_max; /* Upper limit on ncached. */ + void *avail; /* Chain of available objects. */ }; struct tcache_s { @@ -62,12 +54,6 @@ struct tcache_s { unsigned ev_cnt; /* Event count since incremental GC. */ unsigned next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ - /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. 
- */ }; #endif /* JEMALLOC_H_STRUCTS */ @@ -78,8 +64,6 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_gc_sweep; -extern tcache_bin_info_t *tcache_bin_info; - /* Map of thread-specific caches. */ #ifndef NO_TLS extern __thread tcache_t *tcache_tls @@ -126,7 +110,7 @@ void tcache_destroy(tcache_t *tcache); #ifdef JEMALLOC_STATS void tcache_stats_merge(tcache_t *tcache, arena_t *arena); #endif -bool tcache_boot(void); +void tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -185,7 +169,6 @@ tcache_event(tcache_t *tcache) if (tcache->ev_cnt == tcache_gc_incr) { size_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; if (tbin->low_water > 0) { /* @@ -209,22 +192,9 @@ tcache_event(tcache_t *tcache) #endif ); } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) - >= 1) - tbin->lg_fill_div++; - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X. Make sure lg_fill_div - * stays greater than 0. - */ - if (tbin->lg_fill_div > 1) - tbin->lg_fill_div--; } tbin->low_water = tbin->ncached; + tbin->high_water = tbin->ncached; tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) @@ -238,14 +208,13 @@ tcache_alloc_easy(tcache_bin_t *tbin) { void *ret; - if (tbin->ncached == 0) { - tbin->low_water = -1; + if (tbin->ncached == 0) return (NULL); - } tbin->ncached--; - if ((int)tbin->ncached < tbin->low_water) + if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; - ret = tbin->avail[tbin->ncached]; + ret = tbin->avail; + tbin->avail = *(void **)ret; return (ret); } @@ -256,7 +225,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) size_t binind; tcache_bin_t *tbin; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin[size]; assert(binind < nbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -265,7 +234,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } - assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); + assert(arena_salloc(ret) == tcache->arena->bins[binind].reg_size); if (zero == false) { #ifdef JEMALLOC_FILL @@ -281,7 +250,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tbin->tstats.nrequests++; #endif #ifdef JEMALLOC_PROF - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; + tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size; #endif tcache_event(tcache); return (ret); @@ -345,7 +314,6 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) arena_run_t *run; arena_bin_t *bin; tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; size_t pageind, binind; arena_chunk_map_t *mapelm; @@ -357,7 +325,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); @@ -365,22 +333,23 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) #ifdef JEMALLOC_FILL if (opt_junk) - memset(ptr, 0x5a, arena_bin_info[binind].reg_size); + memset(ptr, 0x5a, bin->reg_size); #endif tbin = &tcache->tbins[binind]; - tbin_info = 
&tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1) + if (tbin->ncached == tbin->ncached_max) { + tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) , tcache #endif ); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; + assert(tbin->ncached < tbin->ncached_max); + *(void **)ptr = tbin->avail; + tbin->avail = ptr; tbin->ncached++; + if (tbin->ncached > tbin->high_water) + tbin->high_water = tbin->ncached; tcache_event(tcache); } @@ -392,7 +361,6 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); assert(arena_salloc(ptr) > small_maxclass); @@ -409,18 +377,19 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) #endif tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1) + if (tbin->ncached == tbin->ncached_max) { + tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) , tcache #endif ); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; + assert(tbin->ncached < tbin->ncached_max); + *(void **)ptr = tbin->avail; + tbin->avail = ptr; tbin->ncached++; + if (tbin->ncached > tbin->high_water) + tbin->high_water = tbin->ncached; tcache_event(tcache); } diff --git a/dep/jemalloc/include/jemalloc/jemalloc.h b/dep/jemalloc/include/jemalloc/jemalloc.h index 3842e28115e..287dac46ed2 100644 --- a/dep/jemalloc/include/jemalloc/jemalloc.h +++ b/dep/jemalloc/include/jemalloc/jemalloc.h @@ -7,19 +7,19 @@ extern "C" { #include <limits.h> #include <strings.h> -#define JEMALLOC_VERSION "2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760" +#define JEMALLOC_VERSION "2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf" #define JEMALLOC_VERSION_MAJOR 2 -#define JEMALLOC_VERSION_MINOR 2 -#define JEMALLOC_VERSION_BUGFIX 5 +#define JEMALLOC_VERSION_MINOR 1 +#define JEMALLOC_VERSION_BUGFIX 0 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "fc1bb70e5f0d9a58b39efa39cc549b5af5104760" +#define JEMALLOC_VERSION_GID "1c4b088b08d3bc7617a34387e196ce03716160bf" #include "jemalloc_defs.h" #ifndef JEMALLOC_P # define JEMALLOC_P(s) s #endif -#define ALLOCM_LG_ALIGN(la) (la) +#define ALLOCM_LG_ALIGN ((int)0x3f) #if LG_SIZEOF_PTR == 2 #define ALLOCM_ALIGN(a) (ffs(a)-1) #else diff --git a/dep/jemalloc/include/jemalloc/jemalloc_defs.h b/dep/jemalloc/include/jemalloc/jemalloc_defs.h index f0f8fa71a4d..a641b56da03 100644 --- a/dep/jemalloc/include/jemalloc/jemalloc_defs.h +++ b/dep/jemalloc/include/jemalloc/jemalloc_defs.h @@ -20,32 +20,11 @@ #endif /* - * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. - * For shared libraries, symbol visibility mechanisms prevent these symbols - * from being exported, but for static libraries, naming collisions are a real - * possibility. - */ -#define JEMALLOC_PRIVATE_NAMESPACE "" -#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix - -/* * Hyper-threaded CPUs may need a special instruction inside spin loops in * order to yield to another virtual CPU. 
*/ #define CPU_SPINWAIT __asm__ volatile("pause") -/* - * Defined if OSAtomic*() functions are available, as provided by Darwin, and - * documented in the atomic(3) manual page. - */ -/* #undef JEMALLOC_OSATOMIC */ - -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -/* #undef JEMALLOC_OSSPIN */ - /* Defined if __attribute__((...)) syntax is supported. */ #define JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR @@ -75,21 +54,18 @@ /* Use libgcc for profile backtracing if defined. */ /* #undef JEMALLOC_PROF_LIBGCC */ -/* Use gcc intrinsics for profile backtracing if defined. */ -/* #undef JEMALLOC_PROF_GCC */ - /* * JEMALLOC_TINY enables support for tiny objects, which are smaller than one * quantum. */ -#define JEMALLOC_TINY +/* #undef JEMALLOC_TINY */ /* * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. * This makes it possible to allocate/deallocate objects without any locking * when the cache is in the steady state. */ -#define JEMALLOC_TCACHE +/* #undef JEMALLOC_TCACHE */ /* * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage @@ -110,7 +86,7 @@ /* #undef JEMALLOC_SYSV */ /* Support lazy locking (avoid locking unless a second thread is launched). */ -#define JEMALLOC_LAZY_LOCK +/* #undef JEMALLOC_LAZY_LOCK */ /* Determine page size at run time if defined. */ /* #undef DYNAMIC_PAGE_SHIFT */ @@ -157,12 +133,9 @@ /* #undef JEMALLOC_PURGE_MADVISE_FREE */ /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#define LG_SIZEOF_PTR 3 +#define LG_SIZEOF_PTR 2 /* sizeof(int) == 2^LG_SIZEOF_INT. */ #define LG_SIZEOF_INT 2 -/* sizeof(long) == 2^LG_SIZEOF_LONG. */ -#define LG_SIZEOF_LONG 3 - #endif /* JEMALLOC_DEFS_H_ */ diff --git a/dep/jemalloc/src/arena.c b/dep/jemalloc/src/arena.c index d166ca1ec4d..7f939b3cd77 100644 --- a/dep/jemalloc/src/arena.c +++ b/dep/jemalloc/src/arena.c @@ -8,7 +8,6 @@ size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; uint8_t const *small_size2bin; -arena_bin_info_t *arena_bin_info; /* Various bin-related settings. */ unsigned nqbins; @@ -26,27 +25,26 @@ size_t mspace_mask; /* * const_small_size2bin is a static constant lookup table that in the common - * case can be used as-is for small_size2bin. + * case can be used as-is for small_size2bin. For dynamically linked programs, + * this avoids a page of memory overhead per process. */ -#if (LG_TINY_MIN == 2) -#define S2B_4(i) i, +#define S2B_1(i) i, +#define S2B_2(i) S2B_1(i) S2B_1(i) +#define S2B_4(i) S2B_2(i) S2B_2(i) #define S2B_8(i) S2B_4(i) S2B_4(i) -#elif (LG_TINY_MIN == 3) -#define S2B_8(i) i, -#else -# error "Unsupported LG_TINY_MIN" -#endif #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) #define S2B_64(i) S2B_32(i) S2B_32(i) #define S2B_128(i) S2B_64(i) S2B_64(i) #define S2B_256(i) S2B_128(i) S2B_128(i) /* - * The number of elements in const_small_size2bin is dependent on the - * definition for SUBPAGE. + * The number of elements in const_small_size2bin is dependent on page size + * and on the definition for SUBPAGE. If SUBPAGE changes, the '- 255' must also + * change, along with the addition/removal of static lookup table element + * definitions. 
*/ -static JEMALLOC_ATTR(aligned(CACHELINE)) - const uint8_t const_small_size2bin[] = { +static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = { + S2B_1(0xffU) /* 0 */ #if (LG_QUANTUM == 4) /* 16-byte quantum **********************/ # ifdef JEMALLOC_TINY @@ -175,6 +173,7 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); +static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, @@ -192,9 +191,6 @@ static bool small_size2bin_init(void); static void small_size2bin_validate(void); #endif static bool small_size2bin_init_hard(void); -static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, - size_t min_run_size); -static bool bin_info_init(void); /******************************************************************************/ @@ -250,48 +246,57 @@ rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, u.rb_link, arena_avail_comp) static inline void * -arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) +arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) { void *ret; - unsigned regind; - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); assert(run->nfree > 0); - assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); - regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + - (uintptr_t)(bin_info->reg_size * regind)); run->nfree--; - if (regind == run->nextind) - run->nextind++; - assert(regind < run->nextind); + ret = run->avail; + if (ret != NULL) { + /* Double free can cause assertion failure.*/ + assert(ret != NULL); + /* Write-after free can cause assertion failure. */ + assert((uintptr_t)ret >= (uintptr_t)run + + (uintptr_t)bin->reg0_offset); + assert((uintptr_t)ret < (uintptr_t)run->next); + assert(((uintptr_t)ret - ((uintptr_t)run + + (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size == + 0); + run->avail = *(void **)ret; + return (ret); + } + ret = run->next; + run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size); + assert(ret != NULL); return (ret); } static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind = arena_run_regind(run, bin_info, ptr); - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - - assert(run->nfree < bin_info->nregs); + + assert(run->nfree < run->bin->nregs); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size + (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size == 0); + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. 
+ */ assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); + (uintptr_t)run->bin->reg0_offset); + /* + * Freeing a pointer past in the run's frontier can cause assertion + * failure. + */ + assert((uintptr_t)ptr < (uintptr_t)run->next); - bitmap_unset(bitmap, &bin_info->bitmap_info, regind); + *(void **)ptr = run->avail; + run->avail = ptr; run->nfree++; } @@ -315,9 +320,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); old_ndirty = chunk->ndirty; @@ -336,13 +338,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, rem_pages = total_pages - need_pages; arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. */ - cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) << - PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ @@ -569,7 +564,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena->ndirty -= spare->ndirty; } malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize, true); + chunk_dealloc((void *)spare, chunksize); malloc_mutex_lock(&arena->lock); #ifdef JEMALLOC_STATS arena->stats.mapped -= chunksize; @@ -730,9 +725,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(pageind + npages <= chunk_npages); if (mapelm->bits & CHUNK_MAP_DIRTY) { size_t i; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); @@ -755,17 +747,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) CHUNK_MAP_ALLOCATED; } -#ifdef JEMALLOC_STATS - /* - * Update stats_cactive if nactive is crossing a - * chunk multiple. - */ - cactive_diff = CHUNK_CEILING((arena->nactive + - npages) << PAGE_SHIFT) - - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif arena->nactive += npages; /* Append to list for later processing. 
*/ ql_elm_new(mapelm, u.ql_link); @@ -782,12 +763,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) chunk + (uintptr_t)(pageind << PAGE_SHIFT)); assert((mapelm->bits >> PAGE_SHIFT) == 0); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(arena, - run->bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - pageind += bin_info->run_size >> PAGE_SHIFT; + assert(run->magic == ARENA_RUN_MAGIC); + pageind += run->bin->run_size >> PAGE_SHIFT; } } } @@ -868,10 +845,9 @@ arena_purge(arena_t *arena, bool all) } assert(ndirty == arena->ndirty); #endif - assert(arena->ndirty > arena->npurgatory || all); - assert(arena->ndirty - arena->npurgatory > chunk_npages || all); - assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory) || all); + assert(arena->ndirty > arena->npurgatory); + assert(arena->ndirty > chunk_npages || all); + assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all); #ifdef JEMALLOC_STATS arena->stats.npurge++; @@ -883,10 +859,8 @@ arena_purge(arena_t *arena, bool all) * multiple threads from racing to reduce ndirty below the threshold. */ npurgatory = arena->ndirty - arena->npurgatory; - if (all == false) { - assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); + if (all == false) npurgatory -= arena->nactive >> opt_lg_dirty_mult; - } arena->npurgatory += npurgatory; while (npurgatory > 0) { @@ -957,9 +931,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_chunk_t *chunk; size_t size, run_ind, run_pages, flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) @@ -975,19 +946,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_LARGE) != 0); assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - } else { - size_t binind = arena_bin_index(arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size = bin_info->run_size; - } + } else + size = run->bin->run_size; run_pages = (size >> PAGE_SHIFT); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. */ - cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) - - CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_sub(cactive_diff); -#endif arena->nactive -= run_pages; /* @@ -1213,8 +1174,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_chunk_map_t *mapelm; arena_run_t *run; - size_t binind; - arena_bin_info_t *bin_info; /* Look for a usable run. */ mapelm = arena_run_tree_first(&bin->runs); @@ -1238,23 +1197,18 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) } /* No existing runs have any space available. */ - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; - /* Allocate a new run. */ malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, false); + run = arena_run_alloc(arena, bin->run_size, false, false); if (run != NULL) { - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - /* Initialize run internals. 
*/ run->bin = bin; - run->nextind = 0; - run->nfree = bin_info->nregs; - bitmap_init(bitmap, &bin_info->bitmap_info); + run->avail = NULL; + run->next = (void *)((uintptr_t)run + + (uintptr_t)bin->reg0_offset); + run->nfree = bin->nregs; #ifdef JEMALLOC_DEBUG run->magic = ARENA_RUN_MAGIC; #endif @@ -1305,12 +1259,8 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; - size_t binind; - arena_bin_info_t *bin_info; arena_run_t *run; - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; bin->runcur = NULL; run = arena_bin_nonfull_run_get(arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { @@ -1318,22 +1268,22 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). */ - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - ret = arena_run_reg_alloc(bin->runcur, bin_info); + ret = arena_run_reg_alloc(bin->runcur, bin); if (run != NULL) { arena_chunk_t *chunk; /* * arena_run_alloc() may have allocated run, or it may - * have pulled run from the bin's run tree. Therefore + * have pulled it from the bin's run tree. Therefore * it is unsafe to make any assumptions about how run * has previously been used, and arena_bin_lower_run() * must be called, as if a region were just deallocated * from the run. */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin_info->nregs) + if (run->nfree == bin->nregs) arena_dalloc_bin_run(arena, chunk, run, bin); else arena_bin_lower_run(arena, chunk, run, bin); @@ -1346,10 +1296,10 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = run; - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - return (arena_run_reg_alloc(bin->runcur, bin_info)); + return (arena_run_reg_alloc(bin->runcur, bin)); } #ifdef JEMALLOC_PROF @@ -1389,19 +1339,18 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); - for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> - tbin->lg_fill_div); i < nfill; i++) { + for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) { if ((run = bin->runcur) != NULL && run->nfree > 0) - ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); + ptr = arena_run_reg_alloc(run, bin); else ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) break; - /* Insert such that low regions get used first. */ - tbin->avail[nfill - 1 - i] = ptr; + *(void **)ptr = tbin->avail; + tbin->avail = ptr; } #ifdef JEMALLOC_STATS - bin->stats.allocated += i * arena_bin_info[binind].reg_size; + bin->stats.allocated += (i - tbin->ncached) * bin->reg_size; bin->stats.nmalloc += i; bin->stats.nrequests += tbin->tstats.nrequests; bin->stats.nfills++; @@ -1409,9 +1358,119 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif malloc_mutex_unlock(&bin->lock); tbin->ncached = i; + if (tbin->ncached > tbin->high_water) + tbin->high_water = tbin->ncached; } #endif +/* + * Calculate bin->run_size such that it meets the following constraints: + * + * *) bin->run_size >= min_run_size + * *) bin->run_size <= arena_maxclass + * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). 
+ * *) run header size < PAGE_SIZE + * + * bin->nregs and bin->reg0_offset are also calculated here, since these + * settings are all interdependent. + */ +static size_t +arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) +{ + size_t try_run_size, good_run_size; + uint32_t try_nregs, good_nregs; + uint32_t try_hdr_size, good_hdr_size; +#ifdef JEMALLOC_PROF + uint32_t try_ctx0_offset, good_ctx0_offset; +#endif + uint32_t try_reg0_offset, good_reg0_offset; + + assert(min_run_size >= PAGE_SIZE); + assert(min_run_size <= arena_maxclass); + + /* + * Calculate known-valid settings before entering the run_size + * expansion loop, so that the first part of the loop always copies + * valid settings. + * + * The do..while loop iteratively reduces the number of regions until + * the run header and the regions no longer overlap. A closed formula + * would be quite messy, since there is an interdependency between the + * header's mask length and the number of regions. + */ + try_run_size = min_run_size; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size) + + 1; /* Counter-act try_nregs-- in loop. */ + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. */ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); + } else + try_ctx0_offset = 0; +#endif + try_reg0_offset = try_run_size - (try_nregs * bin->reg_size); + } while (try_hdr_size > try_reg0_offset); + + /* run_size expansion loop. */ + do { + /* + * Copy valid settings before trying more aggressive settings. + */ + good_run_size = try_run_size; + good_nregs = try_nregs; + good_hdr_size = try_hdr_size; +#ifdef JEMALLOC_PROF + good_ctx0_offset = try_ctx0_offset; +#endif + good_reg0_offset = try_reg0_offset; + + /* Try more aggressive settings. */ + try_run_size += PAGE_SIZE; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */ + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* + * Add space for one (prof_ctx_t *) per region. + */ + try_hdr_size += try_nregs * + sizeof(prof_ctx_t *); + } +#endif + try_reg0_offset = try_run_size - (try_nregs * + bin->reg_size); + } while (try_hdr_size > try_reg0_offset); + } while (try_run_size <= arena_maxclass + && try_run_size <= arena_maxclass + && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX + && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size + && try_hdr_size < PAGE_SIZE); + + assert(good_hdr_size <= good_reg0_offset); + + /* Copy final settings. 
*/ + bin->run_size = good_run_size; + bin->nregs = good_nregs; +#ifdef JEMALLOC_PROF + bin->ctx0_offset = good_ctx0_offset; +#endif + bin->reg0_offset = good_reg0_offset; + + return (good_run_size); +} + void * arena_malloc_small(arena_t *arena, size_t size, bool zero) { @@ -1420,14 +1479,14 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) arena_run_t *run; size_t binind; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin[size]; assert(binind < nbins); bin = &arena->bins[binind]; - size = arena_bin_info[binind].reg_size; + size = bin->reg_size; malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) - ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); + ret = arena_run_reg_alloc(run, bin); else ret = arena_bin_malloc_hard(arena, bin); @@ -1631,13 +1690,11 @@ arena_salloc(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(run->magic == ARENA_RUN_MAGIC); assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == + (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == 0); - ret = bin_info->reg_size; + ret = run->bin->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1657,11 +1714,10 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr) == PAGE_SIZE); - assert(size <= small_maxclass); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin[size]; assert(binind < nbins); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); @@ -1685,13 +1741,11 @@ arena_salloc_demote(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(run->magic == ARENA_RUN_MAGIC); assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == + (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == 0); - ret = bin_info->reg_size; + ret = run->bin->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1700,7 +1754,7 @@ arena_salloc_demote(const void *ptr) size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; assert(binind < nbins); - ret = arena_bin_info[binind].reg_size; + ret = chunk->arena->bins[binind].reg_size; } assert(ret != 0); } @@ -1717,22 +1771,17 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, /* Dissociate run from bin. 
*/ if (run == bin->runcur) bin->runcur = NULL; - else { - size_t binind = arena_bin_index(chunk->arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - - if (bin_info->nregs != 1) { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; - /* - * This block's conditional is necessary because if the - * run only contains one region, then it never gets - * inserted into the non-full runs tree. - */ - arena_run_tree_remove(&bin->runs, run_mapelm); - } + else if (bin->nregs != 1) { + size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >> + PAGE_SHIFT; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; + /* + * This block's conditional is necessary because if the run + * only contains one region, then it never gets inserted into + * the non-full runs tree. + */ + arena_run_tree_remove(&bin->runs, run_mapelm); } } @@ -1740,24 +1789,18 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t binind; - arena_bin_info_t *bin_info; size_t npages, run_ind, past; assert(run != bin->runcur); assert(arena_run_tree_search(&bin->runs, &chunk->map[ (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); - binind = arena_bin_index(chunk->arena, run->bin); - bin_info = &arena_bin_info[binind]; - malloc_mutex_unlock(&bin->lock); /******************************/ - npages = bin_info->run_size >> PAGE_SHIFT; + npages = bin->run_size >> PAGE_SHIFT; run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - past = (size_t)(PAGE_CEILING((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT); + past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk) + >> PAGE_SHIFT); malloc_mutex_lock(&arena->lock); /* @@ -1774,7 +1817,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | (chunk->map[run_ind+npages-1-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin_info->run_size | + chunk->map[run_ind-map_bias].bits = bin->run_size | CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), @@ -1843,12 +1886,10 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; #if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size = bin_info->reg_size; + size = bin->reg_size; #endif #ifdef JEMALLOC_FILL @@ -1857,7 +1898,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif arena_run_reg_dalloc(run, ptr); - if (run->nfree == bin_info->nregs) { + if (run->nfree == bin->nregs) { arena_dissociate_bin_run(chunk, run, bin); arena_dalloc_bin_run(arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) @@ -2091,7 +2132,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - dassert(arena->magic == 
ARENA_MAGIC); + assert(arena->magic == ARENA_MAGIC); if (psize < oldsize) { #ifdef JEMALLOC_FILL @@ -2129,11 +2170,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= small_maxclass) { - assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size - == oldsize); + assert(choose_arena()->bins[small_size2bin[ + oldsize]].reg_size == oldsize); if ((size + extra <= small_maxclass && - SMALL_SIZE2BIN(size + extra) == - SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && + small_size2bin[size + extra] == + small_size2bin[oldsize]) || (size <= oldsize && size + extra >= oldsize)) { #ifdef JEMALLOC_FILL if (opt_junk && size < oldsize) { @@ -2169,29 +2210,24 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (ret != NULL) return (ret); + /* * size and oldsize are different enough that we need to move the * object. In that case, fall back to allocating new space and * copying. */ - if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - } else + if (alignment != 0) + ret = ipalloc(size + extra, alignment, zero); + else ret = arena_malloc(size + extra, zero); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. */ - if (alignment != 0) { - size_t usize = sa2u(size, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - } else + if (alignment != 0) + ret = ipalloc(size, alignment, zero); + else ret = arena_malloc(size, zero); if (ret == NULL) @@ -2215,9 +2251,9 @@ arena_new(arena_t *arena, unsigned ind) { unsigned i; arena_bin_t *bin; + size_t prev_run_size; arena->ind = ind; - arena->nthreads = 0; if (malloc_mutex_init(&arena->lock)) return (true); @@ -2251,6 +2287,8 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ + prev_run_size = PAGE_SIZE; + i = 0; #ifdef JEMALLOC_TINY /* (2^n)-spaced tiny bins. 
*/ @@ -2260,6 +2298,11 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = (1U << (LG_TINY_MIN + i)); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2273,6 +2316,11 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = (i - ntbins + 1) << LG_QUANTUM; + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2285,6 +2333,12 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) << + LG_CACHELINE); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2297,6 +2351,12 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins)) + << LG_SUBPAGE); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2315,39 +2375,40 @@ small_size2bin_validate(void) { size_t i, size, binind; + assert(small_size2bin[0] == 0xffU); i = 1; # ifdef JEMALLOC_TINY /* Tiny. */ for (; i < (1U << LG_TINY_MIN); i++) { size = pow2_ceil(1U << LG_TINY_MIN); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } for (; i < qspace_min; i++) { size = pow2_ceil(i); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } # endif /* Quantum-spaced. */ for (; i <= qspace_max; i++) { size = QUANTUM_CEILING(i); binind = ntbins + (size >> LG_QUANTUM) - 1; - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } /* Cacheline-spaced. */ for (; i <= cspace_max; i++) { size = CACHELINE_CEILING(i); binind = ntbins + nqbins + ((size - cspace_min) >> LG_CACHELINE); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } /* Sub-page. 
*/ for (; i <= sspace_max; i++) { size = SUBPAGE_CEILING(i); binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> LG_SUBPAGE); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } } #endif @@ -2358,12 +2419,12 @@ small_size2bin_init(void) if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)) + || sizeof(const_small_size2bin) != small_maxclass + 1) return (small_size2bin_init_hard()); small_size2bin = const_small_size2bin; #ifdef JEMALLOC_DEBUG + assert(sizeof(const_small_size2bin) == small_maxclass + 1); small_size2bin_validate(); #endif return (false); @@ -2374,52 +2435,49 @@ small_size2bin_init_hard(void) { size_t i, size, binind; uint8_t *custom_small_size2bin; -#define CUSTOM_SMALL_SIZE2BIN(s) \ - custom_small_size2bin[(s-1) >> LG_TINY_MIN] assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)); + || sizeof(const_small_size2bin) != small_maxclass + 1); - custom_small_size2bin = (uint8_t *) - base_alloc(small_maxclass >> LG_TINY_MIN); + custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1); if (custom_small_size2bin == NULL) return (true); + custom_small_size2bin[0] = 0xffU; i = 1; #ifdef JEMALLOC_TINY /* Tiny. */ - for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { + for (; i < (1U << LG_TINY_MIN); i++) { size = pow2_ceil(1U << LG_TINY_MIN); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } - for (; i < qspace_min; i += TINY_MIN) { + for (; i < qspace_min; i++) { size = pow2_ceil(i); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } #endif /* Quantum-spaced. */ - for (; i <= qspace_max; i += TINY_MIN) { + for (; i <= qspace_max; i++) { size = QUANTUM_CEILING(i); binind = ntbins + (size >> LG_QUANTUM) - 1; - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } /* Cacheline-spaced. */ - for (; i <= cspace_max; i += TINY_MIN) { + for (; i <= cspace_max; i++) { size = CACHELINE_CEILING(i); binind = ntbins + nqbins + ((size - cspace_min) >> LG_CACHELINE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } /* Sub-page. */ - for (; i <= sspace_max; i += TINY_MIN) { + for (; i <= sspace_max; i++) { size = SUBPAGE_CEILING(i); binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> LG_SUBPAGE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } small_size2bin = custom_small_size2bin; @@ -2427,190 +2485,6 @@ small_size2bin_init_hard(void) small_size2bin_validate(); #endif return (false); -#undef CUSTOM_SMALL_SIZE2BIN -} - -/* - * Calculate bin_info->run_size such that it meets the following constraints: - * - * *) bin_info->run_size >= min_run_size - * *) bin_info->run_size <= arena_maxclass - * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). - * *) bin_info->nregs <= RUN_MAXREGS - * - * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also - * calculated here, since these settings are all interdependent. 
- */ -static size_t -bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) -{ - size_t try_run_size, good_run_size; - uint32_t try_nregs, good_nregs; - uint32_t try_hdr_size, good_hdr_size; - uint32_t try_bitmap_offset, good_bitmap_offset; -#ifdef JEMALLOC_PROF - uint32_t try_ctx0_offset, good_ctx0_offset; -#endif - uint32_t try_reg0_offset, good_reg0_offset; - - assert(min_run_size >= PAGE_SIZE); - assert(min_run_size <= arena_maxclass); - - /* - * Calculate known-valid settings before entering the run_size - * expansion loop, so that the first part of the loop always copies - * valid settings. - * - * The do..while loop iteratively reduces the number of regions until - * the run header and the regions no longer overlap. A closed formula - * would be quite messy, since there is an interdependency between the - * header's mask length and the number of regions. - */ - try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* Add space for one (prof_ctx_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_ctx_t *); - } else - try_ctx0_offset = 0; -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); - - /* run_size expansion loop. */ - do { - /* - * Copy valid settings before trying more aggressive settings. - */ - good_run_size = try_run_size; - good_nregs = try_nregs; - good_hdr_size = try_hdr_size; - good_bitmap_offset = try_bitmap_offset; -#ifdef JEMALLOC_PROF - good_ctx0_offset = try_ctx0_offset; -#endif - good_reg0_offset = try_reg0_offset; - - /* Try more aggressive settings. */ - try_run_size += PAGE_SIZE; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin_info->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* - * Add space for one (prof_ctx_t *) per region. - */ - try_hdr_size += try_nregs * - sizeof(prof_ctx_t *); - } -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); - } while (try_run_size <= arena_maxclass - && try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX - && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size - && try_nregs < RUN_MAXREGS); - - assert(good_hdr_size <= good_reg0_offset); - - /* Copy final settings. 
*/ - bin_info->run_size = good_run_size; - bin_info->nregs = good_nregs; - bin_info->bitmap_offset = good_bitmap_offset; -#ifdef JEMALLOC_PROF - bin_info->ctx0_offset = good_ctx0_offset; -#endif - bin_info->reg0_offset = good_reg0_offset; - - return (good_run_size); -} - -static bool -bin_info_init(void) -{ - arena_bin_info_t *bin_info; - unsigned i; - size_t prev_run_size; - - arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); - if (arena_bin_info == NULL) - return (true); - - prev_run_size = PAGE_SIZE; - i = 0; -#ifdef JEMALLOC_TINY - /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } -#endif - - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << - LG_CACHELINE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - /* Subpage-spaced bins. */ - for (; i < nbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + - ncbins)) << LG_SUBPAGE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - return (false); } bool @@ -2671,6 +2545,9 @@ arena_boot(void) abort(); } + if (small_size2bin_init()) + return (true); + /* * Compute the header size such that it is large enough to contain the * page map. The page map is biased to omit entries for the header @@ -2694,11 +2571,5 @@ arena_boot(void) arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); - if (small_size2bin_init()) - return (true); - - if (bin_info_init()) - return (true); - return (false); } diff --git a/dep/jemalloc/src/atomic.c b/dep/jemalloc/src/atomic.c deleted file mode 100644 index 77ee313113b..00000000000 --- a/dep/jemalloc/src/atomic.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_ATOMIC_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/bitmap.c b/dep/jemalloc/src/bitmap.c deleted file mode 100644 index b47e2629093..00000000000 --- a/dep/jemalloc/src/bitmap.c +++ /dev/null @@ -1,90 +0,0 @@ -#define JEMALLOC_BITMAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static size_t bits2groups(size_t nbits); - -/******************************************************************************/ - -static size_t -bits2groups(size_t nbits) -{ - - return ((nbits >> LG_BITMAP_GROUP_NBITS) + - !!(nbits & BITMAP_GROUP_NBITS_MASK)); -} - -void -bitmap_info_init(bitmap_info_t *binfo, size_t nbits) -{ - unsigned i; - size_t group_count; - - assert(nbits > 0); - assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); - - /* - * Compute the number of groups necessary to store nbits bits, and - * progressively work upward through the levels until reaching a level - * that requires only one group. 
- */ - binfo->levels[0].group_offset = 0; - group_count = bits2groups(nbits); - for (i = 1; group_count > 1; i++) { - assert(i < BITMAP_MAX_LEVELS); - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - group_count = bits2groups(group_count); - } - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - binfo->nlevels = i; - binfo->nbits = nbits; -} - -size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ - - return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); -} - -size_t -bitmap_size(size_t nbits) -{ - bitmap_info_t binfo; - - bitmap_info_init(&binfo, nbits); - return (bitmap_info_ngroups(&binfo)); -} - -void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t extra; - unsigned i; - - /* - * Bits are actually inverted with regard to the external bitmap - * interface, so the bitmap starts out with all 1 bits, except for - * trailing unused bits (if any). Note that each group uses bit 0 to - * correspond to the first logical bit in the group, so extra bits - * are the most significant bits of the last group. - */ - memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << - LG_SIZEOF_BITMAP); - extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) - & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[1].group_offset - 1] >>= extra; - for (i = 1; i < binfo->nlevels; i++) { - size_t group_count = binfo->levels[i].group_offset - - binfo->levels[i-1].group_offset; - extra = (BITMAP_GROUP_NBITS - (group_count & - BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; - } -} diff --git a/dep/jemalloc/src/chunk.c b/dep/jemalloc/src/chunk.c index d190c6f49b3..301519e8042 100644 --- a/dep/jemalloc/src/chunk.c +++ b/dep/jemalloc/src/chunk.c @@ -70,7 +70,7 @@ RETURN: #ifdef JEMALLOC_IVSALLOC if (base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size, true); + chunk_dealloc(ret, size); return (NULL); } } @@ -108,7 +108,7 @@ RETURN: } void -chunk_dealloc(void *chunk, size_t size, bool unmap) +chunk_dealloc(void *chunk, size_t size) { assert(chunk != NULL); @@ -125,17 +125,15 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) malloc_mutex_unlock(&chunks_mtx); #endif - if (unmap) { #ifdef JEMALLOC_SWAP - if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) - return; + if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + return; #endif #ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) - return; + if (chunk_dealloc_dss(chunk, size) == false) + return; #endif - chunk_dealloc_mmap(chunk, size); - } + chunk_dealloc_mmap(chunk, size); } bool diff --git a/dep/jemalloc/src/chunk_mmap.c b/dep/jemalloc/src/chunk_mmap.c index 164e86e7b38..bc367559774 100644 --- a/dep/jemalloc/src/chunk_mmap.c +++ b/dep/jemalloc/src/chunk_mmap.c @@ -206,15 +206,13 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) void * chunk_alloc_mmap(size_t size) { - - return (chunk_alloc_mmap_internal(size, false)); + return chunk_alloc_mmap_internal(size, false); } void * chunk_alloc_mmap_noreserve(size_t size) { - - return (chunk_alloc_mmap_internal(size, true)); + return chunk_alloc_mmap_internal(size, true); } void diff --git a/dep/jemalloc/src/ckh.c b/dep/jemalloc/src/ckh.c index 43fcc25239d..682a8db65bf 100644 --- a/dep/jemalloc/src/ckh.c +++ b/dep/jemalloc/src/ckh.c @@ -34,7 +34,7 @@ * respectively. 
* ******************************************************************************/ -#define JEMALLOC_CKH_C_ +#define CKH_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ @@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -262,15 +262,9 @@ ckh_grow(ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; while (true) { - size_t usize; - lg_curcells++; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); - if (usize == 0) { - ret = true; - goto RETURN; - } - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { ret = true; goto RETURN; @@ -301,7 +295,7 @@ static void ckh_shrink(ckh_t *ckh) { ckhc_t *tab, *ttab; - size_t lg_curcells, usize; + size_t lg_curcells; unsigned lg_prevbuckets; /* @@ -310,10 +304,8 @@ ckh_shrink(ckh_t *ckh) */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); - if (usize == 0) - return; - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -348,7 +340,7 @@ bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; - size_t mincells, usize; + size_t mincells; unsigned lg_mincells; assert(minitems > 0); @@ -383,19 +375,15 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->hash = hash; ckh->keycomp = keycomp; - usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); - if (usize == 0) { - ret = true; - goto RETURN; - } - ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells, + (ZU(1) << LG_CACHELINE), true); if (ckh->tab == NULL) { ret = true; goto RETURN; } #ifdef JEMALLOC_DEBUG - ckh->magic = CKH_MAGIC; + ckh->magic = CKH_MAGIG; #endif ret = false; @@ -408,7 +396,7 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); #ifdef CKH_VERBOSE malloc_printf( @@ -433,7 +421,7 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); return (ckh->count); } @@ -464,7 +452,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -489,7 +477,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -521,7 +509,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -556,7 +544,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = 
hash(key, strlen((const char *)key), - 0x8432a476666bbc13LLU); + 0x8432a476666bbc13U); } *hash1 = ret1; diff --git a/dep/jemalloc/src/ctl.c b/dep/jemalloc/src/ctl.c index e5336d36949..3c8adab90a3 100644 --- a/dep/jemalloc/src/ctl.c +++ b/dep/jemalloc/src/ctl.c @@ -182,7 +182,6 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) #endif -CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) #ifdef JEMALLOC_STATS @@ -193,7 +192,6 @@ CTL_PROTO(stats_arenas_i_purged) #endif INDEX_PROTO(stats_arenas_i) #ifdef JEMALLOC_STATS -CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_mapped) @@ -436,7 +434,6 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = { #endif static const ctl_node_t stats_arenas_i_node[] = { - {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)} #ifdef JEMALLOC_STATS @@ -461,7 +458,6 @@ static const ctl_node_t stats_arenas_node[] = { static const ctl_node_t stats_node[] = { #ifdef JEMALLOC_STATS - {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, @@ -624,7 +620,6 @@ ctl_arena_refresh(arena_t *arena, unsigned i) ctl_arena_clear(astats); - sstats->nthreads += astats->nthreads; #ifdef JEMALLOC_STATS ctl_arena_stats_amerge(astats, arena); /* Merge into sum stats as well. */ @@ -662,17 +657,10 @@ ctl_refresh(void) * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). */ - ctl_stats.arenas[narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[narenas]); malloc_mutex_lock(&arenas_lock); memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - for (i = 0; i < narenas; i++) { - if (arenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; - else - ctl_stats.arenas[i].nthreads = 0; - } malloc_mutex_unlock(&arenas_lock); for (i = 0; i < narenas; i++) { bool initialized = (tarenas[i] != NULL); @@ -1126,8 +1114,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, unsigned newind, oldind; newind = oldind = choose_arena()->ind; - WRITE(newind, unsigned); - READ(oldind, unsigned); + WRITE(oldind, unsigned); + READ(newind, unsigned); if (newind != oldind) { arena_t *arena; @@ -1141,8 +1129,6 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, malloc_mutex_lock(&arenas_lock); if ((arena = arenas[newind]) == NULL) arena = arenas_extend(newind); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; malloc_mutex_unlock(&arenas_lock); if (arena == NULL) { ret = EAGAIN; @@ -1151,13 +1137,6 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. 
*/ ARENA_SET(arena); -#ifdef JEMALLOC_TCACHE - { - tcache_t *tcache = TCACHE_GET(); - if (tcache != NULL) - tcache->arena = arena; - } -#endif } ret = 0; @@ -1167,9 +1146,9 @@ RETURN: #ifdef JEMALLOC_STATS CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *); +CTL_RO_NL_GEN(thread_allocatedp, &ALLOCATED_GET(), uint64_t *); CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *); +CTL_RO_NL_GEN(thread_deallocatedp, &DEALLOCATED_GET(), uint64_t *); #endif /******************************************************************************/ @@ -1305,9 +1284,9 @@ CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool) /******************************************************************************/ -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t) const ctl_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1552,7 +1531,6 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) } #endif -CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) #ifdef JEMALLOC_STATS @@ -1584,7 +1562,6 @@ RETURN: } #ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *) CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t) CTL_RO_GEN(stats_active, ctl_stats.active, size_t) CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t) diff --git a/dep/jemalloc/src/hash.c b/dep/jemalloc/src/hash.c index cfa4da0275c..6a13d7a03c0 100644 --- a/dep/jemalloc/src/hash.c +++ b/dep/jemalloc/src/hash.c @@ -1,2 +1,2 @@ -#define JEMALLOC_HASH_C_ +#define HASH_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/huge.c b/dep/jemalloc/src/huge.c index a4f9b054ed5..0aadc4339a9 100644 --- a/dep/jemalloc/src/huge.c +++ b/dep/jemalloc/src/huge.c @@ -50,7 +50,6 @@ huge_malloc(size_t size, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); #ifdef JEMALLOC_STATS - stats_cactive_add(csize); huge_nmalloc++; huge_allocated += csize; #endif @@ -84,7 +83,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) * alignment, in order to assure the alignment can be achieved, then * unmap leading and trailing chunks. */ - assert(alignment > chunksize); + assert(alignment >= chunksize); chunk_size = CHUNK_CEILING(size); @@ -110,12 +109,12 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (offset == 0) { /* Trim trailing space. */ chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size, true); + - chunk_size); } else { size_t trailsize; /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset, true); + chunk_dealloc(ret, alignment - offset); ret = (void *)((uintptr_t)ret + (alignment - offset)); @@ -124,7 +123,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) /* Trim trailing space. 
*/ assert(trailsize < alloc_size); chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize, true); + trailsize); } } @@ -135,7 +134,6 @@ huge_palloc(size_t size, size_t alignment, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); #ifdef JEMALLOC_STATS - stats_cactive_add(chunk_size); huge_nmalloc++; huge_allocated += chunk_size; #endif @@ -194,7 +192,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * different size class. In that case, fall back to allocating new * space and copying. */ - if (alignment > chunksize) + if (alignment != 0) ret = huge_palloc(size + extra, alignment, zero); else ret = huge_malloc(size + extra, zero); @@ -203,7 +201,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (extra == 0) return (NULL); /* Try again, this time without extra. */ - if (alignment > chunksize) + if (alignment != 0) ret = huge_palloc(size, alignment, zero); else ret = huge_malloc(size, zero); @@ -234,13 +232,6 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, ) { size_t newsize = huge_salloc(ret); - /* - * Remove ptr from the tree of huge allocations before - * performing the remap operation, in order to avoid the - * possibility of another thread acquiring that mapping before - * this one removes it from the tree. - */ - huge_dalloc(ptr, false); if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, ret) == MAP_FAILED) { /* @@ -260,8 +251,9 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - chunk_dealloc_mmap(ptr, oldsize); - } + idalloc(ptr); + } else + huge_dalloc(ptr, false); } else #endif { @@ -286,7 +278,6 @@ huge_dalloc(void *ptr, bool unmap) extent_tree_ad_remove(&huge, node); #ifdef JEMALLOC_STATS - stats_cactive_sub(node->size); huge_ndalloc++; huge_allocated -= node->size; #endif @@ -301,10 +292,9 @@ huge_dalloc(void *ptr, bool unmap) memset(node->addr, 0x5a, node->size); #endif #endif + chunk_dealloc(node->addr, node->size); } - chunk_dealloc(node->addr, node->size, unmap); - base_node_dealloc(node); } diff --git a/dep/jemalloc/src/jemalloc.c b/dep/jemalloc/src/jemalloc.c index a161c2e26e1..2aebc51dd19 100644 --- a/dep/jemalloc/src/jemalloc.c +++ b/dep/jemalloc/src/jemalloc.c @@ -7,10 +7,12 @@ malloc_mutex_t arenas_lock; arena_t **arenas; unsigned narenas; +static unsigned next_arena; -pthread_key_t arenas_tsd; #ifndef NO_TLS __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#else +pthread_key_t arenas_tsd; #endif #ifdef JEMALLOC_STATS @@ -28,13 +30,7 @@ static bool malloc_initialized = false; static pthread_t malloc_initializer = (unsigned long)0; /* Used to avoid initialization races. 
*/ -static malloc_mutex_t init_lock = -#ifdef JEMALLOC_OSSPIN - 0 -#else - MALLOC_MUTEX_INITIALIZER -#endif - ; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT size_t pagesize; @@ -74,7 +70,6 @@ size_t opt_narenas = 0; static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); -static void arenas_cleanup(void *arg); #if (defined(JEMALLOC_STATS) && defined(NO_TLS)) static void thread_allocated_cleanup(void *arg); #endif @@ -84,7 +79,6 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen); static void malloc_conf_init(void); static bool malloc_init_hard(void); -static int imemalign(void **memptr, size_t alignment, size_t size); /******************************************************************************/ /* malloc_message() setup. */ @@ -153,53 +147,13 @@ choose_arena_hard(void) arena_t *ret; if (narenas > 1) { - unsigned i, choose, first_null; - - choose = 0; - first_null = narenas; malloc_mutex_lock(&arenas_lock); - assert(arenas[0] != NULL); - for (i = 1; i < narenas; i++) { - if (arenas[i] != NULL) { - /* - * Choose the first arena that has the lowest - * number of threads assigned to it. - */ - if (arenas[i]->nthreads < - arenas[choose]->nthreads) - choose = i; - } else if (first_null == narenas) { - /* - * Record the index of the first uninitialized - * arena, in case all extant arenas are in use. - * - * NB: It is possible for there to be - * discontinuities in terms of initialized - * versus uninitialized arenas, due to the - * "thread.arena" mallctl. - */ - first_null = i; - } - } - - if (arenas[choose] == 0 || first_null == narenas) { - /* - * Use an unloaded arena, or the least loaded arena if - * all arenas are already initialized. - */ - ret = arenas[choose]; - } else { - /* Initialize a new arena. */ - ret = arenas_extend(first_null); - } - ret->nthreads++; + if ((ret = arenas[next_arena]) == NULL) + ret = arenas_extend(next_arena); + next_arena = (next_arena + 1) % narenas; malloc_mutex_unlock(&arenas_lock); - } else { + } else ret = arenas[0]; - malloc_mutex_lock(&arenas_lock); - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); - } ARENA_SET(ret); @@ -259,28 +213,6 @@ stats_print_atexit(void) JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -thread_allocated_t * -thread_allocated_get_hard(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - imalloc(sizeof(thread_allocated_t)); - if (thread_allocated == NULL) { - static thread_allocated_t static_thread_allocated = {0, 0}; - malloc_write("<jemalloc>: Error allocating TSD;" - " mallctl(\"thread.{de,}allocated[p]\", ...)" - " will be inaccurate\n"); - if (opt_abort) - abort(); - return (&static_thread_allocated); - } - pthread_setspecific(thread_allocated_tsd, thread_allocated); - thread_allocated->allocated = 0; - thread_allocated->deallocated = 0; - return (thread_allocated); -} -#endif - /* * End miscellaneous support functions. 
*/ @@ -305,16 +237,6 @@ malloc_ncpus(void) return (ret); } -static void -arenas_cleanup(void *arg) -{ - arena_t *arena = (arena_t *)arg; - - malloc_mutex_lock(&arenas_lock); - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); -} - #if (defined(JEMALLOC_STATS) && defined(NO_TLS)) static void thread_allocated_cleanup(void *arg) @@ -499,8 +421,8 @@ malloc_conf_init(void) if ((opts = getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to - * the value of the MALLOC_CONF environment - * variable. + * the value of the JEMALLOC_OPTIONS + * environment variable. */ } else { /* No configuration specified. */ @@ -689,7 +611,7 @@ malloc_init_hard(void) result = sysconf(_SC_PAGESIZE); assert(result != -1); - pagesize = (size_t)result; + pagesize = (unsigned)result; /* * We assume that pagesize is a power of 2 when calculating @@ -749,10 +671,7 @@ malloc_init_hard(void) } #ifdef JEMALLOC_TCACHE - if (tcache_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } + tcache_boot(); #endif if (huge_boot()) { @@ -769,14 +688,6 @@ malloc_init_hard(void) } #endif - if (malloc_mutex_init(&arenas_lock)) - return (true); - - if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } - /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). @@ -801,7 +712,8 @@ malloc_init_hard(void) * threaded mode. */ ARENA_SET(arenas[0]); - arenas[0]->nthreads++; + + malloc_mutex_init(&arenas_lock); #ifdef JEMALLOC_PROF if (prof_boot2()) { @@ -841,6 +753,15 @@ malloc_init_hard(void) malloc_write(")\n"); } + next_arena = (narenas > 0) ? 1 : 0; + +#ifdef NO_TLS + if (pthread_key_create(&arenas_tsd, NULL) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + /* Allocate and initialize arenas. */ arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); if (arenas == NULL) { @@ -872,6 +793,7 @@ malloc_init_hard(void) return (false); } + #ifdef JEMALLOC_ZONE JEMALLOC_ATTR(constructor) void @@ -940,8 +862,7 @@ JEMALLOC_P(malloc)(size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } @@ -990,23 +911,19 @@ RETURN: } JEMALLOC_ATTR(nonnull(1)) -#ifdef JEMALLOC_PROF -/* - * Avoid any uncertainty as to how many backtrace frames to ignore in - * PROF_ALLOC_PREP(). 
- */ -JEMALLOC_ATTR(noinline) -#endif -static int -imemalign(void **memptr, size_t alignment, size_t size) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) { int ret; + void *result; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t usize -#ifdef JEMALLOC_CC_SILENCE +# ifdef JEMALLOC_CC_SILENCE = 0 -#endif +# endif ; - void *result; +#endif #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt # ifdef JEMALLOC_CC_SILENCE @@ -1056,38 +973,34 @@ imemalign(void **memptr, size_t alignment, size_t size) goto RETURN; } - usize = sa2u(size, alignment, NULL); - if (usize == 0) { - result = NULL; - ret = ENOMEM; - goto RETURN; - } - #ifdef JEMALLOC_PROF if (opt_prof) { - PROF_ALLOC_PREP(2, usize, cnt); - if (cnt == NULL) { + usize = sa2u(size, alignment, NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) { result = NULL; ret = EINVAL; } else { if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { - assert(sa2u(small_maxclass+1, - alignment, NULL) != 0); - result = ipalloc(sa2u(small_maxclass+1, - alignment, NULL), alignment, false); + result = ipalloc(small_maxclass+1, + alignment, false); if (result != NULL) { arena_prof_promoted(result, usize); } } else { - result = ipalloc(usize, alignment, + result = ipalloc(size, alignment, false); } } } else #endif - result = ipalloc(usize, alignment, false); + { +#ifdef JEMALLOC_STATS + usize = sa2u(size, alignment, NULL); +#endif + result = ipalloc(size, alignment, false); + } } if (result == NULL) { @@ -1119,15 +1032,6 @@ RETURN: return (ret); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) -{ - - return imemalign(memptr, alignment, size); -} - JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * @@ -1183,8 +1087,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(num_size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto RETURN; } @@ -1297,9 +1200,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) if (opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - old_ctx = NULL; + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } @@ -1309,13 +1210,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) false, false); if (ret != NULL) arena_prof_promoted(ret, usize); - else - old_ctx = NULL; - } else { + } else ret = iralloc(ptr, size, 0, 0, false, false); - if (ret == NULL) - old_ctx = NULL; - } } else #endif { @@ -1353,8 +1249,7 @@ OOM: #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) + if ((cnt = prof_alloc_prep(usize)) == NULL) ret = NULL; else { if (prof_promote && (uintptr_t)cnt != @@ -1459,7 +1354,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - imemalign(&ret, alignment, size); + JEMALLOC_P(posix_memalign)(&ret, alignment, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1478,7 +1373,7 @@ JEMALLOC_P(valloc)(size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - imemalign(&ret, PAGE_SIZE, size); + JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1559,18 +1454,15 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, } JEMALLOC_INLINE void 
* -iallocm(size_t usize, size_t alignment, bool zero) +iallocm(size_t size, size_t alignment, bool zero) { - assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment, - NULL))); - if (alignment != 0) - return (ipalloc(usize, alignment, zero)); + return (ipalloc(size, alignment, zero)); else if (zero) - return (icalloc(usize)); + return (icalloc(size)); else - return (imalloc(usize)); + return (imalloc(size)); } JEMALLOC_ATTR(nonnull(1)) @@ -1593,43 +1485,38 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto OOM; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); - if (usize == 0) - goto OOM; - #ifdef JEMALLOC_PROF if (opt_prof) { - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, + NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) goto OOM; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { - size_t usize_promoted = (alignment == 0) ? - s2u(small_maxclass+1) : sa2u(small_maxclass+1, - alignment, NULL); - assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero); + p = iallocm(small_maxclass+1, alignment, zero); if (p == NULL) goto OOM; arena_prof_promoted(p, usize); } else { - p = iallocm(usize, alignment, zero); + p = iallocm(size, alignment, zero); if (p == NULL) goto OOM; } - prof_malloc(p, usize, cnt); + if (rsize != NULL) *rsize = usize; } else #endif { - p = iallocm(usize, alignment, zero); + p = iallocm(size, alignment, zero); if (p == NULL) goto OOM; #ifndef JEMALLOC_STATS if (rsize != NULL) #endif { + usize = (alignment == 0) ? s2u(size) : sa2u(size, + alignment, NULL); #ifdef JEMALLOC_STATS if (rsize != NULL) #endif @@ -1672,6 +1559,7 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, bool no_move = flags & ALLOCM_NO_MOVE; #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; + prof_ctx_t *old_ctx; #endif assert(ptr != NULL); @@ -1686,33 +1574,25 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and - * use that in PROF_ALLOC_PREP() to decide whether to capture a + * use that in prof_alloc_prep() to decide whether to capture a * backtrace. prof_realloc() will use the actual usize to * decide whether to sample. */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment, NULL); - prof_ctx_t *old_ctx = prof_ctx_get(p); old_size = isalloc(p); - PROF_ALLOC_PREP(1, max_usize, cnt); - if (cnt == NULL) + old_ctx = prof_ctx_get(p); + if ((cnt = prof_alloc_prep(max_usize)) == NULL) goto OOM; - /* - * Use minimum usize to determine whether promotion may happen. - */ - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL)) <= small_maxclass) { + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize + <= small_maxclass) { q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= size+extra) ? 
0 : size+extra - (small_maxclass+1), alignment, zero, no_move); if (q == NULL) goto ERR; - if (max_usize < PAGE_SIZE) { - usize = max_usize; - arena_prof_promoted(q, usize); - } else - usize = isalloc(q); + usize = isalloc(q); + arena_prof_promoted(q, usize); } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) @@ -1720,8 +1600,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, usize = isalloc(q); } prof_realloc(q, usize, cnt, old_size, old_ctx); - if (rsize != NULL) - *rsize = usize; } else #endif { diff --git a/dep/jemalloc/src/mb.c b/dep/jemalloc/src/mb.c index dc2c0a256fd..30a1a2e997a 100644 --- a/dep/jemalloc/src/mb.c +++ b/dep/jemalloc/src/mb.c @@ -1,2 +1,2 @@ -#define JEMALLOC_MB_C_ +#define MB_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/mutex.c b/dep/jemalloc/src/mutex.c index ca89ef1c962..3ecb18a340e 100644 --- a/dep/jemalloc/src/mutex.c +++ b/dep/jemalloc/src/mutex.c @@ -55,9 +55,6 @@ pthread_create(pthread_t *__restrict thread, bool malloc_mutex_init(malloc_mutex_t *mutex) { -#ifdef JEMALLOC_OSSPIN - *mutex = 0; -#else pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) @@ -73,7 +70,6 @@ malloc_mutex_init(malloc_mutex_t *mutex) } pthread_mutexattr_destroy(&attr); -#endif return (false); } @@ -81,10 +77,8 @@ void malloc_mutex_destroy(malloc_mutex_t *mutex) { -#ifndef JEMALLOC_OSSPIN if (pthread_mutex_destroy(mutex) != 0) { malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n"); abort(); } -#endif } diff --git a/dep/jemalloc/src/prof.c b/dep/jemalloc/src/prof.c index 8a144b4e46c..636cccef52a 100644 --- a/dep/jemalloc/src/prof.c +++ b/dep/jemalloc/src/prof.c @@ -3,15 +3,15 @@ #ifdef JEMALLOC_PROF /******************************************************************************/ +#ifdef JEMALLOC_PROF_LIBGCC +#include <unwind.h> +#endif + #ifdef JEMALLOC_PROF_LIBUNWIND #define UNW_LOCAL_ONLY #include <libunwind.h> #endif -#ifdef JEMALLOC_PROF_LIBGCC -#include <unwind.h> -#endif - /******************************************************************************/ /* Data. 
*/ @@ -169,7 +169,39 @@ prof_leave(void) prof_gdump(); } -#ifdef JEMALLOC_PROF_LIBUNWIND +#ifdef JEMALLOC_PROF_LIBGCC +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) +{ + + return (_URC_NO_REASON); +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) +{ + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + + if (data->nignore > 0) + data->nignore--; + else { + data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); + } + + return (_URC_NO_REASON); +} + +void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ + prof_unwind_data_t data = {bt, nignore, max}; + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#elif defined(JEMALLOC_PROF_LIBUNWIND) void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -204,41 +236,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) break; } } -#endif -#ifdef JEMALLOC_PROF_LIBGCC -static _Unwind_Reason_Code -prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) -{ - - return (_URC_NO_REASON); -} - -static _Unwind_Reason_Code -prof_unwind_callback(struct _Unwind_Context *context, void *arg) -{ - prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - - if (data->nignore > 0) - data->nignore--; - else { - data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); - data->bt->len++; - if (data->bt->len == data->max) - return (_URC_END_OF_STACK); - } - - return (_URC_NO_REASON); -} - -void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) -{ - prof_unwind_data_t data = {bt, nignore, max}; - - _Unwind_Backtrace(prof_unwind_callback, &data); -} -#endif -#ifdef JEMALLOC_PROF_GCC +#else void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -434,7 +432,6 @@ prof_lookup(prof_bt_t *bt) prof_ctx_t *p; void *v; } ctx; - bool new_ctx; /* * This thread's cache lacks bt. Look for it in the global @@ -471,26 +468,12 @@ prof_lookup(prof_bt_t *bt) idalloc(ctx.v); return (NULL); } - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - * - * No locking is necessary for ctx here because no other - * threads have had the opportunity to fetch it from - * bt2ctx yet. - */ - ctx.p->cnt_merged.curobjs++; - new_ctx = true; - } else { - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - */ - malloc_mutex_lock(&ctx.p->lock); - ctx.p->cnt_merged.curobjs++; - malloc_mutex_unlock(&ctx.p->lock); - new_ctx = false; } + /* + * Acquire ctx's lock before releasing bt2ctx_mtx, in order to + * avoid a race condition with prof_ctx_destroy(). + */ + malloc_mutex_lock(&ctx.p->lock); prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ @@ -503,9 +486,8 @@ prof_lookup(prof_bt_t *bt) */ ret.p = ql_last(&prof_tdata->lru_ql, lru_link); assert(ret.v != NULL); - if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, - NULL, NULL)) - assert(false); + ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, + NULL); ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); prof_ctx_merge(ret.p->ctx, ret.p); /* ret can now be re-used. */ @@ -516,8 +498,7 @@ prof_lookup(prof_bt_t *bt) /* Allocate and partially initialize a new cnt. 
*/ ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { - if (new_ctx) - prof_ctx_destroy(ctx.p); + malloc_mutex_unlock(&ctx.p->lock); return (NULL); } ql_elm_new(ret.p, cnts_link); @@ -528,15 +509,12 @@ prof_lookup(prof_bt_t *bt) ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - if (new_ctx) - prof_ctx_destroy(ctx.p); + malloc_mutex_unlock(&ctx.p->lock); idalloc(ret.v); return (NULL); } ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(&ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx.p->lock); } else { /* Move ret to the front of the LRU. */ @@ -650,10 +628,11 @@ prof_ctx_destroy(prof_ctx_t *ctx) /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in - * order to avoid a race condition with this function, as does - * prof_ctx_merge() in order to avoid a race between the main body of - * prof_ctx_merge() and entry into this function. + * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to + * avoid a race condition with this function, and prof_ctx_merge() + * artificially raises ctx->cnt_merged.curobjs in order to avoid a race + * between the main body of prof_ctx_merge() and entry into this + * function. */ prof_enter(); malloc_mutex_lock(&ctx->lock); @@ -662,8 +641,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ - if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) - assert(false); + ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); prof_leave(); /* Destroy ctx. */ malloc_mutex_unlock(&ctx->lock); @@ -671,10 +649,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) malloc_mutex_destroy(&ctx->lock); idalloc(ctx); } else { - /* - * Compensate for increment in prof_ctx_merge() or - * prof_lookup(). - */ + /* Compensate for increment in prof_ctx_merge(). */ ctx->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx->lock); prof_leave(); @@ -1081,7 +1056,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(bt->vec, bt->len * sizeof(void *), - 0x8432a476666bbc13LLU); + 0x8432a476666bbc13U); } *hash1 = ret1; @@ -1118,6 +1093,7 @@ prof_tdata_init(void) prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); if (prof_tdata->vec == NULL) { + ckh_delete(&prof_tdata->bt2cnt); idalloc(prof_tdata); return (NULL); @@ -1135,26 +1111,33 @@ prof_tdata_init(void) static void prof_tdata_cleanup(void *arg) { - prof_thr_cnt_t *cnt; - prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; + prof_tdata_t *prof_tdata; - /* - * Delete the hash table. All of its contents can still be iterated - * over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata != NULL) { + prof_thr_cnt_t *cnt; - /* Iteratively merge cnt's into the global stats and delete them. */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); - } + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); - idalloc(prof_tdata->vec); + /* + * Iteratively merge cnt's into the global stats and delete + * them. 
+ */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + prof_ctx_merge(cnt->ctx, cnt); + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + idalloc(cnt); + } - idalloc(prof_tdata); - PROF_TCACHE_SET(NULL); + idalloc(prof_tdata->vec); + + idalloc(prof_tdata); + PROF_TCACHE_SET(NULL); + } } void diff --git a/dep/jemalloc/src/rtree.c b/dep/jemalloc/src/rtree.c index eb0ff1e24af..7753743c5e6 100644 --- a/dep/jemalloc/src/rtree.c +++ b/dep/jemalloc/src/rtree.c @@ -1,4 +1,4 @@ -#define JEMALLOC_RTREE_C_ +#define RTREE_C_ #include "jemalloc/internal/jemalloc_internal.h" rtree_t * @@ -20,10 +20,7 @@ rtree_new(unsigned bits) memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * height)); - if (malloc_mutex_init(&ret->mutex)) { - /* Leak the rtree. */ - return (NULL); - } + malloc_mutex_init(&ret->mutex); ret->height = height; if (bits_per_level * height > bits) ret->level2bits[0] = bits % bits_per_level; diff --git a/dep/jemalloc/src/stats.c b/dep/jemalloc/src/stats.c index dc172e425c0..3dfe0d232a6 100644 --- a/dep/jemalloc/src/stats.c +++ b/dep/jemalloc/src/stats.c @@ -39,10 +39,6 @@ bool opt_stats_print = false; -#ifdef JEMALLOC_STATS -size_t stats_cactive = 0; -#endif - /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -323,7 +319,6 @@ static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - unsigned nthreads; size_t pagesize, pactive, pdirty, mapped; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -333,9 +328,6 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.pagesize", &pagesize, size_t); - CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); @@ -677,26 +669,21 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #ifdef JEMALLOC_STATS { int err; - size_t sszp, ssz; - size_t *cactive; + size_t ssz; size_t allocated, active, mapped; size_t chunks_current, chunks_high, swap_avail; uint64_t chunks_total; size_t huge_allocated; uint64_t huge_nmalloc, huge_ndalloc; - sszp = sizeof(size_t *); ssz = sizeof(size_t); - CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, mapped: %zu\n", - allocated, active, mapped); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", atomic_read_z(cactive)); + "Allocated: %zu, active: %zu, mapped: %zu\n", allocated, + active, mapped); /* Print chunk stats. */ CTL_GET("stats.chunks.total", &chunks_total, uint64_t); @@ -748,7 +735,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, ninitialized++; } - if (ninitialized > 1 || unmerged == false) { + if (ninitialized > 1) { /* Print merged arena stats. 
*/ malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); diff --git a/dep/jemalloc/src/tcache.c b/dep/jemalloc/src/tcache.c index 31c329e1613..cbbe7a113a9 100644 --- a/dep/jemalloc/src/tcache.c +++ b/dep/jemalloc/src/tcache.c @@ -8,9 +8,6 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; -tcache_bin_info_t *tcache_bin_info; -static unsigned stack_nelms; /* Total stack elms per tcache. */ - /* Map of thread-specific caches. */ #ifndef NO_TLS __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); @@ -58,19 +55,18 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *ptr; + void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS - bool merged_stats = false; -#endif + bool first_pass; assert(binind < nbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; arena_bin_t *bin = &arena->bins[binind]; @@ -86,17 +82,17 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem malloc_mutex_lock(&bin->lock); #ifdef JEMALLOC_STATS if (arena == tcache->arena) { - assert(merged_stats == false); - merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } #endif + deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = flush; assert(ptr != NULL); + flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) { size_t pageind = ((uintptr_t)ptr - @@ -111,31 +107,21 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem * locked. Stash the object, so that it can be * handled in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(void **)ptr = deferred; + deferred = ptr; ndeferred++; } } malloc_mutex_unlock(&bin->lock); + + if (first_pass) { + tbin->avail = flush; + first_pass = false; + } } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. 
- */ - arena_bin_t *bin = &tcache->arena->bins[binind]; - malloc_mutex_lock(&bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&bin->lock); - } -#endif - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) + if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } @@ -146,19 +132,18 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *ptr; + void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS - bool merged_stats = false; -#endif + bool first_pass; assert(binind < nhbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; malloc_mutex_lock(&arena->lock); @@ -170,7 +155,6 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem tcache->prof_accumbytes = 0; #endif #ifdef JEMALLOC_STATS - merged_stats = true; arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - nbins].nrequests += tbin->tstats.nrequests; @@ -179,10 +163,12 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) } #endif + deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = flush; assert(ptr != NULL); + flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) arena_dalloc_large(arena, chunk, ptr); @@ -193,32 +179,21 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem * Stash the object, so that it can be handled * in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(void **)ptr = deferred; + deferred = ptr; ndeferred++; } } malloc_mutex_unlock(&arena->lock); + + if (first_pass) { + tbin->avail = flush; + first_pass = false; + } } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. - */ - arena_t *arena = tcache->arena; - malloc_mutex_lock(&arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&arena->lock); - } -#endif - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) + if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } @@ -226,14 +201,10 @@ tcache_t * tcache_create(arena_t *arena) { tcache_t *tcache; - size_t size, stack_offset; + size_t size; unsigned i; size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); - /* Naturally align the pointer stacks. */ - size = PTR_CEILING(size); - stack_offset = size; - size += stack_nelms * sizeof(void *); /* * Round up to the nearest multiple of the cacheline size, in order to * avoid the possibility of false cacheline sharing. 
@@ -246,8 +217,6 @@ tcache_create(arena_t *arena) if (size <= small_maxclass) tcache = (tcache_t *)arena_malloc_small(arena, size, true); - else if (size <= tcache_maxclass) - tcache = (tcache_t *)arena_malloc_large(arena, size, true); else tcache = (tcache_t *)icalloc(size); @@ -264,12 +233,15 @@ tcache_create(arena_t *arena) tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - for (i = 0; i < nhbins; i++) { - tcache->tbins[i].lg_fill_div = 1; - tcache->tbins[i].avail = (void **)((uintptr_t)tcache + - (uintptr_t)stack_offset); - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + for (i = 0; i < nbins; i++) { + if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { + tcache->tbins[i].ncached_max = (arena->bins[i].nregs << + 1); + } else + tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; } + for (; i < nhbins; i++) + tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE; TCACHE_SET(tcache); @@ -280,7 +252,6 @@ void tcache_destroy(tcache_t *tcache) { unsigned i; - size_t tcache_size; #ifdef JEMALLOC_STATS /* Unlink from list of extant tcaches. */ @@ -337,8 +308,7 @@ tcache_destroy(tcache_t *tcache) } #endif - tcache_size = arena_salloc(tcache); - if (tcache_size <= small_maxclass) { + if (arena_salloc(tcache) <= small_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> @@ -352,13 +322,6 @@ tcache_destroy(tcache_t *tcache) malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, tcache, mapelm); malloc_mutex_unlock(&bin->lock); - } else if (tcache_size <= tcache_maxclass) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, tcache); - malloc_mutex_unlock(&arena->lock); } else idalloc(tcache); } @@ -415,13 +378,11 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } #endif -bool +void tcache_boot(void) { if (opt_tcache) { - unsigned i; - /* * If necessary, clamp opt_lg_tcache_max, now that * small_maxclass and arena_maxclass are known. @@ -436,28 +397,6 @@ tcache_boot(void) nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); - /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * - sizeof(tcache_bin_info_t)); - if (tcache_bin_info == NULL) - return (true); - stack_nelms = 0; - for (i = 0; i < nbins; i++) { - if ((arena_bin_info[i].nregs << 1) <= - TCACHE_NSLOTS_SMALL_MAX) { - tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); - } else { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MAX; - } - stack_nelms += tcache_bin_info[i].ncached_max; - } - for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; - stack_nelms += tcache_bin_info[i].ncached_max; - } - /* Compute incremental GC event threshold. */ if (opt_lg_tcache_gc_sweep >= 0) { tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / @@ -473,8 +412,6 @@ tcache_boot(void) abort(); } } - - return (false); } /******************************************************************************/ #endif /* JEMALLOC_TCACHE */ |
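A side effect of this revert worth calling out: the ckh.c hunks swap dassert(ckh->magic == CKH_MAGIC) back to the older assert(ckh->magic = CKH_MAGIG), which uses a single '='. That form assigns the magic value instead of comparing it, so the check can never fail (the asserted result is always nonzero) and it silently rewrites the very field it is supposed to validate. A minimal sketch of the difference; the struct and the magic value here are illustrative, not taken from this tree:

#include <assert.h>
#include <stdint.h>

#define CKH_MAGIC 0x3af2489dU	/* illustrative value */

struct ckh_demo {
	uint32_t magic;
};

int
main(void)
{
	struct ckh_demo ckh = {0};	/* magic deliberately wrong */

	/* Buggy: '=' stores CKH_MAGIC into ckh.magic and asserts on the
	 * (nonzero) result, so this never fires and corrupts the field. */
	assert(ckh.magic = CKH_MAGIC);

	/* Intended: '==' compares without modifying the structure. */
	assert(ckh.magic == CKH_MAGIC);
	return (0);
}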
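The same ckh.c hunks also drop the "usize = sa2u(...); if (usize == 0) ..." guards that the newer code ran before each ipalloc() call; per the removed code, sa2u() returning 0 signals that the requested size/alignment combination cannot be represented. The general guard, sketched here with plain malloc() rather than jemalloc's internal allocators (alloc_table() and its parameters are made up for illustration):

#include <stdint.h>
#include <stdlib.h>

static void *
alloc_table(size_t elem_size, unsigned lg_cells)
{

	/* Refuse a shifted size that would wrap before it reaches the
	 * allocator. */
	if (lg_cells >= sizeof(size_t) * 8 || elem_size > (SIZE_MAX >> lg_cells))
		return (NULL);
	return (malloc(elem_size << lg_cells));
}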
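The thread_arena_ctl() hunk in ctl.c swaps the WRITE/READ order, which changes whether the caller's new index or the previous index is handled first; the mallctl convention itself (oldp/oldlenp return the previous value, newp/newlen supply the new one) is unchanged. A hedged usage sketch against the public entry point, assuming an unprefixed build of this bundled copy (JEMALLOC_P() may add a prefix to the exported symbol, and the header path may differ in this tree):

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned arena, target = 0;
	size_t sz = sizeof(arena);

	/* Read the calling thread's current arena association. */
	if (mallctl("thread.arena", &arena, &sz, NULL, 0) != 0)
		return (1);
	printf("thread.arena: %u\n", arena);

	/* Fetch the old index and bind the thread to arena 0 in one call. */
	if (mallctl("thread.arena", &arena, &sz, &target, sizeof(target)) != 0)
		return (1);
	return (0);
}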
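In jemalloc.c the revert replaces the load-based policy in choose_arena_hard() (pick the arena with the fewest assigned threads, tracked via the removed nthreads counters) with the older round-robin next_arena counter. The core of the restored policy, reduced to a sketch with locking and lazy arena creation omitted and placeholder types:

#define NARENAS 4U

static unsigned next_arena;
static void *arenas[NARENAS];	/* stands in for arena_t *arenas[] */

static void *
choose_arena_round_robin(void)
{
	void *ret;

	ret = arenas[next_arena];
	next_arena = (next_arena + 1) % NARENAS;
	return (ret);
}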
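huge_palloc() in huge.c keeps the same over-allocate-and-trim strategy on both sides of the revert: reserve alignment + size bytes, then give back the misaligned head and the unused tail. A standalone sketch of that technique using mmap()/munmap() in place of the chunk layer, assuming alignment and size are nonzero multiples of the page size and alignment is a power of two:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

static void *
aligned_map(size_t size, size_t alignment)
{
	size_t alloc_size = size + alignment;
	uintptr_t base, aligned;
	void *addr;

	if (alloc_size < size)
		return (NULL);				/* overflow */
	addr = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED)
		return (NULL);
	base = (uintptr_t)addr;
	aligned = (base + alignment - 1) & ~((uintptr_t)alignment - 1);
	if (aligned != base)
		munmap(addr, aligned - base);		/* trim leading space */
	if (aligned + size < base + alloc_size) {
		munmap((void *)(aligned + size),
		    base + alloc_size - (aligned + size));	/* trim trailing space */
	}
	return ((void *)aligned);
}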
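The prof.c hunks mostly move the JEMALLOC_PROF_LIBGCC implementation of prof_backtrace() around rather than changing how it works: _Unwind_Backtrace() drives a per-frame callback and _Unwind_GetIP() extracts each frame's instruction pointer. A minimal standalone use of that libgcc interface (the frame cap and the printing are arbitrary):

#include <stdio.h>
#include <unwind.h>

#define MAX_FRAMES 32

struct bt {
	void *vec[MAX_FRAMES];
	unsigned len;
};

static _Unwind_Reason_Code
collect(struct _Unwind_Context *context, void *arg)
{
	struct bt *bt = (struct bt *)arg;

	if (bt->len == MAX_FRAMES)
		return (_URC_END_OF_STACK);
	bt->vec[bt->len++] = (void *)_Unwind_GetIP(context);
	return (_URC_NO_REASON);
}

int
main(void)
{
	struct bt bt = {{0}, 0};
	unsigned i;

	_Unwind_Backtrace(collect, &bt);
	for (i = 0; i < bt.len; i++)
		printf("frame %u: %p\n", i, bt.vec[i]);
	return (0);
}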
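tcache.c is reverted from the array-backed avail stack (with its tcache_bin_info/ncached_max bookkeeping) back to an intrusive singly linked list: each cached object stores the next pointer in its own first word, and during a flush any object that belongs to an arena other than the one currently locked is pushed onto a deferred list and retried on a later pass. One pass of that list manipulation, with the ownership test and the actual deallocation stubbed out (the real code derives both from the chunk header):

#include <stdbool.h>
#include <stddef.h>

/* Placeholders for "does this object belong to the arena whose lock we hold"
 * and "free it into that arena". */
static bool object_in_locked_arena(void *ptr) { (void)ptr; return (true); }
static void dalloc_into_locked_arena(void *ptr) { (void)ptr; }

static void *
flush_one_pass(void *flush)
{
	void *deferred = NULL, *ptr;

	while (flush != NULL) {
		ptr = flush;
		flush = *(void **)ptr;		/* next pointer lives inside the object */
		if (object_in_locked_arena(ptr))
			dalloc_into_locked_arena(ptr);
		else {
			*(void **)ptr = deferred;	/* stash for a later pass */
			deferred = ptr;
		}
	}
	return (deferred);	/* caller locks the next arena and repeats */
}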