author     click <click@gonnamakeyou.com>    2012-04-23 20:23:30 +0200
committer  click <click@gonnamakeyou.com>    2012-04-23 20:23:30 +0200
commit     c4123289916daa7bd1c7feb191e8c647fd17b163 (patch)
tree       d6235b61f7ead417757273184acb27a252a96bc5 /dep/jemalloc/src
parent     5da5021464c649d84c755a921eae43519eba8567 (diff)
Revert "DEP: Updated Jemalloc to Version 2.5" - this version of the jemalloc-library is crashy at best, and should not have been pushed.
Further investigations on why this occurs is required before it will be slammed into master. This reverts commit 126fd13e5d6b57dc0c8830248d44db504c7d103f.
Diffstat (limited to 'dep/jemalloc/src')
-rw-r--r--   dep/jemalloc/src/arena.c        671
-rw-r--r--   dep/jemalloc/src/atomic.c         2
-rw-r--r--   dep/jemalloc/src/bitmap.c        90
-rw-r--r--   dep/jemalloc/src/chunk.c         16
-rw-r--r--   dep/jemalloc/src/chunk_mmap.c     6
-rw-r--r--   dep/jemalloc/src/ckh.c           46
-rw-r--r--   dep/jemalloc/src/ctl.c           37
-rw-r--r--   dep/jemalloc/src/hash.c           2
-rw-r--r--   dep/jemalloc/src/huge.c          30
-rw-r--r--   dep/jemalloc/src/jemalloc.c     264
-rw-r--r--   dep/jemalloc/src/mb.c             2
-rw-r--r--   dep/jemalloc/src/mutex.c          6
-rw-r--r--   dep/jemalloc/src/prof.c         175
-rw-r--r--   dep/jemalloc/src/rtree.c          7
-rw-r--r--   dep/jemalloc/src/stats.c         21
-rw-r--r--   dep/jemalloc/src/tcache.c       153
16 files changed, 517 insertions(+), 1011 deletions(-)
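
The bulk of the revert is in arena.c: per-run region allocation moves back from the bitmap-based allocator (arena_bin_info_t plus bitmap_sfu) to the older scheme of an intrusive free list (run->avail) combined with a bump-pointer frontier (run->next), as visible in the arena_run_reg_alloc/arena_run_reg_dalloc hunks below. A minimal standalone sketch of that restored scheme, with simplified stand-in types (run_t here is hypothetical; the real arena_run_t carries more state):

    #include <stdint.h>
    #include <stddef.h>

    typedef struct {
        void *avail;     /* intrusive free list of regions returned by dalloc */
        void *next;      /* frontier: first region never yet handed out */
        unsigned nfree;  /* number of free regions remaining in the run */
    } run_t;

    static void *
    reg_alloc(run_t *run, size_t reg_size)
    {
        void *ret;

        run->nfree--;
        if ((ret = run->avail) != NULL) {
            /* Pop from the free list; the next pointer is stored in the
             * free region itself, so no side metadata is needed. */
            run->avail = *(void **)ret;
            return (ret);
        }
        /* Free list empty: bump the never-allocated frontier instead. */
        ret = run->next;
        run->next = (void *)((uintptr_t)ret + reg_size);
        return (ret);
    }

    static void
    reg_dalloc(run_t *run, void *ptr)
    {
        /* Push onto the free list by writing the old head into the region. */
        *(void **)ptr = run->avail;
        run->avail = ptr;
        run->nfree++;
    }

Unlike the bitmap allocator being removed, which always hands out the lowest free region index (so low regions get used first), this LIFO list reuses the most recently freed region and can only catch double frees and wild pointers via the range/alignment assertions shown in the diff.
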
diff --git a/dep/jemalloc/src/arena.c b/dep/jemalloc/src/arena.c
index d166ca1ec4d..7f939b3cd77 100644
--- a/dep/jemalloc/src/arena.c
+++ b/dep/jemalloc/src/arena.c
@@ -8,7 +8,6 @@ size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
uint8_t const *small_size2bin;
-arena_bin_info_t *arena_bin_info;
/* Various bin-related settings. */
unsigned nqbins;
@@ -26,27 +25,26 @@ size_t mspace_mask;
/*
* const_small_size2bin is a static constant lookup table that in the common
- * case can be used as-is for small_size2bin.
+ * case can be used as-is for small_size2bin. For dynamically linked programs,
+ * this avoids a page of memory overhead per process.
*/
-#if (LG_TINY_MIN == 2)
-#define S2B_4(i) i,
+#define S2B_1(i) i,
+#define S2B_2(i) S2B_1(i) S2B_1(i)
+#define S2B_4(i) S2B_2(i) S2B_2(i)
#define S2B_8(i) S2B_4(i) S2B_4(i)
-#elif (LG_TINY_MIN == 3)
-#define S2B_8(i) i,
-#else
-# error "Unsupported LG_TINY_MIN"
-#endif
#define S2B_16(i) S2B_8(i) S2B_8(i)
#define S2B_32(i) S2B_16(i) S2B_16(i)
#define S2B_64(i) S2B_32(i) S2B_32(i)
#define S2B_128(i) S2B_64(i) S2B_64(i)
#define S2B_256(i) S2B_128(i) S2B_128(i)
/*
- * The number of elements in const_small_size2bin is dependent on the
- * definition for SUBPAGE.
+ * The number of elements in const_small_size2bin is dependent on page size
+ * and on the definition for SUBPAGE. If SUBPAGE changes, the '- 255' must also
+ * change, along with the addition/removal of static lookup table element
+ * definitions.
*/
-static JEMALLOC_ATTR(aligned(CACHELINE))
- const uint8_t const_small_size2bin[] = {
+static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
+ S2B_1(0xffU) /* 0 */
#if (LG_QUANTUM == 4)
/* 16-byte quantum **********************/
# ifdef JEMALLOC_TINY
@@ -175,6 +173,7 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
+static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
arena_bin_t *bin);
static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
@@ -192,9 +191,6 @@ static bool small_size2bin_init(void);
static void small_size2bin_validate(void);
#endif
static bool small_size2bin_init_hard(void);
-static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info,
- size_t min_run_size);
-static bool bin_info_init(void);
/******************************************************************************/
@@ -250,48 +246,57 @@ rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
arena_chunk_map_t, u.rb_link, arena_avail_comp)
static inline void *
-arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
+arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
{
void *ret;
- unsigned regind;
- bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
- (uintptr_t)bin_info->bitmap_offset);
- dassert(run->magic == ARENA_RUN_MAGIC);
+ assert(run->magic == ARENA_RUN_MAGIC);
assert(run->nfree > 0);
- assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false);
- regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);
- ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
- (uintptr_t)(bin_info->reg_size * regind));
run->nfree--;
- if (regind == run->nextind)
- run->nextind++;
- assert(regind < run->nextind);
+ ret = run->avail;
+ if (ret != NULL) {
+ /* Double free can cause assertion failure.*/
+ assert(ret != NULL);
+ /* Write-after free can cause assertion failure. */
+ assert((uintptr_t)ret >= (uintptr_t)run +
+ (uintptr_t)bin->reg0_offset);
+ assert((uintptr_t)ret < (uintptr_t)run->next);
+ assert(((uintptr_t)ret - ((uintptr_t)run +
+ (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size ==
+ 0);
+ run->avail = *(void **)ret;
+ return (ret);
+ }
+ ret = run->next;
+ run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size);
+ assert(ret != NULL);
return (ret);
}
static inline void
arena_run_reg_dalloc(arena_run_t *run, void *ptr)
{
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- size_t binind = arena_bin_index(chunk->arena, run->bin);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
- unsigned regind = arena_run_regind(run, bin_info, ptr);
- bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
- (uintptr_t)bin_info->bitmap_offset);
-
- assert(run->nfree < bin_info->nregs);
+
+ assert(run->nfree < run->bin->nregs);
/* Freeing an interior pointer can cause assertion failure. */
assert(((uintptr_t)ptr - ((uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size
+ (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size
== 0);
+ /*
+ * Freeing a pointer lower than region zero can cause assertion
+ * failure.
+ */
assert((uintptr_t)ptr >= (uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset);
- /* Freeing an unallocated pointer can cause assertion failure. */
- assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind));
+ (uintptr_t)run->bin->reg0_offset);
+ /*
+ * Freeing a pointer past in the run's frontier can cause assertion
+ * failure.
+ */
+ assert((uintptr_t)ptr < (uintptr_t)run->next);
- bitmap_unset(bitmap, &bin_info->bitmap_info, regind);
+ *(void **)ptr = run->avail;
+ run->avail = ptr;
run->nfree++;
}
@@ -315,9 +320,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
size_t flag_dirty;
arena_avail_tree_t *runs_avail;
-#ifdef JEMALLOC_STATS
- size_t cactive_diff;
-#endif
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
old_ndirty = chunk->ndirty;
@@ -336,13 +338,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
rem_pages = total_pages - need_pages;
arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]);
-#ifdef JEMALLOC_STATS
- /* Update stats_cactive if nactive is crossing a chunk multiple. */
- cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) <<
- PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT);
- if (cactive_diff != 0)
- stats_cactive_add(cactive_diff);
-#endif
arena->nactive += need_pages;
/* Keep track of trailing unused pages for later use. */
@@ -569,7 +564,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
arena->ndirty -= spare->ndirty;
}
malloc_mutex_unlock(&arena->lock);
- chunk_dealloc((void *)spare, chunksize, true);
+ chunk_dealloc((void *)spare, chunksize);
malloc_mutex_lock(&arena->lock);
#ifdef JEMALLOC_STATS
arena->stats.mapped -= chunksize;
@@ -730,9 +725,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
assert(pageind + npages <= chunk_npages);
if (mapelm->bits & CHUNK_MAP_DIRTY) {
size_t i;
-#ifdef JEMALLOC_STATS
- size_t cactive_diff;
-#endif
arena_avail_tree_remove(
&arena->runs_avail_dirty, mapelm);
@@ -755,17 +747,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
CHUNK_MAP_ALLOCATED;
}
-#ifdef JEMALLOC_STATS
- /*
- * Update stats_cactive if nactive is crossing a
- * chunk multiple.
- */
- cactive_diff = CHUNK_CEILING((arena->nactive +
- npages) << PAGE_SHIFT) -
- CHUNK_CEILING(arena->nactive << PAGE_SHIFT);
- if (cactive_diff != 0)
- stats_cactive_add(cactive_diff);
-#endif
arena->nactive += npages;
/* Append to list for later processing. */
ql_elm_new(mapelm, u.ql_link);
@@ -782,12 +763,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
chunk + (uintptr_t)(pageind << PAGE_SHIFT));
assert((mapelm->bits >> PAGE_SHIFT) == 0);
- dassert(run->magic == ARENA_RUN_MAGIC);
- size_t binind = arena_bin_index(arena,
- run->bin);
- arena_bin_info_t *bin_info =
- &arena_bin_info[binind];
- pageind += bin_info->run_size >> PAGE_SHIFT;
+ assert(run->magic == ARENA_RUN_MAGIC);
+ pageind += run->bin->run_size >> PAGE_SHIFT;
}
}
}
@@ -868,10 +845,9 @@ arena_purge(arena_t *arena, bool all)
}
assert(ndirty == arena->ndirty);
#endif
- assert(arena->ndirty > arena->npurgatory || all);
- assert(arena->ndirty - arena->npurgatory > chunk_npages || all);
- assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
- arena->npurgatory) || all);
+ assert(arena->ndirty > arena->npurgatory);
+ assert(arena->ndirty > chunk_npages || all);
+ assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all);
#ifdef JEMALLOC_STATS
arena->stats.npurge++;
@@ -883,10 +859,8 @@ arena_purge(arena_t *arena, bool all)
* multiple threads from racing to reduce ndirty below the threshold.
*/
npurgatory = arena->ndirty - arena->npurgatory;
- if (all == false) {
- assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult);
+ if (all == false)
npurgatory -= arena->nactive >> opt_lg_dirty_mult;
- }
arena->npurgatory += npurgatory;
while (npurgatory > 0) {
@@ -957,9 +931,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
arena_chunk_t *chunk;
size_t size, run_ind, run_pages, flag_dirty;
arena_avail_tree_t *runs_avail;
-#ifdef JEMALLOC_STATS
- size_t cactive_diff;
-#endif
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
@@ -975,19 +946,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
CHUNK_MAP_LARGE) != 0);
assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
CHUNK_MAP_ALLOCATED) != 0);
- } else {
- size_t binind = arena_bin_index(arena, run->bin);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
- size = bin_info->run_size;
- }
+ } else
+ size = run->bin->run_size;
run_pages = (size >> PAGE_SHIFT);
-#ifdef JEMALLOC_STATS
- /* Update stats_cactive if nactive is crossing a chunk multiple. */
- cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) -
- CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT);
- if (cactive_diff != 0)
- stats_cactive_sub(cactive_diff);
-#endif
arena->nactive -= run_pages;
/*
@@ -1213,8 +1174,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
{
arena_chunk_map_t *mapelm;
arena_run_t *run;
- size_t binind;
- arena_bin_info_t *bin_info;
/* Look for a usable run. */
mapelm = arena_run_tree_first(&bin->runs);
@@ -1238,23 +1197,18 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
}
/* No existing runs have any space available. */
- binind = arena_bin_index(arena, bin);
- bin_info = &arena_bin_info[binind];
-
/* Allocate a new run. */
malloc_mutex_unlock(&bin->lock);
/******************************/
malloc_mutex_lock(&arena->lock);
- run = arena_run_alloc(arena, bin_info->run_size, false, false);
+ run = arena_run_alloc(arena, bin->run_size, false, false);
if (run != NULL) {
- bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
- (uintptr_t)bin_info->bitmap_offset);
-
/* Initialize run internals. */
run->bin = bin;
- run->nextind = 0;
- run->nfree = bin_info->nregs;
- bitmap_init(bitmap, &bin_info->bitmap_info);
+ run->avail = NULL;
+ run->next = (void *)((uintptr_t)run +
+ (uintptr_t)bin->reg0_offset);
+ run->nfree = bin->nregs;
#ifdef JEMALLOC_DEBUG
run->magic = ARENA_RUN_MAGIC;
#endif
@@ -1305,12 +1259,8 @@ static void *
arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
{
void *ret;
- size_t binind;
- arena_bin_info_t *bin_info;
arena_run_t *run;
- binind = arena_bin_index(arena, bin);
- bin_info = &arena_bin_info[binind];
bin->runcur = NULL;
run = arena_bin_nonfull_run_get(arena, bin);
if (bin->runcur != NULL && bin->runcur->nfree > 0) {
@@ -1318,22 +1268,22 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
* Another thread updated runcur while this one ran without the
* bin lock in arena_bin_nonfull_run_get().
*/
- dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
+ assert(bin->runcur->magic == ARENA_RUN_MAGIC);
assert(bin->runcur->nfree > 0);
- ret = arena_run_reg_alloc(bin->runcur, bin_info);
+ ret = arena_run_reg_alloc(bin->runcur, bin);
if (run != NULL) {
arena_chunk_t *chunk;
/*
* arena_run_alloc() may have allocated run, or it may
- * have pulled run from the bin's run tree. Therefore
+ * have pulled it from the bin's run tree. Therefore
* it is unsafe to make any assumptions about how run
* has previously been used, and arena_bin_lower_run()
* must be called, as if a region were just deallocated
* from the run.
*/
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- if (run->nfree == bin_info->nregs)
+ if (run->nfree == bin->nregs)
arena_dalloc_bin_run(arena, chunk, run, bin);
else
arena_bin_lower_run(arena, chunk, run, bin);
@@ -1346,10 +1296,10 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
bin->runcur = run;
- dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
+ assert(bin->runcur->magic == ARENA_RUN_MAGIC);
assert(bin->runcur->nfree > 0);
- return (arena_run_reg_alloc(bin->runcur, bin_info));
+ return (arena_run_reg_alloc(bin->runcur, bin));
}
#ifdef JEMALLOC_PROF
@@ -1389,19 +1339,18 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
#endif
bin = &arena->bins[binind];
malloc_mutex_lock(&bin->lock);
- for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
- tbin->lg_fill_div); i < nfill; i++) {
+ for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) {
if ((run = bin->runcur) != NULL && run->nfree > 0)
- ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
+ ptr = arena_run_reg_alloc(run, bin);
else
ptr = arena_bin_malloc_hard(arena, bin);
if (ptr == NULL)
break;
- /* Insert such that low regions get used first. */
- tbin->avail[nfill - 1 - i] = ptr;
+ *(void **)ptr = tbin->avail;
+ tbin->avail = ptr;
}
#ifdef JEMALLOC_STATS
- bin->stats.allocated += i * arena_bin_info[binind].reg_size;
+ bin->stats.allocated += (i - tbin->ncached) * bin->reg_size;
bin->stats.nmalloc += i;
bin->stats.nrequests += tbin->tstats.nrequests;
bin->stats.nfills++;
@@ -1409,9 +1358,119 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
#endif
malloc_mutex_unlock(&bin->lock);
tbin->ncached = i;
+ if (tbin->ncached > tbin->high_water)
+ tbin->high_water = tbin->ncached;
}
#endif
+/*
+ * Calculate bin->run_size such that it meets the following constraints:
+ *
+ * *) bin->run_size >= min_run_size
+ * *) bin->run_size <= arena_maxclass
+ * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
+ * *) run header size < PAGE_SIZE
+ *
+ * bin->nregs and bin->reg0_offset are also calculated here, since these
+ * settings are all interdependent.
+ */
+static size_t
+arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
+{
+ size_t try_run_size, good_run_size;
+ uint32_t try_nregs, good_nregs;
+ uint32_t try_hdr_size, good_hdr_size;
+#ifdef JEMALLOC_PROF
+ uint32_t try_ctx0_offset, good_ctx0_offset;
+#endif
+ uint32_t try_reg0_offset, good_reg0_offset;
+
+ assert(min_run_size >= PAGE_SIZE);
+ assert(min_run_size <= arena_maxclass);
+
+ /*
+ * Calculate known-valid settings before entering the run_size
+ * expansion loop, so that the first part of the loop always copies
+ * valid settings.
+ *
+ * The do..while loop iteratively reduces the number of regions until
+ * the run header and the regions no longer overlap. A closed formula
+ * would be quite messy, since there is an interdependency between the
+ * header's mask length and the number of regions.
+ */
+ try_run_size = min_run_size;
+ try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
+ + 1; /* Counter-act try_nregs-- in loop. */
+ do {
+ try_nregs--;
+ try_hdr_size = sizeof(arena_run_t);
+#ifdef JEMALLOC_PROF
+ if (opt_prof && prof_promote == false) {
+ /* Pad to a quantum boundary. */
+ try_hdr_size = QUANTUM_CEILING(try_hdr_size);
+ try_ctx0_offset = try_hdr_size;
+ /* Add space for one (prof_ctx_t *) per region. */
+ try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
+ } else
+ try_ctx0_offset = 0;
+#endif
+ try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
+ } while (try_hdr_size > try_reg0_offset);
+
+ /* run_size expansion loop. */
+ do {
+ /*
+ * Copy valid settings before trying more aggressive settings.
+ */
+ good_run_size = try_run_size;
+ good_nregs = try_nregs;
+ good_hdr_size = try_hdr_size;
+#ifdef JEMALLOC_PROF
+ good_ctx0_offset = try_ctx0_offset;
+#endif
+ good_reg0_offset = try_reg0_offset;
+
+ /* Try more aggressive settings. */
+ try_run_size += PAGE_SIZE;
+ try_nregs = ((try_run_size - sizeof(arena_run_t)) /
+ bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
+ do {
+ try_nregs--;
+ try_hdr_size = sizeof(arena_run_t);
+#ifdef JEMALLOC_PROF
+ if (opt_prof && prof_promote == false) {
+ /* Pad to a quantum boundary. */
+ try_hdr_size = QUANTUM_CEILING(try_hdr_size);
+ try_ctx0_offset = try_hdr_size;
+ /*
+ * Add space for one (prof_ctx_t *) per region.
+ */
+ try_hdr_size += try_nregs *
+ sizeof(prof_ctx_t *);
+ }
+#endif
+ try_reg0_offset = try_run_size - (try_nregs *
+ bin->reg_size);
+ } while (try_hdr_size > try_reg0_offset);
+ } while (try_run_size <= arena_maxclass
+ && try_run_size <= arena_maxclass
+ && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
+ && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
+ && try_hdr_size < PAGE_SIZE);
+
+ assert(good_hdr_size <= good_reg0_offset);
+
+ /* Copy final settings. */
+ bin->run_size = good_run_size;
+ bin->nregs = good_nregs;
+#ifdef JEMALLOC_PROF
+ bin->ctx0_offset = good_ctx0_offset;
+#endif
+ bin->reg0_offset = good_reg0_offset;
+
+ return (good_run_size);
+}
+
void *
arena_malloc_small(arena_t *arena, size_t size, bool zero)
{
@@ -1420,14 +1479,14 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
arena_run_t *run;
size_t binind;
- binind = SMALL_SIZE2BIN(size);
+ binind = small_size2bin[size];
assert(binind < nbins);
bin = &arena->bins[binind];
- size = arena_bin_info[binind].reg_size;
+ size = bin->reg_size;
malloc_mutex_lock(&bin->lock);
if ((run = bin->runcur) != NULL && run->nfree > 0)
- ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
+ ret = arena_run_reg_alloc(run, bin);
else
ret = arena_bin_malloc_hard(arena, bin);
@@ -1631,13 +1690,11 @@ arena_salloc(const void *ptr)
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
- dassert(run->magic == ARENA_RUN_MAGIC);
- size_t binind = arena_bin_index(chunk->arena, run->bin);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
+ assert(run->magic == ARENA_RUN_MAGIC);
assert(((uintptr_t)ptr - ((uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
+ (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
0);
- ret = bin_info->reg_size;
+ ret = run->bin->reg_size;
} else {
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
ret = mapbits & ~PAGE_MASK;
@@ -1657,11 +1714,10 @@ arena_prof_promoted(const void *ptr, size_t size)
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
assert(isalloc(ptr) == PAGE_SIZE);
- assert(size <= small_maxclass);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
- binind = SMALL_SIZE2BIN(size);
+ binind = small_size2bin[size];
assert(binind < nbins);
chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits &
~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT);
@@ -1685,13 +1741,11 @@ arena_salloc_demote(const void *ptr)
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
- dassert(run->magic == ARENA_RUN_MAGIC);
- size_t binind = arena_bin_index(chunk->arena, run->bin);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
+ assert(run->magic == ARENA_RUN_MAGIC);
assert(((uintptr_t)ptr - ((uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
+ (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
0);
- ret = bin_info->reg_size;
+ ret = run->bin->reg_size;
} else {
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
ret = mapbits & ~PAGE_MASK;
@@ -1700,7 +1754,7 @@ arena_salloc_demote(const void *ptr)
size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
CHUNK_MAP_CLASS_SHIFT) - 1;
assert(binind < nbins);
- ret = arena_bin_info[binind].reg_size;
+ ret = chunk->arena->bins[binind].reg_size;
}
assert(ret != 0);
}
@@ -1717,22 +1771,17 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
/* Dissociate run from bin. */
if (run == bin->runcur)
bin->runcur = NULL;
- else {
- size_t binind = arena_bin_index(chunk->arena, bin);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
-
- if (bin_info->nregs != 1) {
- size_t run_pageind = (((uintptr_t)run -
- (uintptr_t)chunk)) >> PAGE_SHIFT;
- arena_chunk_map_t *run_mapelm =
- &chunk->map[run_pageind-map_bias];
- /*
- * This block's conditional is necessary because if the
- * run only contains one region, then it never gets
- * inserted into the non-full runs tree.
- */
- arena_run_tree_remove(&bin->runs, run_mapelm);
- }
+ else if (bin->nregs != 1) {
+ size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >>
+ PAGE_SHIFT;
+ arena_chunk_map_t *run_mapelm =
+ &chunk->map[run_pageind-map_bias];
+ /*
+ * This block's conditional is necessary because if the run
+ * only contains one region, then it never gets inserted into
+ * the non-full runs tree.
+ */
+ arena_run_tree_remove(&bin->runs, run_mapelm);
}
}
@@ -1740,24 +1789,18 @@ static void
arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
arena_bin_t *bin)
{
- size_t binind;
- arena_bin_info_t *bin_info;
size_t npages, run_ind, past;
assert(run != bin->runcur);
assert(arena_run_tree_search(&bin->runs, &chunk->map[
(((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL);
- binind = arena_bin_index(chunk->arena, run->bin);
- bin_info = &arena_bin_info[binind];
-
malloc_mutex_unlock(&bin->lock);
/******************************/
- npages = bin_info->run_size >> PAGE_SHIFT;
+ npages = bin->run_size >> PAGE_SHIFT;
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
- past = (size_t)(PAGE_CEILING((uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind *
- bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT);
+ past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk)
+ >> PAGE_SHIFT);
malloc_mutex_lock(&arena->lock);
/*
@@ -1774,7 +1817,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE |
(chunk->map[run_ind+npages-1-map_bias].bits &
CHUNK_MAP_FLAGS_MASK);
- chunk->map[run_ind-map_bias].bits = bin_info->run_size |
+ chunk->map[run_ind-map_bias].bits = bin->run_size |
CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits &
CHUNK_MAP_FLAGS_MASK);
arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT),
@@ -1843,12 +1886,10 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
- dassert(run->magic == ARENA_RUN_MAGIC);
+ assert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
- size_t binind = arena_bin_index(arena, bin);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
- size = bin_info->reg_size;
+ size = bin->reg_size;
#endif
#ifdef JEMALLOC_FILL
@@ -1857,7 +1898,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
#endif
arena_run_reg_dalloc(run, ptr);
- if (run->nfree == bin_info->nregs) {
+ if (run->nfree == bin->nregs) {
arena_dissociate_bin_run(chunk, run, bin);
arena_dalloc_bin_run(arena, chunk, run, bin);
} else if (run->nfree == 1 && run != bin->runcur)
@@ -2091,7 +2132,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
arena = chunk->arena;
- dassert(arena->magic == ARENA_MAGIC);
+ assert(arena->magic == ARENA_MAGIC);
if (psize < oldsize) {
#ifdef JEMALLOC_FILL
@@ -2129,11 +2170,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
*/
if (oldsize <= arena_maxclass) {
if (oldsize <= small_maxclass) {
- assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size
- == oldsize);
+ assert(choose_arena()->bins[small_size2bin[
+ oldsize]].reg_size == oldsize);
if ((size + extra <= small_maxclass &&
- SMALL_SIZE2BIN(size + extra) ==
- SMALL_SIZE2BIN(oldsize)) || (size <= oldsize &&
+ small_size2bin[size + extra] ==
+ small_size2bin[oldsize]) || (size <= oldsize &&
size + extra >= oldsize)) {
#ifdef JEMALLOC_FILL
if (opt_junk && size < oldsize) {
@@ -2169,29 +2210,24 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
if (ret != NULL)
return (ret);
+
/*
* size and oldsize are different enough that we need to move the
* object. In that case, fall back to allocating new space and
* copying.
*/
- if (alignment != 0) {
- size_t usize = sa2u(size + extra, alignment, NULL);
- if (usize == 0)
- return (NULL);
- ret = ipalloc(usize, alignment, zero);
- } else
+ if (alignment != 0)
+ ret = ipalloc(size + extra, alignment, zero);
+ else
ret = arena_malloc(size + extra, zero);
if (ret == NULL) {
if (extra == 0)
return (NULL);
/* Try again, this time without extra. */
- if (alignment != 0) {
- size_t usize = sa2u(size, alignment, NULL);
- if (usize == 0)
- return (NULL);
- ret = ipalloc(usize, alignment, zero);
- } else
+ if (alignment != 0)
+ ret = ipalloc(size, alignment, zero);
+ else
ret = arena_malloc(size, zero);
if (ret == NULL)
@@ -2215,9 +2251,9 @@ arena_new(arena_t *arena, unsigned ind)
{
unsigned i;
arena_bin_t *bin;
+ size_t prev_run_size;
arena->ind = ind;
- arena->nthreads = 0;
if (malloc_mutex_init(&arena->lock))
return (true);
@@ -2251,6 +2287,8 @@ arena_new(arena_t *arena, unsigned ind)
arena_avail_tree_new(&arena->runs_avail_dirty);
/* Initialize bins. */
+ prev_run_size = PAGE_SIZE;
+
i = 0;
#ifdef JEMALLOC_TINY
/* (2^n)-spaced tiny bins. */
@@ -2260,6 +2298,11 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
+
+ bin->reg_size = (1U << (LG_TINY_MIN + i));
+
+ prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
+
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2273,6 +2316,11 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
+
+ bin->reg_size = (i - ntbins + 1) << LG_QUANTUM;
+
+ prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
+
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2285,6 +2333,12 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
+
+ bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
+ LG_CACHELINE);
+
+ prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
+
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2297,6 +2351,12 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
+
+ bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
+ << LG_SUBPAGE);
+
+ prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
+
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2315,39 +2375,40 @@ small_size2bin_validate(void)
{
size_t i, size, binind;
+ assert(small_size2bin[0] == 0xffU);
i = 1;
# ifdef JEMALLOC_TINY
/* Tiny. */
for (; i < (1U << LG_TINY_MIN); i++) {
size = pow2_ceil(1U << LG_TINY_MIN);
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
- assert(SMALL_SIZE2BIN(i) == binind);
+ assert(small_size2bin[i] == binind);
}
for (; i < qspace_min; i++) {
size = pow2_ceil(i);
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
- assert(SMALL_SIZE2BIN(i) == binind);
+ assert(small_size2bin[i] == binind);
}
# endif
/* Quantum-spaced. */
for (; i <= qspace_max; i++) {
size = QUANTUM_CEILING(i);
binind = ntbins + (size >> LG_QUANTUM) - 1;
- assert(SMALL_SIZE2BIN(i) == binind);
+ assert(small_size2bin[i] == binind);
}
/* Cacheline-spaced. */
for (; i <= cspace_max; i++) {
size = CACHELINE_CEILING(i);
binind = ntbins + nqbins + ((size - cspace_min) >>
LG_CACHELINE);
- assert(SMALL_SIZE2BIN(i) == binind);
+ assert(small_size2bin[i] == binind);
}
/* Sub-page. */
for (; i <= sspace_max; i++) {
size = SUBPAGE_CEILING(i);
binind = ntbins + nqbins + ncbins + ((size - sspace_min)
>> LG_SUBPAGE);
- assert(SMALL_SIZE2BIN(i) == binind);
+ assert(small_size2bin[i] == binind);
}
}
#endif
@@ -2358,12 +2419,12 @@ small_size2bin_init(void)
if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
|| opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
- || (sizeof(const_small_size2bin) != ((small_maxclass-1) >>
- LG_TINY_MIN) + 1))
+ || sizeof(const_small_size2bin) != small_maxclass + 1)
return (small_size2bin_init_hard());
small_size2bin = const_small_size2bin;
#ifdef JEMALLOC_DEBUG
+ assert(sizeof(const_small_size2bin) == small_maxclass + 1);
small_size2bin_validate();
#endif
return (false);
@@ -2374,52 +2435,49 @@ small_size2bin_init_hard(void)
{
size_t i, size, binind;
uint8_t *custom_small_size2bin;
-#define CUSTOM_SMALL_SIZE2BIN(s) \
- custom_small_size2bin[(s-1) >> LG_TINY_MIN]
assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
|| opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
- || (sizeof(const_small_size2bin) != ((small_maxclass-1) >>
- LG_TINY_MIN) + 1));
+ || sizeof(const_small_size2bin) != small_maxclass + 1);
- custom_small_size2bin = (uint8_t *)
- base_alloc(small_maxclass >> LG_TINY_MIN);
+ custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1);
if (custom_small_size2bin == NULL)
return (true);
+ custom_small_size2bin[0] = 0xffU;
i = 1;
#ifdef JEMALLOC_TINY
/* Tiny. */
- for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) {
+ for (; i < (1U << LG_TINY_MIN); i++) {
size = pow2_ceil(1U << LG_TINY_MIN);
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
- CUSTOM_SMALL_SIZE2BIN(i) = binind;
+ custom_small_size2bin[i] = binind;
}
- for (; i < qspace_min; i += TINY_MIN) {
+ for (; i < qspace_min; i++) {
size = pow2_ceil(i);
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
- CUSTOM_SMALL_SIZE2BIN(i) = binind;
+ custom_small_size2bin[i] = binind;
}
#endif
/* Quantum-spaced. */
- for (; i <= qspace_max; i += TINY_MIN) {
+ for (; i <= qspace_max; i++) {
size = QUANTUM_CEILING(i);
binind = ntbins + (size >> LG_QUANTUM) - 1;
- CUSTOM_SMALL_SIZE2BIN(i) = binind;
+ custom_small_size2bin[i] = binind;
}
/* Cacheline-spaced. */
- for (; i <= cspace_max; i += TINY_MIN) {
+ for (; i <= cspace_max; i++) {
size = CACHELINE_CEILING(i);
binind = ntbins + nqbins + ((size - cspace_min) >>
LG_CACHELINE);
- CUSTOM_SMALL_SIZE2BIN(i) = binind;
+ custom_small_size2bin[i] = binind;
}
/* Sub-page. */
- for (; i <= sspace_max; i += TINY_MIN) {
+ for (; i <= sspace_max; i++) {
size = SUBPAGE_CEILING(i);
binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
LG_SUBPAGE);
- CUSTOM_SMALL_SIZE2BIN(i) = binind;
+ custom_small_size2bin[i] = binind;
}
small_size2bin = custom_small_size2bin;
@@ -2427,190 +2485,6 @@ small_size2bin_init_hard(void)
small_size2bin_validate();
#endif
return (false);
-#undef CUSTOM_SMALL_SIZE2BIN
-}
-
-/*
- * Calculate bin_info->run_size such that it meets the following constraints:
- *
- * *) bin_info->run_size >= min_run_size
- * *) bin_info->run_size <= arena_maxclass
- * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
- * *) bin_info->nregs <= RUN_MAXREGS
- *
- * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also
- * calculated here, since these settings are all interdependent.
- */
-static size_t
-bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
-{
- size_t try_run_size, good_run_size;
- uint32_t try_nregs, good_nregs;
- uint32_t try_hdr_size, good_hdr_size;
- uint32_t try_bitmap_offset, good_bitmap_offset;
-#ifdef JEMALLOC_PROF
- uint32_t try_ctx0_offset, good_ctx0_offset;
-#endif
- uint32_t try_reg0_offset, good_reg0_offset;
-
- assert(min_run_size >= PAGE_SIZE);
- assert(min_run_size <= arena_maxclass);
-
- /*
- * Calculate known-valid settings before entering the run_size
- * expansion loop, so that the first part of the loop always copies
- * valid settings.
- *
- * The do..while loop iteratively reduces the number of regions until
- * the run header and the regions no longer overlap. A closed formula
- * would be quite messy, since there is an interdependency between the
- * header's mask length and the number of regions.
- */
- try_run_size = min_run_size;
- try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size)
- + 1; /* Counter-act try_nregs-- in loop. */
- if (try_nregs > RUN_MAXREGS) {
- try_nregs = RUN_MAXREGS
- + 1; /* Counter-act try_nregs-- in loop. */
- }
- do {
- try_nregs--;
- try_hdr_size = sizeof(arena_run_t);
- /* Pad to a long boundary. */
- try_hdr_size = LONG_CEILING(try_hdr_size);
- try_bitmap_offset = try_hdr_size;
- /* Add space for bitmap. */
- try_hdr_size += bitmap_size(try_nregs);
-#ifdef JEMALLOC_PROF
- if (opt_prof && prof_promote == false) {
- /* Pad to a quantum boundary. */
- try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_ctx0_offset = try_hdr_size;
- /* Add space for one (prof_ctx_t *) per region. */
- try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
- } else
- try_ctx0_offset = 0;
-#endif
- try_reg0_offset = try_run_size - (try_nregs *
- bin_info->reg_size);
- } while (try_hdr_size > try_reg0_offset);
-
- /* run_size expansion loop. */
- do {
- /*
- * Copy valid settings before trying more aggressive settings.
- */
- good_run_size = try_run_size;
- good_nregs = try_nregs;
- good_hdr_size = try_hdr_size;
- good_bitmap_offset = try_bitmap_offset;
-#ifdef JEMALLOC_PROF
- good_ctx0_offset = try_ctx0_offset;
-#endif
- good_reg0_offset = try_reg0_offset;
-
- /* Try more aggressive settings. */
- try_run_size += PAGE_SIZE;
- try_nregs = ((try_run_size - sizeof(arena_run_t)) /
- bin_info->reg_size)
- + 1; /* Counter-act try_nregs-- in loop. */
- if (try_nregs > RUN_MAXREGS) {
- try_nregs = RUN_MAXREGS
- + 1; /* Counter-act try_nregs-- in loop. */
- }
- do {
- try_nregs--;
- try_hdr_size = sizeof(arena_run_t);
- /* Pad to a long boundary. */
- try_hdr_size = LONG_CEILING(try_hdr_size);
- try_bitmap_offset = try_hdr_size;
- /* Add space for bitmap. */
- try_hdr_size += bitmap_size(try_nregs);
-#ifdef JEMALLOC_PROF
- if (opt_prof && prof_promote == false) {
- /* Pad to a quantum boundary. */
- try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_ctx0_offset = try_hdr_size;
- /*
- * Add space for one (prof_ctx_t *) per region.
- */
- try_hdr_size += try_nregs *
- sizeof(prof_ctx_t *);
- }
-#endif
- try_reg0_offset = try_run_size - (try_nregs *
- bin_info->reg_size);
- } while (try_hdr_size > try_reg0_offset);
- } while (try_run_size <= arena_maxclass
- && try_run_size <= arena_maxclass
- && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX
- && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
- && try_nregs < RUN_MAXREGS);
-
- assert(good_hdr_size <= good_reg0_offset);
-
- /* Copy final settings. */
- bin_info->run_size = good_run_size;
- bin_info->nregs = good_nregs;
- bin_info->bitmap_offset = good_bitmap_offset;
-#ifdef JEMALLOC_PROF
- bin_info->ctx0_offset = good_ctx0_offset;
-#endif
- bin_info->reg0_offset = good_reg0_offset;
-
- return (good_run_size);
-}
-
-static bool
-bin_info_init(void)
-{
- arena_bin_info_t *bin_info;
- unsigned i;
- size_t prev_run_size;
-
- arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins);
- if (arena_bin_info == NULL)
- return (true);
-
- prev_run_size = PAGE_SIZE;
- i = 0;
-#ifdef JEMALLOC_TINY
- /* (2^n)-spaced tiny bins. */
- for (; i < ntbins; i++) {
- bin_info = &arena_bin_info[i];
- bin_info->reg_size = (1U << (LG_TINY_MIN + i));
- prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
- bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
- }
-#endif
-
- /* Quantum-spaced bins. */
- for (; i < ntbins + nqbins; i++) {
- bin_info = &arena_bin_info[i];
- bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM;
- prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
- bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
- }
-
- /* Cacheline-spaced bins. */
- for (; i < ntbins + nqbins + ncbins; i++) {
- bin_info = &arena_bin_info[i];
- bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
- LG_CACHELINE);
- prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
- bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
- }
-
- /* Subpage-spaced bins. */
- for (; i < nbins; i++) {
- bin_info = &arena_bin_info[i];
- bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins +
- ncbins)) << LG_SUBPAGE);
- prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
- bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
- }
-
- return (false);
}
bool
@@ -2671,6 +2545,9 @@ arena_boot(void)
abort();
}
+ if (small_size2bin_init())
+ return (true);
+
/*
* Compute the header size such that it is large enough to contain the
* page map. The page map is biased to omit entries for the header
@@ -2694,11 +2571,5 @@ arena_boot(void)
arena_maxclass = chunksize - (map_bias << PAGE_SHIFT);
- if (small_size2bin_init())
- return (true);
-
- if (bin_info_init())
- return (true);
-
return (false);
}
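
For the re-added arena_bin_run_size_calc() above, the inner do..while shrinks the region count until the run header no longer overlaps region zero. A hedged worked example of just that fitting step, assuming a 4096-byte run and a hypothetical 40-byte header (the real sizeof(arena_run_t) depends on configuration):

    #include <stdio.h>
    #include <stddef.h>

    #define HDR_SIZE 40u   /* stand-in for sizeof(arena_run_t); an assumption */

    static void
    fit(size_t run_size, size_t reg_size)
    {
        /* The +1 counter-acts the nregs-- on the first loop iteration,
         * exactly as the reverted code does. */
        size_t nregs = ((run_size - HDR_SIZE) / reg_size) + 1;
        size_t reg0_offset;

        do {
            nregs--;
            reg0_offset = run_size - (nregs * reg_size);
        } while (HDR_SIZE > reg0_offset);

        printf("run_size %zu, reg_size %zu -> nregs %zu, reg0_offset %zu\n",
            run_size, reg_size, nregs, reg0_offset);
    }

    int
    main(void)
    {
        fit(4096, 192);   /* -> nregs 21, reg0_offset 64 */
        return (0);
    }

The outer loop in the real function then grows run_size one page at a time while the header overhead (reg0_offset as a fixed-point fraction of run_size, via RUN_BFP) still exceeds RUN_MAX_OVRHD, stopping at arena_maxclass or once the header would reach PAGE_SIZE.
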
diff --git a/dep/jemalloc/src/atomic.c b/dep/jemalloc/src/atomic.c
deleted file mode 100644
index 77ee313113b..00000000000
--- a/dep/jemalloc/src/atomic.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define JEMALLOC_ATOMIC_C_
-#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/dep/jemalloc/src/bitmap.c b/dep/jemalloc/src/bitmap.c
deleted file mode 100644
index b47e2629093..00000000000
--- a/dep/jemalloc/src/bitmap.c
+++ /dev/null
@@ -1,90 +0,0 @@
-#define JEMALLOC_BITMAP_C_
-#include "jemalloc/internal/jemalloc_internal.h"
-
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static size_t bits2groups(size_t nbits);
-
-/******************************************************************************/
-
-static size_t
-bits2groups(size_t nbits)
-{
-
- return ((nbits >> LG_BITMAP_GROUP_NBITS) +
- !!(nbits & BITMAP_GROUP_NBITS_MASK));
-}
-
-void
-bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
-{
- unsigned i;
- size_t group_count;
-
- assert(nbits > 0);
- assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
-
- /*
- * Compute the number of groups necessary to store nbits bits, and
- * progressively work upward through the levels until reaching a level
- * that requires only one group.
- */
- binfo->levels[0].group_offset = 0;
- group_count = bits2groups(nbits);
- for (i = 1; group_count > 1; i++) {
- assert(i < BITMAP_MAX_LEVELS);
- binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
- + group_count;
- group_count = bits2groups(group_count);
- }
- binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
- + group_count;
- binfo->nlevels = i;
- binfo->nbits = nbits;
-}
-
-size_t
-bitmap_info_ngroups(const bitmap_info_t *binfo)
-{
-
- return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP);
-}
-
-size_t
-bitmap_size(size_t nbits)
-{
- bitmap_info_t binfo;
-
- bitmap_info_init(&binfo, nbits);
- return (bitmap_info_ngroups(&binfo));
-}
-
-void
-bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
-{
- size_t extra;
- unsigned i;
-
- /*
- * Bits are actually inverted with regard to the external bitmap
- * interface, so the bitmap starts out with all 1 bits, except for
- * trailing unused bits (if any). Note that each group uses bit 0 to
- * correspond to the first logical bit in the group, so extra bits
- * are the most significant bits of the last group.
- */
- memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
- LG_SIZEOF_BITMAP);
- extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
- & BITMAP_GROUP_NBITS_MASK;
- if (extra != 0)
- bitmap[binfo->levels[1].group_offset - 1] >>= extra;
- for (i = 1; i < binfo->nlevels; i++) {
- size_t group_count = binfo->levels[i].group_offset -
- binfo->levels[i-1].group_offset;
- extra = (BITMAP_GROUP_NBITS - (group_count &
- BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
- if (extra != 0)
- bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
- }
-}
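
The deleted bitmap.c above is the support code for the allocator being reverted away: level 0 holds one bit per region and each higher level holds one summary bit per group below it, so a free region can be found in a constant number of word scans. As the init code notes, bits are inverted relative to the external interface (1 means free). A simplified single-level sketch of the find-lowest-free-and-claim step (the real multi-level bitmap_sfu() lives in the headers, not in this file):

    #include <stdint.h>
    #include <strings.h>   /* ffs(), as used elsewhere in the reverted sources */

    /* Precondition: the group must not be full (some bit still set), mirroring
     * the assert(bitmap_full(...) == false) in the removed arena_run_reg_alloc. */
    static unsigned
    first_free(uint32_t *group)
    {
        unsigned bit = (unsigned)ffs((int)*group) - 1;  /* lowest free index */

        *group &= ~(UINT32_C(1) << bit);                /* claim the region */
        return (bit);
    }
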
diff --git a/dep/jemalloc/src/chunk.c b/dep/jemalloc/src/chunk.c
index d190c6f49b3..301519e8042 100644
--- a/dep/jemalloc/src/chunk.c
+++ b/dep/jemalloc/src/chunk.c
@@ -70,7 +70,7 @@ RETURN:
#ifdef JEMALLOC_IVSALLOC
if (base == false && ret != NULL) {
if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
- chunk_dealloc(ret, size, true);
+ chunk_dealloc(ret, size);
return (NULL);
}
}
@@ -108,7 +108,7 @@ RETURN:
}
void
-chunk_dealloc(void *chunk, size_t size, bool unmap)
+chunk_dealloc(void *chunk, size_t size)
{
assert(chunk != NULL);
@@ -125,17 +125,15 @@ chunk_dealloc(void *chunk, size_t size, bool unmap)
malloc_mutex_unlock(&chunks_mtx);
#endif
- if (unmap) {
#ifdef JEMALLOC_SWAP
- if (swap_enabled && chunk_dealloc_swap(chunk, size) == false)
- return;
+ if (swap_enabled && chunk_dealloc_swap(chunk, size) == false)
+ return;
#endif
#ifdef JEMALLOC_DSS
- if (chunk_dealloc_dss(chunk, size) == false)
- return;
+ if (chunk_dealloc_dss(chunk, size) == false)
+ return;
#endif
- chunk_dealloc_mmap(chunk, size);
- }
+ chunk_dealloc_mmap(chunk, size);
}
bool
diff --git a/dep/jemalloc/src/chunk_mmap.c b/dep/jemalloc/src/chunk_mmap.c
index 164e86e7b38..bc367559774 100644
--- a/dep/jemalloc/src/chunk_mmap.c
+++ b/dep/jemalloc/src/chunk_mmap.c
@@ -206,15 +206,13 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve)
void *
chunk_alloc_mmap(size_t size)
{
-
- return (chunk_alloc_mmap_internal(size, false));
+ return chunk_alloc_mmap_internal(size, false);
}
void *
chunk_alloc_mmap_noreserve(size_t size)
{
-
- return (chunk_alloc_mmap_internal(size, true));
+ return chunk_alloc_mmap_internal(size, true);
}
void
diff --git a/dep/jemalloc/src/ckh.c b/dep/jemalloc/src/ckh.c
index 43fcc25239d..682a8db65bf 100644
--- a/dep/jemalloc/src/ckh.c
+++ b/dep/jemalloc/src/ckh.c
@@ -34,7 +34,7 @@
* respectively.
*
******************************************************************************/
-#define JEMALLOC_CKH_C_
+#define CKH_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
@@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key)
size_t hash1, hash2, bucket, cell;
assert(ckh != NULL);
- dassert(ckh->magic == CKH_MAGIC);
+ assert(ckh->magic = CKH_MAGIG);
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
@@ -262,15 +262,9 @@ ckh_grow(ckh_t *ckh)
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
while (true) {
- size_t usize;
-
lg_curcells++;
- usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
- if (usize == 0) {
- ret = true;
- goto RETURN;
- }
- tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+ tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells,
+ ZU(1) << LG_CACHELINE, true);
if (tab == NULL) {
ret = true;
goto RETURN;
@@ -301,7 +295,7 @@ static void
ckh_shrink(ckh_t *ckh)
{
ckhc_t *tab, *ttab;
- size_t lg_curcells, usize;
+ size_t lg_curcells;
unsigned lg_prevbuckets;
/*
@@ -310,10 +304,8 @@ ckh_shrink(ckh_t *ckh)
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
- usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
- if (usize == 0)
- return;
- tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+ tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells,
+ ZU(1) << LG_CACHELINE, true);
if (tab == NULL) {
/*
* An OOM error isn't worth propagating, since it doesn't
@@ -348,7 +340,7 @@ bool
ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
{
bool ret;
- size_t mincells, usize;
+ size_t mincells;
unsigned lg_mincells;
assert(minitems > 0);
@@ -383,19 +375,15 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
ckh->hash = hash;
ckh->keycomp = keycomp;
- usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL);
- if (usize == 0) {
- ret = true;
- goto RETURN;
- }
- ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+ ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells,
+ (ZU(1) << LG_CACHELINE), true);
if (ckh->tab == NULL) {
ret = true;
goto RETURN;
}
#ifdef JEMALLOC_DEBUG
- ckh->magic = CKH_MAGIC;
+ ckh->magic = CKH_MAGIG;
#endif
ret = false;
@@ -408,7 +396,7 @@ ckh_delete(ckh_t *ckh)
{
assert(ckh != NULL);
- dassert(ckh->magic == CKH_MAGIC);
+ assert(ckh->magic = CKH_MAGIG);
#ifdef CKH_VERBOSE
malloc_printf(
@@ -433,7 +421,7 @@ ckh_count(ckh_t *ckh)
{
assert(ckh != NULL);
- dassert(ckh->magic == CKH_MAGIC);
+ assert(ckh->magic = CKH_MAGIG);
return (ckh->count);
}
@@ -464,7 +452,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
bool ret;
assert(ckh != NULL);
- dassert(ckh->magic == CKH_MAGIC);
+ assert(ckh->magic = CKH_MAGIG);
assert(ckh_search(ckh, key, NULL, NULL));
#ifdef CKH_COUNT
@@ -489,7 +477,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell;
assert(ckh != NULL);
- dassert(ckh->magic == CKH_MAGIC);
+ assert(ckh->magic = CKH_MAGIG);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
@@ -521,7 +509,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell;
assert(ckh != NULL);
- dassert(ckh->magic == CKH_MAGIC);
+ assert(ckh->magic = CKH_MAGIG);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
@@ -556,7 +544,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
} else {
ret1 = h;
ret2 = hash(key, strlen((const char *)key),
- 0x8432a476666bbc13LLU);
+ 0x8432a476666bbc13U);
}
*hash1 = ret1;
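
One detail in the restored ckh.c: the magic checks come back as assert(ckh->magic = CKH_MAGIG) with a single =, verbatim from the old sources (as is the CKH_MAGIG spelling). That is an assignment, not a comparison: the expression evaluates to the nonzero constant, so the assertion can never fire, and in debug builds it silently rewrites the magic field. A minimal sketch of the difference (the magic value below is a hypothetical placeholder):

    #include <assert.h>

    #define CKH_MAGIG 0x3af2489dU   /* placeholder value for illustration */

    struct ckh { unsigned magic; };

    static void
    check(struct ckh *ckh)
    {
        assert(ckh->magic == CKH_MAGIG);  /* compares: fires on a bad magic */
        assert(ckh->magic = CKH_MAGIG);   /* assigns: always passes, and
                                           * overwrites whatever was there */
    }
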
diff --git a/dep/jemalloc/src/ctl.c b/dep/jemalloc/src/ctl.c
index e5336d36949..3c8adab90a3 100644
--- a/dep/jemalloc/src/ctl.c
+++ b/dep/jemalloc/src/ctl.c
@@ -182,7 +182,6 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns)
CTL_PROTO(stats_arenas_i_lruns_j_curruns)
INDEX_PROTO(stats_arenas_i_lruns_j)
#endif
-CTL_PROTO(stats_arenas_i_nthreads)
CTL_PROTO(stats_arenas_i_pactive)
CTL_PROTO(stats_arenas_i_pdirty)
#ifdef JEMALLOC_STATS
@@ -193,7 +192,6 @@ CTL_PROTO(stats_arenas_i_purged)
#endif
INDEX_PROTO(stats_arenas_i)
#ifdef JEMALLOC_STATS
-CTL_PROTO(stats_cactive)
CTL_PROTO(stats_allocated)
CTL_PROTO(stats_active)
CTL_PROTO(stats_mapped)
@@ -436,7 +434,6 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = {
#endif
static const ctl_node_t stats_arenas_i_node[] = {
- {NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
{NAME("pactive"), CTL(stats_arenas_i_pactive)},
{NAME("pdirty"), CTL(stats_arenas_i_pdirty)}
#ifdef JEMALLOC_STATS
@@ -461,7 +458,6 @@ static const ctl_node_t stats_arenas_node[] = {
static const ctl_node_t stats_node[] = {
#ifdef JEMALLOC_STATS
- {NAME("cactive"), CTL(stats_cactive)},
{NAME("allocated"), CTL(stats_allocated)},
{NAME("active"), CTL(stats_active)},
{NAME("mapped"), CTL(stats_mapped)},
@@ -624,7 +620,6 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
ctl_arena_clear(astats);
- sstats->nthreads += astats->nthreads;
#ifdef JEMALLOC_STATS
ctl_arena_stats_amerge(astats, arena);
/* Merge into sum stats as well. */
@@ -662,17 +657,10 @@ ctl_refresh(void)
* Clear sum stats, since they will be merged into by
* ctl_arena_refresh().
*/
- ctl_stats.arenas[narenas].nthreads = 0;
ctl_arena_clear(&ctl_stats.arenas[narenas]);
malloc_mutex_lock(&arenas_lock);
memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
- for (i = 0; i < narenas; i++) {
- if (arenas[i] != NULL)
- ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
- else
- ctl_stats.arenas[i].nthreads = 0;
- }
malloc_mutex_unlock(&arenas_lock);
for (i = 0; i < narenas; i++) {
bool initialized = (tarenas[i] != NULL);
@@ -1126,8 +1114,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
unsigned newind, oldind;
newind = oldind = choose_arena()->ind;
- WRITE(newind, unsigned);
- READ(oldind, unsigned);
+ WRITE(oldind, unsigned);
+ READ(newind, unsigned);
if (newind != oldind) {
arena_t *arena;
@@ -1141,8 +1129,6 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
malloc_mutex_lock(&arenas_lock);
if ((arena = arenas[newind]) == NULL)
arena = arenas_extend(newind);
- arenas[oldind]->nthreads--;
- arenas[newind]->nthreads++;
malloc_mutex_unlock(&arenas_lock);
if (arena == NULL) {
ret = EAGAIN;
@@ -1151,13 +1137,6 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
/* Set new arena association. */
ARENA_SET(arena);
-#ifdef JEMALLOC_TCACHE
- {
- tcache_t *tcache = TCACHE_GET();
- if (tcache != NULL)
- tcache->arena = arena;
- }
-#endif
}
ret = 0;
@@ -1167,9 +1146,9 @@ RETURN:
#ifdef JEMALLOC_STATS
CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t);
-CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *);
+CTL_RO_NL_GEN(thread_allocatedp, &ALLOCATED_GET(), uint64_t *);
CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t);
-CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *);
+CTL_RO_NL_GEN(thread_deallocatedp, &DEALLOCATED_GET(), uint64_t *);
#endif
/******************************************************************************/
@@ -1305,9 +1284,9 @@ CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool)
/******************************************************************************/
-CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t)
-CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
-CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
+CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t)
+CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t)
+CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t)
const ctl_node_t *
arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
{
@@ -1552,7 +1531,6 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
}
#endif
-CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
#ifdef JEMALLOC_STATS
@@ -1584,7 +1562,6 @@ RETURN:
}
#ifdef JEMALLOC_STATS
-CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *)
CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t)
CTL_RO_GEN(stats_active, ctl_stats.active, size_t)
CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t)
diff --git a/dep/jemalloc/src/hash.c b/dep/jemalloc/src/hash.c
index cfa4da0275c..6a13d7a03c0 100644
--- a/dep/jemalloc/src/hash.c
+++ b/dep/jemalloc/src/hash.c
@@ -1,2 +1,2 @@
-#define JEMALLOC_HASH_C_
+#define HASH_C_
#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/dep/jemalloc/src/huge.c b/dep/jemalloc/src/huge.c
index a4f9b054ed5..0aadc4339a9 100644
--- a/dep/jemalloc/src/huge.c
+++ b/dep/jemalloc/src/huge.c
@@ -50,7 +50,6 @@ huge_malloc(size_t size, bool zero)
malloc_mutex_lock(&huge_mtx);
extent_tree_ad_insert(&huge, node);
#ifdef JEMALLOC_STATS
- stats_cactive_add(csize);
huge_nmalloc++;
huge_allocated += csize;
#endif
@@ -84,7 +83,7 @@ huge_palloc(size_t size, size_t alignment, bool zero)
* alignment, in order to assure the alignment can be achieved, then
* unmap leading and trailing chunks.
*/
- assert(alignment > chunksize);
+ assert(alignment >= chunksize);
chunk_size = CHUNK_CEILING(size);
@@ -110,12 +109,12 @@ huge_palloc(size_t size, size_t alignment, bool zero)
if (offset == 0) {
/* Trim trailing space. */
chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
- - chunk_size, true);
+ - chunk_size);
} else {
size_t trailsize;
/* Trim leading space. */
- chunk_dealloc(ret, alignment - offset, true);
+ chunk_dealloc(ret, alignment - offset);
ret = (void *)((uintptr_t)ret + (alignment - offset));
@@ -124,7 +123,7 @@ huge_palloc(size_t size, size_t alignment, bool zero)
/* Trim trailing space. */
assert(trailsize < alloc_size);
chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
- trailsize, true);
+ trailsize);
}
}
@@ -135,7 +134,6 @@ huge_palloc(size_t size, size_t alignment, bool zero)
malloc_mutex_lock(&huge_mtx);
extent_tree_ad_insert(&huge, node);
#ifdef JEMALLOC_STATS
- stats_cactive_add(chunk_size);
huge_nmalloc++;
huge_allocated += chunk_size;
#endif
@@ -194,7 +192,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
* different size class. In that case, fall back to allocating new
* space and copying.
*/
- if (alignment > chunksize)
+ if (alignment != 0)
ret = huge_palloc(size + extra, alignment, zero);
else
ret = huge_malloc(size + extra, zero);
@@ -203,7 +201,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
if (extra == 0)
return (NULL);
/* Try again, this time without extra. */
- if (alignment > chunksize)
+ if (alignment != 0)
ret = huge_palloc(size, alignment, zero);
else
ret = huge_malloc(size, zero);
@@ -234,13 +232,6 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
) {
size_t newsize = huge_salloc(ret);
- /*
- * Remove ptr from the tree of huge allocations before
- * performing the remap operation, in order to avoid the
- * possibility of another thread acquiring that mapping before
- * this one removes it from the tree.
- */
- huge_dalloc(ptr, false);
if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED,
ret) == MAP_FAILED) {
/*
@@ -260,8 +251,9 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
if (opt_abort)
abort();
memcpy(ret, ptr, copysize);
- chunk_dealloc_mmap(ptr, oldsize);
- }
+ idalloc(ptr);
+ } else
+ huge_dalloc(ptr, false);
} else
#endif
{
@@ -286,7 +278,6 @@ huge_dalloc(void *ptr, bool unmap)
extent_tree_ad_remove(&huge, node);
#ifdef JEMALLOC_STATS
- stats_cactive_sub(node->size);
huge_ndalloc++;
huge_allocated -= node->size;
#endif
@@ -301,10 +292,9 @@ huge_dalloc(void *ptr, bool unmap)
memset(node->addr, 0x5a, node->size);
#endif
#endif
+ chunk_dealloc(node->addr, node->size);
}
- chunk_dealloc(node->addr, node->size, unmap);
-
base_node_dealloc(node);
}
diff --git a/dep/jemalloc/src/jemalloc.c b/dep/jemalloc/src/jemalloc.c
index a161c2e26e1..2aebc51dd19 100644
--- a/dep/jemalloc/src/jemalloc.c
+++ b/dep/jemalloc/src/jemalloc.c
@@ -7,10 +7,12 @@
malloc_mutex_t arenas_lock;
arena_t **arenas;
unsigned narenas;
+static unsigned next_arena;
-pthread_key_t arenas_tsd;
#ifndef NO_TLS
__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#else
+pthread_key_t arenas_tsd;
#endif
#ifdef JEMALLOC_STATS
@@ -28,13 +30,7 @@ static bool malloc_initialized = false;
static pthread_t malloc_initializer = (unsigned long)0;
/* Used to avoid initialization races. */
-static malloc_mutex_t init_lock =
-#ifdef JEMALLOC_OSSPIN
- 0
-#else
- MALLOC_MUTEX_INITIALIZER
-#endif
- ;
+static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
#ifdef DYNAMIC_PAGE_SHIFT
size_t pagesize;
@@ -74,7 +70,6 @@ size_t opt_narenas = 0;
static void wrtmessage(void *cbopaque, const char *s);
static void stats_print_atexit(void);
static unsigned malloc_ncpus(void);
-static void arenas_cleanup(void *arg);
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void thread_allocated_cleanup(void *arg);
#endif
@@ -84,7 +79,6 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen,
const char *v, size_t vlen);
static void malloc_conf_init(void);
static bool malloc_init_hard(void);
-static int imemalign(void **memptr, size_t alignment, size_t size);
/******************************************************************************/
/* malloc_message() setup. */
@@ -153,53 +147,13 @@ choose_arena_hard(void)
arena_t *ret;
if (narenas > 1) {
- unsigned i, choose, first_null;
-
- choose = 0;
- first_null = narenas;
malloc_mutex_lock(&arenas_lock);
- assert(arenas[0] != NULL);
- for (i = 1; i < narenas; i++) {
- if (arenas[i] != NULL) {
- /*
- * Choose the first arena that has the lowest
- * number of threads assigned to it.
- */
- if (arenas[i]->nthreads <
- arenas[choose]->nthreads)
- choose = i;
- } else if (first_null == narenas) {
- /*
- * Record the index of the first uninitialized
- * arena, in case all extant arenas are in use.
- *
- * NB: It is possible for there to be
- * discontinuities in terms of initialized
- * versus uninitialized arenas, due to the
- * "thread.arena" mallctl.
- */
- first_null = i;
- }
- }
-
- if (arenas[choose]->nthreads == 0 || first_null == narenas) {
- /*
- * Use an unloaded arena, or the least loaded arena if
- * all arenas are already initialized.
- */
- ret = arenas[choose];
- } else {
- /* Initialize a new arena. */
- ret = arenas_extend(first_null);
- }
- ret->nthreads++;
+ if ((ret = arenas[next_arena]) == NULL)
+ ret = arenas_extend(next_arena);
+ next_arena = (next_arena + 1) % narenas;
malloc_mutex_unlock(&arenas_lock);
- } else {
+ } else
ret = arenas[0];
- malloc_mutex_lock(&arenas_lock);
- ret->nthreads++;
- malloc_mutex_unlock(&arenas_lock);
- }
ARENA_SET(ret);
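
The hunk above swaps the least-loaded-arena policy (which tracked a per-arena nthreads count) for the older round-robin assignment under arenas_lock. A stand-alone sketch of that reverted policy, with illustrative names:

    #include <pthread.h>

    static pthread_mutex_t arenas_mtx = PTHREAD_MUTEX_INITIALIZER;
    static void *arena_objs[8];     /* stand-in for arena_t **arenas */
    static unsigned nxt, total = 8;

    static void *
    choose_round_robin(void)
    {
        void *ret;

        pthread_mutex_lock(&arenas_mtx);
        ret = arena_objs[nxt];          /* may be NULL: extend lazily */
        nxt = (nxt + 1) % total;        /* wrap; threads spread evenly */
        pthread_mutex_unlock(&arenas_mtx);
        return (ret);
    }
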
@@ -259,28 +213,6 @@ stats_print_atexit(void)
JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL);
}
-#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
-thread_allocated_t *
-thread_allocated_get_hard(void)
-{
- thread_allocated_t *thread_allocated = (thread_allocated_t *)
- imalloc(sizeof(thread_allocated_t));
- if (thread_allocated == NULL) {
- static thread_allocated_t static_thread_allocated = {0, 0};
- malloc_write("<jemalloc>: Error allocating TSD;"
- " mallctl(\"thread.{de,}allocated[p]\", ...)"
- " will be inaccurate\n");
- if (opt_abort)
- abort();
- return (&static_thread_allocated);
- }
- pthread_setspecific(thread_allocated_tsd, thread_allocated);
- thread_allocated->allocated = 0;
- thread_allocated->deallocated = 0;
- return (thread_allocated);
-}
-#endif
-
/*
* End miscellaneous support functions.
*/
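
The arenas_tsd hunk near the top of this file and the deleted thread_allocated_get_hard() revolve around the same portability split: compiler TLS where available, a pthread_key_t only under NO_TLS (the key creation moves accordingly later in malloc_init_hard()). A sketch of the two access paths behind the ARENA_GET/ARENA_SET style of macro used above:

    #include <pthread.h>

    #ifndef NO_TLS
    static __thread void *arena_tls;    /* fast path: a single load */
    #  define ARENA_GET()    arena_tls
    #  define ARENA_SET(a)   do { arena_tls = (a); } while (0)
    #else
    static pthread_key_t arena_key;     /* created once at boot */
    #  define ARENA_GET()    pthread_getspecific(arena_key)
    #  define ARENA_SET(a)   pthread_setspecific(arena_key, (a))
    #endif
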
@@ -305,16 +237,6 @@ malloc_ncpus(void)
return (ret);
}
-static void
-arenas_cleanup(void *arg)
-{
- arena_t *arena = (arena_t *)arg;
-
- malloc_mutex_lock(&arenas_lock);
- arena->nthreads--;
- malloc_mutex_unlock(&arenas_lock);
-}
-
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void
thread_allocated_cleanup(void *arg)
@@ -499,8 +421,8 @@ malloc_conf_init(void)
if ((opts = getenv(envname)) != NULL) {
/*
* Do nothing; opts is already initialized to
- * the value of the MALLOC_CONF environment
- * variable.
+ * the value of the JEMALLOC_OPTIONS
+ * environment variable.
*/
} else {
/* No configuration specified. */
@@ -689,7 +611,7 @@ malloc_init_hard(void)
result = sysconf(_SC_PAGESIZE);
assert(result != -1);
- pagesize = (size_t)result;
+ pagesize = (unsigned)result;
/*
* We assume that pagesize is a power of 2 when calculating
@@ -749,10 +671,7 @@ malloc_init_hard(void)
}
#ifdef JEMALLOC_TCACHE
- if (tcache_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
+ tcache_boot();
#endif
if (huge_boot()) {
@@ -769,14 +688,6 @@ malloc_init_hard(void)
}
#endif
- if (malloc_mutex_init(&arenas_lock))
- return (true);
-
- if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
-
/*
* Create enough scaffolding to allow recursive allocation in
* malloc_ncpus().
@@ -801,7 +712,8 @@ malloc_init_hard(void)
* threaded mode.
*/
ARENA_SET(arenas[0]);
- arenas[0]->nthreads++;
+
+ malloc_mutex_init(&arenas_lock);
#ifdef JEMALLOC_PROF
if (prof_boot2()) {
@@ -841,6 +753,15 @@ malloc_init_hard(void)
malloc_write(")\n");
}
+ next_arena = (narenas > 0) ? 1 : 0;
+
+#ifdef NO_TLS
+ if (pthread_key_create(&arenas_tsd, NULL) != 0) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
+ }
+#endif
+
/* Allocate and initialize arenas. */
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
if (arenas == NULL) {
@@ -872,6 +793,7 @@ malloc_init_hard(void)
return (false);
}
+
#ifdef JEMALLOC_ZONE
JEMALLOC_ATTR(constructor)
void
@@ -940,8 +862,7 @@ JEMALLOC_P(malloc)(size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = s2u(size);
- PROF_ALLOC_PREP(1, usize, cnt);
- if (cnt == NULL) {
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL;
goto OOM;
}
@@ -990,23 +911,19 @@ RETURN:
}
JEMALLOC_ATTR(nonnull(1))
-#ifdef JEMALLOC_PROF
-/*
- * Avoid any uncertainty as to how many backtrace frames to ignore in
- * PROF_ALLOC_PREP().
- */
-JEMALLOC_ATTR(noinline)
-#endif
-static int
-imemalign(void **memptr, size_t alignment, size_t size)
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
{
int ret;
+ void *result;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t usize
-#ifdef JEMALLOC_CC_SILENCE
+# ifdef JEMALLOC_CC_SILENCE
= 0
-#endif
+# endif
;
- void *result;
+#endif
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE
@@ -1056,38 +973,34 @@ imemalign(void **memptr, size_t alignment, size_t size)
goto RETURN;
}
- usize = sa2u(size, alignment, NULL);
- if (usize == 0) {
- result = NULL;
- ret = ENOMEM;
- goto RETURN;
- }
-
#ifdef JEMALLOC_PROF
if (opt_prof) {
- PROF_ALLOC_PREP(2, usize, cnt);
- if (cnt == NULL) {
+ usize = sa2u(size, alignment, NULL);
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
result = NULL;
ret = EINVAL;
} else {
if (prof_promote && (uintptr_t)cnt !=
(uintptr_t)1U && usize <= small_maxclass) {
- assert(sa2u(small_maxclass+1,
- alignment, NULL) != 0);
- result = ipalloc(sa2u(small_maxclass+1,
- alignment, NULL), alignment, false);
+ result = ipalloc(small_maxclass+1,
+ alignment, false);
if (result != NULL) {
arena_prof_promoted(result,
usize);
}
} else {
- result = ipalloc(usize, alignment,
+ result = ipalloc(size, alignment,
false);
}
}
} else
#endif
- result = ipalloc(usize, alignment, false);
+ {
+#ifdef JEMALLOC_STATS
+ usize = sa2u(size, alignment, NULL);
+#endif
+ result = ipalloc(size, alignment, false);
+ }
}
if (result == NULL) {
@@ -1119,15 +1032,6 @@ RETURN:
return (ret);
}
-JEMALLOC_ATTR(nonnull(1))
-JEMALLOC_ATTR(visibility("default"))
-int
-JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
-{
-
- return imemalign(memptr, alignment, size);
-}
-
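
Both the renamed imemalign() and the reverted posix_memalign() begin with the same POSIX-mandated validation (it sits just outside these hunks' context): the alignment must be a power of two no smaller than sizeof(void *). As a free-standing check:

    #include <errno.h>
    #include <stddef.h>

    static int
    check_alignment(size_t alignment)
    {

        /* Power of two has exactly one bit set, so a & (a-1) == 0. */
        if (alignment < sizeof(void *) ||
            (alignment & (alignment - 1)) != 0)
            return (EINVAL);
        return (0);
    }
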
JEMALLOC_ATTR(malloc)
JEMALLOC_ATTR(visibility("default"))
void *
@@ -1183,8 +1087,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = s2u(num_size);
- PROF_ALLOC_PREP(1, usize, cnt);
- if (cnt == NULL) {
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL;
goto RETURN;
}
@@ -1297,9 +1200,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
if (opt_prof) {
usize = s2u(size);
old_ctx = prof_ctx_get(ptr);
- PROF_ALLOC_PREP(1, usize, cnt);
- if (cnt == NULL) {
- old_ctx = NULL;
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL;
goto OOM;
}
@@ -1309,13 +1210,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
false, false);
if (ret != NULL)
arena_prof_promoted(ret, usize);
- else
- old_ctx = NULL;
- } else {
+ } else
ret = iralloc(ptr, size, 0, 0, false, false);
- if (ret == NULL)
- old_ctx = NULL;
- }
} else
#endif
{
@@ -1353,8 +1249,7 @@ OOM:
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = s2u(size);
- PROF_ALLOC_PREP(1, usize, cnt);
- if (cnt == NULL)
+ if ((cnt = prof_alloc_prep(usize)) == NULL)
ret = NULL;
else {
if (prof_promote && (uintptr_t)cnt !=
@@ -1459,7 +1354,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size)
#ifdef JEMALLOC_CC_SILENCE
int result =
#endif
- imemalign(&ret, alignment, size);
+ JEMALLOC_P(posix_memalign)(&ret, alignment, size);
#ifdef JEMALLOC_CC_SILENCE
if (result != 0)
return (NULL);
@@ -1478,7 +1373,7 @@ JEMALLOC_P(valloc)(size_t size)
#ifdef JEMALLOC_CC_SILENCE
int result =
#endif
- imemalign(&ret, PAGE_SIZE, size);
+ JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
#ifdef JEMALLOC_CC_SILENCE
if (result != 0)
return (NULL);
@@ -1559,18 +1454,15 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
}
JEMALLOC_INLINE void *
-iallocm(size_t usize, size_t alignment, bool zero)
+iallocm(size_t size, size_t alignment, bool zero)
{
- assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment,
- NULL)));
-
if (alignment != 0)
- return (ipalloc(usize, alignment, zero));
+ return (ipalloc(size, alignment, zero));
else if (zero)
- return (icalloc(usize));
+ return (icalloc(size));
else
- return (imalloc(usize));
+ return (imalloc(size));
}
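
iallocm() backs jemalloc 2.x's experimental *allocm() entry points. A hedged usage sketch, assuming the 2.x experimental API and a standard header install path; in a prefixed build such as this one the symbols are reached through JEMALLOC_P():

    #include <stdio.h>
    #include "jemalloc/jemalloc.h"

    int
    main(void)
    {
        void *p;
        size_t rsize;

        /* 4 KiB request, 64-byte aligned, zero-filled; rsize receives
         * the usable size actually provided. */
        if (JEMALLOC_P(allocm)(&p, &rsize, 4096,
            ALLOCM_ALIGN(64) | ALLOCM_ZERO) != ALLOCM_SUCCESS)
            return (1);
        printf("usable size: %zu\n", rsize);
        JEMALLOC_P(dallocm)(p, 0);
        return (0);
    }
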
JEMALLOC_ATTR(nonnull(1))
@@ -1593,43 +1485,38 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
if (malloc_init())
goto OOM;
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL);
- if (usize == 0)
- goto OOM;
-
#ifdef JEMALLOC_PROF
if (opt_prof) {
- PROF_ALLOC_PREP(1, usize, cnt);
- if (cnt == NULL)
+ usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment,
+ NULL);
+ if ((cnt = prof_alloc_prep(usize)) == NULL)
goto OOM;
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
small_maxclass) {
- size_t usize_promoted = (alignment == 0) ?
- s2u(small_maxclass+1) : sa2u(small_maxclass+1,
- alignment, NULL);
- assert(usize_promoted != 0);
- p = iallocm(usize_promoted, alignment, zero);
+ p = iallocm(small_maxclass+1, alignment, zero);
if (p == NULL)
goto OOM;
arena_prof_promoted(p, usize);
} else {
- p = iallocm(usize, alignment, zero);
+ p = iallocm(size, alignment, zero);
if (p == NULL)
goto OOM;
}
- prof_malloc(p, usize, cnt);
+
if (rsize != NULL)
*rsize = usize;
} else
#endif
{
- p = iallocm(usize, alignment, zero);
+ p = iallocm(size, alignment, zero);
if (p == NULL)
goto OOM;
#ifndef JEMALLOC_STATS
if (rsize != NULL)
#endif
{
+ usize = (alignment == 0) ? s2u(size) : sa2u(size,
+ alignment, NULL);
#ifdef JEMALLOC_STATS
if (rsize != NULL)
#endif
@@ -1672,6 +1559,7 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
bool no_move = flags & ALLOCM_NO_MOVE;
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt;
+ prof_ctx_t *old_ctx;
#endif
assert(ptr != NULL);
@@ -1686,33 +1574,25 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
/*
* usize isn't knowable before iralloc() returns when extra is
* non-zero. Therefore, compute its maximum possible value and
- * use that in PROF_ALLOC_PREP() to decide whether to capture a
+ * use that in prof_alloc_prep() to decide whether to capture a
* backtrace. prof_realloc() will use the actual usize to
* decide whether to sample.
*/
size_t max_usize = (alignment == 0) ? s2u(size+extra) :
sa2u(size+extra, alignment, NULL);
- prof_ctx_t *old_ctx = prof_ctx_get(p);
old_size = isalloc(p);
- PROF_ALLOC_PREP(1, max_usize, cnt);
- if (cnt == NULL)
+ old_ctx = prof_ctx_get(p);
+ if ((cnt = prof_alloc_prep(max_usize)) == NULL)
goto OOM;
- /*
- * Use minimum usize to determine whether promotion may happen.
- */
- if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U
- && ((alignment == 0) ? s2u(size) : sa2u(size,
- alignment, NULL)) <= small_maxclass) {
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize
+ <= small_maxclass) {
q = iralloc(p, small_maxclass+1, (small_maxclass+1 >=
size+extra) ? 0 : size+extra - (small_maxclass+1),
alignment, zero, no_move);
if (q == NULL)
goto ERR;
- if (max_usize < PAGE_SIZE) {
- usize = max_usize;
- arena_prof_promoted(q, usize);
- } else
- usize = isalloc(q);
+ usize = isalloc(q);
+ arena_prof_promoted(q, usize);
} else {
q = iralloc(p, size, extra, alignment, zero, no_move);
if (q == NULL)
@@ -1720,8 +1600,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
usize = isalloc(q);
}
prof_realloc(q, usize, cnt, old_size, old_ctx);
- if (rsize != NULL)
- *rsize = usize;
} else
#endif
{
diff --git a/dep/jemalloc/src/mb.c b/dep/jemalloc/src/mb.c
index dc2c0a256fd..30a1a2e997a 100644
--- a/dep/jemalloc/src/mb.c
+++ b/dep/jemalloc/src/mb.c
@@ -1,2 +1,2 @@
-#define JEMALLOC_MB_C_
+#define MB_C_
#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/dep/jemalloc/src/mutex.c b/dep/jemalloc/src/mutex.c
index ca89ef1c962..3ecb18a340e 100644
--- a/dep/jemalloc/src/mutex.c
+++ b/dep/jemalloc/src/mutex.c
@@ -55,9 +55,6 @@ pthread_create(pthread_t *__restrict thread,
bool
malloc_mutex_init(malloc_mutex_t *mutex)
{
-#ifdef JEMALLOC_OSSPIN
- *mutex = 0;
-#else
pthread_mutexattr_t attr;
if (pthread_mutexattr_init(&attr) != 0)
@@ -73,7 +70,6 @@ malloc_mutex_init(malloc_mutex_t *mutex)
}
pthread_mutexattr_destroy(&attr);
-#endif
return (false);
}
@@ -81,10 +77,8 @@ void
malloc_mutex_destroy(malloc_mutex_t *mutex)
{
-#ifndef JEMALLOC_OSSPIN
if (pthread_mutex_destroy(mutex) != 0) {
malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
abort();
}
-#endif
}
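
The reverted malloc_mutex_init() always goes through a pthread_mutexattr_t (the settype call falls outside the hunk's context). Its shape, as a self-contained sketch; returning true signals failure, matching the convention above:

    #include <pthread.h>
    #include <stdbool.h>

    static bool
    mutex_init(pthread_mutex_t *mutex)
    {
        pthread_mutexattr_t attr;

        if (pthread_mutexattr_init(&attr) != 0)
            return (true);
    #ifdef PTHREAD_MUTEX_ADAPTIVE_NP
        /* glibc extension: spin briefly before sleeping. */
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
    #else
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
    #endif
        if (pthread_mutex_init(mutex, &attr) != 0) {
            pthread_mutexattr_destroy(&attr);
            return (true);
        }
        pthread_mutexattr_destroy(&attr);
        return (false);
    }
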
diff --git a/dep/jemalloc/src/prof.c b/dep/jemalloc/src/prof.c
index 8a144b4e46c..636cccef52a 100644
--- a/dep/jemalloc/src/prof.c
+++ b/dep/jemalloc/src/prof.c
@@ -3,15 +3,15 @@
#ifdef JEMALLOC_PROF
/******************************************************************************/
+#ifdef JEMALLOC_PROF_LIBGCC
+#include <unwind.h>
+#endif
+
#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif
-#ifdef JEMALLOC_PROF_LIBGCC
-#include <unwind.h>
-#endif
-
/******************************************************************************/
/* Data. */
@@ -169,7 +169,39 @@ prof_leave(void)
prof_gdump();
}
-#ifdef JEMALLOC_PROF_LIBUNWIND
+#ifdef JEMALLOC_PROF_LIBGCC
+static _Unwind_Reason_Code
+prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
+{
+
+ return (_URC_NO_REASON);
+}
+
+static _Unwind_Reason_Code
+prof_unwind_callback(struct _Unwind_Context *context, void *arg)
+{
+ prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+
+ if (data->nignore > 0)
+ data->nignore--;
+ else {
+ data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
+ data->bt->len++;
+ if (data->bt->len == data->max)
+ return (_URC_END_OF_STACK);
+ }
+
+ return (_URC_NO_REASON);
+}
+
+void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+ prof_unwind_data_t data = {bt, nignore, max};
+
+ _Unwind_Backtrace(prof_unwind_callback, &data);
+}
+#elif defined(JEMALLOC_PROF_LIBUNWIND)
void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
@@ -204,41 +236,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
break;
}
}
-#endif
-#ifdef JEMALLOC_PROF_LIBGCC
-static _Unwind_Reason_Code
-prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
-{
-
- return (_URC_NO_REASON);
-}
-
-static _Unwind_Reason_Code
-prof_unwind_callback(struct _Unwind_Context *context, void *arg)
-{
- prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
-
- if (data->nignore > 0)
- data->nignore--;
- else {
- data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
- data->bt->len++;
- if (data->bt->len == data->max)
- return (_URC_END_OF_STACK);
- }
-
- return (_URC_NO_REASON);
-}
-
-void
-prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
-{
- prof_unwind_data_t data = {bt, nignore, max};
-
- _Unwind_Backtrace(prof_unwind_callback, &data);
-}
-#endif
-#ifdef JEMALLOC_PROF_GCC
+#else
void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
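
The libgcc variant re-added above drives _Unwind_Backtrace() with a callback that skips nignore frames and stops once the buffer fills. A minimal, self-contained capture using the same mechanism (the 32-entry buffer is arbitrary):

    #include <stdio.h>
    #include <unwind.h>

    struct bt { void *vec[32]; unsigned len; };

    static _Unwind_Reason_Code
    collect(struct _Unwind_Context *ctx, void *arg)
    {
        struct bt *bt = arg;

        if (bt->len == sizeof(bt->vec) / sizeof(bt->vec[0]))
            return (_URC_END_OF_STACK);  /* buffer full: stop walking */
        bt->vec[bt->len++] = (void *)_Unwind_GetIP(ctx);
        return (_URC_NO_REASON);         /* continue to the caller */
    }

    int
    main(void)
    {
        struct bt bt = {{0}, 0};
        unsigned i;

        _Unwind_Backtrace(collect, &bt);
        for (i = 0; i < bt.len; i++)
            printf("frame %u: %p\n", i, bt.vec[i]);
        return (0);
    }
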
@@ -434,7 +432,6 @@ prof_lookup(prof_bt_t *bt)
prof_ctx_t *p;
void *v;
} ctx;
- bool new_ctx;
/*
* This thread's cache lacks bt. Look for it in the global
@@ -471,26 +468,12 @@ prof_lookup(prof_bt_t *bt)
idalloc(ctx.v);
return (NULL);
}
- /*
- * Artificially raise curobjs, in order to avoid a race
- * condition with prof_ctx_merge()/prof_ctx_destroy().
- *
- * No locking is necessary for ctx here because no other
- * threads have had the opportunity to fetch it from
- * bt2ctx yet.
- */
- ctx.p->cnt_merged.curobjs++;
- new_ctx = true;
- } else {
- /*
- * Artificially raise curobjs, in order to avoid a race
- * condition with prof_ctx_merge()/prof_ctx_destroy().
- */
- malloc_mutex_lock(&ctx.p->lock);
- ctx.p->cnt_merged.curobjs++;
- malloc_mutex_unlock(&ctx.p->lock);
- new_ctx = false;
}
+ /*
+ * Acquire ctx's lock before releasing bt2ctx_mtx, in order to
+ * avoid a race condition with prof_ctx_destroy().
+ */
+ malloc_mutex_lock(&ctx.p->lock);
prof_leave();
/* Link a prof_thd_cnt_t into ctx for this thread. */
@@ -503,9 +486,8 @@ prof_lookup(prof_bt_t *bt)
*/
ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
assert(ret.v != NULL);
- if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
- NULL, NULL))
- assert(false);
+ ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL,
+ NULL);
ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
prof_ctx_merge(ret.p->ctx, ret.p);
/* ret can now be re-used. */
@@ -516,8 +498,7 @@ prof_lookup(prof_bt_t *bt)
/* Allocate and partially initialize a new cnt. */
ret.v = imalloc(sizeof(prof_thr_cnt_t));
if (ret.p == NULL) {
- if (new_ctx)
- prof_ctx_destroy(ctx.p);
+ malloc_mutex_unlock(&ctx.p->lock);
return (NULL);
}
ql_elm_new(ret.p, cnts_link);
@@ -528,15 +509,12 @@ prof_lookup(prof_bt_t *bt)
ret.p->epoch = 0;
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
- if (new_ctx)
- prof_ctx_destroy(ctx.p);
+ malloc_mutex_unlock(&ctx.p->lock);
idalloc(ret.v);
return (NULL);
}
ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
- malloc_mutex_lock(&ctx.p->lock);
ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
- ctx.p->cnt_merged.curobjs--;
malloc_mutex_unlock(&ctx.p->lock);
} else {
/* Move ret to the front of the LRU. */
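
The rework above replaces the curobjs ref-count trick with a lock interleave: prof_lookup() now takes ctx->lock while still inside prof_enter()/prof_leave(), so prof_ctx_destroy() (which must also enter first) cannot free the ctx underneath it. The idiom in isolation, with illustrative types standing in for the ckh table and prof_ctx_t:

    #include <pthread.h>
    #include <stddef.h>

    struct obj { pthread_mutex_t mtx; };
    struct table { pthread_mutex_t mtx; struct obj *slot; };

    /* Return the object with obj->mtx held (caller unlocks), or NULL. */
    static struct obj *
    lookup_pinned(struct table *tbl)
    {
        struct obj *obj;

        pthread_mutex_lock(&tbl->mtx);      /* prof_enter() analogue */
        obj = tbl->slot;                    /* stand-in for ckh_search() */
        if (obj != NULL)
            pthread_mutex_lock(&obj->mtx);  /* pin before releasing */
        pthread_mutex_unlock(&tbl->mtx);    /* prof_leave() analogue */
        return (obj);
    }
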
@@ -650,10 +628,11 @@ prof_ctx_destroy(prof_ctx_t *ctx)
/*
* Check that ctx is still unused by any thread cache before destroying
- * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in
- * order to avoid a race condition with this function, as does
- * prof_ctx_merge() in order to avoid a race between the main body of
- * prof_ctx_merge() and entry into this function.
+ * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
+ * avoid a race condition with this function, and prof_ctx_merge()
+ * artificially raises ctx->cnt_merged.curobjs in order to avoid a race
+ * between the main body of prof_ctx_merge() and entry into this
+ * function.
*/
prof_enter();
malloc_mutex_lock(&ctx->lock);
@@ -662,8 +641,7 @@ prof_ctx_destroy(prof_ctx_t *ctx)
assert(ctx->cnt_merged.accumobjs == 0);
assert(ctx->cnt_merged.accumbytes == 0);
/* Remove ctx from bt2ctx. */
- if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
- assert(false);
+ ckh_remove(&bt2ctx, ctx->bt, NULL, NULL);
prof_leave();
/* Destroy ctx. */
malloc_mutex_unlock(&ctx->lock);
@@ -671,10 +649,7 @@ prof_ctx_destroy(prof_ctx_t *ctx)
malloc_mutex_destroy(&ctx->lock);
idalloc(ctx);
} else {
- /*
- * Compensate for increment in prof_ctx_merge() or
- * prof_lookup().
- */
+ /* Compensate for increment in prof_ctx_merge(). */
ctx->cnt_merged.curobjs--;
malloc_mutex_unlock(&ctx->lock);
prof_leave();
@@ -1081,7 +1056,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
} else {
ret1 = h;
ret2 = hash(bt->vec, bt->len * sizeof(void *),
- 0x8432a476666bbc13LLU);
+ 0x8432a476666bbc13U);
}
*hash1 = ret1;
@@ -1118,6 +1093,7 @@ prof_tdata_init(void)
prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
if (prof_tdata->vec == NULL) {
+
ckh_delete(&prof_tdata->bt2cnt);
idalloc(prof_tdata);
return (NULL);
@@ -1135,26 +1111,33 @@ prof_tdata_init(void)
static void
prof_tdata_cleanup(void *arg)
{
- prof_thr_cnt_t *cnt;
- prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
+ prof_tdata_t *prof_tdata;
- /*
- * Delete the hash table. All of its contents can still be iterated
- * over via the LRU.
- */
- ckh_delete(&prof_tdata->bt2cnt);
+ prof_tdata = PROF_TCACHE_GET();
+ if (prof_tdata != NULL) {
+ prof_thr_cnt_t *cnt;
- /* Iteratively merge cnt's into the global stats and delete them. */
- while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
- ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
- prof_ctx_merge(cnt->ctx, cnt);
- idalloc(cnt);
- }
+ /*
+ * Delete the hash table. All of its contents can still be
+ * iterated over via the LRU.
+ */
+ ckh_delete(&prof_tdata->bt2cnt);
- idalloc(prof_tdata->vec);
+ /*
+ * Iteratively merge cnt's into the global stats and delete
+ * them.
+ */
+ while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
+ prof_ctx_merge(cnt->ctx, cnt);
+ ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
+ idalloc(cnt);
+ }
- idalloc(prof_tdata);
- PROF_TCACHE_SET(NULL);
+ idalloc(prof_tdata->vec);
+
+ idalloc(prof_tdata);
+ PROF_TCACHE_SET(NULL);
+ }
}
void
diff --git a/dep/jemalloc/src/rtree.c b/dep/jemalloc/src/rtree.c
index eb0ff1e24af..7753743c5e6 100644
--- a/dep/jemalloc/src/rtree.c
+++ b/dep/jemalloc/src/rtree.c
@@ -1,4 +1,4 @@
-#define JEMALLOC_RTREE_C_
+#define RTREE_C_
#include "jemalloc/internal/jemalloc_internal.h"
rtree_t *
@@ -20,10 +20,7 @@ rtree_new(unsigned bits)
memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
height));
- if (malloc_mutex_init(&ret->mutex)) {
- /* Leak the rtree. */
- return (NULL);
- }
+ malloc_mutex_init(&ret->mutex);
ret->height = height;
if (bits_per_level * height > bits)
ret->level2bits[0] = bits % bits_per_level;
diff --git a/dep/jemalloc/src/stats.c b/dep/jemalloc/src/stats.c
index dc172e425c0..3dfe0d232a6 100644
--- a/dep/jemalloc/src/stats.c
+++ b/dep/jemalloc/src/stats.c
@@ -39,10 +39,6 @@
bool opt_stats_print = false;
-#ifdef JEMALLOC_STATS
-size_t stats_cactive = 0;
-#endif
-
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
@@ -323,7 +319,6 @@ static void
stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
- unsigned nthreads;
size_t pagesize, pactive, pdirty, mapped;
uint64_t npurge, nmadvise, purged;
size_t small_allocated;
@@ -333,9 +328,6 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
CTL_GET("arenas.pagesize", &pagesize, size_t);
- CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
- malloc_cprintf(write_cb, cbopaque,
- "assigned threads: %u\n", nthreads);
CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
@@ -677,26 +669,21 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
#ifdef JEMALLOC_STATS
{
int err;
- size_t sszp, ssz;
- size_t *cactive;
+ size_t ssz;
size_t allocated, active, mapped;
size_t chunks_current, chunks_high, swap_avail;
uint64_t chunks_total;
size_t huge_allocated;
uint64_t huge_nmalloc, huge_ndalloc;
- sszp = sizeof(size_t *);
ssz = sizeof(size_t);
- CTL_GET("stats.cactive", &cactive, size_t *);
CTL_GET("stats.allocated", &allocated, size_t);
CTL_GET("stats.active", &active, size_t);
CTL_GET("stats.mapped", &mapped, size_t);
malloc_cprintf(write_cb, cbopaque,
- "Allocated: %zu, active: %zu, mapped: %zu\n",
- allocated, active, mapped);
- malloc_cprintf(write_cb, cbopaque,
- "Current active ceiling: %zu\n", atomic_read_z(cactive));
+ "Allocated: %zu, active: %zu, mapped: %zu\n", allocated,
+ active, mapped);
/* Print chunk stats. */
CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
@@ -748,7 +735,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
ninitialized++;
}
- if (ninitialized > 1 || unmerged == false) {
+ if (ninitialized > 1) {
/* Print merged arena stats. */
malloc_cprintf(write_cb, cbopaque,
"\nMerged arenas stats:\n");
diff --git a/dep/jemalloc/src/tcache.c b/dep/jemalloc/src/tcache.c
index 31c329e1613..cbbe7a113a9 100644
--- a/dep/jemalloc/src/tcache.c
+++ b/dep/jemalloc/src/tcache.c
@@ -8,9 +8,6 @@ bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
-tcache_bin_info_t *tcache_bin_info;
-static unsigned stack_nelms; /* Total stack elms per tcache. */
-
/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
@@ -58,19 +55,18 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
)
{
- void *ptr;
+ void *flush, *deferred, *ptr;
unsigned i, nflush, ndeferred;
-#ifdef JEMALLOC_STATS
- bool merged_stats = false;
-#endif
+ bool first_pass;
assert(binind < nbins);
assert(rem <= tbin->ncached);
+ assert(tbin->ncached > 0 || tbin->avail == NULL);
- for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
+ for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
+ true; flush != NULL; flush = deferred, nflush = ndeferred) {
/* Lock the arena bin associated with the first object. */
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
- tbin->avail[0]);
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
arena_t *arena = chunk->arena;
arena_bin_t *bin = &arena->bins[binind];
@@ -86,17 +82,17 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
malloc_mutex_lock(&bin->lock);
#ifdef JEMALLOC_STATS
if (arena == tcache->arena) {
- assert(merged_stats == false);
- merged_stats = true;
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
}
#endif
+ deferred = NULL;
ndeferred = 0;
for (i = 0; i < nflush; i++) {
- ptr = tbin->avail[i];
+ ptr = flush;
assert(ptr != NULL);
+ flush = *(void **)ptr;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena) {
size_t pageind = ((uintptr_t)ptr -
@@ -111,31 +107,21 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
* locked. Stash the object, so that it can be
* handled in a future pass.
*/
- tbin->avail[ndeferred] = ptr;
+ *(void **)ptr = deferred;
+ deferred = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&bin->lock);
+
+ if (first_pass) {
+ tbin->avail = flush;
+ first_pass = false;
+ }
}
-#ifdef JEMALLOC_STATS
- if (merged_stats == false) {
- /*
- * The flush loop didn't happen to flush to this thread's
- * arena, so the stats didn't get merged. Manually do so now.
- */
- arena_bin_t *bin = &tcache->arena->bins[binind];
- malloc_mutex_lock(&bin->lock);
- bin->stats.nflushes++;
- bin->stats.nrequests += tbin->tstats.nrequests;
- tbin->tstats.nrequests = 0;
- malloc_mutex_unlock(&bin->lock);
- }
-#endif
- memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
- rem * sizeof(void *));
tbin->ncached = rem;
- if ((int)tbin->ncached < tbin->low_water)
+ if (tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
}
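
The rewritten flush loop above keeps the free list inside the freed objects themselves: the first word of each cached object points at the next, with tbin->avail as the head, which is why deferred objects are stashed via *(void **)ptr = deferred. The idiom in isolation:

    #include <stddef.h>

    static void *head;              /* tbin->avail analogue */

    static void
    push(void *ptr)                 /* object must hold >= one pointer */
    {

        *(void **)ptr = head;       /* link through the object itself */
        head = ptr;
    }

    static void *
    pop(void)
    {
        void *ptr = head;

        if (ptr != NULL)
            head = *(void **)ptr;   /* follow the embedded link */
        return (ptr);
    }

The trade-off against the pointer-array stack it replaces: no side allocation per bin, but every pop touches the cached object's memory.
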
@@ -146,19 +132,18 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#endif
)
{
- void *ptr;
+ void *flush, *deferred, *ptr;
unsigned i, nflush, ndeferred;
-#ifdef JEMALLOC_STATS
- bool merged_stats = false;
-#endif
+ bool first_pass;
assert(binind < nhbins);
assert(rem <= tbin->ncached);
+ assert(tbin->ncached > 0 || tbin->avail == NULL);
- for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
+ for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
+ true; flush != NULL; flush = deferred, nflush = ndeferred) {
/* Lock the arena associated with the first object. */
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
- tbin->avail[0]);
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
arena_t *arena = chunk->arena;
malloc_mutex_lock(&arena->lock);
@@ -170,7 +155,6 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
- merged_stats = true;
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[binind - nbins].nrequests +=
tbin->tstats.nrequests;
@@ -179,10 +163,12 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
}
#endif
+ deferred = NULL;
ndeferred = 0;
for (i = 0; i < nflush; i++) {
- ptr = tbin->avail[i];
+ ptr = flush;
assert(ptr != NULL);
+ flush = *(void **)ptr;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk->arena == arena)
arena_dalloc_large(arena, chunk, ptr);
@@ -193,32 +179,21 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
* Stash the object, so that it can be handled
* in a future pass.
*/
- tbin->avail[ndeferred] = ptr;
+ *(void **)ptr = deferred;
+ deferred = ptr;
ndeferred++;
}
}
malloc_mutex_unlock(&arena->lock);
+
+ if (first_pass) {
+ tbin->avail = flush;
+ first_pass = false;
+ }
}
-#ifdef JEMALLOC_STATS
- if (merged_stats == false) {
- /*
- * The flush loop didn't happen to flush to this thread's
- * arena, so the stats didn't get merged. Manually do so now.
- */
- arena_t *arena = tcache->arena;
- malloc_mutex_lock(&arena->lock);
- arena->stats.nrequests_large += tbin->tstats.nrequests;
- arena->stats.lstats[binind - nbins].nrequests +=
- tbin->tstats.nrequests;
- tbin->tstats.nrequests = 0;
- malloc_mutex_unlock(&arena->lock);
- }
-#endif
- memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
- rem * sizeof(void *));
tbin->ncached = rem;
- if ((int)tbin->ncached < tbin->low_water)
+ if (tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
}
@@ -226,14 +201,10 @@ tcache_t *
tcache_create(arena_t *arena)
{
tcache_t *tcache;
- size_t size, stack_offset;
+ size_t size;
unsigned i;
size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
- /* Naturally align the pointer stacks. */
- size = PTR_CEILING(size);
- stack_offset = size;
- size += stack_nelms * sizeof(void *);
/*
* Round up to the nearest multiple of the cacheline size, in order to
* avoid the possibility of false cacheline sharing.
@@ -246,8 +217,6 @@ tcache_create(arena_t *arena)
if (size <= small_maxclass)
tcache = (tcache_t *)arena_malloc_small(arena, size, true);
- else if (size <= tcache_maxclass)
- tcache = (tcache_t *)arena_malloc_large(arena, size, true);
else
tcache = (tcache_t *)icalloc(size);
@@ -264,12 +233,15 @@ tcache_create(arena_t *arena)
tcache->arena = arena;
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
- for (i = 0; i < nhbins; i++) {
- tcache->tbins[i].lg_fill_div = 1;
- tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
- (uintptr_t)stack_offset);
- stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+ for (i = 0; i < nbins; i++) {
+ if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
+ tcache->tbins[i].ncached_max = (arena->bins[i].nregs <<
+ 1);
+ } else
+ tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
}
+ for (; i < nhbins; i++)
+ tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;
TCACHE_SET(tcache);
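
Per the comment in the hunk above, the size computed in tcache_create() is rounded up to a cacheline multiple so two tcaches never share a line. jemalloc's internal CACHELINE_CEILING() macro does exactly this; a stand-alone form, assuming a 64-byte line:

    #include <stddef.h>

    #define CACHELINE ((size_t)64)                 /* power of two */
    #define CACHELINE_CEILING(s)                                    \
        (((s) + CACHELINE - 1) & ~(CACHELINE - 1))

    /* e.g. CACHELINE_CEILING(100) == 128 */
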
@@ -280,7 +252,6 @@ void
tcache_destroy(tcache_t *tcache)
{
unsigned i;
- size_t tcache_size;
#ifdef JEMALLOC_STATS
/* Unlink from list of extant tcaches. */
@@ -337,8 +308,7 @@ tcache_destroy(tcache_t *tcache)
}
#endif
- tcache_size = arena_salloc(tcache);
- if (tcache_size <= small_maxclass) {
+ if (arena_salloc(tcache) <= small_maxclass) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
@@ -352,13 +322,6 @@ tcache_destroy(tcache_t *tcache)
malloc_mutex_lock(&bin->lock);
arena_dalloc_bin(arena, chunk, tcache, mapelm);
malloc_mutex_unlock(&bin->lock);
- } else if (tcache_size <= tcache_maxclass) {
- arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
- arena_t *arena = chunk->arena;
-
- malloc_mutex_lock(&arena->lock);
- arena_dalloc_large(arena, chunk, tcache);
- malloc_mutex_unlock(&arena->lock);
} else
idalloc(tcache);
}
@@ -415,13 +378,11 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
}
#endif
-bool
+void
tcache_boot(void)
{
if (opt_tcache) {
- unsigned i;
-
/*
* If necessary, clamp opt_lg_tcache_max, now that
* small_maxclass and arena_maxclass are known.
@@ -436,28 +397,6 @@ tcache_boot(void)
nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
- /* Initialize tcache_bin_info. */
- tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
- sizeof(tcache_bin_info_t));
- if (tcache_bin_info == NULL)
- return (true);
- stack_nelms = 0;
- for (i = 0; i < nbins; i++) {
- if ((arena_bin_info[i].nregs << 1) <=
- TCACHE_NSLOTS_SMALL_MAX) {
- tcache_bin_info[i].ncached_max =
- (arena_bin_info[i].nregs << 1);
- } else {
- tcache_bin_info[i].ncached_max =
- TCACHE_NSLOTS_SMALL_MAX;
- }
- stack_nelms += tcache_bin_info[i].ncached_max;
- }
- for (; i < nhbins; i++) {
- tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
- stack_nelms += tcache_bin_info[i].ncached_max;
- }
-
/* Compute incremental GC event threshold. */
if (opt_lg_tcache_gc_sweep >= 0) {
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
@@ -473,8 +412,6 @@ tcache_boot(void)
abort();
}
}
-
- return (false);
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */
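
The tcache_gc_incr computation that survives at the bottom of tcache_boot() is a ceiling division: the GC sweep period of 2^opt_lg_tcache_gc_sweep allocation events is spread evenly across nbins, rounded up so every bin is visited within one period. Equivalent stand-alone form:

    static unsigned
    gc_incr(unsigned lg_sweep, unsigned nbins)
    {
        unsigned period = 1U << lg_sweep;

        /* ceil(period / nbins) without floating point. */
        return (period / nbins + ((period % nbins == 0) ? 0 : 1));
    }
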