Diffstat (limited to 'dep/jemalloc/src/prof.c')
-rw-r--r-- | dep/jemalloc/src/prof.c | 175
1 file changed, 96 insertions, 79 deletions
diff --git a/dep/jemalloc/src/prof.c b/dep/jemalloc/src/prof.c
index 636cccef52a..8a144b4e46c 100644
--- a/dep/jemalloc/src/prof.c
+++ b/dep/jemalloc/src/prof.c
@@ -3,15 +3,15 @@
 #ifdef JEMALLOC_PROF
 /******************************************************************************/
 
-#ifdef JEMALLOC_PROF_LIBGCC
-#include <unwind.h>
-#endif
-
 #ifdef JEMALLOC_PROF_LIBUNWIND
 #define UNW_LOCAL_ONLY
 #include <libunwind.h>
 #endif
 
+#ifdef JEMALLOC_PROF_LIBGCC
+#include <unwind.h>
+#endif
+
 /******************************************************************************/
 /* Data. */
 
@@ -169,39 +169,7 @@ prof_leave(void)
 		prof_gdump();
 }
 
-#ifdef JEMALLOC_PROF_LIBGCC
-static _Unwind_Reason_Code
-prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
-{
-
-	return (_URC_NO_REASON);
-}
-
-static _Unwind_Reason_Code
-prof_unwind_callback(struct _Unwind_Context *context, void *arg)
-{
-	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
-
-	if (data->nignore > 0)
-		data->nignore--;
-	else {
-		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
-		data->bt->len++;
-		if (data->bt->len == data->max)
-			return (_URC_END_OF_STACK);
-	}
-
-	return (_URC_NO_REASON);
-}
-
-void
-prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
-{
-	prof_unwind_data_t data = {bt, nignore, max};
-
-	_Unwind_Backtrace(prof_unwind_callback, &data);
-}
-#elif defined(JEMALLOC_PROF_LIBUNWIND)
+#ifdef JEMALLOC_PROF_LIBUNWIND
 void
 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 {
@@ -236,7 +204,41 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 			break;
 	}
 }
-#else
+#endif
+#ifdef JEMALLOC_PROF_LIBGCC
+static _Unwind_Reason_Code
+prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
+{
+
+	return (_URC_NO_REASON);
+}
+
+static _Unwind_Reason_Code
+prof_unwind_callback(struct _Unwind_Context *context, void *arg)
+{
+	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+
+	if (data->nignore > 0)
+		data->nignore--;
+	else {
+		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
+		data->bt->len++;
+		if (data->bt->len == data->max)
+			return (_URC_END_OF_STACK);
+	}
+
+	return (_URC_NO_REASON);
+}
+
+void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+	prof_unwind_data_t data = {bt, nignore, max};
+
+	_Unwind_Backtrace(prof_unwind_callback, &data);
+}
+#endif
+#ifdef JEMALLOC_PROF_GCC
 void
 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 {
@@ -432,6 +434,7 @@ prof_lookup(prof_bt_t *bt)
 			prof_ctx_t	*p;
 			void		*v;
 		} ctx;
+		bool new_ctx;
 
 		/*
 		 * This thread's cache lacks bt.  Look for it in the global
@@ -468,12 +471,26 @@ prof_lookup(prof_bt_t *bt)
 				idalloc(ctx.v);
 				return (NULL);
 			}
+			/*
+			 * Artificially raise curobjs, in order to avoid a race
+			 * condition with prof_ctx_merge()/prof_ctx_destroy().
+			 *
+			 * No locking is necessary for ctx here because no other
+			 * threads have had the opportunity to fetch it from
+			 * bt2ctx yet.
+			 */
+			ctx.p->cnt_merged.curobjs++;
+			new_ctx = true;
+		} else {
+			/*
+			 * Artificially raise curobjs, in order to avoid a race
+			 * condition with prof_ctx_merge()/prof_ctx_destroy().
+			 */
+			malloc_mutex_lock(&ctx.p->lock);
+			ctx.p->cnt_merged.curobjs++;
+			malloc_mutex_unlock(&ctx.p->lock);
+			new_ctx = false;
 		}
-		/*
-		 * Acquire ctx's lock before releasing bt2ctx_mtx, in order to
-		 * avoid a race condition with prof_ctx_destroy().
-		 */
-		malloc_mutex_lock(&ctx.p->lock);
 		prof_leave();
 
 		/* Link a prof_thd_cnt_t into ctx for this thread. */
@@ -486,8 +503,9 @@ prof_lookup(prof_bt_t *bt)
 			 */
 			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
 			assert(ret.v != NULL);
-			ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL,
-			    NULL);
+			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
+			    NULL, NULL))
+				assert(false);
 			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
 			prof_ctx_merge(ret.p->ctx, ret.p);
 			/* ret can now be re-used. */
@@ -498,7 +516,8 @@ prof_lookup(prof_bt_t *bt)
 		/* Allocate and partially initialize a new cnt. */
 		ret.v = imalloc(sizeof(prof_thr_cnt_t));
 		if (ret.p == NULL) {
-			malloc_mutex_unlock(&ctx.p->lock);
+			if (new_ctx)
+				prof_ctx_destroy(ctx.p);
 			return (NULL);
 		}
 		ql_elm_new(ret.p, cnts_link);
@@ -509,12 +528,15 @@ prof_lookup(prof_bt_t *bt)
 		ret.p->epoch = 0;
 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
 		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
-			malloc_mutex_unlock(&ctx.p->lock);
+			if (new_ctx)
+				prof_ctx_destroy(ctx.p);
 			idalloc(ret.v);
 			return (NULL);
 		}
 		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
+		malloc_mutex_lock(&ctx.p->lock);
 		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
+		ctx.p->cnt_merged.curobjs--;
 		malloc_mutex_unlock(&ctx.p->lock);
 	} else {
 		/* Move ret to the front of the LRU. */
@@ -628,11 +650,10 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 
 	/*
 	 * Check that ctx is still unused by any thread cache before destroying
-	 * it.  prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
-	 * avoid a race condition with this function, and prof_ctx_merge()
-	 * artificially raises ctx->cnt_merged.curobjs in order to avoid a race
-	 * between the main body of prof_ctx_merge() and entry into this
-	 * function.
+	 * it.  prof_lookup() artificially raises ctx->cnt_merge.curobjs in
+	 * order to avoid a race condition with this function, as does
+	 * prof_ctx_merge() in order to avoid a race between the main body of
+	 * prof_ctx_merge() and entry into this function.
 	 */
 	prof_enter();
 	malloc_mutex_lock(&ctx->lock);
@@ -641,7 +662,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 		assert(ctx->cnt_merged.accumobjs == 0);
 		assert(ctx->cnt_merged.accumbytes == 0);
 		/* Remove ctx from bt2ctx. */
-		ckh_remove(&bt2ctx, ctx->bt, NULL, NULL);
+		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
+			assert(false);
 		prof_leave();
 		/* Destroy ctx. */
 		malloc_mutex_unlock(&ctx->lock);
@@ -649,7 +671,10 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 		malloc_mutex_destroy(&ctx->lock);
 		idalloc(ctx);
 	} else {
-		/* Compensate for increment in prof_ctx_merge(). */
+		/*
+		 * Compensate for increment in prof_ctx_merge() or
+		 * prof_lookup().
+		 */
 		ctx->cnt_merged.curobjs--;
 		malloc_mutex_unlock(&ctx->lock);
 		prof_leave();
@@ -1056,7 +1081,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
 	} else {
 		ret1 = h;
 		ret2 = hash(bt->vec, bt->len * sizeof(void *),
-		    0x8432a476666bbc13U);
+		    0x8432a476666bbc13LLU);
 	}
 
 	*hash1 = ret1;
@@ -1093,7 +1118,6 @@ prof_tdata_init(void)
 
 	prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
 	if (prof_tdata->vec == NULL) {
-		ckh_delete(&prof_tdata->bt2cnt);
 		idalloc(prof_tdata);
 		return (NULL);
 	}
@@ -1111,33 +1135,26 @@
 static void
 prof_tdata_cleanup(void *arg)
 {
-	prof_tdata_t *prof_tdata;
+	prof_thr_cnt_t *cnt;
+	prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
 
-	prof_tdata = PROF_TCACHE_GET();
-	if (prof_tdata != NULL) {
-		prof_thr_cnt_t *cnt;
-
-		/*
-		 * Delete the hash table.  All of its contents can still be
-		 * iterated over via the LRU.
-		 */
-		ckh_delete(&prof_tdata->bt2cnt);
+	/*
+	 * Delete the hash table.  All of its contents can still be iterated
+	 * over via the LRU.
+	 */
+	ckh_delete(&prof_tdata->bt2cnt);
 
-		/*
-		 * Iteratively merge cnt's into the global stats and delete
-		 * them.
-		 */
-		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
-			prof_ctx_merge(cnt->ctx, cnt);
-			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
-			idalloc(cnt);
-		}
+	/* Iteratively merge cnt's into the global stats and delete them. */
+	while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
+		ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
+		prof_ctx_merge(cnt->ctx, cnt);
+		idalloc(cnt);
+	}
 
-		idalloc(prof_tdata->vec);
+	idalloc(prof_tdata->vec);
 
-		idalloc(prof_tdata);
-		PROF_TCACHE_SET(NULL);
-	}
+	idalloc(prof_tdata);
+	PROF_TCACHE_SET(NULL);
 }
 
 void
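The heart of this change is in the prof_lookup() hunks: rather than holding ctx->lock across prof_leave() to fend off prof_ctx_destroy(), the lookup path now artificially inflates ctx->cnt_merged.curobjs, so a concurrent prof_ctx_destroy() sees the context as still in use and backs off; the increment is compensated once the per-thread cnt is actually linked in. The sketch below is a minimal, self-contained illustration of that bump-before-unlock pattern; every name in it (ctx_t, registry_slot, ctx_lookup, ctx_maybe_destroy) is invented for the example and is not jemalloc's API.

/*
 * Minimal sketch of the "artificially raise curobjs" pattern, under
 * invented names; build with: cc -pthread refcount_sketch.c
 */
#include <assert.h>
#include <pthread.h>
#include <stdlib.h>

typedef struct {
	pthread_mutex_t	lock;
	long		curobjs;	/* objects currently counted into this ctx */
} ctx_t;

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
static ctx_t *registry_slot;		/* stands in for the bt2ctx table */

/* Tear the ctx down only if nothing holds it live. */
static void
ctx_maybe_destroy(ctx_t *ctx)
{

	pthread_mutex_lock(&registry_lock);
	pthread_mutex_lock(&ctx->lock);
	if (ctx->curobjs == 0) {
		registry_slot = NULL;	/* unpublish, then free */
		pthread_mutex_unlock(&ctx->lock);
		pthread_mutex_destroy(&ctx->lock);
		free(ctx);
	} else
		pthread_mutex_unlock(&ctx->lock);
	pthread_mutex_unlock(&registry_lock);
}

/*
 * Bump curobjs while the registry lock still pins ctx, so that a
 * concurrent ctx_maybe_destroy() running after we drop registry_lock
 * sees a nonzero count and leaves the ctx alone.
 */
static ctx_t *
ctx_lookup(void)
{
	ctx_t *ctx;

	pthread_mutex_lock(&registry_lock);
	ctx = registry_slot;
	if (ctx != NULL) {
		pthread_mutex_lock(&ctx->lock);
		ctx->curobjs++;		/* artificial increment */
		pthread_mutex_unlock(&ctx->lock);
	}
	pthread_mutex_unlock(&registry_lock);

	if (ctx != NULL) {
		/* ... link per-thread state into ctx here ... */
		pthread_mutex_lock(&ctx->lock);
		ctx->curobjs--;		/* compensate for the increment */
		pthread_mutex_unlock(&ctx->lock);
	}
	return (ctx);
}

int
main(void)
{
	ctx_t *ctx = malloc(sizeof(ctx_t));

	pthread_mutex_init(&ctx->lock, NULL);
	ctx->curobjs = 0;
	registry_slot = ctx;

	assert(ctx_lookup() == ctx);
	ctx_maybe_destroy(ctx);		/* count is back to 0, so this frees */
	assert(registry_slot == NULL);
	return (0);
}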
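A smaller idiom appears twice above: ckh_remove() calls that used to be bare are now wrapped in if (...) assert(false). The likely motivation (an assumption on my part, but a standard C idiom) is that assert() compiles to nothing under NDEBUG, so writing assert(ckh_remove(...) == false) would delete the removal itself from release builds; the if-wrapper keeps the side effect and makes only the check debug-only. A tiny stand-alone illustration, where table_remove() is a stand-in that returns nonzero on failure, like ckh_remove():

/* Sketch of the if (...) assert(false) idiom. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static int
table_remove(int key)
{

	return (key != 42);	/* pretend only key 42 is present */
}

int
main(void)
{

	/*
	 * Wrong: under -DNDEBUG the assert() vanishes, and the removal
	 * with it:
	 *
	 *	assert(table_remove(42) == 0);
	 *
	 * Right: the call always executes; only the check is debug-only.
	 */
	if (table_remove(42))
		assert(false);
	puts("removed");
	return (0);
}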
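The change of the hash seed from 0x8432a476666bbc13U to 0x8432a476666bbc13LLU is a portability fix: the value needs 64 bits, and although C99 already promotes a U-suffixed hex constant to the first unsigned type wide enough to hold it, compilers targeting 32-bit platforms in older modes warn that the constant is too large for unsigned long. The LLU suffix pins the type to unsigned long long everywhere. In the sketch below, hash() is an FNV-1a-style stand-in, not jemalloc's hash function; only the seed's width matters for the point:

/* Sketch: making a 64-bit constant's type explicit with LLU. */
#include <inttypes.h>
#include <stdio.h>

/* Stand-in hash; the seed parameter is the interesting part. */
static uint64_t
hash(const void *key, size_t len, uint64_t seed)
{
	const unsigned char *p = key;
	uint64_t h = seed;
	size_t i;

	for (i = 0; i < len; i++)
		h = (h ^ p[i]) * 0x100000001b3LLU;	/* FNV-style mix */
	return (h);
}

int
main(void)
{
	/*
	 * With a bare U suffix, a 32-bit target in C89 mode may warn
	 * that the constant is too large for unsigned long; LLU names
	 * the 64-bit type explicitly.
	 */
	uint64_t h = hash("bt", 2, 0x8432a476666bbc13LLU);

	printf("%" PRIx64 "\n", h);
	return (0);
}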
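Finally, the prof_tdata_cleanup() rewrite leans on how thread-specific-data destructors work: assuming the cleanup is registered via pthread_key_create() (as jemalloc's TSD layer does on pthreads platforms), the destructor is invoked with the dying thread's value as its argument and is only called when that value is non-NULL, so the old PROF_TCACHE_GET() round-trip and NULL check were redundant. A minimal demonstration of that contract, with generic names:

/* Sketch: a TSD destructor receives the thread's value directly.
 * Build with: cc -pthread tsd_sketch.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_key_t key;

static void
cleanup(void *arg)
{

	/* arg is the value this thread stored via pthread_setspecific(). */
	printf("destructor got: %s\n", (char *)arg);
	free(arg);
}

static void *
thread_main(void *unused)
{

	(void)unused;
	pthread_setspecific(key, strdup("per-thread data"));
	return (NULL);		/* thread exit runs cleanup() */
}

int
main(void)
{
	pthread_t thd;

	pthread_key_create(&key, cleanup);
	pthread_create(&thd, NULL, thread_main, NULL);
	pthread_join(thd, NULL);
	return (0);
}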