From c4123289916daa7bd1c7feb191e8c647fd17b163 Mon Sep 17 00:00:00 2001
From: click
Date: Mon, 23 Apr 2012 20:23:30 +0200
Subject: Revert "DEP: Updated Jemalloc to Version 2.5"

This version of the jemalloc library is crashy at best and should not have
been pushed. Further investigation into why this occurs is required before it
is slammed into master again.

This reverts commit 126fd13e5d6b57dc0c8830248d44db504c7d103f.
---
 dep/jemalloc/src/arena.c      | 671 +++++++++++++++++-------------------
 dep/jemalloc/src/atomic.c     |   2 -
 dep/jemalloc/src/bitmap.c     |  90 ------
 dep/jemalloc/src/chunk.c      |  16 +-
 dep/jemalloc/src/chunk_mmap.c |   6 +-
 dep/jemalloc/src/ckh.c        |  46 ++-
 dep/jemalloc/src/ctl.c        |  37 +--
 dep/jemalloc/src/hash.c       |   2 +-
 dep/jemalloc/src/huge.c       |  30 +-
 dep/jemalloc/src/jemalloc.c   | 264 +++++------------
 dep/jemalloc/src/mb.c         |   2 +-
 dep/jemalloc/src/mutex.c      |   6 -
 dep/jemalloc/src/prof.c       | 175 +++++------
 dep/jemalloc/src/rtree.c      |   7 +-
 dep/jemalloc/src/stats.c      |  21 +-
 dep/jemalloc/src/tcache.c     | 153 +++-------
 16 files changed, 517 insertions(+), 1011 deletions(-)
 delete mode 100644 dep/jemalloc/src/atomic.c
 delete mode 100644 dep/jemalloc/src/bitmap.c

diff --git a/dep/jemalloc/src/arena.c b/dep/jemalloc/src/arena.c index d166ca1ec4d..7f939b3cd77 100644 --- a/dep/jemalloc/src/arena.c +++ b/dep/jemalloc/src/arena.c @@ -8,7 +8,6 @@ size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; uint8_t const *small_size2bin; -arena_bin_info_t *arena_bin_info; /* Various bin-related settings. */ unsigned nqbins; @@ -26,27 +25,26 @@ size_t mspace_mask; /* * const_small_size2bin is a static constant lookup table that in the common - * case can be used as-is for small_size2bin. + * case can be used as-is for small_size2bin. For dynamically linked programs, + * this avoids a page of memory overhead per process. */ -#if (LG_TINY_MIN == 2) -#define S2B_4(i) i, +#define S2B_1(i) i, +#define S2B_2(i) S2B_1(i) S2B_1(i) +#define S2B_4(i) S2B_2(i) S2B_2(i) #define S2B_8(i) S2B_4(i) S2B_4(i) -#elif (LG_TINY_MIN == 3) -#define S2B_8(i) i, -#else -# error "Unsupported LG_TINY_MIN" -#endif #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) #define S2B_64(i) S2B_32(i) S2B_32(i) #define S2B_128(i) S2B_64(i) S2B_64(i) #define S2B_256(i) S2B_128(i) S2B_128(i) /* - * The number of elements in const_small_size2bin is dependent on the - * definition for SUBPAGE. + * The number of elements in const_small_size2bin is dependent on page size + * and on the definition for SUBPAGE. If SUBPAGE changes, the '- 255' must also + * change, along with the addition/removal of static lookup table element + * definitions.
*/ -static JEMALLOC_ATTR(aligned(CACHELINE)) - const uint8_t const_small_size2bin[] = { +static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = { + S2B_1(0xffU) /* 0 */ #if (LG_QUANTUM == 4) /* 16-byte quantum **********************/ # ifdef JEMALLOC_TINY @@ -175,6 +173,7 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); +static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, @@ -192,9 +191,6 @@ static bool small_size2bin_init(void); static void small_size2bin_validate(void); #endif static bool small_size2bin_init_hard(void); -static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, - size_t min_run_size); -static bool bin_info_init(void); /******************************************************************************/ @@ -250,48 +246,57 @@ rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, u.rb_link, arena_avail_comp) static inline void * -arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) +arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) { void *ret; - unsigned regind; - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); assert(run->nfree > 0); - assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); - regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + - (uintptr_t)(bin_info->reg_size * regind)); run->nfree--; - if (regind == run->nextind) - run->nextind++; - assert(regind < run->nextind); + ret = run->avail; + if (ret != NULL) { + /* Double free can cause assertion failure.*/ + assert(ret != NULL); + /* Write-after free can cause assertion failure. */ + assert((uintptr_t)ret >= (uintptr_t)run + + (uintptr_t)bin->reg0_offset); + assert((uintptr_t)ret < (uintptr_t)run->next); + assert(((uintptr_t)ret - ((uintptr_t)run + + (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size == + 0); + run->avail = *(void **)ret; + return (ret); + } + ret = run->next; + run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size); + assert(ret != NULL); return (ret); } static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind = arena_run_regind(run, bin_info, ptr); - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - - assert(run->nfree < bin_info->nregs); + + assert(run->nfree < run->bin->nregs); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size + (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size == 0); + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. 
+ */ assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); + (uintptr_t)run->bin->reg0_offset); + /* + * Freeing a pointer past in the run's frontier can cause assertion + * failure. + */ + assert((uintptr_t)ptr < (uintptr_t)run->next); - bitmap_unset(bitmap, &bin_info->bitmap_info, regind); + *(void **)ptr = run->avail; + run->avail = ptr; run->nfree++; } @@ -315,9 +320,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); old_ndirty = chunk->ndirty; @@ -336,13 +338,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, rem_pages = total_pages - need_pages; arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. */ - cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) << - PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ @@ -569,7 +564,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena->ndirty -= spare->ndirty; } malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize, true); + chunk_dealloc((void *)spare, chunksize); malloc_mutex_lock(&arena->lock); #ifdef JEMALLOC_STATS arena->stats.mapped -= chunksize; @@ -730,9 +725,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(pageind + npages <= chunk_npages); if (mapelm->bits & CHUNK_MAP_DIRTY) { size_t i; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); @@ -755,17 +747,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) CHUNK_MAP_ALLOCATED; } -#ifdef JEMALLOC_STATS - /* - * Update stats_cactive if nactive is crossing a - * chunk multiple. - */ - cactive_diff = CHUNK_CEILING((arena->nactive + - npages) << PAGE_SHIFT) - - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif arena->nactive += npages; /* Append to list for later processing. 
*/ ql_elm_new(mapelm, u.ql_link); @@ -782,12 +763,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) chunk + (uintptr_t)(pageind << PAGE_SHIFT)); assert((mapelm->bits >> PAGE_SHIFT) == 0); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(arena, - run->bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - pageind += bin_info->run_size >> PAGE_SHIFT; + assert(run->magic == ARENA_RUN_MAGIC); + pageind += run->bin->run_size >> PAGE_SHIFT; } } } @@ -868,10 +845,9 @@ arena_purge(arena_t *arena, bool all) } assert(ndirty == arena->ndirty); #endif - assert(arena->ndirty > arena->npurgatory || all); - assert(arena->ndirty - arena->npurgatory > chunk_npages || all); - assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory) || all); + assert(arena->ndirty > arena->npurgatory); + assert(arena->ndirty > chunk_npages || all); + assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all); #ifdef JEMALLOC_STATS arena->stats.npurge++; @@ -883,10 +859,8 @@ arena_purge(arena_t *arena, bool all) * multiple threads from racing to reduce ndirty below the threshold. */ npurgatory = arena->ndirty - arena->npurgatory; - if (all == false) { - assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); + if (all == false) npurgatory -= arena->nactive >> opt_lg_dirty_mult; - } arena->npurgatory += npurgatory; while (npurgatory > 0) { @@ -957,9 +931,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_chunk_t *chunk; size_t size, run_ind, run_pages, flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) @@ -975,19 +946,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_LARGE) != 0); assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - } else { - size_t binind = arena_bin_index(arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size = bin_info->run_size; - } + } else + size = run->bin->run_size; run_pages = (size >> PAGE_SHIFT); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. */ - cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) - - CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_sub(cactive_diff); -#endif arena->nactive -= run_pages; /* @@ -1213,8 +1174,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_chunk_map_t *mapelm; arena_run_t *run; - size_t binind; - arena_bin_info_t *bin_info; /* Look for a usable run. */ mapelm = arena_run_tree_first(&bin->runs); @@ -1238,23 +1197,18 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) } /* No existing runs have any space available. */ - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; - /* Allocate a new run. */ malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, false); + run = arena_run_alloc(arena, bin->run_size, false, false); if (run != NULL) { - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - /* Initialize run internals. 
*/ run->bin = bin; - run->nextind = 0; - run->nfree = bin_info->nregs; - bitmap_init(bitmap, &bin_info->bitmap_info); + run->avail = NULL; + run->next = (void *)((uintptr_t)run + + (uintptr_t)bin->reg0_offset); + run->nfree = bin->nregs; #ifdef JEMALLOC_DEBUG run->magic = ARENA_RUN_MAGIC; #endif @@ -1305,12 +1259,8 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; - size_t binind; - arena_bin_info_t *bin_info; arena_run_t *run; - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; bin->runcur = NULL; run = arena_bin_nonfull_run_get(arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { @@ -1318,22 +1268,22 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). */ - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - ret = arena_run_reg_alloc(bin->runcur, bin_info); + ret = arena_run_reg_alloc(bin->runcur, bin); if (run != NULL) { arena_chunk_t *chunk; /* * arena_run_alloc() may have allocated run, or it may - * have pulled run from the bin's run tree. Therefore + * have pulled it from the bin's run tree. Therefore * it is unsafe to make any assumptions about how run * has previously been used, and arena_bin_lower_run() * must be called, as if a region were just deallocated * from the run. */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin_info->nregs) + if (run->nfree == bin->nregs) arena_dalloc_bin_run(arena, chunk, run, bin); else arena_bin_lower_run(arena, chunk, run, bin); @@ -1346,10 +1296,10 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = run; - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - return (arena_run_reg_alloc(bin->runcur, bin_info)); + return (arena_run_reg_alloc(bin->runcur, bin)); } #ifdef JEMALLOC_PROF @@ -1389,19 +1339,18 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); - for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> - tbin->lg_fill_div); i < nfill; i++) { + for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) { if ((run = bin->runcur) != NULL && run->nfree > 0) - ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); + ptr = arena_run_reg_alloc(run, bin); else ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) break; - /* Insert such that low regions get used first. */ - tbin->avail[nfill - 1 - i] = ptr; + *(void **)ptr = tbin->avail; + tbin->avail = ptr; } #ifdef JEMALLOC_STATS - bin->stats.allocated += i * arena_bin_info[binind].reg_size; + bin->stats.allocated += (i - tbin->ncached) * bin->reg_size; bin->stats.nmalloc += i; bin->stats.nrequests += tbin->tstats.nrequests; bin->stats.nfills++; @@ -1409,9 +1358,119 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif malloc_mutex_unlock(&bin->lock); tbin->ncached = i; + if (tbin->ncached > tbin->high_water) + tbin->high_water = tbin->ncached; } #endif +/* + * Calculate bin->run_size such that it meets the following constraints: + * + * *) bin->run_size >= min_run_size + * *) bin->run_size <= arena_maxclass + * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). 
+ * *) run header size < PAGE_SIZE + * + * bin->nregs and bin->reg0_offset are also calculated here, since these + * settings are all interdependent. + */ +static size_t +arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) +{ + size_t try_run_size, good_run_size; + uint32_t try_nregs, good_nregs; + uint32_t try_hdr_size, good_hdr_size; +#ifdef JEMALLOC_PROF + uint32_t try_ctx0_offset, good_ctx0_offset; +#endif + uint32_t try_reg0_offset, good_reg0_offset; + + assert(min_run_size >= PAGE_SIZE); + assert(min_run_size <= arena_maxclass); + + /* + * Calculate known-valid settings before entering the run_size + * expansion loop, so that the first part of the loop always copies + * valid settings. + * + * The do..while loop iteratively reduces the number of regions until + * the run header and the regions no longer overlap. A closed formula + * would be quite messy, since there is an interdependency between the + * header's mask length and the number of regions. + */ + try_run_size = min_run_size; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size) + + 1; /* Counter-act try_nregs-- in loop. */ + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. */ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); + } else + try_ctx0_offset = 0; +#endif + try_reg0_offset = try_run_size - (try_nregs * bin->reg_size); + } while (try_hdr_size > try_reg0_offset); + + /* run_size expansion loop. */ + do { + /* + * Copy valid settings before trying more aggressive settings. + */ + good_run_size = try_run_size; + good_nregs = try_nregs; + good_hdr_size = try_hdr_size; +#ifdef JEMALLOC_PROF + good_ctx0_offset = try_ctx0_offset; +#endif + good_reg0_offset = try_reg0_offset; + + /* Try more aggressive settings. */ + try_run_size += PAGE_SIZE; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */ + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* + * Add space for one (prof_ctx_t *) per region. + */ + try_hdr_size += try_nregs * + sizeof(prof_ctx_t *); + } +#endif + try_reg0_offset = try_run_size - (try_nregs * + bin->reg_size); + } while (try_hdr_size > try_reg0_offset); + } while (try_run_size <= arena_maxclass + && try_run_size <= arena_maxclass + && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX + && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size + && try_hdr_size < PAGE_SIZE); + + assert(good_hdr_size <= good_reg0_offset); + + /* Copy final settings. 
*/ + bin->run_size = good_run_size; + bin->nregs = good_nregs; +#ifdef JEMALLOC_PROF + bin->ctx0_offset = good_ctx0_offset; +#endif + bin->reg0_offset = good_reg0_offset; + + return (good_run_size); +} + void * arena_malloc_small(arena_t *arena, size_t size, bool zero) { @@ -1420,14 +1479,14 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) arena_run_t *run; size_t binind; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin[size]; assert(binind < nbins); bin = &arena->bins[binind]; - size = arena_bin_info[binind].reg_size; + size = bin->reg_size; malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) - ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); + ret = arena_run_reg_alloc(run, bin); else ret = arena_bin_malloc_hard(arena, bin); @@ -1631,13 +1690,11 @@ arena_salloc(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(run->magic == ARENA_RUN_MAGIC); assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == + (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == 0); - ret = bin_info->reg_size; + ret = run->bin->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1657,11 +1714,10 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr) == PAGE_SIZE); - assert(size <= small_maxclass); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin[size]; assert(binind < nbins); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); @@ -1685,13 +1741,11 @@ arena_salloc_demote(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(run->magic == ARENA_RUN_MAGIC); assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == + (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == 0); - ret = bin_info->reg_size; + ret = run->bin->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1700,7 +1754,7 @@ arena_salloc_demote(const void *ptr) size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; assert(binind < nbins); - ret = arena_bin_info[binind].reg_size; + ret = chunk->arena->bins[binind].reg_size; } assert(ret != 0); } @@ -1717,22 +1771,17 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, /* Dissociate run from bin. 
*/ if (run == bin->runcur) bin->runcur = NULL; - else { - size_t binind = arena_bin_index(chunk->arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - - if (bin_info->nregs != 1) { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; - /* - * This block's conditional is necessary because if the - * run only contains one region, then it never gets - * inserted into the non-full runs tree. - */ - arena_run_tree_remove(&bin->runs, run_mapelm); - } + else if (bin->nregs != 1) { + size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >> + PAGE_SHIFT; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; + /* + * This block's conditional is necessary because if the run + * only contains one region, then it never gets inserted into + * the non-full runs tree. + */ + arena_run_tree_remove(&bin->runs, run_mapelm); } } @@ -1740,24 +1789,18 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t binind; - arena_bin_info_t *bin_info; size_t npages, run_ind, past; assert(run != bin->runcur); assert(arena_run_tree_search(&bin->runs, &chunk->map[ (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); - binind = arena_bin_index(chunk->arena, run->bin); - bin_info = &arena_bin_info[binind]; - malloc_mutex_unlock(&bin->lock); /******************************/ - npages = bin_info->run_size >> PAGE_SHIFT; + npages = bin->run_size >> PAGE_SHIFT; run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - past = (size_t)(PAGE_CEILING((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT); + past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk) + >> PAGE_SHIFT); malloc_mutex_lock(&arena->lock); /* @@ -1774,7 +1817,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | (chunk->map[run_ind+npages-1-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin_info->run_size | + chunk->map[run_ind-map_bias].bits = bin->run_size | CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), @@ -1843,12 +1886,10 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; #if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size = bin_info->reg_size; + size = bin->reg_size; #endif #ifdef JEMALLOC_FILL @@ -1857,7 +1898,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif arena_run_reg_dalloc(run, ptr); - if (run->nfree == bin_info->nregs) { + if (run->nfree == bin->nregs) { arena_dissociate_bin_run(chunk, run, bin); arena_dalloc_bin_run(arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) @@ -2091,7 +2132,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - dassert(arena->magic == 
ARENA_MAGIC); + assert(arena->magic == ARENA_MAGIC); if (psize < oldsize) { #ifdef JEMALLOC_FILL @@ -2129,11 +2170,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= small_maxclass) { - assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size - == oldsize); + assert(choose_arena()->bins[small_size2bin[ + oldsize]].reg_size == oldsize); if ((size + extra <= small_maxclass && - SMALL_SIZE2BIN(size + extra) == - SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && + small_size2bin[size + extra] == + small_size2bin[oldsize]) || (size <= oldsize && size + extra >= oldsize)) { #ifdef JEMALLOC_FILL if (opt_junk && size < oldsize) { @@ -2169,29 +2210,24 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (ret != NULL) return (ret); + /* * size and oldsize are different enough that we need to move the * object. In that case, fall back to allocating new space and * copying. */ - if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - } else + if (alignment != 0) + ret = ipalloc(size + extra, alignment, zero); + else ret = arena_malloc(size + extra, zero); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. */ - if (alignment != 0) { - size_t usize = sa2u(size, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - } else + if (alignment != 0) + ret = ipalloc(size, alignment, zero); + else ret = arena_malloc(size, zero); if (ret == NULL) @@ -2215,9 +2251,9 @@ arena_new(arena_t *arena, unsigned ind) { unsigned i; arena_bin_t *bin; + size_t prev_run_size; arena->ind = ind; - arena->nthreads = 0; if (malloc_mutex_init(&arena->lock)) return (true); @@ -2251,6 +2287,8 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ + prev_run_size = PAGE_SIZE; + i = 0; #ifdef JEMALLOC_TINY /* (2^n)-spaced tiny bins. 
*/ @@ -2260,6 +2298,11 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = (1U << (LG_TINY_MIN + i)); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2273,6 +2316,11 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = (i - ntbins + 1) << LG_QUANTUM; + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2285,6 +2333,12 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) << + LG_CACHELINE); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2297,6 +2351,12 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins)) + << LG_SUBPAGE); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2315,39 +2375,40 @@ small_size2bin_validate(void) { size_t i, size, binind; + assert(small_size2bin[0] == 0xffU); i = 1; # ifdef JEMALLOC_TINY /* Tiny. */ for (; i < (1U << LG_TINY_MIN); i++) { size = pow2_ceil(1U << LG_TINY_MIN); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } for (; i < qspace_min; i++) { size = pow2_ceil(i); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } # endif /* Quantum-spaced. */ for (; i <= qspace_max; i++) { size = QUANTUM_CEILING(i); binind = ntbins + (size >> LG_QUANTUM) - 1; - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } /* Cacheline-spaced. */ for (; i <= cspace_max; i++) { size = CACHELINE_CEILING(i); binind = ntbins + nqbins + ((size - cspace_min) >> LG_CACHELINE); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } /* Sub-page. 
*/ for (; i <= sspace_max; i++) { size = SUBPAGE_CEILING(i); binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> LG_SUBPAGE); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } } #endif @@ -2358,12 +2419,12 @@ small_size2bin_init(void) if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)) + || sizeof(const_small_size2bin) != small_maxclass + 1) return (small_size2bin_init_hard()); small_size2bin = const_small_size2bin; #ifdef JEMALLOC_DEBUG + assert(sizeof(const_small_size2bin) == small_maxclass + 1); small_size2bin_validate(); #endif return (false); @@ -2374,242 +2435,55 @@ small_size2bin_init_hard(void) { size_t i, size, binind; uint8_t *custom_small_size2bin; -#define CUSTOM_SMALL_SIZE2BIN(s) \ - custom_small_size2bin[(s-1) >> LG_TINY_MIN] assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)); + || sizeof(const_small_size2bin) != small_maxclass + 1); - custom_small_size2bin = (uint8_t *) - base_alloc(small_maxclass >> LG_TINY_MIN); + custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1); if (custom_small_size2bin == NULL) return (true); + custom_small_size2bin[0] = 0xffU; i = 1; #ifdef JEMALLOC_TINY /* Tiny. */ - for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { + for (; i < (1U << LG_TINY_MIN); i++) { size = pow2_ceil(1U << LG_TINY_MIN); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } - for (; i < qspace_min; i += TINY_MIN) { + for (; i < qspace_min; i++) { size = pow2_ceil(i); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } #endif /* Quantum-spaced. */ - for (; i <= qspace_max; i += TINY_MIN) { + for (; i <= qspace_max; i++) { size = QUANTUM_CEILING(i); binind = ntbins + (size >> LG_QUANTUM) - 1; - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } /* Cacheline-spaced. */ - for (; i <= cspace_max; i += TINY_MIN) { + for (; i <= cspace_max; i++) { size = CACHELINE_CEILING(i); binind = ntbins + nqbins + ((size - cspace_min) >> LG_CACHELINE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } /* Sub-page. */ - for (; i <= sspace_max; i += TINY_MIN) { + for (; i <= sspace_max; i++) { size = SUBPAGE_CEILING(i); binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> LG_SUBPAGE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } small_size2bin = custom_small_size2bin; #ifdef JEMALLOC_DEBUG small_size2bin_validate(); #endif - return (false); -#undef CUSTOM_SMALL_SIZE2BIN -} - -/* - * Calculate bin_info->run_size such that it meets the following constraints: - * - * *) bin_info->run_size >= min_run_size - * *) bin_info->run_size <= arena_maxclass - * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). - * *) bin_info->nregs <= RUN_MAXREGS - * - * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also - * calculated here, since these settings are all interdependent. 
- */ -static size_t -bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) -{ - size_t try_run_size, good_run_size; - uint32_t try_nregs, good_nregs; - uint32_t try_hdr_size, good_hdr_size; - uint32_t try_bitmap_offset, good_bitmap_offset; -#ifdef JEMALLOC_PROF - uint32_t try_ctx0_offset, good_ctx0_offset; -#endif - uint32_t try_reg0_offset, good_reg0_offset; - - assert(min_run_size >= PAGE_SIZE); - assert(min_run_size <= arena_maxclass); - - /* - * Calculate known-valid settings before entering the run_size - * expansion loop, so that the first part of the loop always copies - * valid settings. - * - * The do..while loop iteratively reduces the number of regions until - * the run header and the regions no longer overlap. A closed formula - * would be quite messy, since there is an interdependency between the - * header's mask length and the number of regions. - */ - try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* Add space for one (prof_ctx_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_ctx_t *); - } else - try_ctx0_offset = 0; -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); - - /* run_size expansion loop. */ - do { - /* - * Copy valid settings before trying more aggressive settings. - */ - good_run_size = try_run_size; - good_nregs = try_nregs; - good_hdr_size = try_hdr_size; - good_bitmap_offset = try_bitmap_offset; -#ifdef JEMALLOC_PROF - good_ctx0_offset = try_ctx0_offset; -#endif - good_reg0_offset = try_reg0_offset; - - /* Try more aggressive settings. */ - try_run_size += PAGE_SIZE; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin_info->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* - * Add space for one (prof_ctx_t *) per region. - */ - try_hdr_size += try_nregs * - sizeof(prof_ctx_t *); - } -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); - } while (try_run_size <= arena_maxclass - && try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX - && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size - && try_nregs < RUN_MAXREGS); - - assert(good_hdr_size <= good_reg0_offset); - - /* Copy final settings. 
*/ - bin_info->run_size = good_run_size; - bin_info->nregs = good_nregs; - bin_info->bitmap_offset = good_bitmap_offset; -#ifdef JEMALLOC_PROF - bin_info->ctx0_offset = good_ctx0_offset; -#endif - bin_info->reg0_offset = good_reg0_offset; - - return (good_run_size); -} - -static bool -bin_info_init(void) -{ - arena_bin_info_t *bin_info; - unsigned i; - size_t prev_run_size; - - arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); - if (arena_bin_info == NULL) - return (true); - - prev_run_size = PAGE_SIZE; - i = 0; -#ifdef JEMALLOC_TINY - /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } -#endif - - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << - LG_CACHELINE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - /* Subpage-spaced bins. */ - for (; i < nbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + - ncbins)) << LG_SUBPAGE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - return (false); } @@ -2671,6 +2545,9 @@ arena_boot(void) abort(); } + if (small_size2bin_init()) + return (true); + /* * Compute the header size such that it is large enough to contain the * page map. The page map is biased to omit entries for the header @@ -2694,11 +2571,5 @@ arena_boot(void) arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); - if (small_size2bin_init()) - return (true); - - if (bin_info_init()) - return (true); - return (false); } diff --git a/dep/jemalloc/src/atomic.c b/dep/jemalloc/src/atomic.c deleted file mode 100644 index 77ee313113b..00000000000 --- a/dep/jemalloc/src/atomic.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_ATOMIC_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/bitmap.c b/dep/jemalloc/src/bitmap.c deleted file mode 100644 index b47e2629093..00000000000 --- a/dep/jemalloc/src/bitmap.c +++ /dev/null @@ -1,90 +0,0 @@ -#define JEMALLOC_BITMAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static size_t bits2groups(size_t nbits); - -/******************************************************************************/ - -static size_t -bits2groups(size_t nbits) -{ - - return ((nbits >> LG_BITMAP_GROUP_NBITS) + - !!(nbits & BITMAP_GROUP_NBITS_MASK)); -} - -void -bitmap_info_init(bitmap_info_t *binfo, size_t nbits) -{ - unsigned i; - size_t group_count; - - assert(nbits > 0); - assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); - - /* - * Compute the number of groups necessary to store nbits bits, and - * progressively work upward through the levels until reaching a level - * that requires only one group. 
- */ - binfo->levels[0].group_offset = 0; - group_count = bits2groups(nbits); - for (i = 1; group_count > 1; i++) { - assert(i < BITMAP_MAX_LEVELS); - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - group_count = bits2groups(group_count); - } - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - binfo->nlevels = i; - binfo->nbits = nbits; -} - -size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ - - return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); -} - -size_t -bitmap_size(size_t nbits) -{ - bitmap_info_t binfo; - - bitmap_info_init(&binfo, nbits); - return (bitmap_info_ngroups(&binfo)); -} - -void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t extra; - unsigned i; - - /* - * Bits are actually inverted with regard to the external bitmap - * interface, so the bitmap starts out with all 1 bits, except for - * trailing unused bits (if any). Note that each group uses bit 0 to - * correspond to the first logical bit in the group, so extra bits - * are the most significant bits of the last group. - */ - memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << - LG_SIZEOF_BITMAP); - extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) - & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[1].group_offset - 1] >>= extra; - for (i = 1; i < binfo->nlevels; i++) { - size_t group_count = binfo->levels[i].group_offset - - binfo->levels[i-1].group_offset; - extra = (BITMAP_GROUP_NBITS - (group_count & - BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; - } -} diff --git a/dep/jemalloc/src/chunk.c b/dep/jemalloc/src/chunk.c index d190c6f49b3..301519e8042 100644 --- a/dep/jemalloc/src/chunk.c +++ b/dep/jemalloc/src/chunk.c @@ -70,7 +70,7 @@ RETURN: #ifdef JEMALLOC_IVSALLOC if (base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size, true); + chunk_dealloc(ret, size); return (NULL); } } @@ -108,7 +108,7 @@ RETURN: } void -chunk_dealloc(void *chunk, size_t size, bool unmap) +chunk_dealloc(void *chunk, size_t size) { assert(chunk != NULL); @@ -125,17 +125,15 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) malloc_mutex_unlock(&chunks_mtx); #endif - if (unmap) { #ifdef JEMALLOC_SWAP - if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) - return; + if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + return; #endif #ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) - return; + if (chunk_dealloc_dss(chunk, size) == false) + return; #endif - chunk_dealloc_mmap(chunk, size); - } + chunk_dealloc_mmap(chunk, size); } bool diff --git a/dep/jemalloc/src/chunk_mmap.c b/dep/jemalloc/src/chunk_mmap.c index 164e86e7b38..bc367559774 100644 --- a/dep/jemalloc/src/chunk_mmap.c +++ b/dep/jemalloc/src/chunk_mmap.c @@ -206,15 +206,13 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) void * chunk_alloc_mmap(size_t size) { - - return (chunk_alloc_mmap_internal(size, false)); + return chunk_alloc_mmap_internal(size, false); } void * chunk_alloc_mmap_noreserve(size_t size) { - - return (chunk_alloc_mmap_internal(size, true)); + return chunk_alloc_mmap_internal(size, true); } void diff --git a/dep/jemalloc/src/ckh.c b/dep/jemalloc/src/ckh.c index 43fcc25239d..682a8db65bf 100644 --- a/dep/jemalloc/src/ckh.c +++ b/dep/jemalloc/src/ckh.c @@ -34,7 +34,7 @@ * respectively. 
* ******************************************************************************/ -#define JEMALLOC_CKH_C_ +#define CKH_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ @@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -262,15 +262,9 @@ ckh_grow(ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; while (true) { - size_t usize; - lg_curcells++; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); - if (usize == 0) { - ret = true; - goto RETURN; - } - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { ret = true; goto RETURN; @@ -301,7 +295,7 @@ static void ckh_shrink(ckh_t *ckh) { ckhc_t *tab, *ttab; - size_t lg_curcells, usize; + size_t lg_curcells; unsigned lg_prevbuckets; /* @@ -310,10 +304,8 @@ ckh_shrink(ckh_t *ckh) */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); - if (usize == 0) - return; - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -348,7 +340,7 @@ bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; - size_t mincells, usize; + size_t mincells; unsigned lg_mincells; assert(minitems > 0); @@ -383,19 +375,15 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->hash = hash; ckh->keycomp = keycomp; - usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); - if (usize == 0) { - ret = true; - goto RETURN; - } - ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells, + (ZU(1) << LG_CACHELINE), true); if (ckh->tab == NULL) { ret = true; goto RETURN; } #ifdef JEMALLOC_DEBUG - ckh->magic = CKH_MAGIC; + ckh->magic = CKH_MAGIG; #endif ret = false; @@ -408,7 +396,7 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); #ifdef CKH_VERBOSE malloc_printf( @@ -433,7 +421,7 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); return (ckh->count); } @@ -464,7 +452,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -489,7 +477,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -521,7 +509,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -556,7 +544,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = 
hash(key, strlen((const char *)key), - 0x8432a476666bbc13LLU); + 0x8432a476666bbc13U); } *hash1 = ret1; diff --git a/dep/jemalloc/src/ctl.c b/dep/jemalloc/src/ctl.c index e5336d36949..3c8adab90a3 100644 --- a/dep/jemalloc/src/ctl.c +++ b/dep/jemalloc/src/ctl.c @@ -182,7 +182,6 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) #endif -CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) #ifdef JEMALLOC_STATS @@ -193,7 +192,6 @@ CTL_PROTO(stats_arenas_i_purged) #endif INDEX_PROTO(stats_arenas_i) #ifdef JEMALLOC_STATS -CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_mapped) @@ -436,7 +434,6 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = { #endif static const ctl_node_t stats_arenas_i_node[] = { - {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)} #ifdef JEMALLOC_STATS @@ -461,7 +458,6 @@ static const ctl_node_t stats_arenas_node[] = { static const ctl_node_t stats_node[] = { #ifdef JEMALLOC_STATS - {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, @@ -624,7 +620,6 @@ ctl_arena_refresh(arena_t *arena, unsigned i) ctl_arena_clear(astats); - sstats->nthreads += astats->nthreads; #ifdef JEMALLOC_STATS ctl_arena_stats_amerge(astats, arena); /* Merge into sum stats as well. */ @@ -662,17 +657,10 @@ ctl_refresh(void) * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). */ - ctl_stats.arenas[narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[narenas]); malloc_mutex_lock(&arenas_lock); memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - for (i = 0; i < narenas; i++) { - if (arenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; - else - ctl_stats.arenas[i].nthreads = 0; - } malloc_mutex_unlock(&arenas_lock); for (i = 0; i < narenas; i++) { bool initialized = (tarenas[i] != NULL); @@ -1126,8 +1114,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, unsigned newind, oldind; newind = oldind = choose_arena()->ind; - WRITE(newind, unsigned); - READ(oldind, unsigned); + WRITE(oldind, unsigned); + READ(newind, unsigned); if (newind != oldind) { arena_t *arena; @@ -1141,8 +1129,6 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, malloc_mutex_lock(&arenas_lock); if ((arena = arenas[newind]) == NULL) arena = arenas_extend(newind); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; malloc_mutex_unlock(&arenas_lock); if (arena == NULL) { ret = EAGAIN; @@ -1151,13 +1137,6 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. 
*/ ARENA_SET(arena); -#ifdef JEMALLOC_TCACHE - { - tcache_t *tcache = TCACHE_GET(); - if (tcache != NULL) - tcache->arena = arena; - } -#endif } ret = 0; @@ -1167,9 +1146,9 @@ RETURN: #ifdef JEMALLOC_STATS CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *); +CTL_RO_NL_GEN(thread_allocatedp, &ALLOCATED_GET(), uint64_t *); CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *); +CTL_RO_NL_GEN(thread_deallocatedp, &DEALLOCATED_GET(), uint64_t *); #endif /******************************************************************************/ @@ -1305,9 +1284,9 @@ CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool) /******************************************************************************/ -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t) const ctl_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1552,7 +1531,6 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) } #endif -CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) #ifdef JEMALLOC_STATS @@ -1584,7 +1562,6 @@ RETURN: } #ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *) CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t) CTL_RO_GEN(stats_active, ctl_stats.active, size_t) CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t) diff --git a/dep/jemalloc/src/hash.c b/dep/jemalloc/src/hash.c index cfa4da0275c..6a13d7a03c0 100644 --- a/dep/jemalloc/src/hash.c +++ b/dep/jemalloc/src/hash.c @@ -1,2 +1,2 @@ -#define JEMALLOC_HASH_C_ +#define HASH_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/huge.c b/dep/jemalloc/src/huge.c index a4f9b054ed5..0aadc4339a9 100644 --- a/dep/jemalloc/src/huge.c +++ b/dep/jemalloc/src/huge.c @@ -50,7 +50,6 @@ huge_malloc(size_t size, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); #ifdef JEMALLOC_STATS - stats_cactive_add(csize); huge_nmalloc++; huge_allocated += csize; #endif @@ -84,7 +83,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) * alignment, in order to assure the alignment can be achieved, then * unmap leading and trailing chunks. */ - assert(alignment > chunksize); + assert(alignment >= chunksize); chunk_size = CHUNK_CEILING(size); @@ -110,12 +109,12 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (offset == 0) { /* Trim trailing space. */ chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size, true); + - chunk_size); } else { size_t trailsize; /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset, true); + chunk_dealloc(ret, alignment - offset); ret = (void *)((uintptr_t)ret + (alignment - offset)); @@ -124,7 +123,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) /* Trim trailing space. 
*/ assert(trailsize < alloc_size); chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize, true); + trailsize); } } @@ -135,7 +134,6 @@ huge_palloc(size_t size, size_t alignment, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); #ifdef JEMALLOC_STATS - stats_cactive_add(chunk_size); huge_nmalloc++; huge_allocated += chunk_size; #endif @@ -194,7 +192,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * different size class. In that case, fall back to allocating new * space and copying. */ - if (alignment > chunksize) + if (alignment != 0) ret = huge_palloc(size + extra, alignment, zero); else ret = huge_malloc(size + extra, zero); @@ -203,7 +201,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (extra == 0) return (NULL); /* Try again, this time without extra. */ - if (alignment > chunksize) + if (alignment != 0) ret = huge_palloc(size, alignment, zero); else ret = huge_malloc(size, zero); @@ -234,13 +232,6 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, ) { size_t newsize = huge_salloc(ret); - /* - * Remove ptr from the tree of huge allocations before - * performing the remap operation, in order to avoid the - * possibility of another thread acquiring that mapping before - * this one removes it from the tree. - */ - huge_dalloc(ptr, false); if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, ret) == MAP_FAILED) { /* @@ -260,8 +251,9 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - chunk_dealloc_mmap(ptr, oldsize); - } + idalloc(ptr); + } else + huge_dalloc(ptr, false); } else #endif { @@ -286,7 +278,6 @@ huge_dalloc(void *ptr, bool unmap) extent_tree_ad_remove(&huge, node); #ifdef JEMALLOC_STATS - stats_cactive_sub(node->size); huge_ndalloc++; huge_allocated -= node->size; #endif @@ -301,10 +292,9 @@ huge_dalloc(void *ptr, bool unmap) memset(node->addr, 0x5a, node->size); #endif #endif + chunk_dealloc(node->addr, node->size); } - chunk_dealloc(node->addr, node->size, unmap); - base_node_dealloc(node); } diff --git a/dep/jemalloc/src/jemalloc.c b/dep/jemalloc/src/jemalloc.c index a161c2e26e1..2aebc51dd19 100644 --- a/dep/jemalloc/src/jemalloc.c +++ b/dep/jemalloc/src/jemalloc.c @@ -7,10 +7,12 @@ malloc_mutex_t arenas_lock; arena_t **arenas; unsigned narenas; +static unsigned next_arena; -pthread_key_t arenas_tsd; #ifndef NO_TLS __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#else +pthread_key_t arenas_tsd; #endif #ifdef JEMALLOC_STATS @@ -28,13 +30,7 @@ static bool malloc_initialized = false; static pthread_t malloc_initializer = (unsigned long)0; /* Used to avoid initialization races. 
*/ -static malloc_mutex_t init_lock = -#ifdef JEMALLOC_OSSPIN - 0 -#else - MALLOC_MUTEX_INITIALIZER -#endif - ; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT size_t pagesize; @@ -74,7 +70,6 @@ size_t opt_narenas = 0; static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); -static void arenas_cleanup(void *arg); #if (defined(JEMALLOC_STATS) && defined(NO_TLS)) static void thread_allocated_cleanup(void *arg); #endif @@ -84,7 +79,6 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen); static void malloc_conf_init(void); static bool malloc_init_hard(void); -static int imemalign(void **memptr, size_t alignment, size_t size); /******************************************************************************/ /* malloc_message() setup. */ @@ -153,53 +147,13 @@ choose_arena_hard(void) arena_t *ret; if (narenas > 1) { - unsigned i, choose, first_null; - - choose = 0; - first_null = narenas; malloc_mutex_lock(&arenas_lock); - assert(arenas[0] != NULL); - for (i = 1; i < narenas; i++) { - if (arenas[i] != NULL) { - /* - * Choose the first arena that has the lowest - * number of threads assigned to it. - */ - if (arenas[i]->nthreads < - arenas[choose]->nthreads) - choose = i; - } else if (first_null == narenas) { - /* - * Record the index of the first uninitialized - * arena, in case all extant arenas are in use. - * - * NB: It is possible for there to be - * discontinuities in terms of initialized - * versus uninitialized arenas, due to the - * "thread.arena" mallctl. - */ - first_null = i; - } - } - - if (arenas[choose] == 0 || first_null == narenas) { - /* - * Use an unloaded arena, or the least loaded arena if - * all arenas are already initialized. - */ - ret = arenas[choose]; - } else { - /* Initialize a new arena. */ - ret = arenas_extend(first_null); - } - ret->nthreads++; + if ((ret = arenas[next_arena]) == NULL) + ret = arenas_extend(next_arena); + next_arena = (next_arena + 1) % narenas; malloc_mutex_unlock(&arenas_lock); - } else { + } else ret = arenas[0]; - malloc_mutex_lock(&arenas_lock); - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); - } ARENA_SET(ret); @@ -259,28 +213,6 @@ stats_print_atexit(void) JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -thread_allocated_t * -thread_allocated_get_hard(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - imalloc(sizeof(thread_allocated_t)); - if (thread_allocated == NULL) { - static thread_allocated_t static_thread_allocated = {0, 0}; - malloc_write(": Error allocating TSD;" - " mallctl(\"thread.{de,}allocated[p]\", ...)" - " will be inaccurate\n"); - if (opt_abort) - abort(); - return (&static_thread_allocated); - } - pthread_setspecific(thread_allocated_tsd, thread_allocated); - thread_allocated->allocated = 0; - thread_allocated->deallocated = 0; - return (thread_allocated); -} -#endif - /* * End miscellaneous support functions. 
*/ @@ -305,16 +237,6 @@ malloc_ncpus(void) return (ret); } -static void -arenas_cleanup(void *arg) -{ - arena_t *arena = (arena_t *)arg; - - malloc_mutex_lock(&arenas_lock); - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); -} - #if (defined(JEMALLOC_STATS) && defined(NO_TLS)) static void thread_allocated_cleanup(void *arg) @@ -499,8 +421,8 @@ malloc_conf_init(void) if ((opts = getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to - * the value of the MALLOC_CONF environment - * variable. + * the value of the JEMALLOC_OPTIONS + * environment variable. */ } else { /* No configuration specified. */ @@ -689,7 +611,7 @@ malloc_init_hard(void) result = sysconf(_SC_PAGESIZE); assert(result != -1); - pagesize = (size_t)result; + pagesize = (unsigned)result; /* * We assume that pagesize is a power of 2 when calculating @@ -749,10 +671,7 @@ malloc_init_hard(void) } #ifdef JEMALLOC_TCACHE - if (tcache_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } + tcache_boot(); #endif if (huge_boot()) { @@ -769,14 +688,6 @@ malloc_init_hard(void) } #endif - if (malloc_mutex_init(&arenas_lock)) - return (true); - - if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } - /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). @@ -801,7 +712,8 @@ malloc_init_hard(void) * threaded mode. */ ARENA_SET(arenas[0]); - arenas[0]->nthreads++; + + malloc_mutex_init(&arenas_lock); #ifdef JEMALLOC_PROF if (prof_boot2()) { @@ -841,6 +753,15 @@ malloc_init_hard(void) malloc_write(")\n"); } + next_arena = (narenas > 0) ? 1 : 0; + +#ifdef NO_TLS + if (pthread_key_create(&arenas_tsd, NULL) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + /* Allocate and initialize arenas. */ arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); if (arenas == NULL) { @@ -872,6 +793,7 @@ malloc_init_hard(void) return (false); } + #ifdef JEMALLOC_ZONE JEMALLOC_ATTR(constructor) void @@ -940,8 +862,7 @@ JEMALLOC_P(malloc)(size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } @@ -990,23 +911,19 @@ RETURN: } JEMALLOC_ATTR(nonnull(1)) -#ifdef JEMALLOC_PROF -/* - * Avoid any uncertainty as to how many backtrace frames to ignore in - * PROF_ALLOC_PREP(). 
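/*
 * A sketch of the page-size bootstrapping malloc_init_hard() performs
 * above: query sysconf(), rely on the stated power-of-two assumption,
 * and derive the mask and lg(pagesize) from it.  Variable names are
 * illustrative rather than the exact jemalloc globals.
 */
#include <assert.h>
#include <unistd.h>

static size_t pagesize, pagesize_mask;
static unsigned pagesize_2pow;

static void
page_boot(void)
{
    long result = sysconf(_SC_PAGESIZE);

    assert(result != -1);
    pagesize = (size_t)result;
    /* The power-of-two assumption makes the mask/shift arithmetic valid. */
    assert((pagesize & (pagesize - 1)) == 0);
    pagesize_mask = pagesize - 1;
    for (pagesize_2pow = 0; ((size_t)1 << pagesize_2pow) < pagesize;
        pagesize_2pow++)
        ;    /* pagesize_2pow ends up as lg(pagesize). */
}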
- */ -JEMALLOC_ATTR(noinline) -#endif -static int -imemalign(void **memptr, size_t alignment, size_t size) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) { int ret; + void *result; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t usize -#ifdef JEMALLOC_CC_SILENCE +# ifdef JEMALLOC_CC_SILENCE = 0 -#endif +# endif ; - void *result; +#endif #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt # ifdef JEMALLOC_CC_SILENCE @@ -1056,38 +973,34 @@ imemalign(void **memptr, size_t alignment, size_t size) goto RETURN; } - usize = sa2u(size, alignment, NULL); - if (usize == 0) { - result = NULL; - ret = ENOMEM; - goto RETURN; - } - #ifdef JEMALLOC_PROF if (opt_prof) { - PROF_ALLOC_PREP(2, usize, cnt); - if (cnt == NULL) { + usize = sa2u(size, alignment, NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) { result = NULL; ret = EINVAL; } else { if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { - assert(sa2u(small_maxclass+1, - alignment, NULL) != 0); - result = ipalloc(sa2u(small_maxclass+1, - alignment, NULL), alignment, false); + result = ipalloc(small_maxclass+1, + alignment, false); if (result != NULL) { arena_prof_promoted(result, usize); } } else { - result = ipalloc(usize, alignment, + result = ipalloc(size, alignment, false); } } } else #endif - result = ipalloc(usize, alignment, false); + { +#ifdef JEMALLOC_STATS + usize = sa2u(size, alignment, NULL); +#endif + result = ipalloc(size, alignment, false); + } } if (result == NULL) { @@ -1119,15 +1032,6 @@ RETURN: return (ret); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) -{ - - return imemalign(memptr, alignment, size); -} - JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * @@ -1183,8 +1087,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(num_size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto RETURN; } @@ -1297,9 +1200,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) if (opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - old_ctx = NULL; + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } @@ -1309,13 +1210,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) false, false); if (ret != NULL) arena_prof_promoted(ret, usize); - else - old_ctx = NULL; - } else { + } else ret = iralloc(ptr, size, 0, 0, false, false); - if (ret == NULL) - old_ctx = NULL; - } } else #endif { @@ -1353,8 +1249,7 @@ OOM: #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) + if ((cnt = prof_alloc_prep(usize)) == NULL) ret = NULL; else { if (prof_promote && (uintptr_t)cnt != @@ -1459,7 +1354,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - imemalign(&ret, alignment, size); + JEMALLOC_P(posix_memalign)(&ret, alignment, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1478,7 +1373,7 @@ JEMALLOC_P(valloc)(size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - imemalign(&ret, PAGE_SIZE, size); + JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1559,18 +1454,15 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, } JEMALLOC_INLINE void 
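/*
 * A standalone sketch of how the legacy entry points shown above layer
 * on posix_memalign(): memalign() forwards the alignment and valloc()
 * uses the page size.  my_memalign()/my_valloc() are hypothetical names;
 * the real wrappers go through JEMALLOC_P() and the CC_SILENCE plumbing.
 */
#include <stdlib.h>
#include <unistd.h>

void *
my_memalign(size_t alignment, size_t size)
{
    void *ret;

    /* alignment must be a power-of-two multiple of sizeof(void *). */
    if (posix_memalign(&ret, alignment, size) != 0)
        return (NULL);
    return (ret);
}

void *
my_valloc(size_t size)
{
    return (my_memalign((size_t)sysconf(_SC_PAGESIZE), size));
}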
* -iallocm(size_t usize, size_t alignment, bool zero) +iallocm(size_t size, size_t alignment, bool zero) { - assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment, - NULL))); - if (alignment != 0) - return (ipalloc(usize, alignment, zero)); + return (ipalloc(size, alignment, zero)); else if (zero) - return (icalloc(usize)); + return (icalloc(size)); else - return (imalloc(usize)); + return (imalloc(size)); } JEMALLOC_ATTR(nonnull(1)) @@ -1593,43 +1485,38 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto OOM; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); - if (usize == 0) - goto OOM; - #ifdef JEMALLOC_PROF if (opt_prof) { - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, + NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) goto OOM; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { - size_t usize_promoted = (alignment == 0) ? - s2u(small_maxclass+1) : sa2u(small_maxclass+1, - alignment, NULL); - assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero); + p = iallocm(small_maxclass+1, alignment, zero); if (p == NULL) goto OOM; arena_prof_promoted(p, usize); } else { - p = iallocm(usize, alignment, zero); + p = iallocm(size, alignment, zero); if (p == NULL) goto OOM; } - prof_malloc(p, usize, cnt); + if (rsize != NULL) *rsize = usize; } else #endif { - p = iallocm(usize, alignment, zero); + p = iallocm(size, alignment, zero); if (p == NULL) goto OOM; #ifndef JEMALLOC_STATS if (rsize != NULL) #endif { + usize = (alignment == 0) ? s2u(size) : sa2u(size, + alignment, NULL); #ifdef JEMALLOC_STATS if (rsize != NULL) #endif @@ -1672,6 +1559,7 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, bool no_move = flags & ALLOCM_NO_MOVE; #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; + prof_ctx_t *old_ctx; #endif assert(ptr != NULL); @@ -1686,33 +1574,25 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and - * use that in PROF_ALLOC_PREP() to decide whether to capture a + * use that in prof_alloc_prep() to decide whether to capture a * backtrace. prof_realloc() will use the actual usize to * decide whether to sample. */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment, NULL); - prof_ctx_t *old_ctx = prof_ctx_get(p); old_size = isalloc(p); - PROF_ALLOC_PREP(1, max_usize, cnt); - if (cnt == NULL) + old_ctx = prof_ctx_get(p); + if ((cnt = prof_alloc_prep(max_usize)) == NULL) goto OOM; - /* - * Use minimum usize to determine whether promotion may happen. - */ - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL)) <= small_maxclass) { + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize + <= small_maxclass) { q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= size+extra) ? 
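/*
 * The iallocm() dispatch above as one self-contained helper, expressed
 * with libc primitives instead of jemalloc internals: an aligned request
 * goes to the aligned allocator, a zeroed request to calloc(), anything
 * else to plain malloc().  alloc_dispatch() is a hypothetical name used
 * only for illustration.
 */
#include <stdlib.h>
#include <string.h>

static void *
alloc_dispatch(size_t size, size_t alignment, int zero)
{
    void *p;

    if (alignment != 0) {
        if (posix_memalign(&p, alignment, size) != 0)
            return (NULL);
        if (zero)
            memset(p, 0, size);    /* Aligned path zeroes explicitly. */
        return (p);
    }
    if (zero)
        return (calloc(1, size));
    return (malloc(size));
}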
0 : size+extra - (small_maxclass+1), alignment, zero, no_move); if (q == NULL) goto ERR; - if (max_usize < PAGE_SIZE) { - usize = max_usize; - arena_prof_promoted(q, usize); - } else - usize = isalloc(q); + usize = isalloc(q); + arena_prof_promoted(q, usize); } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) @@ -1720,8 +1600,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, usize = isalloc(q); } prof_realloc(q, usize, cnt, old_size, old_ctx); - if (rsize != NULL) - *rsize = usize; } else #endif { diff --git a/dep/jemalloc/src/mb.c b/dep/jemalloc/src/mb.c index dc2c0a256fd..30a1a2e997a 100644 --- a/dep/jemalloc/src/mb.c +++ b/dep/jemalloc/src/mb.c @@ -1,2 +1,2 @@ -#define JEMALLOC_MB_C_ +#define MB_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/mutex.c b/dep/jemalloc/src/mutex.c index ca89ef1c962..3ecb18a340e 100644 --- a/dep/jemalloc/src/mutex.c +++ b/dep/jemalloc/src/mutex.c @@ -55,9 +55,6 @@ pthread_create(pthread_t *__restrict thread, bool malloc_mutex_init(malloc_mutex_t *mutex) { -#ifdef JEMALLOC_OSSPIN - *mutex = 0; -#else pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) @@ -73,7 +70,6 @@ malloc_mutex_init(malloc_mutex_t *mutex) } pthread_mutexattr_destroy(&attr); -#endif return (false); } @@ -81,10 +77,8 @@ void malloc_mutex_destroy(malloc_mutex_t *mutex) { -#ifndef JEMALLOC_OSSPIN if (pthread_mutex_destroy(mutex) != 0) { malloc_write(": Error in pthread_mutex_destroy()\n"); abort(); } -#endif } diff --git a/dep/jemalloc/src/prof.c b/dep/jemalloc/src/prof.c index 8a144b4e46c..636cccef52a 100644 --- a/dep/jemalloc/src/prof.c +++ b/dep/jemalloc/src/prof.c @@ -3,15 +3,15 @@ #ifdef JEMALLOC_PROF /******************************************************************************/ +#ifdef JEMALLOC_PROF_LIBGCC +#include +#endif + #ifdef JEMALLOC_PROF_LIBUNWIND #define UNW_LOCAL_ONLY #include #endif -#ifdef JEMALLOC_PROF_LIBGCC -#include -#endif - /******************************************************************************/ /* Data. 
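/*
 * A sketch of the malloc_mutex_init() shape restored above: initialize
 * the mutex through a pthread_mutexattr_t, destroy the attr on both
 * paths, and return false on success per the jemalloc convention.  The
 * PTHREAD_MUTEX_DEFAULT type here is illustrative; the real code picks
 * its mutex type at configure time.
 */
#include <pthread.h>
#include <stdbool.h>

static bool
mutex_init(pthread_mutex_t *mutex)
{
    pthread_mutexattr_t attr;

    if (pthread_mutexattr_init(&attr) != 0)
        return (true);    /* Error. */
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
    if (pthread_mutex_init(mutex, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        return (true);
    }
    pthread_mutexattr_destroy(&attr);
    return (false);    /* Success. */
}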
*/ @@ -169,7 +169,39 @@ prof_leave(void) prof_gdump(); } -#ifdef JEMALLOC_PROF_LIBUNWIND +#ifdef JEMALLOC_PROF_LIBGCC +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) +{ + + return (_URC_NO_REASON); +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) +{ + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + + if (data->nignore > 0) + data->nignore--; + else { + data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); + } + + return (_URC_NO_REASON); +} + +void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ + prof_unwind_data_t data = {bt, nignore, max}; + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#elif defined(JEMALLOC_PROF_LIBUNWIND) void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -204,41 +236,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) break; } } -#endif -#ifdef JEMALLOC_PROF_LIBGCC -static _Unwind_Reason_Code -prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) -{ - - return (_URC_NO_REASON); -} - -static _Unwind_Reason_Code -prof_unwind_callback(struct _Unwind_Context *context, void *arg) -{ - prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - - if (data->nignore > 0) - data->nignore--; - else { - data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); - data->bt->len++; - if (data->bt->len == data->max) - return (_URC_END_OF_STACK); - } - - return (_URC_NO_REASON); -} - -void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) -{ - prof_unwind_data_t data = {bt, nignore, max}; - - _Unwind_Backtrace(prof_unwind_callback, &data); -} -#endif -#ifdef JEMALLOC_PROF_GCC +#else void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -434,7 +432,6 @@ prof_lookup(prof_bt_t *bt) prof_ctx_t *p; void *v; } ctx; - bool new_ctx; /* * This thread's cache lacks bt. Look for it in the global @@ -471,26 +468,12 @@ prof_lookup(prof_bt_t *bt) idalloc(ctx.v); return (NULL); } - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - * - * No locking is necessary for ctx here because no other - * threads have had the opportunity to fetch it from - * bt2ctx yet. - */ - ctx.p->cnt_merged.curobjs++; - new_ctx = true; - } else { - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - */ - malloc_mutex_lock(&ctx.p->lock); - ctx.p->cnt_merged.curobjs++; - malloc_mutex_unlock(&ctx.p->lock); - new_ctx = false; } + /* + * Acquire ctx's lock before releasing bt2ctx_mtx, in order to + * avoid a race condition with prof_ctx_destroy(). + */ + malloc_mutex_lock(&ctx.p->lock); prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ @@ -503,9 +486,8 @@ prof_lookup(prof_bt_t *bt) */ ret.p = ql_last(&prof_tdata->lru_ql, lru_link); assert(ret.v != NULL); - if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, - NULL, NULL)) - assert(false); + ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, + NULL); ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); prof_ctx_merge(ret.p->ctx, ret.p); /* ret can now be re-used. */ @@ -516,8 +498,7 @@ prof_lookup(prof_bt_t *bt) /* Allocate and partially initialize a new cnt. 
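/*
 * A standalone sketch of the libgcc unwinder pattern used by the
 * JEMALLOC_PROF_LIBGCC prof_backtrace() above: _Unwind_Backtrace()
 * drives a callback that records one instruction pointer per frame via
 * _Unwind_GetIP().  bt_data_t/backtrace_collect() are illustrative
 * names, and the nignore frame-skipping is omitted.
 */
#include <unwind.h>

typedef struct {
    void        **vec;    /* Output buffer for return addresses. */
    unsigned    len;      /* Frames captured so far. */
    unsigned    max;      /* Capacity of vec. */
} bt_data_t;

static _Unwind_Reason_Code
bt_callback(struct _Unwind_Context *context, void *arg)
{
    bt_data_t *data = (bt_data_t *)arg;

    data->vec[data->len] = (void *)_Unwind_GetIP(context);
    data->len++;
    return (data->len == data->max ? _URC_END_OF_STACK : _URC_NO_REASON);
}

static unsigned
backtrace_collect(void **vec, unsigned max)
{
    bt_data_t data = {vec, 0, max};

    _Unwind_Backtrace(bt_callback, &data);
    return (data.len);
}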
*/ ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { - if (new_ctx) - prof_ctx_destroy(ctx.p); + malloc_mutex_unlock(&ctx.p->lock); return (NULL); } ql_elm_new(ret.p, cnts_link); @@ -528,15 +509,12 @@ prof_lookup(prof_bt_t *bt) ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - if (new_ctx) - prof_ctx_destroy(ctx.p); + malloc_mutex_unlock(&ctx.p->lock); idalloc(ret.v); return (NULL); } ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(&ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx.p->lock); } else { /* Move ret to the front of the LRU. */ @@ -650,10 +628,11 @@ prof_ctx_destroy(prof_ctx_t *ctx) /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in - * order to avoid a race condition with this function, as does - * prof_ctx_merge() in order to avoid a race between the main body of - * prof_ctx_merge() and entry into this function. + * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to + * avoid a race condition with this function, and prof_ctx_merge() + * artificially raises ctx->cnt_merged.curobjs in order to avoid a race + * between the main body of prof_ctx_merge() and entry into this + * function. */ prof_enter(); malloc_mutex_lock(&ctx->lock); @@ -662,8 +641,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ - if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) - assert(false); + ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); prof_leave(); /* Destroy ctx. */ malloc_mutex_unlock(&ctx->lock); @@ -671,10 +649,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) malloc_mutex_destroy(&ctx->lock); idalloc(ctx); } else { - /* - * Compensate for increment in prof_ctx_merge() or - * prof_lookup(). - */ + /* Compensate for increment in prof_ctx_merge(). */ ctx->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx->lock); prof_leave(); @@ -1081,7 +1056,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(bt->vec, bt->len * sizeof(void *), - 0x8432a476666bbc13LLU); + 0x8432a476666bbc13U); } *hash1 = ret1; @@ -1118,6 +1093,7 @@ prof_tdata_init(void) prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); if (prof_tdata->vec == NULL) { + ckh_delete(&prof_tdata->bt2cnt); idalloc(prof_tdata); return (NULL); @@ -1135,26 +1111,33 @@ prof_tdata_init(void) static void prof_tdata_cleanup(void *arg) { - prof_thr_cnt_t *cnt; - prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; + prof_tdata_t *prof_tdata; - /* - * Delete the hash table. All of its contents can still be iterated - * over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata != NULL) { + prof_thr_cnt_t *cnt; - /* Iteratively merge cnt's into the global stats and delete them. */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); - } + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); - idalloc(prof_tdata->vec); + /* + * Iteratively merge cnt's into the global stats and delete + * them. 
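/*
 * The thread-exit cleanup idiom behind prof_tdata_cleanup() above, in a
 * minimal standalone form: a pthread TSD destructor merges per-thread
 * counters into global state under a lock and frees the per-thread
 * record.  All names here (tdata_t, tdata_cleanup, ...) are
 * illustrative, not the jemalloc symbols.
 */
#include <pthread.h>
#include <stdlib.h>

typedef struct {
    unsigned long nrequests;    /* Per-thread counter. */
} tdata_t;

static pthread_key_t tdata_key;
static pthread_mutex_t merge_mtx = PTHREAD_MUTEX_INITIALIZER;
static unsigned long global_nrequests;

static void
tdata_cleanup(void *arg)
{
    tdata_t *tdata = (tdata_t *)arg;

    pthread_mutex_lock(&merge_mtx);
    global_nrequests += tdata->nrequests;    /* Merge, then discard. */
    pthread_mutex_unlock(&merge_mtx);
    free(tdata);
}

static void
tdata_boot(void)
{
    pthread_key_create(&tdata_key, tdata_cleanup);
}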
+ */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + prof_ctx_merge(cnt->ctx, cnt); + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + idalloc(cnt); + } - idalloc(prof_tdata); - PROF_TCACHE_SET(NULL); + idalloc(prof_tdata->vec); + + idalloc(prof_tdata); + PROF_TCACHE_SET(NULL); + } } void diff --git a/dep/jemalloc/src/rtree.c b/dep/jemalloc/src/rtree.c index eb0ff1e24af..7753743c5e6 100644 --- a/dep/jemalloc/src/rtree.c +++ b/dep/jemalloc/src/rtree.c @@ -1,4 +1,4 @@ -#define JEMALLOC_RTREE_C_ +#define RTREE_C_ #include "jemalloc/internal/jemalloc_internal.h" rtree_t * @@ -20,10 +20,7 @@ rtree_new(unsigned bits) memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * height)); - if (malloc_mutex_init(&ret->mutex)) { - /* Leak the rtree. */ - return (NULL); - } + malloc_mutex_init(&ret->mutex); ret->height = height; if (bits_per_level * height > bits) ret->level2bits[0] = bits % bits_per_level; diff --git a/dep/jemalloc/src/stats.c b/dep/jemalloc/src/stats.c index dc172e425c0..3dfe0d232a6 100644 --- a/dep/jemalloc/src/stats.c +++ b/dep/jemalloc/src/stats.c @@ -39,10 +39,6 @@ bool opt_stats_print = false; -#ifdef JEMALLOC_STATS -size_t stats_cactive = 0; -#endif - /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -323,7 +319,6 @@ static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - unsigned nthreads; size_t pagesize, pactive, pdirty, mapped; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -333,9 +328,6 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.pagesize", &pagesize, size_t); - CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); @@ -677,26 +669,21 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #ifdef JEMALLOC_STATS { int err; - size_t sszp, ssz; - size_t *cactive; + size_t ssz; size_t allocated, active, mapped; size_t chunks_current, chunks_high, swap_avail; uint64_t chunks_total; size_t huge_allocated; uint64_t huge_nmalloc, huge_ndalloc; - sszp = sizeof(size_t *); ssz = sizeof(size_t); - CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, mapped: %zu\n", - allocated, active, mapped); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", atomic_read_z(cactive)); + "Allocated: %zu, active: %zu, mapped: %zu\n", allocated, + active, mapped); /* Print chunk stats. */ CTL_GET("stats.chunks.total", &chunks_total, uint64_t); @@ -748,7 +735,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, ninitialized++; } - if (ninitialized > 1 || unmerged == false) { + if (ninitialized > 1) { /* Print merged arena stats. 
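/*
 * What the CTL_GET() calls in stats_print() above boil down to: a
 * mallctl() read of a named statistic into a local, typically after
 * bumping "epoch" so the snapshot is fresh.  Shown with the public
 * mallctl() symbol; in this tree it is wrapped by JEMALLOC_P().
 */
#include <stdint.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

static void
print_allocated(void)
{
    size_t allocated, sz = sizeof(size_t);
    uint64_t epoch = 1;

    /* Refresh the stats snapshot, then read it. */
    mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch));
    if (mallctl("stats.allocated", &allocated, &sz, NULL, 0) == 0)
        printf("Allocated: %zu\n", allocated);
}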
*/ malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); diff --git a/dep/jemalloc/src/tcache.c b/dep/jemalloc/src/tcache.c index 31c329e1613..cbbe7a113a9 100644 --- a/dep/jemalloc/src/tcache.c +++ b/dep/jemalloc/src/tcache.c @@ -8,9 +8,6 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; -tcache_bin_info_t *tcache_bin_info; -static unsigned stack_nelms; /* Total stack elms per tcache. */ - /* Map of thread-specific caches. */ #ifndef NO_TLS __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); @@ -58,19 +55,18 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *ptr; + void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS - bool merged_stats = false; -#endif + bool first_pass; assert(binind < nbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; arena_bin_t *bin = &arena->bins[binind]; @@ -86,17 +82,17 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem malloc_mutex_lock(&bin->lock); #ifdef JEMALLOC_STATS if (arena == tcache->arena) { - assert(merged_stats == false); - merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } #endif + deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = flush; assert(ptr != NULL); + flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) { size_t pageind = ((uintptr_t)ptr - @@ -111,31 +107,21 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem * locked. Stash the object, so that it can be * handled in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(void **)ptr = deferred; + deferred = ptr; ndeferred++; } } malloc_mutex_unlock(&bin->lock); + + if (first_pass) { + tbin->avail = flush; + first_pass = false; + } } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. 
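/*
 * The intrusive free-list trick the rewritten flush loops above depend
 * on: a cached object's first word holds the pointer to the next free
 * object, so pushing and popping need no side array.  Standalone
 * helpers with illustrative names.
 */
#include <stddef.h>

static void *
freelist_push(void *head, void *obj)
{
    *(void **)obj = head;    /* Link obj in front of the current head. */
    return (obj);
}

static void *
freelist_pop(void **head)
{
    void *obj = *head;

    if (obj != NULL)
        *head = *(void **)obj;    /* Next pointer lives in the object. */
    return (obj);
}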
- */ - arena_bin_t *bin = &tcache->arena->bins[binind]; - malloc_mutex_lock(&bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&bin->lock); - } -#endif - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) + if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } @@ -146,19 +132,18 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *ptr; + void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS - bool merged_stats = false; -#endif + bool first_pass; assert(binind < nhbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; malloc_mutex_lock(&arena->lock); @@ -170,7 +155,6 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem tcache->prof_accumbytes = 0; #endif #ifdef JEMALLOC_STATS - merged_stats = true; arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - nbins].nrequests += tbin->tstats.nrequests; @@ -179,10 +163,12 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) } #endif + deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = flush; assert(ptr != NULL); + flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) arena_dalloc_large(arena, chunk, ptr); @@ -193,32 +179,21 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem * Stash the object, so that it can be handled * in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(void **)ptr = deferred; + deferred = ptr; ndeferred++; } } malloc_mutex_unlock(&arena->lock); + + if (first_pass) { + tbin->avail = flush; + first_pass = false; + } } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. - */ - arena_t *arena = tcache->arena; - malloc_mutex_lock(&arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&arena->lock); - } -#endif - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) + if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } @@ -226,14 +201,10 @@ tcache_t * tcache_create(arena_t *arena) { tcache_t *tcache; - size_t size, stack_offset; + size_t size; unsigned i; size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); - /* Naturally align the pointer stacks. */ - size = PTR_CEILING(size); - stack_offset = size; - size += stack_nelms * sizeof(void *); /* * Round up to the nearest multiple of the cacheline size, in order to * avoid the possibility of false cacheline sharing. 
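/*
 * A sketch of the cacheline rounding mentioned above in tcache_create():
 * pad the tcache_t allocation up to a cacheline multiple so that two
 * caches never share a line.  CACHELINE is an assumed constant here;
 * jemalloc derives the real value at configure time.
 */
#include <stddef.h>

#define CACHELINE        64
#define CACHELINE_MASK   (CACHELINE - 1)
#define CACHELINE_CEILING(s) (((s) + CACHELINE_MASK) & ~(size_t)CACHELINE_MASK)

static size_t
tcache_alloc_size(size_t header_size)
{
    /* e.g. a 100-byte header rounds up to 128 bytes. */
    return (CACHELINE_CEILING(header_size));
}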
@@ -246,8 +217,6 @@ tcache_create(arena_t *arena) if (size <= small_maxclass) tcache = (tcache_t *)arena_malloc_small(arena, size, true); - else if (size <= tcache_maxclass) - tcache = (tcache_t *)arena_malloc_large(arena, size, true); else tcache = (tcache_t *)icalloc(size); @@ -264,12 +233,15 @@ tcache_create(arena_t *arena) tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - for (i = 0; i < nhbins; i++) { - tcache->tbins[i].lg_fill_div = 1; - tcache->tbins[i].avail = (void **)((uintptr_t)tcache + - (uintptr_t)stack_offset); - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + for (i = 0; i < nbins; i++) { + if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { + tcache->tbins[i].ncached_max = (arena->bins[i].nregs << + 1); + } else + tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; } + for (; i < nhbins; i++) + tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE; TCACHE_SET(tcache); @@ -280,7 +252,6 @@ void tcache_destroy(tcache_t *tcache) { unsigned i; - size_t tcache_size; #ifdef JEMALLOC_STATS /* Unlink from list of extant tcaches. */ @@ -337,8 +308,7 @@ tcache_destroy(tcache_t *tcache) } #endif - tcache_size = arena_salloc(tcache); - if (tcache_size <= small_maxclass) { + if (arena_salloc(tcache) <= small_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> @@ -352,13 +322,6 @@ tcache_destroy(tcache_t *tcache) malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, tcache, mapelm); malloc_mutex_unlock(&bin->lock); - } else if (tcache_size <= tcache_maxclass) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, tcache); - malloc_mutex_unlock(&arena->lock); } else idalloc(tcache); } @@ -415,13 +378,11 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } #endif -bool +void tcache_boot(void) { if (opt_tcache) { - unsigned i; - /* * If necessary, clamp opt_lg_tcache_max, now that * small_maxclass and arena_maxclass are known. @@ -436,28 +397,6 @@ tcache_boot(void) nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); - /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * - sizeof(tcache_bin_info_t)); - if (tcache_bin_info == NULL) - return (true); - stack_nelms = 0; - for (i = 0; i < nbins; i++) { - if ((arena_bin_info[i].nregs << 1) <= - TCACHE_NSLOTS_SMALL_MAX) { - tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); - } else { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MAX; - } - stack_nelms += tcache_bin_info[i].ncached_max; - } - for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; - stack_nelms += tcache_bin_info[i].ncached_max; - } - /* Compute incremental GC event threshold. */ if (opt_lg_tcache_gc_sweep >= 0) { tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / @@ -473,8 +412,6 @@ tcache_boot(void) abort(); } } - - return (false); } /******************************************************************************/ #endif /* JEMALLOC_TCACHE */ -- cgit v1.2.3