diff options
Diffstat (limited to 'dep/jemalloc/src')
-rw-r--r-- | dep/jemalloc/src/arena.c | 406 | ||||
-rw-r--r-- | dep/jemalloc/src/background_thread.c | 151 | ||||
-rw-r--r-- | dep/jemalloc/src/base.c | 166 | ||||
-rw-r--r-- | dep/jemalloc/src/bin.c | 50 | ||||
-rw-r--r-- | dep/jemalloc/src/ctl.c | 307 | ||||
-rw-r--r-- | dep/jemalloc/src/div.c | 55 | ||||
-rw-r--r-- | dep/jemalloc/src/extent.c | 510 | ||||
-rw-r--r-- | dep/jemalloc/src/extent_dss.c | 5 | ||||
-rw-r--r-- | dep/jemalloc/src/jemalloc.c | 198 | ||||
-rw-r--r-- | dep/jemalloc/src/jemalloc_cpp.cpp | 23 | ||||
-rw-r--r-- | dep/jemalloc/src/log.c | 78 | ||||
-rw-r--r-- | dep/jemalloc/src/malloc_io.c | 17 | ||||
-rw-r--r-- | dep/jemalloc/src/mutex.c | 5 | ||||
-rw-r--r-- | dep/jemalloc/src/pages.c | 228 | ||||
-rw-r--r-- | dep/jemalloc/src/prof.c | 19 | ||||
-rw-r--r-- | dep/jemalloc/src/spin.c | 4 | ||||
-rw-r--r-- | dep/jemalloc/src/stats.c | 1781 | ||||
-rw-r--r-- | dep/jemalloc/src/sz.c | 3 | ||||
-rw-r--r-- | dep/jemalloc/src/tcache.c | 59 | ||||
-rw-r--r-- | dep/jemalloc/src/tsd.c | 10 | ||||
-rw-r--r-- | dep/jemalloc/src/zone.c | 30 |
21 files changed, 2533 insertions, 1572 deletions
diff --git a/dep/jemalloc/src/arena.c b/dep/jemalloc/src/arena.c index 632fce5233e..5d55bf1a060 100644 --- a/dep/jemalloc/src/arena.c +++ b/dep/jemalloc/src/arena.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/div.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" @@ -32,21 +33,6 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; -const arena_bin_info_t arena_bin_info[NBINS] = { -#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ - {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, -#define BIN_INFO_bin_no(reg_size, slab_size, nregs) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - BIN_INFO_bin_##bin((1U<<lg_grp) + (ndelta<<lg_delta), \ - (pgs << LG_PAGE), (pgs << LG_PAGE) / ((1U<<lg_grp) + \ - (ndelta<<lg_delta))) - SIZE_CLASSES -#undef BIN_INFO_bin_yes -#undef BIN_INFO_bin_no -#undef SC -}; - const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ h, @@ -54,6 +40,8 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #undef STEP }; +static div_info_t arena_binind_div_info[NBINS]; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -62,157 +50,18 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit, - bool is_background_thread); + size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin); + bin_t *bin); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin); + bin_t *bin); /******************************************************************************/ -static bool -arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { - if (config_debug) { - for (size_t i = 0; i < sizeof(arena_stats_t); i++) { - assert(((char *)arena_stats)[i] == 0); - } - } -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", - WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { - return true; - } -#endif - /* Memory is zeroed, so there is no need to clear stats. */ - return false; -} - -static void -arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_lock(tsdn, &arena_stats->mtx); -#endif -} - -static void -arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_unlock(tsdn, &arena_stats->mtx); -#endif -} - -static uint64_t -arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_u64(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return *p; -#endif -} - -static void -arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p += x; -#endif -} - -UNUSED static void -arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p -= x; - assert(*p + x >= *p); -#endif -} - -/* - * Non-atomically sets *dst += src. *dst needs external synchronization. - * This lets us avoid the cost of a fetch_add when its unnecessary (note that - * the types here are atomic). - */ -static void -arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); - atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); -#else - *dst += src; -#endif -} - -static size_t -arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_zu(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return atomic_load_zu(p, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur + x, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur - x, ATOMIC_RELAXED); -#endif -} - -/* Like the _u64 variant, needs an externally synchronized *dst. */ -static void -arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { - size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); - atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); -} - -void -arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - szind_t szind, uint64_t nrequests) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - - NBINS].nrequests, nrequests); - arena_stats_unlock(tsdn, arena_stats); -} - -void -arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); - arena_stats_unlock(tsdn, arena_stats); -} - void -arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, +arena_basic_stats_merge(UNUSED tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nthreads += arena_nthreads_get(arena, false); @@ -228,15 +77,15 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { + bin_stats_t *bstats, arena_stats_large_t *lstats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, muzzy_decay_ms, nactive, ndirty, nmuzzy); - size_t base_allocated, base_resident, base_mapped; + size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, - &base_mapped); + &base_mapped, &metadata_thp); arena_stats_lock(tsdn, &arena->stats); @@ -267,6 +116,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->base, base_allocated); arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); + arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + extents_npages_get(&arena->extents_dirty) + @@ -303,16 +153,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* tcache_bytes counts currently cached bytes. */ atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - tcache_t *tcache; - ql_foreach(tcache, &arena->tcache_ql, link) { + cache_bin_array_descriptor_t *descriptor; + ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { szind_t i = 0; for (; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } @@ -351,20 +201,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, nstime_subtract(&astats->uptime, &arena->create_time); for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(tsdn, &bin->lock); - malloc_mutex_prof_read(tsdn, &bstats[i].mutex_data, &bin->lock); - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - bstats[i].curregs += bin->stats.curregs; - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - bstats[i].nslabs += bin->stats.nslabs; - bstats[i].reslabs += bin->stats.reslabs; - bstats[i].curslabs += bin->stats.curslabs; - malloc_mutex_unlock(tsdn, &bin->lock); + bin_stats_merge(tsdn, &bstats[i], &arena->bins[i]); } } @@ -384,8 +221,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, } static void * -arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, - const arena_bin_info_t *bin_info) { +arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { void *ret; arena_slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; @@ -412,37 +248,22 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % - (uintptr_t)arena_bin_info[binind].reg_size == 0); + (uintptr_t)bin_infos[binind].reg_size == 0); - /* Avoid doing division with a variable divisor. */ diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); - switch (binind) { -#define REGIND_bin_yes(index, reg_size) \ - case index: \ - regind = diff / (reg_size); \ - assert(diff == regind * (reg_size)); \ - break; -#define REGIND_bin_no(index, reg_size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - REGIND_bin_##bin(index, (1U<<lg_grp) + (ndelta<<lg_delta)) - SIZE_CLASSES -#undef REGIND_bin_yes -#undef REGIND_bin_no -#undef SC - default: not_reached(); - } - assert(regind < arena_bin_info[binind].nregs); + /* Avoid doing division with a variable divisor. */ + regind = div_compute(&arena_binind_div_info[binind], diff); + + assert(regind < bin_infos[binind].nregs); return regind; } static void -arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, - arena_slab_data_t *slab_data, void *ptr) { +arena_slab_reg_dalloc(extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { szind_t binind = extent_szind_get(slab); - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); assert(extent_nfree_get(slab) < bin_info->nregs); @@ -692,7 +513,8 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool is_background_thread) { if (current_npages > npages_limit) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - npages_limit, is_background_thread); + npages_limit, current_npages - npages_limit, + is_background_thread); } } @@ -738,7 +560,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static void -arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { +arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { arena_decay_ms_write(decay, decay_ms); if (decay_ms > 0) { nstime_init(&decay->interval, (uint64_t)decay_ms * @@ -755,8 +577,8 @@ arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { } static bool -arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, - decay_stats_t *stats) { +arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, + arena_stats_decay_t *stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_decay_t); i++) { assert(((char *)decay)[i] == 0); @@ -768,7 +590,7 @@ arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, return true; } decay->purging = false; - arena_decay_reinit(decay, extents, decay_ms); + arena_decay_reinit(decay, decay_ms); /* Memory is zeroed, so there is no need to clear stats. */ if (config_stats) { decay->stats = stats; @@ -798,7 +620,8 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - 0, is_background_thread); + 0, extents_npages_get(extents), + is_background_thread); } return false; } @@ -876,7 +699,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. */ - arena_decay_reinit(decay, extents, decay_ms); + arena_decay_reinit(decay, decay_ms); arena_maybe_decay(tsdn, arena, decay, extents, false); malloc_mutex_unlock(tsdn, &decay->mtx); @@ -900,14 +723,15 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, - extent_list_t *decay_extents) { + size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Stash extents according to npages_limit. */ size_t nstashed = 0; extent_t *extent; - while ((extent = extents_evict(tsdn, arena, r_extent_hooks, extents, + while (nstashed < npages_decay_max && + (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, npages_limit)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; @@ -982,12 +806,15 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } /* - * npages_limit: Decay as many dirty extents as possible without violating the - * invariant: (extents_npages_get(extents) >= npages_limit) + * npages_limit: Decay at most npages_decay_max pages without violating the + * invariant: (extents_npages_get(extents) >= npages_limit). We need an upper + * bound on number of pages in order to prevent unbounded growth (namely in + * stashed), otherwise unbounded new pages could be added to extents during the + * current decay run, so that the purging thread never finishes. */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all, size_t npages_limit, + extents_t *extents, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -1005,7 +832,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extent_list_init(&decay_extents); size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, - npages_limit, &decay_extents); + npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { UNUSED size_t npurged = arena_decay_stashed(tsdn, arena, &extent_hooks, decay, extents, all, &decay_extents, @@ -1023,7 +850,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (all) { malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, extents, all, 0, - is_background_thread); + extents_npages_get(extents), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -1036,7 +863,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents, is_background_thread); - size_t npages_new; + UNUSED size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. */ npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1]; @@ -1045,7 +872,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (have_background_thread && background_thread_enabled() && epoch_advanced && !is_background_thread) { - background_thread_interval_check(tsdn, arena, decay, npages_new); + background_thread_interval_check(tsdn, arena, decay, + npages_new); } return false; @@ -1082,18 +910,18 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { } static void -arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) > 0); extent_heap_insert(&bin->slabs_nonfull, slab); } static void -arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { extent_heap_remove(&bin->slabs_nonfull, slab); } static extent_t * -arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { +arena_bin_slabs_nonfull_tryget(bin_t *bin) { extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); if (slab == NULL) { return NULL; @@ -1105,7 +933,7 @@ arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { } static void -arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) == 0); /* * Tracking extents is required by arena_reset, which is not allowed @@ -1119,7 +947,7 @@ arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { } static void -arena_bin_slabs_full_remove(arena_t *arena, arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) { if (arena_is_auto(arena)) { return; } @@ -1173,7 +1001,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < NBINS; i++) { extent_t *slab; - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (bin->slabcur != NULL) { slab = bin->slabcur; @@ -1262,7 +1090,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info, + extent_hooks_t **r_extent_hooks, const bin_info_t *bin_info, szind_t szind) { extent_t *slab; bool zero, commit; @@ -1285,7 +1113,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, static extent_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, - const arena_bin_info_t *bin_info) { + const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1321,10 +1149,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, } static extent_t * -arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, +arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { extent_t *slab; - const arena_bin_info_t *bin_info; + const bin_info_t *bin_info; /* Look for a usable slab. */ slab = arena_bin_slabs_nonfull_tryget(bin); @@ -1333,7 +1161,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, } /* No existing slabs have any space available. */ - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; /* Allocate a new slab. */ malloc_mutex_unlock(tsdn, &bin->lock); @@ -1364,12 +1192,12 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, /* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, +arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { - const arena_bin_info_t *bin_info; + const bin_info_t *bin_info; extent_t *slab; - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; if (!arena_is_auto(arena) && bin->slabcur != NULL) { arena_bin_slabs_full_insert(arena, bin, bin->slabcur); bin->slabcur = NULL; @@ -1381,7 +1209,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, * bin lock in arena_bin_nonfull_slab_get(). */ if (extent_nfree_get(bin->slabcur) > 0) { - void *ret = arena_slab_reg_alloc(tsdn, bin->slabcur, + void *ret = arena_slab_reg_alloc(bin->slabcur, bin_info); if (slab != NULL) { /* @@ -1415,14 +1243,14 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, assert(extent_nfree_get(bin->slabcur) > 0); - return arena_slab_reg_alloc(tsdn, slab, bin_info); + return arena_slab_reg_alloc(slab, bin_info); } void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; - arena_bin_t *bin; + bin_t *bin; assert(tbin->ncached == 0); @@ -1437,8 +1265,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void *ptr; if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ptr = arena_slab_reg_alloc(tsdn, slab, - &arena_bin_info[binind]); + ptr = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1455,8 +1282,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, break; } if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ptr, &arena_bin_info[binind], - true); + arena_alloc_junk_small(ptr, &bin_infos[binind], true); } /* Insert such that low regions get used first. */ *(tbin->avail - nfill + i) = ptr; @@ -1474,14 +1300,14 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } void -arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) { +arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero) { if (!zero) { memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); } } static void -arena_dalloc_junk_small_impl(void *ptr, const arena_bin_info_t *bin_info) { +arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); } arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = @@ -1490,7 +1316,7 @@ arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; - arena_bin_t *bin; + bin_t *bin; size_t usize; extent_t *slab; @@ -1500,7 +1326,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { malloc_mutex_lock(tsdn, &bin->lock); if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ret = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); + ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1524,14 +1350,14 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); + &bin_infos[binind], false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], + arena_alloc_junk_small(ret, &bin_infos[binind], true); } memset(ret, 0, usize); @@ -1636,13 +1462,13 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, } static void -arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { +arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { /* Dissociate slab from bin. */ if (slab == bin->slabcur) { bin->slabcur = NULL; } else { szind_t binind = extent_szind_get(slab); - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; /* * The following block's conditional is necessary because if the @@ -1659,7 +1485,7 @@ arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { + bin_t *bin) { assert(slab != bin->slabcur); malloc_mutex_unlock(tsdn, &bin->lock); @@ -1673,8 +1499,8 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { +arena_bin_lower_slab(UNUSED tsdn_t *tsdn, arena_t *arena, extent_t *slab, + bin_t *bin) { assert(extent_nfree_get(slab) > 0); /* @@ -1704,14 +1530,14 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, void *ptr, bool junked) { arena_slab_data_t *slab_data = extent_slab_data_get(slab); szind_t binind = extent_szind_get(slab); - arena_bin_t *bin = &arena->bins[binind]; - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + bin_t *bin = &arena->bins[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, bin_info); } - arena_slab_reg_dalloc(tsdn, slab, slab_data, ptr); + arena_slab_reg_dalloc(slab, slab_data, ptr); unsigned nfree = extent_nfree_get(slab); if (nfree == bin_info->nregs) { arena_dissociate_bin_slab(arena, slab, bin); @@ -1736,7 +1562,7 @@ arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { szind_t binind = extent_szind_get(extent); - arena_bin_t *bin = &arena->bins[binind]; + bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false); @@ -1770,7 +1596,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, * Avoid moving the allocation if the size class can be left the * same. */ - assert(arena_bin_info[sz_size2index(oldsize)].reg_size == + assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize); if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) != sz_size2index(oldsize)) && (size > oldsize || usize_max < @@ -1885,6 +1711,33 @@ arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { return false; } +bool +arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, + size_t *new_limit) { + assert(opt_retain); + + pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); + if (new_limit != NULL) { + size_t limit = *new_limit; + /* Grow no more than the new limit. */ + if ((new_ind = sz_psz2ind(limit + 1) - 1) > + EXTENT_GROW_MAX_PIND) { + return true; + } + } + + malloc_mutex_lock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + if (old_limit != NULL) { + *old_limit = sz_pind2sz(arena->retain_grow_limit); + } + if (new_limit != NULL) { + arena->retain_grow_limit = new_ind; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + + return false; +} + unsigned arena_nthreads_get(arena_t *arena, bool internal) { return atomic_load_u(&arena->nthreads[internal], ATOMIC_RELAXED); @@ -1935,6 +1788,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } ql_new(&arena->tcache_ql); + ql_new(&arena->cache_bin_array_descriptor_ql); if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2001,16 +1855,17 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (arena_decay_init(&arena->decay_dirty, &arena->extents_dirty, + if (arena_decay_init(&arena->decay_dirty, arena_dirty_decay_ms_default_get(), &arena->stats.decay_dirty)) { goto label_error; } - if (arena_decay_init(&arena->decay_muzzy, &arena->extents_muzzy, + if (arena_decay_init(&arena->decay_muzzy, arena_muzzy_decay_ms_default_get(), &arena->stats.decay_muzzy)) { goto label_error; } arena->extent_grow_next = sz_psz2ind(HUGEPAGE); + arena->retain_grow_limit = EXTENT_GROW_MAX_PIND; if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2024,17 +1879,10 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Initialize bins. */ for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock, "arena_bin", - WITNESS_RANK_ARENA_BIN, malloc_mutex_rank_exclusive)) { + bool err = bin_init(&arena->bins[i]); + if (err) { goto label_error; } - bin->slabcur = NULL; - extent_heap_new(&bin->slabs_nonfull); - extent_list_init(&bin->slabs_full); - if (config_stats) { - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); - } } arena->base = base; @@ -2070,6 +1918,16 @@ void arena_boot(void) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms); +#define REGIND_bin_yes(index, reg_size) \ + div_init(&arena_binind_div_info[(index)], (reg_size)); +#define REGIND_bin_no(index, reg_size) +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ + lg_delta_lookup) \ + REGIND_bin_##bin(index, (1U<<lg_grp) + (ndelta << lg_delta)) + SIZE_CLASSES +#undef REGIND_bin_yes +#undef REGIND_bin_no +#undef SC } void @@ -2115,7 +1973,7 @@ arena_prefork6(tsdn_t *tsdn, arena_t *arena) { void arena_prefork7(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < NBINS; i++) { - malloc_mutex_prefork(tsdn, &arena->bins[i].lock); + bin_prefork(tsdn, &arena->bins[i]); } } @@ -2124,7 +1982,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + bin_postfork_parent(tsdn, &arena->bins[i]); } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); @@ -2154,15 +2012,21 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } if (config_stats) { ql_new(&arena->tcache_ql); + ql_new(&arena->cache_bin_array_descriptor_ql); tcache_t *tcache = tcache_get(tsdn_tsd(tsdn)); if (tcache != NULL && tcache->arena == arena) { ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); + cache_bin_array_descriptor_init( + &tcache->cache_bin_array_descriptor, + tcache->bins_small, tcache->bins_large); + ql_tail_insert(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); } } for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + bin_postfork_child(tsdn, &arena->bins[i]); } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); diff --git a/dep/jemalloc/src/background_thread.c b/dep/jemalloc/src/background_thread.c index eb30eb5b423..3517a3bb8ed 100644 --- a/dep/jemalloc/src/background_thread.c +++ b/dep/jemalloc/src/background_thread.c @@ -11,12 +11,14 @@ #define BACKGROUND_THREAD_DEFAULT false /* Read-only after initialization. */ bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; +size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT; /* Used for thread creation, termination and stats. */ malloc_mutex_t background_thread_lock; /* Indicates global state. Atomic because decay reads this w/o locking. */ atomic_b_t background_thread_enabled_state; size_t n_background_threads; +size_t max_background_threads; /* Thread info per-index. */ background_thread_info_t *background_thread_info; @@ -30,19 +32,20 @@ bool can_enable_background_thread; static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); -static pthread_once_t once_control = PTHREAD_ONCE_INIT; static void -pthread_create_wrapper_once(void) { +pthread_create_wrapper_init(void) { #ifdef JEMALLOC_LAZY_LOCK - isthreaded = true; + if (!isthreaded) { + isthreaded = true; + } #endif } int pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *__restrict arg) { - pthread_once(&once_control, pthread_create_wrapper_once); + pthread_create_wrapper_init(); return pthread_create_fptr(thread, attr, start_routine, arg); } @@ -286,7 +289,7 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; unsigned narenas = narenas_total_get(); - for (unsigned i = ind; i < narenas; i += ncpus) { + for (unsigned i = ind; i < narenas; i += max_background_threads) { arena_t *arena = arena_get(tsdn, i, false); if (!arena) { continue; @@ -379,35 +382,32 @@ background_thread_create_signals_masked(pthread_t *thread, return create_err; } -static void +static bool check_background_thread_creation(tsd_t *tsd, unsigned *n_created, bool *created_threads) { + bool ret = false; if (likely(*n_created == n_background_threads)) { - return; + return ret; } - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx); -label_restart: - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); - for (unsigned i = 1; i < ncpus; i++) { + tsdn_t *tsdn = tsd_tsdn(tsd); + malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx); + for (unsigned i = 1; i < max_background_threads; i++) { if (created_threads[i]) { continue; } background_thread_info_t *info = &background_thread_info[i]; - malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - assert(info->state != background_thread_paused); + malloc_mutex_lock(tsdn, &info->mtx); + /* + * In case of the background_thread_paused state because of + * arena reset, delay the creation. + */ bool create = (info->state == background_thread_started); - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + malloc_mutex_unlock(tsdn, &info->mtx); if (!create) { continue; } - /* - * To avoid deadlock with prefork handlers (which waits for the - * mutex held here), unlock before calling pthread_create(). - */ - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); - pre_reentrancy(tsd, NULL); int err = background_thread_create_signals_masked(&info->thread, NULL, background_thread_entry, (void *)(uintptr_t)i); @@ -423,19 +423,21 @@ label_restart: abort(); } } - /* Restart since we unlocked. */ - goto label_restart; + /* Return to restart the loop since we unlocked. */ + ret = true; + break; } - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx); - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + malloc_mutex_lock(tsdn, &background_thread_info[0].mtx); + + return ret; } static void background_thread0_work(tsd_t *tsd) { /* Thread0 is also responsible for launching / terminating threads. */ - VARIABLE_ARRAY(bool, created_threads, ncpus); + VARIABLE_ARRAY(bool, created_threads, max_background_threads); unsigned i; - for (i = 1; i < ncpus; i++) { + for (i = 1; i < max_background_threads; i++) { created_threads[i] = false; } /* Start working, and create more threads when asked. */ @@ -445,8 +447,10 @@ background_thread0_work(tsd_t *tsd) { &background_thread_info[0])) { continue; } - check_background_thread_creation(tsd, &n_created, - (bool *)&created_threads); + if (check_background_thread_creation(tsd, &n_created, + (bool *)&created_threads)) { + continue; + } background_work_sleep_once(tsd_tsdn(tsd), &background_thread_info[0], 0); } @@ -456,15 +460,20 @@ background_thread0_work(tsd_t *tsd) { * the global background_thread mutex (and is waiting) for us. */ assert(!background_thread_enabled()); - for (i = 1; i < ncpus; i++) { + for (i = 1; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; assert(info->state != background_thread_paused); if (created_threads[i]) { background_threads_disable_single(tsd, info); } else { malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - /* Clear in case the thread wasn't created. */ - info->state = background_thread_stopped; + if (info->state != background_thread_stopped) { + /* The thread was not created. */ + assert(info->state == + background_thread_started); + n_background_threads--; + info->state = background_thread_stopped; + } malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } } @@ -498,7 +507,7 @@ background_work(tsd_t *tsd, unsigned ind) { static void * background_thread_entry(void *ind_arg) { unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; - assert(thread_ind < ncpus); + assert(thread_ind < max_background_threads); #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); #endif @@ -532,7 +541,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); /* We create at most NCPUs threads. */ - size_t thread_ind = arena_ind % ncpus; + size_t thread_ind = arena_ind % max_background_threads; background_thread_info_t *info = &background_thread_info[thread_ind]; bool need_new_thread; @@ -586,26 +595,29 @@ background_threads_enable(tsd_t *tsd) { assert(background_thread_enabled()); malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); - VARIABLE_ARRAY(bool, marked, ncpus); + VARIABLE_ARRAY(bool, marked, max_background_threads); unsigned i, nmarked; - for (i = 0; i < ncpus; i++) { + for (i = 0; i < max_background_threads; i++) { marked[i] = false; } nmarked = 0; + /* Thread 0 is required and created at the end. */ + marked[0] = true; /* Mark the threads we need to create for thread 0. */ unsigned n = narenas_total_get(); for (i = 1; i < n; i++) { - if (marked[i % ncpus] || + if (marked[i % max_background_threads] || arena_get(tsd_tsdn(tsd), i, false) == NULL) { continue; } - background_thread_info_t *info = &background_thread_info[i]; + background_thread_info_t *info = &background_thread_info[ + i % max_background_threads]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); assert(info->state == background_thread_stopped); background_thread_init(tsd, info); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - marked[i % ncpus] = true; - if (++nmarked == ncpus) { + marked[i % max_background_threads] = true; + if (++nmarked == max_background_threads) { break; } } @@ -720,14 +732,14 @@ background_thread_prefork0(tsdn_t *tsdn) { void background_thread_prefork1(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx); } } void background_thread_postfork_parent(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_postfork_parent(tsdn, &background_thread_info[i].mtx); } @@ -736,7 +748,7 @@ background_thread_postfork_parent(tsdn_t *tsdn) { void background_thread_postfork_child(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_postfork_child(tsdn, &background_thread_info[i].mtx); } @@ -749,7 +761,7 @@ background_thread_postfork_child(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &background_thread_lock); n_background_threads = 0; background_thread_enabled_set(tsdn, false); - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); info->state = background_thread_stopped; @@ -773,7 +785,7 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { stats->num_threads = n_background_threads; uint64_t num_runs = 0; nstime_init(&stats->run_interval, 0); - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); if (info->state != background_thread_stopped) { @@ -795,6 +807,26 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS +static bool +pthread_create_fptr_init(void) { + if (pthread_create_fptr != NULL) { + return false; + } + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); + if (pthread_create_fptr == NULL) { + can_enable_background_thread = false; + if (config_lazy_lock || opt_background_thread) { + malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } + } else { + can_enable_background_thread = true; + } + + return false; +} + /* * When lazy lock is enabled, we need to make sure setting isthreaded before * taking any background_thread locks. This is called early in ctl (instead of @@ -805,7 +837,8 @@ void background_thread_ctl_init(tsdn_t *tsdn) { malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - pthread_once(&once_control, pthread_create_wrapper_once); + pthread_create_fptr_init(); + pthread_create_wrapper_init(); #endif } @@ -818,18 +851,10 @@ background_thread_boot0(void) { "supports pthread only\n"); return true; } - #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); - if (pthread_create_fptr == NULL) { - can_enable_background_thread = false; - if (config_lazy_lock || opt_background_thread) { - malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } - } else { - can_enable_background_thread = true; + if ((config_lazy_lock || opt_background_thread) && + pthread_create_fptr_init()) { + return true; } #endif return false; @@ -841,6 +866,12 @@ background_thread_boot1(tsdn_t *tsdn) { assert(have_background_thread); assert(narenas_total_get() > 0); + if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT && + ncpus < MAX_BACKGROUND_THREAD_LIMIT) { + opt_max_background_threads = ncpus; + } + max_background_threads = opt_max_background_threads; + background_thread_enabled_set(tsdn, opt_background_thread); if (malloc_mutex_init(&background_thread_lock, "background_thread_global", @@ -848,17 +879,15 @@ background_thread_boot1(tsdn_t *tsdn) { malloc_mutex_rank_exclusive)) { return true; } - if (opt_background_thread) { - background_thread_ctl_init(tsdn); - } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, - b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE); + b0get(), opt_max_background_threads * + sizeof(background_thread_info_t), CACHELINE); if (background_thread_info == NULL) { return true; } - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; /* Thread mutex is rank_inclusive because of thread0. */ if (malloc_mutex_init(&info->mtx, "background_thread", diff --git a/dep/jemalloc/src/base.c b/dep/jemalloc/src/base.c index 97078b134d1..b0324b5d758 100644 --- a/dep/jemalloc/src/base.c +++ b/dep/jemalloc/src/base.c @@ -10,25 +10,40 @@ /******************************************************************************/ /* Data. */ -static base_t *b0; +static base_t *b0; + +metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT; + +const char *metadata_thp_mode_names[] = { + "disabled", + "auto", + "always" +}; /******************************************************************************/ +static inline bool +metadata_thp_madvise(void) { + return (metadata_thp_enabled() && + (init_system_thp_mode == thp_mode_default)); +} + static void * base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) { void *addr; bool zero = true; bool commit = true; + /* Use huge page sizes and alignment regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - + size_t alignment = HUGEPAGE; if (extent_hooks == &extent_hooks_default) { - addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); + addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, + addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment, &zero, &commit, ind); post_reentrancy(tsd); } @@ -51,16 +66,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, */ if (extent_hooks == &extent_hooks_default) { if (!extent_dalloc_mmap(addr, size)) { - return; + goto label_done; } if (!pages_decommit(addr, size)) { - return; + goto label_done; } if (!pages_purge_forced(addr, size)) { - return; + goto label_done; } if (!pages_purge_lazy(addr, size)) { - return; + goto label_done; } /* Nothing worked. This should never happen. */ not_reached(); @@ -70,27 +85,33 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, if (extent_hooks->dalloc != NULL && !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->decommit != NULL && !extent_hooks->decommit(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_forced != NULL && !extent_hooks->purge_forced(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_lazy != NULL && !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. */ - label_done: + label_post_reentrancy: post_reentrancy(tsd); - return; + } +label_done: + if (metadata_thp_madvise()) { + /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */ + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (size & HUGEPAGE_MASK) == 0); + pages_nohuge(addr, size); } } @@ -105,6 +126,56 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, extent_binit(extent, addr, size, sn); } +static size_t +base_get_num_blocks(base_t *base, bool with_new_block) { + base_block_t *b = base->blocks; + assert(b != NULL); + + size_t n_blocks = with_new_block ? 2 : 1; + while (b->next != NULL) { + n_blocks++; + b = b->next; + } + + return n_blocks; +} + +static void +base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { + assert(opt_metadata_thp == metadata_thp_auto); + malloc_mutex_assert_owner(tsdn, &base->mtx); + if (base->auto_thp_switched) { + return; + } + /* Called when adding a new block. */ + bool should_switch; + if (base_ind_get(base) != 0) { + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD); + } else { + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD_A0); + } + if (!should_switch) { + return; + } + + base->auto_thp_switched = true; + assert(!config_stats || base->n_thp == 0); + /* Make the initial blocks THP lazily. */ + base_block_t *block = base->blocks; + while (block != NULL) { + assert((block->size & HUGEPAGE_MASK) == 0); + pages_huge(block, block->size); + if (config_stats) { + base->n_thp += HUGEPAGE_CEILING(block->size - + extent_bsize_get(&block->extent)) >> LG_HUGEPAGE; + } + block = block->next; + assert(block == NULL || (base_ind_get(base) == 0)); + } +} + static void * base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, size_t alignment) { @@ -124,8 +195,8 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, } static void -base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t gap_size, void *addr, size_t size) { +base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size, + void *addr, size_t size) { if (extent_bsize_get(extent) > 0) { /* * Compute the index for the largest size class that does not @@ -140,23 +211,31 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, base->allocated += size; /* * Add one PAGE to base_resident for every page boundary that is - * crossed by the new allocation. + * crossed by the new allocation. Adjust n_thp similarly when + * metadata_thp is enabled. */ base->resident += PAGE_CEILING((uintptr_t)addr + size) - PAGE_CEILING((uintptr_t)addr - gap_size); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + if (metadata_thp_madvise() && (opt_metadata_thp == + metadata_thp_always || base->auto_thp_switched)) { + base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) + - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> + LG_HUGEPAGE; + assert(base->mapped >= base->n_thp << LG_HUGEPAGE); + } } } static void * -base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t size, size_t alignment) { +base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size, + size_t alignment) { void *ret; size_t gap_size; ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment); - base_extent_bump_alloc_post(tsdn, base, extent, gap_size, ret, size); + base_extent_bump_alloc_post(base, extent, gap_size, ret, size); return ret; } @@ -166,8 +245,8 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, * On success a pointer to the initialized base_block_t header is returned. */ static base_block_t * -base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, - pszind_t *pind_last, size_t *extent_sn_next, size_t size, +base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, + unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size, size_t alignment) { alignment = ALIGNMENT_CEILING(alignment, QUANTUM); size_t usize = ALIGNMENT_CEILING(size, alignment); @@ -193,6 +272,25 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, if (block == NULL) { return NULL; } + + if (metadata_thp_madvise()) { + void *addr = (void *)block; + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (block_size & HUGEPAGE_MASK) == 0); + if (opt_metadata_thp == metadata_thp_always) { + pages_huge(addr, block_size); + } else if (opt_metadata_thp == metadata_thp_auto && + base != NULL) { + /* base != NULL indicates this is not a new base. */ + malloc_mutex_lock(tsdn, &base->mtx); + base_auto_thp_switch(tsdn, base); + if (base->auto_thp_switched) { + pages_huge(addr, block_size); + } + malloc_mutex_unlock(tsdn, &base->mtx); + } + } + *pind_last = sz_psz2ind(block_size); block->size = block_size; block->next = NULL; @@ -216,7 +314,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { * called. */ malloc_mutex_unlock(tsdn, &base->mtx); - base_block_t *block = base_block_alloc(tsdn, extent_hooks, + base_block_t *block = base_block_alloc(tsdn, base, extent_hooks, base_ind_get(base), &base->pind_last, &base->extent_sn_next, size, alignment); malloc_mutex_lock(tsdn, &base->mtx); @@ -229,8 +327,16 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->allocated += sizeof(base_block_t); base->resident += PAGE_CEILING(sizeof(base_block_t)); base->mapped += block->size; + if (metadata_thp_madvise() && + !(opt_metadata_thp == metadata_thp_auto + && !base->auto_thp_switched)) { + assert(base->n_thp > 0); + base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> + LG_HUGEPAGE; + } assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } return &block->extent; } @@ -244,7 +350,7 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; - base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind, + base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { return NULL; @@ -265,6 +371,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->pind_last = pind_last; base->extent_sn_next = extent_sn_next; base->blocks = block; + base->auto_thp_switched = false; for (szind_t i = 0; i < NSIZES; i++) { extent_heap_new(&base->avail[i]); } @@ -272,10 +379,14 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); base->mapped = block->size; + base->n_thp = (opt_metadata_thp == metadata_thp_always) && + metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t)) + >> LG_HUGEPAGE : 0; assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base, + base_extent_bump_alloc_post(base, &block->extent, gap_size, base, base_size); return base; @@ -332,7 +443,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, goto label_return; } - ret = base_extent_bump_alloc(tsdn, base, extent, usize, alignment); + ret = base_extent_bump_alloc(base, extent, usize, alignment); if (esn != NULL) { *esn = extent_sn_get(extent); } @@ -368,7 +479,7 @@ base_alloc_extent(tsdn_t *tsdn, base_t *base) { void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, - size_t *mapped) { + size_t *mapped, size_t *n_thp) { cassert(config_stats); malloc_mutex_lock(tsdn, &base->mtx); @@ -377,6 +488,7 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, *allocated = base->allocated; *resident = base->resident; *mapped = base->mapped; + *n_thp = base->n_thp; malloc_mutex_unlock(tsdn, &base->mtx); } diff --git a/dep/jemalloc/src/bin.c b/dep/jemalloc/src/bin.c new file mode 100644 index 00000000000..0886bc4ea92 --- /dev/null +++ b/dep/jemalloc/src/bin.c @@ -0,0 +1,50 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/witness.h" + +const bin_info_t bin_infos[NBINS] = { +#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ + {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, +#define BIN_INFO_bin_no(reg_size, slab_size, nregs) +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ + lg_delta_lookup) \ + BIN_INFO_bin_##bin((1U<<lg_grp) + (ndelta<<lg_delta), \ + (pgs << LG_PAGE), (pgs << LG_PAGE) / ((1U<<lg_grp) + \ + (ndelta<<lg_delta))) + SIZE_CLASSES +#undef BIN_INFO_bin_yes +#undef BIN_INFO_bin_no +#undef SC +}; + +bool +bin_init(bin_t *bin) { + if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, + malloc_mutex_rank_exclusive)) { + return true; + } + bin->slabcur = NULL; + extent_heap_new(&bin->slabs_nonfull); + extent_list_init(&bin->slabs_full); + if (config_stats) { + memset(&bin->stats, 0, sizeof(bin_stats_t)); + } + return false; +} + +void +bin_prefork(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_prefork(tsdn, &bin->lock); +} + +void +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_parent(tsdn, &bin->lock); +} + +void +bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_child(tsdn, &bin->lock); +} diff --git a/dep/jemalloc/src/ctl.c b/dep/jemalloc/src/ctl.c index 36bc8fb5b75..1e713a3d104 100644 --- a/dep/jemalloc/src/ctl.c +++ b/dep/jemalloc/src/ctl.c @@ -57,6 +57,7 @@ static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ CTL_PROTO(version) CTL_PROTO(epoch) CTL_PROTO(background_thread) +CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_prof_name) @@ -75,16 +76,17 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_thp) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_background_thread) +CTL_PROTO(opt_max_background_threads) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) @@ -94,6 +96,8 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_thp) +CTL_PROTO(opt_lg_extent_max_active_fit) CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) @@ -117,6 +121,7 @@ CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_dirty_decay_ms) CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) +CTL_PROTO(arena_i_retain_grow_limit) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -134,6 +139,7 @@ CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlextents) CTL_PROTO(arenas_create) +CTL_PROTO(arenas_lookup) CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) @@ -182,6 +188,7 @@ CTL_PROTO(stats_arenas_i_muzzy_nmadvise) CTL_PROTO(stats_arenas_i_muzzy_purged) CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) +CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_resident) INDEX_PROTO(stats_arenas_i) @@ -191,6 +198,7 @@ CTL_PROTO(stats_background_thread_num_threads) CTL_PROTO(stats_background_thread_num_runs) CTL_PROTO(stats_background_thread_run_interval) CTL_PROTO(stats_metadata) +CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) @@ -266,7 +274,6 @@ static const ctl_named_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("thp"), CTL(config_thp)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -274,11 +281,13 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, {NAME("background_thread"), CTL(opt_background_thread)}, + {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -288,6 +297,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("thp"), CTL(opt_thp)}, + {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, @@ -316,7 +327,8 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("dss"), CTL(arena_i_dss)}, {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, - {NAME("extent_hooks"), CTL(arena_i_extent_hooks)} + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, + {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)} }; static const ctl_named_node_t super_arena_i_node[] = { {NAME(""), CHILD(named, arena_i)} @@ -362,7 +374,8 @@ static const ctl_named_node_t arenas_node[] = { {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlextents"), CTL(arenas_nlextents)}, {NAME("lextent"), CHILD(indexed, arenas_lextent)}, - {NAME("create"), CTL(arenas_create)} + {NAME("create"), CTL(arenas_create)}, + {NAME("lookup"), CTL(arenas_lookup)} }; static const ctl_named_node_t prof_node[] = { @@ -474,6 +487,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, {NAME("base"), CTL(stats_arenas_i_base)}, {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, @@ -512,6 +526,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, + {NAME("metadata_thp"), CTL(stats_metadata_thp)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("retained"), CTL(stats_retained)}, @@ -525,6 +540,7 @@ static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, {NAME("background_thread"), CTL(background_thread)}, + {NAME("max_background_threads"), CTL(max_background_threads)}, {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, @@ -550,7 +566,7 @@ static const ctl_named_node_t super_root_node[] = { * synchronized by the ctl mutex. */ static void -accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { +ctl_accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { #ifdef JEMALLOC_ATOMIC_U64 uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED); @@ -562,7 +578,7 @@ accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { /* Likewise: with ctl mutex synchronization, reading is simple. */ static uint64_t -arena_stats_read_u64(arena_stats_u64_t *p) { +ctl_arena_stats_read_u64(arena_stats_u64_t *p) { #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_u64(p, ATOMIC_RELAXED); #else @@ -570,7 +586,8 @@ arena_stats_read_u64(arena_stats_u64_t *p) { #endif } -static void accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { +static void +accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED); atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED); @@ -680,9 +697,9 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->ndalloc_small = 0; ctl_arena->astats->nrequests_small = 0; memset(ctl_arena->astats->bstats, 0, NBINS * - sizeof(malloc_bin_stats_t)); + sizeof(bin_stats_t)); memset(ctl_arena->astats->lstats, 0, (NSIZES - NBINS) * - sizeof(malloc_large_stats_t)); + sizeof(arena_stats_large_t)); } } @@ -745,18 +762,18 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.retained); } - accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, &astats->astats.decay_dirty.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, &astats->astats.decay_dirty.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, &astats->astats.decay_dirty.purged); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, &astats->astats.decay_muzzy.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, &astats->astats.decay_muzzy.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ @@ -773,6 +790,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.internal); accum_atomic_zu(&sdstats->astats.resident, &astats->astats.resident); + accum_atomic_zu(&sdstats->astats.metadata_thp, + &astats->astats.metadata_thp); } else { assert(atomic_load_zu( &astats->astats.internal, ATOMIC_RELAXED) == 0); @@ -794,11 +813,11 @@ MUTEX_PROF_ARENA_MUTEXES assert(atomic_load_zu(&astats->astats.allocated_large, ATOMIC_RELAXED) == 0); } - accum_arena_stats_u64(&sdstats->astats.nmalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nmalloc_large, &astats->astats.nmalloc_large); - accum_arena_stats_u64(&sdstats->astats.ndalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.ndalloc_large, &astats->astats.ndalloc_large); - accum_arena_stats_u64(&sdstats->astats.nrequests_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); accum_atomic_zu(&sdstats->astats.tcache_bytes, @@ -835,11 +854,11 @@ MUTEX_PROF_ARENA_MUTEXES } for (i = 0; i < NSIZES - NBINS; i++) { - accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, &astats->lstats[i].nmalloc); - accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, &astats->lstats[i].ndalloc); - accum_arena_stats_u64(&sdstats->lstats[i].nrequests, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].nrequests, &astats->lstats[i].nrequests); if (!destroyed) { sdstats->lstats[i].curlextents += @@ -938,6 +957,8 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) + atomic_load_zu(&ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); + ctl_stats->metadata_thp = atomic_load_zu( + &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); ctl_stats->mapped = atomic_load_zu( @@ -1549,6 +1570,71 @@ label_return: return ret; } +static int +max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + size_t oldval; + + if (!have_background_thread) { + return ENOENT; + } + background_thread_ctl_init(tsd_tsdn(tsd)); + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + if (newp == NULL) { + oldval = max_background_threads; + READ(oldval, size_t); + } else { + if (newlen != sizeof(size_t)) { + ret = EINVAL; + goto label_return; + } + oldval = max_background_threads; + READ(oldval, size_t); + + size_t newval = *(size_t *)newp; + if (newval == oldval) { + ret = 0; + goto label_return; + } + if (newval > opt_max_background_threads) { + ret = EINVAL; + goto label_return; + } + + if (background_thread_enabled()) { + if (!can_enable_background_thread) { + malloc_printf("<jemalloc>: Error in dlsym(" + "RTLD_NEXT, \"pthread_create\"). Cannot " + "enable background_thread\n"); + ret = EFAULT; + goto label_return; + } + background_thread_enabled_set(tsd_tsdn(tsd), false); + if (background_threads_disable(tsd)) { + ret = EFAULT; + goto label_return; + } + max_background_threads = newval; + background_thread_enabled_set(tsd_tsdn(tsd), true); + if (background_threads_enable(tsd)) { + ret = EFAULT; + goto label_return; + } + } else { + max_background_threads = newval; + } + } + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + + return ret; +} + /******************************************************************************/ CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) @@ -1560,7 +1646,6 @@ CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) -CTL_RO_CONFIG_GEN(config_thp, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) @@ -1568,12 +1653,15 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], + const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) +CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) @@ -1583,6 +1671,9 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) +CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, + size_t) CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) @@ -2162,20 +2253,41 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); - if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { - if (newp != NULL) { - extent_hooks_t *old_extent_hooks; - extent_hooks_t *new_extent_hooks - JEMALLOC_CC_SILENCE_INIT(NULL); - WRITE(new_extent_hooks, extent_hooks_t *); - old_extent_hooks = extent_hooks_set(tsd, arena, - new_extent_hooks); + if (arena_ind < narenas_total_get()) { + extent_hooks_t *old_extent_hooks; + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + if (arena_ind >= narenas_auto) { + ret = EFAULT; + goto label_return; + } + old_extent_hooks = + (extent_hooks_t *)&extent_hooks_default; READ(old_extent_hooks, extent_hooks_t *); + if (newp != NULL) { + /* Initialize a new arena as a side effect. */ + extent_hooks_t *new_extent_hooks + JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_extent_hooks, extent_hooks_t *); + arena = arena_init(tsd_tsdn(tsd), arena_ind, + new_extent_hooks); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + } } else { - extent_hooks_t *old_extent_hooks = - extent_hooks_get(arena); - READ(old_extent_hooks, extent_hooks_t *); + if (newp != NULL) { + extent_hooks_t *new_extent_hooks + JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_extent_hooks, extent_hooks_t *); + old_extent_hooks = extent_hooks_set(tsd, arena, + new_extent_hooks); + READ(old_extent_hooks, extent_hooks_t *); + } else { + old_extent_hooks = extent_hooks_get(arena); + READ(old_extent_hooks, extent_hooks_t *); + } } } else { ret = EFAULT; @@ -2187,6 +2299,42 @@ label_return: return ret; } +static int +arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + arena_t *arena; + + if (!opt_retain) { + /* Only relevant when retain is enabled. */ + return ENOENT; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + MIB_UNSIGNED(arena_ind, 1); + if (arena_ind < narenas_total_get() && (arena = + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { + size_t old_limit, new_limit; + if (newp != NULL) { + WRITE(new_limit, size_t); + } + bool err = arena_retain_grow_limit_get_set(tsd, arena, + &old_limit, newp != NULL ? &new_limit : NULL); + if (!err) { + READ(old_limit, size_t); + ret = 0; + } else { + ret = EFAULT; + } + } else { + ret = EFAULT; + } +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + static const ctl_named_node_t * arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; @@ -2248,7 +2396,7 @@ arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) + if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { ret = EFAULT; goto label_return; @@ -2279,9 +2427,9 @@ CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_slab_size, arena_bin_info[mib[2]].slab_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) static const ctl_named_node_t * arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > NBINS) { @@ -2325,6 +2473,36 @@ label_return: return ret; } +static int +arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + void *ptr; + extent_t *extent; + arena_t *arena; + + ptr = NULL; + ret = EINVAL; + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + WRITE(ptr, void *); + extent = iealloc(tsd_tsdn(tsd), ptr); + if (extent == NULL) + goto label_return; + + arena = extent_arena_get(extent); + if (arena == NULL) + goto label_return; + + arena_ind = arena_ind_get(arena); + READ(arena_ind, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + /******************************************************************************/ static int @@ -2460,6 +2638,7 @@ CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) +CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) @@ -2490,24 +2669,24 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.npurge), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), @@ -2515,6 +2694,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_base, CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.metadata_thp, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, ATOMIC_RELAXED), size_t) @@ -2534,14 +2716,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.ndalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) +/* + * Note: "nmalloc" here instead of "nrequests" in the read. This is intentional. + */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) /* Intentional. */ + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */ /* Lock profiling related APIs below. */ #define RO_MUTEX_CTL_GEN(n, l) \ @@ -2622,7 +2807,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, MUTEX_PROF_RESET(arena->base->mtx); for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; MUTEX_PROF_RESET(bin->lock); } } @@ -2659,14 +2844,14 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) diff --git a/dep/jemalloc/src/div.c b/dep/jemalloc/src/div.c new file mode 100644 index 00000000000..808892a133f --- /dev/null +++ b/dep/jemalloc/src/div.c @@ -0,0 +1,55 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/div.h" + +#include "jemalloc/internal/assert.h" + +/* + * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d. + * + * For any k, we have (here, all division is exact; not C-style rounding): + * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where + * r = (-2^k) mod d. + * + * Expanding this out: + * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k) + * = floor(n / d + (r / d) * (n / 2^k)). + * + * The fractional part of n / d is 0 (because of the assumption that d divides n + * exactly), so we have: + * ... = n / d + floor((r / d) * (n / 2^k)) + * + * So that our initial expression is equal to the quantity we seek, so long as + * (r / d) * (n / 2^k) < 1. + * + * r is a remainder mod d, so r < d and r / d < 1 always. We can make + * n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works. + */ + +void +div_init(div_info_t *div_info, size_t d) { + /* Nonsensical. */ + assert(d != 0); + /* + * This would make the value of magic too high to fit into a uint32_t + * (we would want magic = 2^32 exactly). This would mess with code gen + * on 32-bit machines. + */ + assert(d != 1); + + uint64_t two_to_k = ((uint64_t)1 << 32); + uint32_t magic = (uint32_t)(two_to_k / d); + + /* + * We want magic = ceil(2^k / d), but C gives us floor. We have to + * increment it unless the result was exact (i.e. unless d is a power of + * two). + */ + if (two_to_k % d != 0) { + magic++; + } + div_info->magic = magic; +#ifdef JEMALLOC_DEBUG + div_info->d = d; +#endif +} diff --git a/dep/jemalloc/src/extent.c b/dep/jemalloc/src/extent.c index fa45c84d34f..09d6d771817 100644 --- a/dep/jemalloc/src/extent.c +++ b/dep/jemalloc/src/extent.c @@ -17,6 +17,8 @@ rtree_t extents_rtree; /* Keyed by the address of the extent_t being protected. */ mutex_pool_t extent_mutex_pool; +size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; + static const bitmap_info_t extents_bitmap_info = BITMAP_INFO_INITIALIZER(NPSIZES+1); @@ -117,7 +119,7 @@ static void extent_record(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ -rb_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, rb_link, +ph_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, ph_link, extent_esnead_comp) typedef enum { @@ -304,8 +306,7 @@ extents_npages_get(extents_t *extents) { } static void -extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, - bool preserve_lru) { +extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -317,9 +318,7 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, (size_t)pind); } extent_heap_insert(&extents->heaps[pind], extent); - if (!preserve_lru) { - extent_list_append(&extents->lru, extent); - } + extent_list_append(&extents->lru, extent); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we @@ -333,8 +332,7 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, } static void -extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, - bool preserve_lru) { +extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -346,9 +344,7 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, bitmap_set(extents->bitmap, &extents_bitmap_info, (size_t)pind); } - if (!preserve_lru) { - extent_list_remove(&extents->lru, extent); - } + extent_list_remove(&extents->lru, extent); size_t npages = size >> LG_PAGE; /* * As in extents_insert_locked, we hold extents->mtx and so don't need @@ -361,6 +357,43 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } +/* + * Find an extent with size [min_size, max_size) to satisfy the alignment + * requirement. For each size, try only the first extent in the heap. + */ +static extent_t * +extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, + size_t alignment) { + pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(extent_size_quantize_ceil(max_size)); + + for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, + &extents_bitmap_info, (size_t)pind); i < pind_max; i = + (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + (size_t)i+1)) { + assert(i < NPSIZES); + assert(!extent_heap_empty(&extents->heaps[i])); + extent_t *extent = extent_heap_first(&extents->heaps[i]); + uintptr_t base = (uintptr_t)extent_base_get(extent); + size_t candidate_size = extent_size_get(extent); + assert(candidate_size >= min_size); + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= min_size) { + return extent; + } + } + + return NULL; +} + /* Do any-best-fit extent selection, i.e. select any extent that best fits. */ static extent_t * extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, @@ -369,8 +402,15 @@ extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); if (i < NPSIZES+1) { + /* + * In order to reduce fragmentation, avoid reusing and splitting + * large extents for much smaller sizes. + */ + if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + return NULL; + } assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_any(&extents->heaps[i]); + extent_t *extent = extent_heap_first(&extents->heaps[i]); assert(extent_size_get(extent) >= size); return extent; } @@ -415,12 +455,30 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, */ static extent_t * extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - size_t size) { + size_t esize, size_t alignment) { malloc_mutex_assert_owner(tsdn, &extents->mtx); - return extents->delay_coalesce ? extents_best_fit_locked(tsdn, arena, - extents, size) : extents_first_fit_locked(tsdn, arena, extents, - size); + size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (max_size < esize) { + return NULL; + } + + extent_t *extent = extents->delay_coalesce ? + extents_best_fit_locked(tsdn, arena, extents, max_size) : + extents_first_fit_locked(tsdn, arena, extents, max_size); + + if (alignment > PAGE && extent == NULL) { + /* + * max_size guarantees the alignment requirement but is rather + * pessimistic. Next we try to satisfy the aligned allocation + * with sizes in [esize, max_size). + */ + extent = extents_fit_alignment(extents, esize, max_size, + alignment); + } + + return extent; } static bool @@ -436,7 +494,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, if (!coalesced) { return true; } - extents_insert_locked(tsdn, extents, extent, true); + extents_insert_locked(tsdn, extents, extent); return false; } @@ -449,8 +507,10 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - return extent_recycle(tsdn, arena, r_extent_hooks, extents, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); + extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, extents, + new_addr, size, pad, alignment, slab, szind, zero, commit, false); + assert(extent == NULL || extent_dumpable_get(extent)); + return extent; } void @@ -458,6 +518,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -487,14 +548,13 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, goto label_return; } /* Check the eviction limit. */ - size_t npages = extent_size_get(extent) >> LG_PAGE; size_t extents_npages = atomic_load_zu(&extents->npages, ATOMIC_RELAXED); - if (extents_npages - npages < npages_min) { + if (extents_npages <= npages_min) { extent = NULL; goto label_return; } - extents_remove_locked(tsdn, extents, extent, false); + extents_remove_locked(tsdn, extents, extent); if (!extents->delay_coalesce) { break; } @@ -567,29 +627,29 @@ extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extent_state_active); extent_state_set(extent, extents_state_get(extents)); - extents_insert_locked(tsdn, extents, extent, preserve_lru); + extents_insert_locked(tsdn, extents, extent); } static void extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { malloc_mutex_lock(tsdn, &extents->mtx); - extent_deactivate_locked(tsdn, arena, extents, extent, preserve_lru); + extent_deactivate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); } static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extents_state_get(extents)); - extents_remove_locked(tsdn, extents, extent, preserve_lru); + extents_remove_locked(tsdn, extents, extent); extent_state_set(extent, extent_state_active); } @@ -723,6 +783,13 @@ extent_reregister(tsdn_t *tsdn, extent_t *extent) { assert(!err); } +/* + * Removes all pointers to the given extent from the global rtree indices for + * its interior. This is relevant for slab extents, for which we need to do + * metadata lookups at places other than the head of the extent. We deregister + * on the interior, then, when an extent moves from being an active slab to an + * inactive state. + */ static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent) { @@ -737,8 +804,11 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } } +/* + * Removes all pointers to the given extent from the global rtree. + */ static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) { +extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; @@ -755,16 +825,30 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { extent_unlock(tsdn, extent); - if (config_prof) { + if (config_prof && gdump) { extent_gdump_sub(tsdn, extent); } } +static void +extent_deregister(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, true); +} + +static void +extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, false); +} + +/* + * Tries to find and remove an extent from extents that can be used for the + * given allocation request. + */ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - bool *zero, bool *commit, bool growing_retained) { + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); @@ -786,11 +870,6 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, } size_t esize = size + pad; - size_t alloc_size = esize + PAGE_CEILING(alignment) - PAGE; - /* Beware size_t wrap-around. */ - if (alloc_size < esize) { - return NULL; - } malloc_mutex_lock(tsdn, &extents->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; @@ -812,86 +891,172 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, unlock_extent); } } else { - extent = extents_fit_locked(tsdn, arena, extents, alloc_size); + extent = extents_fit_locked(tsdn, arena, extents, esize, + alignment); } if (extent == NULL) { malloc_mutex_unlock(tsdn, &extents->mtx); return NULL; } - extent_activate_locked(tsdn, arena, extents, extent, false); + extent_activate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); - if (extent_zeroed_get(extent)) { - *zero = true; - } - if (extent_committed_get(extent)) { - *commit = true; - } - return extent; } -static extent_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, +/* + * Given an allocation request and an extent guaranteed to be able to satisfy + * it, this splits off lead and trail extents, leaving extent pointing to an + * extent satisfying the allocation. + * This function doesn't put lead or trail into any extents_t; it's the caller's + * job to ensure that they can be reused. + */ +typedef enum { + /* + * Split successfully. lead, extent, and trail, are modified to extents + * describing the ranges before, in, and after the given allocation. + */ + extent_split_interior_ok, + /* + * The extent can't satisfy the given allocation request. None of the + * input extent_t *s are touched. + */ + extent_split_interior_cant_alloc, + /* + * In a potentially invalid state. Must leak (if *to_leak is non-NULL), + * and salvage what's still salvageable (if *to_salvage is non-NULL). + * None of lead, extent, or trail are valid. + */ + extent_split_interior_error +} extent_split_interior_result_t; + +static extent_split_interior_result_t +extent_split_interior(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, + /* The result of splitting, in case of success. */ + extent_t **extent, extent_t **lead, extent_t **trail, + /* The mess to clean up, in case of error. */ + extent_t **to_leak, extent_t **to_salvage, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, extent_t *extent, bool growing_retained) { + szind_t szind, bool growing_retained) { size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); assert(new_addr == NULL || leadsize == 0); - assert(extent_size_get(extent) >= leadsize + esize); - size_t trailsize = extent_size_get(extent) - leadsize - esize; + if (extent_size_get(*extent) < leadsize + esize) { + return extent_split_interior_cant_alloc; + } + size_t trailsize = extent_size_get(*extent) - leadsize - esize; + + *lead = NULL; + *trail = NULL; + *to_leak = NULL; + *to_salvage = NULL; /* Split the lead. */ if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, - lead, leadsize, NSIZES, false, esize + trailsize, szind, + *lead = *extent; + *extent = extent_split_impl(tsdn, arena, r_extent_hooks, + *lead, leadsize, NSIZES, false, esize + trailsize, szind, slab, growing_retained); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, extents, - lead, growing_retained); - return NULL; + if (*extent == NULL) { + *to_leak = *lead; + *lead = NULL; + return extent_split_interior_error; } - extent_deactivate(tsdn, arena, extents, lead, false); } /* Split the trail. */ if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, - r_extent_hooks, extent, esize, szind, slab, trailsize, - NSIZES, false, growing_retained); - if (trail == NULL) { - extent_deregister(tsdn, extent); - extents_leak(tsdn, arena, r_extent_hooks, extents, - extent, growing_retained); - return NULL; + *trail = extent_split_impl(tsdn, arena, r_extent_hooks, *extent, + esize, szind, slab, trailsize, NSIZES, false, + growing_retained); + if (*trail == NULL) { + *to_leak = *extent; + *to_salvage = *lead; + *lead = NULL; + *extent = NULL; + return extent_split_interior_error; } - extent_deactivate(tsdn, arena, extents, trail, false); - } else if (leadsize == 0) { + } + + if (leadsize == 0 && trailsize == 0) { /* * Splitting causes szind to be set as a side effect, but no * splitting occurred. */ - extent_szind_set(extent, szind); + extent_szind_set(*extent, szind); if (szind != NSIZES) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { + (uintptr_t)extent_addr_get(*extent), szind, slab); + if (slab && extent_size_get(*extent) > PAGE) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_past_get(extent) - + (uintptr_t)extent_past_get(*extent) - (uintptr_t)PAGE, szind, slab); } } } - return extent; + return extent_split_interior_ok; } +/* + * This fulfills the indicated allocation request out of the given extent (which + * the caller should have ensured was big enough). If there's any unused space + * before or after the resulting allocation, that space is given its own extent + * and put back into extents. + */ +static extent_t * +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, extent_t *extent, bool growing_retained) { + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, + growing_retained); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_deactivate(tsdn, arena, extents, lead); + } + if (trail != NULL) { + extent_deactivate(tsdn, arena, extents, trail); + } + return extent; + } else { + /* + * We should have picked an extent that was large enough to + * fulfill our allocation request. + */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + extent_deregister(tsdn, to_salvage); + } + if (to_leak != NULL) { + void *leak = extent_base_get(to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_leak(tsdn, arena, r_extent_hooks, extents, + to_leak, growing_retained); + assert(extent_lock_from_addr(tsdn, rtree_ctx, leak) + == NULL); + } + return NULL; + } + unreachable(); +} + +/* + * Tries to satisfy the given allocation request by reusing one of the extents + * in the given extents_t. + */ static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t size, size_t pad, @@ -906,16 +1071,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - bool committed = false; extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, new_addr, size, pad, alignment, slab, zero, - &committed, growing_retained); + rtree_ctx, extents, new_addr, size, pad, alignment, slab, + growing_retained); if (extent == NULL) { return NULL; } - if (committed) { - *commit = true; - } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, extents, new_addr, size, pad, alignment, slab, szind, extent, @@ -934,6 +1095,13 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_zeroed_set(extent, true); } + if (extent_committed_get(extent)) { + *commit = true; + } + if (extent_zeroed_get(extent)) { + *zero = true; + } + if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } @@ -999,11 +1167,12 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, static void * extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; - - ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED)); + if (have_madvise_huge && ret) { + pages_set_thp_state(ret, size); + } return ret; } @@ -1028,7 +1197,18 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, static void extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); - pre_reentrancy(tsd, arena); + if (arena == arena_get(tsd_tsdn(tsd), 0, false)) { + /* + * The only legitimate case of customized extent hooks for a0 is + * hooks with no allocation activities. One such example is to + * place metadata on pre-allocated resources such as huge pages. + * In that case, rely on reentrancy_level checks to catch + * infinite recursions. + */ + pre_reentrancy(tsd, NULL); + } else { + pre_reentrancy(tsd, arena); + } } static void @@ -1081,9 +1261,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, void *ptr; if (*r_extent_hooks == &extent_hooks_default) { - ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE, - &zeroed, &committed, (dss_prec_t)atomic_load_u( - &arena->dss_prec, ATOMIC_RELAXED)); + ptr = extent_alloc_default_impl(tsdn, arena, NULL, + alloc_size, PAGE, &zeroed, &committed); } else { extent_hook_pre_reentrancy(tsdn, arena); ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, @@ -1094,21 +1273,18 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_init(extent, arena, ptr, alloc_size, false, NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed); + committed, true); if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); goto label_err; } + if (extent_register_no_gdump_add(tsdn, extent)) { extents_leak(tsdn, arena, r_extent_hooks, &arena->extents_retained, extent, true); goto label_err; } - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, - PAGE_CEILING(alignment)) - (uintptr_t)ptr; - assert(alloc_size >= leadsize + esize); - size_t trailsize = alloc_size - leadsize - esize; if (extent_zeroed_get(extent) && extent_committed_get(extent)) { *zero = true; } @@ -1116,54 +1292,46 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, *commit = true; } - /* Split the lead. */ - if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, lead, - leadsize, NSIZES, false, esize + trailsize, szind, slab, - true); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, + true); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained, lead, true); - goto label_err; } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, lead, true); - } - - /* Split the trail. */ - if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, r_extent_hooks, - extent, esize, szind, slab, trailsize, NSIZES, false, true); - if (trail == NULL) { - extent_deregister(tsdn, extent); - extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, true); - goto label_err; + if (trail != NULL) { + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, trail, true); } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, trail, true); - } else if (leadsize == 0) { + } else { /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. + * We should have allocated a sufficiently large extent; the + * cant_alloc case should not occur. */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - - extent_szind_set(extent, szind); - if (szind != NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, - (uintptr_t)extent_past_get(extent) - - (uintptr_t)PAGE, szind, slab); + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + if (config_prof) { + extent_gdump_add(tsdn, to_salvage); } + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, to_salvage, true); } + if (to_leak != NULL) { + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_leak(tsdn, arena, r_extent_hooks, + &arena->extents_retained, to_leak, true); + } + goto label_err; } if (*commit && !extent_committed_get(extent)) { @@ -1177,13 +1345,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, } /* - * Increment extent_grow_next if doing so wouldn't exceed the legal + * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (arena->extent_grow_next + egn_skip + 1 < NPSIZES) { + if (arena->extent_grow_next + egn_skip + 1 <= + arena->retain_grow_limit) { arena->extent_grow_next += egn_skip + 1; } else { - arena->extent_grow_next = NPSIZES - 1; + arena->extent_grow_next = arena->retain_grow_limit; } /* All opportunities for failure are past. */ malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); @@ -1271,7 +1440,8 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } extent_init(extent, arena, addr, esize, slab, szind, - arena_extent_sn_next(arena), extent_state_active, zero, commit); + arena_extent_sn_next(arena), extent_state_active, *zero, *commit, + true); if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } @@ -1296,10 +1466,20 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper_hard which does mmap + * that is very unlikely to succeed (unless it happens + * to be at the end). + */ + return NULL; + } extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } + assert(extent == NULL || extent_dumpable_get(extent)); return extent; } @@ -1329,16 +1509,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool growing_retained) { assert(extent_can_coalesce(arena, extents, inner, outer)); - if (forward && extents->delay_coalesce) { - /* - * The extent that remains after coalescing must occupy the - * outer extent's position in the LRU. For forward coalescing, - * swap the inner extent into the LRU. - */ - extent_list_replace(&extents->lru, outer, inner); - } - extent_activate_locked(tsdn, arena, extents, outer, - extents->delay_coalesce); + extent_activate_locked(tsdn, arena, extents, outer); malloc_mutex_unlock(tsdn, &extents->mtx); bool err = extent_merge_impl(tsdn, arena, r_extent_hooks, @@ -1346,11 +1517,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, malloc_mutex_lock(tsdn, &extents->mtx); if (err) { - if (forward && extents->delay_coalesce) { - extent_list_replace(&extents->lru, inner, outer); - } - extent_deactivate_locked(tsdn, arena, extents, outer, - extents->delay_coalesce); + extent_deactivate_locked(tsdn, arena, extents, outer); } return err; @@ -1422,6 +1589,10 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, return extent; } +/* + * Does the metadata management portions of putting an unused extent into the + * given extents_t (coalesces, deregisters slab interiors, the heap operations). + */ static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, bool growing_retained) { @@ -1447,9 +1618,20 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!extents->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, NULL, growing_retained); - } - - extent_deactivate_locked(tsdn, arena, extents, extent, false); + } else if (extent_size_get(extent) >= LARGE_MINCLASS) { + /* Always coalesce large extents eagerly. */ + bool coalesced; + size_t prev_size; + do { + prev_size = extent_size_get(extent); + assert(extent_state_get(extent) == extent_state_active); + extent = extent_try_coalesce(tsdn, arena, + r_extent_hooks, rtree_ctx, extents, extent, + &coalesced, growing_retained); + } while (coalesced && + extent_size_get(extent) >= prev_size + LARGE_MINCLASS); + } + extent_deactivate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); } @@ -1520,6 +1702,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1780,6 +1963,13 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } #endif +/* + * Accepts the extent to split, and the characteristics of each side of the + * split. The 'a' parameters go with the 'lead' of the resulting pair of + * extents (the lower addressed portion of the split), and the 'b' parameters go + * with the trail (the higher addressed portion). This makes 'extent' the lead, + * and returns the trail (except in case of error). + */ static extent_t * extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, @@ -1803,7 +1993,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent)); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1814,7 +2004,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(&lead, arena, extent_addr_get(extent), size_a, slab_a, szind_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent)); extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, true, &lead_elm_a, &lead_elm_b); diff --git a/dep/jemalloc/src/extent_dss.c b/dep/jemalloc/src/extent_dss.c index e72da95870d..2b1ea9cafa0 100644 --- a/dep/jemalloc/src/extent_dss.c +++ b/dep/jemalloc/src/extent_dss.c @@ -156,7 +156,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(gap, arena, gap_addr_page, gap_size_page, false, NSIZES, arena_extent_sn_next(arena), - extent_state_active, false, true); + extent_state_active, false, true, true); } /* * Compute the address just past the end of the desired @@ -199,7 +199,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(&extent, arena, ret, size, size, false, NSIZES, - extent_state_active, false, true); + extent_state_active, false, true, + true); if (extent_purge_forced_wrapper(tsdn, arena, &extent_hooks, &extent, 0, size)) { diff --git a/dep/jemalloc/src/jemalloc.c b/dep/jemalloc/src/jemalloc.c index 0ee8ad48b98..f93c16fa3cf 100644 --- a/dep/jemalloc/src/jemalloc.c +++ b/dep/jemalloc/src/jemalloc.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" @@ -848,10 +849,8 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { malloc_printf("<jemalloc>: %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); + /* If abort_conf is set, error out after processing all options. */ had_conf_error = true; - if (opt_abort_conf) { - malloc_abort_invalid_conf(); - } } static void @@ -1051,8 +1050,22 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") - if (opt_abort_conf && had_conf_error) { - malloc_abort_invalid_conf(); + if (strncmp("metadata_thp", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < metadata_thp_mode_limit; i++) { + if (strncmp(metadata_thp_mode_names[i], + v, vlen) == 0) { + opt_metadata_thp = i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { @@ -1128,12 +1141,14 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, + "lg_extent_max_active_fit", 0, + (sizeof(size_t) << 3), yes, yes, false) CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) if (strncmp("percpu_arena", k, klen) == 0) { - int i; bool match = false; - for (i = percpu_arena_mode_names_base; i < + for (int i = percpu_arena_mode_names_base; i < percpu_arena_mode_names_limit; i++) { if (strncmp(percpu_arena_mode_names[i], v, vlen) == 0) { @@ -1155,6 +1170,10 @@ malloc_conf_init(void) { } CONF_HANDLE_BOOL(opt_background_thread, "background_thread"); + CONF_HANDLE_SIZE_T(opt_max_background_threads, + "max_background_threads", 1, + opt_max_background_threads, yes, yes, + true); if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_CHAR_P(opt_prof_prefix, @@ -1173,6 +1192,37 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } + if (config_log) { + if (CONF_MATCH("log")) { + size_t cpylen = ( + vlen <= sizeof(log_var_names) ? + vlen : sizeof(log_var_names) - 1); + strncpy(log_var_names, v, cpylen); + log_var_names[cpylen] = '\0'; + continue; + } + } + if (CONF_MATCH("thp")) { + bool match = false; + for (int i = 0; i < thp_mode_names_limit; i++) { + if (strncmp(thp_mode_names[i],v, vlen) + == 0) { + if (!have_madvise_huge) { + malloc_conf_error( + "No THP support", + k, klen, v, vlen); + } + opt_thp = i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH @@ -1188,7 +1238,11 @@ malloc_conf_init(void) { #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P } + if (opt_abort_conf && had_conf_error) { + malloc_abort_invalid_conf(); + } } + atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } static bool @@ -1493,6 +1547,8 @@ malloc_init_hard(void) { post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + witness_assert_lockless(witness_tsd_tsdn( + tsd_witness_tsdp_get_unsafe(tsd))); malloc_tsd_boot1(); /* Update TSD after tsd_boot1. */ tsd = tsd_fetch(); @@ -1500,8 +1556,11 @@ malloc_init_hard(void) { assert(have_background_thread); /* * Need to finish init & unlock first before creating background - * threads (pthread_create depends on malloc). + * threads (pthread_create depends on malloc). ctl_init (which + * sets isthreaded) needs to be called without holding any lock. */ + background_thread_ctl_init(tsd_tsdn(tsd)); + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); bool err = background_thread_create(tsd, 0); malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); @@ -1701,7 +1760,7 @@ compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, } /* A size_t with its high-half bits all set to 1. */ - const static size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); + static const size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); *size = dopts->item_size * dopts->num_items; @@ -1962,6 +2021,8 @@ je_malloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.malloc.entry", "size: %zu", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -1976,6 +2037,8 @@ je_malloc(size_t size) { imalloc(&sopts, &dopts); + LOG("core.malloc.exit", "result: %p", ret); + return ret; } @@ -1986,6 +2049,9 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " + "size: %zu", memptr, alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2002,6 +2068,10 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { dopts.alignment = alignment; ret = imalloc(&sopts, &dopts); + + LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, + *memptr); + return ret; } @@ -2014,6 +2084,9 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", + alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2032,6 +2105,9 @@ je_aligned_alloc(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + + LOG("core.aligned_alloc.exit", "result: %p", ret); + return ret; } @@ -2043,6 +2119,8 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2059,6 +2137,8 @@ je_calloc(size_t num, size_t size) { imalloc(&sopts, &dopts); + LOG("core.calloc.exit", "result: %p", ret); + return ret; } @@ -2161,17 +2241,37 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx, *ctx; - if (config_prof && opt_prof) { + if (!config_cache_oblivious && ((uintptr_t)ptr & PAGE_MASK) != 0) { + /* + * When cache_oblivious is disabled and ptr is not page aligned, + * the allocation was not sampled -- usize can be used to + * determine szind directly. + */ + alloc_ctx.szind = sz_size2index(usize); + alloc_ctx.slab = true; + ctx = &alloc_ctx; + if (config_debug) { + alloc_ctx_t dbg_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true, &dbg_ctx.szind, + &dbg_ctx.slab); + assert(dbg_ctx.szind == alloc_ctx.szind); + assert(dbg_ctx.slab == alloc_ctx.slab); + } + } else if (config_prof && opt_prof) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind == sz_size2index(usize)); ctx = &alloc_ctx; - prof_free(tsd, ptr, usize, ctx); } else { ctx = NULL; } + if (config_prof && opt_prof) { + prof_free(tsd, ptr, usize, ctx); + } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } @@ -2192,6 +2292,8 @@ je_realloc(void *ptr, size_t size) { size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; + LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); + if (unlikely(size == 0)) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(ptr). */ @@ -2204,6 +2306,8 @@ je_realloc(void *ptr, size_t size) { tcache = NULL; } ifree(tsd, ptr, tcache, true); + + LOG("core.realloc.exit", "result: %p", NULL); return NULL; } size = 1; @@ -2236,7 +2340,9 @@ je_realloc(void *ptr, size_t size) { tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - return je_malloc(size); + void *ret = je_malloc(size); + LOG("core.realloc.exit", "result: %p", ret); + return ret; } if (unlikely(ret == NULL)) { @@ -2257,11 +2363,15 @@ je_realloc(void *ptr, size_t size) { } UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); + + LOG("core.realloc.exit", "result: %p", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { + LOG("core.free.entry", "ptr: %p", ptr); + UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { /* @@ -2291,6 +2401,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } + LOG("core.free.exit", ""); } /* @@ -2310,6 +2421,9 @@ je_memalign(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, + size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2327,6 +2441,8 @@ je_memalign(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + + LOG("core.memalign.exit", "result: %p", ret); return ret; } #endif @@ -2341,6 +2457,8 @@ je_valloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.valloc.entry", "size: %zu\n", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2359,6 +2477,7 @@ je_valloc(size_t size) { imalloc(&sopts, &dopts); + LOG("core.valloc.exit", "result: %p\n", ret); return ret; } #endif @@ -2432,6 +2551,8 @@ je_mallocx(size_t size, int flags) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2465,6 +2586,8 @@ je_mallocx(size_t size, int flags) { } imalloc(&sopts, &dopts); + + LOG("core.mallocx.exit", "result: %p", ret); return ret; } @@ -2545,6 +2668,10 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; + LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + + assert(ptr != NULL); assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); @@ -2607,6 +2734,8 @@ je_rallocx(void *ptr, size_t size, int flags) { } UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2615,6 +2744,8 @@ label_oom: } UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.rallocx.exit", "result: %p", NULL); return NULL; } @@ -2701,6 +2832,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; + LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " + "flags: %d", ptr, size, extra, flags); + assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); @@ -2750,15 +2884,19 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { label_not_resized: UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.xallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) -je_sallocx(const void *ptr, int flags) { +je_sallocx(const void *ptr, UNUSED int flags) { size_t usize; tsdn_t *tsdn; + LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); + assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); @@ -2773,11 +2911,15 @@ je_sallocx(const void *ptr, int flags) { } check_entry_exit_locking(tsdn); + + LOG("core.sallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { + LOG("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2815,6 +2957,8 @@ je_dallocx(void *ptr, int flags) { ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.dallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -2836,6 +2980,9 @@ je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); @@ -2872,6 +3019,8 @@ je_sdallocx(void *ptr, size_t size, int flags) { isfree(tsd, ptr, usize, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.sdallocx.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2883,6 +3032,7 @@ je_nallocx(size_t size, int flags) { assert(size != 0); if (unlikely(malloc_init())) { + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } @@ -2891,10 +3041,12 @@ je_nallocx(size_t size, int flags) { usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); + LOG("core.nallocx.exit", "result: %zu", usize); return usize; } @@ -2904,7 +3056,10 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, int ret; tsd_t *tsd; + LOG("core.mallctl.entry", "name: %s", name); + if (unlikely(malloc_init())) { + LOG("core.mallctl.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -2912,6 +3067,8 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.mallctl.exit", "result: %d", ret); return ret; } @@ -2919,7 +3076,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; + LOG("core.mallctlnametomib.entry", "name: %s", name); + if (unlikely(malloc_init())) { + LOG("core.mallctlnametomib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -2927,6 +3087,8 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.mallctlnametomib.exit", "result: %d", ret); return ret; } @@ -2936,7 +3098,10 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tsd_t *tsd; + LOG("core.mallctlbymib.entry", ""); + if (unlikely(malloc_init())) { + LOG("core.mallctlbymib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -2944,6 +3109,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + LOG("core.mallctlbymib.exit", "result: %d", ret); return ret; } @@ -2952,10 +3118,13 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; + LOG("core.malloc_stats_print.entry", ""); + tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); + LOG("core.malloc_stats_print.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2963,6 +3132,8 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; + LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); + assert(malloc_initialized() || IS_INITIALIZER); tsdn = tsdn_fetch(); @@ -2980,6 +3151,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } check_entry_exit_locking(tsdn); + LOG("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } diff --git a/dep/jemalloc/src/jemalloc_cpp.cpp b/dep/jemalloc/src/jemalloc_cpp.cpp index 844ab398a71..f0ceddae33a 100644 --- a/dep/jemalloc/src/jemalloc_cpp.cpp +++ b/dep/jemalloc/src/jemalloc_cpp.cpp @@ -39,12 +39,10 @@ void operator delete(void *ptr, std::size_t size) noexcept; void operator delete[](void *ptr, std::size_t size) noexcept; #endif -template <bool IsNoExcept> -void * -newImpl(std::size_t size) noexcept(IsNoExcept) { - void *ptr = je_malloc(size); - if (likely(ptr != nullptr)) - return ptr; +JEMALLOC_NOINLINE +static void * +handleOOM(std::size_t size, bool nothrow) { + void *ptr = nullptr; while (ptr == nullptr) { std::new_handler handler; @@ -68,11 +66,22 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { ptr = je_malloc(size); } - if (ptr == nullptr && !IsNoExcept) + if (ptr == nullptr && !nothrow) std::__throw_bad_alloc(); return ptr; } +template <bool IsNoExcept> +JEMALLOC_ALWAYS_INLINE +void * +newImpl(std::size_t size) noexcept(IsNoExcept) { + void *ptr = je_malloc(size); + if (likely(ptr != nullptr)) + return ptr; + + return handleOOM(size, IsNoExcept); +} + void * operator new(std::size_t size) { return newImpl<false>(size); diff --git a/dep/jemalloc/src/log.c b/dep/jemalloc/src/log.c new file mode 100644 index 00000000000..778902fb9b8 --- /dev/null +++ b/dep/jemalloc/src/log.c @@ -0,0 +1,78 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/log.h" + +char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +atomic_b_t log_init_done = ATOMIC_INIT(false); + +/* + * Returns true if we were able to pick out a segment. Fills in r_segment_end + * with a pointer to the first character after the end of the string. + */ +static const char * +log_var_extract_segment(const char* segment_begin) { + const char *end; + for (end = segment_begin; *end != '\0' && *end != '|'; end++) { + } + return end; +} + +static bool +log_var_matches_segment(const char *segment_begin, const char *segment_end, + const char *log_var_begin, const char *log_var_end) { + assert(segment_begin <= segment_end); + assert(log_var_begin < log_var_end); + + ptrdiff_t segment_len = segment_end - segment_begin; + ptrdiff_t log_var_len = log_var_end - log_var_begin; + /* The special '.' segment matches everything. */ + if (segment_len == 1 && *segment_begin == '.') { + return true; + } + if (segment_len == log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0; + } else if (segment_len < log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0 + && log_var_begin[segment_len] == '.'; + } else { + return false; + } +} + +unsigned +log_var_update_state(log_var_t *log_var) { + const char *log_var_begin = log_var->name; + const char *log_var_end = log_var->name + strlen(log_var->name); + + /* Pointer to one before the beginning of the current segment. */ + const char *segment_begin = log_var_names; + + /* + * If log_init done is false, we haven't parsed the malloc conf yet. To + * avoid log-spew, we default to not displaying anything. + */ + if (!atomic_load_b(&log_init_done, ATOMIC_ACQUIRE)) { + return LOG_INITIALIZED_NOT_ENABLED; + } + + while (true) { + const char *segment_end = log_var_extract_segment( + segment_begin); + assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE); + if (log_var_matches_segment(segment_begin, segment_end, + log_var_begin, log_var_end)) { + atomic_store_u(&log_var->state, LOG_ENABLED, + ATOMIC_RELAXED); + return LOG_ENABLED; + } + if (*segment_end == '\0') { + /* Hit the end of the segment string with no match. */ + atomic_store_u(&log_var->state, + LOG_INITIALIZED_NOT_ENABLED, ATOMIC_RELAXED); + return LOG_INITIALIZED_NOT_ENABLED; + } + /* Otherwise, skip the delimiter and continue. */ + segment_begin = segment_end + 1; + } +} diff --git a/dep/jemalloc/src/malloc_io.c b/dep/jemalloc/src/malloc_io.c index 6b99afcd3fc..7bdc13f9519 100644 --- a/dep/jemalloc/src/malloc_io.c +++ b/dep/jemalloc/src/malloc_io.c @@ -70,20 +70,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /* malloc_message() setup. */ static void wrtmessage(void *cbopaque, const char *s) { -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) - /* - * Use syscall(2) rather than write(2) when possible in order to avoid - * the possibility of memory allocation within libc. This is necessary - * on FreeBSD; most operating systems do not have this problem though. - * - * syscall() returns long or int, depending on platform, so capture the - * unused result in the widest plausible type to avoid compiler - * warnings. - */ - UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); -#else - UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s)); -#endif + malloc_write_fd(STDERR_FILENO, s, strlen(s)); } JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); @@ -111,7 +98,7 @@ buferror(int err, char *buf, size_t buflen) { FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, (DWORD)buflen, NULL); return 0; -#elif defined(__GLIBC__) && defined(_GNU_SOURCE) +#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); diff --git a/dep/jemalloc/src/mutex.c b/dep/jemalloc/src/mutex.c index a528ef0c243..30222b3e582 100644 --- a/dep/jemalloc/src/mutex.c +++ b/dep/jemalloc/src/mutex.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/spin.h" #ifndef _CRT_SPINCOUNT #define _CRT_SPINCOUNT 4000 @@ -53,7 +54,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; do { - CPU_SPINWAIT; + spin_cpu_spinwait(); if (!malloc_mutex_trylock_final(mutex)) { data->n_spin_acquired++; return; @@ -173,7 +174,7 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, mutex->lock_order = lock_order; if (lock_order == malloc_mutex_address_ordered) { witness_init(&mutex->witness, name, rank, - mutex_addr_comp, &mutex); + mutex_addr_comp, mutex); } else { witness_init(&mutex->witness, name, rank, NULL, NULL); } diff --git a/dep/jemalloc/src/pages.c b/dep/jemalloc/src/pages.c index fec64dd01d7..26002692d60 100644 --- a/dep/jemalloc/src/pages.c +++ b/dep/jemalloc/src/pages.c @@ -10,6 +10,9 @@ #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT #include <sys/sysctl.h> +#ifdef __FreeBSD__ +#include <vm/vm_param.h> +#endif #endif /******************************************************************************/ @@ -25,6 +28,18 @@ static int mmap_flags; #endif static bool os_overcommits; +const char *thp_mode_names[] = { + "default", + "always", + "never", + "not supported" +}; +thp_mode_t opt_thp = THP_MODE_DEFAULT; +thp_mode_t init_system_thp_mode; + +/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ +static bool pages_can_purge_lazy_runtime = true; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -252,12 +267,25 @@ pages_purge_lazy(void *addr, size_t size) { if (!pages_can_purge_lazy) { return true; } + if (!pages_can_purge_lazy_runtime) { + /* + * Built with lazy purge enabled, but detected it was not + * supported on the current system. + */ + return true; + } #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); return false; #elif defined(JEMALLOC_PURGE_MADVISE_FREE) - return (madvise(addr, size, MADV_FREE) != 0); + return (madvise(addr, size, +# ifdef MADV_FREE + MADV_FREE +# else + JEMALLOC_MADV_FREE +# endif + ) != 0); #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); @@ -286,36 +314,84 @@ pages_purge_forced(void *addr, size_t size) { #endif } +static bool +pages_huge_impl(void *addr, size_t size, bool aligned) { + if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + return (madvise(addr, size, MADV_HUGEPAGE) != 0); +#else + return true; +#endif +} + bool pages_huge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); + return pages_huge_impl(addr, size, true); +} -#ifdef JEMALLOC_THP - return (madvise(addr, size, MADV_HUGEPAGE) != 0); +static bool +pages_huge_unaligned(void *addr, size_t size) { + return pages_huge_impl(addr, size, false); +} + +static bool +pages_nohuge_impl(void *addr, size_t size, bool aligned) { + if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } + +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else - return true; + return false; #endif } bool pages_nohuge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); + return pages_nohuge_impl(addr, size, true); +} -#ifdef JEMALLOC_THP - return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); +static bool +pages_nohuge_unaligned(void *addr, size_t size) { + return pages_nohuge_impl(addr, size, false); +} + +bool +pages_dontdump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DONTDUMP) != 0; #else return false; #endif } +bool +pages_dodump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DODUMP) != 0; +#else + return false; +#endif +} + + static size_t os_page_detect(void) { #ifdef _WIN32 SYSTEM_INFO si; GetSystemInfo(&si); return si.dwPageSize; +#elif defined(__FreeBSD__) + return getpagesize(); #else long result = sysconf(_SC_PAGESIZE); if (result == -1) { @@ -332,9 +408,19 @@ os_overcommits_sysctl(void) { size_t sz; sz = sizeof(vm_overcommit); +#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) + int mib[2]; + + mib[0] = CTL_VM; + mib[1] = VM_OVERCOMMIT; + if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) { + return false; /* Error. */ + } +#else if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { return false; /* Error. */ } +#endif return ((vm_overcommit & 0x3) == 0); } @@ -350,27 +436,44 @@ static bool os_overcommits_proc(void) { int fd; char buf[1]; - ssize_t nread; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | - O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | + O_CLOEXEC); + #else + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + #endif #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - fd = (int)syscall(SYS_openat, - AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + #endif #else - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + #endif #endif + if (fd == -1) { return false; /* Error. */ } -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) - nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); -#else - nread = read(fd, &buf, sizeof(buf)); -#endif - + ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else @@ -390,6 +493,71 @@ os_overcommits_proc(void) { } #endif +void +pages_set_thp_state (void *ptr, size_t size) { + if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { + return; + } + assert(opt_thp != thp_mode_not_supported && + init_system_thp_mode != thp_mode_not_supported); + + if (opt_thp == thp_mode_always + && init_system_thp_mode != thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default); + pages_huge_unaligned(ptr, size); + } else if (opt_thp == thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default || + init_system_thp_mode == thp_mode_always); + pages_nohuge_unaligned(ptr, size); + } +} + +static void +init_thp_state(void) { + if (!have_madvise_huge) { + if (metadata_thp_enabled() && opt_abort) { + malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n"); + abort(); + } + goto label_error; + } + + static const char sys_state_madvise[] = "always [madvise] never\n"; + static const char sys_state_always[] = "[always] madvise never\n"; + static const char sys_state_never[] = "always madvise [never]\n"; + char buf[sizeof(sys_state_madvise)]; + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + int fd = (int)syscall(SYS_open, + "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#else + int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#endif + if (fd == -1) { + goto label_error; + } + + ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_default; + } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_always; + } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_never; + } else { + goto label_error; + } + return; +label_error: + opt_thp = init_system_thp_mode = thp_mode_not_supported; +} + bool pages_boot(void) { os_page = os_page_detect(); @@ -418,5 +586,21 @@ pages_boot(void) { os_overcommits = false; #endif + init_thp_state(); + + /* Detect lazy purge runtime support. */ + if (pages_can_purge_lazy) { + bool committed = false; + void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed); + if (madv_free_page == NULL) { + return true; + } + assert(pages_can_purge_lazy_runtime); + if (pages_purge_lazy(madv_free_page, PAGE)) { + pages_can_purge_lazy_runtime = false; + } + os_pages_unmap(madv_free_page, PAGE); + } + return false; } diff --git a/dep/jemalloc/src/prof.c b/dep/jemalloc/src/prof.c index 975722c4c38..13df641a030 100644 --- a/dep/jemalloc/src/prof.c +++ b/dep/jemalloc/src/prof.c @@ -978,7 +978,7 @@ prof_dump_flush(bool propagate_err) { cassert(config_prof); - err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); if (err == -1) { if (!propagate_err) { malloc_write("<jemalloc>: write() failed during heap " @@ -1409,7 +1409,15 @@ prof_open_maps(const char *format, ...) { va_start(ap, format); malloc_vsnprintf(filename, sizeof(filename), format, ap); va_end(ap); + +#if defined(O_CLOEXEC) mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } +#endif return mfd; } @@ -1463,8 +1471,9 @@ prof_dump_maps(bool propagate_err) { goto label_return; } } - nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUFSIZE - prof_dump_buf_end); + nread = malloc_read_fd(mfd, + &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE + - prof_dump_buf_end); } while (nread > 0); } else { ret = true; @@ -1772,7 +1781,7 @@ prof_idump(tsdn_t *tsdn) { cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) { + if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { return; } tsd = tsdn_tsd(tsdn); @@ -1829,7 +1838,7 @@ prof_gdump(tsdn_t *tsdn) { cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) { + if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { return; } tsd = tsdn_tsd(tsdn); diff --git a/dep/jemalloc/src/spin.c b/dep/jemalloc/src/spin.c deleted file mode 100644 index 24372c26c94..00000000000 --- a/dep/jemalloc/src/spin.c +++ /dev/null @@ -1,4 +0,0 @@ -#define JEMALLOC_SPIN_C_ -#include "jemalloc/internal/jemalloc_preamble.h" - -#include "jemalloc/internal/spin.h" diff --git a/dep/jemalloc/src/stats.c b/dep/jemalloc/src/stats.c index 087df7676e9..08b9507cfe9 100644 --- a/dep/jemalloc/src/stats.c +++ b/dep/jemalloc/src/stats.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/emitter.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" @@ -84,41 +85,138 @@ gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, } static void -read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, - uint64_t results[mutex_prof_num_counters]) { +mutex_stats_init_cols(emitter_row_t *row, const char *table_name, + emitter_col_t *name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; + + emitter_col_t *col; + + if (name != NULL) { + emitter_col_init(name, row); + name->justify = emitter_justify_left; + name->width = 21; + name->type = emitter_type_title; + name->str_val = table_name; + } + +#define WIDTH_uint32_t 12 +#define WIDTH_uint64_t 16 +#define OP(counter, counter_type, human) \ + col = &col_##counter_type[k_##counter_type]; \ + ++k_##counter_type; \ + emitter_col_init(col, row); \ + col->justify = emitter_justify_right; \ + col->width = WIDTH_##counter_type; \ + col->type = emitter_type_title; \ + col->str_val = human; + MUTEX_PROF_COUNTERS +#undef OP +#undef WIDTH_uint32_t +#undef WIDTH_uint64_t +} + +static void +mutex_stats_read_global(const char *name, emitter_col_t *col_name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "mutexes", name, #counter); \ + CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS +#undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + +static void +mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, + const char *name, emitter_col_t *col_name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.mutexes", arena_mutex_names[mutex_ind], #counter);\ + CTL_M2_GET(cmd, arena_ind, \ + (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS +#undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + +static void +mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.bins.0","mutex", #c); \ - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ - (t *)&results[mutex_counter_##c], t); -MUTEX_PROF_COUNTERS + emitter_col_t *dst; + +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.bins.0","mutex", #counter); \ + CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS #undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t } +/* "row" can be NULL to avoid emitting in table mode. */ static void -mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], - const char *json_indent, bool last) { - malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); - - mutex_prof_counter_ind_t k = 0; - char *fmt_str[2] = {"%s\t\"%s\": %"FMTu32"%s\n", - "%s\t\"%s\": %"FMTu64"%s\n"}; -#define OP(c, t) \ - malloc_cprintf(write_cb, cbopaque, \ - fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - json_indent, #c, (t)stats[mutex_counter_##c], \ - (++k == mutex_prof_num_counters) ? "" : ","); -MUTEX_PROF_COUNTERS +mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + if (row != NULL) { + emitter_table_row(emitter, row); + } + + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; + + emitter_col_t *col; + +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, type, human) \ + col = &col_##type[k_##type]; \ + ++k_##type; \ + emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ + (const void *)&col->bool_val); + MUTEX_PROF_COUNTERS; #undef OP - malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, - last ? "" : ","); +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t } static void -stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool large, bool mutex, unsigned i) { +stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { size_t page; bool in_gap, in_gap_prev; unsigned nbins, j; @@ -126,18 +224,71 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_GET("arenas.nbins", &nbins, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"bins\": [\n"); - } else { - char *mutex_counters = " n_lock_ops n_waiting" - " n_spin_acq total_wait_ns max_wait_ns\n"; - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curslabs regs" - " pgs util nfills nflushes newslabs" - " reslabs%s", mutex ? mutex_counters : "\n"); + + emitter_row_t header_row; + emitter_row_init(&header_row); + + emitter_row_t row; + emitter_row_init(&row); +#define COL(name, left_or_right, col_width, etype) \ + emitter_col_t col_##name; \ + emitter_col_init(&col_##name, &row); \ + col_##name.justify = emitter_justify_##left_or_right; \ + col_##name.width = col_width; \ + col_##name.type = emitter_type_##etype; \ + emitter_col_t header_col_##name; \ + emitter_col_init(&header_col_##name, &header_row); \ + header_col_##name.justify = emitter_justify_##left_or_right; \ + header_col_##name.width = col_width; \ + header_col_##name.type = emitter_type_title; \ + header_col_##name.str_val = #name; + + COL(size, right, 20, size) + COL(ind, right, 4, unsigned) + COL(allocated, right, 13, uint64) + COL(nmalloc, right, 13, uint64) + COL(ndalloc, right, 13, uint64) + COL(nrequests, right, 13, uint64) + COL(curregs, right, 13, size) + COL(curslabs, right, 13, size) + COL(regs, right, 5, unsigned) + COL(pgs, right, 4, size) + /* To buffer a right- and left-justified column. */ + COL(justify_spacer, right, 1, title) + COL(util, right, 6, title) + COL(nfills, right, 13, uint64) + COL(nflushes, right, 13, uint64) + COL(nslabs, right, 13, uint64) + COL(nreslabs, right, 13, uint64) +#undef COL + + /* Don't want to actually print the name. */ + header_col_justify_spacer.str_val = " "; + col_justify_spacer.str_val = " "; + + + emitter_col_t col_mutex64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col_mutex32[mutex_prof_num_uint32_t_counters]; + + emitter_col_t header_mutex64[mutex_prof_num_uint64_t_counters]; + emitter_col_t header_mutex32[mutex_prof_num_uint32_t_counters]; + + if (mutex) { + mutex_stats_init_cols(&row, NULL, NULL, col_mutex64, + col_mutex32); + mutex_stats_init_cols(&header_row, NULL, NULL, header_mutex64, + header_mutex32); } + + /* + * We print a "bins:" header as part of the table row; we need to adjust + * the header size column to compensate. + */ + header_col_size.width -=5; + emitter_table_printf(emitter, "bins:"); + emitter_table_row(emitter, &header_row); + emitter_json_arr_begin(emitter, "bins"); + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; size_t reg_size, slab_size, curregs; @@ -151,8 +302,8 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, in_gap_prev = in_gap; in_gap = (nslabs == 0); - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, " ---\n"); } @@ -177,105 +328,127 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curregs\": %zu,\n" - "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nreslabs\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curslabs\": %zu%s\n", - nmalloc, ndalloc, curregs, nrequests, nfills, - nflushes, nreslabs, curslabs, mutex ? "," : ""); - if (mutex) { - uint64_t mutex_stats[mutex_prof_num_counters]; - read_arena_bin_mutex_stats(i, j, mutex_stats); - mutex_stats_output_json(write_cb, cbopaque, - "mutex", mutex_stats, "\t\t\t\t\t\t", true); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t}%s\n", - (j + 1 < nbins) ? "," : ""); - } else if (!in_gap) { - size_t availregs = nregs * curslabs; - char util[6]; - if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, - util)) { - if (availregs == 0) { - malloc_snprintf(util, sizeof(util), - "1"); - } else if (curregs > availregs) { - /* - * Race detected: the counters were read - * in separate mallctl calls and - * concurrent operations happened in - * between. In this case no meaningful - * utilization can be computed. - */ - malloc_snprintf(util, sizeof(util), - " race"); - } else { - not_reached(); - } - } - uint64_t mutex_stats[mutex_prof_num_counters]; - if (mutex) { - read_arena_bin_mutex_stats(i, j, mutex_stats); - } + if (mutex) { + mutex_stats_read_arena_bin(i, j, col_mutex64, + col_mutex32); + } + + emitter_json_arr_obj_begin(emitter); + emitter_json_kv(emitter, "nmalloc", emitter_type_uint64, + &nmalloc); + emitter_json_kv(emitter, "ndalloc", emitter_type_uint64, + &ndalloc); + emitter_json_kv(emitter, "curregs", emitter_type_size, + &curregs); + emitter_json_kv(emitter, "nrequests", emitter_type_uint64, + &nrequests); + emitter_json_kv(emitter, "nfills", emitter_type_uint64, + &nfills); + emitter_json_kv(emitter, "nflushes", emitter_type_uint64, + &nflushes); + emitter_json_kv(emitter, "nreslabs", emitter_type_uint64, + &nreslabs); + emitter_json_kv(emitter, "curslabs", emitter_type_size, + &curslabs); + if (mutex) { + emitter_json_dict_begin(emitter, "mutex"); + mutex_stats_emit(emitter, NULL, col_mutex64, + col_mutex32); + emitter_json_dict_end(emitter); + } + emitter_json_arr_obj_end(emitter); - malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12" - FMTu64" %12"FMTu64" %12"FMTu64" %12zu %12zu %4u" - " %3zu %-5s %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %12"FMTu64, reg_size, j, curregs * reg_size, - nmalloc, ndalloc, nrequests, curregs, curslabs, - nregs, slab_size / page, util, nfills, nflushes, - nslabs, nreslabs); - - /* Output less info for bin mutexes to save space. */ - if (mutex) { - malloc_cprintf(write_cb, cbopaque, - " %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %14"FMTu64" %12"FMTu64"\n", - mutex_stats[mutex_counter_num_ops], - mutex_stats[mutex_counter_num_wait], - mutex_stats[mutex_counter_num_spin_acq], - mutex_stats[mutex_counter_total_wait_time], - mutex_stats[mutex_counter_max_wait_time]); + size_t availregs = nregs * curslabs; + char util[6]; + if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, util)) + { + if (availregs == 0) { + malloc_snprintf(util, sizeof(util), "1"); + } else if (curregs > availregs) { + /* + * Race detected: the counters were read in + * separate mallctl calls and concurrent + * operations happened in between. In this case + * no meaningful utilization can be computed. + */ + malloc_snprintf(util, sizeof(util), " race"); } else { - malloc_cprintf(write_cb, cbopaque, "\n"); + not_reached(); } } + + col_size.size_val = reg_size; + col_ind.unsigned_val = j; + col_allocated.size_val = curregs * reg_size; + col_nmalloc.uint64_val = nmalloc; + col_ndalloc.uint64_val = ndalloc; + col_nrequests.uint64_val = nrequests; + col_curregs.size_val = curregs; + col_curslabs.size_val = curslabs; + col_regs.unsigned_val = nregs; + col_pgs.size_val = slab_size / page; + col_util.str_val = util; + col_nfills.uint64_val = nfills; + col_nflushes.uint64_val = nflushes; + col_nslabs.uint64_val = nslabs; + col_nreslabs.uint64_val = nreslabs; + + /* + * Note that mutex columns were initialized above, if mutex == + * true. + */ + + emitter_table_row(emitter, &row); } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]%s\n", large ? "," : ""); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } + emitter_json_arr_end(emitter); /* Close "bins". */ + + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); } } static void -stats_arena_lextents_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, unsigned i) { +stats_arena_lextents_print(emitter_t *emitter, unsigned i) { unsigned nbins, nlextents, j; bool in_gap, in_gap_prev; CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlextents", &nlextents, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"lextents\": [\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc" - " ndalloc nrequests curlextents\n"); - } + + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); + +#define COL(name, left_or_right, col_width, etype) \ + emitter_col_t header_##name; \ + emitter_col_init(&header_##name, &header_row); \ + header_##name.justify = emitter_justify_##left_or_right; \ + header_##name.width = col_width; \ + header_##name.type = emitter_type_title; \ + header_##name.str_val = #name; \ + \ + emitter_col_t col_##name; \ + emitter_col_init(&col_##name, &row); \ + col_##name.justify = emitter_justify_##left_or_right; \ + col_##name.width = col_width; \ + col_##name.type = emitter_type_##etype; + + COL(size, right, 20, size) + COL(ind, right, 4, unsigned) + COL(allocated, right, 13, size) + COL(nmalloc, right, 13, uint64) + COL(ndalloc, right, 13, uint64) + COL(nrequests, right, 13, uint64) + COL(curlextents, right, 13, size) +#undef COL + + /* As with bins, we label the large extents table. */ + header_size.width -= 6; + emitter_table_printf(emitter, "large:"); + emitter_table_row(emitter, &header_row); + emitter_json_arr_begin(emitter, "lextents"); + for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t lextent_size, curlextents; @@ -289,119 +462,71 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), in_gap_prev = in_gap; in_gap = (nrequests == 0); - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, " ---\n"); } CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, size_t); CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", i, j, &curlextents, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"curlextents\": %zu\n" - "\t\t\t\t\t}%s\n", - curlextents, - (j + 1 < nlextents) ? "," : ""); - } else if (!in_gap) { - malloc_cprintf(write_cb, cbopaque, - "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64" %12zu\n", - lextent_size, nbins + j, - curlextents * lextent_size, nmalloc, ndalloc, - nrequests, curlextents); - } - } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]\n"); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } - } -} -static void -read_arena_mutex_stats(unsigned arena_ind, - uint64_t results[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - - mutex_prof_arena_ind_t i; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.mutexes", arena_mutex_names[i], #c); \ - CTL_M2_GET(cmd, arena_ind, \ - (t *)&results[i][mutex_counter_##c], t); -MUTEX_PROF_COUNTERS -#undef OP + emitter_json_arr_obj_begin(emitter); + emitter_json_kv(emitter, "curlextents", emitter_type_size, + &curlextents); + emitter_json_arr_obj_end(emitter); + + col_size.size_val = lextent_size; + col_ind.unsigned_val = nbins + j; + col_allocated.size_val = curlextents * lextent_size; + col_nmalloc.uint64_val = nmalloc; + col_ndalloc.uint64_val = ndalloc; + col_nrequests.uint64_val = nrequests; + col_curlextents.size_val = curlextents; + + if (!in_gap) { + emitter_table_row(emitter, &row); + } } -} - -static void -mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], - bool first_mutex) { - if (first_mutex) { - /* Print title. */ - malloc_cprintf(write_cb, cbopaque, - " n_lock_ops n_waiting" - " n_spin_acq n_owner_switch total_wait_ns" - " max_wait_ns max_n_thds\n"); + emitter_json_arr_end(emitter); /* Close "lextents". */ + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); } - - malloc_cprintf(write_cb, cbopaque, "%s", name); - malloc_cprintf(write_cb, cbopaque, ":%*c", - (int)(20 - strlen(name)), ' '); - - char *fmt_str[2] = {"%12"FMTu32, "%16"FMTu64}; -#define OP(c, t) \ - malloc_cprintf(write_cb, cbopaque, \ - fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - (t)stats[mutex_counter_##c]); -MUTEX_PROF_COUNTERS -#undef OP - malloc_cprintf(write_cb, cbopaque, "\n"); } static void -stats_arena_mutexes_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, bool json_end, unsigned arena_ind) { - uint64_t mutex_stats[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]; - read_arena_mutex_stats(arena_ind, mutex_stats); - - /* Output mutex stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"mutexes\": {\n"); - mutex_prof_arena_ind_t i, last_mutex; - last_mutex = mutex_prof_num_arena_mutexes - 1; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { - mutex_stats_output_json(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], - "\t\t\t\t\t", (i == last_mutex)); - } - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", - json_end ? "" : ","); - } else { - mutex_prof_arena_ind_t i; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { - mutex_stats_output(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], i == 0); - } +stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) { + emitter_row_t row; + emitter_col_t col_name; + emitter_col_t col64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col32[mutex_prof_num_uint32_t_counters]; + + emitter_row_init(&row); + mutex_stats_init_cols(&row, "", &col_name, col64, col32); + + emitter_json_dict_begin(emitter, "mutexes"); + emitter_table_row(emitter, &row); + + for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; + i++) { + const char *name = arena_mutex_names[i]; + emitter_json_dict_begin(emitter, name); + mutex_stats_read_arena(arena_ind, i, name, &col_name, col64, + col32); + mutex_stats_emit(emitter, &row, col64, col32); + emitter_json_dict_end(emitter); /* Close the mutex dict. */ } + emitter_json_dict_end(emitter); /* End "mutexes". */ } static void -stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, unsigned i, bool bins, bool large, bool mutex) { +stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, + bool mutex) { unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident; + size_t base, internal, resident, metadata_thp; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; @@ -414,31 +539,16 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"nthreads\": %u,\n", nthreads); - } else { - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); - } + emitter_kv(emitter, "nthreads", "assigned threads", + emitter_type_unsigned, &nthreads); CTL_M2_GET("stats.arenas.0.uptime", i, &uptime, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"uptime_ns\": %"FMTu64",\n", uptime); - } else { - malloc_cprintf(write_cb, cbopaque, - "uptime: %"FMTu64"\n", uptime); - } + emitter_kv(emitter, "uptime_ns", "uptime", emitter_type_uint64, + &uptime); CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dss\": \"%s\",\n", dss); - } else { - malloc_cprintf(write_cb, cbopaque, - "dss allocation precedence: %s\n", dss); - } + emitter_kv(emitter, "dss", "dss allocation precedence", + emitter_type_string, &dss); CTL_M2_GET("stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, ssize_t); @@ -455,205 +565,271 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_decay_ms\": %zd,\n", dirty_decay_ms); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_decay_ms\": %zd,\n", muzzy_decay_ms); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pactive\": %zu,\n", pactive); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pdirty\": %zu,\n", pdirty); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pmuzzy\": %zu,\n", pmuzzy); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_npurge\": %"FMTu64",\n", dirty_npurge); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_nmadvise\": %"FMTu64",\n", dirty_nmadvise); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_purged\": %"FMTu64",\n", dirty_purged); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_npurge\": %"FMTu64",\n", muzzy_npurge); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_nmadvise\": %"FMTu64",\n", muzzy_nmadvise); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_purged\": %"FMTu64",\n", muzzy_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - "decaying: time npages sweeps madvises" - " purged\n"); - if (dirty_decay_ms >= 0) { - malloc_cprintf(write_cb, cbopaque, - " dirty: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", dirty_decay_ms, pdirty, dirty_npurge, - dirty_nmadvise, dirty_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - " dirty: N/A %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", pdirty, dirty_npurge, dirty_nmadvise, - dirty_purged); - } - if (muzzy_decay_ms >= 0) { - malloc_cprintf(write_cb, cbopaque, - " muzzy: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", muzzy_decay_ms, pmuzzy, muzzy_npurge, - muzzy_nmadvise, muzzy_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - " muzzy: N/A %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", pmuzzy, muzzy_npurge, muzzy_nmadvise, - muzzy_purged); - } - } - CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, - size_t); - CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, - uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"small\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); + emitter_row_t decay_row; + emitter_row_init(&decay_row); + + /* JSON-style emission. */ + emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, + &dirty_decay_ms); + emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, + &muzzy_decay_ms); + + emitter_json_kv(emitter, "pactive", emitter_type_size, &pactive); + emitter_json_kv(emitter, "pdirty", emitter_type_size, &pdirty); + emitter_json_kv(emitter, "pmuzzy", emitter_type_size, &pmuzzy); + + emitter_json_kv(emitter, "dirty_npurge", emitter_type_uint64, + &dirty_npurge); + emitter_json_kv(emitter, "dirty_nmadvise", emitter_type_uint64, + &dirty_nmadvise); + emitter_json_kv(emitter, "dirty_purged", emitter_type_uint64, + &dirty_purged); + + emitter_json_kv(emitter, "muzzy_npurge", emitter_type_uint64, + &muzzy_npurge); + emitter_json_kv(emitter, "muzzy_nmadvise", emitter_type_uint64, + &muzzy_nmadvise); + emitter_json_kv(emitter, "muzzy_purged", emitter_type_uint64, + &muzzy_purged); + + /* Table-style emission. */ + emitter_col_t decay_type; + emitter_col_init(&decay_type, &decay_row); + decay_type.justify = emitter_justify_right; + decay_type.width = 9; + decay_type.type = emitter_type_title; + decay_type.str_val = "decaying:"; + + emitter_col_t decay_time; + emitter_col_init(&decay_time, &decay_row); + decay_time.justify = emitter_justify_right; + decay_time.width = 6; + decay_time.type = emitter_type_title; + decay_time.str_val = "time"; + + emitter_col_t decay_npages; + emitter_col_init(&decay_npages, &decay_row); + decay_npages.justify = emitter_justify_right; + decay_npages.width = 13; + decay_npages.type = emitter_type_title; + decay_npages.str_val = "npages"; + + emitter_col_t decay_sweeps; + emitter_col_init(&decay_sweeps, &decay_row); + decay_sweeps.justify = emitter_justify_right; + decay_sweeps.width = 13; + decay_sweeps.type = emitter_type_title; + decay_sweeps.str_val = "sweeps"; + + emitter_col_t decay_madvises; + emitter_col_init(&decay_madvises, &decay_row); + decay_madvises.justify = emitter_justify_right; + decay_madvises.width = 13; + decay_madvises.type = emitter_type_title; + decay_madvises.str_val = "madvises"; + + emitter_col_t decay_purged; + emitter_col_init(&decay_purged, &decay_row); + decay_purged.justify = emitter_justify_right; + decay_purged.width = 13; + decay_purged.type = emitter_type_title; + decay_purged.str_val = "purged"; + + /* Title row. */ + emitter_table_row(emitter, &decay_row); + + /* Dirty row. */ + decay_type.str_val = "dirty:"; + + if (dirty_decay_ms >= 0) { + decay_time.type = emitter_type_ssize; + decay_time.ssize_val = dirty_decay_ms; } else { - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc" - " ndalloc nrequests\n"); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, - small_nrequests); + decay_time.type = emitter_type_title; + decay_time.str_val = "N/A"; } - CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, - size_t); - CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, - uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"large\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, - large_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated, small_nmalloc + - large_nmalloc, small_ndalloc + large_ndalloc, - small_nrequests + large_nrequests); - } - if (!json) { - malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); - } + decay_npages.type = emitter_type_size; + decay_npages.size_val = pdirty; - CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"mapped\": %zu,\n", mapped); - } else { - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); - } + decay_sweeps.type = emitter_type_uint64; + decay_sweeps.uint64_val = dirty_npurge; - CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"retained\": %zu,\n", retained); - } else { - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); - } + decay_madvises.type = emitter_type_uint64; + decay_madvises.uint64_val = dirty_nmadvise; - CTL_M2_GET("stats.arenas.0.base", i, &base, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"base\": %zu,\n", base); - } else { - malloc_cprintf(write_cb, cbopaque, - "base: %12zu\n", base); - } + decay_purged.type = emitter_type_uint64; + decay_purged.uint64_val = dirty_purged; - CTL_M2_GET("stats.arenas.0.internal", i, &internal, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"internal\": %zu,\n", internal); - } else { - malloc_cprintf(write_cb, cbopaque, - "internal: %12zu\n", internal); - } + emitter_table_row(emitter, &decay_row); - CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"tcache\": %zu,\n", tcache_bytes); - } else { - malloc_cprintf(write_cb, cbopaque, - "tcache: %12zu\n", tcache_bytes); - } + /* Muzzy row. */ + decay_type.str_val = "muzzy:"; - CTL_M2_GET("stats.arenas.0.resident", i, &resident, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"resident\": %zu%s\n", resident, - (bins || large || mutex) ? "," : ""); + if (muzzy_decay_ms >= 0) { + decay_time.type = emitter_type_ssize; + decay_time.ssize_val = muzzy_decay_ms; } else { - malloc_cprintf(write_cb, cbopaque, - "resident: %12zu\n", resident); + decay_time.type = emitter_type_title; + decay_time.str_val = "N/A"; } + decay_npages.type = emitter_type_size; + decay_npages.size_val = pmuzzy; + + decay_sweeps.type = emitter_type_uint64; + decay_sweeps.uint64_val = muzzy_npurge; + + decay_madvises.type = emitter_type_uint64; + decay_madvises.uint64_val = muzzy_nmadvise; + + decay_purged.type = emitter_type_uint64; + decay_purged.uint64_val = muzzy_purged; + + emitter_table_row(emitter, &decay_row); + + /* Small / large / total allocation counts. */ + emitter_row_t alloc_count_row; + emitter_row_init(&alloc_count_row); + + emitter_col_t alloc_count_title; + emitter_col_init(&alloc_count_title, &alloc_count_row); + alloc_count_title.justify = emitter_justify_left; + alloc_count_title.width = 25; + alloc_count_title.type = emitter_type_title; + alloc_count_title.str_val = ""; + + emitter_col_t alloc_count_allocated; + emitter_col_init(&alloc_count_allocated, &alloc_count_row); + alloc_count_allocated.justify = emitter_justify_right; + alloc_count_allocated.width = 12; + alloc_count_allocated.type = emitter_type_title; + alloc_count_allocated.str_val = "allocated"; + + emitter_col_t alloc_count_nmalloc; + emitter_col_init(&alloc_count_nmalloc, &alloc_count_row); + alloc_count_nmalloc.justify = emitter_justify_right; + alloc_count_nmalloc.width = 12; + alloc_count_nmalloc.type = emitter_type_title; + alloc_count_nmalloc.str_val = "nmalloc"; + + emitter_col_t alloc_count_ndalloc; + emitter_col_init(&alloc_count_ndalloc, &alloc_count_row); + alloc_count_ndalloc.justify = emitter_justify_right; + alloc_count_ndalloc.width = 12; + alloc_count_ndalloc.type = emitter_type_title; + alloc_count_ndalloc.str_val = "ndalloc"; + + emitter_col_t alloc_count_nrequests; + emitter_col_init(&alloc_count_nrequests, &alloc_count_row); + alloc_count_nrequests.justify = emitter_justify_right; + alloc_count_nrequests.width = 12; + alloc_count_nrequests.type = emitter_type_title; + alloc_count_nrequests.str_val = "nrequests"; + + emitter_table_row(emitter, &alloc_count_row); + +#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ + CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ + &small_or_large##_##name, valtype##_t); \ + emitter_json_kv(emitter, #name, emitter_type_##valtype, \ + &small_or_large##_##name); \ + alloc_count_##name.type = emitter_type_##valtype; \ + alloc_count_##name.valtype##_val = small_or_large##_##name; + + emitter_json_dict_begin(emitter, "small"); + alloc_count_title.str_val = "small:"; + + GET_AND_EMIT_ALLOC_STAT(small, allocated, size) + GET_AND_EMIT_ALLOC_STAT(small, nmalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(small, ndalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) + + emitter_table_row(emitter, &alloc_count_row); + emitter_json_dict_end(emitter); /* Close "small". */ + + emitter_json_dict_begin(emitter, "large"); + alloc_count_title.str_val = "large:"; + + GET_AND_EMIT_ALLOC_STAT(large, allocated, size) + GET_AND_EMIT_ALLOC_STAT(large, nmalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(large, ndalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) + + emitter_table_row(emitter, &alloc_count_row); + emitter_json_dict_end(emitter); /* Close "large". */ + +#undef GET_AND_EMIT_ALLOC_STAT + + /* Aggregated small + large stats are emitter only in table mode. */ + alloc_count_title.str_val = "total:"; + alloc_count_allocated.size_val = small_allocated + large_allocated; + alloc_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; + alloc_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; + alloc_count_nrequests.uint64_val = small_nrequests + large_nrequests; + emitter_table_row(emitter, &alloc_count_row); + + emitter_row_t mem_count_row; + emitter_row_init(&mem_count_row); + + emitter_col_t mem_count_title; + emitter_col_init(&mem_count_title, &mem_count_row); + mem_count_title.justify = emitter_justify_left; + mem_count_title.width = 25; + mem_count_title.type = emitter_type_title; + mem_count_title.str_val = ""; + + emitter_col_t mem_count_val; + emitter_col_init(&mem_count_val, &mem_count_row); + mem_count_val.justify = emitter_justify_right; + mem_count_val.width = 12; + mem_count_val.type = emitter_type_title; + mem_count_val.str_val = ""; + + emitter_table_row(emitter, &mem_count_row); + mem_count_val.type = emitter_type_size; + + /* Active count in bytes is emitted only in table mode. */ + mem_count_title.str_val = "active:"; + mem_count_val.size_val = pactive * page; + emitter_table_row(emitter, &mem_count_row); + +#define GET_AND_EMIT_MEM_STAT(stat) \ + CTL_M2_GET("stats.arenas.0."#stat, i, &stat, size_t); \ + emitter_json_kv(emitter, #stat, emitter_type_size, &stat); \ + mem_count_title.str_val = #stat":"; \ + mem_count_val.size_val = stat; \ + emitter_table_row(emitter, &mem_count_row); + + GET_AND_EMIT_MEM_STAT(mapped) + GET_AND_EMIT_MEM_STAT(retained) + GET_AND_EMIT_MEM_STAT(base) + GET_AND_EMIT_MEM_STAT(internal) + GET_AND_EMIT_MEM_STAT(metadata_thp) + GET_AND_EMIT_MEM_STAT(tcache_bytes) + GET_AND_EMIT_MEM_STAT(resident) +#undef GET_AND_EMIT_MEM_STAT + if (mutex) { - stats_arena_mutexes_print(write_cb, cbopaque, json, - !(bins || large), i); + stats_arena_mutexes_print(emitter, i); } if (bins) { - stats_arena_bins_print(write_cb, cbopaque, json, large, mutex, - i); + stats_arena_bins_print(emitter, mutex, i); } if (large) { - stats_arena_lextents_print(write_cb, cbopaque, json, i); + stats_arena_lextents_print(emitter, i); } } static void -stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool more) { +stats_general_print(emitter_t *emitter) { const char *cpv; - bool bv; + bool bv, bv2; unsigned uv; uint32_t u32v; uint64_t u64v; - ssize_t ssv; + ssize_t ssv, ssv2; size_t sv, bsz, usz, ssz, sssz, cpsz; bsz = sizeof(bool); @@ -663,365 +839,248 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, cpsz = sizeof(const char *); CTL_GET("version", &cpv, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"version\": \"%s\",\n", cpv); - } else { - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - } + emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv); /* config. */ -#define CONFIG_WRITE_BOOL_JSON(n, c) \ - if (json) { \ - CTL_GET("config."#n, &bv, bool); \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ - (c)); \ + emitter_dict_begin(emitter, "config", "Build-time option settings"); +#define CONFIG_WRITE_BOOL(name) \ + do { \ + CTL_GET("config."#name, &bv, bool); \ + emitter_kv(emitter, #name, "config."#name, \ + emitter_type_bool, &bv); \ + } while (0) + + CONFIG_WRITE_BOOL(cache_oblivious); + CONFIG_WRITE_BOOL(debug); + CONFIG_WRITE_BOOL(fill); + CONFIG_WRITE_BOOL(lazy_lock); + emitter_kv(emitter, "malloc_conf", "config.malloc_conf", + emitter_type_string, &config_malloc_conf); + + CONFIG_WRITE_BOOL(prof); + CONFIG_WRITE_BOOL(prof_libgcc); + CONFIG_WRITE_BOOL(prof_libunwind); + CONFIG_WRITE_BOOL(stats); + CONFIG_WRITE_BOOL(utrace); + CONFIG_WRITE_BOOL(xmalloc); +#undef CONFIG_WRITE_BOOL + emitter_dict_end(emitter); /* Close "config" dict. */ + + /* opt. */ +#define OPT_WRITE(name, var, size, emitter_type) \ + if (je_mallctl("opt."name, (void *)&var, &size, NULL, 0) == \ + 0) { \ + emitter_kv(emitter, name, "opt."name, emitter_type, \ + &var); \ } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"config\": {\n"); +#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, \ + altname) \ + if (je_mallctl("opt."name, (void *)&var1, &size, NULL, 0) == \ + 0 && je_mallctl(altname, (void *)&var2, &size, NULL, 0) \ + == 0) { \ + emitter_kv_note(emitter, name, "opt."name, \ + emitter_type, &var1, altname, emitter_type, \ + &var2); \ } - CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") +#define OPT_WRITE_BOOL(name) OPT_WRITE(name, bv, bsz, emitter_type_bool) +#define OPT_WRITE_BOOL_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, bv, bv2, bsz, emitter_type_bool, altname) + +#define OPT_WRITE_UNSIGNED(name) \ + OPT_WRITE(name, uv, usz, emitter_type_unsigned) + +#define OPT_WRITE_SSIZE_T(name) \ + OPT_WRITE(name, ssv, sssz, emitter_type_ssize) +#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, \ + altname) + +#define OPT_WRITE_CHAR_P(name) \ + OPT_WRITE(name, cpv, cpsz, emitter_type_string) + + emitter_dict_begin(emitter, "opt", "Run-time option settings"); + + OPT_WRITE_BOOL("abort") + OPT_WRITE_BOOL("abort_conf") + OPT_WRITE_BOOL("retain") + OPT_WRITE_CHAR_P("dss") + OPT_WRITE_UNSIGNED("narenas") + OPT_WRITE_CHAR_P("percpu_arena") + OPT_WRITE_CHAR_P("metadata_thp") + OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") + OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") + OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms") + OPT_WRITE_UNSIGNED("lg_extent_max_active_fit") + OPT_WRITE_CHAR_P("junk") + OPT_WRITE_BOOL("zero") + OPT_WRITE_BOOL("utrace") + OPT_WRITE_BOOL("xmalloc") + OPT_WRITE_BOOL("tcache") + OPT_WRITE_SSIZE_T("lg_tcache_max") + OPT_WRITE_CHAR_P("thp") + OPT_WRITE_BOOL("prof") + OPT_WRITE_CHAR_P("prof_prefix") + OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active") + OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init", + "prof.thread_active_init") + OPT_WRITE_SSIZE_T_MUTABLE("lg_prof_sample", "prof.lg_sample") + OPT_WRITE_BOOL("prof_accum") + OPT_WRITE_SSIZE_T("lg_prof_interval") + OPT_WRITE_BOOL("prof_gdump") + OPT_WRITE_BOOL("prof_final") + OPT_WRITE_BOOL("prof_leak") + OPT_WRITE_BOOL("stats_print") + OPT_WRITE_CHAR_P("stats_print_opts") + + emitter_dict_end(emitter); + +#undef OPT_WRITE +#undef OPT_WRITE_MUTABLE +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_UNSIGNED +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_SSIZE_T_MUTABLE +#undef OPT_WRITE_CHAR_P + + /* prof. */ + if (config_prof) { + emitter_dict_begin(emitter, "prof", "Profiling settings"); - CTL_GET("config.debug", &bv, bool); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); - } else { - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? "enabled" : "disabled"); - } + CTL_GET("prof.thread_active_init", &bv, bool); + emitter_kv(emitter, "thread_active_init", + "prof.thread_active_init", emitter_type_bool, &bv); - CONFIG_WRITE_BOOL_JSON(fill, ",") - CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") + CTL_GET("prof.active", &bv, bool); + emitter_kv(emitter, "active", "prof.active", emitter_type_bool, + &bv); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"malloc_conf\": \"%s\",\n", - config_malloc_conf); - } else { - malloc_cprintf(write_cb, cbopaque, - "config.malloc_conf: \"%s\"\n", config_malloc_conf); - } + CTL_GET("prof.gdump", &bv, bool); + emitter_kv(emitter, "gdump", "prof.gdump", emitter_type_bool, + &bv); - CONFIG_WRITE_BOOL_JSON(prof, ",") - CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") - CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") - CONFIG_WRITE_BOOL_JSON(stats, ",") - CONFIG_WRITE_BOOL_JSON(thp, ",") - CONFIG_WRITE_BOOL_JSON(utrace, ",") - CONFIG_WRITE_BOOL_JSON(xmalloc, "") - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); - } -#undef CONFIG_WRITE_BOOL_JSON + CTL_GET("prof.interval", &u64v, uint64_t); + emitter_kv(emitter, "interval", "prof.interval", + emitter_type_uint64, &u64v); - /* opt. */ -#define OPT_WRITE_BOOL(n, c) \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ - bool bv2; \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, (void *)&bv2, &bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? "true" : "false"); \ - } \ - } \ -} -#define OPT_WRITE_UNSIGNED(n, c) \ - if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %u\n", uv); \ - } \ - } -#define OPT_WRITE_SSIZE_T(n, c) \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, (void *)&ssv2, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ - } \ -} -#define OPT_WRITE_CHAR_P(n, c) \ - if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } \ - } + CTL_GET("prof.lg_sample", &ssv, ssize_t); + emitter_kv(emitter, "lg_sample", "prof.lg_sample", + emitter_type_ssize, &ssv); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"opt\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - } - OPT_WRITE_BOOL(abort, ",") - OPT_WRITE_BOOL(abort_conf, ",") - OPT_WRITE_BOOL(retain, ",") - OPT_WRITE_CHAR_P(dss, ",") - OPT_WRITE_UNSIGNED(narenas, ",") - OPT_WRITE_CHAR_P(percpu_arena, ",") - OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") - OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") - OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") - OPT_WRITE_CHAR_P(junk, ",") - OPT_WRITE_BOOL(zero, ",") - OPT_WRITE_BOOL(utrace, ",") - OPT_WRITE_BOOL(xmalloc, ",") - OPT_WRITE_BOOL(tcache, ",") - OPT_WRITE_SSIZE_T(lg_tcache_max, ",") - OPT_WRITE_BOOL(prof, ",") - OPT_WRITE_CHAR_P(prof_prefix, ",") - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, - ",") - OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") - OPT_WRITE_BOOL(prof_accum, ",") - OPT_WRITE_SSIZE_T(lg_prof_interval, ",") - OPT_WRITE_BOOL(prof_gdump, ",") - OPT_WRITE_BOOL(prof_final, ",") - OPT_WRITE_BOOL(prof_leak, ",") - OPT_WRITE_BOOL(stats_print, ",") - if (json || opt_stats_print) { - /* - * stats_print_opts is always emitted for JSON, so as long as it - * comes last it's safe to unconditionally omit the comma here - * (rather than having to conditionally omit it elsewhere - * depending on configuration). - */ - OPT_WRITE_CHAR_P(stats_print_opts, "") - } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); + emitter_dict_end(emitter); /* Close "prof". */ } -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - /* arenas. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"arenas\": {\n"); - } + /* + * The json output sticks arena info into an "arenas" dict; the table + * output puts them at the top-level. + */ + emitter_json_dict_begin(emitter, "arenas"); CTL_GET("arenas.narenas", &uv, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"narenas\": %u,\n", uv); - } else { - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - } + emitter_kv(emitter, "narenas", "Arenas", emitter_type_unsigned, &uv); - if (json) { - CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"dirty_decay_ms\": %zd,\n", ssv); + /* + * Decay settings are emitted only in json mode; in table mode, they're + * emitted as notes with the opt output, above. + */ + CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t); + emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, &ssv); - CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"muzzy_decay_ms\": %zd,\n", ssv); - } + CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t); + emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, &ssv); CTL_GET("arenas.quantum", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"quantum\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); - } + emitter_kv(emitter, "quantum", "Quantum size", emitter_type_size, &sv); CTL_GET("arenas.page", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"page\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - } + emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv); if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"tcache_max\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } + emitter_kv(emitter, "tcache_max", + "Maximum thread-cached size class", emitter_type_size, &sv); } - if (json) { - unsigned nbins, nlextents, i; - - CTL_GET("arenas.nbins", &nbins, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nbins\": %u,\n", nbins); + unsigned nbins; + CTL_GET("arenas.nbins", &nbins, unsigned); + emitter_kv(emitter, "nbins", "Number of bin size classes", + emitter_type_unsigned, &nbins); - CTL_GET("arenas.nhbins", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "\t\t\t\"nhbins\": %u,\n", - uv); + unsigned nhbins; + CTL_GET("arenas.nhbins", &nhbins, unsigned); + emitter_kv(emitter, "nhbins", "Number of thread-cache bin size classes", + emitter_type_unsigned, &nhbins); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"bin\": [\n"); - for (i = 0; i < nbins; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); + /* + * We do enough mallctls in a loop that we actually want to omit them + * (not just omit the printing). + */ + if (emitter->output == emitter_output_json) { + emitter_json_arr_begin(emitter, "bin"); + for (unsigned i = 0; i < nbins; i++) { + emitter_json_arr_obj_begin(emitter); CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu,\n", sv); + emitter_json_kv(emitter, "size", emitter_type_size, + &sv); CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + emitter_json_kv(emitter, "nregs", emitter_type_uint32, + &u32v); CTL_M2_GET("arenas.bin.0.slab_size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"slab_size\": %zu\n", sv); + emitter_json_kv(emitter, "slab_size", emitter_type_size, + &sv); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); + emitter_json_arr_obj_end(emitter); } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t],\n"); + emitter_json_arr_end(emitter); /* Close "bin". */ + } - CTL_GET("arenas.nlextents", &nlextents, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nlextents\": %u,\n", nlextents); + unsigned nlextents; + CTL_GET("arenas.nlextents", &nlextents, unsigned); + emitter_kv(emitter, "nlextents", "Number of large size classes", + emitter_type_unsigned, &nlextents); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lextent\": [\n"); - for (i = 0; i < nlextents; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); + if (emitter->output == emitter_output_json) { + emitter_json_arr_begin(emitter, "lextent"); + for (unsigned i = 0; i < nlextents; i++) { + emitter_json_arr_obj_begin(emitter); CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu\n", sv); + emitter_json_kv(emitter, "size", emitter_type_size, + &sv); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nlextents) ? "," : ""); + emitter_json_arr_obj_end(emitter); } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t]\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (config_prof || more) ? "," : ""); - } - - /* prof. */ - if (config_prof && json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"prof\": {\n"); - - CTL_GET("prof.thread_active_init", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : - "false"); - - CTL_GET("prof.active", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); - - CTL_GET("prof.gdump", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); - - CTL_GET("prof.interval", &u64v, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"interval\": %"FMTu64",\n", u64v); - - CTL_GET("prof.lg_sample", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lg_sample\": %zd\n", ssv); - - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", more ? "," : ""); + emitter_json_arr_end(emitter); /* Close "lextent". */ } -} - -static void -read_global_mutex_stats( - uint64_t results[mutex_prof_num_global_mutexes][mutex_prof_num_counters]) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "mutexes", global_mutex_names[i], #c); \ - CTL_GET(cmd, (t *)&results[i][mutex_counter_##c], t); -MUTEX_PROF_COUNTERS -#undef OP - } + emitter_json_dict_end(emitter); /* Close "arenas" */ } static void -stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool destroyed, bool unmerged, bool bins, - bool large, bool mutex) { - size_t allocated, active, metadata, resident, mapped, retained; +stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, + bool unmerged, bool bins, bool large, bool mutex) { + /* + * These should be deleted. We keep them around for a while, to aid in + * the transition to the emitter code. + */ + size_t allocated, active, metadata, metadata_thp, resident, mapped, + retained; size_t num_background_threads; uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.metadata_thp", &metadata_thp, size_t); CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); - uint64_t mutex_stats[mutex_prof_num_global_mutexes][mutex_prof_num_counters]; - if (mutex) { - read_global_mutex_stats(mutex_stats); - } - if (have_background_thread) { CTL_GET("stats.background_thread.num_threads", &num_background_threads, size_t); @@ -1035,182 +1094,130 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, background_thread_run_interval = 0; } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"allocated\": %zu,\n", allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"active\": %zu,\n", active); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"metadata\": %zu,\n", metadata); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"resident\": %zu,\n", resident); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"mapped\": %zu,\n", mapped); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"retained\": %zu,\n", retained); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"background_thread\": {\n"); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"num_threads\": %zu,\n", num_background_threads); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"num_runs\": %"FMTu64",\n", - background_thread_num_runs); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"run_interval\": %"FMTu64"\n", - background_thread_run_interval); - malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", - mutex ? "," : ""); + /* Generic global stats. */ + emitter_json_dict_begin(emitter, "stats"); + emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated); + emitter_json_kv(emitter, "active", emitter_type_size, &active); + emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata); + emitter_json_kv(emitter, "metadata_thp", emitter_type_size, + &metadata_thp); + emitter_json_kv(emitter, "resident", emitter_type_size, &resident); + emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped); + emitter_json_kv(emitter, "retained", emitter_type_size, &retained); + + emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " + "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, " + "retained: %zu\n", allocated, active, metadata, metadata_thp, + resident, mapped, retained); + + /* Background thread stats. */ + emitter_json_dict_begin(emitter, "background_thread"); + emitter_json_kv(emitter, "num_threads", emitter_type_size, + &num_background_threads); + emitter_json_kv(emitter, "num_runs", emitter_type_uint64, + &background_thread_num_runs); + emitter_json_kv(emitter, "run_interval", emitter_type_uint64, + &background_thread_run_interval); + emitter_json_dict_end(emitter); /* Close "background_thread". */ + + emitter_table_printf(emitter, "Background threads: %zu, " + "num_runs: %"FMTu64", run_interval: %"FMTu64" ns\n", + num_background_threads, background_thread_num_runs, + background_thread_run_interval); - if (mutex) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"mutexes\": {\n"); - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { - mutex_stats_output_json(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], - "\t\t\t\t", - i == mutex_prof_num_global_mutexes - 1); - } - malloc_cprintf(write_cb, cbopaque, "\t\t\t}\n"); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (merged || unmerged || destroyed) ? "," : ""); - } else { - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); - - if (have_background_thread && num_background_threads > 0) { - malloc_cprintf(write_cb, cbopaque, - "Background threads: %zu, num_runs: %"FMTu64", " - "run_interval: %"FMTu64" ns\n", - num_background_threads, - background_thread_num_runs, - background_thread_run_interval); - } - if (mutex) { - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { - mutex_stats_output(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], - i == 0); - } + if (mutex) { + emitter_row_t row; + emitter_col_t name; + emitter_col_t col64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col32[mutex_prof_num_uint32_t_counters]; + + emitter_row_init(&row); + mutex_stats_init_cols(&row, "", &name, col64, col32); + + emitter_table_row(emitter, &row); + emitter_json_dict_begin(emitter, "mutexes"); + + for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { + mutex_stats_read_global(global_mutex_names[i], &name, + col64, col32); + emitter_json_dict_begin(emitter, global_mutex_names[i]); + mutex_stats_emit(emitter, &row, col64, col32); + emitter_json_dict_end(emitter); } + + emitter_json_dict_end(emitter); /* Close "mutexes". */ } + emitter_json_dict_end(emitter); /* Close "stats". */ + if (merged || destroyed || unmerged) { unsigned narenas; - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats.arenas\": {\n"); - } + emitter_json_dict_begin(emitter, "stats.arenas"); CTL_GET("arenas.narenas", &narenas, unsigned); - { - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(size_t); - size_t sz; - VARIABLE_ARRAY(bool, initialized, narenas); - bool destroyed_initialized; - unsigned i, j, ninitialized; - - xmallctlnametomib("arena.0.initialized", mib, &miblen); - for (i = ninitialized = 0; i < narenas; i++) { - mib[1] = i; - sz = sizeof(bool); - xmallctlbymib(mib, miblen, &initialized[i], &sz, - NULL, 0); - if (initialized[i]) { - ninitialized++; - } - } - mib[1] = MALLCTL_ARENAS_DESTROYED; + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); + size_t sz; + VARIABLE_ARRAY(bool, initialized, narenas); + bool destroyed_initialized; + unsigned i, j, ninitialized; + + xmallctlnametomib("arena.0.initialized", mib, &miblen); + for (i = ninitialized = 0; i < narenas; i++) { + mib[1] = i; sz = sizeof(bool); - xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, + xmallctlbymib(mib, miblen, &initialized[i], &sz, NULL, 0); - - /* Merged stats. */ - if (merged && (ninitialized > 1 || !unmerged)) { - /* Print merged arena stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"merged\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - } - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_ALL, bins, large, mutex); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", - ((destroyed_initialized && - destroyed) || unmerged) ? "," : - ""); - } + if (initialized[i]) { + ninitialized++; } + } + mib[1] = MALLCTL_ARENAS_DESTROYED; + sz = sizeof(bool); + xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, + NULL, 0); + + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. */ + emitter_table_printf(emitter, "Merged arenas stats:\n"); + emitter_json_dict_begin(emitter, "merged"); + stats_arena_print(emitter, MALLCTL_ARENAS_ALL, bins, + large, mutex); + emitter_json_dict_end(emitter); /* Close "merged". */ + } - /* Destroyed stats. */ - if (destroyed_initialized && destroyed) { - /* Print destroyed arena stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"destroyed\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "\nDestroyed arenas stats:\n"); - } - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_DESTROYED, bins, large, - mutex); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", unmerged ? "," : - ""); - } - } + /* Destroyed stats. */ + if (destroyed_initialized && destroyed) { + /* Print destroyed arena stats. */ + emitter_table_printf(emitter, + "Destroyed arenas stats:\n"); + emitter_json_dict_begin(emitter, "destroyed"); + stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, + bins, large, mutex); + emitter_json_dict_end(emitter); /* Close "destroyed". */ + } - /* Unmerged stats. */ - if (unmerged) { - for (i = j = 0; i < narenas; i++) { - if (initialized[i]) { - if (json) { - j++; - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t\"%u\": {\n", - i); - } else { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", - i); - } - stats_arena_print(write_cb, - cbopaque, json, i, bins, - large, mutex); - if (json) { - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t}%s\n", (j < - ninitialized) ? "," - : ""); - } - } + /* Unmerged stats. */ + if (unmerged) { + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + char arena_ind_str[20]; + malloc_snprintf(arena_ind_str, + sizeof(arena_ind_str), "%u", i); + emitter_json_dict_begin(emitter, + arena_ind_str); + emitter_table_printf(emitter, + "arenas[%s]:\n", arena_ind_str); + stats_arena_print(emitter, i, bins, + large, mutex); + /* Close "<arena-ind>". */ + emitter_json_dict_end(emitter); } } } - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t}\n"); - } + emitter_json_dict_end(emitter); /* Close "stats.arenas". */ } } @@ -1257,29 +1264,23 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "{\n" - "\t\"jemalloc\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - } + emitter_t emitter; + emitter_init(&emitter, + json ? emitter_output_json : emitter_output_table, write_cb, + cbopaque); + emitter_begin(&emitter); + emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); + emitter_json_dict_begin(&emitter, "jemalloc"); if (general) { - stats_general_print(write_cb, cbopaque, json, config_stats); + stats_general_print(&emitter); } if (config_stats) { - stats_print_helper(write_cb, cbopaque, json, merged, destroyed, - unmerged, bins, large, mutex); + stats_print_helper(&emitter, merged, destroyed, unmerged, + bins, large, mutex); } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t}\n" - "}\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "--- End jemalloc statistics ---\n"); - } + emitter_json_dict_end(&emitter); /* Closes the "jemalloc" dict. */ + emitter_table_printf(&emitter, "--- End jemalloc statistics ---\n"); + emitter_end(&emitter); } diff --git a/dep/jemalloc/src/sz.c b/dep/jemalloc/src/sz.c index 0986615f711..9de77e45fff 100644 --- a/dep/jemalloc/src/sz.c +++ b/dep/jemalloc/src/sz.c @@ -26,7 +26,8 @@ const size_t sz_index2size_tab[NSIZES] = { JEMALLOC_ALIGNED(CACHELINE) const uint8_t sz_size2index_tab[] = { #if LG_TINY_MIN == 0 -#warning "Dangerous LG_TINY_MIN" +/* The div module doesn't support division by 1. */ +#error "Unsupported LG_TINY_MIN" #define S2B_0(i) i, #elif LG_TINY_MIN == 1 #warning "Dangerous LG_TINY_MIN" diff --git a/dep/jemalloc/src/tcache.c b/dep/jemalloc/src/tcache.c index 936ef3140d5..a769a6b17bc 100644 --- a/dep/jemalloc/src/tcache.c +++ b/dep/jemalloc/src/tcache.c @@ -12,7 +12,7 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; -tcache_bin_info_t *tcache_bin_info; +cache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ unsigned nhbins; @@ -40,7 +40,7 @@ void tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin; + cache_bin_t *tbin; if (binind < NBINS) { tbin = tcache_small_bin_get(tcache, binind); } else { @@ -58,7 +58,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + cache_bin_info_t *tbin_info = &tcache_bin_info[binind]; if ((tbin_info->ncached_max >> (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; @@ -86,7 +86,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { + cache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; assert(tcache->arena != NULL); @@ -95,18 +95,18 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if (config_prof) { tcache->prof_accumbytes = 0; } - ret = tcache_alloc_easy(tbin, tcache_success); + ret = cache_bin_alloc_easy(tbin, tcache_success); return ret; } void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem) { bool merged_stats = false; assert(binind < NBINS); - assert(rem <= tbin->ncached); + assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; assert(arena != NULL); @@ -121,7 +121,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, /* Lock the arena bin associated with the first object. */ extent_t *extent = item_extent[0]; arena_t *bin_arena = extent_arena_get(extent); - arena_bin_t *bin = &bin_arena->bins[binind]; + bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { if (arena_prof_accum(tsd_tsdn(tsd), arena, @@ -169,7 +169,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_bin_t *bin = &arena->bins[binind]; + bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; @@ -180,18 +180,18 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } void -tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { bool merged_stats = false; assert(binind < nhbins); - assert(rem <= tbin->ncached); + assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; assert(arena != NULL); @@ -278,7 +278,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } @@ -291,8 +291,15 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); + ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); + cache_bin_array_descriptor_init( + &tcache->cache_bin_array_descriptor, tcache->bins_small, + tcache->bins_large); + ql_tail_insert(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); + malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } @@ -316,6 +323,8 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); + ql_remove(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); tcache_stats_merge(tsdn, tcache, arena); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } @@ -354,8 +363,8 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS); - memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS)); + memset(tcache->bins_small, 0, sizeof(cache_bin_t) * NBINS); + memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - NBINS)); unsigned i = 0; for (; i < NBINS; i++) { tcache->lg_fill_div[i] = 1; @@ -450,7 +459,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tcache->arena != NULL); for (unsigned i = 0; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); tcache_bin_flush_small(tsd, tcache, tbin, i, 0); if (config_stats) { @@ -458,7 +467,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } } for (unsigned i = NBINS; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats) { @@ -524,8 +533,8 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { /* Merge and reset tcache stats. */ for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + bin_t *bin = &arena->bins[i]; + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); @@ -533,7 +542,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); arena_stats_large_nrequests_add(tsdn, &arena->stats, i, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; @@ -657,21 +666,21 @@ tcache_boot(tsdn_t *tsdn) { nhbins = sz_size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins - * sizeof(tcache_bin_info_t), CACHELINE); + tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins + * sizeof(cache_bin_info_t), CACHELINE); if (tcache_bin_info == NULL) { return true; } stack_nelms = 0; unsigned i; for (i = 0; i < NBINS; i++) { - if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { + if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MIN; - } else if ((arena_bin_info[i].nregs << 1) <= + } else if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); + (bin_infos[i].nregs << 1); } else { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; diff --git a/dep/jemalloc/src/tsd.c b/dep/jemalloc/src/tsd.c index f968992f2b5..c1430682dd5 100644 --- a/dep/jemalloc/src/tsd.c +++ b/dep/jemalloc/src/tsd.c @@ -71,6 +71,16 @@ tsd_data_init(tsd_t *tsd) { */ rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + /* + * A nondeterministic seed based on the address of tsd reduces + * the likelihood of lockstep non-uniform cache index + * utilization among identical concurrent processes, but at the + * cost of test repeatability. For debug builds, instead use a + * deterministic seed. + */ + *tsd_offset_statep_get(tsd) = config_debug ? 0 : + (uint64_t)(uintptr_t)tsd; + return tsd_tcache_enabled_data_init(tsd); } diff --git a/dep/jemalloc/src/zone.c b/dep/jemalloc/src/zone.c index 9d3b7b49522..23dfdd04a91 100644 --- a/dep/jemalloc/src/zone.c +++ b/dep/jemalloc/src/zone.c @@ -89,6 +89,7 @@ JEMALLOC_ATTR(weak_import); static malloc_zone_t *default_zone, *purgeable_zone; static malloc_zone_t jemalloc_zone; static struct malloc_introspection_t jemalloc_zone_introspect; +static pid_t zone_force_lock_pid = -1; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -270,6 +271,12 @@ zone_log(malloc_zone_t *zone, void *address) { static void zone_force_lock(malloc_zone_t *zone) { if (isthreaded) { + /* + * See the note in zone_force_unlock, below, to see why we need + * this. + */ + assert(zone_force_lock_pid == -1); + zone_force_lock_pid = getpid(); jemalloc_prefork(); } } @@ -277,14 +284,25 @@ zone_force_lock(malloc_zone_t *zone) { static void zone_force_unlock(malloc_zone_t *zone) { /* - * Call jemalloc_postfork_child() rather than - * jemalloc_postfork_parent(), because this function is executed by both - * parent and child. The parent can tolerate having state - * reinitialized, but the child cannot unlock mutexes that were locked - * by the parent. + * zone_force_lock and zone_force_unlock are the entry points to the + * forking machinery on OS X. The tricky thing is, the child is not + * allowed to unlock mutexes locked in the parent, even if owned by the + * forking thread (and the mutex type we use in OS X will fail an assert + * if we try). In the child, we can get away with reinitializing all + * the mutexes, which has the effect of unlocking them. In the parent, + * doing this would mean we wouldn't wake any waiters blocked on the + * mutexes we unlock. So, we record the pid of the current thread in + * zone_force_lock, and use that to detect if we're in the parent or + * child here, to decide which unlock logic we need. */ if (isthreaded) { - jemalloc_postfork_child(); + assert(zone_force_lock_pid != -1); + if (getpid() == zone_force_lock_pid) { + jemalloc_postfork_parent(); + } else { + jemalloc_postfork_child(); + } + zone_force_lock_pid = -1; } } |