Diffstat (limited to 'deps')
103 files changed, 10880 insertions, 3862 deletions
diff --git a/deps/jemalloc/CMakeLists.txt b/deps/jemalloc/CMakeLists.txt index fa2a51c0ec..ec01256f4c 100644 --- a/deps/jemalloc/CMakeLists.txt +++ b/deps/jemalloc/CMakeLists.txt @@ -9,18 +9,18 @@ # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM) +if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT NOJEM) # We need to generate the jemalloc_def.h header based on platform-specific settings CHECK_SYMBOL_EXISTS(MADV_FREE "sys/mman.h" HAVE_MADV_FREE) - + if (PLATFORM EQUAL 32) set(JEM_SIZEDEF 2) set(JEM_TLSMODEL) - set(JEM_VADDRBITS 32) + set(JEM_VADDRBITS 32) else() set(JEM_SIZEDEF 3) set(JEM_TLSMODEL "__attribute__\(\(tls_model\(\"initial-exec\"\)\)\)") - set(JEM_VADDRBITS 48) + set(JEM_VADDRBITS 48) endif() if (HAVE_MADV_FREE) @@ -29,6 +29,14 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM) set(JEM_MADFREE_DEF "#undef") endif() + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + set(JEM_CPU_SPINWAIT "") + set(JEM_HAVE_CPU_SPINWAIT 0) + else() + set(JEM_CPU_SPINWAIT "__asm__ volatile\(\"pause\"\)") + set(JEM_HAVE_CPU_SPINWAIT 1) + endif() + # Create the header, so we can use it configure_file( "${CMAKE_SOURCE_DIR}/deps/jemalloc/jemalloc_internal_defs.h.in.cmake" @@ -41,17 +49,20 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM) ${CMAKE_CURRENT_SOURCE_DIR}/src/arena.c ${CMAKE_CURRENT_SOURCE_DIR}/src/background_thread.c ${CMAKE_CURRENT_SOURCE_DIR}/src/base.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/bin.c ${CMAKE_CURRENT_SOURCE_DIR}/src/bitmap.c ${CMAKE_CURRENT_SOURCE_DIR}/src/ckh.c ${CMAKE_CURRENT_SOURCE_DIR}/src/ctl.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/div.c ${CMAKE_CURRENT_SOURCE_DIR}/src/extent.c ${CMAKE_CURRENT_SOURCE_DIR}/src/extent_dss.c ${CMAKE_CURRENT_SOURCE_DIR}/src/extent_mmap.c ${CMAKE_CURRENT_SOURCE_DIR}/src/hash.c - ${CMAKE_CURRENT_SOURCE_DIR}/src/hooks.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/hook.c ${CMAKE_CURRENT_SOURCE_DIR}/src/jemalloc.c ${CMAKE_CURRENT_SOURCE_DIR}/src/jemalloc_cpp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/large.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/log.c ${CMAKE_CURRENT_SOURCE_DIR}/src/malloc_io.c ${CMAKE_CURRENT_SOURCE_DIR}/src/mutex.c ${CMAKE_CURRENT_SOURCE_DIR}/src/mutex_pool.c @@ -60,17 +71,18 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM) ${CMAKE_CURRENT_SOURCE_DIR}/src/prng.c ${CMAKE_CURRENT_SOURCE_DIR}/src/prof.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rtree.c - ${CMAKE_CURRENT_SOURCE_DIR}/src/spin.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/safety_check.c ${CMAKE_CURRENT_SOURCE_DIR}/src/stats.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/sc.c ${CMAKE_CURRENT_SOURCE_DIR}/src/sz.c ${CMAKE_CURRENT_SOURCE_DIR}/src/tcache.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/test_hooks.c ${CMAKE_CURRENT_SOURCE_DIR}/src/ticker.c ${CMAKE_CURRENT_SOURCE_DIR}/src/tsd.c ${CMAKE_CURRENT_SOURCE_DIR}/src/witness.c ) - add_library(jemalloc STATIC - ${jemalloc_STAT_SRC}) + add_library(jemalloc STATIC ${jemalloc_STAT_SRC}) target_include_directories(jemalloc PRIVATE @@ -89,17 +101,19 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM) acore-dependency-interface PUBLIC threads - ${CMAKE_DL_LIBS}) + ${CMAKE_DL_LIBS}) set_target_properties(jemalloc - PROPERTIES - FOLDER - "deps") + PROPERTIES + FOLDER + "deps") + else() # Provide a dummy target for jemalloc which is used when jemalloc # is disabled or not supported. 
add_library(jemalloc INTERFACE) - # target_link_libraries(jemalloc - # INTERFACE - # valgrind) + target_link_libraries(jemalloc + INTERFACE + valgrind) + endif() diff --git a/deps/jemalloc/COPYING b/deps/jemalloc/COPYING index e308632a81..3b7fd3585d 100644 --- a/deps/jemalloc/COPYING +++ b/deps/jemalloc/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2017 Jason Evans <jasone@canonware.com>. +Copyright (C) 2002-present Jason Evans <jasone@canonware.com>. All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. +Copyright (C) 2009-present Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/deps/jemalloc/ChangeLog b/deps/jemalloc/ChangeLog index ee1b7ead39..e55813b7be 100644 --- a/deps/jemalloc/ChangeLog +++ b/deps/jemalloc/ChangeLog @@ -4,6 +4,259 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.2.1 (August 5, 2019) + + This release is primarily about Windows. A critical virtual memory leak is + resolved on all Windows platforms. The regression was present in all releases + since 5.0.0. + + Bug fixes: + - Fix a severe virtual memory leak on Windows. This regression was first + released in 5.0.0. (@Ignition, @j0t, @frederik-h, @davidtgoldblatt, + @interwq) + - Fix size 0 handling in posix_memalign(). This regression was first released + in 5.2.0. (@interwq) + - Fix the prof_log unit test which may observe unexpected backtraces from + compiler optimizations. The test was first added in 5.2.0. (@marxin, + @gnzlbg, @interwq) + - Fix the declaration of the extent_avail tree. This regression was first + released in 5.1.0. (@zoulasc) + - Fix an incorrect reference in jeprof. This functionality was first released + in 3.0.0. (@prehistoric-penguin) + - Fix an assertion on the deallocation fast-path. This regression was first + released in 5.2.0. (@yinan1048576) + - Fix the TLS_MODEL attribute in headers. This regression was first released + in 5.0.0. (@zoulasc, @interwq) + + Optimizations and refactors: + - Implement opt.retain on Windows and enable by default on 64-bit. (@interwq, + @davidtgoldblatt) + - Optimize away a branch on the operator delete[] path. (@mgrice) + - Add format annotation to the format generator function. (@zoulasc) + - Refactor and improve the size class header generation. (@yinan1048576) + - Remove best fit. (@djwatson) + - Avoid blocking on background thread locks for stats. (@oranagra, @interwq) + +* 5.2.0 (April 2, 2019) + + This release includes a few notable improvements, which are summarized below: + 1) improved fast-path performance from the optimizations by @djwatson; 2) + reduced virtual memory fragmentation and metadata usage; and 3) bug fixes on + setting the number of background threads. In addition, peak / spike memory + usage is improved with certain allocation patterns. As usual, the release and + prior dev versions have gone through large-scale production testing. + + New features: + - Implement oversize_threshold, which uses a dedicated arena for allocations + crossing the specified threshold to reduce fragmentation. (@interwq) + - Add extents usage information to stats. 
(@tyleretzel) + - Log time information for sampled allocations. (@tyleretzel) + - Support 0 size in sdallocx. (@djwatson) + - Output rate for certain counters in malloc_stats. (@zinoale) + - Add configure option --enable-readlinkat, which allows the use of readlinkat + over readlink. (@davidtgoldblatt) + - Add configure options --{enable,disable}-{static,shared} to allow not + building unwanted libraries. (@Ericson2314) + - Add configure option --disable-libdl to enable fully static builds. + (@interwq) + - Add mallctl interfaces: + + opt.oversize_threshold (@interwq) + + stats.arenas.<i>.extent_avail (@tyleretzel) + + stats.arenas.<i>.extents.<j>.n{dirty,muzzy,retained} (@tyleretzel) + + stats.arenas.<i>.extents.<j>.{dirty,muzzy,retained}_bytes + (@tyleretzel) + + Portability improvements: + - Update MSVC builds. (@maksqwe, @rustyx) + - Workaround a compiler optimizer bug on s390x. (@rkmisra) + - Make use of pthread_set_name_np(3) on FreeBSD. (@trasz) + - Implement malloc_getcpu() to enable percpu_arena for windows. (@santagada) + - Link against -pthread instead of -lpthread. (@paravoid) + - Make background_thread not dependent on libdl. (@interwq) + - Add stringify to fix a linker directive issue on MSVC. (@daverigby) + - Detect and fall back when 8-bit atomics are unavailable. (@interwq) + - Fall back to the default pthread_create if dlsym(3) fails. (@interwq) + + Optimizations and refactors: + - Refactor the TSD module. (@davidtgoldblatt) + - Avoid taking extents_muzzy mutex when muzzy is disabled. (@interwq) + - Avoid taking large_mtx for auto arenas on the tcache flush path. (@interwq) + - Optimize ixalloc by avoiding a size lookup. (@interwq) + - Implement opt.oversize_threshold which uses a dedicated arena for requests + crossing the threshold, also eagerly purges the oversize extents. Default + the threshold to 8 MiB. (@interwq) + - Clean compilation with -Wextra. (@gnzlbg, @jasone) + - Refactor the size class module. (@davidtgoldblatt) + - Refactor the stats emitter. (@tyleretzel) + - Optimize pow2_ceil. (@rkmisra) + - Avoid runtime detection of lazy purging on FreeBSD. (@trasz) + - Optimize mmap(2) alignment handling on FreeBSD. (@trasz) + - Improve error handling for THP state initialization. (@jsteemann) + - Rework the malloc() fast path. (@djwatson) + - Rework the free() fast path. (@djwatson) + - Refactor and optimize the tcache fill / flush paths. (@djwatson) + - Optimize sync / lwsync on PowerPC. (@chmeeedalf) + - Bypass extent_dalloc() when retain is enabled. (@interwq) + - Optimize the locking on large deallocation. (@interwq) + - Reduce the number of pages committed from sanity checking in debug build. + (@trasz, @interwq) + - Deprecate OSSpinLock. (@interwq) + - Lower the default number of background threads to 4 (when the feature + is enabled). (@interwq) + - Optimize the trylock spin wait. (@djwatson) + - Use arena index for arena-matching checks. (@interwq) + - Avoid forced decay on thread termination when using background threads. + (@interwq) + - Disable muzzy decay by default. (@djwatson, @interwq) + - Only initialize libgcc unwinder when profiling is enabled. (@paravoid, + @interwq) + + Bug fixes (all only relevant to jemalloc 5.x): + - Fix background thread index issues with max_background_threads. (@djwatson, + @interwq) + - Fix stats output for opt.lg_extent_max_active_fit. (@interwq) + - Fix opt.prof_prefix initialization. (@davidtgoldblatt) + - Properly trigger decay on tcache destroy. (@interwq, @amosbird) + - Fix tcache.flush. 
(@interwq) + - Detect whether explicit extent zero out is necessary with huge pages or + custom extent hooks, which may change the purge semantics. (@interwq) + - Fix a side effect caused by extent_max_active_fit combined with decay-based + purging, where freed extents can accumulate and not be reused for an + extended period of time. (@interwq, @mpghf) + - Fix a missing unlock on extent register error handling. (@zoulasc) + + Testing: + - Simplify the Travis script output. (@gnzlbg) + - Update the test scripts for FreeBSD. (@devnexen) + - Add unit tests for the producer-consumer pattern. (@interwq) + - Add Cirrus-CI config for FreeBSD builds. (@jasone) + - Add size-matching sanity checks on tcache flush. (@davidtgoldblatt, + @interwq) + + Incompatible changes: + - Remove --with-lg-page-sizes. (@davidtgoldblatt) + + Documentation: + - Attempt to build docs by default, however skip doc building when xsltproc + is missing. (@interwq, @cmuellner) + +* 5.1.0 (May 4, 2018) + + This release is primarily about fine-tuning, ranging from several new features + to numerous notable performance and portability enhancements. The release and + prior dev versions have been running in multiple large scale applications for + months, and the cumulative improvements are substantial in many cases. + + Given the long and successful production runs, this release is likely a good + candidate for applications to upgrade, from both jemalloc 5.0 and before. For + performance-critical applications, the newly added TUNING.md provides + guidelines on jemalloc tuning. + + New features: + - Implement transparent huge page support for internal metadata. (@interwq) + - Add opt.thp to allow enabling / disabling transparent huge pages for all + mappings. (@interwq) + - Add maximum background thread count option. (@djwatson) + - Allow prof_active to control opt.lg_prof_interval and prof.gdump. + (@interwq) + - Allow arena index lookup based on allocation addresses via mallctl. + (@lionkov) + - Allow disabling initial-exec TLS model. (@davidtgoldblatt, @KenMacD) + - Add opt.lg_extent_max_active_fit to set the max ratio between the size of + the active extent selected (to split off from) and the size of the requested + allocation. (@interwq, @davidtgoldblatt) + - Add retain_grow_limit to set the max size when growing virtual address + space. (@interwq) + - Add mallctl interfaces: + + arena.<i>.retain_grow_limit (@interwq) + + arenas.lookup (@lionkov) + + max_background_threads (@djwatson) + + opt.lg_extent_max_active_fit (@interwq) + + opt.max_background_threads (@djwatson) + + opt.metadata_thp (@interwq) + + opt.thp (@interwq) + + stats.metadata_thp (@interwq) + + Portability improvements: + - Support GNU/kFreeBSD configuration. (@paravoid) + - Support m68k, nios2 and SH3 architectures. (@paravoid) + - Fall back to FD_CLOEXEC when O_CLOEXEC is unavailable. (@zonyitoo) + - Fix symbol listing for cross-compiling. (@tamird) + - Fix high bits computation on ARM. (@davidtgoldblatt, @paravoid) + - Disable the CPU_SPINWAIT macro for Power. (@davidtgoldblatt, @marxin) + - Fix MSVC 2015 & 2017 builds. (@rustyx) + - Improve RISC-V support. (@EdSchouten) + - Set name mangling script in strict mode. (@nicolov) + - Avoid MADV_HUGEPAGE on ARM. (@marxin) + - Modify configure to determine return value of strerror_r. + (@davidtgoldblatt, @cferris1000) + - Make sure CXXFLAGS is tested with CPP compiler. (@nehaljwani) + - Fix 32-bit build on MSVC. (@rustyx) + - Fix external symbol on MSVC. (@maksqwe) + - Avoid a printf format specifier warning. 
(@jasone) + - Add configure option --disable-initial-exec-tls which can allow jemalloc to + be dynamically loaded after program startup. (@davidtgoldblatt, @KenMacD) + - AArch64: Add ILP32 support. (@cmuellner) + - Add --with-lg-vaddr configure option to support cross compiling. + (@cmuellner, @davidtgoldblatt) + + Optimizations and refactors: + - Improve active extent fit with extent_max_active_fit. This considerably + reduces fragmentation over time and improves virtual memory and metadata + usage. (@davidtgoldblatt, @interwq) + - Eagerly coalesce large extents to reduce fragmentation. (@interwq) + - sdallocx: only read size info when page aligned (i.e. possibly sampled), + which speeds up the sized deallocation path significantly. (@interwq) + - Avoid attempting new mappings for in place expansion with retain, since + it rarely succeeds in practice and causes high overhead. (@interwq) + - Refactor OOM handling in newImpl. (@wqfish) + - Add internal fine-grained logging functionality for debugging use. + (@davidtgoldblatt) + - Refactor arena / tcache interactions. (@davidtgoldblatt) + - Refactor extent management with dumpable flag. (@davidtgoldblatt) + - Add runtime detection of lazy purging. (@interwq) + - Use pairing heap instead of red-black tree for extents_avail. (@djwatson) + - Use sysctl on startup in FreeBSD. (@trasz) + - Use thread local prng state instead of atomic. (@djwatson) + - Make decay to always purge one more extent than before, because in + practice large extents are usually the ones that cross the decay threshold. + Purging the additional extent helps save memory as well as reduce VM + fragmentation. (@interwq) + - Fast division by dynamic values. (@davidtgoldblatt) + - Improve the fit for aligned allocation. (@interwq, @edwinsmith) + - Refactor extent_t bitpacking. (@rkmisra) + - Optimize the generated assembly for ticker operations. (@davidtgoldblatt) + - Convert stats printing to use a structured text emitter. (@davidtgoldblatt) + - Remove preserve_lru feature for extents management. (@djwatson) + - Consolidate two memory loads into one on the fast deallocation path. + (@davidtgoldblatt, @interwq) + + Bug fixes (most of the issues are only relevant to jemalloc 5.0): + - Fix deadlock with multithreaded fork in OS X. (@davidtgoldblatt) + - Validate returned file descriptor before use. (@zonyitoo) + - Fix a few background thread initialization and shutdown issues. (@interwq) + - Fix an extent coalesce + decay race by taking both coalescing extents off + the LRU list. (@interwq) + - Fix potentially unbound increase during decay, caused by one thread keep + stashing memory to purge while other threads generating new pages. The + number of pages to purge is checked to prevent this. (@interwq) + - Fix a FreeBSD bootstrap assertion. (@strejda, @interwq) + - Handle 32 bit mutex counters. (@rkmisra) + - Fix a indexing bug when creating background threads. (@davidtgoldblatt, + @binliu19) + - Fix arguments passed to extent_init. (@yuleniwo, @interwq) + - Fix addresses used for ordering mutexes. (@rkmisra) + - Fix abort_conf processing during bootstrap. (@interwq) + - Fix include path order for out-of-tree builds. (@cmuellner) + + Incompatible changes: + - Remove --disable-thp. (@interwq) + - Remove mallctl interfaces: + + config.thp (@interwq) + + Documentation: + - Add TUNING.md. (@interwq, @davidtgoldblatt, @djwatson) + * 5.0.1 (July 1, 2017) This bugfix release fixes several issues, most of which are obscure enough @@ -22,7 +275,7 @@ brevity. 
Much more detail can be found in the git revision history: unlikely to be an issue with other libc implementations. (@interwq) - Mask signals during background thread creation. This prevents signals from being inadvertently delivered to background threads. (@jasone, - @davidgoldblatt, @interwq) + @davidtgoldblatt, @interwq) - Avoid inactivity checks within background threads, in order to prevent recursive mutex acquisition. (@interwq) - Fix extent_grow_retained() to use the specified hooks when the @@ -515,7 +768,7 @@ brevity. Much more detail can be found in the git revision history: these fixes, xallocx() now tries harder to partially fulfill requests for optional extra space. Note that a couple of minor heap profiling optimizations are included, but these are better thought of as performance - fixes that were integral to disovering most of the other bugs. + fixes that were integral to discovering most of the other bugs. Optimizations: - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the diff --git a/deps/jemalloc/include/jemalloc/internal/arena_externs.h b/deps/jemalloc/include/jemalloc/internal/arena_externs.h index af16d15885..a4523ae0c4 100644 --- a/deps/jemalloc/include/jemalloc/internal/arena_externs.h +++ b/deps/jemalloc/include/jemalloc/internal/arena_externs.h @@ -1,33 +1,32 @@ #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/hook.h" #include "jemalloc/internal/pages.h" -#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; -extern const arena_bin_info_t arena_bin_info[NBINS]; - extern percpu_arena_mode_t opt_percpu_arena; extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; -void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - szind_t szind, uint64_t nrequests); -void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - size_t size); +extern size_t opt_oversize_threshold; +extern size_t oversize_threshold; + void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); + bin_stats_t *bstats, arena_stats_large_t *lstats, + arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET @@ -50,39 +49,47 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero); -typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); 
+typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *); extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); +void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, void *ptr); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, extent_t *extent, void *ptr); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); + size_t extra, bool zero, size_t *newsize); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache); + size_t size, size_t alignment, bool zero, tcache_t *tcache, + hook_ralloc_args_t *hook_args); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); +bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, + size_t *old_limit, size_t *new_limit); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void arena_boot(void); +bool arena_init_huge(void); +bool arena_is_huge(unsigned arena_ind); +arena_t *arena_choose_huge(tsd_t *tsd); +bin_t *arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard); +void arena_boot(sc_data_t *sc_data); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); diff --git a/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h index da5877060a..9abf7f6ac7 100644 --- a/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h +++ b/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h @@ -25,7 +25,7 @@ static inline bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); - if (likely(prof_interval == 0)) { + if (likely(prof_interval == 0 || !prof_active_get_unlocked())) { return false; } diff --git a/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h index 003abe116f..dd926575fc 100644 --- a/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h +++ b/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h @@ -4,15 +4,34 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" -static inline szind_t 
-arena_bin_index(arena_t *arena, arena_bin_t *bin) { - szind_t binind = (szind_t)(bin - arena->bins); - assert(binind < NBINS); - return binind; +JEMALLOC_ALWAYS_INLINE bool +arena_has_default_hooks(arena_t *arena) { + return (extent_hooks_get(arena) == &extent_hooks_default); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { + if (arena != NULL) { + return arena; + } + + /* + * For huge allocations, use the dedicated huge arena if both are true: + * 1) is using auto arena selection (i.e. arena == NULL), and 2) the + * thread is not assigned to a manual arena. + */ + if (unlikely(size >= oversize_threshold)) { + arena_t *tsd_arena = tsd_arena_get(tsd); + if (tsd_arena == NULL || arena_is_auto(tsd_arena)) { + return arena_choose_huge(tsd); + } + } + + return arena_choose(tsd, NULL); } JEMALLOC_ALWAYS_INLINE prof_tctx_t * @@ -64,6 +83,32 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { large_prof_tctx_reset(tsdn, extent); } +JEMALLOC_ALWAYS_INLINE nstime_t +arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, + alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + extent_t *extent = iealloc(tsdn, ptr); + /* + * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're + * sure we have a sampled allocation. + */ + assert(!extent_slab_get(extent)); + return large_prof_alloc_time_get(extent); +} + +JEMALLOC_ALWAYS_INLINE void +arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + nstime_t t) { + cassert(config_prof); + assert(ptr != NULL); + + extent_t *extent = iealloc(tsdn, ptr); + assert(!extent_slab_get(extent)); + large_prof_alloc_time_set(extent, t); +} + JEMALLOC_ALWAYS_INLINE void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { tsd_t *tsd; @@ -90,14 +135,33 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { arena_decay_ticks(tsdn, arena, 1); } +/* Purge a single extent to retained / unmapped directly. */ +JEMALLOC_ALWAYS_INLINE void +arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t **r_extent_hooks, + extent_t *extent) { + size_t extent_size = extent_size_get(extent); + extent_dalloc_wrapper(tsdn, arena, + r_extent_hooks, extent); + if (config_stats) { + /* Update stats accordingly. */ + arena_stats_lock(tsdn, &arena->stats); + arena_stats_add_u64(tsdn, &arena->stats, + &arena->decay_dirty.stats->nmadvise, 1); + arena_stats_add_u64(tsdn, &arena->stats, + &arena->decay_dirty.stats->purged, extent_size >> LG_PAGE); + arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, + extent_size); + arena_stats_unlock(tsdn, &arena->stats); + } +} + JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); - assert(size != 0); if (likely(tcache != NULL)) { - if (likely(size <= SMALL_MAXCLASS)) { + if (likely(size <= SC_SMALL_MAXCLASS)) { return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); } @@ -126,7 +190,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) { szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind != NSIZES); + assert(szind != SC_NSIZES); return sz_index2size(szind); } @@ -159,12 +223,22 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { /* Only slab members should be looked up via interior pointers. 
*/ assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); - assert(szind != NSIZES); + assert(szind != SC_NSIZES); return sz_index2size(szind); } static inline void +arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { + if (config_prof && unlikely(szind < SC_NBINS)) { + arena_dalloc_promoted(tsdn, ptr, NULL, true); + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } +} + +static inline void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); @@ -180,7 +254,7 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == extent_szind_get(extent)); - assert(szind < NSIZES); + assert(szind < SC_NSIZES); assert(slab == extent_slab_get(extent)); } @@ -188,6 +262,21 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { /* Small allocation. */ arena_dalloc_small(tsdn, ptr); } else { + arena_dalloc_large_no_tcache(tsdn, ptr, szind); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, + bool slow_path) { + if (szind < nhbins) { + if (config_prof && unlikely(szind < SC_NBINS)) { + arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind, + slow_path); + } + } else { extent_t *extent = iealloc(tsdn, ptr); large_dalloc(tsdn, extent); } @@ -210,7 +299,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (alloc_ctx != NULL) { szind = alloc_ctx->szind; slab = alloc_ctx->slab; - assert(szind != NSIZES); + assert(szind != SC_NSIZES); } else { rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, @@ -222,7 +311,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == extent_szind_get(extent)); - assert(szind < NSIZES); + assert(szind < SC_NSIZES); assert(slab == extent_slab_get(extent)); } @@ -231,25 +320,14 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { - if (szind < nhbins) { - if (config_prof && unlikely(szind < NBINS)) { - arena_dalloc_promoted(tsdn, ptr, tcache, - slow_path); - } else { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - szind, slow_path); - } - } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); - } + arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path); } } static inline void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert(ptr != NULL); - assert(size <= LARGE_MAXCLASS); + assert(size <= SC_LARGE_MAXCLASS); szind_t szind; bool slab; @@ -259,7 +337,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { * object, so base szind and slab on the given size. */ szind = sz_size2index(size); - slab = (szind < NBINS); + slab = (szind < SC_NBINS); } if ((config_prof && opt_prof) || config_debug) { @@ -271,7 +349,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { (uintptr_t)ptr, true, &szind, &slab); assert(szind == sz_size2index(size)); - assert((config_prof && opt_prof) || slab == (szind < NBINS)); + assert((config_prof && opt_prof) || slab == (szind < SC_NBINS)); if (config_debug) { extent_t *extent = rtree_extent_read(tsdn, @@ -285,8 +363,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); + arena_dalloc_large_no_tcache(tsdn, ptr, szind); } } @@ -295,7 +372,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, alloc_ctx_t *alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); - assert(size <= LARGE_MAXCLASS); + assert(size <= SC_LARGE_MAXCLASS); if (unlikely(tcache == NULL)) { arena_sdalloc_no_tcache(tsdn, ptr, size); @@ -304,7 +381,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, szind_t szind; bool slab; - UNUSED alloc_ctx_t local_ctx; + alloc_ctx_t local_ctx; if (config_prof && opt_prof) { if (alloc_ctx == NULL) { /* Uncommon case and should be a static check. */ @@ -325,7 +402,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, * object, so base szind and slab on the given size. */ szind = sz_size2index(size); - slab = (szind < NBINS); + slab = (szind < SC_NBINS); } if (config_debug) { @@ -343,18 +420,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { - if (szind < nhbins) { - if (config_prof && unlikely(szind < NBINS)) { - arena_dalloc_promoted(tsdn, ptr, tcache, - slow_path); - } else { - tcache_dalloc_large(tsdn_tsd(tsdn), - tcache, ptr, szind, slow_path); - } - } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); - } + arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path); } } diff --git a/deps/jemalloc/include/jemalloc/internal/arena_stats.h b/deps/jemalloc/include/jemalloc/internal/arena_stats.h new file mode 100644 index 0000000000..23949ed926 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/arena_stats.h @@ -0,0 +1,271 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STATS_H +#define JEMALLOC_INTERNAL_ARENA_STATS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/sc.h" + +JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS + +/* + * In those architectures that support 64-bit atomics, we use atomic updates for + * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize + * externally. + */ +#ifdef JEMALLOC_ATOMIC_U64 +typedef atomic_u64_t arena_stats_u64_t; +#else +/* Must hold the arena stats mutex while reading atomically. */ +typedef uint64_t arena_stats_u64_t; +#endif + +typedef struct arena_stats_large_s arena_stats_large_t; +struct arena_stats_large_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + arena_stats_u64_t nmalloc; + arena_stats_u64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + arena_stats_u64_t nrequests; /* Partially derived. */ + /* + * Number of tcache fills / flushes for large (similarly, periodically + * merged). Note that there is no large tcache batch-fill currently + * (i.e. only fill 1 at a time); however flush may be batched. + */ + arena_stats_u64_t nfills; /* Partially derived. */ + arena_stats_u64_t nflushes; /* Partially derived. */ + + /* Current number of allocations of this size class. */ + size_t curlextents; /* Derived. */ +}; + +typedef struct arena_stats_decay_s arena_stats_decay_t; +struct arena_stats_decay_s { + /* Total number of purge sweeps. 
*/ + arena_stats_u64_t npurge; + /* Total number of madvise calls made. */ + arena_stats_u64_t nmadvise; + /* Total number of pages purged. */ + arena_stats_u64_t purged; +}; + +typedef struct arena_stats_extents_s arena_stats_extents_t; +struct arena_stats_extents_s { + /* + * Stats for a given index in the range [0, SC_NPSIZES] in an extents_t. + * We track both bytes and # of extents: two extents in the same bucket + * may have different sizes if adjacent size classes differ by more than + * a page, so bytes cannot always be derived from # of extents. + */ + atomic_zu_t ndirty; + atomic_zu_t dirty_bytes; + atomic_zu_t nmuzzy; + atomic_zu_t muzzy_bytes; + atomic_zu_t nretained; + atomic_zu_t retained_bytes; +}; + +/* + * Arena stats. Note that fields marked "derived" are not directly maintained + * within the arena code; rather their values are derived during stats merge + * requests. + */ +typedef struct arena_stats_s arena_stats_t; +struct arena_stats_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif + + /* Number of bytes currently mapped, excluding retained memory. */ + atomic_zu_t mapped; /* Partially derived. */ + + /* + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). + */ + atomic_zu_t retained; /* Derived. */ + + /* Number of extent_t structs allocated by base, but not being used. */ + atomic_zu_t extent_avail; + + arena_stats_decay_t decay_dirty; + arena_stats_decay_t decay_muzzy; + + atomic_zu_t base; /* Derived. */ + atomic_zu_t internal; + atomic_zu_t resident; /* Derived. */ + atomic_zu_t metadata_thp; + + atomic_zu_t allocated_large; /* Derived. */ + arena_stats_u64_t nmalloc_large; /* Derived. */ + arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nfills_large; /* Derived. */ + arena_stats_u64_t nflushes_large; /* Derived. */ + arena_stats_u64_t nrequests_large; /* Derived. */ + + /* VM space had to be leaked (undocumented). Normally 0. */ + atomic_zu_t abandoned_vm; + + /* Number of bytes cached in tcache associated with this arena. */ + atomic_zu_t tcache_bytes; /* Derived. */ + + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; + + /* One element for each large size class. */ + arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; + + /* Arena uptime. */ + nstime_t uptime; +}; + +static inline bool +arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { + if (config_debug) { + for (size_t i = 0; i < sizeof(arena_stats_t); i++) { + assert(((char *)arena_stats)[i] == 0); + } + } +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", + WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { + return true; + } +#endif + /* Memory is zeroed, so there is no need to clear stats. 
*/ + return false; +} + +static inline void +arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_lock(tsdn, &arena_stats->mtx); +#endif +} + +static inline void +arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_unlock(tsdn, &arena_stats->mtx); +#endif +} + +static inline uint64_t +arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return *p; +#endif +} + +static inline void +arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p += x; +#endif +} + +static inline void +arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p -= x; + assert(*p + x >= *p); +#endif +} + +/* + * Non-atomically sets *dst += src. *dst needs external synchronization. + * This lets us avoid the cost of a fetch_add when its unnecessary (note that + * the types here are atomic). + */ +static inline void +arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); + atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); +#else + *dst += src; +#endif +} + +static inline size_t +arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, + atomic_zu_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_zu(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return atomic_load_zu(p, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, + atomic_zu_t *p, size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur + x, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, + atomic_zu_t *p, size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur - x, ATOMIC_RELAXED); +#endif +} + +/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ +static inline void +arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { + size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); + atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); +} + +static inline void +arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + szind_t szind, uint64_t nrequests) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS]; + arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests); + arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1); + arena_stats_unlock(tsdn, arena_stats); +} + +static inline void +arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); + arena_stats_unlock(tsdn, arena_stats); +} + +#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h b/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h index d1fffec193..eeab57fd6e 100644 --- a/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h +++ b/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h @@ -1,54 +1,19 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" #include "jemalloc/internal/ticker.h" -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - * - * Each slab has the following layout: - * - * /--------------------\ - * | region 0 | - * |--------------------| - * | region 1 | - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | region nregs-1 | - * \--------------------/ - */ -struct arena_bin_info_s { - /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; - - /* Total size of a slab for this bin's size class. */ - size_t slab_size; - - /* Total number of regions in a slab for this bin's size class. */ - uint32_t nregs; - - /* - * Metadata used to manipulate bitmaps for slabs associated with this - * bin. - */ - bitmap_info_t bitmap_info; -}; - struct arena_decay_s { /* Synchronizes all non-atomic fields. */ malloc_mutex_t mtx; @@ -104,37 +69,11 @@ struct arena_decay_s { * arena and ctl code. * * Synchronization: Same as associated arena's stats field. */ - decay_stats_t *stats; + arena_stats_decay_t *stats; /* Peak number of pages in associated extents. Used for debug only. */ uint64_t ceil_npages; }; -struct arena_bin_s { - /* All operations on arena_bin_t fields require lock ownership. */ - malloc_mutex_t lock; - - /* - * Current slab being used to service allocations of this bin's size - * class. slabcur is independent of slabs_{nonfull,full}; whenever - * slabcur is reassigned, the previous slab must be deallocated or - * inserted into slabs_{nonfull,full}. 
- */ - extent_t *slabcur; - - /* - * Heap of non-full slabs. This heap is used to assure that new - * allocations come from the non-full slab that is oldest/lowest in - * memory. - */ - extent_heap_t slabs_nonfull; - - /* List used to track full slabs. */ - extent_list_t slabs_full; - - /* Bin statistics. */ - malloc_bin_stats_t stats; -}; - struct arena_s { /* * Number of threads currently assigned to this arena. Each thread has @@ -151,6 +90,9 @@ struct arena_s { */ atomic_u_t nthreads[2]; + /* Next bin shard for binding new threads. Synchronization: atomic. */ + atomic_u_t binshard_next; + /* * When percpu_arena is enabled, to amortize the cost of reading / * updating the current CPU id, track the most recent thread accessing @@ -162,18 +104,18 @@ struct arena_s { arena_stats_t stats; /* - * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit if - * opt_stats_print is enabled. + * Lists of tcaches and cache_bin_array_descriptors for extant threads + * associated with this arena. Stats from these are merged + * incrementally, and at exit if opt_stats_print is enabled. * * Synchronization: tcache_ql_mtx. */ - ql_head(tcache_t) tcache_ql; - malloc_mutex_t tcache_ql_mtx; + ql_head(tcache_t) tcache_ql; + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t tcache_ql_mtx; /* Synchronization: internal. */ prof_accum_t prof_accum; - uint64_t prof_accumbytes; /* * PRNG state for cache index randomization of large allocation base @@ -239,9 +181,14 @@ struct arena_s { * be effective even if multiple arenas' extent allocation requests are * highly interleaved. * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. + * * Synchronization: extent_grow_mtx */ pszind_t extent_grow_next; + pszind_t retain_grow_limit; malloc_mutex_t extent_grow_mtx; /* @@ -251,6 +198,7 @@ struct arena_s { * Synchronization: extent_avail_mtx. */ extent_tree_t extent_avail; + atomic_zu_t extent_avail_cnt; malloc_mutex_t extent_avail_mtx; /* @@ -258,7 +206,7 @@ struct arena_s { * * Synchronization: internal. */ - arena_bin_t bins[NBINS]; + bins_t bins[SC_NBINS]; /* * Base allocator, from which arena metadata are allocated. diff --git a/deps/jemalloc/include/jemalloc/internal/arena_types.h b/deps/jemalloc/include/jemalloc/internal/arena_types.h index a691bd811e..624937e4f5 100644 --- a/deps/jemalloc/include/jemalloc/internal/arena_types.h +++ b/deps/jemalloc/include/jemalloc/internal/arena_types.h @@ -1,20 +1,20 @@ #ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H #define JEMALLOC_INTERNAL_ARENA_TYPES_H +#include "jemalloc/internal/sc.h" + /* Maximum number of regions in one slab. */ -#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) +#define LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) #define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) /* Default decay times in milliseconds. */ #define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) -#define MUZZY_DECAY_MS_DEFAULT ZD(10 * 1000) +#define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. 
*/ #define DECAY_NTICKS_PER_UPDATE 1000 typedef struct arena_slab_data_s arena_slab_data_t; -typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_decay_s arena_decay_t; -typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; typedef struct alloc_ctx_s alloc_ctx_t; @@ -42,4 +42,10 @@ typedef enum { #define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) #define PERCPU_ARENA_DEFAULT percpu_arena_disabled +/* + * When allocation_size >= oversize_threshold, use the dedicated huge arena + * (unless have explicitly spicified arena index). 0 disables the feature. + */ +#define OVERSIZE_THRESHOLD_DEFAULT (8 << 20) + #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h index adadb1a3ac..a76f54cee3 100644 --- a/deps/jemalloc/include/jemalloc/internal/atomic.h +++ b/deps/jemalloc/include/jemalloc/internal/atomic.h @@ -1,12 +1,19 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_H #define JEMALLOC_INTERNAL_ATOMIC_H -#define ATOMIC_INLINE static inline +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE +#define JEMALLOC_U8_ATOMICS #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) # include "jemalloc/internal/atomic_gcc_atomic.h" +# if !defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS) +# undef JEMALLOC_U8_ATOMICS +# endif #elif defined(JEMALLOC_GCC_SYNC_ATOMICS) # include "jemalloc/internal/atomic_gcc_sync.h" +# if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS) +# undef JEMALLOC_U8_ATOMICS +# endif #elif defined(_MSC_VER) # include "jemalloc/internal/atomic_msvc.h" #elif defined(JEMALLOC_C11_ATOMICS) @@ -66,6 +73,8 @@ JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_INT_ATOMICS(uint8_t, u8, 0) + JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) #ifdef JEMALLOC_ATOMIC_U64 diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h index 6b73a14f81..471515e82f 100644 --- a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h +++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h @@ -67,7 +67,8 @@ atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ \ ATOMIC_INLINE bool \ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ + UNUSED type *expected, type desired, \ + atomic_memory_order_t success_mo, \ atomic_memory_order_t failure_mo) { \ return __atomic_compare_exchange(&a->repr, expected, &desired, \ true, atomic_enum_to_builtin(success_mo), \ @@ -76,7 +77,8 @@ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ \ ATOMIC_INLINE bool \ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ + UNUSED type *expected, type desired, \ + atomic_memory_order_t success_mo, \ atomic_memory_order_t failure_mo) { \ return __atomic_compare_exchange(&a->repr, expected, &desired, \ false, \ diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h index 30846e4d27..e02b7cbe3c 100644 --- a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h +++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h @@ -27,8 +27,10 @@ atomic_fence(atomic_memory_order_t mo) { asm volatile("" ::: "memory"); # if 
defined(__i386__) || defined(__x86_64__) /* This is implicit on x86. */ -# elif defined(__ppc__) +# elif defined(__ppc64__) asm volatile("lwsync"); +# elif defined(__ppc__) + asm volatile("sync"); # elif defined(__sparc__) && defined(__arch64__) if (mo == atomic_memory_order_acquire) { asm volatile("membar #LoadLoad | #LoadStore"); @@ -113,8 +115,8 @@ atomic_store_##short_type(atomic_##short_type##_t *a, \ } \ \ ATOMIC_INLINE type \ -atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ /* \ * Because of FreeBSD, we care about gcc 4.2, which doesn't have\ * an atomic exchange builtin. We fake it with a CAS loop. \ @@ -129,8 +131,9 @@ atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ \ ATOMIC_INLINE bool \ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ + type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ desired); \ if (prev == *expected) { \ @@ -142,8 +145,9 @@ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ } \ ATOMIC_INLINE bool \ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ + type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ desired); \ if (prev == *expected) { \ diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h b/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h index 8b4b8471a9..0f997e18be 100644 --- a/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h +++ b/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h @@ -2,11 +2,12 @@ #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H extern bool opt_background_thread; +extern size_t opt_max_background_threads; extern malloc_mutex_t background_thread_lock; extern atomic_b_t background_thread_enabled_state; extern size_t n_background_threads; +extern size_t max_background_threads; extern background_thread_info_t *background_thread_info; -extern bool can_enable_background_thread; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h b/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h index ef50231e8d..f85e86fa37 100644 --- a/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h +++ b/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h @@ -15,7 +15,12 @@ background_thread_enabled_set(tsdn_t *tsdn, bool state) { JEMALLOC_ALWAYS_INLINE background_thread_info_t * arena_background_thread_info_get(arena_t *arena) { unsigned arena_ind = arena_ind_get(arena); - return &background_thread_info[arena_ind % ncpus]; + return &background_thread_info[arena_ind % max_background_threads]; +} + +JEMALLOC_ALWAYS_INLINE background_thread_info_t * +background_thread_info_get(size_t ind) { + return &background_thread_info[ind % max_background_threads]; } JEMALLOC_ALWAYS_INLINE uint64_t diff --git 
a/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h b/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h index e69a7d022b..c02aa434c7 100644 --- a/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h +++ b/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h @@ -8,6 +8,8 @@ #endif #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX +#define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT +#define DEFAULT_NUM_BACKGROUND_THREAD 4 typedef enum { background_thread_stopped, diff --git a/deps/jemalloc/include/jemalloc/internal/base_externs.h b/deps/jemalloc/include/jemalloc/internal/base_externs.h index a4fd5ac7d9..7b705c9b4d 100644 --- a/deps/jemalloc/include/jemalloc/internal/base_externs.h +++ b/deps/jemalloc/include/jemalloc/internal/base_externs.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H #define JEMALLOC_INTERNAL_BASE_EXTERNS_H +extern metadata_thp_mode_t opt_metadata_thp; +extern const char *metadata_thp_mode_names[]; + base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); void base_delete(tsdn_t *tsdn, base_t *base); @@ -10,7 +13,7 @@ extent_hooks_t *base_extent_hooks_set(base_t *base, void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped); + size_t *resident, size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); void base_postfork_parent(tsdn_t *tsdn, base_t *base); void base_postfork_child(tsdn_t *tsdn, base_t *base); diff --git a/deps/jemalloc/include/jemalloc/internal/base_inlines.h b/deps/jemalloc/include/jemalloc/internal/base_inlines.h index 931560bfae..aec0e2e1e1 100644 --- a/deps/jemalloc/include/jemalloc/internal/base_inlines.h +++ b/deps/jemalloc/include/jemalloc/internal/base_inlines.h @@ -6,4 +6,8 @@ base_ind_get(const base_t *base) { return base->ind; } +static inline bool +metadata_thp_enabled(void) { + return (opt_metadata_thp != metadata_thp_disabled); +} #endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/base_structs.h b/deps/jemalloc/include/jemalloc/internal/base_structs.h index 18e227bd5a..07f214eb2f 100644 --- a/deps/jemalloc/include/jemalloc/internal/base_structs.h +++ b/deps/jemalloc/include/jemalloc/internal/base_structs.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" /* Embedded at the beginning of every block of base-managed virtual memory. */ struct base_block_s { @@ -30,6 +30,8 @@ struct base_s { /* Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; + /* Using THP when true (metadata_thp auto mode). */ + bool auto_thp_switched; /* * Most recent size class in the series of increasingly large base * extents. Logarithmic spacing between subsequent allocations ensures @@ -44,12 +46,14 @@ struct base_s { base_block_t *blocks; /* Heap of extents that track unused trailing space within blocks. */ - extent_heap_t avail[NSIZES]; + extent_heap_t avail[SC_NSIZES]; /* Stats, only maintained if config_stats. */ size_t allocated; size_t resident; size_t mapped; + /* Number of THP regions touched. 
*/ + size_t n_thp; }; #endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/base_types.h b/deps/jemalloc/include/jemalloc/internal/base_types.h index be7ee82589..b6db77df7c 100644 --- a/deps/jemalloc/include/jemalloc/internal/base_types.h +++ b/deps/jemalloc/include/jemalloc/internal/base_types.h @@ -4,4 +4,30 @@ typedef struct base_block_s base_block_t; typedef struct base_s base_t; +#define METADATA_THP_DEFAULT metadata_thp_disabled + +/* + * In auto mode, arenas switch to huge pages for the base allocator on the + * second base block. a0 switches to thp on the 5th block (after 20 megabytes + * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. + */ + +#define BASE_AUTO_THP_THRESHOLD 2 +#define BASE_AUTO_THP_THRESHOLD_A0 5 + +typedef enum { + metadata_thp_disabled = 0, + /* + * Lazily enable hugepage for metadata. To avoid high RSS caused by THP + * + low usage arena (i.e. THP becomes a significant percentage), the + * "auto" option only starts using THP after a base allocator used up + * the first THP region. Starting from the second hugepage (in a single + * arena), "auto" behaves the same as "always", i.e. madvise hugepage + * right away. + */ + metadata_thp_auto = 1, + metadata_thp_always = 2, + metadata_thp_mode_limit = 3 +} metadata_thp_mode_t; + #endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/bin.h b/deps/jemalloc/include/jemalloc/internal/bin.h new file mode 100644 index 0000000000..8547e89309 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/bin.h @@ -0,0 +1,123 @@ +#ifndef JEMALLOC_INTERNAL_BIN_H +#define JEMALLOC_INTERNAL_BIN_H + +#include "jemalloc/internal/bin_stats.h" +#include "jemalloc/internal/bin_types.h" +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/sc.h" + +/* + * A bin contains a set of extents that are currently being used for slab + * allocations. + */ + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +typedef struct bin_info_s bin_info_t; +struct bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. */ + uint32_t nregs; + + /* Number of sharded bins in each arena for this size class. */ + uint32_t n_shards; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. + */ + bitmap_info_t bitmap_info; +}; + +extern bin_info_t bin_infos[SC_NBINS]; + +typedef struct bin_s bin_t; +struct bin_s { + /* All operations on bin_t fields require lock ownership. */ + malloc_mutex_t lock; + + /* + * Current slab being used to service allocations of this bin's size + * class. slabcur is independent of slabs_{nonfull,full}; whenever + * slabcur is reassigned, the previous slab must be deallocated or + * inserted into slabs_{nonfull,full}. 
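The metadata_thp additions above (opt_metadata_thp, BASE_AUTO_THP_THRESHOLD{,_A0}, metadata_thp_mode_t) gate transparent huge pages for allocator metadata. A rough sketch of the "auto" decision as the comments describe it; the struct and helper below are invented for illustration and are not part of the patch:

    /* Illustration of the "auto" policy described in base_types.h above:
     * switch a base to THP once it has allocated enough blocks, and do so at
     * most once. All names here are sketch-only. */
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    enum { SKETCH_THP_THRESHOLD = 2, SKETCH_THP_THRESHOLD_A0 = 5 };

    typedef struct {
        bool is_a0;          /* base 0 backs shared metadata (e.g. rtree) */
        size_t n_blocks;     /* base blocks allocated so far */
        bool thp_switched;   /* mirrors auto_thp_switched */
    } sketch_base_t;

    static bool
    sketch_should_switch_to_thp(const sketch_base_t *base) {
        if (base->thp_switched) {
            return false;    /* the switch happens at most once */
        }
        size_t threshold = base->is_a0 ?
            SKETCH_THP_THRESHOLD_A0 : SKETCH_THP_THRESHOLD;
        return base->n_blocks >= threshold;
    }

    int
    main(void) {
        sketch_base_t arena_base = { false, 2, false };
        sketch_base_t a0_base = { true, 2, false };
        printf("arena base switches: %d, a0 base switches: %d\n",
            sketch_should_switch_to_thp(&arena_base),
            sketch_should_switch_to_thp(&a0_base));
        return 0;
    }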
+ */ + extent_t *slabcur; + + /* + * Heap of non-full slabs. This heap is used to assure that new + * allocations come from the non-full slab that is oldest/lowest in + * memory. + */ + extent_heap_t slabs_nonfull; + + /* List used to track full slabs. */ + extent_list_t slabs_full; + + /* Bin statistics. */ + bin_stats_t stats; +}; + +/* A set of sharded bins of the same size class. */ +typedef struct bins_s bins_t; +struct bins_s { + /* Sharded bins. Dynamically sized. */ + bin_t *bin_shards; +}; + +void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]); +bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, + size_t end_size, size_t nshards); +void bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); + +/* Initializes a bin to empty. Returns true on error. */ +bool bin_init(bin_t *bin); + +/* Forking. */ +void bin_prefork(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); + +/* Stats. */ +static inline void +bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { + malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_prof_accum(tsdn, &dst_bin_stats->mutex_data, &bin->lock); + dst_bin_stats->nmalloc += bin->stats.nmalloc; + dst_bin_stats->ndalloc += bin->stats.ndalloc; + dst_bin_stats->nrequests += bin->stats.nrequests; + dst_bin_stats->curregs += bin->stats.curregs; + dst_bin_stats->nfills += bin->stats.nfills; + dst_bin_stats->nflushes += bin->stats.nflushes; + dst_bin_stats->nslabs += bin->stats.nslabs; + dst_bin_stats->reslabs += bin->stats.reslabs; + dst_bin_stats->curslabs += bin->stats.curslabs; + dst_bin_stats->nonfull_slabs += bin->stats.nonfull_slabs; + malloc_mutex_unlock(tsdn, &bin->lock); +} + +#endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/bin_stats.h b/deps/jemalloc/include/jemalloc/internal/bin_stats.h new file mode 100644 index 0000000000..d04519c824 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/bin_stats.h @@ -0,0 +1,54 @@ +#ifndef JEMALLOC_INTERNAL_BIN_STATS_H +#define JEMALLOC_INTERNAL_BIN_STATS_H + +#include "jemalloc/internal/mutex_prof.h" + +typedef struct bin_stats_s bin_stats_t; +struct bin_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; + + /* Total number of slabs created for this bin's size class. */ + uint64_t nslabs; + + /* + * Total number of slabs reused by extracting them from the slabs heap + * for this bin's size class. + */ + uint64_t reslabs; + + /* Current number of slabs in this bin. */ + size_t curslabs; + + /* Current size of nonfull slabs heap in this bin. 
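As the bin_stats_t comments above note, nrequests counts every request of the size class (tcache hits included) while nmalloc/ndalloc count only operations served directly by the bin, in which case nmalloc - ndalloc tracks the regions still live. A toy calculation with made-up numbers, only to show how the counters relate:

    /* Toy numbers only; illustrates the counter relationships documented in
     * bin_stats.h above. */
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void) {
        uint64_t nmalloc = 1000, ndalloc = 900, nrequests = 25000;
        uint64_t curregs = nmalloc - ndalloc;      /* regions still allocated */
        double tcache_share = 100.0 * (double)(nrequests - nmalloc) /
            (double)nrequests;                     /* requests absorbed by tcache */
        printf("live regions: %llu, ~%.1f%% of requests served from tcache\n",
            (unsigned long long)curregs, tcache_share);
        return 0;
    }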
*/ + size_t nonfull_slabs; + + mutex_prof_data_t mutex_data; +}; + +#endif /* JEMALLOC_INTERNAL_BIN_STATS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/bin_types.h b/deps/jemalloc/include/jemalloc/internal/bin_types.h new file mode 100644 index 0000000000..3533606b90 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/bin_types.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_BIN_TYPES_H +#define JEMALLOC_INTERNAL_BIN_TYPES_H + +#include "jemalloc/internal/sc.h" + +#define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH) +#define N_BIN_SHARDS_DEFAULT 1 + +/* Used in TSD static initializer only. Real init in arena_bind(). */ +#define TSD_BINSHARDS_ZERO_INITIALIZER {{UINT8_MAX}} + +typedef struct tsd_binshards_s tsd_binshards_t; +struct tsd_binshards_s { + uint8_t binshard[SC_NBINS]; +}; + +#endif /* JEMALLOC_INTERNAL_BIN_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/bit_util.h b/deps/jemalloc/include/jemalloc/internal/bit_util.h index 8d078a8a35..c045eb8687 100644 --- a/deps/jemalloc/include/jemalloc/internal/bit_util.h +++ b/deps/jemalloc/include/jemalloc/internal/bit_util.h @@ -27,6 +27,25 @@ ffs_u(unsigned bitmap) { return JEMALLOC_INTERNAL_FFS(bitmap); } +#ifdef JEMALLOC_INTERNAL_POPCOUNTL +BIT_UTIL_INLINE unsigned +popcount_lu(unsigned long bitmap) { + return JEMALLOC_INTERNAL_POPCOUNTL(bitmap); +} +#endif + +/* + * Clears first unset bit in bitmap, and returns + * place of bit. bitmap *must not* be 0. + */ + +BIT_UTIL_INLINE size_t +cfs_lu(unsigned long* bitmap) { + size_t bit = ffs_lu(*bitmap) - 1; + *bitmap ^= ZU(1) << bit; + return bit; +} + BIT_UTIL_INLINE unsigned ffs_zu(size_t bitmap) { #if LG_SIZEOF_PTR == LG_SIZEOF_INT @@ -63,6 +82,22 @@ ffs_u32(uint32_t bitmap) { BIT_UTIL_INLINE uint64_t pow2_ceil_u64(uint64_t x) { +#if (defined(__amd64__) || defined(__x86_64__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) + if(unlikely(x <= 1)) { + return x; + } + size_t msb_on_index; +#if (defined(__amd64__) || defined(__x86_64__)) + asm ("bsrq %1, %0" + : "=r"(msb_on_index) // Outputs. + : "r"(x-1) // Inputs. + ); +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) + msb_on_index = (63 ^ __builtin_clzll(x - 1)); +#endif + assert(msb_on_index < 63); + return 1ULL << (msb_on_index + 1); +#else x--; x |= x >> 1; x |= x >> 2; @@ -72,10 +107,27 @@ pow2_ceil_u64(uint64_t x) { x |= x >> 32; x++; return x; +#endif } BIT_UTIL_INLINE uint32_t pow2_ceil_u32(uint32_t x) { +#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__))) + if(unlikely(x <= 1)) { + return x; + } + size_t msb_on_index; +#if (defined(__i386__)) + asm ("bsr %1, %0" + : "=r"(msb_on_index) // Outputs. + : "r"(x-1) // Inputs. + ); +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) + msb_on_index = (31 ^ __builtin_clz(x - 1)); +#endif + assert(msb_on_index < 31); + return 1U << (msb_on_index + 1); +#else x--; x |= x >> 1; x |= x >> 2; @@ -84,6 +136,7 @@ pow2_ceil_u32(uint32_t x) { x |= x >> 16; x++; return x; +#endif } /* Compute the smallest power of 2 that is >= x. */ @@ -160,6 +213,27 @@ lg_floor(size_t x) { } #endif +BIT_UTIL_INLINE unsigned +lg_ceil(size_t x) { + return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1); +} + #undef BIT_UTIL_INLINE +/* A compile-time version of lg_floor and lg_ceil. */ +#define LG_FLOOR_1(x) 0 +#define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1)) +#define LG_FLOOR_4(x) (x < (1ULL << 2) ? LG_FLOOR_2(x) : 2 + LG_FLOOR_2(x >> 2)) +#define LG_FLOOR_8(x) (x < (1ULL << 4) ? 
LG_FLOOR_4(x) : 4 + LG_FLOOR_4(x >> 4)) +#define LG_FLOOR_16(x) (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8)) +#define LG_FLOOR_32(x) (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16)) +#define LG_FLOOR_64(x) (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32)) +#if LG_SIZEOF_PTR == 2 +# define LG_FLOOR(x) LG_FLOOR_32((x)) +#else +# define LG_FLOOR(x) LG_FLOOR_64((x)) +#endif + +#define LG_CEIL(x) (LG_FLOOR(x) + (((x) & ((x) - 1)) == 0 ? 0 : 1)) + #endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h index ac990290a5..c3f9cb490f 100644 --- a/deps/jemalloc/include/jemalloc/internal/bitmap.h +++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h @@ -3,18 +3,18 @@ #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/bit_util.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" typedef unsigned long bitmap_t; #define LG_SIZEOF_BITMAP LG_SIZEOF_LONG /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES +#if LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES) /* Maximum bitmap bit count is determined by maximum regions per slab. */ # define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS #else /* Maximum bitmap bit count is determined by number of extent size classes. */ -# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES +# define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES) #endif #define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) diff --git a/deps/jemalloc/include/jemalloc/internal/cache_bin.h b/deps/jemalloc/include/jemalloc/internal/cache_bin.h new file mode 100644 index 0000000000..d14556a3da --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/cache_bin.h @@ -0,0 +1,131 @@ +#ifndef JEMALLOC_INTERNAL_CACHE_BIN_H +#define JEMALLOC_INTERNAL_CACHE_BIN_H + +#include "jemalloc/internal/ql.h" + +/* + * The cache_bins are the mechanism that the tcache and the arena use to + * communicate. The tcache fills from and flushes to the arena by passing a + * cache_bin_t to fill/flush. When the arena needs to pull stats from the + * tcaches associated with it, it does so by iterating over its + * cache_bin_array_descriptor_t objects and reading out per-bin stats it + * contains. This makes it so that the arena need not know about the existence + * of the tcache at all. + */ + + +/* + * The count of the number of cached allocations in a bin. We make this signed + * so that negative numbers can encode "invalid" states (e.g. a low water mark + * of -1 for a cache that has been depleted). + */ +typedef int32_t cache_bin_sz_t; + +typedef struct cache_bin_stats_s cache_bin_stats_t; +struct cache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +typedef struct cache_bin_info_s cache_bin_info_t; +struct cache_bin_info_s { + /* Upper limit on ncached. */ + cache_bin_sz_t ncached_max; +}; + +typedef struct cache_bin_s cache_bin_t; +struct cache_bin_s { + /* Min # cached since last GC. */ + cache_bin_sz_t low_water; + /* # of cached objects. */ + cache_bin_sz_t ncached; + /* + * ncached and stats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus less write-backs. 
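The bit_util.h hunk above gains a bsr/__builtin_clzll fast path for pow2_ceil_u64/u32 plus compile-time LG_FLOOR/LG_CEIL macros. A minimal sketch of the same round-up-to-power-of-two idea, assuming a GCC/Clang-style compiler for the builtin:

    /* Sketch of the clz-based round-up behind the new pow2_ceil fast path;
     * relies on __builtin_clzll, so it assumes a GCC/Clang-style compiler. */
    #include <assert.h>
    #include <stdint.h>

    static uint64_t
    sketch_pow2_ceil(uint64_t x) {
        if (x <= 1) {
            return x;
        }
        /* 63 - clz(x - 1) is the index of the highest set bit of x - 1. */
        unsigned msb = 63U - (unsigned)__builtin_clzll(x - 1);
        return UINT64_C(1) << (msb + 1);
    }

    int
    main(void) {
        assert(sketch_pow2_ceil(1) == 1);
        assert(sketch_pow2_ceil(5) == 8);
        assert(sketch_pow2_ceil(4096) == 4096);
        assert(sketch_pow2_ceil(4097) == 8192);
        return 0;
    }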
+ */ + cache_bin_stats_t tstats; + /* + * Stack of available objects. + * + * To make use of adjacent cacheline prefetch, the items in the avail + * stack goes to higher address for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... -1] are available items and the lowest item will + * be allocated first. + */ + void **avail; +}; + +typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; +struct cache_bin_array_descriptor_s { + /* + * The arena keeps a list of the cache bins associated with it, for + * stats collection. + */ + ql_elm(cache_bin_array_descriptor_t) link; + /* Pointers to the tcache bins. */ + cache_bin_t *bins_small; + cache_bin_t *bins_large; +}; + +static inline void +cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, + cache_bin_t *bins_small, cache_bin_t *bins_large) { + ql_elm_new(descriptor, link); + descriptor->bins_small = bins_small; + descriptor->bins_large = bins_large; +} + +JEMALLOC_ALWAYS_INLINE void * +cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { + void *ret; + + bin->ncached--; + + /* + * Check for both bin->ncached == 0 and ncached < low_water + * in a single branch. + */ + if (unlikely(bin->ncached <= bin->low_water)) { + bin->low_water = bin->ncached; + if (bin->ncached == -1) { + bin->ncached = 0; + *success = false; + return NULL; + } + } + + /* + * success (instead of ret) should be checked upon the return of this + * function. We avoid checking (ret == NULL) because there is never a + * null stored on the avail stack (which is unknown to the compiler), + * and eagerly checking ret would cause pipeline stall (waiting for the + * cacheline). + */ + *success = true; + ret = *(bin->avail - (bin->ncached + 1)); + + return ret; +} + +JEMALLOC_ALWAYS_INLINE bool +cache_bin_dalloc_easy(cache_bin_t *bin, cache_bin_info_t *bin_info, void *ptr) { + if (unlikely(bin->ncached == bin_info->ncached_max)) { + return false; + } + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; + + return true; +} + +#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h index a91c4cf556..1d1aacc6f4 100644 --- a/deps/jemalloc/include/jemalloc/internal/ctl.h +++ b/deps/jemalloc/include/jemalloc/internal/ctl.h @@ -5,7 +5,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/stats.h" /* Maximum ctl tree depth. 
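cache_bin_alloc_easy above folds the "bin empty" and "new low water" checks into one unlikely branch by letting ncached dip to -1. The same control flow on a toy bin, with sketch-only types:

    /* Minimal re-creation of the single-branch fast path shown above. */
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        int low_water;   /* min ncached since last GC; -1 encodes "went empty" */
        int ncached;     /* number of cached pointers */
        void **avail;    /* avail[-ncached .. -1] hold the cached pointers */
    } toy_cache_bin_t;

    static void *
    toy_alloc_easy(toy_cache_bin_t *bin, bool *success) {
        bin->ncached--;
        if (bin->ncached <= bin->low_water) {    /* single unlikely branch */
            bin->low_water = bin->ncached;
            if (bin->ncached == -1) {            /* bin was already empty */
                bin->ncached = 0;
                *success = false;
                return NULL;
            }
        }
        *success = true;
        return *(bin->avail - (bin->ncached + 1));
    }

    int
    main(void) {
        int a = 1, b = 2;
        void *slots[2] = { &a, &b };             /* toy stack storage */
        toy_cache_bin_t bin = { 2, 2, slots + 2 }; /* avail points past the end */
        bool ok;
        for (;;) {
            void *p = toy_alloc_easy(&bin, &ok);
            if (!ok) {
                break;
            }
            printf("got %p; ncached now %d\n", p, bin.ncached);
        }
        printf("bin empty; low_water reset path taken\n");
        return 0;
    }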
*/ @@ -39,15 +39,19 @@ typedef struct ctl_arena_stats_s { uint64_t nmalloc_small; uint64_t ndalloc_small; uint64_t nrequests_small; + uint64_t nfills_small; + uint64_t nflushes_small; - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t lstats[NSIZES - NBINS]; + bin_stats_t bstats[SC_NBINS]; + arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; + arena_stats_extents_t estats[SC_NPSIZES]; } ctl_arena_stats_t; typedef struct ctl_stats_s { size_t allocated; size_t active; size_t metadata; + size_t metadata_thp; size_t resident; size_t mapped; size_t retained; diff --git a/deps/jemalloc/include/jemalloc/internal/div.h b/deps/jemalloc/include/jemalloc/internal/div.h new file mode 100644 index 0000000000..aebae9398c --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/div.h @@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_DIV_H +#define JEMALLOC_INTERNAL_DIV_H + +#include "jemalloc/internal/assert.h" + +/* + * This module does the division that computes the index of a region in a slab, + * given its offset relative to the base. + * That is, given a divisor d, an n = i * d (all integers), we'll return i. + * We do some pre-computation to do this more quickly than a CPU division + * instruction. + * We bound n < 2^32, and don't support dividing by one. + */ + +typedef struct div_info_s div_info_t; +struct div_info_s { + uint32_t magic; +#ifdef JEMALLOC_DEBUG + size_t d; +#endif +}; + +void div_init(div_info_t *div_info, size_t divisor); + +static inline size_t +div_compute(div_info_t *div_info, size_t n) { + assert(n <= (uint32_t)-1); + /* + * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine, + * the compilers I tried were all smart enough to turn this into the + * appropriate "get the high 32 bits of the result of a multiply" (e.g. + * mul; mov edx eax; on x86, umull on arm, etc.). + */ + size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32; +#ifdef JEMALLOC_DEBUG + assert(i * div_info->d == n); +#endif + return i; +} + +#endif /* JEMALLOC_INTERNAL_DIV_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/emitter.h b/deps/jemalloc/include/jemalloc/internal/emitter.h new file mode 100644 index 0000000000..542bc79c36 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/emitter.h @@ -0,0 +1,486 @@ +#ifndef JEMALLOC_INTERNAL_EMITTER_H +#define JEMALLOC_INTERNAL_EMITTER_H + +#include "jemalloc/internal/ql.h" + +typedef enum emitter_output_e emitter_output_t; +enum emitter_output_e { + emitter_output_json, + emitter_output_table +}; + +typedef enum emitter_justify_e emitter_justify_t; +enum emitter_justify_e { + emitter_justify_left, + emitter_justify_right, + /* Not for users; just to pass to internal functions. */ + emitter_justify_none +}; + +typedef enum emitter_type_e emitter_type_t; +enum emitter_type_e { + emitter_type_bool, + emitter_type_int, + emitter_type_unsigned, + emitter_type_uint32, + emitter_type_uint64, + emitter_type_size, + emitter_type_ssize, + emitter_type_string, + /* + * A title is a column title in a table; it's just a string, but it's + * not quoted. + */ + emitter_type_title, +}; + +typedef struct emitter_col_s emitter_col_t; +struct emitter_col_s { + /* Filled in by the user. */ + emitter_justify_t justify; + int width; + emitter_type_t type; + union { + bool bool_val; + int int_val; + unsigned unsigned_val; + uint32_t uint32_val; + uint32_t uint32_t_val; + uint64_t uint64_val; + uint64_t uint64_t_val; + size_t size_val; + ssize_t ssize_val; + const char *str_val; + }; + + /* Filled in by initialization. 
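div.h above recovers a slab region index with a multiply and a shift instead of a hardware division. A hedged sketch of how such a magic constant can be built; this is one standard construction for exact multiples below 2^32 and not necessarily the exact formula used in src/div.c:

    /* Magic-multiply division sketch: for divisor d and n = i * d with
     * n < 2^32, i == (n * magic) >> 32 when magic = floor(2^32 / d) + 1. */
    #include <assert.h>
    #include <stdint.h>

    static uint32_t
    magic_for(uint32_t d) {
        assert(d > 1);                       /* dividing by one is unsupported */
        return (uint32_t)((UINT64_C(1) << 32) / d) + 1;
    }

    static uint32_t
    div_by_magic(uint32_t n, uint32_t magic) {
        return (uint32_t)(((uint64_t)n * magic) >> 32);
    }

    int
    main(void) {
        uint32_t d = 48;                     /* e.g. a slab region size */
        uint32_t magic = magic_for(d);
        for (uint32_t i = 0; i < 100000; i++) {
            assert(div_by_magic(i * d, magic) == i);
        }
        return 0;
    }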
*/ + ql_elm(emitter_col_t) link; +}; + +typedef struct emitter_row_s emitter_row_t; +struct emitter_row_s { + ql_head(emitter_col_t) cols; +}; + +typedef struct emitter_s emitter_t; +struct emitter_s { + emitter_output_t output; + /* The output information. */ + void (*write_cb)(void *, const char *); + void *cbopaque; + int nesting_depth; + /* True if we've already emitted a value at the given depth. */ + bool item_at_depth; + /* True if we emitted a key and will emit corresponding value next. */ + bool emitted_key; +}; + +/* Internal convenience function. Write to the emitter the given string. */ +JEMALLOC_FORMAT_PRINTF(2, 3) +static inline void +emitter_printf(emitter_t *emitter, const char *format, ...) { + va_list ap; + + va_start(ap, format); + malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + va_end(ap); +} + +static inline const char * JEMALLOC_FORMAT_ARG(3) +emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, + emitter_justify_t justify, int width) { + size_t written; + fmt_specifier++; + if (justify == emitter_justify_none) { + written = malloc_snprintf(out_fmt, out_size, + "%%%s", fmt_specifier); + } else if (justify == emitter_justify_left) { + written = malloc_snprintf(out_fmt, out_size, + "%%-%d%s", width, fmt_specifier); + } else { + written = malloc_snprintf(out_fmt, out_size, + "%%%d%s", width, fmt_specifier); + } + /* Only happens in case of bad format string, which *we* choose. */ + assert(written < out_size); + return out_fmt; +} + +/* + * Internal. Emit the given value type in the relevant encoding (so that the + * bool true gets mapped to json "true", but the string "true" gets mapped to + * json "\"true\"", for instance. + * + * Width is ignored if justify is emitter_justify_none. + */ +static inline void +emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, + emitter_type_t value_type, const void *value) { + size_t str_written; +#define BUF_SIZE 256 +#define FMT_SIZE 10 + /* + * We dynamically generate a format string to emit, to let us use the + * snprintf machinery. This is kinda hacky, but gets the job done + * quickly without having to think about the various snprintf edge + * cases. + */ + char fmt[FMT_SIZE]; + char buf[BUF_SIZE]; + +#define EMIT_SIMPLE(type, format) \ + emitter_printf(emitter, \ + emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width), \ + *(const type *)value); + + switch (value_type) { + case emitter_type_bool: + emitter_printf(emitter, + emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), + *(const bool *)value ? "true" : "false"); + break; + case emitter_type_int: + EMIT_SIMPLE(int, "%d") + break; + case emitter_type_unsigned: + EMIT_SIMPLE(unsigned, "%u") + break; + case emitter_type_ssize: + EMIT_SIMPLE(ssize_t, "%zd") + break; + case emitter_type_size: + EMIT_SIMPLE(size_t, "%zu") + break; + case emitter_type_string: + str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", + *(const char *const *)value); + /* + * We control the strings we output; we shouldn't get anything + * anywhere near the fmt size. + */ + assert(str_written < BUF_SIZE); + emitter_printf(emitter, + emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf); + break; + case emitter_type_uint32: + EMIT_SIMPLE(uint32_t, "%" FMTu32) + break; + case emitter_type_uint64: + EMIT_SIMPLE(uint64_t, "%" FMTu64) + break; + case emitter_type_title: + EMIT_SIMPLE(char *const, "%s"); + break; + default: + unreachable(); + } +#undef BUF_SIZE +#undef FMT_SIZE +} + + +/* Internal functions. 
In json mode, tracks nesting state. */ +static inline void +emitter_nest_inc(emitter_t *emitter) { + emitter->nesting_depth++; + emitter->item_at_depth = false; +} + +static inline void +emitter_nest_dec(emitter_t *emitter) { + emitter->nesting_depth--; + emitter->item_at_depth = true; +} + +static inline void +emitter_indent(emitter_t *emitter) { + int amount = emitter->nesting_depth; + const char *indent_str; + if (emitter->output == emitter_output_json) { + indent_str = "\t"; + } else { + amount *= 2; + indent_str = " "; + } + for (int i = 0; i < amount; i++) { + emitter_printf(emitter, "%s", indent_str); + } +} + +static inline void +emitter_json_key_prefix(emitter_t *emitter) { + if (emitter->emitted_key) { + emitter->emitted_key = false; + return; + } + emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : ""); + emitter_indent(emitter); +} + +/******************************************************************************/ +/* Public functions for emitter_t. */ + +static inline void +emitter_init(emitter_t *emitter, emitter_output_t emitter_output, + void (*write_cb)(void *, const char *), void *cbopaque) { + emitter->output = emitter_output; + emitter->write_cb = write_cb; + emitter->cbopaque = cbopaque; + emitter->item_at_depth = false; + emitter->emitted_key = false; + emitter->nesting_depth = 0; +} + +/******************************************************************************/ +/* JSON public API. */ + +/* + * Emits a key (e.g. as appears in an object). The next json entity emitted will + * be the corresponding value. + */ +static inline void +emitter_json_key(emitter_t *emitter, const char *json_key) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": ", json_key); + emitter->emitted_key = true; + } +} + +static inline void +emitter_json_value(emitter_t *emitter, emitter_type_t value_type, + const void *value) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + emitter->item_at_depth = true; + } +} + +/* Shorthand for calling emitter_json_key and then emitter_json_value. */ +static inline void +emitter_json_kv(emitter_t *emitter, const char *json_key, + emitter_type_t value_type, const void *value) { + emitter_json_key(emitter, json_key); + emitter_json_value(emitter, value_type, value); +} + +static inline void +emitter_json_array_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "["); + emitter_nest_inc(emitter); + } +} + +/* Shorthand for calling emitter_json_key and then emitter_json_array_begin. */ +static inline void +emitter_json_array_kv_begin(emitter_t *emitter, const char *json_key) { + emitter_json_key(emitter, json_key); + emitter_json_array_begin(emitter); +} + +static inline void +emitter_json_array_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "]"); + } +} + +static inline void +emitter_json_object_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } +} + +/* Shorthand for calling emitter_json_key and then emitter_json_object_begin. 
*/ +static inline void +emitter_json_object_kv_begin(emitter_t *emitter, const char *json_key) { + emitter_json_key(emitter, json_key); + emitter_json_object_begin(emitter); +} + +static inline void +emitter_json_object_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "}"); + } +} + + +/******************************************************************************/ +/* Table public API. */ + +static inline void +emitter_table_dict_begin(emitter_t *emitter, const char *table_key) { + if (emitter->output == emitter_output_table) { + emitter_indent(emitter); + emitter_printf(emitter, "%s\n", table_key); + emitter_nest_inc(emitter); + } +} + +static inline void +emitter_table_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_table) { + emitter_nest_dec(emitter); + } +} + +static inline void +emitter_table_kv_note(emitter_t *emitter, const char *table_key, + emitter_type_t value_type, const void *value, + const char *table_note_key, emitter_type_t table_note_value_type, + const void *table_note_value) { + if (emitter->output == emitter_output_table) { + emitter_indent(emitter); + emitter_printf(emitter, "%s: ", table_key); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + if (table_note_key != NULL) { + emitter_printf(emitter, " (%s: ", table_note_key); + emitter_print_value(emitter, emitter_justify_none, -1, + table_note_value_type, table_note_value); + emitter_printf(emitter, ")"); + } + emitter_printf(emitter, "\n"); + } + emitter->item_at_depth = true; +} + +static inline void +emitter_table_kv(emitter_t *emitter, const char *table_key, + emitter_type_t value_type, const void *value) { + emitter_table_kv_note(emitter, table_key, value_type, value, NULL, + emitter_type_bool, NULL); +} + + +/* Write to the emitter the given string, but only in table mode. */ +JEMALLOC_FORMAT_PRINTF(2, 3) +static inline void +emitter_table_printf(emitter_t *emitter, const char *format, ...) { + if (emitter->output == emitter_output_table) { + va_list ap; + va_start(ap, format); + malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + va_end(ap); + } +} + +static inline void +emitter_table_row(emitter_t *emitter, emitter_row_t *row) { + if (emitter->output != emitter_output_table) { + return; + } + emitter_col_t *col; + ql_foreach(col, &row->cols, link) { + emitter_print_value(emitter, col->justify, col->width, + col->type, (const void *)&col->bool_val); + } + emitter_table_printf(emitter, "\n"); +} + +static inline void +emitter_row_init(emitter_row_t *row) { + ql_new(&row->cols); +} + +static inline void +emitter_col_init(emitter_col_t *col, emitter_row_t *row) { + ql_elm_new(col, link); + ql_tail_insert(&row->cols, col, link); +} + + +/******************************************************************************/ +/* + * Generalized public API. Emits using either JSON or table, according to + * settings in the emitter_t. */ + +/* + * Note emits a different kv pair as well, but only in table mode. Omits the + * note if table_note_key is NULL. 
+ */ +static inline void +emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value, + const char *table_note_key, emitter_type_t table_note_value_type, + const void *table_note_value) { + if (emitter->output == emitter_output_json) { + emitter_json_key(emitter, json_key); + emitter_json_value(emitter, value_type, value); + } else { + emitter_table_kv_note(emitter, table_key, value_type, value, + table_note_key, table_note_value_type, table_note_value); + } + emitter->item_at_depth = true; +} + +static inline void +emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value) { + emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL, + emitter_type_bool, NULL); +} + +static inline void +emitter_dict_begin(emitter_t *emitter, const char *json_key, + const char *table_header) { + if (emitter->output == emitter_output_json) { + emitter_json_key(emitter, json_key); + emitter_json_object_begin(emitter); + } else { + emitter_table_dict_begin(emitter, table_header); + } +} + +static inline void +emitter_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_json_object_end(emitter); + } else { + emitter_table_dict_end(emitter); + } +} + +static inline void +emitter_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 0); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } else { + /* + * This guarantees that we always call write_cb at least once. + * This is useful if some invariant is established by each call + * to write_cb, but doesn't hold initially: e.g., some buffer + * holds a null-terminated string. + */ + emitter_printf(emitter, "%s", ""); + } +} + +static inline void +emitter_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 1); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n}\n"); + } +} + +#endif /* JEMALLOC_INTERNAL_EMITTER_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/extent_externs.h b/deps/jemalloc/include/jemalloc/internal/extent_externs.h index 489a813c80..8aba57633a 100644 --- a/deps/jemalloc/include/jemalloc/internal/extent_externs.h +++ b/deps/jemalloc/include/jemalloc/internal/extent_externs.h @@ -4,12 +4,13 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rb.h" #include "jemalloc/internal/rtree.h" -extern rtree_t extents_rtree; -extern const extent_hooks_t extent_hooks_default; -extern mutex_pool_t extent_mutex_pool; +extern size_t opt_lg_extent_max_active_fit; + +extern rtree_t extents_rtree; +extern const extent_hooks_t extent_hooks_default; +extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); @@ -23,13 +24,17 @@ size_t extent_size_quantize_floor(size_t size); size_t extent_size_quantize_ceil(size_t size); #endif -rb_proto(, extent_avail_, extent_tree_t, extent_t) +ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, bool delay_coalesce); extent_state_t extents_state_get(const extents_t *extents); size_t extents_npages_get(extents_t *extents); +/* Get the number of extents in the given page size index. 
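The emitter above renders the same key/value stream as either JSON or a table. A hypothetical call site (not part of this patch, and only compilable inside the jemalloc tree) could look like the following; malloc_write is assumed to be jemalloc's usual low-level writer:

    /* Hypothetical consumer of the emitter API declared above; assumes
     * jemalloc/internal/emitter.h and malloc_io are in scope. */
    static void
    example_write_cb(void *cbopaque, const char *s) {
        (void)cbopaque;
        malloc_write(s);                  /* assumed low-level writer */
    }

    static void
    emit_example_stats(bool json, size_t allocated, size_t resident) {
        emitter_t emitter;
        emitter_init(&emitter,
            json ? emitter_output_json : emitter_output_table,
            example_write_cb, NULL);
        emitter_begin(&emitter);
        emitter_dict_begin(&emitter, "stats", "Allocator stats");
        emitter_kv(&emitter, "allocated", "allocated", emitter_type_size,
            &allocated);
        emitter_kv(&emitter, "resident", "resident", emitter_type_size,
            &resident);
        emitter_dict_end(&emitter);
        emitter_end(&emitter);
    }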
*/ +size_t extents_nextents_get(extents_t *extents, pszind_t ind); +/* Get the sum total bytes of the extents in the given page size index. */ +size_t extents_nbytes_get(extents_t *extents, pszind_t ind); extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, @@ -69,4 +74,10 @@ bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, bool extent_boot(void); +void extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size); +void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); + #endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/extent_inlines.h b/deps/jemalloc/include/jemalloc/internal/extent_inlines.h index bb2bd699ed..77fa4c4a29 100644 --- a/deps/jemalloc/include/jemalloc/internal/extent_inlines.h +++ b/deps/jemalloc/include/jemalloc/internal/extent_inlines.h @@ -6,6 +6,7 @@ #include "jemalloc/internal/pages.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" static inline void @@ -34,18 +35,19 @@ extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { (uintptr_t)extent2); } -static inline arena_t * -extent_arena_get(const extent_t *extent) { +static inline unsigned +extent_arena_ind_get(const extent_t *extent) { unsigned arena_ind = (unsigned)((extent->e_bits & EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); - /* - * The following check is omitted because we should never actually read - * a NULL arena pointer. - */ - if (false && arena_ind >= MALLOCX_ARENA_LIMIT) { - return NULL; - } assert(arena_ind < MALLOCX_ARENA_LIMIT); + + return arena_ind; +} + +static inline arena_t * +extent_arena_get(const extent_t *extent) { + unsigned arena_ind = extent_arena_ind_get(extent); + return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE); } @@ -53,14 +55,14 @@ static inline szind_t extent_szind_get_maybe_invalid(const extent_t *extent) { szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> EXTENT_BITS_SZIND_SHIFT); - assert(szind <= NSIZES); + assert(szind <= SC_NSIZES); return szind; } static inline szind_t extent_szind_get(const extent_t *extent) { szind_t szind = extent_szind_get_maybe_invalid(extent); - assert(szind < NSIZES); /* Never call when "invalid". */ + assert(szind < SC_NSIZES); /* Never call when "invalid". 
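extent_util_stats_get / extent_util_stats_verbose_get above expose per-pointer slab utilization (they back the experimental utilization queries referenced later in extent_structs.h). A hedged sketch of an internal consumer; the helper name and the use of malloc_printf are assumptions for illustration:

    /* Illustrative consumer of the utilization query declared above; only
     * compilable inside the jemalloc tree. */
    static void
    sketch_print_ptr_utilization(tsdn_t *tsdn, const void *ptr) {
        size_t nfree, nregs, size;
        extent_util_stats_get(tsdn, ptr, &nfree, &nregs, &size);
        malloc_printf("<jemalloc>: extent size %zu, %zu of %zu regions free\n",
            size, nfree, nregs);
    }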
*/ return szind; } @@ -69,6 +71,14 @@ extent_usize_get(const extent_t *extent) { return sz_index2size(extent_szind_get(extent)); } +static inline unsigned +extent_binshard_get(const extent_t *extent) { + unsigned binshard = (unsigned)((extent->e_bits & + EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + return binshard; +} + static inline size_t extent_sn_get(const extent_t *extent) { return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> @@ -94,6 +104,12 @@ extent_committed_get(const extent_t *extent) { } static inline bool +extent_dumpable_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> + EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline bool extent_slab_get(const extent_t *extent) { return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> EXTENT_BITS_SLAB_SHIFT); @@ -170,6 +186,11 @@ extent_prof_tctx_get(const extent_t *extent) { ATOMIC_ACQUIRE); } +static inline nstime_t +extent_prof_alloc_time_get(const extent_t *extent) { + return extent->e_alloc_time; +} + static inline void extent_arena_set(extent_t *extent, arena_t *arena) { unsigned arena_ind = (arena != NULL) ? arena_ind_get(arena) : ((1U << @@ -179,6 +200,14 @@ extent_arena_set(extent_t *extent, arena_t *arena) { } static inline void +extent_binshard_set(extent_t *extent, unsigned binshard) { + /* The assertion assumes szind is set already. */ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); +} + +static inline void extent_addr_set(extent_t *extent, void *addr) { extent->e_addr = addr; } @@ -190,9 +219,16 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { if (alignment < PAGE) { unsigned lg_range = LG_PAGE - lg_floor(CACHELINE_CEILING(alignment)); - size_t r = - prng_lg_range_zu(&extent_arena_get(extent)->offset_state, - lg_range, true); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_offset_statep_get(tsd), lg_range); + } else { + r = prng_lg_range_zu( + &extent_arena_get(extent)->offset_state, + lg_range, true); + } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); extent->e_addr = (void *)((uintptr_t)extent->e_addr + @@ -221,7 +257,7 @@ extent_bsize_set(extent_t *extent, size_t bsize) { static inline void extent_szind_set(extent_t *extent, szind_t szind) { - assert(szind <= NSIZES); /* NSIZES means "invalid". */ + assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); } @@ -234,6 +270,16 @@ extent_nfree_set(extent_t *extent, unsigned nfree) { } static inline void +extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { + /* The assertion assumes szind is set already. 
*/ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & + (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void extent_nfree_inc(extent_t *extent) { assert(extent_slab_get(extent)); extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); @@ -246,6 +292,12 @@ extent_nfree_dec(extent_t *extent) { } static inline void +extent_nfree_sub(extent_t *extent, uint64_t n) { + assert(extent_slab_get(extent)); + extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void extent_sn_set(extent_t *extent, size_t sn) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); @@ -270,6 +322,12 @@ extent_committed_set(extent_t *extent, bool committed) { } static inline void +extent_dumpable_set(extent_t *extent, bool dumpable) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline void extent_slab_set(extent_t *extent, bool slab) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); @@ -281,9 +339,34 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { } static inline void +extent_prof_alloc_time_set(extent_t *extent, nstime_t t) { + nstime_copy(&extent->e_alloc_time, &t); +} + +static inline bool +extent_is_head_get(extent_t *extent) { + if (maps_coalesce) { + not_reached(); + } + + return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> + EXTENT_BITS_IS_HEAD_SHIFT); +} + +static inline void +extent_is_head_set(extent_t *extent, bool is_head) { + if (maps_coalesce) { + not_reached(); + } + + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | + ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); +} + +static inline void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed) { + bool committed, bool dumpable, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); extent_arena_set(extent, arena); @@ -295,7 +378,12 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_state_set(extent, state); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); + extent_dumpable_set(extent, dumpable); ql_elm_new(extent, ql_link); + if (!maps_coalesce) { + extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? 
true : + false); + } if (config_prof) { extent_prof_tctx_set(extent, NULL); } @@ -307,11 +395,12 @@ extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { extent_addr_set(extent, addr); extent_bsize_set(extent, bsize); extent_slab_set(extent, false); - extent_szind_set(extent, NSIZES); + extent_szind_set(extent, SC_NSIZES); extent_sn_set(extent, sn); extent_state_set(extent, extent_state_active); extent_zeroed_set(extent, true); extent_committed_set(extent, true); + extent_dumpable_set(extent, true); } static inline void @@ -335,6 +424,11 @@ extent_list_append(extent_list_t *list, extent_t *extent) { } static inline void +extent_list_prepend(extent_list_t *list, extent_t *extent) { + ql_head_insert(list, extent, ql_link); +} + +static inline void extent_list_replace(extent_list_t *list, extent_t *to_remove, extent_t *to_insert) { ql_after_insert(to_remove, to_insert, ql_link); diff --git a/deps/jemalloc/include/jemalloc/internal/extent_structs.h b/deps/jemalloc/include/jemalloc/internal/extent_structs.h index d297950345..767cd8930f 100644 --- a/deps/jemalloc/include/jemalloc/internal/extent_structs.h +++ b/deps/jemalloc/include/jemalloc/internal/extent_structs.h @@ -2,12 +2,12 @@ #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/rb.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" typedef enum { extent_state_active = 0, @@ -24,13 +24,15 @@ struct extent_s { * a: arena_ind * b: slab * c: committed + * d: dumpable * z: zeroed * t: state * i: szind * f: nfree + * s: bin_shard * n: sn * - * nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa + * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -45,6 +47,23 @@ struct extent_s { * as on a system that overcommits and satisfies physical * memory needs on demand via soft page faults. * + * dumpable: The dumpable flag indicates whether or not we've set the + * memory in question to be dumpable. Note that this + * interacts somewhat subtly with user-specified extent hooks, + * since we don't know if *they* are fiddling with + * dumpability (in which case, we don't want to undo whatever + * they're doing). To deal with this scenario, we: + * - Make dumpable false only for memory allocated with the + * default hooks. + * - Only allow memory to go from non-dumpable to dumpable, + * and only once. + * - Never make the OS call to allow dumping when the + * dumpable bit is already set. + * These three constraints mean that we will never + * accidentally dump user memory that the user meant to set + * nondumpable with their extent hooks. + * + * * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. * @@ -58,6 +77,8 @@ struct extent_s { * * nfree: Number of free regions in slab. * + * bin_shard: the shard of the bin from which this extent came. + * * sn: Serial number (potentially non-unique). * * Serial numbers may wrap around if !opt_retain, but as long as @@ -69,38 +90,50 @@ struct extent_s { * serial number to both resulting adjacent extents. 
*/ uint64_t e_bits; -#define EXTENT_BITS_ARENA_SHIFT 0 -#define EXTENT_BITS_ARENA_MASK \ - (((uint64_t)(1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT) +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) + +#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) + +#define EXTENT_BITS_SLAB_WIDTH 1 +#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) +#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) + +#define EXTENT_BITS_COMMITTED_WIDTH 1 +#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_SLAB_SHIFT MALLOCX_ARENA_BITS -#define EXTENT_BITS_SLAB_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_DUMPABLE_WIDTH 1 +#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_COMMITTED_SHIFT (MALLOCX_ARENA_BITS + 1) -#define EXTENT_BITS_COMMITTED_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_ZEROED_WIDTH 1 +#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) +#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 2) -#define EXTENT_BITS_ZEROED_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_STATE_WIDTH 2 +#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 3) -#define EXTENT_BITS_STATE_MASK \ - ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) +#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 5) -#define EXTENT_BITS_SZIND_MASK \ - (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) -#define EXTENT_BITS_NFREE_SHIFT \ - (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES) -#define EXTENT_BITS_NFREE_MASK \ - ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_BINSHARD_WIDTH 6 +#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) -#define EXTENT_BITS_SN_SHIFT \ - (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) -#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) +#define EXTENT_BITS_IS_HEAD_WIDTH 1 +#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) +#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) + +#define 
EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. */ void *e_addr; @@ -120,35 +153,36 @@ struct extent_s { size_t e_bsize; }; - union { - /* - * List linkage, used by a variety of lists: - * - arena_bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - */ - ql_elm(extent_t) ql_link; - /* Red-black tree linkage, used by arena's extent_avail. */ - rb_node(extent_t) rb_link; - }; + /* + * List linkage, used by a variety of lists: + * - bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; - /* Linkage for per size class sn/address-ordered heaps. */ + /* + * Linkage for per size class sn/address-ordered heaps, and + * for extent_avail + */ phn(extent_t) ph_link; union { /* Small region slab metadata. */ arena_slab_data_t e_slab_data; - /* - * Profile counters, used for large objects. Points to a - * prof_tctx_t. - */ - atomic_p_t e_prof_tctx; + /* Profiling data, used for large objects. */ + struct { + /* Time when this was allocated. */ + nstime_t e_alloc_time; + /* Points to a prof_tctx_t. */ + atomic_p_t e_prof_tctx; + }; }; }; typedef ql_head(extent_t) extent_list_t; -typedef rb_tree(extent_t) extent_tree_t; +typedef ph(extent_t) extent_tree_t; typedef ph(extent_t) extent_heap_t; /* Quantized collection of extents, with built-in LRU queue. */ @@ -160,14 +194,16 @@ struct extents_s { * * Synchronization: mtx. */ - extent_heap_t heaps[NPSIZES+1]; + extent_heap_t heaps[SC_NPSIZES + 1]; + atomic_zu_t nextents[SC_NPSIZES + 1]; + atomic_zu_t nbytes[SC_NPSIZES + 1]; /* * Bitmap for which set bits correspond to non-empty heaps. * * Synchronization: mtx. */ - bitmap_t bitmap[BITMAP_GROUPS(NPSIZES+1)]; + bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; /* * LRU of all extents in heaps. @@ -196,4 +232,25 @@ struct extents_s { bool delay_coalesce; }; +/* + * The following two structs are for experimental purposes. See + * experimental_utilization_query_ctl and + * experimental_utilization_batch_query_ctl in src/ctl.c. + */ + +struct extent_util_stats_s { + size_t nfree; + size_t nregs; + size_t size; +}; + +struct extent_util_stats_verbose_s { + void *slabcur_addr; + size_t nfree; + size_t nregs; + size_t size; + size_t bin_nfree; + size_t bin_nregs; +}; + #endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/extent_types.h b/deps/jemalloc/include/jemalloc/internal/extent_types.h index b6905ce105..96925cf958 100644 --- a/deps/jemalloc/include/jemalloc/internal/extent_types.h +++ b/deps/jemalloc/include/jemalloc/internal/extent_types.h @@ -4,6 +4,20 @@ typedef struct extent_s extent_t; typedef struct extents_s extents_t; +typedef struct extent_util_stats_s extent_util_stats_t; +typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; + #define EXTENT_HOOKS_INITIALIZER NULL +/* + * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) + * is the max ratio between the size of the active extent and the new extent. + */ +#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 + +typedef enum { + EXTENT_NOT_HEAD, + EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. 
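The extent header bits above are now defined through a generic MASK(width, shift) scheme, with each field's shift derived from the previous one, so inserting a field (dumpable, binshard, is_head) renumbers the layout automatically. A self-contained demo of the same packing pattern with arbitrary field sizes, independent of the real extent layout:

    /* Demo of the MASK(width, shift) packing pattern; field widths here are
     * arbitrary and do not reflect the actual extent bit layout. */
    #include <assert.h>
    #include <stdint.h>

    #define MASK(w, s) (((((uint64_t)0x1U) << (w)) - 1) << (s))

    #define F_ARENA_WIDTH 12
    #define F_ARENA_SHIFT 0
    #define F_ARENA_MASK  MASK(F_ARENA_WIDTH, F_ARENA_SHIFT)

    #define F_SLAB_WIDTH  1
    #define F_SLAB_SHIFT  (F_ARENA_WIDTH + F_ARENA_SHIFT)
    #define F_SLAB_MASK   MASK(F_SLAB_WIDTH, F_SLAB_SHIFT)

    int
    main(void) {
        uint64_t bits = 0;
        bits = (bits & ~F_ARENA_MASK) | ((uint64_t)7 << F_ARENA_SHIFT);
        bits = (bits & ~F_SLAB_MASK) | ((uint64_t)1 << F_SLAB_SHIFT);
        assert(((bits & F_ARENA_MASK) >> F_ARENA_SHIFT) == 7);
        assert(((bits & F_SLAB_MASK) >> F_SLAB_SHIFT) == 1);
        return 0;
    }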
*/ +} extent_head_state_t; + #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h index 188296cf0e..0270034e87 100644 --- a/deps/jemalloc/include/jemalloc/internal/hash.h +++ b/deps/jemalloc/include/jemalloc/internal/hash.h @@ -104,8 +104,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) { uint32_t k1 = 0; switch (len & 3) { - case 3: k1 ^= tail[2] << 16; - case 2: k1 ^= tail[1] << 8; + case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH + case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; } @@ -119,7 +119,7 @@ hash_x86_32(const void *key, int len, uint32_t seed) { return h1; } -UNUSED static inline void +static inline void hash_x86_128(const void *key, const int len, uint32_t seed, uint64_t r_out[2]) { const uint8_t * data = (const uint8_t *) key; @@ -177,28 +177,29 @@ hash_x86_128(const void *key, const int len, uint32_t seed, uint32_t k4 = 0; switch (len & 15) { - case 15: k4 ^= tail[14] << 16; - case 14: k4 ^= tail[13] << 8; + case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH + case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH case 13: k4 ^= tail[12] << 0; k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; - - case 12: k3 ^= tail[11] << 24; - case 11: k3 ^= tail[10] << 16; - case 10: k3 ^= tail[ 9] << 8; + JEMALLOC_FALLTHROUGH + case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH + case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH + case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH case 9: k3 ^= tail[ 8] << 0; k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; - - case 8: k2 ^= tail[ 7] << 24; - case 7: k2 ^= tail[ 6] << 16; - case 6: k2 ^= tail[ 5] << 8; + JEMALLOC_FALLTHROUGH + case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH + case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH + case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH case 5: k2 ^= tail[ 4] << 0; k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; - - case 4: k1 ^= tail[ 3] << 24; - case 3: k1 ^= tail[ 2] << 16; - case 2: k1 ^= tail[ 1] << 8; + JEMALLOC_FALLTHROUGH + case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH + case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH + case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH case 1: k1 ^= tail[ 0] << 0; k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + JEMALLOC_FALLTHROUGH } } @@ -220,7 +221,7 @@ hash_x86_128(const void *key, const int len, uint32_t seed, r_out[1] = (((uint64_t) h4) << 32) | h3; } -UNUSED static inline void +static inline void hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t r_out[2]) { const uint8_t *data = (const uint8_t *) key; @@ -260,22 +261,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; 
JEMALLOC_FALLTHROUGH case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; + JEMALLOC_FALLTHROUGH + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; } diff --git a/deps/jemalloc/include/jemalloc/internal/hook.h b/deps/jemalloc/include/jemalloc/internal/hook.h new file mode 100644 index 0000000000..ee246b1e0b --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/hook.h @@ -0,0 +1,163 @@ +#ifndef JEMALLOC_INTERNAL_HOOK_H +#define JEMALLOC_INTERNAL_HOOK_H + +#include "jemalloc/internal/tsd.h" + +/* + * This API is *extremely* experimental, and may get ripped out, changed in API- + * and ABI-incompatible ways, be insufficiently or incorrectly documented, etc. + * + * It allows hooking the stateful parts of the API to see changes as they + * happen. + * + * Allocation hooks are called after the allocation is done, free hooks are + * called before the free is done, and expand hooks are called after the + * allocation is expanded. + * + * For realloc and rallocx, if the expansion happens in place, the expansion + * hook is called. If it is moved, then the alloc hook is called on the new + * location, and then the free hook is called on the old location (i.e. both + * hooks are invoked in between the alloc and the dalloc). + * + * If we return NULL from OOM, then usize might not be trustworthy. Calling + * realloc(NULL, size) only calls the alloc hook, and calling realloc(ptr, 0) + * only calls the free hook. (Calling realloc(NULL, 0) is treated as malloc(0), + * and only calls the alloc hook). + * + * Reentrancy: + * Reentrancy is guarded against from within the hook implementation. If you + * call allocator functions from within a hook, the hooks will not be invoked + * again. + * Threading: + * The installation of a hook synchronizes with all its uses. If you can + * prove the installation of a hook happens-before a jemalloc entry point, + * then the hook will get invoked (unless there's a racing removal). + * + * Hook insertion appears to be atomic at a per-thread level (i.e. if a thread + * allocates and has the alloc hook invoked, then a subsequent free on the + * same thread will also have the free hook invoked). + * + * The *removal* of a hook does *not* block until all threads are done with + * the hook. Hook authors have to be resilient to this, and need some + * out-of-band mechanism for cleaning up any dynamically allocated memory + * associated with their hook. + * Ordering: + * Order of hook execution is unspecified, and may be different than insertion + * order. 
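The hash.h changes above mark intentional switch fall-through in the Murmur tail handling with JEMALLOC_FALLTHROUGH so newer compilers do not warn. The macro's definition is not part of this hunk; one plausible shape, shown purely as an assumption, is:

    /* Assumed shape of a fall-through annotation macro; the real
     * JEMALLOC_FALLTHROUGH definition lives elsewhere in the tree. */
    #ifdef __has_attribute
    #  if __has_attribute(fallthrough)
    #    define SKETCH_FALLTHROUGH __attribute__((fallthrough));
    #  endif
    #endif
    #ifndef SKETCH_FALLTHROUGH
    #  define SKETCH_FALLTHROUGH /* fall through */
    #endif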
+ */ + +#define HOOK_MAX 4 + +enum hook_alloc_e { + hook_alloc_malloc, + hook_alloc_posix_memalign, + hook_alloc_aligned_alloc, + hook_alloc_calloc, + hook_alloc_memalign, + hook_alloc_valloc, + hook_alloc_mallocx, + + /* The reallocating functions have both alloc and dalloc variants */ + hook_alloc_realloc, + hook_alloc_rallocx, +}; +/* + * We put the enum typedef after the enum, since this file may get included by + * jemalloc_cpp.cpp, and C++ disallows enum forward declarations. + */ +typedef enum hook_alloc_e hook_alloc_t; + +enum hook_dalloc_e { + hook_dalloc_free, + hook_dalloc_dallocx, + hook_dalloc_sdallocx, + + /* + * The dalloc halves of reallocation (not called if in-place expansion + * happens). + */ + hook_dalloc_realloc, + hook_dalloc_rallocx, +}; +typedef enum hook_dalloc_e hook_dalloc_t; + + +enum hook_expand_e { + hook_expand_realloc, + hook_expand_rallocx, + hook_expand_xallocx, +}; +typedef enum hook_expand_e hook_expand_t; + +typedef void (*hook_alloc)( + void *extra, hook_alloc_t type, void *result, uintptr_t result_raw, + uintptr_t args_raw[3]); + +typedef void (*hook_dalloc)( + void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]); + +typedef void (*hook_expand)( + void *extra, hook_expand_t type, void *address, size_t old_usize, + size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]); + +typedef struct hooks_s hooks_t; +struct hooks_s { + hook_alloc alloc_hook; + hook_dalloc dalloc_hook; + hook_expand expand_hook; + void *extra; +}; + +/* + * Begin implementation details; everything above this point might one day live + * in a public API. Everything below this point never will. + */ + +/* + * The realloc pathways haven't gotten any refactoring love in a while, and it's + * fairly difficult to pass information from the entry point to the hooks. We + * put the informaiton the hooks will need into a struct to encapsulate + * everything. + * + * Much of these pathways are force-inlined, so that the compiler can avoid + * materializing this struct until we hit an extern arena function. For fairly + * goofy reasons, *many* of the realloc paths hit an extern arena function. + * These paths are cold enough that it doesn't matter; eventually, we should + * rewrite the realloc code to make the expand-in-place and the + * free-then-realloc paths more orthogonal, at which point we don't need to + * spread the hook logic all over the place. + */ +typedef struct hook_ralloc_args_s hook_ralloc_args_t; +struct hook_ralloc_args_s { + /* I.e. as opposed to rallocx. */ + bool is_realloc; + /* + * The expand hook takes 4 arguments, even if only 3 are actually used; + * we add an extra one in case the user decides to memcpy without + * looking too closely at the hooked function. + */ + uintptr_t args[4]; +}; + +/* + * Returns an opaque handle to be used when removing the hook. NULL means that + * we couldn't install the hook. + */ +bool hook_boot(); + +void *hook_install(tsdn_t *tsdn, hooks_t *hooks); +/* Uninstalls the hook with the handle previously returned from hook_install. 
*/ +void hook_remove(tsdn_t *tsdn, void *opaque); + +/* Hooks */ + +void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, + uintptr_t args_raw[3]); + +void hook_invoke_dalloc(hook_dalloc_t type, void *address, + uintptr_t args_raw[3]); + +void hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, + size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]); + +#endif /* JEMALLOC_INTERNAL_HOOK_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h index 8ae5ef48cd..7d6053e219 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h @@ -5,7 +5,16 @@ #ifdef _WIN32 # include <windows.h> # include "msvc_compat/windows_extra.h" - +# ifdef _WIN64 +# if LG_VADDR <= 32 +# error Generate the headers using x64 vcargs +# endif +# else +# if LG_VADDR > 32 +# undef LG_VADDR +# define LG_VADDR 32 +# endif +# endif #else # include <sys/param.h> # include <sys/mman.h> @@ -22,6 +31,9 @@ # include <sys/uio.h> # endif # include <pthread.h> +# ifdef __FreeBSD__ +# include <pthread_np.h> +# endif # include <signal.h> # ifdef JEMALLOC_OS_UNFAIR_LOCK # include <os/lock.h> diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h index e10fb275d4..d291170bee 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_EXTERNS_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/tsd_types.h" /* TSD checks this to set thread local slow state accordingly. */ @@ -11,6 +10,7 @@ extern bool malloc_slow; /* Run-time options. */ extern bool opt_abort; extern bool opt_abort_conf; +extern bool opt_confirm_conf; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; @@ -25,6 +25,9 @@ extern unsigned ncpus; /* Number of arenas used for automatic multiplexing of threads and arenas. */ extern unsigned narenas_auto; +/* Base index for manual arenas. */ +extern unsigned manual_arena_base; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
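As a usage sketch for the experimental hook API added in hook.h above: the snippet below installs a single counting alloc hook and later removes it. It is illustrative only; the counter, the function names, and the assumption that unused hooks_t members may be left NULL are not taken from the diff.

#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/hook.h"

/* Hypothetical counter; not part of jemalloc, and not thread-safe. */
static size_t n_hooked_allocs;

static void
count_alloc_hook(void *extra, hook_alloc_t type, void *result,
    uintptr_t result_raw, uintptr_t args_raw[3]) {
	(void)extra; (void)type; (void)result; (void)result_raw; (void)args_raw;
	n_hooked_allocs++;
}

/* Assumption: members that are not needed can stay NULL. */
static hooks_t count_hooks = {count_alloc_hook, NULL, NULL, NULL};

static void
count_allocs_during(tsdn_t *tsdn, void (*workload)(void)) {
	void *handle = hook_install(tsdn, &count_hooks);
	if (handle == NULL) {
		return;	/* e.g. all HOOK_MAX hook slots already taken */
	}
	workload();	/* allocations made here invoke count_alloc_hook */
	hook_remove(tsdn, handle);
}

Per the header comment above, allocator calls made from inside the hook body would not re-invoke the hooks, and removal does not wait for in-flight invocations, so any dynamically allocated hook state needs out-of-band cleanup.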
@@ -49,5 +52,6 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); bool malloc_initialized(void); +void je_sdallocx_noflags(void *ptr, size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 24ea416297..ddde9b4e63 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -4,13 +4,15 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/ticker.h" JEMALLOC_ALWAYS_INLINE malloc_cpuid_t malloc_getcpu(void) { assert(have_percpu_arena); -#if defined(JEMALLOC_HAVE_SCHED_GETCPU) +#if defined(_WIN32) + return GetCurrentProcessorNumber(); +#elif defined(JEMALLOC_HAVE_SCHED_GETCPU) return (malloc_cpuid_t)sched_getcpu(); #else not_reached(); @@ -106,16 +108,16 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { return &tdata->decay_ticker; } -JEMALLOC_ALWAYS_INLINE tcache_bin_t * +JEMALLOC_ALWAYS_INLINE cache_bin_t * tcache_small_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind < NBINS); - return &tcache->tbins_small[binind]; + assert(binind < SC_NBINS); + return &tcache->bins_small[binind]; } -JEMALLOC_ALWAYS_INLINE tcache_bin_t * +JEMALLOC_ALWAYS_INLINE cache_bin_t * tcache_large_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind >= NBINS &&binind < nhbins); - return &tcache->tbins_large[binind - NBINS]; + assert(binind >= SC_NBINS &&binind < nhbins); + return &tcache->bins_large[binind - SC_NBINS]; } JEMALLOC_ALWAYS_INLINE bool @@ -151,11 +153,12 @@ pre_reentrancy(tsd_t *tsd, arena_t *arena) { assert(arena != arena_get(tsd_tsdn(tsd), 0, false)); bool fast = tsd_fast(tsd); + assert(tsd_reentrancy_level_get(tsd) < INT8_MAX); ++*tsd_reentrancy_levelp_get(tsd); if (fast) { /* Prepare slow path for reentrancy. 
*/ tsd_slow_update(tsd); - assert(tsd->state == tsd_state_nominal_slow); + assert(tsd_state_get(tsd) == tsd_state_nominal_slow); } } diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 2e76e5d8f7..70d6e57885 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -71,7 +71,8 @@ arena_ichoose(tsd_t *tsd, arena_t *arena) { static inline bool arena_is_auto(arena_t *arena) { assert(narenas_auto > 0); - return (arena_ind_get(arena) < narenas_auto); + + return (arena_ind_get(arena) < manual_arena_base); } JEMALLOC_ALWAYS_INLINE extent_t * diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 7ffce6fb03..cdb10eb21f 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -1,10 +1,29 @@ #ifndef JEMALLOC_INTERNAL_INLINES_C_H #define JEMALLOC_INTERNAL_INLINES_C_H +#include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/witness.h" +/* + * Translating the names of the 'i' functions: + * Abbreviations used in the first part of the function name (before + * alloc/dalloc) describe what that function accomplishes: + * a: arena (query) + * s: size (query, or sized deallocation) + * e: extent (query) + * p: aligned (allocates) + * vs: size (query, without knowing that the pointer is into the heap) + * r: rallocx implementation + * x: xallocx implementation + * Abbreviations used in the second part of the function name (after + * alloc/dalloc) describe the arguments it takes + * z: whether to return zeroed memory + * t: accepts a tcache_t * parameter + * m: accepts an arena_t * parameter + */ + JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); @@ -24,11 +43,12 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_internal, arena_t *arena, bool slow_path) { void *ret; - assert(size != 0); assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + } ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { @@ -91,7 +111,8 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, if (config_stats && is_internal) { arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); } - if (!is_internal && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { + if (!is_internal && !tsdn_null(tsdn) && + tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { assert(tcache == NULL); } arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); @@ -112,31 +133,20 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, - arena_t *arena) { + size_t alignment, bool zero, tcache_t *tcache, arena_t 
*arena, + hook_ralloc_args_t *hook_args) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); void *p; size_t usize, copysize; - usize = sz_sa2u(size + extra, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + usize = sz_sa2u(size, alignment); + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); if (p == NULL) { - if (extra == 0) { - return NULL; - } - /* Try again, without extra this time. */ - usize = sz_sa2u(size, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { - return NULL; - } - p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); - if (p == NULL) { - return NULL; - } + return NULL; } /* * Copy at most size bytes (not size+extra), since the caller has no @@ -144,13 +154,26 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); + hook_invoke_alloc(hook_args->is_realloc + ? hook_alloc_realloc : hook_alloc_rallocx, p, (uintptr_t)p, + hook_args->args); + hook_invoke_dalloc(hook_args->is_realloc + ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); return p; } +/* + * is_realloc threads through the knowledge of whether or not this call comes + * from je_realloc (as opposed to je_rallocx); this ensures that we pass the + * correct entry point into any hooks. + * Note that these functions are all force-inlined, so no actual bool gets + * passed-around anywhere. + */ JEMALLOC_ALWAYS_INLINE void * iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, tcache_t *tcache, arena_t *arena) { + bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) +{ assert(ptr != NULL); assert(size != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -162,24 +185,24 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, * Existing object alignment is inadequate; allocate new space * and copy. */ - return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, - zero, tcache, arena); + return iralloct_realign(tsdn, ptr, oldsize, size, alignment, + zero, tcache, arena, hook_args); } return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, - tcache); + tcache, hook_args); } JEMALLOC_ALWAYS_INLINE void * iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero) { + bool zero, hook_ralloc_args_t *hook_args) { return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, - tcache_get(tsd), NULL); + tcache_get(tsd), NULL, hook_args); } JEMALLOC_ALWAYS_INLINE bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) { + size_t alignment, bool zero, size_t *newsize) { assert(ptr != NULL); assert(size != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -188,10 +211,12 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { /* Existing object alignment is inadequate. 
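To make the 'i' function naming key documented at the top of jemalloc_internal_inlines_c.h concrete, here is a worked reading of two of the names, using only the letter meanings listed in that comment (illustrative decoding, not new API):

    iallocztm(tsdn, size, ind, zero, tcache, is_internal, arena, slow_path)
        alloc - allocates
        z     - takes the "return zeroed memory" flag
        t     - accepts a tcache_t * parameter
        m     - accepts an arena_t * parameter

    ipallocztm
        same trailing letters as above, plus p: the allocation is aligned.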
*/ + *newsize = oldsize; return true; } - return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero); + return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero, + newsize); } #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h index 4571895ec3..d8ea06f6d0 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h @@ -30,11 +30,85 @@ # define restrict #endif -/* Various function pointers are statick and immutable except during testing. */ +/* Various function pointers are static and immutable except during testing. */ #ifdef JEMALLOC_JET # define JET_MUTABLE #else # define JET_MUTABLE const #endif +#define JEMALLOC_VA_ARGS_HEAD(head, ...) head +#define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__ + +#if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) \ + && defined(JEMALLOC_HAVE_ATTR) && (__GNUC__ >= 7) +#define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough); +#else +#define JEMALLOC_FALLTHROUGH /* falls through */ +#endif + +/* Diagnostic suppression macros */ +#if defined(_MSC_VER) && !defined(__clang__) +# define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop)) +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable:W)) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS +/* #pragma GCC diagnostic first appeared in gcc 4.6. */ +#elif (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && \ + (__GNUC_MINOR__ > 5)))) || defined(__clang__) +/* + * The JEMALLOC_PRAGMA__ macro is an implementation detail of the GCC and Clang + * diagnostic suppression macros and should not be used anywhere else. + */ +# define JEMALLOC_PRAGMA__(X) _Pragma(#X) +# define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push) +# define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop) +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) \ + JEMALLOC_PRAGMA__(GCC diagnostic ignored W) + +/* + * The -Wmissing-field-initializers warning is buggy in GCC versions < 5.1 and + * all clang versions up to version 7 (currently trunk, unreleased). This macro + * suppresses the warning for the affected compiler versions only. 
+ */ +# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) || \ + defined(__clang__) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# endif + +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits") +# define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter") +# if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7) +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# endif +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \ + JEMALLOC_DIAGNOSTIC_PUSH \ + JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER +#else +# define JEMALLOC_DIAGNOSTIC_PUSH +# define JEMALLOC_DIAGNOSTIC_POP +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS +#endif + +/* + * Disables spurious diagnostics for all headers. Since these headers are not + * included by users directly, it does not affect their diagnostic settings. + */ +JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS + #endif /* JEMALLOC_INTERNAL_MACROS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h index 50f9d001d5..e296c5a7e8 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TYPES_H #define JEMALLOC_INTERNAL_TYPES_H +#include "jemalloc/internal/quantum.h" + /* Page size index type. */ typedef unsigned pszind_t; @@ -50,72 +52,6 @@ typedef int malloc_cpuid_t; /* Smallest size class to support. */ #define TINY_MIN (1U << LG_TINY_MIN) -/* - * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size - * classes). - */ -#ifndef LG_QUANTUM -# if (defined(__i386__) || defined(_M_IX86)) -# define LG_QUANTUM 4 -# endif -# ifdef __ia64__ -# define LG_QUANTUM 4 -# endif -# ifdef __alpha__ -# define LG_QUANTUM 4 -# endif -# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) -# define LG_QUANTUM 4 -# endif -# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -# define LG_QUANTUM 4 -# endif -# ifdef __arm__ -# define LG_QUANTUM 3 -# endif -# ifdef __aarch64__ -# define LG_QUANTUM 4 -# endif -# ifdef __hppa__ -# define LG_QUANTUM 4 -# endif -# ifdef __mips__ -# define LG_QUANTUM 3 -# endif -# ifdef __or1k__ -# define LG_QUANTUM 3 -# endif -# ifdef __powerpc__ -# define LG_QUANTUM 4 -# endif -# ifdef __riscv__ -# define LG_QUANTUM 4 -# endif -# ifdef __s390__ -# define LG_QUANTUM 4 -# endif -# ifdef __SH4__ -# define LG_QUANTUM 4 -# endif -# ifdef __tile__ -# define LG_QUANTUM 4 -# endif -# ifdef __le32__ -# define LG_QUANTUM 4 -# endif -# ifndef LG_QUANTUM -# error "Unknown minimum alignment for architecture; specify via " - "--with-lg-quantum" -# endif -#endif - -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) - -/* Return the smallest quantum multiple that is >= a. 
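Returning to the diagnostic-suppression macros defined in jemalloc_internal_macros.h above, a minimal sketch of the intended push/ignore/pop pattern; the struct and variable below are invented for illustration:

typedef struct { int lo; int hi; } example_range_t;	/* hypothetical */

JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
static const example_range_t example_range_empty = {0};	/* hi intentionally omitted */
JEMALLOC_DIAGNOSTIC_POP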
*/ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) - #define LONG ((size_t)(1U << LG_SIZEOF_LONG)) #define LONG_MASK (LONG - 1) diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h index 39045c857f..c66ada8b87 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h @@ -21,7 +21,7 @@ # include "../jemalloc.h" #endif -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#if defined(JEMALLOC_OSATOMIC) #include <libkern/OSAtomic.h> #endif @@ -45,7 +45,11 @@ # include "jemalloc/internal/private_namespace_jet.h" # endif #endif -#include "jemalloc/internal/hooks.h" +#include "jemalloc/internal/test_hooks.h" + +#ifdef JEMALLOC_DEFINE_MADVISE_FREE +# define JEMALLOC_MADV_FREE 8 +#endif static const bool config_debug = #ifdef JEMALLOC_DEBUG @@ -61,6 +65,13 @@ static const bool have_dss = false #endif ; +static const bool have_madvise_huge = +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + true +#else + false +#endif + ; static const bool config_fill = #ifdef JEMALLOC_FILL true @@ -111,13 +122,6 @@ static const bool config_stats = false #endif ; -static const bool config_thp = -#ifdef JEMALLOC_THP - true -#else - false -#endif - ; static const bool config_tls = #ifdef JEMALLOC_TLS true @@ -146,7 +150,37 @@ static const bool config_cache_oblivious = false #endif ; -#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* + * Undocumented, for jemalloc development use only at the moment. See the note + * in jemalloc/internal/log.h. + */ +static const bool config_log = +#ifdef JEMALLOC_LOG + true +#else + false +#endif + ; +/* + * Are extra safety checks enabled; things like checking the size of sized + * deallocations, double-frees, etc. + */ +static const bool config_opt_safety_checks = +#ifdef JEMALLOC_OPT_SAFETY_CHECKS + true +#elif defined(JEMALLOC_DEBUG) + /* + * This lets us only guard safety checks by one flag instead of two; fast + * checks can guard solely by config_opt_safety_checks and run in debug mode + * too. + */ + true +#else + false +#endif + ; + +#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. 
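A brief aside on the config_* constants declared in jemalloc_preamble.h above: they are plain static const bools rather than preprocessor conditionals, so guarded code is always parsed and type-checked while the disabled branch folds away as dead code. A sketch, with the function name invented for illustration:

static void
report_build_flags(void) {
	if (config_log) {
		malloc_printf("developer logging compiled in\n");
	}
	if (config_opt_safety_checks) {
		malloc_printf("extra safety checks enabled\n");
	}
}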
*/ #define JEMALLOC_PERCPU_ARENA #endif diff --git a/deps/jemalloc/include/jemalloc/internal/large_externs.h b/deps/jemalloc/include/jemalloc/internal/large_externs.h index 3f36282cd4..a05019e8a5 100644 --- a/deps/jemalloc/include/jemalloc/internal/large_externs.h +++ b/deps/jemalloc/include/jemalloc/internal/large_externs.h @@ -1,13 +1,16 @@ #ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H #define JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#include "jemalloc/internal/hook.h" + void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t usize_max, bool zero); -void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, - size_t alignment, bool zero, tcache_t *tcache); +void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, + size_t alignment, bool zero, tcache_t *tcache, + hook_ralloc_args_t *hook_args); typedef void (large_dalloc_junk_t)(void *, size_t); extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk; @@ -23,4 +26,7 @@ prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); +nstime_t large_prof_alloc_time_get(const extent_t *extent); +void large_prof_alloc_time_set(extent_t *extent, nstime_t time); + #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/log.h b/deps/jemalloc/include/jemalloc/internal/log.h new file mode 100644 index 0000000000..6420858635 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/log.h @@ -0,0 +1,115 @@ +#ifndef JEMALLOC_INTERNAL_LOG_H +#define JEMALLOC_INTERNAL_LOG_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" + +#ifdef JEMALLOC_LOG +# define JEMALLOC_LOG_VAR_BUFSIZE 1000 +#else +# define JEMALLOC_LOG_VAR_BUFSIZE 1 +#endif + +#define JEMALLOC_LOG_BUFSIZE 4096 + +/* + * The log malloc_conf option is a '|'-delimited list of log_var name segments + * which should be logged. The names are themselves hierarchical, with '.' as + * the delimiter (a "segment" is just a prefix in the log namespace). So, if + * you have: + * + * log("arena", "log msg for arena"); // 1 + * log("arena.a", "log msg for arena.a"); // 2 + * log("arena.b", "log msg for arena.b"); // 3 + * log("arena.a.a", "log msg for arena.a.a"); // 4 + * log("extent.a", "log msg for extent.a"); // 5 + * log("extent.b", "log msg for extent.b"); // 6 + * + * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and + * 6 will print at runtime. You can enable logging from all log vars by + * writing "log=.". + * + * None of this should be regarded as a stable API for right now. It's intended + * as a debugging interface, to let us keep around some of our printf-debugging + * statements. + */ + +extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +extern atomic_b_t log_init_done; + +typedef struct log_var_s log_var_t; +struct log_var_s { + /* + * Lowest bit is "inited", second lowest is "enabled". Putting them in + * a single word lets us avoid any fences on weak architectures. 
+ */ + atomic_u_t state; + const char *name; +}; + +#define LOG_NOT_INITIALIZED 0U +#define LOG_INITIALIZED_NOT_ENABLED 1U +#define LOG_ENABLED 2U + +#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str} + +/* + * Returns the value we should assume for state (which is not necessarily + * accurate; if logging is done before logging has finished initializing, then + * we default to doing the safe thing by logging everything). + */ +unsigned log_var_update_state(log_var_t *log_var); + +/* We factor out the metadata management to allow us to test more easily. */ +#define log_do_begin(log_var) \ +if (config_log) { \ + unsigned log_state = atomic_load_u(&(log_var).state, \ + ATOMIC_RELAXED); \ + if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \ + log_state = log_var_update_state(&(log_var)); \ + assert(log_state != LOG_NOT_INITIALIZED); \ + } \ + if (log_state == LOG_ENABLED) { \ + { + /* User code executes here. */ +#define log_do_end(log_var) \ + } \ + } \ +} + +/* + * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during + * preprocessing. To work around this, we take all potential extra arguments in + * a var-args functions. Since a varargs macro needs at least one argument in + * the "...", we accept the format string there, and require that the first + * argument in this "..." is a const char *. + */ +static inline void +log_impl_varargs(const char *name, ...) { + char buf[JEMALLOC_LOG_BUFSIZE]; + va_list ap; + + va_start(ap, name); + const char *format = va_arg(ap, const char *); + size_t dst_offset = 0; + dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name); + dst_offset += malloc_vsnprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); + dst_offset += malloc_snprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); + va_end(ap); + + malloc_write(buf); +} + +/* Call as log("log.var.str", "format_string %d", arg_for_format_string); */ +#define LOG(log_var_str, ...) \ +do { \ + static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ + log_do_begin(log_var) \ + log_impl_varargs((log_var).name, __VA_ARGS__); \ + log_do_end(log_var) \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_LOG_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/malloc_io.h b/deps/jemalloc/include/jemalloc/internal/malloc_io.h index 47ae58ec35..1d1a414e0f 100644 --- a/deps/jemalloc/include/jemalloc/internal/malloc_io.h +++ b/deps/jemalloc/include/jemalloc/internal/malloc_io.h @@ -53,10 +53,50 @@ size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +/* + * The caller can set write_cb to null to choose to print with the + * je_malloc_message hook. + */ void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); +static inline ssize_t +malloc_write_fd(int fd, const void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. 
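As a usage sketch for the log.h facility above: the log variable name below is invented, and this assumes a build with JEMALLOC_LOG defined.

static void
log_extent_alloc(size_t size, size_t alignment) {
	/* Enabled at run time with e.g. MALLOC_CONF="log=extent";
	 * "log=." would enable every log variable, per the header comment. */
	LOG("extent.alloc", "size: %zu, alignment: %zu", size, alignment);
}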
+ * + * syscall() returns long or int, depending on platform, so capture the + * result in the widest plausible type to avoid compiler warnings. + */ + long result = syscall(SYS_write, fd, buf, count); +#else + ssize_t result = (ssize_t)write(fd, buf, +#ifdef _WIN32 + (unsigned int) +#endif + count); +#endif + return (ssize_t)result; +} + +static inline ssize_t +malloc_read_fd(int fd, void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + long result = syscall(SYS_read, fd, buf, count); +#else + ssize_t result = read(fd, buf, +#ifdef _WIN32 + (unsigned int) +#endif + count); +#endif + return (ssize_t)result; +} + #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h index 6520c2512d..7c24f0725e 100644 --- a/deps/jemalloc/include/jemalloc/internal/mutex.h +++ b/deps/jemalloc/include/jemalloc/internal/mutex.h @@ -37,14 +37,17 @@ struct malloc_mutex_s { # endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) os_unfair_lock lock; -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) pthread_mutex_t lock; malloc_mutex_t *postponed_next; #else pthread_mutex_t lock; #endif + /* + * Hint flag to avoid exclusive cache line contention + * during spin waiting + */ + atomic_b_t locked; }; /* * We only touch witness when configured w/ debug. However we @@ -84,10 +87,6 @@ struct malloc_mutex_s { # define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) # define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) # define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) -#elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock)) #else # define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) # define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) @@ -101,22 +100,37 @@ struct malloc_mutex_s { #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \ +# if defined(JEMALLOC_DEBUG) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} +# else +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# endif #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# if (defined(JEMALLOC_DEBUG)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} +# else +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# endif + #else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# if defined(JEMALLOC_DEBUG) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, 
PTHREAD_MUTEX_INITIALIZER}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} +# else +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# endif #endif #ifdef JEMALLOC_LAZY_LOCK @@ -139,6 +153,7 @@ void malloc_mutex_lock_slow(malloc_mutex_t *mutex); static inline void malloc_mutex_lock_final(malloc_mutex_t *mutex) { MALLOC_MUTEX_LOCK(mutex); + atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); } static inline bool @@ -164,6 +179,7 @@ malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { + atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); return true; } mutex_owner_stats_update(tsdn, mutex); @@ -203,6 +219,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { malloc_mutex_lock_slow(mutex); + atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); } mutex_owner_stats_update(tsdn, mutex); } @@ -211,6 +228,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { static inline void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED); witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { MALLOC_MUTEX_UNLOCK(mutex); @@ -245,4 +263,26 @@ malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); } +static inline void +malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data, + malloc_mutex_t *mutex) { + mutex_prof_data_t *source = &mutex->prof_data; + /* Can only read holding the mutex. */ + malloc_mutex_assert_owner(tsdn, mutex); + + nstime_add(&data->tot_wait_time, &source->tot_wait_time); + if (nstime_compare(&source->max_wait_time, &data->max_wait_time) > 0) { + nstime_copy(&data->max_wait_time, &source->max_wait_time); + } + data->n_wait_times += source->n_wait_times; + data->n_spin_acquired += source->n_spin_acquired; + if (data->max_n_thds < source->max_n_thds) { + data->max_n_thds = source->max_n_thds; + } + /* n_wait_thds is not reported. */ + atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); + data->n_owner_switches += source->n_owner_switches; + data->n_lock_ops += source->n_lock_ops; +} + #endif /* JEMALLOC_INTERNAL_MUTEX_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/mutex_prof.h b/deps/jemalloc/include/jemalloc/internal/mutex_prof.h index 3358bcf535..2cb8fb0cbf 100644 --- a/deps/jemalloc/include/jemalloc/internal/mutex_prof.h +++ b/deps/jemalloc/include/jemalloc/internal/mutex_prof.h @@ -35,21 +35,43 @@ typedef enum { mutex_prof_num_arena_mutexes } mutex_prof_arena_ind_t; +/* + * The forth parameter is a boolean value that is true for derived rate counters + * and false for real ones. 
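Stepping back to the locked hint field added to malloc_mutex_s above: the idea is that waiters can poll a relaxed load of the hint, keeping the cache line in a shared state instead of bouncing it between cores on failed lock attempts. A sketch of such a wait loop follows; it is not jemalloc's actual slow path (malloc_mutex_lock_slow), and it assumes the atomic_load_b counterpart of the atomic_store_b calls shown above plus the CPU_SPINWAIT pause macro.

static void
wait_until_maybe_unlocked(malloc_mutex_t *mutex) {
	while (atomic_load_b(&mutex->locked, ATOMIC_RELAXED)) {
		CPU_SPINWAIT;	/* cheap pause; empty on some CPUs */
	}
	/* Only now retry malloc_mutex_trylock_final(mutex). */
}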
+ */ +#define MUTEX_PROF_UINT64_COUNTERS \ + OP(num_ops, uint64_t, "n_lock_ops", false, num_ops) \ + OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops) \ + OP(num_wait, uint64_t, "n_waiting", false, num_wait) \ + OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait) \ + OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq) \ + OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq) \ + OP(num_owner_switch, uint64_t, "n_owner_switch", false, num_owner_switch) \ + OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch) \ + OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time) \ + OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time) \ + OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time) + +#define MUTEX_PROF_UINT32_COUNTERS \ + OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds) + #define MUTEX_PROF_COUNTERS \ - OP(num_ops, uint64_t) \ - OP(num_wait, uint64_t) \ - OP(num_spin_acq, uint64_t) \ - OP(num_owner_switch, uint64_t) \ - OP(total_wait_time, uint64_t) \ - OP(max_wait_time, uint64_t) \ - OP(max_num_thds, uint32_t) + MUTEX_PROF_UINT64_COUNTERS \ + MUTEX_PROF_UINT32_COUNTERS -typedef enum { -#define OP(counter, type) mutex_counter_##counter, - MUTEX_PROF_COUNTERS +#define OP(counter, type, human, derived, base_counter) mutex_counter_##counter, + +#define COUNTER_ENUM(counter_list, t) \ + typedef enum { \ + counter_list \ + mutex_prof_num_##t##_counters \ + } mutex_prof_##t##_counter_ind_t; + +COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t) +COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t) + +#undef COUNTER_ENUM #undef OP - mutex_prof_num_counters -} mutex_prof_counter_ind_t; typedef struct { /* diff --git a/deps/jemalloc/include/jemalloc/internal/pages.h b/deps/jemalloc/include/jemalloc/internal/pages.h index 28383b7f97..7dae633afe 100644 --- a/deps/jemalloc/include/jemalloc/internal/pages.h +++ b/deps/jemalloc/include/jemalloc/internal/pages.h @@ -58,6 +58,20 @@ static const bool pages_can_purge_forced = #endif ; +typedef enum { + thp_mode_default = 0, /* Do not change hugepage settings. */ + thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ + thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ + + thp_mode_names_limit = 3, /* Used for option processing. */ + thp_mode_not_supported = 3 /* No THP support detected. */ +} thp_mode_t; + +#define THP_MODE_DEFAULT thp_mode_default +extern thp_mode_t opt_thp; +extern thp_mode_t init_system_thp_mode; /* Initial system wide state. 
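The MUTEX_PROF_*_COUNTERS lists above are X-macros: each consumer defines OP() for its own purpose, expands the list, then undefines OP(). A minimal sketch of one such consumer; the function is invented for illustration and is not part of jemalloc:

static void
print_mutex_counter_names(void) {
#define OP(counter, type, human, derived, base_counter)			\
	malloc_printf("%s%s\n", human, derived ? " (derived rate)" : "");
	MUTEX_PROF_UINT64_COUNTERS
	MUTEX_PROF_UINT32_COUNTERS
#undef OP
}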
*/ +extern const char *thp_mode_names[]; + void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); bool pages_commit(void *addr, size_t size); @@ -66,6 +80,9 @@ bool pages_purge_lazy(void *addr, size_t size); bool pages_purge_forced(void *addr, size_t size); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); +bool pages_dontdump(void *addr, size_t size); +bool pages_dodump(void *addr, size_t size); bool pages_boot(void); +void pages_set_thp_state (void *ptr, size_t size); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/private_namespace.h b/deps/jemalloc/include/jemalloc/internal/private_namespace.h index 7ebeeba826..9aa21f8f01 100644 --- a/deps/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/deps/jemalloc/include/jemalloc/internal/private_namespace.h @@ -1,639 +1,422 @@ -#define a0dalloc JEMALLOC_N(a0dalloc) -#define a0get JEMALLOC_N(a0get) -#define a0malloc JEMALLOC_N(a0malloc) -#define arena_aalloc JEMALLOC_N(arena_aalloc) -#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) -#define arena_basic_stats_merge JEMALLOC_N(arena_basic_stats_merge) -#define arena_bin_index JEMALLOC_N(arena_bin_index) -#define arena_bin_info JEMALLOC_N(arena_bin_info) -#define arena_bitselm_get_const JEMALLOC_N(arena_bitselm_get_const) -#define arena_bitselm_get_mutable JEMALLOC_N(arena_bitselm_get_mutable) -#define arena_boot JEMALLOC_N(arena_boot) -#define arena_choose JEMALLOC_N(arena_choose) -#define arena_choose_hard JEMALLOC_N(arena_choose_hard) -#define arena_choose_impl JEMALLOC_N(arena_choose_impl) -#define arena_chunk_alloc_huge JEMALLOC_N(arena_chunk_alloc_huge) -#define arena_chunk_cache_maybe_insert JEMALLOC_N(arena_chunk_cache_maybe_insert) -#define arena_chunk_cache_maybe_remove JEMALLOC_N(arena_chunk_cache_maybe_remove) -#define arena_chunk_dalloc_huge JEMALLOC_N(arena_chunk_dalloc_huge) -#define arena_chunk_ralloc_huge_expand JEMALLOC_N(arena_chunk_ralloc_huge_expand) -#define arena_chunk_ralloc_huge_shrink JEMALLOC_N(arena_chunk_ralloc_huge_shrink) -#define arena_chunk_ralloc_huge_similar JEMALLOC_N(arena_chunk_ralloc_huge_similar) -#define arena_cleanup JEMALLOC_N(arena_cleanup) -#define arena_dalloc JEMALLOC_N(arena_dalloc) -#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) -#define arena_dalloc_bin_junked_locked JEMALLOC_N(arena_dalloc_bin_junked_locked) -#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) -#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) -#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) -#define arena_dalloc_large_junked_locked JEMALLOC_N(arena_dalloc_large_junked_locked) -#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) -#define arena_decay_tick JEMALLOC_N(arena_decay_tick) -#define arena_decay_ticks JEMALLOC_N(arena_decay_ticks) -#define arena_decay_time_default_get JEMALLOC_N(arena_decay_time_default_get) -#define arena_decay_time_default_set JEMALLOC_N(arena_decay_time_default_set) -#define arena_decay_time_get JEMALLOC_N(arena_decay_time_get) -#define arena_decay_time_set JEMALLOC_N(arena_decay_time_set) -#define arena_dss_prec_get JEMALLOC_N(arena_dss_prec_get) -#define arena_dss_prec_set JEMALLOC_N(arena_dss_prec_set) -#define arena_extent_sn_next JEMALLOC_N(arena_extent_sn_next) -#define arena_get JEMALLOC_N(arena_get) -#define arena_ichoose JEMALLOC_N(arena_ichoose) -#define arena_init JEMALLOC_N(arena_init) -#define 
arena_lg_dirty_mult_default_get JEMALLOC_N(arena_lg_dirty_mult_default_get) -#define arena_lg_dirty_mult_default_set JEMALLOC_N(arena_lg_dirty_mult_default_set) -#define arena_lg_dirty_mult_get JEMALLOC_N(arena_lg_dirty_mult_get) -#define arena_lg_dirty_mult_set JEMALLOC_N(arena_lg_dirty_mult_set) -#define arena_malloc JEMALLOC_N(arena_malloc) -#define arena_malloc_hard JEMALLOC_N(arena_malloc_hard) -#define arena_malloc_large JEMALLOC_N(arena_malloc_large) -#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) -#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) -#define arena_mapbits_decommitted_get JEMALLOC_N(arena_mapbits_decommitted_get) -#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) -#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) -#define arena_mapbits_internal_set JEMALLOC_N(arena_mapbits_internal_set) -#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) -#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) -#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) -#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) -#define arena_mapbits_size_decode JEMALLOC_N(arena_mapbits_size_decode) -#define arena_mapbits_size_encode JEMALLOC_N(arena_mapbits_size_encode) -#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) -#define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) -#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) -#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) -#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) -#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) -#define arena_mapbitsp_get_const JEMALLOC_N(arena_mapbitsp_get_const) -#define arena_mapbitsp_get_mutable JEMALLOC_N(arena_mapbitsp_get_mutable) -#define arena_mapbitsp_read JEMALLOC_N(arena_mapbitsp_read) -#define arena_mapbitsp_write JEMALLOC_N(arena_mapbitsp_write) -#define arena_maxrun JEMALLOC_N(arena_maxrun) -#define arena_maybe_purge JEMALLOC_N(arena_maybe_purge) -#define arena_metadata_allocated_add JEMALLOC_N(arena_metadata_allocated_add) -#define arena_metadata_allocated_get JEMALLOC_N(arena_metadata_allocated_get) -#define arena_metadata_allocated_sub JEMALLOC_N(arena_metadata_allocated_sub) -#define arena_migrate JEMALLOC_N(arena_migrate) -#define arena_miscelm_get_const JEMALLOC_N(arena_miscelm_get_const) -#define arena_miscelm_get_mutable JEMALLOC_N(arena_miscelm_get_mutable) -#define arena_miscelm_to_pageind JEMALLOC_N(arena_miscelm_to_pageind) -#define arena_miscelm_to_rpages JEMALLOC_N(arena_miscelm_to_rpages) -#define arena_new JEMALLOC_N(arena_new) -#define arena_node_alloc JEMALLOC_N(arena_node_alloc) -#define arena_node_dalloc JEMALLOC_N(arena_node_dalloc) -#define arena_nthreads_dec JEMALLOC_N(arena_nthreads_dec) -#define arena_nthreads_get JEMALLOC_N(arena_nthreads_get) -#define arena_nthreads_inc JEMALLOC_N(arena_nthreads_inc) -#define arena_palloc JEMALLOC_N(arena_palloc) -#define arena_postfork_child JEMALLOC_N(arena_postfork_child) -#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) -#define arena_prefork0 JEMALLOC_N(arena_prefork0) -#define arena_prefork1 JEMALLOC_N(arena_prefork1) -#define arena_prefork2 JEMALLOC_N(arena_prefork2) -#define arena_prefork3 JEMALLOC_N(arena_prefork3) -#define arena_prof_accum JEMALLOC_N(arena_prof_accum) -#define 
arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl) -#define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked) -#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) -#define arena_prof_tctx_get JEMALLOC_N(arena_prof_tctx_get) -#define arena_prof_tctx_reset JEMALLOC_N(arena_prof_tctx_reset) -#define arena_prof_tctx_set JEMALLOC_N(arena_prof_tctx_set) -#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) -#define arena_purge JEMALLOC_N(arena_purge) -#define arena_quarantine_junk_small JEMALLOC_N(arena_quarantine_junk_small) -#define arena_ralloc JEMALLOC_N(arena_ralloc) -#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) -#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) -#define arena_rd_to_miscelm JEMALLOC_N(arena_rd_to_miscelm) -#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) -#define arena_reset JEMALLOC_N(arena_reset) -#define arena_run_regind JEMALLOC_N(arena_run_regind) -#define arena_run_to_miscelm JEMALLOC_N(arena_run_to_miscelm) -#define arena_salloc JEMALLOC_N(arena_salloc) -#define arena_sdalloc JEMALLOC_N(arena_sdalloc) -#define arena_stats_merge JEMALLOC_N(arena_stats_merge) -#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) -#define arena_tdata_get JEMALLOC_N(arena_tdata_get) -#define arena_tdata_get_hard JEMALLOC_N(arena_tdata_get_hard) -#define arenas JEMALLOC_N(arenas) -#define arenas_tdata_bypass_cleanup JEMALLOC_N(arenas_tdata_bypass_cleanup) -#define arenas_tdata_cleanup JEMALLOC_N(arenas_tdata_cleanup) -#define atomic_add_p JEMALLOC_N(atomic_add_p) -#define atomic_add_u JEMALLOC_N(atomic_add_u) -#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) -#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) -#define atomic_add_z JEMALLOC_N(atomic_add_z) -#define atomic_cas_p JEMALLOC_N(atomic_cas_p) -#define atomic_cas_u JEMALLOC_N(atomic_cas_u) -#define atomic_cas_uint32 JEMALLOC_N(atomic_cas_uint32) -#define atomic_cas_uint64 JEMALLOC_N(atomic_cas_uint64) -#define atomic_cas_z JEMALLOC_N(atomic_cas_z) -#define atomic_sub_p JEMALLOC_N(atomic_sub_p) -#define atomic_sub_u JEMALLOC_N(atomic_sub_u) -#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) -#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) -#define atomic_sub_z JEMALLOC_N(atomic_sub_z) -#define atomic_write_p JEMALLOC_N(atomic_write_p) -#define atomic_write_u JEMALLOC_N(atomic_write_u) -#define atomic_write_uint32 JEMALLOC_N(atomic_write_uint32) -#define atomic_write_uint64 JEMALLOC_N(atomic_write_uint64) -#define atomic_write_z JEMALLOC_N(atomic_write_z) -#define base_alloc JEMALLOC_N(base_alloc) -#define base_boot JEMALLOC_N(base_boot) -#define base_postfork_child JEMALLOC_N(base_postfork_child) -#define base_postfork_parent JEMALLOC_N(base_postfork_parent) -#define base_prefork JEMALLOC_N(base_prefork) -#define base_stats_get JEMALLOC_N(base_stats_get) -#define bitmap_full JEMALLOC_N(bitmap_full) -#define bitmap_get JEMALLOC_N(bitmap_get) -#define bitmap_info_init JEMALLOC_N(bitmap_info_init) -#define bitmap_init JEMALLOC_N(bitmap_init) -#define bitmap_set JEMALLOC_N(bitmap_set) -#define bitmap_sfu JEMALLOC_N(bitmap_sfu) -#define bitmap_size JEMALLOC_N(bitmap_size) -#define bitmap_unset JEMALLOC_N(bitmap_unset) -#define bootstrap_calloc JEMALLOC_N(bootstrap_calloc) -#define bootstrap_free JEMALLOC_N(bootstrap_free) -#define bootstrap_malloc JEMALLOC_N(bootstrap_malloc) -#define bt_init JEMALLOC_N(bt_init) -#define buferror JEMALLOC_N(buferror) -#define chunk_alloc_base 
JEMALLOC_N(chunk_alloc_base) -#define chunk_alloc_cache JEMALLOC_N(chunk_alloc_cache) -#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) -#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_alloc_wrapper JEMALLOC_N(chunk_alloc_wrapper) -#define chunk_boot JEMALLOC_N(chunk_boot) -#define chunk_dalloc_cache JEMALLOC_N(chunk_dalloc_cache) -#define chunk_dalloc_mmap JEMALLOC_N(chunk_dalloc_mmap) -#define chunk_dalloc_wrapper JEMALLOC_N(chunk_dalloc_wrapper) -#define chunk_deregister JEMALLOC_N(chunk_deregister) -#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) -#define chunk_dss_mergeable JEMALLOC_N(chunk_dss_mergeable) -#define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get) -#define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set) -#define chunk_hooks_default JEMALLOC_N(chunk_hooks_default) -#define chunk_hooks_get JEMALLOC_N(chunk_hooks_get) -#define chunk_hooks_set JEMALLOC_N(chunk_hooks_set) -#define chunk_in_dss JEMALLOC_N(chunk_in_dss) -#define chunk_lookup JEMALLOC_N(chunk_lookup) -#define chunk_npages JEMALLOC_N(chunk_npages) -#define chunk_purge_wrapper JEMALLOC_N(chunk_purge_wrapper) -#define chunk_register JEMALLOC_N(chunk_register) -#define chunks_rtree JEMALLOC_N(chunks_rtree) -#define chunksize JEMALLOC_N(chunksize) -#define chunksize_mask JEMALLOC_N(chunksize_mask) -#define ckh_count JEMALLOC_N(ckh_count) -#define ckh_delete JEMALLOC_N(ckh_delete) -#define ckh_insert JEMALLOC_N(ckh_insert) -#define ckh_iter JEMALLOC_N(ckh_iter) -#define ckh_new JEMALLOC_N(ckh_new) -#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) -#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) -#define ckh_remove JEMALLOC_N(ckh_remove) -#define ckh_search JEMALLOC_N(ckh_search) -#define ckh_string_hash JEMALLOC_N(ckh_string_hash) -#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) -#define ctl_boot JEMALLOC_N(ctl_boot) -#define ctl_bymib JEMALLOC_N(ctl_bymib) -#define ctl_byname JEMALLOC_N(ctl_byname) -#define ctl_nametomib JEMALLOC_N(ctl_nametomib) -#define ctl_postfork_child JEMALLOC_N(ctl_postfork_child) -#define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent) -#define ctl_prefork JEMALLOC_N(ctl_prefork) -#define decay_ticker_get JEMALLOC_N(decay_ticker_get) -#define dss_prec_names JEMALLOC_N(dss_prec_names) -#define extent_node_achunk_get JEMALLOC_N(extent_node_achunk_get) -#define extent_node_achunk_set JEMALLOC_N(extent_node_achunk_set) -#define extent_node_addr_get JEMALLOC_N(extent_node_addr_get) -#define extent_node_addr_set JEMALLOC_N(extent_node_addr_set) -#define extent_node_arena_get JEMALLOC_N(extent_node_arena_get) -#define extent_node_arena_set JEMALLOC_N(extent_node_arena_set) -#define extent_node_committed_get JEMALLOC_N(extent_node_committed_get) -#define extent_node_committed_set JEMALLOC_N(extent_node_committed_set) -#define extent_node_dirty_insert JEMALLOC_N(extent_node_dirty_insert) -#define extent_node_dirty_linkage_init JEMALLOC_N(extent_node_dirty_linkage_init) -#define extent_node_dirty_remove JEMALLOC_N(extent_node_dirty_remove) -#define extent_node_init JEMALLOC_N(extent_node_init) -#define extent_node_prof_tctx_get JEMALLOC_N(extent_node_prof_tctx_get) -#define extent_node_prof_tctx_set JEMALLOC_N(extent_node_prof_tctx_set) -#define extent_node_size_get JEMALLOC_N(extent_node_size_get) -#define extent_node_size_set JEMALLOC_N(extent_node_size_set) -#define extent_node_sn_get JEMALLOC_N(extent_node_sn_get) -#define extent_node_sn_set JEMALLOC_N(extent_node_sn_set) -#define extent_node_zeroed_get JEMALLOC_N(extent_node_zeroed_get) 
-#define extent_node_zeroed_set JEMALLOC_N(extent_node_zeroed_set) -#define extent_size_quantize_ceil JEMALLOC_N(extent_size_quantize_ceil) -#define extent_size_quantize_floor JEMALLOC_N(extent_size_quantize_floor) -#define extent_tree_ad_destroy JEMALLOC_N(extent_tree_ad_destroy) -#define extent_tree_ad_destroy_recurse JEMALLOC_N(extent_tree_ad_destroy_recurse) -#define extent_tree_ad_empty JEMALLOC_N(extent_tree_ad_empty) -#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) -#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) -#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) -#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse) -#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start) -#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last) -#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new) -#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next) -#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch) -#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev) -#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch) -#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove) -#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter) -#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) -#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) -#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) -#define extent_tree_szsnad_destroy JEMALLOC_N(extent_tree_szsnad_destroy) -#define extent_tree_szsnad_destroy_recurse JEMALLOC_N(extent_tree_szsnad_destroy_recurse) -#define extent_tree_szsnad_empty JEMALLOC_N(extent_tree_szsnad_empty) -#define extent_tree_szsnad_first JEMALLOC_N(extent_tree_szsnad_first) -#define extent_tree_szsnad_insert JEMALLOC_N(extent_tree_szsnad_insert) -#define extent_tree_szsnad_iter JEMALLOC_N(extent_tree_szsnad_iter) -#define extent_tree_szsnad_iter_recurse JEMALLOC_N(extent_tree_szsnad_iter_recurse) -#define extent_tree_szsnad_iter_start JEMALLOC_N(extent_tree_szsnad_iter_start) -#define extent_tree_szsnad_last JEMALLOC_N(extent_tree_szsnad_last) -#define extent_tree_szsnad_new JEMALLOC_N(extent_tree_szsnad_new) -#define extent_tree_szsnad_next JEMALLOC_N(extent_tree_szsnad_next) -#define extent_tree_szsnad_nsearch JEMALLOC_N(extent_tree_szsnad_nsearch) -#define extent_tree_szsnad_prev JEMALLOC_N(extent_tree_szsnad_prev) -#define extent_tree_szsnad_psearch JEMALLOC_N(extent_tree_szsnad_psearch) -#define extent_tree_szsnad_remove JEMALLOC_N(extent_tree_szsnad_remove) -#define extent_tree_szsnad_reverse_iter JEMALLOC_N(extent_tree_szsnad_reverse_iter) -#define extent_tree_szsnad_reverse_iter_recurse JEMALLOC_N(extent_tree_szsnad_reverse_iter_recurse) -#define extent_tree_szsnad_reverse_iter_start JEMALLOC_N(extent_tree_szsnad_reverse_iter_start) -#define extent_tree_szsnad_search JEMALLOC_N(extent_tree_szsnad_search) -#define ffs_llu JEMALLOC_N(ffs_llu) -#define ffs_lu JEMALLOC_N(ffs_lu) -#define ffs_u JEMALLOC_N(ffs_u) -#define ffs_u32 JEMALLOC_N(ffs_u32) -#define ffs_u64 JEMALLOC_N(ffs_u64) -#define ffs_zu JEMALLOC_N(ffs_zu) -#define get_errno JEMALLOC_N(get_errno) -#define hash JEMALLOC_N(hash) -#define hash_fmix_32 JEMALLOC_N(hash_fmix_32) -#define hash_fmix_64 JEMALLOC_N(hash_fmix_64) -#define hash_get_block_32 JEMALLOC_N(hash_get_block_32) -#define hash_get_block_64 JEMALLOC_N(hash_get_block_64) -#define hash_rotl_32 JEMALLOC_N(hash_rotl_32) -#define 
hash_rotl_64 JEMALLOC_N(hash_rotl_64) -#define hash_x64_128 JEMALLOC_N(hash_x64_128) -#define hash_x86_128 JEMALLOC_N(hash_x86_128) -#define hash_x86_32 JEMALLOC_N(hash_x86_32) -#define huge_aalloc JEMALLOC_N(huge_aalloc) -#define huge_dalloc JEMALLOC_N(huge_dalloc) -#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) -#define huge_malloc JEMALLOC_N(huge_malloc) -#define huge_palloc JEMALLOC_N(huge_palloc) -#define huge_prof_tctx_get JEMALLOC_N(huge_prof_tctx_get) -#define huge_prof_tctx_reset JEMALLOC_N(huge_prof_tctx_reset) -#define huge_prof_tctx_set JEMALLOC_N(huge_prof_tctx_set) -#define huge_ralloc JEMALLOC_N(huge_ralloc) -#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) -#define huge_salloc JEMALLOC_N(huge_salloc) -#define iaalloc JEMALLOC_N(iaalloc) -#define ialloc JEMALLOC_N(ialloc) -#define iallocztm JEMALLOC_N(iallocztm) -#define iarena_cleanup JEMALLOC_N(iarena_cleanup) -#define idalloc JEMALLOC_N(idalloc) -#define idalloctm JEMALLOC_N(idalloctm) -#define in_valgrind JEMALLOC_N(in_valgrind) -#define index2size JEMALLOC_N(index2size) -#define index2size_compute JEMALLOC_N(index2size_compute) -#define index2size_lookup JEMALLOC_N(index2size_lookup) -#define index2size_tab JEMALLOC_N(index2size_tab) -#define ipalloc JEMALLOC_N(ipalloc) -#define ipalloct JEMALLOC_N(ipalloct) -#define ipallocztm JEMALLOC_N(ipallocztm) -#define iqalloc JEMALLOC_N(iqalloc) -#define iralloc JEMALLOC_N(iralloc) -#define iralloct JEMALLOC_N(iralloct) -#define iralloct_realign JEMALLOC_N(iralloct_realign) -#define isalloc JEMALLOC_N(isalloc) -#define isdalloct JEMALLOC_N(isdalloct) -#define isqalloc JEMALLOC_N(isqalloc) -#define isthreaded JEMALLOC_N(isthreaded) -#define ivsalloc JEMALLOC_N(ivsalloc) -#define ixalloc JEMALLOC_N(ixalloc) -#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) -#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) -#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) -#define large_maxclass JEMALLOC_N(large_maxclass) -#define lg_floor JEMALLOC_N(lg_floor) -#define lg_prof_sample JEMALLOC_N(lg_prof_sample) -#define malloc_cprintf JEMALLOC_N(malloc_cprintf) -#define malloc_mutex_assert_not_owner JEMALLOC_N(malloc_mutex_assert_not_owner) -#define malloc_mutex_assert_owner JEMALLOC_N(malloc_mutex_assert_owner) -#define malloc_mutex_boot JEMALLOC_N(malloc_mutex_boot) -#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) -#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) -#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) -#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) -#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) -#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) -#define malloc_printf JEMALLOC_N(malloc_printf) -#define malloc_snprintf JEMALLOC_N(malloc_snprintf) -#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax) -#define malloc_tsd_boot0 JEMALLOC_N(malloc_tsd_boot0) -#define malloc_tsd_boot1 JEMALLOC_N(malloc_tsd_boot1) -#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) -#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) -#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) -#define malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup) -#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) -#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) -#define malloc_write JEMALLOC_N(malloc_write) -#define map_bias JEMALLOC_N(map_bias) -#define map_misc_offset JEMALLOC_N(map_misc_offset) -#define mb_write 
JEMALLOC_N(mb_write) -#define narenas_auto JEMALLOC_N(narenas_auto) -#define narenas_tdata_cleanup JEMALLOC_N(narenas_tdata_cleanup) -#define narenas_total_get JEMALLOC_N(narenas_total_get) -#define ncpus JEMALLOC_N(ncpus) -#define nhbins JEMALLOC_N(nhbins) -#define nhclasses JEMALLOC_N(nhclasses) -#define nlclasses JEMALLOC_N(nlclasses) -#define nstime_add JEMALLOC_N(nstime_add) -#define nstime_compare JEMALLOC_N(nstime_compare) -#define nstime_copy JEMALLOC_N(nstime_copy) -#define nstime_divide JEMALLOC_N(nstime_divide) -#define nstime_idivide JEMALLOC_N(nstime_idivide) -#define nstime_imultiply JEMALLOC_N(nstime_imultiply) -#define nstime_init JEMALLOC_N(nstime_init) -#define nstime_init2 JEMALLOC_N(nstime_init2) -#define nstime_monotonic JEMALLOC_N(nstime_monotonic) -#define nstime_ns JEMALLOC_N(nstime_ns) -#define nstime_nsec JEMALLOC_N(nstime_nsec) -#define nstime_sec JEMALLOC_N(nstime_sec) -#define nstime_subtract JEMALLOC_N(nstime_subtract) -#define nstime_update JEMALLOC_N(nstime_update) -#define opt_abort JEMALLOC_N(opt_abort) -#define opt_decay_time JEMALLOC_N(opt_decay_time) -#define opt_dss JEMALLOC_N(opt_dss) -#define opt_junk JEMALLOC_N(opt_junk) -#define opt_junk_alloc JEMALLOC_N(opt_junk_alloc) -#define opt_junk_free JEMALLOC_N(opt_junk_free) -#define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) -#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) -#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) -#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample) -#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max) -#define opt_narenas JEMALLOC_N(opt_narenas) -#define opt_prof JEMALLOC_N(opt_prof) -#define opt_prof_accum JEMALLOC_N(opt_prof_accum) -#define opt_prof_active JEMALLOC_N(opt_prof_active) -#define opt_prof_final JEMALLOC_N(opt_prof_final) -#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) -#define opt_prof_leak JEMALLOC_N(opt_prof_leak) -#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix) -#define opt_prof_thread_active_init JEMALLOC_N(opt_prof_thread_active_init) -#define opt_purge JEMALLOC_N(opt_purge) -#define opt_quarantine JEMALLOC_N(opt_quarantine) -#define opt_redzone JEMALLOC_N(opt_redzone) -#define opt_stats_print JEMALLOC_N(opt_stats_print) -#define opt_tcache JEMALLOC_N(opt_tcache) -#define opt_thp JEMALLOC_N(opt_thp) -#define opt_utrace JEMALLOC_N(opt_utrace) -#define opt_xmalloc JEMALLOC_N(opt_xmalloc) -#define opt_zero JEMALLOC_N(opt_zero) -#define p2rz JEMALLOC_N(p2rz) -#define pages_boot JEMALLOC_N(pages_boot) -#define pages_commit JEMALLOC_N(pages_commit) -#define pages_decommit JEMALLOC_N(pages_decommit) -#define pages_huge JEMALLOC_N(pages_huge) -#define pages_map JEMALLOC_N(pages_map) -#define pages_nohuge JEMALLOC_N(pages_nohuge) -#define pages_purge JEMALLOC_N(pages_purge) -#define pages_trim JEMALLOC_N(pages_trim) -#define pages_unmap JEMALLOC_N(pages_unmap) -#define pind2sz JEMALLOC_N(pind2sz) -#define pind2sz_compute JEMALLOC_N(pind2sz_compute) -#define pind2sz_lookup JEMALLOC_N(pind2sz_lookup) -#define pind2sz_tab JEMALLOC_N(pind2sz_tab) -#define pow2_ceil_u32 JEMALLOC_N(pow2_ceil_u32) -#define pow2_ceil_u64 JEMALLOC_N(pow2_ceil_u64) -#define pow2_ceil_zu JEMALLOC_N(pow2_ceil_zu) -#define prng_lg_range_u32 JEMALLOC_N(prng_lg_range_u32) -#define prng_lg_range_u64 JEMALLOC_N(prng_lg_range_u64) -#define prng_lg_range_zu JEMALLOC_N(prng_lg_range_zu) -#define prng_range_u32 JEMALLOC_N(prng_range_u32) -#define prng_range_u64 JEMALLOC_N(prng_range_u64) -#define prng_range_zu JEMALLOC_N(prng_range_zu) -#define prng_state_next_u32 
JEMALLOC_N(prng_state_next_u32) -#define prng_state_next_u64 JEMALLOC_N(prng_state_next_u64) -#define prng_state_next_zu JEMALLOC_N(prng_state_next_zu) -#define prof_active JEMALLOC_N(prof_active) -#define prof_active_get JEMALLOC_N(prof_active_get) -#define prof_active_get_unlocked JEMALLOC_N(prof_active_get_unlocked) -#define prof_active_set JEMALLOC_N(prof_active_set) -#define prof_alloc_prep JEMALLOC_N(prof_alloc_prep) -#define prof_alloc_rollback JEMALLOC_N(prof_alloc_rollback) -#define prof_backtrace JEMALLOC_N(prof_backtrace) -#define prof_boot0 JEMALLOC_N(prof_boot0) -#define prof_boot1 JEMALLOC_N(prof_boot1) -#define prof_boot2 JEMALLOC_N(prof_boot2) -#define prof_bt_count JEMALLOC_N(prof_bt_count) -#define prof_dump_header JEMALLOC_N(prof_dump_header) -#define prof_dump_open JEMALLOC_N(prof_dump_open) -#define prof_free JEMALLOC_N(prof_free) -#define prof_free_sampled_object JEMALLOC_N(prof_free_sampled_object) -#define prof_gdump JEMALLOC_N(prof_gdump) -#define prof_gdump_get JEMALLOC_N(prof_gdump_get) -#define prof_gdump_get_unlocked JEMALLOC_N(prof_gdump_get_unlocked) -#define prof_gdump_set JEMALLOC_N(prof_gdump_set) -#define prof_gdump_val JEMALLOC_N(prof_gdump_val) -#define prof_idump JEMALLOC_N(prof_idump) -#define prof_interval JEMALLOC_N(prof_interval) -#define prof_lookup JEMALLOC_N(prof_lookup) -#define prof_malloc JEMALLOC_N(prof_malloc) -#define prof_malloc_sample_object JEMALLOC_N(prof_malloc_sample_object) -#define prof_mdump JEMALLOC_N(prof_mdump) -#define prof_postfork_child JEMALLOC_N(prof_postfork_child) -#define prof_postfork_parent JEMALLOC_N(prof_postfork_parent) -#define prof_prefork0 JEMALLOC_N(prof_prefork0) -#define prof_prefork1 JEMALLOC_N(prof_prefork1) -#define prof_realloc JEMALLOC_N(prof_realloc) -#define prof_reset JEMALLOC_N(prof_reset) -#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) -#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) -#define prof_tctx_get JEMALLOC_N(prof_tctx_get) -#define prof_tctx_reset JEMALLOC_N(prof_tctx_reset) -#define prof_tctx_set JEMALLOC_N(prof_tctx_set) -#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) -#define prof_tdata_count JEMALLOC_N(prof_tdata_count) -#define prof_tdata_get JEMALLOC_N(prof_tdata_get) -#define prof_tdata_init JEMALLOC_N(prof_tdata_init) -#define prof_tdata_reinit JEMALLOC_N(prof_tdata_reinit) -#define prof_thread_active_get JEMALLOC_N(prof_thread_active_get) -#define prof_thread_active_init_get JEMALLOC_N(prof_thread_active_init_get) -#define prof_thread_active_init_set JEMALLOC_N(prof_thread_active_init_set) -#define prof_thread_active_set JEMALLOC_N(prof_thread_active_set) -#define prof_thread_name_get JEMALLOC_N(prof_thread_name_get) -#define prof_thread_name_set JEMALLOC_N(prof_thread_name_set) -#define psz2ind JEMALLOC_N(psz2ind) -#define psz2u JEMALLOC_N(psz2u) -#define purge_mode_names JEMALLOC_N(purge_mode_names) -#define quarantine JEMALLOC_N(quarantine) -#define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook) -#define quarantine_alloc_hook_work JEMALLOC_N(quarantine_alloc_hook_work) -#define quarantine_cleanup JEMALLOC_N(quarantine_cleanup) -#define rtree_child_read JEMALLOC_N(rtree_child_read) -#define rtree_child_read_hard JEMALLOC_N(rtree_child_read_hard) -#define rtree_child_tryread JEMALLOC_N(rtree_child_tryread) -#define rtree_delete JEMALLOC_N(rtree_delete) -#define rtree_get JEMALLOC_N(rtree_get) -#define rtree_new JEMALLOC_N(rtree_new) -#define rtree_node_valid JEMALLOC_N(rtree_node_valid) -#define rtree_set 
JEMALLOC_N(rtree_set) -#define rtree_start_level JEMALLOC_N(rtree_start_level) -#define rtree_subkey JEMALLOC_N(rtree_subkey) -#define rtree_subtree_read JEMALLOC_N(rtree_subtree_read) -#define rtree_subtree_read_hard JEMALLOC_N(rtree_subtree_read_hard) -#define rtree_subtree_tryread JEMALLOC_N(rtree_subtree_tryread) -#define rtree_val_read JEMALLOC_N(rtree_val_read) -#define rtree_val_write JEMALLOC_N(rtree_val_write) -#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) -#define run_quantize_floor JEMALLOC_N(run_quantize_floor) -#define s2u JEMALLOC_N(s2u) -#define s2u_compute JEMALLOC_N(s2u_compute) -#define s2u_lookup JEMALLOC_N(s2u_lookup) -#define sa2u JEMALLOC_N(sa2u) -#define set_errno JEMALLOC_N(set_errno) -#define size2index JEMALLOC_N(size2index) -#define size2index_compute JEMALLOC_N(size2index_compute) -#define size2index_lookup JEMALLOC_N(size2index_lookup) -#define size2index_tab JEMALLOC_N(size2index_tab) -#define spin_adaptive JEMALLOC_N(spin_adaptive) -#define spin_init JEMALLOC_N(spin_init) -#define stats_cactive JEMALLOC_N(stats_cactive) -#define stats_cactive_add JEMALLOC_N(stats_cactive_add) -#define stats_cactive_get JEMALLOC_N(stats_cactive_get) -#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) -#define stats_print JEMALLOC_N(stats_print) -#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) -#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) -#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) -#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) -#define tcache_arena_reassociate JEMALLOC_N(tcache_arena_reassociate) -#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) -#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) -#define tcache_bin_info JEMALLOC_N(tcache_bin_info) -#define tcache_boot JEMALLOC_N(tcache_boot) -#define tcache_cleanup JEMALLOC_N(tcache_cleanup) -#define tcache_create JEMALLOC_N(tcache_create) -#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) -#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) -#define tcache_enabled_cleanup JEMALLOC_N(tcache_enabled_cleanup) -#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) -#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) -#define tcache_event JEMALLOC_N(tcache_event) -#define tcache_event_hard JEMALLOC_N(tcache_event_hard) -#define tcache_flush JEMALLOC_N(tcache_flush) -#define tcache_get JEMALLOC_N(tcache_get) -#define tcache_get_hard JEMALLOC_N(tcache_get_hard) -#define tcache_maxclass JEMALLOC_N(tcache_maxclass) -#define tcache_postfork_child JEMALLOC_N(tcache_postfork_child) -#define tcache_postfork_parent JEMALLOC_N(tcache_postfork_parent) -#define tcache_prefork JEMALLOC_N(tcache_prefork) -#define tcache_salloc JEMALLOC_N(tcache_salloc) -#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) -#define tcaches JEMALLOC_N(tcaches) -#define tcaches_create JEMALLOC_N(tcaches_create) -#define tcaches_destroy JEMALLOC_N(tcaches_destroy) -#define tcaches_flush JEMALLOC_N(tcaches_flush) -#define tcaches_get JEMALLOC_N(tcaches_get) -#define thread_allocated_cleanup JEMALLOC_N(thread_allocated_cleanup) -#define thread_deallocated_cleanup JEMALLOC_N(thread_deallocated_cleanup) -#define ticker_copy JEMALLOC_N(ticker_copy) -#define ticker_init JEMALLOC_N(ticker_init) -#define ticker_read JEMALLOC_N(ticker_read) -#define ticker_tick JEMALLOC_N(ticker_tick) -#define ticker_ticks JEMALLOC_N(ticker_ticks) -#define tsd_arena_get JEMALLOC_N(tsd_arena_get) -#define tsd_arena_set JEMALLOC_N(tsd_arena_set) -#define 
tsd_arenap_get JEMALLOC_N(tsd_arenap_get) -#define tsd_arenas_tdata_bypass_get JEMALLOC_N(tsd_arenas_tdata_bypass_get) -#define tsd_arenas_tdata_bypass_set JEMALLOC_N(tsd_arenas_tdata_bypass_set) -#define tsd_arenas_tdata_bypassp_get JEMALLOC_N(tsd_arenas_tdata_bypassp_get) -#define tsd_arenas_tdata_get JEMALLOC_N(tsd_arenas_tdata_get) -#define tsd_arenas_tdata_set JEMALLOC_N(tsd_arenas_tdata_set) -#define tsd_arenas_tdatap_get JEMALLOC_N(tsd_arenas_tdatap_get) -#define tsd_boot JEMALLOC_N(tsd_boot) -#define tsd_boot0 JEMALLOC_N(tsd_boot0) -#define tsd_boot1 JEMALLOC_N(tsd_boot1) -#define tsd_booted JEMALLOC_N(tsd_booted) -#define tsd_booted_get JEMALLOC_N(tsd_booted_get) -#define tsd_cleanup JEMALLOC_N(tsd_cleanup) -#define tsd_cleanup_wrapper JEMALLOC_N(tsd_cleanup_wrapper) -#define tsd_fetch JEMALLOC_N(tsd_fetch) -#define tsd_fetch_impl JEMALLOC_N(tsd_fetch_impl) -#define tsd_get JEMALLOC_N(tsd_get) -#define tsd_get_allocates JEMALLOC_N(tsd_get_allocates) -#define tsd_iarena_get JEMALLOC_N(tsd_iarena_get) -#define tsd_iarena_set JEMALLOC_N(tsd_iarena_set) -#define tsd_iarenap_get JEMALLOC_N(tsd_iarenap_get) -#define tsd_initialized JEMALLOC_N(tsd_initialized) -#define tsd_init_check_recursion JEMALLOC_N(tsd_init_check_recursion) -#define tsd_init_finish JEMALLOC_N(tsd_init_finish) -#define tsd_init_head JEMALLOC_N(tsd_init_head) -#define tsd_narenas_tdata_get JEMALLOC_N(tsd_narenas_tdata_get) -#define tsd_narenas_tdata_set JEMALLOC_N(tsd_narenas_tdata_set) -#define tsd_narenas_tdatap_get JEMALLOC_N(tsd_narenas_tdatap_get) -#define tsd_wrapper_get JEMALLOC_N(tsd_wrapper_get) -#define tsd_wrapper_set JEMALLOC_N(tsd_wrapper_set) -#define tsd_nominal JEMALLOC_N(tsd_nominal) -#define tsd_prof_tdata_get JEMALLOC_N(tsd_prof_tdata_get) -#define tsd_prof_tdata_set JEMALLOC_N(tsd_prof_tdata_set) -#define tsd_prof_tdatap_get JEMALLOC_N(tsd_prof_tdatap_get) -#define tsd_quarantine_get JEMALLOC_N(tsd_quarantine_get) -#define tsd_quarantine_set JEMALLOC_N(tsd_quarantine_set) -#define tsd_quarantinep_get JEMALLOC_N(tsd_quarantinep_get) -#define tsd_set JEMALLOC_N(tsd_set) -#define tsd_tcache_enabled_get JEMALLOC_N(tsd_tcache_enabled_get) -#define tsd_tcache_enabled_set JEMALLOC_N(tsd_tcache_enabled_set) -#define tsd_tcache_enabledp_get JEMALLOC_N(tsd_tcache_enabledp_get) -#define tsd_tcache_get JEMALLOC_N(tsd_tcache_get) -#define tsd_tcache_set JEMALLOC_N(tsd_tcache_set) -#define tsd_tcachep_get JEMALLOC_N(tsd_tcachep_get) -#define tsd_thread_allocated_get JEMALLOC_N(tsd_thread_allocated_get) -#define tsd_thread_allocated_set JEMALLOC_N(tsd_thread_allocated_set) -#define tsd_thread_allocatedp_get JEMALLOC_N(tsd_thread_allocatedp_get) -#define tsd_thread_deallocated_get JEMALLOC_N(tsd_thread_deallocated_get) -#define tsd_thread_deallocated_set JEMALLOC_N(tsd_thread_deallocated_set) -#define tsd_thread_deallocatedp_get JEMALLOC_N(tsd_thread_deallocatedp_get) -#define tsd_tls JEMALLOC_N(tsd_tls) -#define tsd_tsd JEMALLOC_N(tsd_tsd) -#define tsd_tsdn JEMALLOC_N(tsd_tsdn) -#define tsd_witness_fork_get JEMALLOC_N(tsd_witness_fork_get) -#define tsd_witness_fork_set JEMALLOC_N(tsd_witness_fork_set) -#define tsd_witness_forkp_get JEMALLOC_N(tsd_witness_forkp_get) -#define tsd_witnesses_get JEMALLOC_N(tsd_witnesses_get) -#define tsd_witnesses_set JEMALLOC_N(tsd_witnesses_set) -#define tsd_witnessesp_get JEMALLOC_N(tsd_witnessesp_get) -#define tsdn_fetch JEMALLOC_N(tsdn_fetch) -#define tsdn_null JEMALLOC_N(tsdn_null) -#define tsdn_tsd JEMALLOC_N(tsdn_tsd) -#define u2rz JEMALLOC_N(u2rz) -#define 
valgrind_freelike_block JEMALLOC_N(valgrind_freelike_block) -#define valgrind_make_mem_defined JEMALLOC_N(valgrind_make_mem_defined) -#define valgrind_make_mem_noaccess JEMALLOC_N(valgrind_make_mem_noaccess) -#define valgrind_make_mem_undefined JEMALLOC_N(valgrind_make_mem_undefined) -#define witness_assert_depth JEMALLOC_N(witness_assert_depth) -#define witness_assert_depth_to_rank JEMALLOC_N(witness_assert_depth_to_rank) -#define witness_assert_lockless JEMALLOC_N(witness_assert_lockless) -#define witness_assert_not_owner JEMALLOC_N(witness_assert_not_owner) -#define witness_assert_owner JEMALLOC_N(witness_assert_owner) -#define witness_depth_error JEMALLOC_N(witness_depth_error) -#define witness_fork_cleanup JEMALLOC_N(witness_fork_cleanup) -#define witness_init JEMALLOC_N(witness_init) -#define witness_lock JEMALLOC_N(witness_lock) -#define witness_lock_error JEMALLOC_N(witness_lock_error) -#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) -#define witness_owner JEMALLOC_N(witness_owner) -#define witness_owner_error JEMALLOC_N(witness_owner_error) -#define witness_postfork_child JEMALLOC_N(witness_postfork_child) -#define witness_postfork_parent JEMALLOC_N(witness_postfork_parent) -#define witness_prefork JEMALLOC_N(witness_prefork) -#define witness_unlock JEMALLOC_N(witness_unlock) -#define witnesses_cleanup JEMALLOC_N(witnesses_cleanup) -#define zone_register JEMALLOC_N(zone_register) +#define a0dalloc JEMALLOC_N(a0dalloc) +#define a0malloc JEMALLOC_N(a0malloc) +#define arena_choose_hard JEMALLOC_N(arena_choose_hard) +#define arena_cleanup JEMALLOC_N(arena_cleanup) +#define arena_init JEMALLOC_N(arena_init) +#define arena_migrate JEMALLOC_N(arena_migrate) +#define arenas JEMALLOC_N(arenas) +#define arena_set JEMALLOC_N(arena_set) +#define arenas_lock JEMALLOC_N(arenas_lock) +#define arenas_tdata_cleanup JEMALLOC_N(arenas_tdata_cleanup) +#define arena_tdata_get_hard JEMALLOC_N(arena_tdata_get_hard) +#define bootstrap_calloc JEMALLOC_N(bootstrap_calloc) +#define bootstrap_free JEMALLOC_N(bootstrap_free) +#define bootstrap_malloc JEMALLOC_N(bootstrap_malloc) +#define free_default JEMALLOC_N(free_default) +#define iarena_cleanup JEMALLOC_N(iarena_cleanup) +#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) +#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) +#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) +#define je_sdallocx_noflags JEMALLOC_N(je_sdallocx_noflags) +#define malloc_default JEMALLOC_N(malloc_default) +#define malloc_initialized JEMALLOC_N(malloc_initialized) +#define malloc_slow JEMALLOC_N(malloc_slow) +#define manual_arena_base JEMALLOC_N(manual_arena_base) +#define narenas_auto JEMALLOC_N(narenas_auto) +#define narenas_total_get JEMALLOC_N(narenas_total_get) +#define ncpus JEMALLOC_N(ncpus) +#define opt_abort JEMALLOC_N(opt_abort) +#define opt_abort_conf JEMALLOC_N(opt_abort_conf) +#define opt_confirm_conf JEMALLOC_N(opt_confirm_conf) +#define opt_junk JEMALLOC_N(opt_junk) +#define opt_junk_alloc JEMALLOC_N(opt_junk_alloc) +#define opt_junk_free JEMALLOC_N(opt_junk_free) +#define opt_narenas JEMALLOC_N(opt_narenas) +#define opt_utrace JEMALLOC_N(opt_utrace) +#define opt_xmalloc JEMALLOC_N(opt_xmalloc) +#define opt_zero JEMALLOC_N(opt_zero) +#define sdallocx_default JEMALLOC_N(sdallocx_default) +#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) +#define arena_basic_stats_merge JEMALLOC_N(arena_basic_stats_merge) +#define arena_bin_choose_lock JEMALLOC_N(arena_bin_choose_lock) +#define arena_boot 
JEMALLOC_N(arena_boot) +#define arena_choose_huge JEMALLOC_N(arena_choose_huge) +#define arena_dalloc_bin_junked_locked JEMALLOC_N(arena_dalloc_bin_junked_locked) +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) +#define arena_dalloc_promoted JEMALLOC_N(arena_dalloc_promoted) +#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) +#define arena_decay JEMALLOC_N(arena_decay) +#define arena_destroy JEMALLOC_N(arena_destroy) +#define arena_dirty_decay_ms_default_get JEMALLOC_N(arena_dirty_decay_ms_default_get) +#define arena_dirty_decay_ms_default_set JEMALLOC_N(arena_dirty_decay_ms_default_set) +#define arena_dirty_decay_ms_get JEMALLOC_N(arena_dirty_decay_ms_get) +#define arena_dirty_decay_ms_set JEMALLOC_N(arena_dirty_decay_ms_set) +#define arena_dss_prec_get JEMALLOC_N(arena_dss_prec_get) +#define arena_dss_prec_set JEMALLOC_N(arena_dss_prec_set) +#define arena_extent_alloc_large JEMALLOC_N(arena_extent_alloc_large) +#define arena_extent_dalloc_large_prep JEMALLOC_N(arena_extent_dalloc_large_prep) +#define arena_extent_ralloc_large_expand JEMALLOC_N(arena_extent_ralloc_large_expand) +#define arena_extent_ralloc_large_shrink JEMALLOC_N(arena_extent_ralloc_large_shrink) +#define arena_extents_dirty_dalloc JEMALLOC_N(arena_extents_dirty_dalloc) +#define arena_extent_sn_next JEMALLOC_N(arena_extent_sn_next) +#define arena_init_huge JEMALLOC_N(arena_init_huge) +#define arena_is_huge JEMALLOC_N(arena_is_huge) +#define arena_malloc_hard JEMALLOC_N(arena_malloc_hard) +#define arena_muzzy_decay_ms_default_get JEMALLOC_N(arena_muzzy_decay_ms_default_get) +#define arena_muzzy_decay_ms_default_set JEMALLOC_N(arena_muzzy_decay_ms_default_set) +#define arena_muzzy_decay_ms_get JEMALLOC_N(arena_muzzy_decay_ms_get) +#define arena_muzzy_decay_ms_set JEMALLOC_N(arena_muzzy_decay_ms_set) +#define arena_new JEMALLOC_N(arena_new) +#define arena_nthreads_dec JEMALLOC_N(arena_nthreads_dec) +#define arena_nthreads_get JEMALLOC_N(arena_nthreads_get) +#define arena_nthreads_inc JEMALLOC_N(arena_nthreads_inc) +#define arena_palloc JEMALLOC_N(arena_palloc) +#define arena_postfork_child JEMALLOC_N(arena_postfork_child) +#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) +#define arena_prefork0 JEMALLOC_N(arena_prefork0) +#define arena_prefork1 JEMALLOC_N(arena_prefork1) +#define arena_prefork2 JEMALLOC_N(arena_prefork2) +#define arena_prefork3 JEMALLOC_N(arena_prefork3) +#define arena_prefork4 JEMALLOC_N(arena_prefork4) +#define arena_prefork5 JEMALLOC_N(arena_prefork5) +#define arena_prefork6 JEMALLOC_N(arena_prefork6) +#define arena_prefork7 JEMALLOC_N(arena_prefork7) +#define arena_prof_promote JEMALLOC_N(arena_prof_promote) +#define arena_ralloc JEMALLOC_N(arena_ralloc) +#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) +#define arena_reset JEMALLOC_N(arena_reset) +#define arena_retain_grow_limit_get_set JEMALLOC_N(arena_retain_grow_limit_get_set) +#define arena_stats_merge JEMALLOC_N(arena_stats_merge) +#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) +#define h_steps JEMALLOC_N(h_steps) +#define opt_dirty_decay_ms JEMALLOC_N(opt_dirty_decay_ms) +#define opt_muzzy_decay_ms JEMALLOC_N(opt_muzzy_decay_ms) +#define opt_oversize_threshold JEMALLOC_N(opt_oversize_threshold) +#define opt_percpu_arena JEMALLOC_N(opt_percpu_arena) +#define oversize_threshold JEMALLOC_N(oversize_threshold) +#define percpu_arena_mode_names JEMALLOC_N(percpu_arena_mode_names) +#define background_thread_boot0 JEMALLOC_N(background_thread_boot0) +#define 
background_thread_boot1 JEMALLOC_N(background_thread_boot1) +#define background_thread_create JEMALLOC_N(background_thread_create) +#define background_thread_ctl_init JEMALLOC_N(background_thread_ctl_init) +#define background_thread_enabled_state JEMALLOC_N(background_thread_enabled_state) +#define background_thread_info JEMALLOC_N(background_thread_info) +#define background_thread_interval_check JEMALLOC_N(background_thread_interval_check) +#define background_thread_lock JEMALLOC_N(background_thread_lock) +#define background_thread_postfork_child JEMALLOC_N(background_thread_postfork_child) +#define background_thread_postfork_parent JEMALLOC_N(background_thread_postfork_parent) +#define background_thread_prefork0 JEMALLOC_N(background_thread_prefork0) +#define background_thread_prefork1 JEMALLOC_N(background_thread_prefork1) +#define background_threads_disable JEMALLOC_N(background_threads_disable) +#define background_threads_enable JEMALLOC_N(background_threads_enable) +#define background_thread_stats_read JEMALLOC_N(background_thread_stats_read) +#define max_background_threads JEMALLOC_N(max_background_threads) +#define n_background_threads JEMALLOC_N(n_background_threads) +#define opt_background_thread JEMALLOC_N(opt_background_thread) +#define opt_max_background_threads JEMALLOC_N(opt_max_background_threads) +#define pthread_create_wrapper JEMALLOC_N(pthread_create_wrapper) +#define b0get JEMALLOC_N(b0get) +#define base_alloc JEMALLOC_N(base_alloc) +#define base_alloc_extent JEMALLOC_N(base_alloc_extent) +#define base_boot JEMALLOC_N(base_boot) +#define base_delete JEMALLOC_N(base_delete) +#define base_extent_hooks_get JEMALLOC_N(base_extent_hooks_get) +#define base_extent_hooks_set JEMALLOC_N(base_extent_hooks_set) +#define base_new JEMALLOC_N(base_new) +#define base_postfork_child JEMALLOC_N(base_postfork_child) +#define base_postfork_parent JEMALLOC_N(base_postfork_parent) +#define base_prefork JEMALLOC_N(base_prefork) +#define base_stats_get JEMALLOC_N(base_stats_get) +#define metadata_thp_mode_names JEMALLOC_N(metadata_thp_mode_names) +#define opt_metadata_thp JEMALLOC_N(opt_metadata_thp) +#define bin_boot JEMALLOC_N(bin_boot) +#define bin_infos JEMALLOC_N(bin_infos) +#define bin_init JEMALLOC_N(bin_init) +#define bin_postfork_child JEMALLOC_N(bin_postfork_child) +#define bin_postfork_parent JEMALLOC_N(bin_postfork_parent) +#define bin_prefork JEMALLOC_N(bin_prefork) +#define bin_shard_sizes_boot JEMALLOC_N(bin_shard_sizes_boot) +#define bin_update_shard_size JEMALLOC_N(bin_update_shard_size) +#define bitmap_info_init JEMALLOC_N(bitmap_info_init) +#define bitmap_init JEMALLOC_N(bitmap_init) +#define bitmap_size JEMALLOC_N(bitmap_size) +#define ckh_count JEMALLOC_N(ckh_count) +#define ckh_delete JEMALLOC_N(ckh_delete) +#define ckh_insert JEMALLOC_N(ckh_insert) +#define ckh_iter JEMALLOC_N(ckh_iter) +#define ckh_new JEMALLOC_N(ckh_new) +#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) +#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) +#define ckh_remove JEMALLOC_N(ckh_remove) +#define ckh_search JEMALLOC_N(ckh_search) +#define ckh_string_hash JEMALLOC_N(ckh_string_hash) +#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) +#define ctl_boot JEMALLOC_N(ctl_boot) +#define ctl_bymib JEMALLOC_N(ctl_bymib) +#define ctl_byname JEMALLOC_N(ctl_byname) +#define ctl_nametomib JEMALLOC_N(ctl_nametomib) +#define ctl_postfork_child JEMALLOC_N(ctl_postfork_child) +#define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent) +#define ctl_prefork JEMALLOC_N(ctl_prefork) 
+#define div_init JEMALLOC_N(div_init) +#define extent_alloc JEMALLOC_N(extent_alloc) +#define extent_alloc_wrapper JEMALLOC_N(extent_alloc_wrapper) +#define extent_avail_any JEMALLOC_N(extent_avail_any) +#define extent_avail_empty JEMALLOC_N(extent_avail_empty) +#define extent_avail_first JEMALLOC_N(extent_avail_first) +#define extent_avail_insert JEMALLOC_N(extent_avail_insert) +#define extent_avail_new JEMALLOC_N(extent_avail_new) +#define extent_avail_remove JEMALLOC_N(extent_avail_remove) +#define extent_avail_remove_any JEMALLOC_N(extent_avail_remove_any) +#define extent_avail_remove_first JEMALLOC_N(extent_avail_remove_first) +#define extent_boot JEMALLOC_N(extent_boot) +#define extent_commit_wrapper JEMALLOC_N(extent_commit_wrapper) +#define extent_dalloc JEMALLOC_N(extent_dalloc) +#define extent_dalloc_gap JEMALLOC_N(extent_dalloc_gap) +#define extent_dalloc_wrapper JEMALLOC_N(extent_dalloc_wrapper) +#define extent_decommit_wrapper JEMALLOC_N(extent_decommit_wrapper) +#define extent_destroy_wrapper JEMALLOC_N(extent_destroy_wrapper) +#define extent_heap_any JEMALLOC_N(extent_heap_any) +#define extent_heap_empty JEMALLOC_N(extent_heap_empty) +#define extent_heap_first JEMALLOC_N(extent_heap_first) +#define extent_heap_insert JEMALLOC_N(extent_heap_insert) +#define extent_heap_new JEMALLOC_N(extent_heap_new) +#define extent_heap_remove JEMALLOC_N(extent_heap_remove) +#define extent_heap_remove_any JEMALLOC_N(extent_heap_remove_any) +#define extent_heap_remove_first JEMALLOC_N(extent_heap_remove_first) +#define extent_hooks_default JEMALLOC_N(extent_hooks_default) +#define extent_hooks_get JEMALLOC_N(extent_hooks_get) +#define extent_hooks_set JEMALLOC_N(extent_hooks_set) +#define extent_merge_wrapper JEMALLOC_N(extent_merge_wrapper) +#define extent_mutex_pool JEMALLOC_N(extent_mutex_pool) +#define extent_purge_forced_wrapper JEMALLOC_N(extent_purge_forced_wrapper) +#define extent_purge_lazy_wrapper JEMALLOC_N(extent_purge_lazy_wrapper) +#define extents_alloc JEMALLOC_N(extents_alloc) +#define extents_dalloc JEMALLOC_N(extents_dalloc) +#define extents_evict JEMALLOC_N(extents_evict) +#define extents_init JEMALLOC_N(extents_init) +#define extents_nbytes_get JEMALLOC_N(extents_nbytes_get) +#define extents_nextents_get JEMALLOC_N(extents_nextents_get) +#define extents_npages_get JEMALLOC_N(extents_npages_get) +#define extent_split_wrapper JEMALLOC_N(extent_split_wrapper) +#define extents_postfork_child JEMALLOC_N(extents_postfork_child) +#define extents_postfork_parent JEMALLOC_N(extents_postfork_parent) +#define extents_prefork JEMALLOC_N(extents_prefork) +#define extents_rtree JEMALLOC_N(extents_rtree) +#define extents_state_get JEMALLOC_N(extents_state_get) +#define extent_util_stats_get JEMALLOC_N(extent_util_stats_get) +#define extent_util_stats_verbose_get JEMALLOC_N(extent_util_stats_verbose_get) +#define opt_lg_extent_max_active_fit JEMALLOC_N(opt_lg_extent_max_active_fit) +#define dss_prec_names JEMALLOC_N(dss_prec_names) +#define extent_alloc_dss JEMALLOC_N(extent_alloc_dss) +#define extent_dss_boot JEMALLOC_N(extent_dss_boot) +#define extent_dss_mergeable JEMALLOC_N(extent_dss_mergeable) +#define extent_dss_prec_get JEMALLOC_N(extent_dss_prec_get) +#define extent_dss_prec_set JEMALLOC_N(extent_dss_prec_set) +#define extent_in_dss JEMALLOC_N(extent_in_dss) +#define opt_dss JEMALLOC_N(opt_dss) +#define extent_alloc_mmap JEMALLOC_N(extent_alloc_mmap) +#define extent_dalloc_mmap JEMALLOC_N(extent_dalloc_mmap) +#define opt_retain JEMALLOC_N(opt_retain) +#define hook_boot 
JEMALLOC_N(hook_boot) +#define hook_install JEMALLOC_N(hook_install) +#define hook_invoke_alloc JEMALLOC_N(hook_invoke_alloc) +#define hook_invoke_dalloc JEMALLOC_N(hook_invoke_dalloc) +#define hook_invoke_expand JEMALLOC_N(hook_invoke_expand) +#define hook_remove JEMALLOC_N(hook_remove) +#define large_dalloc JEMALLOC_N(large_dalloc) +#define large_dalloc_finish JEMALLOC_N(large_dalloc_finish) +#define large_dalloc_junk JEMALLOC_N(large_dalloc_junk) +#define large_dalloc_maybe_junk JEMALLOC_N(large_dalloc_maybe_junk) +#define large_dalloc_prep_junked_locked JEMALLOC_N(large_dalloc_prep_junked_locked) +#define large_malloc JEMALLOC_N(large_malloc) +#define large_palloc JEMALLOC_N(large_palloc) +#define large_prof_alloc_time_get JEMALLOC_N(large_prof_alloc_time_get) +#define large_prof_alloc_time_set JEMALLOC_N(large_prof_alloc_time_set) +#define large_prof_tctx_get JEMALLOC_N(large_prof_tctx_get) +#define large_prof_tctx_reset JEMALLOC_N(large_prof_tctx_reset) +#define large_prof_tctx_set JEMALLOC_N(large_prof_tctx_set) +#define large_ralloc JEMALLOC_N(large_ralloc) +#define large_ralloc_no_move JEMALLOC_N(large_ralloc_no_move) +#define large_salloc JEMALLOC_N(large_salloc) +#define log_init_done JEMALLOC_N(log_init_done) +#define log_var_names JEMALLOC_N(log_var_names) +#define log_var_update_state JEMALLOC_N(log_var_update_state) +#define buferror JEMALLOC_N(buferror) +#define malloc_cprintf JEMALLOC_N(malloc_cprintf) +#define malloc_printf JEMALLOC_N(malloc_printf) +#define malloc_snprintf JEMALLOC_N(malloc_snprintf) +#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax) +#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) +#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) +#define malloc_write JEMALLOC_N(malloc_write) +#define malloc_mutex_boot JEMALLOC_N(malloc_mutex_boot) +#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) +#define malloc_mutex_lock_slow JEMALLOC_N(malloc_mutex_lock_slow) +#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) +#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) +#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) +#define malloc_mutex_prof_data_reset JEMALLOC_N(malloc_mutex_prof_data_reset) +#define mutex_pool_init JEMALLOC_N(mutex_pool_init) +#define nstime_add JEMALLOC_N(nstime_add) +#define nstime_compare JEMALLOC_N(nstime_compare) +#define nstime_copy JEMALLOC_N(nstime_copy) +#define nstime_divide JEMALLOC_N(nstime_divide) +#define nstime_iadd JEMALLOC_N(nstime_iadd) +#define nstime_idivide JEMALLOC_N(nstime_idivide) +#define nstime_imultiply JEMALLOC_N(nstime_imultiply) +#define nstime_init JEMALLOC_N(nstime_init) +#define nstime_init2 JEMALLOC_N(nstime_init2) +#define nstime_isubtract JEMALLOC_N(nstime_isubtract) +#define nstime_monotonic JEMALLOC_N(nstime_monotonic) +#define nstime_msec JEMALLOC_N(nstime_msec) +#define nstime_ns JEMALLOC_N(nstime_ns) +#define nstime_nsec JEMALLOC_N(nstime_nsec) +#define nstime_sec JEMALLOC_N(nstime_sec) +#define nstime_subtract JEMALLOC_N(nstime_subtract) +#define nstime_update JEMALLOC_N(nstime_update) +#define init_system_thp_mode JEMALLOC_N(init_system_thp_mode) +#define opt_thp JEMALLOC_N(opt_thp) +#define pages_boot JEMALLOC_N(pages_boot) +#define pages_commit JEMALLOC_N(pages_commit) +#define pages_decommit JEMALLOC_N(pages_decommit) +#define pages_dodump JEMALLOC_N(pages_dodump) +#define pages_dontdump JEMALLOC_N(pages_dontdump) +#define pages_huge JEMALLOC_N(pages_huge) +#define pages_map JEMALLOC_N(pages_map) +#define pages_nohuge 
JEMALLOC_N(pages_nohuge) +#define pages_purge_forced JEMALLOC_N(pages_purge_forced) +#define pages_purge_lazy JEMALLOC_N(pages_purge_lazy) +#define pages_set_thp_state JEMALLOC_N(pages_set_thp_state) +#define pages_unmap JEMALLOC_N(pages_unmap) +#define thp_mode_names JEMALLOC_N(thp_mode_names) +#define bt2gctx_mtx JEMALLOC_N(bt2gctx_mtx) +#define bt_init JEMALLOC_N(bt_init) +#define lg_prof_sample JEMALLOC_N(lg_prof_sample) +#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) +#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample) +#define opt_prof JEMALLOC_N(opt_prof) +#define opt_prof_accum JEMALLOC_N(opt_prof_accum) +#define opt_prof_active JEMALLOC_N(opt_prof_active) +#define opt_prof_final JEMALLOC_N(opt_prof_final) +#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) +#define opt_prof_leak JEMALLOC_N(opt_prof_leak) +#define opt_prof_log JEMALLOC_N(opt_prof_log) +#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix) +#define opt_prof_thread_active_init JEMALLOC_N(opt_prof_thread_active_init) +#define prof_accum_init JEMALLOC_N(prof_accum_init) +#define prof_active JEMALLOC_N(prof_active) +#define prof_active_get JEMALLOC_N(prof_active_get) +#define prof_active_set JEMALLOC_N(prof_active_set) +#define prof_alloc_rollback JEMALLOC_N(prof_alloc_rollback) +#define prof_backtrace JEMALLOC_N(prof_backtrace) +#define prof_boot0 JEMALLOC_N(prof_boot0) +#define prof_boot1 JEMALLOC_N(prof_boot1) +#define prof_boot2 JEMALLOC_N(prof_boot2) +#define prof_dump_header JEMALLOC_N(prof_dump_header) +#define prof_dump_open JEMALLOC_N(prof_dump_open) +#define prof_free_sampled_object JEMALLOC_N(prof_free_sampled_object) +#define prof_gdump JEMALLOC_N(prof_gdump) +#define prof_gdump_get JEMALLOC_N(prof_gdump_get) +#define prof_gdump_set JEMALLOC_N(prof_gdump_set) +#define prof_gdump_val JEMALLOC_N(prof_gdump_val) +#define prof_idump JEMALLOC_N(prof_idump) +#define prof_interval JEMALLOC_N(prof_interval) +#define prof_logging_state JEMALLOC_N(prof_logging_state) +#define prof_log_start JEMALLOC_N(prof_log_start) +#define prof_log_stop JEMALLOC_N(prof_log_stop) +#define prof_lookup JEMALLOC_N(prof_lookup) +#define prof_malloc_sample_object JEMALLOC_N(prof_malloc_sample_object) +#define prof_mdump JEMALLOC_N(prof_mdump) +#define prof_postfork_child JEMALLOC_N(prof_postfork_child) +#define prof_postfork_parent JEMALLOC_N(prof_postfork_parent) +#define prof_prefork0 JEMALLOC_N(prof_prefork0) +#define prof_prefork1 JEMALLOC_N(prof_prefork1) +#define prof_reset JEMALLOC_N(prof_reset) +#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) +#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_init JEMALLOC_N(prof_tdata_init) +#define prof_tdata_reinit JEMALLOC_N(prof_tdata_reinit) +#define prof_thread_active_get JEMALLOC_N(prof_thread_active_get) +#define prof_thread_active_init_get JEMALLOC_N(prof_thread_active_init_get) +#define prof_thread_active_init_set JEMALLOC_N(prof_thread_active_init_set) +#define prof_thread_active_set JEMALLOC_N(prof_thread_active_set) +#define prof_thread_name_get JEMALLOC_N(prof_thread_name_get) +#define prof_thread_name_set JEMALLOC_N(prof_thread_name_set) +#define rtree_ctx_data_init JEMALLOC_N(rtree_ctx_data_init) +#define rtree_leaf_alloc JEMALLOC_N(rtree_leaf_alloc) +#define rtree_leaf_dalloc JEMALLOC_N(rtree_leaf_dalloc) +#define rtree_leaf_elm_lookup_hard JEMALLOC_N(rtree_leaf_elm_lookup_hard) +#define rtree_new JEMALLOC_N(rtree_new) +#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc) +#define rtree_node_dalloc 
JEMALLOC_N(rtree_node_dalloc) +#define safety_check_fail JEMALLOC_N(safety_check_fail) +#define safety_check_set_abort JEMALLOC_N(safety_check_set_abort) +#define arena_mutex_names JEMALLOC_N(arena_mutex_names) +#define global_mutex_names JEMALLOC_N(global_mutex_names) +#define opt_stats_print JEMALLOC_N(opt_stats_print) +#define opt_stats_print_opts JEMALLOC_N(opt_stats_print_opts) +#define stats_print JEMALLOC_N(stats_print) +#define sc_boot JEMALLOC_N(sc_boot) +#define sc_data_global JEMALLOC_N(sc_data_global) +#define sc_data_init JEMALLOC_N(sc_data_init) +#define sc_data_update_slab_size JEMALLOC_N(sc_data_update_slab_size) +#define sz_boot JEMALLOC_N(sz_boot) +#define sz_index2size_tab JEMALLOC_N(sz_index2size_tab) +#define sz_pind2sz_tab JEMALLOC_N(sz_pind2sz_tab) +#define sz_size2index_tab JEMALLOC_N(sz_size2index_tab) +#define nhbins JEMALLOC_N(nhbins) +#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max) +#define opt_tcache JEMALLOC_N(opt_tcache) +#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) +#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate) +#define tcache_arena_reassociate JEMALLOC_N(tcache_arena_reassociate) +#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) +#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) +#define tcache_bin_info JEMALLOC_N(tcache_bin_info) +#define tcache_boot JEMALLOC_N(tcache_boot) +#define tcache_cleanup JEMALLOC_N(tcache_cleanup) +#define tcache_create_explicit JEMALLOC_N(tcache_create_explicit) +#define tcache_event_hard JEMALLOC_N(tcache_event_hard) +#define tcache_flush JEMALLOC_N(tcache_flush) +#define tcache_maxclass JEMALLOC_N(tcache_maxclass) +#define tcache_postfork_child JEMALLOC_N(tcache_postfork_child) +#define tcache_postfork_parent JEMALLOC_N(tcache_postfork_parent) +#define tcache_prefork JEMALLOC_N(tcache_prefork) +#define tcaches JEMALLOC_N(tcaches) +#define tcache_salloc JEMALLOC_N(tcache_salloc) +#define tcaches_create JEMALLOC_N(tcaches_create) +#define tcaches_destroy JEMALLOC_N(tcaches_destroy) +#define tcaches_flush JEMALLOC_N(tcaches_flush) +#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) +#define tsd_tcache_data_init JEMALLOC_N(tsd_tcache_data_init) +#define tsd_tcache_enabled_data_init JEMALLOC_N(tsd_tcache_enabled_data_init) +#define test_hooks_arena_new_hook JEMALLOC_N(test_hooks_arena_new_hook) +#define test_hooks_libc_hook JEMALLOC_N(test_hooks_libc_hook) +#define malloc_tsd_boot0 JEMALLOC_N(malloc_tsd_boot0) +#define malloc_tsd_boot1 JEMALLOC_N(malloc_tsd_boot1) +#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) +#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) +#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) +#define tsd_booted JEMALLOC_N(tsd_booted) +#define tsd_cleanup JEMALLOC_N(tsd_cleanup) +#define tsd_fetch_slow JEMALLOC_N(tsd_fetch_slow) +#define tsd_global_slow JEMALLOC_N(tsd_global_slow) +#define tsd_global_slow_dec JEMALLOC_N(tsd_global_slow_dec) +#define tsd_global_slow_inc JEMALLOC_N(tsd_global_slow_inc) +#define tsd_postfork_child JEMALLOC_N(tsd_postfork_child) +#define tsd_postfork_parent JEMALLOC_N(tsd_postfork_parent) +#define tsd_prefork JEMALLOC_N(tsd_prefork) +#define tsd_slow_update JEMALLOC_N(tsd_slow_update) +#define tsd_state_set JEMALLOC_N(tsd_state_set) +#define tsd_tls JEMALLOC_N(tsd_tls) +#define tsd_tsd JEMALLOC_N(tsd_tsd) +#define witness_depth_error JEMALLOC_N(witness_depth_error) +#define witnesses_cleanup JEMALLOC_N(witnesses_cleanup) +#define witness_init 
JEMALLOC_N(witness_init) +#define witness_lock_error JEMALLOC_N(witness_lock_error) +#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) +#define witness_owner_error JEMALLOC_N(witness_owner_error) +#define witness_postfork_child JEMALLOC_N(witness_postfork_child) +#define witness_postfork_parent JEMALLOC_N(witness_postfork_parent) +#define witness_prefork JEMALLOC_N(witness_prefork) diff --git a/deps/jemalloc/include/jemalloc/internal/prof_externs.h b/deps/jemalloc/include/jemalloc/internal/prof_externs.h index 04348696f5..094f3e170a 100644 --- a/deps/jemalloc/include/jemalloc/internal/prof_externs.h +++ b/deps/jemalloc/include/jemalloc/internal/prof_externs.h @@ -14,6 +14,7 @@ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_final; /* Final profile dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern bool opt_prof_log; /* Turn logging on at boot. */ extern char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF @@ -45,7 +46,8 @@ extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); @@ -89,4 +91,15 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(prof_tdata_t *tdata); +bool prof_log_start(tsdn_t *tsdn, const char *filename); +bool prof_log_stop(tsdn_t *tsdn); +#ifdef JEMALLOC_JET +size_t prof_log_bt_count(void); +size_t prof_log_alloc_count(void); +size_t prof_log_thr_count(void); +bool prof_log_is_logging(void); +bool prof_log_rep_check(void); +void prof_log_dummy_set(bool new_value); +#endif + #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h index eda6839ade..471d9853cf 100644 --- a/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h +++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h @@ -4,7 +4,8 @@ #include "jemalloc/internal/mutex.h" static inline bool -prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { +prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, + uint64_t accumbytes) { cassert(config_prof); bool overflow; @@ -42,7 +43,8 @@ prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { } static inline void -prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { +prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, + size_t usize) { cassert(config_prof); /* @@ -55,18 +57,29 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { #ifdef JEMALLOC_ATOMIC_U64 a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); do { - a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - - usize) : 0; + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? 
a0 - (SC_LARGE_MINCLASS - usize) : 0; } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); #else malloc_mutex_lock(tsdn, &prof_accum->mtx); a0 = prof_accum->accumbytes; - a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) : - 0; + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; prof_accum->accumbytes = a1; malloc_mutex_unlock(tsdn, &prof_accum->mtx); #endif } +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) { + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return prof_active; +} + #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h index d670cb7b8f..8ba8a1e1ff 100644 --- a/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h +++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h @@ -1,20 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H #define JEMALLOC_INTERNAL_PROF_INLINES_B_H +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) { - /* - * Even if opt_prof is true, sampling can be temporarily disabled by - * setting prof_active to false. No locking is used when reading - * prof_active in the fast path, so there are no guarantees regarding - * how long it will take for all threads to notice state changes. - */ - return prof_active; -} - -JEMALLOC_ALWAYS_INLINE bool prof_gdump_get_unlocked(void) { /* * No locking is used when reading prof_gdump_val in the fast path, so @@ -72,13 +62,54 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { arena_prof_tctx_reset(tsdn, ptr, tctx); } +JEMALLOC_ALWAYS_INLINE nstime_t +prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + return arena_prof_alloc_time_get(tsdn, ptr, alloc_ctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + nstime_t t) { + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_alloc_time_set(tsdn, ptr, alloc_ctx, t); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_check(tsd_t *tsd, size_t usize, bool update) { + ssize_t check = update ? 
0 : usize; + + int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd); + if (update) { + bytes_until_sample -= usize; + if (tsd_nominal(tsd)) { + tsd_bytes_until_sample_set(tsd, bytes_until_sample); + } + } + if (likely(bytes_until_sample >= check)) { + return true; + } + + return false; +} + JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) { + prof_tdata_t **tdata_out) { prof_tdata_t *tdata; cassert(config_prof); + /* Fastpath: no need to load tdata */ + if (likely(prof_sample_check(tsd, usize, update))) { + return true; + } + + bool booted = tsd_prof_tdata_get(tsd); tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { tdata = NULL; @@ -92,21 +123,23 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - if (likely(tdata->bytes_until_sample >= usize)) { - if (update) { - tdata->bytes_until_sample -= usize; - } + /* + * If this was the first creation of tdata, then + * prof_tdata_get() reset bytes_until_sample, so decrement and + * check it again + */ + if (!booted && prof_sample_check(tsd, usize, update)) { return true; - } else { - if (tsd_reentrancy_level_get(tsd) > 0) { - return true; - } - /* Compute new sample threshold. */ - if (update) { - prof_sample_threshold_update(tdata); - } - return !tdata->active; } + + if (tsd_reentrancy_level_get(tsd) > 0) { + return true; + } + /* Compute new sample threshold. */ + if (update) { + prof_sample_threshold_update(tdata); + } + return !tdata->active; } JEMALLOC_ALWAYS_INLINE prof_tctx_t * @@ -198,7 +231,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, old_usize, old_tctx); + prof_free_sampled_object(tsd, ptr, old_usize, old_tctx); } } @@ -210,7 +243,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_free_sampled_object(tsd, usize, tctx); + prof_free_sampled_object(tsd, ptr, usize, tctx); } } diff --git a/deps/jemalloc/include/jemalloc/internal/prof_structs.h b/deps/jemalloc/include/jemalloc/internal/prof_structs.h index 0d58ae1005..34ed4822b6 100644 --- a/deps/jemalloc/include/jemalloc/internal/prof_structs.h +++ b/deps/jemalloc/include/jemalloc/internal/prof_structs.h @@ -169,7 +169,6 @@ struct prof_tdata_s { /* Sampling state. */ uint64_t prng_state; - uint64_t bytes_until_sample; /* State used to avoid dumping while operating on prof internals. */ bool enq; diff --git a/deps/jemalloc/include/jemalloc/internal/quantum.h b/deps/jemalloc/include/jemalloc/internal/quantum.h new file mode 100644 index 0000000000..821086e992 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/quantum.h @@ -0,0 +1,77 @@ +#ifndef JEMALLOC_INTERNAL_QUANTUM_H +#define JEMALLOC_INTERNAL_QUANTUM_H + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). 
+ */ +#ifndef LG_QUANTUM +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __aarch64__ +# define LG_QUANTUM 4 +# endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif +# ifdef __m68k__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __nios2__ +# define LG_QUANTUM 3 +# endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# if defined(__riscv) || defined(__riscv__) +# define LG_QUANTUM 4 +# endif +# ifdef __s390__ +# define LG_QUANTUM 4 +# endif +# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \ + defined(__SH4_SINGLE_ONLY__)) +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" +# endif +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#endif /* JEMALLOC_INTERNAL_QUANTUM_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h index b5d4db3988..16ccbebee7 100644 --- a/deps/jemalloc/include/jemalloc/internal/rtree.h +++ b/deps/jemalloc/include/jemalloc/internal/rtree.h @@ -4,7 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree_tsd.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/tsd.h" /* @@ -31,7 +31,7 @@ # error Unsupported number of significant virtual address bits #endif /* Use compact leaf representation if virtual address encoding allows. */ -#if RTREE_NHIB >= LG_CEIL_NSIZES +#if RTREE_NHIB >= LG_CEIL(SC_NSIZES) # define RTREE_LEAF_COMPACT #endif @@ -170,17 +170,29 @@ rtree_subkey(uintptr_t key, unsigned level) { */ # ifdef RTREE_LEAF_COMPACT JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { return (uintptr_t)atomic_load_p(&elm->le_bits, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); } JEMALLOC_ALWAYS_INLINE extent_t * rtree_leaf_elm_bits_extent_get(uintptr_t bits) { +# ifdef __aarch64__ + /* + * aarch64 doesn't sign extend the highest virtual address bit to set + * the higher ones. Instead, the high bits gets zeroed. + */ + uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; + /* Mask off the slab bit. */ + uintptr_t low_bit_mask = ~(uintptr_t)1; + uintptr_t mask = high_bit_mask & low_bit_mask; + return (extent_t *)(bits & mask); +# else /* Restore sign-extended high bits, mask slab bit. 
*/ return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> RTREE_NHIB) & ~((uintptr_t)0x1)); +# endif } JEMALLOC_ALWAYS_INLINE szind_t @@ -196,8 +208,8 @@ rtree_leaf_elm_bits_slab_get(uintptr_t bits) { # endif JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_extent_get(bits); @@ -209,8 +221,8 @@ rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_szind_get(bits); @@ -221,8 +233,8 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_slab_get(bits); @@ -233,8 +245,8 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - extent_t *extent) { +rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, extent_t *extent) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << @@ -247,9 +259,9 @@ rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - szind_t szind) { - assert(szind <= NSIZES); +rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, szind_t szind) { + assert(szind <= SC_NSIZES); #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, @@ -265,8 +277,8 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool slab) { +rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); @@ -280,8 +292,8 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - extent_t *extent, szind_t szind, bool slab) { +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, extent_t *extent, szind_t szind, bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | @@ -301,7 +313,7 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, static inline void 
rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, szind_t szind, bool slab) { - assert(!slab || szind < NBINS); + assert(!slab || szind < SC_NBINS); /* * The caller implicitly assures that it is the only writer to the szind @@ -417,7 +429,7 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, dependent); if (!dependent && elm == NULL) { - return NSIZES; + return SC_NSIZES; } return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); } @@ -440,6 +452,42 @@ rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, return false; } +/* + * Try to read szind_slab from the L1 cache. Returns true on a hit, + * and fills in r_szind and r_slab. Otherwise returns false. + * + * Key is allowed to be NULL in order to save an extra branch on the + * fastpath. returns false in this case. + */ +JEMALLOC_ALWAYS_INLINE bool +rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, szind_t *r_szind, bool *r_slab) { + rtree_leaf_elm_t *elm; + + size_t slot = rtree_cache_direct_map(key); + uintptr_t leafkey = rtree_leafkey(key); + assert(leafkey != RTREE_LEAFKEY_INVALID); + + if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + elm = &leaf[subkey]; + +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, + elm, true); + *r_szind = rtree_leaf_elm_bits_szind_get(bits); + *r_slab = rtree_leaf_elm_bits_slab_get(bits); +#else + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, true); + *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, true); +#endif + return true; + } else { + return false; + } +} JEMALLOC_ALWAYS_INLINE bool rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { @@ -448,15 +496,21 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return true; } +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + *r_szind = rtree_leaf_elm_bits_szind_get(bits); + *r_slab = rtree_leaf_elm_bits_slab_get(bits); +#else *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); +#endif return false; } static inline void rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, szind_t szind, bool slab) { - assert(!slab || szind < NBINS); + assert(!slab || szind < SC_NBINS); rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); @@ -468,7 +522,7 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) != NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false); + rtree_leaf_elm_write(tsdn, rtree, elm, NULL, SC_NSIZES, false); } #endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h b/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h index 3cdc862548..562e29297a 100644 --- a/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h +++ b/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h 
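
[Annotation, not part of the diff.] The rtree.h hunks above keep manipulating one packed leaf word: the size-class index lives in the bits above LG_VADDR, the extent pointer in the low virtual-address bits, and the slab flag in bit 0, with a new aarch64 branch because that architecture zeroes (rather than sign-extends) the bits above the usable address width. The standalone C sketch below mirrors that encoding under an assumed 48-bit address width; the EXAMPLE_* names and the zero-extension assumption are illustrative only, not jemalloc's actual configuration.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed address width for this sketch; jemalloc derives LG_VADDR at build time. */
#define EXAMPLE_LG_VADDR 48

/* Pack a size-class index, an aligned pointer, and a slab flag into one word,
 * mirroring the layout rtree_leaf_elm_write() builds under RTREE_LEAF_COMPACT
 * in the hunk above. */
static uintptr_t
pack_leaf_bits(unsigned szind, void *extent, int slab) {
	return ((uintptr_t)szind << EXAMPLE_LG_VADDR)
	    | ((uintptr_t)extent & (((uintptr_t)1 << EXAMPLE_LG_VADDR) - 1))
	    | (uintptr_t)!!slab;
}

/* Recover the pointer when the hardware zero-extends high address bits
 * (the aarch64 case above): keep the low LG_VADDR bits, drop the slab bit. */
static void *
unpack_extent_zero_extended(uintptr_t bits) {
	uintptr_t high_mask = ((uintptr_t)1 << EXAMPLE_LG_VADDR) - 1;
	uintptr_t low_mask = ~(uintptr_t)1;
	return (void *)(bits & high_mask & low_mask);
}

int
main(void) {
	static int dummy_extent[4];	/* stand-in for an extent_t, suitably aligned */
	uintptr_t bits = pack_leaf_bits(7, dummy_extent, 1);
	assert(unpack_extent_zero_extended(bits) == (void *)dummy_extent);
	printf("szind=%u slab=%d\n", (unsigned)(bits >> EXAMPLE_LG_VADDR),
	    (int)(bits & 1));
	return 0;
}

Packing the index and flag alongside the pointer is what lets the new rtree_szind_slab_read_fast() answer a hit in the per-thread rtree cache with a single load of the leaf word.
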
@@ -26,7 +26,7 @@ * Zero initializer required for tsd initialization only. Proper initialization * done via rtree_ctx_data_init(). */ -#define RTREE_CTX_ZERO_INITIALIZER {{{0}}} +#define RTREE_CTX_ZERO_INITIALIZER {{{0, 0}}, {{0, 0}}} typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; diff --git a/deps/jemalloc/include/jemalloc/internal/safety_check.h b/deps/jemalloc/include/jemalloc/internal/safety_check.h new file mode 100644 index 0000000000..53339ac12f --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/safety_check.h @@ -0,0 +1,26 @@ +#ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H +#define JEMALLOC_INTERNAL_SAFETY_CHECK_H + +void safety_check_fail(const char *format, ...); +/* Can set to NULL for a default. */ +void safety_check_set_abort(void (*abort_fn)()); + +JEMALLOC_ALWAYS_INLINE void +safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { + assert(usize < bumped_usize); + for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { + *((unsigned char *)ptr + i) = 0xBC; + } +} + +JEMALLOC_ALWAYS_INLINE void +safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize) +{ + for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { + if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) { + safety_check_fail("Use after free error\n"); + } + } +} + +#endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/sc.h b/deps/jemalloc/include/jemalloc/internal/sc.h new file mode 100644 index 0000000000..9a099d8b64 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/sc.h @@ -0,0 +1,333 @@ +#ifndef JEMALLOC_INTERNAL_SC_H +#define JEMALLOC_INTERNAL_SC_H + +#include "jemalloc/internal/jemalloc_internal_types.h" + +/* + * Size class computations: + * + * These are a little tricky; we'll first start by describing how things + * generally work, and then describe some of the details. + * + * Ignore the first few size classes for a moment. We can then split all the + * remaining size classes into groups. The size classes in a group are spaced + * such that they cover allocation request sizes in a power-of-2 range. The + * power of two is called the base of the group, and the size classes in it + * satisfy allocations in the half-open range (base, base * 2]. There are + * SC_NGROUP size classes in each group, equally spaced in the range, so that + * each one covers allocations for base / SC_NGROUP possible allocation sizes. + * We call that value (base / SC_NGROUP) the delta of the group. Each size class + * is delta larger than the one before it (including the initial size class in a + * group, which is delta larger than base, the largest size class in the + * previous group). + * To make the math all work out nicely, we require that SC_NGROUP is a power of + * two, and define it in terms of SC_LG_NGROUP. We'll often talk in terms of + * lg_base and lg_delta. For each of these groups then, we have that + * lg_delta == lg_base - SC_LG_NGROUP. + * The size classes in a group with a given lg_base and lg_delta (which, recall, + * can be computed from lg_base for these groups) are therefore: + * base + 1 * delta + * which covers allocations in (base, base + 1 * delta] + * base + 2 * delta + * which covers allocations in (base + 1 * delta, base + 2 * delta]. + * base + 3 * delta + * which covers allocations in (base + 2 * delta, base + 3 * delta]. + * ... + * base + SC_NGROUP * delta ( == 2 * base) + * which covers allocations in (base + (SC_NGROUP - 1) * delta, 2 * base]. 
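
As a brief aside on the safety_check helpers introduced above, a minimal sketch of how the redzone pair is meant to be used; the buffer and sizes are invented for the example, and it assumes <string.h> plus this header are in scope.

/* Sketch only: guard the slack between a requested size and its size class. */
static void
redzone_demo(void) {
    unsigned char buf[48];
    size_t usize = 40;          /* size the caller asked for */
    size_t bumped_usize = 48;   /* size class actually handed out */

    safety_check_set_redzone(buf, usize, bumped_usize);    /* paint buf[40..47] with 0xBC */
    memset(buf, 0, usize);                                  /* writes within usize leave the guard intact */
    safety_check_verify_redzone(buf, usize, bumped_usize);  /* passes */

    buf[44] = 0;                                            /* clobber a guard byte... */
    safety_check_verify_redzone(buf, usize, bumped_usize);  /* ...now calls safety_check_fail() */
}
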
+ * (Note that currently SC_NGROUP is always 4, so the "..." is empty in + * practice.) + * Note that the last size class in the group is the next power of two (after + * base), so that we've set up the induction correctly for the next group's + * selection of delta. + * + * Now, let's start considering the first few size classes. Two extra constants + * come into play here: LG_QUANTUM and SC_LG_TINY_MIN. LG_QUANTUM ensures + * correct platform alignment; all objects of size (1 << LG_QUANTUM) or larger + * are at least (1 << LG_QUANTUM) aligned; this can be used to ensure that we + * never return improperly aligned memory, by making (1 << LG_QUANTUM) equal the + * highest required alignment of a platform. For allocation sizes smaller than + * (1 << LG_QUANTUM) though, we can be more relaxed (since we don't support + * platforms with types with alignment larger than their size). To allow such + * allocations (without wasting space unnecessarily), we introduce tiny size + * classes; one per power of two, up until we hit the quantum size. There are + * therefore LG_QUANTUM - SC_LG_TINY_MIN such size classes. + * + * Next, we have a size class of size (1 << LG_QUANTUM). This can't be the + * start of a group in the sense we described above (covering a power of two + * range) since, if we divided into it to pick a value of delta, we'd get a + * delta smaller than (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which + * is against the rules. + * + * The first base we can divide by SC_NGROUP while still being at least + * (1 << LG_QUANTUM) is SC_NGROUP * (1 << LG_QUANTUM). We can get there by + * having SC_NGROUP size classes, spaced (1 << LG_QUANTUM) apart. These size + * classes are: + * 1 * (1 << LG_QUANTUM) + * 2 * (1 << LG_QUANTUM) + * 3 * (1 << LG_QUANTUM) + * ... (although, as above, this "..." is empty in practice) + * SC_NGROUP * (1 << LG_QUANTUM). + * + * There are SC_NGROUP of these size classes, so we can regard it as a sort of + * pseudo-group, even though it spans multiple powers of 2, is divided + * differently, and both starts and ends on a power of 2 (as opposed to just + * ending). SC_NGROUP is itself a power of two, so the first group after the + * pseudo-group has the power-of-two base SC_NGROUP * (1 << LG_QUANTUM), for a + * lg_base of LG_QUANTUM + SC_LG_NGROUP. We can divide this base into SC_NGROUP + * sizes without violating our LG_QUANTUM requirements, so we can safely set + * lg_delta = lg_base - SC_LG_GROUP (== LG_QUANTUM). + * + * So, in order, the size classes are: + * + * Tiny size classes: + * - Count: LG_QUANTUM - SC_LG_TINY_MIN. + * - Sizes: + * 1 << SC_LG_TINY_MIN + * 1 << (SC_LG_TINY_MIN + 1) + * 1 << (SC_LG_TINY_MIN + 2) + * ... + * 1 << (LG_QUANTUM - 1) + * + * Initial pseudo-group: + * - Count: SC_NGROUP + * - Sizes: + * 1 * (1 << LG_QUANTUM) + * 2 * (1 << LG_QUANTUM) + * 3 * (1 << LG_QUANTUM) + * ... + * SC_NGROUP * (1 << LG_QUANTUM) + * + * Regular group 0: + * - Count: SC_NGROUP + * - Sizes: + * (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP and lg_delta of + * lg_base - SC_LG_NGROUP) + * (1 << lg_base) + 1 * (1 << lg_delta) + * (1 << lg_base) + 2 * (1 << lg_delta) + * (1 << lg_base) + 3 * (1 << lg_delta) + * ... 
+ * (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ] + * + * Regular group 1: + * - Count: SC_NGROUP + * - Sizes: + * (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP + 1 and lg_delta of + * lg_base - SC_LG_NGROUP) + * (1 << lg_base) + 1 * (1 << lg_delta) + * (1 << lg_base) + 2 * (1 << lg_delta) + * (1 << lg_base) + 3 * (1 << lg_delta) + * ... + * (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ] + * + * ... + * + * Regular group N: + * - Count: SC_NGROUP + * - Sizes: + * (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP + N and lg_delta of + * lg_base - SC_LG_NGROUP) + * (1 << lg_base) + 1 * (1 << lg_delta) + * (1 << lg_base) + 2 * (1 << lg_delta) + * (1 << lg_base) + 3 * (1 << lg_delta) + * ... + * (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ] + * + * + * Representation of metadata: + * To make the math easy, we'll mostly work in lg quantities. We record lg_base, + * lg_delta, and ndelta (i.e. number of deltas above the base) on a + * per-size-class basis, and maintain the invariant that, across all size + * classes, size == (1 << lg_base) + ndelta * (1 << lg_delta). + * + * For regular groups (i.e. those with lg_base >= LG_QUANTUM + SC_LG_NGROUP), + * lg_delta is lg_base - SC_LG_NGROUP, and ndelta goes from 1 to SC_NGROUP. + * + * For the initial tiny size classes (if any), lg_base is lg(size class size). + * lg_delta is lg_base for the first size class, and lg_base - 1 for all + * subsequent ones. ndelta is always 0. + * + * For the pseudo-group, if there are no tiny size classes, then we set + * lg_base == LG_QUANTUM, lg_delta == LG_QUANTUM, and have ndelta range from 0 + * to SC_NGROUP - 1. (Note that delta == base, so base + (SC_NGROUP - 1) * delta + * is just SC_NGROUP * base, or (1 << (SC_LG_NGROUP + LG_QUANTUM)), so we do + * indeed get a power of two that way). If there *are* tiny size classes, then + * the first size class needs to have lg_delta relative to the largest tiny size + * class. We therefore set lg_base == LG_QUANTUM - 1, + * lg_delta == LG_QUANTUM - 1, and ndelta == 1, keeping the rest of the + * pseudo-group the same. + * + * + * Other terminology: + * "Small" size classes mean those that are allocated out of bins, which is the + * same as those that are slab allocated. + * "Large" size classes are those that are not small. The cutoff for counting as + * large is page size * group size. + */ + +/* + * Size class N + (1 << SC_LG_NGROUP) twice the size of size class N. + */ +#define SC_LG_NGROUP 2 +#define SC_LG_TINY_MIN 3 + +#if SC_LG_TINY_MIN == 0 +/* The div module doesn't support division by 1, which this would require. */ +#error "Unsupported LG_TINY_MIN" +#endif + +/* + * The definitions below are all determined by the above settings and system + * characteristics. + */ +#define SC_NGROUP (1ULL << SC_LG_NGROUP) +#define SC_PTR_BITS ((1ULL << LG_SIZEOF_PTR) * 8) +#define SC_NTINY (LG_QUANTUM - SC_LG_TINY_MIN) +#define SC_LG_TINY_MAXCLASS (LG_QUANTUM > SC_LG_TINY_MIN ? LG_QUANTUM - 1 : -1) +#define SC_NPSEUDO SC_NGROUP +#define SC_LG_FIRST_REGULAR_BASE (LG_QUANTUM + SC_LG_NGROUP) +/* + * We cap allocations to be less than 2 ** (ptr_bits - 1), so the highest base + * we need is 2 ** (ptr_bits - 2). (This also means that the last group is 1 + * size class shorter than the others). + * We could probably save some space in arenas by capping this at LG_VADDR size. 
+ */ +#define SC_LG_BASE_MAX (SC_PTR_BITS - 2) +#define SC_NREGULAR (SC_NGROUP * \ + (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) +#define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR) + +/* The number of size classes that are a multiple of the page size. */ +#define SC_NPSIZES ( \ + /* Start with all the size classes. */ \ + SC_NSIZES \ + /* Subtract out those groups with too small a base. */ \ + - (LG_PAGE - 1 - SC_LG_FIRST_REGULAR_BASE) * SC_NGROUP \ + /* And the pseudo-group. */ \ + - SC_NPSEUDO \ + /* And the tiny group. */ \ + - SC_NTINY \ + /* Sizes where ndelta*delta is not a multiple of the page size. */ \ + - (SC_LG_NGROUP * SC_NGROUP)) +/* + * Note that the last line is computed as the sum of the second column in the + * following table: + * lg(base) | count of sizes to exclude + * ------------------------------|----------------------------- + * LG_PAGE - 1 | SC_NGROUP - 1 + * LG_PAGE | SC_NGROUP - 1 + * LG_PAGE + 1 | SC_NGROUP - 2 + * LG_PAGE + 2 | SC_NGROUP - 4 + * ... | ... + * LG_PAGE + (SC_LG_NGROUP - 1) | SC_NGROUP - (SC_NGROUP / 2) + */ + +/* + * We declare a size class is binnable if size < page size * group. Or, in other + * words, lg(size) < lg(page size) + lg(group size). + */ +#define SC_NBINS ( \ + /* Sub-regular size classes. */ \ + SC_NTINY + SC_NPSEUDO \ + /* Groups with lg_regular_min_base <= lg_base <= lg_base_max */ \ + + SC_NGROUP * (LG_PAGE + SC_LG_NGROUP - SC_LG_FIRST_REGULAR_BASE) \ + /* Last SC of the last group hits the bound exactly; exclude it. */ \ + - 1) + +/* + * The size2index_tab lookup table uses uint8_t to encode each bin index, so we + * cannot support more than 256 small size classes. + */ +#if (SC_NBINS > 256) +# error "Too many small size classes" +#endif + +/* The largest size class in the lookup table. */ +#define SC_LOOKUP_MAXCLASS ((size_t)1 << 12) + +/* Internal, only used for the definition of SC_SMALL_MAXCLASS. */ +#define SC_SMALL_MAX_BASE ((size_t)1 << (LG_PAGE + SC_LG_NGROUP - 1)) +#define SC_SMALL_MAX_DELTA ((size_t)1 << (LG_PAGE - 1)) + +/* The largest size class allocated out of a slab. */ +#define SC_SMALL_MAXCLASS (SC_SMALL_MAX_BASE \ + + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA) + +/* The smallest size class not allocated out of a slab. */ +#define SC_LARGE_MINCLASS ((size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)) +#define SC_LG_LARGE_MINCLASS (LG_PAGE + SC_LG_NGROUP) + +/* Internal; only used for the definition of SC_LARGE_MAXCLASS. */ +#define SC_MAX_BASE ((size_t)1 << (SC_PTR_BITS - 2)) +#define SC_MAX_DELTA ((size_t)1 << (SC_PTR_BITS - 2 - SC_LG_NGROUP)) + +/* The largest size class supported. */ +#define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA) + +typedef struct sc_s sc_t; +struct sc_s { + /* Size class index, or -1 if not a valid size class. */ + int index; + /* Lg group base size (no deltas added). */ + int lg_base; + /* Lg delta to previous size class. */ + int lg_delta; + /* Delta multiplier. size == 1<<lg_base + ndelta<<lg_delta */ + int ndelta; + /* + * True if the size class is a multiple of the page size, false + * otherwise. + */ + bool psz; + /* + * True if the size class is a small, bin, size class. False otherwise. + */ + bool bin; + /* The slab page count if a small bin size class, 0 otherwise. */ + int pgs; + /* Same as lg_delta if a lookup table size class, 0 otherwise. */ + int lg_delta_lookup; +}; + +typedef struct sc_data_s sc_data_t; +struct sc_data_s { + /* Number of tiny size classes. */ + unsigned ntiny; + /* Number of bins supported by the lookup table. 
*/ + int nlbins; + /* Number of small size class bins. */ + int nbins; + /* Number of size classes. */ + int nsizes; + /* Number of bits required to store NSIZES. */ + int lg_ceil_nsizes; + /* Number of size classes that are a multiple of (1U << LG_PAGE). */ + unsigned npsizes; + /* Lg of maximum tiny size class (or -1, if none). */ + int lg_tiny_maxclass; + /* Maximum size class included in lookup table. */ + size_t lookup_maxclass; + /* Maximum small size class. */ + size_t small_maxclass; + /* Lg of minimum large size class. */ + int lg_large_minclass; + /* The minimum large size class. */ + size_t large_minclass; + /* Maximum (large) size class. */ + size_t large_maxclass; + /* True if the sc_data_t has been initialized (for debugging only). */ + bool initialized; + + sc_t sc[SC_NSIZES]; +}; + +void sc_data_init(sc_data_t *data); +/* + * Updates slab sizes in [begin, end] to be pgs pages in length, if possible. + * Otherwise, does its best to accomodate the request. + */ +void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, + int pgs); +void sc_boot(sc_data_t *data); + +#endif /* JEMALLOC_INTERNAL_SC_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/seq.h b/deps/jemalloc/include/jemalloc/internal/seq.h new file mode 100644 index 0000000000..ef2df4c6ee --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/seq.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_SEQ_H +#define JEMALLOC_INTERNAL_SEQ_H + +#include "jemalloc/internal/atomic.h" + +/* + * A simple seqlock implementation. + */ + +#define seq_define(type, short_type) \ +typedef struct { \ + atomic_zu_t seq; \ + atomic_zu_t data[ \ + (sizeof(type) + sizeof(size_t) - 1) / sizeof(size_t)]; \ +} seq_##short_type##_t; \ + \ +/* \ + * No internal synchronization -- the caller must ensure that there's \ + * only a single writer at a time. \ + */ \ +static inline void \ +seq_store_##short_type(seq_##short_type##_t *dst, type *src) { \ + size_t buf[sizeof(dst->data) / sizeof(size_t)]; \ + buf[sizeof(buf) / sizeof(size_t) - 1] = 0; \ + memcpy(buf, src, sizeof(type)); \ + size_t old_seq = atomic_load_zu(&dst->seq, ATOMIC_RELAXED); \ + atomic_store_zu(&dst->seq, old_seq + 1, ATOMIC_RELAXED); \ + atomic_fence(ATOMIC_RELEASE); \ + for (size_t i = 0; i < sizeof(buf) / sizeof(size_t); i++) { \ + atomic_store_zu(&dst->data[i], buf[i], ATOMIC_RELAXED); \ + } \ + atomic_store_zu(&dst->seq, old_seq + 2, ATOMIC_RELEASE); \ +} \ + \ +/* Returns whether or not the read was consistent. 
*/ \ +static inline bool \ +seq_try_load_##short_type(type *dst, seq_##short_type##_t *src) { \ + size_t buf[sizeof(src->data) / sizeof(size_t)]; \ + size_t seq1 = atomic_load_zu(&src->seq, ATOMIC_ACQUIRE); \ + if (seq1 % 2 != 0) { \ + return false; \ + } \ + for (size_t i = 0; i < sizeof(buf) / sizeof(size_t); i++) { \ + buf[i] = atomic_load_zu(&src->data[i], ATOMIC_RELAXED); \ + } \ + atomic_fence(ATOMIC_ACQUIRE); \ + size_t seq2 = atomic_load_zu(&src->seq, ATOMIC_RELAXED); \ + if (seq1 != seq2) { \ + return false; \ + } \ + memcpy(dst, buf, sizeof(type)); \ + return true; \ +} + +#endif /* JEMALLOC_INTERNAL_SEQ_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/spin.h b/deps/jemalloc/include/jemalloc/internal/spin.h index e2afc98cfd..22804c687f 100644 --- a/deps/jemalloc/include/jemalloc/internal/spin.h +++ b/deps/jemalloc/include/jemalloc/internal/spin.h @@ -1,25 +1,29 @@ #ifndef JEMALLOC_INTERNAL_SPIN_H #define JEMALLOC_INTERNAL_SPIN_H -#ifdef JEMALLOC_SPIN_C_ -# define SPIN_INLINE extern inline -#else -# define SPIN_INLINE inline -#endif - #define SPIN_INITIALIZER {0U} typedef struct { unsigned iteration; } spin_t; -SPIN_INLINE void +static inline void +spin_cpu_spinwait() { +# if HAVE_CPU_SPINWAIT + CPU_SPINWAIT; +# else + volatile int x = 0; + x = x; +# endif +} + +static inline void spin_adaptive(spin_t *spin) { volatile uint32_t i; if (spin->iteration < 5) { for (i = 0; i < (1U << spin->iteration); i++) { - CPU_SPINWAIT; + spin_cpu_spinwait(); } spin->iteration++; } else { diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h index 1198779ab9..3b9e0eac12 100644 --- a/deps/jemalloc/include/jemalloc/internal/stats.h +++ b/deps/jemalloc/include/jemalloc/internal/stats.h @@ -1,12 +1,6 @@ #ifndef JEMALLOC_INTERNAL_STATS_H #define JEMALLOC_INTERNAL_STATS_H -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex_prof.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats_tsd.h" - /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ OPTION('J', json, false, true) \ @@ -16,7 +10,8 @@ OPTION('a', unmerged, config_stats, false) \ OPTION('b', bins, true, false) \ OPTION('l', large, true, false) \ - OPTION('x', mutex, true, false) + OPTION('x', mutex, true, false) \ + OPTION('e', extents, true, false) enum { #define OPTION(o, v, d, s) stats_print_option_num_##v, @@ -33,132 +28,4 @@ extern char opt_stats_print_opts[stats_print_tot_num_options+1]; void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts); -/* - * In those architectures that support 64-bit atomics, we use atomic updates for - * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize - * externally. - */ -#ifdef JEMALLOC_ATOMIC_U64 -typedef atomic_u64_t arena_stats_u64_t; -#else -/* Must hold the arena stats mutex while reading atomically. */ -typedef uint64_t arena_stats_u64_t; -#endif - -typedef struct malloc_bin_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the bin. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to the size of this - * bin. This includes requests served by tcache, though tcache only - * periodically merges into this counter. 
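
The seq_define() machinery above generates a per-type seqlock: the writer bumps the sequence number to an odd value, copies the payload, then bumps it to the next even value, and a reader accepts a snapshot only if it saw the same even value before and after copying. A usage sketch with an invented two-counter struct; it assumes <stdint.h> and the jemalloc seq/atomic headers are in scope.

typedef struct {
    uint64_t nmalloc;
    uint64_t ndalloc;
} counters_t;

seq_define(counters_t, counters)    /* generates seq_store_counters() / seq_try_load_counters() */

static seq_counters_t shared;       /* single writer, any number of readers */

static void
writer_update(uint64_t nmalloc, uint64_t ndalloc) {
    counters_t tmp = {nmalloc, ndalloc};
    seq_store_counters(&shared, &tmp);  /* odd seq, copy data, then next even seq */
}

static counters_t
reader_snapshot(void) {
    counters_t ret;
    while (!seq_try_load_counters(&ret, &shared)) {
        /* Raced with the writer (odd or changed seq); just retry. */
    }
    return ret;
}
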
- */ - uint64_t nrequests; - - /* - * Current number of regions of this size class, including regions - * currently cached by tcache. - */ - size_t curregs; - - /* Number of tcache fills from this bin. */ - uint64_t nfills; - - /* Number of tcache flushes to this bin. */ - uint64_t nflushes; - - /* Total number of slabs created for this bin's size class. */ - uint64_t nslabs; - - /* - * Total number of slabs reused by extracting them from the slabs heap - * for this bin's size class. - */ - uint64_t reslabs; - - /* Current number of slabs in this bin. */ - size_t curslabs; - - mutex_prof_data_t mutex_data; -} malloc_bin_stats_t; - -typedef struct malloc_large_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. - */ - arena_stats_u64_t nmalloc; - arena_stats_u64_t ndalloc; - - /* - * Number of allocation requests that correspond to this size class. - * This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - arena_stats_u64_t nrequests; /* Partially derived. */ - - /* Current number of allocations of this size class. */ - size_t curlextents; /* Derived. */ -} malloc_large_stats_t; - -typedef struct decay_stats_s { - /* Total number of purge sweeps. */ - arena_stats_u64_t npurge; - /* Total number of madvise calls made. */ - arena_stats_u64_t nmadvise; - /* Total number of pages purged. */ - arena_stats_u64_t purged; -} decay_stats_t; - -/* - * Arena stats. Note that fields marked "derived" are not directly maintained - * within the arena code; rather their values are derived during stats merge - * requests. - */ -typedef struct arena_stats_s { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; -#endif - - /* Number of bytes currently mapped, excluding retained memory. */ - atomic_zu_t mapped; /* Partially derived. */ - - /* - * Number of unused virtual memory bytes currently retained. Retained - * bytes are technically mapped (though always decommitted or purged), - * but they are excluded from the mapped statistic (above). - */ - atomic_zu_t retained; /* Derived. */ - - decay_stats_t decay_dirty; - decay_stats_t decay_muzzy; - - atomic_zu_t base; /* Derived. */ - atomic_zu_t internal; - atomic_zu_t resident; /* Derived. */ - - atomic_zu_t allocated_large; /* Derived. */ - arena_stats_u64_t nmalloc_large; /* Derived. */ - arena_stats_u64_t ndalloc_large; /* Derived. */ - arena_stats_u64_t nrequests_large; /* Derived. */ - - /* Number of bytes cached in tcache associated with this arena. */ - atomic_zu_t tcache_bytes; /* Derived. */ - - mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; - - /* One element for each large size class. */ - malloc_large_stats_t lstats[NSIZES - NBINS]; - - /* Arena uptime. */ - nstime_t uptime; -} arena_stats_t; - #endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/sz.h b/deps/jemalloc/include/jemalloc/internal/sz.h index 7f640d55ad..68e558abfe 100644 --- a/deps/jemalloc/include/jemalloc/internal/sz.h +++ b/deps/jemalloc/include/jemalloc/internal/sz.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/pages.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" /* @@ -26,18 +26,18 @@ * sz_pind2sz_tab encodes the same information as could be computed by * sz_pind2sz_compute(). 
*/ -extern size_t const sz_pind2sz_tab[NPSIZES+1]; +extern size_t sz_pind2sz_tab[SC_NPSIZES + 1]; /* * sz_index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by sz_index2size_compute(). */ -extern size_t const sz_index2size_tab[NSIZES]; +extern size_t sz_index2size_tab[SC_NSIZES]; /* * sz_size2index_tab is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, * and all accesses are via sz_size2index(). */ -extern uint8_t const sz_size2index_tab[]; +extern uint8_t sz_size2index_tab[]; static const size_t sz_large_pad = #ifdef JEMALLOC_CACHE_OBLIVIOUS @@ -47,49 +47,47 @@ static const size_t sz_large_pad = #endif ; +extern void sz_boot(const sc_data_t *sc_data); + JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { - if (unlikely(psz > LARGE_MAXCLASS)) { - return NPSIZES; + if (unlikely(psz > SC_LARGE_MAXCLASS)) { + return SC_NPSIZES; } - { - pszind_t x = lg_floor((psz<<1)-1); - pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - - (LG_SIZE_CLASS_GROUP + LG_PAGE); - pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < SC_LG_NGROUP + LG_PAGE) ? + 0 : x - (SC_LG_NGROUP + LG_PAGE); + pszind_t grp = shift << SC_LG_NGROUP; - pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? - LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + pszind_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ? + LG_PAGE : x - SC_LG_NGROUP - 1; - size_t delta_inverse_mask = ZD(-1) << lg_delta; - pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + size_t delta_inverse_mask = ZU(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << SC_LG_NGROUP) - 1); - pszind_t ind = grp + mod; - return ind; - } + pszind_t ind = grp + mod; + return ind; } static inline size_t sz_pind2sz_compute(pszind_t pind) { - if (unlikely(pind == NPSIZES)) { - return LARGE_MAXCLASS + PAGE; + if (unlikely(pind == SC_NPSIZES)) { + return SC_LARGE_MAXCLASS + PAGE; } - { - size_t grp = pind >> LG_SIZE_CLASS_GROUP; - size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + size_t grp = pind >> SC_LG_NGROUP; + size_t mod = pind & ((ZU(1) << SC_LG_NGROUP) - 1); - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_PAGE + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP-1))) << grp) + & grp_size_mask; - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_PAGE-1); - size_t mod_size = (mod+1) << lg_delta; + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; - size_t sz = grp_size + mod_size; - return sz; - } + size_t sz = grp_size + mod_size; + return sz; } static inline size_t @@ -101,70 +99,70 @@ sz_pind2sz_lookup(pszind_t pind) { static inline size_t sz_pind2sz(pszind_t pind) { - assert(pind < NPSIZES+1); + assert(pind < SC_NPSIZES + 1); return sz_pind2sz_lookup(pind); } static inline size_t sz_psz2u(size_t psz) { - if (unlikely(psz > LARGE_MAXCLASS)) { - return LARGE_MAXCLASS + PAGE; - } - { - size_t x = lg_floor((psz<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
- LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (psz + delta_mask) & ~delta_mask; - return usize; + if (unlikely(psz > SC_LARGE_MAXCLASS)) { + return SC_LARGE_MAXCLASS + PAGE; } + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ? + LG_PAGE : x - SC_LG_NGROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return usize; } static inline szind_t sz_size2index_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) { - return NSIZES; + if (unlikely(size > SC_LARGE_MAXCLASS)) { + return SC_NSIZES; + } + + if (size == 0) { + return 0; } -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; +#if (SC_NTINY != 0) + if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) { + szind_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1; szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); } #endif { szind_t x = lg_floor((size<<1)-1); - szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - szind_t grp = shift << LG_SIZE_CLASS_GROUP; + szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM) ? 0 : + x - (SC_LG_NGROUP + LG_QUANTUM); + szind_t grp = shift << SC_LG_NGROUP; - szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + szind_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - SC_LG_NGROUP - 1; - size_t delta_inverse_mask = ZD(-1) << lg_delta; + size_t delta_inverse_mask = ZU(-1) << lg_delta; szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + ((ZU(1) << SC_LG_NGROUP) - 1); - szind_t index = NTBINS + grp + mod; + szind_t index = SC_NTINY + grp + mod; return index; } } JEMALLOC_ALWAYS_INLINE szind_t sz_size2index_lookup(size_t size) { - assert(size <= LOOKUP_MAXCLASS); - { - szind_t ret = (sz_size2index_tab[(size-1) >> LG_TINY_MIN]); - assert(ret == sz_size2index_compute(size)); - return ret; - } + assert(size <= SC_LOOKUP_MAXCLASS); + szind_t ret = (sz_size2index_tab[(size + (ZU(1) << SC_LG_TINY_MIN) - 1) + >> SC_LG_TINY_MIN]); + assert(ret == sz_size2index_compute(size)); + return ret; } JEMALLOC_ALWAYS_INLINE szind_t sz_size2index(size_t size) { - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) { + if (likely(size <= SC_LOOKUP_MAXCLASS)) { return sz_size2index_lookup(size); } return sz_size2index_compute(size); @@ -172,20 +170,20 @@ sz_size2index(size_t size) { static inline size_t sz_index2size_compute(szind_t index) { -#if (NTBINS > 0) - if (index < NTBINS) { - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); +#if (SC_NTINY > 0) + if (index < SC_NTINY) { + return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index)); } #endif { - size_t reduced_index = index - NTBINS; - size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + size_t reduced_index = index - SC_NTINY; + size_t grp = reduced_index >> SC_LG_NGROUP; + size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) - 1); size_t grp_size_mask = ~((!!grp)-1); size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + (SC_LG_NGROUP-1))) << grp) & grp_size_mask; size_t shift = (grp == 0) ? 
1 : grp; size_t lg_delta = shift + (LG_QUANTUM-1); @@ -205,18 +203,22 @@ sz_index2size_lookup(szind_t index) { JEMALLOC_ALWAYS_INLINE size_t sz_index2size(szind_t index) { - assert(index < NSIZES); + assert(index < SC_NSIZES); return sz_index2size_lookup(index); } JEMALLOC_ALWAYS_INLINE size_t sz_s2u_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) { + if (unlikely(size > SC_LARGE_MAXCLASS)) { return 0; } -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + + if (size == 0) { + size++; + } +#if (SC_NTINY > 0) + if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) { + size_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1; size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : (ZU(1) << lg_ceil)); @@ -224,8 +226,8 @@ sz_s2u_compute(size_t size) { #endif { size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - SC_LG_NGROUP - 1; size_t delta = ZU(1) << lg_delta; size_t delta_mask = delta - 1; size_t usize = (size + delta_mask) & ~delta_mask; @@ -247,8 +249,7 @@ sz_s2u_lookup(size_t size) { */ JEMALLOC_ALWAYS_INLINE size_t sz_s2u(size_t size) { - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) { + if (likely(size <= SC_LOOKUP_MAXCLASS)) { return sz_s2u_lookup(size); } return sz_s2u_compute(size); @@ -265,7 +266,7 @@ sz_sa2u(size_t size, size_t alignment) { assert(alignment != 0 && ((alignment - 1) & alignment) == 0); /* Try for a small size class. */ - if (size <= SMALL_MAXCLASS && alignment < PAGE) { + if (size <= SC_SMALL_MAXCLASS && alignment < PAGE) { /* * Round size up to the nearest multiple of alignment. * @@ -281,20 +282,20 @@ sz_sa2u(size_t size, size_t alignment) { * 192 | 11000000 | 64 */ usize = sz_s2u(ALIGNMENT_CEILING(size, alignment)); - if (usize < LARGE_MINCLASS) { + if (usize < SC_LARGE_MINCLASS) { return usize; } } /* Large size class. Beware of overflow. */ - if (unlikely(alignment > LARGE_MAXCLASS)) { + if (unlikely(alignment > SC_LARGE_MAXCLASS)) { return 0; } /* Make sure result is a large size class. */ - if (size <= LARGE_MINCLASS) { - usize = LARGE_MINCLASS; + if (size <= SC_LARGE_MINCLASS) { + usize = SC_LARGE_MINCLASS; } else { usize = sz_s2u(size); if (usize < size) { diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_externs.h b/deps/jemalloc/include/jemalloc/internal/tcache_externs.h index db3e9c7d5d..d63eafde8c 100644 --- a/deps/jemalloc/include/jemalloc/internal/tcache_externs.h +++ b/deps/jemalloc/include/jemalloc/internal/tcache_externs.h @@ -1,15 +1,13 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H -#include "jemalloc/internal/size_classes.h" - extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; -extern tcache_bin_info_t *tcache_bin_info; +extern cache_bin_info_t *tcache_bin_info; /* - * Number of tcache bins. There are NBINS small-object bins, plus 0 or more + * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more * large-object bins. 
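
A quick sketch of how the renamed sz_* helpers behave. The concrete numbers in the comments assume the usual 16-byte quantum and default size classes, so they are examples rather than guarantees; <assert.h> and the jemalloc headers are assumed to be in scope.

/* Sketch only. */
static void
sz_demo(void) {
    size_t usize = sz_s2u(100);           /* round a request up to its size class (112 with defaults) */
    szind_t ind = sz_size2index(100);     /* index of that size class */
    assert(sz_index2size(ind) == usize);  /* sz_index2size() inverts sz_size2index() */

    /* The patch drops the size > 0 asserts; zero now maps to the smallest class. */
    assert(sz_s2u(0) == sz_index2size(0));
}
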
*/ extern unsigned nhbins; @@ -30,10 +28,10 @@ extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + cache_bin_t *tbin, szind_t binind, bool *tcache_success); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h b/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h index c55bcd2723..5eca20e893 100644 --- a/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h +++ b/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h @@ -1,8 +1,9 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H #define JEMALLOC_INTERNAL_TCACHE_INLINES_H +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" @@ -38,43 +39,16 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) { } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - - if (unlikely(tbin->ncached == 0)) { - tbin->low_water = -1; - *tcache_success = false; - return NULL; - } - /* - * tcache_success (instead of ret) should be checked upon the return of - * this function. We avoid checking (ret == NULL) because there is - * never a null stored on the avail stack (which is unknown to the - * compiler), and eagerly checking ret would cause pipeline stall - * (waiting for the cacheline). 
- */ - *tcache_success = true; - ret = *(tbin->avail - tbin->ncached); - tbin->ncached--; - - if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) { - tbin->low_water = tbin->ncached; - } - - return ret; -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) { - void *ret; - tcache_bin_t *tbin; + cache_bin_t *bin; bool tcache_success; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - assert(binind < NBINS); - tbin = tcache_small_bin_get(tcache, binind); - ret = tcache_alloc_easy(tbin, &tcache_success); + assert(binind < SC_NBINS); + bin = tcache_small_bin_get(tcache, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -84,7 +58,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, - tbin, binind, &tcache_hard_success); + bin, binind, &tcache_hard_success); if (tcache_hard_success == false) { return NULL; } @@ -103,22 +77,21 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (likely(!zero)) { if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); + arena_alloc_junk_small(ret, &bin_infos[binind], + false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (slow_path && config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], - true); + arena_alloc_junk_small(ret, &bin_infos[binind], true); } memset(ret, 0, usize); } if (config_stats) { - tbin->tstats.nrequests++; + bin->tstats.nrequests++; } if (config_prof) { tcache->prof_accumbytes += usize; @@ -131,12 +104,12 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - tcache_bin_t *tbin; + cache_bin_t *bin; bool tcache_success; - assert(binind >= NBINS &&binind < nhbins); - tbin = tcache_large_bin_get(tcache, binind); - ret = tcache_alloc_easy(tbin, &tcache_success); + assert(binind >= SC_NBINS &&binind < nhbins); + bin = tcache_large_bin_get(tcache, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* @@ -176,7 +149,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } if (config_stats) { - tbin->tstats.nrequests++; + bin->tstats.nrequests++; } if (config_prof) { tcache->prof_accumbytes += usize; @@ -190,24 +163,24 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; + cache_bin_t *bin; + cache_bin_info_t *bin_info; - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) + <= SC_SMALL_MAXCLASS); if (slow_path && config_fill && unlikely(opt_junk_free)) { - arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); + arena_dalloc_junk_small(ptr, &bin_infos[binind]); } - tbin = tcache_small_bin_get(tcache, binind); - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); + bin = 
tcache_small_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) { + tcache_bin_flush_small(tsd, tcache, bin, binind, + (bin_info->ncached_max >> 1)); + bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr); + assert(ret); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } @@ -215,25 +188,26 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; + cache_bin_t *bin; + cache_bin_info_t *bin_info; - assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) + > SC_SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); if (slow_path && config_fill && unlikely(opt_junk_free)) { large_dalloc_junk(ptr, sz_index2size(binind)); } - tbin = tcache_large_bin_get(tcache, binind); - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tbin, binind, - (tbin_info->ncached_max >> 1), tcache); + bin = tcache_large_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(bin->ncached == bin_info->ncached_max)) { + tcache_bin_flush_large(tsd, bin, binind, + (bin_info->ncached_max >> 1), tcache); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } @@ -242,6 +216,9 @@ JEMALLOC_ALWAYS_INLINE tcache_t * tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; if (unlikely(elm->tcache == NULL)) { + malloc_printf("<jemalloc>: invalid tcache id (%u).\n", ind); + abort(); + } else if (unlikely(elm->tcache == TCACHES_ELM_NEED_REINIT)) { elm->tcache = tcache_create_explicit(tsd); } return elm->tcache; diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_structs.h b/deps/jemalloc/include/jemalloc/internal/tcache_structs.h index 7eb516fb6b..172ef9040c 100644 --- a/deps/jemalloc/include/jemalloc/internal/tcache_structs.h +++ b/deps/jemalloc/include/jemalloc/internal/tcache_structs.h @@ -1,56 +1,62 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H #define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H +#include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats_tsd.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/tsd_types.h" -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; +/* Various uses of this struct need it to be a named type. */ +typedef ql_elm(tsd_t) tsd_link_t; -struct tcache_bin_s { - low_water_t low_water; /* Min # cached since last GC. */ - uint32_t ncached; /* # of cached objects. */ +struct tcache_s { /* - * ncached and stats are both modified frequently. Let's keep them - * close so that they have a higher chance of being on the same - * cacheline, thus less write-backs. 
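
The cache_bin avail-stack layout referenced throughout the hunks above keeps avail pointing just past the cached objects, with avail[-ncached .. -1] ready to hand out. Below is a toy sketch of that discipline; it is not the cache_bin.h code from this patch, and it assumes <stdbool.h>, <stdint.h> and <stddef.h>.

typedef struct {
    void **avail;         /* points just above the cached objects */
    uint32_t ncached;     /* avail[-ncached .. -1] are available */
    uint32_t ncached_max;
} toy_bin_t;

static void *
toy_bin_pop(toy_bin_t *bin) {
    if (bin->ncached == 0) {
        return NULL;      /* miss: caller refills from the arena */
    }
    void *ret = *(bin->avail - bin->ncached);  /* lowest slot is handed out first */
    bin->ncached--;
    return ret;
}

static bool
toy_bin_push(toy_bin_t *bin, void *ptr) {
    if (bin->ncached == bin->ncached_max) {
        return false;     /* full: caller flushes half the bin, then retries */
    }
    bin->ncached++;
    *(bin->avail - bin->ncached) = ptr;
    return true;
}
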
+ * To minimize our cache-footprint, we put the frequently accessed data + * together at the start of this struct. */ - tcache_bin_stats_t tstats; + + /* Cleared after arena_prof_accum(). */ + uint64_t prof_accumbytes; + /* Drives incremental GC. */ + ticker_t gc_ticker; /* - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. + * The pointer stacks associated with bins follow as a contiguous array. + * During tcache initialization, the avail pointer in each element of + * tbins is initialized to point to the proper offset within this array. */ - void **avail; /* Stack of available objects. */ -}; + cache_bin_t bins_small[SC_NBINS]; + + /* + * This data is less hot; we can be a little less careful with our + * footprint here. + */ + /* Lets us track all the tcaches in an arena. */ + ql_elm(tcache_t) link; + + /* Logically scoped to tsd, but put here for cache layout reasons. */ + ql_elm(tsd_t) tsd_link; + bool in_hook; -struct tcache_s { - /* Data accessed frequently first: prof, ticker and small bins. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - ticker_t gc_ticker; /* Drives incremental GC. */ /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. + * The descriptor lets the arena find our cache bins without seeing the + * tcache definition. This enables arenas to aggregate stats across + * tcaches without having a tcache dependency. */ - tcache_bin_t tbins_small[NBINS]; - /* Data accessed less often below. */ - ql_elm(tcache_t) link; /* Used for aggregating stats. */ - arena_t *arena; /* Associated arena. */ - szind_t next_gc_bin; /* Next bin to GC. */ + cache_bin_array_descriptor_t cache_bin_array_descriptor; + + /* The arena this tcache is associated with. */ + arena_t *arena; + /* Next bin to GC. */ + szind_t next_gc_bin; /* For small bins, fill (ncached_max >> lg_fill_div). */ - uint8_t lg_fill_div[NBINS]; - tcache_bin_t tbins_large[NSIZES-NBINS]; + uint8_t lg_fill_div[SC_NBINS]; + /* + * We put the cache bins for large size classes at the end of the + * struct, since some of them might not get used. This might end up + * letting us avoid touching an extra page if we don't have to. + */ + cache_bin_t bins_large[SC_NSIZES-SC_NBINS]; }; /* Linkage for list of available (previously used) explicit tcache IDs. */ diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_types.h b/deps/jemalloc/include/jemalloc/internal/tcache_types.h index 1155d62cb4..dce69382eb 100644 --- a/deps/jemalloc/include/jemalloc/internal/tcache_types.h +++ b/deps/jemalloc/include/jemalloc/internal/tcache_types.h @@ -1,16 +1,11 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H #define JEMALLOC_INTERNAL_TCACHE_TYPES_H -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; -/* ncached is cast to this type for comparison. 
*/ -typedef int32_t low_water_t; - /* * tcache pointers close to NULL are used to encode state information that is * used for two purposes: preventing thread caching on a per thread basis and @@ -50,7 +45,7 @@ typedef int32_t low_water_t; /* Number of tcache allocation/deallocation events between incremental GCs. */ #define TCACHE_GC_INCR \ - ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) + ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1)) /* Used in TSD static initializer only. Real init in tcache_data_init(). */ #define TCACHE_ZERO_INITIALIZER {0} @@ -58,4 +53,7 @@ typedef int32_t low_water_t; /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false +/* Used for explicit tcache only. Means flushed but not destroyed. */ +#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) + #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/test_hooks.h b/deps/jemalloc/include/jemalloc/internal/test_hooks.h new file mode 100644 index 0000000000..a6351e59af --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/test_hooks.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H +#define JEMALLOC_INTERNAL_TEST_HOOKS_H + +extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(); +extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(); + +#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) + +#define open JEMALLOC_HOOK(open, test_hooks_libc_hook) +#define read JEMALLOC_HOOK(read, test_hooks_libc_hook) +#define write JEMALLOC_HOOK(write, test_hooks_libc_hook) +#define readlink JEMALLOC_HOOK(readlink, test_hooks_libc_hook) +#define close JEMALLOC_HOOK(close, test_hooks_libc_hook) +#define creat JEMALLOC_HOOK(creat, test_hooks_libc_hook) +#define secure_getenv JEMALLOC_HOOK(secure_getenv, test_hooks_libc_hook) +/* Note that this is undef'd and re-define'd in src/prof.c. */ +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) + +#endif /* JEMALLOC_INTERNAL_TEST_HOOKS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/ticker.h b/deps/jemalloc/include/jemalloc/internal/ticker.h index 572b96459c..52d0db4c89 100644 --- a/deps/jemalloc/include/jemalloc/internal/ticker.h +++ b/deps/jemalloc/include/jemalloc/internal/ticker.h @@ -32,14 +32,42 @@ ticker_read(const ticker_t *ticker) { return ticker->tick; } +/* + * Not intended to be a public API. Unfortunately, on x86, neither gcc nor + * clang seems smart enough to turn + * ticker->tick -= nticks; + * if (unlikely(ticker->tick < 0)) { + * fixup ticker + * return true; + * } + * return false; + * into + * subq %nticks_reg, (%ticker_reg) + * js fixup ticker + * + * unless we force "fixup ticker" out of line. In that case, gcc gets it right, + * but clang now does worse than before. So, on x86 with gcc, we force it out + * of line, but otherwise let the inlining occur. Ordinarily this wouldn't be + * worth the hassle, but this is on the fast path of both malloc and free (via + * tcache_event). 
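
To see what the JEMALLOC_HOOK() wrapper in the new test_hooks.h buys, here is the approximate preprocessor expansion of a hooked libc call; the call site is invented for the example.

/*
 * With "#define open JEMALLOC_HOOK(open, test_hooks_libc_hook)" in effect,
 * a call such as
 *     fd = open("/proc/self/maps", O_RDONLY);
 * preprocesses (roughly) into
 *     fd = ((void)(test_hooks_libc_hook != NULL &&
 *         (test_hooks_libc_hook(), 0)), open)("/proc/self/maps", O_RDONLY);
 * The inner "open" is not re-expanded, so the hook fires for its side effects
 * when set and the comma operator leaves the real libc function as the value
 * that gets called.
 */
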
+ */ +#if defined(__GNUC__) && !defined(__clang__) \ + && (defined(__x86_64__) || defined(__i386__)) +JEMALLOC_NOINLINE +#endif +static bool +ticker_fixup(ticker_t *ticker) { + ticker->tick = ticker->nticks; + return true; +} + static inline bool ticker_ticks(ticker_t *ticker, int32_t nticks) { - if (unlikely(ticker->tick < nticks)) { - ticker->tick = ticker->nticks; - return true; - } ticker->tick -= nticks; - return(false); + if (unlikely(ticker->tick < 0)) { + return ticker_fixup(ticker); + } + return false; } static inline bool @@ -47,4 +75,17 @@ ticker_tick(ticker_t *ticker) { return ticker_ticks(ticker, 1); } +/* + * Try to tick. If ticker would fire, return true, but rely on + * slowpath to reset ticker. + */ +static inline bool +ticker_trytick(ticker_t *ticker) { + --ticker->tick; + if (unlikely(ticker->tick < 0)) { + return true; + } + return false; +} + #endif /* JEMALLOC_INTERNAL_TICKER_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h index 155a2ec6c4..9ba2600453 100644 --- a/deps/jemalloc/include/jemalloc/internal/tsd.h +++ b/deps/jemalloc/include/jemalloc/internal/tsd.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" @@ -65,48 +66,120 @@ typedef void (*test_callback_t)(int *); O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ + O(offset_state, uint64_t, uint64_t) \ O(thread_allocated, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ + O(bytes_until_sample, int64_t, int64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tcache, tcache_t, tcache_t) \ O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ MALLOC_TEST_TSD #define TSD_INITIALIZER { \ - tsd_state_uninitialized, \ + ATOMIC_INIT(tsd_state_uninitialized), \ TCACHE_ENABLED_ZERO_INITIALIZER, \ false, \ 0, \ 0, \ 0, \ 0, \ + 0, \ + 0, \ NULL, \ RTREE_CTX_ZERO_INITIALIZER, \ NULL, \ NULL, \ NULL, \ + TSD_BINSHARDS_ZERO_INITIALIZER, \ TCACHE_ZERO_INITIALIZER, \ WITNESS_TSD_INITIALIZER \ MALLOC_TEST_TSD_INITIALIZER \ } +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_cleanup_register(bool (*f)(void)); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +void tsd_cleanup(void *arg); +tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); +void tsd_state_set(tsd_t *tsd, uint8_t new_state); +void tsd_slow_update(tsd_t *tsd); +void tsd_prefork(tsd_t *tsd); +void tsd_postfork_parent(tsd_t *tsd); +void tsd_postfork_child(tsd_t *tsd); + +/* + * Call ..._inc when your module wants to take all threads down the slow paths, + * and ..._dec when it no longer needs to. + */ +void tsd_global_slow_inc(tsdn_t *tsdn); +void tsd_global_slow_dec(tsdn_t *tsdn); +bool tsd_global_slow(); + enum { - tsd_state_nominal = 0, /* Common case --> jnz. */ - tsd_state_nominal_slow = 1, /* Initialized but on slow path. */ - /* the above 2 nominal states should be lower values. */ - tsd_state_nominal_max = 1, /* used for comparison only. 
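
The ticker above is the countdown that spreads occasional slow-path work, such as incremental tcache GC, across many fast-path events. A small usage sketch follows; the 1000-event period and the demo_* names are invented, and the jemalloc ticker.h plus the unlikely() macro are assumed to be in scope.

/* Sketch only. */
static ticker_t demo_ticker;

static void
demo_init(void) {
    ticker_init(&demo_ticker, 1000);        /* fire roughly once per 1000 events */
}

static void
demo_event(void) {
    if (unlikely(ticker_tick(&demo_ticker))) {
        /*
         * About one event in 1000 lands here; ticker_ticks() has already
         * reset the countdown, so just do the batched work.
         */
    }
}
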
*/ - tsd_state_minimal_initialized = 2, - tsd_state_purgatory = 3, - tsd_state_reincarnated = 4, - tsd_state_uninitialized = 5 + /* Common case --> jnz. */ + tsd_state_nominal = 0, + /* Initialized but on slow path. */ + tsd_state_nominal_slow = 1, + /* + * Some thread has changed global state in such a way that all nominal + * threads need to recompute their fast / slow status the next time they + * get a chance. + * + * Any thread can change another thread's status *to* recompute, but + * threads are the only ones who can change their status *from* + * recompute. + */ + tsd_state_nominal_recompute = 2, + /* + * The above nominal states should be lower values. We use + * tsd_nominal_max to separate nominal states from threads in the + * process of being born / dying. + */ + tsd_state_nominal_max = 2, + + /* + * A thread might free() during its death as its only allocator action; + * in such scenarios, we need tsd, but set up in such a way that no + * cleanup is necessary. + */ + tsd_state_minimal_initialized = 3, + /* States during which we know we're in thread death. */ + tsd_state_purgatory = 4, + tsd_state_reincarnated = 5, + /* + * What it says on the tin; tsd that hasn't been initialized. Note + * that even when the tsd struct lives in TLS, when need to keep track + * of stuff like whether or not our pthread destructors have been + * scheduled, so this really truly is different than the nominal state. + */ + tsd_state_uninitialized = 6 }; -/* Manually limit tsd_state_t to a single byte. */ -typedef uint8_t tsd_state_t; +/* + * Some TSD accesses can only be done in a nominal state. To enforce this, we + * wrap TSD member access in a function that asserts on TSD state, and mangle + * field names to prevent touching them accidentally. + */ +#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n + +#ifdef JEMALLOC_U8_ATOMICS +# define tsd_state_t atomic_u8_t +# define tsd_atomic_load atomic_load_u8 +# define tsd_atomic_store atomic_store_u8 +# define tsd_atomic_exchange atomic_exchange_u8 +#else +# define tsd_state_t atomic_u32_t +# define tsd_atomic_load atomic_load_u32 +# define tsd_atomic_store atomic_store_u32 +# define tsd_atomic_exchange atomic_exchange_u32 +#endif /* The actual tsd. */ struct tsd_s { @@ -115,13 +188,29 @@ struct tsd_s { * module. Access any thread-local state through the getters and * setters below. */ - tsd_state_t state; + + /* + * We manually limit the state to just a single byte. Unless the 8-bit + * atomics are unavailable (which is rare). + */ + tsd_state_t state; #define O(n, t, nt) \ - t use_a_getter_or_setter_instead_##n; + t TSD_MANGLE(n); MALLOC_TSD #undef O }; +JEMALLOC_ALWAYS_INLINE uint8_t +tsd_state_get(tsd_t *tsd) { + /* + * This should be atomic. Unfortunately, compilers right now can't tell + * that this can be done as a memory comparison, and forces a load into + * a register that hurts fast-path performance. 
+ */ + /* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */ + return *(uint8_t *)&tsd->state; +} + /* * Wrapper around tsd_t that makes it possible to avoid implicit conversion * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be @@ -148,15 +237,6 @@ tsdn_tsd(tsdn_t *tsdn) { return &tsdn->tsd; } -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_cleanup_register(bool (*f)(void)); -tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -void tsd_cleanup(void *arg); -tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); -void tsd_slow_update(tsd_t *tsd); - /* * We put the platform-specific data declarations and inlines into their own * header files to avoid cluttering this file. They define tsd_boot0, @@ -180,7 +260,7 @@ void tsd_slow_update(tsd_t *tsd); #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get_unsafe(tsd_t *tsd) { \ - return &tsd->use_a_getter_or_setter_instead_##n; \ + return &tsd->TSD_MANGLE(n); \ } MALLOC_TSD #undef O @@ -189,10 +269,16 @@ MALLOC_TSD #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) { \ - assert(tsd->state == tsd_state_nominal || \ - tsd->state == tsd_state_nominal_slow || \ - tsd->state == tsd_state_reincarnated || \ - tsd->state == tsd_state_minimal_initialized); \ + /* \ + * Because the state might change asynchronously if it's \ + * nominal, we need to make sure that we only read it once. \ + */ \ + uint8_t state = tsd_state_get(tsd); \ + assert(state == tsd_state_nominal || \ + state == tsd_state_nominal_slow || \ + state == tsd_state_nominal_recompute || \ + state == tsd_state_reincarnated || \ + state == tsd_state_minimal_initialized); \ return tsd_##n##p_get_unsafe(tsd); \ } MALLOC_TSD @@ -227,8 +313,8 @@ MALLOC_TSD #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE void \ tsd_##n##_set(tsd_t *tsd, t val) { \ - assert(tsd->state != tsd_state_reincarnated && \ - tsd->state != tsd_state_minimal_initialized); \ + assert(tsd_state_get(tsd) != tsd_state_reincarnated && \ + tsd_state_get(tsd) != tsd_state_minimal_initialized); \ *tsd_##n##p_get(tsd) = val; \ } MALLOC_TSD @@ -236,13 +322,18 @@ MALLOC_TSD JEMALLOC_ALWAYS_INLINE void tsd_assert_fast(tsd_t *tsd) { + /* + * Note that our fastness assertion does *not* include global slowness + * counters; it's not in general possible to ensure that they won't + * change asynchronously from underneath us. + */ assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && tsd_reentrancy_level_get(tsd) == 0); } JEMALLOC_ALWAYS_INLINE bool tsd_fast(tsd_t *tsd) { - bool fast = (tsd->state == tsd_state_nominal); + bool fast = (tsd_state_get(tsd) == tsd_state_nominal); if (fast) { tsd_assert_fast(tsd); } @@ -259,7 +350,7 @@ tsd_fetch_impl(bool init, bool minimal) { } assert(tsd != NULL); - if (unlikely(tsd->state != tsd_state_nominal)) { + if (unlikely(tsd_state_get(tsd) != tsd_state_nominal)) { return tsd_fetch_slow(tsd, minimal); } assert(tsd_fast(tsd)); @@ -279,7 +370,7 @@ JEMALLOC_ALWAYS_INLINE tsd_t * tsd_internal_fetch(void) { tsd_t *tsd = tsd_fetch_min(); /* Use reincarnated state to prevent full initialization. 
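
Since the tsd fields are now mangled behind TSD_MANGLE(), access is supposed to go through the generated getters and setters, with the now-atomic state read once per check. A sketch of the intended pattern; the function and the 4096-byte figure are invented for the example.

/* Sketch only. */
static void
tsd_usage_sketch(void) {
    tsd_t *tsd = tsd_fetch();       /* takes tsd_fetch_slow() only when state != nominal */

    uint64_t allocated = tsd_thread_allocated_get(tsd);
    tsd_thread_allocated_set(tsd, allocated + 4096);

    if (tsd_fast(tsd)) {
        /* Nominal state: tcache usable, reentrancy level zero. */
    }
}
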
*/ - tsd->state = tsd_state_reincarnated; + tsd_state_set(tsd, tsd_state_reincarnated); return tsd; } @@ -291,7 +382,7 @@ tsd_fetch(void) { static inline bool tsd_nominal(tsd_t *tsd) { - return (tsd->state <= tsd_state_nominal_max); + return (tsd_state_get(tsd) <= tsd_state_nominal_max); } JEMALLOC_ALWAYS_INLINE tsdn_t * diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_generic.h b/deps/jemalloc/include/jemalloc/internal/tsd_generic.h index 1e52ef767f..cf73c0c715 100644 --- a/deps/jemalloc/include/jemalloc/internal/tsd_generic.h +++ b/deps/jemalloc/include/jemalloc/internal/tsd_generic.h @@ -77,7 +77,10 @@ tsd_wrapper_get(bool init) { abort(); } else { wrapper->initialized = false; + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS tsd_t initializer = TSD_INITIALIZER; + JEMALLOC_DIAGNOSTIC_POP wrapper->val = initializer; } tsd_wrapper_set(wrapper); @@ -107,7 +110,10 @@ tsd_boot1(void) { tsd_boot_wrapper.initialized = false; tsd_cleanup(&tsd_boot_wrapper.val); wrapper->initialized = false; + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS tsd_t initializer = TSD_INITIALIZER; + JEMALLOC_DIAGNOSTIC_POP wrapper->val = initializer; tsd_wrapper_set(wrapper); } diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index beb467a67e..65852d5c14 100644 --- a/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -3,8 +3,10 @@ #endif #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H -extern __thread tsd_t tsd_tls; -extern __thread bool tsd_initialized; +#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL + +extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; +extern JEMALLOC_TSD_TYPE_ATTR(bool) tsd_initialized; extern bool tsd_booted; /* Initialization/cleanup. */ @@ -47,7 +49,6 @@ tsd_get_allocates(void) { /* Get/set. */ JEMALLOC_ALWAYS_INLINE tsd_t * tsd_get(bool init) { - assert(tsd_booted); return &tsd_tls; } JEMALLOC_ALWAYS_INLINE void diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_tls.h b/deps/jemalloc/include/jemalloc/internal/tsd_tls.h index 757aaa0eef..7d6c805beb 100644 --- a/deps/jemalloc/include/jemalloc/internal/tsd_tls.h +++ b/deps/jemalloc/include/jemalloc/internal/tsd_tls.h @@ -3,7 +3,9 @@ #endif #define JEMALLOC_INTERNAL_TSD_TLS_H -extern __thread tsd_t tsd_tls; +#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL + +extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; extern pthread_key_t tsd_tsd; extern bool tsd_booted; @@ -40,7 +42,6 @@ tsd_get_allocates(void) { /* Get/set. 
*/ JEMALLOC_ALWAYS_INLINE tsd_t * tsd_get(bool init) { - assert(tsd_booted); return &tsd_tls; } diff --git a/deps/jemalloc/include/jemalloc/internal/witness.h b/deps/jemalloc/include/jemalloc/internal/witness.h index 33be666107..fff9e98cb6 100644 --- a/deps/jemalloc/include/jemalloc/internal/witness.h +++ b/deps/jemalloc/include/jemalloc/internal/witness.h @@ -27,9 +27,9 @@ #define WITNESS_RANK_PROF_BT2GCTX 6U #define WITNESS_RANK_PROF_TDATAS 7U #define WITNESS_RANK_PROF_TDATA 8U -#define WITNESS_RANK_PROF_GCTX 9U - -#define WITNESS_RANK_BACKGROUND_THREAD 10U +#define WITNESS_RANK_PROF_LOG 9U +#define WITNESS_RANK_PROF_GCTX 10U +#define WITNESS_RANK_BACKGROUND_THREAD 11U /* * Used as an argument to witness_assert_depth_to_rank() in order to validate @@ -37,21 +37,22 @@ * witness_assert_depth_to_rank() is inclusive rather than exclusive, this * definition can have the same value as the minimally ranked core lock. */ -#define WITNESS_RANK_CORE 11U +#define WITNESS_RANK_CORE 12U -#define WITNESS_RANK_DECAY 11U -#define WITNESS_RANK_TCACHE_QL 12U -#define WITNESS_RANK_EXTENT_GROW 13U -#define WITNESS_RANK_EXTENTS 14U -#define WITNESS_RANK_EXTENT_AVAIL 15U +#define WITNESS_RANK_DECAY 12U +#define WITNESS_RANK_TCACHE_QL 13U +#define WITNESS_RANK_EXTENT_GROW 14U +#define WITNESS_RANK_EXTENTS 15U +#define WITNESS_RANK_EXTENT_AVAIL 16U -#define WITNESS_RANK_EXTENT_POOL 16U -#define WITNESS_RANK_RTREE 17U -#define WITNESS_RANK_BASE 18U -#define WITNESS_RANK_ARENA_LARGE 19U +#define WITNESS_RANK_EXTENT_POOL 17U +#define WITNESS_RANK_RTREE 18U +#define WITNESS_RANK_BASE 19U +#define WITNESS_RANK_ARENA_LARGE 20U +#define WITNESS_RANK_HOOK 21U #define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_BIN WITNESS_RANK_LEAF #define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF diff --git a/deps/jemalloc/include/jemalloc/jemalloc.h b/deps/jemalloc/include/jemalloc/jemalloc.h index 6ffe5c71b3..28d0904772 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc.h +++ b/deps/jemalloc/include/jemalloc/jemalloc.h @@ -10,6 +10,9 @@ extern "C" { /* Defined if alloc_size attribute is supported. */ #define JEMALLOC_HAVE_ATTR_ALLOC_SIZE +/* Defined if format_arg(...) attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_FORMAT_ARG + /* Defined if format(gnu_printf, ...) attribute is supported. 
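The renumbered WITNESS_RANK_* constants above feed jemalloc's debug-only lock-order checker: each lock carries a rank, and ranks must not decrease while locks are held. A heavily simplified sketch of that idea (toy_ names invented, per-thread state reduced to one global, and unlock handling collapsed):

#include <assert.h>
#include <stdio.h>

static unsigned max_rank_held = 0;   /* thread-local in a real allocator */

static void
toy_witness_lock(unsigned rank) {
    assert(rank >= max_rank_held && "lock-order violation");
    max_rank_held = rank;
}

static void
toy_witness_unlock(unsigned rank) {
    (void)rank;
    max_rank_held = 0;   /* simplification: pretend all locks are released */
}

int
main(void) {
    toy_witness_lock(12);   /* e.g. a core-ranked lock */
    toy_witness_lock(20);   /* acquiring a higher rank afterwards is fine */
    toy_witness_unlock(20);
    toy_witness_unlock(12);
    puts("no rank violations");
    return 0;
}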
*/ #define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF @@ -66,6 +69,7 @@ extern "C" { # define je_malloc_stats_print malloc_stats_print # define je_malloc_usable_size malloc_usable_size # define je_mallocx mallocx +# define je_smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756 smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756 # define je_nallocx nallocx # define je_posix_memalign posix_memalign # define je_rallocx rallocx @@ -83,12 +87,13 @@ extern "C" { #include <limits.h> #include <strings.h> -#define JEMALLOC_VERSION "5.0.1-0-g896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb" +#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756" #define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 0 +#define JEMALLOC_VERSION_MINOR 2 #define JEMALLOC_VERSION_BUGFIX 1 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb" +#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756" +#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756 #define MALLOCX_LG_ALIGN(la) ((int)(la)) #if LG_SIZEOF_PTR == 2 @@ -147,6 +152,7 @@ extern "C" { # define JEMALLOC_EXPORT __declspec(dllimport) # endif # endif +# define JEMALLOC_FORMAT_ARG(i) # define JEMALLOC_FORMAT_PRINTF(s, i) # define JEMALLOC_NOINLINE __declspec(noinline) # ifdef __cplusplus @@ -174,6 +180,11 @@ extern "C" { # ifndef JEMALLOC_EXPORT # define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) # endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG +# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) +# else +# define JEMALLOC_FORMAT_ARG(i) +# endif # ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF # define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) # elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) @@ -369,6 +380,7 @@ struct extent_hooks_s { # define malloc_stats_print je_malloc_stats_print # define malloc_usable_size je_malloc_usable_size # define mallocx je_mallocx +# define smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756 je_smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756 # define nallocx je_nallocx # define posix_memalign je_posix_memalign # define rallocx je_rallocx @@ -401,6 +413,7 @@ struct extent_hooks_s { # undef je_malloc_stats_print # undef je_malloc_usable_size # undef je_mallocx +# undef je_smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756 # undef je_nallocx # undef je_posix_memalign # undef je_rallocx diff --git a/deps/jemalloc/jemalloc_internal_defs.h.in.cmake b/deps/jemalloc/jemalloc_internal_defs.h.in.cmake index 9e5f305f7b..28f7e9386f 100644 --- a/deps/jemalloc/jemalloc_internal_defs.h.in.cmake +++ b/deps/jemalloc/jemalloc_internal_defs.h.in.cmake @@ -33,7 +33,9 @@ * Hyper-threaded CPUs may need a special instruction inside spin loops in * order to yield to another virtual CPU. */ -#define CPU_SPINWAIT __asm__ volatile("pause") +#define CPU_SPINWAIT @JEM_CPU_SPINWAIT@ +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT @JEM_HAVE_CPU_SPINWAIT@ /* * Number of significant bits in virtual addresses. This may be less than the @@ -47,25 +49,13 @@ /* Defined if GCC __atomic atomics are available. */ #define JEMALLOC_GCC_ATOMIC_ATOMICS 1 +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1 /* Defined if GCC __sync atomics are available. 
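Since CPU_SPINWAIT can now be empty, with HAVE_CPU_SPINWAIT recording whether a hint instruction exists, consumers have to guard the hint. A rough standalone illustration, hardcoding the x86 values the build system would normally substitute (compile with GCC/Clang on x86; on aarch64 the define would be empty and HAVE_CPU_SPINWAIT 0):

#include <stdatomic.h>
#include <stdio.h>

#define HAVE_CPU_SPINWAIT 1
#define CPU_SPINWAIT __asm__ volatile("pause")

int
main(void) {
    atomic_int flag = 1;   /* already set, so the loop exits immediately */
    while (atomic_load_explicit(&flag, memory_order_acquire) == 0) {
#if HAVE_CPU_SPINWAIT
        CPU_SPINWAIT;      /* yield pipeline resources while busy-waiting */
#endif
    }
    puts("done spinning");
    return 0;
}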
*/ #define JEMALLOC_GCC_SYNC_ATOMICS 1 - -/* - * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and - * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ - -/* - * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and - * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1 /* * Defined if __builtin_clz() and __builtin_clzl() are available. @@ -77,12 +67,6 @@ */ /* #undef JEMALLOC_OS_UNFAIR_LOCK */ -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -/* #undef JEMALLOC_OSSPIN */ - /* Defined if syscall(2) is usable. */ #define JEMALLOC_USE_SYSCALL @@ -152,6 +136,9 @@ /* JEMALLOC_STATS enables statistics calculation. */ /* #undef JEMALLOC_STATS */ +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + /* JEMALLOC_PROF enables allocation profiling. */ /* #undef JEMALLOC_PROF */ @@ -233,12 +220,30 @@ #define JEMALLOC_INTERNAL_FFS __builtin_ffs /* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. */ #define JEMALLOC_CACHE_OBLIVIOUS /* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ /* #undef JEMALLOC_ZONE */ @@ -256,6 +261,12 @@ #define JEMALLOC_HAVE_MADVISE /* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* * Methods for purging unused pages differ between operating systems. * * madvise(..., MADV_FREE) : This marks pages as being unused, such that they @@ -268,15 +279,23 @@ * MADV_FREE, though typically with higher * system overhead. */ -@JEM_MADFREE_DEF@ JEMALLOC_PURGE_MADVISE_FREE +@JEM_MADFREE_DEF@ JEMALLOC_PURGE_MADVISE_FREE #define JEMALLOC_PURGE_MADVISE_DONTNEED -#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS 1 +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. */ -#define JEMALLOC_THP +/* #undef JEMALLOC_THP */ /* Define if operating system has alloca.h header. */ #define JEMALLOC_HAS_ALLOCA_H 1 @@ -337,8 +356,15 @@ /* If defined, jemalloc takes the malloc/free/etc. symbol names. 
*/ #define JEMALLOC_IS_MALLOC 1 +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #define LG_SIZEOF_PTR @JEM_SIZEDEF@ #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ - diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c index 632fce5233..ba50e41033 100644 --- a/deps/jemalloc/src/arena.c +++ b/deps/jemalloc/src/arena.c @@ -3,13 +3,16 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/div.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/util.h" +JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS + /******************************************************************************/ /* Data. */ @@ -32,21 +35,6 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; -const arena_bin_info_t arena_bin_info[NBINS] = { -#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ - {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, -#define BIN_INFO_bin_no(reg_size, slab_size, nregs) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - BIN_INFO_bin_##bin((1U<<lg_grp) + (ndelta<<lg_delta), \ - (pgs << LG_PAGE), (pgs << LG_PAGE) / ((1U<<lg_grp) + \ - (ndelta<<lg_delta))) - SIZE_CLASSES -#undef BIN_INFO_bin_yes -#undef BIN_INFO_bin_no -#undef SC -}; - const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ h, @@ -54,6 +42,12 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #undef STEP }; +static div_info_t arena_binind_div_info[SC_NBINS]; + +size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; +size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; +static unsigned huge_arena_ind; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -62,155 +56,16 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit, - bool is_background_thread); + size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin); + bin_t *bin); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin); + bin_t *bin); /******************************************************************************/ -static bool -arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { - if (config_debug) { - for (size_t i = 0; i < sizeof(arena_stats_t); i++) { - assert(((char *)arena_stats)[i] == 0); - } - } -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", - WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { - return true; - } -#endif - /* Memory is zeroed, so there is no need to clear stats. 
*/ - return false; -} - -static void -arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_lock(tsdn, &arena_stats->mtx); -#endif -} - -static void -arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_unlock(tsdn, &arena_stats->mtx); -#endif -} - -static uint64_t -arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_u64(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return *p; -#endif -} - -static void -arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p += x; -#endif -} - -UNUSED static void -arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p -= x; - assert(*p + x >= *p); -#endif -} - -/* - * Non-atomically sets *dst += src. *dst needs external synchronization. - * This lets us avoid the cost of a fetch_add when its unnecessary (note that - * the types here are atomic). - */ -static void -arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); - atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); -#else - *dst += src; -#endif -} - -static size_t -arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_zu(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return atomic_load_zu(p, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur + x, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur - x, ATOMIC_RELAXED); -#endif -} - -/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ -static void -arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { - size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); - atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); -} - -void -arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - szind_t szind, uint64_t nrequests) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - - NBINS].nrequests, nrequests); - arena_stats_unlock(tsdn, arena_stats); -} - -void -arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); - arena_stats_unlock(tsdn, arena_stats); -} - void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, @@ -228,15 +83,16 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { + bin_stats_t *bstats, arena_stats_large_t *lstats, + arena_stats_extents_t *estats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, muzzy_decay_ms, nactive, ndirty, nmuzzy); - size_t base_allocated, base_resident, base_mapped; + size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, - &base_mapped); + &base_mapped, &metadata_thp); arena_stats_lock(tsdn, &arena->stats); @@ -245,6 +101,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->retained, extents_npages_get(&arena->extents_retained) << LG_PAGE); + atomic_store_zu(&astats->extent_avail, + atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED), + ATOMIC_RELAXED); + arena_stats_accum_u64(&astats->decay_dirty.npurge, arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.decay_dirty.npurge)); @@ -267,12 +127,15 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->base, base_allocated); arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); + arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + extents_npages_get(&arena->extents_dirty) + extents_npages_get(&arena->extents_muzzy)) << LG_PAGE))); + arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( + &arena->stats.abandoned_vm, ATOMIC_RELAXED)); - for (szind_t i = 0; i < NSIZES - NBINS; i++) { + for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].nmalloc); arena_stats_accum_u64(&lstats[i].nmalloc, nmalloc); @@ -290,12 +153,43 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_u64(&astats->nrequests_large, nmalloc + nrequests); + /* nfill == nmalloc for large currently. 
*/ + arena_stats_accum_u64(&lstats[i].nfills, nmalloc); + arena_stats_accum_u64(&astats->nfills_large, nmalloc); + + uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.lstats[i].nflushes); + arena_stats_accum_u64(&lstats[i].nflushes, nflush); + arena_stats_accum_u64(&astats->nflushes_large, nflush); + assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; arena_stats_accum_zu(&astats->allocated_large, - curlextents * sz_index2size(NBINS + i)); + curlextents * sz_index2size(SC_NBINS + i)); + } + + for (pszind_t i = 0; i < SC_NPSIZES; i++) { + size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, + retained_bytes; + dirty = extents_nextents_get(&arena->extents_dirty, i); + muzzy = extents_nextents_get(&arena->extents_muzzy, i); + retained = extents_nextents_get(&arena->extents_retained, i); + dirty_bytes = extents_nbytes_get(&arena->extents_dirty, i); + muzzy_bytes = extents_nbytes_get(&arena->extents_muzzy, i); + retained_bytes = + extents_nbytes_get(&arena->extents_retained, i); + + atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); + atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); + atomic_store_zu(&estats[i].nretained, retained, ATOMIC_RELAXED); + atomic_store_zu(&estats[i].dirty_bytes, dirty_bytes, + ATOMIC_RELAXED); + atomic_store_zu(&estats[i].muzzy_bytes, muzzy_bytes, + ATOMIC_RELAXED); + atomic_store_zu(&estats[i].retained_bytes, retained_bytes, + ATOMIC_RELAXED); } arena_stats_unlock(tsdn, &arena->stats); @@ -303,16 +197,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* tcache_bytes counts currently cached bytes. */ atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - tcache_t *tcache; - ql_foreach(tcache, &arena->tcache_ql, link) { + cache_bin_array_descriptor_t *descriptor; + ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { szind_t i = 0; - for (; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + for (; i < SC_NBINS; i++) { + cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } @@ -350,21 +244,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, nstime_update(&astats->uptime); nstime_subtract(&astats->uptime, &arena->create_time); - for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(tsdn, &bin->lock); - malloc_mutex_prof_read(tsdn, &bstats[i].mutex_data, &bin->lock); - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - bstats[i].curregs += bin->stats.curregs; - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - bstats[i].nslabs += bin->stats.nslabs; - bstats[i].reslabs += bin->stats.reslabs; - bstats[i].curslabs += bin->stats.curslabs; - malloc_mutex_unlock(tsdn, &bin->lock); + for (szind_t i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_stats_merge(tsdn, &bstats[i], + &arena->bins[i].bin_shards[j]); + } } } @@ -384,8 +268,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, } static void * 
-arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, - const arena_bin_info_t *bin_info) { +arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { void *ret; arena_slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; @@ -400,6 +283,54 @@ arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, return ret; } +static void +arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, + unsigned cnt, void** ptrs) { + arena_slab_data_t *slab_data = extent_slab_data_get(slab); + + assert(extent_nfree_get(slab) >= cnt); + assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); + +#if (! defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) + for (unsigned i = 0; i < cnt; i++) { + size_t regind = bitmap_sfu(slab_data->bitmap, + &bin_info->bitmap_info); + *(ptrs + i) = (void *)((uintptr_t)extent_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); + } +#else + unsigned group = 0; + bitmap_t g = slab_data->bitmap[group]; + unsigned i = 0; + while (i < cnt) { + while (g == 0) { + g = slab_data->bitmap[++group]; + } + size_t shift = group << LG_BITMAP_GROUP_NBITS; + size_t pop = popcount_lu(g); + if (pop > (cnt - i)) { + pop = cnt - i; + } + + /* + * Load from memory locations only once, outside the + * hot loop below. + */ + uintptr_t base = (uintptr_t)extent_addr_get(slab); + uintptr_t regsize = (uintptr_t)bin_info->reg_size; + while (pop--) { + size_t bit = cfs_lu(&g); + size_t regind = shift + bit; + *(ptrs + i) = (void *)(base + regsize * regind); + + i++; + } + slab_data->bitmap[group] = g; + } +#endif + extent_nfree_sub(slab, cnt); +} + #ifndef JEMALLOC_JET static #endif @@ -412,37 +343,22 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % - (uintptr_t)arena_bin_info[binind].reg_size == 0); + (uintptr_t)bin_infos[binind].reg_size == 0); - /* Avoid doing division with a variable divisor. */ diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); - switch (binind) { -#define REGIND_bin_yes(index, reg_size) \ - case index: \ - regind = diff / (reg_size); \ - assert(diff == regind * (reg_size)); \ - break; -#define REGIND_bin_no(index, reg_size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - REGIND_bin_##bin(index, (1U<<lg_grp) + (ndelta<<lg_delta)) - SIZE_CLASSES -#undef REGIND_bin_yes -#undef REGIND_bin_no -#undef SC - default: not_reached(); - } - - assert(regind < arena_bin_info[binind].nregs); + + /* Avoid doing division with a variable divisor. 
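arena_slab_reg_alloc_batch above walks the slab bitmap one word at a time, using popcount and a clear-lowest-set-bit loop to pull several free regions out per word. A standalone sketch of that bit-extraction loop (GCC builtins, invented names, plain indices instead of region addresses):

#include <stdio.h>
#include <stdlib.h>

/* Pop up to `want` set-bit indices out of a 64-bit free-region bitmap word. */
static unsigned
pop_free_bits(unsigned long *word, unsigned want, size_t *out) {
    unsigned got = 0;
    while (*word != 0 && got < want) {
        unsigned bit = (unsigned)__builtin_ctzl(*word); /* lowest set bit */
        out[got++] = bit;
        *word &= *word - 1;                             /* clear that bit */
    }
    return got;
}

int
main(void) {
    unsigned long w = 0xb1UL;   /* bits 0, 4, 5, 7 set */
    size_t idx[4];
    unsigned n = pop_free_bits(&w, 4, idx);
    for (unsigned i = 0; i < n; i++) {
        printf("free region index %zu\n", idx[i]);
    }
    return 0;
}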
*/ + regind = div_compute(&arena_binind_div_info[binind], diff); + + assert(regind < bin_infos[binind].nregs); return regind; } static void -arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, - arena_slab_data_t *slab_data, void *ptr) { +arena_slab_reg_dalloc(extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { szind_t binind = extent_szind_get(slab); - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); assert(extent_nfree_get(slab) < bin_info->nregs); @@ -470,11 +386,11 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { cassert(config_stats); - if (usize < LARGE_MINCLASS) { - usize = LARGE_MINCLASS; + if (usize < SC_LARGE_MINCLASS) { + usize = SC_LARGE_MINCLASS; } index = sz_size2index(usize); - hindex = (index >= NBINS) ? index - NBINS : 0; + hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0; arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.lstats[hindex].nmalloc, 1); @@ -486,11 +402,11 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { cassert(config_stats); - if (usize < LARGE_MINCLASS) { - usize = LARGE_MINCLASS; + if (usize < SC_LARGE_MINCLASS) { + usize = SC_LARGE_MINCLASS; } index = sz_size2index(usize); - hindex = (index >= NBINS) ? index - NBINS : 0; + hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0; arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.lstats[hindex].ndalloc, 1); @@ -503,6 +419,11 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, arena_large_malloc_stats_update(tsdn, arena, usize); } +static bool +arena_may_have_muzzy(arena_t *arena) { + return (pages_can_purge_lazy && (arena_muzzy_decay_ms_get(arena) != 0)); +} + extent_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { @@ -517,7 +438,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_dirty, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); - if (extent == NULL) { + if (extent == NULL && arena_may_have_muzzy(arena)) { extent = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); @@ -692,7 +613,8 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool is_background_thread) { if (current_npages > npages_limit) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - npages_limit, is_background_thread); + npages_limit, current_npages - npages_limit, + is_background_thread); } } @@ -738,7 +660,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static void -arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { +arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { arena_decay_ms_write(decay, decay_ms); if (decay_ms > 0) { nstime_init(&decay->interval, (uint64_t)decay_ms * @@ -755,8 +677,8 @@ arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { } static bool -arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, - decay_stats_t *stats) { +arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, + arena_stats_decay_t *stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_decay_t); i++) { assert(((char *)decay)[i] == 0); @@ -768,7 +690,7 @@ arena_decay_init(arena_decay_t *decay, extents_t 
*extents, ssize_t decay_ms, return true; } decay->purging = false; - arena_decay_reinit(decay, extents, decay_ms); + arena_decay_reinit(decay, decay_ms); /* Memory is zeroed, so there is no need to clear stats. */ if (config_stats) { decay->stats = stats; @@ -798,7 +720,8 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - 0, is_background_thread); + 0, extents_npages_get(extents), + is_background_thread); } return false; } @@ -876,7 +799,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. */ - arena_decay_reinit(decay, extents, decay_ms); + arena_decay_reinit(decay, decay_ms); arena_maybe_decay(tsdn, arena, decay, extents, false); malloc_mutex_unlock(tsdn, &decay->mtx); @@ -900,14 +823,15 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, - extent_list_t *decay_extents) { + size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Stash extents according to npages_limit. */ size_t nstashed = 0; extent_t *extent; - while ((extent = extents_evict(tsdn, arena, r_extent_hooks, extents, + while (nstashed < npages_decay_max && + (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, npages_limit)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; @@ -919,7 +843,7 @@ static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents, bool all, extent_list_t *decay_extents, bool is_background_thread) { - UNUSED size_t nmadvise, nunmapped; + size_t nmadvise, nunmapped; size_t npurged; if (config_stats) { @@ -982,12 +906,15 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } /* - * npages_limit: Decay as many dirty extents as possible without violating the - * invariant: (extents_npages_get(extents) >= npages_limit) + * npages_limit: Decay at most npages_decay_max pages without violating the + * invariant: (extents_npages_get(extents) >= npages_limit). We need an upper + * bound on number of pages in order to prevent unbounded growth (namely in + * stashed), otherwise unbounded new pages could be added to extents during the + * current decay run, so that the purging thread never finishes. 
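Put differently, a single decay pass purges down toward npages_limit but touches at most npages_decay_max pages, so a stream of freshly dirtied pages cannot keep one pass running forever. A tiny illustrative helper for the per-pass budget (hypothetical name, not part of the patch):

#include <stddef.h>
#include <stdio.h>

/* Pages one decay pass may purge: never dip below the limit, never exceed
 * the per-pass cap. */
static size_t
decay_budget(size_t current_npages, size_t npages_limit,
    size_t npages_decay_max) {
    if (current_npages <= npages_limit) {
        return 0;
    }
    size_t excess = current_npages - npages_limit;
    return excess < npages_decay_max ? excess : npages_decay_max;
}

int
main(void) {
    printf("%zu\n", decay_budget(1000, 600, 256)); /* 256: capped per pass */
    printf("%zu\n", decay_budget(700, 600, 256));  /* 100: down to the limit */
    return 0;
}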
*/ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all, size_t npages_limit, + extents_t *extents, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -1005,9 +932,9 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extent_list_init(&decay_extents); size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, - npages_limit, &decay_extents); + npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { - UNUSED size_t npurged = arena_decay_stashed(tsdn, arena, + size_t npurged = arena_decay_stashed(tsdn, arena, &extent_hooks, decay, extents, all, &decay_extents, is_background_thread); assert(npurged == npurge); @@ -1023,7 +950,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (all) { malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, extents, all, 0, - is_background_thread); + extents_npages_get(extents), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -1045,7 +972,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (have_background_thread && background_thread_enabled() && epoch_advanced && !is_background_thread) { - background_thread_interval_check(tsdn, arena, decay, npages_new); + background_thread_interval_check(tsdn, arena, decay, + npages_new); } return false; @@ -1082,30 +1010,37 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { } static void -arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) > 0); extent_heap_insert(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs++; + } } static void -arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { extent_heap_remove(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs--; + } } static extent_t * -arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { +arena_bin_slabs_nonfull_tryget(bin_t *bin) { extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); if (slab == NULL) { return NULL; } if (config_stats) { bin->stats.reslabs++; + bin->stats.nonfull_slabs--; } return slab; } static void -arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) == 0); /* * Tracking extents is required by arena_reset, which is not allowed @@ -1119,13 +1054,44 @@ arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { } static void -arena_bin_slabs_full_remove(arena_t *arena, arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) { if (arena_is_auto(arena)) { return; } extent_list_remove(&bin->slabs_full, slab); } +static void +arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { + extent_t *slab; + + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + if (bin->slabcur != NULL) { + slab = bin->slabcur; + bin->slabcur = NULL; + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + } + while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != NULL) { + 
malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + } + for (slab = extent_list_first(&bin->slabs_full); slab != NULL; + slab = extent_list_first(&bin->slabs_full)) { + arena_bin_slabs_full_remove(arena, bin, slab); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + } + if (config_stats) { + bin->stats.curregs = 0; + bin->stats.curslabs = 0; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); +} + void arena_reset(tsd_t *tsd, arena_t *arena) { /* @@ -1155,7 +1121,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { usize = sz_index2size(alloc_ctx.szind); @@ -1171,35 +1137,11 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); /* Bins. */ - for (unsigned i = 0; i < NBINS; i++) { - extent_t *slab; - arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - if (bin->slabcur != NULL) { - slab = bin->slabcur; - bin->slabcur = NULL; - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - } - while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != - NULL) { - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - } - for (slab = extent_list_first(&bin->slabs_full); slab != NULL; - slab = extent_list_first(&bin->slabs_full)) { - arena_bin_slabs_full_remove(arena, bin, slab); - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - } - if (config_stats) { - bin->stats.curregs = 0; - bin->stats.curslabs = 0; + for (unsigned i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + arena_bin_reset(tsd, arena, + &arena->bins[i].bin_shards[j]); } - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); @@ -1262,7 +1204,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info, + extent_hooks_t **r_extent_hooks, const bin_info_t *bin_info, szind_t szind) { extent_t *slab; bool zero, commit; @@ -1284,8 +1226,8 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, } static extent_t * -arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, - const arena_bin_info_t *bin_info) { +arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, + const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1296,7 +1238,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); - if (slab == NULL) { + if (slab == NULL && arena_may_have_muzzy(arena)) { slab = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_muzzy, NULL, 
bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); @@ -1312,7 +1254,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, /* Initialize slab internals. */ arena_slab_data_t *slab_data = extent_slab_data_get(slab); - extent_nfree_set(slab, bin_info->nregs); + extent_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); @@ -1321,10 +1263,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, } static extent_t * -arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, - szind_t binind) { +arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, unsigned binshard) { extent_t *slab; - const arena_bin_info_t *bin_info; + const bin_info_t *bin_info; /* Look for a usable slab. */ slab = arena_bin_slabs_nonfull_tryget(bin); @@ -1333,12 +1275,12 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, } /* No existing slabs have any space available. */ - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; /* Allocate a new slab. */ malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - slab = arena_slab_alloc(tsdn, arena, binind, bin_info); + slab = arena_slab_alloc(tsdn, arena, binind, binshard, bin_info); /********************************/ malloc_mutex_lock(tsdn, &bin->lock); if (slab != NULL) { @@ -1364,24 +1306,24 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, /* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, - szind_t binind) { - const arena_bin_info_t *bin_info; +arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, unsigned binshard) { + const bin_info_t *bin_info; extent_t *slab; - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; if (!arena_is_auto(arena) && bin->slabcur != NULL) { arena_bin_slabs_full_insert(arena, bin, bin->slabcur); bin->slabcur = NULL; } - slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind); + slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, binshard); if (bin->slabcur != NULL) { /* * Another thread updated slabcur while this one ran without the * bin lock in arena_bin_nonfull_slab_get(). */ if (extent_nfree_get(bin->slabcur) > 0) { - void *ret = arena_slab_reg_alloc(tsdn, bin->slabcur, + void *ret = arena_slab_reg_alloc(bin->slabcur, bin_info); if (slab != NULL) { /* @@ -1415,51 +1357,78 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, assert(extent_nfree_get(bin->slabcur) > 0); - return arena_slab_reg_alloc(tsdn, slab, bin_info); + return arena_slab_reg_alloc(slab, bin_info); +} + +/* Choose a bin shard and return the locked bin. 
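arena_bin_choose_lock below spreads threads across per-size-class bin shards so that they contend on different mutexes. A minimal standalone imitation of the pick-shard-then-lock step (fixed shard count, modulo selection, and demo_ names are all invented; build with -pthread):

#include <pthread.h>
#include <stdio.h>

#define N_SHARDS 4

typedef struct {
    pthread_mutex_t lock;
    long nallocs;
} demo_shard_t;

static demo_shard_t shards[N_SHARDS] = {
    {PTHREAD_MUTEX_INITIALIZER, 0}, {PTHREAD_MUTEX_INITIALIZER, 0},
    {PTHREAD_MUTEX_INITIALIZER, 0}, {PTHREAD_MUTEX_INITIALIZER, 0},
};

/* Pick a shard for the calling thread and return it locked. */
static demo_shard_t *
shard_choose_lock(unsigned thread_seed, unsigned *shard_out) {
    *shard_out = thread_seed % N_SHARDS;
    demo_shard_t *s = &shards[*shard_out];
    pthread_mutex_lock(&s->lock);
    return s;
}

int
main(void) {
    unsigned ind;
    demo_shard_t *s = shard_choose_lock(42, &ind);
    s->nallocs++;
    pthread_mutex_unlock(&s->lock);
    printf("used shard %u\n", ind);
    return 0;
}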
*/ +bin_t * +arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard) { + bin_t *bin; + if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { + *binshard = 0; + } else { + *binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; + } + assert(*binshard < bin_infos[binind].n_shards); + bin = &arena->bins[binind].bin_shards[*binshard]; + malloc_mutex_lock(tsdn, &bin->lock); + + return bin; } void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { - unsigned i, nfill; - arena_bin_t *bin; + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { + unsigned i, nfill, cnt; assert(tbin->ncached == 0); if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { prof_idump(tsdn); } - bin = &arena->bins[binind]; - malloc_mutex_lock(tsdn, &bin->lock); + + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> - tcache->lg_fill_div[binind]); i < nfill; i++) { + tcache->lg_fill_div[binind]); i < nfill; i += cnt) { extent_t *slab; - void *ptr; if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ptr = arena_slab_reg_alloc(tsdn, slab, - &arena_bin_info[binind]); + unsigned tofill = nfill - i; + cnt = tofill < extent_nfree_get(slab) ? + tofill : extent_nfree_get(slab); + arena_slab_reg_alloc_batch( + slab, &bin_infos[binind], cnt, + tbin->avail - nfill + i); } else { - ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind); - } - if (ptr == NULL) { + cnt = 1; + void *ptr = arena_bin_malloc_hard(tsdn, arena, bin, + binind, binshard); /* * OOM. tbin->avail isn't yet filled down to its first * element, so the successful allocations (if any) must * be moved just before tbin->avail before bailing out. */ - if (i > 0) { - memmove(tbin->avail - i, tbin->avail - nfill, - i * sizeof(void *)); + if (ptr == NULL) { + if (i > 0) { + memmove(tbin->avail - i, + tbin->avail - nfill, + i * sizeof(void *)); + } + break; } - break; + /* Insert such that low regions get used first. */ + *(tbin->avail - nfill + i) = ptr; } if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ptr, &arena_bin_info[binind], - true); + for (unsigned j = 0; j < cnt; j++) { + void* ptr = *(tbin->avail - nfill + i + j); + arena_alloc_junk_small(ptr, &bin_infos[binind], + true); + } } - /* Insert such that low regions get used first. 
*/ - *(tbin->avail - nfill + i) = ptr; } if (config_stats) { bin->stats.nmalloc += i; @@ -1474,14 +1443,14 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } void -arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) { +arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero) { if (!zero) { memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); } } static void -arena_dalloc_junk_small_impl(void *ptr, const arena_bin_info_t *bin_info) { +arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); } arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = @@ -1490,19 +1459,19 @@ arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; - arena_bin_t *bin; + bin_t *bin; size_t usize; extent_t *slab; - assert(binind < NBINS); - bin = &arena->bins[binind]; + assert(binind < SC_NBINS); usize = sz_index2size(binind); + unsigned binshard; + bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); - malloc_mutex_lock(tsdn, &bin->lock); if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ret = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); + ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { - ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); + ret = arena_bin_malloc_hard(tsdn, arena, bin, binind, binshard); } if (ret == NULL) { @@ -1524,14 +1493,14 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); + &bin_infos[binind], false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], + arena_alloc_junk_small(ret, &bin_infos[binind], true); } memset(ret, 0, usize); @@ -1547,13 +1516,13 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, assert(!tsdn_null(tsdn) || arena != NULL); if (likely(!tsdn_null(tsdn))) { - arena = arena_choose(tsdn_tsd(tsdn), arena); + arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, size); } if (unlikely(arena == NULL)) { return NULL; } - if (likely(size <= SMALL_MAXCLASS)) { + if (likely(size <= SC_SMALL_MAXCLASS)) { return arena_malloc_small(tsdn, arena, ind, zero); } return large_malloc(tsdn, arena, sz_index2size(ind), zero); @@ -1564,8 +1533,9 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE - && (usize & PAGE_MASK) == 0))) { + if (usize <= SC_SMALL_MAXCLASS + && (alignment < PAGE + || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special slab placement. 
*/ ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize), zero, tcache, true); @@ -1580,11 +1550,15 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } void -arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) { +arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { cassert(config_prof); assert(ptr != NULL); - assert(isalloc(tsdn, ptr) == LARGE_MINCLASS); - assert(usize <= SMALL_MAXCLASS); + assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); + assert(usize <= SC_SMALL_MAXCLASS); + + if (config_opt_safety_checks) { + safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); + } rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1608,15 +1582,15 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - extent_szind_set(extent, NBINS); + extent_szind_set(extent, SC_NBINS); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - NBINS, false); + SC_NBINS, false); - assert(isalloc(tsdn, ptr) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); - return LARGE_MINCLASS; + return SC_LARGE_MINCLASS; } void @@ -1626,23 +1600,32 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(opt_prof); extent_t *extent = iealloc(tsdn, ptr); - size_t usize = arena_prof_demote(tsdn, extent, ptr); - if (usize <= tcache_maxclass) { + size_t usize = extent_usize_get(extent); + size_t bumped_usize = arena_prof_demote(tsdn, extent, ptr); + if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { + /* + * Currently, we only do redzoning for small sampled + * allocations. + */ + assert(bumped_usize == SC_LARGE_MINCLASS); + safety_check_verify_redzone(ptr, usize, bumped_usize); + } + if (bumped_usize <= tcache_maxclass && tcache != NULL) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - sz_size2index(usize), slow_path); + sz_size2index(bumped_usize), slow_path); } else { large_dalloc(tsdn, extent); } } static void -arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { +arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { /* Dissociate slab from bin. 
*/ if (slab == bin->slabcur) { bin->slabcur = NULL; } else { szind_t binind = extent_szind_get(slab); - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; /* * The following block's conditional is necessary because if the @@ -1659,7 +1642,7 @@ arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { + bin_t *bin) { assert(slab != bin->slabcur); malloc_mutex_unlock(tsdn, &bin->lock); @@ -1674,7 +1657,7 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { + bin_t *bin) { assert(extent_nfree_get(slab) > 0); /* @@ -1700,18 +1683,16 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - void *ptr, bool junked) { +arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, extent_t *slab, void *ptr, bool junked) { arena_slab_data_t *slab_data = extent_slab_data_get(slab); - szind_t binind = extent_szind_get(slab); - arena_bin_t *bin = &arena->bins[binind]; - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, bin_info); } - arena_slab_reg_dalloc(tsdn, slab, slab_data, ptr); + arena_slab_reg_dalloc(slab, slab_data, ptr); unsigned nfree = extent_nfree_get(slab); if (nfree == bin_info->nregs) { arena_dissociate_bin_slab(arena, slab, bin); @@ -1728,18 +1709,21 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } void -arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - void *ptr) { - arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, true); +arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, extent_t *extent, void *ptr) { + arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, + true); } static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { szind_t binind = extent_szind_get(extent); - arena_bin_t *bin = &arena->bins[binind]; + unsigned binshard = extent_binshard_get(extent); + bin_t *bin = &arena->bins[binind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false); + arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, + false); malloc_mutex_unlock(tsdn, &bin->lock); } @@ -1754,38 +1738,48 @@ arena_dalloc_small(tsdn_t *tsdn, void *ptr) { bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero) { + size_t extra, bool zero, size_t *newsize) { + bool ret; /* Calls with non-zero extra had to clamp extra. 
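The locked dalloc path above only cares about two boundary conditions: a slab that becomes completely free is handed back to the arena, and a slab that was previously full must become findable again. A compact sketch of that decision (demo types and fixed region count invented; jemalloc additionally special-cases slabcur):

#include <stdio.h>

enum { NREGS = 4 };

typedef struct {
    unsigned nfree;   /* free regions in this slab */
} demo_slab_t;

/* What happens to a slab's list membership when one region is freed. */
static const char *
on_region_free(demo_slab_t *slab) {
    slab->nfree++;
    if (slab->nfree == NREGS) {
        return "slab empty: dissociate from bin, return it to the arena";
    } else if (slab->nfree == 1) {
        return "slab was full: move it back onto the nonfull list";
    }
    return "slab stays where it is";
}

int
main(void) {
    demo_slab_t was_full = {0}, partial = {2}, almost_empty = {NREGS - 1};
    printf("%s\n", on_region_free(&was_full));
    printf("%s\n", on_region_free(&partial));
    printf("%s\n", on_region_free(&almost_empty));
    return 0;
}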
*/ - assert(extra == 0 || size + extra <= LARGE_MAXCLASS); + assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - if (unlikely(size > LARGE_MAXCLASS)) { - return true; + extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(size > SC_LARGE_MAXCLASS)) { + ret = true; + goto done; } - extent_t *extent = iealloc(tsdn, ptr); size_t usize_min = sz_s2u(size); size_t usize_max = sz_s2u(size + extra); - if (likely(oldsize <= SMALL_MAXCLASS && usize_min <= SMALL_MAXCLASS)) { + if (likely(oldsize <= SC_SMALL_MAXCLASS && usize_min + <= SC_SMALL_MAXCLASS)) { /* * Avoid moving the allocation if the size class can be left the * same. */ - assert(arena_bin_info[sz_size2index(oldsize)].reg_size == + assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize); - if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) != - sz_size2index(oldsize)) && (size > oldsize || usize_max < - oldsize)) { - return true; + if ((usize_max > SC_SMALL_MAXCLASS + || sz_size2index(usize_max) != sz_size2index(oldsize)) + && (size > oldsize || usize_max < oldsize)) { + ret = true; + goto done; } arena_decay_tick(tsdn, extent_arena_get(extent)); - return false; - } else if (oldsize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS) { - return large_ralloc_no_move(tsdn, extent, usize_min, usize_max, + ret = false; + } else if (oldsize >= SC_LARGE_MINCLASS + && usize_max >= SC_LARGE_MINCLASS) { + ret = large_ralloc_no_move(tsdn, extent, usize_min, usize_max, zero); + } else { + ret = true; } +done: + assert(extent == iealloc(tsdn, ptr)); + *newsize = extent_usize_get(extent); - return true; + return ret; } static void * @@ -1796,7 +1790,7 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, zero, tcache, true); } usize = sz_sa2u(usize, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } return ipalloct(tsdn, usize, alignment, zero, tcache, arena); @@ -1804,22 +1798,30 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache) { + size_t size, size_t alignment, bool zero, tcache_t *tcache, + hook_ralloc_args_t *hook_args) { size_t usize = sz_s2u(size); - if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { return NULL; } - if (likely(usize <= SMALL_MAXCLASS)) { + if (likely(usize <= SC_SMALL_MAXCLASS)) { /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero)) { + UNUSED size_t newsize; + if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero, + &newsize)) { + hook_invoke_expand(hook_args->is_realloc + ? hook_expand_realloc : hook_expand_rallocx, + ptr, oldsize, usize, (uintptr_t)ptr, + hook_args->args); return ptr; } } - if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) { - return large_ralloc(tsdn, arena, iealloc(tsdn, ptr), usize, - alignment, zero, tcache); + if (oldsize >= SC_LARGE_MINCLASS + && usize >= SC_LARGE_MINCLASS) { + return large_ralloc(tsdn, arena, ptr, usize, + alignment, zero, tcache, hook_args); } /* @@ -1832,11 +1834,16 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, return NULL; } + hook_invoke_alloc(hook_args->is_realloc + ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret, + hook_args->args); + hook_invoke_dalloc(hook_args->is_realloc + ? 
hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + /* * Junk/zero-filling were already done by * ipalloc()/arena_malloc(). */ - size_t copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); @@ -1885,6 +1892,32 @@ arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { return false; } +bool +arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, + size_t *new_limit) { + assert(opt_retain); + + pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); + if (new_limit != NULL) { + size_t limit = *new_limit; + /* Grow no more than the new limit. */ + if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { + return true; + } + } + + malloc_mutex_lock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + if (old_limit != NULL) { + *old_limit = sz_pind2sz(arena->retain_grow_limit); + } + if (new_limit != NULL) { + arena->retain_grow_limit = new_ind; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + + return false; +} + unsigned arena_nthreads_get(arena_t *arena, bool internal) { return atomic_load_u(&arena->nthreads[internal], ATOMIC_RELAXED); @@ -1920,7 +1953,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - arena = (arena_t *)base_alloc(tsdn, base, sizeof(arena_t), CACHELINE); + unsigned nbins_total = 0; + for (i = 0; i < SC_NBINS; i++) { + nbins_total += bin_infos[i].n_shards; + } + size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total; + arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; } @@ -1935,6 +1973,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } ql_new(&arena->tcache_ql); + ql_new(&arena->cache_bin_array_descriptor_ql); if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2001,16 +2040,17 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (arena_decay_init(&arena->decay_dirty, &arena->extents_dirty, + if (arena_decay_init(&arena->decay_dirty, arena_dirty_decay_ms_default_get(), &arena->stats.decay_dirty)) { goto label_error; } - if (arena_decay_init(&arena->decay_muzzy, &arena->extents_muzzy, + if (arena_decay_init(&arena->decay_muzzy, arena_muzzy_decay_ms_default_get(), &arena->stats.decay_muzzy)) { goto label_error; } arena->extent_grow_next = sz_psz2ind(HUGEPAGE); + arena->retain_grow_limit = sz_psz2ind(SC_LARGE_MAXCLASS); if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2023,19 +2063,20 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } /* Initialize bins. 
*/ - for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock, "arena_bin", - WITNESS_RANK_ARENA_BIN, malloc_mutex_rank_exclusive)) { - goto label_error; - } - bin->slabcur = NULL; - extent_heap_new(&bin->slabs_nonfull); - extent_list_init(&bin->slabs_full); - if (config_stats) { - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); + uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t); + atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); + for (i = 0; i < SC_NBINS; i++) { + unsigned nshards = bin_infos[i].n_shards; + arena->bins[i].bin_shards = (bin_t *)bin_addr; + bin_addr += nshards * sizeof(bin_t); + for (unsigned j = 0; j < nshards; j++) { + bool err = bin_init(&arena->bins[i].bin_shards[j]); + if (err) { + goto label_error; + } } } + assert(bin_addr == (uintptr_t)arena + arena_size); arena->base = base; /* Set arena before creating background threads. */ @@ -2052,8 +2093,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { */ assert(!tsdn_null(tsdn)); pre_reentrancy(tsdn_tsd(tsdn), arena); - if (hooks_arena_new_hook) { - hooks_arena_new_hook(); + if (test_hooks_arena_new_hook) { + test_hooks_arena_new_hook(); } post_reentrancy(tsdn_tsd(tsdn)); } @@ -2066,10 +2107,75 @@ label_error: return NULL; } +arena_t * +arena_choose_huge(tsd_t *tsd) { + /* huge_arena_ind can be 0 during init (will use a0). */ + if (huge_arena_ind == 0) { + assert(!malloc_initialized()); + } + + arena_t *huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, false); + if (huge_arena == NULL) { + /* Create the huge arena on demand. */ + assert(huge_arena_ind != 0); + huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, true); + if (huge_arena == NULL) { + return NULL; + } + /* + * Purge eagerly for huge allocations, because: 1) number of + * huge allocations is usually small, which means ticker based + * decay is not reliable; and 2) less immediate reuse is + * expected for huge allocations. + */ + if (arena_dirty_decay_ms_default_get() > 0) { + arena_dirty_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + } + if (arena_muzzy_decay_ms_default_get() > 0) { + arena_muzzy_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + } + } + + return huge_arena; +} + +bool +arena_init_huge(void) { + bool huge_enabled; + + /* The threshold should be large size class. */ + if (opt_oversize_threshold > SC_LARGE_MAXCLASS || + opt_oversize_threshold < SC_LARGE_MINCLASS) { + opt_oversize_threshold = 0; + oversize_threshold = SC_LARGE_MAXCLASS + PAGE; + huge_enabled = false; + } else { + /* Reserve the index for the huge arena. 
*/ + huge_arena_ind = narenas_total_get(); + oversize_threshold = opt_oversize_threshold; + huge_enabled = true; + } + + return huge_enabled; +} + +bool +arena_is_huge(unsigned arena_ind) { + if (huge_arena_ind == 0) { + return false; + } + return (arena_ind == huge_arena_ind); +} + void -arena_boot(void) { +arena_boot(sc_data_t *sc_data) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms); + for (unsigned i = 0; i < SC_NBINS; i++) { + sc_t *sc = &sc_data->sc[i]; + div_init(&arena_binind_div_info[i], + (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta)); + } } void @@ -2114,8 +2220,10 @@ arena_prefork6(tsdn_t *tsdn, arena_t *arena) { void arena_prefork7(tsdn_t *tsdn, arena_t *arena) { - for (unsigned i = 0; i < NBINS; i++) { - malloc_mutex_prefork(tsdn, &arena->bins[i].lock); + for (unsigned i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_prefork(tsdn, &arena->bins[i].bin_shards[j]); + } } } @@ -2123,8 +2231,11 @@ void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; - for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + for (i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_postfork_parent(tsdn, + &arena->bins[i].bin_shards[j]); + } } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); @@ -2154,15 +2265,23 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } if (config_stats) { ql_new(&arena->tcache_ql); + ql_new(&arena->cache_bin_array_descriptor_ql); tcache_t *tcache = tcache_get(tsdn_tsd(tsdn)); if (tcache != NULL && tcache->arena == arena) { ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); + cache_bin_array_descriptor_init( + &tcache->cache_bin_array_descriptor, + tcache->bins_small, tcache->bins_large); + ql_tail_insert(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); } } - for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + for (i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_postfork_child(tsdn, &arena->bins[i].bin_shards[j]); + } } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); diff --git a/deps/jemalloc/src/background_thread.c b/deps/jemalloc/src/background_thread.c index eb30eb5b42..57b9b256bb 100644 --- a/deps/jemalloc/src/background_thread.c +++ b/deps/jemalloc/src/background_thread.c @@ -4,6 +4,8 @@ #include "jemalloc/internal/assert.h" +JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS + /******************************************************************************/ /* Data. */ @@ -11,38 +13,37 @@ #define BACKGROUND_THREAD_DEFAULT false /* Read-only after initialization. */ bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; +size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1; /* Used for thread creation, termination and stats. */ malloc_mutex_t background_thread_lock; /* Indicates global state. Atomic because decay reads this w/o locking. */ atomic_b_t background_thread_enabled_state; size_t n_background_threads; +size_t max_background_threads; /* Thread info per-index. */ background_thread_info_t *background_thread_info; -/* False if no necessary runtime support. 
*/ -bool can_enable_background_thread; - /******************************************************************************/ #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER -#include <dlfcn.h> static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); -static pthread_once_t once_control = PTHREAD_ONCE_INIT; static void -pthread_create_wrapper_once(void) { +pthread_create_wrapper_init(void) { #ifdef JEMALLOC_LAZY_LOCK - isthreaded = true; + if (!isthreaded) { + isthreaded = true; + } #endif } int pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *__restrict arg) { - pthread_once(&once_control, pthread_create_wrapper_once); + pthread_create_wrapper_init(); return pthread_create_fptr(thread, attr, start_routine, arg); } @@ -78,7 +79,7 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { } static inline bool -set_current_thread_affinity(UNUSED int cpu) { +set_current_thread_affinity(int cpu) { #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; CPU_ZERO(&cpuset); @@ -286,7 +287,7 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; unsigned narenas = narenas_total_get(); - for (unsigned i = ind; i < narenas; i += ncpus) { + for (unsigned i = ind; i < narenas; i += max_background_threads) { arena_t *arena = arena_get(tsdn, i, false); if (!arena) { continue; @@ -379,35 +380,32 @@ background_thread_create_signals_masked(pthread_t *thread, return create_err; } -static void +static bool check_background_thread_creation(tsd_t *tsd, unsigned *n_created, bool *created_threads) { + bool ret = false; if (likely(*n_created == n_background_threads)) { - return; + return ret; } - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx); -label_restart: - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); - for (unsigned i = 1; i < ncpus; i++) { + tsdn_t *tsdn = tsd_tsdn(tsd); + malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx); + for (unsigned i = 1; i < max_background_threads; i++) { if (created_threads[i]) { continue; } background_thread_info_t *info = &background_thread_info[i]; - malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - assert(info->state != background_thread_paused); + malloc_mutex_lock(tsdn, &info->mtx); + /* + * In case of the background_thread_paused state because of + * arena reset, delay the creation. + */ bool create = (info->state == background_thread_started); - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + malloc_mutex_unlock(tsdn, &info->mtx); if (!create) { continue; } - /* - * To avoid deadlock with prefork handlers (which waits for the - * mutex held here), unlock before calling pthread_create(). - */ - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); - pre_reentrancy(tsd, NULL); int err = background_thread_create_signals_masked(&info->thread, NULL, background_thread_entry, (void *)(uintptr_t)i); @@ -423,19 +421,21 @@ label_restart: abort(); } } - /* Restart since we unlocked. */ - goto label_restart; + /* Return to restart the loop since we unlocked. */ + ret = true; + break; } - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx); - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + malloc_mutex_lock(tsdn, &background_thread_info[0].mtx); + + return ret; } static void background_thread0_work(tsd_t *tsd) { /* Thread0 is also responsible for launching / terminating threads. 
*/ - VARIABLE_ARRAY(bool, created_threads, ncpus); + VARIABLE_ARRAY(bool, created_threads, max_background_threads); unsigned i; - for (i = 1; i < ncpus; i++) { + for (i = 1; i < max_background_threads; i++) { created_threads[i] = false; } /* Start working, and create more threads when asked. */ @@ -445,8 +445,10 @@ background_thread0_work(tsd_t *tsd) { &background_thread_info[0])) { continue; } - check_background_thread_creation(tsd, &n_created, - (bool *)&created_threads); + if (check_background_thread_creation(tsd, &n_created, + (bool *)&created_threads)) { + continue; + } background_work_sleep_once(tsd_tsdn(tsd), &background_thread_info[0], 0); } @@ -456,15 +458,20 @@ background_thread0_work(tsd_t *tsd) { * the global background_thread mutex (and is waiting) for us. */ assert(!background_thread_enabled()); - for (i = 1; i < ncpus; i++) { + for (i = 1; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; assert(info->state != background_thread_paused); if (created_threads[i]) { background_threads_disable_single(tsd, info); } else { malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - /* Clear in case the thread wasn't created. */ - info->state = background_thread_stopped; + if (info->state != background_thread_stopped) { + /* The thread was not created. */ + assert(info->state == + background_thread_started); + n_background_threads--; + info->state = background_thread_stopped; + } malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } } @@ -498,9 +505,11 @@ background_work(tsd_t *tsd, unsigned ind) { static void * background_thread_entry(void *ind_arg) { unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; - assert(thread_ind < ncpus); + assert(thread_ind < max_background_threads); #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); +#elif defined(__FreeBSD__) + pthread_set_name_np(pthread_self(), "jemalloc_bg_thd"); #endif if (opt_percpu_arena != percpu_arena_disabled) { set_current_thread_affinity((int)thread_ind); @@ -525,14 +534,13 @@ background_thread_init(tsd_t *tsd, background_thread_info_t *info) { n_background_threads++; } -/* Create a new background thread if needed. */ -bool -background_thread_create(tsd_t *tsd, unsigned arena_ind) { +static bool +background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { assert(have_background_thread); malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); /* We create at most NCPUs threads. */ - size_t thread_ind = arena_ind % ncpus; + size_t thread_ind = arena_ind % max_background_threads; background_thread_info_t *info = &background_thread_info[thread_ind]; bool need_new_thread; @@ -580,37 +588,53 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { return false; } +/* Create a new background thread if needed. 
*/ +bool +background_thread_create(tsd_t *tsd, unsigned arena_ind) { + assert(have_background_thread); + + bool ret; + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + ret = background_thread_create_locked(tsd, arena_ind); + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + + return ret; +} + bool background_threads_enable(tsd_t *tsd) { assert(n_background_threads == 0); assert(background_thread_enabled()); malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); - VARIABLE_ARRAY(bool, marked, ncpus); + VARIABLE_ARRAY(bool, marked, max_background_threads); unsigned i, nmarked; - for (i = 0; i < ncpus; i++) { + for (i = 0; i < max_background_threads; i++) { marked[i] = false; } nmarked = 0; + /* Thread 0 is required and created at the end. */ + marked[0] = true; /* Mark the threads we need to create for thread 0. */ unsigned n = narenas_total_get(); for (i = 1; i < n; i++) { - if (marked[i % ncpus] || + if (marked[i % max_background_threads] || arena_get(tsd_tsdn(tsd), i, false) == NULL) { continue; } - background_thread_info_t *info = &background_thread_info[i]; + background_thread_info_t *info = &background_thread_info[ + i % max_background_threads]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); assert(info->state == background_thread_stopped); background_thread_init(tsd, info); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - marked[i % ncpus] = true; - if (++nmarked == ncpus) { + marked[i % max_background_threads] = true; + if (++nmarked == max_background_threads) { break; } } - return background_thread_create(tsd, 0); + return background_thread_create_locked(tsd, 0); } bool @@ -720,14 +744,14 @@ background_thread_prefork0(tsdn_t *tsdn) { void background_thread_prefork1(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx); } } void background_thread_postfork_parent(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_postfork_parent(tsdn, &background_thread_info[i].mtx); } @@ -736,7 +760,7 @@ background_thread_postfork_parent(tsdn_t *tsdn) { void background_thread_postfork_child(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_postfork_child(tsdn, &background_thread_info[i].mtx); } @@ -749,7 +773,7 @@ background_thread_postfork_child(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &background_thread_lock); n_background_threads = 0; background_thread_enabled_set(tsdn, false); - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); info->state = background_thread_stopped; @@ -773,9 +797,15 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { stats->num_threads = n_background_threads; uint64_t num_runs = 0; nstime_init(&stats->run_interval, 0); - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; - malloc_mutex_lock(tsdn, &info->mtx); + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Each background thread run may take a long time; + * avoid waiting on the stats if the thread is active. 
+ */ + continue; + } if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); @@ -795,6 +825,39 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS +#ifdef JEMALLOC_HAVE_DLSYM +#include <dlfcn.h> +#endif + +static bool +pthread_create_fptr_init(void) { + if (pthread_create_fptr != NULL) { + return false; + } + /* + * Try the next symbol first, because 1) when use lazy_lock we have a + * wrapper for pthread_create; and 2) application may define its own + * wrapper as well (and can call malloc within the wrapper). + */ +#ifdef JEMALLOC_HAVE_DLSYM + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); +#else + pthread_create_fptr = NULL; +#endif + if (pthread_create_fptr == NULL) { + if (config_lazy_lock) { + malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } else { + /* Fall back to the default symbol. */ + pthread_create_fptr = pthread_create; + } + } + + return false; +} + /* * When lazy lock is enabled, we need to make sure setting isthreaded before * taking any background_thread locks. This is called early in ctl (instead of @@ -805,7 +868,8 @@ void background_thread_ctl_init(tsdn_t *tsdn) { malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - pthread_once(&once_control, pthread_create_wrapper_once); + pthread_create_fptr_init(); + pthread_create_wrapper_init(); #endif } @@ -818,18 +882,10 @@ background_thread_boot0(void) { "supports pthread only\n"); return true; } - #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); - if (pthread_create_fptr == NULL) { - can_enable_background_thread = false; - if (config_lazy_lock || opt_background_thread) { - malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } - } else { - can_enable_background_thread = true; + if ((config_lazy_lock || opt_background_thread) && + pthread_create_fptr_init()) { + return true; } #endif return false; @@ -841,6 +897,11 @@ background_thread_boot1(tsdn_t *tsdn) { assert(have_background_thread); assert(narenas_total_get() > 0); + if (opt_max_background_threads > MAX_BACKGROUND_THREAD_LIMIT) { + opt_max_background_threads = DEFAULT_NUM_BACKGROUND_THREAD; + } + max_background_threads = opt_max_background_threads; + background_thread_enabled_set(tsdn, opt_background_thread); if (malloc_mutex_init(&background_thread_lock, "background_thread_global", @@ -848,17 +909,15 @@ background_thread_boot1(tsdn_t *tsdn) { malloc_mutex_rank_exclusive)) { return true; } - if (opt_background_thread) { - background_thread_ctl_init(tsdn); - } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, - b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE); + b0get(), opt_max_background_threads * + sizeof(background_thread_info_t), CACHELINE); if (background_thread_info == NULL) { return true; } - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; /* Thread mutex is rank_inclusive because of thread0. 
*/ if (malloc_mutex_init(&info->mtx, "background_thread", diff --git a/deps/jemalloc/src/base.c b/deps/jemalloc/src/base.c index 97078b134d..f3c61661a2 100644 --- a/deps/jemalloc/src/base.c +++ b/deps/jemalloc/src/base.c @@ -10,25 +10,40 @@ /******************************************************************************/ /* Data. */ -static base_t *b0; +static base_t *b0; + +metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT; + +const char *metadata_thp_mode_names[] = { + "disabled", + "auto", + "always" +}; /******************************************************************************/ +static inline bool +metadata_thp_madvise(void) { + return (metadata_thp_enabled() && + (init_system_thp_mode == thp_mode_default)); +} + static void * base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) { void *addr; bool zero = true; bool commit = true; + /* Use huge page sizes and alignment regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - + size_t alignment = HUGEPAGE; if (extent_hooks == &extent_hooks_default) { - addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); + addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, + addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment, &zero, &commit, ind); post_reentrancy(tsd); } @@ -51,16 +66,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, */ if (extent_hooks == &extent_hooks_default) { if (!extent_dalloc_mmap(addr, size)) { - return; + goto label_done; } if (!pages_decommit(addr, size)) { - return; + goto label_done; } if (!pages_purge_forced(addr, size)) { - return; + goto label_done; } if (!pages_purge_lazy(addr, size)) { - return; + goto label_done; } /* Nothing worked. This should never happen. */ not_reached(); @@ -70,27 +85,33 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, if (extent_hooks->dalloc != NULL && !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->decommit != NULL && !extent_hooks->decommit(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_forced != NULL && !extent_hooks->purge_forced(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_lazy != NULL && !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. */ - label_done: + label_post_reentrancy: post_reentrancy(tsd); - return; + } +label_done: + if (metadata_thp_madvise()) { + /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */ + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (size & HUGEPAGE_MASK) == 0); + pages_nohuge(addr, size); } } @@ -105,6 +126,56 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, extent_binit(extent, addr, size, sn); } +static size_t +base_get_num_blocks(base_t *base, bool with_new_block) { + base_block_t *b = base->blocks; + assert(b != NULL); + + size_t n_blocks = with_new_block ? 
2 : 1; + while (b->next != NULL) { + n_blocks++; + b = b->next; + } + + return n_blocks; +} + +static void +base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { + assert(opt_metadata_thp == metadata_thp_auto); + malloc_mutex_assert_owner(tsdn, &base->mtx); + if (base->auto_thp_switched) { + return; + } + /* Called when adding a new block. */ + bool should_switch; + if (base_ind_get(base) != 0) { + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD); + } else { + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD_A0); + } + if (!should_switch) { + return; + } + + base->auto_thp_switched = true; + assert(!config_stats || base->n_thp == 0); + /* Make the initial blocks THP lazily. */ + base_block_t *block = base->blocks; + while (block != NULL) { + assert((block->size & HUGEPAGE_MASK) == 0); + pages_huge(block, block->size); + if (config_stats) { + base->n_thp += HUGEPAGE_CEILING(block->size - + extent_bsize_get(&block->extent)) >> LG_HUGEPAGE; + } + block = block->next; + assert(block == NULL || (base_ind_get(base) == 0)); + } +} + static void * base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, size_t alignment) { @@ -124,8 +195,8 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, } static void -base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t gap_size, void *addr, size_t size) { +base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size, + void *addr, size_t size) { if (extent_bsize_get(extent) > 0) { /* * Compute the index for the largest size class that does not @@ -140,23 +211,31 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, base->allocated += size; /* * Add one PAGE to base_resident for every page boundary that is - * crossed by the new allocation. + * crossed by the new allocation. Adjust n_thp similarly when + * metadata_thp is enabled. */ base->resident += PAGE_CEILING((uintptr_t)addr + size) - PAGE_CEILING((uintptr_t)addr - gap_size); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + if (metadata_thp_madvise() && (opt_metadata_thp == + metadata_thp_always || base->auto_thp_switched)) { + base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) + - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> + LG_HUGEPAGE; + assert(base->mapped >= base->n_thp << LG_HUGEPAGE); + } } } static void * -base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t size, size_t alignment) { +base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size, + size_t alignment) { void *ret; size_t gap_size; ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment); - base_extent_bump_alloc_post(tsdn, base, extent, gap_size, ret, size); + base_extent_bump_alloc_post(base, extent, gap_size, ret, size); return ret; } @@ -166,8 +245,8 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, * On success a pointer to the initialized base_block_t header is returned. 
*/ static base_block_t * -base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, - pszind_t *pind_last, size_t *extent_sn_next, size_t size, +base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, + unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size, size_t alignment) { alignment = ALIGNMENT_CEILING(alignment, QUANTUM); size_t usize = ALIGNMENT_CEILING(size, alignment); @@ -183,8 +262,8 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, */ size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size + usize)); - pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 : - *pind_last; + pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ? + *pind_last + 1 : *pind_last; size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next)); size_t block_size = (min_block_size > next_block_size) ? min_block_size : next_block_size; @@ -193,6 +272,25 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, if (block == NULL) { return NULL; } + + if (metadata_thp_madvise()) { + void *addr = (void *)block; + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (block_size & HUGEPAGE_MASK) == 0); + if (opt_metadata_thp == metadata_thp_always) { + pages_huge(addr, block_size); + } else if (opt_metadata_thp == metadata_thp_auto && + base != NULL) { + /* base != NULL indicates this is not a new base. */ + malloc_mutex_lock(tsdn, &base->mtx); + base_auto_thp_switch(tsdn, base); + if (base->auto_thp_switched) { + pages_huge(addr, block_size); + } + malloc_mutex_unlock(tsdn, &base->mtx); + } + } + *pind_last = sz_psz2ind(block_size); block->size = block_size; block->next = NULL; @@ -216,7 +314,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { * called. 
*/ malloc_mutex_unlock(tsdn, &base->mtx); - base_block_t *block = base_block_alloc(tsdn, extent_hooks, + base_block_t *block = base_block_alloc(tsdn, base, extent_hooks, base_ind_get(base), &base->pind_last, &base->extent_sn_next, size, alignment); malloc_mutex_lock(tsdn, &base->mtx); @@ -229,8 +327,16 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->allocated += sizeof(base_block_t); base->resident += PAGE_CEILING(sizeof(base_block_t)); base->mapped += block->size; + if (metadata_thp_madvise() && + !(opt_metadata_thp == metadata_thp_auto + && !base->auto_thp_switched)) { + assert(base->n_thp > 0); + base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> + LG_HUGEPAGE; + } assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } return &block->extent; } @@ -244,7 +350,7 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; - base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind, + base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { return NULL; @@ -265,17 +371,22 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->pind_last = pind_last; base->extent_sn_next = extent_sn_next; base->blocks = block; - for (szind_t i = 0; i < NSIZES; i++) { + base->auto_thp_switched = false; + for (szind_t i = 0; i < SC_NSIZES; i++) { extent_heap_new(&base->avail[i]); } if (config_stats) { base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); base->mapped = block->size; + base->n_thp = (opt_metadata_thp == metadata_thp_always) && + metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t)) + >> LG_HUGEPAGE : 0; assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base, + base_extent_bump_alloc_post(base, &block->extent, gap_size, base, base_size); return base; @@ -315,7 +426,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, extent_t *extent = NULL; malloc_mutex_lock(tsdn, &base->mtx); - for (szind_t i = sz_size2index(asize); i < NSIZES; i++) { + for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) { extent = extent_heap_remove_first(&base->avail[i]); if (extent != NULL) { /* Use existing space. 
*/ @@ -332,7 +443,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, goto label_return; } - ret = base_extent_bump_alloc(tsdn, base, extent, usize, alignment); + ret = base_extent_bump_alloc(base, extent, usize, alignment); if (esn != NULL) { *esn = extent_sn_get(extent); } @@ -368,7 +479,7 @@ base_alloc_extent(tsdn_t *tsdn, base_t *base) { void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, - size_t *mapped) { + size_t *mapped, size_t *n_thp) { cassert(config_stats); malloc_mutex_lock(tsdn, &base->mtx); @@ -377,6 +488,7 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, *allocated = base->allocated; *resident = base->resident; *mapped = base->mapped; + *n_thp = base->n_thp; malloc_mutex_unlock(tsdn, &base->mtx); } diff --git a/deps/jemalloc/src/bin.c b/deps/jemalloc/src/bin.c new file mode 100644 index 0000000000..bca6b12c35 --- /dev/null +++ b/deps/jemalloc/src/bin.c @@ -0,0 +1,95 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/witness.h" + +bin_info_t bin_infos[SC_NBINS]; + +static void +bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + bin_info_t bin_infos[SC_NBINS]) { + for (unsigned i = 0; i < SC_NBINS; i++) { + bin_info_t *bin_info = &bin_infos[i]; + sc_t *sc = &sc_data->sc[i]; + bin_info->reg_size = ((size_t)1U << sc->lg_base) + + ((size_t)sc->ndelta << sc->lg_delta); + bin_info->slab_size = (sc->pgs << LG_PAGE); + bin_info->nregs = + (uint32_t)(bin_info->slab_size / bin_info->reg_size); + bin_info->n_shards = bin_shard_sizes[i]; + bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( + bin_info->nregs); + bin_info->bitmap_info = bitmap_info; + } +} + +bool +bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size, + size_t end_size, size_t nshards) { + if (nshards > BIN_SHARDS_MAX || nshards == 0) { + return true; + } + + if (start_size > SC_SMALL_MAXCLASS) { + return false; + } + if (end_size > SC_SMALL_MAXCLASS) { + end_size = SC_SMALL_MAXCLASS; + } + + /* Compute the index since this may happen before sz init. */ + szind_t ind1 = sz_size2index_compute(start_size); + szind_t ind2 = sz_size2index_compute(end_size); + for (unsigned i = ind1; i <= ind2; i++) { + bin_shard_sizes[i] = (unsigned)nshards; + } + + return false; +} + +void +bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { + /* Load the default number of shards. 
*/ + for (unsigned i = 0; i < SC_NBINS; i++) { + bin_shard_sizes[i] = N_BIN_SHARDS_DEFAULT; + } +} + +void +bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { + assert(sc_data->initialized); + bin_infos_init(sc_data, bin_shard_sizes, bin_infos); +} + +bool +bin_init(bin_t *bin) { + if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, + malloc_mutex_rank_exclusive)) { + return true; + } + bin->slabcur = NULL; + extent_heap_new(&bin->slabs_nonfull); + extent_list_init(&bin->slabs_full); + if (config_stats) { + memset(&bin->stats, 0, sizeof(bin_stats_t)); + } + return false; +} + +void +bin_prefork(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_prefork(tsdn, &bin->lock); +} + +void +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_parent(tsdn, &bin->lock); +} + +void +bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_child(tsdn, &bin->lock); +} diff --git a/deps/jemalloc/src/ckh.c b/deps/jemalloc/src/ckh.c index e95e0a3ed5..1bf6df5a11 100644 --- a/deps/jemalloc/src/ckh.c +++ b/deps/jemalloc/src/ckh.c @@ -275,7 +275,8 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { lg_curcells++; usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 + || usize > SC_LARGE_MAXCLASS)) { ret = true; goto label_return; } @@ -320,7 +321,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return; } tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, @@ -396,7 +397,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->keycomp = keycomp; usize = sz_sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { ret = true; goto label_return; } diff --git a/deps/jemalloc/src/ctl.c b/deps/jemalloc/src/ctl.c index 36bc8fb5b7..48afaa61f4 100644 --- a/deps/jemalloc/src/ctl.c +++ b/deps/jemalloc/src/ctl.c @@ -8,7 +8,7 @@ #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" /******************************************************************************/ @@ -57,6 +57,7 @@ static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ CTL_PROTO(version) CTL_PROTO(epoch) CTL_PROTO(background_thread) +CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_prof_name) @@ -71,20 +72,24 @@ CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) CTL_PROTO(config_malloc_conf) +CTL_PROTO(config_opt_safety_checks) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_thp) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_confirm_conf) +CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) +CTL_PROTO(opt_oversize_threshold) CTL_PROTO(opt_background_thread) +CTL_PROTO(opt_max_background_threads) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) 
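The CTL_PROTO declarations above wire the new background-thread controls into the mallctl namespace; as a minimal sketch (assuming an unprefixed jemalloc build with background threads compiled in, so mallctl() and these names are available), an application could query and lower the new max_background_threads limit like this:

    /* Hedged sketch, not part of the patch: exercise the new
     * "max_background_threads" control through the public API. */
    #include <jemalloc/jemalloc.h>
    #include <stdbool.h>
    #include <stdio.h>

    int main(void) {
        /* The limit only matters once background threads are enabled. */
        bool enable = true;
        mallctl("background_thread", NULL, NULL, &enable, sizeof(enable));

        size_t max_threads;
        size_t sz = sizeof(max_threads);
        if (mallctl("max_background_threads", &max_threads, &sz, NULL, 0) == 0) {
            printf("current background thread limit: %zu\n", max_threads);
        }

        /* Cap the number of background threads at 2 (must not exceed
         * opt.max_background_threads, per the ctl handler below). */
        size_t new_limit = 2;
        if (mallctl("max_background_threads", NULL, NULL, &new_limit,
            sizeof(new_limit)) != 0) {
            fprintf(stderr, "failed to set max_background_threads\n");
        }
        return 0;
    }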
CTL_PROTO(opt_stats_print) @@ -94,6 +99,8 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_thp) +CTL_PROTO(opt_lg_extent_max_active_fit) CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) @@ -117,10 +124,12 @@ CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_dirty_decay_ms) CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) +CTL_PROTO(arena_i_retain_grow_limit) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_slab_size) +CTL_PROTO(arenas_bin_i_nshards) INDEX_PROTO(arenas_bin_i) CTL_PROTO(arenas_lextent_i_size) INDEX_PROTO(arenas_lextent_i) @@ -134,6 +143,7 @@ CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlextents) CTL_PROTO(arenas_create) +CTL_PROTO(arenas_lookup) CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) @@ -141,14 +151,20 @@ CTL_PROTO(prof_gdump) CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) +CTL_PROTO(prof_log_start) +CTL_PROTO(prof_log_stop) CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) CTL_PROTO(stats_arenas_i_small_nrequests) +CTL_PROTO(stats_arenas_i_small_nfills) +CTL_PROTO(stats_arenas_i_small_nflushes) CTL_PROTO(stats_arenas_i_large_allocated) CTL_PROTO(stats_arenas_i_large_nmalloc) CTL_PROTO(stats_arenas_i_large_ndalloc) CTL_PROTO(stats_arenas_i_large_nrequests) +CTL_PROTO(stats_arenas_i_large_nfills) +CTL_PROTO(stats_arenas_i_large_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) CTL_PROTO(stats_arenas_i_bins_j_nrequests) @@ -158,12 +174,20 @@ CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) +CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) CTL_PROTO(stats_arenas_i_lextents_j_nrequests) CTL_PROTO(stats_arenas_i_lextents_j_curlextents) INDEX_PROTO(stats_arenas_i_lextents_j) +CTL_PROTO(stats_arenas_i_extents_j_ndirty) +CTL_PROTO(stats_arenas_i_extents_j_nmuzzy) +CTL_PROTO(stats_arenas_i_extents_j_nretained) +CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) +CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) +CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) +INDEX_PROTO(stats_arenas_i_extents_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) @@ -174,6 +198,7 @@ CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_pmuzzy) CTL_PROTO(stats_arenas_i_mapped) CTL_PROTO(stats_arenas_i_retained) +CTL_PROTO(stats_arenas_i_extent_avail) CTL_PROTO(stats_arenas_i_dirty_npurge) CTL_PROTO(stats_arenas_i_dirty_nmadvise) CTL_PROTO(stats_arenas_i_dirty_purged) @@ -182,8 +207,10 @@ CTL_PROTO(stats_arenas_i_muzzy_nmadvise) CTL_PROTO(stats_arenas_i_muzzy_purged) CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) +CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_resident) +CTL_PROTO(stats_arenas_i_abandoned_vm) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) @@ -191,9 +218,16 @@ CTL_PROTO(stats_background_thread_num_threads) CTL_PROTO(stats_background_thread_num_runs) CTL_PROTO(stats_background_thread_run_interval) CTL_PROTO(stats_metadata) +CTL_PROTO(stats_metadata_thp) 
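Among the protos just added is arenas_lookup, the handler behind the new "arenas.lookup" control. A hedged usage sketch follows, assuming the allocation pointer is passed through newp and the owning arena index is returned through oldp:

    /* Hedged sketch, not part of the patch: resolve which arena owns an
     * allocation via "arenas.lookup". */
    #include <jemalloc/jemalloc.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
        void *p = malloc(4096);
        unsigned arena_ind;
        size_t sz = sizeof(arena_ind);
        /* newp carries the pointer to look up; oldp receives the index. */
        if (p != NULL &&
            mallctl("arenas.lookup", &arena_ind, &sz, &p, sizeof(p)) == 0) {
            printf("%p belongs to arena %u\n", p, arena_ind);
        }
        free(p);
        return 0;
    }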
CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) +CTL_PROTO(experimental_hooks_install) +CTL_PROTO(experimental_hooks_remove) +CTL_PROTO(experimental_utilization_query) +CTL_PROTO(experimental_utilization_batch_query) +CTL_PROTO(experimental_arenas_i_pactivep) +INDEX_PROTO(experimental_arenas_i) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -262,11 +296,11 @@ static const ctl_named_node_t config_node[] = { {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, {NAME("malloc_conf"), CTL(config_malloc_conf)}, + {NAME("opt_safety_checks"), CTL(config_opt_safety_checks)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("thp"), CTL(config_thp)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -274,11 +308,15 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("confirm_conf"), CTL(opt_confirm_conf)}, + {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, + {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, {NAME("background_thread"), CTL(opt_background_thread)}, + {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -288,6 +326,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("thp"), CTL(opt_thp)}, + {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, @@ -316,7 +356,8 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("dss"), CTL(arena_i_dss)}, {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, - {NAME("extent_hooks"), CTL(arena_i_extent_hooks)} + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, + {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)} }; static const ctl_named_node_t super_arena_i_node[] = { {NAME(""), CHILD(named, arena_i)} @@ -329,7 +370,8 @@ static const ctl_indexed_node_t arena_node[] = { static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, - {NAME("slab_size"), CTL(arenas_bin_i_slab_size)} + {NAME("slab_size"), CTL(arenas_bin_i_slab_size)}, + {NAME("nshards"), CTL(arenas_bin_i_nshards)} }; static const ctl_named_node_t super_arenas_bin_i_node[] = { {NAME(""), CHILD(named, arenas_bin_i)} @@ -362,7 +404,8 @@ static const ctl_named_node_t arenas_node[] = { {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlextents"), CTL(arenas_nlextents)}, {NAME("lextent"), CHILD(indexed, arenas_lextent)}, - {NAME("create"), CTL(arenas_create)} + {NAME("create"), CTL(arenas_create)}, + {NAME("lookup"), CTL(arenas_lookup)} }; static const ctl_named_node_t prof_node[] = { @@ -372,21 +415,26 @@ static const ctl_named_node_t prof_node[] = { {NAME("gdump"), CTL(prof_gdump)}, {NAME("reset"), CTL(prof_reset)}, 
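The arena_i_node table above now exposes retain_grow_limit, backed by arena_retain_grow_limit_get_set() earlier in this diff. A small sketch of reading and tightening it through the public API; the "arena.0.retain_grow_limit" name and size_t type are assumptions for a jemalloc 5.2-era build, and the call is only meaningful (and may fail otherwise) when opt.retain is enabled:

    /* Hedged sketch, not part of the patch: cap how aggressively arena 0
     * grows retained extents. */
    #include <jemalloc/jemalloc.h>
    #include <stdio.h>

    int main(void) {
        size_t old_limit;
        size_t new_limit = (size_t)64 << 20; /* 64 MiB */
        size_t sz = sizeof(old_limit);
        /* Read the current limit and install the new one in one call. */
        if (mallctl("arena.0.retain_grow_limit", &old_limit, &sz,
            &new_limit, sizeof(new_limit)) == 0) {
            printf("retain grow limit: %zu -> %zu\n", old_limit, new_limit);
        }
        return 0;
    }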
{NAME("interval"), CTL(prof_interval)}, - {NAME("lg_sample"), CTL(lg_prof_sample)} + {NAME("lg_sample"), CTL(lg_prof_sample)}, + {NAME("log_start"), CTL(prof_log_start)}, + {NAME("log_stop"), CTL(prof_log_stop)} }; - static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_small_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_small_nflushes)} }; static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_large_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_large_nflushes)} }; #define MUTEX_PROF_DATA_NODE(prefix) \ @@ -420,6 +468,7 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, + {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)} }; @@ -445,6 +494,23 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { {INDEX(stats_arenas_i_lextents_j)} }; +static const ctl_named_node_t stats_arenas_i_extents_j_node[] = { + {NAME("ndirty"), CTL(stats_arenas_i_extents_j_ndirty)}, + {NAME("nmuzzy"), CTL(stats_arenas_i_extents_j_nmuzzy)}, + {NAME("nretained"), CTL(stats_arenas_i_extents_j_nretained)}, + {NAME("dirty_bytes"), CTL(stats_arenas_i_extents_j_dirty_bytes)}, + {NAME("muzzy_bytes"), CTL(stats_arenas_i_extents_j_muzzy_bytes)}, + {NAME("retained_bytes"), CTL(stats_arenas_i_extents_j_retained_bytes)} +}; + +static const ctl_named_node_t super_stats_arenas_i_extents_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_extents_j)} +}; + +static const ctl_indexed_node_t stats_arenas_i_extents_node[] = { + {INDEX(stats_arenas_i_extents_j)} +}; + #define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx) MUTEX_PROF_ARENA_MUTEXES #undef OP @@ -466,6 +532,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, {NAME("retained"), CTL(stats_arenas_i_retained)}, + {NAME("extent_avail"), CTL(stats_arenas_i_extent_avail)}, {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)}, {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)}, {NAME("dirty_purged"), CTL(stats_arenas_i_dirty_purged)}, @@ -474,12 +541,15 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, {NAME("base"), CTL(stats_arenas_i_base)}, {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, + {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, 
{NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, + {NAME("extents"), CHILD(indexed, stats_arenas_i_extents)}, {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { @@ -512,6 +582,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, + {NAME("metadata_thp"), CTL(stats_metadata_thp)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("retained"), CTL(stats_retained)}, @@ -521,10 +592,38 @@ static const ctl_named_node_t stats_node[] = { {NAME("arenas"), CHILD(indexed, stats_arenas)} }; +static const ctl_named_node_t experimental_hooks_node[] = { + {NAME("install"), CTL(experimental_hooks_install)}, + {NAME("remove"), CTL(experimental_hooks_remove)} +}; + +static const ctl_named_node_t experimental_utilization_node[] = { + {NAME("query"), CTL(experimental_utilization_query)}, + {NAME("batch_query"), CTL(experimental_utilization_batch_query)} +}; + +static const ctl_named_node_t experimental_arenas_i_node[] = { + {NAME("pactivep"), CTL(experimental_arenas_i_pactivep)} +}; +static const ctl_named_node_t super_experimental_arenas_i_node[] = { + {NAME(""), CHILD(named, experimental_arenas_i)} +}; + +static const ctl_indexed_node_t experimental_arenas_node[] = { + {INDEX(experimental_arenas_i)} +}; + +static const ctl_named_node_t experimental_node[] = { + {NAME("hooks"), CHILD(named, experimental_hooks)}, + {NAME("utilization"), CHILD(named, experimental_utilization)}, + {NAME("arenas"), CHILD(indexed, experimental_arenas)} +}; + static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, {NAME("background_thread"), CTL(background_thread)}, + {NAME("max_background_threads"), CTL(max_background_threads)}, {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, @@ -532,7 +631,8 @@ static const ctl_named_node_t root_node[] = { {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, - {NAME("stats"), CHILD(named, stats)} + {NAME("stats"), CHILD(named, stats)}, + {NAME("experimental"), CHILD(named, experimental)} }; static const ctl_named_node_t super_root_node[] = { {NAME(""), CHILD(named, root)} @@ -550,7 +650,7 @@ static const ctl_named_node_t super_root_node[] = { * synchronized by the ctl mutex. */ static void -accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { +ctl_accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { #ifdef JEMALLOC_ATOMIC_U64 uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED); @@ -562,7 +662,7 @@ accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { /* Likewise: with ctl mutex synchronization, reading is simple. 
*/ static uint64_t -arena_stats_read_u64(arena_stats_u64_t *p) { +ctl_arena_stats_read_u64(arena_stats_u64_t *p) { #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_u64(p, ATOMIC_RELAXED); #else @@ -570,7 +670,8 @@ arena_stats_read_u64(arena_stats_u64_t *p) { #endif } -static void accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { +static void +accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED); atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED); @@ -679,10 +780,14 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->nmalloc_small = 0; ctl_arena->astats->ndalloc_small = 0; ctl_arena->astats->nrequests_small = 0; - memset(ctl_arena->astats->bstats, 0, NBINS * - sizeof(malloc_bin_stats_t)); - memset(ctl_arena->astats->lstats, 0, (NSIZES - NBINS) * - sizeof(malloc_large_stats_t)); + ctl_arena->astats->nfills_small = 0; + ctl_arena->astats->nflushes_small = 0; + memset(ctl_arena->astats->bstats, 0, SC_NBINS * + sizeof(bin_stats_t)); + memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * + sizeof(arena_stats_large_t)); + memset(ctl_arena->astats->estats, 0, SC_NPSIZES * + sizeof(arena_stats_extents_t)); } } @@ -696,9 +801,9 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->muzzy_decay_ms, &ctl_arena->pactive, &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, - ctl_arena->astats->lstats); + ctl_arena->astats->lstats, ctl_arena->astats->estats); - for (i = 0; i < NBINS; i++) { + for (i = 0; i < SC_NBINS; i++) { ctl_arena->astats->allocated_small += ctl_arena->astats->bstats[i].curregs * sz_index2size(i); @@ -708,6 +813,10 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { ctl_arena->astats->bstats[i].ndalloc; ctl_arena->astats->nrequests_small += ctl_arena->astats->bstats[i].nrequests; + ctl_arena->astats->nfills_small += + ctl_arena->astats->bstats[i].nfills; + ctl_arena->astats->nflushes_small += + ctl_arena->astats->bstats[i].nflushes; } } else { arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads, @@ -743,20 +852,22 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.mapped); accum_atomic_zu(&sdstats->astats.retained, &astats->astats.retained); + accum_atomic_zu(&sdstats->astats.extent_avail, + &astats->astats.extent_avail); } - accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, &astats->astats.decay_dirty.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, &astats->astats.decay_dirty.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, &astats->astats.decay_dirty.purged); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, &astats->astats.decay_muzzy.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, &astats->astats.decay_muzzy.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ @@ -773,6 +884,8 @@ MUTEX_PROF_ARENA_MUTEXES 
&astats->astats.internal); accum_atomic_zu(&sdstats->astats.resident, &astats->astats.resident); + accum_atomic_zu(&sdstats->astats.metadata_thp, + &astats->astats.metadata_thp); } else { assert(atomic_load_zu( &astats->astats.internal, ATOMIC_RELAXED) == 0); @@ -786,6 +899,8 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->nmalloc_small += astats->nmalloc_small; sdstats->ndalloc_small += astats->ndalloc_small; sdstats->nrequests_small += astats->nrequests_small; + sdstats->nfills_small += astats->nfills_small; + sdstats->nflushes_small += astats->nflushes_small; if (!destroyed) { accum_atomic_zu(&sdstats->astats.allocated_large, @@ -794,12 +909,14 @@ MUTEX_PROF_ARENA_MUTEXES assert(atomic_load_zu(&astats->astats.allocated_large, ATOMIC_RELAXED) == 0); } - accum_arena_stats_u64(&sdstats->astats.nmalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nmalloc_large, &astats->astats.nmalloc_large); - accum_arena_stats_u64(&sdstats->astats.ndalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.ndalloc_large, &astats->astats.ndalloc_large); - accum_arena_stats_u64(&sdstats->astats.nrequests_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); + accum_atomic_zu(&sdstats->astats.abandoned_vm, + &astats->astats.abandoned_vm); accum_atomic_zu(&sdstats->astats.tcache_bytes, &astats->astats.tcache_bytes); @@ -808,7 +925,8 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->astats.uptime = astats->astats.uptime; } - for (i = 0; i < NBINS; i++) { + /* Merge bin stats. */ + for (i = 0; i < SC_NBINS; i++) { sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sdstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; sdstats->bstats[i].nrequests += @@ -827,19 +945,23 @@ MUTEX_PROF_ARENA_MUTEXES if (!destroyed) { sdstats->bstats[i].curslabs += astats->bstats[i].curslabs; + sdstats->bstats[i].nonfull_slabs += + astats->bstats[i].nonfull_slabs; } else { assert(astats->bstats[i].curslabs == 0); + assert(astats->bstats[i].nonfull_slabs == 0); } malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); } - for (i = 0; i < NSIZES - NBINS; i++) { - accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, + /* Merge stats for large allocations. */ + for (i = 0; i < SC_NSIZES - SC_NBINS; i++) { + ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, &astats->lstats[i].nmalloc); - accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, &astats->lstats[i].ndalloc); - accum_arena_stats_u64(&sdstats->lstats[i].nrequests, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].nrequests, &astats->lstats[i].nrequests); if (!destroyed) { sdstats->lstats[i].curlextents += @@ -848,6 +970,22 @@ MUTEX_PROF_ARENA_MUTEXES assert(astats->lstats[i].curlextents == 0); } } + + /* Merge extents stats. 
*/ + for (i = 0; i < SC_NPSIZES; i++) { + accum_atomic_zu(&sdstats->estats[i].ndirty, + &astats->estats[i].ndirty); + accum_atomic_zu(&sdstats->estats[i].nmuzzy, + &astats->estats[i].nmuzzy); + accum_atomic_zu(&sdstats->estats[i].nretained, + &astats->estats[i].nretained); + accum_atomic_zu(&sdstats->estats[i].dirty_bytes, + &astats->estats[i].dirty_bytes); + accum_atomic_zu(&sdstats->estats[i].muzzy_bytes, + &astats->estats[i].muzzy_bytes); + accum_atomic_zu(&sdstats->estats[i].retained_bytes, + &astats->estats[i].retained_bytes); + } } } @@ -938,6 +1076,8 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) + atomic_load_zu(&ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); + ctl_stats->metadata_thp = atomic_load_zu( + &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); ctl_stats->mapped = atomic_load_zu( @@ -1357,8 +1497,8 @@ label_return: \ #define CTL_RO_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1400,8 +1540,8 @@ label_return: \ */ #define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1419,8 +1559,8 @@ label_return: \ #define CTL_RO_NL_GEN(n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1454,8 +1594,8 @@ label_return: \ #define CTL_RO_CONFIG_GEN(n, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1473,8 +1613,8 @@ label_return: \ CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int -epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; UNUSED uint64_t newval; @@ -1492,8 +1632,9 @@ label_return: } static int -background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +background_thread_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; bool oldval; @@ -1523,22 +1664,74 @@ background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, background_thread_enabled_set(tsd_tsdn(tsd), newval); if (newval) { - if (!can_enable_background_thread) { - malloc_printf("<jemalloc>: Error in dlsym(" - "RTLD_NEXT, \"pthread_create\"). 
Cannot " - "enable background_thread\n"); + if (background_threads_enable(tsd)) { ret = EFAULT; goto label_return; } - if (background_threads_enable(tsd)) { + } else { + if (background_threads_disable(tsd)) { ret = EFAULT; goto label_return; } - } else { + } + } + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + + return ret; +} + +static int +max_background_threads_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + size_t oldval; + + if (!have_background_thread) { + return ENOENT; + } + background_thread_ctl_init(tsd_tsdn(tsd)); + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + if (newp == NULL) { + oldval = max_background_threads; + READ(oldval, size_t); + } else { + if (newlen != sizeof(size_t)) { + ret = EINVAL; + goto label_return; + } + oldval = max_background_threads; + READ(oldval, size_t); + + size_t newval = *(size_t *)newp; + if (newval == oldval) { + ret = 0; + goto label_return; + } + if (newval > opt_max_background_threads) { + ret = EINVAL; + goto label_return; + } + + if (background_thread_enabled()) { + background_thread_enabled_set(tsd_tsdn(tsd), false); if (background_threads_disable(tsd)) { ret = EFAULT; goto label_return; } + max_background_threads = newval; + background_thread_enabled_set(tsd_tsdn(tsd), true); + if (background_threads_enable(tsd)) { + ret = EFAULT; + goto label_return; + } + } else { + max_background_threads = newval; } } ret = 0; @@ -1556,11 +1749,11 @@ CTL_RO_CONFIG_GEN(config_debug, bool) CTL_RO_CONFIG_GEN(config_fill, bool) CTL_RO_CONFIG_GEN(config_lazy_lock, bool) CTL_RO_CONFIG_GEN(config_malloc_conf, const char *) +CTL_RO_CONFIG_GEN(config_opt_safety_checks, bool) CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) -CTL_RO_CONFIG_GEN(config_thp, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) @@ -1568,12 +1761,17 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) +CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], + const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) +CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) +CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) @@ -1583,6 +1781,9 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) +CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, + size_t) CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, 
opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) @@ -1599,8 +1800,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) /******************************************************************************/ static int -thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; arena_t *oldarena; unsigned newind, oldind; @@ -1664,8 +1865,9 @@ CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, tsd_thread_deallocatedp_get, uint64_t *) static int -thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; bool oldval; @@ -1685,8 +1887,9 @@ label_return: } static int -thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; if (!tcache_available(tsd)) { @@ -1705,8 +1908,9 @@ label_return: } static int -thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; if (!config_prof) { @@ -1736,8 +1940,9 @@ label_return: } static int -thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; bool oldval; @@ -1766,8 +1971,8 @@ label_return: /******************************************************************************/ static int -tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned tcache_ind; @@ -1784,8 +1989,8 @@ label_return: } static int -tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned tcache_ind; @@ -1804,8 +2009,8 @@ label_return: } static int -tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned tcache_ind; @@ -1953,9 +2158,8 @@ arena_reset_prepare_background_thread(tsd_t *tsd, unsigned arena_ind) { if (have_background_thread) { malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); if (background_thread_enabled()) { - unsigned ind = arena_ind % ncpus; background_thread_info_t *info = - &background_thread_info[ind]; + background_thread_info_get(arena_ind); assert(info->state == background_thread_started); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); info->state 
= background_thread_paused; @@ -1968,9 +2172,8 @@ static void arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) { if (have_background_thread) { if (background_thread_enabled()) { - unsigned ind = arena_ind % ncpus; background_thread_info_t *info = - &background_thread_info[ind]; + background_thread_info_get(arena_ind); assert(info->state == background_thread_paused); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); info->state = background_thread_started; @@ -2126,6 +2329,17 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } + if (arena_is_huge(arena_ind) && *(ssize_t *)newp > 0) { + /* + * By default the huge arena purges eagerly. If it is + * set to non-zero decay time afterwards, background + * thread might be needed. + */ + if (background_thread_create(tsd, arena_ind)) { + ret = EFAULT; + goto label_return; + } + } if (dirty ? arena_dirty_decay_ms_set(tsd_tsdn(tsd), arena, *(ssize_t *)newp) : arena_muzzy_decay_ms_set(tsd_tsdn(tsd), arena, *(ssize_t *)newp)) { @@ -2162,20 +2376,41 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); - if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { - if (newp != NULL) { - extent_hooks_t *old_extent_hooks; - extent_hooks_t *new_extent_hooks - JEMALLOC_CC_SILENCE_INIT(NULL); - WRITE(new_extent_hooks, extent_hooks_t *); - old_extent_hooks = extent_hooks_set(tsd, arena, - new_extent_hooks); + if (arena_ind < narenas_total_get()) { + extent_hooks_t *old_extent_hooks; + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + if (arena_ind >= narenas_auto) { + ret = EFAULT; + goto label_return; + } + old_extent_hooks = + (extent_hooks_t *)&extent_hooks_default; READ(old_extent_hooks, extent_hooks_t *); + if (newp != NULL) { + /* Initialize a new arena as a side effect. */ + extent_hooks_t *new_extent_hooks + JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_extent_hooks, extent_hooks_t *); + arena = arena_init(tsd_tsdn(tsd), arena_ind, + new_extent_hooks); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + } } else { - extent_hooks_t *old_extent_hooks = - extent_hooks_get(arena); - READ(old_extent_hooks, extent_hooks_t *); + if (newp != NULL) { + extent_hooks_t *new_extent_hooks + JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_extent_hooks, extent_hooks_t *); + old_extent_hooks = extent_hooks_set(tsd, arena, + new_extent_hooks); + READ(old_extent_hooks, extent_hooks_t *); + } else { + old_extent_hooks = extent_hooks_get(arena); + READ(old_extent_hooks, extent_hooks_t *); + } } } else { ret = EFAULT; @@ -2187,8 +2422,46 @@ label_return: return ret; } +static int +arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + unsigned arena_ind; + arena_t *arena; + + if (!opt_retain) { + /* Only relevant when retain is enabled. */ + return ENOENT; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + MIB_UNSIGNED(arena_ind, 1); + if (arena_ind < narenas_total_get() && (arena = + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { + size_t old_limit, new_limit; + if (newp != NULL) { + WRITE(new_limit, size_t); + } + bool err = arena_retain_grow_limit_get_set(tsd, arena, + &old_limit, newp != NULL ? 
&new_limit : NULL); + if (!err) { + READ(old_limit, size_t); + ret = 0; + } else { + ret = EFAULT; + } + } else { + ret = EFAULT; + } +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + static const ctl_named_node_t * -arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { +arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, + size_t i) { const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); @@ -2213,8 +2486,8 @@ label_return: /******************************************************************************/ static int -arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; @@ -2234,8 +2507,9 @@ label_return: } static int -arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { +arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen, bool dirty) { int ret; if (oldp != NULL && oldlenp != NULL) { @@ -2248,7 +2522,7 @@ arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) + if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { ret = EFAULT; goto label_return; @@ -2277,34 +2551,36 @@ arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) -CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) +CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_slab_size, arena_bin_info[mib[2]].slab_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nshards, bin_infos[mib[2]].n_shards, uint32_t) static const ctl_named_node_t * -arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > NBINS) { +arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t i) { + if (i > SC_NBINS) { return NULL; } return super_arenas_bin_i_node; } -CTL_RO_NL_GEN(arenas_nlextents, NSIZES - NBINS, unsigned) -CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(NBINS+(szind_t)mib[2]), +CTL_RO_NL_GEN(arenas_nlextents, SC_NSIZES - SC_NBINS, unsigned) +CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(SC_NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t i) { - if (i > NSIZES - NBINS) { +arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t i) { + if (i > SC_NSIZES - SC_NBINS) { return NULL; } return super_arenas_lextent_i_node; } static int -arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { 
+arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; extent_hooks_t *extent_hooks; unsigned arena_ind; @@ -2325,11 +2601,43 @@ label_return: return ret; } +static int +arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + unsigned arena_ind; + void *ptr; + extent_t *extent; + arena_t *arena; + + ptr = NULL; + ret = EINVAL; + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + WRITE(ptr, void *); + extent = iealloc(tsd_tsdn(tsd), ptr); + if (extent == NULL) + goto label_return; + + arena = extent_arena_get(extent); + if (arena == NULL) + goto label_return; + + arena_ind = arena_ind_get(arena); + READ(arena_ind, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + /******************************************************************************/ static int -prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; bool oldval; @@ -2355,8 +2663,8 @@ label_return: } static int -prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -2381,8 +2689,8 @@ label_return: } static int -prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *filename = NULL; @@ -2404,8 +2712,8 @@ label_return: } static int -prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -2430,8 +2738,8 @@ label_return: } static int -prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; size_t lg_sample = lg_prof_sample; @@ -2455,11 +2763,50 @@ label_return: CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) +static int +prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + const char *filename = NULL; + + if (!config_prof) { + return ENOENT; + } + + WRITEONLY(); + WRITE(filename, const char *); + + if (prof_log_start(tsd_tsdn(tsd), filename)) { + ret = EFAULT; + goto label_return; + } + + ret = 0; +label_return: + return ret; +} + +static int +prof_log_stop_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + if (!config_prof) { + return ENOENT; + } + + if (prof_log_stop(tsd_tsdn(tsd))) { + return EFAULT; + } + + return 0; +} + /******************************************************************************/ CTL_RO_CGEN(config_stats, stats_allocated, 
ctl_stats->allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) +CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) @@ -2488,26 +2835,30 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, CTL_RO_CGEN(config_stats, stats_arenas_i_retained, atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.extent_avail, + ATOMIC_RELAXED), + size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.npurge), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), @@ -2515,12 +2866,18 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_base, CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.metadata_thp, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.abandoned_vm, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, arenas_i(mib[2])->astats->allocated_small, size_t) @@ -2530,18 +2887,32 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, arenas_i(mib[2])->astats->ndalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, arenas_i(mib[2])->astats->nrequests_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nfills, + arenas_i(mib[2])->astats->nfills_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nflushes, + 
arenas_i(mib[2])->astats->nflushes_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.ndalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) /* Intentional. */ + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t) +/* + * Note: "nmalloc_large" here instead of "nfills" in the read. This is + * intentional (large has no batch fill). + */ +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills, + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t) /* Lock profiling related APIs below. */ #define RO_MUTEX_CTL_GEN(n, l) \ @@ -2580,8 +2951,9 @@ RO_MUTEX_CTL_GEN(arenas_i_bins_j_mutex, /* Resets all mutex stats, including global, arena and bin mutexes. */ static int -stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { if (!config_stats) { return ENOENT; } @@ -2621,9 +2993,11 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, MUTEX_PROF_RESET(arena->tcache_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); - for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - MUTEX_PROF_RESET(bin->lock); + for (szind_t i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = &arena->bins[i].bin_shards[j]; + MUTEX_PROF_RESET(bin->lock); + } } } #undef MUTEX_PROF_RESET @@ -2648,45 +3022,90 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, + arenas_i(mib[2])->astats->bstats[mib[4]].nonfull_slabs, size_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) { - if (j > NBINS) { +stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t j) { + if (j > SC_NBINS) { return NULL; } return super_stats_arenas_i_bins_j_node; } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) CTL_RO_CGEN(config_stats, 
stats_arenas_i_lextents_j_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) static const ctl_named_node_t * -stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) { - if (j > NSIZES - NBINS) { +stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t j) { + if (j > SC_NSIZES - SC_NBINS) { return NULL; } return super_stats_arenas_i_lextents_j_node; } +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_ndirty, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].ndirty, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nmuzzy, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nretained, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].nretained, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_dirty_bytes, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_muzzy_bytes, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_retained_bytes, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, + ATOMIC_RELAXED), size_t); + +static const ctl_named_node_t * +stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t j) { + if (j >= SC_NPSIZES) { + return NULL; + } + return super_stats_arenas_i_extents_j_node; +} + +static bool +ctl_arenas_i_verify(size_t i) { + size_t a = arenas_i2a_impl(i, true, true); + if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) { + return true; + } + + return false; +} + static const ctl_named_node_t * -stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { +stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t i) { const ctl_named_node_t *ret; - size_t a; malloc_mutex_lock(tsdn, &ctl_mtx); - a = arenas_i2a_impl(i, true, true); - if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) { + if (ctl_arenas_i_verify(i)) { ret = NULL; goto label_return; } @@ -2696,3 +3115,321 @@ label_return: malloc_mutex_unlock(tsdn, &ctl_mtx); return ret; } + +static int +experimental_hooks_install_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + if (oldp == NULL || oldlenp == NULL|| newp == NULL) { + ret = EINVAL; + goto label_return; + } + /* + * Note: this is a *private* struct. This is an experimental interface; + * forcing the user to know the jemalloc internals well enough to + * extract the ABI hopefully ensures nobody gets too comfortable with + * this API, which can change at a moment's notice. 
+ */ + hooks_t hooks; + WRITE(hooks, hooks_t); + void *handle = hook_install(tsd_tsdn(tsd), &hooks); + if (handle == NULL) { + ret = EAGAIN; + goto label_return; + } + READ(handle, void *); + + ret = 0; +label_return: + return ret; +} + +static int +experimental_hooks_remove_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + WRITEONLY(); + void *handle = NULL; + WRITE(handle, void *); + if (handle == NULL) { + ret = EINVAL; + goto label_return; + } + hook_remove(tsd_tsdn(tsd), handle); + ret = 0; +label_return: + return ret; +} + +/* + * Output six memory utilization entries for an input pointer, the first one of + * type (void *) and the remaining five of type size_t, describing the following + * (in the same order): + * + * (a) memory address of the extent a potential reallocation would go into, + * == the five fields below describe about the extent the pointer resides in == + * (b) number of free regions in the extent, + * (c) number of regions in the extent, + * (d) size of the extent in terms of bytes, + * (e) total number of free regions in the bin the extent belongs to, and + * (f) total number of regions in the bin the extent belongs to. + * + * Note that "(e)" and "(f)" are only available when stats are enabled; + * otherwise their values are undefined. + * + * This API is mainly intended for small class allocations, where extents are + * used as slab. + * + * In case of large class allocations, "(a)" will be NULL, and "(e)" and "(f)" + * will be zero (if stats are enabled; otherwise undefined). The other three + * fields will be properly set though the values are trivial: "(b)" will be 0, + * "(c)" will be 1, and "(d)" will be the usable size. + * + * The input pointer and size are respectively passed in by newp and newlen, + * and the output fields and size are respectively oldp and *oldlenp. + * + * It can be beneficial to define the following macros to make it easier to + * access the output: + * + * #define SLABCUR_READ(out) (*(void **)out) + * #define COUNTS(out) ((size_t *)((void **)out + 1)) + * #define NFREE_READ(out) COUNTS(out)[0] + * #define NREGS_READ(out) COUNTS(out)[1] + * #define SIZE_READ(out) COUNTS(out)[2] + * #define BIN_NFREE_READ(out) COUNTS(out)[3] + * #define BIN_NREGS_READ(out) COUNTS(out)[4] + * + * and then write e.g. NFREE_READ(oldp) to fetch the output. See the unit test + * test_query in test/unit/extent_util.c for an example. + * + * For a typical defragmentation workflow making use of this API for + * understanding the fragmentation level, please refer to the comment for + * experimental_utilization_batch_query_ctl. + * + * It's up to the application how to determine the significance of + * fragmentation relying on the outputs returned. Possible choices are: + * + * (a) if extent utilization ratio is below certain threshold, + * (b) if extent memory consumption is above certain threshold, + * (c) if extent utilization ratio is significantly below bin utilization ratio, + * (d) if input pointer deviates a lot from potential reallocation address, or + * (e) some selection/combination of the above. + * + * The caller needs to make sure that the input/output arguments are valid, + * in particular, that the size of the output is correct, i.e.: + * + * *oldlenp = sizeof(void *) + sizeof(size_t) * 5 + * + * Otherwise, the function immediately returns EINVAL without touching anything. 
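A minimal caller-side sketch of the utilization query described above. It assumes the handler is published under the mallctl name "experimental.utilization.query" (inferred from the handler's place in the ctl tree, not from public documentation) and that a plain struct of one void * followed by five size_t is layout-compatible with the output contract; both are assumptions, not guarantees.

#include <jemalloc/jemalloc.h>
#include <stddef.h>

/* Assumed layout-compatible with the output described above: one void *
 * followed by five size_t, with no padding in between. */
typedef struct {
	void *slabcur_addr;	/* (a) */
	size_t counts[5];	/* (b) nfree, (c) nregs, (d) size,
				 * (e) bin_nfree, (f) bin_nregs */
} util_query_out_t;

static int
query_utilization(void *ptr, util_query_out_t *out) {
	size_t outlen = sizeof(*out);
	/* newp carries the queried pointer by value; oldp receives the six
	 * output fields; *oldlenp must match the output size exactly. */
	return mallctl("experimental.utilization.query", out, &outlen,
	    &ptr, sizeof(ptr));
}

On success, out->counts[0] / out->counts[1] gives the free-to-total region ratio of the slab the pointer resides in, which is the quantity most defragmentation heuristics start from.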
+ * + * In the rare case where there's no associated extent found for the input + * pointer, the function zeros out all output fields and return. Please refer + * to the comment for experimental_utilization_batch_query_ctl to understand the + * motivation from C++. + */ +static int +experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + assert(sizeof(extent_util_stats_verbose_t) + == sizeof(void *) + sizeof(size_t) * 5); + + if (oldp == NULL || oldlenp == NULL + || *oldlenp != sizeof(extent_util_stats_verbose_t) + || newp == NULL) { + ret = EINVAL; + goto label_return; + } + + void *ptr = NULL; + WRITE(ptr, void *); + extent_util_stats_verbose_t *util_stats + = (extent_util_stats_verbose_t *)oldp; + extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr, + &util_stats->nfree, &util_stats->nregs, &util_stats->size, + &util_stats->bin_nfree, &util_stats->bin_nregs, + &util_stats->slabcur_addr); + ret = 0; + +label_return: + return ret; +} + +/* + * Given an input array of pointers, output three memory utilization entries of + * type size_t for each input pointer about the extent it resides in: + * + * (a) number of free regions in the extent, + * (b) number of regions in the extent, and + * (c) size of the extent in terms of bytes. + * + * This API is mainly intended for small class allocations, where extents are + * used as slab. In case of large class allocations, the outputs are trivial: + * "(a)" will be 0, "(b)" will be 1, and "(c)" will be the usable size. + * + * Note that multiple input pointers may reside on a same extent so the output + * fields may contain duplicates. + * + * The format of the input/output looks like: + * + * input[0]: 1st_pointer_to_query | output[0]: 1st_extent_n_free_regions + * | output[1]: 1st_extent_n_regions + * | output[2]: 1st_extent_size + * input[1]: 2nd_pointer_to_query | output[3]: 2nd_extent_n_free_regions + * | output[4]: 2nd_extent_n_regions + * | output[5]: 2nd_extent_size + * ... | ... + * + * The input array and size are respectively passed in by newp and newlen, and + * the output array and size are respectively oldp and *oldlenp. + * + * It can be beneficial to define the following macros to make it easier to + * access the output: + * + * #define NFREE_READ(out, i) out[(i) * 3] + * #define NREGS_READ(out, i) out[(i) * 3 + 1] + * #define SIZE_READ(out, i) out[(i) * 3 + 2] + * + * and then write e.g. NFREE_READ(oldp, i) to fetch the output. See the unit + * test test_batch in test/unit/extent_util.c for a concrete example. + * + * A typical workflow would be composed of the following steps: + * + * (1) flush tcache: mallctl("thread.tcache.flush", ...) + * (2) initialize input array of pointers to query fragmentation + * (3) allocate output array to hold utilization statistics + * (4) query utilization: mallctl("experimental.utilization.batch_query", ...) + * (5) (optional) decide if it's worthwhile to defragment; otherwise stop here + * (6) disable tcache: mallctl("thread.tcache.enabled", ...) + * (7) defragment allocations with significant fragmentation, e.g.: + * for each allocation { + * if it's fragmented { + * malloc(...); + * memcpy(...); + * free(...); + * } + * } + * (8) enable tcache: mallctl("thread.tcache.enabled", ...) + * + * The application can determine the significance of fragmentation themselves + * relying on the statistics returned, both at the overall level i.e. step "(5)" + * and at individual allocation level i.e. 
within step "(7)". Possible choices + * are: + * + * (a) whether memory utilization ratio is below certain threshold, + * (b) whether memory consumption is above certain threshold, or + * (c) some combination of the two. + * + * The caller needs to make sure that the input/output arrays are valid and + * their sizes are proper as well as matched, meaning: + * + * (a) newlen = n_pointers * sizeof(const void *) + * (b) *oldlenp = n_pointers * sizeof(size_t) * 3 + * (c) n_pointers > 0 + * + * Otherwise, the function immediately returns EINVAL without touching anything. + * + * In the rare case where there's no associated extent found for some pointers, + * rather than immediately terminating the computation and raising an error, + * the function simply zeros out the corresponding output fields and continues + * the computation until all input pointers are handled. The motivations of + * such a design are as follows: + * + * (a) The function always either processes nothing or processes everything, and + * never leaves the output half touched and half untouched. + * + * (b) It facilitates usage needs especially common in C++. A vast variety of + * C++ objects are instantiated with multiple dynamic memory allocations. For + * example, std::string and std::vector typically use at least two allocations, + * one for the metadata and one for the actual content. Other types may use + * even more allocations. When inquiring about utilization statistics, the + * caller often wants to examine into all such allocations, especially internal + * one(s), rather than just the topmost one. The issue comes when some + * implementations do certain optimizations to reduce/aggregate some internal + * allocations, e.g. putting short strings directly into the metadata, and such + * decisions are not known to the caller. Therefore, we permit pointers to + * memory usages that may not be returned by previous malloc calls, and we + * provide the caller a convenient way to identify such cases. 
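A hedged sketch of steps (3) and (4) of the workflow above. It assumes the handler is reachable as the mallctl "experimental.utilization.batch_query" (again inferred from the ctl tree rather than public docs) and uses the accessor macros suggested in the comment.

#include <jemalloc/jemalloc.h>
#include <stdio.h>

#define NFREE_READ(out, i) out[(i) * 3]
#define NREGS_READ(out, i) out[(i) * 3 + 1]
#define SIZE_READ(out, i)  out[(i) * 3 + 2]

/* Query n pointers in one call; out must hold exactly 3 * n size_t values,
 * matching the size contract spelled out above. Returns 0 on success. */
static int
batch_utilization(const void **ptrs, size_t n, size_t *out) {
	size_t outlen = n * sizeof(size_t) * 3;
	return mallctl("experimental.utilization.batch_query", out, &outlen,
	    (void *)ptrs, n * sizeof(const void *));
}

static void
report(const void **ptrs, size_t n, size_t *out) {
	if (batch_utilization(ptrs, n, out) != 0) {
		return;
	}
	for (size_t i = 0; i < n; i++) {
		printf("ptr %zu: %zu of %zu regions free, extent size %zu\n",
		    i, NFREE_READ(out, i), NREGS_READ(out, i),
		    SIZE_READ(out, i));
	}
}

Steps (1), (6) and (8) of the workflow map onto mallctl("thread.tcache.flush", NULL, NULL, NULL, 0) and boolean writes to "thread.tcache.enabled", both of which are part of the documented mallctl namespace.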
+ */ +static int +experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + assert(sizeof(extent_util_stats_t) == sizeof(size_t) * 3); + + const size_t len = newlen / sizeof(const void *); + if (oldp == NULL || oldlenp == NULL || newp == NULL || newlen == 0 + || newlen != len * sizeof(const void *) + || *oldlenp != len * sizeof(extent_util_stats_t)) { + ret = EINVAL; + goto label_return; + } + + void **ptrs = (void **)newp; + extent_util_stats_t *util_stats = (extent_util_stats_t *)oldp; + size_t i; + for (i = 0; i < len; ++i) { + extent_util_stats_get(tsd_tsdn(tsd), ptrs[i], + &util_stats[i].nfree, &util_stats[i].nregs, + &util_stats[i].size); + } + ret = 0; + +label_return: + return ret; +} + +static const ctl_named_node_t * +experimental_arenas_i_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t i) { + const ctl_named_node_t *ret; + + malloc_mutex_lock(tsdn, &ctl_mtx); + if (ctl_arenas_i_verify(i)) { + ret = NULL; + goto label_return; + } + ret = super_experimental_arenas_i_node; +label_return: + malloc_mutex_unlock(tsdn, &ctl_mtx); + return ret; +} + +static int +experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + if (!config_stats) { + return ENOENT; + } + if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(size_t *)) { + return EINVAL; + } + + unsigned arena_ind; + arena_t *arena; + int ret; + size_t *pactivep; + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + READONLY(); + MIB_UNSIGNED(arena_ind, 2); + if (arena_ind < narenas_total_get() && (arena = + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { +#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || \ + defined(JEMALLOC_GCC_SYNC_ATOMICS) || defined(_MSC_VER) + /* Expose the underlying counter for fast read. */ + pactivep = (size_t *)&(arena->nactive.repr); + READ(pactivep, size_t *); + ret = 0; +#else + ret = EFAULT; +#endif + } else { + ret = EFAULT; + } +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} diff --git a/deps/jemalloc/src/div.c b/deps/jemalloc/src/div.c new file mode 100644 index 0000000000..808892a133 --- /dev/null +++ b/deps/jemalloc/src/div.c @@ -0,0 +1,55 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/div.h" + +#include "jemalloc/internal/assert.h" + +/* + * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d. + * + * For any k, we have (here, all division is exact; not C-style rounding): + * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where + * r = (-2^k) mod d. + * + * Expanding this out: + * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k) + * = floor(n / d + (r / d) * (n / 2^k)). + * + * The fractional part of n / d is 0 (because of the assumption that d divides n + * exactly), so we have: + * ... = n / d + floor((r / d) * (n / 2^k)) + * + * So that our initial expression is equal to the quantity we seek, so long as + * (r / d) * (n / 2^k) < 1. + * + * r is a remainder mod d, so r < d and r / d < 1 always. We can make + * n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works. + */ + +void +div_init(div_info_t *div_info, size_t d) { + /* Nonsensical. */ + assert(d != 0); + /* + * This would make the value of magic too high to fit into a uint32_t + * (we would want magic = 2^32 exactly). This would mess with code gen + * on 32-bit machines. 
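The derivation above can be checked numerically without touching jemalloc's headers. The snippet below recomputes the magic value the same way div_init does and verifies that the shift-by-32 trick recovers q = n / d exactly for multiples of an arbitrarily chosen divisor (d = 48 here, purely illustrative).

#include <assert.h>
#include <stdint.h>

int
main(void) {
	uint64_t d = 48;			/* arbitrary divisor > 1 */
	uint64_t two_to_k = (uint64_t)1 << 32;
	uint32_t magic = (uint32_t)(two_to_k / d);
	if (two_to_k % d != 0) {
		magic++;			/* ceil(2^32 / d), as in div_init */
	}
	for (uint64_t q = 0; q < ((uint64_t)1 << 20); q++) {
		uint64_t n = q * d;		/* n is an exact multiple of d */
		assert(((n * (uint64_t)magic) >> 32) == q);
	}
	return 0;
}

Any d > 1 with q * d < 2^32 keeps the assertion true, matching the bound (r / d) * (n / 2^k) < 1 derived above.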
+ */ + assert(d != 1); + + uint64_t two_to_k = ((uint64_t)1 << 32); + uint32_t magic = (uint32_t)(two_to_k / d); + + /* + * We want magic = ceil(2^k / d), but C gives us floor. We have to + * increment it unless the result was exact (i.e. unless d is a power of + * two). + */ + if (two_to_k % d != 0) { + magic++; + } + div_info->magic = magic; +#ifdef JEMALLOC_DEBUG + div_info->d = d; +#endif +} diff --git a/deps/jemalloc/src/extent.c b/deps/jemalloc/src/extent.c index fa45c84d34..9237f903dc 100644 --- a/deps/jemalloc/src/extent.c +++ b/deps/jemalloc/src/extent.c @@ -17,8 +17,10 @@ rtree_t extents_rtree; /* Keyed by the address of the extent_t being protected. */ mutex_pool_t extent_mutex_pool; +size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; + static const bitmap_info_t extents_bitmap_info = - BITMAP_INFO_INITIALIZER(NPSIZES+1); + BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, @@ -48,20 +50,16 @@ static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -#ifdef JEMALLOC_MAPS_COALESCE static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); -#endif static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); -#ifdef JEMALLOC_MAPS_COALESCE static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); -#endif static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, bool growing_retained); @@ -86,11 +84,9 @@ const extent_hooks_t extent_hooks_default = { , NULL #endif -#ifdef JEMALLOC_MAPS_COALESCE , extent_split_default, extent_merge_default -#endif }; /* Used exclusively for gdump triggering. */ @@ -117,9 +113,13 @@ static void extent_record(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ -rb_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, rb_link, +#define ATTR_NONE /* does nothing */ + +ph_gen(ATTR_NONE, extent_avail_, extent_tree_t, extent_t, ph_link, extent_esnead_comp) +#undef ATTR_NONE + typedef enum { lock_result_success, lock_result_failure, @@ -128,13 +128,16 @@ typedef enum { static lock_result_t extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, - extent_t **result) { + extent_t **result, bool inactive_only) { extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree, elm, true); - if (extent1 == NULL) { + /* Slab implies active extents and should be skipped. */ + if (extent1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, + &extents_rtree, elm, true))) { return lock_result_no_extent; } + /* * It's possible that the extent changed out from under us, and with it * the leaf->extent mapping. We have to recheck while holding the lock. @@ -157,7 +160,8 @@ extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, * address, and NULL otherwise. 
*/ static extent_t * -extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr) { +extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, + bool inactive_only) { extent_t *ret = NULL; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)addr, false, false); @@ -166,7 +170,8 @@ extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr) { } lock_result_t lock_result; do { - lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret); + lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, + inactive_only); } while (lock_result == lock_result_failure); return ret; } @@ -180,6 +185,7 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) { return base_alloc_extent(tsdn, arena->base); } extent_avail_remove(&arena->extent_avail, extent); + atomic_fetch_sub_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); return extent; } @@ -188,6 +194,7 @@ void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); extent_avail_insert(&arena->extent_avail, extent); + atomic_fetch_add_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); } @@ -253,7 +260,7 @@ extent_size_quantize_ceil(size_t size) { size_t ret; assert(size > 0); - assert(size - sz_large_pad <= LARGE_MAXCLASS); + assert(size - sz_large_pad <= SC_LARGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = extent_size_quantize_floor(size); @@ -282,7 +289,7 @@ extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, malloc_mutex_rank_exclusive)) { return true; } - for (unsigned i = 0; i < NPSIZES+1; i++) { + for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { extent_heap_new(&extents->heaps[i]); } bitmap_init(extents->bitmap, &extents_bitmap_info, true); @@ -303,9 +310,34 @@ extents_npages_get(extents_t *extents) { return atomic_load_zu(&extents->npages, ATOMIC_RELAXED); } +size_t +extents_nextents_get(extents_t *extents, pszind_t pind) { + return atomic_load_zu(&extents->nextents[pind], ATOMIC_RELAXED); +} + +size_t +extents_nbytes_get(extents_t *extents, pszind_t pind) { + return atomic_load_zu(&extents->nbytes[pind], ATOMIC_RELAXED); +} + +static void +extents_stats_add(extents_t *extent, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&extent->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&extent->nextents[pind], cur + 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&extent->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&extent->nbytes[pind], cur + sz, ATOMIC_RELAXED); +} + +static void +extents_stats_sub(extents_t *extent, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&extent->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&extent->nextents[pind], cur - 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&extent->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&extent->nbytes[pind], cur - sz, ATOMIC_RELAXED); +} + static void -extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, - bool preserve_lru) { +extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -317,9 +349,12 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, (size_t)pind); } extent_heap_insert(&extents->heaps[pind], extent); - if (!preserve_lru) { - extent_list_append(&extents->lru, extent); + + if (config_stats) { + extents_stats_add(extents, pind, 
size); } + + extent_list_append(&extents->lru, extent); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we @@ -333,8 +368,7 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, } static void -extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, - bool preserve_lru) { +extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -342,13 +376,16 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, size_t psz = extent_size_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); extent_heap_remove(&extents->heaps[pind], extent); + + if (config_stats) { + extents_stats_sub(extents, pind, size); + } + if (extent_heap_empty(&extents->heaps[pind])) { bitmap_set(extents->bitmap, &extents_bitmap_info, (size_t)pind); } - if (!preserve_lru) { - extent_list_remove(&extents->lru, extent); - } + extent_list_remove(&extents->lru, extent); size_t npages = size >> LG_PAGE; /* * As in extents_insert_locked, we hold extents->mtx and so don't need @@ -361,18 +398,38 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } -/* Do any-best-fit extent selection, i.e. select any extent that best fits. */ +/* + * Find an extent with size [min_size, max_size) to satisfy the alignment + * requirement. For each size, try only the first extent in the heap. + */ static extent_t * -extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - size_t size) { - pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); - pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, - (size_t)pind); - if (i < NPSIZES+1) { +extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, + size_t alignment) { + pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(extent_size_quantize_ceil(max_size)); + + for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, + &extents_bitmap_info, (size_t)pind); i < pind_max; i = + (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + (size_t)i+1)) { + assert(i < SC_NPSIZES); assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_any(&extents->heaps[i]); - assert(extent_size_get(extent) >= size); - return extent; + extent_t *extent = extent_heap_first(&extents->heaps[i]); + uintptr_t base = (uintptr_t)extent_base_get(extent); + size_t candidate_size = extent_size_get(extent); + assert(candidate_size >= min_size); + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= min_size) { + return extent; + } } return NULL; @@ -388,39 +445,75 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, extent_t *ret = NULL; pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); + + if (!maps_coalesce && !opt_retain) { + /* + * No split / merge allowed (Windows w/o retain). Try exact fit + * only. + */ + return extent_heap_empty(&extents->heaps[pind]) ? 
NULL : + extent_heap_first(&extents->heaps[pind]); + } + for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, - &extents_bitmap_info, (size_t)pind); i < NPSIZES+1; i = - (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + &extents_bitmap_info, (size_t)pind); + i < SC_NPSIZES + 1; + i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)i+1)) { assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); assert(extent_size_get(extent) >= size); + /* + * In order to reduce fragmentation, avoid reusing and splitting + * large extents for much smaller sizes. + * + * Only do check for dirty extents (delay_coalesce). + */ + if (extents->delay_coalesce && + (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + break; + } if (ret == NULL || extent_snad_comp(extent, ret) < 0) { ret = extent; } - if (i == NPSIZES) { + if (i == SC_NPSIZES) { break; } - assert(i < NPSIZES); + assert(i < SC_NPSIZES); } return ret; } /* - * Do {best,first}-fit extent selection, where the selection policy choice is - * based on extents->delay_coalesce. Best-fit selection requires less - * searching, but its layout policy is less stable and may cause higher virtual - * memory fragmentation as a side effect. + * Do first-fit extent selection, where the selection policy choice is + * based on extents->delay_coalesce. */ static extent_t * extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - size_t size) { + size_t esize, size_t alignment) { malloc_mutex_assert_owner(tsdn, &extents->mtx); - return extents->delay_coalesce ? extents_best_fit_locked(tsdn, arena, - extents, size) : extents_first_fit_locked(tsdn, arena, extents, - size); + size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (max_size < esize) { + return NULL; + } + + extent_t *extent = + extents_first_fit_locked(tsdn, arena, extents, max_size); + + if (alignment > PAGE && extent == NULL) { + /* + * max_size guarantees the alignment requirement but is rather + * pessimistic. Next we try to satisfy the aligned allocation + * with sizes in [esize, max_size). + */ + extent = extents_fit_alignment(extents, esize, max_size, + alignment); + } + + return extent; } static bool @@ -436,7 +529,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, if (!coalesced) { return true; } - extents_insert_locked(tsdn, extents, extent, true); + extents_insert_locked(tsdn, extents, extent); return false; } @@ -449,8 +542,10 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - return extent_recycle(tsdn, arena, r_extent_hooks, extents, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); + extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, extents, + new_addr, size, pad, alignment, slab, szind, zero, commit, false); + assert(extent == NULL || extent_dumpable_get(extent)); + return extent; } void @@ -458,6 +553,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -487,14 +583,13 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, goto label_return; } /* Check the eviction limit. 
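The max_size bound used in extents_fit_locked above admits a quick numeric check. The sketch below (illustrative only; it assumes 4 KiB pages and a page-multiple alignment) confirms that an extent of esize + PAGE_CEILING(alignment) - PAGE bytes always contains an alignment-aligned run of at least esize bytes, whatever its page-aligned base, which is why max_size is a sufficient but pessimistic first-fit request.

#include <assert.h>
#include <stdint.h>

int
main(void) {
	const uintptr_t page = 4096;
	const uintptr_t esize = 3 * page;	/* requested size */
	const uintptr_t align = 4 * page;	/* requested alignment */
	const uintptr_t max_size = esize + align - page;
	/* Try every page-aligned placement of a max_size-byte extent. */
	for (uintptr_t base = page; base <= 64 * align; base += page) {
		uintptr_t aligned = (base + align - 1) & ~(align - 1);
		assert(aligned + esize <= base + max_size);
	}
	return 0;
}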
*/ - size_t npages = extent_size_get(extent) >> LG_PAGE; size_t extents_npages = atomic_load_zu(&extents->npages, ATOMIC_RELAXED); - if (extents_npages - npages < npages_min) { + if (extents_npages <= npages_min) { extent = NULL; goto label_return; } - extents_remove_locked(tsdn, extents, extent, false); + extents_remove_locked(tsdn, extents, extent); if (!extents->delay_coalesce) { break; } @@ -532,16 +627,24 @@ label_return: return extent; } +/* + * This can only happen when we fail to allocate a new extent struct (which + * indicates OOM), e.g. when trying to split an existing extent. + */ static void -extents_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, bool growing_retained) { + size_t sz = extent_size_get(extent); + if (config_stats) { + arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); + } /* * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. */ if (extents_state_get(extents) == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, - extent, 0, extent_size_get(extent), growing_retained)) { + extent, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent), growing_retained); @@ -567,29 +670,29 @@ extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extent_state_active); extent_state_set(extent, extents_state_get(extents)); - extents_insert_locked(tsdn, extents, extent, preserve_lru); + extents_insert_locked(tsdn, extents, extent); } static void extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { malloc_mutex_lock(tsdn, &extents->mtx); - extent_deactivate_locked(tsdn, arena, extents, extent, preserve_lru); + extent_deactivate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); } static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extents_state_get(extents)); - extents_remove_locked(tsdn, extents, extent, preserve_lru); + extents_remove_locked(tsdn, extents, extent); extent_state_set(extent, extent_state_active); } @@ -688,6 +791,7 @@ extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, &elm_a, &elm_b)) { + extent_unlock(tsdn, extent); return true; } @@ -723,6 +827,13 @@ extent_reregister(tsdn_t *tsdn, extent_t *extent) { assert(!err); } +/* + * Removes all pointers to the given extent from the global rtree indices for + * its interior. This is relevant for slab extents, for which we need to do + * metadata lookups at places other than the head of the extent. We deregister + * on the interior, then, when an extent moves from being an active slab to an + * inactive state. 
+ */ static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent) { @@ -737,8 +848,11 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } } +/* + * Removes all pointers to the given extent from the global rtree. + */ static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) { +extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; @@ -747,7 +861,7 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { extent_lock(tsdn, extent); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, NSIZES, false); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); if (extent_slab_get(extent)) { extent_interior_deregister(tsdn, rtree_ctx, extent); extent_slab_set(extent, false); @@ -755,16 +869,30 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { extent_unlock(tsdn, extent); - if (config_prof) { + if (config_prof && gdump) { extent_gdump_sub(tsdn, extent); } } +static void +extent_deregister(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, true); +} + +static void +extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, false); +} + +/* + * Tries to find and remove an extent from extents that can be used for the + * given allocation request. + */ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - bool *zero, bool *commit, bool growing_retained) { + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); @@ -786,16 +914,12 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, } size_t esize = size + pad; - size_t alloc_size = esize + PAGE_CEILING(alignment) - PAGE; - /* Beware size_t wrap-around. */ - if (alloc_size < esize) { - return NULL; - } malloc_mutex_lock(tsdn, &extents->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { - extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr); + extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, + false); if (extent != NULL) { /* * We might null-out extent to report an error, but we @@ -812,86 +936,194 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, unlock_extent); } } else { - extent = extents_fit_locked(tsdn, arena, extents, alloc_size); + extent = extents_fit_locked(tsdn, arena, extents, esize, + alignment); } if (extent == NULL) { malloc_mutex_unlock(tsdn, &extents->mtx); return NULL; } - extent_activate_locked(tsdn, arena, extents, extent, false); + extent_activate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); - if (extent_zeroed_get(extent)) { - *zero = true; - } - if (extent_committed_get(extent)) { - *commit = true; - } - return extent; } -static extent_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, +/* + * Given an allocation request and an extent guaranteed to be able to satisfy + * it, this splits off lead and trail extents, leaving extent pointing to an + * extent satisfying the allocation. 
+ * This function doesn't put lead or trail into any extents_t; it's the caller's + * job to ensure that they can be reused. + */ +typedef enum { + /* + * Split successfully. lead, extent, and trail, are modified to extents + * describing the ranges before, in, and after the given allocation. + */ + extent_split_interior_ok, + /* + * The extent can't satisfy the given allocation request. None of the + * input extent_t *s are touched. + */ + extent_split_interior_cant_alloc, + /* + * In a potentially invalid state. Must leak (if *to_leak is non-NULL), + * and salvage what's still salvageable (if *to_salvage is non-NULL). + * None of lead, extent, or trail are valid. + */ + extent_split_interior_error +} extent_split_interior_result_t; + +static extent_split_interior_result_t +extent_split_interior(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, + /* The result of splitting, in case of success. */ + extent_t **extent, extent_t **lead, extent_t **trail, + /* The mess to clean up, in case of error. */ + extent_t **to_leak, extent_t **to_salvage, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, extent_t *extent, bool growing_retained) { + szind_t szind, bool growing_retained) { size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); assert(new_addr == NULL || leadsize == 0); - assert(extent_size_get(extent) >= leadsize + esize); - size_t trailsize = extent_size_get(extent) - leadsize - esize; + if (extent_size_get(*extent) < leadsize + esize) { + return extent_split_interior_cant_alloc; + } + size_t trailsize = extent_size_get(*extent) - leadsize - esize; + + *lead = NULL; + *trail = NULL; + *to_leak = NULL; + *to_salvage = NULL; /* Split the lead. */ if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, - lead, leadsize, NSIZES, false, esize + trailsize, szind, + *lead = *extent; + *extent = extent_split_impl(tsdn, arena, r_extent_hooks, + *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, growing_retained); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, extents, - lead, growing_retained); - return NULL; + if (*extent == NULL) { + *to_leak = *lead; + *lead = NULL; + return extent_split_interior_error; } - extent_deactivate(tsdn, arena, extents, lead, false); } /* Split the trail. */ if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, - r_extent_hooks, extent, esize, szind, slab, trailsize, - NSIZES, false, growing_retained); - if (trail == NULL) { - extent_deregister(tsdn, extent); - extents_leak(tsdn, arena, r_extent_hooks, extents, - extent, growing_retained); - return NULL; + *trail = extent_split_impl(tsdn, arena, r_extent_hooks, *extent, + esize, szind, slab, trailsize, SC_NSIZES, false, + growing_retained); + if (*trail == NULL) { + *to_leak = *extent; + *to_salvage = *lead; + *lead = NULL; + *extent = NULL; + return extent_split_interior_error; } - extent_deactivate(tsdn, arena, extents, trail, false); - } else if (leadsize == 0) { + } + + if (leadsize == 0 && trailsize == 0) { /* * Splitting causes szind to be set as a side effect, but no * splitting occurred. 
*/ - extent_szind_set(extent, szind); - if (szind != NSIZES) { + extent_szind_set(*extent, szind); + if (szind != SC_NSIZES) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { + (uintptr_t)extent_addr_get(*extent), szind, slab); + if (slab && extent_size_get(*extent) > PAGE) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_past_get(extent) - + (uintptr_t)extent_past_get(*extent) - (uintptr_t)PAGE, szind, slab); } } } - return extent; + return extent_split_interior_ok; +} + +/* + * This fulfills the indicated allocation request out of the given extent (which + * the caller should have ensured was big enough). If there's any unused space + * before or after the resulting allocation, that space is given its own extent + * and put back into extents. + */ +static extent_t * +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, extent_t *extent, bool growing_retained) { + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, + growing_retained); + + if (!maps_coalesce && result != extent_split_interior_ok + && !opt_retain) { + /* + * Split isn't supported (implies Windows w/o retain). Avoid + * leaking the extents. + */ + assert(to_leak != NULL && lead == NULL && trail == NULL); + extent_deactivate(tsdn, arena, extents, to_leak); + return NULL; + } + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_deactivate(tsdn, arena, extents, lead); + } + if (trail != NULL) { + extent_deactivate(tsdn, arena, extents, trail); + } + return extent; + } else { + /* + * We should have picked an extent that was large enough to + * fulfill our allocation request. + */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + extent_deregister(tsdn, to_salvage); + } + if (to_leak != NULL) { + void *leak = extent_base_get(to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_abandon_vm(tsdn, arena, r_extent_hooks, extents, + to_leak, growing_retained); + assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, + false) == NULL); + } + return NULL; + } + unreachable(); +} + +static bool +extent_need_manual_zero(arena_t *arena) { + /* + * Need to manually zero the extent on repopulating if either; 1) non + * default extent hooks installed (in which case the purge semantics may + * change); or 2) transparent huge pages enabled. + */ + return (!arena_has_default_hooks(arena) || + (opt_thp == thp_mode_always)); } +/* + * Tries to satisfy the given allocation request by reusing one of the extents + * in the given extents_t. 
+ */ static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t size, size_t pad, @@ -906,16 +1138,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - bool committed = false; extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, new_addr, size, pad, alignment, slab, zero, - &committed, growing_retained); + rtree_ctx, extents, new_addr, size, pad, alignment, slab, + growing_retained); if (extent == NULL) { return NULL; } - if (committed) { - *commit = true; - } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, extents, new_addr, size, pad, alignment, slab, szind, extent, @@ -931,7 +1159,16 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent, growing_retained); return NULL; } - extent_zeroed_set(extent, true); + if (!extent_need_manual_zero(arena)) { + extent_zeroed_set(extent, true); + } + } + + if (extent_committed_get(extent)) { + *commit = true; + } + if (extent_zeroed_get(extent)) { + *zero = true; } if (pad != 0) { @@ -945,14 +1182,16 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (*zero) { void *addr = extent_base_get(extent); - size_t size = extent_size_get(extent); if (!extent_zeroed_get(extent)) { - if (pages_purge_forced(addr, size)) { + size_t size = extent_size_get(extent); + if (extent_need_manual_zero(arena) || + pages_purge_forced(addr, size)) { memset(addr, 0, size); } } else if (config_debug) { size_t *p = (size_t *)(uintptr_t)addr; - for (size_t i = 0; i < size / sizeof(size_t); i++) { + /* Check the first page only. */ + for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { assert(p[i] == 0); } } @@ -999,11 +1238,12 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, static void * extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; - - ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED)); + if (have_madvise_huge && ret) { + pages_set_thp_state(ret, size); + } return ret; } @@ -1022,13 +1262,24 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, assert(arena != NULL); return extent_alloc_default_impl(tsdn, arena, new_addr, size, - alignment, zero, commit); + ALIGNMENT_CEILING(alignment, PAGE), zero, commit); } static void extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); - pre_reentrancy(tsd, arena); + if (arena == arena_get(tsd_tsdn(tsd), 0, false)) { + /* + * The only legitimate case of customized extent hooks for a0 is + * hooks with no allocation activities. One such example is to + * place metadata on pre-allocated resources such as huge pages. + * In that case, rely on reentrancy_level checks to catch + * infinite recursions. 
+ */ + pre_reentrancy(tsd, NULL); + } else { + pre_reentrancy(tsd, arena); + } } static void @@ -1064,11 +1315,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (arena->extent_grow_next + egn_skip == NPSIZES) { + if (arena->extent_grow_next + egn_skip >= + sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } - assert(arena->extent_grow_next + egn_skip < NPSIZES); alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); } @@ -1081,9 +1332,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, void *ptr; if (*r_extent_hooks == &extent_hooks_default) { - ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE, - &zeroed, &committed, (dss_prec_t)atomic_load_u( - &arena->dss_prec, ATOMIC_RELAXED)); + ptr = extent_alloc_default_impl(tsdn, arena, NULL, + alloc_size, PAGE, &zeroed, &committed); } else { extent_hook_pre_reentrancy(tsdn, arena); ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, @@ -1092,23 +1342,19 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_hook_post_reentrancy(tsdn); } - extent_init(extent, arena, ptr, alloc_size, false, NSIZES, + extent_init(extent, arena, ptr, alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed); + committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); goto label_err; } + if (extent_register_no_gdump_add(tsdn, extent)) { - extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, true); + extent_dalloc(tsdn, arena, extent); goto label_err; } - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, - PAGE_CEILING(alignment)) - (uintptr_t)ptr; - assert(alloc_size >= leadsize + esize); - size_t trailsize = alloc_size - leadsize - esize; if (extent_zeroed_get(extent) && extent_committed_get(extent)) { *zero = true; } @@ -1116,54 +1362,46 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, *commit = true; } - /* Split the lead. */ - if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, lead, - leadsize, NSIZES, false, esize + trailsize, szind, slab, - true); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, + true); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained, lead, true); - goto label_err; } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, lead, true); - } - - /* Split the trail. 
*/ - if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, r_extent_hooks, - extent, esize, szind, slab, trailsize, NSIZES, false, true); - if (trail == NULL) { - extent_deregister(tsdn, extent); - extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, true); - goto label_err; + if (trail != NULL) { + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, trail, true); } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, trail, true); - } else if (leadsize == 0) { + } else { /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. + * We should have allocated a sufficiently large extent; the + * cant_alloc case should not occur. */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - - extent_szind_set(extent, szind); - if (szind != NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, - (uintptr_t)extent_past_get(extent) - - (uintptr_t)PAGE, szind, slab); + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + if (config_prof) { + extent_gdump_add(tsdn, to_salvage); } + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, to_salvage, true); + } + if (to_leak != NULL) { + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_abandon_vm(tsdn, arena, r_extent_hooks, + &arena->extents_retained, to_leak, true); } + goto label_err; } if (*commit && !extent_committed_get(extent)) { @@ -1173,17 +1411,20 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, &arena->extents_retained, extent, true); goto label_err; } - extent_zeroed_set(extent, true); + if (!extent_need_manual_zero(arena)) { + extent_zeroed_set(extent, true); + } } /* - * Increment extent_grow_next if doing so wouldn't exceed the legal + * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (arena->extent_grow_next + egn_skip + 1 < NPSIZES) { + if (arena->extent_grow_next + egn_skip + 1 <= + arena->retain_grow_limit) { arena->extent_grow_next += egn_skip + 1; } else { - arena->extent_grow_next = NPSIZES - 1; + arena->extent_grow_next = arena->retain_grow_limit; } /* All opportunities for failure are past. */ malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); @@ -1206,7 +1447,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (*zero && !extent_zeroed_get(extent)) { void *addr = extent_base_get(extent); size_t size = extent_size_get(extent); - if (pages_purge_forced(addr, size)) { + if (extent_need_manual_zero(arena) || + pages_purge_forced(addr, size)) { memset(addr, 0, size); } } @@ -1256,14 +1498,15 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } void *addr; + size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, - alignment, zero, commit); + palignment, zero, commit); } else { extent_hook_pre_reentrancy(tsdn, arena); addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, - esize, alignment, zero, commit, arena_ind_get(arena)); + esize, palignment, zero, commit, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } if (addr == NULL) { @@ -1271,13 +1514,13 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } extent_init(extent, arena, addr, esize, slab, szind, - arena_extent_sn_next(arena), extent_state_active, zero, commit); + arena_extent_sn_next(arena), extent_state_active, *zero, *commit, + true, EXTENT_NOT_HEAD); if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } if (extent_register(tsdn, extent)) { - extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, false); + extent_dalloc(tsdn, arena, extent); return NULL; } @@ -1296,10 +1539,20 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper_hard which does mmap + * that is very unlikely to succeed (unless it happens + * to be at the end). + */ + return NULL; + } extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } + assert(extent == NULL || extent_dumpable_get(extent)); return extent; } @@ -1329,16 +1582,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool growing_retained) { assert(extent_can_coalesce(arena, extents, inner, outer)); - if (forward && extents->delay_coalesce) { - /* - * The extent that remains after coalescing must occupy the - * outer extent's position in the LRU. For forward coalescing, - * swap the inner extent into the LRU. - */ - extent_list_replace(&extents->lru, outer, inner); - } - extent_activate_locked(tsdn, arena, extents, outer, - extents->delay_coalesce); + extent_activate_locked(tsdn, arena, extents, outer); malloc_mutex_unlock(tsdn, &extents->mtx); bool err = extent_merge_impl(tsdn, arena, r_extent_hooks, @@ -1346,20 +1590,22 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, malloc_mutex_lock(tsdn, &extents->mtx); if (err) { - if (forward && extents->delay_coalesce) { - extent_list_replace(&extents->lru, inner, outer); - } - extent_deactivate_locked(tsdn, arena, extents, outer, - extents->delay_coalesce); + extent_deactivate_locked(tsdn, arena, extents, outer); } return err; } static extent_t * -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, +extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, - extent_t *extent, bool *coalesced, bool growing_retained) { + extent_t *extent, bool *coalesced, bool growing_retained, + bool inactive_only) { + /* + * We avoid checking / locking inactive neighbors for large size + * classes, since they are eagerly coalesced on deallocation which can + * cause lock contention. + */ /* * Continue attempting to coalesce until failure, to protect against * races with other threads that are thwarted by this one. @@ -1370,7 +1616,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, /* Try to coalesce forward. 
*/ extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx, - extent_past_get(extent)); + extent_past_get(extent), inactive_only); if (next != NULL) { /* * extents->mtx only protects against races for @@ -1396,7 +1642,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, /* Try to coalesce backward. */ extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, - extent_before_get(extent)); + extent_before_get(extent), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(arena, extents, extent, prev); @@ -1422,6 +1668,26 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, return extent; } +static extent_t * +extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_t *extent, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, + extents, extent, coalesced, growing_retained, false); +} + +static extent_t * +extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_t *extent, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, + extents, extent, coalesced, growing_retained, true); +} + +/* + * Does the metadata management portions of putting an unused extent into the + * given extents_t (coalesces, deregisters slab interiors, the heap operations). + */ static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, bool growing_retained) { @@ -1435,7 +1701,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, malloc_mutex_lock(tsdn, &extents->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); - extent_szind_set(extent, NSIZES); + extent_szind_set(extent, SC_NSIZES); if (extent_slab_get(extent)) { extent_interior_deregister(tsdn, rtree_ctx, extent); extent_slab_set(extent, false); @@ -1447,9 +1713,24 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!extents->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, NULL, growing_retained); + } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { + assert(extents == &arena->extents_dirty); + /* Always coalesce large extents eagerly. */ + bool coalesced; + do { + assert(extent_state_get(extent) == extent_state_active); + extent = extent_try_coalesce_large(tsdn, arena, + r_extent_hooks, rtree_ctx, extents, extent, + &coalesced, growing_retained); + } while (coalesced); + if (extent_size_get(extent) >= oversize_threshold) { + /* Shortcut to purge the oversize extent eagerly. */ + malloc_mutex_unlock(tsdn, &extents->mtx); + arena_decay_extent(tsdn, arena, r_extent_hooks, extent); + return; + } } - - extent_deactivate_locked(tsdn, arena, extents, extent, false); + extent_deactivate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); } @@ -1462,14 +1743,19 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { WITNESS_RANK_CORE, 0); if (extent_register(tsdn, extent)) { - extents_leak(tsdn, arena, &extent_hooks, - &arena->extents_retained, extent, false); + extent_dalloc(tsdn, arena, extent); return; } extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent); } static bool +extent_may_dalloc(void) { + /* With retain enabled, the default dalloc always fails. 
*/ + return !opt_retain; +} + +static bool extent_dalloc_default_impl(void *addr, size_t size) { if (!have_dss || !extent_in_dss(addr)) { return extent_dalloc_mmap(addr, size); @@ -1520,19 +1806,24 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - /* - * Deregister first to avoid a race with other allocating threads, and - * reregister if deallocation fails. - */ - extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, extent)) { - return; + /* Avoid calling the default extent_dalloc unless have to. */ + if (*r_extent_hooks != &extent_hooks_default || extent_may_dalloc()) { + /* + * Deregister first to avoid a race with other allocating + * threads, and reregister if deallocation fails. + */ + extent_deregister(tsdn, extent); + if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, + extent)) { + return; + } + extent_reregister(tsdn, extent); } - extent_reregister(tsdn, extent); if (*r_extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } @@ -1772,14 +2063,28 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, offset, length, false); } -#ifdef JEMALLOC_MAPS_COALESCE static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { - return !maps_coalesce; + if (!maps_coalesce) { + /* + * Without retain, only whole regions can be purged (required by + * MEM_RELEASE on Windows) -- therefore disallow splitting. See + * comments in extent_head_no_merge(). + */ + return !opt_retain; + } + + return false; } -#endif +/* + * Accepts the extent to split, and the characteristics of each side of the + * split. The 'a' parameters go with the 'lead' of the resulting pair of + * extents (the lower addressed portion of the split), and the 'b' parameters go + * with the trail (the higher addressed portion). This makes 'extent' the lead, + * and returns the trail (except in case of error). 
+ */ static extent_t * extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, @@ -1803,7 +2108,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent), + EXTENT_NOT_HEAD); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1814,7 +2120,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(&lead, arena, extent_addr_get(extent), size_a, slab_a, szind_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent), + EXTENT_NOT_HEAD); extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, true, &lead_elm_a, &lead_elm_b); @@ -1872,7 +2179,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_merge_default_impl(void *addr_a, void *addr_b) { - if (!maps_coalesce) { + if (!maps_coalesce && !opt_retain) { return true; } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { @@ -1882,13 +2189,51 @@ extent_merge_default_impl(void *addr_a, void *addr_b) { return false; } -#ifdef JEMALLOC_MAPS_COALESCE +/* + * Returns true if the given extents can't be merged because of their head bit + * settings. Assumes the second extent has the higher address. + */ +static bool +extent_head_no_merge(extent_t *a, extent_t *b) { + assert(extent_base_get(a) < extent_base_get(b)); + /* + * When coalesce is not always allowed (Windows), only merge extents + * from the same VirtualAlloc region under opt.retain (in which case + * MEM_DECOMMIT is utilized for purging). + */ + if (maps_coalesce) { + return false; + } + if (!opt_retain) { + return true; + } + /* If b is a head extent, disallow the cross-region merge. */ + if (extent_is_head_get(b)) { + /* + * Additionally, sn should not overflow with retain; sanity + * check that different regions have unique sn. + */ + assert(extent_sn_comp(a, b) != 0); + return true; + } + assert(extent_sn_comp(a, b) == 0); + + return false; +} + static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + if (!maps_coalesce) { + tsdn_t *tsdn = tsdn_fetch(); + extent_t *a = iealloc(tsdn, addr_a); + extent_t *b = iealloc(tsdn, addr_b); + if (extent_head_no_merge(a, b)) { + return true; + } + } return extent_merge_default_impl(addr_a, addr_b); } -#endif static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, @@ -1896,10 +2241,11 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + assert(extent_base_get(a) < extent_base_get(b)); extent_hooks_assure_initialized(arena, r_extent_hooks); - if ((*r_extent_hooks)->merge == NULL) { + if ((*r_extent_hooks)->merge == NULL || extent_head_no_merge(a, b)) { return true; } @@ -1938,22 +2284,23 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, if (a_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, - NSIZES, false); + SC_NSIZES, false); } if (b_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, - NSIZES, false); + SC_NSIZES, false); } else { b_elm_b = b_elm_a; } extent_size_set(a, extent_size_get(a) + extent_size_get(b)); - extent_szind_set(a, NSIZES); + extent_szind_set(a, SC_NSIZES); extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? extent_sn_get(a) : extent_sn_get(b)); extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, NSIZES, false); + extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, + false); extent_unlock2(tsdn, a, b); @@ -1985,3 +2332,72 @@ extent_boot(void) { return false; } + +void +extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = 0; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = 0; + *nregs = 1; + } else { + *nfree = extent_nfree_get(extent); + *nregs = bin_infos[extent_szind_get(extent)].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + } +} + +void +extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL + && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; + *slabcur_addr = NULL; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = *bin_nfree = *bin_nregs = 0; + *nregs = 1; + *slabcur_addr = NULL; + return; + } + + *nfree = extent_nfree_get(extent); + const szind_t szind = extent_szind_get(extent); + *nregs = bin_infos[szind].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + + const arena_t *arena = extent_arena_get(extent); + assert(arena != NULL); + const unsigned binshard = extent_binshard_get(extent); + bin_t *bin = &arena->bins[szind].bin_shards[binshard]; + + malloc_mutex_lock(tsdn, &bin->lock); + if (config_stats) { + *bin_nregs = *nregs * bin->stats.curslabs; + assert(*bin_nregs >= bin->stats.curregs); + *bin_nfree = *bin_nregs - bin->stats.curregs; + } else { + *bin_nfree = *bin_nregs = 0; + } + *slabcur_addr = extent_addr_get(bin->slabcur); + assert(*slabcur_addr != NULL); + malloc_mutex_unlock(tsdn, &bin->lock); +} diff --git a/deps/jemalloc/src/extent_dss.c b/deps/jemalloc/src/extent_dss.c index e72da95870..8581789110 100644 --- a/deps/jemalloc/src/extent_dss.c +++ b/deps/jemalloc/src/extent_dss.c @@ -113,7 +113,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, cassert(have_dss); assert(size > 0); - assert(alignment > 0); + assert(alignment == ALIGNMENT_CEILING(alignment, PAGE)); /* 
* sbrk() uses a signed increment argument, so take care not to @@ -154,9 +154,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, (uintptr_t)gap_addr_page; if (gap_size_page != 0) { extent_init(gap, arena, gap_addr_page, - gap_size_page, false, NSIZES, + gap_size_page, false, SC_NSIZES, arena_extent_sn_next(arena), - extent_state_active, false, true); + extent_state_active, false, true, true, + EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired @@ -198,8 +199,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_t extent; extent_init(&extent, arena, ret, size, - size, false, NSIZES, - extent_state_active, false, true); + size, false, SC_NSIZES, + extent_state_active, false, true, + true, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, arena, &extent_hooks, &extent, 0, size)) { diff --git a/deps/jemalloc/src/extent_mmap.c b/deps/jemalloc/src/extent_mmap.c index 8d607dc803..17fd1c8f95 100644 --- a/deps/jemalloc/src/extent_mmap.c +++ b/deps/jemalloc/src/extent_mmap.c @@ -21,8 +21,8 @@ bool opt_retain = void * extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret = pages_map(new_addr, size, ALIGNMENT_CEILING(alignment, - PAGE), commit); + assert(alignment == ALIGNMENT_CEILING(alignment, PAGE)); + void *ret = pages_map(new_addr, size, alignment, commit); if (ret == NULL) { return NULL; } diff --git a/deps/jemalloc/src/hook.c b/deps/jemalloc/src/hook.c new file mode 100644 index 0000000000..9ac703cf9f --- /dev/null +++ b/deps/jemalloc/src/hook.c @@ -0,0 +1,195 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/hook.h" + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/seq.h" + +typedef struct hooks_internal_s hooks_internal_t; +struct hooks_internal_s { + hooks_t hooks; + bool in_use; +}; + +seq_define(hooks_internal_t, hooks) + +static atomic_u_t nhooks = ATOMIC_INIT(0); +static seq_hooks_t hooks[HOOK_MAX]; +static malloc_mutex_t hooks_mu; + +bool +hook_boot() { + return malloc_mutex_init(&hooks_mu, "hooks", WITNESS_RANK_HOOK, + malloc_mutex_rank_exclusive); +} + +static void * +hook_install_locked(hooks_t *to_install) { + hooks_internal_t hooks_internal; + for (int i = 0; i < HOOK_MAX; i++) { + bool success = seq_try_load_hooks(&hooks_internal, &hooks[i]); + /* We hold mu; no concurrent access. */ + assert(success); + if (!hooks_internal.in_use) { + hooks_internal.hooks = *to_install; + hooks_internal.in_use = true; + seq_store_hooks(&hooks[i], &hooks_internal); + atomic_store_u(&nhooks, + atomic_load_u(&nhooks, ATOMIC_RELAXED) + 1, + ATOMIC_RELAXED); + return &hooks[i]; + } + } + return NULL; +} + +void * +hook_install(tsdn_t *tsdn, hooks_t *to_install) { + malloc_mutex_lock(tsdn, &hooks_mu); + void *ret = hook_install_locked(to_install); + if (ret != NULL) { + tsd_global_slow_inc(tsdn); + } + malloc_mutex_unlock(tsdn, &hooks_mu); + return ret; +} + +static void +hook_remove_locked(seq_hooks_t *to_remove) { + hooks_internal_t hooks_internal; + bool success = seq_try_load_hooks(&hooks_internal, to_remove); + /* We hold mu; no concurrent access. */ + assert(success); + /* Should only remove hooks that were added. 
*/ + assert(hooks_internal.in_use); + hooks_internal.in_use = false; + seq_store_hooks(to_remove, &hooks_internal); + atomic_store_u(&nhooks, atomic_load_u(&nhooks, ATOMIC_RELAXED) - 1, + ATOMIC_RELAXED); +} + +void +hook_remove(tsdn_t *tsdn, void *opaque) { + if (config_debug) { + char *hooks_begin = (char *)&hooks[0]; + char *hooks_end = (char *)&hooks[HOOK_MAX]; + char *hook = (char *)opaque; + assert(hooks_begin <= hook && hook < hooks_end + && (hook - hooks_begin) % sizeof(seq_hooks_t) == 0); + } + malloc_mutex_lock(tsdn, &hooks_mu); + hook_remove_locked((seq_hooks_t *)opaque); + tsd_global_slow_dec(tsdn); + malloc_mutex_unlock(tsdn, &hooks_mu); +} + +#define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr) \ +for (int for_each_hook_counter = 0; \ + for_each_hook_counter < HOOK_MAX; \ + for_each_hook_counter++) { \ + bool for_each_hook_success = seq_try_load_hooks( \ + (hooks_internal_ptr), &hooks[for_each_hook_counter]); \ + if (!for_each_hook_success) { \ + continue; \ + } \ + if (!(hooks_internal_ptr)->in_use) { \ + continue; \ + } +#define FOR_EACH_HOOK_END \ +} + +static bool * +hook_reentrantp() { + /* + * We prevent user reentrancy within hooks. This is basically just a + * thread-local bool that triggers an early-exit. + * + * We don't fold in_hook into reentrancy. There are two reasons for + * this: + * - Right now, we turn on reentrancy during things like extent hook + * execution. Allocating during extent hooks is not officially + * supported, but we don't want to break it for the time being. These + * sorts of allocations should probably still be hooked, though. + * - If a hook allocates, we may want it to be relatively fast (after + * all, it executes on every allocator operation). Turning on + * reentrancy is a fairly heavyweight mode (disabling tcache, + * redirecting to arena 0, etc.). It's possible we may one day want + * to turn on reentrant mode here, if it proves too difficult to keep + * this working. But that's fairly easy for us to see; OTOH, people + * not using hooks because they're too slow is easy for us to miss. + * + * The tricky part is + * that this code might get invoked even if we don't have access to tsd. + * This function mimics getting a pointer to thread-local data, except + * that it might secretly return a pointer to some global data if we + * know that the caller will take the early-exit path. + * If we return a bool that indicates that we are reentrant, then the + * caller will go down the early exit path, leaving the global + * untouched. 
+ */ + static bool in_hook_global = true; + tsdn_t *tsdn = tsdn_fetch(); + tcache_t *tcache = tsdn_tcachep_get(tsdn); + if (tcache != NULL) { + return &tcache->in_hook; + } + return &in_hook_global; +} + +#define HOOK_PROLOGUE \ + if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { \ + return; \ + } \ + bool *in_hook = hook_reentrantp(); \ + if (*in_hook) { \ + return; \ + } \ + *in_hook = true; + +#define HOOK_EPILOGUE \ + *in_hook = false; + +void +hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, + uintptr_t args_raw[3]) { + HOOK_PROLOGUE + + hooks_internal_t hook; + FOR_EACH_HOOK_BEGIN(&hook) + hook_alloc h = hook.hooks.alloc_hook; + if (h != NULL) { + h(hook.hooks.extra, type, result, result_raw, args_raw); + } + FOR_EACH_HOOK_END + + HOOK_EPILOGUE +} + +void +hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { + HOOK_PROLOGUE + hooks_internal_t hook; + FOR_EACH_HOOK_BEGIN(&hook) + hook_dalloc h = hook.hooks.dalloc_hook; + if (h != NULL) { + h(hook.hooks.extra, type, address, args_raw); + } + FOR_EACH_HOOK_END + HOOK_EPILOGUE +} + +void +hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, + size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]) { + HOOK_PROLOGUE + hooks_internal_t hook; + FOR_EACH_HOOK_BEGIN(&hook) + hook_expand h = hook.hooks.expand_hook; + if (h != NULL) { + h(hook.hooks.extra, type, address, old_usize, new_usize, + result_raw, args_raw); + } + FOR_EACH_HOOK_END + HOOK_EPILOGUE +} diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c index 0ee8ad48b9..ed13718d48 100644 --- a/deps/jemalloc/src/jemalloc.c +++ b/deps/jemalloc/src/jemalloc.c @@ -7,11 +7,14 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" @@ -40,6 +43,8 @@ bool opt_abort_conf = false #endif ; +/* Intentionally default off, even with debug builds. */ +bool opt_confirm_conf = false; const char *opt_junk = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) "true" @@ -84,8 +89,10 @@ malloc_mutex_t arenas_lock; JEMALLOC_ALIGNED(CACHELINE) atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; static atomic_u_t narenas_total; /* Use narenas_total_*(). */ -static arena_t *a0; /* arenas[0]; read-only after initialization. */ -unsigned narenas_auto; /* Read-only after initialization. */ +/* Below three are read-only after initialization. */ +static arena_t *a0; /* arenas[0]. 
*/ +unsigned narenas_auto; +unsigned manual_arena_base; typedef enum { malloc_init_uninitialized = 3, @@ -325,7 +332,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { */ arena = arena_get(tsdn, ind, false); if (arena != NULL) { - assert(ind < narenas_auto); + assert(arena_is_auto(arena)); return arena; } @@ -340,12 +347,12 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { if (ind == 0) { return; } - if (have_background_thread) { - bool err; - malloc_mutex_lock(tsdn, &background_thread_lock); - err = background_thread_create(tsdn_tsd(tsdn), ind); - malloc_mutex_unlock(tsdn, &background_thread_lock); - if (err) { + /* + * Avoid creating a new background thread just for the huge arena, which + * purges eagerly by default. + */ + if (have_background_thread && !arena_is_huge(ind)) { + if (background_thread_create(tsdn_tsd(tsdn), ind)) { malloc_printf("<jemalloc>: error in background thread " "creation for arena %u. Abort.\n", ind); abort(); @@ -375,6 +382,14 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { tsd_iarena_set(tsd, arena); } else { tsd_arena_set(tsd, arena); + unsigned shard = atomic_fetch_add_u(&arena->binshard_next, 1, + ATOMIC_RELAXED); + tsd_binshards_t *bins = tsd_binshardsp_get(tsd); + for (unsigned i = 0; i < SC_NBINS; i++) { + assert(bin_infos[i].n_shards > 0 && + bin_infos[i].n_shards <= BIN_SHARDS_MAX); + bins->binshard[i] = shard % bin_infos[i].n_shards; + } } } @@ -760,6 +775,50 @@ init_opt_stats_print_opts(const char *v, size_t vlen) { assert(opts_len == strlen(opt_stats_print_opts)); } +/* Reads the next size pair in a multi-sized option. */ +static bool +malloc_conf_multi_sizes_next(const char **slab_size_segment_cur, + size_t *vlen_left, size_t *slab_start, size_t *slab_end, size_t *new_size) { + const char *cur = *slab_size_segment_cur; + char *end; + uintmax_t um; + + set_errno(0); + + /* First number, then '-' */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0 || *end != '-') { + return true; + } + *slab_start = (size_t)um; + cur = end + 1; + + /* Second number, then ':' */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0 || *end != ':') { + return true; + } + *slab_end = (size_t)um; + cur = end + 1; + + /* Last number */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0) { + return true; + } + *new_size = (size_t)um; + + /* Consume the separator if there is one. */ + if (*end == '|') { + end++; + } + + *vlen_left -= end - *slab_size_segment_cur; + *slab_size_segment_cur = end; + + return false; +} + static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) { @@ -848,10 +907,13 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { malloc_printf("<jemalloc>: %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); - had_conf_error = true; - if (opt_abort_conf) { - malloc_abort_invalid_conf(); + /* If abort_conf is set, error out after processing all options. */ + const char *experimental = "experimental_"; + if (strncmp(k, experimental, strlen(experimental)) == 0) { + /* However, tolerate experimental features. 
*/ + return; } + had_conf_error = true; } static void @@ -869,88 +931,141 @@ malloc_slow_flag_init(void) { malloc_slow = (malloc_slow_flags != 0); } -static void -malloc_conf_init(void) { - unsigned i; - char buf[PATH_MAX + 1]; - const char *opts, *k, *v; - size_t klen, vlen; +/* Number of sources for initializing malloc_conf */ +#define MALLOC_CONF_NSOURCES 4 - for (i = 0; i < 4; i++) { - /* Get runtime configuration. */ - switch (i) { - case 0: - opts = config_malloc_conf; - break; - case 1: - if (je_malloc_conf != NULL) { - /* - * Use options that were compiled into the - * program. - */ - opts = je_malloc_conf; - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - case 2: { - ssize_t linklen = 0; +static const char * +obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { + if (config_debug) { + static unsigned read_source = 0; + /* + * Each source should only be read once, to minimize # of + * syscalls on init. + */ + assert(read_source++ == which_source); + } + assert(which_source < MALLOC_CONF_NSOURCES); + + const char *ret; + switch (which_source) { + case 0: + ret = config_malloc_conf; + break; + case 1: + if (je_malloc_conf != NULL) { + /* Use options that were compiled into the program. */ + ret = je_malloc_conf; + } else { + /* No configuration specified. */ + ret = NULL; + } + break; + case 2: { + ssize_t linklen = 0; #ifndef _WIN32 - int saved_errno = errno; - const char *linkname = + int saved_errno = errno; + const char *linkname = # ifdef JEMALLOC_PREFIX - "/etc/"JEMALLOC_PREFIX"malloc.conf" + "/etc/"JEMALLOC_PREFIX"malloc.conf" # else - "/etc/malloc.conf" + "/etc/malloc.conf" # endif - ; + ; - /* - * Try to use the contents of the "/etc/malloc.conf" - * symbolic link's name. - */ - linklen = readlink(linkname, buf, sizeof(buf) - 1); - if (linklen == -1) { - /* No configuration specified. */ - linklen = 0; - /* Restore errno. */ - set_errno(saved_errno); - } + /* + * Try to use the contents of the "/etc/malloc.conf" symbolic + * link's name. + */ +#ifndef JEMALLOC_READLINKAT + linklen = readlink(linkname, buf, PATH_MAX); +#else + linklen = readlinkat(AT_FDCWD, linkname, buf, PATH_MAX); #endif - buf[linklen] = '\0'; - opts = buf; - break; - } case 3: { - const char *envname = + if (linklen == -1) { + /* No configuration specified. */ + linklen = 0; + /* Restore errno. */ + set_errno(saved_errno); + } +#endif + buf[linklen] = '\0'; + ret = buf; + break; + } case 3: { + const char *envname = #ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX"MALLOC_CONF" + JEMALLOC_CPREFIX"MALLOC_CONF" #else - "MALLOC_CONF" + "MALLOC_CONF" #endif - ; + ; - if ((opts = jemalloc_secure_getenv(envname)) != NULL) { - /* - * Do nothing; opts is already initialized to - * the value of the MALLOC_CONF environment - * variable. - */ - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - } default: - not_reached(); - buf[0] = '\0'; - opts = buf; + if ((ret = jemalloc_secure_getenv(envname)) != NULL) { + /* + * Do nothing; opts is already initialized to the value + * of the MALLOC_CONF environment variable. + */ + } else { + /* No configuration specified. 
*/ + ret = NULL; + } + break; + } default: + not_reached(); + ret = NULL; + } + return ret; +} + +static void +malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], + char buf[PATH_MAX + 1]) { + static const char *opts_explain[MALLOC_CONF_NSOURCES] = { + "string specified via --with-malloc-conf", + "string pointed to by the global variable malloc_conf", + "\"name\" of the file referenced by the symbolic link named " + "/etc/malloc.conf", + "value of the environment variable MALLOC_CONF" + }; + unsigned i; + const char *opts, *k, *v; + size_t klen, vlen; + + for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { + /* Get runtime configuration. */ + if (initial_call) { + opts_cache[i] = obtain_malloc_conf(i, buf); + } + opts = opts_cache[i]; + if (!initial_call && opt_confirm_conf) { + malloc_printf( + "<jemalloc>: malloc_conf #%u (%s): \"%s\"\n", + i + 1, opts_explain[i], opts != NULL ? opts : ""); + } + if (opts == NULL) { + continue; } while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v, &vlen)) { + +#define CONF_ERROR(msg, k, klen, v, vlen) \ + if (!initial_call) { \ + malloc_conf_error( \ + msg, k, klen, v, vlen); \ + cur_opt_valid = false; \ + } +#define CONF_CONTINUE { \ + if (!initial_call && opt_confirm_conf \ + && cur_opt_valid) { \ + malloc_printf("<jemalloc>: -- " \ + "Set conf value: %.*s:%.*s" \ + "\n", (int)klen, k, \ + (int)vlen, v); \ + } \ + continue; \ + } #define CONF_MATCH(n) \ (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) #define CONF_MATCH_VALUE(n) \ @@ -962,16 +1077,23 @@ malloc_conf_init(void) { } else if (CONF_MATCH_VALUE("false")) { \ o = false; \ } else { \ - malloc_conf_error( \ - "Invalid conf value", \ + CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } \ - continue; \ + CONF_CONTINUE; \ } -#define CONF_MIN_no(um, min) false -#define CONF_MIN_yes(um, min) ((um) < (min)) -#define CONF_MAX_no(um, max) false -#define CONF_MAX_yes(um, max) ((um) > (max)) + /* + * One of the CONF_MIN macros below expands, in one of the use points, + * to "unsigned integer < 0", which is always false, triggering the + * GCC -Wtype-limits warning, which we disable here and re-enable below. 
+ */ + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS + +#define CONF_DONT_CHECK_MIN(um, min) false +#define CONF_CHECK_MIN(um, min) ((um) < (min)) +#define CONF_DONT_CHECK_MAX(um, max) false +#define CONF_CHECK_MAX(um, max) ((um) > (max)) #define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ @@ -981,26 +1103,21 @@ malloc_conf_init(void) { um = malloc_strtoumax(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf value", \ + CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (clip) { \ - if (CONF_MIN_##check_min(um, \ - (t)(min))) { \ + if (check_min(um, (t)(min))) { \ o = (t)(min); \ } else if ( \ - CONF_MAX_##check_max(um, \ - (t)(max))) { \ + check_max(um, (t)(max))) { \ o = (t)(max); \ } else { \ o = (t)um; \ } \ } else { \ - if (CONF_MIN_##check_min(um, \ - (t)(min)) || \ - CONF_MAX_##check_max(um, \ - (t)(max))) { \ - malloc_conf_error( \ + if (check_min(um, (t)(min)) || \ + check_max(um, (t)(max))) { \ + CONF_ERROR( \ "Out-of-range " \ "conf value", \ k, klen, v, vlen); \ @@ -1008,7 +1125,7 @@ malloc_conf_init(void) { o = (t)um; \ } \ } \ - continue; \ + CONF_CONTINUE; \ } #define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ clip) \ @@ -1026,18 +1143,17 @@ malloc_conf_init(void) { l = strtol(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf value", \ + CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (l < (ssize_t)(min) || l > \ (ssize_t)(max)) { \ - malloc_conf_error( \ + CONF_ERROR( \ "Out-of-range conf value", \ k, klen, v, vlen); \ } else { \ o = l; \ } \ - continue; \ + CONF_CONTINUE; \ } #define CONF_HANDLE_CHAR_P(o, n, d) \ if (CONF_MATCH(n)) { \ @@ -1046,13 +1162,34 @@ malloc_conf_init(void) { sizeof(o)-1; \ strncpy(o, v, cpylen); \ o[cpylen] = '\0'; \ - continue; \ + CONF_CONTINUE; \ + } + + bool cur_opt_valid = true; + + CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf") + if (initial_call) { + continue; } CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") - if (opt_abort_conf && had_conf_error) { - malloc_abort_invalid_conf(); + if (strncmp("metadata_thp", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < metadata_thp_mode_limit; i++) { + if (strncmp(metadata_thp_mode_names[i], + v, vlen) == 0) { + opt_metadata_thp = i; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { @@ -1062,7 +1199,7 @@ malloc_conf_init(void) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { if (extent_dss_prec_set(i)) { - malloc_conf_error( + CONF_ERROR( "Error setting dss", k, klen, v, vlen); } else { @@ -1074,13 +1211,36 @@ malloc_conf_init(void) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, - UINT_MAX, yes, no, false) + UINT_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + false) + if (CONF_MATCH("bin_shards")) { + const char *bin_shards_segment_cur = v; + size_t vlen_left = vlen; + do { + size_t size_start; + size_t size_end; + size_t nshards; + bool err = malloc_conf_multi_sizes_next( + &bin_shards_segment_cur, &vlen_left, + &size_start, &size_end, &nshards); + if (err || bin_update_shard_size( 
+ bin_shard_sizes, size_start, + size_end, nshards)) { + CONF_ERROR( + "Invalid settings for " + "bin_shards", k, klen, v, + vlen); + break; + } + } while (vlen_left > 0); + CONF_CONTINUE; + } CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : @@ -1092,7 +1252,7 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { init_opt_stats_print_opts(v, vlen); - continue; + CONF_CONTINUE; } if (config_fill) { if (CONF_MATCH("junk")) { @@ -1113,11 +1273,11 @@ malloc_conf_init(void) { opt_junk_alloc = false; opt_junk_free = true; } else { - malloc_conf_error( - "Invalid conf value", k, - klen, v, vlen); + CONF_ERROR( + "Invalid conf value", + k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_zero, "zero") } @@ -1130,15 +1290,31 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) + + /* + * The runtime option of oversize_threshold remains + * undocumented. It may be tweaked in the next major + * release (6.0). The default value 8M is rather + * conservative / safe. Tuning it further down may + * improve fragmentation a bit more, but may also cause + * contention on the huge arena. + */ + CONF_HANDLE_SIZE_T(opt_oversize_threshold, + "oversize_threshold", 0, SC_LARGE_MAXCLASS, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false) + CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, + "lg_extent_max_active_fit", 0, + (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, false) + if (strncmp("percpu_arena", k, klen) == 0) { - int i; bool match = false; - for (i = percpu_arena_mode_names_base; i < + for (int i = percpu_arena_mode_names_base; i < percpu_arena_mode_names_limit; i++) { if (strncmp(percpu_arena_mode_names[i], v, vlen) == 0) { if (!have_percpu_arena) { - malloc_conf_error( + CONF_ERROR( "No getcpu support", k, klen, v, vlen); } @@ -1148,13 +1324,42 @@ malloc_conf_init(void) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_background_thread, "background_thread"); + CONF_HANDLE_SIZE_T(opt_max_background_threads, + "max_background_threads", 1, + opt_max_background_threads, + CONF_CHECK_MIN, CONF_CHECK_MAX, + true); + if (CONF_MATCH("slab_sizes")) { + bool err; + const char *slab_size_segment_cur = v; + size_t vlen_left = vlen; + do { + size_t slab_start; + size_t slab_end; + size_t pgs; + err = malloc_conf_multi_sizes_next( + &slab_size_segment_cur, + &vlen_left, &slab_start, &slab_end, + &pgs); + if (!err) { + sc_data_update_slab_size( + sc_data, slab_start, + slab_end, (int)pgs); + } else { + CONF_ERROR("Invalid settings " + "for slab_sizes", + k, klen, v, vlen); + } + } while (!err && vlen_left > 0); + CONF_CONTINUE; + } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_CHAR_P(opt_prof_prefix, @@ -1164,7 +1369,8 @@ malloc_conf_init(void) { "prof_thread_active_init") CONF_HANDLE_SIZE_T(opt_lg_prof_sample, "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - - 1, no, yes, true) + - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, @@ -1172,25 +1378,77 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") CONF_HANDLE_BOOL(opt_prof_final, "prof_final") 
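/*
 * Editorial sketch, not part of the patch: a minimal, self-contained
 * illustration of how the CONF_CHECK_MIN / CONF_CHECK_MAX predicates
 * defined above combine with the clip flag when a numeric malloc_conf
 * option is parsed -- clamp into [min, max] when clipping, otherwise
 * report an out-of-range value.  All names below are hypothetical
 * stand-ins, not jemalloc symbols.
 */
#include <stdint.h>
#include <stdbool.h>

static bool
example_check_min(uintmax_t um, uintmax_t min) {
	return um < min;
}

static bool
example_check_max(uintmax_t um, uintmax_t max) {
	return um > max;
}

/*
 * Returns the value to store; sets *err when clip is false and the parsed
 * value falls outside [min, max], mirroring the "Out-of-range conf value"
 * branch above.
 */
static uintmax_t
example_handle_numeric(uintmax_t um, uintmax_t min, uintmax_t max,
    bool clip, bool *err) {
	*err = false;
	if (clip) {
		if (example_check_min(um, min)) {
			return min;	/* clamp up to the minimum */
		} else if (example_check_max(um, max)) {
			return max;	/* clamp down to the maximum */
		}
		return um;
	}
	if (example_check_min(um, min) || example_check_max(um, max)) {
		*err = true;	/* out-of-range conf value */
	}
	return um;
}

/* Usage: example_handle_numeric(70, 1, 64, true, &err) yields 64 (clamped). */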
CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") + CONF_HANDLE_BOOL(opt_prof_log, "prof_log") + } + if (config_log) { + if (CONF_MATCH("log")) { + size_t cpylen = ( + vlen <= sizeof(log_var_names) ? + vlen : sizeof(log_var_names) - 1); + strncpy(log_var_names, v, cpylen); + log_var_names[cpylen] = '\0'; + CONF_CONTINUE; + } } - malloc_conf_error("Invalid conf pair", k, klen, v, - vlen); + if (CONF_MATCH("thp")) { + bool match = false; + for (int i = 0; i < thp_mode_names_limit; i++) { + if (strncmp(thp_mode_names[i],v, vlen) + == 0) { + if (!have_madvise_huge) { + CONF_ERROR( + "No THP support", + k, klen, v, vlen); + } + opt_thp = i; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; + } + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); +#undef CONF_ERROR +#undef CONF_CONTINUE #undef CONF_MATCH #undef CONF_MATCH_VALUE #undef CONF_HANDLE_BOOL -#undef CONF_MIN_no -#undef CONF_MIN_yes -#undef CONF_MAX_no -#undef CONF_MAX_yes +#undef CONF_DONT_CHECK_MIN +#undef CONF_CHECK_MIN +#undef CONF_DONT_CHECK_MAX +#undef CONF_CHECK_MAX #undef CONF_HANDLE_T_U #undef CONF_HANDLE_UNSIGNED #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P + /* Re-enable diagnostic "-Wtype-limits" */ + JEMALLOC_DIAGNOSTIC_POP + } + if (opt_abort_conf && had_conf_error) { + malloc_abort_invalid_conf(); } } + atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); +} + +static void +malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { + const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL}; + char buf[PATH_MAX + 1]; + + /* The first call only set the confirm_conf option and opts_cache */ + malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); + malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, + NULL); } +#undef MALLOC_CONF_NSOURCES + static bool malloc_init_hard_needed(void) { if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == @@ -1221,10 +1479,33 @@ static bool malloc_init_hard_a0_locked() { malloc_initializer = INITIALIZER; + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS + sc_data_t sc_data = {0}; + JEMALLOC_DIAGNOSTIC_POP + + /* + * Ordering here is somewhat tricky; we need sc_boot() first, since that + * determines what the size classes will be, and then + * malloc_conf_init(), since any slab size tweaking will need to be done + * before sz_boot and bin_boot, which assume that the values they read + * out of sc_data_global are final. + */ + sc_boot(&sc_data); + unsigned bin_shard_sizes[SC_NBINS]; + bin_shard_sizes_boot(bin_shard_sizes); + /* + * prof_boot0 only initializes opt_prof_prefix. We need to do it before + * we parse malloc_conf options, in case malloc_conf parsing overwrites + * it. + */ if (config_prof) { prof_boot0(); } - malloc_conf_init(); + malloc_conf_init(&sc_data, bin_shard_sizes); + sz_boot(&sc_data); + bin_boot(&sc_data, bin_shard_sizes); + if (opt_stats_print) { /* Print statistics at exit. */ if (atexit(stats_print_atexit) != 0) { @@ -1249,7 +1530,7 @@ malloc_init_hard_a0_locked() { if (config_prof) { prof_boot1(); } - arena_boot(); + arena_boot(&sc_data); if (tcache_boot(TSDN_NULL)) { return true; } @@ -1257,11 +1538,13 @@ malloc_init_hard_a0_locked() { malloc_mutex_rank_exclusive)) { return true; } + hook_boot(); /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). 
*/ narenas_auto = 1; + manual_arena_base = narenas_auto + 1; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in @@ -1409,6 +1692,10 @@ malloc_init_narenas(void) { narenas_auto); } narenas_total_set(narenas_auto); + if (arena_init_huge()) { + narenas_total_inc(); + } + manual_arena_base = narenas_total_get(); return false; } @@ -1493,6 +1780,8 @@ malloc_init_hard(void) { post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + witness_assert_lockless(witness_tsd_tsdn( + tsd_witness_tsdp_get_unsafe(tsd))); malloc_tsd_boot1(); /* Update TSD after tsd_boot1. */ tsd = tsd_fetch(); @@ -1500,12 +1789,11 @@ malloc_init_hard(void) { assert(have_background_thread); /* * Need to finish init & unlock first before creating background - * threads (pthread_create depends on malloc). + * threads (pthread_create depends on malloc). ctl_init (which + * sets isthreaded) needs to be called without holding any lock. */ - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); - bool err = background_thread_create(tsd, 0); - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); - if (err) { + background_thread_ctl_init(tsd_tsdn(tsd)); + if (background_thread_create(tsd, 0)) { return true; } } @@ -1528,8 +1816,12 @@ typedef struct static_opts_s static_opts_t; struct static_opts_s { /* Whether or not allocation size may overflow. */ bool may_overflow; - /* Whether or not allocations of size 0 should be treated as size 1. */ - bool bump_empty_alloc; + + /* + * Whether or not allocations (with alignment) of size 0 should be + * treated as size 1. + */ + bool bump_empty_aligned_alloc; /* * Whether to assert that allocations are not of size 0 (after any * bumping). @@ -1562,12 +1854,16 @@ struct static_opts_s { * initialization) options. */ bool slow; + /* + * Return size. + */ + bool usize; }; JEMALLOC_ALWAYS_INLINE void static_opts_init(static_opts_t *static_opts) { static_opts->may_overflow = false; - static_opts->bump_empty_alloc = false; + static_opts->bump_empty_aligned_alloc = false; static_opts->assert_nonempty_alloc = false; static_opts->null_out_result_on_error = false; static_opts->set_errno_on_error = false; @@ -1575,6 +1871,7 @@ static_opts_init(static_opts_t *static_opts) { static_opts->oom_string = ""; static_opts->invalid_alignment_string = ""; static_opts->slow = false; + static_opts->usize = false; } /* @@ -1589,6 +1886,7 @@ static_opts_init(static_opts_t *static_opts) { typedef struct dynamic_opts_s dynamic_opts_t; struct dynamic_opts_s { void **result; + size_t usize; size_t num_items; size_t item_size; size_t alignment; @@ -1600,6 +1898,7 @@ struct dynamic_opts_s { JEMALLOC_ALWAYS_INLINE void dynamic_opts_init(dynamic_opts_t *dynamic_opts) { dynamic_opts->result = NULL; + dynamic_opts->usize = 0; dynamic_opts->num_items = 0; dynamic_opts->item_size = 0; dynamic_opts->alignment = 0; @@ -1663,12 +1962,13 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, szind_t ind_large; size_t bumped_usize = usize; - if (usize <= SMALL_MAXCLASS) { - assert(((dopts->alignment == 0) ? sz_s2u(LARGE_MINCLASS) : - sz_sa2u(LARGE_MINCLASS, dopts->alignment)) - == LARGE_MINCLASS); - ind_large = sz_size2index(LARGE_MINCLASS); - bumped_usize = sz_s2u(LARGE_MINCLASS); + if (usize <= SC_SMALL_MAXCLASS) { + assert(((dopts->alignment == 0) ? 
+ sz_s2u(SC_LARGE_MINCLASS) : + sz_sa2u(SC_LARGE_MINCLASS, dopts->alignment)) + == SC_LARGE_MINCLASS); + ind_large = sz_size2index(SC_LARGE_MINCLASS); + bumped_usize = sz_s2u(SC_LARGE_MINCLASS); ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, bumped_usize, ind_large); if (unlikely(ret == NULL)) { @@ -1701,7 +2001,7 @@ compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, } /* A size_t with its high-half bits all set to 1. */ - const static size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); + static const size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); *size = dopts->item_size * dopts->num_items; @@ -1751,17 +2051,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { goto label_oom; } - /* Validate the user input. */ - if (sopts->bump_empty_alloc) { - if (unlikely(size == 0)) { - size = 1; - } - } - - if (sopts->assert_nonempty_alloc) { - assert (size != 0); - } - if (unlikely(dopts->alignment < sopts->min_alignment || (dopts->alignment & (dopts->alignment - 1)) != 0)) { goto label_invalid_alignment; @@ -1771,19 +2060,32 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (dopts->alignment == 0) { ind = sz_size2index(size); - if (unlikely(ind >= NSIZES)) { + if (unlikely(ind >= SC_NSIZES)) { goto label_oom; } - if (config_stats || (config_prof && opt_prof)) { + if (config_stats || (config_prof && opt_prof) || sopts->usize) { usize = sz_index2size(ind); - assert(usize > 0 && usize <= LARGE_MAXCLASS); + dopts->usize = usize; + assert(usize > 0 && usize + <= SC_LARGE_MAXCLASS); } } else { + if (sopts->bump_empty_aligned_alloc) { + if (unlikely(size == 0)) { + size = 1; + } + } usize = sz_sa2u(size, dopts->alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + dopts->usize = usize; + if (unlikely(usize == 0 + || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } } + /* Validate the user input. */ + if (sopts->assert_nonempty_alloc) { + assert (size != 0); + } check_entry_exit_locking(tsd_tsdn(tsd)); @@ -1816,7 +2118,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - alloc_ctx.slab = (usize <= SMALL_MAXCLASS); + alloc_ctx.slab = (usize + <= SC_SMALL_MAXCLASS); allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind); } else if ((uintptr_t)tctx > (uintptr_t)1U) { @@ -1921,9 +2224,8 @@ label_invalid_alignment: return EINVAL; } -/* Returns the errno-style error code of the allocation. */ -JEMALLOC_ALWAYS_INLINE int -imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { +JEMALLOC_ALWAYS_INLINE bool +imalloc_init_check(static_opts_t *sopts, dynamic_opts_t *dopts) { if (unlikely(!malloc_initialized()) && unlikely(malloc_init())) { if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(sopts->oom_string); @@ -1933,6 +2235,16 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { set_errno(ENOMEM); *dopts->result = NULL; + return false; + } + + return true; +} + +/* Returns the errno-style error code of the allocation. 
*/ +JEMALLOC_ALWAYS_INLINE int +imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { + if (tsd_get_allocates() && !imalloc_init_check(sopts, dopts)) { return ENOMEM; } @@ -1945,27 +2257,27 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { sopts->slow = false; return imalloc_body(sopts, dopts, tsd); } else { + if (!tsd_get_allocates() && !imalloc_init_check(sopts, dopts)) { + return ENOMEM; + } + sopts->slow = true; return imalloc_body(sopts, dopts, tsd); } } -/******************************************************************************/ -/* - * Begin malloc(3)-compatible functions. - */ -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) -je_malloc(size_t size) { +JEMALLOC_NOINLINE +void * +malloc_default(size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.malloc.entry", "size: %zu", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.oom_string = "<jemalloc>: Error in malloc(): out of memory\n"; @@ -1975,10 +2287,107 @@ je_malloc(size_t size) { dopts.item_size = size; imalloc(&sopts, &dopts); + /* + * Note that this branch gets optimized away -- it immediately follows + * the check on tsd_fast that sets sopts.slow. + */ + if (sopts.slow) { + uintptr_t args[3] = {size}; + hook_invoke_alloc(hook_alloc_malloc, ret, (uintptr_t)ret, args); + } + + LOG("core.malloc.exit", "result: %p", ret); return ret; } +/******************************************************************************/ +/* + * Begin malloc(3)-compatible functions. + */ + +/* + * malloc() fastpath. + * + * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit + * tcache. If either of these is false, we tail-call to the slowpath, + * malloc_default(). Tail-calling is used to avoid any caller-saved + * registers. + * + * fastpath supports ticker and profiling, both of which will also + * tail-call to the slowpath if they fire. + */ +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) +je_malloc(size_t size) { + LOG("core.malloc.entry", "size: %zu", size); + + if (tsd_get_allocates() && unlikely(!malloc_initialized())) { + return malloc_default(size); + } + + tsd_t *tsd = tsd_get(false); + if (unlikely(!tsd || !tsd_fast(tsd) || (size > SC_LOOKUP_MAXCLASS))) { + return malloc_default(size); + } + + tcache_t *tcache = tsd_tcachep_get(tsd); + + if (unlikely(ticker_trytick(&tcache->gc_ticker))) { + return malloc_default(size); + } + + szind_t ind = sz_size2index_lookup(size); + size_t usize; + if (config_stats || config_prof) { + usize = sz_index2size(ind); + } + /* Fast path relies on size being a bin. I.e. SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS */ + assert(ind < SC_NBINS); + assert(size <= SC_SMALL_MAXCLASS); + + if (config_prof) { + int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd); + bytes_until_sample -= usize; + tsd_bytes_until_sample_set(tsd, bytes_until_sample); + + if (unlikely(bytes_until_sample < 0)) { + /* + * Avoid a prof_active check on the fastpath. + * If prof_active is false, set bytes_until_sample to + * a large value. If prof_active is set to true, + * bytes_until_sample will be reset. 
+ */ + if (!prof_active) { + tsd_bytes_until_sample_set(tsd, SSIZE_MAX); + } + return malloc_default(size); + } + } + + cache_bin_t *bin = tcache_small_bin_get(tcache, ind); + bool tcache_success; + void* ret = cache_bin_alloc_easy(bin, &tcache_success); + + if (tcache_success) { + if (config_stats) { + *tsd_thread_allocatedp_get(tsd) += usize; + bin->tstats.nrequests++; + } + if (config_prof) { + tcache->prof_accumbytes += usize; + } + + LOG("core.malloc.exit", "result: %p", ret); + + /* Fastpath success */ + return ret; + } + + return malloc_default(size); +} + JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { @@ -1986,10 +2395,13 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " + "size: %zu", memptr, alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; + sopts.bump_empty_aligned_alloc = true; sopts.min_alignment = sizeof(void *); sopts.oom_string = "<jemalloc>: Error allocating aligned memory: out of memory\n"; @@ -2002,6 +2414,16 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { dopts.alignment = alignment; ret = imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)memptr, (uintptr_t)alignment, + (uintptr_t)size}; + hook_invoke_alloc(hook_alloc_posix_memalign, *memptr, + (uintptr_t)ret, args); + } + + LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, + *memptr); + return ret; } @@ -2014,10 +2436,13 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", + alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; + sopts.bump_empty_aligned_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.min_alignment = 1; @@ -2032,6 +2457,14 @@ je_aligned_alloc(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)alignment, (uintptr_t)size}; + hook_invoke_alloc(hook_alloc_aligned_alloc, ret, + (uintptr_t)ret, args); + } + + LOG("core.aligned_alloc.exit", "result: %p", ret); + return ret; } @@ -2043,11 +2476,12 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); sopts.may_overflow = true; - sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.oom_string = "<jemalloc>: Error in calloc(): out of memory\n"; @@ -2058,26 +2492,34 @@ je_calloc(size_t num, size_t size) { dopts.zero = true; imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)num, (uintptr_t)size}; + hook_invoke_alloc(hook_alloc_calloc, ret, (uintptr_t)ret, args); + } + + LOG("core.calloc.exit", "result: %p", ret); return ret; } static void * irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, - prof_tctx_t *tctx) { + prof_tctx_t *tctx, hook_ralloc_args_t *hook_args) { void *p; if (tctx == NULL) { return NULL; } - if (usize <= SMALL_MAXCLASS) { - p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); + if (usize <= SC_SMALL_MAXCLASS) { + p = iralloc(tsd, old_ptr, old_usize, + 
SC_LARGE_MINCLASS, 0, false, hook_args); if (p == NULL) { return NULL; } arena_prof_promote(tsd_tsdn(tsd), p, usize); } else { - p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false, + hook_args); } return p; @@ -2085,7 +2527,7 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, JEMALLOC_ALWAYS_INLINE void * irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, - alloc_ctx_t *alloc_ctx) { + alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { void *p; bool prof_active; prof_tctx_t *old_tctx, *tctx; @@ -2094,9 +2536,11 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); + p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx, + hook_args); } else { - p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false, + hook_args); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); @@ -2125,7 +2569,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); size_t usize; if (config_prof && opt_prof) { @@ -2161,17 +2605,37 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx, *ctx; - if (config_prof && opt_prof) { + if (!config_cache_oblivious && ((uintptr_t)ptr & PAGE_MASK) != 0) { + /* + * When cache_oblivious is disabled and ptr is not page aligned, + * the allocation was not sampled -- usize can be used to + * determine szind directly. 
+ */ + alloc_ctx.szind = sz_size2index(usize); + alloc_ctx.slab = true; + ctx = &alloc_ctx; + if (config_debug) { + alloc_ctx_t dbg_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true, &dbg_ctx.szind, + &dbg_ctx.slab); + assert(dbg_ctx.szind == alloc_ctx.szind); + assert(dbg_ctx.slab == alloc_ctx.slab); + } + } else if (config_prof && opt_prof) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind == sz_size2index(usize)); ctx = &alloc_ctx; - prof_free(tsd, ptr, usize, ctx); } else { ctx = NULL; } + if (config_prof && opt_prof) { + prof_free(tsd, ptr, usize, ctx); + } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } @@ -2186,11 +2650,14 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ALLOC_SIZE(2) -je_realloc(void *ptr, size_t size) { +je_realloc(void *ptr, size_t arg_size) { void *ret; tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; + size_t size = arg_size; + + LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (unlikely(size == 0)) { if (ptr != NULL) { @@ -2203,7 +2670,13 @@ je_realloc(void *ptr, size_t size) { } else { tcache = NULL; } + + uintptr_t args[3] = {(uintptr_t)ptr, size}; + hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); + ifree(tsd, ptr, tcache, true); + + LOG("core.realloc.exit", "result: %p", NULL); return NULL; } size = 1; @@ -2215,28 +2688,59 @@ je_realloc(void *ptr, size_t size) { check_entry_exit_locking(tsd_tsdn(tsd)); + + hook_ralloc_args_t hook_args = {true, {(uintptr_t)ptr, + (uintptr_t)arg_size, 0, 0}}; + alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (config_prof && opt_prof) { usize = sz_s2u(size); - ret = unlikely(usize == 0 || usize > LARGE_MAXCLASS) ? - NULL : irealloc_prof(tsd, ptr, old_usize, usize, - &alloc_ctx); + if (unlikely(usize == 0 + || usize > SC_LARGE_MAXCLASS)) { + ret = NULL; + } else { + ret = irealloc_prof(tsd, ptr, old_usize, usize, + &alloc_ctx, &hook_args); + } } else { if (config_stats) { usize = sz_s2u(size); } - ret = iralloc(tsd, ptr, old_usize, size, 0, false); + ret = iralloc(tsd, ptr, old_usize, size, 0, false, + &hook_args); } tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - return je_malloc(size); + static_opts_t sopts; + dynamic_opts_t dopts; + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.null_out_result_on_error = true; + sopts.set_errno_on_error = true; + sopts.oom_string = + "<jemalloc>: Error in realloc(): out of memory\n"; + + dopts.result = &ret; + dopts.num_items = 1; + dopts.item_size = size; + + imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)ptr, arg_size}; + hook_invoke_alloc(hook_alloc_realloc, ret, + (uintptr_t)ret, args); + } + + return ret; } if (unlikely(ret == NULL)) { @@ -2257,11 +2761,14 @@ je_realloc(void *ptr, size_t size) { } UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); + + LOG("core.realloc.exit", "result: %p", ret); return ret; } -JEMALLOC_EXPORT void JEMALLOC_NOTHROW -je_free(void *ptr) { +JEMALLOC_NOINLINE +void +free_default(void *ptr) { UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { /* @@ -2287,12 +2794,82 @@ je_free(void *ptr) { } else { tcache = NULL; } + uintptr_t args_raw[3] = {(uintptr_t)ptr}; + hook_invoke_dalloc(hook_dalloc_free, ptr, args_raw); ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); } } +JEMALLOC_ALWAYS_INLINE +bool free_fastpath(void *ptr, size_t size, bool size_hint) { + tsd_t *tsd = tsd_get(false); + if (unlikely(!tsd || !tsd_fast(tsd))) { + return false; + } + + tcache_t *tcache = tsd_tcachep_get(tsd); + + alloc_ctx_t alloc_ctx; + /* + * If !config_cache_oblivious, we can check PAGE alignment to + * detect sampled objects. Otherwise addresses are + * randomized, and we have to look it up in the rtree anyway. + * See also isfree(). + */ + if (!size_hint || config_cache_oblivious) { + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &extents_rtree, + rtree_ctx, (uintptr_t)ptr, + &alloc_ctx.szind, &alloc_ctx.slab); + + /* Note: profiled objects will have alloc_ctx.slab set */ + if (!res || !alloc_ctx.slab) { + return false; + } + assert(alloc_ctx.szind != SC_NSIZES); + } else { + /* + * Check for both sizes that are too large, and for sampled objects. + * Sampled objects are always page-aligned. The sampled object check + * will also check for null ptr. + */ + if (size > SC_LOOKUP_MAXCLASS || (((uintptr_t)ptr & PAGE_MASK) == 0)) { + return false; + } + alloc_ctx.szind = sz_size2index_lookup(size); + } + + if (unlikely(ticker_trytick(&tcache->gc_ticker))) { + return false; + } + + cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); + cache_bin_info_t *bin_info = &tcache_bin_info[alloc_ctx.szind]; + if (!cache_bin_dalloc_easy(bin, bin_info, ptr)) { + return false; + } + + if (config_stats) { + size_t usize = sz_index2size(alloc_ctx.szind); + *tsd_thread_deallocatedp_get(tsd) += usize; + } + + return true; +} + +JEMALLOC_EXPORT void JEMALLOC_NOTHROW +je_free(void *ptr) { + LOG("core.free.entry", "ptr: %p", ptr); + + if (!free_fastpath(ptr, 0, false)) { + free_default(ptr); + } + + LOG("core.free.exit", ""); +} + /* * End malloc(3)-compatible functions. 
*/ @@ -2310,10 +2887,12 @@ je_memalign(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, + size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; sopts.min_alignment = 1; sopts.oom_string = "<jemalloc>: Error allocating aligned memory: out of memory\n"; @@ -2327,6 +2906,13 @@ je_memalign(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {alignment, size}; + hook_invoke_alloc(hook_alloc_memalign, ret, (uintptr_t)ret, + args); + } + + LOG("core.memalign.exit", "result: %p", ret); return ret; } #endif @@ -2341,10 +2927,11 @@ je_valloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.valloc.entry", "size: %zu\n", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.min_alignment = PAGE; sopts.oom_string = @@ -2358,7 +2945,12 @@ je_valloc(size_t size) { dopts.alignment = PAGE; imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {size}; + hook_invoke_alloc(hook_alloc_valloc, ret, (uintptr_t)ret, args); + } + LOG("core.valloc.exit", "result: %p\n", ret); return ret; } #endif @@ -2424,6 +3016,82 @@ int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); * Begin non-standard functions. */ +#ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API + +#define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x ## y +#define JEMALLOC_SMALLOCX_CONCAT_HELPER2(x, y) \ + JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) + +typedef struct { + void *ptr; + size_t size; +} smallocx_return_t; + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +smallocx_return_t JEMALLOC_NOTHROW +/* + * The attribute JEMALLOC_ATTR(malloc) cannot be used due to: + * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86488 + */ +JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) + (size_t size, int flags) { + /* + * Note: the attribute JEMALLOC_ALLOC_SIZE(1) cannot be + * used here because it makes writing beyond the `size` + * of the `ptr` undefined behavior, but the objective + * of this function is to allow writing beyond `size` + * up to `smallocx_return_t::size`. 
+ */ + smallocx_return_t ret; + static_opts_t sopts; + dynamic_opts_t dopts; + + LOG("core.smallocx.entry", "size: %zu, flags: %d", size, flags); + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.assert_nonempty_alloc = true; + sopts.null_out_result_on_error = true; + sopts.oom_string = "<jemalloc>: Error in mallocx(): out of memory\n"; + sopts.usize = true; + + dopts.result = &ret.ptr; + dopts.num_items = 1; + dopts.item_size = size; + if (unlikely(flags != 0)) { + if ((flags & MALLOCX_LG_ALIGN_MASK) != 0) { + dopts.alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); + } + + dopts.zero = MALLOCX_ZERO_GET(flags); + + if ((flags & MALLOCX_TCACHE_MASK) != 0) { + if ((flags & MALLOCX_TCACHE_MASK) + == MALLOCX_TCACHE_NONE) { + dopts.tcache_ind = TCACHE_IND_NONE; + } else { + dopts.tcache_ind = MALLOCX_TCACHE_GET(flags); + } + } else { + dopts.tcache_ind = TCACHE_IND_AUTOMATIC; + } + + if ((flags & MALLOCX_ARENA_MASK) != 0) + dopts.arena_ind = MALLOCX_ARENA_GET(flags); + } + + imalloc(&sopts, &dopts); + assert(dopts.usize == je_nallocx(size, flags)); + ret.size = dopts.usize; + + LOG("core.smallocx.exit", "result: %p, size: %zu", ret.ptr, ret.size); + return ret; +} +#undef JEMALLOC_SMALLOCX_CONCAT_HELPER +#undef JEMALLOC_SMALLOCX_CONCAT_HELPER2 +#endif + JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) @@ -2432,6 +3100,8 @@ je_mallocx(size_t size, int flags) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2465,28 +3135,36 @@ je_mallocx(size_t size, int flags) { } imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {size, flags}; + hook_invoke_alloc(hook_alloc_mallocx, ret, (uintptr_t)ret, + args); + } + + LOG("core.mallocx.exit", "result: %p", ret); return ret; } static void * irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, - prof_tctx_t *tctx) { + prof_tctx_t *tctx, hook_ralloc_args_t *hook_args) { void *p; if (tctx == NULL) { return NULL; } - if (usize <= SMALL_MAXCLASS) { - p = iralloct(tsdn, old_ptr, old_usize, LARGE_MINCLASS, - alignment, zero, tcache, arena); + if (usize <= SC_SMALL_MAXCLASS) { + p = iralloct(tsdn, old_ptr, old_usize, + SC_LARGE_MINCLASS, alignment, zero, tcache, + arena, hook_args); if (p == NULL) { return NULL; } arena_prof_promote(tsdn, p, usize); } else { p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, - tcache, arena); + tcache, arena, hook_args); } return p; @@ -2495,7 +3173,7 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, - arena_t *arena, alloc_ctx_t *alloc_ctx) { + arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { void *p; bool prof_active; prof_tctx_t *old_tctx, *tctx; @@ -2505,10 +3183,10 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, - *usize, alignment, zero, tcache, arena, tctx); + *usize, alignment, zero, tcache, arena, tctx, hook_args); } else { p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, - zero, 
tcache, arena); + zero, tcache, arena, hook_args); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, false); @@ -2545,6 +3223,10 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; + LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + + assert(ptr != NULL); assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); @@ -2575,23 +3257,27 @@ je_rallocx(void *ptr, size_t size, int flags) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); + + hook_ralloc_args_t hook_args = {false, {(uintptr_t)ptr, size, flags, + 0}}; if (config_prof && opt_prof) { usize = (alignment == 0) ? sz_s2u(size) : sz_sa2u(size, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 + || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, - zero, tcache, arena, &alloc_ctx); + zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { goto label_oom; } } else { p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment, - zero, tcache, arena); + zero, tcache, arena, &hook_args); if (unlikely(p == NULL)) { goto label_oom; } @@ -2607,6 +3293,8 @@ je_rallocx(void *ptr, size_t size, int flags) { } UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2615,20 +3303,22 @@ label_oom: } UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.rallocx.exit", "result: %p", NULL); return NULL; } JEMALLOC_ALWAYS_INLINE size_t ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { - size_t usize; + size_t newsize; - if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) { + if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero, + &newsize)) { return old_usize; } - usize = isalloc(tsdn, ptr); - return usize; + return newsize; } static size_t @@ -2662,17 +3352,19 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, */ if (alignment == 0) { usize_max = sz_s2u(size+extra); - assert(usize_max > 0 && usize_max <= LARGE_MAXCLASS); + assert(usize_max > 0 + && usize_max <= SC_LARGE_MAXCLASS); } else { usize_max = sz_sa2u(size+extra, alignment); - if (unlikely(usize_max == 0 || usize_max > LARGE_MAXCLASS)) { + if (unlikely(usize_max == 0 + || usize_max > SC_LARGE_MAXCLASS)) { /* * usize_max is out of range, and chances are that * allocation will fail, but use the maximum possible * value and carry on with prof_alloc_prep(), just in * case allocation succeeds. 
*/ - usize_max = LARGE_MAXCLASS; + usize_max = SC_LARGE_MAXCLASS; } } tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); @@ -2701,6 +3393,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; + LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " + "flags: %d", ptr, size, extra, flags); + assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); @@ -2712,24 +3407,24 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); /* * The API explicitly absolves itself of protecting against (size + * extra) numerical overflow, but we may need to clamp extra to avoid - * exceeding LARGE_MAXCLASS. + * exceeding SC_LARGE_MAXCLASS. * * Ordinarily, size limit checking is handled deeper down, but here we * have to check as part of (size + extra) clamping, since we need the * clamped value in the above helper functions. */ - if (unlikely(size > LARGE_MAXCLASS)) { + if (unlikely(size > SC_LARGE_MAXCLASS)) { usize = old_usize; goto label_not_resized; } - if (unlikely(LARGE_MAXCLASS - size < extra)) { - extra = LARGE_MAXCLASS - size; + if (unlikely(SC_LARGE_MAXCLASS - size < extra)) { + extra = SC_LARGE_MAXCLASS - size; } if (config_prof && opt_prof) { @@ -2748,8 +3443,16 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { *tsd_thread_deallocatedp_get(tsd) += old_usize; } label_not_resized: + if (unlikely(!tsd_fast(tsd))) { + uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; + hook_invoke_expand(hook_expand_xallocx, ptr, old_usize, + usize, (uintptr_t)usize, args); + } + UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.xallocx.exit", "result: %zu", usize); return usize; } @@ -2759,6 +3462,8 @@ je_sallocx(const void *ptr, int flags) { size_t usize; tsdn_t *tsdn; + LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); + assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); @@ -2773,11 +3478,15 @@ je_sallocx(const void *ptr, int flags) { } check_entry_exit_locking(tsdn); + + LOG("core.sallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { + LOG("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2812,9 +3521,13 @@ je_dallocx(void *ptr, int flags) { tsd_assert_fast(tsd); ifree(tsd, ptr, tcache, false); } else { + uintptr_t args_raw[3] = {(uintptr_t)ptr, flags}; + hook_invoke_dalloc(hook_dalloc_dallocx, ptr, args_raw); ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.dallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -2831,8 +3544,8 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { return usize; } -JEMALLOC_EXPORT void JEMALLOC_NOTHROW -je_sdallocx(void *ptr, size_t size, int flags) { +JEMALLOC_NOINLINE void +sdallocx_default(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2869,9 +3582,36 @@ je_sdallocx(void *ptr, size_t size, int flags) { tsd_assert_fast(tsd); isfree(tsd, ptr, usize, tcache, false); } else { + uintptr_t 
args_raw[3] = {(uintptr_t)ptr, size, flags}; + hook_invoke_dalloc(hook_dalloc_sdallocx, ptr, args_raw); isfree(tsd, ptr, usize, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + +} + +JEMALLOC_EXPORT void JEMALLOC_NOTHROW +je_sdallocx(void *ptr, size_t size, int flags) { + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + + if (flags !=0 || !free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, flags); + } + + LOG("core.sdallocx.exit", ""); +} + +void JEMALLOC_NOTHROW +je_sdallocx_noflags(void *ptr, size_t size) { + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, + size); + + if (!free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, 0); + } + + LOG("core.sdallocx.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2883,6 +3623,7 @@ je_nallocx(size_t size, int flags) { assert(size != 0); if (unlikely(malloc_init())) { + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } @@ -2890,11 +3631,13 @@ je_nallocx(size_t size, int flags) { check_entry_exit_locking(tsdn); usize = inallocx(tsdn, size, flags); - if (unlikely(usize > LARGE_MAXCLASS)) { + if (unlikely(usize > SC_LARGE_MAXCLASS)) { + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); + LOG("core.nallocx.exit", "result: %zu", usize); return usize; } @@ -2904,7 +3647,10 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, int ret; tsd_t *tsd; + LOG("core.mallctl.entry", "name: %s", name); + if (unlikely(malloc_init())) { + LOG("core.mallctl.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -2912,6 +3658,8 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.mallctl.exit", "result: %d", ret); return ret; } @@ -2919,7 +3667,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; + LOG("core.mallctlnametomib.entry", "name: %s", name); + if (unlikely(malloc_init())) { + LOG("core.mallctlnametomib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -2927,6 +3678,8 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.mallctlnametomib.exit", "result: %d", ret); return ret; } @@ -2936,7 +3689,10 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tsd_t *tsd; + LOG("core.mallctlbymib.entry", ""); + if (unlikely(malloc_init())) { + LOG("core.mallctlbymib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -2944,6 +3700,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + LOG("core.mallctlbymib.exit", "result: %d", ret); return ret; } @@ -2952,10 +3709,13 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; + LOG("core.malloc_stats_print.entry", ""); + tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); + LOG("core.malloc_stats_print.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2963,6 +3723,8 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void 
*ptr) { size_t ret; tsdn_t *tsdn; + LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); + assert(malloc_initialized() || IS_INITIALIZER); tsdn = tsdn_fetch(); @@ -2980,6 +3742,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } check_entry_exit_locking(tsdn); + LOG("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } @@ -3084,6 +3847,7 @@ _malloc_prefork(void) } } prof_prefork1(tsd_tsdn(tsd)); + tsd_prefork(tsd); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -3106,6 +3870,8 @@ _malloc_postfork(void) tsd = tsd_fetch(); + tsd_postfork_parent(tsd); + witness_postfork_parent(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { @@ -3133,6 +3899,8 @@ jemalloc_postfork_child(void) { tsd = tsd_fetch(); + tsd_postfork_child(tsd); + witness_postfork_child(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { diff --git a/deps/jemalloc/src/jemalloc_cpp.cpp b/deps/jemalloc/src/jemalloc_cpp.cpp index 844ab398a7..da0441a7c9 100644 --- a/deps/jemalloc/src/jemalloc_cpp.cpp +++ b/deps/jemalloc/src/jemalloc_cpp.cpp @@ -39,12 +39,10 @@ void operator delete(void *ptr, std::size_t size) noexcept; void operator delete[](void *ptr, std::size_t size) noexcept; #endif -template <bool IsNoExcept> -void * -newImpl(std::size_t size) noexcept(IsNoExcept) { - void *ptr = je_malloc(size); - if (likely(ptr != nullptr)) - return ptr; +JEMALLOC_NOINLINE +static void * +handleOOM(std::size_t size, bool nothrow) { + void *ptr = nullptr; while (ptr == nullptr) { std::new_handler handler; @@ -68,11 +66,22 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { ptr = je_malloc(size); } - if (ptr == nullptr && !IsNoExcept) + if (ptr == nullptr && !nothrow) std::__throw_bad_alloc(); return ptr; } +template <bool IsNoExcept> +JEMALLOC_ALWAYS_INLINE +void * +newImpl(std::size_t size) noexcept(IsNoExcept) { + void *ptr = je_malloc(size); + if (likely(ptr != nullptr)) + return ptr; + + return handleOOM(size, IsNoExcept); +} + void * operator new(std::size_t size) { return newImpl<false>(size); @@ -119,14 +128,14 @@ operator delete(void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } - je_sdallocx(ptr, size, /*flags=*/0); + je_sdallocx_noflags(ptr, size); } void operator delete[](void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } - je_sdallocx(ptr, size, /*flags=*/0); + je_sdallocx_noflags(ptr, size); } #endif // __cpp_sized_deallocation diff --git a/deps/jemalloc/src/large.c b/deps/jemalloc/src/large.c index 27a2c67987..8e7a781d33 100644 --- a/deps/jemalloc/src/large.c +++ b/deps/jemalloc/src/large.c @@ -28,7 +28,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(!tsdn_null(tsdn) || arena != NULL); ausize = sz_sa2u(usize, alignment); - if (unlikely(ausize == 0 || ausize > LARGE_MAXCLASS)) { + if (unlikely(ausize == 0 || ausize > SC_LARGE_MAXCLASS)) { return NULL; } @@ -42,7 +42,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, */ is_zeroed = zero; if (likely(!tsdn_null(tsdn))) { - arena = arena_choose(tsdn_tsd(tsdn), arena); + arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { @@ -109,7 +109,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { 
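/*
 * Editorial sketch, not part of the patch: the routing idea behind the
 * arena_choose_maybe_huge() call introduced in large_palloc() above.
 * Allocations at or above the oversize threshold (8M by default, per the
 * oversize_threshold comment earlier in this diff) are directed to a
 * dedicated huge arena rather than the regular arenas, trading a little
 * contention on that arena for less fragmentation elsewhere.  All names
 * below are hypothetical stand-ins, not jemalloc symbols.
 */
#include <stddef.h>

typedef struct example_arena_s {
	int id;
} example_arena_t;

static example_arena_t example_regular_arena = {0};
static example_arena_t example_huge_arena = {1};

static const size_t example_oversize_threshold = (size_t)8 << 20; /* 8 MiB */

static example_arena_t *
example_choose_arena(example_arena_t *arena, size_t usize) {
	if (arena != NULL) {
		return arena;	/* caller explicitly pinned an arena */
	}
	if (usize >= example_oversize_threshold) {
		return &example_huge_arena;	/* oversize: dedicated arena */
	}
	return &example_regular_arena;	/* normal arena selection */
}

/* Usage: example_choose_arena(NULL, (size_t)16 << 20) returns the huge arena. */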
if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, &extent_hooks, extent, usize + sz_large_pad, - sz_size2index(usize), false, diff, NSIZES, false); + sz_size2index(usize), false, diff, SC_NSIZES, false); if (trail == NULL) { return true; } @@ -154,17 +154,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool new_mapping; if ((trail = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_dirty, extent_past_get(extent), trailsize, 0, - CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL + CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL || (trail = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_muzzy, extent_past_get(extent), trailsize, 0, - CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL) { + CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, false, - NSIZES, &is_zeroed_trail, &commit)) == NULL) { + SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { return true; } if (config_stats) { @@ -221,9 +221,10 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t oldusize = extent_usize_get(extent); /* The following should have been caught by callers. */ - assert(usize_min > 0 && usize_max <= LARGE_MAXCLASS); + assert(usize_min > 0 && usize_max <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS); + assert(oldusize >= SC_LARGE_MINCLASS + && usize_max >= SC_LARGE_MINCLASS); if (usize_max > oldusize) { /* Attempt to expand the allocation in-place. */ @@ -270,17 +271,23 @@ large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, } void * -large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) { - size_t oldusize = extent_usize_get(extent); +large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, + size_t alignment, bool zero, tcache_t *tcache, + hook_ralloc_args_t *hook_args) { + extent_t *extent = iealloc(tsdn, ptr); + size_t oldusize = extent_usize_get(extent); /* The following should have been caught by callers. */ - assert(usize > 0 && usize <= LARGE_MAXCLASS); + assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS); + assert(oldusize >= SC_LARGE_MINCLASS + && usize >= SC_LARGE_MINCLASS); /* Try to avoid moving the allocation. */ if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) { + hook_invoke_expand(hook_args->is_realloc + ? hook_expand_realloc : hook_expand_rallocx, ptr, oldusize, + usize, (uintptr_t)ptr, hook_args->args); return extent_addr_get(extent); } @@ -295,6 +302,12 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, return NULL; } + hook_invoke_alloc(hook_args->is_realloc + ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret, + hook_args->args); + hook_invoke_dalloc(hook_args->is_realloc + ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + size_t copysize = (usize < oldusize) ? 
usize : oldusize; memcpy(ret, extent_addr_get(extent), copysize); isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, NULL, true); @@ -318,8 +331,9 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, large_dalloc_maybe_junk(extent_addr_get(extent), extent_usize_get(extent)); } else { - malloc_mutex_assert_owner(tsdn, &arena->large_mtx); + /* Only hold the large_mtx if necessary. */ if (!arena_is_auto(arena)) { + malloc_mutex_assert_owner(tsdn, &arena->large_mtx); extent_list_remove(&arena->large, extent); } } @@ -369,3 +383,13 @@ void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); } + +nstime_t +large_prof_alloc_time_get(const extent_t *extent) { + return extent_prof_alloc_time_get(extent); +} + +void +large_prof_alloc_time_set(extent_t *extent, nstime_t t) { + extent_prof_alloc_time_set(extent, t); +} diff --git a/deps/jemalloc/src/log.c b/deps/jemalloc/src/log.c new file mode 100644 index 0000000000..778902fb9b --- /dev/null +++ b/deps/jemalloc/src/log.c @@ -0,0 +1,78 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/log.h" + +char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +atomic_b_t log_init_done = ATOMIC_INIT(false); + +/* + * Returns true if we were able to pick out a segment. Fills in r_segment_end + * with a pointer to the first character after the end of the string. + */ +static const char * +log_var_extract_segment(const char* segment_begin) { + const char *end; + for (end = segment_begin; *end != '\0' && *end != '|'; end++) { + } + return end; +} + +static bool +log_var_matches_segment(const char *segment_begin, const char *segment_end, + const char *log_var_begin, const char *log_var_end) { + assert(segment_begin <= segment_end); + assert(log_var_begin < log_var_end); + + ptrdiff_t segment_len = segment_end - segment_begin; + ptrdiff_t log_var_len = log_var_end - log_var_begin; + /* The special '.' segment matches everything. */ + if (segment_len == 1 && *segment_begin == '.') { + return true; + } + if (segment_len == log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0; + } else if (segment_len < log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0 + && log_var_begin[segment_len] == '.'; + } else { + return false; + } +} + +unsigned +log_var_update_state(log_var_t *log_var) { + const char *log_var_begin = log_var->name; + const char *log_var_end = log_var->name + strlen(log_var->name); + + /* Pointer to one before the beginning of the current segment. */ + const char *segment_begin = log_var_names; + + /* + * If log_init done is false, we haven't parsed the malloc conf yet. To + * avoid log-spew, we default to not displaying anything. + */ + if (!atomic_load_b(&log_init_done, ATOMIC_ACQUIRE)) { + return LOG_INITIALIZED_NOT_ENABLED; + } + + while (true) { + const char *segment_end = log_var_extract_segment( + segment_begin); + assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE); + if (log_var_matches_segment(segment_begin, segment_end, + log_var_begin, log_var_end)) { + atomic_store_u(&log_var->state, LOG_ENABLED, + ATOMIC_RELAXED); + return LOG_ENABLED; + } + if (*segment_end == '\0') { + /* Hit the end of the segment string with no match. */ + atomic_store_u(&log_var->state, + LOG_INITIALIZED_NOT_ENABLED, ATOMIC_RELAXED); + return LOG_INITIALIZED_NOT_ENABLED; + } + /* Otherwise, skip the delimiter and continue. 
*/ + segment_begin = segment_end + 1; + } +} diff --git a/deps/jemalloc/src/malloc_io.c b/deps/jemalloc/src/malloc_io.c index 6b99afcd3f..d7cb0f5284 100644 --- a/deps/jemalloc/src/malloc_io.c +++ b/deps/jemalloc/src/malloc_io.c @@ -70,20 +70,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /* malloc_message() setup. */ static void wrtmessage(void *cbopaque, const char *s) { -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) - /* - * Use syscall(2) rather than write(2) when possible in order to avoid - * the possibility of memory allocation within libc. This is necessary - * on FreeBSD; most operating systems do not have this problem though. - * - * syscall() returns long or int, depending on platform, so capture the - * unused result in the widest plausible type to avoid compiler - * warnings. - */ - UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); -#else - UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s)); -#endif + malloc_write_fd(STDERR_FILENO, s, strlen(s)); } JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); @@ -111,7 +98,7 @@ buferror(int err, char *buf, size_t buflen) { FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, (DWORD)buflen, NULL); return 0; -#elif defined(__GLIBC__) && defined(_GNU_SOURCE) +#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); @@ -375,7 +362,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { } \ } while (0) #define GET_ARG_NUMERIC(val, len) do { \ - switch (len) { \ + switch ((unsigned char)len) { \ case '?': \ val = va_arg(ap, int); \ break; \ @@ -645,7 +632,6 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, */ write_cb = (je_malloc_message != NULL) ? 
je_malloc_message : wrtmessage; - cbopaque = NULL; } malloc_vsnprintf(buf, sizeof(buf), format, ap); diff --git a/deps/jemalloc/src/mutex.c b/deps/jemalloc/src/mutex.c index a528ef0c24..3f920f5b1c 100644 --- a/deps/jemalloc/src/mutex.c +++ b/deps/jemalloc/src/mutex.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/spin.h" #ifndef _CRT_SPINCOUNT #define _CRT_SPINCOUNT 4000 @@ -45,7 +46,7 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { mutex_prof_data_t *data = &mutex->prof_data; - UNUSED nstime_t before = NSTIME_ZERO_INITIALIZER; + nstime_t before = NSTIME_ZERO_INITIALIZER; if (ncpus == 1) { goto label_spin_done; @@ -53,8 +54,9 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; do { - CPU_SPINWAIT; - if (!malloc_mutex_trylock_final(mutex)) { + spin_cpu_spinwait(); + if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED) + && !malloc_mutex_trylock_final(mutex)) { data->n_spin_acquired++; return; } @@ -143,9 +145,7 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, } # endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - mutex->lock = OS_UNFAIR_LOCK_INIT; -#elif (defined(JEMALLOC_OSSPIN)) - mutex->lock = 0; + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) if (postpone_init) { mutex->postponed_next = postponed_mutexes; @@ -173,7 +173,7 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, mutex->lock_order = lock_order; if (lock_order == malloc_mutex_address_ordered) { witness_init(&mutex->witness, name, rank, - mutex_addr_comp, &mutex); + mutex_addr_comp, mutex); } else { witness_init(&mutex->witness, name, rank, NULL, NULL); } diff --git a/deps/jemalloc/src/pages.c b/deps/jemalloc/src/pages.c index 6f2ba5669b..13de27a008 100644 --- a/deps/jemalloc/src/pages.c +++ b/deps/jemalloc/src/pages.c @@ -10,6 +10,9 @@ #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT #include <sys/sysctl.h> +#ifdef __FreeBSD__ +#include <vm/vm_param.h> +#endif #endif /******************************************************************************/ @@ -25,6 +28,18 @@ static int mmap_flags; #endif static bool os_overcommits; +const char *thp_mode_names[] = { + "default", + "always", + "never", + "not supported" +}; +thp_mode_t opt_thp = THP_MODE_DEFAULT; +thp_mode_t init_system_thp_mode; + +/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ +static bool pages_can_purge_lazy_runtime = true; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -165,6 +180,35 @@ pages_map(void *addr, size_t size, size_t alignment, bool *commit) { assert(alignment >= PAGE); assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr); +#if defined(__FreeBSD__) && defined(MAP_EXCL) + /* + * FreeBSD has mechanisms both to mmap at specific address without + * touching existing mappings, and to mmap with specific alignment. + */ + { + if (os_overcommits) { + *commit = true; + } + + int prot = *commit ? 
PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + int flags = mmap_flags; + + if (addr != NULL) { + flags |= MAP_FIXED | MAP_EXCL; + } else { + unsigned alignment_bits = ffs_zu(alignment); + assert(alignment_bits > 1); + flags |= MAP_ALIGNED(alignment_bits - 1); + } + + void *ret = mmap(addr, size, prot, flags, -1, 0); + if (ret == MAP_FAILED) { + ret = NULL; + } + + return ret; + } +#endif /* * Ideally, there would be a way to specify alignment to mmap() (like * NetBSD has), but in the absence of such a feature, we have to work @@ -246,19 +290,31 @@ pages_decommit(void *addr, size_t size) { bool pages_purge_lazy(void *addr, size_t size) { - assert(PAGE_ADDR2BASE(addr) == addr); + assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); assert(PAGE_CEILING(size) == size); if (!pages_can_purge_lazy) { return true; } + if (!pages_can_purge_lazy_runtime) { + /* + * Built with lazy purge enabled, but detected it was not + * supported on the current system. + */ + return true; + } #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); return false; -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) && \ - !defined(PAGES_CAN_PURGE_LAZY) - return (madvise(addr, size, MADV_FREE) != 0); +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) + return (madvise(addr, size, +# ifdef MADV_FREE + MADV_FREE +# else + JEMALLOC_MADV_FREE +# endif + ) != 0); #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); @@ -287,36 +343,88 @@ pages_purge_forced(void *addr, size_t size) { #endif } +static bool +pages_huge_impl(void *addr, size_t size, bool aligned) { + if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + return (madvise(addr, size, MADV_HUGEPAGE) != 0); +#else + return true; +#endif +} + bool pages_huge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); + return pages_huge_impl(addr, size, true); +} -#ifdef JEMALLOC_THP - return (madvise(addr, size, MADV_HUGEPAGE) != 0); +static bool +pages_huge_unaligned(void *addr, size_t size) { + return pages_huge_impl(addr, size, false); +} + +static bool +pages_nohuge_impl(void *addr, size_t size, bool aligned) { + if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } + +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else - return true; + return false; #endif } bool pages_nohuge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); + return pages_nohuge_impl(addr, size, true); +} -#ifdef JEMALLOC_THP - return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); +static bool +pages_nohuge_unaligned(void *addr, size_t size) { + return pages_nohuge_impl(addr, size, false); +} + +bool +pages_dontdump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DONTDUMP) != 0; #else return false; #endif } +bool +pages_dodump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DODUMP) != 0; +#else + return false; +#endif +} + + static size_t os_page_detect(void) { #ifdef _WIN32 SYSTEM_INFO si; GetSystemInfo(&si); return si.dwPageSize; +#elif defined(__FreeBSD__) + /* + * 
This returns the value obtained from + * the auxv vector, avoiding a syscall. + */ + return getpagesize(); #else long result = sysconf(_SC_PAGESIZE); if (result == -1) { @@ -333,9 +441,19 @@ os_overcommits_sysctl(void) { size_t sz; sz = sizeof(vm_overcommit); +#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) + int mib[2]; + + mib[0] = CTL_VM; + mib[1] = VM_OVERCOMMIT; + if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) { + return false; /* Error. */ + } +#else if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { return false; /* Error. */ } +#endif return ((vm_overcommit & 0x3) == 0); } @@ -351,27 +469,44 @@ static bool os_overcommits_proc(void) { int fd; char buf[1]; - ssize_t nread; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | - O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | + O_CLOEXEC); + #else + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + #endif #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - fd = (int)syscall(SYS_openat, - AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + #endif #else - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + #endif #endif + if (fd == -1) { return false; /* Error. 
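The pattern introduced here (and again in prof_open_maps() further down) is: prefer O_CLOEXEC at open time, and where the platform lacks it, fall back to setting FD_CLOEXEC with fcntl() after the fact. A small sketch of that fallback as a helper; the wrapper name is mine, not jemalloc's, and the fcntl() path is not atomic with respect to a concurrent fork/exec:

#include <fcntl.h>
#include <unistd.h>

/* Open read-only with close-on-exec set, however the platform allows it. */
static int
open_rdonly_cloexec(const char *path) {
#if defined(O_CLOEXEC)
    return open(path, O_RDONLY | O_CLOEXEC);
#else
    int fd = open(path, O_RDONLY);
    if (fd != -1) {
        fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
    }
    return fd;
#endif
}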
*/ } -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) - nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); -#else - nread = read(fd, &buf, sizeof(buf)); -#endif - + ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else @@ -391,6 +526,75 @@ os_overcommits_proc(void) { } #endif +void +pages_set_thp_state (void *ptr, size_t size) { + if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { + return; + } + assert(opt_thp != thp_mode_not_supported && + init_system_thp_mode != thp_mode_not_supported); + + if (opt_thp == thp_mode_always + && init_system_thp_mode != thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default); + pages_huge_unaligned(ptr, size); + } else if (opt_thp == thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default || + init_system_thp_mode == thp_mode_always); + pages_nohuge_unaligned(ptr, size); + } +} + +static void +init_thp_state(void) { + if (!have_madvise_huge) { + if (metadata_thp_enabled() && opt_abort) { + malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n"); + abort(); + } + goto label_error; + } + + static const char sys_state_madvise[] = "always [madvise] never\n"; + static const char sys_state_always[] = "[always] madvise never\n"; + static const char sys_state_never[] = "always madvise [never]\n"; + char buf[sizeof(sys_state_madvise)]; + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + int fd = (int)syscall(SYS_open, + "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#else + int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#endif + if (fd == -1) { + goto label_error; + } + + ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (nread < 0) { + goto label_error; + } + + if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_default; + } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_always; + } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_never; + } else { + goto label_error; + } + return; +label_error: + opt_thp = init_system_thp_mode = thp_mode_not_supported; +} + bool pages_boot(void) { os_page = os_page_detect(); @@ -419,5 +623,27 @@ pages_boot(void) { os_overcommits = false; #endif + init_thp_state(); + +#ifdef __FreeBSD__ + /* + * FreeBSD doesn't need the check; madvise(2) is known to work. + */ +#else + /* Detect lazy purge runtime support. 
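init_thp_state() above classifies the system-wide THP setting by reading /sys/kernel/mm/transparent_hugepage/enabled, where the kernel marks the active mode with brackets (e.g. "always [madvise] never"). A standalone sketch of the same idea, using plain stdio instead of jemalloc's fd helpers:

#include <stdio.h>
#include <string.h>

/* Returns "always", "madvise", "never", or NULL if undetermined. */
static const char *
system_thp_mode(void) {
    char buf[64];
    FILE *f = fopen("/sys/kernel/mm/transparent_hugepage/enabled", "r");
    if (f == NULL) {
        return NULL;
    }
    const char *mode = NULL;
    if (fgets(buf, sizeof(buf), f) != NULL) {
        if (strstr(buf, "[always]") != NULL) {
            mode = "always";
        } else if (strstr(buf, "[madvise]") != NULL) {
            mode = "madvise";
        } else if (strstr(buf, "[never]") != NULL) {
            mode = "never";
        }
    }
    fclose(f);
    return mode;
}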
*/ + if (pages_can_purge_lazy) { + bool committed = false; + void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed); + if (madv_free_page == NULL) { + return true; + } + assert(pages_can_purge_lazy_runtime); + if (pages_purge_lazy(madv_free_page, PAGE)) { + pages_can_purge_lazy_runtime = false; + } + os_pages_unmap(madv_free_page, PAGE); + } +#endif + return false; } diff --git a/deps/jemalloc/src/prof.c b/deps/jemalloc/src/prof.c index 975722c4c3..13334cb4c0 100644 --- a/deps/jemalloc/src/prof.c +++ b/deps/jemalloc/src/prof.c @@ -7,6 +7,7 @@ #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/emitter.h" /******************************************************************************/ @@ -23,7 +24,7 @@ */ #undef _Unwind_Backtrace #include <unwind.h> -#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #endif /******************************************************************************/ @@ -38,6 +39,7 @@ bool opt_prof_gdump = false; bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; +bool opt_prof_log = false; char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF @@ -70,6 +72,100 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; +typedef enum prof_logging_state_e prof_logging_state_t; +enum prof_logging_state_e { + prof_logging_state_stopped, + prof_logging_state_started, + prof_logging_state_dumping +}; + +/* + * - stopped: log_start never called, or previous log_stop has completed. + * - started: log_start called, log_stop not called yet. Allocations are logged. + * - dumping: log_stop called but not finished; samples are not logged anymore. + */ +prof_logging_state_t prof_logging_state = prof_logging_state_stopped; + +#ifdef JEMALLOC_JET +static bool prof_log_dummy = false; +#endif + +/* Incremented for every log file that is output. */ +static uint64_t log_seq = 0; +static char log_filename[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Timestamp for most recent call to log_start(). */ +static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; + +/* Increment these when adding to the log_bt and log_thr linked lists. */ +static size_t log_bt_index = 0; +static size_t log_thr_index = 0; + +/* Linked list node definitions. These are only used in prof.c. */ +typedef struct prof_bt_node_s prof_bt_node_t; + +struct prof_bt_node_s { + prof_bt_node_t *next; + size_t index; + prof_bt_t bt; + /* Variable size backtrace vector pointed to by bt. */ + void *vec[1]; +}; + +typedef struct prof_thr_node_s prof_thr_node_t; + +struct prof_thr_node_s { + prof_thr_node_t *next; + size_t index; + uint64_t thr_uid; + /* Variable size based on thr_name_sz. */ + char name[1]; +}; + +typedef struct prof_alloc_node_s prof_alloc_node_t; + +/* This is output when logging sampled allocations. */ +struct prof_alloc_node_s { + prof_alloc_node_t *next; + /* Indices into an array of thread data. */ + size_t alloc_thr_ind; + size_t free_thr_ind; + + /* Indices into an array of backtraces. */ + size_t alloc_bt_ind; + size_t free_bt_ind; + + uint64_t alloc_time_ns; + uint64_t free_time_ns; + + size_t usize; +}; + +/* + * Created on the first call to prof_log_start and deleted on prof_log_stop. 
+ * These are the backtraces and threads that have already been logged by an + * allocation. + */ +static bool log_tables_initialized = false; +static ckh_t log_bt_node_set; +static ckh_t log_thr_node_set; + +/* Store linked lists for logged data. */ +static prof_bt_node_t *log_bt_first = NULL; +static prof_bt_node_t *log_bt_last = NULL; +static prof_thr_node_t *log_thr_first = NULL; +static prof_thr_node_t *log_thr_last = NULL; +static prof_alloc_node_t *log_alloc_first = NULL; +static prof_alloc_node_t *log_alloc_last = NULL; + +/* Protects the prof_logging_state and any log_{...} variable. */ +static malloc_mutex_t log_mtx; + /* * Table of mutexes that are shared among gctx's. These are leaf locks, so * there is no problem with using them for more than one gctx at the same time. @@ -145,6 +241,12 @@ static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +/* Hashtable functions for log_bt_node_set and log_thr_node_set. */ +static void prof_thr_node_hash(const void *key, size_t r_hash[2]); +static bool prof_thr_node_keycomp(const void *k1, const void *k2); +static void prof_bt_node_hash(const void *key, size_t r_hash[2]); +static bool prof_bt_node_keycomp(const void *k1, const void *k2); + /******************************************************************************/ /* Red-black trees. */ @@ -242,6 +344,12 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { prof_tctx_set(tsdn, ptr, usize, NULL, tctx); + /* Get the current time and set this in the extent_t. We'll read this + * when free() is called. */ + nstime_t t = NSTIME_ZERO_INITIALIZER; + nstime_update(&t); + prof_alloc_time_set(tsdn, ptr, NULL, t); + malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; @@ -253,14 +361,174 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, malloc_mutex_unlock(tsdn, tctx->tdata->lock); } +static size_t +prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_bt_node_t dummy_node; + dummy_node.bt = *bt; + prof_bt_node_t *node; + + /* See if this backtrace is already cached in the table. */ + if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_bt_node_t, vec) + + (bt->len * sizeof(void *)); + prof_bt_node_t *new_node = (prof_bt_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_bt_first == NULL) { + log_bt_first = new_node; + log_bt_last = new_node; + } else { + log_bt_last->next = new_node; + log_bt_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_bt_index; + /* + * Copy the backtrace: bt is inside a tdata or gctx, which + * might die before prof_log_stop is called. 
+ */ + new_node->bt.len = bt->len; + memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); + new_node->bt.vec = new_node->vec; + + log_bt_index++; + ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} +static size_t +prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_thr_node_t dummy_node; + dummy_node.thr_uid = thr_uid; + prof_thr_node_t *node; + + /* See if this thread is already cached in the table. */ + if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; + prof_thr_node_t *new_node = (prof_thr_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_thr_first == NULL) { + log_thr_first = new_node; + log_thr_last = new_node; + } else { + log_thr_last->next = new_node; + log_thr_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_thr_index; + new_node->thr_uid = thr_uid; + strcpy(new_node->name, name); + + log_thr_index++; + ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} + +static void +prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); + if (cons_tdata == NULL) { + /* + * We decide not to log these allocations. cons_tdata will be + * NULL only when the current thread is in a weird state (e.g. + * it's being destroyed). + */ + return; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + goto label_done; + } + + if (!log_tables_initialized) { + bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp); + bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp); + if (err1 || err2) { + goto label_done; + } + log_tables_initialized = true; + } + + nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, + (alloc_ctx_t *)NULL); + nstime_t free_time = NSTIME_ZERO_INITIALIZER; + nstime_update(&free_time); + + size_t sz = sizeof(prof_alloc_node_t); + prof_alloc_node_t *new_node = (prof_alloc_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + + const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? + "" : tctx->tdata->thread_name; + const char *cons_thr_name = prof_thread_name_get(tsd); + + prof_bt_t bt; + /* Initialize the backtrace, using the buffer in tdata to store it. */ + bt_init(&bt, cons_tdata->vec); + prof_backtrace(&bt); + prof_bt_t *cons_bt = &bt; + + /* We haven't destroyed tctx yet, so gctx should be good to read. 
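prof_log_bt_index() and prof_log_thr_index() above share one interning pattern: probe a cuckoo-hash set (ckh), and only on a miss allocate a node, append it to a singly linked list, and hand out the next sequential index; those indices are what the emitted log later uses to tie allocations to threads and stack traces. A toy restatement of the pattern, with a linear scan standing in for the hash set and locking elided:

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

typedef struct node_s node_t;
struct node_s {
    node_t *next;
    size_t index;
    char key[32];
};

static node_t *head = NULL, *tail = NULL;
static size_t next_index = 0;

/* Return a stable index for key, creating a node on first sight. */
static size_t
intern(const char *key) {
    for (node_t *n = head; n != NULL; n = n->next) {
        if (strcmp(n->key, key) == 0) {
            return n->index;
        }
    }
    node_t *n = calloc(1, sizeof(*n));
    if (n == NULL) {
        return (size_t)-1; /* Real code would handle allocation failure. */
    }
    strncpy(n->key, key, sizeof(n->key) - 1);
    n->index = next_index++;
    if (head == NULL) {
        head = tail = n;
    } else {
        tail->next = n;
        tail = n;
    }
    return n->index;
}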
*/ + prof_bt_t *prod_bt = &tctx->gctx->bt; + + new_node->next = NULL; + new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, + prod_thr_name); + new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, + cons_thr_name); + new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); + new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); + new_node->alloc_time_ns = nstime_ns(&alloc_time); + new_node->free_time_ns = nstime_ns(&free_time); + new_node->usize = usize; + + if (log_alloc_first == NULL) { + log_alloc_first = new_node; + log_alloc_last = new_node; + } else { + log_alloc_last->next = new_node; + log_alloc_last = new_node; + } + +label_done: + malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); +} + void -prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { +prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); + assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; + prof_try_log(tsd, ptr, usize, tctx); + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { prof_tctx_destroy(tsd, tctx); } else { @@ -871,15 +1139,12 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { void prof_sample_threshold_update(prof_tdata_t *tdata) { #ifdef JEMALLOC_PROF - uint64_t r; - double u; - if (!config_prof) { return; } if (lg_prof_sample == 0) { - tdata->bytes_until_sample = 0; + tsd_bytes_until_sample_set(tsd_fetch(), 0); return; } @@ -901,11 +1166,16 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - r = prng_lg_range_u64(&tdata->prng_state, 53); - u = (double)r * (1.0/9007199254740992.0L); - tdata->bytes_until_sample = (uint64_t)(log(u) / + uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53); + double u = (double)r * (1.0/9007199254740992.0L); + uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; + if (bytes_until_sample > SSIZE_MAX) { + bytes_until_sample = SSIZE_MAX; + } + tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample); + #endif } @@ -978,7 +1248,7 @@ prof_dump_flush(bool propagate_err) { cassert(config_prof); - err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); if (err == -1) { if (!propagate_err) { malloc_write("<jemalloc>: write() failed during heap " @@ -1022,7 +1292,7 @@ prof_dump_write(bool propagate_err, const char *s) { } } - if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { + if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { /* Finish writing. */ n = slen - i; } else { @@ -1033,6 +1303,7 @@ prof_dump_write(bool propagate_err, const char *s) { prof_dump_buf_end += n; i += n; } + assert(i == slen); return false; } @@ -1409,7 +1680,15 @@ prof_open_maps(const char *format, ...) 
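prof_sample_threshold_update() above draws the next sampling interval from a geometric distribution with mean 2^lg_prof_sample bytes, using the inverse-CDF method the comment cites: interval = floor(log(u) / log(1 - p)) + 1 with p = 2^-lg_prof_sample, now clamped to SSIZE_MAX and stored in TSD. A self-contained sketch of the same arithmetic (drand48 stands in for jemalloc's PRNG; link with -lm):

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void) {
    unsigned lg_sample = 19; /* Mean interval of 2^19 = 512 KiB. */
    double p = 1.0 / (double)((uint64_t)1 << lg_sample);
    srand48(42);
    for (int i = 0; i < 3; i++) {
        double u = 1.0 - drand48(); /* Uniform in (0, 1]. */
        uint64_t interval = (uint64_t)(log(u) / log(1.0 - p)) + 1;
        printf("next sample after %llu bytes\n",
            (unsigned long long)interval);
    }
    return 0;
}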
{ va_start(ap, format); malloc_vsnprintf(filename, sizeof(filename), format, ap); va_end(ap); + +#if defined(O_CLOEXEC) mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } +#endif return mfd; } @@ -1463,8 +1742,9 @@ prof_dump_maps(bool propagate_err) { goto label_return; } } - nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUFSIZE - prof_dump_buf_end); + nread = malloc_read_fd(mfd, + &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE + - prof_dump_buf_end); } while (nread > 0); } else { ret = true; @@ -1772,7 +2052,7 @@ prof_idump(tsdn_t *tsdn) { cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) { + if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { return; } tsd = tsdn_tsd(tsdn); @@ -1829,7 +2109,7 @@ prof_gdump(tsdn_t *tsdn) { cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) { + if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { return; } tsd = tsdn_tsd(tsdn); @@ -1878,6 +2158,33 @@ prof_bt_keycomp(const void *k1, const void *k2) { return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } +static void +prof_bt_node_hash(const void *key, size_t r_hash[2]) { + const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; + prof_bt_hash((void *)(&bt_node->bt), r_hash); +} + +static bool +prof_bt_node_keycomp(const void *k1, const void *k2) { + const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; + const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; + return prof_bt_keycomp((void *)(&bt_node1->bt), + (void *)(&bt_node2->bt)); +} + +static void +prof_thr_node_hash(const void *key, size_t r_hash[2]) { + const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; + hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); +} + +static bool +prof_thr_node_keycomp(const void *k1, const void *k2) { + const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; + const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; + return thr_node1->thr_uid == thr_node2->thr_uid; +} + static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -2110,6 +2417,368 @@ prof_active_set(tsdn_t *tsdn, bool active) { return prof_active_old; } +#ifdef JEMALLOC_JET +size_t +prof_log_bt_count(void) { + size_t cnt = 0; + prof_bt_node_t *node = log_bt_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_alloc_count(void) { + size_t cnt = 0; + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_thr_count(void) { + size_t cnt = 0; + prof_thr_node_t *node = log_thr_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +bool +prof_log_is_logging(void) { + return prof_logging_state == prof_logging_state_started; +} + +bool +prof_log_rep_check(void) { + if (prof_logging_state == prof_logging_state_stopped + && log_tables_initialized) { + return true; + } + + if (log_bt_last != NULL && log_bt_last->next != NULL) { + return true; + } + if (log_thr_last != NULL && log_thr_last->next != NULL) { + return true; + } + if (log_alloc_last != NULL && log_alloc_last->next != NULL) { + return true; + } + + size_t bt_count = prof_log_bt_count(); + size_t thr_count = prof_log_thr_count(); + size_t alloc_count = prof_log_alloc_count(); + + + if (prof_logging_state == prof_logging_state_stopped) { + if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { + 
return true; + } + } + + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + if (node->alloc_bt_ind >= bt_count) { + return true; + } + if (node->free_bt_ind >= bt_count) { + return true; + } + if (node->alloc_thr_ind >= thr_count) { + return true; + } + if (node->free_thr_ind >= thr_count) { + return true; + } + if (node->alloc_time_ns > node->free_time_ns) { + return true; + } + node = node->next; + } + + return false; +} + +void +prof_log_dummy_set(bool new_value) { + prof_log_dummy = new_value; +} +#endif + +bool +prof_log_start(tsdn_t *tsdn, const char *filename) { + if (!opt_prof || !prof_booted) { + return true; + } + + bool ret = false; + size_t buf_size = PATH_MAX + 1; + + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_stopped) { + ret = true; + } else if (filename == NULL) { + /* Make default name. */ + malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", + opt_prof_prefix, prof_getpid(), log_seq); + log_seq++; + prof_logging_state = prof_logging_state_started; + } else if (strlen(filename) >= buf_size) { + ret = true; + } else { + strcpy(log_filename, filename); + prof_logging_state = prof_logging_state_started; + } + + if (!ret) { + nstime_update(&log_start_timestamp); + } + + malloc_mutex_unlock(tsdn, &log_mtx); + + return ret; +} + +/* Used as an atexit function to stop logging on exit. */ +static void +prof_log_stop_final(void) { + tsd_t *tsd = tsd_fetch(); + prof_log_stop(tsd_tsdn(tsd)); +} + +struct prof_emitter_cb_arg_s { + int fd; + ssize_t ret; +}; + +static void +prof_emitter_write_cb(void *opaque, const char *to_write) { + struct prof_emitter_cb_arg_s *arg = + (struct prof_emitter_cb_arg_s *)opaque; + size_t bytes = strlen(to_write); +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return; + } +#endif + arg->ret = write(arg->fd, (void *)to_write, bytes); +} + +/* + * prof_log_emit_{...} goes through the appropriate linked list, emitting each + * node to the json and deallocating it. + */ +static void +prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "threads"); + prof_thr_node_t *thr_node = log_thr_first; + prof_thr_node_t *thr_old_node; + while (thr_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, + &thr_node->thr_uid); + + char *thr_name = thr_node->name; + + emitter_json_kv(emitter, "thr_name", emitter_type_string, + &thr_name); + + emitter_json_object_end(emitter); + thr_old_node = thr_node; + thr_node = thr_node->next; + idalloc(tsd, thr_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "stack_traces"); + prof_bt_node_t *bt_node = log_bt_first; + prof_bt_node_t *bt_old_node; + /* + * Calculate how many hex digits we need: twice number of bytes, two for + * "0x", and then one more for terminating '\0'. 
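For context on how the logging added above is driven: prof_log_start() falls back to a <prof_prefix>.<pid>.<seq>.json filename when none is given, and opt_prof_log starts a log at boot. Application-side control presumably goes through mallctl endpoints that are not part of this hunk; the "prof.log_start" and "prof.log_stop" names below are an assumption to be checked against ctl.c in the same release, not something this diff shows:

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

/* Hypothetical driver; the mallctl names are assumed, not shown here. */
static void
log_some_allocations(void) {
    const char *filename = "prof_log_demo.json";
    mallctl("prof.log_start", NULL, NULL, (void *)&filename,
        sizeof(filename));

    void *p = malloc(1 << 20); /* Sampled allocations get recorded. */
    free(p);

    mallctl("prof.log_stop", NULL, NULL, NULL, NULL);
}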
+ */ + char buf[2 * sizeof(intptr_t) + 3]; + size_t buf_sz = sizeof(buf); + while (bt_node != NULL) { + emitter_json_array_begin(emitter); + size_t i; + for (i = 0; i < bt_node->bt.len; i++) { + malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); + char *trace_str = buf; + emitter_json_value(emitter, emitter_type_string, + &trace_str); + } + emitter_json_array_end(emitter); + + bt_old_node = bt_node; + bt_node = bt_node->next; + idalloc(tsd, bt_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "allocations"); + prof_alloc_node_t *alloc_node = log_alloc_first; + prof_alloc_node_t *alloc_old_node; + while (alloc_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "alloc_thread", emitter_type_size, + &alloc_node->alloc_thr_ind); + + emitter_json_kv(emitter, "free_thread", emitter_type_size, + &alloc_node->free_thr_ind); + + emitter_json_kv(emitter, "alloc_trace", emitter_type_size, + &alloc_node->alloc_bt_ind); + + emitter_json_kv(emitter, "free_trace", emitter_type_size, + &alloc_node->free_bt_ind); + + emitter_json_kv(emitter, "alloc_timestamp", + emitter_type_uint64, &alloc_node->alloc_time_ns); + + emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, + &alloc_node->free_time_ns); + + emitter_json_kv(emitter, "usize", emitter_type_uint64, + &alloc_node->usize); + + emitter_json_object_end(emitter); + + alloc_old_node = alloc_node; + alloc_node = alloc_node->next; + idalloc(tsd, alloc_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_metadata(emitter_t *emitter) { + emitter_json_object_kv_begin(emitter, "info"); + + nstime_t now = NSTIME_ZERO_INITIALIZER; + + nstime_update(&now); + uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); + emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); + + char *vers = JEMALLOC_VERSION; + emitter_json_kv(emitter, "version", + emitter_type_string, &vers); + + emitter_json_kv(emitter, "lg_sample_rate", + emitter_type_int, &lg_prof_sample); + + int pid = prof_getpid(); + emitter_json_kv(emitter, "pid", emitter_type_int, &pid); + + emitter_json_object_end(emitter); +} + + +bool +prof_log_stop(tsdn_t *tsdn) { + if (!opt_prof || !prof_booted) { + return true; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + malloc_mutex_unlock(tsdn, &log_mtx); + return true; + } + + /* + * Set the state to dumping. We'll set it to stopped when we're done. + * Since other threads won't be able to start/stop/log when the state is + * dumping, we don't have to hold the lock during the whole method. + */ + prof_logging_state = prof_logging_state_dumping; + malloc_mutex_unlock(tsdn, &log_mtx); + + + emitter_t emitter; + + /* Create a file. */ + + int fd; +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + fd = 0; + } else { + fd = creat(log_filename, 0644); + } +#else + fd = creat(log_filename, 0644); +#endif + + if (fd == -1) { + malloc_printf("<jemalloc>: creat() for log file \"%s\" " + " failed with %d\n", log_filename, errno); + if (opt_abort) { + abort(); + } + return true; + } + + /* Emit to json. 
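Pulling the emitters above together, the log file is a single JSON object whose "allocations" entries refer to the interned "threads" and "stack_traces" tables by index. Reconstructed from the emitter calls only (the field names come from the code above; every value below is a fabricated placeholder), the output has roughly this shape:

/* Shape only; all values are invented placeholders. */
static const char *prof_log_shape =
    "{\n"
    "  \"info\": {\"duration\": 1000000, \"version\": \"...\",\n"
    "            \"lg_sample_rate\": 19, \"pid\": 4242},\n"
    "  \"threads\": [{\"thr_uid\": 0, \"thr_name\": \"\"}],\n"
    "  \"stack_traces\": [[\"0x400123\", \"0x400456\"]],\n"
    "  \"allocations\": [{\"alloc_thread\": 0, \"free_thread\": 0,\n"
    "                   \"alloc_trace\": 0, \"free_trace\": 0,\n"
    "                   \"alloc_timestamp\": 0, \"free_timestamp\": 250,\n"
    "                   \"usize\": 4096}]\n"
    "}";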
*/ + struct prof_emitter_cb_arg_s arg; + arg.fd = fd; + emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, + (void *)(&arg)); + + emitter_begin(&emitter); + prof_log_emit_metadata(&emitter); + prof_log_emit_threads(tsd, &emitter); + prof_log_emit_traces(tsd, &emitter); + prof_log_emit_allocs(tsd, &emitter); + emitter_end(&emitter); + + /* Reset global state. */ + if (log_tables_initialized) { + ckh_delete(tsd, &log_bt_node_set); + ckh_delete(tsd, &log_thr_node_set); + } + log_tables_initialized = false; + log_bt_index = 0; + log_thr_index = 0; + log_bt_first = NULL; + log_bt_last = NULL; + log_thr_first = NULL; + log_thr_last = NULL; + log_alloc_first = NULL; + log_alloc_last = NULL; + + malloc_mutex_lock(tsdn, &log_mtx); + prof_logging_state = prof_logging_state_stopped; + malloc_mutex_unlock(tsdn, &log_mtx); + +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return false; + } +#endif + return close(fd); +} + const char * prof_thread_name_get(tsd_t *tsd) { prof_tdata_t *tdata; @@ -2346,6 +3015,35 @@ prof_boot2(tsd_t *tsd) { } } + if (opt_prof_log) { + prof_log_start(tsd_tsdn(tsd), NULL); + } + + if (atexit(prof_log_stop_final) != 0) { + malloc_write("<jemalloc>: Error in atexit() " + "for logging\n"); + if (opt_abort) { + abort(); + } + } + + if (malloc_mutex_init(&log_mtx, "prof_log", + WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + return true; + } + + if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp)) { + return true; + } + + if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp)) { + return true; + } + + log_tables_initialized = true; + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); @@ -2373,16 +3071,14 @@ prof_boot2(tsd_t *tsd) { return true; } } - } - #ifdef JEMALLOC_PROF_LIBGCC - /* - * Cause the backtracing machinery to allocate its internal state - * before enabling profiling. - */ - _Unwind_Backtrace(prof_unwind_init_callback, NULL); + /* + * Cause the backtracing machinery to allocate its internal + * state before enabling profiling. + */ + _Unwind_Backtrace(prof_unwind_init_callback, NULL); #endif - + } prof_booted = true; return false; diff --git a/deps/jemalloc/src/rtree.c b/deps/jemalloc/src/rtree.c index 53702cf723..4ae41fe2fe 100644 --- a/deps/jemalloc/src/rtree.c +++ b/deps/jemalloc/src/rtree.c @@ -39,7 +39,7 @@ rtree_node_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) { /* Nodes are never deleted during normal operation. */ not_reached(); } -UNUSED rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc = +rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc = rtree_node_dalloc_impl; static rtree_leaf_elm_t * @@ -54,7 +54,7 @@ rtree_leaf_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) { /* Leaves are never deleted during normal operation. 
*/ not_reached(); } -UNUSED rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc = +rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc = rtree_leaf_dalloc_impl; #ifdef JEMALLOC_JET diff --git a/deps/jemalloc/src/safety_check.c b/deps/jemalloc/src/safety_check.c new file mode 100644 index 0000000000..804155dcfc --- /dev/null +++ b/deps/jemalloc/src/safety_check.c @@ -0,0 +1,24 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +static void (*safety_check_abort)(const char *message); + +void safety_check_set_abort(void (*abort_fn)(const char *)) { + safety_check_abort = abort_fn; +} + +void safety_check_fail(const char *format, ...) { + char buf[MALLOC_PRINTF_BUFSIZE]; + + va_list ap; + va_start(ap, format); + malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap); + va_end(ap); + + if (safety_check_abort == NULL) { + malloc_write(buf); + abort(); + } else { + safety_check_abort(buf); + } +} diff --git a/deps/jemalloc/src/sc.c b/deps/jemalloc/src/sc.c new file mode 100644 index 0000000000..89ddb6ba6a --- /dev/null +++ b/deps/jemalloc/src/sc.c @@ -0,0 +1,313 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/sc.h" + +/* + * This module computes the size classes used to satisfy allocations. The logic + * here was ported more or less line-by-line from a shell script, and because of + * that is not the most idiomatic C. Eventually we should fix this, but for now + * at least the damage is compartmentalized to this file. + */ + +sc_data_t sc_data_global; + +static size_t +reg_size_compute(int lg_base, int lg_delta, int ndelta) { + return (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); +} + +/* Returns the number of pages in the slab. */ +static int +slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) { + size_t page = (ZU(1) << lg_page); + size_t reg_size = reg_size_compute(lg_base, lg_delta, ndelta); + + size_t try_slab_size = page; + size_t try_nregs = try_slab_size / reg_size; + size_t perfect_slab_size = 0; + bool perfect = false; + /* + * This loop continues until we find the least common multiple of the + * page size and size class size. Size classes are all of the form + * base + ndelta * delta == (ndelta + base/ndelta) * delta, which is + * (ndelta + ngroup) * delta. The way we choose slabbing strategies + * means that delta is at most the page size and ndelta < ngroup. So + * the loop executes for at most 2 * ngroup - 1 iterations, which is + * also the bound on the number of pages in a slab chosen by default. + * With the current default settings, this is at most 7. + */ + while (!perfect) { + perfect_slab_size = try_slab_size; + size_t perfect_nregs = try_nregs; + try_slab_size += page; + try_nregs = try_slab_size / reg_size; + if (perfect_slab_size == perfect_nregs * reg_size) { + perfect = true; + } + } + return (int)(perfect_slab_size / page); +} + +static void +size_class( + /* Output. */ + sc_t *sc, + /* Configuration decisions. */ + int lg_max_lookup, int lg_page, int lg_ngroup, + /* Inputs specific to the size class. 
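The loop in slab_size() above grows the candidate slab one page at a time until the slab size is an exact multiple of the region size, i.e. it finds lcm(page, reg_size) and hence a slab with no wasted tail space. For example, with 4 KiB pages and the 320-byte class (256 + 1*64), one through four pages all leave a remainder, and the loop settles on 20480 bytes, which is 5 pages holding exactly 64 regions. A trimmed restatement of that computation (equivalent for this purpose, not jemalloc's exact code):

#include <stddef.h>
#include <stdio.h>

/* Smallest number of pages whose total size is a multiple of reg_size. */
static size_t
pages_for_perfect_slab(size_t page, size_t reg_size) {
    size_t slab = page;
    while (slab % reg_size != 0) {
        slab += page;
    }
    return slab / page;
}

int
main(void) {
    /* 320-byte size class, 4 KiB pages: 5 pages, 64 regions, no waste. */
    size_t pgs = pages_for_perfect_slab(4096, 320);
    printf("%zu pages, %zu regions\n", pgs, pgs * 4096 / 320);
    return 0;
}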
*/ + int index, int lg_base, int lg_delta, int ndelta) { + sc->index = index; + sc->lg_base = lg_base; + sc->lg_delta = lg_delta; + sc->ndelta = ndelta; + sc->psz = (reg_size_compute(lg_base, lg_delta, ndelta) + % (ZU(1) << lg_page) == 0); + size_t size = (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); + if (index == 0) { + assert(!sc->psz); + } + if (size < (ZU(1) << (lg_page + lg_ngroup))) { + sc->bin = true; + sc->pgs = slab_size(lg_page, lg_base, lg_delta, ndelta); + } else { + sc->bin = false; + sc->pgs = 0; + } + if (size <= (ZU(1) << lg_max_lookup)) { + sc->lg_delta_lookup = lg_delta; + } else { + sc->lg_delta_lookup = 0; + } +} + +static void +size_classes( + /* Output. */ + sc_data_t *sc_data, + /* Determined by the system. */ + size_t lg_ptr_size, int lg_quantum, + /* Configuration decisions. */ + int lg_tiny_min, int lg_max_lookup, int lg_page, int lg_ngroup) { + int ptr_bits = (1 << lg_ptr_size) * 8; + int ngroup = (1 << lg_ngroup); + int ntiny = 0; + int nlbins = 0; + int lg_tiny_maxclass = (unsigned)-1; + int nbins = 0; + int npsizes = 0; + + int index = 0; + + int ndelta = 0; + int lg_base = lg_tiny_min; + int lg_delta = lg_base; + + /* Outputs that we update as we go. */ + size_t lookup_maxclass = 0; + size_t small_maxclass = 0; + int lg_large_minclass = 0; + size_t large_maxclass = 0; + + /* Tiny size classes. */ + while (lg_base < lg_quantum) { + sc_t *sc = &sc_data->sc[index]; + size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, + lg_base, lg_delta, ndelta); + if (sc->lg_delta_lookup != 0) { + nlbins = index + 1; + } + if (sc->psz) { + npsizes++; + } + if (sc->bin) { + nbins++; + } + ntiny++; + /* Final written value is correct. */ + lg_tiny_maxclass = lg_base; + index++; + lg_delta = lg_base; + lg_base++; + } + + /* First non-tiny (pseudo) group. */ + if (ntiny != 0) { + sc_t *sc = &sc_data->sc[index]; + /* + * See the note in sc.h; the first non-tiny size class has an + * unusual encoding. + */ + lg_base--; + ndelta = 1; + size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, + lg_base, lg_delta, ndelta); + index++; + lg_base++; + lg_delta++; + if (sc->psz) { + npsizes++; + } + if (sc->bin) { + nbins++; + } + } + while (ndelta < ngroup) { + sc_t *sc = &sc_data->sc[index]; + size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, + lg_base, lg_delta, ndelta); + index++; + ndelta++; + if (sc->psz) { + npsizes++; + } + if (sc->bin) { + nbins++; + } + } + + /* All remaining groups. */ + lg_base = lg_base + lg_ngroup; + while (lg_base < ptr_bits - 1) { + ndelta = 1; + int ndelta_limit; + if (lg_base == ptr_bits - 2) { + ndelta_limit = ngroup - 1; + } else { + ndelta_limit = ngroup; + } + while (ndelta <= ndelta_limit) { + sc_t *sc = &sc_data->sc[index]; + size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, + lg_base, lg_delta, ndelta); + if (sc->lg_delta_lookup != 0) { + nlbins = index + 1; + /* Final written value is correct. */ + lookup_maxclass = (ZU(1) << lg_base) + + (ZU(ndelta) << lg_delta); + } + if (sc->psz) { + npsizes++; + } + if (sc->bin) { + nbins++; + /* Final written value is correct. */ + small_maxclass = (ZU(1) << lg_base) + + (ZU(ndelta) << lg_delta); + if (lg_ngroup > 0) { + lg_large_minclass = lg_base + 1; + } else { + lg_large_minclass = lg_base + 2; + } + } + large_maxclass = (ZU(1) << lg_base) + + (ZU(ndelta) << lg_delta); + index++; + ndelta++; + } + lg_base++; + lg_delta++; + } + /* Additional outputs. */ + int nsizes = index; + unsigned lg_ceil_nsizes = lg_ceil(nsizes); + + /* Fill in the output data. 
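size_classes() above generates classes of the form (1 << lg_base) + ndelta * (1 << lg_delta), walking groups of 2^lg_ngroup classes (sc_data_init passes lg_ngroup = 2, so four per group). For instance, the group above 256 bytes uses lg_delta = 6 and ndelta = 1..4, yielding 320, 384, 448 and 512. A few lines that enumerate that one group:

#include <stdint.h>
#include <stdio.h>

int
main(void) {
    int lg_base = 8, lg_delta = 6, ngroup = 4; /* Group above 256 bytes. */
    for (int ndelta = 1; ndelta <= ngroup; ndelta++) {
        uint64_t size = ((uint64_t)1 << lg_base) +
            ((uint64_t)ndelta << lg_delta);
        printf("class: %llu bytes\n", (unsigned long long)size);
    }
    return 0;
}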
*/ + sc_data->ntiny = ntiny; + sc_data->nlbins = nlbins; + sc_data->nbins = nbins; + sc_data->nsizes = nsizes; + sc_data->lg_ceil_nsizes = lg_ceil_nsizes; + sc_data->npsizes = npsizes; + sc_data->lg_tiny_maxclass = lg_tiny_maxclass; + sc_data->lookup_maxclass = lookup_maxclass; + sc_data->small_maxclass = small_maxclass; + sc_data->lg_large_minclass = lg_large_minclass; + sc_data->large_minclass = (ZU(1) << lg_large_minclass); + sc_data->large_maxclass = large_maxclass; + + /* + * We compute these values in two ways: + * - Incrementally, as above. + * - In macros, in sc.h. + * The computation is easier when done incrementally, but putting it in + * a constant makes it available to the fast paths without having to + * touch the extra global cacheline. We assert, however, that the two + * computations are equivalent. + */ + assert(sc_data->npsizes == SC_NPSIZES); + assert(sc_data->lg_tiny_maxclass == SC_LG_TINY_MAXCLASS); + assert(sc_data->small_maxclass == SC_SMALL_MAXCLASS); + assert(sc_data->large_minclass == SC_LARGE_MINCLASS); + assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS); + assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS); + + /* + * In the allocation fastpath, we want to assume that we can + * unconditionally subtract the requested allocation size from + * a ssize_t, and detect passing through 0 correctly. This + * results in optimal generated code. For this to work, the + * maximum allocation size must be less than SSIZE_MAX. + */ + assert(SC_LARGE_MAXCLASS < SSIZE_MAX); +} + +void +sc_data_init(sc_data_t *sc_data) { + assert(!sc_data->initialized); + + int lg_max_lookup = 12; + + size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN, + lg_max_lookup, LG_PAGE, 2); + + sc_data->initialized = true; +} + +static void +sc_data_update_sc_slab_size(sc_t *sc, size_t reg_size, size_t pgs_guess) { + size_t min_pgs = reg_size / PAGE; + if (reg_size % PAGE != 0) { + min_pgs++; + } + /* + * BITMAP_MAXBITS is actually determined by putting the smallest + * possible size-class on one page, so this can never be 0. 
+ */ + size_t max_pgs = BITMAP_MAXBITS * reg_size / PAGE; + + assert(min_pgs <= max_pgs); + assert(min_pgs > 0); + assert(max_pgs >= 1); + if (pgs_guess < min_pgs) { + sc->pgs = (int)min_pgs; + } else if (pgs_guess > max_pgs) { + sc->pgs = (int)max_pgs; + } else { + sc->pgs = (int)pgs_guess; + } +} + +void +sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) { + assert(data->initialized); + for (int i = 0; i < data->nsizes; i++) { + sc_t *sc = &data->sc[i]; + if (!sc->bin) { + break; + } + size_t reg_size = reg_size_compute(sc->lg_base, sc->lg_delta, + sc->ndelta); + if (begin <= reg_size && reg_size <= end) { + sc_data_update_sc_slab_size(sc, reg_size, pgs); + } + } +} + +void +sc_boot(sc_data_t *data) { + sc_data_init(data); +} diff --git a/deps/jemalloc/src/stats.c b/deps/jemalloc/src/stats.c index 087df7676e..118e05d291 100644 --- a/deps/jemalloc/src/stats.c +++ b/deps/jemalloc/src/stats.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/emitter.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" @@ -51,6 +52,20 @@ char opt_stats_print_opts[stats_print_tot_num_options+1] = ""; /******************************************************************************/ +static uint64_t +rate_per_second(uint64_t value, uint64_t uptime_ns) { + uint64_t billion = 1000000000; + if (uptime_ns == 0 || value == 0) { + return 0; + } + if (uptime_ns < billion) { + return value; + } else { + uint64_t uptime_s = uptime_ns / billion; + return value / uptime_s; + } +} + /* Calculate x.yyy and output a string (takes a fixed sized char array). */ static bool get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) { @@ -84,41 +99,175 @@ gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, } static void -read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, - uint64_t results[mutex_prof_num_counters]) { +mutex_stats_init_cols(emitter_row_t *row, const char *table_name, + emitter_col_t *name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; + + emitter_col_t *col; + + if (name != NULL) { + emitter_col_init(name, row); + name->justify = emitter_justify_left; + name->width = 21; + name->type = emitter_type_title; + name->str_val = table_name; + } + +#define WIDTH_uint32_t 12 +#define WIDTH_uint64_t 16 +#define OP(counter, counter_type, human, derived, base_counter) \ + col = &col_##counter_type[k_##counter_type]; \ + ++k_##counter_type; \ + emitter_col_init(col, row); \ + col->justify = emitter_justify_right; \ + col->width = derived ? 
8 : WIDTH_##counter_type; \ + col->type = emitter_type_title; \ + col->str_val = human; + MUTEX_PROF_COUNTERS +#undef OP +#undef WIDTH_uint32_t +#undef WIDTH_uint64_t + col_uint64_t[mutex_counter_total_wait_time_ps].width = 10; +} + +static void +mutex_stats_read_global(const char *name, emitter_col_t *col_name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.bins.0","mutex", #c); \ - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ - (t *)&results[mutex_counter_##c], t); -MUTEX_PROF_COUNTERS + + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "mutexes", name, #counter); \ + CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + } + MUTEX_PROF_COUNTERS #undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t } static void -mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], - const char *json_indent, bool last) { - malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); - - mutex_prof_counter_ind_t k = 0; - char *fmt_str[2] = {"%s\t\"%s\": %"FMTu32"%s\n", - "%s\t\"%s\": %"FMTu64"%s\n"}; -#define OP(c, t) \ - malloc_cprintf(write_cb, cbopaque, \ - fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - json_indent, #c, (t)stats[mutex_counter_##c], \ - (++k == mutex_prof_num_counters) ? 
"" : ","); -MUTEX_PROF_COUNTERS +mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, + const char *name, emitter_col_t *col_name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.mutexes", arena_mutex_names[mutex_ind], #counter);\ + CTL_M2_GET(cmd, arena_ind, (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + } + MUTEX_PROF_COUNTERS +#undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + +static void +mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + emitter_col_t *dst; + +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.bins.0","mutex", #counter); \ + CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + } + MUTEX_PROF_COUNTERS #undef OP - malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, - last ? "" : ","); +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t } +/* "row" can be NULL to avoid emitting in table mode. 
*/ static void -stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool large, bool mutex, unsigned i) { +mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + if (row != NULL) { + emitter_table_row(emitter, row); + } + + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; + + emitter_col_t *col; + +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, type, human, derived, base_counter) \ + if (!derived) { \ + col = &col_##type[k_##type]; \ + ++k_##type; \ + emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ + (const void *)&col->bool_val); \ + } + MUTEX_PROF_COUNTERS; +#undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + +#define COL(row_name, column_name, left_or_right, col_width, etype) \ + emitter_col_t col_##column_name; \ + emitter_col_init(&col_##column_name, &row_name); \ + col_##column_name.justify = emitter_justify_##left_or_right; \ + col_##column_name.width = col_width; \ + col_##column_name.type = emitter_type_##etype; + +#define COL_HDR(row_name, column_name, human, left_or_right, col_width, etype) \ + COL(row_name, column_name, left_or_right, col_width, etype) \ + emitter_col_t header_##column_name; \ + emitter_col_init(&header_##column_name, &header_##row_name); \ + header_##column_name.justify = emitter_justify_##left_or_right; \ + header_##column_name.width = col_width; \ + header_##column_name.type = emitter_type_title; \ + header_##column_name.str_val = human ? human : #column_name; + + +static void +stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t uptime) { size_t page; bool in_gap, in_gap_prev; unsigned nbins, j; @@ -126,23 +275,71 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_GET("arenas.nbins", &nbins, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"bins\": [\n"); - } else { - char *mutex_counters = " n_lock_ops n_waiting" - " n_spin_acq total_wait_ns max_wait_ns\n"; - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curslabs regs" - " pgs util nfills nflushes newslabs" - " reslabs%s", mutex ? mutex_counters : "\n"); + + emitter_row_t header_row; + emitter_row_init(&header_row); + + emitter_row_t row; + emitter_row_init(&row); + + COL_HDR(row, size, NULL, right, 20, size) + COL_HDR(row, ind, NULL, right, 4, unsigned) + COL_HDR(row, allocated, NULL, right, 13, uint64) + COL_HDR(row, nmalloc, NULL, right, 13, uint64) + COL_HDR(row, nmalloc_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, ndalloc, NULL, right, 13, uint64) + COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nrequests, NULL, right, 13, uint64) + COL_HDR(row, nrequests_ps, "(#/sec)", right, 10, uint64) + COL_HDR(row, nshards, NULL, right, 9, unsigned) + COL_HDR(row, curregs, NULL, right, 13, size) + COL_HDR(row, curslabs, NULL, right, 13, size) + COL_HDR(row, nonfull_slabs, NULL, right, 15, size) + COL_HDR(row, regs, NULL, right, 5, unsigned) + COL_HDR(row, pgs, NULL, right, 4, size) + /* To buffer a right- and left-justified column. 
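The mutex readers above (mutex_stats_init_cols, mutex_stats_read_*, mutex_stats_emit) all rely on the X-macro MUTEX_PROF_COUNTERS: each site defines OP(counter, counter_type, human, derived, base_counter) to stamp out one statement per counter, expands the list, then undefines OP. A reduced, self-contained example of the pattern (the counter list below is invented for illustration):

#include <stdint.h>
#include <stdio.h>

/* One OP() entry per counter: name, C type, human-readable label. */
#define DEMO_COUNTERS                          \
    OP(num_ops, uint64_t, "ops")               \
    OP(num_wait, uint64_t, "waits")            \
    OP(max_thds, uint32_t, "max thds")

typedef struct {
    /* Expand the list into struct fields. */
#define OP(name, type, human) type name;
    DEMO_COUNTERS
#undef OP
} demo_stats_t;

static void
demo_stats_print(const demo_stats_t *s) {
    /* Expand the same list into print statements. */
#define OP(name, type, human) \
    printf("%s: %llu\n", human, (unsigned long long)s->name);
    DEMO_COUNTERS
#undef OP
}

int
main(void) {
    demo_stats_t s = {10, 2, 1};
    demo_stats_print(&s);
    return 0;
}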
*/ + COL_HDR(row, justify_spacer, NULL, right, 1, title) + COL_HDR(row, util, NULL, right, 6, title) + COL_HDR(row, nfills, NULL, right, 13, uint64) + COL_HDR(row, nfills_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nflushes, NULL, right, 13, uint64) + COL_HDR(row, nflushes_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nslabs, NULL, right, 13, uint64) + COL_HDR(row, nreslabs, NULL, right, 13, uint64) + COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64) + + /* Don't want to actually print the name. */ + header_justify_spacer.str_val = " "; + col_justify_spacer.str_val = " "; + + emitter_col_t col_mutex64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col_mutex32[mutex_prof_num_uint32_t_counters]; + + emitter_col_t header_mutex64[mutex_prof_num_uint64_t_counters]; + emitter_col_t header_mutex32[mutex_prof_num_uint32_t_counters]; + + if (mutex) { + mutex_stats_init_cols(&row, NULL, NULL, col_mutex64, + col_mutex32); + mutex_stats_init_cols(&header_row, NULL, NULL, header_mutex64, + header_mutex32); } + + /* + * We print a "bins:" header as part of the table row; we need to adjust + * the header size column to compensate. + */ + header_size.width -=5; + emitter_table_printf(emitter, "bins:"); + emitter_table_row(emitter, &header_row); + emitter_json_array_kv_begin(emitter, "bins"); + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; size_t reg_size, slab_size, curregs; size_t curslabs; - uint32_t nregs; + size_t nonfull_slabs; + uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; @@ -151,14 +348,15 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, in_gap_prev = in_gap; in_gap = (nslabs == 0); - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, " ---\n"); } CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); CTL_M2_GET("arenas.bin.0.slab_size", j, &slab_size, size_t); + CTL_M2_GET("arenas.bin.0.nshards", j, &nshards, uint32_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, uint64_t); @@ -176,106 +374,128 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nonfull_slabs", i, j, &nonfull_slabs, + size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curregs\": %zu,\n" - "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nreslabs\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curslabs\": %zu%s\n", - nmalloc, ndalloc, curregs, nrequests, nfills, - nflushes, nreslabs, curslabs, mutex ? "," : ""); - if (mutex) { - uint64_t mutex_stats[mutex_prof_num_counters]; - read_arena_bin_mutex_stats(i, j, mutex_stats); - mutex_stats_output_json(write_cb, cbopaque, - "mutex", mutex_stats, "\t\t\t\t\t\t", true); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t}%s\n", - (j + 1 < nbins) ? 
"," : ""); - } else if (!in_gap) { - size_t availregs = nregs * curslabs; - char util[6]; - if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, - util)) { - if (availregs == 0) { - malloc_snprintf(util, sizeof(util), - "1"); - } else if (curregs > availregs) { - /* - * Race detected: the counters were read - * in separate mallctl calls and - * concurrent operations happened in - * between. In this case no meaningful - * utilization can be computed. - */ - malloc_snprintf(util, sizeof(util), - " race"); - } else { - not_reached(); - } - } - uint64_t mutex_stats[mutex_prof_num_counters]; - if (mutex) { - read_arena_bin_mutex_stats(i, j, mutex_stats); - } + if (mutex) { + mutex_stats_read_arena_bin(i, j, col_mutex64, + col_mutex32, uptime); + } - malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12" - FMTu64" %12"FMTu64" %12"FMTu64" %12zu %12zu %4u" - " %3zu %-5s %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %12"FMTu64, reg_size, j, curregs * reg_size, - nmalloc, ndalloc, nrequests, curregs, curslabs, - nregs, slab_size / page, util, nfills, nflushes, - nslabs, nreslabs); - - /* Output less info for bin mutexes to save space. */ - if (mutex) { - malloc_cprintf(write_cb, cbopaque, - " %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %14"FMTu64" %12"FMTu64"\n", - mutex_stats[mutex_counter_num_ops], - mutex_stats[mutex_counter_num_wait], - mutex_stats[mutex_counter_num_spin_acq], - mutex_stats[mutex_counter_total_wait_time], - mutex_stats[mutex_counter_max_wait_time]); + emitter_json_object_begin(emitter); + emitter_json_kv(emitter, "nmalloc", emitter_type_uint64, + &nmalloc); + emitter_json_kv(emitter, "ndalloc", emitter_type_uint64, + &ndalloc); + emitter_json_kv(emitter, "curregs", emitter_type_size, + &curregs); + emitter_json_kv(emitter, "nrequests", emitter_type_uint64, + &nrequests); + emitter_json_kv(emitter, "nfills", emitter_type_uint64, + &nfills); + emitter_json_kv(emitter, "nflushes", emitter_type_uint64, + &nflushes); + emitter_json_kv(emitter, "nreslabs", emitter_type_uint64, + &nreslabs); + emitter_json_kv(emitter, "curslabs", emitter_type_size, + &curslabs); + emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, + &nonfull_slabs); + if (mutex) { + emitter_json_object_kv_begin(emitter, "mutex"); + mutex_stats_emit(emitter, NULL, col_mutex64, + col_mutex32); + emitter_json_object_end(emitter); + } + emitter_json_object_end(emitter); + + size_t availregs = nregs * curslabs; + char util[6]; + if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, util)) + { + if (availregs == 0) { + malloc_snprintf(util, sizeof(util), "1"); + } else if (curregs > availregs) { + /* + * Race detected: the counters were read in + * separate mallctl calls and concurrent + * operations happened in between. In this case + * no meaningful utilization can be computed. 
+ */ + malloc_snprintf(util, sizeof(util), " race"); } else { - malloc_cprintf(write_cb, cbopaque, "\n"); + not_reached(); } } + + col_size.size_val = reg_size; + col_ind.unsigned_val = j; + col_allocated.size_val = curregs * reg_size; + col_nmalloc.uint64_val = nmalloc; + col_nmalloc_ps.uint64_val = rate_per_second(nmalloc, uptime); + col_ndalloc.uint64_val = ndalloc; + col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); + col_nrequests.uint64_val = nrequests; + col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + col_nshards.unsigned_val = nshards; + col_curregs.size_val = curregs; + col_curslabs.size_val = curslabs; + col_nonfull_slabs.size_val = nonfull_slabs; + col_regs.unsigned_val = nregs; + col_pgs.size_val = slab_size / page; + col_util.str_val = util; + col_nfills.uint64_val = nfills; + col_nfills_ps.uint64_val = rate_per_second(nfills, uptime); + col_nflushes.uint64_val = nflushes; + col_nflushes_ps.uint64_val = rate_per_second(nflushes, uptime); + col_nslabs.uint64_val = nslabs; + col_nreslabs.uint64_val = nreslabs; + col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); + + /* + * Note that mutex columns were initialized above, if mutex == + * true. + */ + + emitter_table_row(emitter, &row); } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]%s\n", large ? "," : ""); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } + emitter_json_array_end(emitter); /* Close "bins". */ + + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); } } static void -stats_arena_lextents_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, unsigned i) { +stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { unsigned nbins, nlextents, j; bool in_gap, in_gap_prev; CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlextents", &nlextents, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"lextents\": [\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc" - " ndalloc nrequests curlextents\n"); - } + + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); + + COL_HDR(row, size, NULL, right, 20, size) + COL_HDR(row, ind, NULL, right, 4, unsigned) + COL_HDR(row, allocated, NULL, right, 13, size) + COL_HDR(row, nmalloc, NULL, right, 13, uint64) + COL_HDR(row, nmalloc_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, ndalloc, NULL, right, 13, uint64) + COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nrequests, NULL, right, 13, uint64) + COL_HDR(row, nrequests_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, curlextents, NULL, right, 13, size) + + /* As with bins, we label the large extents table. 
*/ + header_size.width -= 6; + emitter_table_printf(emitter, "large:"); + emitter_table_row(emitter, &header_row); + emitter_json_array_kv_begin(emitter, "lextents"); + for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t lextent_size, curlextents; @@ -289,156 +509,186 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), in_gap_prev = in_gap; in_gap = (nrequests == 0); - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, " ---\n"); } CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, size_t); CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", i, j, &curlextents, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"curlextents\": %zu\n" - "\t\t\t\t\t}%s\n", - curlextents, - (j + 1 < nlextents) ? "," : ""); - } else if (!in_gap) { - malloc_cprintf(write_cb, cbopaque, - "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64" %12zu\n", - lextent_size, nbins + j, - curlextents * lextent_size, nmalloc, ndalloc, - nrequests, curlextents); + + emitter_json_object_begin(emitter); + emitter_json_kv(emitter, "curlextents", emitter_type_size, + &curlextents); + emitter_json_object_end(emitter); + + col_size.size_val = lextent_size; + col_ind.unsigned_val = nbins + j; + col_allocated.size_val = curlextents * lextent_size; + col_nmalloc.uint64_val = nmalloc; + col_nmalloc_ps.uint64_val = rate_per_second(nmalloc, uptime); + col_ndalloc.uint64_val = ndalloc; + col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); + col_nrequests.uint64_val = nrequests; + col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + col_curlextents.size_val = curlextents; + + if (!in_gap) { + emitter_table_row(emitter, &row); } } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]\n"); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } + emitter_json_array_end(emitter); /* Close "lextents". */ + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); } } static void -read_arena_mutex_stats(unsigned arena_ind, - uint64_t results[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - - mutex_prof_arena_ind_t i; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.mutexes", arena_mutex_names[i], #c); \ - CTL_M2_GET(cmd, arena_ind, \ - (t *)&results[i][mutex_counter_##c], t); -MUTEX_PROF_COUNTERS -#undef OP - } -} +stats_arena_extents_print(emitter_t *emitter, unsigned i) { + unsigned j; + bool in_gap, in_gap_prev; + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); + + COL_HDR(row, size, NULL, right, 20, size) + COL_HDR(row, ind, NULL, right, 4, unsigned) + COL_HDR(row, ndirty, NULL, right, 13, size) + COL_HDR(row, dirty, NULL, right, 13, size) + COL_HDR(row, nmuzzy, NULL, right, 13, size) + COL_HDR(row, muzzy, NULL, right, 13, size) + COL_HDR(row, nretained, NULL, right, 13, size) + COL_HDR(row, retained, NULL, right, 13, size) + COL_HDR(row, ntotal, NULL, right, 13, size) + COL_HDR(row, total, NULL, right, 13, size) + + /* Label this section. 
*/ + header_size.width -= 8; + emitter_table_printf(emitter, "extents:"); + emitter_table_row(emitter, &header_row); + emitter_json_array_kv_begin(emitter, "extents"); + + in_gap = false; + for (j = 0; j < SC_NPSIZES; j++) { + size_t ndirty, nmuzzy, nretained, total, dirty_bytes, + muzzy_bytes, retained_bytes, total_bytes; + CTL_M2_M4_GET("stats.arenas.0.extents.0.ndirty", i, j, + &ndirty, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.nmuzzy", i, j, + &nmuzzy, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.nretained", i, j, + &nretained, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.dirty_bytes", i, j, + &dirty_bytes, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.muzzy_bytes", i, j, + &muzzy_bytes, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.retained_bytes", i, j, + &retained_bytes, size_t); + total = ndirty + nmuzzy + nretained; + total_bytes = dirty_bytes + muzzy_bytes + retained_bytes; -static void -mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], - bool first_mutex) { - if (first_mutex) { - /* Print title. */ - malloc_cprintf(write_cb, cbopaque, - " n_lock_ops n_waiting" - " n_spin_acq n_owner_switch total_wait_ns" - " max_wait_ns max_n_thds\n"); - } + in_gap_prev = in_gap; + in_gap = (total == 0); - malloc_cprintf(write_cb, cbopaque, "%s", name); - malloc_cprintf(write_cb, cbopaque, ":%*c", - (int)(20 - strlen(name)), ' '); + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, + " ---\n"); + } - char *fmt_str[2] = {"%12"FMTu32, "%16"FMTu64}; -#define OP(c, t) \ - malloc_cprintf(write_cb, cbopaque, \ - fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - (t)stats[mutex_counter_##c]); -MUTEX_PROF_COUNTERS -#undef OP - malloc_cprintf(write_cb, cbopaque, "\n"); + emitter_json_object_begin(emitter); + emitter_json_kv(emitter, "ndirty", emitter_type_size, &ndirty); + emitter_json_kv(emitter, "nmuzzy", emitter_type_size, &nmuzzy); + emitter_json_kv(emitter, "nretained", emitter_type_size, + &nretained); + + emitter_json_kv(emitter, "dirty_bytes", emitter_type_size, + &dirty_bytes); + emitter_json_kv(emitter, "muzzy_bytes", emitter_type_size, + &muzzy_bytes); + emitter_json_kv(emitter, "retained_bytes", emitter_type_size, + &retained_bytes); + emitter_json_object_end(emitter); + + col_size.size_val = sz_pind2sz(j); + col_ind.size_val = j; + col_ndirty.size_val = ndirty; + col_dirty.size_val = dirty_bytes; + col_nmuzzy.size_val = nmuzzy; + col_muzzy.size_val = muzzy_bytes; + col_nretained.size_val = nretained; + col_retained.size_val = retained_bytes; + col_ntotal.size_val = total; + col_total.size_val = total_bytes; + + if (!in_gap) { + emitter_table_row(emitter, &row); + } + } + emitter_json_array_end(emitter); /* Close "extents". */ + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); + } } static void -stats_arena_mutexes_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, bool json_end, unsigned arena_ind) { - uint64_t mutex_stats[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]; - read_arena_mutex_stats(arena_ind, mutex_stats); - - /* Output mutex stats. 
*/ - if (json) { - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"mutexes\": {\n"); - mutex_prof_arena_ind_t i, last_mutex; - last_mutex = mutex_prof_num_arena_mutexes - 1; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { - mutex_stats_output_json(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], - "\t\t\t\t\t", (i == last_mutex)); - } - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", - json_end ? "" : ","); - } else { - mutex_prof_arena_ind_t i; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { - mutex_stats_output(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], i == 0); - } +stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { + emitter_row_t row; + emitter_col_t col_name; + emitter_col_t col64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col32[mutex_prof_num_uint32_t_counters]; + + emitter_row_init(&row); + mutex_stats_init_cols(&row, "", &col_name, col64, col32); + + emitter_json_object_kv_begin(emitter, "mutexes"); + emitter_table_row(emitter, &row); + + for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; + i++) { + const char *name = arena_mutex_names[i]; + emitter_json_object_kv_begin(emitter, name); + mutex_stats_read_arena(arena_ind, i, name, &col_name, col64, + col32, uptime); + mutex_stats_emit(emitter, &row, col64, col32); + emitter_json_object_end(emitter); /* Close the mutex dict. */ } + emitter_json_object_end(emitter); /* End "mutexes". */ } static void -stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, unsigned i, bool bins, bool large, bool mutex) { +stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, + bool mutex, bool extents) { unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident; + size_t base, internal, resident, metadata_thp, extent_avail; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; - uint64_t small_nmalloc, small_ndalloc, small_nrequests; + uint64_t small_nmalloc, small_ndalloc, small_nrequests, small_nfills, + small_nflushes; size_t large_allocated; - uint64_t large_nmalloc, large_ndalloc, large_nrequests; - size_t tcache_bytes; + uint64_t large_nmalloc, large_ndalloc, large_nrequests, large_nfills, + large_nflushes; + size_t tcache_bytes, abandoned_vm; uint64_t uptime; CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"nthreads\": %u,\n", nthreads); - } else { - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); - } + emitter_kv(emitter, "nthreads", "assigned threads", + emitter_type_unsigned, &nthreads); CTL_M2_GET("stats.arenas.0.uptime", i, &uptime, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"uptime_ns\": %"FMTu64",\n", uptime); - } else { - malloc_cprintf(write_cb, cbopaque, - "uptime: %"FMTu64"\n", uptime); - } + emitter_kv(emitter, "uptime_ns", "uptime", emitter_type_uint64, + &uptime); CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dss\": \"%s\",\n", dss); - } else { - malloc_cprintf(write_cb, cbopaque, - "dss allocation precedence: %s\n", dss); - } + emitter_kv(emitter, "dss", "dss allocation precedence", + emitter_type_string, &dss); 
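/*
 * Editor's note -- a minimal sketch, not part of the upstream diff: the
 * emitter_kv() calls above replace the old paired
 * "if (json) { malloc_cprintf(...); } else { malloc_cprintf(...); }"
 * blocks.  A single call names both the JSON key and the human-readable
 * table label, and the emitter writes whichever form matches the output
 * mode it was initialized with.  The stat name "nfoo", its mallctl path,
 * and the "foo count" label below are hypothetical, shown only to
 * illustrate how one more per-arena value would be wired into both
 * outputs using the same pattern as the surrounding code.
 */
size_t nfoo;
CTL_M2_GET("stats.arenas.0.nfoo", i, &nfoo, size_t); /* hypothetical mallctl */
emitter_kv(emitter, "nfoo", "foo count", emitter_type_size, &nfoo);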
CTL_M2_GET("stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, ssize_t); @@ -455,205 +705,290 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_decay_ms\": %zd,\n", dirty_decay_ms); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_decay_ms\": %zd,\n", muzzy_decay_ms); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pactive\": %zu,\n", pactive); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pdirty\": %zu,\n", pdirty); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pmuzzy\": %zu,\n", pmuzzy); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_npurge\": %"FMTu64",\n", dirty_npurge); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_nmadvise\": %"FMTu64",\n", dirty_nmadvise); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_purged\": %"FMTu64",\n", dirty_purged); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_npurge\": %"FMTu64",\n", muzzy_npurge); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_nmadvise\": %"FMTu64",\n", muzzy_nmadvise); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_purged\": %"FMTu64",\n", muzzy_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - "decaying: time npages sweeps madvises" - " purged\n"); - if (dirty_decay_ms >= 0) { - malloc_cprintf(write_cb, cbopaque, - " dirty: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", dirty_decay_ms, pdirty, dirty_npurge, - dirty_nmadvise, dirty_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - " dirty: N/A %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", pdirty, dirty_npurge, dirty_nmadvise, - dirty_purged); - } - if (muzzy_decay_ms >= 0) { - malloc_cprintf(write_cb, cbopaque, - " muzzy: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", muzzy_decay_ms, pmuzzy, muzzy_npurge, - muzzy_nmadvise, muzzy_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - " muzzy: N/A %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", pmuzzy, muzzy_npurge, muzzy_nmadvise, - muzzy_purged); - } - } - CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, - size_t); - CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, - uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"small\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc" - " ndalloc nrequests\n"); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, - small_nrequests); - } + emitter_row_t decay_row; + emitter_row_init(&decay_row); - CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, - size_t); - CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.ndalloc", i, 
&large_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, - uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"large\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, - large_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated, small_nmalloc + - large_nmalloc, small_ndalloc + large_ndalloc, - small_nrequests + large_nrequests); - } - if (!json) { - malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); - } + /* JSON-style emission. */ + emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, + &dirty_decay_ms); + emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, + &muzzy_decay_ms); - CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"mapped\": %zu,\n", mapped); - } else { - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); - } + emitter_json_kv(emitter, "pactive", emitter_type_size, &pactive); + emitter_json_kv(emitter, "pdirty", emitter_type_size, &pdirty); + emitter_json_kv(emitter, "pmuzzy", emitter_type_size, &pmuzzy); - CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"retained\": %zu,\n", retained); - } else { - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); - } + emitter_json_kv(emitter, "dirty_npurge", emitter_type_uint64, + &dirty_npurge); + emitter_json_kv(emitter, "dirty_nmadvise", emitter_type_uint64, + &dirty_nmadvise); + emitter_json_kv(emitter, "dirty_purged", emitter_type_uint64, + &dirty_purged); - CTL_M2_GET("stats.arenas.0.base", i, &base, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"base\": %zu,\n", base); - } else { - malloc_cprintf(write_cb, cbopaque, - "base: %12zu\n", base); - } + emitter_json_kv(emitter, "muzzy_npurge", emitter_type_uint64, + &muzzy_npurge); + emitter_json_kv(emitter, "muzzy_nmadvise", emitter_type_uint64, + &muzzy_nmadvise); + emitter_json_kv(emitter, "muzzy_purged", emitter_type_uint64, + &muzzy_purged); - CTL_M2_GET("stats.arenas.0.internal", i, &internal, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"internal\": %zu,\n", internal); - } else { - malloc_cprintf(write_cb, cbopaque, - "internal: %12zu\n", internal); - } + /* Table-style emission. 
*/ + COL(decay_row, decay_type, right, 9, title); + col_decay_type.str_val = "decaying:"; + + COL(decay_row, decay_time, right, 6, title); + col_decay_time.str_val = "time"; + + COL(decay_row, decay_npages, right, 13, title); + col_decay_npages.str_val = "npages"; + + COL(decay_row, decay_sweeps, right, 13, title); + col_decay_sweeps.str_val = "sweeps"; + + COL(decay_row, decay_madvises, right, 13, title); + col_decay_madvises.str_val = "madvises"; + + COL(decay_row, decay_purged, right, 13, title); + col_decay_purged.str_val = "purged"; + + /* Title row. */ + emitter_table_row(emitter, &decay_row); - CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"tcache\": %zu,\n", tcache_bytes); + /* Dirty row. */ + col_decay_type.str_val = "dirty:"; + + if (dirty_decay_ms >= 0) { + col_decay_time.type = emitter_type_ssize; + col_decay_time.ssize_val = dirty_decay_ms; } else { - malloc_cprintf(write_cb, cbopaque, - "tcache: %12zu\n", tcache_bytes); + col_decay_time.type = emitter_type_title; + col_decay_time.str_val = "N/A"; } - CTL_M2_GET("stats.arenas.0.resident", i, &resident, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"resident\": %zu%s\n", resident, - (bins || large || mutex) ? "," : ""); + col_decay_npages.type = emitter_type_size; + col_decay_npages.size_val = pdirty; + + col_decay_sweeps.type = emitter_type_uint64; + col_decay_sweeps.uint64_val = dirty_npurge; + + col_decay_madvises.type = emitter_type_uint64; + col_decay_madvises.uint64_val = dirty_nmadvise; + + col_decay_purged.type = emitter_type_uint64; + col_decay_purged.uint64_val = dirty_purged; + + emitter_table_row(emitter, &decay_row); + + /* Muzzy row. */ + col_decay_type.str_val = "muzzy:"; + + if (muzzy_decay_ms >= 0) { + col_decay_time.type = emitter_type_ssize; + col_decay_time.ssize_val = muzzy_decay_ms; } else { - malloc_cprintf(write_cb, cbopaque, - "resident: %12zu\n", resident); + col_decay_time.type = emitter_type_title; + col_decay_time.str_val = "N/A"; } + col_decay_npages.type = emitter_type_size; + col_decay_npages.size_val = pmuzzy; + + col_decay_sweeps.type = emitter_type_uint64; + col_decay_sweeps.uint64_val = muzzy_npurge; + + col_decay_madvises.type = emitter_type_uint64; + col_decay_madvises.uint64_val = muzzy_nmadvise; + + col_decay_purged.type = emitter_type_uint64; + col_decay_purged.uint64_val = muzzy_purged; + + emitter_table_row(emitter, &decay_row); + + /* Small / large / total allocation counts. 
*/ + emitter_row_t alloc_count_row; + emitter_row_init(&alloc_count_row); + + COL(alloc_count_row, count_title, left, 21, title); + col_count_title.str_val = ""; + + COL(alloc_count_row, count_allocated, right, 16, title); + col_count_allocated.str_val = "allocated"; + + COL(alloc_count_row, count_nmalloc, right, 16, title); + col_count_nmalloc.str_val = "nmalloc"; + COL(alloc_count_row, count_nmalloc_ps, right, 8, title); + col_count_nmalloc_ps.str_val = "(#/sec)"; + + COL(alloc_count_row, count_ndalloc, right, 16, title); + col_count_ndalloc.str_val = "ndalloc"; + COL(alloc_count_row, count_ndalloc_ps, right, 8, title); + col_count_ndalloc_ps.str_val = "(#/sec)"; + + COL(alloc_count_row, count_nrequests, right, 16, title); + col_count_nrequests.str_val = "nrequests"; + COL(alloc_count_row, count_nrequests_ps, right, 10, title); + col_count_nrequests_ps.str_val = "(#/sec)"; + + COL(alloc_count_row, count_nfills, right, 16, title); + col_count_nfills.str_val = "nfill"; + COL(alloc_count_row, count_nfills_ps, right, 10, title); + col_count_nfills_ps.str_val = "(#/sec)"; + + COL(alloc_count_row, count_nflushes, right, 16, title); + col_count_nflushes.str_val = "nflush"; + COL(alloc_count_row, count_nflushes_ps, right, 10, title); + col_count_nflushes_ps.str_val = "(#/sec)"; + + emitter_table_row(emitter, &alloc_count_row); + + col_count_nmalloc_ps.type = emitter_type_uint64; + col_count_ndalloc_ps.type = emitter_type_uint64; + col_count_nrequests_ps.type = emitter_type_uint64; + col_count_nfills_ps.type = emitter_type_uint64; + col_count_nflushes_ps.type = emitter_type_uint64; + +#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ + CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ + &small_or_large##_##name, valtype##_t); \ + emitter_json_kv(emitter, #name, emitter_type_##valtype, \ + &small_or_large##_##name); \ + col_count_##name.type = emitter_type_##valtype; \ + col_count_##name.valtype##_val = small_or_large##_##name; + + emitter_json_object_kv_begin(emitter, "small"); + col_count_title.str_val = "small:"; + + GET_AND_EMIT_ALLOC_STAT(small, allocated, size) + GET_AND_EMIT_ALLOC_STAT(small, nmalloc, uint64) + col_count_nmalloc_ps.uint64_val = + rate_per_second(col_count_nmalloc.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, ndalloc, uint64) + col_count_ndalloc_ps.uint64_val = + rate_per_second(col_count_ndalloc.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) + col_count_nrequests_ps.uint64_val = + rate_per_second(col_count_nrequests.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, nfills, uint64) + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, nflushes, uint64) + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); + + emitter_table_row(emitter, &alloc_count_row); + emitter_json_object_end(emitter); /* Close "small". 
*/ + + emitter_json_object_kv_begin(emitter, "large"); + col_count_title.str_val = "large:"; + + GET_AND_EMIT_ALLOC_STAT(large, allocated, size) + GET_AND_EMIT_ALLOC_STAT(large, nmalloc, uint64) + col_count_nmalloc_ps.uint64_val = + rate_per_second(col_count_nmalloc.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, ndalloc, uint64) + col_count_ndalloc_ps.uint64_val = + rate_per_second(col_count_ndalloc.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) + col_count_nrequests_ps.uint64_val = + rate_per_second(col_count_nrequests.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, nfills, uint64) + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, nflushes, uint64) + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); + + emitter_table_row(emitter, &alloc_count_row); + emitter_json_object_end(emitter); /* Close "large". */ + +#undef GET_AND_EMIT_ALLOC_STAT + + /* Aggregated small + large stats are emitter only in table mode. */ + col_count_title.str_val = "total:"; + col_count_allocated.size_val = small_allocated + large_allocated; + col_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; + col_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; + col_count_nrequests.uint64_val = small_nrequests + large_nrequests; + col_count_nfills.uint64_val = small_nfills + large_nfills; + col_count_nflushes.uint64_val = small_nflushes + large_nflushes; + col_count_nmalloc_ps.uint64_val = + rate_per_second(col_count_nmalloc.uint64_val, uptime); + col_count_ndalloc_ps.uint64_val = + rate_per_second(col_count_ndalloc.uint64_val, uptime); + col_count_nrequests_ps.uint64_val = + rate_per_second(col_count_nrequests.uint64_val, uptime); + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); + emitter_table_row(emitter, &alloc_count_row); + + emitter_row_t mem_count_row; + emitter_row_init(&mem_count_row); + + emitter_col_t mem_count_title; + emitter_col_init(&mem_count_title, &mem_count_row); + mem_count_title.justify = emitter_justify_left; + mem_count_title.width = 21; + mem_count_title.type = emitter_type_title; + mem_count_title.str_val = ""; + + emitter_col_t mem_count_val; + emitter_col_init(&mem_count_val, &mem_count_row); + mem_count_val.justify = emitter_justify_right; + mem_count_val.width = 16; + mem_count_val.type = emitter_type_title; + mem_count_val.str_val = ""; + + emitter_table_row(emitter, &mem_count_row); + mem_count_val.type = emitter_type_size; + + /* Active count in bytes is emitted only in table mode. 
*/ + mem_count_title.str_val = "active:"; + mem_count_val.size_val = pactive * page; + emitter_table_row(emitter, &mem_count_row); + +#define GET_AND_EMIT_MEM_STAT(stat) \ + CTL_M2_GET("stats.arenas.0."#stat, i, &stat, size_t); \ + emitter_json_kv(emitter, #stat, emitter_type_size, &stat); \ + mem_count_title.str_val = #stat":"; \ + mem_count_val.size_val = stat; \ + emitter_table_row(emitter, &mem_count_row); + + GET_AND_EMIT_MEM_STAT(mapped) + GET_AND_EMIT_MEM_STAT(retained) + GET_AND_EMIT_MEM_STAT(base) + GET_AND_EMIT_MEM_STAT(internal) + GET_AND_EMIT_MEM_STAT(metadata_thp) + GET_AND_EMIT_MEM_STAT(tcache_bytes) + GET_AND_EMIT_MEM_STAT(resident) + GET_AND_EMIT_MEM_STAT(abandoned_vm) + GET_AND_EMIT_MEM_STAT(extent_avail) +#undef GET_AND_EMIT_MEM_STAT + if (mutex) { - stats_arena_mutexes_print(write_cb, cbopaque, json, - !(bins || large), i); + stats_arena_mutexes_print(emitter, i, uptime); } if (bins) { - stats_arena_bins_print(write_cb, cbopaque, json, large, mutex, - i); + stats_arena_bins_print(emitter, mutex, i, uptime); } if (large) { - stats_arena_lextents_print(write_cb, cbopaque, json, i); + stats_arena_lextents_print(emitter, i, uptime); + } + if (extents) { + stats_arena_extents_print(emitter, i); } } static void -stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool more) { +stats_general_print(emitter_t *emitter) { const char *cpv; - bool bv; + bool bv, bv2; unsigned uv; uint32_t u32v; uint64_t u64v; - ssize_t ssv; + ssize_t ssv, ssv2; size_t sv, bsz, usz, ssz, sssz, cpsz; bsz = sizeof(bool); @@ -663,365 +998,257 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, cpsz = sizeof(const char *); CTL_GET("version", &cpv, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"version\": \"%s\",\n", cpv); - } else { - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - } + emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv); /* config. */ -#define CONFIG_WRITE_BOOL_JSON(n, c) \ - if (json) { \ - CTL_GET("config."#n, &bv, bool); \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ - (c)); \ + emitter_dict_begin(emitter, "config", "Build-time option settings"); +#define CONFIG_WRITE_BOOL(name) \ + do { \ + CTL_GET("config."#name, &bv, bool); \ + emitter_kv(emitter, #name, "config."#name, \ + emitter_type_bool, &bv); \ + } while (0) + + CONFIG_WRITE_BOOL(cache_oblivious); + CONFIG_WRITE_BOOL(debug); + CONFIG_WRITE_BOOL(fill); + CONFIG_WRITE_BOOL(lazy_lock); + emitter_kv(emitter, "malloc_conf", "config.malloc_conf", + emitter_type_string, &config_malloc_conf); + + CONFIG_WRITE_BOOL(opt_safety_checks); + CONFIG_WRITE_BOOL(prof); + CONFIG_WRITE_BOOL(prof_libgcc); + CONFIG_WRITE_BOOL(prof_libunwind); + CONFIG_WRITE_BOOL(stats); + CONFIG_WRITE_BOOL(utrace); + CONFIG_WRITE_BOOL(xmalloc); +#undef CONFIG_WRITE_BOOL + emitter_dict_end(emitter); /* Close "config" dict. */ + + /* opt. 
*/ +#define OPT_WRITE(name, var, size, emitter_type) \ + if (je_mallctl("opt."name, (void *)&var, &size, NULL, 0) == \ + 0) { \ + emitter_kv(emitter, name, "opt."name, emitter_type, \ + &var); \ } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"config\": {\n"); +#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, \ + altname) \ + if (je_mallctl("opt."name, (void *)&var1, &size, NULL, 0) == \ + 0 && je_mallctl(altname, (void *)&var2, &size, NULL, 0) \ + == 0) { \ + emitter_kv_note(emitter, name, "opt."name, \ + emitter_type, &var1, altname, emitter_type, \ + &var2); \ } - CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") +#define OPT_WRITE_BOOL(name) OPT_WRITE(name, bv, bsz, emitter_type_bool) +#define OPT_WRITE_BOOL_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, bv, bv2, bsz, emitter_type_bool, altname) + +#define OPT_WRITE_UNSIGNED(name) \ + OPT_WRITE(name, uv, usz, emitter_type_unsigned) + +#define OPT_WRITE_SIZE_T(name) \ + OPT_WRITE(name, sv, ssz, emitter_type_size) +#define OPT_WRITE_SSIZE_T(name) \ + OPT_WRITE(name, ssv, sssz, emitter_type_ssize) +#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, \ + altname) + +#define OPT_WRITE_CHAR_P(name) \ + OPT_WRITE(name, cpv, cpsz, emitter_type_string) + + emitter_dict_begin(emitter, "opt", "Run-time option settings"); + + OPT_WRITE_BOOL("abort") + OPT_WRITE_BOOL("abort_conf") + OPT_WRITE_BOOL("confirm_conf") + OPT_WRITE_BOOL("retain") + OPT_WRITE_CHAR_P("dss") + OPT_WRITE_UNSIGNED("narenas") + OPT_WRITE_CHAR_P("percpu_arena") + OPT_WRITE_SIZE_T("oversize_threshold") + OPT_WRITE_CHAR_P("metadata_thp") + OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") + OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") + OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms") + OPT_WRITE_SIZE_T("lg_extent_max_active_fit") + OPT_WRITE_CHAR_P("junk") + OPT_WRITE_BOOL("zero") + OPT_WRITE_BOOL("utrace") + OPT_WRITE_BOOL("xmalloc") + OPT_WRITE_BOOL("tcache") + OPT_WRITE_SSIZE_T("lg_tcache_max") + OPT_WRITE_CHAR_P("thp") + OPT_WRITE_BOOL("prof") + OPT_WRITE_CHAR_P("prof_prefix") + OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active") + OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init", + "prof.thread_active_init") + OPT_WRITE_SSIZE_T_MUTABLE("lg_prof_sample", "prof.lg_sample") + OPT_WRITE_BOOL("prof_accum") + OPT_WRITE_SSIZE_T("lg_prof_interval") + OPT_WRITE_BOOL("prof_gdump") + OPT_WRITE_BOOL("prof_final") + OPT_WRITE_BOOL("prof_leak") + OPT_WRITE_BOOL("stats_print") + OPT_WRITE_CHAR_P("stats_print_opts") + + emitter_dict_end(emitter); + +#undef OPT_WRITE +#undef OPT_WRITE_MUTABLE +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_UNSIGNED +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_SSIZE_T_MUTABLE +#undef OPT_WRITE_CHAR_P - CTL_GET("config.debug", &bv, bool); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); - } else { - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? "enabled" : "disabled"); - } + /* prof. 
*/ + if (config_prof) { + emitter_dict_begin(emitter, "prof", "Profiling settings"); - CONFIG_WRITE_BOOL_JSON(fill, ",") - CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") + CTL_GET("prof.thread_active_init", &bv, bool); + emitter_kv(emitter, "thread_active_init", + "prof.thread_active_init", emitter_type_bool, &bv); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"malloc_conf\": \"%s\",\n", - config_malloc_conf); - } else { - malloc_cprintf(write_cb, cbopaque, - "config.malloc_conf: \"%s\"\n", config_malloc_conf); - } + CTL_GET("prof.active", &bv, bool); + emitter_kv(emitter, "active", "prof.active", emitter_type_bool, + &bv); - CONFIG_WRITE_BOOL_JSON(prof, ",") - CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") - CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") - CONFIG_WRITE_BOOL_JSON(stats, ",") - CONFIG_WRITE_BOOL_JSON(thp, ",") - CONFIG_WRITE_BOOL_JSON(utrace, ",") - CONFIG_WRITE_BOOL_JSON(xmalloc, "") - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); - } -#undef CONFIG_WRITE_BOOL_JSON + CTL_GET("prof.gdump", &bv, bool); + emitter_kv(emitter, "gdump", "prof.gdump", emitter_type_bool, + &bv); - /* opt. */ -#define OPT_WRITE_BOOL(n, c) \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ - bool bv2; \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, (void *)&bv2, &bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? 
"true" : "false"); \ - } \ - } \ -} -#define OPT_WRITE_UNSIGNED(n, c) \ - if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %u\n", uv); \ - } \ - } -#define OPT_WRITE_SSIZE_T(n, c) \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, (void *)&ssv2, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ - } \ -} -#define OPT_WRITE_CHAR_P(n, c) \ - if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } \ - } + CTL_GET("prof.interval", &u64v, uint64_t); + emitter_kv(emitter, "interval", "prof.interval", + emitter_type_uint64, &u64v); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"opt\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - } - OPT_WRITE_BOOL(abort, ",") - OPT_WRITE_BOOL(abort_conf, ",") - OPT_WRITE_BOOL(retain, ",") - OPT_WRITE_CHAR_P(dss, ",") - OPT_WRITE_UNSIGNED(narenas, ",") - OPT_WRITE_CHAR_P(percpu_arena, ",") - OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") - OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") - OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") - OPT_WRITE_CHAR_P(junk, ",") - OPT_WRITE_BOOL(zero, ",") - OPT_WRITE_BOOL(utrace, ",") - OPT_WRITE_BOOL(xmalloc, ",") - OPT_WRITE_BOOL(tcache, ",") - OPT_WRITE_SSIZE_T(lg_tcache_max, ",") - OPT_WRITE_BOOL(prof, ",") - OPT_WRITE_CHAR_P(prof_prefix, ",") - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, - ",") - OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") - OPT_WRITE_BOOL(prof_accum, ",") - OPT_WRITE_SSIZE_T(lg_prof_interval, ",") - OPT_WRITE_BOOL(prof_gdump, ",") - OPT_WRITE_BOOL(prof_final, ",") - OPT_WRITE_BOOL(prof_leak, ",") - OPT_WRITE_BOOL(stats_print, ",") - if (json || opt_stats_print) { - /* - * stats_print_opts is always emitted for JSON, so as long as it - * comes last it's safe to unconditionally omit the comma here - * (rather than having to conditionally omit it elsewhere - * depending on configuration). - */ - OPT_WRITE_CHAR_P(stats_print_opts, "") - } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); - } + CTL_GET("prof.lg_sample", &ssv, ssize_t); + emitter_kv(emitter, "lg_sample", "prof.lg_sample", + emitter_type_ssize, &ssv); -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P + emitter_dict_end(emitter); /* Close "prof". */ + } /* arenas. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"arenas\": {\n"); - } + /* + * The json output sticks arena info into an "arenas" dict; the table + * output puts them at the top-level. 
+ */ + emitter_json_object_kv_begin(emitter, "arenas"); CTL_GET("arenas.narenas", &uv, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"narenas\": %u,\n", uv); - } else { - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - } + emitter_kv(emitter, "narenas", "Arenas", emitter_type_unsigned, &uv); - if (json) { - CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"dirty_decay_ms\": %zd,\n", ssv); + /* + * Decay settings are emitted only in json mode; in table mode, they're + * emitted as notes with the opt output, above. + */ + CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t); + emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, &ssv); - CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"muzzy_decay_ms\": %zd,\n", ssv); - } + CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t); + emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, &ssv); CTL_GET("arenas.quantum", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"quantum\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); - } + emitter_kv(emitter, "quantum", "Quantum size", emitter_type_size, &sv); CTL_GET("arenas.page", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"page\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - } + emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv); if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"tcache_max\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } + emitter_kv(emitter, "tcache_max", + "Maximum thread-cached size class", emitter_type_size, &sv); } - if (json) { - unsigned nbins, nlextents, i; - - CTL_GET("arenas.nbins", &nbins, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nbins\": %u,\n", nbins); + unsigned nbins; + CTL_GET("arenas.nbins", &nbins, unsigned); + emitter_kv(emitter, "nbins", "Number of bin size classes", + emitter_type_unsigned, &nbins); - CTL_GET("arenas.nhbins", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "\t\t\t\"nhbins\": %u,\n", - uv); + unsigned nhbins; + CTL_GET("arenas.nhbins", &nhbins, unsigned); + emitter_kv(emitter, "nhbins", "Number of thread-cache bin size classes", + emitter_type_unsigned, &nhbins); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"bin\": [\n"); - for (i = 0; i < nbins; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); + /* + * We do enough mallctls in a loop that we actually want to omit them + * (not just omit the printing). 
+ */ + if (emitter->output == emitter_output_json) { + emitter_json_array_kv_begin(emitter, "bin"); + for (unsigned i = 0; i < nbins; i++) { + emitter_json_object_begin(emitter); CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu,\n", sv); + emitter_json_kv(emitter, "size", emitter_type_size, + &sv); CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + emitter_json_kv(emitter, "nregs", emitter_type_uint32, + &u32v); CTL_M2_GET("arenas.bin.0.slab_size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"slab_size\": %zu\n", sv); + emitter_json_kv(emitter, "slab_size", emitter_type_size, + &sv); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t],\n"); - - CTL_GET("arenas.nlextents", &nlextents, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nlextents\": %u,\n", nlextents); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lextent\": [\n"); - for (i = 0; i < nlextents; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); - - CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu\n", sv); + CTL_M2_GET("arenas.bin.0.nshards", i, &u32v, uint32_t); + emitter_json_kv(emitter, "nshards", emitter_type_uint32, + &u32v); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nlextents) ? "," : ""); + emitter_json_object_end(emitter); } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t]\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (config_prof || more) ? "," : ""); + emitter_json_array_end(emitter); /* Close "bin". */ } - /* prof. */ - if (config_prof && json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"prof\": {\n"); - - CTL_GET("prof.thread_active_init", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : - "false"); - - CTL_GET("prof.active", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); - - CTL_GET("prof.gdump", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); + unsigned nlextents; + CTL_GET("arenas.nlextents", &nlextents, unsigned); + emitter_kv(emitter, "nlextents", "Number of large size classes", + emitter_type_unsigned, &nlextents); - CTL_GET("prof.interval", &u64v, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"interval\": %"FMTu64",\n", u64v); + if (emitter->output == emitter_output_json) { + emitter_json_array_kv_begin(emitter, "lextent"); + for (unsigned i = 0; i < nlextents; i++) { + emitter_json_object_begin(emitter); - CTL_GET("prof.lg_sample", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lg_sample\": %zd\n", ssv); + CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t); + emitter_json_kv(emitter, "size", emitter_type_size, + &sv); - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", more ? "," : ""); + emitter_json_object_end(emitter); + } + emitter_json_array_end(emitter); /* Close "lextent". 
*/ } -} -static void -read_global_mutex_stats( - uint64_t results[mutex_prof_num_global_mutexes][mutex_prof_num_counters]) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "mutexes", global_mutex_names[i], #c); \ - CTL_GET(cmd, (t *)&results[i][mutex_counter_##c], t); -MUTEX_PROF_COUNTERS -#undef OP - } + emitter_json_object_end(emitter); /* Close "arenas" */ } static void -stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool destroyed, bool unmerged, bool bins, - bool large, bool mutex) { - size_t allocated, active, metadata, resident, mapped, retained; +stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, + bool unmerged, bool bins, bool large, bool mutex, bool extents) { + /* + * These should be deleted. We keep them around for a while, to aid in + * the transition to the emitter code. + */ + size_t allocated, active, metadata, metadata_thp, resident, mapped, + retained; size_t num_background_threads; uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.metadata_thp", &metadata_thp, size_t); CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); - uint64_t mutex_stats[mutex_prof_num_global_mutexes][mutex_prof_num_counters]; - if (mutex) { - read_global_mutex_stats(mutex_stats); - } - if (have_background_thread) { CTL_GET("stats.background_thread.num_threads", &num_background_threads, size_t); @@ -1035,182 +1262,133 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, background_thread_run_interval = 0; } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"allocated\": %zu,\n", allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"active\": %zu,\n", active); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"metadata\": %zu,\n", metadata); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"resident\": %zu,\n", resident); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"mapped\": %zu,\n", mapped); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"retained\": %zu,\n", retained); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"background_thread\": {\n"); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"num_threads\": %zu,\n", num_background_threads); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"num_runs\": %"FMTu64",\n", - background_thread_num_runs); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"run_interval\": %"FMTu64"\n", - background_thread_run_interval); - malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", - mutex ? "," : ""); + /* Generic global stats. 
*/ + emitter_json_object_kv_begin(emitter, "stats"); + emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated); + emitter_json_kv(emitter, "active", emitter_type_size, &active); + emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata); + emitter_json_kv(emitter, "metadata_thp", emitter_type_size, + &metadata_thp); + emitter_json_kv(emitter, "resident", emitter_type_size, &resident); + emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped); + emitter_json_kv(emitter, "retained", emitter_type_size, &retained); + + emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " + "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, " + "retained: %zu\n", allocated, active, metadata, metadata_thp, + resident, mapped, retained); + + /* Background thread stats. */ + emitter_json_object_kv_begin(emitter, "background_thread"); + emitter_json_kv(emitter, "num_threads", emitter_type_size, + &num_background_threads); + emitter_json_kv(emitter, "num_runs", emitter_type_uint64, + &background_thread_num_runs); + emitter_json_kv(emitter, "run_interval", emitter_type_uint64, + &background_thread_run_interval); + emitter_json_object_end(emitter); /* Close "background_thread". */ + + emitter_table_printf(emitter, "Background threads: %zu, " + "num_runs: %"FMTu64", run_interval: %"FMTu64" ns\n", + num_background_threads, background_thread_num_runs, + background_thread_run_interval); - if (mutex) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"mutexes\": {\n"); - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { - mutex_stats_output_json(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], - "\t\t\t\t", - i == mutex_prof_num_global_mutexes - 1); - } - malloc_cprintf(write_cb, cbopaque, "\t\t\t}\n"); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (merged || unmerged || destroyed) ? "," : ""); - } else { - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); - - if (have_background_thread && num_background_threads > 0) { - malloc_cprintf(write_cb, cbopaque, - "Background threads: %zu, num_runs: %"FMTu64", " - "run_interval: %"FMTu64" ns\n", - num_background_threads, - background_thread_num_runs, - background_thread_run_interval); - } - if (mutex) { - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { - mutex_stats_output(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], - i == 0); - } + if (mutex) { + emitter_row_t row; + emitter_col_t name; + emitter_col_t col64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col32[mutex_prof_num_uint32_t_counters]; + uint64_t uptime; + + emitter_row_init(&row); + mutex_stats_init_cols(&row, "", &name, col64, col32); + + emitter_table_row(emitter, &row); + emitter_json_object_kv_begin(emitter, "mutexes"); + + CTL_M2_GET("stats.arenas.0.uptime", 0, &uptime, uint64_t); + + for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { + mutex_stats_read_global(global_mutex_names[i], &name, + col64, col32, uptime); + emitter_json_object_kv_begin(emitter, global_mutex_names[i]); + mutex_stats_emit(emitter, &row, col64, col32); + emitter_json_object_end(emitter); } + + emitter_json_object_end(emitter); /* Close "mutexes". */ } + emitter_json_object_end(emitter); /* Close "stats". 
*/ + if (merged || destroyed || unmerged) { unsigned narenas; - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats.arenas\": {\n"); - } + emitter_json_object_kv_begin(emitter, "stats.arenas"); CTL_GET("arenas.narenas", &narenas, unsigned); - { - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(size_t); - size_t sz; - VARIABLE_ARRAY(bool, initialized, narenas); - bool destroyed_initialized; - unsigned i, j, ninitialized; - - xmallctlnametomib("arena.0.initialized", mib, &miblen); - for (i = ninitialized = 0; i < narenas; i++) { - mib[1] = i; - sz = sizeof(bool); - xmallctlbymib(mib, miblen, &initialized[i], &sz, - NULL, 0); - if (initialized[i]) { - ninitialized++; - } - } - mib[1] = MALLCTL_ARENAS_DESTROYED; + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); + size_t sz; + VARIABLE_ARRAY(bool, initialized, narenas); + bool destroyed_initialized; + unsigned i, j, ninitialized; + + xmallctlnametomib("arena.0.initialized", mib, &miblen); + for (i = ninitialized = 0; i < narenas; i++) { + mib[1] = i; sz = sizeof(bool); - xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, + xmallctlbymib(mib, miblen, &initialized[i], &sz, NULL, 0); - - /* Merged stats. */ - if (merged && (ninitialized > 1 || !unmerged)) { - /* Print merged arena stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"merged\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - } - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_ALL, bins, large, mutex); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", - ((destroyed_initialized && - destroyed) || unmerged) ? "," : - ""); - } + if (initialized[i]) { + ninitialized++; } + } + mib[1] = MALLCTL_ARENAS_DESTROYED; + sz = sizeof(bool); + xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, + NULL, 0); + + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. */ + emitter_table_printf(emitter, "Merged arenas stats:\n"); + emitter_json_object_kv_begin(emitter, "merged"); + stats_arena_print(emitter, MALLCTL_ARENAS_ALL, bins, + large, mutex, extents); + emitter_json_object_end(emitter); /* Close "merged". */ + } - /* Destroyed stats. */ - if (destroyed_initialized && destroyed) { - /* Print destroyed arena stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"destroyed\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "\nDestroyed arenas stats:\n"); - } - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_DESTROYED, bins, large, - mutex); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", unmerged ? "," : - ""); - } - } + /* Destroyed stats. */ + if (destroyed_initialized && destroyed) { + /* Print destroyed arena stats. */ + emitter_table_printf(emitter, + "Destroyed arenas stats:\n"); + emitter_json_object_kv_begin(emitter, "destroyed"); + stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, + bins, large, mutex, extents); + emitter_json_object_end(emitter); /* Close "destroyed". */ + } - /* Unmerged stats. */ - if (unmerged) { - for (i = j = 0; i < narenas; i++) { - if (initialized[i]) { - if (json) { - j++; - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t\"%u\": {\n", - i); - } else { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", - i); - } - stats_arena_print(write_cb, - cbopaque, json, i, bins, - large, mutex); - if (json) { - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t}%s\n", (j < - ninitialized) ? "," - : ""); - } - } + /* Unmerged stats. 
*/ + if (unmerged) { + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + char arena_ind_str[20]; + malloc_snprintf(arena_ind_str, + sizeof(arena_ind_str), "%u", i); + emitter_json_object_kv_begin(emitter, + arena_ind_str); + emitter_table_printf(emitter, + "arenas[%s]:\n", arena_ind_str); + stats_arena_print(emitter, i, bins, + large, mutex, extents); + /* Close "<arena-ind>". */ + emitter_json_object_end(emitter); } } } - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t}\n"); - } + emitter_json_object_end(emitter); /* Close "stats.arenas". */ } } @@ -1257,29 +1435,23 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "{\n" - "\t\"jemalloc\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - } + emitter_t emitter; + emitter_init(&emitter, + json ? emitter_output_json : emitter_output_table, write_cb, + cbopaque); + emitter_begin(&emitter); + emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); + emitter_json_object_kv_begin(&emitter, "jemalloc"); if (general) { - stats_general_print(write_cb, cbopaque, json, config_stats); + stats_general_print(&emitter); } if (config_stats) { - stats_print_helper(write_cb, cbopaque, json, merged, destroyed, - unmerged, bins, large, mutex); + stats_print_helper(&emitter, merged, destroyed, unmerged, + bins, large, mutex, extents); } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t}\n" - "}\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "--- End jemalloc statistics ---\n"); - } + emitter_json_object_end(&emitter); /* Closes the "jemalloc" dict. */ + emitter_table_printf(&emitter, "--- End jemalloc statistics ---\n"); + emitter_end(&emitter); } diff --git a/deps/jemalloc/src/sz.c b/deps/jemalloc/src/sz.c index 0986615f71..8633fb0500 100644 --- a/deps/jemalloc/src/sz.c +++ b/deps/jemalloc/src/sz.c @@ -2,105 +2,63 @@ #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) -const size_t sz_pind2sz_tab[NPSIZES+1] = { -#define PSZ_yes(lg_grp, ndelta, lg_delta) \ - (((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))), -#define PSZ_no(lg_grp, ndelta, lg_delta) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \ - PSZ_##psz(lg_grp, ndelta, lg_delta) - SIZE_CLASSES -#undef PSZ_yes -#undef PSZ_no -#undef SC - (LARGE_MAXCLASS + PAGE) -}; +size_t sz_pind2sz_tab[SC_NPSIZES+1]; + +static void +sz_boot_pind2sz_tab(const sc_data_t *sc_data) { + int pind = 0; + for (unsigned i = 0; i < SC_NSIZES; i++) { + const sc_t *sc = &sc_data->sc[i]; + if (sc->psz) { + sz_pind2sz_tab[pind] = (ZU(1) << sc->lg_base) + + (ZU(sc->ndelta) << sc->lg_delta); + pind++; + } + } + for (int i = pind; i <= (int)SC_NPSIZES; i++) { + sz_pind2sz_tab[pind] = sc_data->large_maxclass + PAGE; + } +} JEMALLOC_ALIGNED(CACHELINE) -const size_t sz_index2size_tab[NSIZES] = { -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \ - ((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)), - SIZE_CLASSES -#undef SC -}; +size_t sz_index2size_tab[SC_NSIZES]; + +static void +sz_boot_index2size_tab(const sc_data_t *sc_data) { + for (unsigned i = 0; i < SC_NSIZES; i++) { + const sc_t *sc = &sc_data->sc[i]; + sz_index2size_tab[i] = (ZU(1) << sc->lg_base) + + (ZU(sc->ndelta) << (sc->lg_delta)); + } +} +/* + * To keep this table small, we divide sizes by the tiny min size, which gives + * the smallest interval for which the result can change. 
+ */ JEMALLOC_ALIGNED(CACHELINE) -const uint8_t sz_size2index_tab[] = { -#if LG_TINY_MIN == 0 -#warning "Dangerous LG_TINY_MIN" -#define S2B_0(i) i, -#elif LG_TINY_MIN == 1 -#warning "Dangerous LG_TINY_MIN" -#define S2B_1(i) i, -#elif LG_TINY_MIN == 2 -#warning "Dangerous LG_TINY_MIN" -#define S2B_2(i) i, -#elif LG_TINY_MIN == 3 -#define S2B_3(i) i, -#elif LG_TINY_MIN == 4 -#define S2B_4(i) i, -#elif LG_TINY_MIN == 5 -#define S2B_5(i) i, -#elif LG_TINY_MIN == 6 -#define S2B_6(i) i, -#elif LG_TINY_MIN == 7 -#define S2B_7(i) i, -#elif LG_TINY_MIN == 8 -#define S2B_8(i) i, -#elif LG_TINY_MIN == 9 -#define S2B_9(i) i, -#elif LG_TINY_MIN == 10 -#define S2B_10(i) i, -#elif LG_TINY_MIN == 11 -#define S2B_11(i) i, -#else -#error "Unsupported LG_TINY_MIN" -#endif -#if LG_TINY_MIN < 1 -#define S2B_1(i) S2B_0(i) S2B_0(i) -#endif -#if LG_TINY_MIN < 2 -#define S2B_2(i) S2B_1(i) S2B_1(i) -#endif -#if LG_TINY_MIN < 3 -#define S2B_3(i) S2B_2(i) S2B_2(i) -#endif -#if LG_TINY_MIN < 4 -#define S2B_4(i) S2B_3(i) S2B_3(i) -#endif -#if LG_TINY_MIN < 5 -#define S2B_5(i) S2B_4(i) S2B_4(i) -#endif -#if LG_TINY_MIN < 6 -#define S2B_6(i) S2B_5(i) S2B_5(i) -#endif -#if LG_TINY_MIN < 7 -#define S2B_7(i) S2B_6(i) S2B_6(i) -#endif -#if LG_TINY_MIN < 8 -#define S2B_8(i) S2B_7(i) S2B_7(i) -#endif -#if LG_TINY_MIN < 9 -#define S2B_9(i) S2B_8(i) S2B_8(i) -#endif -#if LG_TINY_MIN < 10 -#define S2B_10(i) S2B_9(i) S2B_9(i) -#endif -#if LG_TINY_MIN < 11 -#define S2B_11(i) S2B_10(i) S2B_10(i) -#endif -#define S2B_no(i) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \ - S2B_##lg_delta_lookup(index) - SIZE_CLASSES -#undef S2B_3 -#undef S2B_4 -#undef S2B_5 -#undef S2B_6 -#undef S2B_7 -#undef S2B_8 -#undef S2B_9 -#undef S2B_10 -#undef S2B_11 -#undef S2B_no -#undef SC -}; +uint8_t sz_size2index_tab[(SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1]; + +static void +sz_boot_size2index_tab(const sc_data_t *sc_data) { + size_t dst_max = (SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1; + size_t dst_ind = 0; + for (unsigned sc_ind = 0; sc_ind < SC_NSIZES && dst_ind < dst_max; + sc_ind++) { + const sc_t *sc = &sc_data->sc[sc_ind]; + size_t sz = (ZU(1) << sc->lg_base) + + (ZU(sc->ndelta) << sc->lg_delta); + size_t max_ind = ((sz + (ZU(1) << SC_LG_TINY_MIN) - 1) + >> SC_LG_TINY_MIN); + for (; dst_ind <= max_ind && dst_ind < dst_max; dst_ind++) { + sz_size2index_tab[dst_ind] = sc_ind; + } + } +} + +void +sz_boot(const sc_data_t *sc_data) { + sz_boot_pind2sz_tab(sc_data); + sz_boot_index2size_tab(sc_data); + sz_boot_size2index_tab(sc_data); +} diff --git a/deps/jemalloc/src/tcache.c b/deps/jemalloc/src/tcache.c index 936ef3140d..50099a9f2c 100644 --- a/deps/jemalloc/src/tcache.c +++ b/deps/jemalloc/src/tcache.c @@ -4,7 +4,8 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/sc.h" /******************************************************************************/ /* Data. */ @@ -12,7 +13,7 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; -tcache_bin_info_t *tcache_bin_info; +cache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. 
*/ unsigned nhbins; @@ -40,8 +41,8 @@ void tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin; - if (binind < NBINS) { + cache_bin_t *tbin; + if (binind < SC_NBINS) { tbin = tcache_small_bin_get(tcache, binind); } else { tbin = tcache_large_bin_get(tcache, binind); @@ -50,7 +51,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. */ - if (binind < NBINS) { + if (binind < SC_NBINS) { tcache_bin_flush_small(tsd, tcache, tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2)); @@ -58,7 +59,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + cache_bin_info_t *tbin_info = &tcache_bin_info[binind]; if ((tbin_info->ncached_max >> (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; @@ -72,7 +73,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. */ - if (binind < NBINS && tcache->lg_fill_div[binind] > 1) { + if (binind < SC_NBINS && tcache->lg_fill_div[binind] > 1) { tcache->lg_fill_div[binind]--; } } @@ -86,7 +87,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { + cache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; assert(tcache->arena != NULL); @@ -95,33 +96,72 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if (config_prof) { tcache->prof_accumbytes = 0; } - ret = tcache_alloc_easy(tbin, tcache_success); + ret = cache_bin_alloc_easy(tbin, tcache_success); return ret; } +/* Enabled with --enable-extra-size-check. */ +static void +tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, + size_t nflush, extent_t **extents){ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + /* + * Verify that the items in the tcache all have the correct size; this + * is useful for catching sized deallocation bugs, also to fail early + * instead of corrupting metadata. Since this can be turned on for opt + * builds, avoid the branch in the loop. + */ + szind_t szind; + size_t sz_sum = binind * nflush; + for (unsigned i = 0 ; i < nflush; i++) { + rtree_extent_szind_read(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)*(tbin->avail - 1 - i), true, + &extents[i], &szind); + sz_sum -= szind; + } + if (sz_sum != 0) { + safety_check_fail("<jemalloc>: size mismatch in thread cache " + "detected, likely caused by sized deallocation bugs by " + "application. Abort.\n"); + abort(); + } +} + void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem) { bool merged_stats = false; - assert(binind < NBINS); - assert(rem <= tbin->ncached); + assert(binind < SC_NBINS); + assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; assert(arena != NULL); unsigned nflush = tbin->ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); + /* Look up extent once per item. 
*/ - for (unsigned i = 0 ; i < nflush; i++) { - item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); + if (config_opt_safety_checks) { + tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, + nflush, item_extent); + } else { + for (unsigned i = 0 ; i < nflush; i++) { + item_extent[i] = iealloc(tsd_tsdn(tsd), + *(tbin->avail - 1 - i)); + } } - while (nflush > 0) { /* Lock the arena bin associated with the first object. */ extent_t *extent = item_extent[0]; - arena_t *bin_arena = extent_arena_get(extent); - arena_bin_t *bin = &bin_arena->bins[binind]; + unsigned bin_arena_ind = extent_arena_ind_get(extent); + arena_t *bin_arena = arena_get(tsd_tsdn(tsd), bin_arena_ind, + false); + unsigned binshard = extent_binshard_get(extent); + assert(binshard < bin_infos[binind].n_shards); + bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; if (config_prof && bin_arena == arena) { if (arena_prof_accum(tsd_tsdn(tsd), arena, @@ -132,8 +172,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - if (config_stats && bin_arena == arena) { - assert(!merged_stats); + if (config_stats && bin_arena == arena && !merged_stats) { merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; @@ -145,9 +184,10 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, extent = item_extent[i]; assert(ptr != NULL && extent != NULL); - if (extent_arena_get(extent) == bin_arena) { + if (extent_arena_ind_get(extent) == bin_arena_ind + && extent_binshard_get(extent) == binshard) { arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), - bin_arena, extent, ptr); + bin_arena, bin, binind, extent, ptr); } else { /* * This object was allocated via a different @@ -169,8 +209,9 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_bin_t *bin = &arena->bins[binind]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsd_tsdn(tsd), arena, binind, + &binshard); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; @@ -180,63 +221,76 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } void -tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { bool merged_stats = false; assert(binind < nhbins); - assert(rem <= tbin->ncached); + assert((cache_bin_sz_t)rem <= tbin->ncached); - arena_t *arena = tcache->arena; - assert(arena != NULL); + arena_t *tcache_arena = tcache->arena; + assert(tcache_arena != NULL); unsigned nflush = tbin->ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); + +#ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up extent once per item. */ for (unsigned i = 0 ; i < nflush; i++) { item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); } - +#else + tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, + item_extent); +#endif while (nflush > 0) { /* Lock the arena associated with the first object. 
*/ extent_t *extent = item_extent[0]; - arena_t *locked_arena = extent_arena_get(extent); - UNUSED bool idump; + unsigned locked_arena_ind = extent_arena_ind_get(extent); + arena_t *locked_arena = arena_get(tsd_tsdn(tsd), + locked_arena_ind, false); + bool idump; if (config_prof) { idump = false; } - malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); + bool lock_large = !arena_is_auto(locked_arena); + if (lock_large) { + malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); + } for (unsigned i = 0; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); extent = item_extent[i]; - if (extent_arena_get(extent) == locked_arena) { + if (extent_arena_ind_get(extent) == locked_arena_ind) { large_dalloc_prep_junked_locked(tsd_tsdn(tsd), extent); } } - if ((config_prof || config_stats) && locked_arena == arena) { + if ((config_prof || config_stats) && + (locked_arena == tcache_arena)) { if (config_prof) { - idump = arena_prof_accum(tsd_tsdn(tsd), arena, - tcache->prof_accumbytes); + idump = arena_prof_accum(tsd_tsdn(tsd), + tcache_arena, tcache->prof_accumbytes); tcache->prof_accumbytes = 0; } if (config_stats) { merged_stats = true; - arena_stats_large_nrequests_add(tsd_tsdn(tsd), - &arena->stats, binind, + arena_stats_large_flush_nrequests_add( + tsd_tsdn(tsd), &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } } - malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx); + if (lock_large) { + malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx); + } unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { @@ -244,7 +298,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, extent = item_extent[i]; assert(ptr != NULL && extent != NULL); - if (extent_arena_get(extent) == locked_arena) { + if (extent_arena_ind_get(extent) == locked_arena_ind) { large_dalloc_finish(tsd_tsdn(tsd), extent); } else { /* @@ -270,15 +324,15 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_stats_large_nrequests_add(tsd_tsdn(tsd), &arena->stats, - binind, tbin->tstats.nrequests); + arena_stats_large_flush_nrequests_add(tsd_tsdn(tsd), + &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } @@ -291,8 +345,15 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. 
*/ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); + ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); + cache_bin_array_descriptor_init( + &tcache->cache_bin_array_descriptor, tcache->bins_small, + tcache->bins_large); + ql_tail_insert(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); + malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } @@ -316,6 +377,8 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); + ql_remove(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); tcache_stats_merge(tsdn, tcache, arena); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } @@ -354,10 +417,10 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS); - memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS)); + memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); + memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); unsigned i = 0; - for (; i < NBINS; i++) { + for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); /* @@ -449,16 +512,16 @@ static void tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tcache->arena != NULL); - for (unsigned i = 0; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + for (unsigned i = 0; i < SC_NBINS; i++) { + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); tcache_bin_flush_small(tsd, tcache, tbin, i, 0); if (config_stats) { assert(tbin->tstats.nrequests == 0); } } - for (unsigned i = NBINS; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + for (unsigned i = SC_NBINS; i < nhbins; i++) { + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats) { @@ -482,6 +545,7 @@ tcache_flush(tsd_t *tsd) { static void tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { tcache_flush_cache(tsd, tcache); + arena_t *arena = tcache->arena; tcache_arena_dissociate(tsd_tsdn(tsd), tcache); if (tsd_tcache) { @@ -494,6 +558,23 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { /* Release both the tcache struct and avail array. */ idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true); } + + /* + * The deallocation and tcache flush above may not trigger decay since + * we are on the tcache shutdown path (potentially with non-nominal + * tsd). Manually trigger decay to avoid pathological cases. Also + * include arena 0 because the tcache array is allocated from it. + */ + arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false), + false, false); + + if (arena_nthreads_get(arena, false) == 0 && + !background_thread_enabled()) { + /* Force purging when no threads assigned to the arena anymore. */ + arena_decay(tsd_tsdn(tsd), arena, false, true); + } else { + arena_decay(tsd_tsdn(tsd), arena, false, false); + } } /* For auto tcache (embedded in TSD) only. */ @@ -523,18 +604,18 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { cassert(config_stats); /* Merge and reset tcache stats. 
*/ - for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); - malloc_mutex_lock(tsdn, &bin->lock); + for (i = 0; i < SC_NBINS; i++) { + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, &binshard); bin->stats.nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); tbin->tstats.nrequests = 0; } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); - arena_stats_large_nrequests_add(tsdn, &arena->stats, i, + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); + arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } @@ -605,23 +686,32 @@ label_return: } static tcache_t * -tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm) { +tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm, bool allow_reinit) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx); if (elm->tcache == NULL) { return NULL; } tcache_t *tcache = elm->tcache; - elm->tcache = NULL; + if (allow_reinit) { + elm->tcache = TCACHES_ELM_NEED_REINIT; + } else { + elm->tcache = NULL; + } + + if (tcache == TCACHES_ELM_NEED_REINIT) { + return NULL; + } return tcache; } void tcaches_flush(tsd_t *tsd, unsigned ind) { malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); - tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind]); + tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind], true); malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); if (tcache != NULL) { + /* Destroy the tcache; recreate in tcaches_get() if needed. */ tcache_destroy(tsd, tcache, false); } } @@ -630,7 +720,7 @@ void tcaches_destroy(tsd_t *tsd, unsigned ind) { malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); tcaches_t *elm = &tcaches[ind]; - tcache_t *tcache = tcaches_elm_remove(tsd, elm); + tcache_t *tcache = tcaches_elm_remove(tsd, elm, false); elm->next = tcaches_avail; tcaches_avail = elm; malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); @@ -643,8 +733,8 @@ bool tcache_boot(tsdn_t *tsdn) { /* If necessary, clamp opt_lg_tcache_max. */ if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < - SMALL_MAXCLASS) { - tcache_maxclass = SMALL_MAXCLASS; + SC_SMALL_MAXCLASS) { + tcache_maxclass = SC_SMALL_MAXCLASS; } else { tcache_maxclass = (ZU(1) << opt_lg_tcache_max); } @@ -657,21 +747,21 @@ tcache_boot(tsdn_t *tsdn) { nhbins = sz_size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. 
*/ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins - * sizeof(tcache_bin_info_t), CACHELINE); + tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins + * sizeof(cache_bin_info_t), CACHELINE); if (tcache_bin_info == NULL) { return true; } stack_nelms = 0; unsigned i; - for (i = 0; i < NBINS; i++) { - if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { + for (i = 0; i < SC_NBINS; i++) { + if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MIN; - } else if ((arena_bin_info[i].nregs << 1) <= + } else if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); + (bin_infos[i].nregs << 1); } else { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; diff --git a/deps/jemalloc/src/test_hooks.c b/deps/jemalloc/src/test_hooks.c new file mode 100644 index 0000000000..ace00d9c46 --- /dev/null +++ b/deps/jemalloc/src/test_hooks.c @@ -0,0 +1,12 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +/* + * The hooks are a little bit screwy -- they're not genuinely exported in the + * sense that we want them available to end-users, but we do want them visible + * from outside the generated library, so that we can use them in test code. + */ +JEMALLOC_EXPORT +void (*test_hooks_arena_new_hook)() = NULL; + +JEMALLOC_EXPORT +void (*test_hooks_libc_hook)() = NULL; diff --git a/deps/jemalloc/src/tsd.c b/deps/jemalloc/src/tsd.c index f968992f2b..a31f6b9698 100644 --- a/deps/jemalloc/src/tsd.c +++ b/deps/jemalloc/src/tsd.c @@ -12,12 +12,16 @@ static unsigned ncleanups; static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; +/* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */ +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS + #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; -__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false; +JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; +JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false; bool tsd_booted = false; #elif (defined(JEMALLOC_TLS)) -__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; +JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; pthread_key_t tsd_tsd; bool tsd_booted = false; #elif (defined(_WIN32)) @@ -41,6 +45,7 @@ tsd_init_head_t tsd_init_head = { ql_head_initializer(blocks), MALLOC_MUTEX_INITIALIZER }; + tsd_wrapper_t tsd_boot_wrapper = { false, TSD_INITIALIZER @@ -48,17 +53,164 @@ tsd_wrapper_t tsd_boot_wrapper = { bool tsd_booted = false; #endif +JEMALLOC_DIAGNOSTIC_POP /******************************************************************************/ +/* A list of all the tsds in the nominal state. */ +typedef ql_head(tsd_t) tsd_list_t; +static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds); +static malloc_mutex_t tsd_nominal_tsds_lock; + +/* How many slow-path-enabling features are turned on. */ +static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0); + +static bool +tsd_in_nominal_list(tsd_t *tsd) { + tsd_t *tsd_list; + bool found = false; + /* + * We don't know that tsd is nominal; it might not be safe to get data + * out of it here. 
+ */ + malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); + ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { + if (tsd == tsd_list) { + found = true; + break; + } + } + malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock); + return found; +} + +static void +tsd_add_nominal(tsd_t *tsd) { + assert(!tsd_in_nominal_list(tsd)); + assert(tsd_state_get(tsd) <= tsd_state_nominal_max); + ql_elm_new(tsd, TSD_MANGLE(tcache).tsd_link); + malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); + ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); + malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +static void +tsd_remove_nominal(tsd_t *tsd) { + assert(tsd_in_nominal_list(tsd)); + assert(tsd_state_get(tsd) <= tsd_state_nominal_max); + malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); + ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); + malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +static void +tsd_force_recompute(tsdn_t *tsdn) { + /* + * The stores to tsd->state here need to synchronize with the exchange + * in tsd_slow_update. + */ + atomic_fence(ATOMIC_RELEASE); + malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); + tsd_t *remote_tsd; + ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { + assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) + <= tsd_state_nominal_max); + tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute, + ATOMIC_RELAXED); + } + malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); +} + +void +tsd_global_slow_inc(tsdn_t *tsdn) { + atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); + /* + * We unconditionally force a recompute, even if the global slow count + * was already positive. If we didn't, then it would be possible for us + * to return to the user, have the user synchronize externally with some + * other thread, and then have that other thread not have picked up the + * update yet (since the original incrementing thread might still be + * making its way through the tsd list). + */ + tsd_force_recompute(tsdn); +} + +void tsd_global_slow_dec(tsdn_t *tsdn) { + atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); + /* See the note in ..._inc(). */ + tsd_force_recompute(tsdn); +} + +static bool +tsd_local_slow(tsd_t *tsd) { + return !tsd_tcache_enabled_get(tsd) + || tsd_reentrancy_level_get(tsd) > 0; +} + +bool +tsd_global_slow() { + return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0; +} + +/******************************************************************************/ + +static uint8_t +tsd_state_compute(tsd_t *tsd) { + if (!tsd_nominal(tsd)) { + return tsd_state_get(tsd); + } + /* We're in *a* nominal state; but which one? */ + if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) { + return tsd_state_nominal_slow; + } else { + return tsd_state_nominal; + } +} + void tsd_slow_update(tsd_t *tsd) { - if (tsd_nominal(tsd)) { - if (malloc_slow || !tsd_tcache_enabled_get(tsd) || - tsd_reentrancy_level_get(tsd) > 0) { - tsd->state = tsd_state_nominal_slow; + uint8_t old_state; + do { + uint8_t new_state = tsd_state_compute(tsd); + old_state = tsd_atomic_exchange(&tsd->state, new_state, + ATOMIC_ACQUIRE); + } while (old_state == tsd_state_nominal_recompute); +} + +void +tsd_state_set(tsd_t *tsd, uint8_t new_state) { + /* Only the tsd module can change the state *to* recompute. 
*/ + assert(new_state != tsd_state_nominal_recompute); + uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED); + if (old_state > tsd_state_nominal_max) { + /* + * Not currently in the nominal list, but it might need to be + * inserted there. + */ + assert(!tsd_in_nominal_list(tsd)); + tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED); + if (new_state <= tsd_state_nominal_max) { + tsd_add_nominal(tsd); + } + } else { + /* + * We're currently nominal. If the new state is non-nominal, + * great; we take ourselves off the list and just enter the new + * state. + */ + assert(tsd_in_nominal_list(tsd)); + if (new_state > tsd_state_nominal_max) { + tsd_remove_nominal(tsd); + tsd_atomic_store(&tsd->state, new_state, + ATOMIC_RELAXED); } else { - tsd->state = tsd_state_nominal; + /* + * This is the tricky case. We're transitioning from + * one nominal state to another. The caller can't know + * about any races that are occuring at the same time, + * so we always have to recompute no matter what. + */ + tsd_slow_update(tsd); } } } @@ -71,12 +223,23 @@ tsd_data_init(tsd_t *tsd) { */ rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + /* + * A nondeterministic seed based on the address of tsd reduces + * the likelihood of lockstep non-uniform cache index + * utilization among identical concurrent processes, but at the + * cost of test repeatability. For debug builds, instead use a + * deterministic seed. + */ + *tsd_offset_statep_get(tsd) = config_debug ? 0 : + (uint64_t)(uintptr_t)tsd; + return tsd_tcache_enabled_data_init(tsd); } static void assert_tsd_data_cleanup_done(tsd_t *tsd) { assert(!tsd_nominal(tsd)); + assert(!tsd_in_nominal_list(tsd)); assert(*tsd_arenap_get_unsafe(tsd) == NULL); assert(*tsd_iarenap_get_unsafe(tsd) == NULL); assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true); @@ -87,8 +250,8 @@ assert_tsd_data_cleanup_done(tsd_t *tsd) { static bool tsd_data_init_nocleanup(tsd_t *tsd) { - assert(tsd->state == tsd_state_reincarnated || - tsd->state == tsd_state_minimal_initialized); + assert(tsd_state_get(tsd) == tsd_state_reincarnated || + tsd_state_get(tsd) == tsd_state_minimal_initialized); /* * During reincarnation, there is no guarantee that the cleanup function * will be called (deallocation may happen after all tsd destructors). @@ -107,27 +270,33 @@ tsd_t * tsd_fetch_slow(tsd_t *tsd, bool minimal) { assert(!tsd_fast(tsd)); - if (tsd->state == tsd_state_nominal_slow) { - /* On slow path but no work needed. */ - assert(malloc_slow || !tsd_tcache_enabled_get(tsd) || - tsd_reentrancy_level_get(tsd) > 0 || - *tsd_arenas_tdata_bypassp_get(tsd)); - } else if (tsd->state == tsd_state_uninitialized) { + if (tsd_state_get(tsd) == tsd_state_nominal_slow) { + /* + * On slow path but no work needed. Note that we can't + * necessarily *assert* that we're slow, because we might be + * slow because of an asynchronous modification to global state, + * which might be asynchronously modified *back*. + */ + } else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) { + tsd_slow_update(tsd); + } else if (tsd_state_get(tsd) == tsd_state_uninitialized) { if (!minimal) { - tsd->state = tsd_state_nominal; - tsd_slow_update(tsd); - /* Trigger cleanup handler registration. */ - tsd_set(tsd); - tsd_data_init(tsd); + if (tsd_booted) { + tsd_state_set(tsd, tsd_state_nominal); + tsd_slow_update(tsd); + /* Trigger cleanup handler registration. 
*/ + tsd_set(tsd); + tsd_data_init(tsd); + } } else { - tsd->state = tsd_state_minimal_initialized; + tsd_state_set(tsd, tsd_state_minimal_initialized); tsd_set(tsd); tsd_data_init_nocleanup(tsd); } - } else if (tsd->state == tsd_state_minimal_initialized) { + } else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) { if (!minimal) { /* Switch to fully initialized. */ - tsd->state = tsd_state_nominal; + tsd_state_set(tsd, tsd_state_nominal); assert(*tsd_reentrancy_levelp_get(tsd) >= 1); (*tsd_reentrancy_levelp_get(tsd))--; tsd_slow_update(tsd); @@ -135,12 +304,12 @@ tsd_fetch_slow(tsd_t *tsd, bool minimal) { } else { assert_tsd_data_cleanup_done(tsd); } - } else if (tsd->state == tsd_state_purgatory) { - tsd->state = tsd_state_reincarnated; + } else if (tsd_state_get(tsd) == tsd_state_purgatory) { + tsd_state_set(tsd, tsd_state_reincarnated); tsd_set(tsd); tsd_data_init_nocleanup(tsd); } else { - assert(tsd->state == tsd_state_reincarnated); + assert(tsd_state_get(tsd) == tsd_state_reincarnated); } return tsd; @@ -204,7 +373,7 @@ void tsd_cleanup(void *arg) { tsd_t *tsd = (tsd_t *)arg; - switch (tsd->state) { + switch (tsd_state_get(tsd)) { case tsd_state_uninitialized: /* Do nothing. */ break; @@ -222,7 +391,7 @@ tsd_cleanup(void *arg) { case tsd_state_nominal: case tsd_state_nominal_slow: tsd_do_data_cleanup(tsd); - tsd->state = tsd_state_purgatory; + tsd_state_set(tsd, tsd_state_purgatory); tsd_set(tsd); break; case tsd_state_purgatory: @@ -250,6 +419,10 @@ malloc_tsd_boot0(void) { tsd_t *tsd; ncleanups = 0; + if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock", + WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { + return NULL; + } if (tsd_boot0()) { return NULL; } @@ -300,7 +473,7 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { # pragma comment(linker, "/INCLUDE:_tls_callback") # else # pragma comment(linker, "/INCLUDE:_tls_used") -# pragma comment(linker, "/INCLUDE:tls_callback") +# pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) ) # endif # pragma section(".CRT$XLY",long,read) #endif @@ -339,3 +512,23 @@ tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { malloc_mutex_unlock(TSDN_NULL, &head->lock); } #endif + +void +tsd_prefork(tsd_t *tsd) { + malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +void +tsd_postfork_parent(tsd_t *tsd) { + malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +void +tsd_postfork_child(tsd_t *tsd) { + malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); + ql_new(&tsd_nominal_tsds); + + if (tsd_state_get(tsd) <= tsd_state_nominal_max) { + tsd_add_nominal(tsd); + } +} diff --git a/deps/jemalloc/src/zone.c b/deps/jemalloc/src/zone.c index 9d3b7b4952..23dfdd04a9 100644 --- a/deps/jemalloc/src/zone.c +++ b/deps/jemalloc/src/zone.c @@ -89,6 +89,7 @@ JEMALLOC_ATTR(weak_import); static malloc_zone_t *default_zone, *purgeable_zone; static malloc_zone_t jemalloc_zone; static struct malloc_introspection_t jemalloc_zone_introspect; +static pid_t zone_force_lock_pid = -1; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -270,6 +271,12 @@ zone_log(malloc_zone_t *zone, void *address) { static void zone_force_lock(malloc_zone_t *zone) { if (isthreaded) { + /* + * See the note in zone_force_unlock, below, to see why we need + * this. 
+ */ + assert(zone_force_lock_pid == -1); + zone_force_lock_pid = getpid(); jemalloc_prefork(); } } @@ -277,14 +284,25 @@ zone_force_lock(malloc_zone_t *zone) { static void zone_force_unlock(malloc_zone_t *zone) { /* - * Call jemalloc_postfork_child() rather than - * jemalloc_postfork_parent(), because this function is executed by both - * parent and child. The parent can tolerate having state - * reinitialized, but the child cannot unlock mutexes that were locked - * by the parent. + * zone_force_lock and zone_force_unlock are the entry points to the + * forking machinery on OS X. The tricky thing is, the child is not + * allowed to unlock mutexes locked in the parent, even if owned by the + * forking thread (and the mutex type we use in OS X will fail an assert + * if we try). In the child, we can get away with reinitializing all + * the mutexes, which has the effect of unlocking them. In the parent, + * doing this would mean we wouldn't wake any waiters blocked on the + * mutexes we unlock. So, we record the pid of the current thread in + * zone_force_lock, and use that to detect if we're in the parent or + * child here, to decide which unlock logic we need. */ if (isthreaded) { - jemalloc_postfork_child(); + assert(zone_force_lock_pid != -1); + if (getpid() == zone_force_lock_pid) { + jemalloc_postfork_parent(); + } else { + jemalloc_postfork_child(); + } + zone_force_lock_pid = -1; } } |
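The zone.c hunk above records the forking process's pid in zone_force_lock() and compares it in zone_force_unlock() to decide between parent-style unlocking and child-style reinitialization. Below is a minimal standalone sketch of that pattern, for illustration only: my_lock, my_prefork and my_postfork are invented names, not jemalloc symbols, and the real code drives jemalloc's own prefork/postfork machinery rather than a single pthread mutex.

#include <assert.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static pthread_mutex_t my_lock = PTHREAD_MUTEX_INITIALIZER;
static pid_t my_prefork_pid = -1;

static void
my_prefork(void) {
	assert(my_prefork_pid == -1);
	my_prefork_pid = getpid();	/* remember which process took the lock */
	pthread_mutex_lock(&my_lock);
}

static void
my_postfork(void) {
	assert(my_prefork_pid != -1);
	if (getpid() == my_prefork_pid) {
		/* Parent: we still own the lock, so unlocking wakes any waiters. */
		pthread_mutex_unlock(&my_lock);
	} else {
		/*
		 * Child: unlocking a mutex locked by the parent is not allowed,
		 * so reinitialize it instead, which effectively unlocks it.
		 */
		pthread_mutex_init(&my_lock, NULL);
	}
	my_prefork_pid = -1;
}

int
main(void) {
	/* Both parent and child run the same post-fork handler, as on OS X. */
	pthread_atfork(my_prefork, my_postfork, my_postfork);
	pid_t child = fork();
	if (child == 0) {
		_exit(0);
	}
	waitpid(child, NULL, 0);
	return 0;
}

Compile with -pthread; the only point is the pid comparison, which is the same test the patch adds via zone_force_lock_pid.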
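The tcache.c hunk adds tbin_extents_lookup_size_check(), which validates every cached pointer's size class during a flush without a per-item branch: it starts from binind * nflush and subtracts each looked-up index, failing if the remainder is nonzero. A small self-contained sketch of that arithmetic follows; check_szinds() and its toy inputs are made up for illustration, and, as in the real check, offsetting mismatches could in principle cancel out.

#include <stddef.h>
#include <stdio.h>

/*
 * Branch-free check: a nonzero remainder proves at least one index differs
 * from the expected one. Unsigned wrap-around during the subtraction is
 * harmless; the arithmetic is effectively modular.
 */
static int
check_szinds(size_t expected_ind, const size_t *actual_inds, size_t n) {
	size_t sum = expected_ind * n;
	for (size_t i = 0; i < n; i++) {
		sum -= actual_inds[i];	/* no per-item comparison or branch */
	}
	return sum == 0;
}

int
main(void) {
	size_t ok[]  = {7, 7, 7, 7};
	size_t bad[] = {7, 7, 8, 7};	/* one entry from the wrong size class */
	printf("ok:  %d\n", check_szinds(7, ok, 4));	/* prints 1 */
	printf("bad: %d\n", check_szinds(7, bad, 4));	/* prints 0 */
	return 0;
}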
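The sz.c hunk replaces the macro-generated lookup tables with tables filled at boot from sc_data, where each class's byte size is (1 << lg_base) + (ndelta << lg_delta) and the reverse table is indexed by the request size divided by the tiny-class granularity. The toy program below walks the same idea with an invented four-entry class table; LG_TINY, toy_scs and toy_boot are illustrative stand-ins, not jemalloc's SC_LG_TINY_MIN machinery.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define LG_TINY 3	/* 8-byte granularity, for illustration only */

typedef struct { unsigned lg_base, ndelta, lg_delta; } toy_sc_t;

/* A hypothetical class table: 8, 16, 24 and 32 bytes. */
static const toy_sc_t toy_scs[] = {
	{3, 0, 3}, {3, 1, 3}, {3, 2, 3}, {3, 3, 3},
};
#define NSCS (sizeof(toy_scs) / sizeof(toy_scs[0]))

static size_t index2size[NSCS];
static uint8_t size2index[(32 >> LG_TINY) + 1];

static void
toy_boot(void) {
	size_t dst = 0;
	for (size_t i = 0; i < NSCS; i++) {
		const toy_sc_t *sc = &toy_scs[i];
		size_t sz = ((size_t)1 << sc->lg_base)
		    + ((size_t)sc->ndelta << sc->lg_delta);
		index2size[i] = sz;
		/* Every request size that rounds up to sz maps to class i. */
		for (; dst <= (sz >> LG_TINY); dst++) {
			size2index[dst] = (uint8_t)i;
		}
	}
}

int
main(void) {
	toy_boot();
	size_t req = 20;
	unsigned ind = size2index[(req + (1 << LG_TINY) - 1) >> LG_TINY];
	/* A 20-byte request rounds up to the 24-byte class (index 2). */
	printf("%zu -> class %u (%zu bytes)\n", req, ind, index2size[ind]);
	return 0;
}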