diff options
author | jackpoz <giacomopoz@gmail.com> | 2017-11-19 11:23:41 +0100 |
---|---|---|
committer | funjoker <funjoker109@gmail.com> | 2021-02-15 19:13:25 +0100 |
commit | 367e9f210eb5ec852458f65ec967497d919afd7a (patch) | |
tree | f8a51b3d5260fbf3e7e941397ad49fa735951991 /dep/jemalloc | |
parent | a9edd9dc47afc56ee0a4b8e9f2be2823e861903f (diff) |
Dep/Jemalloc: Update to Jemalloc 4.0.4
(cherry picked from commit cc6dec72863a771da0c0f3ab3d32f75d7ce863bd)
Diffstat (limited to 'dep/jemalloc')
62 files changed, 13249 insertions, 6728 deletions
diff --git a/dep/jemalloc/CMakeLists.txt b/dep/jemalloc/CMakeLists.txt index dace3662c68..e049468888d 100644 --- a/dep/jemalloc/CMakeLists.txt +++ b/dep/jemalloc/CMakeLists.txt @@ -20,8 +20,8 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT NOJEM) # Create the header, so we can use it configure_file( - "${CMAKE_SOURCE_DIR}/dep/jemalloc/jemalloc_defs.h.in.cmake" - "${BUILDDIR}/jemalloc_defs.h" + "${CMAKE_SOURCE_DIR}/dep/jemalloc/jemalloc_internal_defs.h.in.cmake" + "${BUILDDIR}/jemalloc_internal_defs.h" @ONLY ) @@ -42,6 +42,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT NOJEM) ${CMAKE_CURRENT_SOURCE_DIR}/src/jemalloc.c ${CMAKE_CURRENT_SOURCE_DIR}/src/mb.c ${CMAKE_CURRENT_SOURCE_DIR}/src/mutex.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/pages.c ${CMAKE_CURRENT_SOURCE_DIR}/src/prof.c ${CMAKE_CURRENT_SOURCE_DIR}/src/quarantine.c ${CMAKE_CURRENT_SOURCE_DIR}/src/rtree.c diff --git a/dep/jemalloc/COPYING b/dep/jemalloc/COPYING index bdda0feb9e5..611968cda50 100644 --- a/dep/jemalloc/COPYING +++ b/dep/jemalloc/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2014 Jason Evans <jasone@canonware.com>. +Copyright (C) 2002-2015 Jason Evans <jasone@canonware.com>. All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2014 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2015 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/dep/jemalloc/ChangeLog b/dep/jemalloc/ChangeLog index d56ee999e69..8ed42cbeadb 100644 --- a/dep/jemalloc/ChangeLog +++ b/dep/jemalloc/ChangeLog @@ -1,10 +1,262 @@ Following are change highlights associated with official releases. 
Important -bug fixes are all mentioned, but internal enhancements are omitted here for -brevity (even though they are more fun to write about). Much more detail can be -found in the git revision history: +bug fixes are all mentioned, but some internal enhancements are omitted here for +brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.0.4 (October 24, 2015) + + This bugfix release fixes another xallocx() regression. No other regressions + have come to light in over a month, so this is likely a good starting point + for people who prefer to wait for "dot one" releases with all the major issues + shaken out. + + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO) to zero the last full trailing page of large + allocations that have been randomly assigned an offset of 0 when + --enable-cache-oblivious configure option is enabled. + +* 4.0.3 (September 24, 2015) + + This bugfix release continues the trend of xallocx() and heap profiling fixes. + + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large + allocations when --enable-cache-oblivious configure option is enabled. + - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations + when resizing from/to a size class that is not a multiple of the chunk size. + - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap + profile dumping started. + - Work around a potentially bad thread-specific data initialization + interaction with NPTL (glibc's pthreads implementation). + +* 4.0.2 (September 21, 2015) + + This bugfix release addresses a few bugs specific to heap profiling. + + Bug fixes: + - Fix ixallocx_prof_sample() to never modify nor create sampled small + allocations. xallocx() is in general incapable of moving small allocations, + so this fix removes buggy code without loss of generality. + - Fix irallocx_prof_sample() to always allocate large regions, even when + alignment is non-zero. 
+ - Fix prof_alloc_rollback() to read tdata from thread-specific data rather + than dereferencing a potentially invalid tctx. + +* 4.0.1 (September 15, 2015) + + This is a bugfix release that is somewhat high risk due to the amount of + refactoring required to address deep xallocx() problems. As a side effect of + these fixes, xallocx() now tries harder to partially fulfill requests for + optional extra space. Note that a couple of minor heap profiling + optimizations are included, but these are better thought of as performance + fixes that were integral to discovering most of the other bugs. + + Optimizations: + - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the + fast path when heap profiling is enabled. Additionally, split a special + case out into arena_prof_tctx_reset(), which also avoids chunk metadata + reads. + - Optimize irallocx_prof() to optimistically update the sampler state. The + prior implementation appears to have been a holdover from when + rallocx()/xallocx() functionality was combined as rallocm(). + + Bug fixes: + - Fix TLS configuration such that it is enabled by default for platforms on + which it works correctly. + - Fix arenas_cache_cleanup() and arena_get_hard() to handle + allocation/deallocation within the application's thread-specific data + cleanup functions even after arenas_cache is torn down. + - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. + - Fix chunk purge hook calls for in-place huge shrinking reallocation to + specify the old chunk size rather than the new chunk size. This bug caused + no correctness issues for the default chunk purge function, but was + visible to custom functions set via the "arena.<i>.chunk_hooks" mallctl. + - Fix heap profiling bugs: + + Fix heap profiling to distinguish among otherwise identical sample sites + with interposed resets (triggered via the "prof.reset" mallctl). 
This bug + could cause data structure corruption that would most likely result in a + segfault. + + Fix irealloc_prof() to prof_alloc_rollback() on OOM. + + Make one call to prof_active_get_unlocked() per allocation event, and use + the result throughout the relevant functions that handle an allocation + event. Also add a missing check in prof_realloc(). These fixes protect + allocation events against concurrent prof_active changes. + + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() + in the correct order. + + Fix prof_realloc() to call prof_free_sampled_object() after calling + prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were + the same, the tctx could have been prematurely destroyed. + - Fix portability bugs: + + Don't bitshift by negative amounts when encoding/decoding run sizes in + chunk header maps. This affected systems with page sizes greater than 8 + KiB. + + Rename index_t to szind_t to avoid an existing type on Solaris. + + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to + match glibc and avoid compilation errors when including both + jemalloc/jemalloc.h and malloc.h in C++ code. + + Don't assume that /bin/sh is appropriate when running size_classes.sh + during configuration. + + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM. + + Link tests to librt if it contains clock_gettime(2). + +* 4.0.0 (August 17, 2015) + + This version contains many speed and space optimizations, both minor and + major. The major themes are generalization, unification, and simplification. + Although many of these optimizations cause no visible behavior change, their + cumulative effect is substantial. + + New features: + - Normalize size class spacing to be consistent across the complete size + range. By default there are four size classes per size doubling, but this + is now configurable via the --with-lg-size-class-group option. 
Also add the + --with-lg-page, --with-lg-page-sizes, --with-lg-quantum, and + --with-lg-tiny-min options, which can be used to tweak page and size class + settings. Impacts: + + Worst case performance for incrementally growing/shrinking reallocation + is improved because there are far fewer size classes, and therefore + copying happens less often. + + Internal fragmentation is limited to 20% for all but the smallest size + classes (those less than four times the quantum). (1B + 4 KiB) + and (1B + 4 MiB) previously suffered nearly 50% internal fragmentation. + + Chunk fragmentation tends to be lower because there are fewer distinct run + sizes to pack. + - Add support for explicit tcaches. The "tcache.create", "tcache.flush", and + "tcache.destroy" mallctls control tcache lifetime and flushing, and the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to the *allocx() API + control which tcache is used for each operation. + - Implement per thread heap profiling, as well as the ability to + enable/disable heap profiling on a per thread basis. Add the "prof.reset", + "prof.lg_sample", "thread.prof.name", "thread.prof.active", + "opt.prof_thread_active_init", and "prof.thread_active_init" + mallctls. + - Add support for per arena application-specified chunk allocators, configured + via the "arena.<i>.chunk_hooks" mallctl. + - Refactor huge allocation to be managed by arenas, so that arenas now + function as general purpose independent allocators. This is important in + the context of user-specified chunk allocators, aside from the scalability + benefits. Related new statistics: + + The "stats.arenas.<i>.huge.allocated", "stats.arenas.<i>.huge.nmalloc", + "stats.arenas.<i>.huge.ndalloc", and "stats.arenas.<i>.huge.nrequests" + mallctls provide high level per arena huge allocation statistics. 
+ + The "arenas.nhchunks", "arenas.hchunk.<i>.size", + "stats.arenas.<i>.hchunks.<j>.nmalloc", + "stats.arenas.<i>.hchunks.<j>.ndalloc", + "stats.arenas.<i>.hchunks.<j>.nrequests", and + "stats.arenas.<i>.hchunks.<j>.curhchunks" mallctls provide per size class + statistics. + - Add the 'util' column to malloc_stats_print() output, which reports the + proportion of available regions that are currently in use for each small + size class. + - Add "alloc" and "free" modes for junk filling (see the "opt.junk" + mallctl), so that it is possible to separately enable junk filling for + allocation versus deallocation. + - Add the jemalloc-config script, which provides information about how + jemalloc was configured, and how to integrate it into application builds. + - Add metadata statistics, which are accessible via the "stats.metadata", + "stats.arenas.<i>.metadata.mapped", and + "stats.arenas.<i>.metadata.allocated" mallctls. + - Add the "stats.resident" mallctl, which reports the upper limit of + physically resident memory mapped by the allocator. + - Add per arena control over unused dirty page purging, via the + "arenas.lg_dirty_mult", "arena.<i>.lg_dirty_mult", and + "stats.arenas.<i>.lg_dirty_mult" mallctls. + - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump + feature on/off during program execution. + - Add sdallocx(), which implements sized deallocation. The primary + optimization over dallocx() is the removal of a metadata read, which often + suffers an L1 cache miss. + - Add missing header includes in jemalloc/jemalloc.h, so that applications + only have to #include <jemalloc/jemalloc.h>. + - Add support for additional platforms: + + Bitrig + + Cygwin + + DragonFlyBSD + + iOS + + OpenBSD + + OpenRISC/or1k + + Optimizations: + - Maintain dirty runs in per arena LRUs rather than in per arena trees of + dirty-run-containing chunks. In practice this change significantly reduces + dirty page purging volume. 
+ - Integrate whole chunks into the unused dirty page purging machinery. This + reduces the cost of repeated huge allocation/deallocation, because it + effectively introduces a cache of chunks. + - Split the arena chunk map into two separate arrays, in order to increase + cache locality for the frequently accessed bits. + - Move small run metadata out of runs, into arena chunk headers. This reduces + run fragmentation, smaller runs reduce external fragmentation for small size + classes, and packed (less uniformly aligned) metadata layout improves CPU + cache set distribution. + - Randomly distribute large allocation base pointer alignment relative to page + boundaries in order to more uniformly utilize CPU cache sets. This can be + disabled via the --disable-cache-oblivious configure option, and queried via + the "config.cache_oblivious" mallctl. + - Micro-optimize the fast paths for the public API functions. + - Refactor thread-specific data to reside in a single structure. This assures + that only a single TLS read is necessary per call into the public API. + - Implement in-place huge allocation growing and shrinking. + - Refactor rtree (radix tree for chunk lookups) to be lock-free, and make + additional optimizations that reduce maximum lookup depth to one or two + levels. This resolves what was a concurrency bottleneck for per arena huge + allocation, because a global data structure is critical for determining + which arenas own which huge allocations. + + Incompatible changes: + - Replace --enable-cc-silence with --disable-cc-silence to suppress spurious + warnings by default. + - Assure that the constness of malloc_usable_size()'s return type matches that + of the system implementation. + - Change the heap profile dump format to support per thread heap profiling, + rename pprof to jeprof, and enhance it with the --thread=<n> option. As a + result, the bundled jeprof must now be used rather than the upstream + (gperftools) pprof. 
+ - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can + internally deadlock on some platforms. + - Change the "arenas.nlruns" mallctl type from size_t to unsigned. + - Replace the "stats.arenas.<i>.bins.<j>.allocated" mallctl with + "stats.arenas.<i>.bins.<j>.curregs". + - Ignore MALLOC_CONF in set{uid,gid,cap} binaries. + - Ignore MALLOCX_ARENA(a) in dallocx(), in favor of using the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to control tcache usage. + + Removed features: + - Remove the *allocm() API, which is superseded by the *allocx() API. + - Remove the --enable-dss options, and make dss non-optional on all platforms + which support sbrk(2). + - Remove the "arenas.purge" mallctl, which was obsoleted by the + "arena.<i>.purge" mallctl in 3.1.0. + - Remove the unnecessary "opt.valgrind" mallctl; jemalloc automatically + detects whether it is running inside Valgrind. + - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and + "stats.huge.ndalloc" mallctls. + - Remove the --enable-mremap option. + - Remove the "stats.chunks.current", "stats.chunks.total", and + "stats.chunks.high" mallctls. + + Bug fixes: + - Fix the cactive statistic to decrease (rather than increase) when active + memory decreases. This regression was first released in 3.5.0. + - Fix OOM handling in memalign() and valloc(). A variant of this bug existed + in all releases since 2.0.0, which introduced these functions. + - Fix an OOM-related regression in arena_tcache_fill_small(), which could + cause cache corruption on OOM. This regression was present in all releases + from 2.2.0 through 3.6.0. + - Fix size class overflow handling for malloc(), posix_memalign(), memalign(), + calloc(), and realloc() when profiling is enabled. + - Fix the "arena.<i>.dss" mallctl to return an error if "primary" or + "secondary" precedence is specified, but sbrk(2) is not supported. + - Fix fallback lg_floor() implementations to handle extremely large inputs. 
+ - Ensure the default purgeable zone is after the default zone on OS X. + - Fix latent bugs in atomic_*(). + - Fix the "arena.<i>.dss" mallctl to handle read-only calls. + - Fix tls_model configuration to enable the initial-exec model when possible. + - Mark malloc_conf as a weak symbol so that the application can override it. + - Correctly detect glibc's adaptive pthread mutexes. + - Fix the --without-export configure option. + * 3.6.0 (March 31, 2014) This version contains a critical bug fix for a regression present in 3.5.0 and @@ -21,7 +273,7 @@ found in the git revision history: backtracing to be reliable. - Use dss allocation precedence for huge allocations as well as small/large allocations. - - Fix test assertion failure message formatting. This bug did not manifect on + - Fix test assertion failure message formatting. This bug did not manifest on x86_64 systems because of implementation subtleties in va_list. - Fix inconsequential test failures for hash and SFMT code. @@ -516,7 +768,7 @@ found in the git revision history: - Make it possible for the application to manually flush a thread's cache, via the "tcache.flush" mallctl. - Base maximum dirty page count on proportion of active memory. - - Compute various addtional run-time statistics, including per size class + - Compute various additional run-time statistics, including per size class statistics for large objects. - Expose malloc_stats_print(), which can be called repeatedly by the application. diff --git a/dep/jemalloc/include/jemalloc/internal/arena.h b/dep/jemalloc/include/jemalloc/internal/arena.h index 9d000c03dec..12c617979f9 100644 --- a/dep/jemalloc/include/jemalloc/internal/arena.h +++ b/dep/jemalloc/include/jemalloc/internal/arena.h @@ -1,30 +1,10 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -/* - * RUN_MAX_OVRHD indicates maximum desired run header overhead. 
Runs are sized - * as small as possible such that this setting is still honored, without - * violating other constraints. The goal is to make runs as small as possible - * without exceeding a per run external fragmentation threshold. - * - * We use binary fixed point math for overhead computations, where the binary - * point is implicitly RUN_BFP bits to the left. - * - * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be - * honored for some/all object sizes, since when heap profiling is enabled - * there is one pointer of header overhead per object (plus a constant). This - * constraint is relaxed (ignored) for runs that are so small that the - * per-region overhead is greater than: - * - * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)) - */ -#define RUN_BFP 12 -/* \/ Implicit binary fixed point. */ -#define RUN_MAX_OVRHD 0x0000003dU -#define RUN_MAX_OVRHD_RELAX 0x00001800U +#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) /* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS 11 +#define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) /* @@ -36,16 +16,18 @@ /* * The minimum ratio of active:dirty pages per arena is computed as: * - * (nactive >> opt_lg_dirty_mult) >= ndirty + * (nactive >> lg_dirty_mult) >= ndirty * - * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times - * as many active pages as dirty pages. + * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as + * many active pages as dirty pages. 
*/ #define LG_DIRTY_MULT_DEFAULT 3 -typedef struct arena_chunk_map_s arena_chunk_map_t; -typedef struct arena_chunk_s arena_chunk_t; +typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; typedef struct arena_run_s arena_run_t; +typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; +typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; +typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; @@ -54,54 +36,34 @@ typedef struct arena_s arena_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -/* Each element of the chunk map corresponds to one page within the chunk. */ -struct arena_chunk_map_s { -#ifndef JEMALLOC_PROF - /* - * Overlay prof_ctx in order to allow it to be referenced by dead code. - * Such antics aren't warranted for per arena data structures, but - * chunk map overhead accounts for a percentage of memory, rather than - * being just a fixed cost. - */ - union { -#endif - union { - /* - * Linkage for run trees. There are two disjoint uses: - * - * 1) arena_t's runs_avail tree. - * 2) arena_run_t conceptually uses this linkage for in-use - * non-full runs, rather than directly embedding linkage. - */ - rb_node(arena_chunk_map_t) rb_link; - /* - * List of runs currently in purgatory. arena_chunk_purge() - * temporarily allocates runs that contain dirty pages while - * purging, so that other threads cannot use the runs while the - * purging thread is operating without the arena lock held. - */ - ql_elm(arena_chunk_map_t) ql_link; - } u; +#ifdef JEMALLOC_ARENA_STRUCTS_A +struct arena_run_s { + /* Index of bin this run is associated with. */ + szind_t binind; - /* Profile counters, used for large object runs. */ - prof_ctx_t *prof_ctx; -#ifndef JEMALLOC_PROF - }; /* union { ... }; */ -#endif + /* Number of free regions in run. 
*/ + unsigned nfree; + /* Per region allocated/deallocated bitmap. */ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + +/* Each element of the chunk map corresponds to one page within the chunk. */ +struct arena_chunk_map_bits_s { /* * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): * - * ???????? ???????? ????nnnn nnnndula + * ???????? ???????? ???nnnnn nnndumla * * ? : Unallocated: Run address for first/last pages, unset for internal * pages. * Small: Run page offset. - * Large: Run size for first page, unset for trailing pages. + * Large: Run page count for first page, unset for trailing pages. * n : binind for small size class, BININD_INVALID for large size class. * d : dirty? * u : unzeroed? + * m : decommitted? * l : large? * a : allocated? * @@ -110,78 +72,109 @@ struct arena_chunk_map_s { * p : run page offset * s : run size * n : binind for size class; large objects set these to BININD_INVALID - * except for promoted allocations (see prof_promote) * x : don't care * - : 0 * + : 1 - * [DULA] : bit set - * [dula] : bit unset + * [DUMLA] : bit set + * [dumla] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss++++ ++++du-a - * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx - * ssssssss ssssssss ssss++++ ++++dU-a + * ssssssss ssssssss sss+++++ +++dum-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxx-Uxxx + * ssssssss ssssssss sss+++++ +++dUm-a * * Unallocated (dirty): - * ssssssss ssssssss ssss++++ ++++D--a + * ssssssss ssssssss sss+++++ +++D-m-a * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * ssssssss ssssssss ssss++++ ++++D--a + * ssssssss ssssssss sss+++++ +++D-m-a * * Small: - * pppppppp pppppppp ppppnnnn nnnnd--A - * pppppppp pppppppp ppppnnnn nnnn---A - * pppppppp pppppppp ppppnnnn nnnnd--A + * pppppppp pppppppp pppnnnnn nnnd---A + * pppppppp pppppppp pppnnnnn nnn----A + * pppppppp pppppppp pppnnnnn nnnd---A * * Large: - * ssssssss ssssssss ssss++++ ++++D-LA + * ssssssss ssssssss sss+++++ +++D--LA * xxxxxxxx 
xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ----++++ ++++D-LA + * -------- -------- ---+++++ +++D--LA * - * Large (sampled, size <= PAGE): - * ssssssss ssssssss ssssnnnn nnnnD-LA + * Large (sampled, size <= LARGE_MINCLASS): + * ssssssss ssssssss sssnnnnn nnnD--LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ---+++++ +++D--LA * - * Large (not sampled, size == PAGE): - * ssssssss ssssssss ssss++++ ++++D-LA + * Large (not sampled, size == LARGE_MINCLASS): + * ssssssss ssssssss sss+++++ +++D--LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ---+++++ +++D--LA */ size_t bits; -#define CHUNK_MAP_BININD_SHIFT 4 +#define CHUNK_MAP_ALLOCATED ((size_t)0x01U) +#define CHUNK_MAP_LARGE ((size_t)0x02U) +#define CHUNK_MAP_STATE_MASK ((size_t)0x3U) + +#define CHUNK_MAP_DECOMMITTED ((size_t)0x04U) +#define CHUNK_MAP_UNZEROED ((size_t)0x08U) +#define CHUNK_MAP_DIRTY ((size_t)0x10U) +#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU) + +#define CHUNK_MAP_BININD_SHIFT 5 #define BININD_INVALID ((size_t)0xffU) -/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */ -#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U) +#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) #define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK -#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU) -#define CHUNK_MAP_DIRTY ((size_t)0x8U) -#define CHUNK_MAP_UNZEROED ((size_t)0x4U) -#define CHUNK_MAP_LARGE ((size_t)0x2U) -#define CHUNK_MAP_ALLOCATED ((size_t)0x1U) -#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED + +#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8) +#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE) +#define CHUNK_MAP_SIZE_MASK \ + (~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK)) }; -typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; -typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; -typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t; -/* Arena chunk header. 
*/ -struct arena_chunk_s { - /* Arena that owns the chunk. */ - arena_t *arena; +struct arena_runs_dirty_link_s { + qr(arena_runs_dirty_link_t) rd_link; +}; - /* Linkage for tree of arena chunks that contain dirty runs. */ - rb_node(arena_chunk_t) dirty_link; +/* + * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just + * like arena_chunk_map_bits_t. Two separate arrays are stored within each + * chunk header in order to improve cache locality. + */ +struct arena_chunk_map_misc_s { + /* + * Linkage for run trees. There are two disjoint uses: + * + * 1) arena_t's runs_avail tree. + * 2) arena_run_t conceptually uses this linkage for in-use non-full + * runs, rather than directly embedding linkage. + */ + rb_node(arena_chunk_map_misc_t) rb_link; - /* Number of dirty pages. */ - size_t ndirty; + union { + /* Linkage for list of dirty runs. */ + arena_runs_dirty_link_t rd; - /* Number of available runs. */ - size_t nruns_avail; + /* Profile counters, used for large object runs. */ + union { + void *prof_tctx_pun; + prof_tctx_t *prof_tctx; + }; + /* Small region run metadata. */ + arena_run_t run; + }; +}; +typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; +typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; +#endif /* JEMALLOC_ARENA_STRUCTS_A */ + +#ifdef JEMALLOC_ARENA_STRUCTS_B +/* Arena chunk header. */ +struct arena_chunk_s { /* - * Number of available run adjacencies that purging could coalesce. - * Clean and dirty available runs are not coalesced, which causes - * virtual memory fragmentation. The ratio of - * (nruns_avail-nruns_adjac):nruns_adjac is used for tracking this - * fragmentation. + * A pointer to the arena that owns the chunk is stored within the node. + * This field as a whole is used by chunks_rtree to support both + * ivsalloc() and core-based debugging. */ - size_t nruns_adjac; + extent_node_t node; /* * Map of pages within chunk that keeps track of free/large/small. 
The @@ -189,19 +182,7 @@ struct arena_chunk_s { * need to be tracked in the map. This omission saves a header page * for common chunk sizes (e.g. 4 MiB). */ - arena_chunk_map_t map[1]; /* Dynamically sized. */ -}; -typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; - -struct arena_run_s { - /* Bin this run is associated with. */ - arena_bin_t *bin; - - /* Index of next region that has never been allocated, or nregs. */ - uint32_t nextind; - - /* Number of free regions in run. */ - unsigned nfree; + arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. */ }; /* @@ -212,12 +193,7 @@ struct arena_run_s { * Each run has the following layout: * * /--------------------\ - * | arena_run_t header | - * | ... | - * bitmap_offset | bitmap | - * | ... | - * ctx0_offset | ctx map | - * | ... | + * | pad? | * |--------------------| * | redzone | * reg0_offset | region 0 | @@ -259,23 +235,11 @@ struct arena_bin_info_s { uint32_t nregs; /* - * Offset of first bitmap_t element in a run header for this bin's size - * class. - */ - uint32_t bitmap_offset; - - /* * Metadata used to manipulate bitmaps for runs associated with this * bin. */ bitmap_info_t bitmap_info; - /* - * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (config_prof == false || opt_prof == false). - */ - uint32_t ctx0_offset; - /* Offset of first region in a run for this bin's size class. */ uint32_t reg0_offset; }; @@ -321,8 +285,7 @@ struct arena_s { /* * There are three classes of arena operations from a locking * perspective: - * 1) Thread asssignment (modifies nthreads) is protected by - * arenas_lock. + * 1) Thread assignment (modifies nthreads) is protected by arenas_lock. * 2) Bin-related operations are protected by bin locks. * 3) Chunk- and run-related operations are protected by this mutex. */ @@ -331,16 +294,20 @@ struct arena_s { arena_stats_t stats; /* * List of tcaches for extant threads associated with this arena. 
- * Stats from these are merged incrementally, and at exit. + * Stats from these are merged incrementally, and at exit if + * opt_stats_print is enabled. */ ql_head(tcache_t) tcache_ql; uint64_t prof_accumbytes; - dss_prec_t dss_prec; + /* + * PRNG state for cache index randomization of large allocation base + * pointers. + */ + uint64_t offset_state; - /* Tree of dirty-page-containing chunks this arena manages. */ - arena_chunk_tree_t chunks_dirty; + dss_prec_t dss_prec; /* * In order to avoid rapid chunk allocation/deallocation when an arena @@ -354,7 +321,13 @@ struct arena_s { */ arena_chunk_t *spare; - /* Number of pages in active runs. */ + /* Minimum ratio (log base 2) of nactive:ndirty. */ + ssize_t lg_dirty_mult; + + /* True if a thread is currently executing arena_purge(). */ + bool purging; + + /* Number of pages in active runs and huge regions. */ size_t nactive; /* @@ -366,44 +339,116 @@ struct arena_s { size_t ndirty; /* - * Approximate number of pages being purged. It is possible for - * multiple threads to purge dirty pages concurrently, and they use - * npurgatory to indicate the total number of pages all threads are - * attempting to purge. + * Size/address-ordered tree of this arena's available runs. The tree + * is used for first-best-fit run allocation. */ - size_t npurgatory; + arena_avail_tree_t runs_avail; /* - * Size/address-ordered trees of this arena's available runs. The trees - * are used for first-best-fit run allocation. + * Unused dirty memory this arena manages. Dirty memory is conceptually + * tracked as an arbitrarily interleaved LRU of dirty runs and cached + * chunks, but the list linkage is actually semi-duplicated in order to + * avoid extra arena_chunk_map_misc_t space overhead. + * + * LRU-----------------------------------------------------------MRU + * + * /-- arena ---\ + * | | + * | | + * |------------| /- chunk -\ + * ...->|chunks_cache|<--------------------------->| /----\ |<--... 
+ * |------------| | |node| | + * | | | | | | + * | | /- run -\ /- run -\ | | | | + * | | | | | | | | | | + * | | | | | | | | | | + * |------------| |-------| |-------| | |----| | + * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----... + * |------------| |-------| |-------| | |----| | + * | | | | | | | | | | + * | | | | | | | \----/ | + * | | \-------/ \-------/ | | + * | | | | + * | | | | + * \------------/ \---------/ */ - arena_avail_tree_t runs_avail; + arena_runs_dirty_link_t runs_dirty; + extent_node_t chunks_cache; + + /* Extant huge allocations. */ + ql_head(extent_node_t) huge; + /* Synchronizes all huge allocation/update/deallocation. */ + malloc_mutex_t huge_mtx; + + /* + * Trees of chunks that were previously allocated (trees differ only in + * node ordering). These are used when allocating chunks, in an attempt + * to re-use address space. Depending on function, different tree + * orderings are needed, which is why there are two trees with the same + * contents. + */ + extent_tree_t chunks_szad_cached; + extent_tree_t chunks_ad_cached; + extent_tree_t chunks_szad_retained; + extent_tree_t chunks_ad_retained; + + malloc_mutex_t chunks_mtx; + /* Cache of nodes that were allocated via base_alloc(). */ + ql_head(extent_node_t) node_cache; + malloc_mutex_t node_cache_mtx; + + /* User-configurable chunk hook functions. */ + chunk_hooks_t chunk_hooks; /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; }; +#endif /* JEMALLOC_ARENA_STRUCTS_B */ #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern ssize_t opt_lg_dirty_mult; -/* - * small_size2bin is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via the SMALL_SIZE2BIN macro. 
- */ -extern uint8_t const small_size2bin[]; -#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) +static const size_t large_pad = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + PAGE +#else + 0 +#endif + ; -extern arena_bin_info_t arena_bin_info[NBINS]; +extern ssize_t opt_lg_dirty_mult; -/* Number of large size classes. */ -#define nlclasses (chunk_npages - map_bias) +extern arena_bin_info_t arena_bin_info[NBINS]; +extern size_t map_bias; /* Number of arena chunk header pages. */ +extern size_t map_misc_offset; +extern size_t arena_maxrun; /* Max run size for arenas. */ +extern size_t large_maxclass; /* Max large size class. */ +extern unsigned nlclasses; /* Number of large size classes. */ +extern unsigned nhclasses; /* Number of huge size classes. */ + +void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, + bool cache); +void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, + bool cache); +extent_node_t *arena_node_alloc(arena_t *arena); +void arena_node_dalloc(arena_t *arena, extent_node_t *node); +void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, + bool *zero); +void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); +void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, + size_t oldsize, size_t usize); +void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, + size_t oldsize, size_t usize); +bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, + size_t oldsize, size_t usize, bool *zero); +ssize_t arena_lg_dirty_mult_get(arena_t *arena); +bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); +void arena_maybe_purge(arena_t *arena); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind, uint64_t prof_accumbytes); + szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -418,19 
+463,22 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); +void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_t *mapelm); +void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_t *mapelm); + size_t pageind, arena_chunk_map_bits_t *bitselm); void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; +#else +void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET @@ -439,16 +487,18 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc); +void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t 
*arena); -void arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); -void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); -bool arena_new(arena_t *arena, unsigned ind); -void arena_boot(void); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +ssize_t arena_lg_dirty_mult_default_get(void); +bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); +void arena_stats_merge(arena_t *arena, const char **dss, + ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); +arena_t *arena_new(unsigned ind); +bool arena_boot(void); void arena_prefork(arena_t *arena); void arena_postfork_parent(arena_t *arena); void arena_postfork_child(arena_t *arena); @@ -458,64 +508,138 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); +arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, + size_t pageind); +arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, + size_t pageind); +size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); +void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); +arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); +arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_size_decode(size_t mapbits); size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t 
pageind); -size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); +size_t arena_mapbits_size_encode(size_t size); void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t size); +void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, + size_t flags); void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind); + szind_t binind); void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, size_t binind, size_t flags); -void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, - size_t unzeroed); + size_t runind, szind_t binind, size_t flags); +void arena_metadata_allocated_add(arena_t *arena, size_t size); +void arena_metadata_allocated_sub(arena_t *arena, size_t size); +size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +szind_t arena_bin_index(arena_t *arena, arena_bin_t 
*bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -prof_ctx_t *arena_prof_ctx_get(const void *ptr); -void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); -void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); +prof_tctx_t *arena_prof_tctx_get(const void *ptr); +void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void arena_prof_tctx_reset(const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *old_tctx); +void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache); +arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, - bool try_tcache); +void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_ALWAYS_INLINE arena_chunk_map_t * -arena_mapp_get(arena_chunk_t *chunk, size_t pageind) +JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * +arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); assert(pageind < chunk_npages); - return (&chunk->map[pageind-map_bias]); + return (&chunk->map_bits[pageind-map_bias]); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return ((arena_chunk_map_misc_t *)((uintptr_t)chunk + + (uintptr_t)map_misc_offset) + pageind-map_bias); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + + map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias; + + 
assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (pageind); +} + +JEMALLOC_ALWAYS_INLINE void * +arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); + + return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t + *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_run_to_miscelm(arena_run_t *run) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t + *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); } JEMALLOC_ALWAYS_INLINE size_t * arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) { - return (&arena_mapp_get(chunk, pageind)->bits); + return (&arena_bitselm_get(chunk, pageind)->bits); } JEMALLOC_ALWAYS_INLINE size_t @@ -533,13 +657,29 @@ arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_size_decode(size_t mapbits) +{ + size_t size; + +#if CHUNK_MAP_SIZE_SHIFT > 0 + size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + size = mapbits & CHUNK_MAP_SIZE_MASK; +#else + size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; +#endif + + return (size); +} + +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 
0); - return (mapbits & ~PAGE_MASK); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_ALWAYS_INLINE size_t @@ -550,7 +690,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); - return (mapbits & ~PAGE_MASK); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_ALWAYS_INLINE size_t @@ -561,14 +701,14 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == CHUNK_MAP_ALLOCATED); - return (mapbits >> LG_PAGE); + return (mapbits >> CHUNK_MAP_RUNIND_SHIFT); } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; - size_t binind; + szind_t binind; mapbits = arena_mapbits_get(chunk, pageind); binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -582,6 +722,8 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) size_t mapbits; mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); return (mapbits & CHUNK_MAP_DIRTY); } @@ -591,10 +733,23 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) size_t mapbits; mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); return (mapbits & CHUNK_MAP_UNZEROED); } JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + return (mapbits & CHUNK_MAP_DECOMMITTED); +} + +JEMALLOC_ALWAYS_INLINE size_t 
arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -619,6 +774,23 @@ arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) *mapbitsp = mapbits; } +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_size_encode(size_t size) +{ + size_t mapbits; + +#if CHUNK_MAP_SIZE_SHIFT > 0 + mapbits = size << CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + mapbits = size; +#else + mapbits = size >> -CHUNK_MAP_SIZE_SHIFT; +#endif + + assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0); + return (mapbits); +} + JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) @@ -626,9 +798,11 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); - assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); - assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags); - arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags); + assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); + assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + CHUNK_MAP_BININD_INVALID | flags); } JEMALLOC_ALWAYS_INLINE void @@ -640,7 +814,17 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, assert((size & PAGE_MASK) == 0); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - arena_mapbitsp_write(mapbitsp, size | (mapbits & PAGE_MASK)); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + (mapbits & ~CHUNK_MAP_SIZE_MASK)); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) +{ + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + + assert((flags & CHUNK_MAP_UNZEROED) == flags); + arena_mapbitsp_write(mapbitsp, flags); } JEMALLOC_ALWAYS_INLINE void @@ -648,54 +832,62 @@ 
arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - size_t unzeroed; assert((size & PAGE_MASK) == 0); - assert((flags & CHUNK_MAP_DIRTY) == flags); - unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ - arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags - | unzeroed | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED); + assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); + assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind) + szind_t binind) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); - assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS + + large_pad); arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | (binind << CHUNK_MAP_BININD_SHIFT)); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - size_t binind, size_t flags) + szind_t binind, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - size_t unzeroed; assert(binind < BININD_INVALID); assert(pageind - runind >= map_bias); - assert((flags & CHUNK_MAP_DIRTY) == flags); - unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. 
*/ - arena_mapbitsp_write(mapbitsp, (runind << LG_PAGE) | (binind << - CHUNK_MAP_BININD_SHIFT) | flags | unzeroed | CHUNK_MAP_ALLOCATED); + assert((flags & CHUNK_MAP_UNZEROED) == flags); + arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) | + (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED); } -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, - size_t unzeroed) +JEMALLOC_INLINE void +arena_metadata_allocated_add(arena_t *arena, size_t size) +{ + + atomic_add_z(&arena->stats.metadata_allocated, size); +} + +JEMALLOC_INLINE void +arena_metadata_allocated_sub(arena_t *arena, size_t size) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_UNZEROED) | - unzeroed); + atomic_sub_z(&arena->stats.metadata_allocated, size); +} + +JEMALLOC_INLINE size_t +arena_metadata_allocated_get(arena_t *arena) +{ + + return (atomic_read_z(&arena->stats.metadata_allocated)); } JEMALLOC_INLINE bool @@ -719,7 +911,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) cassert(config_prof); - if (prof_interval == 0) + if (likely(prof_interval == 0)) return (false); return (arena_prof_accum_impl(arena, accumbytes)); } @@ -730,7 +922,7 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) cassert(config_prof); - if (prof_interval == 0) + if (likely(prof_interval == 0)) return (false); { @@ -743,10 +935,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { - size_t binind; + szind_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -755,27 +947,34 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) arena_t *arena; size_t pageind; size_t actual_mapbits; + size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; - size_t 
actual_binind; + szind_t run_binind, actual_binind; arena_bin_info_t *bin_info; + arena_chunk_map_misc_t *miscelm; + void *rpages; assert(binind != BININD_INVALID); assert(binind < NBINS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; + arena = extent_node_arena_get(&chunk->node); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; actual_mapbits = arena_mapbits_get(chunk, pageind); assert(mapbits == actual_mapbits); assert(arena_mapbits_large_get(chunk, pageind) == 0); assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (actual_mapbits >> LG_PAGE)) << LG_PAGE)); - bin = run->bin; + rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, + pageind); + miscelm = arena_miscelm_get(chunk, rpages_ind); + run = &miscelm->run; + run_binind = run->binind; + bin = &arena->bins[run_binind]; actual_binind = bin - arena->bins; - assert(binind == actual_binind); + assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + + rpages = arena_miscelm_to_rpages(miscelm); + assert(((uintptr_t)ptr - ((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval == 0); } @@ -785,10 +984,10 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) # endif /* JEMALLOC_ARENA_INLINE_A */ # ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_INLINE size_t +JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - size_t binind = bin - arena->bins; + szind_t binind = bin - arena->bins; assert(binind < NBINS); return (binind); } @@ -798,24 +997,26 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { unsigned shift, diff, regind; size_t interval; + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + void *rpages = arena_miscelm_to_rpages(miscelm); /* * Freeing a pointer lower than region zero can cause assertion * failure. 
*/ - assert((uintptr_t)ptr >= (uintptr_t)run + + assert((uintptr_t)ptr >= (uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset); /* * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages - bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ interval = bin_info->reg_interval; - shift = ffs(interval) - 1; + shift = jemalloc_ffs(interval) - 1; diff >>= shift; interval >>= shift; @@ -850,8 +1051,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) + - 2)) { + if (likely(interval <= ((sizeof(interval_invs) / + sizeof(unsigned)) + 2))) { regind = (diff * interval_invs[interval - 3]) >> SIZE_INV_SHIFT; } else @@ -865,113 +1066,138 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) return (regind); } -JEMALLOC_INLINE prof_ctx_t * -arena_prof_ctx_get(const void *ptr) +JEMALLOC_INLINE prof_tctx_t * +arena_prof_tctx_get(const void *ptr) { - prof_ctx_t *ret; + prof_tctx_t *ret; arena_chunk_t *chunk; - size_t pageind, mapbits; cassert(config_prof); assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote) - ret = (prof_ctx_t *)(uintptr_t)1U; + if (likely(chunk != ptr)) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) + ret = (prof_tctx_t 
*)(uintptr_t)1U; else { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << - LG_PAGE)); - size_t binind = arena_ptr_small_binind_get(ptr, - mapbits); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind; - - regind = arena_run_regind(run, bin_info, ptr); - ret = *(prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * - sizeof(prof_ctx_t *))); + arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, + pageind); + ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = arena_mapp_get(chunk, pageind)->prof_ctx; + ret = huge_prof_tctx_get(ptr); return (ret); } JEMALLOC_INLINE void -arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) +arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { arena_chunk_t *chunk; - size_t pageind; cassert(config_prof); assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - - if (usize > SMALL_MAXCLASS || (prof_promote && - ((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk, - pageind) != 0))) { - assert(arena_mapbits_large_get(chunk, pageind) != 0); - arena_mapp_get(chunk, pageind)->prof_ctx = ctx; - } else { - assert(arena_mapbits_large_get(chunk, pageind) == 0); - if (prof_promote == false) { - size_t mapbits = arena_mapbits_get(chunk, pageind); - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << - LG_PAGE)); - size_t binind; - arena_bin_info_t *bin_info; - unsigned regind; - - binind = arena_ptr_small_binind_get(ptr, mapbits); - bin_info = &arena_bin_info[binind]; - regind = arena_run_regind(run, bin_info, ptr); - - *((prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t - *)))) = ctx; + if (likely(chunk != ptr)) { + size_t pageind = 
((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + + if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx > + (uintptr_t)1U)) { + arena_chunk_map_misc_t *elm; + + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get(chunk, pageind); + atomic_write_p(&elm->prof_tctx_pun, tctx); + } else { + /* + * tctx must always be initialized for large runs. + * Assert that the surrounding conditional logic is + * equivalent to checking whether ptr refers to a large + * run. + */ + assert(arena_mapbits_large_get(chunk, pageind) == 0); } + } else + huge_prof_tctx_set(ptr, tctx); +} + +JEMALLOC_INLINE void +arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && + (uintptr_t)old_tctx > (uintptr_t)1U))) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + size_t pageind; + arena_chunk_map_misc_t *elm; + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get(chunk, pageind); + atomic_write_p(&elm->prof_tctx_pun, + (prof_tctx_t *)(uintptr_t)1U); + } else + huge_prof_tctx_reset(ptr); } } JEMALLOC_ALWAYS_INLINE void * -arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) +arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache) { - tcache_t *tcache; assert(size != 0); - assert(size <= arena_maxclass); - if (size <= SMALL_MAXCLASS) { - if (try_tcache && (tcache = tcache_get(true)) != NULL) - return (tcache_alloc_small(tcache, size, zero)); - else { - return (arena_malloc_small(choose_arena(arena), size, + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + if (likely(size 
<= SMALL_MAXCLASS)) { + if (likely(tcache != NULL)) { + return (tcache_alloc_small(tsd, arena, tcache, size, zero)); - } - } else { + } else + return (arena_malloc_small(arena, size, zero)); + } else if (likely(size <= large_maxclass)) { /* * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. */ - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(true)) != NULL) - return (tcache_alloc_large(tcache, size, zero)); - else { - return (arena_malloc_large(choose_arena(arena), size, + if (likely(tcache != NULL) && size <= tcache_maxclass) { + return (tcache_alloc_large(tsd, arena, tcache, size, zero)); - } - } + } else + return (arena_malloc_large(arena, size, zero)); + } else + return (huge_malloc(tsd, arena, size, zero, tcache)); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_aalloc(const void *ptr) +{ + arena_chunk_t *chunk; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) + return (extent_node_arena_get(&chunk->node)); + else + return (huge_aalloc(ptr)); } /* Return the size of the allocation pointed to by ptr. */ @@ -980,81 +1206,139 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + szind_t binind; assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - binind = arena_mapbits_binind_get(chunk, pageind); - if (binind == BININD_INVALID || (config_prof && demote == false && - prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { - /* - * Large allocation. In the common case (demote == true), and - * as this is an inline function, most callers will only end up - * looking at binind to determine that ptr is a small - * allocation. 
- */ - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = arena_mapbits_large_size_get(chunk, pageind); - assert(ret != 0); - assert(pageind + (ret>>LG_PAGE) <= chunk_npages); - assert(ret == PAGE || arena_mapbits_large_size_get(chunk, - pageind+(ret>>LG_PAGE)-1) == 0); - assert(binind == arena_mapbits_binind_get(chunk, - pageind+(ret>>LG_PAGE)-1)); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); - } else { - /* - * Small allocation (possibly promoted to a large object due to - * prof_promote). - */ - assert(arena_mapbits_large_get(chunk, pageind) != 0 || - arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, - pageind)) == binind); - ret = arena_bin_info[binind].reg_size; - } + if (likely(chunk != ptr)) { + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + binind = arena_mapbits_binind_get(chunk, pageind); + if (unlikely(binind == BININD_INVALID || (config_prof && !demote + && arena_mapbits_large_get(chunk, pageind) != 0))) { + /* + * Large allocation. In the common case (demote), and + * as this is an inline function, most callers will only + * end up looking at binind to determine that ptr is a + * small allocation. + */ + assert(config_cache_oblivious || ((uintptr_t)ptr & + PAGE_MASK) == 0); + ret = arena_mapbits_large_size_get(chunk, pageind) - + large_pad; + assert(ret != 0); + assert(pageind + ((ret+large_pad)>>LG_PAGE) <= + chunk_npages); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, + pageind+((ret+large_pad)>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large + * object). 
+ */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, + arena_mapbits_get(chunk, pageind)) == binind); + ret = index2size(binind); + } + } else + ret = huge_salloc(ptr); return (ret); } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) +arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { + arena_chunk_t *chunk; size_t pageind, mapbits; - tcache_t *tcache; - assert(arena != NULL); - assert(chunk->arena == arena); assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - /* Small allocation. */ - if (try_tcache && (tcache = tcache_get(false)) != NULL) { - size_t binind; - - binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tcache, ptr, binind); - } else - arena_dalloc_small(arena, chunk, ptr, pageind); - } else { - size_t size = arena_mapbits_large_size_get(chunk, pageind); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { + /* Small allocation. 
*/ + if (likely(tcache != NULL)) { + szind_t binind = arena_ptr_small_binind_get(ptr, + mapbits); + tcache_dalloc_small(tsd, tcache, ptr, binind); + } else { + arena_dalloc_small(extent_node_arena_get( + &chunk->node), chunk, ptr, pageind); + } + } else { + size_t size = arena_mapbits_large_size_get(chunk, + pageind); + + assert(config_cache_oblivious || ((uintptr_t)ptr & + PAGE_MASK) == 0); + + if (likely(tcache != NULL) && size - large_pad <= + tcache_maxclass) { + tcache_dalloc_large(tsd, tcache, ptr, size - + large_pad); + } else { + arena_dalloc_large(extent_node_arena_get( + &chunk->node), chunk, ptr); + } + } + } else + huge_dalloc(tsd, ptr, tcache); +} - assert(((uintptr_t)ptr & PAGE_MASK) == 0); +JEMALLOC_ALWAYS_INLINE void +arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +{ + arena_chunk_t *chunk; - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(false)) != NULL) { - tcache_dalloc_large(tcache, ptr, size); - } else - arena_dalloc_large(arena, chunk, ptr); - } + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + if (config_prof && opt_prof) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if (arena_mapbits_large_get(chunk, pageind) != 0) { + /* + * Make sure to use promoted size, not request + * size. + */ + size = arena_mapbits_large_size_get(chunk, + pageind) - large_pad; + } + } + assert(s2u(size) == s2u(arena_salloc(ptr, false))); + + if (likely(size <= SMALL_MAXCLASS)) { + /* Small allocation. 
*/ + if (likely(tcache != NULL)) { + szind_t binind = size2index(size); + tcache_dalloc_small(tsd, tcache, ptr, binind); + } else { + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> LG_PAGE; + arena_dalloc_small(extent_node_arena_get( + &chunk->node), chunk, ptr, pageind); + } + } else { + assert(config_cache_oblivious || ((uintptr_t)ptr & + PAGE_MASK) == 0); + + if (likely(tcache != NULL) && size <= tcache_maxclass) + tcache_dalloc_large(tsd, tcache, ptr, size); + else { + arena_dalloc_large(extent_node_arena_get( + &chunk->node), chunk, ptr); + } + } + } else + huge_dalloc(tsd, ptr, tcache); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/dep/jemalloc/include/jemalloc/internal/atomic.h b/dep/jemalloc/include/jemalloc/internal/atomic.h index 11a7b47fe0f..a9aad35d121 100644 --- a/dep/jemalloc/include/jemalloc/internal/atomic.h +++ b/dep/jemalloc/include/jemalloc/internal/atomic.h @@ -11,6 +11,7 @@ #define atomic_read_uint64(p) atomic_add_uint64(p, 0) #define atomic_read_uint32(p) atomic_add_uint32(p, 0) +#define atomic_read_p(p) atomic_add_p(p, NULL) #define atomic_read_z(p) atomic_add_z(p, 0) #define atomic_read_u(p) atomic_add_u(p, 0) @@ -18,113 +19,244 @@ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +/* + * All arithmetic functions return the arithmetic result of the atomic + * operation. Some atomic operation APIs return the value prior to mutation, in + * which case the following functions must redundantly compute the result so + * that it can be returned. These functions are normally inlined, so the extra + * operations can be optimized away if the return values aren't used by the + * callers. 
+ * + * <t> atomic_read_<t>(<t> *p) { return (*p); } + * <t> atomic_add_<t>(<t> *p, <t> x) { return (*p + x); } + * <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p - x); } + * bool atomic_cas_<t>(<t> *p, <t> c, <t> s) + * { + * if (*p != c) + * return (true); + * *p = s; + * return (false); + * } + * void atomic_write_<t>(<t> *p, <t> x) { *p = x; } + */ + #ifndef JEMALLOC_ENABLE_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); +bool atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s); +void atomic_write_uint64(uint64_t *p, uint64_t x); uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +bool atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s); +void atomic_write_uint32(uint32_t *p, uint32_t x); +void *atomic_add_p(void **p, void *x); +void *atomic_sub_p(void **p, void *x); +bool atomic_cas_p(void **p, void *c, void *s); +void atomic_write_p(void **p, const void *x); size_t atomic_add_z(size_t *p, size_t x); size_t atomic_sub_z(size_t *p, size_t x); +bool atomic_cas_z(size_t *p, size_t c, size_t s); +void atomic_write_z(size_t *p, size_t x); unsigned atomic_add_u(unsigned *p, unsigned x); unsigned atomic_sub_u(unsigned *p, unsigned x); +bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); +void atomic_write_u(unsigned *p, unsigned x); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +# if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { + uint64_t t = x; - return (__sync_add_and_fetch(p, x)); + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. 
*/ + ); + + return (t + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { + uint64_t t; - return (__sync_sub_and_fetch(p, x)); + x = (uint64_t)(-(int64_t)x); + t = x; + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); } -#elif (defined(_MSC_VER)) + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgq %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. */ + : "memory" /* Clobbers. */ + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "xchgq %1, %0;" /* Lock is implied by xchgq. */ + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { - - return (InterlockedExchangeAdd64(p, x)); + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_add(a, x) + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (!atomic_compare_exchange_strong(a, &c, s)); +} - return (InterlockedExchangeAdd64(p, -((int64_t)x))); +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + atomic_store(a, x); } -#elif (defined(JEMALLOC_OSATOMIC)) +# elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { - return (OSAtomicAdd64((int64_t)x, (int64_t 
*)p)); + /* + * atomic_fetchadd_64() doesn't exist, but we only ever use this + * function on LP64 systems, so atomic_fetchadd_long() will do. + */ + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)x) + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + atomic_store_rel_long(p, x); } -# elif (defined(__amd64__) || defined(__x86_64__)) +# elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); + return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { - x = (uint64_t)(-(int64_t)x); - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); + return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ - return (x); + return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); } -# elif (defined(JEMALLOC_ATOMIC9)) + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + uint64_t o; + + /*The documented OSAtomic*() API does not expose an atomic exchange. 
*/ + do { + o = atomic_read_uint64(p); + } while (atomic_cas_uint64(p, o, x)); +} +# elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. - */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)x) + x); + return (InterlockedExchangeAdd64(p, x) + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); + return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); +} - return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint64_t o; + + o = InterlockedCompareExchange64(p, s, c); + return (o != c); } -# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + InterlockedExchange64(p, x); +} +# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -138,6 +270,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + __sync_lock_test_and_set(p, x); +} # else # error "Missing implementation for 64-bit atomic operations" # endif @@ -145,90 +291,184 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) /******************************************************************************/ /* 32-bit operations. 
*/ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { + uint32_t t = x; - return (__sync_add_and_fetch(p, x)); + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { + uint32_t t; - return (__sync_sub_and_fetch(p, x)); + x = (uint32_t)(-(int32_t)x); + t = x; + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); } -#elif (defined(_MSC_VER)) + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgl %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. */ + : "memory" + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "xchgl %1, %0;" /* Lock is implied by xchgl. */ + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. 
*/ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { - - return (InterlockedExchangeAdd(p, x)); + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_add(a, x) + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (!atomic_compare_exchange_strong(a, &c, s)); +} - return (InterlockedExchangeAdd(p, -((int32_t)x))); +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + atomic_store(a, x); } -#elif (defined(JEMALLOC_OSATOMIC)) +#elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); + return (atomic_fetchadd_32(p, x) + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); + return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!atomic_cmpset_32(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + atomic_store_rel_32(p, x); } -#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +#elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. 
*/ - ); - - return (x); + return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { - x = (uint32_t)(-(int32_t)x); - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); + return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); +} - return (x); +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); } -#elif (defined(JEMALLOC_ATOMIC9)) + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + uint32_t o; + + /*The documented OSAtomic*() API does not expose an atomic exchange. */ + do { + o = atomic_read_uint32(p); + } while (atomic_cas_uint32(p, o, x)); +} +#elif (defined(_MSC_VER)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { - return (atomic_fetchadd_32(p, x) + x); + return (InterlockedExchangeAdd(p, x) + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { - return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); + return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint32_t o; + + o = InterlockedCompareExchange(p, s, c); + return (o != c); } -#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + InterlockedExchange(p, x); +} +#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { @@ -242,11 +482,73 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (__sync_sub_and_fetch(p, x)); } + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + 
__sync_lock_test_and_set(p, x); +} #else # error "Missing implementation for 32-bit atomic operations" #endif /******************************************************************************/ +/* Pointer operations. */ +JEMALLOC_INLINE void * +atomic_add_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE void * +atomic_sub_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +JEMALLOC_INLINE bool +atomic_cas_p(void **p, void *c, void *s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_p(void **p, const void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +/******************************************************************************/ /* size_t operations. 
*/ JEMALLOC_INLINE size_t atomic_add_z(size_t *p, size_t x) @@ -272,6 +574,28 @@ atomic_sub_z(size_t *p, size_t x) #endif } +JEMALLOC_INLINE bool +atomic_cas_z(size_t *p, size_t c, size_t s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + /******************************************************************************/ /* unsigned operations. */ JEMALLOC_INLINE unsigned @@ -297,6 +621,29 @@ atomic_sub_u(unsigned *p, unsigned x) (uint32_t)-((int32_t)x))); #endif } + +JEMALLOC_INLINE bool +atomic_cas_u(unsigned *p, unsigned c, unsigned s) +{ + +#if (LG_SIZEOF_INT == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_INT == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_INT == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + /******************************************************************************/ #endif diff --git a/dep/jemalloc/include/jemalloc/internal/base.h b/dep/jemalloc/include/jemalloc/internal/base.h index 9cf75ffb0b3..39e46ee445d 100644 --- a/dep/jemalloc/include/jemalloc/internal/base.h +++ b/dep/jemalloc/include/jemalloc/internal/base.h @@ -10,9 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *base_alloc(size_t size); -void *base_calloc(size_t number, size_t size); -extent_node_t *base_node_alloc(void); -void base_node_dealloc(extent_node_t *node); +void base_stats_get(size_t *allocated, size_t *resident, 
size_t *mapped); bool base_boot(void); void base_prefork(void); void base_postfork_parent(void); diff --git a/dep/jemalloc/include/jemalloc/internal/bitmap.h b/dep/jemalloc/include/jemalloc/internal/bitmap.h index 605ebac58c1..fcc6005c795 100644 --- a/dep/jemalloc/include/jemalloc/internal/bitmap.h +++ b/dep/jemalloc/include/jemalloc/internal/bitmap.h @@ -3,6 +3,7 @@ /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ #define LG_BITMAP_MAXBITS LG_RUN_MAXREGS +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) typedef struct bitmap_level_s bitmap_level_t; typedef struct bitmap_info_s bitmap_info_t; @@ -14,6 +15,51 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +/* Number of groups required to store a given number of bits. */ +#define BITMAP_BITS2GROUPS(nbits) \ + ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) + +/* + * Number of groups required at a particular level for a given number of bits. + */ +#define BITMAP_GROUPS_L0(nbits) \ + BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS((nbits))))) + +/* + * Assuming the number of levels, number of groups required for a given number + * of bits. + */ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ + BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) + +/* + * Maximum number of groups required to support LG_BITMAP_MAXBITS. 
+ */ +#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#else +# error "Unsupported bitmap size" +#endif + /* Maximum number of levels possible. */ #define BITMAP_MAX_LEVELS \ (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ @@ -93,7 +139,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) bitmap_t g; assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit) == false); + assert(!bitmap_get(bitmap, binfo, bit)); goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[goff]; g = *gp; @@ -126,15 +172,15 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) bitmap_t g; unsigned i; - assert(bitmap_full(bitmap, binfo) == false); + assert(!bitmap_full(bitmap, binfo)); i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = ffsl(g) - 1; + bit = jemalloc_ffsl(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); } bitmap_set(bitmap, binfo, bit); @@ -158,7 +204,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - assert(bitmap_get(bitmap, binfo, bit) == false); + assert(!bitmap_get(bitmap, binfo, bit)); /* Propagate group state transitions up the tree. 
*/ if (propagate) { unsigned i; @@ -172,7 +218,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) == 0); g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - if (propagate == false) + if (!propagate) break; } } diff --git a/dep/jemalloc/include/jemalloc/internal/chunk.h b/dep/jemalloc/include/jemalloc/internal/chunk.h index 87d8700dac8..5d193835303 100644 --- a/dep/jemalloc/include/jemalloc/internal/chunk.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk.h @@ -5,7 +5,7 @@ * Size and alignment of memory chunks that are allocated by the OS's virtual * memory system. */ -#define LG_CHUNK_DEFAULT 22 +#define LG_CHUNK_DEFAULT 21 /* Return the chunk address for allocation address a. */ #define CHUNK_ADDR2BASE(a) \ @@ -19,6 +19,16 @@ #define CHUNK_CEILING(s) \ (((s) + chunksize_mask) & ~chunksize_mask) +#define CHUNK_HOOKS_INITIALIZER { \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL \ +} + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -30,23 +40,36 @@ extern size_t opt_lg_chunk; extern const char *opt_dss; -/* Protects stats_chunks; currently not used for any other purpose. */ -extern malloc_mutex_t chunks_mtx; -/* Chunk statistics. */ -extern chunk_stats_t stats_chunks; - -extern rtree_t *chunks_rtree; +extern rtree_t chunks_rtree; extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t arena_maxclass; /* Max size class for arenas. 
*/ -void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, - dss_prec_t dss_prec); -void chunk_unmap(void *chunk, size_t size); -void chunk_dealloc(void *chunk, size_t size, bool unmap); +extern const chunk_hooks_t chunk_hooks_default; + +chunk_hooks_t chunk_hooks_get(arena_t *arena); +chunk_hooks_t chunk_hooks_set(arena_t *arena, + const chunk_hooks_t *chunk_hooks); + +bool chunk_register(const void *chunk, const extent_node_t *node); +void chunk_deregister(const void *chunk, const extent_node_t *node); +void *chunk_alloc_base(size_t size); +void *chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, + bool dalloc_node); +void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); +void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool committed); +void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool zeroed, bool committed); +void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool committed); +bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, + size_t length); +bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); void chunk_prefork(void); void chunk_postfork_parent(void); @@ -56,6 +79,19 @@ void chunk_postfork_child(void); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +extent_node_t *chunk_lookup(const void *chunk, bool dependent); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_)) +JEMALLOC_INLINE extent_node_t * +chunk_lookup(const void *ptr, bool dependent) +{ + + return (rtree_get(&chunks_rtree, (uintptr_t)ptr, 
dependent)); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/chunk_dss.h b/dep/jemalloc/include/jemalloc/internal/chunk_dss.h index 4535ce09c09..388f46be080 100644 --- a/dep/jemalloc/include/jemalloc/internal/chunk_dss.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -23,7 +23,8 @@ extern const char *dss_prec_names[]; dss_prec_t chunk_dss_prec_get(void); bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); +void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit); bool chunk_in_dss(void *chunk); bool chunk_dss_boot(void); void chunk_dss_prefork(void); diff --git a/dep/jemalloc/include/jemalloc/internal/chunk_mmap.h b/dep/jemalloc/include/jemalloc/internal/chunk_mmap.h index f24abac7538..7d8014c5817 100644 --- a/dep/jemalloc/include/jemalloc/internal/chunk_mmap.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -9,10 +9,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool pages_purge(void *addr, size_t length); - -void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); -bool chunk_dealloc_mmap(void *chunk, size_t size); +void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, + bool *commit); +bool chunk_dalloc_mmap(void *chunk, size_t size); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/ckh.h b/dep/jemalloc/include/jemalloc/internal/ckh.h index 58712a6a763..75c1c979f27 100644 --- a/dep/jemalloc/include/jemalloc/internal/ckh.h +++ b/dep/jemalloc/include/jemalloc/internal/ckh.h @@ -66,13 +66,13 @@ struct ckh_s { /******************************************************************************/ #ifdef 
JEMALLOC_H_EXTERNS -bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(ckh_t *ckh); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/dep/jemalloc/include/jemalloc/internal/ctl.h b/dep/jemalloc/include/jemalloc/internal/ctl.h index 0ffecc5f2a2..751c14b5bad 100644 --- a/dep/jemalloc/include/jemalloc/internal/ctl.h +++ b/dep/jemalloc/include/jemalloc/internal/ctl.h @@ -34,6 +34,7 @@ struct ctl_arena_stats_s { bool initialized; unsigned nthreads; const char *dss; + ssize_t lg_dirty_mult; size_t pactive; size_t pdirty; arena_stats_t astats; @@ -46,22 +47,15 @@ struct ctl_arena_stats_s { malloc_bin_stats_t bstats[NBINS]; malloc_large_stats_t *lstats; /* nlclasses elements. */ + malloc_huge_stats_t *hstats; /* nhclasses elements. */ }; struct ctl_stats_s { size_t allocated; size_t active; + size_t metadata; + size_t resident; size_t mapped; - struct { - size_t current; /* stats_chunks.curchunks */ - uint64_t total; /* stats_chunks.nchunks */ - size_t high; /* stats_chunks.highchunks */ - } chunks; - struct { - size_t allocated; /* huge_allocated */ - uint64_t nmalloc; /* huge_nmalloc */ - uint64_t ndalloc; /* huge_ndalloc */ - } huge; unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ }; diff --git a/dep/jemalloc/include/jemalloc/internal/extent.h b/dep/jemalloc/include/jemalloc/internal/extent.h index ba95ca816bd..386d50ef4cd 100644 --- a/dep/jemalloc/include/jemalloc/internal/extent.h +++ b/dep/jemalloc/include/jemalloc/internal/extent.h @@ -7,25 +7,53 @@ typedef struct extent_node_s extent_node_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -/* Tree of extents. */ +/* Tree of extents. Use accessor functions for en_* fields. */ struct extent_node_s { - /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) link_szad; + /* Arena from which this extent came, if any. */ + arena_t *en_arena; - /* Linkage for the address-ordered tree. */ - rb_node(extent_node_t) link_ad; + /* Pointer to the extent that this tree node is responsible for. */ + void *en_addr; + + /* Total region size. */ + size_t en_size; + + /* + * The zeroed flag is used by chunk recycling code to track whether + * memory is zero-filled. + */ + bool en_zeroed; + + /* + * True if physical memory is committed to the extent, whether + * explicitly or implicitly as on a system that overcommits and + * satisfies physical memory needs on demand via soft page faults. + */ + bool en_committed; + + /* + * The achunk flag is used to validate that huge allocation lookups + * don't return arena chunks. + */ + bool en_achunk; /* Profile counters, used for huge objects. */ - prof_ctx_t *prof_ctx; + prof_tctx_t *en_prof_tctx; - /* Pointer to the extent that this tree node is responsible for. */ - void *addr; + /* Linkage for arena's runs_dirty and chunks_cache rings. */ + arena_runs_dirty_link_t rd; + qr(extent_node_t) cc_link; - /* Total region size. */ - size_t size; + union { + /* Linkage for the size/address-ordered tree. */ + rb_node(extent_node_t) szad_link; + + /* Linkage for arena's huge and node_cache lists. */ + ql_elm(extent_node_t) ql_link; + }; - /* True if zero-filled; used by chunk recycling code. 
*/ - bool zeroed; + /* Linkage for the address-ordered tree. */ + rb_node(extent_node_t) ad_link; }; typedef rb_tree(extent_node_t) extent_tree_t; @@ -41,6 +69,171 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +arena_t *extent_node_arena_get(const extent_node_t *node); +void *extent_node_addr_get(const extent_node_t *node); +size_t extent_node_size_get(const extent_node_t *node); +bool extent_node_zeroed_get(const extent_node_t *node); +bool extent_node_committed_get(const extent_node_t *node); +bool extent_node_achunk_get(const extent_node_t *node); +prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); +void extent_node_arena_set(extent_node_t *node, arena_t *arena); +void extent_node_addr_set(extent_node_t *node, void *addr); +void extent_node_size_set(extent_node_t *node, size_t size); +void extent_node_zeroed_set(extent_node_t *node, bool zeroed); +void extent_node_committed_set(extent_node_t *node, bool committed); +void extent_node_achunk_set(extent_node_t *node, bool achunk); +void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); +void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, + size_t size, bool zeroed, bool committed); +void extent_node_dirty_linkage_init(extent_node_t *node); +void extent_node_dirty_insert(extent_node_t *node, + arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); +void extent_node_dirty_remove(extent_node_t *node); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) +JEMALLOC_INLINE arena_t * +extent_node_arena_get(const extent_node_t *node) +{ + + return (node->en_arena); +} + +JEMALLOC_INLINE void * +extent_node_addr_get(const extent_node_t *node) +{ + + return (node->en_addr); +} + +JEMALLOC_INLINE size_t +extent_node_size_get(const extent_node_t *node) +{ + + return (node->en_size); +} 
+ +JEMALLOC_INLINE bool +extent_node_zeroed_get(const extent_node_t *node) +{ + + return (node->en_zeroed); +} + +JEMALLOC_INLINE bool +extent_node_committed_get(const extent_node_t *node) +{ + + assert(!node->en_achunk); + return (node->en_committed); +} + +JEMALLOC_INLINE bool +extent_node_achunk_get(const extent_node_t *node) +{ + + return (node->en_achunk); +} + +JEMALLOC_INLINE prof_tctx_t * +extent_node_prof_tctx_get(const extent_node_t *node) +{ + + return (node->en_prof_tctx); +} + +JEMALLOC_INLINE void +extent_node_arena_set(extent_node_t *node, arena_t *arena) +{ + + node->en_arena = arena; +} + +JEMALLOC_INLINE void +extent_node_addr_set(extent_node_t *node, void *addr) +{ + + node->en_addr = addr; +} + +JEMALLOC_INLINE void +extent_node_size_set(extent_node_t *node, size_t size) +{ + + node->en_size = size; +} + +JEMALLOC_INLINE void +extent_node_zeroed_set(extent_node_t *node, bool zeroed) +{ + + node->en_zeroed = zeroed; +} + +JEMALLOC_INLINE void +extent_node_committed_set(extent_node_t *node, bool committed) +{ + + node->en_committed = committed; +} + +JEMALLOC_INLINE void +extent_node_achunk_set(extent_node_t *node, bool achunk) +{ + + node->en_achunk = achunk; +} + +JEMALLOC_INLINE void +extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) +{ + + node->en_prof_tctx = tctx; +} + +JEMALLOC_INLINE void +extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, + bool zeroed, bool committed) +{ + + extent_node_arena_set(node, arena); + extent_node_addr_set(node, addr); + extent_node_size_set(node, size); + extent_node_zeroed_set(node, zeroed); + extent_node_committed_set(node, committed); + extent_node_achunk_set(node, false); + if (config_prof) + extent_node_prof_tctx_set(node, NULL); +} + +JEMALLOC_INLINE void +extent_node_dirty_linkage_init(extent_node_t *node) +{ + + qr_new(&node->rd, rd_link); + qr_new(node, cc_link); +} + +JEMALLOC_INLINE void +extent_node_dirty_insert(extent_node_t *node, + 
arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty) +{ + + qr_meld(runs_dirty, &node->rd, rd_link); + qr_meld(chunks_dirty, node, cc_link); +} + +JEMALLOC_INLINE void +extent_node_dirty_remove(extent_node_t *node) +{ + + qr_remove(&node->rd, rd_link); + qr_remove(node, cc_link); +} + +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/hash.h b/dep/jemalloc/include/jemalloc/internal/hash.h index c7183ede82d..bcead337abc 100644 --- a/dep/jemalloc/include/jemalloc/internal/hash.h +++ b/dep/jemalloc/include/jemalloc/internal/hash.h @@ -35,13 +35,14 @@ JEMALLOC_INLINE uint32_t hash_rotl_32(uint32_t x, int8_t r) { - return (x << r) | (x >> (32 - r)); + return ((x << r) | (x >> (32 - r))); } JEMALLOC_INLINE uint64_t hash_rotl_64(uint64_t x, int8_t r) { - return (x << r) | (x >> (64 - r)); + + return ((x << r) | (x >> (64 - r))); } JEMALLOC_INLINE uint32_t @@ -76,9 +77,9 @@ hash_fmix_64(uint64_t k) { k ^= k >> 33; - k *= QU(0xff51afd7ed558ccdLLU); + k *= KQU(0xff51afd7ed558ccd); k ^= k >> 33; - k *= QU(0xc4ceb9fe1a85ec53LLU); + k *= KQU(0xc4ceb9fe1a85ec53); k ^= k >> 33; return (k); @@ -247,8 +248,8 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t h1 = seed; uint64_t h2 = seed; - const uint64_t c1 = QU(0x87c37b91114253d5LLU); - const uint64_t c2 = QU(0x4cf5ad432745937fLLU); + const uint64_t c1 = KQU(0x87c37b91114253d5); + const uint64_t c2 = KQU(0x4cf5ad432745937f); /* body */ { diff --git a/dep/jemalloc/include/jemalloc/internal/huge.h b/dep/jemalloc/include/jemalloc/internal/huge.h index a2b9c779191..ece7af980aa 100644 --- a/dep/jemalloc/include/jemalloc/internal/huge.h +++ b/dep/jemalloc/include/jemalloc/internal/huge.h @@ -9,34 +9,24 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -/* Huge allocation statistics. 
*/ -extern uint64_t huge_nmalloc; -extern uint64_t huge_ndalloc; -extern size_t huge_allocated; - -/* Protects chunk-related data structures. */ -extern malloc_mutex_t huge_mtx; - -void *huge_malloc(size_t size, bool zero, dss_prec_t dss_prec); -void *huge_palloc(size_t size, size_t alignment, bool zero, - dss_prec_t dss_prec); -bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra); -void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec); +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache); +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, + bool zero, tcache_t *tcache); +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero); +void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(void *ptr, bool unmap); +void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(const void *ptr); -dss_prec_t huge_dss_prec_get(arena_t *arena); -prof_ctx_t *huge_prof_ctx_get(const void *ptr); -void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -bool huge_boot(void); -void huge_prefork(void); -void huge_postfork_parent(void); -void huge_postfork_child(void); +prof_tctx_t *huge_prof_tctx_get(const void *ptr); +void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h index 
cf171326c29..db1d6ba0c2e 100644 --- a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -1,79 +1,25 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H -#include <math.h> -#ifdef _WIN32 -# include <windows.h> -# define ENOENT ERROR_PATH_NOT_FOUND -# define EINVAL ERROR_BAD_ARGUMENTS -# define EAGAIN ERROR_OUTOFMEMORY -# define EPERM ERROR_WRITE_FAULT -# define EFAULT ERROR_INVALID_ADDRESS -# define ENOMEM ERROR_NOT_ENOUGH_MEMORY -# undef ERANGE -# define ERANGE ERROR_INVALID_DATA -#else -# include <sys/param.h> -# include <sys/mman.h> -# include <sys/syscall.h> -# if !defined(SYS_write) && defined(__NR_write) -# define SYS_write __NR_write -# endif -# include <sys/uio.h> -# include <pthread.h> -# include <errno.h> -#endif -#include <sys/types.h> - -#include <limits.h> -#ifndef SIZE_T_MAX -# define SIZE_T_MAX SIZE_MAX -#endif -#include <stdarg.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <stddef.h> -#ifndef offsetof -# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) -#endif -#include <inttypes.h> -#include <string.h> -#include <strings.h> -#include <ctype.h> -#ifdef _MSC_VER -# include <io.h> -typedef intptr_t ssize_t; -# define PATH_MAX 1024 -# define STDERR_FILENO 2 -# define __func__ __FUNCTION__ -/* Disable warnings about deprecated system functions */ -# pragma warning(disable: 4996) -#else -# include <unistd.h> -#endif -#include <fcntl.h> -#include "jemalloc_defs.h" +#include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_decls.h" #ifdef JEMALLOC_UTRACE #include <sys/ktrace.h> #endif -#ifdef JEMALLOC_VALGRIND -#include <valgrind/valgrind.h> -#include <valgrind/memcheck.h> -#endif - #define JEMALLOC_NO_DEMANGLE -#include "../jemalloc.h" -#include "jemalloc/internal/private_namespace.h" - -#ifdef JEMALLOC_CC_SILENCE -#define UNUSED JEMALLOC_ATTR(unused) +#ifdef JEMALLOC_JET +# define 
JEMALLOC_N(n) jet_##n +# include "jemalloc/internal/public_namespace.h" +# define JEMALLOC_NO_RENAME +# include "../jemalloc.h" +# undef JEMALLOC_NO_RENAME #else -#define UNUSED +# define JEMALLOC_N(n) je_##n +# include "../jemalloc.h" #endif +#include "jemalloc/internal/private_namespace.h" static const bool config_debug = #ifdef JEMALLOC_DEBUG @@ -82,7 +28,7 @@ static const bool config_debug = false #endif ; -static const bool config_dss = +static const bool have_dss = #ifdef JEMALLOC_DSS true #else @@ -124,8 +70,8 @@ static const bool config_prof_libunwind = false #endif ; -static const bool config_mremap = -#ifdef JEMALLOC_MREMAP +static const bool maps_coalesce = +#ifdef JEMALLOC_MAPS_COALESCE true #else false @@ -187,6 +133,17 @@ static const bool config_ivsalloc = false #endif ; +static const bool config_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; + +#ifdef JEMALLOC_C11ATOMICS +#include <stdatomic.h> +#endif #ifdef JEMALLOC_ATOMIC9 #include <machine/atomic.h> @@ -224,51 +181,45 @@ static const bool config_ivsalloc = /******************************************************************************/ #define JEMALLOC_H_TYPES -#ifndef JEMALLOC_HAS_RESTRICT -# define restrict -#endif +#include "jemalloc/internal/jemalloc_internal_macros.h" -#define MALLOCX_LG_ALIGN_MASK ((int)0x3f) -#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) +/* Size class index type. */ +typedef unsigned szind_t; -#define ZU(z) ((size_t)z) -#define QU(q) ((uint64_t)q) - -#ifndef __DECONST -# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) -#endif - -#if defined(JEMALLOC_DEBUG) - /* Disable inlining to make debugging easier. 
*/ -# define JEMALLOC_ALWAYS_INLINE -# define JEMALLOC_ALWAYS_INLINE_C static -# define JEMALLOC_INLINE -# define JEMALLOC_INLINE_C static -# define inline -#else -# define JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ALWAYS_INLINE \ - static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) -# define JEMALLOC_ALWAYS_INLINE_C \ - static inline JEMALLOC_ATTR(always_inline) -# else -# define JEMALLOC_ALWAYS_INLINE static inline -# define JEMALLOC_ALWAYS_INLINE_C static inline -# endif -# define JEMALLOC_INLINE static inline -# define JEMALLOC_INLINE_C static inline -# ifdef _MSC_VER -# define inline _inline -# endif -#endif +/* + * Flags bits: + * + * a: arena + * t: tcache + * 0: unused + * z: zero + * n: alignment + * + * aaaaaaaa aaaatttt tttttttt 0znnnnnn + */ +#define MALLOCX_ARENA_MASK ((int)~0xfffff) +#define MALLOCX_ARENA_MAX 0xffe +#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU) +#define MALLOCX_TCACHE_MAX 0xffd +#define MALLOCX_LG_ALIGN_MASK ((int)0x3f) +/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ + (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) +#define MALLOCX_ALIGN_GET(flags) \ + (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) +#define MALLOCX_ZERO_GET(flags) \ + ((bool)(flags & MALLOCX_ZERO)) + +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2) +#define MALLOCX_ARENA_GET(flags) \ + (((unsigned)(((unsigned)flags) >> 20)) - 1) /* Smallest size class to support. */ -#define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) /* - * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size * classes). 
*/ #ifndef LG_QUANTUM @@ -281,7 +232,7 @@ static const bool config_ivsalloc = # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# ifdef __sparc64__ +# if (defined(__sparc64__) || defined(__sparcv9)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) @@ -299,6 +250,9 @@ static const bool config_ivsalloc = # ifdef __mips__ # define LG_QUANTUM 3 # endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif # ifdef __powerpc__ # define LG_QUANTUM 4 # endif @@ -311,8 +265,12 @@ static const bool config_ivsalloc = # ifdef __tile__ # define LG_QUANTUM 4 # endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif # ifndef LG_QUANTUM -# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# error "Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" # endif #endif @@ -352,14 +310,17 @@ static const bool config_ivsalloc = #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ +/* Page size. LG_PAGE is determined by the configure script. */ #ifdef PAGE_MASK # undef PAGE_MASK #endif -#define LG_PAGE STATIC_PAGE_SHIFT -#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define PAGE ((size_t)(1U << LG_PAGE)) #define PAGE_MASK ((size_t)(PAGE - 1)) +/* Return the page base address for the page containing address a. */ +#define PAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~PAGE_MASK)) + /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) @@ -376,7 +337,7 @@ static const bool config_ivsalloc = #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & (-(alignment))) -/* Declare a variable length array */ +/* Declare a variable-length array. 
*/ #if __STDC_VERSION__ < 199901L # ifdef _MSC_VER # include <malloc.h> @@ -389,86 +350,12 @@ static const bool config_ivsalloc = # endif # endif # define VARIABLE_ARRAY(type, name, count) \ - type *name = alloca(sizeof(type) * count) -#else -# define VARIABLE_ARRAY(type, name, count) type name[count] -#endif - -#ifdef JEMALLOC_VALGRIND -/* - * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions - * so that when Valgrind reports errors, there are no extra stack frames - * in the backtraces. - * - * The size that is reported to valgrind must be consistent through a chain of - * malloc..realloc..realloc calls. Request size isn't recorded anywhere in - * jemalloc, so it is critical that all callers of these macros provide usize - * rather than request size. As a result, buffer overflow detection is - * technically weakened for the standard API, though it is generally accepted - * practice to consider any extra bytes reported by malloc_usable_size() as - * usable space. - */ -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (config_valgrind && opt_valgrind && cond) \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ -} while (0) -#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) do { \ - if (config_valgrind && opt_valgrind) { \ - size_t rzsize = p2rz(ptr); \ - \ - if (ptr == old_ptr) { \ - VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ - usize, rzsize); \ - if (zero && old_usize < usize) { \ - VALGRIND_MAKE_MEM_DEFINED( \ - (void *)((uintptr_t)ptr + \ - old_usize), usize - old_usize); \ - } \ - } else { \ - if (old_ptr != NULL) { \ - VALGRIND_FREELIKE_BLOCK(old_ptr, \ - old_rzsize); \ - } \ - if (ptr != NULL) { \ - size_t copy_size = (old_usize < usize) \ - ? 
old_usize : usize; \ - size_t tail_size = usize - copy_size; \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ - rzsize, false); \ - if (copy_size > 0) { \ - VALGRIND_MAKE_MEM_DEFINED(ptr, \ - copy_size); \ - } \ - if (zero && tail_size > 0) { \ - VALGRIND_MAKE_MEM_DEFINED( \ - (void *)((uintptr_t)ptr + \ - copy_size), tail_size); \ - } \ - } \ - } \ - } \ -} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ - if (config_valgrind && opt_valgrind) \ - VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ -} while (0) + type *name = alloca(sizeof(type) * (count)) #else -#define RUNNING_ON_VALGRIND ((unsigned)0) -#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ - do {} while (0) -#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \ - do {} while (0) -#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0) -#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0) -#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0) -#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) +# define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -483,9 +370,10 @@ static const bool config_ivsalloc = #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" @@ 
-495,6 +383,7 @@ static const bool config_ivsalloc = /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -503,68 +392,83 @@ static const bool config_ivsalloc = #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" +#define JEMALLOC_ARENA_STRUCTS_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/extent.h" +#define JEMALLOC_ARENA_STRUCTS_B #include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_STRUCTS_B #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" -typedef struct { - uint64_t allocated; - uint64_t deallocated; -} thread_allocated_t; -/* - * The JEMALLOC_ARG_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro - * argument. - */ -#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_ARG_CONCAT({0, 0}) +#include "jemalloc/internal/tsd.h" #undef JEMALLOC_H_STRUCTS /******************************************************************************/ #define JEMALLOC_H_EXTERNS extern bool opt_abort; -extern bool opt_junk; +extern const char *opt_junk; +extern bool opt_junk_alloc; +extern bool opt_junk_free; extern size_t opt_quarantine; extern bool opt_redzone; extern bool opt_utrace; -extern bool opt_valgrind; extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; +extern bool in_valgrind; + /* Number of CPUs. 
*/ extern unsigned ncpus; -/* Protects arenas initialization (arenas, arenas_total). */ -extern malloc_mutex_t arenas_lock; /* - * Arenas that are used to service external requests. Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. - * - * arenas[0..narenas_auto) are used for automatic multiplexing of threads and - * arenas. arenas[narenas_auto..narenas_total) are only used if the application - * takes some action to create them and allocate from them. + * index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by index2size_compute(). */ -extern arena_t **arenas; -extern unsigned narenas_total; -extern unsigned narenas_auto; /* Read-only after initialization. */ - +extern size_t const index2size_tab[NSIZES]; +/* + * size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via size2index(). 
+ */ +extern uint8_t const size2index_tab[]; + +arena_t *a0get(void); +void *a0malloc(size_t size); +void a0dalloc(void *ptr); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); arena_t *arenas_extend(unsigned ind); -void arenas_cleanup(void *arg); -arena_t *choose_arena_hard(void); +arena_t *arena_init(unsigned ind); +unsigned narenas_total_get(void); +arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_t *arena_choose_hard(tsd_t *tsd); +void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +unsigned arena_nbound(unsigned ind); +void thread_allocated_cleanup(tsd_t *tsd); +void thread_deallocated_cleanup(tsd_t *tsd); +void arena_cleanup(tsd_t *tsd); +void arenas_cache_cleanup(tsd_t *tsd); +void narenas_cache_cleanup(tsd_t *tsd); +void arenas_cache_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -573,24 +477,26 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tsd.h" #undef JEMALLOC_H_EXTERNS 
/******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -603,26 +509,158 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) - +szind_t size2index_compute(size_t size); +szind_t size2index_lookup(size_t size); +szind_t size2index(size_t size); +size_t index2size_compute(szind_t index); +size_t index2size_lookup(szind_t index); +size_t index2size(szind_t index); +size_t s2u_compute(size_t size); +size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -unsigned narenas_total_get(void); -arena_t *choose_arena(arena_t *arena); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, + bool refresh_if_missing); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* - * Map of pthread_self() --> arenas[???], used for selecting an arena to use - * for allocations. - */ -malloc_tsd_externs(arenas, arena_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL, - arenas_cleanup) +JEMALLOC_INLINE szind_t +size2index_compute(size_t size) +{ + +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } +#endif + { + size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? 
+ (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) + : lg_floor((size<<1)-1); + size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + size_t grp = shift << LG_SIZE_CLASS_GROUP; + + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t index = NTBINS + grp + mod; + return (index); + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +size2index_lookup(size_t size) +{ + + assert(size <= LOOKUP_MAXCLASS); + { + size_t ret = ((size_t)(size2index_tab[(size-1) >> + LG_TINY_MIN])); + assert(ret == size2index_compute(size)); + return (ret); + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +size2index(size_t size) +{ + + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (size2index_lookup(size)); + return (size2index_compute(size)); +} + +JEMALLOC_INLINE size_t +index2size_compute(szind_t index) +{ + +#if (NTBINS > 0) + if (index < NTBINS) + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 
1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size_lookup(szind_t index) +{ + size_t ret = (size_t)index2size_tab[index]; + assert(ret == index2size_compute(index)); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size(szind_t index) +{ + + assert(index < NSIZES); + return (index2size_lookup(index)); +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_compute(size_t size) +{ + +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } +#endif + { + size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? + (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) + : lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_lookup(size_t size) +{ + size_t ret = index2size_lookup(size2index_lookup(size)); + + assert(ret == s2u_compute(size)); + return (ret); +} /* * Compute usable size that would result from allocating an object with the @@ -632,11 +670,10 @@ JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { - if (size <= SMALL_MAXCLASS) - return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); - if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (s2u_lookup(size)); + return (s2u_compute(size)); } /* @@ -650,108 +687,128 @@ sa2u(size_t size, size_t alignment) assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - /* - * Round size up to the nearest multiple of alignment. 
- * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = ALIGNMENT_CEILING(size, alignment); - /* - * (usize < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (usize < size) { - /* size_t overflow. */ - return (0); + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) + return (usize); } - if (usize <= arena_maxclass && alignment <= PAGE) { - if (usize <= SMALL_MAXCLASS) - return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); - return (PAGE_CEILING(usize)); - } else { - size_t run_size; - + /* Try for a large size class. */ + if (likely(size <= large_maxclass) && likely(alignment < chunksize)) { /* * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. + * to the minimum that can actually be supported. */ alignment = PAGE_CEILING(alignment); - usize = PAGE_CEILING(size); - /* - * (usize < size) protects against very large sizes within - * PAGE of SIZE_T_MAX. 
- * - * (usize + alignment < usize) protects against the - * combination of maximal alignment and usize large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * usize value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (usize < size || usize + alignment < usize) { - /* size_t overflow. */ - return (0); - } + + /* Make sure result is a large size class. */ + usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. - * If the run wouldn't fit within a chunk, round up to a huge - * allocation size. */ - run_size = usize + alignment - PAGE; - if (run_size <= arena_maxclass) - return (PAGE_CEILING(usize)); - return (CHUNK_CEILING(usize)); + if (usize + large_pad + alignment - PAGE <= arena_maxrun) + return (usize); } -} -JEMALLOC_INLINE unsigned -narenas_total_get(void) -{ - unsigned narenas; + /* Huge size class. Beware of size_t overflow. */ - malloc_mutex_lock(&arenas_lock); - narenas = narenas_total; - malloc_mutex_unlock(&arenas_lock); + /* + * We can't achieve subchunk alignment, so round up alignment to the + * minimum that can actually be supported. + */ + alignment = CHUNK_CEILING(alignment); + if (alignment == 0) { + /* size_t overflow. */ + return (0); + } + + /* Make sure result is a huge size class. */ + if (size <= chunksize) + usize = chunksize; + else { + usize = s2u(size); + if (usize < size) { + /* size_t overflow. */ + return (0); + } + } - return (narenas); + /* + * Calculate the multi-chunk mapping that huge_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + alignment - PAGE < usize) { + /* size_t overflow. 
*/ + return (0); + } + return (usize); } /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -choose_arena(arena_t *arena) +arena_choose(tsd_t *tsd, arena_t *arena) { arena_t *ret; if (arena != NULL) return (arena); - if ((ret = *arenas_tsd_get()) == NULL) { - ret = choose_arena_hard(); - assert(ret != NULL); - } + if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) + ret = arena_choose_hard(tsd); return (ret); } + +JEMALLOC_INLINE arena_t * +arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, + bool refresh_if_missing) +{ + arena_t *arena; + arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + + /* init_if_missing requires refresh_if_missing. */ + assert(!init_if_missing || refresh_if_missing); + + if (unlikely(arenas_cache == NULL)) { + /* arenas_cache hasn't been initialized yet. */ + return (arena_get_hard(tsd, ind, init_if_missing)); + } + if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or arena to be + * initialized. + */ + return (refresh_if_missing ? arena_get_hard(tsd, ind, + init_if_missing) : NULL); + } + arena = arenas_cache[ind]; + if (likely(arena != NULL) || !refresh_if_missing) + return (arena); + return (arena_get_hard(tsd, ind, init_if_missing)); +} #endif #include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/rtree.h" /* - * Include arena.h twice in order to resolve circular dependencies with - * tcache.h. + * Include portions of arena.h interleaved with tcache.h in order to resolve + * circular dependencies. 
*/ #define JEMALLOC_ARENA_INLINE_A #include "jemalloc/internal/arena.h" @@ -764,133 +821,155 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE -void *imalloct(size_t size, bool try_tcache, arena_t *arena); -void *imalloc(size_t size); -void *icalloct(size_t size, bool try_tcache, arena_t *arena); -void *icalloc(size_t size); -void *ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena); -void *ipalloc(size_t usize, size_t alignment, bool zero); +arena_t *iaalloc(const void *ptr); size_t isalloc(const void *ptr, bool demote); +void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, + bool is_metadata, arena_t *arena); +void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); +void *imalloc(tsd_t *tsd, size_t size); +void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); +void *icalloc(tsd_t *tsd, size_t size); +void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena); +void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena); +void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idalloct(void *ptr, bool try_tcache); -void idalloc(void *ptr); -void iqalloct(void *ptr, bool try_tcache); -void iqalloc(void *ptr); -void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, +void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); +void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); +void idalloc(tsd_t *tsd, void *ptr); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void isqalloc(tsd_t *tsd, void 
*ptr, size_t size, tcache_t *tcache); +void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); -void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero); -bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero); -malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) +void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); +void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero); +bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_ALWAYS_INLINE arena_t * +iaalloc(const void *ptr) +{ + + assert(ptr != NULL); + + return (arena_aalloc(ptr)); +} + +/* + * Typical usage: + * void *ptr = [...] + * size_t sz = isalloc(ptr, config_prof); + */ +JEMALLOC_ALWAYS_INLINE size_t +isalloc(const void *ptr, bool demote) +{ + + assert(ptr != NULL); + /* Demotion only makes sense if config_prof is true. 
*/ + assert(config_prof || !demote); + + return (arena_salloc(ptr, demote)); +} + JEMALLOC_ALWAYS_INLINE void * -imalloct(size_t size, bool try_tcache, arena_t *arena) +iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, + arena_t *arena) { + void *ret; assert(size != 0); - if (size <= arena_maxclass) - return (arena_malloc(arena, size, false, try_tcache)); - else - return (huge_malloc(size, false, huge_dss_prec_get(arena))); + ret = arena_malloc(tsd, arena, size, zero, tcache); + if (config_stats && is_metadata && likely(ret != NULL)) { + arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + config_prof)); + } + return (ret); } JEMALLOC_ALWAYS_INLINE void * -imalloc(size_t size) +imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) { - return (imalloct(size, true, NULL)); + return (iallocztm(tsd, size, false, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * -icalloct(size_t size, bool try_tcache, arena_t *arena) +imalloc(tsd_t *tsd, size_t size) { - if (size <= arena_maxclass) - return (arena_malloc(arena, size, true, try_tcache)); - else - return (huge_malloc(size, true, huge_dss_prec_get(arena))); + return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE void * -icalloc(size_t size) +icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) { - return (icalloct(size, true, NULL)); + return (iallocztm(tsd, size, true, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * -ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) +icalloc(tsd_t *tsd, size_t size) +{ + + return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL)); +} + +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena) { void *ret; assert(usize != 0); assert(usize == sa2u(usize, alignment)); - if (usize <= arena_maxclass && alignment <= PAGE) - ret = 
arena_malloc(arena, usize, zero, try_tcache); - else { - if (usize <= arena_maxclass) { - ret = arena_palloc(choose_arena(arena), usize, - alignment, zero); - } else if (alignment <= chunksize) - ret = huge_malloc(usize, zero, huge_dss_prec_get(arena)); - else - ret = huge_palloc(usize, alignment, zero, huge_dss_prec_get(arena)); - } - + ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); + if (config_stats && is_metadata && likely(ret != NULL)) { + arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + config_prof)); + } return (ret); } JEMALLOC_ALWAYS_INLINE void * -ipalloc(size_t usize, size_t alignment, bool zero) +ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { - return (ipalloct(usize, alignment, zero, true, NULL)); + return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena)); } -/* - * Typical usage: - * void *ptr = [...] - * size_t sz = isalloc(ptr, config_prof); - */ -JEMALLOC_ALWAYS_INLINE size_t -isalloc(const void *ptr, bool demote) +JEMALLOC_ALWAYS_INLINE void * +ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - size_t ret; - arena_chunk_t *chunk; - - assert(ptr != NULL); - /* Demotion only makes sense if config_prof is true. */ - assert(config_prof || demote == false); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - ret = arena_salloc(ptr, demote); - else - ret = huge_salloc(ptr); - - return (ret); + return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, + NULL), false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t ivsalloc(const void *ptr, bool demote) { + extent_node_t *node; /* Return 0 if ptr is not within a chunk managed by jemalloc. */ - if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0) + node = chunk_lookup(ptr, false); + if (node == NULL) return (0); + /* Only arena chunks should be looked up via interior pointers. 
*/ + assert(extent_node_addr_get(node) == ptr || + extent_node_achunk_get(node)); return (isalloc(ptr, demote)); } @@ -901,7 +980,7 @@ u2rz(size_t usize) size_t ret; if (usize <= SMALL_MAXCLASS) { - size_t binind = SMALL_SIZE2BIN(usize); + szind_t binind = size2index(usize); ret = arena_bin_info[binind].redzone_size; } else ret = 0; @@ -918,47 +997,62 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idalloct(void *ptr, bool try_tcache) +idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) { - arena_chunk_t *chunk; assert(ptr != NULL); + if (config_stats && is_metadata) { + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr, + config_prof)); + } - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, try_tcache); - else - huge_dalloc(ptr, true); + arena_dalloc(tsd, ptr, tcache); } JEMALLOC_ALWAYS_INLINE void -idalloc(void *ptr) +idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) { - idalloct(ptr, true); + idalloctm(tsd, ptr, tcache, false); } JEMALLOC_ALWAYS_INLINE void -iqalloct(void *ptr, bool try_tcache) +idalloc(tsd_t *tsd, void *ptr) { - if (config_fill && opt_quarantine) - quarantine(ptr); + idalloctm(tsd, ptr, tcache_get(tsd, false), false); +} + +JEMALLOC_ALWAYS_INLINE void +iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +{ + + if (config_fill && unlikely(opt_quarantine)) + quarantine(tsd, ptr); else - idalloct(ptr, try_tcache); + idalloctm(tsd, ptr, tcache, false); +} + +JEMALLOC_ALWAYS_INLINE void +isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +{ + + arena_sdalloc(tsd, ptr, size, tcache); } JEMALLOC_ALWAYS_INLINE void -iqalloc(void *ptr) +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { - iqalloct(ptr, true); + if (config_fill && unlikely(opt_quarantine)) + quarantine(tsd, ptr); + else + isdalloct(tsd, ptr, size, tcache); } JEMALLOC_ALWAYS_INLINE void * -iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t 
alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena) +iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { void *p; size_t usize, copysize; @@ -966,7 +1060,7 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) { if (extra == 0) return (NULL); @@ -974,7 +1068,7 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, usize = sa2u(size, alignment); if (usize == 0) return (NULL); - p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) return (NULL); } @@ -984,72 +1078,57 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - iqalloct(ptr, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, tcache); return (p); } JEMALLOC_ALWAYS_INLINE void * -iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) +iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero, tcache_t *tcache, arena_t *arena) { - size_t oldsize; assert(ptr != NULL); assert(size != 0); - oldsize = isalloc(ptr, config_prof); - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { /* * Existing object alignment is inadequate; allocate new space * and copy. 
*/ - return (iralloct_realign(ptr, oldsize, size, extra, alignment, - zero, try_tcache_alloc, try_tcache_dalloc, arena)); + return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment, + zero, tcache, arena)); } - if (size + extra <= arena_maxclass) { - return (arena_ralloc(arena, ptr, oldsize, size, extra, - alignment, zero, try_tcache_alloc, - try_tcache_dalloc)); - } else { - return (huge_ralloc(ptr, oldsize, size, extra, - alignment, zero, try_tcache_dalloc, huge_dss_prec_get(arena))); - } + return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero, + tcache)); } JEMALLOC_ALWAYS_INLINE void * -iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero) { - return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL)); + return (iralloct(tsd, ptr, oldsize, size, alignment, zero, + tcache_get(tsd, true), NULL)); } JEMALLOC_ALWAYS_INLINE bool -ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, + bool zero) { - size_t oldsize; assert(ptr != NULL); assert(size != 0); - oldsize = isalloc(ptr, config_prof); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { /* Existing object alignment is inadequate. 
*/ return (true); } - if (size <= arena_maxclass) - return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); - else - return (huge_ralloc_no_move(ptr, oldsize, size, extra)); + return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); } - -malloc_tsd_externs(thread_allocated, thread_allocated_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t, - THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) #endif #include "jemalloc/internal/prof.h" diff --git a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h new file mode 100644 index 00000000000..a601d6ebb2d --- /dev/null +++ b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h @@ -0,0 +1,64 @@ +#ifndef JEMALLOC_INTERNAL_DECLS_H +#define JEMALLOC_INTERNAL_DECLS_H + +#include <math.h> +#ifdef _WIN32 +# include <windows.h> +# include "msvc_compat/windows_extra.h" + +#else +# include <sys/param.h> +# include <sys/mman.h> +# if !defined(__pnacl__) && !defined(__native_client__) +# include <sys/syscall.h> +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include <sys/uio.h> +# endif +# include <pthread.h> +# include <errno.h> +#endif +#include <sys/types.h> + +#include <limits.h> +#ifndef SIZE_T_MAX +# define SIZE_T_MAX SIZE_MAX +#endif +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stddef.h> +#ifndef offsetof +# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +#endif +#include <string.h> +#include <strings.h> +#include <ctype.h> +#ifdef _MSC_VER +# include <io.h> +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +# ifdef JEMALLOC_HAS_RESTRICT +# define restrict __restrict +# endif +/* Disable warnings about deprecated system functions. 
*/ +# pragma warning(disable: 4996) +#if _MSC_VER < 1800 +static int +isblank(int c) +{ + + return (c == '\t' || c == ' '); +} +#endif +#else +# include <unistd.h> +#endif +#include <fcntl.h> + +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h new file mode 100644 index 00000000000..a08ba772ead --- /dev/null +++ b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h @@ -0,0 +1,57 @@ +/* + * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for + * functions that are static inline functions if inlining is enabled, and + * single-definition library-private functions if inlining is disabled. + * + * JEMALLOC_ALWAYS_INLINE_C and JEMALLOC_INLINE_C are for use in .c files, in + * which case the denoted functions are always static, regardless of whether + * inlining is enabled. + */ +#if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE) + /* Disable inlining to make debugging/profiling easier. 
*/ +# define JEMALLOC_ALWAYS_INLINE +# define JEMALLOC_ALWAYS_INLINE_C static +# define JEMALLOC_INLINE +# define JEMALLOC_INLINE_C static +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ALWAYS_INLINE \ + static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) +# define JEMALLOC_ALWAYS_INLINE_C \ + static inline JEMALLOC_ATTR(always_inline) +# else +# define JEMALLOC_ALWAYS_INLINE static inline +# define JEMALLOC_ALWAYS_INLINE_C static inline +# endif +# define JEMALLOC_INLINE static inline +# define JEMALLOC_INLINE_C static inline +# ifdef _MSC_VER +# define inline _inline +# endif +#endif + +#ifdef JEMALLOC_CC_SILENCE +# define UNUSED JEMALLOC_ATTR(unused) +#else +# define UNUSED +#endif + +#define ZU(z) ((size_t)z) +#define ZI(z) ((ssize_t)z) +#define QU(q) ((uint64_t)q) +#define QI(q) ((int64_t)q) + +#define KZU(z) ZU(z##ULL) +#define KZI(z) ZI(z##LL) +#define KQU(q) QU(q##ULL) +#define KQI(q) QI(q##LL) + +#ifndef __DECONST +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +#endif + +#ifndef JEMALLOC_HAS_RESTRICT +# define restrict +#endif diff --git a/dep/jemalloc/include/jemalloc/internal/mutex.h b/dep/jemalloc/include/jemalloc/internal/mutex.h index de44e1435ad..f051f2917b3 100644 --- a/dep/jemalloc/include/jemalloc/internal/mutex.h +++ b/dep/jemalloc/include/jemalloc/internal/mutex.h @@ -10,7 +10,7 @@ typedef struct malloc_mutex_s malloc_mutex_t; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else -# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ +# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP # define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} @@ -26,7 +26,11 @@ typedef struct malloc_mutex_s malloc_mutex_t; struct malloc_mutex_s { #ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + 
SRWLOCK lock; +# else CRITICAL_SECTION lock; +# endif #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -70,7 +74,11 @@ malloc_mutex_lock(malloc_mutex_t *mutex) if (isthreaded) { #ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + AcquireSRWLockExclusive(&mutex->lock); +# else EnterCriticalSection(&mutex->lock); +# endif #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else @@ -85,7 +93,11 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) if (isthreaded) { #ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + ReleaseSRWLockExclusive(&mutex->lock); +# else LeaveCriticalSection(&mutex->lock); +# endif #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else diff --git a/dep/jemalloc/include/jemalloc/internal/pages.h b/dep/jemalloc/include/jemalloc/internal/pages.h new file mode 100644 index 00000000000..da7eb9686db --- /dev/null +++ b/dep/jemalloc/include/jemalloc/internal/pages.h @@ -0,0 +1,26 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *pages_map(void *addr, size_t size); +void pages_unmap(void *addr, size_t size); +void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, + size_t size); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge(void *addr, size_t size); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/dep/jemalloc/include/jemalloc/internal/private_namespace.h 
b/dep/jemalloc/include/jemalloc/internal/private_namespace.h index 35c3b0c6c74..c43f73d6543 100644 --- a/dep/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/dep/jemalloc/include/jemalloc/internal/private_namespace.h @@ -1,44 +1,76 @@ -#define a0calloc JEMALLOC_N(a0calloc) -#define a0free JEMALLOC_N(a0free) +#define a0dalloc JEMALLOC_N(a0dalloc) +#define a0get JEMALLOC_N(a0get) #define a0malloc JEMALLOC_N(a0malloc) +#define arena_aalloc JEMALLOC_N(arena_aalloc) #define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) #define arena_bin_index JEMALLOC_N(arena_bin_index) #define arena_bin_info JEMALLOC_N(arena_bin_info) +#define arena_bitselm_get JEMALLOC_N(arena_bitselm_get) #define arena_boot JEMALLOC_N(arena_boot) +#define arena_choose JEMALLOC_N(arena_choose) +#define arena_choose_hard JEMALLOC_N(arena_choose_hard) +#define arena_chunk_alloc_huge JEMALLOC_N(arena_chunk_alloc_huge) +#define arena_chunk_cache_maybe_insert JEMALLOC_N(arena_chunk_cache_maybe_insert) +#define arena_chunk_cache_maybe_remove JEMALLOC_N(arena_chunk_cache_maybe_remove) +#define arena_chunk_dalloc_huge JEMALLOC_N(arena_chunk_dalloc_huge) +#define arena_chunk_ralloc_huge_expand JEMALLOC_N(arena_chunk_ralloc_huge_expand) +#define arena_chunk_ralloc_huge_shrink JEMALLOC_N(arena_chunk_ralloc_huge_shrink) +#define arena_chunk_ralloc_huge_similar JEMALLOC_N(arena_chunk_ralloc_huge_similar) +#define arena_cleanup JEMALLOC_N(arena_cleanup) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) -#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) +#define arena_dalloc_bin_junked_locked JEMALLOC_N(arena_dalloc_bin_junked_locked) #define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) #define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) -#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) +#define 
arena_dalloc_large_junked_locked JEMALLOC_N(arena_dalloc_large_junked_locked) #define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) #define arena_dss_prec_get JEMALLOC_N(arena_dss_prec_get) #define arena_dss_prec_set JEMALLOC_N(arena_dss_prec_set) +#define arena_get JEMALLOC_N(arena_get) +#define arena_get_hard JEMALLOC_N(arena_get_hard) +#define arena_init JEMALLOC_N(arena_init) +#define arena_lg_dirty_mult_default_get JEMALLOC_N(arena_lg_dirty_mult_default_get) +#define arena_lg_dirty_mult_default_set JEMALLOC_N(arena_lg_dirty_mult_default_set) +#define arena_lg_dirty_mult_get JEMALLOC_N(arena_lg_dirty_mult_get) +#define arena_lg_dirty_mult_set JEMALLOC_N(arena_lg_dirty_mult_set) #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) #define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) #define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) +#define arena_mapbits_decommitted_get JEMALLOC_N(arena_mapbits_decommitted_get) #define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) #define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) +#define arena_mapbits_internal_set JEMALLOC_N(arena_mapbits_internal_set) #define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) #define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) #define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) #define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) +#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapbitsp_read JEMALLOC_N(arena_mapbitsp_read) +#define arena_mapbitsp_write JEMALLOC_N(arena_mapbitsp_write) +#define arena_mapbits_size_decode JEMALLOC_N(arena_mapbits_size_decode) +#define arena_mapbits_size_encode JEMALLOC_N(arena_mapbits_size_encode) #define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) #define 
arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) #define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) #define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) #define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) #define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) -#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) -#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) -#define arena_mapbitsp_read JEMALLOC_N(arena_mapbitsp_read) -#define arena_mapbitsp_write JEMALLOC_N(arena_mapbitsp_write) -#define arena_mapp_get JEMALLOC_N(arena_mapp_get) -#define arena_maxclass JEMALLOC_N(arena_maxclass) +#define arena_maxrun JEMALLOC_N(arena_maxrun) +#define arena_maybe_purge JEMALLOC_N(arena_maybe_purge) +#define arena_metadata_allocated_add JEMALLOC_N(arena_metadata_allocated_add) +#define arena_metadata_allocated_get JEMALLOC_N(arena_metadata_allocated_get) +#define arena_metadata_allocated_sub JEMALLOC_N(arena_metadata_allocated_sub) +#define arena_migrate JEMALLOC_N(arena_migrate) +#define arena_miscelm_get JEMALLOC_N(arena_miscelm_get) +#define arena_miscelm_to_pageind JEMALLOC_N(arena_miscelm_to_pageind) +#define arena_miscelm_to_rpages JEMALLOC_N(arena_miscelm_to_rpages) +#define arena_nbound JEMALLOC_N(arena_nbound) #define arena_new JEMALLOC_N(arena_new) +#define arena_node_alloc JEMALLOC_N(arena_node_alloc) +#define arena_node_dalloc JEMALLOC_N(arena_node_dalloc) #define arena_palloc JEMALLOC_N(arena_palloc) #define arena_postfork_child JEMALLOC_N(arena_postfork_child) #define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) @@ -46,50 +78,47 @@ #define arena_prof_accum JEMALLOC_N(arena_prof_accum) #define arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl) #define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked) -#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) -#define 
arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_prof_tctx_get JEMALLOC_N(arena_prof_tctx_get) +#define arena_prof_tctx_reset JEMALLOC_N(arena_prof_tctx_reset) +#define arena_prof_tctx_set JEMALLOC_N(arena_prof_tctx_set) #define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) #define arena_purge_all JEMALLOC_N(arena_purge_all) #define arena_quarantine_junk_small JEMALLOC_N(arena_quarantine_junk_small) #define arena_ralloc JEMALLOC_N(arena_ralloc) #define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) +#define arena_rd_to_miscelm JEMALLOC_N(arena_rd_to_miscelm) #define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) #define arena_run_regind JEMALLOC_N(arena_run_regind) +#define arena_run_to_miscelm JEMALLOC_N(arena_run_to_miscelm) #define arena_salloc JEMALLOC_N(arena_salloc) +#define arenas_cache_bypass_cleanup JEMALLOC_N(arenas_cache_bypass_cleanup) +#define arenas_cache_cleanup JEMALLOC_N(arenas_cache_cleanup) +#define arena_sdalloc JEMALLOC_N(arena_sdalloc) #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) -#define arenas JEMALLOC_N(arenas) -#define arenas_booted JEMALLOC_N(arenas_booted) -#define arenas_cleanup JEMALLOC_N(arenas_cleanup) -#define arenas_extend JEMALLOC_N(arenas_extend) -#define arenas_initialized JEMALLOC_N(arenas_initialized) -#define arenas_lock JEMALLOC_N(arenas_lock) -#define arenas_tls JEMALLOC_N(arenas_tls) -#define arenas_tsd JEMALLOC_N(arenas_tsd) -#define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) -#define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) -#define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) -#define arenas_tsd_get_wrapper JEMALLOC_N(arenas_tsd_get_wrapper) -#define arenas_tsd_init_head JEMALLOC_N(arenas_tsd_init_head) -#define arenas_tsd_set 
JEMALLOC_N(arenas_tsd_set) +#define atomic_add_p JEMALLOC_N(atomic_add_p) #define atomic_add_u JEMALLOC_N(atomic_add_u) #define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) #define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) #define atomic_add_z JEMALLOC_N(atomic_add_z) +#define atomic_cas_p JEMALLOC_N(atomic_cas_p) +#define atomic_cas_u JEMALLOC_N(atomic_cas_u) +#define atomic_cas_uint32 JEMALLOC_N(atomic_cas_uint32) +#define atomic_cas_uint64 JEMALLOC_N(atomic_cas_uint64) +#define atomic_cas_z JEMALLOC_N(atomic_cas_z) +#define atomic_sub_p JEMALLOC_N(atomic_sub_p) #define atomic_sub_u JEMALLOC_N(atomic_sub_u) #define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) #define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) #define atomic_sub_z JEMALLOC_N(atomic_sub_z) #define base_alloc JEMALLOC_N(base_alloc) #define base_boot JEMALLOC_N(base_boot) -#define base_calloc JEMALLOC_N(base_calloc) -#define base_node_alloc JEMALLOC_N(base_node_alloc) -#define base_node_dealloc JEMALLOC_N(base_node_dealloc) #define base_postfork_child JEMALLOC_N(base_postfork_child) #define base_postfork_parent JEMALLOC_N(base_postfork_parent) #define base_prefork JEMALLOC_N(base_prefork) +#define base_stats_get JEMALLOC_N(base_stats_get) #define bitmap_full JEMALLOC_N(bitmap_full) #define bitmap_get JEMALLOC_N(bitmap_get) #define bitmap_info_init JEMALLOC_N(bitmap_info_init) @@ -99,49 +128,54 @@ #define bitmap_sfu JEMALLOC_N(bitmap_sfu) #define bitmap_size JEMALLOC_N(bitmap_size) #define bitmap_unset JEMALLOC_N(bitmap_unset) +#define bootstrap_calloc JEMALLOC_N(bootstrap_calloc) +#define bootstrap_free JEMALLOC_N(bootstrap_free) +#define bootstrap_malloc JEMALLOC_N(bootstrap_malloc) #define bt_init JEMALLOC_N(bt_init) #define buferror JEMALLOC_N(buferror) -#define choose_arena JEMALLOC_N(choose_arena) -#define choose_arena_hard JEMALLOC_N(choose_arena_hard) -#define chunk_alloc JEMALLOC_N(chunk_alloc) +#define chunk_alloc_base JEMALLOC_N(chunk_alloc_base) +#define chunk_alloc_cache 
JEMALLOC_N(chunk_alloc_cache) #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) +#define chunk_alloc_wrapper JEMALLOC_N(chunk_alloc_wrapper) #define chunk_boot JEMALLOC_N(chunk_boot) -#define chunk_dealloc JEMALLOC_N(chunk_dealloc) -#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) +#define chunk_dalloc_arena JEMALLOC_N(chunk_dalloc_arena) +#define chunk_dalloc_cache JEMALLOC_N(chunk_dalloc_cache) +#define chunk_dalloc_mmap JEMALLOC_N(chunk_dalloc_mmap) +#define chunk_dalloc_wrapper JEMALLOC_N(chunk_dalloc_wrapper) +#define chunk_deregister JEMALLOC_N(chunk_deregister) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) #define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) #define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) #define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get) #define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set) #define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) +#define chunk_hooks_default JEMALLOC_N(chunk_hooks_default) +#define chunk_hooks_get JEMALLOC_N(chunk_hooks_get) +#define chunk_hooks_set JEMALLOC_N(chunk_hooks_set) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) +#define chunk_lookup JEMALLOC_N(chunk_lookup) #define chunk_npages JEMALLOC_N(chunk_npages) #define chunk_postfork_child JEMALLOC_N(chunk_postfork_child) #define chunk_postfork_parent JEMALLOC_N(chunk_postfork_parent) #define chunk_prefork JEMALLOC_N(chunk_prefork) -#define chunk_unmap JEMALLOC_N(chunk_unmap) -#define chunks_mtx JEMALLOC_N(chunks_mtx) -#define chunks_rtree JEMALLOC_N(chunks_rtree) +#define chunk_purge_arena JEMALLOC_N(chunk_purge_arena) +#define chunk_purge_wrapper JEMALLOC_N(chunk_purge_wrapper) +#define chunk_register JEMALLOC_N(chunk_register) #define chunksize JEMALLOC_N(chunksize) #define chunksize_mask JEMALLOC_N(chunksize_mask) -#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) +#define chunks_rtree JEMALLOC_N(chunks_rtree) #define ckh_count 
JEMALLOC_N(ckh_count) #define ckh_delete JEMALLOC_N(ckh_delete) -#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert) #define ckh_insert JEMALLOC_N(ckh_insert) -#define ckh_isearch JEMALLOC_N(ckh_isearch) #define ckh_iter JEMALLOC_N(ckh_iter) #define ckh_new JEMALLOC_N(ckh_new) #define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) #define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) -#define ckh_rebuild JEMALLOC_N(ckh_rebuild) #define ckh_remove JEMALLOC_N(ckh_remove) #define ckh_search JEMALLOC_N(ckh_search) #define ckh_string_hash JEMALLOC_N(ckh_string_hash) #define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) -#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) -#define ckh_try_insert JEMALLOC_N(ckh_try_insert) #define ctl_boot JEMALLOC_N(ctl_boot) #define ctl_bymib JEMALLOC_N(ctl_bymib) #define ctl_byname JEMALLOC_N(ctl_byname) @@ -150,6 +184,23 @@ #define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent) #define ctl_prefork JEMALLOC_N(ctl_prefork) #define dss_prec_names JEMALLOC_N(dss_prec_names) +#define extent_node_achunk_get JEMALLOC_N(extent_node_achunk_get) +#define extent_node_achunk_set JEMALLOC_N(extent_node_achunk_set) +#define extent_node_addr_get JEMALLOC_N(extent_node_addr_get) +#define extent_node_addr_set JEMALLOC_N(extent_node_addr_set) +#define extent_node_arena_get JEMALLOC_N(extent_node_arena_get) +#define extent_node_arena_set JEMALLOC_N(extent_node_arena_set) +#define extent_node_dirty_insert JEMALLOC_N(extent_node_dirty_insert) +#define extent_node_dirty_linkage_init JEMALLOC_N(extent_node_dirty_linkage_init) +#define extent_node_dirty_remove JEMALLOC_N(extent_node_dirty_remove) +#define extent_node_init JEMALLOC_N(extent_node_init) +#define extent_node_prof_tctx_get JEMALLOC_N(extent_node_prof_tctx_get) +#define extent_node_prof_tctx_set JEMALLOC_N(extent_node_prof_tctx_set) +#define extent_node_size_get JEMALLOC_N(extent_node_size_get) +#define extent_node_size_set JEMALLOC_N(extent_node_size_set) 
+#define extent_node_zeroed_get JEMALLOC_N(extent_node_zeroed_get) +#define extent_node_zeroed_set JEMALLOC_N(extent_node_zeroed_set) +#define extent_tree_ad_empty JEMALLOC_N(extent_tree_ad_empty) #define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) #define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) #define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) @@ -166,6 +217,7 @@ #define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) #define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) #define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) +#define extent_tree_szad_empty JEMALLOC_N(extent_tree_szad_empty) #define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first) #define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert) #define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter) @@ -193,45 +245,49 @@ #define hash_x64_128 JEMALLOC_N(hash_x64_128) #define hash_x86_128 JEMALLOC_N(hash_x86_128) #define hash_x86_32 JEMALLOC_N(hash_x86_32) -#define huge_allocated JEMALLOC_N(huge_allocated) -#define huge_boot JEMALLOC_N(huge_boot) +#define huge_aalloc JEMALLOC_N(huge_aalloc) #define huge_dalloc JEMALLOC_N(huge_dalloc) #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) -#define huge_dss_prec_get JEMALLOC_N(huge_dss_prec_get) #define huge_malloc JEMALLOC_N(huge_malloc) -#define huge_mtx JEMALLOC_N(huge_mtx) -#define huge_ndalloc JEMALLOC_N(huge_ndalloc) -#define huge_nmalloc JEMALLOC_N(huge_nmalloc) #define huge_palloc JEMALLOC_N(huge_palloc) -#define huge_postfork_child JEMALLOC_N(huge_postfork_child) -#define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) -#define huge_prefork JEMALLOC_N(huge_prefork) -#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) -#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) +#define huge_prof_tctx_get JEMALLOC_N(huge_prof_tctx_get) +#define huge_prof_tctx_reset JEMALLOC_N(huge_prof_tctx_reset) +#define 
huge_prof_tctx_set JEMALLOC_N(huge_prof_tctx_set) #define huge_ralloc JEMALLOC_N(huge_ralloc) #define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) #define huge_salloc JEMALLOC_N(huge_salloc) -#define iallocm JEMALLOC_N(iallocm) +#define iaalloc JEMALLOC_N(iaalloc) +#define iallocztm JEMALLOC_N(iallocztm) #define icalloc JEMALLOC_N(icalloc) #define icalloct JEMALLOC_N(icalloct) #define idalloc JEMALLOC_N(idalloc) #define idalloct JEMALLOC_N(idalloct) +#define idalloctm JEMALLOC_N(idalloctm) #define imalloc JEMALLOC_N(imalloc) #define imalloct JEMALLOC_N(imalloct) +#define index2size JEMALLOC_N(index2size) +#define index2size_compute JEMALLOC_N(index2size_compute) +#define index2size_lookup JEMALLOC_N(index2size_lookup) +#define index2size_tab JEMALLOC_N(index2size_tab) +#define in_valgrind JEMALLOC_N(in_valgrind) #define ipalloc JEMALLOC_N(ipalloc) #define ipalloct JEMALLOC_N(ipalloct) +#define ipallocztm JEMALLOC_N(ipallocztm) #define iqalloc JEMALLOC_N(iqalloc) -#define iqalloct JEMALLOC_N(iqalloct) #define iralloc JEMALLOC_N(iralloc) #define iralloct JEMALLOC_N(iralloct) #define iralloct_realign JEMALLOC_N(iralloct_realign) #define isalloc JEMALLOC_N(isalloc) +#define isdalloct JEMALLOC_N(isdalloct) +#define isqalloc JEMALLOC_N(isqalloc) #define isthreaded JEMALLOC_N(isthreaded) #define ivsalloc JEMALLOC_N(ivsalloc) #define ixalloc JEMALLOC_N(ixalloc) #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) #define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) +#define large_maxclass JEMALLOC_N(large_maxclass) +#define lg_floor JEMALLOC_N(lg_floor) #define malloc_cprintf JEMALLOC_N(malloc_cprintf) #define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) #define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) @@ -242,7 +298,8 @@ #define malloc_printf JEMALLOC_N(malloc_printf) #define malloc_snprintf JEMALLOC_N(malloc_snprintf) #define malloc_strtoumax 
JEMALLOC_N(malloc_strtoumax) -#define malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot) +#define malloc_tsd_boot0 JEMALLOC_N(malloc_tsd_boot0) +#define malloc_tsd_boot1 JEMALLOC_N(malloc_tsd_boot1) #define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) #define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) #define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) @@ -251,16 +308,18 @@ #define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) #define malloc_write JEMALLOC_N(malloc_write) #define map_bias JEMALLOC_N(map_bias) +#define map_misc_offset JEMALLOC_N(map_misc_offset) #define mb_write JEMALLOC_N(mb_write) #define mutex_boot JEMALLOC_N(mutex_boot) -#define narenas_auto JEMALLOC_N(narenas_auto) -#define narenas_total JEMALLOC_N(narenas_total) +#define narenas_cache_cleanup JEMALLOC_N(narenas_cache_cleanup) #define narenas_total_get JEMALLOC_N(narenas_total_get) #define ncpus JEMALLOC_N(ncpus) #define nhbins JEMALLOC_N(nhbins) #define opt_abort JEMALLOC_N(opt_abort) #define opt_dss JEMALLOC_N(opt_dss) #define opt_junk JEMALLOC_N(opt_junk) +#define opt_junk_alloc JEMALLOC_N(opt_junk_alloc) +#define opt_junk_free JEMALLOC_N(opt_junk_free) #define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) #define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) #define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) @@ -274,84 +333,99 @@ #define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) #define opt_prof_leak JEMALLOC_N(opt_prof_leak) #define opt_prof_prefix JEMALLOC_N(opt_prof_prefix) +#define opt_prof_thread_active_init JEMALLOC_N(opt_prof_thread_active_init) #define opt_quarantine JEMALLOC_N(opt_quarantine) #define opt_redzone JEMALLOC_N(opt_redzone) #define opt_stats_print JEMALLOC_N(opt_stats_print) #define opt_tcache JEMALLOC_N(opt_tcache) #define opt_utrace JEMALLOC_N(opt_utrace) -#define opt_valgrind JEMALLOC_N(opt_valgrind) #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) #define p2rz JEMALLOC_N(p2rz) +#define pages_commit 
JEMALLOC_N(pages_commit) +#define pages_decommit JEMALLOC_N(pages_decommit) +#define pages_map JEMALLOC_N(pages_map) #define pages_purge JEMALLOC_N(pages_purge) +#define pages_trim JEMALLOC_N(pages_trim) +#define pages_unmap JEMALLOC_N(pages_unmap) #define pow2_ceil JEMALLOC_N(pow2_ceil) +#define prof_active_get JEMALLOC_N(prof_active_get) +#define prof_active_get_unlocked JEMALLOC_N(prof_active_get_unlocked) +#define prof_active_set JEMALLOC_N(prof_active_set) +#define prof_alloc_prep JEMALLOC_N(prof_alloc_prep) +#define prof_alloc_rollback JEMALLOC_N(prof_alloc_rollback) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) #define prof_boot1 JEMALLOC_N(prof_boot1) #define prof_boot2 JEMALLOC_N(prof_boot2) -#define prof_bt_count JEMALLOC_N(prof_bt_count) -#define prof_ctx_get JEMALLOC_N(prof_ctx_get) -#define prof_ctx_set JEMALLOC_N(prof_ctx_set) +#define prof_dump_header JEMALLOC_N(prof_dump_header) #define prof_dump_open JEMALLOC_N(prof_dump_open) #define prof_free JEMALLOC_N(prof_free) +#define prof_free_sampled_object JEMALLOC_N(prof_free_sampled_object) #define prof_gdump JEMALLOC_N(prof_gdump) +#define prof_gdump_get JEMALLOC_N(prof_gdump_get) +#define prof_gdump_get_unlocked JEMALLOC_N(prof_gdump_get_unlocked) +#define prof_gdump_set JEMALLOC_N(prof_gdump_set) +#define prof_gdump_val JEMALLOC_N(prof_gdump_val) #define prof_idump JEMALLOC_N(prof_idump) #define prof_interval JEMALLOC_N(prof_interval) #define prof_lookup JEMALLOC_N(prof_lookup) #define prof_malloc JEMALLOC_N(prof_malloc) +#define prof_malloc_sample_object JEMALLOC_N(prof_malloc_sample_object) #define prof_mdump JEMALLOC_N(prof_mdump) #define prof_postfork_child JEMALLOC_N(prof_postfork_child) #define prof_postfork_parent JEMALLOC_N(prof_postfork_parent) #define prof_prefork JEMALLOC_N(prof_prefork) -#define prof_promote JEMALLOC_N(prof_promote) #define prof_realloc JEMALLOC_N(prof_realloc) +#define prof_reset JEMALLOC_N(prof_reset) #define 
prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) -#define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) +#define prof_tctx_get JEMALLOC_N(prof_tctx_get) +#define prof_tctx_reset JEMALLOC_N(prof_tctx_reset) +#define prof_tctx_set JEMALLOC_N(prof_tctx_set) #define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) #define prof_tdata_get JEMALLOC_N(prof_tdata_get) #define prof_tdata_init JEMALLOC_N(prof_tdata_init) -#define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) -#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) -#define prof_tdata_tsd JEMALLOC_N(prof_tdata_tsd) -#define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) -#define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) -#define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) -#define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper) -#define prof_tdata_tsd_init_head JEMALLOC_N(prof_tdata_tsd_init_head) -#define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) +#define prof_tdata_reinit JEMALLOC_N(prof_tdata_reinit) +#define prof_thread_active_get JEMALLOC_N(prof_thread_active_get) +#define prof_thread_active_init_get JEMALLOC_N(prof_thread_active_init_get) +#define prof_thread_active_init_set JEMALLOC_N(prof_thread_active_init_set) +#define prof_thread_active_set JEMALLOC_N(prof_thread_active_set) +#define prof_thread_name_get JEMALLOC_N(prof_thread_name_get) +#define prof_thread_name_set JEMALLOC_N(prof_thread_name_set) #define quarantine JEMALLOC_N(quarantine) #define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook) -#define quarantine_boot JEMALLOC_N(quarantine_boot) -#define quarantine_booted JEMALLOC_N(quarantine_booted) +#define quarantine_alloc_hook_work JEMALLOC_N(quarantine_alloc_hook_work) #define quarantine_cleanup JEMALLOC_N(quarantine_cleanup) -#define quarantine_init JEMALLOC_N(quarantine_init) -#define quarantine_tls 
JEMALLOC_N(quarantine_tls) -#define quarantine_tsd JEMALLOC_N(quarantine_tsd) -#define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) -#define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) -#define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) -#define quarantine_tsd_get_wrapper JEMALLOC_N(quarantine_tsd_get_wrapper) -#define quarantine_tsd_init_head JEMALLOC_N(quarantine_tsd_init_head) -#define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) #define register_zone JEMALLOC_N(register_zone) +#define rtree_child_read JEMALLOC_N(rtree_child_read) +#define rtree_child_read_hard JEMALLOC_N(rtree_child_read_hard) +#define rtree_child_tryread JEMALLOC_N(rtree_child_tryread) #define rtree_delete JEMALLOC_N(rtree_delete) #define rtree_get JEMALLOC_N(rtree_get) -#define rtree_get_locked JEMALLOC_N(rtree_get_locked) #define rtree_new JEMALLOC_N(rtree_new) -#define rtree_postfork_child JEMALLOC_N(rtree_postfork_child) -#define rtree_postfork_parent JEMALLOC_N(rtree_postfork_parent) -#define rtree_prefork JEMALLOC_N(rtree_prefork) +#define rtree_node_valid JEMALLOC_N(rtree_node_valid) #define rtree_set JEMALLOC_N(rtree_set) +#define rtree_start_level JEMALLOC_N(rtree_start_level) +#define rtree_subkey JEMALLOC_N(rtree_subkey) +#define rtree_subtree_read JEMALLOC_N(rtree_subtree_read) +#define rtree_subtree_read_hard JEMALLOC_N(rtree_subtree_read_hard) +#define rtree_subtree_tryread JEMALLOC_N(rtree_subtree_tryread) +#define rtree_val_read JEMALLOC_N(rtree_val_read) +#define rtree_val_write JEMALLOC_N(rtree_val_write) #define s2u JEMALLOC_N(s2u) +#define s2u_compute JEMALLOC_N(s2u_compute) +#define s2u_lookup JEMALLOC_N(s2u_lookup) #define sa2u JEMALLOC_N(sa2u) #define set_errno JEMALLOC_N(set_errno) -#define small_size2bin JEMALLOC_N(small_size2bin) +#define size2index JEMALLOC_N(size2index) +#define size2index_compute JEMALLOC_N(size2index_compute) +#define size2index_lookup JEMALLOC_N(size2index_lookup) +#define size2index_tab 
JEMALLOC_N(size2index_tab) #define stats_cactive JEMALLOC_N(stats_cactive) #define stats_cactive_add JEMALLOC_N(stats_cactive_add) #define stats_cactive_get JEMALLOC_N(stats_cactive_get) #define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) -#define stats_chunks JEMALLOC_N(stats_chunks) #define stats_print JEMALLOC_N(stats_print) #define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) #define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) @@ -359,55 +433,67 @@ #define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) #define tcache_arena_associate JEMALLOC_N(tcache_arena_associate) #define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) +#define tcache_arena_reassociate JEMALLOC_N(tcache_arena_reassociate) #define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) #define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) #define tcache_bin_info JEMALLOC_N(tcache_bin_info) -#define tcache_boot0 JEMALLOC_N(tcache_boot0) -#define tcache_boot1 JEMALLOC_N(tcache_boot1) -#define tcache_booted JEMALLOC_N(tcache_booted) +#define tcache_boot JEMALLOC_N(tcache_boot) +#define tcache_cleanup JEMALLOC_N(tcache_cleanup) #define tcache_create JEMALLOC_N(tcache_create) #define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) #define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) -#define tcache_destroy JEMALLOC_N(tcache_destroy) -#define tcache_enabled_booted JEMALLOC_N(tcache_enabled_booted) +#define tcache_enabled_cleanup JEMALLOC_N(tcache_enabled_cleanup) #define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) -#define tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized) #define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) -#define tcache_enabled_tls JEMALLOC_N(tcache_enabled_tls) -#define tcache_enabled_tsd JEMALLOC_N(tcache_enabled_tsd) -#define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) -#define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) -#define 
tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) -#define tcache_enabled_tsd_get_wrapper JEMALLOC_N(tcache_enabled_tsd_get_wrapper) -#define tcache_enabled_tsd_init_head JEMALLOC_N(tcache_enabled_tsd_init_head) -#define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) #define tcache_event_hard JEMALLOC_N(tcache_event_hard) #define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_get JEMALLOC_N(tcache_get) -#define tcache_initialized JEMALLOC_N(tcache_initialized) +#define tcache_get_hard JEMALLOC_N(tcache_get_hard) #define tcache_maxclass JEMALLOC_N(tcache_maxclass) +#define tcaches JEMALLOC_N(tcaches) #define tcache_salloc JEMALLOC_N(tcache_salloc) +#define tcaches_create JEMALLOC_N(tcaches_create) +#define tcaches_destroy JEMALLOC_N(tcaches_destroy) +#define tcaches_flush JEMALLOC_N(tcaches_flush) +#define tcaches_get JEMALLOC_N(tcaches_get) #define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) -#define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) -#define tcache_tls JEMALLOC_N(tcache_tls) -#define tcache_tsd JEMALLOC_N(tcache_tsd) -#define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) -#define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) -#define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) -#define tcache_tsd_get_wrapper JEMALLOC_N(tcache_tsd_get_wrapper) -#define tcache_tsd_init_head JEMALLOC_N(tcache_tsd_init_head) -#define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) -#define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) -#define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized) -#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) -#define thread_allocated_tsd JEMALLOC_N(thread_allocated_tsd) -#define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) -#define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) -#define thread_allocated_tsd_get 
JEMALLOC_N(thread_allocated_tsd_get) -#define thread_allocated_tsd_get_wrapper JEMALLOC_N(thread_allocated_tsd_get_wrapper) -#define thread_allocated_tsd_init_head JEMALLOC_N(thread_allocated_tsd_init_head) -#define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) +#define thread_allocated_cleanup JEMALLOC_N(thread_allocated_cleanup) +#define thread_deallocated_cleanup JEMALLOC_N(thread_deallocated_cleanup) +#define tsd_arena_get JEMALLOC_N(tsd_arena_get) +#define tsd_arena_set JEMALLOC_N(tsd_arena_set) +#define tsd_boot JEMALLOC_N(tsd_boot) +#define tsd_boot0 JEMALLOC_N(tsd_boot0) +#define tsd_boot1 JEMALLOC_N(tsd_boot1) +#define tsd_booted JEMALLOC_N(tsd_booted) +#define tsd_cleanup JEMALLOC_N(tsd_cleanup) +#define tsd_cleanup_wrapper JEMALLOC_N(tsd_cleanup_wrapper) +#define tsd_fetch JEMALLOC_N(tsd_fetch) +#define tsd_get JEMALLOC_N(tsd_get) +#define tsd_wrapper_get JEMALLOC_N(tsd_wrapper_get) +#define tsd_wrapper_set JEMALLOC_N(tsd_wrapper_set) +#define tsd_initialized JEMALLOC_N(tsd_initialized) #define tsd_init_check_recursion JEMALLOC_N(tsd_init_check_recursion) #define tsd_init_finish JEMALLOC_N(tsd_init_finish) +#define tsd_init_head JEMALLOC_N(tsd_init_head) +#define tsd_nominal JEMALLOC_N(tsd_nominal) +#define tsd_quarantine_get JEMALLOC_N(tsd_quarantine_get) +#define tsd_quarantine_set JEMALLOC_N(tsd_quarantine_set) +#define tsd_set JEMALLOC_N(tsd_set) +#define tsd_tcache_enabled_get JEMALLOC_N(tsd_tcache_enabled_get) +#define tsd_tcache_enabled_set JEMALLOC_N(tsd_tcache_enabled_set) +#define tsd_tcache_get JEMALLOC_N(tsd_tcache_get) +#define tsd_tcache_set JEMALLOC_N(tsd_tcache_set) +#define tsd_tls JEMALLOC_N(tsd_tls) +#define tsd_tsd JEMALLOC_N(tsd_tsd) +#define tsd_prof_tdata_get JEMALLOC_N(tsd_prof_tdata_get) +#define tsd_prof_tdata_set JEMALLOC_N(tsd_prof_tdata_set) +#define tsd_thread_allocated_get JEMALLOC_N(tsd_thread_allocated_get) +#define tsd_thread_allocated_set JEMALLOC_N(tsd_thread_allocated_set) +#define 
tsd_thread_deallocated_get JEMALLOC_N(tsd_thread_deallocated_get) +#define tsd_thread_deallocated_set JEMALLOC_N(tsd_thread_deallocated_set) #define u2rz JEMALLOC_N(u2rz) +#define valgrind_freelike_block JEMALLOC_N(valgrind_freelike_block) +#define valgrind_make_mem_defined JEMALLOC_N(valgrind_make_mem_defined) +#define valgrind_make_mem_noaccess JEMALLOC_N(valgrind_make_mem_noaccess) +#define valgrind_make_mem_undefined JEMALLOC_N(valgrind_make_mem_undefined) diff --git a/dep/jemalloc/include/jemalloc/internal/prng.h b/dep/jemalloc/include/jemalloc/internal/prng.h index 7b2b06512ff..216d0ef47bd 100644 --- a/dep/jemalloc/include/jemalloc/internal/prng.h +++ b/dep/jemalloc/include/jemalloc/internal/prng.h @@ -15,7 +15,7 @@ * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. * * This choice of m has the disadvantage that the quality of the bits is - * proportional to bit position. For example. the lowest bit has a cycle of 2, + * proportional to bit position. For example, the lowest bit has a cycle of 2, * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. * @@ -26,22 +26,22 @@ * const uint32_t a, c : See above discussion. */ #define prng32(r, lg_range, state, a, c) do { \ - assert(lg_range > 0); \ - assert(lg_range <= 32); \ + assert((lg_range) > 0); \ + assert((lg_range) <= 32); \ \ r = (state * (a)) + (c); \ state = r; \ - r >>= (32 - lg_range); \ + r >>= (32 - (lg_range)); \ } while (false) /* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. 
*/ #define prng64(r, lg_range, state, a, c) do { \ - assert(lg_range > 0); \ - assert(lg_range <= 64); \ + assert((lg_range) > 0); \ + assert((lg_range) <= 64); \ \ r = (state * (a)) + (c); \ state = r; \ - r >>= (64 - lg_range); \ + r >>= (64 - (lg_range)); \ } while (false) #endif /* JEMALLOC_H_TYPES */ diff --git a/dep/jemalloc/include/jemalloc/internal/prof.h b/dep/jemalloc/include/jemalloc/internal/prof.h index 6f162d21e84..e5198c3e815 100644 --- a/dep/jemalloc/include/jemalloc/internal/prof.h +++ b/dep/jemalloc/include/jemalloc/internal/prof.h @@ -3,8 +3,8 @@ typedef struct prof_bt_s prof_bt_t; typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_thr_cnt_s prof_thr_cnt_t; -typedef struct prof_ctx_s prof_ctx_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_gctx_s prof_gctx_t; typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ @@ -23,9 +23,6 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_BT_MAX 128 -/* Maximum number of backtraces to store in each per thread LRU cache. */ -#define PROF_TCMAX 1024 - /* Initial hash table size. */ #define PROF_CKH_MINITEMS 64 @@ -36,12 +33,18 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_PRINTF_BUFSIZE 128 /* - * Number of mutexes shared among all ctx's. No space is allocated for these + * Number of mutexes shared among all gctx's. No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ #define PROF_NCTX_LOCKS 1024 /* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* * prof_tdata pointers close to NULL are used to encode state information that * is used for cleaning up during thread shutdown. */ @@ -63,141 +66,186 @@ struct prof_bt_s { /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. 
*/ typedef struct { prof_bt_t *bt; - unsigned nignore; unsigned max; } prof_unwind_data_t; #endif struct prof_cnt_s { - /* - * Profiling counters. An allocation/deallocation pair can operate on - * different prof_thr_cnt_t objects that are linked into the same - * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go - * negative. In principle it is possible for the *bytes counters to - * overflow/underflow, but a general solution would require something - * like 128-bit counters; this implementation doesn't bother to solve - * that problem. - */ - int64_t curobjs; - int64_t curbytes; + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curbytes; uint64_t accumobjs; uint64_t accumbytes; }; -struct prof_thr_cnt_s { - /* Linkage into prof_ctx_t's cnts_ql. */ - ql_elm(prof_thr_cnt_t) cnts_link; +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; - /* Linkage into thread's LRU. */ - ql_elm(prof_thr_cnt_t) lru_link; +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; /* - * Associated context. If a thread frees an object that it did not - * allocate, it is possible that the context is not cached in the - * thread's hash table, in which case it must be able to look up the - * context, insert a new prof_thr_cnt_t into the thread's hash table, - * and link it into the prof_ctx_t's cnts_ql. + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. */ - prof_ctx_t *ctx; + uint64_t thr_uid; + uint64_t thr_discrim; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; /* - * Threads use memory barriers to update the counters. Since there is - * only ever one writer, the only challenge is for the reader to get a - * consistent read of the counters. 
- * - * The writer uses this series of operations: - * - * 1) Increment epoch to an odd number. - * 2) Update counters. - * 3) Increment epoch to an even number. - * - * The reader must assure 1) that the epoch is even while it reads the - * counters, and 2) that the epoch doesn't change between the time it - * starts and finishes reading the counters. + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). */ - unsigned epoch; + uint64_t tctx_uid; - /* Profiling counters. */ - prof_cnt_t cnts; -}; + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; -struct prof_ctx_s { - /* Associated backtrace. */ - prof_bt_t *bt; + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; - /* Protects nlimbo, cnt_merged, and cnts_ql. */ +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ malloc_mutex_t *lock; /* - * Number of threads that currently cause this ctx to be in a state of + * Number of threads that currently cause this gctx to be in a state of * limbo due to one of: - * - Initializing per thread counters associated with this ctx. 
- * - Preparing to destroy this ctx. - * - Dumping a heap profile that includes this ctx. + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. * nlimbo must be 1 (single destroyer) in order to safely destroy the - * ctx. + * gctx. */ unsigned nlimbo; - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* When threads exit, they merge their stats into cnt_merged. */ - prof_cnt_t cnt_merged; - /* - * List of profile counters, one for each thread that has allocated in + * Tree of profile counters, one for each thread that has allocated in * this context. */ - ql_head(prof_thr_cnt_t) cnts_ql; + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. */ + prof_bt_t bt; - /* Linkage for list of contexts to be dumped. */ - ql_elm(prof_ctx_t) dump_link; + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; }; -typedef ql_head(prof_ctx_t) prof_ctx_list_t; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + /* - * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a - * cache of backtraces, with associated thread-specific prof_thr_cnt_t - * objects. Other threads may read the prof_thr_cnt_t contents, but no - * others will ever write them. - * - * Upon thread exit, the thread must merge all the prof_thr_cnt_t - * counter data into the associated prof_ctx_t objects, and unlink/free - * the prof_thr_cnt_t objects. + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. */ - ckh_t bt2cnt; + uint64_t thr_discrim; - /* LRU for contents of bt2cnt. 
*/ - ql_head(prof_thr_cnt_t) lru_ql; + /* Included in heap profile dumps if non-NULL. */ + char *thread_name; - /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; + bool attached; + bool expired; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; /* Sampling state. */ uint64_t prng_state; - uint64_t threshold; - uint64_t accum; + uint64_t bytes_until_sample; /* State used to avoid dumping while operating on prof internals. */ bool enq; bool enq_idump; bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void *vec[PROF_BT_MAX]; }; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS extern bool opt_prof; -/* - * Even if opt_prof is true, sampling can be temporarily disabled by setting - * opt_prof_active to false. No locking is used when updating opt_prof_active, - * so there are no guarantees regarding how long it will take for all threads - * to notice state changes. 
- */ extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ @@ -211,6 +259,12 @@ extern char opt_prof_prefix[ #endif 1]; +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + /* * Profile dump interval, measured in bytes allocated. Each arena triggers a * profile dump when it reaches this threshold. The effect is that the @@ -221,391 +275,269 @@ extern char opt_prof_prefix[ extern uint64_t prof_interval; /* - * If true, promote small sampled objects to large objects, since small run - * headers do not have embedded profile context pointers. + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. */ -extern bool prof_promote; +extern size_t lg_prof_sample; +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_malloc_sample_object(const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt, unsigned nignore); -prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +void prof_backtrace(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); #ifdef JEMALLOC_JET +size_t prof_tdata_count(void); size_t prof_bt_count(void); +const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; +typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *); +extern prof_dump_header_t *prof_dump_header; #endif void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); -prof_tdata_t *prof_tdata_init(void); -void prof_tdata_cleanup(void *arg); +prof_tdata_t 
*prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); +const char *prof_thread_name_get(void); +bool prof_active_get(void); +bool prof_active_set(bool active); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(void); +bool prof_thread_active_set(bool active); +bool prof_thread_active_init_get(void); +bool prof_thread_active_init_set(bool active_init); +bool prof_gdump_get(void); +bool prof_gdump_set(bool active); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); void prof_prefork(void); void prof_postfork_parent(void); void prof_postfork_child(void); +void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#define PROF_ALLOC_PREP(nignore, size, ret) do { \ - prof_tdata_t *prof_tdata; \ - prof_bt_t bt; \ - \ - assert(size == s2u(size)); \ - \ - prof_tdata = prof_tdata_get(true); \ - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ - if (prof_tdata != NULL) \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - else \ - ret = NULL; \ - break; \ - } \ - \ - if (opt_prof_active == false) { \ - /* Sampling is currently inactive, so avoid sampling. */\ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } else if (opt_lg_prof_sample == 0) { \ - /* Don't bother with sampling logic, since sampling */\ - /* interval is 1. */\ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore); \ - ret = prof_lookup(&bt); \ - } else { \ - if (prof_tdata->threshold == 0) { \ - /* Initialize. Seed the prng differently for */\ - /* each thread. 
*/\ - prof_tdata->prng_state = \ - (uint64_t)(uintptr_t)&size; \ - prof_sample_threshold_update(prof_tdata); \ - } \ - \ - /* Determine whether to capture a backtrace based on */\ - /* whether size is enough for prof_accum to reach */\ - /* prof_tdata->threshold. However, delay updating */\ - /* these variables until prof_{m,re}alloc(), because */\ - /* we don't know for sure that the allocation will */\ - /* succeed. */\ - /* */\ - /* Use subtraction rather than addition to avoid */\ - /* potential integer overflow. */\ - if (size >= prof_tdata->threshold - \ - prof_tdata->accum) { \ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore); \ - ret = prof_lookup(&bt); \ - } else \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } \ -} while (0) - #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) - -prof_tdata_t *prof_tdata_get(bool create); -void prof_sample_threshold_update(prof_tdata_t *prof_tdata); -prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); -bool prof_sample_accum_update(size_t size); -void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, - size_t old_usize, prof_ctx_t *old_ctx); -void prof_free(const void *ptr, size_t size); +bool prof_active_get_unlocked(void); +bool prof_gdump_get_unlocked(void); +prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); +bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, + prof_tdata_t **tdata_out); +prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, + bool update); +prof_tctx_t *prof_tctx_get(const void *ptr); +void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *tctx); +void prof_malloc_sample_object(const void *ptr, size_t usize, + prof_tctx_t *tctx); +void 
prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, + size_t old_usize, prof_tctx_t *old_tctx); +void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ -malloc_tsd_externs(prof_tdata, prof_tdata_t *) -malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, - prof_tdata_cleanup) +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) +{ + + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return (prof_active); +} -JEMALLOC_INLINE prof_tdata_t * -prof_tdata_get(bool create) +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) { - prof_tdata_t *prof_tdata; + + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. 
+ */ + return (prof_gdump_val); +} + +JEMALLOC_ALWAYS_INLINE prof_tdata_t * +prof_tdata_get(tsd_t *tsd, bool create) +{ + prof_tdata_t *tdata; cassert(config_prof); - prof_tdata = *prof_tdata_tsd_get(); - if (create && prof_tdata == NULL) - prof_tdata = prof_tdata_init(); + tdata = tsd_prof_tdata_get(tsd); + if (create) { + if (unlikely(tdata == NULL)) { + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } + } else if (unlikely(tdata->expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->attached); + } - return (prof_tdata); + return (tdata); } -JEMALLOC_INLINE void -prof_sample_threshold_update(prof_tdata_t *prof_tdata) +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_tctx_get(const void *ptr) { - /* - * The body of this function is compiled out unless heap profiling is - * enabled, so that it is possible to compile jemalloc with floating - * point support completely disabled. Avoiding floating point code is - * important on memory-constrained systems, but it also enables a - * workaround for versions of glibc that don't properly save/restore - * floating point registers during dynamic lazy symbol loading (which - * internally calls into whatever malloc implementation happens to be - * integrated into the application). Note that some compilers (e.g. - * gcc 4.8) may use floating point registers for fast memory moves, so - * jemalloc must be compiled with such optimizations disabled (e.g. - * -mno-sse) in order for the workaround to be complete. - */ -#ifdef JEMALLOC_PROF - uint64_t r; - double u; cassert(config_prof); + assert(ptr != NULL); - /* - * Compute sample threshold as a geometrically distributed random - * variable with mean (2^opt_lg_prof_sample). 
- * - * __ __ - * | log(u) | 1 - * prof_tdata->threshold = | -------- |, where p = ------------------- - * | log(1-p) | opt_lg_prof_sample - * 2 - * - * For more information on the math, see: - * - * Non-Uniform Random Variate Generation - * Luc Devroye - * Springer-Verlag, New York, 1986 - * pp 500 - * (http://luc.devroye.org/rnbookindex.html) - */ - prng64(r, 53, prof_tdata->prng_state, - UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); - u = (double)r * (1.0/9007199254740992.0L); - prof_tdata->threshold = (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) - + (uint64_t)1U; -#endif + return (arena_prof_tctx_get(ptr)); } -JEMALLOC_INLINE prof_ctx_t * -prof_ctx_get(const void *ptr) +JEMALLOC_ALWAYS_INLINE void +prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_ctx_t *ret; - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - ret = arena_prof_ctx_get(ptr); - } else - ret = huge_prof_ctx_get(ptr); - - return (ret); + arena_prof_tctx_set(ptr, usize, tctx); } -JEMALLOC_INLINE void -prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) { - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - arena_prof_ctx_set(ptr, usize, ctx); - } else - huge_prof_ctx_set(ptr, ctx); + arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx); } -JEMALLOC_INLINE bool -prof_sample_accum_update(size_t size) +JEMALLOC_ALWAYS_INLINE bool +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; cassert(config_prof); - /* Sampling logic is unnecessary if the interval is 1. 
*/ - assert(opt_lg_prof_sample != 0); - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(tsd, true); + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = NULL; + + if (tdata_out != NULL) + *tdata_out = tdata; + + if (tdata == NULL) return (true); - /* Take care to avoid integer overflow. */ - if (size >= prof_tdata->threshold - prof_tdata->accum) { - prof_tdata->accum -= (prof_tdata->threshold - size); - /* Compute new sample threshold. */ - prof_sample_threshold_update(prof_tdata); - while (prof_tdata->accum >= prof_tdata->threshold) { - prof_tdata->accum -= prof_tdata->threshold; - prof_sample_threshold_update(prof_tdata); - } - return (false); - } else { - prof_tdata->accum += size; + if (tdata->bytes_until_sample >= usize) { + if (update) + tdata->bytes_until_sample -= usize; return (true); + } else { + /* Compute new sample threshold. */ + if (update) + prof_sample_threshold_update(tdata); + return (!tdata->active); } } -JEMALLOC_INLINE void -prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) +{ + prof_tctx_t *ret; + prof_tdata_t *tdata; + prof_bt_t bt; + + assert(usize == s2u(usize)); + + if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, + &tdata))) + ret = (prof_tctx_t *)(uintptr_t)1U; + else { + bt_init(&bt, tdata->vec); + prof_backtrace(&bt); + ret = prof_lookup(tsd, &bt); + } + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(ptr, true)); - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(usize)) { - /* - * Don't sample. 
For malloc()-like allocation, it is - * always possible to tell in advance how large an - * object's usable size will be, so there should never - * be a difference between the usize passed to - * PROF_ALLOC_PREP() and prof_malloc(). - */ - assert((uintptr_t)cnt == (uintptr_t)1U); - } - } - - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, usize, cnt->ctx); - - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs++; - cnt->cnts.curbytes += usize; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += usize; - } - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else - prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_malloc_sample_object(ptr, usize, tctx); + else + prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } -JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, - size_t old_usize, prof_ctx_t *old_ctx) +JEMALLOC_ALWAYS_INLINE void +prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, + bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx) { - prof_thr_cnt_t *told_cnt; + bool sampled, old_sampled; cassert(config_prof); - assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - if (ptr != NULL) { + if (prof_active && !updated && ptr != NULL) { assert(usize == isalloc(ptr, true)); - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(usize)) { - /* - * Don't sample. The usize passed to - * PROF_ALLOC_PREP() was larger than what - * actually got allocated, so a backtrace was - * captured for this allocation, even though - * its actual usize was insufficient to cross - * the sample threshold. 
- */ - cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - } - } - } - - if ((uintptr_t)old_ctx > (uintptr_t)1U) { - told_cnt = prof_lookup(old_ctx->bt); - if (told_cnt == NULL) { + if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* - * It's too late to propagate OOM for this realloc(), - * so operate directly on old_cnt->ctx->cnt_merged. + * Don't sample. The usize passed to prof_alloc_prep() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. */ - malloc_mutex_lock(old_ctx->lock); - old_ctx->cnt_merged.curobjs--; - old_ctx->cnt_merged.curbytes -= old_usize; - malloc_mutex_unlock(old_ctx->lock); - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + tctx = (prof_tctx_t *)(uintptr_t)1U; } - } else - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, usize, cnt->ctx); - cnt->epoch++; - } else if (ptr != NULL) - prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) { - told_cnt->cnts.curobjs--; - told_cnt->cnts.curbytes -= old_usize; } - if ((uintptr_t)cnt > (uintptr_t)1U) { - cnt->cnts.curobjs++; - cnt->cnts.curbytes += usize; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += usize; - } - } - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) - cnt->epoch++; - /*********/ - mb_write(); /* Not strictly necessary. 
*/ + + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + + if (unlikely(sampled)) + prof_malloc_sample_object(ptr, usize, tctx); + else + prof_tctx_reset(ptr, usize, old_ptr, old_tctx); + + if (unlikely(old_sampled)) + prof_free_sampled_object(tsd, old_usize, old_tctx); } -JEMALLOC_INLINE void -prof_free(const void *ptr, size_t size) +JEMALLOC_ALWAYS_INLINE void +prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_ctx_t *ctx = prof_ctx_get(ptr); + prof_tctx_t *tctx = prof_tctx_get(ptr); cassert(config_prof); + assert(usize == isalloc(ptr, true)); - if ((uintptr_t)ctx > (uintptr_t)1) { - prof_thr_cnt_t *tcnt; - assert(size == isalloc(ptr, true)); - tcnt = prof_lookup(ctx->bt); - - if (tcnt != NULL) { - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - tcnt->cnts.curobjs--; - tcnt->cnts.curbytes -= size; - /*********/ - mb_write(); - /*********/ - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else { - /* - * OOM during free() cannot be propagated, so operate - * directly on cnt->ctx->cnt_merged. - */ - malloc_mutex_lock(ctx->lock); - ctx->cnt_merged.curobjs--; - ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(ctx->lock); - } - } + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_free_sampled_object(tsd, usize, tctx); } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/ql.h b/dep/jemalloc/include/jemalloc/internal/ql.h index f70c5f6f391..1834bb8557a 100644 --- a/dep/jemalloc/include/jemalloc/internal/ql.h +++ b/dep/jemalloc/include/jemalloc/internal/ql.h @@ -1,6 +1,4 @@ -/* - * List definitions. - */ +/* List definitions. 
*/ #define ql_head(a_type) \ struct { \ a_type *qlh_first; \ diff --git a/dep/jemalloc/include/jemalloc/internal/qr.h b/dep/jemalloc/include/jemalloc/internal/qr.h index 602944b9b4f..0fbaec25e7c 100644 --- a/dep/jemalloc/include/jemalloc/internal/qr.h +++ b/dep/jemalloc/include/jemalloc/internal/qr.h @@ -40,8 +40,10 @@ struct { \ (a_qr_b)->a_field.qre_prev = t; \ } while (0) -/* qr_meld() and qr_split() are functionally equivalent, so there's no need to - * have two copies of the code. */ +/* + * qr_meld() and qr_split() are functionally equivalent, so there's no need to + * have two copies of the code. + */ #define qr_split(a_qr_a, a_qr_b, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_field) diff --git a/dep/jemalloc/include/jemalloc/internal/quarantine.h b/dep/jemalloc/include/jemalloc/internal/quarantine.h index 16f677f73da..ae607399f6d 100644 --- a/dep/jemalloc/include/jemalloc/internal/quarantine.h +++ b/dep/jemalloc/include/jemalloc/internal/quarantine.h @@ -29,36 +29,29 @@ struct quarantine_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -quarantine_t *quarantine_init(size_t lg_maxobjs); -void quarantine(void *ptr); -void quarantine_cleanup(void *arg); -bool quarantine_boot(void); +void quarantine_alloc_hook_work(tsd_t *tsd); +void quarantine(tsd_t *tsd, void *ptr); +void quarantine_cleanup(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *) - void quarantine_alloc_hook(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) -malloc_tsd_externs(quarantine, quarantine_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL, - quarantine_cleanup) - JEMALLOC_ALWAYS_INLINE void quarantine_alloc_hook(void) { - quarantine_t *quarantine; + tsd_t *tsd; 
assert(config_fill && opt_quarantine); - quarantine = *quarantine_tsd_get(); - if (quarantine == NULL) - quarantine_init(LG_MAXOBJS_INIT); + tsd = tsd_fetch(); + if (tsd_quarantine_get(tsd) == NULL) + quarantine_alloc_hook_work(tsd); } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/rb.h b/dep/jemalloc/include/jemalloc/internal/rb.h index 423802eb2dc..2ca8e5933b2 100644 --- a/dep/jemalloc/include/jemalloc/internal/rb.h +++ b/dep/jemalloc/include/jemalloc/internal/rb.h @@ -158,6 +158,8 @@ struct { \ #define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree); \ +a_attr bool \ +a_prefix##empty(a_rbt_type *rbtree); \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree); \ a_attr a_type * \ @@ -198,7 +200,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * int (a_cmp *)(a_type *a_node, a_type *a_other); * ^^^^^^ * or a_key - * Interpretation of comparision function return values: + * Interpretation of comparison function return values: * -1 : a_node < a_other * 0 : a_node == a_other * 1 : a_node > a_other @@ -224,6 +226,13 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * Args: * tree: Pointer to an uninitialized red-black tree object. * + * static bool + * ex_empty(ex_t *tree); + * Description: Determine whether tree is empty. + * Args: + * tree: Pointer to an initialized red-black tree object. + * Ret: True if tree is empty, false otherwise. + * * static ex_node_t * * ex_first(ex_t *tree); * static ex_node_t * @@ -309,6 +318,10 @@ a_attr void \ a_prefix##new(a_rbt_type *rbtree) { \ rb_new(a_type, a_field, rbtree); \ } \ +a_attr bool \ +a_prefix##empty(a_rbt_type *rbtree) { \ + return (rbtree->rbt_root == &rbtree->rbt_nil); \ +} \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ @@ -580,7 +593,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ if (left != &rbtree->rbt_nil) { \ /* node has no successor, but it has a left child. 
*/\ /* Splice node out, without losing the left child. */\ - assert(rbtn_red_get(a_type, a_field, node) == false); \ + assert(!rbtn_red_get(a_type, a_field, node)); \ assert(rbtn_red_get(a_type, a_field, left)); \ rbtn_black_set(a_type, a_field, left); \ if (pathp == path) { \ @@ -616,8 +629,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ if (pathp->cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp->node, \ pathp[1].node); \ - assert(rbtn_red_get(a_type, a_field, pathp[1].node) \ - == false); \ + assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *right = rbtn_right_get(a_type, a_field, \ pathp->node); \ @@ -681,7 +693,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ /* Balance restored, but rotation modified */\ - /* subree root, which may actually be the tree */\ + /* subtree root, which may actually be the tree */\ /* root. */\ if (pathp == path) { \ /* Set root. */ \ @@ -849,7 +861,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ /* Set root. */ \ rbtree->rbt_root = path->node; \ - assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \ + assert(!rbtn_red_get(a_type, a_field, rbtree->rbt_root)); \ } \ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ diff --git a/dep/jemalloc/include/jemalloc/internal/rtree.h b/dep/jemalloc/include/jemalloc/internal/rtree.h index bc74769f50e..28ae9d1dd2d 100644 --- a/dep/jemalloc/include/jemalloc/internal/rtree.h +++ b/dep/jemalloc/include/jemalloc/internal/rtree.h @@ -1,170 +1,292 @@ /* * This radix tree implementation is tailored to the singular purpose of - * tracking which chunks are currently owned by jemalloc. This functionality - * is mandatory for OS X, where jemalloc must be able to respond to object - * ownership queries. + * associating metadata with chunks that are currently owned by jemalloc. 
* ******************************************************************************* */ #ifdef JEMALLOC_H_TYPES +typedef struct rtree_node_elm_s rtree_node_elm_t; +typedef struct rtree_level_s rtree_level_t; typedef struct rtree_s rtree_t; /* - * Size of each radix tree node (must be a power of 2). This impacts tree - * depth. + * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the + * machine address width. */ -#define RTREE_NODESIZE (1U << 16) +#define LG_RTREE_BITS_PER_LEVEL 4 +#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) +#define RTREE_HEIGHT_MAX \ + ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) -typedef void *(rtree_alloc_t)(size_t); -typedef void (rtree_dalloc_t)(void *); +/* Used for two-stage lock-free node initialization. */ +#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) + +/* + * The node allocation callback function's argument is the number of contiguous + * rtree_node_elm_t structures to allocate, and the resulting memory must be + * zeroed. + */ +typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t); +typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *); #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +struct rtree_node_elm_s { + union { + void *pun; + rtree_node_elm_t *child; + extent_node_t *val; + }; +}; + +struct rtree_level_s { + /* + * A non-NULL subtree points to a subtree rooted along the hypothetical + * path to the leaf node corresponding to key 0. Depending on what keys + * have been used to store to the tree, an arbitrary combination of + * subtree pointers may remain NULL. + * + * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. 
+ * This results in a 3-level tree, and the leftmost leaf can be directly + * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding + * 0x00000000) can be accessed via subtrees[1], and the remainder of the + * tree can be accessed via subtrees[0]. + * + * levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...] + * + * levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ] + * + * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...] + * + * This has practical implications on x64, which currently uses only the + * lower 47 bits of virtual address space in userland, thus leaving + * subtrees[0] unused and avoiding a level of tree traversal. + */ + union { + void *subtree_pun; + rtree_node_elm_t *subtree; + }; + /* Number of key bits distinguished by this level. */ + unsigned bits; + /* + * Cumulative number of key bits distinguished by traversing to + * corresponding tree level. + */ + unsigned cumbits; +}; + struct rtree_s { - rtree_alloc_t *alloc; - rtree_dalloc_t *dalloc; - malloc_mutex_t mutex; - void **root; - unsigned height; - unsigned level2bits[1]; /* Dynamically sized. */ + rtree_node_alloc_t *alloc; + rtree_node_dalloc_t *dalloc; + unsigned height; + /* + * Precomputed table used to convert from the number of leading 0 key + * bits to which subtree level to start at. 
+ */ + unsigned start_level[RTREE_HEIGHT_MAX]; + rtree_level_t levels[RTREE_HEIGHT_MAX]; }; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc); +bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, + rtree_node_dalloc_t *dalloc); void rtree_delete(rtree_t *rtree); -void rtree_prefork(rtree_t *rtree); -void rtree_postfork_parent(rtree_t *rtree); -void rtree_postfork_child(rtree_t *rtree); +rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree, + unsigned level); +rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree, + rtree_node_elm_t *elm, unsigned level); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -#ifdef JEMALLOC_DEBUG -uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key); -#endif -uint8_t rtree_get(rtree_t *rtree, uintptr_t key); -bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val); +unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); +uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); + +bool rtree_node_valid(rtree_node_elm_t *node); +rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); +rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, + unsigned level); +extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, + bool dependent); +void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, + const extent_node_t *val); +rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); +rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); + +extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); +bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || 
defined(JEMALLOC_RTREE_C_)) -#define RTREE_GET_GENERATE(f) \ -/* The least significant bits of the key are ignored. */ \ -JEMALLOC_INLINE uint8_t \ -f(rtree_t *rtree, uintptr_t key) \ -{ \ - uint8_t ret; \ - uintptr_t subkey; \ - unsigned i, lshift, height, bits; \ - void **node, **child; \ - \ - RTREE_LOCK(&rtree->mutex); \ - for (i = lshift = 0, height = rtree->height, node = rtree->root;\ - i < height - 1; \ - i++, lshift += bits, node = child) { \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ - 3)) - bits); \ - child = (void**)node[subkey]; \ - if (child == NULL) { \ - RTREE_UNLOCK(&rtree->mutex); \ - return (0); \ - } \ - } \ - \ - /* \ - * node is a leaf, so it contains values rather than node \ - * pointers. \ - */ \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ - bits); \ - { \ - uint8_t *leaf = (uint8_t *)node; \ - ret = leaf[subkey]; \ - } \ - RTREE_UNLOCK(&rtree->mutex); \ - \ - RTREE_GET_VALIDATE \ - return (ret); \ +JEMALLOC_INLINE unsigned +rtree_start_level(rtree_t *rtree, uintptr_t key) +{ + unsigned start_level; + + if (unlikely(key == 0)) + return (rtree->height - 1); + + start_level = rtree->start_level[lg_floor(key) >> + LG_RTREE_BITS_PER_LEVEL]; + assert(start_level < rtree->height); + return (start_level); } -#ifdef JEMALLOC_DEBUG -# define RTREE_LOCK(l) malloc_mutex_lock(l) -# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) -# define RTREE_GET_VALIDATE -RTREE_GET_GENERATE(rtree_get_locked) -# undef RTREE_LOCK -# undef RTREE_UNLOCK -# undef RTREE_GET_VALIDATE -#endif +JEMALLOC_INLINE uintptr_t +rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) +{ -#define RTREE_LOCK(l) -#define RTREE_UNLOCK(l) -#ifdef JEMALLOC_DEBUG - /* - * Suppose that it were possible for a jemalloc-allocated chunk to be - * munmap()ped, followed by a different allocator in another thread re-using - * overlapping virtual memory, all without invalidating the cached rtree 
- * value. The result would be a false positive (the rtree would claim that - * jemalloc owns memory that it had actually discarded). This scenario - * seems impossible, but the following assertion is a prudent sanity check. - */ -# define RTREE_GET_VALIDATE \ - assert(rtree_get_locked(rtree, key) == ret); -#else -# define RTREE_GET_VALIDATE -#endif -RTREE_GET_GENERATE(rtree_get) -#undef RTREE_LOCK -#undef RTREE_UNLOCK -#undef RTREE_GET_VALIDATE + return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + rtree->levels[level].cumbits)) & ((ZU(1) << + rtree->levels[level].bits) - 1)); +} JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val) +rtree_node_valid(rtree_node_elm_t *node) +{ + + return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_child_tryread(rtree_node_elm_t *elm) +{ + rtree_node_elm_t *child; + + /* Double-checked read (first read may be stale. */ + child = elm->child; + if (!rtree_node_valid(child)) + child = atomic_read_p(&elm->pun); + return (child); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) +{ + rtree_node_elm_t *child; + + child = rtree_child_tryread(elm); + if (unlikely(!rtree_node_valid(child))) + child = rtree_child_read_hard(rtree, elm, level); + return (child); +} + +JEMALLOC_INLINE extent_node_t * +rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) +{ + + if (dependent) { + /* + * Reading a val on behalf of a pointer to a valid allocation is + * guaranteed to be a clean read even without synchronization, + * because the rtree update became visible in memory before the + * pointer came into existence. + */ + return (elm->val); + } else { + /* + * An arbitrary read, e.g. on behalf of ivsalloc(), may not be + * dependent on a previous rtree write, which means a stale read + * could result if synchronization were omitted here. 
+ */ + return (atomic_read_p(&elm->pun)); + } +} + +JEMALLOC_INLINE void +rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) +{ + + atomic_write_p(&elm->pun, val); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_subtree_tryread(rtree_t *rtree, unsigned level) +{ + rtree_node_elm_t *subtree; + + /* Double-checked read (first read may be stale. */ + subtree = rtree->levels[level].subtree; + if (!rtree_node_valid(subtree)) + subtree = atomic_read_p(&rtree->levels[level].subtree_pun); + return (subtree); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_subtree_read(rtree_t *rtree, unsigned level) +{ + rtree_node_elm_t *subtree; + + subtree = rtree_subtree_tryread(rtree, level); + if (unlikely(!rtree_node_valid(subtree))) + subtree = rtree_subtree_read_hard(rtree, level); + return (subtree); +} + +JEMALLOC_INLINE extent_node_t * +rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; - unsigned i, lshift, height, bits; - void **node, **child; - - malloc_mutex_lock(&rtree->mutex); - for (i = lshift = 0, height = rtree->height, node = rtree->root; - i < height - 1; - i++, lshift += bits, node = child) { - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - - bits); - child = (void**)node[subkey]; - if (child == NULL) { - size_t size = ((i + 1 < height - 1) ? 
sizeof(void *) - : (sizeof(uint8_t))) << rtree->level2bits[i+1]; - child = (void**)rtree->alloc(size); - if (child == NULL) { - malloc_mutex_unlock(&rtree->mutex); - return (true); - } - memset(child, 0, size); - node[subkey] = child; + unsigned i, start_level; + rtree_node_elm_t *node, *child; + + start_level = rtree_start_level(rtree, key); + + for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); + /**/; i++, node = child) { + if (!dependent && unlikely(!rtree_node_valid(node))) + return (NULL); + subkey = rtree_subkey(rtree, key, i); + if (i == rtree->height - 1) { + /* + * node is a leaf, so it contains values rather than + * child pointers. + */ + return (rtree_val_read(rtree, &node[subkey], + dependent)); } + assert(i < rtree->height - 1); + child = rtree_child_tryread(&node[subkey]); } + not_reached(); +} - /* node is a leaf, so it contains values rather than node pointers. */ - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); - { - uint8_t *leaf = (uint8_t *)node; - leaf[subkey] = val; - } - malloc_mutex_unlock(&rtree->mutex); +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) +{ + uintptr_t subkey; + unsigned i, start_level; + rtree_node_elm_t *node, *child; - return (false); + start_level = rtree_start_level(rtree, key); + + node = rtree_subtree_read(rtree, start_level); + if (node == NULL) + return (true); + for (i = start_level; /**/; i++, node = child) { + subkey = rtree_subkey(rtree, key, i); + if (i == rtree->height - 1) { + /* + * node is a leaf, so it contains values rather than + * child pointers. 
+ */ + rtree_val_write(rtree, &node[subkey], val); + return (false); + } + assert(i + 1 < rtree->height); + child = rtree_child_read(rtree, &node[subkey], i); + if (child == NULL) + return (true); + } + not_reached(); } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/size_classes.h b/dep/jemalloc/include/jemalloc/internal/size_classes.h index 821102e5c1c..d6715bb79bf 100644 --- a/dep/jemalloc/include/jemalloc/internal/size_classes.h +++ b/dep/jemalloc/include/jemalloc/internal/size_classes.h @@ -2,689 +2,1420 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 12) -#define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 8, 24) \ - SIZE_CLASS(3, 8, 32) \ - SIZE_CLASS(4, 8, 40) \ - SIZE_CLASS(5, 8, 48) \ - SIZE_CLASS(6, 8, 56) \ - SIZE_CLASS(7, 8, 64) \ - SIZE_CLASS(8, 16, 80) \ - SIZE_CLASS(9, 16, 96) \ - SIZE_CLASS(10, 16, 112) \ - SIZE_CLASS(11, 16, 128) \ - SIZE_CLASS(12, 32, 160) \ - SIZE_CLASS(13, 32, 192) \ - SIZE_CLASS(14, 32, 224) \ - SIZE_CLASS(15, 32, 256) \ - SIZE_CLASS(16, 64, 320) \ - SIZE_CLASS(17, 64, 384) \ - SIZE_CLASS(18, 64, 448) \ - SIZE_CLASS(19, 64, 512) \ - SIZE_CLASS(20, 128, 640) \ - SIZE_CLASS(21, 128, 768) \ - SIZE_CLASS(22, 128, 896) \ - SIZE_CLASS(23, 128, 1024) \ - SIZE_CLASS(24, 256, 1280) \ - SIZE_CLASS(25, 256, 1536) \ - SIZE_CLASS(26, 256, 1792) \ - SIZE_CLASS(27, 256, 2048) \ - SIZE_CLASS(28, 512, 2560) \ - SIZE_CLASS(29, 512, 3072) \ - SIZE_CLASS(30, 512, 3584) \ - -#define NBINS 31 -#define SMALL_MAXCLASS 3584 -#endif - -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 13) -#define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 8, 24) \ - SIZE_CLASS(3, 8, 32) \ - SIZE_CLASS(4, 8, 40) \ - SIZE_CLASS(5, 8, 48) \ - 
SIZE_CLASS(6, 8, 56) \ - SIZE_CLASS(7, 8, 64) \ - SIZE_CLASS(8, 16, 80) \ - SIZE_CLASS(9, 16, 96) \ - SIZE_CLASS(10, 16, 112) \ - SIZE_CLASS(11, 16, 128) \ - SIZE_CLASS(12, 32, 160) \ - SIZE_CLASS(13, 32, 192) \ - SIZE_CLASS(14, 32, 224) \ - SIZE_CLASS(15, 32, 256) \ - SIZE_CLASS(16, 64, 320) \ - SIZE_CLASS(17, 64, 384) \ - SIZE_CLASS(18, 64, 448) \ - SIZE_CLASS(19, 64, 512) \ - SIZE_CLASS(20, 128, 640) \ - SIZE_CLASS(21, 128, 768) \ - SIZE_CLASS(22, 128, 896) \ - SIZE_CLASS(23, 128, 1024) \ - SIZE_CLASS(24, 256, 1280) \ - SIZE_CLASS(25, 256, 1536) \ - SIZE_CLASS(26, 256, 1792) \ - SIZE_CLASS(27, 256, 2048) \ - SIZE_CLASS(28, 512, 2560) \ - SIZE_CLASS(29, 512, 3072) \ - SIZE_CLASS(30, 512, 3584) \ - SIZE_CLASS(31, 512, 4096) \ - SIZE_CLASS(32, 1024, 5120) \ - SIZE_CLASS(33, 1024, 6144) \ - SIZE_CLASS(34, 1024, 7168) \ - -#define NBINS 35 -#define SMALL_MAXCLASS 7168 -#endif - -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 14) -#define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 8, 24) \ - SIZE_CLASS(3, 8, 32) \ - SIZE_CLASS(4, 8, 40) \ - SIZE_CLASS(5, 8, 48) \ - SIZE_CLASS(6, 8, 56) \ - SIZE_CLASS(7, 8, 64) \ - SIZE_CLASS(8, 16, 80) \ - SIZE_CLASS(9, 16, 96) \ - SIZE_CLASS(10, 16, 112) \ - SIZE_CLASS(11, 16, 128) \ - SIZE_CLASS(12, 32, 160) \ - SIZE_CLASS(13, 32, 192) \ - SIZE_CLASS(14, 32, 224) \ - SIZE_CLASS(15, 32, 256) \ - SIZE_CLASS(16, 64, 320) \ - SIZE_CLASS(17, 64, 384) \ - SIZE_CLASS(18, 64, 448) \ - SIZE_CLASS(19, 64, 512) \ - SIZE_CLASS(20, 128, 640) \ - SIZE_CLASS(21, 128, 768) \ - SIZE_CLASS(22, 128, 896) \ - SIZE_CLASS(23, 128, 1024) \ - SIZE_CLASS(24, 256, 1280) \ - SIZE_CLASS(25, 256, 1536) \ - SIZE_CLASS(26, 256, 1792) \ - SIZE_CLASS(27, 256, 2048) \ - SIZE_CLASS(28, 512, 2560) \ - SIZE_CLASS(29, 512, 3072) \ - SIZE_CLASS(30, 512, 3584) \ - SIZE_CLASS(31, 512, 4096) \ - SIZE_CLASS(32, 1024, 5120) \ - SIZE_CLASS(33, 1024, 6144) \ - 
SIZE_CLASS(34, 1024, 7168) \ - SIZE_CLASS(35, 1024, 8192) \ - SIZE_CLASS(36, 2048, 10240) \ - SIZE_CLASS(37, 2048, 12288) \ - SIZE_CLASS(38, 2048, 14336) \ +/* + * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to + * be defined prior to inclusion, and it in turn defines: + * + * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling. + * SIZE_CLASSES: Complete table of + * SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) + * tuples. + * index: Size class index. + * lg_grp: Lg group base size (no deltas added). + * lg_delta: Lg delta to previous size class. + * ndelta: Delta multiplier. size == 1<<lg_grp + ndelta<<lg_delta + * bin: 'yes' if a small bin size class, 'no' otherwise. + * lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no' + * otherwise. + * NTBINS: Number of tiny bins. + * NLBINS: Number of bins supported by the lookup table. + * NBINS: Number of small size class bins. + * NSIZES: Number of size classes. + * LG_TINY_MAXCLASS: Lg of maximum tiny size class. + * LOOKUP_MAXCLASS: Maximum size class included in lookup table. + * SMALL_MAXCLASS: Maximum small size class. + * LG_LARGE_MINCLASS: Lg of minimum large size class. + * HUGE_MAXCLASS: Maximum (huge) size class. 
+ */ -#define NBINS 39 -#define SMALL_MAXCLASS 14336 -#endif +#define LG_SIZE_CLASS_GROUP 2 -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 15) -#define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 8, 24) \ - SIZE_CLASS(3, 8, 32) \ - SIZE_CLASS(4, 8, 40) \ - SIZE_CLASS(5, 8, 48) \ - SIZE_CLASS(6, 8, 56) \ - SIZE_CLASS(7, 8, 64) \ - SIZE_CLASS(8, 16, 80) \ - SIZE_CLASS(9, 16, 96) \ - SIZE_CLASS(10, 16, 112) \ - SIZE_CLASS(11, 16, 128) \ - SIZE_CLASS(12, 32, 160) \ - SIZE_CLASS(13, 32, 192) \ - SIZE_CLASS(14, 32, 224) \ - SIZE_CLASS(15, 32, 256) \ - SIZE_CLASS(16, 64, 320) \ - SIZE_CLASS(17, 64, 384) \ - SIZE_CLASS(18, 64, 448) \ - SIZE_CLASS(19, 64, 512) \ - SIZE_CLASS(20, 128, 640) \ - SIZE_CLASS(21, 128, 768) \ - SIZE_CLASS(22, 128, 896) \ - SIZE_CLASS(23, 128, 1024) \ - SIZE_CLASS(24, 256, 1280) \ - SIZE_CLASS(25, 256, 1536) \ - SIZE_CLASS(26, 256, 1792) \ - SIZE_CLASS(27, 256, 2048) \ - SIZE_CLASS(28, 512, 2560) \ - SIZE_CLASS(29, 512, 3072) \ - SIZE_CLASS(30, 512, 3584) \ - SIZE_CLASS(31, 512, 4096) \ - SIZE_CLASS(32, 1024, 5120) \ - SIZE_CLASS(33, 1024, 6144) \ - SIZE_CLASS(34, 1024, 7168) \ - SIZE_CLASS(35, 1024, 8192) \ - SIZE_CLASS(36, 2048, 10240) \ - SIZE_CLASS(37, 2048, 12288) \ - SIZE_CLASS(38, 2048, 14336) \ - SIZE_CLASS(39, 2048, 16384) \ - SIZE_CLASS(40, 4096, 20480) \ - SIZE_CLASS(41, 4096, 24576) \ - SIZE_CLASS(42, 4096, 28672) \ +#if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 12) +#define SIZE_CLASSES \ + /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \ + SC( 0, 3, 3, 0, yes, 3) \ + SC( 1, 3, 3, 1, yes, 3) \ + SC( 2, 3, 3, 2, yes, 3) \ + SC( 3, 3, 3, 3, yes, 3) \ + \ + SC( 4, 5, 3, 1, yes, 3) \ + SC( 5, 5, 3, 2, yes, 3) \ + SC( 6, 5, 3, 3, yes, 3) \ + SC( 7, 5, 3, 4, yes, 3) \ + \ + SC( 8, 6, 4, 1, yes, 4) \ + SC( 9, 6, 4, 2, yes, 4) \ + SC( 10, 6, 4, 3, yes, 4) \ + SC( 11, 6, 4, 4, yes, 4) \ + \ + 
SC( 12, 7, 5, 1, yes, 5) \ + SC( 13, 7, 5, 2, yes, 5) \ + SC( 14, 7, 5, 3, yes, 5) \ + SC( 15, 7, 5, 4, yes, 5) \ + \ + SC( 16, 8, 6, 1, yes, 6) \ + SC( 17, 8, 6, 2, yes, 6) \ + SC( 18, 8, 6, 3, yes, 6) \ + SC( 19, 8, 6, 4, yes, 6) \ + \ + SC( 20, 9, 7, 1, yes, 7) \ + SC( 21, 9, 7, 2, yes, 7) \ + SC( 22, 9, 7, 3, yes, 7) \ + SC( 23, 9, 7, 4, yes, 7) \ + \ + SC( 24, 10, 8, 1, yes, 8) \ + SC( 25, 10, 8, 2, yes, 8) \ + SC( 26, 10, 8, 3, yes, 8) \ + SC( 27, 10, 8, 4, yes, 8) \ + \ + SC( 28, 11, 9, 1, yes, 9) \ + SC( 29, 11, 9, 2, yes, 9) \ + SC( 30, 11, 9, 3, yes, 9) \ + SC( 31, 11, 9, 4, yes, 9) \ + \ + SC( 32, 12, 10, 1, yes, no) \ + SC( 33, 12, 10, 2, yes, no) \ + SC( 34, 12, 10, 3, yes, no) \ + SC( 35, 12, 10, 4, yes, no) \ + \ + SC( 36, 13, 11, 1, yes, no) \ + SC( 37, 13, 11, 2, yes, no) \ + SC( 38, 13, 11, 3, yes, no) \ + SC( 39, 13, 11, 4, no, no) \ + \ + SC( 40, 14, 12, 1, no, no) \ + SC( 41, 14, 12, 2, no, no) \ + SC( 42, 14, 12, 3, no, no) \ + SC( 43, 14, 12, 4, no, no) \ + \ + SC( 44, 15, 13, 1, no, no) \ + SC( 45, 15, 13, 2, no, no) \ + SC( 46, 15, 13, 3, no, no) \ + SC( 47, 15, 13, 4, no, no) \ + \ + SC( 48, 16, 14, 1, no, no) \ + SC( 49, 16, 14, 2, no, no) \ + SC( 50, 16, 14, 3, no, no) \ + SC( 51, 16, 14, 4, no, no) \ + \ + SC( 52, 17, 15, 1, no, no) \ + SC( 53, 17, 15, 2, no, no) \ + SC( 54, 17, 15, 3, no, no) \ + SC( 55, 17, 15, 4, no, no) \ + \ + SC( 56, 18, 16, 1, no, no) \ + SC( 57, 18, 16, 2, no, no) \ + SC( 58, 18, 16, 3, no, no) \ + SC( 59, 18, 16, 4, no, no) \ + \ + SC( 60, 19, 17, 1, no, no) \ + SC( 61, 19, 17, 2, no, no) \ + SC( 62, 19, 17, 3, no, no) \ + SC( 63, 19, 17, 4, no, no) \ + \ + SC( 64, 20, 18, 1, no, no) \ + SC( 65, 20, 18, 2, no, no) \ + SC( 66, 20, 18, 3, no, no) \ + SC( 67, 20, 18, 4, no, no) \ + \ + SC( 68, 21, 19, 1, no, no) \ + SC( 69, 21, 19, 2, no, no) \ + SC( 70, 21, 19, 3, no, no) \ + SC( 71, 21, 19, 4, no, no) \ + \ + SC( 72, 22, 20, 1, no, no) \ + SC( 73, 22, 20, 2, no, no) \ + SC( 74, 22, 20, 3, no, no) \ + SC( 75, 22, 
20, 4, no, no) \ + \ + SC( 76, 23, 21, 1, no, no) \ + SC( 77, 23, 21, 2, no, no) \ + SC( 78, 23, 21, 3, no, no) \ + SC( 79, 23, 21, 4, no, no) \ + \ + SC( 80, 24, 22, 1, no, no) \ + SC( 81, 24, 22, 2, no, no) \ + SC( 82, 24, 22, 3, no, no) \ + SC( 83, 24, 22, 4, no, no) \ + \ + SC( 84, 25, 23, 1, no, no) \ + SC( 85, 25, 23, 2, no, no) \ + SC( 86, 25, 23, 3, no, no) \ + SC( 87, 25, 23, 4, no, no) \ + \ + SC( 88, 26, 24, 1, no, no) \ + SC( 89, 26, 24, 2, no, no) \ + SC( 90, 26, 24, 3, no, no) \ + SC( 91, 26, 24, 4, no, no) \ + \ + SC( 92, 27, 25, 1, no, no) \ + SC( 93, 27, 25, 2, no, no) \ + SC( 94, 27, 25, 3, no, no) \ + SC( 95, 27, 25, 4, no, no) \ + \ + SC( 96, 28, 26, 1, no, no) \ + SC( 97, 28, 26, 2, no, no) \ + SC( 98, 28, 26, 3, no, no) \ + SC( 99, 28, 26, 4, no, no) \ + \ + SC(100, 29, 27, 1, no, no) \ + SC(101, 29, 27, 2, no, no) \ + SC(102, 29, 27, 3, no, no) \ + SC(103, 29, 27, 4, no, no) \ + \ + SC(104, 30, 28, 1, no, no) \ + SC(105, 30, 28, 2, no, no) \ + SC(106, 30, 28, 3, no, no) \ + SC(107, 30, 28, 4, no, no) \ + \ + SC(108, 31, 29, 1, no, no) \ + SC(109, 31, 29, 2, no, no) \ + SC(110, 31, 29, 3, no, no) \ -#define NBINS 43 -#define SMALL_MAXCLASS 28672 -#endif - -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 16) #define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 8, 24) \ - SIZE_CLASS(3, 8, 32) \ - SIZE_CLASS(4, 8, 40) \ - SIZE_CLASS(5, 8, 48) \ - SIZE_CLASS(6, 8, 56) \ - SIZE_CLASS(7, 8, 64) \ - SIZE_CLASS(8, 16, 80) \ - SIZE_CLASS(9, 16, 96) \ - SIZE_CLASS(10, 16, 112) \ - SIZE_CLASS(11, 16, 128) \ - SIZE_CLASS(12, 32, 160) \ - SIZE_CLASS(13, 32, 192) \ - SIZE_CLASS(14, 32, 224) \ - SIZE_CLASS(15, 32, 256) \ - SIZE_CLASS(16, 64, 320) \ - SIZE_CLASS(17, 64, 384) \ - SIZE_CLASS(18, 64, 448) \ - SIZE_CLASS(19, 64, 512) \ - SIZE_CLASS(20, 128, 640) \ - SIZE_CLASS(21, 128, 768) \ - SIZE_CLASS(22, 128, 896) \ - SIZE_CLASS(23, 128, 1024) \ - 
SIZE_CLASS(24, 256, 1280) \ - SIZE_CLASS(25, 256, 1536) \ - SIZE_CLASS(26, 256, 1792) \ - SIZE_CLASS(27, 256, 2048) \ - SIZE_CLASS(28, 512, 2560) \ - SIZE_CLASS(29, 512, 3072) \ - SIZE_CLASS(30, 512, 3584) \ - SIZE_CLASS(31, 512, 4096) \ - SIZE_CLASS(32, 1024, 5120) \ - SIZE_CLASS(33, 1024, 6144) \ - SIZE_CLASS(34, 1024, 7168) \ - SIZE_CLASS(35, 1024, 8192) \ - SIZE_CLASS(36, 2048, 10240) \ - SIZE_CLASS(37, 2048, 12288) \ - SIZE_CLASS(38, 2048, 14336) \ - SIZE_CLASS(39, 2048, 16384) \ - SIZE_CLASS(40, 4096, 20480) \ - SIZE_CLASS(41, 4096, 24576) \ - SIZE_CLASS(42, 4096, 28672) \ - SIZE_CLASS(43, 4096, 32768) \ - SIZE_CLASS(44, 8192, 40960) \ - SIZE_CLASS(45, 8192, 49152) \ - SIZE_CLASS(46, 8192, 57344) \ - -#define NBINS 47 -#define SMALL_MAXCLASS 57344 +#define NTBINS 0 +#define NLBINS 32 +#define NBINS 39 +#define NSIZES 111 +#define LG_TINY_MAXCLASS "NA" +#define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) +#define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) +#define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 12) -#define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 16, 32) \ - SIZE_CLASS(3, 16, 48) \ - SIZE_CLASS(4, 16, 64) \ - SIZE_CLASS(5, 16, 80) \ - SIZE_CLASS(6, 16, 96) \ - SIZE_CLASS(7, 16, 112) \ - SIZE_CLASS(8, 16, 128) \ - SIZE_CLASS(9, 32, 160) \ - SIZE_CLASS(10, 32, 192) \ - SIZE_CLASS(11, 32, 224) \ - SIZE_CLASS(12, 32, 256) \ - SIZE_CLASS(13, 64, 320) \ - SIZE_CLASS(14, 64, 384) \ - SIZE_CLASS(15, 64, 448) \ - SIZE_CLASS(16, 64, 512) \ - SIZE_CLASS(17, 128, 640) \ - SIZE_CLASS(18, 128, 768) \ - SIZE_CLASS(19, 128, 896) \ - SIZE_CLASS(20, 128, 1024) \ - SIZE_CLASS(21, 256, 1280) \ - SIZE_CLASS(22, 256, 1536) \ - SIZE_CLASS(23, 256, 1792) \ - SIZE_CLASS(24, 256, 2048) \ - SIZE_CLASS(25, 512, 2560) \ - SIZE_CLASS(26, 
512, 3072) \ - SIZE_CLASS(27, 512, 3584) \ - -#define NBINS 28 -#define SMALL_MAXCLASS 3584 -#endif +#if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 12) +#define SIZE_CLASSES \ + /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \ + SC( 0, 3, 3, 0, yes, 3) \ + \ + SC( 1, 3, 3, 1, yes, 3) \ + SC( 2, 4, 4, 1, yes, 4) \ + SC( 3, 4, 4, 2, yes, 4) \ + SC( 4, 4, 4, 3, yes, 4) \ + \ + SC( 5, 6, 4, 1, yes, 4) \ + SC( 6, 6, 4, 2, yes, 4) \ + SC( 7, 6, 4, 3, yes, 4) \ + SC( 8, 6, 4, 4, yes, 4) \ + \ + SC( 9, 7, 5, 1, yes, 5) \ + SC( 10, 7, 5, 2, yes, 5) \ + SC( 11, 7, 5, 3, yes, 5) \ + SC( 12, 7, 5, 4, yes, 5) \ + \ + SC( 13, 8, 6, 1, yes, 6) \ + SC( 14, 8, 6, 2, yes, 6) \ + SC( 15, 8, 6, 3, yes, 6) \ + SC( 16, 8, 6, 4, yes, 6) \ + \ + SC( 17, 9, 7, 1, yes, 7) \ + SC( 18, 9, 7, 2, yes, 7) \ + SC( 19, 9, 7, 3, yes, 7) \ + SC( 20, 9, 7, 4, yes, 7) \ + \ + SC( 21, 10, 8, 1, yes, 8) \ + SC( 22, 10, 8, 2, yes, 8) \ + SC( 23, 10, 8, 3, yes, 8) \ + SC( 24, 10, 8, 4, yes, 8) \ + \ + SC( 25, 11, 9, 1, yes, 9) \ + SC( 26, 11, 9, 2, yes, 9) \ + SC( 27, 11, 9, 3, yes, 9) \ + SC( 28, 11, 9, 4, yes, 9) \ + \ + SC( 29, 12, 10, 1, yes, no) \ + SC( 30, 12, 10, 2, yes, no) \ + SC( 31, 12, 10, 3, yes, no) \ + SC( 32, 12, 10, 4, yes, no) \ + \ + SC( 33, 13, 11, 1, yes, no) \ + SC( 34, 13, 11, 2, yes, no) \ + SC( 35, 13, 11, 3, yes, no) \ + SC( 36, 13, 11, 4, no, no) \ + \ + SC( 37, 14, 12, 1, no, no) \ + SC( 38, 14, 12, 2, no, no) \ + SC( 39, 14, 12, 3, no, no) \ + SC( 40, 14, 12, 4, no, no) \ + \ + SC( 41, 15, 13, 1, no, no) \ + SC( 42, 15, 13, 2, no, no) \ + SC( 43, 15, 13, 3, no, no) \ + SC( 44, 15, 13, 4, no, no) \ + \ + SC( 45, 16, 14, 1, no, no) \ + SC( 46, 16, 14, 2, no, no) \ + SC( 47, 16, 14, 3, no, no) \ + SC( 48, 16, 14, 4, no, no) \ + \ + SC( 49, 17, 15, 1, no, no) \ + SC( 50, 17, 15, 2, no, no) \ + SC( 51, 17, 15, 3, no, no) \ + SC( 52, 17, 15, 4, no, no) \ + \ + SC( 53, 18, 16, 1, no, no) \ + SC( 54, 18, 16, 2, no, no) \ + SC( 55, 18, 16, 3, 
no, no) \ + SC( 56, 18, 16, 4, no, no) \ + \ + SC( 57, 19, 17, 1, no, no) \ + SC( 58, 19, 17, 2, no, no) \ + SC( 59, 19, 17, 3, no, no) \ + SC( 60, 19, 17, 4, no, no) \ + \ + SC( 61, 20, 18, 1, no, no) \ + SC( 62, 20, 18, 2, no, no) \ + SC( 63, 20, 18, 3, no, no) \ + SC( 64, 20, 18, 4, no, no) \ + \ + SC( 65, 21, 19, 1, no, no) \ + SC( 66, 21, 19, 2, no, no) \ + SC( 67, 21, 19, 3, no, no) \ + SC( 68, 21, 19, 4, no, no) \ + \ + SC( 69, 22, 20, 1, no, no) \ + SC( 70, 22, 20, 2, no, no) \ + SC( 71, 22, 20, 3, no, no) \ + SC( 72, 22, 20, 4, no, no) \ + \ + SC( 73, 23, 21, 1, no, no) \ + SC( 74, 23, 21, 2, no, no) \ + SC( 75, 23, 21, 3, no, no) \ + SC( 76, 23, 21, 4, no, no) \ + \ + SC( 77, 24, 22, 1, no, no) \ + SC( 78, 24, 22, 2, no, no) \ + SC( 79, 24, 22, 3, no, no) \ + SC( 80, 24, 22, 4, no, no) \ + \ + SC( 81, 25, 23, 1, no, no) \ + SC( 82, 25, 23, 2, no, no) \ + SC( 83, 25, 23, 3, no, no) \ + SC( 84, 25, 23, 4, no, no) \ + \ + SC( 85, 26, 24, 1, no, no) \ + SC( 86, 26, 24, 2, no, no) \ + SC( 87, 26, 24, 3, no, no) \ + SC( 88, 26, 24, 4, no, no) \ + \ + SC( 89, 27, 25, 1, no, no) \ + SC( 90, 27, 25, 2, no, no) \ + SC( 91, 27, 25, 3, no, no) \ + SC( 92, 27, 25, 4, no, no) \ + \ + SC( 93, 28, 26, 1, no, no) \ + SC( 94, 28, 26, 2, no, no) \ + SC( 95, 28, 26, 3, no, no) \ + SC( 96, 28, 26, 4, no, no) \ + \ + SC( 97, 29, 27, 1, no, no) \ + SC( 98, 29, 27, 2, no, no) \ + SC( 99, 29, 27, 3, no, no) \ + SC(100, 29, 27, 4, no, no) \ + \ + SC(101, 30, 28, 1, no, no) \ + SC(102, 30, 28, 2, no, no) \ + SC(103, 30, 28, 3, no, no) \ + SC(104, 30, 28, 4, no, no) \ + \ + SC(105, 31, 29, 1, no, no) \ + SC(106, 31, 29, 2, no, no) \ + SC(107, 31, 29, 3, no, no) \ -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 13) #define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 16, 32) \ - SIZE_CLASS(3, 16, 48) \ - SIZE_CLASS(4, 16, 64) \ - SIZE_CLASS(5, 16, 80) \ - SIZE_CLASS(6, 16, 96) \ 
- SIZE_CLASS(7, 16, 112) \ - SIZE_CLASS(8, 16, 128) \ - SIZE_CLASS(9, 32, 160) \ - SIZE_CLASS(10, 32, 192) \ - SIZE_CLASS(11, 32, 224) \ - SIZE_CLASS(12, 32, 256) \ - SIZE_CLASS(13, 64, 320) \ - SIZE_CLASS(14, 64, 384) \ - SIZE_CLASS(15, 64, 448) \ - SIZE_CLASS(16, 64, 512) \ - SIZE_CLASS(17, 128, 640) \ - SIZE_CLASS(18, 128, 768) \ - SIZE_CLASS(19, 128, 896) \ - SIZE_CLASS(20, 128, 1024) \ - SIZE_CLASS(21, 256, 1280) \ - SIZE_CLASS(22, 256, 1536) \ - SIZE_CLASS(23, 256, 1792) \ - SIZE_CLASS(24, 256, 2048) \ - SIZE_CLASS(25, 512, 2560) \ - SIZE_CLASS(26, 512, 3072) \ - SIZE_CLASS(27, 512, 3584) \ - SIZE_CLASS(28, 512, 4096) \ - SIZE_CLASS(29, 1024, 5120) \ - SIZE_CLASS(30, 1024, 6144) \ - SIZE_CLASS(31, 1024, 7168) \ - -#define NBINS 32 -#define SMALL_MAXCLASS 7168 +#define NTBINS 1 +#define NLBINS 29 +#define NBINS 36 +#define NSIZES 108 +#define LG_TINY_MAXCLASS 3 +#define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) +#define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) +#define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 14) -#define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 16, 32) \ - SIZE_CLASS(3, 16, 48) \ - SIZE_CLASS(4, 16, 64) \ - SIZE_CLASS(5, 16, 80) \ - SIZE_CLASS(6, 16, 96) \ - SIZE_CLASS(7, 16, 112) \ - SIZE_CLASS(8, 16, 128) \ - SIZE_CLASS(9, 32, 160) \ - SIZE_CLASS(10, 32, 192) \ - SIZE_CLASS(11, 32, 224) \ - SIZE_CLASS(12, 32, 256) \ - SIZE_CLASS(13, 64, 320) \ - SIZE_CLASS(14, 64, 384) \ - SIZE_CLASS(15, 64, 448) \ - SIZE_CLASS(16, 64, 512) \ - SIZE_CLASS(17, 128, 640) \ - SIZE_CLASS(18, 128, 768) \ - SIZE_CLASS(19, 128, 896) \ - SIZE_CLASS(20, 128, 1024) \ - SIZE_CLASS(21, 256, 1280) \ - SIZE_CLASS(22, 256, 1536) \ - SIZE_CLASS(23, 256, 1792) \ - SIZE_CLASS(24, 256, 2048) \ - SIZE_CLASS(25, 512, 2560) \ - 
SIZE_CLASS(26, 512, 3072) \ - SIZE_CLASS(27, 512, 3584) \ - SIZE_CLASS(28, 512, 4096) \ - SIZE_CLASS(29, 1024, 5120) \ - SIZE_CLASS(30, 1024, 6144) \ - SIZE_CLASS(31, 1024, 7168) \ - SIZE_CLASS(32, 1024, 8192) \ - SIZE_CLASS(33, 2048, 10240) \ - SIZE_CLASS(34, 2048, 12288) \ - SIZE_CLASS(35, 2048, 14336) \ - -#define NBINS 36 -#define SMALL_MAXCLASS 14336 -#endif +#if (LG_SIZEOF_PTR == 2 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 12) +#define SIZE_CLASSES \ + /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \ + SC( 0, 4, 4, 0, yes, 4) \ + SC( 1, 4, 4, 1, yes, 4) \ + SC( 2, 4, 4, 2, yes, 4) \ + SC( 3, 4, 4, 3, yes, 4) \ + \ + SC( 4, 6, 4, 1, yes, 4) \ + SC( 5, 6, 4, 2, yes, 4) \ + SC( 6, 6, 4, 3, yes, 4) \ + SC( 7, 6, 4, 4, yes, 4) \ + \ + SC( 8, 7, 5, 1, yes, 5) \ + SC( 9, 7, 5, 2, yes, 5) \ + SC( 10, 7, 5, 3, yes, 5) \ + SC( 11, 7, 5, 4, yes, 5) \ + \ + SC( 12, 8, 6, 1, yes, 6) \ + SC( 13, 8, 6, 2, yes, 6) \ + SC( 14, 8, 6, 3, yes, 6) \ + SC( 15, 8, 6, 4, yes, 6) \ + \ + SC( 16, 9, 7, 1, yes, 7) \ + SC( 17, 9, 7, 2, yes, 7) \ + SC( 18, 9, 7, 3, yes, 7) \ + SC( 19, 9, 7, 4, yes, 7) \ + \ + SC( 20, 10, 8, 1, yes, 8) \ + SC( 21, 10, 8, 2, yes, 8) \ + SC( 22, 10, 8, 3, yes, 8) \ + SC( 23, 10, 8, 4, yes, 8) \ + \ + SC( 24, 11, 9, 1, yes, 9) \ + SC( 25, 11, 9, 2, yes, 9) \ + SC( 26, 11, 9, 3, yes, 9) \ + SC( 27, 11, 9, 4, yes, 9) \ + \ + SC( 28, 12, 10, 1, yes, no) \ + SC( 29, 12, 10, 2, yes, no) \ + SC( 30, 12, 10, 3, yes, no) \ + SC( 31, 12, 10, 4, yes, no) \ + \ + SC( 32, 13, 11, 1, yes, no) \ + SC( 33, 13, 11, 2, yes, no) \ + SC( 34, 13, 11, 3, yes, no) \ + SC( 35, 13, 11, 4, no, no) \ + \ + SC( 36, 14, 12, 1, no, no) \ + SC( 37, 14, 12, 2, no, no) \ + SC( 38, 14, 12, 3, no, no) \ + SC( 39, 14, 12, 4, no, no) \ + \ + SC( 40, 15, 13, 1, no, no) \ + SC( 41, 15, 13, 2, no, no) \ + SC( 42, 15, 13, 3, no, no) \ + SC( 43, 15, 13, 4, no, no) \ + \ + SC( 44, 16, 14, 1, no, no) \ + SC( 45, 16, 14, 2, no, no) \ + SC( 46, 16, 14, 3, no, no) \ + SC( 47, 16, 
14, 4, no, no) \ + \ + SC( 48, 17, 15, 1, no, no) \ + SC( 49, 17, 15, 2, no, no) \ + SC( 50, 17, 15, 3, no, no) \ + SC( 51, 17, 15, 4, no, no) \ + \ + SC( 52, 18, 16, 1, no, no) \ + SC( 53, 18, 16, 2, no, no) \ + SC( 54, 18, 16, 3, no, no) \ + SC( 55, 18, 16, 4, no, no) \ + \ + SC( 56, 19, 17, 1, no, no) \ + SC( 57, 19, 17, 2, no, no) \ + SC( 58, 19, 17, 3, no, no) \ + SC( 59, 19, 17, 4, no, no) \ + \ + SC( 60, 20, 18, 1, no, no) \ + SC( 61, 20, 18, 2, no, no) \ + SC( 62, 20, 18, 3, no, no) \ + SC( 63, 20, 18, 4, no, no) \ + \ + SC( 64, 21, 19, 1, no, no) \ + SC( 65, 21, 19, 2, no, no) \ + SC( 66, 21, 19, 3, no, no) \ + SC( 67, 21, 19, 4, no, no) \ + \ + SC( 68, 22, 20, 1, no, no) \ + SC( 69, 22, 20, 2, no, no) \ + SC( 70, 22, 20, 3, no, no) \ + SC( 71, 22, 20, 4, no, no) \ + \ + SC( 72, 23, 21, 1, no, no) \ + SC( 73, 23, 21, 2, no, no) \ + SC( 74, 23, 21, 3, no, no) \ + SC( 75, 23, 21, 4, no, no) \ + \ + SC( 76, 24, 22, 1, no, no) \ + SC( 77, 24, 22, 2, no, no) \ + SC( 78, 24, 22, 3, no, no) \ + SC( 79, 24, 22, 4, no, no) \ + \ + SC( 80, 25, 23, 1, no, no) \ + SC( 81, 25, 23, 2, no, no) \ + SC( 82, 25, 23, 3, no, no) \ + SC( 83, 25, 23, 4, no, no) \ + \ + SC( 84, 26, 24, 1, no, no) \ + SC( 85, 26, 24, 2, no, no) \ + SC( 86, 26, 24, 3, no, no) \ + SC( 87, 26, 24, 4, no, no) \ + \ + SC( 88, 27, 25, 1, no, no) \ + SC( 89, 27, 25, 2, no, no) \ + SC( 90, 27, 25, 3, no, no) \ + SC( 91, 27, 25, 4, no, no) \ + \ + SC( 92, 28, 26, 1, no, no) \ + SC( 93, 28, 26, 2, no, no) \ + SC( 94, 28, 26, 3, no, no) \ + SC( 95, 28, 26, 4, no, no) \ + \ + SC( 96, 29, 27, 1, no, no) \ + SC( 97, 29, 27, 2, no, no) \ + SC( 98, 29, 27, 3, no, no) \ + SC( 99, 29, 27, 4, no, no) \ + \ + SC(100, 30, 28, 1, no, no) \ + SC(101, 30, 28, 2, no, no) \ + SC(102, 30, 28, 3, no, no) \ + SC(103, 30, 28, 4, no, no) \ + \ + SC(104, 31, 29, 1, no, no) \ + SC(105, 31, 29, 2, no, no) \ + SC(106, 31, 29, 3, no, no) \ -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 15) #define SIZE_CLASSES_DEFINED -/* 
SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 16, 32) \ - SIZE_CLASS(3, 16, 48) \ - SIZE_CLASS(4, 16, 64) \ - SIZE_CLASS(5, 16, 80) \ - SIZE_CLASS(6, 16, 96) \ - SIZE_CLASS(7, 16, 112) \ - SIZE_CLASS(8, 16, 128) \ - SIZE_CLASS(9, 32, 160) \ - SIZE_CLASS(10, 32, 192) \ - SIZE_CLASS(11, 32, 224) \ - SIZE_CLASS(12, 32, 256) \ - SIZE_CLASS(13, 64, 320) \ - SIZE_CLASS(14, 64, 384) \ - SIZE_CLASS(15, 64, 448) \ - SIZE_CLASS(16, 64, 512) \ - SIZE_CLASS(17, 128, 640) \ - SIZE_CLASS(18, 128, 768) \ - SIZE_CLASS(19, 128, 896) \ - SIZE_CLASS(20, 128, 1024) \ - SIZE_CLASS(21, 256, 1280) \ - SIZE_CLASS(22, 256, 1536) \ - SIZE_CLASS(23, 256, 1792) \ - SIZE_CLASS(24, 256, 2048) \ - SIZE_CLASS(25, 512, 2560) \ - SIZE_CLASS(26, 512, 3072) \ - SIZE_CLASS(27, 512, 3584) \ - SIZE_CLASS(28, 512, 4096) \ - SIZE_CLASS(29, 1024, 5120) \ - SIZE_CLASS(30, 1024, 6144) \ - SIZE_CLASS(31, 1024, 7168) \ - SIZE_CLASS(32, 1024, 8192) \ - SIZE_CLASS(33, 2048, 10240) \ - SIZE_CLASS(34, 2048, 12288) \ - SIZE_CLASS(35, 2048, 14336) \ - SIZE_CLASS(36, 2048, 16384) \ - SIZE_CLASS(37, 4096, 20480) \ - SIZE_CLASS(38, 4096, 24576) \ - SIZE_CLASS(39, 4096, 28672) \ - -#define NBINS 40 -#define SMALL_MAXCLASS 28672 +#define NTBINS 0 +#define NLBINS 28 +#define NBINS 35 +#define NSIZES 107 +#define LG_TINY_MAXCLASS "NA" +#define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) +#define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) +#define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 31) + (((size_t)3) << 29)) #endif -#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 16) -#define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 8, 8) \ - SIZE_CLASS(1, 8, 16) \ - SIZE_CLASS(2, 16, 32) \ - SIZE_CLASS(3, 16, 48) \ - SIZE_CLASS(4, 16, 64) \ - SIZE_CLASS(5, 16, 80) \ - SIZE_CLASS(6, 16, 96) \ - SIZE_CLASS(7, 16, 112) \ - SIZE_CLASS(8, 16, 128) \ - 
SIZE_CLASS(9, 32, 160) \ - SIZE_CLASS(10, 32, 192) \ - SIZE_CLASS(11, 32, 224) \ - SIZE_CLASS(12, 32, 256) \ - SIZE_CLASS(13, 64, 320) \ - SIZE_CLASS(14, 64, 384) \ - SIZE_CLASS(15, 64, 448) \ - SIZE_CLASS(16, 64, 512) \ - SIZE_CLASS(17, 128, 640) \ - SIZE_CLASS(18, 128, 768) \ - SIZE_CLASS(19, 128, 896) \ - SIZE_CLASS(20, 128, 1024) \ - SIZE_CLASS(21, 256, 1280) \ - SIZE_CLASS(22, 256, 1536) \ - SIZE_CLASS(23, 256, 1792) \ - SIZE_CLASS(24, 256, 2048) \ - SIZE_CLASS(25, 512, 2560) \ - SIZE_CLASS(26, 512, 3072) \ - SIZE_CLASS(27, 512, 3584) \ - SIZE_CLASS(28, 512, 4096) \ - SIZE_CLASS(29, 1024, 5120) \ - SIZE_CLASS(30, 1024, 6144) \ - SIZE_CLASS(31, 1024, 7168) \ - SIZE_CLASS(32, 1024, 8192) \ - SIZE_CLASS(33, 2048, 10240) \ - SIZE_CLASS(34, 2048, 12288) \ - SIZE_CLASS(35, 2048, 14336) \ - SIZE_CLASS(36, 2048, 16384) \ - SIZE_CLASS(37, 4096, 20480) \ - SIZE_CLASS(38, 4096, 24576) \ - SIZE_CLASS(39, 4096, 28672) \ - SIZE_CLASS(40, 4096, 32768) \ - SIZE_CLASS(41, 8192, 40960) \ - SIZE_CLASS(42, 8192, 49152) \ - SIZE_CLASS(43, 8192, 57344) \ - -#define NBINS 44 -#define SMALL_MAXCLASS 57344 -#endif +#if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 12) +#define SIZE_CLASSES \ + /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \ + SC( 0, 3, 3, 0, yes, 3) \ + SC( 1, 3, 3, 1, yes, 3) \ + SC( 2, 3, 3, 2, yes, 3) \ + SC( 3, 3, 3, 3, yes, 3) \ + \ + SC( 4, 5, 3, 1, yes, 3) \ + SC( 5, 5, 3, 2, yes, 3) \ + SC( 6, 5, 3, 3, yes, 3) \ + SC( 7, 5, 3, 4, yes, 3) \ + \ + SC( 8, 6, 4, 1, yes, 4) \ + SC( 9, 6, 4, 2, yes, 4) \ + SC( 10, 6, 4, 3, yes, 4) \ + SC( 11, 6, 4, 4, yes, 4) \ + \ + SC( 12, 7, 5, 1, yes, 5) \ + SC( 13, 7, 5, 2, yes, 5) \ + SC( 14, 7, 5, 3, yes, 5) \ + SC( 15, 7, 5, 4, yes, 5) \ + \ + SC( 16, 8, 6, 1, yes, 6) \ + SC( 17, 8, 6, 2, yes, 6) \ + SC( 18, 8, 6, 3, yes, 6) \ + SC( 19, 8, 6, 4, yes, 6) \ + \ + SC( 20, 9, 7, 1, yes, 7) \ + SC( 21, 9, 7, 2, yes, 7) \ + SC( 22, 9, 7, 3, yes, 7) \ + SC( 23, 9, 7, 4, yes, 7) \ + \ + SC( 
24, 10, 8, 1, yes, 8) \ + SC( 25, 10, 8, 2, yes, 8) \ + SC( 26, 10, 8, 3, yes, 8) \ + SC( 27, 10, 8, 4, yes, 8) \ + \ + SC( 28, 11, 9, 1, yes, 9) \ + SC( 29, 11, 9, 2, yes, 9) \ + SC( 30, 11, 9, 3, yes, 9) \ + SC( 31, 11, 9, 4, yes, 9) \ + \ + SC( 32, 12, 10, 1, yes, no) \ + SC( 33, 12, 10, 2, yes, no) \ + SC( 34, 12, 10, 3, yes, no) \ + SC( 35, 12, 10, 4, yes, no) \ + \ + SC( 36, 13, 11, 1, yes, no) \ + SC( 37, 13, 11, 2, yes, no) \ + SC( 38, 13, 11, 3, yes, no) \ + SC( 39, 13, 11, 4, no, no) \ + \ + SC( 40, 14, 12, 1, no, no) \ + SC( 41, 14, 12, 2, no, no) \ + SC( 42, 14, 12, 3, no, no) \ + SC( 43, 14, 12, 4, no, no) \ + \ + SC( 44, 15, 13, 1, no, no) \ + SC( 45, 15, 13, 2, no, no) \ + SC( 46, 15, 13, 3, no, no) \ + SC( 47, 15, 13, 4, no, no) \ + \ + SC( 48, 16, 14, 1, no, no) \ + SC( 49, 16, 14, 2, no, no) \ + SC( 50, 16, 14, 3, no, no) \ + SC( 51, 16, 14, 4, no, no) \ + \ + SC( 52, 17, 15, 1, no, no) \ + SC( 53, 17, 15, 2, no, no) \ + SC( 54, 17, 15, 3, no, no) \ + SC( 55, 17, 15, 4, no, no) \ + \ + SC( 56, 18, 16, 1, no, no) \ + SC( 57, 18, 16, 2, no, no) \ + SC( 58, 18, 16, 3, no, no) \ + SC( 59, 18, 16, 4, no, no) \ + \ + SC( 60, 19, 17, 1, no, no) \ + SC( 61, 19, 17, 2, no, no) \ + SC( 62, 19, 17, 3, no, no) \ + SC( 63, 19, 17, 4, no, no) \ + \ + SC( 64, 20, 18, 1, no, no) \ + SC( 65, 20, 18, 2, no, no) \ + SC( 66, 20, 18, 3, no, no) \ + SC( 67, 20, 18, 4, no, no) \ + \ + SC( 68, 21, 19, 1, no, no) \ + SC( 69, 21, 19, 2, no, no) \ + SC( 70, 21, 19, 3, no, no) \ + SC( 71, 21, 19, 4, no, no) \ + \ + SC( 72, 22, 20, 1, no, no) \ + SC( 73, 22, 20, 2, no, no) \ + SC( 74, 22, 20, 3, no, no) \ + SC( 75, 22, 20, 4, no, no) \ + \ + SC( 76, 23, 21, 1, no, no) \ + SC( 77, 23, 21, 2, no, no) \ + SC( 78, 23, 21, 3, no, no) \ + SC( 79, 23, 21, 4, no, no) \ + \ + SC( 80, 24, 22, 1, no, no) \ + SC( 81, 24, 22, 2, no, no) \ + SC( 82, 24, 22, 3, no, no) \ + SC( 83, 24, 22, 4, no, no) \ + \ + SC( 84, 25, 23, 1, no, no) \ + SC( 85, 25, 23, 2, no, no) \ + SC( 86, 25, 23, 3, no, 
no) \ + SC( 87, 25, 23, 4, no, no) \ + \ + SC( 88, 26, 24, 1, no, no) \ + SC( 89, 26, 24, 2, no, no) \ + SC( 90, 26, 24, 3, no, no) \ + SC( 91, 26, 24, 4, no, no) \ + \ + SC( 92, 27, 25, 1, no, no) \ + SC( 93, 27, 25, 2, no, no) \ + SC( 94, 27, 25, 3, no, no) \ + SC( 95, 27, 25, 4, no, no) \ + \ + SC( 96, 28, 26, 1, no, no) \ + SC( 97, 28, 26, 2, no, no) \ + SC( 98, 28, 26, 3, no, no) \ + SC( 99, 28, 26, 4, no, no) \ + \ + SC(100, 29, 27, 1, no, no) \ + SC(101, 29, 27, 2, no, no) \ + SC(102, 29, 27, 3, no, no) \ + SC(103, 29, 27, 4, no, no) \ + \ + SC(104, 30, 28, 1, no, no) \ + SC(105, 30, 28, 2, no, no) \ + SC(106, 30, 28, 3, no, no) \ + SC(107, 30, 28, 4, no, no) \ + \ + SC(108, 31, 29, 1, no, no) \ + SC(109, 31, 29, 2, no, no) \ + SC(110, 31, 29, 3, no, no) \ + SC(111, 31, 29, 4, no, no) \ + \ + SC(112, 32, 30, 1, no, no) \ + SC(113, 32, 30, 2, no, no) \ + SC(114, 32, 30, 3, no, no) \ + SC(115, 32, 30, 4, no, no) \ + \ + SC(116, 33, 31, 1, no, no) \ + SC(117, 33, 31, 2, no, no) \ + SC(118, 33, 31, 3, no, no) \ + SC(119, 33, 31, 4, no, no) \ + \ + SC(120, 34, 32, 1, no, no) \ + SC(121, 34, 32, 2, no, no) \ + SC(122, 34, 32, 3, no, no) \ + SC(123, 34, 32, 4, no, no) \ + \ + SC(124, 35, 33, 1, no, no) \ + SC(125, 35, 33, 2, no, no) \ + SC(126, 35, 33, 3, no, no) \ + SC(127, 35, 33, 4, no, no) \ + \ + SC(128, 36, 34, 1, no, no) \ + SC(129, 36, 34, 2, no, no) \ + SC(130, 36, 34, 3, no, no) \ + SC(131, 36, 34, 4, no, no) \ + \ + SC(132, 37, 35, 1, no, no) \ + SC(133, 37, 35, 2, no, no) \ + SC(134, 37, 35, 3, no, no) \ + SC(135, 37, 35, 4, no, no) \ + \ + SC(136, 38, 36, 1, no, no) \ + SC(137, 38, 36, 2, no, no) \ + SC(138, 38, 36, 3, no, no) \ + SC(139, 38, 36, 4, no, no) \ + \ + SC(140, 39, 37, 1, no, no) \ + SC(141, 39, 37, 2, no, no) \ + SC(142, 39, 37, 3, no, no) \ + SC(143, 39, 37, 4, no, no) \ + \ + SC(144, 40, 38, 1, no, no) \ + SC(145, 40, 38, 2, no, no) \ + SC(146, 40, 38, 3, no, no) \ + SC(147, 40, 38, 4, no, no) \ + \ + SC(148, 41, 39, 1, no, no) \ + 
SC(149, 41, 39, 2, no, no) \ + SC(150, 41, 39, 3, no, no) \ + SC(151, 41, 39, 4, no, no) \ + \ + SC(152, 42, 40, 1, no, no) \ + SC(153, 42, 40, 2, no, no) \ + SC(154, 42, 40, 3, no, no) \ + SC(155, 42, 40, 4, no, no) \ + \ + SC(156, 43, 41, 1, no, no) \ + SC(157, 43, 41, 2, no, no) \ + SC(158, 43, 41, 3, no, no) \ + SC(159, 43, 41, 4, no, no) \ + \ + SC(160, 44, 42, 1, no, no) \ + SC(161, 44, 42, 2, no, no) \ + SC(162, 44, 42, 3, no, no) \ + SC(163, 44, 42, 4, no, no) \ + \ + SC(164, 45, 43, 1, no, no) \ + SC(165, 45, 43, 2, no, no) \ + SC(166, 45, 43, 3, no, no) \ + SC(167, 45, 43, 4, no, no) \ + \ + SC(168, 46, 44, 1, no, no) \ + SC(169, 46, 44, 2, no, no) \ + SC(170, 46, 44, 3, no, no) \ + SC(171, 46, 44, 4, no, no) \ + \ + SC(172, 47, 45, 1, no, no) \ + SC(173, 47, 45, 2, no, no) \ + SC(174, 47, 45, 3, no, no) \ + SC(175, 47, 45, 4, no, no) \ + \ + SC(176, 48, 46, 1, no, no) \ + SC(177, 48, 46, 2, no, no) \ + SC(178, 48, 46, 3, no, no) \ + SC(179, 48, 46, 4, no, no) \ + \ + SC(180, 49, 47, 1, no, no) \ + SC(181, 49, 47, 2, no, no) \ + SC(182, 49, 47, 3, no, no) \ + SC(183, 49, 47, 4, no, no) \ + \ + SC(184, 50, 48, 1, no, no) \ + SC(185, 50, 48, 2, no, no) \ + SC(186, 50, 48, 3, no, no) \ + SC(187, 50, 48, 4, no, no) \ + \ + SC(188, 51, 49, 1, no, no) \ + SC(189, 51, 49, 2, no, no) \ + SC(190, 51, 49, 3, no, no) \ + SC(191, 51, 49, 4, no, no) \ + \ + SC(192, 52, 50, 1, no, no) \ + SC(193, 52, 50, 2, no, no) \ + SC(194, 52, 50, 3, no, no) \ + SC(195, 52, 50, 4, no, no) \ + \ + SC(196, 53, 51, 1, no, no) \ + SC(197, 53, 51, 2, no, no) \ + SC(198, 53, 51, 3, no, no) \ + SC(199, 53, 51, 4, no, no) \ + \ + SC(200, 54, 52, 1, no, no) \ + SC(201, 54, 52, 2, no, no) \ + SC(202, 54, 52, 3, no, no) \ + SC(203, 54, 52, 4, no, no) \ + \ + SC(204, 55, 53, 1, no, no) \ + SC(205, 55, 53, 2, no, no) \ + SC(206, 55, 53, 3, no, no) \ + SC(207, 55, 53, 4, no, no) \ + \ + SC(208, 56, 54, 1, no, no) \ + SC(209, 56, 54, 2, no, no) \ + SC(210, 56, 54, 3, no, no) \ + SC(211, 56, 54, 
4, no, no) \ + \ + SC(212, 57, 55, 1, no, no) \ + SC(213, 57, 55, 2, no, no) \ + SC(214, 57, 55, 3, no, no) \ + SC(215, 57, 55, 4, no, no) \ + \ + SC(216, 58, 56, 1, no, no) \ + SC(217, 58, 56, 2, no, no) \ + SC(218, 58, 56, 3, no, no) \ + SC(219, 58, 56, 4, no, no) \ + \ + SC(220, 59, 57, 1, no, no) \ + SC(221, 59, 57, 2, no, no) \ + SC(222, 59, 57, 3, no, no) \ + SC(223, 59, 57, 4, no, no) \ + \ + SC(224, 60, 58, 1, no, no) \ + SC(225, 60, 58, 2, no, no) \ + SC(226, 60, 58, 3, no, no) \ + SC(227, 60, 58, 4, no, no) \ + \ + SC(228, 61, 59, 1, no, no) \ + SC(229, 61, 59, 2, no, no) \ + SC(230, 61, 59, 3, no, no) \ + SC(231, 61, 59, 4, no, no) \ + \ + SC(232, 62, 60, 1, no, no) \ + SC(233, 62, 60, 2, no, no) \ + SC(234, 62, 60, 3, no, no) \ + SC(235, 62, 60, 4, no, no) \ + \ + SC(236, 63, 61, 1, no, no) \ + SC(237, 63, 61, 2, no, no) \ + SC(238, 63, 61, 3, no, no) \ -#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 12) #define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 16, 16) \ - SIZE_CLASS(1, 16, 32) \ - SIZE_CLASS(2, 16, 48) \ - SIZE_CLASS(3, 16, 64) \ - SIZE_CLASS(4, 16, 80) \ - SIZE_CLASS(5, 16, 96) \ - SIZE_CLASS(6, 16, 112) \ - SIZE_CLASS(7, 16, 128) \ - SIZE_CLASS(8, 32, 160) \ - SIZE_CLASS(9, 32, 192) \ - SIZE_CLASS(10, 32, 224) \ - SIZE_CLASS(11, 32, 256) \ - SIZE_CLASS(12, 64, 320) \ - SIZE_CLASS(13, 64, 384) \ - SIZE_CLASS(14, 64, 448) \ - SIZE_CLASS(15, 64, 512) \ - SIZE_CLASS(16, 128, 640) \ - SIZE_CLASS(17, 128, 768) \ - SIZE_CLASS(18, 128, 896) \ - SIZE_CLASS(19, 128, 1024) \ - SIZE_CLASS(20, 256, 1280) \ - SIZE_CLASS(21, 256, 1536) \ - SIZE_CLASS(22, 256, 1792) \ - SIZE_CLASS(23, 256, 2048) \ - SIZE_CLASS(24, 512, 2560) \ - SIZE_CLASS(25, 512, 3072) \ - SIZE_CLASS(26, 512, 3584) \ - -#define NBINS 27 -#define SMALL_MAXCLASS 3584 +#define NTBINS 0 +#define NLBINS 32 +#define NBINS 39 +#define NSIZES 239 +#define LG_TINY_MAXCLASS "NA" +#define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) 
<< 9)) +#define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) +#define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif -#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 13) -#define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 16, 16) \ - SIZE_CLASS(1, 16, 32) \ - SIZE_CLASS(2, 16, 48) \ - SIZE_CLASS(3, 16, 64) \ - SIZE_CLASS(4, 16, 80) \ - SIZE_CLASS(5, 16, 96) \ - SIZE_CLASS(6, 16, 112) \ - SIZE_CLASS(7, 16, 128) \ - SIZE_CLASS(8, 32, 160) \ - SIZE_CLASS(9, 32, 192) \ - SIZE_CLASS(10, 32, 224) \ - SIZE_CLASS(11, 32, 256) \ - SIZE_CLASS(12, 64, 320) \ - SIZE_CLASS(13, 64, 384) \ - SIZE_CLASS(14, 64, 448) \ - SIZE_CLASS(15, 64, 512) \ - SIZE_CLASS(16, 128, 640) \ - SIZE_CLASS(17, 128, 768) \ - SIZE_CLASS(18, 128, 896) \ - SIZE_CLASS(19, 128, 1024) \ - SIZE_CLASS(20, 256, 1280) \ - SIZE_CLASS(21, 256, 1536) \ - SIZE_CLASS(22, 256, 1792) \ - SIZE_CLASS(23, 256, 2048) \ - SIZE_CLASS(24, 512, 2560) \ - SIZE_CLASS(25, 512, 3072) \ - SIZE_CLASS(26, 512, 3584) \ - SIZE_CLASS(27, 512, 4096) \ - SIZE_CLASS(28, 1024, 5120) \ - SIZE_CLASS(29, 1024, 6144) \ - SIZE_CLASS(30, 1024, 7168) \ +#if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 12) +#define SIZE_CLASSES \ + /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \ + SC( 0, 3, 3, 0, yes, 3) \ + \ + SC( 1, 3, 3, 1, yes, 3) \ + SC( 2, 4, 4, 1, yes, 4) \ + SC( 3, 4, 4, 2, yes, 4) \ + SC( 4, 4, 4, 3, yes, 4) \ + \ + SC( 5, 6, 4, 1, yes, 4) \ + SC( 6, 6, 4, 2, yes, 4) \ + SC( 7, 6, 4, 3, yes, 4) \ + SC( 8, 6, 4, 4, yes, 4) \ + \ + SC( 9, 7, 5, 1, yes, 5) \ + SC( 10, 7, 5, 2, yes, 5) \ + SC( 11, 7, 5, 3, yes, 5) \ + SC( 12, 7, 5, 4, yes, 5) \ + \ + SC( 13, 8, 6, 1, yes, 6) \ + SC( 14, 8, 6, 2, yes, 6) \ + SC( 15, 8, 6, 3, yes, 6) \ + SC( 16, 8, 6, 4, yes, 6) \ + \ + SC( 17, 9, 7, 1, yes, 7) \ + SC( 18, 9, 7, 2, yes, 7) \ + SC( 19, 9, 7, 3, yes, 7) \ + SC( 20, 9, 7, 4, yes, 7) \ + 
\ + SC( 21, 10, 8, 1, yes, 8) \ + SC( 22, 10, 8, 2, yes, 8) \ + SC( 23, 10, 8, 3, yes, 8) \ + SC( 24, 10, 8, 4, yes, 8) \ + \ + SC( 25, 11, 9, 1, yes, 9) \ + SC( 26, 11, 9, 2, yes, 9) \ + SC( 27, 11, 9, 3, yes, 9) \ + SC( 28, 11, 9, 4, yes, 9) \ + \ + SC( 29, 12, 10, 1, yes, no) \ + SC( 30, 12, 10, 2, yes, no) \ + SC( 31, 12, 10, 3, yes, no) \ + SC( 32, 12, 10, 4, yes, no) \ + \ + SC( 33, 13, 11, 1, yes, no) \ + SC( 34, 13, 11, 2, yes, no) \ + SC( 35, 13, 11, 3, yes, no) \ + SC( 36, 13, 11, 4, no, no) \ + \ + SC( 37, 14, 12, 1, no, no) \ + SC( 38, 14, 12, 2, no, no) \ + SC( 39, 14, 12, 3, no, no) \ + SC( 40, 14, 12, 4, no, no) \ + \ + SC( 41, 15, 13, 1, no, no) \ + SC( 42, 15, 13, 2, no, no) \ + SC( 43, 15, 13, 3, no, no) \ + SC( 44, 15, 13, 4, no, no) \ + \ + SC( 45, 16, 14, 1, no, no) \ + SC( 46, 16, 14, 2, no, no) \ + SC( 47, 16, 14, 3, no, no) \ + SC( 48, 16, 14, 4, no, no) \ + \ + SC( 49, 17, 15, 1, no, no) \ + SC( 50, 17, 15, 2, no, no) \ + SC( 51, 17, 15, 3, no, no) \ + SC( 52, 17, 15, 4, no, no) \ + \ + SC( 53, 18, 16, 1, no, no) \ + SC( 54, 18, 16, 2, no, no) \ + SC( 55, 18, 16, 3, no, no) \ + SC( 56, 18, 16, 4, no, no) \ + \ + SC( 57, 19, 17, 1, no, no) \ + SC( 58, 19, 17, 2, no, no) \ + SC( 59, 19, 17, 3, no, no) \ + SC( 60, 19, 17, 4, no, no) \ + \ + SC( 61, 20, 18, 1, no, no) \ + SC( 62, 20, 18, 2, no, no) \ + SC( 63, 20, 18, 3, no, no) \ + SC( 64, 20, 18, 4, no, no) \ + \ + SC( 65, 21, 19, 1, no, no) \ + SC( 66, 21, 19, 2, no, no) \ + SC( 67, 21, 19, 3, no, no) \ + SC( 68, 21, 19, 4, no, no) \ + \ + SC( 69, 22, 20, 1, no, no) \ + SC( 70, 22, 20, 2, no, no) \ + SC( 71, 22, 20, 3, no, no) \ + SC( 72, 22, 20, 4, no, no) \ + \ + SC( 73, 23, 21, 1, no, no) \ + SC( 74, 23, 21, 2, no, no) \ + SC( 75, 23, 21, 3, no, no) \ + SC( 76, 23, 21, 4, no, no) \ + \ + SC( 77, 24, 22, 1, no, no) \ + SC( 78, 24, 22, 2, no, no) \ + SC( 79, 24, 22, 3, no, no) \ + SC( 80, 24, 22, 4, no, no) \ + \ + SC( 81, 25, 23, 1, no, no) \ + SC( 82, 25, 23, 2, no, no) \ + SC( 83, 25, 
23, 3, no, no) \ + SC( 84, 25, 23, 4, no, no) \ + \ + SC( 85, 26, 24, 1, no, no) \ + SC( 86, 26, 24, 2, no, no) \ + SC( 87, 26, 24, 3, no, no) \ + SC( 88, 26, 24, 4, no, no) \ + \ + SC( 89, 27, 25, 1, no, no) \ + SC( 90, 27, 25, 2, no, no) \ + SC( 91, 27, 25, 3, no, no) \ + SC( 92, 27, 25, 4, no, no) \ + \ + SC( 93, 28, 26, 1, no, no) \ + SC( 94, 28, 26, 2, no, no) \ + SC( 95, 28, 26, 3, no, no) \ + SC( 96, 28, 26, 4, no, no) \ + \ + SC( 97, 29, 27, 1, no, no) \ + SC( 98, 29, 27, 2, no, no) \ + SC( 99, 29, 27, 3, no, no) \ + SC(100, 29, 27, 4, no, no) \ + \ + SC(101, 30, 28, 1, no, no) \ + SC(102, 30, 28, 2, no, no) \ + SC(103, 30, 28, 3, no, no) \ + SC(104, 30, 28, 4, no, no) \ + \ + SC(105, 31, 29, 1, no, no) \ + SC(106, 31, 29, 2, no, no) \ + SC(107, 31, 29, 3, no, no) \ + SC(108, 31, 29, 4, no, no) \ + \ + SC(109, 32, 30, 1, no, no) \ + SC(110, 32, 30, 2, no, no) \ + SC(111, 32, 30, 3, no, no) \ + SC(112, 32, 30, 4, no, no) \ + \ + SC(113, 33, 31, 1, no, no) \ + SC(114, 33, 31, 2, no, no) \ + SC(115, 33, 31, 3, no, no) \ + SC(116, 33, 31, 4, no, no) \ + \ + SC(117, 34, 32, 1, no, no) \ + SC(118, 34, 32, 2, no, no) \ + SC(119, 34, 32, 3, no, no) \ + SC(120, 34, 32, 4, no, no) \ + \ + SC(121, 35, 33, 1, no, no) \ + SC(122, 35, 33, 2, no, no) \ + SC(123, 35, 33, 3, no, no) \ + SC(124, 35, 33, 4, no, no) \ + \ + SC(125, 36, 34, 1, no, no) \ + SC(126, 36, 34, 2, no, no) \ + SC(127, 36, 34, 3, no, no) \ + SC(128, 36, 34, 4, no, no) \ + \ + SC(129, 37, 35, 1, no, no) \ + SC(130, 37, 35, 2, no, no) \ + SC(131, 37, 35, 3, no, no) \ + SC(132, 37, 35, 4, no, no) \ + \ + SC(133, 38, 36, 1, no, no) \ + SC(134, 38, 36, 2, no, no) \ + SC(135, 38, 36, 3, no, no) \ + SC(136, 38, 36, 4, no, no) \ + \ + SC(137, 39, 37, 1, no, no) \ + SC(138, 39, 37, 2, no, no) \ + SC(139, 39, 37, 3, no, no) \ + SC(140, 39, 37, 4, no, no) \ + \ + SC(141, 40, 38, 1, no, no) \ + SC(142, 40, 38, 2, no, no) \ + SC(143, 40, 38, 3, no, no) \ + SC(144, 40, 38, 4, no, no) \ + \ + SC(145, 41, 39, 1, no, 
no) \ + SC(146, 41, 39, 2, no, no) \ + SC(147, 41, 39, 3, no, no) \ + SC(148, 41, 39, 4, no, no) \ + \ + SC(149, 42, 40, 1, no, no) \ + SC(150, 42, 40, 2, no, no) \ + SC(151, 42, 40, 3, no, no) \ + SC(152, 42, 40, 4, no, no) \ + \ + SC(153, 43, 41, 1, no, no) \ + SC(154, 43, 41, 2, no, no) \ + SC(155, 43, 41, 3, no, no) \ + SC(156, 43, 41, 4, no, no) \ + \ + SC(157, 44, 42, 1, no, no) \ + SC(158, 44, 42, 2, no, no) \ + SC(159, 44, 42, 3, no, no) \ + SC(160, 44, 42, 4, no, no) \ + \ + SC(161, 45, 43, 1, no, no) \ + SC(162, 45, 43, 2, no, no) \ + SC(163, 45, 43, 3, no, no) \ + SC(164, 45, 43, 4, no, no) \ + \ + SC(165, 46, 44, 1, no, no) \ + SC(166, 46, 44, 2, no, no) \ + SC(167, 46, 44, 3, no, no) \ + SC(168, 46, 44, 4, no, no) \ + \ + SC(169, 47, 45, 1, no, no) \ + SC(170, 47, 45, 2, no, no) \ + SC(171, 47, 45, 3, no, no) \ + SC(172, 47, 45, 4, no, no) \ + \ + SC(173, 48, 46, 1, no, no) \ + SC(174, 48, 46, 2, no, no) \ + SC(175, 48, 46, 3, no, no) \ + SC(176, 48, 46, 4, no, no) \ + \ + SC(177, 49, 47, 1, no, no) \ + SC(178, 49, 47, 2, no, no) \ + SC(179, 49, 47, 3, no, no) \ + SC(180, 49, 47, 4, no, no) \ + \ + SC(181, 50, 48, 1, no, no) \ + SC(182, 50, 48, 2, no, no) \ + SC(183, 50, 48, 3, no, no) \ + SC(184, 50, 48, 4, no, no) \ + \ + SC(185, 51, 49, 1, no, no) \ + SC(186, 51, 49, 2, no, no) \ + SC(187, 51, 49, 3, no, no) \ + SC(188, 51, 49, 4, no, no) \ + \ + SC(189, 52, 50, 1, no, no) \ + SC(190, 52, 50, 2, no, no) \ + SC(191, 52, 50, 3, no, no) \ + SC(192, 52, 50, 4, no, no) \ + \ + SC(193, 53, 51, 1, no, no) \ + SC(194, 53, 51, 2, no, no) \ + SC(195, 53, 51, 3, no, no) \ + SC(196, 53, 51, 4, no, no) \ + \ + SC(197, 54, 52, 1, no, no) \ + SC(198, 54, 52, 2, no, no) \ + SC(199, 54, 52, 3, no, no) \ + SC(200, 54, 52, 4, no, no) \ + \ + SC(201, 55, 53, 1, no, no) \ + SC(202, 55, 53, 2, no, no) \ + SC(203, 55, 53, 3, no, no) \ + SC(204, 55, 53, 4, no, no) \ + \ + SC(205, 56, 54, 1, no, no) \ + SC(206, 56, 54, 2, no, no) \ + SC(207, 56, 54, 3, no, no) \ + SC(208, 
56, 54, 4, no, no) \ + \ + SC(209, 57, 55, 1, no, no) \ + SC(210, 57, 55, 2, no, no) \ + SC(211, 57, 55, 3, no, no) \ + SC(212, 57, 55, 4, no, no) \ + \ + SC(213, 58, 56, 1, no, no) \ + SC(214, 58, 56, 2, no, no) \ + SC(215, 58, 56, 3, no, no) \ + SC(216, 58, 56, 4, no, no) \ + \ + SC(217, 59, 57, 1, no, no) \ + SC(218, 59, 57, 2, no, no) \ + SC(219, 59, 57, 3, no, no) \ + SC(220, 59, 57, 4, no, no) \ + \ + SC(221, 60, 58, 1, no, no) \ + SC(222, 60, 58, 2, no, no) \ + SC(223, 60, 58, 3, no, no) \ + SC(224, 60, 58, 4, no, no) \ + \ + SC(225, 61, 59, 1, no, no) \ + SC(226, 61, 59, 2, no, no) \ + SC(227, 61, 59, 3, no, no) \ + SC(228, 61, 59, 4, no, no) \ + \ + SC(229, 62, 60, 1, no, no) \ + SC(230, 62, 60, 2, no, no) \ + SC(231, 62, 60, 3, no, no) \ + SC(232, 62, 60, 4, no, no) \ + \ + SC(233, 63, 61, 1, no, no) \ + SC(234, 63, 61, 2, no, no) \ + SC(235, 63, 61, 3, no, no) \ -#define NBINS 31 -#define SMALL_MAXCLASS 7168 -#endif - -#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 14) #define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 16, 16) \ - SIZE_CLASS(1, 16, 32) \ - SIZE_CLASS(2, 16, 48) \ - SIZE_CLASS(3, 16, 64) \ - SIZE_CLASS(4, 16, 80) \ - SIZE_CLASS(5, 16, 96) \ - SIZE_CLASS(6, 16, 112) \ - SIZE_CLASS(7, 16, 128) \ - SIZE_CLASS(8, 32, 160) \ - SIZE_CLASS(9, 32, 192) \ - SIZE_CLASS(10, 32, 224) \ - SIZE_CLASS(11, 32, 256) \ - SIZE_CLASS(12, 64, 320) \ - SIZE_CLASS(13, 64, 384) \ - SIZE_CLASS(14, 64, 448) \ - SIZE_CLASS(15, 64, 512) \ - SIZE_CLASS(16, 128, 640) \ - SIZE_CLASS(17, 128, 768) \ - SIZE_CLASS(18, 128, 896) \ - SIZE_CLASS(19, 128, 1024) \ - SIZE_CLASS(20, 256, 1280) \ - SIZE_CLASS(21, 256, 1536) \ - SIZE_CLASS(22, 256, 1792) \ - SIZE_CLASS(23, 256, 2048) \ - SIZE_CLASS(24, 512, 2560) \ - SIZE_CLASS(25, 512, 3072) \ - SIZE_CLASS(26, 512, 3584) \ - SIZE_CLASS(27, 512, 4096) \ - SIZE_CLASS(28, 1024, 5120) \ - SIZE_CLASS(29, 1024, 6144) \ - SIZE_CLASS(30, 1024, 7168) \ - SIZE_CLASS(31, 1024, 
8192) \ - SIZE_CLASS(32, 2048, 10240) \ - SIZE_CLASS(33, 2048, 12288) \ - SIZE_CLASS(34, 2048, 14336) \ - -#define NBINS 35 -#define SMALL_MAXCLASS 14336 +#define NTBINS 1 +#define NLBINS 29 +#define NBINS 36 +#define NSIZES 236 +#define LG_TINY_MAXCLASS 3 +#define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) +#define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) +#define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif -#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 15) -#define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 16, 16) \ - SIZE_CLASS(1, 16, 32) \ - SIZE_CLASS(2, 16, 48) \ - SIZE_CLASS(3, 16, 64) \ - SIZE_CLASS(4, 16, 80) \ - SIZE_CLASS(5, 16, 96) \ - SIZE_CLASS(6, 16, 112) \ - SIZE_CLASS(7, 16, 128) \ - SIZE_CLASS(8, 32, 160) \ - SIZE_CLASS(9, 32, 192) \ - SIZE_CLASS(10, 32, 224) \ - SIZE_CLASS(11, 32, 256) \ - SIZE_CLASS(12, 64, 320) \ - SIZE_CLASS(13, 64, 384) \ - SIZE_CLASS(14, 64, 448) \ - SIZE_CLASS(15, 64, 512) \ - SIZE_CLASS(16, 128, 640) \ - SIZE_CLASS(17, 128, 768) \ - SIZE_CLASS(18, 128, 896) \ - SIZE_CLASS(19, 128, 1024) \ - SIZE_CLASS(20, 256, 1280) \ - SIZE_CLASS(21, 256, 1536) \ - SIZE_CLASS(22, 256, 1792) \ - SIZE_CLASS(23, 256, 2048) \ - SIZE_CLASS(24, 512, 2560) \ - SIZE_CLASS(25, 512, 3072) \ - SIZE_CLASS(26, 512, 3584) \ - SIZE_CLASS(27, 512, 4096) \ - SIZE_CLASS(28, 1024, 5120) \ - SIZE_CLASS(29, 1024, 6144) \ - SIZE_CLASS(30, 1024, 7168) \ - SIZE_CLASS(31, 1024, 8192) \ - SIZE_CLASS(32, 2048, 10240) \ - SIZE_CLASS(33, 2048, 12288) \ - SIZE_CLASS(34, 2048, 14336) \ - SIZE_CLASS(35, 2048, 16384) \ - SIZE_CLASS(36, 4096, 20480) \ - SIZE_CLASS(37, 4096, 24576) \ - SIZE_CLASS(38, 4096, 28672) \ - -#define NBINS 39 -#define SMALL_MAXCLASS 28672 -#endif +#if (LG_SIZEOF_PTR == 3 && LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 12) +#define SIZE_CLASSES \ + /* index, lg_grp, lg_delta, ndelta, bin, 
lg_delta_lookup */ \ + SC( 0, 4, 4, 0, yes, 4) \ + SC( 1, 4, 4, 1, yes, 4) \ + SC( 2, 4, 4, 2, yes, 4) \ + SC( 3, 4, 4, 3, yes, 4) \ + \ + SC( 4, 6, 4, 1, yes, 4) \ + SC( 5, 6, 4, 2, yes, 4) \ + SC( 6, 6, 4, 3, yes, 4) \ + SC( 7, 6, 4, 4, yes, 4) \ + \ + SC( 8, 7, 5, 1, yes, 5) \ + SC( 9, 7, 5, 2, yes, 5) \ + SC( 10, 7, 5, 3, yes, 5) \ + SC( 11, 7, 5, 4, yes, 5) \ + \ + SC( 12, 8, 6, 1, yes, 6) \ + SC( 13, 8, 6, 2, yes, 6) \ + SC( 14, 8, 6, 3, yes, 6) \ + SC( 15, 8, 6, 4, yes, 6) \ + \ + SC( 16, 9, 7, 1, yes, 7) \ + SC( 17, 9, 7, 2, yes, 7) \ + SC( 18, 9, 7, 3, yes, 7) \ + SC( 19, 9, 7, 4, yes, 7) \ + \ + SC( 20, 10, 8, 1, yes, 8) \ + SC( 21, 10, 8, 2, yes, 8) \ + SC( 22, 10, 8, 3, yes, 8) \ + SC( 23, 10, 8, 4, yes, 8) \ + \ + SC( 24, 11, 9, 1, yes, 9) \ + SC( 25, 11, 9, 2, yes, 9) \ + SC( 26, 11, 9, 3, yes, 9) \ + SC( 27, 11, 9, 4, yes, 9) \ + \ + SC( 28, 12, 10, 1, yes, no) \ + SC( 29, 12, 10, 2, yes, no) \ + SC( 30, 12, 10, 3, yes, no) \ + SC( 31, 12, 10, 4, yes, no) \ + \ + SC( 32, 13, 11, 1, yes, no) \ + SC( 33, 13, 11, 2, yes, no) \ + SC( 34, 13, 11, 3, yes, no) \ + SC( 35, 13, 11, 4, no, no) \ + \ + SC( 36, 14, 12, 1, no, no) \ + SC( 37, 14, 12, 2, no, no) \ + SC( 38, 14, 12, 3, no, no) \ + SC( 39, 14, 12, 4, no, no) \ + \ + SC( 40, 15, 13, 1, no, no) \ + SC( 41, 15, 13, 2, no, no) \ + SC( 42, 15, 13, 3, no, no) \ + SC( 43, 15, 13, 4, no, no) \ + \ + SC( 44, 16, 14, 1, no, no) \ + SC( 45, 16, 14, 2, no, no) \ + SC( 46, 16, 14, 3, no, no) \ + SC( 47, 16, 14, 4, no, no) \ + \ + SC( 48, 17, 15, 1, no, no) \ + SC( 49, 17, 15, 2, no, no) \ + SC( 50, 17, 15, 3, no, no) \ + SC( 51, 17, 15, 4, no, no) \ + \ + SC( 52, 18, 16, 1, no, no) \ + SC( 53, 18, 16, 2, no, no) \ + SC( 54, 18, 16, 3, no, no) \ + SC( 55, 18, 16, 4, no, no) \ + \ + SC( 56, 19, 17, 1, no, no) \ + SC( 57, 19, 17, 2, no, no) \ + SC( 58, 19, 17, 3, no, no) \ + SC( 59, 19, 17, 4, no, no) \ + \ + SC( 60, 20, 18, 1, no, no) \ + SC( 61, 20, 18, 2, no, no) \ + SC( 62, 20, 18, 3, no, no) \ + SC( 63, 20, 
18, 4, no, no) \ + \ + SC( 64, 21, 19, 1, no, no) \ + SC( 65, 21, 19, 2, no, no) \ + SC( 66, 21, 19, 3, no, no) \ + SC( 67, 21, 19, 4, no, no) \ + \ + SC( 68, 22, 20, 1, no, no) \ + SC( 69, 22, 20, 2, no, no) \ + SC( 70, 22, 20, 3, no, no) \ + SC( 71, 22, 20, 4, no, no) \ + \ + SC( 72, 23, 21, 1, no, no) \ + SC( 73, 23, 21, 2, no, no) \ + SC( 74, 23, 21, 3, no, no) \ + SC( 75, 23, 21, 4, no, no) \ + \ + SC( 76, 24, 22, 1, no, no) \ + SC( 77, 24, 22, 2, no, no) \ + SC( 78, 24, 22, 3, no, no) \ + SC( 79, 24, 22, 4, no, no) \ + \ + SC( 80, 25, 23, 1, no, no) \ + SC( 81, 25, 23, 2, no, no) \ + SC( 82, 25, 23, 3, no, no) \ + SC( 83, 25, 23, 4, no, no) \ + \ + SC( 84, 26, 24, 1, no, no) \ + SC( 85, 26, 24, 2, no, no) \ + SC( 86, 26, 24, 3, no, no) \ + SC( 87, 26, 24, 4, no, no) \ + \ + SC( 88, 27, 25, 1, no, no) \ + SC( 89, 27, 25, 2, no, no) \ + SC( 90, 27, 25, 3, no, no) \ + SC( 91, 27, 25, 4, no, no) \ + \ + SC( 92, 28, 26, 1, no, no) \ + SC( 93, 28, 26, 2, no, no) \ + SC( 94, 28, 26, 3, no, no) \ + SC( 95, 28, 26, 4, no, no) \ + \ + SC( 96, 29, 27, 1, no, no) \ + SC( 97, 29, 27, 2, no, no) \ + SC( 98, 29, 27, 3, no, no) \ + SC( 99, 29, 27, 4, no, no) \ + \ + SC(100, 30, 28, 1, no, no) \ + SC(101, 30, 28, 2, no, no) \ + SC(102, 30, 28, 3, no, no) \ + SC(103, 30, 28, 4, no, no) \ + \ + SC(104, 31, 29, 1, no, no) \ + SC(105, 31, 29, 2, no, no) \ + SC(106, 31, 29, 3, no, no) \ + SC(107, 31, 29, 4, no, no) \ + \ + SC(108, 32, 30, 1, no, no) \ + SC(109, 32, 30, 2, no, no) \ + SC(110, 32, 30, 3, no, no) \ + SC(111, 32, 30, 4, no, no) \ + \ + SC(112, 33, 31, 1, no, no) \ + SC(113, 33, 31, 2, no, no) \ + SC(114, 33, 31, 3, no, no) \ + SC(115, 33, 31, 4, no, no) \ + \ + SC(116, 34, 32, 1, no, no) \ + SC(117, 34, 32, 2, no, no) \ + SC(118, 34, 32, 3, no, no) \ + SC(119, 34, 32, 4, no, no) \ + \ + SC(120, 35, 33, 1, no, no) \ + SC(121, 35, 33, 2, no, no) \ + SC(122, 35, 33, 3, no, no) \ + SC(123, 35, 33, 4, no, no) \ + \ + SC(124, 36, 34, 1, no, no) \ + SC(125, 36, 34, 2, no, 
no) \ + SC(126, 36, 34, 3, no, no) \ + SC(127, 36, 34, 4, no, no) \ + \ + SC(128, 37, 35, 1, no, no) \ + SC(129, 37, 35, 2, no, no) \ + SC(130, 37, 35, 3, no, no) \ + SC(131, 37, 35, 4, no, no) \ + \ + SC(132, 38, 36, 1, no, no) \ + SC(133, 38, 36, 2, no, no) \ + SC(134, 38, 36, 3, no, no) \ + SC(135, 38, 36, 4, no, no) \ + \ + SC(136, 39, 37, 1, no, no) \ + SC(137, 39, 37, 2, no, no) \ + SC(138, 39, 37, 3, no, no) \ + SC(139, 39, 37, 4, no, no) \ + \ + SC(140, 40, 38, 1, no, no) \ + SC(141, 40, 38, 2, no, no) \ + SC(142, 40, 38, 3, no, no) \ + SC(143, 40, 38, 4, no, no) \ + \ + SC(144, 41, 39, 1, no, no) \ + SC(145, 41, 39, 2, no, no) \ + SC(146, 41, 39, 3, no, no) \ + SC(147, 41, 39, 4, no, no) \ + \ + SC(148, 42, 40, 1, no, no) \ + SC(149, 42, 40, 2, no, no) \ + SC(150, 42, 40, 3, no, no) \ + SC(151, 42, 40, 4, no, no) \ + \ + SC(152, 43, 41, 1, no, no) \ + SC(153, 43, 41, 2, no, no) \ + SC(154, 43, 41, 3, no, no) \ + SC(155, 43, 41, 4, no, no) \ + \ + SC(156, 44, 42, 1, no, no) \ + SC(157, 44, 42, 2, no, no) \ + SC(158, 44, 42, 3, no, no) \ + SC(159, 44, 42, 4, no, no) \ + \ + SC(160, 45, 43, 1, no, no) \ + SC(161, 45, 43, 2, no, no) \ + SC(162, 45, 43, 3, no, no) \ + SC(163, 45, 43, 4, no, no) \ + \ + SC(164, 46, 44, 1, no, no) \ + SC(165, 46, 44, 2, no, no) \ + SC(166, 46, 44, 3, no, no) \ + SC(167, 46, 44, 4, no, no) \ + \ + SC(168, 47, 45, 1, no, no) \ + SC(169, 47, 45, 2, no, no) \ + SC(170, 47, 45, 3, no, no) \ + SC(171, 47, 45, 4, no, no) \ + \ + SC(172, 48, 46, 1, no, no) \ + SC(173, 48, 46, 2, no, no) \ + SC(174, 48, 46, 3, no, no) \ + SC(175, 48, 46, 4, no, no) \ + \ + SC(176, 49, 47, 1, no, no) \ + SC(177, 49, 47, 2, no, no) \ + SC(178, 49, 47, 3, no, no) \ + SC(179, 49, 47, 4, no, no) \ + \ + SC(180, 50, 48, 1, no, no) \ + SC(181, 50, 48, 2, no, no) \ + SC(182, 50, 48, 3, no, no) \ + SC(183, 50, 48, 4, no, no) \ + \ + SC(184, 51, 49, 1, no, no) \ + SC(185, 51, 49, 2, no, no) \ + SC(186, 51, 49, 3, no, no) \ + SC(187, 51, 49, 4, no, no) \ + \ + 
SC(188, 52, 50, 1, no, no) \ + SC(189, 52, 50, 2, no, no) \ + SC(190, 52, 50, 3, no, no) \ + SC(191, 52, 50, 4, no, no) \ + \ + SC(192, 53, 51, 1, no, no) \ + SC(193, 53, 51, 2, no, no) \ + SC(194, 53, 51, 3, no, no) \ + SC(195, 53, 51, 4, no, no) \ + \ + SC(196, 54, 52, 1, no, no) \ + SC(197, 54, 52, 2, no, no) \ + SC(198, 54, 52, 3, no, no) \ + SC(199, 54, 52, 4, no, no) \ + \ + SC(200, 55, 53, 1, no, no) \ + SC(201, 55, 53, 2, no, no) \ + SC(202, 55, 53, 3, no, no) \ + SC(203, 55, 53, 4, no, no) \ + \ + SC(204, 56, 54, 1, no, no) \ + SC(205, 56, 54, 2, no, no) \ + SC(206, 56, 54, 3, no, no) \ + SC(207, 56, 54, 4, no, no) \ + \ + SC(208, 57, 55, 1, no, no) \ + SC(209, 57, 55, 2, no, no) \ + SC(210, 57, 55, 3, no, no) \ + SC(211, 57, 55, 4, no, no) \ + \ + SC(212, 58, 56, 1, no, no) \ + SC(213, 58, 56, 2, no, no) \ + SC(214, 58, 56, 3, no, no) \ + SC(215, 58, 56, 4, no, no) \ + \ + SC(216, 59, 57, 1, no, no) \ + SC(217, 59, 57, 2, no, no) \ + SC(218, 59, 57, 3, no, no) \ + SC(219, 59, 57, 4, no, no) \ + \ + SC(220, 60, 58, 1, no, no) \ + SC(221, 60, 58, 2, no, no) \ + SC(222, 60, 58, 3, no, no) \ + SC(223, 60, 58, 4, no, no) \ + \ + SC(224, 61, 59, 1, no, no) \ + SC(225, 61, 59, 2, no, no) \ + SC(226, 61, 59, 3, no, no) \ + SC(227, 61, 59, 4, no, no) \ + \ + SC(228, 62, 60, 1, no, no) \ + SC(229, 62, 60, 2, no, no) \ + SC(230, 62, 60, 3, no, no) \ + SC(231, 62, 60, 4, no, no) \ + \ + SC(232, 63, 61, 1, no, no) \ + SC(233, 63, 61, 2, no, no) \ + SC(234, 63, 61, 3, no, no) \ -#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 16) #define SIZE_CLASSES_DEFINED -/* SIZE_CLASS(bin, delta, sz) */ -#define SIZE_CLASSES \ - SIZE_CLASS(0, 16, 16) \ - SIZE_CLASS(1, 16, 32) \ - SIZE_CLASS(2, 16, 48) \ - SIZE_CLASS(3, 16, 64) \ - SIZE_CLASS(4, 16, 80) \ - SIZE_CLASS(5, 16, 96) \ - SIZE_CLASS(6, 16, 112) \ - SIZE_CLASS(7, 16, 128) \ - SIZE_CLASS(8, 32, 160) \ - SIZE_CLASS(9, 32, 192) \ - SIZE_CLASS(10, 32, 224) \ - SIZE_CLASS(11, 32, 256) \ - SIZE_CLASS(12, 64, 320) \ - 
SIZE_CLASS(13, 64, 384) \ - SIZE_CLASS(14, 64, 448) \ - SIZE_CLASS(15, 64, 512) \ - SIZE_CLASS(16, 128, 640) \ - SIZE_CLASS(17, 128, 768) \ - SIZE_CLASS(18, 128, 896) \ - SIZE_CLASS(19, 128, 1024) \ - SIZE_CLASS(20, 256, 1280) \ - SIZE_CLASS(21, 256, 1536) \ - SIZE_CLASS(22, 256, 1792) \ - SIZE_CLASS(23, 256, 2048) \ - SIZE_CLASS(24, 512, 2560) \ - SIZE_CLASS(25, 512, 3072) \ - SIZE_CLASS(26, 512, 3584) \ - SIZE_CLASS(27, 512, 4096) \ - SIZE_CLASS(28, 1024, 5120) \ - SIZE_CLASS(29, 1024, 6144) \ - SIZE_CLASS(30, 1024, 7168) \ - SIZE_CLASS(31, 1024, 8192) \ - SIZE_CLASS(32, 2048, 10240) \ - SIZE_CLASS(33, 2048, 12288) \ - SIZE_CLASS(34, 2048, 14336) \ - SIZE_CLASS(35, 2048, 16384) \ - SIZE_CLASS(36, 4096, 20480) \ - SIZE_CLASS(37, 4096, 24576) \ - SIZE_CLASS(38, 4096, 28672) \ - SIZE_CLASS(39, 4096, 32768) \ - SIZE_CLASS(40, 8192, 40960) \ - SIZE_CLASS(41, 8192, 49152) \ - SIZE_CLASS(42, 8192, 57344) \ - -#define NBINS 43 -#define SMALL_MAXCLASS 57344 +#define NTBINS 0 +#define NLBINS 28 +#define NBINS 35 +#define NSIZES 235 +#define LG_TINY_MAXCLASS "NA" +#define LOOKUP_MAXCLASS ((((size_t)1) << 11) + (((size_t)4) << 9)) +#define SMALL_MAXCLASS ((((size_t)1) << 13) + (((size_t)3) << 11)) +#define LG_LARGE_MINCLASS 14 +#define HUGE_MAXCLASS ((((size_t)1) << 63) + (((size_t)3) << 61)) #endif #ifndef SIZE_CLASSES_DEFINED @@ -692,11 +1423,10 @@ #endif #undef SIZE_CLASSES_DEFINED /* - * The small_size2bin lookup table uses uint8_t to encode each bin index, so we + * The size2index_tab lookup table uses uint8_t to encode each bin index, so we * cannot support more than 256 small size classes. Further constrain NBINS to - * 255 to support prof_promote, since all small size classes, plus a "not - * small" size class must be stored in 8 bits of arena_chunk_map_t's bits - * field. + * 255 since all small size classes, plus a "not small" size class must be + * stored in 8 bits of arena_chunk_map_bits_t's bits field. 
*/ #if (NBINS > 255) # error "Too many small size classes" diff --git a/dep/jemalloc/include/jemalloc/internal/stats.h b/dep/jemalloc/include/jemalloc/internal/stats.h index 27f68e3681c..c91dba99dbe 100644 --- a/dep/jemalloc/include/jemalloc/internal/stats.h +++ b/dep/jemalloc/include/jemalloc/internal/stats.h @@ -4,6 +4,7 @@ typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct malloc_huge_stats_s malloc_huge_stats_t; typedef struct arena_stats_s arena_stats_t; typedef struct chunk_stats_s chunk_stats_t; @@ -21,12 +22,6 @@ struct tcache_bin_stats_s { struct malloc_bin_stats_s { /* - * Current number of bytes allocated, including objects currently - * cached by tcache. - */ - size_t allocated; - - /* * Total number of allocation/deallocation requests served directly by * the bin. Note that tcache may allocate an object, then recycle it * many times, resulting many increments to nrequests, but only one @@ -42,6 +37,12 @@ struct malloc_bin_stats_s { */ uint64_t nrequests; + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + /* Number of tcache fills from this bin. */ uint64_t nfills; @@ -78,10 +79,25 @@ struct malloc_large_stats_s { */ uint64_t nrequests; - /* Current number of runs of this size class. */ + /* + * Current number of runs of this size class, including runs currently + * cached by tcache. + */ size_t curruns; }; +struct malloc_huge_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* Current number of (multi-)chunk allocations of this size class. */ + size_t curhchunks; +}; + struct arena_stats_s { /* Number of bytes currently mapped. 
*/ size_t mapped; @@ -95,34 +111,28 @@ struct arena_stats_s { uint64_t nmadvise; uint64_t purged; + /* + * Number of bytes currently mapped purely for metadata purposes, and + * number of bytes currently allocated for internal metadata. + */ + size_t metadata_mapped; + size_t metadata_allocated; /* Protected via atomic_*_z(). */ + /* Per-size-category statistics. */ size_t allocated_large; uint64_t nmalloc_large; uint64_t ndalloc_large; uint64_t nrequests_large; - /* - * One element for each possible size class, including sizes that - * overlap with bin size classes. This is necessary because ipalloc() - * sometimes has to use such large objects in order to assure proper - * alignment. - */ - malloc_large_stats_t *lstats; -}; - -struct chunk_stats_s { - /* Number of chunks that were allocated. */ - uint64_t nchunks; + size_t allocated_huge; + uint64_t nmalloc_huge; + uint64_t ndalloc_huge; - /* High-water mark for number of chunks allocated. */ - size_t highchunks; + /* One element for each large size class. */ + malloc_large_stats_t *lstats; - /* - * Current number of chunks allocated. This value isn't maintained for - * any other purpose, so keep track of it in order to be able to set - * highchunks. - */ - size_t curchunks; + /* One element for each huge size class. 
*/ + malloc_huge_stats_t *hstats; }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/dep/jemalloc/include/jemalloc/internal/tcache.h b/dep/jemalloc/include/jemalloc/internal/tcache.h index c3d4b58d4dc..5079cd26688 100644 --- a/dep/jemalloc/include/jemalloc/internal/tcache.h +++ b/dep/jemalloc/include/jemalloc/internal/tcache.h @@ -4,6 +4,7 @@ typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; /* * tcache pointers close to NULL are used to encode state information that is @@ -16,6 +17,11 @@ typedef struct tcache_s tcache_t; #define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY /* + * Absolute minimum number of cache slots for each small bin. + */ +#define TCACHE_NSLOTS_SMALL_MIN 20 + +/* * Absolute maximum number of cache slots for each small bin in the thread * cache. This is an additional constraint beyond that imposed as: twice the * number of regions per run for this size class. @@ -69,10 +75,9 @@ struct tcache_bin_s { struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ - arena_t *arena; /* This thread's arena. */ + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ unsigned ev_cnt; /* Event count since incremental GC. */ - unsigned next_gc_bin; /* Next bin to GC. */ + szind_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ /* * The pointer stacks associated with tbins follow as a contiguous @@ -82,6 +87,14 @@ struct tcache_s { */ }; +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -95,84 +108,90 @@ extern tcache_bin_info_t *tcache_bin_info; * Number of tcache bins. 
There are NBINS small-object bins, plus 0 or more * large-object bins. */ -extern size_t nhbins; +extern size_t nhbins; /* Maximum cached size class. */ -extern size_t tcache_maxclass; +extern size_t tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. + */ +extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); -void tcache_event_hard(tcache_t *tcache); -void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, - size_t binind); -void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache); -void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); -void tcache_arena_dissociate(tcache_t *tcache); -tcache_t *tcache_create(arena_t *arena); -void tcache_destroy(tcache_t *tcache); -void tcache_thread_cleanup(void *arg); +void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, + arena_t *newarena); +void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena); +tcache_t *tcache_get_hard(tsd_t *tsd); +tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); +void 
tcache_cleanup(tsd_t *tsd); +void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -bool tcache_boot0(void); -bool tcache_boot1(void); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) -malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) - -void tcache_event(tcache_t *tcache); +void tcache_event(tsd_t *tsd, tcache_t *tcache); void tcache_flush(void); bool tcache_enabled_get(void); -tcache_t *tcache_get(bool create); +tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); -void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); -void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind); -void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); +void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, bool zero); +void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, bool zero); +void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, + szind_t binind); +void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, + size_t size); +tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -/* Map of thread-specific caches. */ -malloc_tsd_externs(tcache, tcache_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL, - tcache_thread_cleanup) -/* Per thread flag that allows thread caches to be disabled. 
*/ -malloc_tsd_externs(tcache_enabled, tcache_enabled_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t, - tcache_enabled_default, malloc_tsd_no_cleanup) - JEMALLOC_INLINE void tcache_flush(void) { - tcache_t *tcache; + tsd_t *tsd; cassert(config_tcache); - tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) - return; - tcache_destroy(tcache); - tcache = NULL; - tcache_tsd_set(&tcache); + tsd = tsd_fetch(); + tcache_cleanup(tsd); } JEMALLOC_INLINE bool tcache_enabled_get(void) { + tsd_t *tsd; tcache_enabled_t tcache_enabled; cassert(config_tcache); - tcache_enabled = *tcache_enabled_tsd_get(); + tsd = tsd_fetch(); + tcache_enabled = tsd_tcache_enabled_get(tsd); if (tcache_enabled == tcache_enabled_default) { tcache_enabled = (tcache_enabled_t)opt_tcache; - tcache_enabled_tsd_set(&tcache_enabled); + tsd_tcache_enabled_set(tsd, tcache_enabled); } return ((bool)tcache_enabled); @@ -181,85 +200,41 @@ tcache_enabled_get(void) JEMALLOC_INLINE void tcache_enabled_set(bool enabled) { + tsd_t *tsd; tcache_enabled_t tcache_enabled; - tcache_t *tcache; cassert(config_tcache); + tsd = tsd_fetch(); + tcache_enabled = (tcache_enabled_t)enabled; - tcache_enabled_tsd_set(&tcache_enabled); - tcache = *tcache_tsd_get(); - if (enabled) { - if (tcache == TCACHE_STATE_DISABLED) { - tcache = NULL; - tcache_tsd_set(&tcache); - } - } else /* disabled */ { - if (tcache > TCACHE_STATE_MAX) { - tcache_destroy(tcache); - tcache = NULL; - } - if (tcache == NULL) { - tcache = TCACHE_STATE_DISABLED; - tcache_tsd_set(&tcache); - } - } + tsd_tcache_enabled_set(tsd, tcache_enabled); + + if (!enabled) + tcache_cleanup(tsd); } JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(bool create) +tcache_get(tsd_t *tsd, bool create) { tcache_t *tcache; - if (config_tcache == false) - return (NULL); - if (config_lazy_lock && isthreaded == false) + if (!config_tcache) return (NULL); - tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= 
(uintptr_t)TCACHE_STATE_MAX) { - if (tcache == TCACHE_STATE_DISABLED) - return (NULL); - if (tcache == NULL) { - if (create == false) { - /* - * Creating a tcache here would cause - * allocation as a side effect of free(). - * Ordinarily that would be okay since - * tcache_create() failure is a soft failure - * that doesn't propagate. However, if TLS - * data are freed via free() as in glibc, - * subtle corruption could result from setting - * a TLS variable after its backing memory is - * freed. - */ - return (NULL); - } - if (tcache_enabled_get() == false) { - tcache_enabled_set(false); /* Memoize. */ - return (NULL); - } - return (tcache_create(choose_arena(NULL))); - } - if (tcache == TCACHE_STATE_PURGATORY) { - /* - * Make a note that an allocator function was called - * after tcache_thread_cleanup() was called. - */ - tcache = TCACHE_STATE_REINCARNATED; - tcache_tsd_set(&tcache); - return (NULL); - } - if (tcache == TCACHE_STATE_REINCARNATED) - return (NULL); - not_reached(); + tcache = tsd_tcache_get(tsd); + if (!create) + return (tcache); + if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { + tcache = tcache_get_hard(tsd); + tsd_tcache_set(tsd, tcache); } return (tcache); } JEMALLOC_ALWAYS_INLINE void -tcache_event(tcache_t *tcache) +tcache_event(tsd_t *tsd, tcache_t *tcache) { if (TCACHE_GC_INCR == 0) @@ -267,8 +242,8 @@ tcache_event(tcache_t *tcache) tcache->ev_cnt++; assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (tcache->ev_cnt == TCACHE_GC_INCR) - tcache_event_hard(tcache); + if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) + tcache_event_hard(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void * @@ -276,85 +251,87 @@ tcache_alloc_easy(tcache_bin_t *tbin) { void *ret; - if (tbin->ncached == 0) { + if (unlikely(tbin->ncached == 0)) { tbin->low_water = -1; return (NULL); } tbin->ncached--; - if ((int)tbin->ncached < tbin->low_water) + if (unlikely((int)tbin->ncached < tbin->low_water)) tbin->low_water = tbin->ncached; ret = tbin->avail[tbin->ncached]; return 
(ret); } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + bool zero) { void *ret; - size_t binind; + szind_t binind; + size_t usize; tcache_bin_t *tbin; - binind = SMALL_SIZE2BIN(size); + binind = size2index(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; - size = arena_bin_info[binind].reg_size; + usize = index2size(binind); ret = tcache_alloc_easy(tbin); - if (ret == NULL) { - ret = tcache_alloc_small_hard(tcache, tbin, binind); + if (unlikely(ret == NULL)) { + ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind); if (ret == NULL) return (NULL); } - assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size); + assert(tcache_salloc(ret) == usize); - if (zero == false) { + if (likely(!zero)) { if (config_fill) { - if (opt_junk) { + if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); - } else if (opt_zero) - memset(ret, 0, size); + } else if (unlikely(opt_zero)) + memset(ret, 0, usize); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); + memset(ret, 0, usize); } if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; - tcache_event(tcache); + tcache->prof_accumbytes += usize; + tcache_event(tsd, tcache); return (ret); } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) +tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + bool zero) { void *ret; - size_t binind; + szind_t binind; + size_t usize; tcache_bin_t *tbin; - size = PAGE_CEILING(size); - assert(size <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = 
size2index(size); + usize = index2size(binind); + assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); - if (ret == NULL) { + if (unlikely(ret == NULL)) { /* * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - ret = arena_malloc_large(tcache->arena, size, zero); + ret = arena_malloc_large(arena, usize, zero); if (ret == NULL) return (NULL); } else { - if (config_prof && prof_promote && size == PAGE) { + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> @@ -362,57 +339,54 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) arena_mapbits_large_binind_set(chunk, pageind, BININD_INVALID); } - if (zero == false) { + if (likely(!zero)) { if (config_fill) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); + if (unlikely(opt_junk_alloc)) + memset(ret, 0xa5, usize); + else if (unlikely(opt_zero)) + memset(ret, 0, usize); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - } else { - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); - } + } else + memset(ret, 0, usize); if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; } - tcache_event(tcache); + tcache_event(tsd, tcache); return (ret); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - if (config_fill && opt_junk) + if (config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - 
tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1), tcache); + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_small(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - tcache_event(tcache); + tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) { - size_t binind; + szind_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -420,22 +394,31 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(tcache_salloc(ptr) > SMALL_MAXCLASS); assert(tcache_salloc(ptr) <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); - if (config_fill && opt_junk) - memset(ptr, 0x5a, size); + if (config_fill && unlikely(opt_junk_free)) + arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1), tcache); + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_large(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - tcache_event(tcache); + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcaches_get(tsd_t *tsd, unsigned ind) +{ + tcaches_t *elm = &tcaches[ind]; + if (unlikely(elm->tcache == NULL)) + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + return (elm->tcache); } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/tsd.h b/dep/jemalloc/include/jemalloc/internal/tsd.h index 9fb4a23ec6b..eed7aa01347 100644 --- a/dep/jemalloc/include/jemalloc/internal/tsd.h +++ 
b/dep/jemalloc/include/jemalloc/internal/tsd.h @@ -2,7 +2,7 @@ #ifdef JEMALLOC_H_TYPES /* Maximum number of malloc_tsd users with cleanup functions. */ -#define MALLOC_TSD_CLEANUPS_MAX 8 +#define MALLOC_TSD_CLEANUPS_MAX 2 typedef bool (*malloc_tsd_cleanup_t)(void); @@ -12,9 +12,18 @@ typedef struct tsd_init_block_s tsd_init_block_t; typedef struct tsd_init_head_s tsd_init_head_t; #endif +typedef struct tsd_s tsd_t; + +typedef enum { + tsd_state_uninitialized, + tsd_state_nominal, + tsd_state_purgatory, + tsd_state_reincarnated +} tsd_state_t; + /* * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are four macros that support (at least) three use cases: file-private, + * are five macros that support (at least) three use cases: file-private, * library-private, and library-private inlined. Following is an example * library-private tsd variable: * @@ -24,34 +33,36 @@ typedef struct tsd_init_head_s tsd_init_head_t; * int y; * } example_t; * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_protos(, example, example_t *) - * malloc_tsd_externs(example, example_t *) + * malloc_tsd_types(example_, example_t) + * malloc_tsd_protos(, example_, example_t) + * malloc_tsd_externs(example_, example_t) * In example.c: - * malloc_tsd_data(, example, example_t *, EX_INITIALIZER) - * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER, + * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) + * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, * example_tsd_cleanup) * * The result is a set of generated functions, e.g.: * * bool example_tsd_boot(void) {...} - * example_t **example_tsd_get() {...} - * void example_tsd_set(example_t **val) {...} + * example_t *example_tsd_get() {...} + * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than - * (a_type) so that it is possible to support non-pointer types (unlike + * (a_type) so that it is possible to support 
non-pointer types (unlike * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is - * cast to (void *). This means that the cleanup function needs to cast *and* - * dereference the function argument, e.g.: + * cast to (void *). This means that the cleanup function needs to cast the + * function argument to (a_type *), then dereference the resulting pointer to + * access fields, e.g. * * void * example_tsd_cleanup(void *arg) * { - * example_t *example = *(example_t **)arg; + * example_t *example = (example_t *)arg; * + * example->x = 42; * [...] - * if ([want the cleanup function to be called again]) { - * example_tsd_set(&example); - * } + * if ([want the cleanup function to be called again]) + * example_tsd_set(example); * } * * If example_tsd_set() is called within example_tsd_cleanup(), it will be @@ -60,63 +71,96 @@ typedef struct tsd_init_head_s tsd_init_head_t; * non-NULL. */ +/* malloc_tsd_types(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_types(a_name, a_type) +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_types(a_name, a_type) +#elif (defined(_WIN32)) +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#else +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#endif + /* malloc_tsd_protos(). */ #define malloc_tsd_protos(a_attr, a_name, a_type) \ a_attr bool \ -a_name##_tsd_boot(void); \ +a_name##tsd_boot0(void); \ +a_attr void \ +a_name##tsd_boot1(void); \ +a_attr bool \ +a_name##tsd_boot(void); \ a_attr a_type * \ -a_name##_tsd_get(void); \ +a_name##tsd_get(void); \ a_attr void \ -a_name##_tsd_set(a_type *val); +a_name##tsd_set(a_type *val); /* malloc_tsd_externs(). 
*/ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##_tls; \ -extern __thread bool a_name##_initialized; \ -extern bool a_name##_booted; +extern __thread a_type a_name##tsd_tls; \ +extern __thread bool a_name##tsd_initialized; \ +extern bool a_name##tsd_booted; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##_tls; \ -extern pthread_key_t a_name##_tsd; \ -extern bool a_name##_booted; +extern __thread a_type a_name##tsd_tls; \ +extern pthread_key_t a_name##tsd_tsd; \ +extern bool a_name##tsd_booted; #elif (defined(_WIN32)) #define malloc_tsd_externs(a_name, a_type) \ -extern DWORD a_name##_tsd; \ -extern bool a_name##_booted; +extern DWORD a_name##tsd_tsd; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ +extern bool a_name##tsd_booted; #else #define malloc_tsd_externs(a_name, a_type) \ -extern pthread_key_t a_name##_tsd; \ -extern tsd_init_head_t a_name##_tsd_init_head; \ -extern bool a_name##_booted; +extern pthread_key_t a_name##tsd_tsd; \ +extern tsd_init_head_t a_name##tsd_init_head; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ +extern bool a_name##tsd_booted; #endif /* malloc_tsd_data(). 
*/ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##_tls = a_initializer; \ + a_name##tsd_tls = a_initializer; \ a_attr __thread bool JEMALLOC_TLS_MODEL \ - a_name##_initialized = false; \ -a_attr bool a_name##_booted = false; + a_name##tsd_initialized = false; \ +a_attr bool a_name##tsd_booted = false; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##_tls = a_initializer; \ -a_attr pthread_key_t a_name##_tsd; \ -a_attr bool a_name##_booted = false; + a_name##tsd_tls = a_initializer; \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr bool a_name##tsd_booted = false; #elif (defined(_WIN32)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr DWORD a_name##_tsd; \ -a_attr bool a_name##_booted = false; +a_attr DWORD a_name##tsd_tsd; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ +a_attr bool a_name##tsd_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr pthread_key_t a_name##_tsd; \ -a_attr tsd_init_head_t a_name##_tsd_init_head = { \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr tsd_init_head_t a_name##tsd_init_head = { \ ql_head_initializer(blocks), \ MALLOC_MUTEX_INITIALIZER \ }; \ -a_attr bool a_name##_booted = false; +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ +a_attr bool a_name##tsd_booted = false; #endif /* malloc_tsd_funcs(). */ @@ -125,75 +169,100 @@ a_attr bool a_name##_booted = false; a_cleanup) \ /* Initialization/cleanup. 
*/ \ a_attr bool \ -a_name##_tsd_cleanup_wrapper(void) \ +a_name##tsd_cleanup_wrapper(void) \ { \ \ - if (a_name##_initialized) { \ - a_name##_initialized = false; \ - a_cleanup(&a_name##_tls); \ + if (a_name##tsd_initialized) { \ + a_name##tsd_initialized = false; \ + a_cleanup(&a_name##tsd_tls); \ } \ - return (a_name##_initialized); \ + return (a_name##tsd_initialized); \ } \ a_attr bool \ -a_name##_tsd_boot(void) \ +a_name##tsd_boot0(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper); \ + &a_name##tsd_cleanup_wrapper); \ } \ - a_name##_booted = true; \ + a_name##tsd_booted = true; \ return (false); \ } \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ \ - assert(a_name##_booted); \ - return (&a_name##_tls); \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ \ - assert(a_name##_booted); \ - a_name##_tls = (*val); \ + assert(a_name##tsd_booted); \ + a_name##tsd_tls = (*val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ - a_name##_initialized = true; \ + a_name##tsd_initialized = true; \ } #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##_tsd_boot(void) \ +a_name##tsd_boot0(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \ + if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ + 0) \ return (true); \ } \ - a_name##_booted = true; \ + a_name##tsd_booted = true; \ return (false); \ } \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. 
*/ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ \ - assert(a_name##_booted); \ - return (&a_name##_tls); \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ \ - assert(a_name##_booted); \ - a_name##_tls = (*val); \ + assert(a_name##tsd_booted); \ + a_name##tsd_tls = (*val); \ if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_setspecific(a_name##_tsd, \ - (void *)(&a_name##_tls))) { \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)(&a_name##tsd_tls))) { \ malloc_write("<jemalloc>: Error" \ " setting TSD for "#a_name"\n"); \ if (opt_abort) \ @@ -204,27 +273,21 @@ a_name##_tsd_set(a_type *val) \ #elif (defined(_WIN32)) #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##_tsd_wrapper_t; \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##_tsd_cleanup_wrapper(void) \ +a_name##tsd_cleanup_wrapper(void) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + DWORD error = GetLastError(); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ \ - wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ if (wrapper == NULL) \ return (false); \ if (a_cleanup != malloc_tsd_no_cleanup && \ wrapper->initialized) { \ - a_type val = wrapper->val; \ - a_type tsd_static_data = a_initializer; \ wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - a_cleanup(&val); \ + a_cleanup(&wrapper->val); \ if (wrapper->initialized) { \ /* Trigger another cleanup round. 
*/ \ return (true); \ @@ -233,63 +296,95 @@ a_name##_tsd_cleanup_wrapper(void) \ malloc_tsd_dalloc(wrapper); \ return (false); \ } \ -a_attr bool \ -a_name##_tsd_boot(void) \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ \ - a_name##_tsd = TlsAlloc(); \ - if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ - return (true); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper); \ + if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ } \ - a_name##_booted = true; \ - return (false); \ } \ -/* Get/set. */ \ -a_attr a_name##_tsd_wrapper_t * \ -a_name##_tsd_get_wrapper(void) \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_wrapper_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ - TlsGetValue(a_name##_tsd); \ + DWORD error = GetLastError(); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ \ - if (wrapper == NULL) { \ - wrapper = (a_name##_tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (unlikely(wrapper == NULL)) { \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ malloc_write("<jemalloc>: Error allocating" \ " TSD for "#a_name"\n"); \ abort(); \ } else { \ - static a_type tsd_static_data = a_initializer; \ wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - } \ - if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ + wrapper->val = a_initializer; \ } \ + a_name##tsd_wrapper_set(wrapper); \ } \ return (wrapper); \ } \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + a_name##tsd_tsd = TlsAlloc(); \ + if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != 
malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##tsd_cleanup_wrapper); \ + } \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +/* Get/set. */ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -297,16 +392,11 @@ a_name##_tsd_set(a_type *val) \ #else #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##_tsd_wrapper_t; \ /* Initialization/cleanup. 
*/ \ a_attr void \ -a_name##_tsd_cleanup_wrapper(void *arg) \ +a_name##tsd_cleanup_wrapper(void *arg) \ { \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ \ if (a_cleanup != malloc_tsd_no_cleanup && \ wrapper->initialized) { \ @@ -314,7 +404,7 @@ a_name##_tsd_cleanup_wrapper(void *arg) \ a_cleanup(&wrapper->val); \ if (wrapper->initialized) { \ /* Trigger another cleanup round. */ \ - if (pthread_setspecific(a_name##_tsd, \ + if (pthread_setspecific(a_name##tsd_tsd, \ (void *)wrapper)) { \ malloc_write("<jemalloc>: Error" \ " setting TSD for "#a_name"\n"); \ @@ -326,67 +416,97 @@ a_name##_tsd_cleanup_wrapper(void *arg) \ } \ malloc_tsd_dalloc(wrapper); \ } \ -a_attr bool \ -a_name##_tsd_boot(void) \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ \ - if (pthread_key_create(&a_name##_tsd, \ - a_name##_tsd_cleanup_wrapper) != 0) \ - return (true); \ - a_name##_booted = true; \ - return (false); \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ } \ -/* Get/set. 
*/ \ -a_attr a_name##_tsd_wrapper_t * \ -a_name##_tsd_get_wrapper(void) \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_wrapper_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ - pthread_getspecific(a_name##_tsd); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + pthread_getspecific(a_name##tsd_tsd); \ \ - if (wrapper == NULL) { \ + if (unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ wrapper = tsd_init_check_recursion( \ - &a_name##_tsd_init_head, &block); \ + &a_name##tsd_init_head, &block); \ if (wrapper) \ return (wrapper); \ - wrapper = (a_name##_tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ block.data = wrapper; \ if (wrapper == NULL) { \ malloc_write("<jemalloc>: Error allocating" \ " TSD for "#a_name"\n"); \ abort(); \ } else { \ - static a_type tsd_static_data = a_initializer; \ wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - } \ - if (pthread_setspecific(a_name##_tsd, \ - (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ + wrapper->val = a_initializer; \ } \ - tsd_init_finish(&a_name##_tsd_init_head, &block); \ + a_name##tsd_wrapper_set(wrapper); \ + tsd_init_finish(&a_name##tsd_init_head, &block); \ } \ return (wrapper); \ } \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (pthread_key_create(&a_name##tsd_tsd, \ + a_name##tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + 
memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +/* Get/set. */ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -410,25 +530,136 @@ struct tsd_init_head_s { }; #endif +#define MALLOC_TSD \ +/* O(name, type) */ \ + O(tcache, tcache_t *) \ + O(thread_allocated, uint64_t) \ + O(thread_deallocated, uint64_t) \ + O(prof_tdata, prof_tdata_t *) \ + O(arena, arena_t *) \ + O(arenas_cache, arena_t **) \ + O(narenas_cache, unsigned) \ + O(arenas_cache_bypass, bool) \ + O(tcache_enabled, tcache_enabled_t) \ + O(quarantine, quarantine_t *) \ + +#define TSD_INITIALIZER { \ + tsd_state_uninitialized, \ + NULL, \ + 0, \ + 0, \ + NULL, \ + NULL, \ + NULL, \ + 0, \ + false, \ + tcache_enabled_default, \ + NULL \ +} + +struct tsd_s { + tsd_state_t state; +#define O(n, t) \ + t n; +MALLOC_TSD +#undef O +}; + +static const tsd_t tsd_initializer = TSD_INITIALIZER; + +malloc_tsd_types(, tsd_t) + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void 
*wrapper); -void malloc_tsd_no_cleanup(void *); +void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); -void malloc_tsd_boot(void); +bool malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) void *tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block); void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); #endif +void tsd_cleanup(void *arg); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) + +tsd_t *tsd_fetch(void); +bool tsd_nominal(tsd_t *tsd); +#define O(n, t) \ +t *tsd_##n##p_get(tsd_t *tsd); \ +t tsd_##n##_get(tsd_t *tsd); \ +void tsd_##n##_set(tsd_t *tsd, t n); +MALLOC_TSD +#undef O +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) +malloc_tsd_externs(, tsd_t) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + tsd_t *tsd = tsd_get(); + + if (unlikely(tsd->state != tsd_state_nominal)) { + if (tsd->state == tsd_state_uninitialized) { + tsd->state = tsd_state_nominal; + /* Trigger cleanup handler registration. 
*/ + tsd_set(tsd); + } else if (tsd->state == tsd_state_purgatory) { + tsd->state = tsd_state_reincarnated; + tsd_set(tsd); + } else + assert(tsd->state == tsd_state_reincarnated); + } + + return (tsd); +} + +JEMALLOC_INLINE bool +tsd_nominal(tsd_t *tsd) +{ + + return (tsd->state == tsd_state_nominal); +} + +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) \ +{ \ + \ + return (&tsd->n); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE t \ +tsd_##n##_get(tsd_t *tsd) \ +{ \ + \ + return (*tsd_##n##p_get(tsd)); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t n) \ +{ \ + \ + assert(tsd->state == tsd_state_nominal); \ + tsd->n = n; \ +} +MALLOC_TSD +#undef O +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/util.h b/dep/jemalloc/include/jemalloc/internal/util.h index 6b938f74688..b2ea740fdc7 100644 --- a/dep/jemalloc/include/jemalloc/internal/util.h +++ b/dep/jemalloc/include/jemalloc/internal/util.h @@ -1,6 +1,36 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +#ifdef _WIN32 +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" +#else +# include <inttypes.h> +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR +#endif + /* Size of stack-allocated buffer passed to buferror(). 
*/ #define BUFERROR_BUF 64 @@ -22,9 +52,33 @@ * uninitialized. */ #ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v +# define JEMALLOC_CC_SILENCE_INIT(v) = v #else -# define JEMALLOC_CC_SILENCE_INIT(v) +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + +#define JEMALLOC_GNUC_PREREQ(major, minor) \ + (!defined(__clang__) && \ + (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) +#ifndef __has_builtin +# define __has_builtin(builtin) (0) +#endif +#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ + (defined(__clang__) && __has_builtin(builtin)) + +#ifdef __GNUC__ +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +# if JEMALLOC_GNUC_PREREQ(4, 6) || \ + JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) +# define unreachable() __builtin_unreachable() +# else +# define unreachable() +# endif +#else +# define likely(x) !!(x) +# define unlikely(x) !!(x) +# define unreachable() #endif /* @@ -33,7 +87,7 @@ */ #ifndef assert #define assert(e) do { \ - if (config_debug && !(e)) { \ + if (unlikely(config_debug && !(e))) { \ malloc_printf( \ "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \ __FILE__, __LINE__, #e); \ @@ -50,6 +104,7 @@ __FILE__, __LINE__); \ abort(); \ } \ + unreachable(); \ } while (0) #endif @@ -65,14 +120,14 @@ #ifndef assert_not_implemented #define assert_not_implemented(e) do { \ - if (config_debug && !(e)) \ + if (unlikely(config_debug && !(e))) \ not_implemented(); \ } while (0) #endif /* Use to assert a particular configuration, e.g., cassert(config_debug). */ #define cassert(c) do { \ - if ((c) == false) \ + if (unlikely(!(c))) \ not_reached(); \ } while (0) @@ -96,25 +151,47 @@ void malloc_write(const char *s); int malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); int malloc_snprintf(char *str, size_t size, const char *format, ...) 
- JEMALLOC_ATTR(format(printf, 3, 4)); + JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); -void malloc_printf(const char *format, ...) - JEMALLOC_ATTR(format(printf, 1, 2)); + const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +int jemalloc_ffsl(long bitmap); +int jemalloc_ffs(int bitmap); size_t pow2_ceil(size_t x); +size_t lg_floor(size_t x); void set_errno(int errnum); int get_errno(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) + +/* Sanity check. */ +#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) +# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure +#endif + +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffsl(long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffs(int bitmap) +{ + + return (JEMALLOC_INTERNAL_FFS(bitmap)); +} + /* Compute the smallest power of 2 that is >= x. */ JEMALLOC_INLINE size_t pow2_ceil(size_t x) @@ -133,7 +210,82 @@ pow2_ceil(size_t x) return (x); } -/* Sets error code */ +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE size_t +lg_floor(size_t x) +{ + size_t ret; + + assert(x != 0); + + asm ("bsr %1, %0" + : "=r"(ret) // Outputs. + : "r"(x) // Inputs. 
+ ); + return (ret); +} +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE size_t +lg_floor(size_t x) +{ + unsigned long ret; + + assert(x != 0); + +#if (LG_SIZEOF_PTR == 3) + _BitScanReverse64(&ret, x); +#elif (LG_SIZEOF_PTR == 2) + _BitScanReverse(&ret, x); +#else +# error "Unsupported type sizes for lg_floor()" +#endif + return (ret); +} +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) +JEMALLOC_INLINE size_t +lg_floor(size_t x) +{ + + assert(x != 0); + +#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); +#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); +#else +# error "Unsupported type sizes for lg_floor()" +#endif +} +#else +JEMALLOC_INLINE size_t +lg_floor(size_t x) +{ + + assert(x != 0); + + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); +#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) + x |= (x >> 32); + if (x == KZU(0xffffffffffffffff)) + return (63); + x++; + return (jemalloc_ffsl(x) - 2); +#elif (LG_SIZEOF_PTR == 2) + if (x == KZU(0xffffffff)) + return (31); + x++; + return (jemalloc_ffs(x) - 2); +#else +# error "Unsupported type sizes for lg_floor()" +#endif +} +#endif + +/* Set error code. */ JEMALLOC_INLINE void set_errno(int errnum) { @@ -145,7 +297,7 @@ set_errno(int errnum) #endif } -/* Get last error code */ +/* Get last error code. */ JEMALLOC_INLINE int get_errno(void) { diff --git a/dep/jemalloc/include/jemalloc/internal/valgrind.h b/dep/jemalloc/include/jemalloc/internal/valgrind.h new file mode 100644 index 00000000000..a3380df9265 --- /dev/null +++ b/dep/jemalloc/include/jemalloc/internal/valgrind.h @@ -0,0 +1,112 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#ifdef JEMALLOC_VALGRIND +#include <valgrind/valgrind.h> + +/* + * The size that is reported to Valgrind must be consistent through a chain of + * malloc..realloc..realloc calls. 
Request size isn't recorded anywhere in + * jemalloc, so it is critical that all callers of these macros provide usize + * rather than request size. As a result, buffer overflow detection is + * technically weakened for the standard API, though it is generally accepted + * practice to consider any extra bytes reported by malloc_usable_size() as + * usable space. + */ +#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_make_mem_noaccess(ptr, usize); \ +} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_make_mem_undefined(ptr, usize); \ +} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_make_mem_defined(ptr, usize); \ +} while (0) +/* + * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro + * calls must be embedded in macros rather than in functions so that when + * Valgrind reports errors, there are no extra stack frames in the backtraces. + */ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ + if (unlikely(in_valgrind && cond)) \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ + ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ + zero) do { \ + if (unlikely(in_valgrind)) { \ + size_t rzsize = p2rz(ptr); \ + \ + if (!maybe_moved || ptr == old_ptr) { \ + VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ + usize, rzsize); \ + if (zero && old_usize < usize) { \ + valgrind_make_mem_defined( \ + (void *)((uintptr_t)ptr + \ + old_usize), usize - old_usize); \ + } \ + } else { \ + if (!old_ptr_maybe_null || old_ptr != NULL) { \ + valgrind_freelike_block(old_ptr, \ + old_rzsize); \ + } \ + if (!ptr_maybe_null || ptr != NULL) { \ + size_t copy_size = (old_usize < usize) \ + ? 
old_usize : usize; \ + size_t tail_size = usize - copy_size; \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ + rzsize, false); \ + if (copy_size > 0) { \ + valgrind_make_mem_defined(ptr, \ + copy_size); \ + } \ + if (zero && tail_size > 0) { \ + valgrind_make_mem_defined( \ + (void *)((uintptr_t)ptr + \ + copy_size), tail_size); \ + } \ + } \ + } \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_freelike_block(ptr, rzsize); \ +} while (0) +#else +#define RUNNING_ON_VALGRIND ((unsigned)0) +#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ + ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ + zero) do {} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_VALGRIND +void valgrind_make_mem_noaccess(void *ptr, size_t usize); +void valgrind_make_mem_undefined(void *ptr, size_t usize); +void valgrind_make_mem_defined(void *ptr, size_t usize); +void valgrind_freelike_block(void *ptr, size_t usize); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/dep/jemalloc/include/jemalloc/jemalloc.h b/dep/jemalloc/include/jemalloc/jemalloc.h index 
b8ea851e525..f56fba48748 100644 --- a/dep/jemalloc/include/jemalloc/jemalloc.h +++ b/dep/jemalloc/include/jemalloc/jemalloc.h @@ -4,15 +4,83 @@ extern "C" { #endif +/* Defined if __attribute__((...)) syntax is supported. */ +#define JEMALLOC_HAVE_ATTR + +/* Defined if alloc_size attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE + +/* Defined if format(gnu_printf, ...) attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF + +/* Defined if format(printf, ...) attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#define JEMALLOC_OVERRIDE_MEMALIGN +#define JEMALLOC_OVERRIDE_VALLOC + +/* + * At least Linux omits the "const" in: + * + * size_t malloc_usable_size(const void *ptr); + * + * Match the operating system's prototype. + */ +#define JEMALLOC_USABLE_SIZE_CONST + +/* + * If defined, specify throw() for the public function prototypes when compiling + * with C++. The only justification for this is to match the prototypes that + * glibc defines. + */ +#define JEMALLOC_USE_CXX_THROW + +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by + * these macro definitions. 
+ */ +#ifndef JEMALLOC_NO_RENAME +# define je_malloc_conf malloc_conf +# define je_malloc_message malloc_message +# define je_malloc malloc +# define je_calloc calloc +# define je_posix_memalign posix_memalign +# define je_aligned_alloc aligned_alloc +# define je_realloc realloc +# define je_free free +# define je_mallocx mallocx +# define je_rallocx rallocx +# define je_xallocx xallocx +# define je_sallocx sallocx +# define je_dallocx dallocx +# define je_sdallocx sdallocx +# define je_nallocx nallocx +# define je_mallctl mallctl +# define je_mallctlnametomib mallctlnametomib +# define je_mallctlbymib mallctlbymib +# define je_malloc_stats_print malloc_stats_print +# define je_malloc_usable_size malloc_usable_size +# define je_memalign memalign +# define je_valloc valloc +#endif + +#include <stdlib.h> +#include <stdbool.h> +#include <stdint.h> #include <limits.h> #include <strings.h> -#define JEMALLOC_VERSION "3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340" -#define JEMALLOC_VERSION_MAJOR 3 -#define JEMALLOC_VERSION_MINOR 6 -#define JEMALLOC_VERSION_BUGFIX 0 +#define JEMALLOC_VERSION "4.0.4-0-g91010a9e2ebfc84b1ac1ed7fdde3bfed4f65f180" +#define JEMALLOC_VERSION_MAJOR 4 +#define JEMALLOC_VERSION_MINOR 0 +#define JEMALLOC_VERSION_BUGFIX 4 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "46c0af68bd248b04df75e4f92d5fb804c3d75340" +#define JEMALLOC_VERSION_GID "91010a9e2ebfc84b1ac1ed7fdde3bfed4f65f180" # define MALLOCX_LG_ALIGN(la) (la) # if LG_SIZEOF_PTR == 2 @@ -22,24 +90,86 @@ extern "C" { ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) # endif # define MALLOCX_ZERO ((int)0x40) -/* Bias arena index bits so that 0 encodes "MALLOCX_ARENA() unspecified". */ -# define MALLOCX_ARENA(a) ((int)(((a)+1) << 8)) +/* + * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 + * encodes MALLOCX_TCACHE_NONE. 
+ */ +# define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) +# define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +/* + * Bias arena index bits so that 0 encodes "use an automatically chosen arena". + */ +# define MALLOCX_ARENA(a) ((int)(((a)+1) << 20)) -#ifdef JEMALLOC_EXPERIMENTAL -# define ALLOCM_LG_ALIGN(la) (la) -# if LG_SIZEOF_PTR == 2 -# define ALLOCM_ALIGN(a) (ffs(a)-1) +#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) +# define JEMALLOC_CXX_THROW throw() +#else +# define JEMALLOC_CXX_THROW +#endif + +#ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) # else -# define ALLOCM_ALIGN(a) \ - ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# endif +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +# else +# define JEMALLOC_FORMAT_PRINTF(s, i) +# endif +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR +#elif _MSC_VER +# define JEMALLOC_ATTR(s) +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# ifndef JEMALLOC_EXPORT +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif +# endif +# define JEMALLOC_FORMAT_PRINTF(s, i) +# define 
JEMALLOC_NOINLINE __declspec(noinline) +# ifdef __cplusplus +# define JEMALLOC_NOTHROW __declspec(nothrow) +# else +# define JEMALLOC_NOTHROW +# endif +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_RESTRICT_RETURN __declspec(restrict) +# if _MSC_VER >= 1900 && !defined(__EDG__) +# define JEMALLOC_ALLOCATOR __declspec(allocator) +# else +# define JEMALLOC_ALLOCATOR # endif -# define ALLOCM_ZERO ((int)0x40) -# define ALLOCM_NO_MOVE ((int)0x80) -/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */ -# define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) -# define ALLOCM_SUCCESS 0 -# define ALLOCM_ERR_OOM 1 -# define ALLOCM_ERR_NOT_MOVED 2 +#else +# define JEMALLOC_ATTR(s) +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# define JEMALLOC_EXPORT +# define JEMALLOC_FORMAT_PRINTF(s, i) +# define JEMALLOC_NOINLINE +# define JEMALLOC_NOTHROW +# define JEMALLOC_SECTION(s) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR #endif /* @@ -51,55 +181,121 @@ extern JEMALLOC_EXPORT const char *je_malloc_conf; extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, const char *s); -JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); -JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size) - JEMALLOC_ATTR(malloc); -JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment, - size_t size) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) - JEMALLOC_ATTR(malloc); -JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); -JEMALLOC_EXPORT void je_free(void *ptr); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_malloc(size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_calloc(size_t num, size_t size) + JEMALLOC_CXX_THROW 
JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_posix_memalign(void **memptr, + size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_aligned_alloc(size_t alignment, + size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) + JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_realloc(void *ptr, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) + JEMALLOC_CXX_THROW; -JEMALLOC_EXPORT void *je_mallocx(size_t size, int flags); -JEMALLOC_EXPORT void *je_rallocx(void *ptr, size_t size, int flags); -JEMALLOC_EXPORT size_t je_xallocx(void *ptr, size_t size, size_t extra, +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_mallocx(size_t size, int flags) + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_rallocx(void *ptr, size_t size, + int flags) JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, + size_t extra, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_sallocx(const void *ptr, + int flags) JEMALLOC_ATTR(pure); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, int flags); -JEMALLOC_EXPORT size_t je_sallocx(const void *ptr, int flags); -JEMALLOC_EXPORT void je_dallocx(void *ptr, int flags); -JEMALLOC_EXPORT size_t je_nallocx(size_t size, int flags); - -JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, - size_t *miblenp); -JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, 
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_nallocx(size_t size, int flags) + JEMALLOC_ATTR(pure); + +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, - const char *), void *je_cbopaque, const char *opts); -JEMALLOC_EXPORT size_t je_malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, + size_t *mibp, size_t *miblenp); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print( + void (*write_cb)(void *, const char *), void *je_cbopaque, + const char *opts); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; #ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) - JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_memalign(size_t alignment, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); #endif #ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *je_valloc(size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(malloc); #endif -#ifdef JEMALLOC_EXPERIMENTAL -JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, - int flags) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size, - size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags) - JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags) - JEMALLOC_ATTR(nonnull(1)); 
-JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); -#endif +/* + * void * + * chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, + * bool *commit, unsigned arena_ind); + */ +typedef void *(chunk_alloc_t)(void *, size_t, size_t, bool *, bool *, unsigned); + +/* + * bool + * chunk_dalloc(void *chunk, size_t size, bool committed, unsigned arena_ind); + */ +typedef bool (chunk_dalloc_t)(void *, size_t, bool, unsigned); + +/* + * bool + * chunk_commit(void *chunk, size_t size, size_t offset, size_t length, + * unsigned arena_ind); + */ +typedef bool (chunk_commit_t)(void *, size_t, size_t, size_t, unsigned); + +/* + * bool + * chunk_decommit(void *chunk, size_t size, size_t offset, size_t length, + * unsigned arena_ind); + */ +typedef bool (chunk_decommit_t)(void *, size_t, size_t, size_t, unsigned); + +/* + * bool + * chunk_purge(void *chunk, size_t size, size_t offset, size_t length, + * unsigned arena_ind); + */ +typedef bool (chunk_purge_t)(void *, size_t, size_t, size_t, unsigned); + +/* + * bool + * chunk_split(void *chunk, size_t size, size_t size_a, size_t size_b, + * bool committed, unsigned arena_ind); + */ +typedef bool (chunk_split_t)(void *, size_t, size_t, size_t, bool, unsigned); + +/* + * bool + * chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, + * bool committed, unsigned arena_ind); + */ +typedef bool (chunk_merge_t)(void *, size_t, void *, size_t, bool, unsigned); + +typedef struct { + chunk_alloc_t *alloc; + chunk_dalloc_t *dalloc; + chunk_commit_t *commit; + chunk_decommit_t *decommit; + chunk_purge_t *purge; + chunk_split_t *split; + chunk_merge_t *merge; +} chunk_hooks_t; /* * By default application code must explicitly refer to mangled symbol names, @@ -125,6 +321,7 @@ JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); # define xallocx je_xallocx # define sallocx je_sallocx # define dallocx je_dallocx +# define sdallocx je_sdallocx # define nallocx je_nallocx # 
define mallctl je_mallctl # define mallctlnametomib je_mallctlnametomib @@ -133,11 +330,6 @@ JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); # define malloc_usable_size je_malloc_usable_size # define memalign je_memalign # define valloc je_valloc -# define allocm je_allocm -# define dallocm je_dallocm -# define nallocm je_nallocm -# define rallocm je_rallocm -# define sallocm je_sallocm #endif /* @@ -161,6 +353,7 @@ JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); # undef je_xallocx # undef je_sallocx # undef je_dallocx +# undef je_sdallocx # undef je_nallocx # undef je_mallctl # undef je_mallctlnametomib @@ -169,14 +362,9 @@ JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); # undef je_malloc_usable_size # undef je_memalign # undef je_valloc -# undef je_allocm -# undef je_dallocm -# undef je_nallocm -# undef je_rallocm -# undef je_sallocm #endif #ifdef __cplusplus -}; +} #endif #endif /* JEMALLOC_H_ */ diff --git a/dep/jemalloc/include/msvc_compat/C99/stdbool.h b/dep/jemalloc/include/msvc_compat/C99/stdbool.h new file mode 100644 index 00000000000..d92160ebc75 --- /dev/null +++ b/dep/jemalloc/include/msvc_compat/C99/stdbool.h @@ -0,0 +1,20 @@ +#ifndef stdbool_h +#define stdbool_h + +#include <wtypes.h> + +/* MSVC doesn't define _Bool or bool in C, but does have BOOL */ +/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */ +/* Clang-cl uses MSVC headers, so needs msvc_compat, but has _Bool as + * a built-in type. 
*/ +#ifndef __clang__ +typedef BOOL _Bool; +#endif + +#define bool _Bool +#define true 1 +#define false 0 + +#define __bool_true_false_are_defined 1 + +#endif /* stdbool_h */ diff --git a/dep/jemalloc/include/msvc_compat/C99/stdint.h b/dep/jemalloc/include/msvc_compat/C99/stdint.h new file mode 100644 index 00000000000..d02608a5972 --- /dev/null +++ b/dep/jemalloc/include/msvc_compat/C99/stdint.h @@ -0,0 +1,247 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include <wchar.h> +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. 
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define 
UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// 
WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] diff --git a/dep/jemalloc/include/msvc_compat/strings.h b/dep/jemalloc/include/msvc_compat/strings.h index c84975b6b8e..f01ffdd18a5 100644 --- a/dep/jemalloc/include/msvc_compat/strings.h +++ b/dep/jemalloc/include/msvc_compat/strings.h @@ -3,8 +3,9 @@ /* MSVC doesn't define ffs/ffsl. 
This dummy strings.h header is provided * for both */ -#include <intrin.h> -#pragma intrinsic(_BitScanForward) +#ifdef _MSC_VER +# include <intrin.h> +# pragma intrinsic(_BitScanForward) static __forceinline int ffsl(long x) { unsigned long i; @@ -20,4 +21,9 @@ static __forceinline int ffs(int x) return (ffsl(x)); } +#else +# define ffsl(x) __builtin_ffsl(x) +# define ffs(x) __builtin_ffs(x) #endif + +#endif /* strings_h */ diff --git a/dep/jemalloc/include/msvc_compat/windows_extra.h b/dep/jemalloc/include/msvc_compat/windows_extra.h new file mode 100644 index 00000000000..0c5e323ffb8 --- /dev/null +++ b/dep/jemalloc/include/msvc_compat/windows_extra.h @@ -0,0 +1,26 @@ +#ifndef MSVC_COMPAT_WINDOWS_EXTRA_H +#define MSVC_COMPAT_WINDOWS_EXTRA_H + +#ifndef ENOENT +# define ENOENT ERROR_PATH_NOT_FOUND +#endif +#ifndef EINVAL +# define EINVAL ERROR_BAD_ARGUMENTS +#endif +#ifndef EAGAIN +# define EAGAIN ERROR_OUTOFMEMORY +#endif +#ifndef EPERM +# define EPERM ERROR_WRITE_FAULT +#endif +#ifndef EFAULT +# define EFAULT ERROR_INVALID_ADDRESS +#endif +#ifndef ENOMEM +# define ENOMEM ERROR_NOT_ENOUGH_MEMORY +#endif +#ifndef ERANGE +# define ERANGE ERROR_INVALID_DATA +#endif + +#endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ diff --git a/dep/jemalloc/jemalloc_defs.h.in.cmake b/dep/jemalloc/jemalloc_internal_defs.h.in.cmake index 89e496f4acb..5608c69f610 100644 --- a/dep/jemalloc/jemalloc_defs.h.in.cmake +++ b/dep/jemalloc/jemalloc_internal_defs.h.in.cmake @@ -1,4 +1,6 @@ -/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. 
This makes it possible, with some care, to use @@ -8,39 +10,12 @@ /* #undef JEMALLOC_CPREFIX */ /* - * Name mangling for public symbols is controlled by --with-mangling and - * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by - * these macro definitions. - */ -#define je_malloc_conf malloc_conf -#define je_malloc_message malloc_message -#define je_malloc malloc -#define je_calloc calloc -#define je_posix_memalign posix_memalign -#define je_aligned_alloc aligned_alloc -#define je_realloc realloc -#define je_free free -#define je_malloc_usable_size malloc_usable_size -#define je_malloc_stats_print malloc_stats_print -#define je_mallctl mallctl -#define je_mallctlnametomib mallctlnametomib -#define je_mallctlbymib mallctlbymib -#define je_memalign memalign -#define je_valloc valloc -#define je_allocm allocm -#define je_rallocm rallocm -#define je_sallocm sallocm -#define je_dallocm dallocm -#define je_nallocm nallocm - -/* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. * For shared libraries, symbol visibility mechanisms prevent these symbols * from being exported, but for static libraries, naming collisions are a real * possibility. */ -#define JEMALLOC_PRIVATE_NAMESPACE "" -#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix +#define JEMALLOC_PRIVATE_NAMESPACE je_ /* * Hyper-threaded CPUs may need a special instruction inside spin loops in @@ -48,6 +23,9 @@ */ #define CPU_SPINWAIT __asm__ volatile("pause") +/* Defined if C11 atomics are available. */ +/* #undef JEMALLOC_C11ATOMICS */ + /* Defined if the equivalent of FreeBSD's atomic(9) functions are available. 
*/ /* #undef JEMALLOC_ATOMIC9 */ @@ -61,7 +39,7 @@ * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines) + * functions are defined in libgcc instead of being inlines). */ /* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ @@ -69,17 +47,37 @@ * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines) + * functions are defined in libgcc instead of being inlines). */ /* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ /* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if madvise(2) is available. + */ +#define JEMALLOC_HAVE_MADVISE + +/* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. */ /* #undef JEMALLOC_OSSPIN */ /* + * Defined if secure_getenv(3) is available. + */ +#define JEMALLOC_HAVE_SECURE_GETENV + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc * bootstrapping will cause recursion into the pthreads library. Therefore, if @@ -102,41 +100,15 @@ */ /* #undef JEMALLOC_MUTEX_INIT_CB */ -/* Defined if __attribute__((...)) syntax is supported. 
*/ -#define JEMALLOC_HAVE_ATTR -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -#elif _MSC_VER -# define JEMALLOC_ATTR(s) -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) -# endif -# define JEMALLOC_ALIGNED(s) __declspec(align(s)) -# define JEMALLOC_SECTION(s) __declspec(allocate(s)) -# define JEMALLOC_NOINLINE __declspec(noinline) -#else -# define JEMALLOC_ATTR(s) -# define JEMALLOC_EXPORT -# define JEMALLOC_ALIGNED(s) -# define JEMALLOC_SECTION(s) -# define JEMALLOC_NOINLINE -#endif - -/* Defined if sbrk() is supported. */ -#define JEMALLOC_HAVE_SBRK - /* Non-empty if the tls_model attribute is supported. */ #define JEMALLOC_TLS_MODEL @JEM_TLSMODEL@ /* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ /* #undef JEMALLOC_CC_SILENCE */ +/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ +/* #undef JEMALLOC_CODE_COVERAGE */ + /* * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables * inline functions. @@ -144,7 +116,7 @@ /* #undef JEMALLOC_DEBUG */ /* JEMALLOC_STATS enables statistics calculation. */ -#define JEMALLOC_STATS +/* #undef JEMALLOC_STATS */ /* JEMALLOC_PROF enables allocation profiling. */ /* #undef JEMALLOC_PROF */ @@ -169,14 +141,11 @@ * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage * segment (DSS). */ -/* #undef JEMALLOC_DSS */ +#define JEMALLOC_DSS /* Support memory filling (junk/zero/quarantine/redzone). */ #define JEMALLOC_FILL -/* Support the experimental API. */ -#define JEMALLOC_EXPERIMENTAL - /* Support utrace(2)-based tracing. 
*/ /* #undef JEMALLOC_UTRACE */ @@ -189,8 +158,26 @@ /* Support lazy locking (avoid locking unless a second thread is launched). */ /* #undef JEMALLOC_LAZY_LOCK */ -/* One page is 2^STATIC_PAGE_SHIFT bytes. */ -#define STATIC_PAGE_SHIFT 12 +/* Minimum size class to support is 2^LG_TINY_MIN bytes. */ +#define LG_TINY_MIN 3 + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 12 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE /* * If defined, use munmap() to unmap freed chunks, rather than storing them for @@ -199,37 +186,27 @@ */ /* #undef JEMALLOC_MUNMAP */ -/* - * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is - * disabled by default because it is Linux-specific and it will cause virtual - * memory map holes, much like munmap(2) does. - */ -/* #undef JEMALLOC_MREMAP */ - /* TLS is used to map arenas and magazine caches to threads. */ #define JEMALLOC_TLS /* - * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside - * within jemalloc-owned chunks before dereferencing them. + * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; + * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. */ -/* #undef JEMALLOC_IVSALLOC */ +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs /* - * Define overrides for non-standard allocator-related functions if they - * are present on the system. 
+ * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. */ -#define JEMALLOC_OVERRIDE_MEMALIGN -#define JEMALLOC_OVERRIDE_VALLOC +/* #undef JEMALLOC_IVSALLOC */ /* - * At least Linux omits the "const" in: - * - * size_t malloc_usable_size(const void *ptr); - * - * Match the operating system's prototype. + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. */ -#define JEMALLOC_USABLE_SIZE_CONST +#define JEMALLOC_CACHE_OBLIVIOUS /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. @@ -250,13 +227,14 @@ #define JEMALLOC_PURGE_MADVISE_DONTNEED /* #undef JEMALLOC_PURGE_MADVISE_FREE */ -/* - * Define if operating system has alloca.h header. - */ -#define JEMALLOC_HAS_ALLOCA_H +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H 1 -/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#define LG_SIZEOF_PTR @JEM_SIZEDEF@ +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT 1 + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ /* sizeof(int) == 2^LG_SIZEOF_INT. */ #define LG_SIZEOF_INT 2 @@ -267,8 +245,21 @@ /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ #define LG_SIZEOF_INTMAX_T 3 -/* C99 restrict keyword supported. */ -#define JEMALLOC_HAS_RESTRICT +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +#define JEMALLOC_GLIBC_MALLOC_HOOK -/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ -#undef JEMALLOC_CODE_COVERAGE +/* glibc memalign hook. */ +#define JEMALLOC_GLIBC_MEMALIGN_HOOK + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ +/* sizeof(void *) == 2^LG_SIZEOF_PTR. 
*/ +#define LG_SIZEOF_PTR @JEM_SIZEDEF@ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/dep/jemalloc/src/arena.c b/dep/jemalloc/src/arena.c index dad707b63d0..43733cc1572 100644 --- a/dep/jemalloc/src/arena.c +++ b/dep/jemalloc/src/arena.c @@ -5,37 +5,17 @@ /* Data. */ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; +static ssize_t lg_dirty_mult_default; arena_bin_info_t arena_bin_info[NBINS]; -JEMALLOC_ALIGNED(CACHELINE) -const uint8_t small_size2bin[] = { -#define S2B_8(i) i, -#define S2B_16(i) S2B_8(i) S2B_8(i) -#define S2B_32(i) S2B_16(i) S2B_16(i) -#define S2B_64(i) S2B_32(i) S2B_32(i) -#define S2B_128(i) S2B_64(i) S2B_64(i) -#define S2B_256(i) S2B_128(i) S2B_128(i) -#define S2B_512(i) S2B_256(i) S2B_256(i) -#define S2B_1024(i) S2B_512(i) S2B_512(i) -#define S2B_2048(i) S2B_1024(i) S2B_1024(i) -#define S2B_4096(i) S2B_2048(i) S2B_2048(i) -#define S2B_8192(i) S2B_4096(i) S2B_4096(i) -#define SIZE_CLASS(bin, delta, size) \ - S2B_##delta(bin) - SIZE_CLASSES -#undef S2B_8 -#undef S2B_16 -#undef S2B_32 -#undef S2B_64 -#undef S2B_128 -#undef S2B_256 -#undef S2B_512 -#undef S2B_1024 -#undef S2B_2048 -#undef S2B_4096 -#undef S2B_8192 -#undef SIZE_CLASS -}; +size_t map_bias; +size_t map_misc_offset; +size_t arena_maxrun; /* Max run size for arenas. */ +size_t large_maxclass; /* Max large size class. */ +static size_t small_maxrun; /* Max run size used for small size classes. */ +static bool *small_run_tab; /* Valid small run page multiples. */ +unsigned nlclasses; /* Number of large size classes. */ +unsigned nhclasses; /* Number of huge size classes. 
*/ /******************************************************************************/ /* @@ -45,7 +25,7 @@ const uint8_t small_size2bin[] = { static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, - bool cleaned); + bool cleaned, bool decommitted); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, @@ -53,296 +33,326 @@ static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, /******************************************************************************/ -static inline int -arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) +#define CHUNK_MAP_KEY ((uintptr_t)0x1U) + +JEMALLOC_INLINE_C arena_chunk_map_misc_t * +arena_miscelm_key_create(size_t size) { - uintptr_t a_mapelm = (uintptr_t)a; - uintptr_t b_mapelm = (uintptr_t)b; - assert(a != NULL); - assert(b != NULL); + return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) | + CHUNK_MAP_KEY)); +} + +JEMALLOC_INLINE_C bool +arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm) +{ - return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm)); + return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0); } -/* Generate red-black tree functions. 
*/ -rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t, - u.rb_link, arena_run_comp) +#undef CHUNK_MAP_KEY -static inline int -arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) +JEMALLOC_INLINE_C size_t +arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) { - int ret; - size_t a_size = a->bits & ~PAGE_MASK; - size_t b_size = b->bits & ~PAGE_MASK; - ret = (a_size > b_size) - (a_size < b_size); - if (ret == 0) { - uintptr_t a_mapelm, b_mapelm; + assert(arena_miscelm_is_key(miscelm)); - if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY) - a_mapelm = (uintptr_t)a; - else { - /* - * Treat keys as though they are lower than anything - * else. - */ - a_mapelm = 0; - } - b_mapelm = (uintptr_t)b; + return (arena_mapbits_size_decode((uintptr_t)miscelm)); +} - ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); - } +JEMALLOC_INLINE_C size_t +arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk; + size_t pageind, mapbits; - return (ret); -} + assert(!arena_miscelm_is_key(miscelm)); -/* Generate red-black tree functions. */ -rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, - u.rb_link, arena_avail_comp) + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + pageind = arena_miscelm_to_pageind(miscelm); + mapbits = arena_mapbits_get(chunk, pageind); + return (arena_mapbits_size_decode(mapbits)); +} -static inline int -arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b) +JEMALLOC_INLINE_C int +arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) { + uintptr_t a_miscelm = (uintptr_t)a; + uintptr_t b_miscelm = (uintptr_t)b; assert(a != NULL); assert(b != NULL); - /* - * Short-circuit for self comparison. The following comparison code - * would come to the same result, but at the cost of executing the slow - * path. - */ - if (a == b) - return (0); + return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm)); +} + +/* Generate red-black tree functions. 
*/ +rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, + rb_link, arena_run_comp) + +static size_t +run_quantize(size_t size) +{ + size_t qsize; + + assert(size != 0); + assert(size == PAGE_CEILING(size)); + + /* Don't change sizes that are valid small run sizes. */ + if (size <= small_maxrun && small_run_tab[size >> LG_PAGE]) + return (size); /* - * Order such that chunks with higher fragmentation are "less than" - * those with lower fragmentation -- purging order is from "least" to - * "greatest". Fragmentation is measured as: - * - * mean current avail run size - * -------------------------------- - * mean defragmented avail run size - * - * navail - * ----------- - * nruns_avail nruns_avail-nruns_adjac - * = ========================= = ----------------------- - * navail nruns_avail - * ----------------------- - * nruns_avail-nruns_adjac - * - * The following code multiplies away the denominator prior to - * comparison, in order to avoid division. - * + * Round down to the nearest run size that can actually be requested + * during normal large allocation. Add large_pad so that cache index + * randomization can offset the allocation from the page boundary. */ - { - size_t a_val = (a->nruns_avail - a->nruns_adjac) * - b->nruns_avail; - size_t b_val = (b->nruns_avail - b->nruns_adjac) * - a->nruns_avail; + qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; + if (qsize <= SMALL_MAXCLASS + large_pad) + return (run_quantize(size - large_pad)); + assert(qsize <= size); + return (qsize); +} + +static size_t +run_quantize_next(size_t size) +{ + size_t large_run_size_next; + + assert(size != 0); + assert(size == PAGE_CEILING(size)); - if (a_val < b_val) - return (1); - if (a_val > b_val) - return (-1); - } /* - * Break ties by chunk address. For fragmented chunks, report lower - * addresses as "lower", so that fragmentation reduction happens first - * at lower addresses. 
However, use the opposite ordering for - * unfragmented chunks, in order to increase the chances of - * re-allocating dirty runs. + * Return the next quantized size greater than the input size. + * Quantized sizes comprise the union of run sizes that back small + * region runs, and run sizes that back large regions with no explicit + * alignment constraints. */ - { - uintptr_t a_chunk = (uintptr_t)a; - uintptr_t b_chunk = (uintptr_t)b; - int ret = ((a_chunk > b_chunk) - (a_chunk < b_chunk)); - if (a->nruns_adjac == 0) { - assert(b->nruns_adjac == 0); - ret = -ret; + + if (size > SMALL_MAXCLASS) { + large_run_size_next = PAGE_CEILING(index2size(size2index(size - + large_pad) + 1) + large_pad); + } else + large_run_size_next = SIZE_T_MAX; + if (size >= small_maxrun) + return (large_run_size_next); + + while (true) { + size += PAGE; + assert(size <= small_maxrun); + if (small_run_tab[size >> LG_PAGE]) { + if (large_run_size_next < size) + return (large_run_size_next); + return (size); } - return (ret); } } -/* Generate red-black tree functions. */ -rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t, - dirty_link, arena_chunk_dirty_comp) - -static inline bool -arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind) +static size_t +run_quantize_first(size_t size) { - bool ret; + size_t qsize = run_quantize(size); - if (pageind-1 < map_bias) - ret = false; - else { - ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0); - assert(ret == false || arena_mapbits_dirty_get(chunk, - pageind-1) != arena_mapbits_dirty_get(chunk, pageind)); + if (qsize < size) { + /* + * Skip a quantization that may have an adequately large run, + * because under-sized runs may be mixed in. This only happens + * when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. 
+ */ + qsize = run_quantize_next(size); } - return (ret); + return (qsize); } -static inline bool -arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages) +JEMALLOC_INLINE_C int +arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) { - bool ret; + int ret; + uintptr_t a_miscelm = (uintptr_t)a; + size_t a_qsize = run_quantize(arena_miscelm_is_key(a) ? + arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a)); + size_t b_qsize = run_quantize(arena_miscelm_size_get(b)); - if (pageind+npages == chunk_npages) - ret = false; - else { - assert(pageind+npages < chunk_npages); - ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0); - assert(ret == false || arena_mapbits_dirty_get(chunk, pageind) - != arena_mapbits_dirty_get(chunk, pageind+npages)); + /* + * Compare based on quantized size rather than size, in order to sort + * equally useful runs only by address. + */ + ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); + if (ret == 0) { + if (!arena_miscelm_is_key(a)) { + uintptr_t b_miscelm = (uintptr_t)b; + + ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm); + } else { + /* + * Treat keys as if they are lower than anything else. + */ + ret = -1; + } } + return (ret); } -static inline bool -arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages) +/* Generate red-black tree functions. 
*/ +rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, + arena_chunk_map_misc_t, rb_link, arena_avail_comp) + +static void +arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages) { - return (arena_avail_adjac_pred(chunk, pageind) || - arena_avail_adjac_succ(chunk, pageind, npages)); + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk, + pageind)); } static void -arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, - size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages) { assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); + arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk, + pageind)); +} - /* - * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be - * removed and reinserted even if the run to be inserted is clean. 
- */ - if (chunk->ndirty != 0) - arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); - - if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) - chunk->nruns_adjac++; - if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) - chunk->nruns_adjac++; - chunk->nruns_avail++; - assert(chunk->nruns_avail > chunk->nruns_adjac); +static void +arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages) +{ + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - arena->ndirty += npages; - chunk->ndirty += npages; - } - if (chunk->ndirty != 0) - arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); + assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == + CHUNK_MAP_DIRTY); - arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk, - pageind)); + qr_new(&miscelm->rd, rd_link); + qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link); + arena->ndirty += npages; } static void -arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, - size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages) { + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); + assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); + assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == + CHUNK_MAP_DIRTY); - /* - * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be - * removed and reinserted even if the run to be removed is clean. 
- */ - if (chunk->ndirty != 0) - arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); + qr_remove(&miscelm->rd, rd_link); + assert(arena->ndirty >= npages); + arena->ndirty -= npages; +} - if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) - chunk->nruns_adjac--; - if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) - chunk->nruns_adjac--; - chunk->nruns_avail--; - assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail - == 0 && chunk->nruns_adjac == 0)); +static size_t +arena_chunk_dirty_npages(const extent_node_t *node) +{ + + return (extent_node_size_get(node) >> LG_PAGE); +} - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - arena->ndirty -= npages; - chunk->ndirty -= npages; +void +arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache) +{ + + if (cache) { + extent_node_dirty_linkage_init(node); + extent_node_dirty_insert(node, &arena->runs_dirty, + &arena->chunks_cache); + arena->ndirty += arena_chunk_dirty_npages(node); } - if (chunk->ndirty != 0) - arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); +} - arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk, - pageind)); +void +arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool dirty) +{ + + if (dirty) { + extent_node_dirty_remove(node); + assert(arena->ndirty >= arena_chunk_dirty_npages(node)); + arena->ndirty -= arena_chunk_dirty_npages(node); + } } -static inline void * +JEMALLOC_INLINE_C void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; unsigned regind; - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); + arena_chunk_map_misc_t *miscelm; + void *rpages; assert(run->nfree > 0); - assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); + assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info)); - regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)run + 
(uintptr_t)bin_info->reg0_offset + + regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info); + miscelm = arena_run_to_miscelm(run); + rpages = arena_miscelm_to_rpages(miscelm); + ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(bin_info->reg_interval * regind)); run->nfree--; - if (regind == run->nextind) - run->nextind++; - assert(regind < run->nextind); return (ret); } -static inline void +JEMALLOC_INLINE_C void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - size_t binind = arena_ptr_small_binind_get(ptr, mapbits); + szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - ((uintptr_t)run + + assert(((uintptr_t)ptr - + ((uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run)) + (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_interval == 0); - assert((uintptr_t)ptr >= (uintptr_t)run + + assert((uintptr_t)ptr >= + (uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run)) + (uintptr_t)bin_info->reg0_offset); /* Freeing an unallocated pointer can cause assertion failure. 
*/ - assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); + assert(bitmap_get(run->bitmap, &bin_info->bitmap_info, regind)); - bitmap_unset(bitmap, &bin_info->bitmap_info, regind); + bitmap_unset(run->bitmap, &bin_info->bitmap_info, regind); run->nfree++; } -static inline void +JEMALLOC_INLINE_C void arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) { - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), (npages << LG_PAGE)); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + (run_ind << LG_PAGE)), (npages << LG_PAGE)); memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, (npages << LG_PAGE)); } -static inline void +JEMALLOC_INLINE_C void arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind) { - VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), PAGE); + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind + << LG_PAGE)), PAGE); } -static inline void +JEMALLOC_INLINE_C void arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { size_t i; @@ -358,9 +368,9 @@ arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) { if (config_stats) { - ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + - add_pages) << LG_PAGE) - CHUNK_CEILING((arena->nactive - - sub_pages) << LG_PAGE); + ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages + - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << + LG_PAGE); if (cactive_diff != 0) stats_cactive_add(cactive_diff); } @@ -368,10 +378,12 @@ arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) static void arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, - size_t flag_dirty, size_t need_pages) + size_t flag_dirty, size_t flag_decommitted, size_t need_pages) { size_t total_pages, rem_pages; + assert(flag_dirty == 0 || flag_decommitted == 0); + total_pages = arena_mapbits_unallocated_size_get(chunk, 
run_ind) >> LG_PAGE; assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == @@ -379,58 +391,75 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); + arena_avail_remove(arena, chunk, run_ind, total_pages); + if (flag_dirty != 0) + arena_run_dirty_remove(arena, chunk, run_ind, total_pages); arena_cactive_update(arena, need_pages, 0); arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ if (rem_pages > 0) { + size_t flags = flag_dirty | flag_decommitted; + size_t flag_unzeroed_mask = (flags == 0) ? CHUNK_MAP_UNZEROED : + 0; + + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), flags | + (arena_mapbits_unzeroed_get(chunk, run_ind+need_pages) & + flag_unzeroed_mask)); + arena_mapbits_unallocated_set(chunk, run_ind+total_pages-1, + (rem_pages << LG_PAGE), flags | + (arena_mapbits_unzeroed_get(chunk, run_ind+total_pages-1) & + flag_unzeroed_mask)); if (flag_dirty != 0) { - arena_mapbits_unallocated_set(chunk, - run_ind+need_pages, (rem_pages << LG_PAGE), - flag_dirty); - arena_mapbits_unallocated_set(chunk, - run_ind+total_pages-1, (rem_pages << LG_PAGE), - flag_dirty); - } else { - arena_mapbits_unallocated_set(chunk, run_ind+need_pages, - (rem_pages << LG_PAGE), - arena_mapbits_unzeroed_get(chunk, - run_ind+need_pages)); - arena_mapbits_unallocated_set(chunk, - run_ind+total_pages-1, (rem_pages << LG_PAGE), - arena_mapbits_unzeroed_get(chunk, - run_ind+total_pages-1)); + arena_run_dirty_insert(arena, chunk, run_ind+need_pages, + rem_pages); } - arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, - false, true); + arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages); } } -static void +static bool arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, bool remove, bool zero) { 
arena_chunk_t *chunk; - size_t flag_dirty, run_ind, need_pages, i; + arena_chunk_map_misc_t *miscelm; + size_t flag_dirty, flag_decommitted, run_ind, need_pages; + size_t flag_unzeroed_mask; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + miscelm = arena_run_to_miscelm(run); + run_ind = arena_miscelm_to_pageind(miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); + flag_decommitted = arena_mapbits_decommitted_get(chunk, run_ind); need_pages = (size >> LG_PAGE); assert(need_pages > 0); + if (flag_decommitted != 0 && arena->chunk_hooks.commit(chunk, chunksize, + run_ind << LG_PAGE, size, arena->ind)) + return (true); + if (remove) { arena_run_split_remove(arena, chunk, run_ind, flag_dirty, - need_pages); + flag_decommitted, need_pages); } if (zero) { - if (flag_dirty == 0) { + if (flag_decommitted != 0) { + /* The run is untouched, and therefore zeroed. */ + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void + *)((uintptr_t)chunk + (run_ind << LG_PAGE)), + (need_pages << LG_PAGE)); + } else if (flag_dirty != 0) { + /* The run is dirty, so all pages must be zeroed. */ + arena_run_zero(chunk, run_ind, need_pages); + } else { /* * The run is clean, so some pages may be zeroed (i.e. * never before touched). */ + size_t i; for (i = 0; i < need_pages; i++) { if (arena_mapbits_unzeroed_get(chunk, run_ind+i) != 0) @@ -443,12 +472,9 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, run_ind+i); } } - } else { - /* The run is dirty, so all pages must be zeroed. */ - arena_run_zero(chunk, run_ind, need_pages); } } else { - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); } @@ -456,68 +482,66 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, * Set the last element first, in case the run only contains one page * (i.e. 
both statements set the same element). */ - arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty); - arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); + flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ? + CHUNK_MAP_UNZEROED : 0; + arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty | + (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages-1))); + arena_mapbits_large_set(chunk, run_ind, size, flag_dirty | + (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, run_ind))); + return (false); } -static void +static bool arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) { - arena_run_split_large_helper(arena, run, size, true, zero); + return (arena_run_split_large_helper(arena, run, size, true, zero)); } -static void +static bool arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) { - arena_run_split_large_helper(arena, run, size, false, zero); + return (arena_run_split_large_helper(arena, run, size, false, zero)); } -static void +static bool arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, - size_t binind) + szind_t binind) { arena_chunk_t *chunk; - size_t flag_dirty, run_ind, need_pages, i; + arena_chunk_map_misc_t *miscelm; + size_t flag_dirty, flag_decommitted, run_ind, need_pages, i; assert(binind != BININD_INVALID); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + miscelm = arena_run_to_miscelm(run); + run_ind = arena_miscelm_to_pageind(miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); + flag_decommitted = arena_mapbits_decommitted_get(chunk, run_ind); need_pages = (size >> LG_PAGE); assert(need_pages > 0); - arena_run_split_remove(arena, chunk, run_ind, flag_dirty, need_pages); + if (flag_decommitted != 0 && arena->chunk_hooks.commit(chunk, chunksize, + run_ind << LG_PAGE, size, arena->ind)) + return (true); - /* - * Propagate the 
dirty and unzeroed flags to the allocated small run, - * so that arena_dalloc_bin_run() has the ability to conditionally trim - * clean pages. - */ - arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); - /* - * The first page will always be dirtied during small run - * initialization, so a validation failure here would not actually - * cause an observable failure. - */ - if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, - run_ind) == 0) - arena_run_page_validate_zeroed(chunk, run_ind); - for (i = 1; i < need_pages - 1; i++) { - arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); - if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) + arena_run_split_remove(arena, chunk, run_ind, flag_dirty, + flag_decommitted, need_pages); + + for (i = 0; i < need_pages; i++) { + size_t flag_unzeroed = arena_mapbits_unzeroed_get(chunk, + run_ind+i); + arena_mapbits_small_set(chunk, run_ind+i, i, binind, + flag_unzeroed); + if (config_debug && flag_dirty == 0 && flag_unzeroed == 0) arena_run_page_validate_zeroed(chunk, run_ind+i); } - arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1, - binind, flag_dirty); - if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, - run_ind+need_pages-1) == 0) - arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1); - VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); + return (false); } static arena_chunk_t * @@ -533,76 +557,143 @@ arena_chunk_init_spare(arena_t *arena) assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); 
assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); return (chunk); } +static bool +arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero) +{ + + /* + * The extent node notion of "committed" doesn't directly apply to + * arena chunks. Arbitrarily mark them as committed. The commit state + * of runs is tracked individually, and upon chunk deallocation the + * entire chunk is in a consistent commit state. + */ + extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true); + extent_node_achunk_set(&chunk->node, true); + return (chunk_register(chunk, &chunk->node)); +} + static arena_chunk_t * -arena_chunk_init_hard(arena_t *arena) +arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, + bool *zero, bool *commit) { arena_chunk_t *chunk; - bool zero; - size_t unzeroed, i; - - assert(arena->spare == NULL); - zero = false; malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, false, - &zero, arena->dss_prec); + + chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_hooks, NULL, + chunksize, chunksize, zero, commit); + if (chunk != NULL && !*commit) { + /* Commit header. */ + if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << + LG_PAGE, arena->ind)) { + chunk_dalloc_wrapper(arena, chunk_hooks, + (void *)chunk, chunksize, *commit); + chunk = NULL; + } + } + if (chunk != NULL && arena_chunk_register(arena, chunk, *zero)) { + if (!*commit) { + /* Undo commit of header. 
*/ + chunk_hooks->decommit(chunk, chunksize, 0, map_bias << + LG_PAGE, arena->ind); + } + chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, + chunksize, *commit); + chunk = NULL; + } + malloc_mutex_lock(&arena->lock); - if (chunk == NULL) - return (NULL); - if (config_stats) + return (chunk); +} + +static arena_chunk_t * +arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit) +{ + arena_chunk_t *chunk; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + + chunk = chunk_alloc_cache(arena, &chunk_hooks, NULL, chunksize, + chunksize, zero, true); + if (chunk != NULL) { + if (arena_chunk_register(arena, chunk, *zero)) { + chunk_dalloc_cache(arena, &chunk_hooks, chunk, + chunksize, true); + return (NULL); + } + *commit = true; + } + if (chunk == NULL) { + chunk = arena_chunk_alloc_internal_hard(arena, &chunk_hooks, + zero, commit); + } + + if (config_stats && chunk != NULL) { arena->stats.mapped += chunksize; + arena->stats.metadata_mapped += (map_bias << LG_PAGE); + } - chunk->arena = arena; + return (chunk); +} - /* - * Claim that no pages are in use, since the header is merely overhead. - */ - chunk->ndirty = 0; +static arena_chunk_t * +arena_chunk_init_hard(arena_t *arena) +{ + arena_chunk_t *chunk; + bool zero, commit; + size_t flag_unzeroed, flag_decommitted, i; + + assert(arena->spare == NULL); - chunk->nruns_avail = 0; - chunk->nruns_adjac = 0; + zero = false; + commit = false; + chunk = arena_chunk_alloc_internal(arena, &zero, &commit); + if (chunk == NULL) + return (NULL); /* * Initialize the map to contain one maximal free untouched run. Mark - * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. + * the pages as zeroed if chunk_alloc() returned a zeroed or decommitted + * chunk. */ - unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, - unzeroed); + flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED; + flag_decommitted = commit ? 
0 : CHUNK_MAP_DECOMMITTED; + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxrun, + flag_unzeroed | flag_decommitted); /* * There is no need to initialize the internal page map entries unless * the chunk is not zeroed. */ - if (zero == false) { - VALGRIND_MAKE_MEM_UNDEFINED((void *)arena_mapp_get(chunk, - map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, - chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, + if (!zero) { + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( + (void *)arena_bitselm_get(chunk, map_bias+1), + (size_t)((uintptr_t) arena_bitselm_get(chunk, + chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk, map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) - arena_mapbits_unzeroed_set(chunk, i, unzeroed); + arena_mapbits_internal_set(chunk, i, flag_unzeroed); } else { - VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk, - map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, - chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, - map_bias+1))); + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void + *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t) + arena_bitselm_get(chunk, chunk_npages-1) - + (uintptr_t)arena_bitselm_get(chunk, map_bias+1))); if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get(chunk, i) == - unzeroed); + flag_unzeroed); } } } - arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, - unzeroed); + arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun, + flag_unzeroed); return (chunk); } @@ -621,65 +712,383 @@ arena_chunk_alloc(arena_t *arena) } /* Insert the run into the runs_avail tree. 
*/ - arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, - false, false); + arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); return (chunk); } static void -arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) +arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) { + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); + assert(arena_mapbits_decommitted_get(chunk, map_bias) == + arena_mapbits_decommitted_get(chunk, chunk_npages-1)); /* * Remove run from the runs_avail tree, so that the arena does not use * it. */ - arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias, - false, false); + arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + bool committed; arena->spare = chunk; - malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize, true); - malloc_mutex_lock(&arena->lock); - if (config_stats) + if (arena_mapbits_dirty_get(spare, map_bias) != 0) { + arena_run_dirty_remove(arena, spare, map_bias, + chunk_npages-map_bias); + } + + chunk_deregister(spare, &spare->node); + + committed = (arena_mapbits_decommitted_get(spare, map_bias) == + 0); + if (!committed) { + /* + * Decommit the header. Mark the chunk as decommitted + * even if header decommit fails, since treating a + * partially committed chunk as committed has a high + * potential for causing later access of decommitted + * memory. 
+ */ + chunk_hooks = chunk_hooks_get(arena); + chunk_hooks.decommit(spare, chunksize, 0, map_bias << + LG_PAGE, arena->ind); + } + + chunk_dalloc_cache(arena, &chunk_hooks, (void *)spare, + chunksize, committed); + + if (config_stats) { arena->stats.mapped -= chunksize; + arena->stats.metadata_mapped -= (map_bias << LG_PAGE); + } } else arena->spare = chunk; } -static arena_run_t * -arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) +static void +arena_huge_malloc_stats_update(arena_t *arena, size_t usize) { - arena_run_t *run; - arena_chunk_map_t *mapelm, key; + szind_t index = size2index(usize) - nlclasses - NBINS; - key.bits = size | CHUNK_MAP_KEY; - mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; + cassert(config_stats); - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split_large(arena, run, size, zero); - return (run); + arena->stats.nmalloc_huge++; + arena->stats.allocated_huge += usize; + arena->stats.hstats[index].nmalloc++; + arena->stats.hstats[index].curhchunks++; +} + +static void +arena_huge_malloc_stats_update_undo(arena_t *arena, size_t usize) +{ + szind_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.nmalloc_huge--; + arena->stats.allocated_huge -= usize; + arena->stats.hstats[index].nmalloc--; + arena->stats.hstats[index].curhchunks--; +} + +static void +arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) +{ + szind_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.ndalloc_huge++; + arena->stats.allocated_huge -= usize; + arena->stats.hstats[index].ndalloc++; + arena->stats.hstats[index].curhchunks--; +} + +static void +arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize) +{ + szind_t 
index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.ndalloc_huge--; + arena->stats.allocated_huge += usize; + arena->stats.hstats[index].ndalloc--; + arena->stats.hstats[index].curhchunks++; +} + +static void +arena_huge_ralloc_stats_update(arena_t *arena, size_t oldsize, size_t usize) +{ + + arena_huge_dalloc_stats_update(arena, oldsize); + arena_huge_malloc_stats_update(arena, usize); +} + +static void +arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, + size_t usize) +{ + + arena_huge_dalloc_stats_update_undo(arena, oldsize); + arena_huge_malloc_stats_update_undo(arena, usize); +} + +extent_node_t * +arena_node_alloc(arena_t *arena) +{ + extent_node_t *node; + + malloc_mutex_lock(&arena->node_cache_mtx); + node = ql_last(&arena->node_cache, ql_link); + if (node == NULL) { + malloc_mutex_unlock(&arena->node_cache_mtx); + return (base_alloc(sizeof(extent_node_t))); } + ql_tail_remove(&arena->node_cache, extent_node_t, ql_link); + malloc_mutex_unlock(&arena->node_cache_mtx); + return (node); +} - return (NULL); +void +arena_node_dalloc(arena_t *arena, extent_node_t *node) +{ + + malloc_mutex_lock(&arena->node_cache_mtx); + ql_elm_new(node, ql_link); + ql_tail_insert(&arena->node_cache, node, ql_link); + malloc_mutex_unlock(&arena->node_cache_mtx); +} + +static void * +arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, + size_t usize, size_t alignment, bool *zero, size_t csize) +{ + void *ret; + bool commit = true; + + ret = chunk_alloc_wrapper(arena, chunk_hooks, NULL, csize, alignment, + zero, &commit); + if (ret == NULL) { + /* Revert optimistic stats updates. 
*/ + malloc_mutex_lock(&arena->lock); + if (config_stats) { + arena_huge_malloc_stats_update_undo(arena, usize); + arena->stats.mapped -= usize; + } + arena->nactive -= (usize >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); + } + + return (ret); +} + +void * +arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, + bool *zero) +{ + void *ret; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + size_t csize = CHUNK_CEILING(usize); + + malloc_mutex_lock(&arena->lock); + + /* Optimistically update stats. */ + if (config_stats) { + arena_huge_malloc_stats_update(arena, usize); + arena->stats.mapped += usize; + } + arena->nactive += (usize >> LG_PAGE); + + ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment, + zero, true); + malloc_mutex_unlock(&arena->lock); + if (ret == NULL) { + ret = arena_chunk_alloc_huge_hard(arena, &chunk_hooks, usize, + alignment, zero, csize); + } + + if (config_stats && ret != NULL) + stats_cactive_add(usize); + return (ret); +} + +void +arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) +{ + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + size_t csize; + + csize = CHUNK_CEILING(usize); + malloc_mutex_lock(&arena->lock); + if (config_stats) { + arena_huge_dalloc_stats_update(arena, usize); + arena->stats.mapped -= usize; + stats_cactive_sub(usize); + } + arena->nactive -= (usize >> LG_PAGE); + + chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true); + malloc_mutex_unlock(&arena->lock); +} + +void +arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize, + size_t usize) +{ + + assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); + assert(oldsize != usize); + + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena_huge_ralloc_stats_update(arena, oldsize, usize); + if (oldsize < usize) { + size_t udiff = usize - oldsize; + arena->nactive += udiff >> LG_PAGE; + if (config_stats) + stats_cactive_add(udiff); + } else { + size_t udiff = oldsize - usize; 
+ arena->nactive -= udiff >> LG_PAGE; + if (config_stats) + stats_cactive_sub(udiff); + } + malloc_mutex_unlock(&arena->lock); +} + +void +arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, + size_t usize) +{ + size_t udiff = oldsize - usize; + size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); + + malloc_mutex_lock(&arena->lock); + if (config_stats) { + arena_huge_ralloc_stats_update(arena, oldsize, usize); + if (cdiff != 0) { + arena->stats.mapped -= cdiff; + stats_cactive_sub(udiff); + } + } + arena->nactive -= udiff >> LG_PAGE; + + if (cdiff != 0) { + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + void *nchunk = (void *)((uintptr_t)chunk + + CHUNK_CEILING(usize)); + + chunk_dalloc_cache(arena, &chunk_hooks, nchunk, cdiff, true); + } + malloc_mutex_unlock(&arena->lock); +} + +static bool +arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk, + size_t udiff, size_t cdiff) +{ + bool err; + bool commit = true; + + err = (chunk_alloc_wrapper(arena, chunk_hooks, nchunk, cdiff, chunksize, + zero, &commit) == NULL); + if (err) { + /* Revert optimistic stats updates. 
*/ + malloc_mutex_lock(&arena->lock); + if (config_stats) { + arena_huge_ralloc_stats_update_undo(arena, oldsize, + usize); + arena->stats.mapped -= cdiff; + } + arena->nactive -= (udiff >> LG_PAGE); + malloc_mutex_unlock(&arena->lock); + } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, + cdiff, true, arena->ind)) { + chunk_dalloc_arena(arena, chunk_hooks, nchunk, cdiff, *zero, + true); + err = true; + } + return (err); +} + +bool +arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, + size_t usize, bool *zero) +{ + bool err; + chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); + void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); + size_t udiff = usize - oldsize; + size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + + malloc_mutex_lock(&arena->lock); + + /* Optimistically update stats. */ + if (config_stats) { + arena_huge_ralloc_stats_update(arena, oldsize, usize); + arena->stats.mapped += cdiff; + } + arena->nactive += (udiff >> LG_PAGE); + + err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff, + chunksize, zero, true) == NULL); + malloc_mutex_unlock(&arena->lock); + if (err) { + err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks, + chunk, oldsize, usize, zero, nchunk, udiff, + cdiff); + } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, + cdiff, true, arena->ind)) { + chunk_dalloc_arena(arena, &chunk_hooks, nchunk, cdiff, *zero, + true); + err = true; + } + + if (config_stats && !err) + stats_cactive_add(udiff); + return (err); +} + +/* + * Do first-best-fit run selection, i.e. select the lowest run that best fits. + * Run sizes are quantized, so not all candidate runs are necessarily exactly + * the same size. 
+ */ +static arena_run_t * +arena_run_first_best_fit(arena_t *arena, size_t size) +{ + size_t search_size = run_quantize_first(size); + arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size); + arena_chunk_map_misc_t *miscelm = + arena_avail_tree_nsearch(&arena->runs_avail, key); + if (miscelm == NULL) + return (NULL); + return (&miscelm->run); +} + +static arena_run_t * +arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) +{ + arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); + if (run != NULL) { + if (arena_run_split_large(arena, run, size, zero)) + run = NULL; + } + return (run); } static arena_run_t * @@ -688,8 +1097,8 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) arena_chunk_t *chunk; arena_run_t *run; - assert(size <= arena_maxclass); - assert((size & PAGE_MASK) == 0); + assert(size <= arena_maxrun); + assert(size == PAGE_CEILING(size)); /* Search the arena's chunks for the lowest best fit. */ run = arena_run_alloc_large_helper(arena, size, zero); @@ -701,8 +1110,9 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); - arena_run_split_large(arena, run, size, zero); + run = &arena_miscelm_get(chunk, map_bias)->run; + if (arena_run_split_large(arena, run, size, zero)) + run = NULL; return (run); } @@ -715,36 +1125,24 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) { - arena_run_t *run; - arena_chunk_map_t *mapelm, key; - - key.bits = size | CHUNK_MAP_KEY; - mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / 
sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split_small(arena, run, size, binind); - return (run); + arena_run_t *run = arena_run_first_best_fit(arena, size); + if (run != NULL) { + if (arena_run_split_small(arena, run, size, binind)) + run = NULL; } - - return (NULL); + return (run); } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_run_t *run; - assert(size <= arena_maxclass); - assert((size & PAGE_MASK) == 0); + assert(size <= arena_maxrun); + assert(size == PAGE_CEILING(size)); assert(binind != BININD_INVALID); /* Search the arena's chunks for the lowest best fit. */ @@ -757,8 +1155,9 @@ arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); - arena_run_split_small(arena, run, size, binind); + run = &arena_miscelm_get(chunk, map_bias)->run; + if (arena_run_split_small(arena, run, size, binind)) + run = NULL; return (run); } @@ -770,313 +1169,373 @@ arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) return (arena_run_alloc_small_helper(arena, size, binind)); } -static inline void +static bool +arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) +{ + + return (lg_dirty_mult >= -1 && lg_dirty_mult < (ssize_t)(sizeof(size_t) + << 3)); +} + +ssize_t +arena_lg_dirty_mult_get(arena_t *arena) +{ + ssize_t lg_dirty_mult; + + malloc_mutex_lock(&arena->lock); + lg_dirty_mult = arena->lg_dirty_mult; + malloc_mutex_unlock(&arena->lock); + + return (lg_dirty_mult); +} + +bool +arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) +{ + + if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) + return (true); + + malloc_mutex_lock(&arena->lock); + arena->lg_dirty_mult = lg_dirty_mult; + arena_maybe_purge(arena); + 
malloc_mutex_unlock(&arena->lock); + + return (false); +} + +void arena_maybe_purge(arena_t *arena) { - size_t npurgeable, threshold; /* Don't purge if the option is disabled. */ - if (opt_lg_dirty_mult < 0) + if (arena->lg_dirty_mult < 0) return; - /* Don't purge if all dirty pages are already being purged. */ - if (arena->ndirty <= arena->npurgatory) + /* Don't recursively purge. */ + if (arena->purging) return; - npurgeable = arena->ndirty - arena->npurgatory; - threshold = (arena->nactive >> opt_lg_dirty_mult); /* - * Don't purge unless the number of purgeable pages exceeds the - * threshold. + * Iterate, since preventing recursive purging could otherwise leave too + * many dirty pages. */ - if (npurgeable <= threshold) - return; - - arena_purge(arena, false); + while (true) { + size_t threshold = (arena->nactive >> arena->lg_dirty_mult); + if (threshold < chunk_npages) + threshold = chunk_npages; + /* + * Don't purge unless the number of purgeable pages exceeds the + * threshold. + */ + if (arena->ndirty <= threshold) + return; + arena_purge(arena, false); + } } -static arena_chunk_t * -chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) +static size_t +arena_dirty_count(arena_t *arena) { - size_t *ndirty = (size_t *)arg; + size_t ndirty = 0; + arena_runs_dirty_link_t *rdelm; + extent_node_t *chunkselm; - assert(chunk->ndirty != 0); - *ndirty += chunk->ndirty; - return (NULL); + for (rdelm = qr_next(&arena->runs_dirty, rd_link), + chunkselm = qr_next(&arena->chunks_cache, cc_link); + rdelm != &arena->runs_dirty; rdelm = qr_next(rdelm, rd_link)) { + size_t npages; + + if (rdelm == &chunkselm->rd) { + npages = extent_node_size_get(chunkselm) >> LG_PAGE; + chunkselm = qr_next(chunkselm, cc_link); + } else { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + rdelm); + arena_chunk_map_misc_t *miscelm = + arena_rd_to_miscelm(rdelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); + assert(arena_mapbits_allocated_get(chunk, 
pageind) == + 0); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_dirty_get(chunk, pageind) != 0); + npages = arena_mapbits_unallocated_size_get(chunk, + pageind) >> LG_PAGE; + } + ndirty += npages; + } + + return (ndirty); } static size_t -arena_compute_npurgatory(arena_t *arena, bool all) +arena_compute_npurge(arena_t *arena, bool all) { - size_t npurgatory, npurgeable; + size_t npurge; /* * Compute the minimum number of pages that this thread should try to * purge. */ - npurgeable = arena->ndirty - arena->npurgatory; - - if (all == false) { - size_t threshold = (arena->nactive >> opt_lg_dirty_mult); + if (!all) { + size_t threshold = (arena->nactive >> arena->lg_dirty_mult); + threshold = threshold < chunk_npages ? chunk_npages : threshold; - npurgatory = npurgeable - threshold; + npurge = arena->ndirty - threshold; } else - npurgatory = npurgeable; + npurge = arena->ndirty; - return (npurgatory); + return (npurge); } -static void -arena_chunk_stash_dirty(arena_t *arena, arena_chunk_t *chunk, bool all, - arena_chunk_mapelms_t *mapelms) -{ - size_t pageind, npages; - - /* - * Temporarily allocate free dirty runs within chunk. If all is false, - * only operate on dirty runs that are fragments; otherwise operate on - * all dirty runs. - */ - for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - if (arena_mapbits_allocated_get(chunk, pageind) == 0) { +static size_t +arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, + size_t npurge, arena_runs_dirty_link_t *purge_runs_sentinel, + extent_node_t *purge_chunks_sentinel) +{ + arena_runs_dirty_link_t *rdelm, *rdelm_next; + extent_node_t *chunkselm; + size_t nstashed = 0; + + /* Stash at least npurge pages. 
*/ + for (rdelm = qr_next(&arena->runs_dirty, rd_link), + chunkselm = qr_next(&arena->chunks_cache, cc_link); + rdelm != &arena->runs_dirty; rdelm = rdelm_next) { + size_t npages; + rdelm_next = qr_next(rdelm, rd_link); + + if (rdelm == &chunkselm->rd) { + extent_node_t *chunkselm_next; + bool zero; + UNUSED void *chunk; + + chunkselm_next = qr_next(chunkselm, cc_link); + /* + * Allocate. chunkselm remains valid due to the + * dalloc_node=false argument to chunk_alloc_cache(). + */ + zero = false; + chunk = chunk_alloc_cache(arena, chunk_hooks, + extent_node_addr_get(chunkselm), + extent_node_size_get(chunkselm), chunksize, &zero, + false); + assert(chunk == extent_node_addr_get(chunkselm)); + assert(zero == extent_node_zeroed_get(chunkselm)); + extent_node_dirty_insert(chunkselm, purge_runs_sentinel, + purge_chunks_sentinel); + npages = extent_node_size_get(chunkselm) >> LG_PAGE; + chunkselm = chunkselm_next; + } else { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); + arena_chunk_map_misc_t *miscelm = + arena_rd_to_miscelm(rdelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); + arena_run_t *run = &miscelm->run; size_t run_size = arena_mapbits_unallocated_size_get(chunk, pageind); npages = run_size >> LG_PAGE; + assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+npages-1)); - if (arena_mapbits_dirty_get(chunk, pageind) != 0 && - (all || arena_avail_adjac(chunk, pageind, - npages))) { - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); - - arena_run_split_large(arena, run, run_size, - false); - /* Append to list for later processing. */ - ql_elm_new(mapelm, u.ql_link); - ql_tail_insert(mapelms, mapelm, u.ql_link); - } - } else { - /* Skip run. 
*/ - if (arena_mapbits_large_get(chunk, pageind) != 0) { - npages = arena_mapbits_large_size_get(chunk, - pageind) >> LG_PAGE; - } else { - size_t binind; - arena_bin_info_t *bin_info; - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << LG_PAGE)); - - assert(arena_mapbits_small_runind_get(chunk, - pageind) == 0); - binind = arena_bin_index(arena, run->bin); - bin_info = &arena_bin_info[binind]; - npages = bin_info->run_size >> LG_PAGE; + /* + * If purging the spare chunk's run, make it available + * prior to allocation. + */ + if (chunk == arena->spare) + arena_chunk_alloc(arena); + + /* Temporarily allocate the free dirty run. */ + arena_run_split_large(arena, run, run_size, false); + /* Stash. */ + if (false) + qr_new(rdelm, rd_link); /* Redundant. */ + else { + assert(qr_next(rdelm, rd_link) == rdelm); + assert(qr_prev(rdelm, rd_link) == rdelm); } + qr_meld(purge_runs_sentinel, rdelm, rd_link); } + + nstashed += npages; + if (!all && nstashed >= npurge) + break; } - assert(pageind == chunk_npages); - assert(chunk->ndirty == 0 || all == false); - assert(chunk->nruns_adjac == 0); + + return (nstashed); } static size_t -arena_chunk_purge_stashed(arena_t *arena, arena_chunk_t *chunk, - arena_chunk_mapelms_t *mapelms) +arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, + arena_runs_dirty_link_t *purge_runs_sentinel, + extent_node_t *purge_chunks_sentinel) { - size_t npurged, pageind, npages, nmadvise; - arena_chunk_map_t *mapelm; + size_t npurged, nmadvise; + arena_runs_dirty_link_t *rdelm; + extent_node_t *chunkselm; - malloc_mutex_unlock(&arena->lock); if (config_stats) nmadvise = 0; npurged = 0; - ql_foreach(mapelm, mapelms, u.ql_link) { - bool unzeroed; - size_t flag_unzeroed, i; - - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - npages = arena_mapbits_large_size_get(chunk, pageind) >> - LG_PAGE; - assert(pageind + npages <= chunk_npages); - unzeroed = 
pages_purge((void *)((uintptr_t)chunk + (pageind << - LG_PAGE)), (npages << LG_PAGE)); - flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; - /* - * Set the unzeroed flag for all pages, now that pages_purge() - * has returned whether the pages were zeroed as a side effect - * of purging. This chunk map modification is safe even though - * the arena mutex isn't currently owned by this thread, - * because the run is marked as allocated, thus protecting it - * from being modified by any other thread. As long as these - * writes don't perturb the first and last elements' - * CHUNK_MAP_ALLOCATED bits, behavior is well defined. - */ - for (i = 0; i < npages; i++) { - arena_mapbits_unzeroed_set(chunk, pageind+i, - flag_unzeroed); + + malloc_mutex_unlock(&arena->lock); + for (rdelm = qr_next(purge_runs_sentinel, rd_link), + chunkselm = qr_next(purge_chunks_sentinel, cc_link); + rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { + size_t npages; + + if (rdelm == &chunkselm->rd) { + /* + * Don't actually purge the chunk here because 1) + * chunkselm is embedded in the chunk and must remain + * valid, and 2) we deallocate the chunk in + * arena_unstash_purged(), where it is destroyed, + * decommitted, or purged, depending on chunk + * deallocation policy. 
+ */ + size_t size = extent_node_size_get(chunkselm); + npages = size >> LG_PAGE; + chunkselm = qr_next(chunkselm, cc_link); + } else { + size_t pageind, run_size, flag_unzeroed, flags, i; + bool decommitted; + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); + arena_chunk_map_misc_t *miscelm = + arena_rd_to_miscelm(rdelm); + pageind = arena_miscelm_to_pageind(miscelm); + run_size = arena_mapbits_large_size_get(chunk, pageind); + npages = run_size >> LG_PAGE; + + assert(pageind + npages <= chunk_npages); + assert(!arena_mapbits_decommitted_get(chunk, pageind)); + assert(!arena_mapbits_decommitted_get(chunk, + pageind+npages-1)); + decommitted = !chunk_hooks->decommit(chunk, chunksize, + pageind << LG_PAGE, npages << LG_PAGE, arena->ind); + if (decommitted) { + flag_unzeroed = 0; + flags = CHUNK_MAP_DECOMMITTED; + } else { + flag_unzeroed = chunk_purge_wrapper(arena, + chunk_hooks, chunk, chunksize, pageind << + LG_PAGE, run_size) ? CHUNK_MAP_UNZEROED : 0; + flags = flag_unzeroed; + } + arena_mapbits_large_set(chunk, pageind+npages-1, 0, + flags); + arena_mapbits_large_set(chunk, pageind, run_size, + flags); + + /* + * Set the unzeroed flag for internal pages, now that + * chunk_purge_wrapper() has returned whether the pages + * were zeroed as a side effect of purging. This chunk + * map modification is safe even though the arena mutex + * isn't currently owned by this thread, because the run + * is marked as allocated, thus protecting it from being + * modified by any other thread. As long as these + * writes don't perturb the first and last elements' + * CHUNK_MAP_ALLOCATED bits, behavior is well defined. 
+ */ + for (i = 1; i < npages-1; i++) { + arena_mapbits_internal_set(chunk, pageind+i, + flag_unzeroed); + } } + npurged += npages; if (config_stats) nmadvise++; } malloc_mutex_lock(&arena->lock); - if (config_stats) + + if (config_stats) { arena->stats.nmadvise += nmadvise; + arena->stats.purged += npurged; + } return (npurged); } static void -arena_chunk_unstash_purged(arena_t *arena, arena_chunk_t *chunk, - arena_chunk_mapelms_t *mapelms) -{ - arena_chunk_map_t *mapelm; - size_t pageind; - - /* Deallocate runs. */ - for (mapelm = ql_first(mapelms); mapelm != NULL; - mapelm = ql_first(mapelms)) { - arena_run_t *run; - - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << - LG_PAGE)); - ql_remove(mapelms, mapelm, u.ql_link); - arena_run_dalloc(arena, run, false, true); +arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, + arena_runs_dirty_link_t *purge_runs_sentinel, + extent_node_t *purge_chunks_sentinel) +{ + arena_runs_dirty_link_t *rdelm, *rdelm_next; + extent_node_t *chunkselm; + + /* Deallocate chunks/runs. 
*/ + for (rdelm = qr_next(purge_runs_sentinel, rd_link), + chunkselm = qr_next(purge_chunks_sentinel, cc_link); + rdelm != purge_runs_sentinel; rdelm = rdelm_next) { + rdelm_next = qr_next(rdelm, rd_link); + if (rdelm == &chunkselm->rd) { + extent_node_t *chunkselm_next = qr_next(chunkselm, + cc_link); + void *addr = extent_node_addr_get(chunkselm); + size_t size = extent_node_size_get(chunkselm); + bool zeroed = extent_node_zeroed_get(chunkselm); + bool committed = extent_node_committed_get(chunkselm); + extent_node_dirty_remove(chunkselm); + arena_node_dalloc(arena, chunkselm); + chunkselm = chunkselm_next; + chunk_dalloc_arena(arena, chunk_hooks, addr, size, + zeroed, committed); + } else { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); + arena_chunk_map_misc_t *miscelm = + arena_rd_to_miscelm(rdelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); + bool decommitted = (arena_mapbits_decommitted_get(chunk, + pageind) != 0); + arena_run_t *run = &miscelm->run; + qr_remove(rdelm, rd_link); + arena_run_dalloc(arena, run, false, true, decommitted); + } } } -static inline size_t -arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) +static void +arena_purge(arena_t *arena, bool all) { - size_t npurged; - arena_chunk_mapelms_t mapelms; + chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); + size_t npurge, npurgeable, npurged; + arena_runs_dirty_link_t purge_runs_sentinel; + extent_node_t purge_chunks_sentinel; - ql_new(&mapelms); - - /* - * If chunk is the spare, temporarily re-allocate it, 1) so that its - * run is reinserted into runs_avail, and 2) so that it cannot be - * completely discarded by another thread while arena->lock is dropped - * by this thread. Note that the arena_run_dalloc() call will - * implicitly deallocate the chunk, so no explicit action is required - * in this function to deallocate the chunk. 
- * - * Note that once a chunk contains dirty pages, it cannot again contain - * a single run unless 1) it is a dirty run, or 2) this function purges - * dirty pages and causes the transition to a single clean run. Thus - * (chunk == arena->spare) is possible, but it is not possible for - * this function to be called on the spare unless it contains a dirty - * run. - */ - if (chunk == arena->spare) { - assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); - assert(arena_mapbits_dirty_get(chunk, chunk_npages-1) != 0); - - arena_chunk_alloc(arena); - } - - if (config_stats) - arena->stats.purged += chunk->ndirty; + arena->purging = true; /* - * Operate on all dirty runs if there is no clean/dirty run - * fragmentation. + * Calls to arena_dirty_count() are disabled even for debug builds + * because overhead grows nonlinearly as memory usage increases. */ - if (chunk->nruns_adjac == 0) - all = true; - - arena_chunk_stash_dirty(arena, chunk, all, &mapelms); - npurged = arena_chunk_purge_stashed(arena, chunk, &mapelms); - arena_chunk_unstash_purged(arena, chunk, &mapelms); - - return (npurged); -} - -static void -arena_purge(arena_t *arena, bool all) -{ - arena_chunk_t *chunk; - size_t npurgatory; - if (config_debug) { - size_t ndirty = 0; - - arena_chunk_dirty_iter(&arena->chunks_dirty, NULL, - chunks_dirty_iter_cb, (void *)&ndirty); + if (false && config_debug) { + size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert(arena->ndirty > arena->npurgatory || all); - assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory) || all); + assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || all); if (config_stats) arena->stats.npurge++; - /* - * Add the minimum number of pages this thread should try to purge to - * arena->npurgatory. This will keep multiple threads from racing to - * reduce ndirty below the threshold. 
- */ - npurgatory = arena_compute_npurgatory(arena, all); - arena->npurgatory += npurgatory; - - while (npurgatory > 0) { - size_t npurgeable, npurged, nunpurged; - - /* Get next chunk with dirty pages. */ - chunk = arena_chunk_dirty_first(&arena->chunks_dirty); - if (chunk == NULL) { - /* - * This thread was unable to purge as many pages as - * originally intended, due to races with other threads - * that either did some of the purging work, or re-used - * dirty pages. - */ - arena->npurgatory -= npurgatory; - return; - } - npurgeable = chunk->ndirty; - assert(npurgeable != 0); + npurge = arena_compute_npurge(arena, all); + qr_new(&purge_runs_sentinel, rd_link); + extent_node_dirty_linkage_init(&purge_chunks_sentinel); - if (npurgeable > npurgatory && chunk->nruns_adjac == 0) { - /* - * This thread will purge all the dirty pages in chunk, - * so set npurgatory to reflect this thread's intent to - * purge the pages. This tends to reduce the chances - * of the following scenario: - * - * 1) This thread sets arena->npurgatory such that - * (arena->ndirty - arena->npurgatory) is at the - * threshold. - * 2) This thread drops arena->lock. - * 3) Another thread causes one or more pages to be - * dirtied, and immediately determines that it must - * purge dirty pages. - * - * If this scenario *does* play out, that's okay, - * because all of the purging work being done really - * needs to happen. - */ - arena->npurgatory += npurgeable - npurgatory; - npurgatory = npurgeable; - } + npurgeable = arena_stash_dirty(arena, &chunk_hooks, all, npurge, + &purge_runs_sentinel, &purge_chunks_sentinel); + assert(npurgeable >= npurge); + npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); + assert(npurged == npurgeable); + arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); - /* - * Keep track of how many pages are purgeable, versus how many - * actually get purged, and adjust counters accordingly. 
- */ - arena->npurgatory -= npurgeable; - npurgatory -= npurgeable; - npurged = arena_chunk_purge(arena, chunk, all); - nunpurged = npurgeable - npurged; - arena->npurgatory += nunpurged; - npurgatory += nunpurged; - } + arena->purging = false; } void @@ -1090,7 +1549,8 @@ arena_purge_all(arena_t *arena) static void arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, - size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty) + size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty, + size_t flag_decommitted) { size_t size = *p_size; size_t run_ind = *p_run_ind; @@ -1099,7 +1559,9 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, /* Try to coalesce forward. */ if (run_ind + run_pages < chunk_npages && arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 && - arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) { + arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty && + arena_mapbits_decommitted_get(chunk, run_ind+run_pages) == + flag_decommitted) { size_t nrun_size = arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages); size_t nrun_pages = nrun_size >> LG_PAGE; @@ -1112,8 +1574,18 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, run_ind+run_pages+nrun_pages-1) == nrun_size); assert(arena_mapbits_dirty_get(chunk, run_ind+run_pages+nrun_pages-1) == flag_dirty); - arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages, - false, true); + assert(arena_mapbits_decommitted_get(chunk, + run_ind+run_pages+nrun_pages-1) == flag_decommitted); + arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages); + + /* + * If the successor is dirty, remove it from the set of dirty + * pages. 
+ */ + if (flag_dirty != 0) { + arena_run_dirty_remove(arena, chunk, run_ind+run_pages, + nrun_pages); + } size += nrun_size; run_pages += nrun_pages; @@ -1126,7 +1598,8 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, /* Try to coalesce backward. */ if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1) == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == - flag_dirty) { + flag_dirty && arena_mapbits_decommitted_get(chunk, run_ind-1) == + flag_decommitted) { size_t prun_size = arena_mapbits_unallocated_size_get(chunk, run_ind-1); size_t prun_pages = prun_size >> LG_PAGE; @@ -1140,8 +1613,18 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == prun_size); assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); - arena_avail_remove(arena, chunk, run_ind, prun_pages, true, - false); + assert(arena_mapbits_decommitted_get(chunk, run_ind) == + flag_decommitted); + arena_avail_remove(arena, chunk, run_ind, prun_pages); + + /* + * If the predecessor is dirty, remove it from the set of dirty + * pages. 
+ */ + if (flag_dirty != 0) { + arena_run_dirty_remove(arena, chunk, run_ind, + prun_pages); + } size += prun_size; run_pages += prun_pages; @@ -1156,26 +1639,53 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, *p_run_pages = run_pages; } -static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) +static size_t +arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + size_t run_ind) { - arena_chunk_t *chunk; - size_t size, run_ind, run_pages, flag_dirty; + size_t size; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); assert(run_ind >= map_bias); assert(run_ind < chunk_npages); + if (arena_mapbits_large_get(chunk, run_ind) != 0) { size = arena_mapbits_large_size_get(chunk, run_ind); - assert(size == PAGE || - arena_mapbits_large_size_get(chunk, + assert(size == PAGE || arena_mapbits_large_size_get(chunk, run_ind+(size>>LG_PAGE)-1) == 0); } else { - size_t binind = arena_bin_index(arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + arena_bin_info_t *bin_info = &arena_bin_info[run->binind]; size = bin_info->run_size; } + + return (size); +} + +static bool +arena_run_decommit(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run) +{ + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + size_t run_ind = arena_miscelm_to_pageind(miscelm); + size_t offset = run_ind << LG_PAGE; + size_t length = arena_run_size_get(arena, chunk, run, run_ind); + + return (arena->chunk_hooks.decommit(chunk, chunksize, offset, length, + arena->ind)); +} + +static void +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, + bool decommitted) +{ + arena_chunk_t *chunk; + arena_chunk_map_misc_t *miscelm; + size_t size, run_ind, run_pages, flag_dirty, flag_decommitted; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + miscelm = arena_run_to_miscelm(run); + run_ind = 
arena_miscelm_to_pageind(miscelm); + assert(run_ind >= map_bias); + assert(run_ind < chunk_npages); + size = arena_run_size_get(arena, chunk, run, run_ind); run_pages = (size >> LG_PAGE); arena_cactive_update(arena, 0, run_pages); arena->nactive -= run_pages; @@ -1187,16 +1697,18 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) */ assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) + if (!cleaned && !decommitted && arena_mapbits_dirty_get(chunk, run_ind) + != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; + flag_decommitted = decommitted ? CHUNK_MAP_DECOMMITTED : 0; /* Mark pages as unallocated in the chunk map. */ - if (dirty) { - arena_mapbits_unallocated_set(chunk, run_ind, size, - CHUNK_MAP_DIRTY); + if (dirty || decommitted) { + size_t flags = flag_dirty | flag_decommitted; + arena_mapbits_unallocated_set(chunk, run_ind, size, flags); arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, - CHUNK_MAP_DIRTY); + flags); } else { arena_mapbits_unallocated_set(chunk, run_ind, size, arena_mapbits_unzeroed_get(chunk, run_ind)); @@ -1205,20 +1717,25 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) } arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages, - flag_dirty); + flag_dirty, flag_decommitted); /* Insert into runs_avail, now that coalescing is complete. 
*/ assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - arena_avail_insert(arena, chunk, run_ind, run_pages, true, true); + assert(arena_mapbits_decommitted_get(chunk, run_ind) == + arena_mapbits_decommitted_get(chunk, run_ind+run_pages-1)); + arena_avail_insert(arena, chunk, run_ind, run_pages); + + if (dirty) + arena_run_dirty_insert(arena, chunk, run_ind, run_pages); /* Deallocate chunk if it is now completely unused. */ - if (size == arena_maxclass) { + if (size == arena_maxrun) { assert(run_ind == map_bias); - assert(run_pages == (arena_maxclass >> LG_PAGE)); - arena_chunk_dealloc(arena, chunk); + assert(run_pages == (arena_maxrun >> LG_PAGE)); + arena_chunk_dalloc(arena, chunk); } /* @@ -1233,12 +1750,25 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) } static void +arena_run_dalloc_decommit(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run) +{ + bool committed = arena_run_decommit(arena, chunk, run); + + arena_run_dalloc(arena, run, committed, false, !committed); +} + +static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize) { - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + size_t pageind = arena_miscelm_to_pageind(miscelm); size_t head_npages = (oldsize - newsize) >> LG_PAGE; size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); + size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind); + size_t flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ? + CHUNK_MAP_UNZEROED : 0; assert(oldsize > newsize); @@ -1248,8 +1778,11 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * run first, in case of single-page runs. 
*/ assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); - arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, + pageind+head_npages-1))); + arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty | + (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind))); if (config_debug) { UNUSED size_t tail_npages = newsize >> LG_PAGE; @@ -1259,18 +1792,25 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, pageind+head_npages+tail_npages-1) == flag_dirty); } arena_mapbits_large_set(chunk, pageind+head_npages, newsize, - flag_dirty); + flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, + pageind+head_npages))); - arena_run_dalloc(arena, run, false, false); + arena_run_dalloc(arena, run, false, false, (flag_decommitted != 0)); } static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + size_t pageind = arena_miscelm_to_pageind(miscelm); size_t head_npages = newsize >> LG_PAGE; size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); + size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind); + size_t flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ? + CHUNK_MAP_UNZEROED : 0; + arena_chunk_map_misc_t *tail_miscelm; + arena_run_t *tail_run; assert(oldsize > newsize); @@ -1280,8 +1820,11 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * run first, in case of single-page runs. 
*/ assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); - arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, + pageind+head_npages-1))); + arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty | + (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind))); if (config_debug) { UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; @@ -1291,29 +1834,21 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, pageind+head_npages+tail_npages-1) == flag_dirty); } arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, - flag_dirty); + flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, + pageind+head_npages))); - arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), - dirty, false); + tail_miscelm = arena_miscelm_get(chunk, pageind + head_npages); + tail_run = &tail_miscelm->run; + arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted != + 0)); } static arena_run_t * arena_bin_runs_first(arena_bin_t *bin) { - arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs); - if (mapelm != NULL) { - arena_chunk_t *chunk; - size_t pageind; - arena_run_t *run; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - arena_mapbits_small_runind_get(chunk, pageind)) << - LG_PAGE)); - return (run); - } + arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs); + if (miscelm != NULL) + return (&miscelm->run); return (NULL); } @@ -1321,25 +1856,21 @@ arena_bin_runs_first(arena_bin_t *bin) static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { - arena_chunk_t *chunk = 
CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); + assert(arena_run_tree_search(&bin->runs, miscelm) == NULL); - arena_run_tree_insert(&bin->runs, mapelm); + arena_run_tree_insert(&bin->runs, miscelm); } static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); + assert(arena_run_tree_search(&bin->runs, miscelm) != NULL); - arena_run_tree_remove(&bin->runs, mapelm); + arena_run_tree_remove(&bin->runs, miscelm); } static arena_run_t * @@ -1358,7 +1889,7 @@ static arena_run_t * arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_run_t *run; - size_t binind; + szind_t binind; arena_bin_info_t *bin_info; /* Look for a usable run. */ @@ -1376,14 +1907,10 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) malloc_mutex_lock(&arena->lock); run = arena_run_alloc_small(arena, bin_info->run_size, binind); if (run != NULL) { - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - /* Initialize run internals. 
*/ - run->bin = bin; - run->nextind = 0; + run->binind = binind; run->nfree = bin_info->nregs; - bitmap_init(bitmap, &bin_info->bitmap_info); + bitmap_init(run->bitmap, &bin_info->bitmap_info); } malloc_mutex_unlock(&arena->lock); /********************************/ @@ -1412,8 +1939,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { - void *ret; - size_t binind; + szind_t binind; arena_bin_info_t *bin_info; arena_run_t *run; @@ -1426,6 +1952,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). */ + void *ret; assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { @@ -1459,13 +1986,11 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; - arena_run_t *run; - void *ptr; assert(tbin->ncached == 0); @@ -1475,13 +2000,26 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { + arena_run_t *run; + void *ptr; if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else ptr = arena_bin_malloc_hard(arena, bin); - if (ptr == NULL) + if (ptr == NULL) { + /* + * OOM. tbin->avail isn't yet filled down to its first + * element, so the successful allocations (if any) must + * be moved to the base of tbin->avail before bailing + * out. 
+ */ + if (i > 0) { + memmove(tbin->avail, &tbin->avail[nfill - i], + i * sizeof(void *)); + } break; - if (config_fill && opt_junk) { + } + if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ptr, &arena_bin_info[binind], true); } @@ -1489,9 +2027,9 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, tbin->avail[nfill - 1 - i] = ptr; } if (config_stats) { - bin->stats.allocated += i * arena_bin_info[binind].reg_size; bin->stats.nmalloc += i; bin->stats.nrequests += tbin->tstats.nrequests; + bin->stats.curregs += i; bin->stats.nfills++; tbin->tstats.nrequests = 0; } @@ -1538,29 +2076,35 @@ arena_redzone_corruption_t *arena_redzone_corruption = static void arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) { - size_t size = bin_info->reg_size; - size_t redzone_size = bin_info->redzone_size; - size_t i; bool error = false; - for (i = 1; i <= redzone_size; i++) { - uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); - if (*byte != 0xa5) { - error = true; - arena_redzone_corruption(ptr, size, false, i, *byte); - if (reset) - *byte = 0xa5; + if (opt_junk_alloc) { + size_t size = bin_info->reg_size; + size_t redzone_size = bin_info->redzone_size; + size_t i; + + for (i = 1; i <= redzone_size; i++) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); + if (*byte != 0xa5) { + error = true; + arena_redzone_corruption(ptr, size, false, i, + *byte); + if (reset) + *byte = 0xa5; + } } - } - for (i = 0; i < redzone_size; i++) { - uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); - if (*byte != 0xa5) { - error = true; - arena_redzone_corruption(ptr, size, true, i, *byte); - if (reset) - *byte = 0xa5; + for (i = 0; i < redzone_size; i++) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); + if (*byte != 0xa5) { + error = true; + arena_redzone_corruption(ptr, size, true, i, + *byte); + if (reset) + *byte = 0xa5; + } } } + if (opt_abort && error) abort(); } @@ -1588,14 +2132,14 @@ 
arena_dalloc_junk_small_t *arena_dalloc_junk_small = void arena_quarantine_junk_small(void *ptr, size_t usize) { - size_t binind; + szind_t binind; arena_bin_info_t *bin_info; cassert(config_fill); - assert(opt_junk); + assert(opt_junk_free); assert(opt_quarantine); assert(usize <= SMALL_MAXCLASS); - binind = SMALL_SIZE2BIN(usize); + binind = size2index(usize); bin_info = &arena_bin_info[binind]; arena_redzones_validate(ptr, bin_info, true); } @@ -1606,12 +2150,12 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) void *ret; arena_bin_t *bin; arena_run_t *run; - size_t binind; + szind_t binind; - binind = SMALL_SIZE2BIN(size); + binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; - size = arena_bin_info[binind].reg_size; + size = index2size(binind); malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) @@ -1625,29 +2169,29 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } if (config_stats) { - bin->stats.allocated += size; bin->stats.nmalloc++; bin->stats.nrequests++; + bin->stats.curregs++; } malloc_mutex_unlock(&bin->lock); - if (config_prof && isthreaded == false && arena_prof_accum(arena, size)) + if (config_prof && !isthreaded && arena_prof_accum(arena, size)) prof_idump(); - if (zero == false) { + if (!zero) { if (config_fill) { - if (opt_junk) { + if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); - } else if (opt_zero) + } else if (unlikely(opt_zero)) memset(ret, 0, size); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); } @@ -1658,36 +2202,59 @@ void * arena_malloc_large(arena_t *arena, size_t size, bool zero) { void *ret; + 
size_t usize; + uintptr_t random_offset; + arena_run_t *run; + arena_chunk_map_misc_t *miscelm; UNUSED bool idump; /* Large allocation. */ - size = PAGE_CEILING(size); + usize = s2u(size); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc_large(arena, size, zero); - if (ret == NULL) { + if (config_cache_oblivious) { + uint64_t r; + + /* + * Compute a uniformly distributed offset within the first page + * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 + * for 4 KiB pages and 64-byte cachelines. + */ + prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state, + UINT64_C(6364136223846793009), + UINT64_C(1442695040888963409)); + random_offset = ((uintptr_t)r) << LG_CACHELINE; + } else + random_offset = 0; + run = arena_run_alloc_large(arena, usize + large_pad, zero); + if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); } + miscelm = arena_run_to_miscelm(run); + ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + + random_offset); if (config_stats) { + szind_t index = size2index(usize) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.allocated_large += usize; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } if (config_prof) - idump = arena_prof_accum_locked(arena, size); + idump = arena_prof_accum_locked(arena, usize); malloc_mutex_unlock(&arena->lock); if (config_prof && idump) prof_idump(); - if (zero == false) { + if (!zero) { if (config_fill) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); + if (unlikely(opt_junk_alloc)) + memset(ret, 0xa5, usize); + else if (unlikely(opt_zero)) + memset(ret, 0, usize); } } @@ -1695,18 +2262,25 @@ 
arena_malloc_large(arena_t *arena, size_t size, bool zero) } /* Only handles large allocations that require more than page alignment. */ -void * -arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) +static void * +arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, + bool zero) { void *ret; size_t alloc_size, leadsize, trailsize; arena_run_t *run; arena_chunk_t *chunk; + arena_chunk_map_misc_t *miscelm; + void *rpages; + + assert(usize == PAGE_CEILING(usize)); - assert((size & PAGE_MASK) == 0); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); alignment = PAGE_CEILING(alignment); - alloc_size = size + alignment - PAGE; + alloc_size = usize + large_pad + alignment - PAGE; malloc_mutex_lock(&arena->lock); run = arena_run_alloc_large(arena, alloc_size, false); @@ -1715,37 +2289,94 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) return (NULL); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + miscelm = arena_run_to_miscelm(run); + rpages = arena_miscelm_to_rpages(miscelm); - leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) - - (uintptr_t)run; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; - ret = (void *)((uintptr_t)run + leadsize); + leadsize = ALIGNMENT_CEILING((uintptr_t)rpages, alignment) - + (uintptr_t)rpages; + assert(alloc_size >= leadsize + usize); + trailsize = alloc_size - leadsize - usize - large_pad; if (leadsize != 0) { - arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size - - leadsize); + arena_chunk_map_misc_t *head_miscelm = miscelm; + arena_run_t *head_run = run; + + miscelm = arena_miscelm_get(chunk, + arena_miscelm_to_pageind(head_miscelm) + (leadsize >> + LG_PAGE)); + run = &miscelm->run; + + arena_run_trim_head(arena, chunk, head_run, alloc_size, + alloc_size - leadsize); } if (trailsize != 0) { - arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, - false); + 
arena_run_trim_tail(arena, chunk, run, usize + large_pad + + trailsize, usize + large_pad, false); + } + if (arena_run_init_large(arena, run, usize + large_pad, zero)) { + size_t run_ind = + arena_miscelm_to_pageind(arena_run_to_miscelm(run)); + bool dirty = (arena_mapbits_dirty_get(chunk, run_ind) != 0); + bool decommitted = (arena_mapbits_decommitted_get(chunk, + run_ind) != 0); + + assert(decommitted); /* Cause of OOM. */ + arena_run_dalloc(arena, run, dirty, false, decommitted); + malloc_mutex_unlock(&arena->lock); + return (NULL); } - arena_run_init_large(arena, (arena_run_t *)ret, size, zero); + ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { + szind_t index = size2index(usize) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.allocated_large += usize; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); - if (config_fill && zero == false) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); + if (config_fill && !zero) { + if (unlikely(opt_junk_alloc)) + memset(ret, 0xa5, usize); + else if (unlikely(opt_zero)) + memset(ret, 0, usize); + } + return (ret); +} + +void * +arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, + bool zero, tcache_t *tcache) +{ + void *ret; + + if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE + && (usize & PAGE_MASK) == 0))) { + /* Small; alignment doesn't require special run placement. */ + ret = arena_malloc(tsd, arena, usize, zero, tcache); + } else if (usize <= large_maxclass && alignment <= PAGE) { + /* + * Large; alignment doesn't require special run placement. 
+ * However, the cached pointer may be at a random offset from + * the base of the run, so do some bit manipulation to retrieve + * the base. + */ + ret = arena_malloc(tsd, arena, usize, zero, tcache); + if (config_cache_oblivious) + ret = (void *)((uintptr_t)ret & ~PAGE_MASK); + } else { + if (likely(usize <= large_maxclass)) { + ret = arena_palloc_large(tsd, arena, usize, alignment, + zero); + } else if (likely(alignment <= chunksize)) + ret = huge_malloc(tsd, arena, usize, zero, tcache); + else { + ret = huge_palloc(tsd, arena, usize, alignment, zero, + tcache); + } } return (ret); } @@ -1754,22 +2385,23 @@ void arena_prof_promoted(const void *ptr, size_t size) { arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + szind_t binind; cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr, false) == PAGE); - assert(isalloc(ptr, true) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); + assert(isalloc(ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - binind = SMALL_SIZE2BIN(size); + binind = size2index(size); assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); assert(isalloc(ptr, true) == size); } @@ -1782,7 +2414,8 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - size_t binind = arena_bin_index(chunk->arena, bin); + szind_t binind = arena_bin_index(extent_node_arena_get( + &chunk->node), bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (bin_info->nregs != 1) { @@ -1800,46 +2433,15 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t binind; - arena_bin_info_t *bin_info; - size_t npages, run_ind, past; assert(run != 
bin->runcur); - assert(arena_run_tree_search(&bin->runs, - arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) - == NULL); - - binind = arena_bin_index(chunk->arena, run->bin); - bin_info = &arena_bin_info[binind]; + assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) == + NULL); malloc_mutex_unlock(&bin->lock); /******************************/ - npages = bin_info->run_size >> LG_PAGE; - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - past = (size_t)(PAGE_CEILING((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_interval - bin_info->redzone_size) - - (uintptr_t)chunk) >> LG_PAGE); malloc_mutex_lock(&arena->lock); - - /* - * If the run was originally clean, and some pages were never touched, - * trim the clean pages before deallocating the dirty portion of the - * run. - */ - assert(arena_mapbits_dirty_get(chunk, run_ind) == - arena_mapbits_dirty_get(chunk, run_ind+npages-1)); - if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < - npages) { - /* Trim clean pages. Convert to large run beforehand. 
*/ - assert(npages > 0); - arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0); - arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0); - arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), - ((past - run_ind) << LG_PAGE), false); - /* npages = past - run_ind; */ - } - arena_run_dalloc(arena, run, true, false); + arena_run_dalloc_decommit(arena, chunk, run); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); @@ -1868,26 +2470,24 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_runs_insert(bin, run); } -void -arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_t *mapelm) +static void +arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_bits_t *bitselm, bool junked) { - size_t pageind; + size_t pageind, rpages_ind; arena_run_t *run; arena_bin_t *bin; arena_bin_info_t *bin_info; - size_t size, binind; + szind_t binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); - bin = run->bin; - binind = arena_ptr_small_binind_get(ptr, mapelm->bits); + rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); + run = &arena_miscelm_get(chunk, rpages_ind)->run; + binind = run->binind; + bin = &arena->bins[binind]; bin_info = &arena_bin_info[binind]; - if (config_fill || config_stats) - size = bin_info->reg_size; - if (config_fill && opt_junk) + if (!junked && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, bin_info); arena_run_reg_dalloc(run, ptr); @@ -1898,23 +2498,32 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_bin_lower_run(arena, chunk, run, bin); if (config_stats) { - bin->stats.allocated -= size; bin->stats.ndalloc++; + bin->stats.curregs--; } } void 
+arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_bits_t *bitselm) +{ + + arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true); +} + +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_t *mapelm) + size_t pageind, arena_chunk_map_bits_t *bitselm) { arena_run_t *run; arena_bin_t *bin; + size_t rpages_ind; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); - bin = run->bin; + rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); + run = &arena_miscelm_get(chunk, rpages_ind)->run; + bin = &arena->bins[run->binind]; malloc_mutex_lock(&bin->lock); - arena_dalloc_bin_locked(arena, chunk, ptr, mapelm); + arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); malloc_mutex_unlock(&bin->lock); } @@ -1922,26 +2531,26 @@ void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind) { - arena_chunk_map_t *mapelm; + arena_chunk_map_bits_t *bitselm; if (config_debug) { /* arena_ptr_small_binind_get() does extra sanity checking. 
*/ assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) != BININD_INVALID); } - mapelm = arena_mapp_get(chunk, pageind); - arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); + bitselm = arena_bitselm_get(chunk, pageind); + arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_large #define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) #endif -static void +void arena_dalloc_junk_large(void *ptr, size_t usize) { - if (config_fill && opt_junk) + if (config_fill && unlikely(opt_junk_free)) memset(ptr, 0x5a, usize); } #ifdef JEMALLOC_JET @@ -1951,24 +2560,39 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = JEMALLOC_N(arena_dalloc_junk_large_impl); #endif -void -arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) +static void +arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, + void *ptr, bool junked) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_run_t *run = &miscelm->run; if (config_fill || config_stats) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t usize = arena_mapbits_large_size_get(chunk, pageind); + size_t usize = arena_mapbits_large_size_get(chunk, pageind) - + large_pad; - arena_dalloc_junk_large(ptr, usize); + if (!junked) + arena_dalloc_junk_large(ptr, usize); if (config_stats) { + szind_t index = size2index(usize) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= usize; - arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[index].ndalloc++; + arena->stats.lstats[index].curruns--; } } - arena_run_dalloc(arena, (arena_run_t *)ptr, true, false); + arena_run_dalloc_decommit(arena, chunk, run); +} + +void +arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr) +{ 
+ + arena_dalloc_large_locked_impl(arena, chunk, ptr, true); } void @@ -1976,7 +2600,7 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) { malloc_mutex_lock(&arena->lock); - arena_dalloc_large_locked(arena, chunk, ptr); + arena_dalloc_large_locked_impl(arena, chunk, ptr, false); malloc_mutex_unlock(&arena->lock); } @@ -1984,6 +2608,9 @@ static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_run_t *run = &miscelm->run; assert(size < oldsize); @@ -1992,54 +2619,84 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, * allocations. */ malloc_mutex_lock(&arena->lock); - arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, - true); + arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size + + large_pad, true); if (config_stats) { + szind_t oldindex = size2index(oldsize) - NBINS; + szind_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); } static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t size, size_t extra, bool zero) + size_t oldsize, size_t usize_min, 
size_t usize_max, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t npages = oldsize >> LG_PAGE; + size_t npages = (oldsize + large_pad) >> LG_PAGE; size_t followsize; - assert(oldsize == arena_mapbits_large_size_get(chunk, pageind)); + assert(oldsize == arena_mapbits_large_size_get(chunk, pageind) - + large_pad); /* Try to extend the run. */ - assert(size + extra > oldsize); malloc_mutex_lock(&arena->lock); - if (pageind + npages < chunk_npages && - arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && - (followsize = arena_mapbits_unallocated_size_get(chunk, - pageind+npages)) >= size - oldsize) { + if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, + pageind+npages) != 0) + goto label_fail; + followsize = arena_mapbits_unallocated_size_get(chunk, pageind+npages); + if (oldsize + followsize >= usize_min) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing * allocation. */ - size_t flag_dirty; - size_t splitsize = (oldsize + followsize <= size + extra) - ? followsize : size + extra - oldsize; - arena_run_split_large(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << LG_PAGE)), splitsize, zero); + arena_run_t *run; + size_t usize, splitsize, size, flag_dirty, flag_unzeroed_mask; + + usize = usize_max; + while (oldsize + followsize < usize) + usize = index2size(size2index(usize)-1); + assert(usize >= usize_min); + assert(usize >= oldsize); + splitsize = usize - oldsize; + if (splitsize == 0) + goto label_fail; + + run = &arena_miscelm_get(chunk, pageind+npages)->run; + if (arena_run_split_large(arena, run, splitsize, zero)) + goto label_fail; + + if (config_cache_oblivious && zero) { + /* + * Zero the trailing bytes of the original allocation's + * last page, since they are in an indeterminate state. 
+ * There will always be trailing bytes, because ptr's + * offset from the beginning of the run is a multiple of + * CACHELINE in [0 .. PAGE). + */ + void *zbase = (void *)((uintptr_t)ptr + oldsize); + void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + + PAGE)); + size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; + assert(nzero > 0); + memset(zbase, 0, nzero); + } size = oldsize + splitsize; - npages = size >> LG_PAGE; + npages = (size + large_pad) >> LG_PAGE; /* * Mark the extended run as dirty if either portion of the run @@ -2051,27 +2708,35 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, */ flag_dirty = arena_mapbits_dirty_get(chunk, pageind) | arena_mapbits_dirty_get(chunk, pageind+npages-1); - arena_mapbits_large_set(chunk, pageind, size, flag_dirty); - arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); + flag_unzeroed_mask = flag_dirty == 0 ? CHUNK_MAP_UNZEROED : 0; + arena_mapbits_large_set(chunk, pageind, size + large_pad, + flag_dirty | (flag_unzeroed_mask & + arena_mapbits_unzeroed_get(chunk, pageind))); + arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty | + (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, + pageind+npages-1))); if (config_stats) { + szind_t oldindex = size2index(oldsize) - NBINS; + szind_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + 
arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); return (false); } +label_fail: malloc_mutex_unlock(&arena->lock); - return (true); } @@ -2083,7 +2748,7 @@ static void arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) { - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk_free)) { memset((void *)((uintptr_t)ptr + usize), 0x5a, old_usize - usize); } @@ -2100,131 +2765,132 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) +arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { - size_t psize; + arena_chunk_t *chunk; + arena_t *arena; - psize = PAGE_CEILING(size + extra); - if (psize == oldsize) { - /* Same size class. */ + if (oldsize == usize_max) { + /* Current size class is compatible and maximal. */ return (false); - } else { - arena_chunk_t *chunk; - arena_t *arena; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - - if (psize < oldsize) { - /* Fill before shrinking in order avoid a race. 
*/ - arena_ralloc_junk_large(ptr, oldsize, psize); - arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, - psize); - return (false); - } else { - bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - oldsize, PAGE_CEILING(size), - psize - PAGE_CEILING(size), zero); - if (config_fill && ret == false && zero == false) { - if (opt_junk) { - memset((void *)((uintptr_t)ptr + - oldsize), 0xa5, isalloc(ptr, - config_prof) - oldsize); - } else if (opt_zero) { - memset((void *)((uintptr_t)ptr + - oldsize), 0, isalloc(ptr, - config_prof) - oldsize); - } + } + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = extent_node_arena_get(&chunk->node); + + if (oldsize < usize_max) { + bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, + usize_min, usize_max, zero); + if (config_fill && !ret && !zero) { + if (unlikely(opt_junk_alloc)) { + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, + isalloc(ptr, config_prof) - oldsize); + } else if (unlikely(opt_zero)) { + memset((void *)((uintptr_t)ptr + oldsize), 0, + isalloc(ptr, config_prof) - oldsize); } - return (ret); } + return (ret); } + + assert(oldsize > usize_max); + /* Fill before shrinking in order avoid a race. */ + arena_ralloc_junk_large(ptr, oldsize, usize_max); + arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max); + return (false); } bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { + size_t usize_min, usize_max; - /* - * Avoid moving the allocation if the size class can be left the same. - */ - if (oldsize <= arena_maxclass) { + usize_min = s2u(size); + usize_max = s2u(size + extra); + if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { + /* + * Avoid moving the allocation if the size class can be left the + * same. 
+ */ if (oldsize <= SMALL_MAXCLASS) { - assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size - == oldsize); - if ((size + extra <= SMALL_MAXCLASS && - SMALL_SIZE2BIN(size + extra) == - SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) + assert(arena_bin_info[size2index(oldsize)].reg_size == + oldsize); + if ((usize_max <= SMALL_MAXCLASS && + size2index(usize_max) == size2index(oldsize)) || + (size <= oldsize && usize_max >= oldsize)) return (false); } else { - assert(size <= arena_maxclass); - if (size + extra > SMALL_MAXCLASS) { - if (arena_ralloc_large(ptr, oldsize, size, - extra, zero) == false) + if (usize_max > SMALL_MAXCLASS) { + if (!arena_ralloc_large(ptr, oldsize, usize_min, + usize_max, zero)) return (false); } } + + /* Reallocation would require a move. */ + return (true); + } else { + return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, + zero)); } +} - /* Reallocation would require a move. */ - return (true); +static void * +arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) +{ + + if (alignment == 0) + return (arena_malloc(tsd, arena, usize, zero, tcache)); + usize = sa2u(usize, alignment); + if (usize == 0) + return (NULL); + return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); } void * -arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc) +arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, tcache_t *tcache) { void *ret; - size_t copysize; + size_t usize; - /* Try to avoid moving the allocation. */ - if (arena_ralloc_no_move(ptr, oldsize, size, extra, zero) == false) - return (ptr); + usize = s2u(size); + if (usize == 0) + return (NULL); - /* - * size and oldsize are different enough that we need to move the - * object. 
In that case, fall back to allocating new space and - * copying. - */ - if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment); - if (usize == 0) - return (NULL); - ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); - } else - ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc); + if (likely(usize <= large_maxclass)) { + size_t copysize; - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. */ - if (alignment != 0) { - size_t usize = sa2u(size, alignment); - if (usize == 0) - return (NULL); - ret = ipalloct(usize, alignment, zero, try_tcache_alloc, - arena); - } else - ret = arena_malloc(arena, size, zero, try_tcache_alloc); + /* Try to avoid moving the allocation. */ + if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero)) + return (ptr); + /* + * size and oldsize are different enough that we need to move + * the object. In that case, fall back to allocating new space + * and copying. + */ + ret = arena_ralloc_move_helper(tsd, arena, usize, alignment, + zero, tcache); if (ret == NULL) return (NULL); - } - /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */ + /* + * Junk/zero-filling were already done by + * ipalloc()/arena_malloc(). + */ - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; - VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); - memcpy(ret, ptr, copysize); - iqalloct(ptr, try_tcache_dalloc); + copysize = (usize < oldsize) ? 
usize : oldsize; + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); + memcpy(ret, ptr, copysize); + isqalloc(tsd, ptr, oldsize, tcache); + } else { + ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment, + zero, tcache); + } return (ret); } @@ -2239,24 +2905,46 @@ arena_dss_prec_get(arena_t *arena) return (ret); } -void +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { + if (!have_dss) + return (dss_prec != dss_prec_disabled); malloc_mutex_lock(&arena->lock); arena->dss_prec = dss_prec; malloc_mutex_unlock(&arena->lock); + return (false); +} + +ssize_t +arena_lg_dirty_mult_default_get(void) +{ + + return ((ssize_t)atomic_read_z((size_t *)&lg_dirty_mult_default)); +} + +bool +arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult) +{ + + if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) + return (true); + atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult); + return (false); } void -arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) +arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, + size_t *nactive, size_t *ndirty, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, + malloc_huge_stats_t *hstats) { unsigned i; malloc_mutex_lock(&arena->lock); *dss = dss_prec_names[arena->dss_prec]; + *lg_dirty_mult = arena->lg_dirty_mult; *nactive += arena->nactive; *ndirty += arena->ndirty; @@ -2264,10 +2952,15 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; + astats->metadata_mapped += arena->stats.metadata_mapped; + astats->metadata_allocated += arena_metadata_allocated_get(arena); astats->allocated_large += arena->stats.allocated_large; astats->nmalloc_large += arena->stats.nmalloc_large; astats->ndalloc_large += 
arena->stats.ndalloc_large; astats->nrequests_large += arena->stats.nrequests_large; + astats->allocated_huge += arena->stats.allocated_huge; + astats->nmalloc_huge += arena->stats.nmalloc_huge; + astats->ndalloc_huge += arena->stats.ndalloc_huge; for (i = 0; i < nlclasses; i++) { lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; @@ -2275,16 +2968,22 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, lstats[i].nrequests += arena->stats.lstats[i].nrequests; lstats[i].curruns += arena->stats.lstats[i].curruns; } + + for (i = 0; i < nhclasses; i++) { + hstats[i].nmalloc += arena->stats.hstats[i].nmalloc; + hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; + hstats[i].curhchunks += arena->stats.hstats[i].curhchunks; + } malloc_mutex_unlock(&arena->lock); for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(&bin->lock); - bstats[i].allocated += bin->stats.allocated; bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; + bstats[i].curregs += bin->stats.curregs; if (config_tcache) { bstats[i].nfills += bin->stats.nfills; bstats[i].nflushes += bin->stats.nflushes; @@ -2296,27 +2995,42 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, } } -bool -arena_new(arena_t *arena, unsigned ind) +arena_t * +arena_new(unsigned ind) { + arena_t *arena; unsigned i; arena_bin_t *bin; + /* + * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly + * because there is no way to clean up if base_alloc() OOMs. 
+ */ + if (config_stats) { + arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + + nhclasses) * sizeof(malloc_huge_stats_t)); + } else + arena = (arena_t *)base_alloc(sizeof(arena_t)); + if (arena == NULL) + return (NULL); + arena->ind = ind; arena->nthreads = 0; - if (malloc_mutex_init(&arena->lock)) - return (true); + return (NULL); if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); - arena->stats.lstats = - (malloc_large_stats_t *)base_alloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (arena->stats.lstats == NULL) - return (true); + arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena + + CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); + arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena + + CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); + memset(arena->stats.hstats, 0, nhclasses * + sizeof(malloc_huge_stats_t)); if (config_tcache) ql_new(&arena->tcache_ql); } @@ -2324,56 +3038,76 @@ arena_new(arena_t *arena, unsigned ind) if (config_prof) arena->prof_accumbytes = 0; + if (config_cache_oblivious) { + /* + * A nondeterministic seed based on the address of arena reduces + * the likelihood of lockstep non-uniform cache index + * utilization among identical concurrent processes, but at the + * cost of test repeatability. For debug builds, instead use a + * deterministic seed. + */ + arena->offset_state = config_debug ? ind : + (uint64_t)(uintptr_t)arena; + } + arena->dss_prec = chunk_dss_prec_get(); - /* Initialize chunks. 
*/ - arena_chunk_dirty_new(&arena->chunks_dirty); arena->spare = NULL; + arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); + arena->purging = false; arena->nactive = 0; arena->ndirty = 0; - arena->npurgatory = 0; arena_avail_tree_new(&arena->runs_avail); + qr_new(&arena->runs_dirty, rd_link); + qr_new(&arena->chunks_cache, cc_link); + + ql_new(&arena->huge); + if (malloc_mutex_init(&arena->huge_mtx)) + return (NULL); + + extent_tree_szad_new(&arena->chunks_szad_cached); + extent_tree_ad_new(&arena->chunks_ad_cached); + extent_tree_szad_new(&arena->chunks_szad_retained); + extent_tree_ad_new(&arena->chunks_ad_retained); + if (malloc_mutex_init(&arena->chunks_mtx)) + return (NULL); + ql_new(&arena->node_cache); + if (malloc_mutex_init(&arena->node_cache_mtx)) + return (NULL); + + arena->chunk_hooks = chunk_hooks_default; /* Initialize bins. */ for (i = 0; i < NBINS; i++) { bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock)) - return (true); + return (NULL); bin->runcur = NULL; arena_run_tree_new(&bin->runs); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } - return (false); + return (arena); } /* * Calculate bin_info->run_size such that it meets the following constraints: * - * *) bin_info->run_size >= min_run_size - * *) bin_info->run_size <= arena_maxclass - * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). + * *) bin_info->run_size <= arena_maxrun * *) bin_info->nregs <= RUN_MAXREGS * - * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also - * calculated here, since these settings are all interdependent. + * bin_info->nregs and bin_info->reg0_offset are also calculated here, since + * these settings are all interdependent. 
*/ -static size_t -bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) +static void +bin_info_run_size_calc(arena_bin_info_t *bin_info) { size_t pad_size; - size_t try_run_size, good_run_size; - uint32_t try_nregs, good_nregs; - uint32_t try_hdr_size, good_hdr_size; - uint32_t try_bitmap_offset, good_bitmap_offset; - uint32_t try_ctx0_offset, good_ctx0_offset; - uint32_t try_redzone0_offset, good_redzone0_offset; - - assert(min_run_size >= PAGE); - assert(min_run_size <= arena_maxclass); + size_t try_run_size, perfect_run_size, actual_run_size; + uint32_t try_nregs, perfect_nregs, actual_nregs; /* * Determine redzone size based on minimum alignment and minimum @@ -2382,8 +3116,9 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) * minimum alignment; without the padding, each redzone would have to * be twice as large in order to maintain alignment. */ - if (config_fill && opt_redzone) { - size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1); + if (config_fill && unlikely(opt_redzone)) { + size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - + 1); if (align_min <= REDZONE_MINSIZE) { bin_info->redzone_size = REDZONE_MINSIZE; pad_size = 0; @@ -2399,127 +3134,113 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) (bin_info->redzone_size << 1); /* - * Calculate known-valid settings before entering the run_size - * expansion loop, so that the first part of the loop always copies - * valid settings. - * - * The do..while loop iteratively reduces the number of regions until - * the run header and the regions no longer overlap. A closed formula - * would be quite messy, since there is an interdependency between the - * header's mask length and the number of regions. + * Compute run size under ideal conditions (no redzones, no limit on run + * size). 
*/ - try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin_info->reg_interval) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); - if (config_prof && opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* Add space for one (prof_ctx_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_ctx_t *); - } else - try_ctx0_offset = 0; - try_redzone0_offset = try_run_size - (try_nregs * - bin_info->reg_interval) - pad_size; - } while (try_hdr_size > try_redzone0_offset); - - /* run_size expansion loop. */ + try_run_size = PAGE; + try_nregs = try_run_size / bin_info->reg_size; do { - /* - * Copy valid settings before trying more aggressive settings. - */ - good_run_size = try_run_size; - good_nregs = try_nregs; - good_hdr_size = try_hdr_size; - good_bitmap_offset = try_bitmap_offset; - good_ctx0_offset = try_ctx0_offset; - good_redzone0_offset = try_redzone0_offset; - - /* Try more aggressive settings. */ + perfect_run_size = try_run_size; + perfect_nregs = try_nregs; + try_run_size += PAGE; - try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) / - bin_info->reg_interval) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. 
*/ - try_hdr_size += bitmap_size(try_nregs); - if (config_prof && opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* - * Add space for one (prof_ctx_t *) per region. - */ - try_hdr_size += try_nregs * - sizeof(prof_ctx_t *); - } - try_redzone0_offset = try_run_size - (try_nregs * - bin_info->reg_interval) - pad_size; - } while (try_hdr_size > try_redzone0_offset); - } while (try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) > - RUN_MAX_OVRHD_RELAX - && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size - && try_nregs < RUN_MAXREGS); + try_nregs = try_run_size / bin_info->reg_size; + } while (perfect_run_size != perfect_nregs * bin_info->reg_size); + assert(perfect_nregs <= RUN_MAXREGS); + + actual_run_size = perfect_run_size; + actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; + + /* + * Redzones can require enough padding that not even a single region can + * fit within the number of pages that would normally be dedicated to a + * run for this size class. Increase the run size until at least one + * region fits. + */ + while (actual_nregs == 0) { + assert(config_fill && unlikely(opt_redzone)); - assert(good_hdr_size <= good_redzone0_offset); + actual_run_size += PAGE; + actual_nregs = (actual_run_size - pad_size) / + bin_info->reg_interval; + } + + /* + * Make sure that the run will fit within an arena chunk. + */ + while (actual_run_size > arena_maxrun) { + actual_run_size -= PAGE; + actual_nregs = (actual_run_size - pad_size) / + bin_info->reg_interval; + } + assert(actual_nregs > 0); + assert(actual_run_size == s2u(actual_run_size)); /* Copy final settings. 
*/ - bin_info->run_size = good_run_size; - bin_info->nregs = good_nregs; - bin_info->bitmap_offset = good_bitmap_offset; - bin_info->ctx0_offset = good_ctx0_offset; - bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size; + bin_info->run_size = actual_run_size; + bin_info->nregs = actual_nregs; + bin_info->reg0_offset = actual_run_size - (actual_nregs * + bin_info->reg_interval) - pad_size + bin_info->redzone_size; + + if (actual_run_size > small_maxrun) + small_maxrun = actual_run_size; assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs * bin_info->reg_interval) + pad_size == bin_info->run_size); - - return (good_run_size); } static void bin_info_init(void) { arena_bin_info_t *bin_info; - size_t prev_run_size = PAGE; -#define SIZE_CLASS(bin, delta, size) \ - bin_info = &arena_bin_info[bin]; \ +#define BIN_INFO_INIT_bin_yes(index, size) \ + bin_info = &arena_bin_info[index]; \ bin_info->reg_size = size; \ - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ + bin_info_run_size_calc(bin_info); \ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); +#define BIN_INFO_INIT_bin_no(index, size) +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + BIN_INFO_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)) SIZE_CLASSES -#undef SIZE_CLASS +#undef BIN_INFO_INIT_bin_yes +#undef BIN_INFO_INIT_bin_no +#undef SC } -void +static bool +small_run_size_init(void) +{ + + assert(small_maxrun != 0); + + small_run_tab = (bool *)base_alloc(sizeof(bool) * (small_maxrun >> + LG_PAGE)); + if (small_run_tab == NULL) + return (true); + +#define TAB_INIT_bin_yes(index, size) { \ + arena_bin_info_t *bin_info = &arena_bin_info[index]; \ + small_run_tab[bin_info->run_size >> LG_PAGE] = true; \ + } +#define TAB_INIT_bin_no(index, size) +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + TAB_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)) + SIZE_CLASSES +#undef 
TAB_INIT_bin_yes +#undef TAB_INIT_bin_no +#undef SC + + return (false); +} + +bool arena_boot(void) { - size_t header_size; unsigned i; + arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); + /* * Compute the header size such that it is large enough to contain the * page map. The page map is biased to omit entries for the header @@ -2534,16 +3255,33 @@ arena_boot(void) */ map_bias = 0; for (i = 0; i < 3; i++) { - header_size = offsetof(arena_chunk_t, map) + - (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); - map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK) - != 0); + size_t header_size = offsetof(arena_chunk_t, map_bits) + + ((sizeof(arena_chunk_map_bits_t) + + sizeof(arena_chunk_map_misc_t)) * (chunk_npages-map_bias)); + map_bias = (header_size + PAGE_MASK) >> LG_PAGE; } assert(map_bias > 0); - arena_maxclass = chunksize - (map_bias << LG_PAGE); + map_misc_offset = offsetof(arena_chunk_t, map_bits) + + sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias); + + arena_maxrun = chunksize - (map_bias << LG_PAGE); + assert(arena_maxrun > 0); + large_maxclass = index2size(size2index(chunksize)-1); + if (large_maxclass > arena_maxrun) { + /* + * For small chunk sizes it's possible for there to be fewer + * non-header pages available than are necessary to serve the + * size classes just below chunksize. 
+ */ + large_maxclass = arena_maxrun; + } + assert(large_maxclass > 0); + nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS); + nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); + return (small_run_size_init()); } void @@ -2552,6 +3290,9 @@ arena_prefork(arena_t *arena) unsigned i; malloc_mutex_prefork(&arena->lock); + malloc_mutex_prefork(&arena->huge_mtx); + malloc_mutex_prefork(&arena->chunks_mtx); + malloc_mutex_prefork(&arena->node_cache_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_prefork(&arena->bins[i].lock); } @@ -2563,6 +3304,9 @@ arena_postfork_parent(arena_t *arena) for (i = 0; i < NBINS; i++) malloc_mutex_postfork_parent(&arena->bins[i].lock); + malloc_mutex_postfork_parent(&arena->node_cache_mtx); + malloc_mutex_postfork_parent(&arena->chunks_mtx); + malloc_mutex_postfork_parent(&arena->huge_mtx); malloc_mutex_postfork_parent(&arena->lock); } @@ -2573,5 +3317,8 @@ arena_postfork_child(arena_t *arena) for (i = 0; i < NBINS; i++) malloc_mutex_postfork_child(&arena->bins[i].lock); + malloc_mutex_postfork_child(&arena->node_cache_mtx); + malloc_mutex_postfork_child(&arena->chunks_mtx); + malloc_mutex_postfork_child(&arena->huge_mtx); malloc_mutex_postfork_child(&arena->lock); } diff --git a/dep/jemalloc/src/base.c b/dep/jemalloc/src/base.c index 4e62e8fa918..7cdcfed86bd 100644 --- a/dep/jemalloc/src/base.c +++ b/dep/jemalloc/src/base.c @@ -5,107 +5,138 @@ /* Data. */ static malloc_mutex_t base_mtx; - -/* - * Current pages that are being used for internal memory allocations. These - * pages are carved up in cacheline-size quanta, so that there is no chance of - * false cache line sharing. - */ -static void *base_pages; -static void *base_next_addr; -static void *base_past_addr; /* Addr immediately past base_pages. */ +static extent_tree_t base_avail_szad; static extent_node_t *base_nodes; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static bool base_pages_alloc(size_t minsize); +static size_t base_allocated; +static size_t base_resident; +static size_t base_mapped; /******************************************************************************/ -static bool -base_pages_alloc(size_t minsize) +/* base_mtx must be held. */ +static extent_node_t * +base_node_try_alloc(void) { - size_t csize; - bool zero; + extent_node_t *node; + + if (base_nodes == NULL) + return (NULL); + node = base_nodes; + base_nodes = *(extent_node_t **)node; + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); + return (node); +} - assert(minsize != 0); - csize = CHUNK_CEILING(minsize); - zero = false; - base_pages = chunk_alloc(csize, chunksize, true, &zero, - chunk_dss_prec_get()); - if (base_pages == NULL) - return (true); - base_next_addr = base_pages; - base_past_addr = (void *)((uintptr_t)base_pages + csize); +/* base_mtx must be held. */ +static void +base_node_dalloc(extent_node_t *node) +{ - return (false); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); + *(extent_node_t **)node = base_nodes; + base_nodes = node; } -void * -base_alloc(size_t size) +/* base_mtx must be held. */ +static extent_node_t * +base_chunk_alloc(size_t minsize) { - void *ret; - size_t csize; + extent_node_t *node; + size_t csize, nsize; + void *addr; - /* Round size up to nearest multiple of the cacheline size. */ - csize = CACHELINE_CEILING(size); - - malloc_mutex_lock(&base_mtx); - /* Make sure there's enough space for the allocation. */ - if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) { - if (base_pages_alloc(csize)) { - malloc_mutex_unlock(&base_mtx); - return (NULL); + assert(minsize != 0); + node = base_node_try_alloc(); + /* Allocate enough space to also carve a node out if necessary. */ + nsize = (node == NULL) ? 
CACHELINE_CEILING(sizeof(extent_node_t)) : 0; + csize = CHUNK_CEILING(minsize + nsize); + addr = chunk_alloc_base(csize); + if (addr == NULL) { + if (node != NULL) + base_node_dalloc(node); + return (NULL); + } + base_mapped += csize; + if (node == NULL) { + node = (extent_node_t *)addr; + addr = (void *)((uintptr_t)addr + nsize); + csize -= nsize; + if (config_stats) { + base_allocated += nsize; + base_resident += PAGE_CEILING(nsize); } } - /* Allocate. */ - ret = base_next_addr; - base_next_addr = (void *)((uintptr_t)base_next_addr + csize); - malloc_mutex_unlock(&base_mtx); - VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); - - return (ret); + extent_node_init(node, NULL, addr, csize, true, true); + return (node); } +/* + * base_alloc() guarantees demand-zeroed memory, in order to make multi-page + * sparse data structures such as radix tree nodes efficient with respect to + * physical memory usage. + */ void * -base_calloc(size_t number, size_t size) -{ - void *ret = base_alloc(number * size); - - if (ret != NULL) - memset(ret, 0, number * size); - - return (ret); -} - -extent_node_t * -base_node_alloc(void) +base_alloc(size_t size) { - extent_node_t *ret; + void *ret; + size_t csize, usize; + extent_node_t *node; + extent_node_t key; + + /* + * Round size up to nearest multiple of the cacheline size, so that + * there is no chance of false cache line sharing. + */ + csize = CACHELINE_CEILING(size); + usize = s2u(csize); + extent_node_init(&key, NULL, NULL, usize, false, false); malloc_mutex_lock(&base_mtx); - if (base_nodes != NULL) { - ret = base_nodes; - base_nodes = *(extent_node_t **)ret; - malloc_mutex_unlock(&base_mtx); - VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t)); + node = extent_tree_szad_nsearch(&base_avail_szad, &key); + if (node != NULL) { + /* Use existing space. 
*/ + extent_tree_szad_remove(&base_avail_szad, node); } else { - malloc_mutex_unlock(&base_mtx); - ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); + /* Try to allocate more space. */ + node = base_chunk_alloc(csize); + } + if (node == NULL) { + ret = NULL; + goto label_return; } + ret = extent_node_addr_get(node); + if (extent_node_size_get(node) > csize) { + extent_node_addr_set(node, (void *)((uintptr_t)ret + csize)); + extent_node_size_set(node, extent_node_size_get(node) - csize); + extent_tree_szad_insert(&base_avail_szad, node); + } else + base_node_dalloc(node); + if (config_stats) { + base_allocated += csize; + /* + * Add one PAGE to base_resident for every page boundary that is + * crossed by the new allocation. + */ + base_resident += PAGE_CEILING((uintptr_t)ret + csize) - + PAGE_CEILING((uintptr_t)ret); + } + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize); +label_return: + malloc_mutex_unlock(&base_mtx); return (ret); } void -base_node_dealloc(extent_node_t *node) +base_stats_get(size_t *allocated, size_t *resident, size_t *mapped) { - VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); malloc_mutex_lock(&base_mtx); - *(extent_node_t **)node = base_nodes; - base_nodes = node; + assert(base_allocated <= base_resident); + assert(base_resident <= base_mapped); + *allocated = base_allocated; + *resident = base_resident; + *mapped = base_mapped; malloc_mutex_unlock(&base_mtx); } @@ -113,9 +144,10 @@ bool base_boot(void) { - base_nodes = NULL; if (malloc_mutex_init(&base_mtx)) return (true); + extent_tree_szad_new(&base_avail_szad); + base_nodes = NULL; return (false); } diff --git a/dep/jemalloc/src/bitmap.c b/dep/jemalloc/src/bitmap.c index e2bd907d558..c733372b4cb 100644 --- a/dep/jemalloc/src/bitmap.c +++ b/dep/jemalloc/src/bitmap.c @@ -2,19 +2,6 @@ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static size_t bits2groups(size_t nbits); - -/******************************************************************************/ - -static size_t -bits2groups(size_t nbits) -{ - - return ((nbits >> LG_BITMAP_GROUP_NBITS) + - !!(nbits & BITMAP_GROUP_NBITS_MASK)); -} void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) @@ -31,15 +18,16 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) * that requires only one group. */ binfo->levels[0].group_offset = 0; - group_count = bits2groups(nbits); + group_count = BITMAP_BITS2GROUPS(nbits); for (i = 1; group_count > 1; i++) { assert(i < BITMAP_MAX_LEVELS); binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + group_count; - group_count = bits2groups(group_count); + group_count = BITMAP_BITS2GROUPS(group_count); } binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + group_count; + assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX); binfo->nlevels = i; binfo->nbits = nbits; } diff --git a/dep/jemalloc/src/chunk.c b/dep/jemalloc/src/chunk.c index 90ab116ae5f..6ba1ca7a51b 100644 --- a/dep/jemalloc/src/chunk.c +++ b/dep/jemalloc/src/chunk.c @@ -5,129 +5,315 @@ /* Data. */ const char *opt_dss = DSS_DEFAULT; -size_t opt_lg_chunk = LG_CHUNK_DEFAULT; +size_t opt_lg_chunk = 0; -malloc_mutex_t chunks_mtx; -chunk_stats_t stats_chunks; +/* Used exclusively for gdump triggering. */ +static size_t curchunks; +static size_t highchunks; -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. - */ -static extent_tree_t chunks_szad_mmap; -static extent_tree_t chunks_ad_mmap; -static extent_tree_t chunks_szad_dss; -static extent_tree_t chunks_ad_dss; - -rtree_t *chunks_rtree; +rtree_t chunks_rtree; /* Various chunk-related settings. 
*/ size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). */ size_t chunk_npages; -size_t map_bias; -size_t arena_maxclass; /* Max size class for arenas. */ + +static void *chunk_alloc_default(void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind); +static bool chunk_dalloc_default(void *chunk, size_t size, bool committed, + unsigned arena_ind); +static bool chunk_commit_default(void *chunk, size_t size, size_t offset, + size_t length, unsigned arena_ind); +static bool chunk_decommit_default(void *chunk, size_t size, size_t offset, + size_t length, unsigned arena_ind); +static bool chunk_purge_default(void *chunk, size_t size, size_t offset, + size_t length, unsigned arena_ind); +static bool chunk_split_default(void *chunk, size_t size, size_t size_a, + size_t size_b, bool committed, unsigned arena_ind); +static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, + size_t size_b, bool committed, unsigned arena_ind); + +const chunk_hooks_t chunk_hooks_default = { + chunk_alloc_default, + chunk_dalloc_default, + chunk_commit_default, + chunk_decommit_default, + chunk_purge_default, + chunk_split_default, + chunk_merge_default +}; /******************************************************************************/ -/* Function prototypes for non-inline static functions. */ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ -static void *chunk_recycle(extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base, - bool *zero); -static void chunk_record(extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, void *chunk, size_t size); +static void chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, + void *chunk, size_t size, bool zeroed, bool committed); /******************************************************************************/ +static chunk_hooks_t +chunk_hooks_get_locked(arena_t *arena) +{ + + return (arena->chunk_hooks); +} + +chunk_hooks_t +chunk_hooks_get(arena_t *arena) +{ + chunk_hooks_t chunk_hooks; + + malloc_mutex_lock(&arena->chunks_mtx); + chunk_hooks = chunk_hooks_get_locked(arena); + malloc_mutex_unlock(&arena->chunks_mtx); + + return (chunk_hooks); +} + +chunk_hooks_t +chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks) +{ + chunk_hooks_t old_chunk_hooks; + + malloc_mutex_lock(&arena->chunks_mtx); + old_chunk_hooks = arena->chunk_hooks; + /* + * Copy each field atomically so that it is impossible for readers to + * see partially updated pointers. There are places where readers only + * need one hook function pointer (therefore no need to copy the + * entirety of arena->chunk_hooks), and stale reads do not affect + * correctness, so they perform unlocked reads. 
+ */ +#define ATOMIC_COPY_HOOK(n) do { \ + union { \ + chunk_##n##_t **n; \ + void **v; \ + } u; \ + u.n = &arena->chunk_hooks.n; \ + atomic_write_p(u.v, chunk_hooks->n); \ +} while (0) + ATOMIC_COPY_HOOK(alloc); + ATOMIC_COPY_HOOK(dalloc); + ATOMIC_COPY_HOOK(commit); + ATOMIC_COPY_HOOK(decommit); + ATOMIC_COPY_HOOK(purge); + ATOMIC_COPY_HOOK(split); + ATOMIC_COPY_HOOK(merge); +#undef ATOMIC_COPY_HOOK + malloc_mutex_unlock(&arena->chunks_mtx); + + return (old_chunk_hooks); +} + +static void +chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks, + bool locked) +{ + static const chunk_hooks_t uninitialized_hooks = + CHUNK_HOOKS_INITIALIZER; + + if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) == + 0) { + *chunk_hooks = locked ? chunk_hooks_get_locked(arena) : + chunk_hooks_get(arena); + } +} + +static void +chunk_hooks_assure_initialized_locked(arena_t *arena, + chunk_hooks_t *chunk_hooks) +{ + + chunk_hooks_assure_initialized_impl(arena, chunk_hooks, true); +} + +static void +chunk_hooks_assure_initialized(arena_t *arena, chunk_hooks_t *chunk_hooks) +{ + + chunk_hooks_assure_initialized_impl(arena, chunk_hooks, false); +} + +bool +chunk_register(const void *chunk, const extent_node_t *node) +{ + + assert(extent_node_addr_get(node) == chunk); + + if (rtree_set(&chunks_rtree, (uintptr_t)chunk, node)) + return (true); + if (config_prof && opt_prof) { + size_t size = extent_node_size_get(node); + size_t nadd = (size == 0) ? 1 : size / chunksize; + size_t cur = atomic_add_z(&curchunks, nadd); + size_t high = atomic_read_z(&highchunks); + while (cur > high && atomic_cas_z(&highchunks, high, cur)) { + /* + * Don't refresh cur, because it may have decreased + * since this thread lost the highchunks update race. 
+ */ + high = atomic_read_z(&highchunks); + } + if (cur > high && prof_gdump_get_unlocked()) + prof_gdump(); + } + + return (false); +} + +void +chunk_deregister(const void *chunk, const extent_node_t *node) +{ + bool err; + + err = rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL); + assert(!err); + if (config_prof && opt_prof) { + size_t size = extent_node_size_get(node); + size_t nsub = (size == 0) ? 1 : size / chunksize; + assert(atomic_read_z(&curchunks) >= nsub); + atomic_sub_z(&curchunks, nsub); + } +} + +/* + * Do first-best-fit chunk selection, i.e. select the lowest chunk that best + * fits. + */ +static extent_node_t * +chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, size_t size) +{ + extent_node_t key; + + assert(size == CHUNK_CEILING(size)); + + extent_node_init(&key, arena, NULL, size, false, false); + return (extent_tree_szad_nsearch(chunks_szad, &key)); +} + static void * -chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, - size_t alignment, bool base, bool *zero) +chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, + bool dalloc_node) { void *ret; extent_node_t *node; - extent_node_t key; size_t alloc_size, leadsize, trailsize; - bool zeroed; + bool zeroed, committed; - if (base) { - /* - * This function may need to call base_node_{,de}alloc(), but - * the current chunk allocation request is on behalf of the - * base allocator. Avoid deadlock (and if that weren't an - * issue, potential for infinite recursion) by returning NULL. - */ - return (NULL); - } + assert(new_addr == NULL || alignment == chunksize); + /* + * Cached chunks use the node linkage embedded in their headers, in + * which case dalloc_node is true, and new_addr is non-NULL because + * we're operating on a specific chunk. 
+ */ + assert(dalloc_node || new_addr != NULL); - alloc_size = size + alignment - chunksize; + alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize)); /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); - key.addr = NULL; - key.size = alloc_size; - malloc_mutex_lock(&chunks_mtx); - node = extent_tree_szad_nsearch(chunks_szad, &key); - if (node == NULL) { - malloc_mutex_unlock(&chunks_mtx); + malloc_mutex_lock(&arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(arena, chunk_hooks); + if (new_addr != NULL) { + extent_node_t key; + extent_node_init(&key, arena, new_addr, alloc_size, false, + false); + node = extent_tree_ad_search(chunks_ad, &key); + } else { + node = chunk_first_best_fit(arena, chunks_szad, chunks_ad, + alloc_size); + } + if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < + size)) { + malloc_mutex_unlock(&arena->chunks_mtx); return (NULL); } - leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - - (uintptr_t)node->addr; - assert(node->size >= leadsize + size); - trailsize = node->size - leadsize - size; - ret = (void *)((uintptr_t)node->addr + leadsize); - zeroed = node->zeroed; + leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node), + alignment) - (uintptr_t)extent_node_addr_get(node); + assert(new_addr == NULL || leadsize == 0); + assert(extent_node_size_get(node) >= leadsize + size); + trailsize = extent_node_size_get(node) - leadsize - size; + ret = (void *)((uintptr_t)extent_node_addr_get(node) + leadsize); + zeroed = extent_node_zeroed_get(node); if (zeroed) - *zero = true; + *zero = true; + committed = extent_node_committed_get(node); + if (committed) + *commit = true; + /* Split the lead. */ + if (leadsize != 0 && + chunk_hooks->split(extent_node_addr_get(node), + extent_node_size_get(node), leadsize, size, false, arena->ind)) { + malloc_mutex_unlock(&arena->chunks_mtx); + return (NULL); + } /* Remove node from the tree. 
*/ extent_tree_szad_remove(chunks_szad, node); extent_tree_ad_remove(chunks_ad, node); + arena_chunk_cache_maybe_remove(arena, node, cache); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. */ - node->size = leadsize; + extent_node_size_set(node, leadsize); extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); + arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; } if (trailsize != 0) { + /* Split the trail. */ + if (chunk_hooks->split(ret, size + trailsize, size, + trailsize, false, arena->ind)) { + if (dalloc_node && node != NULL) + arena_node_dalloc(arena, node); + malloc_mutex_unlock(&arena->chunks_mtx); + chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, + cache, ret, size + trailsize, zeroed, committed); + return (NULL); + } /* Insert the trailing space as a smaller chunk. */ if (node == NULL) { - /* - * An additional node is required, but - * base_node_alloc() can cause a new base chunk to be - * allocated. Drop chunks_mtx in order to avoid - * deadlock, and if node allocation fails, deallocate - * the result before returning an error. 
- */ - malloc_mutex_unlock(&chunks_mtx); - node = base_node_alloc(); + node = arena_node_alloc(arena); if (node == NULL) { - chunk_dealloc(ret, size, true); + malloc_mutex_unlock(&arena->chunks_mtx); + chunk_record(arena, chunk_hooks, chunks_szad, + chunks_ad, cache, ret, size + trailsize, + zeroed, committed); return (NULL); } - malloc_mutex_lock(&chunks_mtx); } - node->addr = (void *)((uintptr_t)(ret) + size); - node->size = trailsize; - node->zeroed = zeroed; + extent_node_init(node, arena, (void *)((uintptr_t)(ret) + size), + trailsize, zeroed, committed); extent_tree_szad_insert(chunks_szad, node); extent_tree_ad_insert(chunks_ad, node); + arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; } - malloc_mutex_unlock(&chunks_mtx); + if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { + malloc_mutex_unlock(&arena->chunks_mtx); + chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, cache, + ret, size, zeroed, committed); + return (NULL); + } + malloc_mutex_unlock(&arena->chunks_mtx); - if (node != NULL) - base_node_dealloc(node); + assert(dalloc_node || node != NULL); + if (dalloc_node && node != NULL) + arena_node_dalloc(arena, node); if (*zero) { - if (zeroed == false) + if (!zeroed) memset(ret, 0, size); else if (config_debug) { size_t i; size_t *p = (size_t *)(uintptr_t)ret; - VALGRIND_MAKE_MEM_DEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } @@ -136,138 +322,214 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, } /* - * If the caller specifies (*zero == false), it is still possible to receive - * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc() - * takes advantage of this to avoid demanding zeroed chunks, but taking - * advantage of them if they are returned. 
+ * If the caller specifies (!*zero), it is still possible to receive zeroed + * memory, in which case *zero is toggled to true. arena_chunk_alloc() takes + * advantage of this to avoid demanding zeroed chunks, but taking advantage of + * them if they are returned. */ -void * -chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, - dss_prec_t dss_prec) +static void * +chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); + /* Retained. */ + if ((ret = chunk_recycle(arena, &chunk_hooks, + &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, + new_addr, size, alignment, zero, commit, true)) != NULL) + return (ret); + /* "primary" dss. */ - if (config_dss && dss_prec == dss_prec_primary) { - if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, - alignment, base, zero)) != NULL) - goto label_return; - if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) - goto label_return; - } - /* mmap. */ - if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size, - alignment, base, zero)) != NULL) - goto label_return; - if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) - goto label_return; + if (have_dss && dss_prec == dss_prec_primary && (ret = + chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != + NULL) + return (ret); + /* + * mmap. Requesting an address is not implemented for + * chunk_alloc_mmap(), so only call it if (new_addr == NULL). + */ + if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero, + commit)) != NULL) + return (ret); /* "secondary" dss. 
*/ - if (config_dss && dss_prec == dss_prec_secondary) { - if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, - alignment, base, zero)) != NULL) - goto label_return; - if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) - goto label_return; - } + if (have_dss && dss_prec == dss_prec_secondary && (ret = + chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != + NULL) + return (ret); /* All strategies for allocation failed. */ - ret = NULL; -label_return: - if (ret != NULL) { - if (config_ivsalloc && base == false) { - if (rtree_set(chunks_rtree, (uintptr_t)ret, 1)) { - chunk_dealloc(ret, size, true); - return (NULL); - } - } - if (config_stats || config_prof) { - bool gdump; - malloc_mutex_lock(&chunks_mtx); - if (config_stats) - stats_chunks.nchunks += (size / chunksize); - stats_chunks.curchunks += (size / chunksize); - if (stats_chunks.curchunks > stats_chunks.highchunks) { - stats_chunks.highchunks = - stats_chunks.curchunks; - if (config_prof) - gdump = true; - } else if (config_prof) - gdump = false; - malloc_mutex_unlock(&chunks_mtx); - if (config_prof && opt_prof && opt_prof_gdump && gdump) - prof_gdump(); - } - if (config_valgrind) - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - } - assert(CHUNK_ADDR2BASE(ret) == ret); + return (NULL); +} + +void * +chunk_alloc_base(size_t size) +{ + void *ret; + bool zero, commit; + + /* + * Directly call chunk_alloc_mmap() rather than chunk_alloc_core() + * because it's critical that chunk_alloc_base() return untouched + * demand-zeroed virtual memory. 
+ */ + zero = true; + commit = true; + ret = chunk_alloc_mmap(size, chunksize, &zero, &commit); + if (ret == NULL) + return (NULL); + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + return (ret); } -static void -chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, - size_t size) +void * +chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool dalloc_node) { - bool unzeroed; - extent_node_t *xnode, *node, *prev, *xprev, key; + void *ret; + bool commit; + + assert(size != 0); + assert((size & chunksize_mask) == 0); + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); - unzeroed = pages_purge(chunk, size); - VALGRIND_MAKE_MEM_NOACCESS(chunk, size); + commit = true; + ret = chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_cached, + &arena->chunks_ad_cached, true, new_addr, size, alignment, zero, + &commit, dalloc_node); + if (ret == NULL) + return (NULL); + assert(commit); + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + return (ret); +} + +static arena_t * +chunk_arena_get(unsigned arena_ind) +{ + arena_t *arena; + /* Dodge tsd for a0 in order to avoid bootstrapping issues. */ + arena = (arena_ind == 0) ? a0get() : arena_get(tsd_fetch(), arena_ind, + false, true); /* - * Allocate a node before acquiring chunks_mtx even though it might not - * be needed, because base_node_alloc() may cause a new base chunk to - * be allocated, which could cause deadlock if chunks_mtx were already - * held. + * The arena we're allocating on behalf of must have been initialized + * already. */ - xnode = base_node_alloc(); - /* Use xprev to implement conditional deferred deallocation of prev. 
*/ - xprev = NULL; + assert(arena != NULL); + return (arena); +} + +static void * +chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, + bool *commit, unsigned arena_ind) +{ + void *ret; + arena_t *arena; + + arena = chunk_arena_get(arena_ind); + ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, + commit, arena->dss_prec); + if (ret == NULL) + return (NULL); + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - malloc_mutex_lock(&chunks_mtx); - key.addr = (void *)((uintptr_t)chunk + size); + return (ret); +} + +void * +chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit) +{ + void *ret; + + chunk_hooks_assure_initialized(arena, chunk_hooks); + ret = chunk_hooks->alloc(new_addr, size, alignment, zero, commit, + arena->ind); + if (ret == NULL) + return (NULL); + if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); + return (ret); +} + +static void +chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, + void *chunk, size_t size, bool zeroed, bool committed) +{ + bool unzeroed; + extent_node_t *node, *prev; + extent_node_t key; + + assert(!cache || !zeroed); + unzeroed = cache || !zeroed; + JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); + + malloc_mutex_lock(&arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(arena, chunk_hooks); + extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, + false, false); node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. 
*/ - if (node != NULL && node->addr == key.addr) { + if (node != NULL && extent_node_addr_get(node) == + extent_node_addr_get(&key) && extent_node_committed_get(node) == + committed && !chunk_hooks->merge(chunk, size, + extent_node_addr_get(node), extent_node_size_get(node), false, + arena->ind)) { /* * Coalesce chunk with the following address range. This does * not change the position within chunks_ad, so only * remove/insert from/into chunks_szad. */ extent_tree_szad_remove(chunks_szad, node); - node->addr = chunk; - node->size += size; - node->zeroed = (node->zeroed && (unzeroed == false)); + arena_chunk_cache_maybe_remove(arena, node, cache); + extent_node_addr_set(node, chunk); + extent_node_size_set(node, size + extent_node_size_get(node)); + extent_node_zeroed_set(node, extent_node_zeroed_get(node) && + !unzeroed); extent_tree_szad_insert(chunks_szad, node); + arena_chunk_cache_maybe_insert(arena, node, cache); } else { /* Coalescing forward failed, so insert a new node. */ - if (xnode == NULL) { + node = arena_node_alloc(arena); + if (node == NULL) { /* - * base_node_alloc() failed, which is an exceedingly - * unlikely failure. Leak chunk; its pages have - * already been purged, so this is only a virtual - * memory leak. + * Node allocation failed, which is an exceedingly + * unlikely failure. Leak chunk after making sure its + * pages have already been purged, so that this is only + * a virtual memory leak. */ + if (cache) { + chunk_purge_wrapper(arena, chunk_hooks, chunk, + size, 0, size); + } goto label_return; } - node = xnode; - xnode = NULL; /* Prevent deallocation below. */ - node->addr = chunk; - node->size = size; - node->zeroed = (unzeroed == false); + extent_node_init(node, arena, chunk, size, !unzeroed, + committed); extent_tree_ad_insert(chunks_ad, node); extent_tree_szad_insert(chunks_szad, node); + arena_chunk_cache_maybe_insert(arena, node, cache); } /* Try to coalesce backward. 
*/ prev = extent_tree_ad_prev(chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { + if (prev != NULL && (void *)((uintptr_t)extent_node_addr_get(prev) + + extent_node_size_get(prev)) == chunk && + extent_node_committed_get(prev) == committed && + !chunk_hooks->merge(extent_node_addr_get(prev), + extent_node_size_get(prev), chunk, size, false, arena->ind)) { /* * Coalesce chunk with the previous address range. This does * not change the position within chunks_ad, so only @@ -275,44 +537,42 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, */ extent_tree_szad_remove(chunks_szad, prev); extent_tree_ad_remove(chunks_ad, prev); - + arena_chunk_cache_maybe_remove(arena, prev, cache); extent_tree_szad_remove(chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - node->zeroed = (node->zeroed && prev->zeroed); + arena_chunk_cache_maybe_remove(arena, node, cache); + extent_node_addr_set(node, extent_node_addr_get(prev)); + extent_node_size_set(node, extent_node_size_get(prev) + + extent_node_size_get(node)); + extent_node_zeroed_set(node, extent_node_zeroed_get(prev) && + extent_node_zeroed_get(node)); extent_tree_szad_insert(chunks_szad, node); + arena_chunk_cache_maybe_insert(arena, node, cache); - xprev = prev; + arena_node_dalloc(arena, prev); } label_return: - malloc_mutex_unlock(&chunks_mtx); - /* - * Deallocate xnode and/or xprev after unlocking chunks_mtx in order to - * avoid potential deadlock. 
- */ - if (xnode != NULL) - base_node_dealloc(xnode); - if (xprev != NULL) - base_node_dealloc(xprev); + malloc_mutex_unlock(&arena->chunks_mtx); } void -chunk_unmap(void *chunk, size_t size) +chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, + size_t size, bool committed) { + assert(chunk != NULL); assert(CHUNK_ADDR2BASE(chunk) == chunk); assert(size != 0); assert((size & chunksize_mask) == 0); - if (config_dss && chunk_in_dss(chunk)) - chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size); - else if (chunk_dealloc_mmap(chunk, size)) - chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size); + chunk_record(arena, chunk_hooks, &arena->chunks_szad_cached, + &arena->chunks_ad_cached, true, chunk, size, false, committed); + arena_maybe_purge(arena); } void -chunk_dealloc(void *chunk, size_t size, bool unmap) +chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, + size_t size, bool zeroed, bool committed) { assert(chunk != NULL); @@ -320,22 +580,149 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) assert(size != 0); assert((size & chunksize_mask) == 0); - if (config_ivsalloc) - rtree_set(chunks_rtree, (uintptr_t)chunk, 0); - if (config_stats || config_prof) { - malloc_mutex_lock(&chunks_mtx); - assert(stats_chunks.curchunks >= (size / chunksize)); - stats_chunks.curchunks -= (size / chunksize); - malloc_mutex_unlock(&chunks_mtx); + chunk_hooks_assure_initialized(arena, chunk_hooks); + /* Try to deallocate. */ + if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind)) + return; + /* Try to decommit; purge if that fails. 
*/ + if (committed) { + committed = chunk_hooks->decommit(chunk, size, 0, size, + arena->ind); } + zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, + arena->ind); + chunk_record(arena, chunk_hooks, &arena->chunks_szad_retained, + &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); +} + +static bool +chunk_dalloc_default(void *chunk, size_t size, bool committed, + unsigned arena_ind) +{ + + if (!have_dss || !chunk_in_dss(chunk)) + return (chunk_dalloc_mmap(chunk, size)); + return (true); +} + +void +chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, + size_t size, bool committed) +{ + + chunk_hooks_assure_initialized(arena, chunk_hooks); + chunk_hooks->dalloc(chunk, size, committed, arena->ind); + if (config_valgrind && chunk_hooks->dalloc != chunk_dalloc_default) + JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); +} + +static bool +chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length, + unsigned arena_ind) +{ + + return (pages_commit((void *)((uintptr_t)chunk + (uintptr_t)offset), + length)); +} + +static bool +chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, + unsigned arena_ind) +{ - if (unmap) - chunk_unmap(chunk, size); + return (pages_decommit((void *)((uintptr_t)chunk + (uintptr_t)offset), + length)); +} + +bool +chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) +{ + + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert((offset & PAGE_MASK) == 0); + assert(length != 0); + assert((length & PAGE_MASK) == 0); + + return (pages_purge((void *)((uintptr_t)chunk + (uintptr_t)offset), + length)); +} + +static bool +chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, + unsigned arena_ind) +{ + + return (chunk_purge_arena(chunk_arena_get(arena_ind), chunk, offset, + length)); +} + +bool +chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, + size_t size, size_t 
offset, size_t length) +{ + + chunk_hooks_assure_initialized(arena, chunk_hooks); + return (chunk_hooks->purge(chunk, size, offset, length, arena->ind)); +} + +static bool +chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, + bool committed, unsigned arena_ind) +{ + + if (!maps_coalesce) + return (true); + return (false); +} + +static bool +chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, + bool committed, unsigned arena_ind) +{ + + if (!maps_coalesce) + return (true); + if (have_dss && chunk_in_dss(chunk_a) != chunk_in_dss(chunk_b)) + return (true); + + return (false); +} + +static rtree_node_elm_t * +chunks_rtree_node_alloc(size_t nelms) +{ + + return ((rtree_node_elm_t *)base_alloc(nelms * + sizeof(rtree_node_elm_t))); } bool chunk_boot(void) { +#ifdef _WIN32 + SYSTEM_INFO info; + GetSystemInfo(&info); + + /* + * Verify actual page size is equal to or an integral multiple of + * configured page size. + */ + if (info.dwPageSize & ((1U << LG_PAGE) - 1)) + return (true); + + /* + * Configure chunksize (if not set) to match granularity (usually 64K), + * so pages_map will always take fast path. + */ + if (!opt_lg_chunk) { + opt_lg_chunk = jemalloc_ffs((int)info.dwAllocationGranularity) + - 1; + } +#else + if (!opt_lg_chunk) + opt_lg_chunk = LG_CHUNK_DEFAULT; +#endif /* Set variables according to the value of opt_lg_chunk. 
*/ chunksize = (ZU(1) << opt_lg_chunk); @@ -343,23 +730,11 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (config_stats || config_prof) { - if (malloc_mutex_init(&chunks_mtx)) - return (true); - memset(&stats_chunks, 0, sizeof(chunk_stats_t)); - } - if (config_dss && chunk_dss_boot()) + if (have_dss && chunk_dss_boot()) + return (true); + if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk, chunks_rtree_node_alloc, NULL)) return (true); - extent_tree_szad_new(&chunks_szad_mmap); - extent_tree_ad_new(&chunks_ad_mmap); - extent_tree_szad_new(&chunks_szad_dss); - extent_tree_ad_new(&chunks_ad_dss); - if (config_ivsalloc) { - chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk, base_alloc, NULL); - if (chunks_rtree == NULL) - return (true); - } return (false); } @@ -368,9 +743,6 @@ void chunk_prefork(void) { - malloc_mutex_prefork(&chunks_mtx); - if (config_ivsalloc) - rtree_prefork(chunks_rtree); chunk_dss_prefork(); } @@ -379,9 +751,6 @@ chunk_postfork_parent(void) { chunk_dss_postfork_parent(); - if (config_ivsalloc) - rtree_postfork_parent(chunks_rtree); - malloc_mutex_postfork_parent(&chunks_mtx); } void @@ -389,7 +758,4 @@ chunk_postfork_child(void) { chunk_dss_postfork_child(); - if (config_ivsalloc) - rtree_postfork_child(chunks_rtree); - malloc_mutex_postfork_child(&chunks_mtx); } diff --git a/dep/jemalloc/src/chunk_dss.c b/dep/jemalloc/src/chunk_dss.c index 510bb8bee85..61fc9169619 100644 --- a/dep/jemalloc/src/chunk_dss.c +++ b/dep/jemalloc/src/chunk_dss.c @@ -32,7 +32,7 @@ static void * chunk_dss_sbrk(intptr_t increment) { -#ifdef JEMALLOC_HAVE_SBRK +#ifdef JEMALLOC_DSS return (sbrk(increment)); #else not_implemented(); @@ -45,7 +45,7 @@ chunk_dss_prec_get(void) { dss_prec_t ret; - if (config_dss == false) + if (!have_dss) return (dss_prec_disabled); malloc_mutex_lock(&dss_mtx); ret = dss_prec_default; @@ -57,8 +57,8 @@ bool chunk_dss_prec_set(dss_prec_t dss_prec) { - 
if (config_dss == false) - return (true); + if (!have_dss) + return (dss_prec != dss_prec_disabled); malloc_mutex_lock(&dss_mtx); dss_prec_default = dss_prec; malloc_mutex_unlock(&dss_mtx); @@ -66,11 +66,10 @@ chunk_dss_prec_set(dss_prec_t dss_prec) } void * -chunk_alloc_dss(size_t size, size_t alignment, bool *zero) +chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit) { - void *ret; - - cassert(config_dss); + cassert(have_dss); assert(size > 0 && (size & chunksize_mask) == 0); assert(alignment > 0 && (alignment & chunksize_mask) == 0); @@ -83,9 +82,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) malloc_mutex_lock(&dss_mtx); if (dss_prev != (void *)-1) { - size_t gap_size, cpad_size; - void *cpad, *dss_next; - intptr_t incr; /* * The loop is necessary to recover from races with other @@ -93,8 +89,20 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) * malloc. */ do { + void *ret, *cpad, *dss_next; + size_t gap_size, cpad_size; + intptr_t incr; + /* Avoid an unnecessary system call. */ + if (new_addr != NULL && dss_max != new_addr) + break; + /* Get the current end of the DSS. */ dss_max = chunk_dss_sbrk(0); + + /* Make sure the earlier condition still holds. */ + if (new_addr != NULL && dss_max != new_addr) + break; + /* * Calculate how much padding is necessary to * chunk-align the end of the DSS. @@ -123,12 +131,20 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) /* Success. 
*/ dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); - if (cpad_size != 0) - chunk_unmap(cpad, cpad_size); + if (cpad_size != 0) { + chunk_hooks_t chunk_hooks = + CHUNK_HOOKS_INITIALIZER; + chunk_dalloc_wrapper(arena, + &chunk_hooks, cpad, cpad_size, + true); + } if (*zero) { - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( + ret, size); memset(ret, 0, size); } + if (!*commit) + *commit = pages_decommit(ret, size); return (ret); } } while (dss_prev != (void *)-1); @@ -143,7 +159,7 @@ chunk_in_dss(void *chunk) { bool ret; - cassert(config_dss); + cassert(have_dss); malloc_mutex_lock(&dss_mtx); if ((uintptr_t)chunk >= (uintptr_t)dss_base @@ -160,7 +176,7 @@ bool chunk_dss_boot(void) { - cassert(config_dss); + cassert(have_dss); if (malloc_mutex_init(&dss_mtx)) return (true); @@ -175,7 +191,7 @@ void chunk_dss_prefork(void) { - if (config_dss) + if (have_dss) malloc_mutex_prefork(&dss_mtx); } @@ -183,7 +199,7 @@ void chunk_dss_postfork_parent(void) { - if (config_dss) + if (have_dss) malloc_mutex_postfork_parent(&dss_mtx); } @@ -191,7 +207,7 @@ void chunk_dss_postfork_child(void) { - if (config_dss) + if (have_dss) malloc_mutex_postfork_child(&dss_mtx); } diff --git a/dep/jemalloc/src/chunk_mmap.c b/dep/jemalloc/src/chunk_mmap.c index 2056d793f05..b9ba74191a4 100644 --- a/dep/jemalloc/src/chunk_mmap.c +++ b/dep/jemalloc/src/chunk_mmap.c @@ -2,154 +2,20 @@ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void *pages_map(void *addr, size_t size); -static void pages_unmap(void *addr, size_t size); -static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, - bool *zero); - -/******************************************************************************/ static void * -pages_map(void *addr, size_t size) +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; - - assert(size != 0); - -#ifdef _WIN32 - /* - * If VirtualAlloc can't allocate at the given address when one is - * given, it fails and returns NULL. - */ - ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, - PAGE_READWRITE); -#else - /* - * We don't use MAP_FIXED here, because it can cause the *replacement* - * of existing mappings, and we only want to create new mappings. - */ - ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, - -1, 0); - assert(ret != NULL); - - if (ret == MAP_FAILED) - ret = NULL; - else if (addr != NULL && ret != addr) { - /* - * We succeeded in mapping memory, but not in the right place. 
- */ - if (munmap(ret, size) == -1) { - char buf[BUFERROR_BUF]; - - buferror(get_errno(), buf, sizeof(buf)); - malloc_printf("<jemalloc: Error in munmap(): %s\n", - buf); - if (opt_abort) - abort(); - } - ret = NULL; - } -#endif - assert(ret == NULL || (addr == NULL && ret != addr) - || (addr != NULL && ret == addr)); - return (ret); -} - -static void -pages_unmap(void *addr, size_t size) -{ - -#ifdef _WIN32 - if (VirtualFree(addr, 0, MEM_RELEASE) == 0) -#else - if (munmap(addr, size) == -1) -#endif - { - char buf[BUFERROR_BUF]; - - buferror(get_errno(), buf, sizeof(buf)); - malloc_printf("<jemalloc>: Error in " -#ifdef _WIN32 - "VirtualFree" -#else - "munmap" -#endif - "(): %s\n", buf); - if (opt_abort) - abort(); - } -} - -static void * -pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) -{ - void *ret = (void *)((uintptr_t)addr + leadsize); - - assert(alloc_size >= leadsize + size); -#ifdef _WIN32 - { - void *new_addr; - - pages_unmap(addr, alloc_size); - new_addr = pages_map(ret, size); - if (new_addr == ret) - return (ret); - if (new_addr) - pages_unmap(new_addr, size); - return (NULL); - } -#else - { - size_t trailsize = alloc_size - leadsize - size; - - if (leadsize != 0) - pages_unmap(addr, leadsize); - if (trailsize != 0) - pages_unmap((void *)((uintptr_t)ret + size), trailsize); - return (ret); - } -#endif -} - -bool -pages_purge(void *addr, size_t length) -{ - bool unzeroed; - -#ifdef _WIN32 - VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); - unzeroed = true; -#else -# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -# define JEMALLOC_MADV_ZEROS true -# elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE -# define JEMALLOC_MADV_ZEROS false -# else -# error "No method defined for purging unused dirty pages." 
-# endif - int err = madvise(addr, length, JEMALLOC_MADV_PURGE); - unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0); -# undef JEMALLOC_MADV_PURGE -# undef JEMALLOC_MADV_ZEROS -#endif - return (unzeroed); -} - -static void * -chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero) -{ - void *ret, *pages; - size_t alloc_size, leadsize; + size_t alloc_size; alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); do { + void *pages; + size_t leadsize; pages = pages_map(NULL, alloc_size); if (pages == NULL) return (NULL); @@ -160,11 +26,13 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero) assert(ret != NULL); *zero = true; + if (!*commit) + *commit = pages_decommit(ret, size); return (ret); } void * -chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) +chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; size_t offset; @@ -191,20 +59,22 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, zero)); + return (chunk_alloc_mmap_slow(size, alignment, zero, commit)); } assert(ret != NULL); *zero = true; + if (!*commit) + *commit = pages_decommit(ret, size); return (ret); } bool -chunk_dealloc_mmap(void *chunk, size_t size) +chunk_dalloc_mmap(void *chunk, size_t size) { if (config_munmap) pages_unmap(chunk, size); - return (config_munmap == false); + return (!config_munmap); } diff --git a/dep/jemalloc/src/ckh.c b/dep/jemalloc/src/ckh.c index 04c52966193..53a1c1ef11d 100644 --- a/dep/jemalloc/src/ckh.c +++ b/dep/jemalloc/src/ckh.c @@ -40,8 +40,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static bool ckh_grow(ckh_t *ckh); -static void ckh_shrink(ckh_t *ckh); +static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); +static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); /******************************************************************************/ @@ -185,7 +185,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, } bucket = tbucket; - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + if (!ckh_try_bucket_insert(ckh, bucket, key, data)) return (false); } } @@ -201,12 +201,12 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) /* Try to insert in primary bucket. */ bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + if (!ckh_try_bucket_insert(ckh, bucket, key, data)) return (false); /* Try to insert in secondary bucket. */ bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + if (!ckh_try_bucket_insert(ckh, bucket, key, data)) return (false); /* @@ -243,7 +243,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(ckh_t *ckh) +ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; @@ -270,7 +270,8 @@ ckh_grow(ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, + true, NULL); if (tab == NULL) { ret = true; goto label_return; @@ -281,13 +282,13 @@ ckh_grow(ckh_t *ckh) tab = ttab; ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; - if (ckh_rebuild(ckh, tab) == false) { - idalloc(tab); + if (!ckh_rebuild(ckh, tab)) { + idalloctm(tsd, tab, tcache_get(tsd, false), true); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloc(ckh->tab); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -298,7 +299,7 @@ label_return: } static void -ckh_shrink(ckh_t *ckh) +ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t lg_curcells, usize; @@ -313,7 +314,8 @@ ckh_shrink(ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) return; - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, + NULL); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -327,8 +329,8 @@ ckh_shrink(ckh_t *ckh) tab = ttab; ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; - if (ckh_rebuild(ckh, tab) == false) { - idalloc(tab); + if (!ckh_rebuild(ckh, tab)) { + idalloctm(tsd, tab, tcache_get(tsd, false), true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -336,7 +338,7 @@ ckh_shrink(ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloc(ckh->tab); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -345,7 +347,8 @@ ckh_shrink(ckh_t *ckh) } bool -ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) +ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, + ckh_keycomp_t *keycomp) { bool ret; size_t mincells, usize; @@ -366,10 +369,10 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->count = 0; /* - * Find the minimum power of 2 that is large enough to fit aBaseCount + * Find the minimum power of 2 that is large enough to fit minitems * entries. We are using (2+,2) cuckoo hashing, which has an expected * maximum load factor of at least ~0.86, so 0.75 is a conservative load - * factor that will typically allow 2^aLgMinItems to fit without ever + * factor that will typically allow mincells items to fit without ever * growing the table. 
*/ assert(LG_CKH_BUCKET_CELLS > 0); @@ -388,7 +391,8 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, + NULL); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -400,16 +404,16 @@ label_return: } void -ckh_delete(ckh_t *ckh) +ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); #ifdef CKH_VERBOSE malloc_printf( - "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64"," - " nshrinkfails: %"PRIu64", ninserts: %"PRIu64"," - " nrelocs: %"PRIu64"\n", __func__, ckh, + "%s(%p): ngrows: %"FMTu64", nshrinks: %"FMTu64"," + " nshrinkfails: %"FMTu64", ninserts: %"FMTu64"," + " nrelocs: %"FMTu64"\n", __func__, ckh, (unsigned long long)ckh->ngrows, (unsigned long long)ckh->nshrinks, (unsigned long long)ckh->nshrinkfails, @@ -417,7 +421,7 @@ ckh_delete(ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloc(ckh->tab); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); if (config_debug) memset(ckh, 0x5a, sizeof(ckh_t)); } @@ -452,7 +456,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(ckh_t *ckh, const void *key, const void *data) +ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) { bool ret; @@ -464,7 +468,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) #endif while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(ckh)) { + if (ckh_grow(tsd, ckh)) { ret = true; goto label_return; } @@ -476,7 +480,8 @@ label_return: } bool -ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) +ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, + void **data) { size_t cell; @@ -497,7 +502,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. 
*/ - ckh_shrink(ckh); + ckh_shrink(tsd, ckh); } return (false); diff --git a/dep/jemalloc/src/ctl.c b/dep/jemalloc/src/ctl.c index cc2c5aef570..3de8e602d11 100644 --- a/dep/jemalloc/src/ctl.c +++ b/dep/jemalloc/src/ctl.c @@ -7,7 +7,6 @@ /* * ctl_mtx protects the following: * - ctl_stats.* - * - opt_prof_active */ static malloc_mutex_t ctl_mtx; static bool ctl_initialized; @@ -17,14 +16,14 @@ static ctl_stats_t ctl_stats; /******************************************************************************/ /* Helpers for named and indexed nodes. */ -static inline const ctl_named_node_t * +JEMALLOC_INLINE_C const ctl_named_node_t * ctl_named_node(const ctl_node_t *node) { return ((node->named) ? (const ctl_named_node_t *)node : NULL); } -static inline const ctl_named_node_t * +JEMALLOC_INLINE_C const ctl_named_node_t * ctl_named_children(const ctl_named_node_t *node, int index) { const ctl_named_node_t *children = ctl_named_node(node->children); @@ -32,12 +31,11 @@ ctl_named_children(const ctl_named_node_t *node, int index) return (children ? &children[index] : NULL); } -static inline const ctl_indexed_node_t * +JEMALLOC_INLINE_C const ctl_indexed_node_t * ctl_indexed_node(const ctl_node_t *node) { - return ((node->named == false) ? (const ctl_indexed_node_t *)node : - NULL); + return (!node->named ? 
(const ctl_indexed_node_t *)node : NULL); } /******************************************************************************/ @@ -68,16 +66,17 @@ CTL_PROTO(version) CTL_PROTO(epoch) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) +CTL_PROTO(thread_prof_name) +CTL_PROTO(thread_prof_active) CTL_PROTO(thread_arena) CTL_PROTO(thread_allocated) CTL_PROTO(thread_allocatedp) CTL_PROTO(thread_deallocated) CTL_PROTO(thread_deallocatedp) +CTL_PROTO(config_cache_oblivious) CTL_PROTO(config_debug) -CTL_PROTO(config_dss) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) -CTL_PROTO(config_mremap) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -99,22 +98,27 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_quarantine) CTL_PROTO(opt_redzone) CTL_PROTO(opt_utrace) -CTL_PROTO(opt_valgrind) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) +CTL_PROTO(opt_prof_thread_active_init) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(tcache_create) +CTL_PROTO(tcache_flush) +CTL_PROTO(tcache_destroy) CTL_PROTO(arena_i_purge) static void arena_purge(unsigned arena_ind); CTL_PROTO(arena_i_dss) +CTL_PROTO(arena_i_lg_dirty_mult) +CTL_PROTO(arena_i_chunk_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -122,25 +126,26 @@ CTL_PROTO(arenas_bin_i_run_size) INDEX_PROTO(arenas_bin_i) CTL_PROTO(arenas_lrun_i_size) INDEX_PROTO(arenas_lrun_i) +CTL_PROTO(arenas_hchunk_i_size) +INDEX_PROTO(arenas_hchunk_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) +CTL_PROTO(arenas_lg_dirty_mult) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) -CTL_PROTO(arenas_purge) +CTL_PROTO(arenas_nhchunks) 
CTL_PROTO(arenas_extend) +CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) +CTL_PROTO(prof_gdump) +CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) -CTL_PROTO(stats_chunks_current) -CTL_PROTO(stats_chunks_total) -CTL_PROTO(stats_chunks_high) -CTL_PROTO(stats_huge_allocated) -CTL_PROTO(stats_huge_nmalloc) -CTL_PROTO(stats_huge_ndalloc) +CTL_PROTO(lg_prof_sample) CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) @@ -149,10 +154,14 @@ CTL_PROTO(stats_arenas_i_large_allocated) CTL_PROTO(stats_arenas_i_large_nmalloc) CTL_PROTO(stats_arenas_i_large_ndalloc) CTL_PROTO(stats_arenas_i_large_nrequests) -CTL_PROTO(stats_arenas_i_bins_j_allocated) +CTL_PROTO(stats_arenas_i_huge_allocated) +CTL_PROTO(stats_arenas_i_huge_nmalloc) +CTL_PROTO(stats_arenas_i_huge_ndalloc) +CTL_PROTO(stats_arenas_i_huge_nrequests) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) CTL_PROTO(stats_arenas_i_bins_j_nrequests) +CTL_PROTO(stats_arenas_i_bins_j_curregs) CTL_PROTO(stats_arenas_i_bins_j_nfills) CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nruns) @@ -164,18 +173,28 @@ CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) CTL_PROTO(stats_arenas_i_lruns_j_nrequests) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) +CTL_PROTO(stats_arenas_i_hchunks_j_nmalloc) +CTL_PROTO(stats_arenas_i_hchunks_j_ndalloc) +CTL_PROTO(stats_arenas_i_hchunks_j_nrequests) +CTL_PROTO(stats_arenas_i_hchunks_j_curhchunks) +INDEX_PROTO(stats_arenas_i_hchunks_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) +CTL_PROTO(stats_arenas_i_lg_dirty_mult) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) +CTL_PROTO(stats_arenas_i_metadata_mapped) 
+CTL_PROTO(stats_arenas_i_metadata_allocated) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) +CTL_PROTO(stats_metadata) +CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) /******************************************************************************/ @@ -197,71 +216,84 @@ CTL_PROTO(stats_mapped) */ #define INDEX(i) {false}, i##_index -static const ctl_named_node_t tcache_node[] = { +static const ctl_named_node_t thread_tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("flush"), CTL(thread_tcache_flush)} }; +static const ctl_named_node_t thread_prof_node[] = { + {NAME("name"), CTL(thread_prof_name)}, + {NAME("active"), CTL(thread_prof_active)} +}; + static const ctl_named_node_t thread_node[] = { {NAME("arena"), CTL(thread_arena)}, {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, - {NAME("tcache"), CHILD(named, tcache)} + {NAME("tcache"), CHILD(named, thread_tcache)}, + {NAME("prof"), CHILD(named, thread_prof)} }; static const ctl_named_node_t config_node[] = { - {NAME("debug"), CTL(config_debug)}, - {NAME("dss"), CTL(config_dss)}, - {NAME("fill"), CTL(config_fill)}, - {NAME("lazy_lock"), CTL(config_lazy_lock)}, - {NAME("mremap"), CTL(config_mremap)}, - {NAME("munmap"), CTL(config_munmap)}, - {NAME("prof"), CTL(config_prof)}, - {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, - {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, - {NAME("stats"), CTL(config_stats)}, - {NAME("tcache"), CTL(config_tcache)}, - {NAME("tls"), CTL(config_tls)}, - {NAME("utrace"), CTL(config_utrace)}, - {NAME("valgrind"), CTL(config_valgrind)}, - {NAME("xmalloc"), CTL(config_xmalloc)} + {NAME("cache_oblivious"), CTL(config_cache_oblivious)}, + {NAME("debug"), CTL(config_debug)}, + {NAME("fill"), CTL(config_fill)}, + {NAME("lazy_lock"), CTL(config_lazy_lock)}, + 
{NAME("munmap"), CTL(config_munmap)}, + {NAME("prof"), CTL(config_prof)}, + {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, + {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("stats"), CTL(config_stats)}, + {NAME("tcache"), CTL(config_tcache)}, + {NAME("tls"), CTL(config_tls)}, + {NAME("utrace"), CTL(config_utrace)}, + {NAME("valgrind"), CTL(config_valgrind)}, + {NAME("xmalloc"), CTL(config_xmalloc)} }; static const ctl_named_node_t opt_node[] = { - {NAME("abort"), CTL(opt_abort)}, - {NAME("dss"), CTL(opt_dss)}, - {NAME("lg_chunk"), CTL(opt_lg_chunk)}, - {NAME("narenas"), CTL(opt_narenas)}, - {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("stats_print"), CTL(opt_stats_print)}, - {NAME("junk"), CTL(opt_junk)}, - {NAME("zero"), CTL(opt_zero)}, - {NAME("quarantine"), CTL(opt_quarantine)}, - {NAME("redzone"), CTL(opt_redzone)}, - {NAME("utrace"), CTL(opt_utrace)}, - {NAME("valgrind"), CTL(opt_valgrind)}, - {NAME("xmalloc"), CTL(opt_xmalloc)}, - {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, - {NAME("prof"), CTL(opt_prof)}, - {NAME("prof_prefix"), CTL(opt_prof_prefix)}, - {NAME("prof_active"), CTL(opt_prof_active)}, - {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, - {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, - {NAME("prof_gdump"), CTL(opt_prof_gdump)}, - {NAME("prof_final"), CTL(opt_prof_final)}, - {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_accum"), CTL(opt_prof_accum)} + {NAME("abort"), CTL(opt_abort)}, + {NAME("dss"), CTL(opt_dss)}, + {NAME("lg_chunk"), CTL(opt_lg_chunk)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("stats_print"), CTL(opt_stats_print)}, + {NAME("junk"), CTL(opt_junk)}, + {NAME("zero"), CTL(opt_zero)}, + {NAME("quarantine"), CTL(opt_quarantine)}, + {NAME("redzone"), CTL(opt_redzone)}, + {NAME("utrace"), CTL(opt_utrace)}, + {NAME("xmalloc"), CTL(opt_xmalloc)}, + {NAME("tcache"), CTL(opt_tcache)}, + 
{NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("prof"), CTL(opt_prof)}, + {NAME("prof_prefix"), CTL(opt_prof_prefix)}, + {NAME("prof_active"), CTL(opt_prof_active)}, + {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, + {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, + {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, + {NAME("prof_final"), CTL(opt_prof_final)}, + {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_accum"), CTL(opt_prof_accum)} +}; + +static const ctl_named_node_t tcache_node[] = { + {NAME("create"), CTL(tcache_create)}, + {NAME("flush"), CTL(tcache_flush)}, + {NAME("destroy"), CTL(tcache_destroy)} }; static const ctl_named_node_t arena_i_node[] = { - {NAME("purge"), CTL(arena_i_purge)}, - {NAME("dss"), CTL(arena_i_dss)} + {NAME("purge"), CTL(arena_i_purge)}, + {NAME("dss"), CTL(arena_i_dss)}, + {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, + {NAME("chunk_hooks"), CTL(arena_i_chunk_hooks)} }; static const ctl_named_node_t super_arena_i_node[] = { - {NAME(""), CHILD(named, arena_i)} + {NAME(""), CHILD(named, arena_i)} }; static const ctl_indexed_node_t arena_node[] = { @@ -269,12 +301,12 @@ static const ctl_indexed_node_t arena_node[] = { }; static const ctl_named_node_t arenas_bin_i_node[] = { - {NAME("size"), CTL(arenas_bin_i_size)}, - {NAME("nregs"), CTL(arenas_bin_i_nregs)}, - {NAME("run_size"), CTL(arenas_bin_i_run_size)} + {NAME("size"), CTL(arenas_bin_i_size)}, + {NAME("nregs"), CTL(arenas_bin_i_nregs)}, + {NAME("run_size"), CTL(arenas_bin_i_run_size)} }; static const ctl_named_node_t super_arenas_bin_i_node[] = { - {NAME(""), CHILD(named, arenas_bin_i)} + {NAME(""), CHILD(named, arenas_bin_i)} }; static const ctl_indexed_node_t arenas_bin_node[] = { @@ -282,76 +314,93 @@ static const ctl_indexed_node_t arenas_bin_node[] = { }; static const ctl_named_node_t arenas_lrun_i_node[] = { - {NAME("size"), CTL(arenas_lrun_i_size)} + {NAME("size"), 
CTL(arenas_lrun_i_size)} }; static const ctl_named_node_t super_arenas_lrun_i_node[] = { - {NAME(""), CHILD(named, arenas_lrun_i)} + {NAME(""), CHILD(named, arenas_lrun_i)} }; static const ctl_indexed_node_t arenas_lrun_node[] = { {INDEX(arenas_lrun_i)} }; +static const ctl_named_node_t arenas_hchunk_i_node[] = { + {NAME("size"), CTL(arenas_hchunk_i_size)} +}; +static const ctl_named_node_t super_arenas_hchunk_i_node[] = { + {NAME(""), CHILD(named, arenas_hchunk_i)} +}; + +static const ctl_indexed_node_t arenas_hchunk_node[] = { + {INDEX(arenas_hchunk_i)} +}; + static const ctl_named_node_t arenas_node[] = { - {NAME("narenas"), CTL(arenas_narenas)}, - {NAME("initialized"), CTL(arenas_initialized)}, - {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("page"), CTL(arenas_page)}, - {NAME("tcache_max"), CTL(arenas_tcache_max)}, - {NAME("nbins"), CTL(arenas_nbins)}, - {NAME("nhbins"), CTL(arenas_nhbins)}, - {NAME("bin"), CHILD(indexed, arenas_bin)}, - {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(indexed, arenas_lrun)}, - {NAME("purge"), CTL(arenas_purge)}, - {NAME("extend"), CTL(arenas_extend)} + {NAME("narenas"), CTL(arenas_narenas)}, + {NAME("initialized"), CTL(arenas_initialized)}, + {NAME("lg_dirty_mult"), CTL(arenas_lg_dirty_mult)}, + {NAME("quantum"), CTL(arenas_quantum)}, + {NAME("page"), CTL(arenas_page)}, + {NAME("tcache_max"), CTL(arenas_tcache_max)}, + {NAME("nbins"), CTL(arenas_nbins)}, + {NAME("nhbins"), CTL(arenas_nhbins)}, + {NAME("bin"), CHILD(indexed, arenas_bin)}, + {NAME("nlruns"), CTL(arenas_nlruns)}, + {NAME("lrun"), CHILD(indexed, arenas_lrun)}, + {NAME("nhchunks"), CTL(arenas_nhchunks)}, + {NAME("hchunk"), CHILD(indexed, arenas_hchunk)}, + {NAME("extend"), CTL(arenas_extend)} }; static const ctl_named_node_t prof_node[] = { + {NAME("thread_active_init"), CTL(prof_thread_active_init)}, {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, - {NAME("interval"), CTL(prof_interval)} + {NAME("gdump"), CTL(prof_gdump)}, + 
{NAME("reset"), CTL(prof_reset)}, + {NAME("interval"), CTL(prof_interval)}, + {NAME("lg_sample"), CTL(lg_prof_sample)} }; -static const ctl_named_node_t stats_chunks_node[] = { - {NAME("current"), CTL(stats_chunks_current)}, - {NAME("total"), CTL(stats_chunks_total)}, - {NAME("high"), CTL(stats_chunks_high)} -}; - -static const ctl_named_node_t stats_huge_node[] = { - {NAME("allocated"), CTL(stats_huge_allocated)}, - {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, - {NAME("ndalloc"), CTL(stats_huge_ndalloc)} +static const ctl_named_node_t stats_arenas_i_metadata_node[] = { + {NAME("mapped"), CTL(stats_arenas_i_metadata_mapped)}, + {NAME("allocated"), CTL(stats_arenas_i_metadata_allocated)} }; static const ctl_named_node_t stats_arenas_i_small_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} + {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} }; static const ctl_named_node_t stats_arenas_i_large_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} + {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} +}; + +static const ctl_named_node_t stats_arenas_i_huge_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_huge_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_huge_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_huge_ndalloc)}, + 
{NAME("nrequests"), CTL(stats_arenas_i_huge_nrequests)} }; static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, - {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, - {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, - {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, - {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, - {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} + {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, + {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)}, + {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, + {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, + {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, + {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_bins_j)} + {NAME(""), CHILD(named, stats_arenas_i_bins_j)} }; static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { @@ -359,35 +408,53 @@ static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { }; static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, - {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} + {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, + {NAME("curruns"), 
CTL(stats_arenas_i_lruns_j_curruns)} }; static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} + {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} }; static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { {INDEX(stats_arenas_i_lruns_j)} }; +static const ctl_named_node_t stats_arenas_i_hchunks_j_node[] = { + {NAME("nmalloc"), CTL(stats_arenas_i_hchunks_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_hchunks_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_hchunks_j_nrequests)}, + {NAME("curhchunks"), CTL(stats_arenas_i_hchunks_j_curhchunks)} +}; +static const ctl_named_node_t super_stats_arenas_i_hchunks_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_hchunks_j)} +}; + +static const ctl_indexed_node_t stats_arenas_i_hchunks_node[] = { + {INDEX(stats_arenas_i_hchunks_j)} +}; + static const ctl_named_node_t stats_arenas_i_node[] = { - {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, - {NAME("dss"), CTL(stats_arenas_i_dss)}, - {NAME("pactive"), CTL(stats_arenas_i_pactive)}, - {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, - {NAME("mapped"), CTL(stats_arenas_i_mapped)}, - {NAME("npurge"), CTL(stats_arenas_i_npurge)}, - {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, - {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("small"), CHILD(named, stats_arenas_i_small)}, - {NAME("large"), CHILD(named, stats_arenas_i_large)}, - {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)} + {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("dss"), CTL(stats_arenas_i_dss)}, + {NAME("lg_dirty_mult"), CTL(stats_arenas_i_lg_dirty_mult)}, + {NAME("pactive"), CTL(stats_arenas_i_pactive)}, + {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, + {NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("npurge"), CTL(stats_arenas_i_npurge)}, + {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, + {NAME("purged"), CTL(stats_arenas_i_purged)}, + 
{NAME("metadata"), CHILD(named, stats_arenas_i_metadata)}, + {NAME("small"), CHILD(named, stats_arenas_i_small)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("huge"), CHILD(named, stats_arenas_i_huge)}, + {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)}, + {NAME("hchunks"), CHILD(indexed, stats_arenas_i_hchunks)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(named, stats_arenas_i)} + {NAME(""), CHILD(named, stats_arenas_i)} }; static const ctl_indexed_node_t stats_arenas_node[] = { @@ -395,13 +462,13 @@ static const ctl_indexed_node_t stats_arenas_node[] = { }; static const ctl_named_node_t stats_node[] = { - {NAME("cactive"), CTL(stats_cactive)}, - {NAME("allocated"), CTL(stats_allocated)}, - {NAME("active"), CTL(stats_active)}, - {NAME("mapped"), CTL(stats_mapped)}, - {NAME("chunks"), CHILD(named, stats_chunks)}, - {NAME("huge"), CHILD(named, stats_huge)}, - {NAME("arenas"), CHILD(indexed, stats_arenas)} + {NAME("cactive"), CTL(stats_cactive)}, + {NAME("allocated"), CTL(stats_allocated)}, + {NAME("active"), CTL(stats_active)}, + {NAME("metadata"), CTL(stats_metadata)}, + {NAME("resident"), CTL(stats_resident)}, + {NAME("mapped"), CTL(stats_mapped)}, + {NAME("arenas"), CHILD(indexed, stats_arenas)} }; static const ctl_named_node_t root_node[] = { @@ -410,6 +477,7 @@ static const ctl_named_node_t root_node[] = { {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, + {NAME("tcache"), CHILD(named, tcache)}, {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, @@ -431,12 +499,19 @@ ctl_arena_init(ctl_arena_stats_t *astats) { if (astats->lstats == NULL) { - astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * + astats->lstats = (malloc_large_stats_t *)a0malloc(nlclasses * sizeof(malloc_large_stats_t)); if (astats->lstats 
== NULL) return (true); } + if (astats->hstats == NULL) { + astats->hstats = (malloc_huge_stats_t *)a0malloc(nhclasses * + sizeof(malloc_huge_stats_t)); + if (astats->hstats == NULL) + return (true); + } + return (false); } @@ -445,6 +520,7 @@ ctl_arena_clear(ctl_arena_stats_t *astats) { astats->dss = dss_prec_names[dss_prec_limit]; + astats->lg_dirty_mult = -1; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -456,6 +532,8 @@ ctl_arena_clear(ctl_arena_stats_t *astats) memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); + memset(astats->hstats, 0, nhclasses * + sizeof(malloc_huge_stats_t)); } } @@ -464,11 +542,13 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - arena_stats_merge(arena, &cstats->dss, &cstats->pactive, - &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); + arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult, + &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, + cstats->lstats, cstats->hstats); for (i = 0; i < NBINS; i++) { - cstats->allocated_small += cstats->bstats[i].allocated; + cstats->allocated_small += cstats->bstats[i].curregs * + index2size(i); cstats->nmalloc_small += cstats->bstats[i].nmalloc; cstats->ndalloc_small += cstats->bstats[i].ndalloc; cstats->nrequests_small += cstats->bstats[i].nrequests; @@ -488,6 +568,9 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->astats.nmadvise += astats->astats.nmadvise; sstats->astats.purged += astats->astats.purged; + sstats->astats.metadata_mapped += astats->astats.metadata_mapped; + sstats->astats.metadata_allocated += astats->astats.metadata_allocated; + sstats->allocated_small += astats->allocated_small; sstats->nmalloc_small += astats->nmalloc_small; sstats->ndalloc_small += astats->ndalloc_small; @@ -498,18 +581,15 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t 
*astats) sstats->astats.ndalloc_large += astats->astats.ndalloc_large; sstats->astats.nrequests_large += astats->astats.nrequests_large; - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } + sstats->astats.allocated_huge += astats->astats.allocated_huge; + sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; + sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; for (i = 0; i < NBINS; i++) { - sstats->bstats[i].allocated += astats->bstats[i].allocated; sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; sstats->bstats[i].nrequests += astats->bstats[i].nrequests; + sstats->bstats[i].curregs += astats->bstats[i].curregs; if (config_tcache) { sstats->bstats[i].nfills += astats->bstats[i].nfills; sstats->bstats[i].nflushes += @@ -519,6 +599,19 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->bstats[i].reruns += astats->bstats[i].reruns; sstats->bstats[i].curruns += astats->bstats[i].curruns; } + + for (i = 0; i < nlclasses; i++) { + sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += astats->lstats[i].nrequests; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } + + for (i = 0; i < nhclasses; i++) { + sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; + sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; + sstats->hstats[i].curhchunks += astats->hstats[i].curhchunks; + } } static void @@ -547,27 +640,23 @@ static bool ctl_grow(void) { ctl_arena_stats_t *astats; - arena_t **tarenas; - /* Allocate extended arena stats and arenas arrays. 
*/ - astats = (ctl_arena_stats_t *)imalloc((ctl_stats.narenas + 2) * + /* Initialize new arena. */ + if (arena_init(ctl_stats.narenas) == NULL) + return (true); + + /* Allocate extended arena stats. */ + astats = (ctl_arena_stats_t *)a0malloc((ctl_stats.narenas + 2) * sizeof(ctl_arena_stats_t)); if (astats == NULL) return (true); - tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) * - sizeof(arena_t *)); - if (tarenas == NULL) { - idalloc(astats); - return (true); - } /* Initialize the new astats element. */ memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) { - idalloc(tarenas); - idalloc(astats); + a0dalloc(astats); return (true); } /* Swap merged stats to their new location. */ @@ -580,32 +669,7 @@ ctl_grow(void) memcpy(&astats[ctl_stats.narenas + 1], &tstats, sizeof(ctl_arena_stats_t)); } - /* Initialize the new arenas element. */ - tarenas[ctl_stats.narenas] = NULL; - { - arena_t **arenas_old = arenas; - /* - * Swap extended arenas array into place. Although ctl_mtx - * protects this function from other threads extending the - * array, it does not protect from other threads mutating it - * (i.e. initializing arenas and setting array elements to - * point to them). Therefore, array copying must happen under - * the protection of arenas_lock. - */ - malloc_mutex_lock(&arenas_lock); - arenas = tarenas; - memcpy(arenas, arenas_old, ctl_stats.narenas * - sizeof(arena_t *)); - narenas_total++; - arenas_extend(narenas_total - 1); - malloc_mutex_unlock(&arenas_lock); - /* - * Deallocate arenas_old only if it came from imalloc() (not - * base_alloc()). 
- */ - if (ctl_stats.narenas != narenas_auto) - idalloc(arenas_old); - } + a0dalloc(ctl_stats.arenas); ctl_stats.arenas = astats; ctl_stats.narenas++; @@ -615,23 +679,11 @@ ctl_grow(void) static void ctl_refresh(void) { + tsd_t *tsd; unsigned i; + bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - if (config_stats) { - malloc_mutex_lock(&chunks_mtx); - ctl_stats.chunks.current = stats_chunks.curchunks; - ctl_stats.chunks.total = stats_chunks.nchunks; - ctl_stats.chunks.high = stats_chunks.highchunks; - malloc_mutex_unlock(&chunks_mtx); - - malloc_mutex_lock(&huge_mtx); - ctl_stats.huge.allocated = huge_allocated; - ctl_stats.huge.nmalloc = huge_nmalloc; - ctl_stats.huge.ndalloc = huge_ndalloc; - malloc_mutex_unlock(&huge_mtx); - } - /* * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). @@ -639,15 +691,22 @@ ctl_refresh(void) ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + tsd = tsd_fetch(); + for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { + tarenas[i] = arena_get(tsd, i, false, false); + if (tarenas[i] == NULL && !refreshed) { + tarenas[i] = arena_get(tsd, i, false, true); + refreshed = true; + } + } + for (i = 0; i < ctl_stats.narenas; i++) { - if (arenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; + if (tarenas[i] != NULL) + ctl_stats.arenas[i].nthreads = arena_nbound(i); else ctl_stats.arenas[i].nthreads = 0; } - malloc_mutex_unlock(&arenas_lock); + for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); @@ -657,14 +716,24 @@ ctl_refresh(void) } if (config_stats) { + size_t base_allocated, base_resident, base_mapped; + base_stats_get(&base_allocated, &base_resident, &base_mapped); ctl_stats.allocated = - ctl_stats.arenas[ctl_stats.narenas].allocated_small - + 
ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large - + ctl_stats.huge.allocated; + ctl_stats.arenas[ctl_stats.narenas].allocated_small + + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_huge; ctl_stats.active = - (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE) - + ctl_stats.huge.allocated; - ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); + (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE); + ctl_stats.metadata = base_allocated + + ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped + + ctl_stats.arenas[ctl_stats.narenas].astats + .metadata_allocated; + ctl_stats.resident = base_resident + + ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped + + ((ctl_stats.arenas[ctl_stats.narenas].pactive + + ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE); + ctl_stats.mapped = base_mapped + + ctl_stats.arenas[ctl_stats.narenas].astats.mapped; } ctl_epoch++; @@ -676,14 +745,13 @@ ctl_init(void) bool ret; malloc_mutex_lock(&ctl_mtx); - if (ctl_initialized == false) { + if (!ctl_initialized) { /* * Allocate space for one extra arena stats element, which * contains summed stats across all arenas. 
*/ - assert(narenas_auto == narenas_total_get()); - ctl_stats.narenas = narenas_auto; - ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( + ctl_stats.narenas = narenas_total_get(); + ctl_stats.arenas = (ctl_arena_stats_t *)a0malloc( (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); if (ctl_stats.arenas == NULL) { ret = true; @@ -701,6 +769,15 @@ ctl_init(void) unsigned i; for (i = 0; i <= ctl_stats.narenas; i++) { if (ctl_arena_init(&ctl_stats.arenas[i])) { + unsigned j; + for (j = 0; j < i; j++) { + a0dalloc( + ctl_stats.arenas[j].lstats); + a0dalloc( + ctl_stats.arenas[j].hstats); + } + a0dalloc(ctl_stats.arenas); + ctl_stats.arenas = NULL; ret = true; goto label_return; } @@ -826,7 +903,7 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; - if (ctl_initialized == false && ctl_init()) { + if (!ctl_initialized && ctl_init()) { ret = EAGAIN; goto label_return; } @@ -853,7 +930,7 @@ ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - if (ctl_initialized == false && ctl_init()) { + if (!ctl_initialized && ctl_init()) { ret = EAGAIN; goto label_return; } @@ -871,7 +948,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, const ctl_named_node_t *node; size_t i; - if (ctl_initialized == false && ctl_init()) { + if (!ctl_initialized && ctl_init()) { ret = EAGAIN; goto label_return; } @@ -963,6 +1040,14 @@ ctl_postfork_child(void) } \ } while (0) +#define READ_XOR_WRITE() do { \ + if ((oldp != NULL && oldlenp != NULL) && (newp != NULL || \ + newlen != 0)) { \ + ret = EPERM; \ + goto label_return; \ + } \ +} while (0) + #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -971,8 +1056,8 @@ ctl_postfork_child(void) memcpy(oldp, (void *)&(v), copylen); \ ret = EINVAL; \ goto label_return; \ - } else \ - *(t *)oldp = (v); \ + } \ + *(t *)oldp = (v); \ } \ } while (0) @@ -998,7 +1083,7 @@ 
n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ t oldval; \ \ - if ((c) == false) \ + if (!(c)) \ return (ENOENT); \ if (l) \ malloc_mutex_lock(&ctl_mtx); \ @@ -1021,7 +1106,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ t oldval; \ \ - if ((c) == false) \ + if (!(c)) \ return (ENOENT); \ malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ @@ -1065,7 +1150,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ t oldval; \ \ - if ((c) == false) \ + if (!(c)) \ return (ENOENT); \ READONLY(); \ oldval = (v); \ @@ -1093,6 +1178,27 @@ label_return: \ return (ret); \ } +#define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + tsd_t *tsd; \ + \ + if (!(c)) \ + return (ENOENT); \ + READONLY(); \ + tsd = tsd_fetch(); \ + oldval = (m(tsd)); \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + return (ret); \ +} + #define CTL_RO_BOOL_CONFIG_GEN(n) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ @@ -1135,11 +1241,10 @@ label_return: /******************************************************************************/ +CTL_RO_BOOL_CONFIG_GEN(config_cache_oblivious) CTL_RO_BOOL_CONFIG_GEN(config_debug) -CTL_RO_BOOL_CONFIG_GEN(config_dss) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) -CTL_RO_BOOL_CONFIG_GEN(config_mremap) CTL_RO_BOOL_CONFIG_GEN(config_munmap) CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) @@ -1159,18 +1264,19 @@ CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) -CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) +CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, 
const char *) CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) -CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) -CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ +CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, + opt_prof_thread_active_init, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) @@ -1185,14 +1291,21 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; + tsd_t *tsd; + arena_t *oldarena; unsigned newind, oldind; + tsd = tsd_fetch(); + oldarena = arena_choose(tsd, NULL); + if (oldarena == NULL) + return (EAGAIN); + malloc_mutex_lock(&ctl_mtx); - newind = oldind = choose_arena(NULL)->ind; + newind = oldind = oldarena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { - arena_t *arena; + arena_t *newarena; if (newind >= ctl_stats.narenas) { /* New arena index is out of range. */ @@ -1201,28 +1314,20 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. 
*/ - malloc_mutex_lock(&arenas_lock); - if ((arena = arenas[newind]) == NULL && (arena = - arenas_extend(newind)) == NULL) { - malloc_mutex_unlock(&arenas_lock); + newarena = arena_get(tsd, newind, true, true); + if (newarena == NULL) { ret = EAGAIN; goto label_return; } - assert(arena == arenas[newind]); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; - malloc_mutex_unlock(&arenas_lock); - - /* Set new arena association. */ + /* Set new arena/tcache associations. */ + arena_migrate(tsd, oldind, newind); if (config_tcache) { - tcache_t *tcache; - if ((uintptr_t)(tcache = *tcache_tsd_get()) > - (uintptr_t)TCACHE_STATE_MAX) { - tcache_arena_dissociate(tcache); - tcache_arena_associate(tcache, arena); + tcache_t *tcache = tsd_tcache_get(tsd); + if (tcache != NULL) { + tcache_arena_reassociate(tcache, oldarena, + newarena); } } - arenas_tsd_set(&arena); } ret = 0; @@ -1231,14 +1336,14 @@ label_return: return (ret); } -CTL_RO_NL_CGEN(config_stats, thread_allocated, - thread_allocated_tsd_get()->allocated, uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_allocatedp, - &thread_allocated_tsd_get()->allocated, uint64_t *) -CTL_RO_NL_CGEN(config_stats, thread_deallocated, - thread_allocated_tsd_get()->deallocated, uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, - &thread_allocated_tsd_get()->deallocated, uint64_t *) +CTL_TSD_RO_NL_CGEN(config_stats, thread_allocated, tsd_thread_allocated_get, + uint64_t) +CTL_TSD_RO_NL_CGEN(config_stats, thread_allocatedp, tsd_thread_allocatedp_get, + uint64_t *) +CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocated, tsd_thread_deallocated_get, + uint64_t) +CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, + tsd_thread_deallocatedp_get, uint64_t *) static int thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, @@ -1247,7 +1352,7 @@ thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, int ret; bool oldval; - if (config_tcache == false) + if (!config_tcache) return (ENOENT); 
oldval = tcache_enabled_get(); @@ -1271,7 +1376,7 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, { int ret; - if (config_tcache == false) + if (!config_tcache) return (ENOENT); READONLY(); @@ -1284,17 +1389,170 @@ label_return: return (ret); } +static int +thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (!config_prof) + return (ENOENT); + + READ_XOR_WRITE(); + + if (newp != NULL) { + tsd_t *tsd; + + if (newlen != sizeof(const char *)) { + ret = EINVAL; + goto label_return; + } + + tsd = tsd_fetch(); + + if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != + 0) + goto label_return; + } else { + const char *oldname = prof_thread_name_get(); + READ(oldname, const char *); + } + + ret = 0; +label_return: + return (ret); +} + +static int +thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (!config_prof) + return (ENOENT); + + oldval = prof_thread_active_get(); + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + if (prof_thread_active_set(*(bool *)newp)) { + ret = EAGAIN; + goto label_return; + } + } + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + +/******************************************************************************/ + +static int +tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + tsd_t *tsd; + unsigned tcache_ind; + + if (!config_tcache) + return (ENOENT); + + tsd = tsd_fetch(); + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (tcaches_create(tsd, &tcache_ind)) { + ret = EFAULT; + goto label_return; + } + READ(tcache_ind, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t 
*oldlenp, + void *newp, size_t newlen) +{ + int ret; + tsd_t *tsd; + unsigned tcache_ind; + + if (!config_tcache) + return (ENOENT); + + tsd = tsd_fetch(); + + WRITEONLY(); + tcache_ind = UINT_MAX; + WRITE(tcache_ind, unsigned); + if (tcache_ind == UINT_MAX) { + ret = EFAULT; + goto label_return; + } + tcaches_flush(tsd, tcache_ind); + + ret = 0; +label_return: + return (ret); +} + +static int +tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + tsd_t *tsd; + unsigned tcache_ind; + + if (!config_tcache) + return (ENOENT); + + tsd = tsd_fetch(); + + WRITEONLY(); + tcache_ind = UINT_MAX; + WRITE(tcache_ind, unsigned); + if (tcache_ind == UINT_MAX) { + ret = EFAULT; + goto label_return; + } + tcaches_destroy(tsd, tcache_ind); + + ret = 0; +label_return: + return (ret); +} + /******************************************************************************/ /* ctl_mutex must be held during execution of this function. */ static void arena_purge(unsigned arena_ind) { + tsd_t *tsd; + unsigned i; + bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); - malloc_mutex_unlock(&arenas_lock); + tsd = tsd_fetch(); + for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { + tarenas[i] = arena_get(tsd, i, false, false); + if (tarenas[i] == NULL && !refreshed) { + tarenas[i] = arena_get(tsd, i, false, true); + refreshed = true; + } + } if (arena_ind == ctl_stats.narenas) { unsigned i; @@ -1330,47 +1588,117 @@ static int arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret, i; - bool match, err; - const char *dss; + int ret; + const char *dss = NULL; unsigned arena_ind = mib[1]; dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; malloc_mutex_lock(&ctl_mtx); WRITE(dss, const char *); - match = false; 
- for (i = 0; i < dss_prec_limit; i++) { - if (strcmp(dss_prec_names[i], dss) == 0) { - dss_prec = i; - match = true; - break; + if (dss != NULL) { + int i; + bool match = false; + + for (i = 0; i < dss_prec_limit; i++) { + if (strcmp(dss_prec_names[i], dss) == 0) { + dss_prec = i; + match = true; + break; + } + } + + if (!match) { + ret = EINVAL; + goto label_return; } - } - if (match == false) { - ret = EINVAL; - goto label_return; } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arenas[arena_ind]; - if (arena != NULL) { - dss_prec_old = arena_dss_prec_get(arena); - arena_dss_prec_set(arena, dss_prec); - err = false; - } else - err = true; + arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true); + if (arena == NULL || (dss_prec != dss_prec_limit && + arena_dss_prec_set(arena, dss_prec))) { + ret = EFAULT; + goto label_return; + } + dss_prec_old = arena_dss_prec_get(arena); } else { + if (dss_prec != dss_prec_limit && + chunk_dss_prec_set(dss_prec)) { + ret = EFAULT; + goto label_return; + } dss_prec_old = chunk_dss_prec_get(); - err = chunk_dss_prec_set(dss_prec); } + dss = dss_prec_names[dss_prec_old]; READ(dss, const char *); - if (err) { + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind = mib[1]; + arena_t *arena; + + arena = arena_get(tsd_fetch(), arena_ind, false, true); + if (arena == NULL) { ret = EFAULT; goto label_return; } + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_lg_dirty_mult_get(arena); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_lg_dirty_mult_set(arena, *(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + +static int +arena_i_chunk_hooks_ctl(const size_t *mib, 
size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind = mib[1]; + arena_t *arena; + + malloc_mutex_lock(&ctl_mtx); + if (arena_ind < narenas_total_get() && (arena = + arena_get(tsd_fetch(), arena_ind, false, true)) != NULL) { + if (newp != NULL) { + chunk_hooks_t old_chunk_hooks, new_chunk_hooks; + WRITE(new_chunk_hooks, chunk_hooks_t); + old_chunk_hooks = chunk_hooks_set(arena, + &new_chunk_hooks); + READ(old_chunk_hooks, chunk_hooks_t); + } else { + chunk_hooks_t old_chunk_hooks = chunk_hooks_get(arena); + READ(old_chunk_hooks, chunk_hooks_t); + } + } else { + ret = EFAULT; + goto label_return; + } ret = 0; label_return: malloc_mutex_unlock(&ctl_mtx); @@ -1444,6 +1772,32 @@ label_return: return (ret); } +static int +arenas_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_lg_dirty_mult_default_get(); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_lg_dirty_mult_default_set(*(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) @@ -1461,8 +1815,8 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_bin_i_node); } -CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) +CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1472,29 +1826,15 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) 
return (super_arenas_lrun_i_node); } -static int -arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) +CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+mib[2]), size_t) +static const ctl_named_node_t * +arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) { - int ret; - unsigned arena_ind; - - malloc_mutex_lock(&ctl_mtx); - WRITEONLY(); - arena_ind = UINT_MAX; - WRITE(arena_ind, unsigned); - if (newp != NULL && arena_ind >= ctl_stats.narenas) - ret = EFAULT; - else { - if (arena_ind == UINT_MAX) - arena_ind = ctl_stats.narenas; - arena_purge(arena_ind); - ret = 0; - } -label_return: - malloc_mutex_unlock(&ctl_mtx); - return (ret); + if (i > nhclasses) + return (NULL); + return (super_arenas_hchunk_i_node); } static int @@ -1522,31 +1862,52 @@ label_return: /******************************************************************************/ static int +prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (!config_prof) + return (ENOENT); + + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_thread_active_init_set(*(bool *)newp); + } else + oldval = prof_thread_active_init_get(); + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + +static int prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; - if (config_prof == false) + if (!config_prof) return (ENOENT); - malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */ - oldval = opt_prof_active; if (newp != NULL) { - /* - * The memory barriers will tend to make opt_prof_active - * propagate faster on systems with weak memory ordering. 
- */ - mb_write(); - WRITE(opt_prof_active, bool); - mb_write(); - } + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_active_set(*(bool *)newp); + } else + oldval = prof_active_get(); READ(oldval, bool); ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1557,7 +1918,7 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; const char *filename = NULL; - if (config_prof == false) + if (!config_prof) return (ENOENT); WRITEONLY(); @@ -1573,24 +1934,71 @@ label_return: return (ret); } +static int +prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (!config_prof) + return (ENOENT); + + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_gdump_set(*(bool *)newp); + } else + oldval = prof_gdump_get(); + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + +static int +prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + size_t lg_sample = lg_prof_sample; + tsd_t *tsd; + + if (!config_prof) + return (ENOENT); + + WRITEONLY(); + WRITE(lg_sample, size_t); + if (lg_sample >= (sizeof(uint64_t) << 3)) + lg_sample = (sizeof(uint64_t) << 3) - 1; + + tsd = tsd_fetch(); + + prof_reset(tsd, lg_sample); + + ret = 0; +label_return: + return (ret); +} + CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) +CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) /******************************************************************************/ CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) +CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) 
+CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) -CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, - size_t) -CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) -CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) -CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t) -CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t) - CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) +CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, + ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) @@ -1602,6 +2010,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_mapped, + ctl_stats.arenas[mib[2]].astats.metadata_mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_allocated, + ctl_stats.arenas[mib[2]].astats.metadata_allocated, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, ctl_stats.arenas[mib[2]].allocated_small, size_t) @@ -1619,15 +2031,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_huge_allocated, + ctl_stats.arenas[mib[2]].astats.allocated_huge, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nmalloc, 
+ ctl_stats.arenas[mib[2]].astats.nmalloc_huge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_huge_ndalloc, + ctl_stats.arenas[mib[2]].astats.ndalloc_huge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nrequests, + ctl_stats.arenas[mib[2]].astats.nmalloc_huge, uint64_t) /* Intentional. */ -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated, - ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curregs, + ctl_stats.arenas[mib[2]].bstats[mib[4]].curregs, size_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, @@ -1666,13 +2086,32 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) return (super_stats_arenas_i_lruns_j_node); } +CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nmalloc, + ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_ndalloc, + ctl_stats.arenas[mib[2]].hstats[mib[4]].ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nrequests, + ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, /* Intentional. 
*/ + uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, + ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) + +static const ctl_named_node_t * +stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j) +{ + + if (j > nhclasses) + return (NULL); + return (super_stats_arenas_i_hchunks_j_node); +} + static const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; malloc_mutex_lock(&ctl_mtx); - if (i > ctl_stats.narenas || ctl_stats.arenas[i].initialized == false) { + if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) { ret = NULL; goto label_return; } diff --git a/dep/jemalloc/src/extent.c b/dep/jemalloc/src/extent.c index 8c09b486ed8..13f94411c15 100644 --- a/dep/jemalloc/src/extent.c +++ b/dep/jemalloc/src/extent.c @@ -3,17 +3,32 @@ /******************************************************************************/ -static inline int +JEMALLOC_INLINE_C size_t +extent_quantize(size_t size) +{ + + /* + * Round down to the nearest chunk size that can actually be requested + * during normal huge allocation. + */ + return (index2size(size2index(size + 1) - 1)); +} + +JEMALLOC_INLINE_C int extent_szad_comp(extent_node_t *a, extent_node_t *b) { int ret; - size_t a_size = a->size; - size_t b_size = b->size; - - ret = (a_size > b_size) - (a_size < b_size); + size_t a_qsize = extent_quantize(extent_node_size_get(a)); + size_t b_qsize = extent_quantize(extent_node_size_get(b)); + + /* + * Compare based on quantized size rather than size, in order to sort + * equally useful extents only by address. 
+ */ + ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); if (ret == 0) { - uintptr_t a_addr = (uintptr_t)a->addr; - uintptr_t b_addr = (uintptr_t)b->addr; + uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); ret = (a_addr > b_addr) - (a_addr < b_addr); } @@ -22,18 +37,17 @@ extent_szad_comp(extent_node_t *a, extent_node_t *b) } /* Generate red-black tree functions. */ -rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, +rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, extent_szad_comp) -static inline int +JEMALLOC_INLINE_C int extent_ad_comp(extent_node_t *a, extent_node_t *b) { - uintptr_t a_addr = (uintptr_t)a->addr; - uintptr_t b_addr = (uintptr_t)b->addr; + uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); return ((a_addr > b_addr) - (a_addr < b_addr)); } /* Generate red-black tree functions. */ -rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad, - extent_ad_comp) +rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, ad_link, extent_ad_comp) diff --git a/dep/jemalloc/src/huge.c b/dep/jemalloc/src/huge.c index d72f2135702..1e9a66512f1 100644 --- a/dep/jemalloc/src/huge.c +++ b/dep/jemalloc/src/huge.c @@ -2,44 +2,68 @@ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ -/* Data. 
*/ -uint64_t huge_nmalloc; -uint64_t huge_ndalloc; -size_t huge_allocated; +static extent_node_t * +huge_node_get(const void *ptr) +{ + extent_node_t *node; -malloc_mutex_t huge_mtx; + node = chunk_lookup(ptr, true); + assert(!extent_node_achunk_get(node)); -/******************************************************************************/ + return (node); +} + +static bool +huge_node_set(const void *ptr, extent_node_t *node) +{ + + assert(extent_node_addr_get(node) == ptr); + assert(!extent_node_achunk_get(node)); + return (chunk_register(ptr, node)); +} -/* Tree of chunks that are stand-alone huge allocations. */ -static extent_tree_t huge; +static void +huge_node_unset(const void *ptr, const extent_node_t *node) +{ + + chunk_deregister(ptr, node); +} void * -huge_malloc(size_t size, bool zero, dss_prec_t dss_prec) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache) { + size_t usize; - return (huge_palloc(size, chunksize, zero, dss_prec)); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (NULL); + } + + return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); } void * -huge_palloc(size_t size, size_t alignment, bool zero, dss_prec_t dss_prec) +huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, + bool zero, tcache_t *tcache) { void *ret; - size_t csize; + size_t usize; extent_node_t *node; bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ + usize = sa2u(size, alignment); + if (unlikely(usize == 0)) return (NULL); - } + assert(usize >= chunksize); /* Allocate an extent node with which to track the chunk. 
*/ - node = base_node_alloc(); + node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), + CACHELINE, false, tcache, true, arena); if (node == NULL) return (NULL); @@ -48,145 +72,33 @@ huge_palloc(size_t size, size_t alignment, bool zero, dss_prec_t dss_prec) * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - ret = chunk_alloc(csize, alignment, false, &is_zeroed, dss_prec); - if (ret == NULL) { - base_node_dealloc(node); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, + size, alignment, &is_zeroed)) == NULL) { + idalloctm(tsd, node, tcache, true); return (NULL); } - /* Insert node into huge. */ - node->addr = ret; - node->size = csize; - - malloc_mutex_lock(&huge_mtx); - extent_tree_ad_insert(&huge, node); - if (config_stats) { - stats_cactive_add(csize); - huge_nmalloc++; - huge_allocated += csize; - } - malloc_mutex_unlock(&huge_mtx); - - if (config_fill && zero == false) { - if (opt_junk) - memset(ret, 0xa5, csize); - else if (opt_zero && is_zeroed == false) - memset(ret, 0, csize); - } - - return (ret); -} - -bool -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) -{ - - /* - * Avoid moving the allocation if the size class can be left the same. - */ - if (oldsize > arena_maxclass - && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) - && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { - assert(CHUNK_CEILING(oldsize) == oldsize); - return (false); - } - - /* Reallocation would require a move. */ - return (true); -} - -void * -huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec) -{ - void *ret; - size_t copysize; - - /* Try to avoid moving the allocation. 
*/ - if (huge_ralloc_no_move(ptr, oldsize, size, extra) == false) - return (ptr); + extent_node_init(node, arena, ret, size, is_zeroed, true); - /* - * size and oldsize are different enough that we need to use a - * different size class. In that case, fall back to allocating new - * space and copying. - */ - if (alignment > chunksize) - ret = huge_palloc(size + extra, alignment, zero, dss_prec); - else - ret = huge_malloc(size + extra, zero, dss_prec); - - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. */ - if (alignment > chunksize) - ret = huge_palloc(size, alignment, zero, dss_prec); - else - ret = huge_malloc(size, zero, dss_prec); - - if (ret == NULL) - return (NULL); + if (huge_node_set(ret, node)) { + arena_chunk_dalloc_huge(arena, ret, size); + idalloctm(tsd, node, tcache, true); + return (NULL); } - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; + /* Insert node into huge. */ + malloc_mutex_lock(&arena->huge_mtx); + ql_elm_new(node, ql_link); + ql_tail_insert(&arena->huge, node, ql_link); + malloc_mutex_unlock(&arena->huge_mtx); -#ifdef JEMALLOC_MREMAP - /* - * Use mremap(2) if this is a huge-->huge reallocation, and neither the - * source nor the destination are in dss. - */ - if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) - == false && chunk_in_dss(ret) == false))) { - size_t newsize = huge_salloc(ret); + if (zero || (config_fill && unlikely(opt_zero))) { + if (!is_zeroed) + memset(ret, 0, size); + } else if (config_fill && unlikely(opt_junk_alloc)) + memset(ret, 0xa5, size); - /* - * Remove ptr from the tree of huge allocations before - * performing the remap operation, in order to avoid the - * possibility of another thread acquiring that mapping before - * this one removes it from the tree. 
- */ - huge_dalloc(ptr, false); - if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, - ret) == MAP_FAILED) { - /* - * Assuming no chunk management bugs in the allocator, - * the only documented way an error can occur here is - * if the application changed the map type for a - * portion of the old allocation. This is firmly in - * undefined behavior territory, so write a diagnostic - * message, and optionally abort. - */ - char buf[BUFERROR_BUF]; - - buferror(get_errno(), buf, sizeof(buf)); - malloc_printf("<jemalloc>: Error in mremap(): %s\n", - buf); - if (opt_abort) - abort(); - memcpy(ret, ptr, copysize); - chunk_dealloc_mmap(ptr, oldsize); - } else if (config_fill && zero == false && opt_junk && oldsize - < newsize) { - /* - * mremap(2) clobbers the original mapping, so - * junk/zero filling is not preserved. There is no - * need to zero fill here, since any trailing - * uninititialized memory is demand-zeroed by the - * kernel, but junk filling must be redone. - */ - memset(ret + oldsize, 0xa5, newsize - oldsize); - } - } else -#endif - { - memcpy(ret, ptr, copysize); - iqalloct(ptr, try_tcache_dalloc); - } return (ret); } @@ -198,12 +110,12 @@ static void huge_dalloc_junk(void *ptr, size_t usize) { - if (config_fill && config_dss && opt_junk) { + if (config_fill && have_dss && unlikely(opt_junk_free)) { /* * Only bother junk filling if the chunk isn't about to be * unmapped. 
*/ - if (config_munmap == false || (config_dss && chunk_in_dss(ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(ptr))) memset(ptr, 0x5a, usize); } } @@ -213,135 +125,311 @@ huge_dalloc_junk(void *ptr, size_t usize) huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif -void -huge_dalloc(void *ptr, bool unmap) +static void +huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { - extent_node_t *node, key; + size_t usize, usize_next; + extent_node_t *node; + arena_t *arena; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + bool pre_zeroed, post_zeroed; + + /* Increase usize to incorporate extra. */ + for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1)) + <= oldsize; usize = usize_next) + ; /* Do nothing. */ + + if (oldsize == usize) + return; + + node = huge_node_get(ptr); + arena = extent_node_arena_get(node); + pre_zeroed = extent_node_zeroed_get(node); + + /* Fill if necessary (shrinking). */ + if (oldsize > usize) { + size_t sdiff = oldsize - usize; + if (config_fill && unlikely(opt_junk_free)) { + memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); + post_zeroed = false; + } else { + post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, + ptr, CHUNK_CEILING(oldsize), usize, sdiff); + } + } else + post_zeroed = pre_zeroed; + + malloc_mutex_lock(&arena->huge_mtx); + /* Update the size of the huge allocation. */ + assert(extent_node_size_get(node) != usize); + extent_node_size_set(node, usize); + /* Update zeroed. */ + extent_node_zeroed_set(node, post_zeroed); + malloc_mutex_unlock(&arena->huge_mtx); + + arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); + + /* Fill if necessary (growing). 
*/ + if (oldsize < usize) { + if (zero || (config_fill && unlikely(opt_zero))) { + if (!pre_zeroed) { + memset((void *)((uintptr_t)ptr + oldsize), 0, + usize - oldsize); + } + } else if (config_fill && unlikely(opt_junk_alloc)) { + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - + oldsize); + } + } +} - malloc_mutex_lock(&huge_mtx); +static bool +huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) +{ + extent_node_t *node; + arena_t *arena; + chunk_hooks_t chunk_hooks; + size_t cdiff; + bool pre_zeroed, post_zeroed; + + node = huge_node_get(ptr); + arena = extent_node_arena_get(node); + pre_zeroed = extent_node_zeroed_get(node); + chunk_hooks = chunk_hooks_get(arena); + + assert(oldsize > usize); + + /* Split excess chunks. */ + cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); + if (cdiff != 0 && chunk_hooks.split(ptr, CHUNK_CEILING(oldsize), + CHUNK_CEILING(usize), cdiff, true, arena->ind)) + return (true); - /* Extract from tree of huge allocations. */ - key.addr = ptr; - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); - extent_tree_ad_remove(&huge, node); + if (oldsize > usize) { + size_t sdiff = oldsize - usize; + if (config_fill && unlikely(opt_junk_free)) { + huge_dalloc_junk((void *)((uintptr_t)ptr + usize), + sdiff); + post_zeroed = false; + } else { + post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, + CHUNK_ADDR2BASE((uintptr_t)ptr + usize), + CHUNK_CEILING(oldsize), + CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); + } + } else + post_zeroed = pre_zeroed; - if (config_stats) { - stats_cactive_sub(node->size); - huge_ndalloc++; - huge_allocated -= node->size; - } + malloc_mutex_lock(&arena->huge_mtx); + /* Update the size of the huge allocation. */ + extent_node_size_set(node, usize); + /* Update zeroed. */ + extent_node_zeroed_set(node, post_zeroed); + malloc_mutex_unlock(&arena->huge_mtx); + + /* Zap the excess chunks. 
*/ + arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize); + + return (false); +} + +static bool +huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { + extent_node_t *node; + arena_t *arena; + bool is_zeroed_subchunk, is_zeroed_chunk; + + node = huge_node_get(ptr); + arena = extent_node_arena_get(node); + malloc_mutex_lock(&arena->huge_mtx); + is_zeroed_subchunk = extent_node_zeroed_get(node); + malloc_mutex_unlock(&arena->huge_mtx); + + /* + * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so + * that it is possible to make correct junk/zero fill decisions below. + */ + is_zeroed_chunk = zero; - malloc_mutex_unlock(&huge_mtx); + if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize, + &is_zeroed_chunk)) + return (true); - if (unmap) - huge_dalloc_junk(node->addr, node->size); + malloc_mutex_lock(&arena->huge_mtx); + /* Update the size of the huge allocation. */ + extent_node_size_set(node, usize); + malloc_mutex_unlock(&arena->huge_mtx); - chunk_dealloc(node->addr, node->size, unmap); + if (zero || (config_fill && unlikely(opt_zero))) { + if (!is_zeroed_subchunk) { + memset((void *)((uintptr_t)ptr + oldsize), 0, + CHUNK_CEILING(oldsize) - oldsize); + } + if (!is_zeroed_chunk) { + memset((void *)((uintptr_t)ptr + + CHUNK_CEILING(oldsize)), 0, usize - + CHUNK_CEILING(oldsize)); + } + } else if (config_fill && unlikely(opt_junk_alloc)) { + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - + oldsize); + } - base_node_dealloc(node); + return (false); } -size_t -huge_salloc(const void *ptr) +bool +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { - size_t ret; - extent_node_t *node, key; - malloc_mutex_lock(&huge_mtx); + assert(s2u(oldsize) == oldsize); - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); + /* Both allocations must be huge to avoid a move. 
*/ + if (oldsize < chunksize || usize_max < chunksize) + return (true); - ret = node->size; + if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { + /* Attempt to expand the allocation in-place. */ + if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, zero)) + return (false); + /* Try again, this time with usize_min. */ + if (usize_min < usize_max && CHUNK_CEILING(usize_min) > + CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, + oldsize, usize_min, zero)) + return (false); + } - malloc_mutex_unlock(&huge_mtx); + /* + * Avoid moving the allocation if the existing chunk size accommodates + * the new size. + */ + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { + huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, + zero); + return (false); + } - return (ret); + /* Attempt to shrink the allocation in-place. */ + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) + return (huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)); + return (true); } -dss_prec_t -huge_dss_prec_get(arena_t *arena) +static void * +huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) { - return (arena_dss_prec_get(choose_arena(arena))); + if (alignment <= chunksize) + return (huge_malloc(tsd, arena, usize, zero, tcache)); + return (huge_palloc(tsd, arena, usize, alignment, zero, tcache)); } -prof_ctx_t * -huge_prof_ctx_get(const void *ptr) +void * +huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) { - prof_ctx_t *ret; - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); + void *ret; + size_t copysize; - ret = node->prof_ctx; + /* Try to avoid moving the allocation. 
*/ + if (!huge_ralloc_no_move(ptr, oldsize, usize, usize, zero)) + return (ptr); - malloc_mutex_unlock(&huge_mtx); + /* + * usize and oldsize are different enough that we need to use a + * different size class. In that case, fall back to allocating new + * space and copying. + */ + ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero, + tcache); + if (ret == NULL) + return (NULL); + copysize = (usize < oldsize) ? usize : oldsize; + memcpy(ret, ptr, copysize); + isqalloc(tsd, ptr, oldsize, tcache); return (ret); } void -huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); + extent_node_t *node; + arena_t *arena; + + node = huge_node_get(ptr); + arena = extent_node_arena_get(node); + huge_node_unset(ptr, node); + malloc_mutex_lock(&arena->huge_mtx); + ql_remove(&arena->huge, node, ql_link); + malloc_mutex_unlock(&arena->huge_mtx); + + huge_dalloc_junk(extent_node_addr_get(node), + extent_node_size_get(node)); + arena_chunk_dalloc_huge(extent_node_arena_get(node), + extent_node_addr_get(node), extent_node_size_get(node)); + idalloctm(tsd, node, tcache, true); +} - node->prof_ctx = ctx; +arena_t * +huge_aalloc(const void *ptr) +{ - malloc_mutex_unlock(&huge_mtx); + return (extent_node_arena_get(huge_node_get(ptr))); } -bool -huge_boot(void) +size_t +huge_salloc(const void *ptr) { + size_t size; + extent_node_t *node; + arena_t *arena; - /* Initialize chunks data. 
*/ - if (malloc_mutex_init(&huge_mtx)) - return (true); - extent_tree_ad_new(&huge); - - if (config_stats) { - huge_nmalloc = 0; - huge_ndalloc = 0; - huge_allocated = 0; - } + node = huge_node_get(ptr); + arena = extent_node_arena_get(node); + malloc_mutex_lock(&arena->huge_mtx); + size = extent_node_size_get(node); + malloc_mutex_unlock(&arena->huge_mtx); - return (false); + return (size); } -void -huge_prefork(void) +prof_tctx_t * +huge_prof_tctx_get(const void *ptr) { + prof_tctx_t *tctx; + extent_node_t *node; + arena_t *arena; + + node = huge_node_get(ptr); + arena = extent_node_arena_get(node); + malloc_mutex_lock(&arena->huge_mtx); + tctx = extent_node_prof_tctx_get(node); + malloc_mutex_unlock(&arena->huge_mtx); - malloc_mutex_prefork(&huge_mtx); + return (tctx); } void -huge_postfork_parent(void) +huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { + extent_node_t *node; + arena_t *arena; - malloc_mutex_postfork_parent(&huge_mtx); + node = huge_node_get(ptr); + arena = extent_node_arena_get(node); + malloc_mutex_lock(&arena->huge_mtx); + extent_node_prof_tctx_set(node, tctx); + malloc_mutex_unlock(&arena->huge_mtx); } void -huge_postfork_child(void) +huge_prof_tctx_reset(const void *ptr) { - malloc_mutex_postfork_child(&huge_mtx); + huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/dep/jemalloc/src/jemalloc.c b/dep/jemalloc/src/jemalloc.c index 204778bc89d..5a2d3240680 100644 --- a/dep/jemalloc/src/jemalloc.c +++ b/dep/jemalloc/src/jemalloc.c @@ -4,12 +4,8 @@ /******************************************************************************/ /* Data. */ -malloc_tsd_data(, arenas, arena_t *, NULL) -malloc_tsd_data(, thread_allocated, thread_allocated_t, - THREAD_ALLOCATED_INITIALIZER) - /* Runtime configuration options. 
*/ -const char *je_malloc_conf; +const char *je_malloc_conf JEMALLOC_ATTR(weak); bool opt_abort = #ifdef JEMALLOC_DEBUG true @@ -17,30 +13,152 @@ bool opt_abort = false #endif ; -bool opt_junk = +const char *opt_junk = +#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) + "true" +#else + "false" +#endif + ; +bool opt_junk_alloc = +#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) + true +#else + false +#endif + ; +bool opt_junk_free = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) true #else false #endif ; + size_t opt_quarantine = ZU(0); bool opt_redzone = false; bool opt_utrace = false; -bool opt_valgrind = false; bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; -unsigned ncpus; +/* Initialized to true if the process is running inside Valgrind. */ +bool in_valgrind; -malloc_mutex_t arenas_lock; -arena_t **arenas; -unsigned narenas_total; -unsigned narenas_auto; +unsigned ncpus; -/* Set to true once the allocator has been initialized. */ -static bool malloc_initialized = false; +/* Protects arenas initialization (arenas, narenas_total). */ +static malloc_mutex_t arenas_lock; +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + * + * arenas[0..narenas_auto) are used for automatic multiplexing of threads and + * arenas. arenas[narenas_auto..narenas_total) are only used if the application + * takes some action to create them and allocate from them. + */ +static arena_t **arenas; +static unsigned narenas_total; +static arena_t *a0; /* arenas[0]; read-only after initialization. */ +static unsigned narenas_auto; /* Read-only after initialization. */ + +typedef enum { + malloc_init_uninitialized = 3, + malloc_init_a0_initialized = 2, + malloc_init_recursible = 1, + malloc_init_initialized = 0 /* Common case --> jnz. 
*/ +} malloc_init_t; +static malloc_init_t malloc_init_state = malloc_init_uninitialized; + +JEMALLOC_ALIGNED(CACHELINE) +const size_t index2size_tab[NSIZES] = { +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + ((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)), + SIZE_CLASSES +#undef SC +}; + +JEMALLOC_ALIGNED(CACHELINE) +const uint8_t size2index_tab[] = { +#if LG_TINY_MIN == 0 +#warning "Dangerous LG_TINY_MIN" +#define S2B_0(i) i, +#elif LG_TINY_MIN == 1 +#warning "Dangerous LG_TINY_MIN" +#define S2B_1(i) i, +#elif LG_TINY_MIN == 2 +#warning "Dangerous LG_TINY_MIN" +#define S2B_2(i) i, +#elif LG_TINY_MIN == 3 +#define S2B_3(i) i, +#elif LG_TINY_MIN == 4 +#define S2B_4(i) i, +#elif LG_TINY_MIN == 5 +#define S2B_5(i) i, +#elif LG_TINY_MIN == 6 +#define S2B_6(i) i, +#elif LG_TINY_MIN == 7 +#define S2B_7(i) i, +#elif LG_TINY_MIN == 8 +#define S2B_8(i) i, +#elif LG_TINY_MIN == 9 +#define S2B_9(i) i, +#elif LG_TINY_MIN == 10 +#define S2B_10(i) i, +#elif LG_TINY_MIN == 11 +#define S2B_11(i) i, +#else +#error "Unsupported LG_TINY_MIN" +#endif +#if LG_TINY_MIN < 1 +#define S2B_1(i) S2B_0(i) S2B_0(i) +#endif +#if LG_TINY_MIN < 2 +#define S2B_2(i) S2B_1(i) S2B_1(i) +#endif +#if LG_TINY_MIN < 3 +#define S2B_3(i) S2B_2(i) S2B_2(i) +#endif +#if LG_TINY_MIN < 4 +#define S2B_4(i) S2B_3(i) S2B_3(i) +#endif +#if LG_TINY_MIN < 5 +#define S2B_5(i) S2B_4(i) S2B_4(i) +#endif +#if LG_TINY_MIN < 6 +#define S2B_6(i) S2B_5(i) S2B_5(i) +#endif +#if LG_TINY_MIN < 7 +#define S2B_7(i) S2B_6(i) S2B_6(i) +#endif +#if LG_TINY_MIN < 8 +#define S2B_8(i) S2B_7(i) S2B_7(i) +#endif +#if LG_TINY_MIN < 9 +#define S2B_9(i) S2B_8(i) S2B_8(i) +#endif +#if LG_TINY_MIN < 10 +#define S2B_10(i) S2B_9(i) S2B_9(i) +#endif +#if LG_TINY_MIN < 11 +#define S2B_11(i) S2B_10(i) S2B_10(i) +#endif +#define S2B_no(i) +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + S2B_##lg_delta_lookup(index) + SIZE_CLASSES +#undef S2B_3 +#undef S2B_4 +#undef S2B_5 +#undef S2B_6 +#undef S2B_7 +#undef 
S2B_8 +#undef S2B_9 +#undef S2B_10 +#undef S2B_11 +#undef S2B_no +#undef SC +}; #ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ @@ -57,14 +175,28 @@ static bool malloc_initializer = NO_INITIALIZER; /* Used to avoid initialization races. */ #ifdef _WIN32 +#if _WIN32_WINNT >= 0x0600 +static malloc_mutex_t init_lock = SRWLOCK_INIT; +#else static malloc_mutex_t init_lock; +static bool init_lock_initialized = false; JEMALLOC_ATTR(constructor) static void WINAPI _init_init_lock(void) { - malloc_mutex_init(&init_lock); + /* If another constructor in the same binary is using mallctl to + * e.g. setup chunk hooks, it may end up running before this one, + * and malloc_init_hard will crash trying to lock the uninitialized + * lock. So we force an initialization of the lock in + * malloc_init_hard as well. We don't try to care about atomicity + * of the accessed to the init_lock_initialized boolean, since it + * really only matters early in the process creation, before any + * separate thread normally starts doing anything. */ + if (!init_lock_initialized) + malloc_mutex_init(&init_lock); + init_lock_initialized = true; } #ifdef _MSC_VER @@ -72,7 +204,7 @@ _init_init_lock(void) JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) static const void (WINAPI *init_init_lock)(void) = _init_init_lock; #endif - +#endif #else static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #endif @@ -85,7 +217,7 @@ typedef struct { #ifdef JEMALLOC_UTRACE # define UTRACE(a, b, c) do { \ - if (opt_utrace) { \ + if (unlikely(opt_utrace)) { \ int utrace_serrno = errno; \ malloc_utrace_t ut; \ ut.p = (a); \ @@ -105,6 +237,7 @@ typedef struct { * definition. */ +static bool malloc_init_hard_a0(void); static bool malloc_init_hard(void); /******************************************************************************/ @@ -112,35 +245,333 @@ static bool malloc_init_hard(void); * Begin miscellaneous support functions. 
*/ +JEMALLOC_ALWAYS_INLINE_C bool +malloc_initialized(void) +{ + + return (malloc_init_state == malloc_init_initialized); +} + +JEMALLOC_ALWAYS_INLINE_C void +malloc_thread_init(void) +{ + + /* + * TSD initialization can't be safely done as a side effect of + * deallocation, because it is possible for a thread to do nothing but + * deallocate its TLS data via free(), in which case writing to TLS + * would cause write-after-free memory corruption. The quarantine + * facility *only* gets used as a side effect of deallocation, so make + * a best effort attempt at initializing its TSD by hooking all + * allocation events. + */ + if (config_fill && unlikely(opt_quarantine)) + quarantine_alloc_hook(); +} + +JEMALLOC_ALWAYS_INLINE_C bool +malloc_init_a0(void) +{ + + if (unlikely(malloc_init_state == malloc_init_uninitialized)) + return (malloc_init_hard_a0()); + return (false); +} + +JEMALLOC_ALWAYS_INLINE_C bool +malloc_init(void) +{ + + if (unlikely(!malloc_initialized()) && malloc_init_hard()) + return (true); + malloc_thread_init(); + + return (false); +} + +/* + * The a0*() functions are used instead of i[mcd]alloc() in situations that + * cannot tolerate TLS variable access. + */ + +arena_t * +a0get(void) +{ + + assert(a0 != NULL); + return (a0); +} + +static void * +a0ialloc(size_t size, bool zero, bool is_metadata) +{ + + if (unlikely(malloc_init_a0())) + return (NULL); + + return (iallocztm(NULL, size, zero, false, is_metadata, a0get())); +} + +static void +a0idalloc(void *ptr, bool is_metadata) +{ + + idalloctm(NULL, ptr, false, is_metadata); +} + +void * +a0malloc(size_t size) +{ + + return (a0ialloc(size, false, true)); +} + +void +a0dalloc(void *ptr) +{ + + a0idalloc(ptr, true); +} + +/* + * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-senstive + * situations that cannot tolerate TLS variable access (TLS allocation and very + * early internal data structure initialization). 
+ */ + +void * +bootstrap_malloc(size_t size) +{ + + if (unlikely(size == 0)) + size = 1; + + return (a0ialloc(size, false, false)); +} + +void * +bootstrap_calloc(size_t num, size_t size) +{ + size_t num_size; + + num_size = num * size; + if (unlikely(num_size == 0)) { + assert(num == 0 || size == 0); + num_size = 1; + } + + return (a0ialloc(num_size, true, false)); +} + +void +bootstrap_free(void *ptr) +{ + + if (unlikely(ptr == NULL)) + return; + + a0idalloc(ptr, false); +} + /* Create a new arena and insert it into the arenas array at index ind. */ +static arena_t * +arena_init_locked(unsigned ind) +{ + arena_t *arena; + + /* Expand arenas if necessary. */ + assert(ind <= narenas_total); + if (ind > MALLOCX_ARENA_MAX) + return (NULL); + if (ind == narenas_total) { + unsigned narenas_new = narenas_total + 1; + arena_t **arenas_new = + (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * + sizeof(arena_t *))); + if (arenas_new == NULL) + return (NULL); + memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); + arenas_new[ind] = NULL; + /* + * Deallocate only if arenas came from a0malloc() (not + * base_alloc()). + */ + if (narenas_total != narenas_auto) + a0dalloc(arenas); + arenas = arenas_new; + narenas_total = narenas_new; + } + + /* + * Another thread may have already initialized arenas[ind] if it's an + * auto arena. + */ + arena = arenas[ind]; + if (arena != NULL) { + assert(ind < narenas_auto); + return (arena); + } + + /* Actually initialize the arena. 
*/ + arena = arenas[ind] = arena_new(ind); + return (arena); +} + arena_t * -arenas_extend(unsigned ind) +arena_init(unsigned ind) { - arena_t *ret; + arena_t *arena; + + malloc_mutex_lock(&arenas_lock); + arena = arena_init_locked(ind); + malloc_mutex_unlock(&arenas_lock); + return (arena); +} + +unsigned +narenas_total_get(void) +{ + unsigned narenas; + + malloc_mutex_lock(&arenas_lock); + narenas = narenas_total; + malloc_mutex_unlock(&arenas_lock); + + return (narenas); +} + +static void +arena_bind_locked(tsd_t *tsd, unsigned ind) +{ + arena_t *arena; + + arena = arenas[ind]; + arena->nthreads++; + + if (tsd_nominal(tsd)) + tsd_arena_set(tsd, arena); +} + +static void +arena_bind(tsd_t *tsd, unsigned ind) +{ + + malloc_mutex_lock(&arenas_lock); + arena_bind_locked(tsd, ind); + malloc_mutex_unlock(&arenas_lock); +} + +void +arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) +{ + arena_t *oldarena, *newarena; + + malloc_mutex_lock(&arenas_lock); + oldarena = arenas[oldind]; + newarena = arenas[newind]; + oldarena->nthreads--; + newarena->nthreads++; + malloc_mutex_unlock(&arenas_lock); + tsd_arena_set(tsd, newarena); +} + +unsigned +arena_nbound(unsigned ind) +{ + unsigned nthreads; + + malloc_mutex_lock(&arenas_lock); + nthreads = arenas[ind]->nthreads; + malloc_mutex_unlock(&arenas_lock); + return (nthreads); +} + +static void +arena_unbind(tsd_t *tsd, unsigned ind) +{ + arena_t *arena; + + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + arena->nthreads--; + malloc_mutex_unlock(&arenas_lock); + tsd_arena_set(tsd, NULL); +} - ret = (arena_t *)base_alloc(sizeof(arena_t)); - if (ret != NULL && arena_new(ret, ind) == false) { - arenas[ind] = ret; - return (ret); +arena_t * +arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) +{ + arena_t *arena; + arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + unsigned narenas_cache = tsd_narenas_cache_get(tsd); + unsigned narenas_actual = narenas_total_get(); + + /* Deallocate old cache if 
it's too small. */ + if (arenas_cache != NULL && narenas_cache < narenas_actual) { + a0dalloc(arenas_cache); + arenas_cache = NULL; + narenas_cache = 0; + tsd_arenas_cache_set(tsd, arenas_cache); + tsd_narenas_cache_set(tsd, narenas_cache); + } + + /* Allocate cache if it's missing. */ + if (arenas_cache == NULL) { + bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); + assert(ind < narenas_actual || !init_if_missing); + narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1; + + if (tsd_nominal(tsd) && !*arenas_cache_bypassp) { + *arenas_cache_bypassp = true; + arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * + narenas_cache); + *arenas_cache_bypassp = false; + } + if (arenas_cache == NULL) { + /* + * This function must always tell the truth, even if + * it's slow, so don't let OOM, thread cleanup (note + * tsd_nominal check), nor recursive allocation + * avoidance (note arenas_cache_bypass check) get in the + * way. + */ + if (ind >= narenas_actual) + return (NULL); + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + malloc_mutex_unlock(&arenas_lock); + return (arena); + } + assert(tsd_nominal(tsd) && !*arenas_cache_bypassp); + tsd_arenas_cache_set(tsd, arenas_cache); + tsd_narenas_cache_set(tsd, narenas_cache); } - /* Only reached if there is an OOM error. */ /* - * OOM here is quite inconvenient to propagate, since dealing with it - * would require a check for failure in the fast path. Instead, punt - * by using arenas[0]. In practice, this is an extremely unlikely - * failure. + * Copy to cache. It's possible that the actual number of arenas has + * increased since narenas_total_get() was called above, but that causes + * no correctness issues unless two threads concurrently execute the + * arenas.extend mallctl, which we trust mallctl synchronization to + * prevent. 
*/ - malloc_write("<jemalloc>: Error initializing arena\n"); - if (opt_abort) - abort(); + malloc_mutex_lock(&arenas_lock); + memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual); + malloc_mutex_unlock(&arenas_lock); + if (narenas_cache > narenas_actual) { + memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) * + (narenas_cache - narenas_actual)); + } - return (arenas[0]); + /* Read the refreshed cache, and init the arena if necessary. */ + arena = arenas_cache[ind]; + if (init_if_missing && arena == NULL) + arena = arenas_cache[ind] = arena_init(ind); + return (arena); } -/* Slow path, called only by choose_arena(). */ +/* Slow path, called only by arena_choose(). */ arena_t * -choose_arena_hard(void) +arena_choose_hard(tsd_t *tsd) { arena_t *ret; @@ -150,7 +581,7 @@ choose_arena_hard(void) choose = 0; first_null = narenas_auto; malloc_mutex_lock(&arenas_lock); - assert(arenas[0] != NULL); + assert(a0get() != NULL); for (i = 1; i < narenas_auto; i++) { if (arenas[i] != NULL) { /* @@ -183,22 +614,73 @@ choose_arena_hard(void) ret = arenas[choose]; } else { /* Initialize a new arena. */ - ret = arenas_extend(first_null); + choose = first_null; + ret = arena_init_locked(choose); + if (ret == NULL) { + malloc_mutex_unlock(&arenas_lock); + return (NULL); + } } - ret->nthreads++; + arena_bind_locked(tsd, choose); malloc_mutex_unlock(&arenas_lock); } else { - ret = arenas[0]; - malloc_mutex_lock(&arenas_lock); - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); + ret = a0get(); + arena_bind(tsd, 0); } - arenas_tsd_set(&ret); - return (ret); } +void +thread_allocated_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + +void +thread_deallocated_cleanup(tsd_t *tsd) +{ + + /* Do nothing. 
*/ +} + +void +arena_cleanup(tsd_t *tsd) +{ + arena_t *arena; + + arena = tsd_arena_get(tsd); + if (arena != NULL) + arena_unbind(tsd, arena->ind); +} + +void +arenas_cache_cleanup(tsd_t *tsd) +{ + arena_t **arenas_cache; + + arenas_cache = tsd_arenas_cache_get(tsd); + if (arenas_cache != NULL) { + tsd_arenas_cache_set(tsd, NULL); + a0dalloc(arenas_cache); + } +} + +void +narenas_cache_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + +void +arenas_cache_bypass_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + static void stats_print_atexit(void) { @@ -243,6 +725,19 @@ stats_print_atexit(void) * Begin initialization functions. */ +#ifndef JEMALLOC_HAVE_SECURE_GETENV +static char * +secure_getenv(const char *name) +{ + +# ifdef JEMALLOC_HAVE_ISSETUGID + if (issetugid() != 0) + return (NULL); +# endif + return (getenv(name)); +} +#endif + static unsigned malloc_ncpus(void) { @@ -258,44 +753,6 @@ malloc_ncpus(void) return ((result == -1) ? 1 : (unsigned)result); } -void -arenas_cleanup(void *arg) -{ - arena_t *arena = *(arena_t **)arg; - - malloc_mutex_lock(&arenas_lock); - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); -} - -JEMALLOC_ALWAYS_INLINE_C void -malloc_thread_init(void) -{ - - /* - * TSD initialization can't be safely done as a side effect of - * deallocation, because it is possible for a thread to do nothing but - * deallocate its TLS data via free(), in which case writing to TLS - * would cause write-after-free memory corruption. The quarantine - * facility *only* gets used as a side effect of deallocation, so make - * a best effort attempt at initializing its TSD by hooking all - * allocation events. 
- */ - if (config_fill && opt_quarantine) - quarantine_alloc_hook(); -} - -JEMALLOC_ALWAYS_INLINE_C bool -malloc_init(void) -{ - - if (malloc_initialized == false && malloc_init_hard()) - return (true); - malloc_thread_init(); - - return (false); -} - static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) @@ -305,7 +762,7 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, *k_p = opts; - for (accept = false; accept == false;) { + for (accept = false; !accept;) { switch (*opts) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': @@ -340,7 +797,7 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, } } - for (accept = false; accept == false;) { + for (accept = false; !accept;) { switch (*opts) { case ',': opts++; @@ -394,14 +851,16 @@ malloc_conf_init(void) * valgrind option remains in jemalloc 3.x for compatibility reasons. */ if (config_valgrind) { - opt_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; - if (config_fill && opt_valgrind) { - opt_junk = false; - assert(opt_zero == false); + in_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; + if (config_fill && unlikely(in_valgrind)) { + opt_junk = "false"; + opt_junk_alloc = false; + opt_junk_free = false; + assert(!opt_zero); opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; opt_redzone = true; } - if (config_tcache && opt_valgrind) + if (config_tcache && unlikely(in_valgrind)) opt_tcache = false; } @@ -441,7 +900,7 @@ malloc_conf_init(void) if (linklen == -1) { /* No configuration specified. */ linklen = 0; - /* restore errno */ + /* Restore errno. 
*/ set_errno(saved_errno); } #endif @@ -457,7 +916,7 @@ malloc_conf_init(void) #endif ; - if ((opts = getenv(envname)) != NULL) { + if ((opts = secure_getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to * the value of the MALLOC_CONF environment @@ -475,27 +934,28 @@ malloc_conf_init(void) opts = buf; } - while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, - &vlen) == false) { -#define CONF_HANDLE_BOOL(o, n) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ - if (strncmp("true", v, vlen) == 0 && \ - vlen == sizeof("true")-1) \ + while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v, + &vlen)) { +#define CONF_MATCH(n) \ + (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) +#define CONF_MATCH_VALUE(n) \ + (sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0) +#define CONF_HANDLE_BOOL(o, n, cont) \ + if (CONF_MATCH(n)) { \ + if (CONF_MATCH_VALUE("true")) \ o = true; \ - else if (strncmp("false", v, vlen) == \ - 0 && vlen == sizeof("false")-1) \ + else if (CONF_MATCH_VALUE("false")) \ o = false; \ else { \ malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ } \ - continue; \ + if (cont) \ + continue; \ } #define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ + if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ \ @@ -507,15 +967,15 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if (min != 0 && um < min) \ - o = min; \ - else if (um > max) \ - o = max; \ + if ((min) != 0 && um < (min)) \ + o = (min); \ + else if (um > (max)) \ + o = (max); \ else \ o = um; \ } else { \ - if ((min != 0 && um < min) || \ - um > max) { \ + if (((min) != 0 && um < (min)) \ + || um > (max)) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ @@ -526,8 +986,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ + if 
(CONF_MATCH(n)) { \ long l; \ char *end; \ \ @@ -538,8 +997,8 @@ malloc_conf_init(void) malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ - } else if (l < (ssize_t)min || l > \ - (ssize_t)max) { \ + } else if (l < (ssize_t)(min) || l > \ + (ssize_t)(max)) { \ malloc_conf_error( \ "Out-of-range conf value", \ k, klen, v, vlen); \ @@ -548,8 +1007,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_CHAR_P(o, n, d) \ - if (sizeof(n)-1 == klen && strncmp(n, k, \ - klen) == 0) { \ + if (CONF_MATCH(n)) { \ size_t cpylen = (vlen <= \ sizeof(o)-1) ? vlen : \ sizeof(o)-1; \ @@ -558,17 +1016,18 @@ malloc_conf_init(void) continue; \ } - CONF_HANDLE_BOOL(opt_abort, "abort") + CONF_HANDLE_BOOL(opt_abort, "abort", true) /* - * Chunks always require at least one header page, plus - * one data page in the absence of redzones, or three - * pages in the presence of redzones. In order to - * simplify options processing, fix the limit based on - * config_fill. + * Chunks always require at least one header page, + * as many as 2^(LG_SIZE_CLASS_GROUP+1) data pages, and + * possibly an additional page in the presence of + * redzones. In order to simplify options processing, + * use a conservative bound that accommodates all these + * constraints. */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1, - true) + LG_SIZE_CLASS_GROUP + (config_fill ? 
2 : 1), + (sizeof(size_t) << 3) - 1, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -587,7 +1046,7 @@ malloc_conf_init(void) } } } - if (match == false) { + if (!match) { malloc_conf_error("Invalid conf value", k, klen, v, vlen); } @@ -597,47 +1056,87 @@ malloc_conf_init(void) SIZE_T_MAX, false) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_stats_print, "stats_print") + CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { - CONF_HANDLE_BOOL(opt_junk, "junk") + if (CONF_MATCH("junk")) { + if (CONF_MATCH_VALUE("true")) { + opt_junk = "true"; + opt_junk_alloc = opt_junk_free = + true; + } else if (CONF_MATCH_VALUE("false")) { + opt_junk = "false"; + opt_junk_alloc = opt_junk_free = + false; + } else if (CONF_MATCH_VALUE("alloc")) { + opt_junk = "alloc"; + opt_junk_alloc = true; + opt_junk_free = false; + } else if (CONF_MATCH_VALUE("free")) { + opt_junk = "free"; + opt_junk_alloc = false; + opt_junk_free = true; + } else { + malloc_conf_error( + "Invalid conf value", k, + klen, v, vlen); + } + continue; + } CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", 0, SIZE_T_MAX, false) - CONF_HANDLE_BOOL(opt_redzone, "redzone") - CONF_HANDLE_BOOL(opt_zero, "zero") + CONF_HANDLE_BOOL(opt_redzone, "redzone", true) + CONF_HANDLE_BOOL(opt_zero, "zero", true) } if (config_utrace) { - CONF_HANDLE_BOOL(opt_utrace, "utrace") - } - if (config_valgrind) { - CONF_HANDLE_BOOL(opt_valgrind, "valgrind") + CONF_HANDLE_BOOL(opt_utrace, "utrace", true) } if (config_xmalloc) { - CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") + CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc", true) } if (config_tcache) { - CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_BOOL(opt_tcache, "tcache", + !config_valgrind || !in_valgrind) + if (CONF_MATCH("tcache")) { + assert(config_valgrind && in_valgrind); + if (opt_tcache) { + opt_tcache = false; + malloc_conf_error( + "tcache cannot be enabled " + "while running 
inside Valgrind", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) } if (config_prof) { - CONF_HANDLE_BOOL(opt_prof, "prof") + CONF_HANDLE_BOOL(opt_prof, "prof", true) CONF_HANDLE_CHAR_P(opt_prof_prefix, "prof_prefix", "jeprof") - CONF_HANDLE_BOOL(opt_prof_active, "prof_active") - CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, + CONF_HANDLE_BOOL(opt_prof_active, "prof_active", + true) + CONF_HANDLE_BOOL(opt_prof_thread_active_init, + "prof_thread_active_init", true) + CONF_HANDLE_SIZE_T(opt_lg_prof_sample, "lg_prof_sample", 0, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") + (sizeof(uint64_t) << 3) - 1, true) + CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum", + true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") - CONF_HANDLE_BOOL(opt_prof_final, "prof_final") - CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") + CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump", + true) + CONF_HANDLE_BOOL(opt_prof_final, "prof_final", + true) + CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak", + true) } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); +#undef CONF_MATCH #undef CONF_HANDLE_BOOL #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T @@ -646,41 +1145,44 @@ malloc_conf_init(void) } } +/* init_lock must be held. */ static bool -malloc_init_hard(void) +malloc_init_hard_needed(void) { - arena_t *init_arenas[1]; - malloc_mutex_lock(&init_lock); - if (malloc_initialized || IS_INITIALIZER) { + if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == + malloc_init_recursible)) { /* * Another thread initialized the allocator before this one * acquired init_lock, or this thread is the initializing * thread, and it is recursively allocating. 
*/ - malloc_mutex_unlock(&init_lock); return (false); } #ifdef JEMALLOC_THREADED_INIT - if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) { + if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { /* Busy-wait until the initializing thread completes. */ do { malloc_mutex_unlock(&init_lock); CPU_SPINWAIT; malloc_mutex_lock(&init_lock); - } while (malloc_initialized == false); - malloc_mutex_unlock(&init_lock); + } while (!malloc_initialized()); return (false); } #endif + return (true); +} + +/* init_lock must be held. */ +static bool +malloc_init_hard_a0_locked(void) +{ + malloc_initializer = INITIALIZER; - malloc_tsd_boot(); if (config_prof) prof_boot0(); - malloc_conf_init(); - if (opt_stats_print) { /* Print statistics at exit. */ if (atexit(stats_print_atexit) != 0) { @@ -689,94 +1191,64 @@ malloc_init_hard(void) abort(); } } - - if (base_boot()) { - malloc_mutex_unlock(&init_lock); + if (base_boot()) return (true); - } - - if (chunk_boot()) { - malloc_mutex_unlock(&init_lock); + if (chunk_boot()) return (true); - } - - if (ctl_boot()) { - malloc_mutex_unlock(&init_lock); + if (ctl_boot()) return (true); - } - if (config_prof) prof_boot1(); - - arena_boot(); - - if (config_tcache && tcache_boot0()) { - malloc_mutex_unlock(&init_lock); + if (arena_boot()) return (true); - } - - if (huge_boot()) { - malloc_mutex_unlock(&init_lock); + if (config_tcache && tcache_boot()) return (true); - } - - if (malloc_mutex_init(&arenas_lock)) { - malloc_mutex_unlock(&init_lock); + if (malloc_mutex_init(&arenas_lock)) return (true); - } - /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ narenas_total = narenas_auto = 1; - arenas = init_arenas; + arenas = &a0; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); - /* * Initialize one arena here. The rest are lazily created in - * choose_arena_hard(). + * arena_choose_hard(). 
*/ - arenas_extend(0); - if (arenas[0] == NULL) { - malloc_mutex_unlock(&init_lock); + if (arena_init(0) == NULL) return (true); - } - - /* Initialize allocation counters before any allocations can occur. */ - if (config_stats && thread_allocated_tsd_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (arenas_tsd_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } + malloc_init_state = malloc_init_a0_initialized; + return (false); +} - if (config_tcache && tcache_boot1()) { - malloc_mutex_unlock(&init_lock); - return (true); - } +static bool +malloc_init_hard_a0(void) +{ + bool ret; - if (config_fill && quarantine_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } + malloc_mutex_lock(&init_lock); + ret = malloc_init_hard_a0_locked(); + malloc_mutex_unlock(&init_lock); + return (ret); +} - if (config_prof && prof_boot2()) { - malloc_mutex_unlock(&init_lock); - return (true); - } +/* + * Initialize data structures which may trigger recursive allocation. + * + * init_lock must be held. + */ +static void +malloc_init_hard_recursible(void) +{ + malloc_init_state = malloc_init_recursible; malloc_mutex_unlock(&init_lock); - /**********************************************************************/ - /* Recursive allocation may follow. */ ncpus = malloc_ncpus(); #if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ - && !defined(_WIN32)) + && !defined(_WIN32) && !defined(__native_client__)) /* LinuxThreads's pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { @@ -785,15 +1257,16 @@ malloc_init_hard(void) abort(); } #endif - - /* Done recursively allocating. */ - /**********************************************************************/ malloc_mutex_lock(&init_lock); +} - if (mutex_boot()) { - malloc_mutex_unlock(&init_lock); +/* init_lock must be held. 
*/ +static bool +malloc_init_hard_finish(void) +{ + + if (mutex_boot()) return (true); - } if (opt_narenas == 0) { /* @@ -820,21 +1293,56 @@ malloc_init_hard(void) /* Allocate and initialize arenas. */ arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); - if (arenas == NULL) { - malloc_mutex_unlock(&init_lock); + if (arenas == NULL) return (true); - } /* * Zero the array. In practice, this should always be pre-zeroed, * since it was just mmap()ed, but let's be sure. */ memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. */ - arenas[0] = init_arenas[0]; + arenas[0] = a0; - malloc_initialized = true; - malloc_mutex_unlock(&init_lock); + malloc_init_state = malloc_init_initialized; + return (false); +} + +static bool +malloc_init_hard(void) +{ +#if defined(_WIN32) && _WIN32_WINNT < 0x0600 + _init_init_lock(); +#endif + malloc_mutex_lock(&init_lock); + if (!malloc_init_hard_needed()) { + malloc_mutex_unlock(&init_lock); + return (false); + } + + if (malloc_init_state != malloc_init_a0_initialized && + malloc_init_hard_a0_locked()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (malloc_tsd_boot0()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + malloc_init_hard_recursible(); + + if (malloc_init_hard_finish()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + malloc_mutex_unlock(&init_lock); + malloc_tsd_boot1(); return (false); } @@ -847,98 +1355,87 @@ malloc_init_hard(void) */ static void * -imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +imalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { - p = imalloc(SMALL_MAXCLASS+1); + if (usize <= SMALL_MAXCLASS) { + p = imalloc(tsd, LARGE_MINCLASS); if (p == NULL) return (NULL); 
arena_prof_promoted(p, usize); } else - p = imalloc(usize); + p = imalloc(tsd, usize); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +imalloc_prof(tsd_t *tsd, size_t usize) { void *p; + prof_tctx_t *tctx; - if ((uintptr_t)cnt != (uintptr_t)1U) - p = imalloc_prof_sample(usize, cnt); + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) + p = imalloc_prof_sample(tsd, usize, tctx); else - p = imalloc(usize); - if (p == NULL) + p = imalloc(tsd, usize); + if (unlikely(p == NULL)) { + prof_alloc_rollback(tsd, tctx, true); return (NULL); - prof_malloc(p, usize, cnt); + } + prof_malloc(p, usize, tctx); return (p); } -/* - * MALLOC_BODY() is a macro rather than a function because its contents are in - * the fast path, but inlining would cause reliability issues when determining - * how many frames to discard from heap profiling backtraces. - */ -#define MALLOC_BODY(ret, size, usize) do { \ - if (malloc_init()) \ - ret = NULL; \ - else { \ - if (config_prof && opt_prof) { \ - prof_thr_cnt_t *cnt; \ - \ - usize = s2u(size); \ - /* \ - * Call PROF_ALLOC_PREP() here rather than in \ - * imalloc_prof() so that imalloc_prof() can be \ - * inlined without introducing uncertainty \ - * about the number of backtrace frames to \ - * ignore. imalloc_prof() is in the fast path \ - * when heap profiling is enabled, so inlining \ - * is critical to performance. (For \ - * consistency all callers of PROF_ALLOC_PREP() \ - * are structured similarly, even though e.g. \ - * realloc() isn't called enough for inlining \ - * to be critical.) 
\ - */ \ - PROF_ALLOC_PREP(1, usize, cnt); \ - ret = imalloc_prof(usize, cnt); \ - } else { \ - if (config_stats || (config_valgrind && \ - opt_valgrind)) \ - usize = s2u(size); \ - ret = imalloc(size); \ - } \ - } \ -} while (0) +JEMALLOC_ALWAYS_INLINE_C void * +imalloc_body(size_t size, tsd_t **tsd, size_t *usize) +{ -void * + if (unlikely(malloc_init())) + return (NULL); + *tsd = tsd_fetch(); + + if (config_prof && opt_prof) { + *usize = s2u(size); + if (unlikely(*usize == 0)) + return (NULL); + return (imalloc_prof(*tsd, *usize)); + } + + if (config_stats || (config_valgrind && unlikely(in_valgrind))) + *usize = s2u(size); + return (imalloc(*tsd, size)); +} + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { void *ret; + tsd_t *tsd; size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (size == 0) size = 1; - MALLOC_BODY(ret, size, usize); - - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { + ret = imalloc_body(size, &tsd, &usize); + if (unlikely(ret == NULL)) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write("<jemalloc>: Error in malloc(): " "out of memory\n"); abort(); } set_errno(ENOMEM); } - if (config_stats && ret != NULL) { + if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(ret, config_prof)); - thread_allocated_tsd_get()->allocated += usize; + *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, ret); JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); @@ -946,107 +1443,103 @@ je_malloc(size_t size) } static void * -imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, + prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { - assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0); - p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment, - 
false); + if (usize <= SMALL_MAXCLASS) { + assert(sa2u(LARGE_MINCLASS, alignment) == LARGE_MINCLASS); + p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = ipalloc(usize, alignment, false); + p = ipalloc(tsd, usize, alignment, false); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) { void *p; + prof_tctx_t *tctx; - if ((uintptr_t)cnt != (uintptr_t)1U) - p = imemalign_prof_sample(alignment, usize, cnt); + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) + p = imemalign_prof_sample(tsd, alignment, usize, tctx); else - p = ipalloc(usize, alignment, false); - if (p == NULL) + p = ipalloc(tsd, usize, alignment, false); + if (unlikely(p == NULL)) { + prof_alloc_rollback(tsd, tctx, true); return (NULL); - prof_malloc(p, usize, cnt); + } + prof_malloc(p, usize, tctx); return (p); } JEMALLOC_ATTR(nonnull(1)) -#ifdef JEMALLOC_PROF -/* - * Avoid any uncertainty as to how many backtrace frames to ignore in - * PROF_ALLOC_PREP(). - */ -JEMALLOC_NOINLINE -#endif static int imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) { int ret; + tsd_t *tsd; size_t usize; void *result; assert(min_alignment != 0); - if (malloc_init()) { + if (unlikely(malloc_init())) { result = NULL; goto label_oom; - } else { - if (size == 0) - size = 1; - - /* Make sure that alignment is a large enough power of 2. 
*/ - if (((alignment - 1) & alignment) != 0 - || (alignment < min_alignment)) { - if (config_xmalloc && opt_xmalloc) { - malloc_write("<jemalloc>: Error allocating " - "aligned memory: invalid alignment\n"); - abort(); - } - result = NULL; - ret = EINVAL; - goto label_return; - } + } + tsd = tsd_fetch(); + if (size == 0) + size = 1; - usize = sa2u(size, alignment); - if (usize == 0) { - result = NULL; - goto label_oom; + /* Make sure that alignment is a large enough power of 2. */ + if (unlikely(((alignment - 1) & alignment) != 0 + || (alignment < min_alignment))) { + if (config_xmalloc && unlikely(opt_xmalloc)) { + malloc_write("<jemalloc>: Error allocating " + "aligned memory: invalid alignment\n"); + abort(); } + result = NULL; + ret = EINVAL; + goto label_return; + } - if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - - PROF_ALLOC_PREP(2, usize, cnt); - result = imemalign_prof(alignment, usize, cnt); - } else - result = ipalloc(usize, alignment, false); - if (result == NULL) - goto label_oom; + usize = sa2u(size, alignment); + if (unlikely(usize == 0)) { + result = NULL; + goto label_oom; } + if (config_prof && opt_prof) + result = imemalign_prof(tsd, alignment, usize); + else + result = ipalloc(tsd, usize, alignment, false); + if (unlikely(result == NULL)) + goto label_oom; + assert(((uintptr_t)result & (alignment - 1)) == ZU(0)); + *memptr = result; ret = 0; label_return: - if (config_stats && result != NULL) { + if (config_stats && likely(result != NULL)) { assert(usize == isalloc(result, config_prof)); - thread_allocated_tsd_get()->allocated += usize; + *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); return (ret); label_oom: assert(result == NULL); - if (config_xmalloc && opt_xmalloc) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write("<jemalloc>: Error allocating aligned memory: " "out of memory\n"); abort(); @@ -1055,7 +1548,8 @@ label_oom: goto label_return; } -int +JEMALLOC_EXPORT int JEMALLOC_NOTHROW 
+JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { int ret = imemalign(memptr, alignment, size, sizeof(void *)); @@ -1064,13 +1558,15 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) return (ret); } -void * +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) je_aligned_alloc(size_t alignment, size_t size) { void *ret; int err; - if ((err = imemalign(&ret, alignment, size, 1)) != 0) { + if (unlikely((err = imemalign(&ret, alignment, size, 1)) != 0)) { ret = NULL; set_errno(err); } @@ -1080,54 +1576,62 @@ je_aligned_alloc(size_t alignment, size_t size) } static void * -icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { - p = icalloc(SMALL_MAXCLASS+1); + if (usize <= SMALL_MAXCLASS) { + p = icalloc(tsd, LARGE_MINCLASS); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = icalloc(usize); + p = icalloc(tsd, usize); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +icalloc_prof(tsd_t *tsd, size_t usize) { void *p; + prof_tctx_t *tctx; - if ((uintptr_t)cnt != (uintptr_t)1U) - p = icalloc_prof_sample(usize, cnt); + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) + p = icalloc_prof_sample(tsd, usize, tctx); else - p = icalloc(usize); - if (p == NULL) + p = icalloc(tsd, usize); + if (unlikely(p == NULL)) { + prof_alloc_rollback(tsd, tctx, true); return (NULL); - prof_malloc(p, usize, cnt); + } + prof_malloc(p, usize, tctx); return (p); } -void * +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) je_calloc(size_t num, size_t size) { void 
*ret; + tsd_t *tsd; size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - if (malloc_init()) { + if (unlikely(malloc_init())) { num_size = 0; ret = NULL; goto label_return; } + tsd = tsd_fetch(); num_size = num * size; - if (num_size == 0) { + if (unlikely(num_size == 0)) { if (num == 0 || size == 0) num_size = 1; else { @@ -1139,37 +1643,38 @@ je_calloc(size_t num, size_t size) * overflow during multiplication if neither operand uses any of the * most significant half of the bits in a size_t. */ - } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) - && (num_size / size != num)) { + } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) << + 2))) && (num_size / size != num))) { /* size_t overflow. */ ret = NULL; goto label_return; } if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - usize = s2u(num_size); - PROF_ALLOC_PREP(1, usize, cnt); - ret = icalloc_prof(usize, cnt); + if (unlikely(usize == 0)) { + ret = NULL; + goto label_return; + } + ret = icalloc_prof(tsd, usize); } else { - if (config_stats || (config_valgrind && opt_valgrind)) + if (config_stats || (config_valgrind && unlikely(in_valgrind))) usize = s2u(num_size); - ret = icalloc(num_size); + ret = icalloc(tsd, num_size); } label_return: - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { + if (unlikely(ret == NULL)) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write("<jemalloc>: Error in calloc(): out of " "memory\n"); abort(); } set_errno(ENOMEM); } - if (config_stats && ret != NULL) { + if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(ret, config_prof)); - thread_allocated_tsd_get()->allocated += usize; + *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, num_size, ret); JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); @@ -1177,135 +1682,162 @@ label_return: } static void * -irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt) +irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, 
size_t usize, + prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { - p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false); + if (usize <= SMALL_MAXCLASS) { + p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = iralloc(oldptr, usize, 0, 0, false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_thr_cnt_t *cnt) +irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) { void *p; - prof_ctx_t *old_ctx; - - old_ctx = prof_ctx_get(oldptr); - if ((uintptr_t)cnt != (uintptr_t)1U) - p = irealloc_prof_sample(oldptr, usize, cnt); + bool prof_active; + prof_tctx_t *old_tctx, *tctx; + + prof_active = prof_active_get_unlocked(); + old_tctx = prof_tctx_get(old_ptr); + tctx = prof_alloc_prep(tsd, usize, prof_active, true); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) + p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); else - p = iralloc(oldptr, usize, 0, 0, false); - if (p == NULL) + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); + if (unlikely(p == NULL)) { + prof_alloc_rollback(tsd, tctx, true); return (NULL); - prof_realloc(p, usize, cnt, old_usize, old_ctx); + } + prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize, + old_tctx); return (p); } JEMALLOC_INLINE_C void -ifree(void *ptr) +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); assert(ptr != NULL); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); + prof_free(tsd, ptr, usize); } else if (config_stats || config_valgrind) usize = isalloc(ptr, config_prof); if (config_stats) - 
thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) + *tsd_thread_deallocatedp_get(tsd) += usize; + if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); - iqalloc(ptr); + iqalloc(tsd, ptr, tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } -void * +JEMALLOC_INLINE_C void +isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) +{ + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + + assert(ptr != NULL); + assert(malloc_initialized() || IS_INITIALIZER); + + if (config_prof && opt_prof) + prof_free(tsd, ptr, usize); + if (config_stats) + *tsd_thread_deallocatedp_get(tsd) += usize; + if (config_valgrind && unlikely(in_valgrind)) + rzsize = p2rz(ptr); + isqalloc(tsd, ptr, usize, tcache); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); +} + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { void *ret; + tsd_t *tsd JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - if (size == 0) { + if (unlikely(size == 0)) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); - ifree(ptr); + tsd = tsd_fetch(); + ifree(tsd, ptr, tcache_get(tsd, false)); return (NULL); } size = 1; } - if (ptr != NULL) { - assert(malloc_initialized || IS_INITIALIZER); + if (likely(ptr != NULL)) { + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); + tsd = tsd_fetch(); - if ((config_prof && opt_prof) || config_stats || - (config_valgrind && opt_valgrind)) - old_usize = isalloc(ptr, config_prof); - if (config_valgrind && opt_valgrind) + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && unlikely(in_valgrind)) old_rzsize = config_prof ? 
p2rz(ptr) : u2rz(old_usize); if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - ret = irealloc_prof(ptr, old_usize, usize, cnt); + ret = unlikely(usize == 0) ? NULL : irealloc_prof(tsd, + ptr, old_usize, usize); } else { - if (config_stats || (config_valgrind && opt_valgrind)) + if (config_stats || (config_valgrind && + unlikely(in_valgrind))) usize = s2u(size); - ret = iralloc(ptr, size, 0, 0, false); + ret = iralloc(tsd, ptr, old_usize, size, 0, false); } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - MALLOC_BODY(ret, size, usize); + ret = imalloc_body(size, &tsd, &usize); } - if (ret == NULL) { - if (config_xmalloc && opt_xmalloc) { + if (unlikely(ret == NULL)) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write("<jemalloc>: Error in realloc(): " "out of memory\n"); abort(); } set_errno(ENOMEM); } - if (config_stats && ret != NULL) { - thread_allocated_t *ta; + if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(ret, config_prof)); - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_usize; + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_usize, old_rzsize, - false); + JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize, + old_rzsize, true, false); return (ret); } -void +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { UTRACE(ptr, 0, 0); - if (ptr != NULL) - ifree(ptr); + if (likely(ptr != NULL)) { + tsd_t *tsd = tsd_fetch(); + ifree(tsd, ptr, tcache_get(tsd, false)); + } } /* @@ -1317,22 +1849,28 @@ je_free(void *ptr) */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN -void * +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) je_memalign(size_t alignment, size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - imemalign(&ret, alignment, 
size, 1); + if (unlikely(imemalign(&ret, alignment, size, 1) != 0)) + ret = NULL; JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif #ifdef JEMALLOC_OVERRIDE_VALLOC -void * +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) je_valloc(size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - imemalign(&ret, PAGE, size, 1); + if (unlikely(imemalign(&ret, PAGE, size, 1) != 0)) + ret = NULL; JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } @@ -1346,7 +1884,7 @@ je_valloc(size_t size) #define is_malloc_(a) malloc_is_ ## a #define is_malloc(a) is_malloc_(a) -#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__)) +#if ((is_malloc(je_malloc) == 1) && defined(JEMALLOC_GLIBC_MALLOC_HOOK)) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible * to inconsistently reference libc's malloc(3)-compatible functions @@ -1356,11 +1894,13 @@ je_valloc(size_t size) * passed an extra argument for the caller return address, which will be * ignored. */ -JEMALLOC_EXPORT void (* __free_hook)(void *ptr) = je_free; -JEMALLOC_EXPORT void *(* __malloc_hook)(size_t size) = je_malloc; -JEMALLOC_EXPORT void *(* __realloc_hook)(void *ptr, size_t size) = je_realloc; -JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = +JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free; +JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc; +JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; +# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK +JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; +# endif #endif /* @@ -1371,111 +1911,173 @@ JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = * Begin non-standard functions. 
*/ -JEMALLOC_ALWAYS_INLINE_C void * -imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) +JEMALLOC_ALWAYS_INLINE_C bool +imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, + size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { - assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, - alignment))); + if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) { + *alignment = 0; + *usize = s2u(size); + } else { + *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); + *usize = sa2u(size, *alignment); + } + assert(*usize != 0); + *zero = MALLOCX_ZERO_GET(flags); + if ((flags & MALLOCX_TCACHE_MASK) != 0) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + *tcache = NULL; + else + *tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); + } else + *tcache = tcache_get(tsd, true); + if ((flags & MALLOCX_ARENA_MASK) != 0) { + unsigned arena_ind = MALLOCX_ARENA_GET(flags); + *arena = arena_get(tsd, arena_ind, true, true); + if (unlikely(*arena == NULL)) + return (true); + } else + *arena = NULL; + return (false); +} - if (alignment != 0) - return (ipalloct(usize, alignment, zero, try_tcache, arena)); - else if (zero) - return (icalloct(usize, try_tcache, arena)); - else - return (imalloct(usize, try_tcache, arena)); +JEMALLOC_ALWAYS_INLINE_C bool +imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, + size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) +{ + + if (likely(flags == 0)) { + *usize = s2u(size); + assert(*usize != 0); + *alignment = 0; + *zero = false; + *tcache = tcache_get(tsd, true); + *arena = NULL; + return (false); + } else { + return (imallocx_flags_decode_hard(tsd, size, flags, usize, + alignment, zero, tcache, arena)); + } +} + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) +{ + + if (unlikely(alignment != 0)) + return (ipalloct(tsd, usize, alignment, 
zero, tcache, arena)); + if (unlikely(zero)) + return (icalloct(tsd, usize, tcache, arena)); + return (imalloct(tsd, usize, tcache, arena)); } static void * -imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena, prof_thr_cnt_t *cnt) +imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { void *p; - if (cnt == NULL) - return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { - size_t usize_promoted = (alignment == 0) ? - s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment); - assert(usize_promoted != 0); - p = imallocx(usize_promoted, alignment, zero, try_tcache, + if (usize <= SMALL_MAXCLASS) { + assert(((alignment == 0) ? s2u(LARGE_MINCLASS) : + sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); + p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = imallocx(usize, alignment, zero, try_tcache, arena); + p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena, prof_thr_cnt_t *cnt) +imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) { void *p; + size_t alignment; + bool zero; + tcache_t *tcache; + arena_t *arena; + prof_tctx_t *tctx; - if ((uintptr_t)cnt != (uintptr_t)1U) { - p = imallocx_prof_sample(usize, alignment, zero, try_tcache, - arena, cnt); + if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, + &zero, &tcache, &arena))) + return (NULL); + tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); + if (likely((uintptr_t)tctx == (uintptr_t)1U)) + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); + else if ((uintptr_t)tctx > (uintptr_t)1U) { + p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, + arena); } else - p = imallocx(usize, 
alignment, zero, try_tcache, arena); - if (p == NULL) + p = NULL; + if (unlikely(p == NULL)) { + prof_alloc_rollback(tsd, tctx, true); return (NULL); - prof_malloc(p, usize, cnt); + } + prof_malloc(p, *usize, tctx); + assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); } -void * -je_mallocx(size_t size, int flags) +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) { void *p; - size_t usize; - size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & MALLOCX_ZERO; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + size_t alignment; + bool zero; + tcache_t *tcache; arena_t *arena; - bool try_tcache; - assert(size != 0); + if (likely(flags == 0)) { + if (config_stats || (config_valgrind && unlikely(in_valgrind))) + *usize = s2u(size); + return (imalloc(tsd, size)); + } - if (malloc_init()) - goto label_oom; + if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, + &alignment, &zero, &tcache, &arena))) + return (NULL); + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); + assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); + return (p); +} - if (arena_ind != UINT_MAX) { - arena = arenas[arena_ind]; - try_tcache = false; - } else { - arena = NULL; - try_tcache = true; - } +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) +je_mallocx(size_t size, int flags) +{ + tsd_t *tsd; + void *p; + size_t usize; - usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); - assert(usize != 0); + assert(size != 0); - if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; + if (unlikely(malloc_init())) + goto label_oom; + tsd = tsd_fetch(); - PROF_ALLOC_PREP(1, usize, cnt); - p = imallocx_prof(usize, alignment, zero, try_tcache, arena, - cnt); - } else - p = imallocx(usize, alignment, zero, try_tcache, arena); - if (p == NULL) + if (config_prof && opt_prof) + p = imallocx_prof(tsd, size, flags, &usize); + else + p = imallocx_no_prof(tsd, size, flags, &usize); + if (unlikely(p == NULL)) goto label_oom; if (config_stats) { assert(usize == isalloc(p, config_prof)); - thread_allocated_tsd_get()->allocated += usize; + *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); + JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags)); return (p); label_oom: - if (config_xmalloc && opt_xmalloc) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write("<jemalloc>: Error in mallocx(): out of memory\n"); abort(); } @@ -1484,49 +2086,53 @@ label_oom: } static void * -irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, - bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena, - prof_thr_cnt_t *cnt) +irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, + prof_tctx_t *tctx) { void *p; - if (cnt == NULL) + if (tctx == NULL) return (NULL); - if (prof_promote && usize <= SMALL_MAXCLASS) { - p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= - size) ? 
0 : size - (SMALL_MAXCLASS+1), alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + if (usize <= SMALL_MAXCLASS) { + p = iralloct(tsd, old_ptr, old_usize, LARGE_MINCLASS, alignment, + zero, tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { - p = iralloct(oldptr, size, 0, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, + tcache, arena); } return (p); } JEMALLOC_ALWAYS_INLINE_C void * -irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, - size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena, prof_thr_cnt_t *cnt) +irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, + size_t alignment, size_t *usize, bool zero, tcache_t *tcache, + arena_t *arena) { void *p; - prof_ctx_t *old_ctx; - - old_ctx = prof_ctx_get(oldptr); - if ((uintptr_t)cnt != (uintptr_t)1U) - p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, - try_tcache_alloc, try_tcache_dalloc, arena, cnt); - else { - p = iralloct(oldptr, size, 0, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + bool prof_active; + prof_tctx_t *old_tctx, *tctx; + + prof_active = prof_active_get_unlocked(); + old_tctx = prof_tctx_get(old_ptr); + tctx = prof_alloc_prep(tsd, *usize, prof_active, true); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { + p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, + alignment, zero, tcache, arena, tctx); + } else { + p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero, + tcache, arena); } - if (p == NULL) + if (unlikely(p == NULL)) { + prof_alloc_rollback(tsd, tctx, true); return (NULL); + } - if (p == oldptr && alignment != 0) { + if (p == old_ptr && alignment != 0) { /* * The allocation did not move, so it is possible that the size * class is smaller than would guarantee the requested @@ -1537,78 +2143,80 @@ irallocx_prof(void *oldptr, size_t 
old_usize, size_t size, size_t alignment, */ *usize = isalloc(p, config_prof); } - prof_realloc(p, *usize, cnt, old_usize, old_ctx); + prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, + old_usize, old_tctx); return (p); } -void * +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) { void *p; - size_t usize, old_usize; + tsd_t *tsd; + size_t usize; + size_t old_usize; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); + size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - bool try_tcache_alloc, try_tcache_dalloc; arena_t *arena; + tcache_t *tcache; assert(ptr != NULL); assert(size != 0); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); + tsd = tsd_fetch(); - if (arena_ind != UINT_MAX) { - arena_chunk_t *chunk; - try_tcache_alloc = false; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache_dalloc = (chunk == ptr || chunk->arena != - arenas[arena_ind]); - arena = arenas[arena_ind]; - } else { - try_tcache_alloc = true; - try_tcache_dalloc = true; + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { + unsigned arena_ind = MALLOCX_ARENA_GET(flags); + arena = arena_get(tsd, arena_ind, true, true); + if (unlikely(arena == NULL)) + goto label_oom; + } else arena = NULL; - } - if ((config_prof && opt_prof) || config_stats || - (config_valgrind && opt_valgrind)) - old_usize = isalloc(ptr, config_prof); - if (config_valgrind && opt_valgrind) + if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + tcache = NULL; + else + tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); + } else + tcache = tcache_get(tsd, true); + + old_usize = isalloc(ptr, config_prof); 
+ if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); assert(usize != 0); - PROF_ALLOC_PREP(1, usize, cnt); - p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, - try_tcache_alloc, try_tcache_dalloc, arena, cnt); - if (p == NULL) + p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, + zero, tcache, arena); + if (unlikely(p == NULL)) goto label_oom; } else { - p = iralloct(ptr, size, 0, alignment, zero, try_tcache_alloc, - try_tcache_dalloc, arena); - if (p == NULL) + p = iralloct(tsd, ptr, old_usize, size, alignment, zero, + tcache, arena); + if (unlikely(p == NULL)) goto label_oom; - if (config_stats || (config_valgrind && opt_valgrind)) + if (config_stats || (config_valgrind && unlikely(in_valgrind))) usize = isalloc(p, config_prof); } + assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); if (config_stats) { - thread_allocated_t *ta; - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_usize; + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_usize, old_rzsize, zero); + JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize, + old_rzsize, false, zero); return (p); label_oom: - if (config_xmalloc && opt_xmalloc) { + if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write("<jemalloc>: Error in rallocx(): out of memory\n"); abort(); } @@ -1618,11 +2226,11 @@ label_oom: JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero, arena_t *arena) + size_t alignment, bool zero) { size_t usize; - if (ixalloc(ptr, size, extra, alignment, zero)) + if (ixalloc(ptr, old_usize, size, extra, alignment, zero)) return (old_usize); usize = isalloc(ptr, config_prof); 
@@ -1631,215 +2239,229 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, static size_t ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, size_t max_usize, bool zero, arena_t *arena, - prof_thr_cnt_t *cnt) + size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; - if (cnt == NULL) + if (tctx == NULL) return (old_usize); - /* Use minimum usize to determine whether promotion may happen. */ - if (prof_promote && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment)) <= SMALL_MAXCLASS) { - if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= - size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), - alignment, zero)) - return (old_usize); - usize = isalloc(ptr, config_prof); - if (max_usize < PAGE) - arena_prof_promoted(ptr, usize); - } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero, arena); - } + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); return (usize); } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, size_t max_usize, bool zero, arena_t *arena, - prof_thr_cnt_t *cnt) +ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero) { - size_t usize; - prof_ctx_t *old_ctx; + size_t usize_max, usize; + bool prof_active; + prof_tctx_t *old_tctx, *tctx; - old_ctx = prof_ctx_get(ptr); - if ((uintptr_t)cnt != (uintptr_t)1U) { + prof_active = prof_active_get_unlocked(); + old_tctx = prof_tctx_get(ptr); + /* + * usize isn't knowable before ixalloc() returns when extra is non-zero. + * Therefore, compute its maximum possible value and use that in + * prof_alloc_prep() to decide whether to capture a backtrace. + * prof_realloc() will use the actual usize to decide whether to sample. + */ + usize_max = (alignment == 0) ? 
s2u(size+extra) : sa2u(size+extra, + alignment); + assert(usize_max != 0); + tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, - alignment, zero, max_usize, arena, cnt); + alignment, zero, tctx); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero, arena); + zero); } - if (usize == old_usize) + if (usize == old_usize) { + prof_alloc_rollback(tsd, tctx, false); return (usize); - prof_realloc(ptr, usize, cnt, old_usize, old_ctx); + } + prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, + old_tctx); return (usize); } -size_t +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, size_t extra, int flags) { + tsd_t *tsd; size_t usize, old_usize; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); - size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); + size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - arena_t *arena; assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - - if (arena_ind != UINT_MAX) - arena = arenas[arena_ind]; - else - arena = NULL; + tsd = tsd_fetch(); old_usize = isalloc(ptr, config_prof); - if (config_valgrind && opt_valgrind) + + /* Clamp extra if necessary to avoid (size + extra) overflow. */ + if (unlikely(size + extra > HUGE_MAXCLASS)) { + /* Check for size overflow. 
*/ + if (unlikely(size > HUGE_MAXCLASS)) { + usize = old_usize; + goto label_not_resized; + } + extra = HUGE_MAXCLASS - size; + } + + if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { - prof_thr_cnt_t *cnt; - /* - * usize isn't knowable before ixalloc() returns when extra is - * non-zero. Therefore, compute its maximum possible value and - * use that in PROF_ALLOC_PREP() to decide whether to capture a - * backtrace. prof_realloc() will use the actual usize to - * decide whether to sample. - */ - size_t max_usize = (alignment == 0) ? s2u(size+extra) : - sa2u(size+extra, alignment); - PROF_ALLOC_PREP(1, max_usize, cnt); - usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, - max_usize, zero, arena, cnt); + usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, + alignment, zero); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero, arena); + zero); } - if (usize == old_usize) + if (unlikely(usize == old_usize)) goto label_not_resized; if (config_stats) { - thread_allocated_t *ta; - ta = thread_allocated_tsd_get(); - ta->allocated += usize; - ta->deallocated += old_usize; + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; } - JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero); + JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize, + old_rzsize, false, zero); label_not_resized: UTRACE(ptr, size, ptr); return (usize); } -size_t +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW +JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { size_t usize; - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); if (config_ivsalloc) usize = ivsalloc(ptr, config_prof); - else { - assert(ptr != NULL); + else usize = isalloc(ptr, config_prof); - } return (usize); } -void +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { - size_t 
usize; - UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; - bool try_tcache; + tsd_t *tsd; + tcache_t *tcache; assert(ptr != NULL); - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); - if (arena_ind != UINT_MAX) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache = (chunk == ptr || chunk->arena != - arenas[arena_ind]); + tsd = tsd_fetch(); + if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + tcache = NULL; + else + tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } else - try_tcache = true; + tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); - if (config_prof && opt_prof) { - if (config_stats == false && config_valgrind == false) - usize = isalloc(ptr, config_prof); - prof_free(ptr, usize); - } - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - if (config_valgrind && opt_valgrind) - rzsize = p2rz(ptr); - iqalloct(ptr, try_tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); + ifree(tsd_fetch(), ptr, tcache); } -size_t -je_nallocx(size_t size, int flags) +JEMALLOC_ALWAYS_INLINE_C size_t +inallocx(size_t size, int flags) +{ + size_t usize; + + if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) + usize = s2u(size); + else + usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); + assert(usize != 0); + return (usize); +} + +JEMALLOC_EXPORT void JEMALLOC_NOTHROW +je_sdallocx(void *ptr, size_t size, int flags) { + tsd_t *tsd; + tcache_t *tcache; size_t usize; - size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); + + assert(ptr != NULL); + assert(malloc_initialized() || IS_INITIALIZER); + usize = inallocx(size, flags); + assert(usize == isalloc(ptr, config_prof)); + + tsd = tsd_fetch(); + if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + if ((flags & 
MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + tcache = NULL; + else + tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); + } else + tcache = tcache_get(tsd, false); + + UTRACE(ptr, 0, 0); + isfree(tsd, ptr, usize, tcache); +} + +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW +JEMALLOC_ATTR(pure) +je_nallocx(size_t size, int flags) +{ assert(size != 0); - if (malloc_init()) + if (unlikely(malloc_init())) return (0); - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - assert(usize != 0); - return (usize); + return (inallocx(size, flags)); } -int +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - if (malloc_init()) + if (unlikely(malloc_init())) return (EAGAIN); return (ctl_byname(name, oldp, oldlenp, newp, newlen)); } -int +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { - if (malloc_init()) + if (unlikely(malloc_init())) return (EAGAIN); return (ctl_nametomib(name, mibp, miblenp)); } -int +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - if (malloc_init()) + if (unlikely(malloc_init())) return (EAGAIN); return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); } -void +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { @@ -1847,18 +2469,18 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, stats_print(write_cb, cbopaque, opts); } -size_t +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; - assert(malloc_initialized || IS_INITIALIZER); + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); if (config_ivsalloc) ret = ivsalloc(ptr, config_prof); else - ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0; + ret = (ptr == NULL) ? 
0 : isalloc(ptr, config_prof); return (ret); } @@ -1868,91 +2490,6 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) */ /******************************************************************************/ /* - * Begin experimental functions. - */ -#ifdef JEMALLOC_EXPERIMENTAL - -int -je_allocm(void **ptr, size_t *rsize, size_t size, int flags) -{ - void *p; - - assert(ptr != NULL); - - p = je_mallocx(size, flags); - if (p == NULL) - return (ALLOCM_ERR_OOM); - if (rsize != NULL) - *rsize = isalloc(p, config_prof); - *ptr = p; - return (ALLOCM_SUCCESS); -} - -int -je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) -{ - int ret; - bool no_move = flags & ALLOCM_NO_MOVE; - - assert(ptr != NULL); - assert(*ptr != NULL); - assert(size != 0); - assert(SIZE_T_MAX - size >= extra); - - if (no_move) { - size_t usize = je_xallocx(*ptr, size, extra, flags); - ret = (usize >= size) ? ALLOCM_SUCCESS : ALLOCM_ERR_NOT_MOVED; - if (rsize != NULL) - *rsize = usize; - } else { - void *p = je_rallocx(*ptr, size+extra, flags); - if (p != NULL) { - *ptr = p; - ret = ALLOCM_SUCCESS; - } else - ret = ALLOCM_ERR_OOM; - if (rsize != NULL) - *rsize = isalloc(*ptr, config_prof); - } - return (ret); -} - -int -je_sallocm(const void *ptr, size_t *rsize, int flags) -{ - - assert(rsize != NULL); - *rsize = je_sallocx(ptr, flags); - return (ALLOCM_SUCCESS); -} - -int -je_dallocm(void *ptr, int flags) -{ - - je_dallocx(ptr, flags); - return (ALLOCM_SUCCESS); -} - -int -je_nallocm(size_t *rsize, size_t size, int flags) -{ - size_t usize; - - usize = je_nallocx(size, flags); - if (usize == 0) - return (ALLOCM_ERR_OOM); - if (rsize != NULL) - *rsize = usize; - return (ALLOCM_SUCCESS); -} - -#endif -/* - * End experimental functions. - */ -/******************************************************************************/ -/* * The following functions are used by threading libraries for protection of * malloc during fork(). 
*/ @@ -1966,9 +2503,9 @@ je_nallocm(size_t *rsize, size_t size, int flags) * fork/malloc races via the following functions it registers during * initialization using pthread_atfork(), but of course that does no good if * the allocator isn't fully initialized at fork time. The following library - * constructor is a partial solution to this problem. It may still possible to - * trigger the deadlock described above, but doing so would involve forking via - * a library constructor that runs before jemalloc's runs. + * constructor is a partial solution to this problem. It may still be possible + * to trigger the deadlock described above, but doing so would involve forking + * via a library constructor that runs before jemalloc's runs. */ JEMALLOC_ATTR(constructor) static void @@ -1989,10 +2526,10 @@ _malloc_prefork(void) unsigned i; #ifdef JEMALLOC_MUTEX_INIT_CB - if (malloc_initialized == false) + if (!malloc_initialized()) return; #endif - assert(malloc_initialized); + assert(malloc_initialized()); /* Acquire all mutexes in a safe order. */ ctl_prefork(); @@ -2004,7 +2541,6 @@ _malloc_prefork(void) } chunk_prefork(); base_prefork(); - huge_prefork(); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2018,13 +2554,12 @@ _malloc_postfork(void) unsigned i; #ifdef JEMALLOC_MUTEX_INIT_CB - if (malloc_initialized == false) + if (!malloc_initialized()) return; #endif - assert(malloc_initialized); + assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. */ - huge_postfork_parent(); base_postfork_parent(); chunk_postfork_parent(); for (i = 0; i < narenas_total; i++) { @@ -2041,10 +2576,9 @@ jemalloc_postfork_child(void) { unsigned i; - assert(malloc_initialized); + assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. 
*/ - huge_postfork_child(); base_postfork_child(); chunk_postfork_child(); for (i = 0; i < narenas_total; i++) { @@ -2057,55 +2591,3 @@ jemalloc_postfork_child(void) } /******************************************************************************/ -/* - * The following functions are used for TLS allocation/deallocation in static - * binaries on FreeBSD. The primary difference between these and i[mcd]alloc() - * is that these avoid accessing TLS variables. - */ - -static void * -a0alloc(size_t size, bool zero) -{ - - if (malloc_init()) - return (NULL); - - if (size == 0) - size = 1; - - if (size <= arena_maxclass) - return (arena_malloc(arenas[0], size, zero, false)); - else - return (huge_malloc(size, zero, huge_dss_prec_get(arenas[0]))); -} - -void * -a0malloc(size_t size) -{ - - return (a0alloc(size, false)); -} - -void * -a0calloc(size_t num, size_t size) -{ - - return (a0alloc(num * size, true)); -} - -void -a0free(void *ptr) -{ - arena_chunk_t *chunk; - - if (ptr == NULL) - return; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, false); - else - huge_dalloc(ptr, true); -} - -/******************************************************************************/ diff --git a/dep/jemalloc/src/mutex.c b/dep/jemalloc/src/mutex.c index 788eca38703..2d47af976c5 100644 --- a/dep/jemalloc/src/mutex.c +++ b/dep/jemalloc/src/mutex.c @@ -73,9 +73,13 @@ malloc_mutex_init(malloc_mutex_t *mutex) { #ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + InitializeSRWLock(&mutex->lock); +# else if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, _CRT_SPINCOUNT)) return (true); +# endif #elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -83,8 +87,8 @@ malloc_mutex_init(malloc_mutex_t *mutex) mutex->postponed_next = postponed_mutexes; postponed_mutexes = mutex; } else { - if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) != - 0) + if 
(_pthread_mutex_init_calloc_cb(&mutex->lock, + bootstrap_calloc) != 0) return (true); } #else @@ -140,7 +144,7 @@ mutex_boot(void) postpone_init = false; while (postponed_mutexes != NULL) { if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, - base_calloc) != 0) + bootstrap_calloc) != 0) return (true); postponed_mutexes = postponed_mutexes->postponed_next; } diff --git a/dep/jemalloc/src/pages.c b/dep/jemalloc/src/pages.c new file mode 100644 index 00000000000..83a167f6701 --- /dev/null +++ b/dep/jemalloc/src/pages.c @@ -0,0 +1,173 @@ +#define JEMALLOC_PAGES_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ + +void * +pages_map(void *addr, size_t size) +{ + void *ret; + + assert(size != 0); + +#ifdef _WIN32 + /* + * If VirtualAlloc can't allocate at the given address when one is + * given, it fails and returns NULL. + */ + ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, + PAGE_READWRITE); +#else + /* + * We don't use MAP_FIXED here, because it can cause the *replacement* + * of existing mappings, and we only want to create new mappings. + */ + ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + -1, 0); + assert(ret != NULL); + + if (ret == MAP_FAILED) + ret = NULL; + else if (addr != NULL && ret != addr) { + /* + * We succeeded in mapping memory, but not in the right place. 
+ */ + pages_unmap(ret, size); + ret = NULL; + } +#endif + assert(ret == NULL || (addr == NULL && ret != addr) + || (addr != NULL && ret == addr)); + return (ret); +} + +void +pages_unmap(void *addr, size_t size) +{ + +#ifdef _WIN32 + if (VirtualFree(addr, 0, MEM_RELEASE) == 0) +#else + if (munmap(addr, size) == -1) +#endif + { + char buf[BUFERROR_BUF]; + + buferror(get_errno(), buf, sizeof(buf)); + malloc_printf("<jemalloc>: Error in " +#ifdef _WIN32 + "VirtualFree" +#else + "munmap" +#endif + "(): %s\n", buf); + if (opt_abort) + abort(); + } +} + +void * +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) +{ + void *ret = (void *)((uintptr_t)addr + leadsize); + + assert(alloc_size >= leadsize + size); +#ifdef _WIN32 + { + void *new_addr; + + pages_unmap(addr, alloc_size); + new_addr = pages_map(ret, size); + if (new_addr == ret) + return (ret); + if (new_addr) + pages_unmap(new_addr, size); + return (NULL); + } +#else + { + size_t trailsize = alloc_size - leadsize - size; + + if (leadsize != 0) + pages_unmap(addr, leadsize); + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); + return (ret); + } +#endif +} + +static bool +pages_commit_impl(void *addr, size_t size, bool commit) +{ + +#ifndef _WIN32 + /* + * The following decommit/commit implementation is functional, but + * always disabled because it doesn't add value beyong improved + * debugging (at the cost of extra system calls) on systems that + * overcommit. + */ + if (false) { + int prot = commit ? (PROT_READ | PROT_WRITE) : PROT_NONE; + void *result = mmap(addr, size, prot, MAP_PRIVATE | MAP_ANON | + MAP_FIXED, -1, 0); + if (result == MAP_FAILED) + return (true); + if (result != addr) { + /* + * We succeeded in mapping memory, but not in the right + * place. 
+ */ + pages_unmap(result, size); + return (true); + } + return (false); + } +#endif + return (true); +} + +bool +pages_commit(void *addr, size_t size) +{ + + return (pages_commit_impl(addr, size, true)); +} + +bool +pages_decommit(void *addr, size_t size) +{ + + return (pages_commit_impl(addr, size, false)); +} + +bool +pages_purge(void *addr, size_t size) +{ + bool unzeroed; + +#ifdef _WIN32 + VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); + unzeroed = true; +#elif defined(JEMALLOC_HAVE_MADVISE) +# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +# define JEMALLOC_MADV_ZEROS true +# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +# define JEMALLOC_MADV_ZEROS false +# else +# error "No madvise(2) flag defined for purging unused dirty pages." +# endif + int err = madvise(addr, size, JEMALLOC_MADV_PURGE); + unzeroed = (!JEMALLOC_MADV_ZEROS || err != 0); +# undef JEMALLOC_MADV_PURGE +# undef JEMALLOC_MADV_ZEROS +#else + /* Last resort no-op. */ + unzeroed = true; +#endif + return (unzeroed); +} + diff --git a/dep/jemalloc/src/prof.c b/dep/jemalloc/src/prof.c index 7722b7b4373..5d2b9598fdb 100644 --- a/dep/jemalloc/src/prof.c +++ b/dep/jemalloc/src/prof.c @@ -14,14 +14,13 @@ /******************************************************************************/ /* Data. */ -malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) - bool opt_prof = false; bool opt_prof_active = true; +bool opt_prof_thread_active_init = true; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; -bool opt_prof_final = true; +bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[ @@ -31,25 +30,65 @@ char opt_prof_prefix[ #endif 1]; +/* + * Initialized as opt_prof_active, and accessed via + * prof_active_[gs]et{_unlocked,}(). 
+ */ +bool prof_active; +static malloc_mutex_t prof_active_mtx; + +/* + * Initialized as opt_prof_thread_active_init, and accessed via + * prof_thread_active_init_[gs]et(). + */ +static bool prof_thread_active_init; +static malloc_mutex_t prof_thread_active_init_mtx; + +/* + * Initialized as opt_prof_gdump, and accessed via + * prof_gdump_[gs]et{_unlocked,}(). + */ +bool prof_gdump_val; +static malloc_mutex_t prof_gdump_mtx; + uint64_t prof_interval = 0; -bool prof_promote; + +size_t lg_prof_sample; /* - * Table of mutexes that are shared among ctx's. These are leaf locks, so - * there is no problem with using them for more than one ctx at the same time. - * The primary motivation for this sharing though is that ctx's are ephemeral, + * Table of mutexes that are shared among gctx's. These are leaf locks, so + * there is no problem with using them for more than one gctx at the same time. + * The primary motivation for this sharing though is that gctx's are ephemeral, * and destroying mutexes causes complications for systems that allocate when * creating/destroying mutexes. */ -static malloc_mutex_t *ctx_locks; -static unsigned cum_ctxs; /* Atomic counter. */ +static malloc_mutex_t *gctx_locks; +static unsigned cum_gctxs; /* Atomic counter. */ + +/* + * Table of mutexes that are shared among tdata's. No operations require + * holding multiple tdata locks, so there is no problem with using them for more + * than one tdata at the same time, even though a gctx lock may be acquired + * while holding a tdata lock. + */ +static malloc_mutex_t *tdata_locks; /* - * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data + * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data * structure that knows about all backtraces currently captured. 
*/ -static ckh_t bt2ctx; -static malloc_mutex_t bt2ctx_mtx; +static ckh_t bt2gctx; +static malloc_mutex_t bt2gctx_mtx; + +/* + * Tree of all extant prof_tdata_t structures, regardless of state, + * {attached,detached,expired}. + */ +static prof_tdata_tree_t tdatas; +static malloc_mutex_t tdatas_mtx; + +static uint64_t next_thr_uid; +static malloc_mutex_t next_thr_uid_mtx; static malloc_mutex_t prof_dump_seq_mtx; static uint64_t prof_dump_seq; @@ -77,120 +116,210 @@ static int prof_dump_fd; static bool prof_booted = false; /******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ + +static bool prof_tctx_should_destroy(prof_tctx_t *tctx); +static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); +static bool prof_tdata_should_destroy(prof_tdata_t *tdata, + bool even_if_attached); +static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached); +static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); + +/******************************************************************************/ +/* Red-black trees. 
*/ + +JEMALLOC_INLINE_C int +prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) +{ + uint64_t a_thr_uid = a->thr_uid; + uint64_t b_thr_uid = b->thr_uid; + int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); + if (ret == 0) { + uint64_t a_thr_discrim = a->thr_discrim; + uint64_t b_thr_discrim = b->thr_discrim; + ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < + b_thr_discrim); + if (ret == 0) { + uint64_t a_tctx_uid = a->tctx_uid; + uint64_t b_tctx_uid = b->tctx_uid; + ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < + b_tctx_uid); + } + } + return (ret); +} + +rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, + tctx_link, prof_tctx_comp) + +JEMALLOC_INLINE_C int +prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) +{ + unsigned a_len = a->bt.len; + unsigned b_len = b->bt.len; + unsigned comp_len = (a_len < b_len) ? a_len : b_len; + int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); + if (ret == 0) + ret = (a_len > b_len) - (a_len < b_len); + return (ret); +} + +rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, + prof_gctx_comp) + +JEMALLOC_INLINE_C int +prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) +{ + int ret; + uint64_t a_uid = a->thr_uid; + uint64_t b_uid = b->thr_uid; + + ret = ((a_uid > b_uid) - (a_uid < b_uid)); + if (ret == 0) { + uint64_t a_discrim = a->thr_discrim; + uint64_t b_discrim = b->thr_discrim; + + ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); + } + return (ret); +} + +rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, + prof_tdata_comp) + +/******************************************************************************/ void -bt_init(prof_bt_t *bt, void **vec) +prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { + prof_tdata_t *tdata; cassert(config_prof); - bt->vec = vec; - bt->len = 0; + if (updated) { + /* + * Compute a new sample threshold. 
This isn't very important in + * practice, because this function is rarely executed, so the + * potential for sample bias is minimal except in contrived + * programs. + */ + tdata = prof_tdata_get(tsd, true); + if (tdata != NULL) + prof_sample_threshold_update(tdata); + } + + if ((uintptr_t)tctx > (uintptr_t)1U) { + malloc_mutex_lock(tctx->tdata->lock); + tctx->prepared = false; + if (prof_tctx_should_destroy(tctx)) + prof_tctx_destroy(tsd, tctx); + else + malloc_mutex_unlock(tctx->tdata->lock); + } } -static void -bt_destroy(prof_bt_t *bt) +void +prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { - cassert(config_prof); + prof_tctx_set(ptr, usize, tctx); - idalloc(bt); + malloc_mutex_lock(tctx->tdata->lock); + tctx->cnts.curobjs++; + tctx->cnts.curbytes += usize; + if (opt_prof_accum) { + tctx->cnts.accumobjs++; + tctx->cnts.accumbytes += usize; + } + tctx->prepared = false; + malloc_mutex_unlock(tctx->tdata->lock); } -static prof_bt_t * -bt_dup(prof_bt_t *bt) +void +prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { - prof_bt_t *ret; - cassert(config_prof); + malloc_mutex_lock(tctx->tdata->lock); + assert(tctx->cnts.curobjs > 0); + assert(tctx->cnts.curbytes >= usize); + tctx->cnts.curobjs--; + tctx->cnts.curbytes -= usize; - /* - * Create a single allocation that has space for vec immediately - * following the prof_bt_t structure. The backtraces that get - * stored in the backtrace caches are copied from stack-allocated - * temporary variables, so size is known at creation time. Making this - * a contiguous object improves cache locality. 
- */ - ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + - (bt->len * sizeof(void *))); - if (ret == NULL) - return (NULL); - ret->vec = (void **)((uintptr_t)ret + - QUANTUM_CEILING(sizeof(prof_bt_t))); - memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); - ret->len = bt->len; + if (prof_tctx_should_destroy(tctx)) + prof_tctx_destroy(tsd, tctx); + else + malloc_mutex_unlock(tctx->tdata->lock); +} - return (ret); +void +bt_init(prof_bt_t *bt, void **vec) +{ + + cassert(config_prof); + + bt->vec = vec; + bt->len = 0; } -static inline void -prof_enter(prof_tdata_t *prof_tdata) +JEMALLOC_INLINE_C void +prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); - assert(prof_tdata->enq == false); - prof_tdata->enq = true; + if (tdata != NULL) { + assert(!tdata->enq); + tdata->enq = true; + } - malloc_mutex_lock(&bt2ctx_mtx); + malloc_mutex_lock(&bt2gctx_mtx); } -static inline void -prof_leave(prof_tdata_t *prof_tdata) +JEMALLOC_INLINE_C void +prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { - bool idump, gdump; cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); + + malloc_mutex_unlock(&bt2gctx_mtx); - malloc_mutex_unlock(&bt2ctx_mtx); + if (tdata != NULL) { + bool idump, gdump; - assert(prof_tdata->enq); - prof_tdata->enq = false; - idump = prof_tdata->enq_idump; - prof_tdata->enq_idump = false; - gdump = prof_tdata->enq_gdump; - prof_tdata->enq_gdump = false; + assert(tdata->enq); + tdata->enq = false; + idump = tdata->enq_idump; + tdata->enq_idump = false; + gdump = tdata->enq_gdump; + tdata->enq_gdump = false; - if (idump) - prof_idump(); - if (gdump) - prof_gdump(); + if (idump) + prof_idump(); + if (gdump) + prof_gdump(); + } } #ifdef JEMALLOC_PROF_LIBUNWIND void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { - unw_context_t uc; - unw_cursor_t cursor; - unsigned i; - int err; + int nframes; cassert(config_prof); assert(bt->len == 0); 
assert(bt->vec != NULL); - unw_getcontext(&uc); - unw_init_local(&cursor, &uc); - - /* Throw away (nignore+1) stack frames, if that many exist. */ - for (i = 0; i < nignore + 1; i++) { - err = unw_step(&cursor); - if (err <= 0) - return; - } - - /* - * Iterate over stack frames until there are no more, or until no space - * remains in bt. - */ - for (i = 0; i < PROF_BT_MAX; i++) { - unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); - bt->len++; - err = unw_step(&cursor); - if (err <= 0) - break; - } + nframes = unw_backtrace(bt->vec, PROF_BT_MAX); + if (nframes <= 0) + return; + bt->len = nframes; } #elif (defined(JEMALLOC_PROF_LIBGCC)) static _Unwind_Reason_Code @@ -206,25 +335,25 @@ static _Unwind_Reason_Code prof_unwind_callback(struct _Unwind_Context *context, void *arg) { prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + void *ip; cassert(config_prof); - if (data->nignore > 0) - data->nignore--; - else { - data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); - data->bt->len++; - if (data->bt->len == data->max) - return (_URC_END_OF_STACK); - } + ip = (void *)_Unwind_GetIP(context); + if (ip == NULL) + return (_URC_END_OF_STACK); + data->bt->vec[data->bt->len] = ip; + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); return (_URC_NO_REASON); } void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { - prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; + prof_unwind_data_t data = {bt, PROF_BT_MAX}; cassert(config_prof); @@ -232,25 +361,22 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) } #elif (defined(JEMALLOC_PROF_GCC)) void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { #define BT_FRAME(i) \ - if ((i) < nignore + PROF_BT_MAX) { \ + if ((i) < PROF_BT_MAX) { \ void *p; \ if (__builtin_frame_address(i) == 0) \ return; \ p = __builtin_return_address(i); \ if (p == NULL) \ return; \ - if (i >= nignore) { \ - bt->vec[(i) - nignore] = p; \ 
- bt->len = (i) - nignore + 1; \ - } \ + bt->vec[(i)] = p; \ + bt->len = (i) + 1; \ } else \ return; cassert(config_prof); - assert(nignore <= 3); BT_FRAME(0) BT_FRAME(1) @@ -392,16 +518,11 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) BT_FRAME(125) BT_FRAME(126) BT_FRAME(127) - - /* Extras to compensate for nignore. */ - BT_FRAME(128) - BT_FRAME(129) - BT_FRAME(130) #undef BT_FRAME } #else void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { cassert(config_prof); @@ -410,256 +531,394 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) #endif static malloc_mutex_t * -prof_ctx_mutex_choose(void) +prof_gctx_mutex_choose(void) { - unsigned nctxs = atomic_add_u(&cum_ctxs, 1); + unsigned ngctxs = atomic_add_u(&cum_gctxs, 1); - return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); + return (&gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]); } -static void -prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt) +static malloc_mutex_t * +prof_tdata_mutex_choose(uint64_t thr_uid) { - ctx->bt = bt; - ctx->lock = prof_ctx_mutex_choose(); + return (&tdata_locks[thr_uid % PROF_NTDATA_LOCKS]); +} + +static prof_gctx_t * +prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) +{ + /* + * Create a single allocation that has space for vec of length bt->len. + */ + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t, + vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true), + true, NULL); + if (gctx == NULL) + return (NULL); + gctx->lock = prof_gctx_mutex_choose(); /* * Set nlimbo to 1, in order to avoid a race condition with - * prof_ctx_merge()/prof_ctx_destroy(). + * prof_tctx_destroy()/prof_gctx_try_destroy(). */ - ctx->nlimbo = 1; - ql_elm_new(ctx, dump_link); - memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); - ql_new(&ctx->cnts_ql); + gctx->nlimbo = 1; + tctx_tree_new(&gctx->tctxs); + /* Duplicate bt. 
*/ + memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); + gctx->bt.vec = gctx->vec; + gctx->bt.len = bt->len; + return (gctx); } static void -prof_ctx_destroy(prof_ctx_t *ctx) +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, + prof_tdata_t *tdata) { - prof_tdata_t *prof_tdata; cassert(config_prof); /* - * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() increments ctx->nlimbo in order to avoid a race - * condition with this function, as does prof_ctx_merge() in order to - * avoid a race between the main body of prof_ctx_merge() and entry + * Check that gctx is still unused by any thread cache before destroying + * it. prof_lookup() increments gctx->nlimbo in order to avoid a race + * condition with this function, as does prof_tctx_destroy() in order to + * avoid a race between the main body of prof_tctx_destroy() and entry * into this function. */ - prof_tdata = prof_tdata_get(false); - assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); - prof_enter(prof_tdata); - malloc_mutex_lock(ctx->lock); - if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && - ctx->nlimbo == 1) { - assert(ctx->cnt_merged.curbytes == 0); - assert(ctx->cnt_merged.accumobjs == 0); - assert(ctx->cnt_merged.accumbytes == 0); - /* Remove ctx from bt2ctx. */ - if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) + prof_enter(tsd, tdata_self); + malloc_mutex_lock(gctx->lock); + assert(gctx->nlimbo != 0); + if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { + /* Remove gctx from bt2gctx. */ + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); - prof_leave(prof_tdata); - /* Destroy ctx. */ - malloc_mutex_unlock(ctx->lock); - bt_destroy(ctx->bt); - idalloc(ctx); + prof_leave(tsd, tdata_self); + /* Destroy gctx. 
*/ + malloc_mutex_unlock(gctx->lock); + idalloctm(tsd, gctx, tcache_get(tsd, false), true); } else { /* - * Compensate for increment in prof_ctx_merge() or + * Compensate for increment in prof_tctx_destroy() or * prof_lookup(). */ - ctx->nlimbo--; - malloc_mutex_unlock(ctx->lock); - prof_leave(prof_tdata); + gctx->nlimbo--; + malloc_mutex_unlock(gctx->lock); + prof_leave(tsd, tdata_self); } } -static void -prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) +/* tctx->tdata->lock must be held. */ +static bool +prof_tctx_should_destroy(prof_tctx_t *tctx) { - bool destroy; - cassert(config_prof); + if (opt_prof_accum) + return (false); + if (tctx->cnts.curobjs != 0) + return (false); + if (tctx->prepared) + return (false); + return (true); +} + +static bool +prof_gctx_should_destroy(prof_gctx_t *gctx) +{ + + if (opt_prof_accum) + return (false); + if (!tctx_tree_empty(&gctx->tctxs)) + return (false); + if (gctx->nlimbo != 0) + return (false); + return (true); +} - /* Merge cnt stats and detach from ctx. */ - malloc_mutex_lock(ctx->lock); - ctx->cnt_merged.curobjs += cnt->cnts.curobjs; - ctx->cnt_merged.curbytes += cnt->cnts.curbytes; - ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; - ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; - ql_remove(&ctx->cnts_ql, cnt, cnts_link); - if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && - ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { +/* tctx->tdata->lock is held upon entry, and released before return. 
*/ +static void +prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) +{ + prof_tdata_t *tdata = tctx->tdata; + prof_gctx_t *gctx = tctx->gctx; + bool destroy_tdata, destroy_tctx, destroy_gctx; + + assert(tctx->cnts.curobjs == 0); + assert(tctx->cnts.curbytes == 0); + assert(!opt_prof_accum); + assert(tctx->cnts.accumobjs == 0); + assert(tctx->cnts.accumbytes == 0); + + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); + destroy_tdata = prof_tdata_should_destroy(tdata, false); + malloc_mutex_unlock(tdata->lock); + + malloc_mutex_lock(gctx->lock); + switch (tctx->state) { + case prof_tctx_state_nominal: + tctx_tree_remove(&gctx->tctxs, tctx); + destroy_tctx = true; + if (prof_gctx_should_destroy(gctx)) { + /* + * Increment gctx->nlimbo in order to keep another + * thread from winning the race to destroy gctx while + * this one has gctx->lock dropped. Without this, it + * would be possible for another thread to: + * + * 1) Sample an allocation associated with gctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_gctx_try_destroy(gctx). + * + * The result would be that gctx no longer exists by the + * time this thread accesses it in + * prof_gctx_try_destroy(). + */ + gctx->nlimbo++; + destroy_gctx = true; + } else + destroy_gctx = false; + break; + case prof_tctx_state_dumping: /* - * Increment ctx->nlimbo in order to keep another thread from - * winning the race to destroy ctx while this one has ctx->lock - * dropped. Without this, it would be possible for another - * thread to: - * - * 1) Sample an allocation associated with ctx. - * 2) Deallocate the sampled object. - * 3) Successfully prof_ctx_destroy(ctx). - * - * The result would be that ctx no longer exists by the time - * this thread accesses it in prof_ctx_destroy(). + * A dumping thread needs tctx to remain valid until dumping + * has finished. Change state such that the dumping thread will + * complete destruction during a late dump iteration phase. 
*/ - ctx->nlimbo++; - destroy = true; - } else - destroy = false; - malloc_mutex_unlock(ctx->lock); - if (destroy) - prof_ctx_destroy(ctx); + tctx->state = prof_tctx_state_purgatory; + destroy_tctx = false; + destroy_gctx = false; + break; + default: + not_reached(); + destroy_tctx = false; + destroy_gctx = false; + } + malloc_mutex_unlock(gctx->lock); + if (destroy_gctx) { + prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, + tdata); + } + + if (destroy_tdata) + prof_tdata_destroy(tsd, tdata, false); + + if (destroy_tctx) + idalloctm(tsd, tctx, tcache_get(tsd, false), true); } static bool -prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, - prof_ctx_t **p_ctx, bool *p_new_ctx) +prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, + void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { union { - prof_ctx_t *p; + prof_gctx_t *p; void *v; - } ctx; + } gctx; union { prof_bt_t *p; void *v; } btkey; - bool new_ctx; + bool new_gctx; - prof_enter(prof_tdata); - if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { /* bt has never been seen before. Insert it. */ - ctx.v = imalloc(sizeof(prof_ctx_t)); - if (ctx.v == NULL) { - prof_leave(prof_tdata); + gctx.p = prof_gctx_create(tsd, bt); + if (gctx.v == NULL) { + prof_leave(tsd, tdata); return (true); } - btkey.p = bt_dup(bt); - if (btkey.v == NULL) { - prof_leave(prof_tdata); - idalloc(ctx.v); - return (true); - } - prof_ctx_init(ctx.p, btkey.p); - if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { + btkey.p = &gctx.p->bt; + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ - prof_leave(prof_tdata); - idalloc(btkey.v); - idalloc(ctx.v); + prof_leave(tsd, tdata); + idalloctm(tsd, gctx.v, tcache_get(tsd, false), true); return (true); } - new_ctx = true; + new_gctx = true; } else { /* * Increment nlimbo, in order to avoid a race condition with - * prof_ctx_merge()/prof_ctx_destroy(). 
+ * prof_tctx_destroy()/prof_gctx_try_destroy(). */ - malloc_mutex_lock(ctx.p->lock); - ctx.p->nlimbo++; - malloc_mutex_unlock(ctx.p->lock); - new_ctx = false; + malloc_mutex_lock(gctx.p->lock); + gctx.p->nlimbo++; + malloc_mutex_unlock(gctx.p->lock); + new_gctx = false; } - prof_leave(prof_tdata); + prof_leave(tsd, tdata); *p_btkey = btkey.v; - *p_ctx = ctx.p; - *p_new_ctx = new_ctx; + *p_gctx = gctx.p; + *p_new_gctx = new_gctx; return (false); } -prof_thr_cnt_t * -prof_lookup(prof_bt_t *bt) +prof_tctx_t * +prof_lookup(tsd_t *tsd, prof_bt_t *bt) { union { - prof_thr_cnt_t *p; + prof_tctx_t *p; void *v; } ret; - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; + bool not_found; cassert(config_prof); - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) return (NULL); - if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { + malloc_mutex_lock(tdata->lock); + not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); + if (!not_found) /* Note double negative! */ + ret.p->prepared = true; + malloc_mutex_unlock(tdata->lock); + if (not_found) { + tcache_t *tcache; void *btkey; - prof_ctx_t *ctx; - bool new_ctx; + prof_gctx_t *gctx; + bool new_gctx, error; /* * This thread's cache lacks bt. Look for it in the global * cache. */ - if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx)) + if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, + &new_gctx)) return (NULL); - /* Link a prof_thd_cnt_t into ctx for this thread. */ - if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { - assert(ckh_count(&prof_tdata->bt2cnt) > 0); - /* - * Flush the least recently used cnt in order to keep - * bt2cnt from becoming too large. 
- */ - ret.p = ql_last(&prof_tdata->lru_ql, lru_link); - assert(ret.v != NULL); - if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, - NULL, NULL)) - not_reached(); - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - prof_ctx_merge(ret.p->ctx, ret.p); - /* ret can now be re-used. */ - } else { - assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); - /* Allocate and partially initialize a new cnt. */ - ret.v = imalloc(sizeof(prof_thr_cnt_t)); - if (ret.p == NULL) { - if (new_ctx) - prof_ctx_destroy(ctx); - return (NULL); - } - ql_elm_new(ret.p, cnts_link); - ql_elm_new(ret.p, lru_link); + /* Link a prof_tctx_t into gctx for this thread. */ + tcache = tcache_get(tsd, true); + ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true, + NULL); + if (ret.p == NULL) { + if (new_gctx) + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + return (NULL); } - /* Finish initializing ret. */ - ret.p->ctx = ctx; - ret.p->epoch = 0; + ret.p->tdata = tdata; + ret.p->thr_uid = tdata->thr_uid; + ret.p->thr_discrim = tdata->thr_discrim; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) { - if (new_ctx) - prof_ctx_destroy(ctx); - idalloc(ret.v); + ret.p->gctx = gctx; + ret.p->tctx_uid = tdata->tctx_uid_next++; + ret.p->prepared = true; + ret.p->state = prof_tctx_state_initializing; + malloc_mutex_lock(tdata->lock); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); + malloc_mutex_unlock(tdata->lock); + if (error) { + if (new_gctx) + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + idalloctm(tsd, ret.v, tcache, true); return (NULL); } - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(ctx->lock); - ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link); - ctx->nlimbo--; - malloc_mutex_unlock(ctx->lock); - } else { - /* Move ret to the front of the LRU. 
*/ - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + malloc_mutex_lock(gctx->lock); + ret.p->state = prof_tctx_state_nominal; + tctx_tree_insert(&gctx->tctxs, ret.p); + gctx->nlimbo--; + malloc_mutex_unlock(gctx->lock); } return (ret.p); } +void +prof_sample_threshold_update(prof_tdata_t *tdata) +{ + /* + * The body of this function is compiled out unless heap profiling is + * enabled, so that it is possible to compile jemalloc with floating + * point support completely disabled. Avoiding floating point code is + * important on memory-constrained systems, but it also enables a + * workaround for versions of glibc that don't properly save/restore + * floating point registers during dynamic lazy symbol loading (which + * internally calls into whatever malloc implementation happens to be + * integrated into the application). Note that some compilers (e.g. + * gcc 4.8) may use floating point registers for fast memory moves, so + * jemalloc must be compiled with such optimizations disabled (e.g. + * -mno-sse) in order for the workaround to be complete. + */ +#ifdef JEMALLOC_PROF + uint64_t r; + double u; + + if (!config_prof) + return; + + if (lg_prof_sample == 0) { + tdata->bytes_until_sample = 0; + return; + } + + /* + * Compute sample interval as a geometrically distributed random + * variable with mean (2^lg_prof_sample). 
+ * + * __ __ + * | log(u) | 1 + * tdata->bytes_until_sample = | -------- |, where p = --------------- + * | log(1-p) | lg_prof_sample + * 2 + * + * For more information on the math, see: + * + * Non-Uniform Random Variate Generation + * Luc Devroye + * Springer-Verlag, New York, 1986 + * pp 500 + * (http://luc.devroye.org/rnbookindex.html) + */ + prng64(r, 53, tdata->prng_state, UINT64_C(6364136223846793005), + UINT64_C(1442695040888963407)); + u = (double)r * (1.0/9007199254740992.0L); + tdata->bytes_until_sample = (uint64_t)(log(u) / + log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + + (uint64_t)1U; +#endif +} + +#ifdef JEMALLOC_JET +static prof_tdata_t * +prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) +{ + size_t *tdata_count = (size_t *)arg; + + (*tdata_count)++; + + return (NULL); +} + +size_t +prof_tdata_count(void) +{ + size_t tdata_count = 0; + + malloc_mutex_lock(&tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, + (void *)&tdata_count); + malloc_mutex_unlock(&tdatas_mtx); + + return (tdata_count); +} +#endif + #ifdef JEMALLOC_JET size_t prof_bt_count(void) { size_t bt_count; - prof_tdata_t *prof_tdata; + tsd_t *tsd; + prof_tdata_t *tdata; - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) return (0); - prof_enter(prof_tdata); - bt_count = ckh_count(&bt2ctx); - prof_leave(prof_tdata); + malloc_mutex_lock(&bt2gctx_mtx); + bt_count = ckh_count(&bt2gctx); + malloc_mutex_unlock(&bt2gctx_mtx); return (bt_count); } @@ -675,7 +934,7 @@ prof_dump_open(bool propagate_err, const char *filename) int fd; fd = creat(filename, 0644); - if (fd == -1 && propagate_err == false) { + if (fd == -1 && !propagate_err) { malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n", filename); if (opt_abort) @@ -700,7 +959,7 @@ prof_dump_flush(bool propagate_err) err = 
write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); if (err == -1) { - if (propagate_err == false) { + if (!propagate_err) { malloc_write("<jemalloc>: write() failed during heap " "profile flush\n"); if (opt_abort) @@ -756,7 +1015,7 @@ prof_dump_write(bool propagate_err, const char *s) return (false); } -JEMALLOC_ATTR(format(printf, 2, 3)) +JEMALLOC_FORMAT_PRINTF(2, 3) static bool prof_dump_printf(bool propagate_err, const char *format, ...) { @@ -772,176 +1031,367 @@ prof_dump_printf(bool propagate_err, const char *format, ...) return (ret); } +/* tctx->tdata->lock is held. */ static void -prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, - prof_ctx_list_t *ctx_ql) +prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) { - prof_thr_cnt_t *thr_cnt; - prof_cnt_t tcnt; - - cassert(config_prof); - - malloc_mutex_lock(ctx->lock); - - /* - * Increment nlimbo so that ctx won't go away before dump. - * Additionally, link ctx into the dump list so that it is included in - * prof_dump()'s second pass. - */ - ctx->nlimbo++; - ql_tail_insert(ctx_ql, ctx, dump_link); - memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); - ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { - volatile unsigned *epoch = &thr_cnt->epoch; + malloc_mutex_lock(tctx->gctx->lock); - while (true) { - unsigned epoch0 = *epoch; - - /* Make sure epoch is even. */ - if (epoch0 & 1U) - continue; - - memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); + switch (tctx->state) { + case prof_tctx_state_initializing: + malloc_mutex_unlock(tctx->gctx->lock); + return; + case prof_tctx_state_nominal: + tctx->state = prof_tctx_state_dumping; + malloc_mutex_unlock(tctx->gctx->lock); - /* Terminate if epoch didn't change while reading. 
*/ - if (*epoch == epoch0) - break; - } + memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); - ctx->cnt_summed.curobjs += tcnt.curobjs; - ctx->cnt_summed.curbytes += tcnt.curbytes; + tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; if (opt_prof_accum) { - ctx->cnt_summed.accumobjs += tcnt.accumobjs; - ctx->cnt_summed.accumbytes += tcnt.accumbytes; + tdata->cnt_summed.accumobjs += + tctx->dump_cnts.accumobjs; + tdata->cnt_summed.accumbytes += + tctx->dump_cnts.accumbytes; } + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + not_reached(); } +} - if (ctx->cnt_summed.curobjs != 0) - (*leak_nctx)++; +/* gctx->lock is held. */ +static void +prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx) +{ - /* Add to cnt_all. */ - cnt_all->curobjs += ctx->cnt_summed.curobjs; - cnt_all->curbytes += ctx->cnt_summed.curbytes; + gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; if (opt_prof_accum) { - cnt_all->accumobjs += ctx->cnt_summed.accumobjs; - cnt_all->accumbytes += ctx->cnt_summed.accumbytes; + gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; } - - malloc_mutex_unlock(ctx->lock); } -static bool -prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) +/* tctx->gctx is held. */ +static prof_tctx_t * +prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - if (opt_lg_prof_sample == 0) { - if (prof_dump_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heapprofile\n", - cnt_all->curobjs, cnt_all->curbytes, - cnt_all->accumobjs, cnt_all->accumbytes)) - return (true); - } else { + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. 
*/ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + prof_tctx_merge_gctx(tctx, tctx->gctx); + break; + default: + not_reached(); + } + + return (NULL); +} + +/* gctx->lock is held. */ +static prof_tctx_t * +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) +{ + bool propagate_err = *(bool *)arg; + + switch (tctx->state) { + case prof_tctx_state_initializing: + case prof_tctx_state_nominal: + /* Not captured by this dump. */ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: if (prof_dump_printf(propagate_err, - "heap profile: %"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", - cnt_all->curobjs, cnt_all->curbytes, - cnt_all->accumobjs, cnt_all->accumbytes, - ((uint64_t)1U << opt_lg_prof_sample))) - return (true); + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " + "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, + tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, + tctx->dump_cnts.accumbytes)) + return (tctx); + break; + default: + not_reached(); } + return (NULL); +} - return (false); +/* tctx->gctx is held. */ +static prof_tctx_t * +prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) +{ + prof_tctx_t *ret; + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. */ + break; + case prof_tctx_state_dumping: + tctx->state = prof_tctx_state_nominal; + break; + case prof_tctx_state_purgatory: + ret = tctx; + goto label_return; + default: + not_reached(); + } + + ret = NULL; +label_return: + return (ret); } static void -prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) +prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { - ctx->nlimbo--; - ql_remove(ctx_ql, ctx, dump_link); + cassert(config_prof); + + malloc_mutex_lock(gctx->lock); + + /* + * Increment nlimbo so that gctx won't go away before dump. 
+ * Additionally, link gctx into the dump list so that it is included in + * prof_dump()'s second pass. + */ + gctx->nlimbo++; + gctx_tree_insert(gctxs, gctx); + + memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); + + malloc_mutex_unlock(gctx->lock); +} + +static prof_gctx_t * +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) +{ + size_t *leak_ngctx = (size_t *)arg; + + malloc_mutex_lock(gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, NULL); + if (gctx->cnt_summed.curobjs != 0) + (*leak_ngctx)++; + malloc_mutex_unlock(gctx->lock); + + return (NULL); } static void -prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) +prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { + prof_tdata_t *tdata = prof_tdata_get(tsd, false); + prof_gctx_t *gctx; - malloc_mutex_lock(ctx->lock); - prof_dump_ctx_cleanup_locked(ctx, ctx_ql); - malloc_mutex_unlock(ctx->lock); + /* + * Standard tree iteration won't work here, because as soon as we + * decrement gctx->nlimbo and unlock gctx, another thread can + * concurrently destroy it, which will corrupt the tree. Therefore, + * tear down the tree one node at a time during iteration. 
+ */ + while ((gctx = gctx_tree_first(gctxs)) != NULL) { + gctx_tree_remove(gctxs, gctx); + malloc_mutex_lock(gctx->lock); + { + prof_tctx_t *next; + + next = NULL; + do { + prof_tctx_t *to_destroy = + tctx_tree_iter(&gctx->tctxs, next, + prof_tctx_finish_iter, NULL); + if (to_destroy != NULL) { + next = tctx_tree_next(&gctx->tctxs, + to_destroy); + tctx_tree_remove(&gctx->tctxs, + to_destroy); + idalloctm(tsd, to_destroy, + tcache_get(tsd, false), true); + } else + next = NULL; + } while (next != NULL); + } + gctx->nlimbo--; + if (prof_gctx_should_destroy(gctx)) { + gctx->nlimbo++; + malloc_mutex_unlock(gctx->lock); + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } else + malloc_mutex_unlock(gctx->lock); + } } +static prof_tdata_t * +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) +{ + prof_cnt_t *cnt_all = (prof_cnt_t *)arg; + + malloc_mutex_lock(tdata->lock); + if (!tdata->expired) { + size_t tabind; + union { + prof_tctx_t *p; + void *v; + } tctx; + + tdata->dumping = true; + memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); + for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, + &tctx.v);) + prof_tctx_merge_tdata(tctx.p, tdata); + + cnt_all->curobjs += tdata->cnt_summed.curobjs; + cnt_all->curbytes += tdata->cnt_summed.curbytes; + if (opt_prof_accum) { + cnt_all->accumobjs += tdata->cnt_summed.accumobjs; + cnt_all->accumbytes += tdata->cnt_summed.accumbytes; + } + } else + tdata->dumping = false; + malloc_mutex_unlock(tdata->lock); + + return (NULL); +} + +static prof_tdata_t * +prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) +{ + bool propagate_err = *(bool *)arg; + + if (!tdata->dumping) + return (NULL); + + if (prof_dump_printf(propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", + tdata->thr_uid, tdata->cnt_summed.curobjs, + tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, + tdata->cnt_summed.accumbytes, + (tdata->thread_name != NULL) ? 
" " : "", + (tdata->thread_name != NULL) ? tdata->thread_name : "")) + return (tdata); + return (NULL); +} + +#ifdef JEMALLOC_JET +#undef prof_dump_header +#define prof_dump_header JEMALLOC_N(prof_dump_header_impl) +#endif +static bool +prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) +{ + bool ret; + + if (prof_dump_printf(propagate_err, + "heap_v2/%"FMTu64"\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, + cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) + return (true); + + malloc_mutex_lock(&tdatas_mtx); + ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, + (void *)&propagate_err) != NULL); + malloc_mutex_unlock(&tdatas_mtx); + return (ret); +} +#ifdef JEMALLOC_JET +#undef prof_dump_header +#define prof_dump_header JEMALLOC_N(prof_dump_header) +prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); +#endif + +/* gctx->lock is held. */ static bool -prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, - prof_ctx_list_t *ctx_ql) +prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, + prof_gctx_tree_t *gctxs) { bool ret; unsigned i; cassert(config_prof); - /* - * Current statistics can sum to 0 as a result of unmerged per thread - * statistics. Additionally, interval- and growth-triggered dumps can - * occur between the time a ctx is created and when its statistics are - * filled in. Avoid dumping any ctx that is an artifact of either - * implementation detail. - */ - malloc_mutex_lock(ctx->lock); - if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || - (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { - assert(ctx->cnt_summed.curobjs == 0); - assert(ctx->cnt_summed.curbytes == 0); - assert(ctx->cnt_summed.accumobjs == 0); - assert(ctx->cnt_summed.accumbytes == 0); + /* Avoid dumping such gctx's that have no useful data. 
*/ + if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { + assert(gctx->cnt_summed.curobjs == 0); + assert(gctx->cnt_summed.curbytes == 0); + assert(gctx->cnt_summed.accumobjs == 0); + assert(gctx->cnt_summed.accumbytes == 0); ret = false; goto label_return; } - if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64 - " [%"PRIu64": %"PRIu64"] @", - ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, - ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) { + if (prof_dump_printf(propagate_err, "@")) { ret = true; goto label_return; } - for (i = 0; i < bt->len; i++) { - if (prof_dump_printf(propagate_err, " %#"PRIxPTR, + if (prof_dump_printf(propagate_err, " %#"FMTxPTR, (uintptr_t)bt->vec[i])) { ret = true; goto label_return; } } - if (prof_dump_write(propagate_err, "\n")) { + if (prof_dump_printf(propagate_err, + "\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, + gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { + ret = true; + goto label_return; + } + + if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, + (void *)&propagate_err) != NULL) { ret = true; goto label_return; } ret = false; label_return: - prof_dump_ctx_cleanup_locked(ctx, ctx_ql); - malloc_mutex_unlock(ctx->lock); return (ret); } +JEMALLOC_FORMAT_PRINTF(1, 2) +static int +prof_open_maps(const char *format, ...) 
+{ + int mfd; + va_list ap; + char filename[PATH_MAX + 1]; + + va_start(ap, format); + malloc_vsnprintf(filename, sizeof(filename), format, ap); + va_end(ap); + mfd = open(filename, O_RDONLY); + + return (mfd); +} + static bool prof_dump_maps(bool propagate_err) { bool ret; int mfd; - char filename[PATH_MAX + 1]; cassert(config_prof); #ifdef __FreeBSD__ - malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map"); + mfd = prof_open_maps("/proc/curproc/map"); #else - malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", - (int)getpid()); + { + int pid = getpid(); + + mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) + mfd = prof_open_maps("/proc/%d/maps", pid); + } #endif - mfd = open(filename, O_RDONLY); if (mfd != -1) { ssize_t nread; @@ -977,51 +1427,85 @@ label_return: } static void -prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx, +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, const char *filename) { if (cnt_all->curbytes != 0) { - malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %" - PRId64" object%s, %zu context%s\n", + malloc_printf("<jemalloc>: Leak summary: %"FMTu64" byte%s, %" + FMTu64" object%s, %zu context%s\n", cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", - leak_nctx, (leak_nctx != 1) ? "s" : ""); + leak_ngctx, (leak_ngctx != 1) ? 
"s" : ""); malloc_printf( - "<jemalloc>: Run pprof on \"%s\" for leak detail\n", + "<jemalloc>: Run jeprof on \"%s\" for leak detail\n", filename); } } +static prof_gctx_t * +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) +{ + prof_gctx_t *ret; + bool propagate_err = *(bool *)arg; + + malloc_mutex_lock(gctx->lock); + + if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) { + ret = gctx; + goto label_return; + } + + ret = NULL; +label_return: + malloc_mutex_unlock(gctx->lock); + return (ret); +} + static bool -prof_dump(bool propagate_err, const char *filename, bool leakcheck) +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; prof_cnt_t cnt_all; size_t tabind; union { - prof_ctx_t *p; + prof_gctx_t *p; void *v; - } ctx; - size_t leak_nctx; - prof_ctx_list_t ctx_ql; + } gctx; + size_t leak_ngctx; + prof_gctx_tree_t gctxs; cassert(config_prof); - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) return (true); malloc_mutex_lock(&prof_dump_mtx); + prof_enter(tsd, tdata); - /* Merge per thread profile stats, and sum them in cnt_all. */ + /* + * Put gctx's in limbo and clear their counters in preparation for + * summing. + */ + gctx_tree_new(&gctxs); + for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) + prof_dump_gctx_prep(gctx.p, &gctxs); + + /* + * Iterate over tdatas, and for the non-expired ones snapshot their tctx + * stats and merge them into the associated gctx's. 
+ */ memset(&cnt_all, 0, sizeof(prof_cnt_t)); - leak_nctx = 0; - ql_new(&ctx_ql); - prof_enter(prof_tdata); - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) - prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql); - prof_leave(prof_tdata); + malloc_mutex_lock(&tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&cnt_all); + malloc_mutex_unlock(&tdatas_mtx); + + /* Merge tctx stats into gctx's. */ + leak_ngctx = 0; + gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx); + + prof_leave(tsd, tdata); /* Create dump file. */ if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) @@ -1031,11 +1515,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_dump_header(propagate_err, &cnt_all)) goto label_write_error; - /* Dump per ctx profile stats. */ - while ((ctx.p = ql_first(&ctx_ql)) != NULL) { - if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql)) - goto label_write_error; - } + /* Dump per gctx profile stats. */ + if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, + (void *)&propagate_err) != NULL) + goto label_write_error; /* Dump /proc/<pid>/maps if possible. 
*/ if (prof_dump_maps(propagate_err)) @@ -1044,17 +1527,17 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_dump_close(propagate_err)) goto label_open_close_error; + prof_gctx_finish(tsd, &gctxs); malloc_mutex_unlock(&prof_dump_mtx); if (leakcheck) - prof_leakcheck(&cnt_all, leak_nctx, filename); + prof_leakcheck(&cnt_all, leak_ngctx, filename); return (false); label_write_error: prof_dump_close(propagate_err); label_open_close_error: - while ((ctx.p = ql_first(&ctx_ql)) != NULL) - prof_dump_ctx_cleanup(ctx.p, &ctx_ql); + prof_gctx_finish(tsd, &gctxs); malloc_mutex_unlock(&prof_dump_mtx); return (true); } @@ -1062,7 +1545,7 @@ label_open_close_error: #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void -prof_dump_filename(char *filename, char v, int64_t vseq) +prof_dump_filename(char *filename, char v, uint64_t vseq) { cassert(config_prof); @@ -1070,12 +1553,12 @@ prof_dump_filename(char *filename, char v, int64_t vseq) if (vseq != VSEQ_INVALID) { /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"PRIu64".%c%"PRId64".heap", + "%s.%d.%"FMTu64".%c%"FMTu64".heap", opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); } else { /* "<prefix>.<pid>.<seq>.<v>.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"PRIu64".%c.heap", + "%s.%d.%"FMTu64".%c.heap", opt_prof_prefix, (int)getpid(), prof_dump_seq, v); } prof_dump_seq++; @@ -1084,57 +1567,63 @@ prof_dump_filename(char *filename, char v, int64_t vseq) static void prof_fdump(void) { + tsd_t *tsd; char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); + assert(opt_prof_final); + assert(opt_prof_prefix[0] != '\0'); - if (prof_booted == false) + if (!prof_booted) return; + tsd = tsd_fetch(); - if (opt_prof_final && opt_prof_prefix[0] != '\0') { - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', VSEQ_INVALID); - 
malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(false, filename, opt_prof_leak); - } + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'f', VSEQ_INVALID); + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(tsd, false, filename, opt_prof_leak); } void prof_idump(void) { - prof_tdata_t *prof_tdata; - char filename[PATH_MAX + 1]; + tsd_t *tsd; + prof_tdata_t *tdata; cassert(config_prof); - if (prof_booted == false) + if (!prof_booted) return; - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) return; - if (prof_tdata->enq) { - prof_tdata->enq_idump = true; + if (tdata->enq) { + tdata->enq_idump = true; return; } if (opt_prof_prefix[0] != '\0') { + char filename[PATH_MAX + 1]; malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(false, filename, false); + prof_dump(tsd, false, filename, false); } } bool prof_mdump(const char *filename) { + tsd_t *tsd; char filename_buf[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); - if (opt_prof == false || prof_booted == false) + if (!opt_prof || !prof_booted) return (true); + tsd = tsd_fetch(); if (filename == NULL) { /* No filename specified, so automatically generate one. 
*/ @@ -1146,33 +1635,35 @@ prof_mdump(const char *filename) malloc_mutex_unlock(&prof_dump_seq_mtx); filename = filename_buf; } - return (prof_dump(true, filename, false)); + return (prof_dump(tsd, true, filename, false)); } void prof_gdump(void) { - prof_tdata_t *prof_tdata; - char filename[DUMP_FILENAME_BUFSIZE]; + tsd_t *tsd; + prof_tdata_t *tdata; cassert(config_prof); - if (prof_booted == false) + if (!prof_booted) return; - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) return; - if (prof_tdata->enq) { - prof_tdata->enq_gdump = true; + if (tdata->enq) { + tdata->enq_gdump = true; return; } if (opt_prof_prefix[0] != '\0') { + char filename[DUMP_FILENAME_BUFSIZE]; malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(false, filename, false); + prof_dump(tsd, false, filename, false); } } @@ -1199,88 +1690,375 @@ prof_bt_keycomp(const void *k1, const void *k2) return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } -prof_tdata_t * -prof_tdata_init(void) +JEMALLOC_INLINE_C uint64_t +prof_thr_uid_alloc(void) +{ + uint64_t thr_uid; + + malloc_mutex_lock(&next_thr_uid_mtx); + thr_uid = next_thr_uid; + next_thr_uid++; + malloc_mutex_unlock(&next_thr_uid_mtx); + + return (thr_uid); +} + +static prof_tdata_t * +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, + char *thread_name, bool active) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; + tcache_t *tcache; cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); - if (prof_tdata == NULL) + tcache = tcache_get(tsd, true); + tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false, + tcache, true, NULL); + if (tdata == NULL) return (NULL); - if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, + tdata->lock = prof_tdata_mutex_choose(thr_uid); + tdata->thr_uid = thr_uid; + tdata->thr_discrim = thr_discrim; + tdata->thread_name = thread_name; + tdata->attached = true; + tdata->expired = false; + tdata->tctx_uid_next = 0; + + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloc(prof_tdata); + idalloctm(tsd, tdata, tcache, true); return (NULL); } - ql_new(&prof_tdata->lru_ql); - prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); - if (prof_tdata->vec == NULL) { - ckh_delete(&prof_tdata->bt2cnt); - idalloc(prof_tdata); - return (NULL); - } + tdata->prng_state = (uint64_t)(uintptr_t)tdata; + prof_sample_threshold_update(tdata); - prof_tdata->prng_state = 0; - prof_tdata->threshold = 0; - prof_tdata->accum = 0; + tdata->enq = false; + tdata->enq_idump = false; + tdata->enq_gdump = false; - prof_tdata->enq = false; - prof_tdata->enq_idump = false; - prof_tdata->enq_gdump = false; + tdata->dumping = false; + tdata->active = active; - prof_tdata_tsd_set(&prof_tdata); + malloc_mutex_lock(&tdatas_mtx); + tdata_tree_insert(&tdatas, tdata); + malloc_mutex_unlock(&tdatas_mtx); - return (prof_tdata); + return (tdata); } -void -prof_tdata_cleanup(void *arg) +prof_tdata_t * +prof_tdata_init(tsd_t *tsd) { - prof_thr_cnt_t *cnt; - prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; - cassert(config_prof); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0, NULL, + prof_thread_active_init_get())); +} - if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { - /* - * Another destructor deallocated memory after this destructor - * was called. 
Reset prof_tdata to PROF_TDATA_STATE_PURGATORY - * in order to receive another callback. - */ - prof_tdata = PROF_TDATA_STATE_PURGATORY; - prof_tdata_tsd_set(&prof_tdata); - } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { - /* - * The previous time this destructor was called, we set the key - * to PROF_TDATA_STATE_PURGATORY so that other destructors - * wouldn't cause re-creation of the prof_tdata. This time, do - * nothing, so that the destructor will not be called again. - */ - } else if (prof_tdata != NULL) { - /* - * Delete the hash table. All of its contents can still be - * iterated over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); +/* tdata->lock must be held. */ +static bool +prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached) +{ + + if (tdata->attached && !even_if_attached) + return (false); + if (ckh_count(&tdata->bt2tctx) != 0) + return (false); + return (true); +} + +/* tdatas_mtx must be held. */ +static void +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) +{ + tcache_t *tcache; + + assert(prof_tdata_should_destroy(tdata, even_if_attached)); + assert(tsd_prof_tdata_get(tsd) != tdata); + + tdata_tree_remove(&tdatas, tdata); + + tcache = tcache_get(tsd, false); + if (tdata->thread_name != NULL) + idalloctm(tsd, tdata->thread_name, tcache, true); + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd, tdata, tcache, true); +} + +static void +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) +{ + + malloc_mutex_lock(&tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(&tdatas_mtx); +} + +static void +prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) +{ + bool destroy_tdata; + + malloc_mutex_lock(tdata->lock); + if (tdata->attached) { + destroy_tdata = prof_tdata_should_destroy(tdata, true); /* - * Iteratively merge cnt's into the global stats and delete - * them. 
+ * Only detach if !destroy_tdata, because detaching would allow + * another thread to win the race to destroy tdata. */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); - } - idalloc(prof_tdata->vec); - idalloc(prof_tdata); - prof_tdata = PROF_TDATA_STATE_PURGATORY; - prof_tdata_tsd_set(&prof_tdata); + if (!destroy_tdata) + tdata->attached = false; + tsd_prof_tdata_set(tsd, NULL); + } else + destroy_tdata = false; + malloc_mutex_unlock(tdata->lock); + if (destroy_tdata) + prof_tdata_destroy(tsd, tdata, true); +} + +prof_tdata_t * +prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) +{ + uint64_t thr_uid = tdata->thr_uid; + uint64_t thr_discrim = tdata->thr_discrim + 1; + char *thread_name = (tdata->thread_name != NULL) ? + prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; + bool active = tdata->active; + + prof_tdata_detach(tsd, tdata); + return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, + active)); +} + +static bool +prof_tdata_expire(prof_tdata_t *tdata) +{ + bool destroy_tdata; + + malloc_mutex_lock(tdata->lock); + if (!tdata->expired) { + tdata->expired = true; + destroy_tdata = tdata->attached ? false : + prof_tdata_should_destroy(tdata, false); + } else + destroy_tdata = false; + malloc_mutex_unlock(tdata->lock); + + return (destroy_tdata); +} + +static prof_tdata_t * +prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) +{ + + return (prof_tdata_expire(tdata) ? 
tdata : NULL); +} + +void +prof_reset(tsd_t *tsd, size_t lg_sample) +{ + prof_tdata_t *next; + + assert(lg_sample < (sizeof(uint64_t) << 3)); + + malloc_mutex_lock(&prof_dump_mtx); + malloc_mutex_lock(&tdatas_mtx); + + lg_prof_sample = lg_sample; + + next = NULL; + do { + prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, + prof_tdata_reset_iter, NULL); + if (to_destroy != NULL) { + next = tdata_tree_next(&tdatas, to_destroy); + prof_tdata_destroy_locked(tsd, to_destroy, false); + } else + next = NULL; + } while (next != NULL); + + malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(&prof_dump_mtx); +} + +void +prof_tdata_cleanup(tsd_t *tsd) +{ + prof_tdata_t *tdata; + + if (!config_prof) + return; + + tdata = tsd_prof_tdata_get(tsd); + if (tdata != NULL) + prof_tdata_detach(tsd, tdata); +} + +bool +prof_active_get(void) +{ + bool prof_active_current; + + malloc_mutex_lock(&prof_active_mtx); + prof_active_current = prof_active; + malloc_mutex_unlock(&prof_active_mtx); + return (prof_active_current); +} + +bool +prof_active_set(bool active) +{ + bool prof_active_old; + + malloc_mutex_lock(&prof_active_mtx); + prof_active_old = prof_active; + prof_active = active; + malloc_mutex_unlock(&prof_active_mtx); + return (prof_active_old); +} + +const char * +prof_thread_name_get(void) +{ + tsd_t *tsd; + prof_tdata_t *tdata; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) + return (""); + return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); +} + +static char * +prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) +{ + char *ret; + size_t size; + + if (thread_name == NULL) + return (NULL); + + size = strlen(thread_name) + 1; + if (size == 1) + return (""); + + ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL); + if (ret == NULL) + return (NULL); + memcpy(ret, thread_name, size); + return (ret); +} + +int +prof_thread_name_set(tsd_t *tsd, const char *thread_name) +{ + prof_tdata_t *tdata; + unsigned i; + char *s; + + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) + return (EAGAIN); + + /* Validate input. */ + if (thread_name == NULL) + return (EFAULT); + for (i = 0; thread_name[i] != '\0'; i++) { + char c = thread_name[i]; + if (!isgraph(c) && !isblank(c)) + return (EFAULT); } + + s = prof_thread_name_alloc(tsd, thread_name); + if (s == NULL) + return (EAGAIN); + + if (tdata->thread_name != NULL) { + idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), + true); + tdata->thread_name = NULL; + } + if (strlen(s) > 0) + tdata->thread_name = s; + return (0); +} + +bool +prof_thread_active_get(void) +{ + tsd_t *tsd; + prof_tdata_t *tdata; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) + return (false); + return (tdata->active); +} + +bool +prof_thread_active_set(bool active) +{ + tsd_t *tsd; + prof_tdata_t *tdata; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) + return (true); + tdata->active = active; + return (false); +} + +bool +prof_thread_active_init_get(void) +{ + bool active_init; + + malloc_mutex_lock(&prof_thread_active_init_mtx); + active_init = prof_thread_active_init; + malloc_mutex_unlock(&prof_thread_active_init_mtx); + return (active_init); +} + +bool +prof_thread_active_init_set(bool active_init) +{ + bool active_init_old; + + malloc_mutex_lock(&prof_thread_active_init_mtx); + active_init_old = prof_thread_active_init; + prof_thread_active_init = active_init; + 
malloc_mutex_unlock(&prof_thread_active_init_mtx); + return (active_init_old); +} + +bool +prof_gdump_get(void) +{ + bool prof_gdump_current; + + malloc_mutex_lock(&prof_gdump_mtx); + prof_gdump_current = prof_gdump_val; + malloc_mutex_unlock(&prof_gdump_mtx); + return (prof_gdump_current); +} + +bool +prof_gdump_set(bool gdump) +{ + bool prof_gdump_old; + + malloc_mutex_lock(&prof_gdump_mtx); + prof_gdump_old = prof_gdump_val; + prof_gdump_val = gdump; + malloc_mutex_unlock(&prof_gdump_mtx); + return (prof_gdump_old); } void @@ -1300,11 +2078,11 @@ prof_boot1(void) cassert(config_prof); /* - * opt_prof and prof_promote must be in their final state before any - * arenas are initialized, so this function must be executed early. + * opt_prof must be in its final state before any arenas are + * initialized, so this function must be executed early. */ - if (opt_prof_leak && opt_prof == false) { + if (opt_prof_leak && !opt_prof) { /* * Enable opt_prof, but in such a way that profiles are never * automatically dumped. 
@@ -1317,8 +2095,6 @@ prof_boot1(void) opt_lg_prof_interval); } } - - prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); } bool @@ -1328,36 +2104,65 @@ prof_boot2(void) cassert(config_prof); if (opt_prof) { + tsd_t *tsd; unsigned i; - if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, + lg_prof_sample = opt_lg_prof_sample; + + prof_active = opt_prof_active; + if (malloc_mutex_init(&prof_active_mtx)) + return (true); + + prof_gdump_val = opt_prof_gdump; + if (malloc_mutex_init(&prof_gdump_mtx)) + return (true); + + prof_thread_active_init = opt_prof_thread_active_init; + if (malloc_mutex_init(&prof_thread_active_init_mtx)) + return (true); + + tsd = tsd_fetch(); + if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); - if (malloc_mutex_init(&bt2ctx_mtx)) + if (malloc_mutex_init(&bt2gctx_mtx)) + return (true); + + tdata_tree_new(&tdatas); + if (malloc_mutex_init(&tdatas_mtx)) + return (true); + + next_thr_uid = 0; + if (malloc_mutex_init(&next_thr_uid_mtx)) return (true); - if (prof_tdata_tsd_boot()) { - malloc_write( - "<jemalloc>: Error in pthread_key_create()\n"); - abort(); - } if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); if (malloc_mutex_init(&prof_dump_mtx)) return (true); - if (atexit(prof_fdump) != 0) { + if (opt_prof_final && opt_prof_prefix[0] != '\0' && + atexit(prof_fdump) != 0) { malloc_write("<jemalloc>: Error in atexit()\n"); if (opt_abort) abort(); } - ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * + gctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); - if (ctx_locks == NULL) + if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { - if (malloc_mutex_init(&ctx_locks[i])) + if (malloc_mutex_init(&gctx_locks[i])) + return (true); + } + + tdata_locks = (malloc_mutex_t *)base_alloc(PROF_NTDATA_LOCKS * + sizeof(malloc_mutex_t)); + if (tdata_locks == NULL) + return (true); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) { + if 
(malloc_mutex_init(&tdata_locks[i])) return (true); } } @@ -1382,10 +2187,14 @@ prof_prefork(void) if (opt_prof) { unsigned i; - malloc_mutex_prefork(&bt2ctx_mtx); + malloc_mutex_prefork(&tdatas_mtx); + malloc_mutex_prefork(&bt2gctx_mtx); + malloc_mutex_prefork(&next_thr_uid_mtx); malloc_mutex_prefork(&prof_dump_seq_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(&ctx_locks[i]); + malloc_mutex_prefork(&gctx_locks[i]); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_prefork(&tdata_locks[i]); } } @@ -1396,10 +2205,14 @@ prof_postfork_parent(void) if (opt_prof) { unsigned i; + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_parent(&tdata_locks[i]); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_parent(&ctx_locks[i]); + malloc_mutex_postfork_parent(&gctx_locks[i]); malloc_mutex_postfork_parent(&prof_dump_seq_mtx); - malloc_mutex_postfork_parent(&bt2ctx_mtx); + malloc_mutex_postfork_parent(&next_thr_uid_mtx); + malloc_mutex_postfork_parent(&bt2gctx_mtx); + malloc_mutex_postfork_parent(&tdatas_mtx); } } @@ -1410,10 +2223,14 @@ prof_postfork_child(void) if (opt_prof) { unsigned i; + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_child(&tdata_locks[i]); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_child(&ctx_locks[i]); + malloc_mutex_postfork_child(&gctx_locks[i]); malloc_mutex_postfork_child(&prof_dump_seq_mtx); - malloc_mutex_postfork_child(&bt2ctx_mtx); + malloc_mutex_postfork_child(&next_thr_uid_mtx); + malloc_mutex_postfork_child(&bt2gctx_mtx); + malloc_mutex_postfork_child(&tdatas_mtx); } } diff --git a/dep/jemalloc/src/quarantine.c b/dep/jemalloc/src/quarantine.c index 5431511640a..6c43dfcaa3a 100644 --- a/dep/jemalloc/src/quarantine.c +++ b/dep/jemalloc/src/quarantine.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/jemalloc_internal.h" /* - * quarantine pointers close to NULL are used to encode state information that + * Quarantine pointers close to NULL are used to encode 
state information that * is used for cleaning up during thread shutdown. */ #define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1) @@ -10,26 +10,25 @@ #define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY /******************************************************************************/ -/* Data. */ - -malloc_tsd_data(, quarantine, quarantine_t *, NULL) - -/******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static quarantine_t *quarantine_grow(quarantine_t *quarantine); -static void quarantine_drain_one(quarantine_t *quarantine); -static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); +static quarantine_t *quarantine_grow(tsd_t *tsd, quarantine_t *quarantine); +static void quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine); +static void quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, + size_t upper_bound); /******************************************************************************/ -quarantine_t * -quarantine_init(size_t lg_maxobjs) +static quarantine_t * +quarantine_init(tsd_t *tsd, size_t lg_maxobjs) { quarantine_t *quarantine; - quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + - ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); + assert(tsd_nominal(tsd)); + + quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs) + + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, + tcache_get(tsd, true), true, NULL); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -37,19 +36,36 @@ quarantine_init(size_t lg_maxobjs) quarantine->first = 0; quarantine->lg_maxobjs = lg_maxobjs; - quarantine_tsd_set(&quarantine); - return (quarantine); } +void +quarantine_alloc_hook_work(tsd_t *tsd) +{ + quarantine_t *quarantine; + + if (!tsd_nominal(tsd)) + return; + + quarantine = quarantine_init(tsd, LG_MAXOBJS_INIT); + /* + * Check again whether quarantine has been initialized, because 
+ * quarantine_init() may have triggered recursive initialization. + */ + if (tsd_quarantine_get(tsd) == NULL) + tsd_quarantine_set(tsd, quarantine); + else + idalloctm(tsd, quarantine, tcache_get(tsd, false), true); +} + static quarantine_t * -quarantine_grow(quarantine_t *quarantine) +quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) { quarantine_t *ret; - ret = quarantine_init(quarantine->lg_maxobjs + 1); + ret = quarantine_init(tsd, quarantine->lg_maxobjs + 1); if (ret == NULL) { - quarantine_drain_one(quarantine); + quarantine_drain_one(tsd, quarantine); return (quarantine); } @@ -71,17 +87,18 @@ quarantine_grow(quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloc(quarantine); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + tsd_quarantine_set(tsd, ret); return (ret); } static void -quarantine_drain_one(quarantine_t *quarantine) +quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; assert(obj->usize == isalloc(obj->ptr, config_prof)); - idalloc(obj->ptr); + idalloctm(tsd, obj->ptr, NULL, false); quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << @@ -89,15 +106,15 @@ quarantine_drain_one(quarantine_t *quarantine) } static void -quarantine_drain(quarantine_t *quarantine, size_t upper_bound) +quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) - quarantine_drain_one(quarantine); + quarantine_drain_one(tsd, quarantine); } void -quarantine(void *ptr) +quarantine(tsd_t *tsd, void *ptr) { quarantine_t *quarantine; size_t usize = isalloc(ptr, config_prof); @@ -105,17 +122,8 @@ quarantine(void *ptr) cassert(config_fill); assert(opt_quarantine); - quarantine = *quarantine_tsd_get(); - if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { - if 
(quarantine == QUARANTINE_STATE_PURGATORY) { - /* - * Make a note that quarantine() was called after - * quarantine_cleanup() was called. - */ - quarantine = QUARANTINE_STATE_REINCARNATED; - quarantine_tsd_set(&quarantine); - } - idalloc(ptr); + if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { + idalloctm(tsd, ptr, NULL, false); return; } /* @@ -125,11 +133,11 @@ quarantine(void *ptr) if (quarantine->curbytes + usize > opt_quarantine) { size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine - usize : 0; - quarantine_drain(quarantine, upper_bound); + quarantine_drain(tsd, quarantine, upper_bound); } /* Grow the quarantine ring buffer if it's full. */ if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) - quarantine = quarantine_grow(quarantine); + quarantine = quarantine_grow(tsd, quarantine); /* quarantine_grow() must free a slot if it fails to grow. */ assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs)); /* Append ptr if its size doesn't exceed the quarantine size. */ @@ -141,12 +149,12 @@ quarantine(void *ptr) obj->usize = usize; quarantine->curbytes += usize; quarantine->curobjs++; - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk_free)) { /* * Only do redzone validation if Valgrind isn't in * operation. */ - if ((config_valgrind == false || opt_valgrind == false) + if ((!config_valgrind || likely(!in_valgrind)) && usize <= SMALL_MAXCLASS) arena_quarantine_junk_small(ptr, usize); else @@ -154,46 +162,22 @@ quarantine(void *ptr) } } else { assert(quarantine->curbytes == 0); - idalloc(ptr); + idalloctm(tsd, ptr, NULL, false); } } void -quarantine_cleanup(void *arg) -{ - quarantine_t *quarantine = *(quarantine_t **)arg; - - if (quarantine == QUARANTINE_STATE_REINCARNATED) { - /* - * Another destructor deallocated memory after this destructor - * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY - * in order to receive another callback. 
- */ - quarantine = QUARANTINE_STATE_PURGATORY; - quarantine_tsd_set(&quarantine); - } else if (quarantine == QUARANTINE_STATE_PURGATORY) { - /* - * The previous time this destructor was called, we set the key - * to QUARANTINE_STATE_PURGATORY so that other destructors - * wouldn't cause re-creation of the quarantine. This time, do - * nothing, so that the destructor will not be called again. - */ - } else if (quarantine != NULL) { - quarantine_drain(quarantine, 0); - idalloc(quarantine); - quarantine = QUARANTINE_STATE_PURGATORY; - quarantine_tsd_set(&quarantine); - } -} - -bool -quarantine_boot(void) +quarantine_cleanup(tsd_t *tsd) { + quarantine_t *quarantine; - cassert(config_fill); - - if (quarantine_tsd_boot()) - return (true); + if (!config_fill) + return; - return (false); + quarantine = tsd_quarantine_get(tsd); + if (quarantine != NULL) { + quarantine_drain(tsd, quarantine, 0); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + tsd_quarantine_set(tsd, NULL); + } } diff --git a/dep/jemalloc/src/rtree.c b/dep/jemalloc/src/rtree.c index 205957ac4e1..af0d97e7530 100644 --- a/dep/jemalloc/src/rtree.c +++ b/dep/jemalloc/src/rtree.c @@ -1,73 +1,74 @@ #define JEMALLOC_RTREE_C_ #include "jemalloc/internal/jemalloc_internal.h" -rtree_t * -rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc) +static unsigned +hmin(unsigned ha, unsigned hb) { - rtree_t *ret; - unsigned bits_per_level, bits_in_leaf, height, i; + + return (ha < hb ? ha : hb); +} + +/* Only the most significant bits of keys passed to rtree_[gs]et() are used. */ +bool +rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, + rtree_node_dalloc_t *dalloc) +{ + unsigned bits_in_leaf, height, i; assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); - bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; - bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1; + bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? 
RTREE_BITS_PER_LEVEL + : (bits % RTREE_BITS_PER_LEVEL); if (bits > bits_in_leaf) { - height = 1 + (bits - bits_in_leaf) / bits_per_level; - if ((height-1) * bits_per_level + bits_in_leaf != bits) + height = 1 + (bits - bits_in_leaf) / RTREE_BITS_PER_LEVEL; + if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits) height++; - } else { + } else height = 1; + assert((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf == bits); + + rtree->alloc = alloc; + rtree->dalloc = dalloc; + rtree->height = height; + + /* Root level. */ + rtree->levels[0].subtree = NULL; + rtree->levels[0].bits = (height > 1) ? RTREE_BITS_PER_LEVEL : + bits_in_leaf; + rtree->levels[0].cumbits = rtree->levels[0].bits; + /* Interior levels. */ + for (i = 1; i < height-1; i++) { + rtree->levels[i].subtree = NULL; + rtree->levels[i].bits = RTREE_BITS_PER_LEVEL; + rtree->levels[i].cumbits = rtree->levels[i-1].cumbits + + RTREE_BITS_PER_LEVEL; } - assert((height-1) * bits_per_level + bits_in_leaf >= bits); - - ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) + - (sizeof(unsigned) * height)); - if (ret == NULL) - return (NULL); - memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * - height)); - - ret->alloc = alloc; - ret->dalloc = dalloc; - if (malloc_mutex_init(&ret->mutex)) { - if (dalloc != NULL) - dalloc(ret); - return (NULL); - } - ret->height = height; + /* Leaf level. 
*/ if (height > 1) { - if ((height-1) * bits_per_level + bits_in_leaf > bits) { - ret->level2bits[0] = (bits - bits_in_leaf) % - bits_per_level; - } else - ret->level2bits[0] = bits_per_level; - for (i = 1; i < height-1; i++) - ret->level2bits[i] = bits_per_level; - ret->level2bits[height-1] = bits_in_leaf; - } else - ret->level2bits[0] = bits; + rtree->levels[height-1].subtree = NULL; + rtree->levels[height-1].bits = bits_in_leaf; + rtree->levels[height-1].cumbits = bits; + } - ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]); - if (ret->root == NULL) { - if (dalloc != NULL) - dalloc(ret); - return (NULL); + /* Compute lookup table to be used by rtree_start_level(). */ + for (i = 0; i < RTREE_HEIGHT_MAX; i++) { + rtree->start_level[i] = hmin(RTREE_HEIGHT_MAX - 1 - i, height - + 1); } - memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); - return (ret); + return (false); } static void -rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level) +rtree_delete_subtree(rtree_t *rtree, rtree_node_elm_t *node, unsigned level) { - if (level < rtree->height - 1) { + if (level + 1 < rtree->height) { size_t nchildren, i; - nchildren = ZU(1) << rtree->level2bits[level]; + nchildren = ZU(1) << rtree->levels[level].bits; for (i = 0; i < nchildren; i++) { - void **child = (void **)node[i]; + rtree_node_elm_t *child = node[i].child; if (child != NULL) rtree_delete_subtree(rtree, child, level + 1); } @@ -78,28 +79,49 @@ rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level) void rtree_delete(rtree_t *rtree) { + unsigned i; - rtree_delete_subtree(rtree, rtree->root, 0); - rtree->dalloc(rtree); + for (i = 0; i < rtree->height; i++) { + rtree_node_elm_t *subtree = rtree->levels[i].subtree; + if (subtree != NULL) + rtree_delete_subtree(rtree, subtree, i); + } } -void -rtree_prefork(rtree_t *rtree) +static rtree_node_elm_t * +rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) { + rtree_node_elm_t *node; + + if 
(atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { + /* + * Another thread is already in the process of initializing. + * Spin-wait until initialization is complete. + */ + do { + CPU_SPINWAIT; + node = atomic_read_p((void **)elmp); + } while (node == RTREE_NODE_INITIALIZING); + } else { + node = rtree->alloc(ZU(1) << rtree->levels[level].bits); + if (node == NULL) + return (NULL); + atomic_write_p((void **)elmp, node); + } - malloc_mutex_prefork(&rtree->mutex); + return (node); } -void -rtree_postfork_parent(rtree_t *rtree) +rtree_node_elm_t * +rtree_subtree_read_hard(rtree_t *rtree, unsigned level) { - malloc_mutex_postfork_parent(&rtree->mutex); + return (rtree_node_init(rtree, level, &rtree->levels[level].subtree)); } -void -rtree_postfork_child(rtree_t *rtree) +rtree_node_elm_t * +rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) { - malloc_mutex_postfork_child(&rtree->mutex); + return (rtree_node_init(rtree, level, &elm->child)); } diff --git a/dep/jemalloc/src/stats.c b/dep/jemalloc/src/stats.c index bef2ab33cd4..154c3e74cd3 100644 --- a/dep/jemalloc/src/stats.c +++ b/dep/jemalloc/src/stats.c @@ -6,31 +6,22 @@ xmallctl(n, v, &sz, NULL, 0); \ } while (0) -#define CTL_I_GET(n, v, t) do { \ +#define CTL_M2_GET(n, i, v, t) do { \ size_t mib[6]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ - mib[2] = i; \ + mib[2] = (i); \ xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ } while (0) -#define CTL_J_GET(n, v, t) do { \ +#define CTL_M2_M4_GET(n, i, j, v, t) do { \ size_t mib[6]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ - mib[2] = j; \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ -} while (0) - -#define CTL_IJ_GET(n, v, t) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = i; \ - mib[4] = 
j; \ + mib[2] = (i); \ + mib[4] = (j); \ xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ } while (0) @@ -48,8 +39,10 @@ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); +static void stats_arena_hchunks_print( + void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large); + void *cbopaque, unsigned i, bool bins, bool large, bool huge); /******************************************************************************/ @@ -58,100 +51,109 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { size_t page; - bool config_tcache; - unsigned nbins, j, gap_start; + bool config_tcache, in_gap; + unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" - " ndalloc nrequests nfills nflushes" - " newruns reruns curruns\n"); + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs curruns regs" + " pgs util nfills nflushes newruns" + " reruns\n"); } else { malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" - " ndalloc newruns reruns curruns\n"); + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs curruns regs" + " pgs util newruns reruns\n"); } CTL_GET("arenas.nbins", &nbins, unsigned); - for (j = 0, gap_start = UINT_MAX; j < nbins; j++) { + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nruns; - CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t); - if (nruns == 0) { - if (gap_start == UINT_MAX) - gap_start = j; - } else { - size_t reg_size, run_size, allocated; + CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns, + uint64_t); + 
if (nruns == 0) + in_gap = true; + else { + size_t reg_size, run_size, curregs, availregs, milli; + size_t curruns; uint32_t nregs; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t reruns; - size_t curruns; + char util[6]; /* "x.yyy". */ - if (gap_start != UINT_MAX) { - if (j > gap_start + 1) { - /* Gap of more than one size class. */ - malloc_cprintf(write_cb, cbopaque, - "[%u..%u]\n", gap_start, - j - 1); - } else { - /* Gap of one size class. */ - malloc_cprintf(write_cb, cbopaque, - "[%u]\n", gap_start); - } - gap_start = UINT_MAX; + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + in_gap = false; } - CTL_J_GET("arenas.bin.0.size", &reg_size, size_t); - CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); - CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.allocated", - &allocated, size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc", + CTL_M2_GET("arenas.bin.0.size", j, &reg_size, size_t); + CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); + CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, + size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc", + CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, + &curregs, size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, + &nrequests, uint64_t); if (config_tcache) { - CTL_IJ_GET("stats.arenas.0.bins.0.nrequests", - &nrequests, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nfills", - &nfills, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nflushes", - &nflushes, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, + j, &nfills, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", + i, j, &nflushes, uint64_t); } - CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns, - uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, - size_t); + 
CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, + &reruns, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, + &curruns, size_t); + + availregs = nregs * curruns; + milli = (availregs != 0) ? (1000 * curregs) / availregs + : 1000; + assert(milli <= 1000); + if (milli < 10) { + malloc_snprintf(util, sizeof(util), + "0.00%zu", milli); + } else if (milli < 100) { + malloc_snprintf(util, sizeof(util), "0.0%zu", + milli); + } else if (milli < 1000) { + malloc_snprintf(util, sizeof(util), "0.%zu", + milli); + } else + malloc_snprintf(util, sizeof(util), "1"); + if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "%13u %5zu %4u %3zu %12zu %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu\n", - j, reg_size, nregs, run_size / page, - allocated, nmalloc, ndalloc, nrequests, - nfills, nflushes, nruns, reruns, curruns); + "%20zu %3u %12zu %12"FMTu64 + " %12"FMTu64" %12"FMTu64" %12zu" + " %12zu %4u %3zu %-5s %12"FMTu64 + " %12"FMTu64" %12"FMTu64" %12"FMTu64"\n", + reg_size, j, curregs * reg_size, nmalloc, + ndalloc, nrequests, curregs, curruns, nregs, + run_size / page, util, nfills, nflushes, + nruns, reruns); } else { malloc_cprintf(write_cb, cbopaque, - "%13u %5zu %4u %3zu %12zu %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu\n", - j, reg_size, nregs, run_size / page, - allocated, nmalloc, ndalloc, nruns, reruns, - curruns); + "%20zu %3u %12zu %12"FMTu64 + " %12"FMTu64" %12"FMTu64" %12zu" + " %12zu %4u %3zu %-5s %12"FMTu64 + " %12"FMTu64"\n", + reg_size, j, curregs * reg_size, nmalloc, + ndalloc, nrequests, curregs, curruns, nregs, + run_size / page, util, nruns, reruns); } } } - if (gap_start != UINT_MAX) { - if (j > gap_start + 1) { - /* Gap of more than one size class. */ - malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n", - gap_start, j - 1); - } else { - /* Gap of one size class. 
*/ - malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start); - } + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); } } @@ -159,110 +161,199 @@ static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - size_t page, nlruns, j; - ssize_t gap_start; - - CTL_GET("arenas.page", &page, size_t); + unsigned nbins, nlruns, j; + bool in_gap; malloc_cprintf(write_cb, cbopaque, - "large: size pages nmalloc ndalloc nrequests" - " curruns\n"); - CTL_GET("arenas.nlruns", &nlruns, size_t); - for (j = 0, gap_start = -1; j < nlruns; j++) { + "large: size ind allocated nmalloc ndalloc" + " nrequests curruns\n"); + CTL_GET("arenas.nbins", &nbins, unsigned); + CTL_GET("arenas.nlruns", &nlruns, unsigned); + for (j = 0, in_gap = false; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t run_size, curruns; - CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc, + CTL_M2_M4_GET("stats.arenas.0.lruns.0.nmalloc", i, j, &nmalloc, uint64_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc, + CTL_M2_M4_GET("stats.arenas.0.lruns.0.ndalloc", i, j, &ndalloc, uint64_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests, - uint64_t); - if (nrequests == 0) { - if (gap_start == -1) - gap_start = j; - } else { - CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, + CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j, + &nrequests, uint64_t); + if (nrequests == 0) + in_gap = true; + else { + CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, + &curruns, size_t); + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + in_gap = false; + } + malloc_cprintf(write_cb, cbopaque, + "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64" %12zu\n", + run_size, nbins + j, curruns * run_size, nmalloc, + ndalloc, nrequests, curruns); + } + } + if (in_gap) { + malloc_cprintf(write_cb, 
cbopaque, + " ---\n"); + } +} + +static void +stats_arena_hchunks_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i) +{ + unsigned nbins, nlruns, nhchunks, j; + bool in_gap; + + malloc_cprintf(write_cb, cbopaque, + "huge: size ind allocated nmalloc ndalloc" + " nrequests curhchunks\n"); + CTL_GET("arenas.nbins", &nbins, unsigned); + CTL_GET("arenas.nlruns", &nlruns, unsigned); + CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + for (j = 0, in_gap = false; j < nhchunks; j++) { + uint64_t nmalloc, ndalloc, nrequests; + size_t hchunk_size, curhchunks; + + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nmalloc", i, j, + &nmalloc, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.ndalloc", i, j, + &ndalloc, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j, + &nrequests, uint64_t); + if (nrequests == 0) + in_gap = true; + else { + CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, size_t); - if (gap_start != -1) { - malloc_cprintf(write_cb, cbopaque, "[%zu]\n", - j - gap_start); - gap_start = -1; + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, + j, &curhchunks, size_t); + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + in_gap = false; } malloc_cprintf(write_cb, cbopaque, - "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu\n", - run_size, run_size / page, nmalloc, ndalloc, - nrequests, curruns); + "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64" %12zu\n", + hchunk_size, nbins + nlruns + j, + curhchunks * hchunk_size, nmalloc, ndalloc, + nrequests, curhchunks); } } - if (gap_start != -1) - malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start); + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large) + unsigned i, bool bins, bool large, bool huge) { unsigned nthreads; const char *dss; + ssize_t lg_dirty_mult; size_t page, pactive, pdirty, mapped; 
+ size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests; size_t large_allocated; uint64_t large_nmalloc, large_ndalloc, large_nrequests; + size_t huge_allocated; + uint64_t huge_nmalloc, huge_ndalloc, huge_nrequests; CTL_GET("arenas.page", &page, size_t); - CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); + CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); malloc_cprintf(write_cb, cbopaque, "assigned threads: %u\n", nthreads); - CTL_I_GET("stats.arenas.0.dss", &dss, const char *); + CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", dss); - CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); - CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); - CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); - CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t); - CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t); + CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: N/A\n"); + } + CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); + CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); + CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); + CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); + CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); malloc_cprintf(write_cb, cbopaque, - "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s," - " %"PRIu64" madvise%s, %"PRIu64" purged\n", - pactive, pdirty, npurge, npurge == 1 ? "" : "s", - nmadvise, nmadvise == 1 ? 
"" : "s", purged); + "dirty pages: %zu:%zu active:dirty, %"FMTu64" sweep%s, %"FMTu64 + " madvise%s, %"FMTu64" purged\n", pactive, pdirty, npurge, npurge == + 1 ? "" : "s", nmadvise, nmadvise == 1 ? "" : "s", purged); malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc nrequests\n"); - CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t); - CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t); - CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t); - CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t); + " allocated nmalloc ndalloc" + " nrequests\n"); + CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, + size_t); + CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, + uint64_t); malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + "small: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", small_allocated, small_nmalloc, small_ndalloc, small_nrequests); - CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t); - CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t); - CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t); - CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t); + CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, + size_t); + CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, + uint64_t); malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + "large: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + 
CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); + CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests, + uint64_t); + malloc_cprintf(write_cb, cbopaque, + "huge: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated + large_allocated + huge_allocated, + small_nmalloc + large_nmalloc + huge_nmalloc, + small_ndalloc + large_ndalloc + huge_ndalloc, + small_nrequests + large_nrequests + huge_nrequests); + malloc_cprintf(write_cb, cbopaque, + "active: %12zu\n", pactive * page); + CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", - small_allocated + large_allocated, - small_nmalloc + large_nmalloc, - small_ndalloc + large_ndalloc, - small_nrequests + large_nrequests); - malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); - CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); + "mapped: %12zu\n", mapped); + CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, + size_t); + CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, + size_t); + malloc_cprintf(write_cb, cbopaque, + "metadata: mapped: %zu, allocated: %zu\n", + metadata_mapped, metadata_allocated); if (bins) stats_arena_bins_print(write_cb, cbopaque, i); if (large) stats_arena_lruns_print(write_cb, cbopaque, i); + if (huge) + stats_arena_hchunks_print(write_cb, cbopaque, i); } void @@ -277,6 +368,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool unmerged = true; bool bins = true; bool large = true; + bool huge = true; /* * Refresh 
stats, in case mallctl() was called by the application. @@ -319,6 +411,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, case 'l': large = false; break; + case 'h': + huge = false; + break; default:; } } @@ -327,7 +422,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "___ Begin jemalloc statistics ___\n"); if (general) { - int err; const char *cpv; bool bv; unsigned uv; @@ -346,26 +440,40 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bv ? "enabled" : "disabled"); #define OPT_WRITE_BOOL(n) \ - if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ - == 0) { \ + if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %s\n", bv ? "true" : "false"); \ } +#define OPT_WRITE_BOOL_MUTABLE(n, m) { \ + bool bv2; \ + if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \ + je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s ("#m": %s)\n", bv ? "true" \ + : "false", bv2 ? 
"true" : "false"); \ + } \ +} #define OPT_WRITE_SIZE_T(n) \ - if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ - == 0) { \ + if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zu\n", sv); \ } #define OPT_WRITE_SSIZE_T(n) \ - if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ - == 0) { \ + if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd\n", ssv); \ } +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ + ssize_t ssv2; \ + if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ + je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd ("#m": %zd)\n", \ + ssv, ssv2); \ + } \ +} #define OPT_WRITE_CHAR_P(n) \ - if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ - == 0) { \ + if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": \"%s\"\n", cpv); \ } @@ -376,9 +484,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_CHAR_P(dss) OPT_WRITE_SIZE_T(narenas) - OPT_WRITE_SSIZE_T(lg_dirty_mult) + OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, arenas.lg_dirty_mult) OPT_WRITE_BOOL(stats_print) - OPT_WRITE_BOOL(junk) + OPT_WRITE_CHAR_P(junk) OPT_WRITE_SIZE_T(quarantine) OPT_WRITE_BOOL(redzone) OPT_WRITE_BOOL(zero) @@ -389,7 +497,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T(lg_tcache_max) OPT_WRITE_BOOL(prof) OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_BOOL(prof_active) + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, + prof.thread_active_init) OPT_WRITE_SSIZE_T(lg_prof_sample) OPT_WRITE_BOOL(prof_accum) OPT_WRITE_SSIZE_T(lg_prof_interval) @@ -398,6 +508,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(prof_leak) #undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE #undef 
OPT_WRITE_SIZE_T #undef OPT_WRITE_SSIZE_T #undef OPT_WRITE_CHAR_P @@ -411,12 +522,13 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, sizeof(void *)); CTL_GET("arenas.quantum", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", + sv); CTL_GET("arenas.page", &sv, size_t); malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); + CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { malloc_cprintf(write_cb, cbopaque, "Min active:dirty page ratio per arena: %u:1\n", @@ -425,22 +537,20 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } - if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) - == 0) { + if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { malloc_cprintf(write_cb, cbopaque, "Maximum thread-cached size class: %zu\n", sv); } - if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && - bv) { - CTL_GET("opt.lg_prof_sample", &sv, size_t); + if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) { + CTL_GET("prof.lg_sample", &sv, size_t); malloc_cprintf(write_cb, cbopaque, - "Average profile sample interval: %"PRIu64 + "Average profile sample interval: %"FMTu64 " (2^%zu)\n", (((uint64_t)1U) << sv), sv); CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); if (ssv >= 0) { malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: %"PRIu64 + "Average profile dump interval: %"FMTu64 " (2^%zd)\n", (((uint64_t)1U) << ssv), ssv); } else { @@ -449,47 +559,27 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } CTL_GET("opt.lg_chunk", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n", - (ZU(1) << sv), sv); + malloc_cprintf(write_cb, cbopaque, + "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv); } if 
(config_stats) { size_t *cactive; - size_t allocated, active, mapped; - size_t chunks_current, chunks_high; - uint64_t chunks_total; - size_t huge_allocated; - uint64_t huge_nmalloc, huge_ndalloc; + size_t allocated, active, metadata, resident, mapped; CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, mapped: %zu\n", - allocated, active, mapped); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", atomic_read_z(cactive)); - - /* Print chunk stats. */ - CTL_GET("stats.chunks.total", &chunks_total, uint64_t); - CTL_GET("stats.chunks.high", &chunks_high, size_t); - CTL_GET("stats.chunks.current", &chunks_current, size_t); - malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " - "highchunks curchunks\n"); - malloc_cprintf(write_cb, cbopaque, - " %13"PRIu64" %12zu %12zu\n", - chunks_total, chunks_high, chunks_current); - - /* Print huge stats. */ - CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t); - CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t); - CTL_GET("stats.huge.allocated", &huge_allocated, size_t); - malloc_cprintf(write_cb, cbopaque, - "huge: nmalloc ndalloc allocated\n"); + "Allocated: %zu, active: %zu, metadata: %zu," + " resident: %zu, mapped: %zu\n", + allocated, active, metadata, resident, mapped); malloc_cprintf(write_cb, cbopaque, - " %12"PRIu64" %12"PRIu64" %12zu\n", - huge_nmalloc, huge_ndalloc, huge_allocated); + "Current active ceiling: %zu\n", + atomic_read_z(cactive)); if (merged) { unsigned narenas; @@ -508,12 +598,12 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, ninitialized++; } - if (ninitialized > 1 || unmerged == false) { + if (ninitialized > 1 || !unmerged) { /* Print merged arena stats. 
*/ malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); stats_arena_print(write_cb, cbopaque, - narenas, bins, large); + narenas, bins, large, huge); } } } @@ -539,7 +629,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, cbopaque, "\narenas[%u]:\n", i); stats_arena_print(write_cb, - cbopaque, i, bins, large); + cbopaque, i, bins, large, + huge); } } } diff --git a/dep/jemalloc/src/tcache.c b/dep/jemalloc/src/tcache.c index 6de92960b2d..fdafd0c620a 100644 --- a/dep/jemalloc/src/tcache.c +++ b/dep/jemalloc/src/tcache.c @@ -4,9 +4,6 @@ /******************************************************************************/ /* Data. */ -malloc_tsd_data(, tcache, tcache_t *, NULL) -malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default) - bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; @@ -16,6 +13,14 @@ static unsigned stack_nelms; /* Total stack elms per tcache. */ size_t nhbins; size_t tcache_maxclass; +tcaches_t *tcaches; + +/* Index of first element within tcaches that has never been used. */ +static unsigned tcaches_past; + +/* Head of singly linked list tracking available tcaches elements. */ +static tcaches_t *tcaches_avail; + /******************************************************************************/ size_t tcache_salloc(const void *ptr) @@ -25,9 +30,9 @@ size_t tcache_salloc(const void *ptr) } void -tcache_event_hard(tcache_t *tcache) +tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { - size_t binind = tcache->next_gc_bin; + szind_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; @@ -36,11 +41,12 @@ tcache_event_hard(tcache_t *tcache) * Flush (ceiling) 3/4 of the objects below the low water mark. 
*/ if (binind < NBINS) { - tcache_bin_flush_small(tbin, binind, tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); + tcache_bin_flush_small(tsd, tcache, tbin, binind, + tbin->ncached - tbin->low_water + (tbin->low_water + >> 2)); } else { - tcache_bin_flush_large(tbin, binind, tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); + tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached + - tbin->low_water + (tbin->low_water >> 2), tcache); } /* * Reduce fill count by 2X. Limit lg_fill_div such that the @@ -65,12 +71,13 @@ tcache_event_hard(tcache_t *tcache) } void * -tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) +tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind) { void *ret; - arena_tcache_fill_small(tcache->arena, tbin, binind, - config_prof ? tcache->prof_accumbytes : 0); + arena_tcache_fill_small(arena, tbin, binind, config_prof ? + tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; ret = tcache_alloc_easy(tbin); @@ -79,9 +86,10 @@ tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) } void -tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache) +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem) { + arena_t *arena; void *ptr; unsigned i, nflush, ndeferred; bool merged_stats = false; @@ -89,22 +97,24 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, assert(binind < NBINS); assert(rem <= tbin->ncached); + arena = arena_choose(tsd, NULL); + assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. 
*/ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); - arena_t *arena = chunk->arena; - arena_bin_t *bin = &arena->bins[binind]; + arena_t *bin_arena = extent_node_arena_get(&chunk->node); + arena_bin_t *bin = &bin_arena->bins[binind]; - if (config_prof && arena == tcache->arena) { + if (config_prof && bin_arena == arena) { if (arena_prof_accum(arena, tcache->prof_accumbytes)) prof_idump(); tcache->prof_accumbytes = 0; } malloc_mutex_lock(&bin->lock); - if (config_stats && arena == tcache->arena) { - assert(merged_stats == false); + if (config_stats && bin_arena == arena) { + assert(!merged_stats); merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; @@ -115,17 +125,13 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) { + if (extent_node_arena_get(&chunk->node) == bin_arena) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = - arena_mapp_get(chunk, pageind); - if (config_fill && opt_junk) { - arena_alloc_junk_small(ptr, - &arena_bin_info[binind], true); - } - arena_dalloc_bin_locked(arena, chunk, ptr, - mapelm); + arena_chunk_map_bits_t *bitselm = + arena_bitselm_get(chunk, pageind); + arena_dalloc_bin_junked_locked(bin_arena, chunk, + ptr, bitselm); } else { /* * This object was allocated via a different @@ -139,12 +145,12 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, } malloc_mutex_unlock(&bin->lock); } - if (config_stats && merged_stats == false) { + if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. 
*/ - arena_bin_t *bin = &tcache->arena->bins[binind]; + arena_bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; @@ -160,9 +166,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, } void -tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache) +tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache) { + arena_t *arena; void *ptr; unsigned i, nflush, ndeferred; bool merged_stats = false; @@ -170,17 +177,19 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, assert(binind < nhbins); assert(rem <= tbin->ncached); + arena = arena_choose(tsd, NULL); + assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( tbin->avail[0]); - arena_t *arena = chunk->arena; + arena_t *locked_arena = extent_node_arena_get(&chunk->node); UNUSED bool idump; if (config_prof) idump = false; - malloc_mutex_lock(&arena->lock); - if ((config_prof || config_stats) && arena == tcache->arena) { + malloc_mutex_lock(&locked_arena->lock); + if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { idump = arena_prof_accum_locked(arena, tcache->prof_accumbytes); @@ -200,9 +209,11 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) - arena_dalloc_large_locked(arena, chunk, ptr); - else { + if (extent_node_arena_get(&chunk->node) == + locked_arena) { + arena_dalloc_large_junked_locked(locked_arena, + chunk, ptr); + } else { /* * This object was allocated via a different * arena than the one that is currently locked. 
@@ -213,16 +224,15 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, ndeferred++; } } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(&locked_arena->lock); if (config_prof && idump) prof_idump(); } - if (config_stats && merged_stats == false) { + if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - NBINS].nrequests += @@ -249,24 +259,58 @@ tcache_arena_associate(tcache_t *tcache, arena_t *arena) ql_tail_insert(&arena->tcache_ql, tcache, link); malloc_mutex_unlock(&arena->lock); } - tcache->arena = arena; } void -tcache_arena_dissociate(tcache_t *tcache) +tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena) +{ + + tcache_arena_dissociate(tcache, oldarena); + tcache_arena_associate(tcache, newarena); +} + +void +tcache_arena_dissociate(tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(&tcache->arena->lock); - ql_remove(&tcache->arena->tcache_ql, tcache, link); - tcache_stats_merge(tcache, tcache->arena); - malloc_mutex_unlock(&tcache->arena->lock); + malloc_mutex_lock(&arena->lock); + if (config_debug) { + bool in_ql = false; + tcache_t *iter; + ql_foreach(iter, &arena->tcache_ql, link) { + if (iter == tcache) { + in_ql = true; + break; + } + } + assert(in_ql); + } + ql_remove(&arena->tcache_ql, tcache, link); + tcache_stats_merge(tcache, arena); + malloc_mutex_unlock(&arena->lock); } } tcache_t * -tcache_create(arena_t *arena) +tcache_get_hard(tsd_t *tsd) +{ + arena_t *arena; + + if (!tcache_enabled_get()) { + if (tsd_nominal(tsd)) + tcache_enabled_set(false); /* Memoize. 
*/ + return (NULL); + } + arena = arena_choose(tsd, NULL); + if (unlikely(arena == NULL)) + return (NULL); + return (tcache_create(tsd, arena)); +} + +tcache_t * +tcache_create(tsd_t *tsd, arena_t *arena) { tcache_t *tcache; size_t size, stack_offset; @@ -277,23 +321,10 @@ tcache_create(arena_t *arena) size = PTR_CEILING(size); stack_offset = size; size += stack_nelms * sizeof(void *); - /* - * Round up to the nearest multiple of the cacheline size, in order to - * avoid the possibility of false cacheline sharing. - * - * That this works relies on the same logic as in ipalloc(), but we - * cannot directly call ipalloc() here due to tcache bootstrapping - * issues. - */ - size = (size + CACHELINE_MASK) & (-CACHELINE); - - if (size <= SMALL_MAXCLASS) - tcache = (tcache_t *)arena_malloc_small(arena, size, true); - else if (size <= tcache_maxclass) - tcache = (tcache_t *)arena_malloc_large(arena, size, true); - else - tcache = (tcache_t *)icalloct(size, false, arena); + /* Avoid false cacheline sharing. 
*/ + size = sa2u(size, CACHELINE); + tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, a0get()); if (tcache == NULL) return (NULL); @@ -307,25 +338,23 @@ tcache_create(arena_t *arena) stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } - tcache_tsd_set(&tcache); - return (tcache); } -void -tcache_destroy(tcache_t *tcache) +static void +tcache_destroy(tsd_t *tsd, tcache_t *tcache) { + arena_t *arena; unsigned i; - size_t tcache_size; - tcache_arena_dissociate(tcache); + arena = arena_choose(tsd, NULL); + tcache_arena_dissociate(tcache, arena); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_small(tbin, i, 0, tcache); + tcache_bin_flush_small(tsd, tcache, tbin, i, 0); if (config_stats && tbin->tstats.nrequests != 0) { - arena_t *arena = tcache->arena; arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(&bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; @@ -335,10 +364,9 @@ tcache_destroy(tcache_t *tcache) for (; i < nhbins; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_large(tbin, i, 0, tcache); + tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { - arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[i - NBINS].nrequests += @@ -348,57 +376,33 @@ tcache_destroy(tcache_t *tcache) } if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(tcache->arena, tcache->prof_accumbytes)) + arena_prof_accum(arena, tcache->prof_accumbytes)) prof_idump(); - tcache_size = arena_salloc(tcache, false); - if (tcache_size <= SMALL_MAXCLASS) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> - LG_PAGE; - arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - - arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm); - } else if (tcache_size <= 
tcache_maxclass) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - - arena_dalloc_large(arena, chunk, tcache); - } else - idalloct(tcache, false); + idalloctm(tsd, tcache, false, true); } void -tcache_thread_cleanup(void *arg) +tcache_cleanup(tsd_t *tsd) { - tcache_t *tcache = *(tcache_t **)arg; + tcache_t *tcache; - if (tcache == TCACHE_STATE_DISABLED) { - /* Do nothing. */ - } else if (tcache == TCACHE_STATE_REINCARNATED) { - /* - * Another destructor called an allocator function after this - * destructor was called. Reset tcache to - * TCACHE_STATE_PURGATORY in order to receive another callback. - */ - tcache = TCACHE_STATE_PURGATORY; - tcache_tsd_set(&tcache); - } else if (tcache == TCACHE_STATE_PURGATORY) { - /* - * The previous time this destructor was called, we set the key - * to TCACHE_STATE_PURGATORY so that other destructors wouldn't - * cause re-creation of the tcache. This time, do nothing, so - * that the destructor will not be called again. - */ - } else if (tcache != NULL) { - assert(tcache != TCACHE_STATE_PURGATORY); - tcache_destroy(tcache); - tcache = TCACHE_STATE_PURGATORY; - tcache_tsd_set(&tcache); + if (!config_tcache) + return; + + if ((tcache = tsd_tcache_get(tsd)) != NULL) { + tcache_destroy(tsd, tcache); + tsd_tcache_set(tsd, NULL); } } +void +tcache_enabled_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + /* Caller must own arena->lock. 
*/ void tcache_stats_merge(tcache_t *tcache, arena_t *arena) @@ -427,22 +431,82 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } bool -tcache_boot0(void) +tcaches_create(tsd_t *tsd, unsigned *r_ind) +{ + tcache_t *tcache; + tcaches_t *elm; + + if (tcaches == NULL) { + tcaches = base_alloc(sizeof(tcache_t *) * + (MALLOCX_TCACHE_MAX+1)); + if (tcaches == NULL) + return (true); + } + + if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) + return (true); + tcache = tcache_create(tsd, a0get()); + if (tcache == NULL) + return (true); + + if (tcaches_avail != NULL) { + elm = tcaches_avail; + tcaches_avail = tcaches_avail->next; + elm->tcache = tcache; + *r_ind = elm - tcaches; + } else { + elm = &tcaches[tcaches_past]; + elm->tcache = tcache; + *r_ind = tcaches_past; + tcaches_past++; + } + + return (false); +} + +static void +tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) +{ + + if (elm->tcache == NULL) + return; + tcache_destroy(tsd, elm->tcache); + elm->tcache = NULL; +} + +void +tcaches_flush(tsd_t *tsd, unsigned ind) +{ + + tcaches_elm_flush(tsd, &tcaches[ind]); +} + +void +tcaches_destroy(tsd_t *tsd, unsigned ind) +{ + tcaches_t *elm = &tcaches[ind]; + tcaches_elm_flush(tsd, elm); + elm->next = tcaches_avail; + tcaches_avail = elm; +} + +bool +tcache_boot(void) { unsigned i; /* - * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is + * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is * known. */ if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; - else if ((1U << opt_lg_tcache_max) > arena_maxclass) - tcache_maxclass = arena_maxclass; + else if ((1U << opt_lg_tcache_max) > large_maxclass) + tcache_maxclass = large_maxclass; else tcache_maxclass = (1U << opt_lg_tcache_max); - nhbins = NBINS + (tcache_maxclass >> LG_PAGE); + nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. 
*/ tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * @@ -451,7 +515,11 @@ tcache_boot0(void) return (true); stack_nelms = 0; for (i = 0; i < NBINS; i++) { - if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { + if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { + tcache_bin_info[i].ncached_max = + TCACHE_NSLOTS_SMALL_MIN; + } else if ((arena_bin_info[i].nregs << 1) <= + TCACHE_NSLOTS_SMALL_MAX) { tcache_bin_info[i].ncached_max = (arena_bin_info[i].nregs << 1); } else { @@ -467,13 +535,3 @@ tcache_boot0(void) return (false); } - -bool -tcache_boot1(void) -{ - - if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) - return (true); - - return (false); -} diff --git a/dep/jemalloc/src/tsd.c b/dep/jemalloc/src/tsd.c index 700caabfe47..9ffe9afef7a 100644 --- a/dep/jemalloc/src/tsd.c +++ b/dep/jemalloc/src/tsd.c @@ -7,21 +7,22 @@ static unsigned ncleanups; static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; +malloc_tsd_data(, , tsd_t, TSD_INITIALIZER) + /******************************************************************************/ void * malloc_tsd_malloc(size_t size) { - /* Avoid choose_arena() in order to dodge bootstrapping issues. */ - return (arena_malloc(arenas[0], size, false, false)); + return (a0malloc(CACHELINE_CEILING(size))); } void malloc_tsd_dalloc(void *wrapper) { - idalloct(wrapper, false); + a0dalloc(wrapper); } void @@ -67,10 +68,61 @@ malloc_tsd_cleanup_register(bool (*f)(void)) } void -malloc_tsd_boot(void) +tsd_cleanup(void *arg) +{ + tsd_t *tsd = (tsd_t *)arg; + + switch (tsd->state) { + case tsd_state_uninitialized: + /* Do nothing. */ + break; + case tsd_state_nominal: +#define O(n, t) \ + n##_cleanup(tsd); +MALLOC_TSD +#undef O + tsd->state = tsd_state_purgatory; + tsd_set(tsd); + break; + case tsd_state_purgatory: + /* + * The previous time this destructor was called, we set the + * state to tsd_state_purgatory so that other destructors + * wouldn't cause re-creation of the tsd. 
This time, do + * nothing, and do not request another callback. + */ + break; + case tsd_state_reincarnated: + /* + * Another destructor deallocated memory after this destructor + * was called. Reset state to tsd_state_purgatory and request + * another callback. + */ + tsd->state = tsd_state_purgatory; + tsd_set(tsd); + break; + default: + not_reached(); + } +} + +bool +malloc_tsd_boot0(void) { ncleanups = 0; + if (tsd_boot0()) + return (true); + *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true; + return (false); +} + +void +malloc_tsd_boot1(void) +{ + + tsd_boot1(); + *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false; } #ifdef _WIN32 @@ -102,7 +154,7 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) # pragma section(".CRT$XLY",long,read) #endif JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) -static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, +static BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif diff --git a/dep/jemalloc/src/util.c b/dep/jemalloc/src/util.c index 93a19fd16f7..4cb0d6c1e68 100644 --- a/dep/jemalloc/src/util.c +++ b/dep/jemalloc/src/util.c @@ -81,10 +81,10 @@ buferror(int err, char *buf, size_t buflen) { #ifdef _WIN32 - FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0, + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, buflen, NULL); return (0); -#elif defined(_GNU_SOURCE) +#elif defined(__GLIBC__) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); @@ -100,7 +100,7 @@ uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { uintmax_t ret, digit; - int b; + unsigned b; bool neg; const char *p, *ns; @@ -266,7 +266,7 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) sign = '-'; switch (sign) { case '-': - if (neg == false) + if (!neg) break; /* Fall through. 
*/ case ' ': @@ -329,7 +329,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) /* Left padding. */ \ size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \ (size_t)width - slen : 0); \ - if (left_justify == false && pad_len != 0) { \ + if (!left_justify && pad_len != 0) { \ size_t j; \ for (j = 0; j < pad_len; j++) \ APPEND_C(' '); \ @@ -381,7 +381,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case 'p': /* Synthetic; used for %p. */ \ val = va_arg(ap, uintptr_t); \ break; \ - default: not_reached(); \ + default: \ + not_reached(); \ + val = 0; \ } \ } while (0) @@ -404,19 +406,19 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) while (true) { switch (*f) { case '#': - assert(alt_form == false); + assert(!alt_form); alt_form = true; break; case '-': - assert(left_justify == false); + assert(!left_justify); left_justify = true; break; case ' ': - assert(plus_space == false); + assert(!plus_space); plus_space = true; break; case '+': - assert(plus_plus == false); + assert(!plus_plus); plus_plus = true; break; default: goto label_width; @@ -548,7 +550,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) assert(len == '?' || len == 'l'); assert_not_implemented(len != 'l'); s = va_arg(ap, char *); - slen = (prec < 0) ? strlen(s) : prec; + slen = (prec < 0) ? strlen(s) : (size_t)prec; APPEND_PADDED_S(s, slen, width, left_justify); f++; break; @@ -584,7 +586,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) return (ret); } -JEMALLOC_ATTR(format(printf, 3, 4)) +JEMALLOC_FORMAT_PRINTF(3, 4) int malloc_snprintf(char *str, size_t size, const char *format, ...) { @@ -623,7 +625,7 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, * Print to a callback function in such a way as to (hopefully) avoid memory * allocation. 
*/ -JEMALLOC_ATTR(format(printf, 3, 4)) +JEMALLOC_FORMAT_PRINTF(3, 4) void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, ...) @@ -636,7 +638,7 @@ malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, } /* Print to stderr in such a way as to avoid memory allocation. */ -JEMALLOC_ATTR(format(printf, 1, 2)) +JEMALLOC_FORMAT_PRINTF(1, 2) void malloc_printf(const char *format, ...) { diff --git a/dep/jemalloc/src/valgrind.c b/dep/jemalloc/src/valgrind.c new file mode 100644 index 00000000000..8e7ef3a2e63 --- /dev/null +++ b/dep/jemalloc/src/valgrind.c @@ -0,0 +1,34 @@ +#include "jemalloc/internal/jemalloc_internal.h" +#ifndef JEMALLOC_VALGRIND +# error "This source file is for Valgrind integration." +#endif + +#include <valgrind/memcheck.h> + +void +valgrind_make_mem_noaccess(void *ptr, size_t usize) +{ + + VALGRIND_MAKE_MEM_NOACCESS(ptr, usize); +} + +void +valgrind_make_mem_undefined(void *ptr, size_t usize) +{ + + VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize); +} + +void +valgrind_make_mem_defined(void *ptr, size_t usize) +{ + + VALGRIND_MAKE_MEM_DEFINED(ptr, usize); +} + +void +valgrind_freelike_block(void *ptr, size_t usize) +{ + + VALGRIND_FREELIKE_BLOCK(ptr, usize); +} diff --git a/dep/jemalloc/src/zone.c b/dep/jemalloc/src/zone.c index e0302ef4edc..12e1734a9eb 100644 --- a/dep/jemalloc/src/zone.c +++ b/dep/jemalloc/src/zone.c @@ -176,6 +176,7 @@ register_zone(void) * register jemalloc's. */ malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_t *purgeable_zone = NULL; if (!default_zone->zone_name || strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { return; @@ -237,22 +238,37 @@ register_zone(void) * run time. */ if (malloc_default_purgeable_zone != NULL) - malloc_default_purgeable_zone(); + purgeable_zone = malloc_default_purgeable_zone(); /* Register the custom zone. At this point it won't be the default. 
*/ malloc_zone_register(&zone); - /* - * Unregister and reregister the default zone. On OSX >= 10.6, - * unregistering takes the last registered zone and places it at the - * location of the specified zone. Unregistering the default zone thus - * makes the last registered one the default. On OSX < 10.6, - * unregistering shifts all registered zones. The first registered zone - * then becomes the default. - */ do { default_zone = malloc_default_zone(); + /* + * Unregister and reregister the default zone. On OSX >= 10.6, + * unregistering takes the last registered zone and places it + * at the location of the specified zone. Unregistering the + * default zone thus makes the last registered one the default. + * On OSX < 10.6, unregistering shifts all registered zones. + * The first registered zone then becomes the default. + */ malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); + /* + * On OSX 10.6, having the default purgeable zone appear before + * the default zone makes some things crash because it thinks it + * owns the default zone allocated pointers. We thus + * unregister/re-register it in order to ensure it's always + * after the default zone. On OSX < 10.6, there is no purgeable + * zone, so this does nothing. On OSX >= 10.6, unregistering + * replaces the purgeable zone with the last registered zone + * above, i.e. the default zone. Registering it again then puts + * it at the end, obviously after the default zone. + */ + if (purgeable_zone) { + malloc_zone_unregister(purgeable_zone); + malloc_zone_register(purgeable_zone); + } } while (malloc_default_zone() != &zone); } |