-rw-r--r-- | dep/PackageList.txt | 2
-rw-r--r-- | dep/jemalloc/CMakeLists.txt | 4
-rw-r--r-- | dep/jemalloc/COPYING | 51
-rw-r--r-- | dep/jemalloc/README | 16
-rw-r--r-- | dep/jemalloc/TC_NOTE.txt | 9
-rw-r--r-- | dep/jemalloc/VERSION | 1
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/arena.h (renamed from dep/jemalloc/jemalloc/internal/arena.h) | 194
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/base.h (renamed from dep/jemalloc/jemalloc/internal/base.h) | 0
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/chunk.h (renamed from dep/jemalloc/jemalloc/internal/chunk.h) | 8
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/chunk_dss.h (renamed from dep/jemalloc/jemalloc/internal/chunk_dss.h) | 1
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/chunk_mmap.h (renamed from dep/jemalloc/jemalloc/internal/chunk_mmap.h) | 3
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/chunk_swap.h (renamed from dep/jemalloc/jemalloc/internal/chunk_swap.h) | 1
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/ckh.h (renamed from dep/jemalloc/jemalloc/internal/ckh.h) | 2
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/ctl.h (renamed from dep/jemalloc/jemalloc/internal/ctl.h) | 10
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/extent.h (renamed from dep/jemalloc/jemalloc/internal/extent.h) | 2
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/hash.h (renamed from dep/jemalloc/jemalloc/internal/hash.h) | 0
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/huge.h (renamed from dep/jemalloc/jemalloc/internal/huge.h) | 13
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h (renamed from dep/jemalloc/jemalloc/internal/jemalloc_internal.h) | 488
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/jemprn.h (renamed from dep/jemalloc/jemalloc/internal/totally_not_p_r_n.h) | 0
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/mb.h (renamed from dep/jemalloc/jemalloc/internal/mb.h) | 0
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/mutex.h (renamed from dep/jemalloc/jemalloc/internal/mutex.h) | 7
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/prof.h | 547
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/ql.h (renamed from dep/jemalloc/jemalloc/internal/ql.h) | 0
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/qr.h (renamed from dep/jemalloc/jemalloc/internal/qr.h) | 0
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/rb.h (renamed from dep/jemalloc/jemalloc/internal/rb.h) | 0
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/rtree.h | 161
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/stats.h (renamed from dep/jemalloc/jemalloc/internal/stats.h) | 2
-rw-r--r-- | dep/jemalloc/include/jemalloc/internal/tcache.h (renamed from dep/jemalloc/jemalloc/internal/tcache.h) | 48
-rw-r--r-- | dep/jemalloc/include/jemalloc/jemalloc.h (renamed from dep/jemalloc/jemalloc/jemalloc.h) | 34
-rw-r--r-- | dep/jemalloc/include/jemalloc/jemalloc_defs.h (renamed from dep/jemalloc/jemalloc/jemalloc_defs.h) | 51
-rw-r--r-- | dep/jemalloc/jemalloc.c | 1349
-rw-r--r-- | dep/jemalloc/jemalloc/internal/jemalloc_internal.h.in | 561
-rw-r--r-- | dep/jemalloc/jemalloc/internal/prof.h | 171
-rw-r--r-- | dep/jemalloc/jemalloc/jemalloc.h.in | 42
-rw-r--r-- | dep/jemalloc/jemalloc/jemalloc_defs.h.in | 101
-rw-r--r-- | dep/jemalloc/src/arena.c (renamed from dep/jemalloc/arena.c) | 1093
-rw-r--r-- | dep/jemalloc/src/base.c (renamed from dep/jemalloc/base.c) | 2
-rw-r--r-- | dep/jemalloc/src/chunk.c (renamed from dep/jemalloc/chunk.c) | 39
-rw-r--r-- | dep/jemalloc/src/chunk_dss.c (renamed from dep/jemalloc/chunk_dss.c) | 16
-rw-r--r-- | dep/jemalloc/src/chunk_mmap.c (renamed from dep/jemalloc/chunk_mmap.c) | 96
-rw-r--r-- | dep/jemalloc/src/chunk_swap.c (renamed from dep/jemalloc/chunk_swap.c) | 33
-rw-r--r-- | dep/jemalloc/src/ckh.c (renamed from dep/jemalloc/ckh.c) | 28
-rw-r--r-- | dep/jemalloc/src/ctl.c (renamed from dep/jemalloc/ctl.c) | 355
-rw-r--r-- | dep/jemalloc/src/extent.c (renamed from dep/jemalloc/extent.c) | 0
-rw-r--r-- | dep/jemalloc/src/hash.c (renamed from dep/jemalloc/hash.c) | 0
-rw-r--r-- | dep/jemalloc/src/huge.c (renamed from dep/jemalloc/huge.c) | 154
-rw-r--r-- | dep/jemalloc/src/jemalloc.c | 1759
-rw-r--r-- | dep/jemalloc/src/mb.c (renamed from dep/jemalloc/mb.c) | 0
-rw-r--r-- | dep/jemalloc/src/mutex.c (renamed from dep/jemalloc/mutex.c) | 14
-rw-r--r-- | dep/jemalloc/src/prof.c (renamed from dep/jemalloc/prof.c) | 789
-rw-r--r-- | dep/jemalloc/src/rtree.c | 43
-rw-r--r-- | dep/jemalloc/src/stats.c (renamed from dep/jemalloc/stats.c) | 204
-rw-r--r-- | dep/jemalloc/src/tcache.c (renamed from dep/jemalloc/tcache.c) | 82 |
53 files changed, 4914 insertions, 3672 deletions
diff --git a/dep/PackageList.txt b/dep/PackageList.txt index 22fdc0d555a..c71d23764a3 100644 --- a/dep/PackageList.txt +++ b/dep/PackageList.txt @@ -14,7 +14,7 @@ G3D (a commercial-grade C++ 3D engine available as Open Source (BSD License) jemalloc (a general-purpose scalable concurrent malloc-implementation) http://www.canonware.com/jemalloc/ - Version: 1.0.0 + Version: 2.1.0 libMPQ (a library for reading MPQ files) https://libmpq.org/ diff --git a/dep/jemalloc/CMakeLists.txt b/dep/jemalloc/CMakeLists.txt index d560c6ba675..0a802cd8f5b 100644 --- a/dep/jemalloc/CMakeLists.txt +++ b/dep/jemalloc/CMakeLists.txt @@ -8,16 +8,14 @@ # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -file(GLOB sources *.c) +file(GLOB sources src/*.c) set(jemalloc_STAT_SRC ${sources} ) include_directories( - ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/include/internal ) add_definitions(-D_GNU_SOURCE -D_REENTRANT) diff --git a/dep/jemalloc/COPYING b/dep/jemalloc/COPYING new file mode 100644 index 00000000000..10ade120049 --- /dev/null +++ b/dep/jemalloc/COPYING @@ -0,0 +1,51 @@ +Unless otherwise specified, files in the jemalloc source distribution are +subject to the following licenses: +-------------------------------------------------------------------------------- +Copyright (C) 2002-2010 Jason Evans <jasone@canonware.com>. +All rights reserved. +Copyright (C) 2007-2010 Mozilla Foundation. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice(s), + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice(s), + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- +Copyright (C) 2009-2010 Facebook, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. +* Neither the name of Facebook, Inc. 
nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- diff --git a/dep/jemalloc/README b/dep/jemalloc/README new file mode 100644 index 00000000000..4d7b552bf3d --- /dev/null +++ b/dep/jemalloc/README @@ -0,0 +1,16 @@ +jemalloc is a general-purpose scalable concurrent malloc(3) implementation. +This distribution is a stand-alone "portable" implementation that currently +targets Linux and Apple OS X. jemalloc is included as the default allocator in +the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox +web browser on Microsoft Windows-related platforms. Depending on your needs, +one of the other divergent versions may suit your needs better than this +distribution. + +The COPYING file contains copyright and licensing information. + +The INSTALL file contains information on how to configure, build, and install +jemalloc. + +The ChangeLog file contains a brief summary of changes for each release. + +URL: http://www.canonware.com/jemalloc/ diff --git a/dep/jemalloc/TC_NOTE.txt b/dep/jemalloc/TC_NOTE.txt new file mode 100644 index 00000000000..7b045631d8c --- /dev/null +++ b/dep/jemalloc/TC_NOTE.txt @@ -0,0 +1,9 @@ +*** THIS FILE CONTAINS INFORMATION ABOUT CHANGES DONE TO THE JEMALLOC LIBRARY FILES *** +Removed from archive, as OSX does not use jemalloc: + src/zone.c + include/jemalloc/internal/zone.h + +Filename changed, as it messes up Windows-systems (when will Microsoft learn?) : + include/jemalloc/internal/prn.h -> include/jemalloc/internal/jemprn.h + +References to prn.h has been changed to use the new filename where needed. diff --git a/dep/jemalloc/VERSION b/dep/jemalloc/VERSION new file mode 100644 index 00000000000..585f53edd80 --- /dev/null +++ b/dep/jemalloc/VERSION @@ -0,0 +1 @@ +2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf diff --git a/dep/jemalloc/jemalloc/internal/arena.h b/dep/jemalloc/include/jemalloc/internal/arena.h index bb4ce2a54f7..9556c2c68f7 100644 --- a/dep/jemalloc/jemalloc/internal/arena.h +++ b/dep/jemalloc/include/jemalloc/internal/arena.h @@ -98,7 +98,7 @@ struct arena_chunk_map_s { #ifdef JEMALLOC_PROF /* Profile counters, used for large object runs. 
*/ - prof_thr_cnt_t *prof_cnt; + prof_ctx_t *prof_ctx; #endif /* @@ -121,17 +121,17 @@ struct arena_chunk_map_s { * * p : run page offset * s : run size - * c : size class (used only if prof_promote is true) + * c : (binind+1) for size class (used only if prof_promote is true) * x : don't care * - : 0 * + : 1 - * [DZLA] : bit set - * [dzla] : bit unset + * [DULA] : bit set + * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----dz-- - * xxxxxxxx xxxxxxxx xxxx---- -----Zxx - * ssssssss ssssssss ssss---- ----dZ-- + * ssssssss ssssssss ssss---- ----du-- + * xxxxxxxx xxxxxxxx xxxx---- -----Uxx + * ssssssss ssssssss ssss---- ----dU-- * * Unallocated (dirty): * ssssssss ssssssss ssss---- ----D--- @@ -144,7 +144,7 @@ struct arena_chunk_map_s { * pppppppp pppppppp pppp---- ----d--a * * Large: - * ssssssss ssssssss ssss++++ ++++D-la + * ssssssss ssssssss ssss---- ----D-la * xxxxxxxx xxxxxxxx xxxx---- ----xxxx * -------- -------- -------- ----D-la * @@ -152,7 +152,7 @@ struct arena_chunk_map_s { * ssssssss ssssssss sssscccc ccccD-la * * Large (not sampled, size == PAGE_SIZE): - * ssssssss ssssssss ssss++++ ++++D-la + * ssssssss ssssssss ssss---- ----D-la */ size_t bits; #ifdef JEMALLOC_PROF @@ -161,7 +161,7 @@ struct arena_chunk_map_s { #endif #define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) #define CHUNK_MAP_DIRTY ((size_t)0x8U) -#define CHUNK_MAP_ZEROED ((size_t)0x4U) +#define CHUNK_MAP_UNZEROED ((size_t)0x4U) #define CHUNK_MAP_LARGE ((size_t)0x2U) #define CHUNK_MAP_ALLOCATED ((size_t)0x1U) #define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED @@ -187,7 +187,12 @@ struct arena_chunk_s { /* Number of dirty pages. */ size_t ndirty; - /* Map of pages within chunk that keeps track of free/large/small. */ + /* + * Map of pages within chunk that keeps track of free/large/small. The + * first map_bias entries are omitted, since the chunk header does not + * need to be tracked in the map. This omission saves a header page + * for common chunk sizes (e.g. 4 MiB). + */ arena_chunk_map_t map[1]; /* Dynamically sized. */ }; typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; @@ -246,10 +251,10 @@ struct arena_bin_s { #ifdef JEMALLOC_PROF /* - * Offset of first (prof_cnt_t *) in a run header for this bin's size + * Offset of first (prof_ctx_t *) in a run header for this bin's size * class, or 0 if (opt_prof == false). */ - uint32_t cnt0_offset; + uint32_t ctx0_offset; #endif /* Offset of first region in a run for this bin's size class. 
*/ @@ -416,8 +421,12 @@ extern size_t sspace_min; extern size_t sspace_max; #define small_maxclass sspace_max -#define nlclasses (chunk_npages - arena_chunk_header_npages) +#define nlclasses (chunk_npages - map_bias) +void arena_purge_all(arena_t *arena); +#ifdef JEMALLOC_PROF +void arena_prof_accum(arena_t *arena, uint64_t accumbytes); +#endif #ifdef JEMALLOC_TCACHE void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind @@ -426,20 +435,15 @@ void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, # endif ); #endif -#ifdef JEMALLOC_PROF -void arena_prof_accum(arena_t *arena, uint64_t accumbytes); -#endif void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_malloc(size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t alignment, size_t size, - size_t alloc_size); +void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, + size_t alignment, bool zero); size_t arena_salloc(const void *ptr); #ifdef JEMALLOC_PROF void arena_prof_promoted(const void *ptr, size_t size); size_t arena_salloc_demote(const void *ptr); -prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr); -void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); #endif void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); @@ -449,7 +453,10 @@ void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); #endif -void *arena_ralloc(void *ptr, size_t size, size_t oldsize); +void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); +void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); bool arena_new(arena_t *arena, unsigned ind); bool arena_boot(void); @@ -458,10 +465,149 @@ bool arena_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin, + const void *ptr, size_t size); +# ifdef JEMALLOC_PROF +prof_ctx_t *arena_prof_ctx_get(const void *ptr); +void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +# endif void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +JEMALLOC_INLINE unsigned +arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, + size_t size) +{ + unsigned shift, diff, regind; + + assert(run->magic == ARENA_RUN_MAGIC); + + /* + * Avoid doing division with a variable divisor if possible. Using + * actual division here can reduce allocator throughput by over 20%! + */ + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); + + /* Rescale (factor powers of 2 out of the numerator and denominator). */ + shift = ffs(size) - 1; + diff >>= shift; + size >>= shift; + + if (size == 1) { + /* The divisor was a power of 2. */ + regind = diff; + } else { + /* + * To divide by a number D that is not a power of two we + * multiply by (2^21 / D) and then right shift by 21 positions. + * + * X / D + * + * becomes + * + * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT + * + * We can omit the first three elements, because we never + * divide by 0, and 1 and 2 are both powers of two, which are + * handled above. 
+ */ +#define SIZE_INV_SHIFT 21 +#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) + static const unsigned size_invs[] = { + SIZE_INV(3), + SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), + SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), + SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), + SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), + SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), + SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), + SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) + }; + + if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2)) + regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT; + else + regind = diff / size; +#undef SIZE_INV +#undef SIZE_INV_SHIFT + } + assert(diff == regind * size); + assert(regind < bin->nregs); + + return (regind); +} + +#ifdef JEMALLOC_PROF +JEMALLOC_INLINE prof_ctx_t * +arena_prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + if (prof_promote) + ret = (prof_ctx_t *)(uintptr_t)1U; + else { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << + PAGE_SHIFT)); + arena_bin_t *bin = run->bin; + unsigned regind; + + assert(run->magic == ARENA_RUN_MAGIC); + regind = arena_run_regind(run, bin, ptr, bin->reg_size); + ret = *(prof_ctx_t **)((uintptr_t)run + + bin->ctx0_offset + (regind * + sizeof(prof_ctx_t *))); + } + } else + ret = chunk->map[pageind-map_bias].prof_ctx; + + return (ret); +} + +JEMALLOC_INLINE void +arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + if (prof_promote == false) { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << + PAGE_SHIFT)); + arena_bin_t *bin = run->bin; + unsigned regind; + + assert(run->magic == ARENA_RUN_MAGIC); + regind = arena_run_regind(run, bin, ptr, bin->reg_size); + + *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset + + (regind * sizeof(prof_ctx_t *)))) = ctx; + } else + assert((uintptr_t)ctx == (uintptr_t)1U); + } else + chunk->map[pageind-map_bias].prof_ctx = ctx; +} +#endif + JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) { @@ -474,8 +620,8 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapelm = &chunk->map[pageind]; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapelm = &chunk->map[pageind-map_bias]; assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. 
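
The arena_run_regind() inline added above replaces per-free division by a small, non-power-of-two region size with a multiply by a precomputed reciprocal, SIZE_INV(s) = ((1U << 21) / s) + 1, followed by a right shift of 21 bits; the patch's own comment notes that an actual divide can cost over 20% of allocator throughput. The following standalone check is an editorial sketch, not part of the patch: it verifies the identity for the same divisor range (3..31) that the size_invs table covers, with iteration bounds chosen only to stay inside 32-bit arithmetic.

/*
 * Verify that (diff * SIZE_INV(d)) >> 21 == diff / d whenever diff is an
 * exact multiple of d, which is the only case arena_run_regind() relies on
 * (see its assert(diff == regind * size)).
 */
#include <assert.h>
#include <stdio.h>

#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)

int
main(void)
{
    unsigned d, k;

    for (d = 3; d <= 31; d++) {
        unsigned inv = SIZE_INV(d);

        for (k = 0; k < 1024; k++) {
            unsigned diff = k * d;          /* region offset within a run */
            unsigned regind = (diff * inv) >> SIZE_INV_SHIFT;

            assert(regind == diff / d);
        }
    }
    printf("reciprocal division matches exact division\n");
    return (0);
}
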
*/ diff --git a/dep/jemalloc/jemalloc/internal/base.h b/dep/jemalloc/include/jemalloc/internal/base.h index e353f309bd2..e353f309bd2 100644 --- a/dep/jemalloc/jemalloc/internal/base.h +++ b/dep/jemalloc/include/jemalloc/internal/base.h diff --git a/dep/jemalloc/jemalloc/internal/chunk.h b/dep/jemalloc/include/jemalloc/internal/chunk.h index 1f6abf782f1..a60f0ad7498 100644 --- a/dep/jemalloc/jemalloc/internal/chunk.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk.h @@ -39,13 +39,17 @@ extern malloc_mutex_t chunks_mtx; extern chunk_stats_t stats_chunks; #endif +#ifdef JEMALLOC_IVSALLOC +extern rtree_t *chunks_rtree; +#endif + extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern size_t arena_chunk_header_npages; +extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. */ -void *chunk_alloc(size_t size, bool *zero); +void *chunk_alloc(size_t size, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size); bool chunk_boot(void); diff --git a/dep/jemalloc/jemalloc/internal/chunk_dss.h b/dep/jemalloc/include/jemalloc/internal/chunk_dss.h index 6be4ad1f212..6f005222181 100644 --- a/dep/jemalloc/jemalloc/internal/chunk_dss.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -17,6 +17,7 @@ extern malloc_mutex_t dss_mtx; void *chunk_alloc_dss(size_t size, bool *zero); +bool chunk_in_dss(void *chunk); bool chunk_dealloc_dss(void *chunk, size_t size); bool chunk_dss_boot(void); diff --git a/dep/jemalloc/jemalloc/internal/chunk_mmap.h b/dep/jemalloc/include/jemalloc/internal/chunk_mmap.h index 8fb90b77c9b..07b50a4dc37 100644 --- a/dep/jemalloc/jemalloc/internal/chunk_mmap.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -10,8 +10,11 @@ #ifdef JEMALLOC_H_EXTERNS void *chunk_alloc_mmap(size_t size); +void *chunk_alloc_mmap_noreserve(size_t size); void chunk_dealloc_mmap(void *chunk, size_t size); +bool chunk_mmap_boot(void); + #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES diff --git a/dep/jemalloc/jemalloc/internal/chunk_swap.h b/dep/jemalloc/include/jemalloc/internal/chunk_swap.h index d50cb197449..9faa739f713 100644 --- a/dep/jemalloc/jemalloc/internal/chunk_swap.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk_swap.h @@ -20,6 +20,7 @@ extern size_t swap_avail; #endif void *chunk_alloc_swap(size_t size, bool *zero); +bool chunk_in_swap(void *chunk); bool chunk_dealloc_swap(void *chunk, size_t size); bool chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed); bool chunk_swap_boot(void); diff --git a/dep/jemalloc/jemalloc/internal/ckh.h b/dep/jemalloc/include/jemalloc/internal/ckh.h index c39ea5c75ef..d4e391b6360 100644 --- a/dep/jemalloc/jemalloc/internal/ckh.h +++ b/dep/jemalloc/include/jemalloc/internal/ckh.h @@ -45,7 +45,7 @@ struct ckh_s { #endif /* Used for pseudo-random number generation. 
*/ -#define CKH_A 12345 +#define CKH_A 1103515241 #define CKH_C 12347 uint32_t prn_state; diff --git a/dep/jemalloc/jemalloc/internal/ctl.h b/dep/jemalloc/include/jemalloc/internal/ctl.h index 7bbf21e0e85..8776ad135a7 100644 --- a/dep/jemalloc/jemalloc/internal/ctl.h +++ b/dep/jemalloc/include/jemalloc/internal/ctl.h @@ -82,9 +82,9 @@ bool ctl_boot(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \ != 0) { \ - malloc_write("<jemalloc>: Invalid xmallctl(\""); \ + malloc_write("<jemalloc>: Failure in xmallctl(\""); \ malloc_write(name); \ - malloc_write("\", ...) call\n"); \ + malloc_write("\", ...)\n"); \ abort(); \ } \ } while (0) @@ -92,9 +92,9 @@ bool ctl_boot(void); #define xmallctlnametomib(name, mibp, miblenp) do { \ if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \ malloc_write( \ - "<jemalloc>: Invalid xmallctlnametomib(\""); \ + "<jemalloc>: Failure in xmallctlnametomib(\""); \ malloc_write(name); \ - malloc_write("\", ...) call\n"); \ + malloc_write("\", ...)\n"); \ abort(); \ } \ } while (0) @@ -103,7 +103,7 @@ bool ctl_boot(void); if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \ newlen) != 0) { \ malloc_write( \ - "<jemalloc>: Invalid xmallctlbymib() call\n"); \ + "<jemalloc>: Failure in xmallctlbymib()\n"); \ abort(); \ } \ } while (0) diff --git a/dep/jemalloc/jemalloc/internal/extent.h b/dep/jemalloc/include/jemalloc/internal/extent.h index 33a4e9a3852..6fe9702b5f6 100644 --- a/dep/jemalloc/jemalloc/internal/extent.h +++ b/dep/jemalloc/include/jemalloc/internal/extent.h @@ -19,7 +19,7 @@ struct extent_node_s { #ifdef JEMALLOC_PROF /* Profile counters, used for huge objects. */ - prof_thr_cnt_t *prof_cnt; + prof_ctx_t *prof_ctx; #endif /* Pointer to the extent that this tree node is responsible for. 
*/ diff --git a/dep/jemalloc/jemalloc/internal/hash.h b/dep/jemalloc/include/jemalloc/internal/hash.h index d12cdb8359f..d12cdb8359f 100644 --- a/dep/jemalloc/jemalloc/internal/hash.h +++ b/dep/jemalloc/include/jemalloc/internal/hash.h diff --git a/dep/jemalloc/jemalloc/internal/huge.h b/dep/jemalloc/include/jemalloc/internal/huge.h index 3cf32f7506d..66544cf8d97 100644 --- a/dep/jemalloc/jemalloc/internal/huge.h +++ b/dep/jemalloc/include/jemalloc/internal/huge.h @@ -20,13 +20,16 @@ extern size_t huge_allocated; extern malloc_mutex_t huge_mtx; void *huge_malloc(size_t size, bool zero); -void *huge_palloc(size_t alignment, size_t size); -void *huge_ralloc(void *ptr, size_t size, size_t oldsize); -void huge_dalloc(void *ptr); +void *huge_palloc(size_t size, size_t alignment, bool zero); +void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra); +void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); +void huge_dalloc(void *ptr, bool unmap); size_t huge_salloc(const void *ptr); #ifdef JEMALLOC_PROF -prof_thr_cnt_t *huge_prof_cnt_get(const void *ptr); -void huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); +prof_ctx_t *huge_prof_ctx_get(const void *ptr); +void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); #endif bool huge_boot(void); diff --git a/dep/jemalloc/jemalloc/internal/jemalloc_internal.h b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h index 03782dd6690..611f0c665a1 100644 --- a/dep/jemalloc/jemalloc/internal/jemalloc_internal.h +++ b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -17,16 +17,29 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> +#include <stddef.h> +#ifndef offsetof +# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +#endif #include <inttypes.h> #include <string.h> #include <strings.h> +#include <ctype.h> #include <unistd.h> #include <fcntl.h> #include <pthread.h> +#include <math.h> #define JEMALLOC_MANGLE #include "../jemalloc.h" +#ifdef JEMALLOC_ZONE +#include <mach/mach_error.h> +#include <mach/mach_init.h> +#include <mach/vm_map.h> +#include <malloc/malloc.h> +#endif + #ifdef JEMALLOC_LAZY_LOCK #include <dlfcn.h> #endif @@ -49,7 +62,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); malloc_write("<jemalloc>: "); \ malloc_write(__FILE__); \ malloc_write(":"); \ - malloc_write(umax2s(__LINE__, 10, line_buf)); \ + malloc_write(u2s(__LINE__, 10, line_buf)); \ malloc_write(": Failed assertion: "); \ malloc_write("\""); \ malloc_write(#e); \ @@ -77,6 +90,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); /******************************************************************************/ #define JEMALLOC_H_TYPES +#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) + #define ZU(z) ((size_t)z) #ifndef __DECONST @@ -92,8 +107,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); # define JEMALLOC_INLINE static inline #endif -/* Size of stack-allocated buffer passed to strerror_r(). */ -#define STRERROR_BUF 64 +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 /* Minimum alignment of allocations is 2^LG_QUANTUM bytes. 
*/ #ifdef __i386__ @@ -159,6 +174,16 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) #define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) +#ifdef PAGE_SHIFT +# undef PAGE_SHIFT +#endif +#ifdef PAGE_SIZE +# undef PAGE_SIZE +#endif +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif + #ifdef DYNAMIC_PAGE_SHIFT # define PAGE_SHIFT lg_pagesize # define PAGE_SIZE pagesize @@ -173,7 +198,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) -#include "jemalloc/internal/totally_not_p_r_n.h" +#include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" @@ -184,15 +209,19 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_TYPES /******************************************************************************/ #define JEMALLOC_H_STRUCTS -#include "jemalloc/internal/totally_not_p_r_n.h" +#include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" @@ -203,8 +232,12 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_STRUCTS @@ -224,6 +257,7 @@ extern bool opt_xmalloc; #ifdef JEMALLOC_FILL extern bool opt_zero; #endif +extern size_t opt_narenas; #ifdef DYNAMIC_PAGE_SHIFT extern size_t pagesize; @@ -240,8 +274,19 @@ extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ * Map of pthread_self() --> arenas[???], used for selecting an arena to use * for allocations. */ -extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); +extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +# define ARENA_GET() arenas_tls +# define ARENA_SET(v) do { \ + arenas_tls = (v); \ +} while (0) +#else +extern pthread_key_t arenas_tsd; +# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) +# define ARENA_SET(v) do { \ + pthread_setspecific(arenas_tsd, (void *)(v)); \ +} while (0) #endif + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
@@ -249,12 +294,57 @@ extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); extern arena_t **arenas; extern unsigned narenas; +#ifdef JEMALLOC_STATS +typedef struct { + uint64_t allocated; + uint64_t deallocated; +} thread_allocated_t; +# ifndef NO_TLS +extern __thread thread_allocated_t thread_allocated_tls; +# define ALLOCATED_GET() thread_allocated_tls.allocated +# define DEALLOCATED_GET() thread_allocated_tls.deallocated +# define ALLOCATED_ADD(a, d) do { \ + thread_allocated_tls.allocated += a; \ + thread_allocated_tls.deallocated += d; \ +} while (0) +# else +extern pthread_key_t thread_allocated_tsd; +# define ALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? ((thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd))->allocated : 0) +# define DEALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? ((thread_allocated_t \ + *)pthread_getspecific(thread_allocated_tsd))->deallocated : \ + 0) +# define ALLOCATED_ADD(a, d) do { \ + thread_allocated_t *thread_allocated = (thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd); \ + if (thread_allocated != NULL) { \ + thread_allocated->allocated += (a); \ + thread_allocated->deallocated += (d); \ + } else { \ + thread_allocated = (thread_allocated_t *) \ + imalloc(sizeof(thread_allocated_t)); \ + if (thread_allocated != NULL) { \ + pthread_setspecific(thread_allocated_tsd, \ + thread_allocated); \ + thread_allocated->allocated = (a); \ + thread_allocated->deallocated = (d); \ + } \ + } \ +} while (0) +# endif +#endif + arena_t *arenas_extend(unsigned ind); -#ifndef NO_TLS arena_t *choose_arena_hard(void); -#endif +int buferror(int errnum, char *buf, size_t buflen); +void jemalloc_prefork(void); +void jemalloc_postfork(void); -#include "jemalloc/internal/totally_not_p_r_n.h" +#include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" @@ -265,15 +355,19 @@ arena_t *choose_arena_hard(void); #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_EXTERNS /******************************************************************************/ #define JEMALLOC_H_INLINES -#include "jemalloc/internal/totally_not_p_r_n.h" +#include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" @@ -285,133 +379,55 @@ arena_t *choose_arena_hard(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +size_t pow2_ceil(size_t x); +size_t s2u(size_t size); +size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); void malloc_write(const char *s); arena_t *choose_arena(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* - * Wrapper around malloc_message() that avoids the need for - * JEMALLOC_P(malloc_message)(...) throughout the code. - */ -JEMALLOC_INLINE void -malloc_write(const char *s) +/* Compute the smallest power of 2 that is >= x. 
*/ +JEMALLOC_INLINE size_t +pow2_ceil(size_t x) { - JEMALLOC_P(malloc_message)(NULL, s); + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (LG_SIZEOF_PTR == 3) + x |= x >> 32; +#endif + x++; + return (x); } /* - * Choose an arena based on a per-thread value (fast-path code, calls slow-path - * code if necessary). + * Compute usable size that would result from allocating an object with the + * specified size. */ -JEMALLOC_INLINE arena_t * -choose_arena(void) -{ - arena_t *ret; - - /* - * We can only use TLS if this is a PIC library, since for the static - * library version, libc's malloc is used by TLS allocation, which - * introduces a bootstrapping issue. - */ -#ifndef NO_TLS - ret = arenas_map; - if (ret == NULL) { - ret = choose_arena_hard(); - assert(ret != NULL); - } -#else - if (isthreaded && narenas > 1) { - unsigned long ind; - - /* - * Hash pthread_self() to one of the arenas. There is a prime - * number of arenas, so this has a reasonable chance of - * working. Even so, the hashing can be easily thwarted by - * inconvenient pthread_self() values. Without specific - * knowledge of how pthread_self() calculates values, we can't - * easily do much better than this. - */ - ind = (unsigned long) pthread_self() % narenas; - - /* - * Optimistially assume that arenas[ind] has been initialized. - * At worst, we find out that some other thread has already - * done so, after acquiring the lock in preparation. Note that - * this lazy locking also has the effect of lazily forcing - * cache coherency; without the lock acquisition, there's no - * guarantee that modification of arenas[ind] by another thread - * would be seen on this CPU for an arbitrary amount of time. - * - * In general, this approach to modifying a synchronized value - * isn't a good idea, but in this case we only ever modify the - * value once, so things work out well. - */ - ret = arenas[ind]; - if (ret == NULL) { - /* - * Avoid races with another thread that may have already - * initialized arenas[ind]. - */ - malloc_mutex_lock(&arenas_lock); - if (arenas[ind] == NULL) - ret = arenas_extend((unsigned)ind); - else - ret = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - } - } else - ret = arenas[0]; -#endif - - assert(ret != NULL); - return (ret); -} -#endif - -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" - -#ifndef JEMALLOC_ENABLE_INLINE -void *imalloc(size_t size); -void *icalloc(size_t size); -void *ipalloc(size_t alignment, size_t size); -size_t isalloc(const void *ptr); -void *iralloc(void *ptr, size_t size); -void idalloc(void *ptr); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE void * -imalloc(size_t size) -{ - - assert(size != 0); - - if (size <= arena_maxclass) - return (arena_malloc(size, false)); - else - return (huge_malloc(size, false)); -} - -JEMALLOC_INLINE void * -icalloc(size_t size) +JEMALLOC_INLINE size_t +s2u(size_t size) { + if (size <= small_maxclass) + return arenas[0]->bins[small_size2bin[size]].reg_size; if (size <= arena_maxclass) - return (arena_malloc(size, true)); - else - return (huge_malloc(size, true)); + return PAGE_CEILING(size); + return CHUNK_CEILING(size); } -JEMALLOC_INLINE void * -ipalloc(size_t alignment, size_t size) +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. 
+ */ +JEMALLOC_INLINE size_t +sa2u(size_t size, size_t alignment, size_t *run_size_p) { - void *ret; - size_t ceil_size; + size_t usize; /* * Round size up to the nearest multiple of alignment. @@ -431,20 +447,23 @@ ipalloc(size_t alignment, size_t size) * will further round up to a power of two, but that never causes * correctness issues. */ - ceil_size = (size + (alignment - 1)) & (-alignment); + usize = (size + (alignment - 1)) & (-alignment); /* - * (ceil_size < size) protects against the combination of maximal + * (usize < size) protects against the combination of maximal * alignment and size greater than maximal alignment. */ - if (ceil_size < size) { + if (usize < size) { /* size_t overflow. */ - return (NULL); + return (0); } - if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE - && ceil_size <= arena_maxclass)) - ret = arena_malloc(ceil_size, false); - else { + if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { + if (usize <= small_maxclass) { + return + (arenas[0]->bins[small_size2bin[usize]].reg_size); + } + return (PAGE_CEILING(usize)); + } else { size_t run_size; /* @@ -452,30 +471,30 @@ ipalloc(size_t alignment, size_t size) * permanently; it makes later calculations simpler. */ alignment = PAGE_CEILING(alignment); - ceil_size = PAGE_CEILING(size); + usize = PAGE_CEILING(size); /* - * (ceil_size < size) protects against very large sizes within + * (usize < size) protects against very large sizes within * PAGE_SIZE of SIZE_T_MAX. * - * (ceil_size + alignment < ceil_size) protects against the - * combination of maximal alignment and ceil_size large enough + * (usize + alignment < usize) protects against the + * combination of maximal alignment and usize large enough * to cause overflow. This is similar to the first overflow * check above, but it needs to be repeated due to the new - * ceil_size value, which may now be *equal* to maximal + * usize value, which may now be *equal* to maximal * alignment, whereas before we only detected overflow if the * original size was *greater* than maximal alignment. */ - if (ceil_size < size || ceil_size + alignment < ceil_size) { + if (usize < size || usize + alignment < usize) { /* size_t overflow. */ - return (NULL); + return (0); } /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. */ - if (ceil_size >= alignment) - run_size = ceil_size + alignment - PAGE_SIZE; + if (usize >= alignment) + run_size = usize + alignment - PAGE_SIZE; else { /* * It is possible that (alignment << 1) will cause @@ -488,15 +507,112 @@ ipalloc(size_t alignment, size_t size) */ run_size = (alignment << 1) - PAGE_SIZE; } + if (run_size_p != NULL) + *run_size_p = run_size; - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), alignment, ceil_size, - run_size); - } else if (alignment <= chunksize) - ret = huge_malloc(ceil_size, false); - else - ret = huge_palloc(alignment, ceil_size); + if (run_size <= arena_maxclass) + return (PAGE_CEILING(usize)); + return (CHUNK_CEILING(usize)); } +} + +/* + * Wrapper around malloc_message() that avoids the need for + * JEMALLOC_P(malloc_message)(...) throughout the code. + */ +JEMALLOC_INLINE void +malloc_write(const char *s) +{ + + JEMALLOC_P(malloc_message)(NULL, s); +} + +/* + * Choose an arena based on a per-thread value (fast-path code, calls slow-path + * code if necessary). 
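
The new s2u() and sa2u() helpers above compute the usable size a request will map to: small requests map to a bin's reg_size, larger ones are rounded with PAGE_CEILING() or CHUNK_CEILING(), and aligned requests are first rounded up with (size + (alignment - 1)) & -alignment. The snippet below is a minimal, self-contained illustration of the two bit tricks involved, pow2_ceil() and the power-of-two alignment round-up; it assumes an 8-byte size_t and is not code from the patch.

/*
 * pow2_ceil(): smallest power of two >= x, as added above (the final
 * "x |= x >> 32" step assumes an 8-byte size_t, i.e. LG_SIZEOF_PTR == 3).
 */
#include <stddef.h>
#include <stdio.h>

static size_t
pow2_ceil(size_t x)
{
    x--;
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    x |= x >> 32;
    x++;
    return (x);
}

int
main(void)
{
    size_t size = 1234;
    size_t alignment = 64;    /* must be a power of two */
    /* Same round-up sa2u() performs; ~(alignment - 1) == -alignment here. */
    size_t usize = (size + (alignment - 1)) & ~(alignment - 1);

    printf("pow2_ceil(%zu) = %zu\n", size, pow2_ceil(size));    /* 2048 */
    printf("%zu rounded to %zu-byte alignment = %zu\n",
        size, alignment, usize);                                /* 1280 */
    return (0);
}
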
+ */ +JEMALLOC_INLINE arena_t * +choose_arena(void) +{ + arena_t *ret; + + ret = ARENA_GET(); + if (ret == NULL) { + ret = choose_arena_hard(); + assert(ret != NULL); + } + + return (ret); +} +#endif + +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif + +#ifndef JEMALLOC_ENABLE_INLINE +void *imalloc(size_t size); +void *icalloc(size_t size); +void *ipalloc(size_t size, size_t alignment, bool zero); +size_t isalloc(const void *ptr); +# ifdef JEMALLOC_IVSALLOC +size_t ivsalloc(const void *ptr); +# endif +void idalloc(void *ptr); +void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero, bool no_move); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE void * +imalloc(size_t size) +{ + + assert(size != 0); + + if (size <= arena_maxclass) + return (arena_malloc(size, false)); + else + return (huge_malloc(size, false)); +} + +JEMALLOC_INLINE void * +icalloc(size_t size) +{ + + if (size <= arena_maxclass) + return (arena_malloc(size, true)); + else + return (huge_malloc(size, true)); +} + +JEMALLOC_INLINE void * +ipalloc(size_t size, size_t alignment, bool zero) +{ + void *ret; + size_t usize; + size_t run_size +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; + + usize = sa2u(size, alignment, &run_size); + if (usize == 0) + return (NULL); + if (usize <= arena_maxclass && alignment <= PAGE_SIZE) + ret = arena_malloc(usize, zero); + else if (run_size <= arena_maxclass) { + ret = arena_palloc(choose_arena(), usize, run_size, alignment, + zero); + } else if (alignment <= chunksize) + ret = huge_malloc(usize, zero); + else + ret = huge_palloc(usize, alignment, zero); assert(((uintptr_t)ret & (alignment - 1)) == 0); return (ret); @@ -526,21 +642,18 @@ isalloc(const void *ptr) return (ret); } -JEMALLOC_INLINE void * -iralloc(void *ptr, size_t size) +#ifdef JEMALLOC_IVSALLOC +JEMALLOC_INLINE size_t +ivsalloc(const void *ptr) { - size_t oldsize; - assert(ptr != NULL); - assert(size != 0); + /* Return 0 if ptr is not within a chunk managed by jemalloc. */ + if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) + return (0); - oldsize = isalloc(ptr); - - if (size <= arena_maxclass) - return (arena_ralloc(ptr, size, oldsize)); - else - return (huge_ralloc(ptr, size, oldsize)); + return (isalloc(ptr)); } +#endif JEMALLOC_INLINE void idalloc(void *ptr) @@ -553,9 +666,72 @@ idalloc(void *ptr) if (chunk != ptr) arena_dalloc(chunk->arena, chunk, ptr); else - huge_dalloc(ptr); + huge_dalloc(ptr, true); +} + +JEMALLOC_INLINE void * +iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, + bool no_move) +{ + void *ret; + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + size_t copysize; + + /* + * Existing object alignment is inadquate; allocate new space + * and copy. + */ + if (no_move) + return (NULL); + ret = ipalloc(size + extra, alignment, zero); + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, without extra this time. */ + ret = ipalloc(size, alignment, zero); + if (ret == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller + * has no expectation that the extra bytes will be reliably + * preserved. + */ + copysize = (size < oldsize) ? 
size : oldsize; + memcpy(ret, ptr, copysize); + idalloc(ptr); + return (ret); + } + + if (no_move) { + if (size <= arena_maxclass) { + return (arena_ralloc_no_move(ptr, oldsize, size, + extra, zero)); + } else { + return (huge_ralloc_no_move(ptr, oldsize, size, + extra)); + } + } else { + if (size + extra <= arena_maxclass) { + return (arena_ralloc(ptr, oldsize, size, extra, + alignment, zero)); + } else { + return (huge_ralloc(ptr, oldsize, size, extra, + alignment, zero)); + } + } } #endif +#include "jemalloc/internal/prof.h" + #undef JEMALLOC_H_INLINES /******************************************************************************/ diff --git a/dep/jemalloc/jemalloc/internal/totally_not_p_r_n.h b/dep/jemalloc/include/jemalloc/internal/jemprn.h index 0709d708012..0709d708012 100644 --- a/dep/jemalloc/jemalloc/internal/totally_not_p_r_n.h +++ b/dep/jemalloc/include/jemalloc/internal/jemprn.h diff --git a/dep/jemalloc/jemalloc/internal/mb.h b/dep/jemalloc/include/jemalloc/internal/mb.h index 1707aa91d68..1707aa91d68 100644 --- a/dep/jemalloc/jemalloc/internal/mb.h +++ b/dep/jemalloc/include/jemalloc/internal/mb.h diff --git a/dep/jemalloc/jemalloc/internal/mutex.h b/dep/jemalloc/include/jemalloc/internal/mutex.h index 108bfa8abfd..dcca01edd5d 100644 --- a/dep/jemalloc/jemalloc/internal/mutex.h +++ b/dep/jemalloc/include/jemalloc/internal/mutex.h @@ -3,6 +3,12 @@ typedef pthread_mutex_t malloc_mutex_t; +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +#else +# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#endif + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -18,6 +24,7 @@ extern bool isthreaded; #endif bool malloc_mutex_init(malloc_mutex_t *mutex); +void malloc_mutex_destroy(malloc_mutex_t *mutex); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/prof.h b/dep/jemalloc/include/jemalloc/internal/prof.h new file mode 100644 index 00000000000..7864000b88b --- /dev/null +++ b/dep/jemalloc/include/jemalloc/internal/prof.h @@ -0,0 +1,547 @@ +#ifdef JEMALLOC_PROF +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_thr_cnt_s prof_thr_cnt_t; +typedef struct prof_ctx_s prof_ctx_t; +typedef struct prof_tdata_s prof_tdata_t; + +/* Option defaults. */ +#define PROF_PREFIX_DEFAULT "jeprof" +#define LG_PROF_BT_MAX_DEFAULT 7 +#define LG_PROF_SAMPLE_DEFAULT 0 +#define LG_PROF_INTERVAL_DEFAULT -1 +#define LG_PROF_TCMAX_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. Note that the version of + * prof_backtrace() that is based on __builtin_return_address() necessarily has + * a hard-coded number of backtrace frame handlers. + */ +#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND)) +# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1) +#else +# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ +#endif +#define PROF_BT_MAX (1U << LG_PROF_BT_MAX) + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. 
*/ +#define PROF_DUMP_BUF_SIZE 65536 + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned nignore; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* + * Profiling counters. An allocation/deallocation pair can operate on + * different prof_thr_cnt_t objects that are linked into the same + * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go + * negative. In principle it is possible for the *bytes counters to + * overflow/underflow, but a general solution would require something + * like 128-bit counters; this implementation doesn't bother to solve + * that problem. + */ + int64_t curobjs; + int64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +struct prof_thr_cnt_s { + /* Linkage into prof_ctx_t's cnts_ql. */ + ql_elm(prof_thr_cnt_t) cnts_link; + + /* Linkage into thread's LRU. */ + ql_elm(prof_thr_cnt_t) lru_link; + + /* + * Associated context. If a thread frees an object that it did not + * allocate, it is possible that the context is not cached in the + * thread's hash table, in which case it must be able to look up the + * context, insert a new prof_thr_cnt_t into the thread's hash table, + * and link it into the prof_ctx_t's cnts_ql. + */ + prof_ctx_t *ctx; + + /* + * Threads use memory barriers to update the counters. Since there is + * only ever one writer, the only challenge is for the reader to get a + * consistent read of the counters. + * + * The writer uses this series of operations: + * + * 1) Increment epoch to an odd number. + * 2) Update counters. + * 3) Increment epoch to an even number. + * + * The reader must assure 1) that the epoch is even while it reads the + * counters, and 2) that the epoch doesn't change between the time it + * starts and finishes reading the counters. + */ + unsigned epoch; + + /* Profiling counters. */ + prof_cnt_t cnts; +}; + +struct prof_ctx_s { + /* Associated backtrace. */ + prof_bt_t *bt; + + /* Protects cnt_merged and cnts_ql. */ + malloc_mutex_t lock; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* When threads exit, they merge their stats into cnt_merged. */ + prof_cnt_t cnt_merged; + + /* + * List of profile counters, one for each thread that has allocated in + * this context. + */ + ql_head(prof_thr_cnt_t) cnts_ql; +}; + +struct prof_tdata_s { + /* + * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a + * cache of backtraces, with associated thread-specific prof_thr_cnt_t + * objects. Other threads may read the prof_thr_cnt_t contents, but no + * others will ever write them. + * + * Upon thread exit, the thread must merge all the prof_thr_cnt_t + * counter data into the associated prof_ctx_t objects, and unlink/free + * the prof_thr_cnt_t objects. + */ + ckh_t bt2cnt; + + /* LRU for contents of bt2cnt. */ + ql_head(prof_thr_cnt_t) lru_ql; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void **vec; + + /* Sampling state. 
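
The prof_thr_cnt_s comment above describes a single-writer epoch scheme: the writer bumps epoch to an odd value, updates cnts, then bumps it back to an even value (prof_malloc() later in this header shows the writer side, with mb_write() barriers between the steps). A reader must retry until it observes an even epoch that is unchanged across its copy of the counters. The reader side does not appear in this header, so the loop below is only an illustrative sketch using the struct's field names; real code would also need read barriers, which are left as comments here.

/* Illustrative reader for the epoch protocol described above; not patch code. */
static prof_cnt_t
prof_thr_cnt_snapshot(const prof_thr_cnt_t *cnt)
{
    prof_cnt_t snapshot;
    unsigned epoch;

    do {
        /* Wait for an even epoch; odd means the writer is mid-update. */
        do {
            epoch = cnt->epoch;
        } while (epoch & 1U);
        /* A read barrier belongs here... */
        snapshot = cnt->cnts;
        /* ...and here, before re-checking the epoch. */
    } while (cnt->epoch != epoch);

    return (snapshot);
}
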
*/ + uint64_t prn_state; + uint64_t threshold; + uint64_t accum; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_prof; +/* + * Even if opt_prof is true, sampling can be temporarily disabled by setting + * opt_prof_active to false. No locking is used when updating opt_prof_active, + * so there are no guarantees regarding how long it will take for all threads + * to notice state changes. + */ +extern bool opt_prof_active; +extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread bactrace cache) */ +extern char opt_prof_prefix[PATH_MAX + 1]; + +/* + * Profile dump interval, measured in bytes allocated. Each arena triggers a + * profile dump when it reaches this threshold. The effect is that the + * interval between profile dumps averages prof_interval, though the actual + * interval between dumps will tend to be sporadic, and the interval will be a + * maximum of approximately (prof_interval * narenas). + */ +extern uint64_t prof_interval; + +/* + * If true, promote small sampled objects to large objects, since small run + * headers do not have embedded profile context pointers. + */ +extern bool prof_promote; + +/* (1U << opt_lg_prof_bt_max). */ +extern unsigned prof_bt_max; + +/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ +#ifndef NO_TLS +extern __thread prof_tdata_t *prof_tdata_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +# define PROF_TCACHE_GET() prof_tdata_tls +# define PROF_TCACHE_SET(v) do { \ + prof_tdata_tls = (v); \ + pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ +} while (0) +#else +# define PROF_TCACHE_GET() \ + ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd)) +# define PROF_TCACHE_SET(v) do { \ + pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ +} while (0) +#endif +/* + * Same contents as b2cnt_tls, but initialized such that the TSD destructor is + * called when a thread exits, so that prof_tdata_tls contents can be merged, + * unlinked, and deallocated. 
+ */ +extern pthread_key_t prof_tdata_tsd; + +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); +prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +void prof_idump(void); +bool prof_mdump(const char *filename); +void prof_gdump(void); +prof_tdata_t *prof_tdata_init(void); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +prof_thr_cnt_t *prof_alloc_prep(size_t size); +prof_ctx_t *prof_ctx_get(const void *ptr); +void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +bool prof_sample_accum_update(size_t size); +void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); +void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, + size_t old_size, prof_ctx_t *old_ctx); +void prof_free(const void *ptr, size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_INLINE void +prof_sample_threshold_update(prof_tdata_t *prof_tdata) +{ + uint64_t r; + double u; + + /* + * Compute prof_sample_threshold as a geometrically distributed random + * variable with mean (2^opt_lg_prof_sample). + */ + prn64(r, 53, prof_tdata->prn_state, + (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); + u = (double)r * (1.0/9007199254740992.0L); + prof_tdata->threshold = (uint64_t)(log(u) / + log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + + (uint64_t)1U; +} + +JEMALLOC_INLINE prof_thr_cnt_t * +prof_alloc_prep(size_t size) +{ +#ifdef JEMALLOC_ENABLE_INLINE + /* This function does not have its own stack frame, because it is inlined. */ +# define NIGNORE 1 +#else +# define NIGNORE 2 +#endif + prof_thr_cnt_t *ret; + prof_tdata_t *prof_tdata; + prof_bt_t bt; + + assert(size == s2u(size)); + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata == NULL) { + prof_tdata = prof_tdata_init(); + if (prof_tdata == NULL) + return (NULL); + } + + if (opt_prof_active == false) { + /* Sampling is currently inactive, so avoid sampling. */ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } else if (opt_lg_prof_sample == 0) { + /* + * Don't bother with sampling logic, since sampling interval is + * 1. + */ + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else { + if (prof_tdata->threshold == 0) { + /* + * Initialize. Seed the prng differently for each + * thread. + */ + prof_tdata->prn_state = (uint64_t)(uintptr_t)&size; + prof_sample_threshold_update(prof_tdata); + } + + /* + * Determine whether to capture a backtrace based on whether + * size is enough for prof_accum to reach + * prof_tdata->threshold. However, delay updating these + * variables until prof_{m,re}alloc(), because we don't know + * for sure that the allocation will succeed. + * + * Use subtraction rather than addition to avoid potential + * integer overflow. 
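
prof_sample_threshold_update() above draws the next sampling threshold by inverse-transform sampling: a uniform u in (0,1), built from 53 bits of prn64() output, is mapped to floor(log(u) / log(1 - p)) + 1 with p = 1 / 2^opt_lg_prof_sample, which is geometrically distributed with mean 2^opt_lg_prof_sample bytes between samples. The standalone program below demonstrates only the math; it substitutes rand() for prn64() and hard-codes the lg value, so it is an illustration rather than patch code.

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    const unsigned lg_prof_sample = 10;    /* mean interval = 1024 bytes */
    const double p = 1.0 / (double)(1U << lg_prof_sample);
    const unsigned n = 1000000;
    double sum = 0.0;
    unsigned i;

    srand(12345);
    for (i = 0; i < n; i++) {
        /* Uniform in (0, 1), strictly excluding both endpoints. */
        double u = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0);
        uint64_t threshold = (uint64_t)(log(u) / log(1.0 - p)) + 1;

        sum += (double)threshold;
    }
    /* Should print a value close to 1024. */
    printf("empirical mean sampling interval: %.1f bytes\n", sum / n);
    return (0);
}
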
+ */ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } + + return (ret); +#undef NIGNORE +} + +JEMALLOC_INLINE prof_ctx_t * +prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + assert(chunk->arena->magic == ARENA_MAGIC); + + ret = arena_prof_ctx_get(ptr); + } else + ret = huge_prof_ctx_get(ptr); + + return (ret); +} + +JEMALLOC_INLINE void +prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + assert(chunk->arena->magic == ARENA_MAGIC); + + arena_prof_ctx_set(ptr, ctx); + } else + huge_prof_ctx_set(ptr, ctx); +} + +JEMALLOC_INLINE bool +prof_sample_accum_update(size_t size) +{ + prof_tdata_t *prof_tdata; + + /* Sampling logic is unnecessary if the interval is 1. */ + assert(opt_lg_prof_sample != 0); + + prof_tdata = PROF_TCACHE_GET(); + assert(prof_tdata != NULL); + + /* Take care to avoid integer overflow. */ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + prof_tdata->accum -= (prof_tdata->threshold - size); + /* Compute new prof_sample_threshold. */ + prof_sample_threshold_update(prof_tdata); + while (prof_tdata->accum >= prof_tdata->threshold) { + prof_tdata->accum -= prof_tdata->threshold; + prof_sample_threshold_update(prof_tdata); + } + return (false); + } else { + prof_tdata->accum += size; + return (true); + } +} + +JEMALLOC_INLINE void +prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) +{ + + assert(ptr != NULL); + assert(size == isalloc(ptr)); + + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(size)) { + /* + * Don't sample. For malloc()-like allocation, it is + * always possible to tell in advance how large an + * object's usable size will be, so there should never + * be a difference between the size passed to + * prof_alloc_prep() and prof_malloc(). + */ + assert((uintptr_t)cnt == (uintptr_t)1U); + } + } + + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); + + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + cnt->cnts.curobjs++; + cnt->cnts.curbytes += size; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += size; + } + /*********/ + mb_write(); + /*********/ + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); +} + +JEMALLOC_INLINE void +prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, + size_t old_size, prof_ctx_t *old_ctx) +{ + prof_thr_cnt_t *told_cnt; + + assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); + + if (ptr != NULL) { + assert(size == isalloc(ptr)); + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(size)) { + /* + * Don't sample. The size passed to + * prof_alloc_prep() was larger than what + * actually got allocated, so a backtrace was + * captured for this allocation, even though + * its actual size was insufficient to cross + * the sample threshold. + */ + cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } + } + + if ((uintptr_t)old_ctx > (uintptr_t)1U) { + told_cnt = prof_lookup(old_ctx->bt); + if (told_cnt == NULL) { + /* + * It's too late to propagate OOM for this realloc(), + * so operate directly on old_cnt->ctx->cnt_merged. 
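
The epoch increments and mb_write() calls that bracket the counter updates in prof_malloc() above (and in prof_realloc()/prof_free() below) amount to a seqlock-style protocol: a writer leaves the epoch odd while the counters are in flux and bumps it again when done, so a reader wanting a consistent snapshot can retry until it observes an even, unchanged epoch around its copy. A reduced sketch of that reader side, using stand-in types rather than the patch's prof_thr_cnt_t:

#include <stdint.h>
#include <stdio.h>

/* Minimal stand-ins mirroring the fields touched above. */
typedef struct {
    uint64_t curobjs, curbytes, accumobjs, accumbytes;
} cnts_t;
typedef struct {
    volatile unsigned epoch;    /* Bumped twice per counter update. */
    cnts_t cnts;
} thr_cnt_t;

/* Copy the counters consistently: retry while a writer is mid-update
 * (odd epoch) or the epoch changed during the copy.  Real code also
 * relies on the write-side barriers shown above. */
static cnts_t
snapshot(const thr_cnt_t *c)
{
    cnts_t out;
    unsigned e;

    for (;;) {
        e = c->epoch;
        if (e & 1U)
            continue;
        out = c->cnts;
        if (c->epoch == e)
            break;
    }
    return (out);
}

int
main(void)
{
    thr_cnt_t c = {0, {3, 12288, 3, 12288}};
    cnts_t s = snapshot(&c);

    printf("curobjs=%llu curbytes=%llu\n",
        (unsigned long long)s.curobjs, (unsigned long long)s.curbytes);
    return (0);
}
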
+ */ + malloc_mutex_lock(&old_ctx->lock); + old_ctx->cnt_merged.curobjs--; + old_ctx->cnt_merged.curbytes -= old_size; + malloc_mutex_unlock(&old_ctx->lock); + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } else + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); + cnt->epoch++; + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) { + told_cnt->cnts.curobjs--; + told_cnt->cnts.curbytes -= old_size; + } + if ((uintptr_t)cnt > (uintptr_t)1U) { + cnt->cnts.curobjs++; + cnt->cnts.curbytes += size; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += size; + } + } + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) + cnt->epoch++; + /*********/ + mb_write(); /* Not strictly necessary. */ +} + +JEMALLOC_INLINE void +prof_free(const void *ptr, size_t size) +{ + prof_ctx_t *ctx = prof_ctx_get(ptr); + + if ((uintptr_t)ctx > (uintptr_t)1) { + assert(size == isalloc(ptr)); + prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); + + if (tcnt != NULL) { + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + tcnt->cnts.curobjs--; + tcnt->cnts.curbytes -= size; + /*********/ + mb_write(); + /*********/ + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else { + /* + * OOM during free() cannot be propagated, so operate + * directly on cnt->ctx->cnt_merged. + */ + malloc_mutex_lock(&ctx->lock); + ctx->cnt_merged.curobjs--; + ctx->cnt_merged.curbytes -= size; + malloc_mutex_unlock(&ctx->lock); + } + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_PROF */ diff --git a/dep/jemalloc/jemalloc/internal/ql.h b/dep/jemalloc/include/jemalloc/internal/ql.h index a9ed2393f0c..a9ed2393f0c 100644 --- a/dep/jemalloc/jemalloc/internal/ql.h +++ b/dep/jemalloc/include/jemalloc/internal/ql.h diff --git a/dep/jemalloc/jemalloc/internal/qr.h b/dep/jemalloc/include/jemalloc/internal/qr.h index fe22352fedd..fe22352fedd 100644 --- a/dep/jemalloc/jemalloc/internal/qr.h +++ b/dep/jemalloc/include/jemalloc/internal/qr.h diff --git a/dep/jemalloc/jemalloc/internal/rb.h b/dep/jemalloc/include/jemalloc/internal/rb.h index ee9b009d235..ee9b009d235 100644 --- a/dep/jemalloc/jemalloc/internal/rb.h +++ b/dep/jemalloc/include/jemalloc/internal/rb.h diff --git a/dep/jemalloc/include/jemalloc/internal/rtree.h b/dep/jemalloc/include/jemalloc/internal/rtree.h new file mode 100644 index 00000000000..9d58ebac545 --- /dev/null +++ b/dep/jemalloc/include/jemalloc/internal/rtree.h @@ -0,0 +1,161 @@ +/* + * This radix tree implementation is tailored to the singular purpose of + * tracking which chunks are currently owned by jemalloc. This functionality + * is mandatory for OS X, where jemalloc must be able to respond to object + * ownership queries. + * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct rtree_s rtree_t; + +/* + * Size of each radix tree node (must be a power of 2). This impacts tree + * depth. 
+ */ +#if (LG_SIZEOF_PTR == 2) +# define RTREE_NODESIZE (1U << 14) +#else +# define RTREE_NODESIZE CACHELINE +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct rtree_s { + malloc_mutex_t mutex; + void **root; + unsigned height; + unsigned level2bits[1]; /* Dynamically sized. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +rtree_t *rtree_new(unsigned bits); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +#ifndef JEMALLOC_DEBUG +void *rtree_get_locked(rtree_t *rtree, uintptr_t key); +#endif +void *rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_)) +#define RTREE_GET_GENERATE(f) \ +/* The least significant bits of the key are ignored. */ \ +JEMALLOC_INLINE void * \ +f(rtree_t *rtree, uintptr_t key) \ +{ \ + void *ret; \ + uintptr_t subkey; \ + unsigned i, lshift, height, bits; \ + void **node, **child; \ + \ + RTREE_LOCK(&rtree->mutex); \ + for (i = lshift = 0, height = rtree->height, node = rtree->root;\ + i < height - 1; \ + i++, lshift += bits, node = child) { \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ + 3)) - bits); \ + child = (void**)node[subkey]; \ + if (child == NULL) { \ + RTREE_UNLOCK(&rtree->mutex); \ + return (NULL); \ + } \ + } \ + \ + /* \ + * node is a leaf, so it contains values rather than node \ + * pointers. \ + */ \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ + bits); \ + ret = node[subkey]; \ + RTREE_UNLOCK(&rtree->mutex); \ + \ + RTREE_GET_VALIDATE \ + return (ret); \ +} + +#ifdef JEMALLOC_DEBUG +# define RTREE_LOCK(l) malloc_mutex_lock(l) +# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) +# define RTREE_GET_VALIDATE +RTREE_GET_GENERATE(rtree_get_locked) +# undef RTREE_LOCK +# undef RTREE_UNLOCK +# undef RTREE_GET_VALIDATE +#endif + +#define RTREE_LOCK(l) +#define RTREE_UNLOCK(l) +#ifdef JEMALLOC_DEBUG + /* + * Suppose that it were possible for a jemalloc-allocated chunk to be + * munmap()ped, followed by a different allocator in another thread re-using + * overlapping virtual memory, all without invalidating the cached rtree + * value. The result would be a false positive (the rtree would claim that + * jemalloc owns memory that it had actually discarded). This scenario + * seems impossible, but the following assertion is a prudent sanity check. 
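
The generated lookup above walks the tree by slicing the key from its most significant end, consuming level2bits[i] bits per level with a shift-left/shift-right pair. A small standalone demonstration of that extraction, assuming two 8-bit levels purely for illustration (the real level2bits values are derived from the requested bit count and RTREE_NODESIZE in rtree_new()):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    const unsigned ptr_bits = sizeof(uintptr_t) * 8;
    const unsigned level2bits[2] = {8, 8};  /* Illustrative geometry. */
    /* Put a recognizable pattern in the key's high-order bits. */
    uintptr_t key = (uintptr_t)0xdeadb000u << (ptr_bits - 32);
    unsigned i, lshift = 0;

    for (i = 0; i < 2; i++) {
        unsigned bits = level2bits[i];
        /* Same idiom as above: drop already-consumed high bits,
         * then keep only the next 'bits' bits. */
        uintptr_t subkey = (key << lshift) >> (ptr_bits - bits);

        printf("level %u: subkey = 0x%lx\n", i, (unsigned long)subkey);
        lshift += bits;
    }
    return (0);
}

This prints 0xde and then 0xad: only the leading bits are ever consumed, which is what the "least significant bits of the key are ignored" note above refers to.
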
+ */ +# define RTREE_GET_VALIDATE \ + assert(rtree_get_locked(rtree, key) == ret); +#else +# define RTREE_GET_VALIDATE +#endif +RTREE_GET_GENERATE(rtree_get) +#undef RTREE_LOCK +#undef RTREE_UNLOCK +#undef RTREE_GET_VALIDATE + +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, void *val) +{ + uintptr_t subkey; + unsigned i, lshift, height, bits; + void **node, **child; + + malloc_mutex_lock(&rtree->mutex); + for (i = lshift = 0, height = rtree->height, node = rtree->root; + i < height - 1; + i++, lshift += bits, node = child) { + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + bits); + child = (void**)node[subkey]; + if (child == NULL) { + child = (void**)base_alloc(sizeof(void *) << + rtree->level2bits[i+1]); + if (child == NULL) { + malloc_mutex_unlock(&rtree->mutex); + return (true); + } + memset(child, 0, sizeof(void *) << + rtree->level2bits[i+1]); + node[subkey] = child; + } + } + + /* node is a leaf, so it contains values rather than node pointers. */ + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); + node[subkey] = val; + malloc_mutex_unlock(&rtree->mutex); + + return (false); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/jemalloc/jemalloc/internal/stats.h b/dep/jemalloc/include/jemalloc/internal/stats.h index cbf035ff2b9..3fc2080a34b 100644 --- a/dep/jemalloc/jemalloc/internal/stats.h +++ b/dep/jemalloc/include/jemalloc/internal/stats.h @@ -154,7 +154,7 @@ struct chunk_stats_s { extern bool opt_stats_print; -char *umax2s(uintmax_t x, unsigned base, char *s); +char *u2s(uint64_t x, unsigned base, char *s); #ifdef JEMALLOC_STATS void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); diff --git a/dep/jemalloc/jemalloc/internal/tcache.h b/dep/jemalloc/include/jemalloc/internal/tcache.h index c76597fafab..1ad91a9b1e0 100644 --- a/dep/jemalloc/jemalloc/internal/tcache.h +++ b/dep/jemalloc/include/jemalloc/internal/tcache.h @@ -17,7 +17,7 @@ typedef struct tcache_s tcache_t; /* Number of cache slots for large size classes. */ #define TCACHE_NSLOTS_LARGE 20 -/* (1U << opt_lg_tcache_maxclass) is used to compute tcache_maxclass. */ +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ #define LG_TCACHE_MAXCLASS_DEFAULT 15 /* @@ -61,12 +61,25 @@ struct tcache_s { #ifdef JEMALLOC_H_EXTERNS extern bool opt_tcache; -extern ssize_t opt_lg_tcache_maxclass; +extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_gc_sweep; /* Map of thread-specific caches. */ +#ifndef NO_TLS extern __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); +# define TCACHE_GET() tcache_tls +# define TCACHE_SET(v) do { \ + tcache_tls = (tcache_t *)(v); \ + pthread_setspecific(tcache_tsd, (void *)(v)); \ +} while (0) +#else +# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd)) +# define TCACHE_SET(v) do { \ + pthread_setspecific(tcache_tsd, (void *)(v)); \ +} while (0) +#endif +extern pthread_key_t tcache_tsd; /* * Number of tcache bins. 
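
The TCACHE_GET()/TCACHE_SET() macros above (like the PROF_TCACHE_* pair earlier) use one trick: reads go through a fast __thread variable when TLS is available, but every store is also mirrored into a pthread key so that the key's destructor still fires at thread exit and can tear the cache down. A self-contained sketch of that pattern with placeholder names (cache_t, cache_tsd, cache_cleanup are not from the patch):

#include <pthread.h>
#include <stdlib.h>

typedef struct cache_s { int nitems; } cache_t;

static pthread_key_t cache_tsd;
#ifndef NO_TLS
static __thread cache_t *cache_tls;
# define CACHE_GET()    cache_tls
# define CACHE_SET(v)   do {                        \
    cache_tls = (cache_t *)(v);                     \
    pthread_setspecific(cache_tsd, (void *)(v));    \
} while (0)
#else
# define CACHE_GET()    ((cache_t *)pthread_getspecific(cache_tsd))
# define CACHE_SET(v)   pthread_setspecific(cache_tsd, (void *)(v))
#endif

/* Runs once per thread at exit because the value was also stored via
 * pthread_setspecific(); a bare __thread pointer gets no callback. */
static void
cache_cleanup(void *arg)
{
    free(arg);
}

int
main(void)
{
    cache_t *c = malloc(sizeof(*c));

    pthread_key_create(&cache_tsd, cache_cleanup);
    CACHE_SET(c);
    return (CACHE_GET() != c);
}

Storing through both mechanisms is what lets the patch keep a destructor-driven cleanup path while still paying only a TLS load on the fast path.
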
There are nbins small-object bins, plus 0 or more @@ -122,14 +135,23 @@ tcache_get(void) if ((isthreaded & opt_tcache) == false) return (NULL); - tcache = tcache_tls; - if ((uintptr_t)tcache <= (uintptr_t)1) { + tcache = TCACHE_GET(); + if ((uintptr_t)tcache <= (uintptr_t)2) { if (tcache == NULL) { tcache = tcache_create(choose_arena()); if (tcache == NULL) return (NULL); - } else + } else { + if (tcache == (void *)(uintptr_t)1) { + /* + * Make a note that an allocator function was + * called after the tcache_thread_cleanup() was + * called. + */ + TCACHE_SET((uintptr_t)2); + } return (NULL); + } } return (tcache); @@ -258,9 +280,9 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } else { #ifdef JEMALLOC_PROF arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - size_t pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) - >> PAGE_SHIFT); - chunk->map[pageind].bits |= CHUNK_MAP_CLASS_MASK; + size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + PAGE_SHIFT); + chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK; #endif if (zero == false) { #ifdef JEMALLOC_FILL @@ -299,8 +321,8 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapelm = &chunk->map[pageind]; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); assert(run->magic == ARENA_RUN_MAGIC); @@ -339,7 +361,6 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; tcache_bin_t *tbin; - arena_chunk_map_t *mapelm; assert((size & PAGE_MASK) == 0); assert(arena_salloc(ptr) > small_maxclass); @@ -347,13 +368,12 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapelm = &chunk->map[pageind]; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = nbins + (size >> PAGE_SHIFT) - 1; #ifdef JEMALLOC_FILL if (opt_junk) - memset(ptr, 0x5a, bin->reg_size); + memset(ptr, 0x5a, size); #endif tbin = &tcache->tbins[binind]; diff --git a/dep/jemalloc/jemalloc/jemalloc.h b/dep/jemalloc/include/jemalloc/jemalloc.h index d9bafbfff55..287dac46ed2 100644 --- a/dep/jemalloc/jemalloc/jemalloc.h +++ b/dep/jemalloc/include/jemalloc/jemalloc.h @@ -4,19 +4,35 @@ extern "C" { #endif -#define JEMALLOC_VERSION "1.0.0-0-g5523399" -#define JEMALLOC_VERSION_MAJOR 1 -#define JEMALLOC_VERSION_MINOR 0 +#include <limits.h> +#include <strings.h> + +#define JEMALLOC_VERSION "2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf" +#define JEMALLOC_VERSION_MAJOR 2 +#define JEMALLOC_VERSION_MINOR 1 #define JEMALLOC_VERSION_BUGFIX 0 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "5523399" +#define JEMALLOC_VERSION_GID "1c4b088b08d3bc7617a34387e196ce03716160bf" #include "jemalloc_defs.h" #ifndef JEMALLOC_P # define JEMALLOC_P(s) s #endif -extern const char *JEMALLOC_P(malloc_options); +#define ALLOCM_LG_ALIGN ((int)0x3f) +#if LG_SIZEOF_PTR == 2 +#define ALLOCM_ALIGN(a) (ffs(a)-1) +#else +#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? 
ffs(a)-1 : ffs(a>>32)+31) +#endif +#define ALLOCM_ZERO ((int)0x40) +#define ALLOCM_NO_MOVE ((int)0x80) + +#define ALLOCM_SUCCESS 0 +#define ALLOCM_ERR_OOM 1 +#define ALLOCM_ERR_NOT_MOVED 2 + +extern const char *JEMALLOC_P(malloc_conf); extern void (*JEMALLOC_P(malloc_message))(void *, const char *); void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); @@ -36,6 +52,14 @@ int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) + JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, + size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) + JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); + #ifdef __cplusplus }; #endif diff --git a/dep/jemalloc/jemalloc/jemalloc_defs.h b/dep/jemalloc/include/jemalloc/jemalloc_defs.h index e8acaed3abd..715e02efdff 100644 --- a/dep/jemalloc/jemalloc/jemalloc_defs.h +++ b/dep/jemalloc/include/jemalloc/jemalloc_defs.h @@ -14,6 +14,7 @@ * the API prefixing. */ /* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ #if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) /* #undef JEMALLOC_P */ #endif @@ -32,6 +33,9 @@ # define JEMALLOC_ATTR(s) #endif +/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ +/* #undef JEMALLOC_CC_SILENCE */ + /* * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables * inline functions. @@ -67,19 +71,19 @@ * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage * segment (DSS). */ -/* #undef JEMALLOC_DSS */ +#define JEMALLOC_DSS /* JEMALLOC_SWAP enables mmap()ed swap file support. */ -/* #undef JEMALLOC_SWAP */ +#define JEMALLOC_SWAP /* Support memory filling (junk/zero). */ -/* #undef JEMALLOC_FILL */ +#define JEMALLOC_FILL /* Support optional abort() on OOM. */ -/* #undef JEMALLOC_XMALLOC */ +#define JEMALLOC_XMALLOC /* Support SYSV semantics. */ -/* #undef JEMALLOC_SYSV */ +#define JEMALLOC_SYSV /* Support lazy locking (avoid locking unless a second thread is launched). */ #define JEMALLOC_LAZY_LOCK @@ -93,8 +97,43 @@ /* TLS is used to map arenas and magazine caches to threads. */ /* #undef NO_TLS */ +/* + * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. + */ +/* #undef JEMALLOC_IVSALLOC */ + +/* + * Define overrides for non-standard allocator-related functions if they + * are present on the system. + */ +#define JEMALLOC_OVERRIDE_MEMALIGN 1 +#define JEMALLOC_OVERRIDE_VALLOC 1 + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ +/* #undef JEMALLOC_ZONE_VERSION */ + +/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */ +#define JEMALLOC_MREMAP_FIXED 1 + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched. + * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being + * unused, such that they will be discarded rather + * than swapped out. 
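
The JEMALLOC_PURGE_MADVISE_* choice configured just below selects which madvise(2) advice the chunk/arena code uses when it returns clean but unused pages to the kernel. A hedged sketch of a purge helper driven by these defines; pages_purge() is an illustrative name, not a function added by this patch:

#include <stddef.h>
#include <sys/mman.h>

static void
pages_purge(void *addr, size_t length)
{
#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED)
    /* Linux: discard now; the range reads back as zero pages. */
    madvise(addr, length, MADV_DONTNEED);
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
    /* FreeBSD/Darwin: mark reusable; the kernel reclaims lazily. */
    madvise(addr, length, MADV_FREE);
#else
    (void)addr;
    (void)length;
#endif
}

int
main(void)
{
    void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p != MAP_FAILED)
        pages_purge(p, 4096);
    return (0);
}

Either way the mapping itself stays intact, so the allocator can hand the same address range out again without another mmap().
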
+ */ +#define JEMALLOC_PURGE_MADVISE_DONTNEED 1 +/* #undef JEMALLOC_PURGE_MADVISE_FREE */ + /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#define LG_SIZEOF_PTR 3 +#define LG_SIZEOF_PTR 2 /* sizeof(int) == 2^LG_SIZEOF_INT. */ #define LG_SIZEOF_INT 2 diff --git a/dep/jemalloc/jemalloc.c b/dep/jemalloc/jemalloc.c deleted file mode 100644 index e01de0d5066..00000000000 --- a/dep/jemalloc/jemalloc.c +++ /dev/null @@ -1,1349 +0,0 @@ -/*- - * This allocator implementation is designed to provide scalable performance - * for multi-threaded programs on multi-processor systems. The following - * features are included for this purpose: - * - * + Multiple arenas are used if there are multiple CPUs, which reduces lock - * contention and cache sloshing. - * - * + Thread-specific caching is used if there are multiple threads, which - * reduces the amount of locking. - * - * + Cache line sharing between arenas is avoided for internal data - * structures. - * - * + Memory is managed in chunks and runs (chunks can be split into runs), - * rather than as individual pages. This provides a constant-time - * mechanism for associating allocations with particular arenas. - * - * Allocation requests are rounded up to the nearest size class, and no record - * of the original request size is maintained. Allocations are broken into - * categories according to size class. Assuming 1 MiB chunks, 4 KiB pages and - * a 16 byte quantum on a 32-bit system, the size classes in each category are - * as follows: - * - * |========================================| - * | Category | Subcategory | Size | - * |========================================| - * | Small | Tiny | 2 | - * | | | 4 | - * | | | 8 | - * | |------------------+----------| - * | | Quantum-spaced | 16 | - * | | | 32 | - * | | | 48 | - * | | | ... | - * | | | 96 | - * | | | 112 | - * | | | 128 | - * | |------------------+----------| - * | | Cacheline-spaced | 192 | - * | | | 256 | - * | | | 320 | - * | | | 384 | - * | | | 448 | - * | | | 512 | - * | |------------------+----------| - * | | Sub-page | 760 | - * | | | 1024 | - * | | | 1280 | - * | | | ... | - * | | | 3328 | - * | | | 3584 | - * | | | 3840 | - * |========================================| - * | Large | 4 KiB | - * | | 8 KiB | - * | | 12 KiB | - * | | ... | - * | | 1012 KiB | - * | | 1016 KiB | - * | | 1020 KiB | - * |========================================| - * | Huge | 1 MiB | - * | | 2 MiB | - * | | 3 MiB | - * | | ... | - * |========================================| - * - * Different mechanisms are used accoding to category: - * - * Small: Each size class is segregated into its own set of runs. Each run - * maintains a bitmap of which regions are free/allocated. - * - * Large : Each allocation is backed by a dedicated run. Metadata are stored - * in the associated arena chunk header maps. - * - * Huge : Each allocation is backed by a dedicated contiguous set of chunks. - * Metadata are stored in a separate red-black tree. - * - ******************************************************************************* - */ - -#define JEMALLOC_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -malloc_mutex_t arenas_lock; -arena_t **arenas; -unsigned narenas; -#ifndef NO_TLS -static unsigned next_arena; -#endif - -#ifndef NO_TLS -__thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); -#endif - -/* Set to true once the allocator has been initialized. 
*/ -static bool malloc_initialized = false; - -/* Used to let the initializing thread recursively allocate. */ -static pthread_t malloc_initializer = (unsigned long)0; - -/* Used to avoid initialization races. */ -static malloc_mutex_t init_lock = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP; - -#ifdef DYNAMIC_PAGE_SHIFT -size_t pagesize; -size_t pagesize_mask; -size_t lg_pagesize; -#endif - -unsigned ncpus; - -/* Runtime configuration options. */ -const char *JEMALLOC_P(malloc_options) - JEMALLOC_ATTR(visibility("default")); -#ifdef JEMALLOC_DEBUG -bool opt_abort = true; -# ifdef JEMALLOC_FILL -bool opt_junk = true; -# endif -#else -bool opt_abort = false; -# ifdef JEMALLOC_FILL -bool opt_junk = false; -# endif -#endif -#ifdef JEMALLOC_SYSV -bool opt_sysv = false; -#endif -#ifdef JEMALLOC_XMALLOC -bool opt_xmalloc = false; -#endif -#ifdef JEMALLOC_FILL -bool opt_zero = false; -#endif -static int opt_narenas_lshift = 0; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void wrtmessage(void *cbopaque, const char *s); -static void stats_print_atexit(void); -static unsigned malloc_ncpus(void); -static bool malloc_init_hard(void); -static void jemalloc_prefork(void); -static void jemalloc_postfork(void); - -/******************************************************************************/ -/* malloc_message() setup. */ - -#ifdef JEMALLOC_HAVE_ATTR -JEMALLOC_ATTR(visibility("hidden")) -#else -static -#endif -void -wrtmessage(void *cbopaque, const char *s) -{ - - write(STDERR_FILENO, s, strlen(s)); -} - -void (*JEMALLOC_P(malloc_message))(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; - -/******************************************************************************/ -/* - * Begin miscellaneous support functions. - */ - -/* Create a new arena and insert it into the arenas array at index ind. */ -arena_t * -arenas_extend(unsigned ind) -{ - arena_t *ret; - - /* Allocate enough space for trailing bins. */ - ret = (arena_t *)base_alloc(sizeof(arena_t) - + (sizeof(arena_bin_t) * (nbins - 1))); - if (ret != NULL && arena_new(ret, ind) == false) { - arenas[ind] = ret; - return (ret); - } - /* Only reached if there is an OOM error. */ - - /* - * OOM here is quite inconvenient to propagate, since dealing with it - * would require a check for failure in the fast path. Instead, punt - * by using arenas[0]. In practice, this is an extremely unlikely - * failure. - */ - malloc_write("<jemalloc>: Error initializing arena\n"); - if (opt_abort) - abort(); - - return (arenas[0]); -} - -#ifndef NO_TLS -/* - * Choose an arena based on a per-thread value (slow-path code only, called - * only by choose_arena()). - */ -arena_t * -choose_arena_hard(void) -{ - arena_t *ret; - - if (narenas > 1) { - malloc_mutex_lock(&arenas_lock); - if ((ret = arenas[next_arena]) == NULL) - ret = arenas_extend(next_arena); - next_arena = (next_arena + 1) % narenas; - malloc_mutex_unlock(&arenas_lock); - } else - ret = arenas[0]; - - arenas_map = ret; - - return (ret); -} -#endif - -static void -stats_print_atexit(void) -{ - -#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS)) - unsigned i; - - /* - * Merge stats from extant threads. This is racy, since individual - * threads do not lock when recording tcache stats events. As a - * consequence, the final stats may be slightly out of date by the time - * they are reported, if other threads continue to allocate. 
- */ - for (i = 0; i < narenas; i++) { - arena_t *arena = arenas[i]; - if (arena != NULL) { - tcache_t *tcache; - - /* - * tcache_stats_merge() locks bins, so if any code is - * introduced that acquires both arena and bin locks in - * the opposite order, deadlocks may result. - */ - malloc_mutex_lock(&arena->lock); - ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tcache, arena); - } - malloc_mutex_unlock(&arena->lock); - } - } -#endif - JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); -} - -/* - * End miscellaneous support functions. - */ -/******************************************************************************/ -/* - * Begin initialization functions. - */ - -static unsigned -malloc_ncpus(void) -{ - unsigned ret; - long result; - - result = sysconf(_SC_NPROCESSORS_ONLN); - if (result == -1) { - /* Error. */ - ret = 1; - } - ret = (unsigned)result; - - return (ret); -} - -/* - * FreeBSD's pthreads implementation calls malloc(3), so the malloc - * implementation has to take pains to avoid infinite recursion during - * initialization. - */ -static inline bool -malloc_init(void) -{ - - if (malloc_initialized == false) - return (malloc_init_hard()); - - return (false); -} - -static bool -malloc_init_hard(void) -{ - unsigned i; - int linklen; - char buf[PATH_MAX + 1]; - const char *opts; - arena_t *init_arenas[1]; - - malloc_mutex_lock(&init_lock); - if (malloc_initialized || malloc_initializer == pthread_self()) { - /* - * Another thread initialized the allocator before this one - * acquired init_lock, or this thread is the initializing - * thread, and it is recursively allocating. - */ - malloc_mutex_unlock(&init_lock); - return (false); - } - if (malloc_initializer != (unsigned long)0) { - /* Busy-wait until the initializing thread completes. */ - do { - malloc_mutex_unlock(&init_lock); - CPU_SPINWAIT; - malloc_mutex_lock(&init_lock); - } while (malloc_initialized == false); - return (false); - } - -#ifdef DYNAMIC_PAGE_SHIFT - /* Get page size. */ - { - long result; - - result = sysconf(_SC_PAGESIZE); - assert(result != -1); - pagesize = (unsigned)result; - - /* - * We assume that pagesize is a power of 2 when calculating - * pagesize_mask and lg_pagesize. - */ - assert(((result - 1) & result) == 0); - pagesize_mask = result - 1; - lg_pagesize = ffs((int)result) - 1; - } -#endif - - for (i = 0; i < 3; i++) { - unsigned j; - - /* Get runtime configuration. */ - switch (i) { - case 0: - if ((linklen = readlink("/etc/jemalloc.conf", buf, - sizeof(buf) - 1)) != -1) { - /* - * Use the contents of the "/etc/jemalloc.conf" - * symbolic link's name. - */ - buf[linklen] = '\0'; - opts = buf; - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - case 1: - if ((opts = getenv("JEMALLOC_OPTIONS")) != NULL) { - /* - * Do nothing; opts is already initialized to - * the value of the JEMALLOC_OPTIONS - * environment variable. - */ - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - case 2: - if (JEMALLOC_P(malloc_options) != NULL) { - /* - * Use options that were compiled into the - * program. - */ - opts = JEMALLOC_P(malloc_options); - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - default: - /* NOTREACHED */ - assert(false); - buf[0] = '\0'; - opts = buf; - } - - for (j = 0; opts[j] != '\0'; j++) { - unsigned k, nreps; - bool nseen; - - /* Parse repetition count, if any. 
*/ - for (nreps = 0, nseen = false;; j++, nseen = true) { - switch (opts[j]) { - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - case '8': case '9': - nreps *= 10; - nreps += opts[j] - '0'; - break; - default: - goto MALLOC_OUT; - } - } -MALLOC_OUT: - if (nseen == false) - nreps = 1; - - for (k = 0; k < nreps; k++) { - switch (opts[j]) { - case 'a': - opt_abort = false; - break; - case 'A': - opt_abort = true; - break; -#ifdef JEMALLOC_PROF - case 'b': - if (opt_lg_prof_bt_max > 0) - opt_lg_prof_bt_max--; - break; - case 'B': - if (opt_lg_prof_bt_max < LG_PROF_BT_MAX) - opt_lg_prof_bt_max++; - break; -#endif - case 'c': - if (opt_lg_cspace_max - 1 > - opt_lg_qspace_max && - opt_lg_cspace_max > - LG_CACHELINE) - opt_lg_cspace_max--; - break; - case 'C': - if (opt_lg_cspace_max < PAGE_SHIFT - - 1) - opt_lg_cspace_max++; - break; - case 'd': - if (opt_lg_dirty_mult + 1 < - (sizeof(size_t) << 3)) - opt_lg_dirty_mult++; - break; - case 'D': - if (opt_lg_dirty_mult >= 0) - opt_lg_dirty_mult--; - break; -#ifdef JEMALLOC_PROF - case 'e': - opt_prof_active = false; - break; - case 'E': - opt_prof_active = true; - break; - case 'f': - opt_prof = false; - break; - case 'F': - opt_prof = true; - break; -#endif -#ifdef JEMALLOC_TCACHE - case 'g': - if (opt_lg_tcache_gc_sweep >= 0) - opt_lg_tcache_gc_sweep--; - break; - case 'G': - if (opt_lg_tcache_gc_sweep + 1 < - (sizeof(size_t) << 3)) - opt_lg_tcache_gc_sweep++; - break; - case 'h': - opt_tcache = false; - break; - case 'H': - opt_tcache = true; - break; -#endif -#ifdef JEMALLOC_PROF - case 'i': - if (opt_lg_prof_interval >= 0) - opt_lg_prof_interval--; - break; - case 'I': - if (opt_lg_prof_interval + 1 < - (sizeof(uint64_t) << 3)) - opt_lg_prof_interval++; - break; -#endif -#ifdef JEMALLOC_FILL - case 'j': - opt_junk = false; - break; - case 'J': - opt_junk = true; - break; -#endif - case 'k': - /* - * Chunks always require at least one - * header page, plus one data page. 
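
All of the flag letters in this switch share one small grammar, parsed just above: an optional decimal repetition count followed by a single option character, so a string such as "3N" applies 'N' three times. A reduced, hypothetical parser showing only that mechanism (the real code applies each letter to an opt_* variable instead of printing):

#include <ctype.h>
#include <stdio.h>

static void
parse_opts(const char *opts)
{
    unsigned j;

    for (j = 0; opts[j] != '\0'; j++) {
        unsigned nreps = 0;
        int nseen = 0;

        /* Optional leading repetition count. */
        while (isdigit((unsigned char)opts[j])) {
            nreps = nreps * 10 + (unsigned)(opts[j] - '0');
            nseen = 1;
            j++;
        }
        if (opts[j] == '\0')
            break;      /* Trailing count with no flag letter. */
        if (!nseen)
            nreps = 1;
        printf("apply '%c' %u time(s)\n", opts[j], nreps);
    }
}

int
main(void)
{
    /* e.g. what JEMALLOC_OPTIONS="3N2kP" would request. */
    parse_opts("3N2kP");
    return (0);
}
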
- */ - if ((1U << (opt_lg_chunk - 1)) >= - (2U << PAGE_SHIFT)) - opt_lg_chunk--; - break; - case 'K': - if (opt_lg_chunk + 1 < - (sizeof(size_t) << 3)) - opt_lg_chunk++; - break; -#ifdef JEMALLOC_PROF - case 'l': - opt_prof_leak = false; - break; - case 'L': - opt_prof_leak = true; - break; -#endif -#ifdef JEMALLOC_TCACHE - case 'm': - if (opt_lg_tcache_maxclass >= 0) - opt_lg_tcache_maxclass--; - break; - case 'M': - if (opt_lg_tcache_maxclass + 1 < - (sizeof(size_t) << 3)) - opt_lg_tcache_maxclass++; - break; -#endif - case 'n': - opt_narenas_lshift--; - break; - case 'N': - opt_narenas_lshift++; - break; -#ifdef JEMALLOC_SWAP - case 'o': - opt_overcommit = false; - break; - case 'O': - opt_overcommit = true; - break; -#endif - case 'p': - opt_stats_print = false; - break; - case 'P': - opt_stats_print = true; - break; - case 'q': - if (opt_lg_qspace_max > LG_QUANTUM) - opt_lg_qspace_max--; - break; - case 'Q': - if (opt_lg_qspace_max + 1 < - opt_lg_cspace_max) - opt_lg_qspace_max++; - break; -#ifdef JEMALLOC_PROF - case 's': - if (opt_lg_prof_sample > 0) - opt_lg_prof_sample--; - break; - case 'S': - if (opt_lg_prof_sample + 1 < - (sizeof(uint64_t) << 3)) - opt_lg_prof_sample++; - break; - case 'u': - opt_prof_udump = false; - break; - case 'U': - opt_prof_udump = true; - break; -#endif -#ifdef JEMALLOC_SYSV - case 'v': - opt_sysv = false; - break; - case 'V': - opt_sysv = true; - break; -#endif -#ifdef JEMALLOC_XMALLOC - case 'x': - opt_xmalloc = false; - break; - case 'X': - opt_xmalloc = true; - break; -#endif -#ifdef JEMALLOC_FILL - case 'z': - opt_zero = false; - break; - case 'Z': - opt_zero = true; - break; -#endif - default: { - char cbuf[2]; - - cbuf[0] = opts[j]; - cbuf[1] = '\0'; - malloc_write( - "<jemalloc>: Unsupported character " - "in malloc options: '"); - malloc_write(cbuf); - malloc_write("'\n"); - } - } - } - } - } - - /* Register fork handlers. */ - if (pthread_atfork(jemalloc_prefork, jemalloc_postfork, - jemalloc_postfork) != 0) { - malloc_write("<jemalloc>: Error in pthread_atfork()\n"); - if (opt_abort) - abort(); - } - - if (ctl_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (opt_stats_print) { - /* Print statistics at exit. */ - if (atexit(stats_print_atexit) != 0) { - malloc_write("<jemalloc>: Error in atexit()\n"); - if (opt_abort) - abort(); - } - } - - if (chunk_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (base_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - -#ifdef JEMALLOC_PROF - prof_boot0(); -#endif - - if (arena_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - -#ifdef JEMALLOC_TCACHE - tcache_boot(); -#endif - - if (huge_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - /* - * Create enough scaffolding to allow recursive allocation in - * malloc_ncpus(). - */ - narenas = 1; - arenas = init_arenas; - memset(arenas, 0, sizeof(arena_t *) * narenas); - - /* - * Initialize one arena here. The rest are lazily created in - * choose_arena_hard(). - */ - arenas_extend(0); - if (arenas[0] == NULL) { - malloc_mutex_unlock(&init_lock); - return (true); - } - -#ifndef NO_TLS - /* - * Assign the initial arena to the initial thread, in order to avoid - * spurious creation of an extra arena if the application switches to - * threaded mode. - */ - arenas_map = arenas[0]; -#endif - - malloc_mutex_init(&arenas_lock); - -#ifdef JEMALLOC_PROF - if (prof_boot1()) { - malloc_mutex_unlock(&init_lock); - return (true); - } -#endif - - /* Get number of CPUs. 
*/ - malloc_initializer = pthread_self(); - malloc_mutex_unlock(&init_lock); - ncpus = malloc_ncpus(); - malloc_mutex_lock(&init_lock); - - if (ncpus > 1) { - /* - * For SMP systems, create more than one arena per CPU by - * default. - */ - opt_narenas_lshift += 2; - } - - /* Determine how many arenas to use. */ - narenas = ncpus; - if (opt_narenas_lshift > 0) { - if ((narenas << opt_narenas_lshift) > narenas) - narenas <<= opt_narenas_lshift; - /* - * Make sure not to exceed the limits of what base_alloc() can - * handle. - */ - if (narenas * sizeof(arena_t *) > chunksize) - narenas = chunksize / sizeof(arena_t *); - } else if (opt_narenas_lshift < 0) { - if ((narenas >> -opt_narenas_lshift) < narenas) - narenas >>= -opt_narenas_lshift; - /* Make sure there is at least one arena. */ - if (narenas == 0) - narenas = 1; - } - -#ifdef NO_TLS - if (narenas > 1) { - static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19, - 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, - 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, - 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, - 223, 227, 229, 233, 239, 241, 251, 257, 263}; - unsigned nprimes, parenas; - - /* - * Pick a prime number of hash arenas that is more than narenas - * so that direct hashing of pthread_self() pointers tends to - * spread allocations evenly among the arenas. - */ - assert((narenas & 1) == 0); /* narenas must be even. */ - nprimes = (sizeof(primes) >> LG_SIZEOF_INT); - parenas = primes[nprimes - 1]; /* In case not enough primes. */ - for (i = 1; i < nprimes; i++) { - if (primes[i] > narenas) { - parenas = primes[i]; - break; - } - } - narenas = parenas; - } -#endif - -#ifndef NO_TLS - next_arena = 0; -#endif - - /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); - if (arenas == NULL) { - malloc_mutex_unlock(&init_lock); - return (true); - } - /* - * Zero the array. In practice, this should always be pre-zeroed, - * since it was just mmap()ed, but let's be sure. - */ - memset(arenas, 0, sizeof(arena_t *) * narenas); - /* Copy the pointer to the one arena that was already initialized. */ - arenas[0] = init_arenas[0]; - - malloc_initialized = true; - malloc_mutex_unlock(&init_lock); - return (false); -} - -/* - * End initialization functions. - */ -/******************************************************************************/ -/* - * Begin malloc(3)-compatible functions. 
- */ - -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) -void * -JEMALLOC_P(malloc)(size_t size) -{ - void *ret; -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt; -#endif - - if (malloc_init()) { - ret = NULL; - goto OOM; - } - - if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif - size = 1; -#ifdef JEMALLOC_SYSV - else { -# ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write("<jemalloc>: Error in malloc(): " - "invalid size 0\n"); - abort(); - } -# endif - ret = NULL; - goto RETURN; - } -#endif - } - -#ifdef JEMALLOC_PROF - if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) { - ret = NULL; - goto OOM; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <= - small_maxclass) { - ret = imalloc(small_maxclass+1); - if (ret != NULL) - arena_prof_promoted(ret, size); - } else - ret = imalloc(size); - } else -#endif - ret = imalloc(size); - -OOM: - if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write("<jemalloc>: Error in malloc(): " - "out of memory\n"); - abort(); - } -#endif - errno = ENOMEM; - } - -#ifdef JEMALLOC_SYSV -RETURN: -#endif -#ifdef JEMALLOC_PROF - if (opt_prof && ret != NULL) - prof_malloc(ret, cnt); -#endif - return (ret); -} - -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) -{ - int ret; - void *result; -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt; -#endif - - if (malloc_init()) - result = NULL; - else { - if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif - size = 1; -#ifdef JEMALLOC_SYSV - else { -# ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write("<jemalloc>: Error in " - "posix_memalign(): invalid size " - "0\n"); - abort(); - } -# endif - result = NULL; - *memptr = NULL; - ret = 0; - goto RETURN; - } -#endif - } - - /* Make sure that alignment is a large enough power of 2. 
*/ - if (((alignment - 1) & alignment) != 0 - || alignment < sizeof(void *)) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write("<jemalloc>: Error in " - "posix_memalign(): invalid alignment\n"); - abort(); - } -#endif - result = NULL; - ret = EINVAL; - goto RETURN; - } - -#ifdef JEMALLOC_PROF - if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) { - result = NULL; - ret = EINVAL; - } else { - if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && size <= small_maxclass) { - result = ipalloc(alignment, - small_maxclass+1); - if (result != NULL) { - arena_prof_promoted(result, - size); - } - } else - result = ipalloc(alignment, size); - } - } else -#endif - result = ipalloc(alignment, size); - } - - if (result == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write("<jemalloc>: Error in posix_memalign(): " - "out of memory\n"); - abort(); - } -#endif - ret = ENOMEM; - goto RETURN; - } - - *memptr = result; - ret = 0; - -RETURN: -#ifdef JEMALLOC_PROF - if (opt_prof && result != NULL) - prof_malloc(result, cnt); -#endif - return (ret); -} - -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) -void * -JEMALLOC_P(calloc)(size_t num, size_t size) -{ - void *ret; - size_t num_size; -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt; -#endif - - if (malloc_init()) { - num_size = 0; - ret = NULL; - goto RETURN; - } - - num_size = num * size; - if (num_size == 0) { -#ifdef JEMALLOC_SYSV - if ((opt_sysv == false) && ((num == 0) || (size == 0))) -#endif - num_size = 1; -#ifdef JEMALLOC_SYSV - else { - ret = NULL; - goto RETURN; - } -#endif - /* - * Try to avoid division here. We know that it isn't possible to - * overflow during multiplication if neither operand uses any of the - * most significant half of the bits in a size_t. - */ - } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) - && (num_size / size != num)) { - /* size_t overflow. 
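
The calloc() overflow test above relies on a cheap filter: a product of two size_t values can only wrap if at least one operand has a bit set in the upper half of size_t, so the division is skipped in the common case. The same check as a standalone helper; mul_overflows() is an illustrative name, and SIZE_MAX plays the role of the SIZE_T_MAX macro used by the deleted code:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static bool
mul_overflows(size_t num, size_t size, size_t *prod)
{
    *prod = num * size;
    /* Divide only when an operand uses the top half of size_t. */
    if (((num | size) & (SIZE_MAX << (sizeof(size_t) << 2))) &&
        size != 0 && *prod / size != num)
        return (true);
    return (false);
}

int
main(void)
{
    size_t n;

    printf("%d\n", mul_overflows(1000, 1000, &n));  /* 0, n == 1000000 */
    printf("%d\n", mul_overflows(SIZE_MAX, 2, &n)); /* 1 */
    return (0);
}
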
*/ - ret = NULL; - goto RETURN; - } - -#ifdef JEMALLOC_PROF - if (opt_prof) { - if ((cnt = prof_alloc_prep(num_size)) == NULL) { - ret = NULL; - goto RETURN; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && num_size - <= small_maxclass) { - ret = icalloc(small_maxclass+1); - if (ret != NULL) - arena_prof_promoted(ret, num_size); - } else - ret = icalloc(num_size); - } else -#endif - ret = icalloc(num_size); - -RETURN: - if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write("<jemalloc>: Error in calloc(): out of " - "memory\n"); - abort(); - } -#endif - errno = ENOMEM; - } - -#ifdef JEMALLOC_PROF - if (opt_prof && ret != NULL) - prof_malloc(ret, cnt); -#endif - return (ret); -} - -JEMALLOC_ATTR(visibility("default")) -void * -JEMALLOC_P(realloc)(void *ptr, size_t size) -{ - void *ret; -#ifdef JEMALLOC_PROF - size_t old_size; - prof_thr_cnt_t *cnt, *old_cnt; -#endif - - if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif - size = 1; -#ifdef JEMALLOC_SYSV - else { - if (ptr != NULL) { -#ifdef JEMALLOC_PROF - if (opt_prof) { - old_size = isalloc(ptr); - old_cnt = prof_cnt_get(ptr); - cnt = NULL; - } -#endif - idalloc(ptr); - } -#ifdef JEMALLOC_PROF - else if (opt_prof) { - old_size = 0; - old_cnt = NULL; - cnt = NULL; - } -#endif - ret = NULL; - goto RETURN; - } -#endif - } - - if (ptr != NULL) { - assert(malloc_initialized || malloc_initializer == - pthread_self()); - -#ifdef JEMALLOC_PROF - if (opt_prof) { - old_size = isalloc(ptr); - old_cnt = prof_cnt_get(ptr); - if ((cnt = prof_alloc_prep(size)) == NULL) { - ret = NULL; - goto OOM; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - size <= small_maxclass) { - ret = iralloc(ptr, small_maxclass+1); - if (ret != NULL) - arena_prof_promoted(ret, size); - } else - ret = iralloc(ptr, size); - } else -#endif - ret = iralloc(ptr, size); - -#ifdef JEMALLOC_PROF -OOM: -#endif - if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write("<jemalloc>: Error in realloc(): " - "out of memory\n"); - abort(); - } -#endif - errno = ENOMEM; - } - } else { -#ifdef JEMALLOC_PROF - if (opt_prof) { - old_size = 0; - old_cnt = NULL; - } -#endif - if (malloc_init()) { -#ifdef JEMALLOC_PROF - if (opt_prof) - cnt = NULL; -#endif - ret = NULL; - } else { -#ifdef JEMALLOC_PROF - if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) - ret = NULL; - else { - if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && size <= - small_maxclass) { - ret = imalloc(small_maxclass+1); - if (ret != NULL) { - arena_prof_promoted(ret, - size); - } - } else - ret = imalloc(size); - } - } else -#endif - ret = imalloc(size); - } - - if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write("<jemalloc>: Error in realloc(): " - "out of memory\n"); - abort(); - } -#endif - errno = ENOMEM; - } - } - -#ifdef JEMALLOC_SYSV -RETURN: -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) - prof_realloc(ret, cnt, ptr, old_size, old_cnt); -#endif - return (ret); -} - -JEMALLOC_ATTR(visibility("default")) -void -JEMALLOC_P(free)(void *ptr) -{ - - if (ptr != NULL) { - assert(malloc_initialized || malloc_initializer == - pthread_self()); - -#ifdef JEMALLOC_PROF - if (opt_prof) - prof_free(ptr); -#endif - idalloc(ptr); - } -} - -/* - * End malloc(3)-compatible functions. - */ -/******************************************************************************/ -/* - * Begin non-standard functions. 
- */ - -JEMALLOC_ATTR(visibility("default")) -size_t -JEMALLOC_P(malloc_usable_size)(const void *ptr) -{ - size_t ret; - - assert(ptr != NULL); - ret = isalloc(ptr); - - return (ret); -} - -#ifdef JEMALLOC_SWAP -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(malloc_swap_enable)(const int *fds, unsigned nfds, int prezeroed) -{ - - /* - * Make sure malloc is initialized, because we need page size, chunk - * size, etc. - */ - if (malloc_init()) - return (-1); - - return (chunk_swap_enable(fds, nfds, (prezeroed != 0)) ? -1 : 0); -} -#endif - -JEMALLOC_ATTR(visibility("default")) -void -JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), - void *cbopaque, const char *opts) -{ - - stats_print(write_cb, cbopaque, opts); -} - -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) -{ - - if (malloc_init()) - return (EAGAIN); - - return (ctl_byname(name, oldp, oldlenp, newp, newlen)); -} - -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp) -{ - - if (malloc_init()) - return (EAGAIN); - - return (ctl_nametomib(name, mibp, miblenp)); -} - -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - - if (malloc_init()) - return (EAGAIN); - - return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); -} - -/* - * End non-standard functions. - */ -/******************************************************************************/ - -/* - * The following functions are used by threading libraries for protection of - * malloc during fork(). These functions are only called if the program is - * running in threaded mode, so there is no need to check whether the program - * is threaded here. - */ - -static void -jemalloc_prefork(void) -{ - unsigned i; - - /* Acquire all mutexes in a safe order. */ - - malloc_mutex_lock(&arenas_lock); - for (i = 0; i < narenas; i++) { - if (arenas[i] != NULL) - malloc_mutex_lock(&arenas[i]->lock); - } - - malloc_mutex_lock(&base_mtx); - - malloc_mutex_lock(&huge_mtx); - -#ifdef JEMALLOC_DSS - malloc_mutex_lock(&dss_mtx); -#endif - -#ifdef JEMALLOC_SWAP - malloc_mutex_lock(&swap_mtx); -#endif -} - -static void -jemalloc_postfork(void) -{ - unsigned i; - - /* Release all mutexes, now that fork() has completed. 
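
jemalloc_prefork() above and jemalloc_postfork() below implement the standard fork-safety recipe for an allocator: the prepare hook takes every allocator mutex in a fixed order so no lock can be left held by a vanished thread in the child, and the same release routine is registered for both parent and child, exactly as malloc_init_hard() does with pthread_atfork(). A minimal sketch of that wiring with placeholder locks:

#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

static void
prefork(void)
{
    /* Always the same order, to avoid deadlock with regular callers. */
    pthread_mutex_lock(&lock_a);
    pthread_mutex_lock(&lock_b);
}

static void
postfork(void)
{
    /* Reverse order on release, in both parent and child. */
    pthread_mutex_unlock(&lock_b);
    pthread_mutex_unlock(&lock_a);
}

int
main(void)
{
    /* Same handler for parent and child, as in malloc_init_hard(). */
    return (pthread_atfork(prefork, postfork, postfork) != 0);
}
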
*/ - -#ifdef JEMALLOC_SWAP - malloc_mutex_unlock(&swap_mtx); -#endif - -#ifdef JEMALLOC_DSS - malloc_mutex_unlock(&dss_mtx); -#endif - - malloc_mutex_unlock(&huge_mtx); - - malloc_mutex_unlock(&base_mtx); - - for (i = 0; i < narenas; i++) { - if (arenas[i] != NULL) - malloc_mutex_unlock(&arenas[i]->lock); - } - malloc_mutex_unlock(&arenas_lock); -} diff --git a/dep/jemalloc/jemalloc/internal/jemalloc_internal.h.in b/dep/jemalloc/jemalloc/internal/jemalloc_internal.h.in deleted file mode 100644 index 2c3f32f126d..00000000000 --- a/dep/jemalloc/jemalloc/internal/jemalloc_internal.h.in +++ /dev/null @@ -1,561 +0,0 @@ -#include <sys/mman.h> -#include <sys/param.h> -#include <sys/time.h> -#include <sys/types.h> -#include <sys/sysctl.h> -#include <sys/uio.h> - -#include <errno.h> -#include <limits.h> -#ifndef SIZE_T_MAX -# define SIZE_T_MAX SIZE_MAX -#endif -#include <pthread.h> -#include <sched.h> -#include <stdarg.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <inttypes.h> -#include <string.h> -#include <strings.h> -#include <unistd.h> -#include <fcntl.h> -#include <pthread.h> - -#define JEMALLOC_MANGLE -#include "../jemalloc@install_suffix@.h" - -#ifdef JEMALLOC_LAZY_LOCK -#include <dlfcn.h> -#endif - -#define RB_COMPACT -#include "jemalloc/internal/rb.h" -#include "jemalloc/internal/qr.h" -#include "jemalloc/internal/ql.h" - -extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); - -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifdef JEMALLOC_DEBUG -# define assert(e) do { \ - if (!(e)) { \ - char line_buf[UMAX2S_BUFSIZE]; \ - malloc_write("<jemalloc>: "); \ - malloc_write(__FILE__); \ - malloc_write(":"); \ - malloc_write(umax2s(__LINE__, 10, line_buf)); \ - malloc_write(": Failed assertion: "); \ - malloc_write("\""); \ - malloc_write(#e); \ - malloc_write("\"\n"); \ - abort(); \ - } \ -} while (0) -#else -#define assert(e) -#endif - -/* - * jemalloc can conceptually be broken into components (arena, tcache, etc.), - * but there are circular dependencies that cannot be broken without - * substantial performance degradation. In order to reduce the effect on - * visual code flow, read the header files in multiple passes, with one of the - * following cpp variables defined during each pass: - * - * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data - * types. - * JEMALLOC_H_STRUCTS : Data structures. - * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. - * JEMALLOC_H_INLINES : Inline functions. - */ -/******************************************************************************/ -#define JEMALLOC_H_TYPES - -#define ZU(z) ((size_t)z) - -#ifndef __DECONST -# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) -#endif - -#ifdef JEMALLOC_DEBUG - /* Disable inlining to make debugging easier. */ -# define JEMALLOC_INLINE -# define inline -#else -# define JEMALLOC_ENABLE_INLINE -# define JEMALLOC_INLINE static inline -#endif - -/* Size of stack-allocated buffer passed to strerror_r(). */ -#define STRERROR_BUF 64 - -/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. 
*/ -#ifdef __i386__ -# define LG_QUANTUM 4 -#endif -#ifdef __ia64__ -# define LG_QUANTUM 4 -#endif -#ifdef __alpha__ -# define LG_QUANTUM 4 -#endif -#ifdef __sparc64__ -# define LG_QUANTUM 4 -#endif -#if (defined(__amd64__) || defined(__x86_64__)) -# define LG_QUANTUM 4 -#endif -#ifdef __arm__ -# define LG_QUANTUM 3 -#endif -#ifdef __mips__ -# define LG_QUANTUM 3 -#endif -#ifdef __powerpc__ -# define LG_QUANTUM 4 -#endif -#ifdef __s390x__ -# define LG_QUANTUM 4 -#endif - -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) - -/* Return the smallest quantum multiple that is >= a. */ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) - -#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) - -/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ -#if (!defined(PIC) && !defined(NO_TLS)) -# define NO_TLS -#endif - -/* - * Maximum size of L1 cache line. This is used to avoid cache line aliasing. - * In addition, this controls the spacing of cacheline-spaced size classes. - */ -#define LG_CACHELINE 6 -#define CACHELINE ((size_t)(1U << LG_CACHELINE)) -#define CACHELINE_MASK (CACHELINE - 1) - -/* Return the smallest cacheline multiple that is >= s. */ -#define CACHELINE_CEILING(s) \ - (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) - -/* - * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If - * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where - * compile-time values are required for the purposes of defining data - * structures. - */ -#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) -#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) - -#ifdef DYNAMIC_PAGE_SHIFT -# define PAGE_SHIFT lg_pagesize -# define PAGE_SIZE pagesize -# define PAGE_MASK pagesize_mask -#else -# define PAGE_SHIFT STATIC_PAGE_SHIFT -# define PAGE_SIZE STATIC_PAGE_SIZE -# define PAGE_MASK STATIC_PAGE_MASK -#endif - -/* Return the smallest pagesize multiple that is >= s. 
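
QUANTUM_CEILING(), CACHELINE_CEILING() and the PAGE_CEILING() macro defined just below all round up to a power-of-two multiple with an add-and-mask, avoiding any division. A tiny worked example, assuming a fixed 4 KiB page rather than the configure-detected STATIC_PAGE_SHIFT:

#include <stddef.h>
#include <stdio.h>

#define LG_PAGE     12          /* Assumed 4 KiB pages. */
#define PAGE        ((size_t)1 << LG_PAGE)
#define PAGE_MSK    (PAGE - 1)
#define PAGE_CEIL(s)    (((s) + PAGE_MSK) & ~PAGE_MSK)

int
main(void)
{
    /* 1 -> 4096, 4096 -> 4096, 4097 -> 8192. */
    printf("%zu %zu %zu\n", PAGE_CEIL((size_t)1),
        PAGE_CEIL((size_t)4096), PAGE_CEIL((size_t)4097));
    return (0);
}

The same identity underlies the quantum and cacheline variants; only the mask changes.
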
*/ -#define PAGE_CEILING(s) \ - (((s) + PAGE_MASK) & ~PAGE_MASK) - -#include "jemalloc/internal/prn.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_TYPES -/******************************************************************************/ -#define JEMALLOC_H_STRUCTS - -#include "jemalloc/internal/prn.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_STRUCTS -/******************************************************************************/ -#define JEMALLOC_H_EXTERNS - -extern bool opt_abort; -#ifdef JEMALLOC_FILL -extern bool opt_junk; -#endif -#ifdef JEMALLOC_SYSV -extern bool opt_sysv; -#endif -#ifdef JEMALLOC_XMALLOC -extern bool opt_xmalloc; -#endif -#ifdef JEMALLOC_FILL -extern bool opt_zero; -#endif - -#ifdef DYNAMIC_PAGE_SHIFT -extern size_t pagesize; -extern size_t pagesize_mask; -extern size_t lg_pagesize; -#endif - -/* Number of CPUs. */ -extern unsigned ncpus; - -extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ -#ifndef NO_TLS -/* - * Map of pthread_self() --> arenas[???], used for selecting an arena to use - * for allocations. - */ -extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); -#endif -/* - * Arenas that are used to service external requests. Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. 
- */ -extern arena_t **arenas; -extern unsigned narenas; - -arena_t *arenas_extend(unsigned ind); -#ifndef NO_TLS -arena_t *choose_arena_hard(void); -#endif - -#include "jemalloc/internal/prn.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_EXTERNS -/******************************************************************************/ -#define JEMALLOC_H_INLINES - -#include "jemalloc/internal/prn.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" - -#ifndef JEMALLOC_ENABLE_INLINE -void malloc_write(const char *s); -arena_t *choose_arena(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* - * Wrapper around malloc_message() that avoids the need for - * JEMALLOC_P(malloc_message)(...) throughout the code. - */ -JEMALLOC_INLINE void -malloc_write(const char *s) -{ - - JEMALLOC_P(malloc_message)(NULL, s); -} - -/* - * Choose an arena based on a per-thread value (fast-path code, calls slow-path - * code if necessary). - */ -JEMALLOC_INLINE arena_t * -choose_arena(void) -{ - arena_t *ret; - - /* - * We can only use TLS if this is a PIC library, since for the static - * library version, libc's malloc is used by TLS allocation, which - * introduces a bootstrapping issue. - */ -#ifndef NO_TLS - ret = arenas_map; - if (ret == NULL) { - ret = choose_arena_hard(); - assert(ret != NULL); - } -#else - if (isthreaded && narenas > 1) { - unsigned long ind; - - /* - * Hash pthread_self() to one of the arenas. There is a prime - * number of arenas, so this has a reasonable chance of - * working. Even so, the hashing can be easily thwarted by - * inconvenient pthread_self() values. Without specific - * knowledge of how pthread_self() calculates values, we can't - * easily do much better than this. - */ - ind = (unsigned long) pthread_self() % narenas; - - /* - * Optimistially assume that arenas[ind] has been initialized. - * At worst, we find out that some other thread has already - * done so, after acquiring the lock in preparation. Note that - * this lazy locking also has the effect of lazily forcing - * cache coherency; without the lock acquisition, there's no - * guarantee that modification of arenas[ind] by another thread - * would be seen on this CPU for an arbitrary amount of time. - * - * In general, this approach to modifying a synchronized value - * isn't a good idea, but in this case we only ever modify the - * value once, so things work out well. - */ - ret = arenas[ind]; - if (ret == NULL) { - /* - * Avoid races with another thread that may have already - * initialized arenas[ind]. 
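
The NO_TLS branch here initializes arenas[ind] lazily with a check, lock, re-check sequence (as the code just below does), and, as the comment notes, only gets away with the unlocked first read because each slot is written exactly once. A reduced sketch of that pattern with placeholder names (slot_get(), slots, NSLOTS are not from the patch):

#include <pthread.h>
#include <stdlib.h>

#define NSLOTS 8

static pthread_mutex_t slots_lock = PTHREAD_MUTEX_INITIALIZER;
static void *slots[NSLOTS];

static void *
slot_get(unsigned ind)
{
    void *ret = slots[ind];     /* Optimistic, unlocked read. */

    if (ret == NULL) {
        pthread_mutex_lock(&slots_lock);
        if (slots[ind] == NULL) {
            /* Stand-in for arenas_extend(ind); each slot is
             * written at most once and never changed, which is
             * what makes the unlocked fast path tolerable. */
            slots[ind] = calloc(1, 64);
        }
        ret = slots[ind];
        pthread_mutex_unlock(&slots_lock);
    }
    return (ret);
}

int
main(void)
{
    return (slot_get(3) == NULL);
}
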
- */ - malloc_mutex_lock(&arenas_lock); - if (arenas[ind] == NULL) - ret = arenas_extend((unsigned)ind); - else - ret = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - } - } else - ret = arenas[0]; -#endif - - assert(ret != NULL); - return (ret); -} -#endif - -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" - -#ifndef JEMALLOC_ENABLE_INLINE -void *imalloc(size_t size); -void *icalloc(size_t size); -void *ipalloc(size_t alignment, size_t size); -size_t isalloc(const void *ptr); -void *iralloc(void *ptr, size_t size); -void idalloc(void *ptr); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE void * -imalloc(size_t size) -{ - - assert(size != 0); - - if (size <= arena_maxclass) - return (arena_malloc(size, false)); - else - return (huge_malloc(size, false)); -} - -JEMALLOC_INLINE void * -icalloc(size_t size) -{ - - if (size <= arena_maxclass) - return (arena_malloc(size, true)); - else - return (huge_malloc(size, true)); -} - -JEMALLOC_INLINE void * -ipalloc(size_t alignment, size_t size) -{ - void *ret; - size_t ceil_size; - - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - * - * Depending on runtime settings, it is possible that arena_malloc() - * will further round up to a power of two, but that never causes - * correctness issues. - */ - ceil_size = (size + (alignment - 1)) & (-alignment); - /* - * (ceil_size < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (ceil_size < size) { - /* size_t overflow. */ - return (NULL); - } - - if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE - && ceil_size <= arena_maxclass)) - ret = arena_malloc(ceil_size, false); - else { - size_t run_size; - - /* - * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. - */ - alignment = PAGE_CEILING(alignment); - ceil_size = PAGE_CEILING(size); - /* - * (ceil_size < size) protects against very large sizes within - * PAGE_SIZE of SIZE_T_MAX. - * - * (ceil_size + alignment < ceil_size) protects against the - * combination of maximal alignment and ceil_size large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * ceil_size value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (ceil_size < size || ceil_size + alignment < ceil_size) { - /* size_t overflow. */ - return (NULL); - } - - /* - * Calculate the size of the over-size run that arena_palloc() - * would need to allocate in order to guarantee the alignment. - */ - if (ceil_size >= alignment) - run_size = ceil_size + alignment - PAGE_SIZE; - else { - /* - * It is possible that (alignment << 1) will cause - * overflow, but it doesn't matter because we also - * subtract PAGE_SIZE, which in the case of overflow - * leaves us with a very large run_size. 
That causes - * the first conditional below to fail, which means - * that the bogus run_size value never gets used for - * anything important. - */ - run_size = (alignment << 1) - PAGE_SIZE; - } - - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), alignment, ceil_size, - run_size); - } else if (alignment <= chunksize) - ret = huge_malloc(ceil_size, false); - else - ret = huge_palloc(alignment, ceil_size); - } - - assert(((uintptr_t)ret & (alignment - 1)) == 0); - return (ret); -} - -JEMALLOC_INLINE size_t -isalloc(const void *ptr) -{ - size_t ret; - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); - -#ifdef JEMALLOC_PROF - ret = arena_salloc_demote(ptr); -#else - ret = arena_salloc(ptr); -#endif - } else - ret = huge_salloc(ptr); - - return (ret); -} - -JEMALLOC_INLINE void * -iralloc(void *ptr, size_t size) -{ - size_t oldsize; - - assert(ptr != NULL); - assert(size != 0); - - oldsize = isalloc(ptr); - - if (size <= arena_maxclass) - return (arena_ralloc(ptr, size, oldsize)); - else - return (huge_ralloc(ptr, size, oldsize)); -} - -JEMALLOC_INLINE void -idalloc(void *ptr) -{ - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr); - else - huge_dalloc(ptr); -} -#endif - -#undef JEMALLOC_H_INLINES -/******************************************************************************/ diff --git a/dep/jemalloc/jemalloc/internal/prof.h b/dep/jemalloc/jemalloc/internal/prof.h deleted file mode 100644 index 6e71552d85e..00000000000 --- a/dep/jemalloc/jemalloc/internal/prof.h +++ /dev/null @@ -1,171 +0,0 @@ -#ifdef JEMALLOC_PROF -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_thr_cnt_s prof_thr_cnt_t; -typedef struct prof_ctx_s prof_ctx_t; -typedef struct prof_s prof_t; - -/* Option defaults. */ -#define LG_PROF_BT_MAX_DEFAULT 2 -#define LG_PROF_SAMPLE_DEFAULT 0 -#define LG_PROF_INTERVAL_DEFAULT 30 - -/* - * Hard limit on stack backtrace depth. Note that the version of - * prof_backtrace() that is based on __builtin_return_address() necessarily has - * a hard-coded number of backtrace frame handlers, so increasing - * LG_PROF_BT_MAX requires changing prof_backtrace(). - */ -#define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ -#define PROF_BT_MAX (1U << LG_PROF_BT_MAX) - -/* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUF_SIZE 65536 - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - prof_bt_t *bt; - unsigned nignore; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* - * Profiling counters. An allocation/deallocation pair can operate on - * different prof_thr_cnt_t objects that are linked into the same - * prof_ctx_t sets_ql, so it is possible for the cur* counters to go - * negative. 
In principle it is possible for the *bytes counters to - * overflow/underflow, but a general solution would require some form - * of 128-bit counter solution; this implementation doesn't bother to - * solve that problem. - */ - int64_t curobjs; - int64_t curbytes; - uint64_t accumobjs; - uint64_t accumbytes; -}; - -struct prof_thr_cnt_s { - /* Linkage into prof_ctx_t's sets_ql. */ - ql_elm(prof_thr_cnt_t) link; - - /* - * Associated context. If a thread frees an object that it did not - * allocate, it is possible that the context is not cached in the - * thread's hash table, in which case it must be able to look up the - * context, insert a new prof_thr_cnt_t into the thread's hash table, - * and link it into the prof_ctx_t's sets_ql. - */ - prof_ctx_t *ctx; - - /* - * Threads use memory barriers to update the counters. Since there is - * only ever one writer, the only challenge is for the reader to get a - * consistent read of the counters. - * - * The writer uses this series of operations: - * - * 1) Increment epoch to an odd number. - * 2) Update counters. - * 3) Increment epoch to an even number. - * - * The reader must assure 1) that the epoch is even while it reads the - * counters, and 2) that the epoch doesn't change between the time it - * starts and finishes reading the counters. - */ - unsigned epoch; - - /* Profiling counters. */ - prof_cnt_t cnts; -}; - -struct prof_ctx_s { - /* Protects cnt_merged and sets_ql. */ - malloc_mutex_t lock; - - /* Temporary storage for aggregation during dump. */ - prof_cnt_t cnt_dump; - - /* When threads exit, they merge their stats into cnt_merged. */ - prof_cnt_t cnt_merged; - - /* - * List of profile counters, one for each thread that has allocated in - * this context. - */ - ql_head(prof_thr_cnt_t) cnts_ql; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_prof; -/* - * Even if opt_prof is true, sampling can be temporarily disabled by setting - * opt_prof_active to false. No locking is used when updating opt_prof_active, - * so there are no guarantees regarding how long it will take for all threads - * to notice state changes. - */ -extern bool opt_prof_active; -extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_udump; /* High-water memory dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ - -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ -extern uint64_t prof_interval; - -/* - * If true, promote small sampled objects to large objects, since small run - * headers do not have embedded profile context pointers. 
- */ -extern bool prof_promote; - -bool prof_init(prof_t *prof, bool master); -void prof_destroy(prof_t *prof); - -prof_thr_cnt_t *prof_alloc_prep(size_t size); -prof_thr_cnt_t *prof_cnt_get(const void *ptr); -void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, - size_t old_size, prof_thr_cnt_t *old_cnt); -void prof_free(const void *ptr); -void prof_idump(void); -bool prof_mdump(const char *filename); -void prof_udump(void); -void prof_boot0(void); -bool prof_boot1(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ -#endif /* JEMALLOC_PROF */ diff --git a/dep/jemalloc/jemalloc/jemalloc.h.in b/dep/jemalloc/jemalloc/jemalloc.h.in deleted file mode 100644 index 8ef8183686e..00000000000 --- a/dep/jemalloc/jemalloc/jemalloc.h.in +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef JEMALLOC_H_ -#define JEMALLOC_H_ -#ifdef __cplusplus -extern "C" { -#endif - -#define JEMALLOC_VERSION "@jemalloc_version@" -#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ -#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ -#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ -#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ -#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" - -#include "jemalloc_defs@install_suffix@.h" -#ifndef JEMALLOC_P -# define JEMALLOC_P(s) s -#endif - -extern const char *JEMALLOC_P(malloc_options); -extern void (*JEMALLOC_P(malloc_message))(void *, const char *); - -void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); -void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc); -int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) - JEMALLOC_ATTR(nonnull(1)); -void *JEMALLOC_P(realloc)(void *ptr, size_t size); -void JEMALLOC_P(free)(void *ptr); - -size_t JEMALLOC_P(malloc_usable_size)(const void *ptr); -void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), - void *cbopaque, const char *opts); -int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, - size_t *miblenp); -int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); - -#ifdef __cplusplus -}; -#endif -#endif /* JEMALLOC_H_ */ diff --git a/dep/jemalloc/jemalloc/jemalloc_defs.h.in b/dep/jemalloc/jemalloc/jemalloc_defs.h.in deleted file mode 100644 index 8b98d670acc..00000000000 --- a/dep/jemalloc/jemalloc/jemalloc_defs.h.in +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef JEMALLOC_DEFS_H_ -#define JEMALLOC_DEFS_H_ - -/* - * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed. - * This makes it possible, with some care, to use multiple allocators - * simultaneously. - * - * In many cases it is more convenient to manually prefix allocator function - * calls than to let macros do it automatically, particularly when using - * multiple allocators simultaneously. Define JEMALLOC_MANGLE before - * #include'ing jemalloc.h in order to cause name mangling that corresponds to - * the API prefixing. 
- */ -#undef JEMALLOC_PREFIX -#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) -#undef JEMALLOC_P -#endif - -/* - * Hyper-threaded CPUs may need a special instruction inside spin loops in - * order to yield to another virtual CPU. - */ -#undef CPU_SPINWAIT - -/* Defined if __attribute__((...)) syntax is supported. */ -#undef JEMALLOC_HAVE_ATTR -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -#else -# define JEMALLOC_ATTR(s) -#endif - -/* - * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables - * inline functions. - */ -#undef JEMALLOC_DEBUG - -/* JEMALLOC_STATS enables statistics calculation. */ -#undef JEMALLOC_STATS - -/* JEMALLOC_PROF enables allocation profiling. */ -#undef JEMALLOC_PROF - -/* Use libunwind for profile backtracing if defined. */ -#undef JEMALLOC_PROF_LIBUNWIND - -/* Use libgcc for profile backtracing if defined. */ -#undef JEMALLOC_PROF_LIBGCC - -/* - * JEMALLOC_TINY enables support for tiny objects, which are smaller than one - * quantum. - */ -#undef JEMALLOC_TINY - -/* - * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. - * This makes it possible to allocate/deallocate objects without any locking - * when the cache is in the steady state. - */ -#undef JEMALLOC_TCACHE - -/* - * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage - * segment (DSS). - */ -#undef JEMALLOC_DSS - -/* JEMALLOC_SWAP enables mmap()ed swap file support. */ -#undef JEMALLOC_SWAP - -/* Support memory filling (junk/zero). */ -#undef JEMALLOC_FILL - -/* Support optional abort() on OOM. */ -#undef JEMALLOC_XMALLOC - -/* Support SYSV semantics. */ -#undef JEMALLOC_SYSV - -/* Support lazy locking (avoid locking unless a second thread is launched). */ -#undef JEMALLOC_LAZY_LOCK - -/* Determine page size at run time if defined. */ -#undef DYNAMIC_PAGE_SHIFT - -/* One page is 2^STATIC_PAGE_SHIFT bytes. */ -#undef STATIC_PAGE_SHIFT - -/* TLS is used to map arenas and magazine caches to threads. */ -#undef NO_TLS - -/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#undef LG_SIZEOF_PTR - -/* sizeof(int) == 2^LG_SIZEOF_INT. 
*/ -#undef LG_SIZEOF_INT - -#endif /* JEMALLOC_DEFS_H_ */ diff --git a/dep/jemalloc/arena.c b/dep/jemalloc/src/arena.c index e74b4701907..7f939b3cd77 100644 --- a/dep/jemalloc/arena.c +++ b/dep/jemalloc/src/arena.c @@ -165,7 +165,7 @@ static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero); -static void arena_purge(arena_t *arena); +static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize); @@ -174,16 +174,18 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); +static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); +static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin); static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t size, size_t oldsize); + void *ptr, size_t oldsize, size_t size); static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t size, size_t oldsize); -static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize); -#ifdef JEMALLOC_TINY -static size_t pow2_ceil(size_t x); -#endif + void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); +static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); static bool small_size2bin_init(void); #ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void); @@ -254,7 +256,6 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) run->nfree--; ret = run->avail; if (ret != NULL) { - run->avail = *(void **)ret; /* Double free can cause assertion failure.*/ assert(ret != NULL); /* Write-after free can cause assertion failure. */ @@ -264,6 +265,7 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) assert(((uintptr_t)ret - ((uintptr_t)run + (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size == 0); + run->avail = *(void **)ret; return (ret); } ret = run->next; @@ -281,12 +283,35 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) assert(((uintptr_t)ptr - ((uintptr_t)run + (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size == 0); + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. + */ + assert((uintptr_t)ptr >= (uintptr_t)run + + (uintptr_t)run->bin->reg0_offset); + /* + * Freeing a pointer past in the run's frontier can cause assertion + * failure. 
+ */ + assert((uintptr_t)ptr < (uintptr_t)run->next); *(void **)ptr = run->avail; run->avail = ptr; run->nfree++; } +#ifdef JEMALLOC_DEBUG +static inline void +arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) +{ + size_t i; + size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT)); + + for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++) + assert(p[i] == 0); +} +#endif + static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, bool zero) @@ -300,39 +325,40 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, old_ndirty = chunk->ndirty; run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - flag_dirty = chunk->map[run_ind].bits & CHUNK_MAP_DIRTY; + flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : &arena->runs_avail_clean; - total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >> + total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> PAGE_SHIFT; - assert((chunk->map[run_ind+total_pages-1].bits & CHUNK_MAP_DIRTY) == - flag_dirty); + assert((chunk->map[run_ind+total_pages-1-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty); need_pages = (size >> PAGE_SHIFT); assert(need_pages > 0); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_tree_remove(runs_avail, &chunk->map[run_ind]); + arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ if (rem_pages > 0) { if (flag_dirty != 0) { - chunk->map[run_ind+need_pages].bits = (rem_pages << - PAGE_SHIFT) | CHUNK_MAP_DIRTY; - chunk->map[run_ind+total_pages-1].bits = (rem_pages << - PAGE_SHIFT) | CHUNK_MAP_DIRTY; + chunk->map[run_ind+need_pages-map_bias].bits = + (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; + chunk->map[run_ind+total_pages-1-map_bias].bits = + (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; } else { - chunk->map[run_ind+need_pages].bits = (rem_pages << - PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits & - CHUNK_MAP_ZEROED); - chunk->map[run_ind+total_pages-1].bits = (rem_pages << - PAGE_SHIFT) | - (chunk->map[run_ind+total_pages-1].bits & - CHUNK_MAP_ZEROED); + chunk->map[run_ind+need_pages-map_bias].bits = + (rem_pages << PAGE_SHIFT) | + (chunk->map[run_ind+need_pages-map_bias].bits & + CHUNK_MAP_UNZEROED); + chunk->map[run_ind+total_pages-1-map_bias].bits = + (rem_pages << PAGE_SHIFT) | + (chunk->map[run_ind+total_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED); } arena_avail_tree_insert(runs_avail, - &chunk->map[run_ind+need_pages]); + &chunk->map[run_ind+need_pages-map_bias]); } /* Update dirty page accounting. */ @@ -353,13 +379,19 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * zeroed (i.e. never before touched). */ for (i = 0; i < need_pages; i++) { - if ((chunk->map[run_ind + i].bits & - CHUNK_MAP_ZEROED) == 0) { + if ((chunk->map[run_ind+i-map_bias].bits + & CHUNK_MAP_UNZEROED) != 0) { memset((void *)((uintptr_t) - chunk + ((run_ind + i) << + chunk + ((run_ind+i) << PAGE_SHIFT)), 0, PAGE_SIZE); } +#ifdef JEMALLOC_DEBUG + else { + arena_chunk_validate_zeroed( + chunk, run_ind+i); + } +#endif } } else { /* @@ -376,27 +408,54 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * Set the last element first, in case the run only contains one * page (i.e. both statements set the same element). 
*/ - chunk->map[run_ind+need_pages-1].bits = CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED | flag_dirty; - chunk->map[run_ind].bits = size | CHUNK_MAP_LARGE | -#ifdef JEMALLOC_PROF - CHUNK_MAP_CLASS_MASK | -#endif - CHUNK_MAP_ALLOCATED | flag_dirty; + chunk->map[run_ind+need_pages-1-map_bias].bits = + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty; + chunk->map[run_ind-map_bias].bits = size | flag_dirty | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; } else { assert(zero == false); /* - * Propagate the dirty flag to the allocated small run, so that - * arena_dalloc_bin_run() has the ability to conditionally trim - * clean pages. + * Propagate the dirty and unzeroed flags to the allocated + * small run, so that arena_dalloc_bin_run() has the ability to + * conditionally trim clean pages. */ - chunk->map[run_ind].bits = CHUNK_MAP_ALLOCATED | flag_dirty; + chunk->map[run_ind-map_bias].bits = + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | + CHUNK_MAP_ALLOCATED | flag_dirty; +#ifdef JEMALLOC_DEBUG + /* + * The first page will always be dirtied during small run + * initialization, so a validation failure here would not + * actually cause an observable failure. + */ + if (flag_dirty == 0 && + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) + == 0) + arena_chunk_validate_zeroed(chunk, run_ind); +#endif for (i = 1; i < need_pages - 1; i++) { - chunk->map[run_ind + i].bits = (i << PAGE_SHIFT) - | CHUNK_MAP_ALLOCATED; + chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT) + | (chunk->map[run_ind+i-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; +#ifdef JEMALLOC_DEBUG + if (flag_dirty == 0 && + (chunk->map[run_ind+i-map_bias].bits & + CHUNK_MAP_UNZEROED) == 0) + arena_chunk_validate_zeroed(chunk, run_ind+i); +#endif + } + chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages + - 1) << PAGE_SHIFT) | + (chunk->map[run_ind+need_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; +#ifdef JEMALLOC_DEBUG + if (flag_dirty == 0 && + (chunk->map[run_ind+need_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) == 0) { + arena_chunk_validate_zeroed(chunk, + run_ind+need_pages-1); } - chunk->map[run_ind + need_pages - 1].bits = ((need_pages - 1) << - PAGE_SHIFT) | CHUNK_MAP_ALLOCATED | flag_dirty; +#endif } } @@ -413,20 +472,24 @@ arena_chunk_alloc(arena_t *arena) arena->spare = NULL; /* Insert the run into the appropriate runs_avail_* tree. */ - if ((chunk->map[arena_chunk_header_npages].bits & - CHUNK_MAP_DIRTY) == 0) + if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_insert(runs_avail, - &chunk->map[arena_chunk_header_npages]); + assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass); + assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK) + == arena_maxclass); + assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) == + (chunk->map[chunk_npages-1-map_bias].bits & + CHUNK_MAP_DIRTY)); + arena_avail_tree_insert(runs_avail, &chunk->map[0]); } else { bool zero; - size_t zeroed; + size_t unzeroed; zero = false; malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero); malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); @@ -449,17 +512,28 @@ arena_chunk_alloc(arena_t *arena) * Mark the pages as zeroed iff chunk_alloc() returned a zeroed * chunk. */ - zeroed = zero ? 
CHUNK_MAP_ZEROED : 0; - for (i = 0; i < arena_chunk_header_npages; i++) - chunk->map[i].bits = 0; - chunk->map[i].bits = arena_maxclass | zeroed; - for (i++; i < chunk_npages-1; i++) - chunk->map[i].bits = zeroed; - chunk->map[chunk_npages-1].bits = arena_maxclass | zeroed; + unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; + chunk->map[0].bits = arena_maxclass | unzeroed; + /* + * There is no need to initialize the internal page map entries + * unless the chunk is not zeroed. + */ + if (zero == false) { + for (i = map_bias+1; i < chunk_npages-1; i++) + chunk->map[i-map_bias].bits = unzeroed; + } +#ifdef JEMALLOC_DEBUG + else { + for (i = map_bias+1; i < chunk_npages-1; i++) + assert(chunk->map[i-map_bias].bits == unzeroed); + } +#endif + chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | + unzeroed; /* Insert the run into the runs_avail_clean tree. */ arena_avail_tree_insert(&arena->runs_avail_clean, - &chunk->map[arena_chunk_header_npages]); + &chunk->map[0]); } return (chunk); @@ -470,10 +544,20 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) { arena_avail_tree_t *runs_avail; - while (arena->spare != NULL) { + /* + * Remove run from the appropriate runs_avail_* tree, so that the arena + * does not use it. + */ + if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + runs_avail = &arena->runs_avail_clean; + else + runs_avail = &arena->runs_avail_dirty; + arena_avail_tree_remove(runs_avail, &chunk->map[0]); + + if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; - arena->spare = NULL; + arena->spare = chunk; if (spare->dirtied) { ql_remove(&chunk->arena->chunks_dirty, spare, link_dirty); @@ -485,21 +569,8 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) #ifdef JEMALLOC_STATS arena->stats.mapped -= chunksize; #endif - } - - /* - * Remove run from the appropriate runs_avail_* tree, so that the arena - * does not use it. 
- */ - if ((chunk->map[arena_chunk_header_npages].bits & - CHUNK_MAP_DIRTY) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, - &chunk->map[arena_chunk_header_npages]); - - arena->spare = chunk; + } else + arena->spare = chunk; } static arena_run_t * @@ -517,8 +588,9 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map) - / sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << PAGE_SHIFT)); @@ -528,8 +600,9 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map) - / sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << PAGE_SHIFT)); @@ -542,8 +615,8 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + - (arena_chunk_header_npages << PAGE_SHIFT)); + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << + PAGE_SHIFT)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -556,8 +629,9 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map) - / sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << PAGE_SHIFT)); @@ -567,8 +641,9 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map) - / sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << PAGE_SHIFT)); @@ -588,7 +663,7 @@ arena_maybe_purge(arena_t *arena) (arena->ndirty - arena->npurgatory) > chunk_npages && (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - arena->npurgatory)) - arena_purge(arena); + arena_purge(arena, false); } static inline void @@ -596,7 +671,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) { ql_head(arena_chunk_map_t) mapelms; arena_chunk_map_t *mapelm; - size_t pageind, flag_zeroed; + size_t pageind, flag_unzeroed; #ifdef JEMALLOC_DEBUG size_t ndirty; #endif @@ -606,11 +681,19 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_new(&mapelms); - flag_zeroed = -#ifdef JEMALLOC_SWAP - swap_enabled ? 
0 : + flag_unzeroed = +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED + /* + * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous + * mappings, but not for file-backed mappings. + */ +# ifdef JEMALLOC_SWAP + swap_enabled ? CHUNK_MAP_UNZEROED : +# endif + 0; +#else + CHUNK_MAP_UNZEROED; #endif - CHUNK_MAP_ZEROED; /* * If chunk is the spare, temporarily re-allocate it, 1) so that its @@ -628,14 +711,13 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * run. */ if (chunk == arena->spare) { - assert((chunk->map[arena_chunk_header_npages].bits & - CHUNK_MAP_DIRTY) != 0); + assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0); arena_chunk_alloc(arena); } /* Temporarily allocate all free dirty runs within chunk. */ - for (pageind = arena_chunk_header_npages; pageind < chunk_npages;) { - mapelm = &chunk->map[pageind]; + for (pageind = map_bias; pageind < chunk_npages;) { + mapelm = &chunk->map[pageind-map_bias]; if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { size_t npages; @@ -647,25 +729,22 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); + mapelm->bits = (npages << PAGE_SHIFT) | + flag_unzeroed | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; /* * Update internal elements in the page map, so - * that CHUNK_MAP_ZEROED is properly set. - * madvise(..., MADV_DONTNEED) results in - * zero-filled pages for anonymous mappings, - * but not for file-backed mappings. + * that CHUNK_MAP_UNZEROED is properly set. */ - mapelm->bits = (npages << PAGE_SHIFT) | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | - flag_zeroed; for (i = 1; i < npages - 1; i++) { - chunk->map[pageind + i].bits = - flag_zeroed; + chunk->map[pageind+i-map_bias].bits = + flag_unzeroed; } if (npages > 1) { - chunk->map[pageind + npages - 1].bits = - (npages << PAGE_SHIFT) | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | - flag_zeroed; + chunk->map[ + pageind+npages-1-map_bias].bits = + flag_unzeroed | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; } arena->nactive += npages; @@ -707,8 +786,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) nmadvise = 0; #endif ql_foreach(mapelm, &mapelms, u.ql_link) { - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; size_t npages = mapelm->bits >> PAGE_SHIFT; assert(pageind + npages <= chunk_npages); @@ -716,8 +795,17 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(ndirty >= npages); ndirty -= npages; #endif + +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), (npages << PAGE_SHIFT), MADV_DONTNEED); +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) + madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), + (npages << PAGE_SHIFT), MADV_FREE); +#else +# error "No method defined for purging unused dirty pages." +#endif + #ifdef JEMALLOC_STATS nmadvise++; #endif @@ -733,8 +821,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) /* Deallocate runs. 
*/ for (mapelm = ql_first(&mapelms); mapelm != NULL; mapelm = ql_first(&mapelms)) { - size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t); + size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << PAGE_SHIFT)); @@ -744,7 +832,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) } static void -arena_purge(arena_t *arena) +arena_purge(arena_t *arena, bool all) { arena_chunk_t *chunk; size_t npurgatory; @@ -758,8 +846,8 @@ arena_purge(arena_t *arena) assert(ndirty == arena->ndirty); #endif assert(arena->ndirty > arena->npurgatory); - assert(arena->ndirty > chunk_npages); - assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty); + assert(arena->ndirty > chunk_npages || all); + assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all); #ifdef JEMALLOC_STATS arena->stats.npurge++; @@ -770,8 +858,9 @@ arena_purge(arena_t *arena) * purge, and add the result to arena->npurgatory. This will keep * multiple threads from racing to reduce ndirty below the threshold. */ - npurgatory = (arena->ndirty - arena->npurgatory) - (arena->nactive >> - opt_lg_dirty_mult); + npurgatory = arena->ndirty - arena->npurgatory; + if (all == false) + npurgatory -= arena->nactive >> opt_lg_dirty_mult; arena->npurgatory += npurgatory; while (npurgatory > 0) { @@ -827,6 +916,15 @@ arena_purge(arena_t *arena) } } +void +arena_purge_all(arena_t *arena) +{ + + malloc_mutex_lock(&arena->lock); + arena_purge(arena, true); + malloc_mutex_unlock(&arena->lock); +} + static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) { @@ -837,11 +935,18 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - assert(run_ind >= arena_chunk_header_npages); + assert(run_ind >= map_bias); assert(run_ind < chunk_npages); - if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0) - size = chunk->map[run_ind].bits & ~PAGE_MASK; - else + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { + size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; + assert(size == PAGE_SIZE || + (chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + ~PAGE_MASK) == 0); + assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + CHUNK_MAP_LARGE) != 0); + assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) != 0); + } else size = run->bin->run_size; run_pages = (size >> PAGE_SHIFT); arena->nactive -= run_pages; @@ -850,7 +955,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * The run is dirty if the caller claims to have dirtied it, as well as * if it was already dirty before being allocated. */ - if ((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) != 0) + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; runs_avail = dirty ? &arena->runs_avail_dirty : @@ -858,96 +963,122 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* Mark pages as unallocated in the chunk map. 
*/ if (dirty) { - chunk->map[run_ind].bits = size | flag_dirty; - chunk->map[run_ind+run_pages-1].bits = size | flag_dirty; + chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY; + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + CHUNK_MAP_DIRTY; chunk->ndirty += run_pages; arena->ndirty += run_pages; } else { - chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & - CHUNK_MAP_ZEROED); - chunk->map[run_ind+run_pages-1].bits = size | - (chunk->map[run_ind+run_pages-1].bits & CHUNK_MAP_ZEROED); + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED); } /* Try to coalesce forward. */ if (run_ind + run_pages < chunk_npages && - (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0 && - (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_DIRTY) == - flag_dirty) { - size_t nrun_size = chunk->map[run_ind+run_pages].bits & + (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED) + == 0 && (chunk->map[run_ind+run_pages-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty) { + size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & ~PAGE_MASK; + size_t nrun_pages = nrun_size >> PAGE_SHIFT; /* * Remove successor from runs_avail; the coalesced run is * inserted later. */ + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & ~PAGE_MASK) == nrun_size); + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & CHUNK_MAP_ALLOCATED) == 0); + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & CHUNK_MAP_DIRTY) == flag_dirty); arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind+run_pages]); + &chunk->map[run_ind+run_pages-map_bias]); size += nrun_size; - run_pages = size >> PAGE_SHIFT; + run_pages += nrun_pages; - assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK) - == nrun_size); - chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & - CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1].bits = size | - (chunk->map[run_ind+run_pages-1].bits & + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_FLAGS_MASK); } /* Try to coalesce backward. */ - if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits & - CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1].bits & + if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits & CHUNK_MAP_DIRTY) == flag_dirty) { - size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK; + size_t prun_size = chunk->map[run_ind-1-map_bias].bits & + ~PAGE_MASK; + size_t prun_pages = prun_size >> PAGE_SHIFT; - run_ind -= prun_size >> PAGE_SHIFT; + run_ind -= prun_pages; /* * Remove predecessor from runs_avail; the coalesced run is * inserted later. 
*/ - arena_avail_tree_remove(runs_avail, &chunk->map[run_ind]); + assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) + == prun_size); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED) + == 0); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) + == flag_dirty); + arena_avail_tree_remove(runs_avail, + &chunk->map[run_ind-map_bias]); size += prun_size; - run_pages = size >> PAGE_SHIFT; + run_pages += prun_pages; - assert((chunk->map[run_ind].bits & ~PAGE_MASK) == prun_size); - chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & - CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1].bits = size | - (chunk->map[run_ind+run_pages-1].bits & + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_FLAGS_MASK); } /* Insert into runs_avail, now that coalescing is complete. */ - arena_avail_tree_insert(runs_avail, &chunk->map[run_ind]); + assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) == + (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK)); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == + (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY)); + arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]); + + if (dirty) { + /* + * Insert into chunks_dirty before potentially calling + * arena_chunk_dealloc(), so that chunks_dirty and + * arena->ndirty are consistent. + */ + if (chunk->dirtied == false) { + ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); + chunk->dirtied = true; + } + } /* * Deallocate chunk if it is now completely unused. The bit * manipulation checks whether the first run is unallocated and extends * to the end of the chunk. */ - if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK | - CHUNK_MAP_ALLOCATED)) == arena_maxclass) + if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) == + arena_maxclass) arena_chunk_dealloc(arena, chunk); /* - * It is okay to do dirty page processing even if the chunk was + * It is okay to do dirty page processing here even if the chunk was * deallocated above, since in that case it is the spare. Waiting * until after possible chunk deallocation to do dirty processing * allows for an old spare to be fully deallocated, thus decreasing the * chances of spuriously crossing the dirty page purging threshold. */ - if (dirty) { - if (chunk->dirtied == false) { - ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = true; - } + if (dirty) arena_maybe_purge(arena); - } } static void @@ -956,18 +1087,40 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT; - size_t flags = chunk->map[pageind].bits & CHUNK_MAP_FLAGS_MASK; + size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; assert(oldsize > newsize); /* * Update the chunk map so that arena_run_dalloc() can treat the - * leading run as separately allocated. + * leading run as separately allocated. Set the last element of each + * run first, in case of single-page runs. 
*/ - assert(chunk->map[pageind].bits & CHUNK_MAP_LARGE); - assert(chunk->map[pageind].bits & CHUNK_MAP_ALLOCATED); - chunk->map[pageind].bits = (oldsize - newsize) | flags; - chunk->map[pageind+head_npages].bits = newsize | flags; + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | + (chunk->map[pageind+head_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind-map_bias].bits = (oldsize - newsize) + | flag_dirty | (chunk->map[pageind-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + +#ifdef JEMALLOC_DEBUG + { + size_t tail_npages = newsize >> PAGE_SHIFT; + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & ~PAGE_MASK) == 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_DIRTY) == flag_dirty); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_ALLOCATED) != 0); + } +#endif + chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | + (chunk->map[pageind+head_npages-map_bias].bits & + CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; arena_run_dalloc(arena, run, false); } @@ -977,20 +1130,40 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t npages = newsize >> PAGE_SHIFT; - size_t flags = chunk->map[pageind].bits & CHUNK_MAP_FLAGS_MASK; + size_t head_npages = newsize >> PAGE_SHIFT; + size_t tail_npages = (oldsize - newsize) >> PAGE_SHIFT; + size_t flag_dirty = chunk->map[pageind-map_bias].bits & + CHUNK_MAP_DIRTY; assert(oldsize > newsize); /* * Update the chunk map so that arena_run_dalloc() can treat the - * trailing run as separately allocated. + * trailing run as separately allocated. Set the last element of each + * run first, in case of single-page runs. 
*/ - assert(chunk->map[pageind].bits & CHUNK_MAP_LARGE); - assert(chunk->map[pageind].bits & CHUNK_MAP_ALLOCATED); - chunk->map[pageind].bits = newsize | flags; - chunk->map[pageind+npages-1].bits = newsize | flags; - chunk->map[pageind+npages].bits = (oldsize - newsize) | flags; + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | + (chunk->map[pageind+head_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind-map_bias].bits = newsize | flag_dirty | + (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + ~PAGE_MASK) == 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits = + flag_dirty | + (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) | + flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), dirty); @@ -1012,8 +1185,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) arena_run_tree_remove(&bin->runs, mapelm); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); @@ -1033,7 +1206,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) /* Initialize run internals. */ run->bin = bin; run->avail = NULL; - run->next = (void *)(((uintptr_t)run) + + run->next = (void *)((uintptr_t)run + (uintptr_t)bin->reg0_offset); run->nfree = bin->nregs; #ifdef JEMALLOC_DEBUG @@ -1055,7 +1228,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) /* * arena_run_alloc() failed, but another thread may have made - * sufficient memory available while this one dopped bin->lock above, + * sufficient memory available while this one dropped bin->lock above, * so search one more time. 
*/ mapelm = arena_run_tree_first(&bin->runs); @@ -1067,8 +1240,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) arena_run_tree_remove(&bin->runs, mapelm); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); @@ -1099,11 +1272,21 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin); if (run != NULL) { - malloc_mutex_unlock(&bin->lock); - malloc_mutex_lock(&arena->lock); - arena_run_dalloc(arena, run, false); - malloc_mutex_unlock(&arena->lock); - malloc_mutex_lock(&bin->lock); + arena_chunk_t *chunk; + + /* + * arena_run_alloc() may have allocated run, or it may + * have pulled it from the bin's run tree. Therefore + * it is unsafe to make any assumptions about how run + * has previously been used, and arena_bin_lower_run() + * must be called, as if a region were just deallocated + * from the run. + */ + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + if (run->nfree == bin->nregs) + arena_dalloc_bin_run(arena, chunk, run, bin); + else + arena_bin_lower_run(arena, chunk, run, bin); } return (ret); } @@ -1198,7 +1381,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) uint32_t try_nregs, good_nregs; uint32_t try_hdr_size, good_hdr_size; #ifdef JEMALLOC_PROF - uint32_t try_cnt0_offset, good_cnt0_offset; + uint32_t try_ctx0_offset, good_ctx0_offset; #endif uint32_t try_reg0_offset, good_reg0_offset; @@ -1225,11 +1408,11 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) if (opt_prof && prof_promote == false) { /* Pad to a quantum boundary. */ try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_cnt0_offset = try_hdr_size; - /* Add space for one (prof_thr_cnt_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_thr_cnt_t *); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. */ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } else - try_cnt0_offset = 0; + try_ctx0_offset = 0; #endif try_reg0_offset = try_run_size - (try_nregs * bin->reg_size); } while (try_hdr_size > try_reg0_offset); @@ -1243,7 +1426,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) good_nregs = try_nregs; good_hdr_size = try_hdr_size; #ifdef JEMALLOC_PROF - good_cnt0_offset = try_cnt0_offset; + good_ctx0_offset = try_ctx0_offset; #endif good_reg0_offset = try_reg0_offset; @@ -1258,13 +1441,12 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) if (opt_prof && prof_promote == false) { /* Pad to a quantum boundary. */ try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_cnt0_offset = try_hdr_size; + try_ctx0_offset = try_hdr_size; /* - * Add space for one (prof_thr_cnt_t *) per - * region. + * Add space for one (prof_ctx_t *) per region. 
*/ try_hdr_size += try_nregs * - sizeof(prof_thr_cnt_t *); + sizeof(prof_ctx_t *); } #endif try_reg0_offset = try_run_size - (try_nregs * @@ -1282,7 +1464,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) bin->run_size = good_run_size; bin->nregs = good_nregs; #ifdef JEMALLOC_PROF - bin->cnt0_offset = good_cnt0_offset; + bin->ctx0_offset = good_ctx0_offset; #endif bin->reg0_offset = good_reg0_offset; @@ -1419,17 +1601,19 @@ arena_malloc(size_t size, bool zero) /* Only handles large allocations that require more than page alignment. */ void * -arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size) +arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, + bool zero) { void *ret; size_t offset; arena_chunk_t *chunk; assert((size & PAGE_MASK) == 0); - assert((alignment & PAGE_MASK) == 0); + + alignment = PAGE_CEILING(alignment); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, alloc_size, true, false); + ret = (void *)arena_run_alloc(arena, alloc_size, true, zero); if (ret == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1477,10 +1661,12 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size) malloc_mutex_unlock(&arena->lock); #ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); + if (zero == false) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } #endif return (ret); } @@ -1497,8 +1683,8 @@ arena_salloc(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapbits = chunk->map[pageind].bits; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + @@ -1530,11 +1716,11 @@ arena_prof_promoted(const void *ptr, size_t size) assert(isalloc(ptr) == PAGE_SIZE); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = small_size2bin[size]; assert(binind < nbins); - chunk->map[pageind].bits = (chunk->map[pageind].bits & - ~CHUNK_MAP_CLASS_MASK) | (binind << CHUNK_MAP_CLASS_SHIFT); + chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & + ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); } size_t @@ -1548,8 +1734,8 @@ arena_salloc_demote(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapbits = chunk->map[pageind].bits; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + @@ -1564,9 +1750,9 @@ arena_salloc_demote(const void *ptr) assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; if (prof_promote && ret == PAGE_SIZE && (mapbits & - CHUNK_MAP_CLASS_MASK) != CHUNK_MAP_CLASS_MASK) { + CHUNK_MAP_CLASS_MASK) != 0) { size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT); + CHUNK_MAP_CLASS_SHIFT) - 1; assert(binind < nbins); ret = chunk->arena->bins[binind].reg_size; } 
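The hunks around this point lean on two bookkeeping idioms: the page map is indexed as chunk->map[pageind-map_bias] (the header pages carry no map entries), and arena_prof_promoted()/arena_salloc_demote() store a small size class in the CHUNK_MAP_CLASS bits as binind+1 so that an all-zero class field unambiguously means "not promoted". The standalone sketch below illustrates both idioms; PAGE_SHIFT, the CHUNK_MAP_CLASS layout, map_bias and the chunk address are illustrative stand-ins, not jemalloc's actual definitions.

/*
 * Minimal sketch (not jemalloc source) of map_bias-relative page-map indexing
 * and the binind+1 encoding used in CHUNK_MAP_CLASS.  All constants are
 * assumed values chosen for the example.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT		12			/* 4 KiB pages (assumed). */
#define PAGE_SIZE		((size_t)1 << PAGE_SHIFT)
#define CHUNK_MAP_CLASS_SHIFT	4			/* Illustrative layout. */
#define CHUNK_MAP_CLASS_MASK	((size_t)0xff << CHUNK_MAP_CLASS_SHIFT)

/*
 * Page index of ptr within its chunk, then the map slot once the header
 * pages (map_bias) are excluded from the map array.
 */
static size_t
map_slot(uintptr_t chunk, uintptr_t ptr, size_t map_bias)
{
	size_t pageind = (ptr - chunk) >> PAGE_SHIFT;

	assert(pageind >= map_bias);
	return (pageind - map_bias);
}

/*
 * Encode a small size class into map bits: binind+1 is stored so that a zero
 * class field means "no promoted class".
 */
static size_t
class_encode(size_t bits, size_t binind)
{

	return ((bits & ~CHUNK_MAP_CLASS_MASK) |
	    ((binind + 1) << CHUNK_MAP_CLASS_SHIFT));
}

/* Decode; returns 0 if the class field is empty (not promoted). */
static int
class_decode(size_t bits, size_t *binind)
{
	size_t field = (bits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT;

	if (field == 0)
		return (0);
	*binind = field - 1;
	return (1);
}

int
main(void)
{
	uintptr_t chunk = 0x400000;		/* Pretend chunk base. */
	uintptr_t ptr = chunk + 5 * PAGE_SIZE;	/* Object on page 5. */
	size_t map_bias = 2;			/* Pretend header pages. */
	size_t bits = 0, binind;

	printf("map slot: %zu\n", map_slot(chunk, ptr, map_bias));
	bits = class_encode(bits, 7);
	if (class_decode(bits, &binind))
		printf("promoted binind: %zu\n", binind);
	return (0);
}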
@@ -1575,144 +1761,12 @@ arena_salloc_demote(const void *ptr) return (ret); } - -static inline unsigned -arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, - size_t size) -{ - unsigned shift, diff, regind; - - assert(run->magic == ARENA_RUN_MAGIC); - - /* - * Avoid doing division with a variable divisor if possible. Using - * actual division here can reduce allocator throughput by over 20%! - */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); - - /* Rescale (factor powers of 2 out of the numerator and denominator). */ - shift = ffs(size) - 1; - diff >>= shift; - size >>= shift; - - if (size == 1) { - /* The divisor was a power of 2. */ - regind = diff; - } else { - /* - * To divide by a number D that is not a power of two we - * multiply by (2^21 / D) and then right shift by 21 positions. - * - * X / D - * - * becomes - * - * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT - * - * We can omit the first three elements, because we never - * divide by 0, and 1 and 2 are both powers of two, which are - * handled above. - */ -#define SIZE_INV_SHIFT 21 -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned size_invs[] = { - SIZE_INV(3), - SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), - SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), - SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), - SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), - SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), - SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), - SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) - }; - - if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2)) - regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT; - else - regind = diff / size; -#undef SIZE_INV -#undef SIZE_INV_SHIFT - } - assert(diff == regind * size); - assert(regind < bin->nregs); - - return (regind); -} - -prof_thr_cnt_t * -arena_prof_cnt_get(const void *ptr) -{ - prof_thr_cnt_t *ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapbits = chunk->map[pageind].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote) - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - else { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - arena_bin_t *bin = run->bin; - unsigned regind; - - assert(run->magic == ARENA_RUN_MAGIC); - regind = arena_run_regind(run, bin, ptr, bin->reg_size); - ret = *(prof_thr_cnt_t **)((uintptr_t)run + - bin->cnt0_offset + (regind * - sizeof(prof_thr_cnt_t *))); - } - } else - ret = chunk->map[pageind].prof_cnt; - - return (ret); -} - -void -arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) -{ - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapbits = chunk->map[pageind].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote == false) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - arena_bin_t *bin = run->bin; - unsigned regind; - - assert(run->magic == ARENA_RUN_MAGIC); - regind 
= arena_run_regind(run, bin, ptr, bin->reg_size); - - *((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset - + (regind * sizeof(prof_thr_cnt_t *)))) = cnt; - } else - assert((uintptr_t)cnt == (uintptr_t)1U); - } else - chunk->map[pageind].prof_cnt = cnt; -} #endif static void -arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, +arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t npages, run_ind, past; /* Dissociate run from bin. */ if (run == bin->runcur) @@ -1720,7 +1774,8 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, else if (bin->nregs != 1) { size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = &chunk->map[run_pageind]; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; /* * This block's conditional is necessary because if the run * only contains one region, then it never gets inserted into @@ -1728,13 +1783,24 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, */ arena_run_tree_remove(&bin->runs, run_mapelm); } +} + +static void +arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + size_t npages, run_ind, past; + + assert(run != bin->runcur); + assert(arena_run_tree_search(&bin->runs, &chunk->map[ + (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); malloc_mutex_unlock(&bin->lock); /******************************/ npages = bin->run_size >> PAGE_SHIFT; run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - past = (size_t)(((uintptr_t)run->next - (uintptr_t)1U - - (uintptr_t)chunk) >> PAGE_SHIFT) + 1; + past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk) + >> PAGE_SHIFT); malloc_mutex_lock(&arena->lock); /* @@ -1742,19 +1808,21 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trim the clean pages before deallocating the dirty portion of the * run. */ - if ((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0 && past - run_ind - < npages) { + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past + - run_ind < npages) { /* * Trim clean pages. Convert to large run beforehand. Set the * last map element first, in case this is a one-page run. */ - chunk->map[run_ind+npages-1].bits = CHUNK_MAP_LARGE | - (chunk->map[run_ind].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind].bits = bin->run_size | CHUNK_MAP_LARGE | - (chunk->map[run_ind].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | + (chunk->map[run_ind+npages-1-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind-map_bias].bits = bin->run_size | + CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), - ((npages - (past - run_ind)) << PAGE_SHIFT), false); - npages = past - run_ind; + ((past - run_ind) << PAGE_SHIFT), false); + /* npages = past - run_ind; */ } #ifdef JEMALLOC_DEBUG run->magic = 0; @@ -1768,6 +1836,42 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, #endif } +static void +arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + + /* + * Make sure that bin->runcur always refers to the lowest non-full run, + * if one exists. + */ + if (bin->runcur == NULL) + bin->runcur = run; + else if ((uintptr_t)run < (uintptr_t)bin->runcur) { + /* Switch runcur. 
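The arena_run_regind() helper removed above avoids a hardware divide by multiplying with a precomputed reciprocal, ((1 << 21) / D) + 1, and shifting right by 21; the original comment notes that a real division costs over 20% of allocator throughput. A standalone sketch of that trick with the same 21-bit scale but an illustrative value range (all names here are made up for the example):

#include <assert.h>
#include <stdio.h>

#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)

/* Divide diff by size (3..10 here) using only multiply-and-shift. */
static unsigned
regind_by_reciprocal(unsigned diff, unsigned size)
{
	static const unsigned size_invs[] = {
		SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6),
		SIZE_INV(7), SIZE_INV(8), SIZE_INV(9), SIZE_INV(10)
	};

	return ((diff * size_invs[size - 3]) >> SIZE_INV_SHIFT);
}

int
main(void)
{
	unsigned size, regind;

	/* The approximation is exact whenever diff is a multiple of size. */
	for (size = 3; size <= 10; size++) {
		for (regind = 0; regind < 512; regind++) {
			assert(regind_by_reciprocal(regind * size, size) ==
			    regind);
		}
	}
	printf("reciprocal division agrees with exact division\n");
	return (0);
}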
*/ + if (bin->runcur->nfree > 0) { + arena_chunk_t *runcur_chunk = + CHUNK_ADDR2BASE(bin->runcur); + size_t runcur_pageind = (((uintptr_t)bin->runcur - + (uintptr_t)runcur_chunk)) >> PAGE_SHIFT; + arena_chunk_map_t *runcur_mapelm = + &runcur_chunk->map[runcur_pageind-map_bias]; + + /* Insert runcur. */ + arena_run_tree_insert(&bin->runs, runcur_mapelm); + } + bin->runcur = run; + } else { + size_t run_pageind = (((uintptr_t)run - + (uintptr_t)chunk)) >> PAGE_SHIFT; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; + + assert(arena_run_tree_search(&bin->runs, run_mapelm) == NULL); + arena_run_tree_insert(&bin->runs, run_mapelm); + } +} + void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm) @@ -1779,7 +1883,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t size; #endif - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); assert(run->magic == ARENA_RUN_MAGIC); @@ -1794,43 +1898,11 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif arena_run_reg_dalloc(run, ptr); - - if (run->nfree == bin->nregs) + if (run->nfree == bin->nregs) { + arena_dissociate_bin_run(chunk, run, bin); arena_dalloc_bin_run(arena, chunk, run, bin); - else if (run->nfree == 1 && run != bin->runcur) { - /* - * Make sure that bin->runcur always refers to the lowest - * non-full run, if one exists. - */ - if (bin->runcur == NULL) - bin->runcur = run; - else if ((uintptr_t)run < (uintptr_t)bin->runcur) { - /* Switch runcur. */ - if (bin->runcur->nfree > 0) { - arena_chunk_t *runcur_chunk = - CHUNK_ADDR2BASE(bin->runcur); - size_t runcur_pageind = - (((uintptr_t)bin->runcur - - (uintptr_t)runcur_chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *runcur_mapelm = - &runcur_chunk->map[runcur_pageind]; - - /* Insert runcur. 
*/ - arena_run_tree_insert(&bin->runs, - runcur_mapelm); - } - bin->runcur = run; - } else { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind]; - - assert(arena_run_tree_search(&bin->runs, run_mapelm) == - NULL); - arena_run_tree_insert(&bin->runs, run_mapelm); - } - } + } else if (run->nfree == 1 && run != bin->runcur) + arena_bin_lower_run(arena, chunk, run, bin); #ifdef JEMALLOC_STATS bin->stats.allocated -= size; @@ -1903,7 +1975,7 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) #if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t size = chunk->map[pageind].bits & ~PAGE_MASK; + size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; #endif #ifdef JEMALLOC_FILL @@ -1925,7 +1997,7 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t size, size_t oldsize) + size_t oldsize, size_t size) { assert(size < oldsize); @@ -1960,50 +2032,71 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t size, size_t oldsize) + size_t oldsize, size_t size, size_t extra, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; size_t npages = oldsize >> PAGE_SHIFT; + size_t followsize; - assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK)); + assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); /* Try to extend the run. */ - assert(size > oldsize); + assert(size + extra > oldsize); malloc_mutex_lock(&arena->lock); - if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits - & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits & - ~PAGE_MASK) >= size - oldsize) { + if (pageind + npages < chunk_npages && + (chunk->map[pageind+npages-map_bias].bits + & CHUNK_MAP_ALLOCATED) == 0 && (followsize = + chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size - + oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing * allocation. */ + size_t flag_dirty; + size_t splitsize = (oldsize + followsize <= size + extra) + ? followsize : size + extra - oldsize; arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true, - false); + ((pageind+npages) << PAGE_SHIFT)), splitsize, true, zero); - chunk->map[pageind].bits = size | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; - chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + size = oldsize + splitsize; + npages = size >> PAGE_SHIFT; -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + /* + * Mark the extended run as dirty if either portion of the run + * was dirty before allocation. This is rather pedantic, + * because there's not actually any sequence of events that + * could cause the resulting run to be passed to + * arena_run_dalloc() with the dirty argument set to false + * (which is when dirty flag consistency would really matter). 
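A recurring change in the hunks above is that page-map lookups become chunk->map[pageind-map_bias]: the map no longer carries entries for the chunk header pages, so page indices must be rebased before indexing. A small sketch of that convention, using purely illustrative values (4 KiB pages and a made-up map_bias):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12			/* 4 KiB pages (illustrative). */
#define PAGE_SIZE	((size_t)1 << PAGE_SHIFT)

/* Hypothetical header size in pages; computed at boot in the real code. */
static const size_t map_bias = 2;

/* map[0] now describes the first page after the header, not page 0. */
static size_t
map_index(uintptr_t chunk, uintptr_t ptr)
{
	size_t pageind = (size_t)(ptr - chunk) >> PAGE_SHIFT;

	return (pageind - map_bias);
}

int
main(void)
{
	uintptr_t chunk = 0x40000000;			/* Chunk base (illustrative). */
	uintptr_t ptr = chunk + map_bias * PAGE_SIZE;	/* First usable page. */

	printf("map index for first usable page: %zu\n",
	    map_index(chunk, ptr));			/* Prints 0. */
	return (0);
}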
+ */ + flag_dirty = (chunk->map[pageind-map_bias].bits & + CHUNK_MAP_DIRTY) | + (chunk->map[pageind+npages-1-map_bias].bits & + CHUNK_MAP_DIRTY); + chunk->map[pageind-map_bias].bits = size | flag_dirty + | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; - } +#ifdef JEMALLOC_STATS + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) - + 1].curruns; + } #endif malloc_mutex_unlock(&arena->lock); return (false); @@ -2018,11 +2111,12 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(void *ptr, size_t size, size_t oldsize) +arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) { size_t psize; - psize = PAGE_CEILING(size); + psize = PAGE_CEILING(size + extra); if (psize == oldsize) { /* Same size class. */ #ifdef JEMALLOC_FILL @@ -2048,14 +2142,15 @@ arena_ralloc_large(void *ptr, size_t size, size_t oldsize) oldsize - size); } #endif - arena_ralloc_large_shrink(arena, chunk, ptr, psize, - oldsize); + arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, + psize); return (false); } else { bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - psize, oldsize); + oldsize, PAGE_CEILING(size), + psize - PAGE_CEILING(size), zero); #ifdef JEMALLOC_FILL - if (ret == false && opt_zero) { + if (ret == false && zero == false && opt_zero) { memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize); } @@ -2066,49 +2161,89 @@ arena_ralloc_large(void *ptr, size_t size, size_t oldsize) } void * -arena_ralloc(void *ptr, size_t size, size_t oldsize) +arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) { - void *ret; - size_t copysize; - /* Try to avoid moving the allocation. */ + /* + * Avoid moving the allocation if the size class can be left the same. 
+ */ if (oldsize <= arena_maxclass) { if (oldsize <= small_maxclass) { - if (size <= small_maxclass && small_size2bin[size] == - small_size2bin[oldsize]) - goto IN_PLACE; + assert(choose_arena()->bins[small_size2bin[ + oldsize]].reg_size == oldsize); + if ((size + extra <= small_maxclass && + small_size2bin[size + extra] == + small_size2bin[oldsize]) || (size <= oldsize && + size + extra >= oldsize)) { +#ifdef JEMALLOC_FILL + if (opt_junk && size < oldsize) { + memset((void *)((uintptr_t)ptr + size), + 0x5a, oldsize - size); + } +#endif + return (ptr); + } } else { assert(size <= arena_maxclass); - if (size > small_maxclass) { - if (arena_ralloc_large(ptr, size, oldsize) == - false) + if (size + extra > small_maxclass) { + if (arena_ralloc_large(ptr, oldsize, size, + extra, zero) == false) return (ptr); } } } + /* Reallocation would require a move. */ + return (NULL); +} + +void * +arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. */ + ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero); + if (ret != NULL) + return (ret); + + /* - * If we get here, then size and oldsize are different enough that we - * need to move the object. In that case, fall back to allocating new - * space and copying. + * size and oldsize are different enough that we need to move the + * object. In that case, fall back to allocating new space and + * copying. */ - ret = arena_malloc(size, false); - if (ret == NULL) - return (NULL); + if (alignment != 0) + ret = ipalloc(size + extra, alignment, zero); + else + ret = arena_malloc(size + extra, zero); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment != 0) + ret = ipalloc(size, alignment, zero); + else + ret = arena_malloc(size, zero); + + if (ret == NULL) + return (NULL); + } - /* Junk/zero-filling were already done by arena_malloc(). */ + /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */ + + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ copysize = (size < oldsize) ? size : oldsize; memcpy(ret, ptr, copysize); idalloc(ptr); return (ret); -IN_PLACE: -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) - memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); - else if (opt_zero && size > oldsize) - memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize); -#endif - return (ptr); } bool @@ -2234,26 +2369,6 @@ arena_new(arena_t *arena, unsigned ind) return (false); } -#ifdef JEMALLOC_TINY -/* Compute the smallest power of 2 that is >= x. */ -static size_t -pow2_ceil(size_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; -#if (SIZEOF_PTR == 8) - x |= x >> 32; -#endif - x++; - return (x); -} -#endif - #ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void) @@ -2376,6 +2491,7 @@ bool arena_boot(void) { size_t header_size; + unsigned i; /* Set variables according to the value of opt_lg_[qc]space_max. 
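pow2_ceil(), dropped from this file above, rounds up to the next power of two by smearing the highest set bit into every lower position and then adding one. A quick self-check of the bit trick; this sketch assumes a 64-bit size_t, where the original guards the final shift with SIZEOF_PTR:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Smallest power of 2 that is >= x, for 0 < x <= 2^63. */
static size_t
pow2_ceil(size_t x)
{
	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
	x |= x >> 32;		/* Only valid when size_t is 64 bits wide. */
	x++;
	return (x);
}

int
main(void)
{
	assert(pow2_ceil(1) == 1);
	assert(pow2_ceil(3) == 4);
	assert(pow2_ceil(4096) == 4096);
	assert(pow2_ceil(4097) == 8192);
	printf("pow2_ceil behaves as documented\n");
	return (0);
}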
*/ qspace_max = (1U << opt_lg_qspace_max); @@ -2415,7 +2531,7 @@ arena_boot(void) if (nbins > 255) { char line_buf[UMAX2S_BUFSIZE]; malloc_write("<jemalloc>: Too many small size classes ("); - malloc_write(umax2s(nbins, 10, line_buf)); + malloc_write(u2s(nbins, 10, line_buf)); malloc_write(" > max 255)\n"); abort(); } @@ -2424,7 +2540,7 @@ arena_boot(void) if (nbins > 256) { char line_buf[UMAX2S_BUFSIZE]; malloc_write("<jemalloc>: Too many small size classes ("); - malloc_write(umax2s(nbins, 10, line_buf)); + malloc_write(u2s(nbins, 10, line_buf)); malloc_write(" > max 256)\n"); abort(); } @@ -2434,13 +2550,26 @@ arena_boot(void) /* * Compute the header size such that it is large enough to contain the - * page map. + * page map. The page map is biased to omit entries for the header + * itself, so some iteration is necessary to compute the map bias. + * + * 1) Compute safe header_size and map_bias values that include enough + * space for an unbiased page map. + * 2) Refine map_bias based on (1) to omit the header pages in the page + * map. The resulting map_bias may be one too small. + * 3) Refine map_bias based on (2). The result will be >= the result + * from (2), and will always be correct. */ - header_size = sizeof(arena_chunk_t) + - (sizeof(arena_chunk_map_t) * (chunk_npages - 1)); - arena_chunk_header_npages = (header_size >> PAGE_SHIFT) + - ((header_size & PAGE_MASK) != 0); - arena_maxclass = chunksize - (arena_chunk_header_npages << PAGE_SHIFT); + map_bias = 0; + for (i = 0; i < 3; i++) { + header_size = offsetof(arena_chunk_t, map) + + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); + map_bias = (header_size >> PAGE_SHIFT) + ((header_size & + PAGE_MASK) != 0); + } + assert(map_bias > 0); + + arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); return (false); } diff --git a/dep/jemalloc/base.c b/dep/jemalloc/src/base.c index 605197eaced..cc85e8494ec 100644 --- a/dep/jemalloc/base.c +++ b/dep/jemalloc/src/base.c @@ -32,7 +32,7 @@ base_pages_alloc(size_t minsize) assert(minsize != 0); csize = CHUNK_CEILING(minsize); zero = false; - base_pages = chunk_alloc(csize, &zero); + base_pages = chunk_alloc(csize, true, &zero); if (base_pages == NULL) return (true); base_next_addr = base_pages; diff --git a/dep/jemalloc/chunk.c b/dep/jemalloc/src/chunk.c index e6e3bcd195a..301519e8042 100644 --- a/dep/jemalloc/chunk.c +++ b/dep/jemalloc/src/chunk.c @@ -14,11 +14,15 @@ malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; #endif +#ifdef JEMALLOC_IVSALLOC +rtree_t *chunks_rtree; +#endif + /* Various chunk-related settings. */ size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). */ size_t chunk_npages; -size_t arena_chunk_header_npages; +size_t map_bias; size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ @@ -30,7 +34,7 @@ size_t arena_maxclass; /* Max size class for arenas. */ * advantage of them if they are returned. */ void * -chunk_alloc(size_t size, bool *zero) +chunk_alloc(size_t size, bool base, bool *zero) { void *ret; @@ -63,10 +67,18 @@ chunk_alloc(size_t size, bool *zero) /* All strategies for allocation failed. 
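The three-pass loop added to arena_boot() above converges on map_bias, the number of pages consumed by the chunk header plus its biased page map. A standalone sketch of the same fixed-point iteration; the struct layout, page size, and chunk size here are illustrative stand-ins, not the real jemalloc values:

#include <stddef.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_MASK	(((size_t)1 << PAGE_SHIFT) - 1)

/* Simplified stand-ins for arena_chunk_t and arena_chunk_map_t. */
typedef struct { size_t bits; } map_elm_t;
typedef struct {
	void		*arena;
	map_elm_t	map[1];	/* Really chunk_npages-map_bias elements. */
} chunk_hdr_t;

int
main(void)
{
	size_t chunk_npages = 1024;	/* e.g. a 4 MiB chunk of 4 KiB pages. */
	size_t map_bias = 0;
	size_t header_size;
	unsigned i;

	/*
	 * Pass 1 sizes the map as if it were unbiased, pass 2 may come out
	 * one page short, pass 3 can only grow and therefore converges.
	 */
	for (i = 0; i < 3; i++) {
		header_size = offsetof(chunk_hdr_t, map) +
		    sizeof(map_elm_t) * (chunk_npages - map_bias);
		map_bias = (header_size >> PAGE_SHIFT) +
		    ((header_size & PAGE_MASK) != 0);
	}
	printf("map_bias converges to %zu header pages\n", map_bias);
	return (0);
}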
*/ ret = NULL; RETURN: +#ifdef JEMALLOC_IVSALLOC + if (base == false && ret != NULL) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { + chunk_dealloc(ret, size); + return (NULL); + } + } +#endif #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) if (ret != NULL) { # ifdef JEMALLOC_PROF - bool udump; + bool gdump; # endif malloc_mutex_lock(&chunks_mtx); # ifdef JEMALLOC_STATS @@ -76,17 +88,17 @@ RETURN: if (stats_chunks.curchunks > stats_chunks.highchunks) { stats_chunks.highchunks = stats_chunks.curchunks; # ifdef JEMALLOC_PROF - udump = true; + gdump = true; # endif } # ifdef JEMALLOC_PROF else - udump = false; + gdump = false; # endif malloc_mutex_unlock(&chunks_mtx); # ifdef JEMALLOC_PROF - if (opt_prof && opt_prof_udump && udump) - prof_udump(); + if (opt_prof && opt_prof_gdump && gdump) + prof_gdump(); # endif } #endif @@ -104,6 +116,9 @@ chunk_dealloc(void *chunk, size_t size) assert(size != 0); assert((size & chunksize_mask) == 0); +#ifdef JEMALLOC_IVSALLOC + rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); +#endif #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) malloc_mutex_lock(&chunks_mtx); stats_chunks.curchunks -= (size / chunksize); @@ -126,7 +141,7 @@ chunk_boot(void) { /* Set variables according to the value of opt_lg_chunk. */ - chunksize = (1LU << opt_lg_chunk); + chunksize = (ZU(1) << opt_lg_chunk); assert(chunksize >= PAGE_SIZE); chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> PAGE_SHIFT); @@ -136,15 +151,21 @@ chunk_boot(void) return (true); memset(&stats_chunks, 0, sizeof(chunk_stats_t)); #endif - #ifdef JEMALLOC_SWAP if (chunk_swap_boot()) return (true); #endif + if (chunk_mmap_boot()) + return (true); #ifdef JEMALLOC_DSS if (chunk_dss_boot()) return (true); #endif +#ifdef JEMALLOC_IVSALLOC + chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); + if (chunks_rtree == NULL) + return (true); +#endif return (false); } diff --git a/dep/jemalloc/chunk_dss.c b/dep/jemalloc/src/chunk_dss.c index d9bd63c3ac4..5c0e290e441 100644 --- a/dep/jemalloc/chunk_dss.c +++ b/dep/jemalloc/src/chunk_dss.c @@ -200,6 +200,22 @@ chunk_dealloc_dss_record(void *chunk, size_t size) } bool +chunk_in_dss(void *chunk) +{ + bool ret; + + malloc_mutex_lock(&dss_mtx); + if ((uintptr_t)chunk >= (uintptr_t)dss_base + && (uintptr_t)chunk < (uintptr_t)dss_max) + ret = true; + else + ret = false; + malloc_mutex_unlock(&dss_mtx); + + return (ret); +} + +bool chunk_dealloc_dss(void *chunk, size_t size) { bool ret; diff --git a/dep/jemalloc/chunk_mmap.c b/dep/jemalloc/src/chunk_mmap.c index 8f0711384e3..bc367559774 100644 --- a/dep/jemalloc/chunk_mmap.c +++ b/dep/jemalloc/src/chunk_mmap.c @@ -6,31 +6,36 @@ /* * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and - * potentially avoid some system calls. We can get away without TLS here, - * since the state of mmap_unaligned only affects performance, rather than - * correct function. + * potentially avoid some system calls. 
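chunk_boot() above keys the new chunks_rtree on (1 << (LG_SIZEOF_PTR+3)) - opt_lg_chunk bits, i.e. the pointer width minus the bits that are constant within a chunk-aligned address. A small arithmetic illustration, assuming 64-bit pointers and 4 MiB chunks (both are assumptions for the example, not values taken from this tree):

#include <stdio.h>

int
main(void)
{
	unsigned lg_sizeof_ptr = 3;	/* 8-byte pointers. */
	unsigned opt_lg_chunk = 22;	/* 4 MiB chunks. */
	unsigned key_bits = (1U << (lg_sizeof_ptr + 3)) - opt_lg_chunk;

	/* Chunk base addresses differ only in their upper key_bits bits. */
	printf("chunks_rtree indexed by %u significant bits\n", key_bits);
	return (0);
}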
*/ -static #ifndef NO_TLS - __thread +static __thread bool mmap_unaligned_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +#define MMAP_UNALIGNED_GET() mmap_unaligned_tls +#define MMAP_UNALIGNED_SET(v) do { \ + mmap_unaligned_tls = (v); \ +} while (0) +#else +static pthread_key_t mmap_unaligned_tsd; +#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd)) +#define MMAP_UNALIGNED_SET(v) do { \ + pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \ +} while (0) #endif - bool mmap_unaligned -#ifndef NO_TLS - JEMALLOC_ATTR(tls_model("initial-exec")) -#endif - ; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void *pages_map(void *addr, size_t size); +static void *pages_map(void *addr, size_t size, bool noreserve); static void pages_unmap(void *addr, size_t size); -static void *chunk_alloc_mmap_slow(size_t size, bool unaligned); +static void *chunk_alloc_mmap_slow(size_t size, bool unaligned, + bool noreserve); +static void *chunk_alloc_mmap_internal(size_t size, bool noreserve); /******************************************************************************/ static void * -pages_map(void *addr, size_t size) +pages_map(void *addr, size_t size, bool noreserve) { void *ret; @@ -38,8 +43,12 @@ pages_map(void *addr, size_t size) * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. */ - ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, - -1, 0); + int flags = MAP_PRIVATE | MAP_ANON; +#ifdef MAP_NORESERVE + if (noreserve) + flags |= MAP_NORESERVE; +#endif + ret = mmap(addr, size, PROT_READ | PROT_WRITE, flags, -1, 0); assert(ret != NULL); if (ret == MAP_FAILED) @@ -49,9 +58,9 @@ pages_map(void *addr, size_t size) * We succeeded in mapping memory, but not in the right place. */ if (munmap(ret, size) == -1) { - char buf[STRERROR_BUF]; + char buf[BUFERROR_BUF]; - strerror_r(errno, buf, sizeof(buf)); + buferror(errno, buf, sizeof(buf)); malloc_write("<jemalloc>: Error in munmap(): "); malloc_write(buf); malloc_write("\n"); @@ -71,9 +80,9 @@ pages_unmap(void *addr, size_t size) { if (munmap(addr, size) == -1) { - char buf[STRERROR_BUF]; + char buf[BUFERROR_BUF]; - strerror_r(errno, buf, sizeof(buf)); + buferror(errno, buf, sizeof(buf)); malloc_write("<jemalloc>: Error in munmap(): "); malloc_write(buf); malloc_write("\n"); @@ -83,7 +92,7 @@ pages_unmap(void *addr, size_t size) } static void * -chunk_alloc_mmap_slow(size_t size, bool unaligned) +chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) { void *ret; size_t offset; @@ -92,7 +101,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned) if (size + chunksize <= size) return (NULL); - ret = pages_map(NULL, size + chunksize); + ret = pages_map(NULL, size + chunksize, noreserve); if (ret == NULL) return (NULL); @@ -123,13 +132,13 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned) * method. */ if (unaligned == false) - mmap_unaligned = false; + MMAP_UNALIGNED_SET(false); return (ret); } -void * -chunk_alloc_mmap(size_t size) +static void * +chunk_alloc_mmap_internal(size_t size, bool noreserve) { void *ret; @@ -161,25 +170,26 @@ chunk_alloc_mmap(size_t size) * fast method next time. 
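The chunk_mmap.c change above hides the mmap_unaligned hint behind GET/SET macros so the same call sites compile against either __thread storage or pthread TSD on NO_TLS platforms. A minimal sketch of that dual-backend pattern for a generic flag (only the NO_TLS macro name is taken from the diff; the rest is illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#ifndef NO_TLS
static __thread bool flag_tls;
#define	FLAG_GET()	flag_tls
#define	FLAG_SET(v)	do { flag_tls = (v); } while (0)
#else
static pthread_key_t flag_tsd;	/* Created once, e.g. from a boot hook. */
#define	FLAG_GET()	((bool)(uintptr_t)pthread_getspecific(flag_tsd))
#define	FLAG_SET(v)	do {						\
	pthread_setspecific(flag_tsd, (void *)(uintptr_t)(v));		\
} while (0)
#endif

int
main(void)
{
#ifdef NO_TLS
	if (pthread_key_create(&flag_tsd, NULL) != 0)
		return (1);
#endif
	FLAG_SET(true);
	printf("flag = %d\n", (int)FLAG_GET());
	return (0);
}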
*/ - if (mmap_unaligned == false) { + if (MMAP_UNALIGNED_GET() == false) { size_t offset; - ret = pages_map(NULL, size); + ret = pages_map(NULL, size, noreserve); if (ret == NULL) return (NULL); offset = CHUNK_ADDR2OFFSET(ret); if (offset != 0) { - mmap_unaligned = true; + MMAP_UNALIGNED_SET(true); /* Try to extend chunk boundary. */ if (pages_map((void *)((uintptr_t)ret + size), - chunksize - offset) == NULL) { + chunksize - offset, noreserve) == NULL) { /* * Extension failed. Clean up, then revert to * the reliable-but-expensive method. */ pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, true); + ret = chunk_alloc_mmap_slow(size, true, + noreserve); } else { /* Clean up unneeded leading space. */ pages_unmap(ret, chunksize - offset); @@ -188,14 +198,40 @@ chunk_alloc_mmap(size_t size) } } } else - ret = chunk_alloc_mmap_slow(size, false); + ret = chunk_alloc_mmap_slow(size, false, noreserve); return (ret); } +void * +chunk_alloc_mmap(size_t size) +{ + return chunk_alloc_mmap_internal(size, false); +} + +void * +chunk_alloc_mmap_noreserve(size_t size) +{ + return chunk_alloc_mmap_internal(size, true); +} + void chunk_dealloc_mmap(void *chunk, size_t size) { pages_unmap(chunk, size); } + +bool +chunk_mmap_boot(void) +{ + +#ifdef NO_TLS + if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) { + malloc_write("<jemalloc>: Error in pthread_key_create()\n"); + return (true); + } +#endif + + return (false); +} diff --git a/dep/jemalloc/chunk_swap.c b/dep/jemalloc/src/chunk_swap.c index b8c880f0a17..cb25ae0dde2 100644 --- a/dep/jemalloc/chunk_swap.c +++ b/dep/jemalloc/src/chunk_swap.c @@ -185,6 +185,24 @@ chunk_dealloc_swap_record(void *chunk, size_t size) } bool +chunk_in_swap(void *chunk) +{ + bool ret; + + assert(swap_enabled); + + malloc_mutex_lock(&swap_mtx); + if ((uintptr_t)chunk >= (uintptr_t)swap_base + && (uintptr_t)chunk < (uintptr_t)swap_max) + ret = true; + else + ret = false; + malloc_mutex_unlock(&swap_mtx); + + return (ret); +} + +bool chunk_dealloc_swap(void *chunk, size_t size) { bool ret; @@ -219,15 +237,15 @@ chunk_dealloc_swap(void *chunk, size_t size) } else madvise(chunk, size, MADV_DONTNEED); +#ifdef JEMALLOC_STATS + swap_avail += size; +#endif ret = false; goto RETURN; } ret = true; RETURN: -#ifdef JEMALLOC_STATS - swap_avail += size; -#endif malloc_mutex_unlock(&swap_mtx); return (ret); } @@ -283,7 +301,7 @@ chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) * Allocate a chunk-aligned region of anonymous memory, which will * be the final location for the memory-mapped files. 
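chunk_alloc_mmap_slow() in the hunks above gets chunk alignment out of plain mmap by over-mapping one extra chunk and unmapping the misaligned head and the leftover tail. A condensed, standalone version of the idea; a 64 KiB "chunk" size is used here purely for illustration, and error handling is minimal:

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

#define CHUNKSIZE	((size_t)1 << 16)	/* Illustrative alignment. */

static void *
alloc_chunk_aligned(size_t size)
{
	char *base, *ret;
	size_t head;

	/* Over-map so an aligned region of `size` bytes must fit inside. */
	base = mmap(NULL, size + CHUNKSIZE, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED)
		return (NULL);

	head = (size_t)(CHUNKSIZE - ((uintptr_t)base & (CHUNKSIZE - 1))) &
	    (CHUNKSIZE - 1);
	ret = base + head;

	/* Trim the unaligned head and whatever tail is left over. */
	if (head != 0)
		munmap(base, head);
	munmap(ret + size, CHUNKSIZE - head);
	return (ret);
}

int
main(void)
{
	void *p = alloc_chunk_aligned(4 * CHUNKSIZE);

	printf("%p is chunk-aligned: %d\n", p,
	    p != NULL && ((uintptr_t)p & (CHUNKSIZE - 1)) == 0);
	return (0);
}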
*/ - vaddr = chunk_alloc_mmap(cumsize); + vaddr = chunk_alloc_mmap_noreserve(cumsize); if (vaddr == NULL) { ret = true; goto RETURN; @@ -294,9 +312,10 @@ chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i], PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0); if (addr == MAP_FAILED) { - char buf[STRERROR_BUF]; + char buf[BUFERROR_BUF]; + - strerror_r(errno, buf, sizeof(buf)); + buferror(errno, buf, sizeof(buf)); malloc_write( "<jemalloc>: Error in mmap(..., MAP_FIXED, ...): "); malloc_write(buf); @@ -304,7 +323,7 @@ chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) if (opt_abort) abort(); if (munmap(vaddr, voff) == -1) { - strerror_r(errno, buf, sizeof(buf)); + buferror(errno, buf, sizeof(buf)); malloc_write("<jemalloc>: Error in munmap(): "); malloc_write(buf); malloc_write("\n"); diff --git a/dep/jemalloc/ckh.c b/dep/jemalloc/src/ckh.c index a0c4162aa19..682a8db65bf 100644 --- a/dep/jemalloc/ckh.c +++ b/dep/jemalloc/src/ckh.c @@ -263,13 +263,12 @@ ckh_grow(ckh_t *ckh) lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; while (true) { lg_curcells++; - tab = (ckhc_t *) ipalloc((ZU(1) << LG_CACHELINE), - sizeof(ckhc_t) << lg_curcells); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { ret = true; goto RETURN; } - memset(tab, 0, sizeof(ckhc_t) << lg_curcells); /* Swap in new table. */ ttab = ckh->tab; ckh->tab = tab; @@ -305,8 +304,8 @@ ckh_shrink(ckh_t *ckh) */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE), - sizeof(ckhc_t) << lg_curcells); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -314,7 +313,6 @@ ckh_shrink(ckh_t *ckh) */ return; } - memset(tab, 0, sizeof(ckhc_t) << lg_curcells); /* Swap in new table. 
*/ ttab = ckh->tab; ckh->tab = tab; @@ -377,13 +375,12 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->hash = hash; ckh->keycomp = keycomp; - ckh->tab = (ckhc_t *)ipalloc((ZU(1) << LG_CACHELINE), - sizeof(ckhc_t) << lg_mincells); + ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells, + (ZU(1) << LG_CACHELINE), true); if (ckh->tab == NULL) { ret = true; goto RETURN; } - memset(ckh->tab, 0, sizeof(ckhc_t) << lg_mincells); #ifdef JEMALLOC_DEBUG ckh->magic = CKH_MAGIG; @@ -570,12 +567,21 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, { size_t ret1, ret2; uint64_t h; + union { + const void *v; + uint64_t i; + } u; assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); assert(hash1 != NULL); assert(hash2 != NULL); - h = hash(&key, sizeof(void *), 0xd983396e68886082LLU); + assert(sizeof(u.v) == sizeof(u.i)); +#if (LG_SIZEOF_PTR != LG_SIZEOF_INT) + u.i = 0; +#endif + u.v = key; + h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU); if (minbits <= 32) { /* * Avoid doing multiple hashes, since a single hash provides @@ -586,7 +592,7 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, } else { assert(SIZEOF_PTR == 8); ret1 = h; - ret2 = hash(&key, sizeof(void *), 0x5e2be9aff8709a5dLLU); + ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU); } *hash1 = ret1; diff --git a/dep/jemalloc/ctl.c b/dep/jemalloc/src/ctl.c index ffb732d5bef..3c8adab90a3 100644 --- a/dep/jemalloc/ctl.c +++ b/dep/jemalloc/src/ctl.c @@ -4,6 +4,13 @@ /******************************************************************************/ /* Data. */ +/* + * ctl_mtx protects the following: + * - ctl_stats.* + * - opt_prof_active + * - swap_enabled + * - swap_prezeroed + */ static malloc_mutex_t ctl_mtx; static bool ctl_initialized; static uint64_t ctl_epoch; @@ -41,6 +48,13 @@ CTL_PROTO(epoch) #ifdef JEMALLOC_TCACHE CTL_PROTO(tcache_flush) #endif +CTL_PROTO(thread_arena) +#ifdef JEMALLOC_STATS +CTL_PROTO(thread_allocated) +CTL_PROTO(thread_allocatedp) +CTL_PROTO(thread_deallocated) +CTL_PROTO(thread_deallocatedp) +#endif CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_dynamic_page_shift) @@ -57,8 +71,15 @@ CTL_PROTO(config_tiny) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) +CTL_PROTO(opt_lg_qspace_max) +CTL_PROTO(opt_lg_cspace_max) +CTL_PROTO(opt_lg_chunk) +CTL_PROTO(opt_narenas) +CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_stats_print) #ifdef JEMALLOC_FILL CTL_PROTO(opt_junk) +CTL_PROTO(opt_zero) #endif #ifdef JEMALLOC_SYSV CTL_PROTO(opt_sysv) @@ -66,27 +87,22 @@ CTL_PROTO(opt_sysv) #ifdef JEMALLOC_XMALLOC CTL_PROTO(opt_xmalloc) #endif -#ifdef JEMALLOC_ZERO -CTL_PROTO(opt_zero) -#endif #ifdef JEMALLOC_TCACHE CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_gc_sweep) #endif #ifdef JEMALLOC_PROF CTL_PROTO(opt_prof) +CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) CTL_PROTO(opt_lg_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) -CTL_PROTO(opt_prof_udump) +CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_leak) +CTL_PROTO(opt_prof_accum) +CTL_PROTO(opt_lg_prof_tcmax) #endif -CTL_PROTO(opt_stats_print) -CTL_PROTO(opt_lg_qspace_max) -CTL_PROTO(opt_lg_cspace_max) -CTL_PROTO(opt_lg_dirty_mult) -CTL_PROTO(opt_lg_chunk) #ifdef JEMALLOC_SWAP CTL_PROTO(opt_overcommit) #endif @@ -125,6 +141,7 @@ CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) #endif CTL_PROTO(arenas_nlruns) +CTL_PROTO(arenas_purge) #ifdef JEMALLOC_PROF CTL_PROTO(prof_active) CTL_PROTO(prof_dump) @@ -210,6 +227,17 @@ static const 
ctl_node_t tcache_node[] = { }; #endif +static const ctl_node_t thread_node[] = { + {NAME("arena"), CTL(thread_arena)} +#ifdef JEMALLOC_STATS + , + {NAME("allocated"), CTL(thread_allocated)}, + {NAME("allocatedp"), CTL(thread_allocatedp)}, + {NAME("deallocated"), CTL(thread_deallocated)}, + {NAME("deallocatedp"), CTL(thread_deallocatedp)} +#endif +}; + static const ctl_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("dss"), CTL(config_dss)}, @@ -230,36 +258,43 @@ static const ctl_node_t config_node[] = { static const ctl_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, + {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, + {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, + {NAME("lg_chunk"), CTL(opt_lg_chunk)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("stats_print"), CTL(opt_stats_print)} #ifdef JEMALLOC_FILL + , {NAME("junk"), CTL(opt_junk)}, + {NAME("zero"), CTL(opt_zero)} #endif #ifdef JEMALLOC_SYSV - {NAME("sysv"), CTL(opt_sysv)}, + , + {NAME("sysv"), CTL(opt_sysv)} #endif #ifdef JEMALLOC_XMALLOC - {NAME("xmalloc"), CTL(opt_xmalloc)}, -#endif -#ifdef JEMALLOC_ZERO - {NAME("zero"), CTL(opt_zero)}, + , + {NAME("xmalloc"), CTL(opt_xmalloc)} #endif #ifdef JEMALLOC_TCACHE + , {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)}, + {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)} #endif #ifdef JEMALLOC_PROF + , {NAME("prof"), CTL(opt_prof)}, + {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, {NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, - {NAME("prof_udump"), CTL(opt_prof_udump)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} #endif - {NAME("stats_print"), CTL(opt_stats_print)}, - {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, - {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, - {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("lg_chunk"), CTL(opt_lg_chunk)} #ifdef JEMALLOC_SWAP , {NAME("overcommit"), CTL(opt_overcommit)} @@ -321,7 +356,8 @@ static const ctl_node_t arenas_node[] = { #endif {NAME("bin"), CHILD(arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(arenas_lrun)} + {NAME("lrun"), CHILD(arenas_lrun)}, + {NAME("purge"), CTL(arenas_purge)} }; #ifdef JEMALLOC_PROF @@ -448,6 +484,7 @@ static const ctl_node_t root_node[] = { #ifdef JEMALLOC_TCACHE {NAME("tcache"), CHILD(tcache)}, #endif + {NAME("thread"), CHILD(thread)}, {NAME("config"), CHILD(config)}, {NAME("opt"), CHILD(opt)}, {NAME("arenas"), CHILD(arenas)}, @@ -654,7 +691,9 @@ ctl_refresh(void) static bool ctl_init(void) { + bool ret; + malloc_mutex_lock(&ctl_mtx); if (ctl_initialized == false) { #ifdef JEMALLOC_STATS unsigned i; @@ -666,8 +705,10 @@ ctl_init(void) */ ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( (narenas + 1) * sizeof(ctl_arena_stats_t)); - if (ctl_stats.arenas == NULL) - return (true); + if (ctl_stats.arenas == NULL) { + ret = true; + goto RETURN; + } memset(ctl_stats.arenas, 0, (narenas + 1) * sizeof(ctl_arena_stats_t)); @@ -678,8 +719,10 @@ ctl_init(void) */ #ifdef JEMALLOC_STATS for (i = 0; i <= narenas; i++) { - if (ctl_arena_init(&ctl_stats.arenas[i])) - return (true); + if (ctl_arena_init(&ctl_stats.arenas[i])) { + ret = true; + goto RETURN; + } } #endif 
ctl_stats.arenas[narenas].initialized = true; @@ -689,7 +732,10 @@ ctl_init(void) ctl_initialized = true; } - return (false); + ret = false; +RETURN: + malloc_mutex_unlock(&ctl_mtx); + return (ret); } static int @@ -799,8 +845,7 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, ctl_node_t const *nodes[CTL_MAX_DEPTH]; size_t mib[CTL_MAX_DEPTH]; - malloc_mutex_lock(&ctl_mtx); - if (ctl_init()) { + if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; goto RETURN; } @@ -815,10 +860,9 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, ret = ENOENT; goto RETURN; } - ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); + ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); RETURN: - malloc_mutex_unlock(&ctl_mtx); return(ret); } @@ -827,16 +871,13 @@ ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - malloc_mutex_lock(&ctl_mtx); - if (ctl_init()) { + if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; goto RETURN; } ret = ctl_lookup(name, NULL, mibp, miblenp); - RETURN: - malloc_mutex_unlock(&ctl_mtx); return(ret); } @@ -848,8 +889,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, const ctl_node_t *node; size_t i; - malloc_mutex_lock(&ctl_mtx); - if (ctl_init()) { + if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; goto RETURN; } @@ -886,7 +926,6 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); RETURN: - malloc_mutex_unlock(&ctl_mtx); return(ret); } @@ -955,6 +994,29 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ t oldval; \ \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +RETURN: \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +/* + * ctl_mtx is not acquired, under the assumption that no pertinent data will + * mutate during the call. + */ +#define CTL_RO_NL_GEN(n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ READONLY(); \ oldval = v; \ READ(oldval, t); \ @@ -998,7 +1060,7 @@ RETURN: \ return (ret); \ } -CTL_RO_GEN(version, JEMALLOC_VERSION, const char *) +CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, @@ -1007,6 +1069,7 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; uint64_t newval; + malloc_mutex_lock(&ctl_mtx); newval = 0; WRITE(newval, uint64_t); if (newval != 0) @@ -1015,6 +1078,7 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; RETURN: + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1028,13 +1092,13 @@ tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, VOID(); - tcache = tcache_tls; + tcache = TCACHE_GET(); if (tcache == NULL) { ret = 0; goto RETURN; } tcache_destroy(tcache); - tcache_tls = NULL; + TCACHE_SET(NULL); ret = 0; RETURN: @@ -1042,6 +1106,51 @@ RETURN: } #endif +static int +thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + unsigned newind, oldind; + + newind = oldind = choose_arena()->ind; + WRITE(oldind, unsigned); + READ(newind, unsigned); + if (newind != oldind) { + arena_t *arena; + + if (newind >= narenas) { + /* New arena index is out of range. 
*/ + ret = EFAULT; + goto RETURN; + } + + /* Initialize arena if necessary. */ + malloc_mutex_lock(&arenas_lock); + if ((arena = arenas[newind]) == NULL) + arena = arenas_extend(newind); + malloc_mutex_unlock(&arenas_lock); + if (arena == NULL) { + ret = EAGAIN; + goto RETURN; + } + + /* Set new arena association. */ + ARENA_SET(arena); + } + + ret = 0; +RETURN: + return (ret); +} + +#ifdef JEMALLOC_STATS +CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); +CTL_RO_NL_GEN(thread_allocatedp, &ALLOCATED_GET(), uint64_t *); +CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); +CTL_RO_NL_GEN(thread_deallocatedp, &DEALLOCATED_GET(), uint64_t *); +#endif + /******************************************************************************/ #ifdef JEMALLOC_DEBUG @@ -1136,46 +1245,48 @@ CTL_RO_FALSE_GEN(config_xmalloc) /******************************************************************************/ -CTL_RO_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) +CTL_RO_NL_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) +CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) #ifdef JEMALLOC_FILL -CTL_RO_GEN(opt_junk, opt_junk, bool) +CTL_RO_NL_GEN(opt_junk, opt_junk, bool) +CTL_RO_NL_GEN(opt_zero, opt_zero, bool) #endif #ifdef JEMALLOC_SYSV -CTL_RO_GEN(opt_sysv, opt_sysv, bool) +CTL_RO_NL_GEN(opt_sysv, opt_sysv, bool) #endif #ifdef JEMALLOC_XMALLOC -CTL_RO_GEN(opt_xmalloc, opt_xmalloc, bool) -#endif -#ifdef JEMALLOC_ZERO -CTL_RO_GEN(opt_zero, opt_zero, bool) +CTL_RO_NL_GEN(opt_xmalloc, opt_xmalloc, bool) #endif #ifdef JEMALLOC_TCACHE -CTL_RO_GEN(opt_tcache, opt_tcache, bool) -CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t) +CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t) #endif #ifdef JEMALLOC_PROF -CTL_RO_GEN(opt_prof, opt_prof, bool) -CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) -CTL_RO_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) -CTL_RO_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) -CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool) -CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool) -#endif -CTL_RO_GEN(opt_stats_print, opt_stats_print, bool) -CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) -CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) -CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) -CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t) +CTL_RO_NL_GEN(opt_prof, opt_prof, bool) +CTL_RO_NL_GEN(opt_prof_prefix, opt_prof_prefix, const char *) +CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) /* Mutable. 
*/ +CTL_RO_NL_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) +CTL_RO_NL_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t) +CTL_RO_NL_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) +CTL_RO_NL_GEN(opt_prof_gdump, opt_prof_gdump, bool) +CTL_RO_NL_GEN(opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_GEN(opt_prof_accum, opt_prof_accum, bool) +CTL_RO_NL_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) +#endif #ifdef JEMALLOC_SWAP -CTL_RO_GEN(opt_overcommit, opt_overcommit, bool) +CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool) #endif /******************************************************************************/ -CTL_RO_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t) -CTL_RO_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t) -CTL_RO_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t) const ctl_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1185,7 +1296,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_bin_i_node); } -CTL_RO_GEN(arenas_lrun_i_size, ((mib[2]+1) << PAGE_SHIFT), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << PAGE_SHIFT), size_t) const ctl_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1195,7 +1306,7 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_lrun_i_node); } -CTL_RO_GEN(arenas_narenas, narenas, unsigned) +CTL_RO_NL_GEN(arenas_narenas, narenas, unsigned) static int arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, @@ -1204,6 +1315,7 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, int ret; unsigned nread, i; + malloc_mutex_lock(&ctl_mtx); READONLY(); if (*oldlenp != narenas * sizeof(bool)) { ret = EINVAL; @@ -1218,36 +1330,75 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; RETURN: + malloc_mutex_unlock(&ctl_mtx); return (ret); } -CTL_RO_GEN(arenas_quantum, QUANTUM, size_t) -CTL_RO_GEN(arenas_cacheline, CACHELINE, size_t) -CTL_RO_GEN(arenas_subpage, SUBPAGE, size_t) -CTL_RO_GEN(arenas_pagesize, PAGE_SIZE, size_t) -CTL_RO_GEN(arenas_chunksize, chunksize, size_t) +CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) +CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t) +CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t) +CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) +CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) #ifdef JEMALLOC_TINY -CTL_RO_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t) -CTL_RO_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) -#endif -CTL_RO_GEN(arenas_qspace_min, qspace_min, size_t) -CTL_RO_GEN(arenas_qspace_max, qspace_max, size_t) -CTL_RO_GEN(arenas_cspace_min, cspace_min, size_t) -CTL_RO_GEN(arenas_cspace_max, cspace_max, size_t) -CTL_RO_GEN(arenas_sspace_min, sspace_min, size_t) -CTL_RO_GEN(arenas_sspace_max, sspace_max, size_t) +CTL_RO_NL_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t) +CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) +#endif +CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t) +CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t) +CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t) +CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t) 
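The new thread.arena and thread.allocated/thread.deallocated nodes above are reachable through the usual mallctl() interface. A usage sketch; it assumes the library exports the unprefixed name mallctl (the bundled copy actually wraps public symbols in JEMALLOC_P()), and the byte counters only exist in JEMALLOC_STATS builds:

#include <stdint.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned arena_ind;
	uint64_t allocated;
	size_t sz;

	/* Which arena is this thread currently bound to? */
	sz = sizeof(arena_ind);
	if (mallctl("thread.arena", &arena_ind, &sz, NULL, 0) == 0)
		printf("bound to arena %u\n", arena_ind);

	/* Writing an index instead would rebind the thread to that arena. */

	/* Cumulative bytes allocated by this thread (stats builds only). */
	sz = sizeof(allocated);
	if (mallctl("thread.allocated", &allocated, &sz, NULL, 0) == 0)
		printf("thread allocated %llu bytes\n",
		    (unsigned long long)allocated);
	return (0);
}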
+CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t) +CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t) #ifdef JEMALLOC_TCACHE -CTL_RO_GEN(arenas_tcache_max, tcache_maxclass, size_t) +CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) #endif -CTL_RO_GEN(arenas_ntbins, ntbins, unsigned) -CTL_RO_GEN(arenas_nqbins, nqbins, unsigned) -CTL_RO_GEN(arenas_ncbins, ncbins, unsigned) -CTL_RO_GEN(arenas_nsbins, nsbins, unsigned) -CTL_RO_GEN(arenas_nbins, nbins, unsigned) +CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned) +CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned) +CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned) +CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned) +CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned) #ifdef JEMALLOC_TCACHE -CTL_RO_GEN(arenas_nhbins, nhbins, unsigned) +CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) #endif -CTL_RO_GEN(arenas_nlruns, nlclasses, size_t) +CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) + +static int +arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + unsigned arena; + + WRITEONLY(); + arena = UINT_MAX; + WRITE(arena, unsigned); + if (newp != NULL && arena >= narenas) { + ret = EFAULT; + goto RETURN; + } else { + arena_t *tarenas[narenas]; + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); + malloc_mutex_unlock(&arenas_lock); + + if (arena == UINT_MAX) { + unsigned i; + for (i = 0; i < narenas; i++) { + if (tarenas[i] != NULL) + arena_purge_all(tarenas[i]); + } + } else { + assert(arena < narenas); + if (tarenas[arena] != NULL) + arena_purge_all(tarenas[arena]); + } + } + + ret = 0; +RETURN: + return (ret); +} /******************************************************************************/ @@ -1259,6 +1410,7 @@ prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; bool oldval; + malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. 
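arenas_purge_ctl() above is write-only: writing a valid index purges that arena's dirty pages, and writing UINT_MAX purges every initialized arena. A hypothetical caller, with the same mallctl-name caveat as the previous sketch:

#include <limits.h>
#include <jemalloc/jemalloc.h>

/* Ask every initialized arena to return its dirty pages to the kernel. */
int
purge_all_arenas(void)
{
	unsigned arena = UINT_MAX;	/* UINT_MAX means "all arenas". */

	return (mallctl("arenas.purge", NULL, NULL, &arena, sizeof(arena)));
}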
*/ oldval = opt_prof_active; if (newp != NULL) { /* @@ -1273,6 +1425,7 @@ prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; RETURN: + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1296,7 +1449,7 @@ RETURN: return (ret); } -CTL_RO_GEN(prof_interval, prof_interval, uint64_t) +CTL_RO_NL_GEN(prof_interval, prof_interval, uint64_t) #endif /******************************************************************************/ @@ -1394,10 +1547,18 @@ CTL_RO_GEN(stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, const ctl_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { + const ctl_node_t * ret; - if (ctl_stats.arenas[i].initialized == false) - return (NULL); - return (super_stats_arenas_i_node); + malloc_mutex_lock(&ctl_mtx); + if (ctl_stats.arenas[i].initialized == false) { + ret = NULL; + goto RETURN; + } + + ret = super_stats_arenas_i_node; +RETURN: + malloc_mutex_unlock(&ctl_mtx); + return (ret); } #ifdef JEMALLOC_STATS @@ -1419,6 +1580,7 @@ swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, { int ret; + malloc_mutex_lock(&ctl_mtx); if (swap_enabled) { READONLY(); } else { @@ -1436,6 +1598,7 @@ swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, ret = 0; RETURN: + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1447,6 +1610,7 @@ swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; + malloc_mutex_lock(&ctl_mtx); if (swap_enabled) { READONLY(); } else if (newp != NULL) { @@ -1477,6 +1641,7 @@ swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; RETURN: + malloc_mutex_unlock(&ctl_mtx); return (ret); } #endif diff --git a/dep/jemalloc/extent.c b/dep/jemalloc/src/extent.c index 3c04d3aa5d1..3c04d3aa5d1 100644 --- a/dep/jemalloc/extent.c +++ b/dep/jemalloc/src/extent.c diff --git a/dep/jemalloc/hash.c b/dep/jemalloc/src/hash.c index 6a13d7a03c0..6a13d7a03c0 100644 --- a/dep/jemalloc/hash.c +++ b/dep/jemalloc/src/hash.c diff --git a/dep/jemalloc/huge.c b/dep/jemalloc/src/huge.c index d35aa5cdd00..0aadc4339a9 100644 --- a/dep/jemalloc/huge.c +++ b/dep/jemalloc/src/huge.c @@ -37,7 +37,7 @@ huge_malloc(size_t size, bool zero) if (node == NULL) return (NULL); - ret = chunk_alloc(csize, &zero); + ret = chunk_alloc(csize, false, &zero); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -69,12 +69,11 @@ huge_malloc(size_t size, bool zero) /* Only handles large allocations that require more than chunk alignment. 
*/ void * -huge_palloc(size_t alignment, size_t size) +huge_palloc(size_t size, size_t alignment, bool zero) { void *ret; size_t alloc_size, chunk_size, offset; extent_node_t *node; - bool zero; /* * This allocation requires alignment that is even larger than chunk @@ -98,8 +97,7 @@ huge_palloc(size_t alignment, size_t size) if (node == NULL) return (NULL); - zero = false; - ret = chunk_alloc(alloc_size, &zero); + ret = chunk_alloc(alloc_size, false, &zero); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -142,53 +140,131 @@ huge_palloc(size_t alignment, size_t size) malloc_mutex_unlock(&huge_mtx); #ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, chunk_size); - else if (opt_zero) - memset(ret, 0, chunk_size); + if (zero == false) { + if (opt_junk) + memset(ret, 0xa5, chunk_size); + else if (opt_zero) + memset(ret, 0, chunk_size); + } #endif return (ret); } void * -huge_ralloc(void *ptr, size_t size, size_t oldsize) +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) { - void *ret; - size_t copysize; - /* Avoid moving the allocation if the size class would not change. */ - if (oldsize > arena_maxclass && - CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) { + /* + * Avoid moving the allocation if the size class can be left the same. + */ + if (oldsize > arena_maxclass + && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { + assert(CHUNK_CEILING(oldsize) == oldsize); #ifdef JEMALLOC_FILL if (opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - - size); - } else if (opt_zero && size > oldsize) { - memset((void *)((uintptr_t)ptr + oldsize), 0, size - - oldsize); + memset((void *)((uintptr_t)ptr + size), 0x5a, + oldsize - size); } #endif return (ptr); } + /* Reallocation would require a move. */ + return (NULL); +} + +void * +huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. */ + ret = huge_ralloc_no_move(ptr, oldsize, size, extra); + if (ret != NULL) + return (ret); + /* - * If we get here, then size and oldsize are different enough that we - * need to use a different size class. In that case, fall back to - * allocating new space and copying. + * size and oldsize are different enough that we need to use a + * different size class. In that case, fall back to allocating new + * space and copying. */ - ret = huge_malloc(size, false); - if (ret == NULL) - return (NULL); + if (alignment != 0) + ret = huge_palloc(size + extra, alignment, zero); + else + ret = huge_malloc(size + extra, zero); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment != 0) + ret = huge_palloc(size, alignment, zero); + else + ret = huge_malloc(size, zero); + + if (ret == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ copysize = (size < oldsize) ? size : oldsize; - memcpy(ret, ptr, copysize); - idalloc(ptr); + + /* + * Use mremap(2) if this is a huge-->huge reallocation, and neither the + * source nor the destination are in swap or dss. 
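The comment above introduces the mremap(2) fast path that the new huge_ralloc() takes when neither mapping lives in swap or DSS memory: the destination chunk is allocated first and the old pages are then moved onto it with MREMAP_MAYMOVE|MREMAP_FIXED instead of being copied. A minimal Linux-only illustration of that flag combination (the sizes and mappings here are arbitrary):

#define _GNU_SOURCE		/* For mremap() and the MREMAP_* flags. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int
main(void)
{
	size_t oldsize = (size_t)1 << 20, newsize = (size_t)2 << 20;
	char *src, *dst;

	src = mmap(NULL, oldsize, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	dst = mmap(NULL, newsize, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED || dst == MAP_FAILED)
		return (1);
	memset(src, 0x5a, oldsize);

	/* Move the old pages onto dst rather than copying them. */
	if (mremap(src, oldsize, newsize, MREMAP_MAYMOVE | MREMAP_FIXED,
	    dst) == MAP_FAILED) {
		perror("mremap");
		return (1);
	}
	printf("dst[0] = 0x%x after remap\n", (unsigned char)dst[0]);
	return (0);
}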
+ */ +#ifdef JEMALLOC_MREMAP_FIXED + if (oldsize >= chunksize +# ifdef JEMALLOC_SWAP + && (swap_enabled == false || (chunk_in_swap(ptr) == false && + chunk_in_swap(ret) == false)) +# endif +# ifdef JEMALLOC_DSS + && chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false +# endif + ) { + size_t newsize = huge_salloc(ret); + + if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, + ret) == MAP_FAILED) { + /* + * Assuming no chunk management bugs in the allocator, + * the only documented way an error can occur here is + * if the application changed the map type for a + * portion of the old allocation. This is firmly in + * undefined behavior territory, so write a diagnostic + * message, and optionally abort. + */ + char buf[BUFERROR_BUF]; + + buferror(errno, buf, sizeof(buf)); + malloc_write("<jemalloc>: Error in mremap(): "); + malloc_write(buf); + malloc_write("\n"); + if (opt_abort) + abort(); + memcpy(ret, ptr, copysize); + idalloc(ptr); + } else + huge_dalloc(ptr, false); + } else +#endif + { + memcpy(ret, ptr, copysize); + idalloc(ptr); + } return (ret); } void -huge_dalloc(void *ptr) +huge_dalloc(void *ptr, bool unmap) { extent_node_t *node, key; @@ -208,14 +284,16 @@ huge_dalloc(void *ptr) malloc_mutex_unlock(&huge_mtx); + if (unmap) { /* Unmap chunk. */ #ifdef JEMALLOC_FILL #if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) - if (opt_junk) - memset(node->addr, 0x5a, node->size); + if (opt_junk) + memset(node->addr, 0x5a, node->size); #endif #endif - chunk_dealloc(node->addr, node->size); + chunk_dealloc(node->addr, node->size); + } base_node_dealloc(node); } @@ -241,10 +319,10 @@ huge_salloc(const void *ptr) } #ifdef JEMALLOC_PROF -prof_thr_cnt_t * -huge_prof_cnt_get(const void *ptr) +prof_ctx_t * +huge_prof_ctx_get(const void *ptr) { - prof_thr_cnt_t *ret; + prof_ctx_t *ret; extent_node_t *node, key; malloc_mutex_lock(&huge_mtx); @@ -254,7 +332,7 @@ huge_prof_cnt_get(const void *ptr) node = extent_tree_ad_search(&huge, &key); assert(node != NULL); - ret = node->prof_cnt; + ret = node->prof_ctx; malloc_mutex_unlock(&huge_mtx); @@ -262,7 +340,7 @@ huge_prof_cnt_get(const void *ptr) } void -huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) +huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { extent_node_t *node, key; @@ -273,7 +351,7 @@ huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) node = extent_tree_ad_search(&huge, &key); assert(node != NULL); - node->prof_cnt = cnt; + node->prof_ctx = ctx; malloc_mutex_unlock(&huge_mtx); } diff --git a/dep/jemalloc/src/jemalloc.c b/dep/jemalloc/src/jemalloc.c new file mode 100644 index 00000000000..2aebc51dd19 --- /dev/null +++ b/dep/jemalloc/src/jemalloc.c @@ -0,0 +1,1759 @@ +#define JEMALLOC_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +malloc_mutex_t arenas_lock; +arena_t **arenas; +unsigned narenas; +static unsigned next_arena; + +#ifndef NO_TLS +__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#else +pthread_key_t arenas_tsd; +#endif + +#ifdef JEMALLOC_STATS +# ifndef NO_TLS +__thread thread_allocated_t thread_allocated_tls; +# else +pthread_key_t thread_allocated_tsd; +# endif +#endif + +/* Set to true once the allocator has been initialized. */ +static bool malloc_initialized = false; + +/* Used to let the initializing thread recursively allocate. */ +static pthread_t malloc_initializer = (unsigned long)0; + +/* Used to avoid initialization races. 
*/ +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; + +#ifdef DYNAMIC_PAGE_SHIFT +size_t pagesize; +size_t pagesize_mask; +size_t lg_pagesize; +#endif + +unsigned ncpus; + +/* Runtime configuration options. */ +const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default")); +#ifdef JEMALLOC_DEBUG +bool opt_abort = true; +# ifdef JEMALLOC_FILL +bool opt_junk = true; +# endif +#else +bool opt_abort = false; +# ifdef JEMALLOC_FILL +bool opt_junk = false; +# endif +#endif +#ifdef JEMALLOC_SYSV +bool opt_sysv = false; +#endif +#ifdef JEMALLOC_XMALLOC +bool opt_xmalloc = false; +#endif +#ifdef JEMALLOC_FILL +bool opt_zero = false; +#endif +size_t opt_narenas = 0; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void wrtmessage(void *cbopaque, const char *s); +static void stats_print_atexit(void); +static unsigned malloc_ncpus(void); +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +static void thread_allocated_cleanup(void *arg); +#endif +static bool malloc_conf_next(char const **opts_p, char const **k_p, + size_t *klen_p, char const **v_p, size_t *vlen_p); +static void malloc_conf_error(const char *msg, const char *k, size_t klen, + const char *v, size_t vlen); +static void malloc_conf_init(void); +static bool malloc_init_hard(void); + +/******************************************************************************/ +/* malloc_message() setup. */ + +#ifdef JEMALLOC_HAVE_ATTR +JEMALLOC_ATTR(visibility("hidden")) +#else +static +#endif +void +wrtmessage(void *cbopaque, const char *s) +{ +#ifdef JEMALLOC_CC_SILENCE + int result = +#endif + write(STDERR_FILENO, s, strlen(s)); +#ifdef JEMALLOC_CC_SILENCE + if (result < 0) + result = errno; +#endif +} + +void (*JEMALLOC_P(malloc_message))(void *, const char *s) + JEMALLOC_ATTR(visibility("default")) = wrtmessage; + +/******************************************************************************/ +/* + * Begin miscellaneous support functions. + */ + +/* Create a new arena and insert it into the arenas array at index ind. */ +arena_t * +arenas_extend(unsigned ind) +{ + arena_t *ret; + + /* Allocate enough space for trailing bins. */ + ret = (arena_t *)base_alloc(offsetof(arena_t, bins) + + (sizeof(arena_bin_t) * nbins)); + if (ret != NULL && arena_new(ret, ind) == false) { + arenas[ind] = ret; + return (ret); + } + /* Only reached if there is an OOM error. */ + + /* + * OOM here is quite inconvenient to propagate, since dealing with it + * would require a check for failure in the fast path. Instead, punt + * by using arenas[0]. In practice, this is an extremely unlikely + * failure. + */ + malloc_write("<jemalloc>: Error initializing arena\n"); + if (opt_abort) + abort(); + + return (arenas[0]); +} + +/* + * Choose an arena based on a per-thread value (slow-path code only, called + * only by choose_arena()). + */ +arena_t * +choose_arena_hard(void) +{ + arena_t *ret; + + if (narenas > 1) { + malloc_mutex_lock(&arenas_lock); + if ((ret = arenas[next_arena]) == NULL) + ret = arenas_extend(next_arena); + next_arena = (next_arena + 1) % narenas; + malloc_mutex_unlock(&arenas_lock); + } else + ret = arenas[0]; + + ARENA_SET(ret); + + return (ret); +} + +/* + * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so + * provide a wrapper. 
+ */ +int +buferror(int errnum, char *buf, size_t buflen) +{ +#ifdef _GNU_SOURCE + char *b = strerror_r(errno, buf, buflen); + if (b != buf) { + strncpy(buf, b, buflen); + buf[buflen-1] = '\0'; + } + return (0); +#else + return (strerror_r(errno, buf, buflen)); +#endif +} + +static void +stats_print_atexit(void) +{ + +#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS)) + unsigned i; + + /* + * Merge stats from extant threads. This is racy, since individual + * threads do not lock when recording tcache stats events. As a + * consequence, the final stats may be slightly out of date by the time + * they are reported, if other threads continue to allocate. + */ + for (i = 0; i < narenas; i++) { + arena_t *arena = arenas[i]; + if (arena != NULL) { + tcache_t *tcache; + + /* + * tcache_stats_merge() locks bins, so if any code is + * introduced that acquires both arena and bin locks in + * the opposite order, deadlocks may result. + */ + malloc_mutex_lock(&arena->lock); + ql_foreach(tcache, &arena->tcache_ql, link) { + tcache_stats_merge(tcache, arena); + } + malloc_mutex_unlock(&arena->lock); + } + } +#endif + JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); +} + +/* + * End miscellaneous support functions. + */ +/******************************************************************************/ +/* + * Begin initialization functions. + */ + +static unsigned +malloc_ncpus(void) +{ + unsigned ret; + long result; + + result = sysconf(_SC_NPROCESSORS_ONLN); + if (result == -1) { + /* Error. */ + ret = 1; + } + ret = (unsigned)result; + + return (ret); +} + +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +static void +thread_allocated_cleanup(void *arg) +{ + uint64_t *allocated = (uint64_t *)arg; + + if (allocated != NULL) + idalloc(allocated); +} +#endif + +/* + * FreeBSD's pthreads implementation calls malloc(3), so the malloc + * implementation has to take pains to avoid infinite recursion during + * initialization. 
+ */ +static inline bool +malloc_init(void) +{ + + if (malloc_initialized == false) + return (malloc_init_hard()); + + return (false); +} + +static bool +malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p) +{ + bool accept; + const char *opts = *opts_p; + + *k_p = opts; + + for (accept = false; accept == false;) { + switch (*opts) { + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_write("<jemalloc>: Conf string " + "ends with key\n"); + } + return (true); + default: + malloc_write("<jemalloc>: Malformed conf " + "string\n"); + return (true); + } + } + + for (accept = false; accept == false;) { + switch (*opts) { + case ',': + opts++; + /* + * Look ahead one character here, because the + * next time this function is called, it will + * assume that end of input has been cleanly + * reached if no input remains, but we have + * optimistically already consumed the comma if + * one exists. + */ + if (*opts == '\0') { + malloc_write("<jemalloc>: Conf string " + "ends with comma\n"); + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; + } + } + + *opts_p = opts; + return (false); +} + +static void +malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, + size_t vlen) +{ + char buf[PATH_MAX + 1]; + + malloc_write("<jemalloc>: "); + malloc_write(msg); + malloc_write(": "); + memcpy(buf, k, klen); + memcpy(&buf[klen], ":", 1); + memcpy(&buf[klen+1], v, vlen); + buf[klen+1+vlen] = '\0'; + malloc_write(buf); + malloc_write("\n"); +} + +static void +malloc_conf_init(void) +{ + unsigned i; + char buf[PATH_MAX + 1]; + const char *opts, *k, *v; + size_t klen, vlen; + + for (i = 0; i < 3; i++) { + /* Get runtime configuration. */ + switch (i) { + case 0: + if (JEMALLOC_P(malloc_conf) != NULL) { + /* + * Use options that were compiled into the + * program. + */ + opts = JEMALLOC_P(malloc_conf); + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + case 1: { + int linklen; + const char *linkname = +#ifdef JEMALLOC_PREFIX + "/etc/"JEMALLOC_PREFIX"malloc.conf" +#else + "/etc/malloc.conf" +#endif + ; + + if ((linklen = readlink(linkname, buf, + sizeof(buf) - 1)) != -1) { + /* + * Use the contents of the "/etc/malloc.conf" + * symbolic link's name. + */ + buf[linklen] = '\0'; + opts = buf; + } else { + /* No configuration specified. 
*/ + buf[0] = '\0'; + opts = buf; + } + break; + } + case 2: { + const char *envname = +#ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX"MALLOC_CONF" +#else + "MALLOC_CONF" +#endif + ; + + if ((opts = getenv(envname)) != NULL) { + /* + * Do nothing; opts is already initialized to + * the value of the JEMALLOC_OPTIONS + * environment variable. + */ + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + } + default: + /* NOTREACHED */ + assert(false); + buf[0] = '\0'; + opts = buf; + } + + while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, + &vlen) == false) { +#define CONF_HANDLE_BOOL(n) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + if (strncmp("true", v, vlen) == 0 && \ + vlen == sizeof("true")-1) \ + opt_##n = true; \ + else if (strncmp("false", v, vlen) == \ + 0 && vlen == sizeof("false")-1) \ + opt_##n = false; \ + else { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } \ + continue; \ + } +#define CONF_HANDLE_SIZE_T(n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + unsigned long ul; \ + char *end; \ + \ + errno = 0; \ + ul = strtoul(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (ul < min || ul > max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + opt_##n = ul; \ + continue; \ + } +#define CONF_HANDLE_SSIZE_T(n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + long l; \ + char *end; \ + \ + errno = 0; \ + l = strtol(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (l < (ssize_t)min || l > \ + (ssize_t)max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + opt_##n = l; \ + continue; \ + } +#define CONF_HANDLE_CHAR_P(n, d) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + size_t cpylen = (vlen <= \ + sizeof(opt_##n)-1) ? vlen : \ + sizeof(opt_##n)-1; \ + strncpy(opt_##n, v, cpylen); \ + opt_##n[cpylen] = '\0'; \ + continue; \ + } + + CONF_HANDLE_BOOL(abort) + CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM, + PAGE_SHIFT-1) + CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM, + PAGE_SHIFT-1) + /* + * Chunks always require at least one * header page, + * plus one data page. 
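
malloc_conf_next() above walks a flat "name1:value1,name2:value2" option string, whichever of the three sources supplied it (the compiled-in malloc_conf symbol, the name of the /etc/malloc.conf symbolic link, or the MALLOC_CONF environment variable). A toy scanner for the same shape of string is sketched below; the option names are keys that appear in this diff, the values are invented, and no validation is performed.

#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *conf = "abort:true,narenas:4,lg_chunk:24";
	const char *p = conf;

	while (*p != '\0') {
		const char *colon = strchr(p, ':');
		const char *comma = strchr(colon + 1, ',');
		size_t klen = (size_t)(colon - p);
		size_t vlen = (comma != NULL) ? (size_t)(comma - colon - 1) :
		    strlen(colon + 1);

		/* Print each name/value pair as it is scanned. */
		printf("%.*s = %.*s\n", (int)klen, p, (int)vlen, colon + 1);
		p = (comma != NULL) ? comma + 1 : colon + 1 + vlen;
	}
	return (0);
}
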
+ */ + CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX) + CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_BOOL(stats_print) +#ifdef JEMALLOC_FILL + CONF_HANDLE_BOOL(junk) + CONF_HANDLE_BOOL(zero) +#endif +#ifdef JEMALLOC_SYSV + CONF_HANDLE_BOOL(sysv) +#endif +#ifdef JEMALLOC_XMALLOC + CONF_HANDLE_BOOL(xmalloc) +#endif +#ifdef JEMALLOC_TCACHE + CONF_HANDLE_BOOL(tcache) + CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, + (sizeof(size_t) << 3) - 1) +#endif +#ifdef JEMALLOC_PROF + CONF_HANDLE_BOOL(prof) + CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") + CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX) + CONF_HANDLE_BOOL(prof_active) + CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_accum) + CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_gdump) + CONF_HANDLE_BOOL(prof_leak) +#endif +#ifdef JEMALLOC_SWAP + CONF_HANDLE_BOOL(overcommit) +#endif + malloc_conf_error("Invalid conf pair", k, klen, v, + vlen); +#undef CONF_HANDLE_BOOL +#undef CONF_HANDLE_SIZE_T +#undef CONF_HANDLE_SSIZE_T +#undef CONF_HANDLE_CHAR_P + } + + /* Validate configuration of options that are inter-related. */ + if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) { + malloc_write("<jemalloc>: Invalid lg_[qc]space_max " + "relationship; restoring defaults\n"); + opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; + opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; + } + } +} + +static bool +malloc_init_hard(void) +{ + arena_t *init_arenas[1]; + + malloc_mutex_lock(&init_lock); + if (malloc_initialized || malloc_initializer == pthread_self()) { + /* + * Another thread initialized the allocator before this one + * acquired init_lock, or this thread is the initializing + * thread, and it is recursively allocating. + */ + malloc_mutex_unlock(&init_lock); + return (false); + } + if (malloc_initializer != (unsigned long)0) { + /* Busy-wait until the initializing thread completes. */ + do { + malloc_mutex_unlock(&init_lock); + CPU_SPINWAIT; + malloc_mutex_lock(&init_lock); + } while (malloc_initialized == false); + malloc_mutex_unlock(&init_lock); + return (false); + } + +#ifdef DYNAMIC_PAGE_SHIFT + /* Get page size. */ + { + long result; + + result = sysconf(_SC_PAGESIZE); + assert(result != -1); + pagesize = (unsigned)result; + + /* + * We assume that pagesize is a power of 2 when calculating + * pagesize_mask and lg_pagesize. + */ + assert(((result - 1) & result) == 0); + pagesize_mask = result - 1; + lg_pagesize = ffs((int)result) - 1; + } +#endif + +#ifdef JEMALLOC_PROF + prof_boot0(); +#endif + + malloc_conf_init(); + + /* Register fork handlers. */ + if (pthread_atfork(jemalloc_prefork, jemalloc_postfork, + jemalloc_postfork) != 0) { + malloc_write("<jemalloc>: Error in pthread_atfork()\n"); + if (opt_abort) + abort(); + } + + if (ctl_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (opt_stats_print) { + /* Print statistics at exit. 
*/ + if (atexit(stats_print_atexit) != 0) { + malloc_write("<jemalloc>: Error in atexit()\n"); + if (opt_abort) + abort(); + } + } + + if (chunk_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (base_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + +#ifdef JEMALLOC_PROF + prof_boot1(); +#endif + + if (arena_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + +#ifdef JEMALLOC_TCACHE + tcache_boot(); +#endif + + if (huge_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) + /* Initialize allocation counters before any allocations can occur. */ + if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup) + != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + + /* + * Create enough scaffolding to allow recursive allocation in + * malloc_ncpus(). + */ + narenas = 1; + arenas = init_arenas; + memset(arenas, 0, sizeof(arena_t *) * narenas); + + /* + * Initialize one arena here. The rest are lazily created in + * choose_arena_hard(). + */ + arenas_extend(0); + if (arenas[0] == NULL) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + /* + * Assign the initial arena to the initial thread, in order to avoid + * spurious creation of an extra arena if the application switches to + * threaded mode. + */ + ARENA_SET(arenas[0]); + + malloc_mutex_init(&arenas_lock); + +#ifdef JEMALLOC_PROF + if (prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + + /* Get number of CPUs. */ + malloc_initializer = pthread_self(); + malloc_mutex_unlock(&init_lock); + ncpus = malloc_ncpus(); + malloc_mutex_lock(&init_lock); + + if (opt_narenas == 0) { + /* + * For SMP systems, create more than one arena per CPU by + * default. + */ + if (ncpus > 1) + opt_narenas = ncpus << 2; + else + opt_narenas = 1; + } + narenas = opt_narenas; + /* + * Make sure that the arenas array can be allocated. In practice, this + * limit is enough to allow the allocator to function, but the ctl + * machinery will fail to allocate memory at far lower limits. + */ + if (narenas > chunksize / sizeof(arena_t *)) { + char buf[UMAX2S_BUFSIZE]; + + narenas = chunksize / sizeof(arena_t *); + malloc_write("<jemalloc>: Reducing narenas to limit ("); + malloc_write(u2s(narenas, 10, buf)); + malloc_write(")\n"); + } + + next_arena = (narenas > 0) ? 1 : 0; + +#ifdef NO_TLS + if (pthread_key_create(&arenas_tsd, NULL) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + + /* Allocate and initialize arenas. */ + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); + if (arenas == NULL) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* + * Zero the array. In practice, this should always be pre-zeroed, + * since it was just mmap()ed, but let's be sure. + */ + memset(arenas, 0, sizeof(arena_t *) * narenas); + /* Copy the pointer to the one arena that was already initialized. */ + arenas[0] = init_arenas[0]; + +#ifdef JEMALLOC_ZONE + /* Register the custom zone. */ + malloc_zone_register(create_zone()); + + /* + * Convert the default szone to an "overlay zone" that is capable of + * deallocating szone-allocated objects, but allocating new objects + * from jemalloc. 
+ */ + szone2ozone(malloc_default_zone()); +#endif + + malloc_initialized = true; + malloc_mutex_unlock(&init_lock); + return (false); +} + + +#ifdef JEMALLOC_ZONE +JEMALLOC_ATTR(constructor) +void +jemalloc_darwin_init(void) +{ + + if (malloc_init_hard()) + abort(); +} +#endif + +/* + * End initialization functions. + */ +/******************************************************************************/ +/* + * Begin malloc(3)-compatible functions. + */ + +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +JEMALLOC_P(malloc)(size_t size) +{ + void *ret; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; +#endif + + if (malloc_init()) { + ret = NULL; + goto OOM; + } + + if (size == 0) { +#ifdef JEMALLOC_SYSV + if (opt_sysv == false) +#endif + size = 1; +#ifdef JEMALLOC_SYSV + else { +# ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in malloc(): " + "invalid size 0\n"); + abort(); + } +# endif + ret = NULL; + goto RETURN; + } +#endif + } + +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = s2u(size); + if ((cnt = prof_alloc_prep(usize)) == NULL) { + ret = NULL; + goto OOM; + } + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= + small_maxclass) { + ret = imalloc(small_maxclass+1); + if (ret != NULL) + arena_prof_promoted(ret, usize); + } else + ret = imalloc(size); + } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); +#endif + ret = imalloc(size); + } + +OOM: + if (ret == NULL) { +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in malloc(): " + "out of memory\n"); + abort(); + } +#endif + errno = ENOMEM; + } + +#ifdef JEMALLOC_SYSV +RETURN: +#endif +#ifdef JEMALLOC_PROF + if (opt_prof && ret != NULL) + prof_malloc(ret, usize, cnt); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, 0); + } +#endif + return (ret); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +{ + int ret; + void *result; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; +#endif + + if (malloc_init()) + result = NULL; + else { + if (size == 0) { +#ifdef JEMALLOC_SYSV + if (opt_sysv == false) +#endif + size = 1; +#ifdef JEMALLOC_SYSV + else { +# ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in " + "posix_memalign(): invalid size " + "0\n"); + abort(); + } +# endif + result = NULL; + *memptr = NULL; + ret = 0; + goto RETURN; + } +#endif + } + + /* Make sure that alignment is a large enough power of 2. 
*/ + if (((alignment - 1) & alignment) != 0 + || alignment < sizeof(void *)) { +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in " + "posix_memalign(): invalid alignment\n"); + abort(); + } +#endif + result = NULL; + ret = EINVAL; + goto RETURN; + } + +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = sa2u(size, alignment, NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) { + result = NULL; + ret = EINVAL; + } else { + if (prof_promote && (uintptr_t)cnt != + (uintptr_t)1U && usize <= small_maxclass) { + result = ipalloc(small_maxclass+1, + alignment, false); + if (result != NULL) { + arena_prof_promoted(result, + usize); + } + } else { + result = ipalloc(size, alignment, + false); + } + } + } else +#endif + { +#ifdef JEMALLOC_STATS + usize = sa2u(size, alignment, NULL); +#endif + result = ipalloc(size, alignment, false); + } + } + + if (result == NULL) { +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in posix_memalign(): " + "out of memory\n"); + abort(); + } +#endif + ret = ENOMEM; + goto RETURN; + } + + *memptr = result; + ret = 0; + +RETURN: +#ifdef JEMALLOC_STATS + if (result != NULL) { + assert(usize == isalloc(result)); + ALLOCATED_ADD(usize, 0); + } +#endif +#ifdef JEMALLOC_PROF + if (opt_prof && result != NULL) + prof_malloc(result, usize, cnt); +#endif + return (ret); +} + +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +JEMALLOC_P(calloc)(size_t num, size_t size) +{ + void *ret; + size_t num_size; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; +#endif + + if (malloc_init()) { + num_size = 0; + ret = NULL; + goto RETURN; + } + + num_size = num * size; + if (num_size == 0) { +#ifdef JEMALLOC_SYSV + if ((opt_sysv == false) && ((num == 0) || (size == 0))) +#endif + num_size = 1; +#ifdef JEMALLOC_SYSV + else { + ret = NULL; + goto RETURN; + } +#endif + /* + * Try to avoid division here. We know that it isn't possible to + * overflow during multiplication if neither operand uses any of the + * most significant half of the bits in a size_t. + */ + } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) + && (num_size / size != num)) { + /* size_t overflow. 
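
calloc() above only pays for a division when it has to: if neither num nor size uses the upper half of size_t's bits, their product cannot overflow. A self-contained version of that check is sketched below; SIZE_MAX is used where the diff spells the same constant SIZE_T_MAX.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool
mul_overflows(size_t num, size_t size, size_t *prod)
{
	*prod = num * size;
	/* High-half mask, e.g. 0xffffffff00000000 for a 64-bit size_t. */
	if (((num | size) & (SIZE_MAX << (sizeof(size_t) << 2))) != 0 &&
	    size != 0 && *prod / size != num)
		return (true);
	return (false);
}
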
*/ + ret = NULL; + goto RETURN; + } + +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = s2u(num_size); + if ((cnt = prof_alloc_prep(usize)) == NULL) { + ret = NULL; + goto RETURN; + } + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize + <= small_maxclass) { + ret = icalloc(small_maxclass+1); + if (ret != NULL) + arena_prof_promoted(ret, usize); + } else + ret = icalloc(num_size); + } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(num_size); +#endif + ret = icalloc(num_size); + } + +RETURN: + if (ret == NULL) { +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in calloc(): out of " + "memory\n"); + abort(); + } +#endif + errno = ENOMEM; + } + +#ifdef JEMALLOC_PROF + if (opt_prof && ret != NULL) + prof_malloc(ret, usize, cnt); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, 0); + } +#endif + return (ret); +} + +JEMALLOC_ATTR(visibility("default")) +void * +JEMALLOC_P(realloc)(void *ptr, size_t size) +{ + void *ret; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; + size_t old_size = 0; +#endif +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; + prof_ctx_t *old_ctx +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; +#endif + + if (size == 0) { +#ifdef JEMALLOC_SYSV + if (opt_sysv == false) +#endif + size = 1; +#ifdef JEMALLOC_SYSV + else { + if (ptr != NULL) { +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + old_size = isalloc(ptr); +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) { + old_ctx = prof_ctx_get(ptr); + cnt = NULL; + } +#endif + idalloc(ptr); + } +#ifdef JEMALLOC_PROF + else if (opt_prof) { + old_ctx = NULL; + cnt = NULL; + } +#endif + ret = NULL; + goto RETURN; + } +#endif + } + + if (ptr != NULL) { + assert(malloc_initialized || malloc_initializer == + pthread_self()); + +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + old_size = isalloc(ptr); +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = s2u(size); + old_ctx = prof_ctx_get(ptr); + if ((cnt = prof_alloc_prep(usize)) == NULL) { + ret = NULL; + goto OOM; + } + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && + usize <= small_maxclass) { + ret = iralloc(ptr, small_maxclass+1, 0, 0, + false, false); + if (ret != NULL) + arena_prof_promoted(ret, usize); + } else + ret = iralloc(ptr, size, 0, 0, false, false); + } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); +#endif + ret = iralloc(ptr, size, 0, 0, false, false); + } + +#ifdef JEMALLOC_PROF +OOM: +#endif + if (ret == NULL) { +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in realloc(): " + "out of memory\n"); + abort(); + } +#endif + errno = ENOMEM; + } + } else { +#ifdef JEMALLOC_PROF + if (opt_prof) + old_ctx = NULL; +#endif + if (malloc_init()) { +#ifdef JEMALLOC_PROF + if (opt_prof) + cnt = NULL; +#endif + ret = NULL; + } else { +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = s2u(size); + if ((cnt = prof_alloc_prep(usize)) == NULL) + ret = NULL; + else { + if (prof_promote && (uintptr_t)cnt != + (uintptr_t)1U && usize <= + small_maxclass) { + ret = imalloc(small_maxclass+1); + if (ret != NULL) { + arena_prof_promoted(ret, + usize); + } + } else + ret = imalloc(size); + } + } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); +#endif + ret = imalloc(size); + } + } + + if (ret == NULL) { +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in 
realloc(): " + "out of memory\n"); + abort(); + } +#endif + errno = ENOMEM; + } + } + +#ifdef JEMALLOC_SYSV +RETURN: +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) + prof_realloc(ret, usize, cnt, old_size, old_ctx); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, old_size); + } +#endif + return (ret); +} + +JEMALLOC_ATTR(visibility("default")) +void +JEMALLOC_P(free)(void *ptr) +{ + + if (ptr != NULL) { +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize; +#endif + + assert(malloc_initialized || malloc_initializer == + pthread_self()); + +#ifdef JEMALLOC_STATS + usize = isalloc(ptr); +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) { +# ifndef JEMALLOC_STATS + usize = isalloc(ptr); +# endif + prof_free(ptr, usize); + } +#endif +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(0, usize); +#endif + idalloc(ptr); + } +} + +/* + * End malloc(3)-compatible functions. + */ +/******************************************************************************/ +/* + * Begin non-standard override functions. + * + * These overrides are omitted if the JEMALLOC_PREFIX is defined, since the + * entire point is to avoid accidental mixed allocator usage. + */ +#ifndef JEMALLOC_PREFIX + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +JEMALLOC_P(memalign)(size_t alignment, size_t size) +{ + void *ret; +#ifdef JEMALLOC_CC_SILENCE + int result = +#endif + JEMALLOC_P(posix_memalign)(&ret, alignment, size); +#ifdef JEMALLOC_CC_SILENCE + if (result != 0) + return (NULL); +#endif + return (ret); +} +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +JEMALLOC_P(valloc)(size_t size) +{ + void *ret; +#ifdef JEMALLOC_CC_SILENCE + int result = +#endif + JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); +#ifdef JEMALLOC_CC_SILENCE + if (result != 0) + return (NULL); +#endif + return (ret); +} +#endif + +#endif /* JEMALLOC_PREFIX */ +/* + * End non-standard override functions. + */ +/******************************************************************************/ +/* + * Begin non-standard functions. 
+ */ + +JEMALLOC_ATTR(visibility("default")) +size_t +JEMALLOC_P(malloc_usable_size)(const void *ptr) +{ + size_t ret; + + assert(malloc_initialized || malloc_initializer == pthread_self()); + +#ifdef JEMALLOC_IVSALLOC + ret = ivsalloc(ptr); +#else + assert(ptr != NULL); + ret = isalloc(ptr); +#endif + + return (ret); +} + +JEMALLOC_ATTR(visibility("default")) +void +JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), + void *cbopaque, const char *opts) +{ + + stats_print(write_cb, cbopaque, opts); +} + +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_byname(name, oldp, oldlenp, newp, newlen)); +} + +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_nametomib(name, mibp, miblenp)); +} + +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); +} + +JEMALLOC_INLINE void * +iallocm(size_t size, size_t alignment, bool zero) +{ + + if (alignment != 0) + return (ipalloc(size, alignment, zero)); + else if (zero) + return (icalloc(size)); + else + return (imalloc(size)); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) +{ + void *p; + size_t usize; + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & ALLOCM_ZERO; +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt; +#endif + + assert(ptr != NULL); + assert(size != 0); + + if (malloc_init()) + goto OOM; + +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, + NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) + goto OOM; + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= + small_maxclass) { + p = iallocm(small_maxclass+1, alignment, zero); + if (p == NULL) + goto OOM; + arena_prof_promoted(p, usize); + } else { + p = iallocm(size, alignment, zero); + if (p == NULL) + goto OOM; + } + + if (rsize != NULL) + *rsize = usize; + } else +#endif + { + p = iallocm(size, alignment, zero); + if (p == NULL) + goto OOM; +#ifndef JEMALLOC_STATS + if (rsize != NULL) +#endif + { + usize = (alignment == 0) ? 
s2u(size) : sa2u(size, + alignment, NULL); +#ifdef JEMALLOC_STATS + if (rsize != NULL) +#endif + *rsize = usize; + } + } + + *ptr = p; +#ifdef JEMALLOC_STATS + assert(usize == isalloc(p)); + ALLOCATED_ADD(usize, 0); +#endif + return (ALLOCM_SUCCESS); +OOM: +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in allocm(): " + "out of memory\n"); + abort(); + } +#endif + *ptr = NULL; + return (ALLOCM_ERR_OOM); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, + int flags) +{ + void *p, *q; + size_t usize; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t old_size; +#endif + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & ALLOCM_ZERO; + bool no_move = flags & ALLOCM_NO_MOVE; +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt; + prof_ctx_t *old_ctx; +#endif + + assert(ptr != NULL); + assert(*ptr != NULL); + assert(size != 0); + assert(SIZE_T_MAX - size >= extra); + assert(malloc_initialized || malloc_initializer == pthread_self()); + + p = *ptr; +#ifdef JEMALLOC_PROF + if (opt_prof) { + /* + * usize isn't knowable before iralloc() returns when extra is + * non-zero. Therefore, compute its maximum possible value and + * use that in prof_alloc_prep() to decide whether to capture a + * backtrace. prof_realloc() will use the actual usize to + * decide whether to sample. + */ + size_t max_usize = (alignment == 0) ? s2u(size+extra) : + sa2u(size+extra, alignment, NULL); + old_size = isalloc(p); + old_ctx = prof_ctx_get(p); + if ((cnt = prof_alloc_prep(max_usize)) == NULL) + goto OOM; + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize + <= small_maxclass) { + q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= + size+extra) ? 
0 : size+extra - (small_maxclass+1), + alignment, zero, no_move); + if (q == NULL) + goto ERR; + usize = isalloc(q); + arena_prof_promoted(q, usize); + } else { + q = iralloc(p, size, extra, alignment, zero, no_move); + if (q == NULL) + goto ERR; + usize = isalloc(q); + } + prof_realloc(q, usize, cnt, old_size, old_ctx); + } else +#endif + { +#ifdef JEMALLOC_STATS + old_size = isalloc(p); +#endif + q = iralloc(p, size, extra, alignment, zero, no_move); + if (q == NULL) + goto ERR; +#ifndef JEMALLOC_STATS + if (rsize != NULL) +#endif + { + usize = isalloc(q); +#ifdef JEMALLOC_STATS + if (rsize != NULL) +#endif + *rsize = usize; + } + } + + *ptr = q; +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(usize, old_size); +#endif + return (ALLOCM_SUCCESS); +ERR: + if (no_move) + return (ALLOCM_ERR_NOT_MOVED); +#ifdef JEMALLOC_PROF +OOM: +#endif +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write("<jemalloc>: Error in rallocm(): " + "out of memory\n"); + abort(); + } +#endif + return (ALLOCM_ERR_OOM); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) +{ + size_t sz; + + assert(malloc_initialized || malloc_initializer == pthread_self()); + +#ifdef JEMALLOC_IVSALLOC + sz = ivsalloc(ptr); +#else + assert(ptr != NULL); + sz = isalloc(ptr); +#endif + assert(rsize != NULL); + *rsize = sz; + + return (ALLOCM_SUCCESS); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(dallocm)(void *ptr, int flags) +{ +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize; +#endif + + assert(ptr != NULL); + assert(malloc_initialized || malloc_initializer == pthread_self()); + +#ifdef JEMALLOC_STATS + usize = isalloc(ptr); +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) { +# ifndef JEMALLOC_STATS + usize = isalloc(ptr); +# endif + prof_free(ptr, usize); + } +#endif +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(0, usize); +#endif + idalloc(ptr); + + return (ALLOCM_SUCCESS); +} + +/* + * End non-standard functions. + */ +/******************************************************************************/ + +/* + * The following functions are used by threading libraries for protection of + * malloc during fork(). + */ + +void +jemalloc_prefork(void) +{ + unsigned i; + + /* Acquire all mutexes in a safe order. */ + + malloc_mutex_lock(&arenas_lock); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + malloc_mutex_lock(&arenas[i]->lock); + } + + malloc_mutex_lock(&base_mtx); + + malloc_mutex_lock(&huge_mtx); + +#ifdef JEMALLOC_DSS + malloc_mutex_lock(&dss_mtx); +#endif + +#ifdef JEMALLOC_SWAP + malloc_mutex_lock(&swap_mtx); +#endif +} + +void +jemalloc_postfork(void) +{ + unsigned i; + + /* Release all mutexes, now that fork() has completed. 
*/ + +#ifdef JEMALLOC_SWAP + malloc_mutex_unlock(&swap_mtx); +#endif + +#ifdef JEMALLOC_DSS + malloc_mutex_unlock(&dss_mtx); +#endif + + malloc_mutex_unlock(&huge_mtx); + + malloc_mutex_unlock(&base_mtx); + + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + malloc_mutex_unlock(&arenas[i]->lock); + } + malloc_mutex_unlock(&arenas_lock); +} + +/******************************************************************************/ diff --git a/dep/jemalloc/mb.c b/dep/jemalloc/src/mb.c index 30a1a2e997a..30a1a2e997a 100644 --- a/dep/jemalloc/mb.c +++ b/dep/jemalloc/src/mb.c diff --git a/dep/jemalloc/mutex.c b/dep/jemalloc/src/mutex.c index 3b6081a4c4f..3ecb18a340e 100644 --- a/dep/jemalloc/mutex.c +++ b/dep/jemalloc/src/mutex.c @@ -59,7 +59,11 @@ malloc_mutex_init(malloc_mutex_t *mutex) if (pthread_mutexattr_init(&attr) != 0) return (true); +#ifdef PTHREAD_MUTEX_ADAPTIVE_NP pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); +#else + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); +#endif if (pthread_mutex_init(mutex, &attr) != 0) { pthread_mutexattr_destroy(&attr); return (true); @@ -68,3 +72,13 @@ malloc_mutex_init(malloc_mutex_t *mutex) return (false); } + +void +malloc_mutex_destroy(malloc_mutex_t *mutex) +{ + + if (pthread_mutex_destroy(mutex) != 0) { + malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n"); + abort(); + } +} diff --git a/dep/jemalloc/prof.c b/dep/jemalloc/src/prof.c index 6326188e50f..636cccef52a 100644 --- a/dep/jemalloc/prof.c +++ b/dep/jemalloc/src/prof.c @@ -12,8 +12,6 @@ #include <libunwind.h> #endif -#include <math.h> - /******************************************************************************/ /* Data. */ @@ -22,48 +20,30 @@ bool opt_prof_active = true; size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; -bool opt_prof_udump = false; +bool opt_prof_gdump = false; bool opt_prof_leak = false; +bool opt_prof_accum = true; +ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT; +char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; +unsigned prof_bt_max; + +#ifndef NO_TLS +__thread prof_tdata_t *prof_tdata_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +#endif +pthread_key_t prof_tdata_tsd; + /* * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data - * structure that knows about all backtraces ever captured. + * structure that knows about all backtraces currently captured. */ static ckh_t bt2ctx; static malloc_mutex_t bt2ctx_mtx; -/* - * Thread-specific hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread - * keeps a cache of backtraces, with associated thread-specific prof_thr_cnt_t - * objects. Other threads may read the prof_thr_cnt_t contents, but no others - * will ever write them. - * - * Upon thread exit, the thread must merge all the prof_thr_cnt_t counter data - * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t - * objects. - */ -static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec")); - -/* - * Same contents as b2cnt, but initialized such that the TSD destructor is - * called when a thread exits, so that bt2cnt_tls contents can be merged, - * unlinked, and deallocated. - */ -static pthread_key_t bt2cnt_tsd; - -/* (1U << opt_lg_prof_bt_max). 
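
jemalloc_prefork() and jemalloc_postfork() above implement the usual fork-safety pattern for a library with internal locks: acquire every mutex before fork(2) and release them all again in both the parent and the child, so the child never inherits a mutex held by a thread that no longer exists after the fork. The single-mutex sketch below mirrors that pattern and registers the handlers the same way malloc_init_hard() does above.

#include <pthread.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

static void
prefork(void)
{
	pthread_mutex_lock(&state_lock);
}

static void
postfork(void)
{
	pthread_mutex_unlock(&state_lock);
}

static void
install_fork_handlers(void)
{
	/* Same handler order as the diff: postfork runs in both processes. */
	pthread_atfork(prefork, postfork, postfork);
}
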
*/ -static unsigned prof_bt_max; - -static __thread uint64_t prof_sample_prn_state - JEMALLOC_ATTR(tls_model("initial-exec")); -static __thread uint64_t prof_sample_threshold - JEMALLOC_ATTR(tls_model("initial-exec")); -static __thread uint64_t prof_sample_accum - JEMALLOC_ATTR(tls_model("initial-exec")); - static malloc_mutex_t prof_dump_seq_mtx; static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; @@ -85,26 +65,25 @@ static bool prof_booted = false; static malloc_mutex_t enq_mtx; static bool enq; static bool enq_idump; -static bool enq_udump; +static bool enq_gdump; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ static prof_bt_t *bt_dup(prof_bt_t *bt); -static void bt_init(prof_bt_t *bt, void **vec); +static void bt_destroy(prof_bt_t *bt); #ifdef JEMALLOC_PROF_LIBGCC static _Unwind_Reason_Code prof_unwind_init_callback( struct _Unwind_Context *context, void *arg); static _Unwind_Reason_Code prof_unwind_callback( struct _Unwind_Context *context, void *arg); #endif -static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); -static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); -static void prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); static bool prof_flush(bool propagate_err); static bool prof_write(const char *s, bool propagate_err); -static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, +static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx); +static void prof_ctx_destroy(prof_ctx_t *ctx); +static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err); static bool prof_dump_maps(bool propagate_err); @@ -115,11 +94,11 @@ static void prof_fdump(void); static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); -static void bt2cnt_thread_cleanup(void *arg); +static void prof_tdata_cleanup(void *arg); /******************************************************************************/ -static void +void bt_init(prof_bt_t *bt, void **vec) { @@ -127,6 +106,13 @@ bt_init(prof_bt_t *bt, void **vec) bt->len = 0; } +static void +bt_destroy(prof_bt_t *bt) +{ + + idalloc(bt); +} + static prof_bt_t * bt_dup(prof_bt_t *bt) { @@ -165,7 +151,7 @@ prof_enter(void) static inline void prof_leave(void) { - bool idump, udump; + bool idump, gdump; malloc_mutex_unlock(&bt2ctx_mtx); @@ -173,14 +159,14 @@ prof_leave(void) enq = false; idump = enq_idump; enq_idump = false; - udump = enq_udump; - enq_udump = false; + gdump = enq_gdump; + enq_gdump = false; malloc_mutex_unlock(&enq_mtx); if (idump) prof_idump(); - if (udump) - prof_udump(); + if (gdump) + prof_gdump(); } #ifdef JEMALLOC_PROF_LIBGCC @@ -208,7 +194,7 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) return (_URC_NO_REASON); } -static void +void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { prof_unwind_data_t data = {bt, nignore, max}; @@ -216,7 +202,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) _Unwind_Backtrace(prof_unwind_callback, &data); } #elif defined(JEMALLOC_PROF_LIBUNWIND) -static void +void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { unw_context_t uc; @@ -239,54 +225,42 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } /* - * Iterate over stack frames until there are no more. 
Heap-allocate - * and iteratively grow a larger bt if necessary. + * Iterate over stack frames until there are no more, or until no space + * remains in bt. */ for (i = 0; i < max; i++) { unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); + bt->len++; err = unw_step(&cursor); - if (err <= 0) { - bt->len = i; + if (err <= 0) break; - } } } #else -static void +void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { -#define NIGNORE 3 #define BT_FRAME(i) \ - if ((i) < NIGNORE + max) { \ + if ((i) < nignore + max) { \ void *p; \ if (__builtin_frame_address(i) == 0) \ return; \ p = __builtin_return_address(i); \ if (p == NULL) \ return; \ - if (i >= NIGNORE) { \ - bt->vec[(i) - NIGNORE] = p; \ - bt->len = (i) - NIGNORE + 1; \ + if (i >= nignore) { \ + bt->vec[(i) - nignore] = p; \ + bt->len = (i) - nignore + 1; \ } \ } else \ return; + assert(nignore <= 3); assert(max <= (1U << opt_lg_prof_bt_max)); - /* - * Ignore the first three frames, since they are: - * - * 0: prof_backtrace() - * 1: prof_alloc_prep() - * 2: malloc(), calloc(), etc. - */ -#if 1 - assert(nignore + 1 == NIGNORE); -#else BT_FRAME(0) BT_FRAME(1) BT_FRAME(2) -#endif BT_FRAME(3) BT_FRAME(4) BT_FRAME(5) @@ -425,7 +399,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) BT_FRAME(126) BT_FRAME(127) - /* Extras to compensate for NIGNORE. */ + /* Extras to compensate for nignore. */ BT_FRAME(128) BT_FRAME(129) BT_FRAME(130) @@ -433,305 +407,122 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } #endif -static prof_thr_cnt_t * +prof_thr_cnt_t * prof_lookup(prof_bt_t *bt) { - prof_thr_cnt_t *ret; - ckh_t *bt2cnt = bt2cnt_tls; - - if (bt2cnt == NULL) { - /* Initialize an empty cache for this thread. */ - bt2cnt = (ckh_t *)imalloc(sizeof(ckh_t)); - if (bt2cnt == NULL) - return (NULL); - if (ckh_new(bt2cnt, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { - idalloc(bt2cnt); + union { + prof_thr_cnt_t *p; + void *v; + } ret; + prof_tdata_t *prof_tdata; + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata == NULL) { + prof_tdata = prof_tdata_init(); + if (prof_tdata == NULL) return (NULL); - } - bt2cnt_tls = bt2cnt; } - if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) { - prof_bt_t *btkey; - prof_ctx_t *ctx; + if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { + union { + prof_bt_t *p; + void *v; + } btkey; + union { + prof_ctx_t *p; + void *v; + } ctx; /* * This thread's cache lacks bt. Look for it in the global * cache. */ prof_enter(); - if (ckh_search(&bt2ctx, bt, (void **)&btkey, (void **)&ctx)) { - + if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { /* bt has never been seen before. Insert it. */ - ctx = (prof_ctx_t *)imalloc(sizeof(prof_ctx_t)); - if (ctx == NULL) { + ctx.v = imalloc(sizeof(prof_ctx_t)); + if (ctx.v == NULL) { prof_leave(); return (NULL); } - btkey = bt_dup(bt); - if (btkey == NULL) { + btkey.p = bt_dup(bt); + if (btkey.v == NULL) { prof_leave(); - idalloc(ctx); + idalloc(ctx.v); return (NULL); } - if (malloc_mutex_init(&ctx->lock)) { + ctx.p->bt = btkey.p; + if (malloc_mutex_init(&ctx.p->lock)) { prof_leave(); - idalloc(btkey); - idalloc(ctx); + idalloc(btkey.v); + idalloc(ctx.v); return (NULL); } - memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); - ql_new(&ctx->cnts_ql); - if (ckh_insert(&bt2ctx, btkey, ctx)) { + memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); + ql_new(&ctx.p->cnts_ql); + if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. 
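
When neither libgcc nor libunwind is available, the BT_FRAME() macro above walks the stack with GCC's __builtin_frame_address()/__builtin_return_address() builtins, whose argument must be a compile-time constant (hence the long unrolled list of frames). The small sketch below shows the same builtins in isolation; like the diff's fallback, it is only meaningful when the program is built with frame pointers.

#include <stdio.h>

/* Prints up to two return addresses of the current call chain. */
static void
show_callers(void)
{
	if (__builtin_frame_address(0) == 0)
		return;
	printf("caller:          %p\n", __builtin_return_address(0));
	if (__builtin_frame_address(1) == 0)
		return;
	printf("caller's caller: %p\n", __builtin_return_address(1));
}

int
main(void)
{
	show_callers();
	return (0);
}
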
*/ prof_leave(); - idalloc(btkey); - idalloc(ctx); + malloc_mutex_destroy(&ctx.p->lock); + idalloc(btkey.v); + idalloc(ctx.v); return (NULL); } } + /* + * Acquire ctx's lock before releasing bt2ctx_mtx, in order to + * avoid a race condition with prof_ctx_destroy(). + */ + malloc_mutex_lock(&ctx.p->lock); prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ - ret = (prof_thr_cnt_t *)imalloc(sizeof(prof_thr_cnt_t)); - if (ret == NULL) - return (NULL); - ql_elm_new(ret, link); - ret->ctx = ctx; - ret->epoch = 0; - memset(&ret->cnts, 0, sizeof(prof_cnt_t)); - if (ckh_insert(bt2cnt, btkey, ret)) { - idalloc(ret); - return (NULL); - } - malloc_mutex_lock(&ctx->lock); - ql_tail_insert(&ctx->cnts_ql, ret, link); - malloc_mutex_unlock(&ctx->lock); - } - - return (ret); -} - -static inline void -prof_sample_threshold_update(void) -{ - uint64_t r; - double u; - - /* - * Compute prof_sample_threshold as a geometrically distributed random - * variable with mean (2^opt_lg_prof_sample). - */ - prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU, - 1058392653243283975); - u = (double)r * (1.0/9007199254740992.0L); - prof_sample_threshold = (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) - + (uint64_t)1U; -} - -prof_thr_cnt_t * -prof_alloc_prep(size_t size) -{ - prof_thr_cnt_t *ret; - void *vec[prof_bt_max]; - prof_bt_t bt; - - if (opt_prof_active == false) { - /* Sampling is currently inactive, so avoid sampling. */ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } else if (opt_lg_prof_sample == 0) { - /* - * Don't bother with sampling logic, since sampling interval is - * 1. - */ - bt_init(&bt, vec); - prof_backtrace(&bt, 2, prof_bt_max); - ret = prof_lookup(&bt); - } else { - if (prof_sample_threshold == 0) { + if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt) + == (ZU(1) << opt_lg_prof_tcmax)) { + assert(ckh_count(&prof_tdata->bt2cnt) > 0); /* - * Initialize. Seed the prng differently for each - * thread. + * Flush the least recently used cnt in order to keep + * bt2cnt from becoming too large. */ - prof_sample_prn_state = (uint64_t)(uintptr_t)&size; - prof_sample_threshold_update(); + ret.p = ql_last(&prof_tdata->lru_ql, lru_link); + assert(ret.v != NULL); + ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, + NULL); + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + prof_ctx_merge(ret.p->ctx, ret.p); + /* ret can now be re-used. */ + } else { + assert(opt_lg_prof_tcmax < 0 || + ckh_count(&prof_tdata->bt2cnt) < (ZU(1) << + opt_lg_prof_tcmax)); + /* Allocate and partially initialize a new cnt. */ + ret.v = imalloc(sizeof(prof_thr_cnt_t)); + if (ret.p == NULL) { + malloc_mutex_unlock(&ctx.p->lock); + return (NULL); + } + ql_elm_new(ret.p, cnts_link); + ql_elm_new(ret.p, lru_link); } - - /* - * Determine whether to capture a backtrace based on whether - * size is enough for prof_accum to reach - * prof_sample_threshold. However, delay updating these - * variables until prof_{m,re}alloc(), because we don't know - * for sure that the allocation will succeed. - * - * Use subtraction rather than addition to avoid potential - * integer overflow. 
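
The prof_sample_threshold_update() code being removed above draws the next sampling threshold from a geometric distribution with mean 2^lg_prof_sample by inverting the geometric CDF: threshold = floor(log(u) / log(1 - p)) + 1, with p = 2^-lg_prof_sample and u uniform in (0, 1). The standalone sketch below reproduces that draw, using drand48() purely as a stand-in for jemalloc's internal PRNG.

#include <float.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>

static uint64_t
sample_threshold(unsigned lg_sample)
{
	double p = 1.0 / (double)((uint64_t)1 << lg_sample);
	double u = drand48();		/* uniform in [0, 1) */

	if (u == 0.0)
		u = DBL_MIN;		/* keep log() finite */
	/* Inverse-CDF draw; the mean spacing between samples is 1/p. */
	return ((uint64_t)(log(u) / log(1.0 - p)) + 1);
}
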
- */ - if (size >= prof_sample_threshold - prof_sample_accum) { - bt_init(&bt, vec); - prof_backtrace(&bt, 2, prof_bt_max); - ret = prof_lookup(&bt); - } else - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } - - return (ret); -} - -prof_thr_cnt_t * -prof_cnt_get(const void *ptr) -{ - prof_thr_cnt_t *ret; - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); - - ret = arena_prof_cnt_get(ptr); - } else - ret = huge_prof_cnt_get(ptr); - - return (ret); -} - -static void -prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) -{ - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); - - arena_prof_cnt_set(ptr, cnt); - } else - huge_prof_cnt_set(ptr, cnt); -} - -static inline void -prof_sample_accum_update(size_t size) -{ - - if (opt_lg_prof_sample == 0) { - /* - * Don't bother with sampling logic, since sampling interval is - * 1. - */ - return; - } - - /* Take care to avoid integer overflow. */ - if (size >= prof_sample_threshold - prof_sample_accum) { - prof_sample_accum -= (prof_sample_threshold - size); - /* Compute new prof_sample_threshold. */ - prof_sample_threshold_update(); - while (prof_sample_accum >= prof_sample_threshold) { - prof_sample_accum -= prof_sample_threshold; - prof_sample_threshold_update(); + /* Finish initializing ret. */ + ret.p->ctx = ctx.p; + ret.p->epoch = 0; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { + malloc_mutex_unlock(&ctx.p->lock); + idalloc(ret.v); + return (NULL); } - } else - prof_sample_accum += size; -} - -void -prof_malloc(const void *ptr, prof_thr_cnt_t *cnt) -{ - size_t size = isalloc(ptr); - - assert(ptr != NULL); - - prof_cnt_set(ptr, cnt); - prof_sample_accum_update(size); - - if ((uintptr_t)cnt > (uintptr_t)1U) { - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - } -} - -void -prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, - size_t old_size, prof_thr_cnt_t *old_cnt) -{ - size_t size = isalloc(ptr); - - if (ptr != NULL) { - prof_cnt_set(ptr, cnt); - prof_sample_accum_update(size); - } - - if ((uintptr_t)old_cnt > (uintptr_t)1U) - old_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)old_cnt > (uintptr_t)1U) { - old_cnt->cnts.curobjs--; - old_cnt->cnts.curbytes -= old_size; - } - if ((uintptr_t)cnt > (uintptr_t)1U) { - cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); + malloc_mutex_unlock(&ctx.p->lock); + } else { + /* Move ret to the front of the LRU. */ + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); } - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)old_cnt > (uintptr_t)1U) - old_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) - cnt->epoch++; - /*********/ - mb_write(); /* Not strictly necessary. 
*/ -} -void -prof_free(const void *ptr) -{ - prof_thr_cnt_t *cnt = prof_cnt_get(ptr); - - if ((uintptr_t)cnt > (uintptr_t)1) { - size_t size = isalloc(ptr); - - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs--; - cnt->cnts.curbytes -= size; - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - } + return (ret.p); } static bool @@ -784,15 +575,15 @@ prof_write(const char *s, bool propagate_err) } static void -prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) +prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) { prof_thr_cnt_t *thr_cnt; prof_cnt_t tcnt; malloc_mutex_lock(&ctx->lock); - memcpy(&ctx->cnt_dump, &ctx->cnt_merged, sizeof(prof_cnt_t)); - ql_foreach(thr_cnt, &ctx->cnts_ql, link) { + memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); + ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { volatile unsigned *epoch = &thr_cnt->epoch; while (true) { @@ -809,22 +600,96 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) break; } - ctx->cnt_dump.curobjs += tcnt.curobjs; - ctx->cnt_dump.curbytes += tcnt.curbytes; - ctx->cnt_dump.accumobjs += tcnt.accumobjs; - ctx->cnt_dump.accumbytes += tcnt.accumbytes; + ctx->cnt_summed.curobjs += tcnt.curobjs; + ctx->cnt_summed.curbytes += tcnt.curbytes; + if (opt_prof_accum) { + ctx->cnt_summed.accumobjs += tcnt.accumobjs; + ctx->cnt_summed.accumbytes += tcnt.accumbytes; + } + } + + if (ctx->cnt_summed.curobjs != 0) + (*leak_nctx)++; + + /* Add to cnt_all. */ + cnt_all->curobjs += ctx->cnt_summed.curobjs; + cnt_all->curbytes += ctx->cnt_summed.curbytes; + if (opt_prof_accum) { + cnt_all->accumobjs += ctx->cnt_summed.accumobjs; + cnt_all->accumbytes += ctx->cnt_summed.accumbytes; + } + + malloc_mutex_unlock(&ctx->lock); +} + +static void +prof_ctx_destroy(prof_ctx_t *ctx) +{ - if (tcnt.curobjs != 0) - (*leak_nctx)++; + /* + * Check that ctx is still unused by any thread cache before destroying + * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to + * avoid a race condition with this function, and prof_ctx_merge() + * artificially raises ctx->cnt_merged.curobjs in order to avoid a race + * between the main body of prof_ctx_merge() and entry into this + * function. + */ + prof_enter(); + malloc_mutex_lock(&ctx->lock); + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { + assert(ctx->cnt_merged.curbytes == 0); + assert(ctx->cnt_merged.accumobjs == 0); + assert(ctx->cnt_merged.accumbytes == 0); + /* Remove ctx from bt2ctx. */ + ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); + prof_leave(); + /* Destroy ctx. */ + malloc_mutex_unlock(&ctx->lock); + bt_destroy(ctx->bt); + malloc_mutex_destroy(&ctx->lock); + idalloc(ctx); + } else { + /* Compensate for increment in prof_ctx_merge(). */ + ctx->cnt_merged.curobjs--; + malloc_mutex_unlock(&ctx->lock); + prof_leave(); } +} - /* Merge into cnt_all. */ - cnt_all->curobjs += ctx->cnt_dump.curobjs; - cnt_all->curbytes += ctx->cnt_dump.curbytes; - cnt_all->accumobjs += ctx->cnt_dump.accumobjs; - cnt_all->accumbytes += ctx->cnt_dump.accumbytes; +static void +prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) +{ + bool destroy; + /* Merge cnt stats and detach from ctx. 
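
The per-thread counters above are updated without locks: a writer bumps an epoch to an odd value, updates the counters between write barriers, then bumps the epoch back to even, while readers such as prof_ctx_sum() copy the counters and retry until they observe an even, unchanged epoch. The compact sketch below shows that protocol; the barrier macro is only a stand-in for jemalloc's own mb_write().

#include <stdint.h>
#include <string.h>

#define barrier() __sync_synchronize()	/* stand-in for mb_write() */

typedef struct {
	unsigned epoch;		/* even = stable, odd = update in progress */
	uint64_t curobjs;
	uint64_t curbytes;
} cnts_t;

/* Writer: only the owning thread ever calls this. */
static void
cnts_add(cnts_t *c, uint64_t bytes)
{
	c->epoch++;
	barrier();
	c->curobjs++;
	c->curbytes += bytes;
	barrier();
	c->epoch++;
}

/* Reader: any thread; retries until it sees a consistent snapshot. */
static void
cnts_snapshot(const cnts_t *c, cnts_t *snap)
{
	volatile const unsigned *epoch = &c->epoch;

	for (;;) {
		unsigned e = *epoch;

		memcpy(snap, c, sizeof(*snap));
		if ((e & 1U) == 0 && *epoch == e)
			break;
	}
}
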
*/ + malloc_mutex_lock(&ctx->lock); + ctx->cnt_merged.curobjs += cnt->cnts.curobjs; + ctx->cnt_merged.curbytes += cnt->cnts.curbytes; + ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; + ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; + ql_remove(&ctx->cnts_ql, cnt, cnts_link); + if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && + ctx->cnt_merged.curobjs == 0) { + /* + * Artificially raise ctx->cnt_merged.curobjs in order to keep + * another thread from winning the race to destroy ctx while + * this one has ctx->lock dropped. Without this, it would be + * possible for another thread to: + * + * 1) Sample an allocation associated with ctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_ctx_destroy(ctx). + * + * The result would be that ctx no longer exists by the time + * this thread accesses it in prof_ctx_destroy(). + */ + ctx->cnt_merged.curobjs++; + destroy = true; + } else + destroy = false; malloc_mutex_unlock(&ctx->lock); + if (destroy) + prof_ctx_destroy(ctx); } static bool @@ -833,22 +698,29 @@ prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) char buf[UMAX2S_BUFSIZE]; unsigned i; - if (prof_write(umax2s(ctx->cnt_dump.curobjs, 10, buf), propagate_err) + if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { + assert(ctx->cnt_summed.curbytes == 0); + assert(ctx->cnt_summed.accumobjs == 0); + assert(ctx->cnt_summed.accumbytes == 0); + return (false); + } + + if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(ctx->cnt_dump.curbytes, 10, buf), + || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf), propagate_err) || prof_write(" [", propagate_err) - || prof_write(umax2s(ctx->cnt_dump.accumobjs, 10, buf), + || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(ctx->cnt_dump.accumbytes, 10, buf), + || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf), propagate_err) || prof_write("] @", propagate_err)) return (true); for (i = 0; i < bt->len; i++) { if (prof_write(" 0x", propagate_err) - || prof_write(umax2s((uintptr_t)bt->vec[i], 16, buf), + || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf), propagate_err)) return (true); } @@ -877,7 +749,7 @@ prof_dump_maps(bool propagate_err) memcpy(&mpath[i], s, slen); i += slen; - s = umax2s(getpid(), 10, buf); + s = u2s(getpid(), 10, buf); slen = strlen(s); memcpy(&mpath[i], s, slen); i += slen; @@ -919,8 +791,14 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) { prof_cnt_t cnt_all; size_t tabind; - prof_bt_t *bt; - prof_ctx_t *ctx; + union { + prof_bt_t *p; + void *v; + } bt; + union { + prof_ctx_t *p; + void *v; + } ctx; char buf[UMAX2S_BUFSIZE]; size_t leak_nctx; @@ -940,20 +818,18 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) /* Merge per thread profile stats, and sum them in cnt_all. */ memset(&cnt_all, 0, sizeof(prof_cnt_t)); leak_nctx = 0; - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, (void **)&ctx) - == false;) { - prof_ctx_merge(ctx, &cnt_all, &leak_nctx); - } + for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) + prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); /* Dump profile header. 
*/ if (prof_write("heap profile: ", propagate_err) - || prof_write(umax2s(cnt_all.curobjs, 10, buf), propagate_err) + || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(cnt_all.curbytes, 10, buf), propagate_err) + || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err) || prof_write(" [", propagate_err) - || prof_write(umax2s(cnt_all.accumobjs, 10, buf), propagate_err) + || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) - || prof_write(umax2s(cnt_all.accumbytes, 10, buf), propagate_err)) + || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err)) goto ERROR; if (opt_lg_prof_sample == 0) { @@ -961,16 +837,16 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) goto ERROR; } else { if (prof_write("] @ heap_v2/", propagate_err) - || prof_write(umax2s((uint64_t)1U << opt_lg_prof_sample, 10, + || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10, buf), propagate_err) || prof_write("\n", propagate_err)) goto ERROR; } /* Dump per ctx profile stats. */ - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, (void **)&bt, (void **)&ctx) + for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) == false;) { - if (prof_dump_ctx(ctx, bt, propagate_err)) + if (prof_dump_ctx(ctx.p, bt.p, propagate_err)) goto ERROR; } @@ -985,12 +861,12 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) if (leakcheck && cnt_all.curbytes != 0) { malloc_write("<jemalloc>: Leak summary: "); - malloc_write(umax2s(cnt_all.curbytes, 10, buf)); + malloc_write(u2s(cnt_all.curbytes, 10, buf)); malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, "); - malloc_write(umax2s(cnt_all.curobjs, 10, buf)); + malloc_write(u2s(cnt_all.curobjs, 10, buf)); malloc_write((cnt_all.curobjs != 1) ? " objects, " : " object, "); - malloc_write(umax2s(leak_nctx, 10, buf)); + malloc_write(u2s(leak_nctx, 10, buf)); malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n"); malloc_write("<jemalloc>: Run pprof on \""); malloc_write(filename); @@ -1020,31 +896,21 @@ prof_dump_filename(char *filename, char v, int64_t vseq) * Construct a filename of the form: * * <prefix>.<pid>.<seq>.v<vseq>.heap\0 - * or - * jeprof.<pid>.<seq>.v<vseq>.heap\0 */ i = 0; - /* - * Use JEMALLOC_PROF_PREFIX if it's set, and if it is short enough to - * avoid overflowing DUMP_FILENAME_BUFSIZE. The result may exceed - * PATH_MAX, but creat(2) will catch that problem. 
- */ - if ((s = getenv("JEMALLOC_PROF_PREFIX")) != NULL - && strlen(s) + (DUMP_FILENAME_BUFSIZE - PATH_MAX) <= PATH_MAX) { - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; + s = opt_prof_prefix; + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; - s = "."; - } else - s = "jeprof."; + s = "."; slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; - s = umax2s(getpid(), 10, buf); + s = u2s(getpid(), 10, buf); slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; @@ -1054,7 +920,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) memcpy(&filename[i], s, slen); i += slen; - s = umax2s(prof_dump_seq, 10, buf); + s = u2s(prof_dump_seq, 10, buf); prof_dump_seq++; slen = strlen(s); memcpy(&filename[i], s, slen); @@ -1069,7 +935,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) i++; if (vseq != 0xffffffffffffffffLLU) { - s = umax2s(vseq, 10, buf); + s = u2s(vseq, 10, buf); slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; @@ -1091,10 +957,12 @@ prof_fdump(void) if (prof_booted == false) return; - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, opt_prof_leak, false); + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, opt_prof_leak, false); + } } void @@ -1112,11 +980,13 @@ prof_idump(void) } malloc_mutex_unlock(&enq_mtx); - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'i', prof_dump_iseq); - prof_dump_iseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, false, false); + } } bool @@ -1129,6 +999,8 @@ prof_mdump(const char *filename) if (filename == NULL) { /* No filename specified, so automatically generate one. */ + if (opt_prof_prefix[0] == '\0') + return (true); malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; @@ -1139,7 +1011,7 @@ prof_mdump(const char *filename) } void -prof_udump(void) +prof_gdump(void) { char filename[DUMP_FILENAME_BUFSIZE]; @@ -1147,17 +1019,19 @@ prof_udump(void) return; malloc_mutex_lock(&enq_mtx); if (enq) { - enq_udump = true; + enq_gdump = true; malloc_mutex_unlock(&enq_mtx); return; } malloc_mutex_unlock(&enq_mtx); - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'u', prof_dump_useq); - prof_dump_useq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, false, false); + } } static void @@ -1200,52 +1074,69 @@ prof_bt_keycomp(const void *k1, const void *k2) return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } -static void -bt2cnt_thread_cleanup(void *arg) +prof_tdata_t * +prof_tdata_init(void) { - ckh_t *bt2cnt; + prof_tdata_t *prof_tdata; - bt2cnt = bt2cnt_tls; - if (bt2cnt != NULL) { - ql_head(prof_thr_cnt_t) cnts_ql; - size_t tabind; - prof_thr_cnt_t *cnt; + /* Initialize an empty cache for this thread. 
*/ + prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); + if (prof_tdata == NULL) + return (NULL); + + if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, + prof_bt_hash, prof_bt_keycomp)) { + idalloc(prof_tdata); + return (NULL); + } + ql_new(&prof_tdata->lru_ql); - /* Iteratively merge cnt's into the global stats. */ - ql_new(&cnts_ql); - tabind = 0; - while (ckh_iter(bt2cnt, &tabind, NULL, (void **)&cnt) == - false) { - prof_ctx_t *ctx = cnt->ctx; - /* Merge stats and detach from ctx. */ - malloc_mutex_lock(&ctx->lock); - ctx->cnt_merged.curobjs += cnt->cnts.curobjs; - ctx->cnt_merged.curbytes += cnt->cnts.curbytes; - ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; - ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; - ql_remove(&ctx->cnts_ql, cnt, link); - malloc_mutex_unlock(&ctx->lock); + prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); + if (prof_tdata->vec == NULL) { - /* - * Stash cnt for deletion after finishing with - * ckh_iter(). - */ - ql_tail_insert(&cnts_ql, cnt, link); - } + ckh_delete(&prof_tdata->bt2cnt); + idalloc(prof_tdata); + return (NULL); + } + + prof_tdata->prn_state = 0; + prof_tdata->threshold = 0; + prof_tdata->accum = 0; + + PROF_TCACHE_SET(prof_tdata); + + return (prof_tdata); +} + +static void +prof_tdata_cleanup(void *arg) +{ + prof_tdata_t *prof_tdata; + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata != NULL) { + prof_thr_cnt_t *cnt; /* - * Delete the hash table now that cnts_ql has a list of all - * cnt's. + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. */ - ckh_delete(bt2cnt); - idalloc(bt2cnt); - bt2cnt_tls = NULL; + ckh_delete(&prof_tdata->bt2cnt); - /* Delete cnt's. */ - while ((cnt = ql_last(&cnts_ql, link)) != NULL) { - ql_remove(&cnts_ql, cnt, link); + /* + * Iteratively merge cnt's into the global stats and delete + * them. + */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + prof_ctx_merge(cnt->ctx, cnt); + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); idalloc(cnt); } + + idalloc(prof_tdata->vec); + + idalloc(prof_tdata); + PROF_TCACHE_SET(NULL); } } @@ -1253,6 +1144,14 @@ void prof_boot0(void) { + memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, + sizeof(PROF_PREFIX_DEFAULT)); +} + +void +prof_boot1(void) +{ + /* * opt_prof and prof_promote must be in their final state before any * arenas are initialized, so this function must be executed early. @@ -1264,7 +1163,7 @@ prof_boot0(void) * automatically dumped. 
*/ opt_prof = true; - opt_prof_udump = false; + opt_prof_gdump = false; prof_interval = 0; } else if (opt_prof) { if (opt_lg_prof_interval >= 0) { @@ -1278,7 +1177,7 @@ prof_boot0(void) } bool -prof_boot1(void) +prof_boot2(void) { if (opt_prof) { @@ -1287,7 +1186,7 @@ prof_boot1(void) return (true); if (malloc_mutex_init(&bt2ctx_mtx)) return (true); - if (pthread_key_create(&bt2cnt_tsd, bt2cnt_thread_cleanup) + if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup) != 0) { malloc_write( "<jemalloc>: Error in pthread_key_create()\n"); @@ -1302,7 +1201,7 @@ prof_boot1(void) return (true); enq = false; enq_idump = false; - enq_udump = false; + enq_gdump = false; if (atexit(prof_fdump) != 0) { malloc_write("<jemalloc>: Error in atexit()\n"); diff --git a/dep/jemalloc/src/rtree.c b/dep/jemalloc/src/rtree.c new file mode 100644 index 00000000000..7753743c5e6 --- /dev/null +++ b/dep/jemalloc/src/rtree.c @@ -0,0 +1,43 @@ +#define RTREE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +rtree_t * +rtree_new(unsigned bits) +{ + rtree_t *ret; + unsigned bits_per_level, height, i; + + bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; + height = bits / bits_per_level; + if (height * bits_per_level != bits) + height++; + assert(height * bits_per_level >= bits); + + ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) + + (sizeof(unsigned) * height)); + if (ret == NULL) + return (NULL); + memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * + height)); + + malloc_mutex_init(&ret->mutex); + ret->height = height; + if (bits_per_level * height > bits) + ret->level2bits[0] = bits % bits_per_level; + else + ret->level2bits[0] = bits_per_level; + for (i = 1; i < height; i++) + ret->level2bits[i] = bits_per_level; + + ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]); + if (ret->root == NULL) { + /* + * We leak the rtree here, since there's no generic base + * deallocation. + */ + return (NULL); + } + memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); + + return (ret); +} diff --git a/dep/jemalloc/stats.c b/dep/jemalloc/src/stats.c index 9dc75293731..3dfe0d232a6 100644 --- a/dep/jemalloc/stats.c +++ b/dep/jemalloc/src/stats.c @@ -57,12 +57,12 @@ static void stats_arena_print(void (*write_cb)(void *, const char *), /* * We don't want to depend on vsnprintf() for production builds, since that can - * cause unnecessary bloat for static binaries. umax2s() provides minimal - * integer printing functionality, so that malloc_printf() use can be limited to + * cause unnecessary bloat for static binaries. u2s() provides minimal integer + * printing functionality, so that malloc_printf() use can be limited to * JEMALLOC_STATS code. 
*/ char * -umax2s(uintmax_t x, unsigned base, char *s) +u2s(uint64_t x, unsigned base, char *s) { unsigned i; @@ -72,8 +72,8 @@ umax2s(uintmax_t x, unsigned base, char *s) case 10: do { i--; - s[i] = "0123456789"[x % 10]; - x /= 10; + s[i] = "0123456789"[x % (uint64_t)10]; + x /= (uint64_t)10; } while (x > 0); break; case 16: @@ -86,8 +86,9 @@ umax2s(uintmax_t x, unsigned base, char *s) default: do { i--; - s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base]; - x /= base; + s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % + (uint64_t)base]; + x /= (uint64_t)base; } while (x > 0); } @@ -374,6 +375,7 @@ void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { + int err; uint64_t epoch; size_t u64sz; char s[UMAX2S_BUFSIZE]; @@ -383,10 +385,27 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool bins = true; bool large = true; - /* Refresh stats, in case mallctl() was called by the application. */ + /* + * Refresh stats, in case mallctl() was called by the application. + * + * Check for OOM here, since refreshing the ctl cache can trigger + * allocation. In practice, none of the subsequent mallctl()-related + * calls in this function will cause OOM if this one succeeds. + * */ epoch = 1; u64sz = sizeof(uint64_t); - xmallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); + err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch, + sizeof(uint64_t)); + if (err != 0) { + if (err == EAGAIN) { + malloc_write("<jemalloc>: Memory allocation failure in " + "mallctl(\"epoch\", ...)\n"); + return; + } + malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", " + "...)\n"); + abort(); + } if (write_cb == NULL) { /* @@ -430,10 +449,12 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool bv; unsigned uv; ssize_t ssv; - size_t sv, bsz, ssz; + size_t sv, bsz, ssz, sssz, cpsz; bsz = sizeof(bool); ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); CTL_GET("version", &cpv, const char *); write_cb(cbopaque, "Version: "); @@ -444,113 +465,140 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, bv ? "enabled" : "disabled"); write_cb(cbopaque, "\n"); - write_cb(cbopaque, "Boolean JEMALLOC_OPTIONS: "); - if ((err = JEMALLOC_P(mallctl)("opt.abort", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "A" : "a"); - if ((err = JEMALLOC_P(mallctl)("prof.active", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "E" : "e"); - if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "F" : "f"); - if ((err = JEMALLOC_P(mallctl)("opt.tcache", &bv, &bsz, NULL, - 0)) == 0) - write_cb(cbopaque, bv ? "H" : "h"); - if ((err = JEMALLOC_P(mallctl)("opt.junk", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "J" : "j"); - if ((err = JEMALLOC_P(mallctl)("opt.prof_leak", &bv, &bsz, NULL, - 0)) == 0) - write_cb(cbopaque, bv ? "L" : "l"); - if ((err = JEMALLOC_P(mallctl)("opt.overcommit", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "O" : "o"); - if ((err = JEMALLOC_P(mallctl)("opt.stats_print", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "P" : "p"); - if ((err = JEMALLOC_P(mallctl)("opt.prof_udump", &bv, &bsz, - NULL, 0)) == 0) - write_cb(cbopaque, bv ? "U" : "u"); - if ((err = JEMALLOC_P(mallctl)("opt.sysv", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "V" : "v"); - if ((err = JEMALLOC_P(mallctl)("opt.xmalloc", &bv, &bsz, NULL, - 0)) == 0) - write_cb(cbopaque, bv ? 
"X" : "x"); - if ((err = JEMALLOC_P(mallctl)("opt.zero", &bv, &bsz, NULL, 0)) - == 0) - write_cb(cbopaque, bv ? "Z" : "z"); - write_cb(cbopaque, "\n"); +#define OPT_WRITE_BOOL(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, bv ? "true" : "false"); \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_SIZE_T(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, u2s(sv, 10, s)); \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_SSIZE_T(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz, \ + NULL, 0)) == 0) { \ + if (ssv >= 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, u2s(ssv, 10, s)); \ + } else { \ + write_cb(cbopaque, " opt."#n": -"); \ + write_cb(cbopaque, u2s(-ssv, 10, s)); \ + } \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_CHAR_P(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": \""); \ + write_cb(cbopaque, cpv); \ + write_cb(cbopaque, "\"\n"); \ + } + + write_cb(cbopaque, "Run-time option settings:\n"); + OPT_WRITE_BOOL(abort) + OPT_WRITE_SIZE_T(lg_qspace_max) + OPT_WRITE_SIZE_T(lg_cspace_max) + OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_SIZE_T(narenas) + OPT_WRITE_SSIZE_T(lg_dirty_mult) + OPT_WRITE_BOOL(stats_print) + OPT_WRITE_BOOL(junk) + OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(sysv) + OPT_WRITE_BOOL(xmalloc) + OPT_WRITE_BOOL(tcache) + OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep) + OPT_WRITE_SSIZE_T(lg_tcache_max) + OPT_WRITE_BOOL(prof) + OPT_WRITE_CHAR_P(prof_prefix) + OPT_WRITE_SIZE_T(lg_prof_bt_max) + OPT_WRITE_BOOL(prof_active) + OPT_WRITE_SSIZE_T(lg_prof_sample) + OPT_WRITE_BOOL(prof_accum) + OPT_WRITE_SSIZE_T(lg_prof_tcmax) + OPT_WRITE_SSIZE_T(lg_prof_interval) + OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_leak) + OPT_WRITE_BOOL(overcommit) + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P write_cb(cbopaque, "CPUs: "); - write_cb(cbopaque, umax2s(ncpus, 10, s)); + write_cb(cbopaque, u2s(ncpus, 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.narenas", &uv, unsigned); write_cb(cbopaque, "Max arenas: "); - write_cb(cbopaque, umax2s(uv, 10, s)); + write_cb(cbopaque, u2s(uv, 10, s)); write_cb(cbopaque, "\n"); write_cb(cbopaque, "Pointer size: "); - write_cb(cbopaque, umax2s(sizeof(void *), 10, s)); + write_cb(cbopaque, u2s(sizeof(void *), 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.quantum", &sv, size_t); write_cb(cbopaque, "Quantum size: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.cacheline", &sv, size_t); write_cb(cbopaque, "Cacheline size (assumed): "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); CTL_GET("arenas.subpage", &sv, size_t); write_cb(cbopaque, "Subpage spacing: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz, NULL, 0)) == 0) { write_cb(cbopaque, "Tiny 2^n-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.tspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); } CTL_GET("arenas.qspace_min", &sv, 
size_t); write_cb(cbopaque, "Quantum-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.qspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); CTL_GET("arenas.cspace_min", &sv, size_t); write_cb(cbopaque, "Cacheline-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.cspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); CTL_GET("arenas.sspace_min", &sv, size_t); write_cb(cbopaque, "Subpage-spaced sizes: ["); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ".."); CTL_GET("arenas.sspace_max", &sv, size_t); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "]\n"); CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { write_cb(cbopaque, "Min active:dirty page ratio per arena: "); - write_cb(cbopaque, umax2s((1U << ssv), 10, s)); + write_cb(cbopaque, u2s((1U << ssv), 10, s)); write_cb(cbopaque, ":1\n"); } else { write_cb(cbopaque, @@ -560,7 +608,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, &ssz, NULL, 0)) == 0) { write_cb(cbopaque, "Maximum thread-cached size class: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); } if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv, @@ -570,39 +618,51 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("opt.tcache", &tcache_enabled, bool); write_cb(cbopaque, "Thread cache GC sweep interval: "); write_cb(cbopaque, tcache_enabled && ssv >= 0 ? 
- umax2s(tcache_gc_sweep, 10, s) : "N/A"); + u2s(tcache_gc_sweep, 10, s) : "N/A"); write_cb(cbopaque, "\n"); } if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { CTL_GET("opt.lg_prof_bt_max", &sv, size_t); write_cb(cbopaque, "Maximum profile backtrace depth: "); - write_cb(cbopaque, umax2s((1U << sv), 10, s)); + write_cb(cbopaque, u2s((1U << sv), 10, s)); write_cb(cbopaque, "\n"); + CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t); + write_cb(cbopaque, + "Maximum per thread backtrace cache: "); + if (ssv >= 0) { + write_cb(cbopaque, u2s((1U << ssv), 10, s)); + write_cb(cbopaque, " (2^"); + write_cb(cbopaque, u2s(ssv, 10, s)); + write_cb(cbopaque, ")\n"); + } else + write_cb(cbopaque, "N/A\n"); + CTL_GET("opt.lg_prof_sample", &sv, size_t); write_cb(cbopaque, "Average profile sample interval: "); - write_cb(cbopaque, umax2s((1U << sv), 10, s)); + write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ")\n"); CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); write_cb(cbopaque, "Average profile dump interval: "); if (ssv >= 0) { - write_cb(cbopaque, umax2s((1U << ssv), 10, s)); + write_cb(cbopaque, u2s((((uint64_t)1U) << ssv), + 10, s)); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(ssv, 10, s)); + write_cb(cbopaque, u2s(ssv, 10, s)); write_cb(cbopaque, ")\n"); } else write_cb(cbopaque, "N/A\n"); } CTL_GET("arenas.chunksize", &sv, size_t); write_cb(cbopaque, "Chunk size: "); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); CTL_GET("opt.lg_chunk", &sv, size_t); write_cb(cbopaque, " (2^"); - write_cb(cbopaque, umax2s(sv, 10, s)); + write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, ")\n"); } diff --git a/dep/jemalloc/tcache.c b/dep/jemalloc/src/tcache.c index ce6ec996159..cbbe7a113a9 100644 --- a/dep/jemalloc/tcache.c +++ b/dep/jemalloc/src/tcache.c @@ -5,17 +5,19 @@ /* Data. */ bool opt_tcache = true; -ssize_t opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT; +ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; /* Map of thread-specific caches. */ +#ifndef NO_TLS __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#endif /* * Same contents as tcache, but initialized such that the TSD destructor is * called when a thread exits, so that the cache can be cleaned up. */ -static pthread_key_t tcache_tsd; +pthread_key_t tcache_tsd; size_t nhbins; size_t tcache_maxclass; @@ -55,12 +57,14 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem { void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; + bool first_pass; assert(binind < nbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL; - flush = deferred, nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena bin associated with the first object. 
*/ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; @@ -91,10 +95,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) { - size_t pageind = (((uintptr_t)ptr - - (uintptr_t)chunk) >> PAGE_SHIFT); + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> PAGE_SHIFT; arena_chunk_map_t *mapelm = - &chunk->map[pageind]; + &chunk->map[pageind-map_bias]; arena_dalloc_bin(arena, chunk, ptr, mapelm); } else { /* @@ -110,12 +114,9 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem } malloc_mutex_unlock(&bin->lock); - if (flush != NULL) { - /* - * This was the first pass, and rem cached objects - * remain. - */ + if (first_pass) { tbin->avail = flush; + first_pass = false; } } @@ -133,12 +134,14 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem { void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; + bool first_pass; assert(binind < nhbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL; - flush = deferred, nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; @@ -183,12 +186,9 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem } malloc_mutex_unlock(&arena->lock); - if (flush != NULL) { - /* - * This was the first pass, and rem cached objects - * remain. - */ + if (first_pass) { tbin->avail = flush; + first_pass = false; } } @@ -204,12 +204,14 @@ tcache_create(arena_t *arena) size_t size; unsigned i; - size = sizeof(tcache_t) + (sizeof(tcache_bin_t) * (nhbins - 1)); + size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); /* * Round up to the nearest multiple of the cacheline size, in order to * avoid the possibility of false cacheline sharing. * - * That this works relies on the same logic as in ipalloc(). + * That this works relies on the same logic as in ipalloc(), but we + * cannot directly call ipalloc() here due to tcache bootstrapping + * issues. 
*/ size = (size + CACHELINE_MASK) & (-CACHELINE); @@ -241,8 +243,7 @@ tcache_create(arena_t *arena) for (; i < nhbins; i++) tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE; - tcache_tls = tcache; - pthread_setspecific(tcache_tsd, tcache); + TCACHE_SET(tcache); return (tcache); } @@ -310,9 +311,9 @@ tcache_destroy(tcache_t *tcache) if (arena_salloc(tcache) <= small_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; - size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >> - PAGE_SHIFT); - arena_chunk_map_t *mapelm = &chunk->map[pageind]; + size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> + PAGE_SHIFT; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); @@ -330,11 +331,24 @@ tcache_thread_cleanup(void *arg) { tcache_t *tcache = (tcache_t *)arg; - assert(tcache == tcache_tls); - if (tcache != NULL) { + if (tcache == (void *)(uintptr_t)1) { + /* + * The previous time this destructor was called, we set the key + * to 1 so that other destructors wouldn't cause re-creation of + * the tcache. This time, do nothing, so that the destructor + * will not be called again. + */ + } else if (tcache == (void *)(uintptr_t)2) { + /* + * Another destructor called an allocator function after this + * destructor was called. Reset tcache to 1 in order to + * receive another callback. + */ + TCACHE_SET((uintptr_t)1); + } else if (tcache != NULL) { assert(tcache != (void *)(uintptr_t)1); tcache_destroy(tcache); - tcache_tls = (void *)(uintptr_t)1; + TCACHE_SET((uintptr_t)1); } } @@ -370,16 +384,16 @@ tcache_boot(void) if (opt_tcache) { /* - * If necessary, clamp opt_lg_tcache_maxclass, now that + * If necessary, clamp opt_lg_tcache_max, now that * small_maxclass and arena_maxclass are known. */ - if (opt_lg_tcache_maxclass < 0 || (1U << - opt_lg_tcache_maxclass) < small_maxclass) + if (opt_lg_tcache_max < 0 || (1U << + opt_lg_tcache_max) < small_maxclass) tcache_maxclass = small_maxclass; - else if ((1U << opt_lg_tcache_maxclass) > arena_maxclass) + else if ((1U << opt_lg_tcache_max) > arena_maxclass) tcache_maxclass = arena_maxclass; else - tcache_maxclass = (1U << opt_lg_tcache_maxclass); + tcache_maxclass = (1U << opt_lg_tcache_max); nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); |
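
For reference, the umax2s() -> u2s() rename that runs through the prof.c and stats.c hunks keeps the same contract: digits are written backwards into a caller-supplied buffer and a pointer to the first digit is returned, so the stats code never has to pull in vsnprintf(). Below is a minimal standalone sketch; the digit loop is the one shown in the stats.c hunk, while the UMAX2S_BUFSIZE value (65) and the return-pointer convention are assumptions inferred from the callers (write_cb(cbopaque, u2s(...))).

/*
 * Sketch, not part of the patch: a self-contained u2s() mirroring the
 * hunk above.  UMAX2S_BUFSIZE = 65 is an assumption (64 base-2 digits
 * plus the terminating NUL).
 */
#include <stdint.h>
#include <stdio.h>

#define UMAX2S_BUFSIZE 65

static char *
u2s(uint64_t x, unsigned base, char *s)
{
    unsigned i = UMAX2S_BUFSIZE - 1;

    s[i] = '\0';
    do {
        i--;
        s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % (uint64_t)base];
        x /= (uint64_t)base;
    } while (x > 0);

    return (&s[i]);
}

int
main(void)
{
    char buf[UMAX2S_BUFSIZE];

    /* Decimal and hex conversions without any printf-family formatting. */
    printf("%s\n", u2s(12345678901234567890ULL, 10, buf));
    printf("0x%s\n", u2s(0xdeadbeefULL, 16, buf));
    return (0);
}

Passing base 16 yields lowercase hex digits, which is what prof_dump_ctx() relies on when it prints backtrace addresses after the " 0x" prefix.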
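
The reworked tcache_thread_cleanup() in the tcache.c hunk encodes a small state machine in the thread-specific slot: a live tcache pointer means "destroy and mark", (void *)1 means "already cleaned up, do nothing", and (void *)2 means "another key's destructor used the allocator after cleanup, so re-arm one more callback". The sketch below illustrates that protocol in isolation under plain POSIX pthreads; tsd_key, worker(), and the malloc()/free() stand-in for the cache are illustrative only, not jemalloc APIs.

/*
 * Sketch, not part of the patch: the TSD-destructor protocol used by
 * tcache_thread_cleanup().  The (void *)1 / (void *)2 sentinels mirror
 * the hunk; everything else is a stand-in.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

static pthread_key_t tsd_key;

#define CLEANED_UP    ((void *)(uintptr_t)1)  /* cache already destroyed */
#define REINCARNATED  ((void *)(uintptr_t)2)  /* reused after cleanup */

static void
cleanup(void *arg)
{

    if (arg == CLEANED_UP) {
        /* Final call: leave the slot NULL so we are not re-invoked. */
    } else if (arg == REINCARNATED) {
        /*
         * Another key's destructor used the allocator after our cleanup;
         * re-arm one more callback, which then hits the branch above.
         */
        pthread_setspecific(tsd_key, CLEANED_UP);
    } else if (arg != NULL) {
        free(arg);                              /* tcache_destroy() analogue */
        pthread_setspecific(tsd_key, CLEANED_UP);
    }
}

static void *
worker(void *unused)
{

    (void)unused;
    pthread_setspecific(tsd_key, malloc(64));   /* install a "cache" */
    return (NULL);                              /* cleanup() runs at exit */
}

int
main(void)
{
    pthread_t thd;

    pthread_key_create(&tsd_key, cleanup);
    pthread_create(&thd, NULL, worker, NULL);
    pthread_join(thd, NULL);
    return (0);
}

The re-arming works because pthreads re-runs a key's destructor (up to PTHREAD_DESTRUCTOR_ITERATIONS) whenever the destructor leaves a non-NULL value in the slot.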
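
The new src/rtree.c sizes its radix-tree levels from RTREE_NODESIZE: each level consumes ffs(pow2_ceil(RTREE_NODESIZE / sizeof(void *))) - 1 key bits, the height is rounded up, and any remainder is pushed into the root level. The worked example below replays that arithmetic with concrete numbers; RTREE_NODESIZE = 4096 and 8-byte pointers are assumptions chosen purely for illustration, and pow2_ceil() here is a local stand-in for jemalloc's inline helper of the same name.

/*
 * Sketch, not part of the patch: the level sizing done by rtree_new(),
 * with assumed RTREE_NODESIZE = 4096 and 8-byte pointers.
 */
#include <stdio.h>
#include <strings.h>    /* ffs() */

static unsigned
pow2_ceil(unsigned x)
{
    unsigned r = 1;

    while (r < x)
        r <<= 1;
    return (r);
}

int
main(void)
{
    const unsigned rtree_nodesize = 4096;   /* assumed */
    const unsigned bits = 20;               /* example key width */
    unsigned bits_per_level, height, i;

    /* Same arithmetic as rtree_new(). */
    bits_per_level = ffs(pow2_ceil(rtree_nodesize / sizeof(void *))) - 1;
    height = bits / bits_per_level;
    if (height * bits_per_level != bits)
        height++;

    printf("bits_per_level=%u height=%u\n", bits_per_level, height);
    for (i = 0; i < height; i++) {
        unsigned lvl = (i == 0 && bits_per_level * height > bits) ?
            bits % bits_per_level : bits_per_level;
        printf("level2bits[%u] = %u\n", i, lvl);
    }
    return (0);
}

With those assumptions a node holds 512 slots, so each level resolves 9 bits and a 20-bit key splits into levels of 2, 9, and 9 bits.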