author     Multivitamin <DasUmba@.(none)>        2012-04-13 10:49:09 +0200
committer  Multivitamin <DasUmba@.(none)>        2012-04-13 10:49:09 +0200
commit     126fd13e5d6b57dc0c8830248d44db504c7d103f (patch)
tree       e33b3e612f5e1e694a7028c4fe5171f0105c6654
parent     6400c13fcb342fff7e2d808e469f2c57d83766c5 (diff)
DEP: Updated Jemalloc to Version 2.2.5
Signed-off-by: Multivitamin <DasUmba@.(none)>
34 files changed, 2067 insertions, 804 deletions
diff --git a/dep/jemalloc/VERSION b/dep/jemalloc/VERSION index 585f53edd80..aa85f5a2acf 100644 --- a/dep/jemalloc/VERSION +++ b/dep/jemalloc/VERSION @@ -1 +1 @@ -2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf +2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760 diff --git a/dep/jemalloc/include/jemalloc/internal/arena.h b/dep/jemalloc/include/jemalloc/internal/arena.h index 9556c2c68f7..b80c118d811 100644 --- a/dep/jemalloc/include/jemalloc/internal/arena.h +++ b/dep/jemalloc/include/jemalloc/internal/arena.h @@ -19,6 +19,7 @@ #ifdef JEMALLOC_TINY /* Smallest size class to support. */ # define LG_TINY_MIN LG_SIZEOF_PTR +# define TINY_MIN (1U << LG_TINY_MIN) #endif /* @@ -45,9 +46,10 @@ * point is implicitly RUN_BFP bits to the left. * * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be - * honored for some/all object sizes, since there is one bit of header overhead - * per object (plus a constant). This constraint is relaxed (ignored) for runs - * that are so small that the per-region overhead is greater than: + * honored for some/all object sizes, since when heap profiling is enabled + * there is one pointer of header overhead per object (plus a constant). This + * constraint is relaxed (ignored) for runs that are so small that the + * per-region overhead is greater than: * * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) */ @@ -56,6 +58,10 @@ #define RUN_MAX_OVRHD 0x0000003dU #define RUN_MAX_OVRHD_RELAX 0x00001800U +/* Maximum number of regions in one run. */ +#define LG_RUN_MAXREGS 11 +#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) + /* * The minimum ratio of active:dirty pages per arena is computed as: * @@ -69,6 +75,7 @@ typedef struct arena_chunk_map_s arena_chunk_map_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_run_s arena_run_t; +typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; @@ -105,7 +112,7 @@ struct arena_chunk_map_s { * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): * - * ???????? ???????? ????---- ----dzla + * ???????? ???????? ????---- ----dula * * ? : Unallocated: Run address for first/last pages, unset for internal * pages. @@ -113,7 +120,7 @@ struct arena_chunk_map_s { * Large: Run size for first page, unset for trailing pages. * - : Unused. * d : dirty? - * z : zeroed? + * u : unzeroed? * l : large? * a : allocated? 
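The d/u/l/a legend above (and the per-state bit patterns that follow) pack four flags into the low bits of each map word. A minimal sketch of testing them, using the CHUNK_MAP_* constants this header defines alongside the comment; the flag values shown here match jemalloc 2.2.x and are repeated only for illustration:

    #include <stdbool.h>
    #include <stddef.h>

    #define CHUNK_MAP_DIRTY     ((size_t)0x8U)  /* d */
    #define CHUNK_MAP_UNZEROED  ((size_t)0x4U)  /* u */
    #define CHUNK_MAP_LARGE     ((size_t)0x2U)  /* l */
    #define CHUNK_MAP_ALLOCATED ((size_t)0x1U)  /* a */

    /* Small runs are allocated but not large (the "d--A" rows below). */
    static bool map_is_small(size_t mapbits) {
        return (mapbits & (CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED)) ==
            CHUNK_MAP_ALLOCATED;
    }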
* @@ -129,30 +136,30 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-- + * ssssssss ssssssss ssss---- ----du-a * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-- + * ssssssss ssssssss ssss---- ----dU-a * * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--- + * ssssssss ssssssss ssss---- ----D--a * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--- + * ssssssss ssssssss ssss---- ----D--a * * Small: - * pppppppp pppppppp pppp---- ----d--a - * pppppppp pppppppp pppp---- -------a - * pppppppp pppppppp pppp---- ----d--a + * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp pppp---- -------A + * pppppppp pppppppp pppp---- ----d--A * * Large: - * ssssssss ssssssss ssss---- ----D-la + * ssssssss ssssssss ssss---- ----D-LA * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-la + * -------- -------- -------- ----D-LA * * Large (sampled, size <= PAGE_SIZE): - * ssssssss ssssssss sssscccc ccccD-la + * ssssssss ssssssss sssscccc ccccD-LA * * Large (not sampled, size == PAGE_SIZE): - * ssssssss ssssssss ssss---- ----D-la + * ssssssss ssssssss ssss---- ----D-LA */ size_t bits; #ifdef JEMALLOC_PROF @@ -206,16 +213,52 @@ struct arena_run_s { /* Bin this run is associated with. */ arena_bin_t *bin; - /* Stack of available freed regions, or NULL. */ - void *avail; - - /* Next region that has never been allocated, or run boundary. */ - void *next; + /* Index of next region that has never been allocated, or nregs. */ + uint32_t nextind; /* Number of free regions in run. */ unsigned nfree; }; +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + */ +struct arena_bin_info_s { + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Total size of a run for this bin's size class. */ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + + /* + * Offset of first bitmap_t element in a run header for this bin's size + * class. + */ + uint32_t bitmap_offset; + + /* + * Metadata used to manipulate bitmaps for runs associated with this + * bin. + */ + bitmap_info_t bitmap_info; + +#ifdef JEMALLOC_PROF + /* + * Offset of first (prof_ctx_t *) in a run header for this bin's size + * class, or 0 if (opt_prof == false). + */ + uint32_t ctx0_offset; +#endif + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; +}; + struct arena_bin_s { /* * All operations on runcur, runs, and stats require that lock be @@ -240,26 +283,6 @@ struct arena_bin_s { */ arena_run_tree_t runs; - /* Size of regions in a run for this bin's size class. */ - size_t reg_size; - - /* Total size of a run for this bin's size class. */ - size_t run_size; - - /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; - -#ifdef JEMALLOC_PROF - /* - * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (opt_prof == false). - */ - uint32_t ctx0_offset; -#endif - - /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; - #ifdef JEMALLOC_STATS /* Bin statistics. 
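The arena_bin_info_s split above is the classic "separate read-only metadata from mutable state" layout: the immutable per-size-class fields move into one shared table, so there is a single copy instead of one per arena and writers never dirty the cachelines readers hit. The same idea in miniature, with illustrative names and values:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct {        /* read-only after boot; one shared table */
        size_t   reg_size;
        uint32_t nregs;
    } bin_info_t;

    typedef struct {        /* mutable; one instance per arena */
        unsigned nfree;
    } bin_t;

    static const bin_info_t bin_infos[] = { {8, 512}, {16, 256}, {32, 128} };

    /* Index by pointer arithmetic, as arena_bin_index() below does. */
    static size_t bin_index(const bin_t *bins, const bin_t *bin) {
        return (size_t)(bin - bins);
    }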
*/ malloc_bin_stats_t stats; @@ -276,8 +299,18 @@ struct arena_s { unsigned ind; /* - * All non-bin-related operations on this arena require that lock be - * locked. + * Number of threads currently assigned to this arena. This field is + * protected by arenas_lock. + */ + unsigned nthreads; + + /* + * There are three classes of arena operations from a locking + * perspective: + * 1) Thread asssignment (modifies nthreads) is protected by + * arenas_lock. + * 2) Bin-related operations are protected by bin locks. + * 3) Chunk- and run-related operations are protected by this mutex. */ malloc_mutex_t lock; @@ -347,45 +380,35 @@ struct arena_s { /* * bins is used to store trees of free regions of the following sizes, - * assuming a 16-byte quantum, 4 KiB page size, and default - * JEMALLOC_OPTIONS. + * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and + * default MALLOC_CONF. * * bins[i] | size | * --------+--------+ - * 0 | 2 | - * 1 | 4 | - * 2 | 8 | + * 0 | 8 | * --------+--------+ - * 3 | 16 | - * 4 | 32 | - * 5 | 48 | + * 1 | 16 | + * 2 | 32 | + * 3 | 48 | * : : - * 8 | 96 | - * 9 | 112 | - * 10 | 128 | + * 6 | 96 | + * 7 | 112 | + * 8 | 128 | * --------+--------+ - * 11 | 192 | - * 12 | 256 | - * 13 | 320 | - * 14 | 384 | - * 15 | 448 | - * 16 | 512 | - * --------+--------+ - * 17 | 768 | - * 18 | 1024 | - * 19 | 1280 | - * : : - * 27 | 3328 | - * 28 | 3584 | - * 29 | 3840 | + * 9 | 192 | + * 10 | 256 | + * 11 | 320 | + * 12 | 384 | + * 13 | 448 | + * 14 | 512 | * --------+--------+ - * 30 | 4 KiB | - * 31 | 6 KiB | - * 33 | 8 KiB | + * 15 | 768 | + * 16 | 1024 | + * 17 | 1280 | * : : - * 43 | 28 KiB | - * 44 | 30 KiB | - * 45 | 32 KiB | + * 25 | 3328 | + * 26 | 3584 | + * 27 | 3840 | * --------+--------+ */ arena_bin_t bins[1]; /* Dynamically sized. */ @@ -397,8 +420,16 @@ struct arena_s { extern size_t opt_lg_qspace_max; extern size_t opt_lg_cspace_max; -extern ssize_t opt_lg_dirty_mult; +extern ssize_t opt_lg_dirty_mult; +/* + * small_size2bin is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via the SMALL_SIZE2BIN macro. + */ extern uint8_t const *small_size2bin; +#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) + +extern arena_bin_info_t *arena_bin_info; /* Various bin-related settings. */ #ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. 
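SMALL_SIZE2BIN above compresses the size-to-bin table to one byte per TINY_MIN-sized step, so a request size s indexes entry (s - 1) >> LG_TINY_MIN. A self-contained sketch using the bin sizes from the table above (8, 16, 32, 48) and an assumed LG_TINY_MIN of 3:

    #include <stdint.h>

    #define LG_TINY_MIN 3
    #define SMALL_SIZE2BIN(s) (small_size2bin[((s) - 1) >> LG_TINY_MIN])

    static const uint8_t small_size2bin[] = {
        0,        /* sizes  1..8  -> bin 0 (8 bytes)  */
        1,        /* sizes  9..16 -> bin 1 (16 bytes) */
        2, 2,     /* sizes 17..32 -> bin 2 (32 bytes) */
        3, 3,     /* sizes 33..48 -> bin 3 (48 bytes) */
    };

    /* SMALL_SIZE2BIN(24) == 2: a 24-byte request rounds up to 32 bytes. */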
*/ @@ -465,8 +496,9 @@ bool arena_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin, - const void *ptr, size_t size); +size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, + const void *ptr); # ifdef JEMALLOC_PROF prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -475,21 +507,37 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +JEMALLOC_INLINE size_t +arena_bin_index(arena_t *arena, arena_bin_t *bin) +{ + size_t binind = bin - arena->bins; + assert(binind < nbins); + return (binind); +} + JEMALLOC_INLINE unsigned -arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, - size_t size) +arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { unsigned shift, diff, regind; + size_t size; - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. + */ + assert((uintptr_t)ptr >= (uintptr_t)run + + (uintptr_t)bin_info->reg0_offset); /* * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - + bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ + size = bin_info->reg_size; shift = ffs(size) - 1; diff >>= shift; size >>= shift; @@ -512,8 +560,8 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, * divide by 0, and 1 and 2 are both powers of two, which are * handled above. 
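The division avoidance described above becomes concrete in the SIZE_INV definitions just below: dividing by a small odd divisor turns into one multiply by a precomputed scaled reciprocal plus a shift. A worked, standalone instance (the real code reads the inverse from the static size_invs[] table rather than evaluating SIZE_INV at run time):

    #include <assert.h>
    #include <strings.h>            /* ffs() */

    #define LG_RUN_MAXREGS 11
    #define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
    #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)

    int main(void) {
        unsigned size = 48;         /* region size; not a power of two */
        unsigned diff = 48 * 37;    /* byte offset of region index 37 */
        unsigned shift = ffs(size) - 1;
        diff >>= shift;             /* 1776 >> 4 == 111 */
        size >>= shift;             /*   48 >> 4 ==   3 */
        unsigned regind = (diff * SIZE_INV(size)) >> SIZE_INV_SHIFT;
        assert(regind == 37);       /* same result as diff / 48 */
        return (0);
    }

With a 32-bit unsigned, SIZE_INV_SHIFT evaluates to 21, exactly the literal the new definition replaces; tying it to LG_RUN_MAXREGS makes the precision bound (regind < RUN_MAXREGS) explicit.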
*/ -#define SIZE_INV_SHIFT 21 -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) +#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) static const unsigned size_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), @@ -533,7 +581,7 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, #undef SIZE_INV_SHIFT } assert(diff == regind * size); - assert(regind < bin->nregs); + assert(regind < bin_info->nregs); return (regind); } @@ -560,13 +608,14 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - arena_bin_t *bin = run->bin; + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; - assert(run->magic == ARENA_RUN_MAGIC); - regind = arena_run_regind(run, bin, ptr, bin->reg_size); + dassert(run->magic == ARENA_RUN_MAGIC); + regind = arena_run_regind(run, bin_info, ptr); ret = *(prof_ctx_t **)((uintptr_t)run + - bin->ctx0_offset + (regind * + bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t *))); } } else @@ -594,12 +643,16 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); arena_bin_t *bin = run->bin; + size_t binind; + arena_bin_info_t *bin_info; unsigned regind; - assert(run->magic == ARENA_RUN_MAGIC); - regind = arena_run_regind(run, bin, ptr, bin->reg_size); + dassert(run->magic == ARENA_RUN_MAGIC); + binind = arena_bin_index(chunk->arena, bin); + bin_info = &arena_bin_info[binind]; + regind = arena_run_regind(run, bin_info, ptr); - *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset + *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t *)))) = ctx; } else assert((uintptr_t)ctx == (uintptr_t)1U); @@ -615,7 +668,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_chunk_map_t *mapelm; assert(arena != NULL); - assert(arena->magic == ARENA_MAGIC); + dassert(arena->magic == ARENA_MAGIC); assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -638,11 +691,18 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)run->bin->reg0_offset)) % - run->bin->reg_size == 0); + dassert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; +#ifdef JEMALLOC_DEBUG + { + size_t binind = arena_bin_index(arena, bin); + arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % + bin_info->reg_size == 0); + } +#endif malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, ptr, mapelm); malloc_mutex_unlock(&bin->lock); diff --git a/dep/jemalloc/include/jemalloc/internal/atomic.h b/dep/jemalloc/include/jemalloc/internal/atomic.h new file mode 100644 index 00000000000..9a298623f8a --- /dev/null +++ b/dep/jemalloc/include/jemalloc/internal/atomic.h @@ -0,0 +1,169 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ 
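The macros in the hunk that follows build size_t arithmetic on top of fixed-width primitives, and implement "read" as add-of-zero (atomic_read_uint64(p) is atomic_add_uint64(p, 0)). A minimal standalone version of the same pattern, assuming the GCC __sync builtins (one of the several backends the header probes for) and a 64-bit size_t (LG_SIZEOF_PTR == 3):

    #include <stddef.h>
    #include <stdint.h>

    static inline uint64_t atomic_add_u64(uint64_t *p, uint64_t x) {
        return __sync_add_and_fetch(p, x);      /* returns the new value */
    }

    static size_t stats_counter;

    static inline size_t counter_read(void) {
        /* Read as fetch-add of 0; the cast is only valid because size_t
         * is 64 bits wide under the stated assumption. */
        return (size_t)atomic_add_u64((uint64_t *)&stats_counter, 0);
    }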
+/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#define atomic_read_uint64(p) atomic_add_uint64(p, 0) +#define atomic_read_uint32(p) atomic_add_uint32(p, 0) + +#if (LG_SIZEOF_PTR == 3) +# define atomic_read_z(p) \ + (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0) +# define atomic_add_z(p, x) \ + (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x) +# define atomic_sub_z(p, x) \ + (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x) +#elif (LG_SIZEOF_PTR == 2) +# define atomic_read_z(p) \ + (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0) +# define atomic_add_z(p, x) \ + (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x) +# define atomic_sub_z(p, x) \ + (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x) +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); +uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); +uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); +uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) +/******************************************************************************/ +/* 64-bit operations. */ +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); +} +#elif (defined(__amd64_) || defined(__x86_64__)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + x = (uint64_t)(-(int64_t)x); + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} +#else +# if (LG_SIZEOF_PTR == 3) +# error "Missing implementation for 64-bit atomic operations" +# endif +#endif + +/******************************************************************************/ +/* 32-bit operations. */ +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); +} +#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. 
*/ + ); + + return (x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + x = (uint32_t)(-(int32_t)x); + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} +#else +# error "Missing implementation for 32-bit atomic operations" +#endif +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/bitmap.h b/dep/jemalloc/include/jemalloc/internal/bitmap.h new file mode 100644 index 00000000000..605ebac58c1 --- /dev/null +++ b/dep/jemalloc/include/jemalloc/internal/bitmap.h @@ -0,0 +1,184 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ +#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS + +typedef struct bitmap_level_s bitmap_level_t; +typedef struct bitmap_info_s bitmap_info_t; +typedef unsigned long bitmap_t; +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Number of bits per group. */ +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* Maximum number of levels possible. */ +#define BITMAP_MAX_LEVELS \ + (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct bitmap_level_s { + /* Offset of this level's groups within the array of groups. */ + size_t group_offset; +}; + +struct bitmap_info_s { + /* Logical number of bits in bitmap (stored at bottom level). */ + size_t nbits; + + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +size_t bitmap_info_ngroups(const bitmap_info_t *binfo); +size_t bitmap_size(size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); +bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) +JEMALLOC_INLINE bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. 
*/ + return (rg == 0); +} + +JEMALLOC_INLINE bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t g; + + assert(bit < binfo->nbits); + goff = bit >> LG_BITMAP_GROUP_NBITS; + g = bitmap[goff]; + return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); +} + +JEMALLOC_INLINE void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit) == false); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit)); + /* Propagate group state transitions up the tree. */ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) + break; + } + } +} + +/* sfu: set first unset. */ +JEMALLOC_INLINE size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t bit; + bitmap_t g; + unsigned i; + + assert(bitmap_full(bitmap, binfo) == false); + + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffsl(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); + } + + bitmap_set(bitmap, binfo, bit); + return (bit); +} + +JEMALLOC_INLINE void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + bool propagate; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + propagate = (g == 0); + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit) == false); + /* Propagate group state transitions up the tree. */ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (propagate == false) + break; + } + } +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/chunk.h b/dep/jemalloc/include/jemalloc/internal/chunk.h index a60f0ad7498..54b6a3ec886 100644 --- a/dep/jemalloc/include/jemalloc/internal/chunk.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk.h @@ -50,7 +50,7 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. 
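Stepping back to the bitmap API introduced above: runs now track free regions with a multi-level bitmap in which a physical 1 bit means free, so "full" is a zero root group and allocation is bitmap_sfu() ("set first unset"), which walks ffsl() down the levels in O(nlevels). A usage sketch against the declared functions (a sketch, not a test from the tree):

    #include <stdlib.h>

    void bitmap_demo(void) {
        bitmap_info_t binfo;
        bitmap_info_init(&binfo, 64);            /* run with 64 regions */
        bitmap_t *bitmap = malloc(bitmap_size(64));
        bitmap_init(bitmap, &binfo);             /* all regions free */
        size_t r0 = bitmap_sfu(bitmap, &binfo);  /* 0: lowest free region */
        size_t r1 = bitmap_sfu(bitmap, &binfo);  /* 1 */
        bitmap_unset(bitmap, &binfo, r0);        /* free region 0 */
        /* the next bitmap_sfu() returns 0 again */
        (void)r1;
        free(bitmap);
    }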
*/ void *chunk_alloc(size_t size, bool base, bool *zero); -void chunk_dealloc(void *chunk, size_t size); +void chunk_dealloc(void *chunk, size_t size, bool unmap); bool chunk_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/dep/jemalloc/include/jemalloc/internal/ckh.h b/dep/jemalloc/include/jemalloc/internal/ckh.h index d4e391b6360..3e4ad4c85f9 100644 --- a/dep/jemalloc/include/jemalloc/internal/ckh.h +++ b/dep/jemalloc/include/jemalloc/internal/ckh.h @@ -31,7 +31,7 @@ struct ckhc_s { struct ckh_s { #ifdef JEMALLOC_DEBUG -#define CKH_MAGIG 0x3af2489d +#define CKH_MAGIC 0x3af2489d uint32_t magic; #endif diff --git a/dep/jemalloc/include/jemalloc/internal/ctl.h b/dep/jemalloc/include/jemalloc/internal/ctl.h index 8776ad135a7..f1f5eb70a2a 100644 --- a/dep/jemalloc/include/jemalloc/internal/ctl.h +++ b/dep/jemalloc/include/jemalloc/internal/ctl.h @@ -29,6 +29,7 @@ struct ctl_node_s { struct ctl_arena_stats_s { bool initialized; + unsigned nthreads; size_t pactive; size_t pdirty; #ifdef JEMALLOC_STATS diff --git a/dep/jemalloc/include/jemalloc/internal/hash.h b/dep/jemalloc/include/jemalloc/internal/hash.h index d12cdb8359f..8a46ce30803 100644 --- a/dep/jemalloc/include/jemalloc/internal/hash.h +++ b/dep/jemalloc/include/jemalloc/internal/hash.h @@ -17,7 +17,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(HASH_C_)) +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) /* * The following hash function is based on MurmurHash64A(), placed into the * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for @@ -26,7 +26,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); JEMALLOC_INLINE uint64_t hash(const void *key, size_t len, uint64_t seed) { - const uint64_t m = 0xc6a4a7935bd1e995; + const uint64_t m = 0xc6a4a7935bd1e995LLU; const int r = 47; uint64_t h = seed ^ (len * m); const uint64_t *data = (const uint64_t *)key; @@ -62,7 +62,7 @@ hash(const void *key, size_t len, uint64_t seed) h *= m; h ^= h >> r; - return h; + return (h); } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h index 611f0c665a1..cd554bea1b9 100644 --- a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -33,6 +33,12 @@ #define JEMALLOC_MANGLE #include "../jemalloc.h" +#include "jemalloc/internal/private_namespace.h" + +#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#include <libkern/OSAtomic.h> +#endif + #ifdef JEMALLOC_ZONE #include <mach/mach_error.h> #include <mach/mach_init.h> @@ -55,8 +61,9 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. 
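The custom assert referenced above exists so that a failure inside the allocator never calls back into it (printf-family functions may allocate); the hunk that follows additionally introduces dassert(), which compiles away unless JEMALLOC_DEBUG is defined, for checks on hot paths. The allocation-free idea in isolation (an illustrative macro, not jemalloc's exact definition):

    #include <stdlib.h>
    #include <unistd.h>

    #define NOALLOC_ASSERT(e) do {                                  \
        if (!(e)) {                                                 \
            static const char msg_[] =                              \
                "<alloc>: assertion failed: " #e "\n";              \
            write(STDERR_FILENO, msg_, sizeof(msg_) - 1);           \
            abort();                                                \
        }                                                           \
    } while (0)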
*/ -#ifdef JEMALLOC_DEBUG -# define assert(e) do { \ +#ifndef assert +# ifdef JEMALLOC_DEBUG +# define assert(e) do { \ if (!(e)) { \ char line_buf[UMAX2S_BUFSIZE]; \ malloc_write("<jemalloc>: "); \ @@ -70,8 +77,15 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); abort(); \ } \ } while (0) +# else +# define assert(e) +# endif +#endif + +#ifdef JEMALLOC_DEBUG +# define dassert(e) assert(e) #else -#define assert(e) +# define dassert(e) #endif /* @@ -146,12 +160,19 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define QUANTUM_CEILING(a) \ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) + +/* Return the smallest long multiple that is >= a. */ +#define LONG_CEILING(a) \ + (((a) + LONG_MASK) & ~LONG_MASK) + #define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) -/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ -#if (!defined(PIC) && !defined(NO_TLS)) -# define NO_TLS -#endif +/* Return the smallest (void *) multiple that is >= a. */ +#define PTR_CEILING(a) \ + (((a) + PTR_MASK) & ~PTR_MASK) /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. @@ -198,6 +219,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" @@ -206,6 +228,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" @@ -221,12 +244,14 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" @@ -240,6 +265,13 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #endif #include "jemalloc/internal/prof.h" +#ifdef JEMALLOC_STATS +typedef struct { + uint64_t allocated; + uint64_t deallocated; +} thread_allocated_t; +#endif + #undef JEMALLOC_H_STRUCTS /******************************************************************************/ #define JEMALLOC_H_EXTERNS @@ -269,6 +301,7 @@ extern size_t lg_pagesize; extern unsigned ncpus; extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. 
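The ARENA_GET/ARENA_SET hunk that follows keeps the fast __thread variable but now mirrors every store into a pthread_key_t as well; a plausible reading, consistent with the new per-arena nthreads counter, is that the key's destructor provides a thread-exit hook that plain __thread storage lacks. The pattern in miniature, with illustrative names:

    #include <pthread.h>

    static pthread_key_t arena_key;     /* created with a destructor */
    static __thread void *arena_tls;    /* cheap reads on the fast path */

    static void arena_set(void *v) {
        arena_tls = v;
        /* Mirror into the key so the destructor fires at thread exit. */
        pthread_setspecific(arena_key, v);
    }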
*/ +extern pthread_key_t arenas_tsd; #ifndef NO_TLS /* * Map of pthread_self() --> arenas[???], used for selecting an arena to use @@ -278,9 +311,9 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); # define ARENA_GET() arenas_tls # define ARENA_SET(v) do { \ arenas_tls = (v); \ + pthread_setspecific(arenas_tsd, (void *)(v)); \ } while (0) #else -extern pthread_key_t arenas_tsd; # define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) # define ARENA_SET(v) do { \ pthread_setspecific(arenas_tsd, (void *)(v)); \ @@ -295,45 +328,28 @@ extern arena_t **arenas; extern unsigned narenas; #ifdef JEMALLOC_STATS -typedef struct { - uint64_t allocated; - uint64_t deallocated; -} thread_allocated_t; # ifndef NO_TLS extern __thread thread_allocated_t thread_allocated_tls; -# define ALLOCATED_GET() thread_allocated_tls.allocated -# define DEALLOCATED_GET() thread_allocated_tls.deallocated +# define ALLOCATED_GET() (thread_allocated_tls.allocated) +# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) +# define DEALLOCATED_GET() (thread_allocated_tls.deallocated) +# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated) # define ALLOCATED_ADD(a, d) do { \ thread_allocated_tls.allocated += a; \ thread_allocated_tls.deallocated += d; \ } while (0) # else extern pthread_key_t thread_allocated_tsd; -# define ALLOCATED_GET() \ - (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ - ? ((thread_allocated_t *) \ - pthread_getspecific(thread_allocated_tsd))->allocated : 0) -# define DEALLOCATED_GET() \ - (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ - ? ((thread_allocated_t \ - *)pthread_getspecific(thread_allocated_tsd))->deallocated : \ - 0) +thread_allocated_t *thread_allocated_get_hard(void); + +# define ALLOCATED_GET() (thread_allocated_get()->allocated) +# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) +# define DEALLOCATED_GET() (thread_allocated_get()->deallocated) +# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated) # define ALLOCATED_ADD(a, d) do { \ - thread_allocated_t *thread_allocated = (thread_allocated_t *) \ - pthread_getspecific(thread_allocated_tsd); \ - if (thread_allocated != NULL) { \ - thread_allocated->allocated += (a); \ - thread_allocated->deallocated += (d); \ - } else { \ - thread_allocated = (thread_allocated_t *) \ - imalloc(sizeof(thread_allocated_t)); \ - if (thread_allocated != NULL) { \ - pthread_setspecific(thread_allocated_tsd, \ - thread_allocated); \ - thread_allocated->allocated = (a); \ - thread_allocated->deallocated = (d); \ - } \ - } \ + thread_allocated_t *thread_allocated = thread_allocated_get(); \ + thread_allocated->allocated += (a); \ + thread_allocated->deallocated += (d); \ } while (0) # endif #endif @@ -344,12 +360,14 @@ int buferror(int errnum, char *buf, size_t buflen); void jemalloc_prefork(void); void jemalloc_postfork(void); +#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" @@ -367,6 +385,7 @@ void jemalloc_postfork(void); /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/atomic.h" #include 
"jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" @@ -384,6 +403,9 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); void malloc_write(const char *s); arena_t *choose_arena(void); +# if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +thread_allocated_t *thread_allocated_get(void); +# endif #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -414,10 +436,10 @@ s2u(size_t size) { if (size <= small_maxclass) - return arenas[0]->bins[small_size2bin[size]].reg_size; + return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); if (size <= arena_maxclass) - return PAGE_CEILING(size); - return CHUNK_CEILING(size); + return (PAGE_CEILING(size)); + return (CHUNK_CEILING(size)); } /* @@ -458,10 +480,8 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { - if (usize <= small_maxclass) { - return - (arenas[0]->bins[small_size2bin[usize]].reg_size); - } + if (usize <= small_maxclass) + return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); return (PAGE_CEILING(usize)); } else { size_t run_size; @@ -544,8 +564,22 @@ choose_arena(void) return (ret); } + +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +JEMALLOC_INLINE thread_allocated_t * +thread_allocated_get(void) +{ + thread_allocated_t *thread_allocated = (thread_allocated_t *) + pthread_getspecific(thread_allocated_tsd); + + if (thread_allocated == NULL) + return (thread_allocated_get_hard()); + return (thread_allocated); +} +#endif #endif +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/arena.h" @@ -557,7 +591,7 @@ choose_arena(void) #ifndef JEMALLOC_ENABLE_INLINE void *imalloc(size_t size); void *icalloc(size_t size); -void *ipalloc(size_t size, size_t alignment, bool zero); +void *ipalloc(size_t usize, size_t alignment, bool zero); size_t isalloc(const void *ptr); # ifdef JEMALLOC_IVSALLOC size_t ivsalloc(const void *ptr); @@ -591,28 +625,39 @@ icalloc(size_t size) } JEMALLOC_INLINE void * -ipalloc(size_t size, size_t alignment, bool zero) +ipalloc(size_t usize, size_t alignment, bool zero) { void *ret; - size_t usize; - size_t run_size -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; - usize = sa2u(size, alignment, &run_size); - if (usize == 0) - return (NULL); + assert(usize != 0); + assert(usize == sa2u(usize, alignment, NULL)); + if (usize <= arena_maxclass && alignment <= PAGE_SIZE) ret = arena_malloc(usize, zero); - else if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), usize, run_size, alignment, - zero); - } else if (alignment <= chunksize) - ret = huge_malloc(usize, zero); - else - ret = huge_palloc(usize, alignment, zero); + else { + size_t run_size +#ifdef JEMALLOC_CC_SILENCE + = 0 +#endif + ; + + /* + * Ideally we would only ever call sa2u() once per aligned + * allocation request, and the caller of this function has + * already done so once. However, it's rather burdensome to + * require every caller to pass in run_size, especially given + * that it's only relevant to large allocations. Therefore, + * just call it again here in order to get run_size. 
+ */ + sa2u(usize, alignment, &run_size); + if (run_size <= arena_maxclass) { + ret = arena_palloc(choose_arena(), usize, run_size, + alignment, zero); + } else if (alignment <= chunksize) + ret = huge_malloc(usize, zero); + else + ret = huge_palloc(usize, alignment, zero); + } assert(((uintptr_t)ret & (alignment - 1)) == 0); return (ret); @@ -629,7 +674,7 @@ isalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); + dassert(chunk->arena->magic == ARENA_MAGIC); #ifdef JEMALLOC_PROF ret = arena_salloc_demote(ptr); @@ -683,7 +728,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { - size_t copysize; + size_t usize, copysize; /* * Existing object alignment is inadquate; allocate new space @@ -691,12 +736,18 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ if (no_move) return (NULL); - ret = ipalloc(size + extra, alignment, zero); + usize = sa2u(size + extra, alignment, NULL); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, without extra this time. */ - ret = ipalloc(size, alignment, zero); + usize = sa2u(size, alignment, NULL); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); if (ret == NULL) return (NULL); } diff --git a/dep/jemalloc/include/jemalloc/internal/mb.h b/dep/jemalloc/include/jemalloc/internal/mb.h index 1707aa91d68..dc9f2a54262 100644 --- a/dep/jemalloc/include/jemalloc/internal/mb.h +++ b/dep/jemalloc/include/jemalloc/internal/mb.h @@ -17,7 +17,7 @@ void mb_write(void); #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MB_C_)) +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) #ifdef __i386__ /* * According to the Intel Architecture Software Developer's Manual, current diff --git a/dep/jemalloc/include/jemalloc/internal/mutex.h b/dep/jemalloc/include/jemalloc/internal/mutex.h index dcca01edd5d..62947ced55e 100644 --- a/dep/jemalloc/include/jemalloc/internal/mutex.h +++ b/dep/jemalloc/include/jemalloc/internal/mutex.h @@ -1,7 +1,11 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +#ifdef JEMALLOC_OSSPIN +typedef OSSpinLock malloc_mutex_t; +#else typedef pthread_mutex_t malloc_mutex_t; +#endif #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP # define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP @@ -41,17 +45,26 @@ JEMALLOC_INLINE void malloc_mutex_lock(malloc_mutex_t *mutex) { - if (isthreaded) + if (isthreaded) { +#ifdef JEMALLOC_OSSPIN + OSSpinLockLock(mutex); +#else pthread_mutex_lock(mutex); +#endif + } } JEMALLOC_INLINE bool malloc_mutex_trylock(malloc_mutex_t *mutex) { - if (isthreaded) + if (isthreaded) { +#ifdef JEMALLOC_OSSPIN + return (OSSpinLockTry(mutex) == false); +#else return (pthread_mutex_trylock(mutex) != 0); - else +#endif + } else return (false); } @@ -59,8 +72,13 @@ JEMALLOC_INLINE void malloc_mutex_unlock(malloc_mutex_t *mutex) { - if (isthreaded) + if (isthreaded) { +#ifdef JEMALLOC_OSSPIN + OSSpinLockUnlock(mutex); +#else pthread_mutex_unlock(mutex); +#endif + } } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/private_namespace.h b/dep/jemalloc/include/jemalloc/internal/private_namespace.h new file mode 100644 index 00000000000..d4f5f96d7b2 --- /dev/null +++ 
b/dep/jemalloc/include/jemalloc/internal/private_namespace.h @@ -0,0 +1,195 @@ +#define arena_bin_index JEMALLOC_N(arena_bin_index) +#define arena_boot JEMALLOC_N(arena_boot) +#define arena_dalloc JEMALLOC_N(arena_dalloc) +#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) +#define arena_malloc JEMALLOC_N(arena_malloc) +#define arena_malloc_large JEMALLOC_N(arena_malloc_large) +#define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_new JEMALLOC_N(arena_new) +#define arena_palloc JEMALLOC_N(arena_palloc) +#define arena_prof_accum JEMALLOC_N(arena_prof_accum) +#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) +#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) +#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_purge_all JEMALLOC_N(arena_purge_all) +#define arena_ralloc JEMALLOC_N(arena_ralloc) +#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) +#define arena_run_regind JEMALLOC_N(arena_run_regind) +#define arena_salloc JEMALLOC_N(arena_salloc) +#define arena_salloc_demote JEMALLOC_N(arena_salloc_demote) +#define arena_stats_merge JEMALLOC_N(arena_stats_merge) +#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) +#define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) +#define arenas_extend JEMALLOC_N(arenas_extend) +#define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) +#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) +#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) +#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) +#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) +#define base_alloc JEMALLOC_N(base_alloc) +#define base_boot JEMALLOC_N(base_boot) +#define base_node_alloc JEMALLOC_N(base_node_alloc) +#define base_node_dealloc JEMALLOC_N(base_node_dealloc) +#define bitmap_full JEMALLOC_N(bitmap_full) +#define bitmap_get JEMALLOC_N(bitmap_get) +#define bitmap_info_init JEMALLOC_N(bitmap_info_init) +#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups) +#define bitmap_init JEMALLOC_N(bitmap_init) +#define bitmap_set JEMALLOC_N(bitmap_set) +#define bitmap_sfu JEMALLOC_N(bitmap_sfu) +#define bitmap_size JEMALLOC_N(bitmap_size) +#define bitmap_unset JEMALLOC_N(bitmap_unset) +#define bt_init JEMALLOC_N(bt_init) +#define buferror JEMALLOC_N(buferror) +#define choose_arena JEMALLOC_N(choose_arena) +#define choose_arena_hard JEMALLOC_N(choose_arena_hard) +#define chunk_alloc JEMALLOC_N(chunk_alloc) +#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) +#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) +#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) +#define chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap) +#define chunk_boot JEMALLOC_N(chunk_boot) +#define chunk_dealloc JEMALLOC_N(chunk_dealloc) +#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) +#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) +#define chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap) +#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) +#define chunk_in_dss JEMALLOC_N(chunk_in_dss) +#define chunk_in_swap JEMALLOC_N(chunk_in_swap) +#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) +#define chunk_swap_boot JEMALLOC_N(chunk_swap_boot) +#define chunk_swap_enable JEMALLOC_N(chunk_swap_enable) +#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) +#define ckh_count JEMALLOC_N(ckh_count) +#define ckh_delete JEMALLOC_N(ckh_delete) +#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert) +#define 
ckh_insert JEMALLOC_N(ckh_insert) +#define ckh_isearch JEMALLOC_N(ckh_isearch) +#define ckh_iter JEMALLOC_N(ckh_iter) +#define ckh_new JEMALLOC_N(ckh_new) +#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) +#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) +#define ckh_rebuild JEMALLOC_N(ckh_rebuild) +#define ckh_remove JEMALLOC_N(ckh_remove) +#define ckh_search JEMALLOC_N(ckh_search) +#define ckh_string_hash JEMALLOC_N(ckh_string_hash) +#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) +#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) +#define ckh_try_insert JEMALLOC_N(ckh_try_insert) +#define create_zone JEMALLOC_N(create_zone) +#define ctl_boot JEMALLOC_N(ctl_boot) +#define ctl_bymib JEMALLOC_N(ctl_bymib) +#define ctl_byname JEMALLOC_N(ctl_byname) +#define ctl_nametomib JEMALLOC_N(ctl_nametomib) +#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) +#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) +#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) +#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse) +#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start) +#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last) +#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new) +#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next) +#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch) +#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev) +#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch) +#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove) +#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter) +#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) +#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) +#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) +#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first) +#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert) +#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter) +#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse) +#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start) +#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last) +#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new) +#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next) +#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch) +#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev) +#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch) +#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove) +#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter) +#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse) +#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) +#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) +#define hash JEMALLOC_N(hash) +#define huge_boot JEMALLOC_N(huge_boot) +#define huge_dalloc JEMALLOC_N(huge_dalloc) +#define huge_malloc JEMALLOC_N(huge_malloc) +#define huge_palloc JEMALLOC_N(huge_palloc) +#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) +#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) +#define huge_ralloc JEMALLOC_N(huge_ralloc) +#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) +#define 
huge_salloc JEMALLOC_N(huge_salloc) +#define iallocm JEMALLOC_N(iallocm) +#define icalloc JEMALLOC_N(icalloc) +#define idalloc JEMALLOC_N(idalloc) +#define imalloc JEMALLOC_N(imalloc) +#define ipalloc JEMALLOC_N(ipalloc) +#define iralloc JEMALLOC_N(iralloc) +#define isalloc JEMALLOC_N(isalloc) +#define ivsalloc JEMALLOC_N(ivsalloc) +#define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init) +#define jemalloc_postfork JEMALLOC_N(jemalloc_postfork) +#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) +#define malloc_cprintf JEMALLOC_N(malloc_cprintf) +#define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy) +#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) +#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) +#define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) +#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) +#define malloc_printf JEMALLOC_N(malloc_printf) +#define malloc_write JEMALLOC_N(malloc_write) +#define mb_write JEMALLOC_N(mb_write) +#define pow2_ceil JEMALLOC_N(pow2_ceil) +#define prof_backtrace JEMALLOC_N(prof_backtrace) +#define prof_boot0 JEMALLOC_N(prof_boot0) +#define prof_boot1 JEMALLOC_N(prof_boot1) +#define prof_boot2 JEMALLOC_N(prof_boot2) +#define prof_ctx_get JEMALLOC_N(prof_ctx_get) +#define prof_ctx_set JEMALLOC_N(prof_ctx_set) +#define prof_free JEMALLOC_N(prof_free) +#define prof_gdump JEMALLOC_N(prof_gdump) +#define prof_idump JEMALLOC_N(prof_idump) +#define prof_lookup JEMALLOC_N(prof_lookup) +#define prof_malloc JEMALLOC_N(prof_malloc) +#define prof_mdump JEMALLOC_N(prof_mdump) +#define prof_realloc JEMALLOC_N(prof_realloc) +#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) +#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) +#define prof_tdata_init JEMALLOC_N(prof_tdata_init) +#define pthread_create JEMALLOC_N(pthread_create) +#define rtree_get JEMALLOC_N(rtree_get) +#define rtree_get_locked JEMALLOC_N(rtree_get_locked) +#define rtree_new JEMALLOC_N(rtree_new) +#define rtree_set JEMALLOC_N(rtree_set) +#define s2u JEMALLOC_N(s2u) +#define sa2u JEMALLOC_N(sa2u) +#define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) +#define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) +#define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) +#define stats_cactive_add JEMALLOC_N(stats_cactive_add) +#define stats_cactive_get JEMALLOC_N(stats_cactive_get) +#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) +#define stats_print JEMALLOC_N(stats_print) +#define szone2ozone JEMALLOC_N(szone2ozone) +#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) +#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) +#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) +#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) +#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) +#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) +#define tcache_boot JEMALLOC_N(tcache_boot) +#define tcache_create JEMALLOC_N(tcache_create) +#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) +#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) +#define tcache_destroy JEMALLOC_N(tcache_destroy) +#define tcache_event JEMALLOC_N(tcache_event) +#define tcache_get JEMALLOC_N(tcache_get) +#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) +#define thread_allocated_get JEMALLOC_N(thread_allocated_get) +#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) +#define u2s JEMALLOC_N(u2s) diff 
--git a/dep/jemalloc/include/jemalloc/internal/prof.h b/dep/jemalloc/include/jemalloc/internal/prof.h index 7864000b88b..e9064ba6e73 100644 --- a/dep/jemalloc/include/jemalloc/internal/prof.h +++ b/dep/jemalloc/include/jemalloc/internal/prof.h @@ -227,9 +227,60 @@ bool prof_boot2(void); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#define PROF_ALLOC_PREP(nignore, size, ret) do { \ + prof_tdata_t *prof_tdata; \ + prof_bt_t bt; \ + \ + assert(size == s2u(size)); \ + \ + prof_tdata = PROF_TCACHE_GET(); \ + if (prof_tdata == NULL) { \ + prof_tdata = prof_tdata_init(); \ + if (prof_tdata == NULL) { \ + ret = NULL; \ + break; \ + } \ + } \ + \ + if (opt_prof_active == false) { \ + /* Sampling is currently inactive, so avoid sampling. */\ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } else if (opt_lg_prof_sample == 0) { \ + /* Don't bother with sampling logic, since sampling */\ + /* interval is 1. */\ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore, prof_bt_max); \ + ret = prof_lookup(&bt); \ + } else { \ + if (prof_tdata->threshold == 0) { \ + /* Initialize. Seed the prng differently for */\ + /* each thread. */\ + prof_tdata->prn_state = \ + (uint64_t)(uintptr_t)&size; \ + prof_sample_threshold_update(prof_tdata); \ + } \ + \ + /* Determine whether to capture a backtrace based on */\ + /* whether size is enough for prof_accum to reach */\ + /* prof_tdata->threshold. However, delay updating */\ + /* these variables until prof_{m,re}alloc(), because */\ + /* we don't know for sure that the allocation will */\ + /* succeed. */\ + /* */\ + /* Use subtraction rather than addition to avoid */\ + /* potential integer overflow. */\ + if (size >= prof_tdata->threshold - \ + prof_tdata->accum) { \ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore, prof_bt_max); \ + ret = prof_lookup(&bt); \ + } else \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } \ +} while (0) + #ifndef JEMALLOC_ENABLE_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata); -prof_thr_cnt_t *prof_alloc_prep(size_t size); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool prof_sample_accum_update(size_t size); @@ -247,8 +298,22 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) double u; /* - * Compute prof_sample_threshold as a geometrically distributed random + * Compute sample threshold as a geometrically distributed random * variable with mean (2^opt_lg_prof_sample). + * + * __ __ + * | log(u) | 1 + * prof_tdata->threshold = | -------- |, where p = ------------------- + * | log(1-p) | opt_lg_prof_sample + * 2 + * + * For more information on the math, see: + * + * Non-Uniform Random Variate Generation + * Luc Devroye + * Springer-Verlag, New York, 1986 + * pp 500 + * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) */ prn64(r, 53, prof_tdata->prn_state, (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); @@ -258,71 +323,6 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) + (uint64_t)1U; } -JEMALLOC_INLINE prof_thr_cnt_t * -prof_alloc_prep(size_t size) -{ -#ifdef JEMALLOC_ENABLE_INLINE - /* This function does not have its own stack frame, because it is inlined. 
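The bracketed expression above is a standard inverse-CDF draw: with p = 1/2^opt_lg_prof_sample, ceil(log(u)/log(1-p)) for uniform u in (0,1) is geometrically distributed, giving a mean of about 2^opt_lg_prof_sample bytes between samples. As a worked, standalone function (u would come from the prn64() draw shown in the hunk):

    #include <math.h>
    #include <stdint.h>

    static uint64_t sample_threshold(double u, unsigned lg_sample) {
        double p = 1.0 / (double)((uint64_t)1U << lg_sample);
        /* +1 implements the ceiling, matching the diff */
        return ((uint64_t)(log(u) / log(1.0 - p)) + 1);
    }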
*/ -# define NIGNORE 1 -#else -# define NIGNORE 2 -#endif - prof_thr_cnt_t *ret; - prof_tdata_t *prof_tdata; - prof_bt_t bt; - - assert(size == s2u(size)); - - prof_tdata = PROF_TCACHE_GET(); - if (prof_tdata == NULL) { - prof_tdata = prof_tdata_init(); - if (prof_tdata == NULL) - return (NULL); - } - - if (opt_prof_active == false) { - /* Sampling is currently inactive, so avoid sampling. */ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } else if (opt_lg_prof_sample == 0) { - /* - * Don't bother with sampling logic, since sampling interval is - * 1. - */ - bt_init(&bt, prof_tdata->vec); - prof_backtrace(&bt, NIGNORE, prof_bt_max); - ret = prof_lookup(&bt); - } else { - if (prof_tdata->threshold == 0) { - /* - * Initialize. Seed the prng differently for each - * thread. - */ - prof_tdata->prn_state = (uint64_t)(uintptr_t)&size; - prof_sample_threshold_update(prof_tdata); - } - - /* - * Determine whether to capture a backtrace based on whether - * size is enough for prof_accum to reach - * prof_tdata->threshold. However, delay updating these - * variables until prof_{m,re}alloc(), because we don't know - * for sure that the allocation will succeed. - * - * Use subtraction rather than addition to avoid potential - * integer overflow. - */ - if (size >= prof_tdata->threshold - prof_tdata->accum) { - bt_init(&bt, prof_tdata->vec); - prof_backtrace(&bt, NIGNORE, prof_bt_max); - ret = prof_lookup(&bt); - } else - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } - - return (ret); -#undef NIGNORE -} - JEMALLOC_INLINE prof_ctx_t * prof_ctx_get(const void *ptr) { @@ -334,7 +334,7 @@ prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); + dassert(chunk->arena->magic == ARENA_MAGIC); ret = arena_prof_ctx_get(ptr); } else @@ -353,7 +353,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); + dassert(chunk->arena->magic == ARENA_MAGIC); arena_prof_ctx_set(ptr, ctx); } else @@ -374,7 +374,7 @@ prof_sample_accum_update(size_t size) /* Take care to avoid integer overflow. */ if (size >= prof_tdata->threshold - prof_tdata->accum) { prof_tdata->accum -= (prof_tdata->threshold - size); - /* Compute new prof_sample_threshold. */ + /* Compute new sample threshold. */ prof_sample_threshold_update(prof_tdata); while (prof_tdata->accum >= prof_tdata->threshold) { prof_tdata->accum -= prof_tdata->threshold; @@ -401,7 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) * always possible to tell in advance how large an * object's usable size will be, so there should never * be a difference between the size passed to - * prof_alloc_prep() and prof_malloc(). + * PROF_ALLOC_PREP() and prof_malloc(). */ assert((uintptr_t)cnt == (uintptr_t)1U); } @@ -445,7 +445,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, if (prof_sample_accum_update(size)) { /* * Don't sample. 
The size passed to - * prof_alloc_prep() was larger than what + * PROF_ALLOC_PREP() was larger than what * actually got allocated, so a backtrace was * captured for this allocation, even though * its actual size was insufficient to cross diff --git a/dep/jemalloc/include/jemalloc/internal/rtree.h b/dep/jemalloc/include/jemalloc/internal/rtree.h index 9d58ebac545..95d6355a5f4 100644 --- a/dep/jemalloc/include/jemalloc/internal/rtree.h +++ b/dep/jemalloc/include/jemalloc/internal/rtree.h @@ -49,7 +49,7 @@ void *rtree_get(rtree_t *rtree, uintptr_t key); bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_)) +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) #define RTREE_GET_GENERATE(f) \ /* The least significant bits of the key are ignored. */ \ JEMALLOC_INLINE void * \ diff --git a/dep/jemalloc/include/jemalloc/internal/stats.h b/dep/jemalloc/include/jemalloc/internal/stats.h index 3fc2080a34b..2a9b31d9ffc 100644 --- a/dep/jemalloc/include/jemalloc/internal/stats.h +++ b/dep/jemalloc/include/jemalloc/internal/stats.h @@ -154,6 +154,10 @@ struct chunk_stats_s { extern bool opt_stats_print; +#ifdef JEMALLOC_STATS +extern size_t stats_cactive; +#endif + char *u2s(uint64_t x, unsigned base, char *s); #ifdef JEMALLOC_STATS void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, @@ -166,9 +170,38 @@ void stats_print(void (*write)(void *, const char *), void *cbopaque, #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ -#ifdef JEMALLOC_STATS #ifdef JEMALLOC_H_INLINES +#ifdef JEMALLOC_STATS + +#ifndef JEMALLOC_ENABLE_INLINE +size_t stats_cactive_get(void); +void stats_cactive_add(size_t size); +void stats_cactive_sub(size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) +JEMALLOC_INLINE size_t +stats_cactive_get(void) +{ + + return (atomic_read_z(&stats_cactive)); +} + +JEMALLOC_INLINE void +stats_cactive_add(size_t size) +{ + + atomic_add_z(&stats_cactive, size); +} + +JEMALLOC_INLINE void +stats_cactive_sub(size_t size) +{ + + atomic_sub_z(&stats_cactive, size); +} +#endif -#endif /* JEMALLOC_H_INLINES */ #endif /* JEMALLOC_STATS */ +#endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/tcache.h b/dep/jemalloc/include/jemalloc/internal/tcache.h index 1ad91a9b1e0..da3c68c5770 100644 --- a/dep/jemalloc/include/jemalloc/internal/tcache.h +++ b/dep/jemalloc/include/jemalloc/internal/tcache.h @@ -2,6 +2,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; @@ -32,15 +33,22 @@ typedef struct tcache_s tcache_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +struct tcache_bin_info_s { + unsigned ncached_max; /* Upper limit on ncached. */ +}; + struct tcache_bin_s { # ifdef JEMALLOC_STATS tcache_bin_stats_t tstats; # endif - unsigned low_water; /* Min # cached since last GC. */ - unsigned high_water; /* Max # cached since last GC. */ + int low_water; /* Min # cached since last GC. 
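One detail worth calling out from the profiling changes above: both PROF_ALLOC_PREP() and prof_sample_accum_update() test whether an allocation crosses the sampling threshold with a subtraction rather than an addition. A minimal sketch of why (the standalone helper is illustrative; it assumes accum < threshold, which the update logic maintains):

    #include <stdbool.h>
    #include <stdint.h>

    /* "accum + size >= threshold" can wrap around for large size; the
     * equivalent subtraction cannot, given accum < threshold. */
    static bool
    crosses_threshold(uint64_t accum, uint64_t threshold, uint64_t size)
    {
        return (size >= threshold - accum);
    }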
*/ + unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ unsigned ncached; /* # of cached objects. */ - unsigned ncached_max; /* Upper limit on ncached. */ - void *avail; /* Chain of available objects. */ + void **avail; /* Stack of available objects. */ }; struct tcache_s { @@ -54,6 +62,12 @@ struct tcache_s { unsigned ev_cnt; /* Event count since incremental GC. */ unsigned next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ + /* + * The pointer stacks associated with tbins follow as a contiguous + * array. During tcache initialization, the avail pointer in each + * element of tbins is initialized to point to the proper offset within + * this array. + */ }; #endif /* JEMALLOC_H_STRUCTS */ @@ -64,6 +78,8 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_gc_sweep; +extern tcache_bin_info_t *tcache_bin_info; + /* Map of thread-specific caches. */ #ifndef NO_TLS extern __thread tcache_t *tcache_tls @@ -110,7 +126,7 @@ void tcache_destroy(tcache_t *tcache); #ifdef JEMALLOC_STATS void tcache_stats_merge(tcache_t *tcache, arena_t *arena); #endif -void tcache_boot(void); +bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -169,6 +185,7 @@ tcache_event(tcache_t *tcache) if (tcache->ev_cnt == tcache_gc_incr) { size_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; + tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; if (tbin->low_water > 0) { /* @@ -192,9 +209,22 @@ tcache_event(tcache_t *tcache) #endif ); } + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. + */ + if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) + >= 1) + tbin->lg_fill_div++; + } else if (tbin->low_water < 0) { + /* + * Increase fill count by 2X. Make sure lg_fill_div + * stays greater than 0. 
+ */ + if (tbin->lg_fill_div > 1) + tbin->lg_fill_div--; } tbin->low_water = tbin->ncached; - tbin->high_water = tbin->ncached; tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) @@ -208,13 +238,14 @@ tcache_alloc_easy(tcache_bin_t *tbin) { void *ret; - if (tbin->ncached == 0) + if (tbin->ncached == 0) { + tbin->low_water = -1; return (NULL); + } tbin->ncached--; - if (tbin->ncached < tbin->low_water) + if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; - ret = tbin->avail; - tbin->avail = *(void **)ret; + ret = tbin->avail[tbin->ncached]; return (ret); } @@ -225,7 +256,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) size_t binind; tcache_bin_t *tbin; - binind = small_size2bin[size]; + binind = SMALL_SIZE2BIN(size); assert(binind < nbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -234,7 +265,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } - assert(arena_salloc(ret) == tcache->arena->bins[binind].reg_size); + assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); if (zero == false) { #ifdef JEMALLOC_FILL @@ -250,7 +281,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tbin->tstats.nrequests++; #endif #ifdef JEMALLOC_PROF - tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size; + tcache->prof_accumbytes += arena_bin_info[binind].reg_size; #endif tcache_event(tcache); return (ret); @@ -314,6 +345,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) arena_run_t *run; arena_bin_t *bin; tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; size_t pageind, binind; arena_chunk_map_t *mapelm; @@ -325,7 +357,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); @@ -333,23 +365,22 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) #ifdef JEMALLOC_FILL if (opt_junk) - memset(ptr, 0x5a, bin->reg_size); + memset(ptr, 0x5a, arena_bin_info[binind].reg_size); #endif tbin = &tcache->tbins[binind]; - if (tbin->ncached == tbin->ncached_max) { - tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1) + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> + 1) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) , tcache #endif ); } - assert(tbin->ncached < tbin->ncached_max); - *(void **)ptr = tbin->avail; - tbin->avail = ptr; + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - if (tbin->ncached > tbin->high_water) - tbin->high_water = tbin->ncached; tcache_event(tcache); } @@ -361,6 +392,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); assert(arena_salloc(ptr) > small_maxclass); @@ -377,19 +409,18 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) #endif tbin = &tcache->tbins[binind]; - if (tbin->ncached == tbin->ncached_max) { - tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1) + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_large(tbin, binind, 
(tbin_info->ncached_max >> + 1) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) , tcache #endif ); } - assert(tbin->ncached < tbin->ncached_max); - *(void **)ptr = tbin->avail; - tbin->avail = ptr; + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - if (tbin->ncached > tbin->high_water) - tbin->high_water = tbin->ncached; tcache_event(tcache); } diff --git a/dep/jemalloc/include/jemalloc/jemalloc.h b/dep/jemalloc/include/jemalloc/jemalloc.h index 287dac46ed2..3842e28115e 100644 --- a/dep/jemalloc/include/jemalloc/jemalloc.h +++ b/dep/jemalloc/include/jemalloc/jemalloc.h @@ -7,19 +7,19 @@ extern "C" { #include <limits.h> #include <strings.h> -#define JEMALLOC_VERSION "2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf" +#define JEMALLOC_VERSION "2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760" #define JEMALLOC_VERSION_MAJOR 2 -#define JEMALLOC_VERSION_MINOR 1 -#define JEMALLOC_VERSION_BUGFIX 0 +#define JEMALLOC_VERSION_MINOR 2 +#define JEMALLOC_VERSION_BUGFIX 5 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "1c4b088b08d3bc7617a34387e196ce03716160bf" +#define JEMALLOC_VERSION_GID "fc1bb70e5f0d9a58b39efa39cc549b5af5104760" #include "jemalloc_defs.h" #ifndef JEMALLOC_P # define JEMALLOC_P(s) s #endif -#define ALLOCM_LG_ALIGN ((int)0x3f) +#define ALLOCM_LG_ALIGN(la) (la) #if LG_SIZEOF_PTR == 2 #define ALLOCM_ALIGN(a) (ffs(a)-1) #else diff --git a/dep/jemalloc/include/jemalloc/jemalloc_defs.h b/dep/jemalloc/include/jemalloc/jemalloc_defs.h index a641b56da03..f0f8fa71a4d 100644 --- a/dep/jemalloc/include/jemalloc/jemalloc_defs.h +++ b/dep/jemalloc/include/jemalloc/jemalloc_defs.h @@ -20,11 +20,32 @@ #endif /* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE "" +#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix + +/* * Hyper-threaded CPUs may need a special instruction inside spin loops in * order to yield to another virtual CPU. */ #define CPU_SPINWAIT __asm__ volatile("pause") +/* + * Defined if OSAtomic*() functions are available, as provided by Darwin, and + * documented in the atomic(3) manual page. + */ +/* #undef JEMALLOC_OSATOMIC */ + +/* + * Defined if OSSpin*() functions are available, as provided by Darwin, and + * documented in the spinlock(3) manual page. + */ +/* #undef JEMALLOC_OSSPIN */ + /* Defined if __attribute__((...)) syntax is supported. */ #define JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR @@ -54,18 +75,21 @@ /* Use libgcc for profile backtracing if defined. */ /* #undef JEMALLOC_PROF_LIBGCC */ +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + /* * JEMALLOC_TINY enables support for tiny objects, which are smaller than one * quantum. */ -/* #undef JEMALLOC_TINY */ +#define JEMALLOC_TINY /* * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. * This makes it possible to allocate/deallocate objects without any locking * when the cache is in the steady state. 
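Two of the tcache changes above are easiest to see side by side: avail becomes a pointer stack indexed by ncached, so the allocator no longer writes free-list links through cached objects, and the fill count becomes adaptive via lg_fill_div, halving after a GC pass that finds the bin over-full (low_water > 0) and doubling after the bin runs dry (low_water == -1). A condensed sketch with illustrative helper names:

    /* Pop from / push onto a bin's pointer stack. */
    static void *
    bin_pop(void **avail, unsigned *ncached)
    {
        if (*ncached == 0)
            return (NULL);          /* Caller records low_water = -1. */
        (*ncached)--;
        return (avail[*ncached]);
    }

    static void
    bin_push(void **avail, unsigned *ncached, void *ptr)
    {
        avail[*ncached] = ptr;      /* Caller flushes when full. */
        (*ncached)++;
    }

    /* Adjust the fill count (ncached_max >> lg_fill_div) after a GC pass. */
    static void
    fill_div_adjust(int low_water, unsigned ncached_max, unsigned *lg_fill_div)
    {
        if (low_water > 0 && (ncached_max >> (*lg_fill_div + 1)) >= 1)
            (*lg_fill_div)++;       /* Halve the fill count. */
        else if (low_water < 0 && *lg_fill_div > 1)
            (*lg_fill_div)--;       /* Double the fill count. */
    }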
*/ -/* #undef JEMALLOC_TCACHE */ +#define JEMALLOC_TCACHE /* * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage @@ -86,7 +110,7 @@ /* #undef JEMALLOC_SYSV */ /* Support lazy locking (avoid locking unless a second thread is launched). */ -/* #undef JEMALLOC_LAZY_LOCK */ +#define JEMALLOC_LAZY_LOCK /* Determine page size at run time if defined. */ /* #undef DYNAMIC_PAGE_SHIFT */ @@ -133,9 +157,12 @@ /* #undef JEMALLOC_PURGE_MADVISE_FREE */ /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#define LG_SIZEOF_PTR 2 +#define LG_SIZEOF_PTR 3 /* sizeof(int) == 2^LG_SIZEOF_INT. */ #define LG_SIZEOF_INT 2 +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + #endif /* JEMALLOC_DEFS_H_ */ diff --git a/dep/jemalloc/src/arena.c b/dep/jemalloc/src/arena.c index 7f939b3cd77..d166ca1ec4d 100644 --- a/dep/jemalloc/src/arena.c +++ b/dep/jemalloc/src/arena.c @@ -8,6 +8,7 @@ size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; uint8_t const *small_size2bin; +arena_bin_info_t *arena_bin_info; /* Various bin-related settings. */ unsigned nqbins; @@ -25,26 +26,27 @@ size_t mspace_mask; /* * const_small_size2bin is a static constant lookup table that in the common - * case can be used as-is for small_size2bin. For dynamically linked programs, - * this avoids a page of memory overhead per process. + * case can be used as-is for small_size2bin. */ -#define S2B_1(i) i, -#define S2B_2(i) S2B_1(i) S2B_1(i) -#define S2B_4(i) S2B_2(i) S2B_2(i) +#if (LG_TINY_MIN == 2) +#define S2B_4(i) i, #define S2B_8(i) S2B_4(i) S2B_4(i) +#elif (LG_TINY_MIN == 3) +#define S2B_8(i) i, +#else +# error "Unsupported LG_TINY_MIN" +#endif #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) #define S2B_64(i) S2B_32(i) S2B_32(i) #define S2B_128(i) S2B_64(i) S2B_64(i) #define S2B_256(i) S2B_128(i) S2B_128(i) /* - * The number of elements in const_small_size2bin is dependent on page size - * and on the definition for SUBPAGE. If SUBPAGE changes, the '- 255' must also - * change, along with the addition/removal of static lookup table element - * definitions. + * The number of elements in const_small_size2bin is dependent on the + * definition for SUBPAGE. 
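The SMALL_SIZE2BIN() macro used throughout this patch pairs with the S2B_* granularity change above: the lookup table now holds one entry per 2^LG_TINY_MIN-byte granule rather than one per byte, which is why both its length and its indexing shifted. Inferred from the CUSTOM_SMALL_SIZE2BIN() helper that appears later in this diff (treat it as reconstructed, not quoted):

    /* Size 0 is invalid, so (s-1) maps sizes 1..TINY_MIN to entry 0,
     * TINY_MIN+1..2*TINY_MIN to entry 1, and so on. */
    #define SMALL_SIZE2BIN(s)   (small_size2bin[((s) - 1) >> LG_TINY_MIN])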
*/ -static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = { - S2B_1(0xffU) /* 0 */ +static JEMALLOC_ATTR(aligned(CACHELINE)) + const uint8_t const_small_size2bin[] = { #if (LG_QUANTUM == 4) /* 16-byte quantum **********************/ # ifdef JEMALLOC_TINY @@ -173,7 +175,6 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); -static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, @@ -191,6 +192,9 @@ static bool small_size2bin_init(void); static void small_size2bin_validate(void); #endif static bool small_size2bin_init_hard(void); +static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, + size_t min_run_size); +static bool bin_info_init(void); /******************************************************************************/ @@ -246,57 +250,48 @@ rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, u.rb_link, arena_avail_comp) static inline void * -arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) +arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; + unsigned regind; + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); assert(run->nfree > 0); + assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); + regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); + ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + + (uintptr_t)(bin_info->reg_size * regind)); run->nfree--; - ret = run->avail; - if (ret != NULL) { - /* Double free can cause assertion failure.*/ - assert(ret != NULL); - /* Write-after free can cause assertion failure. */ - assert((uintptr_t)ret >= (uintptr_t)run + - (uintptr_t)bin->reg0_offset); - assert((uintptr_t)ret < (uintptr_t)run->next); - assert(((uintptr_t)ret - ((uintptr_t)run + - (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size == - 0); - run->avail = *(void **)ret; - return (ret); - } - ret = run->next; - run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size); - assert(ret != NULL); + if (regind == run->nextind) + run->nextind++; + assert(regind < run->nextind); return (ret); } static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { - - assert(run->nfree < run->bin->nregs); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + unsigned regind = arena_run_regind(run, bin_info, ptr); + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size + (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size == 0); - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. 
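With the bitmap in place, arena_run_reg_alloc() above derives a region's address purely from its bit index; there is no free list to chase and no stored pointer to trust. Stripped of its asserts, the allocation path condenses to the following (bitmap_sfu() sets and returns the first unset bit):

    static void *
    reg_alloc_sketch(arena_run_t *run, arena_bin_info_t *bin_info)
    {
        bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
            (uintptr_t)bin_info->bitmap_offset);
        unsigned regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);

        /* Region address = run base + reg0_offset + regind * reg_size. */
        return ((void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
            (uintptr_t)(bin_info->reg_size * regind)));
    }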
- */ assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)run->bin->reg0_offset); - /* - * Freeing a pointer past in the run's frontier can cause assertion - * failure. - */ - assert((uintptr_t)ptr < (uintptr_t)run->next); + (uintptr_t)bin_info->reg0_offset); + /* Freeing an unallocated pointer can cause assertion failure. */ + assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); - *(void **)ptr = run->avail; - run->avail = ptr; + bitmap_unset(bitmap, &bin_info->bitmap_info, regind); run->nfree++; } @@ -320,6 +315,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; +#ifdef JEMALLOC_STATS + size_t cactive_diff; +#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); old_ndirty = chunk->ndirty; @@ -338,6 +336,13 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, rem_pages = total_pages - need_pages; arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); +#ifdef JEMALLOC_STATS + /* Update stats_cactive if nactive is crossing a chunk multiple. */ + cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) << + PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); +#endif arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ @@ -564,7 +569,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena->ndirty -= spare->ndirty; } malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize); + chunk_dealloc((void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); #ifdef JEMALLOC_STATS arena->stats.mapped -= chunksize; @@ -725,6 +730,9 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(pageind + npages <= chunk_npages); if (mapelm->bits & CHUNK_MAP_DIRTY) { size_t i; +#ifdef JEMALLOC_STATS + size_t cactive_diff; +#endif arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); @@ -747,6 +755,17 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) CHUNK_MAP_ALLOCATED; } +#ifdef JEMALLOC_STATS + /* + * Update stats_cactive if nactive is crossing a + * chunk multiple. + */ + cactive_diff = CHUNK_CEILING((arena->nactive + + npages) << PAGE_SHIFT) - + CHUNK_CEILING(arena->nactive << PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); +#endif arena->nactive += npages; /* Append to list for later processing. 
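Each stats_cactive update above follows the same pattern: nactive is kept in pages, but the exported counter only moves when the active page count crosses a chunk multiple, so readers get chunk-granularity totals from a single atomic word. Condensed into a hypothetical helper:

    /* Grow nactive by need_pages, crediting stats_cactive only with
     * the chunk-multiple portion of the change. */
    static void
    nactive_add(arena_t *arena, size_t need_pages)
    {
        size_t cactive_diff = CHUNK_CEILING((arena->nactive + need_pages)
            << PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT);

        if (cactive_diff != 0)
            stats_cactive_add(cactive_diff);
        arena->nactive += need_pages;
    }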
*/ ql_elm_new(mapelm, u.ql_link); @@ -763,8 +782,12 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) chunk + (uintptr_t)(pageind << PAGE_SHIFT)); assert((mapelm->bits >> PAGE_SHIFT) == 0); - assert(run->magic == ARENA_RUN_MAGIC); - pageind += run->bin->run_size >> PAGE_SHIFT; + dassert(run->magic == ARENA_RUN_MAGIC); + size_t binind = arena_bin_index(arena, + run->bin); + arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + pageind += bin_info->run_size >> PAGE_SHIFT; } } } @@ -845,9 +868,10 @@ arena_purge(arena_t *arena, bool all) } assert(ndirty == arena->ndirty); #endif - assert(arena->ndirty > arena->npurgatory); - assert(arena->ndirty > chunk_npages || all); - assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all); + assert(arena->ndirty > arena->npurgatory || all); + assert(arena->ndirty - arena->npurgatory > chunk_npages || all); + assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - + arena->npurgatory) || all); #ifdef JEMALLOC_STATS arena->stats.npurge++; @@ -859,8 +883,10 @@ arena_purge(arena_t *arena, bool all) * multiple threads from racing to reduce ndirty below the threshold. */ npurgatory = arena->ndirty - arena->npurgatory; - if (all == false) + if (all == false) { + assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); npurgatory -= arena->nactive >> opt_lg_dirty_mult; + } arena->npurgatory += npurgatory; while (npurgatory > 0) { @@ -931,6 +957,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_chunk_t *chunk; size_t size, run_ind, run_pages, flag_dirty; arena_avail_tree_t *runs_avail; +#ifdef JEMALLOC_STATS + size_t cactive_diff; +#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) @@ -946,9 +975,19 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_LARGE) != 0); assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - } else - size = run->bin->run_size; + } else { + size_t binind = arena_bin_index(arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + size = bin_info->run_size; + } run_pages = (size >> PAGE_SHIFT); +#ifdef JEMALLOC_STATS + /* Update stats_cactive if nactive is crossing a chunk multiple. */ + cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) - + CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_sub(cactive_diff); +#endif arena->nactive -= run_pages; /* @@ -1174,6 +1213,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_chunk_map_t *mapelm; arena_run_t *run; + size_t binind; + arena_bin_info_t *bin_info; /* Look for a usable run. */ mapelm = arena_run_tree_first(&bin->runs); @@ -1197,18 +1238,23 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) } /* No existing runs have any space available. */ + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; + /* Allocate a new run. */ malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin->run_size, false, false); + run = arena_run_alloc(arena, bin_info->run_size, false, false); if (run != NULL) { + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + /* Initialize run internals. 
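The reworked asserts in arena_purge() above spell out the npurgatory invariant: dirty pages that other threads have already claimed for purging must be excluded before measuring how far ndirty exceeds the active:dirty ratio. The per-call purge target reduces to this (a hypothetical condensation, assuming purging is enabled):

    /* Number of dirty pages this caller takes responsibility for. */
    static size_t
    purge_target(size_t ndirty, size_t npurgatory, size_t nactive,
        size_t lg_dirty_mult, bool all)
    {
        size_t npurge = ndirty - npurgatory;

        if (all == false)
            npurge -= nactive >> lg_dirty_mult;
        return (npurge);
    }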
*/ run->bin = bin; - run->avail = NULL; - run->next = (void *)((uintptr_t)run + - (uintptr_t)bin->reg0_offset); - run->nfree = bin->nregs; + run->nextind = 0; + run->nfree = bin_info->nregs; + bitmap_init(bitmap, &bin_info->bitmap_info); #ifdef JEMALLOC_DEBUG run->magic = ARENA_RUN_MAGIC; #endif @@ -1259,8 +1305,12 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; + size_t binind; + arena_bin_info_t *bin_info; arena_run_t *run; + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; bin->runcur = NULL; run = arena_bin_nonfull_run_get(arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { @@ -1268,22 +1318,22 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). */ - assert(bin->runcur->magic == ARENA_RUN_MAGIC); + dassert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - ret = arena_run_reg_alloc(bin->runcur, bin); + ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { arena_chunk_t *chunk; /* * arena_run_alloc() may have allocated run, or it may - * have pulled it from the bin's run tree. Therefore + * have pulled run from the bin's run tree. Therefore * it is unsafe to make any assumptions about how run * has previously been used, and arena_bin_lower_run() * must be called, as if a region were just deallocated * from the run. */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin->nregs) + if (run->nfree == bin_info->nregs) arena_dalloc_bin_run(arena, chunk, run, bin); else arena_bin_lower_run(arena, chunk, run, bin); @@ -1296,10 +1346,10 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = run; - assert(bin->runcur->magic == ARENA_RUN_MAGIC); + dassert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - return (arena_run_reg_alloc(bin->runcur, bin)); + return (arena_run_reg_alloc(bin->runcur, bin_info)); } #ifdef JEMALLOC_PROF @@ -1339,18 +1389,19 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); - for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) { + for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> + tbin->lg_fill_div); i < nfill; i++) { if ((run = bin->runcur) != NULL && run->nfree > 0) - ptr = arena_run_reg_alloc(run, bin); + ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) break; - *(void **)ptr = tbin->avail; - tbin->avail = ptr; + /* Insert such that low regions get used first. */ + tbin->avail[nfill - 1 - i] = ptr; } #ifdef JEMALLOC_STATS - bin->stats.allocated += (i - tbin->ncached) * bin->reg_size; + bin->stats.allocated += i * arena_bin_info[binind].reg_size; bin->stats.nmalloc += i; bin->stats.nrequests += tbin->tstats.nrequests; bin->stats.nfills++; @@ -1358,119 +1409,9 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif malloc_mutex_unlock(&bin->lock); tbin->ncached = i; - if (tbin->ncached > tbin->high_water) - tbin->high_water = tbin->ncached; } #endif -/* - * Calculate bin->run_size such that it meets the following constraints: - * - * *) bin->run_size >= min_run_size - * *) bin->run_size <= arena_maxclass - * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). 
- * *) run header size < PAGE_SIZE - * - * bin->nregs and bin->reg0_offset are also calculated here, since these - * settings are all interdependent. - */ -static size_t -arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) -{ - size_t try_run_size, good_run_size; - uint32_t try_nregs, good_nregs; - uint32_t try_hdr_size, good_hdr_size; -#ifdef JEMALLOC_PROF - uint32_t try_ctx0_offset, good_ctx0_offset; -#endif - uint32_t try_reg0_offset, good_reg0_offset; - - assert(min_run_size >= PAGE_SIZE); - assert(min_run_size <= arena_maxclass); - - /* - * Calculate known-valid settings before entering the run_size - * expansion loop, so that the first part of the loop always copies - * valid settings. - * - * The do..while loop iteratively reduces the number of regions until - * the run header and the regions no longer overlap. A closed formula - * would be quite messy, since there is an interdependency between the - * header's mask length and the number of regions. - */ - try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* Add space for one (prof_ctx_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_ctx_t *); - } else - try_ctx0_offset = 0; -#endif - try_reg0_offset = try_run_size - (try_nregs * bin->reg_size); - } while (try_hdr_size > try_reg0_offset); - - /* run_size expansion loop. */ - do { - /* - * Copy valid settings before trying more aggressive settings. - */ - good_run_size = try_run_size; - good_nregs = try_nregs; - good_hdr_size = try_hdr_size; -#ifdef JEMALLOC_PROF - good_ctx0_offset = try_ctx0_offset; -#endif - good_reg0_offset = try_reg0_offset; - - /* Try more aggressive settings. */ - try_run_size += PAGE_SIZE; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */ - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* - * Add space for one (prof_ctx_t *) per region. - */ - try_hdr_size += try_nregs * - sizeof(prof_ctx_t *); - } -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin->reg_size); - } while (try_hdr_size > try_reg0_offset); - } while (try_run_size <= arena_maxclass - && try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX - && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size - && try_hdr_size < PAGE_SIZE); - - assert(good_hdr_size <= good_reg0_offset); - - /* Copy final settings. 
*/ - bin->run_size = good_run_size; - bin->nregs = good_nregs; -#ifdef JEMALLOC_PROF - bin->ctx0_offset = good_ctx0_offset; -#endif - bin->reg0_offset = good_reg0_offset; - - return (good_run_size); -} - void * arena_malloc_small(arena_t *arena, size_t size, bool zero) { @@ -1479,14 +1420,14 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) arena_run_t *run; size_t binind; - binind = small_size2bin[size]; + binind = SMALL_SIZE2BIN(size); assert(binind < nbins); bin = &arena->bins[binind]; - size = bin->reg_size; + size = arena_bin_info[binind].reg_size; malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) - ret = arena_run_reg_alloc(run, bin); + ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); else ret = arena_bin_malloc_hard(arena, bin); @@ -1690,11 +1631,13 @@ arena_salloc(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == 0); - ret = run->bin->reg_size; + ret = bin_info->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1714,10 +1657,11 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr) == PAGE_SIZE); + assert(size <= small_maxclass); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = small_size2bin[size]; + binind = SMALL_SIZE2BIN(size); assert(binind < nbins); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); @@ -1741,11 +1685,13 @@ arena_salloc_demote(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == 0); - ret = run->bin->reg_size; + ret = bin_info->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1754,7 +1700,7 @@ arena_salloc_demote(const void *ptr) size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; assert(binind < nbins); - ret = chunk->arena->bins[binind].reg_size; + ret = arena_bin_info[binind].reg_size; } assert(ret != 0); } @@ -1771,17 +1717,22 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, /* Dissociate run from bin. */ if (run == bin->runcur) bin->runcur = NULL; - else if (bin->nregs != 1) { - size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >> - PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; - /* - * This block's conditional is necessary because if the run - * only contains one region, then it never gets inserted into - * the non-full runs tree. 
- */ - arena_run_tree_remove(&bin->runs, run_mapelm); + else { + size_t binind = arena_bin_index(chunk->arena, bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + + if (bin_info->nregs != 1) { + size_t run_pageind = (((uintptr_t)run - + (uintptr_t)chunk)) >> PAGE_SHIFT; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; + /* + * This block's conditional is necessary because if the + * run only contains one region, then it never gets + * inserted into the non-full runs tree. + */ + arena_run_tree_remove(&bin->runs, run_mapelm); + } } } @@ -1789,18 +1740,24 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { + size_t binind; + arena_bin_info_t *bin_info; size_t npages, run_ind, past; assert(run != bin->runcur); assert(arena_run_tree_search(&bin->runs, &chunk->map[ (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); + binind = arena_bin_index(chunk->arena, run->bin); + bin_info = &arena_bin_info[binind]; + malloc_mutex_unlock(&bin->lock); /******************************/ - npages = bin->run_size >> PAGE_SHIFT; + npages = bin_info->run_size >> PAGE_SHIFT; run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk) - >> PAGE_SHIFT); + past = (size_t)(PAGE_CEILING((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * + bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT); malloc_mutex_lock(&arena->lock); /* @@ -1817,7 +1774,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | (chunk->map[run_ind+npages-1-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin->run_size | + chunk->map[run_ind-map_bias].bits = bin_info->run_size | CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), @@ -1886,10 +1843,12 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); + dassert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; + size_t binind = arena_bin_index(arena, bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; #if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size = bin->reg_size; + size = bin_info->reg_size; #endif #ifdef JEMALLOC_FILL @@ -1898,7 +1857,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif arena_run_reg_dalloc(run, ptr); - if (run->nfree == bin->nregs) { + if (run->nfree == bin_info->nregs) { arena_dissociate_bin_run(chunk, run, bin); arena_dalloc_bin_run(arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) @@ -2132,7 +2091,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - assert(arena->magic == ARENA_MAGIC); + dassert(arena->magic == ARENA_MAGIC); if (psize < oldsize) { #ifdef JEMALLOC_FILL @@ -2170,11 +2129,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= small_maxclass) { - assert(choose_arena()->bins[small_size2bin[ - oldsize]].reg_size == oldsize); + 
assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size + == oldsize); if ((size + extra <= small_maxclass && - small_size2bin[size + extra] == - small_size2bin[oldsize]) || (size <= oldsize && + SMALL_SIZE2BIN(size + extra) == + SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && size + extra >= oldsize)) { #ifdef JEMALLOC_FILL if (opt_junk && size < oldsize) { @@ -2210,24 +2169,29 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (ret != NULL) return (ret); - /* * size and oldsize are different enough that we need to move the * object. In that case, fall back to allocating new space and * copying. */ - if (alignment != 0) - ret = ipalloc(size + extra, alignment, zero); - else + if (alignment != 0) { + size_t usize = sa2u(size + extra, alignment, NULL); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + } else ret = arena_malloc(size + extra, zero); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. */ - if (alignment != 0) - ret = ipalloc(size, alignment, zero); - else + if (alignment != 0) { + size_t usize = sa2u(size, alignment, NULL); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + } else ret = arena_malloc(size, zero); if (ret == NULL) @@ -2251,9 +2215,9 @@ arena_new(arena_t *arena, unsigned ind) { unsigned i; arena_bin_t *bin; - size_t prev_run_size; arena->ind = ind; + arena->nthreads = 0; if (malloc_mutex_init(&arena->lock)) return (true); @@ -2287,8 +2251,6 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ - prev_run_size = PAGE_SIZE; - i = 0; #ifdef JEMALLOC_TINY /* (2^n)-spaced tiny bins. */ @@ -2298,11 +2260,6 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); - - bin->reg_size = (1U << (LG_TINY_MIN + i)); - - prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); - #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2316,11 +2273,6 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); - - bin->reg_size = (i - ntbins + 1) << LG_QUANTUM; - - prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); - #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2333,12 +2285,6 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); - - bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) << - LG_CACHELINE); - - prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); - #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2351,12 +2297,6 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); - - bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins)) - << LG_SUBPAGE); - - prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); - #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2375,40 +2315,39 @@ small_size2bin_validate(void) { size_t i, size, binind; - assert(small_size2bin[0] == 0xffU); i = 1; # ifdef JEMALLOC_TINY /* Tiny. 
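In the arena_ralloc() fallback above, aligned requests are now routed through sa2u() before ipalloc(), with a zero result treated as size overflow; the unaligned branch is unchanged. The aligned branch, condensed into a hypothetical helper:

    static void *
    aligned_fallback(size_t size, size_t extra, size_t alignment, bool zero)
    {
        size_t usize = sa2u(size + extra, alignment, NULL);

        if (usize == 0)
            return (NULL);      /* size + extra overflowed. */
        return (ipalloc(usize, alignment, zero));
    }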
*/ for (; i < (1U << LG_TINY_MIN); i++) { size = pow2_ceil(1U << LG_TINY_MIN); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(small_size2bin[i] == binind); + assert(SMALL_SIZE2BIN(i) == binind); } for (; i < qspace_min; i++) { size = pow2_ceil(i); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(small_size2bin[i] == binind); + assert(SMALL_SIZE2BIN(i) == binind); } # endif /* Quantum-spaced. */ for (; i <= qspace_max; i++) { size = QUANTUM_CEILING(i); binind = ntbins + (size >> LG_QUANTUM) - 1; - assert(small_size2bin[i] == binind); + assert(SMALL_SIZE2BIN(i) == binind); } /* Cacheline-spaced. */ for (; i <= cspace_max; i++) { size = CACHELINE_CEILING(i); binind = ntbins + nqbins + ((size - cspace_min) >> LG_CACHELINE); - assert(small_size2bin[i] == binind); + assert(SMALL_SIZE2BIN(i) == binind); } /* Sub-page. */ for (; i <= sspace_max; i++) { size = SUBPAGE_CEILING(i); binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> LG_SUBPAGE); - assert(small_size2bin[i] == binind); + assert(SMALL_SIZE2BIN(i) == binind); } } #endif @@ -2419,12 +2358,12 @@ small_size2bin_init(void) if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || sizeof(const_small_size2bin) != small_maxclass + 1) + || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> + LG_TINY_MIN) + 1)) return (small_size2bin_init_hard()); small_size2bin = const_small_size2bin; #ifdef JEMALLOC_DEBUG - assert(sizeof(const_small_size2bin) == small_maxclass + 1); small_size2bin_validate(); #endif return (false); @@ -2435,49 +2374,52 @@ small_size2bin_init_hard(void) { size_t i, size, binind; uint8_t *custom_small_size2bin; +#define CUSTOM_SMALL_SIZE2BIN(s) \ + custom_small_size2bin[(s-1) >> LG_TINY_MIN] assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || sizeof(const_small_size2bin) != small_maxclass + 1); + || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> + LG_TINY_MIN) + 1)); - custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1); + custom_small_size2bin = (uint8_t *) + base_alloc(small_maxclass >> LG_TINY_MIN); if (custom_small_size2bin == NULL) return (true); - custom_small_size2bin[0] = 0xffU; i = 1; #ifdef JEMALLOC_TINY /* Tiny. */ - for (; i < (1U << LG_TINY_MIN); i++) { + for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { size = pow2_ceil(1U << LG_TINY_MIN); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - custom_small_size2bin[i] = binind; + CUSTOM_SMALL_SIZE2BIN(i) = binind; } - for (; i < qspace_min; i++) { + for (; i < qspace_min; i += TINY_MIN) { size = pow2_ceil(i); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - custom_small_size2bin[i] = binind; + CUSTOM_SMALL_SIZE2BIN(i) = binind; } #endif /* Quantum-spaced. */ - for (; i <= qspace_max; i++) { + for (; i <= qspace_max; i += TINY_MIN) { size = QUANTUM_CEILING(i); binind = ntbins + (size >> LG_QUANTUM) - 1; - custom_small_size2bin[i] = binind; + CUSTOM_SMALL_SIZE2BIN(i) = binind; } /* Cacheline-spaced. */ - for (; i <= cspace_max; i++) { + for (; i <= cspace_max; i += TINY_MIN) { size = CACHELINE_CEILING(i); binind = ntbins + nqbins + ((size - cspace_min) >> LG_CACHELINE); - custom_small_size2bin[i] = binind; + CUSTOM_SMALL_SIZE2BIN(i) = binind; } /* Sub-page. 
*/ - for (; i <= sspace_max; i++) { + for (; i <= sspace_max; i += TINY_MIN) { size = SUBPAGE_CEILING(i); binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> LG_SUBPAGE); - custom_small_size2bin[i] = binind; + CUSTOM_SMALL_SIZE2BIN(i) = binind; } small_size2bin = custom_small_size2bin; @@ -2485,6 +2427,190 @@ small_size2bin_init_hard(void) small_size2bin_validate(); #endif return (false); +#undef CUSTOM_SMALL_SIZE2BIN +} + +/* + * Calculate bin_info->run_size such that it meets the following constraints: + * + * *) bin_info->run_size >= min_run_size + * *) bin_info->run_size <= arena_maxclass + * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). + * *) bin_info->nregs <= RUN_MAXREGS + * + * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also + * calculated here, since these settings are all interdependent. + */ +static size_t +bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) +{ + size_t try_run_size, good_run_size; + uint32_t try_nregs, good_nregs; + uint32_t try_hdr_size, good_hdr_size; + uint32_t try_bitmap_offset, good_bitmap_offset; +#ifdef JEMALLOC_PROF + uint32_t try_ctx0_offset, good_ctx0_offset; +#endif + uint32_t try_reg0_offset, good_reg0_offset; + + assert(min_run_size >= PAGE_SIZE); + assert(min_run_size <= arena_maxclass); + + /* + * Calculate known-valid settings before entering the run_size + * expansion loop, so that the first part of the loop always copies + * valid settings. + * + * The do..while loop iteratively reduces the number of regions until + * the run header and the regions no longer overlap. A closed formula + * would be quite messy, since there is an interdependency between the + * header's mask length and the number of regions. + */ + try_run_size = min_run_size; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) + + 1; /* Counter-act try_nregs-- in loop. */ + if (try_nregs > RUN_MAXREGS) { + try_nregs = RUN_MAXREGS + + 1; /* Counter-act try_nregs-- in loop. */ + } + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. */ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. */ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); + } else + try_ctx0_offset = 0; +#endif + try_reg0_offset = try_run_size - (try_nregs * + bin_info->reg_size); + } while (try_hdr_size > try_reg0_offset); + + /* run_size expansion loop. */ + do { + /* + * Copy valid settings before trying more aggressive settings. + */ + good_run_size = try_run_size; + good_nregs = try_nregs; + good_hdr_size = try_hdr_size; + good_bitmap_offset = try_bitmap_offset; +#ifdef JEMALLOC_PROF + good_ctx0_offset = try_ctx0_offset; +#endif + good_reg0_offset = try_reg0_offset; + + /* Try more aggressive settings. */ + try_run_size += PAGE_SIZE; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin_info->reg_size) + + 1; /* Counter-act try_nregs-- in loop. */ + if (try_nregs > RUN_MAXREGS) { + try_nregs = RUN_MAXREGS + + 1; /* Counter-act try_nregs-- in loop. */ + } + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. 
*/ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* + * Add space for one (prof_ctx_t *) per region. + */ + try_hdr_size += try_nregs * + sizeof(prof_ctx_t *); + } +#endif + try_reg0_offset = try_run_size - (try_nregs * + bin_info->reg_size); + } while (try_hdr_size > try_reg0_offset); + } while (try_run_size <= arena_maxclass + && try_run_size <= arena_maxclass + && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX + && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size + && try_nregs < RUN_MAXREGS); + + assert(good_hdr_size <= good_reg0_offset); + + /* Copy final settings. */ + bin_info->run_size = good_run_size; + bin_info->nregs = good_nregs; + bin_info->bitmap_offset = good_bitmap_offset; +#ifdef JEMALLOC_PROF + bin_info->ctx0_offset = good_ctx0_offset; +#endif + bin_info->reg0_offset = good_reg0_offset; + + return (good_run_size); +} + +static bool +bin_info_init(void) +{ + arena_bin_info_t *bin_info; + unsigned i; + size_t prev_run_size; + + arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); + if (arena_bin_info == NULL) + return (true); + + prev_run_size = PAGE_SIZE; + i = 0; +#ifdef JEMALLOC_TINY + /* (2^n)-spaced tiny bins. */ + for (; i < ntbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (1U << (LG_TINY_MIN + i)); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + } +#endif + + /* Quantum-spaced bins. */ + for (; i < ntbins + nqbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + } + + /* Cacheline-spaced bins. */ + for (; i < ntbins + nqbins + ncbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << + LG_CACHELINE); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + } + + /* Subpage-spaced bins. */ + for (; i < nbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + + ncbins)) << LG_SUBPAGE); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + } + + return (false); } bool @@ -2545,9 +2671,6 @@ arena_boot(void) abort(); } - if (small_size2bin_init()) - return (true); - /* * Compute the header size such that it is large enough to contain the * page map. 
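Taken together, bin_info_run_size_calc() and bin_info_init() above fix each size class's run geometry once at boot. The offsets they compute imply the following in-run layout (a descriptive aside, not text from the patch):

    /*
     * Small-run layout, per arena_bin_info_t:
     *
     *   [0, sizeof(arena_run_t))    arena_run_t header
     *   [bitmap_offset, ...)        allocation bitmap, LONG_CEILING-aligned
     *   [ctx0_offset, ...)          prof_ctx_t *[nregs], present only when
     *                               profiling without promotion
     *   [reg0_offset, run_size)     nregs regions of reg_size bytes each
     */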
The page map is biased to omit entries for the header @@ -2571,5 +2694,11 @@ arena_boot(void) arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); + if (small_size2bin_init()) + return (true); + + if (bin_info_init()) + return (true); + return (false); } diff --git a/dep/jemalloc/src/atomic.c b/dep/jemalloc/src/atomic.c new file mode 100644 index 00000000000..77ee313113b --- /dev/null +++ b/dep/jemalloc/src/atomic.c @@ -0,0 +1,2 @@ +#define JEMALLOC_ATOMIC_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/bitmap.c b/dep/jemalloc/src/bitmap.c new file mode 100644 index 00000000000..b47e2629093 --- /dev/null +++ b/dep/jemalloc/src/bitmap.c @@ -0,0 +1,90 @@ +#define JEMALLOC_BITMAP_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static size_t bits2groups(size_t nbits); + +/******************************************************************************/ + +static size_t +bits2groups(size_t nbits) +{ + + return ((nbits >> LG_BITMAP_GROUP_NBITS) + + !!(nbits & BITMAP_GROUP_NBITS_MASK)); +} + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) +{ + unsigned i; + size_t group_count; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + /* + * Compute the number of groups necessary to store nbits bits, and + * progressively work upward through the levels until reaching a level + * that requires only one group. + */ + binfo->levels[0].group_offset = 0; + group_count = bits2groups(nbits); + for (i = 1; group_count > 1; i++) { + assert(i < BITMAP_MAX_LEVELS); + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + group_count = bits2groups(group_count); + } + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + binfo->nlevels = i; + binfo->nbits = nbits; +} + +size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); +} + +size_t +bitmap_size(size_t nbits) +{ + bitmap_info_t binfo; + + bitmap_info_init(&binfo, nbits); + return (bitmap_info_ngroups(&binfo)); +} + +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t extra; + unsigned i; + + /* + * Bits are actually inverted with regard to the external bitmap + * interface, so the bitmap starts out with all 1 bits, except for + * trailing unused bits (if any). Note that each group uses bit 0 to + * correspond to the first logical bit in the group, so extra bits + * are the most significant bits of the last group. 
+ */ + memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << + LG_SIZEOF_BITMAP); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[1].group_offset - 1] >>= extra; + for (i = 1; i < binfo->nlevels; i++) { + size_t group_count = binfo->levels[i].group_offset - + binfo->levels[i-1].group_offset; + extra = (BITMAP_GROUP_NBITS - (group_count & + BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; + } +} diff --git a/dep/jemalloc/src/chunk.c b/dep/jemalloc/src/chunk.c index 301519e8042..d190c6f49b3 100644 --- a/dep/jemalloc/src/chunk.c +++ b/dep/jemalloc/src/chunk.c @@ -70,7 +70,7 @@ RETURN: #ifdef JEMALLOC_IVSALLOC if (base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size); + chunk_dealloc(ret, size, true); return (NULL); } } @@ -108,7 +108,7 @@ RETURN: } void -chunk_dealloc(void *chunk, size_t size) +chunk_dealloc(void *chunk, size_t size, bool unmap) { assert(chunk != NULL); @@ -125,15 +125,17 @@ chunk_dealloc(void *chunk, size_t size) malloc_mutex_unlock(&chunks_mtx); #endif + if (unmap) { #ifdef JEMALLOC_SWAP - if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) - return; + if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + return; #endif #ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) - return; + if (chunk_dealloc_dss(chunk, size) == false) + return; #endif - chunk_dealloc_mmap(chunk, size); + chunk_dealloc_mmap(chunk, size); + } } bool diff --git a/dep/jemalloc/src/chunk_mmap.c b/dep/jemalloc/src/chunk_mmap.c index bc367559774..164e86e7b38 100644 --- a/dep/jemalloc/src/chunk_mmap.c +++ b/dep/jemalloc/src/chunk_mmap.c @@ -206,13 +206,15 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) void * chunk_alloc_mmap(size_t size) { - return chunk_alloc_mmap_internal(size, false); + + return (chunk_alloc_mmap_internal(size, false)); } void * chunk_alloc_mmap_noreserve(size_t size) { - return chunk_alloc_mmap_internal(size, true); + + return (chunk_alloc_mmap_internal(size, true)); } void diff --git a/dep/jemalloc/src/ckh.c b/dep/jemalloc/src/ckh.c index 682a8db65bf..43fcc25239d 100644 --- a/dep/jemalloc/src/ckh.c +++ b/dep/jemalloc/src/ckh.c @@ -34,7 +34,7 @@ * respectively. 
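Looking back at the chunk.c hunk above, the new third argument to chunk_dealloc() splits bookkeeping from unmapping: passing true walks the swap/DSS/mmap release path, while false only removes the chunk from tracking. Hypothetical call sites:

    chunk_dealloc(spare, chunksize, true);  /* Return pages to the system. */
    chunk_dealloc(chunk, size, false);      /* Drop bookkeeping only. */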
* ******************************************************************************/ -#define CKH_C_ +#define JEMALLOC_CKH_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ @@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + dassert(ckh->magic == CKH_MAGIC); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -262,9 +262,15 @@ ckh_grow(ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; while (true) { + size_t usize; + lg_curcells++; - tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, - ZU(1) << LG_CACHELINE, true); + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + if (usize == 0) { + ret = true; + goto RETURN; + } + tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); if (tab == NULL) { ret = true; goto RETURN; @@ -295,7 +301,7 @@ static void ckh_shrink(ckh_t *ckh) { ckhc_t *tab, *ttab; - size_t lg_curcells; + size_t lg_curcells, usize; unsigned lg_prevbuckets; /* @@ -304,8 +310,10 @@ ckh_shrink(ckh_t *ckh) */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, - ZU(1) << LG_CACHELINE, true); + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + if (usize == 0) + return; + tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -340,7 +348,7 @@ bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; - size_t mincells; + size_t mincells, usize; unsigned lg_mincells; assert(minitems > 0); @@ -375,15 +383,19 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->hash = hash; ckh->keycomp = keycomp; - ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells, - (ZU(1) << LG_CACHELINE), true); + usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); + if (usize == 0) { + ret = true; + goto RETURN; + } + ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); if (ckh->tab == NULL) { ret = true; goto RETURN; } #ifdef JEMALLOC_DEBUG - ckh->magic = CKH_MAGIG; + ckh->magic = CKH_MAGIC; #endif ret = false; @@ -396,7 +408,7 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + dassert(ckh->magic == CKH_MAGIC); #ifdef CKH_VERBOSE malloc_printf( @@ -421,7 +433,7 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + dassert(ckh->magic == CKH_MAGIC); return (ckh->count); } @@ -452,7 +464,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + dassert(ckh->magic == CKH_MAGIC); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -477,7 +489,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + dassert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -509,7 +521,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + dassert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -544,7 +556,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = 
hash(key, strlen((const char *)key), - 0x8432a476666bbc13U); + 0x8432a476666bbc13LLU); } *hash1 = ret1; diff --git a/dep/jemalloc/src/ctl.c b/dep/jemalloc/src/ctl.c index 3c8adab90a3..e5336d36949 100644 --- a/dep/jemalloc/src/ctl.c +++ b/dep/jemalloc/src/ctl.c @@ -182,6 +182,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) #endif +CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) #ifdef JEMALLOC_STATS @@ -192,6 +193,7 @@ CTL_PROTO(stats_arenas_i_purged) #endif INDEX_PROTO(stats_arenas_i) #ifdef JEMALLOC_STATS +CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_mapped) @@ -434,6 +436,7 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = { #endif static const ctl_node_t stats_arenas_i_node[] = { + {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)} #ifdef JEMALLOC_STATS @@ -458,6 +461,7 @@ static const ctl_node_t stats_arenas_node[] = { static const ctl_node_t stats_node[] = { #ifdef JEMALLOC_STATS + {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, @@ -620,6 +624,7 @@ ctl_arena_refresh(arena_t *arena, unsigned i) ctl_arena_clear(astats); + sstats->nthreads += astats->nthreads; #ifdef JEMALLOC_STATS ctl_arena_stats_amerge(astats, arena); /* Merge into sum stats as well. */ @@ -657,10 +662,17 @@ ctl_refresh(void) * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). */ + ctl_stats.arenas[narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[narenas]); malloc_mutex_lock(&arenas_lock); memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; + else + ctl_stats.arenas[i].nthreads = 0; + } malloc_mutex_unlock(&arenas_lock); for (i = 0; i < narenas; i++) { bool initialized = (tarenas[i] != NULL); @@ -1114,8 +1126,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, unsigned newind, oldind; newind = oldind = choose_arena()->ind; - WRITE(oldind, unsigned); - READ(newind, unsigned); + WRITE(newind, unsigned); + READ(oldind, unsigned); if (newind != oldind) { arena_t *arena; @@ -1129,6 +1141,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, malloc_mutex_lock(&arenas_lock); if ((arena = arenas[newind]) == NULL) arena = arenas_extend(newind); + arenas[oldind]->nthreads--; + arenas[newind]->nthreads++; malloc_mutex_unlock(&arenas_lock); if (arena == NULL) { ret = EAGAIN; @@ -1137,6 +1151,13 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. 
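The reversed WRITE/READ order in thread_arena_ctl() fixes the caller-visible semantics of the "thread.arena" mallctl: the new arena index is consumed from newp, and the previous binding is reported back through oldp. A minimal caller-side sketch, assuming the standard jemalloc/jemalloc.h header and the unprefixed mallctl symbol (this tree wraps public entry points as JEMALLOC_P(mallctl)):

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    rebind_thread_arena(unsigned newind)
    {
        unsigned oldind;
        size_t oldlen = sizeof(oldind);

        /* Write the new index; read back the previous binding. */
        if (mallctl("thread.arena", &oldind, &oldlen, &newind,
            sizeof(newind)) != 0)
            return (-1);
        printf("thread moved from arena %u to arena %u\n", oldind, newind);
        return (0);
    }
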
*/ ARENA_SET(arena); +#ifdef JEMALLOC_TCACHE + { + tcache_t *tcache = TCACHE_GET(); + if (tcache != NULL) + tcache->arena = arena; + } +#endif } ret = 0; @@ -1146,9 +1167,9 @@ RETURN: #ifdef JEMALLOC_STATS CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_allocatedp, &ALLOCATED_GET(), uint64_t *); +CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *); CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_deallocatedp, &DEALLOCATED_GET(), uint64_t *); +CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *); #endif /******************************************************************************/ @@ -1284,9 +1305,9 @@ CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool) /******************************************************************************/ -CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) const ctl_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1531,6 +1552,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) } #endif +CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) #ifdef JEMALLOC_STATS @@ -1562,6 +1584,7 @@ RETURN: } #ifdef JEMALLOC_STATS +CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *) CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t) CTL_RO_GEN(stats_active, ctl_stats.active, size_t) CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t) diff --git a/dep/jemalloc/src/hash.c b/dep/jemalloc/src/hash.c index 6a13d7a03c0..cfa4da0275c 100644 --- a/dep/jemalloc/src/hash.c +++ b/dep/jemalloc/src/hash.c @@ -1,2 +1,2 @@ -#define HASH_C_ +#define JEMALLOC_HASH_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/huge.c b/dep/jemalloc/src/huge.c index 0aadc4339a9..a4f9b054ed5 100644 --- a/dep/jemalloc/src/huge.c +++ b/dep/jemalloc/src/huge.c @@ -50,6 +50,7 @@ huge_malloc(size_t size, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); #ifdef JEMALLOC_STATS + stats_cactive_add(csize); huge_nmalloc++; huge_allocated += csize; #endif @@ -83,7 +84,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) * alignment, in order to assure the alignment can be achieved, then * unmap leading and trailing chunks. */ - assert(alignment >= chunksize); + assert(alignment > chunksize); chunk_size = CHUNK_CEILING(size); @@ -109,12 +110,12 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (offset == 0) { /* Trim trailing space. */ chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size); + - chunk_size, true); } else { size_t trailsize; /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset); + chunk_dealloc(ret, alignment - offset, true); ret = (void *)((uintptr_t)ret + (alignment - offset)); @@ -123,7 +124,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) /* Trim trailing space. 
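The leading/trailing trims in huge_palloc() are the standard over-allocate-and-carve approach to address alignment: map size + alignment bytes, advance to the first aligned address, and return the slop to the OS. A standalone sketch with raw mmap/munmap — illustrative only, since jemalloc routes this through its chunk layer; it assumes size and alignment are page-size multiples and alignment is a power of two:

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <stddef.h>
    #include <sys/mman.h>

    static void *
    alloc_aligned(size_t size, size_t alignment)
    {
        size_t alloc_size = size + alignment;
        char *p = mmap(NULL, alloc_size, PROT_READ|PROT_WRITE,
            MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        size_t offset, lead;

        if (p == MAP_FAILED)
            return (NULL);
        offset = (uintptr_t)p & (alignment - 1);
        if (offset == 0) {
            munmap(p + size, alloc_size - size);  /* trim trailing space */
            return (p);
        }
        lead = alignment - offset;
        munmap(p, lead);                          /* trim leading space */
        munmap(p + lead + size, offset);          /* trim trailing space */
        return (p + lead);
    }
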
*/ assert(trailsize < alloc_size); chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize); + trailsize, true); } } @@ -134,6 +135,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); #ifdef JEMALLOC_STATS + stats_cactive_add(chunk_size); huge_nmalloc++; huge_allocated += chunk_size; #endif @@ -192,7 +194,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * different size class. In that case, fall back to allocating new * space and copying. */ - if (alignment != 0) + if (alignment > chunksize) ret = huge_palloc(size + extra, alignment, zero); else ret = huge_malloc(size + extra, zero); @@ -201,7 +203,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (extra == 0) return (NULL); /* Try again, this time without extra. */ - if (alignment != 0) + if (alignment > chunksize) ret = huge_palloc(size, alignment, zero); else ret = huge_malloc(size, zero); @@ -232,6 +234,13 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, ) { size_t newsize = huge_salloc(ret); + /* + * Remove ptr from the tree of huge allocations before + * performing the remap operation, in order to avoid the + * possibility of another thread acquiring that mapping before + * this one removes it from the tree. + */ + huge_dalloc(ptr, false); if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, ret) == MAP_FAILED) { /* @@ -251,9 +260,8 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - idalloc(ptr); - } else - huge_dalloc(ptr, false); + chunk_dealloc_mmap(ptr, oldsize); + } } else #endif { @@ -278,6 +286,7 @@ huge_dalloc(void *ptr, bool unmap) extent_tree_ad_remove(&huge, node); #ifdef JEMALLOC_STATS + stats_cactive_sub(node->size); huge_ndalloc++; huge_allocated -= node->size; #endif @@ -292,9 +301,10 @@ huge_dalloc(void *ptr, bool unmap) memset(node->addr, 0x5a, node->size); #endif #endif - chunk_dealloc(node->addr, node->size); } + chunk_dealloc(node->addr, node->size, unmap); + base_node_dealloc(node); } diff --git a/dep/jemalloc/src/jemalloc.c b/dep/jemalloc/src/jemalloc.c index 2aebc51dd19..a161c2e26e1 100644 --- a/dep/jemalloc/src/jemalloc.c +++ b/dep/jemalloc/src/jemalloc.c @@ -7,12 +7,10 @@ malloc_mutex_t arenas_lock; arena_t **arenas; unsigned narenas; -static unsigned next_arena; +pthread_key_t arenas_tsd; #ifndef NO_TLS __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); -#else -pthread_key_t arenas_tsd; #endif #ifdef JEMALLOC_STATS @@ -30,7 +28,13 @@ static bool malloc_initialized = false; static pthread_t malloc_initializer = (unsigned long)0; /* Used to avoid initialization races. 
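The reordering in huge_ralloc() deserves a note: the old extent must leave the huge tree before mremap() runs, because once the old pages are recycled the same address range can be handed to another thread, and a stale tree node would then describe someone else's mapping. The remap step itself, reduced to a Linux-only sketch (mremap with MREMAP_FIXED is not portable):

    #define _GNU_SOURCE
    #include <sys/mman.h>

    /*
     * Move an existing mapping onto a freshly reserved target range.
     * MREMAP_FIXED atomically unmaps whatever lives at `target`, so any
     * bookkeeping for the old range must already have been retired.
     */
    static void *
    move_mapping(void *old, size_t oldsize, void *target, size_t newsize)
    {
        return (mremap(old, oldsize, newsize,
            MREMAP_MAYMOVE | MREMAP_FIXED, target));
    }
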
*/ -static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +static malloc_mutex_t init_lock = +#ifdef JEMALLOC_OSSPIN + 0 +#else + MALLOC_MUTEX_INITIALIZER +#endif + ; #ifdef DYNAMIC_PAGE_SHIFT size_t pagesize; @@ -70,6 +74,7 @@ size_t opt_narenas = 0; static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); +static void arenas_cleanup(void *arg); #if (defined(JEMALLOC_STATS) && defined(NO_TLS)) static void thread_allocated_cleanup(void *arg); #endif @@ -79,6 +84,7 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen); static void malloc_conf_init(void); static bool malloc_init_hard(void); +static int imemalign(void **memptr, size_t alignment, size_t size); /******************************************************************************/ /* malloc_message() setup. */ @@ -147,13 +153,53 @@ choose_arena_hard(void) arena_t *ret; if (narenas > 1) { + unsigned i, choose, first_null; + + choose = 0; + first_null = narenas; malloc_mutex_lock(&arenas_lock); - if ((ret = arenas[next_arena]) == NULL) - ret = arenas_extend(next_arena); - next_arena = (next_arena + 1) % narenas; + assert(arenas[0] != NULL); + for (i = 1; i < narenas; i++) { + if (arenas[i] != NULL) { + /* + * Choose the first arena that has the lowest + * number of threads assigned to it. + */ + if (arenas[i]->nthreads < + arenas[choose]->nthreads) + choose = i; + } else if (first_null == narenas) { + /* + * Record the index of the first uninitialized + * arena, in case all extant arenas are in use. + * + * NB: It is possible for there to be + * discontinuities in terms of initialized + * versus uninitialized arenas, due to the + * "thread.arena" mallctl. + */ + first_null = i; + } + } + + if (arenas[choose] == 0 || first_null == narenas) { + /* + * Use an unloaded arena, or the least loaded arena if + * all arenas are already initialized. + */ + ret = arenas[choose]; + } else { + /* Initialize a new arena. */ + ret = arenas_extend(first_null); + } + ret->nthreads++; malloc_mutex_unlock(&arenas_lock); - } else + } else { ret = arenas[0]; + malloc_mutex_lock(&arenas_lock); + ret->nthreads++; + malloc_mutex_unlock(&arenas_lock); + } ARENA_SET(ret); @@ -213,6 +259,28 @@ stats_print_atexit(void) JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); } +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +thread_allocated_t * +thread_allocated_get_hard(void) +{ + thread_allocated_t *thread_allocated = (thread_allocated_t *) + imalloc(sizeof(thread_allocated_t)); + if (thread_allocated == NULL) { + static thread_allocated_t static_thread_allocated = {0, 0}; + malloc_write("<jemalloc>: Error allocating TSD;" + " mallctl(\"thread.{de,}allocated[p]\", ...)" + " will be inaccurate\n"); + if (opt_abort) + abort(); + return (&static_thread_allocated); + } + pthread_setspecific(thread_allocated_tsd, thread_allocated); + thread_allocated->allocated = 0; + thread_allocated->deallocated = 0; + return (thread_allocated); +} +#endif + /* * End miscellaneous support functions. 
*/ @@ -237,6 +305,16 @@ malloc_ncpus(void) return (ret); } +static void +arenas_cleanup(void *arg) +{ + arena_t *arena = (arena_t *)arg; + + malloc_mutex_lock(&arenas_lock); + arena->nthreads--; + malloc_mutex_unlock(&arenas_lock); +} + #if (defined(JEMALLOC_STATS) && defined(NO_TLS)) static void thread_allocated_cleanup(void *arg) @@ -421,8 +499,8 @@ malloc_conf_init(void) if ((opts = getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to - * the value of the JEMALLOC_OPTIONS - * environment variable. + * the value of the MALLOC_CONF environment + * variable. */ } else { /* No configuration specified. */ @@ -611,7 +689,7 @@ malloc_init_hard(void) result = sysconf(_SC_PAGESIZE); assert(result != -1); - pagesize = (unsigned)result; + pagesize = (size_t)result; /* * We assume that pagesize is a power of 2 when calculating @@ -671,7 +749,10 @@ malloc_init_hard(void) } #ifdef JEMALLOC_TCACHE - tcache_boot(); + if (tcache_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } #endif if (huge_boot()) { @@ -688,6 +769,14 @@ malloc_init_hard(void) } #endif + if (malloc_mutex_init(&arenas_lock)) + return (true); + + if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). @@ -712,8 +801,7 @@ malloc_init_hard(void) * threaded mode. */ ARENA_SET(arenas[0]); - - malloc_mutex_init(&arenas_lock); + arenas[0]->nthreads++; #ifdef JEMALLOC_PROF if (prof_boot2()) { @@ -753,15 +841,6 @@ malloc_init_hard(void) malloc_write(")\n"); } - next_arena = (narenas > 0) ? 1 : 0; - -#ifdef NO_TLS - if (pthread_key_create(&arenas_tsd, NULL) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } -#endif - /* Allocate and initialize arenas. */ arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); if (arenas == NULL) { @@ -793,7 +872,6 @@ malloc_init_hard(void) return (false); } - #ifdef JEMALLOC_ZONE JEMALLOC_ATTR(constructor) void @@ -862,7 +940,8 @@ JEMALLOC_P(malloc)(size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { ret = NULL; goto OOM; } @@ -911,19 +990,23 @@ RETURN: } JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +#ifdef JEMALLOC_PROF +/* + * Avoid any uncertainty as to how many backtrace frames to ignore in + * PROF_ALLOC_PREP(). 
+ */ +JEMALLOC_ATTR(noinline) +#endif +static int +imemalign(void **memptr, size_t alignment, size_t size) { int ret; - void *result; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t usize -# ifdef JEMALLOC_CC_SILENCE +#ifdef JEMALLOC_CC_SILENCE = 0 -# endif - ; #endif + ; + void *result; #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt # ifdef JEMALLOC_CC_SILENCE @@ -973,34 +1056,38 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) goto RETURN; } + usize = sa2u(size, alignment, NULL); + if (usize == 0) { + result = NULL; + ret = ENOMEM; + goto RETURN; + } + #ifdef JEMALLOC_PROF if (opt_prof) { - usize = sa2u(size, alignment, NULL); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(2, usize, cnt); + if (cnt == NULL) { result = NULL; ret = EINVAL; } else { if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { - result = ipalloc(small_maxclass+1, - alignment, false); + assert(sa2u(small_maxclass+1, + alignment, NULL) != 0); + result = ipalloc(sa2u(small_maxclass+1, + alignment, NULL), alignment, false); if (result != NULL) { arena_prof_promoted(result, usize); } } else { - result = ipalloc(size, alignment, + result = ipalloc(usize, alignment, false); } } } else #endif - { -#ifdef JEMALLOC_STATS - usize = sa2u(size, alignment, NULL); -#endif - result = ipalloc(size, alignment, false); - } + result = ipalloc(usize, alignment, false); } if (result == NULL) { @@ -1032,6 +1119,15 @@ RETURN: return (ret); } +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +{ + + return imemalign(memptr, alignment, size); +} + JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * @@ -1087,7 +1183,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(num_size); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { ret = NULL; goto RETURN; } @@ -1200,7 +1297,9 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) if (opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { + old_ctx = NULL; ret = NULL; goto OOM; } @@ -1210,8 +1309,13 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) false, false); if (ret != NULL) arena_prof_promoted(ret, usize); - } else + else + old_ctx = NULL; + } else { ret = iralloc(ptr, size, 0, 0, false, false); + if (ret == NULL) + old_ctx = NULL; + } } else #endif { @@ -1249,7 +1353,8 @@ OOM: #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - if ((cnt = prof_alloc_prep(usize)) == NULL) + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) ret = NULL; else { if (prof_promote && (uintptr_t)cnt != @@ -1354,7 +1459,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - JEMALLOC_P(posix_memalign)(&ret, alignment, size); + imemalign(&ret, alignment, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1373,7 +1478,7 @@ JEMALLOC_P(valloc)(size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); + imemalign(&ret, PAGE_SIZE, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1454,15 +1559,18 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, } JEMALLOC_INLINE void * -iallocm(size_t size, size_t alignment, bool zero) +iallocm(size_t usize, size_t alignment, bool 
zero) { + assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment, + NULL))); + if (alignment != 0) - return (ipalloc(size, alignment, zero)); + return (ipalloc(usize, alignment, zero)); else if (zero) - return (icalloc(size)); + return (icalloc(usize)); else - return (imalloc(size)); + return (imalloc(usize)); } JEMALLOC_ATTR(nonnull(1)) @@ -1485,38 +1593,43 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto OOM; + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); + if (usize == 0) + goto OOM; + #ifdef JEMALLOC_PROF if (opt_prof) { - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, - NULL); - if ((cnt = prof_alloc_prep(usize)) == NULL) + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) goto OOM; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { - p = iallocm(small_maxclass+1, alignment, zero); + size_t usize_promoted = (alignment == 0) ? + s2u(small_maxclass+1) : sa2u(small_maxclass+1, + alignment, NULL); + assert(usize_promoted != 0); + p = iallocm(usize_promoted, alignment, zero); if (p == NULL) goto OOM; arena_prof_promoted(p, usize); } else { - p = iallocm(size, alignment, zero); + p = iallocm(usize, alignment, zero); if (p == NULL) goto OOM; } - + prof_malloc(p, usize, cnt); if (rsize != NULL) *rsize = usize; } else #endif { - p = iallocm(size, alignment, zero); + p = iallocm(usize, alignment, zero); if (p == NULL) goto OOM; #ifndef JEMALLOC_STATS if (rsize != NULL) #endif { - usize = (alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL); #ifdef JEMALLOC_STATS if (rsize != NULL) #endif @@ -1559,7 +1672,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, bool no_move = flags & ALLOCM_NO_MOVE; #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; - prof_ctx_t *old_ctx; #endif assert(ptr != NULL); @@ -1574,25 +1686,33 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and - * use that in prof_alloc_prep() to decide whether to capture a + * use that in PROF_ALLOC_PREP() to decide whether to capture a * backtrace. prof_realloc() will use the actual usize to * decide whether to sample. */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment, NULL); + prof_ctx_t *old_ctx = prof_ctx_get(p); old_size = isalloc(p); - old_ctx = prof_ctx_get(p); - if ((cnt = prof_alloc_prep(max_usize)) == NULL) + PROF_ALLOC_PREP(1, max_usize, cnt); + if (cnt == NULL) goto OOM; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize - <= small_maxclass) { + /* + * Use minimum usize to determine whether promotion may happen. + */ + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U + && ((alignment == 0) ? s2u(size) : sa2u(size, + alignment, NULL)) <= small_maxclass) { q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= size+extra) ? 
0 : size+extra - (small_maxclass+1), alignment, zero, no_move); if (q == NULL) goto ERR; - usize = isalloc(q); - arena_prof_promoted(q, usize); + if (max_usize < PAGE_SIZE) { + usize = max_usize; + arena_prof_promoted(q, usize); + } else + usize = isalloc(q); } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) @@ -1600,6 +1720,8 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, usize = isalloc(q); } prof_realloc(q, usize, cnt, old_size, old_ctx); + if (rsize != NULL) + *rsize = usize; } else #endif { diff --git a/dep/jemalloc/src/mb.c b/dep/jemalloc/src/mb.c index 30a1a2e997a..dc2c0a256fd 100644 --- a/dep/jemalloc/src/mb.c +++ b/dep/jemalloc/src/mb.c @@ -1,2 +1,2 @@ -#define MB_C_ +#define JEMALLOC_MB_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/mutex.c b/dep/jemalloc/src/mutex.c index 3ecb18a340e..ca89ef1c962 100644 --- a/dep/jemalloc/src/mutex.c +++ b/dep/jemalloc/src/mutex.c @@ -55,6 +55,9 @@ pthread_create(pthread_t *__restrict thread, bool malloc_mutex_init(malloc_mutex_t *mutex) { +#ifdef JEMALLOC_OSSPIN + *mutex = 0; +#else pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) @@ -70,6 +73,7 @@ malloc_mutex_init(malloc_mutex_t *mutex) } pthread_mutexattr_destroy(&attr); +#endif return (false); } @@ -77,8 +81,10 @@ void malloc_mutex_destroy(malloc_mutex_t *mutex) { +#ifndef JEMALLOC_OSSPIN if (pthread_mutex_destroy(mutex) != 0) { malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n"); abort(); } +#endif } diff --git a/dep/jemalloc/src/prof.c b/dep/jemalloc/src/prof.c index 636cccef52a..8a144b4e46c 100644 --- a/dep/jemalloc/src/prof.c +++ b/dep/jemalloc/src/prof.c @@ -3,15 +3,15 @@ #ifdef JEMALLOC_PROF /******************************************************************************/ -#ifdef JEMALLOC_PROF_LIBGCC -#include <unwind.h> -#endif - #ifdef JEMALLOC_PROF_LIBUNWIND #define UNW_LOCAL_ONLY #include <libunwind.h> #endif +#ifdef JEMALLOC_PROF_LIBGCC +#include <unwind.h> +#endif + /******************************************************************************/ /* Data. 
*/ @@ -169,39 +169,7 @@ prof_leave(void) prof_gdump(); } -#ifdef JEMALLOC_PROF_LIBGCC -static _Unwind_Reason_Code -prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) -{ - - return (_URC_NO_REASON); -} - -static _Unwind_Reason_Code -prof_unwind_callback(struct _Unwind_Context *context, void *arg) -{ - prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - - if (data->nignore > 0) - data->nignore--; - else { - data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); - data->bt->len++; - if (data->bt->len == data->max) - return (_URC_END_OF_STACK); - } - - return (_URC_NO_REASON); -} - -void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) -{ - prof_unwind_data_t data = {bt, nignore, max}; - - _Unwind_Backtrace(prof_unwind_callback, &data); -} -#elif defined(JEMALLOC_PROF_LIBUNWIND) +#ifdef JEMALLOC_PROF_LIBUNWIND void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -236,7 +204,41 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) break; } } -#else +#endif +#ifdef JEMALLOC_PROF_LIBGCC +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) +{ + + return (_URC_NO_REASON); +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) +{ + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + + if (data->nignore > 0) + data->nignore--; + else { + data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); + } + + return (_URC_NO_REASON); +} + +void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ + prof_unwind_data_t data = {bt, nignore, max}; + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#endif +#ifdef JEMALLOC_PROF_GCC void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -432,6 +434,7 @@ prof_lookup(prof_bt_t *bt) prof_ctx_t *p; void *v; } ctx; + bool new_ctx; /* * This thread's cache lacks bt. Look for it in the global @@ -468,12 +471,26 @@ prof_lookup(prof_bt_t *bt) idalloc(ctx.v); return (NULL); } + /* + * Artificially raise curobjs, in order to avoid a race + * condition with prof_ctx_merge()/prof_ctx_destroy(). + * + * No locking is necessary for ctx here because no other + * threads have had the opportunity to fetch it from + * bt2ctx yet. + */ + ctx.p->cnt_merged.curobjs++; + new_ctx = true; + } else { + /* + * Artificially raise curobjs, in order to avoid a race + * condition with prof_ctx_merge()/prof_ctx_destroy(). + */ + malloc_mutex_lock(&ctx.p->lock); + ctx.p->cnt_merged.curobjs++; + malloc_mutex_unlock(&ctx.p->lock); + new_ctx = false; } - /* - * Acquire ctx's lock before releasing bt2ctx_mtx, in order to - * avoid a race condition with prof_ctx_destroy(). - */ - malloc_mutex_lock(&ctx.p->lock); prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ @@ -486,8 +503,9 @@ prof_lookup(prof_bt_t *bt) */ ret.p = ql_last(&prof_tdata->lru_ql, lru_link); assert(ret.v != NULL); - ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, - NULL); + if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, + NULL, NULL)) + assert(false); ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); prof_ctx_merge(ret.p->ctx, ret.p); /* ret can now be re-used. */ @@ -498,7 +516,8 @@ prof_lookup(prof_bt_t *bt) /* Allocate and partially initialize a new cnt. 
*/ ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { - malloc_mutex_unlock(&ctx.p->lock); + if (new_ctx) + prof_ctx_destroy(ctx.p); return (NULL); } ql_elm_new(ret.p, cnts_link); @@ -509,12 +528,15 @@ prof_lookup(prof_bt_t *bt) ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - malloc_mutex_unlock(&ctx.p->lock); + if (new_ctx) + prof_ctx_destroy(ctx.p); idalloc(ret.v); return (NULL); } ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + malloc_mutex_lock(&ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); + ctx.p->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx.p->lock); } else { /* Move ret to the front of the LRU. */ @@ -628,11 +650,10 @@ prof_ctx_destroy(prof_ctx_t *ctx) /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to - * avoid a race condition with this function, and prof_ctx_merge() - * artificially raises ctx->cnt_merged.curobjs in order to avoid a race - * between the main body of prof_ctx_merge() and entry into this - * function. + * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in + * order to avoid a race condition with this function, as does + * prof_ctx_merge() in order to avoid a race between the main body of + * prof_ctx_merge() and entry into this function. */ prof_enter(); malloc_mutex_lock(&ctx->lock); @@ -641,7 +662,8 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ - ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); + if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) + assert(false); prof_leave(); /* Destroy ctx. */ malloc_mutex_unlock(&ctx->lock); @@ -649,7 +671,10 @@ prof_ctx_destroy(prof_ctx_t *ctx) malloc_mutex_destroy(&ctx->lock); idalloc(ctx); } else { - /* Compensate for increment in prof_ctx_merge(). */ + /* + * Compensate for increment in prof_ctx_merge() or + * prof_lookup(). + */ ctx->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx->lock); prof_leave(); @@ -1056,7 +1081,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(bt->vec, bt->len * sizeof(void *), - 0x8432a476666bbc13U); + 0x8432a476666bbc13LLU); } *hash1 = ret1; @@ -1093,7 +1118,6 @@ prof_tdata_init(void) prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); if (prof_tdata->vec == NULL) { - ckh_delete(&prof_tdata->bt2cnt); idalloc(prof_tdata); return (NULL); @@ -1111,33 +1135,26 @@ prof_tdata_init(void) static void prof_tdata_cleanup(void *arg) { - prof_tdata_t *prof_tdata; + prof_thr_cnt_t *cnt; + prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; - prof_tdata = PROF_TCACHE_GET(); - if (prof_tdata != NULL) { - prof_thr_cnt_t *cnt; - - /* - * Delete the hash table. All of its contents can still be - * iterated over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); + /* + * Delete the hash table. All of its contents can still be iterated + * over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); - /* - * Iteratively merge cnt's into the global stats and delete - * them. - */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - prof_ctx_merge(cnt->ctx, cnt); - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - idalloc(cnt); - } + /* Iteratively merge cnt's into the global stats and delete them. 
*/ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + prof_ctx_merge(cnt->ctx, cnt); + idalloc(cnt); + } - idalloc(prof_tdata->vec); + idalloc(prof_tdata->vec); - idalloc(prof_tdata); - PROF_TCACHE_SET(NULL); - } + idalloc(prof_tdata); + PROF_TCACHE_SET(NULL); } void diff --git a/dep/jemalloc/src/rtree.c b/dep/jemalloc/src/rtree.c index 7753743c5e6..eb0ff1e24af 100644 --- a/dep/jemalloc/src/rtree.c +++ b/dep/jemalloc/src/rtree.c @@ -1,4 +1,4 @@ -#define RTREE_C_ +#define JEMALLOC_RTREE_C_ #include "jemalloc/internal/jemalloc_internal.h" rtree_t * @@ -20,7 +20,10 @@ rtree_new(unsigned bits) memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * height)); - malloc_mutex_init(&ret->mutex); + if (malloc_mutex_init(&ret->mutex)) { + /* Leak the rtree. */ + return (NULL); + } ret->height = height; if (bits_per_level * height > bits) ret->level2bits[0] = bits % bits_per_level; diff --git a/dep/jemalloc/src/stats.c b/dep/jemalloc/src/stats.c index 3dfe0d232a6..dc172e425c0 100644 --- a/dep/jemalloc/src/stats.c +++ b/dep/jemalloc/src/stats.c @@ -39,6 +39,10 @@ bool opt_stats_print = false; +#ifdef JEMALLOC_STATS +size_t stats_cactive = 0; +#endif + /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -319,6 +323,7 @@ static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { + unsigned nthreads; size_t pagesize, pactive, pdirty, mapped; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -328,6 +333,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.pagesize", &pagesize, size_t); + CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); @@ -669,21 +677,26 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #ifdef JEMALLOC_STATS { int err; - size_t ssz; + size_t sszp, ssz; + size_t *cactive; size_t allocated, active, mapped; size_t chunks_current, chunks_high, swap_avail; uint64_t chunks_total; size_t huge_allocated; uint64_t huge_nmalloc, huge_ndalloc; + sszp = sizeof(size_t *); ssz = sizeof(size_t); + CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, mapped: %zu\n", allocated, - active, mapped); + "Allocated: %zu, active: %zu, mapped: %zu\n", + allocated, active, mapped); + malloc_cprintf(write_cb, cbopaque, + "Current active ceiling: %zu\n", atomic_read_z(cactive)); /* Print chunk stats. */ CTL_GET("stats.chunks.total", &chunks_total, uint64_t); @@ -735,7 +748,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, ninitialized++; } - if (ninitialized > 1) { + if (ninitialized > 1 || unmerged == false) { /* Print merged arena stats. 
*/ malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); diff --git a/dep/jemalloc/src/tcache.c b/dep/jemalloc/src/tcache.c index cbbe7a113a9..31c329e1613 100644 --- a/dep/jemalloc/src/tcache.c +++ b/dep/jemalloc/src/tcache.c @@ -8,6 +8,9 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; +tcache_bin_info_t *tcache_bin_info; +static unsigned stack_nelms; /* Total stack elms per tcache. */ + /* Map of thread-specific caches. */ #ifndef NO_TLS __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); @@ -55,18 +58,19 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *flush, *deferred, *ptr; + void *ptr; unsigned i, nflush, ndeferred; - bool first_pass; +#ifdef JEMALLOC_STATS + bool merged_stats = false; +#endif assert(binind < nbins); assert(rem <= tbin->ncached); - assert(tbin->ncached > 0 || tbin->avail == NULL); - for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = - true; flush != NULL; flush = deferred, nflush = ndeferred) { + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); arena_t *arena = chunk->arena; arena_bin_t *bin = &arena->bins[binind]; @@ -82,17 +86,17 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem malloc_mutex_lock(&bin->lock); #ifdef JEMALLOC_STATS if (arena == tcache->arena) { + assert(merged_stats == false); + merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } #endif - deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = flush; + ptr = tbin->avail[i]; assert(ptr != NULL); - flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) { size_t pageind = ((uintptr_t)ptr - @@ -107,21 +111,31 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem * locked. Stash the object, so that it can be * handled in a future pass. */ - *(void **)ptr = deferred; - deferred = ptr; + tbin->avail[ndeferred] = ptr; ndeferred++; } } malloc_mutex_unlock(&bin->lock); - - if (first_pass) { - tbin->avail = flush; - first_pass = false; - } } +#ifdef JEMALLOC_STATS + if (merged_stats == false) { + /* + * The flush loop didn't happen to flush to this thread's + * arena, so the stats didn't get merged. Manually do so now. 
+ */ + arena_bin_t *bin = &tcache->arena->bins[binind]; + malloc_mutex_lock(&bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(&bin->lock); + } +#endif + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); tbin->ncached = rem; - if (tbin->ncached < tbin->low_water) + if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } @@ -132,18 +146,19 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *flush, *deferred, *ptr; + void *ptr; unsigned i, nflush, ndeferred; - bool first_pass; +#ifdef JEMALLOC_STATS + bool merged_stats = false; +#endif assert(binind < nhbins); assert(rem <= tbin->ncached); - assert(tbin->ncached > 0 || tbin->avail == NULL); - for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = - true; flush != NULL; flush = deferred, nflush = ndeferred) { + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); arena_t *arena = chunk->arena; malloc_mutex_lock(&arena->lock); @@ -155,6 +170,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem tcache->prof_accumbytes = 0; #endif #ifdef JEMALLOC_STATS + merged_stats = true; arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - nbins].nrequests += tbin->tstats.nrequests; @@ -163,12 +179,10 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) } #endif - deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = flush; + ptr = tbin->avail[i]; assert(ptr != NULL); - flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) arena_dalloc_large(arena, chunk, ptr); @@ -179,21 +193,32 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem * Stash the object, so that it can be handled * in a future pass. */ - *(void **)ptr = deferred; - deferred = ptr; + tbin->avail[ndeferred] = ptr; ndeferred++; } } malloc_mutex_unlock(&arena->lock); - - if (first_pass) { - tbin->avail = flush; - first_pass = false; - } } +#ifdef JEMALLOC_STATS + if (merged_stats == false) { + /* + * The flush loop didn't happen to flush to this thread's + * arena, so the stats didn't get merged. Manually do so now. + */ + arena_t *arena = tcache->arena; + malloc_mutex_lock(&arena->lock); + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[binind - nbins].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(&arena->lock); + } +#endif + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); tbin->ncached = rem; - if (tbin->ncached < tbin->low_water) + if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } @@ -201,10 +226,14 @@ tcache_t * tcache_create(arena_t *arena) { tcache_t *tcache; - size_t size; + size_t size, stack_offset; unsigned i; size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); + /* Naturally align the pointer stacks. */ + size = PTR_CEILING(size); + stack_offset = size; + size += stack_nelms * sizeof(void *); /* * Round up to the nearest multiple of the cacheline size, in order to * avoid the possibility of false cacheline sharing. 
@@ -217,6 +246,8 @@ tcache_create(arena_t *arena) if (size <= small_maxclass) tcache = (tcache_t *)arena_malloc_small(arena, size, true); + else if (size <= tcache_maxclass) + tcache = (tcache_t *)arena_malloc_large(arena, size, true); else tcache = (tcache_t *)icalloc(size); @@ -233,15 +264,12 @@ tcache_create(arena_t *arena) tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - for (i = 0; i < nbins; i++) { - if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { - tcache->tbins[i].ncached_max = (arena->bins[i].nregs << - 1); - } else - tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; + for (i = 0; i < nhbins; i++) { + tcache->tbins[i].lg_fill_div = 1; + tcache->tbins[i].avail = (void **)((uintptr_t)tcache + + (uintptr_t)stack_offset); + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } - for (; i < nhbins; i++) - tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE; TCACHE_SET(tcache); @@ -252,6 +280,7 @@ void tcache_destroy(tcache_t *tcache) { unsigned i; + size_t tcache_size; #ifdef JEMALLOC_STATS /* Unlink from list of extant tcaches. */ @@ -308,7 +337,8 @@ tcache_destroy(tcache_t *tcache) } #endif - if (arena_salloc(tcache) <= small_maxclass) { + tcache_size = arena_salloc(tcache); + if (tcache_size <= small_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> @@ -322,6 +352,13 @@ tcache_destroy(tcache_t *tcache) malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, tcache, mapelm); malloc_mutex_unlock(&bin->lock); + } else if (tcache_size <= tcache_maxclass) { + arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); + arena_t *arena = chunk->arena; + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, tcache); + malloc_mutex_unlock(&arena->lock); } else idalloc(tcache); } @@ -378,11 +415,13 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } #endif -void +bool tcache_boot(void) { if (opt_tcache) { + unsigned i; + /* * If necessary, clamp opt_lg_tcache_max, now that * small_maxclass and arena_maxclass are known. */ @@ -397,6 +436,28 @@ tcache_boot(void) nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); + /* Initialize tcache_bin_info. */ + tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + sizeof(tcache_bin_info_t)); + if (tcache_bin_info == NULL) + return (true); + stack_nelms = 0; + for (i = 0; i < nbins; i++) { + if ((arena_bin_info[i].nregs << 1) <= + TCACHE_NSLOTS_SMALL_MAX) { + tcache_bin_info[i].ncached_max = + (arena_bin_info[i].nregs << 1); + } else { + tcache_bin_info[i].ncached_max = + TCACHE_NSLOTS_SMALL_MAX; + } + stack_nelms += tcache_bin_info[i].ncached_max; + } + for (; i < nhbins; i++) { + tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; + stack_nelms += tcache_bin_info[i].ncached_max; + } + /* Compute incremental GC event threshold. */ if (opt_lg_tcache_gc_sweep >= 0) { tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / @@ -412,6 +473,8 @@ tcache_boot(void) abort(); } } + + return (false); } /******************************************************************************/ #endif /* JEMALLOC_TCACHE */
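A closing note on the tcache layout these hunks establish: tcache_create() now sizes a single allocation to hold the tcache_t header, rounds it up to pointer alignment (PTR_CEILING), and lays every bin's avail stack out contiguously behind it, with the ncached_max values precomputed once in tcache_bin_info rather than per tcache. The offset arithmetic, reduced to a standalone sketch with hypothetical names:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdlib.h>

    #define PTR_CEIL(s) (((s) + sizeof(void *) - 1) & ~(sizeof(void *) - 1))

    typedef struct {
        void **avail;           /* this bin's slice of the stack area */
        unsigned ncached_max;
    } bin_t;

    typedef struct {
        unsigned nbins;
        bin_t bins[1];          /* trailing bins, then all stacks */
    } cache_t;

    static cache_t *
    cache_create(const unsigned *ncached_max, unsigned nbins)
    {
        size_t size = offsetof(cache_t, bins) + nbins * sizeof(bin_t);
        size_t stack_offset, nelms = 0;
        unsigned i;
        cache_t *c;

        size = PTR_CEIL(size);          /* naturally align the stacks */
        stack_offset = size;
        for (i = 0; i < nbins; i++)
            nelms += ncached_max[i];
        size += nelms * sizeof(void *);

        c = calloc(1, size);
        if (c == NULL)
            return (NULL);
        c->nbins = nbins;
        for (i = 0; i < nbins; i++) {
            c->bins[i].ncached_max = ncached_max[i];
            c->bins[i].avail = (void **)((uintptr_t)c + stack_offset);
            stack_offset += ncached_max[i] * sizeof(void *);
        }
        return (c);
    }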