| author | click <click@gonnamakeyou.com> | 2012-04-23 20:23:30 +0200 |
|---|---|---|
| committer | click <click@gonnamakeyou.com> | 2012-04-23 20:23:30 +0200 |
| commit | c4123289916daa7bd1c7feb191e8c647fd17b163 | |
| tree | d6235b61f7ead417757273184acb27a252a96bc5 /dep/jemalloc | |
| parent | 5da5021464c649d84c755a921eae43519eba8567 | |
Revert "DEP: Updated Jemalloc to Version 2.5" - this version of the jemalloc-library is crashy at best, and should not have been pushed.
Further investigations on why this occurs is required before it will be slammed into master.
This reverts commit 126fd13e5d6b57dc0c8830248d44db504c7d103f.
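For anyone reproducing this locally: a revert commit like this one is normally generated with git's built-in revert command, which records the inverse of the named commit as a new commit. A minimal sketch, assuming the offending commit is reachable from the current branch:

    # Create a new commit that undoes the jemalloc 2.5 update.
    git revert 126fd13e5d6b57dc0c8830248d44db504c7d103f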
Diffstat (limited to 'dep/jemalloc')
34 files changed, 804 insertions(+), 2067 deletions(-)
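The per-file breakdown is not shown in this web view, but it can be recovered from the repository itself; a sketch, assuming the parent and commit hashes from the header above:

    # Per-file diffstat for this revert, limited to the jemalloc subtree.
    git diff --stat 5da5021464c649d84c755a921eae43519eba8567 \
        c4123289916daa7bd1c7feb191e8c647fd17b163 -- dep/jemalloc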
diff --git a/dep/jemalloc/VERSION b/dep/jemalloc/VERSION index aa85f5a2acf..585f53edd80 100644 --- a/dep/jemalloc/VERSION +++ b/dep/jemalloc/VERSION @@ -1 +1 @@ -2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760 +2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf diff --git a/dep/jemalloc/include/jemalloc/internal/arena.h b/dep/jemalloc/include/jemalloc/internal/arena.h index b80c118d811..9556c2c68f7 100644 --- a/dep/jemalloc/include/jemalloc/internal/arena.h +++ b/dep/jemalloc/include/jemalloc/internal/arena.h @@ -19,7 +19,6 @@ #ifdef JEMALLOC_TINY /* Smallest size class to support. */ # define LG_TINY_MIN LG_SIZEOF_PTR -# define TINY_MIN (1U << LG_TINY_MIN) #endif /* @@ -46,10 +45,9 @@ * point is implicitly RUN_BFP bits to the left. * * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be - * honored for some/all object sizes, since when heap profiling is enabled - * there is one pointer of header overhead per object (plus a constant). This - * constraint is relaxed (ignored) for runs that are so small that the - * per-region overhead is greater than: + * honored for some/all object sizes, since there is one bit of header overhead + * per object (plus a constant). This constraint is relaxed (ignored) for runs + * that are so small that the per-region overhead is greater than: * * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) */ @@ -58,10 +56,6 @@ #define RUN_MAX_OVRHD 0x0000003dU #define RUN_MAX_OVRHD_RELAX 0x00001800U -/* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS 11 -#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) - /* * The minimum ratio of active:dirty pages per arena is computed as: * @@ -75,7 +69,6 @@ typedef struct arena_chunk_map_s arena_chunk_map_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_run_s arena_run_t; -typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; @@ -112,7 +105,7 @@ struct arena_chunk_map_s { * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): * - * ???????? ???????? ????---- ----dula + * ???????? ???????? ????---- ----dzla * * ? : Unallocated: Run address for first/last pages, unset for internal * pages. @@ -120,7 +113,7 @@ struct arena_chunk_map_s { * Large: Run size for first page, unset for trailing pages. * - : Unused. * d : dirty? - * u : unzeroed? + * z : zeroed? * l : large? * a : allocated? 
* @@ -136,30 +129,30 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-a + * ssssssss ssssssss ssss---- ----du-- * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-a + * ssssssss ssssssss ssss---- ----dU-- * * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--a + * ssssssss ssssssss ssss---- ----D--- * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--a + * ssssssss ssssssss ssss---- ----D--- * * Small: - * pppppppp pppppppp pppp---- ----d--A - * pppppppp pppppppp pppp---- -------A - * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp pppp---- ----d--a + * pppppppp pppppppp pppp---- -------a + * pppppppp pppppppp pppp---- ----d--a * * Large: - * ssssssss ssssssss ssss---- ----D-LA + * ssssssss ssssssss ssss---- ----D-la * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-LA + * -------- -------- -------- ----D-la * * Large (sampled, size <= PAGE_SIZE): - * ssssssss ssssssss sssscccc ccccD-LA + * ssssssss ssssssss sssscccc ccccD-la * * Large (not sampled, size == PAGE_SIZE): - * ssssssss ssssssss ssss---- ----D-LA + * ssssssss ssssssss ssss---- ----D-la */ size_t bits; #ifdef JEMALLOC_PROF @@ -213,52 +206,16 @@ struct arena_run_s { /* Bin this run is associated with. */ arena_bin_t *bin; - /* Index of next region that has never been allocated, or nregs. */ - uint32_t nextind; + /* Stack of available freed regions, or NULL. */ + void *avail; + + /* Next region that has never been allocated, or run boundary. */ + void *next; /* Number of free regions in run. */ unsigned nfree; }; -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - */ -struct arena_bin_info_s { - /* Size of regions in a run for this bin's size class. */ - size_t reg_size; - - /* Total size of a run for this bin's size class. */ - size_t run_size; - - /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; - - /* - * Offset of first bitmap_t element in a run header for this bin's size - * class. - */ - uint32_t bitmap_offset; - - /* - * Metadata used to manipulate bitmaps for runs associated with this - * bin. - */ - bitmap_info_t bitmap_info; - -#ifdef JEMALLOC_PROF - /* - * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (opt_prof == false). - */ - uint32_t ctx0_offset; -#endif - - /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; -}; - struct arena_bin_s { /* * All operations on runcur, runs, and stats require that lock be @@ -283,6 +240,26 @@ struct arena_bin_s { */ arena_run_tree_t runs; + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Total size of a run for this bin's size class. */ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + +#ifdef JEMALLOC_PROF + /* + * Offset of first (prof_ctx_t *) in a run header for this bin's size + * class, or 0 if (opt_prof == false). + */ + uint32_t ctx0_offset; +#endif + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; + #ifdef JEMALLOC_STATS /* Bin statistics. */ malloc_bin_stats_t stats; @@ -299,18 +276,8 @@ struct arena_s { unsigned ind; /* - * Number of threads currently assigned to this arena. 
This field is - * protected by arenas_lock. - */ - unsigned nthreads; - - /* - * There are three classes of arena operations from a locking - * perspective: - * 1) Thread asssignment (modifies nthreads) is protected by - * arenas_lock. - * 2) Bin-related operations are protected by bin locks. - * 3) Chunk- and run-related operations are protected by this mutex. + * All non-bin-related operations on this arena require that lock be + * locked. */ malloc_mutex_t lock; @@ -380,35 +347,45 @@ struct arena_s { /* * bins is used to store trees of free regions of the following sizes, - * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and - * default MALLOC_CONF. + * assuming a 16-byte quantum, 4 KiB page size, and default + * JEMALLOC_OPTIONS. * * bins[i] | size | * --------+--------+ - * 0 | 8 | + * 0 | 2 | + * 1 | 4 | + * 2 | 8 | * --------+--------+ - * 1 | 16 | - * 2 | 32 | - * 3 | 48 | + * 3 | 16 | + * 4 | 32 | + * 5 | 48 | * : : - * 6 | 96 | - * 7 | 112 | - * 8 | 128 | + * 8 | 96 | + * 9 | 112 | + * 10 | 128 | * --------+--------+ - * 9 | 192 | - * 10 | 256 | - * 11 | 320 | - * 12 | 384 | - * 13 | 448 | - * 14 | 512 | + * 11 | 192 | + * 12 | 256 | + * 13 | 320 | + * 14 | 384 | + * 15 | 448 | + * 16 | 512 | + * --------+--------+ + * 17 | 768 | + * 18 | 1024 | + * 19 | 1280 | + * : : + * 27 | 3328 | + * 28 | 3584 | + * 29 | 3840 | * --------+--------+ - * 15 | 768 | - * 16 | 1024 | - * 17 | 1280 | + * 30 | 4 KiB | + * 31 | 6 KiB | + * 33 | 8 KiB | * : : - * 25 | 3328 | - * 26 | 3584 | - * 27 | 3840 | + * 43 | 28 KiB | + * 44 | 30 KiB | + * 45 | 32 KiB | * --------+--------+ */ arena_bin_t bins[1]; /* Dynamically sized. */ @@ -420,16 +397,8 @@ struct arena_s { extern size_t opt_lg_qspace_max; extern size_t opt_lg_cspace_max; -extern ssize_t opt_lg_dirty_mult; -/* - * small_size2bin is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via the SMALL_SIZE2BIN macro. - */ +extern ssize_t opt_lg_dirty_mult; extern uint8_t const *small_size2bin; -#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) - -extern arena_bin_info_t *arena_bin_info; /* Various bin-related settings. */ #ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ @@ -496,9 +465,8 @@ bool arena_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, - const void *ptr); +unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin, + const void *ptr, size_t size); # ifdef JEMALLOC_PROF prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -507,37 +475,21 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) -JEMALLOC_INLINE size_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) -{ - size_t binind = bin - arena->bins; - assert(binind < nbins); - return (binind); -} - JEMALLOC_INLINE unsigned -arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) +arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, + size_t size) { unsigned shift, diff, regind; - size_t size; - dassert(run->magic == ARENA_RUN_MAGIC); - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. 
- */ - assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); + assert(run->magic == ARENA_RUN_MAGIC); /* * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - - bin_info->reg0_offset); + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ - size = bin_info->reg_size; shift = ffs(size) - 1; diff >>= shift; size >>= shift; @@ -560,8 +512,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * divide by 0, and 1 and 2 are both powers of two, which are * handled above. */ -#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) +#define SIZE_INV_SHIFT 21 +#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) static const unsigned size_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), @@ -581,7 +533,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) #undef SIZE_INV_SHIFT } assert(diff == regind * size); - assert(regind < bin_info->nregs); + assert(regind < bin->nregs); return (regind); } @@ -608,14 +560,13 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + arena_bin_t *bin = run->bin; unsigned regind; - dassert(run->magic == ARENA_RUN_MAGIC); - regind = arena_run_regind(run, bin_info, ptr); + assert(run->magic == ARENA_RUN_MAGIC); + regind = arena_run_regind(run, bin, ptr, bin->reg_size); ret = *(prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * + bin->ctx0_offset + (regind * sizeof(prof_ctx_t *))); } } else @@ -643,16 +594,12 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); arena_bin_t *bin = run->bin; - size_t binind; - arena_bin_info_t *bin_info; unsigned regind; - dassert(run->magic == ARENA_RUN_MAGIC); - binind = arena_bin_index(chunk->arena, bin); - bin_info = &arena_bin_info[binind]; - regind = arena_run_regind(run, bin_info, ptr); + assert(run->magic == ARENA_RUN_MAGIC); + regind = arena_run_regind(run, bin, ptr, bin->reg_size); - *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset + (regind * sizeof(prof_ctx_t *)))) = ctx; } else assert((uintptr_t)ctx == (uintptr_t)1U); @@ -668,7 +615,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_chunk_map_t *mapelm; assert(arena != NULL); - dassert(arena->magic == ARENA_MAGIC); + assert(arena->magic == ARENA_MAGIC); assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -691,18 +638,11 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)run->bin->reg0_offset)) % + run->bin->reg_size == 0); bin = run->bin; -#ifdef JEMALLOC_DEBUG - { - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - 
assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_size == 0); - } -#endif malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, ptr, mapelm); malloc_mutex_unlock(&bin->lock); diff --git a/dep/jemalloc/include/jemalloc/internal/atomic.h b/dep/jemalloc/include/jemalloc/internal/atomic.h deleted file mode 100644 index 9a298623f8a..00000000000 --- a/dep/jemalloc/include/jemalloc/internal/atomic.h +++ /dev/null @@ -1,169 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#define atomic_read_uint64(p) atomic_add_uint64(p, 0) -#define atomic_read_uint32(p) atomic_add_uint32(p, 0) - -#if (LG_SIZEOF_PTR == 3) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x) -#elif (LG_SIZEOF_PTR == 2) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x) -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); -uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) -/******************************************************************************/ -/* 64-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); -} -#elif (defined(__amd64_) || defined(__x86_64__)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - x = (uint64_t)(-(int64_t)x); - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} -#else -# if (LG_SIZEOF_PTR == 3) -# error "Missing implementation for 64-bit atomic operations" -# endif -#endif - -/******************************************************************************/ -/* 32-bit operations. 
*/ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); -} -#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - x = (uint32_t)(-(int32_t)x); - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} -#else -# error "Missing implementation for 32-bit atomic operations" -#endif -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/bitmap.h b/dep/jemalloc/include/jemalloc/internal/bitmap.h deleted file mode 100644 index 605ebac58c1..00000000000 --- a/dep/jemalloc/include/jemalloc/internal/bitmap.h +++ /dev/null @@ -1,184 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS - -typedef struct bitmap_level_s bitmap_level_t; -typedef struct bitmap_info_s bitmap_info_t; -typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG - -/* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) - -/* Maximum number of levels possible. */ -#define BITMAP_MAX_LEVELS \ - (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct bitmap_level_s { - /* Offset of this level's groups within the array of groups. */ - size_t group_offset; -}; - -struct bitmap_info_s { - /* Logical number of bits in bitmap (stored at bottom level). */ - size_t nbits; - - /* Number of levels necessary for nbits. */ - unsigned nlevels; - - /* - * Only the first (nlevels+1) elements are used, and levels are ordered - * bottom to top (e.g. the bottom level is stored in levels[0]). 
- */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -size_t bitmap_info_ngroups(const bitmap_info_t *binfo); -size_t bitmap_size(size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) -JEMALLOC_INLINE bool -bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; - bitmap_t rg = bitmap[rgoff]; - /* The bitmap is full iff the root group is 0. */ - return (rg == 0); -} - -JEMALLOC_INLINE bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t g; - - assert(bit < binfo->nbits); - goff = bit >> LG_BITMAP_GROUP_NBITS; - g = bitmap[goff]; - return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); -} - -JEMALLOC_INLINE void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit) == false); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit)); - /* Propagate group state transitions up the tree. */ - if (g == 0) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (g != 0) - break; - } - } -} - -/* sfu: set first unset. */ -JEMALLOC_INLINE size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t bit; - bitmap_t g; - unsigned i; - - assert(bitmap_full(bitmap, binfo) == false); - - i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; - bit = ffsl(g) - 1; - while (i > 0) { - i--; - g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); - } - - bitmap_set(bitmap, binfo, bit); - return (bit); -} - -JEMALLOC_INLINE void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - bool propagate; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit) == false); - /* Propagate group state transitions up the tree. 
*/ - if (propagate) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) - == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (propagate == false) - break; - } - } -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/chunk.h b/dep/jemalloc/include/jemalloc/internal/chunk.h index 54b6a3ec886..a60f0ad7498 100644 --- a/dep/jemalloc/include/jemalloc/internal/chunk.h +++ b/dep/jemalloc/include/jemalloc/internal/chunk.h @@ -50,7 +50,7 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. */ void *chunk_alloc(size_t size, bool base, bool *zero); -void chunk_dealloc(void *chunk, size_t size, bool unmap); +void chunk_dealloc(void *chunk, size_t size); bool chunk_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/dep/jemalloc/include/jemalloc/internal/ckh.h b/dep/jemalloc/include/jemalloc/internal/ckh.h index 3e4ad4c85f9..d4e391b6360 100644 --- a/dep/jemalloc/include/jemalloc/internal/ckh.h +++ b/dep/jemalloc/include/jemalloc/internal/ckh.h @@ -31,7 +31,7 @@ struct ckhc_s { struct ckh_s { #ifdef JEMALLOC_DEBUG -#define CKH_MAGIC 0x3af2489d +#define CKH_MAGIG 0x3af2489d uint32_t magic; #endif diff --git a/dep/jemalloc/include/jemalloc/internal/ctl.h b/dep/jemalloc/include/jemalloc/internal/ctl.h index f1f5eb70a2a..8776ad135a7 100644 --- a/dep/jemalloc/include/jemalloc/internal/ctl.h +++ b/dep/jemalloc/include/jemalloc/internal/ctl.h @@ -29,7 +29,6 @@ struct ctl_node_s { struct ctl_arena_stats_s { bool initialized; - unsigned nthreads; size_t pactive; size_t pdirty; #ifdef JEMALLOC_STATS diff --git a/dep/jemalloc/include/jemalloc/internal/hash.h b/dep/jemalloc/include/jemalloc/internal/hash.h index 8a46ce30803..d12cdb8359f 100644 --- a/dep/jemalloc/include/jemalloc/internal/hash.h +++ b/dep/jemalloc/include/jemalloc/internal/hash.h @@ -17,7 +17,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(HASH_C_)) /* * The following hash function is based on MurmurHash64A(), placed into the * public domain by Austin Appleby. 
See http://murmurhash.googlepages.com/ for @@ -26,7 +26,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); JEMALLOC_INLINE uint64_t hash(const void *key, size_t len, uint64_t seed) { - const uint64_t m = 0xc6a4a7935bd1e995LLU; + const uint64_t m = 0xc6a4a7935bd1e995; const int r = 47; uint64_t h = seed ^ (len * m); const uint64_t *data = (const uint64_t *)key; @@ -62,7 +62,7 @@ hash(const void *key, size_t len, uint64_t seed) h *= m; h ^= h >> r; - return (h); + return h; } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h index cd554bea1b9..611f0c665a1 100644 --- a/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/dep/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -33,12 +33,6 @@ #define JEMALLOC_MANGLE #include "../jemalloc.h" -#include "jemalloc/internal/private_namespace.h" - -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) -#include <libkern/OSAtomic.h> -#endif - #ifdef JEMALLOC_ZONE #include <mach/mach_error.h> #include <mach/mach_init.h> @@ -61,9 +55,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. */ -#ifndef assert -# ifdef JEMALLOC_DEBUG -# define assert(e) do { \ +#ifdef JEMALLOC_DEBUG +# define assert(e) do { \ if (!(e)) { \ char line_buf[UMAX2S_BUFSIZE]; \ malloc_write("<jemalloc>: "); \ @@ -77,15 +70,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); abort(); \ } \ } while (0) -# else -# define assert(e) -# endif -#endif - -#ifdef JEMALLOC_DEBUG -# define dassert(e) assert(e) #else -# define dassert(e) +#define assert(e) #endif /* @@ -160,19 +146,12 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define QUANTUM_CEILING(a) \ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) - -/* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ - (((a) + LONG_MASK) & ~LONG_MASK) - #define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) -/* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ - (((a) + PTR_MASK) & ~PTR_MASK) +/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ +#if (!defined(PIC) && !defined(NO_TLS)) +# define NO_TLS +#endif /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. 
@@ -219,7 +198,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" @@ -228,7 +206,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" -#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" @@ -244,14 +221,12 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); /******************************************************************************/ #define JEMALLOC_H_STRUCTS -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" @@ -265,13 +240,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #endif #include "jemalloc/internal/prof.h" -#ifdef JEMALLOC_STATS -typedef struct { - uint64_t allocated; - uint64_t deallocated; -} thread_allocated_t; -#endif - #undef JEMALLOC_H_STRUCTS /******************************************************************************/ #define JEMALLOC_H_EXTERNS @@ -301,7 +269,6 @@ extern size_t lg_pagesize; extern unsigned ncpus; extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. 
*/ -extern pthread_key_t arenas_tsd; #ifndef NO_TLS /* * Map of pthread_self() --> arenas[???], used for selecting an arena to use @@ -311,9 +278,9 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); # define ARENA_GET() arenas_tls # define ARENA_SET(v) do { \ arenas_tls = (v); \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ } while (0) #else +extern pthread_key_t arenas_tsd; # define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) # define ARENA_SET(v) do { \ pthread_setspecific(arenas_tsd, (void *)(v)); \ @@ -328,28 +295,45 @@ extern arena_t **arenas; extern unsigned narenas; #ifdef JEMALLOC_STATS +typedef struct { + uint64_t allocated; + uint64_t deallocated; +} thread_allocated_t; # ifndef NO_TLS extern __thread thread_allocated_t thread_allocated_tls; -# define ALLOCATED_GET() (thread_allocated_tls.allocated) -# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) -# define DEALLOCATED_GET() (thread_allocated_tls.deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated) +# define ALLOCATED_GET() thread_allocated_tls.allocated +# define DEALLOCATED_GET() thread_allocated_tls.deallocated # define ALLOCATED_ADD(a, d) do { \ thread_allocated_tls.allocated += a; \ thread_allocated_tls.deallocated += d; \ } while (0) # else extern pthread_key_t thread_allocated_tsd; -thread_allocated_t *thread_allocated_get_hard(void); - -# define ALLOCATED_GET() (thread_allocated_get()->allocated) -# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) -# define DEALLOCATED_GET() (thread_allocated_get()->deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated) +# define ALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? ((thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd))->allocated : 0) +# define DEALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? 
((thread_allocated_t \ + *)pthread_getspecific(thread_allocated_tsd))->deallocated : \ + 0) # define ALLOCATED_ADD(a, d) do { \ - thread_allocated_t *thread_allocated = thread_allocated_get(); \ - thread_allocated->allocated += (a); \ - thread_allocated->deallocated += (d); \ + thread_allocated_t *thread_allocated = (thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd); \ + if (thread_allocated != NULL) { \ + thread_allocated->allocated += (a); \ + thread_allocated->deallocated += (d); \ + } else { \ + thread_allocated = (thread_allocated_t *) \ + imalloc(sizeof(thread_allocated_t)); \ + if (thread_allocated != NULL) { \ + pthread_setspecific(thread_allocated_tsd, \ + thread_allocated); \ + thread_allocated->allocated = (a); \ + thread_allocated->deallocated = (d); \ + } \ + } \ } while (0) # endif #endif @@ -360,14 +344,12 @@ int buferror(int errnum, char *buf, size_t buflen); void jemalloc_prefork(void); void jemalloc_postfork(void); -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" @@ -385,7 +367,6 @@ void jemalloc_postfork(void); /******************************************************************************/ #define JEMALLOC_H_INLINES -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemprn.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/stats.h" @@ -403,9 +384,6 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); void malloc_write(const char *s); arena_t *choose_arena(void); -# if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -thread_allocated_t *thread_allocated_get(void); -# endif #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -436,10 +414,10 @@ s2u(size_t size) { if (size <= small_maxclass) - return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); + return arenas[0]->bins[small_size2bin[size]].reg_size; if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); + return PAGE_CEILING(size); + return CHUNK_CEILING(size); } /* @@ -480,8 +458,10 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { - if (usize <= small_maxclass) - return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); + if (usize <= small_maxclass) { + return + (arenas[0]->bins[small_size2bin[usize]].reg_size); + } return (PAGE_CEILING(usize)); } else { size_t run_size; @@ -564,22 +544,8 @@ choose_arena(void) return (ret); } - -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -JEMALLOC_INLINE thread_allocated_t * -thread_allocated_get(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - pthread_getspecific(thread_allocated_tsd); - - if (thread_allocated == NULL) - return (thread_allocated_get_hard()); - return (thread_allocated); -} -#endif #endif -#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/arena.h" @@ -591,7 +557,7 @@ thread_allocated_get(void) #ifndef JEMALLOC_ENABLE_INLINE void *imalloc(size_t size); void *icalloc(size_t size); -void *ipalloc(size_t usize, size_t alignment, bool zero); +void *ipalloc(size_t size, size_t alignment, bool 
zero); size_t isalloc(const void *ptr); # ifdef JEMALLOC_IVSALLOC size_t ivsalloc(const void *ptr); @@ -625,39 +591,28 @@ icalloc(size_t size) } JEMALLOC_INLINE void * -ipalloc(size_t usize, size_t alignment, bool zero) +ipalloc(size_t size, size_t alignment, bool zero) { void *ret; + size_t usize; + size_t run_size +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; - assert(usize != 0); - assert(usize == sa2u(usize, alignment, NULL)); - + usize = sa2u(size, alignment, &run_size); + if (usize == 0) + return (NULL); if (usize <= arena_maxclass && alignment <= PAGE_SIZE) ret = arena_malloc(usize, zero); - else { - size_t run_size -#ifdef JEMALLOC_CC_SILENCE - = 0 -#endif - ; - - /* - * Ideally we would only ever call sa2u() once per aligned - * allocation request, and the caller of this function has - * already done so once. However, it's rather burdensome to - * require every caller to pass in run_size, especially given - * that it's only relevant to large allocations. Therefore, - * just call it again here in order to get run_size. - */ - sa2u(usize, alignment, &run_size); - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), usize, run_size, - alignment, zero); - } else if (alignment <= chunksize) - ret = huge_malloc(usize, zero); - else - ret = huge_palloc(usize, alignment, zero); - } + else if (run_size <= arena_maxclass) { + ret = arena_palloc(choose_arena(), usize, run_size, alignment, + zero); + } else if (alignment <= chunksize) + ret = huge_malloc(usize, zero); + else + ret = huge_palloc(usize, alignment, zero); assert(((uintptr_t)ret & (alignment - 1)) == 0); return (ret); @@ -674,7 +629,7 @@ isalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); #ifdef JEMALLOC_PROF ret = arena_salloc_demote(ptr); @@ -728,7 +683,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { - size_t usize, copysize; + size_t copysize; /* * Existing object alignment is inadquate; allocate new space @@ -736,18 +691,12 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ if (no_move) return (NULL); - usize = sa2u(size + extra, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipalloc(size + extra, alignment, zero); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, without extra this time. 
*/ - usize = sa2u(size, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipalloc(size, alignment, zero); if (ret == NULL) return (NULL); } diff --git a/dep/jemalloc/include/jemalloc/internal/mb.h b/dep/jemalloc/include/jemalloc/internal/mb.h index dc9f2a54262..1707aa91d68 100644 --- a/dep/jemalloc/include/jemalloc/internal/mb.h +++ b/dep/jemalloc/include/jemalloc/internal/mb.h @@ -17,7 +17,7 @@ void mb_write(void); #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MB_C_)) #ifdef __i386__ /* * According to the Intel Architecture Software Developer's Manual, current diff --git a/dep/jemalloc/include/jemalloc/internal/mutex.h b/dep/jemalloc/include/jemalloc/internal/mutex.h index 62947ced55e..dcca01edd5d 100644 --- a/dep/jemalloc/include/jemalloc/internal/mutex.h +++ b/dep/jemalloc/include/jemalloc/internal/mutex.h @@ -1,11 +1,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#ifdef JEMALLOC_OSSPIN -typedef OSSpinLock malloc_mutex_t; -#else typedef pthread_mutex_t malloc_mutex_t; -#endif #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP # define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP @@ -45,26 +41,17 @@ JEMALLOC_INLINE void malloc_mutex_lock(malloc_mutex_t *mutex) { - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - OSSpinLockLock(mutex); -#else + if (isthreaded) pthread_mutex_lock(mutex); -#endif - } } JEMALLOC_INLINE bool malloc_mutex_trylock(malloc_mutex_t *mutex) { - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - return (OSSpinLockTry(mutex) == false); -#else + if (isthreaded) return (pthread_mutex_trylock(mutex) != 0); -#endif - } else + else return (false); } @@ -72,13 +59,8 @@ JEMALLOC_INLINE void malloc_mutex_unlock(malloc_mutex_t *mutex) { - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - OSSpinLockUnlock(mutex); -#else + if (isthreaded) pthread_mutex_unlock(mutex); -#endif - } } #endif diff --git a/dep/jemalloc/include/jemalloc/internal/private_namespace.h b/dep/jemalloc/include/jemalloc/internal/private_namespace.h deleted file mode 100644 index d4f5f96d7b2..00000000000 --- a/dep/jemalloc/include/jemalloc/internal/private_namespace.h +++ /dev/null @@ -1,195 +0,0 @@ -#define arena_bin_index JEMALLOC_N(arena_bin_index) -#define arena_boot JEMALLOC_N(arena_boot) -#define arena_dalloc JEMALLOC_N(arena_dalloc) -#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) -#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) -#define arena_malloc JEMALLOC_N(arena_malloc) -#define arena_malloc_large JEMALLOC_N(arena_malloc_large) -#define arena_malloc_small JEMALLOC_N(arena_malloc_small) -#define arena_new JEMALLOC_N(arena_new) -#define arena_palloc JEMALLOC_N(arena_palloc) -#define arena_prof_accum JEMALLOC_N(arena_prof_accum) -#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) -#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) -#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) -#define arena_purge_all JEMALLOC_N(arena_purge_all) -#define arena_ralloc JEMALLOC_N(arena_ralloc) -#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) -#define arena_run_regind JEMALLOC_N(arena_run_regind) -#define arena_salloc JEMALLOC_N(arena_salloc) -#define arena_salloc_demote JEMALLOC_N(arena_salloc_demote) -#define arena_stats_merge JEMALLOC_N(arena_stats_merge) -#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) -#define arenas_bin_i_index 
JEMALLOC_N(arenas_bin_i_index) -#define arenas_extend JEMALLOC_N(arenas_extend) -#define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) -#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) -#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) -#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) -#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) -#define base_alloc JEMALLOC_N(base_alloc) -#define base_boot JEMALLOC_N(base_boot) -#define base_node_alloc JEMALLOC_N(base_node_alloc) -#define base_node_dealloc JEMALLOC_N(base_node_dealloc) -#define bitmap_full JEMALLOC_N(bitmap_full) -#define bitmap_get JEMALLOC_N(bitmap_get) -#define bitmap_info_init JEMALLOC_N(bitmap_info_init) -#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups) -#define bitmap_init JEMALLOC_N(bitmap_init) -#define bitmap_set JEMALLOC_N(bitmap_set) -#define bitmap_sfu JEMALLOC_N(bitmap_sfu) -#define bitmap_size JEMALLOC_N(bitmap_size) -#define bitmap_unset JEMALLOC_N(bitmap_unset) -#define bt_init JEMALLOC_N(bt_init) -#define buferror JEMALLOC_N(buferror) -#define choose_arena JEMALLOC_N(choose_arena) -#define choose_arena_hard JEMALLOC_N(choose_arena_hard) -#define chunk_alloc JEMALLOC_N(chunk_alloc) -#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) -#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) -#define chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap) -#define chunk_boot JEMALLOC_N(chunk_boot) -#define chunk_dealloc JEMALLOC_N(chunk_dealloc) -#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) -#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) -#define chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap) -#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) -#define chunk_in_dss JEMALLOC_N(chunk_in_dss) -#define chunk_in_swap JEMALLOC_N(chunk_in_swap) -#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) -#define chunk_swap_boot JEMALLOC_N(chunk_swap_boot) -#define chunk_swap_enable JEMALLOC_N(chunk_swap_enable) -#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) -#define ckh_count JEMALLOC_N(ckh_count) -#define ckh_delete JEMALLOC_N(ckh_delete) -#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert) -#define ckh_insert JEMALLOC_N(ckh_insert) -#define ckh_isearch JEMALLOC_N(ckh_isearch) -#define ckh_iter JEMALLOC_N(ckh_iter) -#define ckh_new JEMALLOC_N(ckh_new) -#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) -#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) -#define ckh_rebuild JEMALLOC_N(ckh_rebuild) -#define ckh_remove JEMALLOC_N(ckh_remove) -#define ckh_search JEMALLOC_N(ckh_search) -#define ckh_string_hash JEMALLOC_N(ckh_string_hash) -#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) -#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) -#define ckh_try_insert JEMALLOC_N(ckh_try_insert) -#define create_zone JEMALLOC_N(create_zone) -#define ctl_boot JEMALLOC_N(ctl_boot) -#define ctl_bymib JEMALLOC_N(ctl_bymib) -#define ctl_byname JEMALLOC_N(ctl_byname) -#define ctl_nametomib JEMALLOC_N(ctl_nametomib) -#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) -#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) -#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) -#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse) -#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start) -#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last) -#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new) 
-#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next) -#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch) -#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev) -#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch) -#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove) -#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter) -#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) -#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) -#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) -#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first) -#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert) -#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter) -#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse) -#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start) -#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last) -#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new) -#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next) -#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch) -#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev) -#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch) -#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove) -#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter) -#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse) -#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) -#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) -#define hash JEMALLOC_N(hash) -#define huge_boot JEMALLOC_N(huge_boot) -#define huge_dalloc JEMALLOC_N(huge_dalloc) -#define huge_malloc JEMALLOC_N(huge_malloc) -#define huge_palloc JEMALLOC_N(huge_palloc) -#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) -#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) -#define huge_ralloc JEMALLOC_N(huge_ralloc) -#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) -#define huge_salloc JEMALLOC_N(huge_salloc) -#define iallocm JEMALLOC_N(iallocm) -#define icalloc JEMALLOC_N(icalloc) -#define idalloc JEMALLOC_N(idalloc) -#define imalloc JEMALLOC_N(imalloc) -#define ipalloc JEMALLOC_N(ipalloc) -#define iralloc JEMALLOC_N(iralloc) -#define isalloc JEMALLOC_N(isalloc) -#define ivsalloc JEMALLOC_N(ivsalloc) -#define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init) -#define jemalloc_postfork JEMALLOC_N(jemalloc_postfork) -#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) -#define malloc_cprintf JEMALLOC_N(malloc_cprintf) -#define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy) -#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) -#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) -#define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) -#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) -#define malloc_printf JEMALLOC_N(malloc_printf) -#define malloc_write JEMALLOC_N(malloc_write) -#define mb_write JEMALLOC_N(mb_write) -#define pow2_ceil JEMALLOC_N(pow2_ceil) -#define prof_backtrace JEMALLOC_N(prof_backtrace) -#define prof_boot0 JEMALLOC_N(prof_boot0) -#define prof_boot1 JEMALLOC_N(prof_boot1) -#define prof_boot2 JEMALLOC_N(prof_boot2) -#define prof_ctx_get JEMALLOC_N(prof_ctx_get) -#define prof_ctx_set 
JEMALLOC_N(prof_ctx_set) -#define prof_free JEMALLOC_N(prof_free) -#define prof_gdump JEMALLOC_N(prof_gdump) -#define prof_idump JEMALLOC_N(prof_idump) -#define prof_lookup JEMALLOC_N(prof_lookup) -#define prof_malloc JEMALLOC_N(prof_malloc) -#define prof_mdump JEMALLOC_N(prof_mdump) -#define prof_realloc JEMALLOC_N(prof_realloc) -#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) -#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) -#define prof_tdata_init JEMALLOC_N(prof_tdata_init) -#define pthread_create JEMALLOC_N(pthread_create) -#define rtree_get JEMALLOC_N(rtree_get) -#define rtree_get_locked JEMALLOC_N(rtree_get_locked) -#define rtree_new JEMALLOC_N(rtree_new) -#define rtree_set JEMALLOC_N(rtree_set) -#define s2u JEMALLOC_N(s2u) -#define sa2u JEMALLOC_N(sa2u) -#define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) -#define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) -#define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) -#define stats_cactive_add JEMALLOC_N(stats_cactive_add) -#define stats_cactive_get JEMALLOC_N(stats_cactive_get) -#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) -#define stats_print JEMALLOC_N(stats_print) -#define szone2ozone JEMALLOC_N(szone2ozone) -#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) -#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) -#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) -#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) -#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) -#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) -#define tcache_boot JEMALLOC_N(tcache_boot) -#define tcache_create JEMALLOC_N(tcache_create) -#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) -#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) -#define tcache_destroy JEMALLOC_N(tcache_destroy) -#define tcache_event JEMALLOC_N(tcache_event) -#define tcache_get JEMALLOC_N(tcache_get) -#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) -#define thread_allocated_get JEMALLOC_N(thread_allocated_get) -#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) -#define u2s JEMALLOC_N(u2s) diff --git a/dep/jemalloc/include/jemalloc/internal/prof.h b/dep/jemalloc/include/jemalloc/internal/prof.h index e9064ba6e73..7864000b88b 100644 --- a/dep/jemalloc/include/jemalloc/internal/prof.h +++ b/dep/jemalloc/include/jemalloc/internal/prof.h @@ -227,60 +227,9 @@ bool prof_boot2(void); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#define PROF_ALLOC_PREP(nignore, size, ret) do { \ - prof_tdata_t *prof_tdata; \ - prof_bt_t bt; \ - \ - assert(size == s2u(size)); \ - \ - prof_tdata = PROF_TCACHE_GET(); \ - if (prof_tdata == NULL) { \ - prof_tdata = prof_tdata_init(); \ - if (prof_tdata == NULL) { \ - ret = NULL; \ - break; \ - } \ - } \ - \ - if (opt_prof_active == false) { \ - /* Sampling is currently inactive, so avoid sampling. */\ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } else if (opt_lg_prof_sample == 0) { \ - /* Don't bother with sampling logic, since sampling */\ - /* interval is 1. */\ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ - ret = prof_lookup(&bt); \ - } else { \ - if (prof_tdata->threshold == 0) { \ - /* Initialize. Seed the prng differently for */\ - /* each thread. 
*/\ - prof_tdata->prn_state = \ - (uint64_t)(uintptr_t)&size; \ - prof_sample_threshold_update(prof_tdata); \ - } \ - \ - /* Determine whether to capture a backtrace based on */\ - /* whether size is enough for prof_accum to reach */\ - /* prof_tdata->threshold. However, delay updating */\ - /* these variables until prof_{m,re}alloc(), because */\ - /* we don't know for sure that the allocation will */\ - /* succeed. */\ - /* */\ - /* Use subtraction rather than addition to avoid */\ - /* potential integer overflow. */\ - if (size >= prof_tdata->threshold - \ - prof_tdata->accum) { \ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ - ret = prof_lookup(&bt); \ - } else \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } \ -} while (0) - #ifndef JEMALLOC_ENABLE_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +prof_thr_cnt_t *prof_alloc_prep(size_t size); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool prof_sample_accum_update(size_t size); @@ -298,22 +247,8 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) double u; /* - * Compute sample threshold as a geometrically distributed random + * Compute prof_sample_threshold as a geometrically distributed random * variable with mean (2^opt_lg_prof_sample). - * - * __ __ - * | log(u) | 1 - * prof_tdata->threshold = | -------- |, where p = ------------------- - * | log(1-p) | opt_lg_prof_sample - * 2 - * - * For more information on the math, see: - * - * Non-Uniform Random Variate Generation - * Luc Devroye - * Springer-Verlag, New York, 1986 - * pp 500 - * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) */ prn64(r, 53, prof_tdata->prn_state, (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); @@ -323,6 +258,71 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) + (uint64_t)1U; } +JEMALLOC_INLINE prof_thr_cnt_t * +prof_alloc_prep(size_t size) +{ +#ifdef JEMALLOC_ENABLE_INLINE + /* This function does not have its own stack frame, because it is inlined. */ +# define NIGNORE 1 +#else +# define NIGNORE 2 +#endif + prof_thr_cnt_t *ret; + prof_tdata_t *prof_tdata; + prof_bt_t bt; + + assert(size == s2u(size)); + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata == NULL) { + prof_tdata = prof_tdata_init(); + if (prof_tdata == NULL) + return (NULL); + } + + if (opt_prof_active == false) { + /* Sampling is currently inactive, so avoid sampling. */ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } else if (opt_lg_prof_sample == 0) { + /* + * Don't bother with sampling logic, since sampling interval is + * 1. + */ + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else { + if (prof_tdata->threshold == 0) { + /* + * Initialize. Seed the prng differently for each + * thread. + */ + prof_tdata->prn_state = (uint64_t)(uintptr_t)&size; + prof_sample_threshold_update(prof_tdata); + } + + /* + * Determine whether to capture a backtrace based on whether + * size is enough for prof_accum to reach + * prof_tdata->threshold. However, delay updating these + * variables until prof_{m,re}alloc(), because we don't know + * for sure that the allocation will succeed. + * + * Use subtraction rather than addition to avoid potential + * integer overflow. 
+ */ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } + + return (ret); +#undef NIGNORE +} + JEMALLOC_INLINE prof_ctx_t * prof_ctx_get(const void *ptr) { @@ -334,7 +334,7 @@ prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); ret = arena_prof_ctx_get(ptr); } else @@ -353,7 +353,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); arena_prof_ctx_set(ptr, ctx); } else @@ -374,7 +374,7 @@ prof_sample_accum_update(size_t size) /* Take care to avoid integer overflow. */ if (size >= prof_tdata->threshold - prof_tdata->accum) { prof_tdata->accum -= (prof_tdata->threshold - size); - /* Compute new sample threshold. */ + /* Compute new prof_sample_threshold. */ prof_sample_threshold_update(prof_tdata); while (prof_tdata->accum >= prof_tdata->threshold) { prof_tdata->accum -= prof_tdata->threshold; @@ -401,7 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) * always possible to tell in advance how large an * object's usable size will be, so there should never * be a difference between the size passed to - * PROF_ALLOC_PREP() and prof_malloc(). + * prof_alloc_prep() and prof_malloc(). */ assert((uintptr_t)cnt == (uintptr_t)1U); } @@ -445,7 +445,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, if (prof_sample_accum_update(size)) { /* * Don't sample. The size passed to - * PROF_ALLOC_PREP() was larger than what + * prof_alloc_prep() was larger than what * actually got allocated, so a backtrace was * captured for this allocation, even though * its actual size was insufficient to cross diff --git a/dep/jemalloc/include/jemalloc/internal/rtree.h b/dep/jemalloc/include/jemalloc/internal/rtree.h index 95d6355a5f4..9d58ebac545 100644 --- a/dep/jemalloc/include/jemalloc/internal/rtree.h +++ b/dep/jemalloc/include/jemalloc/internal/rtree.h @@ -49,7 +49,7 @@ void *rtree_get(rtree_t *rtree, uintptr_t key); bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); #endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_)) #define RTREE_GET_GENERATE(f) \ /* The least significant bits of the key are ignored. 
*/ \ JEMALLOC_INLINE void * \ diff --git a/dep/jemalloc/include/jemalloc/internal/stats.h b/dep/jemalloc/include/jemalloc/internal/stats.h index 2a9b31d9ffc..3fc2080a34b 100644 --- a/dep/jemalloc/include/jemalloc/internal/stats.h +++ b/dep/jemalloc/include/jemalloc/internal/stats.h @@ -154,10 +154,6 @@ struct chunk_stats_s { extern bool opt_stats_print; -#ifdef JEMALLOC_STATS -extern size_t stats_cactive; -#endif - char *u2s(uint64_t x, unsigned base, char *s); #ifdef JEMALLOC_STATS void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, @@ -170,38 +166,9 @@ void stats_print(void (*write)(void *, const char *), void *cbopaque, #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ -#ifdef JEMALLOC_H_INLINES #ifdef JEMALLOC_STATS +#ifdef JEMALLOC_H_INLINES -#ifndef JEMALLOC_ENABLE_INLINE -size_t stats_cactive_get(void); -void stats_cactive_add(size_t size); -void stats_cactive_sub(size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) -JEMALLOC_INLINE size_t -stats_cactive_get(void) -{ - - return (atomic_read_z(&stats_cactive)); -} - -JEMALLOC_INLINE void -stats_cactive_add(size_t size) -{ - - atomic_add_z(&stats_cactive, size); -} - -JEMALLOC_INLINE void -stats_cactive_sub(size_t size) -{ - - atomic_sub_z(&stats_cactive, size); -} -#endif - -#endif /* JEMALLOC_STATS */ #endif /* JEMALLOC_H_INLINES */ +#endif /* JEMALLOC_STATS */ /******************************************************************************/ diff --git a/dep/jemalloc/include/jemalloc/internal/tcache.h b/dep/jemalloc/include/jemalloc/internal/tcache.h index da3c68c5770..1ad91a9b1e0 100644 --- a/dep/jemalloc/include/jemalloc/internal/tcache.h +++ b/dep/jemalloc/include/jemalloc/internal/tcache.h @@ -2,7 +2,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; @@ -33,22 +32,15 @@ typedef struct tcache_s tcache_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - struct tcache_bin_s { # ifdef JEMALLOC_STATS tcache_bin_stats_t tstats; # endif - int low_water; /* Min # cached since last GC. */ - unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ + unsigned low_water; /* Min # cached since last GC. */ + unsigned high_water; /* Max # cached since last GC. */ unsigned ncached; /* # of cached objects. */ - void **avail; /* Stack of available objects. */ + unsigned ncached_max; /* Upper limit on ncached. */ + void *avail; /* Chain of available objects. */ }; struct tcache_s { @@ -62,12 +54,6 @@ struct tcache_s { unsigned ev_cnt; /* Event count since incremental GC. */ unsigned next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ - /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. 
- */ }; #endif /* JEMALLOC_H_STRUCTS */ @@ -78,8 +64,6 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_gc_sweep; -extern tcache_bin_info_t *tcache_bin_info; - /* Map of thread-specific caches. */ #ifndef NO_TLS extern __thread tcache_t *tcache_tls @@ -126,7 +110,7 @@ void tcache_destroy(tcache_t *tcache); #ifdef JEMALLOC_STATS void tcache_stats_merge(tcache_t *tcache, arena_t *arena); #endif -bool tcache_boot(void); +void tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -185,7 +169,6 @@ tcache_event(tcache_t *tcache) if (tcache->ev_cnt == tcache_gc_incr) { size_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; if (tbin->low_water > 0) { /* @@ -209,22 +192,9 @@ tcache_event(tcache_t *tcache) #endif ); } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) - >= 1) - tbin->lg_fill_div++; - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X. Make sure lg_fill_div - * stays greater than 0. - */ - if (tbin->lg_fill_div > 1) - tbin->lg_fill_div--; } tbin->low_water = tbin->ncached; + tbin->high_water = tbin->ncached; tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) @@ -238,14 +208,13 @@ tcache_alloc_easy(tcache_bin_t *tbin) { void *ret; - if (tbin->ncached == 0) { - tbin->low_water = -1; + if (tbin->ncached == 0) return (NULL); - } tbin->ncached--; - if ((int)tbin->ncached < tbin->low_water) + if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; - ret = tbin->avail[tbin->ncached]; + ret = tbin->avail; + tbin->avail = *(void **)ret; return (ret); } @@ -256,7 +225,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) size_t binind; tcache_bin_t *tbin; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin[size]; assert(binind < nbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -265,7 +234,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } - assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); + assert(arena_salloc(ret) == tcache->arena->bins[binind].reg_size); if (zero == false) { #ifdef JEMALLOC_FILL @@ -281,7 +250,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tbin->tstats.nrequests++; #endif #ifdef JEMALLOC_PROF - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; + tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size; #endif tcache_event(tcache); return (ret); @@ -345,7 +314,6 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) arena_run_t *run; arena_bin_t *bin; tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; size_t pageind, binind; arena_chunk_map_t *mapelm; @@ -357,7 +325,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); @@ -365,22 +333,23 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) #ifdef JEMALLOC_FILL if (opt_junk) - memset(ptr, 0x5a, arena_bin_info[binind].reg_size); + memset(ptr, 0x5a, bin->reg_size); #endif tbin = &tcache->tbins[binind]; - tbin_info = 
&tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1) + if (tbin->ncached == tbin->ncached_max) { + tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) , tcache #endif ); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; + assert(tbin->ncached < tbin->ncached_max); + *(void **)ptr = tbin->avail; + tbin->avail = ptr; tbin->ncached++; + if (tbin->ncached > tbin->high_water) + tbin->high_water = tbin->ncached; tcache_event(tcache); } @@ -392,7 +361,6 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); assert(arena_salloc(ptr) > small_maxclass); @@ -409,18 +377,19 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) #endif tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1) + if (tbin->ncached == tbin->ncached_max) { + tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1) #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) , tcache #endif ); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; + assert(tbin->ncached < tbin->ncached_max); + *(void **)ptr = tbin->avail; + tbin->avail = ptr; tbin->ncached++; + if (tbin->ncached > tbin->high_water) + tbin->high_water = tbin->ncached; tcache_event(tcache); } diff --git a/dep/jemalloc/include/jemalloc/jemalloc.h b/dep/jemalloc/include/jemalloc/jemalloc.h index 3842e28115e..287dac46ed2 100644 --- a/dep/jemalloc/include/jemalloc/jemalloc.h +++ b/dep/jemalloc/include/jemalloc/jemalloc.h @@ -7,19 +7,19 @@ extern "C" { #include <limits.h> #include <strings.h> -#define JEMALLOC_VERSION "2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760" +#define JEMALLOC_VERSION "2.1.0-0-g1c4b088b08d3bc7617a34387e196ce03716160bf" #define JEMALLOC_VERSION_MAJOR 2 -#define JEMALLOC_VERSION_MINOR 2 -#define JEMALLOC_VERSION_BUGFIX 5 +#define JEMALLOC_VERSION_MINOR 1 +#define JEMALLOC_VERSION_BUGFIX 0 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "fc1bb70e5f0d9a58b39efa39cc549b5af5104760" +#define JEMALLOC_VERSION_GID "1c4b088b08d3bc7617a34387e196ce03716160bf" #include "jemalloc_defs.h" #ifndef JEMALLOC_P # define JEMALLOC_P(s) s #endif -#define ALLOCM_LG_ALIGN(la) (la) +#define ALLOCM_LG_ALIGN ((int)0x3f) #if LG_SIZEOF_PTR == 2 #define ALLOCM_ALIGN(a) (ffs(a)-1) #else diff --git a/dep/jemalloc/include/jemalloc/jemalloc_defs.h b/dep/jemalloc/include/jemalloc/jemalloc_defs.h index f0f8fa71a4d..a641b56da03 100644 --- a/dep/jemalloc/include/jemalloc/jemalloc_defs.h +++ b/dep/jemalloc/include/jemalloc/jemalloc_defs.h @@ -20,32 +20,11 @@ #endif /* - * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. - * For shared libraries, symbol visibility mechanisms prevent these symbols - * from being exported, but for static libraries, naming collisions are a real - * possibility. - */ -#define JEMALLOC_PRIVATE_NAMESPACE "" -#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix - -/* * Hyper-threaded CPUs may need a special instruction inside spin loops in * order to yield to another virtual CPU. 
*/ #define CPU_SPINWAIT __asm__ volatile("pause") -/* - * Defined if OSAtomic*() functions are available, as provided by Darwin, and - * documented in the atomic(3) manual page. - */ -/* #undef JEMALLOC_OSATOMIC */ - -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -/* #undef JEMALLOC_OSSPIN */ - /* Defined if __attribute__((...)) syntax is supported. */ #define JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR @@ -75,21 +54,18 @@ /* Use libgcc for profile backtracing if defined. */ /* #undef JEMALLOC_PROF_LIBGCC */ -/* Use gcc intrinsics for profile backtracing if defined. */ -/* #undef JEMALLOC_PROF_GCC */ - /* * JEMALLOC_TINY enables support for tiny objects, which are smaller than one * quantum. */ -#define JEMALLOC_TINY +/* #undef JEMALLOC_TINY */ /* * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. * This makes it possible to allocate/deallocate objects without any locking * when the cache is in the steady state. */ -#define JEMALLOC_TCACHE +/* #undef JEMALLOC_TCACHE */ /* * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage @@ -110,7 +86,7 @@ /* #undef JEMALLOC_SYSV */ /* Support lazy locking (avoid locking unless a second thread is launched). */ -#define JEMALLOC_LAZY_LOCK +/* #undef JEMALLOC_LAZY_LOCK */ /* Determine page size at run time if defined. */ /* #undef DYNAMIC_PAGE_SHIFT */ @@ -157,12 +133,9 @@ /* #undef JEMALLOC_PURGE_MADVISE_FREE */ /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#define LG_SIZEOF_PTR 3 +#define LG_SIZEOF_PTR 2 /* sizeof(int) == 2^LG_SIZEOF_INT. */ #define LG_SIZEOF_INT 2 -/* sizeof(long) == 2^LG_SIZEOF_LONG. */ -#define LG_SIZEOF_LONG 3 - #endif /* JEMALLOC_DEFS_H_ */ diff --git a/dep/jemalloc/src/arena.c b/dep/jemalloc/src/arena.c index d166ca1ec4d..7f939b3cd77 100644 --- a/dep/jemalloc/src/arena.c +++ b/dep/jemalloc/src/arena.c @@ -8,7 +8,6 @@ size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; uint8_t const *small_size2bin; -arena_bin_info_t *arena_bin_info; /* Various bin-related settings. */ unsigned nqbins; @@ -26,27 +25,26 @@ size_t mspace_mask; /* * const_small_size2bin is a static constant lookup table that in the common - * case can be used as-is for small_size2bin. + * case can be used as-is for small_size2bin. For dynamically linked programs, + * this avoids a page of memory overhead per process. */ -#if (LG_TINY_MIN == 2) -#define S2B_4(i) i, +#define S2B_1(i) i, +#define S2B_2(i) S2B_1(i) S2B_1(i) +#define S2B_4(i) S2B_2(i) S2B_2(i) #define S2B_8(i) S2B_4(i) S2B_4(i) -#elif (LG_TINY_MIN == 3) -#define S2B_8(i) i, -#else -# error "Unsupported LG_TINY_MIN" -#endif #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) #define S2B_64(i) S2B_32(i) S2B_32(i) #define S2B_128(i) S2B_64(i) S2B_64(i) #define S2B_256(i) S2B_128(i) S2B_128(i) /* - * The number of elements in const_small_size2bin is dependent on the - * definition for SUBPAGE. + * The number of elements in const_small_size2bin is dependent on page size + * and on the definition for SUBPAGE. If SUBPAGE changes, the '- 255' must also + * change, along with the addition/removal of static lookup table element + * definitions. 
*/ -static JEMALLOC_ATTR(aligned(CACHELINE)) - const uint8_t const_small_size2bin[] = { +static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = { + S2B_1(0xffU) /* 0 */ #if (LG_QUANTUM == 4) /* 16-byte quantum **********************/ # ifdef JEMALLOC_TINY @@ -175,6 +173,7 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); +static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, @@ -192,9 +191,6 @@ static bool small_size2bin_init(void); static void small_size2bin_validate(void); #endif static bool small_size2bin_init_hard(void); -static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, - size_t min_run_size); -static bool bin_info_init(void); /******************************************************************************/ @@ -250,48 +246,57 @@ rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, u.rb_link, arena_avail_comp) static inline void * -arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) +arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) { void *ret; - unsigned regind; - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); assert(run->nfree > 0); - assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); - regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + - (uintptr_t)(bin_info->reg_size * regind)); run->nfree--; - if (regind == run->nextind) - run->nextind++; - assert(regind < run->nextind); + ret = run->avail; + if (ret != NULL) { + /* Double free can cause assertion failure.*/ + assert(ret != NULL); + /* Write-after free can cause assertion failure. */ + assert((uintptr_t)ret >= (uintptr_t)run + + (uintptr_t)bin->reg0_offset); + assert((uintptr_t)ret < (uintptr_t)run->next); + assert(((uintptr_t)ret - ((uintptr_t)run + + (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size == + 0); + run->avail = *(void **)ret; + return (ret); + } + ret = run->next; + run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size); + assert(ret != NULL); return (ret); } static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind = arena_run_regind(run, bin_info, ptr); - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - - assert(run->nfree < bin_info->nregs); + + assert(run->nfree < run->bin->nregs); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size + (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size == 0); + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. 
+ */ assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); + (uintptr_t)run->bin->reg0_offset); + /* + * Freeing a pointer past in the run's frontier can cause assertion + * failure. + */ + assert((uintptr_t)ptr < (uintptr_t)run->next); - bitmap_unset(bitmap, &bin_info->bitmap_info, regind); + *(void **)ptr = run->avail; + run->avail = ptr; run->nfree++; } @@ -315,9 +320,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); old_ndirty = chunk->ndirty; @@ -336,13 +338,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, rem_pages = total_pages - need_pages; arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. */ - cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) << - PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ @@ -569,7 +564,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena->ndirty -= spare->ndirty; } malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize, true); + chunk_dealloc((void *)spare, chunksize); malloc_mutex_lock(&arena->lock); #ifdef JEMALLOC_STATS arena->stats.mapped -= chunksize; @@ -730,9 +725,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(pageind + npages <= chunk_npages); if (mapelm->bits & CHUNK_MAP_DIRTY) { size_t i; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); @@ -755,17 +747,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) CHUNK_MAP_ALLOCATED; } -#ifdef JEMALLOC_STATS - /* - * Update stats_cactive if nactive is crossing a - * chunk multiple. - */ - cactive_diff = CHUNK_CEILING((arena->nactive + - npages) << PAGE_SHIFT) - - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif arena->nactive += npages; /* Append to list for later processing. 
*/ ql_elm_new(mapelm, u.ql_link); @@ -782,12 +763,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) chunk + (uintptr_t)(pageind << PAGE_SHIFT)); assert((mapelm->bits >> PAGE_SHIFT) == 0); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(arena, - run->bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - pageind += bin_info->run_size >> PAGE_SHIFT; + assert(run->magic == ARENA_RUN_MAGIC); + pageind += run->bin->run_size >> PAGE_SHIFT; } } } @@ -868,10 +845,9 @@ arena_purge(arena_t *arena, bool all) } assert(ndirty == arena->ndirty); #endif - assert(arena->ndirty > arena->npurgatory || all); - assert(arena->ndirty - arena->npurgatory > chunk_npages || all); - assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory) || all); + assert(arena->ndirty > arena->npurgatory); + assert(arena->ndirty > chunk_npages || all); + assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all); #ifdef JEMALLOC_STATS arena->stats.npurge++; @@ -883,10 +859,8 @@ arena_purge(arena_t *arena, bool all) * multiple threads from racing to reduce ndirty below the threshold. */ npurgatory = arena->ndirty - arena->npurgatory; - if (all == false) { - assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); + if (all == false) npurgatory -= arena->nactive >> opt_lg_dirty_mult; - } arena->npurgatory += npurgatory; while (npurgatory > 0) { @@ -957,9 +931,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_chunk_t *chunk; size_t size, run_ind, run_pages, flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) @@ -975,19 +946,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_LARGE) != 0); assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - } else { - size_t binind = arena_bin_index(arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size = bin_info->run_size; - } + } else + size = run->bin->run_size; run_pages = (size >> PAGE_SHIFT); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. */ - cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) - - CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_sub(cactive_diff); -#endif arena->nactive -= run_pages; /* @@ -1213,8 +1174,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_chunk_map_t *mapelm; arena_run_t *run; - size_t binind; - arena_bin_info_t *bin_info; /* Look for a usable run. */ mapelm = arena_run_tree_first(&bin->runs); @@ -1238,23 +1197,18 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) } /* No existing runs have any space available. */ - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; - /* Allocate a new run. */ malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, false); + run = arena_run_alloc(arena, bin->run_size, false, false); if (run != NULL) { - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - /* Initialize run internals. 
*/ run->bin = bin; - run->nextind = 0; - run->nfree = bin_info->nregs; - bitmap_init(bitmap, &bin_info->bitmap_info); + run->avail = NULL; + run->next = (void *)((uintptr_t)run + + (uintptr_t)bin->reg0_offset); + run->nfree = bin->nregs; #ifdef JEMALLOC_DEBUG run->magic = ARENA_RUN_MAGIC; #endif @@ -1305,12 +1259,8 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; - size_t binind; - arena_bin_info_t *bin_info; arena_run_t *run; - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; bin->runcur = NULL; run = arena_bin_nonfull_run_get(arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { @@ -1318,22 +1268,22 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). */ - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - ret = arena_run_reg_alloc(bin->runcur, bin_info); + ret = arena_run_reg_alloc(bin->runcur, bin); if (run != NULL) { arena_chunk_t *chunk; /* * arena_run_alloc() may have allocated run, or it may - * have pulled run from the bin's run tree. Therefore + * have pulled it from the bin's run tree. Therefore * it is unsafe to make any assumptions about how run * has previously been used, and arena_bin_lower_run() * must be called, as if a region were just deallocated * from the run. */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin_info->nregs) + if (run->nfree == bin->nregs) arena_dalloc_bin_run(arena, chunk, run, bin); else arena_bin_lower_run(arena, chunk, run, bin); @@ -1346,10 +1296,10 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = run; - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - return (arena_run_reg_alloc(bin->runcur, bin_info)); + return (arena_run_reg_alloc(bin->runcur, bin)); } #ifdef JEMALLOC_PROF @@ -1389,19 +1339,18 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); - for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> - tbin->lg_fill_div); i < nfill; i++) { + for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) { if ((run = bin->runcur) != NULL && run->nfree > 0) - ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); + ptr = arena_run_reg_alloc(run, bin); else ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) break; - /* Insert such that low regions get used first. */ - tbin->avail[nfill - 1 - i] = ptr; + *(void **)ptr = tbin->avail; + tbin->avail = ptr; } #ifdef JEMALLOC_STATS - bin->stats.allocated += i * arena_bin_info[binind].reg_size; + bin->stats.allocated += (i - tbin->ncached) * bin->reg_size; bin->stats.nmalloc += i; bin->stats.nrequests += tbin->tstats.nrequests; bin->stats.nfills++; @@ -1409,9 +1358,119 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif malloc_mutex_unlock(&bin->lock); tbin->ncached = i; + if (tbin->ncached > tbin->high_water) + tbin->high_water = tbin->ncached; } #endif +/* + * Calculate bin->run_size such that it meets the following constraints: + * + * *) bin->run_size >= min_run_size + * *) bin->run_size <= arena_maxclass + * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). 
+ * *) run header size < PAGE_SIZE + * + * bin->nregs and bin->reg0_offset are also calculated here, since these + * settings are all interdependent. + */ +static size_t +arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) +{ + size_t try_run_size, good_run_size; + uint32_t try_nregs, good_nregs; + uint32_t try_hdr_size, good_hdr_size; +#ifdef JEMALLOC_PROF + uint32_t try_ctx0_offset, good_ctx0_offset; +#endif + uint32_t try_reg0_offset, good_reg0_offset; + + assert(min_run_size >= PAGE_SIZE); + assert(min_run_size <= arena_maxclass); + + /* + * Calculate known-valid settings before entering the run_size + * expansion loop, so that the first part of the loop always copies + * valid settings. + * + * The do..while loop iteratively reduces the number of regions until + * the run header and the regions no longer overlap. A closed formula + * would be quite messy, since there is an interdependency between the + * header's mask length and the number of regions. + */ + try_run_size = min_run_size; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size) + + 1; /* Counter-act try_nregs-- in loop. */ + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. */ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); + } else + try_ctx0_offset = 0; +#endif + try_reg0_offset = try_run_size - (try_nregs * bin->reg_size); + } while (try_hdr_size > try_reg0_offset); + + /* run_size expansion loop. */ + do { + /* + * Copy valid settings before trying more aggressive settings. + */ + good_run_size = try_run_size; + good_nregs = try_nregs; + good_hdr_size = try_hdr_size; +#ifdef JEMALLOC_PROF + good_ctx0_offset = try_ctx0_offset; +#endif + good_reg0_offset = try_reg0_offset; + + /* Try more aggressive settings. */ + try_run_size += PAGE_SIZE; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */ + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* + * Add space for one (prof_ctx_t *) per region. + */ + try_hdr_size += try_nregs * + sizeof(prof_ctx_t *); + } +#endif + try_reg0_offset = try_run_size - (try_nregs * + bin->reg_size); + } while (try_hdr_size > try_reg0_offset); + } while (try_run_size <= arena_maxclass + && try_run_size <= arena_maxclass + && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX + && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size + && try_hdr_size < PAGE_SIZE); + + assert(good_hdr_size <= good_reg0_offset); + + /* Copy final settings. 
*/ + bin->run_size = good_run_size; + bin->nregs = good_nregs; +#ifdef JEMALLOC_PROF + bin->ctx0_offset = good_ctx0_offset; +#endif + bin->reg0_offset = good_reg0_offset; + + return (good_run_size); +} + void * arena_malloc_small(arena_t *arena, size_t size, bool zero) { @@ -1420,14 +1479,14 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) arena_run_t *run; size_t binind; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin[size]; assert(binind < nbins); bin = &arena->bins[binind]; - size = arena_bin_info[binind].reg_size; + size = bin->reg_size; malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) - ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); + ret = arena_run_reg_alloc(run, bin); else ret = arena_bin_malloc_hard(arena, bin); @@ -1631,13 +1690,11 @@ arena_salloc(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(run->magic == ARENA_RUN_MAGIC); assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == + (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == 0); - ret = bin_info->reg_size; + ret = run->bin->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1657,11 +1714,10 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr) == PAGE_SIZE); - assert(size <= small_maxclass); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = SMALL_SIZE2BIN(size); + binind = small_size2bin[size]; assert(binind < nbins); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); @@ -1685,13 +1741,11 @@ arena_salloc_demote(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(run->magic == ARENA_RUN_MAGIC); assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == + (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == 0); - ret = bin_info->reg_size; + ret = run->bin->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1700,7 +1754,7 @@ arena_salloc_demote(const void *ptr) size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; assert(binind < nbins); - ret = arena_bin_info[binind].reg_size; + ret = chunk->arena->bins[binind].reg_size; } assert(ret != 0); } @@ -1717,22 +1771,17 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, /* Dissociate run from bin. 
*/ if (run == bin->runcur) bin->runcur = NULL; - else { - size_t binind = arena_bin_index(chunk->arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - - if (bin_info->nregs != 1) { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; - /* - * This block's conditional is necessary because if the - * run only contains one region, then it never gets - * inserted into the non-full runs tree. - */ - arena_run_tree_remove(&bin->runs, run_mapelm); - } + else if (bin->nregs != 1) { + size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >> + PAGE_SHIFT; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; + /* + * This block's conditional is necessary because if the run + * only contains one region, then it never gets inserted into + * the non-full runs tree. + */ + arena_run_tree_remove(&bin->runs, run_mapelm); } } @@ -1740,24 +1789,18 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t binind; - arena_bin_info_t *bin_info; size_t npages, run_ind, past; assert(run != bin->runcur); assert(arena_run_tree_search(&bin->runs, &chunk->map[ (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); - binind = arena_bin_index(chunk->arena, run->bin); - bin_info = &arena_bin_info[binind]; - malloc_mutex_unlock(&bin->lock); /******************************/ - npages = bin_info->run_size >> PAGE_SHIFT; + npages = bin->run_size >> PAGE_SHIFT; run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - past = (size_t)(PAGE_CEILING((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT); + past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk) + >> PAGE_SHIFT); malloc_mutex_lock(&arena->lock); /* @@ -1774,7 +1817,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | (chunk->map[run_ind+npages-1-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin_info->run_size | + chunk->map[run_ind-map_bias].bits = bin->run_size | CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), @@ -1843,12 +1886,10 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; #if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size = bin_info->reg_size; + size = bin->reg_size; #endif #ifdef JEMALLOC_FILL @@ -1857,7 +1898,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif arena_run_reg_dalloc(run, ptr); - if (run->nfree == bin_info->nregs) { + if (run->nfree == bin->nregs) { arena_dissociate_bin_run(chunk, run, bin); arena_dalloc_bin_run(arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) @@ -2091,7 +2132,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - dassert(arena->magic == 
ARENA_MAGIC); + assert(arena->magic == ARENA_MAGIC); if (psize < oldsize) { #ifdef JEMALLOC_FILL @@ -2129,11 +2170,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= small_maxclass) { - assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size - == oldsize); + assert(choose_arena()->bins[small_size2bin[ + oldsize]].reg_size == oldsize); if ((size + extra <= small_maxclass && - SMALL_SIZE2BIN(size + extra) == - SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && + small_size2bin[size + extra] == + small_size2bin[oldsize]) || (size <= oldsize && size + extra >= oldsize)) { #ifdef JEMALLOC_FILL if (opt_junk && size < oldsize) { @@ -2169,29 +2210,24 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (ret != NULL) return (ret); + /* * size and oldsize are different enough that we need to move the * object. In that case, fall back to allocating new space and * copying. */ - if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - } else + if (alignment != 0) + ret = ipalloc(size + extra, alignment, zero); + else ret = arena_malloc(size + extra, zero); if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. */ - if (alignment != 0) { - size_t usize = sa2u(size, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - } else + if (alignment != 0) + ret = ipalloc(size, alignment, zero); + else ret = arena_malloc(size, zero); if (ret == NULL) @@ -2215,9 +2251,9 @@ arena_new(arena_t *arena, unsigned ind) { unsigned i; arena_bin_t *bin; + size_t prev_run_size; arena->ind = ind; - arena->nthreads = 0; if (malloc_mutex_init(&arena->lock)) return (true); @@ -2251,6 +2287,8 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ + prev_run_size = PAGE_SIZE; + i = 0; #ifdef JEMALLOC_TINY /* (2^n)-spaced tiny bins. 
*/ @@ -2260,6 +2298,11 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = (1U << (LG_TINY_MIN + i)); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2273,6 +2316,11 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = (i - ntbins + 1) << LG_QUANTUM; + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2285,6 +2333,12 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) << + LG_CACHELINE); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2297,6 +2351,12 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); + + bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins)) + << LG_SUBPAGE); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2315,39 +2375,40 @@ small_size2bin_validate(void) { size_t i, size, binind; + assert(small_size2bin[0] == 0xffU); i = 1; # ifdef JEMALLOC_TINY /* Tiny. */ for (; i < (1U << LG_TINY_MIN); i++) { size = pow2_ceil(1U << LG_TINY_MIN); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } for (; i < qspace_min; i++) { size = pow2_ceil(i); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } # endif /* Quantum-spaced. */ for (; i <= qspace_max; i++) { size = QUANTUM_CEILING(i); binind = ntbins + (size >> LG_QUANTUM) - 1; - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } /* Cacheline-spaced. */ for (; i <= cspace_max; i++) { size = CACHELINE_CEILING(i); binind = ntbins + nqbins + ((size - cspace_min) >> LG_CACHELINE); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } /* Sub-page. 
*/ for (; i <= sspace_max; i++) { size = SUBPAGE_CEILING(i); binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> LG_SUBPAGE); - assert(SMALL_SIZE2BIN(i) == binind); + assert(small_size2bin[i] == binind); } } #endif @@ -2358,12 +2419,12 @@ small_size2bin_init(void) if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)) + || sizeof(const_small_size2bin) != small_maxclass + 1) return (small_size2bin_init_hard()); small_size2bin = const_small_size2bin; #ifdef JEMALLOC_DEBUG + assert(sizeof(const_small_size2bin) == small_maxclass + 1); small_size2bin_validate(); #endif return (false); @@ -2374,52 +2435,49 @@ small_size2bin_init_hard(void) { size_t i, size, binind; uint8_t *custom_small_size2bin; -#define CUSTOM_SMALL_SIZE2BIN(s) \ - custom_small_size2bin[(s-1) >> LG_TINY_MIN] assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)); + || sizeof(const_small_size2bin) != small_maxclass + 1); - custom_small_size2bin = (uint8_t *) - base_alloc(small_maxclass >> LG_TINY_MIN); + custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1); if (custom_small_size2bin == NULL) return (true); + custom_small_size2bin[0] = 0xffU; i = 1; #ifdef JEMALLOC_TINY /* Tiny. */ - for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { + for (; i < (1U << LG_TINY_MIN); i++) { size = pow2_ceil(1U << LG_TINY_MIN); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } - for (; i < qspace_min; i += TINY_MIN) { + for (; i < qspace_min; i++) { size = pow2_ceil(i); binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } #endif /* Quantum-spaced. */ - for (; i <= qspace_max; i += TINY_MIN) { + for (; i <= qspace_max; i++) { size = QUANTUM_CEILING(i); binind = ntbins + (size >> LG_QUANTUM) - 1; - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } /* Cacheline-spaced. */ - for (; i <= cspace_max; i += TINY_MIN) { + for (; i <= cspace_max; i++) { size = CACHELINE_CEILING(i); binind = ntbins + nqbins + ((size - cspace_min) >> LG_CACHELINE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } /* Sub-page. */ - for (; i <= sspace_max; i += TINY_MIN) { + for (; i <= sspace_max; i++) { size = SUBPAGE_CEILING(i); binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> LG_SUBPAGE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + custom_small_size2bin[i] = binind; } small_size2bin = custom_small_size2bin; @@ -2427,190 +2485,6 @@ small_size2bin_init_hard(void) small_size2bin_validate(); #endif return (false); -#undef CUSTOM_SMALL_SIZE2BIN -} - -/* - * Calculate bin_info->run_size such that it meets the following constraints: - * - * *) bin_info->run_size >= min_run_size - * *) bin_info->run_size <= arena_maxclass - * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). - * *) bin_info->nregs <= RUN_MAXREGS - * - * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also - * calculated here, since these settings are all interdependent. 
- */ -static size_t -bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) -{ - size_t try_run_size, good_run_size; - uint32_t try_nregs, good_nregs; - uint32_t try_hdr_size, good_hdr_size; - uint32_t try_bitmap_offset, good_bitmap_offset; -#ifdef JEMALLOC_PROF - uint32_t try_ctx0_offset, good_ctx0_offset; -#endif - uint32_t try_reg0_offset, good_reg0_offset; - - assert(min_run_size >= PAGE_SIZE); - assert(min_run_size <= arena_maxclass); - - /* - * Calculate known-valid settings before entering the run_size - * expansion loop, so that the first part of the loop always copies - * valid settings. - * - * The do..while loop iteratively reduces the number of regions until - * the run header and the regions no longer overlap. A closed formula - * would be quite messy, since there is an interdependency between the - * header's mask length and the number of regions. - */ - try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* Add space for one (prof_ctx_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_ctx_t *); - } else - try_ctx0_offset = 0; -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); - - /* run_size expansion loop. */ - do { - /* - * Copy valid settings before trying more aggressive settings. - */ - good_run_size = try_run_size; - good_nregs = try_nregs; - good_hdr_size = try_hdr_size; - good_bitmap_offset = try_bitmap_offset; -#ifdef JEMALLOC_PROF - good_ctx0_offset = try_ctx0_offset; -#endif - good_reg0_offset = try_reg0_offset; - - /* Try more aggressive settings. */ - try_run_size += PAGE_SIZE; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin_info->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* - * Add space for one (prof_ctx_t *) per region. - */ - try_hdr_size += try_nregs * - sizeof(prof_ctx_t *); - } -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); - } while (try_run_size <= arena_maxclass - && try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX - && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size - && try_nregs < RUN_MAXREGS); - - assert(good_hdr_size <= good_reg0_offset); - - /* Copy final settings. 
*/ - bin_info->run_size = good_run_size; - bin_info->nregs = good_nregs; - bin_info->bitmap_offset = good_bitmap_offset; -#ifdef JEMALLOC_PROF - bin_info->ctx0_offset = good_ctx0_offset; -#endif - bin_info->reg0_offset = good_reg0_offset; - - return (good_run_size); -} - -static bool -bin_info_init(void) -{ - arena_bin_info_t *bin_info; - unsigned i; - size_t prev_run_size; - - arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); - if (arena_bin_info == NULL) - return (true); - - prev_run_size = PAGE_SIZE; - i = 0; -#ifdef JEMALLOC_TINY - /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } -#endif - - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << - LG_CACHELINE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - /* Subpage-spaced bins. */ - for (; i < nbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + - ncbins)) << LG_SUBPAGE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - return (false); } bool @@ -2671,6 +2545,9 @@ arena_boot(void) abort(); } + if (small_size2bin_init()) + return (true); + /* * Compute the header size such that it is large enough to contain the * page map. The page map is biased to omit entries for the header @@ -2694,11 +2571,5 @@ arena_boot(void) arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); - if (small_size2bin_init()) - return (true); - - if (bin_info_init()) - return (true); - return (false); } diff --git a/dep/jemalloc/src/atomic.c b/dep/jemalloc/src/atomic.c deleted file mode 100644 index 77ee313113b..00000000000 --- a/dep/jemalloc/src/atomic.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_ATOMIC_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/bitmap.c b/dep/jemalloc/src/bitmap.c deleted file mode 100644 index b47e2629093..00000000000 --- a/dep/jemalloc/src/bitmap.c +++ /dev/null @@ -1,90 +0,0 @@ -#define JEMALLOC_BITMAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static size_t bits2groups(size_t nbits); - -/******************************************************************************/ - -static size_t -bits2groups(size_t nbits) -{ - - return ((nbits >> LG_BITMAP_GROUP_NBITS) + - !!(nbits & BITMAP_GROUP_NBITS_MASK)); -} - -void -bitmap_info_init(bitmap_info_t *binfo, size_t nbits) -{ - unsigned i; - size_t group_count; - - assert(nbits > 0); - assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); - - /* - * Compute the number of groups necessary to store nbits bits, and - * progressively work upward through the levels until reaching a level - * that requires only one group. 
- */ - binfo->levels[0].group_offset = 0; - group_count = bits2groups(nbits); - for (i = 1; group_count > 1; i++) { - assert(i < BITMAP_MAX_LEVELS); - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - group_count = bits2groups(group_count); - } - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - binfo->nlevels = i; - binfo->nbits = nbits; -} - -size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ - - return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); -} - -size_t -bitmap_size(size_t nbits) -{ - bitmap_info_t binfo; - - bitmap_info_init(&binfo, nbits); - return (bitmap_info_ngroups(&binfo)); -} - -void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t extra; - unsigned i; - - /* - * Bits are actually inverted with regard to the external bitmap - * interface, so the bitmap starts out with all 1 bits, except for - * trailing unused bits (if any). Note that each group uses bit 0 to - * correspond to the first logical bit in the group, so extra bits - * are the most significant bits of the last group. - */ - memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << - LG_SIZEOF_BITMAP); - extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) - & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[1].group_offset - 1] >>= extra; - for (i = 1; i < binfo->nlevels; i++) { - size_t group_count = binfo->levels[i].group_offset - - binfo->levels[i-1].group_offset; - extra = (BITMAP_GROUP_NBITS - (group_count & - BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; - } -} diff --git a/dep/jemalloc/src/chunk.c b/dep/jemalloc/src/chunk.c index d190c6f49b3..301519e8042 100644 --- a/dep/jemalloc/src/chunk.c +++ b/dep/jemalloc/src/chunk.c @@ -70,7 +70,7 @@ RETURN: #ifdef JEMALLOC_IVSALLOC if (base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size, true); + chunk_dealloc(ret, size); return (NULL); } } @@ -108,7 +108,7 @@ RETURN: } void -chunk_dealloc(void *chunk, size_t size, bool unmap) +chunk_dealloc(void *chunk, size_t size) { assert(chunk != NULL); @@ -125,17 +125,15 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) malloc_mutex_unlock(&chunks_mtx); #endif - if (unmap) { #ifdef JEMALLOC_SWAP - if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) - return; + if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + return; #endif #ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) - return; + if (chunk_dealloc_dss(chunk, size) == false) + return; #endif - chunk_dealloc_mmap(chunk, size); - } + chunk_dealloc_mmap(chunk, size); } bool diff --git a/dep/jemalloc/src/chunk_mmap.c b/dep/jemalloc/src/chunk_mmap.c index 164e86e7b38..bc367559774 100644 --- a/dep/jemalloc/src/chunk_mmap.c +++ b/dep/jemalloc/src/chunk_mmap.c @@ -206,15 +206,13 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) void * chunk_alloc_mmap(size_t size) { - - return (chunk_alloc_mmap_internal(size, false)); + return chunk_alloc_mmap_internal(size, false); } void * chunk_alloc_mmap_noreserve(size_t size) { - - return (chunk_alloc_mmap_internal(size, true)); + return chunk_alloc_mmap_internal(size, true); } void diff --git a/dep/jemalloc/src/ckh.c b/dep/jemalloc/src/ckh.c index 43fcc25239d..682a8db65bf 100644 --- a/dep/jemalloc/src/ckh.c +++ b/dep/jemalloc/src/ckh.c @@ -34,7 +34,7 @@ * respectively. 
* ******************************************************************************/ -#define JEMALLOC_CKH_C_ +#define CKH_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ @@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -262,15 +262,9 @@ ckh_grow(ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; while (true) { - size_t usize; - lg_curcells++; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); - if (usize == 0) { - ret = true; - goto RETURN; - } - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { ret = true; goto RETURN; @@ -301,7 +295,7 @@ static void ckh_shrink(ckh_t *ckh) { ckhc_t *tab, *ttab; - size_t lg_curcells, usize; + size_t lg_curcells; unsigned lg_prevbuckets; /* @@ -310,10 +304,8 @@ ckh_shrink(ckh_t *ckh) */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); - if (usize == 0) - return; - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_curcells, + ZU(1) << LG_CACHELINE, true); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -348,7 +340,7 @@ bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; - size_t mincells, usize; + size_t mincells; unsigned lg_mincells; assert(minitems > 0); @@ -383,19 +375,15 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->hash = hash; ckh->keycomp = keycomp; - usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); - if (usize == 0) { - ret = true; - goto RETURN; - } - ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + ckh->tab = (ckhc_t *)ipalloc(sizeof(ckhc_t) << lg_mincells, + (ZU(1) << LG_CACHELINE), true); if (ckh->tab == NULL) { ret = true; goto RETURN; } #ifdef JEMALLOC_DEBUG - ckh->magic = CKH_MAGIC; + ckh->magic = CKH_MAGIG; #endif ret = false; @@ -408,7 +396,7 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); #ifdef CKH_VERBOSE malloc_printf( @@ -433,7 +421,7 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); return (ckh->count); } @@ -464,7 +452,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -489,7 +477,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -521,7 +509,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic = CKH_MAGIG); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -556,7 +544,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = 
hash(key, strlen((const char *)key), - 0x8432a476666bbc13LLU); + 0x8432a476666bbc13U); } *hash1 = ret1; diff --git a/dep/jemalloc/src/ctl.c b/dep/jemalloc/src/ctl.c index e5336d36949..3c8adab90a3 100644 --- a/dep/jemalloc/src/ctl.c +++ b/dep/jemalloc/src/ctl.c @@ -182,7 +182,6 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) #endif -CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) #ifdef JEMALLOC_STATS @@ -193,7 +192,6 @@ CTL_PROTO(stats_arenas_i_purged) #endif INDEX_PROTO(stats_arenas_i) #ifdef JEMALLOC_STATS -CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_mapped) @@ -436,7 +434,6 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = { #endif static const ctl_node_t stats_arenas_i_node[] = { - {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)} #ifdef JEMALLOC_STATS @@ -461,7 +458,6 @@ static const ctl_node_t stats_arenas_node[] = { static const ctl_node_t stats_node[] = { #ifdef JEMALLOC_STATS - {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, @@ -624,7 +620,6 @@ ctl_arena_refresh(arena_t *arena, unsigned i) ctl_arena_clear(astats); - sstats->nthreads += astats->nthreads; #ifdef JEMALLOC_STATS ctl_arena_stats_amerge(astats, arena); /* Merge into sum stats as well. */ @@ -662,17 +657,10 @@ ctl_refresh(void) * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). */ - ctl_stats.arenas[narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[narenas]); malloc_mutex_lock(&arenas_lock); memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - for (i = 0; i < narenas; i++) { - if (arenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; - else - ctl_stats.arenas[i].nthreads = 0; - } malloc_mutex_unlock(&arenas_lock); for (i = 0; i < narenas; i++) { bool initialized = (tarenas[i] != NULL); @@ -1126,8 +1114,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, unsigned newind, oldind; newind = oldind = choose_arena()->ind; - WRITE(newind, unsigned); - READ(oldind, unsigned); + WRITE(oldind, unsigned); + READ(newind, unsigned); if (newind != oldind) { arena_t *arena; @@ -1141,8 +1129,6 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, malloc_mutex_lock(&arenas_lock); if ((arena = arenas[newind]) == NULL) arena = arenas_extend(newind); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; malloc_mutex_unlock(&arenas_lock); if (arena == NULL) { ret = EAGAIN; @@ -1151,13 +1137,6 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. 
*/ ARENA_SET(arena); -#ifdef JEMALLOC_TCACHE - { - tcache_t *tcache = TCACHE_GET(); - if (tcache != NULL) - tcache->arena = arena; - } -#endif } ret = 0; @@ -1167,9 +1146,9 @@ RETURN: #ifdef JEMALLOC_STATS CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *); +CTL_RO_NL_GEN(thread_allocatedp, &ALLOCATED_GET(), uint64_t *); CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *); +CTL_RO_NL_GEN(thread_deallocatedp, &DEALLOCATED_GET(), uint64_t *); #endif /******************************************************************************/ @@ -1305,9 +1284,9 @@ CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool) /******************************************************************************/ -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t) const ctl_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1552,7 +1531,6 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) } #endif -CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) #ifdef JEMALLOC_STATS @@ -1584,7 +1562,6 @@ RETURN: } #ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *) CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t) CTL_RO_GEN(stats_active, ctl_stats.active, size_t) CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t) diff --git a/dep/jemalloc/src/hash.c b/dep/jemalloc/src/hash.c index cfa4da0275c..6a13d7a03c0 100644 --- a/dep/jemalloc/src/hash.c +++ b/dep/jemalloc/src/hash.c @@ -1,2 +1,2 @@ -#define JEMALLOC_HASH_C_ +#define HASH_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/huge.c b/dep/jemalloc/src/huge.c index a4f9b054ed5..0aadc4339a9 100644 --- a/dep/jemalloc/src/huge.c +++ b/dep/jemalloc/src/huge.c @@ -50,7 +50,6 @@ huge_malloc(size_t size, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); #ifdef JEMALLOC_STATS - stats_cactive_add(csize); huge_nmalloc++; huge_allocated += csize; #endif @@ -84,7 +83,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) * alignment, in order to assure the alignment can be achieved, then * unmap leading and trailing chunks. */ - assert(alignment > chunksize); + assert(alignment >= chunksize); chunk_size = CHUNK_CEILING(size); @@ -110,12 +109,12 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (offset == 0) { /* Trim trailing space. */ chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size, true); + - chunk_size); } else { size_t trailsize; /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset, true); + chunk_dealloc(ret, alignment - offset); ret = (void *)((uintptr_t)ret + (alignment - offset)); @@ -124,7 +123,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) /* Trim trailing space. 
*/ assert(trailsize < alloc_size); chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize, true); + trailsize); } } @@ -135,7 +134,6 @@ huge_palloc(size_t size, size_t alignment, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); #ifdef JEMALLOC_STATS - stats_cactive_add(chunk_size); huge_nmalloc++; huge_allocated += chunk_size; #endif @@ -194,7 +192,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * different size class. In that case, fall back to allocating new * space and copying. */ - if (alignment > chunksize) + if (alignment != 0) ret = huge_palloc(size + extra, alignment, zero); else ret = huge_malloc(size + extra, zero); @@ -203,7 +201,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (extra == 0) return (NULL); /* Try again, this time without extra. */ - if (alignment > chunksize) + if (alignment != 0) ret = huge_palloc(size, alignment, zero); else ret = huge_malloc(size, zero); @@ -234,13 +232,6 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, ) { size_t newsize = huge_salloc(ret); - /* - * Remove ptr from the tree of huge allocations before - * performing the remap operation, in order to avoid the - * possibility of another thread acquiring that mapping before - * this one removes it from the tree. - */ - huge_dalloc(ptr, false); if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, ret) == MAP_FAILED) { /* @@ -260,8 +251,9 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - chunk_dealloc_mmap(ptr, oldsize); - } + idalloc(ptr); + } else + huge_dalloc(ptr, false); } else #endif { @@ -286,7 +278,6 @@ huge_dalloc(void *ptr, bool unmap) extent_tree_ad_remove(&huge, node); #ifdef JEMALLOC_STATS - stats_cactive_sub(node->size); huge_ndalloc++; huge_allocated -= node->size; #endif @@ -301,10 +292,9 @@ huge_dalloc(void *ptr, bool unmap) memset(node->addr, 0x5a, node->size); #endif #endif + chunk_dealloc(node->addr, node->size); } - chunk_dealloc(node->addr, node->size, unmap); - base_node_dealloc(node); } diff --git a/dep/jemalloc/src/jemalloc.c b/dep/jemalloc/src/jemalloc.c index a161c2e26e1..2aebc51dd19 100644 --- a/dep/jemalloc/src/jemalloc.c +++ b/dep/jemalloc/src/jemalloc.c @@ -7,10 +7,12 @@ malloc_mutex_t arenas_lock; arena_t **arenas; unsigned narenas; +static unsigned next_arena; -pthread_key_t arenas_tsd; #ifndef NO_TLS __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#else +pthread_key_t arenas_tsd; #endif #ifdef JEMALLOC_STATS @@ -28,13 +30,7 @@ static bool malloc_initialized = false; static pthread_t malloc_initializer = (unsigned long)0; /* Used to avoid initialization races. 
*/ -static malloc_mutex_t init_lock = -#ifdef JEMALLOC_OSSPIN - 0 -#else - MALLOC_MUTEX_INITIALIZER -#endif - ; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT size_t pagesize; @@ -74,7 +70,6 @@ size_t opt_narenas = 0; static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); -static void arenas_cleanup(void *arg); #if (defined(JEMALLOC_STATS) && defined(NO_TLS)) static void thread_allocated_cleanup(void *arg); #endif @@ -84,7 +79,6 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen); static void malloc_conf_init(void); static bool malloc_init_hard(void); -static int imemalign(void **memptr, size_t alignment, size_t size); /******************************************************************************/ /* malloc_message() setup. */ @@ -153,53 +147,13 @@ choose_arena_hard(void) arena_t *ret; if (narenas > 1) { - unsigned i, choose, first_null; - - choose = 0; - first_null = narenas; malloc_mutex_lock(&arenas_lock); - assert(arenas[0] != NULL); - for (i = 1; i < narenas; i++) { - if (arenas[i] != NULL) { - /* - * Choose the first arena that has the lowest - * number of threads assigned to it. - */ - if (arenas[i]->nthreads < - arenas[choose]->nthreads) - choose = i; - } else if (first_null == narenas) { - /* - * Record the index of the first uninitialized - * arena, in case all extant arenas are in use. - * - * NB: It is possible for there to be - * discontinuities in terms of initialized - * versus uninitialized arenas, due to the - * "thread.arena" mallctl. - */ - first_null = i; - } - } - - if (arenas[choose] == 0 || first_null == narenas) { - /* - * Use an unloaded arena, or the least loaded arena if - * all arenas are already initialized. - */ - ret = arenas[choose]; - } else { - /* Initialize a new arena. */ - ret = arenas_extend(first_null); - } - ret->nthreads++; + if ((ret = arenas[next_arena]) == NULL) + ret = arenas_extend(next_arena); + next_arena = (next_arena + 1) % narenas; malloc_mutex_unlock(&arenas_lock); - } else { + } else ret = arenas[0]; - malloc_mutex_lock(&arenas_lock); - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); - } ARENA_SET(ret); @@ -259,28 +213,6 @@ stats_print_atexit(void) JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -thread_allocated_t * -thread_allocated_get_hard(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - imalloc(sizeof(thread_allocated_t)); - if (thread_allocated == NULL) { - static thread_allocated_t static_thread_allocated = {0, 0}; - malloc_write("<jemalloc>: Error allocating TSD;" - " mallctl(\"thread.{de,}allocated[p]\", ...)" - " will be inaccurate\n"); - if (opt_abort) - abort(); - return (&static_thread_allocated); - } - pthread_setspecific(thread_allocated_tsd, thread_allocated); - thread_allocated->allocated = 0; - thread_allocated->deallocated = 0; - return (thread_allocated); -} -#endif - /* * End miscellaneous support functions. 
*/ @@ -305,16 +237,6 @@ malloc_ncpus(void) return (ret); } -static void -arenas_cleanup(void *arg) -{ - arena_t *arena = (arena_t *)arg; - - malloc_mutex_lock(&arenas_lock); - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); -} - #if (defined(JEMALLOC_STATS) && defined(NO_TLS)) static void thread_allocated_cleanup(void *arg) @@ -499,8 +421,8 @@ malloc_conf_init(void) if ((opts = getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to - * the value of the MALLOC_CONF environment - * variable. + * the value of the JEMALLOC_OPTIONS + * environment variable. */ } else { /* No configuration specified. */ @@ -689,7 +611,7 @@ malloc_init_hard(void) result = sysconf(_SC_PAGESIZE); assert(result != -1); - pagesize = (size_t)result; + pagesize = (unsigned)result; /* * We assume that pagesize is a power of 2 when calculating @@ -749,10 +671,7 @@ malloc_init_hard(void) } #ifdef JEMALLOC_TCACHE - if (tcache_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } + tcache_boot(); #endif if (huge_boot()) { @@ -769,14 +688,6 @@ malloc_init_hard(void) } #endif - if (malloc_mutex_init(&arenas_lock)) - return (true); - - if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } - /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). @@ -801,7 +712,8 @@ malloc_init_hard(void) * threaded mode. */ ARENA_SET(arenas[0]); - arenas[0]->nthreads++; + + malloc_mutex_init(&arenas_lock); #ifdef JEMALLOC_PROF if (prof_boot2()) { @@ -841,6 +753,15 @@ malloc_init_hard(void) malloc_write(")\n"); } + next_arena = (narenas > 0) ? 1 : 0; + +#ifdef NO_TLS + if (pthread_key_create(&arenas_tsd, NULL) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + /* Allocate and initialize arenas. */ arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); if (arenas == NULL) { @@ -872,6 +793,7 @@ malloc_init_hard(void) return (false); } + #ifdef JEMALLOC_ZONE JEMALLOC_ATTR(constructor) void @@ -940,8 +862,7 @@ JEMALLOC_P(malloc)(size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } @@ -990,23 +911,19 @@ RETURN: } JEMALLOC_ATTR(nonnull(1)) -#ifdef JEMALLOC_PROF -/* - * Avoid any uncertainty as to how many backtrace frames to ignore in - * PROF_ALLOC_PREP(). 
- */ -JEMALLOC_ATTR(noinline) -#endif -static int -imemalign(void **memptr, size_t alignment, size_t size) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) { int ret; + void *result; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t usize -#ifdef JEMALLOC_CC_SILENCE +# ifdef JEMALLOC_CC_SILENCE = 0 -#endif +# endif ; - void *result; +#endif #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt # ifdef JEMALLOC_CC_SILENCE @@ -1056,38 +973,34 @@ imemalign(void **memptr, size_t alignment, size_t size) goto RETURN; } - usize = sa2u(size, alignment, NULL); - if (usize == 0) { - result = NULL; - ret = ENOMEM; - goto RETURN; - } - #ifdef JEMALLOC_PROF if (opt_prof) { - PROF_ALLOC_PREP(2, usize, cnt); - if (cnt == NULL) { + usize = sa2u(size, alignment, NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) { result = NULL; ret = EINVAL; } else { if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { - assert(sa2u(small_maxclass+1, - alignment, NULL) != 0); - result = ipalloc(sa2u(small_maxclass+1, - alignment, NULL), alignment, false); + result = ipalloc(small_maxclass+1, + alignment, false); if (result != NULL) { arena_prof_promoted(result, usize); } } else { - result = ipalloc(usize, alignment, + result = ipalloc(size, alignment, false); } } } else #endif - result = ipalloc(usize, alignment, false); + { +#ifdef JEMALLOC_STATS + usize = sa2u(size, alignment, NULL); +#endif + result = ipalloc(size, alignment, false); + } } if (result == NULL) { @@ -1119,15 +1032,6 @@ RETURN: return (ret); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) -{ - - return imemalign(memptr, alignment, size); -} - JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * @@ -1183,8 +1087,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(num_size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto RETURN; } @@ -1297,9 +1200,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) if (opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) { - old_ctx = NULL; + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } @@ -1309,13 +1210,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) false, false); if (ret != NULL) arena_prof_promoted(ret, usize); - else - old_ctx = NULL; - } else { + } else ret = iralloc(ptr, size, 0, 0, false, false); - if (ret == NULL) - old_ctx = NULL; - } } else #endif { @@ -1353,8 +1249,7 @@ OOM: #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) + if ((cnt = prof_alloc_prep(usize)) == NULL) ret = NULL; else { if (prof_promote && (uintptr_t)cnt != @@ -1459,7 +1354,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - imemalign(&ret, alignment, size); + JEMALLOC_P(posix_memalign)(&ret, alignment, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1478,7 +1373,7 @@ JEMALLOC_P(valloc)(size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - imemalign(&ret, PAGE_SIZE, size); + JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1559,18 +1454,15 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, } JEMALLOC_INLINE void 
* -iallocm(size_t usize, size_t alignment, bool zero) +iallocm(size_t size, size_t alignment, bool zero) { - assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment, - NULL))); - if (alignment != 0) - return (ipalloc(usize, alignment, zero)); + return (ipalloc(size, alignment, zero)); else if (zero) - return (icalloc(usize)); + return (icalloc(size)); else - return (imalloc(usize)); + return (imalloc(size)); } JEMALLOC_ATTR(nonnull(1)) @@ -1593,43 +1485,38 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto OOM; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); - if (usize == 0) - goto OOM; - #ifdef JEMALLOC_PROF if (opt_prof) { - PROF_ALLOC_PREP(1, usize, cnt); - if (cnt == NULL) + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, + NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) goto OOM; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { - size_t usize_promoted = (alignment == 0) ? - s2u(small_maxclass+1) : sa2u(small_maxclass+1, - alignment, NULL); - assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero); + p = iallocm(small_maxclass+1, alignment, zero); if (p == NULL) goto OOM; arena_prof_promoted(p, usize); } else { - p = iallocm(usize, alignment, zero); + p = iallocm(size, alignment, zero); if (p == NULL) goto OOM; } - prof_malloc(p, usize, cnt); + if (rsize != NULL) *rsize = usize; } else #endif { - p = iallocm(usize, alignment, zero); + p = iallocm(size, alignment, zero); if (p == NULL) goto OOM; #ifndef JEMALLOC_STATS if (rsize != NULL) #endif { + usize = (alignment == 0) ? s2u(size) : sa2u(size, + alignment, NULL); #ifdef JEMALLOC_STATS if (rsize != NULL) #endif @@ -1672,6 +1559,7 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, bool no_move = flags & ALLOCM_NO_MOVE; #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; + prof_ctx_t *old_ctx; #endif assert(ptr != NULL); @@ -1686,33 +1574,25 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and - * use that in PROF_ALLOC_PREP() to decide whether to capture a + * use that in prof_alloc_prep() to decide whether to capture a * backtrace. prof_realloc() will use the actual usize to * decide whether to sample. */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment, NULL); - prof_ctx_t *old_ctx = prof_ctx_get(p); old_size = isalloc(p); - PROF_ALLOC_PREP(1, max_usize, cnt); - if (cnt == NULL) + old_ctx = prof_ctx_get(p); + if ((cnt = prof_alloc_prep(max_usize)) == NULL) goto OOM; - /* - * Use minimum usize to determine whether promotion may happen. - */ - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL)) <= small_maxclass) { + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize + <= small_maxclass) { q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= size+extra) ? 
0 : size+extra - (small_maxclass+1), alignment, zero, no_move); if (q == NULL) goto ERR; - if (max_usize < PAGE_SIZE) { - usize = max_usize; - arena_prof_promoted(q, usize); - } else - usize = isalloc(q); + usize = isalloc(q); + arena_prof_promoted(q, usize); } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) @@ -1720,8 +1600,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, usize = isalloc(q); } prof_realloc(q, usize, cnt, old_size, old_ctx); - if (rsize != NULL) - *rsize = usize; } else #endif { diff --git a/dep/jemalloc/src/mb.c b/dep/jemalloc/src/mb.c index dc2c0a256fd..30a1a2e997a 100644 --- a/dep/jemalloc/src/mb.c +++ b/dep/jemalloc/src/mb.c @@ -1,2 +1,2 @@ -#define JEMALLOC_MB_C_ +#define MB_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/dep/jemalloc/src/mutex.c b/dep/jemalloc/src/mutex.c index ca89ef1c962..3ecb18a340e 100644 --- a/dep/jemalloc/src/mutex.c +++ b/dep/jemalloc/src/mutex.c @@ -55,9 +55,6 @@ pthread_create(pthread_t *__restrict thread, bool malloc_mutex_init(malloc_mutex_t *mutex) { -#ifdef JEMALLOC_OSSPIN - *mutex = 0; -#else pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) @@ -73,7 +70,6 @@ malloc_mutex_init(malloc_mutex_t *mutex) } pthread_mutexattr_destroy(&attr); -#endif return (false); } @@ -81,10 +77,8 @@ void malloc_mutex_destroy(malloc_mutex_t *mutex) { -#ifndef JEMALLOC_OSSPIN if (pthread_mutex_destroy(mutex) != 0) { malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n"); abort(); } -#endif } diff --git a/dep/jemalloc/src/prof.c b/dep/jemalloc/src/prof.c index 8a144b4e46c..636cccef52a 100644 --- a/dep/jemalloc/src/prof.c +++ b/dep/jemalloc/src/prof.c @@ -3,15 +3,15 @@ #ifdef JEMALLOC_PROF /******************************************************************************/ +#ifdef JEMALLOC_PROF_LIBGCC +#include <unwind.h> +#endif + #ifdef JEMALLOC_PROF_LIBUNWIND #define UNW_LOCAL_ONLY #include <libunwind.h> #endif -#ifdef JEMALLOC_PROF_LIBGCC -#include <unwind.h> -#endif - /******************************************************************************/ /* Data. 
*/ @@ -169,7 +169,39 @@ prof_leave(void) prof_gdump(); } -#ifdef JEMALLOC_PROF_LIBUNWIND +#ifdef JEMALLOC_PROF_LIBGCC +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) +{ + + return (_URC_NO_REASON); +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) +{ + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + + if (data->nignore > 0) + data->nignore--; + else { + data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); + } + + return (_URC_NO_REASON); +} + +void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ + prof_unwind_data_t data = {bt, nignore, max}; + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#elif defined(JEMALLOC_PROF_LIBUNWIND) void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -204,41 +236,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) break; } } -#endif -#ifdef JEMALLOC_PROF_LIBGCC -static _Unwind_Reason_Code -prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) -{ - - return (_URC_NO_REASON); -} - -static _Unwind_Reason_Code -prof_unwind_callback(struct _Unwind_Context *context, void *arg) -{ - prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - - if (data->nignore > 0) - data->nignore--; - else { - data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); - data->bt->len++; - if (data->bt->len == data->max) - return (_URC_END_OF_STACK); - } - - return (_URC_NO_REASON); -} - -void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) -{ - prof_unwind_data_t data = {bt, nignore, max}; - - _Unwind_Backtrace(prof_unwind_callback, &data); -} -#endif -#ifdef JEMALLOC_PROF_GCC +#else void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -434,7 +432,6 @@ prof_lookup(prof_bt_t *bt) prof_ctx_t *p; void *v; } ctx; - bool new_ctx; /* * This thread's cache lacks bt. Look for it in the global @@ -471,26 +468,12 @@ prof_lookup(prof_bt_t *bt) idalloc(ctx.v); return (NULL); } - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - * - * No locking is necessary for ctx here because no other - * threads have had the opportunity to fetch it from - * bt2ctx yet. - */ - ctx.p->cnt_merged.curobjs++; - new_ctx = true; - } else { - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - */ - malloc_mutex_lock(&ctx.p->lock); - ctx.p->cnt_merged.curobjs++; - malloc_mutex_unlock(&ctx.p->lock); - new_ctx = false; } + /* + * Acquire ctx's lock before releasing bt2ctx_mtx, in order to + * avoid a race condition with prof_ctx_destroy(). + */ + malloc_mutex_lock(&ctx.p->lock); prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ @@ -503,9 +486,8 @@ prof_lookup(prof_bt_t *bt) */ ret.p = ql_last(&prof_tdata->lru_ql, lru_link); assert(ret.v != NULL); - if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, - NULL, NULL)) - assert(false); + ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, + NULL); ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); prof_ctx_merge(ret.p->ctx, ret.p); /* ret can now be re-used. */ @@ -516,8 +498,7 @@ prof_lookup(prof_bt_t *bt) /* Allocate and partially initialize a new cnt. 
*/ ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { - if (new_ctx) - prof_ctx_destroy(ctx.p); + malloc_mutex_unlock(&ctx.p->lock); return (NULL); } ql_elm_new(ret.p, cnts_link); @@ -528,15 +509,12 @@ prof_lookup(prof_bt_t *bt) ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - if (new_ctx) - prof_ctx_destroy(ctx.p); + malloc_mutex_unlock(&ctx.p->lock); idalloc(ret.v); return (NULL); } ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(&ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx.p->lock); } else { /* Move ret to the front of the LRU. */ @@ -650,10 +628,11 @@ prof_ctx_destroy(prof_ctx_t *ctx) /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in - * order to avoid a race condition with this function, as does - * prof_ctx_merge() in order to avoid a race between the main body of - * prof_ctx_merge() and entry into this function. + * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to + * avoid a race condition with this function, and prof_ctx_merge() + * artificially raises ctx->cnt_merged.curobjs in order to avoid a race + * between the main body of prof_ctx_merge() and entry into this + * function. */ prof_enter(); malloc_mutex_lock(&ctx->lock); @@ -662,8 +641,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ - if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) - assert(false); + ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); prof_leave(); /* Destroy ctx. */ malloc_mutex_unlock(&ctx->lock); @@ -671,10 +649,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) malloc_mutex_destroy(&ctx->lock); idalloc(ctx); } else { - /* - * Compensate for increment in prof_ctx_merge() or - * prof_lookup(). - */ + /* Compensate for increment in prof_ctx_merge(). */ ctx->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx->lock); prof_leave(); @@ -1081,7 +1056,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(bt->vec, bt->len * sizeof(void *), - 0x8432a476666bbc13LLU); + 0x8432a476666bbc13U); } *hash1 = ret1; @@ -1118,6 +1093,7 @@ prof_tdata_init(void) prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); if (prof_tdata->vec == NULL) { + ckh_delete(&prof_tdata->bt2cnt); idalloc(prof_tdata); return (NULL); @@ -1135,26 +1111,33 @@ prof_tdata_init(void) static void prof_tdata_cleanup(void *arg) { - prof_thr_cnt_t *cnt; - prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; + prof_tdata_t *prof_tdata; - /* - * Delete the hash table. All of its contents can still be iterated - * over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata != NULL) { + prof_thr_cnt_t *cnt; - /* Iteratively merge cnt's into the global stats and delete them. */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); - } + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); - idalloc(prof_tdata->vec); + /* + * Iteratively merge cnt's into the global stats and delete + * them. 
+ */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + prof_ctx_merge(cnt->ctx, cnt); + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + idalloc(cnt); + } - idalloc(prof_tdata); - PROF_TCACHE_SET(NULL); + idalloc(prof_tdata->vec); + + idalloc(prof_tdata); + PROF_TCACHE_SET(NULL); + } } void diff --git a/dep/jemalloc/src/rtree.c b/dep/jemalloc/src/rtree.c index eb0ff1e24af..7753743c5e6 100644 --- a/dep/jemalloc/src/rtree.c +++ b/dep/jemalloc/src/rtree.c @@ -1,4 +1,4 @@ -#define JEMALLOC_RTREE_C_ +#define RTREE_C_ #include "jemalloc/internal/jemalloc_internal.h" rtree_t * @@ -20,10 +20,7 @@ rtree_new(unsigned bits) memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * height)); - if (malloc_mutex_init(&ret->mutex)) { - /* Leak the rtree. */ - return (NULL); - } + malloc_mutex_init(&ret->mutex); ret->height = height; if (bits_per_level * height > bits) ret->level2bits[0] = bits % bits_per_level; diff --git a/dep/jemalloc/src/stats.c b/dep/jemalloc/src/stats.c index dc172e425c0..3dfe0d232a6 100644 --- a/dep/jemalloc/src/stats.c +++ b/dep/jemalloc/src/stats.c @@ -39,10 +39,6 @@ bool opt_stats_print = false; -#ifdef JEMALLOC_STATS -size_t stats_cactive = 0; -#endif - /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -323,7 +319,6 @@ static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - unsigned nthreads; size_t pagesize, pactive, pdirty, mapped; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -333,9 +328,6 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.pagesize", &pagesize, size_t); - CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); @@ -677,26 +669,21 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #ifdef JEMALLOC_STATS { int err; - size_t sszp, ssz; - size_t *cactive; + size_t ssz; size_t allocated, active, mapped; size_t chunks_current, chunks_high, swap_avail; uint64_t chunks_total; size_t huge_allocated; uint64_t huge_nmalloc, huge_ndalloc; - sszp = sizeof(size_t *); ssz = sizeof(size_t); - CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, mapped: %zu\n", - allocated, active, mapped); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", atomic_read_z(cactive)); + "Allocated: %zu, active: %zu, mapped: %zu\n", allocated, + active, mapped); /* Print chunk stats. */ CTL_GET("stats.chunks.total", &chunks_total, uint64_t); @@ -748,7 +735,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, ninitialized++; } - if (ninitialized > 1 || unmerged == false) { + if (ninitialized > 1) { /* Print merged arena stats. 
*/ malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); diff --git a/dep/jemalloc/src/tcache.c b/dep/jemalloc/src/tcache.c index 31c329e1613..cbbe7a113a9 100644 --- a/dep/jemalloc/src/tcache.c +++ b/dep/jemalloc/src/tcache.c @@ -8,9 +8,6 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; -tcache_bin_info_t *tcache_bin_info; -static unsigned stack_nelms; /* Total stack elms per tcache. */ - /* Map of thread-specific caches. */ #ifndef NO_TLS __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); @@ -58,19 +55,18 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *ptr; + void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS - bool merged_stats = false; -#endif + bool first_pass; assert(binind < nbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; arena_bin_t *bin = &arena->bins[binind]; @@ -86,17 +82,17 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem malloc_mutex_lock(&bin->lock); #ifdef JEMALLOC_STATS if (arena == tcache->arena) { - assert(merged_stats == false); - merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } #endif + deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = flush; assert(ptr != NULL); + flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) { size_t pageind = ((uintptr_t)ptr - @@ -111,31 +107,21 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem * locked. Stash the object, so that it can be * handled in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(void **)ptr = deferred; + deferred = ptr; ndeferred++; } } malloc_mutex_unlock(&bin->lock); + + if (first_pass) { + tbin->avail = flush; + first_pass = false; + } } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. 
- */ - arena_bin_t *bin = &tcache->arena->bins[binind]; - malloc_mutex_lock(&bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&bin->lock); - } -#endif - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) + if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } @@ -146,19 +132,18 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #endif ) { - void *ptr; + void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS - bool merged_stats = false; -#endif + bool first_pass; assert(binind < nhbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; malloc_mutex_lock(&arena->lock); @@ -170,7 +155,6 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem tcache->prof_accumbytes = 0; #endif #ifdef JEMALLOC_STATS - merged_stats = true; arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - nbins].nrequests += tbin->tstats.nrequests; @@ -179,10 +163,12 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) } #endif + deferred = NULL; ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = flush; assert(ptr != NULL); + flush = *(void **)ptr; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) arena_dalloc_large(arena, chunk, ptr); @@ -193,32 +179,21 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem * Stash the object, so that it can be handled * in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(void **)ptr = deferred; + deferred = ptr; ndeferred++; } } malloc_mutex_unlock(&arena->lock); + + if (first_pass) { + tbin->avail = flush; + first_pass = false; + } } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. - */ - arena_t *arena = tcache->arena; - malloc_mutex_lock(&arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&arena->lock); - } -#endif - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) + if (tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; } @@ -226,14 +201,10 @@ tcache_t * tcache_create(arena_t *arena) { tcache_t *tcache; - size_t size, stack_offset; + size_t size; unsigned i; size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); - /* Naturally align the pointer stacks. */ - size = PTR_CEILING(size); - stack_offset = size; - size += stack_nelms * sizeof(void *); /* * Round up to the nearest multiple of the cacheline size, in order to * avoid the possibility of false cacheline sharing. 
@@ -246,8 +217,6 @@ tcache_create(arena_t *arena) if (size <= small_maxclass) tcache = (tcache_t *)arena_malloc_small(arena, size, true); - else if (size <= tcache_maxclass) - tcache = (tcache_t *)arena_malloc_large(arena, size, true); else tcache = (tcache_t *)icalloc(size); @@ -264,12 +233,15 @@ tcache_create(arena_t *arena) tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - for (i = 0; i < nhbins; i++) { - tcache->tbins[i].lg_fill_div = 1; - tcache->tbins[i].avail = (void **)((uintptr_t)tcache + - (uintptr_t)stack_offset); - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + for (i = 0; i < nbins; i++) { + if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { + tcache->tbins[i].ncached_max = (arena->bins[i].nregs << + 1); + } else + tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; } + for (; i < nhbins; i++) + tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE; TCACHE_SET(tcache); @@ -280,7 +252,6 @@ void tcache_destroy(tcache_t *tcache) { unsigned i; - size_t tcache_size; #ifdef JEMALLOC_STATS /* Unlink from list of extant tcaches. */ @@ -337,8 +308,7 @@ tcache_destroy(tcache_t *tcache) } #endif - tcache_size = arena_salloc(tcache); - if (tcache_size <= small_maxclass) { + if (arena_salloc(tcache) <= small_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> @@ -352,13 +322,6 @@ tcache_destroy(tcache_t *tcache) malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, tcache, mapelm); malloc_mutex_unlock(&bin->lock); - } else if (tcache_size <= tcache_maxclass) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, tcache); - malloc_mutex_unlock(&arena->lock); } else idalloc(tcache); } @@ -415,13 +378,11 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } #endif -bool +void tcache_boot(void) { if (opt_tcache) { - unsigned i; - /* * If necessary, clamp opt_lg_tcache_max, now that * small_maxclass and arena_maxclass are known. @@ -436,28 +397,6 @@ tcache_boot(void) nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); - /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * - sizeof(tcache_bin_info_t)); - if (tcache_bin_info == NULL) - return (true); - stack_nelms = 0; - for (i = 0; i < nbins; i++) { - if ((arena_bin_info[i].nregs << 1) <= - TCACHE_NSLOTS_SMALL_MAX) { - tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); - } else { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MAX; - } - stack_nelms += tcache_bin_info[i].ncached_max; - } - for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; - stack_nelms += tcache_bin_info[i].ncached_max; - } - /* Compute incremental GC event threshold. */ if (opt_lg_tcache_gc_sweep >= 0) { tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / @@ -473,8 +412,6 @@ tcache_boot(void) abort(); } } - - return (false); } /******************************************************************************/ #endif /* JEMALLOC_TCACHE */ |
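A side effect of this revert worth calling out: the ckh.c hunks swap dassert(ckh->magic == CKH_MAGIC) back to the older assert(ckh->magic = CKH_MAGIG), which uses a single '='. That form assigns the magic value instead of comparing it, so the check can never fail (the asserted result is always nonzero) and it silently rewrites the very field it is supposed to validate. A minimal sketch of the difference; the struct and the magic value here are illustrative, not taken from this tree:

#include <assert.h>
#include <stdint.h>

#define CKH_MAGIC 0x3af2489dU	/* illustrative value */

struct ckh_demo {
	uint32_t magic;
};

int
main(void)
{
	struct ckh_demo ckh = {0};	/* magic deliberately wrong */

	/* Buggy: '=' stores CKH_MAGIC into ckh.magic and asserts on the
	 * (nonzero) result, so this never fires and corrupts the field. */
	assert(ckh.magic = CKH_MAGIC);

	/* Intended: '==' compares without modifying the structure. */
	assert(ckh.magic == CKH_MAGIC);
	return (0);
}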
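The same ckh.c hunks also drop the "usize = sa2u(...); if (usize == 0) ..." guards that the newer code ran before each ipalloc() call; per the removed code, sa2u() returning 0 signals that the requested size/alignment combination cannot be represented. The general guard, sketched here with plain malloc() rather than jemalloc's internal allocators (alloc_table() and its parameters are made up for illustration):

#include <stdint.h>
#include <stdlib.h>

static void *
alloc_table(size_t elem_size, unsigned lg_cells)
{

	/* Refuse a shifted size that would wrap before it reaches the
	 * allocator. */
	if (lg_cells >= sizeof(size_t) * 8 || elem_size > (SIZE_MAX >> lg_cells))
		return (NULL);
	return (malloc(elem_size << lg_cells));
}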
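The thread_arena_ctl() hunk in ctl.c swaps the WRITE/READ order, which changes whether the caller's new index or the previous index is handled first; the mallctl convention itself (oldp/oldlenp return the previous value, newp/newlen supply the new one) is unchanged. A hedged usage sketch against the public entry point, assuming an unprefixed build of this bundled copy (JEMALLOC_P() may add a prefix to the exported symbol, and the header path may differ in this tree):

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned arena, target = 0;
	size_t sz = sizeof(arena);

	/* Read the calling thread's current arena association. */
	if (mallctl("thread.arena", &arena, &sz, NULL, 0) != 0)
		return (1);
	printf("thread.arena: %u\n", arena);

	/* Fetch the old index and bind the thread to arena 0 in one call. */
	if (mallctl("thread.arena", &arena, &sz, &target, sizeof(target)) != 0)
		return (1);
	return (0);
}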
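In jemalloc.c the revert replaces the load-based policy in choose_arena_hard() (pick the arena with the fewest assigned threads, tracked via the removed nthreads counters) with the older round-robin next_arena counter. The core of the restored policy, reduced to a sketch with locking and lazy arena creation omitted and placeholder types:

#define NARENAS 4U

static unsigned next_arena;
static void *arenas[NARENAS];	/* stands in for arena_t *arenas[] */

static void *
choose_arena_round_robin(void)
{
	void *ret;

	ret = arenas[next_arena];
	next_arena = (next_arena + 1) % NARENAS;
	return (ret);
}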
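huge_palloc() in huge.c keeps the same over-allocate-and-trim strategy on both sides of the revert: reserve alignment + size bytes, then give back the misaligned head and the unused tail. A standalone sketch of that technique using mmap()/munmap() in place of the chunk layer, assuming alignment and size are nonzero multiples of the page size and alignment is a power of two:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

static void *
aligned_map(size_t size, size_t alignment)
{
	size_t alloc_size = size + alignment;
	uintptr_t base, aligned;
	void *addr;

	if (alloc_size < size)
		return (NULL);				/* overflow */
	addr = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED)
		return (NULL);
	base = (uintptr_t)addr;
	aligned = (base + alignment - 1) & ~((uintptr_t)alignment - 1);
	if (aligned != base)
		munmap(addr, aligned - base);		/* trim leading space */
	if (aligned + size < base + alloc_size) {
		munmap((void *)(aligned + size),
		    base + alloc_size - (aligned + size));	/* trim trailing space */
	}
	return ((void *)aligned);
}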
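The prof.c hunks mostly move the JEMALLOC_PROF_LIBGCC implementation of prof_backtrace() around rather than changing how it works: _Unwind_Backtrace() drives a per-frame callback and _Unwind_GetIP() extracts each frame's instruction pointer. A minimal standalone use of that libgcc interface (the frame cap and the printing are arbitrary):

#include <stdio.h>
#include <unwind.h>

#define MAX_FRAMES 32

struct bt {
	void *vec[MAX_FRAMES];
	unsigned len;
};

static _Unwind_Reason_Code
collect(struct _Unwind_Context *context, void *arg)
{
	struct bt *bt = (struct bt *)arg;

	if (bt->len == MAX_FRAMES)
		return (_URC_END_OF_STACK);
	bt->vec[bt->len++] = (void *)_Unwind_GetIP(context);
	return (_URC_NO_REASON);
}

int
main(void)
{
	struct bt bt = {{0}, 0};
	unsigned i;

	_Unwind_Backtrace(collect, &bt);
	for (i = 0; i < bt.len; i++)
		printf("frame %u: %p\n", i, bt.vec[i]);
	return (0);
}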
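tcache.c is reverted from the array-backed avail stack (with its tcache_bin_info/ncached_max bookkeeping) back to an intrusive singly linked list: each cached object stores the next pointer in its own first word, and during a flush any object that belongs to an arena other than the one currently locked is pushed onto a deferred list and retried on a later pass. One pass of that list manipulation, with the ownership test and the actual deallocation stubbed out (the real code derives both from the chunk header):

#include <stdbool.h>
#include <stddef.h>

/* Placeholders for "does this object belong to the arena whose lock we hold"
 * and "free it into that arena". */
static bool object_in_locked_arena(void *ptr) { (void)ptr; return (true); }
static void dalloc_into_locked_arena(void *ptr) { (void)ptr; }

static void *
flush_one_pass(void *flush)
{
	void *deferred = NULL, *ptr;

	while (flush != NULL) {
		ptr = flush;
		flush = *(void **)ptr;		/* next pointer lives inside the object */
		if (object_in_locked_arena(ptr))
			dalloc_into_locked_arena(ptr);
		else {
			*(void **)ptr = deferred;	/* stash for a later pass */
			deferred = ptr;
		}
	}
	return (deferred);	/* caller locks the next arena and repeats */
}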