diff options
Diffstat (limited to 'deps/jemalloc')
63 files changed, 22007 insertions, 0 deletions
diff --git a/deps/jemalloc/CMakeLists.txt b/deps/jemalloc/CMakeLists.txt new file mode 100644 index 0000000000..fe976cb6ca --- /dev/null +++ b/deps/jemalloc/CMakeLists.txt @@ -0,0 +1,60 @@ +# Copyright (C) +# +# This file is free software; as a special exception the author gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY, to the extent permitted by law; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# We need to generate the jemalloc_def.h header based on platform-specific settings +if (PLATFORM EQUAL 32) + set(JEM_SIZEDEF 2) + set(JEM_TLSMODEL) +else() + set(JEM_SIZEDEF 3) + set(JEM_TLSMODEL "__attribute__\(\(tls_model\(\"initial-exec\"\)\)\)") +endif() + +# Create the header, so we can use it +configure_file( + "${CMAKE_SOURCE_DIR}/modules/worldengine/deps/jemalloc/jemalloc_defs.h.in.cmake" + "${BUILDDIR}/jemalloc_defs.h" + @ONLY +) + +# Done, let's continue +set(jemalloc_STAT_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/src/arena.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/atomic.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/base.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/bitmap.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/chunk.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/chunk_dss.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/chunk_mmap.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/ckh.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/ctl.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/extent.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/hash.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/huge.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/jemalloc.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/mb.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/mutex.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/prof.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/quarantine.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/rtree.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/stats.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/tcache.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/tsd.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/util.c +) + +include_directories( + ${BUILDDIR}/ + ${CMAKE_CURRENT_SOURCE_DIR}/include +) + +add_definitions(-D_GNU_SOURCE -D_REENTRANT) + +add_library(jemalloc STATIC ${jemalloc_STAT_SRC}) diff --git a/deps/jemalloc/COPYING b/deps/jemalloc/COPYING new file mode 100644 index 0000000000..bdda0feb9e --- /dev/null +++ b/deps/jemalloc/COPYING @@ -0,0 +1,27 @@ +Unless otherwise specified, files in the jemalloc source distribution are +subject to the following license: +-------------------------------------------------------------------------------- +Copyright (C) 2002-2014 Jason Evans <jasone@canonware.com>. +All rights reserved. +Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. +Copyright (C) 2009-2014 Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice(s), + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice(s), + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- diff --git a/deps/jemalloc/ChangeLog b/deps/jemalloc/ChangeLog new file mode 100644 index 0000000000..d56ee999e6 --- /dev/null +++ b/deps/jemalloc/ChangeLog @@ -0,0 +1,548 @@ +Following are change highlights associated with official releases. Important +bug fixes are all mentioned, but internal enhancements are omitted here for +brevity (even though they are more fun to write about). Much more detail can be +found in the git revision history: + + https://github.com/jemalloc/jemalloc + +* 3.6.0 (March 31, 2014) + + This version contains a critical bug fix for a regression present in 3.5.0 and + 3.5.1. + + Bug fixes: + - Fix a regression in arena_chunk_alloc() that caused crashes during + small/large allocation if chunk allocation failed. In the absence of this + bug, chunk allocation failure would result in allocation failure, e.g. NULL + return from malloc(). This regression was introduced in 3.5.0. + - Fix backtracing for gcc intrinsics-based backtracing by specifying + -fno-omit-frame-pointer to gcc. Note that the application (and all the + libraries it links to) must also be compiled with this option for + backtracing to be reliable. + - Use dss allocation precedence for huge allocations as well as small/large + allocations. + - Fix test assertion failure message formatting. This bug did not manifect on + x86_64 systems because of implementation subtleties in va_list. + - Fix inconsequential test failures for hash and SFMT code. + + New features: + - Support heap profiling on FreeBSD. This feature depends on the proc + filesystem being mounted during heap profile dumping. + +* 3.5.1 (February 25, 2014) + + This version primarily addresses minor bugs in test code. + + Bug fixes: + - Configure Solaris/Illumos to use MADV_FREE. + - Fix junk filling for mremap(2)-based huge reallocation. This is only + relevant if configuring with the --enable-mremap option specified. + - Avoid compilation failure if 'restrict' C99 keyword is not supported by the + compiler. + - Add a configure test for SSE2 rather than assuming it is usable on i686 + systems. This fixes test compilation errors, especially on 32-bit Linux + systems. + - Fix mallctl argument size mismatches (size_t vs. uint64_t) in the stats unit + test. + - Fix/remove flawed alignment-related overflow tests. + - Prevent compiler optimizations that could change backtraces in the + prof_accum unit test. + +* 3.5.0 (January 22, 2014) + + This version focuses on refactoring and automated testing, though it also + includes some non-trivial heap profiling optimizations not mentioned below. + + New features: + - Add the *allocx() API, which is a successor to the experimental *allocm() + API. The *allocx() functions are slightly simpler to use because they have + fewer parameters, they directly return the results of primary interest, and + mallocx()/rallocx() avoid the strict aliasing pitfall that + allocm()/rallocm() share with posix_memalign(). Note that *allocm() is + slated for removal in the next non-bugfix release. + - Add support for LinuxThreads. + + Bug fixes: + - Unless heap profiling is enabled, disable floating point code and don't link + with libm. This, in combination with e.g. EXTRA_CFLAGS=-mno-sse on x64 + systems, makes it possible to completely disable floating point register + use. Some versions of glibc neglect to save/restore caller-saved floating + point registers during dynamic lazy symbol loading, and the symbol loading + code uses whatever malloc the application happens to have linked/loaded + with, the result being potential floating point register corruption. + - Report ENOMEM rather than EINVAL if an OOM occurs during heap profiling + backtrace creation in imemalign(). This bug impacted posix_memalign() and + aligned_alloc(). + - Fix a file descriptor leak in a prof_dump_maps() error path. + - Fix prof_dump() to close the dump file descriptor for all relevant error + paths. + - Fix rallocm() to use the arena specified by the ALLOCM_ARENA(s) flag for + allocation, not just deallocation. + - Fix a data race for large allocation stats counters. + - Fix a potential infinite loop during thread exit. This bug occurred on + Solaris, and could affect other platforms with similar pthreads TSD + implementations. + - Don't junk-fill reallocations unless usable size changes. This fixes a + violation of the *allocx()/*allocm() semantics. + - Fix growing large reallocation to junk fill new space. + - Fix huge deallocation to junk fill when munmap is disabled. + - Change the default private namespace prefix from empty to je_, and change + --with-private-namespace-prefix so that it prepends an additional prefix + rather than replacing je_. This reduces the likelihood of applications + which statically link jemalloc experiencing symbol name collisions. + - Add missing private namespace mangling (relevant when + --with-private-namespace is specified). + - Add and use JEMALLOC_INLINE_C so that static inline functions are marked as + static even for debug builds. + - Add a missing mutex unlock in a malloc_init_hard() error path. In practice + this error path is never executed. + - Fix numerous bugs in malloc_strotumax() error handling/reporting. These + bugs had no impact except for malformed inputs. + - Fix numerous bugs in malloc_snprintf(). These bugs were not exercised by + existing calls, so they had no impact. + +* 3.4.1 (October 20, 2013) + + Bug fixes: + - Fix a race in the "arenas.extend" mallctl that could cause memory corruption + of internal data structures and subsequent crashes. + - Fix Valgrind integration flaws that caused Valgrind warnings about reads of + uninitialized memory in: + + arena chunk headers + + internal zero-initialized data structures (relevant to tcache and prof + code) + - Preserve errno during the first allocation. A readlink(2) call during + initialization fails unless /etc/malloc.conf exists, so errno was typically + set during the first allocation prior to this fix. + - Fix compilation warnings reported by gcc 4.8.1. + +* 3.4.0 (June 2, 2013) + + This version is essentially a small bugfix release, but the addition of + aarch64 support requires that the minor version be incremented. + + Bug fixes: + - Fix race-triggered deadlocks in chunk_record(). These deadlocks were + typically triggered by multiple threads concurrently deallocating huge + objects. + + New features: + - Add support for the aarch64 architecture. + +* 3.3.1 (March 6, 2013) + + This version fixes bugs that are typically encountered only when utilizing + custom run-time options. + + Bug fixes: + - Fix a locking order bug that could cause deadlock during fork if heap + profiling were enabled. + - Fix a chunk recycling bug that could cause the allocator to lose track of + whether a chunk was zeroed. On FreeBSD, NetBSD, and OS X, it could cause + corruption if allocating via sbrk(2) (unlikely unless running with the + "dss:primary" option specified). This was completely harmless on Linux + unless using mlockall(2) (and unlikely even then, unless the + --disable-munmap configure option or the "dss:primary" option was + specified). This regression was introduced in 3.1.0 by the + mlockall(2)/madvise(2) interaction fix. + - Fix TLS-related memory corruption that could occur during thread exit if the + thread never allocated memory. Only the quarantine and prof facilities were + susceptible. + - Fix two quarantine bugs: + + Internal reallocation of the quarantined object array leaked the old + array. + + Reallocation failure for internal reallocation of the quarantined object + array (very unlikely) resulted in memory corruption. + - Fix Valgrind integration to annotate all internally allocated memory in a + way that keeps Valgrind happy about internal data structure access. + - Fix building for s390 systems. + +* 3.3.0 (January 23, 2013) + + This version includes a few minor performance improvements in addition to the + listed new features and bug fixes. + + New features: + - Add clipping support to lg_chunk option processing. + - Add the --enable-ivsalloc option. + - Add the --without-export option. + - Add the --disable-zone-allocator option. + + Bug fixes: + - Fix "arenas.extend" mallctl to output the number of arenas. + - Fix chunk_recycle() to unconditionally inform Valgrind that returned memory + is undefined. + - Fix build break on FreeBSD related to alloca.h. + +* 3.2.0 (November 9, 2012) + + In addition to a couple of bug fixes, this version modifies page run + allocation and dirty page purging algorithms in order to better control + page-level virtual memory fragmentation. + + Incompatible changes: + - Change the "opt.lg_dirty_mult" default from 5 to 3 (32:1 to 8:1). + + Bug fixes: + - Fix dss/mmap allocation precedence code to use recyclable mmap memory only + after primary dss allocation fails. + - Fix deadlock in the "arenas.purge" mallctl. This regression was introduced + in 3.1.0 by the addition of the "arena.<i>.purge" mallctl. + +* 3.1.0 (October 16, 2012) + + New features: + - Auto-detect whether running inside Valgrind, thus removing the need to + manually specify MALLOC_CONF=valgrind:true. + - Add the "arenas.extend" mallctl, which allows applications to create + manually managed arenas. + - Add the ALLOCM_ARENA() flag for {,r,d}allocm(). + - Add the "opt.dss", "arena.<i>.dss", and "stats.arenas.<i>.dss" mallctls, + which provide control over dss/mmap precedence. + - Add the "arena.<i>.purge" mallctl, which obsoletes "arenas.purge". + - Define LG_QUANTUM for hppa. + + Incompatible changes: + - Disable tcache by default if running inside Valgrind, in order to avoid + making unallocated objects appear reachable to Valgrind. + - Drop const from malloc_usable_size() argument on Linux. + + Bug fixes: + - Fix heap profiling crash if sampled object is freed via realloc(p, 0). + - Remove const from __*_hook variable declarations, so that glibc can modify + them during process forking. + - Fix mlockall(2)/madvise(2) interaction. + - Fix fork(2)-related deadlocks. + - Fix error return value for "thread.tcache.enabled" mallctl. + +* 3.0.0 (May 11, 2012) + + Although this version adds some major new features, the primary focus is on + internal code cleanup that facilitates maintainability and portability, most + of which is not reflected in the ChangeLog. This is the first release to + incorporate substantial contributions from numerous other developers, and the + result is a more broadly useful allocator (see the git revision history for + contribution details). Note that the license has been unified, thanks to + Facebook granting a license under the same terms as the other copyright + holders (see COPYING). + + New features: + - Implement Valgrind support, redzones, and quarantine. + - Add support for additional platforms: + + FreeBSD + + Mac OS X Lion + + MinGW + + Windows (no support yet for replacing the system malloc) + - Add support for additional architectures: + + MIPS + + SH4 + + Tilera + - Add support for cross compiling. + - Add nallocm(), which rounds a request size up to the nearest size class + without actually allocating. + - Implement aligned_alloc() (blame C11). + - Add the "thread.tcache.enabled" mallctl. + - Add the "opt.prof_final" mallctl. + - Update pprof (from gperftools 2.0). + - Add the --with-mangling option. + - Add the --disable-experimental option. + - Add the --disable-munmap option, and make it the default on Linux. + - Add the --enable-mremap option, which disables use of mremap(2) by default. + + Incompatible changes: + - Enable stats by default. + - Enable fill by default. + - Disable lazy locking by default. + - Rename the "tcache.flush" mallctl to "thread.tcache.flush". + - Rename the "arenas.pagesize" mallctl to "arenas.page". + - Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB). + - Change the "opt.prof_accum" default from true to false. + + Removed features: + - Remove the swap feature, including the "config.swap", "swap.avail", + "swap.prezeroed", "swap.nfds", and "swap.fds" mallctls. + - Remove highruns statistics, including the + "stats.arenas.<i>.bins.<j>.highruns" and + "stats.arenas.<i>.lruns.<j>.highruns" mallctls. + - As part of small size class refactoring, remove the "opt.lg_[qc]space_max", + "arenas.cacheline", "arenas.subpage", "arenas.[tqcs]space_{min,max}", and + "arenas.[tqcs]bins" mallctls. + - Remove the "arenas.chunksize" mallctl. + - Remove the "opt.lg_prof_tcmax" option. + - Remove the "opt.lg_prof_bt_max" option. + - Remove the "opt.lg_tcache_gc_sweep" option. + - Remove the --disable-tiny option, including the "config.tiny" mallctl. + - Remove the --enable-dynamic-page-shift configure option. + - Remove the --enable-sysv configure option. + + Bug fixes: + - Fix a statistics-related bug in the "thread.arena" mallctl that could cause + invalid statistics and crashes. + - Work around TLS deallocation via free() on Linux. This bug could cause + write-after-free memory corruption. + - Fix a potential deadlock that could occur during interval- and + growth-triggered heap profile dumps. + - Fix large calloc() zeroing bugs due to dropping chunk map unzeroed flags. + - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could + cause memory corruption and crashes with --enable-dss specified. + - Fix fork-related bugs that could cause deadlock in children between fork + and exec. + - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. + - Fix realloc(p, 0) to act like free(p). + - Do not enforce minimum alignment in memalign(). + - Check for NULL pointer in malloc_usable_size(). + - Fix an off-by-one heap profile statistics bug that could be observed in + interval- and growth-triggered heap profiles. + - Fix the "epoch" mallctl to update cached stats even if the passed in epoch + is 0. + - Fix bin->runcur management to fix a layout policy bug. This bug did not + affect correctness. + - Fix a bug in choose_arena_hard() that potentially caused more arenas to be + initialized than necessary. + - Add missing "opt.lg_tcache_max" mallctl implementation. + - Use glibc allocator hooks to make mixed allocator usage less likely. + - Fix build issues for --disable-tcache. + - Don't mangle pthread_create() when --with-private-namespace is specified. + +* 2.2.5 (November 14, 2011) + + Bug fixes: + - Fix huge_ralloc() race when using mremap(2). This is a serious bug that + could cause memory corruption and/or crashes. + - Fix huge_ralloc() to maintain chunk statistics. + - Fix malloc_stats_print(..., "a") output. + +* 2.2.4 (November 5, 2011) + + Bug fixes: + - Initialize arenas_tsd before using it. This bug existed for 2.2.[0-3], as + well as for --disable-tls builds in earlier releases. + - Do not assume a 4 KiB page size in test/rallocm.c. + +* 2.2.3 (August 31, 2011) + + This version fixes numerous bugs related to heap profiling. + + Bug fixes: + - Fix a prof-related race condition. This bug could cause memory corruption, + but only occurred in non-default configurations (prof_accum:false). + - Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is + excluded from backtraces). + - Fix a prof-related bug in realloc() (only triggered by OOM errors). + - Fix prof-related bugs in allocm() and rallocm(). + - Fix prof_tdata_cleanup() for --disable-tls builds. + - Fix a relative include path, to fix objdir builds. + +* 2.2.2 (July 30, 2011) + + Bug fixes: + - Fix a build error for --disable-tcache. + - Fix assertions in arena_purge() (for real this time). + - Add the --with-private-namespace option. This is a workaround for symbol + conflicts that can inadvertently arise when using static libraries. + +* 2.2.1 (March 30, 2011) + + Bug fixes: + - Implement atomic operations for x86/x64. This fixes compilation failures + for versions of gcc that are still in wide use. + - Fix an assertion in arena_purge(). + +* 2.2.0 (March 22, 2011) + + This version incorporates several improvements to algorithms and data + structures that tend to reduce fragmentation and increase speed. + + New features: + - Add the "stats.cactive" mallctl. + - Update pprof (from google-perftools 1.7). + - Improve backtracing-related configuration logic, and add the + --disable-prof-libgcc option. + + Bug fixes: + - Change default symbol visibility from "internal", to "hidden", which + decreases the overhead of library-internal function calls. + - Fix symbol visibility so that it is also set on OS X. + - Fix a build dependency regression caused by the introduction of the .pic.o + suffix for PIC object files. + - Add missing checks for mutex initialization failures. + - Don't use libgcc-based backtracing except on x64, where it is known to work. + - Fix deadlocks on OS X that were due to memory allocation in + pthread_mutex_lock(). + - Heap profiling-specific fixes: + + Fix memory corruption due to integer overflow in small region index + computation, when using a small enough sample interval that profiling + context pointers are stored in small run headers. + + Fix a bootstrap ordering bug that only occurred with TLS disabled. + + Fix a rallocm() rsize bug. + + Fix error detection bugs for aligned memory allocation. + +* 2.1.3 (March 14, 2011) + + Bug fixes: + - Fix a cpp logic regression (due to the "thread.{de,}allocatedp" mallctl fix + for OS X in 2.1.2). + - Fix a "thread.arena" mallctl bug. + - Fix a thread cache stats merging bug. + +* 2.1.2 (March 2, 2011) + + Bug fixes: + - Fix "thread.{de,}allocatedp" mallctl for OS X. + - Add missing jemalloc.a to build system. + +* 2.1.1 (January 31, 2011) + + Bug fixes: + - Fix aligned huge reallocation (affected allocm()). + - Fix the ALLOCM_LG_ALIGN macro definition. + - Fix a heap dumping deadlock. + - Fix a "thread.arena" mallctl bug. + +* 2.1.0 (December 3, 2010) + + This version incorporates some optimizations that can't quite be considered + bug fixes. + + New features: + - Use Linux's mremap(2) for huge object reallocation when possible. + - Avoid locking in mallctl*() when possible. + - Add the "thread.[de]allocatedp" mallctl's. + - Convert the manual page source from roff to DocBook, and generate both roff + and HTML manuals. + + Bug fixes: + - Fix a crash due to incorrect bootstrap ordering. This only impacted + --enable-debug --enable-dss configurations. + - Fix a minor statistics bug for mallctl("swap.avail", ...). + +* 2.0.1 (October 29, 2010) + + Bug fixes: + - Fix a race condition in heap profiling that could cause undefined behavior + if "opt.prof_accum" were disabled. + - Add missing mutex unlocks for some OOM error paths in the heap profiling + code. + - Fix a compilation error for non-C99 builds. + +* 2.0.0 (October 24, 2010) + + This version focuses on the experimental *allocm() API, and on improved + run-time configuration/introspection. Nonetheless, numerous performance + improvements are also included. + + New features: + - Implement the experimental {,r,s,d}allocm() API, which provides a superset + of the functionality available via malloc(), calloc(), posix_memalign(), + realloc(), malloc_usable_size(), and free(). These functions can be used to + allocate/reallocate aligned zeroed memory, ask for optional extra memory + during reallocation, prevent object movement during reallocation, etc. + - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is + more human-readable, and more flexible. For example: + JEMALLOC_OPTIONS=AJP + is now: + MALLOC_CONF=abort:true,fill:true,stats_print:true + - Port to Apple OS X. Sponsored by Mozilla. + - Make it possible for the application to control thread-->arena mappings via + the "thread.arena" mallctl. + - Add compile-time support for all TLS-related functionality via pthreads TSD. + This is mainly of interest for OS X, which does not support TLS, but has a + TSD implementation with similar performance. + - Override memalign() and valloc() if they are provided by the system. + - Add the "arenas.purge" mallctl, which can be used to synchronously purge all + dirty unused pages. + - Make cumulative heap profiling data optional, so that it is possible to + limit the amount of memory consumed by heap profiling data structures. + - Add per thread allocation counters that can be accessed via the + "thread.allocated" and "thread.deallocated" mallctls. + + Incompatible changes: + - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above). + - Increase default backtrace depth from 4 to 128 for heap profiling. + - Disable interval-based profile dumps by default. + + Bug fixes: + - Remove bad assertions in fork handler functions. These assertions could + cause aborts for some combinations of configure settings. + - Fix strerror_r() usage to deal with non-standard semantics in GNU libc. + - Fix leak context reporting. This bug tended to cause the number of contexts + to be underreported (though the reported number of objects and bytes were + correct). + - Fix a realloc() bug for large in-place growing reallocation. This bug could + cause memory corruption, but it was hard to trigger. + - Fix an allocation bug for small allocations that could be triggered if + multiple threads raced to create a new run of backing pages. + - Enhance the heap profiler to trigger samples based on usable size, rather + than request size. + - Fix a heap profiling bug due to sometimes losing track of requested object + size for sampled objects. + +* 1.0.3 (August 12, 2010) + + Bug fixes: + - Fix the libunwind-based implementation of stack backtracing (used for heap + profiling). This bug could cause zero-length backtraces to be reported. + - Add a missing mutex unlock in library initialization code. If multiple + threads raced to initialize malloc, some of them could end up permanently + blocked. + +* 1.0.2 (May 11, 2010) + + Bug fixes: + - Fix junk filling of large objects, which could cause memory corruption. + - Add MAP_NORESERVE support for chunk mapping, because otherwise virtual + memory limits could cause swap file configuration to fail. Contributed by + Jordan DeLong. + +* 1.0.1 (April 14, 2010) + + Bug fixes: + - Fix compilation when --enable-fill is specified. + - Fix threads-related profiling bugs that affected accuracy and caused memory + to be leaked during thread exit. + - Fix dirty page purging race conditions that could cause crashes. + - Fix crash in tcache flushing code during thread destruction. + +* 1.0.0 (April 11, 2010) + + This release focuses on speed and run-time introspection. Numerous + algorithmic improvements make this release substantially faster than its + predecessors. + + New features: + - Implement autoconf-based configuration system. + - Add mallctl*(), for the purposes of introspection and run-time + configuration. + - Make it possible for the application to manually flush a thread's cache, via + the "tcache.flush" mallctl. + - Base maximum dirty page count on proportion of active memory. + - Compute various addtional run-time statistics, including per size class + statistics for large objects. + - Expose malloc_stats_print(), which can be called repeatedly by the + application. + - Simplify the malloc_message() signature to only take one string argument, + and incorporate an opaque data pointer argument for use by the application + in combination with malloc_stats_print(). + - Add support for allocation backed by one or more swap files, and allow the + application to disable over-commit if swap files are in use. + - Implement allocation profiling and leak checking. + + Removed features: + - Remove the dynamic arena rebalancing code, since thread-specific caching + reduces its utility. + + Bug fixes: + - Modify chunk allocation to work when address space layout randomization + (ASLR) is in use. + - Fix thread cleanup bugs related to TLS destruction. + - Handle 0-size allocation requests in posix_memalign(). + - Fix a chunk leak. The leaked chunks were never touched, so this impacted + virtual memory usage, but not physical memory usage. + +* linux_2008082[78]a (August 27/28, 2008) + + These snapshot releases are the simple result of incorporating Linux-specific + support into the FreeBSD malloc sources. + +-------------------------------------------------------------------------------- +vim:filetype=text:textwidth=80 diff --git a/deps/jemalloc/README b/deps/jemalloc/README new file mode 100644 index 0000000000..9b268f4228 --- /dev/null +++ b/deps/jemalloc/README @@ -0,0 +1,20 @@ +jemalloc is a general purpose malloc(3) implementation that emphasizes +fragmentation avoidance and scalable concurrency support. jemalloc first came +into use as the FreeBSD libc allocator in 2005, and since then it has found its +way into numerous applications that rely on its predictable behavior. In 2010 +jemalloc development efforts broadened to include developer support features +such as heap profiling, Valgrind integration, and extensive monitoring/tuning +hooks. Modern jemalloc releases continue to be integrated back into FreeBSD, +and therefore versatility remains critical. Ongoing development efforts trend +toward making jemalloc among the best allocators for a broad range of demanding +applications, and eliminating/mitigating weaknesses that have practical +repercussions for real world applications. + +The COPYING file contains copyright and licensing information. + +The INSTALL file contains information on how to configure, build, and install +jemalloc. + +The ChangeLog file contains a brief summary of changes for each release. + +URL: http://www.canonware.com/jemalloc/ diff --git a/deps/jemalloc/TC_NOTE.txt b/deps/jemalloc/TC_NOTE.txt new file mode 100644 index 0000000000..7a44e3563c --- /dev/null +++ b/deps/jemalloc/TC_NOTE.txt @@ -0,0 +1,4 @@ +*** THIS FILE CONTAINS INFORMATION ABOUT CHANGES DONE TO THE JEMALLOC LIBRARY FILES *** +Removed from archive, as OSX does not use jemalloc: + src/zone.c + include/jemalloc/internal/zone.h diff --git a/deps/jemalloc/VERSION b/deps/jemalloc/VERSION new file mode 100644 index 0000000000..dace31ba7b --- /dev/null +++ b/deps/jemalloc/VERSION @@ -0,0 +1 @@ +3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340 diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h new file mode 100644 index 0000000000..9d000c03de --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/arena.h @@ -0,0 +1,1063 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized + * as small as possible such that this setting is still honored, without + * violating other constraints. The goal is to make runs as small as possible + * without exceeding a per run external fragmentation threshold. + * + * We use binary fixed point math for overhead computations, where the binary + * point is implicitly RUN_BFP bits to the left. + * + * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be + * honored for some/all object sizes, since when heap profiling is enabled + * there is one pointer of header overhead per object (plus a constant). This + * constraint is relaxed (ignored) for runs that are so small that the + * per-region overhead is greater than: + * + * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)) + */ +#define RUN_BFP 12 +/* \/ Implicit binary fixed point. */ +#define RUN_MAX_OVRHD 0x0000003dU +#define RUN_MAX_OVRHD_RELAX 0x00001800U + +/* Maximum number of regions in one run. */ +#define LG_RUN_MAXREGS 11 +#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) + +/* + * Minimum redzone size. Redzones may be larger than this if necessary to + * preserve region alignment. + */ +#define REDZONE_MINSIZE 16 + +/* + * The minimum ratio of active:dirty pages per arena is computed as: + * + * (nactive >> opt_lg_dirty_mult) >= ndirty + * + * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times + * as many active pages as dirty pages. + */ +#define LG_DIRTY_MULT_DEFAULT 3 + +typedef struct arena_chunk_map_s arena_chunk_map_t; +typedef struct arena_chunk_s arena_chunk_t; +typedef struct arena_run_s arena_run_t; +typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_bin_s arena_bin_t; +typedef struct arena_s arena_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Each element of the chunk map corresponds to one page within the chunk. */ +struct arena_chunk_map_s { +#ifndef JEMALLOC_PROF + /* + * Overlay prof_ctx in order to allow it to be referenced by dead code. + * Such antics aren't warranted for per arena data structures, but + * chunk map overhead accounts for a percentage of memory, rather than + * being just a fixed cost. + */ + union { +#endif + union { + /* + * Linkage for run trees. There are two disjoint uses: + * + * 1) arena_t's runs_avail tree. + * 2) arena_run_t conceptually uses this linkage for in-use + * non-full runs, rather than directly embedding linkage. + */ + rb_node(arena_chunk_map_t) rb_link; + /* + * List of runs currently in purgatory. arena_chunk_purge() + * temporarily allocates runs that contain dirty pages while + * purging, so that other threads cannot use the runs while the + * purging thread is operating without the arena lock held. + */ + ql_elm(arena_chunk_map_t) ql_link; + } u; + + /* Profile counters, used for large object runs. */ + prof_ctx_t *prof_ctx; +#ifndef JEMALLOC_PROF + }; /* union { ... }; */ +#endif + + /* + * Run address (or size) and various flags are stored together. The bit + * layout looks like (assuming 32-bit system): + * + * ???????? ???????? ????nnnn nnnndula + * + * ? : Unallocated: Run address for first/last pages, unset for internal + * pages. + * Small: Run page offset. + * Large: Run size for first page, unset for trailing pages. + * n : binind for small size class, BININD_INVALID for large size class. + * d : dirty? + * u : unzeroed? + * l : large? + * a : allocated? + * + * Following are example bit patterns for the three types of runs. + * + * p : run page offset + * s : run size + * n : binind for size class; large objects set these to BININD_INVALID + * except for promoted allocations (see prof_promote) + * x : don't care + * - : 0 + * + : 1 + * [DULA] : bit set + * [dula] : bit unset + * + * Unallocated (clean): + * ssssssss ssssssss ssss++++ ++++du-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx + * ssssssss ssssssss ssss++++ ++++dU-a + * + * Unallocated (dirty): + * ssssssss ssssssss ssss++++ ++++D--a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * ssssssss ssssssss ssss++++ ++++D--a + * + * Small: + * pppppppp pppppppp ppppnnnn nnnnd--A + * pppppppp pppppppp ppppnnnn nnnn---A + * pppppppp pppppppp ppppnnnn nnnnd--A + * + * Large: + * ssssssss ssssssss ssss++++ ++++D-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----++++ ++++D-LA + * + * Large (sampled, size <= PAGE): + * ssssssss ssssssss ssssnnnn nnnnD-LA + * + * Large (not sampled, size == PAGE): + * ssssssss ssssssss ssss++++ ++++D-LA + */ + size_t bits; +#define CHUNK_MAP_BININD_SHIFT 4 +#define BININD_INVALID ((size_t)0xffU) +/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */ +#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U) +#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK +#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU) +#define CHUNK_MAP_DIRTY ((size_t)0x8U) +#define CHUNK_MAP_UNZEROED ((size_t)0x4U) +#define CHUNK_MAP_LARGE ((size_t)0x2U) +#define CHUNK_MAP_ALLOCATED ((size_t)0x1U) +#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED +}; +typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; +typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; +typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t; + +/* Arena chunk header. */ +struct arena_chunk_s { + /* Arena that owns the chunk. */ + arena_t *arena; + + /* Linkage for tree of arena chunks that contain dirty runs. */ + rb_node(arena_chunk_t) dirty_link; + + /* Number of dirty pages. */ + size_t ndirty; + + /* Number of available runs. */ + size_t nruns_avail; + + /* + * Number of available run adjacencies that purging could coalesce. + * Clean and dirty available runs are not coalesced, which causes + * virtual memory fragmentation. The ratio of + * (nruns_avail-nruns_adjac):nruns_adjac is used for tracking this + * fragmentation. + */ + size_t nruns_adjac; + + /* + * Map of pages within chunk that keeps track of free/large/small. The + * first map_bias entries are omitted, since the chunk header does not + * need to be tracked in the map. This omission saves a header page + * for common chunk sizes (e.g. 4 MiB). + */ + arena_chunk_map_t map[1]; /* Dynamically sized. */ +}; +typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; + +struct arena_run_s { + /* Bin this run is associated with. */ + arena_bin_t *bin; + + /* Index of next region that has never been allocated, or nregs. */ + uint32_t nextind; + + /* Number of free regions in run. */ + unsigned nfree; +}; + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each run has the following layout: + * + * /--------------------\ + * | arena_run_t header | + * | ... | + * bitmap_offset | bitmap | + * | ... | + * ctx0_offset | ctx map | + * | ... | + * |--------------------| + * | redzone | + * reg0_offset | region 0 | + * | redzone | + * |--------------------| \ + * | redzone | | + * | region 1 | > reg_interval + * | redzone | / + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | redzone | + * | region nregs-1 | + * | redzone | + * |--------------------| + * | alignment pad? | + * \--------------------/ + * + * reg_interval has at least the same minimum alignment as reg_size; this + * preserves the alignment constraint that sa2u() depends on. Alignment pad is + * either 0 or redzone_size; it is present only if needed to align reg0_offset. + */ +struct arena_bin_info_s { + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Redzone size. */ + size_t redzone_size; + + /* Interval between regions (reg_size + (redzone_size << 1)). */ + size_t reg_interval; + + /* Total size of a run for this bin's size class. */ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + + /* + * Offset of first bitmap_t element in a run header for this bin's size + * class. + */ + uint32_t bitmap_offset; + + /* + * Metadata used to manipulate bitmaps for runs associated with this + * bin. + */ + bitmap_info_t bitmap_info; + + /* + * Offset of first (prof_ctx_t *) in a run header for this bin's size + * class, or 0 if (config_prof == false || opt_prof == false). + */ + uint32_t ctx0_offset; + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; +}; + +struct arena_bin_s { + /* + * All operations on runcur, runs, and stats require that lock be + * locked. Run allocation/deallocation are protected by the arena lock, + * which may be acquired while holding one or more bin locks, but not + * vise versa. + */ + malloc_mutex_t lock; + + /* + * Current run being used to service allocations of this bin's size + * class. + */ + arena_run_t *runcur; + + /* + * Tree of non-full runs. This tree is used when looking for an + * existing run when runcur is no longer usable. We choose the + * non-full run that is lowest in memory; this policy tends to keep + * objects packed well, and it can also help reduce the number of + * almost-empty chunks. + */ + arena_run_tree_t runs; + + /* Bin statistics. */ + malloc_bin_stats_t stats; +}; + +struct arena_s { + /* This arena's index within the arenas array. */ + unsigned ind; + + /* + * Number of threads currently assigned to this arena. This field is + * protected by arenas_lock. + */ + unsigned nthreads; + + /* + * There are three classes of arena operations from a locking + * perspective: + * 1) Thread asssignment (modifies nthreads) is protected by + * arenas_lock. + * 2) Bin-related operations are protected by bin locks. + * 3) Chunk- and run-related operations are protected by this mutex. + */ + malloc_mutex_t lock; + + arena_stats_t stats; + /* + * List of tcaches for extant threads associated with this arena. + * Stats from these are merged incrementally, and at exit. + */ + ql_head(tcache_t) tcache_ql; + + uint64_t prof_accumbytes; + + dss_prec_t dss_prec; + + /* Tree of dirty-page-containing chunks this arena manages. */ + arena_chunk_tree_t chunks_dirty; + + /* + * In order to avoid rapid chunk allocation/deallocation when an arena + * oscillates right on the cusp of needing a new chunk, cache the most + * recently freed chunk. The spare is left in the arena's chunk trees + * until it is deleted. + * + * There is one spare chunk per arena, rather than one spare total, in + * order to avoid interactions between multiple threads that could make + * a single spare inadequate. + */ + arena_chunk_t *spare; + + /* Number of pages in active runs. */ + size_t nactive; + + /* + * Current count of pages within unused runs that are potentially + * dirty, and for which madvise(... MADV_DONTNEED) has not been called. + * By tracking this, we can institute a limit on how much dirty unused + * memory is mapped for each arena. + */ + size_t ndirty; + + /* + * Approximate number of pages being purged. It is possible for + * multiple threads to purge dirty pages concurrently, and they use + * npurgatory to indicate the total number of pages all threads are + * attempting to purge. + */ + size_t npurgatory; + + /* + * Size/address-ordered trees of this arena's available runs. The trees + * are used for first-best-fit run allocation. + */ + arena_avail_tree_t runs_avail; + + /* bins is used to store trees of free regions. */ + arena_bin_t bins[NBINS]; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern ssize_t opt_lg_dirty_mult; +/* + * small_size2bin is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via the SMALL_SIZE2BIN macro. + */ +extern uint8_t const small_size2bin[]; +#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) + +extern arena_bin_info_t arena_bin_info[NBINS]; + +/* Number of large size classes. */ +#define nlclasses (chunk_npages - map_bias) + +void arena_purge_all(arena_t *arena); +void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, + size_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, + bool zero); +#ifdef JEMALLOC_JET +typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t, + uint8_t); +extern arena_redzone_corruption_t *arena_redzone_corruption; +typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *); +extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; +#else +void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); +#endif +void arena_quarantine_junk_small(void *ptr, size_t usize); +void *arena_malloc_small(arena_t *arena, size_t size, bool zero); +void *arena_malloc_large(arena_t *arena, size_t size, bool zero); +void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); +void arena_prof_promoted(const void *ptr, size_t size); +void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_t *mapelm); +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm); +void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind); +#ifdef JEMALLOC_JET +typedef void (arena_dalloc_junk_large_t)(void *, size_t); +extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; +#endif +void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr); +void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); +#ifdef JEMALLOC_JET +typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); +extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; +#endif +bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); +void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc); +dss_prec_t arena_dss_prec_get(arena_t *arena); +void arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats); +bool arena_new(arena_t *arena, unsigned ind); +void arena_boot(void); +void arena_prefork(arena_t *arena); +void arena_postfork_parent(arena_t *arena); +void arena_postfork_child(arena_t *arena); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); +size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbitsp_read(size_t *mapbitsp); +size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); +void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); +void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size); +void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind); +void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, + size_t runind, size_t binind, size_t flags); +void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed); +bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); +size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, + const void *ptr); +prof_ctx_t *arena_prof_ctx_get(const void *ptr); +void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); +void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); +size_t arena_salloc(const void *ptr, bool demote); +void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, + bool try_tcache); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +# ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_ALWAYS_INLINE arena_chunk_map_t * +arena_mapp_get(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (&chunk->map[pageind-map_bias]); +} + +JEMALLOC_ALWAYS_INLINE size_t * +arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (&arena_mapp_get(chunk, pageind)->bits); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbitsp_read(size_t *mapbitsp) +{ + + return (*mapbitsp); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind))); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + CHUNK_MAP_ALLOCATED); + return (mapbits >> LG_PAGE); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + size_t binind; + + mapbits = arena_mapbits_get(chunk, pageind); + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + assert(binind < NBINS || binind == BININD_INVALID); + return (binind); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_DIRTY); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_UNZEROED); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_LARGE); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) +{ + + *mapbitsp = mapbits; +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + + assert((size & PAGE_MASK) == 0); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags); + arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size) +{ + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); + + assert((size & PAGE_MASK) == 0); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + arena_mapbitsp_write(mapbitsp, size | (mapbits & PAGE_MASK)); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); + size_t unzeroed; + + assert((size & PAGE_MASK) == 0); + assert((flags & CHUNK_MAP_DIRTY) == flags); + unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ + arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags + | unzeroed | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind) +{ + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); + + assert(binind <= BININD_INVALID); + assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | + (binind << CHUNK_MAP_BININD_SHIFT)); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, + size_t binind, size_t flags) +{ + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); + size_t unzeroed; + + assert(binind < BININD_INVALID); + assert(pageind - runind >= map_bias); + assert((flags & CHUNK_MAP_DIRTY) == flags); + unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ + arena_mapbitsp_write(mapbitsp, (runind << LG_PAGE) | (binind << + CHUNK_MAP_BININD_SHIFT) | flags | unzeroed | CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed) +{ + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); + + arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_UNZEROED) | + unzeroed); +} + +JEMALLOC_INLINE bool +arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + assert(prof_interval != 0); + + arena->prof_accumbytes += accumbytes; + if (arena->prof_accumbytes >= prof_interval) { + arena->prof_accumbytes -= prof_interval; + return (true); + } + return (false); +} + +JEMALLOC_INLINE bool +arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (prof_interval == 0) + return (false); + return (arena_prof_accum_impl(arena, accumbytes)); +} + +JEMALLOC_INLINE bool +arena_prof_accum(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (prof_interval == 0) + return (false); + + { + bool ret; + + malloc_mutex_lock(&arena->lock); + ret = arena_prof_accum_impl(arena, accumbytes); + malloc_mutex_unlock(&arena->lock); + return (ret); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_ptr_small_binind_get(const void *ptr, size_t mapbits) +{ + size_t binind; + + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + + if (config_debug) { + arena_chunk_t *chunk; + arena_t *arena; + size_t pageind; + size_t actual_mapbits; + arena_run_t *run; + arena_bin_t *bin; + size_t actual_binind; + arena_bin_info_t *bin_info; + + assert(binind != BININD_INVALID); + assert(binind < NBINS); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + actual_mapbits = arena_mapbits_get(chunk, pageind); + assert(mapbits == actual_mapbits); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (actual_mapbits >> LG_PAGE)) << LG_PAGE)); + bin = run->bin; + actual_binind = bin - arena->bins; + assert(binind == actual_binind); + bin_info = &arena_bin_info[actual_binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); + } + + return (binind); +} +# endif /* JEMALLOC_ARENA_INLINE_A */ + +# ifdef JEMALLOC_ARENA_INLINE_B +JEMALLOC_INLINE size_t +arena_bin_index(arena_t *arena, arena_bin_t *bin) +{ + size_t binind = bin - arena->bins; + assert(binind < NBINS); + return (binind); +} + +JEMALLOC_INLINE unsigned +arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) +{ + unsigned shift, diff, regind; + size_t interval; + + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. + */ + assert((uintptr_t)ptr >= (uintptr_t)run + + (uintptr_t)bin_info->reg0_offset); + + /* + * Avoid doing division with a variable divisor if possible. Using + * actual division here can reduce allocator throughput by over 20%! + */ + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - + bin_info->reg0_offset); + + /* Rescale (factor powers of 2 out of the numerator and denominator). */ + interval = bin_info->reg_interval; + shift = ffs(interval) - 1; + diff >>= shift; + interval >>= shift; + + if (interval == 1) { + /* The divisor was a power of 2. */ + regind = diff; + } else { + /* + * To divide by a number D that is not a power of two we + * multiply by (2^21 / D) and then right shift by 21 positions. + * + * X / D + * + * becomes + * + * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT + * + * We can omit the first three elements, because we never + * divide by 0, and 1 and 2 are both powers of two, which are + * handled above. + */ +#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) + static const unsigned interval_invs[] = { + SIZE_INV(3), + SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), + SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), + SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), + SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), + SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), + SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), + SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) + }; + + if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) + + 2)) { + regind = (diff * interval_invs[interval - 3]) >> + SIZE_INV_SHIFT; + } else + regind = diff / interval; +#undef SIZE_INV +#undef SIZE_INV_SHIFT + } + assert(diff == regind * interval); + assert(regind < bin_info->nregs); + + return (regind); +} + +JEMALLOC_INLINE prof_ctx_t * +arena_prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + arena_chunk_t *chunk; + size_t pageind, mapbits; + + cassert(config_prof); + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + if (prof_promote) + ret = (prof_ctx_t *)(uintptr_t)1U; + else { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << + LG_PAGE)); + size_t binind = arena_ptr_small_binind_get(ptr, + mapbits); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + unsigned regind; + + regind = arena_run_regind(run, bin_info, ptr); + ret = *(prof_ctx_t **)((uintptr_t)run + + bin_info->ctx0_offset + (regind * + sizeof(prof_ctx_t *))); + } + } else + ret = arena_mapp_get(chunk, pageind)->prof_ctx; + + return (ret); +} + +JEMALLOC_INLINE void +arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) +{ + arena_chunk_t *chunk; + size_t pageind; + + cassert(config_prof); + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + + if (usize > SMALL_MAXCLASS || (prof_promote && + ((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk, + pageind) != 0))) { + assert(arena_mapbits_large_get(chunk, pageind) != 0); + arena_mapp_get(chunk, pageind)->prof_ctx = ctx; + } else { + assert(arena_mapbits_large_get(chunk, pageind) == 0); + if (prof_promote == false) { + size_t mapbits = arena_mapbits_get(chunk, pageind); + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << + LG_PAGE)); + size_t binind; + arena_bin_info_t *bin_info; + unsigned regind; + + binind = arena_ptr_small_binind_get(ptr, mapbits); + bin_info = &arena_bin_info[binind]; + regind = arena_run_regind(run, bin_info, ptr); + + *((prof_ctx_t **)((uintptr_t)run + + bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t + *)))) = ctx; + } + } +} + +JEMALLOC_ALWAYS_INLINE void * +arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) +{ + tcache_t *tcache; + + assert(size != 0); + assert(size <= arena_maxclass); + + if (size <= SMALL_MAXCLASS) { + if (try_tcache && (tcache = tcache_get(true)) != NULL) + return (tcache_alloc_small(tcache, size, zero)); + else { + return (arena_malloc_small(choose_arena(arena), size, + zero)); + } + } else { + /* + * Initialize tcache after checking size in order to avoid + * infinite recursion during tcache initialization. + */ + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(true)) != NULL) + return (tcache_alloc_large(tcache, size, zero)); + else { + return (arena_malloc_large(choose_arena(arena), size, + zero)); + } + } +} + +/* Return the size of the allocation pointed to by ptr. */ +JEMALLOC_ALWAYS_INLINE size_t +arena_salloc(const void *ptr, bool demote) +{ + size_t ret; + arena_chunk_t *chunk; + size_t pageind, binind; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + binind = arena_mapbits_binind_get(chunk, pageind); + if (binind == BININD_INVALID || (config_prof && demote == false && + prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { + /* + * Large allocation. In the common case (demote == true), and + * as this is an inline function, most callers will only end up + * looking at binind to determine that ptr is a small + * allocation. + */ + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + ret = arena_mapbits_large_size_get(chunk, pageind); + assert(ret != 0); + assert(pageind + (ret>>LG_PAGE) <= chunk_npages); + assert(ret == PAGE || arena_mapbits_large_size_get(chunk, + pageind+(ret>>LG_PAGE)-1) == 0); + assert(binind == arena_mapbits_binind_get(chunk, + pageind+(ret>>LG_PAGE)-1)); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large object due to + * prof_promote). + */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) == binind); + ret = arena_bin_info[binind].reg_size; + } + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) +{ + size_t pageind, mapbits; + tcache_t *tcache; + + assert(arena != NULL); + assert(chunk->arena == arena); + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + /* Small allocation. */ + if (try_tcache && (tcache = tcache_get(false)) != NULL) { + size_t binind; + + binind = arena_ptr_small_binind_get(ptr, mapbits); + tcache_dalloc_small(tcache, ptr, binind); + } else + arena_dalloc_small(arena, chunk, ptr, pageind); + } else { + size_t size = arena_mapbits_large_size_get(chunk, pageind); + + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(false)) != NULL) { + tcache_dalloc_large(tcache, ptr, size); + } else + arena_dalloc_large(arena, chunk, ptr); + } +} +# endif /* JEMALLOC_ARENA_INLINE_B */ +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h new file mode 100644 index 0000000000..11a7b47fe0 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/atomic.h @@ -0,0 +1,304 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#define atomic_read_uint64(p) atomic_add_uint64(p, 0) +#define atomic_read_uint32(p) atomic_add_uint32(p, 0) +#define atomic_read_z(p) atomic_add_z(p, 0) +#define atomic_read_u(p) atomic_add_u(p, 0) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); +uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); +uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); +uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +size_t atomic_add_z(size_t *p, size_t x); +size_t atomic_sub_z(size_t *p, size_t x); +unsigned atomic_add_u(unsigned *p, unsigned x); +unsigned atomic_sub_u(unsigned *p, unsigned x); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) +/******************************************************************************/ +/* 64-bit operations. */ +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, -((int64_t)x))); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); +} +# elif (defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + x = (uint64_t)(-(int64_t)x); + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} +# elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + /* + * atomic_fetchadd_64() doesn't exist, but we only ever use this + * function on LP64 systems, so atomic_fetchadd_long() will do. + */ + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +} +# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +# else +# error "Missing implementation for 64-bit atomic operations" +# endif +#endif + +/******************************************************************************/ +/* 32-bit operations. */ +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, -((int32_t)x))); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); +} +#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + x = (uint32_t)(-(int32_t)x); + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} +#elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); +} +#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#else +# error "Missing implementation for 32-bit atomic operations" +#endif + +/******************************************************************************/ +/* size_t operations. */ +JEMALLOC_INLINE size_t +atomic_add_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE size_t +atomic_sub_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +/******************************************************************************/ +/* unsigned operations. */ +JEMALLOC_INLINE unsigned +atomic_add_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE unsigned +atomic_sub_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} +/******************************************************************************/ +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h new file mode 100644 index 0000000000..9cf75ffb0b --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/base.h @@ -0,0 +1,26 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *base_alloc(size_t size); +void *base_calloc(size_t number, size_t size); +extent_node_t *base_node_alloc(void); +void base_node_dealloc(extent_node_t *node); +bool base_boot(void); +void base_prefork(void); +void base_postfork_parent(void); +void base_postfork_child(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h new file mode 100644 index 0000000000..605ebac58c --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h @@ -0,0 +1,184 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ +#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS + +typedef struct bitmap_level_s bitmap_level_t; +typedef struct bitmap_info_s bitmap_info_t; +typedef unsigned long bitmap_t; +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Number of bits per group. */ +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* Maximum number of levels possible. */ +#define BITMAP_MAX_LEVELS \ + (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct bitmap_level_s { + /* Offset of this level's groups within the array of groups. */ + size_t group_offset; +}; + +struct bitmap_info_s { + /* Logical number of bits in bitmap (stored at bottom level). */ + size_t nbits; + + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +size_t bitmap_info_ngroups(const bitmap_info_t *binfo); +size_t bitmap_size(size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); +bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) +JEMALLOC_INLINE bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. */ + return (rg == 0); +} + +JEMALLOC_INLINE bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t g; + + assert(bit < binfo->nbits); + goff = bit >> LG_BITMAP_GROUP_NBITS; + g = bitmap[goff]; + return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); +} + +JEMALLOC_INLINE void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit) == false); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit)); + /* Propagate group state transitions up the tree. */ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) + break; + } + } +} + +/* sfu: set first unset. */ +JEMALLOC_INLINE size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t bit; + bitmap_t g; + unsigned i; + + assert(bitmap_full(bitmap, binfo) == false); + + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffsl(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); + } + + bitmap_set(bitmap, binfo, bit); + return (bit); +} + +JEMALLOC_INLINE void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + bool propagate; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + propagate = (g == 0); + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit) == false); + /* Propagate group state transitions up the tree. */ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (propagate == false) + break; + } + } +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h new file mode 100644 index 0000000000..87d8700dac --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/chunk.h @@ -0,0 +1,63 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Size and alignment of memory chunks that are allocated by the OS's virtual + * memory system. + */ +#define LG_CHUNK_DEFAULT 22 + +/* Return the chunk address for allocation address a. */ +#define CHUNK_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~chunksize_mask)) + +/* Return the chunk offset of address a. */ +#define CHUNK_ADDR2OFFSET(a) \ + ((size_t)((uintptr_t)(a) & chunksize_mask)) + +/* Return the smallest chunk multiple that is >= s. */ +#define CHUNK_CEILING(s) \ + (((s) + chunksize_mask) & ~chunksize_mask) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern size_t opt_lg_chunk; +extern const char *opt_dss; + +/* Protects stats_chunks; currently not used for any other purpose. */ +extern malloc_mutex_t chunks_mtx; +/* Chunk statistics. */ +extern chunk_stats_t stats_chunks; + +extern rtree_t *chunks_rtree; + +extern size_t chunksize; +extern size_t chunksize_mask; /* (chunksize - 1). */ +extern size_t chunk_npages; +extern size_t map_bias; /* Number of arena chunk header pages. */ +extern size_t arena_maxclass; /* Max size class for arenas. */ + +void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, + dss_prec_t dss_prec); +void chunk_unmap(void *chunk, size_t size); +void chunk_dealloc(void *chunk, size_t size, bool unmap); +bool chunk_boot(void); +void chunk_prefork(void); +void chunk_postfork_parent(void); +void chunk_postfork_child(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + +#include "jemalloc/internal/chunk_dss.h" +#include "jemalloc/internal/chunk_mmap.h" diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h new file mode 100644 index 0000000000..4535ce09c0 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -0,0 +1,38 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef enum { + dss_prec_disabled = 0, + dss_prec_primary = 1, + dss_prec_secondary = 2, + + dss_prec_limit = 3 +} dss_prec_t; +#define DSS_PREC_DEFAULT dss_prec_secondary +#define DSS_DEFAULT "secondary" + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +extern const char *dss_prec_names[]; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +dss_prec_t chunk_dss_prec_get(void); +bool chunk_dss_prec_set(dss_prec_t dss_prec); +void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); +bool chunk_in_dss(void *chunk); +bool chunk_dss_boot(void); +void chunk_dss_prefork(void); +void chunk_dss_postfork_parent(void); +void chunk_dss_postfork_child(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h new file mode 100644 index 0000000000..f24abac753 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -0,0 +1,22 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +bool pages_purge(void *addr, size_t length); + +void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); +bool chunk_dealloc_mmap(void *chunk, size_t size); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h new file mode 100644 index 0000000000..58712a6a76 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/ckh.h @@ -0,0 +1,88 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ckh_s ckh_t; +typedef struct ckhc_s ckhc_t; + +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, size_t[2]); +typedef bool ckh_keycomp_t (const void *, const void *); + +/* Maintain counters used to get an idea of performance. */ +/* #define CKH_COUNT */ +/* Print counter values in ckh_delete() (requires CKH_COUNT). */ +/* #define CKH_VERBOSE */ + +/* + * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit + * one bucket per L1 cache line. + */ +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Hash table cell. */ +struct ckhc_s { + const void *key; + const void *data; +}; + +struct ckh_s { +#ifdef CKH_COUNT + /* Counters used to get an idea of performance. */ + uint64_t ngrows; + uint64_t nshrinks; + uint64_t nshrinkfails; + uint64_t ninserts; + uint64_t nrelocs; +#endif + + /* Used for pseudo-random number generation. */ +#define CKH_A 1103515241 +#define CKH_C 12347 + uint32_t prng_state; + + /* Total number of items. */ + size_t count; + + /* + * Minimum and current number of hash table buckets. There are + * 2^LG_CKH_BUCKET_CELLS cells per bucket. + */ + unsigned lg_minbuckets; + unsigned lg_curbuckets; + + /* Hash and comparison functions. */ + ckh_hash_t *hash; + ckh_keycomp_t *keycomp; + + /* Hash table with 2^lg_curbuckets buckets. */ + ckhc_t *tab; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, + ckh_keycomp_t *keycomp); +void ckh_delete(ckh_t *ckh); +size_t ckh_count(ckh_t *ckh); +bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); +bool ckh_insert(ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, + void **data); +bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); +void ckh_string_hash(const void *key, size_t r_hash[2]); +bool ckh_string_keycomp(const void *k1, const void *k2); +void ckh_pointer_hash(const void *key, size_t r_hash[2]); +bool ckh_pointer_keycomp(const void *k1, const void *k2); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h new file mode 100644 index 0000000000..0ffecc5f2a --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/ctl.h @@ -0,0 +1,117 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_named_node_s ctl_named_node_t; +typedef struct ctl_indexed_node_s ctl_indexed_node_t; +typedef struct ctl_arena_stats_s ctl_arena_stats_t; +typedef struct ctl_stats_s ctl_stats_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ctl_node_s { + bool named; +}; + +struct ctl_named_node_s { + struct ctl_node_s node; + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + unsigned nchildren; + const ctl_node_t *children; + int (*ctl)(const size_t *, size_t, void *, size_t *, + void *, size_t); +}; + +struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); +}; + +struct ctl_arena_stats_s { + bool initialized; + unsigned nthreads; + const char *dss; + size_t pactive; + size_t pdirty; + arena_stats_t astats; + + /* Aggregate stats for small size classes, based on bin stats. */ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + malloc_bin_stats_t bstats[NBINS]; + malloc_large_stats_t *lstats; /* nlclasses elements. */ +}; + +struct ctl_stats_s { + size_t allocated; + size_t active; + size_t mapped; + struct { + size_t current; /* stats_chunks.curchunks */ + uint64_t total; /* stats_chunks.nchunks */ + size_t high; /* stats_chunks.highchunks */ + } chunks; + struct { + size_t allocated; /* huge_allocated */ + uint64_t nmalloc; /* huge_nmalloc */ + uint64_t ndalloc; /* huge_ndalloc */ + } huge; + unsigned narenas; + ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen); +int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); + +int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +bool ctl_boot(void); +void ctl_prefork(void); +void ctl_postfork_parent(void); +void ctl_postfork_child(void); + +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_printf( \ + "<jemalloc>: Failure in xmallctl(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlnametomib(name, mibp, miblenp) do { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ + malloc_printf("<jemalloc>: Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ + newlen) != 0) { \ + malloc_write( \ + "<jemalloc>: Failure in xmallctlbymib()\n"); \ + abort(); \ + } \ +} while (0) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h new file mode 100644 index 0000000000..ba95ca816b --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/extent.h @@ -0,0 +1,46 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct extent_node_s extent_node_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Tree of extents. */ +struct extent_node_s { + /* Linkage for the size/address-ordered tree. */ + rb_node(extent_node_t) link_szad; + + /* Linkage for the address-ordered tree. */ + rb_node(extent_node_t) link_ad; + + /* Profile counters, used for huge objects. */ + prof_ctx_t *prof_ctx; + + /* Pointer to the extent that this tree node is responsible for. */ + void *addr; + + /* Total region size. */ + size_t size; + + /* True if zero-filled; used by chunk recycling code. */ + bool zeroed; +}; +typedef rb_tree(extent_node_t) extent_tree_t; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) + +rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h new file mode 100644 index 0000000000..c7183ede82 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/hash.h @@ -0,0 +1,335 @@ +/* + * The following hash function is based on MurmurHash3, placed into the public + * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for + * details. + */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint32_t hash_x86_32(const void *key, int len, uint32_t seed); +void hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]); +void hash_x64_128(const void *key, const int len, const uint32_t seed, + uint64_t r_out[2]); +void hash(const void *key, size_t len, const uint32_t seed, + size_t r_hash[2]); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) +/******************************************************************************/ +/* Internal implementation. */ +JEMALLOC_INLINE uint32_t +hash_rotl_32(uint32_t x, int8_t r) +{ + + return (x << r) | (x >> (32 - r)); +} + +JEMALLOC_INLINE uint64_t +hash_rotl_64(uint64_t x, int8_t r) +{ + return (x << r) | (x >> (64 - r)); +} + +JEMALLOC_INLINE uint32_t +hash_get_block_32(const uint32_t *p, int i) +{ + + return (p[i]); +} + +JEMALLOC_INLINE uint64_t +hash_get_block_64(const uint64_t *p, int i) +{ + + return (p[i]); +} + +JEMALLOC_INLINE uint32_t +hash_fmix_32(uint32_t h) +{ + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return (h); +} + +JEMALLOC_INLINE uint64_t +hash_fmix_64(uint64_t k) +{ + + k ^= k >> 33; + k *= QU(0xff51afd7ed558ccdLLU); + k ^= k >> 33; + k *= QU(0xc4ceb9fe1a85ec53LLU); + k ^= k >> 33; + + return (k); +} + +JEMALLOC_INLINE uint32_t +hash_x86_32(const void *key, int len, uint32_t seed) +{ + const uint8_t *data = (const uint8_t *) key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + /* body */ + { + const uint32_t *blocks = (const uint32_t *) (data + nblocks*4); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = hash_get_block_32(blocks, i); + + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = hash_rotl_32(h1, 13); + h1 = h1*5 + 0xe6546b64; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t *) (data + nblocks*4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); + k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; + + h1 = hash_fmix_32(h1); + + return (h1); +} + +UNUSED JEMALLOC_INLINE void +hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]) +{ + const uint8_t * data = (const uint8_t *) key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + /* body */ + { + const uint32_t *blocks = (const uint32_t *) (data + nblocks*16); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = hash_get_block_32(blocks, i*4 + 0); + uint32_t k2 = hash_get_block_32(blocks, i*4 + 1); + uint32_t k3 = hash_get_block_32(blocks, i*4 + 2); + uint32_t k4 = hash_get_block_32(blocks, i*4 + 3); + + k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + + h1 = hash_rotl_32(h1, 19); h1 += h2; + h1 = h1*5 + 0x561ccd1b; + + k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + + h2 = hash_rotl_32(h2, 17); h2 += h3; + h2 = h2*5 + 0x0bcaa747; + + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + + h3 = hash_rotl_32(h3, 15); h3 += h4; + h3 = h3*5 + 0x96cd1c35; + + k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + + h4 = hash_rotl_32(h4, 13); h4 += h1; + h4 = h4*5 + 0x32ac3b17; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t *) (data + nblocks*16); + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch (len & 15) { + case 15: k4 ^= tail[14] << 16; + case 14: k4 ^= tail[13] << 8; + case 13: k4 ^= tail[12] << 0; + k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + + case 12: k3 ^= tail[11] << 24; + case 11: k3 ^= tail[10] << 16; + case 10: k3 ^= tail[ 9] << 8; + case 9: k3 ^= tail[ 8] << 0; + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + + case 8: k2 ^= tail[ 7] << 24; + case 7: k2 ^= tail[ 6] << 16; + case 6: k2 ^= tail[ 5] << 8; + case 5: k2 ^= tail[ 4] << 0; + k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + + case 4: k1 ^= tail[ 3] << 24; + case 3: k1 ^= tail[ 2] << 16; + case 2: k1 ^= tail[ 1] << 8; + case 1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = hash_fmix_32(h1); + h2 = hash_fmix_32(h2); + h3 = hash_fmix_32(h3); + h4 = hash_fmix_32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + r_out[0] = (((uint64_t) h2) << 32) | h1; + r_out[1] = (((uint64_t) h4) << 32) | h3; +} + +UNUSED JEMALLOC_INLINE void +hash_x64_128(const void *key, const int len, const uint32_t seed, + uint64_t r_out[2]) +{ + const uint8_t *data = (const uint8_t *) key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = QU(0x87c37b91114253d5LLU); + const uint64_t c2 = QU(0x4cf5ad432745937fLLU); + + /* body */ + { + const uint64_t *blocks = (const uint64_t *) (data); + int i; + + for (i = 0; i < nblocks; i++) { + uint64_t k1 = hash_get_block_64(blocks, i*2 + 0); + uint64_t k2 = hash_get_block_64(blocks, i*2 + 1); + + k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + + h1 = hash_rotl_64(h1, 27); h1 += h2; + h1 = h1*5 + 0x52dce729; + + k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; + + h2 = hash_rotl_64(h2, 31); h2 += h1; + h2 = h2*5 + 0x38495ab5; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t*)(data + nblocks*16); + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch (len & 15) { + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; + case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; + k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; + case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; + k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = hash_fmix_64(h1); + h2 = hash_fmix_64(h2); + + h1 += h2; + h2 += h1; + + r_out[0] = h1; + r_out[1] = h2; +} + +/******************************************************************************/ +/* API. */ +JEMALLOC_INLINE void +hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) +{ +#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) + hash_x64_128(key, len, seed, (uint64_t *)r_hash); +#else + uint64_t hashes[2]; + hash_x86_128(key, len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; +#endif +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h new file mode 100644 index 0000000000..a2b9c77919 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/huge.h @@ -0,0 +1,46 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +/* Huge allocation statistics. */ +extern uint64_t huge_nmalloc; +extern uint64_t huge_ndalloc; +extern size_t huge_allocated; + +/* Protects chunk-related data structures. */ +extern malloc_mutex_t huge_mtx; + +void *huge_malloc(size_t size, bool zero, dss_prec_t dss_prec); +void *huge_palloc(size_t size, size_t alignment, bool zero, + dss_prec_t dss_prec); +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra); +void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec); +#ifdef JEMALLOC_JET +typedef void (huge_dalloc_junk_t)(void *, size_t); +extern huge_dalloc_junk_t *huge_dalloc_junk; +#endif +void huge_dalloc(void *ptr, bool unmap); +size_t huge_salloc(const void *ptr); +dss_prec_t huge_dss_prec_get(arena_t *arena); +prof_ctx_t *huge_prof_ctx_get(const void *ptr); +void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +bool huge_boot(void); +void huge_prefork(void); +void huge_postfork_parent(void); +void huge_postfork_child(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h new file mode 100644 index 0000000000..cf171326c2 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -0,0 +1,1059 @@ +#ifndef JEMALLOC_INTERNAL_H +#define JEMALLOC_INTERNAL_H +#include <math.h> +#ifdef _WIN32 +# include <windows.h> +# define ENOENT ERROR_PATH_NOT_FOUND +# define EINVAL ERROR_BAD_ARGUMENTS +# define EAGAIN ERROR_OUTOFMEMORY +# define EPERM ERROR_WRITE_FAULT +# define EFAULT ERROR_INVALID_ADDRESS +# define ENOMEM ERROR_NOT_ENOUGH_MEMORY +# undef ERANGE +# define ERANGE ERROR_INVALID_DATA +#else +# include <sys/param.h> +# include <sys/mman.h> +# include <sys/syscall.h> +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include <sys/uio.h> +# include <pthread.h> +# include <errno.h> +#endif +#include <sys/types.h> + +#include <limits.h> +#ifndef SIZE_T_MAX +# define SIZE_T_MAX SIZE_MAX +#endif +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stddef.h> +#ifndef offsetof +# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +#endif +#include <inttypes.h> +#include <string.h> +#include <strings.h> +#include <ctype.h> +#ifdef _MSC_VER +# include <io.h> +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +/* Disable warnings about deprecated system functions */ +# pragma warning(disable: 4996) +#else +# include <unistd.h> +#endif +#include <fcntl.h> + +#include "jemalloc_defs.h" + +#ifdef JEMALLOC_UTRACE +#include <sys/ktrace.h> +#endif + +#ifdef JEMALLOC_VALGRIND +#include <valgrind/valgrind.h> +#include <valgrind/memcheck.h> +#endif + +#define JEMALLOC_NO_DEMANGLE +#include "../jemalloc.h" +#include "jemalloc/internal/private_namespace.h" + +#ifdef JEMALLOC_CC_SILENCE +#define UNUSED JEMALLOC_ATTR(unused) +#else +#define UNUSED +#endif + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool config_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool config_mremap = +#ifdef JEMALLOC_MREMAP + true +#else + false +#endif + ; +static const bool config_munmap = +#ifdef JEMALLOC_MUNMAP + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_tcache = +#ifdef JEMALLOC_TCACHE + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; +static const bool config_valgrind = +#ifdef JEMALLOC_VALGRIND + true +#else + false +#endif + ; +static const bool config_xmalloc = +#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_ivsalloc = +#ifdef JEMALLOC_IVSALLOC + true +#else + false +#endif + ; + +#ifdef JEMALLOC_ATOMIC9 +#include <machine/atomic.h> +#endif + +#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#include <libkern/OSAtomic.h> +#endif + +#ifdef JEMALLOC_ZONE +#include <mach/mach_error.h> +#include <mach/mach_init.h> +#include <mach/vm_map.h> +#include <malloc/malloc.h> +#endif + +#define RB_COMPACT +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/qr.h" +#include "jemalloc/internal/ql.h" + +/* + * jemalloc can conceptually be broken into components (arena, tcache, etc.), + * but there are circular dependencies that cannot be broken without + * substantial performance degradation. In order to reduce the effect on + * visual code flow, read the header files in multiple passes, with one of the + * following cpp variables defined during each pass: + * + * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data + * types. + * JEMALLOC_H_STRUCTS : Data structures. + * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. + * JEMALLOC_H_INLINES : Inline functions. + */ +/******************************************************************************/ +#define JEMALLOC_H_TYPES + +#ifndef JEMALLOC_HAS_RESTRICT +# define restrict +#endif + +#define MALLOCX_LG_ALIGN_MASK ((int)0x3f) +#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) + +#define ZU(z) ((size_t)z) +#define QU(q) ((uint64_t)q) + +#ifndef __DECONST +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +#endif + +#if defined(JEMALLOC_DEBUG) + /* Disable inlining to make debugging easier. */ +# define JEMALLOC_ALWAYS_INLINE +# define JEMALLOC_ALWAYS_INLINE_C static +# define JEMALLOC_INLINE +# define JEMALLOC_INLINE_C static +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ALWAYS_INLINE \ + static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) +# define JEMALLOC_ALWAYS_INLINE_C \ + static inline JEMALLOC_ATTR(always_inline) +# else +# define JEMALLOC_ALWAYS_INLINE static inline +# define JEMALLOC_ALWAYS_INLINE_C static inline +# endif +# define JEMALLOC_INLINE static inline +# define JEMALLOC_INLINE_C static inline +# ifdef _MSC_VER +# define inline _inline +# endif +#endif + +/* Smallest size class to support. */ +#define LG_TINY_MIN 3 +#define TINY_MIN (1U << LG_TINY_MIN) + +/* + * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#ifndef LG_QUANTUM +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# ifdef __sparc64__ +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __aarch64__ +# define LG_QUANTUM 4 +# endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# ifdef __s390__ +# define LG_QUANTUM 4 +# endif +# ifdef __SH4__ +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# endif +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) + +/* Return the smallest long multiple that is >= a. */ +#define LONG_CEILING(a) \ + (((a) + LONG_MASK) & ~LONG_MASK) + +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) + +/* Return the smallest (void *) multiple that is >= a. */ +#define PTR_CEILING(a) \ + (((a) + PTR_MASK) & ~PTR_MASK) + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. + * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. + */ +#define LG_CACHELINE 6 +#define CACHELINE 64 +#define CACHELINE_MASK (CACHELINE - 1) + +/* Return the smallest cacheline multiple that is >= s. */ +#define CACHELINE_CEILING(s) \ + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) + +/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif +#define LG_PAGE STATIC_PAGE_SHIFT +#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define PAGE_MASK ((size_t)(PAGE - 1)) + +/* Return the smallest pagesize multiple that is >= s. */ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) + +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & (-(alignment)))) + +/* Return the offset between a and the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. */ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & (-(alignment))) + +/* Declare a variable length array */ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include <malloc.h> +# define alloca _alloca +# else +# ifdef JEMALLOC_HAS_ALLOCA_H +# include <alloca.h> +# else +# include <stdlib.h> +# endif +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * count) +#else +# define VARIABLE_ARRAY(type, name, count) type name[count] +#endif + +#ifdef JEMALLOC_VALGRIND +/* + * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions + * so that when Valgrind reports errors, there are no extra stack frames + * in the backtraces. + * + * The size that is reported to valgrind must be consistent through a chain of + * malloc..realloc..realloc calls. Request size isn't recorded anywhere in + * jemalloc, so it is critical that all callers of these macros provide usize + * rather than request size. As a result, buffer overflow detection is + * technically weakened for the standard API, though it is generally accepted + * practice to consider any extra bytes reported by malloc_usable_size() as + * usable space. + */ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ + if (config_valgrind && opt_valgrind && cond) \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +} while (0) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) do { \ + if (config_valgrind && opt_valgrind) { \ + size_t rzsize = p2rz(ptr); \ + \ + if (ptr == old_ptr) { \ + VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ + usize, rzsize); \ + if (zero && old_usize < usize) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + old_usize), usize - old_usize); \ + } \ + } else { \ + if (old_ptr != NULL) { \ + VALGRIND_FREELIKE_BLOCK(old_ptr, \ + old_rzsize); \ + } \ + if (ptr != NULL) { \ + size_t copy_size = (old_usize < usize) \ + ? old_usize : usize; \ + size_t tail_size = usize - copy_size; \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ + rzsize, false); \ + if (copy_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED(ptr, \ + copy_size); \ + } \ + if (zero && tail_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + copy_size), tail_size); \ + } \ + } \ + } \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ + if (config_valgrind && opt_valgrind) \ + VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ +} while (0) +#else +#define RUNNING_ON_VALGRIND ((unsigned)0) +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ + do {} while (0) +#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \ + do {} while (0) +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0) +#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0) +#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0) +#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) +#endif + +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_TYPES +/******************************************************************************/ +#define JEMALLOC_H_STRUCTS + +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" +#include "jemalloc/internal/prof.h" + +typedef struct { + uint64_t allocated; + uint64_t deallocated; +} thread_allocated_t; +/* + * The JEMALLOC_ARG_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro + * argument. + */ +#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_ARG_CONCAT({0, 0}) + +#undef JEMALLOC_H_STRUCTS +/******************************************************************************/ +#define JEMALLOC_H_EXTERNS + +extern bool opt_abort; +extern bool opt_junk; +extern size_t opt_quarantine; +extern bool opt_redzone; +extern bool opt_utrace; +extern bool opt_valgrind; +extern bool opt_xmalloc; +extern bool opt_zero; +extern size_t opt_narenas; + +/* Number of CPUs. */ +extern unsigned ncpus; + +/* Protects arenas initialization (arenas, arenas_total). */ +extern malloc_mutex_t arenas_lock; +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + * + * arenas[0..narenas_auto) are used for automatic multiplexing of threads and + * arenas. arenas[narenas_auto..narenas_total) are only used if the application + * takes some action to create them and allocate from them. + */ +extern arena_t **arenas; +extern unsigned narenas_total; +extern unsigned narenas_auto; /* Read-only after initialization. */ + +arena_t *arenas_extend(unsigned ind); +void arenas_cleanup(void *arg); +arena_t *choose_arena_hard(void); +void jemalloc_prefork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); + +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_EXTERNS +/******************************************************************************/ +#define JEMALLOC_H_INLINES + +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) + +size_t s2u(size_t size); +size_t sa2u(size_t size, size_t alignment); +unsigned narenas_total_get(void); +arena_t *choose_arena(arena_t *arena); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +malloc_tsd_externs(arenas, arena_t *) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL, + arenas_cleanup) + +/* + * Compute usable size that would result from allocating an object with the + * specified size. + */ +JEMALLOC_ALWAYS_INLINE size_t +s2u(size_t size) +{ + + if (size <= SMALL_MAXCLASS) + return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); + if (size <= arena_maxclass) + return (PAGE_CEILING(size)); + return (CHUNK_CEILING(size)); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_ALWAYS_INLINE size_t +sa2u(size_t size, size_t alignment) +{ + size_t usize; + + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each small + * size class, every object is aligned at the smallest power of two + * that is non-zero in the base two representation of the size. For + * example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = ALIGNMENT_CEILING(size, alignment); + /* + * (usize < size) protects against the combination of maximal + * alignment and size greater than maximal alignment. + */ + if (usize < size) { + /* size_t overflow. */ + return (0); + } + + if (usize <= arena_maxclass && alignment <= PAGE) { + if (usize <= SMALL_MAXCLASS) + return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); + return (PAGE_CEILING(usize)); + } else { + size_t run_size; + + /* + * We can't achieve subpage alignment, so round up alignment + * permanently; it makes later calculations simpler. + */ + alignment = PAGE_CEILING(alignment); + usize = PAGE_CEILING(size); + /* + * (usize < size) protects against very large sizes within + * PAGE of SIZE_T_MAX. + * + * (usize + alignment < usize) protects against the + * combination of maximal alignment and usize large enough + * to cause overflow. This is similar to the first overflow + * check above, but it needs to be repeated due to the new + * usize value, which may now be *equal* to maximal + * alignment, whereas before we only detected overflow if the + * original size was *greater* than maximal alignment. + */ + if (usize < size || usize + alignment < usize) { + /* size_t overflow. */ + return (0); + } + + /* + * Calculate the size of the over-size run that arena_palloc() + * would need to allocate in order to guarantee the alignment. + * If the run wouldn't fit within a chunk, round up to a huge + * allocation size. + */ + run_size = usize + alignment - PAGE; + if (run_size <= arena_maxclass) + return (PAGE_CEILING(usize)); + return (CHUNK_CEILING(usize)); + } +} + +JEMALLOC_INLINE unsigned +narenas_total_get(void) +{ + unsigned narenas; + + malloc_mutex_lock(&arenas_lock); + narenas = narenas_total; + malloc_mutex_unlock(&arenas_lock); + + return (narenas); +} + +/* Choose an arena based on a per-thread value. */ +JEMALLOC_INLINE arena_t * +choose_arena(arena_t *arena) +{ + arena_t *ret; + + if (arena != NULL) + return (arena); + + if ((ret = *arenas_tsd_get()) == NULL) { + ret = choose_arena_hard(); + assert(ret != NULL); + } + + return (ret); +} +#endif + +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/rtree.h" +/* + * Include arena.h twice in order to resolve circular dependencies with + * tcache.h. + */ +#define JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" +#define JEMALLOC_ARENA_INLINE_B +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_B +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" + +#ifndef JEMALLOC_ENABLE_INLINE +void *imalloct(size_t size, bool try_tcache, arena_t *arena); +void *imalloc(size_t size); +void *icalloct(size_t size, bool try_tcache, arena_t *arena); +void *icalloc(size_t size); +void *ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena); +void *ipalloc(size_t usize, size_t alignment, bool zero); +size_t isalloc(const void *ptr, bool demote); +size_t ivsalloc(const void *ptr, bool demote); +size_t u2rz(size_t usize); +size_t p2rz(const void *ptr); +void idalloct(void *ptr, bool try_tcache); +void idalloc(void *ptr); +void iqalloct(void *ptr, bool try_tcache); +void iqalloc(void *ptr); +void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena); +void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); +void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero); +bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero); +malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_ALWAYS_INLINE void * +imalloct(size_t size, bool try_tcache, arena_t *arena) +{ + + assert(size != 0); + + if (size <= arena_maxclass) + return (arena_malloc(arena, size, false, try_tcache)); + else + return (huge_malloc(size, false, huge_dss_prec_get(arena))); +} + +JEMALLOC_ALWAYS_INLINE void * +imalloc(size_t size) +{ + + return (imalloct(size, true, NULL)); +} + +JEMALLOC_ALWAYS_INLINE void * +icalloct(size_t size, bool try_tcache, arena_t *arena) +{ + + if (size <= arena_maxclass) + return (arena_malloc(arena, size, true, try_tcache)); + else + return (huge_malloc(size, true, huge_dss_prec_get(arena))); +} + +JEMALLOC_ALWAYS_INLINE void * +icalloc(size_t size) +{ + + return (icalloct(size, true, NULL)); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena) +{ + void *ret; + + assert(usize != 0); + assert(usize == sa2u(usize, alignment)); + + if (usize <= arena_maxclass && alignment <= PAGE) + ret = arena_malloc(arena, usize, zero, try_tcache); + else { + if (usize <= arena_maxclass) { + ret = arena_palloc(choose_arena(arena), usize, + alignment, zero); + } else if (alignment <= chunksize) + ret = huge_malloc(usize, zero, huge_dss_prec_get(arena)); + else + ret = huge_palloc(usize, alignment, zero, huge_dss_prec_get(arena)); + } + + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloc(size_t usize, size_t alignment, bool zero) +{ + + return (ipalloct(usize, alignment, zero, true, NULL)); +} + +/* + * Typical usage: + * void *ptr = [...] + * size_t sz = isalloc(ptr, config_prof); + */ +JEMALLOC_ALWAYS_INLINE size_t +isalloc(const void *ptr, bool demote) +{ + size_t ret; + arena_chunk_t *chunk; + + assert(ptr != NULL); + /* Demotion only makes sense if config_prof is true. */ + assert(config_prof || demote == false); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + ret = arena_salloc(ptr, demote); + else + ret = huge_salloc(ptr); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +ivsalloc(const void *ptr, bool demote) +{ + + /* Return 0 if ptr is not within a chunk managed by jemalloc. */ + if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0) + return (0); + + return (isalloc(ptr, demote)); +} + +JEMALLOC_INLINE size_t +u2rz(size_t usize) +{ + size_t ret; + + if (usize <= SMALL_MAXCLASS) { + size_t binind = SMALL_SIZE2BIN(usize); + ret = arena_bin_info[binind].redzone_size; + } else + ret = 0; + + return (ret); +} + +JEMALLOC_INLINE size_t +p2rz(const void *ptr) +{ + size_t usize = isalloc(ptr, false); + + return (u2rz(usize)); +} + +JEMALLOC_ALWAYS_INLINE void +idalloct(void *ptr, bool try_tcache) +{ + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(chunk->arena, chunk, ptr, try_tcache); + else + huge_dalloc(ptr, true); +} + +JEMALLOC_ALWAYS_INLINE void +idalloc(void *ptr) +{ + + idalloct(ptr, true); +} + +JEMALLOC_ALWAYS_INLINE void +iqalloct(void *ptr, bool try_tcache) +{ + + if (config_fill && opt_quarantine) + quarantine(ptr); + else + idalloct(ptr, try_tcache); +} + +JEMALLOC_ALWAYS_INLINE void +iqalloc(void *ptr) +{ + + iqalloct(ptr, true); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena) +{ + void *p; + size_t usize, copysize; + + usize = sa2u(size + extra, alignment); + if (usize == 0) + return (NULL); + p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + if (p == NULL) { + if (extra == 0) + return (NULL); + /* Try again, without extra this time. */ + usize = sa2u(size, alignment); + if (usize == 0) + return (NULL); + p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + if (p == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + memcpy(p, ptr, copysize); + iqalloct(ptr, try_tcache_dalloc); + return (p); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, + bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) +{ + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr, config_prof); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* + * Existing object alignment is inadequate; allocate new space + * and copy. + */ + return (iralloct_realign(ptr, oldsize, size, extra, alignment, + zero, try_tcache_alloc, try_tcache_dalloc, arena)); + } + + if (size + extra <= arena_maxclass) { + return (arena_ralloc(arena, ptr, oldsize, size, extra, + alignment, zero, try_tcache_alloc, + try_tcache_dalloc)); + } else { + return (huge_ralloc(ptr, oldsize, size, extra, + alignment, zero, try_tcache_dalloc, huge_dss_prec_get(arena))); + } +} + +JEMALLOC_ALWAYS_INLINE void * +iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +{ + + return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL)); +} + +JEMALLOC_ALWAYS_INLINE bool +ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +{ + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr, config_prof); + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* Existing object alignment is inadequate. */ + return (true); + } + + if (size <= arena_maxclass) + return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); + else + return (huge_ralloc_no_move(ptr, oldsize, size, extra)); +} + +malloc_tsd_externs(thread_allocated, thread_allocated_t) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) +#endif + +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_INLINES +/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/mb.h b/deps/jemalloc/include/jemalloc/internal/mb.h new file mode 100644 index 0000000000..3cfa787294 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/mb.h @@ -0,0 +1,115 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void mb_write(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) +#ifdef __i386__ +/* + * According to the Intel Architecture Software Developer's Manual, current + * processors execute instructions in order from the perspective of other + * processors in a multiprocessor system, but 1) Intel reserves the right to + * change that, and 2) the compiler's optimizer could re-order instructions if + * there weren't some form of barrier. Therefore, even if running on an + * architecture that does not need memory barriers (everything through at least + * i686), an "optimizer barrier" is necessary. + */ +JEMALLOC_INLINE void +mb_write(void) +{ + +# if 0 + /* This is a true memory barrier. */ + asm volatile ("pusha;" + "xor %%eax,%%eax;" + "cpuid;" + "popa;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +#else + /* + * This is hopefully enough to keep the compiler from reordering + * instructions around this one. + */ + asm volatile ("nop;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +#endif +} +#elif (defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("sfence" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__powerpc__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("eieio" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__sparc64__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("membar #StoreStore" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__tile__) +JEMALLOC_INLINE void +mb_write(void) +{ + + __sync_synchronize(); +} +#else +/* + * This is much slower than a simple memory barrier, but the semantics of mutex + * unlock make this work. + */ +JEMALLOC_INLINE void +mb_write(void) +{ + malloc_mutex_t mtx; + + malloc_mutex_init(&mtx); + malloc_mutex_lock(&mtx); + malloc_mutex_unlock(&mtx); +} +#endif +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h new file mode 100644 index 0000000000..de44e1435a --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/mutex.h @@ -0,0 +1,99 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct malloc_mutex_s malloc_mutex_t; + +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER {0} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} +#else +# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ + defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} +# else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} +# endif +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct malloc_mutex_s { +#ifdef _WIN32 + CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLock lock; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; +#else + pthread_mutex_t lock; +#endif +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_LAZY_LOCK +extern bool isthreaded; +#else +# undef isthreaded /* Undo private_namespace.h definition. */ +# define isthreaded true +#endif + +bool malloc_mutex_init(malloc_mutex_t *mutex); +void malloc_mutex_prefork(malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(malloc_mutex_t *mutex); +bool mutex_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void malloc_mutex_lock(malloc_mutex_t *mutex); +void malloc_mutex_unlock(malloc_mutex_t *mutex); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +malloc_mutex_lock(malloc_mutex_t *mutex) +{ + + if (isthreaded) { +#ifdef _WIN32 + EnterCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockLock(&mutex->lock); +#else + pthread_mutex_lock(&mutex->lock); +#endif + } +} + +JEMALLOC_INLINE void +malloc_mutex_unlock(malloc_mutex_t *mutex) +{ + + if (isthreaded) { +#ifdef _WIN32 + LeaveCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockUnlock(&mutex->lock); +#else + pthread_mutex_unlock(&mutex->lock); +#endif + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/private_namespace.h b/deps/jemalloc/include/jemalloc/internal/private_namespace.h new file mode 100644 index 0000000000..35c3b0c6c7 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/private_namespace.h @@ -0,0 +1,413 @@ +#define a0calloc JEMALLOC_N(a0calloc) +#define a0free JEMALLOC_N(a0free) +#define a0malloc JEMALLOC_N(a0malloc) +#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) +#define arena_bin_index JEMALLOC_N(arena_bin_index) +#define arena_bin_info JEMALLOC_N(arena_bin_info) +#define arena_boot JEMALLOC_N(arena_boot) +#define arena_dalloc JEMALLOC_N(arena_dalloc) +#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) +#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) +#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) +#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) +#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) +#define arena_dss_prec_get JEMALLOC_N(arena_dss_prec_get) +#define arena_dss_prec_set JEMALLOC_N(arena_dss_prec_set) +#define arena_malloc JEMALLOC_N(arena_malloc) +#define arena_malloc_large JEMALLOC_N(arena_malloc_large) +#define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) +#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) +#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) +#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) +#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) +#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) +#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) +#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) +#define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) +#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) +#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) +#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) +#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) +#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapbitsp_read JEMALLOC_N(arena_mapbitsp_read) +#define arena_mapbitsp_write JEMALLOC_N(arena_mapbitsp_write) +#define arena_mapp_get JEMALLOC_N(arena_mapp_get) +#define arena_maxclass JEMALLOC_N(arena_maxclass) +#define arena_new JEMALLOC_N(arena_new) +#define arena_palloc JEMALLOC_N(arena_palloc) +#define arena_postfork_child JEMALLOC_N(arena_postfork_child) +#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) +#define arena_prefork JEMALLOC_N(arena_prefork) +#define arena_prof_accum JEMALLOC_N(arena_prof_accum) +#define arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl) +#define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked) +#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) +#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) +#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) +#define arena_purge_all JEMALLOC_N(arena_purge_all) +#define arena_quarantine_junk_small JEMALLOC_N(arena_quarantine_junk_small) +#define arena_ralloc JEMALLOC_N(arena_ralloc) +#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) +#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) +#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) +#define arena_run_regind JEMALLOC_N(arena_run_regind) +#define arena_salloc JEMALLOC_N(arena_salloc) +#define arena_stats_merge JEMALLOC_N(arena_stats_merge) +#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) +#define arenas JEMALLOC_N(arenas) +#define arenas_booted JEMALLOC_N(arenas_booted) +#define arenas_cleanup JEMALLOC_N(arenas_cleanup) +#define arenas_extend JEMALLOC_N(arenas_extend) +#define arenas_initialized JEMALLOC_N(arenas_initialized) +#define arenas_lock JEMALLOC_N(arenas_lock) +#define arenas_tls JEMALLOC_N(arenas_tls) +#define arenas_tsd JEMALLOC_N(arenas_tsd) +#define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) +#define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) +#define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) +#define arenas_tsd_get_wrapper JEMALLOC_N(arenas_tsd_get_wrapper) +#define arenas_tsd_init_head JEMALLOC_N(arenas_tsd_init_head) +#define arenas_tsd_set JEMALLOC_N(arenas_tsd_set) +#define atomic_add_u JEMALLOC_N(atomic_add_u) +#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) +#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) +#define atomic_add_z JEMALLOC_N(atomic_add_z) +#define atomic_sub_u JEMALLOC_N(atomic_sub_u) +#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) +#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) +#define atomic_sub_z JEMALLOC_N(atomic_sub_z) +#define base_alloc JEMALLOC_N(base_alloc) +#define base_boot JEMALLOC_N(base_boot) +#define base_calloc JEMALLOC_N(base_calloc) +#define base_node_alloc JEMALLOC_N(base_node_alloc) +#define base_node_dealloc JEMALLOC_N(base_node_dealloc) +#define base_postfork_child JEMALLOC_N(base_postfork_child) +#define base_postfork_parent JEMALLOC_N(base_postfork_parent) +#define base_prefork JEMALLOC_N(base_prefork) +#define bitmap_full JEMALLOC_N(bitmap_full) +#define bitmap_get JEMALLOC_N(bitmap_get) +#define bitmap_info_init JEMALLOC_N(bitmap_info_init) +#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups) +#define bitmap_init JEMALLOC_N(bitmap_init) +#define bitmap_set JEMALLOC_N(bitmap_set) +#define bitmap_sfu JEMALLOC_N(bitmap_sfu) +#define bitmap_size JEMALLOC_N(bitmap_size) +#define bitmap_unset JEMALLOC_N(bitmap_unset) +#define bt_init JEMALLOC_N(bt_init) +#define buferror JEMALLOC_N(buferror) +#define choose_arena JEMALLOC_N(choose_arena) +#define choose_arena_hard JEMALLOC_N(choose_arena_hard) +#define chunk_alloc JEMALLOC_N(chunk_alloc) +#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) +#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) +#define chunk_boot JEMALLOC_N(chunk_boot) +#define chunk_dealloc JEMALLOC_N(chunk_dealloc) +#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) +#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) +#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) +#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) +#define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get) +#define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set) +#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) +#define chunk_in_dss JEMALLOC_N(chunk_in_dss) +#define chunk_npages JEMALLOC_N(chunk_npages) +#define chunk_postfork_child JEMALLOC_N(chunk_postfork_child) +#define chunk_postfork_parent JEMALLOC_N(chunk_postfork_parent) +#define chunk_prefork JEMALLOC_N(chunk_prefork) +#define chunk_unmap JEMALLOC_N(chunk_unmap) +#define chunks_mtx JEMALLOC_N(chunks_mtx) +#define chunks_rtree JEMALLOC_N(chunks_rtree) +#define chunksize JEMALLOC_N(chunksize) +#define chunksize_mask JEMALLOC_N(chunksize_mask) +#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) +#define ckh_count JEMALLOC_N(ckh_count) +#define ckh_delete JEMALLOC_N(ckh_delete) +#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert) +#define ckh_insert JEMALLOC_N(ckh_insert) +#define ckh_isearch JEMALLOC_N(ckh_isearch) +#define ckh_iter JEMALLOC_N(ckh_iter) +#define ckh_new JEMALLOC_N(ckh_new) +#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) +#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) +#define ckh_rebuild JEMALLOC_N(ckh_rebuild) +#define ckh_remove JEMALLOC_N(ckh_remove) +#define ckh_search JEMALLOC_N(ckh_search) +#define ckh_string_hash JEMALLOC_N(ckh_string_hash) +#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) +#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) +#define ckh_try_insert JEMALLOC_N(ckh_try_insert) +#define ctl_boot JEMALLOC_N(ctl_boot) +#define ctl_bymib JEMALLOC_N(ctl_bymib) +#define ctl_byname JEMALLOC_N(ctl_byname) +#define ctl_nametomib JEMALLOC_N(ctl_nametomib) +#define ctl_postfork_child JEMALLOC_N(ctl_postfork_child) +#define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent) +#define ctl_prefork JEMALLOC_N(ctl_prefork) +#define dss_prec_names JEMALLOC_N(dss_prec_names) +#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) +#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) +#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) +#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse) +#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start) +#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last) +#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new) +#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next) +#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch) +#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev) +#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch) +#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove) +#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter) +#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) +#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) +#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) +#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first) +#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert) +#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter) +#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse) +#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start) +#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last) +#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new) +#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next) +#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch) +#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev) +#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch) +#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove) +#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter) +#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse) +#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) +#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) +#define get_errno JEMALLOC_N(get_errno) +#define hash JEMALLOC_N(hash) +#define hash_fmix_32 JEMALLOC_N(hash_fmix_32) +#define hash_fmix_64 JEMALLOC_N(hash_fmix_64) +#define hash_get_block_32 JEMALLOC_N(hash_get_block_32) +#define hash_get_block_64 JEMALLOC_N(hash_get_block_64) +#define hash_rotl_32 JEMALLOC_N(hash_rotl_32) +#define hash_rotl_64 JEMALLOC_N(hash_rotl_64) +#define hash_x64_128 JEMALLOC_N(hash_x64_128) +#define hash_x86_128 JEMALLOC_N(hash_x86_128) +#define hash_x86_32 JEMALLOC_N(hash_x86_32) +#define huge_allocated JEMALLOC_N(huge_allocated) +#define huge_boot JEMALLOC_N(huge_boot) +#define huge_dalloc JEMALLOC_N(huge_dalloc) +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) +#define huge_dss_prec_get JEMALLOC_N(huge_dss_prec_get) +#define huge_malloc JEMALLOC_N(huge_malloc) +#define huge_mtx JEMALLOC_N(huge_mtx) +#define huge_ndalloc JEMALLOC_N(huge_ndalloc) +#define huge_nmalloc JEMALLOC_N(huge_nmalloc) +#define huge_palloc JEMALLOC_N(huge_palloc) +#define huge_postfork_child JEMALLOC_N(huge_postfork_child) +#define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) +#define huge_prefork JEMALLOC_N(huge_prefork) +#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) +#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) +#define huge_ralloc JEMALLOC_N(huge_ralloc) +#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) +#define huge_salloc JEMALLOC_N(huge_salloc) +#define iallocm JEMALLOC_N(iallocm) +#define icalloc JEMALLOC_N(icalloc) +#define icalloct JEMALLOC_N(icalloct) +#define idalloc JEMALLOC_N(idalloc) +#define idalloct JEMALLOC_N(idalloct) +#define imalloc JEMALLOC_N(imalloc) +#define imalloct JEMALLOC_N(imalloct) +#define ipalloc JEMALLOC_N(ipalloc) +#define ipalloct JEMALLOC_N(ipalloct) +#define iqalloc JEMALLOC_N(iqalloc) +#define iqalloct JEMALLOC_N(iqalloct) +#define iralloc JEMALLOC_N(iralloc) +#define iralloct JEMALLOC_N(iralloct) +#define iralloct_realign JEMALLOC_N(iralloct_realign) +#define isalloc JEMALLOC_N(isalloc) +#define isthreaded JEMALLOC_N(isthreaded) +#define ivsalloc JEMALLOC_N(ivsalloc) +#define ixalloc JEMALLOC_N(ixalloc) +#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) +#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) +#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) +#define malloc_cprintf JEMALLOC_N(malloc_cprintf) +#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) +#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) +#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) +#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) +#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) +#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) +#define malloc_printf JEMALLOC_N(malloc_printf) +#define malloc_snprintf JEMALLOC_N(malloc_snprintf) +#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax) +#define malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot) +#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) +#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) +#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) +#define malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup) +#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) +#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) +#define malloc_write JEMALLOC_N(malloc_write) +#define map_bias JEMALLOC_N(map_bias) +#define mb_write JEMALLOC_N(mb_write) +#define mutex_boot JEMALLOC_N(mutex_boot) +#define narenas_auto JEMALLOC_N(narenas_auto) +#define narenas_total JEMALLOC_N(narenas_total) +#define narenas_total_get JEMALLOC_N(narenas_total_get) +#define ncpus JEMALLOC_N(ncpus) +#define nhbins JEMALLOC_N(nhbins) +#define opt_abort JEMALLOC_N(opt_abort) +#define opt_dss JEMALLOC_N(opt_dss) +#define opt_junk JEMALLOC_N(opt_junk) +#define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) +#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) +#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) +#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample) +#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max) +#define opt_narenas JEMALLOC_N(opt_narenas) +#define opt_prof JEMALLOC_N(opt_prof) +#define opt_prof_accum JEMALLOC_N(opt_prof_accum) +#define opt_prof_active JEMALLOC_N(opt_prof_active) +#define opt_prof_final JEMALLOC_N(opt_prof_final) +#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) +#define opt_prof_leak JEMALLOC_N(opt_prof_leak) +#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix) +#define opt_quarantine JEMALLOC_N(opt_quarantine) +#define opt_redzone JEMALLOC_N(opt_redzone) +#define opt_stats_print JEMALLOC_N(opt_stats_print) +#define opt_tcache JEMALLOC_N(opt_tcache) +#define opt_utrace JEMALLOC_N(opt_utrace) +#define opt_valgrind JEMALLOC_N(opt_valgrind) +#define opt_xmalloc JEMALLOC_N(opt_xmalloc) +#define opt_zero JEMALLOC_N(opt_zero) +#define p2rz JEMALLOC_N(p2rz) +#define pages_purge JEMALLOC_N(pages_purge) +#define pow2_ceil JEMALLOC_N(pow2_ceil) +#define prof_backtrace JEMALLOC_N(prof_backtrace) +#define prof_boot0 JEMALLOC_N(prof_boot0) +#define prof_boot1 JEMALLOC_N(prof_boot1) +#define prof_boot2 JEMALLOC_N(prof_boot2) +#define prof_bt_count JEMALLOC_N(prof_bt_count) +#define prof_ctx_get JEMALLOC_N(prof_ctx_get) +#define prof_ctx_set JEMALLOC_N(prof_ctx_set) +#define prof_dump_open JEMALLOC_N(prof_dump_open) +#define prof_free JEMALLOC_N(prof_free) +#define prof_gdump JEMALLOC_N(prof_gdump) +#define prof_idump JEMALLOC_N(prof_idump) +#define prof_interval JEMALLOC_N(prof_interval) +#define prof_lookup JEMALLOC_N(prof_lookup) +#define prof_malloc JEMALLOC_N(prof_malloc) +#define prof_mdump JEMALLOC_N(prof_mdump) +#define prof_postfork_child JEMALLOC_N(prof_postfork_child) +#define prof_postfork_parent JEMALLOC_N(prof_postfork_parent) +#define prof_prefork JEMALLOC_N(prof_prefork) +#define prof_promote JEMALLOC_N(prof_promote) +#define prof_realloc JEMALLOC_N(prof_realloc) +#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) +#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) +#define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) +#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_get JEMALLOC_N(prof_tdata_get) +#define prof_tdata_init JEMALLOC_N(prof_tdata_init) +#define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) +#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) +#define prof_tdata_tsd JEMALLOC_N(prof_tdata_tsd) +#define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) +#define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) +#define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) +#define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper) +#define prof_tdata_tsd_init_head JEMALLOC_N(prof_tdata_tsd_init_head) +#define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) +#define quarantine JEMALLOC_N(quarantine) +#define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook) +#define quarantine_boot JEMALLOC_N(quarantine_boot) +#define quarantine_booted JEMALLOC_N(quarantine_booted) +#define quarantine_cleanup JEMALLOC_N(quarantine_cleanup) +#define quarantine_init JEMALLOC_N(quarantine_init) +#define quarantine_tls JEMALLOC_N(quarantine_tls) +#define quarantine_tsd JEMALLOC_N(quarantine_tsd) +#define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) +#define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) +#define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) +#define quarantine_tsd_get_wrapper JEMALLOC_N(quarantine_tsd_get_wrapper) +#define quarantine_tsd_init_head JEMALLOC_N(quarantine_tsd_init_head) +#define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) +#define register_zone JEMALLOC_N(register_zone) +#define rtree_delete JEMALLOC_N(rtree_delete) +#define rtree_get JEMALLOC_N(rtree_get) +#define rtree_get_locked JEMALLOC_N(rtree_get_locked) +#define rtree_new JEMALLOC_N(rtree_new) +#define rtree_postfork_child JEMALLOC_N(rtree_postfork_child) +#define rtree_postfork_parent JEMALLOC_N(rtree_postfork_parent) +#define rtree_prefork JEMALLOC_N(rtree_prefork) +#define rtree_set JEMALLOC_N(rtree_set) +#define s2u JEMALLOC_N(s2u) +#define sa2u JEMALLOC_N(sa2u) +#define set_errno JEMALLOC_N(set_errno) +#define small_size2bin JEMALLOC_N(small_size2bin) +#define stats_cactive JEMALLOC_N(stats_cactive) +#define stats_cactive_add JEMALLOC_N(stats_cactive_add) +#define stats_cactive_get JEMALLOC_N(stats_cactive_get) +#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) +#define stats_chunks JEMALLOC_N(stats_chunks) +#define stats_print JEMALLOC_N(stats_print) +#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) +#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) +#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) +#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) +#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate) +#define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) +#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) +#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) +#define tcache_bin_info JEMALLOC_N(tcache_bin_info) +#define tcache_boot0 JEMALLOC_N(tcache_boot0) +#define tcache_boot1 JEMALLOC_N(tcache_boot1) +#define tcache_booted JEMALLOC_N(tcache_booted) +#define tcache_create JEMALLOC_N(tcache_create) +#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) +#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) +#define tcache_destroy JEMALLOC_N(tcache_destroy) +#define tcache_enabled_booted JEMALLOC_N(tcache_enabled_booted) +#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) +#define tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized) +#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) +#define tcache_enabled_tls JEMALLOC_N(tcache_enabled_tls) +#define tcache_enabled_tsd JEMALLOC_N(tcache_enabled_tsd) +#define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) +#define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) +#define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) +#define tcache_enabled_tsd_get_wrapper JEMALLOC_N(tcache_enabled_tsd_get_wrapper) +#define tcache_enabled_tsd_init_head JEMALLOC_N(tcache_enabled_tsd_init_head) +#define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) +#define tcache_event JEMALLOC_N(tcache_event) +#define tcache_event_hard JEMALLOC_N(tcache_event_hard) +#define tcache_flush JEMALLOC_N(tcache_flush) +#define tcache_get JEMALLOC_N(tcache_get) +#define tcache_initialized JEMALLOC_N(tcache_initialized) +#define tcache_maxclass JEMALLOC_N(tcache_maxclass) +#define tcache_salloc JEMALLOC_N(tcache_salloc) +#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) +#define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) +#define tcache_tls JEMALLOC_N(tcache_tls) +#define tcache_tsd JEMALLOC_N(tcache_tsd) +#define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) +#define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) +#define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) +#define tcache_tsd_get_wrapper JEMALLOC_N(tcache_tsd_get_wrapper) +#define tcache_tsd_init_head JEMALLOC_N(tcache_tsd_init_head) +#define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) +#define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) +#define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized) +#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) +#define thread_allocated_tsd JEMALLOC_N(thread_allocated_tsd) +#define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) +#define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) +#define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) +#define thread_allocated_tsd_get_wrapper JEMALLOC_N(thread_allocated_tsd_get_wrapper) +#define thread_allocated_tsd_init_head JEMALLOC_N(thread_allocated_tsd_init_head) +#define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) +#define tsd_init_check_recursion JEMALLOC_N(tsd_init_check_recursion) +#define tsd_init_finish JEMALLOC_N(tsd_init_finish) +#define u2rz JEMALLOC_N(u2rz) diff --git a/deps/jemalloc/include/jemalloc/internal/prng.h b/deps/jemalloc/include/jemalloc/internal/prng.h new file mode 100644 index 0000000000..7b2b06512f --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/prng.h @@ -0,0 +1,60 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Simple linear congruential pseudo-random number generator: + * + * prng(y) = (a*x + c) % m + * + * where the following constants ensure maximal period: + * + * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. + * c == Odd number (relatively prime to 2^n). + * m == 2^32 + * + * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. + * + * This choice of m has the disadvantage that the quality of the bits is + * proportional to bit position. For example. the lowest bit has a cycle of 2, + * the next has a cycle of 4, etc. For this reason, we prefer to use the upper + * bits. + * + * Macro parameters: + * uint32_t r : Result. + * unsigned lg_range : (0..32], number of least significant bits to return. + * uint32_t state : Seed value. + * const uint32_t a, c : See above discussion. + */ +#define prng32(r, lg_range, state, a, c) do { \ + assert(lg_range > 0); \ + assert(lg_range <= 32); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (32 - lg_range); \ +} while (false) + +/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ +#define prng64(r, lg_range, state, a, c) do { \ + assert(lg_range > 0); \ + assert(lg_range <= 64); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (64 - lg_range); \ +} while (false) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h new file mode 100644 index 0000000000..6f162d21e8 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/prof.h @@ -0,0 +1,613 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_thr_cnt_s prof_thr_cnt_t; +typedef struct prof_ctx_s prof_ctx_t; +typedef struct prof_tdata_s prof_tdata_t; + +/* Option defaults. */ +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#define PROF_BT_MAX 128 + +/* Maximum number of backtraces to store in each per thread LRU cache. */ +#define PROF_TCMAX 1024 + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all ctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned nignore; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* + * Profiling counters. An allocation/deallocation pair can operate on + * different prof_thr_cnt_t objects that are linked into the same + * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go + * negative. In principle it is possible for the *bytes counters to + * overflow/underflow, but a general solution would require something + * like 128-bit counters; this implementation doesn't bother to solve + * that problem. + */ + int64_t curobjs; + int64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +struct prof_thr_cnt_s { + /* Linkage into prof_ctx_t's cnts_ql. */ + ql_elm(prof_thr_cnt_t) cnts_link; + + /* Linkage into thread's LRU. */ + ql_elm(prof_thr_cnt_t) lru_link; + + /* + * Associated context. If a thread frees an object that it did not + * allocate, it is possible that the context is not cached in the + * thread's hash table, in which case it must be able to look up the + * context, insert a new prof_thr_cnt_t into the thread's hash table, + * and link it into the prof_ctx_t's cnts_ql. + */ + prof_ctx_t *ctx; + + /* + * Threads use memory barriers to update the counters. Since there is + * only ever one writer, the only challenge is for the reader to get a + * consistent read of the counters. + * + * The writer uses this series of operations: + * + * 1) Increment epoch to an odd number. + * 2) Update counters. + * 3) Increment epoch to an even number. + * + * The reader must assure 1) that the epoch is even while it reads the + * counters, and 2) that the epoch doesn't change between the time it + * starts and finishes reading the counters. + */ + unsigned epoch; + + /* Profiling counters. */ + prof_cnt_t cnts; +}; + +struct prof_ctx_s { + /* Associated backtrace. */ + prof_bt_t *bt; + + /* Protects nlimbo, cnt_merged, and cnts_ql. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this ctx to be in a state of + * limbo due to one of: + * - Initializing per thread counters associated with this ctx. + * - Preparing to destroy this ctx. + * - Dumping a heap profile that includes this ctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * ctx. + */ + unsigned nlimbo; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* When threads exit, they merge their stats into cnt_merged. */ + prof_cnt_t cnt_merged; + + /* + * List of profile counters, one for each thread that has allocated in + * this context. + */ + ql_head(prof_thr_cnt_t) cnts_ql; + + /* Linkage for list of contexts to be dumped. */ + ql_elm(prof_ctx_t) dump_link; +}; +typedef ql_head(prof_ctx_t) prof_ctx_list_t; + +struct prof_tdata_s { + /* + * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a + * cache of backtraces, with associated thread-specific prof_thr_cnt_t + * objects. Other threads may read the prof_thr_cnt_t contents, but no + * others will ever write them. + * + * Upon thread exit, the thread must merge all the prof_thr_cnt_t + * counter data into the associated prof_ctx_t objects, and unlink/free + * the prof_thr_cnt_t objects. + */ + ckh_t bt2cnt; + + /* LRU for contents of bt2cnt. */ + ql_head(prof_thr_cnt_t) lru_ql; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void **vec; + + /* Sampling state. */ + uint64_t prng_state; + uint64_t threshold; + uint64_t accum; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_prof; +/* + * Even if opt_prof is true, sampling can be temporarily disabled by setting + * opt_prof_active to false. No locking is used when updating opt_prof_active, + * so there are no guarantees regarding how long it will take for all threads + * to notice state changes. + */ +extern bool opt_prof_active; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* + * Profile dump interval, measured in bytes allocated. Each arena triggers a + * profile dump when it reaches this threshold. The effect is that the + * interval between profile dumps averages prof_interval, though the actual + * interval between dumps will tend to be sporadic, and the interval will be a + * maximum of approximately (prof_interval * narenas). + */ +extern uint64_t prof_interval; + +/* + * If true, promote small sampled objects to large objects, since small run + * headers do not have embedded profile context pointers. + */ +extern bool prof_promote; + +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt, unsigned nignore); +prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +#ifdef JEMALLOC_JET +size_t prof_bt_count(void); +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *prof_dump_open; +#endif +void prof_idump(void); +bool prof_mdump(const char *filename); +void prof_gdump(void); +prof_tdata_t *prof_tdata_init(void); +void prof_tdata_cleanup(void *arg); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(void); +void prof_prefork(void); +void prof_postfork_parent(void); +void prof_postfork_child(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#define PROF_ALLOC_PREP(nignore, size, ret) do { \ + prof_tdata_t *prof_tdata; \ + prof_bt_t bt; \ + \ + assert(size == s2u(size)); \ + \ + prof_tdata = prof_tdata_get(true); \ + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ + if (prof_tdata != NULL) \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + else \ + ret = NULL; \ + break; \ + } \ + \ + if (opt_prof_active == false) { \ + /* Sampling is currently inactive, so avoid sampling. */\ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } else if (opt_lg_prof_sample == 0) { \ + /* Don't bother with sampling logic, since sampling */\ + /* interval is 1. */\ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore); \ + ret = prof_lookup(&bt); \ + } else { \ + if (prof_tdata->threshold == 0) { \ + /* Initialize. Seed the prng differently for */\ + /* each thread. */\ + prof_tdata->prng_state = \ + (uint64_t)(uintptr_t)&size; \ + prof_sample_threshold_update(prof_tdata); \ + } \ + \ + /* Determine whether to capture a backtrace based on */\ + /* whether size is enough for prof_accum to reach */\ + /* prof_tdata->threshold. However, delay updating */\ + /* these variables until prof_{m,re}alloc(), because */\ + /* we don't know for sure that the allocation will */\ + /* succeed. */\ + /* */\ + /* Use subtraction rather than addition to avoid */\ + /* potential integer overflow. */\ + if (size >= prof_tdata->threshold - \ + prof_tdata->accum) { \ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore); \ + ret = prof_lookup(&bt); \ + } else \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } \ +} while (0) + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) + +prof_tdata_t *prof_tdata_get(bool create); +void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +prof_ctx_t *prof_ctx_get(const void *ptr); +void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); +bool prof_sample_accum_update(size_t size); +void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); +void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, + size_t old_usize, prof_ctx_t *old_ctx); +void prof_free(const void *ptr, size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ +malloc_tsd_externs(prof_tdata, prof_tdata_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, + prof_tdata_cleanup) + +JEMALLOC_INLINE prof_tdata_t * +prof_tdata_get(bool create) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + prof_tdata = *prof_tdata_tsd_get(); + if (create && prof_tdata == NULL) + prof_tdata = prof_tdata_init(); + + return (prof_tdata); +} + +JEMALLOC_INLINE void +prof_sample_threshold_update(prof_tdata_t *prof_tdata) +{ + /* + * The body of this function is compiled out unless heap profiling is + * enabled, so that it is possible to compile jemalloc with floating + * point support completely disabled. Avoiding floating point code is + * important on memory-constrained systems, but it also enables a + * workaround for versions of glibc that don't properly save/restore + * floating point registers during dynamic lazy symbol loading (which + * internally calls into whatever malloc implementation happens to be + * integrated into the application). Note that some compilers (e.g. + * gcc 4.8) may use floating point registers for fast memory moves, so + * jemalloc must be compiled with such optimizations disabled (e.g. + * -mno-sse) in order for the workaround to be complete. + */ +#ifdef JEMALLOC_PROF + uint64_t r; + double u; + + cassert(config_prof); + + /* + * Compute sample threshold as a geometrically distributed random + * variable with mean (2^opt_lg_prof_sample). + * + * __ __ + * | log(u) | 1 + * prof_tdata->threshold = | -------- |, where p = ------------------- + * | log(1-p) | opt_lg_prof_sample + * 2 + * + * For more information on the math, see: + * + * Non-Uniform Random Variate Generation + * Luc Devroye + * Springer-Verlag, New York, 1986 + * pp 500 + * (http://luc.devroye.org/rnbookindex.html) + */ + prng64(r, 53, prof_tdata->prng_state, + UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); + u = (double)r * (1.0/9007199254740992.0L); + prof_tdata->threshold = (uint64_t)(log(u) / + log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + + (uint64_t)1U; +#endif +} + +JEMALLOC_INLINE prof_ctx_t * +prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + arena_chunk_t *chunk; + + cassert(config_prof); + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + ret = arena_prof_ctx_get(ptr); + } else + ret = huge_prof_ctx_get(ptr); + + return (ret); +} + +JEMALLOC_INLINE void +prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) +{ + arena_chunk_t *chunk; + + cassert(config_prof); + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + arena_prof_ctx_set(ptr, usize, ctx); + } else + huge_prof_ctx_set(ptr, ctx); +} + +JEMALLOC_INLINE bool +prof_sample_accum_update(size_t size) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + /* Sampling logic is unnecessary if the interval is 1. */ + assert(opt_lg_prof_sample != 0); + + prof_tdata = prof_tdata_get(false); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (true); + + /* Take care to avoid integer overflow. */ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + prof_tdata->accum -= (prof_tdata->threshold - size); + /* Compute new sample threshold. */ + prof_sample_threshold_update(prof_tdata); + while (prof_tdata->accum >= prof_tdata->threshold) { + prof_tdata->accum -= prof_tdata->threshold; + prof_sample_threshold_update(prof_tdata); + } + return (false); + } else { + prof_tdata->accum += size; + return (true); + } +} + +JEMALLOC_INLINE void +prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) +{ + + cassert(config_prof); + assert(ptr != NULL); + assert(usize == isalloc(ptr, true)); + + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(usize)) { + /* + * Don't sample. For malloc()-like allocation, it is + * always possible to tell in advance how large an + * object's usable size will be, so there should never + * be a difference between the usize passed to + * PROF_ALLOC_PREP() and prof_malloc(). + */ + assert((uintptr_t)cnt == (uintptr_t)1U); + } + } + + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, usize, cnt->ctx); + + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + cnt->cnts.curobjs++; + cnt->cnts.curbytes += usize; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += usize; + } + /*********/ + mb_write(); + /*********/ + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else + prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); +} + +JEMALLOC_INLINE void +prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, + size_t old_usize, prof_ctx_t *old_ctx) +{ + prof_thr_cnt_t *told_cnt; + + cassert(config_prof); + assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); + + if (ptr != NULL) { + assert(usize == isalloc(ptr, true)); + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(usize)) { + /* + * Don't sample. The usize passed to + * PROF_ALLOC_PREP() was larger than what + * actually got allocated, so a backtrace was + * captured for this allocation, even though + * its actual usize was insufficient to cross + * the sample threshold. + */ + cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } + } + + if ((uintptr_t)old_ctx > (uintptr_t)1U) { + told_cnt = prof_lookup(old_ctx->bt); + if (told_cnt == NULL) { + /* + * It's too late to propagate OOM for this realloc(), + * so operate directly on old_cnt->ctx->cnt_merged. + */ + malloc_mutex_lock(old_ctx->lock); + old_ctx->cnt_merged.curobjs--; + old_ctx->cnt_merged.curbytes -= old_usize; + malloc_mutex_unlock(old_ctx->lock); + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } else + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, usize, cnt->ctx); + cnt->epoch++; + } else if (ptr != NULL) + prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) { + told_cnt->cnts.curobjs--; + told_cnt->cnts.curbytes -= old_usize; + } + if ((uintptr_t)cnt > (uintptr_t)1U) { + cnt->cnts.curobjs++; + cnt->cnts.curbytes += usize; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += usize; + } + } + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) + cnt->epoch++; + /*********/ + mb_write(); /* Not strictly necessary. */ +} + +JEMALLOC_INLINE void +prof_free(const void *ptr, size_t size) +{ + prof_ctx_t *ctx = prof_ctx_get(ptr); + + cassert(config_prof); + + if ((uintptr_t)ctx > (uintptr_t)1) { + prof_thr_cnt_t *tcnt; + assert(size == isalloc(ptr, true)); + tcnt = prof_lookup(ctx->bt); + + if (tcnt != NULL) { + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + tcnt->cnts.curobjs--; + tcnt->cnts.curbytes -= size; + /*********/ + mb_write(); + /*********/ + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else { + /* + * OOM during free() cannot be propagated, so operate + * directly on cnt->ctx->cnt_merged. + */ + malloc_mutex_lock(ctx->lock); + ctx->cnt_merged.curobjs--; + ctx->cnt_merged.curbytes -= size; + malloc_mutex_unlock(ctx->lock); + } + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/ql.h b/deps/jemalloc/include/jemalloc/internal/ql.h new file mode 100644 index 0000000000..f70c5f6f39 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/ql.h @@ -0,0 +1,83 @@ +/* + * List definitions. + */ +#define ql_head(a_type) \ +struct { \ + a_type *qlh_first; \ +} + +#define ql_head_initializer(a_head) {NULL} + +#define ql_elm(a_type) qr(a_type) + +/* List functions. */ +#define ql_new(a_head) do { \ + (a_head)->qlh_first = NULL; \ +} while (0) + +#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) + +#define ql_first(a_head) ((a_head)->qlh_first) + +#define ql_last(a_head, a_field) \ + ((ql_first(a_head) != NULL) \ + ? qr_prev(ql_first(a_head), a_field) : NULL) + +#define ql_next(a_head, a_elm, a_field) \ + ((ql_last(a_head, a_field) != (a_elm)) \ + ? qr_next((a_elm), a_field) : NULL) + +#define ql_prev(a_head, a_elm, a_field) \ + ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \ + : NULL) + +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ + qr_before_insert((a_qlelm), (a_elm), a_field); \ + if (ql_first(a_head) == (a_qlelm)) { \ + ql_first(a_head) = (a_elm); \ + } \ +} while (0) + +#define ql_after_insert(a_qlelm, a_elm, a_field) \ + qr_after_insert((a_qlelm), (a_elm), a_field) + +#define ql_head_insert(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) != NULL) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = (a_elm); \ +} while (0) + +#define ql_tail_insert(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) != NULL) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = qr_next((a_elm), a_field); \ +} while (0) + +#define ql_remove(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) == (a_elm)) { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ + } \ + if (ql_first(a_head) != (a_elm)) { \ + qr_remove((a_elm), a_field); \ + } else { \ + ql_first(a_head) = NULL; \ + } \ +} while (0) + +#define ql_head_remove(a_head, a_type, a_field) do { \ + a_type *t = ql_first(a_head); \ + ql_remove((a_head), t, a_field); \ +} while (0) + +#define ql_tail_remove(a_head, a_type, a_field) do { \ + a_type *t = ql_last(a_head, a_field); \ + ql_remove((a_head), t, a_field); \ +} while (0) + +#define ql_foreach(a_var, a_head, a_field) \ + qr_foreach((a_var), ql_first(a_head), a_field) + +#define ql_reverse_foreach(a_var, a_head, a_field) \ + qr_reverse_foreach((a_var), ql_first(a_head), a_field) diff --git a/deps/jemalloc/include/jemalloc/internal/qr.h b/deps/jemalloc/include/jemalloc/internal/qr.h new file mode 100644 index 0000000000..602944b9b4 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/qr.h @@ -0,0 +1,67 @@ +/* Ring definitions. */ +#define qr(a_type) \ +struct { \ + a_type *qre_next; \ + a_type *qre_prev; \ +} + +/* Ring functions. */ +#define qr_new(a_qr, a_field) do { \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) + +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) + +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qrelm); \ + (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ + (a_qrelm)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_after_insert(a_qrelm, a_qr, a_field) \ + do \ + { \ + (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ + (a_qr)->a_field.qre_prev = (a_qrelm); \ + (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ + (a_qrelm)->a_field.qre_next = (a_qr); \ + } while (0) + +#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ + void *t; \ + (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ + t = (a_qr_a)->a_field.qre_prev; \ + (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ + (a_qr_b)->a_field.qre_prev = t; \ +} while (0) + +/* qr_meld() and qr_split() are functionally equivalent, so there's no need to + * have two copies of the code. */ +#define qr_split(a_qr_a, a_qr_b, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_field) + +#define qr_remove(a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev->a_field.qre_next \ + = (a_qr)->a_field.qre_next; \ + (a_qr)->a_field.qre_next->a_field.qre_prev \ + = (a_qr)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_foreach(var, a_qr, a_field) \ + for ((var) = (a_qr); \ + (var) != NULL; \ + (var) = (((var)->a_field.qre_next != (a_qr)) \ + ? (var)->a_field.qre_next : NULL)) + +#define qr_reverse_foreach(var, a_qr, a_field) \ + for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ + (var) != NULL; \ + (var) = (((var) != (a_qr)) \ + ? (var)->a_field.qre_prev : NULL)) diff --git a/deps/jemalloc/include/jemalloc/internal/quarantine.h b/deps/jemalloc/include/jemalloc/internal/quarantine.h new file mode 100644 index 0000000000..16f677f73d --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/quarantine.h @@ -0,0 +1,67 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct quarantine_obj_s quarantine_obj_t; +typedef struct quarantine_s quarantine_t; + +/* Default per thread quarantine size if valgrind is enabled. */ +#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct quarantine_obj_s { + void *ptr; + size_t usize; +}; + +struct quarantine_s { + size_t curbytes; + size_t curobjs; + size_t first; +#define LG_MAXOBJS_INIT 10 + size_t lg_maxobjs; + quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +quarantine_t *quarantine_init(size_t lg_maxobjs); +void quarantine(void *ptr); +void quarantine_cleanup(void *arg); +bool quarantine_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *) + +void quarantine_alloc_hook(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) +malloc_tsd_externs(quarantine, quarantine_t *) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL, + quarantine_cleanup) + +JEMALLOC_ALWAYS_INLINE void +quarantine_alloc_hook(void) +{ + quarantine_t *quarantine; + + assert(config_fill && opt_quarantine); + + quarantine = *quarantine_tsd_get(); + if (quarantine == NULL) + quarantine_init(LG_MAXOBJS_INIT); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h new file mode 100644 index 0000000000..423802eb2d --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/rb.h @@ -0,0 +1,969 @@ +/*- + ******************************************************************************* + * + * cpp macro implementation of left-leaning 2-3 red-black trees. Parent + * pointers are not used, and color bits are stored in the least significant + * bit of right-child pointers (if RB_COMPACT is defined), thus making node + * linkage as compact as is possible for red-black trees. + * + * Usage: + * + * #include <stdint.h> + * #include <stdbool.h> + * #define NDEBUG // (Optional, see assert(3).) + * #include <assert.h> + * #define RB_COMPACT // (Optional, embed color bits in right-child pointers.) + * #include <rb.h> + * ... + * + ******************************************************************************* + */ + +#ifndef RB_H_ +#define RB_H_ + +#ifdef RB_COMPACT +/* Node structure. */ +#define rb_node(a_type) \ +struct { \ + a_type *rbn_left; \ + a_type *rbn_right_red; \ +} +#else +#define rb_node(a_type) \ +struct { \ + a_type *rbn_left; \ + a_type *rbn_right; \ + bool rbn_red; \ +} +#endif + +/* Root structure. */ +#define rb_tree(a_type) \ +struct { \ + a_type *rbt_root; \ + a_type rbt_nil; \ +} + +/* Left accessors. */ +#define rbtn_left_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_left) +#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ + (a_node)->a_field.rbn_left = a_left; \ +} while (0) + +#ifdef RB_COMPACT +/* Right accessors. */ +#define rbtn_right_get(a_type, a_field, a_node) \ + ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ + & ((ssize_t)-2))) +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ + | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ +} while (0) + +/* Color accessors. */ +#define rbtn_red_get(a_type, a_field, a_node) \ + ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ + & ((size_t)1))) +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ + (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ + | ((ssize_t)a_red)); \ +} while (0) +#define rbtn_red_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ + (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ +} while (0) +#define rbtn_black_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ + (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ +} while (0) +#else +/* Right accessors. */ +#define rbtn_right_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_right) +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ + (a_node)->a_field.rbn_right = a_right; \ +} while (0) + +/* Color accessors. */ +#define rbtn_red_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_red) +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ + (a_node)->a_field.rbn_red = (a_red); \ +} while (0) +#define rbtn_red_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_red = true; \ +} while (0) +#define rbtn_black_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_red = false; \ +} while (0) +#endif + +/* Node initializer. */ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) + +/* Tree initializer. */ +#define rb_new(a_type, a_field, a_rbt) do { \ + (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ + rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ + rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ +} while (0) + +/* Internal utility macros. */ +#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ + (r_node) = (a_root); \ + if ((r_node) != &(a_rbt)->rbt_nil) { \ + for (; \ + rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ + (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ + } \ + } \ +} while (0) + +#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ + (r_node) = (a_root); \ + if ((r_node) != &(a_rbt)->rbt_nil) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != \ + &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ + (r_node))) { \ + } \ + } \ +} while (0) + +#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ + (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ + rbtn_right_set(a_type, a_field, (a_node), \ + rbtn_left_get(a_type, a_field, (r_node))); \ + rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ +} while (0) + +#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ + (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ + rbtn_left_set(a_type, a_field, (a_node), \ + rbtn_right_get(a_type, a_field, (r_node))); \ + rbtn_right_set(a_type, a_field, (r_node), (a_node)); \ +} while (0) + +/* + * The rb_proto() macro generates function prototypes that correspond to the + * functions generated by an equivalently parameterized call to rb_gen(). + */ + +#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ +a_attr void \ +a_prefix##new(a_rbt_type *rbtree); \ +a_attr a_type * \ +a_prefix##first(a_rbt_type *rbtree); \ +a_attr a_type * \ +a_prefix##last(a_rbt_type *rbtree); \ +a_attr a_type * \ +a_prefix##next(a_rbt_type *rbtree, a_type *node); \ +a_attr a_type * \ +a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ +a_attr a_type * \ +a_prefix##search(a_rbt_type *rbtree, a_type *key); \ +a_attr a_type * \ +a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ +a_attr a_type * \ +a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ +a_attr void \ +a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ +a_attr void \ +a_prefix##remove(a_rbt_type *rbtree, a_type *node); \ +a_attr a_type * \ +a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ + a_rbt_type *, a_type *, void *), void *arg); \ +a_attr a_type * \ +a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); + +/* + * The rb_gen() macro generates a type-specific red-black tree implementation, + * based on the above cpp macros. + * + * Arguments: + * + * a_attr : Function attribute for generated functions (ex: static). + * a_prefix : Prefix for generated functions (ex: ex_). + * a_rb_type : Type for red-black tree data structure (ex: ex_t). + * a_type : Type for red-black tree node data structure (ex: ex_node_t). + * a_field : Name of red-black tree node linkage (ex: ex_link). + * a_cmp : Node comparison function name, with the following prototype: + * int (a_cmp *)(a_type *a_node, a_type *a_other); + * ^^^^^^ + * or a_key + * Interpretation of comparision function return values: + * -1 : a_node < a_other + * 0 : a_node == a_other + * 1 : a_node > a_other + * In all cases, the a_node or a_key macro argument is the first + * argument to the comparison function, which makes it possible + * to write comparison functions that treat the first argument + * specially. + * + * Assuming the following setup: + * + * typedef struct ex_node_s ex_node_t; + * struct ex_node_s { + * rb_node(ex_node_t) ex_link; + * }; + * typedef rb_tree(ex_node_t) ex_t; + * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp) + * + * The following API is generated: + * + * static void + * ex_new(ex_t *tree); + * Description: Initialize a red-black tree structure. + * Args: + * tree: Pointer to an uninitialized red-black tree object. + * + * static ex_node_t * + * ex_first(ex_t *tree); + * static ex_node_t * + * ex_last(ex_t *tree); + * Description: Get the first/last node in tree. + * Args: + * tree: Pointer to an initialized red-black tree object. + * Ret: First/last node in tree, or NULL if tree is empty. + * + * static ex_node_t * + * ex_next(ex_t *tree, ex_node_t *node); + * static ex_node_t * + * ex_prev(ex_t *tree, ex_node_t *node); + * Description: Get node's successor/predecessor. + * Args: + * tree: Pointer to an initialized red-black tree object. + * node: A node in tree. + * Ret: node's successor/predecessor in tree, or NULL if node is + * last/first. + * + * static ex_node_t * + * ex_search(ex_t *tree, ex_node_t *key); + * Description: Search for node that matches key. + * Args: + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or NULL if no match. + * + * static ex_node_t * + * ex_nsearch(ex_t *tree, ex_node_t *key); + * static ex_node_t * + * ex_psearch(ex_t *tree, ex_node_t *key); + * Description: Search for node that matches key. If no match is found, + * return what would be key's successor/predecessor, were + * key in tree. + * Args: + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or if no match, hypothetical node's + * successor/predecessor (NULL if no successor/predecessor). + * + * static void + * ex_insert(ex_t *tree, ex_node_t *node); + * Description: Insert node into tree. + * Args: + * tree: Pointer to an initialized red-black tree object. + * node: Node to be inserted into tree. + * + * static void + * ex_remove(ex_t *tree, ex_node_t *node); + * Description: Remove node from tree. + * Args: + * tree: Pointer to an initialized red-black tree object. + * node: Node in tree to be removed. + * + * static ex_node_t * + * ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, + * ex_node_t *, void *), void *arg); + * static ex_node_t * + * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node *(*cb)(ex_t *, + * ex_node_t *, void *), void *arg); + * Description: Iterate forward/backward over tree, starting at node. If + * tree is modified, iteration must be immediately + * terminated by the callback function that causes the + * modification. + * Args: + * tree : Pointer to an initialized red-black tree object. + * start: Node at which to start iteration, or NULL to start at + * first/last node. + * cb : Callback function, which is called for each node during + * iteration. Under normal circumstances the callback function + * should return NULL, which causes iteration to continue. If a + * callback function returns non-NULL, iteration is immediately + * terminated and the non-NULL return value is returned by the + * iterator. This is useful for re-starting iteration after + * modifying tree. + * arg : Opaque pointer passed to cb(). + * Ret: NULL if iteration completed, or the non-NULL callback return value + * that caused termination of the iteration. + */ +#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ +a_attr void \ +a_prefix##new(a_rbt_type *rbtree) { \ + rb_new(a_type, a_field, rbtree); \ +} \ +a_attr a_type * \ +a_prefix##first(a_rbt_type *rbtree) { \ + a_type *ret; \ + rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##last(a_rbt_type *rbtree) { \ + a_type *ret; \ + rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ + a_type *ret; \ + if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ + a_field, node), ret); \ + } else { \ + a_type *tnode = rbtree->rbt_root; \ + assert(tnode != &rbtree->rbt_nil); \ + ret = &rbtree->rbt_nil; \ + while (true) { \ + int cmp = (a_cmp)(node, tnode); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + break; \ + } \ + assert(tnode != &rbtree->rbt_nil); \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ + a_type *ret; \ + if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ + a_field, node), ret); \ + } else { \ + a_type *tnode = rbtree->rbt_root; \ + assert(tnode != &rbtree->rbt_nil); \ + ret = &rbtree->rbt_nil; \ + while (true) { \ + int cmp = (a_cmp)(node, tnode); \ + if (cmp < 0) { \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + ret = tnode; \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + break; \ + } \ + assert(tnode != &rbtree->rbt_nil); \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ + a_type *ret; \ + int cmp; \ + ret = rbtree->rbt_root; \ + while (ret != &rbtree->rbt_nil \ + && (cmp = (a_cmp)(key, ret)) != 0) { \ + if (cmp < 0) { \ + ret = rbtn_left_get(a_type, a_field, ret); \ + } else { \ + ret = rbtn_right_get(a_type, a_field, ret); \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ + a_type *ret; \ + a_type *tnode = rbtree->rbt_root; \ + ret = &rbtree->rbt_nil; \ + while (tnode != &rbtree->rbt_nil) { \ + int cmp = (a_cmp)(key, tnode); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + ret = tnode; \ + break; \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ + a_type *ret; \ + a_type *tnode = rbtree->rbt_root; \ + ret = &rbtree->rbt_nil; \ + while (tnode != &rbtree->rbt_nil) { \ + int cmp = (a_cmp)(key, tnode); \ + if (cmp < 0) { \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + ret = tnode; \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + ret = tnode; \ + break; \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr void \ +a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ + struct { \ + a_type *node; \ + int cmp; \ + } path[sizeof(void *) << 4], *pathp; \ + rbt_node_new(a_type, a_field, rbtree, node); \ + /* Wind. */ \ + path->node = rbtree->rbt_root; \ + for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + int cmp = pathp->cmp = a_cmp(node, pathp->node); \ + assert(cmp != 0); \ + if (cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + } \ + } \ + pathp->node = node; \ + /* Unwind. */ \ + for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ + a_type *cnode = pathp->node; \ + if (pathp->cmp < 0) { \ + a_type *left = pathp[1].node; \ + rbtn_left_set(a_type, a_field, cnode, left); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ + /* Fix up 4-node. */ \ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, cnode, tnode); \ + cnode = tnode; \ + } \ + } else { \ + return; \ + } \ + } else { \ + a_type *right = pathp[1].node; \ + rbtn_right_set(a_type, a_field, cnode, right); \ + if (rbtn_red_get(a_type, a_field, right)) { \ + a_type *left = rbtn_left_get(a_type, a_field, cnode); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + /* Split 4-node. */ \ + rbtn_black_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, right); \ + rbtn_red_set(a_type, a_field, cnode); \ + } else { \ + /* Lean left. */ \ + a_type *tnode; \ + bool tred = rbtn_red_get(a_type, a_field, cnode); \ + rbtn_rotate_left(a_type, a_field, cnode, tnode); \ + rbtn_color_set(a_type, a_field, tnode, tred); \ + rbtn_red_set(a_type, a_field, cnode); \ + cnode = tnode; \ + } \ + } else { \ + return; \ + } \ + } \ + pathp->node = cnode; \ + } \ + /* Set root, and make it black. */ \ + rbtree->rbt_root = path->node; \ + rbtn_black_set(a_type, a_field, rbtree->rbt_root); \ +} \ +a_attr void \ +a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ + struct { \ + a_type *node; \ + int cmp; \ + } *pathp, *nodep, path[sizeof(void *) << 4]; \ + /* Wind. */ \ + nodep = NULL; /* Silence compiler warning. */ \ + path->node = rbtree->rbt_root; \ + for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + int cmp = pathp->cmp = a_cmp(node, pathp->node); \ + if (cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + if (cmp == 0) { \ + /* Find node's successor, in preparation for swap. */ \ + pathp->cmp = 1; \ + nodep = pathp; \ + for (pathp++; pathp->node != &rbtree->rbt_nil; \ + pathp++) { \ + pathp->cmp = -1; \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } \ + break; \ + } \ + } \ + } \ + assert(nodep->node == node); \ + pathp--; \ + if (pathp->node != node) { \ + /* Swap node with its successor. */ \ + bool tred = rbtn_red_get(a_type, a_field, pathp->node); \ + rbtn_color_set(a_type, a_field, pathp->node, \ + rbtn_red_get(a_type, a_field, node)); \ + rbtn_left_set(a_type, a_field, pathp->node, \ + rbtn_left_get(a_type, a_field, node)); \ + /* If node's successor is its right child, the following code */\ + /* will do the wrong thing for the right child pointer. */\ + /* However, it doesn't matter, because the pointer will be */\ + /* properly set when the successor is pruned. */\ + rbtn_right_set(a_type, a_field, pathp->node, \ + rbtn_right_get(a_type, a_field, node)); \ + rbtn_color_set(a_type, a_field, node, tred); \ + /* The pruned leaf node's child pointers are never accessed */\ + /* again, so don't bother setting them to nil. */\ + nodep->node = pathp->node; \ + pathp->node = node; \ + if (nodep == path) { \ + rbtree->rbt_root = nodep->node; \ + } else { \ + if (nodep[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, nodep[-1].node, \ + nodep->node); \ + } else { \ + rbtn_right_set(a_type, a_field, nodep[-1].node, \ + nodep->node); \ + } \ + } \ + } else { \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + if (left != &rbtree->rbt_nil) { \ + /* node has no successor, but it has a left child. */\ + /* Splice node out, without losing the left child. */\ + assert(rbtn_red_get(a_type, a_field, node) == false); \ + assert(rbtn_red_get(a_type, a_field, left)); \ + rbtn_black_set(a_type, a_field, left); \ + if (pathp == path) { \ + rbtree->rbt_root = left; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + left); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + left); \ + } \ + } \ + return; \ + } else if (pathp == path) { \ + /* The tree only contained one node. */ \ + rbtree->rbt_root = &rbtree->rbt_nil; \ + return; \ + } \ + } \ + if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + /* Prune red node, which requires no fixup. */ \ + assert(pathp[-1].cmp < 0); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + &rbtree->rbt_nil); \ + return; \ + } \ + /* The node to be pruned is black, so unwind until balance is */\ + /* restored. */\ + pathp->node = &rbtree->rbt_nil; \ + for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ + assert(pathp->cmp != 0); \ + if (pathp->cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp->node, \ + pathp[1].node); \ + assert(rbtn_red_get(a_type, a_field, pathp[1].node) \ + == false); \ + if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + a_type *right = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + a_type *rightleft = rbtn_left_get(a_type, a_field, \ + right); \ + a_type *tnode; \ + if (rbtn_red_get(a_type, a_field, rightleft)) { \ + /* In the following diagrams, ||, //, and \\ */\ + /* indicate the path to the removed node. */\ + /* */\ + /* || */\ + /* pathp(r) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + /* */\ + rbtn_black_set(a_type, a_field, pathp->node); \ + rbtn_rotate_right(a_type, a_field, right, tnode); \ + rbtn_right_set(a_type, a_field, pathp->node, tnode);\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + } else { \ + /* || */\ + /* pathp(r) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + /* */\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + } \ + /* Balance restored, but rotation modified subtree */\ + /* root. */\ + assert((uintptr_t)pathp > (uintptr_t)path); \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + return; \ + } else { \ + a_type *right = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + a_type *rightleft = rbtn_left_get(a_type, a_field, \ + right); \ + if (rbtn_red_get(a_type, a_field, rightleft)) { \ + /* || */\ + /* pathp(b) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, rightleft); \ + rbtn_rotate_right(a_type, a_field, right, tnode); \ + rbtn_right_set(a_type, a_field, pathp->node, tnode);\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subree root, which may actually be the tree */\ + /* root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + a_type *tnode; \ + rbtn_red_set(a_type, a_field, pathp->node); \ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + pathp->node = tnode; \ + } \ + } \ + } else { \ + a_type *left; \ + rbtn_right_set(a_type, a_field, pathp->node, \ + pathp[1].node); \ + left = rbtn_left_get(a_type, a_field, pathp->node); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + a_type *tnode; \ + a_type *leftright = rbtn_right_get(a_type, a_field, \ + left); \ + a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ + leftright); \ + if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (r) (b) */\ + /* \ */\ + /* (b) */\ + /* / */\ + /* (r) */\ + a_type *unode; \ + rbtn_black_set(a_type, a_field, leftrightleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + unode); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + rbtn_right_set(a_type, a_field, unode, tnode); \ + rbtn_rotate_left(a_type, a_field, unode, tnode); \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (r) (b) */\ + /* \ */\ + /* (b) */\ + /* / */\ + /* (b) */\ + assert(leftright != &rbtree->rbt_nil); \ + rbtn_red_set(a_type, a_field, leftright); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + rbtn_black_set(a_type, a_field, tnode); \ + } \ + /* Balance restored, but rotation modified subtree */\ + /* root, which may actually be the tree root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + } \ + return; \ + } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ + /* || */\ + /* pathp(r) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, pathp->node); \ + rbtn_red_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subtree root. */\ + assert((uintptr_t)pathp > (uintptr_t)path); \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(r) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + rbtn_red_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, pathp->node); \ + /* Balance restored. */ \ + return; \ + } \ + } else { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subtree root, which may actually be the tree */\ + /* root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + rbtn_red_set(a_type, a_field, left); \ + } \ + } \ + } \ + } \ + /* Set root. */ \ + rbtree->rbt_root = path->node; \ + assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \ +} \ +a_attr a_type * \ +a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + if (node == &rbtree->rbt_nil) { \ + return (&rbtree->rbt_nil); \ + } else { \ + a_type *ret; \ + if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ + a_field, node), cb, arg)) != &rbtree->rbt_nil \ + || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + int cmp = a_cmp(start, node); \ + if (cmp < 0) { \ + a_type *ret; \ + if ((ret = a_prefix##iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } else if (cmp > 0) { \ + return (a_prefix##iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg)); \ + } else { \ + a_type *ret; \ + if ((ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ + a_rbt_type *, a_type *, void *), void *arg) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \ + cb, arg); \ + } else { \ + ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + if (node == &rbtree->rbt_nil) { \ + return (&rbtree->rbt_nil); \ + } else { \ + a_type *ret; \ + if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ + a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg) { \ + int cmp = a_cmp(start, node); \ + if (cmp > 0) { \ + a_type *ret; \ + if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } else if (cmp < 0) { \ + return (a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } else { \ + a_type *ret; \ + if ((ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##reverse_iter_start(rbtree, start, \ + rbtree->rbt_root, cb, arg); \ + } else { \ + ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ + cb, arg); \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} + +#endif /* RB_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h new file mode 100644 index 0000000000..bc74769f50 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/rtree.h @@ -0,0 +1,172 @@ +/* + * This radix tree implementation is tailored to the singular purpose of + * tracking which chunks are currently owned by jemalloc. This functionality + * is mandatory for OS X, where jemalloc must be able to respond to object + * ownership queries. + * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct rtree_s rtree_t; + +/* + * Size of each radix tree node (must be a power of 2). This impacts tree + * depth. + */ +#define RTREE_NODESIZE (1U << 16) + +typedef void *(rtree_alloc_t)(size_t); +typedef void (rtree_dalloc_t)(void *); + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct rtree_s { + rtree_alloc_t *alloc; + rtree_dalloc_t *dalloc; + malloc_mutex_t mutex; + void **root; + unsigned height; + unsigned level2bits[1]; /* Dynamically sized. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc); +void rtree_delete(rtree_t *rtree); +void rtree_prefork(rtree_t *rtree); +void rtree_postfork_parent(rtree_t *rtree); +void rtree_postfork_child(rtree_t *rtree); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +#ifdef JEMALLOC_DEBUG +uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key); +#endif +uint8_t rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) +#define RTREE_GET_GENERATE(f) \ +/* The least significant bits of the key are ignored. */ \ +JEMALLOC_INLINE uint8_t \ +f(rtree_t *rtree, uintptr_t key) \ +{ \ + uint8_t ret; \ + uintptr_t subkey; \ + unsigned i, lshift, height, bits; \ + void **node, **child; \ + \ + RTREE_LOCK(&rtree->mutex); \ + for (i = lshift = 0, height = rtree->height, node = rtree->root;\ + i < height - 1; \ + i++, lshift += bits, node = child) { \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ + 3)) - bits); \ + child = (void**)node[subkey]; \ + if (child == NULL) { \ + RTREE_UNLOCK(&rtree->mutex); \ + return (0); \ + } \ + } \ + \ + /* \ + * node is a leaf, so it contains values rather than node \ + * pointers. \ + */ \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ + bits); \ + { \ + uint8_t *leaf = (uint8_t *)node; \ + ret = leaf[subkey]; \ + } \ + RTREE_UNLOCK(&rtree->mutex); \ + \ + RTREE_GET_VALIDATE \ + return (ret); \ +} + +#ifdef JEMALLOC_DEBUG +# define RTREE_LOCK(l) malloc_mutex_lock(l) +# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) +# define RTREE_GET_VALIDATE +RTREE_GET_GENERATE(rtree_get_locked) +# undef RTREE_LOCK +# undef RTREE_UNLOCK +# undef RTREE_GET_VALIDATE +#endif + +#define RTREE_LOCK(l) +#define RTREE_UNLOCK(l) +#ifdef JEMALLOC_DEBUG + /* + * Suppose that it were possible for a jemalloc-allocated chunk to be + * munmap()ped, followed by a different allocator in another thread re-using + * overlapping virtual memory, all without invalidating the cached rtree + * value. The result would be a false positive (the rtree would claim that + * jemalloc owns memory that it had actually discarded). This scenario + * seems impossible, but the following assertion is a prudent sanity check. + */ +# define RTREE_GET_VALIDATE \ + assert(rtree_get_locked(rtree, key) == ret); +#else +# define RTREE_GET_VALIDATE +#endif +RTREE_GET_GENERATE(rtree_get) +#undef RTREE_LOCK +#undef RTREE_UNLOCK +#undef RTREE_GET_VALIDATE + +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val) +{ + uintptr_t subkey; + unsigned i, lshift, height, bits; + void **node, **child; + + malloc_mutex_lock(&rtree->mutex); + for (i = lshift = 0, height = rtree->height, node = rtree->root; + i < height - 1; + i++, lshift += bits, node = child) { + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + bits); + child = (void**)node[subkey]; + if (child == NULL) { + size_t size = ((i + 1 < height - 1) ? sizeof(void *) + : (sizeof(uint8_t))) << rtree->level2bits[i+1]; + child = (void**)rtree->alloc(size); + if (child == NULL) { + malloc_mutex_unlock(&rtree->mutex); + return (true); + } + memset(child, 0, size); + node[subkey] = child; + } + } + + /* node is a leaf, so it contains values rather than node pointers. */ + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); + { + uint8_t *leaf = (uint8_t *)node; + leaf[subkey] = val; + } + malloc_mutex_unlock(&rtree->mutex); + + return (false); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.h b/deps/jemalloc/include/jemalloc/internal/size_classes.h new file mode 100644 index 0000000000..821102e5c1 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/size_classes.h @@ -0,0 +1,721 @@ +/* This file was automatically generated by size_classes.sh. */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 12) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + +#define NBINS 31 +#define SMALL_MAXCLASS 3584 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 13) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + SIZE_CLASS(31, 512, 4096) \ + SIZE_CLASS(32, 1024, 5120) \ + SIZE_CLASS(33, 1024, 6144) \ + SIZE_CLASS(34, 1024, 7168) \ + +#define NBINS 35 +#define SMALL_MAXCLASS 7168 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 14) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + SIZE_CLASS(31, 512, 4096) \ + SIZE_CLASS(32, 1024, 5120) \ + SIZE_CLASS(33, 1024, 6144) \ + SIZE_CLASS(34, 1024, 7168) \ + SIZE_CLASS(35, 1024, 8192) \ + SIZE_CLASS(36, 2048, 10240) \ + SIZE_CLASS(37, 2048, 12288) \ + SIZE_CLASS(38, 2048, 14336) \ + +#define NBINS 39 +#define SMALL_MAXCLASS 14336 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 15) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + SIZE_CLASS(31, 512, 4096) \ + SIZE_CLASS(32, 1024, 5120) \ + SIZE_CLASS(33, 1024, 6144) \ + SIZE_CLASS(34, 1024, 7168) \ + SIZE_CLASS(35, 1024, 8192) \ + SIZE_CLASS(36, 2048, 10240) \ + SIZE_CLASS(37, 2048, 12288) \ + SIZE_CLASS(38, 2048, 14336) \ + SIZE_CLASS(39, 2048, 16384) \ + SIZE_CLASS(40, 4096, 20480) \ + SIZE_CLASS(41, 4096, 24576) \ + SIZE_CLASS(42, 4096, 28672) \ + +#define NBINS 43 +#define SMALL_MAXCLASS 28672 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 16) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + SIZE_CLASS(31, 512, 4096) \ + SIZE_CLASS(32, 1024, 5120) \ + SIZE_CLASS(33, 1024, 6144) \ + SIZE_CLASS(34, 1024, 7168) \ + SIZE_CLASS(35, 1024, 8192) \ + SIZE_CLASS(36, 2048, 10240) \ + SIZE_CLASS(37, 2048, 12288) \ + SIZE_CLASS(38, 2048, 14336) \ + SIZE_CLASS(39, 2048, 16384) \ + SIZE_CLASS(40, 4096, 20480) \ + SIZE_CLASS(41, 4096, 24576) \ + SIZE_CLASS(42, 4096, 28672) \ + SIZE_CLASS(43, 4096, 32768) \ + SIZE_CLASS(44, 8192, 40960) \ + SIZE_CLASS(45, 8192, 49152) \ + SIZE_CLASS(46, 8192, 57344) \ + +#define NBINS 47 +#define SMALL_MAXCLASS 57344 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 12) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + +#define NBINS 28 +#define SMALL_MAXCLASS 3584 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 13) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + SIZE_CLASS(28, 512, 4096) \ + SIZE_CLASS(29, 1024, 5120) \ + SIZE_CLASS(30, 1024, 6144) \ + SIZE_CLASS(31, 1024, 7168) \ + +#define NBINS 32 +#define SMALL_MAXCLASS 7168 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 14) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + SIZE_CLASS(28, 512, 4096) \ + SIZE_CLASS(29, 1024, 5120) \ + SIZE_CLASS(30, 1024, 6144) \ + SIZE_CLASS(31, 1024, 7168) \ + SIZE_CLASS(32, 1024, 8192) \ + SIZE_CLASS(33, 2048, 10240) \ + SIZE_CLASS(34, 2048, 12288) \ + SIZE_CLASS(35, 2048, 14336) \ + +#define NBINS 36 +#define SMALL_MAXCLASS 14336 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 15) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + SIZE_CLASS(28, 512, 4096) \ + SIZE_CLASS(29, 1024, 5120) \ + SIZE_CLASS(30, 1024, 6144) \ + SIZE_CLASS(31, 1024, 7168) \ + SIZE_CLASS(32, 1024, 8192) \ + SIZE_CLASS(33, 2048, 10240) \ + SIZE_CLASS(34, 2048, 12288) \ + SIZE_CLASS(35, 2048, 14336) \ + SIZE_CLASS(36, 2048, 16384) \ + SIZE_CLASS(37, 4096, 20480) \ + SIZE_CLASS(38, 4096, 24576) \ + SIZE_CLASS(39, 4096, 28672) \ + +#define NBINS 40 +#define SMALL_MAXCLASS 28672 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 16) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + SIZE_CLASS(28, 512, 4096) \ + SIZE_CLASS(29, 1024, 5120) \ + SIZE_CLASS(30, 1024, 6144) \ + SIZE_CLASS(31, 1024, 7168) \ + SIZE_CLASS(32, 1024, 8192) \ + SIZE_CLASS(33, 2048, 10240) \ + SIZE_CLASS(34, 2048, 12288) \ + SIZE_CLASS(35, 2048, 14336) \ + SIZE_CLASS(36, 2048, 16384) \ + SIZE_CLASS(37, 4096, 20480) \ + SIZE_CLASS(38, 4096, 24576) \ + SIZE_CLASS(39, 4096, 28672) \ + SIZE_CLASS(40, 4096, 32768) \ + SIZE_CLASS(41, 8192, 40960) \ + SIZE_CLASS(42, 8192, 49152) \ + SIZE_CLASS(43, 8192, 57344) \ + +#define NBINS 44 +#define SMALL_MAXCLASS 57344 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 12) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + +#define NBINS 27 +#define SMALL_MAXCLASS 3584 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 13) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + SIZE_CLASS(27, 512, 4096) \ + SIZE_CLASS(28, 1024, 5120) \ + SIZE_CLASS(29, 1024, 6144) \ + SIZE_CLASS(30, 1024, 7168) \ + +#define NBINS 31 +#define SMALL_MAXCLASS 7168 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 14) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + SIZE_CLASS(27, 512, 4096) \ + SIZE_CLASS(28, 1024, 5120) \ + SIZE_CLASS(29, 1024, 6144) \ + SIZE_CLASS(30, 1024, 7168) \ + SIZE_CLASS(31, 1024, 8192) \ + SIZE_CLASS(32, 2048, 10240) \ + SIZE_CLASS(33, 2048, 12288) \ + SIZE_CLASS(34, 2048, 14336) \ + +#define NBINS 35 +#define SMALL_MAXCLASS 14336 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 15) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + SIZE_CLASS(27, 512, 4096) \ + SIZE_CLASS(28, 1024, 5120) \ + SIZE_CLASS(29, 1024, 6144) \ + SIZE_CLASS(30, 1024, 7168) \ + SIZE_CLASS(31, 1024, 8192) \ + SIZE_CLASS(32, 2048, 10240) \ + SIZE_CLASS(33, 2048, 12288) \ + SIZE_CLASS(34, 2048, 14336) \ + SIZE_CLASS(35, 2048, 16384) \ + SIZE_CLASS(36, 4096, 20480) \ + SIZE_CLASS(37, 4096, 24576) \ + SIZE_CLASS(38, 4096, 28672) \ + +#define NBINS 39 +#define SMALL_MAXCLASS 28672 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 16) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + SIZE_CLASS(27, 512, 4096) \ + SIZE_CLASS(28, 1024, 5120) \ + SIZE_CLASS(29, 1024, 6144) \ + SIZE_CLASS(30, 1024, 7168) \ + SIZE_CLASS(31, 1024, 8192) \ + SIZE_CLASS(32, 2048, 10240) \ + SIZE_CLASS(33, 2048, 12288) \ + SIZE_CLASS(34, 2048, 14336) \ + SIZE_CLASS(35, 2048, 16384) \ + SIZE_CLASS(36, 4096, 20480) \ + SIZE_CLASS(37, 4096, 24576) \ + SIZE_CLASS(38, 4096, 28672) \ + SIZE_CLASS(39, 4096, 32768) \ + SIZE_CLASS(40, 8192, 40960) \ + SIZE_CLASS(41, 8192, 49152) \ + SIZE_CLASS(42, 8192, 57344) \ + +#define NBINS 43 +#define SMALL_MAXCLASS 57344 +#endif + +#ifndef SIZE_CLASSES_DEFINED +# error "No size class definitions match configuration" +#endif +#undef SIZE_CLASSES_DEFINED +/* + * The small_size2bin lookup table uses uint8_t to encode each bin index, so we + * cannot support more than 256 small size classes. Further constrain NBINS to + * 255 to support prof_promote, since all small size classes, plus a "not + * small" size class must be stored in 8 bits of arena_chunk_map_t's bits + * field. + */ +#if (NBINS > 255) +# error "Too many small size classes" +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h new file mode 100644 index 0000000000..27f68e3681 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/stats.h @@ -0,0 +1,173 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct tcache_bin_stats_s tcache_bin_stats_t; +typedef struct malloc_bin_stats_s malloc_bin_stats_t; +typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct arena_stats_s arena_stats_t; +typedef struct chunk_stats_s chunk_stats_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct tcache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; + +struct malloc_bin_stats_s { + /* + * Current number of bytes allocated, including objects currently + * cached by tcache. + */ + size_t allocated; + + /* + * Total number of allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; + + /* Total number of runs created for this bin's size class. */ + uint64_t nruns; + + /* + * Total number of runs reused by extracting them from the runs tree for + * this bin's size class. + */ + uint64_t reruns; + + /* Current number of runs in this bin. */ + size_t curruns; +}; + +struct malloc_large_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* Current number of runs of this size class. */ + size_t curruns; +}; + +struct arena_stats_s { + /* Number of bytes currently mapped. */ + size_t mapped; + + /* + * Total number of purge sweeps, total number of madvise calls made, + * and total pages purged in order to keep dirty unused memory under + * control. + */ + uint64_t npurge; + uint64_t nmadvise; + uint64_t purged; + + /* Per-size-category statistics. */ + size_t allocated_large; + uint64_t nmalloc_large; + uint64_t ndalloc_large; + uint64_t nrequests_large; + + /* + * One element for each possible size class, including sizes that + * overlap with bin size classes. This is necessary because ipalloc() + * sometimes has to use such large objects in order to assure proper + * alignment. + */ + malloc_large_stats_t *lstats; +}; + +struct chunk_stats_s { + /* Number of chunks that were allocated. */ + uint64_t nchunks; + + /* High-water mark for number of chunks allocated. */ + size_t highchunks; + + /* + * Current number of chunks allocated. This value isn't maintained for + * any other purpose, so keep track of it in order to be able to set + * highchunks. + */ + size_t curchunks; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_stats_print; + +extern size_t stats_cactive; + +void stats_print(void (*write)(void *, const char *), void *cbopaque, + const char *opts); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +size_t stats_cactive_get(void); +void stats_cactive_add(size_t size); +void stats_cactive_sub(size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) +JEMALLOC_INLINE size_t +stats_cactive_get(void) +{ + + return (atomic_read_z(&stats_cactive)); +} + +JEMALLOC_INLINE void +stats_cactive_add(size_t size) +{ + + atomic_add_z(&stats_cactive, size); +} + +JEMALLOC_INLINE void +stats_cactive_sub(size_t size) +{ + + atomic_sub_z(&stats_cactive, size); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h new file mode 100644 index 0000000000..c3d4b58d4d --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tcache.h @@ -0,0 +1,443 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct tcache_bin_info_s tcache_bin_info_t; +typedef struct tcache_bin_s tcache_bin_t; +typedef struct tcache_s tcache_t; + +/* + * tcache pointers close to NULL are used to encode state information that is + * used for two purposes: preventing thread caching on a per thread basis and + * cleaning up during thread shutdown. + */ +#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) +#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) +#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) +#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY + +/* + * Absolute maximum number of cache slots for each small bin in the thread + * cache. This is an additional constraint beyond that imposed as: twice the + * number of regions per run for this size class. + * + * This constant must be an even number. + */ +#define TCACHE_NSLOTS_SMALL_MAX 200 + +/* Number of cache slots for large size classes. */ +#define TCACHE_NSLOTS_LARGE 20 + +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ +#define LG_TCACHE_MAXCLASS_DEFAULT 15 + +/* + * TCACHE_GC_SWEEP is the approximate number of allocation events between + * full GC sweeps. Integer rounding may cause the actual number to be + * slightly higher, since GC is performed incrementally. + */ +#define TCACHE_GC_SWEEP 8192 + +/* Number of tcache allocation/deallocation events between incremental GCs. */ +#define TCACHE_GC_INCR \ + ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +typedef enum { + tcache_enabled_false = 0, /* Enable cast to/from bool. */ + tcache_enabled_true = 1, + tcache_enabled_default = 2 +} tcache_enabled_t; + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +struct tcache_bin_info_s { + unsigned ncached_max; /* Upper limit on ncached. */ +}; + +struct tcache_bin_s { + tcache_bin_stats_t tstats; + int low_water; /* Min # cached since last GC. */ + unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ + unsigned ncached; /* # of cached objects. */ + void **avail; /* Stack of available objects. */ +}; + +struct tcache_s { + ql_elm(tcache_t) link; /* Used for aggregating stats. */ + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ + arena_t *arena; /* This thread's arena. */ + unsigned ev_cnt; /* Event count since incremental GC. */ + unsigned next_gc_bin; /* Next bin to GC. */ + tcache_bin_t tbins[1]; /* Dynamically sized. */ + /* + * The pointer stacks associated with tbins follow as a contiguous + * array. During tcache initialization, the avail pointer in each + * element of tbins is initialized to point to the proper offset within + * this array. + */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_max; + +extern tcache_bin_info_t *tcache_bin_info; + +/* + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more + * large-object bins. + */ +extern size_t nhbins; + +/* Maximum cached size class. */ +extern size_t tcache_maxclass; + +size_t tcache_salloc(const void *ptr); +void tcache_event_hard(tcache_t *tcache); +void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, + size_t binind); +void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); +void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); +void tcache_arena_associate(tcache_t *tcache, arena_t *arena); +void tcache_arena_dissociate(tcache_t *tcache); +tcache_t *tcache_create(arena_t *arena); +void tcache_destroy(tcache_t *tcache); +void tcache_thread_cleanup(void *arg); +void tcache_stats_merge(tcache_t *tcache, arena_t *arena); +bool tcache_boot0(void); +bool tcache_boot1(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) + +void tcache_event(tcache_t *tcache); +void tcache_flush(void); +bool tcache_enabled_get(void); +tcache_t *tcache_get(bool create); +void tcache_enabled_set(bool enabled); +void *tcache_alloc_easy(tcache_bin_t *tbin); +void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); +void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); +void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind); +void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) +/* Map of thread-specific caches. */ +malloc_tsd_externs(tcache, tcache_t *) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL, + tcache_thread_cleanup) +/* Per thread flag that allows thread caches to be disabled. */ +malloc_tsd_externs(tcache_enabled, tcache_enabled_t) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t, + tcache_enabled_default, malloc_tsd_no_cleanup) + +JEMALLOC_INLINE void +tcache_flush(void) +{ + tcache_t *tcache; + + cassert(config_tcache); + + tcache = *tcache_tsd_get(); + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) + return; + tcache_destroy(tcache); + tcache = NULL; + tcache_tsd_set(&tcache); +} + +JEMALLOC_INLINE bool +tcache_enabled_get(void) +{ + tcache_enabled_t tcache_enabled; + + cassert(config_tcache); + + tcache_enabled = *tcache_enabled_tsd_get(); + if (tcache_enabled == tcache_enabled_default) { + tcache_enabled = (tcache_enabled_t)opt_tcache; + tcache_enabled_tsd_set(&tcache_enabled); + } + + return ((bool)tcache_enabled); +} + +JEMALLOC_INLINE void +tcache_enabled_set(bool enabled) +{ + tcache_enabled_t tcache_enabled; + tcache_t *tcache; + + cassert(config_tcache); + + tcache_enabled = (tcache_enabled_t)enabled; + tcache_enabled_tsd_set(&tcache_enabled); + tcache = *tcache_tsd_get(); + if (enabled) { + if (tcache == TCACHE_STATE_DISABLED) { + tcache = NULL; + tcache_tsd_set(&tcache); + } + } else /* disabled */ { + if (tcache > TCACHE_STATE_MAX) { + tcache_destroy(tcache); + tcache = NULL; + } + if (tcache == NULL) { + tcache = TCACHE_STATE_DISABLED; + tcache_tsd_set(&tcache); + } + } +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get(bool create) +{ + tcache_t *tcache; + + if (config_tcache == false) + return (NULL); + if (config_lazy_lock && isthreaded == false) + return (NULL); + + tcache = *tcache_tsd_get(); + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) { + if (tcache == TCACHE_STATE_DISABLED) + return (NULL); + if (tcache == NULL) { + if (create == false) { + /* + * Creating a tcache here would cause + * allocation as a side effect of free(). + * Ordinarily that would be okay since + * tcache_create() failure is a soft failure + * that doesn't propagate. However, if TLS + * data are freed via free() as in glibc, + * subtle corruption could result from setting + * a TLS variable after its backing memory is + * freed. + */ + return (NULL); + } + if (tcache_enabled_get() == false) { + tcache_enabled_set(false); /* Memoize. */ + return (NULL); + } + return (tcache_create(choose_arena(NULL))); + } + if (tcache == TCACHE_STATE_PURGATORY) { + /* + * Make a note that an allocator function was called + * after tcache_thread_cleanup() was called. + */ + tcache = TCACHE_STATE_REINCARNATED; + tcache_tsd_set(&tcache); + return (NULL); + } + if (tcache == TCACHE_STATE_REINCARNATED) + return (NULL); + not_reached(); + } + + return (tcache); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_event(tcache_t *tcache) +{ + + if (TCACHE_GC_INCR == 0) + return; + + tcache->ev_cnt++; + assert(tcache->ev_cnt <= TCACHE_GC_INCR); + if (tcache->ev_cnt == TCACHE_GC_INCR) + tcache_event_hard(tcache); +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_easy(tcache_bin_t *tbin) +{ + void *ret; + + if (tbin->ncached == 0) { + tbin->low_water = -1; + return (NULL); + } + tbin->ncached--; + if ((int)tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; + ret = tbin->avail[tbin->ncached]; + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) +{ + void *ret; + size_t binind; + tcache_bin_t *tbin; + + binind = SMALL_SIZE2BIN(size); + assert(binind < NBINS); + tbin = &tcache->tbins[binind]; + size = arena_bin_info[binind].reg_size; + ret = tcache_alloc_easy(tbin); + if (ret == NULL) { + ret = tcache_alloc_small_hard(tcache, tbin, binind); + if (ret == NULL) + return (NULL); + } + assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size); + + if (zero == false) { + if (config_fill) { + if (opt_junk) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (opt_zero) + memset(ret, 0, size); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + } else { + if (config_fill && opt_junk) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } + + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += arena_bin_info[binind].reg_size; + tcache_event(tcache); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) +{ + void *ret; + size_t binind; + tcache_bin_t *tbin; + + size = PAGE_CEILING(size); + assert(size <= tcache_maxclass); + binind = NBINS + (size >> LG_PAGE) - 1; + assert(binind < nhbins); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin); + if (ret == NULL) { + /* + * Only allocate one large object at a time, because it's quite + * expensive to create one and not use it. + */ + ret = arena_malloc_large(tcache->arena, size, zero); + if (ret == NULL) + return (NULL); + } else { + if (config_prof && prof_promote && size == PAGE) { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(ret); + size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + LG_PAGE); + arena_mapbits_large_binind_set(chunk, pageind, + BININD_INVALID); + } + if (zero == false) { + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + } else { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } + + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += size; + } + + tcache_event(tcache); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) +{ + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); + + if (config_fill && opt_junk) + arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> + 1), tcache); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; + tbin->ncached++; + + tcache_event(tcache); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) +{ + size_t binind; + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert((size & PAGE_MASK) == 0); + assert(tcache_salloc(ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(ptr) <= tcache_maxclass); + + binind = NBINS + (size >> LG_PAGE) - 1; + + if (config_fill && opt_junk) + memset(ptr, 0x5a, size); + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> + 1), tcache); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; + tbin->ncached++; + + tcache_event(tcache); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h new file mode 100644 index 0000000000..9fb4a23ec6 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tsd.h @@ -0,0 +1,434 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum number of malloc_tsd users with cleanup functions. */ +#define MALLOC_TSD_CLEANUPS_MAX 8 + +typedef bool (*malloc_tsd_cleanup_t)(void); + +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +typedef struct tsd_init_block_s tsd_init_block_t; +typedef struct tsd_init_head_s tsd_init_head_t; +#endif + +/* + * TLS/TSD-agnostic macro-based implementation of thread-specific data. There + * are four macros that support (at least) three use cases: file-private, + * library-private, and library-private inlined. Following is an example + * library-private tsd variable: + * + * In example.h: + * typedef struct { + * int x; + * int y; + * } example_t; + * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) + * malloc_tsd_protos(, example, example_t *) + * malloc_tsd_externs(example, example_t *) + * In example.c: + * malloc_tsd_data(, example, example_t *, EX_INITIALIZER) + * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER, + * example_tsd_cleanup) + * + * The result is a set of generated functions, e.g.: + * + * bool example_tsd_boot(void) {...} + * example_t **example_tsd_get() {...} + * void example_tsd_set(example_t **val) {...} + * + * Note that all of the functions deal in terms of (a_type *) rather than + * (a_type) so that it is possible to support non-pointer types (unlike + * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is + * cast to (void *). This means that the cleanup function needs to cast *and* + * dereference the function argument, e.g.: + * + * void + * example_tsd_cleanup(void *arg) + * { + * example_t *example = *(example_t **)arg; + * + * [...] + * if ([want the cleanup function to be called again]) { + * example_tsd_set(&example); + * } + * } + * + * If example_tsd_set() is called within example_tsd_cleanup(), it will be + * called again. This is similar to how pthreads TSD destruction works, except + * that pthreads only calls the cleanup function again if the value was set to + * non-NULL. + */ + +/* malloc_tsd_protos(). */ +#define malloc_tsd_protos(a_attr, a_name, a_type) \ +a_attr bool \ +a_name##_tsd_boot(void); \ +a_attr a_type * \ +a_name##_tsd_get(void); \ +a_attr void \ +a_name##_tsd_set(a_type *val); + +/* malloc_tsd_externs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern __thread bool a_name##_initialized; \ +extern bool a_name##_booted; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern pthread_key_t a_name##_tsd; \ +extern bool a_name##_booted; +#elif (defined(_WIN32)) +#define malloc_tsd_externs(a_name, a_type) \ +extern DWORD a_name##_tsd; \ +extern bool a_name##_booted; +#else +#define malloc_tsd_externs(a_name, a_type) \ +extern pthread_key_t a_name##_tsd; \ +extern tsd_init_head_t a_name##_tsd_init_head; \ +extern bool a_name##_booted; +#endif + +/* malloc_tsd_data(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##_tls = a_initializer; \ +a_attr __thread bool JEMALLOC_TLS_MODEL \ + a_name##_initialized = false; \ +a_attr bool a_name##_booted = false; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##_tls = a_initializer; \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#elif (defined(_WIN32)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr DWORD a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#else +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr tsd_init_head_t a_name##_tsd_init_head = { \ + ql_head_initializer(blocks), \ + MALLOC_MUTEX_INITIALIZER \ +}; \ +a_attr bool a_name##_booted = false; +#endif + +/* malloc_tsd_funcs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_cleanup_wrapper(void) \ +{ \ + \ + if (a_name##_initialized) { \ + a_name##_initialized = false; \ + a_cleanup(&a_name##_tls); \ + } \ + return (a_name##_initialized); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_wrapper); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + a_name##_initialized = true; \ +} +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \ + return (true); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)(&a_name##_tls))) { \ + malloc_write("<jemalloc>: Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ +} +#elif (defined(_WIN32)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_cleanup_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ + if (wrapper == NULL) \ + return (false); \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + a_type val = wrapper->val; \ + a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + a_cleanup(&val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + return (true); \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ + return (false); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + a_name##_tsd = TlsAlloc(); \ + if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_wrapper); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + TlsGetValue(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#else +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\ + \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + wrapper->initialized = false; \ + a_cleanup(&wrapper->val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + return; \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (pthread_key_create(&a_name##_tsd, \ + a_name##_tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + pthread_getspecific(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + tsd_init_block_t block; \ + wrapper = tsd_init_check_recursion( \ + &a_name##_tsd_init_head, &block); \ + if (wrapper) \ + return (wrapper); \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + block.data = wrapper; \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + tsd_init_finish(&a_name##_tsd_init_head, &block); \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; +struct tsd_init_head_s { + ql_head(tsd_init_block_t) blocks; + malloc_mutex_t lock; +}; +#endif + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_no_cleanup(void *); +void malloc_tsd_cleanup_register(bool (*f)(void)); +void malloc_tsd_boot(void); +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h new file mode 100644 index 0000000000..6b938f7468 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/util.h @@ -0,0 +1,162 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + +/* + * Wrap a cpp argument that contains commas such that it isn't broken up into + * multiple arguments. + */ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ + +/* + * Silence compiler warnings due to uninitialized values. This is used + * wherever the compiler fails to recognize that the variable is never used + * uninitialized. + */ +#ifdef JEMALLOC_CC_SILENCE +# define JEMALLOC_CC_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifndef assert +#define assert(e) do { \ + if (config_debug && !(e)) { \ + malloc_printf( \ + "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + "<jemalloc>: %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef assert_not_implemented +#define assert_not_implemented(e) do { \ + if (config_debug && !(e)) \ + not_implemented(); \ +} while (0) +#endif + +/* Use to assert a particular configuration, e.g., cassert(config_debug). */ +#define cassert(c) do { \ + if ((c) == false) \ + not_reached(); \ +} while (0) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *restrict nptr, + char **restrict endptr, int base); +void malloc_write(const char *s); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +int malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +int malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_printf(const char *format, ...) + JEMALLOC_ATTR(format(printf, 1, 2)); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +size_t pow2_ceil(size_t x); +void set_errno(int errnum); +int get_errno(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil(size_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (LG_SIZEOF_PTR == 3) + x |= x >> 32; +#endif + x++; + return (x); +} + +/* Sets error code */ +JEMALLOC_INLINE void +set_errno(int errnum) +{ + +#ifdef _WIN32 + SetLastError(errnum); +#else + errno = errnum; +#endif +} + +/* Get last error code */ +JEMALLOC_INLINE int +get_errno(void) +{ + +#ifdef _WIN32 + return (GetLastError()); +#else + return (errno); +#endif +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/jemalloc.h b/deps/jemalloc/include/jemalloc/jemalloc.h new file mode 100644 index 0000000000..b8ea851e52 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/jemalloc.h @@ -0,0 +1,182 @@ +#ifndef JEMALLOC_H_ +#define JEMALLOC_H_ +#ifdef __cplusplus +extern "C" { +#endif + +#include <limits.h> +#include <strings.h> + +#define JEMALLOC_VERSION "3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340" +#define JEMALLOC_VERSION_MAJOR 3 +#define JEMALLOC_VERSION_MINOR 6 +#define JEMALLOC_VERSION_BUGFIX 0 +#define JEMALLOC_VERSION_NREV 0 +#define JEMALLOC_VERSION_GID "46c0af68bd248b04df75e4f92d5fb804c3d75340" + +# define MALLOCX_LG_ALIGN(la) (la) +# if LG_SIZEOF_PTR == 2 +# define MALLOCX_ALIGN(a) (ffs(a)-1) +# else +# define MALLOCX_ALIGN(a) \ + ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +# endif +# define MALLOCX_ZERO ((int)0x40) +/* Bias arena index bits so that 0 encodes "MALLOCX_ARENA() unspecified". */ +# define MALLOCX_ARENA(a) ((int)(((a)+1) << 8)) + +#ifdef JEMALLOC_EXPERIMENTAL +# define ALLOCM_LG_ALIGN(la) (la) +# if LG_SIZEOF_PTR == 2 +# define ALLOCM_ALIGN(a) (ffs(a)-1) +# else +# define ALLOCM_ALIGN(a) \ + ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +# endif +# define ALLOCM_ZERO ((int)0x40) +# define ALLOCM_NO_MOVE ((int)0x80) +/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */ +# define ALLOCM_ARENA(a) ((int)(((a)+1) << 8)) +# define ALLOCM_SUCCESS 0 +# define ALLOCM_ERR_OOM 1 +# define ALLOCM_ERR_NOT_MOVED 2 +#endif + +/* + * The je_ prefix on the following public symbol declarations is an artifact + * of namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). + */ +extern JEMALLOC_EXPORT const char *je_malloc_conf; +extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, + const char *s); + +JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment, + size_t size) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); +JEMALLOC_EXPORT void je_free(void *ptr); + +JEMALLOC_EXPORT void *je_mallocx(size_t size, int flags); +JEMALLOC_EXPORT void *je_rallocx(void *ptr, size_t size, int flags); +JEMALLOC_EXPORT size_t je_xallocx(void *ptr, size_t size, size_t extra, + int flags); +JEMALLOC_EXPORT size_t je_sallocx(const void *ptr, int flags); +JEMALLOC_EXPORT void je_dallocx(void *ptr, int flags); +JEMALLOC_EXPORT size_t je_nallocx(size_t size, int flags); + +JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, + size_t *miblenp); +JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, + const char *), void *je_cbopaque, const char *opts); +JEMALLOC_EXPORT size_t je_malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void *ptr); + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_EXPERIMENTAL +JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, + int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size, + size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags) + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags) + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); +#endif + +/* + * By default application code must explicitly refer to mangled symbol names, + * so that it is possible to use jemalloc in conjunction with another allocator + * in the same application. Define JEMALLOC_MANGLE in order to cause automatic + * name mangling that matches the API prefixing that happened as a result of + * --with-mangling and/or --with-jemalloc-prefix configuration settings. + */ +#ifdef JEMALLOC_MANGLE +# ifndef JEMALLOC_NO_DEMANGLE +# define JEMALLOC_NO_DEMANGLE +# endif +# define malloc_conf je_malloc_conf +# define malloc_message je_malloc_message +# define malloc je_malloc +# define calloc je_calloc +# define posix_memalign je_posix_memalign +# define aligned_alloc je_aligned_alloc +# define realloc je_realloc +# define free je_free +# define mallocx je_mallocx +# define rallocx je_rallocx +# define xallocx je_xallocx +# define sallocx je_sallocx +# define dallocx je_dallocx +# define nallocx je_nallocx +# define mallctl je_mallctl +# define mallctlnametomib je_mallctlnametomib +# define mallctlbymib je_mallctlbymib +# define malloc_stats_print je_malloc_stats_print +# define malloc_usable_size je_malloc_usable_size +# define memalign je_memalign +# define valloc je_valloc +# define allocm je_allocm +# define dallocm je_dallocm +# define nallocm je_nallocm +# define rallocm je_rallocm +# define sallocm je_sallocm +#endif + +/* + * The je_* macros can be used as stable alternative names for the + * public jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily + * meant for use in jemalloc itself, but it can be used by application code to + * provide isolation from the name mangling specified via --with-mangling + * and/or --with-jemalloc-prefix. + */ +#ifndef JEMALLOC_NO_DEMANGLE +# undef je_malloc_conf +# undef je_malloc_message +# undef je_malloc +# undef je_calloc +# undef je_posix_memalign +# undef je_aligned_alloc +# undef je_realloc +# undef je_free +# undef je_mallocx +# undef je_rallocx +# undef je_xallocx +# undef je_sallocx +# undef je_dallocx +# undef je_nallocx +# undef je_mallctl +# undef je_mallctlnametomib +# undef je_mallctlbymib +# undef je_malloc_stats_print +# undef je_malloc_usable_size +# undef je_memalign +# undef je_valloc +# undef je_allocm +# undef je_dallocm +# undef je_nallocm +# undef je_rallocm +# undef je_sallocm +#endif + +#ifdef __cplusplus +}; +#endif +#endif /* JEMALLOC_H_ */ diff --git a/deps/jemalloc/include/msvc_compat/inttypes.h b/deps/jemalloc/include/msvc_compat/inttypes.h new file mode 100644 index 0000000000..a4e6b75cb9 --- /dev/null +++ b/deps/jemalloc/include/msvc_compat/inttypes.h @@ -0,0 +1,313 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +#ifdef _WIN64 +# define __PRI64_PREFIX "l" +# define __PRIPTR_PREFIX "l" +#else +# define __PRI64_PREFIX "ll" +# define __PRIPTR_PREFIX +#endif + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "d" +#define PRIi32 "i" +#define PRIdLEAST32 "d" +#define PRIiLEAST32 "i" +#define PRIdFAST32 "d" +#define PRIiFAST32 "i" + +#define PRId64 __PRI64_PREFIX "d" +#define PRIi64 __PRI64_PREFIX "i" +#define PRIdLEAST64 __PRI64_PREFIX "d" +#define PRIiLEAST64 __PRI64_PREFIX "i" +#define PRIdFAST64 __PRI64_PREFIX "d" +#define PRIiFAST64 __PRI64_PREFIX "i" + +#define PRIdMAX __PRI64_PREFIX "d" +#define PRIiMAX __PRI64_PREFIX "i" + +#define PRIdPTR __PRIPTR_PREFIX "d" +#define PRIiPTR __PRIPTR_PREFIX "i" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "o" +#define PRIu32 "u" +#define PRIx32 "x" +#define PRIX32 "X" +#define PRIoLEAST32 "o" +#define PRIuLEAST32 "u" +#define PRIxLEAST32 "x" +#define PRIXLEAST32 "X" +#define PRIoFAST32 "o" +#define PRIuFAST32 "u" +#define PRIxFAST32 "x" +#define PRIXFAST32 "X" + +#define PRIo64 __PRI64_PREFIX "o" +#define PRIu64 __PRI64_PREFIX "u" +#define PRIx64 __PRI64_PREFIX "x" +#define PRIX64 __PRI64_PREFIX "X" +#define PRIoLEAST64 __PRI64_PREFIX "o" +#define PRIuLEAST64 __PRI64_PREFIX "u" +#define PRIxLEAST64 __PRI64_PREFIX "x" +#define PRIXLEAST64 __PRI64_PREFIX "X" +#define PRIoFAST64 __PRI64_PREFIX "o" +#define PRIuFAST64 __PRI64_PREFIX "u" +#define PRIxFAST64 __PRI64_PREFIX "x" +#define PRIXFAST64 __PRI64_PREFIX "X" + +#define PRIoMAX __PRI64_PREFIX "o" +#define PRIuMAX __PRI64_PREFIX "u" +#define PRIxMAX __PRI64_PREFIX "x" +#define PRIXMAX __PRI64_PREFIX "X" + +#define PRIoPTR __PRIPTR_PREFIX "o" +#define PRIuPTR __PRIPTR_PREFIX "u" +#define PRIxPTR __PRIPTR_PREFIX "x" +#define PRIXPTR __PRIPTR_PREFIX "X" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/deps/jemalloc/include/msvc_compat/stdbool.h b/deps/jemalloc/include/msvc_compat/stdbool.h new file mode 100644 index 0000000000..da9ee8b809 --- /dev/null +++ b/deps/jemalloc/include/msvc_compat/stdbool.h @@ -0,0 +1,16 @@ +#ifndef stdbool_h +#define stdbool_h + +#include <wtypes.h> + +/* MSVC doesn't define _Bool or bool in C, but does have BOOL */ +/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */ +typedef BOOL _Bool; + +#define bool _Bool +#define true 1 +#define false 0 + +#define __bool_true_false_are_defined 1 + +#endif /* stdbool_h */ diff --git a/deps/jemalloc/include/msvc_compat/stdint.h b/deps/jemalloc/include/msvc_compat/stdint.h new file mode 100644 index 0000000000..d02608a597 --- /dev/null +++ b/deps/jemalloc/include/msvc_compat/stdint.h @@ -0,0 +1,247 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include <wchar.h> +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. +#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] diff --git a/deps/jemalloc/include/msvc_compat/strings.h b/deps/jemalloc/include/msvc_compat/strings.h new file mode 100644 index 0000000000..c84975b6b8 --- /dev/null +++ b/deps/jemalloc/include/msvc_compat/strings.h @@ -0,0 +1,23 @@ +#ifndef strings_h +#define strings_h + +/* MSVC doesn't define ffs/ffsl. This dummy strings.h header is provided + * for both */ +#include <intrin.h> +#pragma intrinsic(_BitScanForward) +static __forceinline int ffsl(long x) +{ + unsigned long i; + + if (_BitScanForward(&i, x)) + return (i + 1); + return (0); +} + +static __forceinline int ffs(int x) +{ + + return (ffsl(x)); +} + +#endif diff --git a/deps/jemalloc/jemalloc_defs.h.in.cmake b/deps/jemalloc/jemalloc_defs.h.in.cmake new file mode 100644 index 0000000000..04512ccf6a --- /dev/null +++ b/deps/jemalloc/jemalloc_defs.h.in.cmake @@ -0,0 +1,274 @@ +/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by + * these macro definitions. + */ +#define je_malloc_conf malloc_conf +#define je_malloc_message malloc_message +#define je_malloc malloc +#define je_calloc calloc +#define je_posix_memalign posix_memalign +#define je_aligned_alloc aligned_alloc +#define je_realloc realloc +#define je_free free +#define je_malloc_usable_size malloc_usable_size +#define je_malloc_stats_print malloc_stats_print +#define je_mallctl mallctl +#define je_mallctlnametomib mallctlnametomib +#define je_mallctlbymib mallctlbymib +#define je_memalign memalign +#define je_valloc valloc +#define je_allocm allocm +#define je_rallocm rallocm +#define je_sallocm sallocm +#define je_dallocm dallocm +#define je_nallocm nallocm + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE "" +#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT __asm__ volatile("pause") + +/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ +/* #undef JEMALLOC_ATOMIC9 */ + +/* + * Defined if OSAtomic*() functions are available, as provided by Darwin, and + * documented in the atomic(3) manual page. + */ +/* #undef JEMALLOC_OSATOMIC */ + +/* + * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and + * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ + +/* + * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and + * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ + +/* + * Defined if OSSpin*() functions are available, as provided by Darwin, and + * documented in the spinlock(3) manual page. + */ +/* #undef JEMALLOC_OSSPIN */ + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Defined if __attribute__((...)) syntax is supported. */ +#define JEMALLOC_HAVE_ATTR +#ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#elif _MSC_VER +# define JEMALLOC_ATTR(s) +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_NOINLINE __declspec(noinline) +#else +# define JEMALLOC_ATTR(s) +# define JEMALLOC_EXPORT +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_SECTION(s) +# define JEMALLOC_NOINLINE +#endif + +/* Defined if sbrk() is supported. */ +#define JEMALLOC_HAVE_SBRK + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL @JEM_TLSMODEL@ + +/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ +/* #undef JEMALLOC_CC_SILENCE */ + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. + * This makes it possible to allocate/deallocate objects without any locking + * when the cache is in the steady state. + */ +#define JEMALLOC_TCACHE + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * segment (DSS). + */ +/* #undef JEMALLOC_DSS */ + +/* Support memory filling (junk/zero/quarantine/redzone). */ +#define JEMALLOC_FILL + +/* Support the experimental API. */ +#define JEMALLOC_EXPERIMENTAL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support Valgrind. */ +/* #undef JEMALLOC_VALGRIND */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* One page is 2^STATIC_PAGE_SHIFT bytes. */ +#define STATIC_PAGE_SHIFT 12 + +/* + * If defined, use munmap() to unmap freed chunks, rather than storing them for + * later reuse. This is disabled by default on Linux because common sequences + * of mmap()/munmap() calls will cause virtual memory map holes. + */ +/* #undef JEMALLOC_MUNMAP */ + +/* + * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is + * disabled by default because it is Linux-specific and it will cause virtual + * memory map holes, much like munmap(2) does. + */ +/* #undef JEMALLOC_MREMAP */ + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. + */ +/* #undef JEMALLOC_IVSALLOC */ + +/* + * Define overrides for non-standard allocator-related functions if they + * are present on the system. + */ +#define JEMALLOC_OVERRIDE_MEMALIGN +#define JEMALLOC_OVERRIDE_VALLOC + +/* + * At least Linux omits the "const" in: + * + * size_t malloc_usable_size(const void *ptr); + * + * Match the operating system's prototype. + */ +#define JEMALLOC_USABLE_SIZE_CONST + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ +/* #undef JEMALLOC_ZONE_VERSION */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched. + * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being + * unused, such that they will be discarded rather + * than swapped out. + */ +#define JEMALLOC_PURGE_MADVISE_DONTNEED +/* #undef JEMALLOC_PURGE_MADVISE_FREE */ + +/* + * Define if operating system has alloca.h header. + */ +#define JEMALLOC_HAS_ALLOCA_H + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#define LG_SIZEOF_PTR @JEM_SIZEDEF@ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG @JEM_SIZEDEF@ + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* C99 restrict keyword supported. */ +/*#define JEMALLOC_HAS_RESTRICT*/ + +/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ +#undef JEMALLOC_CODE_COVERAGE diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c new file mode 100644 index 0000000000..dad707b63d --- /dev/null +++ b/deps/jemalloc/src/arena.c @@ -0,0 +1,2577 @@ +#define JEMALLOC_ARENA_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; +arena_bin_info_t arena_bin_info[NBINS]; + +JEMALLOC_ALIGNED(CACHELINE) +const uint8_t small_size2bin[] = { +#define S2B_8(i) i, +#define S2B_16(i) S2B_8(i) S2B_8(i) +#define S2B_32(i) S2B_16(i) S2B_16(i) +#define S2B_64(i) S2B_32(i) S2B_32(i) +#define S2B_128(i) S2B_64(i) S2B_64(i) +#define S2B_256(i) S2B_128(i) S2B_128(i) +#define S2B_512(i) S2B_256(i) S2B_256(i) +#define S2B_1024(i) S2B_512(i) S2B_512(i) +#define S2B_2048(i) S2B_1024(i) S2B_1024(i) +#define S2B_4096(i) S2B_2048(i) S2B_2048(i) +#define S2B_8192(i) S2B_4096(i) S2B_4096(i) +#define SIZE_CLASS(bin, delta, size) \ + S2B_##delta(bin) + SIZE_CLASSES +#undef S2B_8 +#undef S2B_16 +#undef S2B_32 +#undef S2B_64 +#undef S2B_128 +#undef S2B_256 +#undef S2B_512 +#undef S2B_1024 +#undef S2B_2048 +#undef S2B_4096 +#undef S2B_8192 +#undef SIZE_CLASS +}; + +/******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ + +static void arena_purge(arena_t *arena, bool all); +static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, + bool cleaned); +static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin); +static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin); + +/******************************************************************************/ + +static inline int +arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) +{ + uintptr_t a_mapelm = (uintptr_t)a; + uintptr_t b_mapelm = (uintptr_t)b; + + assert(a != NULL); + assert(b != NULL); + + return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm)); +} + +/* Generate red-black tree functions. */ +rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t, + u.rb_link, arena_run_comp) + +static inline int +arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) +{ + int ret; + size_t a_size = a->bits & ~PAGE_MASK; + size_t b_size = b->bits & ~PAGE_MASK; + + ret = (a_size > b_size) - (a_size < b_size); + if (ret == 0) { + uintptr_t a_mapelm, b_mapelm; + + if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY) + a_mapelm = (uintptr_t)a; + else { + /* + * Treat keys as though they are lower than anything + * else. + */ + a_mapelm = 0; + } + b_mapelm = (uintptr_t)b; + + ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); + } + + return (ret); +} + +/* Generate red-black tree functions. */ +rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, + u.rb_link, arena_avail_comp) + +static inline int +arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b) +{ + + assert(a != NULL); + assert(b != NULL); + + /* + * Short-circuit for self comparison. The following comparison code + * would come to the same result, but at the cost of executing the slow + * path. + */ + if (a == b) + return (0); + + /* + * Order such that chunks with higher fragmentation are "less than" + * those with lower fragmentation -- purging order is from "least" to + * "greatest". Fragmentation is measured as: + * + * mean current avail run size + * -------------------------------- + * mean defragmented avail run size + * + * navail + * ----------- + * nruns_avail nruns_avail-nruns_adjac + * = ========================= = ----------------------- + * navail nruns_avail + * ----------------------- + * nruns_avail-nruns_adjac + * + * The following code multiplies away the denominator prior to + * comparison, in order to avoid division. + * + */ + { + size_t a_val = (a->nruns_avail - a->nruns_adjac) * + b->nruns_avail; + size_t b_val = (b->nruns_avail - b->nruns_adjac) * + a->nruns_avail; + + if (a_val < b_val) + return (1); + if (a_val > b_val) + return (-1); + } + /* + * Break ties by chunk address. For fragmented chunks, report lower + * addresses as "lower", so that fragmentation reduction happens first + * at lower addresses. However, use the opposite ordering for + * unfragmented chunks, in order to increase the chances of + * re-allocating dirty runs. + */ + { + uintptr_t a_chunk = (uintptr_t)a; + uintptr_t b_chunk = (uintptr_t)b; + int ret = ((a_chunk > b_chunk) - (a_chunk < b_chunk)); + if (a->nruns_adjac == 0) { + assert(b->nruns_adjac == 0); + ret = -ret; + } + return (ret); + } +} + +/* Generate red-black tree functions. */ +rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t, + dirty_link, arena_chunk_dirty_comp) + +static inline bool +arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind) +{ + bool ret; + + if (pageind-1 < map_bias) + ret = false; + else { + ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0); + assert(ret == false || arena_mapbits_dirty_get(chunk, + pageind-1) != arena_mapbits_dirty_get(chunk, pageind)); + } + return (ret); +} + +static inline bool +arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages) +{ + bool ret; + + if (pageind+npages == chunk_npages) + ret = false; + else { + assert(pageind+npages < chunk_npages); + ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0); + assert(ret == false || arena_mapbits_dirty_get(chunk, pageind) + != arena_mapbits_dirty_get(chunk, pageind+npages)); + } + return (ret); +} + +static inline bool +arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages) +{ + + return (arena_avail_adjac_pred(chunk, pageind) || + arena_avail_adjac_succ(chunk, pageind, npages)); +} + +static void +arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +{ + + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + + /* + * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be + * removed and reinserted even if the run to be inserted is clean. + */ + if (chunk->ndirty != 0) + arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); + + if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) + chunk->nruns_adjac++; + if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) + chunk->nruns_adjac++; + chunk->nruns_avail++; + assert(chunk->nruns_avail > chunk->nruns_adjac); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { + arena->ndirty += npages; + chunk->ndirty += npages; + } + if (chunk->ndirty != 0) + arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); + + arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk, + pageind)); +} + +static void +arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +{ + + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + + /* + * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be + * removed and reinserted even if the run to be removed is clean. + */ + if (chunk->ndirty != 0) + arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); + + if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) + chunk->nruns_adjac--; + if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) + chunk->nruns_adjac--; + chunk->nruns_avail--; + assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail + == 0 && chunk->nruns_adjac == 0)); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { + arena->ndirty -= npages; + chunk->ndirty -= npages; + } + if (chunk->ndirty != 0) + arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); + + arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk, + pageind)); +} + +static inline void * +arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) +{ + void *ret; + unsigned regind; + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + assert(run->nfree > 0); + assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); + + regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); + ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + + (uintptr_t)(bin_info->reg_interval * regind)); + run->nfree--; + if (regind == run->nextind) + run->nextind++; + assert(regind < run->nextind); + return (ret); +} + +static inline void +arena_run_reg_dalloc(arena_run_t *run, void *ptr) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + size_t binind = arena_ptr_small_binind_get(ptr, mapbits); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + unsigned regind = arena_run_regind(run, bin_info, ptr); + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + assert(run->nfree < bin_info->nregs); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % + (uintptr_t)bin_info->reg_interval == 0); + assert((uintptr_t)ptr >= (uintptr_t)run + + (uintptr_t)bin_info->reg0_offset); + /* Freeing an unallocated pointer can cause assertion failure. */ + assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); + + bitmap_unset(bitmap, &bin_info->bitmap_info, regind); + run->nfree++; +} + +static inline void +arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) +{ + + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (npages << LG_PAGE)); + memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, + (npages << LG_PAGE)); +} + +static inline void +arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind) +{ + + VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), PAGE); +} + +static inline void +arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) +{ + size_t i; + UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); + + arena_run_page_mark_zeroed(chunk, run_ind); + for (i = 0; i < PAGE / sizeof(size_t); i++) + assert(p[i] == 0); +} + +static void +arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) +{ + + if (config_stats) { + ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + + add_pages) << LG_PAGE) - CHUNK_CEILING((arena->nactive - + sub_pages) << LG_PAGE); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } +} + +static void +arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, + size_t flag_dirty, size_t need_pages) +{ + size_t total_pages, rem_pages; + + total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> + LG_PAGE; + assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == + flag_dirty); + assert(need_pages <= total_pages); + rem_pages = total_pages - need_pages; + + arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); + arena_cactive_update(arena, need_pages, 0); + arena->nactive += need_pages; + + /* Keep track of trailing unused pages for later use. */ + if (rem_pages > 0) { + if (flag_dirty != 0) { + arena_mapbits_unallocated_set(chunk, + run_ind+need_pages, (rem_pages << LG_PAGE), + flag_dirty); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + flag_dirty); + } else { + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages)); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+total_pages-1)); + } + arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, + false, true); + } +} + +static void +arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, + bool remove, bool zero) +{ + arena_chunk_t *chunk; + size_t flag_dirty, run_ind, need_pages, i; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); + need_pages = (size >> LG_PAGE); + assert(need_pages > 0); + + if (remove) { + arena_run_split_remove(arena, chunk, run_ind, flag_dirty, + need_pages); + } + + if (zero) { + if (flag_dirty == 0) { + /* + * The run is clean, so some pages may be zeroed (i.e. + * never before touched). + */ + for (i = 0; i < need_pages; i++) { + if (arena_mapbits_unzeroed_get(chunk, run_ind+i) + != 0) + arena_run_zero(chunk, run_ind+i, 1); + else if (config_debug) { + arena_run_page_validate_zeroed(chunk, + run_ind+i); + } else { + arena_run_page_mark_zeroed(chunk, + run_ind+i); + } + } + } else { + /* The run is dirty, so all pages must be zeroed. */ + arena_run_zero(chunk, run_ind, need_pages); + } + } else { + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); + } + + /* + * Set the last element first, in case the run only contains one page + * (i.e. both statements set the same element). + */ + arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty); + arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); +} + +static void +arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) +{ + + arena_run_split_large_helper(arena, run, size, true, zero); +} + +static void +arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) +{ + + arena_run_split_large_helper(arena, run, size, false, zero); +} + +static void +arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, + size_t binind) +{ + arena_chunk_t *chunk; + size_t flag_dirty, run_ind, need_pages, i; + + assert(binind != BININD_INVALID); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); + need_pages = (size >> LG_PAGE); + assert(need_pages > 0); + + arena_run_split_remove(arena, chunk, run_ind, flag_dirty, need_pages); + + /* + * Propagate the dirty and unzeroed flags to the allocated small run, + * so that arena_dalloc_bin_run() has the ability to conditionally trim + * clean pages. + */ + arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); + /* + * The first page will always be dirtied during small run + * initialization, so a validation failure here would not actually + * cause an observable failure. + */ + if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, + run_ind) == 0) + arena_run_page_validate_zeroed(chunk, run_ind); + for (i = 1; i < need_pages - 1; i++) { + arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); + if (config_debug && flag_dirty == 0 && + arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) + arena_run_page_validate_zeroed(chunk, run_ind+i); + } + arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1, + binind, flag_dirty); + if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages-1) == 0) + arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1); + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + + (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); +} + +static arena_chunk_t * +arena_chunk_init_spare(arena_t *arena) +{ + arena_chunk_t *chunk; + + assert(arena->spare != NULL); + + chunk = arena->spare; + arena->spare = NULL; + + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == + arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, chunk_npages-1)); + + return (chunk); +} + +static arena_chunk_t * +arena_chunk_init_hard(arena_t *arena) +{ + arena_chunk_t *chunk; + bool zero; + size_t unzeroed, i; + + assert(arena->spare == NULL); + + zero = false; + malloc_mutex_unlock(&arena->lock); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, false, + &zero, arena->dss_prec); + malloc_mutex_lock(&arena->lock); + if (chunk == NULL) + return (NULL); + if (config_stats) + arena->stats.mapped += chunksize; + + chunk->arena = arena; + + /* + * Claim that no pages are in use, since the header is merely overhead. + */ + chunk->ndirty = 0; + + chunk->nruns_avail = 0; + chunk->nruns_adjac = 0; + + /* + * Initialize the map to contain one maximal free untouched run. Mark + * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. + */ + unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, + unzeroed); + /* + * There is no need to initialize the internal page map entries unless + * the chunk is not zeroed. + */ + if (zero == false) { + VALGRIND_MAKE_MEM_UNDEFINED((void *)arena_mapp_get(chunk, + map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, + chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, + map_bias+1))); + for (i = map_bias+1; i < chunk_npages-1; i++) + arena_mapbits_unzeroed_set(chunk, i, unzeroed); + } else { + VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk, + map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk, + chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk, + map_bias+1))); + if (config_debug) { + for (i = map_bias+1; i < chunk_npages-1; i++) { + assert(arena_mapbits_unzeroed_get(chunk, i) == + unzeroed); + } + } + } + arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, + unzeroed); + + return (chunk); +} + +static arena_chunk_t * +arena_chunk_alloc(arena_t *arena) +{ + arena_chunk_t *chunk; + + if (arena->spare != NULL) + chunk = arena_chunk_init_spare(arena); + else { + chunk = arena_chunk_init_hard(arena); + if (chunk == NULL) + return (NULL); + } + + /* Insert the run into the runs_avail tree. */ + arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, + false, false); + + return (chunk); +} + +static void +arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) +{ + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == + arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, chunk_npages-1)); + + /* + * Remove run from the runs_avail tree, so that the arena does not use + * it. + */ + arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias, + false, false); + + if (arena->spare != NULL) { + arena_chunk_t *spare = arena->spare; + + arena->spare = chunk; + malloc_mutex_unlock(&arena->lock); + chunk_dealloc((void *)spare, chunksize, true); + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena->stats.mapped -= chunksize; + } else + arena->spare = chunk; +} + +static arena_run_t * +arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) +{ + arena_run_t *run; + arena_chunk_map_t *mapelm, key; + + key.bits = size | CHUNK_MAP_KEY; + mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + LG_PAGE)); + arena_run_split_large(arena, run, size, zero); + return (run); + } + + return (NULL); +} + +static arena_run_t * +arena_run_alloc_large(arena_t *arena, size_t size, bool zero) +{ + arena_chunk_t *chunk; + arena_run_t *run; + + assert(size <= arena_maxclass); + assert((size & PAGE_MASK) == 0); + + /* Search the arena's chunks for the lowest best fit. */ + run = arena_run_alloc_large_helper(arena, size, zero); + if (run != NULL) + return (run); + + /* + * No usable runs. Create a new chunk from which to allocate the run. + */ + chunk = arena_chunk_alloc(arena); + if (chunk != NULL) { + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); + arena_run_split_large(arena, run, size, zero); + return (run); + } + + /* + * arena_chunk_alloc() failed, but another thread may have made + * sufficient memory available while this one dropped arena->lock in + * arena_chunk_alloc(), so search one more time. + */ + return (arena_run_alloc_large_helper(arena, size, zero)); +} + +static arena_run_t * +arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) +{ + arena_run_t *run; + arena_chunk_map_t *mapelm, key; + + key.bits = size | CHUNK_MAP_KEY; + mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + LG_PAGE)); + arena_run_split_small(arena, run, size, binind); + return (run); + } + + return (NULL); +} + +static arena_run_t * +arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) +{ + arena_chunk_t *chunk; + arena_run_t *run; + + assert(size <= arena_maxclass); + assert((size & PAGE_MASK) == 0); + assert(binind != BININD_INVALID); + + /* Search the arena's chunks for the lowest best fit. */ + run = arena_run_alloc_small_helper(arena, size, binind); + if (run != NULL) + return (run); + + /* + * No usable runs. Create a new chunk from which to allocate the run. + */ + chunk = arena_chunk_alloc(arena); + if (chunk != NULL) { + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); + arena_run_split_small(arena, run, size, binind); + return (run); + } + + /* + * arena_chunk_alloc() failed, but another thread may have made + * sufficient memory available while this one dropped arena->lock in + * arena_chunk_alloc(), so search one more time. + */ + return (arena_run_alloc_small_helper(arena, size, binind)); +} + +static inline void +arena_maybe_purge(arena_t *arena) +{ + size_t npurgeable, threshold; + + /* Don't purge if the option is disabled. */ + if (opt_lg_dirty_mult < 0) + return; + /* Don't purge if all dirty pages are already being purged. */ + if (arena->ndirty <= arena->npurgatory) + return; + npurgeable = arena->ndirty - arena->npurgatory; + threshold = (arena->nactive >> opt_lg_dirty_mult); + /* + * Don't purge unless the number of purgeable pages exceeds the + * threshold. + */ + if (npurgeable <= threshold) + return; + + arena_purge(arena, false); +} + +static arena_chunk_t * +chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) +{ + size_t *ndirty = (size_t *)arg; + + assert(chunk->ndirty != 0); + *ndirty += chunk->ndirty; + return (NULL); +} + +static size_t +arena_compute_npurgatory(arena_t *arena, bool all) +{ + size_t npurgatory, npurgeable; + + /* + * Compute the minimum number of pages that this thread should try to + * purge. + */ + npurgeable = arena->ndirty - arena->npurgatory; + + if (all == false) { + size_t threshold = (arena->nactive >> opt_lg_dirty_mult); + + npurgatory = npurgeable - threshold; + } else + npurgatory = npurgeable; + + return (npurgatory); +} + +static void +arena_chunk_stash_dirty(arena_t *arena, arena_chunk_t *chunk, bool all, + arena_chunk_mapelms_t *mapelms) +{ + size_t pageind, npages; + + /* + * Temporarily allocate free dirty runs within chunk. If all is false, + * only operate on dirty runs that are fragments; otherwise operate on + * all dirty runs. + */ + for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + if (arena_mapbits_allocated_get(chunk, pageind) == 0) { + size_t run_size = + arena_mapbits_unallocated_size_get(chunk, pageind); + + npages = run_size >> LG_PAGE; + assert(pageind + npages <= chunk_npages); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+npages-1)); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0 && + (all || arena_avail_adjac(chunk, pageind, + npages))) { + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); + + arena_run_split_large(arena, run, run_size, + false); + /* Append to list for later processing. */ + ql_elm_new(mapelm, u.ql_link); + ql_tail_insert(mapelms, mapelm, u.ql_link); + } + } else { + /* Skip run. */ + if (arena_mapbits_large_get(chunk, pageind) != 0) { + npages = arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; + } else { + size_t binind; + arena_bin_info_t *bin_info; + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); + + assert(arena_mapbits_small_runind_get(chunk, + pageind) == 0); + binind = arena_bin_index(arena, run->bin); + bin_info = &arena_bin_info[binind]; + npages = bin_info->run_size >> LG_PAGE; + } + } + } + assert(pageind == chunk_npages); + assert(chunk->ndirty == 0 || all == false); + assert(chunk->nruns_adjac == 0); +} + +static size_t +arena_chunk_purge_stashed(arena_t *arena, arena_chunk_t *chunk, + arena_chunk_mapelms_t *mapelms) +{ + size_t npurged, pageind, npages, nmadvise; + arena_chunk_map_t *mapelm; + + malloc_mutex_unlock(&arena->lock); + if (config_stats) + nmadvise = 0; + npurged = 0; + ql_foreach(mapelm, mapelms, u.ql_link) { + bool unzeroed; + size_t flag_unzeroed, i; + + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + npages = arena_mapbits_large_size_get(chunk, pageind) >> + LG_PAGE; + assert(pageind + npages <= chunk_npages); + unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << + LG_PAGE)), (npages << LG_PAGE)); + flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; + /* + * Set the unzeroed flag for all pages, now that pages_purge() + * has returned whether the pages were zeroed as a side effect + * of purging. This chunk map modification is safe even though + * the arena mutex isn't currently owned by this thread, + * because the run is marked as allocated, thus protecting it + * from being modified by any other thread. As long as these + * writes don't perturb the first and last elements' + * CHUNK_MAP_ALLOCATED bits, behavior is well defined. + */ + for (i = 0; i < npages; i++) { + arena_mapbits_unzeroed_set(chunk, pageind+i, + flag_unzeroed); + } + npurged += npages; + if (config_stats) + nmadvise++; + } + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena->stats.nmadvise += nmadvise; + + return (npurged); +} + +static void +arena_chunk_unstash_purged(arena_t *arena, arena_chunk_t *chunk, + arena_chunk_mapelms_t *mapelms) +{ + arena_chunk_map_t *mapelm; + size_t pageind; + + /* Deallocate runs. */ + for (mapelm = ql_first(mapelms); mapelm != NULL; + mapelm = ql_first(mapelms)) { + arena_run_t *run; + + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << + LG_PAGE)); + ql_remove(mapelms, mapelm, u.ql_link); + arena_run_dalloc(arena, run, false, true); + } +} + +static inline size_t +arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) +{ + size_t npurged; + arena_chunk_mapelms_t mapelms; + + ql_new(&mapelms); + + /* + * If chunk is the spare, temporarily re-allocate it, 1) so that its + * run is reinserted into runs_avail, and 2) so that it cannot be + * completely discarded by another thread while arena->lock is dropped + * by this thread. Note that the arena_run_dalloc() call will + * implicitly deallocate the chunk, so no explicit action is required + * in this function to deallocate the chunk. + * + * Note that once a chunk contains dirty pages, it cannot again contain + * a single run unless 1) it is a dirty run, or 2) this function purges + * dirty pages and causes the transition to a single clean run. Thus + * (chunk == arena->spare) is possible, but it is not possible for + * this function to be called on the spare unless it contains a dirty + * run. + */ + if (chunk == arena->spare) { + assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); + assert(arena_mapbits_dirty_get(chunk, chunk_npages-1) != 0); + + arena_chunk_alloc(arena); + } + + if (config_stats) + arena->stats.purged += chunk->ndirty; + + /* + * Operate on all dirty runs if there is no clean/dirty run + * fragmentation. + */ + if (chunk->nruns_adjac == 0) + all = true; + + arena_chunk_stash_dirty(arena, chunk, all, &mapelms); + npurged = arena_chunk_purge_stashed(arena, chunk, &mapelms); + arena_chunk_unstash_purged(arena, chunk, &mapelms); + + return (npurged); +} + +static void +arena_purge(arena_t *arena, bool all) +{ + arena_chunk_t *chunk; + size_t npurgatory; + if (config_debug) { + size_t ndirty = 0; + + arena_chunk_dirty_iter(&arena->chunks_dirty, NULL, + chunks_dirty_iter_cb, (void *)&ndirty); + assert(ndirty == arena->ndirty); + } + assert(arena->ndirty > arena->npurgatory || all); + assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - + arena->npurgatory) || all); + + if (config_stats) + arena->stats.npurge++; + + /* + * Add the minimum number of pages this thread should try to purge to + * arena->npurgatory. This will keep multiple threads from racing to + * reduce ndirty below the threshold. + */ + npurgatory = arena_compute_npurgatory(arena, all); + arena->npurgatory += npurgatory; + + while (npurgatory > 0) { + size_t npurgeable, npurged, nunpurged; + + /* Get next chunk with dirty pages. */ + chunk = arena_chunk_dirty_first(&arena->chunks_dirty); + if (chunk == NULL) { + /* + * This thread was unable to purge as many pages as + * originally intended, due to races with other threads + * that either did some of the purging work, or re-used + * dirty pages. + */ + arena->npurgatory -= npurgatory; + return; + } + npurgeable = chunk->ndirty; + assert(npurgeable != 0); + + if (npurgeable > npurgatory && chunk->nruns_adjac == 0) { + /* + * This thread will purge all the dirty pages in chunk, + * so set npurgatory to reflect this thread's intent to + * purge the pages. This tends to reduce the chances + * of the following scenario: + * + * 1) This thread sets arena->npurgatory such that + * (arena->ndirty - arena->npurgatory) is at the + * threshold. + * 2) This thread drops arena->lock. + * 3) Another thread causes one or more pages to be + * dirtied, and immediately determines that it must + * purge dirty pages. + * + * If this scenario *does* play out, that's okay, + * because all of the purging work being done really + * needs to happen. + */ + arena->npurgatory += npurgeable - npurgatory; + npurgatory = npurgeable; + } + + /* + * Keep track of how many pages are purgeable, versus how many + * actually get purged, and adjust counters accordingly. + */ + arena->npurgatory -= npurgeable; + npurgatory -= npurgeable; + npurged = arena_chunk_purge(arena, chunk, all); + nunpurged = npurgeable - npurged; + arena->npurgatory += nunpurged; + npurgatory += nunpurged; + } +} + +void +arena_purge_all(arena_t *arena) +{ + + malloc_mutex_lock(&arena->lock); + arena_purge(arena, true); + malloc_mutex_unlock(&arena->lock); +} + +static void +arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, + size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty) +{ + size_t size = *p_size; + size_t run_ind = *p_run_ind; + size_t run_pages = *p_run_pages; + + /* Try to coalesce forward. */ + if (run_ind + run_pages < chunk_npages && + arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 && + arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) { + size_t nrun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages); + size_t nrun_pages = nrun_size >> LG_PAGE; + + /* + * Remove successor from runs_avail; the coalesced run is + * inserted later. + */ + assert(arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages+nrun_pages-1) == nrun_size); + assert(arena_mapbits_dirty_get(chunk, + run_ind+run_pages+nrun_pages-1) == flag_dirty); + arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages, + false, true); + + size += nrun_size; + run_pages += nrun_pages; + + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); + } + + /* Try to coalesce backward. */ + if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, + run_ind-1) == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == + flag_dirty) { + size_t prun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind-1); + size_t prun_pages = prun_size >> LG_PAGE; + + run_ind -= prun_pages; + + /* + * Remove predecessor from runs_avail; the coalesced run is + * inserted later. + */ + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + prun_size); + assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); + arena_avail_remove(arena, chunk, run_ind, prun_pages, true, + false); + + size += prun_size; + run_pages += prun_pages; + + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); + } + + *p_size = size; + *p_run_ind = run_ind; + *p_run_pages = run_pages; +} + +static void +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) +{ + arena_chunk_t *chunk; + size_t size, run_ind, run_pages, flag_dirty; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + assert(run_ind >= map_bias); + assert(run_ind < chunk_npages); + if (arena_mapbits_large_get(chunk, run_ind) != 0) { + size = arena_mapbits_large_size_get(chunk, run_ind); + assert(size == PAGE || + arena_mapbits_large_size_get(chunk, + run_ind+(size>>LG_PAGE)-1) == 0); + } else { + size_t binind = arena_bin_index(arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + size = bin_info->run_size; + } + run_pages = (size >> LG_PAGE); + arena_cactive_update(arena, 0, run_pages); + arena->nactive -= run_pages; + + /* + * The run is dirty if the caller claims to have dirtied it, as well as + * if it was already dirty before being allocated and the caller + * doesn't claim to have cleaned it. + */ + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); + if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) + dirty = true; + flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; + + /* Mark pages as unallocated in the chunk map. */ + if (dirty) { + arena_mapbits_unallocated_set(chunk, run_ind, size, + CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + CHUNK_MAP_DIRTY); + } else { + arena_mapbits_unallocated_set(chunk, run_ind, size, + arena_mapbits_unzeroed_get(chunk, run_ind)); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); + } + + arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages, + flag_dirty); + + /* Insert into runs_avail, now that coalescing is complete. */ + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); + arena_avail_insert(arena, chunk, run_ind, run_pages, true, true); + + /* Deallocate chunk if it is now completely unused. */ + if (size == arena_maxclass) { + assert(run_ind == map_bias); + assert(run_pages == (arena_maxclass >> LG_PAGE)); + arena_chunk_dealloc(arena, chunk); + } + + /* + * It is okay to do dirty page processing here even if the chunk was + * deallocated above, since in that case it is the spare. Waiting + * until after possible chunk deallocation to do dirty processing + * allows for an old spare to be fully deallocated, thus decreasing the + * chances of spuriously crossing the dirty page purging threshold. + */ + if (dirty) + arena_maybe_purge(arena); +} + +static void +arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + size_t oldsize, size_t newsize) +{ + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + size_t head_npages = (oldsize - newsize) >> LG_PAGE; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); + + assert(oldsize > newsize); + + /* + * Update the chunk map so that arena_run_dalloc() can treat the + * leading run as separately allocated. Set the last element of each + * run first, in case of single-page runs. + */ + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); + arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty); + + if (config_debug) { + UNUSED size_t tail_npages = newsize >> LG_PAGE; + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); + } + arena_mapbits_large_set(chunk, pageind+head_npages, newsize, + flag_dirty); + + arena_run_dalloc(arena, run, false, false); +} + +static void +arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + size_t oldsize, size_t newsize, bool dirty) +{ + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + size_t head_npages = newsize >> LG_PAGE; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); + + assert(oldsize > newsize); + + /* + * Update the chunk map so that arena_run_dalloc() can treat the + * trailing run as separately allocated. Set the last element of each + * run first, in case of single-page runs. + */ + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); + arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty); + + if (config_debug) { + UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); + } + arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, + flag_dirty); + + arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), + dirty, false); +} + +static arena_run_t * +arena_bin_runs_first(arena_bin_t *bin) +{ + arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs); + if (mapelm != NULL) { + arena_chunk_t *chunk; + size_t pageind; + arena_run_t *run; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << + LG_PAGE)); + return (run); + } + + return (NULL); +} + +static void +arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) +{ + arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + + assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); + + arena_run_tree_insert(&bin->runs, mapelm); +} + +static void +arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + + assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); + + arena_run_tree_remove(&bin->runs, mapelm); +} + +static arena_run_t * +arena_bin_nonfull_run_tryget(arena_bin_t *bin) +{ + arena_run_t *run = arena_bin_runs_first(bin); + if (run != NULL) { + arena_bin_runs_remove(bin, run); + if (config_stats) + bin->stats.reruns++; + } + return (run); +} + +static arena_run_t * +arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) +{ + arena_run_t *run; + size_t binind; + arena_bin_info_t *bin_info; + + /* Look for a usable run. */ + run = arena_bin_nonfull_run_tryget(bin); + if (run != NULL) + return (run); + /* No existing runs have any space available. */ + + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; + + /* Allocate a new run. */ + malloc_mutex_unlock(&bin->lock); + /******************************/ + malloc_mutex_lock(&arena->lock); + run = arena_run_alloc_small(arena, bin_info->run_size, binind); + if (run != NULL) { + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + /* Initialize run internals. */ + run->bin = bin; + run->nextind = 0; + run->nfree = bin_info->nregs; + bitmap_init(bitmap, &bin_info->bitmap_info); + } + malloc_mutex_unlock(&arena->lock); + /********************************/ + malloc_mutex_lock(&bin->lock); + if (run != NULL) { + if (config_stats) { + bin->stats.nruns++; + bin->stats.curruns++; + } + return (run); + } + + /* + * arena_run_alloc_small() failed, but another thread may have made + * sufficient memory available while this one dropped bin->lock above, + * so search one more time. + */ + run = arena_bin_nonfull_run_tryget(bin); + if (run != NULL) + return (run); + + return (NULL); +} + +/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ +static void * +arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) +{ + void *ret; + size_t binind; + arena_bin_info_t *bin_info; + arena_run_t *run; + + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; + bin->runcur = NULL; + run = arena_bin_nonfull_run_get(arena, bin); + if (bin->runcur != NULL && bin->runcur->nfree > 0) { + /* + * Another thread updated runcur while this one ran without the + * bin lock in arena_bin_nonfull_run_get(). + */ + assert(bin->runcur->nfree > 0); + ret = arena_run_reg_alloc(bin->runcur, bin_info); + if (run != NULL) { + arena_chunk_t *chunk; + + /* + * arena_run_alloc_small() may have allocated run, or + * it may have pulled run from the bin's run tree. + * Therefore it is unsafe to make any assumptions about + * how run has previously been used, and + * arena_bin_lower_run() must be called, as if a region + * were just deallocated from the run. + */ + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + if (run->nfree == bin_info->nregs) + arena_dalloc_bin_run(arena, chunk, run, bin); + else + arena_bin_lower_run(arena, chunk, run, bin); + } + return (ret); + } + + if (run == NULL) + return (NULL); + + bin->runcur = run; + + assert(bin->runcur->nfree > 0); + + return (arena_run_reg_alloc(bin->runcur, bin_info)); +} + +void +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, + uint64_t prof_accumbytes) +{ + unsigned i, nfill; + arena_bin_t *bin; + arena_run_t *run; + void *ptr; + + assert(tbin->ncached == 0); + + if (config_prof && arena_prof_accum(arena, prof_accumbytes)) + prof_idump(); + bin = &arena->bins[binind]; + malloc_mutex_lock(&bin->lock); + for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> + tbin->lg_fill_div); i < nfill; i++) { + if ((run = bin->runcur) != NULL && run->nfree > 0) + ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); + else + ptr = arena_bin_malloc_hard(arena, bin); + if (ptr == NULL) + break; + if (config_fill && opt_junk) { + arena_alloc_junk_small(ptr, &arena_bin_info[binind], + true); + } + /* Insert such that low regions get used first. */ + tbin->avail[nfill - 1 - i] = ptr; + } + if (config_stats) { + bin->stats.allocated += i * arena_bin_info[binind].reg_size; + bin->stats.nmalloc += i; + bin->stats.nrequests += tbin->tstats.nrequests; + bin->stats.nfills++; + tbin->tstats.nrequests = 0; + } + malloc_mutex_unlock(&bin->lock); + tbin->ncached = i; +} + +void +arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) +{ + + if (zero) { + size_t redzone_size = bin_info->redzone_size; + memset((void *)((uintptr_t)ptr - redzone_size), 0xa5, + redzone_size); + memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5, + redzone_size); + } else { + memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5, + bin_info->reg_interval); + } +} + +#ifdef JEMALLOC_JET +#undef arena_redzone_corruption +#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl) +#endif +static void +arena_redzone_corruption(void *ptr, size_t usize, bool after, + size_t offset, uint8_t byte) +{ + + malloc_printf("<jemalloc>: Corrupt redzone %zu byte%s %s %p " + "(size %zu), byte=%#x\n", offset, (offset == 1) ? "" : "s", + after ? "after" : "before", ptr, usize, byte); +} +#ifdef JEMALLOC_JET +#undef arena_redzone_corruption +#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) +arena_redzone_corruption_t *arena_redzone_corruption = + JEMALLOC_N(arena_redzone_corruption_impl); +#endif + +static void +arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) +{ + size_t size = bin_info->reg_size; + size_t redzone_size = bin_info->redzone_size; + size_t i; + bool error = false; + + for (i = 1; i <= redzone_size; i++) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); + if (*byte != 0xa5) { + error = true; + arena_redzone_corruption(ptr, size, false, i, *byte); + if (reset) + *byte = 0xa5; + } + } + for (i = 0; i < redzone_size; i++) { + uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); + if (*byte != 0xa5) { + error = true; + arena_redzone_corruption(ptr, size, true, i, *byte); + if (reset) + *byte = 0xa5; + } + } + if (opt_abort && error) + abort(); +} + +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_small +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl) +#endif +void +arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) +{ + size_t redzone_size = bin_info->redzone_size; + + arena_redzones_validate(ptr, bin_info, false); + memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, + bin_info->reg_interval); +} +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_small +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) +arena_dalloc_junk_small_t *arena_dalloc_junk_small = + JEMALLOC_N(arena_dalloc_junk_small_impl); +#endif + +void +arena_quarantine_junk_small(void *ptr, size_t usize) +{ + size_t binind; + arena_bin_info_t *bin_info; + cassert(config_fill); + assert(opt_junk); + assert(opt_quarantine); + assert(usize <= SMALL_MAXCLASS); + + binind = SMALL_SIZE2BIN(usize); + bin_info = &arena_bin_info[binind]; + arena_redzones_validate(ptr, bin_info, true); +} + +void * +arena_malloc_small(arena_t *arena, size_t size, bool zero) +{ + void *ret; + arena_bin_t *bin; + arena_run_t *run; + size_t binind; + + binind = SMALL_SIZE2BIN(size); + assert(binind < NBINS); + bin = &arena->bins[binind]; + size = arena_bin_info[binind].reg_size; + + malloc_mutex_lock(&bin->lock); + if ((run = bin->runcur) != NULL && run->nfree > 0) + ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); + else + ret = arena_bin_malloc_hard(arena, bin); + + if (ret == NULL) { + malloc_mutex_unlock(&bin->lock); + return (NULL); + } + + if (config_stats) { + bin->stats.allocated += size; + bin->stats.nmalloc++; + bin->stats.nrequests++; + } + malloc_mutex_unlock(&bin->lock); + if (config_prof && isthreaded == false && arena_prof_accum(arena, size)) + prof_idump(); + + if (zero == false) { + if (config_fill) { + if (opt_junk) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (opt_zero) + memset(ret, 0, size); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + } else { + if (config_fill && opt_junk) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } + + return (ret); +} + +void * +arena_malloc_large(arena_t *arena, size_t size, bool zero) +{ + void *ret; + UNUSED bool idump; + + /* Large allocation. */ + size = PAGE_CEILING(size); + malloc_mutex_lock(&arena->lock); + ret = (void *)arena_run_alloc_large(arena, size, zero); + if (ret == NULL) { + malloc_mutex_unlock(&arena->lock); + return (NULL); + } + if (config_stats) { + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + } + if (config_prof) + idump = arena_prof_accum_locked(arena, size); + malloc_mutex_unlock(&arena->lock); + if (config_prof && idump) + prof_idump(); + + if (zero == false) { + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } + } + + return (ret); +} + +/* Only handles large allocations that require more than page alignment. */ +void * +arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) +{ + void *ret; + size_t alloc_size, leadsize, trailsize; + arena_run_t *run; + arena_chunk_t *chunk; + + assert((size & PAGE_MASK) == 0); + + alignment = PAGE_CEILING(alignment); + alloc_size = size + alignment - PAGE; + + malloc_mutex_lock(&arena->lock); + run = arena_run_alloc_large(arena, alloc_size, false); + if (run == NULL) { + malloc_mutex_unlock(&arena->lock); + return (NULL); + } + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + + leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) - + (uintptr_t)run; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)run + leadsize); + if (leadsize != 0) { + arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size - + leadsize); + } + if (trailsize != 0) { + arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, + false); + } + arena_run_init_large(arena, (arena_run_t *)ret, size, zero); + + if (config_stats) { + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + } + malloc_mutex_unlock(&arena->lock); + + if (config_fill && zero == false) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } + return (ret); +} + +void +arena_prof_promoted(const void *ptr, size_t size) +{ + arena_chunk_t *chunk; + size_t pageind, binind; + + cassert(config_prof); + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, true) == PAGE); + assert(size <= SMALL_MAXCLASS); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + binind = SMALL_SIZE2BIN(size); + assert(binind < NBINS); + arena_mapbits_large_binind_set(chunk, pageind, binind); + + assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, true) == size); +} + +static void +arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + + /* Dissociate run from bin. */ + if (run == bin->runcur) + bin->runcur = NULL; + else { + size_t binind = arena_bin_index(chunk->arena, bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + + if (bin_info->nregs != 1) { + /* + * This block's conditional is necessary because if the + * run only contains one region, then it never gets + * inserted into the non-full runs tree. + */ + arena_bin_runs_remove(bin, run); + } + } +} + +static void +arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + size_t binind; + arena_bin_info_t *bin_info; + size_t npages, run_ind, past; + + assert(run != bin->runcur); + assert(arena_run_tree_search(&bin->runs, + arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) + == NULL); + + binind = arena_bin_index(chunk->arena, run->bin); + bin_info = &arena_bin_info[binind]; + + malloc_mutex_unlock(&bin->lock); + /******************************/ + npages = bin_info->run_size >> LG_PAGE; + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + past = (size_t)(PAGE_CEILING((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * + bin_info->reg_interval - bin_info->redzone_size) - + (uintptr_t)chunk) >> LG_PAGE); + malloc_mutex_lock(&arena->lock); + + /* + * If the run was originally clean, and some pages were never touched, + * trim the clean pages before deallocating the dirty portion of the + * run. + */ + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+npages-1)); + if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < + npages) { + /* Trim clean pages. Convert to large run beforehand. */ + assert(npages > 0); + arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0); + arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), + ((past - run_ind) << LG_PAGE), false); + /* npages = past - run_ind; */ + } + arena_run_dalloc(arena, run, true, false); + malloc_mutex_unlock(&arena->lock); + /****************************/ + malloc_mutex_lock(&bin->lock); + if (config_stats) + bin->stats.curruns--; +} + +static void +arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + + /* + * Make sure that if bin->runcur is non-NULL, it refers to the lowest + * non-full run. It is okay to NULL runcur out rather than proactively + * keeping it pointing at the lowest non-full run. + */ + if ((uintptr_t)run < (uintptr_t)bin->runcur) { + /* Switch runcur. */ + if (bin->runcur->nfree > 0) + arena_bin_runs_insert(bin, bin->runcur); + bin->runcur = run; + if (config_stats) + bin->stats.reruns++; + } else + arena_bin_runs_insert(bin, run); +} + +void +arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_t *mapelm) +{ + size_t pageind; + arena_run_t *run; + arena_bin_t *bin; + arena_bin_info_t *bin_info; + size_t size, binind; + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); + bin = run->bin; + binind = arena_ptr_small_binind_get(ptr, mapelm->bits); + bin_info = &arena_bin_info[binind]; + if (config_fill || config_stats) + size = bin_info->reg_size; + + if (config_fill && opt_junk) + arena_dalloc_junk_small(ptr, bin_info); + + arena_run_reg_dalloc(run, ptr); + if (run->nfree == bin_info->nregs) { + arena_dissociate_bin_run(chunk, run, bin); + arena_dalloc_bin_run(arena, chunk, run, bin); + } else if (run->nfree == 1 && run != bin->runcur) + arena_bin_lower_run(arena, chunk, run, bin); + + if (config_stats) { + bin->stats.allocated -= size; + bin->stats.ndalloc++; + } +} + +void +arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm) +{ + arena_run_t *run; + arena_bin_t *bin; + + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); + bin = run->bin; + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin_locked(arena, chunk, ptr, mapelm); + malloc_mutex_unlock(&bin->lock); +} + +void +arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind) +{ + arena_chunk_map_t *mapelm; + + if (config_debug) { + /* arena_ptr_small_binind_get() does extra sanity checking. */ + assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) != BININD_INVALID); + } + mapelm = arena_mapp_get(chunk, pageind); + arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); +} + +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_large +#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) +#endif +static void +arena_dalloc_junk_large(void *ptr, size_t usize) +{ + + if (config_fill && opt_junk) + memset(ptr, 0x5a, usize); +} +#ifdef JEMALLOC_JET +#undef arena_dalloc_junk_large +#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) +arena_dalloc_junk_large_t *arena_dalloc_junk_large = + JEMALLOC_N(arena_dalloc_junk_large_impl); +#endif + +void +arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + + if (config_fill || config_stats) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t usize = arena_mapbits_large_size_get(chunk, pageind); + + arena_dalloc_junk_large(ptr, usize); + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= usize; + arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--; + } + } + + arena_run_dalloc(arena, (arena_run_t *)ptr, true, false); +} + +void +arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large_locked(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); +} + +static void +arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t oldsize, size_t size) +{ + + assert(size < oldsize); + + /* + * Shrink the run, and make trailing pages available for other + * allocations. + */ + malloc_mutex_lock(&arena->lock); + arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, + true); + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + } + malloc_mutex_unlock(&arena->lock); +} + +static bool +arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t oldsize, size_t size, size_t extra, bool zero) +{ + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t npages = oldsize >> LG_PAGE; + size_t followsize; + + assert(oldsize == arena_mapbits_large_size_get(chunk, pageind)); + + /* Try to extend the run. */ + assert(size + extra > oldsize); + malloc_mutex_lock(&arena->lock); + if (pageind + npages < chunk_npages && + arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && + (followsize = arena_mapbits_unallocated_size_get(chunk, + pageind+npages)) >= size - oldsize) { + /* + * The next run is available and sufficiently large. Split the + * following run, then merge the first part with the existing + * allocation. + */ + size_t flag_dirty; + size_t splitsize = (oldsize + followsize <= size + extra) + ? followsize : size + extra - oldsize; + arena_run_split_large(arena, (arena_run_t *)((uintptr_t)chunk + + ((pageind+npages) << LG_PAGE)), splitsize, zero); + + size = oldsize + splitsize; + npages = size >> LG_PAGE; + + /* + * Mark the extended run as dirty if either portion of the run + * was dirty before allocation. This is rather pedantic, + * because there's not actually any sequence of events that + * could cause the resulting run to be passed to + * arena_run_dalloc() with the dirty argument set to false + * (which is when dirty flag consistency would really matter). + */ + flag_dirty = arena_mapbits_dirty_get(chunk, pageind) | + arena_mapbits_dirty_get(chunk, pageind+npages-1); + arena_mapbits_large_set(chunk, pageind, size, flag_dirty); + arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); + + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + } + malloc_mutex_unlock(&arena->lock); + return (false); + } + malloc_mutex_unlock(&arena->lock); + + return (true); +} + +#ifdef JEMALLOC_JET +#undef arena_ralloc_junk_large +#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl) +#endif +static void +arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) +{ + + if (config_fill && opt_junk) { + memset((void *)((uintptr_t)ptr + usize), 0x5a, + old_usize - usize); + } +} +#ifdef JEMALLOC_JET +#undef arena_ralloc_junk_large +#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) +arena_ralloc_junk_large_t *arena_ralloc_junk_large = + JEMALLOC_N(arena_ralloc_junk_large_impl); +#endif + +/* + * Try to resize a large allocation, in order to avoid copying. This will + * always fail if growing an object, and the following run is already in use. + */ +static bool +arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) +{ + size_t psize; + + psize = PAGE_CEILING(size + extra); + if (psize == oldsize) { + /* Same size class. */ + return (false); + } else { + arena_chunk_t *chunk; + arena_t *arena; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + + if (psize < oldsize) { + /* Fill before shrinking in order avoid a race. */ + arena_ralloc_junk_large(ptr, oldsize, psize); + arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, + psize); + return (false); + } else { + bool ret = arena_ralloc_large_grow(arena, chunk, ptr, + oldsize, PAGE_CEILING(size), + psize - PAGE_CEILING(size), zero); + if (config_fill && ret == false && zero == false) { + if (opt_junk) { + memset((void *)((uintptr_t)ptr + + oldsize), 0xa5, isalloc(ptr, + config_prof) - oldsize); + } else if (opt_zero) { + memset((void *)((uintptr_t)ptr + + oldsize), 0, isalloc(ptr, + config_prof) - oldsize); + } + } + return (ret); + } + } +} + +bool +arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) +{ + + /* + * Avoid moving the allocation if the size class can be left the same. + */ + if (oldsize <= arena_maxclass) { + if (oldsize <= SMALL_MAXCLASS) { + assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size + == oldsize); + if ((size + extra <= SMALL_MAXCLASS && + SMALL_SIZE2BIN(size + extra) == + SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && + size + extra >= oldsize)) + return (false); + } else { + assert(size <= arena_maxclass); + if (size + extra > SMALL_MAXCLASS) { + if (arena_ralloc_large(ptr, oldsize, size, + extra, zero) == false) + return (false); + } + } + } + + /* Reallocation would require a move. */ + return (true); +} + +void * +arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. */ + if (arena_ralloc_no_move(ptr, oldsize, size, extra, zero) == false) + return (ptr); + + /* + * size and oldsize are different enough that we need to move the + * object. In that case, fall back to allocating new space and + * copying. + */ + if (alignment != 0) { + size_t usize = sa2u(size + extra, alignment); + if (usize == 0) + return (NULL); + ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + } else + ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment != 0) { + size_t usize = sa2u(size, alignment); + if (usize == 0) + return (NULL); + ret = ipalloct(usize, alignment, zero, try_tcache_alloc, + arena); + } else + ret = arena_malloc(arena, size, zero, try_tcache_alloc); + + if (ret == NULL) + return (NULL); + } + + /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */ + + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); + memcpy(ret, ptr, copysize); + iqalloct(ptr, try_tcache_dalloc); + return (ret); +} + +dss_prec_t +arena_dss_prec_get(arena_t *arena) +{ + dss_prec_t ret; + + malloc_mutex_lock(&arena->lock); + ret = arena->dss_prec; + malloc_mutex_unlock(&arena->lock); + return (ret); +} + +void +arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) +{ + + malloc_mutex_lock(&arena->lock); + arena->dss_prec = dss_prec; + malloc_mutex_unlock(&arena->lock); +} + +void +arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats) +{ + unsigned i; + + malloc_mutex_lock(&arena->lock); + *dss = dss_prec_names[arena->dss_prec]; + *nactive += arena->nactive; + *ndirty += arena->ndirty; + + astats->mapped += arena->stats.mapped; + astats->npurge += arena->stats.npurge; + astats->nmadvise += arena->stats.nmadvise; + astats->purged += arena->stats.purged; + astats->allocated_large += arena->stats.allocated_large; + astats->nmalloc_large += arena->stats.nmalloc_large; + astats->ndalloc_large += arena->stats.ndalloc_large; + astats->nrequests_large += arena->stats.nrequests_large; + + for (i = 0; i < nlclasses; i++) { + lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; + lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; + lstats[i].nrequests += arena->stats.lstats[i].nrequests; + lstats[i].curruns += arena->stats.lstats[i].curruns; + } + malloc_mutex_unlock(&arena->lock); + + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + + malloc_mutex_lock(&bin->lock); + bstats[i].allocated += bin->stats.allocated; + bstats[i].nmalloc += bin->stats.nmalloc; + bstats[i].ndalloc += bin->stats.ndalloc; + bstats[i].nrequests += bin->stats.nrequests; + if (config_tcache) { + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; + } + bstats[i].nruns += bin->stats.nruns; + bstats[i].reruns += bin->stats.reruns; + bstats[i].curruns += bin->stats.curruns; + malloc_mutex_unlock(&bin->lock); + } +} + +bool +arena_new(arena_t *arena, unsigned ind) +{ + unsigned i; + arena_bin_t *bin; + + arena->ind = ind; + arena->nthreads = 0; + + if (malloc_mutex_init(&arena->lock)) + return (true); + + if (config_stats) { + memset(&arena->stats, 0, sizeof(arena_stats_t)); + arena->stats.lstats = + (malloc_large_stats_t *)base_alloc(nlclasses * + sizeof(malloc_large_stats_t)); + if (arena->stats.lstats == NULL) + return (true); + memset(arena->stats.lstats, 0, nlclasses * + sizeof(malloc_large_stats_t)); + if (config_tcache) + ql_new(&arena->tcache_ql); + } + + if (config_prof) + arena->prof_accumbytes = 0; + + arena->dss_prec = chunk_dss_prec_get(); + + /* Initialize chunks. */ + arena_chunk_dirty_new(&arena->chunks_dirty); + arena->spare = NULL; + + arena->nactive = 0; + arena->ndirty = 0; + arena->npurgatory = 0; + + arena_avail_tree_new(&arena->runs_avail); + + /* Initialize bins. */ + for (i = 0; i < NBINS; i++) { + bin = &arena->bins[i]; + if (malloc_mutex_init(&bin->lock)) + return (true); + bin->runcur = NULL; + arena_run_tree_new(&bin->runs); + if (config_stats) + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); + } + + return (false); +} + +/* + * Calculate bin_info->run_size such that it meets the following constraints: + * + * *) bin_info->run_size >= min_run_size + * *) bin_info->run_size <= arena_maxclass + * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). + * *) bin_info->nregs <= RUN_MAXREGS + * + * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also + * calculated here, since these settings are all interdependent. + */ +static size_t +bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) +{ + size_t pad_size; + size_t try_run_size, good_run_size; + uint32_t try_nregs, good_nregs; + uint32_t try_hdr_size, good_hdr_size; + uint32_t try_bitmap_offset, good_bitmap_offset; + uint32_t try_ctx0_offset, good_ctx0_offset; + uint32_t try_redzone0_offset, good_redzone0_offset; + + assert(min_run_size >= PAGE); + assert(min_run_size <= arena_maxclass); + + /* + * Determine redzone size based on minimum alignment and minimum + * redzone size. Add padding to the end of the run if it is needed to + * align the regions. The padding allows each redzone to be half the + * minimum alignment; without the padding, each redzone would have to + * be twice as large in order to maintain alignment. + */ + if (config_fill && opt_redzone) { + size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1); + if (align_min <= REDZONE_MINSIZE) { + bin_info->redzone_size = REDZONE_MINSIZE; + pad_size = 0; + } else { + bin_info->redzone_size = align_min >> 1; + pad_size = bin_info->redzone_size; + } + } else { + bin_info->redzone_size = 0; + pad_size = 0; + } + bin_info->reg_interval = bin_info->reg_size + + (bin_info->redzone_size << 1); + + /* + * Calculate known-valid settings before entering the run_size + * expansion loop, so that the first part of the loop always copies + * valid settings. + * + * The do..while loop iteratively reduces the number of regions until + * the run header and the regions no longer overlap. A closed formula + * would be quite messy, since there is an interdependency between the + * header's mask length and the number of regions. + */ + try_run_size = min_run_size; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin_info->reg_interval) + + 1; /* Counter-act try_nregs-- in loop. */ + if (try_nregs > RUN_MAXREGS) { + try_nregs = RUN_MAXREGS + + 1; /* Counter-act try_nregs-- in loop. */ + } + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. */ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); + if (config_prof && opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. */ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); + } else + try_ctx0_offset = 0; + try_redzone0_offset = try_run_size - (try_nregs * + bin_info->reg_interval) - pad_size; + } while (try_hdr_size > try_redzone0_offset); + + /* run_size expansion loop. */ + do { + /* + * Copy valid settings before trying more aggressive settings. + */ + good_run_size = try_run_size; + good_nregs = try_nregs; + good_hdr_size = try_hdr_size; + good_bitmap_offset = try_bitmap_offset; + good_ctx0_offset = try_ctx0_offset; + good_redzone0_offset = try_redzone0_offset; + + /* Try more aggressive settings. */ + try_run_size += PAGE; + try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) / + bin_info->reg_interval) + + 1; /* Counter-act try_nregs-- in loop. */ + if (try_nregs > RUN_MAXREGS) { + try_nregs = RUN_MAXREGS + + 1; /* Counter-act try_nregs-- in loop. */ + } + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. */ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); + if (config_prof && opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* + * Add space for one (prof_ctx_t *) per region. + */ + try_hdr_size += try_nregs * + sizeof(prof_ctx_t *); + } + try_redzone0_offset = try_run_size - (try_nregs * + bin_info->reg_interval) - pad_size; + } while (try_hdr_size > try_redzone0_offset); + } while (try_run_size <= arena_maxclass + && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) > + RUN_MAX_OVRHD_RELAX + && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size + && try_nregs < RUN_MAXREGS); + + assert(good_hdr_size <= good_redzone0_offset); + + /* Copy final settings. */ + bin_info->run_size = good_run_size; + bin_info->nregs = good_nregs; + bin_info->bitmap_offset = good_bitmap_offset; + bin_info->ctx0_offset = good_ctx0_offset; + bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size; + + assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs + * bin_info->reg_interval) + pad_size == bin_info->run_size); + + return (good_run_size); +} + +static void +bin_info_init(void) +{ + arena_bin_info_t *bin_info; + size_t prev_run_size = PAGE; + +#define SIZE_CLASS(bin, delta, size) \ + bin_info = &arena_bin_info[bin]; \ + bin_info->reg_size = size; \ + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + SIZE_CLASSES +#undef SIZE_CLASS +} + +void +arena_boot(void) +{ + size_t header_size; + unsigned i; + + /* + * Compute the header size such that it is large enough to contain the + * page map. The page map is biased to omit entries for the header + * itself, so some iteration is necessary to compute the map bias. + * + * 1) Compute safe header_size and map_bias values that include enough + * space for an unbiased page map. + * 2) Refine map_bias based on (1) to omit the header pages in the page + * map. The resulting map_bias may be one too small. + * 3) Refine map_bias based on (2). The result will be >= the result + * from (2), and will always be correct. + */ + map_bias = 0; + for (i = 0; i < 3; i++) { + header_size = offsetof(arena_chunk_t, map) + + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); + map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK) + != 0); + } + assert(map_bias > 0); + + arena_maxclass = chunksize - (map_bias << LG_PAGE); + + bin_info_init(); +} + +void +arena_prefork(arena_t *arena) +{ + unsigned i; + + malloc_mutex_prefork(&arena->lock); + for (i = 0; i < NBINS; i++) + malloc_mutex_prefork(&arena->bins[i].lock); +} + +void +arena_postfork_parent(arena_t *arena) +{ + unsigned i; + + for (i = 0; i < NBINS; i++) + malloc_mutex_postfork_parent(&arena->bins[i].lock); + malloc_mutex_postfork_parent(&arena->lock); +} + +void +arena_postfork_child(arena_t *arena) +{ + unsigned i; + + for (i = 0; i < NBINS; i++) + malloc_mutex_postfork_child(&arena->bins[i].lock); + malloc_mutex_postfork_child(&arena->lock); +} diff --git a/deps/jemalloc/src/atomic.c b/deps/jemalloc/src/atomic.c new file mode 100644 index 0000000000..77ee313113 --- /dev/null +++ b/deps/jemalloc/src/atomic.c @@ -0,0 +1,2 @@ +#define JEMALLOC_ATOMIC_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/deps/jemalloc/src/base.c b/deps/jemalloc/src/base.c new file mode 100644 index 0000000000..4e62e8fa91 --- /dev/null +++ b/deps/jemalloc/src/base.c @@ -0,0 +1,142 @@ +#define JEMALLOC_BASE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +static malloc_mutex_t base_mtx; + +/* + * Current pages that are being used for internal memory allocations. These + * pages are carved up in cacheline-size quanta, so that there is no chance of + * false cache line sharing. + */ +static void *base_pages; +static void *base_next_addr; +static void *base_past_addr; /* Addr immediately past base_pages. */ +static extent_node_t *base_nodes; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static bool base_pages_alloc(size_t minsize); + +/******************************************************************************/ + +static bool +base_pages_alloc(size_t minsize) +{ + size_t csize; + bool zero; + + assert(minsize != 0); + csize = CHUNK_CEILING(minsize); + zero = false; + base_pages = chunk_alloc(csize, chunksize, true, &zero, + chunk_dss_prec_get()); + if (base_pages == NULL) + return (true); + base_next_addr = base_pages; + base_past_addr = (void *)((uintptr_t)base_pages + csize); + + return (false); +} + +void * +base_alloc(size_t size) +{ + void *ret; + size_t csize; + + /* Round size up to nearest multiple of the cacheline size. */ + csize = CACHELINE_CEILING(size); + + malloc_mutex_lock(&base_mtx); + /* Make sure there's enough space for the allocation. */ + if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) { + if (base_pages_alloc(csize)) { + malloc_mutex_unlock(&base_mtx); + return (NULL); + } + } + /* Allocate. */ + ret = base_next_addr; + base_next_addr = (void *)((uintptr_t)base_next_addr + csize); + malloc_mutex_unlock(&base_mtx); + VALGRIND_MAKE_MEM_UNDEFINED(ret, csize); + + return (ret); +} + +void * +base_calloc(size_t number, size_t size) +{ + void *ret = base_alloc(number * size); + + if (ret != NULL) + memset(ret, 0, number * size); + + return (ret); +} + +extent_node_t * +base_node_alloc(void) +{ + extent_node_t *ret; + + malloc_mutex_lock(&base_mtx); + if (base_nodes != NULL) { + ret = base_nodes; + base_nodes = *(extent_node_t **)ret; + malloc_mutex_unlock(&base_mtx); + VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t)); + } else { + malloc_mutex_unlock(&base_mtx); + ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); + } + + return (ret); +} + +void +base_node_dealloc(extent_node_t *node) +{ + + VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); + malloc_mutex_lock(&base_mtx); + *(extent_node_t **)node = base_nodes; + base_nodes = node; + malloc_mutex_unlock(&base_mtx); +} + +bool +base_boot(void) +{ + + base_nodes = NULL; + if (malloc_mutex_init(&base_mtx)) + return (true); + + return (false); +} + +void +base_prefork(void) +{ + + malloc_mutex_prefork(&base_mtx); +} + +void +base_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&base_mtx); +} + +void +base_postfork_child(void) +{ + + malloc_mutex_postfork_child(&base_mtx); +} diff --git a/deps/jemalloc/src/bitmap.c b/deps/jemalloc/src/bitmap.c new file mode 100644 index 0000000000..e2bd907d55 --- /dev/null +++ b/deps/jemalloc/src/bitmap.c @@ -0,0 +1,90 @@ +#define JEMALLOC_BITMAP_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static size_t bits2groups(size_t nbits); + +/******************************************************************************/ + +static size_t +bits2groups(size_t nbits) +{ + + return ((nbits >> LG_BITMAP_GROUP_NBITS) + + !!(nbits & BITMAP_GROUP_NBITS_MASK)); +} + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) +{ + unsigned i; + size_t group_count; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + /* + * Compute the number of groups necessary to store nbits bits, and + * progressively work upward through the levels until reaching a level + * that requires only one group. + */ + binfo->levels[0].group_offset = 0; + group_count = bits2groups(nbits); + for (i = 1; group_count > 1; i++) { + assert(i < BITMAP_MAX_LEVELS); + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + group_count = bits2groups(group_count); + } + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + binfo->nlevels = i; + binfo->nbits = nbits; +} + +size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); +} + +size_t +bitmap_size(size_t nbits) +{ + bitmap_info_t binfo; + + bitmap_info_init(&binfo, nbits); + return (bitmap_info_ngroups(&binfo)); +} + +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t extra; + unsigned i; + + /* + * Bits are actually inverted with regard to the external bitmap + * interface, so the bitmap starts out with all 1 bits, except for + * trailing unused bits (if any). Note that each group uses bit 0 to + * correspond to the first logical bit in the group, so extra bits + * are the most significant bits of the last group. + */ + memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << + LG_SIZEOF_BITMAP); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[1].group_offset - 1] >>= extra; + for (i = 1; i < binfo->nlevels; i++) { + size_t group_count = binfo->levels[i].group_offset - + binfo->levels[i-1].group_offset; + extra = (BITMAP_GROUP_NBITS - (group_count & + BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; + } +} diff --git a/deps/jemalloc/src/chunk.c b/deps/jemalloc/src/chunk.c new file mode 100644 index 0000000000..90ab116ae5 --- /dev/null +++ b/deps/jemalloc/src/chunk.c @@ -0,0 +1,395 @@ +#define JEMALLOC_CHUNK_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +const char *opt_dss = DSS_DEFAULT; +size_t opt_lg_chunk = LG_CHUNK_DEFAULT; + +malloc_mutex_t chunks_mtx; +chunk_stats_t stats_chunks; + +/* + * Trees of chunks that were previously allocated (trees differ only in node + * ordering). These are used when allocating chunks, in an attempt to re-use + * address space. Depending on function, different tree orderings are needed, + * which is why there are two trees with the same contents. + */ +static extent_tree_t chunks_szad_mmap; +static extent_tree_t chunks_ad_mmap; +static extent_tree_t chunks_szad_dss; +static extent_tree_t chunks_ad_dss; + +rtree_t *chunks_rtree; + +/* Various chunk-related settings. */ +size_t chunksize; +size_t chunksize_mask; /* (chunksize - 1). */ +size_t chunk_npages; +size_t map_bias; +size_t arena_maxclass; /* Max size class for arenas. */ + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *chunk_recycle(extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base, + bool *zero); +static void chunk_record(extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, void *chunk, size_t size); + +/******************************************************************************/ + +static void * +chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, + size_t alignment, bool base, bool *zero) +{ + void *ret; + extent_node_t *node; + extent_node_t key; + size_t alloc_size, leadsize, trailsize; + bool zeroed; + + if (base) { + /* + * This function may need to call base_node_{,de}alloc(), but + * the current chunk allocation request is on behalf of the + * base allocator. Avoid deadlock (and if that weren't an + * issue, potential for infinite recursion) by returning NULL. + */ + return (NULL); + } + + alloc_size = size + alignment - chunksize; + /* Beware size_t wrap-around. */ + if (alloc_size < size) + return (NULL); + key.addr = NULL; + key.size = alloc_size; + malloc_mutex_lock(&chunks_mtx); + node = extent_tree_szad_nsearch(chunks_szad, &key); + if (node == NULL) { + malloc_mutex_unlock(&chunks_mtx); + return (NULL); + } + leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - + (uintptr_t)node->addr; + assert(node->size >= leadsize + size); + trailsize = node->size - leadsize - size; + ret = (void *)((uintptr_t)node->addr + leadsize); + zeroed = node->zeroed; + if (zeroed) + *zero = true; + /* Remove node from the tree. */ + extent_tree_szad_remove(chunks_szad, node); + extent_tree_ad_remove(chunks_ad, node); + if (leadsize != 0) { + /* Insert the leading space as a smaller chunk. */ + node->size = leadsize; + extent_tree_szad_insert(chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); + node = NULL; + } + if (trailsize != 0) { + /* Insert the trailing space as a smaller chunk. */ + if (node == NULL) { + /* + * An additional node is required, but + * base_node_alloc() can cause a new base chunk to be + * allocated. Drop chunks_mtx in order to avoid + * deadlock, and if node allocation fails, deallocate + * the result before returning an error. + */ + malloc_mutex_unlock(&chunks_mtx); + node = base_node_alloc(); + if (node == NULL) { + chunk_dealloc(ret, size, true); + return (NULL); + } + malloc_mutex_lock(&chunks_mtx); + } + node->addr = (void *)((uintptr_t)(ret) + size); + node->size = trailsize; + node->zeroed = zeroed; + extent_tree_szad_insert(chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); + node = NULL; + } + malloc_mutex_unlock(&chunks_mtx); + + if (node != NULL) + base_node_dealloc(node); + if (*zero) { + if (zeroed == false) + memset(ret, 0, size); + else if (config_debug) { + size_t i; + size_t *p = (size_t *)(uintptr_t)ret; + + VALGRIND_MAKE_MEM_DEFINED(ret, size); + for (i = 0; i < size / sizeof(size_t); i++) + assert(p[i] == 0); + } + } + return (ret); +} + +/* + * If the caller specifies (*zero == false), it is still possible to receive + * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc() + * takes advantage of this to avoid demanding zeroed chunks, but taking + * advantage of them if they are returned. + */ +void * +chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, + dss_prec_t dss_prec) +{ + void *ret; + + assert(size != 0); + assert((size & chunksize_mask) == 0); + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + + /* "primary" dss. */ + if (config_dss && dss_prec == dss_prec_primary) { + if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, + alignment, base, zero)) != NULL) + goto label_return; + if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) + goto label_return; + } + /* mmap. */ + if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size, + alignment, base, zero)) != NULL) + goto label_return; + if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) + goto label_return; + /* "secondary" dss. */ + if (config_dss && dss_prec == dss_prec_secondary) { + if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, + alignment, base, zero)) != NULL) + goto label_return; + if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) + goto label_return; + } + + /* All strategies for allocation failed. */ + ret = NULL; +label_return: + if (ret != NULL) { + if (config_ivsalloc && base == false) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, 1)) { + chunk_dealloc(ret, size, true); + return (NULL); + } + } + if (config_stats || config_prof) { + bool gdump; + malloc_mutex_lock(&chunks_mtx); + if (config_stats) + stats_chunks.nchunks += (size / chunksize); + stats_chunks.curchunks += (size / chunksize); + if (stats_chunks.curchunks > stats_chunks.highchunks) { + stats_chunks.highchunks = + stats_chunks.curchunks; + if (config_prof) + gdump = true; + } else if (config_prof) + gdump = false; + malloc_mutex_unlock(&chunks_mtx); + if (config_prof && opt_prof && opt_prof_gdump && gdump) + prof_gdump(); + } + if (config_valgrind) + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + } + assert(CHUNK_ADDR2BASE(ret) == ret); + return (ret); +} + +static void +chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, + size_t size) +{ + bool unzeroed; + extent_node_t *xnode, *node, *prev, *xprev, key; + + unzeroed = pages_purge(chunk, size); + VALGRIND_MAKE_MEM_NOACCESS(chunk, size); + + /* + * Allocate a node before acquiring chunks_mtx even though it might not + * be needed, because base_node_alloc() may cause a new base chunk to + * be allocated, which could cause deadlock if chunks_mtx were already + * held. + */ + xnode = base_node_alloc(); + /* Use xprev to implement conditional deferred deallocation of prev. */ + xprev = NULL; + + malloc_mutex_lock(&chunks_mtx); + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. This does + * not change the position within chunks_ad, so only + * remove/insert from/into chunks_szad. + */ + extent_tree_szad_remove(chunks_szad, node); + node->addr = chunk; + node->size += size; + node->zeroed = (node->zeroed && (unzeroed == false)); + extent_tree_szad_insert(chunks_szad, node); + } else { + /* Coalescing forward failed, so insert a new node. */ + if (xnode == NULL) { + /* + * base_node_alloc() failed, which is an exceedingly + * unlikely failure. Leak chunk; its pages have + * already been purged, so this is only a virtual + * memory leak. + */ + goto label_return; + } + node = xnode; + xnode = NULL; /* Prevent deallocation below. */ + node->addr = chunk; + node->size = size; + node->zeroed = (unzeroed == false); + extent_tree_ad_insert(chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); + } + + /* Try to coalesce backward. */ + prev = extent_tree_ad_prev(chunks_ad, node); + if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == + chunk) { + /* + * Coalesce chunk with the previous address range. This does + * not change the position within chunks_ad, so only + * remove/insert node from/into chunks_szad. + */ + extent_tree_szad_remove(chunks_szad, prev); + extent_tree_ad_remove(chunks_ad, prev); + + extent_tree_szad_remove(chunks_szad, node); + node->addr = prev->addr; + node->size += prev->size; + node->zeroed = (node->zeroed && prev->zeroed); + extent_tree_szad_insert(chunks_szad, node); + + xprev = prev; + } + +label_return: + malloc_mutex_unlock(&chunks_mtx); + /* + * Deallocate xnode and/or xprev after unlocking chunks_mtx in order to + * avoid potential deadlock. + */ + if (xnode != NULL) + base_node_dealloc(xnode); + if (xprev != NULL) + base_node_dealloc(xprev); +} + +void +chunk_unmap(void *chunk, size_t size) +{ + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert(size != 0); + assert((size & chunksize_mask) == 0); + + if (config_dss && chunk_in_dss(chunk)) + chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size); + else if (chunk_dealloc_mmap(chunk, size)) + chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size); +} + +void +chunk_dealloc(void *chunk, size_t size, bool unmap) +{ + + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert(size != 0); + assert((size & chunksize_mask) == 0); + + if (config_ivsalloc) + rtree_set(chunks_rtree, (uintptr_t)chunk, 0); + if (config_stats || config_prof) { + malloc_mutex_lock(&chunks_mtx); + assert(stats_chunks.curchunks >= (size / chunksize)); + stats_chunks.curchunks -= (size / chunksize); + malloc_mutex_unlock(&chunks_mtx); + } + + if (unmap) + chunk_unmap(chunk, size); +} + +bool +chunk_boot(void) +{ + + /* Set variables according to the value of opt_lg_chunk. */ + chunksize = (ZU(1) << opt_lg_chunk); + assert(chunksize >= PAGE); + chunksize_mask = chunksize - 1; + chunk_npages = (chunksize >> LG_PAGE); + + if (config_stats || config_prof) { + if (malloc_mutex_init(&chunks_mtx)) + return (true); + memset(&stats_chunks, 0, sizeof(chunk_stats_t)); + } + if (config_dss && chunk_dss_boot()) + return (true); + extent_tree_szad_new(&chunks_szad_mmap); + extent_tree_ad_new(&chunks_ad_mmap); + extent_tree_szad_new(&chunks_szad_dss); + extent_tree_ad_new(&chunks_ad_dss); + if (config_ivsalloc) { + chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk, base_alloc, NULL); + if (chunks_rtree == NULL) + return (true); + } + + return (false); +} + +void +chunk_prefork(void) +{ + + malloc_mutex_prefork(&chunks_mtx); + if (config_ivsalloc) + rtree_prefork(chunks_rtree); + chunk_dss_prefork(); +} + +void +chunk_postfork_parent(void) +{ + + chunk_dss_postfork_parent(); + if (config_ivsalloc) + rtree_postfork_parent(chunks_rtree); + malloc_mutex_postfork_parent(&chunks_mtx); +} + +void +chunk_postfork_child(void) +{ + + chunk_dss_postfork_child(); + if (config_ivsalloc) + rtree_postfork_child(chunks_rtree); + malloc_mutex_postfork_child(&chunks_mtx); +} diff --git a/deps/jemalloc/src/chunk_dss.c b/deps/jemalloc/src/chunk_dss.c new file mode 100644 index 0000000000..510bb8bee8 --- /dev/null +++ b/deps/jemalloc/src/chunk_dss.c @@ -0,0 +1,198 @@ +#define JEMALLOC_CHUNK_DSS_C_ +#include "jemalloc/internal/jemalloc_internal.h" +/******************************************************************************/ +/* Data. */ + +const char *dss_prec_names[] = { + "disabled", + "primary", + "secondary", + "N/A" +}; + +/* Current dss precedence default, used when creating new arenas. */ +static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; + +/* + * Protects sbrk() calls. This avoids malloc races among threads, though it + * does not protect against races with threads that call sbrk() directly. + */ +static malloc_mutex_t dss_mtx; + +/* Base address of the DSS. */ +static void *dss_base; +/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ +static void *dss_prev; +/* Current upper limit on DSS addresses. */ +static void *dss_max; + +/******************************************************************************/ + +static void * +chunk_dss_sbrk(intptr_t increment) +{ + +#ifdef JEMALLOC_HAVE_SBRK + return (sbrk(increment)); +#else + not_implemented(); + return (NULL); +#endif +} + +dss_prec_t +chunk_dss_prec_get(void) +{ + dss_prec_t ret; + + if (config_dss == false) + return (dss_prec_disabled); + malloc_mutex_lock(&dss_mtx); + ret = dss_prec_default; + malloc_mutex_unlock(&dss_mtx); + return (ret); +} + +bool +chunk_dss_prec_set(dss_prec_t dss_prec) +{ + + if (config_dss == false) + return (true); + malloc_mutex_lock(&dss_mtx); + dss_prec_default = dss_prec; + malloc_mutex_unlock(&dss_mtx); + return (false); +} + +void * +chunk_alloc_dss(size_t size, size_t alignment, bool *zero) +{ + void *ret; + + cassert(config_dss); + assert(size > 0 && (size & chunksize_mask) == 0); + assert(alignment > 0 && (alignment & chunksize_mask) == 0); + + /* + * sbrk() uses a signed increment argument, so take care not to + * interpret a huge allocation request as a negative increment. + */ + if ((intptr_t)size < 0) + return (NULL); + + malloc_mutex_lock(&dss_mtx); + if (dss_prev != (void *)-1) { + size_t gap_size, cpad_size; + void *cpad, *dss_next; + intptr_t incr; + + /* + * The loop is necessary to recover from races with other + * threads that are using the DSS for something other than + * malloc. + */ + do { + /* Get the current end of the DSS. */ + dss_max = chunk_dss_sbrk(0); + /* + * Calculate how much padding is necessary to + * chunk-align the end of the DSS. + */ + gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) & + chunksize_mask; + /* + * Compute how much chunk-aligned pad space (if any) is + * necessary to satisfy alignment. This space can be + * recycled for later use. + */ + cpad = (void *)((uintptr_t)dss_max + gap_size); + ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, + alignment); + cpad_size = (uintptr_t)ret - (uintptr_t)cpad; + dss_next = (void *)((uintptr_t)ret + size); + if ((uintptr_t)ret < (uintptr_t)dss_max || + (uintptr_t)dss_next < (uintptr_t)dss_max) { + /* Wrap-around. */ + malloc_mutex_unlock(&dss_mtx); + return (NULL); + } + incr = gap_size + cpad_size + size; + dss_prev = chunk_dss_sbrk(incr); + if (dss_prev == dss_max) { + /* Success. */ + dss_max = dss_next; + malloc_mutex_unlock(&dss_mtx); + if (cpad_size != 0) + chunk_unmap(cpad, cpad_size); + if (*zero) { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } + return (ret); + } + } while (dss_prev != (void *)-1); + } + malloc_mutex_unlock(&dss_mtx); + + return (NULL); +} + +bool +chunk_in_dss(void *chunk) +{ + bool ret; + + cassert(config_dss); + + malloc_mutex_lock(&dss_mtx); + if ((uintptr_t)chunk >= (uintptr_t)dss_base + && (uintptr_t)chunk < (uintptr_t)dss_max) + ret = true; + else + ret = false; + malloc_mutex_unlock(&dss_mtx); + + return (ret); +} + +bool +chunk_dss_boot(void) +{ + + cassert(config_dss); + + if (malloc_mutex_init(&dss_mtx)) + return (true); + dss_base = chunk_dss_sbrk(0); + dss_prev = dss_base; + dss_max = dss_base; + + return (false); +} + +void +chunk_dss_prefork(void) +{ + + if (config_dss) + malloc_mutex_prefork(&dss_mtx); +} + +void +chunk_dss_postfork_parent(void) +{ + + if (config_dss) + malloc_mutex_postfork_parent(&dss_mtx); +} + +void +chunk_dss_postfork_child(void) +{ + + if (config_dss) + malloc_mutex_postfork_child(&dss_mtx); +} + +/******************************************************************************/ diff --git a/deps/jemalloc/src/chunk_mmap.c b/deps/jemalloc/src/chunk_mmap.c new file mode 100644 index 0000000000..2056d793f0 --- /dev/null +++ b/deps/jemalloc/src/chunk_mmap.c @@ -0,0 +1,210 @@ +#define JEMALLOC_CHUNK_MMAP_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *pages_map(void *addr, size_t size); +static void pages_unmap(void *addr, size_t size); +static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, + bool *zero); + +/******************************************************************************/ + +static void * +pages_map(void *addr, size_t size) +{ + void *ret; + + assert(size != 0); + +#ifdef _WIN32 + /* + * If VirtualAlloc can't allocate at the given address when one is + * given, it fails and returns NULL. + */ + ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, + PAGE_READWRITE); +#else + /* + * We don't use MAP_FIXED here, because it can cause the *replacement* + * of existing mappings, and we only want to create new mappings. + */ + ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + -1, 0); + assert(ret != NULL); + + if (ret == MAP_FAILED) + ret = NULL; + else if (addr != NULL && ret != addr) { + /* + * We succeeded in mapping memory, but not in the right place. + */ + if (munmap(ret, size) == -1) { + char buf[BUFERROR_BUF]; + + buferror(get_errno(), buf, sizeof(buf)); + malloc_printf("<jemalloc: Error in munmap(): %s\n", + buf); + if (opt_abort) + abort(); + } + ret = NULL; + } +#endif + assert(ret == NULL || (addr == NULL && ret != addr) + || (addr != NULL && ret == addr)); + return (ret); +} + +static void +pages_unmap(void *addr, size_t size) +{ + +#ifdef _WIN32 + if (VirtualFree(addr, 0, MEM_RELEASE) == 0) +#else + if (munmap(addr, size) == -1) +#endif + { + char buf[BUFERROR_BUF]; + + buferror(get_errno(), buf, sizeof(buf)); + malloc_printf("<jemalloc>: Error in " +#ifdef _WIN32 + "VirtualFree" +#else + "munmap" +#endif + "(): %s\n", buf); + if (opt_abort) + abort(); + } +} + +static void * +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) +{ + void *ret = (void *)((uintptr_t)addr + leadsize); + + assert(alloc_size >= leadsize + size); +#ifdef _WIN32 + { + void *new_addr; + + pages_unmap(addr, alloc_size); + new_addr = pages_map(ret, size); + if (new_addr == ret) + return (ret); + if (new_addr) + pages_unmap(new_addr, size); + return (NULL); + } +#else + { + size_t trailsize = alloc_size - leadsize - size; + + if (leadsize != 0) + pages_unmap(addr, leadsize); + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); + return (ret); + } +#endif +} + +bool +pages_purge(void *addr, size_t length) +{ + bool unzeroed; + +#ifdef _WIN32 + VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); + unzeroed = true; +#else +# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +# define JEMALLOC_MADV_ZEROS true +# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +# define JEMALLOC_MADV_ZEROS false +# else +# error "No method defined for purging unused dirty pages." +# endif + int err = madvise(addr, length, JEMALLOC_MADV_PURGE); + unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0); +# undef JEMALLOC_MADV_PURGE +# undef JEMALLOC_MADV_ZEROS +#endif + return (unzeroed); +} + +static void * +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero) +{ + void *ret, *pages; + size_t alloc_size, leadsize; + + alloc_size = size + alignment - PAGE; + /* Beware size_t wrap-around. */ + if (alloc_size < size) + return (NULL); + do { + pages = pages_map(NULL, alloc_size); + if (pages == NULL) + return (NULL); + leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - + (uintptr_t)pages; + ret = pages_trim(pages, alloc_size, leadsize, size); + } while (ret == NULL); + + assert(ret != NULL); + *zero = true; + return (ret); +} + +void * +chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) +{ + void *ret; + size_t offset; + + /* + * Ideally, there would be a way to specify alignment to mmap() (like + * NetBSD has), but in the absence of such a feature, we have to work + * hard to efficiently create aligned mappings. The reliable, but + * slow method is to create a mapping that is over-sized, then trim the + * excess. However, that always results in one or two calls to + * pages_unmap(). + * + * Optimistically try mapping precisely the right amount before falling + * back to the slow method, with the expectation that the optimistic + * approach works most of the time. + */ + + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + + ret = pages_map(NULL, size); + if (ret == NULL) + return (NULL); + offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); + if (offset != 0) { + pages_unmap(ret, size); + return (chunk_alloc_mmap_slow(size, alignment, zero)); + } + + assert(ret != NULL); + *zero = true; + return (ret); +} + +bool +chunk_dealloc_mmap(void *chunk, size_t size) +{ + + if (config_munmap) + pages_unmap(chunk, size); + + return (config_munmap == false); +} diff --git a/deps/jemalloc/src/ckh.c b/deps/jemalloc/src/ckh.c new file mode 100644 index 0000000000..04c5296619 --- /dev/null +++ b/deps/jemalloc/src/ckh.c @@ -0,0 +1,563 @@ +/* + ******************************************************************************* + * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each + * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash + * functions are employed. The original cuckoo hashing algorithm was described + * in: + * + * Pagh, R., F.F. Rodler (2004) Cuckoo Hashing. Journal of Algorithms + * 51(2):122-144. + * + * Generalization of cuckoo hashing was discussed in: + * + * Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical + * alternative to traditional hash tables. In Proceedings of the 7th + * Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA, + * January 2006. + * + * This implementation uses precisely two hash functions because that is the + * fewest that can work, and supporting multiple hashes is an implementation + * burden. Here is a reproduction of Figure 1 from Erlingsson et al. (2006) + * that shows approximate expected maximum load factors for various + * configurations: + * + * | #cells/bucket | + * #hashes | 1 | 2 | 4 | 8 | + * --------+-------+-------+-------+-------+ + * 1 | 0.006 | 0.006 | 0.03 | 0.12 | + * 2 | 0.49 | 0.86 |>0.93< |>0.96< | + * 3 | 0.91 | 0.97 | 0.98 | 0.999 | + * 4 | 0.97 | 0.99 | 0.999 | | + * + * The number of cells per bucket is chosen such that a bucket fits in one cache + * line. So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing, + * respectively. + * + ******************************************************************************/ +#define JEMALLOC_CKH_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static bool ckh_grow(ckh_t *ckh); +static void ckh_shrink(ckh_t *ckh); + +/******************************************************************************/ + +/* + * Search bucket for key and return the cell number if found; SIZE_T_MAX + * otherwise. + */ +JEMALLOC_INLINE_C size_t +ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) +{ + ckhc_t *cell; + unsigned i; + + for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; + if (cell->key != NULL && ckh->keycomp(key, cell->key)) + return ((bucket << LG_CKH_BUCKET_CELLS) + i); + } + + return (SIZE_T_MAX); +} + +/* + * Search table for key and return cell number if found; SIZE_T_MAX otherwise. + */ +JEMALLOC_INLINE_C size_t +ckh_isearch(ckh_t *ckh, const void *key) +{ + size_t hashes[2], bucket, cell; + + assert(ckh != NULL); + + ckh->hash(key, hashes); + + /* Search primary bucket. */ + bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); + cell = ckh_bucket_search(ckh, bucket, key); + if (cell != SIZE_T_MAX) + return (cell); + + /* Search secondary bucket. */ + bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); + cell = ckh_bucket_search(ckh, bucket, key); + return (cell); +} + +JEMALLOC_INLINE_C bool +ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, + const void *data) +{ + ckhc_t *cell; + unsigned offset, i; + + /* + * Cycle through the cells in the bucket, starting at a random position. + * The randomness avoids worst-case search overhead as buckets fill up. + */ + prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; + if (cell->key == NULL) { + cell->key = key; + cell->data = data; + ckh->count++; + return (false); + } + } + + return (true); +} + +/* + * No space is available in bucket. Randomly evict an item, then try to find an + * alternate location for that item. Iteratively repeat this + * eviction/relocation procedure until either success or detection of an + * eviction/relocation bucket cycle. + */ +JEMALLOC_INLINE_C bool +ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, + void const **argdata) +{ + const void *key, *data, *tkey, *tdata; + ckhc_t *cell; + size_t hashes[2], bucket, tbucket; + unsigned i; + + bucket = argbucket; + key = *argkey; + data = *argdata; + while (true) { + /* + * Choose a random item within the bucket to evict. This is + * critical to correct function, because without (eventually) + * evicting all items within a bucket during iteration, it + * would be possible to get stuck in an infinite loop if there + * were an item for which both hashes indicated the same + * bucket. + */ + prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; + assert(cell->key != NULL); + + /* Swap cell->{key,data} and {key,data} (evict). */ + tkey = cell->key; tdata = cell->data; + cell->key = key; cell->data = data; + key = tkey; data = tdata; + +#ifdef CKH_COUNT + ckh->nrelocs++; +#endif + + /* Find the alternate bucket for the evicted item. */ + ckh->hash(key, hashes); + tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); + if (tbucket == bucket) { + tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) + - 1); + /* + * It may be that (tbucket == bucket) still, if the + * item's hashes both indicate this bucket. However, + * we are guaranteed to eventually escape this bucket + * during iteration, assuming pseudo-random item + * selection (true randomness would make infinite + * looping a remote possibility). The reason we can + * never get trapped forever is that there are two + * cases: + * + * 1) This bucket == argbucket, so we will quickly + * detect an eviction cycle and terminate. + * 2) An item was evicted to this bucket from another, + * which means that at least one item in this bucket + * has hashes that indicate distinct buckets. + */ + } + /* Check for a cycle. */ + if (tbucket == argbucket) { + *argkey = key; + *argdata = data; + return (true); + } + + bucket = tbucket; + if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + return (false); + } +} + +JEMALLOC_INLINE_C bool +ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) +{ + size_t hashes[2], bucket; + const void *key = *argkey; + const void *data = *argdata; + + ckh->hash(key, hashes); + + /* Try to insert in primary bucket. */ + bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); + if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + return (false); + + /* Try to insert in secondary bucket. */ + bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); + if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + return (false); + + /* + * Try to find a place for this item via iterative eviction/relocation. + */ + return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata)); +} + +/* + * Try to rebuild the hash table from scratch by inserting all items from the + * old table into the new. + */ +JEMALLOC_INLINE_C bool +ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) +{ + size_t count, i, nins; + const void *key, *data; + + count = ckh->count; + ckh->count = 0; + for (i = nins = 0; nins < count; i++) { + if (aTab[i].key != NULL) { + key = aTab[i].key; + data = aTab[i].data; + if (ckh_try_insert(ckh, &key, &data)) { + ckh->count = count; + return (true); + } + nins++; + } + } + + return (false); +} + +static bool +ckh_grow(ckh_t *ckh) +{ + bool ret; + ckhc_t *tab, *ttab; + size_t lg_curcells; + unsigned lg_prevbuckets; + +#ifdef CKH_COUNT + ckh->ngrows++; +#endif + + /* + * It is possible (though unlikely, given well behaved hashes) that the + * table will have to be doubled more than once in order to create a + * usable table. + */ + lg_prevbuckets = ckh->lg_curbuckets; + lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; + while (true) { + size_t usize; + + lg_curcells++; + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); + if (usize == 0) { + ret = true; + goto label_return; + } + tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + if (tab == NULL) { + ret = true; + goto label_return; + } + /* Swap in new table. */ + ttab = ckh->tab; + ckh->tab = tab; + tab = ttab; + ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; + + if (ckh_rebuild(ckh, tab) == false) { + idalloc(tab); + break; + } + + /* Rebuilding failed, so back out partially rebuilt table. */ + idalloc(ckh->tab); + ckh->tab = tab; + ckh->lg_curbuckets = lg_prevbuckets; + } + + ret = false; +label_return: + return (ret); +} + +static void +ckh_shrink(ckh_t *ckh) +{ + ckhc_t *tab, *ttab; + size_t lg_curcells, usize; + unsigned lg_prevbuckets; + + /* + * It is possible (though unlikely, given well behaved hashes) that the + * table rebuild will fail. + */ + lg_prevbuckets = ckh->lg_curbuckets; + lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); + if (usize == 0) + return; + tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + if (tab == NULL) { + /* + * An OOM error isn't worth propagating, since it doesn't + * prevent this or future operations from proceeding. + */ + return; + } + /* Swap in new table. */ + ttab = ckh->tab; + ckh->tab = tab; + tab = ttab; + ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; + + if (ckh_rebuild(ckh, tab) == false) { + idalloc(tab); +#ifdef CKH_COUNT + ckh->nshrinks++; +#endif + return; + } + + /* Rebuilding failed, so back out partially rebuilt table. */ + idalloc(ckh->tab); + ckh->tab = tab; + ckh->lg_curbuckets = lg_prevbuckets; +#ifdef CKH_COUNT + ckh->nshrinkfails++; +#endif +} + +bool +ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) +{ + bool ret; + size_t mincells, usize; + unsigned lg_mincells; + + assert(minitems > 0); + assert(hash != NULL); + assert(keycomp != NULL); + +#ifdef CKH_COUNT + ckh->ngrows = 0; + ckh->nshrinks = 0; + ckh->nshrinkfails = 0; + ckh->ninserts = 0; + ckh->nrelocs = 0; +#endif + ckh->prng_state = 42; /* Value doesn't really matter. */ + ckh->count = 0; + + /* + * Find the minimum power of 2 that is large enough to fit aBaseCount + * entries. We are using (2+,2) cuckoo hashing, which has an expected + * maximum load factor of at least ~0.86, so 0.75 is a conservative load + * factor that will typically allow 2^aLgMinItems to fit without ever + * growing the table. + */ + assert(LG_CKH_BUCKET_CELLS > 0); + mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2; + for (lg_mincells = LG_CKH_BUCKET_CELLS; + (ZU(1) << lg_mincells) < mincells; + lg_mincells++) + ; /* Do nothing. */ + ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; + ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; + ckh->hash = hash; + ckh->keycomp = keycomp; + + usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); + if (usize == 0) { + ret = true; + goto label_return; + } + ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + if (ckh->tab == NULL) { + ret = true; + goto label_return; + } + + ret = false; +label_return: + return (ret); +} + +void +ckh_delete(ckh_t *ckh) +{ + + assert(ckh != NULL); + +#ifdef CKH_VERBOSE + malloc_printf( + "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64"," + " nshrinkfails: %"PRIu64", ninserts: %"PRIu64"," + " nrelocs: %"PRIu64"\n", __func__, ckh, + (unsigned long long)ckh->ngrows, + (unsigned long long)ckh->nshrinks, + (unsigned long long)ckh->nshrinkfails, + (unsigned long long)ckh->ninserts, + (unsigned long long)ckh->nrelocs); +#endif + + idalloc(ckh->tab); + if (config_debug) + memset(ckh, 0x5a, sizeof(ckh_t)); +} + +size_t +ckh_count(ckh_t *ckh) +{ + + assert(ckh != NULL); + + return (ckh->count); +} + +bool +ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) +{ + size_t i, ncells; + + for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets + + LG_CKH_BUCKET_CELLS)); i < ncells; i++) { + if (ckh->tab[i].key != NULL) { + if (key != NULL) + *key = (void *)ckh->tab[i].key; + if (data != NULL) + *data = (void *)ckh->tab[i].data; + *tabind = i + 1; + return (false); + } + } + + return (true); +} + +bool +ckh_insert(ckh_t *ckh, const void *key, const void *data) +{ + bool ret; + + assert(ckh != NULL); + assert(ckh_search(ckh, key, NULL, NULL)); + +#ifdef CKH_COUNT + ckh->ninserts++; +#endif + + while (ckh_try_insert(ckh, &key, &data)) { + if (ckh_grow(ckh)) { + ret = true; + goto label_return; + } + } + + ret = false; +label_return: + return (ret); +} + +bool +ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) +{ + size_t cell; + + assert(ckh != NULL); + + cell = ckh_isearch(ckh, searchkey); + if (cell != SIZE_T_MAX) { + if (key != NULL) + *key = (void *)ckh->tab[cell].key; + if (data != NULL) + *data = (void *)ckh->tab[cell].data; + ckh->tab[cell].key = NULL; + ckh->tab[cell].data = NULL; /* Not necessary. */ + + ckh->count--; + /* Try to halve the table if it is less than 1/4 full. */ + if (ckh->count < (ZU(1) << (ckh->lg_curbuckets + + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets + > ckh->lg_minbuckets) { + /* Ignore error due to OOM. */ + ckh_shrink(ckh); + } + + return (false); + } + + return (true); +} + +bool +ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) +{ + size_t cell; + + assert(ckh != NULL); + + cell = ckh_isearch(ckh, searchkey); + if (cell != SIZE_T_MAX) { + if (key != NULL) + *key = (void *)ckh->tab[cell].key; + if (data != NULL) + *data = (void *)ckh->tab[cell].data; + return (false); + } + + return (true); +} + +void +ckh_string_hash(const void *key, size_t r_hash[2]) +{ + + hash(key, strlen((const char *)key), 0x94122f33U, r_hash); +} + +bool +ckh_string_keycomp(const void *k1, const void *k2) +{ + + assert(k1 != NULL); + assert(k2 != NULL); + + return (strcmp((char *)k1, (char *)k2) ? false : true); +} + +void +ckh_pointer_hash(const void *key, size_t r_hash[2]) +{ + union { + const void *v; + size_t i; + } u; + + assert(sizeof(u.v) == sizeof(u.i)); + u.v = key; + hash(&u.i, sizeof(u.i), 0xd983396eU, r_hash); +} + +bool +ckh_pointer_keycomp(const void *k1, const void *k2) +{ + + return ((k1 == k2) ? true : false); +} diff --git a/deps/jemalloc/src/ctl.c b/deps/jemalloc/src/ctl.c new file mode 100644 index 0000000000..cc2c5aef57 --- /dev/null +++ b/deps/jemalloc/src/ctl.c @@ -0,0 +1,1684 @@ +#define JEMALLOC_CTL_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +/* + * ctl_mtx protects the following: + * - ctl_stats.* + * - opt_prof_active + */ +static malloc_mutex_t ctl_mtx; +static bool ctl_initialized; +static uint64_t ctl_epoch; +static ctl_stats_t ctl_stats; + +/******************************************************************************/ +/* Helpers for named and indexed nodes. */ + +static inline const ctl_named_node_t * +ctl_named_node(const ctl_node_t *node) +{ + + return ((node->named) ? (const ctl_named_node_t *)node : NULL); +} + +static inline const ctl_named_node_t * +ctl_named_children(const ctl_named_node_t *node, int index) +{ + const ctl_named_node_t *children = ctl_named_node(node->children); + + return (children ? &children[index] : NULL); +} + +static inline const ctl_indexed_node_t * +ctl_indexed_node(const ctl_node_t *node) +{ + + return ((node->named == false) ? (const ctl_indexed_node_t *)node : + NULL); +} + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +#define CTL_PROTO(n) \ +static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen); + +#define INDEX_PROTO(n) \ +static const ctl_named_node_t *n##_index(const size_t *mib, \ + size_t miblen, size_t i); + +static bool ctl_arena_init(ctl_arena_stats_t *astats); +static void ctl_arena_clear(ctl_arena_stats_t *astats); +static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, + arena_t *arena); +static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, + ctl_arena_stats_t *astats); +static void ctl_arena_refresh(arena_t *arena, unsigned i); +static bool ctl_grow(void); +static void ctl_refresh(void); +static bool ctl_init(void); +static int ctl_lookup(const char *name, ctl_node_t const **nodesp, + size_t *mibp, size_t *depthp); + +CTL_PROTO(version) +CTL_PROTO(epoch) +CTL_PROTO(thread_tcache_enabled) +CTL_PROTO(thread_tcache_flush) +CTL_PROTO(thread_arena) +CTL_PROTO(thread_allocated) +CTL_PROTO(thread_allocatedp) +CTL_PROTO(thread_deallocated) +CTL_PROTO(thread_deallocatedp) +CTL_PROTO(config_debug) +CTL_PROTO(config_dss) +CTL_PROTO(config_fill) +CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_mremap) +CTL_PROTO(config_munmap) +CTL_PROTO(config_prof) +CTL_PROTO(config_prof_libgcc) +CTL_PROTO(config_prof_libunwind) +CTL_PROTO(config_stats) +CTL_PROTO(config_tcache) +CTL_PROTO(config_tls) +CTL_PROTO(config_utrace) +CTL_PROTO(config_valgrind) +CTL_PROTO(config_xmalloc) +CTL_PROTO(opt_abort) +CTL_PROTO(opt_dss) +CTL_PROTO(opt_lg_chunk) +CTL_PROTO(opt_narenas) +CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_stats_print) +CTL_PROTO(opt_junk) +CTL_PROTO(opt_zero) +CTL_PROTO(opt_quarantine) +CTL_PROTO(opt_redzone) +CTL_PROTO(opt_utrace) +CTL_PROTO(opt_valgrind) +CTL_PROTO(opt_xmalloc) +CTL_PROTO(opt_tcache) +CTL_PROTO(opt_lg_tcache_max) +CTL_PROTO(opt_prof) +CTL_PROTO(opt_prof_prefix) +CTL_PROTO(opt_prof_active) +CTL_PROTO(opt_lg_prof_sample) +CTL_PROTO(opt_lg_prof_interval) +CTL_PROTO(opt_prof_gdump) +CTL_PROTO(opt_prof_final) +CTL_PROTO(opt_prof_leak) +CTL_PROTO(opt_prof_accum) +CTL_PROTO(arena_i_purge) +static void arena_purge(unsigned arena_ind); +CTL_PROTO(arena_i_dss) +INDEX_PROTO(arena_i) +CTL_PROTO(arenas_bin_i_size) +CTL_PROTO(arenas_bin_i_nregs) +CTL_PROTO(arenas_bin_i_run_size) +INDEX_PROTO(arenas_bin_i) +CTL_PROTO(arenas_lrun_i_size) +INDEX_PROTO(arenas_lrun_i) +CTL_PROTO(arenas_narenas) +CTL_PROTO(arenas_initialized) +CTL_PROTO(arenas_quantum) +CTL_PROTO(arenas_page) +CTL_PROTO(arenas_tcache_max) +CTL_PROTO(arenas_nbins) +CTL_PROTO(arenas_nhbins) +CTL_PROTO(arenas_nlruns) +CTL_PROTO(arenas_purge) +CTL_PROTO(arenas_extend) +CTL_PROTO(prof_active) +CTL_PROTO(prof_dump) +CTL_PROTO(prof_interval) +CTL_PROTO(stats_chunks_current) +CTL_PROTO(stats_chunks_total) +CTL_PROTO(stats_chunks_high) +CTL_PROTO(stats_huge_allocated) +CTL_PROTO(stats_huge_nmalloc) +CTL_PROTO(stats_huge_ndalloc) +CTL_PROTO(stats_arenas_i_small_allocated) +CTL_PROTO(stats_arenas_i_small_nmalloc) +CTL_PROTO(stats_arenas_i_small_ndalloc) +CTL_PROTO(stats_arenas_i_small_nrequests) +CTL_PROTO(stats_arenas_i_large_allocated) +CTL_PROTO(stats_arenas_i_large_nmalloc) +CTL_PROTO(stats_arenas_i_large_ndalloc) +CTL_PROTO(stats_arenas_i_large_nrequests) +CTL_PROTO(stats_arenas_i_bins_j_allocated) +CTL_PROTO(stats_arenas_i_bins_j_nmalloc) +CTL_PROTO(stats_arenas_i_bins_j_ndalloc) +CTL_PROTO(stats_arenas_i_bins_j_nrequests) +CTL_PROTO(stats_arenas_i_bins_j_nfills) +CTL_PROTO(stats_arenas_i_bins_j_nflushes) +CTL_PROTO(stats_arenas_i_bins_j_nruns) +CTL_PROTO(stats_arenas_i_bins_j_nreruns) +CTL_PROTO(stats_arenas_i_bins_j_curruns) +INDEX_PROTO(stats_arenas_i_bins_j) +CTL_PROTO(stats_arenas_i_lruns_j_nmalloc) +CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) +CTL_PROTO(stats_arenas_i_lruns_j_nrequests) +CTL_PROTO(stats_arenas_i_lruns_j_curruns) +INDEX_PROTO(stats_arenas_i_lruns_j) +CTL_PROTO(stats_arenas_i_nthreads) +CTL_PROTO(stats_arenas_i_dss) +CTL_PROTO(stats_arenas_i_pactive) +CTL_PROTO(stats_arenas_i_pdirty) +CTL_PROTO(stats_arenas_i_mapped) +CTL_PROTO(stats_arenas_i_npurge) +CTL_PROTO(stats_arenas_i_nmadvise) +CTL_PROTO(stats_arenas_i_purged) +INDEX_PROTO(stats_arenas_i) +CTL_PROTO(stats_cactive) +CTL_PROTO(stats_allocated) +CTL_PROTO(stats_active) +CTL_PROTO(stats_mapped) + +/******************************************************************************/ +/* mallctl tree. */ + +/* Maximum tree depth. */ +#define CTL_MAX_DEPTH 6 + +#define NAME(n) {true}, n +#define CHILD(t, c) \ + sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ + (ctl_node_t *)c##_node, \ + NULL +#define CTL(c) 0, NULL, c##_ctl + +/* + * Only handles internal indexed nodes, since there are currently no external + * ones. + */ +#define INDEX(i) {false}, i##_index + +static const ctl_named_node_t tcache_node[] = { + {NAME("enabled"), CTL(thread_tcache_enabled)}, + {NAME("flush"), CTL(thread_tcache_flush)} +}; + +static const ctl_named_node_t thread_node[] = { + {NAME("arena"), CTL(thread_arena)}, + {NAME("allocated"), CTL(thread_allocated)}, + {NAME("allocatedp"), CTL(thread_allocatedp)}, + {NAME("deallocated"), CTL(thread_deallocated)}, + {NAME("deallocatedp"), CTL(thread_deallocatedp)}, + {NAME("tcache"), CHILD(named, tcache)} +}; + +static const ctl_named_node_t config_node[] = { + {NAME("debug"), CTL(config_debug)}, + {NAME("dss"), CTL(config_dss)}, + {NAME("fill"), CTL(config_fill)}, + {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("mremap"), CTL(config_mremap)}, + {NAME("munmap"), CTL(config_munmap)}, + {NAME("prof"), CTL(config_prof)}, + {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, + {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("stats"), CTL(config_stats)}, + {NAME("tcache"), CTL(config_tcache)}, + {NAME("tls"), CTL(config_tls)}, + {NAME("utrace"), CTL(config_utrace)}, + {NAME("valgrind"), CTL(config_valgrind)}, + {NAME("xmalloc"), CTL(config_xmalloc)} +}; + +static const ctl_named_node_t opt_node[] = { + {NAME("abort"), CTL(opt_abort)}, + {NAME("dss"), CTL(opt_dss)}, + {NAME("lg_chunk"), CTL(opt_lg_chunk)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("stats_print"), CTL(opt_stats_print)}, + {NAME("junk"), CTL(opt_junk)}, + {NAME("zero"), CTL(opt_zero)}, + {NAME("quarantine"), CTL(opt_quarantine)}, + {NAME("redzone"), CTL(opt_redzone)}, + {NAME("utrace"), CTL(opt_utrace)}, + {NAME("valgrind"), CTL(opt_valgrind)}, + {NAME("xmalloc"), CTL(opt_xmalloc)}, + {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("prof"), CTL(opt_prof)}, + {NAME("prof_prefix"), CTL(opt_prof_prefix)}, + {NAME("prof_active"), CTL(opt_prof_active)}, + {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, + {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, + {NAME("prof_final"), CTL(opt_prof_final)}, + {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_accum"), CTL(opt_prof_accum)} +}; + +static const ctl_named_node_t arena_i_node[] = { + {NAME("purge"), CTL(arena_i_purge)}, + {NAME("dss"), CTL(arena_i_dss)} +}; +static const ctl_named_node_t super_arena_i_node[] = { + {NAME(""), CHILD(named, arena_i)} +}; + +static const ctl_indexed_node_t arena_node[] = { + {INDEX(arena_i)} +}; + +static const ctl_named_node_t arenas_bin_i_node[] = { + {NAME("size"), CTL(arenas_bin_i_size)}, + {NAME("nregs"), CTL(arenas_bin_i_nregs)}, + {NAME("run_size"), CTL(arenas_bin_i_run_size)} +}; +static const ctl_named_node_t super_arenas_bin_i_node[] = { + {NAME(""), CHILD(named, arenas_bin_i)} +}; + +static const ctl_indexed_node_t arenas_bin_node[] = { + {INDEX(arenas_bin_i)} +}; + +static const ctl_named_node_t arenas_lrun_i_node[] = { + {NAME("size"), CTL(arenas_lrun_i_size)} +}; +static const ctl_named_node_t super_arenas_lrun_i_node[] = { + {NAME(""), CHILD(named, arenas_lrun_i)} +}; + +static const ctl_indexed_node_t arenas_lrun_node[] = { + {INDEX(arenas_lrun_i)} +}; + +static const ctl_named_node_t arenas_node[] = { + {NAME("narenas"), CTL(arenas_narenas)}, + {NAME("initialized"), CTL(arenas_initialized)}, + {NAME("quantum"), CTL(arenas_quantum)}, + {NAME("page"), CTL(arenas_page)}, + {NAME("tcache_max"), CTL(arenas_tcache_max)}, + {NAME("nbins"), CTL(arenas_nbins)}, + {NAME("nhbins"), CTL(arenas_nhbins)}, + {NAME("bin"), CHILD(indexed, arenas_bin)}, + {NAME("nlruns"), CTL(arenas_nlruns)}, + {NAME("lrun"), CHILD(indexed, arenas_lrun)}, + {NAME("purge"), CTL(arenas_purge)}, + {NAME("extend"), CTL(arenas_extend)} +}; + +static const ctl_named_node_t prof_node[] = { + {NAME("active"), CTL(prof_active)}, + {NAME("dump"), CTL(prof_dump)}, + {NAME("interval"), CTL(prof_interval)} +}; + +static const ctl_named_node_t stats_chunks_node[] = { + {NAME("current"), CTL(stats_chunks_current)}, + {NAME("total"), CTL(stats_chunks_total)}, + {NAME("high"), CTL(stats_chunks_high)} +}; + +static const ctl_named_node_t stats_huge_node[] = { + {NAME("allocated"), CTL(stats_huge_allocated)}, + {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, + {NAME("ndalloc"), CTL(stats_huge_ndalloc)} +}; + +static const ctl_named_node_t stats_arenas_i_small_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} +}; + +static const ctl_named_node_t stats_arenas_i_large_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} +}; + +static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, + {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, + {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, + {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} +}; +static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_bins_j)} +}; + +static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { + {INDEX(stats_arenas_i_bins_j)} +}; + +static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { + {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, + {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} +}; +static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} +}; + +static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { + {INDEX(stats_arenas_i_lruns_j)} +}; + +static const ctl_named_node_t stats_arenas_i_node[] = { + {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("dss"), CTL(stats_arenas_i_dss)}, + {NAME("pactive"), CTL(stats_arenas_i_pactive)}, + {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, + {NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("npurge"), CTL(stats_arenas_i_npurge)}, + {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, + {NAME("purged"), CTL(stats_arenas_i_purged)}, + {NAME("small"), CHILD(named, stats_arenas_i_small)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)} +}; +static const ctl_named_node_t super_stats_arenas_i_node[] = { + {NAME(""), CHILD(named, stats_arenas_i)} +}; + +static const ctl_indexed_node_t stats_arenas_node[] = { + {INDEX(stats_arenas_i)} +}; + +static const ctl_named_node_t stats_node[] = { + {NAME("cactive"), CTL(stats_cactive)}, + {NAME("allocated"), CTL(stats_allocated)}, + {NAME("active"), CTL(stats_active)}, + {NAME("mapped"), CTL(stats_mapped)}, + {NAME("chunks"), CHILD(named, stats_chunks)}, + {NAME("huge"), CHILD(named, stats_huge)}, + {NAME("arenas"), CHILD(indexed, stats_arenas)} +}; + +static const ctl_named_node_t root_node[] = { + {NAME("version"), CTL(version)}, + {NAME("epoch"), CTL(epoch)}, + {NAME("thread"), CHILD(named, thread)}, + {NAME("config"), CHILD(named, config)}, + {NAME("opt"), CHILD(named, opt)}, + {NAME("arena"), CHILD(indexed, arena)}, + {NAME("arenas"), CHILD(named, arenas)}, + {NAME("prof"), CHILD(named, prof)}, + {NAME("stats"), CHILD(named, stats)} +}; +static const ctl_named_node_t super_root_node[] = { + {NAME(""), CHILD(named, root)} +}; + +#undef NAME +#undef CHILD +#undef CTL +#undef INDEX + +/******************************************************************************/ + +static bool +ctl_arena_init(ctl_arena_stats_t *astats) +{ + + if (astats->lstats == NULL) { + astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * + sizeof(malloc_large_stats_t)); + if (astats->lstats == NULL) + return (true); + } + + return (false); +} + +static void +ctl_arena_clear(ctl_arena_stats_t *astats) +{ + + astats->dss = dss_prec_names[dss_prec_limit]; + astats->pactive = 0; + astats->pdirty = 0; + if (config_stats) { + memset(&astats->astats, 0, sizeof(arena_stats_t)); + astats->allocated_small = 0; + astats->nmalloc_small = 0; + astats->ndalloc_small = 0; + astats->nrequests_small = 0; + memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); + memset(astats->lstats, 0, nlclasses * + sizeof(malloc_large_stats_t)); + } +} + +static void +ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) +{ + unsigned i; + + arena_stats_merge(arena, &cstats->dss, &cstats->pactive, + &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); + + for (i = 0; i < NBINS; i++) { + cstats->allocated_small += cstats->bstats[i].allocated; + cstats->nmalloc_small += cstats->bstats[i].nmalloc; + cstats->ndalloc_small += cstats->bstats[i].ndalloc; + cstats->nrequests_small += cstats->bstats[i].nrequests; + } +} + +static void +ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) +{ + unsigned i; + + sstats->pactive += astats->pactive; + sstats->pdirty += astats->pdirty; + + sstats->astats.mapped += astats->astats.mapped; + sstats->astats.npurge += astats->astats.npurge; + sstats->astats.nmadvise += astats->astats.nmadvise; + sstats->astats.purged += astats->astats.purged; + + sstats->allocated_small += astats->allocated_small; + sstats->nmalloc_small += astats->nmalloc_small; + sstats->ndalloc_small += astats->ndalloc_small; + sstats->nrequests_small += astats->nrequests_small; + + sstats->astats.allocated_large += astats->astats.allocated_large; + sstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sstats->astats.nrequests_large += astats->astats.nrequests_large; + + for (i = 0; i < nlclasses; i++) { + sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += astats->lstats[i].nrequests; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } + + for (i = 0; i < NBINS; i++) { + sstats->bstats[i].allocated += astats->bstats[i].allocated; + sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; + sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; + sstats->bstats[i].nrequests += astats->bstats[i].nrequests; + if (config_tcache) { + sstats->bstats[i].nfills += astats->bstats[i].nfills; + sstats->bstats[i].nflushes += + astats->bstats[i].nflushes; + } + sstats->bstats[i].nruns += astats->bstats[i].nruns; + sstats->bstats[i].reruns += astats->bstats[i].reruns; + sstats->bstats[i].curruns += astats->bstats[i].curruns; + } +} + +static void +ctl_arena_refresh(arena_t *arena, unsigned i) +{ + ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; + ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; + + ctl_arena_clear(astats); + + sstats->nthreads += astats->nthreads; + if (config_stats) { + ctl_arena_stats_amerge(astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); + } else { + astats->pactive += arena->nactive; + astats->pdirty += arena->ndirty; + /* Merge into sum stats as well. */ + sstats->pactive += arena->nactive; + sstats->pdirty += arena->ndirty; + } +} + +static bool +ctl_grow(void) +{ + ctl_arena_stats_t *astats; + arena_t **tarenas; + + /* Allocate extended arena stats and arenas arrays. */ + astats = (ctl_arena_stats_t *)imalloc((ctl_stats.narenas + 2) * + sizeof(ctl_arena_stats_t)); + if (astats == NULL) + return (true); + tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) * + sizeof(arena_t *)); + if (tarenas == NULL) { + idalloc(astats); + return (true); + } + + /* Initialize the new astats element. */ + memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * + sizeof(ctl_arena_stats_t)); + memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); + if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) { + idalloc(tarenas); + idalloc(astats); + return (true); + } + /* Swap merged stats to their new location. */ + { + ctl_arena_stats_t tstats; + memcpy(&tstats, &astats[ctl_stats.narenas], + sizeof(ctl_arena_stats_t)); + memcpy(&astats[ctl_stats.narenas], + &astats[ctl_stats.narenas + 1], sizeof(ctl_arena_stats_t)); + memcpy(&astats[ctl_stats.narenas + 1], &tstats, + sizeof(ctl_arena_stats_t)); + } + /* Initialize the new arenas element. */ + tarenas[ctl_stats.narenas] = NULL; + { + arena_t **arenas_old = arenas; + /* + * Swap extended arenas array into place. Although ctl_mtx + * protects this function from other threads extending the + * array, it does not protect from other threads mutating it + * (i.e. initializing arenas and setting array elements to + * point to them). Therefore, array copying must happen under + * the protection of arenas_lock. + */ + malloc_mutex_lock(&arenas_lock); + arenas = tarenas; + memcpy(arenas, arenas_old, ctl_stats.narenas * + sizeof(arena_t *)); + narenas_total++; + arenas_extend(narenas_total - 1); + malloc_mutex_unlock(&arenas_lock); + /* + * Deallocate arenas_old only if it came from imalloc() (not + * base_alloc()). + */ + if (ctl_stats.narenas != narenas_auto) + idalloc(arenas_old); + } + ctl_stats.arenas = astats; + ctl_stats.narenas++; + + return (false); +} + +static void +ctl_refresh(void) +{ + unsigned i; + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); + + if (config_stats) { + malloc_mutex_lock(&chunks_mtx); + ctl_stats.chunks.current = stats_chunks.curchunks; + ctl_stats.chunks.total = stats_chunks.nchunks; + ctl_stats.chunks.high = stats_chunks.highchunks; + malloc_mutex_unlock(&chunks_mtx); + + malloc_mutex_lock(&huge_mtx); + ctl_stats.huge.allocated = huge_allocated; + ctl_stats.huge.nmalloc = huge_nmalloc; + ctl_stats.huge.ndalloc = huge_ndalloc; + malloc_mutex_unlock(&huge_mtx); + } + + /* + * Clear sum stats, since they will be merged into by + * ctl_arena_refresh(). + */ + ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; + ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + for (i = 0; i < ctl_stats.narenas; i++) { + if (arenas[i] != NULL) + ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; + else + ctl_stats.arenas[i].nthreads = 0; + } + malloc_mutex_unlock(&arenas_lock); + for (i = 0; i < ctl_stats.narenas; i++) { + bool initialized = (tarenas[i] != NULL); + + ctl_stats.arenas[i].initialized = initialized; + if (initialized) + ctl_arena_refresh(tarenas[i], i); + } + + if (config_stats) { + ctl_stats.allocated = + ctl_stats.arenas[ctl_stats.narenas].allocated_small + + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + + ctl_stats.huge.allocated; + ctl_stats.active = + (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE) + + ctl_stats.huge.allocated; + ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); + } + + ctl_epoch++; +} + +static bool +ctl_init(void) +{ + bool ret; + + malloc_mutex_lock(&ctl_mtx); + if (ctl_initialized == false) { + /* + * Allocate space for one extra arena stats element, which + * contains summed stats across all arenas. + */ + assert(narenas_auto == narenas_total_get()); + ctl_stats.narenas = narenas_auto; + ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( + (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); + if (ctl_stats.arenas == NULL) { + ret = true; + goto label_return; + } + memset(ctl_stats.arenas, 0, (ctl_stats.narenas + 1) * + sizeof(ctl_arena_stats_t)); + + /* + * Initialize all stats structures, regardless of whether they + * ever get used. Lazy initialization would allow errors to + * cause inconsistent state to be viewable by the application. + */ + if (config_stats) { + unsigned i; + for (i = 0; i <= ctl_stats.narenas; i++) { + if (ctl_arena_init(&ctl_stats.arenas[i])) { + ret = true; + goto label_return; + } + } + } + ctl_stats.arenas[ctl_stats.narenas].initialized = true; + + ctl_epoch = 0; + ctl_refresh(); + ctl_initialized = true; + } + + ret = false; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, + size_t *depthp) +{ + int ret; + const char *elm, *tdot, *dot; + size_t elen, i, j; + const ctl_named_node_t *node; + + elm = name; + /* Equivalent to strchrnul(). */ + dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : strchr(elm, '\0'); + elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); + if (elen == 0) { + ret = ENOENT; + goto label_return; + } + node = super_root_node; + for (i = 0; i < *depthp; i++) { + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { + const ctl_named_node_t *pnode = node; + + /* Children are named. */ + for (j = 0; j < node->nchildren; j++) { + const ctl_named_node_t *child = + ctl_named_children(node, j); + if (strlen(child->name) == elen && + strncmp(elm, child->name, elen) == 0) { + node = child; + if (nodesp != NULL) + nodesp[i] = + (const ctl_node_t *)node; + mibp[i] = j; + break; + } + } + if (node == pnode) { + ret = ENOENT; + goto label_return; + } + } else { + uintmax_t index; + const ctl_indexed_node_t *inode; + + /* Children are indexed. */ + index = malloc_strtoumax(elm, NULL, 10); + if (index == UINTMAX_MAX || index > SIZE_T_MAX) { + ret = ENOENT; + goto label_return; + } + + inode = ctl_indexed_node(node->children); + node = inode->index(mibp, *depthp, (size_t)index); + if (node == NULL) { + ret = ENOENT; + goto label_return; + } + + if (nodesp != NULL) + nodesp[i] = (const ctl_node_t *)node; + mibp[i] = (size_t)index; + } + + if (node->ctl != NULL) { + /* Terminal node. */ + if (*dot != '\0') { + /* + * The name contains more elements than are + * in this path through the tree. + */ + ret = ENOENT; + goto label_return; + } + /* Complete lookup successful. */ + *depthp = i + 1; + break; + } + + /* Update elm. */ + if (*dot == '\0') { + /* No more elements. */ + ret = ENOENT; + goto label_return; + } + elm = &dot[1]; + dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : + strchr(elm, '\0'); + elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); + } + + ret = 0; +label_return: + return (ret); +} + +int +ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) +{ + int ret; + size_t depth; + ctl_node_t const *nodes[CTL_MAX_DEPTH]; + size_t mib[CTL_MAX_DEPTH]; + const ctl_named_node_t *node; + + if (ctl_initialized == false && ctl_init()) { + ret = EAGAIN; + goto label_return; + } + + depth = CTL_MAX_DEPTH; + ret = ctl_lookup(name, nodes, mib, &depth); + if (ret != 0) + goto label_return; + + node = ctl_named_node(nodes[depth-1]); + if (node != NULL && node->ctl) + ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); + else { + /* The name refers to a partial path through the ctl tree. */ + ret = ENOENT; + } + +label_return: + return(ret); +} + +int +ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) +{ + int ret; + + if (ctl_initialized == false && ctl_init()) { + ret = EAGAIN; + goto label_return; + } + + ret = ctl_lookup(name, NULL, mibp, miblenp); +label_return: + return(ret); +} + +int +ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + const ctl_named_node_t *node; + size_t i; + + if (ctl_initialized == false && ctl_init()) { + ret = EAGAIN; + goto label_return; + } + + /* Iterate down the tree. */ + node = super_root_node; + for (i = 0; i < miblen; i++) { + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { + /* Children are named. */ + if (node->nchildren <= mib[i]) { + ret = ENOENT; + goto label_return; + } + node = ctl_named_children(node, mib[i]); + } else { + const ctl_indexed_node_t *inode; + + /* Indexed element. */ + inode = ctl_indexed_node(node->children); + node = inode->index(mib, miblen, mib[i]); + if (node == NULL) { + ret = ENOENT; + goto label_return; + } + } + } + + /* Call the ctl function. */ + if (node && node->ctl) + ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + else { + /* Partial MIB. */ + ret = ENOENT; + } + +label_return: + return(ret); +} + +bool +ctl_boot(void) +{ + + if (malloc_mutex_init(&ctl_mtx)) + return (true); + + ctl_initialized = false; + + return (false); +} + +void +ctl_prefork(void) +{ + + malloc_mutex_prefork(&ctl_mtx); +} + +void +ctl_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&ctl_mtx); +} + +void +ctl_postfork_child(void) +{ + + malloc_mutex_postfork_child(&ctl_mtx); +} + +/******************************************************************************/ +/* *_ctl() functions. */ + +#define READONLY() do { \ + if (newp != NULL || newlen != 0) { \ + ret = EPERM; \ + goto label_return; \ + } \ +} while (0) + +#define WRITEONLY() do { \ + if (oldp != NULL || oldlenp != NULL) { \ + ret = EPERM; \ + goto label_return; \ + } \ +} while (0) + +#define READ(v, t) do { \ + if (oldp != NULL && oldlenp != NULL) { \ + if (*oldlenp != sizeof(t)) { \ + size_t copylen = (sizeof(t) <= *oldlenp) \ + ? sizeof(t) : *oldlenp; \ + memcpy(oldp, (void *)&(v), copylen); \ + ret = EINVAL; \ + goto label_return; \ + } else \ + *(t *)oldp = (v); \ + } \ +} while (0) + +#define WRITE(v, t) do { \ + if (newp != NULL) { \ + if (newlen != sizeof(t)) { \ + ret = EINVAL; \ + goto label_return; \ + } \ + (v) = *(t *)newp; \ + } \ +} while (0) + +/* + * There's a lot of code duplication in the following macros due to limitations + * in how nested cpp macros are expanded. + */ +#define CTL_RO_CLGEN(c, l, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + if (l) \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + if (l) \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +#define CTL_RO_CGEN(c, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +#define CTL_RO_GEN(n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +/* + * ctl_mtx is not acquired, under the assumption that no pertinent data will + * mutate during the call. + */ +#define CTL_RO_NL_CGEN(c, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + return (ret); \ +} + +#define CTL_RO_NL_GEN(n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + return (ret); \ +} + +#define CTL_RO_BOOL_CONFIG_GEN(n) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + bool oldval; \ + \ + READONLY(); \ + oldval = n; \ + READ(oldval, bool); \ + \ + ret = 0; \ +label_return: \ + return (ret); \ +} + +/******************************************************************************/ + +CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) + +static int +epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + UNUSED uint64_t newval; + + malloc_mutex_lock(&ctl_mtx); + WRITE(newval, uint64_t); + if (newp != NULL) + ctl_refresh(); + READ(ctl_epoch, uint64_t); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +/******************************************************************************/ + +CTL_RO_BOOL_CONFIG_GEN(config_debug) +CTL_RO_BOOL_CONFIG_GEN(config_dss) +CTL_RO_BOOL_CONFIG_GEN(config_fill) +CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_mremap) +CTL_RO_BOOL_CONFIG_GEN(config_munmap) +CTL_RO_BOOL_CONFIG_GEN(config_prof) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) +CTL_RO_BOOL_CONFIG_GEN(config_stats) +CTL_RO_BOOL_CONFIG_GEN(config_tcache) +CTL_RO_BOOL_CONFIG_GEN(config_tls) +CTL_RO_BOOL_CONFIG_GEN(config_utrace) +CTL_RO_BOOL_CONFIG_GEN(config_valgrind) +CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) + +/******************************************************************************/ + +CTL_RO_NL_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) +CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) +CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) +CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) +CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) +CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) +CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) +CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) +CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) +CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) +CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) + +/******************************************************************************/ + +static int +thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + unsigned newind, oldind; + + malloc_mutex_lock(&ctl_mtx); + newind = oldind = choose_arena(NULL)->ind; + WRITE(newind, unsigned); + READ(oldind, unsigned); + if (newind != oldind) { + arena_t *arena; + + if (newind >= ctl_stats.narenas) { + /* New arena index is out of range. */ + ret = EFAULT; + goto label_return; + } + + /* Initialize arena if necessary. */ + malloc_mutex_lock(&arenas_lock); + if ((arena = arenas[newind]) == NULL && (arena = + arenas_extend(newind)) == NULL) { + malloc_mutex_unlock(&arenas_lock); + ret = EAGAIN; + goto label_return; + } + assert(arena == arenas[newind]); + arenas[oldind]->nthreads--; + arenas[newind]->nthreads++; + malloc_mutex_unlock(&arenas_lock); + + /* Set new arena association. */ + if (config_tcache) { + tcache_t *tcache; + if ((uintptr_t)(tcache = *tcache_tsd_get()) > + (uintptr_t)TCACHE_STATE_MAX) { + tcache_arena_dissociate(tcache); + tcache_arena_associate(tcache, arena); + } + } + arenas_tsd_set(&arena); + } + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +CTL_RO_NL_CGEN(config_stats, thread_allocated, + thread_allocated_tsd_get()->allocated, uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_allocatedp, + &thread_allocated_tsd_get()->allocated, uint64_t *) +CTL_RO_NL_CGEN(config_stats, thread_deallocated, + thread_allocated_tsd_get()->deallocated, uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, + &thread_allocated_tsd_get()->deallocated, uint64_t *) + +static int +thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (config_tcache == false) + return (ENOENT); + + oldval = tcache_enabled_get(); + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + tcache_enabled_set(*(bool *)newp); + } + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + +static int +thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (config_tcache == false) + return (ENOENT); + + READONLY(); + WRITEONLY(); + + tcache_flush(); + + ret = 0; +label_return: + return (ret); +} + +/******************************************************************************/ + +/* ctl_mutex must be held during execution of this function. */ +static void +arena_purge(unsigned arena_ind) +{ + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + malloc_mutex_unlock(&arenas_lock); + + if (arena_ind == ctl_stats.narenas) { + unsigned i; + for (i = 0; i < ctl_stats.narenas; i++) { + if (tarenas[i] != NULL) + arena_purge_all(tarenas[i]); + } + } else { + assert(arena_ind < ctl_stats.narenas); + if (tarenas[arena_ind] != NULL) + arena_purge_all(tarenas[arena_ind]); + } +} + +static int +arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + READONLY(); + WRITEONLY(); + malloc_mutex_lock(&ctl_mtx); + arena_purge(mib[1]); + malloc_mutex_unlock(&ctl_mtx); + + ret = 0; +label_return: + return (ret); +} + +static int +arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret, i; + bool match, err; + const char *dss; + unsigned arena_ind = mib[1]; + dss_prec_t dss_prec_old = dss_prec_limit; + dss_prec_t dss_prec = dss_prec_limit; + + malloc_mutex_lock(&ctl_mtx); + WRITE(dss, const char *); + match = false; + for (i = 0; i < dss_prec_limit; i++) { + if (strcmp(dss_prec_names[i], dss) == 0) { + dss_prec = i; + match = true; + break; + } + } + if (match == false) { + ret = EINVAL; + goto label_return; + } + + if (arena_ind < ctl_stats.narenas) { + arena_t *arena = arenas[arena_ind]; + if (arena != NULL) { + dss_prec_old = arena_dss_prec_get(arena); + arena_dss_prec_set(arena, dss_prec); + err = false; + } else + err = true; + } else { + dss_prec_old = chunk_dss_prec_get(); + err = chunk_dss_prec_set(dss_prec); + } + dss = dss_prec_names[dss_prec_old]; + READ(dss, const char *); + if (err) { + ret = EFAULT; + goto label_return; + } + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static const ctl_named_node_t * +arena_i_index(const size_t *mib, size_t miblen, size_t i) +{ + const ctl_named_node_t * ret; + + malloc_mutex_lock(&ctl_mtx); + if (i > ctl_stats.narenas) { + ret = NULL; + goto label_return; + } + + ret = super_arena_i_node; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +/******************************************************************************/ + +static int +arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned narenas; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (*oldlenp != sizeof(unsigned)) { + ret = EINVAL; + goto label_return; + } + narenas = ctl_stats.narenas; + READ(narenas, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned nread, i; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { + ret = EINVAL; + nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) + ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas; + } else { + ret = 0; + nread = ctl_stats.narenas; + } + + for (i = 0; i < nread; i++) + ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; + +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) +CTL_RO_NL_GEN(arenas_page, PAGE, size_t) +CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) +CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) +CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) +CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) +static const ctl_named_node_t * +arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) +{ + + if (i > NBINS) + return (NULL); + return (super_arenas_bin_i_node); +} + +CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) +static const ctl_named_node_t * +arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) +{ + + if (i > nlclasses) + return (NULL); + return (super_arenas_lrun_i_node); +} + +static int +arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind; + + malloc_mutex_lock(&ctl_mtx); + WRITEONLY(); + arena_ind = UINT_MAX; + WRITE(arena_ind, unsigned); + if (newp != NULL && arena_ind >= ctl_stats.narenas) + ret = EFAULT; + else { + if (arena_ind == UINT_MAX) + arena_ind = ctl_stats.narenas; + arena_purge(arena_ind); + ret = 0; + } + +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + unsigned narenas; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (ctl_grow()) { + ret = EAGAIN; + goto label_return; + } + narenas = ctl_stats.narenas - 1; + READ(narenas, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +/******************************************************************************/ + +static int +prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (config_prof == false) + return (ENOENT); + + malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */ + oldval = opt_prof_active; + if (newp != NULL) { + /* + * The memory barriers will tend to make opt_prof_active + * propagate faster on systems with weak memory ordering. + */ + mb_write(); + WRITE(opt_prof_active, bool); + mb_write(); + } + READ(oldval, bool); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + const char *filename = NULL; + + if (config_prof == false) + return (ENOENT); + + WRITEONLY(); + WRITE(filename, const char *); + + if (prof_mdump(filename)) { + ret = EFAULT; + goto label_return; + } + + ret = 0; +label_return: + return (ret); +} + +CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) + +/******************************************************************************/ + +CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) +CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) +CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) +CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) + +CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, + size_t) +CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) +CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) +CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t) +CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t) + +CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) +CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) +CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, + ctl_stats.arenas[mib[2]].astats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, + ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, + ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_purged, + ctl_stats.arenas[mib[2]].astats.purged, uint64_t) + +CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, + ctl_stats.arenas[mib[2]].allocated_small, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, + ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, + ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, + ctl_stats.arenas[mib[2]].nrequests_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, + ctl_stats.arenas[mib[2]].astats.allocated_large, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, + ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, + ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, + ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t) + +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated, + ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, + ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t) +CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) +CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, + ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, + ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) + +static const ctl_named_node_t * +stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) +{ + + if (j > NBINS) + return (NULL); + return (super_stats_arenas_i_bins_j_node); +} + +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nmalloc, + ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_ndalloc, + ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, + ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, + ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) + +static const ctl_named_node_t * +stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) +{ + + if (j > nlclasses) + return (NULL); + return (super_stats_arenas_i_lruns_j_node); +} + +static const ctl_named_node_t * +stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) +{ + const ctl_named_node_t * ret; + + malloc_mutex_lock(&ctl_mtx); + if (i > ctl_stats.narenas || ctl_stats.arenas[i].initialized == false) { + ret = NULL; + goto label_return; + } + + ret = super_stats_arenas_i_node; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} diff --git a/deps/jemalloc/src/extent.c b/deps/jemalloc/src/extent.c new file mode 100644 index 0000000000..8c09b486ed --- /dev/null +++ b/deps/jemalloc/src/extent.c @@ -0,0 +1,39 @@ +#define JEMALLOC_EXTENT_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ + +static inline int +extent_szad_comp(extent_node_t *a, extent_node_t *b) +{ + int ret; + size_t a_size = a->size; + size_t b_size = b->size; + + ret = (a_size > b_size) - (a_size < b_size); + if (ret == 0) { + uintptr_t a_addr = (uintptr_t)a->addr; + uintptr_t b_addr = (uintptr_t)b->addr; + + ret = (a_addr > b_addr) - (a_addr < b_addr); + } + + return (ret); +} + +/* Generate red-black tree functions. */ +rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, + extent_szad_comp) + +static inline int +extent_ad_comp(extent_node_t *a, extent_node_t *b) +{ + uintptr_t a_addr = (uintptr_t)a->addr; + uintptr_t b_addr = (uintptr_t)b->addr; + + return ((a_addr > b_addr) - (a_addr < b_addr)); +} + +/* Generate red-black tree functions. */ +rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad, + extent_ad_comp) diff --git a/deps/jemalloc/src/hash.c b/deps/jemalloc/src/hash.c new file mode 100644 index 0000000000..cfa4da0275 --- /dev/null +++ b/deps/jemalloc/src/hash.c @@ -0,0 +1,2 @@ +#define JEMALLOC_HASH_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/deps/jemalloc/src/huge.c b/deps/jemalloc/src/huge.c new file mode 100644 index 0000000000..d72f213570 --- /dev/null +++ b/deps/jemalloc/src/huge.c @@ -0,0 +1,347 @@ +#define JEMALLOC_HUGE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +uint64_t huge_nmalloc; +uint64_t huge_ndalloc; +size_t huge_allocated; + +malloc_mutex_t huge_mtx; + +/******************************************************************************/ + +/* Tree of chunks that are stand-alone huge allocations. */ +static extent_tree_t huge; + +void * +huge_malloc(size_t size, bool zero, dss_prec_t dss_prec) +{ + + return (huge_palloc(size, chunksize, zero, dss_prec)); +} + +void * +huge_palloc(size_t size, size_t alignment, bool zero, dss_prec_t dss_prec) +{ + void *ret; + size_t csize; + extent_node_t *node; + bool is_zeroed; + + /* Allocate one or more contiguous chunks for this request. */ + + csize = CHUNK_CEILING(size); + if (csize == 0) { + /* size is large enough to cause size_t wrap-around. */ + return (NULL); + } + + /* Allocate an extent node with which to track the chunk. */ + node = base_node_alloc(); + if (node == NULL) + return (NULL); + + /* + * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that + * it is possible to make correct junk/zero fill decisions below. + */ + is_zeroed = zero; + ret = chunk_alloc(csize, alignment, false, &is_zeroed, dss_prec); + if (ret == NULL) { + base_node_dealloc(node); + return (NULL); + } + + /* Insert node into huge. */ + node->addr = ret; + node->size = csize; + + malloc_mutex_lock(&huge_mtx); + extent_tree_ad_insert(&huge, node); + if (config_stats) { + stats_cactive_add(csize); + huge_nmalloc++; + huge_allocated += csize; + } + malloc_mutex_unlock(&huge_mtx); + + if (config_fill && zero == false) { + if (opt_junk) + memset(ret, 0xa5, csize); + else if (opt_zero && is_zeroed == false) + memset(ret, 0, csize); + } + + return (ret); +} + +bool +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) +{ + + /* + * Avoid moving the allocation if the size class can be left the same. + */ + if (oldsize > arena_maxclass + && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { + assert(CHUNK_CEILING(oldsize) == oldsize); + return (false); + } + + /* Reallocation would require a move. */ + return (true); +} + +void * +huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. */ + if (huge_ralloc_no_move(ptr, oldsize, size, extra) == false) + return (ptr); + + /* + * size and oldsize are different enough that we need to use a + * different size class. In that case, fall back to allocating new + * space and copying. + */ + if (alignment > chunksize) + ret = huge_palloc(size + extra, alignment, zero, dss_prec); + else + ret = huge_malloc(size + extra, zero, dss_prec); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment > chunksize) + ret = huge_palloc(size, alignment, zero, dss_prec); + else + ret = huge_malloc(size, zero, dss_prec); + + if (ret == NULL) + return (NULL); + } + + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + +#ifdef JEMALLOC_MREMAP + /* + * Use mremap(2) if this is a huge-->huge reallocation, and neither the + * source nor the destination are in dss. + */ + if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) + == false && chunk_in_dss(ret) == false))) { + size_t newsize = huge_salloc(ret); + + /* + * Remove ptr from the tree of huge allocations before + * performing the remap operation, in order to avoid the + * possibility of another thread acquiring that mapping before + * this one removes it from the tree. + */ + huge_dalloc(ptr, false); + if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, + ret) == MAP_FAILED) { + /* + * Assuming no chunk management bugs in the allocator, + * the only documented way an error can occur here is + * if the application changed the map type for a + * portion of the old allocation. This is firmly in + * undefined behavior territory, so write a diagnostic + * message, and optionally abort. + */ + char buf[BUFERROR_BUF]; + + buferror(get_errno(), buf, sizeof(buf)); + malloc_printf("<jemalloc>: Error in mremap(): %s\n", + buf); + if (opt_abort) + abort(); + memcpy(ret, ptr, copysize); + chunk_dealloc_mmap(ptr, oldsize); + } else if (config_fill && zero == false && opt_junk && oldsize + < newsize) { + /* + * mremap(2) clobbers the original mapping, so + * junk/zero filling is not preserved. There is no + * need to zero fill here, since any trailing + * uninititialized memory is demand-zeroed by the + * kernel, but junk filling must be redone. + */ + memset(ret + oldsize, 0xa5, newsize - oldsize); + } + } else +#endif + { + memcpy(ret, ptr, copysize); + iqalloct(ptr, try_tcache_dalloc); + } + return (ret); +} + +#ifdef JEMALLOC_JET +#undef huge_dalloc_junk +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) +#endif +static void +huge_dalloc_junk(void *ptr, size_t usize) +{ + + if (config_fill && config_dss && opt_junk) { + /* + * Only bother junk filling if the chunk isn't about to be + * unmapped. + */ + if (config_munmap == false || (config_dss && chunk_in_dss(ptr))) + memset(ptr, 0x5a, usize); + } +} +#ifdef JEMALLOC_JET +#undef huge_dalloc_junk +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) +huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); +#endif + +void +huge_dalloc(void *ptr, bool unmap) +{ + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Extract from tree of huge allocations. */ + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + extent_tree_ad_remove(&huge, node); + + if (config_stats) { + stats_cactive_sub(node->size); + huge_ndalloc++; + huge_allocated -= node->size; + } + + malloc_mutex_unlock(&huge_mtx); + + if (unmap) + huge_dalloc_junk(node->addr, node->size); + + chunk_dealloc(node->addr, node->size, unmap); + + base_node_dealloc(node); +} + +size_t +huge_salloc(const void *ptr) +{ + size_t ret; + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Extract from tree of huge allocations. */ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + + ret = node->size; + + malloc_mutex_unlock(&huge_mtx); + + return (ret); +} + +dss_prec_t +huge_dss_prec_get(arena_t *arena) +{ + + return (arena_dss_prec_get(choose_arena(arena))); +} + +prof_ctx_t * +huge_prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Extract from tree of huge allocations. */ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + + ret = node->prof_ctx; + + malloc_mutex_unlock(&huge_mtx); + + return (ret); +} + +void +huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Extract from tree of huge allocations. */ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + + node->prof_ctx = ctx; + + malloc_mutex_unlock(&huge_mtx); +} + +bool +huge_boot(void) +{ + + /* Initialize chunks data. */ + if (malloc_mutex_init(&huge_mtx)) + return (true); + extent_tree_ad_new(&huge); + + if (config_stats) { + huge_nmalloc = 0; + huge_ndalloc = 0; + huge_allocated = 0; + } + + return (false); +} + +void +huge_prefork(void) +{ + + malloc_mutex_prefork(&huge_mtx); +} + +void +huge_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&huge_mtx); +} + +void +huge_postfork_child(void) +{ + + malloc_mutex_postfork_child(&huge_mtx); +} diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c new file mode 100644 index 0000000000..204778bc89 --- /dev/null +++ b/deps/jemalloc/src/jemalloc.c @@ -0,0 +1,2111 @@ +#define JEMALLOC_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +malloc_tsd_data(, arenas, arena_t *, NULL) +malloc_tsd_data(, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER) + +/* Runtime configuration options. */ +const char *je_malloc_conf; +bool opt_abort = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +bool opt_junk = +#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) + true +#else + false +#endif + ; +size_t opt_quarantine = ZU(0); +bool opt_redzone = false; +bool opt_utrace = false; +bool opt_valgrind = false; +bool opt_xmalloc = false; +bool opt_zero = false; +size_t opt_narenas = 0; + +unsigned ncpus; + +malloc_mutex_t arenas_lock; +arena_t **arenas; +unsigned narenas_total; +unsigned narenas_auto; + +/* Set to true once the allocator has been initialized. */ +static bool malloc_initialized = false; + +#ifdef JEMALLOC_THREADED_INIT +/* Used to let the initializing thread recursively allocate. */ +# define NO_INITIALIZER ((unsigned long)0) +# define INITIALIZER pthread_self() +# define IS_INITIALIZER (malloc_initializer == pthread_self()) +static pthread_t malloc_initializer = NO_INITIALIZER; +#else +# define NO_INITIALIZER false +# define INITIALIZER true +# define IS_INITIALIZER malloc_initializer +static bool malloc_initializer = NO_INITIALIZER; +#endif + +/* Used to avoid initialization races. */ +#ifdef _WIN32 +static malloc_mutex_t init_lock; + +JEMALLOC_ATTR(constructor) +static void WINAPI +_init_init_lock(void) +{ + + malloc_mutex_init(&init_lock); +} + +#ifdef _MSC_VER +# pragma section(".CRT$XCU", read) +JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) +static const void (WINAPI *init_init_lock)(void) = _init_init_lock; +#endif + +#else +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +#endif + +typedef struct { + void *p; /* Input pointer (as in realloc(p, s)). */ + size_t s; /* Request size. */ + void *r; /* Result pointer. */ +} malloc_utrace_t; + +#ifdef JEMALLOC_UTRACE +# define UTRACE(a, b, c) do { \ + if (opt_utrace) { \ + int utrace_serrno = errno; \ + malloc_utrace_t ut; \ + ut.p = (a); \ + ut.s = (b); \ + ut.r = (c); \ + utrace(&ut, sizeof(ut)); \ + errno = utrace_serrno; \ + } \ +} while (0) +#else +# define UTRACE(a, b, c) +#endif + +/******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ + +static bool malloc_init_hard(void); + +/******************************************************************************/ +/* + * Begin miscellaneous support functions. + */ + +/* Create a new arena and insert it into the arenas array at index ind. */ +arena_t * +arenas_extend(unsigned ind) +{ + arena_t *ret; + + ret = (arena_t *)base_alloc(sizeof(arena_t)); + if (ret != NULL && arena_new(ret, ind) == false) { + arenas[ind] = ret; + return (ret); + } + /* Only reached if there is an OOM error. */ + + /* + * OOM here is quite inconvenient to propagate, since dealing with it + * would require a check for failure in the fast path. Instead, punt + * by using arenas[0]. In practice, this is an extremely unlikely + * failure. + */ + malloc_write("<jemalloc>: Error initializing arena\n"); + if (opt_abort) + abort(); + + return (arenas[0]); +} + +/* Slow path, called only by choose_arena(). */ +arena_t * +choose_arena_hard(void) +{ + arena_t *ret; + + if (narenas_auto > 1) { + unsigned i, choose, first_null; + + choose = 0; + first_null = narenas_auto; + malloc_mutex_lock(&arenas_lock); + assert(arenas[0] != NULL); + for (i = 1; i < narenas_auto; i++) { + if (arenas[i] != NULL) { + /* + * Choose the first arena that has the lowest + * number of threads assigned to it. + */ + if (arenas[i]->nthreads < + arenas[choose]->nthreads) + choose = i; + } else if (first_null == narenas_auto) { + /* + * Record the index of the first uninitialized + * arena, in case all extant arenas are in use. + * + * NB: It is possible for there to be + * discontinuities in terms of initialized + * versus uninitialized arenas, due to the + * "thread.arena" mallctl. + */ + first_null = i; + } + } + + if (arenas[choose]->nthreads == 0 + || first_null == narenas_auto) { + /* + * Use an unloaded arena, or the least loaded arena if + * all arenas are already initialized. + */ + ret = arenas[choose]; + } else { + /* Initialize a new arena. */ + ret = arenas_extend(first_null); + } + ret->nthreads++; + malloc_mutex_unlock(&arenas_lock); + } else { + ret = arenas[0]; + malloc_mutex_lock(&arenas_lock); + ret->nthreads++; + malloc_mutex_unlock(&arenas_lock); + } + + arenas_tsd_set(&ret); + + return (ret); +} + +static void +stats_print_atexit(void) +{ + + if (config_tcache && config_stats) { + unsigned narenas, i; + + /* + * Merge stats from extant threads. This is racy, since + * individual threads do not lock when recording tcache stats + * events. As a consequence, the final stats may be slightly + * out of date by the time they are reported, if other threads + * continue to allocate. + */ + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena = arenas[i]; + if (arena != NULL) { + tcache_t *tcache; + + /* + * tcache_stats_merge() locks bins, so if any + * code is introduced that acquires both arena + * and bin locks in the opposite order, + * deadlocks may result. + */ + malloc_mutex_lock(&arena->lock); + ql_foreach(tcache, &arena->tcache_ql, link) { + tcache_stats_merge(tcache, arena); + } + malloc_mutex_unlock(&arena->lock); + } + } + } + je_malloc_stats_print(NULL, NULL, NULL); +} + +/* + * End miscellaneous support functions. + */ +/******************************************************************************/ +/* + * Begin initialization functions. + */ + +static unsigned +malloc_ncpus(void) +{ + long result; + +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + result = si.dwNumberOfProcessors; +#else + result = sysconf(_SC_NPROCESSORS_ONLN); +#endif + return ((result == -1) ? 1 : (unsigned)result); +} + +void +arenas_cleanup(void *arg) +{ + arena_t *arena = *(arena_t **)arg; + + malloc_mutex_lock(&arenas_lock); + arena->nthreads--; + malloc_mutex_unlock(&arenas_lock); +} + +JEMALLOC_ALWAYS_INLINE_C void +malloc_thread_init(void) +{ + + /* + * TSD initialization can't be safely done as a side effect of + * deallocation, because it is possible for a thread to do nothing but + * deallocate its TLS data via free(), in which case writing to TLS + * would cause write-after-free memory corruption. The quarantine + * facility *only* gets used as a side effect of deallocation, so make + * a best effort attempt at initializing its TSD by hooking all + * allocation events. + */ + if (config_fill && opt_quarantine) + quarantine_alloc_hook(); +} + +JEMALLOC_ALWAYS_INLINE_C bool +malloc_init(void) +{ + + if (malloc_initialized == false && malloc_init_hard()) + return (true); + malloc_thread_init(); + + return (false); +} + +static bool +malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p) +{ + bool accept; + const char *opts = *opts_p; + + *k_p = opts; + + for (accept = false; accept == false;) { + switch (*opts) { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_write("<jemalloc>: Conf string ends " + "with key\n"); + } + return (true); + default: + malloc_write("<jemalloc>: Malformed conf string\n"); + return (true); + } + } + + for (accept = false; accept == false;) { + switch (*opts) { + case ',': + opts++; + /* + * Look ahead one character here, because the next time + * this function is called, it will assume that end of + * input has been cleanly reached if no input remains, + * but we have optimistically already consumed the + * comma if one exists. + */ + if (*opts == '\0') { + malloc_write("<jemalloc>: Conf string ends " + "with comma\n"); + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; + } + } + + *opts_p = opts; + return (false); +} + +static void +malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, + size_t vlen) +{ + + malloc_printf("<jemalloc>: %s: %.*s:%.*s\n", msg, (int)klen, k, + (int)vlen, v); +} + +static void +malloc_conf_init(void) +{ + unsigned i; + char buf[PATH_MAX + 1]; + const char *opts, *k, *v; + size_t klen, vlen; + + /* + * Automatically configure valgrind before processing options. The + * valgrind option remains in jemalloc 3.x for compatibility reasons. + */ + if (config_valgrind) { + opt_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; + if (config_fill && opt_valgrind) { + opt_junk = false; + assert(opt_zero == false); + opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; + opt_redzone = true; + } + if (config_tcache && opt_valgrind) + opt_tcache = false; + } + + for (i = 0; i < 3; i++) { + /* Get runtime configuration. */ + switch (i) { + case 0: + if (je_malloc_conf != NULL) { + /* + * Use options that were compiled into the + * program. + */ + opts = je_malloc_conf; + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + case 1: { + int linklen = 0; +#ifndef _WIN32 + int saved_errno = errno; + const char *linkname = +# ifdef JEMALLOC_PREFIX + "/etc/"JEMALLOC_PREFIX"malloc.conf" +# else + "/etc/malloc.conf" +# endif + ; + + /* + * Try to use the contents of the "/etc/malloc.conf" + * symbolic link's name. + */ + linklen = readlink(linkname, buf, sizeof(buf) - 1); + if (linklen == -1) { + /* No configuration specified. */ + linklen = 0; + /* restore errno */ + set_errno(saved_errno); + } +#endif + buf[linklen] = '\0'; + opts = buf; + break; + } case 2: { + const char *envname = +#ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX"MALLOC_CONF" +#else + "MALLOC_CONF" +#endif + ; + + if ((opts = getenv(envname)) != NULL) { + /* + * Do nothing; opts is already initialized to + * the value of the MALLOC_CONF environment + * variable. + */ + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + } default: + not_reached(); + buf[0] = '\0'; + opts = buf; + } + + while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, + &vlen) == false) { +#define CONF_HANDLE_BOOL(o, n) \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ + klen) == 0) { \ + if (strncmp("true", v, vlen) == 0 && \ + vlen == sizeof("true")-1) \ + o = true; \ + else if (strncmp("false", v, vlen) == \ + 0 && vlen == sizeof("false")-1) \ + o = false; \ + else { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } \ + continue; \ + } +#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ + klen) == 0) { \ + uintmax_t um; \ + char *end; \ + \ + set_errno(0); \ + um = malloc_strtoumax(v, &end, 0); \ + if (get_errno() != 0 || (uintptr_t)end -\ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (clip) { \ + if (min != 0 && um < min) \ + o = min; \ + else if (um > max) \ + o = max; \ + else \ + o = um; \ + } else { \ + if ((min != 0 && um < min) || \ + um > max) { \ + malloc_conf_error( \ + "Out-of-range " \ + "conf value", \ + k, klen, v, vlen); \ + } else \ + o = um; \ + } \ + continue; \ + } +#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ + klen) == 0) { \ + long l; \ + char *end; \ + \ + set_errno(0); \ + l = strtol(v, &end, 0); \ + if (get_errno() != 0 || (uintptr_t)end -\ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (l < (ssize_t)min || l > \ + (ssize_t)max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + o = l; \ + continue; \ + } +#define CONF_HANDLE_CHAR_P(o, n, d) \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ + klen) == 0) { \ + size_t cpylen = (vlen <= \ + sizeof(o)-1) ? vlen : \ + sizeof(o)-1; \ + strncpy(o, v, cpylen); \ + o[cpylen] = '\0'; \ + continue; \ + } + + CONF_HANDLE_BOOL(opt_abort, "abort") + /* + * Chunks always require at least one header page, plus + * one data page in the absence of redzones, or three + * pages in the presence of redzones. In order to + * simplify options processing, fix the limit based on + * config_fill. + */ + CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + + (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1, + true) + if (strncmp("dss", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < dss_prec_limit; i++) { + if (strncmp(dss_prec_names[i], v, vlen) + == 0) { + if (chunk_dss_prec_set(i)) { + malloc_conf_error( + "Error setting dss", + k, klen, v, vlen); + } else { + opt_dss = + dss_prec_names[i]; + match = true; + break; + } + } + } + if (match == false) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } + CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, + SIZE_T_MAX, false) + CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", + -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_stats_print, "stats_print") + if (config_fill) { + CONF_HANDLE_BOOL(opt_junk, "junk") + CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", + 0, SIZE_T_MAX, false) + CONF_HANDLE_BOOL(opt_redzone, "redzone") + CONF_HANDLE_BOOL(opt_zero, "zero") + } + if (config_utrace) { + CONF_HANDLE_BOOL(opt_utrace, "utrace") + } + if (config_valgrind) { + CONF_HANDLE_BOOL(opt_valgrind, "valgrind") + } + if (config_xmalloc) { + CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") + } + if (config_tcache) { + CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, + "lg_tcache_max", -1, + (sizeof(size_t) << 3) - 1) + } + if (config_prof) { + CONF_HANDLE_BOOL(opt_prof, "prof") + CONF_HANDLE_CHAR_P(opt_prof_prefix, + "prof_prefix", "jeprof") + CONF_HANDLE_BOOL(opt_prof_active, "prof_active") + CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, + "lg_prof_sample", 0, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") + CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, + "lg_prof_interval", -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") + CONF_HANDLE_BOOL(opt_prof_final, "prof_final") + CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") + } + malloc_conf_error("Invalid conf pair", k, klen, v, + vlen); +#undef CONF_HANDLE_BOOL +#undef CONF_HANDLE_SIZE_T +#undef CONF_HANDLE_SSIZE_T +#undef CONF_HANDLE_CHAR_P + } + } +} + +static bool +malloc_init_hard(void) +{ + arena_t *init_arenas[1]; + + malloc_mutex_lock(&init_lock); + if (malloc_initialized || IS_INITIALIZER) { + /* + * Another thread initialized the allocator before this one + * acquired init_lock, or this thread is the initializing + * thread, and it is recursively allocating. + */ + malloc_mutex_unlock(&init_lock); + return (false); + } +#ifdef JEMALLOC_THREADED_INIT + if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) { + /* Busy-wait until the initializing thread completes. */ + do { + malloc_mutex_unlock(&init_lock); + CPU_SPINWAIT; + malloc_mutex_lock(&init_lock); + } while (malloc_initialized == false); + malloc_mutex_unlock(&init_lock); + return (false); + } +#endif + malloc_initializer = INITIALIZER; + + malloc_tsd_boot(); + if (config_prof) + prof_boot0(); + + malloc_conf_init(); + + if (opt_stats_print) { + /* Print statistics at exit. */ + if (atexit(stats_print_atexit) != 0) { + malloc_write("<jemalloc>: Error in atexit()\n"); + if (opt_abort) + abort(); + } + } + + if (base_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (chunk_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (ctl_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_prof) + prof_boot1(); + + arena_boot(); + + if (config_tcache && tcache_boot0()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (huge_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (malloc_mutex_init(&arenas_lock)) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + /* + * Create enough scaffolding to allow recursive allocation in + * malloc_ncpus(). + */ + narenas_total = narenas_auto = 1; + arenas = init_arenas; + memset(arenas, 0, sizeof(arena_t *) * narenas_auto); + + /* + * Initialize one arena here. The rest are lazily created in + * choose_arena_hard(). + */ + arenas_extend(0); + if (arenas[0] == NULL) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + /* Initialize allocation counters before any allocations can occur. */ + if (config_stats && thread_allocated_tsd_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (arenas_tsd_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_tcache && tcache_boot1()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_fill && quarantine_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + malloc_mutex_unlock(&init_lock); + /**********************************************************************/ + /* Recursive allocation may follow. */ + + ncpus = malloc_ncpus(); + +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ + && !defined(_WIN32)) + /* LinuxThreads's pthread_atfork() allocates. */ + if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, + jemalloc_postfork_child) != 0) { + malloc_write("<jemalloc>: Error in pthread_atfork()\n"); + if (opt_abort) + abort(); + } +#endif + + /* Done recursively allocating. */ + /**********************************************************************/ + malloc_mutex_lock(&init_lock); + + if (mutex_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (opt_narenas == 0) { + /* + * For SMP systems, create more than one arena per CPU by + * default. + */ + if (ncpus > 1) + opt_narenas = ncpus << 2; + else + opt_narenas = 1; + } + narenas_auto = opt_narenas; + /* + * Make sure that the arenas array can be allocated. In practice, this + * limit is enough to allow the allocator to function, but the ctl + * machinery will fail to allocate memory at far lower limits. + */ + if (narenas_auto > chunksize / sizeof(arena_t *)) { + narenas_auto = chunksize / sizeof(arena_t *); + malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n", + narenas_auto); + } + narenas_total = narenas_auto; + + /* Allocate and initialize arenas. */ + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); + if (arenas == NULL) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* + * Zero the array. In practice, this should always be pre-zeroed, + * since it was just mmap()ed, but let's be sure. + */ + memset(arenas, 0, sizeof(arena_t *) * narenas_total); + /* Copy the pointer to the one arena that was already initialized. */ + arenas[0] = init_arenas[0]; + + malloc_initialized = true; + malloc_mutex_unlock(&init_lock); + + return (false); +} + +/* + * End initialization functions. + */ +/******************************************************************************/ +/* + * Begin malloc(3)-compatible functions. + */ + +static void * +imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = imalloc(SMALL_MAXCLASS+1); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = imalloc(usize); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) + p = imalloc_prof_sample(usize, cnt); + else + p = imalloc(usize); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + +/* + * MALLOC_BODY() is a macro rather than a function because its contents are in + * the fast path, but inlining would cause reliability issues when determining + * how many frames to discard from heap profiling backtraces. + */ +#define MALLOC_BODY(ret, size, usize) do { \ + if (malloc_init()) \ + ret = NULL; \ + else { \ + if (config_prof && opt_prof) { \ + prof_thr_cnt_t *cnt; \ + \ + usize = s2u(size); \ + /* \ + * Call PROF_ALLOC_PREP() here rather than in \ + * imalloc_prof() so that imalloc_prof() can be \ + * inlined without introducing uncertainty \ + * about the number of backtrace frames to \ + * ignore. imalloc_prof() is in the fast path \ + * when heap profiling is enabled, so inlining \ + * is critical to performance. (For \ + * consistency all callers of PROF_ALLOC_PREP() \ + * are structured similarly, even though e.g. \ + * realloc() isn't called enough for inlining \ + * to be critical.) \ + */ \ + PROF_ALLOC_PREP(1, usize, cnt); \ + ret = imalloc_prof(usize, cnt); \ + } else { \ + if (config_stats || (config_valgrind && \ + opt_valgrind)) \ + usize = s2u(size); \ + ret = imalloc(size); \ + } \ + } \ +} while (0) + +void * +je_malloc(size_t size) +{ + void *ret; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + if (size == 0) + size = 1; + + MALLOC_BODY(ret, size, usize); + + if (ret == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write("<jemalloc>: Error in malloc(): " + "out of memory\n"); + abort(); + } + set_errno(ENOMEM); + } + if (config_stats && ret != NULL) { + assert(usize == isalloc(ret, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + UTRACE(0, size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); + return (ret); +} + +static void * +imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0); + p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment, + false); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = ipalloc(usize, alignment, false); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) + p = imemalign_prof_sample(alignment, usize, cnt); + else + p = ipalloc(usize, alignment, false); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + +JEMALLOC_ATTR(nonnull(1)) +#ifdef JEMALLOC_PROF +/* + * Avoid any uncertainty as to how many backtrace frames to ignore in + * PROF_ALLOC_PREP(). + */ +JEMALLOC_NOINLINE +#endif +static int +imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) +{ + int ret; + size_t usize; + void *result; + + assert(min_alignment != 0); + + if (malloc_init()) { + result = NULL; + goto label_oom; + } else { + if (size == 0) + size = 1; + + /* Make sure that alignment is a large enough power of 2. */ + if (((alignment - 1) & alignment) != 0 + || (alignment < min_alignment)) { + if (config_xmalloc && opt_xmalloc) { + malloc_write("<jemalloc>: Error allocating " + "aligned memory: invalid alignment\n"); + abort(); + } + result = NULL; + ret = EINVAL; + goto label_return; + } + + usize = sa2u(size, alignment); + if (usize == 0) { + result = NULL; + goto label_oom; + } + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + PROF_ALLOC_PREP(2, usize, cnt); + result = imemalign_prof(alignment, usize, cnt); + } else + result = ipalloc(usize, alignment, false); + if (result == NULL) + goto label_oom; + } + + *memptr = result; + ret = 0; +label_return: + if (config_stats && result != NULL) { + assert(usize == isalloc(result, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + UTRACE(0, size, result); + return (ret); +label_oom: + assert(result == NULL); + if (config_xmalloc && opt_xmalloc) { + malloc_write("<jemalloc>: Error allocating aligned memory: " + "out of memory\n"); + abort(); + } + ret = ENOMEM; + goto label_return; +} + +int +je_posix_memalign(void **memptr, size_t alignment, size_t size) +{ + int ret = imemalign(memptr, alignment, size, sizeof(void *)); + JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr, + config_prof), false); + return (ret); +} + +void * +je_aligned_alloc(size_t alignment, size_t size) +{ + void *ret; + int err; + + if ((err = imemalign(&ret, alignment, size, 1)) != 0) { + ret = NULL; + set_errno(err); + } + JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), + false); + return (ret); +} + +static void * +icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = icalloc(SMALL_MAXCLASS+1); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = icalloc(usize); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +icalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) + p = icalloc_prof_sample(usize, cnt); + else + p = icalloc(usize); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + +void * +je_calloc(size_t num, size_t size) +{ + void *ret; + size_t num_size; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + if (malloc_init()) { + num_size = 0; + ret = NULL; + goto label_return; + } + + num_size = num * size; + if (num_size == 0) { + if (num == 0 || size == 0) + num_size = 1; + else { + ret = NULL; + goto label_return; + } + /* + * Try to avoid division here. We know that it isn't possible to + * overflow during multiplication if neither operand uses any of the + * most significant half of the bits in a size_t. + */ + } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) + && (num_size / size != num)) { + /* size_t overflow. */ + ret = NULL; + goto label_return; + } + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + usize = s2u(num_size); + PROF_ALLOC_PREP(1, usize, cnt); + ret = icalloc_prof(usize, cnt); + } else { + if (config_stats || (config_valgrind && opt_valgrind)) + usize = s2u(num_size); + ret = icalloc(num_size); + } + +label_return: + if (ret == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write("<jemalloc>: Error in calloc(): out of " + "memory\n"); + abort(); + } + set_errno(ENOMEM); + } + if (config_stats && ret != NULL) { + assert(usize == isalloc(ret, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + UTRACE(0, num_size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); + return (ret); +} + +static void * +irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = iralloc(oldptr, usize, 0, 0, false); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_thr_cnt_t *cnt) +{ + void *p; + prof_ctx_t *old_ctx; + + old_ctx = prof_ctx_get(oldptr); + if ((uintptr_t)cnt != (uintptr_t)1U) + p = irealloc_prof_sample(oldptr, usize, cnt); + else + p = iralloc(oldptr, usize, 0, 0, false); + if (p == NULL) + return (NULL); + prof_realloc(p, usize, cnt, old_usize, old_ctx); + + return (p); +} + +JEMALLOC_INLINE_C void +ifree(void *ptr) +{ + size_t usize; + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + + assert(ptr != NULL); + assert(malloc_initialized || IS_INITIALIZER); + + if (config_prof && opt_prof) { + usize = isalloc(ptr, config_prof); + prof_free(ptr, usize); + } else if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloc(ptr); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); +} + +void * +je_realloc(void *ptr, size_t size) +{ + void *ret; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + size_t old_usize = 0; + UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + + if (size == 0) { + if (ptr != NULL) { + /* realloc(ptr, 0) is equivalent to free(ptr). */ + UTRACE(ptr, 0, 0); + ifree(ptr); + return (NULL); + } + size = 1; + } + + if (ptr != NULL) { + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if ((config_prof && opt_prof) || config_stats || + (config_valgrind && opt_valgrind)) + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && opt_valgrind) + old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + usize = s2u(size); + PROF_ALLOC_PREP(1, usize, cnt); + ret = irealloc_prof(ptr, old_usize, usize, cnt); + } else { + if (config_stats || (config_valgrind && opt_valgrind)) + usize = s2u(size); + ret = iralloc(ptr, size, 0, 0, false); + } + } else { + /* realloc(NULL, size) is equivalent to malloc(size). */ + MALLOC_BODY(ret, size, usize); + } + + if (ret == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write("<jemalloc>: Error in realloc(): " + "out of memory\n"); + abort(); + } + set_errno(ENOMEM); + } + if (config_stats && ret != NULL) { + thread_allocated_t *ta; + assert(usize == isalloc(ret, config_prof)); + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_usize; + } + UTRACE(ptr, size, ret); + JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_usize, old_rzsize, + false); + return (ret); +} + +void +je_free(void *ptr) +{ + + UTRACE(ptr, 0, 0); + if (ptr != NULL) + ifree(ptr); +} + +/* + * End malloc(3)-compatible functions. + */ +/******************************************************************************/ +/* + * Begin non-standard override functions. + */ + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +void * +je_memalign(size_t alignment, size_t size) +{ + void *ret JEMALLOC_CC_SILENCE_INIT(NULL); + imemalign(&ret, alignment, size, 1); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); + return (ret); +} +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +void * +je_valloc(size_t size) +{ + void *ret JEMALLOC_CC_SILENCE_INIT(NULL); + imemalign(&ret, PAGE, size, 1); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); + return (ret); +} +#endif + +/* + * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has + * #define je_malloc malloc + */ +#define malloc_is_malloc 1 +#define is_malloc_(a) malloc_is_ ## a +#define is_malloc(a) is_malloc_(a) + +#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__)) +/* + * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible + * to inconsistently reference libc's malloc(3)-compatible functions + * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541). + * + * These definitions interpose hooks in glibc. The functions are actually + * passed an extra argument for the caller return address, which will be + * ignored. + */ +JEMALLOC_EXPORT void (* __free_hook)(void *ptr) = je_free; +JEMALLOC_EXPORT void *(* __malloc_hook)(size_t size) = je_malloc; +JEMALLOC_EXPORT void *(* __realloc_hook)(void *ptr, size_t size) = je_realloc; +JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = + je_memalign; +#endif + +/* + * End non-standard override functions. + */ +/******************************************************************************/ +/* + * Begin non-standard functions. + */ + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena) +{ + + assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, + alignment))); + + if (alignment != 0) + return (ipalloct(usize, alignment, zero, try_tcache, arena)); + else if (zero) + return (icalloct(usize, try_tcache, arena)); + else + return (imalloct(usize, try_tcache, arena)); +} + +static void * +imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena, prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + size_t usize_promoted = (alignment == 0) ? + s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment); + assert(usize_promoted != 0); + p = imallocx(usize_promoted, alignment, zero, try_tcache, + arena); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else + p = imallocx(usize, alignment, zero, try_tcache, arena); + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena, prof_thr_cnt_t *cnt) +{ + void *p; + + if ((uintptr_t)cnt != (uintptr_t)1U) { + p = imallocx_prof_sample(usize, alignment, zero, try_tcache, + arena, cnt); + } else + p = imallocx(usize, alignment, zero, try_tcache, arena); + if (p == NULL) + return (NULL); + prof_malloc(p, usize, cnt); + + return (p); +} + +void * +je_mallocx(size_t size, int flags) +{ + void *p; + size_t usize; + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & MALLOCX_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + arena_t *arena; + bool try_tcache; + + assert(size != 0); + + if (malloc_init()) + goto label_oom; + + if (arena_ind != UINT_MAX) { + arena = arenas[arena_ind]; + try_tcache = false; + } else { + arena = NULL; + try_tcache = true; + } + + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + assert(usize != 0); + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + PROF_ALLOC_PREP(1, usize, cnt); + p = imallocx_prof(usize, alignment, zero, try_tcache, arena, + cnt); + } else + p = imallocx(usize, alignment, zero, try_tcache, arena); + if (p == NULL) + goto label_oom; + + if (config_stats) { + assert(usize == isalloc(p, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); + return (p); +label_oom: + if (config_xmalloc && opt_xmalloc) { + malloc_write("<jemalloc>: Error in mallocx(): out of memory\n"); + abort(); + } + UTRACE(0, size, 0); + return (NULL); +} + +static void * +irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, + bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena, + prof_thr_cnt_t *cnt) +{ + void *p; + + if (cnt == NULL) + return (NULL); + if (prof_promote && usize <= SMALL_MAXCLASS) { + p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); + if (p == NULL) + return (NULL); + arena_prof_promoted(p, usize); + } else { + p = iralloct(oldptr, size, 0, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); + } + + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, + size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, + arena_t *arena, prof_thr_cnt_t *cnt) +{ + void *p; + prof_ctx_t *old_ctx; + + old_ctx = prof_ctx_get(oldptr); + if ((uintptr_t)cnt != (uintptr_t)1U) + p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, + try_tcache_alloc, try_tcache_dalloc, arena, cnt); + else { + p = iralloct(oldptr, size, 0, alignment, zero, + try_tcache_alloc, try_tcache_dalloc, arena); + } + if (p == NULL) + return (NULL); + + if (p == oldptr && alignment != 0) { + /* + * The allocation did not move, so it is possible that the size + * class is smaller than would guarantee the requested + * alignment, and that the alignment constraint was + * serendipitously satisfied. Additionally, old_usize may not + * be the same as the current usize because of in-place large + * reallocation. Therefore, query the actual value of usize. + */ + *usize = isalloc(p, config_prof); + } + prof_realloc(p, *usize, cnt, old_usize, old_ctx); + + return (p); +} + +void * +je_rallocx(void *ptr, size_t size, int flags) +{ + void *p; + size_t usize, old_usize; + UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & MALLOCX_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache_alloc, try_tcache_dalloc; + arena_t *arena; + + assert(ptr != NULL); + assert(size != 0); + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk; + try_tcache_alloc = false; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache_dalloc = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + arena = arenas[arena_ind]; + } else { + try_tcache_alloc = true; + try_tcache_dalloc = true; + arena = NULL; + } + + if ((config_prof && opt_prof) || config_stats || + (config_valgrind && opt_valgrind)) + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_usize); + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + assert(usize != 0); + PROF_ALLOC_PREP(1, usize, cnt); + p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, + try_tcache_alloc, try_tcache_dalloc, arena, cnt); + if (p == NULL) + goto label_oom; + } else { + p = iralloct(ptr, size, 0, alignment, zero, try_tcache_alloc, + try_tcache_dalloc, arena); + if (p == NULL) + goto label_oom; + if (config_stats || (config_valgrind && opt_valgrind)) + usize = isalloc(p, config_prof); + } + + if (config_stats) { + thread_allocated_t *ta; + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_usize; + } + UTRACE(ptr, size, p); + JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_usize, old_rzsize, zero); + return (p); +label_oom: + if (config_xmalloc && opt_xmalloc) { + malloc_write("<jemalloc>: Error in rallocx(): out of memory\n"); + abort(); + } + UTRACE(ptr, size, 0); + return (NULL); +} + +JEMALLOC_ALWAYS_INLINE_C size_t +ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, bool zero, arena_t *arena) +{ + size_t usize; + + if (ixalloc(ptr, size, extra, alignment, zero)) + return (old_usize); + usize = isalloc(ptr, config_prof); + + return (usize); +} + +static size_t +ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, size_t max_usize, bool zero, arena_t *arena, + prof_thr_cnt_t *cnt) +{ + size_t usize; + + if (cnt == NULL) + return (old_usize); + /* Use minimum usize to determine whether promotion may happen. */ + if (prof_promote && ((alignment == 0) ? s2u(size) : sa2u(size, + alignment)) <= SMALL_MAXCLASS) { + if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), + alignment, zero)) + return (old_usize); + usize = isalloc(ptr, config_prof); + if (max_usize < PAGE) + arena_prof_promoted(ptr, usize); + } else { + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero, arena); + } + + return (usize); +} + +JEMALLOC_ALWAYS_INLINE_C size_t +ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, + size_t alignment, size_t max_usize, bool zero, arena_t *arena, + prof_thr_cnt_t *cnt) +{ + size_t usize; + prof_ctx_t *old_ctx; + + old_ctx = prof_ctx_get(ptr); + if ((uintptr_t)cnt != (uintptr_t)1U) { + usize = ixallocx_prof_sample(ptr, old_usize, size, extra, + alignment, zero, max_usize, arena, cnt); + } else { + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero, arena); + } + if (usize == old_usize) + return (usize); + prof_realloc(ptr, usize, cnt, old_usize, old_ctx); + + return (usize); +} + +size_t +je_xallocx(void *ptr, size_t size, size_t extra, int flags) +{ + size_t usize, old_usize; + UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & MALLOCX_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + arena_t *arena; + + assert(ptr != NULL); + assert(size != 0); + assert(SIZE_T_MAX - size >= extra); + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (arena_ind != UINT_MAX) + arena = arenas[arena_ind]; + else + arena = NULL; + + old_usize = isalloc(ptr, config_prof); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_usize); + + if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + /* + * usize isn't knowable before ixalloc() returns when extra is + * non-zero. Therefore, compute its maximum possible value and + * use that in PROF_ALLOC_PREP() to decide whether to capture a + * backtrace. prof_realloc() will use the actual usize to + * decide whether to sample. + */ + size_t max_usize = (alignment == 0) ? s2u(size+extra) : + sa2u(size+extra, alignment); + PROF_ALLOC_PREP(1, max_usize, cnt); + usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, + max_usize, zero, arena, cnt); + } else { + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, + zero, arena); + } + if (usize == old_usize) + goto label_not_resized; + + if (config_stats) { + thread_allocated_t *ta; + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_usize; + } + JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero); +label_not_resized: + UTRACE(ptr, size, ptr); + return (usize); +} + +size_t +je_sallocx(const void *ptr, int flags) +{ + size_t usize; + + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (config_ivsalloc) + usize = ivsalloc(ptr, config_prof); + else { + assert(ptr != NULL); + usize = isalloc(ptr, config_prof); + } + + return (usize); +} + +void +je_dallocx(void *ptr, int flags) +{ + size_t usize; + UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache; + + assert(ptr != NULL); + assert(malloc_initialized || IS_INITIALIZER); + + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + } else + try_tcache = true; + + UTRACE(ptr, 0, 0); + if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); + if (config_prof && opt_prof) { + if (config_stats == false && config_valgrind == false) + usize = isalloc(ptr, config_prof); + prof_free(ptr, usize); + } + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloct(ptr, try_tcache); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); +} + +size_t +je_nallocx(size_t size, int flags) +{ + size_t usize; + size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + + assert(size != 0); + + if (malloc_init()) + return (0); + + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + assert(usize != 0); + return (usize); +} + +int +je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_byname(name, oldp, oldlenp, newp, newlen)); +} + +int +je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_nametomib(name, mibp, miblenp)); +} + +int +je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); +} + +void +je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) +{ + + stats_print(write_cb, cbopaque, opts); +} + +size_t +je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) +{ + size_t ret; + + assert(malloc_initialized || IS_INITIALIZER); + malloc_thread_init(); + + if (config_ivsalloc) + ret = ivsalloc(ptr, config_prof); + else + ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0; + + return (ret); +} + +/* + * End non-standard functions. + */ +/******************************************************************************/ +/* + * Begin experimental functions. + */ +#ifdef JEMALLOC_EXPERIMENTAL + +int +je_allocm(void **ptr, size_t *rsize, size_t size, int flags) +{ + void *p; + + assert(ptr != NULL); + + p = je_mallocx(size, flags); + if (p == NULL) + return (ALLOCM_ERR_OOM); + if (rsize != NULL) + *rsize = isalloc(p, config_prof); + *ptr = p; + return (ALLOCM_SUCCESS); +} + +int +je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) +{ + int ret; + bool no_move = flags & ALLOCM_NO_MOVE; + + assert(ptr != NULL); + assert(*ptr != NULL); + assert(size != 0); + assert(SIZE_T_MAX - size >= extra); + + if (no_move) { + size_t usize = je_xallocx(*ptr, size, extra, flags); + ret = (usize >= size) ? ALLOCM_SUCCESS : ALLOCM_ERR_NOT_MOVED; + if (rsize != NULL) + *rsize = usize; + } else { + void *p = je_rallocx(*ptr, size+extra, flags); + if (p != NULL) { + *ptr = p; + ret = ALLOCM_SUCCESS; + } else + ret = ALLOCM_ERR_OOM; + if (rsize != NULL) + *rsize = isalloc(*ptr, config_prof); + } + return (ret); +} + +int +je_sallocm(const void *ptr, size_t *rsize, int flags) +{ + + assert(rsize != NULL); + *rsize = je_sallocx(ptr, flags); + return (ALLOCM_SUCCESS); +} + +int +je_dallocm(void *ptr, int flags) +{ + + je_dallocx(ptr, flags); + return (ALLOCM_SUCCESS); +} + +int +je_nallocm(size_t *rsize, size_t size, int flags) +{ + size_t usize; + + usize = je_nallocx(size, flags); + if (usize == 0) + return (ALLOCM_ERR_OOM); + if (rsize != NULL) + *rsize = usize; + return (ALLOCM_SUCCESS); +} + +#endif +/* + * End experimental functions. + */ +/******************************************************************************/ +/* + * The following functions are used by threading libraries for protection of + * malloc during fork(). + */ + +/* + * If an application creates a thread before doing any allocation in the main + * thread, then calls fork(2) in the main thread followed by memory allocation + * in the child process, a race can occur that results in deadlock within the + * child: the main thread may have forked while the created thread had + * partially initialized the allocator. Ordinarily jemalloc prevents + * fork/malloc races via the following functions it registers during + * initialization using pthread_atfork(), but of course that does no good if + * the allocator isn't fully initialized at fork time. The following library + * constructor is a partial solution to this problem. It may still possible to + * trigger the deadlock described above, but doing so would involve forking via + * a library constructor that runs before jemalloc's runs. + */ +JEMALLOC_ATTR(constructor) +static void +jemalloc_constructor(void) +{ + + malloc_init(); +} + +#ifndef JEMALLOC_MUTEX_INIT_CB +void +jemalloc_prefork(void) +#else +JEMALLOC_EXPORT void +_malloc_prefork(void) +#endif +{ + unsigned i; + +#ifdef JEMALLOC_MUTEX_INIT_CB + if (malloc_initialized == false) + return; +#endif + assert(malloc_initialized); + + /* Acquire all mutexes in a safe order. */ + ctl_prefork(); + prof_prefork(); + malloc_mutex_prefork(&arenas_lock); + for (i = 0; i < narenas_total; i++) { + if (arenas[i] != NULL) + arena_prefork(arenas[i]); + } + chunk_prefork(); + base_prefork(); + huge_prefork(); +} + +#ifndef JEMALLOC_MUTEX_INIT_CB +void +jemalloc_postfork_parent(void) +#else +JEMALLOC_EXPORT void +_malloc_postfork(void) +#endif +{ + unsigned i; + +#ifdef JEMALLOC_MUTEX_INIT_CB + if (malloc_initialized == false) + return; +#endif + assert(malloc_initialized); + + /* Release all mutexes, now that fork() has completed. */ + huge_postfork_parent(); + base_postfork_parent(); + chunk_postfork_parent(); + for (i = 0; i < narenas_total; i++) { + if (arenas[i] != NULL) + arena_postfork_parent(arenas[i]); + } + malloc_mutex_postfork_parent(&arenas_lock); + prof_postfork_parent(); + ctl_postfork_parent(); +} + +void +jemalloc_postfork_child(void) +{ + unsigned i; + + assert(malloc_initialized); + + /* Release all mutexes, now that fork() has completed. */ + huge_postfork_child(); + base_postfork_child(); + chunk_postfork_child(); + for (i = 0; i < narenas_total; i++) { + if (arenas[i] != NULL) + arena_postfork_child(arenas[i]); + } + malloc_mutex_postfork_child(&arenas_lock); + prof_postfork_child(); + ctl_postfork_child(); +} + +/******************************************************************************/ +/* + * The following functions are used for TLS allocation/deallocation in static + * binaries on FreeBSD. The primary difference between these and i[mcd]alloc() + * is that these avoid accessing TLS variables. + */ + +static void * +a0alloc(size_t size, bool zero) +{ + + if (malloc_init()) + return (NULL); + + if (size == 0) + size = 1; + + if (size <= arena_maxclass) + return (arena_malloc(arenas[0], size, zero, false)); + else + return (huge_malloc(size, zero, huge_dss_prec_get(arenas[0]))); +} + +void * +a0malloc(size_t size) +{ + + return (a0alloc(size, false)); +} + +void * +a0calloc(size_t num, size_t size) +{ + + return (a0alloc(num * size, true)); +} + +void +a0free(void *ptr) +{ + arena_chunk_t *chunk; + + if (ptr == NULL) + return; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(chunk->arena, chunk, ptr, false); + else + huge_dalloc(ptr, true); +} + +/******************************************************************************/ diff --git a/deps/jemalloc/src/mb.c b/deps/jemalloc/src/mb.c new file mode 100644 index 0000000000..dc2c0a256f --- /dev/null +++ b/deps/jemalloc/src/mb.c @@ -0,0 +1,2 @@ +#define JEMALLOC_MB_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/deps/jemalloc/src/mutex.c b/deps/jemalloc/src/mutex.c new file mode 100644 index 0000000000..788eca3870 --- /dev/null +++ b/deps/jemalloc/src/mutex.c @@ -0,0 +1,149 @@ +#define JEMALLOC_MUTEX_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) +#include <dlfcn.h> +#endif + +#ifndef _CRT_SPINCOUNT +#define _CRT_SPINCOUNT 4000 +#endif + +/******************************************************************************/ +/* Data. */ + +#ifdef JEMALLOC_LAZY_LOCK +bool isthreaded = false; +#endif +#ifdef JEMALLOC_MUTEX_INIT_CB +static bool postpone_init = true; +static malloc_mutex_t *postponed_mutexes = NULL; +#endif + +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) +static void pthread_create_once(void); +#endif + +/******************************************************************************/ +/* + * We intercept pthread_create() calls in order to toggle isthreaded if the + * process goes multi-threaded. + */ + +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) +static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); + +static void +pthread_create_once(void) +{ + + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); + if (pthread_create_fptr == NULL) { + malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } + + isthreaded = true; +} + +JEMALLOC_EXPORT int +pthread_create(pthread_t *__restrict thread, + const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), + void *__restrict arg) +{ + static pthread_once_t once_control = PTHREAD_ONCE_INIT; + + pthread_once(&once_control, pthread_create_once); + + return (pthread_create_fptr(thread, attr, start_routine, arg)); +} +#endif + +/******************************************************************************/ + +#ifdef JEMALLOC_MUTEX_INIT_CB +JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, + void *(calloc_cb)(size_t, size_t)); +#endif + +bool +malloc_mutex_init(malloc_mutex_t *mutex) +{ + +#ifdef _WIN32 + if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, + _CRT_SPINCOUNT)) + return (true); +#elif (defined(JEMALLOC_OSSPIN)) + mutex->lock = 0; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + if (postpone_init) { + mutex->postponed_next = postponed_mutexes; + postponed_mutexes = mutex; + } else { + if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) != + 0) + return (true); + } +#else + pthread_mutexattr_t attr; + + if (pthread_mutexattr_init(&attr) != 0) + return (true); + pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); + if (pthread_mutex_init(&mutex->lock, &attr) != 0) { + pthread_mutexattr_destroy(&attr); + return (true); + } + pthread_mutexattr_destroy(&attr); +#endif + return (false); +} + +void +malloc_mutex_prefork(malloc_mutex_t *mutex) +{ + + malloc_mutex_lock(mutex); +} + +void +malloc_mutex_postfork_parent(malloc_mutex_t *mutex) +{ + + malloc_mutex_unlock(mutex); +} + +void +malloc_mutex_postfork_child(malloc_mutex_t *mutex) +{ + +#ifdef JEMALLOC_MUTEX_INIT_CB + malloc_mutex_unlock(mutex); +#else + if (malloc_mutex_init(mutex)) { + malloc_printf("<jemalloc>: Error re-initializing mutex in " + "child\n"); + if (opt_abort) + abort(); + } +#endif +} + +bool +mutex_boot(void) +{ + +#ifdef JEMALLOC_MUTEX_INIT_CB + postpone_init = false; + while (postponed_mutexes != NULL) { + if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, + base_calloc) != 0) + return (true); + postponed_mutexes = postponed_mutexes->postponed_next; + } +#endif + return (false); +} diff --git a/deps/jemalloc/src/prof.c b/deps/jemalloc/src/prof.c new file mode 100644 index 0000000000..7722b7b437 --- /dev/null +++ b/deps/jemalloc/src/prof.c @@ -0,0 +1,1420 @@ +#define JEMALLOC_PROF_C_ +#include "jemalloc/internal/jemalloc_internal.h" +/******************************************************************************/ + +#ifdef JEMALLOC_PROF_LIBUNWIND +#define UNW_LOCAL_ONLY +#include <libunwind.h> +#endif + +#ifdef JEMALLOC_PROF_LIBGCC +#include <unwind.h> +#endif + +/******************************************************************************/ +/* Data. */ + +malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) + +bool opt_prof = false; +bool opt_prof_active = true; +size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; +ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; +bool opt_prof_gdump = false; +bool opt_prof_final = true; +bool opt_prof_leak = false; +bool opt_prof_accum = false; +char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +uint64_t prof_interval = 0; +bool prof_promote; + +/* + * Table of mutexes that are shared among ctx's. These are leaf locks, so + * there is no problem with using them for more than one ctx at the same time. + * The primary motivation for this sharing though is that ctx's are ephemeral, + * and destroying mutexes causes complications for systems that allocate when + * creating/destroying mutexes. + */ +static malloc_mutex_t *ctx_locks; +static unsigned cum_ctxs; /* Atomic counter. */ + +/* + * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data + * structure that knows about all backtraces currently captured. + */ +static ckh_t bt2ctx; +static malloc_mutex_t bt2ctx_mtx; + +static malloc_mutex_t prof_dump_seq_mtx; +static uint64_t prof_dump_seq; +static uint64_t prof_dump_iseq; +static uint64_t prof_dump_mseq; +static uint64_t prof_dump_useq; + +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps. + */ +static malloc_mutex_t prof_dump_mtx; +static char prof_dump_buf[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PROF_DUMP_BUFSIZE +#else + 1 +#endif +]; +static unsigned prof_dump_buf_end; +static int prof_dump_fd; + +/* Do not dump any profiles until bootstrapping is complete. */ +static bool prof_booted = false; + +/******************************************************************************/ + +void +bt_init(prof_bt_t *bt, void **vec) +{ + + cassert(config_prof); + + bt->vec = vec; + bt->len = 0; +} + +static void +bt_destroy(prof_bt_t *bt) +{ + + cassert(config_prof); + + idalloc(bt); +} + +static prof_bt_t * +bt_dup(prof_bt_t *bt) +{ + prof_bt_t *ret; + + cassert(config_prof); + + /* + * Create a single allocation that has space for vec immediately + * following the prof_bt_t structure. The backtraces that get + * stored in the backtrace caches are copied from stack-allocated + * temporary variables, so size is known at creation time. Making this + * a contiguous object improves cache locality. + */ + ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + + (bt->len * sizeof(void *))); + if (ret == NULL) + return (NULL); + ret->vec = (void **)((uintptr_t)ret + + QUANTUM_CEILING(sizeof(prof_bt_t))); + memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); + ret->len = bt->len; + + return (ret); +} + +static inline void +prof_enter(prof_tdata_t *prof_tdata) +{ + + cassert(config_prof); + + assert(prof_tdata->enq == false); + prof_tdata->enq = true; + + malloc_mutex_lock(&bt2ctx_mtx); +} + +static inline void +prof_leave(prof_tdata_t *prof_tdata) +{ + bool idump, gdump; + + cassert(config_prof); + + malloc_mutex_unlock(&bt2ctx_mtx); + + assert(prof_tdata->enq); + prof_tdata->enq = false; + idump = prof_tdata->enq_idump; + prof_tdata->enq_idump = false; + gdump = prof_tdata->enq_gdump; + prof_tdata->enq_gdump = false; + + if (idump) + prof_idump(); + if (gdump) + prof_gdump(); +} + +#ifdef JEMALLOC_PROF_LIBUNWIND +void +prof_backtrace(prof_bt_t *bt, unsigned nignore) +{ + unw_context_t uc; + unw_cursor_t cursor; + unsigned i; + int err; + + cassert(config_prof); + assert(bt->len == 0); + assert(bt->vec != NULL); + + unw_getcontext(&uc); + unw_init_local(&cursor, &uc); + + /* Throw away (nignore+1) stack frames, if that many exist. */ + for (i = 0; i < nignore + 1; i++) { + err = unw_step(&cursor); + if (err <= 0) + return; + } + + /* + * Iterate over stack frames until there are no more, or until no space + * remains in bt. + */ + for (i = 0; i < PROF_BT_MAX; i++) { + unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); + bt->len++; + err = unw_step(&cursor); + if (err <= 0) + break; + } +} +#elif (defined(JEMALLOC_PROF_LIBGCC)) +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) +{ + + cassert(config_prof); + + return (_URC_NO_REASON); +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) +{ + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + + cassert(config_prof); + + if (data->nignore > 0) + data->nignore--; + else { + data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); + } + + return (_URC_NO_REASON); +} + +void +prof_backtrace(prof_bt_t *bt, unsigned nignore) +{ + prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; + + cassert(config_prof); + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#elif (defined(JEMALLOC_PROF_GCC)) +void +prof_backtrace(prof_bt_t *bt, unsigned nignore) +{ +#define BT_FRAME(i) \ + if ((i) < nignore + PROF_BT_MAX) { \ + void *p; \ + if (__builtin_frame_address(i) == 0) \ + return; \ + p = __builtin_return_address(i); \ + if (p == NULL) \ + return; \ + if (i >= nignore) { \ + bt->vec[(i) - nignore] = p; \ + bt->len = (i) - nignore + 1; \ + } \ + } else \ + return; + + cassert(config_prof); + assert(nignore <= 3); + + BT_FRAME(0) + BT_FRAME(1) + BT_FRAME(2) + BT_FRAME(3) + BT_FRAME(4) + BT_FRAME(5) + BT_FRAME(6) + BT_FRAME(7) + BT_FRAME(8) + BT_FRAME(9) + + BT_FRAME(10) + BT_FRAME(11) + BT_FRAME(12) + BT_FRAME(13) + BT_FRAME(14) + BT_FRAME(15) + BT_FRAME(16) + BT_FRAME(17) + BT_FRAME(18) + BT_FRAME(19) + + BT_FRAME(20) + BT_FRAME(21) + BT_FRAME(22) + BT_FRAME(23) + BT_FRAME(24) + BT_FRAME(25) + BT_FRAME(26) + BT_FRAME(27) + BT_FRAME(28) + BT_FRAME(29) + + BT_FRAME(30) + BT_FRAME(31) + BT_FRAME(32) + BT_FRAME(33) + BT_FRAME(34) + BT_FRAME(35) + BT_FRAME(36) + BT_FRAME(37) + BT_FRAME(38) + BT_FRAME(39) + + BT_FRAME(40) + BT_FRAME(41) + BT_FRAME(42) + BT_FRAME(43) + BT_FRAME(44) + BT_FRAME(45) + BT_FRAME(46) + BT_FRAME(47) + BT_FRAME(48) + BT_FRAME(49) + + BT_FRAME(50) + BT_FRAME(51) + BT_FRAME(52) + BT_FRAME(53) + BT_FRAME(54) + BT_FRAME(55) + BT_FRAME(56) + BT_FRAME(57) + BT_FRAME(58) + BT_FRAME(59) + + BT_FRAME(60) + BT_FRAME(61) + BT_FRAME(62) + BT_FRAME(63) + BT_FRAME(64) + BT_FRAME(65) + BT_FRAME(66) + BT_FRAME(67) + BT_FRAME(68) + BT_FRAME(69) + + BT_FRAME(70) + BT_FRAME(71) + BT_FRAME(72) + BT_FRAME(73) + BT_FRAME(74) + BT_FRAME(75) + BT_FRAME(76) + BT_FRAME(77) + BT_FRAME(78) + BT_FRAME(79) + + BT_FRAME(80) + BT_FRAME(81) + BT_FRAME(82) + BT_FRAME(83) + BT_FRAME(84) + BT_FRAME(85) + BT_FRAME(86) + BT_FRAME(87) + BT_FRAME(88) + BT_FRAME(89) + + BT_FRAME(90) + BT_FRAME(91) + BT_FRAME(92) + BT_FRAME(93) + BT_FRAME(94) + BT_FRAME(95) + BT_FRAME(96) + BT_FRAME(97) + BT_FRAME(98) + BT_FRAME(99) + + BT_FRAME(100) + BT_FRAME(101) + BT_FRAME(102) + BT_FRAME(103) + BT_FRAME(104) + BT_FRAME(105) + BT_FRAME(106) + BT_FRAME(107) + BT_FRAME(108) + BT_FRAME(109) + + BT_FRAME(110) + BT_FRAME(111) + BT_FRAME(112) + BT_FRAME(113) + BT_FRAME(114) + BT_FRAME(115) + BT_FRAME(116) + BT_FRAME(117) + BT_FRAME(118) + BT_FRAME(119) + + BT_FRAME(120) + BT_FRAME(121) + BT_FRAME(122) + BT_FRAME(123) + BT_FRAME(124) + BT_FRAME(125) + BT_FRAME(126) + BT_FRAME(127) + + /* Extras to compensate for nignore. */ + BT_FRAME(128) + BT_FRAME(129) + BT_FRAME(130) +#undef BT_FRAME +} +#else +void +prof_backtrace(prof_bt_t *bt, unsigned nignore) +{ + + cassert(config_prof); + not_reached(); +} +#endif + +static malloc_mutex_t * +prof_ctx_mutex_choose(void) +{ + unsigned nctxs = atomic_add_u(&cum_ctxs, 1); + + return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); +} + +static void +prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt) +{ + + ctx->bt = bt; + ctx->lock = prof_ctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition with + * prof_ctx_merge()/prof_ctx_destroy(). + */ + ctx->nlimbo = 1; + ql_elm_new(ctx, dump_link); + memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); + ql_new(&ctx->cnts_ql); +} + +static void +prof_ctx_destroy(prof_ctx_t *ctx) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + /* + * Check that ctx is still unused by any thread cache before destroying + * it. prof_lookup() increments ctx->nlimbo in order to avoid a race + * condition with this function, as does prof_ctx_merge() in order to + * avoid a race between the main body of prof_ctx_merge() and entry + * into this function. + */ + prof_tdata = prof_tdata_get(false); + assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); + prof_enter(prof_tdata); + malloc_mutex_lock(ctx->lock); + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && + ctx->nlimbo == 1) { + assert(ctx->cnt_merged.curbytes == 0); + assert(ctx->cnt_merged.accumobjs == 0); + assert(ctx->cnt_merged.accumbytes == 0); + /* Remove ctx from bt2ctx. */ + if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) + not_reached(); + prof_leave(prof_tdata); + /* Destroy ctx. */ + malloc_mutex_unlock(ctx->lock); + bt_destroy(ctx->bt); + idalloc(ctx); + } else { + /* + * Compensate for increment in prof_ctx_merge() or + * prof_lookup(). + */ + ctx->nlimbo--; + malloc_mutex_unlock(ctx->lock); + prof_leave(prof_tdata); + } +} + +static void +prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) +{ + bool destroy; + + cassert(config_prof); + + /* Merge cnt stats and detach from ctx. */ + malloc_mutex_lock(ctx->lock); + ctx->cnt_merged.curobjs += cnt->cnts.curobjs; + ctx->cnt_merged.curbytes += cnt->cnts.curbytes; + ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; + ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; + ql_remove(&ctx->cnts_ql, cnt, cnts_link); + if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && + ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { + /* + * Increment ctx->nlimbo in order to keep another thread from + * winning the race to destroy ctx while this one has ctx->lock + * dropped. Without this, it would be possible for another + * thread to: + * + * 1) Sample an allocation associated with ctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_ctx_destroy(ctx). + * + * The result would be that ctx no longer exists by the time + * this thread accesses it in prof_ctx_destroy(). + */ + ctx->nlimbo++; + destroy = true; + } else + destroy = false; + malloc_mutex_unlock(ctx->lock); + if (destroy) + prof_ctx_destroy(ctx); +} + +static bool +prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, + prof_ctx_t **p_ctx, bool *p_new_ctx) +{ + union { + prof_ctx_t *p; + void *v; + } ctx; + union { + prof_bt_t *p; + void *v; + } btkey; + bool new_ctx; + + prof_enter(prof_tdata); + if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { + /* bt has never been seen before. Insert it. */ + ctx.v = imalloc(sizeof(prof_ctx_t)); + if (ctx.v == NULL) { + prof_leave(prof_tdata); + return (true); + } + btkey.p = bt_dup(bt); + if (btkey.v == NULL) { + prof_leave(prof_tdata); + idalloc(ctx.v); + return (true); + } + prof_ctx_init(ctx.p, btkey.p); + if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { + /* OOM. */ + prof_leave(prof_tdata); + idalloc(btkey.v); + idalloc(ctx.v); + return (true); + } + new_ctx = true; + } else { + /* + * Increment nlimbo, in order to avoid a race condition with + * prof_ctx_merge()/prof_ctx_destroy(). + */ + malloc_mutex_lock(ctx.p->lock); + ctx.p->nlimbo++; + malloc_mutex_unlock(ctx.p->lock); + new_ctx = false; + } + prof_leave(prof_tdata); + + *p_btkey = btkey.v; + *p_ctx = ctx.p; + *p_new_ctx = new_ctx; + return (false); +} + +prof_thr_cnt_t * +prof_lookup(prof_bt_t *bt) +{ + union { + prof_thr_cnt_t *p; + void *v; + } ret; + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + prof_tdata = prof_tdata_get(false); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (NULL); + + if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { + void *btkey; + prof_ctx_t *ctx; + bool new_ctx; + + /* + * This thread's cache lacks bt. Look for it in the global + * cache. + */ + if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx)) + return (NULL); + + /* Link a prof_thd_cnt_t into ctx for this thread. */ + if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { + assert(ckh_count(&prof_tdata->bt2cnt) > 0); + /* + * Flush the least recently used cnt in order to keep + * bt2cnt from becoming too large. + */ + ret.p = ql_last(&prof_tdata->lru_ql, lru_link); + assert(ret.v != NULL); + if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, + NULL, NULL)) + not_reached(); + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + prof_ctx_merge(ret.p->ctx, ret.p); + /* ret can now be re-used. */ + } else { + assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); + /* Allocate and partially initialize a new cnt. */ + ret.v = imalloc(sizeof(prof_thr_cnt_t)); + if (ret.p == NULL) { + if (new_ctx) + prof_ctx_destroy(ctx); + return (NULL); + } + ql_elm_new(ret.p, cnts_link); + ql_elm_new(ret.p, lru_link); + } + /* Finish initializing ret. */ + ret.p->ctx = ctx; + ret.p->epoch = 0; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) { + if (new_ctx) + prof_ctx_destroy(ctx); + idalloc(ret.v); + return (NULL); + } + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + malloc_mutex_lock(ctx->lock); + ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link); + ctx->nlimbo--; + malloc_mutex_unlock(ctx->lock); + } else { + /* Move ret to the front of the LRU. */ + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + } + + return (ret.p); +} + +#ifdef JEMALLOC_JET +size_t +prof_bt_count(void) +{ + size_t bt_count; + prof_tdata_t *prof_tdata; + + prof_tdata = prof_tdata_get(false); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (0); + + prof_enter(prof_tdata); + bt_count = ckh_count(&bt2ctx); + prof_leave(prof_tdata); + + return (bt_count); +} +#endif + +#ifdef JEMALLOC_JET +#undef prof_dump_open +#define prof_dump_open JEMALLOC_N(prof_dump_open_impl) +#endif +static int +prof_dump_open(bool propagate_err, const char *filename) +{ + int fd; + + fd = creat(filename, 0644); + if (fd == -1 && propagate_err == false) { + malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n", + filename); + if (opt_abort) + abort(); + } + + return (fd); +} +#ifdef JEMALLOC_JET +#undef prof_dump_open +#define prof_dump_open JEMALLOC_N(prof_dump_open) +prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl); +#endif + +static bool +prof_dump_flush(bool propagate_err) +{ + bool ret = false; + ssize_t err; + + cassert(config_prof); + + err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + if (err == -1) { + if (propagate_err == false) { + malloc_write("<jemalloc>: write() failed during heap " + "profile flush\n"); + if (opt_abort) + abort(); + } + ret = true; + } + prof_dump_buf_end = 0; + + return (ret); +} + +static bool +prof_dump_close(bool propagate_err) +{ + bool ret; + + assert(prof_dump_fd != -1); + ret = prof_dump_flush(propagate_err); + close(prof_dump_fd); + prof_dump_fd = -1; + + return (ret); +} + +static bool +prof_dump_write(bool propagate_err, const char *s) +{ + unsigned i, slen, n; + + cassert(config_prof); + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. */ + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) + if (prof_dump_flush(propagate_err) && propagate_err) + return (true); + + if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { + /* Finish writing. */ + n = slen - i; + } else { + /* Write as much of s as will fit. */ + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + + return (false); +} + +JEMALLOC_ATTR(format(printf, 2, 3)) +static bool +prof_dump_printf(bool propagate_err, const char *format, ...) +{ + bool ret; + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + ret = prof_dump_write(propagate_err, buf); + + return (ret); +} + +static void +prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, + prof_ctx_list_t *ctx_ql) +{ + prof_thr_cnt_t *thr_cnt; + prof_cnt_t tcnt; + + cassert(config_prof); + + malloc_mutex_lock(ctx->lock); + + /* + * Increment nlimbo so that ctx won't go away before dump. + * Additionally, link ctx into the dump list so that it is included in + * prof_dump()'s second pass. + */ + ctx->nlimbo++; + ql_tail_insert(ctx_ql, ctx, dump_link); + + memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); + ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { + volatile unsigned *epoch = &thr_cnt->epoch; + + while (true) { + unsigned epoch0 = *epoch; + + /* Make sure epoch is even. */ + if (epoch0 & 1U) + continue; + + memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); + + /* Terminate if epoch didn't change while reading. */ + if (*epoch == epoch0) + break; + } + + ctx->cnt_summed.curobjs += tcnt.curobjs; + ctx->cnt_summed.curbytes += tcnt.curbytes; + if (opt_prof_accum) { + ctx->cnt_summed.accumobjs += tcnt.accumobjs; + ctx->cnt_summed.accumbytes += tcnt.accumbytes; + } + } + + if (ctx->cnt_summed.curobjs != 0) + (*leak_nctx)++; + + /* Add to cnt_all. */ + cnt_all->curobjs += ctx->cnt_summed.curobjs; + cnt_all->curbytes += ctx->cnt_summed.curbytes; + if (opt_prof_accum) { + cnt_all->accumobjs += ctx->cnt_summed.accumobjs; + cnt_all->accumbytes += ctx->cnt_summed.accumbytes; + } + + malloc_mutex_unlock(ctx->lock); +} + +static bool +prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) +{ + + if (opt_lg_prof_sample == 0) { + if (prof_dump_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heapprofile\n", + cnt_all->curobjs, cnt_all->curbytes, + cnt_all->accumobjs, cnt_all->accumbytes)) + return (true); + } else { + if (prof_dump_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", + cnt_all->curobjs, cnt_all->curbytes, + cnt_all->accumobjs, cnt_all->accumbytes, + ((uint64_t)1U << opt_lg_prof_sample))) + return (true); + } + + return (false); +} + +static void +prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) +{ + + ctx->nlimbo--; + ql_remove(ctx_ql, ctx, dump_link); +} + +static void +prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) +{ + + malloc_mutex_lock(ctx->lock); + prof_dump_ctx_cleanup_locked(ctx, ctx_ql); + malloc_mutex_unlock(ctx->lock); +} + +static bool +prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, + prof_ctx_list_t *ctx_ql) +{ + bool ret; + unsigned i; + + cassert(config_prof); + + /* + * Current statistics can sum to 0 as a result of unmerged per thread + * statistics. Additionally, interval- and growth-triggered dumps can + * occur between the time a ctx is created and when its statistics are + * filled in. Avoid dumping any ctx that is an artifact of either + * implementation detail. + */ + malloc_mutex_lock(ctx->lock); + if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { + assert(ctx->cnt_summed.curobjs == 0); + assert(ctx->cnt_summed.curbytes == 0); + assert(ctx->cnt_summed.accumobjs == 0); + assert(ctx->cnt_summed.accumbytes == 0); + ret = false; + goto label_return; + } + + if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @", + ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, + ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) { + ret = true; + goto label_return; + } + + for (i = 0; i < bt->len; i++) { + if (prof_dump_printf(propagate_err, " %#"PRIxPTR, + (uintptr_t)bt->vec[i])) { + ret = true; + goto label_return; + } + } + + if (prof_dump_write(propagate_err, "\n")) { + ret = true; + goto label_return; + } + + ret = false; +label_return: + prof_dump_ctx_cleanup_locked(ctx, ctx_ql); + malloc_mutex_unlock(ctx->lock); + return (ret); +} + +static bool +prof_dump_maps(bool propagate_err) +{ + bool ret; + int mfd; + char filename[PATH_MAX + 1]; + + cassert(config_prof); +#ifdef __FreeBSD__ + malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map"); +#else + malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", + (int)getpid()); +#endif + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + ssize_t nread; + + if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && + propagate_err) { + ret = true; + goto label_return; + } + nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in prof_dump_buf before read(). */ + if (prof_dump_flush(propagate_err) && + propagate_err) { + ret = true; + goto label_return; + } + } + nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], + PROF_DUMP_BUFSIZE - prof_dump_buf_end); + } while (nread > 0); + } else { + ret = true; + goto label_return; + } + + ret = false; +label_return: + if (mfd != -1) + close(mfd); + return (ret); +} + +static void +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx, + const char *filename) +{ + + if (cnt_all->curbytes != 0) { + malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %" + PRId64" object%s, %zu context%s\n", + cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", + cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", + leak_nctx, (leak_nctx != 1) ? "s" : ""); + malloc_printf( + "<jemalloc>: Run pprof on \"%s\" for leak detail\n", + filename); + } +} + +static bool +prof_dump(bool propagate_err, const char *filename, bool leakcheck) +{ + prof_tdata_t *prof_tdata; + prof_cnt_t cnt_all; + size_t tabind; + union { + prof_ctx_t *p; + void *v; + } ctx; + size_t leak_nctx; + prof_ctx_list_t ctx_ql; + + cassert(config_prof); + + prof_tdata = prof_tdata_get(false); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (true); + + malloc_mutex_lock(&prof_dump_mtx); + + /* Merge per thread profile stats, and sum them in cnt_all. */ + memset(&cnt_all, 0, sizeof(prof_cnt_t)); + leak_nctx = 0; + ql_new(&ctx_ql); + prof_enter(prof_tdata); + for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) + prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql); + prof_leave(prof_tdata); + + /* Create dump file. */ + if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) + goto label_open_close_error; + + /* Dump profile header. */ + if (prof_dump_header(propagate_err, &cnt_all)) + goto label_write_error; + + /* Dump per ctx profile stats. */ + while ((ctx.p = ql_first(&ctx_ql)) != NULL) { + if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql)) + goto label_write_error; + } + + /* Dump /proc/<pid>/maps if possible. */ + if (prof_dump_maps(propagate_err)) + goto label_write_error; + + if (prof_dump_close(propagate_err)) + goto label_open_close_error; + + malloc_mutex_unlock(&prof_dump_mtx); + + if (leakcheck) + prof_leakcheck(&cnt_all, leak_nctx, filename); + + return (false); +label_write_error: + prof_dump_close(propagate_err); +label_open_close_error: + while ((ctx.p = ql_first(&ctx_ql)) != NULL) + prof_dump_ctx_cleanup(ctx.p, &ctx_ql); + malloc_mutex_unlock(&prof_dump_mtx); + return (true); +} + +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) +static void +prof_dump_filename(char *filename, char v, int64_t vseq) +{ + + cassert(config_prof); + + if (vseq != VSEQ_INVALID) { + /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"PRIu64".%c%"PRId64".heap", + opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); + } else { + /* "<prefix>.<pid>.<seq>.<v>.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"PRIu64".%c.heap", + opt_prof_prefix, (int)getpid(), prof_dump_seq, v); + } + prof_dump_seq++; +} + +static void +prof_fdump(void) +{ + char filename[DUMP_FILENAME_BUFSIZE]; + + cassert(config_prof); + + if (prof_booted == false) + return; + + if (opt_prof_final && opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'f', VSEQ_INVALID); + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(false, filename, opt_prof_leak); + } +} + +void +prof_idump(void) +{ + prof_tdata_t *prof_tdata; + char filename[PATH_MAX + 1]; + + cassert(config_prof); + + if (prof_booted == false) + return; + prof_tdata = prof_tdata_get(false); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return; + if (prof_tdata->enq) { + prof_tdata->enq_idump = true; + return; + } + + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(false, filename, false); + } +} + +bool +prof_mdump(const char *filename) +{ + char filename_buf[DUMP_FILENAME_BUFSIZE]; + + cassert(config_prof); + + if (opt_prof == false || prof_booted == false) + return (true); + + if (filename == NULL) { + /* No filename specified, so automatically generate one. */ + if (opt_prof_prefix[0] == '\0') + return (true); + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename_buf, 'm', prof_dump_mseq); + prof_dump_mseq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + filename = filename_buf; + } + return (prof_dump(true, filename, false)); +} + +void +prof_gdump(void) +{ + prof_tdata_t *prof_tdata; + char filename[DUMP_FILENAME_BUFSIZE]; + + cassert(config_prof); + + if (prof_booted == false) + return; + prof_tdata = prof_tdata_get(false); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return; + if (prof_tdata->enq) { + prof_tdata->enq_gdump = true; + return; + } + + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(false, filename, false); + } +} + +static void +prof_bt_hash(const void *key, size_t r_hash[2]) +{ + prof_bt_t *bt = (prof_bt_t *)key; + + cassert(config_prof); + + hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); +} + +static bool +prof_bt_keycomp(const void *k1, const void *k2) +{ + const prof_bt_t *bt1 = (prof_bt_t *)k1; + const prof_bt_t *bt2 = (prof_bt_t *)k2; + + cassert(config_prof); + + if (bt1->len != bt2->len) + return (false); + return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); +} + +prof_tdata_t * +prof_tdata_init(void) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + /* Initialize an empty cache for this thread. */ + prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); + if (prof_tdata == NULL) + return (NULL); + + if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, + prof_bt_hash, prof_bt_keycomp)) { + idalloc(prof_tdata); + return (NULL); + } + ql_new(&prof_tdata->lru_ql); + + prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); + if (prof_tdata->vec == NULL) { + ckh_delete(&prof_tdata->bt2cnt); + idalloc(prof_tdata); + return (NULL); + } + + prof_tdata->prng_state = 0; + prof_tdata->threshold = 0; + prof_tdata->accum = 0; + + prof_tdata->enq = false; + prof_tdata->enq_idump = false; + prof_tdata->enq_gdump = false; + + prof_tdata_tsd_set(&prof_tdata); + + return (prof_tdata); +} + +void +prof_tdata_cleanup(void *arg) +{ + prof_thr_cnt_t *cnt; + prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; + + cassert(config_prof); + + if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY + * in order to receive another callback. + */ + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); + } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to PROF_TDATA_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the prof_tdata. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (prof_tdata != NULL) { + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); + /* + * Iteratively merge cnt's into the global stats and delete + * them. + */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + prof_ctx_merge(cnt->ctx, cnt); + idalloc(cnt); + } + idalloc(prof_tdata->vec); + idalloc(prof_tdata); + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); + } +} + +void +prof_boot0(void) +{ + + cassert(config_prof); + + memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, + sizeof(PROF_PREFIX_DEFAULT)); +} + +void +prof_boot1(void) +{ + + cassert(config_prof); + + /* + * opt_prof and prof_promote must be in their final state before any + * arenas are initialized, so this function must be executed early. + */ + + if (opt_prof_leak && opt_prof == false) { + /* + * Enable opt_prof, but in such a way that profiles are never + * automatically dumped. + */ + opt_prof = true; + opt_prof_gdump = false; + } else if (opt_prof) { + if (opt_lg_prof_interval >= 0) { + prof_interval = (((uint64_t)1U) << + opt_lg_prof_interval); + } + } + + prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); +} + +bool +prof_boot2(void) +{ + + cassert(config_prof); + + if (opt_prof) { + unsigned i; + + if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) + return (true); + if (malloc_mutex_init(&bt2ctx_mtx)) + return (true); + if (prof_tdata_tsd_boot()) { + malloc_write( + "<jemalloc>: Error in pthread_key_create()\n"); + abort(); + } + + if (malloc_mutex_init(&prof_dump_seq_mtx)) + return (true); + if (malloc_mutex_init(&prof_dump_mtx)) + return (true); + + if (atexit(prof_fdump) != 0) { + malloc_write("<jemalloc>: Error in atexit()\n"); + if (opt_abort) + abort(); + } + + ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * + sizeof(malloc_mutex_t)); + if (ctx_locks == NULL) + return (true); + for (i = 0; i < PROF_NCTX_LOCKS; i++) { + if (malloc_mutex_init(&ctx_locks[i])) + return (true); + } + } + +#ifdef JEMALLOC_PROF_LIBGCC + /* + * Cause the backtracing machinery to allocate its internal state + * before enabling profiling. + */ + _Unwind_Backtrace(prof_unwind_init_callback, NULL); +#endif + + prof_booted = true; + + return (false); +} + +void +prof_prefork(void) +{ + + if (opt_prof) { + unsigned i; + + malloc_mutex_prefork(&bt2ctx_mtx); + malloc_mutex_prefork(&prof_dump_seq_mtx); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_prefork(&ctx_locks[i]); + } +} + +void +prof_postfork_parent(void) +{ + + if (opt_prof) { + unsigned i; + + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_postfork_parent(&ctx_locks[i]); + malloc_mutex_postfork_parent(&prof_dump_seq_mtx); + malloc_mutex_postfork_parent(&bt2ctx_mtx); + } +} + +void +prof_postfork_child(void) +{ + + if (opt_prof) { + unsigned i; + + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_postfork_child(&ctx_locks[i]); + malloc_mutex_postfork_child(&prof_dump_seq_mtx); + malloc_mutex_postfork_child(&bt2ctx_mtx); + } +} + +/******************************************************************************/ diff --git a/deps/jemalloc/src/quarantine.c b/deps/jemalloc/src/quarantine.c new file mode 100644 index 0000000000..5431511640 --- /dev/null +++ b/deps/jemalloc/src/quarantine.c @@ -0,0 +1,199 @@ +#define JEMALLOC_QUARANTINE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/* + * quarantine pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1) +#define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2) +#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY + +/******************************************************************************/ +/* Data. */ + +malloc_tsd_data(, quarantine, quarantine_t *, NULL) + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static quarantine_t *quarantine_grow(quarantine_t *quarantine); +static void quarantine_drain_one(quarantine_t *quarantine); +static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); + +/******************************************************************************/ + +quarantine_t * +quarantine_init(size_t lg_maxobjs) +{ + quarantine_t *quarantine; + + quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); + if (quarantine == NULL) + return (NULL); + quarantine->curbytes = 0; + quarantine->curobjs = 0; + quarantine->first = 0; + quarantine->lg_maxobjs = lg_maxobjs; + + quarantine_tsd_set(&quarantine); + + return (quarantine); +} + +static quarantine_t * +quarantine_grow(quarantine_t *quarantine) +{ + quarantine_t *ret; + + ret = quarantine_init(quarantine->lg_maxobjs + 1); + if (ret == NULL) { + quarantine_drain_one(quarantine); + return (quarantine); + } + + ret->curbytes = quarantine->curbytes; + ret->curobjs = quarantine->curobjs; + if (quarantine->first + quarantine->curobjs <= (ZU(1) << + quarantine->lg_maxobjs)) { + /* objs ring buffer data are contiguous. */ + memcpy(ret->objs, &quarantine->objs[quarantine->first], + quarantine->curobjs * sizeof(quarantine_obj_t)); + } else { + /* objs ring buffer data wrap around. */ + size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) - + quarantine->first; + size_t ncopy_b = quarantine->curobjs - ncopy_a; + + memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a + * sizeof(quarantine_obj_t)); + memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * + sizeof(quarantine_obj_t)); + } + idalloc(quarantine); + + return (ret); +} + +static void +quarantine_drain_one(quarantine_t *quarantine) +{ + quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; + assert(obj->usize == isalloc(obj->ptr, config_prof)); + idalloc(obj->ptr); + quarantine->curbytes -= obj->usize; + quarantine->curobjs--; + quarantine->first = (quarantine->first + 1) & ((ZU(1) << + quarantine->lg_maxobjs) - 1); +} + +static void +quarantine_drain(quarantine_t *quarantine, size_t upper_bound) +{ + + while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) + quarantine_drain_one(quarantine); +} + +void +quarantine(void *ptr) +{ + quarantine_t *quarantine; + size_t usize = isalloc(ptr, config_prof); + + cassert(config_fill); + assert(opt_quarantine); + + quarantine = *quarantine_tsd_get(); + if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { + if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * Make a note that quarantine() was called after + * quarantine_cleanup() was called. + */ + quarantine = QUARANTINE_STATE_REINCARNATED; + quarantine_tsd_set(&quarantine); + } + idalloc(ptr); + return; + } + /* + * Drain one or more objects if the quarantine size limit would be + * exceeded by appending ptr. + */ + if (quarantine->curbytes + usize > opt_quarantine) { + size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine + - usize : 0; + quarantine_drain(quarantine, upper_bound); + } + /* Grow the quarantine ring buffer if it's full. */ + if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) + quarantine = quarantine_grow(quarantine); + /* quarantine_grow() must free a slot if it fails to grow. */ + assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs)); + /* Append ptr if its size doesn't exceed the quarantine size. */ + if (quarantine->curbytes + usize <= opt_quarantine) { + size_t offset = (quarantine->first + quarantine->curobjs) & + ((ZU(1) << quarantine->lg_maxobjs) - 1); + quarantine_obj_t *obj = &quarantine->objs[offset]; + obj->ptr = ptr; + obj->usize = usize; + quarantine->curbytes += usize; + quarantine->curobjs++; + if (config_fill && opt_junk) { + /* + * Only do redzone validation if Valgrind isn't in + * operation. + */ + if ((config_valgrind == false || opt_valgrind == false) + && usize <= SMALL_MAXCLASS) + arena_quarantine_junk_small(ptr, usize); + else + memset(ptr, 0x5a, usize); + } + } else { + assert(quarantine->curbytes == 0); + idalloc(ptr); + } +} + +void +quarantine_cleanup(void *arg) +{ + quarantine_t *quarantine = *(quarantine_t **)arg; + + if (quarantine == QUARANTINE_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY + * in order to receive another callback. + */ + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); + } else if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to QUARANTINE_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the quarantine. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (quarantine != NULL) { + quarantine_drain(quarantine, 0); + idalloc(quarantine); + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); + } +} + +bool +quarantine_boot(void) +{ + + cassert(config_fill); + + if (quarantine_tsd_boot()) + return (true); + + return (false); +} diff --git a/deps/jemalloc/src/rtree.c b/deps/jemalloc/src/rtree.c new file mode 100644 index 0000000000..205957ac4e --- /dev/null +++ b/deps/jemalloc/src/rtree.c @@ -0,0 +1,105 @@ +#define JEMALLOC_RTREE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +rtree_t * +rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc) +{ + rtree_t *ret; + unsigned bits_per_level, bits_in_leaf, height, i; + + assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); + + bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; + bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1; + if (bits > bits_in_leaf) { + height = 1 + (bits - bits_in_leaf) / bits_per_level; + if ((height-1) * bits_per_level + bits_in_leaf != bits) + height++; + } else { + height = 1; + } + assert((height-1) * bits_per_level + bits_in_leaf >= bits); + + ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) + + (sizeof(unsigned) * height)); + if (ret == NULL) + return (NULL); + memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * + height)); + + ret->alloc = alloc; + ret->dalloc = dalloc; + if (malloc_mutex_init(&ret->mutex)) { + if (dalloc != NULL) + dalloc(ret); + return (NULL); + } + ret->height = height; + if (height > 1) { + if ((height-1) * bits_per_level + bits_in_leaf > bits) { + ret->level2bits[0] = (bits - bits_in_leaf) % + bits_per_level; + } else + ret->level2bits[0] = bits_per_level; + for (i = 1; i < height-1; i++) + ret->level2bits[i] = bits_per_level; + ret->level2bits[height-1] = bits_in_leaf; + } else + ret->level2bits[0] = bits; + + ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]); + if (ret->root == NULL) { + if (dalloc != NULL) + dalloc(ret); + return (NULL); + } + memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); + + return (ret); +} + +static void +rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level) +{ + + if (level < rtree->height - 1) { + size_t nchildren, i; + + nchildren = ZU(1) << rtree->level2bits[level]; + for (i = 0; i < nchildren; i++) { + void **child = (void **)node[i]; + if (child != NULL) + rtree_delete_subtree(rtree, child, level + 1); + } + } + rtree->dalloc(node); +} + +void +rtree_delete(rtree_t *rtree) +{ + + rtree_delete_subtree(rtree, rtree->root, 0); + rtree->dalloc(rtree); +} + +void +rtree_prefork(rtree_t *rtree) +{ + + malloc_mutex_prefork(&rtree->mutex); +} + +void +rtree_postfork_parent(rtree_t *rtree) +{ + + malloc_mutex_postfork_parent(&rtree->mutex); +} + +void +rtree_postfork_child(rtree_t *rtree) +{ + + malloc_mutex_postfork_child(&rtree->mutex); +} diff --git a/deps/jemalloc/src/stats.c b/deps/jemalloc/src/stats.c new file mode 100644 index 0000000000..bef2ab33cd --- /dev/null +++ b/deps/jemalloc/src/stats.c @@ -0,0 +1,549 @@ +#define JEMALLOC_STATS_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +#define CTL_GET(n, v, t) do { \ + size_t sz = sizeof(t); \ + xmallctl(n, v, &sz, NULL, 0); \ +} while (0) + +#define CTL_I_GET(n, v, t) do { \ + size_t mib[6]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = i; \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ +} while (0) + +#define CTL_J_GET(n, v, t) do { \ + size_t mib[6]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = j; \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ +} while (0) + +#define CTL_IJ_GET(n, v, t) do { \ + size_t mib[6]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = i; \ + mib[4] = j; \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ +} while (0) + +/******************************************************************************/ +/* Data. */ + +bool opt_stats_print = false; + +size_t stats_cactive = 0; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void stats_arena_bins_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i); +static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i); +static void stats_arena_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i, bool bins, bool large); + +/******************************************************************************/ + +static void +stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, + unsigned i) +{ + size_t page; + bool config_tcache; + unsigned nbins, j, gap_start; + + CTL_GET("arenas.page", &page, size_t); + + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "bins: bin size regs pgs allocated nmalloc" + " ndalloc nrequests nfills nflushes" + " newruns reruns curruns\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "bins: bin size regs pgs allocated nmalloc" + " ndalloc newruns reruns curruns\n"); + } + CTL_GET("arenas.nbins", &nbins, unsigned); + for (j = 0, gap_start = UINT_MAX; j < nbins; j++) { + uint64_t nruns; + + CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t); + if (nruns == 0) { + if (gap_start == UINT_MAX) + gap_start = j; + } else { + size_t reg_size, run_size, allocated; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t reruns; + size_t curruns; + + if (gap_start != UINT_MAX) { + if (j > gap_start + 1) { + /* Gap of more than one size class. */ + malloc_cprintf(write_cb, cbopaque, + "[%u..%u]\n", gap_start, + j - 1); + } else { + /* Gap of one size class. */ + malloc_cprintf(write_cb, cbopaque, + "[%u]\n", gap_start); + } + gap_start = UINT_MAX; + } + CTL_J_GET("arenas.bin.0.size", ®_size, size_t); + CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); + CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); + CTL_IJ_GET("stats.arenas.0.bins.0.allocated", + &allocated, size_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc", + &nmalloc, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc", + &ndalloc, uint64_t); + if (config_tcache) { + CTL_IJ_GET("stats.arenas.0.bins.0.nrequests", + &nrequests, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nfills", + &nfills, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nflushes", + &nflushes, uint64_t); + } + CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns, + uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, + size_t); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "%13u %5zu %4u %3zu %12zu %12"PRIu64 + " %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu\n", + j, reg_size, nregs, run_size / page, + allocated, nmalloc, ndalloc, nrequests, + nfills, nflushes, nruns, reruns, curruns); + } else { + malloc_cprintf(write_cb, cbopaque, + "%13u %5zu %4u %3zu %12zu %12"PRIu64 + " %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu\n", + j, reg_size, nregs, run_size / page, + allocated, nmalloc, ndalloc, nruns, reruns, + curruns); + } + } + } + if (gap_start != UINT_MAX) { + if (j > gap_start + 1) { + /* Gap of more than one size class. */ + malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n", + gap_start, j - 1); + } else { + /* Gap of one size class. */ + malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start); + } + } +} + +static void +stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, + unsigned i) +{ + size_t page, nlruns, j; + ssize_t gap_start; + + CTL_GET("arenas.page", &page, size_t); + + malloc_cprintf(write_cb, cbopaque, + "large: size pages nmalloc ndalloc nrequests" + " curruns\n"); + CTL_GET("arenas.nlruns", &nlruns, size_t); + for (j = 0, gap_start = -1; j < nlruns; j++) { + uint64_t nmalloc, ndalloc, nrequests; + size_t run_size, curruns; + + CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc, + uint64_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc, + uint64_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests, + uint64_t); + if (nrequests == 0) { + if (gap_start == -1) + gap_start = j; + } else { + CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, + size_t); + if (gap_start != -1) { + malloc_cprintf(write_cb, cbopaque, "[%zu]\n", + j - gap_start); + gap_start = -1; + } + malloc_cprintf(write_cb, cbopaque, + "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu\n", + run_size, run_size / page, nmalloc, ndalloc, + nrequests, curruns); + } + } + if (gap_start != -1) + malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start); +} + +static void +stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, + unsigned i, bool bins, bool large) +{ + unsigned nthreads; + const char *dss; + size_t page, pactive, pdirty, mapped; + uint64_t npurge, nmadvise, purged; + size_t small_allocated; + uint64_t small_nmalloc, small_ndalloc, small_nrequests; + size_t large_allocated; + uint64_t large_nmalloc, large_ndalloc, large_nrequests; + + CTL_GET("arenas.page", &page, size_t); + + CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); + CTL_I_GET("stats.arenas.0.dss", &dss, const char *); + malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", + dss); + CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); + CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); + CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); + CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t); + CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s," + " %"PRIu64" madvise%s, %"PRIu64" purged\n", + pactive, pdirty, npurge, npurge == 1 ? "" : "s", + nmadvise, nmadvise == 1 ? "" : "s", purged); + + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc ndalloc nrequests\n"); + CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t); + CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t); + CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t); + CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + small_allocated, small_nmalloc, small_ndalloc, small_nrequests); + CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t); + CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t); + CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t); + CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + small_allocated + large_allocated, + small_nmalloc + large_nmalloc, + small_ndalloc + large_ndalloc, + small_nrequests + large_nrequests); + malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); + CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); + malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); + + if (bins) + stats_arena_bins_print(write_cb, cbopaque, i); + if (large) + stats_arena_lruns_print(write_cb, cbopaque, i); +} + +void +stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) +{ + int err; + uint64_t epoch; + size_t u64sz; + bool general = true; + bool merged = true; + bool unmerged = true; + bool bins = true; + bool large = true; + + /* + * Refresh stats, in case mallctl() was called by the application. + * + * Check for OOM here, since refreshing the ctl cache can trigger + * allocation. In practice, none of the subsequent mallctl()-related + * calls in this function will cause OOM if this one succeeds. + * */ + epoch = 1; + u64sz = sizeof(uint64_t); + err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); + if (err != 0) { + if (err == EAGAIN) { + malloc_write("<jemalloc>: Memory allocation failure in " + "mallctl(\"epoch\", ...)\n"); + return; + } + malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", " + "...)\n"); + abort(); + } + + if (opts != NULL) { + unsigned i; + + for (i = 0; opts[i] != '\0'; i++) { + switch (opts[i]) { + case 'g': + general = false; + break; + case 'm': + merged = false; + break; + case 'a': + unmerged = false; + break; + case 'b': + bins = false; + break; + case 'l': + large = false; + break; + default:; + } + } + } + + malloc_cprintf(write_cb, cbopaque, + "___ Begin jemalloc statistics ___\n"); + if (general) { + int err; + const char *cpv; + bool bv; + unsigned uv; + ssize_t ssv; + size_t sv, bsz, ssz, sssz, cpsz; + + bsz = sizeof(bool); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); + + CTL_GET("version", &cpv, const char *); + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); + CTL_GET("config.debug", &bv, bool); + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? "enabled" : "disabled"); + +#define OPT_WRITE_BOOL(n) \ + if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ + == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? "true" : "false"); \ + } +#define OPT_WRITE_SIZE_T(n) \ + if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ + == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } +#define OPT_WRITE_SSIZE_T(n) \ + if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ + == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ + } +#define OPT_WRITE_CHAR_P(n) \ + if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ + == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ + } + + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); + OPT_WRITE_BOOL(abort) + OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_CHAR_P(dss) + OPT_WRITE_SIZE_T(narenas) + OPT_WRITE_SSIZE_T(lg_dirty_mult) + OPT_WRITE_BOOL(stats_print) + OPT_WRITE_BOOL(junk) + OPT_WRITE_SIZE_T(quarantine) + OPT_WRITE_BOOL(redzone) + OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(utrace) + OPT_WRITE_BOOL(valgrind) + OPT_WRITE_BOOL(xmalloc) + OPT_WRITE_BOOL(tcache) + OPT_WRITE_SSIZE_T(lg_tcache_max) + OPT_WRITE_BOOL(prof) + OPT_WRITE_CHAR_P(prof_prefix) + OPT_WRITE_BOOL(prof_active) + OPT_WRITE_SSIZE_T(lg_prof_sample) + OPT_WRITE_BOOL(prof_accum) + OPT_WRITE_SSIZE_T(lg_prof_interval) + OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_final) + OPT_WRITE_BOOL(prof_leak) + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P + + malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); + + CTL_GET("arenas.narenas", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); + + malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", + sizeof(void *)); + + CTL_GET("arenas.quantum", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + + CTL_GET("arenas.page", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + + CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: %u:1\n", + (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: N/A\n"); + } + if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) + == 0) { + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); + } + if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && + bv) { + CTL_GET("opt.lg_prof_sample", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "Average profile sample interval: %"PRIu64 + " (2^%zu)\n", (((uint64_t)1U) << sv), sv); + + CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Average profile dump interval: %"PRIu64 + " (2^%zd)\n", + (((uint64_t)1U) << ssv), ssv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Average profile dump interval: N/A\n"); + } + } + CTL_GET("opt.lg_chunk", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n", + (ZU(1) << sv), sv); + } + + if (config_stats) { + size_t *cactive; + size_t allocated, active, mapped; + size_t chunks_current, chunks_high; + uint64_t chunks_total; + size_t huge_allocated; + uint64_t huge_nmalloc, huge_ndalloc; + + CTL_GET("stats.cactive", &cactive, size_t *); + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.mapped", &mapped, size_t); + malloc_cprintf(write_cb, cbopaque, + "Allocated: %zu, active: %zu, mapped: %zu\n", + allocated, active, mapped); + malloc_cprintf(write_cb, cbopaque, + "Current active ceiling: %zu\n", atomic_read_z(cactive)); + + /* Print chunk stats. */ + CTL_GET("stats.chunks.total", &chunks_total, uint64_t); + CTL_GET("stats.chunks.high", &chunks_high, size_t); + CTL_GET("stats.chunks.current", &chunks_current, size_t); + malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " + "highchunks curchunks\n"); + malloc_cprintf(write_cb, cbopaque, + " %13"PRIu64" %12zu %12zu\n", + chunks_total, chunks_high, chunks_current); + + /* Print huge stats. */ + CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t); + CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t); + CTL_GET("stats.huge.allocated", &huge_allocated, size_t); + malloc_cprintf(write_cb, cbopaque, + "huge: nmalloc ndalloc allocated\n"); + malloc_cprintf(write_cb, cbopaque, + " %12"PRIu64" %12"PRIu64" %12zu\n", + huge_nmalloc, huge_ndalloc, huge_allocated); + + if (merged) { + unsigned narenas; + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i, ninitialized; + + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + if (ninitialized > 1 || unmerged == false) { + /* Print merged arena stats. */ + malloc_cprintf(write_cb, cbopaque, + "\nMerged arenas stats:\n"); + stats_arena_print(write_cb, cbopaque, + narenas, bins, large); + } + } + } + + if (unmerged) { + unsigned narenas; + + /* Print stats for each arena. */ + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i; + + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", initialized, + &isz, NULL, 0); + + for (i = 0; i < narenas; i++) { + if (initialized[i]) { + malloc_cprintf(write_cb, + cbopaque, + "\narenas[%u]:\n", i); + stats_arena_print(write_cb, + cbopaque, i, bins, large); + } + } + } + } + } + malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); +} diff --git a/deps/jemalloc/src/tcache.c b/deps/jemalloc/src/tcache.c new file mode 100644 index 0000000000..6de92960b2 --- /dev/null +++ b/deps/jemalloc/src/tcache.c @@ -0,0 +1,479 @@ +#define JEMALLOC_TCACHE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +malloc_tsd_data(, tcache, tcache_t *, NULL) +malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default) + +bool opt_tcache = true; +ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; + +tcache_bin_info_t *tcache_bin_info; +static unsigned stack_nelms; /* Total stack elms per tcache. */ + +size_t nhbins; +size_t tcache_maxclass; + +/******************************************************************************/ + +size_t tcache_salloc(const void *ptr) +{ + + return (arena_salloc(ptr, false)); +} + +void +tcache_event_hard(tcache_t *tcache) +{ + size_t binind = tcache->next_gc_bin; + tcache_bin_t *tbin = &tcache->tbins[binind]; + tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + + if (tbin->low_water > 0) { + /* + * Flush (ceiling) 3/4 of the objects below the low water mark. + */ + if (binind < NBINS) { + tcache_bin_flush_small(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } else { + tcache_bin_flush_large(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } + /* + * Reduce fill count by 2X. Limit lg_fill_div such that the + * fill count is always at least 1. + */ + if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) + tbin->lg_fill_div++; + } else if (tbin->low_water < 0) { + /* + * Increase fill count by 2X. Make sure lg_fill_div stays + * greater than 0. + */ + if (tbin->lg_fill_div > 1) + tbin->lg_fill_div--; + } + tbin->low_water = tbin->ncached; + + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) + tcache->next_gc_bin = 0; + tcache->ev_cnt = 0; +} + +void * +tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) +{ + void *ret; + + arena_tcache_fill_small(tcache->arena, tbin, binind, + config_prof ? tcache->prof_accumbytes : 0); + if (config_prof) + tcache->prof_accumbytes = 0; + ret = tcache_alloc_easy(tbin); + + return (ret); +} + +void +tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache) +{ + void *ptr; + unsigned i, nflush, ndeferred; + bool merged_stats = false; + + assert(binind < NBINS); + assert(rem <= tbin->ncached); + + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + /* Lock the arena bin associated with the first object. */ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); + arena_t *arena = chunk->arena; + arena_bin_t *bin = &arena->bins[binind]; + + if (config_prof && arena == tcache->arena) { + if (arena_prof_accum(arena, tcache->prof_accumbytes)) + prof_idump(); + tcache->prof_accumbytes = 0; + } + + malloc_mutex_lock(&bin->lock); + if (config_stats && arena == tcache->arena) { + assert(merged_stats == false); + merged_stats = true; + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } + ndeferred = 0; + for (i = 0; i < nflush; i++) { + ptr = tbin->avail[i]; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk->arena == arena) { + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_t *mapelm = + arena_mapp_get(chunk, pageind); + if (config_fill && opt_junk) { + arena_alloc_junk_small(ptr, + &arena_bin_info[binind], true); + } + arena_dalloc_bin_locked(arena, chunk, ptr, + mapelm); + } else { + /* + * This object was allocated via a different + * arena bin than the one that is currently + * locked. Stash the object, so that it can be + * handled in a future pass. + */ + tbin->avail[ndeferred] = ptr; + ndeferred++; + } + } + malloc_mutex_unlock(&bin->lock); + } + if (config_stats && merged_stats == false) { + /* + * The flush loop didn't happen to flush to this thread's + * arena, so the stats didn't get merged. Manually do so now. + */ + arena_bin_t *bin = &tcache->arena->bins[binind]; + malloc_mutex_lock(&bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(&bin->lock); + } + + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); + tbin->ncached = rem; + if ((int)tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; +} + +void +tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache) +{ + void *ptr; + unsigned i, nflush, ndeferred; + bool merged_stats = false; + + assert(binind < nhbins); + assert(rem <= tbin->ncached); + + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + /* Lock the arena associated with the first object. */ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); + arena_t *arena = chunk->arena; + UNUSED bool idump; + + if (config_prof) + idump = false; + malloc_mutex_lock(&arena->lock); + if ((config_prof || config_stats) && arena == tcache->arena) { + if (config_prof) { + idump = arena_prof_accum_locked(arena, + tcache->prof_accumbytes); + tcache->prof_accumbytes = 0; + } + if (config_stats) { + merged_stats = true; + arena->stats.nrequests_large += + tbin->tstats.nrequests; + arena->stats.lstats[binind - NBINS].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } + } + ndeferred = 0; + for (i = 0; i < nflush; i++) { + ptr = tbin->avail[i]; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk->arena == arena) + arena_dalloc_large_locked(arena, chunk, ptr); + else { + /* + * This object was allocated via a different + * arena than the one that is currently locked. + * Stash the object, so that it can be handled + * in a future pass. + */ + tbin->avail[ndeferred] = ptr; + ndeferred++; + } + } + malloc_mutex_unlock(&arena->lock); + if (config_prof && idump) + prof_idump(); + } + if (config_stats && merged_stats == false) { + /* + * The flush loop didn't happen to flush to this thread's + * arena, so the stats didn't get merged. Manually do so now. + */ + arena_t *arena = tcache->arena; + malloc_mutex_lock(&arena->lock); + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[binind - NBINS].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(&arena->lock); + } + + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); + tbin->ncached = rem; + if ((int)tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; +} + +void +tcache_arena_associate(tcache_t *tcache, arena_t *arena) +{ + + if (config_stats) { + /* Link into list of extant tcaches. */ + malloc_mutex_lock(&arena->lock); + ql_elm_new(tcache, link); + ql_tail_insert(&arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&arena->lock); + } + tcache->arena = arena; +} + +void +tcache_arena_dissociate(tcache_t *tcache) +{ + + if (config_stats) { + /* Unlink from list of extant tcaches. */ + malloc_mutex_lock(&tcache->arena->lock); + ql_remove(&tcache->arena->tcache_ql, tcache, link); + tcache_stats_merge(tcache, tcache->arena); + malloc_mutex_unlock(&tcache->arena->lock); + } +} + +tcache_t * +tcache_create(arena_t *arena) +{ + tcache_t *tcache; + size_t size, stack_offset; + unsigned i; + + size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); + /* Naturally align the pointer stacks. */ + size = PTR_CEILING(size); + stack_offset = size; + size += stack_nelms * sizeof(void *); + /* + * Round up to the nearest multiple of the cacheline size, in order to + * avoid the possibility of false cacheline sharing. + * + * That this works relies on the same logic as in ipalloc(), but we + * cannot directly call ipalloc() here due to tcache bootstrapping + * issues. + */ + size = (size + CACHELINE_MASK) & (-CACHELINE); + + if (size <= SMALL_MAXCLASS) + tcache = (tcache_t *)arena_malloc_small(arena, size, true); + else if (size <= tcache_maxclass) + tcache = (tcache_t *)arena_malloc_large(arena, size, true); + else + tcache = (tcache_t *)icalloct(size, false, arena); + + if (tcache == NULL) + return (NULL); + + tcache_arena_associate(tcache, arena); + + assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); + for (i = 0; i < nhbins; i++) { + tcache->tbins[i].lg_fill_div = 1; + tcache->tbins[i].avail = (void **)((uintptr_t)tcache + + (uintptr_t)stack_offset); + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + } + + tcache_tsd_set(&tcache); + + return (tcache); +} + +void +tcache_destroy(tcache_t *tcache) +{ + unsigned i; + size_t tcache_size; + + tcache_arena_dissociate(tcache); + + for (i = 0; i < NBINS; i++) { + tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_flush_small(tbin, i, 0, tcache); + + if (config_stats && tbin->tstats.nrequests != 0) { + arena_t *arena = tcache->arena; + arena_bin_t *bin = &arena->bins[i]; + malloc_mutex_lock(&bin->lock); + bin->stats.nrequests += tbin->tstats.nrequests; + malloc_mutex_unlock(&bin->lock); + } + } + + for (; i < nhbins; i++) { + tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_flush_large(tbin, i, 0, tcache); + + if (config_stats && tbin->tstats.nrequests != 0) { + arena_t *arena = tcache->arena; + malloc_mutex_lock(&arena->lock); + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[i - NBINS].nrequests += + tbin->tstats.nrequests; + malloc_mutex_unlock(&arena->lock); + } + } + + if (config_prof && tcache->prof_accumbytes > 0 && + arena_prof_accum(tcache->arena, tcache->prof_accumbytes)) + prof_idump(); + + tcache_size = arena_salloc(tcache, false); + if (tcache_size <= SMALL_MAXCLASS) { + arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); + arena_t *arena = chunk->arena; + size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> + LG_PAGE; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); + + arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm); + } else if (tcache_size <= tcache_maxclass) { + arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); + arena_t *arena = chunk->arena; + + arena_dalloc_large(arena, chunk, tcache); + } else + idalloct(tcache, false); +} + +void +tcache_thread_cleanup(void *arg) +{ + tcache_t *tcache = *(tcache_t **)arg; + + if (tcache == TCACHE_STATE_DISABLED) { + /* Do nothing. */ + } else if (tcache == TCACHE_STATE_REINCARNATED) { + /* + * Another destructor called an allocator function after this + * destructor was called. Reset tcache to + * TCACHE_STATE_PURGATORY in order to receive another callback. + */ + tcache = TCACHE_STATE_PURGATORY; + tcache_tsd_set(&tcache); + } else if (tcache == TCACHE_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to TCACHE_STATE_PURGATORY so that other destructors wouldn't + * cause re-creation of the tcache. This time, do nothing, so + * that the destructor will not be called again. + */ + } else if (tcache != NULL) { + assert(tcache != TCACHE_STATE_PURGATORY); + tcache_destroy(tcache); + tcache = TCACHE_STATE_PURGATORY; + tcache_tsd_set(&tcache); + } +} + +/* Caller must own arena->lock. */ +void +tcache_stats_merge(tcache_t *tcache, arena_t *arena) +{ + unsigned i; + + cassert(config_stats); + + /* Merge and reset tcache stats. */ + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + tcache_bin_t *tbin = &tcache->tbins[i]; + malloc_mutex_lock(&bin->lock); + bin->stats.nrequests += tbin->tstats.nrequests; + malloc_mutex_unlock(&bin->lock); + tbin->tstats.nrequests = 0; + } + + for (; i < nhbins; i++) { + malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS]; + tcache_bin_t *tbin = &tcache->tbins[i]; + arena->stats.nrequests_large += tbin->tstats.nrequests; + lstats->nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } +} + +bool +tcache_boot0(void) +{ + unsigned i; + + /* + * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is + * known. + */ + if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) + tcache_maxclass = SMALL_MAXCLASS; + else if ((1U << opt_lg_tcache_max) > arena_maxclass) + tcache_maxclass = arena_maxclass; + else + tcache_maxclass = (1U << opt_lg_tcache_max); + + nhbins = NBINS + (tcache_maxclass >> LG_PAGE); + + /* Initialize tcache_bin_info. */ + tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + sizeof(tcache_bin_info_t)); + if (tcache_bin_info == NULL) + return (true); + stack_nelms = 0; + for (i = 0; i < NBINS; i++) { + if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { + tcache_bin_info[i].ncached_max = + (arena_bin_info[i].nregs << 1); + } else { + tcache_bin_info[i].ncached_max = + TCACHE_NSLOTS_SMALL_MAX; + } + stack_nelms += tcache_bin_info[i].ncached_max; + } + for (; i < nhbins; i++) { + tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; + stack_nelms += tcache_bin_info[i].ncached_max; + } + + return (false); +} + +bool +tcache_boot1(void) +{ + + if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) + return (true); + + return (false); +} diff --git a/deps/jemalloc/src/tsd.c b/deps/jemalloc/src/tsd.c new file mode 100644 index 0000000000..700caabfe4 --- /dev/null +++ b/deps/jemalloc/src/tsd.c @@ -0,0 +1,141 @@ +#define JEMALLOC_TSD_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +static unsigned ncleanups; +static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; + +/******************************************************************************/ + +void * +malloc_tsd_malloc(size_t size) +{ + + /* Avoid choose_arena() in order to dodge bootstrapping issues. */ + return (arena_malloc(arenas[0], size, false, false)); +} + +void +malloc_tsd_dalloc(void *wrapper) +{ + + idalloct(wrapper, false); +} + +void +malloc_tsd_no_cleanup(void *arg) +{ + + not_reached(); +} + +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) +#ifndef _WIN32 +JEMALLOC_EXPORT +#endif +void +_malloc_thread_cleanup(void) +{ + bool pending[MALLOC_TSD_CLEANUPS_MAX], again; + unsigned i; + + for (i = 0; i < ncleanups; i++) + pending[i] = true; + + do { + again = false; + for (i = 0; i < ncleanups; i++) { + if (pending[i]) { + pending[i] = cleanups[i](); + if (pending[i]) + again = true; + } + } + } while (again); +} +#endif + +void +malloc_tsd_cleanup_register(bool (*f)(void)) +{ + + assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); + cleanups[ncleanups] = f; + ncleanups++; +} + +void +malloc_tsd_boot(void) +{ + + ncleanups = 0; +} + +#ifdef _WIN32 +static BOOL WINAPI +_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) +{ + + switch (fdwReason) { +#ifdef JEMALLOC_LAZY_LOCK + case DLL_THREAD_ATTACH: + isthreaded = true; + break; +#endif + case DLL_THREAD_DETACH: + _malloc_thread_cleanup(); + break; + default: + break; + } + return (true); +} + +#ifdef _MSC_VER +# ifdef _M_IX86 +# pragma comment(linker, "/INCLUDE:__tls_used") +# else +# pragma comment(linker, "/INCLUDE:_tls_used") +# endif +# pragma section(".CRT$XLY",long,read) +#endif +JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) +static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, + DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; +#endif + +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +void * +tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) +{ + pthread_t self = pthread_self(); + tsd_init_block_t *iter; + + /* Check whether this thread has already inserted into the list. */ + malloc_mutex_lock(&head->lock); + ql_foreach(iter, &head->blocks, link) { + if (iter->thread == self) { + malloc_mutex_unlock(&head->lock); + return (iter->data); + } + } + /* Insert block into list. */ + ql_elm_new(block, link); + block->thread = self; + ql_tail_insert(&head->blocks, block, link); + malloc_mutex_unlock(&head->lock); + return (NULL); +} + +void +tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) +{ + + malloc_mutex_lock(&head->lock); + ql_remove(&head->blocks, block, link); + malloc_mutex_unlock(&head->lock); +} +#endif diff --git a/deps/jemalloc/src/util.c b/deps/jemalloc/src/util.c new file mode 100644 index 0000000000..93a19fd16f --- /dev/null +++ b/deps/jemalloc/src/util.c @@ -0,0 +1,648 @@ +#define assert(e) do { \ + if (config_debug && !(e)) { \ + malloc_write("<jemalloc>: Failed assertion\n"); \ + abort(); \ + } \ +} while (0) + +#define not_reached() do { \ + if (config_debug) { \ + malloc_write("<jemalloc>: Unreachable code reached\n"); \ + abort(); \ + } \ +} while (0) + +#define not_implemented() do { \ + if (config_debug) { \ + malloc_write("<jemalloc>: Not implemented\n"); \ + abort(); \ + } \ +} while (0) + +#define JEMALLOC_UTIL_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void wrtmessage(void *cbopaque, const char *s); +#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) +static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, + size_t *slen_p); +#define D2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); +#define O2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); +#define X2S_BUFSIZE (2 + U2S_BUFSIZE) +static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, + size_t *slen_p); + +/******************************************************************************/ + +/* malloc_message() setup. */ +static void +wrtmessage(void *cbopaque, const char *s) +{ + +#ifdef SYS_write + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. + */ + UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); +#else + UNUSED int result = write(STDERR_FILENO, s, strlen(s)); +#endif +} + +JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); + +/* + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. + */ +void +malloc_write(const char *s) +{ + + if (je_malloc_message != NULL) + je_malloc_message(NULL, s); + else + wrtmessage(NULL, s); +} + +/* + * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so + * provide a wrapper. + */ +int +buferror(int err, char *buf, size_t buflen) +{ + +#ifdef _WIN32 + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0, + (LPSTR)buf, buflen, NULL); + return (0); +#elif defined(_GNU_SOURCE) + char *b = strerror_r(err, buf, buflen); + if (b != buf) { + strncpy(buf, b, buflen); + buf[buflen-1] = '\0'; + } + return (0); +#else + return (strerror_r(err, buf, buflen)); +#endif +} + +uintmax_t +malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) +{ + uintmax_t ret, digit; + int b; + bool neg; + const char *p, *ns; + + p = nptr; + if (base < 0 || base == 1 || base > 36) { + ns = p; + set_errno(EINVAL); + ret = UINTMAX_MAX; + goto label_return; + } + b = base; + + /* Swallow leading whitespace and get sign, if any. */ + neg = false; + while (true) { + switch (*p) { + case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': + p++; + break; + case '-': + neg = true; + /* Fall through. */ + case '+': + p++; + /* Fall through. */ + default: + goto label_prefix; + } + } + + /* Get prefix, if any. */ + label_prefix: + /* + * Note where the first non-whitespace/sign character is so that it is + * possible to tell whether any digits are consumed (e.g., " 0" vs. + * " -x"). + */ + ns = p; + if (*p == '0') { + switch (p[1]) { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': + if (b == 0) + b = 8; + if (b == 8) + p++; + break; + case 'X': case 'x': + switch (p[2]) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + if (b == 0) + b = 16; + if (b == 16) + p += 2; + break; + default: + break; + } + break; + default: + p++; + ret = 0; + goto label_return; + } + } + if (b == 0) + b = 10; + + /* Convert. */ + ret = 0; + while ((*p >= '0' && *p <= '9' && (digit = *p - '0') < b) + || (*p >= 'A' && *p <= 'Z' && (digit = 10 + *p - 'A') < b) + || (*p >= 'a' && *p <= 'z' && (digit = 10 + *p - 'a') < b)) { + uintmax_t pret = ret; + ret *= b; + ret += digit; + if (ret < pret) { + /* Overflow. */ + set_errno(ERANGE); + ret = UINTMAX_MAX; + goto label_return; + } + p++; + } + if (neg) + ret = -ret; + + if (p == ns) { + /* No conversion performed. */ + set_errno(EINVAL); + ret = UINTMAX_MAX; + goto label_return; + } + +label_return: + if (endptr != NULL) { + if (p == ns) { + /* No characters were converted. */ + *endptr = (char *)nptr; + } else + *endptr = (char *)p; + } + return (ret); +} + +static char * +u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) +{ + unsigned i; + + i = U2S_BUFSIZE - 1; + s[i] = '\0'; + switch (base) { + case 10: + do { + i--; + s[i] = "0123456789"[x % (uint64_t)10]; + x /= (uint64_t)10; + } while (x > 0); + break; + case 16: { + const char *digits = (uppercase) + ? "0123456789ABCDEF" + : "0123456789abcdef"; + + do { + i--; + s[i] = digits[x & 0xf]; + x >>= 4; + } while (x > 0); + break; + } default: { + const char *digits = (uppercase) + ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" + : "0123456789abcdefghijklmnopqrstuvwxyz"; + + assert(base >= 2 && base <= 36); + do { + i--; + s[i] = digits[x % (uint64_t)base]; + x /= (uint64_t)base; + } while (x > 0); + }} + + *slen_p = U2S_BUFSIZE - 1 - i; + return (&s[i]); +} + +static char * +d2s(intmax_t x, char sign, char *s, size_t *slen_p) +{ + bool neg; + + if ((neg = (x < 0))) + x = -x; + s = u2s(x, 10, false, s, slen_p); + if (neg) + sign = '-'; + switch (sign) { + case '-': + if (neg == false) + break; + /* Fall through. */ + case ' ': + case '+': + s--; + (*slen_p)++; + *s = sign; + break; + default: not_reached(); + } + return (s); +} + +static char * +o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) +{ + + s = u2s(x, 8, false, s, slen_p); + if (alt_form && *s != '0') { + s--; + (*slen_p)++; + *s = '0'; + } + return (s); +} + +static char * +x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) +{ + + s = u2s(x, 16, uppercase, s, slen_p); + if (alt_form) { + s -= 2; + (*slen_p) += 2; + memcpy(s, uppercase ? "0X" : "0x", 2); + } + return (s); +} + +int +malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + int ret; + size_t i; + const char *f; + +#define APPEND_C(c) do { \ + if (i < size) \ + str[i] = (c); \ + i++; \ +} while (0) +#define APPEND_S(s, slen) do { \ + if (i < size) { \ + size_t cpylen = (slen <= size - i) ? slen : size - i; \ + memcpy(&str[i], s, cpylen); \ + } \ + i += slen; \ +} while (0) +#define APPEND_PADDED_S(s, slen, width, left_justify) do { \ + /* Left padding. */ \ + size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \ + (size_t)width - slen : 0); \ + if (left_justify == false && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) \ + APPEND_C(' '); \ + } \ + /* Value. */ \ + APPEND_S(s, slen); \ + /* Right padding. */ \ + if (left_justify && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) \ + APPEND_C(' '); \ + } \ +} while (0) +#define GET_ARG_NUMERIC(val, len) do { \ + switch (len) { \ + case '?': \ + val = va_arg(ap, int); \ + break; \ + case '?' | 0x80: \ + val = va_arg(ap, unsigned int); \ + break; \ + case 'l': \ + val = va_arg(ap, long); \ + break; \ + case 'l' | 0x80: \ + val = va_arg(ap, unsigned long); \ + break; \ + case 'q': \ + val = va_arg(ap, long long); \ + break; \ + case 'q' | 0x80: \ + val = va_arg(ap, unsigned long long); \ + break; \ + case 'j': \ + val = va_arg(ap, intmax_t); \ + break; \ + case 'j' | 0x80: \ + val = va_arg(ap, uintmax_t); \ + break; \ + case 't': \ + val = va_arg(ap, ptrdiff_t); \ + break; \ + case 'z': \ + val = va_arg(ap, ssize_t); \ + break; \ + case 'z' | 0x80: \ + val = va_arg(ap, size_t); \ + break; \ + case 'p': /* Synthetic; used for %p. */ \ + val = va_arg(ap, uintptr_t); \ + break; \ + default: not_reached(); \ + } \ +} while (0) + + i = 0; + f = format; + while (true) { + switch (*f) { + case '\0': goto label_out; + case '%': { + bool alt_form = false; + bool left_justify = false; + bool plus_space = false; + bool plus_plus = false; + int prec = -1; + int width = -1; + unsigned char len = '?'; + + f++; + /* Flags. */ + while (true) { + switch (*f) { + case '#': + assert(alt_form == false); + alt_form = true; + break; + case '-': + assert(left_justify == false); + left_justify = true; + break; + case ' ': + assert(plus_space == false); + plus_space = true; + break; + case '+': + assert(plus_plus == false); + plus_plus = true; + break; + default: goto label_width; + } + f++; + } + /* Width. */ + label_width: + switch (*f) { + case '*': + width = va_arg(ap, int); + f++; + if (width < 0) { + left_justify = true; + width = -width; + } + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + uintmax_t uwidth; + set_errno(0); + uwidth = malloc_strtoumax(f, (char **)&f, 10); + assert(uwidth != UINTMAX_MAX || get_errno() != + ERANGE); + width = (int)uwidth; + break; + } default: + break; + } + /* Width/precision separator. */ + if (*f == '.') + f++; + else + goto label_length; + /* Precision. */ + switch (*f) { + case '*': + prec = va_arg(ap, int); + f++; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + uintmax_t uprec; + set_errno(0); + uprec = malloc_strtoumax(f, (char **)&f, 10); + assert(uprec != UINTMAX_MAX || get_errno() != + ERANGE); + prec = (int)uprec; + break; + } + default: break; + } + /* Length. */ + label_length: + switch (*f) { + case 'l': + f++; + if (*f == 'l') { + len = 'q'; + f++; + } else + len = 'l'; + break; + case 'q': case 'j': case 't': case 'z': + len = *f; + f++; + break; + default: break; + } + /* Conversion specifier. */ + switch (*f) { + char *s; + size_t slen; + case '%': + /* %% */ + APPEND_C(*f); + f++; + break; + case 'd': case 'i': { + intmax_t val JEMALLOC_CC_SILENCE_INIT(0); + char buf[D2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = d2s(val, (plus_plus ? '+' : (plus_space ? + ' ' : '-')), buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'o': { + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); + char buf[O2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len | 0x80); + s = o2s(val, alt_form, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'u': { + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); + char buf[U2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len | 0x80); + s = u2s(val, 10, false, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'x': case 'X': { + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); + char buf[X2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len | 0x80); + s = x2s(val, alt_form, *f == 'X', buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'c': { + unsigned char val; + char buf[2]; + + assert(len == '?' || len == 'l'); + assert_not_implemented(len != 'l'); + val = va_arg(ap, int); + buf[0] = val; + buf[1] = '\0'; + APPEND_PADDED_S(buf, 1, width, left_justify); + f++; + break; + } case 's': + assert(len == '?' || len == 'l'); + assert_not_implemented(len != 'l'); + s = va_arg(ap, char *); + slen = (prec < 0) ? strlen(s) : prec; + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + case 'p': { + uintmax_t val; + char buf[X2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, 'p'); + s = x2s(val, true, false, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } default: not_reached(); + } + break; + } default: { + APPEND_C(*f); + f++; + break; + }} + } + label_out: + if (i < size) + str[i] = '\0'; + else + str[size - 1] = '\0'; + ret = i; + +#undef APPEND_C +#undef APPEND_S +#undef APPEND_PADDED_S +#undef GET_ARG_NUMERIC + return (ret); +} + +JEMALLOC_ATTR(format(printf, 3, 4)) +int +malloc_snprintf(char *str, size_t size, const char *format, ...) +{ + int ret; + va_list ap; + + va_start(ap, format); + ret = malloc_vsnprintf(str, size, format, ap); + va_end(ap); + + return (ret); +} + +void +malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap) +{ + char buf[MALLOC_PRINTF_BUFSIZE]; + + if (write_cb == NULL) { + /* + * The caller did not provide an alternate write_cb callback + * function, so use the default one. malloc_write() is an + * inline function, so use malloc_message() directly here. + */ + write_cb = (je_malloc_message != NULL) ? je_malloc_message : + wrtmessage; + cbopaque = NULL; + } + + malloc_vsnprintf(buf, sizeof(buf), format, ap); + write_cb(cbopaque, buf); +} + +/* + * Print to a callback function in such a way as to (hopefully) avoid memory + * allocation. + */ +JEMALLOC_ATTR(format(printf, 3, 4)) +void +malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(write_cb, cbopaque, format, ap); + va_end(ap); +} + +/* Print to stderr in such a way as to avoid memory allocation. */ +JEMALLOC_ATTR(format(printf, 1, 2)) +void +malloc_printf(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(NULL, NULL, format, ap); + va_end(ap); +} diff --git a/deps/jemalloc/src/zone.c b/deps/jemalloc/src/zone.c new file mode 100644 index 0000000000..e0302ef4ed --- /dev/null +++ b/deps/jemalloc/src/zone.c @@ -0,0 +1,258 @@ +#include "jemalloc/internal/jemalloc_internal.h" +#ifndef JEMALLOC_ZONE +# error "This source file is for zones on Darwin (OS X)." +#endif + +/* + * The malloc_default_purgeable_zone function is only available on >= 10.6. + * We need to check whether it is present at runtime, thus the weak_import. + */ +extern malloc_zone_t *malloc_default_purgeable_zone(void) +JEMALLOC_ATTR(weak_import); + +/******************************************************************************/ +/* Data. */ + +static malloc_zone_t zone; +static struct malloc_introspection_t zone_introspect; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static size_t zone_size(malloc_zone_t *zone, void *ptr); +static void *zone_malloc(malloc_zone_t *zone, size_t size); +static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); +static void *zone_valloc(malloc_zone_t *zone, size_t size); +static void zone_free(malloc_zone_t *zone, void *ptr); +static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); +#if (JEMALLOC_ZONE_VERSION >= 5) +static void *zone_memalign(malloc_zone_t *zone, size_t alignment, +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) + size_t size); +static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, + size_t size); +#endif +static void *zone_destroy(malloc_zone_t *zone); +static size_t zone_good_size(malloc_zone_t *zone, size_t size); +static void zone_force_lock(malloc_zone_t *zone); +static void zone_force_unlock(malloc_zone_t *zone); + +/******************************************************************************/ +/* + * Functions. + */ + +static size_t +zone_size(malloc_zone_t *zone, void *ptr) +{ + + /* + * There appear to be places within Darwin (such as setenv(3)) that + * cause calls to this function with pointers that *no* zone owns. If + * we knew that all pointers were owned by *some* zone, we could split + * our zone into two parts, and use one as the default allocator and + * the other as the default deallocator/reallocator. Since that will + * not work in practice, we must check all pointers to assure that they + * reside within a mapped chunk before determining size. + */ + return (ivsalloc(ptr, config_prof)); +} + +static void * +zone_malloc(malloc_zone_t *zone, size_t size) +{ + + return (je_malloc(size)); +} + +static void * +zone_calloc(malloc_zone_t *zone, size_t num, size_t size) +{ + + return (je_calloc(num, size)); +} + +static void * +zone_valloc(malloc_zone_t *zone, size_t size) +{ + void *ret = NULL; /* Assignment avoids useless compiler warning. */ + + je_posix_memalign(&ret, PAGE, size); + + return (ret); +} + +static void +zone_free(malloc_zone_t *zone, void *ptr) +{ + + if (ivsalloc(ptr, config_prof) != 0) { + je_free(ptr); + return; + } + + free(ptr); +} + +static void * +zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) +{ + + if (ivsalloc(ptr, config_prof) != 0) + return (je_realloc(ptr, size)); + + return (realloc(ptr, size)); +} + +#if (JEMALLOC_ZONE_VERSION >= 5) +static void * +zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) +{ + void *ret = NULL; /* Assignment avoids useless compiler warning. */ + + je_posix_memalign(&ret, alignment, size); + + return (ret); +} +#endif + +#if (JEMALLOC_ZONE_VERSION >= 6) +static void +zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) +{ + + if (ivsalloc(ptr, config_prof) != 0) { + assert(ivsalloc(ptr, config_prof) == size); + je_free(ptr); + return; + } + + free(ptr); +} +#endif + +static void * +zone_destroy(malloc_zone_t *zone) +{ + + /* This function should never be called. */ + not_reached(); + return (NULL); +} + +static size_t +zone_good_size(malloc_zone_t *zone, size_t size) +{ + + if (size == 0) + size = 1; + return (s2u(size)); +} + +static void +zone_force_lock(malloc_zone_t *zone) +{ + + if (isthreaded) + jemalloc_prefork(); +} + +static void +zone_force_unlock(malloc_zone_t *zone) +{ + + if (isthreaded) + jemalloc_postfork_parent(); +} + +JEMALLOC_ATTR(constructor) +void +register_zone(void) +{ + + /* + * If something else replaced the system default zone allocator, don't + * register jemalloc's. + */ + malloc_zone_t *default_zone = malloc_default_zone(); + if (!default_zone->zone_name || + strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { + return; + } + + zone.size = (void *)zone_size; + zone.malloc = (void *)zone_malloc; + zone.calloc = (void *)zone_calloc; + zone.valloc = (void *)zone_valloc; + zone.free = (void *)zone_free; + zone.realloc = (void *)zone_realloc; + zone.destroy = (void *)zone_destroy; + zone.zone_name = "jemalloc_zone"; + zone.batch_malloc = NULL; + zone.batch_free = NULL; + zone.introspect = &zone_introspect; + zone.version = JEMALLOC_ZONE_VERSION; +#if (JEMALLOC_ZONE_VERSION >= 5) + zone.memalign = zone_memalign; +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) + zone.free_definite_size = zone_free_definite_size; +#endif +#if (JEMALLOC_ZONE_VERSION >= 8) + zone.pressure_relief = NULL; +#endif + + zone_introspect.enumerator = NULL; + zone_introspect.good_size = (void *)zone_good_size; + zone_introspect.check = NULL; + zone_introspect.print = NULL; + zone_introspect.log = NULL; + zone_introspect.force_lock = (void *)zone_force_lock; + zone_introspect.force_unlock = (void *)zone_force_unlock; + zone_introspect.statistics = NULL; +#if (JEMALLOC_ZONE_VERSION >= 6) + zone_introspect.zone_locked = NULL; +#endif +#if (JEMALLOC_ZONE_VERSION >= 7) + zone_introspect.enable_discharge_checking = NULL; + zone_introspect.disable_discharge_checking = NULL; + zone_introspect.discharge = NULL; +#ifdef __BLOCKS__ + zone_introspect.enumerate_discharged_pointers = NULL; +#else + zone_introspect.enumerate_unavailable_without_blocks = NULL; +#endif +#endif + + /* + * The default purgeable zone is created lazily by OSX's libc. It uses + * the default zone when it is created for "small" allocations + * (< 15 KiB), but assumes the default zone is a scalable_zone. This + * obviously fails when the default zone is the jemalloc zone, so + * malloc_default_purgeable_zone is called beforehand so that the + * default purgeable zone is created when the default zone is still + * a scalable_zone. As purgeable zones only exist on >= 10.6, we need + * to check for the existence of malloc_default_purgeable_zone() at + * run time. + */ + if (malloc_default_purgeable_zone != NULL) + malloc_default_purgeable_zone(); + + /* Register the custom zone. At this point it won't be the default. */ + malloc_zone_register(&zone); + + /* + * Unregister and reregister the default zone. On OSX >= 10.6, + * unregistering takes the last registered zone and places it at the + * location of the specified zone. Unregistering the default zone thus + * makes the last registered one the default. On OSX < 10.6, + * unregistering shifts all registered zones. The first registered zone + * then becomes the default. + */ + do { + default_zone = malloc_default_zone(); + malloc_zone_unregister(default_zone); + malloc_zone_register(default_zone); + } while (malloc_default_zone() != &zone); +} |