Source/JavaScriptCore/wtf/FastMalloc.cpp

   1 // Copyright (c) 2005, 2007, Google Inc.
   2 // All rights reserved.
   3 // Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011 Apple Inc. All rights reserved.
   4 //
   5 // Redistribution and use in source and binary forms, with or without
   6 // modification, are permitted provided that the following conditions are
   7 // met:
   8 //
   9 //     * Redistributions of source code must retain the above copyright
  10 // notice, this list of conditions and the following disclaimer.
  11 //     * Redistributions in binary form must reproduce the above
  12 // copyright notice, this list of conditions and the following disclaimer
  13 // in the documentation and/or other materials provided with the
  14 // distribution.
  15 //     * Neither the name of Google Inc. nor the names of its
  16 // contributors may be used to endorse or promote products derived from
  17 // this software without specific prior written permission.
  18 //
  19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 // ---
  32 // Author: Sanjay Ghemawat <opensource@google.com>
  33 //
  34 // A malloc that uses a per-thread cache to satisfy small malloc requests.
  35 // (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
  36 //
  37 // See doc/tcmalloc.html for a high-level
  38 // description of how this malloc works.
  39 //
  40 // SYNCHRONIZATION
  41 //  1. The thread-specific lists are accessed without acquiring any locks.
  42 //     This is safe because each such list is only accessed by one thread.
  43 //  2. We have a lock per central free-list, and hold it while manipulating
  44 //     the central free list for a particular size.
  45 //  3. The central page allocator is protected by "pageheap_lock".
  46 //  4. The pagemap (which maps from page-number to descriptor),
  47 //     can be read without holding any locks, and written while holding
  48 //     the "pageheap_lock".
  49 //  5. To improve performance, a subset of the information one can get
  50 //     from the pagemap is cached in a data structure, pagemap_cache_,
  51 //     that atomically reads and writes its entries.  This cache can be
  52 //     read and written without locking.
  53 //
  54 //     This multi-threaded access to the pagemap is safe for fairly
  55 //     subtle reasons.  We basically assume that when an object X is
  56 //     allocated by thread A and deallocated by thread B, there must
  57 //     have been appropriate synchronization in the handoff of object
  58 //     X from thread A to thread B.  The same logic applies to pagemap_cache_.
  59 //
  60 // THE PAGEID-TO-SIZECLASS CACHE
  61 // Hot PageID-to-sizeclass mappings are held by pagemap_cache_.  If this cache
  62 // returns 0 for a particular PageID then that means "no information," not that
  63 // the sizeclass is 0.  The cache may have stale information for pages that do
  64 // not hold the beginning of any free()'able object.  Staleness is eliminated
  65 // in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
  66 // do_memalign() for all other relevant pages.
  67 //
  68 // TODO: Bias reclamation to larger addresses
  69 // TODO: implement mallinfo/mallopt
  70 // TODO: Better testing
  71 //
  72 // 9/28/2003 (new page-level allocator replaces ptmalloc2):
  73 // * malloc/free of small objects goes from ~300 ns to ~50 ns.
  74 // * allocation of a reasonably complicated struct
  75 //   goes from about 1100 ns to about 300 ns.
  76
  77 #include "config.h"
  78 #include "FastMalloc.h"
  79
  80 #include "Assertions.h"
  81 #include <limits>
  82 #if ENABLE(JSC_MULTIPLE_THREADS)
  83 #include <pthread.h>
  84 #endif
  85 #include <wtf/StdLibExtras.h>
  86
// For WTF builds (WTF_CHANGES defined), define NO_TCMALLOC_SAMPLES unless the
// build already did. With it defined, the sampling flag further down is
// compiled as "unused" and sample_period starts at 0.
#ifndef NO_TCMALLOC_SAMPLES
#ifdef WTF_CHANGES
#define NO_TCMALLOC_SAMPLES
#endif
#endif
  92
// FORCE_SYSTEM_MALLOC selects which half of this file is compiled:
//   0 - only when USE_SYSTEM_MALLOC is not defined to a non-zero value AND
//       NDEBUG is defined; the TCMalloc-based implementation is built.
//   1 - in every other case (including all builds without NDEBUG); the thin
//       wrappers around the system malloc/calloc/realloc/free are built.
#if !(defined(USE_SYSTEM_MALLOC) && USE_SYSTEM_MALLOC) && defined(NDEBUG)
#define FORCE_SYSTEM_MALLOC 0
#else
#define FORCE_SYSTEM_MALLOC 1
#endif
  98
// Use a background thread to periodically scavenge memory to release back to the system.
// Enabled (1) by default; disabled (0) only when BUILDING_ON_TIGER is defined.
// https://bugs.webkit.org/show_bug.cgi?id=27900: don't turn this on for Tiger until we have figured out why it caused a crash.
#if defined(BUILDING_ON_TIGER)
#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 0
#else
#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1
#endif
 106
 107 #ifndef NDEBUG
 108 namespace WTF {
 109
 110 #if ENABLE(JSC_MULTIPLE_THREADS)
 111 static pthread_key_t isForbiddenKey;
 112 static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
 113 static void initializeIsForbiddenKey()
 114 {
 115   pthread_key_create(&isForbiddenKey, 0);
 116 }
 117
 118 #if !ASSERT_DISABLED
 119 static bool isForbidden()
 120 {
 121     pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
 122     return !!pthread_getspecific(isForbiddenKey);
 123 }
 124 #endif
 125
 126 void fastMallocForbid()
 127 {
 128     pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
 129     pthread_setspecific(isForbiddenKey, &isForbiddenKey);
 130 }
 131
 132 void fastMallocAllow()
 133 {
 134     pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
 135     pthread_setspecific(isForbiddenKey, 0);
 136 }
 137
 138 #else
 139
 140 static bool staticIsForbidden;
 141 static bool isForbidden()
 142 {
 143     return staticIsForbidden;
 144 }
 145
 146 void fastMallocForbid()
 147 {
 148     staticIsForbidden = true;
 149 }
 150
 151 void fastMallocAllow()
 152 {
 153     staticIsForbidden = false;
 154 }
 155 #endif // ENABLE(JSC_MULTIPLE_THREADS)
 156
 157 } // namespace WTF
 158 #endif // NDEBUG
 159
 160 #include <string.h>
 161
 162 namespace WTF {
 163
 164 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
 165
 166 namespace Internal {
 167
// Failure hook for allocation/deallocation match validation: invoked by
// fastFree() and tryFastRealloc() in this file when the tag stored in front of
// a pointer is not the expected allocation type. The pointer itself is unused;
// the function simply crashes.
void fastMallocMatchFailed(void*)
{
    CRASH();
}
 172
 173 } // namespace Internal
 174
 175 #endif
 176
 177 void* fastZeroedMalloc(size_t n)
 178 {
 179     void* result = fastMalloc(n);
 180     memset(result, 0, n);
 181     return result;
 182 }
 183
 184 char* fastStrDup(const char* src)
 185 {
 186     int len = strlen(src) + 1;
 187     char* dup = static_cast<char*>(fastMalloc(len));
 188
 189     if (dup)
 190         memcpy(dup, src, len);
 191
 192     return dup;
 193 }
 194
 195 TryMallocReturnValue tryFastZeroedMalloc(size_t n)
 196 {
 197     void* result;
 198     if (!tryFastMalloc(n).getValue(result))
 199         return 0;
 200     memset(result, 0, n);
 201     return result;
 202 }
 203
 204 } // namespace WTF
 205
 206 #if FORCE_SYSTEM_MALLOC
 207
 208 #if PLATFORM(BREWMP)
 209 #include "brew/SystemMallocBrew.h"
 210 #endif
 211
 212 #if OS(DARWIN)
 213 #include <malloc/malloc.h>
 214 #elif OS(WINDOWS)
 215 #include <malloc.h>
 216 #endif
 217
 218 namespace WTF {
 219
 220 TryMallocReturnValue tryFastMalloc(size_t n)
 221 {
 222     ASSERT(!isForbidden());
 223
 224 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
 225     if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n)  // If overflow would occur...
 226         return 0;
 227
 228     void* result = malloc(n + sizeof(AllocAlignmentInteger));
 229     if (!result)
 230         return 0;
 231
 232     *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
 233     result = static_cast<AllocAlignmentInteger*>(result) + 1;
 234
 235     return result;
 236 #else
 237     return malloc(n);
 238 #endif
 239 }
 240
 241 void* fastMalloc(size_t n)
 242 {
 243     ASSERT(!isForbidden());
 244
 245 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
 246     TryMallocReturnValue returnValue = tryFastMalloc(n);
 247     void* result;
 248     returnValue.getValue(result);
 249 #else
 250     void* result = malloc(n);
 251 #endif
 252
 253     if (!result) {
 254 #if PLATFORM(BREWMP)
 255         // The behavior of malloc(0) is implementation defined.
 256         // To make sure that fastMalloc never returns 0, retry with fastMalloc(1).
 257         if (!n)
 258             return fastMalloc(1);
 259 #endif
 260         CRASH();
 261     }
 262
 263     return result;
 264 }
 265
 266 TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size)
 267 {
 268     ASSERT(!isForbidden());
 269
 270 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
 271     size_t totalBytes = n_elements * element_size;
 272     if (n_elements > 1 && element_size && (totalBytes / element_size) != n_elements || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes))
 273         return 0;
 274
 275     totalBytes += sizeof(AllocAlignmentInteger);
 276     void* result = malloc(totalBytes);
 277     if (!result)
 278         return 0;
 279
 280     memset(result, 0, totalBytes);
 281     *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
 282     result = static_cast<AllocAlignmentInteger*>(result) + 1;
 283     return result;
 284 #else
 285     return calloc(n_elements, element_size);
 286 #endif
 287 }
 288
 289 void* fastCalloc(size_t n_elements, size_t element_size)
 290 {
 291     ASSERT(!isForbidden());
 292
 293 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
 294     TryMallocReturnValue returnValue = tryFastCalloc(n_elements, element_size);
 295     void* result;
 296     returnValue.getValue(result);
 297 #else
 298     void* result = calloc(n_elements, element_size);
 299 #endif
 300
 301     if (!result) {
 302 #if PLATFORM(BREWMP)
 303         // If either n_elements or element_size is 0, the behavior of calloc is implementation defined.
 304         // To make sure that fastCalloc never returns 0, retry with fastCalloc(1, 1).
 305         if (!n_elements || !element_size)
 306             return fastCalloc(1, 1);
 307 #endif
 308         CRASH();
 309     }
 310
 311     return result;
 312 }
 313
 314 void fastFree(void* p)
 315 {
 316     ASSERT(!isForbidden());
 317
 318 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
 319     if (!p)
 320         return;
 321
 322     AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
 323     if (*header != Internal::AllocTypeMalloc)
 324         Internal::fastMallocMatchFailed(p);
 325     free(header);
 326 #else
 327     free(p);
 328 #endif
 329 }
 330
 331 TryMallocReturnValue tryFastRealloc(void* p, size_t n)
 332 {
 333     ASSERT(!isForbidden());
 334
 335 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
 336     if (p) {
 337         if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n)  // If overflow would occur...
 338             return 0;
 339         AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
 340         if (*header != Internal::AllocTypeMalloc)
 341             Internal::fastMallocMatchFailed(p);
 342         void* result = realloc(header, n + sizeof(AllocAlignmentInteger));
 343         if (!result)
 344             return 0;
 345
 346         // This should not be needed because the value is already there:
 347         // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
 348         result = static_cast<AllocAlignmentInteger*>(result) + 1;
 349         return result;
 350     } else {
 351         return fastMalloc(n);
 352     }
 353 #else
 354     return realloc(p, n);
 355 #endif
 356 }
 357
 358 void* fastRealloc(void* p, size_t n)
 359 {
 360     ASSERT(!isForbidden());
 361
 362 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
 363     TryMallocReturnValue returnValue = tryFastRealloc(p, n);
 364     void* result;
 365     returnValue.getValue(result);
 366 #else
 367     void* result = realloc(p, n);
 368 #endif
 369
 370     if (!result)
 371         CRASH();
 372     return result;
 373 }
 374
// Intentionally empty in the FORCE_SYSTEM_MALLOC configuration.
void releaseFastMallocFreeMemory() { }
 376
 377 FastMallocStatistics fastMallocStatistics()
 378 {
 379     FastMallocStatistics statistics = { 0, 0, 0 };
 380     return statistics;
 381 }
 382
 383 size_t fastMallocSize(const void* p)
 384 {
 385 #if OS(DARWIN)
 386     return malloc_size(p);
 387 #elif OS(WINDOWS) && !PLATFORM(BREWMP)
 388     // Brew MP uses its own memory allocator, so _msize does not work on the Brew MP simulator.
 389     return _msize(const_cast<void*>(p));
 390 #else
 391     return 1;
 392 #endif
 393 }
 394
 395 } // namespace WTF
 396
#if OS(DARWIN)
// This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
// It will never be used in this case, so its type and value are less interesting than its presence.
extern "C" const int jscore_fastmalloc_introspection = 0;
#endif
 402
 403 #else // FORCE_SYSTEM_MALLOC
 404
 405 #if HAVE(STDINT_H)
 406 #include <stdint.h>
 407 #elif HAVE(INTTYPES_H)
 408 #include <inttypes.h>
 409 #else
 410 #include <sys/types.h>
 411 #endif
 412
 413 #include "AlwaysInline.h"
 414 #include "Assertions.h"
 415 #include "TCPackedCache.h"
 416 #include "TCPageMap.h"
 417 #include "TCSpinLock.h"
 418 #include "TCSystemAlloc.h"
 419 #include <algorithm>
 420 #include <limits>
 421 #include <pthread.h>
 422 #include <stdarg.h>
 423 #include <stddef.h>
 424 #include <stdio.h>
 425 #if HAVE(ERRNO_H)
 426 #include <errno.h>
 427 #endif
 428 #if OS(UNIX)
 429 #include <unistd.h>
 430 #endif
 431 #if OS(WINDOWS)
 432 #ifndef WIN32_LEAN_AND_MEAN
 433 #define WIN32_LEAN_AND_MEAN
 434 #endif
 435 #include <windows.h>
 436 #endif
 437
 438 #ifdef WTF_CHANGES
 439
 440 #if OS(DARWIN)
 441 #include "MallocZoneSupport.h"
 442 #include <wtf/HashSet.h>
 443 #include <wtf/Vector.h>
 444 #endif
 445
 446 #if HAVE(HEADER_DETECTION_H)
 447 #include "HeaderDetection.h"
 448 #endif
 449
 450 #if HAVE(DISPATCH_H)
 451 #include <dispatch/dispatch.h>
 452 #endif
 453
 454 #if HAVE(PTHREAD_MACHDEP_H)
 455 #include <System/pthread_machdep.h>
 456
 457 #if defined(__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0)
 458 #define WTF_USE_PTHREAD_GETSPECIFIC_DIRECT 1
 459 #endif
 460 #endif
 461
 462 #ifndef PRIuS
 463 #define PRIuS "zu"
 464 #endif
 465
// Calling pthread_getspecific through a global function pointer is faster than a normal
// call to the function on Mac OS X, and it's used in performance-critical code. So we
// use a function pointer. But that's not necessarily faster on other platforms, and we had
// problems with this technique on Windows, so we'll do this only on Mac OS X.
//
// When USE(PTHREAD_GETSPECIFIC_DIRECT) is set instead, both pthread_getspecific and
// pthread_setspecific are redirected to the _pthread_*_direct variants.
// Either way the redirection is a macro, so it affects every later use of these
// names in this file.
#if OS(DARWIN)
#if !USE(PTHREAD_GETSPECIFIC_DIRECT)
static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
#define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
#else
#define pthread_getspecific(key) _pthread_getspecific_direct(key)
#define pthread_setspecific(key, val) _pthread_setspecific_direct(key, (val))
#endif
#endif
 479
// Flag-definition helpers.
//
// DEFINE_VARIABLE(type, name, value, meaning) expands to:
//   - a namespace whose name warns against direct use, containing
//       type FLAGS_<name>(value);   the flag itself
//       char FLAGS_no<name>;        a companion char variable
//   - a using-declaration that makes FLAGS_<name> visible in the enclosing scope.
// The `meaning` argument does not appear in the expansion; it serves as
// documentation at the definition site only.
#define DEFINE_VARIABLE(type, name, value, meaning) \
  namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead {  \
  type FLAGS_##name(value);                                \
  char FLAGS_no##name;                                                        \
  }                                                                           \
  using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name

// Defines an int64_t flag FLAGS_<name>.
#define DEFINE_int64(name, value, meaning) \
  DEFINE_VARIABLE(int64_t, name, value, meaning)

// Defines a double flag FLAGS_<name>.
#define DEFINE_double(name, value, meaning) \
  DEFINE_VARIABLE(double, name, value, meaning)
 492
 493 namespace WTF {
 494
// From here on, uses of the C allocator names in this file expand to the
// corresponding fast* functions.
#define malloc fastMalloc
#define calloc fastCalloc
#define free fastFree
#define realloc fastRealloc

// Map the logging/checking macro names used by the code below onto the WTF
// equivalents.
#define MESSAGE LOG_ERROR
#define CHECK_CONDITION ASSERT
 502
 503 #if OS(DARWIN)
// Forward declarations of the allocator types FastMallocZone keeps pointers to.
struct Span;
class TCMalloc_Central_FreeListPadded;
class TCMalloc_PageHeap;
class TCMalloc_ThreadCache;
template <typename T> class PageHeapAllocator;

// Darwin-only wrapper that pairs a malloc_zone_t with pointers to the
// allocator's internal structures. The static member functions take the
// argument lists of malloc-zone callbacks.
// NOTE(review): where and how the zone is registered is not visible in this
// part of the file - see the definitions of init() and the constructor.
class FastMallocZone {
public:
    static void init();

    static kern_return_t enumerate(task_t, void*, unsigned typeMmask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
    // The callbacks below are trivial: goodSize echoes the requested size,
    // check always reports success, print/log/forceLock/forceUnlock do
    // nothing, and statistics zero-fills the output structure.
    static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
    static boolean_t check(malloc_zone_t*) { return true; }
    static void  print(malloc_zone_t*, boolean_t) { }
    static void log(malloc_zone_t*, void*) { }
    static void forceLock(malloc_zone_t*) { }
    static void forceUnlock(malloc_zone_t*) { }
    static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }

private:
    FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);
    static size_t size(malloc_zone_t*, const void*);
    static void* zoneMalloc(malloc_zone_t*, size_t);
    static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
    static void zoneFree(malloc_zone_t*, void*);
    static void* zoneRealloc(malloc_zone_t*, void*, size_t);
    // valloc is not supported through this zone: logs an error and returns 0.
    static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
    static void zoneDestroy(malloc_zone_t*) { }

    malloc_zone_t m_zone;
    // Pointers supplied through the constructor's parameter list.
    TCMalloc_PageHeap* m_pageHeap;
    TCMalloc_ThreadCache** m_threadHeaps;
    TCMalloc_Central_FreeListPadded* m_centralCaches;
    PageHeapAllocator<Span>* m_spanAllocator;
    PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
};
 540
 541 #endif
 542
 543 #endif
 544
// Stack-trace support, only considered for builds without WTF_CHANGES.
#ifndef WTF_CHANGES
// This #ifdef should almost never be set.  Set NO_TCMALLOC_SAMPLES if
// you're porting to a system where you really can't get a stacktrace.
#ifdef NO_TCMALLOC_SAMPLES
// We use #define so code compiles even if you #include stacktrace.h somehow.
// The stub evaluates to 0 and ignores its arguments.
# define GetStackTrace(stack, depth, skip)  (0)
#else
# include <google/stacktrace.h>
#endif
#endif
 555
 556 // Even if we have support for thread-local storage in the compiler
 557 // and linker, the OS may not support it.  We need to check that at
 558 // runtime.  Right now, we have to keep a manual set of "bad" OSes.
 559 #if defined(HAVE_TLS)
 560   static bool kernel_supports_tls = false;      // be conservative
 561   static inline bool KernelSupportsTLS() {
 562     return kernel_supports_tls;
 563   }
 564 # if !HAVE_DECL_UNAME   // if too old for uname, probably too old for TLS
    // Variant used when uname() is not declared (!HAVE_DECL_UNAME): no probe
    // is possible, so TLS is always recorded as unsupported.
    static void CheckIfKernelSupportsTLS() {
      kernel_supports_tls = false;
    }
 568 # else
 569 #   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
 570     static void CheckIfKernelSupportsTLS() {
 571       struct utsname buf;
 572       if (uname(&buf) != 0) {   // should be impossible
 573         MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
 574         kernel_supports_tls = false;
 575       } else if (strcasecmp(buf.sysname, "linux") == 0) {
 576         // The linux case: the first kernel to support TLS was 2.6.0
 577         if (buf.release[0] < '2' && buf.release[1] == '.')    // 0.x or 1.x
 578           kernel_supports_tls = false;
 579         else if (buf.release[0] == '2' && buf.release[1] == '.' &&
 580                  buf.release[2] >= '0' && buf.release[2] < '6' &&
 581                  buf.release[3] == '.')                       // 2.0 - 2.5
 582           kernel_supports_tls = false;
 583         else
 584           kernel_supports_tls = true;
 585       } else {        // some other kernel, we'll be optimisitic
 586         kernel_supports_tls = true;
 587       }
 588       // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
 589     }
 590 #  endif  // HAVE_DECL_UNAME
 591 #endif    // HAVE_TLS
 592
// __THROW is defined in glibc systems.  It means, counter-intuitively,
// "This function will never throw an exception."  It's an optional
// optimization tool, but we may need to use it to match glibc prototypes.
// When the platform does not provide it, define it as empty so that
// declarations using it still compile.
#ifndef __THROW    // I guess we're not on a glibc system
# define __THROW   // __THROW is just an optimization, so ok to make it ""
#endif
 599
 600 //-------------------------------------------------------------------
 601 // Configuration
 602 //-------------------------------------------------------------------
 603
 604 // Not all possible combinations of the following parameters make
 605 // sense.  In particular, if kMaxSize increases, you may have to
 606 // increase kNumClasses as well.
 607 static const size_t kPageShift  = 12;
 608 static const size_t kPageSize   = 1 << kPageShift;
 609 static const size_t kMaxSize    = 8u * kPageSize;
 610 static const size_t kAlignShift = 3;
 611 static const size_t kAlignment  = 1 << kAlignShift;
 612 static const size_t kNumClasses = 68;
 613
 614 // Allocates a big block of memory for the pagemap once we reach more than
 615 // 128MB
 616 static const size_t kPageMapBigAllocationThreshold = 128 << 20;
 617
 618 // Minimum number of pages to fetch from system at a time.  Must be
 619 // significantly bigger than kPageSize to amortize system-call
 620 // overhead, and also to reduce external fragementation.  Also, we
 621 // should keep this value big because various incarnations of Linux
 622 // have small limits on the number of mmap() regions per
 623 // address-space.
 624 static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
 625
 626 // Number of objects to move between a per-thread list and a central
 627 // list in one shot.  We want this to be not too small so we can
 628 // amortize the lock overhead for accessing the central list.  Making
 629 // it too big may temporarily cause unnecessary memory wastage in the
 630 // per-thread free list until the scavenger cleans up the list.
 631 static int num_objects_to_move[kNumClasses];
 632
 633 // Maximum length we allow a per-thread free-list to have before we
 634 // move objects from it into the corresponding central free-list.  We
 635 // want this big to avoid locking the central free-list too often.  It
 636 // should not hurt to make this list somewhat big because the
 637 // scavenging code will shrink it down when its contents are not in use.
 638 static const int kMaxFreeListLength = 256;
 639
 640 // Lower and upper bounds on the per-thread cache sizes
 641 static const size_t kMinThreadCacheSize = kMaxSize * 2;
 642 static const size_t kMaxThreadCacheSize = 2 << 20;
 643
 644 // Default bound on the total amount of thread caches
 645 static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
 646
 647 // For all span-lengths < kMaxPages we keep an exact-size list.
 648 // REQUIRED: kMaxPages >= kMinSystemAlloc;
 649 static const size_t kMaxPages = kMinSystemAlloc;
 650
 651 /* The smallest prime > 2^n */
 652 static int primes_list[] = {
 653     // Small values might cause high rates of sampling
 654     // and hence commented out.
 655     // 2, 5, 11, 17, 37, 67, 131, 257,
 656     // 521, 1031, 2053, 4099, 8209, 16411,
 657     32771, 65537, 131101, 262147, 524309, 1048583,
 658     2097169, 4194319, 8388617, 16777259, 33554467 };
 659
// Twice the approximate gap between sampling actions.
// I.e., we take one sample approximately once every
//      tcmalloc_sample_parameter/2
// bytes of allocation, i.e., ~ once every 128KB.
// Must be a prime number.
//
// sample_period is initialized to the same value as the flag's default in
// each configuration: 0 when NO_TCMALLOC_SAMPLES is defined, 262147 otherwise.
#ifdef NO_TCMALLOC_SAMPLES
DEFINE_int64(tcmalloc_sample_parameter, 0,
             "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
static size_t sample_period = 0;
#else
DEFINE_int64(tcmalloc_sample_parameter, 262147,
         "Twice the approximate gap between sampling actions."
         " Must be a prime number. Otherwise will be rounded up to a "
         " larger prime number");
static size_t sample_period = 262147;
#endif

// Protects sample_period above
static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
 679
// Parameters for controlling how fast memory is returned to the OS.

// Defines the double flag FLAGS_tcmalloc_release_rate with a default of 1.
DEFINE_double(tcmalloc_release_rate, 1,
              "Rate at which we release unused memory to the system.  "
              "Zero means we never release memory back to the system.  "
              "Increase this flag to return memory faster; decrease it "
              "to return memory slower.  Reasonable rates are in the "
              "range [0,10]");
 688
 689 //-------------------------------------------------------------------
 690 // Mapping from size to size_class and vice versa
 691 //-------------------------------------------------------------------
 692
 693 // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
 694 // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
 695 // So for these larger sizes we have an array indexed by ceil(size/128).
 696 //
 697 // We flatten both logical arrays into one physical array and use
 698 // arithmetic to compute an appropriate index.  The constants used by
 699 // ClassIndex() were selected to make the flattening work.
 700 //
 701 // Examples:
 702 //   Size       Expression                      Index
 703 //   -------------------------------------------------------
 704 //   0          (0 + 7) / 8                     0
 705 //   1          (1 + 7) / 8                     1
 706 //   ...
 707 //   1024       (1024 + 7) / 8                  128
 708 //   1025       (1025 + 127 + (120<<7)) / 128   129
 709 //   ...
 710 //   32768      (32768 + 127 + (120<<7)) / 128  376
 711 static const size_t kMaxSmallSize = 1024;
 712 static const int shift_amount[2] = { 3, 7 };  // For divides by 8 or 128
 713 static const int add_amount[2] = { 7, 127 + (120 << 7) };
 714 static unsigned char class_array[377];
 715
 716 // Compute index of the class_array[] entry for a given size
 717 static inline int ClassIndex(size_t s) {
 718   const int i = (s > kMaxSmallSize);
 719   return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
 720 }
 721
// Mapping from size class to max size storable in that class.
// Indexed by size class; filled in by InitSizeClasses().
static size_t class_to_size[kNumClasses];

// Mapping from size class to number of pages to allocate at a time.
// Indexed by size class; filled in by InitSizeClasses().
static size_t class_to_pages[kNumClasses];
 727
// TransferCache is used to cache transfers of num_objects_to_move[size_class]
// back and forth between thread caches and the central cache for a given size
// class.
// One TCEntry describes a single cached chain by its two end points.
struct TCEntry {
  void *head;  // Head of chain of objects.
  void *tail;  // Tail of chain of objects.
};
// A central cache freelist can have anywhere from 0 to kNumTransferEntries
// slots to put linked-list chains into.  To keep memory usage bounded the total
// number of TCEntries across size classes is fixed.  Currently each size
// class is initially given one TCEntry which also means that the maximum any
// one class can have is kNumClasses.
static const int kNumTransferEntries = kNumClasses;
 741
 742 // Note: the following only works for "n"s that fit in 32-bits, but
 743 // that is fine since we only use it for small sizes.
 744 static inline int LgFloor(size_t n) {
 745   int log = 0;
 746   for (int i = 4; i >= 0; --i) {
 747     int shift = (1 << i);
 748     size_t x = n >> shift;
 749     if (x != 0) {
 750       n = x;
 751       log += shift;
 752     }
 753   }
 754   ASSERT(n == 1);
 755   return log;
 756 }
 757
 758 // Some very basic linked list functions for dealing with using void * as
 759 // storage.
 760
 761 static inline void *SLL_Next(void *t) {
 762   return *(reinterpret_cast<void**>(t));
 763 }
 764
 765 static inline void SLL_SetNext(void *t, void *n) {
 766   *(reinterpret_cast<void**>(t)) = n;
 767 }
 768
 769 static inline void SLL_Push(void **list, void *element) {
 770   SLL_SetNext(element, *list);
 771   *list = element;
 772 }
 773
 774 static inline void *SLL_Pop(void **list) {
 775   void *result = *list;
 776   *list = SLL_Next(*list);
 777   return result;
 778 }
 779
 780
 781 // Remove N elements from a linked list to which head points.  head will be
 782 // modified to point to the new head.  start and end will point to the first
 783 // and last nodes of the range.  Note that end will point to NULL after this
 784 // function is called.
 785 static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
 786   if (N == 0) {
 787     *start = NULL;
 788     *end = NULL;
 789     return;
 790   }
 791
 792   void *tmp = *head;
 793   for (int i = 1; i < N; ++i) {
 794     tmp = SLL_Next(tmp);
 795   }
 796
 797   *start = *head;
 798   *end = tmp;
 799   *head = SLL_Next(tmp);
 800   // Unlink range from list.
 801   SLL_SetNext(tmp, NULL);
 802 }
 803
 804 static inline void SLL_PushRange(void **head, void *start, void *end) {
 805   if (!start) return;
 806   SLL_SetNext(end, *head);
 807   *head = start;
 808 }
 809
 810 static inline size_t SLL_Size(void *head) {
 811   int count = 0;
 812   while (head) {
 813     count++;
 814     head = SLL_Next(head);
 815   }
 816   return count;
 817 }
 818
 819 // Setup helper functions.
 820
 821 static ALWAYS_INLINE size_t SizeClass(size_t size) {
 822   return class_array[ClassIndex(size)];
 823 }
 824
// Get the byte-size for a specified class, as recorded in class_to_size[].
// cl is used as an index without a range check.
static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
  return class_to_size[cl];
}
 829 static int NumMoveSize(size_t size) {
 830   if (size == 0) return 0;
 831   // Use approx 64k transfers between thread and central caches.
 832   int num = static_cast<int>(64.0 * 1024.0 / size);
 833   if (num < 2) num = 2;
 834   // Clamp well below kMaxFreeListLength to avoid ping pong between central
 835   // and thread caches.
 836   if (num > static_cast<int>(0.8 * kMaxFreeListLength))
 837     num = static_cast<int>(0.8 * kMaxFreeListLength);
 838
 839   // Also, avoid bringing in too many objects into small object free
 840   // lists.  There are lots of such lists, and if we allow each one to
 841   // fetch too many at a time, we end up having to scavenge too often
 842   // (especially when there are lots of threads and each thread gets a
 843   // small allowance for its thread cache).
 844   //
 845   // TODO: Make thread cache free list sizes dynamic so that we do not
 846   // have to equally divide a fixed resource amongst lots of threads.
 847   if (num > 32) num = 32;
 848
 849   return num;
 850 }
 851
 852 // Initialize the mapping arrays
 853 static void InitSizeClasses() {
 854   // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
 855   if (ClassIndex(0) < 0) {
 856     MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
 857     CRASH();
 858   }
 859   if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
 860     MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
 861     CRASH();
 862   }
 863
 864   // Compute the size classes we want to use
 865   size_t sc = 1;   // Next size class to assign
 866   unsigned char alignshift = kAlignShift;
 867   int last_lg = -1;
 868   for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
 869     int lg = LgFloor(size);
 870     if (lg > last_lg) {
 871       // Increase alignment every so often.
 872       //
 873       // Since we double the alignment every time size doubles and
 874       // size >= 128, this means that space wasted due to alignment is
 875       // at most 16/128 i.e., 12.5%.  Plus we cap the alignment at 256
 876       // bytes, so the space wasted as a percentage starts falling for
 877       // sizes > 2K.
 878       if ((lg >= 7) && (alignshift < 8)) {
 879         alignshift++;
 880       }
 881       last_lg = lg;
 882     }
 883
 884     // Allocate enough pages so leftover is less than 1/8 of total.
 885     // This bounds wasted space to at most 12.5%.
 886     size_t psize = kPageSize;
 887     while ((psize % size) > (psize >> 3)) {
 888       psize += kPageSize;
 889     }
 890     const size_t my_pages = psize >> kPageShift;
 891
 892     if (sc > 1 && my_pages == class_to_pages[sc-1]) {
 893       // See if we can merge this into the previous class without
 894       // increasing the fragmentation of the previous class.
 895       const size_t my_objects = (my_pages << kPageShift) / size;
 896       const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
 897                                   / class_to_size[sc-1];
 898       if (my_objects == prev_objects) {
 899         // Adjust last class to include this size
 900         class_to_size[sc-1] = size;
 901         continue;
 902       }
 903     }
 904
 905     // Add new class
 906     class_to_pages[sc] = my_pages;
 907     class_to_size[sc] = size;
 908     sc++;
 909   }
 910   if (sc != kNumClasses) {
 911     MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
 912             sc, int(kNumClasses));
 913     CRASH();
 914   }
 915
 916   // Initialize the mapping arrays
 917   int next_size = 0;
 918   for (unsigned char c = 1; c < kNumClasses; c++) {
 919     const size_t max_size_in_class = class_to_size[c];
 920     for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
 921       class_array[ClassIndex(s)] = c;
 922     }
 923     next_size = static_cast<int>(max_size_in_class + kAlignment);
 924   }
 925
 926   // Double-check sizes just to be safe
 927   for (size_t size = 0; size <= kMaxSize; size++) {
 928     const size_t sc = SizeClass(size);
 929     if (sc == 0) {
 930       MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
 931       CRASH();
 932     }
 933     if (sc > 1 && size <= class_to_size[sc-1]) {
 934       MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
 935               "\n", sc, size);
 936       CRASH();
 937     }
 938     if (sc >= kNumClasses) {
 939       MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
 940       CRASH();
 941     }
 942     const size_t s = class_to_size[sc];
 943     if (size > s) {
 944      MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
 945       CRASH();
 946     }
 947     if (s == 0) {
 948       MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
 949       CRASH();
 950     }
 951   }
 952
 953   // Initialize the num_objects_to_move array.
 954   for (size_t cl = 1; cl  < kNumClasses; ++cl) {
 955     num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
 956   }
 957
 958 #ifndef WTF_CHANGES
 959   if (false) {
 960     // Dump class sizes and maximum external wastage per size class
 961     for (size_t cl = 1; cl  < kNumClasses; ++cl) {
 962       const int alloc_size = class_to_pages[cl] << kPageShift;
 963       const int alloc_objs = alloc_size / class_to_size[cl];
 964       const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
 965       const int max_waste = alloc_size - min_used;
 966       MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
 967               int(cl),
 968               int(class_to_size[cl-1] + 1),
 969               int(class_to_size[cl]),
 970               int(class_to_pages[cl] << kPageShift),
 971               max_waste * 100.0 / alloc_size
 972               );
 973     }
 974   }
 975 #endif
 976 }
 977
 978 // -------------------------------------------------------------------------
 979 // Simple allocator for objects of a specified type.  External locking
 980 // is required before accessing one of these objects.
 981 // -------------------------------------------------------------------------
 982
 983 // Metadata allocator -- keeps stats about how many bytes allocated
 984 static uint64_t metadata_system_bytes = 0;
 985 static void* MetaDataAlloc(size_t bytes) {
 986   void* result = TCMalloc_SystemAlloc(bytes, 0);
 987   if (result != NULL) {
 988     metadata_system_bytes += bytes;
 989   }
 990   return result;
 991 }
 992
 993 template <class T>
 994 class PageHeapAllocator {
 995  private:
 996   // How much to allocate from system at a time
 997   static const size_t kAllocIncrement = 32 << 10;
 998
 999   // Aligned size of T
1000   static const size_t kAlignedSize
1001   = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
1002
1003   // Free area from which to carve new objects
1004   char* free_area_;
1005   size_t free_avail_;
1006
1007   // Linked list of all regions allocated by this allocator
1008   void* allocated_regions_;
1009
1010   // Free list of already carved objects
1011   void* free_list_;
1012
1013   // Number of allocated but unfreed objects
1014   int inuse_;
1015
1016  public:
1017   void Init() {
1018     ASSERT(kAlignedSize <= kAllocIncrement);
1019     inuse_ = 0;
1020     allocated_regions_ = 0;
1021     free_area_ = NULL;
1022     free_avail_ = 0;
1023     free_list_ = NULL;
1024   }
1025
1026   T* New() {
1027     // Consult free list
1028     void* result;
1029     if (free_list_ != NULL) {
1030       result = free_list_;
1031       free_list_ = *(reinterpret_cast<void**>(result));
1032     } else {
1033       if (free_avail_ < kAlignedSize) {
1034         // Need more room
1035         char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
1036         if (!new_allocation)
1037           CRASH();
1038
1039         *reinterpret_cast_ptr<void**>(new_allocation) = allocated_regions_;
1040         allocated_regions_ = new_allocation;
1041         free_area_ = new_allocation + kAlignedSize;
1042         free_avail_ = kAllocIncrement - kAlignedSize;
1043       }
1044       result = free_area_;
1045       free_area_ += kAlignedSize;
1046       free_avail_ -= kAlignedSize;
1047     }
1048     inuse_++;
1049     return reinterpret_cast<T*>(result);
1050   }
1051
1052   void Delete(T* p) {
1053     *(reinterpret_cast<void**>(p)) = free_list_;
1054     free_list_ = p;
1055     inuse_--;
1056   }
1057
1058   int inuse() const { return inuse_; }
1059
1060 #if defined(WTF_CHANGES) && OS(DARWIN)
1061   template <class Recorder>
1062   void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader)
1063   {
1064       vm_address_t adminAllocation = reinterpret_cast<vm_address_t>(allocated_regions_);
1065       while (adminAllocation) {
1066           recorder.recordRegion(adminAllocation, kAllocIncrement);
1067           adminAllocation = *reader(reinterpret_cast<vm_address_t*>(adminAllocation));
1068       }
1069   }
1070 #endif
1071 };
1072
1073 // -------------------------------------------------------------------------
1074 // Span - a contiguous run of pages
1075 // -------------------------------------------------------------------------
1076
// Type that can hold a page number
typedef uintptr_t PageID;

// Type that can hold the length of a run of pages
typedef uintptr_t Length;

// Largest page count whose size in bytes (count << kPageShift) still fits
// in a Length: all bits set, shifted right by kPageShift.
static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
1084
1085 // Convert byte size into pages.  This won't overflow, but may return
1086 // an unreasonably large value if bytes is huge enough.
1087 static inline Length pages(size_t bytes) {
1088   return (bytes >> kPageShift) +
1089       ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
1090 }
1091
1092 // Convert a user size into the number of bytes that will actually be
1093 // allocated
1094 static size_t AllocationSize(size_t bytes) {
1095   if (bytes > kMaxSize) {
1096     // Large object: we allocate an integral number of pages
1097     ASSERT(bytes <= (kMaxValidPages << kPageShift));
1098     return pages(bytes) << kPageShift;
1099   } else {
1100     // Small object: find the size class to which it belongs
1101     return ByteSizeForClass(SizeClass(bytes));
1102   }
1103 }
1104
// Information kept for a span (a contiguous run of pages).
// The next/prev links are used by the DLL_* list helpers; the bitfields
// pack the per-span flags and counters.
struct Span {
  PageID        start;          // Starting page number
  Length        length;         // Number of pages in span
  Span*         next;           // Used when in link list
  Span*         prev;           // Used when in link list
  void*         objects;        // Linked list of free objects
  unsigned int  free : 1;       // Is the span free
#ifndef NO_TCMALLOC_SAMPLES
  unsigned int  sample : 1;     // Sampled object?
#endif
  unsigned int  sizeclass : 8;  // Size-class for small objects (or 0)
  unsigned int  refcount : 11;  // Number of non-free objects
  bool decommitted : 1;         // Set once the span's memory has been released to the system

// SPAN_HISTORY is force-disabled here, so the debugging fields below are
// compiled out unless this #undef is removed.
#undef SPAN_HISTORY
#ifdef SPAN_HISTORY
  // For debugging, we can keep a log events per span
  int nexthistory;
  char history[64];
  int value[64];
#endif
};
1128
// Assert that "span" has not been decommitted.  The argument is
// parenthesized so that any pointer-valued expression may be passed.
#define ASSERT_SPAN_COMMITTED(span) ASSERT(!(span)->decommitted)
1130
#ifdef SPAN_HISTORY
// Append (op, v) to the span's circular debugging log, wrapping back to
// slot 0 once the end of the history buffer is reached.
void Event(Span* span, char op, int v = 0) {
  const int slot = span->nexthistory;
  span->history[slot] = op;
  span->value[slot] = v;
  span->nexthistory = slot + 1;
  if (span->nexthistory == sizeof(span->history))
    span->nexthistory = 0;
}
#else
// History logging is compiled out: Event() expands to a no-op expression.
#define Event(s,o,v) ((void) 0)
#endif
1141
1142 // Allocator/deallocator for spans
1143 static PageHeapAllocator<Span> span_allocator;
1144 static Span* NewSpan(PageID p, Length len) {
1145   Span* result = span_allocator.New();
1146   memset(result, 0, sizeof(*result));
1147   result->start = p;
1148   result->length = len;
1149 #ifdef SPAN_HISTORY
1150   result->nexthistory = 0;
1151 #endif
1152   return result;
1153 }
1154
1155 static inline void DeleteSpan(Span* span) {
1156 #ifndef NDEBUG
1157   // In debug mode, trash the contents of deleted Spans
1158   memset(span, 0x3f, sizeof(*span));
1159 #endif
1160   span_allocator.Delete(span);
1161 }
1162
1163 // -------------------------------------------------------------------------
1164 // Doubly linked list of spans.
1165 // -------------------------------------------------------------------------
1166
1167 static inline void DLL_Init(Span* list) {
1168   list->next = list;
1169   list->prev = list;
1170 }
1171
1172 static inline void DLL_Remove(Span* span) {
1173   span->prev->next = span->next;
1174   span->next->prev = span->prev;
1175   span->prev = NULL;
1176   span->next = NULL;
1177 }
1178
1179 static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
1180   return list->next == list;
1181 }
1182
1183 static int DLL_Length(const Span* list) {
1184   int result = 0;
1185   for (Span* s = list->next; s != list; s = s->next) {
1186     result++;
1187   }
1188   return result;
1189 }
1190
#if 0 /* Not needed at the moment -- causes compiler warnings if not used */
// Debugging aid: print "label", the list header address, and one
// <address,start,length> triple for every span on "list".
// start/length are uintptr_t, so they are printed with the size_t format
// (PRIuS) rather than %u, and pointers are passed to %p as void*.
static void DLL_Print(const char* label, const Span* list) {
  MESSAGE("%-10s %p:", label, static_cast<const void*>(list));
  for (const Span* s = list->next; s != list; s = s->next) {
    MESSAGE(" <%p,%" PRIuS ",%" PRIuS ">",
            static_cast<const void*>(s),
            static_cast<size_t>(s->start),
            static_cast<size_t>(s->length));
  }
  MESSAGE("\n");
}
#endif
1200
1201 static inline void DLL_Prepend(Span* list, Span* span) {
1202   ASSERT(span->next == NULL);
1203   ASSERT(span->prev == NULL);
1204   span->next = list->next;
1205   span->prev = list;
1206   list->next->prev = span;
1207   list->next = span;
1208 }
1209
1210 // -------------------------------------------------------------------------
1211 // Stack traces kept for sampled allocations
1212 //   The following state is protected by pageheap_lock_.
1213 // -------------------------------------------------------------------------
1214
// size/depth are made the same size as a pointer so that some generic
// code below can conveniently cast them back and forth to void*.
static const int kMaxStackDepth = 31;  // Capacity of StackTrace::stack
struct StackTrace {
  uintptr_t size;          // Size of object
  uintptr_t depth;         // Number of PC values stored in array below
  void*     stack[kMaxStackDepth];
};
// Allocator for StackTrace records.
static PageHeapAllocator<StackTrace> stacktrace_allocator;
// A Span object; presumably the header of the list of sampled allocations
// (its use is not visible in this part of the file).
static Span sampled_objects;
1225
1226 // -------------------------------------------------------------------------
1227 // Map from page-id to per-page data
1228 // -------------------------------------------------------------------------
1229
1230 // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
1231 // We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
1232 // because sometimes the sizeclass is all the information we need.
1233
// Selector class -- general selector uses 3-level map
// BITS is the pointer width in bits; Type is the page map keyed by the
// BITS-kPageShift page-number bits and CacheType is the matching
// size-class cache.
template <int BITS> class MapSelector {
 public:
  typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
  typedef PackedCache<BITS, uint64_t> CacheType;
};
1240
#if defined(WTF_CHANGES)
#if CPU(X86_64)
// On all known X86-64 platforms, the upper 16 bits are always unused and therefore
// can be excluded from the PageMap key.
// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details

static const size_t kBitsUnusedOn64Bit = 16;
#else
// Other 64-bit CPUs: no address bits are assumed to be unused.
static const size_t kBitsUnusedOn64Bit = 0;
#endif

// A three-level map for 64-bit machines
// The map key excludes both the in-page offset bits (kPageShift) and the
// kBitsUnusedOn64Bit upper address bits.
template <> class MapSelector<64> {
 public:
  typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
  typedef PackedCache<64, uint64_t> CacheType;
};
#endif
1259
// A two-level map for 32-bit machines
// Both the map and the cache are keyed by the 32 - kPageShift page-number
// bits; cache entries are 16 bits wide.
template <> class MapSelector<32> {
 public:
  typedef TCMalloc_PageMap2<32 - kPageShift> Type;
  typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
};
1266
1267 // -------------------------------------------------------------------------
1268 // Page-level allocator
1269 //  * Eager coalescing
1270 //
// Heap for page-level allocation.  We allow allocating and freeing
// contiguous runs of pages (each run is called a "span").
1273 // -------------------------------------------------------------------------
1274
1275 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1276 // The page heap maintains a free list for spans that are no longer in use by
1277 // the central cache or any thread caches. We use a background thread to
1278 // periodically scan the free list and release a percentage of it back to the OS.
1279
1280 // If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the
1281 // background thread:
1282 //     - wakes up
1283 //     - pauses for kScavengeDelayInSeconds
1284 //     - returns to the OS a percentage of the memory that remained unused during
1285 //       that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_)
1286 // The goal of this strategy is to reduce memory pressure in a timely fashion
1287 // while avoiding thrashing the OS allocator.
1288
// Time delay before the page heap scavenger will consider returning pages to
// the OS.
static const int kScavengeDelayInSeconds = 2;

// Approximate fraction of free committed pages to return to the OS in one
// scavenge (.5 means 50%).
static const float kScavengePercentage = .5f;

// Number of span lists to keep spans in when memory is returned.
static const int kMinSpanListsWithSpans = 32;

// Number of free committed pages that we want to keep around.  This is the number of pages in use when there
// is exactly 1 span in each of the first kMinSpanListsWithSpans span lists, i.e. 1 + 2 + ... + 32 =
// 32 * 33 / 2 = 528 pages with the current value of kMinSpanListsWithSpans.
static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f);
1303
1304 #endif
1305
// Page-level heap.  Hands out and takes back spans (contiguous runs of
// pages), keeps free spans on per-length lists (free_[] and large_), and
// maintains the page map from page number to Span descriptor.  When
// USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY is set it also owns the
// scavenger state; otherwise memory is released through
// IncrementalScavenge().
class TCMalloc_PageHeap {
 public:
  // Reset all bookkeeping to the empty state and, when background
  // scavenging is enabled, set up the scavenger.
  void init();

  // Allocate a run of "n" pages.  Returns zero if out of memory.
  Span* New(Length n);

  // Delete the span "[p, p+n-1]".
  // REQUIRES: span was returned by earlier call to New() and
  //           has not yet been deleted.
  void Delete(Span* span);

  // Mark an allocated span as being used for small objects of the
  // specified size-class.
  // REQUIRES: span was returned by an earlier call to New()
  //           and has not yet been deleted.
  void RegisterSizeClass(Span* span, size_t sc);

  // Split an allocated span into two spans: one of length "n" pages
  // followed by another span of length "span->length - n" pages.
  // Modifies "*span" to point to the first span of length "n" pages.
  // Returns a pointer to the second span.
  //
  // REQUIRES: "0 < n < span->length"
  // REQUIRES: !span->free
  // REQUIRES: span->sizeclass == 0
  Span* Split(Span* span, Length n);

  // Return the descriptor for the specified page.
  inline Span* GetDescriptor(PageID p) const {
    return reinterpret_cast<Span*>(pagemap_.get(p));
  }

#ifdef WTF_CHANGES
  // Like GetDescriptor(), but first calls pagemap_.Ensure() for page "p".
  inline Span* GetDescriptorEnsureSafe(PageID p)
  {
      pagemap_.Ensure(p, 1);
      return GetDescriptor(p);
  }

  size_t ReturnedBytes() const;
#endif

  // Dump state to stderr
#ifndef WTF_CHANGES
  void Dump(TCMalloc_Printer* out);
#endif

  // Return number of bytes allocated from system
  inline uint64_t SystemBytes() const { return system_bytes_; }

  // Return number of free bytes in heap
  uint64_t FreeBytes() const {
    return (static_cast<uint64_t>(free_pages_) << kPageShift);
  }

  bool Check();
  bool CheckList(Span* list, Length min_pages, Length max_pages);

  // Release all pages on the free list for reuse by the OS:
  void ReleaseFreePages();

  // Return 0 if we have no information, or else the correct sizeclass for p.
  // Reads and writes to pagemap_cache_ do not require locking.
  // The entries are 64 bits on 64-bit hardware and 16 bits on
  // 32-bit hardware, and we don't mind raciness as long as each read of
  // an entry yields a valid entry, not a partially updated entry.
  size_t GetSizeClassIfCached(PageID p) const {
    return pagemap_cache_.GetOrDefault(p, 0);
  }
  // Record size class "cl" for page "p" in the cache.
  void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }

 private:
  // Pick the appropriate map and cache types based on pointer size
  typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
  typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
  PageMap pagemap_;
  mutable PageMapCache pagemap_cache_;

  // We segregate spans of a given size into two circular linked
  // lists: one for normal spans, and one for spans whose memory
  // has been returned to the system.
  struct SpanList {
    Span        normal;
    Span        returned;
  };

  // List of free spans of length >= kMaxPages
  SpanList large_;

  // Array mapping from span length to a doubly linked list of free spans
  SpanList free_[kMaxPages];

  // Number of pages kept in free lists
  uintptr_t free_pages_;

  // Bytes allocated from system
  uint64_t system_bytes_;

#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  // Number of pages kept in free lists that are still committed.
  Length free_committed_pages_;

  // Minimum number of free committed pages since last scavenge. (Can be 0 if
  // we've committed new pages since the last scavenge.)
  Length min_free_committed_pages_since_last_scavenge_;
#endif

  bool GrowHeap(Length n);

  // REQUIRES   span->length >= n
  // Remove span from its free list, and move any leftover part of
  // span into appropriate free lists.  Also update "span" to have
  // length exactly "n" and mark it as non-free so it can be returned
  // to the client.
  //
  // "released" is true iff "span" was found on a "returned" list.
  void Carve(Span* span, Length n, bool released);

  // Enter "span" into the page map under its first page and, for spans
  // longer than one page, under its last page as well.
  void RecordSpan(Span* span) {
    pagemap_.set(span->start, span);
    if (span->length > 1) {
      pagemap_.set(span->start + span->length - 1, span);
    }
  }

  // Allocate a large span of length == n.  If successful, returns a
  // span of exactly the specified length.  Else, returns NULL.
  Span* AllocLarge(Length n);

#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  // Incrementally release some memory to the system.
  // IncrementalScavenge(n) is called whenever n pages are freed.
  void IncrementalScavenge(Length n);
#endif

  // Number of pages to deallocate before doing more scavenging
  int64_t scavenge_counter_;

  // Index of last free list we scavenged
  size_t scavenge_index_;

#if defined(WTF_CHANGES) && OS(DARWIN)
  friend class FastMallocZone;
#endif

#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  // Scavenger interface.  The mechanism that drives scavenge() depends on
  // the platform: a libdispatch timer, a Windows timer-queue timer, or a
  // dedicated pthread.
  void initializeScavenger();
  ALWAYS_INLINE void signalScavenger();
  void scavenge();
  ALWAYS_INLINE bool shouldScavenge() const;

#if HAVE(DISPATCH_H) || OS(WINDOWS)
  void periodicScavenge();
  ALWAYS_INLINE bool isScavengerSuspended();
  ALWAYS_INLINE void scheduleScavenger();
  ALWAYS_INLINE void rescheduleScavenger();
  ALWAYS_INLINE void suspendScavenger();
#endif

#if HAVE(DISPATCH_H)
  dispatch_queue_t m_scavengeQueue;
  dispatch_source_t m_scavengeTimer;
  bool m_scavengingSuspended;
#elif OS(WINDOWS)
  static void CALLBACK scavengerTimerFired(void*, BOOLEAN);
  HANDLE m_scavengeQueueTimer;
#else
  static NO_RETURN_WITH_VALUE void* runScavengerThread(void*);
  NO_RETURN void scavengerThread();

  // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or
  // it's blocked waiting for more pages to be deleted.
  bool m_scavengeThreadActive;

  pthread_mutex_t m_scavengeMutex;
  pthread_cond_t m_scavengeCondition;
#endif

#endif  // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
};
1487
1488 void TCMalloc_PageHeap::init()
1489 {
1490   pagemap_.init(MetaDataAlloc);
1491   pagemap_cache_ = PageMapCache(0);
1492   free_pages_ = 0;
1493   system_bytes_ = 0;
1494
1495 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1496   free_committed_pages_ = 0;
1497   min_free_committed_pages_since_last_scavenge_ = 0;
1498 #endif  // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1499
1500   scavenge_counter_ = 0;
1501   // Start scavenging at kMaxPages list
1502   scavenge_index_ = kMaxPages-1;
1503   COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
1504   DLL_Init(&large_.normal);
1505   DLL_Init(&large_.returned);
1506   for (size_t i = 0; i < kMaxPages; i++) {
1507     DLL_Init(&free_[i].normal);
1508     DLL_Init(&free_[i].returned);
1509   }
1510
1511 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1512   initializeScavenger();
1513 #endif  // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1514 }
1515
1516 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1517
1518 #if HAVE(DISPATCH_H)
1519
1520 void TCMalloc_PageHeap::initializeScavenger()
1521 {
1522     m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL);
1523     m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue);
1524     dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, kScavengeDelayInSeconds * NSEC_PER_SEC);
1525     dispatch_source_set_timer(m_scavengeTimer, startTime, kScavengeDelayInSeconds * NSEC_PER_SEC, 1000 * NSEC_PER_USEC);
1526     dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); });
1527     m_scavengingSuspended = true;
1528 }
1529
1530 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended()
1531 {
1532     ASSERT(IsHeld(pageheap_lock));
1533     return m_scavengingSuspended;
1534 }
1535
1536 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger()
1537 {
1538     ASSERT(IsHeld(pageheap_lock));
1539     m_scavengingSuspended = false;
1540     dispatch_resume(m_scavengeTimer);
1541 }
1542
1543 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger()
1544 {
1545     // Nothing to do here for libdispatch.
1546 }
1547
1548 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger()
1549 {
1550     ASSERT(IsHeld(pageheap_lock));
1551     m_scavengingSuspended = true;
1552     dispatch_suspend(m_scavengeTimer);
1553 }
1554
1555 #elif OS(WINDOWS)
1556
1557 void TCMalloc_PageHeap::scavengerTimerFired(void* context, BOOLEAN)
1558 {
1559     static_cast<TCMalloc_PageHeap*>(context)->periodicScavenge();
1560 }
1561
1562 void TCMalloc_PageHeap::initializeScavenger()
1563 {
1564     m_scavengeQueueTimer = 0;
1565 }
1566
1567 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended()
1568 {
1569     ASSERT(IsHeld(pageheap_lock));
1570     return !m_scavengeQueueTimer;
1571 }
1572
1573 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger()
1574 {
1575     // We need to use WT_EXECUTEONLYONCE here and reschedule the timer, because
1576     // Windows will fire the timer event even when the function is already running.
1577     ASSERT(IsHeld(pageheap_lock));
1578     CreateTimerQueueTimer(&m_scavengeQueueTimer, 0, scavengerTimerFired, this, kScavengeDelayInSeconds * 1000, 0, WT_EXECUTEONLYONCE);
1579 }
1580
1581 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger()
1582 {
1583     // We must delete the timer and create it again, because it is not possible to retrigger a timer on Windows.
1584     suspendScavenger();
1585     scheduleScavenger();
1586 }
1587
1588 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger()
1589 {
1590     ASSERT(IsHeld(pageheap_lock));
1591     HANDLE scavengeQueueTimer = m_scavengeQueueTimer;
1592     m_scavengeQueueTimer = 0;
1593     DeleteTimerQueueTimer(0, scavengeQueueTimer, 0);
1594 }
1595
1596 #else
1597
1598 void TCMalloc_PageHeap::initializeScavenger()
1599 {
1600     // Create a non-recursive mutex.
1601 #if !defined(PTHREAD_MUTEX_NORMAL) || PTHREAD_MUTEX_NORMAL == PTHREAD_MUTEX_DEFAULT
1602     pthread_mutex_init(&m_scavengeMutex, 0);
1603 #else
1604     pthread_mutexattr_t attr;
1605     pthread_mutexattr_init(&attr);
1606     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL);
1607
1608     pthread_mutex_init(&m_scavengeMutex, &attr);
1609
1610     pthread_mutexattr_destroy(&attr);
1611 #endif
1612
1613     pthread_cond_init(&m_scavengeCondition, 0);
1614     m_scavengeThreadActive = true;
1615     pthread_t thread;
1616     pthread_create(&thread, 0, runScavengerThread, this);
1617 }
1618
1619 void* TCMalloc_PageHeap::runScavengerThread(void* context)
1620 {
1621   static_cast<TCMalloc_PageHeap*>(context)->scavengerThread();
1622 #if COMPILER(MSVC)
1623   // Without this, Visual Studio will complain that this method does not return a value.
1624   return 0;
1625 #endif
1626 }
1627
1628 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
1629 {
1630     // m_scavengeMutex should be held before accessing m_scavengeThreadActive.
1631     ASSERT(pthread_mutex_trylock(m_scavengeMutex));
1632     if (!m_scavengeThreadActive && shouldScavenge())
1633         pthread_cond_signal(&m_scavengeCondition);
1634 }
1635
1636 #endif
1637
1638 void TCMalloc_PageHeap::scavenge()
1639 {
1640     size_t pagesToRelease = min_free_committed_pages_since_last_scavenge_ * kScavengePercentage;
1641     size_t targetPageCount = std::max<size_t>(kMinimumFreeCommittedPageCount, free_committed_pages_ - pagesToRelease);
1642
1643     while (free_committed_pages_ > targetPageCount) {
1644         for (int i = kMaxPages; i > 0 && free_committed_pages_ >= targetPageCount; i--) {
1645             SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i];
1646             // If the span size is bigger than kMinSpanListsWithSpans pages return all the spans in the list, else return all but 1 span.
1647             // Return only 50% of a spanlist at a time so spans of size 1 are not the only ones left.
1648             size_t length = DLL_Length(&slist->normal);
1649             size_t numSpansToReturn = (i > kMinSpanListsWithSpans) ? length : length / 2;
1650             for (int j = 0; static_cast<size_t>(j) < numSpansToReturn && !DLL_IsEmpty(&slist->normal) && free_committed_pages_ > targetPageCount; j++) {
1651                 Span* s = slist->normal.prev;
1652                 DLL_Remove(s);
1653                 ASSERT(!s->decommitted);
1654                 if (!s->decommitted) {
1655                     TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1656                                            static_cast<size_t>(s->length << kPageShift));
1657                     ASSERT(free_committed_pages_ >= s->length);
1658                     free_committed_pages_ -= s->length;
1659                     s->decommitted = true;
1660                 }
1661                 DLL_Prepend(&slist->returned, s);
1662             }
1663         }
1664     }
1665
1666     min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1667 }
1668
1669 ALWAYS_INLINE bool TCMalloc_PageHeap::shouldScavenge() const
1670 {
1671     return free_committed_pages_ > kMinimumFreeCommittedPageCount;
1672 }
1673
1674 #endif  // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1675
1676 inline Span* TCMalloc_PageHeap::New(Length n) {
1677   ASSERT(Check());
1678   ASSERT(n > 0);
1679
1680   // Find first size >= n that has a non-empty list
1681   for (Length s = n; s < kMaxPages; s++) {
1682     Span* ll = NULL;
1683     bool released = false;
1684     if (!DLL_IsEmpty(&free_[s].normal)) {
1685       // Found normal span
1686       ll = &free_[s].normal;
1687     } else if (!DLL_IsEmpty(&free_[s].returned)) {
1688       // Found returned span; reallocate it
1689       ll = &free_[s].returned;
1690       released = true;
1691     } else {
1692       // Keep looking in larger classes
1693       continue;
1694     }
1695
1696     Span* result = ll->next;
1697     Carve(result, n, released);
1698 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1699     // The newly allocated memory is from a span that's in the normal span list (already committed).  Update the
1700     // free committed pages count.
1701     ASSERT(free_committed_pages_ >= n);
1702     free_committed_pages_ -= n;
1703     if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
1704       min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1705 #endif  // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1706     ASSERT(Check());
1707     free_pages_ -= n;
1708     return result;
1709   }
1710
1711   Span* result = AllocLarge(n);
1712   if (result != NULL) {
1713       ASSERT_SPAN_COMMITTED(result);
1714       return result;
1715   }
1716
1717   // Grow the heap and try again
1718   if (!GrowHeap(n)) {
1719     ASSERT(Check());
1720     return NULL;
1721   }
1722
1723   return AllocLarge(n);
1724 }
1725
1726 Span* TCMalloc_PageHeap::AllocLarge(Length n) {
1727   // find the best span (closest to n in size).
1728   // The following loops implements address-ordered best-fit.
1729   bool from_released = false;
1730   Span *best = NULL;
1731
1732   // Search through normal list
1733   for (Span* span = large_.normal.next;
1734        span != &large_.normal;
1735        span = span->next) {
1736     if (span->length >= n) {
1737       if ((best == NULL)
1738           || (span->length < best->length)
1739           || ((span->length == best->length) && (span->start < best->start))) {
1740         best = span;
1741         from_released = false;
1742       }
1743     }
1744   }
1745
1746   // Search through released list in case it has a better fit
1747   for (Span* span = large_.returned.next;
1748        span != &large_.returned;
1749        span = span->next) {
1750     if (span->length >= n) {
1751       if ((best == NULL)
1752           || (span->length < best->length)
1753           || ((span->length == best->length) && (span->start < best->start))) {
1754         best = span;
1755         from_released = true;
1756       }
1757     }
1758   }
1759
1760   if (best != NULL) {
1761     Carve(best, n, from_released);
1762 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1763     // The newly allocated memory is from a span that's in the normal span list (already committed).  Update the
1764     // free committed pages count.
1765     ASSERT(free_committed_pages_ >= n);
1766     free_committed_pages_ -= n;
1767     if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
1768       min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1769 #endif  // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1770     ASSERT(Check());
1771     free_pages_ -= n;
1772     return best;
1773   }
1774   return NULL;
1775 }
1776
1777 Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
1778   ASSERT(0 < n);
1779   ASSERT(n < span->length);
1780   ASSERT(!span->free);
1781   ASSERT(span->sizeclass == 0);
1782   Event(span, 'T', n);
1783
1784   const Length extra = span->length - n;
1785   Span* leftover = NewSpan(span->start + n, extra);
1786   Event(leftover, 'U', extra);
1787   RecordSpan(leftover);
1788   pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
1789   span->length = n;
1790
1791   return leftover;
1792 }
1793
1794 inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
1795   ASSERT(n > 0);
1796   DLL_Remove(span);
1797   span->free = 0;
1798   Event(span, 'A', n);
1799
1800   if (released) {
1801     // If the span chosen to carve from is decommited, commit the entire span at once to avoid committing spans 1 page at a time.
1802     ASSERT(span->decommitted);
1803     TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift));
1804     span->decommitted = false;
1805 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1806     free_committed_pages_ += span->length;
1807 #endif
1808   }
1809
1810   const int extra = static_cast<int>(span->length - n);
1811   ASSERT(extra >= 0);
1812   if (extra > 0) {
1813     Span* leftover = NewSpan(span->start + n, extra);
1814     leftover->free = 1;
1815     leftover->decommitted = false;
1816     Event(leftover, 'S', extra);
1817     RecordSpan(leftover);
1818
1819     // Place leftover span on appropriate free list
1820     SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_;
1821     Span* dst = &listpair->normal;
1822     DLL_Prepend(dst, leftover);
1823
1824     span->length = n;
1825     pagemap_.set(span->start + n - 1, span);
1826   }
1827 }
1828
1829 static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
1830 {
1831     if (destination->decommitted && !other->decommitted) {
1832         TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift),
1833                                static_cast<size_t>(other->length << kPageShift));
1834     } else if (other->decommitted && !destination->decommitted) {
1835         TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift),
1836                                static_cast<size_t>(destination->length << kPageShift));
1837         destination->decommitted = true;
1838     }
1839 }
1840
1841 inline void TCMalloc_PageHeap::Delete(Span* span) {
1842   ASSERT(Check());
1843   ASSERT(!span->free);
1844   ASSERT(span->length > 0);
1845   ASSERT(GetDescriptor(span->start) == span);
1846   ASSERT(GetDescriptor(span->start + span->length - 1) == span);
1847   span->sizeclass = 0;
1848 #ifndef NO_TCMALLOC_SAMPLES
1849   span->sample = 0;
1850 #endif
1851
1852   // Coalesce -- we guarantee that "p" != 0, so no bounds checking
1853   // necessary.  We do not bother resetting the stale pagemap
1854   // entries for the pieces we are merging together because we only
1855   // care about the pagemap entries for the boundaries.
1856 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1857   // Track the total size of the neighboring free spans that are committed.
1858   Length neighboringCommittedSpansLength = 0;
1859 #endif
1860   const PageID p = span->start;
1861   const Length n = span->length;
1862   Span* prev = GetDescriptor(p-1);
1863   if (prev != NULL && prev->free) {
1864     // Merge preceding span into this span
1865     ASSERT(prev->start + prev->length == p);
1866     const Length len = prev->length;
1867 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1868     if (!prev->decommitted)
1869         neighboringCommittedSpansLength += len;
1870 #endif
1871     mergeDecommittedStates(span, prev);
1872     DLL_Remove(prev);
1873     DeleteSpan(prev);
1874     span->start -= len;
1875     span->length += len;
1876     pagemap_.set(span->start, span);
1877     Event(span, 'L', len);
1878   }
1879   Span* next = GetDescriptor(p+n);
1880   if (next != NULL && next->free) {
1881     // Merge next span into this span
1882     ASSERT(next->start == p+n);
1883     const Length len = next->length;
1884 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1885     if (!next->decommitted)
1886         neighboringCommittedSpansLength += len;
1887 #endif
1888     mergeDecommittedStates(span, next);
1889     DLL_Remove(next);
1890     DeleteSpan(next);
1891     span->length += len;
1892     pagemap_.set(span->start + span->length - 1, span);
1893     Event(span, 'R', len);
1894   }
1895
1896   Event(span, 'D', span->length);
1897   span->free = 1;
1898   if (span->decommitted) {
1899     if (span->length < kMaxPages)
1900       DLL_Prepend(&free_[span->length].returned, span);
1901     else
1902       DLL_Prepend(&large_.returned, span);
1903   } else {
1904     if (span->length < kMaxPages)
1905       DLL_Prepend(&free_[span->length].normal, span);
1906     else
1907       DLL_Prepend(&large_.normal, span);
1908   }
1909   free_pages_ += n;
1910
1911 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1912   if (span->decommitted) {
1913       // If the merged span is decommitted, that means we decommitted any neighboring spans that were
1914       // committed.  Update the free committed pages count.
1915       free_committed_pages_ -= neighboringCommittedSpansLength;
1916       if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
1917             min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1918   } else {
1919       // If the merged span remains committed, add the deleted span's size to the free committed pages count.
1920       free_committed_pages_ += n;
1921   }
1922
1923   // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system.
1924   signalScavenger();
1925 #else
1926   IncrementalScavenge(n);
1927 #endif
1928
1929   ASSERT(Check());
1930 }
1931
1932 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1933 void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
1934   // Fast path; not yet time to release memory
1935   scavenge_counter_ -= n;
1936   if (scavenge_counter_ >= 0) return;  // Not yet time to scavenge
1937
1938   // If there is nothing to release, wait for so many pages before
1939   // scavenging again.  With 4K pages, this comes to 16MB of memory.
1940   static const size_t kDefaultReleaseDelay = 1 << 8;
1941
1942   // Find index of free list to scavenge
1943   size_t index = scavenge_index_ + 1;
1944   for (size_t i = 0; i < kMaxPages+1; i++) {
1945     if (index > kMaxPages) index = 0;
1946     SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
1947     if (!DLL_IsEmpty(&slist->normal)) {
1948       // Release the last span on the normal portion of this list
1949       Span* s = slist->normal.prev;
1950       DLL_Remove(s);
1951       TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1952                              static_cast<size_t>(s->length << kPageShift));
1953       s->decommitted = true;
1954       DLL_Prepend(&slist->returned, s);
1955
1956       scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
1957
1958       if (index == kMaxPages && !DLL_IsEmpty(&slist->normal))
1959         scavenge_index_ = index - 1;
1960       else
1961         scavenge_index_ = index;
1962       return;
1963     }
1964     index++;
1965   }
1966
1967   // Nothing to scavenge, delay for a while
1968   scavenge_counter_ = kDefaultReleaseDelay;
1969 }
1970 #endif
1971
1972 void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
1973   // Associate span object with all interior pages as well
1974   ASSERT(!span->free);
1975   ASSERT(GetDescriptor(span->start) == span);
1976   ASSERT(GetDescriptor(span->start+span->length-1) == span);
1977   Event(span, 'C', sc);
1978   span->sizeclass = static_cast<unsigned int>(sc);
1979   for (Length i = 1; i < span->length-1; i++) {
1980     pagemap_.set(span->start+i, span);
1981   }
1982 }
1983
1984 #ifdef WTF_CHANGES
1985 size_t TCMalloc_PageHeap::ReturnedBytes() const {
1986     size_t result = 0;
1987     for (unsigned s = 0; s < kMaxPages; s++) {
1988         const int r_length = DLL_Length(&free_[s].returned);
1989         unsigned r_pages = s * r_length;
1990         result += r_pages << kPageShift;
1991     }
1992
1993     for (Span* s = large_.returned.next; s != &large_.returned; s = s->next)
1994         result += s->length << kPageShift;
1995     return result;
1996 }
1997 #endif
1998
1999 #ifndef WTF_CHANGES
2000 static double PagesToMB(uint64_t pages) {
2001   return (pages << kPageShift) / 1048576.0;
2002 }
2003
2004 void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
2005   int nonempty_sizes = 0;
2006   for (int s = 0; s < kMaxPages; s++) {
2007     if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
2008       nonempty_sizes++;
2009     }
2010   }
2011   out->printf("------------------------------------------------\n");
2012   out->printf("PageHeap: %d sizes; %6.1f MB free\n",
2013               nonempty_sizes, PagesToMB(free_pages_));
2014   out->printf("------------------------------------------------\n");
2015   uint64_t total_normal = 0;
2016   uint64_t total_returned = 0;
2017   for (int s = 0; s < kMaxPages; s++) {
2018     const int n_length = DLL_Length(&free_[s].normal);
2019     const int r_length = DLL_Length(&free_[s].returned);
2020     if (n_length + r_length > 0) {
2021       uint64_t n_pages = s * n_length;
2022       uint64_t r_pages = s * r_length;
2023       total_normal += n_pages;
2024       total_returned += r_pages;
2025       out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
2026                   "; unmapped: %6.1f MB; %6.1f MB cum\n",
2027                   s,
2028                   (n_length + r_length),
2029                   PagesToMB(n_pages + r_pages),
2030                   PagesToMB(total_normal + total_returned),
2031                   PagesToMB(r_pages),
2032                   PagesToMB(total_returned));
2033     }
2034   }
2035
2036   uint64_t n_pages = 0;
2037   uint64_t r_pages = 0;
2038   int n_spans = 0;
2039   int r_spans = 0;
2040   out->printf("Normal large spans:\n");
2041   for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
2042     out->printf("   [ %6" PRIuS " pages ] %6.1f MB\n",
2043                 s->length, PagesToMB(s->length));
2044     n_pages += s->length;
2045     n_spans++;
2046   }
2047   out->printf("Unmapped large spans:\n");
2048   for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
2049     out->printf("   [ %6" PRIuS " pages ] %6.1f MB\n",
2050                 s->length, PagesToMB(s->length));
2051     r_pages += s->length;
2052     r_spans++;
2053   }
2054   total_normal += n_pages;
2055   total_returned += r_pages;
2056   out->printf(">255   large * %6u spans ~ %6.1f MB; %6.1f MB cum"
2057               "; unmapped: %6.1f MB; %6.1f MB cum\n",
2058               (n_spans + r_spans),
2059               PagesToMB(n_pages + r_pages),
2060               PagesToMB(total_normal + total_returned),
2061               PagesToMB(r_pages),
2062               PagesToMB(total_returned));
2063 }
2064 #endif
2065
2066 bool TCMalloc_PageHeap::GrowHeap(Length n) {
2067   ASSERT(kMaxPages >= kMinSystemAlloc);
2068   if (n > kMaxValidPages) return false;
2069   Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
2070   size_t actual_size;
2071   void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
2072   if (ptr == NULL) {
2073     if (n < ask) {
2074       // Try growing just "n" pages
2075       ask = n;
2076       ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
2077     }
2078     if (ptr == NULL) return false;
2079   }
2080   ask = actual_size >> kPageShift;
2081
2082   uint64_t old_system_bytes = system_bytes_;
2083   system_bytes_ += (ask << kPageShift);
2084   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
2085   ASSERT(p > 0);
2086
2087   // If we have already a lot of pages allocated, just pre allocate a bunch of
2088   // memory for the page map. This prevents fragmentation by pagemap metadata
2089   // when a program keeps allocating and freeing large blocks.
2090
2091   if (old_system_bytes < kPageMapBigAllocationThreshold
2092       && system_bytes_ >= kPageMapBigAllocationThreshold) {
2093     pagemap_.PreallocateMoreMemory();
2094   }
2095
2096   // Make sure pagemap_ has entries for all of the new pages.
2097   // Plus ensure one before and one after so coalescing code
2098   // does not need bounds-checking.
2099   if (pagemap_.Ensure(p-1, ask+2)) {
2100     // Pretend the new area is allocated and then Delete() it to
2101     // cause any necessary coalescing to occur.
2102     //
2103     // We do not adjust free_pages_ here since Delete() will do it for us.
2104     Span* span = NewSpan(p, ask);
2105     RecordSpan(span);
2106     Delete(span);
2107     ASSERT(Check());
2108     return true;
2109   } else {
2110     // We could not allocate memory within "pagemap_"
2111     // TODO: Once we can return memory to the system, return the new span
2112     return false;
2113   }
2114 }
2115
2116 bool TCMalloc_PageHeap::Check() {
2117   ASSERT(free_[0].normal.next == &free_[0].normal);
2118   ASSERT(free_[0].returned.next == &free_[0].returned);
2119   CheckList(&large_.normal, kMaxPages, 1000000000);
2120   CheckList(&large_.returned, kMaxPages, 1000000000);
2121   for (Length s = 1; s < kMaxPages; s++) {
2122     CheckList(&free_[s].normal, s, s);
2123     CheckList(&free_[s].returned, s, s);
2124   }
2125   return true;
2126 }
2127
2128 #if ASSERT_DISABLED
2129 bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) {
2130   return true;
2131 }
2132 #else
2133 bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) {
2134   for (Span* s = list->next; s != list; s = s->next) {
2135     CHECK_CONDITION(s->free);
2136     CHECK_CONDITION(s->length >= min_pages);
2137     CHECK_CONDITION(s->length <= max_pages);
2138     CHECK_CONDITION(GetDescriptor(s->start) == s);
2139     CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
2140   }
2141   return true;
2142 }
2143 #endif
2144
2145 static void ReleaseFreeList(Span* list, Span* returned) {
2146   // Walk backwards through list so that when we push these
2147   // spans on the "returned" list, we preserve the order.
2148   while (!DLL_IsEmpty(list)) {
2149     Span* s = list->prev;
2150     DLL_Remove(s);
2151     DLL_Prepend(returned, s);
2152     TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
2153                            static_cast<size_t>(s->length << kPageShift));
2154   }
2155 }
2156
2157 void TCMalloc_PageHeap::ReleaseFreePages() {
2158   for (Length s = 0; s < kMaxPages; s++) {
2159     ReleaseFreeList(&free_[s].normal, &free_[s].returned);
2160   }
2161   ReleaseFreeList(&large_.normal, &large_.returned);
2162   ASSERT(Check());
2163 }
2164
2165 //-------------------------------------------------------------------
2166 // Free list
2167 //-------------------------------------------------------------------
2168
2169 class TCMalloc_ThreadCache_FreeList {
2170  private:
2171   void*    list_;       // Linked list of nodes
2172   uint16_t length_;     // Current length
2173   uint16_t lowater_;    // Low water mark for list length
2174
2175  public:
2176   void Init() {
2177     list_ = NULL;
2178     length_ = 0;
2179     lowater_ = 0;
2180   }
2181
2182   // Return current length of list
2183   int length() const {
2184     return length_;
2185   }
2186
2187   // Is list empty?
2188   bool empty() const {
2189     return list_ == NULL;
2190   }
2191
2192   // Low-water mark management
2193   int lowwatermark() const { return lowater_; }
2194   void clear_lowwatermark() { lowater_ = length_; }
2195
2196   ALWAYS_INLINE void Push(void* ptr) {
2197     SLL_Push(&list_, ptr);
2198     length_++;
2199   }
2200
2201   void PushRange(int N, void *start, void *end) {
2202     SLL_PushRange(&list_, start, end);
2203     length_ = length_ + static_cast<uint16_t>(N);
2204   }
2205
2206   void PopRange(int N, void **start, void **end) {
2207     SLL_PopRange(&list_, N, start, end);
2208     ASSERT(length_ >= N);
2209     length_ = length_ - static_cast<uint16_t>(N);
2210     if (length_ < lowater_) lowater_ = length_;
2211   }
2212
2213   ALWAYS_INLINE void* Pop() {
2214     ASSERT(list_ != NULL);
2215     length_--;
2216     if (length_ < lowater_) lowater_ = length_;
2217     return SLL_Pop(&list_);
2218   }
2219
2220 #ifdef WTF_CHANGES
2221   template <class Finder, class Reader>
2222   void enumerateFreeObjects(Finder& finder, const Reader& reader)
2223   {
2224       for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
2225           finder.visit(nextObject);
2226   }
2227 #endif
2228 };
2229
2230 //-------------------------------------------------------------------
2231 // Data kept per thread
2232 //-------------------------------------------------------------------
2233
// Per-thread cache: one free list per size class (list_), the combined size
// of the cached data, and the state used to sample allocations.  All caches
// are chained through next_/prev_.  There is no constructor; objects are set
// up with Init().
class TCMalloc_ThreadCache {
 private:
  typedef TCMalloc_ThreadCache_FreeList FreeList;
  // Native thread id type of the platform.
#if OS(WINDOWS)
  typedef DWORD ThreadIdentifier;
#else
  typedef pthread_t ThreadIdentifier;
#endif

  size_t        size_;                  // Combined size of data
  ThreadIdentifier tid_;                // Which thread owns it
  bool          in_setspecific_;           // Called pthread_setspecific?
  FreeList      list_[kNumClasses];     // Array indexed by size-class

  // We sample allocations, biased by the size of the allocation
  uint32_t      rnd_;                   // Cheap random number generator
  size_t        bytes_until_sample_;    // Bytes until we sample next

  // Allocate a new heap. REQUIRES: pageheap_lock is held.
  static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid);

  // Use only as pthread thread-specific destructor function.
  static void DestroyThreadCache(void* ptr);
 public:
  // All ThreadCache objects are kept in a linked list (for stats collection)
  TCMalloc_ThreadCache* next_;
  TCMalloc_ThreadCache* prev_;

  // Set up / tear down this cache for the thread identified by tid.
  // (Definitions are outside this class body.)
  void Init(ThreadIdentifier tid);
  void Cleanup();

  // Accessors (mostly just for printing stats)
  // Length of the free list for size class cl; cl is not range-checked.
  int freelist_length(size_t cl) const { return list_[cl].length(); }

  // Total byte size in cache
  size_t Size() const { return size_; }

  // Allocation entry points.  Deallocate() takes the object's size class,
  // not its byte size.
  ALWAYS_INLINE void* Allocate(size_t size);
  void Deallocate(void* ptr, size_t size_class);

  // Exchange objects of size class cl with the central cache.
  // NOTE(review): presumably N is an object count -- confirm at the definition.
  ALWAYS_INLINE void FetchFromCentralCache(size_t cl, size_t allocationSize);
  void ReleaseToCentralCache(size_t cl, int N);
  void Scavenge();
  void Print() const;

  // Record allocation of "k" bytes.  Return true iff allocation
  // should be sampled
  bool SampleAllocation(size_t k);

  // Pick next sampling point
  void PickNextSample(size_t k);

  // Module-level setup and lookup/creation of per-thread caches.
  static void                  InitModule();
  static void                  InitTSD();
  static TCMalloc_ThreadCache* GetThreadHeap();
  static TCMalloc_ThreadCache* GetCache();
  static TCMalloc_ThreadCache* GetCacheIfPresent();
  static TCMalloc_ThreadCache* CreateCacheIfNecessary();
  static void                  DeleteCache(TCMalloc_ThreadCache* heap);
  static void                  BecomeIdle();
  static void                  RecomputeThreadCacheSize();

#ifdef WTF_CHANGES
  // Visits every free object cached by this thread by walking the free list
  // of each size class in turn.
  template <class Finder, class Reader>
  void enumerateFreeObjects(Finder& finder, const Reader& reader)
  {
      for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++)
          list_[sizeClass].enumerateFreeObjects(finder, reader);
  }
#endif
};
2305
2306 //-------------------------------------------------------------------
2307 // Data kept per size-class in central cache
2308 //-------------------------------------------------------------------
2309
// Central free list for one size class.  It keeps two span lists (spans with
// and without free objects) plus a small array of transfer-cache slots, all
// protected by lock_.  There is no constructor; call Init() before use.
class TCMalloc_Central_FreeList {
 public:
  // Initializes the list for size class cl.
  void Init(size_t cl);

  // These methods all do internal locking.

  // Insert the specified range into the central freelist.  N is the number of
  // elements in the range.
  void InsertRange(void *start, void *end, int N);

  // Returns the actual number of fetched elements into N.
  void RemoveRange(void **start, void **end, int *N);

  // Returns the number of free objects in cache.
  size_t length() {
    SpinLockHolder h(&lock_);
    return counter_;
  }

  // Returns the number of free objects in the transfer cache.
  // Computed as used slots times num_objects_to_move for this size class.
  int tc_length() {
    SpinLockHolder h(&lock_);
    return used_slots_ * num_objects_to_move[size_class_];
  }

#ifdef WTF_CHANGES
  // Visits every free object held by the spans on the non-empty list.
  // Span and object links are read through `reader`; remoteCentralFreeList is
  // the address this object has on the remote side and is only used to
  // recognize the remote list header that ends the walk.
  template <class Finder, class Reader>
  void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList)
  {
    // NOTE(review): span starts out equal to &empty_, so the condition
    // "span != &empty_" is false on entry and this loop body never runs.
    for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0))
      ASSERT(!span->objects);

    ASSERT(!nonempty_.objects);
    // Byte offset of nonempty_ inside this object; applied to the remote
    // address to locate the remote list header.
    static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);

    Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
    Span* remoteSpan = nonempty_.next;

    // Walk the non-empty list until it wraps around to the remote header,
    // visiting each free object of every span on the way.
    for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) {
      for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
        finder.visit(nextObject);
    }
  }
#endif

 private:
  // REQUIRES: lock_ is held
  // Remove object from cache and return.
  // Return NULL if no free entries in cache.
  void* FetchFromSpans();

  // REQUIRES: lock_ is held
  // Remove object from cache and return.  Fetches
  // from pageheap if cache is empty.  Only returns
  // NULL on allocation failure.
  void* FetchFromSpansSafe();

  // REQUIRES: lock_ is held
  // Release a linked list of objects to spans.
  // May temporarily release lock_.
  void ReleaseListToSpans(void *start);

  // REQUIRES: lock_ is held
  // Release an object to spans.
  // May temporarily release lock_.
  ALWAYS_INLINE void ReleaseToSpans(void* object);

  // REQUIRES: lock_ is held
  // Populate cache by fetching from the page heap.
  // May temporarily release lock_.
  ALWAYS_INLINE void Populate();

  // REQUIRES: lock_ is held.
  // Tries to make room for a TCEntry.  If the cache is full it will try to
  // expand it at the cost of some other cache size.  Return false if there is
  // no space.
  bool MakeCacheSpace();

  // REQUIRES: lock_ for locked_size_class is held.
  // Picks a "random" size class to steal TCEntry slot from.  In reality it
  // just iterates over the sizeclasses but does so without taking a lock.
  // Returns true on success.
  // May temporarily lock a "random" size class.
  static ALWAYS_INLINE bool EvictRandomSizeClass(size_t locked_size_class, bool force);

  // REQUIRES: lock_ is *not* held.
  // Tries to shrink the Cache.  If force is true it will release objects to
  // spans if it allows it to shrink the cache.  Return false if it failed to
  // shrink the cache.  Decrements cache_size_ on success.
  // May temporarily take lock_.  If it takes lock_, the locked_size_class
  // lock is released to keep the thread from holding two size class locks
  // concurrently which could lead to a deadlock.
  bool ShrinkCache(int locked_size_class, bool force);

  // This lock protects all the data members.  cached_entries and cache_size_
  // may be looked at without holding the lock.
  // NOTE(review): no member is named cached_entries; this presumably refers
  // to used_slots_ -- confirm.
  SpinLock lock_;

  // We keep linked lists of empty and non-empty spans.
  size_t   size_class_;     // My size class
  Span     empty_;          // Dummy header for list of empty spans
  Span     nonempty_;       // Dummy header for list of non-empty spans
  size_t   counter_;        // Number of free objects in cache entry

  // Here we reserve space for TCEntry cache slots.  Since one size class can
  // end up getting all the TCEntries quota in the system we just preallocate
  // sufficient number of entries here.
  TCEntry tc_slots_[kNumTransferEntries];

  // Number of currently used cached entries in tc_slots_.  This variable is
  // updated under a lock but can be read without one.
  int32_t used_slots_;
  // The current number of slots for this size class.  This is an
  // adaptive value that is increased if there is lots of traffic
  // on a given size class.
  int32_t cache_size_;
};
2427
// Pad each CentralCache object to multiple of 64 bytes
// NOTE(review): 64 is presumably the cache-line size, so that neighbouring
// array elements do not share a line -- confirm.
class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
 private:
  // Number of bytes needed to reach the next multiple of 64.  The outer
  // "% 64" makes this 0 when the base class size is already a multiple of 64.
  // NOTE(review): that case would declare a zero-length array, which is a
  // compiler extension -- confirm it cannot occur on supported builds.
  char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
};
2433
2434 //-------------------------------------------------------------------
2435 // Global variables
2436 //-------------------------------------------------------------------
2437
// Central cache -- a collection of free-lists, one per size-class.
// We have a separate lock per free-list to reduce contention.
static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];

// Page-level allocator
// pageheap_lock is the lock taken before calling into the page heap (see the
// scavenger code below, which holds it around pageheap->scavenge()).
static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
// Statically allocated raw storage for the single TCMalloc_PageHeap object.
// The element count rounds sizeof(TCMalloc_PageHeap) up to a whole number of
// AllocAlignmentInteger elements.
static AllocAlignmentInteger pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)];
// NOTE(review): presumably set to true once the page heap living in
// pageheap_memory has been initialized -- confirm at the point of use.
static bool phinited = false;
2446
2447 // Avoid extra level of indirection by making "pageheap" be just an alias
2448 // of pageheap_memory.
2449 typedef union {
2450     void* m_memory;
2451     TCMalloc_PageHeap* m_pageHeap;
2452 } PageHeapUnion;
2453
2454 static inline TCMalloc_PageHeap* getPageHeap()
2455 {
2456     PageHeapUnion u = { &pageheap_memory[0] };
2457     return u.m_pageHeap;
2458 }
2459
2460 #define pageheap getPageHeap()
2461
2462 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2463
2464 #if HAVE(DISPATCH_H) || OS(WINDOWS)
2465
2466 void TCMalloc_PageHeap::periodicScavenge()
2467 {
2468     SpinLockHolder h(&pageheap_lock);
2469     pageheap->scavenge();
2470
2471     if (shouldScavenge()) {
2472         rescheduleScavenger();
2473         return;
2474     }
2475
2476     suspendScavenger();
2477 }
2478
2479 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
2480 {
2481     ASSERT(IsHeld(pageheap_lock));
2482     if (isScavengerSuspended() && shouldScavenge())
2483         scheduleScavenger();
2484 }
2485
2486 #else
2487
// Body of the background scavenger thread (pthread variant).  Loops forever:
// while shouldScavenge() is false it blocks on m_scavengeCondition, then it
// sleeps for kScavengeDelayInSeconds and runs one scavenge pass under
// pageheap_lock.
void TCMalloc_PageHeap::scavengerThread()
{
#if HAVE(PTHREAD_SETNAME_NP)
  pthread_setname_np("JavaScriptCore: FastMalloc scavenger");
#endif

  while (1) {
      // NOTE(review): shouldScavenge() is evaluated here without holding
      // pageheap_lock or m_scavengeMutex -- confirm that a racy read is
      // acceptable.
      if (!shouldScavenge()) {
          pthread_mutex_lock(&m_scavengeMutex);
          // Cleared while the thread is blocked; set again once it wakes up.
          m_scavengeThreadActive = false;
          // Block until there are enough free committed pages to release back to the system.
          // NOTE(review): the wait is not wrapped in a predicate loop, so a
          // spurious wakeup falls through to the sleep and scavenge below.
          pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex);
          m_scavengeThreadActive = true;
          pthread_mutex_unlock(&m_scavengeMutex);
      }
      // Wait before scavenging; pageheap_lock is not held during the sleep.
      sleep(kScavengeDelayInSeconds);
      {
          // The lock is held only for the duration of the scavenge pass.
          SpinLockHolder h(&pageheap_lock);
          pageheap->scavenge();
      }
  }
}
2510
2511 #endif
2512
2513 #endif
2514
// If TLS is available, we also store a copy
// of the per-thread object in a __thread variable
// since __thread variables are faster to read
// than pthread_getspecific().  We still need
// pthread_setspecific() because __thread
// variables provide no way to run cleanup
// code when a thread is destroyed.
#ifdef HAVE_TLS
static __thread TCMalloc_ThreadCache *threadlocal_heap;
#endif
// Thread-specific key.  Initialization here is somewhat tricky
// because some Linux startup code invokes malloc() before it
// is in a good enough state to handle pthread_key_create().
// Therefore, we use TSD keys only after tsd_inited is set to true.
// Until then, we use a slow path to get the heap object.
static bool tsd_inited = false;
// With PTHREAD_GETSPECIFIC_DIRECT the key is a fixed, compile-time constant;
// otherwise it is an ordinary key variable.
#if USE(PTHREAD_GETSPECIFIC_DIRECT)
static const pthread_key_t heap_key = __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0;
#else
static pthread_key_t heap_key;
#endif
#if OS(WINDOWS)
// Windows TLS slot used by setThreadHeap() with TlsSetValue().  It starts
// out as TLS_OUT_OF_INDEXES, i.e. no slot assigned yet.
DWORD tlsIndex = TLS_OUT_OF_INDEXES;
#endif
2539
// Records `heap` as the calling thread's cache: always through
// pthread_setspecific(heap_key, ...), and additionally in the Windows TLS
// slot tlsIndex on OS(WINDOWS).
static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
{
#if USE(PTHREAD_GETSPECIFIC_DIRECT)
    // Can't have two libraries both doing this in the same process,
    // so check and make this crash right away.
    // (The key is a fixed constant here, so a value that is already set
    // means something else stored one under the same key.)
    if (pthread_getspecific(heap_key))
        CRASH();
#endif

    // Still do pthread_setspecific even if there's an alternate form
    // of thread-local storage in use, to benefit from the delete callback.
    pthread_setspecific(heap_key, heap);

#if OS(WINDOWS)
    // Keep a second copy in the native TLS slot.
    TlsSetValue(tlsIndex, heap);
#endif
}
2557
// Allocator for thread heaps
// (hands out TCMalloc_ThreadCache objects).
static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;

// Linked list of heap objects.  Protected by pageheap_lock.
// thread_heaps is the list head.
// NOTE(review): thread_heap_count is presumably the number of entries on
// that list -- confirm where it is updated.
static TCMalloc_ThreadCache* thread_heaps = NULL;
static int thread_heap_count = 0;

// Overall thread cache size.  Protected by pageheap_lock.
static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;

// Global per-thread cache size.  Writes are protected by
// pageheap_lock.  Reads are done without any locking, which should be
// fine as long as size_t can be written atomically and we don't place
// invariants between this variable and other pieces of state.
// Starts out at kMaxThreadCacheSize.
static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2573
2574 //-------------------------------------------------------------------
2575 // Central cache implementation
2576 //-------------------------------------------------------------------
2577
2578 void TCMalloc_Central_FreeList::Init(size_t cl) {
2579   lock_.Init();
2580   size_class_ = cl;
2581   DLL_Init(&empty_);
2582   DLL_Init(&nonempty_);
2583   counter_ = 0;
2584
2585   cache_size_ = 1;
2586   used_slots_ = 0;
2587   ASSERT(cache_size_ <= kNumTransferEntries);
2588 }
2589
2590 void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) {
2591   while (start) {
2592     void *next = SLL_Next(start);
2593     ReleaseToSpans(start);
2594     start = next;
2595   }
2596 }
2597
2598 ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
2599   const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
2600   Span* span = pageheap->GetDescriptor(p);
2601   ASSERT(span != NULL);
2602   ASSERT(span->refcount > 0);
2603
2604   // If span is empty, move it to non-empty list
2605   if (span->objects == NULL) {
2606     DLL_Remove(span);
2607     DLL_Prepend(&nonempty_, span);
2608     Event(span, 'N', 0);
2609   }
2610
2611   // The following check is expensive, so it is disabled by default
2612   if (false) {
2613     // Check that object does not occur in list
2614     unsigned got = 0;
2615     for (void* p = span->objects; p != NULL; p = *((void**) p)) {
2616       ASSERT(p != object);
2617       got++;
2618     }
2619     ASSERT(got + span->refcount ==
2620            (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2621   }
2622
2623   counter_++;
2624   span->refcount--;
2625   if (span->refcount == 0) {
2626     Event(span, '#', 0);
2627     counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2628     DLL_Remove(span);
2629
2630     // Release central list lock while operating on pageheap
2631     lock_.Unlock();
2632     {
2633       SpinLockHolder h(&pageheap_lock);
2634       pageheap->Delete(span);
2635     }
2636     lock_.Lock();
2637   } else {
2638     *(reinterpret_cast<void**>(object)) = span->objects;
2639     span->objects = object;
2640   }
2641 }
2642
2643 ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2644     size_t locked_size_class, bool force) {
2645   static int race_counter = 0;
2646   int t = race_counter++;  // Updated without a lock, but who cares.
2647   if (t >= static_cast<int>(kNumClasses)) {
2648     while (t >= static_cast<int>(kNumClasses)) {
2649       t -= kNumClasses;
2650     }
2651     race_counter = t;
2652   }
2653   ASSERT(t >= 0);
2654   ASSERT(t < static_cast<int>(kNumClasses));
2655   if (t == static_cast<int>(locked_size_class)) return false;
2656   return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
2657 }
2658
2659 bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2660   // Is there room in the cache?
2661   if (used_slots_ < cache_size_) return true;
2662   // Check if we can expand this cache?
2663   if (cache_size_ == kNumTransferEntries) return false;
2664   // Ok, we'll try to grab an entry from some other size class.
2665   if (EvictRandomSizeClass(size_class_, false) ||
2666       EvictRandomSizeClass(size_class_, true)) {
2667     // Succeeded in evicting, we're going to make our cache larger.
2668     cache_size_++;
2669     return true;
2670   }
2671   return false;
2672 }
2673
2674
2675 namespace {
2676 class LockInverter {
2677  private:
2678   SpinLock *held_, *temp_;
2679  public:
2680   inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2681     : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2682   inline ~LockInverter() { temp_->Unlock(); held_->Lock();  }
2683 };
2684 }
2685
2686 bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2687   // Start with a quick check without taking a lock.
2688   if (cache_size_ == 0) return false;
2689   // We don't evict from a full cache unless we are 'forcing'.
2690   if (force == false && used_slots_ == cache_size_) return false;
2691
2692   // Grab lock, but first release the other lock held by this thread.  We use
2693   // the lock inverter to ensure that we never hold two size class locks
2694   // concurrently.  That can create a deadlock because there is no well
2695   // defined nesting order.
2696   LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2697   ASSERT(used_slots_ <= cache_size_);
2698   ASSERT(0 <= cache_size_);
2699   if (cache_size_ == 0) return false;
2700   if (used_slots_ == cache_size_) {
2701     if (force == false) return false;
2702     // ReleaseListToSpans releases the lock, so we have to make all the
2703     // updates to the central list before calling it.
2704     cache_size_--;
2705     used_slots_--;
2706     ReleaseListToSpans(tc_slots_[used_slots_].head);
2707     return true;
2708   }
2709   cache_size_--;
2710   return true;
2711 }
2712
2713 void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
2714   SpinLockHolder h(&lock_);
2715   if (N == num_objects_to_move[size_class_] &&
2716     MakeCacheSpace()) {
2717     int slot = used_slots_++;
2718     ASSERT(slot >=0);
2719     ASSERT(slot < kNumTransferEntries);
2720     TCEntry *entry = &tc_slots_[slot];
2721     entry->head = start;
2722     entry->tail = end;
2723     return;
2724   }
2725   ReleaseListToSpans(start);
2726 }
2727
2728 void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
2729   int num = *N;
2730   ASSERT(num > 0);
2731
2732   SpinLockHolder h(&lock_);
2733   if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2734     int slot = --used_slots_;
2735     ASSERT(slot >= 0);
2736     TCEntry *entry = &tc_slots_[slot];
2737     *start = entry->head;
2738     *end = entry->tail;
2739     return;
2740   }
2741
2742   // TODO: Prefetch multiple TCEntries?
2743   void *tail = FetchFromSpansSafe();
2744   if (!tail) {
2745     // We are completely out of memory.
2746     *start = *end = NULL;
2747     *N = 0;
2748     return;
2749   }
2750
2751   SLL_SetNext(tail, NULL);
2752   void *head = tail;
2753   int count = 1;
2754   while (count < num) {
2755     void *t = FetchFromSpans();
2756     if (!t) break;
2757     SLL_Push(&head, t);
2758     count++;
2759   }
2760   *start = head;
2761   *end = tail;
2762   *N = count;
2763 }
2764
2765
2766 void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
2767   void *t = FetchFromSpans();
2768   if (!t) {
2769     Populate();
2770     t = FetchFromSpans();
2771   }
2772   return t;
2773 }
2774
2775 void* TCMalloc_Central_FreeList::FetchFromSpans() {
2776   if (DLL_IsEmpty(&nonempty_)) return NULL;
2777   Span* span = nonempty_.next;
2778
2779   ASSERT(span->objects != NULL);
2780   ASSERT_SPAN_COMMITTED(span);
2781   span->refcount++;
2782   void* result = span->objects;
2783   span->objects = *(reinterpret_cast<void**>(result));
2784   if (span->objects == NULL) {
2785     // Move to empty list
2786     DLL_Remove(span);
2787     DLL_Prepend(&empty_, span);
2788     Event(span, 'E', 0);
2789   }
2790   counter_--;
2791   return result;
2792 }
2793
2794 // Fetch memory from the system and add to the central cache freelist.
2795 ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() {
2796   // Release central list lock while operating on pageheap
2797   lock_.Unlock();
2798   const size_t npages = class_to_pages[size_class_];
2799
2800   Span* span;
2801   {
2802     SpinLockHolder h(&pageheap_lock);
2803     span = pageheap->New(npages);
2804     if (span) pageheap->RegisterSizeClass(span, size_class_);
2805   }
2806   if (span == NULL) {
2807 #if HAVE(ERRNO_H)
2808     MESSAGE("allocation failed: %d\n", errno);
2809 #elif OS(WINDOWS)
2810     MESSAGE("allocation failed: %d\n", ::GetLastError());
2811 #else
2812     MESSAGE("allocation failed\n");
2813 #endif
2814     lock_.Lock();
2815     return;
2816   }
2817   ASSERT_SPAN_COMMITTED(span);
2818   ASSERT(span->length == npages);
2819   // Cache sizeclass info eagerly.  Locking is not necessary.
2820   // (Instead of being eager, we could just replace any stale info
2821   // about this span, but that seems to be no better in practice.)
2822   for (size_t i = 0; i < npages; i++) {
2823     pageheap->CacheSizeClass(span->start + i, size_class_);
2824   }
2825
2826   // Split the block into pieces and add to the free-list
2827   // TODO: coloring of objects to avoid cache conflicts?
2828   void** tail = &span->objects;
2829   char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
2830   char* limit = ptr + (npages << kPageShift);
2831   const size_t size = ByteSizeForClass(size_class_);
2832   int num = 0;
2833   char* nptr;
2834   while ((nptr = ptr + size) <= limit) {
2835     *tail = ptr;
2836     tail = reinterpret_cast_ptr<void**>(ptr);
2837     ptr = nptr;
2838     num++;
2839   }
2840   ASSERT(ptr <= limit);
2841   *tail = NULL;
2842   span->refcount = 0; // No sub-object in use yet
2843
2844   // Add span to list of non-empty spans
2845   lock_.Lock();
2846   DLL_Prepend(&nonempty_, span);
2847   counter_ += num;
2848 }
2849
2850 //-------------------------------------------------------------------
2851 // TCMalloc_ThreadCache implementation
2852 //-------------------------------------------------------------------
2853
2854 inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
2855   if (bytes_until_sample_ < k) {
2856     PickNextSample(k);
2857     return true;
2858   } else {
2859     bytes_until_sample_ -= k;
2860     return false;
2861   }
2862 }
2863
2864 void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) {
2865   size_ = 0;
2866   next_ = NULL;
2867   prev_ = NULL;
2868   tid_  = tid;
2869   in_setspecific_ = false;
2870   for (size_t cl = 0; cl < kNumClasses; ++cl) {
2871     list_[cl].Init();
2872   }
2873
2874   // Initialize RNG -- run it for a bit to get to good values
2875   bytes_until_sample_ = 0;
2876   rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
2877   for (int i = 0; i < 100; i++) {
2878     PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2));
2879   }
2880 }
2881
2882 void TCMalloc_ThreadCache::Cleanup() {
2883   // Put unused memory back into central cache
2884   for (size_t cl = 0; cl < kNumClasses; ++cl) {
2885     if (list_[cl].length() > 0) {
2886       ReleaseToCentralCache(cl, list_[cl].length());
2887     }
2888   }
2889 }
2890
2891 ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) {
2892   ASSERT(size <= kMaxSize);
2893   const size_t cl = SizeClass(size);
2894   FreeList* list = &list_[cl];
2895   size_t allocationSize = ByteSizeForClass(cl);
2896   if (list->empty()) {
2897     FetchFromCentralCache(cl, allocationSize);
2898     if (list->empty()) return NULL;
2899   }
2900   size_ -= allocationSize;
2901   return list->Pop();
2902 }
2903
2904 inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
2905   size_ += ByteSizeForClass(cl);
2906   FreeList* list = &list_[cl];
2907   list->Push(ptr);
2908   // If enough data is free, put back into central cache
2909   if (list->length() > kMaxFreeListLength) {
2910     ReleaseToCentralCache(cl, num_objects_to_move[cl]);
2911   }
2912   if (size_ >= per_thread_cache_size) Scavenge();
2913 }
2914
2915 // Remove some objects of class "cl" from central cache and add to thread heap
2916 ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) {
2917   int fetch_count = num_objects_to_move[cl];
2918   void *start, *end;
2919   central_cache[cl].RemoveRange(&start, &end, &fetch_count);
2920   list_[cl].PushRange(fetch_count, start, end);
2921   size_ += allocationSize * fetch_count;
2922 }
2923
2924 // Remove some objects of class "cl" from thread heap and add to central cache
2925 inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
2926   ASSERT(N > 0);
2927   FreeList* src = &list_[cl];
2928   if (N > src->length()) N = src->length();
2929   size_ -= N*ByteSizeForClass(cl);
2930
2931   // We return prepackaged chains of the correct size to the central cache.
2932   // TODO: Use the same format internally in the thread caches?
2933   int batch_size = num_objects_to_move[cl];
2934   while (N > batch_size) {
2935     void *tail, *head;
2936     src->PopRange(batch_size, &head, &tail);
2937     central_cache[cl].InsertRange(head, tail, batch_size);
2938     N -= batch_size;
2939   }
2940   void *tail, *head;
2941   src->PopRange(N, &head, &tail);
2942   central_cache[cl].InsertRange(head, tail, N);
2943 }
2944
2945 // Release idle memory to the central cache
2946 inline void TCMalloc_ThreadCache::Scavenge() {
2947   // If the low-water mark for the free list is L, it means we would
2948   // not have had to allocate anything from the central cache even if
2949   // we had reduced the free list size by L.  We aim to get closer to
2950   // that situation by dropping L/2 nodes from the free list.  This
2951   // may not release much memory, but if so we will call scavenge again
2952   // pretty soon and the low-water marks will be high on that call.
2953   //int64 start = CycleClock::Now();
2954
2955   for (size_t cl = 0; cl < kNumClasses; cl++) {
2956     FreeList* list = &list_[cl];
2957     const int lowmark = list->lowwatermark();
2958     if (lowmark > 0) {
2959       const int drop = (lowmark > 1) ? lowmark/2 : 1;
2960       ReleaseToCentralCache(cl, drop);
2961     }
2962     list->clear_lowwatermark();
2963   }
2964
2965   //int64 finish = CycleClock::Now();
2966   //CycleTimer ct;
2967   //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
2968 }
2969
2970 void TCMalloc_ThreadCache::PickNextSample(size_t k) {
2971   // Make next "random" number
2972   // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
2973   static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
2974   uint32_t r = rnd_;
2975   rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
2976
2977   // Next point is "rnd_ % (sample_period)".  I.e., average
2978   // increment is "sample_period/2".
2979   const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter);
2980   static int last_flag_value = -1;
2981
2982   if (flag_value != last_flag_value) {
2983     SpinLockHolder h(&sample_period_lock);
2984     int i;
2985     for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) {
2986       if (primes_list[i] >= flag_value) {
2987         break;
2988       }
2989     }
2990     sample_period = primes_list[i];
2991     last_flag_value = flag_value;
2992   }
2993
2994   bytes_until_sample_ += rnd_ % sample_period;
2995
2996   if (k > (static_cast<size_t>(-1) >> 2)) {
2997     // If the user has asked for a huge allocation then it is possible
2998     // for the code below to loop infinitely.  Just return (note that
2999     // this throws off the sampling accuracy somewhat, but a user who
3000     // is allocating more than 1G of memory at a time can live with a
3001     // minor inaccuracy in profiling of small allocations, and also
3002     // would rather not wait for the loop below to terminate).
3003     return;
3004   }
3005
3006   while (bytes_until_sample_ < k) {
3007     // Increase bytes_until_sample_ by enough average sampling periods
3008     // (sample_period >> 1) to allow us to sample past the current
3009     // allocation.
3010     bytes_until_sample_ += (sample_period >> 1);
3011   }
3012
3013   bytes_until_sample_ -= k;
3014 }
3015
3016 void TCMalloc_ThreadCache::InitModule() {
3017   // There is a slight potential race here because of double-checked
3018   // locking idiom.  However, as long as the program does a small
3019   // allocation before switching to multi-threaded mode, we will be
3020   // fine.  We increase the chances of doing such a small allocation
3021   // by doing one in the constructor of the module_enter_exit_hook
3022   // object declared below.
3023   SpinLockHolder h(&pageheap_lock);
3024   if (!phinited) {
3025 #ifdef WTF_CHANGES
3026     InitTSD();
3027 #endif
3028     InitSizeClasses();
3029     threadheap_allocator.Init();
3030     span_allocator.Init();
3031     span_allocator.New(); // Reduce cache conflicts
3032     span_allocator.New(); // Reduce cache conflicts
3033     stacktrace_allocator.Init();
3034     DLL_Init(&sampled_objects);
3035     for (size_t i = 0; i < kNumClasses; ++i) {
3036       central_cache[i].Init(i);
3037     }
3038     pageheap->init();
3039     phinited = 1;
3040 #if defined(WTF_CHANGES) && OS(DARWIN)
3041     FastMallocZone::init();
3042 #endif
3043   }
3044 }
3045
3046 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) {
3047   // Create the heap and add it to the linked list
3048   TCMalloc_ThreadCache *heap = threadheap_allocator.New();
3049   heap->Init(tid);
3050   heap->next_ = thread_heaps;
3051   heap->prev_ = NULL;
3052   if (thread_heaps != NULL) thread_heaps->prev_ = heap;
3053   thread_heaps = heap;
3054   thread_heap_count++;
3055   RecomputeThreadCacheSize();
3056   return heap;
3057 }
3058
3059 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
3060 #ifdef HAVE_TLS
3061     // __thread is faster, but only when the kernel supports it
3062   if (KernelSupportsTLS())
3063     return threadlocal_heap;
3064 #elif OS(WINDOWS)
3065     return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex));
3066 #else
3067     return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
3068 #endif
3069 }
3070
3071 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
3072   TCMalloc_ThreadCache* ptr = NULL;
3073   if (!tsd_inited) {
3074     InitModule();
3075   } else {
3076     ptr = GetThreadHeap();
3077   }
3078   if (ptr == NULL) ptr = CreateCacheIfNecessary();
3079   return ptr;
3080 }
3081
3082 // In deletion paths, we do not try to create a thread-cache.  This is
3083 // because we may be in the thread destruction code and may have
3084 // already cleaned up the cache for this thread.
3085 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
3086   if (!tsd_inited) return NULL;
3087   void* const p = GetThreadHeap();
3088   return reinterpret_cast<TCMalloc_ThreadCache*>(p);
3089 }
3090
3091 void TCMalloc_ThreadCache::InitTSD() {
3092   ASSERT(!tsd_inited);
3093 #if USE(PTHREAD_GETSPECIFIC_DIRECT)
3094   pthread_key_init_np(heap_key, DestroyThreadCache);
3095 #else
3096   pthread_key_create(&heap_key, DestroyThreadCache);
3097 #endif
3098 #if OS(WINDOWS)
3099   tlsIndex = TlsAlloc();
3100 #endif
3101   tsd_inited = true;
3102
3103 #if !OS(WINDOWS)
3104   // We may have used a fake pthread_t for the main thread.  Fix it.
3105   pthread_t zero;
3106   memset(&zero, 0, sizeof(zero));
3107 #endif
3108 #ifndef WTF_CHANGES
3109   SpinLockHolder h(&pageheap_lock);
3110 #else
3111   ASSERT(pageheap_lock.IsHeld());
3112 #endif
3113   for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3114 #if OS(WINDOWS)
3115     if (h->tid_ == 0) {
3116       h->tid_ = GetCurrentThreadId();
3117     }
3118 #else
3119     if (pthread_equal(h->tid_, zero)) {
3120       h->tid_ = pthread_self();
3121     }
3122 #endif
3123   }
3124 }
3125
3126 TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
3127   // Initialize per-thread data if necessary
3128   TCMalloc_ThreadCache* heap = NULL;
3129   {
3130     SpinLockHolder h(&pageheap_lock);
3131
3132 #if OS(WINDOWS)
3133     DWORD me;
3134     if (!tsd_inited) {
3135       me = 0;
3136     } else {
3137       me = GetCurrentThreadId();
3138     }
3139 #else
3140     // Early on in glibc's life, we cannot even call pthread_self()
3141     pthread_t me;
3142     if (!tsd_inited) {
3143       memset(&me, 0, sizeof(me));
3144     } else {
3145       me = pthread_self();
3146     }
3147 #endif
3148
3149     // This may be a recursive malloc call from pthread_setspecific()
3150     // In that case, the heap for this thread has already been created
3151     // and added to the linked list.  So we search for that first.
3152     for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3153 #if OS(WINDOWS)
3154       if (h->tid_ == me) {
3155 #else
3156       if (pthread_equal(h->tid_, me)) {
3157 #endif
3158         heap = h;
3159         break;
3160       }
3161     }
3162
3163     if (heap == NULL) heap = NewHeap(me);
3164   }
3165
3166   // We call pthread_setspecific() outside the lock because it may
3167   // call malloc() recursively.  The recursive call will never get
3168   // here again because it will find the already allocated heap in the
3169   // linked list of heaps.
3170   if (!heap->in_setspecific_ && tsd_inited) {
3171     heap->in_setspecific_ = true;
3172     setThreadHeap(heap);
3173   }
3174   return heap;
3175 }
3176
3177 void TCMalloc_ThreadCache::BecomeIdle() {
3178   if (!tsd_inited) return;              // No caches yet
3179   TCMalloc_ThreadCache* heap = GetThreadHeap();
3180   if (heap == NULL) return;             // No thread cache to remove
3181   if (heap->in_setspecific_) return;    // Do not disturb the active caller
3182
3183   heap->in_setspecific_ = true;
3184   setThreadHeap(NULL);
3185 #ifdef HAVE_TLS
3186   // Also update the copy in __thread
3187   threadlocal_heap = NULL;
3188 #endif
3189   heap->in_setspecific_ = false;
3190   if (GetThreadHeap() == heap) {
3191     // Somehow heap got reinstated by a recursive call to malloc
3192     // from pthread_setspecific.  We give up in this case.
3193     return;
3194   }
3195
3196   // We can now get rid of the heap
3197   DeleteCache(heap);
3198 }
3199
3200 void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
3201   // Note that "ptr" cannot be NULL since pthread promises not
3202   // to invoke the destructor on NULL values, but for safety,
3203   // we check anyway.
3204   if (ptr == NULL) return;
3205 #ifdef HAVE_TLS
3206   // Prevent fast path of GetThreadHeap() from returning heap.
3207   threadlocal_heap = NULL;
3208 #endif
3209   DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
3210 }
3211
3212 void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
3213   // Remove all memory from heap
3214   heap->Cleanup();
3215
3216   // Remove from linked list
3217   SpinLockHolder h(&pageheap_lock);
3218   if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
3219   if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
3220   if (thread_heaps == heap) thread_heaps = heap->next_;
3221   thread_heap_count--;
3222   RecomputeThreadCacheSize();
3223
3224   threadheap_allocator.Delete(heap);
3225 }
3226
3227 void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
3228   // Divide available space across threads
3229   int n = thread_heap_count > 0 ? thread_heap_count : 1;
3230   size_t space = overall_thread_cache_size / n;
3231
3232   // Limit to allowed range
3233   if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
3234   if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
3235
3236   per_thread_cache_size = space;
3237 }
3238
3239 void TCMalloc_ThreadCache::Print() const {
3240   for (size_t cl = 0; cl < kNumClasses; ++cl) {
3241     MESSAGE("      %5" PRIuS " : %4d len; %4d lo\n",
3242             ByteSizeForClass(cl),
3243             list_[cl].length(),
3244             list_[cl].lowwatermark());
3245   }
3246 }
3247
// Extract interesting stats
// Plain aggregate of byte counts describing the allocator's state; all
// fields are byte totals.
struct TCMallocStats {
  uint64_t system_bytes;        // Bytes alloced from system
  uint64_t thread_bytes;        // Bytes in thread caches
  uint64_t central_bytes;       // Bytes in central cache
  uint64_t transfer_bytes;      // Bytes in central transfer cache
  uint64_t pageheap_bytes;      // Bytes in page heap
  uint64_t metadata_bytes;      // Bytes alloced for metadata
};
3257
3258 #ifndef WTF_CHANGES
3259 // Get stats into "r".  Also get per-size-class counts if class_count != NULL
3260 static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
3261   r->central_bytes = 0;
3262   r->transfer_bytes = 0;
3263   for (int cl = 0; cl < kNumClasses; ++cl) {
3264     const int length = central_cache[cl].length();
3265     const int tc_length = central_cache[cl].tc_length();
3266     r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length;
3267     r->transfer_bytes +=
3268       static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length;
3269     if (class_count) class_count[cl] = length + tc_length;
3270   }
3271
3272   // Add stats from per-thread heaps
3273   r->thread_bytes = 0;
3274   { // scope
3275     SpinLockHolder h(&pageheap_lock);
3276     for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3277       r->thread_bytes += h->Size();
3278       if (class_count) {
3279         for (size_t cl = 0; cl < kNumClasses; ++cl) {
3280           class_count[cl] += h->freelist_length(cl);
3281         }
3282       }
3283     }
3284   }
3285
3286   { //scope
3287     SpinLockHolder h(&pageheap_lock);
3288     r->system_bytes = pageheap->SystemBytes();
3289     r->metadata_bytes = metadata_system_bytes;
3290     r->pageheap_bytes = pageheap->FreeBytes();
3291   }
3292 }
3293 #endif
3294
3295 #ifndef WTF_CHANGES
3296 // WRITE stats to "out"
3297 static void DumpStats(TCMalloc_Printer* out, int level) {
3298   TCMallocStats stats;
3299   uint64_t class_count[kNumClasses];
3300   ExtractStats(&stats, (level >= 2 ? class_count : NULL));
3301
3302   if (level >= 2) {
3303     out->printf("------------------------------------------------\n");
3304     uint64_t cumulative = 0;
3305     for (int cl = 0; cl < kNumClasses; ++cl) {
3306       if (class_count[cl] > 0) {
3307         uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl);
3308         cumulative += class_bytes;
3309         out->printf("class %3d [ %8" PRIuS " bytes ] : "
3310                 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n",
3311                 cl, ByteSizeForClass(cl),
3312                 class_count[cl],
3313                 class_bytes / 1048576.0,
3314                 cumulative / 1048576.0);
3315       }
3316     }
3317
3318     SpinLockHolder h(&pageheap_lock);
3319     pageheap->Dump(out);
3320   }
3321
3322   const uint64_t bytes_in_use = stats.system_bytes
3323                                 - stats.pageheap_bytes
3324                                 - stats.central_bytes
3325                                 - stats.transfer_bytes
3326                                 - stats.thread_bytes;
3327
3328   out->printf("------------------------------------------------\n"
3329               "MALLOC: %12" PRIu64 " Heap size\n"
3330               "MALLOC: %12" PRIu64 " Bytes in use by application\n"
3331               "MALLOC: %12" PRIu64 " Bytes free in page heap\n"
3332               "MALLOC: %12" PRIu64 " Bytes free in central cache\n"
3333               "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n"
3334               "MALLOC: %12" PRIu64 " Bytes free in thread caches\n"
3335               "MALLOC: %12" PRIu64 " Spans in use\n"
3336               "MALLOC: %12" PRIu64 " Thread heaps in use\n"
3337               "MALLOC: %12" PRIu64 " Metadata allocated\n"
3338               "------------------------------------------------\n",
3339               stats.system_bytes,
3340               bytes_in_use,
3341               stats.pageheap_bytes,
3342               stats.central_bytes,
3343               stats.transfer_bytes,
3344               stats.thread_bytes,
3345               uint64_t(span_allocator.inuse()),
3346               uint64_t(threadheap_allocator.inuse()),
3347               stats.metadata_bytes);
3348 }
3349
3350 static void PrintStats(int level) {
3351   const int kBufferSize = 16 << 10;
3352   char* buffer = new char[kBufferSize];
3353   TCMalloc_Printer printer(buffer, kBufferSize);
3354   DumpStats(&printer, level);
3355   write(STDERR_FILENO, buffer, strlen(buffer));
3356   delete[] buffer;
3357 }
3358
3359 static void** DumpStackTraces() {
3360   // Count how much space we need
3361   int needed_slots = 0;
3362   {
3363     SpinLockHolder h(&pageheap_lock);
3364     for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3365       StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3366       needed_slots += 3 + stack->depth;
3367     }
3368     needed_slots += 100;            // Slop in case sample grows
3369     needed_slots += needed_slots/8; // An extra 12.5% slop
3370   }
3371
3372   void** result = new void*[needed_slots];
3373   if (result == NULL) {
3374     MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
3375             needed_slots);
3376     return NULL;
3377   }
3378
3379   SpinLockHolder h(&pageheap_lock);
3380   int used_slots = 0;
3381   for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3382     ASSERT(used_slots < needed_slots);  // Need to leave room for terminator
3383     StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3384     if (used_slots + 3 + stack->depth >= needed_slots) {
3385       // No more room
3386       break;
3387     }
3388
3389     result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
3390     result[used_slots+1] = reinterpret_cast<void*>(stack->size);
3391     result[used_slots+2] = reinterpret_cast<void*>(stack->depth);
3392     for (int d = 0; d < stack->depth; d++) {
3393       result[used_slots+3+d] = stack->stack[d];
3394     }
3395     used_slots += 3 + stack->depth;
3396   }
3397   result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
3398   return result;
3399 }
3400 #endif
3401
3402 #ifndef WTF_CHANGES
3403
3404 // TCMalloc's support for extra malloc interfaces
3405 class TCMallocImplementation : public MallocExtension {
3406  public:
3407   virtual void GetStats(char* buffer, int buffer_length) {
3408     ASSERT(buffer_length > 0);
3409     TCMalloc_Printer printer(buffer, buffer_length);
3410
3411     // Print level one stats unless lots of space is available
3412     if (buffer_length < 10000) {
3413       DumpStats(&printer, 1);
3414     } else {
3415       DumpStats(&printer, 2);
3416     }
3417   }
3418
3419   virtual void** ReadStackTraces() {
3420     return DumpStackTraces();
3421   }
3422
3423   virtual bool GetNumericProperty(const char* name, size_t* value) {
3424     ASSERT(name != NULL);
3425
3426     if (strcmp(name, "generic.current_allocated_bytes") == 0) {
3427       TCMallocStats stats;
3428       ExtractStats(&stats, NULL);
3429       *value = stats.system_bytes
3430                - stats.thread_bytes
3431                - stats.central_bytes
3432                - stats.pageheap_bytes;
3433       return true;
3434     }
3435
3436     if (strcmp(name, "generic.heap_size") == 0) {
3437       TCMallocStats stats;
3438       ExtractStats(&stats, NULL);
3439       *value = stats.system_bytes;
3440       return true;
3441     }
3442
3443     if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
3444       // We assume that bytes in the page heap are not fragmented too
3445       // badly, and are therefore available for allocation.
3446       SpinLockHolder l(&pageheap_lock);
3447       *value = pageheap->FreeBytes();
3448       return true;
3449     }
3450
3451     if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3452       SpinLockHolder l(&pageheap_lock);
3453       *value = overall_thread_cache_size;
3454       return true;
3455     }
3456
3457     if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
3458       TCMallocStats stats;
3459       ExtractStats(&stats, NULL);
3460       *value = stats.thread_bytes;
3461       return true;
3462     }
3463
3464     return false;
3465   }
3466
3467   virtual bool SetNumericProperty(const char* name, size_t value) {
3468     ASSERT(name != NULL);
3469
3470     if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3471       // Clip the value to a reasonable range
3472       if (value < kMinThreadCacheSize) value = kMinThreadCacheSize;
3473       if (value > (1<<30)) value = (1<<30);     // Limit to 1GB
3474
3475       SpinLockHolder l(&pageheap_lock);
3476       overall_thread_cache_size = static_cast<size_t>(value);
3477       TCMalloc_ThreadCache::RecomputeThreadCacheSize();
3478       return true;
3479     }
3480
3481     return false;
3482   }
3483
3484   virtual void MarkThreadIdle() {
3485     TCMalloc_ThreadCache::BecomeIdle();
3486   }
3487
3488   virtual void ReleaseFreeMemory() {
3489     SpinLockHolder h(&pageheap_lock);
3490     pageheap->ReleaseFreePages();
3491   }
3492 };
3493 #endif
3494
3495 // The constructor allocates an object to ensure that initialization
3496 // runs before main(), and therefore we do not have a chance to become
3497 // multi-threaded before initialization.  We also create the TSD key
3498 // here.  Presumably by the time this constructor runs, glibc is in
3499 // good enough shape to handle pthread_key_create().
3500 //
3501 // The constructor also takes the opportunity to tell STL to use
3502 // tcmalloc.  We want to do this early, before construct time, so
3503 // all user STL allocations go through tcmalloc (which works really
3504 // well for STL).
3505 //
3506 // The destructor prints stats when the program exits.
3507 class TCMallocGuard {
3508  public:
3509
3510   TCMallocGuard() {
3511 #ifdef HAVE_TLS    // this is true if the cc/ld/libc combo support TLS
3512     // Check whether the kernel also supports TLS (needs to happen at runtime)
3513     CheckIfKernelSupportsTLS();
3514 #endif
3515 #ifndef WTF_CHANGES
3516 #ifdef WIN32                    // patch the windows VirtualAlloc, etc.
3517     PatchWindowsFunctions();    // defined in windows/patch_functions.cc
3518 #endif
3519 #endif
3520     free(malloc(1));
3521     TCMalloc_ThreadCache::InitTSD();
3522     free(malloc(1));
3523 #ifndef WTF_CHANGES
3524     MallocExtension::Register(new TCMallocImplementation);
3525 #endif
3526   }
3527
3528 #ifndef WTF_CHANGES
3529   ~TCMallocGuard() {
3530     const char* env = getenv("MALLOCSTATS");
3531     if (env != NULL) {
3532       int level = atoi(env);
3533       if (level < 1) level = 1;
3534       PrintStats(level);
3535     }
3536 #ifdef WIN32
3537     UnpatchWindowsFunctions();
3538 #endif
3539   }
3540 #endif
3541 };
3542
#ifndef WTF_CHANGES
// File-scope TCMallocGuard instance (non-WTF builds only): its constructor
// and destructor run the guard's set-up and tear-down code.
static TCMallocGuard module_enter_exit_hook;
#endif
3546
3547
3548 //-------------------------------------------------------------------
3549 // Helpers for the exported routines below
3550 //-------------------------------------------------------------------
3551
3552 #ifndef WTF_CHANGES
3553
3554 static Span* DoSampledAllocation(size_t size) {
3555
3556   // Grab the stack trace outside the heap lock
3557   StackTrace tmp;
3558   tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1);
3559   tmp.size = size;
3560
3561   SpinLockHolder h(&pageheap_lock);
3562   // Allocate span
3563   Span *span = pageheap->New(pages(size == 0 ? 1 : size));
3564   if (span == NULL) {
3565     return NULL;
3566   }
3567
3568   // Allocate stack trace
3569   StackTrace *stack = stacktrace_allocator.New();
3570   if (stack == NULL) {
3571     // Sampling failed because of lack of memory
3572     return span;
3573   }
3574
3575   *stack = tmp;
3576   span->sample = 1;
3577   span->objects = stack;
3578   DLL_Prepend(&sampled_objects, span);
3579
3580   return span;
3581 }
3582 #endif
3583
3584 static inline bool CheckCachedSizeClass(void *ptr) {
3585   PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3586   size_t cached_value = pageheap->GetSizeClassIfCached(p);
3587   return cached_value == 0 ||
3588       cached_value == pageheap->GetDescriptor(p)->sizeclass;
3589 }
3590
3591 static inline void* CheckedMallocResult(void *result)
3592 {
3593   ASSERT(result == 0 || CheckCachedSizeClass(result));
3594   return result;
3595 }
3596
3597 static inline void* SpanToMallocResult(Span *span) {
3598   ASSERT_SPAN_COMMITTED(span);
3599   pageheap->CacheSizeClass(span->start, 0);
3600   return
3601       CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
3602 }
3603
// Core allocation routine behind the exported entry points.
//
// Requests larger than kMaxSize are served as whole spans from the page heap
// (under pageheap_lock); everything else comes from the calling thread's
// cache.  Non-WTF builds may first divert the request to
// DoSampledAllocation().
//
// Returns NULL on failure, except that WTF builds instantiated with
// crashOnFailure == true call CRASH() instead; non-WTF builds set errno to
// ENOMEM.
#ifdef WTF_CHANGES
template <bool crashOnFailure>
#endif
static ALWAYS_INLINE void* do_malloc(size_t size) {
  void* ret = NULL;

#ifdef WTF_CHANGES
    ASSERT(!isForbidden());
#endif

  // The following call forces module initialization
  TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
#ifndef WTF_CHANGES
  if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
    Span* span = DoSampledAllocation(size);
    if (span != NULL) {
      ret = SpanToMallocResult(span);
    }
  } else
#endif
  // Note: in non-WTF builds this "if" is the else-branch of the sampling
  // test above; in WTF builds it stands on its own.
  if (size > kMaxSize) {
    // Use page-level allocator
    SpinLockHolder h(&pageheap_lock);
    Span* span = pageheap->New(pages(size));
    if (span != NULL) {
      ret = SpanToMallocResult(span);
    }
  } else {
    // The common case, and also the simplest.  This just pops the
    // size-appropriate freelist, after replenishing it if it's empty.
    ret = CheckedMallocResult(heap->Allocate(size));
  }
  if (!ret) {
#ifdef WTF_CHANGES
    if (crashOnFailure) // This branch should be optimized out by the compiler.
        CRASH();
#else
    errno = ENOMEM;
#endif
  }
  return ret;
}
3646
3647 static ALWAYS_INLINE void do_free(void* ptr) {
3648   if (ptr == NULL) return;
3649   ASSERT(pageheap != NULL);  // Should not call free() before malloc()
3650   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3651   Span* span = NULL;
3652   size_t cl = pageheap->GetSizeClassIfCached(p);
3653
3654   if (cl == 0) {
3655     span = pageheap->GetDescriptor(p);
3656     cl = span->sizeclass;
3657     pageheap->CacheSizeClass(p, cl);
3658   }
3659   if (cl != 0) {
3660 #ifndef NO_TCMALLOC_SAMPLES
3661     ASSERT(!pageheap->GetDescriptor(p)->sample);
3662 #endif
3663     TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
3664     if (heap != NULL) {
3665       heap->Deallocate(ptr, cl);
3666     } else {
3667       // Delete directly into central cache
3668       SLL_SetNext(ptr, NULL);
3669       central_cache[cl].InsertRange(ptr, ptr, 1);
3670     }
3671   } else {
3672     SpinLockHolder h(&pageheap_lock);
3673     ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
3674     ASSERT(span != NULL && span->start == p);
3675 #ifndef NO_TCMALLOC_SAMPLES
3676     if (span->sample) {
3677       DLL_Remove(span);
3678       stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
3679       span->objects = NULL;
3680     }
3681 #endif
3682     pageheap->Delete(span);
3683   }
3684 }
3685
3686 #ifndef WTF_CHANGES
3687 // For use by exported routines below that want specific alignments
3688 //
3689 // Note: this code can be slow, and can significantly fragment memory.
3690 // The expectation is that memalign/posix_memalign/valloc/pvalloc will
3691 // not be invoked very often.  This requirement simplifies our
3692 // implementation and allows us to tune for expected allocation
3693 // patterns.
3694 static void* do_memalign(size_t align, size_t size) {
3695   ASSERT((align & (align - 1)) == 0);
3696   ASSERT(align > 0);
3697   if (pageheap == NULL) TCMalloc_ThreadCache::InitModule();
3698
3699   // Allocate at least one byte to avoid boundary conditions below
3700   if (size == 0) size = 1;
3701
3702   if (size <= kMaxSize && align < kPageSize) {
3703     // Search through acceptable size classes looking for one with
3704     // enough alignment.  This depends on the fact that
3705     // InitSizeClasses() currently produces several size classes that
3706     // are aligned at powers of two.  We will waste time and space if
3707     // we miss in the size class array, but that is deemed acceptable
3708     // since memalign() should be used rarely.
3709     size_t cl = SizeClass(size);
3710     while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) {
3711       cl++;
3712     }
3713     if (cl < kNumClasses) {
3714       TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3715       return CheckedMallocResult(heap->Allocate(class_to_size[cl]));
3716     }
3717   }
3718
3719   // We will allocate directly from the page heap
3720   SpinLockHolder h(&pageheap_lock);
3721
3722   if (align <= kPageSize) {
3723     // Any page-level allocation will be fine
3724     // TODO: We could put the rest of this page in the appropriate
3725     // TODO: cache but it does not seem worth it.
3726     Span* span = pageheap->New(pages(size));
3727     return span == NULL ? NULL : SpanToMallocResult(span);
3728   }
3729
3730   // Allocate extra pages and carve off an aligned portion
3731   const Length alloc = pages(size + align);
3732   Span* span = pageheap->New(alloc);
3733   if (span == NULL) return NULL;
3734
3735   // Skip starting portion so that we end up aligned
3736   Length skip = 0;
3737   while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
3738     skip++;
3739   }
3740   ASSERT(skip < alloc);
3741   if (skip > 0) {
3742     Span* rest = pageheap->Split(span, skip);
3743     pageheap->Delete(span);
3744     span = rest;
3745   }
3746
3747   // Skip trailing portion that we do not need to return
3748   const Length needed = pages(size);
3749   ASSERT(span->length >= needed);
3750   if (span->length > needed) {
3751     Span* trailer = pageheap->Split(span, needed);
3752     pageheap->Delete(trailer);
3753   }
3754   return SpanToMallocResult(span);
3755 }
3756 #endif
3757
3758 // Helpers for use by exported routines below:
3759
#ifndef WTF_CHANGES
// Prints allocator statistics by calling PrintStats() at level 1.
static inline void do_malloc_stats() {
  PrintStats(1);
}
#endif
3765
// Stub behind mallopt(): no tuning options are implemented, so both arguments
// are ignored and the function always returns 1.
// NOTE(review): the original comment calls 1 an error indication, but the
// meaning of mallopt()'s return value differs between C libraries -- confirm
// for the target platform.
static inline int do_mallopt(int, int) {
  const int kErrorResult = 1;     // Indicates error
  return kErrorResult;
}
3769
#ifdef HAVE_STRUCT_MALLINFO  // mallinfo isn't defined on freebsd, for instance
// Builds a struct mallinfo from the statistics gathered by ExtractStats().
// Only arena, fsmblks, fordblks and uordblks are filled in; every other
// field is zero.
static inline struct mallinfo do_mallinfo() {
  struct mallinfo report;
  memset(&report, 0, sizeof(report));

  TCMallocStats snapshot;
  ExtractStats(&snapshot, NULL);

  // The struct's fields are "int", so large byte counts get truncated by
  // the casts below.

  // Bytes in use: system bytes minus every kind of free memory.
  report.uordblks  = static_cast<int>(snapshot.system_bytes
                                      - snapshot.thread_bytes
                                      - snapshot.central_bytes
                                      - snapshot.transfer_bytes
                                      - snapshot.pageheap_bytes);
  // Free bytes held by the page heap.
  report.fordblks  = static_cast<int>(snapshot.pageheap_bytes);
  // Free bytes held in thread, central and transfer lists.
  report.fsmblks   = static_cast<int>(snapshot.thread_bytes
                                      + snapshot.central_bytes
                                      + snapshot.transfer_bytes);
  // Total bytes obtained from the system.
  report.arena     = static_cast<int>(snapshot.system_bytes);

  return report;
}
#endif
3795
3796 //-------------------------------------------------------------------
3797 // Exported routines
3798 //-------------------------------------------------------------------
3799
3800 // CAVEAT: The code structure below ensures that MallocHook methods are always
3801 //         called from the stack frame of the invoked allocation function.
3802 //         heap-checker.cc depends on this to start a stack trace from
3803 //         the call to the (de)allocation function.
3804
// malloc entry point.
//
// Non-WTF builds define an extern "C" malloc().  WTF builds instead define a
// malloc<crashOnFailure> template plus the two public wrappers fastMalloc()
// (crashOnFailure == true) and tryFastMalloc() (crashOnFailure == false).
#ifndef WTF_CHANGES
extern "C"
#else
// From here until the matching #undef, an unqualified do_malloc(...) call
// inside one of the templates below picks up that template's crashOnFailure
// argument.
#define do_malloc do_malloc<crashOnFailure>

template <bool crashOnFailure>
ALWAYS_INLINE void* malloc(size_t);

void* fastMalloc(size_t size)
{
    return malloc<true>(size);
}

TryMallocReturnValue tryFastMalloc(size_t size)
{
    return malloc<false>(size);
}

template <bool crashOnFailure>
ALWAYS_INLINE
#endif
void* malloc(size_t size) {
#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
    // Validation builds prepend an AllocAlignmentInteger tag to each block
    // and hand out the address just past it.
    if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size)  // If overflow would occur...
        return 0;
    size += sizeof(AllocAlignmentInteger);
    void* result = do_malloc(size);
    if (!result)
        return 0;

    *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
    result = static_cast<AllocAlignmentInteger*>(result) + 1;
#else
    void* result = do_malloc(size);
#endif

#ifndef WTF_CHANGES
  MallocHook::InvokeNewHook(result, size);
#endif
  return result;
}
3846
3847 #ifndef WTF_CHANGES
3848 extern "C"
3849 #endif
3850 void free(void* ptr) {
3851 #ifndef WTF_CHANGES
3852   MallocHook::InvokeDeleteHook(ptr);
3853 #endif
3854
3855 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3856     if (!ptr)
3857         return;
3858
3859     AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr);
3860     if (*header != Internal::AllocTypeMalloc)
3861         Internal::fastMallocMatchFailed(ptr);
3862     do_free(header);
3863 #else
3864     do_free(ptr);
3865 #endif
3866 }
3867
3868 #ifndef WTF_CHANGES
3869 extern "C"
3870 #else
3871 template <bool crashOnFailure>
3872 ALWAYS_INLINE void* calloc(size_t, size_t);
3873
3874 void* fastCalloc(size_t n, size_t elem_size)
3875 {
3876     return calloc<true>(n, elem_size);
3877 }
3878
3879 TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size)
3880 {
3881     return calloc<false>(n, elem_size);
3882 }
3883
3884 template <bool crashOnFailure>
3885 ALWAYS_INLINE
3886 #endif
3887 void* calloc(size_t n, size_t elem_size) {
3888   size_t totalBytes = n * elem_size;
3889
3890   // Protect against overflow
3891   if (n > 1 && elem_size && (totalBytes / elem_size) != n)
3892     return 0;
3893
3894 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3895     if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes)  // If overflow would occur...
3896         return 0;
3897
3898     totalBytes += sizeof(AllocAlignmentInteger);
3899     void* result = do_malloc(totalBytes);
3900     if (!result)
3901         return 0;
3902
3903     memset(result, 0, totalBytes);
3904     *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3905     result = static_cast<AllocAlignmentInteger*>(result) + 1;
3906 #else
3907     void* result = do_malloc(totalBytes);
3908     if (result != NULL) {
3909         memset(result, 0, totalBytes);
3910     }
3911 #endif
3912
3913 #ifndef WTF_CHANGES
3914   MallocHook::InvokeNewHook(result, totalBytes);
3915 #endif
3916   return result;
3917 }
3918
3919 // Since cfree isn't used anywhere, we don't compile it in.
3920 #ifndef WTF_CHANGES
3921 #ifndef WTF_CHANGES
3922 extern "C"
3923 #endif
3924 void cfree(void* ptr) {
3925 #ifndef WTF_CHANGES
3926     MallocHook::InvokeDeleteHook(ptr);
3927 #endif
3928   do_free(ptr);
3929 }
3930 #endif
3931
3932 #ifndef WTF_CHANGES
3933 extern "C"
3934 #else
3935 template <bool crashOnFailure>
3936 ALWAYS_INLINE void* realloc(void*, size_t);
3937
3938 void* fastRealloc(void* old_ptr, size_t new_size)
3939 {
3940     return realloc<true>(old_ptr, new_size);
3941 }
3942
3943 TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size)
3944 {
3945     return realloc<false>(old_ptr, new_size);
3946 }
3947
3948 template <bool crashOnFailure>
3949 ALWAYS_INLINE
3950 #endif
3951 void* realloc(void* old_ptr, size_t new_size) {
3952   if (old_ptr == NULL) {
3953 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3954     void* result = malloc(new_size);
3955 #else
3956     void* result = do_malloc(new_size);
3957 #ifndef WTF_CHANGES
3958     MallocHook::InvokeNewHook(result, new_size);
3959 #endif
3960 #endif
3961     return result;
3962   }
3963   if (new_size == 0) {
3964 #ifndef WTF_CHANGES
3965     MallocHook::InvokeDeleteHook(old_ptr);
3966 #endif
3967     free(old_ptr);
3968     return NULL;
3969   }
3970
3971 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3972     if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size)  // If overflow would occur...
3973         return 0;
3974     new_size += sizeof(AllocAlignmentInteger);
3975     AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr);
3976     if (*header != Internal::AllocTypeMalloc)
3977         Internal::fastMallocMatchFailed(old_ptr);
3978     old_ptr = header;
3979 #endif
3980
3981   // Get the size of the old entry
3982   const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
3983   size_t cl = pageheap->GetSizeClassIfCached(p);
3984   Span *span = NULL;
3985   size_t old_size;
3986   if (cl == 0) {
3987     span = pageheap->GetDescriptor(p);
3988     cl = span->sizeclass;
3989     pageheap->CacheSizeClass(p, cl);
3990   }
3991   if (cl != 0) {
3992     old_size = ByteSizeForClass(cl);
3993   } else {
3994     ASSERT(span != NULL);
3995     old_size = span->length << kPageShift;
3996   }
3997
3998   // Reallocate if the new size is larger than the old size,
3999   // or if the new size is significantly smaller than the old size.
4000   if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
4001     // Need to reallocate
4002     void* new_ptr = do_malloc(new_size);
4003     if (new_ptr == NULL) {
4004       return NULL;
4005     }
4006 #ifndef WTF_CHANGES
4007     MallocHook::InvokeNewHook(new_ptr, new_size);
4008 #endif
4009     memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
4010 #ifndef WTF_CHANGES
4011     MallocHook::InvokeDeleteHook(old_ptr);
4012 #endif
4013     // We could use a variant of do_free() that leverages the fact
4014     // that we already know the sizeclass of old_ptr.  The benefit
4015     // would be small, so don't bother.
4016     do_free(old_ptr);
4017 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
4018     new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1;
4019 #endif
4020     return new_ptr;
4021   } else {
4022 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
4023     old_ptr = static_cast<AllocAlignmentInteger*>(old_ptr) + 1; // Set old_ptr back to the user pointer.
4024 #endif
4025     return old_ptr;
4026   }
4027 }
4028
4029 #ifdef WTF_CHANGES
4030 #undef do_malloc
4031 #else
4032
// Held by cpp_alloc() while it reads the current new-handler via a pair of
// std::set_new_handler() calls.
static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
4034
4035 static inline void* cpp_alloc(size_t size, bool nothrow) {
4036   for (;;) {
4037     void* p = do_malloc(size);
4038 #ifdef PREANSINEW
4039     return p;
4040 #else
4041     if (p == NULL) {  // allocation failed
4042       // Get the current new handler.  NB: this function is not
4043       // thread-safe.  We make a feeble stab at making it so here, but
4044       // this lock only protects against tcmalloc interfering with
4045       // itself, not with other libraries calling set_new_handler.
4046       std::new_handler nh;
4047       {
4048         SpinLockHolder h(&set_new_handler_lock);
4049         nh = std::set_new_handler(0);
4050         (void) std::set_new_handler(nh);
4051       }
4052       // If no new_handler is established, the allocation failed.
4053       if (!nh) {
4054         if (nothrow) return 0;
4055         throw std::bad_alloc();
4056       }
4057       // Otherwise, try the new_handler.  If it returns, retry the
4058       // allocation.  If it throws std::bad_alloc, fail the allocation.
4059       // if it throws something else, don't interfere.
4060       try {
4061         (*nh)();
4062       } catch (const std::bad_alloc&) {
4063         if (!nothrow) throw;
4064         return p;
4065       }
4066     } else {  // allocation success
4067       return p;
4068     }
4069 #endif
4070   }
4071 }
4072
4073 #if ENABLE(GLOBAL_FASTMALLOC_NEW)
4074
4075 void* operator new(size_t size) {
4076   void* p = cpp_alloc(size, false);
4077   // We keep this next instruction out of cpp_alloc for a reason: when
4078   // it's in, and new just calls cpp_alloc, the optimizer may fold the
4079   // new call into cpp_alloc, which messes up our whole section-based
4080   // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
4081   // isn't the last thing this fn calls, and prevents the folding.
4082   MallocHook::InvokeNewHook(p, size);
4083   return p;
4084 }
4085
4086 void* operator new(size_t size, const std::nothrow_t&) __THROW {
4087   void* p = cpp_alloc(size, true);
4088   MallocHook::InvokeNewHook(p, size);
4089   return p;
4090 }
4091
4092 void operator delete(void* p) __THROW {
4093   MallocHook::InvokeDeleteHook(p);
4094   do_free(p);
4095 }
4096
4097 void operator delete(void* p, const std::nothrow_t&) __THROW {
4098   MallocHook::InvokeDeleteHook(p);
4099   do_free(p);
4100 }
4101
4102 void* operator new[](size_t size) {
4103   void* p = cpp_alloc(size, false);
4104   // We keep this next instruction out of cpp_alloc for a reason: when
4105   // it's in, and new just calls cpp_alloc, the optimizer may fold the
4106   // new call into cpp_alloc, which messes up our whole section-based
4107   // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
4108   // isn't the last thing this fn calls, and prevents the folding.
4109   MallocHook::InvokeNewHook(p, size);
4110   return p;
4111 }
4112
4113 void* operator new[](size_t size, const std::nothrow_t&) __THROW {
4114   void* p = cpp_alloc(size, true);
4115   MallocHook::InvokeNewHook(p, size);
4116   return p;
4117 }
4118
4119 void operator delete[](void* p) __THROW {
4120   MallocHook::InvokeDeleteHook(p);
4121   do_free(p);
4122 }
4123
4124 void operator delete[](void* p, const std::nothrow_t&) __THROW {
4125   MallocHook::InvokeDeleteHook(p);
4126   do_free(p);
4127 }
4128
4129 #endif
4130
4131 extern "C" void* memalign(size_t align, size_t size) __THROW {
4132   void* result = do_memalign(align, size);
4133   MallocHook::InvokeNewHook(result, size);
4134   return result;
4135 }
4136
4137 extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
4138     __THROW {
4139   if (((align % sizeof(void*)) != 0) ||
4140       ((align & (align - 1)) != 0) ||
4141       (align == 0)) {
4142     return EINVAL;
4143   }
4144
4145   void* result = do_memalign(align, size);
4146   MallocHook::InvokeNewHook(result, size);
4147   if (result == NULL) {
4148     return ENOMEM;
4149   } else {
4150     *result_ptr = result;
4151     return 0;
4152   }
4153 }
4154
// Page size as reported by getpagesize(); 0 until valloc() or pvalloc()
// first fills it in.
static size_t pagesize = 0;
4156
4157 extern "C" void* valloc(size_t size) __THROW {
4158   // Allocate page-aligned object of length >= size bytes
4159   if (pagesize == 0) pagesize = getpagesize();
4160   void* result = do_memalign(pagesize, size);
4161   MallocHook::InvokeNewHook(result, size);
4162   return result;
4163 }
4164
4165 extern "C" void* pvalloc(size_t size) __THROW {
4166   // Round up size to a multiple of pagesize
4167   if (pagesize == 0) pagesize = getpagesize();
4168   size = (size + pagesize - 1) & ~(pagesize - 1);
4169   void* result = do_memalign(pagesize, size);
4170   MallocHook::InvokeNewHook(result, size);
4171   return result;
4172 }
4173
// Exported malloc_stats(): forwards to do_malloc_stats().
extern "C" void malloc_stats(void) {
  do_malloc_stats();
}
4177
// Exported mallopt(): forwards to do_mallopt(), which ignores both arguments
// and always returns 1.
extern "C" int mallopt(int cmd, int value) {
  return do_mallopt(cmd, value);
}
4181
#ifdef HAVE_STRUCT_MALLINFO
// Exported mallinfo(): forwards to do_mallinfo().  Only compiled when the
// platform provides struct mallinfo.
extern "C" struct mallinfo mallinfo(void) {
  return do_mallinfo();
}
#endif
4187
//-------------------------------------------------------------------
// Some library routines on RedHat 9 allocate memory using malloc()
// and free it using __libc_free() (or vice-versa).  Since we provide
// our own implementations of malloc/free, we need to make sure that
// the __libc_XXX variants (defined as part of glibc) also point to
// the same implementations.
//
// Two equivalent forms are provided: weak aliases where the compiler and
// object format support them, and plain forwarding functions otherwise.
//-------------------------------------------------------------------

#if defined(__GLIBC__)
extern "C" {
#if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
  // Potentially faster variants that use the gcc alias extension.
  // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
# define ALIAS(x) __attribute__ ((weak, alias (x)))
  void* __libc_malloc(size_t size)              ALIAS("malloc");
  void  __libc_free(void* ptr)                  ALIAS("free");
  void* __libc_realloc(void* ptr, size_t size)  ALIAS("realloc");
  void* __libc_calloc(size_t n, size_t size)    ALIAS("calloc");
  void  __libc_cfree(void* ptr)                 ALIAS("cfree");
  void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
  void* __libc_valloc(size_t size)              ALIAS("valloc");
  void* __libc_pvalloc(size_t size)             ALIAS("pvalloc");
  int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
# undef ALIAS
# else   /* weak aliases not available */
  // Portable wrappers
  void* __libc_malloc(size_t size)              { return malloc(size);       }
  void  __libc_free(void* ptr)                  { free(ptr);                 }
  void* __libc_realloc(void* ptr, size_t size)  { return realloc(ptr, size); }
  void* __libc_calloc(size_t n, size_t size)    { return calloc(n, size);    }
  void  __libc_cfree(void* ptr)                 { cfree(ptr);                }
  void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); }
  void* __libc_valloc(size_t size)              { return valloc(size);       }
  void* __libc_pvalloc(size_t size)             { return pvalloc(size);      }
  int __posix_memalign(void** r, size_t a, size_t s) {
    return posix_memalign(r, a, s);
  }
# endif  /* COMPILER(GCC) && !__MACH__ && HAVE___ATTRIBUTE__ */
}
#endif   /* __GLIBC__ */
4228
4229 // Override __libc_memalign in libc on linux boxes specially.
4230 // They have a bug in libc that causes them to (very rarely) allocate
4231 // with __libc_memalign() yet deallocate with free() and the
4232 // definitions above don't catch it.
4233 // This function is an exception to the rule of calling MallocHook method
4234 // from the stack frame of the allocation function;
4235 // heap-checker handles this special case explicitly.
4236 static void *MemalignOverride(size_t align, size_t size, const void *caller)
4237     __THROW {
4238   void* result = do_memalign(align, size);
4239   MallocHook::InvokeNewHook(result, size);
4240   return result;
4241 }
4242 void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
4243
4244 #endif
4245
4246 #ifdef WTF_CHANGES
4247 void releaseFastMallocFreeMemory()
4248 {
4249     // Flush free pages in the current thread cache back to the page heap.
4250     // Low watermark mechanism in Scavenge() prevents full return on the first pass.
4251     // The second pass flushes everything.
4252     if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
4253         threadCache->Scavenge();
4254         threadCache->Scavenge();
4255     }
4256
4257     SpinLockHolder h(&pageheap_lock);
4258     pageheap->ReleaseFreePages();
4259 }
4260
4261 FastMallocStatistics fastMallocStatistics()
4262 {
4263     FastMallocStatistics statistics;
4264
4265     SpinLockHolder lockHolder(&pageheap_lock);
4266     statistics.reservedVMBytes = static_cast<size_t>(pageheap->SystemBytes());
4267     statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes();
4268
4269     statistics.freeListBytes = 0;
4270     for (unsigned cl = 0; cl < kNumClasses; ++cl) {
4271         const int length = central_cache[cl].length();
4272         const int tc_length = central_cache[cl].tc_length();
4273
4274         statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length);
4275     }
4276     for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
4277         statistics.freeListBytes += threadCache->Size();
4278
4279     return statistics;
4280 }
4281
4282 size_t fastMallocSize(const void* ptr)
4283 {
4284     const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
4285     Span* span = pageheap->GetDescriptorEnsureSafe(p);
4286
4287     if (!span || span->free)
4288         return 0;
4289
4290     for (void* free = span->objects; free != NULL; free = *((void**) free)) {
4291         if (ptr == free)
4292             return 0;
4293     }
4294
4295     if (size_t cl = span->sizeclass)
4296         return ByteSizeForClass(cl);
4297
4298     return span->length << kPageShift;
4299 }
4300
4301 #if OS(DARWIN)
4302
4303 class FreeObjectFinder {
4304     const RemoteMemoryReader& m_reader;
4305     HashSet<void*> m_freeObjects;
4306
4307 public:
4308     FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }
4309
4310     void visit(void* ptr) { m_freeObjects.add(ptr); }
4311     bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
4312     bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
4313     size_t freeObjectCount() const { return m_freeObjects.size(); }
4314
4315     void findFreeObjects(TCMalloc_ThreadCache* threadCache)
4316     {
4317         for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
4318             threadCache->enumerateFreeObjects(*this, m_reader);
4319     }
4320
4321     void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
4322     {
4323         for (unsigned i = 0; i < numSizes; i++)
4324             centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
4325     }
4326 };
4327
4328 class PageMapFreeObjectFinder {
4329     const RemoteMemoryReader& m_reader;
4330     FreeObjectFinder& m_freeObjectFinder;
4331
4332 public:
4333     PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder)
4334         : m_reader(reader)
4335         , m_freeObjectFinder(freeObjectFinder)
4336     { }
4337
4338     int visit(void* ptr) const
4339     {
4340         if (!ptr)
4341             return 1;
4342
4343         Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4344         if (span->free) {
4345             void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
4346             m_freeObjectFinder.visit(ptr);
4347         } else if (span->sizeclass) {
4348             // Walk the free list of the small-object span, keeping track of each object seen
4349             for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast<void**>(nextObject)))
4350                 m_freeObjectFinder.visit(nextObject);
4351         }
4352         return span->length;
4353     }
4354 };
4355
4356 class PageMapMemoryUsageRecorder {
4357     task_t m_task;
4358     void* m_context;
4359     unsigned m_typeMask;
4360     vm_range_recorder_t* m_recorder;
4361     const RemoteMemoryReader& m_reader;
4362     const FreeObjectFinder& m_freeObjectFinder;
4363
4364     HashSet<void*> m_seenPointers;
4365     Vector<Span*> m_coalescedSpans;
4366
4367 public:
4368     PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
4369         : m_task(task)
4370         , m_context(context)
4371         , m_typeMask(typeMask)
4372         , m_recorder(recorder)
4373         , m_reader(reader)
4374         , m_freeObjectFinder(freeObjectFinder)
4375     { }
4376
4377     ~PageMapMemoryUsageRecorder()
4378     {
4379         ASSERT(!m_coalescedSpans.size());
4380     }
4381
4382     void recordPendingRegions()
4383     {
4384         Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4385         vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 };
4386         ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize);
4387
4388         // Mark the memory region the spans represent as a candidate for containing pointers
4389         if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE)
4390             (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1);
4391
4392         if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) {
4393             m_coalescedSpans.clear();
4394             return;
4395         }
4396
4397         Vector<vm_range_t, 1024> allocatedPointers;
4398         for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
4399             Span *theSpan = m_coalescedSpans[i];
4400             if (theSpan->free)
4401                 continue;
4402
4403             vm_address_t spanStartAddress = theSpan->start << kPageShift;
4404             vm_size_t spanSizeInBytes = theSpan->length * kPageSize;
4405
4406             if (!theSpan->sizeclass) {
4407                 // If it's an allocated large object span, mark it as in use
4408                 if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
4409                     allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
4410             } else {
4411                 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);
4412
4413                 // Mark each allocated small object within the span as in use
4414                 const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
4415                 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
4416                     if (!m_freeObjectFinder.isFreeObject(object))
4417                         allocatedPointers.append((vm_range_t){object, objectSize});
4418                 }
4419             }
4420         }
4421
4422         (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size());
4423
4424         m_coalescedSpans.clear();
4425     }
4426
4427     int visit(void* ptr)
4428     {
4429         if (!ptr)
4430             return 1;
4431
4432         Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4433         if (!span->start)
4434             return 1;
4435
4436         if (m_seenPointers.contains(ptr))
4437             return span->length;
4438         m_seenPointers.add(ptr);
4439
4440         if (!m_coalescedSpans.size()) {
4441             m_coalescedSpans.append(span);
4442             return span->length;
4443         }
4444
4445         Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4446         vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
4447         vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;
4448
4449         // If the new span is adjacent to the previous span, do nothing for now.
4450         vm_address_t spanStartAddress = span->start << kPageShift;
4451         if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
4452             m_coalescedSpans.append(span);
4453             return span->length;
4454         }
4455
4456         // New span is not adjacent to previous span, so record the spans coalesced so far.
4457         recordPendingRegions();
4458         m_coalescedSpans.append(span);
4459
4460         return span->length;
4461     }
4462 };
4463
4464 class AdminRegionRecorder {
4465     task_t m_task;
4466     void* m_context;
4467     unsigned m_typeMask;
4468     vm_range_recorder_t* m_recorder;
4469     const RemoteMemoryReader& m_reader;
4470
4471     Vector<vm_range_t, 1024> m_pendingRegions;
4472
4473 public:
4474     AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader)
4475         : m_task(task)
4476         , m_context(context)
4477         , m_typeMask(typeMask)
4478         , m_recorder(recorder)
4479         , m_reader(reader)
4480     { }
4481
4482     void recordRegion(vm_address_t ptr, size_t size)
4483     {
4484         if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
4485             m_pendingRegions.append((vm_range_t){ ptr, size });
4486     }
4487
4488     void visit(void *ptr, size_t size)
4489     {
4490         recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
4491     }
4492
4493     void recordPendingRegions()
4494     {
4495         if (m_pendingRegions.size()) {
4496             (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
4497             m_pendingRegions.clear();
4498         }
4499     }
4500
4501     ~AdminRegionRecorder()
4502     {
4503         ASSERT(!m_pendingRegions.size());
4504     }
4505 };
4506
4507 kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
4508 {
4509     RemoteMemoryReader memoryReader(task, reader);
4510
4511     InitSizeClasses();
4512
4513     FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
4514     TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
4515     TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
4516     TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);
4517
4518     TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);
4519
4520     FreeObjectFinder finder(memoryReader);
4521     finder.findFreeObjects(threadHeaps);
4522     finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);
4523
4524     TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
4525     PageMapFreeObjectFinder pageMapFinder(memoryReader, finder);
4526     pageMap->visitValues(pageMapFinder, memoryReader);
4527
4528     PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
4529     pageMap->visitValues(usageRecorder, memoryReader);
4530     usageRecorder.recordPendingRegions();
4531
4532     AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader);
4533     pageMap->visitAllocations(adminRegionRecorder, memoryReader);
4534
4535     PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator);
4536     PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator);
4537
4538     spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4539     pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4540
4541     adminRegionRecorder.recordPendingRegions();
4542
4543     return 0;
4544 }
4545
4546 size_t FastMallocZone::size(malloc_zone_t*, const void*)
4547 {
4548     return 0;
4549 }
4550
4551 void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
4552 {
4553     return 0;
4554 }
4555
4556 void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
4557 {
4558     return 0;
4559 }
4560
4561 void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
4562 {
4563     // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
4564     // is not in this zone.  When this happens, the pointer being freed was not allocated by any
4565     // zone so we need to print a useful error for the application developer.
4566     malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
4567 }
4568
4569 void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
4570 {
4571     return 0;
4572 }
4573
4574
4575 #undef malloc
4576 #undef free
4577 #undef realloc
4578 #undef calloc
4579
4580 extern "C" {
4581 malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
4582     &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics
4583
4584 #if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD)
4585     , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
4586 #endif
4587 #if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD)
4588     , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher.
4589 #endif
4590
4591     };
4592 }
4593
4594 FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator)
4595     : m_pageHeap(pageHeap)
4596     , m_threadHeaps(threadHeaps)
4597     , m_centralCaches(centralCaches)
4598     , m_spanAllocator(spanAllocator)
4599     , m_pageHeapAllocator(pageHeapAllocator)
4600 {
4601     memset(&m_zone, 0, sizeof(m_zone));
4602     m_zone.version = 4;
4603     m_zone.zone_name = "JavaScriptCore FastMalloc";
4604     m_zone.size = &FastMallocZone::size;
4605     m_zone.malloc = &FastMallocZone::zoneMalloc;
4606     m_zone.calloc = &FastMallocZone::zoneCalloc;
4607     m_zone.realloc = &FastMallocZone::zoneRealloc;
4608     m_zone.free = &FastMallocZone::zoneFree;
4609     m_zone.valloc = &FastMallocZone::zoneValloc;
4610     m_zone.destroy = &FastMallocZone::zoneDestroy;
4611     m_zone.introspect = &jscore_fastmalloc_introspection;
4612     malloc_zone_register(&m_zone);
4613 }
4614
4615
4616 void FastMallocZone::init()
4617 {
4618     static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator);
4619 }
4620
4621 #endif // OS(DARWIN)
4622
4623 } // namespace WTF
4624 #endif // WTF_CHANGES
4625
4626 #endif // FORCE_SYSTEM_MALLOC