runtime/atomic.h

   1 /*
   2  * Copyright (C) 2008 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef ART_RUNTIME_ATOMIC_H_
  18 #define ART_RUNTIME_ATOMIC_H_
  19
  20 #ifdef __clang__
  21 #define ART_HAVE_STDATOMIC 1
  22 #endif
  23
  24 #include <stdint.h>
  25 #if ART_HAVE_STDATOMIC
  26 #include <atomic>
  27 #endif
  28 #include <limits>
  29 #include <vector>
  30
  31 #include "base/logging.h"
  32 #include "base/macros.h"
  33
  34 namespace art {
  35
  36 class Mutex;
  37
  38 // QuasiAtomic encapsulates two separate facilities that we are
  39 // trying to move away from:  "quasiatomic" 64 bit operations
  40 // and custom memory fences.  For the time being, they remain
  41 // exposed.  Clients should be converted to use either class Atomic
  42 // below whenever possible, and should eventually use C++11 atomics.
  43 // The two facilities that do not have a good C++11 analog are
  44 // ThreadFenceForConstructor and Atomic::*JavaData.
  45 //
  46 // NOTE: Two "quasiatomic" operations on the exact same memory address
  47 // are guaranteed to operate atomically with respect to each other,
  48 // but no guarantees are made about quasiatomic operations mixed with
  49 // non-quasiatomic operations on the same address, nor about
  50 // quasiatomic operations that are performed on partially-overlapping
  51 // memory.
  52 class QuasiAtomic {
  53 #if defined(__mips__) && !defined(__LP64__)
  54   static constexpr bool kNeedSwapMutexes = true;
  55 #else
  56   static constexpr bool kNeedSwapMutexes = false;
  57 #endif
  58
  59  public:
  60   static void Startup();
  61
  62   static void Shutdown();
  63
  64   // Reads the 64-bit value at "addr" without tearing.
  65   static int64_t Read64(volatile const int64_t* addr) {
  66     if (!kNeedSwapMutexes) {
  67       int64_t value;
  68 #if defined(__LP64__)
  69       value = *addr;
  70 #else
  71 #if defined(__arm__)
  72 #if defined(__ARM_FEATURE_LPAE)
  73       // With LPAE support (such as Cortex-A15) then ldrd is defined not to tear.
  74       __asm__ __volatile__("@ QuasiAtomic::Read64\n"
  75         "ldrd     %0, %H0, %1"
  76         : "=r" (value)
  77         : "m" (*addr));
  78 #else
  79       // Exclusive loads are defined not to tear, clearing the exclusive state isn't necessary.
  80       __asm__ __volatile__("@ QuasiAtomic::Read64\n"
  81         "ldrexd     %0, %H0, %1"
  82         : "=r" (value)
  83         : "Q" (*addr));
  84 #endif
  85 #elif defined(__i386__)
  86   __asm__ __volatile__(
  87       "movq     %1, %0\n"
  88       : "=x" (value)
  89       : "m" (*addr));
  90 #else
  91       LOG(FATAL) << "Unsupported architecture";
  92 #endif
  93 #endif  // defined(__LP64__)
  94       return value;
  95     } else {
  96       return SwapMutexRead64(addr);
  97     }
  98   }
  99
 100   // Writes to the 64-bit value at "addr" without tearing.
 101   static void Write64(volatile int64_t* addr, int64_t value) {
 102     if (!kNeedSwapMutexes) {
 103 #if defined(__LP64__)
 104       *addr = value;
 105 #else
 106 #if defined(__arm__)
 107 #if defined(__ARM_FEATURE_LPAE)
 108     // If we know that ARM architecture has LPAE (such as Cortex-A15) strd is defined not to tear.
 109     __asm__ __volatile__("@ QuasiAtomic::Write64\n"
 110       "strd     %1, %H1, %0"
 111       : "=m"(*addr)
 112       : "r" (value));
 113 #else
 114     // The write is done as a swap so that the cache-line is in the exclusive state for the store.
 115     int64_t prev;
 116     int status;
 117     do {
 118       __asm__ __volatile__("@ QuasiAtomic::Write64\n"
 119         "ldrexd     %0, %H0, %2\n"
 120         "strexd     %1, %3, %H3, %2"
 121         : "=&r" (prev), "=&r" (status), "+Q"(*addr)
 122         : "r" (value)
 123         : "cc");
 124       } while (UNLIKELY(status != 0));
 125 #endif
 126 #elif defined(__i386__)
 127       __asm__ __volatile__(
 128         "movq     %1, %0"
 129         : "=m" (*addr)
 130         : "x" (value));
 131 #else
 132       LOG(FATAL) << "Unsupported architecture";
 133 #endif
 134 #endif  // defined(__LP64__)
 135     } else {
 136       SwapMutexWrite64(addr, value);
 137     }
 138   }
 139
 140   // Atomically compare the value at "addr" to "old_value", if equal replace it with "new_value"
 141   // and return true. Otherwise, don't swap, and return false.
 142   // This is fully ordered, i.e. it has C++11 memory_order_seq_cst
 143   // semantics (assuming all other accesses use a mutex if this one does).
 144   // This has "strong" semantics; if it fails then it is guaranteed that
 145   // at some point during the execution of Cas64, *addr was not equal to
 146   // old_value.
 147   static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
 148     if (!kNeedSwapMutexes) {
 149       return __sync_bool_compare_and_swap(addr, old_value, new_value);
 150     } else {
 151       return SwapMutexCas64(old_value, new_value, addr);
 152     }
 153   }
 154
 155   // Does the architecture provide reasonable atomic long operations or do we fall back on mutexes?
 156   static bool LongAtomicsUseMutexes() {
 157     return kNeedSwapMutexes;
 158   }
 159
 160   #if ART_HAVE_STDATOMIC
 161
 162   static void ThreadFenceAcquire() {
 163     std::atomic_thread_fence(std::memory_order_acquire);
 164   }
 165
 166   static void ThreadFenceRelease() {
 167     std::atomic_thread_fence(std::memory_order_release);
 168   }
 169
 170   static void ThreadFenceForConstructor() {
 171     #if defined(__aarch64__)
 172       __asm__ __volatile__("dmb ishst" : : : "memory");
 173     #else
 174       std::atomic_thread_fence(std::memory_order_release);
 175     #endif
 176   }
 177
 178   static void ThreadFenceSequentiallyConsistent() {
 179     std::atomic_thread_fence(std::memory_order_seq_cst);
 180   }
 181
 182   #else
 183
 184   static void ThreadFenceAcquire() {
 185   #if defined(__arm__) || defined(__aarch64__)
 186     __asm__ __volatile__("dmb ish" : : : "memory");
 187     // Could possibly use dmb ishld on aarch64
 188     // But currently we also use this on volatile loads
 189     // to enforce store atomicity.  Ishld is
 190     // insufficient for that purpose.
 191   #elif defined(__i386__) || defined(__x86_64__)
 192     __asm__ __volatile__("" : : : "memory");
 193   #elif defined(__mips__)
 194     __asm__ __volatile__("sync" : : : "memory");
 195   #else
 196   #error Unexpected architecture
 197   #endif
 198   }
 199
 200   static void ThreadFenceRelease() {
 201   #if defined(__arm__) || defined(__aarch64__)
 202     __asm__ __volatile__("dmb ish" : : : "memory");
 203     // ishst doesn't order load followed by store.
 204   #elif defined(__i386__) || defined(__x86_64__)
 205     __asm__ __volatile__("" : : : "memory");
 206   #elif defined(__mips__)
 207     __asm__ __volatile__("sync" : : : "memory");
 208   #else
 209   #error Unexpected architecture
 210   #endif
 211   }
 212
 213   // Fence at the end of a constructor with final fields
 214   // or allocation.  We believe this
 215   // only has to order stores, and can thus be weaker than
 216   // release on aarch64.
 217   static void ThreadFenceForConstructor() {
 218   #if defined(__arm__) || defined(__aarch64__)
 219     __asm__ __volatile__("dmb ishst" : : : "memory");
 220   #elif defined(__i386__) || defined(__x86_64__)
 221     __asm__ __volatile__("" : : : "memory");
 222   #elif defined(__mips__)
 223     __asm__ __volatile__("sync" : : : "memory");
 224   #else
 225   #error Unexpected architecture
 226   #endif
 227   }
 228
 229   static void ThreadFenceSequentiallyConsistent() {
 230   #if defined(__arm__) || defined(__aarch64__)
 231     __asm__ __volatile__("dmb ish" : : : "memory");
 232   #elif defined(__i386__) || defined(__x86_64__)
 233     __asm__ __volatile__("mfence" : : : "memory");
 234   #elif defined(__mips__)
 235     __asm__ __volatile__("sync" : : : "memory");
 236   #else
 237   #error Unexpected architecture
 238   #endif
 239   }
 240   #endif
 241
 242  private:
 243   static Mutex* GetSwapMutex(const volatile int64_t* addr);
 244   static int64_t SwapMutexRead64(volatile const int64_t* addr);
 245   static void SwapMutexWrite64(volatile int64_t* addr, int64_t val);
 246   static bool SwapMutexCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
 247
 248   // We stripe across a bunch of different mutexes to reduce contention.
 249   static constexpr size_t kSwapMutexCount = 32;
 250   static std::vector<Mutex*>* gSwapMutexes;
 251
 252   DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
 253 };
 254
 255 #if ART_HAVE_STDATOMIC
 256 template<typename T>
 257 class Atomic : public std::atomic<T> {
 258  public:
 259   Atomic<T>() : std::atomic<T>() { }
 260
 261   explicit Atomic<T>(T value) : std::atomic<T>(value) { }
 262
 263   // Load from memory without ordering or synchronization constraints.
 264   T LoadRelaxed() const {
 265     return this->load(std::memory_order_relaxed);
 266   }
 267
 268   // Word tearing allowed, but may race.
 269   // TODO: Optimize?
 270   // There has been some discussion of eventually disallowing word
 271   // tearing for Java data loads.
 272   T LoadJavaData() const {
 273     return this->load(std::memory_order_relaxed);
 274   }
 275
 276   // Load from memory with a total ordering.
 277   // Corresponds exactly to a Java volatile load.
 278   T LoadSequentiallyConsistent() const {
 279     return this->load(std::memory_order_seq_cst);
 280   }
 281
 282   // Store to memory without ordering or synchronization constraints.
 283   void StoreRelaxed(T desired) {
 284     this->store(desired, std::memory_order_relaxed);
 285   }
 286
 287   // Word tearing allowed, but may race.
 288   void StoreJavaData(T desired) {
 289     this->store(desired, std::memory_order_relaxed);
 290   }
 291
 292   // Store to memory with release ordering.
 293   void StoreRelease(T desired) {
 294     this->store(desired, std::memory_order_release);
 295   }
 296
 297   // Store to memory with a total ordering.
 298   void StoreSequentiallyConsistent(T desired) {
 299     this->store(desired, std::memory_order_seq_cst);
 300   }
 301
 302   // Atomically replace the value with desired value if it matches the expected value.
 303   // Participates in total ordering of atomic operations.
 304   bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
 305     return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_seq_cst);
 306   }
 307
 308   // The same, except it may fail spuriously.
 309   bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
 310     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_seq_cst);
 311   }
 312
 313   // Atomically replace the value with desired value if it matches the expected value. Doesn't
 314   // imply ordering or synchronization constraints.
 315   bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
 316     return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_relaxed);
 317   }
 318
 319   // The same, except it may fail spuriously.
 320   bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
 321     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
 322   }
 323
 324   // Atomically replace the value with desired value if it matches the expected value. Prior writes
 325   // made to other memory locations by the thread that did the release become visible in this
 326   // thread.
 327   bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
 328     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
 329   }
 330
 331   // Atomically replace the value with desired value if it matches the expected value. prior writes
 332   // to other memory locations become visible to the threads that do a consume or an acquire on the
 333   // same location.
 334   bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
 335     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
 336   }
 337
 338   T FetchAndAddSequentiallyConsistent(const T value) {
 339     return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
 340   }
 341
 342   T FetchAndSubSequentiallyConsistent(const T value) {
 343     return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
 344   }
 345
 346   T FetchAndOrSequentiallyConsistent(const T value) {
 347     return this->fetch_or(value, std::memory_order_seq_cst);  // Return old_value.
 348   }
 349
 350   T FetchAndAndSequentiallyConsistent(const T value) {
 351     return this->fetch_and(value, std::memory_order_seq_cst);  // Return old_value.
 352   }
 353
 354   volatile T* Address() {
 355     return reinterpret_cast<T*>(this);
 356   }
 357
 358   static T MaxValue() {
 359     return std::numeric_limits<T>::max();
 360   }
 361 };
 362
 363 #else
 364
 365 template<typename T> class Atomic;
 366
 367 // Helper class for Atomic to deal separately with size 8 and small
 368 // objects.  Should not be used directly.
 369
 370 template<int SZ, class T> struct AtomicHelper {
 371   friend class Atomic<T>;
 372
 373  private:
 374   COMPILE_ASSERT(sizeof(T) <= 4, bad_atomic_helper_arg);
 375
 376   static T LoadRelaxed(const volatile T* loc) {
 377     // sizeof(T) <= 4
 378     return *loc;
 379   }
 380
 381   static void StoreRelaxed(volatile T* loc, T desired) {
 382     // sizeof(T) <= 4
 383     *loc = desired;
 384   }
 385
 386   static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
 387                                                   T expected_value, T desired_value) {
 388     // sizeof(T) <= 4
 389     return __sync_bool_compare_and_swap(loc, expected_value, desired_value);
 390   }
 391 };
 392
 393 // Interpret the bit pattern of input (type U) as type V. Requires the size
 394 // of V >= size of U (compile-time checked).
 395 // Reproduced here from utils.h to keep dependencies small.
 396 template<typename U, typename V>
 397 static inline V bit_cast_atomic(U in) {
 398   COMPILE_ASSERT(sizeof(U) == sizeof(V), size_of_u_not_eq_size_of_v);
 399   union {
 400     U u;
 401     V v;
 402   } tmp;
 403   tmp.u = in;
 404   return tmp.v;
 405 }
 406
 407 template<class T> struct AtomicHelper<8, T> {
 408   friend class Atomic<T>;
 409
 410  private:
 411   COMPILE_ASSERT(sizeof(T) == 8, bad_large_atomic_helper_arg);
 412
 413   static T LoadRelaxed(const volatile T* loc) {
 414     // sizeof(T) == 8
 415     volatile const int64_t* loc_ptr =
 416               reinterpret_cast<volatile const int64_t*>(loc);
 417     return bit_cast_atomic<int64_t, T>(QuasiAtomic::Read64(loc_ptr));
 418   }
 419
 420   static void StoreRelaxed(volatile T* loc, T desired) {
 421     // sizeof(T) == 8
 422     volatile int64_t* loc_ptr =
 423                 reinterpret_cast<volatile int64_t*>(loc);
 424     QuasiAtomic::Write64(loc_ptr, bit_cast_atomic<T, int64_t>(desired));
 425   }
 426
 427
 428   static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
 429                                                   T expected_value, T desired_value) {
 430     // sizeof(T) == 8
 431     volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc);
 432     return QuasiAtomic::Cas64(bit_cast_atomic<T, int64_t>(expected_value),
 433                               bit_cast_atomic<T, int64_t>(desired_value),
 434                               loc_ptr);
 435   }
 436 };
 437
 438 template<typename T>
 439 class PACKED(sizeof(T)) Atomic {
 440  private:
 441   COMPILE_ASSERT(sizeof(T) <= 4 || sizeof(T) == 8, bad_atomic_arg);
 442
 443  public:
 444   Atomic<T>() : value_(0) { }
 445
 446   explicit Atomic<T>(T value) : value_(value) { }
 447
 448   // Load from memory without ordering or synchronization constraints.
 449   T LoadRelaxed() const {
 450     return AtomicHelper<sizeof(T), T>::LoadRelaxed(&value_);
 451   }
 452
 453   // Word tearing allowed, but may race.
 454   T LoadJavaData() const {
 455     return value_;
 456   }
 457
 458   // Load from memory with a total ordering.
 459   T LoadSequentiallyConsistent() const;
 460
 461   // Store to memory without ordering or synchronization constraints.
 462   void StoreRelaxed(T desired) {
 463     AtomicHelper<sizeof(T), T>::StoreRelaxed(&value_, desired);
 464   }
 465
 466   // Word tearing allowed, but may race.
 467   void StoreJavaData(T desired) {
 468     value_ = desired;
 469   }
 470
 471   // Store to memory with release ordering.
 472   void StoreRelease(T desired);
 473
 474   // Store to memory with a total ordering.
 475   void StoreSequentiallyConsistent(T desired);
 476
 477   // Atomically replace the value with desired value if it matches the expected value.
 478   // Participates in total ordering of atomic operations.
 479   bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
 480     return AtomicHelper<sizeof(T), T>::
 481         CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
 482   }
 483
 484   // The same, but may fail spuriously.
 485   bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
 486     // TODO: Take advantage of the fact that it may fail spuriously.
 487     return AtomicHelper<sizeof(T), T>::
 488         CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
 489   }
 490
 491   // Atomically replace the value with desired value if it matches the expected value. Doesn't
 492   // imply ordering or synchronization constraints.
 493   bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
 494     // TODO: make this relaxed.
 495     return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
 496   }
 497
 498   // The same, but may fail spuriously.
 499   bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
 500     // TODO: Take advantage of the fact that it may fail spuriously.
 501     // TODO: make this relaxed.
 502     return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
 503   }
 504
 505   // Atomically replace the value with desired value if it matches the expected value. Prior accesses
 506   // made to other memory locations by the thread that did the release become visible in this
 507   // thread.
 508   bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
 509     // TODO: make this acquire.
 510     return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
 511   }
 512
 513   // Atomically replace the value with desired value if it matches the expected value. Prior accesses
 514   // to other memory locations become visible to the threads that do a consume or an acquire on the
 515   // same location.
 516   bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
 517     // TODO: make this release.
 518     return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
 519   }
 520
 521   volatile T* Address() {
 522     return &value_;
 523   }
 524
 525   T FetchAndAddSequentiallyConsistent(const T value) {
 526     if (sizeof(T) <= 4) {
 527       return __sync_fetch_and_add(&value_, value);  // Return old value.
 528     } else {
 529       T expected;
 530       do {
 531         expected = LoadRelaxed();
 532       } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected + value));
 533       return expected;
 534     }
 535   }
 536
 537   T FetchAndSubSequentiallyConsistent(const T value) {
 538     if (sizeof(T) <= 4) {
 539       return __sync_fetch_and_sub(&value_, value);  // Return old value.
 540     } else {
 541       return FetchAndAddSequentiallyConsistent(-value);
 542     }
 543   }
 544
 545   T FetchAndOrSequentiallyConsistent(const T value) {
 546     if (sizeof(T) <= 4) {
 547       return __sync_fetch_and_or(&value_, value);  // Return old value.
 548     } else {
 549       T expected;
 550       do {
 551         expected = LoadRelaxed();
 552       } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected | value));
 553       return expected;
 554     }
 555   }
 556
 557   T FetchAndAndSequentiallyConsistent(const T value) {
 558     if (sizeof(T) <= 4) {
 559       return __sync_fetch_and_and(&value_, value);  // Return old value.
 560     } else {
 561       T expected;
 562       do {
 563         expected = LoadRelaxed();
 564       } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected & value));
 565       return expected;
 566     }
 567   }
 568
 569   T operator++() {  // Prefix operator.
 570     if (sizeof(T) <= 4) {
 571       return __sync_add_and_fetch(&value_, 1);  // Return new value.
 572     } else {
 573       return FetchAndAddSequentiallyConsistent(1) + 1;
 574     }
 575   }
 576
 577   T operator++(int) {  // Postfix operator.
 578     return FetchAndAddSequentiallyConsistent(1);
 579   }
 580
 581   T operator--() {  // Prefix operator.
 582     if (sizeof(T) <= 4) {
 583       return __sync_sub_and_fetch(&value_, 1);  // Return new value.
 584     } else {
 585       return FetchAndSubSequentiallyConsistent(1) - 1;
 586     }
 587   }
 588
 589   T operator--(int) {  // Postfix operator.
 590     return FetchAndSubSequentiallyConsistent(1);
 591   }
 592
 593   static T MaxValue() {
 594     return std::numeric_limits<T>::max();
 595   }
 596
 597
 598  private:
 599   volatile T value_;
 600 };
 601 #endif
 602
 603 typedef Atomic<int32_t> AtomicInteger;
 604
 605 COMPILE_ASSERT(sizeof(AtomicInteger) == sizeof(int32_t), weird_atomic_int_size);
 606 COMPILE_ASSERT(alignof(AtomicInteger) == alignof(int32_t),
 607                atomic_int_alignment_differs_from_that_of_underlying_type);
 608 COMPILE_ASSERT(sizeof(Atomic<int64_t>) == sizeof(int64_t), weird_atomic_int64_size);
 609 #if defined(__LP64__)
 610   COMPILE_ASSERT(alignof(Atomic<int64_t>) == alignof(int64_t),
 611                  atomic_int64_alignment_differs_from_that_of_underlying_type);
 612 #endif
 613 // The above fails on x86-32.
 614 // This is OK, since we explicitly arrange for alignment of 8-byte fields.
 615
 616
 617 #if !ART_HAVE_STDATOMIC
 618 template<typename T>
 619 inline T Atomic<T>::LoadSequentiallyConsistent() const {
 620   T result = value_;
 621   if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
 622     QuasiAtomic::ThreadFenceAcquire();
 623     // We optimistically assume this suffices for store atomicity.
 624     // On ARMv8 we strengthen ThreadFenceAcquire to make that true.
 625   }
 626   return result;
 627 }
 628
 629 template<typename T>
 630 inline void Atomic<T>::StoreRelease(T desired) {
 631   if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
 632     QuasiAtomic::ThreadFenceRelease();
 633   }
 634   StoreRelaxed(desired);
 635 }
 636
 637 template<typename T>
 638 inline void Atomic<T>::StoreSequentiallyConsistent(T desired) {
 639   if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
 640     QuasiAtomic::ThreadFenceRelease();
 641   }
 642   StoreRelaxed(desired);
 643   if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
 644     QuasiAtomic::ThreadFenceSequentiallyConsistent();
 645   }
 646 }
 647
 648 #endif
 649
 650 }  // namespace art
 651
 652 #endif  // ART_RUNTIME_ATOMIC_H_