runtime/atomic.h

   1 /*
   2  * Copyright (C) 2008 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef ART_RUNTIME_ATOMIC_H_
  18 #define ART_RUNTIME_ATOMIC_H_
  19
  20 #ifdef __clang__
  21 #define ART_HAVE_STDATOMIC 1
  22 #endif
  23
  24 #include <stdint.h>
  25 #if ART_HAVE_STDATOMIC
  26 #include <atomic>
  27 #endif
  28 #include <limits>
  29 #include <vector>
  30
  31 #include "base/logging.h"
  32 #include "base/macros.h"
  33
  34 namespace art {
  35
  36 class Mutex;
  37
  38 // QuasiAtomic encapsulates two separate facilities that we are
  39 // trying to move away from:  "quasiatomic" 64 bit operations
  40 // and custom memory fences.  For the time being, they remain
  41 // exposed.  Clients should be converted to use either class Atomic
  42 // below whenever possible, and should eventually use C++11 atomics.
  43 // The two facilities that do not have a good C++11 analog are
  44 // ThreadFenceForConstructor and Atomic::*JavaData.
  45 //
  46 // NOTE: Two "quasiatomic" operations on the exact same memory address
  47 // are guaranteed to operate atomically with respect to each other,
  48 // but no guarantees are made about quasiatomic operations mixed with
  49 // non-quasiatomic operations on the same address, nor about
  50 // quasiatomic operations that are performed on partially-overlapping
  51 // memory.
  52 class QuasiAtomic {
  53 #if defined(__mips__) && !defined(__LP64__)
  54   static constexpr bool kNeedSwapMutexes = true;
  55 #else
  56   static constexpr bool kNeedSwapMutexes = false;
  57 #endif
  58
  59  public:
  60   static void Startup();
  61
  62   static void Shutdown();
  63
  64   // Reads the 64-bit value at "addr" without tearing.
  65   static int64_t Read64(volatile const int64_t* addr) {
  66     if (!kNeedSwapMutexes) {
  67       int64_t value;
  68 #if defined(__LP64__)
  69       value = *addr;
  70 #else
  71 #if defined(__arm__)
  72 #if defined(__ARM_FEATURE_LPAE)
  73       // With LPAE support (such as Cortex-A15) then ldrd is defined not to tear.
  74       __asm__ __volatile__("@ QuasiAtomic::Read64\n"
  75         "ldrd     %0, %H0, %1"
  76         : "=r" (value)
  77         : "m" (*addr));
  78 #else
  79       // Exclusive loads are defined not to tear, clearing the exclusive state isn't necessary.
  80       __asm__ __volatile__("@ QuasiAtomic::Read64\n"
  81         "ldrexd     %0, %H0, %1"
  82         : "=r" (value)
  83         : "Q" (*addr));
  84 #endif
  85 #elif defined(__i386__)
  86   __asm__ __volatile__(
  87       "movq     %1, %0\n"
  88       : "=x" (value)
  89       : "m" (*addr));
  90 #else
  91       LOG(FATAL) << "Unsupported architecture";
  92 #endif
  93 #endif  // defined(__LP64__)
  94       return value;
  95     } else {
  96       return SwapMutexRead64(addr);
  97     }
  98   }
  99
 100   // Writes to the 64-bit value at "addr" without tearing.
 101   static void Write64(volatile int64_t* addr, int64_t value) {
 102     if (!kNeedSwapMutexes) {
 103 #if defined(__LP64__)
 104       *addr = value;
 105 #else
 106 #if defined(__arm__)
 107 #if defined(__ARM_FEATURE_LPAE)
 108     // If we know that ARM architecture has LPAE (such as Cortex-A15) strd is defined not to tear.
 109     __asm__ __volatile__("@ QuasiAtomic::Write64\n"
 110       "strd     %1, %H1, %0"
 111       : "=m"(*addr)
 112       : "r" (value));
 113 #else
 114     // The write is done as a swap so that the cache-line is in the exclusive state for the store.
 115     int64_t prev;
 116     int status;
 117     do {
 118       __asm__ __volatile__("@ QuasiAtomic::Write64\n"
 119         "ldrexd     %0, %H0, %2\n"
 120         "strexd     %1, %3, %H3, %2"
 121         : "=&r" (prev), "=&r" (status), "+Q"(*addr)
 122         : "r" (value)
 123         : "cc");
 124       } while (UNLIKELY(status != 0));
 125 #endif
 126 #elif defined(__i386__)
 127       __asm__ __volatile__(
 128         "movq     %1, %0"
 129         : "=m" (*addr)
 130         : "x" (value));
 131 #else
 132       LOG(FATAL) << "Unsupported architecture";
 133 #endif
 134 #endif  // defined(__LP64__)
 135     } else {
 136       SwapMutexWrite64(addr, value);
 137     }
 138   }
 139
 140   // Atomically compare the value at "addr" to "old_value", if equal replace it with "new_value"
 141   // and return true. Otherwise, don't swap, and return false.
 142   // This is fully ordered, i.e. it has C++11 memory_order_seq_cst
 143   // semantics (assuming all other accesses use a mutex if this one does).
 144   // This has "strong" semantics; if it fails then it is guaranteed that
 145   // at some point during the execution of Cas64, *addr was not equal to
 146   // old_value.
 147   static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
 148     if (!kNeedSwapMutexes) {
 149       return __sync_bool_compare_and_swap(addr, old_value, new_value);
 150     } else {
 151       return SwapMutexCas64(old_value, new_value, addr);
 152     }
 153   }
 154
 155   // Does the architecture provide reasonable atomic long operations or do we fall back on mutexes?
 156   static bool LongAtomicsUseMutexes() {
 157     return kNeedSwapMutexes;
 158   }
 159
 160   #if ART_HAVE_STDATOMIC
 161
 162   static void ThreadFenceAcquire() {
 163     std::atomic_thread_fence(std::memory_order_acquire);
 164   }
 165
 166   static void ThreadFenceRelease() {
 167     std::atomic_thread_fence(std::memory_order_release);
 168   }
 169
 170   static void ThreadFenceForConstructor() {
 171     #if defined(__aarch64__)
 172       __asm__ __volatile__("dmb ishst" : : : "memory");
 173     #else
 174       std::atomic_thread_fence(std::memory_order_release);
 175     #endif
 176   }
 177
 178   static void ThreadFenceSequentiallyConsistent() {
 179     std::atomic_thread_fence(std::memory_order_seq_cst);
 180   }
 181
 182   #else
 183
 184   static void ThreadFenceAcquire() {
 185   #if defined(__arm__) || defined(__aarch64__)
 186     __asm__ __volatile__("dmb ish" : : : "memory");
 187     // Could possibly use dmb ishld on aarch64
 188     // But currently we also use this on volatile loads
 189     // to enforce store atomicity.  Ishld is
 190     // insufficient for that purpose.
 191   #elif defined(__i386__) || defined(__x86_64__)
 192     __asm__ __volatile__("" : : : "memory");
 193   #elif defined(__mips__)
 194     __asm__ __volatile__("sync" : : : "memory");
 195   #else
 196   #error Unexpected architecture
 197   #endif
 198   }
 199
 200   static void ThreadFenceRelease() {
 201   #if defined(__arm__) || defined(__aarch64__)
 202     __asm__ __volatile__("dmb ish" : : : "memory");
 203     // ishst doesn't order load followed by store.
 204   #elif defined(__i386__) || defined(__x86_64__)
 205     __asm__ __volatile__("" : : : "memory");
 206   #elif defined(__mips__)
 207     __asm__ __volatile__("sync" : : : "memory");
 208   #else
 209   #error Unexpected architecture
 210   #endif
 211   }
 212
 213   // Fence at the end of a constructor with final fields
 214   // or allocation.  We believe this
 215   // only has to order stores, and can thus be weaker than
 216   // release on aarch64.
 217   static void ThreadFenceForConstructor() {
 218   #if defined(__arm__) || defined(__aarch64__)
 219     __asm__ __volatile__("dmb ishst" : : : "memory");
 220   #elif defined(__i386__) || defined(__x86_64__)
 221     __asm__ __volatile__("" : : : "memory");
 222   #elif defined(__mips__)
 223     __asm__ __volatile__("sync" : : : "memory");
 224   #else
 225   #error Unexpected architecture
 226   #endif
 227   }
 228
 229   static void ThreadFenceSequentiallyConsistent() {
 230   #if defined(__arm__) || defined(__aarch64__)
 231     __asm__ __volatile__("dmb ish" : : : "memory");
 232   #elif defined(__i386__) || defined(__x86_64__)
 233     __asm__ __volatile__("mfence" : : : "memory");
 234   #elif defined(__mips__)
 235     __asm__ __volatile__("sync" : : : "memory");
 236   #else
 237   #error Unexpected architecture
 238   #endif
 239   }
 240   #endif
 241
 242  private:
 243   static Mutex* GetSwapMutex(const volatile int64_t* addr);
 244   static int64_t SwapMutexRead64(volatile const int64_t* addr);
 245   static void SwapMutexWrite64(volatile int64_t* addr, int64_t val);
 246   static bool SwapMutexCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
 247
 248   // We stripe across a bunch of different mutexes to reduce contention.
 249   static constexpr size_t kSwapMutexCount = 32;
 250   static std::vector<Mutex*>* gSwapMutexes;
 251
 252   DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
 253 };
 254
 255 #if ART_HAVE_STDATOMIC
 256 template<typename T>
 257 class Atomic : public std::atomic<T> {
 258  public:
 259   Atomic<T>() : std::atomic<T>() { }
 260
 261   explicit Atomic<T>(T value) : std::atomic<T>(value) { }
 262
 263   // Load from memory without ordering or synchronization constraints.
 264   T LoadRelaxed() const {
 265     return this->load(std::memory_order_relaxed);
 266   }
 267
 268   // Word tearing allowed, but may race.
 269   // TODO: Optimize?
 270   // There has been some discussion of eventually disallowing word
 271   // tearing for Java data loads.
 272   T LoadJavaData() const {
 273     return this->load(std::memory_order_relaxed);
 274   }
 275
 276   // Load from memory with a total ordering.
 277   // Corresponds exactly to a Java volatile load.
 278   T LoadSequentiallyConsistent() const {
 279     return this->load(std::memory_order_seq_cst);
 280   }
 281
 282   // Store to memory without ordering or synchronization constraints.
 283   void StoreRelaxed(T desired) {
 284     this->store(desired, std::memory_order_relaxed);
 285   }
 286
 287   // Word tearing allowed, but may race.
 288   void StoreJavaData(T desired) {
 289     this->store(desired, std::memory_order_relaxed);
 290   }
 291
 292   // Store to memory with release ordering.
 293   void StoreRelease(T desired) {
 294     this->store(desired, std::memory_order_release);
 295   }
 296
 297   // Store to memory with a total ordering.
 298   void StoreSequentiallyConsistent(T desired) {
 299     this->store(desired, std::memory_order_seq_cst);
 300   }
 301
 302   // Atomically replace the value with desired value if it matches the expected value.
 303   // Participates in total ordering of atomic operations.
 304   bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
 305     return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_seq_cst);
 306   }
 307
 308   // The same, except it may fail spuriously.
 309   bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
 310     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_seq_cst);
 311   }
 312
 313   // Atomically replace the value with desired value if it matches the expected value. Doesn't
 314   // imply ordering or synchronization constraints.
 315   bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
 316     return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_relaxed);
 317   }
 318
 319   // The same, except it may fail spuriously.
 320   bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
 321     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
 322   }
 323
 324   // Atomically replace the value with desired value if it matches the expected value. Prior writes
 325   // made to other memory locations by the thread that did the release become visible in this
 326   // thread.
 327   bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
 328     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
 329   }
 330
 331   // Atomically replace the value with desired value if it matches the expected value. prior writes
 332   // to other memory locations become visible to the threads that do a consume or an acquire on the
 333   // same location.
 334   bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
 335     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
 336   }
 337
 338   T FetchAndAddSequentiallyConsistent(const T value) {
 339     return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
 340   }
 341
 342   T FetchAndSubSequentiallyConsistent(const T value) {
 343     return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
 344   }
 345
 346   T FetchAndOrSequentiallyConsistent(const T value) {
 347     return this->fetch_or(value, std::memory_order_seq_cst);  // Return old_value.
 348   }
 349
 350   T FetchAndAndSequentiallyConsistent(const T value) {
 351     return this->fetch_and(value, std::memory_order_seq_cst);  // Return old_value.
 352   }
 353
 354   volatile T* Address() {
 355     return reinterpret_cast<T*>(this);
 356   }
 357
 358   static T MaxValue() {
 359     return std::numeric_limits<T>::max();
 360   }
 361 };
 362
 363 #else
 364
 365 template<typename T> class Atomic;
 366
 367 // Helper class for Atomic to deal separately with size 8 and small
 368 // objects.  Should not be used directly.
 369
 370 template<int SZ, class T> struct AtomicHelper {
 371   friend class Atomic<T>;
 372
 373  private:
 374   COMPILE_ASSERT(sizeof(T) <= 4, bad_atomic_helper_arg);
 375
 376   static T LoadRelaxed(const volatile T* loc) {
 377     // sizeof(T) <= 4
 378     return *loc;
 379   }
 380
 381   static void StoreRelaxed(volatile T* loc, T desired) {
 382     // sizeof(T) <= 4
 383     *loc = desired;
 384   }
 385
 386   static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
 387                                                   T expected_value, T desired_value) {
 388     // sizeof(T) <= 4
 389     return __sync_bool_compare_and_swap(loc, expected_value, desired_value);
 390   }
 391 };
 392
 393 template<class T> struct AtomicHelper<8, T> {
 394   friend class Atomic<T>;
 395
 396  private:
 397   COMPILE_ASSERT(sizeof(T) == 8, bad_large_atomic_helper_arg);
 398
 399   static T LoadRelaxed(const volatile T* loc) {
 400     // sizeof(T) == 8
 401     volatile const int64_t* loc_ptr =
 402               reinterpret_cast<volatile const int64_t*>(loc);
 403     return static_cast<T>(QuasiAtomic::Read64(loc_ptr));
 404   }
 405
 406   static void StoreRelaxed(volatile T* loc, T desired) {
 407     // sizeof(T) == 8
 408     volatile int64_t* loc_ptr =
 409                 reinterpret_cast<volatile int64_t*>(loc);
 410     QuasiAtomic::Write64(loc_ptr,
 411                          static_cast<int64_t>(desired));
 412   }
 413
 414
 415   static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
 416                                                   T expected_value, T desired_value) {
 417     // sizeof(T) == 8
 418     volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc);
 419     return QuasiAtomic::Cas64(
 420                  static_cast<int64_t>(reinterpret_cast<uintptr_t>(expected_value)),
 421                  static_cast<int64_t>(reinterpret_cast<uintptr_t>(desired_value)), loc_ptr);
 422   }
 423 };
 424
 425 template<typename T>
 426 class PACKED(sizeof(T)) Atomic {
 427  private:
 428   COMPILE_ASSERT(sizeof(T) <= 4 || sizeof(T) == 8, bad_atomic_arg);
 429
 430  public:
 431   Atomic<T>() : value_(0) { }
 432
 433   explicit Atomic<T>(T value) : value_(value) { }
 434
 435   // Load from memory without ordering or synchronization constraints.
 436   T LoadRelaxed() const {
 437     return AtomicHelper<sizeof(T), T>::LoadRelaxed(&value_);
 438   }
 439
 440   // Word tearing allowed, but may race.
 441   T LoadJavaData() const {
 442     return value_;
 443   }
 444
 445   // Load from memory with a total ordering.
 446   T LoadSequentiallyConsistent() const;
 447
 448   // Store to memory without ordering or synchronization constraints.
 449   void StoreRelaxed(T desired) {
 450     AtomicHelper<sizeof(T), T>::StoreRelaxed(&value_, desired);
 451   }
 452
 453   // Word tearing allowed, but may race.
 454   void StoreJavaData(T desired) {
 455     value_ = desired;
 456   }
 457
 458   // Store to memory with release ordering.
 459   void StoreRelease(T desired);
 460
 461   // Store to memory with a total ordering.
 462   void StoreSequentiallyConsistent(T desired);
 463
 464   // Atomically replace the value with desired value if it matches the expected value.
 465   // Participates in total ordering of atomic operations.
 466   bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
 467     return AtomicHelper<sizeof(T), T>::
 468         CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
 469   }
 470
 471   // The same, but may fail spuriously.
 472   bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
 473     // TODO: Take advantage of the fact that it may fail spuriously.
 474     return AtomicHelper<sizeof(T), T>::
 475         CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
 476   }
 477
 478   // Atomically replace the value with desired value if it matches the expected value. Doesn't
 479   // imply ordering or synchronization constraints.
 480   bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
 481     // TODO: make this relaxed.
 482     return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
 483   }
 484
 485   // The same, but may fail spuriously.
 486   bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
 487     // TODO: Take advantage of the fact that it may fail spuriously.
 488     // TODO: make this relaxed.
 489     return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
 490   }
 491
 492   // Atomically replace the value with desired value if it matches the expected value. Prior accesses
 493   // made to other memory locations by the thread that did the release become visible in this
 494   // thread.
 495   bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
 496     // TODO: make this acquire.
 497     return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
 498   }
 499
 500   // Atomically replace the value with desired value if it matches the expected value. Prior accesses
 501   // to other memory locations become visible to the threads that do a consume or an acquire on the
 502   // same location.
 503   bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
 504     // TODO: make this release.
 505     return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
 506   }
 507
 508   volatile T* Address() {
 509     return &value_;
 510   }
 511
 512   T FetchAndAddSequentiallyConsistent(const T value) {
 513     if (sizeof(T) <= 4) {
 514       return __sync_fetch_and_add(&value_, value);  // Return old value.
 515     } else {
 516       T expected;
 517       do {
 518         expected = LoadRelaxed();
 519       } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected + value));
 520       return expected;
 521     }
 522   }
 523
 524   T FetchAndSubSequentiallyConsistent(const T value) {
 525     if (sizeof(T) <= 4) {
 526       return __sync_fetch_and_sub(&value_, value);  // Return old value.
 527     } else {
 528       return FetchAndAddSequentiallyConsistent(-value);
 529     }
 530   }
 531
 532   T FetchAndOrSequentiallyConsistent(const T value) {
 533     if (sizeof(T) <= 4) {
 534       return __sync_fetch_and_or(&value_, value);  // Return old value.
 535     } else {
 536       T expected;
 537       do {
 538         expected = LoadRelaxed();
 539       } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected | value));
 540       return expected;
 541     }
 542   }
 543
 544   T FetchAndAndSequentiallyConsistent(const T value) {
 545     if (sizeof(T) <= 4) {
 546       return __sync_fetch_and_and(&value_, value);  // Return old value.
 547     } else {
 548       T expected;
 549       do {
 550         expected = LoadRelaxed();
 551       } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected & value));
 552       return expected;
 553     }
 554   }
 555
 556   T operator++() {  // Prefix operator.
 557     if (sizeof(T) <= 4) {
 558       return __sync_add_and_fetch(&value_, 1);  // Return new value.
 559     } else {
 560       return FetchAndAddSequentiallyConsistent(1) + 1;
 561     }
 562   }
 563
 564   T operator++(int) {  // Postfix operator.
 565     return FetchAndAddSequentiallyConsistent(1);
 566   }
 567
 568   T operator--() {  // Prefix operator.
 569     if (sizeof(T) <= 4) {
 570       return __sync_sub_and_fetch(&value_, 1);  // Return new value.
 571     } else {
 572       return FetchAndSubSequentiallyConsistent(1) - 1;
 573     }
 574   }
 575
 576   T operator--(int) {  // Postfix operator.
 577     return FetchAndSubSequentiallyConsistent(1);
 578   }
 579
 580   static T MaxValue() {
 581     return std::numeric_limits<T>::max();
 582   }
 583
 584
 585  private:
 586   volatile T value_;
 587 };
 588 #endif
 589
 590 typedef Atomic<int32_t> AtomicInteger;
 591
 592 COMPILE_ASSERT(sizeof(AtomicInteger) == sizeof(int32_t), weird_atomic_int_size);
 593 COMPILE_ASSERT(alignof(AtomicInteger) == alignof(int32_t),
 594                atomic_int_alignment_differs_from_that_of_underlying_type);
 595 COMPILE_ASSERT(sizeof(Atomic<int64_t>) == sizeof(int64_t), weird_atomic_int64_size);
 596 #if defined(__LP64__)
 597   COMPILE_ASSERT(alignof(Atomic<int64_t>) == alignof(int64_t),
 598                  atomic_int64_alignment_differs_from_that_of_underlying_type);
 599 #endif
 600 // The above fails on x86-32.
 601 // This is OK, since we explicitly arrange for alignment of 8-byte fields.
 602
 603
 604 #if !ART_HAVE_STDATOMIC
 605 template<typename T>
 606 inline T Atomic<T>::LoadSequentiallyConsistent() const {
 607   T result = value_;
 608   if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
 609     QuasiAtomic::ThreadFenceAcquire();
 610     // We optimistically assume this suffices for store atomicity.
 611     // On ARMv8 we strengthen ThreadFenceAcquire to make that true.
 612   }
 613   return result;
 614 }
 615
 616 template<typename T>
 617 inline void Atomic<T>::StoreRelease(T desired) {
 618   if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
 619     QuasiAtomic::ThreadFenceRelease();
 620   }
 621   StoreRelaxed(desired);
 622 }
 623
 624 template<typename T>
 625 inline void Atomic<T>::StoreSequentiallyConsistent(T desired) {
 626   if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
 627     QuasiAtomic::ThreadFenceRelease();
 628   }
 629   StoreRelaxed(desired);
 630   if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
 631     QuasiAtomic::ThreadFenceSequentiallyConsistent();
 632   }
 633 }
 634
 635 #endif
 636
 637 }  // namespace art
 638
 639 #endif  // ART_RUNTIME_ATOMIC_H_