2 * Copyright (C) 2008 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef ART_RUNTIME_ATOMIC_H_
18 #define ART_RUNTIME_ATOMIC_H_
21 #define ART_HAVE_STDATOMIC 1
25 #if ART_HAVE_STDATOMIC
31 #include "base/logging.h"
32 #include "base/macros.h"
38 // QuasiAtomic encapsulates two separate facilities that we are
39 // trying to move away from: "quasiatomic" 64 bit operations
40 // and custom memory fences. For the time being, they remain
41 // exposed. Clients should be converted to use either class Atomic
42 // below whenever possible, and should eventually use C++11 atomics.
43 // The two facilities that do not have a good C++11 analog are
44 // ThreadFenceForConstructor and Atomic::*JavaData.
46 // NOTE: Two "quasiatomic" operations on the exact same memory address
47 // are guaranteed to operate atomically with respect to each other,
48 // but no guarantees are made about quasiatomic operations mixed with
49 // non-quasiatomic operations on the same address, nor about
50 // quasiatomic operations that are performed on partially-overlapping
// Selects the 64-bit atomics strategy: when kNeedSwapMutexes is true, the
// 64-bit "quasiatomic" operations below serialize through striped swap
// mutexes instead of hardware primitives. Only 32-bit MIPS takes the mutex
// path here.
// NOTE(review): interior lines are missing from this chunk (embedded
// original line numbers jump 54 -> 56); the #else/#endif of this
// conditional are not visible.
53 #if defined(__mips__) && !defined(__LP64__)
54 static constexpr bool kNeedSwapMutexes = true;
56 static constexpr bool kNeedSwapMutexes = false;
// Process-wide init/teardown; presumably these create and destroy the
// gSwapMutexes striping table declared below -- TODO confirm in atomic.cc.
60 static void Startup();
62 static void Shutdown();
64 // Reads the 64-bit value at "addr" without tearing.
// NOTE(review): several interior lines of this function are missing from
// this chunk (embedded numbers jump 66 -> 72, 74 -> 79, 85 -> 91); the asm
// operand lists and the __LP64__/i386 branches are incomplete as shown.
65 static int64_t Read64(volatile const int64_t* addr) {
66 if (!kNeedSwapMutexes) {
// 32-bit ARM: choose the cheapest load that is architecturally non-tearing.
72 #if defined(__ARM_FEATURE_LPAE)
73 // With LPAE support (such as Cortex-A15) then ldrd is defined not to tear.
74 __asm__ __volatile__("@ QuasiAtomic::Read64\n"
// Without LPAE: fall back to an exclusive load (ldrexd).
79 // Exclusive loads are defined not to tear, clearing the exclusive state isn't necessary.
80 __asm__ __volatile__("@ QuasiAtomic::Read64\n"
85 #elif defined(__i386__)
// No known non-tearing 64-bit load pattern for other architectures.
91 LOG(FATAL) << "Unsupported architecture";
93 #endif // defined(__LP64__)
// Mutex fallback (kNeedSwapMutexes): serialize via the striped swap mutexes.
96 return SwapMutexRead64(addr);
100 // Writes to the 64-bit value at "addr" without tearing.
// NOTE(review): interior lines are missing here too (embedded numbers jump
// 103 -> 107, 109 -> 114, 121 -> 124); asm bodies and branch structure are
// incomplete as shown.
101 static void Write64(volatile int64_t* addr, int64_t value) {
102 if (!kNeedSwapMutexes) {
103 #if defined(__LP64__)
107 #if defined(__ARM_FEATURE_LPAE)
108 // If we know that ARM architecture has LPAE (such as Cortex-A15) strd is defined not to tear.
109 __asm__ __volatile__("@ QuasiAtomic::Write64\n"
114 // The write is done as a swap so that the cache-line is in the exclusive state for the store.
// ldrexd/strexd retry loop: the exclusive store reports failure in
// "status" (non-zero) and the write is retried until it succeeds.
118 __asm__ __volatile__("@ QuasiAtomic::Write64\n"
119 "ldrexd %0, %H0, %2\n"
120 "strexd %1, %3, %H3, %2"
121 : "=&r" (prev), "=&r" (status), "+Q"(*addr)
124 } while (UNLIKELY(status != 0));
126 #elif defined(__i386__)
127 __asm__ __volatile__(
132 LOG(FATAL) << "Unsupported architecture";
134 #endif // defined(__LP64__)
// Mutex fallback for architectures without a non-tearing 64-bit store.
136 SwapMutexWrite64(addr, value);
140 // Atomically compare the value at "addr" to "old_value", if equal replace it with "new_value"
141 // and return true. Otherwise, don't swap, and return false.
142 // This is fully ordered, i.e. it has C++11 memory_order_seq_cst
143 // semantics (assuming all other accesses use a mutex if this one does).
144 // This has "strong" semantics; if it fails then it is guaranteed that
145 // at some point during the execution of Cas64, *addr was not equal to
// NOTE(review): the tail of the comment above (original line 146) and the
// closing braces of this function are missing from this chunk.
147 static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
148 if (!kNeedSwapMutexes) {
// Legacy GCC/Clang builtin: full-barrier, strong (no spurious failure) CAS.
149 return __sync_bool_compare_and_swap(addr, old_value, new_value);
151 return SwapMutexCas64(old_value, new_value, addr);
155 // Does the architecture provide reasonable atomic long operations or do we fall back on mutexes?
156 static bool LongAtomicsUseMutexes() {
// True only where kNeedSwapMutexes is set above (32-bit MIPS).
157 return kNeedSwapMutexes;
// When C++11 <atomic> is available, the fences are std::atomic_thread_fence
// calls, except the constructor fence on aarch64 which uses a hand-written
// store-store barrier (weaker than release; see rationale at original
// lines 213-216 below).
// NOTE(review): closing braces of these functions are missing from this
// chunk (embedded numbers jump 163 -> 166, 167 -> 170, ...).
160 #if ART_HAVE_STDATOMIC
162 static void ThreadFenceAcquire() {
163 std::atomic_thread_fence(std::memory_order_acquire);
166 static void ThreadFenceRelease() {
167 std::atomic_thread_fence(std::memory_order_release);
170 static void ThreadFenceForConstructor() {
171 #if defined(__aarch64__)
// "dmb ishst" orders stores against stores only -- cheaper than a full
// release fence.
172 __asm__ __volatile__("dmb ishst" : : : "memory");
// Other architectures: plain release fence.
174 std::atomic_thread_fence(std::memory_order_release);
178 static void ThreadFenceSequentiallyConsistent() {
179 std::atomic_thread_fence(std::memory_order_seq_cst);
// Fallback (no <atomic>): acquire fence written as per-architecture asm.
184 static void ThreadFenceAcquire() {
185 #if defined(__arm__) || defined(__aarch64__)
186 __asm__ __volatile__("dmb ish" : : : "memory");
187 // Could possibly use dmb ishld on aarch64
188 // But currently we also use this on volatile loads
189 // to enforce store atomicity. Ishld is
190 // insufficient for that purpose.
191 #elif defined(__i386__) || defined(__x86_64__)
// Compiler-only barrier; presumably relies on x86's strong hardware
// ordering of loads -- no fence instruction emitted.
192 __asm__ __volatile__("" : : : "memory");
193 #elif defined(__mips__)
194 __asm__ __volatile__("sync" : : : "memory");
196 #error Unexpected architecture
// Fallback (no <atomic>): release fence written as per-architecture asm.
200 static void ThreadFenceRelease() {
201 #if defined(__arm__) || defined(__aarch64__)
// Full "dmb ish" rather than "dmb ishst":
202 __asm__ __volatile__("dmb ish" : : : "memory");
203 // ishst doesn't order load followed by store.
204 #elif defined(__i386__) || defined(__x86_64__)
// Compiler-only barrier; no fence instruction emitted on x86.
205 __asm__ __volatile__("" : : : "memory");
206 #elif defined(__mips__)
207 __asm__ __volatile__("sync" : : : "memory");
209 #error Unexpected architecture
213 // Fence at the end of a constructor with final fields
214 // or allocation. We believe this
215 // only has to order stores, and can thus be weaker than
216 // release on aarch64.
217 static void ThreadFenceForConstructor() {
218 #if defined(__arm__) || defined(__aarch64__)
// Store-store barrier only, per the rationale in the comment above.
219 __asm__ __volatile__("dmb ishst" : : : "memory");
220 #elif defined(__i386__) || defined(__x86_64__)
221 __asm__ __volatile__("" : : : "memory");
222 #elif defined(__mips__)
223 __asm__ __volatile__("sync" : : : "memory");
225 #error Unexpected architecture
// Fallback (no <atomic>): full sequentially-consistent fence.
229 static void ThreadFenceSequentiallyConsistent() {
230 #if defined(__arm__) || defined(__aarch64__)
231 __asm__ __volatile__("dmb ish" : : : "memory");
232 #elif defined(__i386__) || defined(__x86_64__)
// Unlike the acquire/release fences above, seq_cst needs a real fence
// instruction on x86.
233 __asm__ __volatile__("mfence" : : : "memory");
234 #elif defined(__mips__)
235 __asm__ __volatile__("sync" : : : "memory");
237 #error Unexpected architecture
// Swap-mutex machinery used when kNeedSwapMutexes is true: each 64-bit
// operation locks a mutex selected from the striped table by address.
243 static Mutex* GetSwapMutex(const volatile int64_t* addr);
244 static int64_t SwapMutexRead64(volatile const int64_t* addr);
245 static void SwapMutexWrite64(volatile int64_t* addr, int64_t val);
246 static bool SwapMutexCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
248 // We stripe across a bunch of different mutexes to reduce contention.
249 static constexpr size_t kSwapMutexCount = 32;
// Table of kSwapMutexCount mutexes; presumably allocated by Startup() and
// freed by Shutdown() -- TODO confirm in atomic.cc.
250 static std::vector<Mutex*>* gSwapMutexes;
252 DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
// Atomic<T> when C++11 <atomic> is available: a thin wrapper over
// std::atomic<T> that exposes ART's explicitly-named memory-order
// operations (LoadRelaxed, StoreRelease, ...) plus the *JavaData accessors.
// NOTE(review): this chunk is missing interior lines (the template<...>
// header at original line 256, access specifiers, and the closing braces
// of each member are not visible; embedded numbers jump 265 -> 268, etc.).
255 #if ART_HAVE_STDATOMIC
257 class Atomic : public std::atomic<T> {
259 Atomic<T>() : std::atomic<T>() { }
261 explicit Atomic<T>(T value) : std::atomic<T>(value) { }
263 // Load from memory without ordering or synchronization constraints.
264 T LoadRelaxed() const {
265 return this->load(std::memory_order_relaxed);
268 // Word tearing allowed, but may race.
270 // There has been some discussion of eventually disallowing word
271 // tearing for Java data loads.
272 T LoadJavaData() const {
273 return this->load(std::memory_order_relaxed);
276 // Load from memory with a total ordering.
277 // Corresponds exactly to a Java volatile load.
278 T LoadSequentiallyConsistent() const {
279 return this->load(std::memory_order_seq_cst);
282 // Store to memory without ordering or synchronization constraints.
283 void StoreRelaxed(T desired) {
284 this->store(desired, std::memory_order_relaxed);
287 // Word tearing allowed, but may race.
288 void StoreJavaData(T desired) {
289 this->store(desired, std::memory_order_relaxed);
292 // Store to memory with release ordering.
293 void StoreRelease(T desired) {
294 this->store(desired, std::memory_order_release);
297 // Store to memory with a total ordering.
298 void StoreSequentiallyConsistent(T desired) {
299 this->store(desired, std::memory_order_seq_cst);
302 // Atomically replace the value with desired value if it matches the expected value.
303 // Participates in total ordering of atomic operations.
304 bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
305 return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_seq_cst);
308 // The same, except it may fail spuriously.
309 bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
310 return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_seq_cst);
313 // Atomically replace the value with desired value if it matches the expected value. Doesn't
314 // imply ordering or synchronization constraints.
315 bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
316 return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_relaxed);
319 // The same, except it may fail spuriously.
320 bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
321 return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
324 // Atomically replace the value with desired value if it matches the expected value. Prior writes
325 // made to other memory locations by the thread that did the release become visible in this
// NOTE(review): the tail of the comment above (original line 326) is
// missing from this chunk.
327 bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
328 return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
331 // Atomically replace the value with desired value if it matches the expected value. prior writes
332 // to other memory locations become visible to the threads that do a consume or an acquire on the
// NOTE(review): the tail of the comment above (original line 333) is
// missing from this chunk.
334 bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
335 return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
// Sequentially-consistent read-modify-write operations; each returns the
// value held before the operation.
338 T FetchAndAddSequentiallyConsistent(const T value) {
339 return this->fetch_add(value, std::memory_order_seq_cst); // Return old_value.
342 T FetchAndSubSequentiallyConsistent(const T value) {
343 return this->fetch_sub(value, std::memory_order_seq_cst); // Return old value.
346 T FetchAndOrSequentiallyConsistent(const T value) {
347 return this->fetch_or(value, std::memory_order_seq_cst); // Return old_value.
350 T FetchAndAndSequentiallyConsistent(const T value) {
351 return this->fetch_and(value, std::memory_order_seq_cst); // Return old_value.
// Exposes the raw storage as a volatile T*. Assumes std::atomic<T> is
// layout-compatible with T -- implementation-specific; TODO confirm for
// the supported toolchains.
354 volatile T* Address() {
355 return reinterpret_cast<T*>(this);
// Largest value representable by T.
358 static T MaxValue() {
359 return std::numeric_limits<T>::max();
365 template<typename T> class Atomic;
367 // Helper class for Atomic to deal separately with size 8 and small
368 // objects. Should not be used directly.
// Primary template: handles types of at most 4 bytes, where plain accesses
// and __sync builtins suffice.
// NOTE(review): the bodies of LoadRelaxed and StoreRelaxed are missing
// interior lines in this chunk (embedded numbers jump 376 -> 381).
370 template<int SZ, class T> struct AtomicHelper {
371 friend class Atomic<T>;
374 COMPILE_ASSERT(sizeof(T) <= 4, bad_atomic_helper_arg);
376 static T LoadRelaxed(const volatile T* loc) {
381 static void StoreRelaxed(volatile T* loc, T desired) {
386 static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
387 T expected_value, T desired_value) {
// Full-barrier strong CAS via the legacy GCC/Clang builtin.
389 return __sync_bool_compare_and_swap(loc, expected_value, desired_value);
// 8-byte specialization: routes every access through QuasiAtomic so that
// 64-bit loads/stores/CAS do not tear even on 32-bit targets.
// NOTE(review): interior lines are missing (embedded numbers jump
// 403 -> 406, 411 -> 415); closing braces are not visible.
393 template<class T> struct AtomicHelper<8, T> {
394 friend class Atomic<T>;
397 COMPILE_ASSERT(sizeof(T) == 8, bad_large_atomic_helper_arg);
399 static T LoadRelaxed(const volatile T* loc) {
// Reinterpret the storage as int64_t so QuasiAtomic can operate on it.
401 volatile const int64_t* loc_ptr =
402 reinterpret_cast<volatile const int64_t*>(loc);
403 return static_cast<T>(QuasiAtomic::Read64(loc_ptr));
406 static void StoreRelaxed(volatile T* loc, T desired) {
408 volatile int64_t* loc_ptr =
409 reinterpret_cast<volatile int64_t*>(loc);
410 QuasiAtomic::Write64(loc_ptr,
411 static_cast<int64_t>(desired));
415 static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
416 T expected_value, T desired_value) {
418 volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc);
// NOTE(review): unlike Load/Store above, the values are converted through
// reinterpret_cast<uintptr_t> here, which suggests this path is meant for
// pointer-like T -- confirm the intended instantiations.
419 return QuasiAtomic::Cas64(
420 static_cast<int64_t>(reinterpret_cast<uintptr_t>(expected_value)),
421 static_cast<int64_t>(reinterpret_cast<uintptr_t>(desired_value)), loc_ptr);
// Atomic<T> for toolchains without C++11 <atomic>: a packed wrapper whose
// operations are built from AtomicHelper accesses, QuasiAtomic fences, and
// __sync builtins. Mirrors the interface of the std::atomic-based Atomic
// above.
// NOTE(review): this chunk is missing many interior lines (the template
// header, access specifiers, the value_ member declaration, do{ lines of
// the CAS loops, and most closing braces are not visible; embedded numbers
// jump 437 -> 440, 514 -> 518, etc.).
426 class PACKED(sizeof(T)) Atomic {
428 COMPILE_ASSERT(sizeof(T) <= 4 || sizeof(T) == 8, bad_atomic_arg);
431 Atomic<T>() : value_(0) { }
433 explicit Atomic<T>(T value) : value_(value) { }
435 // Load from memory without ordering or synchronization constraints.
436 T LoadRelaxed() const {
437 return AtomicHelper<sizeof(T), T>::LoadRelaxed(&value_);
440 // Word tearing allowed, but may race.
441 T LoadJavaData() const {
445 // Load from memory with a total ordering.
446 T LoadSequentiallyConsistent() const;
448 // Store to memory without ordering or synchronization constraints.
449 void StoreRelaxed(T desired) {
450 AtomicHelper<sizeof(T), T>::StoreRelaxed(&value_, desired);
453 // Word tearing allowed, but may race.
454 void StoreJavaData(T desired) {
458 // Store to memory with release ordering.
459 void StoreRelease(T desired);
461 // Store to memory with a total ordering.
462 void StoreSequentiallyConsistent(T desired);
464 // Atomically replace the value with desired value if it matches the expected value.
465 // Participates in total ordering of atomic operations.
466 bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
467 return AtomicHelper<sizeof(T), T>::
468 CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
471 // The same, but may fail spuriously.
472 bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
473 // TODO: Take advantage of the fact that it may fail spuriously.
// Conservatively implemented with the strong CAS.
474 return AtomicHelper<sizeof(T), T>::
475 CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
478 // Atomically replace the value with desired value if it matches the expected value. Doesn't
479 // imply ordering or synchronization constraints.
480 bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
481 // TODO: make this relaxed.
// Conservatively stronger than required (seq_cst instead of relaxed).
482 return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
485 // The same, but may fail spuriously.
486 bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
487 // TODO: Take advantage of the fact that it may fail spuriously.
488 // TODO: make this relaxed.
489 return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
492 // Atomically replace the value with desired value if it matches the expected value. Prior accesses
493 // made to other memory locations by the thread that did the release become visible in this
495 bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
496 // TODO: make this acquire.
497 return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
500 // Atomically replace the value with desired value if it matches the expected value. Prior accesses
501 // to other memory locations become visible to the threads that do a consume or an acquire on the
503 bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
504 // TODO: make this release.
505 return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
// Exposes the raw storage; body not fully visible in this chunk.
508 volatile T* Address() {
// Read-modify-write operations. Small types use the __sync builtins
// directly; the 8-byte case (lines partially missing here) retries a
// relaxed-load + weak-CAS loop until the CAS succeeds.
512 T FetchAndAddSequentiallyConsistent(const T value) {
513 if (sizeof(T) <= 4) {
514 return __sync_fetch_and_add(&value_, value); // Return old value.
518 expected = LoadRelaxed();
519 } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected + value));
524 T FetchAndSubSequentiallyConsistent(const T value) {
525 if (sizeof(T) <= 4) {
526 return __sync_fetch_and_sub(&value_, value); // Return old value.
// 8-byte path: subtraction expressed as addition of the negation.
528 return FetchAndAddSequentiallyConsistent(-value);
532 T FetchAndOrSequentiallyConsistent(const T value) {
533 if (sizeof(T) <= 4) {
534 return __sync_fetch_and_or(&value_, value); // Return old value.
538 expected = LoadRelaxed();
539 } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected | value));
544 T FetchAndAndSequentiallyConsistent(const T value) {
545 if (sizeof(T) <= 4) {
546 return __sync_fetch_and_and(&value_, value); // Return old value.
550 expected = LoadRelaxed();
551 } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected & value));
556 T operator++() { // Prefix operator.
557 if (sizeof(T) <= 4) {
558 return __sync_add_and_fetch(&value_, 1); // Return new value.
560 return FetchAndAddSequentiallyConsistent(1) + 1;
564 T operator++(int) { // Postfix operator.
565 return FetchAndAddSequentiallyConsistent(1);
568 T operator--() { // Prefix operator.
569 if (sizeof(T) <= 4) {
570 return __sync_sub_and_fetch(&value_, 1); // Return new value.
572 return FetchAndSubSequentiallyConsistent(1) - 1;
576 T operator--(int) { // Postfix operator.
577 return FetchAndSubSequentiallyConsistent(1);
// Largest value representable by T.
580 static T MaxValue() {
581 return std::numeric_limits<T>::max();
590 typedef Atomic<int32_t> AtomicInteger;
// Compile-time layout checks: Atomic<T> must have exactly the size (and,
// where asserted, the alignment) of the underlying type so it can overlay
// plain storage.
592 COMPILE_ASSERT(sizeof(AtomicInteger) == sizeof(int32_t), weird_atomic_int_size);
593 COMPILE_ASSERT(alignof(AtomicInteger) == alignof(int32_t),
594 atomic_int_alignment_differs_from_that_of_underlying_type);
595 COMPILE_ASSERT(sizeof(Atomic<int64_t>) == sizeof(int64_t), weird_atomic_int64_size);
// The 8-byte alignment check is only made on LP64 targets:
596 #if defined(__LP64__)
597 COMPILE_ASSERT(alignof(Atomic<int64_t>) == alignof(int64_t),
598 atomic_int64_alignment_differs_from_that_of_underlying_type);
600 // The above fails on x86-32.
601 // This is OK, since we explicitly arrange for alignment of 8-byte fields.
// Out-of-line definitions for the non-<atomic> Atomic<T>: seq_cst and
// release operations are assembled from relaxed accesses bracketed by
// QuasiAtomic fences. Fences are skipped when the access goes through the
// swap mutexes (8-byte T with LongAtomicsUseMutexes()), since the mutex
// presumably provides the ordering itself -- TODO confirm.
// NOTE(review): the template<typename T> headers and the closing braces of
// these functions are missing from this chunk (embedded numbers jump
// 611 -> 617, 621 -> 625).
604 #if !ART_HAVE_STDATOMIC
606 inline T Atomic<T>::LoadSequentiallyConsistent() const {
608 if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
609 QuasiAtomic::ThreadFenceAcquire();
610 // We optimistically assume this suffices for store atomicity.
611 // On ARMv8 we strengthen ThreadFenceAcquire to make that true.
// Release store: fence before the relaxed store.
617 inline void Atomic<T>::StoreRelease(T desired) {
618 if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
619 QuasiAtomic::ThreadFenceRelease();
621 StoreRelaxed(desired);
// Seq_cst store: release fence before, full fence after the relaxed store.
625 inline void Atomic<T>::StoreSequentiallyConsistent(T desired) {
626 if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
627 QuasiAtomic::ThreadFenceRelease();
629 StoreRelaxed(desired);
630 if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
631 QuasiAtomic::ThreadFenceSequentiallyConsistent();
639 #endif // ART_RUNTIME_ATOMIC_H_