From: Hans Boehm Date: Thu, 31 Jul 2014 22:53:22 +0000 (-0700) Subject: Add memory ordering constraint, convert to C11 atomics X-Git-Tag: android-x86-7.1-r1~757^2~767^2 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=30214b901e8dbec9ec11230187a8e71fc8a04014;p=android-x86%2Fbionic.git Add memory ordering constraint, convert to C11 atomics Add an ordering constraint/fence to __system_property_serial. This slows down a read on a Nexus 5 from about 50 to about 70 ns, but avoids the possibility of seeing an inconsistent property value. Use C11 atomic operations where easy and appropriate. This code remains not fully C++11 memory model conformant, but I would now expect the generated code to now be correct with current compilers. Bug:14970171 Change-Id: I0891ff1d0f914ae5c3857e3d76b6a7c8a4a07d83 --- diff --git a/libc/bionic/system_properties.cpp b/libc/bionic/system_properties.cpp index a564c3939..30081a59d 100644 --- a/libc/bionic/system_properties.cpp +++ b/libc/bionic/system_properties.cpp @@ -26,6 +26,7 @@ * SUCH DAMAGE. */ #include +#include #include #include #include @@ -45,7 +46,6 @@ #include #include #include -#include #define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_ #include @@ -80,6 +80,16 @@ struct prop_bt { uint8_t namelen; uint8_t reserved[3]; + // TODO: The following fields should be declared as atomic_uint32_t. + // They should be assigned to with release semantics, instead of using + // explicit fences. Unfortunately, the read accesses are generally + // followed by more dependent read accesses, and the dependence + // is assumed to enforce memory ordering. Which it does on supported + // hardware. This technically should use memory_order_consume, if + // that worked as intended. + // We should also avoid rereading these fields redundantly, since not + // all processor implementations ensure that multiple loads from the + // same field are carried out in the right order. volatile uint32_t prop; volatile uint32_t left; @@ -93,7 +103,8 @@ struct prop_bt { this->namelen = name_length; memcpy(this->name, name, name_length); this->name[name_length] = '\0'; - ANDROID_MEMBAR_FULL(); + ANDROID_MEMBAR_FULL(); // TODO: Instead use a release store + // for subsequent pointer assignment. } private: @@ -102,14 +113,15 @@ private: struct prop_area { uint32_t bytes_used; - volatile uint32_t serial; + atomic_uint_least32_t serial; uint32_t magic; uint32_t version; uint32_t reserved[28]; char data[0]; prop_area(const uint32_t magic, const uint32_t version) : - serial(0), magic(magic), version(version) { + magic(magic), version(version) { + atomic_init(&serial, 0); memset(reserved, 0, sizeof(reserved)); // Allocate enough space for the root node. bytes_used = sizeof(prop_bt); @@ -120,7 +132,7 @@ private: }; struct prop_info { - volatile uint32_t serial; + atomic_uint_least32_t serial; char value[PROP_VALUE_MAX]; char name[0]; @@ -128,10 +140,11 @@ struct prop_info { const uint8_t valuelen) { memcpy(this->name, name, namelen); this->name[namelen] = '\0'; - this->serial = (valuelen << 24); + atomic_init(&this->serial, valuelen << 24); memcpy(this->value, value, valuelen); this->value[valuelen] = '\0'; - ANDROID_MEMBAR_FULL(); + ANDROID_MEMBAR_FULL(); // TODO: Instead use a release store + // for subsequent point assignment. } private: DISALLOW_COPY_AND_ASSIGN(prop_info); @@ -605,11 +618,20 @@ int __system_property_read(const prop_info *pi, char *name, char *value) } while (true) { - uint32_t serial = __system_property_serial(pi); + uint32_t serial = __system_property_serial(pi); // acquire semantics size_t len = SERIAL_VALUE_LEN(serial); memcpy(value, pi->value, len + 1); - ANDROID_MEMBAR_FULL(); - if (serial == pi->serial) { + // TODO: Fix the synchronization scheme here. + // There is no fully supported way to implement this kind + // of synchronization in C++11, since the memcpy races with + // updates to pi, and the data being accessed is not atomic. + // The following fence is unintuitive, but would be the + // correct one if memcpy used memory_order_relaxed atomic accesses. + // In practice it seems unlikely that the generated code would + // would be any different, so this should be OK. + atomic_thread_fence(memory_order_acquire); + if (serial == + atomic_load_explicit(&(pi->serial), memory_order_relaxed)) { if (name != 0) { strcpy(name, pi->name); } @@ -658,14 +680,24 @@ int __system_property_update(prop_info *pi, const char *value, unsigned int len) if (len >= PROP_VALUE_MAX) return -1; - pi->serial = pi->serial | 1; - ANDROID_MEMBAR_FULL(); + uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_relaxed); + serial |= 1; + atomic_store_explicit(&pi->serial, serial, memory_order_relaxed); + // The memcpy call here also races. Again pretend it + // used memory_order_relaxed atomics, and use the analogous + // counterintuitive fence. + atomic_thread_fence(memory_order_release); memcpy(pi->value, value, len + 1); - ANDROID_MEMBAR_FULL(); - pi->serial = (len << 24) | ((pi->serial + 1) & 0xffffff); + atomic_store_explicit( + &pi->serial, + (len << 24) | ((serial + 1) & 0xffffff), + memory_order_release); __futex_wake(&pi->serial, INT32_MAX); - pa->serial++; + atomic_store_explicit( + &pa->serial, + atomic_load_explicit(&pa->serial, memory_order_relaxed) + 1, + memory_order_release); __futex_wake(&pa->serial, INT32_MAX); return 0; @@ -688,17 +720,25 @@ int __system_property_add(const char *name, unsigned int namelen, if (!pi) return -1; - pa->serial++; + // There is only a single mutator, but we want to make sure that + // updates are visible to a reader waiting for the update. + atomic_store_explicit( + &pa->serial, + atomic_load_explicit(&pa->serial, memory_order_relaxed) + 1, + memory_order_release); __futex_wake(&pa->serial, INT32_MAX); return 0; } +// Wait for non-locked serial, and retrieve it with acquire semantics. unsigned int __system_property_serial(const prop_info *pi) { - uint32_t serial = pi->serial; + uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_acquire); while (SERIAL_DIRTY(serial)) { - __futex_wait(const_cast(&pi->serial), serial, NULL); - serial = pi->serial; + __futex_wait(const_cast( + reinterpret_cast(&pi->serial)), + serial, NULL); + serial = atomic_load_explicit(&pi->serial, memory_order_acquire); } return serial; } @@ -706,12 +746,14 @@ unsigned int __system_property_serial(const prop_info *pi) unsigned int __system_property_wait_any(unsigned int serial) { prop_area *pa = __system_property_area__; + uint32_t my_serial; do { __futex_wait(&pa->serial, serial, NULL); - } while (pa->serial == serial); + my_serial = atomic_load_explicit(&pa->serial, memory_order_acquire); + } while (my_serial == serial); - return pa->serial; + return my_serial; } const prop_info *__system_property_find_nth(unsigned n)