runtime/utils.cc

   1 /*
   2  * Copyright (C) 2011 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "utils.h"
  18
  19 #include <inttypes.h>
  20 #include <pthread.h>
  21 #include <sys/stat.h>
  22 #include <sys/syscall.h>
  23 #include <sys/types.h>
  24 #include <sys/wait.h>
  25 #include <unistd.h>
  26 #include <memory>
  27
  28 #include "art_field-inl.h"
  29 #include "art_method-inl.h"
  30 #include "base/stl_util.h"
  31 #include "base/unix_file/fd_file.h"
  32 #include "dex_file-inl.h"
  33 #include "dex_instruction.h"
  34 #include "mirror/class-inl.h"
  35 #include "mirror/class_loader.h"
  36 #include "mirror/object-inl.h"
  37 #include "mirror/object_array-inl.h"
  38 #include "mirror/string.h"
  39 #include "oat_quick_method_header.h"
  40 #include "os.h"
  41 #include "scoped_thread_state_change.h"
  42 #include "utf-inl.h"
  43
  44 #if defined(__APPLE__)
  45 #include "AvailabilityMacros.h"  // For MAC_OS_X_VERSION_MAX_ALLOWED
  46 #include <sys/syscall.h>
  47 #endif
  48
  49 // For DumpNativeStack.
  50 #include <backtrace/Backtrace.h>
  51 #include <backtrace/BacktraceMap.h>
  52
  53 #if defined(__linux__)
  54 #include <linux/unistd.h>
  55 #endif
  56
  57 namespace art {
  58
#if defined(__linux__)
// Compile-time switch, defined on Linux only: true exactly when this is not a
// target build (i.e. kIsTargetBuild is false).
// NOTE(review): presumably gates addr2line-based symbolization in DumpNativeStack;
// the use site is outside this part of the file -- confirm.
static constexpr bool kUseAddr2line = !kIsTargetBuild;
#endif
  62
  63 pid_t GetTid() {
  64 #if defined(__APPLE__)
  65   uint64_t owner;
  66   CHECK_PTHREAD_CALL(pthread_threadid_np, (nullptr, &owner), __FUNCTION__);  // Requires Mac OS 10.6
  67   return owner;
  68 #elif defined(__BIONIC__)
  69   return gettid();
  70 #else
  71   return syscall(__NR_gettid);
  72 #endif
  73 }
  74
  75 std::string GetThreadName(pid_t tid) {
  76   std::string result;
  77   if (ReadFileToString(StringPrintf("/proc/self/task/%d/comm", tid), &result)) {
  78     result.resize(result.size() - 1);  // Lose the trailing '\n'.
  79   } else {
  80     result = "<unknown>";
  81   }
  82   return result;
  83 }
  84
  85 void GetThreadStack(pthread_t thread, void** stack_base, size_t* stack_size, size_t* guard_size) {
  86 #if defined(__APPLE__)
  87   *stack_size = pthread_get_stacksize_np(thread);
  88   void* stack_addr = pthread_get_stackaddr_np(thread);
  89
  90   // Check whether stack_addr is the base or end of the stack.
  91   // (On Mac OS 10.7, it's the end.)
  92   int stack_variable;
  93   if (stack_addr > &stack_variable) {
  94     *stack_base = reinterpret_cast<uint8_t*>(stack_addr) - *stack_size;
  95   } else {
  96     *stack_base = stack_addr;
  97   }
  98
  99   // This is wrong, but there doesn't seem to be a way to get the actual value on the Mac.
 100   pthread_attr_t attributes;
 101   CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), __FUNCTION__);
 102   CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, guard_size), __FUNCTION__);
 103   CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), __FUNCTION__);
 104 #else
 105   pthread_attr_t attributes;
 106   CHECK_PTHREAD_CALL(pthread_getattr_np, (thread, &attributes), __FUNCTION__);
 107   CHECK_PTHREAD_CALL(pthread_attr_getstack, (&attributes, stack_base, stack_size), __FUNCTION__);
 108   CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, guard_size), __FUNCTION__);
 109   CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), __FUNCTION__);
 110
 111 #if defined(__GLIBC__)
 112   // If we're the main thread, check whether we were run with an unlimited stack. In that case,
 113   // glibc will have reported a 2GB stack for our 32-bit process, and our stack overflow detection
 114   // will be broken because we'll die long before we get close to 2GB.
 115   bool is_main_thread = (::art::GetTid() == getpid());
 116   if (is_main_thread) {
 117     rlimit stack_limit;
 118     if (getrlimit(RLIMIT_STACK, &stack_limit) == -1) {
 119       PLOG(FATAL) << "getrlimit(RLIMIT_STACK) failed";
 120     }
 121     if (stack_limit.rlim_cur == RLIM_INFINITY) {
 122       size_t old_stack_size = *stack_size;
 123
 124       // Use the kernel default limit as our size, and adjust the base to match.
 125       *stack_size = 8 * MB;
 126       *stack_base = reinterpret_cast<uint8_t*>(*stack_base) + (old_stack_size - *stack_size);
 127
 128       VLOG(threads) << "Limiting unlimited stack (reported as " << PrettySize(old_stack_size) << ")"
 129                     << " to " << PrettySize(*stack_size)
 130                     << " with base " << *stack_base;
 131     }
 132   }
 133 #endif
 134
 135 #endif
 136 }
 137
 138 bool ReadFileToString(const std::string& file_name, std::string* result) {
 139   File file;
 140   if (!file.Open(file_name, O_RDONLY)) {
 141     return false;
 142   }
 143
 144   std::vector<char> buf(8 * KB);
 145   while (true) {
 146     int64_t n = TEMP_FAILURE_RETRY(read(file.Fd(), &buf[0], buf.size()));
 147     if (n == -1) {
 148       return false;
 149     }
 150     if (n == 0) {
 151       return true;
 152     }
 153     result->append(&buf[0], n);
 154   }
 155 }
 156
 157 bool PrintFileToLog(const std::string& file_name, LogSeverity level) {
 158   File file;
 159   if (!file.Open(file_name, O_RDONLY)) {
 160     return false;
 161   }
 162
 163   constexpr size_t kBufSize = 256;  // Small buffer. Avoid stack overflow and stack size warnings.
 164   char buf[kBufSize + 1];           // +1 for terminator.
 165   size_t filled_to = 0;
 166   while (true) {
 167     DCHECK_LT(filled_to, kBufSize);
 168     int64_t n = TEMP_FAILURE_RETRY(read(file.Fd(), &buf[filled_to], kBufSize - filled_to));
 169     if (n <= 0) {
 170       // Print the rest of the buffer, if it exists.
 171       if (filled_to > 0) {
 172         buf[filled_to] = 0;
 173         LOG(level) << buf;
 174       }
 175       return n == 0;
 176     }
 177     // Scan for '\n'.
 178     size_t i = filled_to;
 179     bool found_newline = false;
 180     for (; i < filled_to + n; ++i) {
 181       if (buf[i] == '\n') {
 182         // Found a line break, that's something to print now.
 183         buf[i] = 0;
 184         LOG(level) << buf;
 185         // Copy the rest to the front.
 186         if (i + 1 < filled_to + n) {
 187           memmove(&buf[0], &buf[i + 1], filled_to + n - i - 1);
 188           filled_to = filled_to + n - i - 1;
 189         } else {
 190           filled_to = 0;
 191         }
 192         found_newline = true;
 193         break;
 194       }
 195     }
 196     if (found_newline) {
 197       continue;
 198     } else {
 199       filled_to += n;
 200       // Check if we must flush now.
 201       if (filled_to == kBufSize) {
 202         buf[kBufSize] = 0;
 203         LOG(level) << buf;
 204         filled_to = 0;
 205       }
 206     }
 207   }
 208 }
 209
 210 std::string PrettyDescriptor(mirror::String* java_descriptor) {
 211   if (java_descriptor == nullptr) {
 212     return "null";
 213   }
 214   return PrettyDescriptor(java_descriptor->ToModifiedUtf8().c_str());
 215 }
 216
 217 std::string PrettyDescriptor(mirror::Class* klass) {
 218   if (klass == nullptr) {
 219     return "null";
 220   }
 221   std::string temp;
 222   return PrettyDescriptor(klass->GetDescriptor(&temp));
 223 }
 224
 225 std::string PrettyDescriptor(const char* descriptor) {
 226   // Count the number of '['s to get the dimensionality.
 227   const char* c = descriptor;
 228   size_t dim = 0;
 229   while (*c == '[') {
 230     dim++;
 231     c++;
 232   }
 233
 234   // Reference or primitive?
 235   if (*c == 'L') {
 236     // "[[La/b/C;" -> "a.b.C[][]".
 237     c++;  // Skip the 'L'.
 238   } else {
 239     // "[[B" -> "byte[][]".
 240     // To make life easier, we make primitives look like unqualified
 241     // reference types.
 242     switch (*c) {
 243     case 'B': c = "byte;"; break;
 244     case 'C': c = "char;"; break;
 245     case 'D': c = "double;"; break;
 246     case 'F': c = "float;"; break;
 247     case 'I': c = "int;"; break;
 248     case 'J': c = "long;"; break;
 249     case 'S': c = "short;"; break;
 250     case 'Z': c = "boolean;"; break;
 251     case 'V': c = "void;"; break;  // Used when decoding return types.
 252     default: return descriptor;
 253     }
 254   }
 255
 256   // At this point, 'c' is a string of the form "fully/qualified/Type;"
 257   // or "primitive;". Rewrite the type with '.' instead of '/':
 258   std::string result;
 259   const char* p = c;
 260   while (*p != ';') {
 261     char ch = *p++;
 262     if (ch == '/') {
 263       ch = '.';
 264     }
 265     result.push_back(ch);
 266   }
 267   // ...and replace the semicolon with 'dim' "[]" pairs:
 268   for (size_t i = 0; i < dim; ++i) {
 269     result += "[]";
 270   }
 271   return result;
 272 }
 273
 274 std::string PrettyField(ArtField* f, bool with_type) {
 275   if (f == nullptr) {
 276     return "null";
 277   }
 278   std::string result;
 279   if (with_type) {
 280     result += PrettyDescriptor(f->GetTypeDescriptor());
 281     result += ' ';
 282   }
 283   std::string temp;
 284   result += PrettyDescriptor(f->GetDeclaringClass()->GetDescriptor(&temp));
 285   result += '.';
 286   result += f->GetName();
 287   return result;
 288 }
 289
 290 std::string PrettyField(uint32_t field_idx, const DexFile& dex_file, bool with_type) {
 291   if (field_idx >= dex_file.NumFieldIds()) {
 292     return StringPrintf("<<invalid-field-idx-%d>>", field_idx);
 293   }
 294   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
 295   std::string result;
 296   if (with_type) {
 297     result += dex_file.GetFieldTypeDescriptor(field_id);
 298     result += ' ';
 299   }
 300   result += PrettyDescriptor(dex_file.GetFieldDeclaringClassDescriptor(field_id));
 301   result += '.';
 302   result += dex_file.GetFieldName(field_id);
 303   return result;
 304 }
 305
 306 std::string PrettyType(uint32_t type_idx, const DexFile& dex_file) {
 307   if (type_idx >= dex_file.NumTypeIds()) {
 308     return StringPrintf("<<invalid-type-idx-%d>>", type_idx);
 309   }
 310   const DexFile::TypeId& type_id = dex_file.GetTypeId(type_idx);
 311   return PrettyDescriptor(dex_file.GetTypeDescriptor(type_id));
 312 }
 313
 314 std::string PrettyArguments(const char* signature) {
 315   std::string result;
 316   result += '(';
 317   CHECK_EQ(*signature, '(');
 318   ++signature;  // Skip the '('.
 319   while (*signature != ')') {
 320     size_t argument_length = 0;
 321     while (signature[argument_length] == '[') {
 322       ++argument_length;
 323     }
 324     if (signature[argument_length] == 'L') {
 325       argument_length = (strchr(signature, ';') - signature + 1);
 326     } else {
 327       ++argument_length;
 328     }
 329     {
 330       std::string argument_descriptor(signature, argument_length);
 331       result += PrettyDescriptor(argument_descriptor.c_str());
 332     }
 333     if (signature[argument_length] != ')') {
 334       result += ", ";
 335     }
 336     signature += argument_length;
 337   }
 338   CHECK_EQ(*signature, ')');
 339   ++signature;  // Skip the ')'.
 340   result += ')';
 341   return result;
 342 }
 343
 344 std::string PrettyReturnType(const char* signature) {
 345   const char* return_type = strchr(signature, ')');
 346   CHECK(return_type != nullptr);
 347   ++return_type;  // Skip ')'.
 348   return PrettyDescriptor(return_type);
 349 }
 350
 351 std::string PrettyMethod(ArtMethod* m, bool with_signature) {
 352   if (m == nullptr) {
 353     return "null";
 354   }
 355   if (!m->IsRuntimeMethod()) {
 356     m = m->GetInterfaceMethodIfProxy(Runtime::Current()->GetClassLinker()->GetImagePointerSize());
 357   }
 358   std::string result(PrettyDescriptor(m->GetDeclaringClassDescriptor()));
 359   result += '.';
 360   result += m->GetName();
 361   if (UNLIKELY(m->IsFastNative())) {
 362     result += "!";
 363   }
 364   if (with_signature) {
 365     const Signature signature = m->GetSignature();
 366     std::string sig_as_string(signature.ToString());
 367     if (signature == Signature::NoSignature()) {
 368       return result + sig_as_string;
 369     }
 370     result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
 371         PrettyArguments(sig_as_string.c_str());
 372   }
 373   return result;
 374 }
 375
 376 std::string PrettyMethod(uint32_t method_idx, const DexFile& dex_file, bool with_signature) {
 377   if (method_idx >= dex_file.NumMethodIds()) {
 378     return StringPrintf("<<invalid-method-idx-%d>>", method_idx);
 379   }
 380   const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
 381   std::string result(PrettyDescriptor(dex_file.GetMethodDeclaringClassDescriptor(method_id)));
 382   result += '.';
 383   result += dex_file.GetMethodName(method_id);
 384   if (with_signature) {
 385     const Signature signature = dex_file.GetMethodSignature(method_id);
 386     std::string sig_as_string(signature.ToString());
 387     if (signature == Signature::NoSignature()) {
 388       return result + sig_as_string;
 389     }
 390     result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
 391         PrettyArguments(sig_as_string.c_str());
 392   }
 393   return result;
 394 }
 395
 396 std::string PrettyTypeOf(mirror::Object* obj) {
 397   if (obj == nullptr) {
 398     return "null";
 399   }
 400   if (obj->GetClass() == nullptr) {
 401     return "(raw)";
 402   }
 403   std::string temp;
 404   std::string result(PrettyDescriptor(obj->GetClass()->GetDescriptor(&temp)));
 405   if (obj->IsClass()) {
 406     result += "<" + PrettyDescriptor(obj->AsClass()->GetDescriptor(&temp)) + ">";
 407   }
 408   return result;
 409 }
 410
 411 std::string PrettyClass(mirror::Class* c) {
 412   if (c == nullptr) {
 413     return "null";
 414   }
 415   std::string result;
 416   result += "java.lang.Class<";
 417   result += PrettyDescriptor(c);
 418   result += ">";
 419   return result;
 420 }
 421
 422 std::string PrettyClassAndClassLoader(mirror::Class* c) {
 423   if (c == nullptr) {
 424     return "null";
 425   }
 426   std::string result;
 427   result += "java.lang.Class<";
 428   result += PrettyDescriptor(c);
 429   result += ",";
 430   result += PrettyTypeOf(c->GetClassLoader());
 431   // TODO: add an identifying hash value for the loader
 432   result += ">";
 433   return result;
 434 }
 435
 436 std::string PrettyJavaAccessFlags(uint32_t access_flags) {
 437   std::string result;
 438   if ((access_flags & kAccPublic) != 0) {
 439     result += "public ";
 440   }
 441   if ((access_flags & kAccProtected) != 0) {
 442     result += "protected ";
 443   }
 444   if ((access_flags & kAccPrivate) != 0) {
 445     result += "private ";
 446   }
 447   if ((access_flags & kAccFinal) != 0) {
 448     result += "final ";
 449   }
 450   if ((access_flags & kAccStatic) != 0) {
 451     result += "static ";
 452   }
 453   if ((access_flags & kAccTransient) != 0) {
 454     result += "transient ";
 455   }
 456   if ((access_flags & kAccVolatile) != 0) {
 457     result += "volatile ";
 458   }
 459   if ((access_flags & kAccSynchronized) != 0) {
 460     result += "synchronized ";
 461   }
 462   return result;
 463 }
 464
 465 std::string PrettySize(int64_t byte_count) {
 466   // The byte thresholds at which we display amounts.  A byte count is displayed
 467   // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
 468   static const int64_t kUnitThresholds[] = {
 469     0,              // B up to...
 470     3*1024,         // KB up to...
 471     2*1024*1024,    // MB up to...
 472     1024*1024*1024  // GB from here.
 473   };
 474   static const int64_t kBytesPerUnit[] = { 1, KB, MB, GB };
 475   static const char* const kUnitStrings[] = { "B", "KB", "MB", "GB" };
 476   const char* negative_str = "";
 477   if (byte_count < 0) {
 478     negative_str = "-";
 479     byte_count = -byte_count;
 480   }
 481   int i = arraysize(kUnitThresholds);
 482   while (--i > 0) {
 483     if (byte_count >= kUnitThresholds[i]) {
 484       break;
 485     }
 486   }
 487   return StringPrintf("%s%" PRId64 "%s",
 488                       negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
 489 }
 490
 491 std::string PrintableChar(uint16_t ch) {
 492   std::string result;
 493   result += '\'';
 494   if (NeedsEscaping(ch)) {
 495     StringAppendF(&result, "\\u%04x", ch);
 496   } else {
 497     result += ch;
 498   }
 499   result += '\'';
 500   return result;
 501 }
 502
 503 std::string PrintableString(const char* utf) {
 504   std::string result;
 505   result += '"';
 506   const char* p = utf;
 507   size_t char_count = CountModifiedUtf8Chars(p);
 508   for (size_t i = 0; i < char_count; ++i) {
 509     uint32_t ch = GetUtf16FromUtf8(&p);
 510     if (ch == '\\') {
 511       result += "\\\\";
 512     } else if (ch == '\n') {
 513       result += "\\n";
 514     } else if (ch == '\r') {
 515       result += "\\r";
 516     } else if (ch == '\t') {
 517       result += "\\t";
 518     } else {
 519       const uint16_t leading = GetLeadingUtf16Char(ch);
 520
 521       if (NeedsEscaping(leading)) {
 522         StringAppendF(&result, "\\u%04x", leading);
 523       } else {
 524         result += leading;
 525       }
 526
 527       const uint32_t trailing = GetTrailingUtf16Char(ch);
 528       if (trailing != 0) {
 529         // All high surrogates will need escaping.
 530         StringAppendF(&result, "\\u%04x", trailing);
 531       }
 532     }
 533   }
 534   result += '"';
 535   return result;
 536 }
 537
 538 // See http://java.sun.com/j2se/1.5.0/docs/guide/jni/spec/design.html#wp615 for the full rules.
 539 std::string MangleForJni(const std::string& s) {
 540   std::string result;
 541   size_t char_count = CountModifiedUtf8Chars(s.c_str());
 542   const char* cp = &s[0];
 543   for (size_t i = 0; i < char_count; ++i) {
 544     uint32_t ch = GetUtf16FromUtf8(&cp);
 545     if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')) {
 546       result.push_back(ch);
 547     } else if (ch == '.' || ch == '/') {
 548       result += "_";
 549     } else if (ch == '_') {
 550       result += "_1";
 551     } else if (ch == ';') {
 552       result += "_2";
 553     } else if (ch == '[') {
 554       result += "_3";
 555     } else {
 556       const uint16_t leading = GetLeadingUtf16Char(ch);
 557       const uint32_t trailing = GetTrailingUtf16Char(ch);
 558
 559       StringAppendF(&result, "_0%04x", leading);
 560       if (trailing != 0) {
 561         StringAppendF(&result, "_0%04x", trailing);
 562       }
 563     }
 564   }
 565   return result;
 566 }
 567
 568 std::string DotToDescriptor(const char* class_name) {
 569   std::string descriptor(class_name);
 570   std::replace(descriptor.begin(), descriptor.end(), '.', '/');
 571   if (descriptor.length() > 0 && descriptor[0] != '[') {
 572     descriptor = "L" + descriptor + ";";
 573   }
 574   return descriptor;
 575 }
 576
 577 std::string DescriptorToDot(const char* descriptor) {
 578   size_t length = strlen(descriptor);
 579   if (length > 1) {
 580     if (descriptor[0] == 'L' && descriptor[length - 1] == ';') {
 581       // Descriptors have the leading 'L' and trailing ';' stripped.
 582       std::string result(descriptor + 1, length - 2);
 583       std::replace(result.begin(), result.end(), '/', '.');
 584       return result;
 585     } else {
 586       // For arrays the 'L' and ';' remain intact.
 587       std::string result(descriptor);
 588       std::replace(result.begin(), result.end(), '/', '.');
 589       return result;
 590     }
 591   }
 592   // Do nothing for non-class/array descriptors.
 593   return descriptor;
 594 }
 595
 596 std::string DescriptorToName(const char* descriptor) {
 597   size_t length = strlen(descriptor);
 598   if (descriptor[0] == 'L' && descriptor[length - 1] == ';') {
 599     std::string result(descriptor + 1, length - 2);
 600     return result;
 601   }
 602   return descriptor;
 603 }
 604
 605 std::string JniShortName(ArtMethod* m) {
 606   std::string class_name(m->GetDeclaringClassDescriptor());
 607   // Remove the leading 'L' and trailing ';'...
 608   CHECK_EQ(class_name[0], 'L') << class_name;
 609   CHECK_EQ(class_name[class_name.size() - 1], ';') << class_name;
 610   class_name.erase(0, 1);
 611   class_name.erase(class_name.size() - 1, 1);
 612
 613   std::string method_name(m->GetName());
 614
 615   std::string short_name;
 616   short_name += "Java_";
 617   short_name += MangleForJni(class_name);
 618   short_name += "_";
 619   short_name += MangleForJni(method_name);
 620   return short_name;
 621 }
 622
 623 std::string JniLongName(ArtMethod* m) {
 624   std::string long_name;
 625   long_name += JniShortName(m);
 626   long_name += "__";
 627
 628   std::string signature(m->GetSignature().ToString());
 629   signature.erase(0, 1);
 630   signature.erase(signature.begin() + signature.find(')'), signature.end());
 631
 632   long_name += MangleForJni(signature);
 633
 634   return long_name;
 635 }
 636
// Helper for IsValidPartOfMemberNameUtf8(), a bit vector indicating valid low ascii.
//
// Layout: character c (0x00..0x7f) is valid iff bit (c & 0x1f) of word (c >> 5)
// is set; bit 0 is the least significant bit. Each word therefore covers 32
// consecutive characters, as listed below.
uint32_t DEX_MEMBER_VALID_LOW_ASCII[4] = {
  0x00000000,  // 00..1f low control characters; nothing valid
  0x03ff2010,  // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
  0x87fffffe,  // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
  0x07fffffe   // 60..7f lowercase etc.; valid: 'a'..'z'
};
 644
 645 // Helper for IsValidPartOfMemberNameUtf8(); do not call directly.
 646 bool IsValidPartOfMemberNameUtf8Slow(const char** pUtf8Ptr) {
 647   /*
 648    * It's a multibyte encoded character. Decode it and analyze. We
 649    * accept anything that isn't (a) an improperly encoded low value,
 650    * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
 651    * control character, or (e) a high space, layout, or special
 652    * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
 653    * U+fff0..U+ffff). This is all specified in the dex format
 654    * document.
 655    */
 656
 657   const uint32_t pair = GetUtf16FromUtf8(pUtf8Ptr);
 658   const uint16_t leading = GetLeadingUtf16Char(pair);
 659
 660   // We have a surrogate pair resulting from a valid 4 byte UTF sequence.
 661   // No further checks are necessary because 4 byte sequences span code
 662   // points [U+10000, U+1FFFFF], which are valid codepoints in a dex
 663   // identifier. Furthermore, GetUtf16FromUtf8 guarantees that each of
 664   // the surrogate halves are valid and well formed in this instance.
 665   if (GetTrailingUtf16Char(pair) != 0) {
 666     return true;
 667   }
 668
 669
 670   // We've encountered a one, two or three byte UTF-8 sequence. The
 671   // three byte UTF-8 sequence could be one half of a surrogate pair.
 672   switch (leading >> 8) {
 673     case 0x00:
 674       // It's only valid if it's above the ISO-8859-1 high space (0xa0).
 675       return (leading > 0x00a0);
 676     case 0xd8:
 677     case 0xd9:
 678     case 0xda:
 679     case 0xdb:
 680       {
 681         // We found a three byte sequence encoding one half of a surrogate.
 682         // Look for the other half.
 683         const uint32_t pair2 = GetUtf16FromUtf8(pUtf8Ptr);
 684         const uint16_t trailing = GetLeadingUtf16Char(pair2);
 685
 686         return (GetTrailingUtf16Char(pair2) == 0) && (0xdc00 <= trailing && trailing <= 0xdfff);
 687       }
 688     case 0xdc:
 689     case 0xdd:
 690     case 0xde:
 691     case 0xdf:
 692       // It's a trailing surrogate, which is not valid at this point.
 693       return false;
 694     case 0x20:
 695     case 0xff:
 696       // It's in the range that has spaces, controls, and specials.
 697       switch (leading & 0xfff8) {
 698         case 0x2000:
 699         case 0x2008:
 700         case 0x2028:
 701         case 0xfff0:
 702         case 0xfff8:
 703           return false;
 704       }
 705       return true;
 706     default:
 707       return true;
 708   }
 709
 710   UNREACHABLE();
 711 }
 712
 713 /* Return whether the pointed-at modified-UTF-8 encoded character is
 714  * valid as part of a member name, updating the pointer to point past
 715  * the consumed character. This will consume two encoded UTF-16 code
 716  * points if the character is encoded as a surrogate pair. Also, if
 717  * this function returns false, then the given pointer may only have
 718  * been partially advanced.
 719  */
 720 static bool IsValidPartOfMemberNameUtf8(const char** pUtf8Ptr) {
 721   uint8_t c = (uint8_t) **pUtf8Ptr;
 722   if (LIKELY(c <= 0x7f)) {
 723     // It's low-ascii, so check the table.
 724     uint32_t wordIdx = c >> 5;
 725     uint32_t bitIdx = c & 0x1f;
 726     (*pUtf8Ptr)++;
 727     return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1 << bitIdx)) != 0;
 728   }
 729
 730   // It's a multibyte encoded character. Call a non-inline function
 731   // for the heavy lifting.
 732   return IsValidPartOfMemberNameUtf8Slow(pUtf8Ptr);
 733 }
 734
 735 bool IsValidMemberName(const char* s) {
 736   bool angle_name = false;
 737
 738   switch (*s) {
 739     case '\0':
 740       // The empty string is not a valid name.
 741       return false;
 742     case '<':
 743       angle_name = true;
 744       s++;
 745       break;
 746   }
 747
 748   while (true) {
 749     switch (*s) {
 750       case '\0':
 751         return !angle_name;
 752       case '>':
 753         return angle_name && s[1] == '\0';
 754     }
 755
 756     if (!IsValidPartOfMemberNameUtf8(&s)) {
 757       return false;
 758     }
 759   }
 760 }
 761
 762 enum ClassNameType { kName, kDescriptor };
 763 template<ClassNameType kType, char kSeparator>
 764 static bool IsValidClassName(const char* s) {
 765   int arrayCount = 0;
 766   while (*s == '[') {
 767     arrayCount++;
 768     s++;
 769   }
 770
 771   if (arrayCount > 255) {
 772     // Arrays may have no more than 255 dimensions.
 773     return false;
 774   }
 775
 776   ClassNameType type = kType;
 777   if (type != kDescriptor && arrayCount != 0) {
 778     /*
 779      * If we're looking at an array of some sort, then it doesn't
 780      * matter if what is being asked for is a class name; the
 781      * format looks the same as a type descriptor in that case, so
 782      * treat it as such.
 783      */
 784     type = kDescriptor;
 785   }
 786
 787   if (type == kDescriptor) {
 788     /*
 789      * We are looking for a descriptor. Either validate it as a
 790      * single-character primitive type, or continue on to check the
 791      * embedded class name (bracketed by "L" and ";").
 792      */
 793     switch (*(s++)) {
 794     case 'B':
 795     case 'C':
 796     case 'D':
 797     case 'F':
 798     case 'I':
 799     case 'J':
 800     case 'S':
 801     case 'Z':
 802       // These are all single-character descriptors for primitive types.
 803       return (*s == '\0');
 804     case 'V':
 805       // Non-array void is valid, but you can't have an array of void.
 806       return (arrayCount == 0) && (*s == '\0');
 807     case 'L':
 808       // Class name: Break out and continue below.
 809       break;
 810     default:
 811       // Oddball descriptor character.
 812       return false;
 813     }
 814   }
 815
 816   /*
 817    * We just consumed the 'L' that introduces a class name as part
 818    * of a type descriptor, or we are looking for an unadorned class
 819    * name.
 820    */
 821
 822   bool sepOrFirst = true;  // first character or just encountered a separator.
 823   for (;;) {
 824     uint8_t c = (uint8_t) *s;
 825     switch (c) {
 826     case '\0':
 827       /*
 828        * Premature end for a type descriptor, but valid for
 829        * a class name as long as we haven't encountered an
 830        * empty component (including the degenerate case of
 831        * the empty string "").
 832        */
 833       return (type == kName) && !sepOrFirst;
 834     case ';':
 835       /*
 836        * Invalid character for a class name, but the
 837        * legitimate end of a type descriptor. In the latter
 838        * case, make sure that this is the end of the string
 839        * and that it doesn't end with an empty component
 840        * (including the degenerate case of "L;").
 841        */
 842       return (type == kDescriptor) && !sepOrFirst && (s[1] == '\0');
 843     case '/':
 844     case '.':
 845       if (c != kSeparator) {
 846         // The wrong separator character.
 847         return false;
 848       }
 849       if (sepOrFirst) {
 850         // Separator at start or two separators in a row.
 851         return false;
 852       }
 853       sepOrFirst = true;
 854       s++;
 855       break;
 856     default:
 857       if (!IsValidPartOfMemberNameUtf8(&s)) {
 858         return false;
 859       }
 860       sepOrFirst = false;
 861       break;
 862     }
 863   }
 864 }
 865
 866 bool IsValidBinaryClassName(const char* s) {
 867   return IsValidClassName<kName, '.'>(s);
 868 }
 869
 870 bool IsValidJniClassName(const char* s) {
 871   return IsValidClassName<kName, '/'>(s);
 872 }
 873
 874 bool IsValidDescriptor(const char* s) {
 875   return IsValidClassName<kDescriptor, '/'>(s);
 876 }
 877
 878 void Split(const std::string& s, char separator, std::vector<std::string>* result) {
 879   const char* p = s.data();
 880   const char* end = p + s.size();
 881   while (p != end) {
 882     if (*p == separator) {
 883       ++p;
 884     } else {
 885       const char* start = p;
 886       while (++p != end && *p != separator) {
 887         // Skip to the next occurrence of the separator.
 888       }
 889       result->push_back(std::string(start, p - start));
 890     }
 891   }
 892 }
 893
 894 std::string Trim(const std::string& s) {
 895   std::string result;
 896   unsigned int start_index = 0;
 897   unsigned int end_index = s.size() - 1;
 898
 899   // Skip initial whitespace.
 900   while (start_index < s.size()) {
 901     if (!isspace(s[start_index])) {
 902       break;
 903     }
 904     start_index++;
 905   }
 906
 907   // Skip terminating whitespace.
 908   while (end_index >= start_index) {
 909     if (!isspace(s[end_index])) {
 910       break;
 911     }
 912     end_index--;
 913   }
 914
 915   // All spaces, no beef.
 916   if (end_index < start_index) {
 917     return "";
 918   }
 919   // Start_index is the first non-space, end_index is the last one.
 920   return s.substr(start_index, end_index - start_index + 1);
 921 }
 922
 923 template <typename StringT>
 924 std::string Join(const std::vector<StringT>& strings, char separator) {
 925   if (strings.empty()) {
 926     return "";
 927   }
 928
 929   std::string result(strings[0]);
 930   for (size_t i = 1; i < strings.size(); ++i) {
 931     result += separator;
 932     result += strings[i];
 933   }
 934   return result;
 935 }
 936
 937 // Explicit instantiations.
 938 template std::string Join<std::string>(const std::vector<std::string>& strings, char separator);
 939 template std::string Join<const char*>(const std::vector<const char*>& strings, char separator);
 940
 941 bool StartsWith(const std::string& s, const char* prefix) {
 942   return s.compare(0, strlen(prefix), prefix) == 0;
 943 }
 944
 945 bool EndsWith(const std::string& s, const char* suffix) {
 946   size_t suffix_length = strlen(suffix);
 947   size_t string_length = s.size();
 948   if (suffix_length > string_length) {
 949     return false;
 950   }
 951   size_t offset = string_length - suffix_length;
 952   return s.compare(offset, suffix_length, suffix) == 0;
 953 }
 954
 955 void SetThreadName(const char* thread_name) {
 956   int hasAt = 0;
 957   int hasDot = 0;
 958   const char* s = thread_name;
 959   while (*s) {
 960     if (*s == '.') {
 961       hasDot = 1;
 962     } else if (*s == '@') {
 963       hasAt = 1;
 964     }
 965     s++;
 966   }
 967   int len = s - thread_name;
 968   if (len < 15 || hasAt || !hasDot) {
 969     s = thread_name;
 970   } else {
 971     s = thread_name + len - 15;
 972   }
 973 #if defined(__linux__)
 974   // pthread_setname_np fails rather than truncating long strings.
 975   char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded in the kernel.
 976   strncpy(buf, s, sizeof(buf)-1);
 977   buf[sizeof(buf)-1] = '\0';
 978   errno = pthread_setname_np(pthread_self(), buf);
 979   if (errno != 0) {
 980     PLOG(WARNING) << "Unable to set the name of current thread to '" << buf << "'";
 981   }
 982 #else  // __APPLE__
 983   pthread_setname_np(thread_name);
 984 #endif
 985 }
 986
 987 void GetTaskStats(pid_t tid, char* state, int* utime, int* stime, int* task_cpu) {
 988   *utime = *stime = *task_cpu = 0;
 989   std::string stats;
 990   if (!ReadFileToString(StringPrintf("/proc/self/task/%d/stat", tid), &stats)) {
 991     return;
 992   }
 993   // Skip the command, which may contain spaces.
 994   stats = stats.substr(stats.find(')') + 2);
 995   // Extract the three fields we care about.
 996   std::vector<std::string> fields;
 997   Split(stats, ' ', &fields);
 998   *state = fields[0][0];
 999   *utime = strtoull(fields[11].c_str(), nullptr, 10);
1000   *stime = strtoull(fields[12].c_str(), nullptr, 10);
1001   *task_cpu = strtoull(fields[36].c_str(), nullptr, 10);
1002 }
1003
1004 std::string GetSchedulerGroupName(pid_t tid) {
1005   // /proc/<pid>/cgroup looks like this:
1006   // 2:devices:/
1007   // 1:cpuacct,cpu:/
1008   // We want the third field from the line whose second field contains the "cpu" token.
1009   std::string cgroup_file;
1010   if (!ReadFileToString(StringPrintf("/proc/self/task/%d/cgroup", tid), &cgroup_file)) {
1011     return "";
1012   }
1013   std::vector<std::string> cgroup_lines;
1014   Split(cgroup_file, '\n', &cgroup_lines);
1015   for (size_t i = 0; i < cgroup_lines.size(); ++i) {
1016     std::vector<std::string> cgroup_fields;
1017     Split(cgroup_lines[i], ':', &cgroup_fields);
1018     std::vector<std::string> cgroups;
1019     Split(cgroup_fields[1], ',', &cgroups);
1020     for (size_t j = 0; j < cgroups.size(); ++j) {
1021       if (cgroups[j] == "cpu") {
1022         return cgroup_fields[2].substr(1);  // Skip the leading slash.
1023       }
1024     }
1025   }
1026   return "";
1027 }
1028
1029 #if defined(__linux__)
1030
1031 ALWAYS_INLINE
1032 static inline void WritePrefix(std::ostream* os, const char* prefix, bool odd) {
1033   if (prefix != nullptr) {
1034     *os << prefix;
1035   }
1036   *os << "  ";
1037   if (!odd) {
1038     *os << " ";
1039   }
1040 }
1041
1042 static bool RunCommand(std::string cmd, std::ostream* os, const char* prefix) {
1043   FILE* stream = popen(cmd.c_str(), "r");
1044   if (stream) {
1045     if (os != nullptr) {
1046       bool odd_line = true;               // We indent them differently.
1047       bool wrote_prefix = false;          // Have we already written a prefix?
1048       constexpr size_t kMaxBuffer = 128;  // Relatively small buffer. Should be OK as we're on an
1049                                           // alt stack, but just to be sure...
1050       char buffer[kMaxBuffer];
1051       while (!feof(stream)) {
1052         if (fgets(buffer, kMaxBuffer, stream) != nullptr) {
1053           // Split on newlines.
1054           char* tmp = buffer;
1055           for (;;) {
1056             char* new_line = strchr(tmp, '\n');
1057             if (new_line == nullptr) {
1058               // Print the rest.
1059               if (*tmp != 0) {
1060                 if (!wrote_prefix) {
1061                   WritePrefix(os, prefix, odd_line);
1062                 }
1063                 wrote_prefix = true;
1064                 *os << tmp;
1065               }
1066               break;
1067             }
1068             if (!wrote_prefix) {
1069               WritePrefix(os, prefix, odd_line);
1070             }
1071             char saved = *(new_line + 1);
1072             *(new_line + 1) = 0;
1073             *os << tmp;
1074             *(new_line + 1) = saved;
1075             tmp = new_line + 1;
1076             odd_line = !odd_line;
1077             wrote_prefix = false;
1078           }
1079         }
1080       }
1081     }
1082     pclose(stream);
1083     return true;
1084   } else {
1085     return false;
1086   }
1087 }
1088
1089 static void Addr2line(const std::string& map_src, uintptr_t offset, std::ostream& os,
1090                       const char* prefix) {
1091   std::string cmdline(StringPrintf("addr2line --functions --inlines --demangle -e %s %zx",
1092                                    map_src.c_str(), offset));
1093   RunCommand(cmdline.c_str(), &os, prefix);
1094 }
1095
1096 static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
1097   uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
1098       method->GetEntryPointFromQuickCompiledCode()));
1099   if (code == 0) {
1100     return pc == 0;
1101   }
1102   uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
1103   return code <= pc && pc <= (code + code_size);
1104 }
1105 #endif
1106
// Writes the native stack of thread |tid| to |os|, one line per frame, each preceded by |prefix|.
//
// |existing_map| is used for the unwind if non-null; otherwise a temporary map for the current
// process is created for the duration of the call. |ucontext_ptr| is forwarded to the unwinder.
// |current_method|, if non-null, is used to name frames that have no symbol but whose pc lies
// within that method's quick code. If unwinding fails or yields no frames, a single
// parenthesized message is written instead.
//
// Nothing is written when running on a memory tool, and the function does nothing in
// non-Linux builds.
void DumpNativeStack(std::ostream& os, pid_t tid, BacktraceMap* existing_map, const char* prefix,
    ArtMethod* current_method, void* ucontext_ptr) {
#if __linux__
  // b/18119146
  if (RUNNING_ON_MEMORY_TOOL != 0) {
    return;
  }

  // Use the caller's map if there is one; otherwise build one that lives until we return.
  BacktraceMap* map = existing_map;
  std::unique_ptr<BacktraceMap> tmp_map;
  if (map == nullptr) {
    tmp_map.reset(BacktraceMap::Create(getpid()));
    map = tmp_map.get();
  }
  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
    os << prefix << "(backtrace::Unwind failed for thread " << tid
       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
    return;
  } else if (backtrace->NumFrames() == 0) {
    os << prefix << "(no native stack frames for thread " << tid << ")\n";
    return;
  }

  // Check whether we have and should use addr2line.
  bool use_addr2line;
  if (kUseAddr2line) {
    // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
    // and print to stderr.
    // The probe is only attempted while aborting (gAborting > 0).
    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h", nullptr, nullptr);
  } else {
    use_addr2line = false;
  }

  for (Backtrace::const_iterator it = backtrace->begin();
       it != backtrace->end(); ++it) {
    // We produce output like this:
    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
    // In order for parsing tools to continue to function, the stack dump
    // format must at least adhere to this format:
    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
    // The parsers require a single space before and after pc, and two spaces
    // after the <RELATIVE_ADDR>. There can be any prefix data before the
    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
    os << prefix << StringPrintf("#%02zu pc ", it->num);
    // addr2line is only attempted for frames that have a symbol name.
    bool try_addr2line = false;
    if (!BacktraceMap::IsValid(it->map)) {
      // No valid mapping for this pc: print the absolute pc and a placeholder.
      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
                                                            : "%08" PRIxPTR "  ???",
                         it->pc);
    } else {
      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
                                                            : "%08" PRIxPTR "  ",
                         BacktraceMap::GetRelativePc(it->map, it->pc));
      os << it->map.name;
      os << " (";
      if (!it->func_name.empty()) {
        os << it->func_name;
        if (it->func_offset != 0) {
          os << "+" << it->func_offset;
        }
        try_addr2line = true;
      } else if (current_method != nullptr &&
          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
          PcIsWithinQuickCode(current_method, it->pc)) {
        // No symbol, but the pc falls inside |current_method|'s quick code: print the method's
        // JNI long name and the offset from its entry point.
        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
        os << JniLongName(current_method) << "+"
           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
      } else {
        os << "???";
      }
      os << ")";
    }
    os << "\n";
    if (try_addr2line && use_addr2line) {
      Addr2line(it->map.name, it->pc - it->map.start, os, prefix);
    }
  }
#else
  UNUSED(os, tid, existing_map, prefix, current_method, ucontext_ptr);
#endif
}
1189
1190 #if defined(__APPLE__)
1191
1192 // TODO: is there any way to get the kernel stack on Mac OS?
1193 void DumpKernelStack(std::ostream&, pid_t, const char*, bool) {}
1194
1195 #else
1196
1197 void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
1198   if (tid == GetTid()) {
1199     // There's no point showing that we're reading our stack out of /proc!
1200     return;
1201   }
1202
1203   std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
1204   std::string kernel_stack;
1205   if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
1206     os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
1207     return;
1208   }
1209
1210   std::vector<std::string> kernel_stack_frames;
1211   Split(kernel_stack, '\n', &kernel_stack_frames);
1212   // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
1213   // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
1214   kernel_stack_frames.pop_back();
1215   for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
1216     // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
1217     // into "futex_wait_queue_me+0xcd/0x110".
1218     const char* text = kernel_stack_frames[i].c_str();
1219     const char* close_bracket = strchr(text, ']');
1220     if (close_bracket != nullptr) {
1221       text = close_bracket + 2;
1222     }
1223     os << prefix;
1224     if (include_count) {
1225       os << StringPrintf("#%02zd ", i);
1226     }
1227     os << text << "\n";
1228   }
1229 }
1230
1231 #endif
1232
1233 const char* GetAndroidRoot() {
1234   const char* android_root = getenv("ANDROID_ROOT");
1235   if (android_root == nullptr) {
1236     if (OS::DirectoryExists("/system")) {
1237       android_root = "/system";
1238     } else {
1239       LOG(FATAL) << "ANDROID_ROOT not set and /system does not exist";
1240       return "";
1241     }
1242   }
1243   if (!OS::DirectoryExists(android_root)) {
1244     LOG(FATAL) << "Failed to find ANDROID_ROOT directory " << android_root;
1245     return "";
1246   }
1247   return android_root;
1248 }
1249
1250 const char* GetAndroidData() {
1251   std::string error_msg;
1252   const char* dir = GetAndroidDataSafe(&error_msg);
1253   if (dir != nullptr) {
1254     return dir;
1255   } else {
1256     LOG(FATAL) << error_msg;
1257     return "";
1258   }
1259 }
1260
1261 const char* GetAndroidDataSafe(std::string* error_msg) {
1262   const char* android_data = getenv("ANDROID_DATA");
1263   if (android_data == nullptr) {
1264     if (OS::DirectoryExists("/data")) {
1265       android_data = "/data";
1266     } else {
1267       *error_msg = "ANDROID_DATA not set and /data does not exist";
1268       return nullptr;
1269     }
1270   }
1271   if (!OS::DirectoryExists(android_data)) {
1272     *error_msg = StringPrintf("Failed to find ANDROID_DATA directory %s", android_data);
1273     return nullptr;
1274   }
1275   return android_data;
1276 }
1277
1278 void GetDalvikCache(const char* subdir, const bool create_if_absent, std::string* dalvik_cache,
1279                     bool* have_android_data, bool* dalvik_cache_exists, bool* is_global_cache) {
1280   CHECK(subdir != nullptr);
1281   std::string error_msg;
1282   const char* android_data = GetAndroidDataSafe(&error_msg);
1283   if (android_data == nullptr) {
1284     *have_android_data = false;
1285     *dalvik_cache_exists = false;
1286     *is_global_cache = false;
1287     return;
1288   } else {
1289     *have_android_data = true;
1290   }
1291   const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
1292   *dalvik_cache = dalvik_cache_root + subdir;
1293   *dalvik_cache_exists = OS::DirectoryExists(dalvik_cache->c_str());
1294   *is_global_cache = strcmp(android_data, "/data") == 0;
1295   if (create_if_absent && !*dalvik_cache_exists && !*is_global_cache) {
1296     // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
1297     *dalvik_cache_exists = ((mkdir(dalvik_cache_root.c_str(), 0700) == 0 || errno == EEXIST) &&
1298                             (mkdir(dalvik_cache->c_str(), 0700) == 0 || errno == EEXIST));
1299   }
1300 }
1301
1302 static std::string GetDalvikCacheImpl(const char* subdir,
1303                                       const bool create_if_absent,
1304                                       const bool abort_on_error) {
1305   CHECK(subdir != nullptr);
1306   const char* android_data = GetAndroidData();
1307   const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
1308   const std::string dalvik_cache = dalvik_cache_root + subdir;
1309   if (!OS::DirectoryExists(dalvik_cache.c_str())) {
1310     if (!create_if_absent) {
1311       // TODO: Check callers. Traditional behavior is to not to abort, even when abort_on_error.
1312       return "";
1313     }
1314
1315     // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
1316     if (strcmp(android_data, "/data") == 0) {
1317       if (abort_on_error) {
1318         LOG(FATAL) << "Failed to find dalvik-cache directory " << dalvik_cache
1319                    << ", cannot create /data dalvik-cache.";
1320         UNREACHABLE();
1321       }
1322       return "";
1323     }
1324
1325     int result = mkdir(dalvik_cache_root.c_str(), 0700);
1326     if (result != 0 && errno != EEXIST) {
1327       if (abort_on_error) {
1328         PLOG(FATAL) << "Failed to create dalvik-cache root directory " << dalvik_cache_root;
1329         UNREACHABLE();
1330       }
1331       return "";
1332     }
1333
1334     result = mkdir(dalvik_cache.c_str(), 0700);
1335     if (result != 0) {
1336       if (abort_on_error) {
1337         PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
1338         UNREACHABLE();
1339       }
1340       return "";
1341     }
1342   }
1343   return dalvik_cache;
1344 }
1345
1346 std::string GetDalvikCache(const char* subdir, const bool create_if_absent) {
1347   return GetDalvikCacheImpl(subdir, create_if_absent, false);
1348 }
1349
1350 std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
1351   return GetDalvikCacheImpl(subdir, create_if_absent, true);
1352 }
1353
1354 bool GetDalvikCacheFilename(const char* location, const char* cache_location,
1355                             std::string* filename, std::string* error_msg) {
1356   if (location[0] != '/') {
1357     *error_msg = StringPrintf("Expected path in location to be absolute: %s", location);
1358     return false;
1359   }
1360   std::string cache_file(&location[1]);  // skip leading slash
1361   if (!EndsWith(location, ".dex") && !EndsWith(location, ".art") && !EndsWith(location, ".oat")) {
1362     cache_file += "/";
1363     cache_file += DexFile::kClassesDex;
1364   }
1365   std::replace(cache_file.begin(), cache_file.end(), '/', '@');
1366   *filename = StringPrintf("%s/%s", cache_location, cache_file.c_str());
1367   return true;
1368 }
1369
1370 std::string GetDalvikCacheFilenameOrDie(const char* location, const char* cache_location) {
1371   std::string ret;
1372   std::string error_msg;
1373   if (!GetDalvikCacheFilename(location, cache_location, &ret, &error_msg)) {
1374     LOG(FATAL) << error_msg;
1375   }
1376   return ret;
1377 }
1378
1379 static void InsertIsaDirectory(const InstructionSet isa, std::string* filename) {
1380   // in = /foo/bar/baz
1381   // out = /foo/bar/<isa>/baz
1382   size_t pos = filename->rfind('/');
1383   CHECK_NE(pos, std::string::npos) << *filename << " " << isa;
1384   filename->insert(pos, "/", 1);
1385   filename->insert(pos + 1, GetInstructionSetString(isa));
1386 }
1387
1388 std::string GetSystemImageFilename(const char* location, const InstructionSet isa) {
1389   // location = /system/framework/boot.art
1390   // filename = /system/framework/<isa>/boot.art
1391   std::string filename(location);
1392   InsertIsaDirectory(isa, &filename);
1393   return filename;
1394 }
1395
1396 int ExecAndReturnCode(std::vector<std::string>& arg_vector, std::string* error_msg) {
1397   const std::string command_line(Join(arg_vector, ' '));
1398   CHECK_GE(arg_vector.size(), 1U) << command_line;
1399
1400   // Convert the args to char pointers.
1401   const char* program = arg_vector[0].c_str();
1402   std::vector<char*> args;
1403   for (size_t i = 0; i < arg_vector.size(); ++i) {
1404     const std::string& arg = arg_vector[i];
1405     char* arg_str = const_cast<char*>(arg.c_str());
1406     CHECK(arg_str != nullptr) << i;
1407     args.push_back(arg_str);
1408   }
1409   args.push_back(nullptr);
1410
1411   // fork and exec
1412   pid_t pid = fork();
1413   if (pid == 0) {
1414     // no allocation allowed between fork and exec
1415
1416     // change process groups, so we don't get reaped by ProcessManager
1417     setpgid(0, 0);
1418
1419     execv(program, &args[0]);
1420     PLOG(ERROR) << "Failed to execv(" << command_line << ")";
1421     // _exit to avoid atexit handlers in child.
1422     _exit(1);
1423   } else {
1424     if (pid == -1) {
1425       *error_msg = StringPrintf("Failed to execv(%s) because fork failed: %s",
1426                                 command_line.c_str(), strerror(errno));
1427       return -1;
1428     }
1429
1430     // wait for subprocess to finish
1431     int status = -1;
1432     pid_t got_pid = TEMP_FAILURE_RETRY(waitpid(pid, &status, 0));
1433     if (got_pid != pid) {
1434       *error_msg = StringPrintf("Failed after fork for execv(%s) because waitpid failed: "
1435                                 "wanted %d, got %d: %s",
1436                                 command_line.c_str(), pid, got_pid, strerror(errno));
1437       return -1;
1438     }
1439     if (WIFEXITED(status)) {
1440       return WEXITSTATUS(status);
1441     }
1442     return -1;
1443   }
1444 }
1445
1446 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
1447   int status = ExecAndReturnCode(arg_vector, error_msg);
1448   if (status != 0) {
1449     const std::string command_line(Join(arg_vector, ' '));
1450     *error_msg = StringPrintf("Failed execv(%s) because non-0 exit status",
1451                               command_line.c_str());
1452     return false;
1453   }
1454   return true;
1455 }
1456
// Returns true if stat() succeeds on |filename|.
bool FileExists(const std::string& filename) {
  struct stat file_info;
  const int stat_result = stat(filename.c_str(), &file_info);
  return stat_result == 0;
}
1461
// Returns true if stat() succeeds on |filename| and reports a size greater than zero.
bool FileExistsAndNotEmpty(const std::string& filename) {
  struct stat file_info;
  const bool exists = stat(filename.c_str(), &file_info) == 0;
  return exists && file_info.st_size > 0;
}
1469
1470 std::string PrettyDescriptor(Primitive::Type type) {
1471   return PrettyDescriptor(Primitive::Descriptor(type));
1472 }
1473
1474 static void DumpMethodCFGImpl(const DexFile* dex_file,
1475                               uint32_t dex_method_idx,
1476                               const DexFile::CodeItem* code_item,
1477                               std::ostream& os) {
1478   os << "digraph {\n";
1479   os << "  # /* " << PrettyMethod(dex_method_idx, *dex_file, true) << " */\n";
1480
1481   std::set<uint32_t> dex_pc_is_branch_target;
1482   {
1483     // Go and populate.
1484     const Instruction* inst = Instruction::At(code_item->insns_);
1485     for (uint32_t dex_pc = 0;
1486          dex_pc < code_item->insns_size_in_code_units_;
1487          dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
1488       if (inst->IsBranch()) {
1489         dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset());
1490       } else if (inst->IsSwitch()) {
1491         const uint16_t* insns = code_item->insns_ + dex_pc;
1492         int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
1493         const uint16_t* switch_insns = insns + switch_offset;
1494         uint32_t switch_count = switch_insns[1];
1495         int32_t targets_offset;
1496         if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
1497           /* 0=sig, 1=count, 2/3=firstKey */
1498           targets_offset = 4;
1499         } else {
1500           /* 0=sig, 1=count, 2..count*2 = keys */
1501           targets_offset = 2 + 2 * switch_count;
1502         }
1503         for (uint32_t targ = 0; targ < switch_count; targ++) {
1504           int32_t offset =
1505               static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
1506               static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
1507           dex_pc_is_branch_target.insert(dex_pc + offset);
1508         }
1509       }
1510     }
1511   }
1512
1513   // Create nodes for "basic blocks."
1514   std::map<uint32_t, uint32_t> dex_pc_to_node_id;  // This only has entries for block starts.
1515   std::map<uint32_t, uint32_t> dex_pc_to_incl_id;  // This has entries for all dex pcs.
1516
1517   {
1518     const Instruction* inst = Instruction::At(code_item->insns_);
1519     bool first_in_block = true;
1520     bool force_new_block = false;
1521     for (uint32_t dex_pc = 0;
1522          dex_pc < code_item->insns_size_in_code_units_;
1523          dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
1524       if (dex_pc == 0 ||
1525           (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) ||
1526           force_new_block) {
1527         uint32_t id = dex_pc_to_node_id.size();
1528         if (id > 0) {
1529           // End last node.
1530           os << "}\"];\n";
1531         }
1532         // Start next node.
1533         os << "  node" << id << " [shape=record,label=\"{";
1534         dex_pc_to_node_id.insert(std::make_pair(dex_pc, id));
1535         first_in_block = true;
1536         force_new_block = false;
1537       }
1538
1539       // Register instruction.
1540       dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1));
1541
1542       // Print instruction.
1543       if (!first_in_block) {
1544         os << " | ";
1545       } else {
1546         first_in_block = false;
1547       }
1548
1549       // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'.
1550       os << "<" << "p" << dex_pc << ">";
1551       os << " 0x" << std::hex << dex_pc << std::dec << ": ";
1552       std::string inst_str = inst->DumpString(dex_file);
1553       size_t cur_start = 0;  // It's OK to start at zero, instruction dumps don't start with chars
1554                              // we need to escape.
1555       while (cur_start != std::string::npos) {
1556         size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1);
1557         if (next_escape == std::string::npos) {
1558           os << inst_str.substr(cur_start, inst_str.size() - cur_start);
1559           break;
1560         } else {
1561           os << inst_str.substr(cur_start, next_escape - cur_start);
1562           // Escape all necessary characters.
1563           while (next_escape < inst_str.size()) {
1564             char c = inst_str.at(next_escape);
1565             if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
1566               os << '\\' << c;
1567             } else {
1568               break;
1569             }
1570             next_escape++;
1571           }
1572           if (next_escape >= inst_str.size()) {
1573             next_escape = std::string::npos;
1574           }
1575           cur_start = next_escape;
1576         }
1577       }
1578
1579       // Force a new block for some fall-throughs and some instructions that terminate the "local"
1580       // control flow.
1581       force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd();
1582     }
1583     // Close last node.
1584     if (dex_pc_to_node_id.size() > 0) {
1585       os << "}\"];\n";
1586     }
1587   }
1588
1589   // Create edges between them.
1590   {
1591     std::ostringstream regular_edges;
1592     std::ostringstream taken_edges;
1593     std::ostringstream exception_edges;
1594
1595     // Common set of exception edges.
1596     std::set<uint32_t> exception_targets;
1597
1598     // These blocks (given by the first dex pc) need exception per dex-pc handling in a second
1599     // pass. In the first pass we try and see whether we can use a common set of edges.
1600     std::set<uint32_t> blocks_with_detailed_exceptions;
1601
1602     {
1603       uint32_t last_node_id = std::numeric_limits<uint32_t>::max();
1604       uint32_t old_dex_pc = 0;
1605       uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max();
1606       const Instruction* inst = Instruction::At(code_item->insns_);
1607       for (uint32_t dex_pc = 0;
1608           dex_pc < code_item->insns_size_in_code_units_;
1609           old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
1610         {
1611           auto it = dex_pc_to_node_id.find(dex_pc);
1612           if (it != dex_pc_to_node_id.end()) {
1613             if (!exception_targets.empty()) {
1614               // It seems the last block had common exception handlers. Add the exception edges now.
1615               uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
1616               for (uint32_t handler_pc : exception_targets) {
1617                 auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
1618                 if (node_id_it != dex_pc_to_incl_id.end()) {
1619                   exception_edges << "  node" << node_id
1620                       << " -> node" << node_id_it->second << ":p" << handler_pc
1621                       << ";\n";
1622                 }
1623               }
1624               exception_targets.clear();
1625             }
1626
1627             block_start_dex_pc = dex_pc;
1628
1629             // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things
1630             // like switch data.
1631             uint32_t old_last = last_node_id;
1632             last_node_id = it->second;
1633             if (old_last != std::numeric_limits<uint32_t>::max()) {
1634               regular_edges << "  node" << old_last << ":p" << old_dex_pc
1635                   << " -> node" << last_node_id << ":p" << dex_pc
1636                   << ";\n";
1637             }
1638           }
1639
1640           // Look at the exceptions of the first entry.
1641           CatchHandlerIterator catch_it(*code_item, dex_pc);
1642           for (; catch_it.HasNext(); catch_it.Next()) {
1643             exception_targets.insert(catch_it.GetHandlerAddress());
1644           }
1645         }
1646
1647         // Handle instruction.
1648
1649         // Branch: something with at most two targets.
1650         if (inst->IsBranch()) {
1651           const int32_t offset = inst->GetTargetOffset();
1652           const bool conditional = !inst->IsUnconditional();
1653
1654           auto target_it = dex_pc_to_node_id.find(dex_pc + offset);
1655           if (target_it != dex_pc_to_node_id.end()) {
1656             taken_edges << "  node" << last_node_id << ":p" << dex_pc
1657                 << " -> node" << target_it->second << ":p" << (dex_pc + offset)
1658                 << ";\n";
1659           }
1660           if (!conditional) {
1661             // No fall-through.
1662             last_node_id = std::numeric_limits<uint32_t>::max();
1663           }
1664         } else if (inst->IsSwitch()) {
1665           // TODO: Iterate through all switch targets.
1666           const uint16_t* insns = code_item->insns_ + dex_pc;
1667           /* make sure the start of the switch is in range */
1668           int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
1669           /* offset to switch table is a relative branch-style offset */
1670           const uint16_t* switch_insns = insns + switch_offset;
1671           uint32_t switch_count = switch_insns[1];
1672           int32_t targets_offset;
1673           if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
1674             /* 0=sig, 1=count, 2/3=firstKey */
1675             targets_offset = 4;
1676           } else {
1677             /* 0=sig, 1=count, 2..count*2 = keys */
1678             targets_offset = 2 + 2 * switch_count;
1679           }
1680           /* make sure the end of the switch is in range */
1681           /* verify each switch target */
1682           for (uint32_t targ = 0; targ < switch_count; targ++) {
1683             int32_t offset =
1684                 static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
1685                 static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
1686             int32_t abs_offset = dex_pc + offset;
1687             auto target_it = dex_pc_to_node_id.find(abs_offset);
1688             if (target_it != dex_pc_to_node_id.end()) {
1689               // TODO: value label.
1690               taken_edges << "  node" << last_node_id << ":p" << dex_pc
1691                   << " -> node" << target_it->second << ":p" << (abs_offset)
1692                   << ";\n";
1693             }
1694           }
1695         }
1696
1697         // Exception edges. If this is not the first instruction in the block
1698         if (block_start_dex_pc != dex_pc) {
1699           std::set<uint32_t> current_handler_pcs;
1700           CatchHandlerIterator catch_it(*code_item, dex_pc);
1701           for (; catch_it.HasNext(); catch_it.Next()) {
1702             current_handler_pcs.insert(catch_it.GetHandlerAddress());
1703           }
1704           if (current_handler_pcs != exception_targets) {
1705             exception_targets.clear();  // Clear so we don't do something at the end.
1706             blocks_with_detailed_exceptions.insert(block_start_dex_pc);
1707           }
1708         }
1709
1710         if (inst->IsReturn() ||
1711             (inst->Opcode() == Instruction::THROW) ||
1712             (inst->IsBranch() && inst->IsUnconditional())) {
1713           // No fall-through.
1714           last_node_id = std::numeric_limits<uint32_t>::max();
1715         }
1716       }
1717       // Finish up the last block, if it had common exceptions.
1718       if (!exception_targets.empty()) {
1719         // It seems the last block had common exception handlers. Add the exception edges now.
1720         uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
1721         for (uint32_t handler_pc : exception_targets) {
1722           auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
1723           if (node_id_it != dex_pc_to_incl_id.end()) {
1724             exception_edges << "  node" << node_id
1725                 << " -> node" << node_id_it->second << ":p" << handler_pc
1726                 << ";\n";
1727           }
1728         }
1729         exception_targets.clear();
1730       }
1731     }
1732
1733     // Second pass for detailed exception blocks.
1734     // TODO
1735     // Exception edges. If this is not the first instruction in the block
1736     for (uint32_t dex_pc : blocks_with_detailed_exceptions) {
1737       const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]);
1738       uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second;
1739       while (true) {
1740         CatchHandlerIterator catch_it(*code_item, dex_pc);
1741         if (catch_it.HasNext()) {
1742           std::set<uint32_t> handled_targets;
1743           for (; catch_it.HasNext(); catch_it.Next()) {
1744             uint32_t handler_pc = catch_it.GetHandlerAddress();
1745             auto it = handled_targets.find(handler_pc);
1746             if (it == handled_targets.end()) {
1747               auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
1748               if (node_id_it != dex_pc_to_incl_id.end()) {
1749                 exception_edges << "  node" << this_node_id << ":p" << dex_pc
1750                     << " -> node" << node_id_it->second << ":p" << handler_pc
1751                     << ";\n";
1752               }
1753
1754               // Mark as done.
1755               handled_targets.insert(handler_pc);
1756             }
1757           }
1758         }
1759         if (inst->IsBasicBlockEnd()) {
1760           break;
1761         }
1762
1763         // Loop update. Have a break-out if the next instruction is a branch target and thus in
1764         // another block.
1765         dex_pc += inst->SizeInCodeUnits();
1766         if (dex_pc >= code_item->insns_size_in_code_units_) {
1767           break;
1768         }
1769         if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) {
1770           break;
1771         }
1772         inst = inst->Next();
1773       }
1774     }
1775
1776     // Write out the sub-graphs to make edges styled.
1777     os << "\n";
1778     os << "  subgraph regular_edges {\n";
1779     os << "    edge [color=\"#000000\",weight=.3,len=3];\n\n";
1780     os << "    " << regular_edges.str() << "\n";
1781     os << "  }\n\n";
1782
1783     os << "  subgraph taken_edges {\n";
1784     os << "    edge [color=\"#00FF00\",weight=.3,len=3];\n\n";
1785     os << "    " << taken_edges.str() << "\n";
1786     os << "  }\n\n";
1787
1788     os << "  subgraph exception_edges {\n";
1789     os << "    edge [color=\"#FF0000\",weight=.3,len=3];\n\n";
1790     os << "    " << exception_edges.str() << "\n";
1791     os << "  }\n\n";
1792   }
1793
1794   os << "}\n";
1795 }
1796
1797 void DumpMethodCFG(ArtMethod* method, std::ostream& os) {
1798   const DexFile* dex_file = method->GetDexFile();
1799   const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
1800
1801   DumpMethodCFGImpl(dex_file, method->GetDexMethodIndex(), code_item, os);
1802 }
1803
1804 void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) {
1805   // This is painful, we need to find the code item. That means finding the class, and then
1806   // iterating the table.
1807   if (dex_method_idx >= dex_file->NumMethodIds()) {
1808     os << "Could not find method-idx.";
1809     return;
1810   }
1811   const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
1812
1813   const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_);
1814   if (class_def == nullptr) {
1815     os << "Could not find class-def.";
1816     return;
1817   }
1818
1819   const uint8_t* class_data = dex_file->GetClassData(*class_def);
1820   if (class_data == nullptr) {
1821     os << "No class data.";
1822     return;
1823   }
1824
1825   ClassDataItemIterator it(*dex_file, class_data);
1826   // Skip fields
1827   while (it.HasNextStaticField() || it.HasNextInstanceField()) {
1828     it.Next();
1829   }
1830
1831   // Find method, and dump it.
1832   while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
1833     uint32_t method_idx = it.GetMemberIndex();
1834     if (method_idx == dex_method_idx) {
1835       DumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os);
1836       return;
1837     }
1838     it.Next();
1839   }
1840
1841   // Otherwise complain.
1842   os << "Something went wrong, didn't find the method in the class data.";
1843 }
1844
1845 static void ParseStringAfterChar(const std::string& s,
1846                                  char c,
1847                                  std::string* parsed_value,
1848                                  UsageFn Usage) {
1849   std::string::size_type colon = s.find(c);
1850   if (colon == std::string::npos) {
1851     Usage("Missing char %c in option %s\n", c, s.c_str());
1852   }
1853   // Add one to remove the char we were trimming until.
1854   *parsed_value = s.substr(colon + 1);
1855 }
1856
1857 void ParseDouble(const std::string& option,
1858                  char after_char,
1859                  double min,
1860                  double max,
1861                  double* parsed_value,
1862                  UsageFn Usage) {
1863   std::string substring;
1864   ParseStringAfterChar(option, after_char, &substring, Usage);
1865   bool sane_val = true;
1866   double value;
1867   if ((false)) {
1868     // TODO: this doesn't seem to work on the emulator.  b/15114595
1869     std::stringstream iss(substring);
1870     iss >> value;
1871     // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
1872     sane_val = iss.eof() && (value >= min) && (value <= max);
1873   } else {
1874     char* end = nullptr;
1875     value = strtod(substring.c_str(), &end);
1876     sane_val = *end == '\0' && value >= min && value <= max;
1877   }
1878   if (!sane_val) {
1879     Usage("Invalid double value %s for option %s\n", substring.c_str(), option.c_str());
1880   }
1881   *parsed_value = value;
1882 }
1883
// Returns the size in bytes of the file at `filename` as reported by stat(), or -1 if stat()
// fails (for example because the path does not exist).
int64_t GetFileSizeBytes(const std::string& filename) {
  struct stat file_info;
  if (stat(filename.c_str(), &file_info) != 0) {
    return -1;
  }
  return file_info.st_size;
}
1889
// Never returns: repeatedly puts the caller to sleep via usleep() in intervals of one million
// microseconds.
void SleepForever() {
  constexpr useconds_t kSleepIntervalUs = 1000000;
  for (;;) {
    usleep(kSleepIntervalUs);
  }
}
1895
1896 }  // namespace art