libdex/DexFile.c

   1 /*
   2  * Copyright (C) 2008 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * Access the contents of a .dex file.
  19  */
  20
  21 #include "DexFile.h"
  22 #include "DexProto.h"
  23 #include "DexCatch.h"
  24 #include "Leb128.h"
  25 #include "sha1.h"
  26 #include "ZipArchive.h"
  27
  28 #include <zlib.h>
  29
  30 #include <stdlib.h>
  31 #include <stddef.h>
  32 #include <string.h>
  33 #include <fcntl.h>
  34 #include <errno.h>
  35
  36 // fwd
  37 static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader);
  38
  39
  40 /*
  41  * Verifying checksums is good, but it slows things down and causes us to
  42  * touch every page.  In the "optimized" world, it doesn't work at all,
  43  * because we rewrite the contents.
  44  */
  45 static const bool kVerifyChecksum = false;
  46 static const bool kVerifySignature = false;
  47
  48
  49 /* Compare two '\0'-terminated modified UTF-8 strings, using Unicode
  50  * code point values for comparison. This treats different encodings
  51  * for the same code point as equivalent, except that only a real '\0'
  52  * byte is considered the string terminator. The return value is as
  53  * for strcmp(). */
  54 int dexUtf8Cmp(const char* s1, const char* s2) {
  55     for (;;) {
  56         if (*s1 == '\0') {
  57             if (*s2 == '\0') {
  58                 return 0;
  59             }
  60             return -1;
  61         } else if (*s2 == '\0') {
  62             return 1;
  63         }
  64
  65         int utf1 = dexGetUtf16FromUtf8(&s1);
  66         int utf2 = dexGetUtf16FromUtf8(&s2);
  67         int diff = utf1 - utf2;
  68
  69         if (diff != 0) {
  70             return diff;
  71         }
  72     }
  73 }
  74
  75 /* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
  76 u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
  77     0x00000000, // 00..1f low control characters; nothing valid
  78     0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
  79     0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
  80     0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
  81 };
  82
  83 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
  84 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
  85     /*
  86      * It's a multibyte encoded character. Decode it and analyze. We
  87      * accept anything that isn't (a) an improperly encoded low value,
  88      * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
  89      * control character, or (e) a high space, layout, or special
  90      * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
  91      * U+fff0..U+ffff).
  92      */
  93
  94     u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
  95
  96     // Perform follow-up tests based on the high 8 bits.
  97     switch (utf16 >> 8) {
  98         case 0x00: {
  99             // It's only valid if it's above the ISO-8859-1 high space (0xa0).
 100             return (utf16 > 0x00a0);
 101         }
 102         case 0xd8:
 103         case 0xd9:
 104         case 0xda:
 105         case 0xdb: {
 106             /*
 107              * It's a leading surrogate. Check to see that a trailing
 108              * surrogate follows.
 109              */
 110             utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
 111             return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
 112         }
 113         case 0xdc:
 114         case 0xdd:
 115         case 0xde:
 116         case 0xdf: {
 117             // It's a trailing surrogate, which is not valid at this point.
 118             return false;
 119         }
 120         case 0x20:
 121         case 0xff: {
 122             // It's in the range that has spaces, controls, and specials.
 123             switch (utf16 & 0xfff8) {
 124                 case 0x2000:
 125                 case 0x2008:
 126                 case 0x2028:
 127                 case 0xfff0:
 128                 case 0xfff8: {
 129                     return false;
 130                 }
 131             }
 132             break;
 133         }
 134     }
 135
 136     return true;
 137 }
 138
 139 /* Return whether the given string is a valid field or method name. */
 140 bool dexIsValidMemberName(const char* s) {
 141     bool angleName = false;
 142
 143     switch (*s) {
 144         case '\0': {
 145             // The empty string is not a valid name.
 146             return false;
 147         }
 148         case '<': {
 149             /*
 150              * '<' is allowed only at the start of a name, and if present,
 151              * means that the name must end with '>'.
 152              */
 153             angleName = true;
 154             s++;
 155             break;
 156         }
 157     }
 158
 159     for (;;) {
 160         switch (*s) {
 161             case '\0': {
 162                 return !angleName;
 163             }
 164             case '>': {
 165                 return angleName && s[1] == '\0';
 166             }
 167         }
 168         if (!dexIsValidMemberNameUtf8(&s)) {
 169             return false;
 170         }
 171     }
 172 }
 173
 174 /* Return whether the given string is a valid type descriptor. */
 175 bool dexIsValidTypeDescriptor(const char* s) {
 176     int arrayCount = 0;
 177
 178     while (*s == '[') {
 179         arrayCount++;
 180         s++;
 181     }
 182
 183     if (arrayCount > 255) {
 184         // Arrays may have no more than 255 dimensions.
 185         return false;
 186     }
 187
 188     switch (*(s++)) {
 189         case 'B':
 190         case 'C':
 191         case 'D':
 192         case 'F':
 193         case 'I':
 194         case 'J':
 195         case 'S':
 196         case 'Z': {
 197             // These are all single-character descriptors for primitive types.
 198             return (*s == '\0');
 199         }
 200         case 'V': {
 201             // You can't have an array of void.
 202             return (arrayCount == 0) && (*s == '\0');
 203         }
 204         case 'L': {
 205             // Break out and continue below.
 206             break;
 207         }
 208         default: {
 209             // Oddball descriptor character.
 210             return false;
 211         }
 212     }
 213
 214     // We just consumed the 'L' that introduces a class name.
 215
 216     bool slashOrFirst = true; // first character or just encountered a slash
 217     for (;;) {
 218         u1 c = (u1) *s;
 219         switch (c) {
 220             case '\0': {
 221                 // Premature end.
 222                 return false;
 223             }
 224             case ';': {
 225                 /*
 226                  * Make sure that this is the end of the string and that
 227                  * it doesn't end with an empty component (including the
 228                  * degenerate case of "L;").
 229                  */
 230                 return (s[1] == '\0') && !slashOrFirst;
 231             }
 232             case '/': {
 233                 if (slashOrFirst) {
 234                     // Slash at start or two slashes in a row.
 235                     return false;
 236                 }
 237                 slashOrFirst = true;
 238                 s++;
 239                 break;
 240             }
 241             default: {
 242                 if (!dexIsValidMemberNameUtf8(&s)) {
 243                     return false;
 244                 }
 245                 slashOrFirst = false;
 246                 break;
 247             }
 248         }
 249     }
 250 }
 251
 252 /* Return whether the given string is a valid reference descriptor. This
 253  * is true if dexIsValidTypeDescriptor() returns true and the descriptor
 254  * is for a class or array and not a primitive type. */
 255 bool dexIsReferenceDescriptor(const char* s) {
 256     if (!dexIsValidTypeDescriptor(s)) {
 257         return false;
 258     }
 259
 260     return (s[0] == 'L') || (s[0] == '[');
 261 }
 262
 263 /* Return whether the given string is a valid class descriptor. This
 264  * is true if dexIsValidTypeDescriptor() returns true and the descriptor
 265  * is for a class and not an array or primitive type. */
 266 bool dexIsClassDescriptor(const char* s) {
 267     if (!dexIsValidTypeDescriptor(s)) {
 268         return false;
 269     }
 270
 271     return s[0] == 'L';
 272 }
 273
 274 /* Return whether the given string is a valid field type descriptor. This
 275  * is true if dexIsValidTypeDescriptor() returns true and the descriptor
 276  * is for anything but "void". */
 277 bool dexIsFieldDescriptor(const char* s) {
 278     if (!dexIsValidTypeDescriptor(s)) {
 279         return false;
 280     }
 281
 282     return s[0] != 'V';
 283 }
 284
 285 /* Return the UTF-8 encoded string with the specified string_id index,
 286  * also filling in the UTF-16 size (number of 16-bit code points).*/
 287 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
 288         u4* utf16Size) {
 289     const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
 290     const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
 291
 292     *utf16Size = readUnsignedLeb128(&ptr);
 293     return (const char*) ptr;
 294 }
 295
 296 /*
 297  * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
 298  * least kSHA1DigestOutputLen bytes.
 299  */
 300 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
 301
 302 /*
 303  * Compute a SHA-1 digest on a range of bytes.
 304  */
 305 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
 306     unsigned char digest[])
 307 {
 308     SHA1_CTX context;
 309     SHA1Init(&context);
 310     SHA1Update(&context, data, length);
 311     SHA1Final(digest, &context);
 312 }
 313
 314 /*
 315  * Format the SHA-1 digest into the buffer, which must be able to hold at
 316  * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
 317  */
 318 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
 319 {
 320     static const char hexDigit[] = "0123456789abcdef";
 321     char* cp;
 322     int i;
 323
 324     cp = tmpBuf;
 325     for (i = 0; i < kSHA1DigestLen; i++) {
 326         *cp++ = hexDigit[digest[i] >> 4];
 327         *cp++ = hexDigit[digest[i] & 0x0f];
 328     }
 329     *cp++ = '\0';
 330
 331     assert(cp == tmpBuf + kSHA1DigestOutputLen);
 332
 333     return tmpBuf;
 334 }
 335
 336 /*
 337  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
 338  *
 339  * This may or may not be compatible with UTF-8 hash functions used inside
 340  * the Dalvik VM.
 341  *
 342  * The basic "multiply by 31 and add" approach does better on class names
 343  * than most other things tried (e.g. adler32).
 344  */
 345 static u4 classDescriptorHash(const char* str)
 346 {
 347     u4 hash = 1;
 348
 349     while (*str != '\0')
 350         hash = hash * 31 + *str++;
 351
 352     return hash;
 353 }
 354
 355 /*
 356  * Add an entry to the class lookup table.  We hash the string and probe
 357  * until we find an open slot.
 358  */
 359 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
 360     int stringOff, int classDefOff, int* pNumProbes)
 361 {
 362     const char* classDescriptor =
 363         (const char*) (pDexFile->baseAddr + stringOff);
 364     const DexClassDef* pClassDef =
 365         (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
 366     u4 hash = classDescriptorHash(classDescriptor);
 367     int mask = pLookup->numEntries-1;
 368     int idx = hash & mask;
 369
 370     /*
 371      * Find the first empty slot.  We oversized the table, so this is
 372      * guaranteed to finish.
 373      */
 374     int probes = 0;
 375     while (pLookup->table[idx].classDescriptorOffset != 0) {
 376         idx = (idx + 1) & mask;
 377         probes++;
 378     }
 379     //if (probes > 1)
 380     //    LOGW("classLookupAdd: probes=%d\n", probes);
 381
 382     pLookup->table[idx].classDescriptorHash = hash;
 383     pLookup->table[idx].classDescriptorOffset = stringOff;
 384     pLookup->table[idx].classDefOffset = classDefOff;
 385     *pNumProbes = probes;
 386 }
 387
 388 /*
 389  * Round up to the next highest power of 2.
 390  *
 391  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
 392  */
 393 u4 dexRoundUpPower2(u4 val)
 394 {
 395     val--;
 396     val |= val >> 1;
 397     val |= val >> 2;
 398     val |= val >> 4;
 399     val |= val >> 8;
 400     val |= val >> 16;
 401     val++;
 402
 403     return val;
 404 }
 405
 406 /*
 407  * Create the class lookup hash table.
 408  *
 409  * Returns newly-allocated storage.
 410  */
 411 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
 412 {
 413     DexClassLookup* pLookup;
 414     int allocSize;
 415     int i, numEntries;
 416     int numProbes, totalProbes, maxProbes;
 417
 418     numProbes = totalProbes = maxProbes = 0;
 419
 420     assert(pDexFile != NULL);
 421
 422     /*
 423      * Using a factor of 3 results in far less probing than a factor of 2,
 424      * but almost doubles the flash storage requirements for the bootstrap
 425      * DEX files.  The overall impact on class loading performance seems
 426      * to be minor.  We could probably get some performance improvement by
 427      * using a secondary hash.
 428      */
 429     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
 430     allocSize = offsetof(DexClassLookup, table)
 431                     + numEntries * sizeof(pLookup->table[0]);
 432
 433     pLookup = (DexClassLookup*) calloc(1, allocSize);
 434     if (pLookup == NULL)
 435         return NULL;
 436     pLookup->size = allocSize;
 437     pLookup->numEntries = numEntries;
 438
 439     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
 440         const DexClassDef* pClassDef;
 441         const char* pString;
 442
 443         pClassDef = dexGetClassDef(pDexFile, i);
 444         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
 445
 446         classLookupAdd(pDexFile, pLookup,
 447             (u1*)pString - pDexFile->baseAddr,
 448             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
 449
 450         if (numProbes > maxProbes)
 451             maxProbes = numProbes;
 452         totalProbes += numProbes;
 453     }
 454
 455     LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
 456          " total=%d max=%d\n",
 457         pDexFile->pHeader->classDefsSize, numEntries,
 458         (100 * pDexFile->pHeader->classDefsSize) / numEntries,
 459         allocSize, totalProbes, maxProbes);
 460
 461     return pLookup;
 462 }
 463
 464
 465 /*
 466  * Set up the basic raw data pointers of a DexFile. This function isn't
 467  * meant for general use.
 468  */
 469 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
 470     DexHeader *pHeader = (DexHeader*) data;
 471
 472     pDexFile->baseAddr = data;
 473     pDexFile->pHeader = pHeader;
 474     pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
 475     pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
 476     pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
 477     pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
 478     pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
 479     pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
 480     pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
 481 }
 482
 483
 484 /*
 485  * Parse out an index map entry, advancing "*pData" and reducing "*pSize".
 486  */
 487 static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding,
 488     u4* pFullCount, u4* pReducedCount, const u2** pMap)
 489 {
 490     const u4* wordPtr = (const u4*) *pData;
 491     u4 size = *pSize;
 492     u4 mapCount;
 493
 494     if (expanding) {
 495         if (size < 4)
 496             return false;
 497         mapCount = *pReducedCount = *wordPtr++;
 498         *pFullCount = (u4) -1;
 499         size -= sizeof(u4);
 500     } else {
 501         if (size < 8)
 502             return false;
 503         mapCount = *pFullCount = *wordPtr++;
 504         *pReducedCount = *wordPtr++;
 505         size -= sizeof(u4) * 2;
 506     }
 507
 508     u4 mapSize = mapCount * sizeof(u2);
 509
 510     if (size < mapSize)
 511         return false;
 512     *pMap = (const u2*) wordPtr;
 513     size -= mapSize;
 514
 515     /* advance the pointer */
 516     const u1* ptr = (const u1*) wordPtr;
 517     ptr += (mapSize + 3) & ~0x3;
 518
 519     /* update pass-by-reference values */
 520     *pData = (const u1*) ptr;
 521     *pSize = size;
 522
 523     return true;
 524 }
 525
 526 /*
 527  * Set up some pointers into the mapped data.
 528  *
 529  * See analysis/ReduceConstants.c for the data layout description.
 530  */
 531 static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size,
 532     bool expanding)
 533 {
 534     if (!parseIndexMapEntry(&data, &size, expanding,
 535             &pDexFile->indexMap.classFullCount,
 536             &pDexFile->indexMap.classReducedCount,
 537             &pDexFile->indexMap.classMap))
 538     {
 539         return false;
 540     }
 541
 542     if (!parseIndexMapEntry(&data, &size, expanding,
 543             &pDexFile->indexMap.methodFullCount,
 544             &pDexFile->indexMap.methodReducedCount,
 545             &pDexFile->indexMap.methodMap))
 546     {
 547         return false;
 548     }
 549
 550     if (!parseIndexMapEntry(&data, &size, expanding,
 551             &pDexFile->indexMap.fieldFullCount,
 552             &pDexFile->indexMap.fieldReducedCount,
 553             &pDexFile->indexMap.fieldMap))
 554     {
 555         return false;
 556     }
 557
 558     if (!parseIndexMapEntry(&data, &size, expanding,
 559             &pDexFile->indexMap.stringFullCount,
 560             &pDexFile->indexMap.stringReducedCount,
 561             &pDexFile->indexMap.stringMap))
 562     {
 563         return false;
 564     }
 565
 566     if (expanding) {
 567         /*
 568          * The map includes the "reduced" counts; pull the original counts
 569          * out of the DexFile so that code has a consistent source.
 570          */
 571         assert(pDexFile->indexMap.classFullCount == (u4) -1);
 572         assert(pDexFile->indexMap.methodFullCount == (u4) -1);
 573         assert(pDexFile->indexMap.fieldFullCount == (u4) -1);
 574         assert(pDexFile->indexMap.stringFullCount == (u4) -1);
 575
 576 #if 0   // TODO: not available yet -- do later or just skip this
 577         pDexFile->indexMap.classFullCount =
 578             pDexFile->pHeader->typeIdsSize;
 579         pDexFile->indexMap.methodFullCount =
 580             pDexFile->pHeader->methodIdsSize;
 581         pDexFile->indexMap.fieldFullCount =
 582             pDexFile->pHeader->fieldIdsSize;
 583         pDexFile->indexMap.stringFullCount =
 584             pDexFile->pHeader->stringIdsSize;
 585 #endif
 586     }
 587
 588     LOGI("Class : %u %u %u\n",
 589         pDexFile->indexMap.classFullCount,
 590         pDexFile->indexMap.classReducedCount,
 591         pDexFile->indexMap.classMap[0]);
 592     LOGI("Method: %u %u %u\n",
 593         pDexFile->indexMap.methodFullCount,
 594         pDexFile->indexMap.methodReducedCount,
 595         pDexFile->indexMap.methodMap[0]);
 596     LOGI("Field : %u %u %u\n",
 597         pDexFile->indexMap.fieldFullCount,
 598         pDexFile->indexMap.fieldReducedCount,
 599         pDexFile->indexMap.fieldMap[0]);
 600     LOGI("String: %u %u %u\n",
 601         pDexFile->indexMap.stringFullCount,
 602         pDexFile->indexMap.stringReducedCount,
 603         pDexFile->indexMap.stringMap[0]);
 604
 605     return true;
 606 }
 607
 608 /*
 609  * Parse some auxillary data tables.
 610  *
 611  * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup
 612  * table.  Subsequent versions switched to the "chunk" format.
 613  */
 614 static bool parseAuxData(const u1* data, DexFile* pDexFile)
 615 {
 616     const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset);
 617     u4 indexMapType = 0;
 618
 619     /* v1.0 format? */
 620     if (*pAux == 0) {
 621         LOGV("+++ found OLD dex format\n");
 622         pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1);
 623         return true;
 624     }
 625     LOGV("+++ found NEW dex format\n");
 626
 627     /* process chunks until we see the end marker */
 628     while (*pAux != kDexChunkEnd) {
 629         u4 size = *(pAux+1);
 630         u1* data = (u1*) (pAux + 2);
 631
 632         switch (*pAux) {
 633         case kDexChunkClassLookup:
 634             pDexFile->pClassLookup = (const DexClassLookup*) data;
 635             break;
 636         case kDexChunkReducingIndexMap:
 637             LOGI("+++ found reducing index map, size=%u\n", size);
 638             if (!parseIndexMap(pDexFile, data, size, false)) {
 639                 LOGE("Failed parsing reducing index map\n");
 640                 return false;
 641             }
 642             indexMapType = *pAux;
 643             break;
 644         case kDexChunkExpandingIndexMap:
 645             LOGI("+++ found expanding index map, size=%u\n", size);
 646             if (!parseIndexMap(pDexFile, data, size, true)) {
 647                 LOGE("Failed parsing expanding index map\n");
 648                 return false;
 649             }
 650             indexMapType = *pAux;
 651             break;
 652         case kDexChunkRegisterMaps:
 653             LOGV("+++ found register maps, size=%u\n", size);
 654             pDexFile->pRegisterMapPool = data;
 655             break;
 656         default:
 657             LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n",
 658                 *pAux,
 659                 (char) ((*pAux) >> 24), (char) ((*pAux) >> 16),
 660                 (char) ((*pAux) >> 8),  (char)  (*pAux),
 661                 size);
 662             break;
 663         }
 664
 665         /*
 666          * Advance pointer, padding to 64-bit boundary.  The extra "+8" is
 667          * for the type/size header.
 668          */
 669         size = (size + 8 + 7) & ~7;
 670         pAux += size / sizeof(u4);
 671     }
 672
 673 #if 0   // TODO: propagate expected map type from the VM through the API
 674     /*
 675      * If we're configured to expect an index map, and we don't find one,
 676      * reject this DEX so we'll regenerate it.  Also, if we found an
 677      * "expanding" map but we're not configured to use it, we have to fail
 678      * because the constants aren't usable without translation.
 679      */
 680     if (indexMapType != expectedIndexMapType) {
 681         LOGW("Incompatible index map configuration: found 0x%04x, need %d\n",
 682             indexMapType, DVM_REDUCE_CONSTANTS);
 683         return false;
 684     }
 685 #endif
 686
 687     return true;
 688 }
 689
 690 /*
 691  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
 692  * called after the byte-ordering and structure alignment has been fixed up.
 693  *
 694  * On success, return a newly-allocated DexFile.
 695  */
 696 DexFile* dexFileParse(const u1* data, size_t length, int flags)
 697 {
 698     DexFile* pDexFile = NULL;
 699     const DexHeader* pHeader;
 700     const u1* magic;
 701     int result = -1;
 702
 703     if (length < sizeof(DexHeader)) {
 704         LOGE("too short to be a valid .dex\n");
 705         goto bail;      /* bad file format */
 706     }
 707
 708     pDexFile = (DexFile*) malloc(sizeof(DexFile));
 709     if (pDexFile == NULL)
 710         goto bail;      /* alloc failure */
 711     memset(pDexFile, 0, sizeof(DexFile));
 712
 713     /*
 714      * Peel off the optimized header.
 715      */
 716     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
 717         magic = data;
 718         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
 719             LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
 720                  magic[4], magic[5], magic[6], magic[7]);
 721             goto bail;
 722         }
 723
 724         pDexFile->pOptHeader = (const DexOptHeader*) data;
 725         LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
 726             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
 727
 728         /* locate some auxillary data tables */
 729         if (!parseAuxData(data, pDexFile))
 730             goto bail;
 731
 732         /* ignore the opt header and appended data from here on out */
 733         data += pDexFile->pOptHeader->dexOffset;
 734         length -= pDexFile->pOptHeader->dexOffset;
 735         if (pDexFile->pOptHeader->dexLength > length) {
 736             LOGE("File truncated? stored len=%d, rem len=%d\n",
 737                 pDexFile->pOptHeader->dexLength, (int) length);
 738             goto bail;
 739         }
 740         length = pDexFile->pOptHeader->dexLength;
 741     }
 742
 743     dexFileSetupBasicPointers(pDexFile, data);
 744     pHeader = pDexFile->pHeader;
 745
 746     magic = pHeader->magic;
 747     if (memcmp(magic, DEX_MAGIC, 4) != 0) {
 748         /* not expected */
 749         LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
 750              magic[0], magic[1], magic[2], magic[3]);
 751         goto bail;
 752     }
 753     if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
 754         LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
 755              magic[4], magic[5], magic[6], magic[7]);
 756         goto bail;
 757     }
 758
 759     /*
 760      * Verify the checksum(s).  This is reasonably quick, but does require
 761      * touching every byte in the DEX file.  The base checksum changes after
 762      * byte-swapping and DEX optimization.
 763      */
 764     if (flags & kDexParseVerifyChecksum) {
 765         u4 adler = dexComputeChecksum(pHeader);
 766         if (adler != pHeader->checksum) {
 767             LOGE("ERROR: bad checksum (%08x vs %08x)\n",
 768                 adler, pHeader->checksum);
 769             if (!(flags & kDexParseContinueOnError))
 770                 goto bail;
 771         } else {
 772             LOGV("+++ adler32 checksum (%08x) verified\n", adler);
 773         }
 774
 775         const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
 776         if (pOptHeader != NULL) {
 777             adler = dexComputeOptChecksum(pOptHeader);
 778             if (adler != pOptHeader->checksum) {
 779                 LOGE("ERROR: bad opt checksum (%08x vs %08x)\n",
 780                     adler, pOptHeader->checksum);
 781                 if (!(flags & kDexParseContinueOnError))
 782                     goto bail;
 783             } else {
 784                 LOGV("+++ adler32 opt checksum (%08x) verified\n", adler);
 785             }
 786         }
 787     }
 788
 789     /*
 790      * Verify the SHA-1 digest.  (Normally we don't want to do this --
 791      * the digest is used to uniquely identify the original DEX file, and
 792      * can't be computed for verification after the DEX is byte-swapped
 793      * and optimized.)
 794      */
 795     if (kVerifySignature) {
 796         unsigned char sha1Digest[kSHA1DigestLen];
 797         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
 798                             kSHA1DigestLen;
 799
 800         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
 801         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
 802             char tmpBuf1[kSHA1DigestOutputLen];
 803             char tmpBuf2[kSHA1DigestOutputLen];
 804             LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
 805                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
 806                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
 807             if (!(flags & kDexParseContinueOnError))
 808                 goto bail;
 809         } else {
 810             LOGV("+++ sha1 digest verified\n");
 811         }
 812     }
 813
 814     if (pHeader->fileSize != length) {
 815         LOGE("ERROR: stored file size (%d) != expected (%d)\n",
 816             (int) pHeader->fileSize, (int) length);
 817         if (!(flags & kDexParseContinueOnError))
 818             goto bail;
 819     }
 820
 821     if (pHeader->classDefsSize == 0) {
 822         LOGE("ERROR: DEX file has no classes in it, failing\n");
 823         goto bail;
 824     }
 825
 826     /*
 827      * Success!
 828      */
 829     result = 0;
 830
 831 bail:
 832     if (result != 0 && pDexFile != NULL) {
 833         dexFileFree(pDexFile);
 834         pDexFile = NULL;
 835     }
 836     return pDexFile;
 837 }
 838
 839 /*
 840  * Free up the DexFile and any associated data structures.
 841  *
 842  * Note we may be called with a partially-initialized DexFile.
 843  */
 844 void dexFileFree(DexFile* pDexFile)
 845 {
 846     if (pDexFile == NULL)
 847         return;
 848
 849     free(pDexFile);
 850 }
 851
 852 /*
 853  * Look up a class definition entry by descriptor.
 854  *
 855  * "descriptor" should look like "Landroid/debug/Stuff;".
 856  */
 857 const DexClassDef* dexFindClass(const DexFile* pDexFile,
 858     const char* descriptor)
 859 {
 860     const DexClassLookup* pLookup = pDexFile->pClassLookup;
 861     u4 hash;
 862     int idx, mask;
 863
 864     hash = classDescriptorHash(descriptor);
 865     mask = pLookup->numEntries - 1;
 866     idx = hash & mask;
 867
 868     /*
 869      * Search until we find a matching entry or an empty slot.
 870      */
 871     while (true) {
 872         int offset;
 873
 874         offset = pLookup->table[idx].classDescriptorOffset;
 875         if (offset == 0)
 876             return NULL;
 877
 878         if (pLookup->table[idx].classDescriptorHash == hash) {
 879             const char* str;
 880
 881             str = (const char*) (pDexFile->baseAddr + offset);
 882             if (strcmp(str, descriptor) == 0) {
 883                 return (const DexClassDef*)
 884                     (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
 885             }
 886         }
 887
 888         idx = (idx + 1) & mask;
 889     }
 890 }
 891
 892
 893 /*
 894  * Compute the DEX file checksum for a memory-mapped DEX file.
 895  */
 896 u4 dexComputeChecksum(const DexHeader* pHeader)
 897 {
 898     const u1* start = (const u1*) pHeader;
 899
 900     uLong adler = adler32(0L, Z_NULL, 0);
 901     const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
 902
 903     return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
 904 }
 905
 906 /*
 907  * Compute the checksum on the data appended to the DEX file by dexopt.
 908  */
 909 static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader)
 910 {
 911     const u1* start = (const u1*) pOptHeader + pOptHeader->depsOffset;
 912     const u1* end = (const u1*) pOptHeader +
 913         pOptHeader->auxOffset + pOptHeader->auxLength;
 914
 915     uLong adler = adler32(0L, Z_NULL, 0);
 916
 917     return (u4) adler32(adler, start, end - start);
 918 }
 919
 920
 921 /*
 922  * Compute the size, in bytes, of a DexCode.
 923  */
 924 size_t dexGetDexCodeSize(const DexCode* pCode)
 925 {
 926     /*
 927      * The catch handler data is the last entry.  It has a variable number
 928      * of variable-size pieces, so we need to create an iterator.
 929      */
 930     u4 handlersSize;
 931     u4 offset;
 932     u4 ui;
 933
 934     if (pCode->triesSize != 0) {
 935         handlersSize = dexGetHandlersSize(pCode);
 936         offset = dexGetFirstHandlerOffset(pCode);
 937     } else {
 938         handlersSize = 0;
 939         offset = 0;
 940     }
 941
 942     for (ui = 0; ui < handlersSize; ui++) {
 943         DexCatchIterator iterator;
 944         dexCatchIteratorInit(&iterator, pCode, offset);
 945         offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
 946     }
 947
 948     const u1* handlerData = dexGetCatchHandlerData(pCode);
 949
 950     //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
 951     //    pCode, handlerData, offset);
 952
 953     /* return the size of the catch handler + everything before it */
 954     return (handlerData - (u1*) pCode) + offset;
 955 }
 956
 957
 958 /*
 959  * ===========================================================================
 960  *      Debug info
 961  * ===========================================================================
 962  */
 963
 964 /*
 965  * Decode the arguments in a method signature, which looks something
 966  * like "(ID[Ljava/lang/String;)V".
 967  *
 968  * Returns the type signature letter for the next argument, or ')' if
 969  * there are no more args.  Advances "pSig" to point to the character
 970  * after the one returned.
 971  */
 972 static char decodeSignature(const char** pSig)
 973 {
 974     const char* sig = *pSig;
 975
 976     if (*sig == '(')
 977         sig++;
 978
 979     if (*sig == 'L') {
 980         /* object ref */
 981         while (*++sig != ';')
 982             ;
 983         *pSig = sig+1;
 984         return 'L';
 985     }
 986     if (*sig == '[') {
 987         /* array; advance past array type */
 988         while (*++sig == '[')
 989             ;
 990         if (*sig == 'L') {
 991             while (*++sig != ';')
 992                 ;
 993         }
 994         *pSig = sig+1;
 995         return '[';
 996     }
 997     if (*sig == '\0')
 998         return *sig;        /* don't advance further */
 999
1000     *pSig = sig+1;
1001     return *sig;
1002 }
1003
1004 /*
1005  * returns the length of a type string, given the start of the
1006  * type string. Used for the case where the debug info format
1007  * references types that are inside a method type signature.
1008  */
1009 static int typeLength (const char *type) {
1010     // Assumes any leading '(' has already been gobbled
1011     const char *end = type;
1012     decodeSignature(&end);
1013     return end - type;
1014 }
1015
1016 /*
1017  * Reads a string index as encoded for the debug info format,
1018  * returning a string pointer or NULL as appropriate.
1019  */
1020 static const char* readStringIdx(const DexFile* pDexFile,
1021         const u1** pStream) {
1022     u4 stringIdx = readUnsignedLeb128(pStream);
1023
1024     // Remember, encoded string indicies have 1 added to them.
1025     if (stringIdx == 0) {
1026         return NULL;
1027     } else {
1028         return dexStringById(pDexFile, stringIdx - 1);
1029     }
1030 }
1031
1032 /*
1033  * Reads a type index as encoded for the debug info format, returning
1034  * a string pointer for its descriptor or NULL as appropriate.
1035  */
1036 static const char* readTypeIdx(const DexFile* pDexFile,
1037         const u1** pStream) {
1038     u4 typeIdx = readUnsignedLeb128(pStream);
1039
1040     // Remember, encoded type indicies have 1 added to them.
1041     if (typeIdx == 0) {
1042         return NULL;
1043     } else {
1044         return dexStringByTypeIdx(pDexFile, typeIdx - 1);
1045     }
1046 }
1047
1048 /* access_flag value indicating that a method is static */
1049 #define ACC_STATIC              0x0008
1050
1051 typedef struct LocalInfo {
1052     const char *name;
1053     const char *descriptor;
1054     const char *signature;
1055     u2 startAddress;
1056     bool live;
1057 } LocalInfo;
1058
1059 static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
1060         LocalInfo *localInReg, DexDebugNewLocalCb localCb)
1061 {
1062     if (localCb != NULL && localInReg[reg].live) {
1063         localCb(cnxt, reg, localInReg[reg].startAddress, endAddress,
1064                 localInReg[reg].name,
1065                 localInReg[reg].descriptor,
1066                 localInReg[reg].signature == NULL
1067                 ? "" : localInReg[reg].signature );
1068     }
1069 }
1070
1071 // TODO optimize localCb == NULL case
1072 void dexDecodeDebugInfo(
1073             const DexFile* pDexFile,
1074             const DexCode* pCode,
1075             const char* classDescriptor,
1076             u4 protoIdx,
1077             u4 accessFlags,
1078             DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
1079             void* cnxt)
1080 {
1081     const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
1082     u4 line;
1083     u4 parametersSize;
1084     u4 address = 0;
1085     LocalInfo localInReg[pCode->registersSize];
1086     u4 insnsSize = pCode->insnsSize;
1087     DexProto proto = { pDexFile, protoIdx };
1088
1089     memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
1090
1091     if (stream == NULL) {
1092         goto end;
1093     }
1094
1095     line = readUnsignedLeb128(&stream);
1096     parametersSize = readUnsignedLeb128(&stream);
1097
1098     u2 argReg = pCode->registersSize - pCode->insSize;
1099
1100     if ((accessFlags & ACC_STATIC) == 0) {
1101         /*
1102          * The code is an instance method, which means that there is
1103          * an initial this parameter. Also, the proto list should
1104          * contain exactly one fewer argument word than the insSize
1105          * indicates.
1106          */
1107         assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
1108         localInReg[argReg].name = "this";
1109         localInReg[argReg].descriptor = classDescriptor;
1110         localInReg[argReg].startAddress = 0;
1111         localInReg[argReg].live = true;
1112         argReg++;
1113     } else {
1114         assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
1115     }
1116
1117     DexParameterIterator iterator;
1118     dexParameterIteratorInit(&iterator, &proto);
1119
1120     while (parametersSize-- != 0) {
1121         const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
1122         const char *name;
1123         int reg;
1124
1125         if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
1126             goto invalid_stream;
1127         }
1128
1129         name = readStringIdx(pDexFile, &stream);
1130         reg = argReg;
1131
1132         switch (descriptor[0]) {
1133             case 'D':
1134             case 'J':
1135                 argReg += 2;
1136                 break;
1137             default:
1138                 argReg += 1;
1139                 break;
1140         }
1141
1142         if (name != NULL) {
1143             localInReg[reg].name = name;
1144             localInReg[reg].descriptor = descriptor;
1145             localInReg[reg].signature = NULL;
1146             localInReg[reg].startAddress = address;
1147             localInReg[reg].live = true;
1148         }
1149     }
1150
1151     for (;;)  {
1152         u1 opcode = *stream++;
1153         u2 reg;
1154
1155         switch (opcode) {
1156             case DBG_END_SEQUENCE:
1157                 goto end;
1158
1159             case DBG_ADVANCE_PC:
1160                 address += readUnsignedLeb128(&stream);
1161                 break;
1162
1163             case DBG_ADVANCE_LINE:
1164                 line += readSignedLeb128(&stream);
1165                 break;
1166
1167             case DBG_START_LOCAL:
1168             case DBG_START_LOCAL_EXTENDED:
1169                 reg = readUnsignedLeb128(&stream);
1170                 if (reg > pCode->registersSize) goto invalid_stream;
1171
1172                 // Emit what was previously there, if anything
1173                 emitLocalCbIfLive (cnxt, reg, address,
1174                     localInReg, localCb);
1175
1176                 localInReg[reg].name = readStringIdx(pDexFile, &stream);
1177                 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
1178                 if (opcode == DBG_START_LOCAL_EXTENDED) {
1179                     localInReg[reg].signature
1180                         = readStringIdx(pDexFile, &stream);
1181                 } else {
1182                     localInReg[reg].signature = NULL;
1183                 }
1184                 localInReg[reg].startAddress = address;
1185                 localInReg[reg].live = true;
1186                 break;
1187
1188             case DBG_END_LOCAL:
1189                 reg = readUnsignedLeb128(&stream);
1190                 if (reg > pCode->registersSize) goto invalid_stream;
1191
1192                 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
1193                 localInReg[reg].live = false;
1194                 break;
1195
1196             case DBG_RESTART_LOCAL:
1197                 reg = readUnsignedLeb128(&stream);
1198                 if (reg > pCode->registersSize) goto invalid_stream;
1199
1200                 if (localInReg[reg].name == NULL
1201                         || localInReg[reg].descriptor == NULL) {
1202                     goto invalid_stream;
1203                 }
1204
1205                 /*
1206                  * If the register is live, the "restart" is superfluous,
1207                  * and we don't want to mess with the existing start address.
1208                  */
1209                 if (!localInReg[reg].live) {
1210                     localInReg[reg].startAddress = address;
1211                     localInReg[reg].live = true;
1212                 }
1213                 break;
1214
1215             case DBG_SET_PROLOGUE_END:
1216             case DBG_SET_EPILOGUE_BEGIN:
1217             case DBG_SET_FILE:
1218                 break;
1219
1220             default: {
1221                 int adjopcode = opcode - DBG_FIRST_SPECIAL;
1222
1223                 address += adjopcode / DBG_LINE_RANGE;
1224                 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
1225
1226                 if (posCb != NULL) {
1227                     int done;
1228                     done = posCb(cnxt, address, line);
1229
1230                     if (done) {
1231                         // early exit
1232                         goto end;
1233                     }
1234                 }
1235                 break;
1236             }
1237         }
1238     }
1239
1240 end:
1241     {
1242         int reg;
1243         for (reg = 0; reg < pCode->registersSize; reg++) {
1244             emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
1245         }
1246     }
1247     return;
1248
1249 invalid_stream:
1250     IF_LOGE() {
1251         char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
1252         LOGE("Invalid debug info stream. class %s; proto %s",
1253                 classDescriptor, methodDescriptor);
1254         free(methodDescriptor);
1255     }
1256 }
1257