/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Read-only access to Zip archives, with minimal heap allocation.
 */
20 #include "ZipArchive.h"
30 #include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
/*
 * Zip file constants.
 *
 * Offsets are byte offsets from the start of the record they describe.
 */
#define kEOCDSignature      0x06054b50
#define kEOCDLen            22              // 4-byte magic + 18 bytes of fields
#define kEOCDNumEntries     8               // offset to #of entries in file
#define kEOCDSize           12              // size of the central directory
#define kEOCDFileOffset     16              // offset to central directory

#define kMaxCommentLen      65535           // longest possible in ushort
#define kMaxEOCDSearch      (kMaxCommentLen + kEOCDLen)

#define kLFHSignature       0x04034b50
#define kLFHLen             30              // excluding variable-len fields
#define kLFHNameLen         26              // offset to filename length
#define kLFHExtraLen        28              // offset to extra length

#define kCDESignature       0x02014b50
#define kCDELen             46              // excluding variable-len fields
#define kCDEMethod          10              // offset to compression method
#define kCDEModWhen         12              // offset to modification timestamp
#define kCDECRC             16              // offset to entry CRC
#define kCDECompLen         20              // offset to compressed length
#define kCDEUncompLen       24              // offset to uncompressed length
#define kCDENameLen         28              // offset to filename length
#define kCDEExtraLen        30              // offset to extra length
#define kCDECommentLen      32              // offset to comment length
#define kCDELocalOffset     42              // offset to local hdr
/*
 * The values we return for ZipEntry use 0 as an invalid value, so we
 * want to adjust the hash table index by a fixed amount.  Using a large
 * value helps ensure that people don't mix & match arguments, e.g. by
 * passing a plain index where a ZipEntry is expected.
 */
#define kZipEntryAdj        10000
71 * Convert a ZipEntry to a hash table index, verifying that it's in a
74 static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry)
76 long ent = ((long) entry) - kZipEntryAdj;
77 if (ent < 0 || ent >= pArchive->mHashTableSize ||
78 pArchive->mHashTable[ent].name == NULL)
80 LOGW("Zip: invalid ZipEntry %p (%ld)\n", entry, ent);
/*
 * Simple string hash function for non-null-terminated strings.
 *
 * Folds the first "len" chars of "str" into a running value using
 * hash = hash * 31 + c.  A zero or negative "len" yields 0.
 */
static unsigned int computeHash(const char* str, int len)
{
    unsigned int hash = 0;

    /* "> 0" keeps a bogus negative length from walking off the buffer */
    while (len-- > 0)
        hash = hash * 31 + *str++;

    return hash;
}
100 * Add a new entry to the hash table.
102 static void addToHash(ZipArchive* pArchive, const char* str, int strLen,
105 const int hashTableSize = pArchive->mHashTableSize;
106 int ent = hash & (hashTableSize - 1);
109 * We over-allocated the table, so we're guaranteed to find an empty slot.
111 while (pArchive->mHashTable[ent].name != NULL)
112 ent = (ent + 1) & (hashTableSize-1);
114 pArchive->mHashTable[ent].name = str;
115 pArchive->mHashTable[ent].nameLen = strLen;
119 * Get 2 little-endian bytes.
121 static u2 get2LE(unsigned char const* pSrc)
123 return pSrc[0] | (pSrc[1] << 8);
127 * Get 4 little-endian bytes.
129 static u4 get4LE(unsigned char const* pSrc)
134 result |= pSrc[1] << 8;
135 result |= pSrc[2] << 16;
136 result |= pSrc[3] << 24;
142 * Find the zip Central Directory and memory-map it.
144 * On success, returns 0 after populating fields from the EOCD area:
149 static int mapCentralDirectory(int fd, const char* debugFileName,
150 ZipArchive* pArchive)
156 * Get and test file length.
158 off_t fileLength = lseek(fd, 0, SEEK_END);
159 if (fileLength < kEOCDLen) {
160 LOGV("Zip: length %ld is too small to be zip\n", (long) fileLength);
165 * Perform the traditional EOCD snipe hunt.
167 * We're searching for the End of Central Directory magic number,
168 * which appears at the start of the EOCD block. It's followed by
169 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
170 * need to read the last part of the file into a buffer, dig through
171 * it to find the magic number, parse some values out, and use those
172 * to determine the extent of the CD.
174 * We start by pulling in the last part of the file.
176 size_t readAmount = kMaxEOCDSearch;
177 if (readAmount > (size_t) fileLength)
178 readAmount = fileLength;
179 off_t searchStart = fileLength - readAmount;
181 scanBuf = (u1*) malloc(readAmount);
182 if (lseek(fd, searchStart, SEEK_SET) != searchStart) {
183 LOGW("Zip: seek %ld failed: %s\n", (long) searchStart, strerror(errno));
186 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scanBuf, readAmount));
187 if (actual != (ssize_t) readAmount) {
188 LOGW("Zip: read %zd failed: %s\n", readAmount, strerror(errno));
193 * Scan backward for the EOCD magic. In an archive without a trailing
194 * comment, we'll find it on the first try. (We may want to consider
195 * doing an initial minimal read; if we don't find it, retry with a
196 * second read as above.)
199 for (i = readAmount - kEOCDLen; i >= 0; i--) {
200 if (scanBuf[i] == 0x50 && get4LE(&scanBuf[i]) == kEOCDSignature) {
201 LOGV("+++ Found EOCD at buf+%d\n", i);
206 LOGD("Zip: EOCD not found, %s is not zip\n", debugFileName);
210 off_t eocdOffset = searchStart + i;
211 const u1* eocdPtr = scanBuf + i;
213 assert(eocdOffset < fileLength);
216 * Grab the CD offset and size, and the number of entries in the
217 * archive. Verify that they look reasonable.
219 u4 numEntries = get2LE(eocdPtr + kEOCDNumEntries);
220 u4 dirSize = get4LE(eocdPtr + kEOCDSize);
221 u4 dirOffset = get4LE(eocdPtr + kEOCDFileOffset);
223 if ((long long) dirOffset + (long long) dirSize > (long long) eocdOffset) {
224 LOGW("Zip: bad offsets (dir %ld, size %u, eocd %ld)\n",
225 (long) dirOffset, dirSize, (long) eocdOffset);
228 if (numEntries == 0) {
229 LOGW("Zip: empty archive?\n");
233 LOGV("+++ numEntries=%d dirSize=%d dirOffset=%d\n",
234 numEntries, dirSize, dirOffset);
237 * It all looks good. Create a mapping for the CD, and set the fields
240 if (sysMapFileSegmentInShmem(fd, dirOffset, dirSize,
241 &pArchive->mDirectoryMap) != 0)
243 LOGW("Zip: cd map failed\n");
247 pArchive->mNumEntries = numEntries;
248 pArchive->mDirectoryOffset = dirOffset;
258 * Parses the Zip archive's Central Directory. Allocates and populates the
261 * Returns 0 on success.
263 static int parseZipArchive(ZipArchive* pArchive)
266 const u1* cdPtr = (const u1*)pArchive->mDirectoryMap.addr;
267 size_t cdLength = pArchive->mDirectoryMap.length;
268 int numEntries = pArchive->mNumEntries;
271 * Create hash table. We have a minimum 75% load factor, possibly as
272 * low as 50% after we round off to a power of 2. There must be at
273 * least one unused entry to avoid an infinite loop during creation.
275 pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3);
276 pArchive->mHashTable = (ZipHashEntry*)
277 calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry));
280 * Walk through the central directory, adding entries to the hash
281 * table and verifying values.
283 const u1* ptr = cdPtr;
285 for (i = 0; i < numEntries; i++) {
286 if (get4LE(ptr) != kCDESignature) {
287 LOGW("Zip: missed a central dir sig (at %d)\n", i);
290 if (ptr + kCDELen > cdPtr + cdLength) {
291 LOGW("Zip: ran off the end (at %d)\n", i);
295 long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
296 if (localHdrOffset >= pArchive->mDirectoryOffset) {
297 LOGW("Zip: bad LFH offset %ld at entry %d\n", localHdrOffset, i);
301 unsigned int fileNameLen, extraLen, commentLen, hash;
302 fileNameLen = get2LE(ptr + kCDENameLen);
303 extraLen = get2LE(ptr + kCDEExtraLen);
304 commentLen = get2LE(ptr + kCDECommentLen);
306 /* add the CDE filename to the hash table */
307 hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
308 addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash);
310 ptr += kCDELen + fileNameLen + extraLen + commentLen;
311 if ((size_t)(ptr - cdPtr) > cdLength) {
312 LOGW("Zip: bad CD advance (%d vs %zd) at entry %d\n",
313 (int) (ptr - cdPtr), cdLength, i);
317 LOGV("+++ zip good scan %d entries\n", numEntries);
326 * Open the specified file read-only. We examine the contents and verify
327 * that it appears to be a valid zip file.
329 * This will be called on non-Zip files, especially during VM startup, so
330 * we don't want to be too noisy about certain types of failure. (Do
331 * we want a "quiet" flag?)
333 * On success, we fill out the contents of "pArchive" and return 0. On
334 * failure we return the errno value.
336 int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive)
340 LOGV("Opening as zip '%s' %p\n", fileName, pArchive);
342 memset(pArchive, 0, sizeof(ZipArchive));
344 fd = open(fileName, O_RDONLY, 0);
346 err = errno ? errno : -1;
347 LOGV("Unable to open '%s': %s\n", fileName, strerror(err));
351 return dexZipPrepArchive(fd, fileName, pArchive);
355 * Prepare to access a ZipArchive through an open file descriptor.
357 * On success, we fill out the contents of "pArchive" and return 0.
359 int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive)
363 memset(pArchive, 0, sizeof(*pArchive));
366 if (mapCentralDirectory(fd, debugFileName, pArchive) != 0)
369 if (parseZipArchive(pArchive) != 0) {
370 LOGV("Zip: parsing '%s' failed\n", debugFileName);
379 dexZipCloseArchive(pArchive);
385 * Close a ZipArchive, closing the file and freeing the contents.
387 * NOTE: the ZipArchive may not have been fully created.
389 void dexZipCloseArchive(ZipArchive* pArchive)
391 LOGV("Closing archive %p\n", pArchive);
393 if (pArchive->mFd >= 0)
394 close(pArchive->mFd);
396 sysReleaseShmem(&pArchive->mDirectoryMap);
398 free(pArchive->mHashTable);
400 /* ensure nobody tries to use the ZipArchive after it's closed */
401 pArchive->mDirectoryOffset = -1;
403 pArchive->mNumEntries = -1;
404 pArchive->mHashTableSize = -1;
405 pArchive->mHashTable = NULL;
410 * Find a matching entry.
412 * Returns 0 if not found.
414 ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName)
416 int nameLen = strlen(entryName);
417 unsigned int hash = computeHash(entryName, nameLen);
418 const int hashTableSize = pArchive->mHashTableSize;
419 int ent = hash & (hashTableSize-1);
421 while (pArchive->mHashTable[ent].name != NULL) {
422 if (pArchive->mHashTable[ent].nameLen == nameLen &&
423 memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0)
426 return (ZipEntry)(long)(ent + kZipEntryAdj);
429 ent = (ent + 1) & (hashTableSize-1);
437 * Find the Nth entry.
439 * This currently involves walking through the sparse hash table, counting
440 * non-empty entries. If we need to speed this up we can either allocate
441 * a parallel lookup table or (perhaps better) provide an iterator interface.
443 ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx)
445 if (idx < 0 || idx >= pArchive->mNumEntries) {
446 LOGW("Invalid index %d\n", idx);
451 for (ent = 0; ent < pArchive->mHashTableSize; ent++) {
452 if (pArchive->mHashTable[ent].name != NULL) {
454 return (ZipEntry) (ent + kZipEntryAdj);
463 * Get the useful fields from the zip entry.
465 * Returns non-zero if the contents of the fields (particularly the data
466 * offset) appear to be bogus.
468 int dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
469 int* pMethod, size_t* pUncompLen, size_t* pCompLen, off_t* pOffset,
470 long* pModWhen, long* pCrc32)
472 int ent = entryToIndex(pArchive, entry);
477 * Recover the start of the central directory entry from the filename
478 * pointer. The filename is the first entry past the fixed-size data,
479 * so we can just subtract back from that.
481 const unsigned char* basePtr = (const unsigned char*)
482 pArchive->mDirectoryMap.addr;
483 const unsigned char* ptr = (const unsigned char*)
484 pArchive->mHashTable[ent].name;
485 off_t cdOffset = pArchive->mDirectoryOffset;
489 int method = get2LE(ptr + kCDEMethod);
493 if (pModWhen != NULL)
494 *pModWhen = get4LE(ptr + kCDEModWhen);
496 *pCrc32 = get4LE(ptr + kCDECRC);
498 size_t compLen = get4LE(ptr + kCDECompLen);
499 if (pCompLen != NULL)
501 size_t uncompLen = get4LE(ptr + kCDEUncompLen);
502 if (pUncompLen != NULL)
503 *pUncompLen = uncompLen;
506 * If requested, determine the offset of the start of the data. All we
507 * have is the offset to the Local File Header, which is variable size,
508 * so we have to read the contents of the struct to figure out where
509 * the actual data starts.
511 * We also need to make sure that the lengths are not so large that
512 * somebody trying to map the compressed or uncompressed data runs
513 * off the end of the mapped region.
515 * Note we don't verify compLen/uncompLen if they don't request the
516 * dataOffset, because dataOffset is expensive to determine. However,
517 * if they don't have the file offset, they're not likely to be doing
518 * anything with the contents.
520 if (pOffset != NULL) {
521 long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
522 if (localHdrOffset + kLFHLen >= cdOffset) {
523 LOGW("Zip: bad local hdr offset in zip\n");
528 if (lseek(pArchive->mFd, localHdrOffset, SEEK_SET) != localHdrOffset) {
529 LOGW("Zip: failed seeking to lfh at offset %ld\n", localHdrOffset);
533 TEMP_FAILURE_RETRY(read(pArchive->mFd, lfhBuf, sizeof(lfhBuf)));
534 if (actual != sizeof(lfhBuf)) {
535 LOGW("Zip: failed reading lfh from offset %ld\n", localHdrOffset);
539 if (get4LE(lfhBuf) != kLFHSignature) {
540 LOGW("Zip: didn't find signature at start of lfh, offset=%ld\n",
545 off_t dataOffset = localHdrOffset + kLFHLen
546 + get2LE(lfhBuf + kLFHNameLen) + get2LE(lfhBuf + kLFHExtraLen);
547 if (dataOffset >= cdOffset) {
548 LOGW("Zip: bad data offset %ld in zip\n", (long) dataOffset);
553 if ((off_t)(dataOffset + compLen) > cdOffset) {
554 LOGW("Zip: bad compressed length in zip (%ld + %zd > %ld)\n",
555 (long) dataOffset, compLen, (long) cdOffset);
559 if (method == kCompressStored &&
560 (off_t)(dataOffset + uncompLen) > cdOffset)
562 LOGW("Zip: bad uncompressed length in zip (%ld + %zd > %ld)\n",
563 (long) dataOffset, uncompLen, (long) cdOffset);
567 *pOffset = dataOffset;
573 * Uncompress "deflate" data from the archive's file to an open file
576 static int inflateToFile(int inFd, int outFd, size_t uncompLen, size_t compLen)
579 const size_t kBufSize = 32768;
580 unsigned char* readBuf = (unsigned char*) malloc(kBufSize);
581 unsigned char* writeBuf = (unsigned char*) malloc(kBufSize);
585 if (readBuf == NULL || writeBuf == NULL)
589 * Initialize the zlib stream struct.
591 memset(&zstream, 0, sizeof(zstream));
592 zstream.zalloc = Z_NULL;
593 zstream.zfree = Z_NULL;
594 zstream.opaque = Z_NULL;
595 zstream.next_in = NULL;
596 zstream.avail_in = 0;
597 zstream.next_out = (Bytef*) writeBuf;
598 zstream.avail_out = kBufSize;
599 zstream.data_type = Z_UNKNOWN;
602 * Use the undocumented "negative window bits" feature to tell zlib
603 * that there's no zlib header waiting for it.
605 zerr = inflateInit2(&zstream, -MAX_WBITS);
607 if (zerr == Z_VERSION_ERROR) {
608 LOGE("Installed zlib is not compatible with linked version (%s)\n",
611 LOGW("Call to inflateInit2 failed (zerr=%d)\n", zerr);
617 * Loop while we have more to do.
620 /* read as much as we can */
621 if (zstream.avail_in == 0) {
622 size_t getSize = (compLen > kBufSize) ? kBufSize : compLen;
624 ssize_t actual = TEMP_FAILURE_RETRY(read(inFd, readBuf, getSize));
625 if (actual != (ssize_t) getSize) {
626 LOGW("Zip: inflate read failed (%d vs %zd)\n",
627 (int)actual, getSize);
633 zstream.next_in = readBuf;
634 zstream.avail_in = getSize;
637 /* uncompress the data */
638 zerr = inflate(&zstream, Z_NO_FLUSH);
639 if (zerr != Z_OK && zerr != Z_STREAM_END) {
640 LOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
641 zerr, zstream.next_in, zstream.avail_in,
642 zstream.next_out, zstream.avail_out);
646 /* write when we're full or when we're done */
647 if (zstream.avail_out == 0 ||
648 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize))
650 size_t writeSize = zstream.next_out - writeBuf;
651 if (sysWriteFully(outFd, writeBuf, writeSize, "Zip inflate") != 0)
654 zstream.next_out = writeBuf;
655 zstream.avail_out = kBufSize;
657 } while (zerr == Z_OK);
659 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
662 if (zstream.total_out != uncompLen) {
663 LOGW("Zip: size mismatch on inflated file (%ld vs %zd)\n",
664 zstream.total_out, uncompLen);
671 inflateEnd(&zstream); /* free up any allocated structures */
/*
 * Copy bytes from input to output.
 *
 * Transfers exactly "uncompLen" bytes from "inFd" (starting at its
 * current file position) to "outFd", one buffer at a time.
 *
 * Returns 0 on success, nonzero if a read comes up short or a write fails.
 */
static int copyFileToFile(int inFd, int outFd, size_t uncompLen)
{
    /* an enum is a true constant, so "buf" is a fixed array, not a VLA */
    enum { kBufSize = 32768 };
    unsigned char buf[kBufSize];

    while (uncompLen != 0) {
        size_t getSize = (uncompLen > kBufSize) ? kBufSize : uncompLen;

        ssize_t actual = TEMP_FAILURE_RETRY(read(inFd, buf, getSize));
        if (actual != (ssize_t) getSize) {
            LOGW("Zip: copy read failed (%d vs %zu)\n", (int)actual, getSize);
            return -1;
        }

        if (sysWriteFully(outFd, buf, getSize, "Zip copy") != 0)
            return -1;

        uncompLen -= getSize;
    }

    return 0;
}
706 * Uncompress an entry, in its entirety, to an open file descriptor.
708 * TODO: this doesn't verify the data's CRC, but probably should (especially
709 * for uncompressed data).
711 int dexZipExtractEntryToFile(const ZipArchive* pArchive,
712 const ZipEntry entry, int fd)
715 int ent = entryToIndex(pArchive, entry);
717 LOGW("Zip: extract can't find entry %p\n", entry);
722 size_t uncompLen, compLen;
725 if (dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen,
726 &dataOffset, NULL, NULL) != 0)
730 if (lseek(pArchive->mFd, dataOffset, SEEK_SET) != dataOffset) {
731 LOGW("Zip: lseek to data at %ld failed\n", (long) dataOffset);
735 if (method == kCompressStored) {
736 if (copyFileToFile(pArchive->mFd, fd, uncompLen) != 0)
739 if (inflateToFile(pArchive->mFd, fd, uncompLen, compLen) != 0)