2 * Copyright (C) 2008 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Read-only access to Zip archives, with minimal heap allocation.
20 #include "ZipArchive.h"
30 #include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
/*
 * Zip file constants.
 *
 * The *Signature values are the 32-bit little-endian magic numbers that
 * open each record type.  The *Len values are record sizes, and the
 * remaining values are byte offsets of fields from the start of a record.
 */

/* End Of Central Directory (EOCD) record */
#define kEOCDSignature      0x06054b50
#define kEOCDLen            22              // 4-byte magic + 18 bytes of fields
#define kEOCDNumEntries     8               // offset to #of entries in file
#define kEOCDSize           12              // size of the central directory
#define kEOCDFileOffset     16              // offset to central directory

#define kMaxCommentLen      65535           // longest possible in ushort
#define kMaxEOCDSearch      (kMaxCommentLen + kEOCDLen)

/* Local File Header (LFH) */
#define kLFHSignature       0x04034b50
#define kLFHLen             30              // excluding variable-len fields
#define kLFHNameLen         26              // offset to filename length
#define kLFHExtraLen        28              // offset to extra length

/* Central Directory Entry (CDE) */
#define kCDESignature       0x02014b50
#define kCDELen             46              // excluding variable-len fields
#define kCDEMethod          10              // offset to compression method
#define kCDEModWhen         12              // offset to modification timestamp
#define kCDECRC             16              // offset to entry CRC
#define kCDECompLen         20              // offset to compressed length
#define kCDEUncompLen       24              // offset to uncompressed length
#define kCDENameLen         28              // offset to filename length
#define kCDEExtraLen        30              // offset to extra length
#define kCDECommentLen      32              // offset to comment length
#define kCDELocalOffset     42              // offset to local hdr

/*
 * The values we return for ZipEntry use 0 as an invalid value, so we
 * want to adjust the hash table index by a fixed amount.  Using a large
 * value helps ensure that people don't mix & match arguments, e.g. by
 * passing a plain entry index where a ZipEntry is expected.
 */
#define kZipEntryAdj        10000
74 * Convert a ZipEntry to a hash table index, verifying that it's in a
77 static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry)
79 long ent = ((long) entry) - kZipEntryAdj;
80 if (ent < 0 || ent >= pArchive->mHashTableSize ||
81 pArchive->mHashTable[ent].name == NULL)
83 ALOGW("Zip: invalid ZipEntry %p (%ld)", entry, ent);
/*
 * Simple string hash function for non-null-terminated strings.
 *
 * Folds the first "len" chars of "str" into a multiply-by-31 hash.  A zero
 * or negative "len" yields 0 without touching "str".
 */
static unsigned int computeHash(const char* str, int len)
{
    unsigned int hash = 0;

    /*
     * Test "len > 0" rather than "len != 0": a negative length would
     * otherwise count down through the whole int range, reading far past
     * the end of the buffer.
     */
    for (; len > 0; len--)
        hash = hash * 31 + *str++;

    return hash;
}
103 * Add a new entry to the hash table.
105 static void addToHash(ZipArchive* pArchive, const char* str, int strLen,
108 const int hashTableSize = pArchive->mHashTableSize;
109 int ent = hash & (hashTableSize - 1);
112 * We over-allocated the table, so we're guaranteed to find an empty slot.
114 while (pArchive->mHashTable[ent].name != NULL)
115 ent = (ent + 1) & (hashTableSize-1);
117 pArchive->mHashTable[ent].name = str;
118 pArchive->mHashTable[ent].nameLen = strLen;
122 * Get 2 little-endian bytes.
124 static u2 get2LE(unsigned char const* pSrc)
126 return pSrc[0] | (pSrc[1] << 8);
130 * Get 4 little-endian bytes.
132 static u4 get4LE(unsigned char const* pSrc)
137 result |= pSrc[1] << 8;
138 result |= pSrc[2] << 16;
139 result |= pSrc[3] << 24;
144 static int mapCentralDirectory0(int fd, const char* debugFileName,
145 ZipArchive* pArchive, off_t fileLength, size_t readAmount, u1* scanBuf)
147 off_t searchStart = fileLength - readAmount;
149 if (lseek(fd, searchStart, SEEK_SET) != searchStart) {
150 ALOGW("Zip: seek %ld failed: %s", (long) searchStart, strerror(errno));
153 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scanBuf, readAmount));
154 if (actual != (ssize_t) readAmount) {
155 ALOGW("Zip: read %zd failed: %s", readAmount, strerror(errno));
160 * Scan backward for the EOCD magic. In an archive without a trailing
161 * comment, we'll find it on the first try. (We may want to consider
162 * doing an initial minimal read; if we don't find it, retry with a
163 * second read as above.)
166 for (i = readAmount - kEOCDLen; i >= 0; i--) {
167 if (scanBuf[i] == 0x50 && get4LE(&scanBuf[i]) == kEOCDSignature) {
168 ALOGV("+++ Found EOCD at buf+%d", i);
173 ALOGD("Zip: EOCD not found, %s is not zip", debugFileName);
177 off_t eocdOffset = searchStart + i;
178 const u1* eocdPtr = scanBuf + i;
180 assert(eocdOffset < fileLength);
183 * Grab the CD offset and size, and the number of entries in the
184 * archive. Verify that they look reasonable.
186 u4 numEntries = get2LE(eocdPtr + kEOCDNumEntries);
187 u4 dirSize = get4LE(eocdPtr + kEOCDSize);
188 u4 dirOffset = get4LE(eocdPtr + kEOCDFileOffset);
190 if ((long long) dirOffset + (long long) dirSize > (long long) eocdOffset) {
191 ALOGW("Zip: bad offsets (dir %ld, size %u, eocd %ld)",
192 (long) dirOffset, dirSize, (long) eocdOffset);
195 if (numEntries == 0) {
196 ALOGW("Zip: empty archive?");
200 ALOGV("+++ numEntries=%d dirSize=%d dirOffset=%d",
201 numEntries, dirSize, dirOffset);
204 * It all looks good. Create a mapping for the CD, and set the fields
207 if (sysMapFileSegmentInShmem(fd, dirOffset, dirSize,
208 &pArchive->mDirectoryMap) != 0)
210 ALOGW("Zip: cd map failed");
214 pArchive->mNumEntries = numEntries;
215 pArchive->mDirectoryOffset = dirOffset;
221 * Find the zip Central Directory and memory-map it.
223 * On success, returns 0 after populating fields from the EOCD area:
228 static int mapCentralDirectory(int fd, const char* debugFileName,
229 ZipArchive* pArchive)
232 * Get and test file length.
234 off_t fileLength = lseek(fd, 0, SEEK_END);
235 if (fileLength < kEOCDLen) {
236 ALOGV("Zip: length %ld is too small to be zip", (long) fileLength);
241 * Perform the traditional EOCD snipe hunt.
243 * We're searching for the End of Central Directory magic number,
244 * which appears at the start of the EOCD block. It's followed by
245 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
246 * need to read the last part of the file into a buffer, dig through
247 * it to find the magic number, parse some values out, and use those
248 * to determine the extent of the CD.
250 * We start by pulling in the last part of the file.
252 size_t readAmount = kMaxEOCDSearch;
253 if (fileLength < off_t(readAmount))
254 readAmount = fileLength;
256 u1* scanBuf = (u1*) malloc(readAmount);
257 if (scanBuf == NULL) {
261 int result = mapCentralDirectory0(fd, debugFileName, pArchive,
262 fileLength, readAmount, scanBuf);
269 * Parses the Zip archive's Central Directory. Allocates and populates the
272 * Returns 0 on success.
274 static int parseZipArchive(ZipArchive* pArchive)
277 const u1* cdPtr = (const u1*)pArchive->mDirectoryMap.addr;
278 size_t cdLength = pArchive->mDirectoryMap.length;
279 int numEntries = pArchive->mNumEntries;
282 * Create hash table. We have a minimum 75% load factor, possibly as
283 * low as 50% after we round off to a power of 2. There must be at
284 * least one unused entry to avoid an infinite loop during creation.
286 pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3);
287 pArchive->mHashTable = (ZipHashEntry*)
288 calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry));
291 * Walk through the central directory, adding entries to the hash
292 * table and verifying values.
294 const u1* ptr = cdPtr;
296 for (i = 0; i < numEntries; i++) {
297 if (get4LE(ptr) != kCDESignature) {
298 ALOGW("Zip: missed a central dir sig (at %d)", i);
301 if (ptr + kCDELen > cdPtr + cdLength) {
302 ALOGW("Zip: ran off the end (at %d)", i);
306 long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
307 if (localHdrOffset >= pArchive->mDirectoryOffset) {
308 ALOGW("Zip: bad LFH offset %ld at entry %d", localHdrOffset, i);
312 unsigned int fileNameLen, extraLen, commentLen, hash;
313 fileNameLen = get2LE(ptr + kCDENameLen);
314 extraLen = get2LE(ptr + kCDEExtraLen);
315 commentLen = get2LE(ptr + kCDECommentLen);
317 /* add the CDE filename to the hash table */
318 hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
319 addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash);
321 ptr += kCDELen + fileNameLen + extraLen + commentLen;
322 if ((size_t)(ptr - cdPtr) > cdLength) {
323 ALOGW("Zip: bad CD advance (%d vs %zd) at entry %d",
324 (int) (ptr - cdPtr), cdLength, i);
328 ALOGV("+++ zip good scan %d entries", numEntries);
337 * Open the specified file read-only. We examine the contents and verify
338 * that it appears to be a valid zip file.
340 * This will be called on non-Zip files, especially during VM startup, so
341 * we don't want to be too noisy about certain types of failure. (Do
342 * we want a "quiet" flag?)
344 * On success, we fill out the contents of "pArchive" and return 0. On
345 * failure we return the errno value.
347 int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive)
351 ALOGV("Opening as zip '%s' %p", fileName, pArchive);
353 memset(pArchive, 0, sizeof(ZipArchive));
355 fd = open(fileName, O_RDONLY | O_BINARY, 0);
357 err = errno ? errno : -1;
358 ALOGV("Unable to open '%s': %s", fileName, strerror(err));
362 return dexZipPrepArchive(fd, fileName, pArchive);
366 * Prepare to access a ZipArchive through an open file descriptor.
368 * On success, we fill out the contents of "pArchive" and return 0.
370 int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive)
374 memset(pArchive, 0, sizeof(*pArchive));
377 if (mapCentralDirectory(fd, debugFileName, pArchive) != 0)
380 if (parseZipArchive(pArchive) != 0) {
381 ALOGV("Zip: parsing '%s' failed", debugFileName);
390 dexZipCloseArchive(pArchive);
396 * Close a ZipArchive, closing the file and freeing the contents.
398 * NOTE: the ZipArchive may not have been fully created.
400 void dexZipCloseArchive(ZipArchive* pArchive)
402 ALOGV("Closing archive %p", pArchive);
404 if (pArchive->mFd >= 0)
405 close(pArchive->mFd);
407 sysReleaseShmem(&pArchive->mDirectoryMap);
409 free(pArchive->mHashTable);
411 /* ensure nobody tries to use the ZipArchive after it's closed */
412 pArchive->mDirectoryOffset = -1;
414 pArchive->mNumEntries = -1;
415 pArchive->mHashTableSize = -1;
416 pArchive->mHashTable = NULL;
421 * Find a matching entry.
423 * Returns 0 if not found.
425 ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName)
427 int nameLen = strlen(entryName);
428 unsigned int hash = computeHash(entryName, nameLen);
429 const int hashTableSize = pArchive->mHashTableSize;
430 int ent = hash & (hashTableSize-1);
432 while (pArchive->mHashTable[ent].name != NULL) {
433 if (pArchive->mHashTable[ent].nameLen == nameLen &&
434 memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0)
437 return (ZipEntry)(long)(ent + kZipEntryAdj);
440 ent = (ent + 1) & (hashTableSize-1);
448 * Find the Nth entry.
450 * This currently involves walking through the sparse hash table, counting
451 * non-empty entries. If we need to speed this up we can either allocate
452 * a parallel lookup table or (perhaps better) provide an iterator interface.
454 ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx)
456 if (idx < 0 || idx >= pArchive->mNumEntries) {
457 ALOGW("Invalid index %d", idx);
462 for (ent = 0; ent < pArchive->mHashTableSize; ent++) {
463 if (pArchive->mHashTable[ent].name != NULL) {
465 return (ZipEntry) (ent + kZipEntryAdj);
474 * Get the useful fields from the zip entry.
476 * Returns non-zero if the contents of the fields (particularly the data
477 * offset) appear to be bogus.
479 int dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
480 int* pMethod, size_t* pUncompLen, size_t* pCompLen, off_t* pOffset,
481 long* pModWhen, long* pCrc32)
483 int ent = entryToIndex(pArchive, entry);
488 * Recover the start of the central directory entry from the filename
489 * pointer. The filename is the first entry past the fixed-size data,
490 * so we can just subtract back from that.
492 const unsigned char* basePtr = (const unsigned char*)
493 pArchive->mDirectoryMap.addr;
494 const unsigned char* ptr = (const unsigned char*)
495 pArchive->mHashTable[ent].name;
496 off_t cdOffset = pArchive->mDirectoryOffset;
500 int method = get2LE(ptr + kCDEMethod);
504 if (pModWhen != NULL)
505 *pModWhen = get4LE(ptr + kCDEModWhen);
507 *pCrc32 = get4LE(ptr + kCDECRC);
509 size_t compLen = get4LE(ptr + kCDECompLen);
510 if (pCompLen != NULL)
512 size_t uncompLen = get4LE(ptr + kCDEUncompLen);
513 if (pUncompLen != NULL)
514 *pUncompLen = uncompLen;
517 * If requested, determine the offset of the start of the data. All we
518 * have is the offset to the Local File Header, which is variable size,
519 * so we have to read the contents of the struct to figure out where
520 * the actual data starts.
522 * We also need to make sure that the lengths are not so large that
523 * somebody trying to map the compressed or uncompressed data runs
524 * off the end of the mapped region.
526 * Note we don't verify compLen/uncompLen if they don't request the
527 * dataOffset, because dataOffset is expensive to determine. However,
528 * if they don't have the file offset, they're not likely to be doing
529 * anything with the contents.
531 if (pOffset != NULL) {
532 long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
533 if (localHdrOffset + kLFHLen >= cdOffset) {
534 ALOGW("Zip: bad local hdr offset in zip");
539 if (lseek(pArchive->mFd, localHdrOffset, SEEK_SET) != localHdrOffset) {
540 ALOGW("Zip: failed seeking to lfh at offset %ld", localHdrOffset);
544 TEMP_FAILURE_RETRY(read(pArchive->mFd, lfhBuf, sizeof(lfhBuf)));
545 if (actual != sizeof(lfhBuf)) {
546 ALOGW("Zip: failed reading lfh from offset %ld", localHdrOffset);
550 if (get4LE(lfhBuf) != kLFHSignature) {
551 ALOGW("Zip: didn't find signature at start of lfh, offset=%ld",
556 off_t dataOffset = localHdrOffset + kLFHLen
557 + get2LE(lfhBuf + kLFHNameLen) + get2LE(lfhBuf + kLFHExtraLen);
558 if (dataOffset >= cdOffset) {
559 ALOGW("Zip: bad data offset %ld in zip", (long) dataOffset);
564 if ((off_t)(dataOffset + compLen) > cdOffset) {
565 ALOGW("Zip: bad compressed length in zip (%ld + %zd > %ld)",
566 (long) dataOffset, compLen, (long) cdOffset);
570 if (method == kCompressStored &&
571 (off_t)(dataOffset + uncompLen) > cdOffset)
573 ALOGW("Zip: bad uncompressed length in zip (%ld + %zd > %ld)",
574 (long) dataOffset, uncompLen, (long) cdOffset);
578 *pOffset = dataOffset;
584 * Uncompress "deflate" data from the archive's file to an open file
587 static int inflateToFile(int outFd, int inFd, size_t uncompLen, size_t compLen)
590 const size_t kBufSize = 32768;
591 unsigned char* readBuf = (unsigned char*) malloc(kBufSize);
592 unsigned char* writeBuf = (unsigned char*) malloc(kBufSize);
596 if (readBuf == NULL || writeBuf == NULL)
600 * Initialize the zlib stream struct.
602 memset(&zstream, 0, sizeof(zstream));
603 zstream.zalloc = Z_NULL;
604 zstream.zfree = Z_NULL;
605 zstream.opaque = Z_NULL;
606 zstream.next_in = NULL;
607 zstream.avail_in = 0;
608 zstream.next_out = (Bytef*) writeBuf;
609 zstream.avail_out = kBufSize;
610 zstream.data_type = Z_UNKNOWN;
613 * Use the undocumented "negative window bits" feature to tell zlib
614 * that there's no zlib header waiting for it.
616 zerr = inflateInit2(&zstream, -MAX_WBITS);
618 if (zerr == Z_VERSION_ERROR) {
619 ALOGE("Installed zlib is not compatible with linked version (%s)",
622 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
628 * Loop while we have more to do.
631 /* read as much as we can */
632 if (zstream.avail_in == 0) {
633 size_t getSize = (compLen > kBufSize) ? kBufSize : compLen;
635 ssize_t actual = TEMP_FAILURE_RETRY(read(inFd, readBuf, getSize));
636 if (actual != (ssize_t) getSize) {
637 ALOGW("Zip: inflate read failed (%d vs %zd)",
638 (int)actual, getSize);
644 zstream.next_in = readBuf;
645 zstream.avail_in = getSize;
648 /* uncompress the data */
649 zerr = inflate(&zstream, Z_NO_FLUSH);
650 if (zerr != Z_OK && zerr != Z_STREAM_END) {
651 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
652 zerr, zstream.next_in, zstream.avail_in,
653 zstream.next_out, zstream.avail_out);
657 /* write when we're full or when we're done */
658 if (zstream.avail_out == 0 ||
659 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize))
661 size_t writeSize = zstream.next_out - writeBuf;
662 if (sysWriteFully(outFd, writeBuf, writeSize, "Zip inflate") != 0)
665 zstream.next_out = writeBuf;
666 zstream.avail_out = kBufSize;
668 } while (zerr == Z_OK);
670 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
673 if (zstream.total_out != uncompLen) {
674 ALOGW("Zip: size mismatch on inflated file (%ld vs %zd)",
675 zstream.total_out, uncompLen);
682 inflateEnd(&zstream); /* free up any allocated structures */
691 * Uncompress an entry, in its entirety, to an open file descriptor.
693 * TODO: this doesn't verify the data's CRC, but probably should (especially
694 * for uncompressed data).
696 int dexZipExtractEntryToFile(const ZipArchive* pArchive,
697 const ZipEntry entry, int fd)
700 int ent = entryToIndex(pArchive, entry);
702 ALOGW("Zip: extract can't find entry %p", entry);
707 size_t uncompLen, compLen;
710 if (dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen,
711 &dataOffset, NULL, NULL) != 0)
715 if (lseek(pArchive->mFd, dataOffset, SEEK_SET) != dataOffset) {
716 ALOGW("Zip: lseek to data at %ld failed", (long) dataOffset);
720 if (method == kCompressStored) {
721 if (sysCopyFileToFile(fd, pArchive->mFd, uncompLen) != 0)
724 if (inflateToFile(fd, pArchive->mFd, uncompLen, compLen) != 0)