From 4d3e6c13b683b158016e38aa7ba3ff9e2f8972b0 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 29 Jun 2018 16:50:41 +0000 Subject: [PATCH] [dsymutil] Introduce a new CachedBinaryHolder The original binary holder has an optimization where it caches a static library (archive) between consecutive calls to GetObjects. However, the actual memory buffer wasn't cached between calls. This made sense when dsymutil was processing objects one after each other, but when processing them in parallel, several binaries have to be in memory at the same time. For this reason, every link context contained a binary holder. Having one binary holder per context is problematic, because the same static archive was cached for every object file. Luckily, when the file is mmap'ed, this was only costing us virtual memory. This patch introduces a new BinaryHolder variant that is fully cached, for all the object files it load, as well as the static archives. This way, we don't have to give up on this optimization of bypassing the file system. Differential revision: https://reviews.llvm.org/D48501 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@335990 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/dsymutil/BinaryHolder.cpp | 226 ++++++++++++++++++++++++++++++++++++++++ tools/dsymutil/BinaryHolder.h | 142 ++++++++++++++++++++++++- tools/dsymutil/DwarfLinker.cpp | 44 ++++---- tools/dsymutil/DwarfLinker.h | 17 ++- tools/dsymutil/dsymutil.cpp | 6 +- tools/dsymutil/dsymutil.h | 6 +- 6 files changed, 409 insertions(+), 32 deletions(-) diff --git a/tools/dsymutil/BinaryHolder.cpp b/tools/dsymutil/BinaryHolder.cpp index 356156a5ebf..8264f465a5d 100644 --- a/tools/dsymutil/BinaryHolder.cpp +++ b/tools/dsymutil/BinaryHolder.cpp @@ -14,11 +14,21 @@ #include "BinaryHolder.h" #include "llvm/Object/MachO.h" +#include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" namespace llvm { namespace dsymutil { +static std::pair +getArchiveAndObjectName(StringRef Filename) { + StringRef Archive = Filename.substr(0, Filename.find('(')); + StringRef Object = Filename.substr(Archive.size() + 1).drop_back(); + return {Archive, Object}; +} + +static bool isArchive(StringRef Filename) { return Filename.endswith(")"); } + static std::vector getMachOFatMemoryBuffers(StringRef Filename, MemoryBuffer &Mem, object::MachOUniversalBinary &Fat) { @@ -32,6 +42,222 @@ getMachOFatMemoryBuffers(StringRef Filename, MemoryBuffer &Mem, return Buffers; } +Error CachedBinaryHolder::ArchiveEntry::load(StringRef Filename, + TimestampTy Timestamp, + bool Verbose) { + StringRef ArchiveFilename = getArchiveAndObjectName(Filename).first; + + // Try to load archive and force it to be memory mapped. + auto ErrOrBuff = MemoryBuffer::getFileOrSTDIN(ArchiveFilename, -1, false); + if (auto Err = ErrOrBuff.getError()) + return errorCodeToError(Err); + + MemoryBuffer = std::move(*ErrOrBuff); + + if (Verbose) + WithColor::note() << "opened archive '" << ArchiveFilename << "'\n"; + + // Load one or more archive buffers, depending on whether we're dealing with + // a fat binary. + std::vector ArchiveBuffers; + + auto ErrOrFat = + object::MachOUniversalBinary::create(MemoryBuffer->getMemBufferRef()); + if (!ErrOrFat) { + consumeError(ErrOrFat.takeError()); + ArchiveBuffers.push_back(MemoryBuffer->getMemBufferRef()); + } else { + FatBinary = std::move(*ErrOrFat); + FatBinaryName = ArchiveFilename; + ArchiveBuffers = + getMachOFatMemoryBuffers(FatBinaryName, *MemoryBuffer, *FatBinary); + } + + // Finally, try to load the archives. + Archives.reserve(ArchiveBuffers.size()); + for (auto MemRef : ArchiveBuffers) { + auto ErrOrArchive = object::Archive::create(MemRef); + if (!ErrOrArchive) + return ErrOrArchive.takeError(); + Archives.push_back(std::move(*ErrOrArchive)); + } + + return Error::success(); +} + +Error CachedBinaryHolder::ObjectEntry::load(StringRef Filename, bool Verbose) { + // Try to load regular binary and force it to be memory mapped. + auto ErrOrBuff = MemoryBuffer::getFileOrSTDIN(Filename, -1, false); + if (auto Err = ErrOrBuff.getError()) + return errorCodeToError(Err); + + MemoryBuffer = std::move(*ErrOrBuff); + + if (Verbose) + WithColor::note() << "opened object.\n"; + + // Load one or more object buffers, depending on whether we're dealing with a + // fat binary. + std::vector ObjectBuffers; + + auto ErrOrFat = + object::MachOUniversalBinary::create(MemoryBuffer->getMemBufferRef()); + if (!ErrOrFat) { + consumeError(ErrOrFat.takeError()); + ObjectBuffers.push_back(MemoryBuffer->getMemBufferRef()); + } else { + FatBinary = std::move(*ErrOrFat); + FatBinaryName = Filename; + ObjectBuffers = + getMachOFatMemoryBuffers(FatBinaryName, *MemoryBuffer, *FatBinary); + } + + Objects.reserve(ObjectBuffers.size()); + for (auto MemRef : ObjectBuffers) { + auto ErrOrObjectFile = object::ObjectFile::createObjectFile(MemRef); + if (!ErrOrObjectFile) + return ErrOrObjectFile.takeError(); + Objects.push_back(std::move(*ErrOrObjectFile)); + } + + return Error::success(); +} + +std::vector +CachedBinaryHolder::ObjectEntry::getObjects() const { + std::vector Result; + Result.reserve(Objects.size()); + for (auto &Object : Objects) { + Result.push_back(Object.get()); + } + return Result; +} +Expected +CachedBinaryHolder::ObjectEntry::getObject(const Triple &T) const { + for (const auto &Obj : Objects) { + if (const auto *MachO = dyn_cast(Obj.get())) { + if (MachO->getArchTriple().str() == T.str()) + return *MachO; + } else if (Obj->getArch() == T.getArch()) + return *Obj; + } + return errorCodeToError(object::object_error::arch_not_found); +} + +Expected +CachedBinaryHolder::ArchiveEntry::getObjectEntry(StringRef Filename, + TimestampTy Timestamp, + bool Verbose) { + StringRef ArchiveFilename; + StringRef ObjectFilename; + std::tie(ArchiveFilename, ObjectFilename) = getArchiveAndObjectName(Filename); + + // Try the cache first. + KeyTy Key = {ObjectFilename, Timestamp}; + + { + std::lock_guard Lock(MemberCacheMutex); + if (MemberCache.count(Key)) + return MemberCache[Key]; + } + + // Create a new ObjectEntry, but don't add it to the cache yet. Loading of + // the archive members might fail and we don't want to lock the whole archive + // during this operation. + ObjectEntry OE; + + for (const auto &Archive : Archives) { + Error Err = Error::success(); + for (auto Child : Archive->children(Err)) { + if (auto NameOrErr = Child.getName()) { + if (*NameOrErr == ObjectFilename) { + auto ModTimeOrErr = Child.getLastModified(); + if (!ModTimeOrErr) + return ModTimeOrErr.takeError(); + + if (Timestamp != sys::TimePoint<>() && + Timestamp != ModTimeOrErr.get()) { + if (Verbose) + WithColor::warning() << "member has timestamp mismatch.\n"; + continue; + } + + if (Verbose) + WithColor::note() << "found member in current archive.\n"; + + auto ErrOrMem = Child.getMemoryBufferRef(); + if (!ErrOrMem) + return ErrOrMem.takeError(); + + auto ErrOrObjectFile = + object::ObjectFile::createObjectFile(*ErrOrMem); + if (!ErrOrObjectFile) + return ErrOrObjectFile.takeError(); + + OE.Objects.push_back(std::move(*ErrOrObjectFile)); + } + } + } + if (Err) + return std::move(Err); + } + + if (OE.Objects.empty()) + return errorCodeToError(errc::no_such_file_or_directory); + + std::lock_guard Lock(MemberCacheMutex); + MemberCache.try_emplace(Key, std::move(OE)); + return MemberCache[Key]; +} + +Expected +CachedBinaryHolder::getObjectEntry(StringRef Filename, TimestampTy Timestamp) { + if (Verbose) + WithColor::note() << "trying to open '" << Filename << "'\n"; + + // If this is an archive, we might have either the object or the archive + // cached. In this case we can load it without accessing the file system. + if (isArchive(Filename)) { + StringRef ArchiveFilename = getArchiveAndObjectName(Filename).first; + std::lock_guard Lock(ArchiveCacheMutex); + if (ArchiveCache.count(ArchiveFilename)) { + return ArchiveCache[ArchiveFilename].getObjectEntry(Filename, Timestamp); + } else { + ArchiveEntry &AE = ArchiveCache[ArchiveFilename]; + auto Err = AE.load(Filename, Timestamp, Verbose); + if (Err) { + ArchiveCache.erase(ArchiveFilename); + // Don't return the error here: maybe the file wasn't an archive. + llvm::consumeError(std::move(Err)); + } else { + return ArchiveCache[ArchiveFilename].getObjectEntry(Filename, + Timestamp); + } + } + } + + // If this is an object, we might have it cached. If not we'll have to load + // it from the file system and cache it now. + std::lock_guard Lock(ObjectCacheMutex); + if (!ObjectCache.count(Filename)) { + ObjectEntry &OE = ObjectCache[Filename]; + auto Err = OE.load(Filename); + if (Err) { + ObjectCache.erase(Filename); + return std::move(Err); + } + } + + return ObjectCache[Filename]; +} + +void CachedBinaryHolder::clear() { + std::lock_guard ArchiveLock(ArchiveCacheMutex); + std::lock_guard ObjectLock(ObjectCacheMutex); + ArchiveCache.clear(); + ObjectCache.clear(); +} + void BinaryHolder::changeBackingMemoryBuffer( std::unique_ptr &&Buf) { CurrentArchives.clear(); diff --git a/tools/dsymutil/BinaryHolder.h b/tools/dsymutil/BinaryHolder.h index f3ab7f6726a..c4eef47ac65 100644 --- a/tools/dsymutil/BinaryHolder.h +++ b/tools/dsymutil/BinaryHolder.h @@ -14,6 +14,7 @@ #ifndef LLVM_TOOLS_DSYMUTIL_BINARYHOLDER_H #define LLVM_TOOLS_DSYMUTIL_BINARYHOLDER_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Triple.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Error.h" @@ -23,9 +24,118 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorOr.h" +#include + namespace llvm { namespace dsymutil { +/// The CachedBinaryHolder class is responsible for creating and owning +/// ObjectFiles and their underlying MemoryBuffers. It differs from a simple +/// OwningBinary in that it handles accessing and caching of archives and its +/// members. +class CachedBinaryHolder { +public: + using TimestampTy = sys::TimePoint; + + CachedBinaryHolder(bool Verbose = false) : Verbose(Verbose) {} + + // Forward declarations for friend declaration. + class ObjectEntry; + class ArchiveEntry; + + /// Base class shared by cached entries, representing objects and archives. + class EntryBase { + protected: + std::unique_ptr MemoryBuffer; + std::unique_ptr FatBinary; + std::string FatBinaryName; + }; + + /// Cached entry holding one or more (in case of a fat binary) object files. + class ObjectEntry : public EntryBase { + public: + /// Load the given object binary in memory. + Error load(StringRef Filename, bool Verbose = false); + + /// Access all owned ObjectFiles. + std::vector getObjects() const; + + /// Access to a derived version of all the currently owned ObjectFiles. The + /// conversion might be invalid, in which case an Error is returned. + template + Expected> getObjectsAs() const { + std::vector Result; + Result.reserve(Objects.size()); + for (auto &Object : Objects) { + const auto *Derived = dyn_cast(Object.get()); + if (!Derived) + return errorCodeToError(object::object_error::invalid_file_type); + Result.push_back(Derived); + } + return Result; + } + + /// Access the owned ObjectFile with architecture \p T. + Expected getObject(const Triple &T) const; + + /// Access to a derived version of the currently owned ObjectFile with + /// architecture \p T. The conversion must be known to be valid. + template + Expected getObjectAs(const Triple &T) const { + auto Object = getObject(T); + if (!Object) + return Object.takeError(); + return cast(*Object); + } + + private: + std::vector> Objects; + friend ArchiveEntry; + }; + + /// Cached entry holding one or more (in the of a fat binary) archive files. + class ArchiveEntry : public EntryBase { + public: + struct KeyTy { + std::string Filename; + TimestampTy Timestamp; + + KeyTy() : Filename(), Timestamp() {} + KeyTy(StringRef Filename, TimestampTy Timestamp) + : Filename(Filename.str()), Timestamp(Timestamp) {} + }; + + /// Load the given object binary in memory. + Error load(StringRef Filename, TimestampTy Timestamp, bool Verbose = false); + + Expected getObjectEntry(StringRef Filename, + TimestampTy Timestamp, + bool Verbose = false); + + private: + std::vector> Archives; + DenseMap MemberCache; + std::mutex MemberCacheMutex; + }; + + Expected getObjectEntry(StringRef Filename, + TimestampTy Timestamp); + + void clear(); + +private: + /// Cache of static archives. Objects that are part of a static archive are + /// stored under this object, rather than in the map below. + StringMap ArchiveCache; + std::mutex ArchiveCacheMutex; + + /// Object entries for objects that are not in a static archive. + StringMap ObjectCache; + std::mutex ObjectCacheMutex; + + bool Verbose; +}; + /// The BinaryHolder class is responsible for creating and owning ObjectFile /// objects and their underlying MemoryBuffer. This is different from a simple /// OwningBinary in that it handles accessing to archive members. @@ -123,8 +233,8 @@ public: return getObjfileForArch(T); } - /// Access to a derived version of the currently owned - /// ObjectFile. The conversion must be known to be valid. + /// Get and cast to a subclass of the currently owned ObjectFile. The + /// conversion must be known to be valid. template ErrorOr GetAs(const Triple &T) { auto ErrOrObj = Get(T); @@ -134,5 +244,33 @@ public: } }; } // namespace dsymutil + +template <> +struct DenseMapInfo { + + static inline dsymutil::CachedBinaryHolder::ArchiveEntry::KeyTy + getEmptyKey() { + return dsymutil::CachedBinaryHolder::ArchiveEntry::KeyTy(); + } + + static inline dsymutil::CachedBinaryHolder::ArchiveEntry::KeyTy + getTombstoneKey() { + return dsymutil::CachedBinaryHolder::ArchiveEntry::KeyTy("/", {}); + } + + static unsigned + getHashValue(const dsymutil::CachedBinaryHolder::ArchiveEntry::KeyTy &K) { + return hash_combine(DenseMapInfo::getHashValue(K.Filename), + DenseMapInfo::getHashValue( + K.Timestamp.time_since_epoch().count())); + } + + static bool + isEqual(const dsymutil::CachedBinaryHolder::ArchiveEntry::KeyTy &LHS, + const dsymutil::CachedBinaryHolder::ArchiveEntry::KeyTy &RHS) { + return LHS.Filename == RHS.Filename && LHS.Timestamp == RHS.Timestamp; + } +}; + } // namespace llvm #endif diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp index a622ad52f48..48248dc3c33 100644 --- a/tools/dsymutil/DwarfLinker.cpp +++ b/tools/dsymutil/DwarfLinker.cpp @@ -1982,18 +1982,25 @@ bool DwarfLinker::registerModuleReference( } ErrorOr -DwarfLinker::loadObject(BinaryHolder &BinaryHolder, const DebugMapObject &Obj, - const DebugMap &Map) { - auto ErrOrObjs = - BinaryHolder.GetObjectFiles(Obj.getObjectFilename(), Obj.getTimestamp()); - if (std::error_code EC = ErrOrObjs.getError()) { - reportWarning(Twine(Obj.getObjectFilename()) + ": " + EC.message(), Obj); - return EC; - } - auto ErrOrObj = BinaryHolder.Get(Map.getTriple()); - if (std::error_code EC = ErrOrObj.getError()) - reportWarning(Twine(Obj.getObjectFilename()) + ": " + EC.message(), Obj); - return ErrOrObj; +DwarfLinker::loadObject(const DebugMapObject &Obj, const DebugMap &Map) { + auto ObjectEntry = + BinHolder.getObjectEntry(Obj.getObjectFilename(), Obj.getTimestamp()); + if (!ObjectEntry) { + auto Err = ObjectEntry.takeError(); + reportWarning( + Twine(Obj.getObjectFilename()) + ": " + toString(std::move(Err)), Obj); + return errorToErrorCode(std::move(Err)); + } + + auto Object = ObjectEntry->getObject(Map.getTriple()); + if (!Object) { + auto Err = Object.takeError(); + reportWarning( + Twine(Obj.getObjectFilename()) + ": " + toString(std::move(Err)), Obj); + return errorToErrorCode(std::move(Err)); + } + + return *Object; } Error DwarfLinker::loadClangModule(StringRef Filename, StringRef ModulePath, @@ -2009,10 +2016,11 @@ Error DwarfLinker::loadClangModule(StringRef Filename, StringRef ModulePath, sys::path::append(Path, ModulePath, Filename); else sys::path::append(Path, Filename); - BinaryHolder ObjHolder(Options.Verbose); + // Don't use the cached binary holder because we have no thread-safety + // guarantee and the lifetime is limited. auto &Obj = ModuleMap.addDebugMapObject( Path, sys::TimePoint(), MachO::N_OSO); - auto ErrOrObj = loadObject(ObjHolder, Obj, ModuleMap); + auto ErrOrObj = loadObject(Obj, ModuleMap); if (!ErrOrObj) { // Try and emit more helpful warnings by applying some heuristics. StringRef ObjFile = DMO.getObjectFilename(); @@ -2238,7 +2246,7 @@ bool DwarfLinker::link(const DebugMap &Map) { std::vector ObjectContexts; ObjectContexts.reserve(NumObjects); for (const auto &Obj : Map.objects()) - ObjectContexts.emplace_back(Map, *this, *Obj.get(), Options.Verbose); + ObjectContexts.emplace_back(Map, *this, *Obj.get()); // This Dwarf string pool which is only used for uniquing. This one should // never be used for offsets as its not thread-safe or predictable. @@ -2459,9 +2467,9 @@ bool DwarfLinker::link(const DebugMap &Map) { return Options.NoOutput ? true : Streamer->finish(Map); } -bool linkDwarf(raw_fd_ostream &OutFile, const DebugMap &DM, - const LinkOptions &Options) { - DwarfLinker Linker(OutFile, Options); +bool linkDwarf(raw_fd_ostream &OutFile, CachedBinaryHolder &BinHolder, + const DebugMap &DM, const LinkOptions &Options) { + DwarfLinker Linker(OutFile, BinHolder, Options); return Linker.link(DM); } diff --git a/tools/dsymutil/DwarfLinker.h b/tools/dsymutil/DwarfLinker.h index 4b8484f4c26..618b1ae9bd7 100644 --- a/tools/dsymutil/DwarfLinker.h +++ b/tools/dsymutil/DwarfLinker.h @@ -56,8 +56,9 @@ using UnitListTy = std::vector>; /// first step when we start processing a DebugMapObject. class DwarfLinker { public: - DwarfLinker(raw_fd_ostream &OutFile, const LinkOptions &Options) - : OutFile(OutFile), Options(Options) {} + DwarfLinker(raw_fd_ostream &OutFile, CachedBinaryHolder &BinHolder, + const LinkOptions &Options) + : OutFile(OutFile), BinHolder(BinHolder), Options(Options) {} /// Link the contents of the DebugMap. bool link(const DebugMap &); @@ -139,22 +140,20 @@ private: /// Keeps track of data associated with one object during linking. struct LinkContext { DebugMapObject &DMO; - BinaryHolder BinHolder; const object::ObjectFile *ObjectFile; RelocationManager RelocMgr; std::unique_ptr DwarfContext; RangesTy Ranges; UnitListTy CompileUnits; - LinkContext(const DebugMap &Map, DwarfLinker &Linker, DebugMapObject &DMO, - bool Verbose = false) - : DMO(DMO), BinHolder(Verbose), RelocMgr(Linker) { + LinkContext(const DebugMap &Map, DwarfLinker &Linker, DebugMapObject &DMO) + : DMO(DMO), RelocMgr(Linker) { // Swift ASTs are not object files. if (DMO.getType() == MachO::N_AST) { ObjectFile = nullptr; return; } - auto ErrOrObj = Linker.loadObject(BinHolder, DMO, Map); + auto ErrOrObj = Linker.loadObject(DMO, Map); ObjectFile = ErrOrObj ? &*ErrOrObj : nullptr; DwarfContext = ObjectFile ? DWARFContext::create(*ObjectFile) : nullptr; } @@ -441,12 +440,12 @@ private: bool createStreamer(const Triple &TheTriple, raw_fd_ostream &OutFile); /// Attempt to load a debug object from disk. - ErrorOr loadObject(BinaryHolder &BinaryHolder, - const DebugMapObject &Obj, + ErrorOr loadObject(const DebugMapObject &Obj, const DebugMap &Map); /// @} raw_fd_ostream &OutFile; + CachedBinaryHolder &BinHolder; LinkOptions Options; std::unique_ptr Streamer; uint64_t OutputDebugInfoSize; diff --git a/tools/dsymutil/dsymutil.cpp b/tools/dsymutil/dsymutil.cpp index 6205a3d5775..6effa1f6a98 100644 --- a/tools/dsymutil/dsymutil.cpp +++ b/tools/dsymutil/dsymutil.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "dsymutil.h" +#include "BinaryHolder.h" #include "CFBundle.h" #include "DebugMap.h" #include "LinkUtils.h" @@ -494,6 +495,9 @@ int main(int argc, char **argv) { return 1; } + // Shared a single binary holder for all the link steps. + CachedBinaryHolder BinHolder; + NumThreads = std::min(OptionsOrErr->Threads, DebugMapPtrsOrErr->size()); llvm::ThreadPool Threads(NumThreads); @@ -546,7 +550,7 @@ int main(int argc, char **argv) { auto LinkLambda = [&, OutputFile](std::shared_ptr Stream) { - AllOK.fetch_and(linkDwarf(*Stream, *Map, *OptionsOrErr)); + AllOK.fetch_and(linkDwarf(*Stream, BinHolder, *Map, *OptionsOrErr)); Stream->flush(); if (Verify && !NoOutput) AllOK.fetch_and(verify(OutputFile, Map->getTriple().getArchName())); diff --git a/tools/dsymutil/dsymutil.h b/tools/dsymutil/dsymutil.h index 4e3842967be..d54b46cde56 100644 --- a/tools/dsymutil/dsymutil.h +++ b/tools/dsymutil/dsymutil.h @@ -30,6 +30,8 @@ namespace llvm { namespace dsymutil { +class CachedBinaryHolder; + /// Extract the DebugMaps from the given file. /// The file has to be a MachO object file. Multiple debug maps can be /// returned when the file is universal (aka fat) binary. @@ -44,8 +46,8 @@ bool dumpStab(StringRef InputFile, ArrayRef Archs, /// Link the Dwarf debug info as directed by the passed DebugMap \p DM into a /// DwarfFile named \p OutputFilename. \returns false if the link failed. -bool linkDwarf(raw_fd_ostream &OutFile, const DebugMap &DM, - const LinkOptions &Options); +bool linkDwarf(raw_fd_ostream &OutFile, CachedBinaryHolder &BinHolder, + const DebugMap &DM, const LinkOptions &Options); } // end namespace dsymutil } // end namespace llvm -- 2.11.0