Error setBlockData(uint32_t BlockIndex, uint32_t Offset,
ArrayRef<uint8_t> Data) const override;
+ ArrayRef<uint32_t> getFpmPages() const { return FpmPages; }
+
ArrayRef<support::ulittle32_t> getStreamSizes() const {
return ContainerLayout.StreamSizes;
}
std::unique_ptr<msf::ReadableStream> Buffer;
+ std::vector<uint32_t> FpmPages;
msf::MSFLayout ContainerLayout;
std::unique_ptr<InfoStream> Info;
ContainerLayout.SB = SB;
// Initialize Free Page Map.
- ContainerLayout.FreePageMap.resize(getBlockSize() * 8);
- uint64_t FPMOffset = SB->FreeBlockMapBlock * getBlockSize();
- ArrayRef<uint8_t> FPMBlock;
- if (auto EC = Buffer->readBytes(FPMOffset, getBlockSize(), FPMBlock))
- return EC;
- for (uint32_t I = 0, E = getBlockSize() * 8; I != E; ++I)
- if (FPMBlock[I / 8] & (1 << (I % 8)))
- ContainerLayout.FreePageMap[I] = true;
+ ContainerLayout.FreePageMap.resize(SB->NumBlocks);
+ ArrayRef<uint8_t> FpmBytes;
+ // The Fpm exists either at block 1 or block 2 of the MSF. However, a single
+ // block allows for a maximum of getBlockSize() * 8 bits in the Fpm, and
+ // thus an equal number of total blocks in the file. For a block size
+ // of 4KiB (very common), this would yield 32Ki total blocks in the file, for
+ // a maximum file size of 32Ki * 4KiB = 128MiB. Obviously this won't do, so
+ // the Fpm is split across the file at `getBlockSize()` intervals. As a
+ // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
+ // for any non-negative integer k is an Fpm block. In theory, we only really
+ // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
+ // current versions of the MSF format already expect the Fpm to be arranged
+ // at getBlockSize() intervals, so we have to be compatible.
+ // See the function fpmPn() for more information:
+ // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
+
+ uint32_t BlocksPerSection = getBlockSize();
+ uint64_t FpmBlockOffset = SB->FreeBlockMapBlock;
+ uint32_t BlocksRemaining = getBlockCount();
+ for (uint32_t SI = 0; BlocksRemaining > 0; ++SI) {
+ uint32_t FpmFileOffset = FpmBlockOffset * getBlockSize();
+
+ if (auto EC = Buffer->readBytes(FpmFileOffset, getBlockSize(), FpmBytes))
+ return EC;
+
+ uint32_t BlocksThisSection = std::min(BlocksRemaining, BlocksPerSection);
+ for (uint32_t I = 0; I < BlocksThisSection; ++I) {
+ uint32_t BI = I + BlocksPerSection * SI;
+
+ if (FpmBytes[I / 8] & (1 << (I % 8)))
+ ContainerLayout.FreePageMap[BI] = true;
+ }
+ BlocksRemaining -= BlocksThisSection;
+ FpmBlockOffset += BlocksPerSection;
+ }
Reader.setOffset(getBlockMapOffset());
if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
; RUN: -sym-record-bytes -publics -module-files -stream-name=/names \
; RUN: -stream-summary -stream-blocks -ipi-records -ipi-record-bytes \
; RUN: -section-contribs -section-map -section-headers -line-info \
-; RUN: -tpi-hash -fpo -fpm %p/Inputs/empty.pdb | FileCheck -check-prefix=EMPTY %s
+; RUN: -tpi-hash -fpo -page-stats %p/Inputs/empty.pdb | FileCheck -check-prefix=EMPTY %s
; RUN: llvm-pdbdump raw -all %p/Inputs/empty.pdb | FileCheck -check-prefix=ALL %s
; RUN: llvm-pdbdump raw -headers -stream-name=/names -modules -module-files \
; RUN: %p/Inputs/big-read.pdb | FileCheck -check-prefix=BIG %s
; EMPTY-NEXT: Stream 15: [TPI Hash] (308 bytes)
; EMPTY-NEXT: Stream 16: [IPI Hash] (68 bytes)
; EMPTY-NEXT: ]
-; EMPTY-NEXT: Used Page Map: [0, 1, 2, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
+; EMPTY-NEXT: Msf Free Pages: [3, 4, 5, 8, 9]
+; EMPTY-NEXT: Orphaned Pages: []
+; EMPTY-NEXT: Multiply Used Pages: []
+; EMPTY-NEXT: Use After Free Pages: [8]
; EMPTY-NEXT: StreamBlocks [
; EMPTY-NEXT: Stream 0: [8]
; EMPTY-NEXT: Stream 1: [19]
; ALL: Stream 15: [TPI Hash] (308 bytes)
; ALL: Stream 16: [IPI Hash] (68 bytes)
; ALL: ]
-; ALL: Used Page Map: [0, 1, 2, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
+; ALL: Msf Free Pages: [3, 4, 5, 8, 9]
+; ALL: Orphaned Pages: []
+; ALL: Multiply Used Pages: []
+; ALL: Use After Free Pages: [8]
; ALL: StreamBlocks [
; ALL: Stream 0: [8]
; ALL: Stream 1: [19]
using namespace llvm::msf;
using namespace llvm::pdb;
+namespace {
+/// Bookkeeping used to cross-check the MSF's free page map against the pages
+/// that are actually referenced. Every bit vector is indexed by page number
+/// and has the same size as the free page map passed to the constructor.
+struct PageStats {
+  explicit PageStats(const BitVector &FreePages)
+      : Upm(invert(FreePages)), ActualUsedPages(FreePages.size()),
+        MultiUsePages(FreePages.size()),
+        // To calculate orphaned pages, we start with the set of pages that
+        // the MSF thinks are used. Each time we find one that actually *is*
+        // used, we unset it. Whichever bits remain set at the end are
+        // orphaned.
+        OrphanedPages(Upm), UseAfterFreePages(FreePages.size()) {}
+
+  // The inverse of the MSF File's copy of the Fpm. The basis for which we
+  // determine the allocation status of each page.
+  const BitVector Upm;
+
+  // Pages which are marked as used in the FPM and are used at least once.
+  BitVector ActualUsedPages;
+
+  // Pages which are marked as used in the FPM and are used more than once.
+  BitVector MultiUsePages;
+
+  // Pages which are marked as used in the FPM but are not used at all.
+  BitVector OrphanedPages;
+
+  // Pages which are marked free in the FPM but are used.
+  BitVector UseAfterFreePages;
+
+private:
+  // Returns a bitwise-inverted copy of Pages. Building the inverted value
+  // up front lets Upm stay const without modifying a const member through
+  // const_cast, which is undefined behavior.
+  static BitVector invert(BitVector Pages) {
+    Pages.flip();
+    return Pages;
+  }
+};
+} // end anonymous namespace
+
+/// Records that page UsedIndex is referenced by some structure in the file
+/// and updates Stats accordingly:
+///  - if the MSF marks the page as used, it is added to ActualUsedPages (and
+///    to MultiUsePages if it had already been recorded) and removed from
+///    OrphanedPages;
+///  - if the MSF marks the page as free, it is added to UseAfterFreePages.
+/// Page numbers that lie outside the page map are ignored.
+static void recordKnownUsedPage(PageStats &Stats, uint32_t UsedIndex) {
+  // Page numbers are derived from on-disk data, and the pair of Fpm pages
+  // recorded for the last section can lie past the final block of the file.
+  // Indexing a BitVector out of range is invalid, so skip such pages.
+  if (UsedIndex >= Stats.Upm.size())
+    return;
+
+  if (!Stats.Upm.test(UsedIndex)) {
+    // The MSF doesn't think this page is used, but it is.
+    Stats.UseAfterFreePages.set(UsedIndex);
+    return;
+  }
+
+  // A second sighting of a page that was already recorded as used.
+  if (Stats.ActualUsedPages.test(UsedIndex))
+    Stats.MultiUsePages.set(UsedIndex);
+  Stats.ActualUsedPages.set(UsedIndex);
+  Stats.OrphanedPages.reset(UsedIndex);
+}
+
static void printSectionOffset(llvm::raw_ostream &OS,
const SectionOffset &Off) {
OS << Off.Off << ", " << Off.Isect;
}
Error LLVMOutputStyle::dumpFreePageMap() {
- if (!opts::raw::DumpFreePageMap)
+ if (!opts::raw::DumpPageStats)
return Error::success();
- const BitVector &FPM = File.getMsfLayout().FreePageMap;
-
- std::vector<uint32_t> Vec;
- for (uint32_t I = 0, E = FPM.size(); I != E; ++I)
- if (!FPM[I])
- Vec.push_back(I);
- // Prints out used pages instead of free pages because
+ // Start with used pages instead of free pages because
// the number of free pages is far larger than used pages.
- P.printList("Used Page Map", Vec);
+ BitVector FPM = File.getMsfLayout().FreePageMap;
+
+ PageStats PS(FPM);
+
+ recordKnownUsedPage(PS, 0); // MSF Super Block
+
+ uint32_t BlocksPerSection = File.getBlockSize();
+ uint32_t NumSections =
+ llvm::alignTo(File.getBlockCount(), BlocksPerSection) / BlocksPerSection;
+ for (uint32_t I = 0; I < NumSections; ++I) {
+ uint32_t Fpm0 = 1 + BlocksPerSection * I;
+ // 2 Fpm blocks spaced at `getBlockSize()` block intervals
+ recordKnownUsedPage(PS, Fpm0);
+ recordKnownUsedPage(PS, Fpm0 + 1);
+ }
+
+ recordKnownUsedPage(PS, File.getBlockMapIndex()); // Stream Table
+
+ for (auto DB : File.getDirectoryBlockArray()) {
+ recordKnownUsedPage(PS, DB);
+ }
+ for (auto &SE : File.getStreamMap()) {
+ for (auto &S : SE) {
+ recordKnownUsedPage(PS, S);
+ }
+ }
+
+ dumpBitVector("Msf Free Pages", FPM);
+ dumpBitVector("Orphaned Pages", PS.OrphanedPages);
+ dumpBitVector("Multiply Used Pages", PS.MultiUsePages);
+ dumpBitVector("Use After Free Pages", PS.UseAfterFreePages);
return Error::success();
}
+// Prints the indices of all set bits of V, in ascending order, as a list
+// labelled Name.
+void LLVMOutputStyle::dumpBitVector(StringRef Name, const BitVector &V) {
+  std::vector<uint32_t> SetBits;
+  SetBits.reserve(V.count());
+  // find_first()/find_next() return -1 once no further set bit exists.
+  for (int Idx = V.find_first(); Idx != -1; Idx = V.find_next(Idx))
+    SetBits.push_back(static_cast<uint32_t>(Idx));
+  P.printList(Name, SetBits);
+}
+
Error LLVMOutputStyle::dumpStreamBlocks() {
if (!opts::raw::DumpStreamBlocks)
return Error::success();
#include "llvm/Support/ScopedPrinter.h"
namespace llvm {
+class BitVector;
namespace pdb {
class LLVMOutputStyle : public OutputStyle {
public:
Error dumpSectionHeaders();
Error dumpFpoStream();
+ void dumpBitVector(StringRef Name, const BitVector &V);
+
void flush();
PDBFile &File;
cl::opt<bool> DumpStreamSummary("stream-summary",
cl::desc("dump summary of the PDB streams"),
cl::cat(MsfOptions), cl::sub(RawSubcommand));
-cl::opt<bool> DumpFreePageMap("fpm", cl::desc("dump free page bitmap"),
- cl::cat(MsfOptions), cl::sub(RawSubcommand));
+cl::opt<bool> DumpPageStats(
+ "page-stats",
+ cl::desc("dump allocation stats of the pages in the MSF file"),
+ cl::cat(MsfOptions), cl::sub(RawSubcommand));
// TYPE OPTIONS
cl::opt<bool>
opts::raw::DumpPublics = true;
opts::raw::DumpSectionHeaders = true;
opts::raw::DumpStreamSummary = true;
- opts::raw::DumpFreePageMap = true;
+ opts::raw::DumpPageStats = true;
opts::raw::DumpStreamBlocks = true;
opts::raw::DumpTpiRecords = true;
opts::raw::DumpTpiHash = true;
extern llvm::cl::opt<bool> DumpHeaders;
extern llvm::cl::opt<bool> DumpStreamBlocks;
extern llvm::cl::opt<bool> DumpStreamSummary;
-extern llvm::cl::opt<bool> DumpFreePageMap;
+extern llvm::cl::opt<bool> DumpPageStats;
extern llvm::cl::opt<bool> DumpTpiHash;
extern llvm::cl::opt<bool> DumpTpiRecordBytes;
extern llvm::cl::opt<bool> DumpTpiRecords;