1 //===-- sancov.cc --------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is a command-line tool for reading and analyzing sanitizer
12 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
17 #include "llvm/MC/MCAsmInfo.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
20 #include "llvm/MC/MCInst.h"
21 #include "llvm/MC/MCInstPrinter.h"
22 #include "llvm/MC/MCInstrAnalysis.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCObjectFileInfo.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/Object/Archive.h"
28 #include "llvm/Object/Binary.h"
29 #include "llvm/Object/COFF.h"
30 #include "llvm/Object/ELFObjectFile.h"
31 #include "llvm/Object/MachO.h"
32 #include "llvm/Object/ObjectFile.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/ErrorOr.h"
37 #include "llvm/Support/FileSystem.h"
38 #include "llvm/Support/LineIterator.h"
39 #include "llvm/Support/MD5.h"
40 #include "llvm/Support/ManagedStatic.h"
41 #include "llvm/Support/MemoryBuffer.h"
42 #include "llvm/Support/Path.h"
43 #include "llvm/Support/PrettyStackTrace.h"
44 #include "llvm/Support/Regex.h"
45 #include "llvm/Support/SHA1.h"
46 #include "llvm/Support/Signals.h"
47 #include "llvm/Support/SourceMgr.h"
48 #include "llvm/Support/SpecialCaseList.h"
49 #include "llvm/Support/TargetRegistry.h"
50 #include "llvm/Support/TargetSelect.h"
51 #include "llvm/Support/ToolOutputFile.h"
52 #include "llvm/Support/YAMLParser.h"
53 #include "llvm/Support/raw_ostream.h"
66 // --------- COMMAND LINE FLAGS ---------
// Two enumerators of the action enumeration (its opening line and the other
// enumerators are not visible in this excerpt). They are bound to the
// "covered-functions" and "not-covered-functions" command-line values by the
// Action option.
69 CoveredFunctionsAction,
72 NotCoveredFunctionsAction,
// Required command-line option that selects what the tool does. Every
// clEnumValN entry maps one ActionType enumerator to its flag spelling and
// help text.
// NOTE(review): the help strings for covered-functions and
// not-covered-functions spell "funcions"; presumably "functions" is meant.
79 cl::opt<ActionType> Action(
80 cl::desc("Action (required)"), cl::Required,
82 clEnumValN(PrintAction, "print", "Print coverage addresses"),
83 clEnumValN(PrintCovPointsAction, "print-coverage-pcs",
84 "Print coverage instrumentation points addresses."),
85 clEnumValN(CoveredFunctionsAction, "covered-functions",
86 "Print all covered funcions."),
87 clEnumValN(NotCoveredFunctionsAction, "not-covered-functions",
88 "Print all not covered funcions."),
89 clEnumValN(StatsAction, "print-coverage-stats",
90 "Print coverage statistics."),
91 clEnumValN(HtmlReportAction, "html-report",
92 "REMOVED. Use -symbolize & coverage-report-server.py."),
93 clEnumValN(SymbolizeAction, "symbolize",
94 "Produces a symbolized JSON report from binary report."),
95 clEnumValN(MergeAction, "merge", "Merges reports.")));
// Positional command-line arguments: the list of input files. At least one
// is required (cl::OneOrMore).
97 static cl::list<std::string>
98 ClInputFiles(cl::Positional, cl::OneOrMore,
99 cl::desc("<action> <binary files...> <.sancov files...> "
100 "<.symcov files...>"));
// -demangle: whether function names are demangled. Defaults to true; the
// value is copied into the symbolizer options by createSymbolizer().
102 static cl::opt<bool> ClDemangle("demangle", cl::init(true),
103 cl::desc("Print demangled function name."));
// -skip-dead-files (default true). getCoveragePoints() consults it to drop
// locations whose source file contains no covered address. The line carrying
// the declaration's type is not visible in this excerpt.
106 ClSkipDeadFiles("skip-dead-files", cl::init(true),
107 cl::desc("Do not list dead source files in reports."));
// -strip_path_prefix: string that stripPathPrefix() searches for in file
// paths. Empty by default, which disables stripping.
109 static cl::opt<std::string> ClStripPathPrefix(
110 "strip_path_prefix", cl::init(""),
111 cl::desc("Strip this prefix from file paths in reports."));
// -blacklist: path of a user-supplied blacklist file. Empty by default, in
// which case no user blacklist is created.
113 static cl::opt<std::string>
114 ClBlacklist("blacklist", cl::init(""),
115 cl::desc("Blacklist file (sanitizer blacklist format)."));
// -use_default_blacklist: hidden option (cl::Hidden), true by default, that
// decides whether the built-in DefaultBlacklistStr is applied.
117 static cl::opt<bool> ClUseDefaultBlacklist(
118 "use_default_blacklist", cl::init(true), cl::Hidden,
119 cl::desc("Controls if default blacklist should be used."));
// Built-in blacklist text, one rule per line: functions whose name matches
// "__sanitizer_.*", sources under /usr/include/, and sources whose path
// contains a "/libc++/" directory component.
121 static const char *const DefaultBlacklistStr = "fun:__sanitizer_.*\n"
122 "src:/usr/include/.*\n"
123 "src:.*/libc\\+\\+/.*\n";
125 // --------- FORMAT SPECIFICATION ---------
// Header values of the binary .sancov format. RawCoverage::read() and
// isCoverageFile() compare the header's Magic field against BinCoverageMagic.
// Bitness32/Bitness64 are presumably the values of the header's Bitness field
// selecting 32- or 64-bit address entries (the case labels that use them are
// not visible in this excerpt).
132 static const uint32_t BinCoverageMagic = 0xC0BFFFFF;
133 static const uint32_t Bitness32 = 0xFFFFFF32;
134 static const uint32_t Bitness64 = 0xFFFFFF64;
// File-name patterns used to classify inputs.
// SancovFileRegex: "<prefix>.<digits>.sancov"; capture group 1 is the prefix,
// which readSymbolizeAndMergeCmdArguments() uses to look up the object file.
// SymcovFileRegex: any name in which ".symcov" occurs. Neither pattern
// carries explicit ^/$ anchors.
136 static Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov");
137 static Regex SymcovFileRegex(".*\\.symcov");
139 // --------- MAIN DATASTRUCTURES ----------
141 // Contents of .sancov file: the set of coverage point addresses read from it.
// (The struct's opening line is not visible in this excerpt.)
// Takes ownership of the given address set.
144 explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs)
145 : Addrs(std::move(Addrs)) {}
147 // Read binary .sancov file.
// Returns the parsed coverage, or an error code when the file cannot be
// opened or its header is rejected.
148 static ErrorOr<std::unique_ptr<RawCoverage>>
149 read(const std::string &FileName);
// Owned, ordered and de-duplicated set of addresses.
151 std::unique_ptr<std::set<uint64_t>> Addrs;
154 // Coverage point has an opaque Id and corresponds to multiple source locations.
155 struct CoveragePoint {
// Creates a point with the given Id and no locations yet.
156 explicit CoveragePoint(const std::string &Id) : Id(Id) {}
// Source locations attributed to this point; inline storage for one entry.
159 SmallVector<DILineInfo, 1> Locs;
162 // Symcov file content: set of covered Ids plus information about all available
// coverage points (Points).
164 struct SymbolizedCoverage {
165 // Read json .symcov file.
166 static std::unique_ptr<SymbolizedCoverage> read(const std::string &InputFile);
// Ids of the points that were covered.
168 std::set<std::string> CoveredIds;
// Hash of the binary the coverage belongs to; left empty when unknown (the
// JSON writer omits the "binary-hash" key for an empty value).
169 std::string BinaryHash;
// Every known coverage point together with its source locations.
170 std::vector<CoveragePoint> Points;
// Aggregate counters for a coverage report. The fields are not visible in
// this excerpt; the stream operator for this type prints AllPoints, CovPoints,
// AllFns and CovFns.
173 struct CoverageStats {
180 // --------- ERROR HANDLING ---------
// Reports a fatal error: writes "Error: <E>" followed by a newline to errs().
// The rest of the body is not visible in this excerpt; presumably it
// terminates the process - TODO confirm.
182 static void fail(const llvm::Twine &E) {
183 errs() << "Error: " << E << "\n";
// Conditional failure helper. The body is not visible in this excerpt;
// callers pass the failure condition as B (e.g. failIf(!S, "expected
// string")), so presumably it reports E when B is true - TODO confirm.
187 static void failIf(bool B, const llvm::Twine &E) {
// std::error_code overload. Prints the error's message and its numeric value
// to errs(); the condition guarding the print is not visible in this excerpt.
192 static void failIfError(std::error_code Error) {
195 errs() << "Error: " << Error.message() << "(" << Error.value() << ")\n";
// ErrorOr<T> overload: forwards the contained error code to the
// std::error_code overload.
199 template <typename T> static void failIfError(const ErrorOr<T> &E) {
200 failIfError(E.getError());
// llvm::Error overload. Hands Err to logAllUnhandledErrors, which writes it
// to errs() with the "Error: " banner. The surrounding condition is not
// visible in this excerpt.
203 static void failIfError(Error Err) {
205 logAllUnhandledErrors(std::move(Err), errs(), "Error: ");
// Expected<T> overload: extracts the error with takeError() and forwards it
// to the llvm::Error overload.
210 template <typename T> static void failIfError(Expected<T> &E) {
211 failIfError(E.takeError());
// Failure helper for APIs that report problems through an error string
// (called after TargetRegistry::lookupTarget and SpecialCaseList::create).
// Body not visible in this excerpt; presumably fails when E is a non-empty
// message - TODO confirm.
214 static void failIfNotEmpty(const llvm::Twine &E) {
// Failure helper for factory results: callers pass a freshly created
// unique_ptr plus the message to report. Body not visible in this excerpt;
// presumably fails with Message when Ptr is null - TODO confirm.
220 template <typename T>
221 static void failIfEmpty(const std::unique_ptr<T> &Ptr,
222 const std::string &Message) {
228 // ----------- Coverage I/O ----------
// Reinterprets the byte range [Start, End) as an array of T and inserts every
// element into *Ints (each value is widened to uint64_t by the set).
// NOTE(review): iteration stops only when the T pointer equals End exactly,
// so this relies on (End - Start) being a multiple of sizeof(T) and on Start
// being suitably aligned for T - confirm the caller guarantees both.
229 template <typename T>
230 static void readInts(const char *Start, const char *End,
231 std::set<uint64_t> *Ints) {
232 const T *S = reinterpret_cast<const T *>(Start);
233 const T *E = reinterpret_cast<const T *>(End);
234 std::copy(S, E, std::inserter(*Ints, Ints->end()));
// Loads a binary .sancov file into a RawCoverage.
// Returns the file-open error when the file cannot be read, and
// errc::illegal_byte_sequence (after printing a diagnostic to errs()) when the
// file is shorter than 8 bytes, the magic is wrong, or the bitness is not
// supported.
237 ErrorOr<std::unique_ptr<RawCoverage>>
238 RawCoverage::read(const std::string &FileName) {
239 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
240 MemoryBuffer::getFile(FileName);
242 return BufOrErr.getError();
243 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
// Payload reads below start at offset 8, so anything shorter cannot hold
// the header.
244 if (Buf->getBufferSize() < 8) {
245 errs() << "File too small (<8): " << Buf->getBufferSize() << '\n';
246 return make_error_code(errc::illegal_byte_sequence);
// View the start of the buffer as the file header (Magic and Bitness fields).
248 const FileHeader *Header =
249 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
251 if (Header->Magic != BinCoverageMagic) {
252 errs() << "Wrong magic: " << Header->Magic << '\n';
253 return make_error_code(errc::illegal_byte_sequence);
256 auto Addrs = llvm::make_unique<std::set<uint64_t>>();
// Decode everything after the 8-byte header as 64-bit or 32-bit integers,
// depending on the header's bitness (case labels not visible in this excerpt).
258 switch (Header->Bitness) {
260 readInts<uint64_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
264 readInts<uint32_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
268 errs() << "Unsupported bitness: " << Header->Bitness << '\n';
269 return make_error_code(errc::illegal_byte_sequence);
272 return std::unique_ptr<RawCoverage>(new RawCoverage(std::move(Addrs)));
275 // Print coverage addresses.
// Iterates over the address set in ascending order (std::set ordering); the
// per-address formatting is not visible in this excerpt.
276 raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) {
277 for (auto Addr : *CoverageData.Addrs) {
// Prints the four statistics counters, one "<label>: <value>" pair per line,
// in the order all-edges, cov-edges, all-functions, cov-functions.
285 static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) {
286 OS << "all-edges: " << Stats.AllPoints << "\n";
287 OS << "cov-edges: " << Stats.CovPoints << "\n";
288 OS << "all-functions: " << Stats.AllFns << "\n";
289 OS << "cov-functions: " << Stats.CovFns << "\n";
293 // Helper for writing out JSON. Handles indents and commas using
294 // scope variables for objects and arrays.
// (The class's opening line is not visible in this excerpt.)
// Binds the writer to the output stream; the writer is non-copyable and
// emits a trailing newline when destroyed.
297 JSONWriter(raw_ostream &Out) : OS(Out) {}
298 JSONWriter(const JSONWriter &) = delete;
299 ~JSONWriter() { OS << "\n"; }
// Writes S as a JSON string literal.
301 void operator<<(StringRef S) { printJSONStringLiteral(S, OS); }
303 // Helper RAII class to output JSON objects.
306 Object(JSONWriter *W, raw_ostream &OS) : W(W), OS(OS) {
310 Object(const Object &) = delete;
// Emits the key of the next member as a string literal; the separator and
// indentation handling around it is not visible in this excerpt.
318 void key(StringRef Key) {
324 printJSONStringLiteral(Key, OS);
// Starts a new JSON object scope owned by the returned pointer.
334 std::unique_ptr<Object> object() { return make_unique<Object>(this, OS); }
336 // Helper RAII class to output JSON arrays.
// The constructor writes "[" and the destructor writes "]".
339 Array(raw_ostream &OS) : OS(OS) { OS << "["; }
340 Array(const Array &) = delete;
341 ~Array() { OS << "]"; }
// Starts a new JSON array scope owned by the returned pointer.
353 std::unique_ptr<Array> array() { return make_unique<Array>(OS); }
// Writes two spaces per indentation level.
356 void indent() { OS.indent(Indent * 2); }
// Writes S surrounded by double quotes. Fast path: when S contains no '"'
// it is written verbatim; otherwise the bytes are processed one at a time
// (loop body not visible in this excerpt).
// NOTE(review): on the fast path backslashes and control characters are
// emitted unescaped - confirm inputs never contain them.
358 static void printJSONStringLiteral(StringRef S, raw_ostream &OS) {
359 if (S.find('"') == std::string::npos) {
360 OS << "\"" << S << "\"";
364 for (char Ch : S.bytes()) {
376 // Output symbolized information for coverage points in JSON.
380 // '<function_name>' : {
381 // '<point_id>' : '<line_number>:<column_number>'.
// Writes the symbol information of all points as nested JSON objects:
// file name -> function name -> point id -> "<line>:<column>".
// A point is listed under every (file, function) pair that appears among its
// locations, but at most once per pair.
386 static void operator<<(JSONWriter &W,
387 const std::vector<CoveragePoint> &Points) {
388 // Group points by file.
389 auto ByFile(W.object());
390 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFile;
391 for (const auto &Point : Points) {
392 for (const DILineInfo &Loc : Point.Locs) {
393 PointsByFile[Loc.FileName].push_back(&Point);
// std::map iteration makes the file order (and, below, the function order)
// lexicographic.
397 for (const auto &P : PointsByFile) {
398 std::string FileName = P.first;
399 ByFile->key(FileName);
401 // Group points by function.
402 auto ByFn(W.object());
403 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFn;
404 for (auto PointPtr : P.second) {
405 for (const DILineInfo &Loc : PointPtr->Locs) {
406 PointsByFn[Loc.FunctionName].push_back(PointPtr);
// This inner P shadows the outer loop variable of the same name.
410 for (const auto &P : PointsByFn) {
411 std::string FunctionName = P.first;
// Ids already emitted for this (file, function) pair.
412 std::set<std::string> WrittenIds;
414 ByFn->key(FunctionName);
416 // Output <point_id> : "<line>:<col>".
417 auto ById(W.object());
418 for (const CoveragePoint *Point : P.second) {
419 for (const auto &Loc : Point->Locs) {
// Only locations belonging to the current file and function are considered,
// and each id is written once (the statements taken when these conditions
// hold are not visible in this excerpt).
420 if (Loc.FileName != FileName || Loc.FunctionName != FunctionName)
422 if (WrittenIds.find(Point->Id) != WrittenIds.end())
425 WrittenIds.insert(Point->Id);
426 ById->key(Point->Id);
427 W << (utostr(Loc.Line) + ":" + utostr(Loc.Column));
// Writes a whole SymbolizedCoverage as one JSON object with the keys
// "covered-points" (array built from CoveredIds), "binary-hash" (only when
// BinaryHash is non-empty) and "point-symbol-info". The statements that write
// the values are not visible in this excerpt.
434 static void operator<<(JSONWriter &W, const SymbolizedCoverage &C) {
438 O->key("covered-points");
439 auto PointsArray(W.array());
441 for (const auto &P : C.CoveredIds) {
448 if (!C.BinaryHash.empty()) {
449 O->key("binary-hash");
455 O->key("point-symbol-info");
// Returns the value of YAML node N as a std::string. N must be a scalar node;
// anything else is reported through failIf with "expected string".
460 static std::string parseScalarString(yaml::Node *N) {
// Scratch buffer that getValue() may use to hold the value.
461 SmallString<64> StringStorage;
462 yaml::ScalarNode *S = dyn_cast<yaml::ScalarNode>(N);
463 failIf(!S, "expected string");
// The std::string return type copies the value out before StringStorage
// goes out of scope.
464 return S->getValue(StringStorage);
// Parses a .symcov file (JSON, read through the YAML parser) into a
// SymbolizedCoverage. Recognised top-level keys are "covered-points",
// "binary-hash" and "point-symbol-info"; any other key is reported on errs()
// and ignored. Structural problems are reported through failIf/failIfError.
467 std::unique_ptr<SymbolizedCoverage>
468 SymbolizedCoverage::read(const std::string &InputFile) {
469 auto Coverage(make_unique<SymbolizedCoverage>());
// Points collected so far, keyed by id, so that locations found under
// different files/functions accumulate on the same point.
471 std::map<std::string, CoveragePoint> Points;
472 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
473 MemoryBuffer::getFile(InputFile);
474 failIfError(BufOrErr);
477 yaml::Stream S(**BufOrErr, SM);
// The first document's root must exist and be a mapping.
479 yaml::document_iterator DI = S.begin();
480 failIf(DI == S.end(), "empty document: " + InputFile);
481 yaml::Node *Root = DI->getRoot();
482 failIf(!Root, "expecting root node: " + InputFile);
483 yaml::MappingNode *Top = dyn_cast<yaml::MappingNode>(Root);
484 failIf(!Top, "expecting mapping node: " + InputFile);
486 for (auto &KVNode : *Top) {
487 auto Key = parseScalarString(KVNode.getKey());
489 if (Key == "covered-points") {
// This local Points shadows the map declared above within this branch.
490 yaml::SequenceNode *Points =
491 dyn_cast<yaml::SequenceNode>(KVNode.getValue());
492 failIf(!Points, "expected array: " + InputFile);
494 for (auto I = Points->begin(), E = Points->end(); I != E; ++I) {
495 Coverage->CoveredIds.insert(parseScalarString(&*I));
497 } else if (Key == "binary-hash") {
498 Coverage->BinaryHash = parseScalarString(KVNode.getValue());
499 } else if (Key == "point-symbol-info") {
// Three nested mappings: file -> function -> point id -> "<line>:<col>".
500 yaml::MappingNode *PointSymbolInfo =
501 dyn_cast<yaml::MappingNode>(KVNode.getValue());
502 failIf(!PointSymbolInfo, "expected mapping node: " + InputFile);
504 for (auto &FileKVNode : *PointSymbolInfo) {
505 auto Filename = parseScalarString(FileKVNode.getKey());
507 yaml::MappingNode *FileInfo =
508 dyn_cast<yaml::MappingNode>(FileKVNode.getValue());
509 failIf(!FileInfo, "expected mapping node: " + InputFile);
511 for (auto &FunctionKVNode : *FileInfo) {
512 auto FunctionName = parseScalarString(FunctionKVNode.getKey());
514 yaml::MappingNode *FunctionInfo =
515 dyn_cast<yaml::MappingNode>(FunctionKVNode.getValue());
516 failIf(!FunctionInfo, "expected mapping node: " + InputFile);
518 for (auto &PointKVNode : *FunctionInfo) {
519 auto PointId = parseScalarString(PointKVNode.getKey());
520 auto Loc = parseScalarString(PointKVNode.getValue());
// Split "<line>:<col>" at the first ':'.
522 size_t ColonPos = Loc.find(':');
523 failIf(ColonPos == std::string::npos, "expected ':': " + InputFile);
525 auto LineStr = Loc.substr(0, ColonPos);
526 auto ColStr = Loc.substr(ColonPos + 1, Loc.size());
// Create the point the first time its id is seen.
528 if (Points.find(PointId) == Points.end())
529 Points.insert(std::make_pair(PointId, CoveragePoint(PointId)));
532 LineInfo.FileName = Filename;
533 LineInfo.FunctionName = FunctionName;
// NOTE(review): no check of End or of the conversion result is visible here,
// so non-numeric text would presumably yield 0 - confirm.
535 LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10);
536 LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10);
538 CoveragePoint *CoveragePoint = &Points.find(PointId)->second;
539 CoveragePoint->Locs.push_back(LineInfo);
544 errs() << "Ignoring unknown key: " << Key << "\n";
// Copy the accumulated points into the result, ordered by id (map order).
548 for (auto &KV : Points) {
549 Coverage->Points.push_back(KV.second);
555 // ---------- MAIN FUNCTIONALITY ----------
// Removes the -strip_path_prefix text from Path. The first occurrence of the
// prefix is searched anywhere in Path (std::string::find), and everything up
// to and including that occurrence is dropped. The early-exit statements for
// an empty prefix and for "not found" are not visible in this excerpt;
// presumably they return Path unchanged.
557 std::string stripPathPrefix(std::string Path) {
558 if (ClStripPathPrefix.empty())
560 size_t Pos = Path.find(ClStripPathPrefix);
561 if (Pos == std::string::npos)
563 return Path.substr(Pos + ClStripPathPrefix.size());
// Creates a new LLVMSymbolizer. Demangling follows the -demangle option and
// symbol-table lookup is always enabled.
566 static std::unique_ptr<symbolize::LLVMSymbolizer> createSymbolizer() {
567 symbolize::LLVMSymbolizer::Options SymbolizerOptions;
568 SymbolizerOptions.Demangle = ClDemangle;
569 SymbolizerOptions.UseSymbolTable = true;
570 return std::unique_ptr<symbolize::LLVMSymbolizer>(
571 new symbolize::LLVMSymbolizer(SymbolizerOptions));
// Returns FileName with "." and ".." components removed (remove_dots with
// remove_dot_dot enabled) and then passed through stripPathPrefix().
574 static std::string normalizeFilename(const std::string &FileName) {
575 SmallString<256> S(FileName);
576 sys::path::remove_dots(S, /* remove_dot_dot */ true);
577 return stripPathPrefix(S.str().str());
// Holds the built-in and the user-supplied blacklist (class opening line not
// visible in this excerpt). Both lists are created in the constructor's
// initializer list; either may be null.
583 : DefaultBlacklist(createDefaultBlacklist()),
584 UserBlacklist(createUserBlacklist()) {}
// Checks I's function name and file name against the "fun" and "src" sections
// of whichever lists exist. The statements taken on a match are not visible
// in this excerpt; presumably any match yields true.
586 bool isBlacklisted(const DILineInfo &I) {
587 if (DefaultBlacklist && DefaultBlacklist->inSection("fun", I.FunctionName))
589 if (DefaultBlacklist && DefaultBlacklist->inSection("src", I.FileName))
591 if (UserBlacklist && UserBlacklist->inSection("fun", I.FunctionName))
593 if (UserBlacklist && UserBlacklist->inSection("src", I.FileName))
// Builds the list from DefaultBlacklistStr, or returns null when
// -use_default_blacklist is off. Parse problems go through failIfNotEmpty.
599 static std::unique_ptr<SpecialCaseList> createDefaultBlacklist() {
600 if (!ClUseDefaultBlacklist)
601 return std::unique_ptr<SpecialCaseList>();
602 std::unique_ptr<MemoryBuffer> MB =
603 MemoryBuffer::getMemBuffer(DefaultBlacklistStr);
605 auto Blacklist = SpecialCaseList::create(MB.get(), Error);
606 failIfNotEmpty(Error);
// Builds the list from the -blacklist file, or returns null when the option
// is empty. Uses createOrDie, so a bad file is handled by that call.
610 static std::unique_ptr<SpecialCaseList> createUserBlacklist() {
611 if (ClBlacklist.empty())
612 return std::unique_ptr<SpecialCaseList>();
614 return SpecialCaseList::createOrDie({{ClBlacklist}});
616 std::unique_ptr<SpecialCaseList> DefaultBlacklist;
617 std::unique_ptr<SpecialCaseList> UserBlacklist;
// Symbolizes every address in Addrs against ObjectFile and returns one
// CoveragePoint per address, with the primary location plus de-duplicated
// inlined frames. CoveredAddrs is only used to work out which source files
// contain covered code (for -skip-dead-files). B is the blacklist object; its
// declaration is not visible in this excerpt.
620 static std::vector<CoveragePoint>
621 getCoveragePoints(const std::string &ObjectFile,
622 const std::set<uint64_t> &Addrs,
623 const std::set<uint64_t> &CoveredAddrs) {
624 std::vector<CoveragePoint> Result;
625 auto Symbolizer(createSymbolizer());
// Pass 1: collect the (un-normalized) file names of all covered addresses,
// including files of inlined frames.
628 std::set<std::string> CoveredFiles;
629 if (ClSkipDeadFiles) {
630 for (auto Addr : CoveredAddrs) {
631 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, Addr);
632 failIfError(LineInfo);
633 CoveredFiles.insert(LineInfo->FileName);
634 auto InliningInfo = Symbolizer->symbolizeInlinedCode(ObjectFile, Addr);
635 failIfError(InliningInfo);
636 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
637 auto FrameInfo = InliningInfo->getFrame(I);
638 CoveredFiles.insert(FrameInfo.FileName);
// Pass 2: build a point for every address.
643 for (auto Addr : Addrs) {
644 std::set<DILineInfo> Infos; // deduplicate debug info.
646 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, Addr);
647 failIfError(LineInfo);
// The dead-file test uses the file name before normalization, matching how
// CoveredFiles was filled.
648 if (ClSkipDeadFiles &&
649 CoveredFiles.find(LineInfo->FileName) == CoveredFiles.end())
651 LineInfo->FileName = normalizeFilename(LineInfo->FileName);
652 if (B.isBlacklisted(*LineInfo))
// The point id is the address as a lowercase hexadecimal string.
655 auto Id = utohexstr(Addr, true);
656 auto Point = CoveragePoint(Id);
657 Infos.insert(*LineInfo);
658 Point.Locs.push_back(*LineInfo);
// Add inlined frames that pass the same filters and are not duplicates.
660 auto InliningInfo = Symbolizer->symbolizeInlinedCode(ObjectFile, Addr);
661 failIfError(InliningInfo);
662 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
663 auto FrameInfo = InliningInfo->getFrame(I);
664 if (ClSkipDeadFiles &&
665 CoveredFiles.find(FrameInfo.FileName) == CoveredFiles.end())
667 FrameInfo.FileName = normalizeFilename(FrameInfo.FileName);
668 if (B.isBlacklisted(FrameInfo))
670 if (Infos.find(FrameInfo) == Infos.end()) {
671 Infos.insert(FrameInfo);
672 Point.Locs.push_back(FrameInfo);
676 Result.push_back(Point);
// Returns true when Name is exactly one of the four sanitizer coverage
// callback names, either with the plain "__" prefix or with the "___" prefix.
682 static bool isCoveragePointSymbol(StringRef Name) {
683 return Name == "__sanitizer_cov" || Name == "__sanitizer_cov_with_check" ||
684 Name == "__sanitizer_cov_trace_func_enter" ||
685 Name == "__sanitizer_cov_trace_pc_guard" ||
686 // Mac has '___' prefix
687 Name == "___sanitizer_cov" || Name == "___sanitizer_cov_with_check" ||
688 Name == "___sanitizer_cov_trace_func_enter" ||
689 Name == "___sanitizer_cov_trace_pc_guard";
692 // Locate __sanitizer_cov* function addresses inside the stubs table on MachO.
// Walks the S_SYMBOL_STUBS sections of every 64-bit segment and inserts into
// *Result the address of each stub whose indirect symbol is a coverage
// callback. A 32-bit segment command triggers an error message.
693 static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O,
694 std::set<uint64_t> *Result) {
695 MachO::dysymtab_command Dysymtab = O.getDysymtabLoadCommand();
696 MachO::symtab_command Symtab = O.getSymtabLoadCommand();
698 for (const auto &Load : O.load_commands()) {
699 if (Load.C.cmd == MachO::LC_SEGMENT_64) {
700 MachO::segment_command_64 Seg = O.getSegment64LoadCommand(Load);
701 for (unsigned J = 0; J < Seg.nsects; ++J) {
702 MachO::section_64 Sec = O.getSection64(Load, J);
704 uint32_t SectionType = Sec.flags & MachO::SECTION_TYPE;
705 if (SectionType == MachO::S_SYMBOL_STUBS) {
// For stub sections reserved2 is used as the size of one stub and reserved1
// as the index of the section's first entry in the indirect symbol table.
// NOTE(review): no guard against Stride == 0 is visible before the division.
706 uint32_t Stride = Sec.reserved2;
707 uint32_t Cnt = Sec.size / Stride;
708 uint32_t N = Sec.reserved1;
// This inner J shadows the section index J of the enclosing loop.
709 for (uint32_t J = 0; J < Cnt && N + J < Dysymtab.nindirectsyms; J++) {
710 uint32_t IndirectSymbol =
711 O.getIndirectSymbolTableEntry(Dysymtab, N + J);
// Address of the J-th stub within the section.
712 uint64_t Addr = Sec.addr + J * Stride;
// Entries outside the symbol table range are skipped.
713 if (IndirectSymbol < Symtab.nsyms) {
714 object::SymbolRef Symbol = *(O.getSymbolByIndex(IndirectSymbol));
715 Expected<StringRef> Name = Symbol.getName();
717 if (isCoveragePointSymbol(Name.get())) {
718 Result->insert(Addr);
725 if (Load.C.cmd == MachO::LC_SEGMENT) {
726 errs() << "ERROR: 32 bit MachO binaries not supported\n";
731 // Locate __sanitizer_cov* function addresses that are used for coverage
// point detection: getObjectCoveragePoints() matches call targets against
// the returned set.
// Three sources are combined: defined symbols of the object, COFF export
// directory entries, and MachO stub entries.
733 static std::set<uint64_t>
734 findSanitizerCovFunctions(const object::ObjectFile &O) {
735 std::set<uint64_t> Result;
737 for (const object::SymbolRef &Symbol : O.symbols()) {
738 Expected<uint64_t> AddressOrErr = Symbol.getAddress();
739 failIfError(AddressOrErr);
740 uint64_t Address = AddressOrErr.get();
742 Expected<StringRef> NameOrErr = Symbol.getName();
743 failIfError(NameOrErr);
744 StringRef Name = NameOrErr.get();
// Undefined symbols are skipped; only symbols not flagged SF_Undefined count.
746 if (!(Symbol.getFlags() & object::BasicSymbolRef::SF_Undefined) &&
747 isCoveragePointSymbol(Name)) {
748 Result.insert(Address);
// COFF: also scan the export directory; the recorded address is the image
// base plus the export's RVA.
752 if (const auto *CO = dyn_cast<object::COFFObjectFile>(&O)) {
753 for (const object::ExportDirectoryEntryRef &Export :
754 CO->export_directories()) {
756 std::error_code EC = Export.getExportRVA(RVA);
760 EC = Export.getSymbolName(Name);
763 if (isCoveragePointSymbol(Name))
764 Result.insert(CO->getImageBase() + RVA);
// MachO: add the callback stubs found in the stubs table.
768 if (const auto *MO = dyn_cast<object::MachOObjectFile>(&O)) {
769 findMachOIndirectCovFunctions(*MO, &Result);
775 // Locate addresses of all coverage points in a file. Coverage point
776 // is defined as the 'address of instruction following __sanitizer_cov
// call, minus one'.
// Disassembles every non-virtual text section of O and inserts into *Addrs
// the coverage point of each call whose target is one of the
// __sanitizer_cov* functions found in O.
778 static void getObjectCoveragePoints(const object::ObjectFile &O,
779 std::set<uint64_t> *Addrs) {
// Build a triple that carries only the object's architecture.
780 Triple TheTriple("unknown-unknown-unknown");
781 TheTriple.setArch(Triple::ArchType(O.getArch()));
782 auto TripleName = TheTriple.getTriple();
785 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
786 failIfNotEmpty(Error);
// Create the MC-layer objects the disassembler needs; each creation result is
// checked with failIfEmpty.
788 std::unique_ptr<const MCSubtargetInfo> STI(
789 TheTarget->createMCSubtargetInfo(TripleName, "", ""));
790 failIfEmpty(STI, "no subtarget info for target " + TripleName);
792 std::unique_ptr<const MCRegisterInfo> MRI(
793 TheTarget->createMCRegInfo(TripleName));
794 failIfEmpty(MRI, "no register info for target " + TripleName);
796 std::unique_ptr<const MCAsmInfo> AsmInfo(
797 TheTarget->createMCAsmInfo(*MRI, TripleName));
798 failIfEmpty(AsmInfo, "no asm info for target " + TripleName);
800 std::unique_ptr<const MCObjectFileInfo> MOFI(new MCObjectFileInfo);
801 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
802 std::unique_ptr<MCDisassembler> DisAsm(
803 TheTarget->createMCDisassembler(*STI, Ctx));
804 failIfEmpty(DisAsm, "no disassembler info for target " + TripleName);
806 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
807 failIfEmpty(MII, "no instruction info for target " + TripleName);
809 std::unique_ptr<const MCInstrAnalysis> MIA(
810 TheTarget->createMCInstrAnalysis(MII.get()));
811 failIfEmpty(MIA, "no instruction analysis info for target " + TripleName);
// Without any callback address there is nothing to match calls against.
813 auto SanCovAddrs = findSanitizerCovFunctions(O);
814 if (SanCovAddrs.empty())
815 fail("__sanitizer_cov* functions not found");
817 for (object::SectionRef Section : O.sections()) {
818 if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same.
820 uint64_t SectionAddr = Section.getAddress();
821 uint64_t SectSize = Section.getSize();
826 failIfError(Section.getContents(BytesStr));
827 ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
// Decode instruction by instruction; Size receives the length of the decoded
// instruction (the loop increment and the handling of undecodable bytes are
// not visible in this excerpt).
830 for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
833 if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
834 SectionAddr + Index, nulls(), nulls())) {
839 uint64_t Addr = Index + SectionAddr;
840 // Sanitizer coverage uses the address of the next instruction - 1.
841 uint64_t CovPoint = Addr + Size - 1;
// Record the point only for calls whose statically evaluated target is a
// coverage callback.
843 if (MIA->isCall(Inst) &&
844 MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) &&
845 SanCovAddrs.find(Target) != SanCovAddrs.end())
846 Addrs->insert(CovPoint);
// Archive overload: loads each child of A as a binary. Children that are
// object files are presumably passed to Fn (that statement is not visible in
// this excerpt); the invalid_file_type failure presumably applies to any other
// child. Iteration errors are reported after the loop.
852 visitObjectFiles(const object::Archive &A,
853 function_ref<void(const object::ObjectFile &)> Fn) {
// Err collects any error raised while iterating the archive's children.
854 Error Err = Error::success();
855 for (auto &C : A.children(Err)) {
856 Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
857 failIfError(ChildOrErr);
858 if (auto *O = dyn_cast<object::ObjectFile>(&*ChildOrErr.get()))
861 failIfError(object::object_error::invalid_file_type);
863 failIfError(std::move(Err));
// File-name overload: opens FileName as a binary. An archive is delegated to
// the archive overload; a plain object file is presumably passed to Fn
// directly (statement not visible in this excerpt); the invalid_file_type
// failure presumably covers every other kind of binary.
867 visitObjectFiles(const std::string &FileName,
868 function_ref<void(const object::ObjectFile &)> Fn) {
869 Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
870 object::createBinary(FileName);
872 failIfError(BinaryOrErr);
874 object::Binary &Binary = *BinaryOrErr.get().getBinary();
875 if (object::Archive *A = dyn_cast<object::Archive>(&Binary))
876 visitObjectFiles(*A, Fn);
877 else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(&Binary))
880 failIfError(object::object_error::invalid_file_type);
// File-name overload: returns the union of the __sanitizer_cov* function
// addresses found in every object file visited for FileName.
883 static std::set<uint64_t>
884 findSanitizerCovFunctions(const std::string &FileName) {
885 std::set<uint64_t> Result;
886 visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
887 auto Addrs = findSanitizerCovFunctions(O);
888 Result.insert(Addrs.begin(), Addrs.end());
893 // Locate addresses of all coverage points in a file. Coverage point
894 // is defined as the 'address of instruction following __sanitizer_cov
// call, minus one'.
// Accumulates the points of every object file visited for FileName into one
// set.
896 static std::set<uint64_t> findCoveragePointAddrs(const std::string &FileName) {
897 std::set<uint64_t> Result;
898 visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
899 getObjectCoveragePoints(O, &Result);
// Visits every coverage point address of ObjFile in ascending order. OS is
// the output stream; the per-address formatting is not visible in this
// excerpt.
904 static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) {
905 for (uint64_t Addr : findCoveragePointAddrs(ObjFile)) {
// Decides whether FileName is a raw .sancov file: its base name is tested
// against SancovFileRegex and the header magic must equal BinCoverageMagic.
// If the file cannot be opened, a warning is printed and the open error is
// returned. The results for a non-matching name and for a file shorter than
// 8 bytes are not visible in this excerpt; presumably both yield false.
912 static ErrorOr<bool> isCoverageFile(const std::string &FileName) {
913 auto ShortFileName = llvm::sys::path::filename(FileName);
914 if (!SancovFileRegex.match(ShortFileName))
917 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
918 MemoryBuffer::getFile(FileName);
920 errs() << "Warning: " << BufOrErr.getError().message() << "("
921 << BufOrErr.getError().value()
922 << "), filename: " << llvm::sys::path::filename(FileName) << "\n";
923 return BufOrErr.getError();
925 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
926 if (Buf->getBufferSize() < 8) {
929 const FileHeader *Header =
930 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
931 return Header->Magic == BinCoverageMagic;
// Returns true when the base name of FileName matches SymcovFileRegex. Only
// the name is inspected; the file is not opened.
934 static bool isSymbolizedCoverageFile(const std::string &FileName) {
935 auto ShortFileName = llvm::sys::path::filename(FileName);
936 return SymcovFileRegex.match(ShortFileName);
// Converts raw coverage Data for ObjectFile into a SymbolizedCoverage:
// records a hash of the binary's contents, the ids of covered addresses that
// pass the blacklist test, and symbol information for all coverage points of
// the binary. Fails when Data contains an address that is not a coverage
// point of the binary.
// NOTE(review): ObjectFile is a const by-value parameter, so it is copied on
// every call; a const reference would presumably suffice.
939 static std::unique_ptr<SymbolizedCoverage>
940 symbolize(const RawCoverage &Data, const std::string ObjectFile) {
941 auto Coverage = make_unique<SymbolizedCoverage>();
// Hash the whole file (the Hasher declaration is not visible in this excerpt).
943 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
944 MemoryBuffer::getFile(ObjectFile);
945 failIfError(BufOrErr);
947 Hasher.update((*BufOrErr)->getBuffer());
948 Coverage->BinaryHash = toHex(Hasher.final());
951 auto Symbolizer(createSymbolizer());
// Covered ids use the same lowercase-hex spelling as getCoveragePoints().
953 for (uint64_t Addr : *Data.Addrs) {
954 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, Addr);
955 failIfError(LineInfo);
956 if (B.isBlacklisted(*LineInfo))
959 Coverage->CoveredIds.insert(utohexstr(Addr, true));
// std::includes on two sorted sets: every raw address must be a known
// coverage point of the binary.
962 std::set<uint64_t> AllAddrs = findCoveragePointAddrs(ObjectFile);
963 if (!std::includes(AllAddrs.begin(), AllAddrs.end(), Data.Addrs->begin(),
964 Data.Addrs->end())) {
965 fail("Coverage points in binary and .sancov file do not match.");
967 Coverage->Points = getCoveragePoints(ObjectFile, AllAddrs, *Data.Addrs);
// (file name, function name) pair identifying a function; the struct's
// opening line is not visible in this excerpt.
// Orders by file name first, then by function name.
972 bool operator<(const FileFn &RHS) const {
973 return std::tie(FileName, FunctionName) <
974 std::tie(RHS.FileName, RHS.FunctionName);
977 std::string FileName;
978 std::string FunctionName;
// Returns the set of all (file, function) pairs that occur in any location of
// any point.
981 static std::set<FileFn>
982 computeFunctions(const std::vector<CoveragePoint> &Points) {
983 std::set<FileFn> Fns;
984 for (const auto &Point : Points) {
985 for (const auto &Loc : Point.Locs) {
986 Fns.insert(FileFn{Loc.FileName, Loc.FunctionName});
// Starts from all functions and erases those touched by a location of some
// point. The statement guarded by the CoveredIds test is not visible in this
// excerpt; presumably points whose id is not covered are skipped, so only
// functions of covered points are erased.
992 static std::set<FileFn>
993 computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) {
994 auto Fns = computeFunctions(Coverage.Points);
996 for (const auto &Point : Coverage.Points) {
997 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
1000 for (const auto &Loc : Point.Locs) {
1001 Fns.erase(FileFn{Loc.FileName, Loc.FunctionName});
// Collects the (file, function) pairs of point locations into Result. The
// statement guarded by the CoveredIds test is not visible in this excerpt;
// presumably points whose id is not covered are skipped.
1008 static std::set<FileFn>
1009 computeCoveredFunctions(const SymbolizedCoverage &Coverage) {
// NOTE(review): AllFns is not referenced by any line visible here.
1010 auto AllFns = computeFunctions(Coverage.Points);
1011 std::set<FileFn> Result;
1013 for (const auto &Point : Coverage.Points) {
1014 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
1017 for (const auto &Loc : Point.Locs) {
1018 Result.insert(FileFn{Loc.FileName, Loc.FunctionName});
// Maps a function (file + name) to a (line, column) pair.
1025 typedef std::map<FileFn, std::pair<uint32_t, uint32_t>> FunctionLocs;
1026 // finds first location in a file for each function.
// Only functions contained in Fns are considered. "First" means the smallest
// (line, column) pair under std::pair ordering.
1027 static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage,
1028 const std::set<FileFn> &Fns) {
1029 FunctionLocs Result;
1030 for (const auto &Point : Coverage.Points) {
1031 for (const auto &Loc : Point.Locs) {
1032 FileFn Fn = FileFn{Loc.FileName, Loc.FunctionName};
1033 if (Fns.find(Fn) == Fns.end())
1036 auto P = std::make_pair(Loc.Line, Loc.Column);
1037 auto I = Result.find(Fn);
// True for a function seen for the first time or when P is an earlier
// position than the stored one (the statement that stores P is not visible in
// this excerpt).
1038 if (I == Result.end() || I->second > P) {
// Prints one line per function: "<file>:<line> <function>". The file name is
// passed through stripPathPrefix(); the stored column is not printed.
1046 static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) {
1047 for (const auto &P : FnLocs) {
1048 OS << stripPathPrefix(P.first.FileName) << ":" << P.second.first << " "
1049 << P.first.FunctionName << "\n";
// Builds the statistics for Coverage from four counts, in initializer order:
// number of points, number of covered ids, number of distinct functions,
// number of covered functions.
1052 CoverageStats computeStats(const SymbolizedCoverage &Coverage) {
1053 CoverageStats Stats = {Coverage.Points.size(), Coverage.CoveredIds.size(),
1054 computeFunctions(Coverage.Points).size(),
1055 computeCoveredFunctions(Coverage).size()};
1059 // Print list of covered functions.
1060 // Line format: <file_name>:<line> <function_name>
// The line shown is the first location resolveFunctions() finds for each
// function. (The second parameter line is not visible in this excerpt; OS is
// the output stream.)
1061 static void printCoveredFunctions(const SymbolizedCoverage &CovData,
1063 auto CoveredFns = computeCoveredFunctions(CovData);
1064 printFunctionLocs(resolveFunctions(CovData, CoveredFns), OS);
1067 // Print list of not covered functions.
1068 // Line format: <file_name>:<line> <function_name>
// The line shown is the first location resolveFunctions() finds for each
// function. (The second parameter line is not visible in this excerpt; OS is
// the output stream.)
1069 static void printNotCoveredFunctions(const SymbolizedCoverage &CovData,
1071 auto NotCoveredFns = computeNotCoveredFunctions(CovData);
1072 printFunctionLocs(resolveFunctions(CovData, NotCoveredFns), OS);
1075 // Read list of files and merges their coverage info.
// Each file is read with RawCoverage::read(). How the results are checked,
// combined and printed is not visible in this excerpt.
1076 static void readAndPrintRawCoverage(const std::vector<std::string> &FileNames,
1078 std::vector<std::unique_ptr<RawCoverage>> Covs;
1079 for (const auto &FileName : FileNames) {
1080 auto Cov = RawCoverage::read(FileName);
// Combines several SymbolizedCoverage objects into one. Every id (covered ids
// and point ids alike) gets Prefix prepended so that ids coming from different
// inputs stay distinct. Prefix's declaration and value are not visible in
// this excerpt; per the comment below it is only set when more than one input
// is merged. The binary hash is carried over only for a single input.
1087 static std::unique_ptr<SymbolizedCoverage>
1088 merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) {
1089 auto Result = make_unique<SymbolizedCoverage>();
1091 for (size_t I = 0; I < Coverages.size(); ++I) {
1092 const SymbolizedCoverage &Coverage = *Coverages[I];
1094 if (Coverages.size() > 1) {
1095 // prefix is not needed when there's only one file.
1099 for (const auto &Id : Coverage.CoveredIds) {
1100 Result->CoveredIds.insert(Prefix + Id);
// Copy each point and rewrite only its id.
1103 for (const auto &CovPoint : Coverage.Points) {
1104 CoveragePoint NewPoint(CovPoint);
1105 NewPoint.Id = Prefix + CovPoint.Id;
1106 Result->Points.push_back(NewPoint);
1110 if (Coverages.size() == 1) {
1111 Result->BinaryHash = Coverages[0]->BinaryHash;
// Turns the command-line file list into a single merged SymbolizedCoverage:
//  1. .symcov files are read directly;
//  2. the remaining files are split into raw .sancov files and object files;
//  3. each .sancov file is matched to an object file through the module name
//     captured by SancovFileRegex;
//  4. every matched pair is symbolized and all results are merged.
1117 static std::unique_ptr<SymbolizedCoverage>
1118 readSymbolizeAndMergeCmdArguments(std::vector<std::string> FileNames) {
1119 std::vector<std::unique_ptr<SymbolizedCoverage>> Coverages;
1122 // Short name => file name.
1123 std::map<std::string, std::string> ObjFiles;
1124 std::string FirstObjFile;
1125 std::set<std::string> CovFiles;
1127 // Partition input values into coverage/object files.
1128 for (const auto &FileName : FileNames) {
1129 if (isSymbolizedCoverageFile(FileName)) {
1130 Coverages.push_back(SymbolizedCoverage::read(FileName));
// Files for which the coverage check itself failed are handled by a statement
// not visible in this excerpt.
1133 auto ErrorOrIsCoverage = isCoverageFile(FileName);
1134 if (!ErrorOrIsCoverage)
1136 if (ErrorOrIsCoverage.get()) {
1137 CovFiles.insert(FileName);
// Anything else is treated as an object file; base names must be unique.
1139 auto ShortFileName = llvm::sys::path::filename(FileName);
1140 if (ObjFiles.find(ShortFileName) != ObjFiles.end()) {
1141 fail("Duplicate binary file with a short name: " + ShortFileName);
1144 ObjFiles[ShortFileName] = FileName;
1145 if (FirstObjFile.empty())
1146 FirstObjFile = FileName;
// Receives the regex capture groups; index 1 is the module name.
1150 SmallVector<StringRef, 2> Components;
1152 // Object file => list of corresponding coverage file names.
1153 std::map<std::string, std::vector<std::string>> CoverageByObjFile;
1154 for (const auto &FileName : CovFiles) {
1155 auto ShortFileName = llvm::sys::path::filename(FileName);
1156 auto Ok = SancovFileRegex.match(ShortFileName, &Components);
1158 fail("Can't match coverage file name against "
1159 "<module_name>.<pid>.sancov pattern: " +
1163 auto Iter = ObjFiles.find(Components[1]);
1164 if (Iter == ObjFiles.end()) {
1165 fail("Object file for coverage not found: " + FileName);
1168 CoverageByObjFile[Iter->second].push_back(FileName);
1171 // Read raw coverage and symbolize it.
1172 for (const auto &Pair : CoverageByObjFile) {
// Object files without coverage callbacks are reported and, per the message,
// ignored together with their coverage files.
1173 if (findSanitizerCovFunctions(Pair.first).empty()) {
1175 << "Ignoring " << Pair.first
1176 << " and its coverage because __sanitizer_cov* functions were not "
1181 for (const std::string &CoverageFile : Pair.second) {
1182 auto DataOrError = RawCoverage::read(CoverageFile);
1183 failIfError(DataOrError);
1184 Coverages.push_back(symbolize(*DataOrError.get(), Pair.first));
1189 return merge(Coverages);
// Tool entry point: initializes LLVM's target info, MC and disassembler
// support, parses the command line and dispatches on the selected Action.
1194 int main(int Argc, char **Argv) {
1195 // Print stack trace if we signal out.
1196 sys::PrintStackTraceOnErrorSignal(Argv[0]);
1197 PrettyStackTraceProgram X(Argc, Argv);
1198 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
1200 llvm::InitializeAllTargetInfos();
1201 llvm::InitializeAllTargetMCs();
1202 llvm::InitializeAllDisassemblers();
// NOTE(review): the overview text below mentions "-print-coverage", but the
// Action option registers that action under the name "print".
1204 cl::ParseCommandLineOptions(Argc, Argv,
1205 "Sanitizer Coverage Processing Tool (sancov)\n\n"
1206 " This tool can extract various coverage-related information from: \n"
1207 " coverage-instrumented binary files, raw .sancov files and their "
1208 "symbolized .symcov version.\n"
1209 " Depending on chosen action the tool expects different input files:\n"
1210 " -print-coverage-pcs - coverage-instrumented binary files\n"
1211 " -print-coverage - .sancov files\n"
1212 " <other actions> - .sancov files & corresponding binary "
1213 "files, .symcov files\n"
1216 // -print doesn't need object files.
1217 if (Action == PrintAction) {
1218 readAndPrintRawCoverage(ClInputFiles, outs());
1220 } else if (Action == PrintCovPointsAction) {
1221 // -print-coverage-pcs doesn't need coverage files.
1222 for (const std::string &ObjFile : ClInputFiles) {
1223 printCovPoints(ObjFile, outs());
// All remaining actions operate on the merged, symbolized coverage.
1228 auto Coverage = readSymbolizeAndMergeCmdArguments(ClInputFiles);
1229 failIf(!Coverage, "No valid coverage files given.");
1232 case CoveredFunctionsAction: {
1233 printCoveredFunctions(*Coverage, outs());
1236 case NotCoveredFunctionsAction: {
1237 printNotCoveredFunctions(*Coverage, outs());
1241 outs() << computeStats(*Coverage);
1245 case SymbolizeAction: { // merge & symbolize are synonyms.
1246 JSONWriter W(outs());
1250 case HtmlReportAction:
1251 errs() << "-html-report option is removed: "
1252 "use -symbolize & coverage-report-server.py instead\n";
1255 case PrintCovPointsAction:
1256 llvm_unreachable("unsupported action");