1 //===-- MachOUtils.cpp - Mach-o specific helpers for dsymutil ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MachOUtils.h"
11 #include "BinaryHolder.h"
13 #include "LinkUtils.h"
14 #include "NonRelocatableStringpool.h"
15 #include "llvm/MC/MCAsmLayout.h"
16 #include "llvm/MC/MCMachObjectWriter.h"
17 #include "llvm/MC/MCObjectStreamer.h"
18 #include "llvm/MC/MCSectionMachO.h"
19 #include "llvm/MC/MCStreamer.h"
20 #include "llvm/Object/MachO.h"
21 #include "llvm/Support/FileUtilities.h"
22 #include "llvm/Support/Program.h"
23 #include "llvm/Support/WithColor.h"
24 #include "llvm/Support/raw_ostream.h"
28 namespace MachOUtils {
// Normalizes an architecture name: a name that starts with "thumb" has that
// prefix replaced by "arm" (for instance "thumbv7" yields "armv7").
// NOTE(review): the fall-through path for every other name is not visible in
// this listing; presumably \a Arch is returned unchanged, to be confirmed.
30 std::string getArchName(StringRef Arch) {
31 if (Arch.startswith("thumb"))
// drop_front(5) removes exactly the five characters of "thumb".
32 return (llvm::Twine("arm") + Arch.drop_front(5)).str();
// Locates the "lipo" program and executes it with \a Args.
// "lipo" is first looked up using \a SDKPath as the search path; a second
// lookup without an explicit search path is also present. Both a lookup
// failure and an execution failure are reported through WithColor::error()
// with a "lipo: " prefix.
// NOTE(review): the conditions guarding the second lookup and the error
// reports, as well as the return statements, are not visible in this listing.
// Presumably the second lookup is a fallback and the function returns false on
// failure and true otherwise; to be confirmed.
36 static bool runLipo(StringRef SDKPath, SmallVectorImpl<StringRef> &Args) {
37 auto Path = sys::findProgramByName("lipo", makeArrayRef(SDKPath));
39 Path = sys::findProgramByName("lipo");
// Path.getError() carries the reason the program could not be found.
42 WithColor::error() << "lipo: " << Path.getError().message() << "\n";
// ErrMsg is handed to ExecuteAndWait as an out-parameter and printed below.
47 int result = sys::ExecuteAndWait(*Path, Args, None, {}, 0, 0, &ErrMsg);
49 WithColor::error() << "lipo: " << ErrMsg << "\n";
// Produces \a OutputFileName from the per-architecture files in \a ArchFiles.
// With exactly one input, the file is renamed to the output path, with a
// copy-then-remove fallback when the rename fails. Otherwise a
// "lipo -create <inputs> ... -output <OutputFileName>" argument list is built
// and, unless Options.NoOutput is set, executed through runLipo() with
// \a SDKPath.
// Side effect: each entry's Arch in \a ArchFiles is overwritten with
// getArchName(Arch).
// \returns true when Options.NoOutput is set, otherwise the result of
// runLipo() (multi-file path).
// NOTE(review): the return statements of the single-file path are not visible
// in this listing.
56 bool generateUniversalBinary(SmallVectorImpl<ArchAndFilename> &ArchFiles,
57 StringRef OutputFileName,
58 const LinkOptions &Options, StringRef SDKPath) {
59 // No need to merge one file into a universal fat binary. First, try
60 // to move it (rename) to the final location. If that fails because
61 // of cross-device link issues then copy and delete.
62 if (ArchFiles.size() == 1) {
63 StringRef From(ArchFiles.front().Path);
// A non-zero error code from rename() means the move failed.
64 if (sys::fs::rename(From, OutputFileName)) {
65 if (std::error_code EC = sys::fs::copy_file(From, OutputFileName)) {
66 WithColor::error() << "while copying " << From << " to "
67 << OutputFileName << ": " << EC.message() << "\n";
// The copy succeeded at this point; the result of remove() is not checked.
70 sys::fs::remove(From);
// Args holds StringRefs only: the referenced strings (literals, ArchFiles
// entries, OutputFileName) must outlive the call to runLipo().
75 SmallVector<StringRef, 8> Args;
76 Args.push_back("lipo");
77 Args.push_back("-create");
// First all input paths ...
79 for (auto &Thin : ArchFiles)
80 Args.push_back(Thin.Path);
82 // Align segments to match dsymutil-classic alignment
// ... then one "-segalign <arch>" group per input. The normalized name is
// stored back into Thin.Arch so the StringRef pushed below stays valid.
// NOTE(review): lipo's -segalign also takes an alignment value; that argument
// is not visible in this listing.
83 for (auto &Thin : ArchFiles) {
84 Thin.Arch = getArchName(Thin.Arch);
85 Args.push_back("-segalign");
86 Args.push_back(Thin.Arch);
90 Args.push_back("-output");
// NOTE(review): .data() yields a const char*, so the StringRef stored in Args
// is rebuilt from a C string. This relies on OutputFileName being
// null-terminated; confirm against callers.
91 Args.push_back(OutputFileName.data());
93 if (Options.Verbose) {
94 outs() << "Running lipo\n";
// NoOutput short-circuits the actual lipo invocation and reports success.
100 return Options.NoOutput ? true : runLipo(SDKPath, Args);
103 // Return a MachO::segment_command_64 that holds the same values as the passed
104 // MachO::segment_command. We do that to avoid having to duplicate the logic
105 // for 32bits and 64bits segments.
// \a Seg is taken by value and copied into the result field by field.
// NOTE(review): the assignment of the `cmd` field and the return statement are
// not visible in this listing.
106 struct MachO::segment_command_64 adaptFrom32bits(MachO::segment_command Seg) {
107 MachO::segment_command_64 Seg64;
109 Seg64.cmdsize = Seg.cmdsize;
// The segment name is a fixed-size character array; copy all of its bytes.
110 memcpy(Seg64.segname, Seg.segname, sizeof(Seg.segname));
111 Seg64.vmaddr = Seg.vmaddr;
112 Seg64.vmsize = Seg.vmsize;
113 Seg64.fileoff = Seg.fileoff;
114 Seg64.filesize = Seg.filesize;
115 Seg64.maxprot = Seg.maxprot;
116 Seg64.initprot = Seg.initprot;
117 Seg64.nsects = Seg.nsects;
118 Seg64.flags = Seg.flags;
122 // Iterate on all \a Obj segments, and apply \a Handler to them.
// Both segment flavours are presented as MachO::segment_command_64:
// LC_SEGMENT commands are converted with adaptFrom32bits(), LC_SEGMENT_64
// commands are read directly.
// NOTE(review): the handling of other load command kinds and the actual
// invocation of \a Handler are not visible in this listing.
123 template <typename FunctionTy>
124 static void iterateOnSegments(const object::MachOObjectFile &Obj,
125 FunctionTy Handler) {
126 for (const auto &LCI : Obj.load_commands()) {
127 MachO::segment_command_64 Segment;
128 if (LCI.C.cmd == MachO::LC_SEGMENT)
129 Segment = adaptFrom32bits(Obj.getSegmentLoadCommand(LCI));
130 else if (LCI.C.cmd == MachO::LC_SEGMENT_64)
131 Segment = Obj.getSegment64LoadCommand(LCI);
139 // Transfer the symbols described by \a NList to \a NewSymtab which is just the
140 // raw contents of the symbol table for the dSYM companion file. \returns
141 // whether the symbol was transferred or not.
// \a NList is taken by value and modified locally before being appended.
// \a Strings is the input string table that NList.n_strx indexes into.
// \a NewStrings provides the string offset stored in the transferred entry.
// \a IsLittleEndian is compared with the host endianness to decide whether the
// entry must be byte-swapped before being appended.
// NOTE(review): the end of the parameter list and several statements (the
// target of the boolean expression below, the branch bodies, the return
// statements) are not visible in this listing. The caller passes an
// `InDebugNote` flag as the last argument.
142 template <typename NListTy>
143 static bool transferSymbol(NListTy NList, bool IsLittleEndian,
144 StringRef Strings, SmallVectorImpl<char> &NewSymtab,
145 NonRelocatableStringpool &NewStrings,
147 // Do not transfer undefined symbols, we want real addresses.
148 if ((NList.n_type & MachO::N_TYPE) == MachO::N_UNDF)
// The name is read as a C string starting at offset n_strx of the string table.
151 StringRef Name = StringRef(Strings.begin() + NList.n_strx);
// True unless the entry is an N_SO whose name is empty.
154 (NList.n_type != MachO::N_SO) || (!Name.empty() && Name[0] != '\0');
156 } else if (NList.n_type == MachO::N_SO) {
161 // FIXME: The + 1 is here to mimic dsymutil-classic that has 2 empty
162 // strings at the start of the generated string table (There is
163 // corresponding code in the string table emission).
164 NList.n_strx = NewStrings.getStringOffset(Name) + 1;
// The entry is appended as raw bytes, so it must be in the target byte order.
165 if (IsLittleEndian != sys::IsLittleEndianHost)
166 MachO::swapStruct(NList);
// Append the bytes of exactly one NList entry.
168 NewSymtab.append(reinterpret_cast<char *>(&NList),
169 reinterpret_cast<char *>(&NList + 1));
173 // Wrapper around transferSymbol to transfer all of \a Obj symbols
174 // to \a NewSymtab. This function does not write in the output file.
175 // \returns the number of symbols in \a NewSymtab.
// Two loops are present: one reads entries with getSymbol64TableEntry(), the
// other with getSymbolTableEntry(). InDebugNote is shared state passed to
// every transferSymbol() call.
// NOTE(review): the condition selecting between the two loops (presumably the
// bitness of \a Obj) and the counting of transferred symbols are not visible
// in this listing.
176 static unsigned transferSymbols(const object::MachOObjectFile &Obj,
177 SmallVectorImpl<char> &NewSymtab,
178 NonRelocatableStringpool &NewStrings) {
180 StringRef Strings = Obj.getStringTableData();
181 bool IsLittleEndian = Obj.isLittleEndian();
182 bool InDebugNote = false;
185 for (const object::SymbolRef &Symbol : Obj.symbols()) {
186 object::DataRefImpl DRI = Symbol.getRawDataRefImpl();
187 if (transferSymbol(Obj.getSymbol64TableEntry(DRI), IsLittleEndian,
188 Strings, NewSymtab, NewStrings, InDebugNote))
192 for (const object::SymbolRef &Symbol : Obj.symbols()) {
193 object::DataRefImpl DRI = Symbol.getRawDataRefImpl();
194 if (transferSymbol(Obj.getSymbolTableEntry(DRI), IsLittleEndian, Strings,
195 NewSymtab, NewStrings, InDebugNote))
// Overload for MachO::segment_command: \returns section \a Idx of the segment
// load command \a LCI as a MachO::section. \a Seg is not read; its type only
// selects this overload.
202 static MachO::section
203 getSection(const object::MachOObjectFile &Obj,
204 const MachO::segment_command &Seg,
205 const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) {
206 return Obj.getSection(LCI, Idx);
// Overload for MachO::segment_command_64: \returns section \a Idx of the
// segment load command \a LCI as a MachO::section_64. \a Seg is not read; its
// type only selects this overload.
209 static MachO::section_64
210 getSection(const object::MachOObjectFile &Obj,
211 const MachO::segment_command_64 &Seg,
212 const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) {
213 return Obj.getSection64(LCI, Idx);
216 // Transfer \a Segment from \a Obj to the output file. This calls into \a Writer
217 // to write these load commands directly in the output file at the current
// NOTE(review): the sentence above is cut short in this listing (presumably
// "at the current position").
219 // The function also tries to find a hole in the address map to fit the __DWARF
220 // segment of \a DwarfSegmentSize size. \a EndAddress is updated to point at the
221 // highest segment address.
222 // When the __LINKEDIT segment is transferred, its offset and size are set resp.
223 // to \a LinkeditOffset and \a LinkeditSize.
// \a Segment is taken by value: the edits below only affect the copy that is
// written out. \a GapForDwarf and \a EndAddress are in/out accumulators shared
// across successive calls.
224 template <typename SegmentTy>
225 static void transferSegmentAndSections(
226 const object::MachOObjectFile::LoadCommandInfo &LCI, SegmentTy Segment,
227 const object::MachOObjectFile &Obj, MachObjectWriter &Writer,
228 uint64_t LinkeditOffset, uint64_t LinkeditSize, uint64_t DwarfSegmentSize,
229 uint64_t &GapForDwarf, uint64_t &EndAddress) {
// NOTE(review): the statement controlled by this test is not visible in this
// listing; presumably the input __DWARF segment is skipped, to be confirmed.
230 if (StringRef("__DWARF") == Segment.segname)
// Clear the file offset and size; only __LINKEDIT gets real values below.
233 Segment.fileoff = Segment.filesize = 0;
235 if (StringRef("__LINKEDIT") == Segment.segname) {
236 Segment.fileoff = LinkeditOffset;
237 Segment.filesize = LinkeditSize;
238 // Resize vmsize by rounding to the page size.
239 Segment.vmsize = alignTo(LinkeditSize, 0x1000);
242 // Check if the end address of the last segment and our current
243 // start address leave a sufficient gap to store the __DWARF
// The unaligned value is saved because EndAddress is rounded up to 0x1000 for
// the gap test only.
245 uint64_t PrevEndAddress = EndAddress;
246 EndAddress = alignTo(EndAddress, 0x1000);
// UINT64_MAX means "no gap recorded yet", so only the first fitting gap is
// kept.
247 if (GapForDwarf == UINT64_MAX && Segment.vmaddr > EndAddress &&
248 Segment.vmaddr - EndAddress >= DwarfSegmentSize)
249 GapForDwarf = EndAddress;
251 // The segments are not necessarily sorted by their vmaddr.
// NOTE(review): the left-hand side of the expression below is not visible in
// this listing; presumably it is assigned to EndAddress.
253 std::max<uint64_t>(PrevEndAddress, Segment.vmaddr + Segment.vmsize);
// Read the section count before Segment is possibly byte-swapped.
254 unsigned nsects = Segment.nsects;
255 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
256 MachO::swapStruct(Segment);
// The segment load command is written as raw bytes.
257 Writer.W.OS.write(reinterpret_cast<char *>(&Segment), sizeof(Segment));
258 for (unsigned i = 0; i < nsects; ++i) {
// The getSection() overload is chosen by the static type of Segment.
259 auto Sect = getSection(Obj, Segment, LCI, i);
// Section headers are emitted with no file offset and no relocation info.
260 Sect.offset = Sect.reloff = Sect.nreloc = 0;
261 if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
262 MachO::swapStruct(Sect);
263 Writer.W.OS.write(reinterpret_cast<char *>(&Sect), sizeof(Sect));
267 // Write the __DWARF segment load command to the output file.
// The segment command is followed by one section header per section of
// \a Layout that is non-empty and has a non-zero file size. \a VMAddr and
// \a FileOffset are by-value parameters used as running cursors while the
// section headers are emitted.
268 static void createDwarfSegment(uint64_t VMAddr, uint64_t FileOffset,
269 uint64_t FileSize, unsigned NumSections,
270 MCAsmLayout &Layout, MachObjectWriter &Writer) {
// The VM size passed is FileSize rounded up to a multiple of 0x1000.
// NOTE(review): the remaining arguments of this call are not visible in this
// listing.
271 Writer.writeSegmentLoadCommand("__DWARF", NumSections, VMAddr,
272 alignTo(FileSize, 0x1000), FileOffset,
273 FileSize, /* MaxProt */ 7,
276 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
277 MCSection *Sec = Layout.getSectionOrder()[i];
// NOTE(review): the statement controlled by this test is not visible in this
// listing; presumably such sections are skipped.
278 if (Sec->begin() == Sec->end() || !Layout.getSectionFileSize(Sec))
281 unsigned Align = Sec->getAlignment();
// Both cursors are aligned to the section's alignment before the header is
// written.
283 VMAddr = alignTo(VMAddr, Align);
284 FileOffset = alignTo(FileOffset, Align);
286 Writer.writeSection(Layout, *Sec, VMAddr, FileOffset, 0, 0, 0);
// Advance both cursors past this section.
288 FileOffset += Layout.getSectionAddressSize(Sec);
289 VMAddr += Layout.getSectionAddressSize(Sec);
// \returns whether the Mach-O header of \a Obj has a filetype other than
// MH_OBJECT. One return reads the 64-bit header, the other the 32-bit one.
// NOTE(review): the condition selecting between the two returns is not visible
// in this listing (presumably the bitness of \a Obj).
293 static bool isExecutable(const object::MachOObjectFile &Obj) {
295 return Obj.getHeader64().filetype != MachO::MH_OBJECT;
297 return Obj.getHeader().filetype != MachO::MH_OBJECT;
// \returns whether \a Obj contains a segment named "__LINKEDIT".
// All segments are visited through iterateOnSegments(); the lambda captures
// the result flag by reference and sets it on a match.
300 static bool hasLinkEditSegment(const object::MachOObjectFile &Obj) {
301 bool HasLinkEditSegment = false;
302 iterateOnSegments(Obj, [&](const MachO::segment_command_64 &Segment) {
303 if (StringRef("__LINKEDIT") == Segment.segname)
304 HasLinkEditSegment = true;
306 return HasLinkEditSegment;
// \returns the size in bytes of a segment load command followed by
// \a NumSections section headers, using either the 64-bit or the 32-bit
// structure sizes.
// NOTE(review): the condition selecting between the two returns is not visible
// in this listing (presumably \a Is64Bit).
309 static unsigned segmentLoadCommandSize(bool Is64Bit, unsigned NumSections) {
311 return sizeof(MachO::segment_command_64) +
312 NumSections * sizeof(MachO::section_64);
314 return sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
317 // Stream a dSYM companion binary file corresponding to the binary referenced
318 // by \a DM to \a OutFile. The passed \a MS MCStreamer is setup to write to
319 // \a OutFile and it must be using a MachObjectWriter object to do so.
// Output order as emitted below: Mach-O header, load commands (UUID, symtab,
// imported segments, __DWARF), zero padding, symbol table, string table, zero
// padding, then the contents of the assembler's sections.
// NOTE(review): several statements (counter increments, some conditions,
// return statements) are not visible in this listing.
320 bool generateDsymCompanion(const DebugMap &DM, MCStreamer &MS,
321 raw_fd_ostream &OutFile) {
// These casts rely on the requirement stated above: MS must be an object
// streamer whose writer is a MachObjectWriter.
322 auto &ObjectStreamer = static_cast<MCObjectStreamer &>(MS);
323 MCAssembler &MCAsm = ObjectStreamer.getAssembler();
324 auto &Writer = static_cast<MachObjectWriter &>(MCAsm.getWriter());
325 MCAsmLayout Layout(MCAsm);
// Lay out the sections; the layout is queried for section sizes below.
327 MCAsm.layout(Layout);
// Open the binary the debug map refers to and select the slice matching the
// debug map's triple.
329 BinaryHolder InputBinaryHolder(false);
330 auto ErrOrObjs = InputBinaryHolder.GetObjectFiles(DM.getBinaryPath());
331 if (auto Error = ErrOrObjs.getError())
332 return error(Twine("opening ") + DM.getBinaryPath() + ": " +
334 "output file streaming");
336 auto ErrOrInputBinary =
337 InputBinaryHolder.GetAs<object::MachOObjectFile>(DM.getTriple());
338 if (auto Error = ErrOrInputBinary.getError())
339 return error(Twine("opening ") + DM.getBinaryPath() + ": " +
341 "output file streaming");
342 auto &InputBinary = *ErrOrInputBinary;
// The output bitness comes from the writer, not from the input binary.
344 bool Is64Bit = Writer.is64Bit();
345 MachO::symtab_command SymtabCmd = InputBinary.getSymtabLoadCommand();
// Start from a zeroed UUID command and overwrite it with the input's LC_UUID
// command when one is found.
// NOTE(review): cmd is set to LC_UUID before the search, so the later
// `UUIDCmd.cmd != 0` tests also hold when the input has no LC_UUID (assuming
// LC_UUID is non-zero); an all-zero UUID would then be emitted. Confirm this
// is intended.
348 MachO::uuid_command UUIDCmd;
349 memset(&UUIDCmd, 0, sizeof(UUIDCmd));
350 UUIDCmd.cmd = MachO::LC_UUID;
351 UUIDCmd.cmdsize = sizeof(MachO::uuid_command);
352 for (auto &LCI : InputBinary.load_commands()) {
353 if (LCI.C.cmd == MachO::LC_UUID) {
354 UUIDCmd = InputBinary.getUuidCommand(LCI);
359 // Compute the number of load commands we will need.
360 unsigned LoadCommandSize = 0;
361 unsigned NumLoadCommands = 0;
362 // We will copy the UUID if there is one.
363 if (UUIDCmd.cmd != 0) {
365 LoadCommandSize += sizeof(MachO::uuid_command);
368 // If we have a valid symtab to copy, do it.
// "Valid" here means: the input is not an MH_OBJECT file and it has a
// __LINKEDIT segment.
369 bool ShouldEmitSymtab =
370 isExecutable(InputBinary) && hasLinkEditSegment(InputBinary);
371 if (ShouldEmitSymtab) {
372 LoadCommandSize += sizeof(MachO::symtab_command);
376 unsigned HeaderSize =
377 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
378 // We will copy every segment that isn't __DWARF.
379 iterateOnSegments(InputBinary, [&](const MachO::segment_command_64 &Segment) {
380 if (StringRef("__DWARF") == Segment.segname)
384 LoadCommandSize += segmentLoadCommandSize(Is64Bit, Segment.nsects);
387 // We will add our own brand new __DWARF segment if we have debug
// NOTE(review): the sentence above is cut short in this listing (presumably
// "debug information").
389 unsigned NumDwarfSections = 0;
390 uint64_t DwarfSegmentSize = 0;
// Accumulate the size of the new __DWARF segment: each section with a
// non-zero file size is placed at its own alignment.
392 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
393 MCSection *Sec = Layout.getSectionOrder()[i];
394 if (Sec->begin() == Sec->end())
397 if (uint64_t Size = Layout.getSectionFileSize(Sec)) {
398 DwarfSegmentSize = alignTo(DwarfSegmentSize, Sec->getAlignment());
399 DwarfSegmentSize += Size;
404 if (NumDwarfSections) {
406 LoadCommandSize += segmentLoadCommandSize(Is64Bit, NumDwarfSections);
// Build the new symbol table and string pool in memory first; their sizes are
// needed to compute the file offsets written into the load commands.
409 SmallString<0> NewSymtab;
410 NonRelocatableStringpool NewStrings;
411 unsigned NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
412 unsigned NumSyms = 0;
413 uint64_t NewStringsSize = 0;
414 if (ShouldEmitSymtab) {
// Initial capacity guess: half the byte size of the input symbol table.
415 NewSymtab.reserve(SymtabCmd.nsyms * NListSize / 2);
416 NumSyms = transferSymbols(InputBinary, NewSymtab, NewStrings);
// One extra byte beyond the pool size; see the FIXME about the additional
// leading empty string near the string table emission below.
417 NewStringsSize = NewStrings.getSize() + 1;
// The symbol table starts after the header and load commands, rounded up to a
// multiple of 0x1000.
420 uint64_t SymtabStart = LoadCommandSize;
421 SymtabStart += HeaderSize;
422 SymtabStart = alignTo(SymtabStart, 0x1000);
424 // We gathered all the information we need, start emitting the output file.
425 Writer.writeHeader(MachO::MH_DSYM, NumLoadCommands, LoadCommandSize, false);
427 // Write the load commands.
428 assert(OutFile.tell() == HeaderSize);
429 if (UUIDCmd.cmd != 0) {
// cmd and cmdsize go through the writer; the 16 UUID bytes are written as-is.
430 Writer.W.write<uint32_t>(UUIDCmd.cmd);
431 Writer.W.write<uint32_t>(UUIDCmd.cmdsize);
432 OutFile.write(reinterpret_cast<const char *>(UUIDCmd.uuid), 16);
433 assert(OutFile.tell() == HeaderSize + sizeof(UUIDCmd));
436 assert(SymtabCmd.cmd && "No symbol table.");
// The string table directly follows the symbol table entries.
437 uint64_t StringStart = SymtabStart + NumSyms * NListSize;
438 if (ShouldEmitSymtab)
439 Writer.writeSymtabLoadCommand(SymtabStart, NumSyms, StringStart,
// The __DWARF contents start after the string table, rounded up to a multiple
// of 0x1000.
442 uint64_t DwarfSegmentStart = StringStart + NewStringsSize;
443 DwarfSegmentStart = alignTo(DwarfSegmentStart, 0x1000);
445 // Write the load commands for the segments and sections we 'import' from
446 // the original binary.
// SymtabStart and the combined symtab + string table size are passed as the
// __LINKEDIT offset and size. GapForDwarf stays UINT64_MAX until a fitting
// hole in the address map is found.
447 uint64_t EndAddress = 0;
448 uint64_t GapForDwarf = UINT64_MAX;
449 for (auto &LCI : InputBinary.load_commands()) {
450 if (LCI.C.cmd == MachO::LC_SEGMENT)
451 transferSegmentAndSections(LCI, InputBinary.getSegmentLoadCommand(LCI),
452 InputBinary, Writer, SymtabStart,
453 StringStart + NewStringsSize - SymtabStart,
454 DwarfSegmentSize, GapForDwarf, EndAddress);
455 else if (LCI.C.cmd == MachO::LC_SEGMENT_64)
456 transferSegmentAndSections(LCI, InputBinary.getSegment64LoadCommand(LCI),
457 InputBinary, Writer, SymtabStart,
458 StringStart + NewStringsSize - SymtabStart,
459 DwarfSegmentSize, GapForDwarf, EndAddress);
// Preferred placement for __DWARF: right after the highest imported segment
// address, rounded up to a multiple of 0x1000.
462 uint64_t DwarfVMAddr = alignTo(EndAddress, 0x1000);
463 uint64_t DwarfVMMax = Is64Bit ? UINT64_MAX : UINT32_MAX;
464 if (DwarfVMAddr + DwarfSegmentSize > DwarfVMMax ||
465 DwarfVMAddr + DwarfSegmentSize < DwarfVMAddr /* Overflow */) {
466 // There is no room for the __DWARF segment at the end of the
467 // address space. Look through segments to find a gap.
468 DwarfVMAddr = GapForDwarf;
// No gap was recorded either: only a warning is issued here.
469 if (DwarfVMAddr == UINT64_MAX)
470 warn("not enough VM space for the __DWARF segment.",
471 "output file streaming");
474 // Write the load command for the __DWARF segment.
475 createDwarfSegment(DwarfVMAddr, DwarfSegmentStart, DwarfSegmentSize,
476 NumDwarfSections, Layout, Writer);
// All load commands are written; pad with zeros up to the symbol table.
478 assert(OutFile.tell() == LoadCommandSize + HeaderSize);
479 OutFile.write_zeros(SymtabStart - (LoadCommandSize + HeaderSize));
480 assert(OutFile.tell() == SymtabStart);
483 if (ShouldEmitSymtab) {
// NewSymtab already holds the raw, target-endian nlist entries.
484 OutFile << NewSymtab.str();
485 assert(OutFile.tell() == StringStart);
487 // Transfer string table.
488 // FIXME: The NonRelocatableStringpool starts with an empty string, but
489 // dsymutil-classic starts the reconstructed string table with 2 of these.
490 // Reproduce that behavior for now (there is corresponding code in
// NOTE(review): the end of the comment above and the statement emitting the
// extra leading byte are not visible in this listing.
493 std::vector<DwarfStringPoolEntryRef> Strings = NewStrings.getEntries();
494 for (auto EntryRef : Strings) {
495 if (EntryRef.getIndex() == -1U)
// size() + 1 bytes are written, i.e. one byte past the string's characters
// (presumably the terminating NUL; this relies on the pool storing
// NUL-terminated strings).
497 OutFile.write(EntryRef.getString().data(),
498 EntryRef.getString().size() + 1);
502 assert(OutFile.tell() == StringStart + NewStringsSize);
504 // Pad till the Dwarf segment start.
505 OutFile.write_zeros(DwarfSegmentStart - (StringStart + NewStringsSize));
506 assert(OutFile.tell() == DwarfSegmentStart);
508 // Emit the Dwarf sections contents.
509 for (const MCSection &Sec : MCAsm) {
510 if (Sec.begin() == Sec.end())
// Zero-pad to the section's alignment before writing its data.
513 uint64_t Pos = OutFile.tell();
514 OutFile.write_zeros(alignTo(Pos, Sec.getAlignment()) - Pos);
515 MCAsm.writeSectionData(OutFile, &Sec, Layout);
520 } // namespace MachOUtils
521 } // namespace dsymutil