Page MenuHomePhabricator

0001-Move-lld-Parallel-algorithms-to-llvm.patch

Authored By
zjturner
May 14 2017, 1:33 AM
Size
89 KB
Referenced Files
None
Subscribers
None

0001-Move-lld-Parallel-algorithms-to-llvm.patch

From 8a80031afd75102808bc01098a6e5c1ec186300a Mon Sep 17 00:00:00 2001
From: Zachary Turner <zturner@google.com>
Date: Tue, 9 May 2017 19:02:16 -0700
Subject: [PATCH] Move lld Parallel algorithms to llvm.
---
lld/COFF/ICF.cpp | 2 +-
lld/COFF/MapFile.cpp | 4 +-
lld/COFF/Writer.cpp | 10 ++--
lld/ELF/Threads.h | 10 ++--
lld/include/lld/Core/TaskGroup.h | 65 ----------------------
lld/lib/Core/CMakeLists.txt | 1 -
lld/lib/ReaderWriter/MachO/LayoutPass.cpp | 4 +-
lld/unittests/CMakeLists.txt | 1 -
lld/unittests/CoreTests/CMakeLists.txt | 7 ---
.../Core => llvm/include/llvm/Support}/Parallel.h | 60 +++++++++++++++++---
llvm/lib/Support/CMakeLists.txt | 1 +
.../TaskGroup.cpp => llvm/lib/Support/Parallel.cpp | 19 +++----
llvm/unittests/Support/CMakeLists.txt | 1 +
.../unittests/Support}/ParallelTest.cpp | 10 ++--
14 files changed, 81 insertions(+), 114 deletions(-)
delete mode 100644 lld/include/lld/Core/TaskGroup.h
delete mode 100644 lld/unittests/CoreTests/CMakeLists.txt
rename {lld/include/lld/Core => llvm/include/llvm/Support}/Parallel.h (85%)
rename lld/lib/Core/TaskGroup.cpp => llvm/lib/Support/Parallel.cpp (89%)
rename {lld/unittests/CoreTests => llvm/unittests/Support}/ParallelTest.cpp (82%)
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index e3a7d27..3b7cc42 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -1,261 +1,261 @@
//===- ICF.cpp ------------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// ICF is short for Identical Code Folding. That is a size optimization to
// identify and merge two or more read-only sections (typically functions)
// that happened to have the same contents. It usually reduces output size
// by a few percent.
//
// On Windows, ICF is enabled by default.
//
// See ELF/ICF.cpp for the details about the algortihm.
//
//===----------------------------------------------------------------------===//
#include "Chunks.h"
#include "Error.h"
#include "Symbols.h"
-#include "lld/Core/Parallel.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <atomic>
#include <vector>
using namespace llvm;
namespace lld {
namespace coff {
class ICF {
public:
void run(const std::vector<Chunk *> &V);
private:
void segregate(size_t Begin, size_t End, bool Constant);
bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
uint32_t getHash(SectionChunk *C);
bool isEligible(SectionChunk *C);
size_t findBoundary(size_t Begin, size_t End);
void forEachClassRange(size_t Begin, size_t End,
std::function<void(size_t, size_t)> Fn);
void forEachClass(std::function<void(size_t, size_t)> Fn);
std::vector<SectionChunk *> Chunks;
int Cnt = 0;
std::atomic<uint32_t> NextId = {1};
std::atomic<bool> Repeat = {false};
};
// Returns a hash value for S.
uint32_t ICF::getHash(SectionChunk *C) {
return hash_combine(C->getPermissions(),
hash_value(C->SectionName),
C->NumRelocs,
C->getAlign(),
uint32_t(C->Header->SizeOfRawData),
C->Checksum);
}
// Returns true if section S is subject of ICF.
//
// Microsoft's documentation
// (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April
// 2017) says that /opt:icf folds both functions and read-only data.
// Despite that, the MSVC linker folds only functions. We found
// a few instances of programs that are not safe for data merging.
// Therefore, we merge only functions just like the MSVC tool.
bool ICF::isEligible(SectionChunk *C) {
bool Global = C->Sym && C->Sym->isExternal();
bool Executable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE;
bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
return C->isCOMDAT() && C->isLive() && Global && Executable && !Writable;
}
// Split an equivalence class into smaller classes.
void ICF::segregate(size_t Begin, size_t End, bool Constant) {
while (Begin < End) {
// Divide [Begin, End) into two. Let Mid be the start index of the
// second group.
auto Bound = std::stable_partition(
Chunks.begin() + Begin + 1, Chunks.begin() + End, [&](SectionChunk *S) {
if (Constant)
return equalsConstant(Chunks[Begin], S);
return equalsVariable(Chunks[Begin], S);
});
size_t Mid = Bound - Chunks.begin();
// Split [Begin, End) into [Begin, Mid) and [Mid, End).
uint32_t Id = NextId++;
for (size_t I = Begin; I < Mid; ++I)
Chunks[I]->Class[(Cnt + 1) % 2] = Id;
// If we created a group, we need to iterate the main loop again.
if (Mid != End)
Repeat = true;
Begin = Mid;
}
}
// Compare "non-moving" part of two sections, namely everything
// except relocation targets.
bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
if (A->NumRelocs != B->NumRelocs)
return false;
// Compare relocations.
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
if (R1.Type != R2.Type ||
R1.VirtualAddress != R2.VirtualAddress) {
return false;
}
SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex);
SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex);
if (B1 == B2)
return true;
if (auto *D1 = dyn_cast<DefinedRegular>(B1))
if (auto *D2 = dyn_cast<DefinedRegular>(B2))
return D1->getValue() == D2->getValue() &&
D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2];
return false;
};
if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq))
return false;
// Compare section attributes and contents.
return A->getPermissions() == B->getPermissions() &&
A->SectionName == B->SectionName &&
A->getAlign() == B->getAlign() &&
A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
A->Checksum == B->Checksum &&
A->getContents() == B->getContents();
}
// Compare "moving" part of two sections, namely relocation targets.
bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
// Compare relocations.
auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex);
SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex);
if (B1 == B2)
return true;
if (auto *D1 = dyn_cast<DefinedRegular>(B1))
if (auto *D2 = dyn_cast<DefinedRegular>(B2))
return D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2];
return false;
};
return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq);
}
size_t ICF::findBoundary(size_t Begin, size_t End) {
for (size_t I = Begin + 1; I < End; ++I)
if (Chunks[Begin]->Class[Cnt % 2] != Chunks[I]->Class[Cnt % 2])
return I;
return End;
}
void ICF::forEachClassRange(size_t Begin, size_t End,
std::function<void(size_t, size_t)> Fn) {
if (Begin > 0)
Begin = findBoundary(Begin - 1, End);
while (Begin < End) {
size_t Mid = findBoundary(Begin, Chunks.size());
Fn(Begin, Mid);
Begin = Mid;
}
}
// Call Fn on each class group.
void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
// If the number of sections are too small to use threading,
// call Fn sequentially.
if (Chunks.size() < 1024) {
forEachClassRange(0, Chunks.size(), Fn);
return;
}
// Split sections into 256 shards and call Fn in parallel.
size_t NumShards = 256;
size_t Step = Chunks.size() / NumShards;
for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) {
forEachClassRange(I * Step, (I + 1) * Step, Fn);
});
forEachClassRange(Step * NumShards, Chunks.size(), Fn);
}
// Merge identical COMDAT sections.
// Two sections are considered the same if their section headers,
// contents and relocations are all the same.
void ICF::run(const std::vector<Chunk *> &Vec) {
// Collect only mergeable sections and group by hash value.
for (Chunk *C : Vec) {
auto *SC = dyn_cast<SectionChunk>(C);
if (!SC)
continue;
if (isEligible(SC)) {
// Set MSB to 1 to avoid collisions with non-hash classs.
SC->Class[0] = getHash(SC) | (1 << 31);
Chunks.push_back(SC);
} else {
SC->Class[0] = NextId++;
}
}
if (Chunks.empty())
return;
// From now on, sections in Chunks are ordered so that sections in
// the same group are consecutive in the vector.
std::stable_sort(Chunks.begin(), Chunks.end(),
[](SectionChunk *A, SectionChunk *B) {
return A->Class[0] < B->Class[0];
});
// Compare static contents and assign unique IDs for each static content.
forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); });
++Cnt;
// Split groups by comparing relocations until convergence is obtained.
do {
Repeat = false;
forEachClass(
[&](size_t Begin, size_t End) { segregate(Begin, End, false); });
++Cnt;
} while (Repeat);
log("ICF needed " + Twine(Cnt) + " iterations");
// Merge sections in the same classs.
forEachClass([&](size_t Begin, size_t End) {
if (End - Begin == 1)
return;
log("Selected " + Chunks[Begin]->getDebugName());
for (size_t I = Begin + 1; I < End; ++I) {
log(" Removed " + Chunks[I]->getDebugName());
Chunks[Begin]->replace(Chunks[I]);
}
});
}
// Entry point to ICF.
void doICF(const std::vector<Chunk *> &Chunks) { ICF().run(Chunks); }
} // namespace coff
} // namespace lld
diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp
index 7df88f3..f31639d 100644
--- a/lld/COFF/MapFile.cpp
+++ b/lld/COFF/MapFile.cpp
@@ -1,125 +1,125 @@
//===- MapFile.cpp --------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the /lldmap option. It shows lists in order and
// hierarchically the output sections, input sections, input files and
// symbol:
//
// Address Size Align Out File Symbol
// 00201000 00000015 4 .text
// 00201000 0000000e 4 test.o:(.text)
// 0020100e 00000000 0 local
// 00201005 00000000 0 f(int)
//
//===----------------------------------------------------------------------===//
#include "MapFile.h"
#include "Error.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
-#include "lld/Core/Parallel.h"
+#include "llvm/Support/Parallel.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;
using namespace lld;
using namespace lld::coff;
typedef DenseMap<const SectionChunk *, SmallVector<DefinedRegular *, 4>>
SymbolMapTy;
// Print out the first three columns of a line.
static void writeHeader(raw_ostream &OS, uint64_t Addr, uint64_t Size,
uint64_t Align) {
OS << format("%08llx %08llx %5lld ", Addr, Size, Align);
}
static std::string indent(int Depth) { return std::string(Depth * 8, ' '); }
// Returns a list of all symbols that we want to print out.
static std::vector<DefinedRegular *> getSymbols() {
std::vector<DefinedRegular *> V;
for (coff::ObjectFile *File : Symtab->ObjectFiles)
for (SymbolBody *B : File->getSymbols())
if (auto *Sym = dyn_cast<DefinedRegular>(B))
if (Sym && !Sym->getCOFFSymbol().isSectionDefinition())
V.push_back(Sym);
return V;
}
// Returns a map from sections to their symbols.
static SymbolMapTy getSectionSyms(ArrayRef<DefinedRegular *> Syms) {
SymbolMapTy Ret;
for (DefinedRegular *S : Syms)
Ret[S->getChunk()].push_back(S);
// Sort symbols by address.
for (auto &It : Ret) {
SmallVectorImpl<DefinedRegular *> &V = It.second;
std::sort(V.begin(), V.end(), [](DefinedRegular *A, DefinedRegular *B) {
return A->getRVA() < B->getRVA();
});
}
return Ret;
}
// Construct a map from symbols to their stringified representations.
static DenseMap<DefinedRegular *, std::string>
getSymbolStrings(ArrayRef<DefinedRegular *> Syms) {
std::vector<std::string> Str(Syms.size());
- for_each_n(parallel::par, (size_t)0, Syms.size(), [&](size_t I) {
+ for_each_n(llvm::parallel::par, (size_t)0, Syms.size(), [&](size_t I) {
raw_string_ostream OS(Str[I]);
writeHeader(OS, Syms[I]->getRVA(), 0, 0);
OS << indent(2) << toString(*Syms[I]);
});
DenseMap<DefinedRegular *, std::string> Ret;
for (size_t I = 0, E = Syms.size(); I < E; ++I)
Ret[Syms[I]] = std::move(Str[I]);
return Ret;
}
void coff::writeMapFile(ArrayRef<OutputSection *> OutputSections) {
if (Config->MapFile.empty())
return;
std::error_code EC;
raw_fd_ostream OS(Config->MapFile, EC, sys::fs::F_None);
if (EC)
fatal("cannot open " + Config->MapFile + ": " + EC.message());
// Collect symbol info that we want to print out.
std::vector<DefinedRegular *> Syms = getSymbols();
SymbolMapTy SectionSyms = getSectionSyms(Syms);
DenseMap<DefinedRegular *, std::string> SymStr = getSymbolStrings(Syms);
// Print out the header line.
OS << "Address Size Align Out In Symbol\n";
// Print out file contents.
for (OutputSection *Sec : OutputSections) {
writeHeader(OS, Sec->getRVA(), Sec->getVirtualSize(), /*Align=*/PageSize);
OS << Sec->getName() << '\n';
for (Chunk *C : Sec->getChunks()) {
auto *SC = dyn_cast<SectionChunk>(C);
if (!SC)
continue;
writeHeader(OS, SC->getRVA(), SC->getSize(), SC->getAlign());
OS << indent(1) << SC->File->getName() << ":(" << SC->getSectionName()
<< ")\n";
for (DefinedRegular *Sym : SectionSyms[SC])
OS << SymStr[Sym] << '\n';
}
}
}
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index d61d871..b87f7ff 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1,873 +1,873 @@
//===- Writer.cpp ---------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Writer.h"
#include "Config.h"
#include "DLL.h"
#include "Error.h"
#include "InputFiles.h"
#include "MapFile.h"
#include "Memory.h"
#include "PDB.h"
#include "SymbolTable.h"
#include "Symbols.h"
-#include "lld/Core/Parallel.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Parallel.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdio>
#include <map>
#include <memory>
#include <utility>
using namespace llvm;
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm::support;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::coff;
static const int SectorSize = 512;
static const int DOSStubSize = 64;
static const int NumberfOfDataDirectory = 16;
namespace {
class DebugDirectoryChunk : public Chunk {
public:
DebugDirectoryChunk(const std::vector<std::unique_ptr<Chunk>> &R)
: Records(R) {}
size_t getSize() const override {
return Records.size() * sizeof(debug_directory);
}
void writeTo(uint8_t *B) const override {
auto *D = reinterpret_cast<debug_directory *>(B + OutputSectionOff);
for (const std::unique_ptr<Chunk> &Record : Records) {
D->Characteristics = 0;
D->TimeDateStamp = 0;
D->MajorVersion = 0;
D->MinorVersion = 0;
D->Type = COFF::IMAGE_DEBUG_TYPE_CODEVIEW;
D->SizeOfData = Record->getSize();
D->AddressOfRawData = Record->getRVA();
// TODO(compnerd) get the file offset
D->PointerToRawData = 0;
++D;
}
}
private:
const std::vector<std::unique_ptr<Chunk>> &Records;
};
class CVDebugRecordChunk : public Chunk {
size_t getSize() const override {
return sizeof(codeview::DebugInfo) + Config->PDBPath.size() + 1;
}
void writeTo(uint8_t *B) const override {
// Save off the DebugInfo entry to backfill the file signature (build id)
// in Writer::writeBuildId
DI = reinterpret_cast<codeview::DebugInfo *>(B + OutputSectionOff);
DI->Signature.CVSignature = OMF::Signature::PDB70;
// variable sized field (PDB Path)
auto *P = reinterpret_cast<char *>(B + OutputSectionOff + sizeof(*DI));
if (!Config->PDBPath.empty())
memcpy(P, Config->PDBPath.data(), Config->PDBPath.size());
P[Config->PDBPath.size()] = '\0';
}
public:
mutable codeview::DebugInfo *DI = nullptr;
};
// The writer writes a SymbolTable result to a file.
class Writer {
public:
Writer(SymbolTable *T) : Symtab(T) {}
void run();
private:
void createSections();
void createMiscChunks();
void createImportTables();
void createExportTable();
void assignAddresses();
void removeEmptySections();
void createSymbolAndStringTable();
void openFile(StringRef OutputPath);
template <typename PEHeaderTy> void writeHeader();
void fixSafeSEHSymbols();
void setSectionPermissions();
void writeSections();
void sortExceptionTable();
void writeBuildId();
void applyRelocations();
llvm::Optional<coff_symbol16> createSymbol(Defined *D);
size_t addEntryToStringTable(StringRef Str);
OutputSection *findSection(StringRef Name);
OutputSection *createSection(StringRef Name);
void addBaserels(OutputSection *Dest);
void addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V);
uint32_t getSizeOfInitializedData();
std::map<StringRef, std::vector<DefinedImportData *>> binImports();
SymbolTable *Symtab;
std::unique_ptr<FileOutputBuffer> Buffer;
std::vector<OutputSection *> OutputSections;
std::vector<char> Strtab;
std::vector<llvm::object::coff_symbol16> OutputSymtab;
IdataContents Idata;
DelayLoadContents DelayIdata;
EdataContents Edata;
std::unique_ptr<SEHTableChunk> SEHTable;
std::unique_ptr<Chunk> DebugDirectory;
std::vector<std::unique_ptr<Chunk>> DebugRecords;
CVDebugRecordChunk *BuildId = nullptr;
ArrayRef<uint8_t> SectionTable;
uint64_t FileSize;
uint32_t PointerToSymbolTable = 0;
uint64_t SizeOfImage;
uint64_t SizeOfHeaders;
std::vector<std::unique_ptr<Chunk>> Chunks;
};
} // anonymous namespace
namespace lld {
namespace coff {
void writeResult(SymbolTable *T) { Writer(T).run(); }
void OutputSection::setRVA(uint64_t RVA) {
Header.VirtualAddress = RVA;
for (Chunk *C : Chunks)
C->setRVA(C->getRVA() + RVA);
}
void OutputSection::setFileOffset(uint64_t Off) {
// If a section has no actual data (i.e. BSS section), we want to
// set 0 to its PointerToRawData. Otherwise the output is rejected
// by the loader.
if (Header.SizeOfRawData == 0)
return;
Header.PointerToRawData = Off;
}
void OutputSection::addChunk(Chunk *C) {
Chunks.push_back(C);
C->setOutputSection(this);
uint64_t Off = Header.VirtualSize;
Off = alignTo(Off, C->getAlign());
C->setRVA(Off);
C->setOutputSectionOff(Off);
Off += C->getSize();
Header.VirtualSize = Off;
if (C->hasData())
Header.SizeOfRawData = alignTo(Off, SectorSize);
}
void OutputSection::addPermissions(uint32_t C) {
Header.Characteristics |= C & PermMask;
}
void OutputSection::setPermissions(uint32_t C) {
Header.Characteristics = C & PermMask;
}
// Write the section header to a given buffer.
void OutputSection::writeHeaderTo(uint8_t *Buf) {
auto *Hdr = reinterpret_cast<coff_section *>(Buf);
*Hdr = Header;
if (StringTableOff) {
// If name is too long, write offset into the string table as a name.
sprintf(Hdr->Name, "/%d", StringTableOff);
} else {
assert(!Config->Debug || Name.size() <= COFF::NameSize);
strncpy(Hdr->Name, Name.data(),
std::min(Name.size(), (size_t)COFF::NameSize));
}
}
uint64_t Defined::getSecrel() {
if (auto *D = dyn_cast<DefinedRegular>(this))
return getRVA() - D->getChunk()->getOutputSection()->getRVA();
fatal("SECREL relocation points to a non-regular symbol");
}
uint64_t Defined::getSectionIndex() {
if (auto *D = dyn_cast<DefinedRegular>(this))
return D->getChunk()->getOutputSection()->SectionIndex;
fatal("SECTION relocation points to a non-regular symbol");
}
bool Defined::isExecutable() {
const auto X = IMAGE_SCN_MEM_EXECUTE;
if (auto *D = dyn_cast<DefinedRegular>(this))
return D->getChunk()->getOutputSection()->getPermissions() & X;
return isa<DefinedImportThunk>(this);
}
} // namespace coff
} // namespace lld
// The main function of the writer.
void Writer::run() {
createSections();
createMiscChunks();
createImportTables();
createExportTable();
if (Config->Relocatable)
createSection(".reloc");
assignAddresses();
removeEmptySections();
setSectionPermissions();
createSymbolAndStringTable();
openFile(Config->OutputFile);
if (Config->is64()) {
writeHeader<pe32plus_header>();
} else {
writeHeader<pe32_header>();
}
fixSafeSEHSymbols();
writeSections();
sortExceptionTable();
writeBuildId();
if (!Config->PDBPath.empty()) {
const llvm::codeview::DebugInfo *DI = nullptr;
if (Config->DebugTypes & static_cast<unsigned>(coff::DebugType::CV))
DI = BuildId->DI;
createPDB(Config->PDBPath, Symtab, SectionTable, DI);
}
writeMapFile(OutputSections);
if (auto EC = Buffer->commit())
fatal(EC, "failed to write the output file");
}
static StringRef getOutputSection(StringRef Name) {
StringRef S = Name.split('$').first;
auto It = Config->Merge.find(S);
if (It == Config->Merge.end())
return S;
return It->second;
}
// Create output section objects and add them to OutputSections.
void Writer::createSections() {
// First, bin chunks by name.
std::map<StringRef, std::vector<Chunk *>> Map;
for (Chunk *C : Symtab->getChunks()) {
auto *SC = dyn_cast<SectionChunk>(C);
if (SC && !SC->isLive()) {
if (Config->Verbose)
SC->printDiscardedMessage();
continue;
}
Map[C->getSectionName()].push_back(C);
}
// Then create an OutputSection for each section.
// '$' and all following characters in input section names are
// discarded when determining output section. So, .text$foo
// contributes to .text, for example. See PE/COFF spec 3.2.
SmallDenseMap<StringRef, OutputSection *> Sections;
for (auto Pair : Map) {
StringRef Name = getOutputSection(Pair.first);
OutputSection *&Sec = Sections[Name];
if (!Sec) {
Sec = make<OutputSection>(Name);
OutputSections.push_back(Sec);
}
std::vector<Chunk *> &Chunks = Pair.second;
for (Chunk *C : Chunks) {
Sec->addChunk(C);
Sec->addPermissions(C->getPermissions());
}
}
}
void Writer::createMiscChunks() {
OutputSection *RData = createSection(".rdata");
// Create thunks for locally-dllimported symbols.
if (!Symtab->LocalImportChunks.empty()) {
for (Chunk *C : Symtab->LocalImportChunks)
RData->addChunk(C);
}
// Create Debug Information Chunks
if (Config->Debug) {
DebugDirectory = llvm::make_unique<DebugDirectoryChunk>(DebugRecords);
// TODO(compnerd) create a coffgrp entry if DebugType::CV is not enabled
if (Config->DebugTypes & static_cast<unsigned>(coff::DebugType::CV)) {
auto Chunk = llvm::make_unique<CVDebugRecordChunk>();
BuildId = Chunk.get();
DebugRecords.push_back(std::move(Chunk));
}
RData->addChunk(DebugDirectory.get());
for (const std::unique_ptr<Chunk> &C : DebugRecords)
RData->addChunk(C.get());
}
// Create SEH table. x86-only.
if (Config->Machine != I386)
return;
std::set<Defined *> Handlers;
for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) {
if (!File->SEHCompat)
return;
for (SymbolBody *B : File->SEHandlers)
Handlers.insert(cast<Defined>(B));
}
SEHTable.reset(new SEHTableChunk(Handlers));
RData->addChunk(SEHTable.get());
}
// Create .idata section for the DLL-imported symbol table.
// The format of this section is inherently Windows-specific.
// IdataContents class abstracted away the details for us,
// so we just let it create chunks and add them to the section.
void Writer::createImportTables() {
if (Symtab->ImportFiles.empty())
return;
// Initialize DLLOrder so that import entries are ordered in
// the same order as in the command line. (That affects DLL
// initialization order, and this ordering is MSVC-compatible.)
for (ImportFile *File : Symtab->ImportFiles) {
std::string DLL = StringRef(File->DLLName).lower();
if (Config->DLLOrder.count(DLL) == 0)
Config->DLLOrder[DLL] = Config->DLLOrder.size();
}
OutputSection *Text = createSection(".text");
for (ImportFile *File : Symtab->ImportFiles) {
if (DefinedImportThunk *Thunk = File->ThunkSym)
Text->addChunk(Thunk->getChunk());
if (Config->DelayLoads.count(StringRef(File->DLLName).lower())) {
DelayIdata.add(File->ImpSym);
} else {
Idata.add(File->ImpSym);
}
}
if (!Idata.empty()) {
OutputSection *Sec = createSection(".idata");
for (Chunk *C : Idata.getChunks())
Sec->addChunk(C);
}
if (!DelayIdata.empty()) {
Defined *Helper = cast<Defined>(Config->DelayLoadHelper);
DelayIdata.create(Helper);
OutputSection *Sec = createSection(".didat");
for (Chunk *C : DelayIdata.getChunks())
Sec->addChunk(C);
Sec = createSection(".data");
for (Chunk *C : DelayIdata.getDataChunks())
Sec->addChunk(C);
Sec = createSection(".text");
for (std::unique_ptr<Chunk> &C : DelayIdata.getCodeChunks())
Sec->addChunk(C.get());
}
}
void Writer::createExportTable() {
if (Config->Exports.empty())
return;
OutputSection *Sec = createSection(".edata");
for (std::unique_ptr<Chunk> &C : Edata.Chunks)
Sec->addChunk(C.get());
}
// The Windows loader doesn't seem to like empty sections,
// so we remove them if any.
void Writer::removeEmptySections() {
auto IsEmpty = [](OutputSection *S) { return S->getVirtualSize() == 0; };
OutputSections.erase(
std::remove_if(OutputSections.begin(), OutputSections.end(), IsEmpty),
OutputSections.end());
uint32_t Idx = 1;
for (OutputSection *Sec : OutputSections)
Sec->SectionIndex = Idx++;
}
size_t Writer::addEntryToStringTable(StringRef Str) {
assert(Str.size() > COFF::NameSize);
size_t OffsetOfEntry = Strtab.size() + 4; // +4 for the size field
Strtab.insert(Strtab.end(), Str.begin(), Str.end());
Strtab.push_back('\0');
return OffsetOfEntry;
}
Optional<coff_symbol16> Writer::createSymbol(Defined *Def) {
// Relative symbols are unrepresentable in a COFF symbol table.
if (isa<DefinedRelative>(Def))
return None;
if (auto *D = dyn_cast<DefinedRegular>(Def))
if (!D->getChunk()->isLive())
return None;
coff_symbol16 Sym;
StringRef Name = Def->getName();
if (Name.size() > COFF::NameSize) {
Sym.Name.Offset.Zeroes = 0;
Sym.Name.Offset.Offset = addEntryToStringTable(Name);
} else {
memset(Sym.Name.ShortName, 0, COFF::NameSize);
memcpy(Sym.Name.ShortName, Name.data(), Name.size());
}
if (auto *D = dyn_cast<DefinedCOFF>(Def)) {
COFFSymbolRef Ref = D->getCOFFSymbol();
Sym.Type = Ref.getType();
Sym.StorageClass = Ref.getStorageClass();
} else {
Sym.Type = IMAGE_SYM_TYPE_NULL;
Sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL;
}
Sym.NumberOfAuxSymbols = 0;
switch (Def->kind()) {
case SymbolBody::DefinedAbsoluteKind:
Sym.Value = Def->getRVA();
Sym.SectionNumber = IMAGE_SYM_ABSOLUTE;
break;
default: {
uint64_t RVA = Def->getRVA();
OutputSection *Sec = nullptr;
for (OutputSection *S : OutputSections) {
if (S->getRVA() > RVA)
break;
Sec = S;
}
Sym.Value = RVA - Sec->getRVA();
Sym.SectionNumber = Sec->SectionIndex;
break;
}
}
return Sym;
}
void Writer::createSymbolAndStringTable() {
if (!Config->Debug || !Config->WriteSymtab)
return;
// Name field in the section table is 8 byte long. Longer names need
// to be written to the string table. First, construct string table.
for (OutputSection *Sec : OutputSections) {
StringRef Name = Sec->getName();
if (Name.size() <= COFF::NameSize)
continue;
Sec->setStringTableOff(addEntryToStringTable(Name));
}
for (lld::coff::ObjectFile *File : Symtab->ObjectFiles)
for (SymbolBody *B : File->getSymbols())
if (auto *D = dyn_cast<Defined>(B))
if (!D->WrittenToSymtab) {
D->WrittenToSymtab = true;
if (Optional<coff_symbol16> Sym = createSymbol(D))
OutputSymtab.push_back(*Sym);
}
OutputSection *LastSection = OutputSections.back();
// We position the symbol table to be adjacent to the end of the last section.
uint64_t FileOff = LastSection->getFileOff() +
alignTo(LastSection->getRawSize(), SectorSize);
if (!OutputSymtab.empty()) {
PointerToSymbolTable = FileOff;
FileOff += OutputSymtab.size() * sizeof(coff_symbol16);
}
if (!Strtab.empty())
FileOff += Strtab.size() + 4;
FileSize = alignTo(FileOff, SectorSize);
}
// Visits all sections to assign incremental, non-overlapping RVAs and
// file offsets.
void Writer::assignAddresses() {
SizeOfHeaders = DOSStubSize + sizeof(PEMagic) + sizeof(coff_file_header) +
sizeof(data_directory) * NumberfOfDataDirectory +
sizeof(coff_section) * OutputSections.size();
SizeOfHeaders +=
Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header);
SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize);
uint64_t RVA = 0x1000; // The first page is kept unmapped.
FileSize = SizeOfHeaders;
// Move DISCARDABLE (or non-memory-mapped) sections to the end of file because
// the loader cannot handle holes.
std::stable_partition(
OutputSections.begin(), OutputSections.end(), [](OutputSection *S) {
return (S->getPermissions() & IMAGE_SCN_MEM_DISCARDABLE) == 0;
});
for (OutputSection *Sec : OutputSections) {
if (Sec->getName() == ".reloc")
addBaserels(Sec);
Sec->setRVA(RVA);
Sec->setFileOffset(FileSize);
RVA += alignTo(Sec->getVirtualSize(), PageSize);
FileSize += alignTo(Sec->getRawSize(), SectorSize);
}
SizeOfImage = SizeOfHeaders + alignTo(RVA - 0x1000, PageSize);
}
template <typename PEHeaderTy> void Writer::writeHeader() {
// Write DOS stub
uint8_t *Buf = Buffer->getBufferStart();
auto *DOS = reinterpret_cast<dos_header *>(Buf);
Buf += DOSStubSize;
DOS->Magic[0] = 'M';
DOS->Magic[1] = 'Z';
DOS->AddressOfRelocationTable = sizeof(dos_header);
DOS->AddressOfNewExeHeader = DOSStubSize;
// Write PE magic
memcpy(Buf, PEMagic, sizeof(PEMagic));
Buf += sizeof(PEMagic);
// Write COFF header
auto *COFF = reinterpret_cast<coff_file_header *>(Buf);
Buf += sizeof(*COFF);
COFF->Machine = Config->Machine;
COFF->NumberOfSections = OutputSections.size();
COFF->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE;
if (Config->LargeAddressAware)
COFF->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE;
if (!Config->is64())
COFF->Characteristics |= IMAGE_FILE_32BIT_MACHINE;
if (Config->DLL)
COFF->Characteristics |= IMAGE_FILE_DLL;
if (!Config->Relocatable)
COFF->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED;
COFF->SizeOfOptionalHeader =
sizeof(PEHeaderTy) + sizeof(data_directory) * NumberfOfDataDirectory;
// Write PE header
auto *PE = reinterpret_cast<PEHeaderTy *>(Buf);
Buf += sizeof(*PE);
PE->Magic = Config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32;
PE->ImageBase = Config->ImageBase;
PE->SectionAlignment = PageSize;
PE->FileAlignment = SectorSize;
PE->MajorImageVersion = Config->MajorImageVersion;
PE->MinorImageVersion = Config->MinorImageVersion;
PE->MajorOperatingSystemVersion = Config->MajorOSVersion;
PE->MinorOperatingSystemVersion = Config->MinorOSVersion;
PE->MajorSubsystemVersion = Config->MajorOSVersion;
PE->MinorSubsystemVersion = Config->MinorOSVersion;
PE->Subsystem = Config->Subsystem;
PE->SizeOfImage = SizeOfImage;
PE->SizeOfHeaders = SizeOfHeaders;
if (!Config->NoEntry) {
Defined *Entry = cast<Defined>(Config->Entry);
PE->AddressOfEntryPoint = Entry->getRVA();
// Pointer to thumb code must have the LSB set, so adjust it.
if (Config->Machine == ARMNT)
PE->AddressOfEntryPoint |= 1;
}
PE->SizeOfStackReserve = Config->StackReserve;
PE->SizeOfStackCommit = Config->StackCommit;
PE->SizeOfHeapReserve = Config->HeapReserve;
PE->SizeOfHeapCommit = Config->HeapCommit;
if (Config->AppContainer)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_APPCONTAINER;
if (Config->DynamicBase)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE;
if (Config->HighEntropyVA)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA;
if (!Config->AllowBind)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND;
if (Config->NxCompat)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT;
if (!Config->AllowIsolation)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION;
if (Config->TerminalServerAware)
PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE;
PE->NumberOfRvaAndSize = NumberfOfDataDirectory;
if (OutputSection *Text = findSection(".text")) {
PE->BaseOfCode = Text->getRVA();
PE->SizeOfCode = Text->getRawSize();
}
PE->SizeOfInitializedData = getSizeOfInitializedData();
// Write data directory
auto *Dir = reinterpret_cast<data_directory *>(Buf);
Buf += sizeof(*Dir) * NumberfOfDataDirectory;
if (OutputSection *Sec = findSection(".edata")) {
Dir[EXPORT_TABLE].RelativeVirtualAddress = Sec->getRVA();
Dir[EXPORT_TABLE].Size = Sec->getVirtualSize();
}
if (!Idata.empty()) {
Dir[IMPORT_TABLE].RelativeVirtualAddress = Idata.getDirRVA();
Dir[IMPORT_TABLE].Size = Idata.getDirSize();
Dir[IAT].RelativeVirtualAddress = Idata.getIATRVA();
Dir[IAT].Size = Idata.getIATSize();
}
if (OutputSection *Sec = findSection(".rsrc")) {
Dir[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA();
Dir[RESOURCE_TABLE].Size = Sec->getVirtualSize();
}
if (OutputSection *Sec = findSection(".pdata")) {
Dir[EXCEPTION_TABLE].RelativeVirtualAddress = Sec->getRVA();
Dir[EXCEPTION_TABLE].Size = Sec->getVirtualSize();
}
if (OutputSection *Sec = findSection(".reloc")) {
Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA();
Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize();
}
if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) {
if (Defined *B = dyn_cast<Defined>(Sym->body())) {
Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA();
Dir[TLS_TABLE].Size = Config->is64()
? sizeof(object::coff_tls_directory64)
: sizeof(object::coff_tls_directory32);
}
}
if (Config->Debug) {
Dir[DEBUG_DIRECTORY].RelativeVirtualAddress = DebugDirectory->getRVA();
Dir[DEBUG_DIRECTORY].Size = DebugDirectory->getSize();
}
if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) {
if (auto *B = dyn_cast<DefinedRegular>(Sym->body())) {
SectionChunk *SC = B->getChunk();
assert(B->getRVA() >= SC->getRVA());
uint64_t OffsetInChunk = B->getRVA() - SC->getRVA();
if (!SC->hasData() || OffsetInChunk + 4 > SC->getSize())
fatal("_load_config_used is malformed");
ArrayRef<uint8_t> SecContents = SC->getContents();
uint32_t LoadConfigSize =
*reinterpret_cast<const ulittle32_t *>(&SecContents[OffsetInChunk]);
if (OffsetInChunk + LoadConfigSize > SC->getSize())
fatal("_load_config_used is too large");
Dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = B->getRVA();
Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize;
}
}
if (!DelayIdata.empty()) {
Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress =
DelayIdata.getDirRVA();
Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize();
}
// Write section table
for (OutputSection *Sec : OutputSections) {
Sec->writeHeaderTo(Buf);
Buf += sizeof(coff_section);
}
SectionTable = ArrayRef<uint8_t>(
Buf - OutputSections.size() * sizeof(coff_section), Buf);
if (OutputSymtab.empty())
return;
COFF->PointerToSymbolTable = PointerToSymbolTable;
uint32_t NumberOfSymbols = OutputSymtab.size();
COFF->NumberOfSymbols = NumberOfSymbols;
auto *SymbolTable = reinterpret_cast<coff_symbol16 *>(
Buffer->getBufferStart() + COFF->PointerToSymbolTable);
for (size_t I = 0; I != NumberOfSymbols; ++I)
SymbolTable[I] = OutputSymtab[I];
// Create the string table, it follows immediately after the symbol table.
// The first 4 bytes is length including itself.
Buf = reinterpret_cast<uint8_t *>(&SymbolTable[NumberOfSymbols]);
write32le(Buf, Strtab.size() + 4);
if (!Strtab.empty())
memcpy(Buf + 4, Strtab.data(), Strtab.size());
}
void Writer::openFile(StringRef Path) {
Buffer = check(
FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable),
"failed to open " + Path);
}
void Writer::fixSafeSEHSymbols() {
if (!SEHTable)
return;
if (auto *T = dyn_cast<DefinedRelative>(Config->SEHTable->body()))
T->setRVA(SEHTable->getRVA());
if (auto *C = dyn_cast<DefinedAbsolute>(Config->SEHCount->body()))
C->setVA(SEHTable->getSize() / 4);
}
// Handles /section options to allow users to overwrite
// section attributes.
void Writer::setSectionPermissions() {
for (auto &P : Config->Section) {
StringRef Name = P.first;
uint32_t Perm = P.second;
if (auto *Sec = findSection(Name))
Sec->setPermissions(Perm);
}
}
// Write section contents to a mmap'ed file.
void Writer::writeSections() {
uint8_t *Buf = Buffer->getBufferStart();
for (OutputSection *Sec : OutputSections) {
uint8_t *SecBuf = Buf + Sec->getFileOff();
// Fill gaps between functions in .text with INT3 instructions
// instead of leaving as NUL bytes (which can be interpreted as
// ADD instructions).
if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE)
memset(SecBuf, 0xCC, Sec->getRawSize());
- for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(),
- [&](Chunk *C) { C->writeTo(SecBuf); });
+ for_each(llvm::parallel::par, Sec->getChunks().begin(),
+ Sec->getChunks().end(), [&](Chunk *C) { C->writeTo(SecBuf); });
}
}
// Sort .pdata section contents according to PE/COFF spec 5.5.
void Writer::sortExceptionTable() {
OutputSection *Sec = findSection(".pdata");
if (!Sec)
return;
// We assume .pdata contains function table entries only.
uint8_t *Begin = Buffer->getBufferStart() + Sec->getFileOff();
uint8_t *End = Begin + Sec->getVirtualSize();
if (Config->Machine == AMD64) {
struct Entry { ulittle32_t Begin, End, Unwind; };
- sort(parallel::par, (Entry *)Begin, (Entry *)End,
+ sort(llvm::parallel::par, (Entry *)Begin, (Entry *)End,
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
return;
}
if (Config->Machine == ARMNT) {
struct Entry { ulittle32_t Begin, Unwind; };
- sort(parallel::par, (Entry *)Begin, (Entry *)End,
+ sort(llvm::parallel::par, (Entry *)Begin, (Entry *)End,
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
return;
}
errs() << "warning: don't know how to handle .pdata.\n";
}
// Backfill the CVSignature in a PDB70 Debug Record. This backfilling allows us
// to get reproducible builds.
void Writer::writeBuildId() {
// There is nothing to backfill if BuildId was not setup.
if (BuildId == nullptr)
return;
MD5 Hash;
MD5::MD5Result Res;
Hash.update(ArrayRef<uint8_t>{Buffer->getBufferStart(),
Buffer->getBufferEnd()});
Hash.final(Res);
assert(BuildId->DI->Signature.CVSignature == OMF::Signature::PDB70 &&
"only PDB 7.0 is supported");
assert(sizeof(Res) == sizeof(BuildId->DI->PDB70.Signature) &&
"signature size mismatch");
memcpy(BuildId->DI->PDB70.Signature, Res.Bytes.data(),
sizeof(codeview::PDB70DebugInfo::Signature));
// TODO(compnerd) track the Age
BuildId->DI->PDB70.Age = 1;
}
OutputSection *Writer::findSection(StringRef Name) {
for (OutputSection *Sec : OutputSections)
if (Sec->getName() == Name)
return Sec;
return nullptr;
}
uint32_t Writer::getSizeOfInitializedData() {
uint32_t Res = 0;
for (OutputSection *S : OutputSections)
if (S->getPermissions() & IMAGE_SCN_CNT_INITIALIZED_DATA)
Res += S->getRawSize();
return Res;
}
// Returns an existing section or create a new one if not found.
OutputSection *Writer::createSection(StringRef Name) {
if (auto *Sec = findSection(Name))
return Sec;
const auto DATA = IMAGE_SCN_CNT_INITIALIZED_DATA;
const auto BSS = IMAGE_SCN_CNT_UNINITIALIZED_DATA;
const auto CODE = IMAGE_SCN_CNT_CODE;
const auto DISCARDABLE = IMAGE_SCN_MEM_DISCARDABLE;
const auto R = IMAGE_SCN_MEM_READ;
const auto W = IMAGE_SCN_MEM_WRITE;
const auto X = IMAGE_SCN_MEM_EXECUTE;
uint32_t Perms = StringSwitch<uint32_t>(Name)
.Case(".bss", BSS | R | W)
.Case(".data", DATA | R | W)
.Cases(".didat", ".edata", ".idata", ".rdata", DATA | R)
.Case(".reloc", DATA | DISCARDABLE | R)
.Case(".text", CODE | R | X)
.Default(0);
if (!Perms)
llvm_unreachable("unknown section name");
auto Sec = make<OutputSection>(Name);
Sec->addPermissions(Perms);
OutputSections.push_back(Sec);
return Sec;
}
// Dest is .reloc section. Add contents to that section.
void Writer::addBaserels(OutputSection *Dest) {
std::vector<Baserel> V;
for (OutputSection *Sec : OutputSections) {
if (Sec == Dest)
continue;
// Collect all locations for base relocations.
for (Chunk *C : Sec->getChunks())
C->getBaserels(&V);
// Add the addresses to .reloc section.
if (!V.empty())
addBaserelBlocks(Dest, V);
V.clear();
}
}
// Add addresses to .reloc section. Note that addresses are grouped by page.
void Writer::addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V) {
const uint32_t Mask = ~uint32_t(PageSize - 1);
uint32_t Page = V[0].RVA & Mask;
size_t I = 0, J = 1;
for (size_t E = V.size(); J < E; ++J) {
uint32_t P = V[J].RVA & Mask;
if (P == Page)
continue;
Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J));
I = J;
Page = P;
}
if (I == J)
return;
Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J));
}
diff --git a/lld/ELF/Threads.h b/lld/ELF/Threads.h
index e6f680c..f754df0 100644
--- a/lld/ELF/Threads.h
+++ b/lld/ELF/Threads.h
@@ -1,89 +1,89 @@
//===- Threads.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// LLD supports threads to distribute workloads to multiple cores. Using
// multicore is most effective when more than one core are idle. At the
// last step of a build, it is often the case that a linker is the only
// active process on a computer. So, we are naturally interested in using
// threads wisely to reduce latency to deliver results to users.
//
// That said, we don't want to do "too clever" things using threads.
// Complex multi-threaded algorithms are sometimes extremely hard to
// reason about and can easily mess up the entire design.
//
// Fortunately, when a linker links large programs (when the link time is
// most critical), it spends most of the time to work on massive number of
// small pieces of data of the same kind, and there are opportunities for
// large parallelism there. Here are examples:
//
// - We have hundreds of thousands of input sections that need to be
// copied to a result file at the last step of link. Once we fix a file
// layout, each section can be copied to its destination and its
// relocations can be applied independently.
//
// - We have tens of millions of small strings when constructing a
// mergeable string section.
//
// For the cases such as the former, we can just use parallel_for_each
// instead of std::for_each (or a plain for loop). Because tasks are
// completely independent from each other, we can run them in parallel
// without any coordination between them. That's very easy to understand
// and reason about.
//
// For the cases such as the latter, we can use parallel algorithms to
// deal with massive data. We have to write code for a tailored algorithm
// for each problem, but the complexity of multi-threading is isolated in
// a single pass and doesn't affect the linker's overall design.
//
// The above approach seems to be working fairly well. As an example, when
// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to
// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my
// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from
// 12.66 seconds to 7.95 seconds). Because of the Amdahl's law, the
// speedup is not linear, but as you add more cores, it gets faster.
//
// On a final note, if you are trying to optimize, keep the axiom "don't
// guess, measure!" in mind. Some important passes of the linker are not
// that slow. For example, resolving all symbols is not a very heavy pass,
// although it would be very hard to parallelize it. You want to first
// identify a slow pass and then optimize it.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_THREADS_H
#define LLD_ELF_THREADS_H
#include "Config.h"
-#include "lld/Core/Parallel.h"
+#include "llvm/Support/Parallel.h"
#include <algorithm>
#include <functional>
namespace lld {
namespace elf {
template <class IterTy, class FuncTy>
void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) {
if (Config->Threads)
- for_each(parallel::par, Begin, End, Fn);
+ for_each(llvm::parallel::par, Begin, End, Fn);
else
- for_each(parallel::seq, Begin, End, Fn);
+ for_each(llvm::parallel::seq, Begin, End, Fn);
}
inline void parallelFor(size_t Begin, size_t End,
std::function<void(size_t)> Fn) {
if (Config->Threads)
- for_each_n(parallel::par, Begin, End, Fn);
+ for_each_n(llvm::parallel::par, Begin, End, Fn);
else
- for_each_n(parallel::seq, Begin, End, Fn);
+ for_each_n(llvm::parallel::seq, Begin, End, Fn);
}
}
}
#endif
diff --git a/lld/include/lld/Core/TaskGroup.h b/lld/include/lld/Core/TaskGroup.h
deleted file mode 100644
index 82e9122..0000000
--- a/lld/include/lld/Core/TaskGroup.h
+++ /dev/null
@@ -1,65 +0,0 @@
-//===- lld/Core/TaskGroup.h - Task Group ----------------------------------===//
-//
-// The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLD_CORE_TASKGROUP_H
-#define LLD_CORE_TASKGROUP_H
-
-#include "lld/Core/LLVM.h"
-
-#include <condition_variable>
-#include <functional>
-#include <mutex>
-
-namespace lld {
-/// \brief Allows one or more threads to wait on a potentially unknown number of
-/// events.
-///
-/// A latch starts at \p count. inc() increments this, and dec() decrements it.
-/// All calls to sync() will block while the count is not 0.
-///
-/// Calling dec() on a Latch with a count of 0 has undefined behaivor.
-class Latch {
- uint32_t _count;
- mutable std::mutex _condMut;
- mutable std::condition_variable _cond;
-
-public:
- explicit Latch(uint32_t count = 0) : _count(count) {}
- ~Latch() { sync(); }
-
- void inc() {
- std::unique_lock<std::mutex> lock(_condMut);
- ++_count;
- }
-
- void dec() {
- std::unique_lock<std::mutex> lock(_condMut);
- if (--_count == 0)
- _cond.notify_all();
- }
-
- void sync() const {
- std::unique_lock<std::mutex> lock(_condMut);
- _cond.wait(lock, [&] { return _count == 0; });
- }
-};
-
-/// \brief Allows launching a number of tasks and waiting for them to finish
-/// either explicitly via sync() or implicitly on destruction.
-class TaskGroup {
- Latch _latch;
-
-public:
- void spawn(std::function<void()> f);
-
- void sync() const { _latch.sync(); }
-};
-}
-
-#endif
diff --git a/lld/lib/Core/CMakeLists.txt b/lld/lib/Core/CMakeLists.txt
index cdd4e67..f2bf905 100644
--- a/lld/lib/Core/CMakeLists.txt
+++ b/lld/lib/Core/CMakeLists.txt
@@ -1,30 +1,29 @@
if(NOT LLD_BUILT_STANDALONE)
set(tablegen_deps intrinsics_gen)
endif()
add_lld_library(lldCore
DefinedAtom.cpp
Error.cpp
File.cpp
LinkingContext.cpp
Reader.cpp
Reproduce.cpp
Resolver.cpp
SymbolTable.cpp
TargetOptionsCommandFlags.cpp
- TaskGroup.cpp
Writer.cpp
ADDITIONAL_HEADER_DIRS
${LLD_INCLUDE_DIR}/lld/Core
LINK_COMPONENTS
MC
Support
LINK_LIBS
${LLVM_PTHREAD_LIB}
DEPENDS
${tablegen_deps}
)
diff --git a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
index 2b5a46c..7bca07e 100644
--- a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
+++ b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
@@ -1,489 +1,489 @@
//===-- ReaderWriter/MachO/LayoutPass.cpp - Layout atoms ------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "LayoutPass.h"
#include "lld/Core/Instrumentation.h"
-#include "lld/Core/Parallel.h"
#include "lld/Core/PassManager.h"
#include "lld/ReaderWriter/MachOLinkingContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
#include <algorithm>
#include <set>
#include <utility>
using namespace lld;
#define DEBUG_TYPE "LayoutPass"
namespace lld {
namespace mach_o {
static bool compareAtoms(const LayoutPass::SortKey &,
const LayoutPass::SortKey &,
LayoutPass::SortOverride customSorter);
#ifndef NDEBUG
// Return "reason (leftval, rightval)"
static std::string formatReason(StringRef reason, int leftVal, int rightVal) {
return (Twine(reason) + " (" + Twine(leftVal) + ", " + Twine(rightVal) + ")")
.str();
}
// Less-than relationship of two atoms must be transitive, which is, if a < b
// and b < c, a < c must be true. This function checks the transitivity by
// checking the sort results.
static void checkTransitivity(std::vector<LayoutPass::SortKey> &vec,
LayoutPass::SortOverride customSorter) {
for (auto i = vec.begin(), e = vec.end(); (i + 1) != e; ++i) {
for (auto j = i + 1; j != e; ++j) {
assert(compareAtoms(*i, *j, customSorter));
assert(!compareAtoms(*j, *i, customSorter));
}
}
}
// Helper functions to check follow-on graph.
typedef llvm::DenseMap<const DefinedAtom *, const DefinedAtom *> AtomToAtomT;
static std::string atomToDebugString(const Atom *atom) {
const DefinedAtom *definedAtom = dyn_cast<DefinedAtom>(atom);
std::string str;
llvm::raw_string_ostream s(str);
if (definedAtom->name().empty())
s << "<anonymous " << definedAtom << ">";
else
s << definedAtom->name();
s << " in ";
if (definedAtom->customSectionName().empty())
s << "<anonymous>";
else
s << definedAtom->customSectionName();
s.flush();
return str;
}
static void showCycleDetectedError(const Registry &registry,
AtomToAtomT &followOnNexts,
const DefinedAtom *atom) {
const DefinedAtom *start = atom;
llvm::dbgs() << "There's a cycle in a follow-on chain!\n";
do {
llvm::dbgs() << " " << atomToDebugString(atom) << "\n";
for (const Reference *ref : *atom) {
StringRef kindValStr;
if (!registry.referenceKindToString(ref->kindNamespace(), ref->kindArch(),
ref->kindValue(), kindValStr)) {
kindValStr = "<unknown>";
}
llvm::dbgs() << " " << kindValStr
<< ": " << atomToDebugString(ref->target()) << "\n";
}
atom = followOnNexts[atom];
} while (atom != start);
llvm::report_fatal_error("Cycle detected");
}
/// Exit if there's a cycle in a followon chain reachable from the
/// given root atom. Uses the tortoise and hare algorithm to detect a
/// cycle.
static void checkNoCycleInFollowonChain(const Registry &registry,
AtomToAtomT &followOnNexts,
const DefinedAtom *root) {
const DefinedAtom *tortoise = root;
const DefinedAtom *hare = followOnNexts[root];
while (true) {
if (!tortoise || !hare)
return;
if (tortoise == hare)
showCycleDetectedError(registry, followOnNexts, tortoise);
tortoise = followOnNexts[tortoise];
hare = followOnNexts[followOnNexts[hare]];
}
}
static void checkReachabilityFromRoot(AtomToAtomT &followOnRoots,
const DefinedAtom *atom) {
if (!atom) return;
auto i = followOnRoots.find(atom);
if (i == followOnRoots.end()) {
llvm_unreachable(((Twine("Atom <") + atomToDebugString(atom) +
"> has no follow-on root!"))
.str()
.c_str());
}
const DefinedAtom *ap = i->second;
while (true) {
const DefinedAtom *next = followOnRoots[ap];
if (!next) {
llvm_unreachable((Twine("Atom <" + atomToDebugString(atom) +
"> is not reachable from its root!"))
.str()
.c_str());
}
if (next == ap)
return;
ap = next;
}
}
static void printDefinedAtoms(const File::AtomRange<DefinedAtom> &atomRange) {
for (const DefinedAtom *atom : atomRange) {
llvm::dbgs() << " file=" << atom->file().path()
<< ", name=" << atom->name()
<< ", size=" << atom->size()
<< ", type=" << atom->contentType()
<< ", ordinal=" << atom->ordinal()
<< "\n";
}
}
/// Verify that the followon chain is sane. Should not be called in
/// release binary.
void LayoutPass::checkFollowonChain(const File::AtomRange<DefinedAtom> &range) {
ScopedTask task(getDefaultDomain(), "LayoutPass::checkFollowonChain");
// Verify that there's no cycle in follow-on chain.
std::set<const DefinedAtom *> roots;
for (const auto &ai : _followOnRoots)
roots.insert(ai.second);
for (const DefinedAtom *root : roots)
checkNoCycleInFollowonChain(_registry, _followOnNexts, root);
// Verify that all the atoms in followOnNexts have references to
// their roots.
for (const auto &ai : _followOnNexts) {
checkReachabilityFromRoot(_followOnRoots, ai.first);
checkReachabilityFromRoot(_followOnRoots, ai.second);
}
}
#endif // #ifndef NDEBUG
/// The function compares atoms by sorting atoms in the following order
/// a) Sorts atoms by their ordinal overrides (layout-after/ingroup)
/// b) Sorts atoms by their permissions
/// c) Sorts atoms by their content
/// d) Sorts atoms by custom sorter
/// e) Sorts atoms on how they appear using File Ordinality
/// f) Sorts atoms on how they appear within the File
static bool compareAtomsSub(const LayoutPass::SortKey &lc,
const LayoutPass::SortKey &rc,
LayoutPass::SortOverride customSorter,
std::string &reason) {
const DefinedAtom *left = lc._atom.get();
const DefinedAtom *right = rc._atom.get();
if (left == right) {
reason = "same";
return false;
}
// Find the root of the chain if it is a part of a follow-on chain.
const DefinedAtom *leftRoot = lc._root;
const DefinedAtom *rightRoot = rc._root;
// Sort atoms by their ordinal overrides only if they fall in the same
// chain.
if (leftRoot == rightRoot) {
DEBUG(reason = formatReason("override", lc._override, rc._override));
return lc._override < rc._override;
}
// Sort same permissions together.
DefinedAtom::ContentPermissions leftPerms = leftRoot->permissions();
DefinedAtom::ContentPermissions rightPerms = rightRoot->permissions();
if (leftPerms != rightPerms) {
DEBUG(reason =
formatReason("contentPerms", (int)leftPerms, (int)rightPerms));
return leftPerms < rightPerms;
}
// Sort same content types together.
DefinedAtom::ContentType leftType = leftRoot->contentType();
DefinedAtom::ContentType rightType = rightRoot->contentType();
if (leftType != rightType) {
DEBUG(reason = formatReason("contentType", (int)leftType, (int)rightType));
return leftType < rightType;
}
// Use custom sorter if supplied.
if (customSorter) {
bool leftBeforeRight;
if (customSorter(leftRoot, rightRoot, leftBeforeRight))
return leftBeforeRight;
}
// Sort by .o order.
const File *leftFile = &leftRoot->file();
const File *rightFile = &rightRoot->file();
if (leftFile != rightFile) {
DEBUG(reason = formatReason(".o order", (int)leftFile->ordinal(),
(int)rightFile->ordinal()));
return leftFile->ordinal() < rightFile->ordinal();
}
// Sort by atom order with .o file.
uint64_t leftOrdinal = leftRoot->ordinal();
uint64_t rightOrdinal = rightRoot->ordinal();
if (leftOrdinal != rightOrdinal) {
DEBUG(reason = formatReason("ordinal", (int)leftRoot->ordinal(),
(int)rightRoot->ordinal()));
return leftOrdinal < rightOrdinal;
}
llvm::errs() << "Unordered: <" << left->name() << "> <"
<< right->name() << ">\n";
llvm_unreachable("Atoms with Same Ordinal!");
}
static bool compareAtoms(const LayoutPass::SortKey &lc,
const LayoutPass::SortKey &rc,
LayoutPass::SortOverride customSorter) {
std::string reason;
bool result = compareAtomsSub(lc, rc, customSorter, reason);
DEBUG({
StringRef comp = result ? "<" : ">=";
llvm::dbgs() << "Layout: '" << lc._atom.get()->name()
<< "' " << comp << " '"
<< rc._atom.get()->name() << "' (" << reason << ")\n";
});
return result;
}
LayoutPass::LayoutPass(const Registry &registry, SortOverride sorter)
: _registry(registry), _customSorter(std::move(sorter)) {}
// Returns the atom immediately followed by the given atom in the followon
// chain.
const DefinedAtom *LayoutPass::findAtomFollowedBy(
const DefinedAtom *targetAtom) {
// Start from the beginning of the chain and follow the chain until
// we find the targetChain.
const DefinedAtom *atom = _followOnRoots[targetAtom];
while (true) {
const DefinedAtom *prevAtom = atom;
AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom);
// The target atom must be in the chain of its root.
assert(targetFollowOnAtomsIter != _followOnNexts.end());
atom = targetFollowOnAtomsIter->second;
if (atom == targetAtom)
return prevAtom;
}
}
// Check if all the atoms followed by the given target atom are of size zero.
// When this method is called, an atom being added is not of size zero and
// will be added to the head of the followon chain. All the atoms between the
// atom and the targetAtom (specified by layout-after) need to be of size zero
// in this case. Otherwise the desired layout is impossible.
bool LayoutPass::checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom) {
const DefinedAtom *atom = _followOnRoots[targetAtom];
while (true) {
if (atom == targetAtom)
return true;
if (atom->size() != 0)
// TODO: print warning that an impossible layout is being desired by the
// user.
return false;
AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom);
// The target atom must be in the chain of its root.
assert(targetFollowOnAtomsIter != _followOnNexts.end());
atom = targetFollowOnAtomsIter->second;
}
}
// Set the root of all atoms in targetAtom's chain to the given root.
void LayoutPass::setChainRoot(const DefinedAtom *targetAtom,
const DefinedAtom *root) {
// Walk through the followon chain and override each node's root.
while (true) {
_followOnRoots[targetAtom] = root;
AtomToAtomT::iterator targetFollowOnAtomsIter =
_followOnNexts.find(targetAtom);
if (targetFollowOnAtomsIter == _followOnNexts.end())
return;
targetAtom = targetFollowOnAtomsIter->second;
}
}
/// This pass builds the followon tables described by two DenseMaps
/// followOnRoots and followonNexts.
/// The followOnRoots map contains a mapping of a DefinedAtom to its root
/// The followOnNexts map contains a mapping of what DefinedAtom follows the
/// current Atom
/// The algorithm follows a very simple approach
/// a) If the atom is first seen, then make that as the root atom
/// b) The targetAtom which this Atom contains, has the root thats set to the
/// root of the current atom
/// c) If the targetAtom is part of a different tree and the root of the
/// targetAtom is itself, Chain all the atoms that are contained in the tree
/// to the current Tree
/// d) If the targetAtom is part of a different chain and the root of the
/// targetAtom until the targetAtom has all atoms of size 0, then chain the
/// targetAtoms and its tree to the current chain
void LayoutPass::buildFollowOnTable(const File::AtomRange<DefinedAtom> &range) {
ScopedTask task(getDefaultDomain(), "LayoutPass::buildFollowOnTable");
// Set the initial size of the followon and the followonNext hash to the
// number of atoms that we have.
_followOnRoots.reserve(range.size());
_followOnNexts.reserve(range.size());
for (const DefinedAtom *ai : range) {
for (const Reference *r : *ai) {
if (r->kindNamespace() != lld::Reference::KindNamespace::all ||
r->kindValue() != lld::Reference::kindLayoutAfter)
continue;
const DefinedAtom *targetAtom = dyn_cast<DefinedAtom>(r->target());
_followOnNexts[ai] = targetAtom;
// If we find a followon for the first time, let's make that atom as the
// root atom.
if (_followOnRoots.count(ai) == 0)
_followOnRoots[ai] = ai;
auto iter = _followOnRoots.find(targetAtom);
if (iter == _followOnRoots.end()) {
// If the targetAtom is not a root of any chain, let's make the root of
// the targetAtom to the root of the current chain.
// The expression m[i] = m[j] where m is a DenseMap and i != j is not
// safe. m[j] returns a reference, which would be invalidated when a
// rehashing occurs. If rehashing occurs to make room for m[i], m[j]
// becomes invalid, and that invalid reference would be used as the RHS
// value of the expression.
// Copy the value to workaround.
const DefinedAtom *tmp = _followOnRoots[ai];
_followOnRoots[targetAtom] = tmp;
continue;
}
if (iter->second == targetAtom) {
// If the targetAtom is the root of a chain, the chain becomes part of
// the current chain. Rewrite the subchain's root to the current
// chain's root.
setChainRoot(targetAtom, _followOnRoots[ai]);
continue;
}
// The targetAtom is already a part of a chain. If the current atom is
// of size zero, we can insert it in the middle of the chain just
// before the target atom, while not breaking other atom's followon
// relationships. If it's not, we can only insert the current atom at
// the beginning of the chain. All the atoms followed by the target
// atom must be of size zero in that case to satisfy the followon
// relationships.
size_t currentAtomSize = ai->size();
if (currentAtomSize == 0) {
const DefinedAtom *targetPrevAtom = findAtomFollowedBy(targetAtom);
_followOnNexts[targetPrevAtom] = ai;
const DefinedAtom *tmp = _followOnRoots[targetPrevAtom];
_followOnRoots[ai] = tmp;
continue;
}
if (!checkAllPrevAtomsZeroSize(targetAtom))
break;
_followOnNexts[ai] = _followOnRoots[targetAtom];
setChainRoot(_followOnRoots[targetAtom], _followOnRoots[ai]);
}
}
}
/// Build an ordinal override map by traversing the followon chain, and
/// assigning ordinals to each atom, if the atoms have their ordinals
/// already assigned skip the atom and move to the next. This is the
/// main map thats used to sort the atoms while comparing two atoms together
void
LayoutPass::buildOrdinalOverrideMap(const File::AtomRange<DefinedAtom> &range) {
ScopedTask task(getDefaultDomain(), "LayoutPass::buildOrdinalOverrideMap");
uint64_t index = 0;
for (const DefinedAtom *ai : range) {
const DefinedAtom *atom = ai;
if (_ordinalOverrideMap.find(atom) != _ordinalOverrideMap.end())
continue;
AtomToAtomT::iterator start = _followOnRoots.find(atom);
if (start == _followOnRoots.end())
continue;
for (const DefinedAtom *nextAtom = start->second; nextAtom;
nextAtom = _followOnNexts[nextAtom]) {
AtomToOrdinalT::iterator pos = _ordinalOverrideMap.find(nextAtom);
if (pos == _ordinalOverrideMap.end())
_ordinalOverrideMap[nextAtom] = index++;
}
}
}
std::vector<LayoutPass::SortKey>
LayoutPass::decorate(File::AtomRange<DefinedAtom> &atomRange) const {
std::vector<SortKey> ret;
for (OwningAtomPtr<DefinedAtom> &atom : atomRange.owning_ptrs()) {
auto ri = _followOnRoots.find(atom.get());
auto oi = _ordinalOverrideMap.find(atom.get());
const auto *root = (ri == _followOnRoots.end()) ? atom.get() : ri->second;
uint64_t override = (oi == _ordinalOverrideMap.end()) ? 0 : oi->second;
ret.push_back(SortKey(std::move(atom), root, override));
}
return ret;
}
void LayoutPass::undecorate(File::AtomRange<DefinedAtom> &atomRange,
std::vector<SortKey> &keys) const {
size_t i = 0;
for (SortKey &k : keys)
atomRange[i++] = std::move(k._atom);
}
/// Perform the actual pass
llvm::Error LayoutPass::perform(SimpleFile &mergedFile) {
DEBUG(llvm::dbgs() << "******** Laying out atoms:\n");
// sort the atoms
ScopedTask task(getDefaultDomain(), "LayoutPass");
File::AtomRange<DefinedAtom> atomRange = mergedFile.defined();
// Build follow on tables
buildFollowOnTable(atomRange);
// Check the structure of followon graph if running in debug mode.
DEBUG(checkFollowonChain(atomRange));
// Build override maps
buildOrdinalOverrideMap(atomRange);
DEBUG({
llvm::dbgs() << "unsorted atoms:\n";
printDefinedAtoms(atomRange);
});
std::vector<LayoutPass::SortKey> vec = decorate(atomRange);
- sort(parallel::par, vec.begin(), vec.end(),
+ sort(llvm::parallel::par, vec.begin(), vec.end(),
[&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
return compareAtoms(l, r, _customSorter);
});
DEBUG(checkTransitivity(vec, _customSorter));
undecorate(atomRange, vec);
DEBUG({
llvm::dbgs() << "sorted atoms:\n";
printDefinedAtoms(atomRange);
});
DEBUG(llvm::dbgs() << "******** Finished laying out atoms\n");
return llvm::Error::success();
}
void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx) {
pm.add(llvm::make_unique<LayoutPass>(
ctx.registry(), [&](const DefinedAtom * left, const DefinedAtom * right,
bool & leftBeforeRight) ->bool {
return ctx.customAtomOrderer(left, right, leftBeforeRight);
}));
}
} // namespace mach_o
} // namespace lld
diff --git a/lld/unittests/CMakeLists.txt b/lld/unittests/CMakeLists.txt
index 9cd0853..84d35d4 100644
--- a/lld/unittests/CMakeLists.txt
+++ b/lld/unittests/CMakeLists.txt
@@ -1,17 +1,16 @@
add_custom_target(LLDUnitTests)
set_target_properties(LLDUnitTests PROPERTIES FOLDER "lld tests")
set(CMAKE_BUILD_WITH_INSTALL_RPATH OFF)
# add_lld_unittest(test_dirname file1.cpp file2.cpp)
#
# Will compile the list of files together and link against lld
# Produces a binary named 'basename(test_dirname)'.
function(add_lld_unittest test_dirname)
add_unittest(LLDUnitTests ${test_dirname} ${ARGN})
target_link_libraries(${test_dirname} ${LLVM_COMMON_LIBS})
endfunction()
-add_subdirectory(CoreTests)
add_subdirectory(DriverTests)
add_subdirectory(MachOTests)
diff --git a/lld/unittests/CoreTests/CMakeLists.txt b/lld/unittests/CoreTests/CMakeLists.txt
deleted file mode 100644
index 9f68f56..0000000
--- a/lld/unittests/CoreTests/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-add_lld_unittest(CoreTests
- ParallelTest.cpp
- )
-
-target_link_libraries(CoreTests
- lldCore ${LLVM_PTHREAD_LIB}
- )
diff --git a/lld/include/lld/Core/Parallel.h b/llvm/include/llvm/Support/Parallel.h
similarity index 85%
rename from lld/include/lld/Core/Parallel.h
rename to llvm/include/llvm/Support/Parallel.h
index a514b2e..4c1a3e0 100644
--- a/lld/include/lld/Core/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -1,209 +1,251 @@
-//===- lld/Core/Parallel.h - Parallel utilities ---------------------------===//
+//===- llvm/Support/Parallel.h - Parallel algorithms ----------------------===//
//
-// The LLVM Linker
+// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#ifndef LLD_CORE_PARALLEL_H
-#define LLD_CORE_PARALLEL_H
+#ifndef LLVM_SUPPORT_PARALLEL_H
+#define LLVM_SUPPORT_PARALLEL_H
-#include "lld/Core/LLVM.h"
-#include "lld/Core/TaskGroup.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TaskGroup.h"
#include <algorithm>
+#include <condition_variable>
+#include <functional>
+#include <mutex>
#if defined(_MSC_VER) && LLVM_ENABLE_THREADS
+#pragma warning(push)
+#pragma warning(disable : 4530)
#include <concrt.h>
#include <ppl.h>
+#pragma warning(pop)
#endif
-namespace lld {
+namespace llvm {
+
+namespace detail {
+class Latch {
+ uint32_t Count;
+ mutable std::mutex Mutex;
+ mutable std::condition_variable Cond;
+
+public:
+ explicit Latch(uint32_t count = 0) : Count(Count) {}
+ ~Latch() { sync(); }
+
+ void inc() {
+ std::unique_lock<std::mutex> lock(Mutex);
+ ++Count;
+ }
+
+ void dec() {
+ std::unique_lock<std::mutex> lock(Mutex);
+ if (--Count == 0)
+ Cond.notify_all();
+ }
+
+ void sync() const {
+ std::unique_lock<std::mutex> lock(Mutex);
+ Cond.wait(lock, [&] { return Count == 0; });
+ }
+};
+
+class TaskGroup {
+ Latch L;
+
+public:
+ void spawn(std::function<void()> f);
+
+ void sync() const { L.sync(); }
+};
+}
namespace parallel {
struct sequential_execution_policy {};
struct parallel_execution_policy {};
template <typename T>
struct is_execution_policy
: public std::integral_constant<
bool, llvm::is_one_of<T, sequential_execution_policy,
parallel_execution_policy>::value> {};
constexpr sequential_execution_policy seq{};
constexpr parallel_execution_policy par{};
#if LLVM_ENABLE_THREADS
namespace detail {
#if defined(_MSC_VER)
template <class RandomAccessIterator, class Comparator>
void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp) {
concurrency::parallel_sort(Start, End, Comp);
}
template <class IterTy, class FuncTy>
void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
concurrency::parallel_for_each(Begin, End, Fn);
}
template <class IndexTy, class FuncTy>
void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
concurrency::parallel_for(Begin, End, Fn);
}
#else
const ptrdiff_t MinParallelSize = 1024;
/// \brief Inclusive median.
template <class RandomAccessIterator, class Comparator>
RandomAccessIterator medianOf3(RandomAccessIterator Start,
RandomAccessIterator End,
const Comparator &Comp) {
RandomAccessIterator Mid = Start + (std::distance(Start, End) / 2);
return Comp(*Start, *(End - 1))
? (Comp(*Mid, *(End - 1)) ? (Comp(*Start, *Mid) ? Mid : Start)
: End - 1)
: (Comp(*Mid, *Start) ? (Comp(*(End - 1), *Mid) ? Mid : End - 1)
: Start);
}
template <class RandomAccessIterator, class Comparator>
void parallel_quick_sort(RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp, TaskGroup &TG, size_t Depth) {
// Do a sequential sort for small inputs.
if (std::distance(Start, End) < detail::MinParallelSize || Depth == 0) {
std::sort(Start, End, Comp);
return;
}
// Partition.
auto Pivot = medianOf3(Start, End, Comp);
// Move Pivot to End.
std::swap(*(End - 1), *Pivot);
Pivot = std::partition(Start, End - 1, [&Comp, End](decltype(*Start) V) {
return Comp(V, *(End - 1));
});
// Move Pivot to middle of partition.
std::swap(*Pivot, *(End - 1));
// Recurse.
TG.spawn([=, &Comp, &TG] {
parallel_quick_sort(Start, Pivot, Comp, TG, Depth - 1);
});
parallel_quick_sort(Pivot + 1, End, Comp, TG, Depth - 1);
}
template <class RandomAccessIterator, class Comparator>
void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp) {
TaskGroup TG;
parallel_quick_sort(Start, End, Comp, TG,
llvm::Log2_64(std::distance(Start, End)) + 1);
}
template <class IterTy, class FuncTy>
void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
// TaskGroup has a relatively high overhead, so we want to reduce
// the number of spawn() calls. We'll create up to 1024 tasks here.
// (Note that 1024 is an arbitrary number. This code probably needs
// improving to take the number of available cores into account.)
ptrdiff_t TaskSize = std::distance(Begin, End) / 1024;
if (TaskSize == 0)
TaskSize = 1;
TaskGroup TG;
while (TaskSize <= std::distance(Begin, End)) {
TG.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); });
Begin += TaskSize;
}
TG.spawn([=, &Fn] { std::for_each(Begin, End, Fn); });
}
template <class IndexTy, class FuncTy>
void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
ptrdiff_t TaskSize = (End - Begin) / 1024;
if (TaskSize == 0)
TaskSize = 1;
TaskGroup TG;
IndexTy I = Begin;
for (; I + TaskSize < End; I += TaskSize) {
TG.spawn([=, &Fn] {
for (IndexTy J = I, E = I + TaskSize; J != E; ++J)
Fn(J);
});
}
TG.spawn([=, &Fn] {
for (IndexTy J = I; J < End; ++J)
Fn(J);
});
}
#endif
template <typename Iter>
using DefComparator =
std::less<typename std::iterator_traits<Iter>::value_type>;
} // namespace detail
#endif
// sequential algorithm implementations.
template <class Policy, class RandomAccessIterator,
class Comparator = detail::DefComparator<RandomAccessIterator>>
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp = Comparator()) {
static_assert(is_execution_policy<Policy>::value,
"Invalid execution policy!");
std::sort(Start, End, Comp);
}
template <class Policy, class IterTy, class FuncTy>
void for_each(Policy policy, IterTy Begin, IterTy End, FuncTy Fn) {
static_assert(is_execution_policy<Policy>::value,
"Invalid execution policy!");
std::for_each(Begin, End, Fn);
}
template <class Policy, class IndexTy, class FuncTy>
void for_each_n(Policy policy, IndexTy Begin, IndexTy End, FuncTy Fn) {
static_assert(is_execution_policy<Policy>::value,
"Invalid execution policy!");
for (IndexTy I = Begin; I != End; ++I)
Fn(I);
}
// Parallel algorithm implementations, only available when LLVM_ENABLE_THREADS
// is true.
#if defined(LLVM_ENABLE_THREADS)
template <class RandomAccessIterator,
class Comparator = detail::DefComparator<RandomAccessIterator>>
void sort(parallel_execution_policy policy, RandomAccessIterator Start,
RandomAccessIterator End, const Comparator &Comp = Comparator()) {
detail::parallel_sort(Start, End, Comp);
}
template <class IterTy, class FuncTy>
void for_each(parallel_execution_policy policy, IterTy Begin, IterTy End,
FuncTy Fn) {
detail::parallel_for_each(Begin, End, Fn);
}
template <class IndexTy, class FuncTy>
void for_each_n(parallel_execution_policy policy, IndexTy Begin, IndexTy End,
FuncTy Fn) {
detail::parallel_for_each_n(Begin, End, Fn);
}
#endif
} // namespace parallel
-} // End namespace lld
+} // namespace llvm
-#endif // LLD_CORE_PARALLEL_H
+#endif // LLVM_SUPPORT_PARALLEL_H
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 63c4400..8337628 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -1,149 +1,150 @@
set(system_libs)
if( MSVC OR MINGW )
# libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc.
set(system_libs ${system_libs} psapi shell32 ole32 uuid)
elseif( CMAKE_HOST_UNIX )
if( HAVE_LIBRT )
set(system_libs ${system_libs} rt)
endif()
if( HAVE_LIBDL )
set(system_libs ${system_libs} ${CMAKE_DL_LIBS})
endif()
if( HAVE_BACKTRACE )
set(system_libs ${system_libs} ${Backtrace_LIBRARIES})
endif()
if(LLVM_ENABLE_TERMINFO)
if(HAVE_TERMINFO)
set(system_libs ${system_libs} ${TERMINFO_LIBS})
endif()
endif()
if( LLVM_ENABLE_THREADS AND HAVE_LIBATOMIC )
set(system_libs ${system_libs} atomic)
endif()
set(system_libs ${system_libs} ${LLVM_PTHREAD_LIB})
if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ )
set(system_libs ${system_libs} z)
endif()
if( UNIX AND NOT (BEOS OR HAIKU) )
set(system_libs ${system_libs} m)
endif()
endif( MSVC OR MINGW )
add_llvm_library(LLVMSupport
APFloat.cpp
APInt.cpp
APSInt.cpp
ARMBuildAttrs.cpp
ARMAttributeParser.cpp
ARMWinEH.cpp
Allocator.cpp
BinaryStreamError.cpp
BinaryStreamReader.cpp
BinaryStreamWriter.cpp
BlockFrequency.cpp
BranchProbability.cpp
CachePruning.cpp
circular_raw_ostream.cpp
Chrono.cpp
COM.cpp
CommandLine.cpp
Compression.cpp
ConvertUTF.cpp
ConvertUTFWrapper.cpp
CrashRecoveryContext.cpp
DataExtractor.cpp
Debug.cpp
DebugCounter.cpp
DeltaAlgorithm.cpp
DAGDeltaAlgorithm.cpp
Dwarf.cpp
Error.cpp
ErrorHandling.cpp
FileUtilities.cpp
FileOutputBuffer.cpp
FoldingSet.cpp
FormattedStream.cpp
FormatVariadic.cpp
GlobPattern.cpp
GraphWriter.cpp
Hashing.cpp
IntEqClasses.cpp
IntervalMap.cpp
JamCRC.cpp
LEB128.cpp
LineIterator.cpp
Locale.cpp
LockFileManager.cpp
LowLevelType.cpp
ManagedStatic.cpp
MathExtras.cpp
MemoryBuffer.cpp
MD5.cpp
NativeFormatting.cpp
Options.cpp
+ Parallel.cpp
PluginLoader.cpp
PrettyStackTrace.cpp
RandomNumberGenerator.cpp
Regex.cpp
ScaledNumber.cpp
ScopedPrinter.cpp
SHA1.cpp
SmallPtrSet.cpp
SmallVector.cpp
SourceMgr.cpp
SpecialCaseList.cpp
Statistic.cpp
StringExtras.cpp
StringMap.cpp
StringPool.cpp
StringSaver.cpp
StringRef.cpp
SystemUtils.cpp
TarWriter.cpp
TargetParser.cpp
ThreadPool.cpp
Timer.cpp
ToolOutputFile.cpp
TrigramIndex.cpp
Triple.cpp
Twine.cpp
Unicode.cpp
YAMLParser.cpp
YAMLTraits.cpp
raw_os_ostream.cpp
raw_ostream.cpp
regcomp.c
regerror.c
regexec.c
regfree.c
regstrlcpy.c
xxhash.cpp
# System
Atomic.cpp
DynamicLibrary.cpp
Errno.cpp
Host.cpp
Memory.cpp
Mutex.cpp
Path.cpp
Process.cpp
Program.cpp
RWMutex.cpp
Signals.cpp
TargetRegistry.cpp
ThreadLocal.cpp
Threading.cpp
Valgrind.cpp
Watchdog.cpp
ADDITIONAL_HEADER_DIRS
Unix
Windows
${LLVM_MAIN_INCLUDE_DIR}/llvm/ADT
${LLVM_MAIN_INCLUDE_DIR}/llvm/Support
${Backtrace_INCLUDE_DIRS}
LINK_LIBS ${system_libs}
)
set_property(TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS "${system_libs}")
diff --git a/lld/lib/Core/TaskGroup.cpp b/llvm/lib/Support/Parallel.cpp
similarity index 89%
rename from lld/lib/Core/TaskGroup.cpp
rename to llvm/lib/Support/Parallel.cpp
index d4de48c..4e38806 100644
--- a/lld/lib/Core/TaskGroup.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -1,141 +1,136 @@
-//===- lld/Core/TaskGroup.cpp - Task Group --------------------------------===//
+//===- llvm/Support/Parallel.cpp - Parallel algorithms --------------------===//
//
-// The LLVM Linker
+// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#include "lld/Core/TaskGroup.h"
+#include "llvm/Support/Parallel.h"
#include "llvm/Config/llvm-config.h"
#include <atomic>
#include <stack>
#include <thread>
-#if defined(_MSC_VER) && LLVM_ENABLE_THREADS
-#include <concrt.h>
-#include <ppl.h>
-#endif
-
-using namespace lld;
+using namespace llvm;
namespace {
/// \brief An abstract class that takes closures and runs them asynchronously.
class Executor {
public:
virtual ~Executor() = default;
virtual void add(std::function<void()> func) = 0;
static Executor *getDefaultExecutor();
};
#if !LLVM_ENABLE_THREADS
class SyncExecutor : public Executor {
public:
virtual void add(std::function<void()> F) { F(); }
};
Executor *Executor::getDefaultExecutor() {
static SyncExecutor Exec;
return &Exec;
}
#elif defined(_MSC_VER)
/// \brief An Executor that runs tasks via ConcRT.
class ConcRTExecutor : public Executor {
struct Taskish {
Taskish(std::function<void()> Task) : Task(Task) {}
std::function<void()> Task;
static void run(void *P) {
Taskish *Self = static_cast<Taskish *>(P);
Self->Task();
concurrency::Free(Self);
}
};
public:
virtual void add(std::function<void()> F) {
Concurrency::CurrentScheduler::ScheduleTask(
Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(F));
}
};
Executor *Executor::getDefaultExecutor() {
static ConcRTExecutor exec;
return &exec;
}
#else
/// \brief An implementation of an Executor that runs closures on a thread pool
/// in filo order.
class ThreadPoolExecutor : public Executor {
public:
explicit ThreadPoolExecutor(
unsigned ThreadCount = std::thread::hardware_concurrency())
: Done(ThreadCount) {
// Spawn all but one of the threads in another thread as spawning threads
// can take a while.
std::thread([&, ThreadCount] {
for (size_t i = 1; i < ThreadCount; ++i) {
std::thread([=] { work(); }).detach();
}
work();
}).detach();
}
~ThreadPoolExecutor() override {
std::unique_lock<std::mutex> Lock(Mutex);
Stop = true;
Lock.unlock();
Cond.notify_all();
// Wait for ~Latch.
}
void add(std::function<void()> F) override {
std::unique_lock<std::mutex> Lock(Mutex);
WorkStack.push(F);
Lock.unlock();
Cond.notify_one();
}
private:
void work() {
while (true) {
std::unique_lock<std::mutex> Lock(Mutex);
Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
if (Stop)
break;
auto Task = WorkStack.top();
WorkStack.pop();
Lock.unlock();
Task();
}
Done.dec();
}
std::atomic<bool> Stop{false};
std::stack<std::function<void()>> WorkStack;
std::mutex Mutex;
std::condition_variable Cond;
Latch Done;
};
Executor *Executor::getDefaultExecutor() {
static ThreadPoolExecutor exec;
return &exec;
}
#endif
}
-void TaskGroup::spawn(std::function<void()> f) {
- _latch.inc();
+void detail::TaskGroup::spawn(std::function<void()> f) {
+ L.inc();
Executor::getDefaultExecutor()->add([&, f] {
f();
- _latch.dec();
+ L.dec();
});
}
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index 1f67710..f8d3c1c 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -1,71 +1,72 @@
set(LLVM_LINK_COMPONENTS
Support
)
add_llvm_unittest(SupportTests
AlignOfTest.cpp
AllocatorTest.cpp
ARMAttributeParser.cpp
ArrayRecyclerTest.cpp
BinaryStreamTest.cpp
BlockFrequencyTest.cpp
BranchProbabilityTest.cpp
CachePruningTest.cpp
Casting.cpp
Chrono.cpp
CommandLineTest.cpp
CompressionTest.cpp
ConvertUTFTest.cpp
DataExtractorTest.cpp
DebugTest.cpp
DwarfTest.cpp
EndianStreamTest.cpp
EndianTest.cpp
ErrorOrTest.cpp
ErrorTest.cpp
FileOutputBufferTest.cpp
FormatVariadicTest.cpp
GlobPatternTest.cpp
Host.cpp
LEB128Test.cpp
LineIteratorTest.cpp
LockFileManagerTest.cpp
MD5Test.cpp
ManagedStatic.cpp
MathExtrasTest.cpp
MemoryBufferTest.cpp
MemoryTest.cpp
NativeFormatTests.cpp
+ ParallelTest.cpp
Path.cpp
ProcessTest.cpp
ProgramTest.cpp
RegexTest.cpp
ReplaceFileTest.cpp
ScaledNumberTest.cpp
SourceMgrTest.cpp
SpecialCaseListTest.cpp
StringPool.cpp
SwapByteOrderTest.cpp
TarWriterTest.cpp
TargetParserTest.cpp
ThreadLocalTest.cpp
ThreadPool.cpp
Threading.cpp
TimerTest.cpp
TypeNameTest.cpp
TrailingObjectsTest.cpp
TrigramIndexTest.cpp
UnicodeTest.cpp
YAMLIOTest.cpp
YAMLParserTest.cpp
formatted_raw_ostream_test.cpp
raw_ostream_test.cpp
raw_pwrite_stream_test.cpp
raw_sha1_ostream_test.cpp
xxhashTest.cpp
)
# ManagedStatic.cpp uses <pthread>.
target_link_libraries(SupportTests ${LLVM_PTHREAD_LIB})
add_subdirectory(DynamicLibrary)
diff --git a/lld/unittests/CoreTests/ParallelTest.cpp b/llvm/unittests/Support/ParallelTest.cpp
similarity index 82%
rename from lld/unittests/CoreTests/ParallelTest.cpp
rename to llvm/unittests/Support/ParallelTest.cpp
index 601a2b0..f381631 100644
--- a/lld/unittests/CoreTests/ParallelTest.cpp
+++ b/llvm/unittests/Support/ParallelTest.cpp
@@ -1,46 +1,48 @@
-//===- lld/unittest/ParallelTest.cpp --------------------------------------===//
+//===- llvm/unittest/Support/ParallelTest.cpp -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Parallel.h unit tests.
///
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Parallel.h"
#include "gtest/gtest.h"
-#include "lld/Core/Parallel.h"
#include <array>
#include <random>
uint32_t array[1024 * 1024];
+using namespace llvm;
+
TEST(Parallel, sort) {
std::mt19937 randEngine;
std::uniform_int_distribution<uint32_t> dist;
for (auto &i : array)
i = dist(randEngine);
- sort(lld::parallel::par, std::begin(array), std::end(array));
+ sort(parallel::par, std::begin(array), std::end(array));
ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array)));
}
TEST(Parallel, parallel_for) {
// We need to test the case with a TaskSize > 1. We are white-box testing
// here. The TaskSize is calculated as (End - Begin) / 1024 at the time of
// writing.
uint32_t range[2050];
std::fill(range, range + 2050, 1);
- for_each_n(lld::parallel::par, 0, 2049, [&range](size_t I) { ++range[I]; });
+ for_each_n(parallel::par, 0, 2049, [&range](size_t I) { ++range[I]; });
uint32_t expected[2049];
std::fill(expected, expected + 2049, 2);
ASSERT_TRUE(std::equal(range, range + 2049, expected));
// Check that we don't write past the end of the requested range.
ASSERT_EQ(range[2049], 1u);
}

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
651326
Default Alt Text
0001-Move-lld-Parallel-algorithms-to-llvm.patch (89 KB)

Event Timeline