From 40a2c5247c293749c005681aba150f7ae9f0cc51 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Tue, 19 Jul 2016 00:57:09 +0000 Subject: [PATCH] Revert "[llvm-profdata] Speed up merging by using a thread pool" This reverts commit r275921. It broke the ppc64be bot: http://lab.llvm.org:8011/builders/clang-ppc64be-linux-multistage/builds/3537 I'm not sure why it broke, but based on the output, it looks like an off-by-one (one profile left un-merged). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275937 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CommandGuide/llvm-profdata.rst | 5 - include/llvm/ProfileData/InstrProfWriter.h | 2 - lib/ProfileData/InstrProfWriter.cpp | 8 -- test/tools/llvm-profdata/multiple-inputs.test | 40 -------- tools/llvm-profdata/llvm-profdata.cpp | 138 +++++--------------------- unittests/ProfileData/InstrProfTest.cpp | 25 ----- 6 files changed, 23 insertions(+), 195 deletions(-) diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst index bae0ff7d4ce..f5508b5b2b8 100644 --- a/docs/CommandGuide/llvm-profdata.rst +++ b/docs/CommandGuide/llvm-profdata.rst @@ -106,11 +106,6 @@ OPTIONS conjunction with -instr. Defaults to false, since it can inhibit compiler optimization during PGO. -.. option:: -num-threads=N, -j=N - - Use N threads to perform profile merging. When N=0, llvm-profdata auto-detects - an appropriate number of threads to use. This is the default. - EXAMPLES ^^^^^^^^ Basic Usage diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h index f7780fb4500..7d292731ccc 100644 --- a/include/llvm/ProfileData/InstrProfWriter.h +++ b/include/llvm/ProfileData/InstrProfWriter.h @@ -47,8 +47,6 @@ public: /// for this function and the hash and number of counts match, each counter is /// summed. Optionally scale counts by \p Weight. Error addRecord(InstrProfRecord &&I, uint64_t Weight = 1); - /// Merge existing function counts from the given writer. - Error mergeRecordsFromWriter(InstrProfWriter &&IPW); /// Write the profile to \c OS void write(raw_fd_ostream &OS); /// Write the profile in text format to \c OS diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 7fabcdbff63..e25299ef670 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -182,14 +182,6 @@ Error InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) { return Dest.takeError(); } -Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) { - for (auto &I : IPW.FunctionData) - for (auto &Func : I.getValue()) - if (Error E = addRecord(std::move(Func.second), 1)) - return E; - return Error::success(); -} - bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { if (!Sparse) return true; diff --git a/test/tools/llvm-profdata/multiple-inputs.test b/test/tools/llvm-profdata/multiple-inputs.test index 399438a4d2d..40d11083c0c 100644 --- a/test/tools/llvm-profdata/multiple-inputs.test +++ b/test/tools/llvm-profdata/multiple-inputs.test @@ -51,43 +51,3 @@ DISJOINT-2: Block counts: [2, 3] DISJOINT: Total functions: 2 DISJOINT: Maximum function count: 1 DISJOINT: Maximum internal block count: 3 - -RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: -num-threads 2 -o %t -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4 -RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: -j 3 -o %t -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4 -FOO4: foo: -FOO4: Counters: 3 -FOO4: Function count: 4 -FOO4: Block counts: [8, 12] -FOO4: Total functions: 1 -FOO4: Maximum function count: 4 -FOO4: Maximum internal block count: 12 - -RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext -j 2 -o %t -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5 -RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext -j 3 -o %t -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5 -RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext -o %t -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5 -RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \ -RUN: %p/Inputs/foo3-1.proftext -j 1 -o %t -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5 -FOO5: foo: -FOO5: Counters: 3 -FOO5: Function count: 5 -FOO5: Block counts: [10, 15] -FOO5: Total functions: 1 -FOO5: Maximum function count: 5 -FOO5: Maximum internal block count: 15 diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp index e57b19f5a06..8e4b4c3d4ed 100644 --- a/tools/llvm-profdata/llvm-profdata.cpp +++ b/tools/llvm-profdata/llvm-profdata.cpp @@ -29,7 +29,6 @@ #include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" -#include "llvm/Support/ThreadPool.h" #include "llvm/Support/raw_ostream.h" #include @@ -118,68 +117,9 @@ struct WeightedFile { }; typedef SmallVector WeightedFileVector; -/// Keep track of merged data and reported errors. -struct WriterContext { - std::mutex Lock; - InstrProfWriter Writer; - Error Err; - StringRef ErrWhence; - std::mutex &ErrLock; - SmallSet &WriterErrorCodes; - - WriterContext(bool IsSparse, std::mutex &ErrLock, - SmallSet &WriterErrorCodes) - : Lock(), Writer(IsSparse), Err(Error::success()), ErrWhence(""), - ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {} -}; - -/// Load an input into a writer context. -static void loadInput(const WeightedFile &Input, WriterContext *WC) { - std::unique_lock CtxGuard{WC->Lock}; - - // If there's a pending hard error, don't do more work. - if (WC->Err) - return; - - WC->ErrWhence = Input.Filename; - - auto ReaderOrErr = InstrProfReader::create(Input.Filename); - if ((WC->Err = ReaderOrErr.takeError())) - return; - - auto Reader = std::move(ReaderOrErr.get()); - bool IsIRProfile = Reader->isIRLevelProfile(); - if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) { - WC->Err = make_error( - "Merge IR generated profile with Clang generated profile.", - std::error_code()); - return; - } - - for (auto &I : *Reader) { - if (Error E = WC->Writer.addRecord(std::move(I), Input.Weight)) { - // Only show hint the first time an error occurs. - instrprof_error IPE = InstrProfError::take(std::move(E)); - std::unique_lock ErrGuard{WC->ErrLock}; - bool firstTime = WC->WriterErrorCodes.insert(IPE).second; - handleMergeWriterError(make_error(IPE), Input.Filename, - I.Name, firstTime); - } - } - if (Reader->hasError()) - WC->Err = Reader->getError(); -} - -/// Merge the \p Src writer context into \p Dst. -static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) { - if (Error E = Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer))) - Dst->Err = std::move(E); -} - static void mergeInstrProfile(const WeightedFileVector &Inputs, StringRef OutputFilename, - ProfileFormat OutputFormat, bool OutputSparse, - unsigned NumThreads) { + ProfileFormat OutputFormat, bool OutputSparse) { if (OutputFilename.compare("-") == 0) exitWithError("Cannot write indexed profdata format to stdout."); @@ -191,57 +131,30 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, if (EC) exitWithErrorCode(EC, OutputFilename); - std::mutex ErrorLock; + InstrProfWriter Writer(OutputSparse); SmallSet WriterErrorCodes; - - // If NumThreads is not specified, auto-detect a good default. - if (NumThreads == 0) - NumThreads = std::max(1U, std::min(std::thread::hardware_concurrency(), - unsigned(Inputs.size() / 2))); - - // Initialize the writer contexts. - SmallVector, 4> Contexts; - for (unsigned I = 0; I < NumThreads; ++I) - Contexts.emplace_back(llvm::make_unique( - OutputSparse, ErrorLock, WriterErrorCodes)); - - if (NumThreads == 1) { - for (const auto &Input : Inputs) - loadInput(Input, Contexts[0].get()); - } else { - ThreadPool Pool(NumThreads); - - // Load the inputs in parallel (N/NumThreads serial steps). - unsigned Ctx = 0; - for (const auto &Input : Inputs) { - Pool.async(loadInput, Input, Contexts[Ctx].get()); - Ctx = (Ctx + 1) % NumThreads; + for (const auto &Input : Inputs) { + auto ReaderOrErr = InstrProfReader::create(Input.Filename); + if (Error E = ReaderOrErr.takeError()) + exitWithError(std::move(E), Input.Filename); + + auto Reader = std::move(ReaderOrErr.get()); + bool IsIRProfile = Reader->isIRLevelProfile(); + if (Writer.setIsIRLevelProfile(IsIRProfile)) + exitWithError("Merge IR generated profile with Clang generated profile."); + + for (auto &I : *Reader) { + if (Error E = Writer.addRecord(std::move(I), Input.Weight)) { + // Only show hint the first time an error occurs. + instrprof_error IPE = InstrProfError::take(std::move(E)); + bool firstTime = WriterErrorCodes.insert(IPE).second; + handleMergeWriterError(make_error(IPE), Input.Filename, + I.Name, firstTime); + } } - Pool.wait(); - - // Merge the writer contexts together (lg(NumThreads) serial steps). - unsigned Mid = Contexts.size() / 2; - unsigned End = Contexts.size(); - assert(Mid > 0 && "Expected more than one context"); - do { - for (unsigned I = 0; I < Mid; ++I) - Pool.async(mergeWriterContexts, Contexts[I].get(), - Contexts[I + Mid].get()); - if (End & 1) - Pool.async(mergeWriterContexts, Contexts[0].get(), - Contexts[End - 1].get()); - Pool.wait(); - End = Mid; - Mid /= 2; - } while (Mid > 0); + if (Reader->hasError()) + exitWithError(Reader->getError(), Input.Filename); } - - // Handle deferred hard errors encountered during merging. - for (std::unique_ptr &WC : Contexts) - if (WC->Err) - exitWithError(std::move(WC->Err), WC->ErrWhence); - - InstrProfWriter &Writer = Contexts[0]->Writer; if (OutputFormat == PF_Text) Writer.writeText(Output); else @@ -375,11 +288,6 @@ static int merge_main(int argc, const char *argv[]) { clEnumValEnd)); cl::opt OutputSparse("sparse", cl::init(false), cl::desc("Generate a sparse profile (only meaningful for -instr)")); - cl::opt NumThreads( - "num-threads", cl::init(0), - cl::desc("Number of merge threads to use (default: autodetect)")); - cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"), - cl::aliasopt(NumThreads)); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -406,7 +314,7 @@ static int merge_main(int argc, const char *argv[]) { if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat, - OutputSparse, NumThreads); + OutputSparse); else mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat); diff --git a/unittests/ProfileData/InstrProfTest.cpp b/unittests/ProfileData/InstrProfTest.cpp index 4efb17ecc26..c13f31251de 100644 --- a/unittests/ProfileData/InstrProfTest.cpp +++ b/unittests/ProfileData/InstrProfTest.cpp @@ -204,31 +204,6 @@ TEST_F(InstrProfTest, get_profile_summary) { delete PSFromMD; } -TEST_F(InstrProfTest, test_writer_merge) { - InstrProfRecord Record1("func1", 0x1234, {42}); - NoError(Writer.addRecord(std::move(Record1))); - - InstrProfWriter Writer2; - InstrProfRecord Record2("func2", 0x1234, {0, 0}); - NoError(Writer2.addRecord(std::move(Record2))); - - NoError(Writer.mergeRecordsFromWriter(std::move(Writer2))); - - auto Profile = Writer.writeBuffer(); - readProfile(std::move(Profile)); - - Expected R = Reader->getInstrProfRecord("func1", 0x1234); - ASSERT_TRUE(NoError(R.takeError())); - ASSERT_EQ(1U, R->Counts.size()); - ASSERT_EQ(42U, R->Counts[0]); - - R = Reader->getInstrProfRecord("func2", 0x1234); - ASSERT_TRUE(NoError(R.takeError())); - ASSERT_EQ(2U, R->Counts.size()); - ASSERT_EQ(0U, R->Counts[0]); - ASSERT_EQ(0U, R->Counts[1]); -} - static const char callee1[] = "callee1"; static const char callee2[] = "callee2"; static const char callee3[] = "callee3"; -- 2.11.0