From a6b3c30ea293de16f5ee9aa73f09cfebeb5bf6b0 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Wed, 21 Sep 2016 21:41:48 +0000 Subject: [PATCH] [libFuzzer] more refactoring; don't compute sha1sum every time we mutate a unit from the corpus, use the stored one. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282115 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Fuzzer/FuzzerCorpus.h | 51 +++++++++++++++++++++++++++++++------- lib/Fuzzer/FuzzerInternal.h | 15 ++--------- lib/Fuzzer/FuzzerLoop.cpp | 31 +++-------------------- lib/Fuzzer/test/FuzzerUnittest.cpp | 13 ++++------ 4 files changed, 53 insertions(+), 57 deletions(-) diff --git a/lib/Fuzzer/FuzzerCorpus.h b/lib/Fuzzer/FuzzerCorpus.h index 877a483a539..d42e7be475b 100644 --- a/lib/Fuzzer/FuzzerCorpus.h +++ b/lib/Fuzzer/FuzzerCorpus.h @@ -12,22 +12,26 @@ #ifndef LLVM_FUZZER_CORPUS #define LLVM_FUZZER_CORPUS +#include + #include "FuzzerDefs.h" +#include "FuzzerRandom.h" namespace fuzzer { struct InputInfo { Unit U; // The actual input data. + uint8_t Sha1[kSHA1NumBytes]; // Checksum. }; class InputCorpus { public: InputCorpus() { - Corpus.reserve(1 << 14); // Avoid too many resizes. + Inputs.reserve(1 << 14); // Avoid too many resizes. } - size_t size() const { return Corpus.size(); } - bool empty() const { return Corpus.empty(); } - const Unit &operator[] (size_t Idx) const { return Corpus[Idx].U; } + size_t size() const { return Inputs.size(); } + bool empty() const { return Inputs.empty(); } + const Unit &operator[] (size_t Idx) const { return Inputs[Idx].U; } void Append(const std::vector &V) { for (auto &U : V) push_back(U); @@ -37,18 +41,47 @@ class InputCorpus { if (!Hashes.insert(H).second) return; InputInfo II; II.U = U; - Corpus.push_back(II); + memcpy(II.Sha1, H.data(), kSHA1NumBytes); + Inputs.push_back(II); + UpdateCorpusDistribution(); } typedef const std::vector::const_iterator ConstIter; - ConstIter begin() const { return Corpus.begin(); } - ConstIter end() const { return Corpus.end(); } + ConstIter begin() const { return Inputs.begin(); } + ConstIter end() const { return Inputs.end(); } bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } + const InputInfo &ChooseUnitToMutate(Random &Rand) { + return Inputs[ChooseUnitIdxToMutate(Rand)]; + }; + + // Returns an index of random unit from the corpus to mutate. + // Hypothesis: units added to the corpus last are more likely to be + // interesting. This function gives more weight to the more recent units. + size_t ChooseUnitIdxToMutate(Random &Rand) { + size_t Idx = + static_cast(CorpusDistribution(Rand.Get_mt19937())); + assert(Idx < Inputs.size()); + return Idx; + } + +private: + + // Updates the probability distribution for the units in the corpus. + // Must be called whenever the corpus or unit weights are changed. + void UpdateCorpusDistribution() { + size_t N = Inputs.size(); + std::vector Intervals(N + 1); + std::vector Weights(N); + std::iota(Intervals.begin(), Intervals.end(), 0); + std::iota(Weights.begin(), Weights.end(), 1); + CorpusDistribution = std::piecewise_constant_distribution( + Intervals.begin(), Intervals.end(), Weights.begin()); + } + std::piecewise_constant_distribution CorpusDistribution; - private: std::unordered_set Hashes; - std::vector Corpus; + std::vector Inputs; }; } // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index fc77f1d79fb..bc3f61a6e85 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -26,7 +25,7 @@ #include "FuzzerInterface.h" #include "FuzzerOptions.h" #include "FuzzerValueBitMap.h" -#include "FuzzerCorpus.h" // TODO(kcc): remove this from here. +#include "FuzzerCorpus.h" namespace fuzzer { @@ -67,12 +66,7 @@ public: Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options); ~Fuzzer(); - void AddToCorpus(const Unit &U) { - Corpus.push_back(U); - UpdateCorpusDistribution(); - } - size_t ChooseUnitIdxToMutate(); - const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; + void AddToCorpus(const Unit &U) { Corpus.push_back(U); } void Loop(); void ShuffleAndMinimize(UnitVector *V); void InitializeTraceState(); @@ -132,10 +126,6 @@ private: void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, bool DuringInitialCorpusExecution); - // Updates the probability distribution for the units in the corpus. - // Must be called whenever the corpus or unit weights are changed. - void UpdateCorpusDistribution(); - bool UpdateMaxCoverage(); // Trace-based fuzzing: we run a unit with some kind of tracing @@ -170,7 +160,6 @@ private: InputCorpus Corpus; - std::piecewise_constant_distribution CorpusDistribution; UserCallback CB; MutationDispatcher &MD; FuzzingOptions Options; diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index b68185ba3b5..0edc21db3ac 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -374,7 +374,6 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) { if (!Corpus.HasUnit(X)) { if (RunOne(X)) { Corpus.push_back(X); - UpdateCorpusDistribution(); PrintStats("RELOAD"); } } @@ -404,7 +403,6 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { TryDetectingAMemoryLeak(U.data(), U.size(), /*DuringInitialCorpusExecution*/ true); } - UpdateCorpusDistribution(); PrintStats("INITED"); if (Corpus.empty()) { Printf("ERROR: no interesting inputs were found. " @@ -543,7 +541,6 @@ void Fuzzer::PrintNewPCs() { void Fuzzer::ReportNewCoverage(const Unit &U) { Corpus.push_back(U); - UpdateCorpusDistribution(); MD.RecordSuccessfulMutationSequence(); PrintStatusForNewUnit(U); WriteToOutputCorpus(U); @@ -656,8 +653,9 @@ void Fuzzer::MutateAndTestOne() { LazyAllocateCurrentUnitData(); MD.StartMutationSequence(); - auto &U = ChooseUnitToMutate(); - ComputeSHA1(U.data(), U.size(), BaseSha1); // Remember where we started. + const auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); + const auto &U = II.U; + memcpy(BaseSha1, II.Sha1, sizeof(BaseSha1)); assert(CurrentUnitData); size_t Size = U.size(); assert(Size <= Options.MaxLen && "Oversized Unit"); @@ -667,8 +665,7 @@ void Fuzzer::MutateAndTestOne() { size_t NewSize = 0; NewSize = MD.Mutate(CurrentUnitData, Size, Options.MaxLen); assert(NewSize > 0 && "Mutator returned empty unit"); - assert(NewSize <= Options.MaxLen && - "Mutator return overisized unit"); + assert(NewSize <= Options.MaxLen && "Mutator return overisized unit"); Size = NewSize; if (i == 0) StartTraceRecording(); @@ -679,16 +676,6 @@ void Fuzzer::MutateAndTestOne() { } } -// Returns an index of random unit from the corpus to mutate. -// Hypothesis: units added to the corpus last are more likely to be interesting. -// This function gives more weight to the more recent units. -size_t Fuzzer::ChooseUnitIdxToMutate() { - size_t Idx = - static_cast(CorpusDistribution(MD.GetRand().Get_mt19937())); - assert(Idx < Corpus.size()); - return Idx; -} - void Fuzzer::ResetCoverage() { ResetEdgeCoverage(); MaxCoverage.Reset(); @@ -720,16 +707,6 @@ void Fuzzer::Loop() { MD.PrintRecommendedDictionary(); } -void Fuzzer::UpdateCorpusDistribution() { - size_t N = Corpus.size(); - std::vector Intervals(N + 1); - std::vector Weights(N); - std::iota(Intervals.begin(), Intervals.end(), 0); - std::iota(Weights.begin(), Weights.end(), 1); - CorpusDistribution = std::piecewise_constant_distribution( - Intervals.begin(), Intervals.end(), Weights.begin()); -} - } // namespace fuzzer extern "C" { diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index 383849e59b1..4bad901ac59 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -577,19 +577,16 @@ TEST(FuzzerUtil, Base64) { } TEST(Corpus, Distribution) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); Random Rand(0); - MutationDispatcher MD(Rand, {}); - Fuzzer Fuzz(LLVMFuzzerTestOneInput, MD, {}); + InputCorpus C; size_t N = 10; size_t TriesPerUnit = 1<<20; - for (size_t i = 0; i < N; i++) { - Fuzz.AddToCorpus(Unit{ static_cast(i) }); - } + for (size_t i = 0; i < N; i++) + C.push_back(Unit{ static_cast(i) }); + std::vector Hist(N); for (size_t i = 0; i < N * TriesPerUnit; i++) { - Hist[Fuzz.ChooseUnitIdxToMutate()]++; + Hist[C.ChooseUnitIdxToMutate(Rand)]++; } for (size_t i = 0; i < N; i++) { // A weak sanity check that every unit gets invoked. -- 2.11.0