1//===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// Spawn and orchestrate separate fuzzing processes.
9//===----------------------------------------------------------------------===//
10
11#include "FuzzerCommand.h"
12#include "FuzzerFork.h"
13#include "FuzzerIO.h"
14#include "FuzzerInternal.h"
15#include "FuzzerMerge.h"
16#include "FuzzerSHA1.h"
17#include "FuzzerTracePC.h"
18#include "FuzzerUtil.h"
19
20#include <atomic>
21#include <chrono>
22#include <condition_variable>
23#include <fstream>
24#include <memory>
25#include <mutex>
26#include <queue>
27#include <sstream>
28#include <thread>
29
30namespace fuzzer {
31
// Final counters read back from a child process's log.
// A counter that is never found in the log keeps its default of 0.
struct Stats {
  size_t number_of_executed_units = 0;
  size_t peak_rss_mb = 0;
  size_t average_exec_per_sec = 0;
};

// Scans the text file at |LogPath| for lines of the form
//   stat::<name>: <unsigned value>
// and returns the values of the three counters held by Stats.
//
// - Only lines that begin with "stat::" are considered.
// - Unknown stat names are ignored.
// - If a name occurs several times, the last well-formed occurrence wins.
// - A line whose value is missing or not a number is skipped, so it can never
//   clobber a value parsed earlier (and never reads an uninitialized local).
// - An unreadable or missing file yields a default-constructed Stats.
static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
  std::ifstream In(LogPath);
  std::string Line;
  Stats Res;
  struct {
    const char *Name;
    size_t *Var;
  } NameVarPairs[] = {
      {"stat::number_of_executed_units:", &Res.number_of_executed_units},
      {"stat::peak_rss_mb:", &Res.peak_rss_mb},
      {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
  };
  while (std::getline(In, Line, '\n')) {
    // Prefix test only; unlike find(), this never scans the whole line.
    if (Line.compare(0, 6, "stat::") != 0) continue;
    std::istringstream ISS(Line);
    std::string Name;
    size_t Val = 0;
    // Extraction fails on a truncated line ("stat::x:") or a non-numeric
    // value; in the former case Val would otherwise be left untouched.
    if (!(ISS >> Name >> Val)) continue;
    for (const auto &Pair : NameVarPairs)
      if (Name == Pair.Name)
        *Pair.Var = Val;
  }
  return Res;
}
63
64struct FuzzJob {
65  // Inputs.
66  Command Cmd;
67  std::string CorpusDir;
68  std::string FeaturesDir;
69  std::string LogPath;
70  std::string SeedListPath;
71  std::string CFPath;
72  size_t      JobId;
73
74  int         DftTimeInSeconds = 0;
75
76  // Fuzzing Outputs.
77  int ExitCode;
78
79  ~FuzzJob() {
80    RemoveFile(CFPath);
81    RemoveFile(LogPath);
82    RemoveFile(SeedListPath);
83    RmDirRecursive(CorpusDir);
84    RmDirRecursive(FeaturesDir);
85  }
86};
87
88struct GlobalEnv {
89  Vector<std::string> Args;
90  Vector<std::string> CorpusDirs;
91  std::string MainCorpusDir;
92  std::string TempDir;
93  std::string DFTDir;
94  std::string DataFlowBinary;
95  Set<uint32_t> Features, Cov;
96  Set<std::string> FilesWithDFT;
97  Vector<std::string> Files;
98  Random *Rand;
99  std::chrono::system_clock::time_point ProcessStartTime;
100  int Verbosity = 0;
101
102  size_t NumTimeouts = 0;
103  size_t NumOOMs = 0;
104  size_t NumCrashes = 0;
105
106
107  size_t NumRuns = 0;
108
109  std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }
110
111  size_t secondsSinceProcessStartUp() const {
112    return std::chrono::duration_cast<std::chrono::seconds>(
113               std::chrono::system_clock::now() - ProcessStartTime)
114        .count();
115  }
116
117  FuzzJob *CreateNewJob(size_t JobId) {
118    Command Cmd(Args);
119    Cmd.removeFlag("fork");
120    Cmd.removeFlag("runs");
121    Cmd.removeFlag("collect_data_flow");
122    for (auto &C : CorpusDirs) // Remove all corpora from the args.
123      Cmd.removeArgument(C);
124    Cmd.addFlag("reload", "0");  // working in an isolated dir, no reload.
125    Cmd.addFlag("print_final_stats", "1");
126    Cmd.addFlag("print_funcs", "0");  // no need to spend time symbolizing.
127    Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
128    Cmd.addFlag("stop_file", StopFile());
129    if (!DataFlowBinary.empty()) {
130      Cmd.addFlag("data_flow_trace", DFTDir);
131      if (!Cmd.hasFlag("focus_function"))
132        Cmd.addFlag("focus_function", "auto");
133    }
134    auto Job = new FuzzJob;
135    std::string Seeds;
136    if (size_t CorpusSubsetSize =
137            std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
138      auto Time1 = std::chrono::system_clock::now();
139      for (size_t i = 0; i < CorpusSubsetSize; i++) {
140        auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
141        Seeds += (Seeds.empty() ? "" : ",") + SF;
142        CollectDFT(SF);
143      }
144      auto Time2 = std::chrono::system_clock::now();
145      Job->DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
146    }
147    if (!Seeds.empty()) {
148      Job->SeedListPath =
149          DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
150      WriteToFile(Seeds, Job->SeedListPath);
151      Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
152    }
153    Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
154    Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
155    Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
156    Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
157    Job->JobId = JobId;
158
159
160    Cmd.addArgument(Job->CorpusDir);
161    Cmd.addFlag("features_dir", Job->FeaturesDir);
162
163    for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
164      RmDirRecursive(D);
165      MkDir(D);
166    }
167
168    Cmd.setOutputFile(Job->LogPath);
169    Cmd.combineOutAndErr();
170
171    Job->Cmd = Cmd;
172
173    if (Verbosity >= 2)
174      Printf("Job %zd/%p Created: %s\n", JobId, Job,
175             Job->Cmd.toString().c_str());
176    // Start from very short runs and gradually increase them.
177    return Job;
178  }
179
180  void RunOneMergeJob(FuzzJob *Job) {
181    auto Stats = ParseFinalStatsFromLog(Job->LogPath);
182    NumRuns += Stats.number_of_executed_units;
183
184    Vector<SizedFile> TempFiles, MergeCandidates;
185    // Read all newly created inputs and their feature sets.
186    // Choose only those inputs that have new features.
187    GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
188    std::sort(TempFiles.begin(), TempFiles.end());
189    for (auto &F : TempFiles) {
190      auto FeatureFile = F.File;
191      FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
192      auto FeatureBytes = FileToVector(FeatureFile, 0, false);
193      assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
194      Vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
195      memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
196      for (auto Ft : NewFeatures) {
197        if (!Features.count(Ft)) {
198          MergeCandidates.push_back(F);
199          break;
200        }
201      }
202    }
203    // if (!FilesToAdd.empty() || Job->ExitCode != 0)
204    Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd "
205           "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
206           NumRuns, Cov.size(), Features.size(), Files.size(),
207           Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,
208           secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);
209
210    if (MergeCandidates.empty()) return;
211
212    Vector<std::string> FilesToAdd;
213    Set<uint32_t> NewFeatures, NewCov;
214    CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
215                        &NewFeatures, Cov, &NewCov, Job->CFPath, false);
216    for (auto &Path : FilesToAdd) {
217      auto U = FileToVector(Path);
218      auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
219      WriteToFile(U, NewPath);
220      Files.push_back(NewPath);
221    }
222    Features.insert(NewFeatures.begin(), NewFeatures.end());
223    Cov.insert(NewCov.begin(), NewCov.end());
224    for (auto Idx : NewCov)
225      if (auto *TE = TPC.PCTableEntryByIdx(Idx))
226        if (TPC.PcIsFuncEntry(TE))
227          PrintPC("  NEW_FUNC: %p %F %L\n", "",
228                  TPC.GetNextInstructionPc(TE->PC));
229
230  }
231
232
233  void CollectDFT(const std::string &InputPath) {
234    if (DataFlowBinary.empty()) return;
235    if (!FilesWithDFT.insert(InputPath).second) return;
236    Command Cmd(Args);
237    Cmd.removeFlag("fork");
238    Cmd.removeFlag("runs");
239    Cmd.addFlag("data_flow_trace", DFTDir);
240    Cmd.addArgument(InputPath);
241    for (auto &C : CorpusDirs) // Remove all corpora from the args.
242      Cmd.removeArgument(C);
243    Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
244    Cmd.combineOutAndErr();
245    // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
246    ExecuteCommand(Cmd);
247  }
248
249};
250
251struct JobQueue {
252  std::queue<FuzzJob *> Qu;
253  std::mutex Mu;
254  std::condition_variable Cv;
255
256  void Push(FuzzJob *Job) {
257    {
258      std::lock_guard<std::mutex> Lock(Mu);
259      Qu.push(Job);
260    }
261    Cv.notify_one();
262  }
263  FuzzJob *Pop() {
264    std::unique_lock<std::mutex> Lk(Mu);
265    // std::lock_guard<std::mutex> Lock(Mu);
266    Cv.wait(Lk, [&]{return !Qu.empty();});
267    assert(!Qu.empty());
268    auto Job = Qu.front();
269    Qu.pop();
270    return Job;
271  }
272};
273
274void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {
275  while (auto Job = FuzzQ->Pop()) {
276    // Printf("WorkerThread: job %p\n", Job);
277    Job->ExitCode = ExecuteCommand(Job->Cmd);
278    MergeQ->Push(Job);
279  }
280}
281
282// This is just a skeleton of an experimental -fork=1 feature.
283void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
284                  const Vector<std::string> &Args,
285                  const Vector<std::string> &CorpusDirs, int NumJobs) {
286  Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);
287
288  GlobalEnv Env;
289  Env.Args = Args;
290  Env.CorpusDirs = CorpusDirs;
291  Env.Rand = &Rand;
292  Env.Verbosity = Options.Verbosity;
293  Env.ProcessStartTime = std::chrono::system_clock::now();
294  Env.DataFlowBinary = Options.CollectDataFlow;
295
296  Vector<SizedFile> SeedFiles;
297  for (auto &Dir : CorpusDirs)
298    GetSizedFilesFromDir(Dir, &SeedFiles);
299  std::sort(SeedFiles.begin(), SeedFiles.end());
300  Env.TempDir = TempPath(".dir");
301  Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
302  RmDirRecursive(Env.TempDir);  // in case there is a leftover from old runs.
303  MkDir(Env.TempDir);
304  MkDir(Env.DFTDir);
305
306
307  if (CorpusDirs.empty())
308    MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
309  else
310    Env.MainCorpusDir = CorpusDirs[0];
311
312  auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
313  CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
314                      {}, &Env.Cov,
315                      CFPath, false);
316  RemoveFile(CFPath);
317  Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
318         Env.Files.size(), Env.TempDir.c_str());
319
320  int ExitCode = 0;
321
322  JobQueue FuzzQ, MergeQ;
323
324  auto StopJobs = [&]() {
325    for (int i = 0; i < NumJobs; i++)
326      FuzzQ.Push(nullptr);
327    MergeQ.Push(nullptr);
328    WriteToFile(Unit({1}), Env.StopFile());
329  };
330
331  size_t JobId = 1;
332  Vector<std::thread> Threads;
333  for (int t = 0; t < NumJobs; t++) {
334    Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
335    FuzzQ.Push(Env.CreateNewJob(JobId++));
336  }
337
338  while (true) {
339    std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
340    if (!Job)
341      break;
342    ExitCode = Job->ExitCode;
343    if (ExitCode == Options.InterruptExitCode) {
344      Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
345      StopJobs();
346      break;
347    }
348    Fuzzer::MaybeExitGracefully();
349
350    Env.RunOneMergeJob(Job.get());
351
352    // Continue if our crash is one of the ignorred ones.
353    if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
354      Env.NumTimeouts++;
355    else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
356      Env.NumOOMs++;
357    else if (ExitCode != 0) {
358      Env.NumCrashes++;
359      if (Options.IgnoreCrashes) {
360        std::ifstream In(Job->LogPath);
361        std::string Line;
362        while (std::getline(In, Line, '\n'))
363          if (Line.find("ERROR:") != Line.npos ||
364              Line.find("runtime error:") != Line.npos)
365            Printf("%s\n", Line.c_str());
366      } else {
367        // And exit if we don't ignore this crash.
368        Printf("INFO: log from the inner process:\n%s",
369               FileToString(Job->LogPath).c_str());
370        StopJobs();
371        break;
372      }
373    }
374
375    // Stop if we are over the time budget.
376    // This is not precise, since other threads are still running
377    // and we will wait while joining them.
378    // We also don't stop instantly: other jobs need to finish.
379    if (Options.MaxTotalTimeSec > 0 &&
380        Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
381      Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
382             Env.secondsSinceProcessStartUp());
383      StopJobs();
384      break;
385    }
386    if (Env.NumRuns >= Options.MaxNumberOfRuns) {
387      Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
388             Env.NumRuns);
389      StopJobs();
390      break;
391    }
392
393    FuzzQ.Push(Env.CreateNewJob(JobId++));
394  }
395
396  for (auto &T : Threads)
397    T.join();
398
399  // The workers have terminated. Don't try to remove the directory before they
400  // terminate to avoid a race condition preventing cleanup on Windows.
401  RmDirRecursive(Env.TempDir);
402
403  // Use the exit code from the last child process.
404  Printf("INFO: exiting: %d time: %zds\n", ExitCode,
405         Env.secondsSinceProcessStartUp());
406  exit(ExitCode);
407}
408
409} // namespace fuzzer
410