1326943Sdim//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
2326943Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6326943Sdim//
7326943Sdim//===----------------------------------------------------------------------===//
8326943Sdim// Merging Corpora.
9326943Sdim//
10326943Sdim// The task:
11326943Sdim//   Take the existing corpus (possibly empty) and merge new inputs into
12326943Sdim//   it so that only inputs with new coverage ('features') are added.
13326943Sdim//   The process should tolerate the crashes, OOMs, leaks, etc.
14326943Sdim//
15326943Sdim// Algorithm:
16326943Sdim//   The outer process collects the set of files and writes their names
17326943Sdim//   into a temporary "control" file, then repeatedly launches the inner
18326943Sdim//   process until all inputs are processed.
19326943Sdim//   The outer process does not actually execute the target code.
20326943Sdim//
21326943Sdim//   The inner process reads the control file and sees a) list of all the inputs
22326943Sdim//   and b) the last processed input. Then it starts processing the inputs one
23326943Sdim//   by one. Before processing every input it writes one line to control file:
24326943Sdim//   STARTED INPUT_ID INPUT_SIZE
25326943Sdim//   After processing an input it writes another line:
26326943Sdim//   DONE INPUT_ID Feature1 Feature2 Feature3 ...
27326943Sdim//   If a crash happens while processing an input the last line in the control
28326943Sdim//   file will be "STARTED INPUT_ID" and so the next process will know
29326943Sdim//   where to resume.
30326943Sdim//
31326943Sdim//   Once all inputs are processed by the inner process(es) the outer process
32326943Sdim//   reads the control file and does the merge based entirely on the contents
33326943Sdim//   of the control file.
34326943Sdim//   It uses a single-pass greedy algorithm, choosing the smallest inputs first
35326943Sdim//   and, among inputs of the same size, those that have more new features.
36326943Sdim//
37326943Sdim//===----------------------------------------------------------------------===//
38326943Sdim
39326943Sdim#ifndef LLVM_FUZZER_MERGE_H
40326943Sdim#define LLVM_FUZZER_MERGE_H
41326943Sdim
42326943Sdim#include "FuzzerDefs.h"
43326943Sdim
44326943Sdim#include <istream>
45326943Sdim#include <ostream>
46326943Sdim#include <set>
47326943Sdim#include <vector>
48326943Sdim
49326943Sdimnamespace fuzzer {
50326943Sdim
51326943Sdimstruct MergeFileInfo {  // Record describing one input file that takes part in a merge.
52326943Sdim  std::string Name;  // File name; presumably as listed in the control file -- confirm.
53326943Sdim  size_t Size = 0;  // Input size; stays 0 until assigned.
54353358Sdim  Vector<uint32_t> Features, Cov;  // Presumably the feature ids and coverage ids recorded for this input -- confirm in FuzzerMerge.cpp.
55326943Sdim};
56326943Sdim
57326943Sdimstruct Merger {  // State read from a merge control file plus the operations that act on it.
58326943Sdim  Vector<MergeFileInfo> Files;  // One record per input file.
59326943Sdim  size_t NumFilesInFirstCorpus = 0;  // Presumably how many entries of Files belong to the first (existing) corpus -- confirm.
60326943Sdim  size_t FirstNotProcessedFile = 0;  // Presumably the index in Files where processing should resume -- confirm.
61326943Sdim  std::string LastFailure;  // NOTE(review): presumably the input that was STARTED but never DONE -- confirm.
62326943Sdim
63326943Sdim  bool Parse(std::istream &IS, bool ParseCoverage);  // Parses control-file text from a stream; the bool result presumably signals success -- confirm.
64326943Sdim  bool Parse(const std::string &Str, bool ParseCoverage);  // Same as above, taking the control-file text as a string.
65326943Sdim  void ParseOrExit(std::istream &IS, bool ParseCoverage);  // NOTE(review): by its name, parses and terminates on failure -- confirm.
66353358Sdim  size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,  // Initial* sets are read-only inputs; New* pointers are out-parameters.
67353358Sdim               const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
68326943Sdim               Vector<std::string> *NewFiles);  // NOTE(review): meaning of the size_t result is not visible here -- confirm in FuzzerMerge.cpp.
69326943Sdim  size_t ApproximateMemoryConsumption() const;  // Const query; presumably an estimate of the memory held by Files -- confirm.
70326943Sdim  Set<uint32_t> AllFeatures() const;  // Const query; presumably the union of Features over all Files -- confirm.
71326943Sdim};
72326943Sdim
73353358Sdimvoid CrashResistantMerge(const Vector<std::string> &Args,  // Entry point for the merge; Args is presumably the command line used to launch inner processes -- confirm.
74353358Sdim                         const Vector<SizedFile> &OldCorpus,  // Files of the existing corpus (read-only).
75353358Sdim                         const Vector<SizedFile> &NewCorpus,  // Candidate files to merge in (read-only).
76353358Sdim                         Vector<std::string> *NewFiles,  // Out-parameter; presumably receives the names of the inputs chosen for addition -- confirm.
77353358Sdim                         const Set<uint32_t> &InitialFeatures,  // Read-only input set of features.
78353358Sdim                         Set<uint32_t> *NewFeatures,  // Out-parameter for features.
79353358Sdim                         const Set<uint32_t> &InitialCov,  // Read-only input set of coverage ids.
80353358Sdim                         Set<uint32_t> *NewCov,  // Out-parameter for coverage ids.
81353358Sdim                         const std::string &CFPath,  // Presumably the path of the control file -- confirm.
82353358Sdim                         bool Verbose);  // Presumably enables extra diagnostic output -- confirm.
83353358Sdim
84326943Sdim}  // namespace fuzzer
85326943Sdim
86326943Sdim#endif  // LLVM_FUZZER_MERGE_H
87