1//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// Merging Corpora.
9//
10// The task:
11//   Take the existing corpus (possibly empty) and merge new inputs into
12//   it so that only inputs with new coverage ('features') are added.
13//   The process should tolerate crashes, OOMs, leaks, etc.
14//
15// Algorithm:
16//   The outer process collects the set of files and writes their names
17//   into a temporary "control" file, then repeatedly launches the inner
18//   process until all inputs are processed.
19//   The outer process does not actually execute the target code.
20//
21//   The inner process reads the control file and sees a) list of all the inputs
22//   and b) the last processed input. Then it starts processing the inputs one
23//   by one. Before processing every input it writes one line to control file:
24//   STARTED INPUT_ID INPUT_SIZE
25//   After processing an input it writes the following lines:
26//   FT INPUT_ID Feature1 Feature2 Feature3 ...
27//   COV INPUT_ID Coverage1 Coverage2 Coverage3 ...
28//   If a crash happens while processing an input, the last line in the
29//   control file will be "STARTED INPUT_ID INPUT_SIZE", so the next process
30//   will know where to resume.
31//
32//   Once all inputs are processed by the inner process(es) the outer process
33//   reads the control file and does the merge based entirely on its contents.
34//   It uses a single-pass greedy algorithm: inputs are considered smallest
35//   first and, among inputs of the same size, those with more new features
36//   come first.
37//
38//===----------------------------------------------------------------------===//
39
40#ifndef LLVM_FUZZER_MERGE_H
41#define LLVM_FUZZER_MERGE_H
42
43#include "FuzzerDefs.h"
44
45#include <istream>
46#include <ostream>
47#include <set>
48#include <vector>
49
50namespace fuzzer {
51
// Per-input record used by the merge.
// NOTE(review): the field meanings below are inferred from the control-file
// format described at the top of this header; the code that fills these fields
// in is not part of this header -- confirm against the Merger implementation.
52struct MergeFileInfo {
  // Name of the input file (presumably as listed in the control file).
53  std::string Name;
  // Size of the input; defaults to 0. Presumably in bytes, matching the
  // INPUT_SIZE field of the "STARTED" control-file line -- verify.
54  size_t Size = 0;
  // Presumably the values recorded on this input's "FT" (features) and "COV"
  // (coverage) control-file lines, respectively -- verify.
55  Vector<uint32_t> Features, Cov;
56};
57
// State and operations for merging corpora based on a control file (see the
// algorithm description at the top of this header).
// Only declarations are visible here; the notes below are derived from the
// signatures and the header comment, and are hedged where the behavior cannot
// be confirmed without the definitions.
58struct Merger {
  // One record per input file.
59  Vector<MergeFileInfo> Files;
  // Presumably the number of leading entries in Files that belong to the
  // existing (first) corpus; defaults to 0 -- verify.
60  size_t NumFilesInFirstCorpus = 0;
  // Presumably the index in Files at which an inner process should resume;
  // defaults to 0 -- verify.
61  size_t FirstNotProcessedFile = 0;
  // NOTE(review): looks like the name of the input that was being processed
  // when a previous inner process died -- confirm.
62  std::string LastFailure;
63
  // Parse control-file contents from a stream or from an in-memory string.
  // ParseCoverage presumably selects whether "COV" lines are recorded.
  // The bool result presumably reports whether parsing succeeded -- verify.
64  bool Parse(std::istream &IS, bool ParseCoverage);
65  bool Parse(const std::string &Str, bool ParseCoverage);
  // Same inputs as the stream overload of Parse, but returns nothing;
  // presumably terminates the process when parsing fails -- verify.
66  void ParseOrExit(std::istream &IS, bool ParseCoverage);
  // Performs the merge. InitialFeatures / InitialCov are read-only inputs;
  // NewFeatures, NewCov and NewFiles are pointers and presumably receive the
  // results (newly found features / coverage and the names of the files
  // selected for addition). The meaning of the size_t result is not visible
  // from this header -- TODO confirm.
67  size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,
68               const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
69               Vector<std::string> *NewFiles);
  // Const query; presumably an estimate, in bytes, of the memory held by
  // Files -- verify.
70  size_t ApproximateMemoryConsumption() const;
  // Const query returning a set of feature ids; presumably the union of the
  // Features of every entry in Files -- verify.
71  Set<uint32_t> AllFeatures() const;
72};
73
// Entry point for the crash-tolerant merge described at the top of this
// header (declaration only; the definition is not in this header).
// NOTE(review): parameter roles below are inferred from names, types and the
// header comment -- confirm against the definition.
//   Args            - presumably the command line used to launch the inner
//                     process(es).
//   OldCorpus       - files of the existing corpus (may be empty per the
//                     header comment).
//   NewCorpus       - candidate files to merge in.
//   NewFiles        - pointer; presumably receives the names of the files
//                     chosen for addition.
//   InitialFeatures - features considered already covered (read-only).
//   NewFeatures     - pointer; presumably receives newly discovered features.
//   InitialCov      - coverage considered already present (read-only).
//   NewCov          - pointer; presumably receives newly discovered coverage.
//   CFPath          - presumably the path of the control file.
//   Verbose         - presumably enables extra diagnostic output.
74void CrashResistantMerge(const Vector<std::string> &Args,
75                         const Vector<SizedFile> &OldCorpus,
76                         const Vector<SizedFile> &NewCorpus,
77                         Vector<std::string> *NewFiles,
78                         const Set<uint32_t> &InitialFeatures,
79                         Set<uint32_t> *NewFeatures,
80                         const Set<uint32_t> &InitialCov,
81                         Set<uint32_t> *NewCov,
82                         const std::string &CFPath,
83                         bool Verbose);
84
85}  // namespace fuzzer
86
87#endif  // LLVM_FUZZER_MERGE_H
88