//===- FuzzerMerge.h - merging corpora -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Merging Corpora.
//
// The task:
//   Take the existing corpus (possibly empty) and merge new inputs into
//   it so that only inputs with new coverage ('features') are added.
//   The process should tolerate crashes, OOMs, leaks, etc.
//
// Algorithm:
//   The outer process collects the set of files and writes their names
//   into a temporary "control" file, then repeatedly launches the inner
//   process until all inputs are processed.
//   The outer process does not actually execute the target code.
//
//   The inner process reads the control file and sees a) the list of all the
//   inputs and b) the last processed input. Then it starts processing the
//   inputs one by one. Before processing every input it writes one line to
//   the control file:
//     STARTED INPUT_ID INPUT_SIZE
//   After processing an input it writes another line:
//     DONE INPUT_ID Feature1 Feature2 Feature3 ...
//   If a crash happens while processing an input the last line in the control
//   file will be "STARTED INPUT_ID" and so the next process will know
//   where to resume.
//
//   Once all inputs are processed by the inner process(es) the outer process
//   reads the control file and does the merge based entirely on the contents
//   of the control file.
34326943Sdim// It uses a single pass greedy algorithm choosing first the smallest inputs 35326943Sdim// within the same size the inputs that have more new features. 36326943Sdim// 37326943Sdim//===----------------------------------------------------------------------===// 38326943Sdim 39326943Sdim#ifndef LLVM_FUZZER_MERGE_H 40326943Sdim#define LLVM_FUZZER_MERGE_H 41326943Sdim 42326943Sdim#include "FuzzerDefs.h" 43326943Sdim 44326943Sdim#include <istream> 45326943Sdim#include <ostream> 46326943Sdim#include <set> 47326943Sdim#include <vector> 48326943Sdim 49326943Sdimnamespace fuzzer { 50326943Sdim 51326943Sdimstruct MergeFileInfo { 52326943Sdim std::string Name; 53326943Sdim size_t Size = 0; 54353358Sdim Vector<uint32_t> Features, Cov; 55326943Sdim}; 56326943Sdim 57326943Sdimstruct Merger { 58326943Sdim Vector<MergeFileInfo> Files; 59326943Sdim size_t NumFilesInFirstCorpus = 0; 60326943Sdim size_t FirstNotProcessedFile = 0; 61326943Sdim std::string LastFailure; 62326943Sdim 63326943Sdim bool Parse(std::istream &IS, bool ParseCoverage); 64326943Sdim bool Parse(const std::string &Str, bool ParseCoverage); 65326943Sdim void ParseOrExit(std::istream &IS, bool ParseCoverage); 66353358Sdim size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures, 67353358Sdim const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov, 68326943Sdim Vector<std::string> *NewFiles); 69326943Sdim size_t ApproximateMemoryConsumption() const; 70326943Sdim Set<uint32_t> AllFeatures() const; 71326943Sdim}; 72326943Sdim 73353358Sdimvoid CrashResistantMerge(const Vector<std::string> &Args, 74353358Sdim const Vector<SizedFile> &OldCorpus, 75353358Sdim const Vector<SizedFile> &NewCorpus, 76353358Sdim Vector<std::string> *NewFiles, 77353358Sdim const Set<uint32_t> &InitialFeatures, 78353358Sdim Set<uint32_t> *NewFeatures, 79353358Sdim const Set<uint32_t> &InitialCov, 80353358Sdim Set<uint32_t> *NewCov, 81353358Sdim const std::string &CFPath, 82353358Sdim bool Verbose); 83353358Sdim 
84326943Sdim} // namespace fuzzer 85326943Sdim 86326943Sdim#endif // LLVM_FUZZER_MERGE_H 87