1//===- split-file.cpp - Input splitting utility ---------------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// Split input into multipe parts separated by regex '^(.|//)--- ' and extract 10// the specified part. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/ADT/DenseMap.h" 15#include "llvm/ADT/StringExtras.h" 16#include "llvm/ADT/StringRef.h" 17#include "llvm/Support/CommandLine.h" 18#include "llvm/Support/FileOutputBuffer.h" 19#include "llvm/Support/FileSystem.h" 20#include "llvm/Support/LineIterator.h" 21#include "llvm/Support/MemoryBuffer.h" 22#include "llvm/Support/Path.h" 23#include "llvm/Support/ToolOutputFile.h" 24#include "llvm/Support/WithColor.h" 25#include <string> 26#include <system_error> 27 28using namespace llvm; 29 30static cl::OptionCategory cat("split-file Options"); 31 32static cl::opt<std::string> input(cl::Positional, cl::desc("filename"), 33 cl::cat(cat)); 34 35static cl::opt<std::string> output(cl::Positional, cl::desc("directory"), 36 cl::value_desc("directory"), cl::cat(cat)); 37 38static cl::opt<bool> leadingLines("leading-lines", 39 cl::desc("Preserve line numbers"), 40 cl::cat(cat)); 41 42static cl::opt<bool> noLeadingLines("no-leading-lines", 43 cl::desc("Don't preserve line numbers (default)"), 44 cl::cat(cat)); 45 46static StringRef toolName; 47static int errorCount; 48 49[[noreturn]] static void fatal(StringRef filename, const Twine &message) { 50 if (filename.empty()) 51 WithColor::error(errs(), toolName) << message << '\n'; 52 else 53 WithColor::error(errs(), toolName) << filename << ": " << message << '\n'; 54 exit(1); 55} 56 57static void error(StringRef filename, int64_t line, const Twine &message) { 58 ++errorCount; 59 errs() << filename << ':' << line << ": "; 60 WithColor::error(errs()) << message << '\n'; 61} 62 63namespace { 64struct Part { 65 const char *begin = nullptr; 66 const char *end = nullptr; 67 int64_t leadingLines = 0; 68}; 69} // namespace 70 71static int handle(MemoryBuffer &inputBuf, StringRef input) { 72 DenseMap<StringRef, Part> partToBegin; 73 StringRef lastPart, separator; 74 StringRef EOL = inputBuf.getBuffer().detectEOL(); 75 for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) { 76 const int64_t lineNo = i.line_number(); 77 const StringRef line = *i++; 78 const size_t markerLen = line.starts_with("//") ? 6 : 5; 79 if (!(line.size() >= markerLen && 80 line.substr(markerLen - 4).starts_with("--- "))) 81 continue; 82 separator = line.substr(0, markerLen); 83 const StringRef partName = line.substr(markerLen); 84 if (partName.empty()) { 85 error(input, lineNo, "empty part name"); 86 continue; 87 } 88 if (isSpace(partName.front()) || isSpace(partName.back())) { 89 error(input, lineNo, "part name cannot have leading or trailing space"); 90 continue; 91 } 92 93 auto res = partToBegin.try_emplace(partName); 94 if (!res.second) { 95 error(input, lineNo, 96 "'" + separator + partName + "' occurs more than once"); 97 continue; 98 } 99 if (!lastPart.empty()) 100 partToBegin[lastPart].end = line.data(); 101 Part &cur = res.first->second; 102 if (!i.is_at_eof()) 103 cur.begin = i->data(); 104 // If --leading-lines is specified, numEmptyLines is 0. Append newlines so 105 // that the extracted part preserves line numbers. 106 cur.leadingLines = leadingLines ? i.line_number() - 1 : 0; 107 108 lastPart = partName; 109 } 110 if (lastPart.empty()) 111 fatal(input, "no part separator was found"); 112 if (errorCount) 113 return 1; 114 partToBegin[lastPart].end = inputBuf.getBufferEnd(); 115 116 std::vector<std::unique_ptr<ToolOutputFile>> outputFiles; 117 SmallString<256> partPath; 118 for (auto &keyValue : partToBegin) { 119 partPath.clear(); 120 sys::path::append(partPath, output, keyValue.first); 121 std::error_code ec = 122 sys::fs::create_directories(sys::path::parent_path(partPath)); 123 if (ec) 124 fatal(input, ec.message()); 125 auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec, 126 llvm::sys::fs::OF_None); 127 if (!f) 128 fatal(input, ec.message()); 129 130 Part &part = keyValue.second; 131 for (int64_t i = 0; i != part.leadingLines; ++i) 132 (*f).os() << EOL; 133 if (part.begin) 134 (*f).os().write(part.begin, part.end - part.begin); 135 outputFiles.push_back(std::move(f)); 136 } 137 138 for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles) 139 outputFile->keep(); 140 return 0; 141} 142 143int main(int argc, const char **argv) { 144 toolName = sys::path::stem(argv[0]); 145 cl::HideUnrelatedOptions({&cat}); 146 cl::ParseCommandLineOptions( 147 argc, argv, 148 "Split input into multiple parts separated by regex '^(.|//)--- ' and " 149 "extract the part specified by '^(.|//)--- <part>'\n", 150 nullptr, 151 /*EnvVar=*/nullptr, 152 /*LongOptionsUseDoubleDash=*/true); 153 154 if (input.empty()) 155 fatal("", "input filename is not specified"); 156 if (output.empty()) 157 fatal("", "output directory is not specified"); 158 ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr = 159 MemoryBuffer::getFileOrSTDIN(input); 160 if (std::error_code ec = bufferOrErr.getError()) 161 fatal(input, ec.message()); 162 163 // Delete output if it is a file or an empty directory, so that we can create 164 // a directory. 165 sys::fs::file_status status; 166 if (std::error_code ec = sys::fs::status(output, status)) 167 if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory)) 168 fatal(output, ec.message()); 169 if (status.type() != sys::fs::file_type::file_not_found && 170 status.type() != sys::fs::file_type::directory_file && 171 status.type() != sys::fs::file_type::regular_file) 172 fatal(output, "output cannot be a special file"); 173 if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true)) 174 if (ec.value() != static_cast<int>(std::errc::directory_not_empty) && 175 ec.value() != static_cast<int>(std::errc::file_exists)) 176 fatal(output, ec.message()); 177 return handle(**bufferOrErr, input); 178} 179