1//===- split-file.cpp - Input splitting utility ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
10// the specified part.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/StringExtras.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/FileOutputBuffer.h"
19#include "llvm/Support/FileSystem.h"
20#include "llvm/Support/LineIterator.h"
21#include "llvm/Support/MemoryBuffer.h"
22#include "llvm/Support/Path.h"
23#include "llvm/Support/ToolOutputFile.h"
24#include "llvm/Support/WithColor.h"
25#include <string>
26#include <system_error>
27
28using namespace llvm;
29
30static cl::OptionCategory cat("split-file Options");
31
32static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
33                                  cl::cat(cat));
34
35static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
36                                   cl::value_desc("directory"), cl::cat(cat));
37
38static cl::opt<bool> leadingLines("leading-lines",
39                                    cl::desc("Preserve line numbers"),
40                                    cl::cat(cat));
41
42static cl::opt<bool> noLeadingLines("no-leading-lines",
43                                    cl::desc("Don't preserve line numbers (default)"),
44                                    cl::cat(cat));
45
46static StringRef toolName;
47static int errorCount;
48
49[[noreturn]] static void fatal(StringRef filename, const Twine &message) {
50  if (filename.empty())
51    WithColor::error(errs(), toolName) << message << '\n';
52  else
53    WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
54  exit(1);
55}
56
57static void error(StringRef filename, int64_t line, const Twine &message) {
58  ++errorCount;
59  errs() << filename << ':' << line << ": ";
60  WithColor::error(errs()) << message << '\n';
61}
62
namespace {
/// Location of one named part inside the input buffer.
struct Part {
  /// First byte of the part's content; stays null when the separator is the
  /// last line of the input, in which case no content is written.
  const char *begin{nullptr};
  /// One past the last byte of the part's content.
  const char *end{nullptr};
  /// Number of empty lines to emit before the content (0 unless
  /// --leading-lines is given).
  int64_t leadingLines{0};
};
} // namespace
70
71static int handle(MemoryBuffer &inputBuf, StringRef input) {
72  DenseMap<StringRef, Part> partToBegin;
73  StringRef lastPart, separator;
74  StringRef EOL = inputBuf.getBuffer().detectEOL();
75  for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
76    const int64_t lineNo = i.line_number();
77    const StringRef line = *i++;
78    const size_t markerLen = line.starts_with("//") ? 6 : 5;
79    if (!(line.size() >= markerLen &&
80          line.substr(markerLen - 4).starts_with("--- ")))
81      continue;
82    separator = line.substr(0, markerLen);
83    const StringRef partName = line.substr(markerLen);
84    if (partName.empty()) {
85      error(input, lineNo, "empty part name");
86      continue;
87    }
88    if (isSpace(partName.front()) || isSpace(partName.back())) {
89      error(input, lineNo, "part name cannot have leading or trailing space");
90      continue;
91    }
92
93    auto res = partToBegin.try_emplace(partName);
94    if (!res.second) {
95      error(input, lineNo,
96            "'" + separator + partName + "' occurs more than once");
97      continue;
98    }
99    if (!lastPart.empty())
100      partToBegin[lastPart].end = line.data();
101    Part &cur = res.first->second;
102    if (!i.is_at_eof())
103      cur.begin = i->data();
104    // If --leading-lines is specified, numEmptyLines is 0. Append newlines so
105    // that the extracted part preserves line numbers.
106    cur.leadingLines = leadingLines ? i.line_number() - 1 : 0;
107
108    lastPart = partName;
109  }
110  if (lastPart.empty())
111    fatal(input, "no part separator was found");
112  if (errorCount)
113    return 1;
114  partToBegin[lastPart].end = inputBuf.getBufferEnd();
115
116  std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
117  SmallString<256> partPath;
118  for (auto &keyValue : partToBegin) {
119    partPath.clear();
120    sys::path::append(partPath, output, keyValue.first);
121    std::error_code ec =
122        sys::fs::create_directories(sys::path::parent_path(partPath));
123    if (ec)
124      fatal(input, ec.message());
125    auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
126                                              llvm::sys::fs::OF_None);
127    if (!f)
128      fatal(input, ec.message());
129
130    Part &part = keyValue.second;
131    for (int64_t i = 0; i != part.leadingLines; ++i)
132      (*f).os() << EOL;
133    if (part.begin)
134      (*f).os().write(part.begin, part.end - part.begin);
135    outputFiles.push_back(std::move(f));
136  }
137
138  for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
139    outputFile->keep();
140  return 0;
141}
142
143int main(int argc, const char **argv) {
144  toolName = sys::path::stem(argv[0]);
145  cl::HideUnrelatedOptions({&cat});
146  cl::ParseCommandLineOptions(
147      argc, argv,
148      "Split input into multiple parts separated by regex '^(.|//)--- ' and "
149      "extract the part specified by '^(.|//)--- <part>'\n",
150      nullptr,
151      /*EnvVar=*/nullptr,
152      /*LongOptionsUseDoubleDash=*/true);
153
154  if (input.empty())
155    fatal("", "input filename is not specified");
156  if (output.empty())
157    fatal("", "output directory is not specified");
158  ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
159      MemoryBuffer::getFileOrSTDIN(input);
160  if (std::error_code ec = bufferOrErr.getError())
161    fatal(input, ec.message());
162
163  // Delete output if it is a file or an empty directory, so that we can create
164  // a directory.
165  sys::fs::file_status status;
166  if (std::error_code ec = sys::fs::status(output, status))
167    if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
168      fatal(output, ec.message());
169  if (status.type() != sys::fs::file_type::file_not_found &&
170      status.type() != sys::fs::file_type::directory_file &&
171      status.type() != sys::fs::file_type::regular_file)
172    fatal(output, "output cannot be a special file");
173  if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
174    if (ec.value() != static_cast<int>(std::errc::directory_not_empty) &&
175        ec.value() != static_cast<int>(std::errc::file_exists))
176      fatal(output, ec.message());
177  return handle(**bufferOrErr, input);
178}
179