1//===- Replacement.h - Framework for clang refactoring tools ----*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// Classes supporting refactorings that span multiple translation units. 10// While single translation unit refactorings are supported via the Rewriter, 11// when refactoring multiple translation units changes must be stored in a 12// SourceManager independent form, duplicate changes need to be removed, and 13// all changes must be applied at once at the end of the refactoring so that 14// the code is always parseable. 15// 16//===----------------------------------------------------------------------===// 17 18#ifndef LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H 19#define LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H 20 21#include "clang/Basic/LangOptions.h" 22#include "clang/Basic/SourceLocation.h" 23#include "llvm/ADT/Optional.h" 24#include "llvm/ADT/StringRef.h" 25#include "llvm/Support/Compiler.h" 26#include "llvm/Support/Error.h" 27#include "llvm/Support/raw_ostream.h" 28#include <map> 29#include <set> 30#include <string> 31#include <system_error> 32#include <utility> 33#include <vector> 34 35namespace clang { 36 37class FileManager; 38class Rewriter; 39class SourceManager; 40 41namespace tooling { 42 43/// A source range independent of the \c SourceManager. 44class Range { 45public: 46 Range() = default; 47 Range(unsigned Offset, unsigned Length) : Offset(Offset), Length(Length) {} 48 49 /// Accessors. 50 /// @{ 51 unsigned getOffset() const { return Offset; } 52 unsigned getLength() const { return Length; } 53 /// @} 54 55 /// \name Range Predicates 56 /// @{ 57 /// Whether this range overlaps with \p RHS or not. 58 bool overlapsWith(Range RHS) const { 59 return Offset + Length > RHS.Offset && Offset < RHS.Offset + RHS.Length; 60 } 61 62 /// Whether this range contains \p RHS or not. 63 bool contains(Range RHS) const { 64 return RHS.Offset >= Offset && 65 (RHS.Offset + RHS.Length) <= (Offset + Length); 66 } 67 68 /// Whether this range equals to \p RHS or not. 69 bool operator==(const Range &RHS) const { 70 return Offset == RHS.getOffset() && Length == RHS.getLength(); 71 } 72 /// @} 73 74private: 75 unsigned Offset = 0; 76 unsigned Length = 0; 77}; 78 79/// A text replacement. 80/// 81/// Represents a SourceManager independent replacement of a range of text in a 82/// specific file. 83class Replacement { 84public: 85 /// Creates an invalid (not applicable) replacement. 86 Replacement(); 87 88 /// Creates a replacement of the range [Offset, Offset+Length) in 89 /// FilePath with ReplacementText. 90 /// 91 /// \param FilePath A source file accessible via a SourceManager. 92 /// \param Offset The byte offset of the start of the range in the file. 93 /// \param Length The length of the range in bytes. 94 Replacement(StringRef FilePath, unsigned Offset, unsigned Length, 95 StringRef ReplacementText); 96 97 /// Creates a Replacement of the range [Start, Start+Length) with 98 /// ReplacementText. 99 Replacement(const SourceManager &Sources, SourceLocation Start, 100 unsigned Length, StringRef ReplacementText); 101 102 /// Creates a Replacement of the given range with ReplacementText. 103 Replacement(const SourceManager &Sources, const CharSourceRange &Range, 104 StringRef ReplacementText, 105 const LangOptions &LangOpts = LangOptions()); 106 107 /// Creates a Replacement of the node with ReplacementText. 108 template <typename Node> 109 Replacement(const SourceManager &Sources, const Node &NodeToReplace, 110 StringRef ReplacementText, 111 const LangOptions &LangOpts = LangOptions()); 112 113 /// Returns whether this replacement can be applied to a file. 114 /// 115 /// Only replacements that are in a valid file can be applied. 116 bool isApplicable() const; 117 118 /// Accessors. 119 /// @{ 120 StringRef getFilePath() const { return FilePath; } 121 unsigned getOffset() const { return ReplacementRange.getOffset(); } 122 unsigned getLength() const { return ReplacementRange.getLength(); } 123 StringRef getReplacementText() const { return ReplacementText; } 124 /// @} 125 126 /// Applies the replacement on the Rewriter. 127 bool apply(Rewriter &Rewrite) const; 128 129 /// Returns a human readable string representation. 130 std::string toString() const; 131 132private: 133 void setFromSourceLocation(const SourceManager &Sources, SourceLocation Start, 134 unsigned Length, StringRef ReplacementText); 135 void setFromSourceRange(const SourceManager &Sources, 136 const CharSourceRange &Range, 137 StringRef ReplacementText, 138 const LangOptions &LangOpts); 139 140 std::string FilePath; 141 Range ReplacementRange; 142 std::string ReplacementText; 143}; 144 145enum class replacement_error { 146 fail_to_apply = 0, 147 wrong_file_path, 148 overlap_conflict, 149 insert_conflict, 150}; 151 152/// Carries extra error information in replacement-related llvm::Error, 153/// e.g. fail applying replacements and replacements conflict. 154class ReplacementError : public llvm::ErrorInfo<ReplacementError> { 155public: 156 ReplacementError(replacement_error Err) : Err(Err) {} 157 158 /// Constructs an error related to an existing replacement. 159 ReplacementError(replacement_error Err, Replacement Existing) 160 : Err(Err), ExistingReplacement(std::move(Existing)) {} 161 162 /// Constructs an error related to a new replacement and an existing 163 /// replacement in a set of replacements. 164 ReplacementError(replacement_error Err, Replacement New, Replacement Existing) 165 : Err(Err), NewReplacement(std::move(New)), 166 ExistingReplacement(std::move(Existing)) {} 167 168 std::string message() const override; 169 170 void log(raw_ostream &OS) const override { OS << message(); } 171 172 replacement_error get() const { return Err; } 173 174 static char ID; 175 176 const llvm::Optional<Replacement> &getNewReplacement() const { 177 return NewReplacement; 178 } 179 180 const llvm::Optional<Replacement> &getExistingReplacement() const { 181 return ExistingReplacement; 182 } 183 184private: 185 // Users are not expected to use error_code. 186 std::error_code convertToErrorCode() const override { 187 return llvm::inconvertibleErrorCode(); 188 } 189 190 replacement_error Err; 191 192 // A new replacement, which is to expected be added into a set of 193 // replacements, that is causing problem. 194 llvm::Optional<Replacement> NewReplacement; 195 196 // An existing replacement in a replacements set that is causing problem. 197 llvm::Optional<Replacement> ExistingReplacement; 198}; 199 200/// Less-than operator between two Replacements. 201bool operator<(const Replacement &LHS, const Replacement &RHS); 202 203/// Equal-to operator between two Replacements. 204bool operator==(const Replacement &LHS, const Replacement &RHS); 205 206/// Maintains a set of replacements that are conflict-free. 207/// Two replacements are considered conflicts if they overlap or have the same 208/// offset (i.e. order-dependent). 209class Replacements { 210private: 211 using ReplacementsImpl = std::set<Replacement>; 212 213public: 214 using const_iterator = ReplacementsImpl::const_iterator; 215 using const_reverse_iterator = ReplacementsImpl::const_reverse_iterator; 216 217 Replacements() = default; 218 219 explicit Replacements(const Replacement &R) { Replaces.insert(R); } 220 221 /// Adds a new replacement \p R to the current set of replacements. 222 /// \p R must have the same file path as all existing replacements. 223 /// Returns `success` if the replacement is successfully inserted; otherwise, 224 /// it returns an llvm::Error, i.e. there is a conflict between R and the 225 /// existing replacements (i.e. they are order-dependent) or R's file path is 226 /// different from the filepath of existing replacements. Callers must 227 /// explicitly check the Error returned, and the returned error can be 228 /// converted to a string message with `llvm::toString()`. This prevents users 229 /// from adding order-dependent replacements. To control the order in which 230 /// order-dependent replacements are applied, use merge({R}) with R referring 231 /// to the changed code after applying all existing replacements. 232 /// Two replacements A and B are considered order-independent if applying them 233 /// in either order produces the same result. Note that the range of the 234 /// replacement that is applied later still refers to the original code. 235 /// These include (but not restricted to) replacements that: 236 /// - don't overlap (being directly adjacent is fine) and 237 /// - are overlapping deletions. 238 /// - are insertions at the same offset and applying them in either order 239 /// has the same effect, i.e. X + Y = Y + X when inserting X and Y 240 /// respectively. 241 /// - are identical replacements, i.e. applying the same replacement twice 242 /// is equivalent to applying it once. 243 /// Examples: 244 /// 1. Replacement A(0, 0, "a") and B(0, 0, "aa") are order-independent since 245 /// applying them in either order gives replacement (0, 0, "aaa"). 246 /// However, A(0, 0, "a") and B(0, 0, "b") are order-dependent since 247 /// applying A first gives (0, 0, "ab") while applying B first gives (B, A, 248 /// "ba"). 249 /// 2. Replacement A(0, 2, "123") and B(0, 2, "123") are order-independent 250 /// since applying them in either order gives (0, 2, "123"). 251 /// 3. Replacement A(0, 3, "123") and B(2, 3, "321") are order-independent 252 /// since either order gives (0, 5, "12321"). 253 /// 4. Replacement A(0, 3, "ab") and B(0, 3, "ab") are order-independent since 254 /// applying the same replacement twice is equivalent to applying it once. 255 /// Replacements with offset UINT_MAX are special - we do not detect conflicts 256 /// for such replacements since users may add them intentionally as a special 257 /// category of replacements. 258 llvm::Error add(const Replacement &R); 259 260 /// Merges \p Replaces into the current replacements. \p Replaces 261 /// refers to code after applying the current replacements. 262 LLVM_NODISCARD Replacements merge(const Replacements &Replaces) const; 263 264 // Returns the affected ranges in the changed code. 265 std::vector<Range> getAffectedRanges() const; 266 267 // Returns the new offset in the code after replacements being applied. 268 // Note that if there is an insertion at Offset in the current replacements, 269 // \p Offset will be shifted to Offset + Length in inserted text. 270 unsigned getShiftedCodePosition(unsigned Position) const; 271 272 unsigned size() const { return Replaces.size(); } 273 274 void clear() { Replaces.clear(); } 275 276 bool empty() const { return Replaces.empty(); } 277 278 const_iterator begin() const { return Replaces.begin(); } 279 280 const_iterator end() const { return Replaces.end(); } 281 282 const_reverse_iterator rbegin() const { return Replaces.rbegin(); } 283 284 const_reverse_iterator rend() const { return Replaces.rend(); } 285 286 bool operator==(const Replacements &RHS) const { 287 return Replaces == RHS.Replaces; 288 } 289 290private: 291 Replacements(const_iterator Begin, const_iterator End) 292 : Replaces(Begin, End) {} 293 294 // Returns `R` with new range that refers to code after `Replaces` being 295 // applied. 296 Replacement getReplacementInChangedCode(const Replacement &R) const; 297 298 // Returns a set of replacements that is equivalent to the current 299 // replacements by merging all adjacent replacements. Two sets of replacements 300 // are considered equivalent if they have the same effect when they are 301 // applied. 302 Replacements getCanonicalReplacements() const; 303 304 // If `R` and all existing replacements are order-indepedent, then merge it 305 // with `Replaces` and returns the merged replacements; otherwise, returns an 306 // error. 307 llvm::Expected<Replacements> 308 mergeIfOrderIndependent(const Replacement &R) const; 309 310 ReplacementsImpl Replaces; 311}; 312 313/// Apply all replacements in \p Replaces to the Rewriter \p Rewrite. 314/// 315/// Replacement applications happen independently of the success of 316/// other applications. 317/// 318/// \returns true if all replacements apply. false otherwise. 319bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite); 320 321/// Applies all replacements in \p Replaces to \p Code. 322/// 323/// This completely ignores the path stored in each replacement. If all 324/// replacements are applied successfully, this returns the code with 325/// replacements applied; otherwise, an llvm::Error carrying llvm::StringError 326/// is returned (the Error message can be converted to string using 327/// `llvm::toString()` and 'std::error_code` in the `Error` should be ignored). 328llvm::Expected<std::string> applyAllReplacements(StringRef Code, 329 const Replacements &Replaces); 330 331/// Collection of Replacements generated from a single translation unit. 332struct TranslationUnitReplacements { 333 /// Name of the main source for the translation unit. 334 std::string MainSourceFile; 335 336 std::vector<Replacement> Replacements; 337}; 338 339/// Calculates the new ranges after \p Replaces are applied. These 340/// include both the original \p Ranges and the affected ranges of \p Replaces 341/// in the new code. 342/// 343/// \pre Replacements must be for the same file. 344/// 345/// \return The new ranges after \p Replaces are applied. The new ranges will be 346/// sorted and non-overlapping. 347std::vector<Range> 348calculateRangesAfterReplacements(const Replacements &Replaces, 349 const std::vector<Range> &Ranges); 350 351/// If there are multiple <File, Replacements> pairs with the same file 352/// entry, we only keep one pair and discard the rest. 353/// If a file does not exist, its corresponding replacements will be ignored. 354std::map<std::string, Replacements> groupReplacementsByFile( 355 FileManager &FileMgr, 356 const std::map<std::string, Replacements> &FileToReplaces); 357 358template <typename Node> 359Replacement::Replacement(const SourceManager &Sources, 360 const Node &NodeToReplace, StringRef ReplacementText, 361 const LangOptions &LangOpts) { 362 const CharSourceRange Range = 363 CharSourceRange::getTokenRange(NodeToReplace->getSourceRange()); 364 setFromSourceRange(Sources, Range, ReplacementText, LangOpts); 365} 366 367} // namespace tooling 368 369} // namespace clang 370 371#endif // LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H 372