1//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a YAML parser. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/Support/YAMLParser.h" 15 16#include "llvm/ADT/ilist.h" 17#include "llvm/ADT/ilist_node.h" 18#include "llvm/ADT/SmallVector.h" 19#include "llvm/ADT/StringExtras.h" 20#include "llvm/ADT/Twine.h" 21#include "llvm/Support/ErrorHandling.h" 22#include "llvm/Support/MemoryBuffer.h" 23#include "llvm/Support/raw_ostream.h" 24#include "llvm/Support/SourceMgr.h" 25 26using namespace llvm; 27using namespace yaml; 28 29enum UnicodeEncodingForm { 30 UEF_UTF32_LE, ///< UTF-32 Little Endian 31 UEF_UTF32_BE, ///< UTF-32 Big Endian 32 UEF_UTF16_LE, ///< UTF-16 Little Endian 33 UEF_UTF16_BE, ///< UTF-16 Big Endian 34 UEF_UTF8, ///< UTF-8 or ascii. 35 UEF_Unknown ///< Not a valid Unicode encoding. 36}; 37 38/// EncodingInfo - Holds the encoding type and length of the byte order mark if 39/// it exists. Length is in {0, 2, 3, 4}. 40typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo; 41 42/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode 43/// encoding form of \a Input. 44/// 45/// @param Input A string of length 0 or more. 46/// @returns An EncodingInfo indicating the Unicode encoding form of the input 47/// and how long the byte order mark is if one exists. 48static EncodingInfo getUnicodeEncoding(StringRef Input) { 49 if (Input.size() == 0) 50 return std::make_pair(UEF_Unknown, 0); 51 52 switch (uint8_t(Input[0])) { 53 case 0x00: 54 if (Input.size() >= 4) { 55 if ( Input[1] == 0 56 && uint8_t(Input[2]) == 0xFE 57 && uint8_t(Input[3]) == 0xFF) 58 return std::make_pair(UEF_UTF32_BE, 4); 59 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) 60 return std::make_pair(UEF_UTF32_BE, 0); 61 } 62 63 if (Input.size() >= 2 && Input[1] != 0) 64 return std::make_pair(UEF_UTF16_BE, 0); 65 return std::make_pair(UEF_Unknown, 0); 66 case 0xFF: 67 if ( Input.size() >= 4 68 && uint8_t(Input[1]) == 0xFE 69 && Input[2] == 0 70 && Input[3] == 0) 71 return std::make_pair(UEF_UTF32_LE, 4); 72 73 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) 74 return std::make_pair(UEF_UTF16_LE, 2); 75 return std::make_pair(UEF_Unknown, 0); 76 case 0xFE: 77 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) 78 return std::make_pair(UEF_UTF16_BE, 2); 79 return std::make_pair(UEF_Unknown, 0); 80 case 0xEF: 81 if ( Input.size() >= 3 82 && uint8_t(Input[1]) == 0xBB 83 && uint8_t(Input[2]) == 0xBF) 84 return std::make_pair(UEF_UTF8, 3); 85 return std::make_pair(UEF_Unknown, 0); 86 } 87 88 // It could still be utf-32 or utf-16. 89 if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) 90 return std::make_pair(UEF_UTF32_LE, 0); 91 92 if (Input.size() >= 2 && Input[1] == 0) 93 return std::make_pair(UEF_UTF16_LE, 0); 94 95 return std::make_pair(UEF_UTF8, 0); 96} 97 98namespace llvm { 99namespace yaml { 100/// Token - A single YAML token. 101struct Token : ilist_node<Token> { 102 enum TokenKind { 103 TK_Error, // Uninitialized token. 104 TK_StreamStart, 105 TK_StreamEnd, 106 TK_VersionDirective, 107 TK_TagDirective, 108 TK_DocumentStart, 109 TK_DocumentEnd, 110 TK_BlockEntry, 111 TK_BlockEnd, 112 TK_BlockSequenceStart, 113 TK_BlockMappingStart, 114 TK_FlowEntry, 115 TK_FlowSequenceStart, 116 TK_FlowSequenceEnd, 117 TK_FlowMappingStart, 118 TK_FlowMappingEnd, 119 TK_Key, 120 TK_Value, 121 TK_Scalar, 122 TK_Alias, 123 TK_Anchor, 124 TK_Tag 125 } Kind; 126 127 /// A string of length 0 or more whose begin() points to the logical location 128 /// of the token in the input. 129 StringRef Range; 130 131 Token() : Kind(TK_Error) {} 132}; 133} 134} 135 136namespace llvm { 137template<> 138struct ilist_sentinel_traits<Token> { 139 Token *createSentinel() const { 140 return &Sentinel; 141 } 142 static void destroySentinel(Token*) {} 143 144 Token *provideInitialHead() const { return createSentinel(); } 145 Token *ensureHead(Token*) const { return createSentinel(); } 146 static void noteHead(Token*, Token*) {} 147 148private: 149 mutable Token Sentinel; 150}; 151 152template<> 153struct ilist_node_traits<Token> { 154 Token *createNode(const Token &V) { 155 return new (Alloc.Allocate<Token>()) Token(V); 156 } 157 static void deleteNode(Token *V) {} 158 159 void addNodeToList(Token *) {} 160 void removeNodeFromList(Token *) {} 161 void transferNodesFromList(ilist_node_traits & /*SrcTraits*/, 162 ilist_iterator<Token> /*first*/, 163 ilist_iterator<Token> /*last*/) {} 164 165 BumpPtrAllocator Alloc; 166}; 167} 168 169typedef ilist<Token> TokenQueueT; 170 171namespace { 172/// @brief This struct is used to track simple keys. 173/// 174/// Simple keys are handled by creating an entry in SimpleKeys for each Token 175/// which could legally be the start of a simple key. When peekNext is called, 176/// if the Token To be returned is referenced by a SimpleKey, we continue 177/// tokenizing until that potential simple key has either been found to not be 178/// a simple key (we moved on to the next line or went further than 1024 chars). 179/// Or when we run into a Value, and then insert a Key token (and possibly 180/// others) before the SimpleKey's Tok. 181struct SimpleKey { 182 TokenQueueT::iterator Tok; 183 unsigned Column; 184 unsigned Line; 185 unsigned FlowLevel; 186 bool IsRequired; 187 188 bool operator ==(const SimpleKey &Other) { 189 return Tok == Other.Tok; 190 } 191}; 192} 193 194/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit 195/// subsequence and the subsequence's length in code units (uint8_t). 196/// A length of 0 represents an error. 197typedef std::pair<uint32_t, unsigned> UTF8Decoded; 198 199static UTF8Decoded decodeUTF8(StringRef Range) { 200 StringRef::iterator Position= Range.begin(); 201 StringRef::iterator End = Range.end(); 202 // 1 byte: [0x00, 0x7f] 203 // Bit pattern: 0xxxxxxx 204 if ((*Position & 0x80) == 0) { 205 return std::make_pair(*Position, 1); 206 } 207 // 2 bytes: [0x80, 0x7ff] 208 // Bit pattern: 110xxxxx 10xxxxxx 209 if (Position + 1 != End && 210 ((*Position & 0xE0) == 0xC0) && 211 ((*(Position + 1) & 0xC0) == 0x80)) { 212 uint32_t codepoint = ((*Position & 0x1F) << 6) | 213 (*(Position + 1) & 0x3F); 214 if (codepoint >= 0x80) 215 return std::make_pair(codepoint, 2); 216 } 217 // 3 bytes: [0x8000, 0xffff] 218 // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx 219 if (Position + 2 != End && 220 ((*Position & 0xF0) == 0xE0) && 221 ((*(Position + 1) & 0xC0) == 0x80) && 222 ((*(Position + 2) & 0xC0) == 0x80)) { 223 uint32_t codepoint = ((*Position & 0x0F) << 12) | 224 ((*(Position + 1) & 0x3F) << 6) | 225 (*(Position + 2) & 0x3F); 226 // Codepoints between 0xD800 and 0xDFFF are invalid, as 227 // they are high / low surrogate halves used by UTF-16. 228 if (codepoint >= 0x800 && 229 (codepoint < 0xD800 || codepoint > 0xDFFF)) 230 return std::make_pair(codepoint, 3); 231 } 232 // 4 bytes: [0x10000, 0x10FFFF] 233 // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 234 if (Position + 3 != End && 235 ((*Position & 0xF8) == 0xF0) && 236 ((*(Position + 1) & 0xC0) == 0x80) && 237 ((*(Position + 2) & 0xC0) == 0x80) && 238 ((*(Position + 3) & 0xC0) == 0x80)) { 239 uint32_t codepoint = ((*Position & 0x07) << 18) | 240 ((*(Position + 1) & 0x3F) << 12) | 241 ((*(Position + 2) & 0x3F) << 6) | 242 (*(Position + 3) & 0x3F); 243 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) 244 return std::make_pair(codepoint, 4); 245 } 246 return std::make_pair(0, 0); 247} 248 249namespace llvm { 250namespace yaml { 251/// @brief Scans YAML tokens from a MemoryBuffer. 252class Scanner { 253public: 254 Scanner(const StringRef Input, SourceMgr &SM); 255 256 /// @brief Parse the next token and return it without popping it. 257 Token &peekNext(); 258 259 /// @brief Parse the next token and pop it from the queue. 260 Token getNext(); 261 262 void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, 263 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { 264 SM.PrintMessage(Loc, Kind, Message, Ranges); 265 } 266 267 void setError(const Twine &Message, StringRef::iterator Position) { 268 if (Current >= End) 269 Current = End - 1; 270 271 // Don't print out more errors after the first one we encounter. The rest 272 // are just the result of the first, and have no meaning. 273 if (!Failed) 274 printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); 275 Failed = true; 276 } 277 278 void setError(const Twine &Message) { 279 setError(Message, Current); 280 } 281 282 /// @brief Returns true if an error occurred while parsing. 283 bool failed() { 284 return Failed; 285 } 286 287private: 288 StringRef currentInput() { 289 return StringRef(Current, End - Current); 290 } 291 292 /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting 293 /// at \a Position. 294 /// 295 /// If the UTF-8 code units starting at Position do not form a well-formed 296 /// code unit subsequence, then the Unicode scalar value is 0, and the length 297 /// is 0. 298 UTF8Decoded decodeUTF8(StringRef::iterator Position) { 299 return ::decodeUTF8(StringRef(Position, End - Position)); 300 } 301 302 // The following functions are based on the gramar rules in the YAML spec. The 303 // style of the function names it meant to closely match how they are written 304 // in the spec. The number within the [] is the number of the grammar rule in 305 // the spec. 306 // 307 // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. 308 // 309 // c- 310 // A production starting and ending with a special character. 311 // b- 312 // A production matching a single line break. 313 // nb- 314 // A production starting and ending with a non-break character. 315 // s- 316 // A production starting and ending with a white space character. 317 // ns- 318 // A production starting and ending with a non-space character. 319 // l- 320 // A production matching complete line(s). 321 322 /// @brief Skip a single nb-char[27] starting at Position. 323 /// 324 /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] 325 /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] 326 /// 327 /// @returns The code unit after the nb-char, or Position if it's not an 328 /// nb-char. 329 StringRef::iterator skip_nb_char(StringRef::iterator Position); 330 331 /// @brief Skip a single b-break[28] starting at Position. 332 /// 333 /// A b-break is 0xD 0xA | 0xD | 0xA 334 /// 335 /// @returns The code unit after the b-break, or Position if it's not a 336 /// b-break. 337 StringRef::iterator skip_b_break(StringRef::iterator Position); 338 339 /// @brief Skip a single s-white[33] starting at Position. 340 /// 341 /// A s-white is 0x20 | 0x9 342 /// 343 /// @returns The code unit after the s-white, or Position if it's not a 344 /// s-white. 345 StringRef::iterator skip_s_white(StringRef::iterator Position); 346 347 /// @brief Skip a single ns-char[34] starting at Position. 348 /// 349 /// A ns-char is nb-char - s-white 350 /// 351 /// @returns The code unit after the ns-char, or Position if it's not a 352 /// ns-char. 353 StringRef::iterator skip_ns_char(StringRef::iterator Position); 354 355 typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator); 356 /// @brief Skip minimal well-formed code unit subsequences until Func 357 /// returns its input. 358 /// 359 /// @returns The code unit after the last minimal well-formed code unit 360 /// subsequence that Func accepted. 361 StringRef::iterator skip_while( SkipWhileFunc Func 362 , StringRef::iterator Position); 363 364 /// @brief Scan ns-uri-char[39]s starting at Cur. 365 /// 366 /// This updates Cur and Column while scanning. 367 /// 368 /// @returns A StringRef starting at Cur which covers the longest contiguous 369 /// sequence of ns-uri-char. 370 StringRef scan_ns_uri_char(); 371 372 /// @brief Scan ns-plain-one-line[133] starting at \a Cur. 373 StringRef scan_ns_plain_one_line(); 374 375 /// @brief Consume a minimal well-formed code unit subsequence starting at 376 /// \a Cur. Return false if it is not the same Unicode scalar value as 377 /// \a Expected. This updates \a Column. 378 bool consume(uint32_t Expected); 379 380 /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. 381 void skip(uint32_t Distance); 382 383 /// @brief Return true if the minimal well-formed code unit subsequence at 384 /// Pos is whitespace or a new line 385 bool isBlankOrBreak(StringRef::iterator Position); 386 387 /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey. 388 void saveSimpleKeyCandidate( TokenQueueT::iterator Tok 389 , unsigned AtColumn 390 , bool IsRequired); 391 392 /// @brief Remove simple keys that can no longer be valid simple keys. 393 /// 394 /// Invalid simple keys are not on the current line or are further than 1024 395 /// columns back. 396 void removeStaleSimpleKeyCandidates(); 397 398 /// @brief Remove all simple keys on FlowLevel \a Level. 399 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); 400 401 /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd 402 /// tokens if needed. 403 bool unrollIndent(int ToColumn); 404 405 /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint 406 /// if needed. 407 bool rollIndent( int ToColumn 408 , Token::TokenKind Kind 409 , TokenQueueT::iterator InsertPoint); 410 411 /// @brief Skip whitespace and comments until the start of the next token. 412 void scanToNextToken(); 413 414 /// @brief Must be the first token generated. 415 bool scanStreamStart(); 416 417 /// @brief Generate tokens needed to close out the stream. 418 bool scanStreamEnd(); 419 420 /// @brief Scan a %BLAH directive. 421 bool scanDirective(); 422 423 /// @brief Scan a ... or ---. 424 bool scanDocumentIndicator(bool IsStart); 425 426 /// @brief Scan a [ or { and generate the proper flow collection start token. 427 bool scanFlowCollectionStart(bool IsSequence); 428 429 /// @brief Scan a ] or } and generate the proper flow collection end token. 430 bool scanFlowCollectionEnd(bool IsSequence); 431 432 /// @brief Scan the , that separates entries in a flow collection. 433 bool scanFlowEntry(); 434 435 /// @brief Scan the - that starts block sequence entries. 436 bool scanBlockEntry(); 437 438 /// @brief Scan an explicit ? indicating a key. 439 bool scanKey(); 440 441 /// @brief Scan an explicit : indicating a value. 442 bool scanValue(); 443 444 /// @brief Scan a quoted scalar. 445 bool scanFlowScalar(bool IsDoubleQuoted); 446 447 /// @brief Scan an unquoted scalar. 448 bool scanPlainScalar(); 449 450 /// @brief Scan an Alias or Anchor starting with * or &. 451 bool scanAliasOrAnchor(bool IsAlias); 452 453 /// @brief Scan a block scalar starting with | or >. 454 bool scanBlockScalar(bool IsLiteral); 455 456 /// @brief Scan a tag of the form !stuff. 457 bool scanTag(); 458 459 /// @brief Dispatch to the next scanning function based on \a *Cur. 460 bool fetchMoreTokens(); 461 462 /// @brief The SourceMgr used for diagnostics and buffer management. 463 SourceMgr &SM; 464 465 /// @brief The original input. 466 MemoryBuffer *InputBuffer; 467 468 /// @brief The current position of the scanner. 469 StringRef::iterator Current; 470 471 /// @brief The end of the input (one past the last character). 472 StringRef::iterator End; 473 474 /// @brief Current YAML indentation level in spaces. 475 int Indent; 476 477 /// @brief Current column number in Unicode code points. 478 unsigned Column; 479 480 /// @brief Current line number. 481 unsigned Line; 482 483 /// @brief How deep we are in flow style containers. 0 Means at block level. 484 unsigned FlowLevel; 485 486 /// @brief Are we at the start of the stream? 487 bool IsStartOfStream; 488 489 /// @brief Can the next token be the start of a simple key? 490 bool IsSimpleKeyAllowed; 491 492 /// @brief True if an error has occurred. 493 bool Failed; 494 495 /// @brief Queue of tokens. This is required to queue up tokens while looking 496 /// for the end of a simple key. And for cases where a single character 497 /// can produce multiple tokens (e.g. BlockEnd). 498 TokenQueueT TokenQueue; 499 500 /// @brief Indentation levels. 501 SmallVector<int, 4> Indents; 502 503 /// @brief Potential simple keys. 504 SmallVector<SimpleKey, 4> SimpleKeys; 505}; 506 507} // end namespace yaml 508} // end namespace llvm 509 510/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. 511static void encodeUTF8( uint32_t UnicodeScalarValue 512 , SmallVectorImpl<char> &Result) { 513 if (UnicodeScalarValue <= 0x7F) { 514 Result.push_back(UnicodeScalarValue & 0x7F); 515 } else if (UnicodeScalarValue <= 0x7FF) { 516 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); 517 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); 518 Result.push_back(FirstByte); 519 Result.push_back(SecondByte); 520 } else if (UnicodeScalarValue <= 0xFFFF) { 521 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); 522 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); 523 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); 524 Result.push_back(FirstByte); 525 Result.push_back(SecondByte); 526 Result.push_back(ThirdByte); 527 } else if (UnicodeScalarValue <= 0x10FFFF) { 528 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); 529 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); 530 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); 531 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); 532 Result.push_back(FirstByte); 533 Result.push_back(SecondByte); 534 Result.push_back(ThirdByte); 535 Result.push_back(FourthByte); 536 } 537} 538 539bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { 540 SourceMgr SM; 541 Scanner scanner(Input, SM); 542 while (true) { 543 Token T = scanner.getNext(); 544 switch (T.Kind) { 545 case Token::TK_StreamStart: 546 OS << "Stream-Start: "; 547 break; 548 case Token::TK_StreamEnd: 549 OS << "Stream-End: "; 550 break; 551 case Token::TK_VersionDirective: 552 OS << "Version-Directive: "; 553 break; 554 case Token::TK_TagDirective: 555 OS << "Tag-Directive: "; 556 break; 557 case Token::TK_DocumentStart: 558 OS << "Document-Start: "; 559 break; 560 case Token::TK_DocumentEnd: 561 OS << "Document-End: "; 562 break; 563 case Token::TK_BlockEntry: 564 OS << "Block-Entry: "; 565 break; 566 case Token::TK_BlockEnd: 567 OS << "Block-End: "; 568 break; 569 case Token::TK_BlockSequenceStart: 570 OS << "Block-Sequence-Start: "; 571 break; 572 case Token::TK_BlockMappingStart: 573 OS << "Block-Mapping-Start: "; 574 break; 575 case Token::TK_FlowEntry: 576 OS << "Flow-Entry: "; 577 break; 578 case Token::TK_FlowSequenceStart: 579 OS << "Flow-Sequence-Start: "; 580 break; 581 case Token::TK_FlowSequenceEnd: 582 OS << "Flow-Sequence-End: "; 583 break; 584 case Token::TK_FlowMappingStart: 585 OS << "Flow-Mapping-Start: "; 586 break; 587 case Token::TK_FlowMappingEnd: 588 OS << "Flow-Mapping-End: "; 589 break; 590 case Token::TK_Key: 591 OS << "Key: "; 592 break; 593 case Token::TK_Value: 594 OS << "Value: "; 595 break; 596 case Token::TK_Scalar: 597 OS << "Scalar: "; 598 break; 599 case Token::TK_Alias: 600 OS << "Alias: "; 601 break; 602 case Token::TK_Anchor: 603 OS << "Anchor: "; 604 break; 605 case Token::TK_Tag: 606 OS << "Tag: "; 607 break; 608 case Token::TK_Error: 609 break; 610 } 611 OS << T.Range << "\n"; 612 if (T.Kind == Token::TK_StreamEnd) 613 break; 614 else if (T.Kind == Token::TK_Error) 615 return false; 616 } 617 return true; 618} 619 620bool yaml::scanTokens(StringRef Input) { 621 llvm::SourceMgr SM; 622 llvm::yaml::Scanner scanner(Input, SM); 623 for (;;) { 624 llvm::yaml::Token T = scanner.getNext(); 625 if (T.Kind == Token::TK_StreamEnd) 626 break; 627 else if (T.Kind == Token::TK_Error) 628 return false; 629 } 630 return true; 631} 632 633std::string yaml::escape(StringRef Input) { 634 std::string EscapedInput; 635 for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { 636 if (*i == '\\') 637 EscapedInput += "\\\\"; 638 else if (*i == '"') 639 EscapedInput += "\\\""; 640 else if (*i == 0) 641 EscapedInput += "\\0"; 642 else if (*i == 0x07) 643 EscapedInput += "\\a"; 644 else if (*i == 0x08) 645 EscapedInput += "\\b"; 646 else if (*i == 0x09) 647 EscapedInput += "\\t"; 648 else if (*i == 0x0A) 649 EscapedInput += "\\n"; 650 else if (*i == 0x0B) 651 EscapedInput += "\\v"; 652 else if (*i == 0x0C) 653 EscapedInput += "\\f"; 654 else if (*i == 0x0D) 655 EscapedInput += "\\r"; 656 else if (*i == 0x1B) 657 EscapedInput += "\\e"; 658 else if ((unsigned char)*i < 0x20) { // Control characters not handled above. 659 std::string HexStr = utohexstr(*i); 660 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; 661 } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. 662 UTF8Decoded UnicodeScalarValue 663 = decodeUTF8(StringRef(i, Input.end() - i)); 664 if (UnicodeScalarValue.second == 0) { 665 // Found invalid char. 666 SmallString<4> Val; 667 encodeUTF8(0xFFFD, Val); 668 EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); 669 // FIXME: Error reporting. 670 return EscapedInput; 671 } 672 if (UnicodeScalarValue.first == 0x85) 673 EscapedInput += "\\N"; 674 else if (UnicodeScalarValue.first == 0xA0) 675 EscapedInput += "\\_"; 676 else if (UnicodeScalarValue.first == 0x2028) 677 EscapedInput += "\\L"; 678 else if (UnicodeScalarValue.first == 0x2029) 679 EscapedInput += "\\P"; 680 else { 681 std::string HexStr = utohexstr(UnicodeScalarValue.first); 682 if (HexStr.size() <= 2) 683 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; 684 else if (HexStr.size() <= 4) 685 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; 686 else if (HexStr.size() <= 8) 687 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; 688 } 689 i += UnicodeScalarValue.second - 1; 690 } else 691 EscapedInput.push_back(*i); 692 } 693 return EscapedInput; 694} 695 696Scanner::Scanner(StringRef Input, SourceMgr &sm) 697 : SM(sm) 698 , Indent(-1) 699 , Column(0) 700 , Line(0) 701 , FlowLevel(0) 702 , IsStartOfStream(true) 703 , IsSimpleKeyAllowed(true) 704 , Failed(false) { 705 InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); 706 SM.AddNewSourceBuffer(InputBuffer, SMLoc()); 707 Current = InputBuffer->getBufferStart(); 708 End = InputBuffer->getBufferEnd(); 709} 710 711Token &Scanner::peekNext() { 712 // If the current token is a possible simple key, keep parsing until we 713 // can confirm. 714 bool NeedMore = false; 715 while (true) { 716 if (TokenQueue.empty() || NeedMore) { 717 if (!fetchMoreTokens()) { 718 TokenQueue.clear(); 719 TokenQueue.push_back(Token()); 720 return TokenQueue.front(); 721 } 722 } 723 assert(!TokenQueue.empty() && 724 "fetchMoreTokens lied about getting tokens!"); 725 726 removeStaleSimpleKeyCandidates(); 727 SimpleKey SK; 728 SK.Tok = TokenQueue.front(); 729 if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK) 730 == SimpleKeys.end()) 731 break; 732 else 733 NeedMore = true; 734 } 735 return TokenQueue.front(); 736} 737 738Token Scanner::getNext() { 739 Token Ret = peekNext(); 740 // TokenQueue can be empty if there was an error getting the next token. 741 if (!TokenQueue.empty()) 742 TokenQueue.pop_front(); 743 744 // There cannot be any referenced Token's if the TokenQueue is empty. So do a 745 // quick deallocation of them all. 746 if (TokenQueue.empty()) { 747 TokenQueue.Alloc.Reset(); 748 } 749 750 return Ret; 751} 752 753StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { 754 if (Position == End) 755 return Position; 756 // Check 7 bit c-printable - b-char. 757 if ( *Position == 0x09 758 || (*Position >= 0x20 && *Position <= 0x7E)) 759 return Position + 1; 760 761 // Check for valid UTF-8. 762 if (uint8_t(*Position) & 0x80) { 763 UTF8Decoded u8d = decodeUTF8(Position); 764 if ( u8d.second != 0 765 && u8d.first != 0xFEFF 766 && ( u8d.first == 0x85 767 || ( u8d.first >= 0xA0 768 && u8d.first <= 0xD7FF) 769 || ( u8d.first >= 0xE000 770 && u8d.first <= 0xFFFD) 771 || ( u8d.first >= 0x10000 772 && u8d.first <= 0x10FFFF))) 773 return Position + u8d.second; 774 } 775 return Position; 776} 777 778StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { 779 if (Position == End) 780 return Position; 781 if (*Position == 0x0D) { 782 if (Position + 1 != End && *(Position + 1) == 0x0A) 783 return Position + 2; 784 return Position + 1; 785 } 786 787 if (*Position == 0x0A) 788 return Position + 1; 789 return Position; 790} 791 792 793StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { 794 if (Position == End) 795 return Position; 796 if (*Position == ' ' || *Position == '\t') 797 return Position + 1; 798 return Position; 799} 800 801StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { 802 if (Position == End) 803 return Position; 804 if (*Position == ' ' || *Position == '\t') 805 return Position; 806 return skip_nb_char(Position); 807} 808 809StringRef::iterator Scanner::skip_while( SkipWhileFunc Func 810 , StringRef::iterator Position) { 811 while (true) { 812 StringRef::iterator i = (this->*Func)(Position); 813 if (i == Position) 814 break; 815 Position = i; 816 } 817 return Position; 818} 819 820static bool is_ns_hex_digit(const char C) { 821 return (C >= '0' && C <= '9') 822 || (C >= 'a' && C <= 'z') 823 || (C >= 'A' && C <= 'Z'); 824} 825 826static bool is_ns_word_char(const char C) { 827 return C == '-' 828 || (C >= 'a' && C <= 'z') 829 || (C >= 'A' && C <= 'Z'); 830} 831 832StringRef Scanner::scan_ns_uri_char() { 833 StringRef::iterator Start = Current; 834 while (true) { 835 if (Current == End) 836 break; 837 if (( *Current == '%' 838 && Current + 2 < End 839 && is_ns_hex_digit(*(Current + 1)) 840 && is_ns_hex_digit(*(Current + 2))) 841 || is_ns_word_char(*Current) 842 || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") 843 != StringRef::npos) { 844 ++Current; 845 ++Column; 846 } else 847 break; 848 } 849 return StringRef(Start, Current - Start); 850} 851 852StringRef Scanner::scan_ns_plain_one_line() { 853 StringRef::iterator start = Current; 854 // The first character must already be verified. 855 ++Current; 856 while (true) { 857 if (Current == End) { 858 break; 859 } else if (*Current == ':') { 860 // Check if the next character is a ns-char. 861 if (Current + 1 == End) 862 break; 863 StringRef::iterator i = skip_ns_char(Current + 1); 864 if (Current + 1 != i) { 865 Current = i; 866 Column += 2; // Consume both the ':' and ns-char. 867 } else 868 break; 869 } else if (*Current == '#') { 870 // Check if the previous character was a ns-char. 871 // The & 0x80 check is to check for the trailing byte of a utf-8 872 if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) { 873 ++Current; 874 ++Column; 875 } else 876 break; 877 } else { 878 StringRef::iterator i = skip_nb_char(Current); 879 if (i == Current) 880 break; 881 Current = i; 882 ++Column; 883 } 884 } 885 return StringRef(start, Current - start); 886} 887 888bool Scanner::consume(uint32_t Expected) { 889 if (Expected >= 0x80) 890 report_fatal_error("Not dealing with this yet"); 891 if (Current == End) 892 return false; 893 if (uint8_t(*Current) >= 0x80) 894 report_fatal_error("Not dealing with this yet"); 895 if (uint8_t(*Current) == Expected) { 896 ++Current; 897 ++Column; 898 return true; 899 } 900 return false; 901} 902 903void Scanner::skip(uint32_t Distance) { 904 Current += Distance; 905 Column += Distance; 906 assert(Current <= End && "Skipped past the end"); 907} 908 909bool Scanner::isBlankOrBreak(StringRef::iterator Position) { 910 if (Position == End) 911 return false; 912 if ( *Position == ' ' || *Position == '\t' 913 || *Position == '\r' || *Position == '\n') 914 return true; 915 return false; 916} 917 918void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok 919 , unsigned AtColumn 920 , bool IsRequired) { 921 if (IsSimpleKeyAllowed) { 922 SimpleKey SK; 923 SK.Tok = Tok; 924 SK.Line = Line; 925 SK.Column = AtColumn; 926 SK.IsRequired = IsRequired; 927 SK.FlowLevel = FlowLevel; 928 SimpleKeys.push_back(SK); 929 } 930} 931 932void Scanner::removeStaleSimpleKeyCandidates() { 933 for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); 934 i != SimpleKeys.end();) { 935 if (i->Line != Line || i->Column + 1024 < Column) { 936 if (i->IsRequired) 937 setError( "Could not find expected : for simple key" 938 , i->Tok->Range.begin()); 939 i = SimpleKeys.erase(i); 940 } else 941 ++i; 942 } 943} 944 945void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { 946 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) 947 SimpleKeys.pop_back(); 948} 949 950bool Scanner::unrollIndent(int ToColumn) { 951 Token T; 952 // Indentation is ignored in flow. 953 if (FlowLevel != 0) 954 return true; 955 956 while (Indent > ToColumn) { 957 T.Kind = Token::TK_BlockEnd; 958 T.Range = StringRef(Current, 1); 959 TokenQueue.push_back(T); 960 Indent = Indents.pop_back_val(); 961 } 962 963 return true; 964} 965 966bool Scanner::rollIndent( int ToColumn 967 , Token::TokenKind Kind 968 , TokenQueueT::iterator InsertPoint) { 969 if (FlowLevel) 970 return true; 971 if (Indent < ToColumn) { 972 Indents.push_back(Indent); 973 Indent = ToColumn; 974 975 Token T; 976 T.Kind = Kind; 977 T.Range = StringRef(Current, 0); 978 TokenQueue.insert(InsertPoint, T); 979 } 980 return true; 981} 982 983void Scanner::scanToNextToken() { 984 while (true) { 985 while (*Current == ' ' || *Current == '\t') { 986 skip(1); 987 } 988 989 // Skip comment. 990 if (*Current == '#') { 991 while (true) { 992 // This may skip more than one byte, thus Column is only incremented 993 // for code points. 994 StringRef::iterator i = skip_nb_char(Current); 995 if (i == Current) 996 break; 997 Current = i; 998 ++Column; 999 } 1000 } 1001 1002 // Skip EOL. 1003 StringRef::iterator i = skip_b_break(Current); 1004 if (i == Current) 1005 break; 1006 Current = i; 1007 ++Line; 1008 Column = 0; 1009 // New lines may start a simple key. 1010 if (!FlowLevel) 1011 IsSimpleKeyAllowed = true; 1012 } 1013} 1014 1015bool Scanner::scanStreamStart() { 1016 IsStartOfStream = false; 1017 1018 EncodingInfo EI = getUnicodeEncoding(currentInput()); 1019 1020 Token T; 1021 T.Kind = Token::TK_StreamStart; 1022 T.Range = StringRef(Current, EI.second); 1023 TokenQueue.push_back(T); 1024 Current += EI.second; 1025 return true; 1026} 1027 1028bool Scanner::scanStreamEnd() { 1029 // Force an ending new line if one isn't present. 1030 if (Column != 0) { 1031 Column = 0; 1032 ++Line; 1033 } 1034 1035 unrollIndent(-1); 1036 SimpleKeys.clear(); 1037 IsSimpleKeyAllowed = false; 1038 1039 Token T; 1040 T.Kind = Token::TK_StreamEnd; 1041 T.Range = StringRef(Current, 0); 1042 TokenQueue.push_back(T); 1043 return true; 1044} 1045 1046bool Scanner::scanDirective() { 1047 // Reset the indentation level. 1048 unrollIndent(-1); 1049 SimpleKeys.clear(); 1050 IsSimpleKeyAllowed = false; 1051 1052 StringRef::iterator Start = Current; 1053 consume('%'); 1054 StringRef::iterator NameStart = Current; 1055 Current = skip_while(&Scanner::skip_ns_char, Current); 1056 StringRef Name(NameStart, Current - NameStart); 1057 Current = skip_while(&Scanner::skip_s_white, Current); 1058 1059 if (Name == "YAML") { 1060 Current = skip_while(&Scanner::skip_ns_char, Current); 1061 Token T; 1062 T.Kind = Token::TK_VersionDirective; 1063 T.Range = StringRef(Start, Current - Start); 1064 TokenQueue.push_back(T); 1065 return true; 1066 } 1067 return false; 1068} 1069 1070bool Scanner::scanDocumentIndicator(bool IsStart) { 1071 unrollIndent(-1); 1072 SimpleKeys.clear(); 1073 IsSimpleKeyAllowed = false; 1074 1075 Token T; 1076 T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; 1077 T.Range = StringRef(Current, 3); 1078 skip(3); 1079 TokenQueue.push_back(T); 1080 return true; 1081} 1082 1083bool Scanner::scanFlowCollectionStart(bool IsSequence) { 1084 Token T; 1085 T.Kind = IsSequence ? Token::TK_FlowSequenceStart 1086 : Token::TK_FlowMappingStart; 1087 T.Range = StringRef(Current, 1); 1088 skip(1); 1089 TokenQueue.push_back(T); 1090 1091 // [ and { may begin a simple key. 1092 saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false); 1093 1094 // And may also be followed by a simple key. 1095 IsSimpleKeyAllowed = true; 1096 ++FlowLevel; 1097 return true; 1098} 1099 1100bool Scanner::scanFlowCollectionEnd(bool IsSequence) { 1101 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1102 IsSimpleKeyAllowed = false; 1103 Token T; 1104 T.Kind = IsSequence ? Token::TK_FlowSequenceEnd 1105 : Token::TK_FlowMappingEnd; 1106 T.Range = StringRef(Current, 1); 1107 skip(1); 1108 TokenQueue.push_back(T); 1109 if (FlowLevel) 1110 --FlowLevel; 1111 return true; 1112} 1113 1114bool Scanner::scanFlowEntry() { 1115 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1116 IsSimpleKeyAllowed = true; 1117 Token T; 1118 T.Kind = Token::TK_FlowEntry; 1119 T.Range = StringRef(Current, 1); 1120 skip(1); 1121 TokenQueue.push_back(T); 1122 return true; 1123} 1124 1125bool Scanner::scanBlockEntry() { 1126 rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); 1127 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1128 IsSimpleKeyAllowed = true; 1129 Token T; 1130 T.Kind = Token::TK_BlockEntry; 1131 T.Range = StringRef(Current, 1); 1132 skip(1); 1133 TokenQueue.push_back(T); 1134 return true; 1135} 1136 1137bool Scanner::scanKey() { 1138 if (!FlowLevel) 1139 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); 1140 1141 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); 1142 IsSimpleKeyAllowed = !FlowLevel; 1143 1144 Token T; 1145 T.Kind = Token::TK_Key; 1146 T.Range = StringRef(Current, 1); 1147 skip(1); 1148 TokenQueue.push_back(T); 1149 return true; 1150} 1151 1152bool Scanner::scanValue() { 1153 // If the previous token could have been a simple key, insert the key token 1154 // into the token queue. 1155 if (!SimpleKeys.empty()) { 1156 SimpleKey SK = SimpleKeys.pop_back_val(); 1157 Token T; 1158 T.Kind = Token::TK_Key; 1159 T.Range = SK.Tok->Range; 1160 TokenQueueT::iterator i, e; 1161 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { 1162 if (i == SK.Tok) 1163 break; 1164 } 1165 assert(i != e && "SimpleKey not in token queue!"); 1166 i = TokenQueue.insert(i, T); 1167 1168 // We may also need to add a Block-Mapping-Start token. 1169 rollIndent(SK.Column, Token::TK_BlockMappingStart, i); 1170 1171 IsSimpleKeyAllowed = false; 1172 } else { 1173 if (!FlowLevel) 1174 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); 1175 IsSimpleKeyAllowed = !FlowLevel; 1176 } 1177 1178 Token T; 1179 T.Kind = Token::TK_Value; 1180 T.Range = StringRef(Current, 1); 1181 skip(1); 1182 TokenQueue.push_back(T); 1183 return true; 1184} 1185 1186// Forbidding inlining improves performance by roughly 20%. 1187// FIXME: Remove once llvm optimizes this to the faster version without hints. 1188LLVM_ATTRIBUTE_NOINLINE static bool 1189wasEscaped(StringRef::iterator First, StringRef::iterator Position); 1190 1191// Returns whether a character at 'Position' was escaped with a leading '\'. 1192// 'First' specifies the position of the first character in the string. 1193static bool wasEscaped(StringRef::iterator First, 1194 StringRef::iterator Position) { 1195 assert(Position - 1 >= First); 1196 StringRef::iterator I = Position - 1; 1197 // We calculate the number of consecutive '\'s before the current position 1198 // by iterating backwards through our string. 1199 while (I >= First && *I == '\\') --I; 1200 // (Position - 1 - I) now contains the number of '\'s before the current 1201 // position. If it is odd, the character at 'Position' was escaped. 1202 return (Position - 1 - I) % 2 == 1; 1203} 1204 1205bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { 1206 StringRef::iterator Start = Current; 1207 unsigned ColStart = Column; 1208 if (IsDoubleQuoted) { 1209 do { 1210 ++Current; 1211 while (Current != End && *Current != '"') 1212 ++Current; 1213 // Repeat until the previous character was not a '\' or was an escaped 1214 // backslash. 1215 } while ( Current != End 1216 && *(Current - 1) == '\\' 1217 && wasEscaped(Start + 1, Current)); 1218 } else { 1219 skip(1); 1220 while (true) { 1221 // Skip a ' followed by another '. 1222 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { 1223 skip(2); 1224 continue; 1225 } else if (*Current == '\'') 1226 break; 1227 StringRef::iterator i = skip_nb_char(Current); 1228 if (i == Current) { 1229 i = skip_b_break(Current); 1230 if (i == Current) 1231 break; 1232 Current = i; 1233 Column = 0; 1234 ++Line; 1235 } else { 1236 if (i == End) 1237 break; 1238 Current = i; 1239 ++Column; 1240 } 1241 } 1242 } 1243 1244 if (Current == End) { 1245 setError("Expected quote at end of scalar", Current); 1246 return false; 1247 } 1248 1249 skip(1); // Skip ending quote. 1250 Token T; 1251 T.Kind = Token::TK_Scalar; 1252 T.Range = StringRef(Start, Current - Start); 1253 TokenQueue.push_back(T); 1254 1255 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1256 1257 IsSimpleKeyAllowed = false; 1258 1259 return true; 1260} 1261 1262bool Scanner::scanPlainScalar() { 1263 StringRef::iterator Start = Current; 1264 unsigned ColStart = Column; 1265 unsigned LeadingBlanks = 0; 1266 assert(Indent >= -1 && "Indent must be >= -1 !"); 1267 unsigned indent = static_cast<unsigned>(Indent + 1); 1268 while (true) { 1269 if (*Current == '#') 1270 break; 1271 1272 while (!isBlankOrBreak(Current)) { 1273 if ( FlowLevel && *Current == ':' 1274 && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) { 1275 setError("Found unexpected ':' while scanning a plain scalar", Current); 1276 return false; 1277 } 1278 1279 // Check for the end of the plain scalar. 1280 if ( (*Current == ':' && isBlankOrBreak(Current + 1)) 1281 || ( FlowLevel 1282 && (StringRef(Current, 1).find_first_of(",:?[]{}") 1283 != StringRef::npos))) 1284 break; 1285 1286 StringRef::iterator i = skip_nb_char(Current); 1287 if (i == Current) 1288 break; 1289 Current = i; 1290 ++Column; 1291 } 1292 1293 // Are we at the end? 1294 if (!isBlankOrBreak(Current)) 1295 break; 1296 1297 // Eat blanks. 1298 StringRef::iterator Tmp = Current; 1299 while (isBlankOrBreak(Tmp)) { 1300 StringRef::iterator i = skip_s_white(Tmp); 1301 if (i != Tmp) { 1302 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { 1303 setError("Found invalid tab character in indentation", Tmp); 1304 return false; 1305 } 1306 Tmp = i; 1307 ++Column; 1308 } else { 1309 i = skip_b_break(Tmp); 1310 if (!LeadingBlanks) 1311 LeadingBlanks = 1; 1312 Tmp = i; 1313 Column = 0; 1314 ++Line; 1315 } 1316 } 1317 1318 if (!FlowLevel && Column < indent) 1319 break; 1320 1321 Current = Tmp; 1322 } 1323 if (Start == Current) { 1324 setError("Got empty plain scalar", Start); 1325 return false; 1326 } 1327 Token T; 1328 T.Kind = Token::TK_Scalar; 1329 T.Range = StringRef(Start, Current - Start); 1330 TokenQueue.push_back(T); 1331 1332 // Plain scalars can be simple keys. 1333 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1334 1335 IsSimpleKeyAllowed = false; 1336 1337 return true; 1338} 1339 1340bool Scanner::scanAliasOrAnchor(bool IsAlias) { 1341 StringRef::iterator Start = Current; 1342 unsigned ColStart = Column; 1343 skip(1); 1344 while(true) { 1345 if ( *Current == '[' || *Current == ']' 1346 || *Current == '{' || *Current == '}' 1347 || *Current == ',' 1348 || *Current == ':') 1349 break; 1350 StringRef::iterator i = skip_ns_char(Current); 1351 if (i == Current) 1352 break; 1353 Current = i; 1354 ++Column; 1355 } 1356 1357 if (Start == Current) { 1358 setError("Got empty alias or anchor", Start); 1359 return false; 1360 } 1361 1362 Token T; 1363 T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; 1364 T.Range = StringRef(Start, Current - Start); 1365 TokenQueue.push_back(T); 1366 1367 // Alias and anchors can be simple keys. 1368 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1369 1370 IsSimpleKeyAllowed = false; 1371 1372 return true; 1373} 1374 1375bool Scanner::scanBlockScalar(bool IsLiteral) { 1376 StringRef::iterator Start = Current; 1377 skip(1); // Eat | or > 1378 while(true) { 1379 StringRef::iterator i = skip_nb_char(Current); 1380 if (i == Current) { 1381 if (Column == 0) 1382 break; 1383 i = skip_b_break(Current); 1384 if (i != Current) { 1385 // We got a line break. 1386 Column = 0; 1387 ++Line; 1388 Current = i; 1389 continue; 1390 } else { 1391 // There was an error, which should already have been printed out. 1392 return false; 1393 } 1394 } 1395 Current = i; 1396 ++Column; 1397 } 1398 1399 if (Start == Current) { 1400 setError("Got empty block scalar", Start); 1401 return false; 1402 } 1403 1404 Token T; 1405 T.Kind = Token::TK_Scalar; 1406 T.Range = StringRef(Start, Current - Start); 1407 TokenQueue.push_back(T); 1408 return true; 1409} 1410 1411bool Scanner::scanTag() { 1412 StringRef::iterator Start = Current; 1413 unsigned ColStart = Column; 1414 skip(1); // Eat !. 1415 if (Current == End || isBlankOrBreak(Current)); // An empty tag. 1416 else if (*Current == '<') { 1417 skip(1); 1418 scan_ns_uri_char(); 1419 if (!consume('>')) 1420 return false; 1421 } else { 1422 // FIXME: Actually parse the c-ns-shorthand-tag rule. 1423 Current = skip_while(&Scanner::skip_ns_char, Current); 1424 } 1425 1426 Token T; 1427 T.Kind = Token::TK_Tag; 1428 T.Range = StringRef(Start, Current - Start); 1429 TokenQueue.push_back(T); 1430 1431 // Tags can be simple keys. 1432 saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); 1433 1434 IsSimpleKeyAllowed = false; 1435 1436 return true; 1437} 1438 1439bool Scanner::fetchMoreTokens() { 1440 if (IsStartOfStream) 1441 return scanStreamStart(); 1442 1443 scanToNextToken(); 1444 1445 if (Current == End) 1446 return scanStreamEnd(); 1447 1448 removeStaleSimpleKeyCandidates(); 1449 1450 unrollIndent(Column); 1451 1452 if (Column == 0 && *Current == '%') 1453 return scanDirective(); 1454 1455 if (Column == 0 && Current + 4 <= End 1456 && *Current == '-' 1457 && *(Current + 1) == '-' 1458 && *(Current + 2) == '-' 1459 && (Current + 3 == End || isBlankOrBreak(Current + 3))) 1460 return scanDocumentIndicator(true); 1461 1462 if (Column == 0 && Current + 4 <= End 1463 && *Current == '.' 1464 && *(Current + 1) == '.' 1465 && *(Current + 2) == '.' 1466 && (Current + 3 == End || isBlankOrBreak(Current + 3))) 1467 return scanDocumentIndicator(false); 1468 1469 if (*Current == '[') 1470 return scanFlowCollectionStart(true); 1471 1472 if (*Current == '{') 1473 return scanFlowCollectionStart(false); 1474 1475 if (*Current == ']') 1476 return scanFlowCollectionEnd(true); 1477 1478 if (*Current == '}') 1479 return scanFlowCollectionEnd(false); 1480 1481 if (*Current == ',') 1482 return scanFlowEntry(); 1483 1484 if (*Current == '-' && isBlankOrBreak(Current + 1)) 1485 return scanBlockEntry(); 1486 1487 if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) 1488 return scanKey(); 1489 1490 if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) 1491 return scanValue(); 1492 1493 if (*Current == '*') 1494 return scanAliasOrAnchor(true); 1495 1496 if (*Current == '&') 1497 return scanAliasOrAnchor(false); 1498 1499 if (*Current == '!') 1500 return scanTag(); 1501 1502 if (*Current == '|' && !FlowLevel) 1503 return scanBlockScalar(true); 1504 1505 if (*Current == '>' && !FlowLevel) 1506 return scanBlockScalar(false); 1507 1508 if (*Current == '\'') 1509 return scanFlowScalar(false); 1510 1511 if (*Current == '"') 1512 return scanFlowScalar(true); 1513 1514 // Get a plain scalar. 1515 StringRef FirstChar(Current, 1); 1516 if (!(isBlankOrBreak(Current) 1517 || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) 1518 || (*Current == '-' && !isBlankOrBreak(Current + 1)) 1519 || (!FlowLevel && (*Current == '?' || *Current == ':') 1520 && isBlankOrBreak(Current + 1)) 1521 || (!FlowLevel && *Current == ':' 1522 && Current + 2 < End 1523 && *(Current + 1) == ':' 1524 && !isBlankOrBreak(Current + 2))) 1525 return scanPlainScalar(); 1526 1527 setError("Unrecognized character while tokenizing."); 1528 return false; 1529} 1530 1531Stream::Stream(StringRef Input, SourceMgr &SM) 1532 : scanner(new Scanner(Input, SM)) 1533 , CurrentDoc(0) {} 1534 1535Stream::~Stream() {} 1536 1537bool Stream::failed() { return scanner->failed(); } 1538 1539void Stream::printError(Node *N, const Twine &Msg) { 1540 SmallVector<SMRange, 1> Ranges; 1541 Ranges.push_back(N->getSourceRange()); 1542 scanner->printError( N->getSourceRange().Start 1543 , SourceMgr::DK_Error 1544 , Msg 1545 , Ranges); 1546} 1547 1548void Stream::handleYAMLDirective(const Token &t) { 1549 // TODO: Ensure version is 1.x. 1550} 1551 1552document_iterator Stream::begin() { 1553 if (CurrentDoc) 1554 report_fatal_error("Can only iterate over the stream once"); 1555 1556 // Skip Stream-Start. 1557 scanner->getNext(); 1558 1559 CurrentDoc.reset(new Document(*this)); 1560 return document_iterator(CurrentDoc); 1561} 1562 1563document_iterator Stream::end() { 1564 return document_iterator(); 1565} 1566 1567void Stream::skip() { 1568 for (document_iterator i = begin(), e = end(); i != e; ++i) 1569 i->skip(); 1570} 1571 1572Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A) 1573 : Doc(D) 1574 , TypeID(Type) 1575 , Anchor(A) { 1576 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); 1577 SourceRange = SMRange(Start, Start); 1578} 1579 1580Token &Node::peekNext() { 1581 return Doc->peekNext(); 1582} 1583 1584Token Node::getNext() { 1585 return Doc->getNext(); 1586} 1587 1588Node *Node::parseBlockNode() { 1589 return Doc->parseBlockNode(); 1590} 1591 1592BumpPtrAllocator &Node::getAllocator() { 1593 return Doc->NodeAllocator; 1594} 1595 1596void Node::setError(const Twine &Msg, Token &Tok) const { 1597 Doc->setError(Msg, Tok); 1598} 1599 1600bool Node::failed() const { 1601 return Doc->failed(); 1602} 1603 1604 1605 1606StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { 1607 // TODO: Handle newlines properly. We need to remove leading whitespace. 1608 if (Value[0] == '"') { // Double quoted. 1609 // Pull off the leading and trailing "s. 1610 StringRef UnquotedValue = Value.substr(1, Value.size() - 2); 1611 // Search for characters that would require unescaping the value. 1612 StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); 1613 if (i != StringRef::npos) 1614 return unescapeDoubleQuoted(UnquotedValue, i, Storage); 1615 return UnquotedValue; 1616 } else if (Value[0] == '\'') { // Single quoted. 1617 // Pull off the leading and trailing 's. 1618 StringRef UnquotedValue = Value.substr(1, Value.size() - 2); 1619 StringRef::size_type i = UnquotedValue.find('\''); 1620 if (i != StringRef::npos) { 1621 // We're going to need Storage. 1622 Storage.clear(); 1623 Storage.reserve(UnquotedValue.size()); 1624 for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { 1625 StringRef Valid(UnquotedValue.begin(), i); 1626 Storage.insert(Storage.end(), Valid.begin(), Valid.end()); 1627 Storage.push_back('\''); 1628 UnquotedValue = UnquotedValue.substr(i + 2); 1629 } 1630 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); 1631 return StringRef(Storage.begin(), Storage.size()); 1632 } 1633 return UnquotedValue; 1634 } 1635 // Plain or block. 1636 return Value.rtrim(" "); 1637} 1638 1639StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue 1640 , StringRef::size_type i 1641 , SmallVectorImpl<char> &Storage) 1642 const { 1643 // Use Storage to build proper value. 1644 Storage.clear(); 1645 Storage.reserve(UnquotedValue.size()); 1646 for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { 1647 // Insert all previous chars into Storage. 1648 StringRef Valid(UnquotedValue.begin(), i); 1649 Storage.insert(Storage.end(), Valid.begin(), Valid.end()); 1650 // Chop off inserted chars. 1651 UnquotedValue = UnquotedValue.substr(i); 1652 1653 assert(!UnquotedValue.empty() && "Can't be empty!"); 1654 1655 // Parse escape or line break. 1656 switch (UnquotedValue[0]) { 1657 case '\r': 1658 case '\n': 1659 Storage.push_back('\n'); 1660 if ( UnquotedValue.size() > 1 1661 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) 1662 UnquotedValue = UnquotedValue.substr(1); 1663 UnquotedValue = UnquotedValue.substr(1); 1664 break; 1665 default: 1666 if (UnquotedValue.size() == 1) 1667 // TODO: Report error. 1668 break; 1669 UnquotedValue = UnquotedValue.substr(1); 1670 switch (UnquotedValue[0]) { 1671 default: { 1672 Token T; 1673 T.Range = StringRef(UnquotedValue.begin(), 1); 1674 setError("Unrecognized escape code!", T); 1675 return ""; 1676 } 1677 case '\r': 1678 case '\n': 1679 // Remove the new line. 1680 if ( UnquotedValue.size() > 1 1681 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) 1682 UnquotedValue = UnquotedValue.substr(1); 1683 // If this was just a single byte newline, it will get skipped 1684 // below. 1685 break; 1686 case '0': 1687 Storage.push_back(0x00); 1688 break; 1689 case 'a': 1690 Storage.push_back(0x07); 1691 break; 1692 case 'b': 1693 Storage.push_back(0x08); 1694 break; 1695 case 't': 1696 case 0x09: 1697 Storage.push_back(0x09); 1698 break; 1699 case 'n': 1700 Storage.push_back(0x0A); 1701 break; 1702 case 'v': 1703 Storage.push_back(0x0B); 1704 break; 1705 case 'f': 1706 Storage.push_back(0x0C); 1707 break; 1708 case 'r': 1709 Storage.push_back(0x0D); 1710 break; 1711 case 'e': 1712 Storage.push_back(0x1B); 1713 break; 1714 case ' ': 1715 Storage.push_back(0x20); 1716 break; 1717 case '"': 1718 Storage.push_back(0x22); 1719 break; 1720 case '/': 1721 Storage.push_back(0x2F); 1722 break; 1723 case '\\': 1724 Storage.push_back(0x5C); 1725 break; 1726 case 'N': 1727 encodeUTF8(0x85, Storage); 1728 break; 1729 case '_': 1730 encodeUTF8(0xA0, Storage); 1731 break; 1732 case 'L': 1733 encodeUTF8(0x2028, Storage); 1734 break; 1735 case 'P': 1736 encodeUTF8(0x2029, Storage); 1737 break; 1738 case 'x': { 1739 if (UnquotedValue.size() < 3) 1740 // TODO: Report error. 1741 break; 1742 unsigned int UnicodeScalarValue; 1743 if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) 1744 // TODO: Report error. 1745 UnicodeScalarValue = 0xFFFD; 1746 encodeUTF8(UnicodeScalarValue, Storage); 1747 UnquotedValue = UnquotedValue.substr(2); 1748 break; 1749 } 1750 case 'u': { 1751 if (UnquotedValue.size() < 5) 1752 // TODO: Report error. 1753 break; 1754 unsigned int UnicodeScalarValue; 1755 if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) 1756 // TODO: Report error. 1757 UnicodeScalarValue = 0xFFFD; 1758 encodeUTF8(UnicodeScalarValue, Storage); 1759 UnquotedValue = UnquotedValue.substr(4); 1760 break; 1761 } 1762 case 'U': { 1763 if (UnquotedValue.size() < 9) 1764 // TODO: Report error. 1765 break; 1766 unsigned int UnicodeScalarValue; 1767 if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) 1768 // TODO: Report error. 1769 UnicodeScalarValue = 0xFFFD; 1770 encodeUTF8(UnicodeScalarValue, Storage); 1771 UnquotedValue = UnquotedValue.substr(8); 1772 break; 1773 } 1774 } 1775 UnquotedValue = UnquotedValue.substr(1); 1776 } 1777 } 1778 Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); 1779 return StringRef(Storage.begin(), Storage.size()); 1780} 1781 1782Node *KeyValueNode::getKey() { 1783 if (Key) 1784 return Key; 1785 // Handle implicit null keys. 1786 { 1787 Token &t = peekNext(); 1788 if ( t.Kind == Token::TK_BlockEnd 1789 || t.Kind == Token::TK_Value 1790 || t.Kind == Token::TK_Error) { 1791 return Key = new (getAllocator()) NullNode(Doc); 1792 } 1793 if (t.Kind == Token::TK_Key) 1794 getNext(); // skip TK_Key. 1795 } 1796 1797 // Handle explicit null keys. 1798 Token &t = peekNext(); 1799 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { 1800 return Key = new (getAllocator()) NullNode(Doc); 1801 } 1802 1803 // We've got a normal key. 1804 return Key = parseBlockNode(); 1805} 1806 1807Node *KeyValueNode::getValue() { 1808 if (Value) 1809 return Value; 1810 getKey()->skip(); 1811 if (failed()) 1812 return Value = new (getAllocator()) NullNode(Doc); 1813 1814 // Handle implicit null values. 1815 { 1816 Token &t = peekNext(); 1817 if ( t.Kind == Token::TK_BlockEnd 1818 || t.Kind == Token::TK_FlowMappingEnd 1819 || t.Kind == Token::TK_Key 1820 || t.Kind == Token::TK_FlowEntry 1821 || t.Kind == Token::TK_Error) { 1822 return Value = new (getAllocator()) NullNode(Doc); 1823 } 1824 1825 if (t.Kind != Token::TK_Value) { 1826 setError("Unexpected token in Key Value.", t); 1827 return Value = new (getAllocator()) NullNode(Doc); 1828 } 1829 getNext(); // skip TK_Value. 1830 } 1831 1832 // Handle explicit null values. 1833 Token &t = peekNext(); 1834 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { 1835 return Value = new (getAllocator()) NullNode(Doc); 1836 } 1837 1838 // We got a normal value. 1839 return Value = parseBlockNode(); 1840} 1841 1842void MappingNode::increment() { 1843 if (failed()) { 1844 IsAtEnd = true; 1845 CurrentEntry = 0; 1846 return; 1847 } 1848 if (CurrentEntry) { 1849 CurrentEntry->skip(); 1850 if (Type == MT_Inline) { 1851 IsAtEnd = true; 1852 CurrentEntry = 0; 1853 return; 1854 } 1855 } 1856 Token T = peekNext(); 1857 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { 1858 // KeyValueNode eats the TK_Key. That way it can detect null keys. 1859 CurrentEntry = new (getAllocator()) KeyValueNode(Doc); 1860 } else if (Type == MT_Block) { 1861 switch (T.Kind) { 1862 case Token::TK_BlockEnd: 1863 getNext(); 1864 IsAtEnd = true; 1865 CurrentEntry = 0; 1866 break; 1867 default: 1868 setError("Unexpected token. Expected Key or Block End", T); 1869 case Token::TK_Error: 1870 IsAtEnd = true; 1871 CurrentEntry = 0; 1872 } 1873 } else { 1874 switch (T.Kind) { 1875 case Token::TK_FlowEntry: 1876 // Eat the flow entry and recurse. 1877 getNext(); 1878 return increment(); 1879 case Token::TK_FlowMappingEnd: 1880 getNext(); 1881 case Token::TK_Error: 1882 // Set this to end iterator. 1883 IsAtEnd = true; 1884 CurrentEntry = 0; 1885 break; 1886 default: 1887 setError( "Unexpected token. Expected Key, Flow Entry, or Flow " 1888 "Mapping End." 1889 , T); 1890 IsAtEnd = true; 1891 CurrentEntry = 0; 1892 } 1893 } 1894} 1895 1896void SequenceNode::increment() { 1897 if (failed()) { 1898 IsAtEnd = true; 1899 CurrentEntry = 0; 1900 return; 1901 } 1902 if (CurrentEntry) 1903 CurrentEntry->skip(); 1904 Token T = peekNext(); 1905 if (SeqType == ST_Block) { 1906 switch (T.Kind) { 1907 case Token::TK_BlockEntry: 1908 getNext(); 1909 CurrentEntry = parseBlockNode(); 1910 if (CurrentEntry == 0) { // An error occurred. 1911 IsAtEnd = true; 1912 CurrentEntry = 0; 1913 } 1914 break; 1915 case Token::TK_BlockEnd: 1916 getNext(); 1917 IsAtEnd = true; 1918 CurrentEntry = 0; 1919 break; 1920 default: 1921 setError( "Unexpected token. Expected Block Entry or Block End." 1922 , T); 1923 case Token::TK_Error: 1924 IsAtEnd = true; 1925 CurrentEntry = 0; 1926 } 1927 } else if (SeqType == ST_Indentless) { 1928 switch (T.Kind) { 1929 case Token::TK_BlockEntry: 1930 getNext(); 1931 CurrentEntry = parseBlockNode(); 1932 if (CurrentEntry == 0) { // An error occurred. 1933 IsAtEnd = true; 1934 CurrentEntry = 0; 1935 } 1936 break; 1937 default: 1938 case Token::TK_Error: 1939 IsAtEnd = true; 1940 CurrentEntry = 0; 1941 } 1942 } else if (SeqType == ST_Flow) { 1943 switch (T.Kind) { 1944 case Token::TK_FlowEntry: 1945 // Eat the flow entry and recurse. 1946 getNext(); 1947 WasPreviousTokenFlowEntry = true; 1948 return increment(); 1949 case Token::TK_FlowSequenceEnd: 1950 getNext(); 1951 case Token::TK_Error: 1952 // Set this to end iterator. 1953 IsAtEnd = true; 1954 CurrentEntry = 0; 1955 break; 1956 case Token::TK_StreamEnd: 1957 case Token::TK_DocumentEnd: 1958 case Token::TK_DocumentStart: 1959 setError("Could not find closing ]!", T); 1960 // Set this to end iterator. 1961 IsAtEnd = true; 1962 CurrentEntry = 0; 1963 break; 1964 default: 1965 if (!WasPreviousTokenFlowEntry) { 1966 setError("Expected , between entries!", T); 1967 IsAtEnd = true; 1968 CurrentEntry = 0; 1969 break; 1970 } 1971 // Otherwise it must be a flow entry. 1972 CurrentEntry = parseBlockNode(); 1973 if (!CurrentEntry) { 1974 IsAtEnd = true; 1975 } 1976 WasPreviousTokenFlowEntry = false; 1977 break; 1978 } 1979 } 1980} 1981 1982Document::Document(Stream &S) : stream(S), Root(0) { 1983 if (parseDirectives()) 1984 expectToken(Token::TK_DocumentStart); 1985 Token &T = peekNext(); 1986 if (T.Kind == Token::TK_DocumentStart) 1987 getNext(); 1988} 1989 1990bool Document::skip() { 1991 if (stream.scanner->failed()) 1992 return false; 1993 if (!Root) 1994 getRoot(); 1995 Root->skip(); 1996 Token &T = peekNext(); 1997 if (T.Kind == Token::TK_StreamEnd) 1998 return false; 1999 if (T.Kind == Token::TK_DocumentEnd) { 2000 getNext(); 2001 return skip(); 2002 } 2003 return true; 2004} 2005 2006Token &Document::peekNext() { 2007 return stream.scanner->peekNext(); 2008} 2009 2010Token Document::getNext() { 2011 return stream.scanner->getNext(); 2012} 2013 2014void Document::setError(const Twine &Message, Token &Location) const { 2015 stream.scanner->setError(Message, Location.Range.begin()); 2016} 2017 2018bool Document::failed() const { 2019 return stream.scanner->failed(); 2020} 2021 2022Node *Document::parseBlockNode() { 2023 Token T = peekNext(); 2024 // Handle properties. 2025 Token AnchorInfo; 2026parse_property: 2027 switch (T.Kind) { 2028 case Token::TK_Alias: 2029 getNext(); 2030 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); 2031 case Token::TK_Anchor: 2032 if (AnchorInfo.Kind == Token::TK_Anchor) { 2033 setError("Already encountered an anchor for this node!", T); 2034 return 0; 2035 } 2036 AnchorInfo = getNext(); // Consume TK_Anchor. 2037 T = peekNext(); 2038 goto parse_property; 2039 case Token::TK_Tag: 2040 getNext(); // Skip TK_Tag. 2041 T = peekNext(); 2042 goto parse_property; 2043 default: 2044 break; 2045 } 2046 2047 switch (T.Kind) { 2048 case Token::TK_BlockEntry: 2049 // We got an unindented BlockEntry sequence. This is not terminated with 2050 // a BlockEnd. 2051 // Don't eat the TK_BlockEntry, SequenceNode needs it. 2052 return new (NodeAllocator) SequenceNode( stream.CurrentDoc 2053 , AnchorInfo.Range.substr(1) 2054 , SequenceNode::ST_Indentless); 2055 case Token::TK_BlockSequenceStart: 2056 getNext(); 2057 return new (NodeAllocator) 2058 SequenceNode( stream.CurrentDoc 2059 , AnchorInfo.Range.substr(1) 2060 , SequenceNode::ST_Block); 2061 case Token::TK_BlockMappingStart: 2062 getNext(); 2063 return new (NodeAllocator) 2064 MappingNode( stream.CurrentDoc 2065 , AnchorInfo.Range.substr(1) 2066 , MappingNode::MT_Block); 2067 case Token::TK_FlowSequenceStart: 2068 getNext(); 2069 return new (NodeAllocator) 2070 SequenceNode( stream.CurrentDoc 2071 , AnchorInfo.Range.substr(1) 2072 , SequenceNode::ST_Flow); 2073 case Token::TK_FlowMappingStart: 2074 getNext(); 2075 return new (NodeAllocator) 2076 MappingNode( stream.CurrentDoc 2077 , AnchorInfo.Range.substr(1) 2078 , MappingNode::MT_Flow); 2079 case Token::TK_Scalar: 2080 getNext(); 2081 return new (NodeAllocator) 2082 ScalarNode( stream.CurrentDoc 2083 , AnchorInfo.Range.substr(1) 2084 , T.Range); 2085 case Token::TK_Key: 2086 // Don't eat the TK_Key, KeyValueNode expects it. 2087 return new (NodeAllocator) 2088 MappingNode( stream.CurrentDoc 2089 , AnchorInfo.Range.substr(1) 2090 , MappingNode::MT_Inline); 2091 case Token::TK_DocumentStart: 2092 case Token::TK_DocumentEnd: 2093 case Token::TK_StreamEnd: 2094 default: 2095 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not 2096 // !!null null. 2097 return new (NodeAllocator) NullNode(stream.CurrentDoc); 2098 case Token::TK_Error: 2099 return 0; 2100 } 2101 llvm_unreachable("Control flow shouldn't reach here."); 2102 return 0; 2103} 2104 2105bool Document::parseDirectives() { 2106 bool isDirective = false; 2107 while (true) { 2108 Token T = peekNext(); 2109 if (T.Kind == Token::TK_TagDirective) { 2110 handleTagDirective(getNext()); 2111 isDirective = true; 2112 } else if (T.Kind == Token::TK_VersionDirective) { 2113 stream.handleYAMLDirective(getNext()); 2114 isDirective = true; 2115 } else 2116 break; 2117 } 2118 return isDirective; 2119} 2120 2121bool Document::expectToken(int TK) { 2122 Token T = getNext(); 2123 if (T.Kind != TK) { 2124 setError("Unexpected token", T); 2125 return false; 2126 } 2127 return true; 2128} 2129