1// Copyright 2017 The Fuchsia Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "fidl/lexer.h" 6 7#include <ctype.h> 8 9namespace fidl { 10 11namespace { 12 13bool IsIdentifierBody(char c) { 14 return isalnum(c) || c == '_'; 15} 16 17// IsIdentifierValid disallows identifiers (escaped, and unescaped) from 18// starting or ending with underscore. 19bool IsIdentifierValid(StringView source_data) { 20 return source_data[0] != '_' && source_data[source_data.size() - 1] != '_'; 21} 22 23bool IsNumericLiteralBody(char c) { 24 switch (c) { 25 case '0': 26 case '1': 27 case '2': 28 case '3': 29 case '4': 30 case '5': 31 case '6': 32 case '7': 33 case '8': 34 case '9': 35 case 'a': 36 case 'A': 37 case 'b': 38 case 'B': 39 case 'c': 40 case 'C': 41 case 'd': 42 case 'D': 43 case 'e': 44 case 'E': 45 case 'f': 46 case 'F': 47 case 'x': 48 case 'X': 49 case '-': 50 case '_': 51 case '.': 52 return true; 53 default: 54 return false; 55 } 56} 57 58} // namespace 59 60constexpr char Lexer::Peek() const { 61 return *current_; 62} 63 64void Lexer::Skip() { 65 ++current_; 66 ++token_start_; 67} 68 69char Lexer::Consume() { 70 auto current = *current_; 71 ++current_; 72 ++token_size_; 73 return current; 74} 75 76StringView Lexer::Reset(Token::Kind kind) { 77 auto data = StringView(token_start_, token_size_); 78 if (kind != Token::Kind::kComment) { 79 previous_end_ = token_start_ + token_size_; 80 } 81 token_start_ = current_; 82 token_size_ = 0u; 83 return data; 84} 85 86Token Lexer::Finish(Token::Kind kind) { 87 StringView previous(previous_end_, token_start_ - previous_end_); 88 StringView current(token_start_, token_size_); 89 SourceLocation previous_location(previous, source_file_); 90 return Token(previous_location, 91 SourceLocation(Reset(kind), source_file_), kind); 92} 93 94Token Lexer::LexEndOfStream() { 95 return Finish(Token::Kind::kEndOfFile); 96} 97 98Token Lexer::LexNumericLiteral() { 99 while (IsNumericLiteralBody(Peek())) 100 Consume(); 101 return Finish(Token::Kind::kNumericLiteral); 102} 103 104Token Lexer::LexIdentifier() { 105 while (IsIdentifierBody(Peek())) 106 Consume(); 107 StringView previous(previous_end_, token_start_ - previous_end_); 108 SourceLocation previous_end(previous, source_file_); 109 StringView identifier_data = Reset(Token::Kind::kNotAToken); 110 if (!IsIdentifierValid(identifier_data)) 111 return Finish(Token::Kind::kNotAToken); 112 return identifier_table_->MakeIdentifier( 113 previous_end, identifier_data, source_file_, /* escaped */ false); 114} 115 116Token Lexer::LexEscapedIdentifier() { 117 // Reset() to drop the initial @ from the identifier. 118 Reset(Token::Kind::kComment); 119 120 while (IsIdentifierBody(Peek())) 121 Consume(); 122 StringView previous(previous_end_, token_start_ - previous_end_); 123 SourceLocation previous_end(previous, source_file_); 124 StringView identifier_data = Reset(Token::Kind::kNotAToken); 125 if (!IsIdentifierValid(identifier_data)) 126 return Finish(Token::Kind::kNotAToken); 127 return identifier_table_->MakeIdentifier( 128 previous_end, identifier_data, source_file_, /* escaped */ true); 129} 130 131Token Lexer::LexStringLiteral() { 132 auto last = Peek(); 133 134 // Lexing a "string literal" to the next matching delimiter. 135 for (;;) { 136 auto next = Consume(); 137 switch (next) { 138 case 0: 139 return Finish(Token::Kind::kNotAToken); 140 case '"': 141 // This escaping logic is incorrect for the input: "\\" 142 if (last != '\\') 143 return Finish(Token::Kind::kStringLiteral); 144 // Fall through. 145 default: 146 last = next; 147 } 148 } 149} 150 151Token Lexer::LexCommentOrDocComment() { 152 // Consume the second /. 153 assert(Peek() == '/'); 154 Consume(); 155 156 // Check if it's a Doc Comment 157 auto comment_type = Token::Kind::kComment; 158 if (Peek() == '/') { 159 comment_type = Token::Kind::kDocComment; 160 Consume(); 161 // Anything with more than 3 slashes is a likely a section 162 // break comment 163 if (Peek() == '/') { 164 comment_type = Token::Kind::kComment; 165 } 166 } 167 168 // Lexing a C++-style // comment. Go to the end of the line or 169 // file. 170 for (;;) { 171 switch (Peek()) { 172 case 0: 173 case '\n': 174 return Finish(comment_type); 175 default: 176 Consume(); 177 continue; 178 } 179 } 180} 181 182void Lexer::SkipWhitespace() { 183 for (;;) { 184 switch (Peek()) { 185 case ' ': 186 case '\n': 187 case '\r': 188 case '\t': 189 Skip(); 190 continue; 191 default: 192 return; 193 } 194 } 195} 196 197Token Lexer::LexNoComments() { 198 for (;;) { 199 auto token = Lex(); 200 if (token.kind() == Token::Kind::kComment) 201 continue; 202 return token; 203 } 204} 205 206Token Lexer::Lex() { 207 SkipWhitespace(); 208 209 switch (Consume()) { 210 case 0: 211 return LexEndOfStream(); 212 213 case ' ': 214 case '\n': 215 case '\r': 216 case '\t': 217 assert(false && "Should have been handled by SkipWhitespace!"); 218 219 case '-': 220 // Maybe the start of an arrow. 221 if (Peek() == '>') { 222 Consume(); 223 return Finish(Token::Kind::kArrow); 224 } 225 // Fallthrough 226 case '0': 227 case '1': 228 case '2': 229 case '3': 230 case '4': 231 case '5': 232 case '6': 233 case '7': 234 case '8': 235 case '9': 236 return LexNumericLiteral(); 237 238 case 'a': 239 case 'A': 240 case 'b': 241 case 'B': 242 case 'c': 243 case 'C': 244 case 'd': 245 case 'D': 246 case 'e': 247 case 'E': 248 case 'f': 249 case 'F': 250 case 'g': 251 case 'G': 252 case 'h': 253 case 'H': 254 case 'i': 255 case 'I': 256 case 'j': 257 case 'J': 258 case 'k': 259 case 'K': 260 case 'l': 261 case 'L': 262 case 'm': 263 case 'M': 264 case 'n': 265 case 'N': 266 case 'o': 267 case 'O': 268 case 'p': 269 case 'P': 270 case 'q': 271 case 'Q': 272 case 'r': 273 case 'R': 274 case 's': 275 case 'S': 276 case 't': 277 case 'T': 278 case 'u': 279 case 'U': 280 case 'v': 281 case 'V': 282 case 'w': 283 case 'W': 284 case 'x': 285 case 'X': 286 case 'y': 287 case 'Y': 288 case 'z': 289 case 'Z': 290 return LexIdentifier(); 291 292 case '@': 293 return LexEscapedIdentifier(); 294 295 case '"': 296 return LexStringLiteral(); 297 298 case '/': 299 // Maybe the start of a comment. 300 switch (Peek()) { 301 case '/': 302 return LexCommentOrDocComment(); 303 default: 304 return Finish(Token::Kind::kNotAToken); 305 } 306 307 case '(': 308 return Finish(Token::Kind::kLeftParen); 309 case ')': 310 return Finish(Token::Kind::kRightParen); 311 case '[': 312 return Finish(Token::Kind::kLeftSquare); 313 case ']': 314 return Finish(Token::Kind::kRightSquare); 315 case '{': 316 return Finish(Token::Kind::kLeftCurly); 317 case '}': 318 return Finish(Token::Kind::kRightCurly); 319 case '<': 320 return Finish(Token::Kind::kLeftAngle); 321 case '>': 322 return Finish(Token::Kind::kRightAngle); 323 324 case '.': 325 return Finish(Token::Kind::kDot); 326 case ',': 327 return Finish(Token::Kind::kComma); 328 case ';': 329 return Finish(Token::Kind::kSemicolon); 330 case ':': 331 return Finish(Token::Kind::kColon); 332 case '?': 333 return Finish(Token::Kind::kQuestion); 334 case '=': 335 return Finish(Token::Kind::kEqual); 336 case '&': 337 return Finish(Token::Kind::kAmpersand); 338 339 default: 340 return Finish(Token::Kind::kNotAToken); 341 } 342} 343 344} // namespace fidl 345