1/* 2 * Copyright 2005 Maksim Orlovich <maksim@kde.org> 3 * Copyright (C) 2006, 2013 Apple Inc. All rights reserved. 4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28#include "config.h" 29#include "XPathParser.h" 30 31#include "ExceptionCode.h" 32#include "XPathEvaluator.h" 33#include "XPathException.h" 34#include "XPathNSResolver.h" 35#include "XPathPath.h" 36#include <wtf/NeverDestroyed.h> 37#include <wtf/StdLibExtras.h> 38#include <wtf/text/StringHash.h> 39 40using namespace WebCore; 41using namespace XPath; 42 43extern int xpathyyparse(Parser&); 44 45#include "XPathGrammar.h" 46 47namespace WebCore { 48namespace XPath { 49 50struct Parser::Token { 51 int type; 52 String string; 53 Step::Axis axis; 54 NumericOp::Opcode numericOpcode; 55 EqTestOp::Opcode equalityTestOpcode; 56 57 Token(int type) : type(type) { } 58 Token(int type, const String& string) : type(type), string(string) { } 59 Token(int type, Step::Axis axis) : type(type), axis(axis) { } 60 Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { } 61 Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { } 62}; 63 64enum XMLCat { NameStart, NameCont, NotPartOfName }; 65 66static XMLCat charCat(UChar character) 67{ 68 if (character == '_') 69 return NameStart; 70 71 if (character == '.' || character == '-') 72 return NameCont; 73 unsigned characterTypeMask = U_GET_GC_MASK(character); 74 if (characterTypeMask & (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK)) 75 return NameStart; 76 if (characterTypeMask & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK)) 77 return NameCont; 78 return NotPartOfName; 79} 80 81static void populateAxisNamesMap(HashMap<String, Step::Axis>& axisNames) 82{ 83 struct AxisName { 84 const char* name; 85 Step::Axis axis; 86 }; 87 const AxisName axisNameList[] = { 88 { "ancestor", Step::AncestorAxis }, 89 { "ancestor-or-self", Step::AncestorOrSelfAxis }, 90 { "attribute", Step::AttributeAxis }, 91 { "child", Step::ChildAxis }, 92 { "descendant", Step::DescendantAxis }, 93 { "descendant-or-self", Step::DescendantOrSelfAxis }, 94 { "following", Step::FollowingAxis }, 95 { "following-sibling", Step::FollowingSiblingAxis }, 96 { "namespace", Step::NamespaceAxis }, 97 { "parent", Step::ParentAxis }, 98 { "preceding", Step::PrecedingAxis }, 99 { "preceding-sibling", Step::PrecedingSiblingAxis }, 100 { "self", Step::SelfAxis } 101 }; 102 for (unsigned i = 0; i < WTF_ARRAY_LENGTH(axisNameList); ++i) 103 axisNames.add(axisNameList[i].name, axisNameList[i].axis); 104} 105 106static bool parseAxisName(const String& name, Step::Axis& type) 107{ 108 static NeverDestroyed<HashMap<String, Step::Axis>> axisNames; 109 if (axisNames.get().isEmpty()) 110 populateAxisNamesMap(axisNames); 111 112 auto it = axisNames.get().find(name); 113 if (it == axisNames.get().end()) 114 return false; 115 type = it->value; 116 return true; 117} 118 119// Returns whether the current token can possibly be a binary operator, given 120// the previous token. Necessary to disambiguate some of the operators 121// (* (multiply), div, and, or, mod) in the [32] Operator rule 122// (check http://www.w3.org/TR/xpath#exprlex). 123bool Parser::isBinaryOperatorContext() const 124{ 125 switch (m_lastTokenType) { 126 case 0: 127 case '@': case AXISNAME: case '(': case '[': case ',': 128 case AND: case OR: case MULOP: 129 case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: 130 case EQOP: case RELOP: 131 return false; 132 default: 133 return true; 134 } 135} 136 137void Parser::skipWS() 138{ 139 while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) 140 ++m_nextPos; 141} 142 143Parser::Token Parser::makeTokenAndAdvance(int code, int advance) 144{ 145 m_nextPos += advance; 146 return Token(code); 147} 148 149Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) 150{ 151 m_nextPos += advance; 152 return Token(code, val); 153} 154 155Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) 156{ 157 m_nextPos += advance; 158 return Token(code, val); 159} 160 161// Returns next char if it's there and interesting, 0 otherwise. 162char Parser::peekAheadHelper() 163{ 164 if (m_nextPos + 1 >= m_data.length()) 165 return 0; 166 UChar next = m_data[m_nextPos + 1]; 167 if (next >= 0xff) 168 return 0; 169 return next; 170} 171 172char Parser::peekCurHelper() 173{ 174 if (m_nextPos >= m_data.length()) 175 return 0; 176 UChar next = m_data[m_nextPos]; 177 if (next >= 0xff) 178 return 0; 179 return next; 180} 181 182Parser::Token Parser::lexString() 183{ 184 UChar delimiter = m_data[m_nextPos]; 185 int startPos = m_nextPos + 1; 186 187 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { 188 if (m_data[m_nextPos] == delimiter) { 189 String value = m_data.substring(startPos, m_nextPos - startPos); 190 if (value.isNull()) 191 value = ""; 192 ++m_nextPos; // Consume the char. 193 return Token(LITERAL, value); 194 } 195 } 196 197 // Ouch, went off the end -- report error. 198 return Token(XPATH_ERROR); 199} 200 201Parser::Token Parser::lexNumber() 202{ 203 int startPos = m_nextPos; 204 bool seenDot = false; 205 206 // Go until end or a non-digits character. 207 for (; m_nextPos < m_data.length(); ++m_nextPos) { 208 UChar aChar = m_data[m_nextPos]; 209 if (aChar >= 0xff) break; 210 211 if (aChar < '0' || aChar > '9') { 212 if (aChar == '.' && !seenDot) 213 seenDot = true; 214 else 215 break; 216 } 217 } 218 219 return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); 220} 221 222bool Parser::lexNCName(String& name) 223{ 224 int startPos = m_nextPos; 225 if (m_nextPos >= m_data.length()) 226 return false; 227 228 if (charCat(m_data[m_nextPos]) != NameStart) 229 return false; 230 231 // Keep going until we get a character that's not good for names. 232 for (; m_nextPos < m_data.length(); ++m_nextPos) 233 if (charCat(m_data[m_nextPos]) == NotPartOfName) 234 break; 235 236 name = m_data.substring(startPos, m_nextPos - startPos); 237 return true; 238} 239 240bool Parser::lexQName(String& name) 241{ 242 String n1; 243 if (!lexNCName(n1)) 244 return false; 245 246 skipWS(); 247 248 // If the next character is :, what we just got it the prefix, if not, 249 // it's the whole thing. 250 if (peekAheadHelper() != ':') { 251 name = n1; 252 return true; 253 } 254 255 String n2; 256 if (!lexNCName(n2)) 257 return false; 258 259 name = n1 + ":" + n2; 260 return true; 261} 262 263inline Parser::Token Parser::nextTokenInternal() 264{ 265 skipWS(); 266 267 if (m_nextPos >= m_data.length()) 268 return Token(0); 269 270 char code = peekCurHelper(); 271 switch (code) { 272 case '(': case ')': case '[': case ']': 273 case '@': case ',': case '|': 274 return makeTokenAndAdvance(code); 275 case '\'': 276 case '\"': 277 return lexString(); 278 case '0': case '1': case '2': case '3': case '4': 279 case '5': case '6': case '7': case '8': case '9': 280 return lexNumber(); 281 case '.': { 282 char next = peekAheadHelper(); 283 if (next == '.') 284 return makeTokenAndAdvance(DOTDOT, 2); 285 if (next >= '0' && next <= '9') 286 return lexNumber(); 287 return makeTokenAndAdvance('.'); 288 } 289 case '/': 290 if (peekAheadHelper() == '/') 291 return makeTokenAndAdvance(SLASHSLASH, 2); 292 return makeTokenAndAdvance('/'); 293 case '+': 294 return makeTokenAndAdvance(PLUS); 295 case '-': 296 return makeTokenAndAdvance(MINUS); 297 case '=': 298 return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); 299 case '!': 300 if (peekAheadHelper() == '=') 301 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); 302 return Token(XPATH_ERROR); 303 case '<': 304 if (peekAheadHelper() == '=') 305 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); 306 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); 307 case '>': 308 if (peekAheadHelper() == '=') 309 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); 310 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); 311 case '*': 312 if (isBinaryOperatorContext()) 313 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); 314 ++m_nextPos; 315 return Token(NAMETEST, "*"); 316 case '$': { // $ QName 317 m_nextPos++; 318 String name; 319 if (!lexQName(name)) 320 return Token(XPATH_ERROR); 321 return Token(VARIABLEREFERENCE, name); 322 } 323 } 324 325 String name; 326 if (!lexNCName(name)) 327 return Token(XPATH_ERROR); 328 329 skipWS(); 330 // If we're in an operator context, check for any operator names 331 if (isBinaryOperatorContext()) { 332 if (name == "and") //### hash? 333 return Token(AND); 334 if (name == "or") 335 return Token(OR); 336 if (name == "mod") 337 return Token(MULOP, NumericOp::OP_Mod); 338 if (name == "div") 339 return Token(MULOP, NumericOp::OP_Div); 340 } 341 342 // See whether we are at a : 343 if (peekCurHelper() == ':') { 344 m_nextPos++; 345 // Any chance it's an axis name? 346 if (peekCurHelper() == ':') { 347 m_nextPos++; 348 349 //It might be an axis name. 350 Step::Axis axis; 351 if (parseAxisName(name, axis)) 352 return Token(AXISNAME, axis); 353 // Ugh, :: is only valid in axis names -> error 354 return Token(XPATH_ERROR); 355 } 356 357 // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest 358 skipWS(); 359 if (peekCurHelper() == '*') { 360 m_nextPos++; 361 return Token(NAMETEST, name + ":*"); 362 } 363 364 // Make a full qname. 365 String n2; 366 if (!lexNCName(n2)) 367 return Token(XPATH_ERROR); 368 369 name = name + ":" + n2; 370 } 371 372 skipWS(); 373 374 if (peekCurHelper() == '(') { 375 // note: we don't swallow the '(' here! 376 377 // Either node type oor function name. 378 379 if (name == "processing-instruction") 380 return Token(PI); 381 if (name == "node") 382 return Token(NODE); 383 if (name == "text") 384 return Token(TEXT); 385 if (name == "comment") 386 return Token(COMMENT); 387 388 return Token(FUNCTIONNAME, name); 389 } 390 391 // At this point, it must be NAMETEST. 392 return Token(NAMETEST, name); 393} 394 395inline Parser::Token Parser::nextToken() 396{ 397 Token token = nextTokenInternal(); 398 m_lastTokenType = token.type; 399 return token; 400} 401 402Parser::Parser(const String& statement, XPathNSResolver* resolver) 403 : m_data(statement) 404 , m_resolver(resolver) 405 , m_nextPos(0) 406 , m_lastTokenType(0) 407 , m_sawNamespaceError(false) 408{ 409} 410 411int Parser::lex(YYSTYPE& yylval) 412{ 413 Token token = nextToken(); 414 415 switch (token.type) { 416 case AXISNAME: 417 yylval.axis = token.axis; 418 break; 419 case MULOP: 420 yylval.numericOpcode = token.numericOpcode; 421 break; 422 case RELOP: 423 case EQOP: 424 yylval.equalityTestOpcode = token.equalityTestOpcode; 425 break; 426 case NODETYPE: 427 case FUNCTIONNAME: 428 case LITERAL: 429 case VARIABLEREFERENCE: 430 case NUMBER: 431 case NAMETEST: 432 yylval.string = token.string.releaseImpl().leakRef(); 433 break; 434 } 435 436 return token.type; 437} 438 439bool Parser::expandQualifiedName(const String& qualifiedName, String& localName, String& namespaceURI) 440{ 441 size_t colon = qualifiedName.find(':'); 442 if (colon != notFound) { 443 if (!m_resolver) { 444 m_sawNamespaceError = true; 445 return false; 446 } 447 namespaceURI = m_resolver->lookupNamespaceURI(qualifiedName.left(colon)); 448 if (namespaceURI.isNull()) { 449 m_sawNamespaceError = true; 450 return false; 451 } 452 localName = qualifiedName.substring(colon + 1); 453 } else 454 localName = qualifiedName; 455 456 return true; 457} 458 459std::unique_ptr<Expression> Parser::parseStatement(const String& statement, XPathNSResolver* resolver, ExceptionCode& ec) 460{ 461 Parser parser(statement, resolver); 462 463 int parseError = xpathyyparse(parser); 464 465 if (parser.m_sawNamespaceError) { 466 ec = NAMESPACE_ERR; 467 return nullptr; 468 } 469 470 if (parseError) { 471 ec = XPathException::INVALID_EXPRESSION_ERR; 472 return nullptr; 473 } 474 475 return WTF::move(parser.m_result); 476} 477 478} } 479