1/*
2 * Copyright 2005 Maksim Orlovich <maksim@kde.org>
3 * Copyright (C) 2006, 2013 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "XPathParser.h"
30
31#include "ExceptionCode.h"
32#include "XPathEvaluator.h"
33#include "XPathException.h"
34#include "XPathNSResolver.h"
35#include "XPathPath.h"
36#include <wtf/NeverDestroyed.h>
37#include <wtf/StdLibExtras.h>
38#include <wtf/text/StringHash.h>
39
40using namespace WebCore;
41using namespace XPath;
42
43extern int xpathyyparse(Parser&);
44
45#include "XPathGrammar.h"
46
47namespace WebCore {
48namespace XPath {
49
50struct Parser::Token {
51    int type;
52    String string;
53    Step::Axis axis;
54    NumericOp::Opcode numericOpcode;
55    EqTestOp::Opcode equalityTestOpcode;
56
57    Token(int type) : type(type) { }
58    Token(int type, const String& string) : type(type), string(string) { }
59    Token(int type, Step::Axis axis) : type(type), axis(axis) { }
60    Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { }
61    Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { }
62};
63
64enum XMLCat { NameStart, NameCont, NotPartOfName };
65
66static XMLCat charCat(UChar character)
67{
68    if (character == '_')
69        return NameStart;
70
71    if (character == '.' || character == '-')
72        return NameCont;
73    unsigned characterTypeMask = U_GET_GC_MASK(character);
74    if (characterTypeMask & (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK))
75        return NameStart;
76    if (characterTypeMask & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK))
77        return NameCont;
78    return NotPartOfName;
79}
80
81static void populateAxisNamesMap(HashMap<String, Step::Axis>& axisNames)
82{
83    struct AxisName {
84        const char* name;
85        Step::Axis axis;
86    };
87    const AxisName axisNameList[] = {
88        { "ancestor", Step::AncestorAxis },
89        { "ancestor-or-self", Step::AncestorOrSelfAxis },
90        { "attribute", Step::AttributeAxis },
91        { "child", Step::ChildAxis },
92        { "descendant", Step::DescendantAxis },
93        { "descendant-or-self", Step::DescendantOrSelfAxis },
94        { "following", Step::FollowingAxis },
95        { "following-sibling", Step::FollowingSiblingAxis },
96        { "namespace", Step::NamespaceAxis },
97        { "parent", Step::ParentAxis },
98        { "preceding", Step::PrecedingAxis },
99        { "preceding-sibling", Step::PrecedingSiblingAxis },
100        { "self", Step::SelfAxis }
101    };
102    for (unsigned i = 0; i < WTF_ARRAY_LENGTH(axisNameList); ++i)
103        axisNames.add(axisNameList[i].name, axisNameList[i].axis);
104}
105
106static bool parseAxisName(const String& name, Step::Axis& type)
107{
108    static NeverDestroyed<HashMap<String, Step::Axis>> axisNames;
109    if (axisNames.get().isEmpty())
110        populateAxisNamesMap(axisNames);
111
112    auto it = axisNames.get().find(name);
113    if (it == axisNames.get().end())
114        return false;
115    type = it->value;
116    return true;
117}
118
119// Returns whether the current token can possibly be a binary operator, given
120// the previous token. Necessary to disambiguate some of the operators
121// (* (multiply), div, and, or, mod) in the [32] Operator rule
122// (check http://www.w3.org/TR/xpath#exprlex).
123bool Parser::isBinaryOperatorContext() const
124{
125    switch (m_lastTokenType) {
126    case 0:
127    case '@': case AXISNAME: case '(': case '[': case ',':
128    case AND: case OR: case MULOP:
129    case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
130    case EQOP: case RELOP:
131        return false;
132    default:
133        return true;
134    }
135}
136
137void Parser::skipWS()
138{
139    while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
140        ++m_nextPos;
141}
142
143Parser::Token Parser::makeTokenAndAdvance(int code, int advance)
144{
145    m_nextPos += advance;
146    return Token(code);
147}
148
149Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
150{
151    m_nextPos += advance;
152    return Token(code, val);
153}
154
155Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
156{
157    m_nextPos += advance;
158    return Token(code, val);
159}
160
161// Returns next char if it's there and interesting, 0 otherwise.
162char Parser::peekAheadHelper()
163{
164    if (m_nextPos + 1 >= m_data.length())
165        return 0;
166    UChar next = m_data[m_nextPos + 1];
167    if (next >= 0xff)
168        return 0;
169    return next;
170}
171
172char Parser::peekCurHelper()
173{
174    if (m_nextPos >= m_data.length())
175        return 0;
176    UChar next = m_data[m_nextPos];
177    if (next >= 0xff)
178        return 0;
179    return next;
180}
181
182Parser::Token Parser::lexString()
183{
184    UChar delimiter = m_data[m_nextPos];
185    int startPos = m_nextPos + 1;
186
187    for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
188        if (m_data[m_nextPos] == delimiter) {
189            String value = m_data.substring(startPos, m_nextPos - startPos);
190            if (value.isNull())
191                value = "";
192            ++m_nextPos; // Consume the char.
193            return Token(LITERAL, value);
194        }
195    }
196
197    // Ouch, went off the end -- report error.
198    return Token(XPATH_ERROR);
199}
200
201Parser::Token Parser::lexNumber()
202{
203    int startPos = m_nextPos;
204    bool seenDot = false;
205
206    // Go until end or a non-digits character.
207    for (; m_nextPos < m_data.length(); ++m_nextPos) {
208        UChar aChar = m_data[m_nextPos];
209        if (aChar >= 0xff) break;
210
211        if (aChar < '0' || aChar > '9') {
212            if (aChar == '.' && !seenDot)
213                seenDot = true;
214            else
215                break;
216        }
217    }
218
219    return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
220}
221
222bool Parser::lexNCName(String& name)
223{
224    int startPos = m_nextPos;
225    if (m_nextPos >= m_data.length())
226        return false;
227
228    if (charCat(m_data[m_nextPos]) != NameStart)
229        return false;
230
231    // Keep going until we get a character that's not good for names.
232    for (; m_nextPos < m_data.length(); ++m_nextPos)
233        if (charCat(m_data[m_nextPos]) == NotPartOfName)
234            break;
235
236    name = m_data.substring(startPos, m_nextPos - startPos);
237    return true;
238}
239
240bool Parser::lexQName(String& name)
241{
242    String n1;
243    if (!lexNCName(n1))
244        return false;
245
246    skipWS();
247
248    // If the next character is :, what we just got it the prefix, if not,
249    // it's the whole thing.
250    if (peekAheadHelper() != ':') {
251        name = n1;
252        return true;
253    }
254
255    String n2;
256    if (!lexNCName(n2))
257        return false;
258
259    name = n1 + ":" + n2;
260    return true;
261}
262
263inline Parser::Token Parser::nextTokenInternal()
264{
265    skipWS();
266
267    if (m_nextPos >= m_data.length())
268        return Token(0);
269
270    char code = peekCurHelper();
271    switch (code) {
272    case '(': case ')': case '[': case ']':
273    case '@': case ',': case '|':
274        return makeTokenAndAdvance(code);
275    case '\'':
276    case '\"':
277        return lexString();
278    case '0': case '1': case '2': case '3': case '4':
279    case '5': case '6': case '7': case '8': case '9':
280        return lexNumber();
281    case '.': {
282        char next = peekAheadHelper();
283        if (next == '.')
284            return makeTokenAndAdvance(DOTDOT, 2);
285        if (next >= '0' && next <= '9')
286            return lexNumber();
287        return makeTokenAndAdvance('.');
288    }
289    case '/':
290        if (peekAheadHelper() == '/')
291            return makeTokenAndAdvance(SLASHSLASH, 2);
292        return makeTokenAndAdvance('/');
293    case '+':
294        return makeTokenAndAdvance(PLUS);
295    case '-':
296        return makeTokenAndAdvance(MINUS);
297    case '=':
298        return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
299    case '!':
300        if (peekAheadHelper() == '=')
301            return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
302        return Token(XPATH_ERROR);
303    case '<':
304        if (peekAheadHelper() == '=')
305            return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
306        return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
307    case '>':
308        if (peekAheadHelper() == '=')
309            return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
310        return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
311    case '*':
312        if (isBinaryOperatorContext())
313            return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
314        ++m_nextPos;
315        return Token(NAMETEST, "*");
316    case '$': { // $ QName
317        m_nextPos++;
318        String name;
319        if (!lexQName(name))
320            return Token(XPATH_ERROR);
321        return Token(VARIABLEREFERENCE, name);
322    }
323    }
324
325    String name;
326    if (!lexNCName(name))
327        return Token(XPATH_ERROR);
328
329    skipWS();
330    // If we're in an operator context, check for any operator names
331    if (isBinaryOperatorContext()) {
332        if (name == "and") //### hash?
333            return Token(AND);
334        if (name == "or")
335            return Token(OR);
336        if (name == "mod")
337            return Token(MULOP, NumericOp::OP_Mod);
338        if (name == "div")
339            return Token(MULOP, NumericOp::OP_Div);
340    }
341
342    // See whether we are at a :
343    if (peekCurHelper() == ':') {
344        m_nextPos++;
345        // Any chance it's an axis name?
346        if (peekCurHelper() == ':') {
347            m_nextPos++;
348
349            //It might be an axis name.
350            Step::Axis axis;
351            if (parseAxisName(name, axis))
352                return Token(AXISNAME, axis);
353            // Ugh, :: is only valid in axis names -> error
354            return Token(XPATH_ERROR);
355        }
356
357        // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
358        skipWS();
359        if (peekCurHelper() == '*') {
360            m_nextPos++;
361            return Token(NAMETEST, name + ":*");
362        }
363
364        // Make a full qname.
365        String n2;
366        if (!lexNCName(n2))
367            return Token(XPATH_ERROR);
368
369        name = name + ":" + n2;
370    }
371
372    skipWS();
373
374    if (peekCurHelper() == '(') {
375        // note: we don't swallow the '(' here!
376
377        // Either node type oor function name.
378
379        if (name == "processing-instruction")
380            return Token(PI);
381        if (name == "node")
382            return Token(NODE);
383        if (name == "text")
384            return Token(TEXT);
385        if (name == "comment")
386            return Token(COMMENT);
387
388        return Token(FUNCTIONNAME, name);
389    }
390
391    // At this point, it must be NAMETEST.
392    return Token(NAMETEST, name);
393}
394
395inline Parser::Token Parser::nextToken()
396{
397    Token token = nextTokenInternal();
398    m_lastTokenType = token.type;
399    return token;
400}
401
402Parser::Parser(const String& statement, XPathNSResolver* resolver)
403    : m_data(statement)
404    , m_resolver(resolver)
405    , m_nextPos(0)
406    , m_lastTokenType(0)
407    , m_sawNamespaceError(false)
408{
409}
410
411int Parser::lex(YYSTYPE& yylval)
412{
413    Token token = nextToken();
414
415    switch (token.type) {
416    case AXISNAME:
417        yylval.axis = token.axis;
418        break;
419    case MULOP:
420        yylval.numericOpcode = token.numericOpcode;
421        break;
422    case RELOP:
423    case EQOP:
424        yylval.equalityTestOpcode = token.equalityTestOpcode;
425        break;
426    case NODETYPE:
427    case FUNCTIONNAME:
428    case LITERAL:
429    case VARIABLEREFERENCE:
430    case NUMBER:
431    case NAMETEST:
432        yylval.string = token.string.releaseImpl().leakRef();
433        break;
434    }
435
436    return token.type;
437}
438
439bool Parser::expandQualifiedName(const String& qualifiedName, String& localName, String& namespaceURI)
440{
441    size_t colon = qualifiedName.find(':');
442    if (colon != notFound) {
443        if (!m_resolver) {
444            m_sawNamespaceError = true;
445            return false;
446        }
447        namespaceURI = m_resolver->lookupNamespaceURI(qualifiedName.left(colon));
448        if (namespaceURI.isNull()) {
449            m_sawNamespaceError = true;
450            return false;
451        }
452        localName = qualifiedName.substring(colon + 1);
453    } else
454        localName = qualifiedName;
455
456    return true;
457}
458
459std::unique_ptr<Expression> Parser::parseStatement(const String& statement, XPathNSResolver* resolver, ExceptionCode& ec)
460{
461    Parser parser(statement, resolver);
462
463    int parseError = xpathyyparse(parser);
464
465    if (parser.m_sawNamespaceError) {
466        ec = NAMESPACE_ERR;
467        return nullptr;
468    }
469
470    if (parseError) {
471        ec = XPathException::INVALID_EXPRESSION_ERR;
472        return nullptr;
473    }
474
475    return WTF::move(parser.m_result);
476}
477
478} }
479