1/*
2 * Copyright (C) 2005 Frerich Raabe <raabe@kde.org>
3 * Copyright (C) 2006, 2009, 2013 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "XPathFunctions.h"
30
31#include "Element.h"
32#include "ProcessingInstruction.h"
33#include "TreeScope.h"
34#include "XMLNames.h"
35#include "XPathUtil.h"
36#include <wtf/MathExtras.h>
37#include <wtf/NeverDestroyed.h>
38#include <wtf/text/StringBuilder.h>
39
40namespace WebCore {
41namespace XPath {
42
43static inline bool isWhitespace(UChar c)
44{
45    return c == ' ' || c == '\n' || c == '\r' || c == '\t';
46}
47
48#define DEFINE_FUNCTION_CREATOR(Suffix) static std::unique_ptr<Function> createFunction##Suffix() { return std::make_unique<Fun##Suffix>(); }
49
50class Interval {
51public:
52    static const int Inf = -1;
53
54    Interval();
55    Interval(int value);
56    Interval(int min, int max);
57
58    bool contains(int value) const;
59
60private:
61    int m_min;
62    int m_max;
63};
64
65class FunLast final : public Function {
66    virtual Value evaluate() const override;
67    virtual Value::Type resultType() const override { return Value::NumberValue; }
68public:
69    FunLast() { setIsContextSizeSensitive(true); }
70};
71
72class FunPosition final : public Function {
73    virtual Value evaluate() const override;
74    virtual Value::Type resultType() const override { return Value::NumberValue; }
75public:
76    FunPosition() { setIsContextPositionSensitive(true); }
77};
78
79class FunCount final : public Function {
80    virtual Value evaluate() const override;
81    virtual Value::Type resultType() const override { return Value::NumberValue; }
82};
83
84class FunId final : public Function {
85    virtual Value evaluate() const override;
86    virtual Value::Type resultType() const override { return Value::NodeSetValue; }
87};
88
89class FunLocalName final : public Function {
90    virtual Value evaluate() const override;
91    virtual Value::Type resultType() const override { return Value::StringValue; }
92public:
93    FunLocalName() { setIsContextNodeSensitive(true); } // local-name() with no arguments uses context node.
94};
95
96class FunNamespaceURI final : public Function {
97    virtual Value evaluate() const override;
98    virtual Value::Type resultType() const override { return Value::StringValue; }
99public:
100    FunNamespaceURI() { setIsContextNodeSensitive(true); } // namespace-uri() with no arguments uses context node.
101};
102
103class FunName final : public Function {
104    virtual Value evaluate() const override;
105    virtual Value::Type resultType() const override { return Value::StringValue; }
106public:
107    FunName() { setIsContextNodeSensitive(true); } // name() with no arguments uses context node.
108};
109
110class FunString final : public Function {
111    virtual Value evaluate() const override;
112    virtual Value::Type resultType() const override { return Value::StringValue; }
113public:
114    FunString() { setIsContextNodeSensitive(true); } // string() with no arguments uses context node.
115};
116
117class FunConcat final : public Function {
118    virtual Value evaluate() const override;
119    virtual Value::Type resultType() const override { return Value::StringValue; }
120};
121
122class FunStartsWith final : public Function {
123    virtual Value evaluate() const override;
124    virtual Value::Type resultType() const override { return Value::BooleanValue; }
125};
126
127class FunContains final : public Function {
128    virtual Value evaluate() const override;
129    virtual Value::Type resultType() const override { return Value::BooleanValue; }
130};
131
132class FunSubstringBefore final : public Function {
133    virtual Value evaluate() const override;
134    virtual Value::Type resultType() const override { return Value::StringValue; }
135};
136
137class FunSubstringAfter final : public Function {
138    virtual Value evaluate() const override;
139    virtual Value::Type resultType() const override { return Value::StringValue; }
140};
141
142class FunSubstring final : public Function {
143    virtual Value evaluate() const override;
144    virtual Value::Type resultType() const override { return Value::StringValue; }
145};
146
147class FunStringLength final : public Function {
148    virtual Value evaluate() const override;
149    virtual Value::Type resultType() const override { return Value::NumberValue; }
150public:
151    FunStringLength() { setIsContextNodeSensitive(true); } // string-length() with no arguments uses context node.
152};
153
154class FunNormalizeSpace final : public Function {
155    virtual Value evaluate() const override;
156    virtual Value::Type resultType() const override { return Value::StringValue; }
157public:
158    FunNormalizeSpace() { setIsContextNodeSensitive(true); } // normalize-space() with no arguments uses context node.
159};
160
161class FunTranslate final : public Function {
162    virtual Value evaluate() const override;
163    virtual Value::Type resultType() const override { return Value::StringValue; }
164};
165
166class FunBoolean final : public Function {
167    virtual Value evaluate() const override;
168    virtual Value::Type resultType() const override { return Value::BooleanValue; }
169};
170
171class FunNot : public Function {
172    virtual Value evaluate() const override;
173    virtual Value::Type resultType() const override { return Value::BooleanValue; }
174};
175
176class FunTrue final : public Function {
177    virtual Value evaluate() const override;
178    virtual Value::Type resultType() const override { return Value::BooleanValue; }
179};
180
181class FunFalse final : public Function {
182    virtual Value evaluate() const override;
183    virtual Value::Type resultType() const override { return Value::BooleanValue; }
184};
185
186class FunLang final : public Function {
187    virtual Value evaluate() const override;
188    virtual Value::Type resultType() const override { return Value::BooleanValue; }
189public:
190    FunLang() { setIsContextNodeSensitive(true); } // lang() always works on context node.
191};
192
193class FunNumber final : public Function {
194    virtual Value evaluate() const override;
195    virtual Value::Type resultType() const override { return Value::NumberValue; }
196public:
197    FunNumber() { setIsContextNodeSensitive(true); } // number() with no arguments uses context node.
198};
199
200class FunSum final : public Function {
201    virtual Value evaluate() const override;
202    virtual Value::Type resultType() const override { return Value::NumberValue; }
203};
204
205class FunFloor final : public Function {
206    virtual Value evaluate() const override;
207    virtual Value::Type resultType() const override { return Value::NumberValue; }
208};
209
210class FunCeiling final : public Function {
211    virtual Value evaluate() const override;
212    virtual Value::Type resultType() const override { return Value::NumberValue; }
213};
214
215class FunRound final : public Function {
216    virtual Value evaluate() const override;
217    virtual Value::Type resultType() const override { return Value::NumberValue; }
218public:
219    static double round(double);
220};
221
222DEFINE_FUNCTION_CREATOR(Last)
223DEFINE_FUNCTION_CREATOR(Position)
224DEFINE_FUNCTION_CREATOR(Count)
225DEFINE_FUNCTION_CREATOR(Id)
226DEFINE_FUNCTION_CREATOR(LocalName)
227DEFINE_FUNCTION_CREATOR(NamespaceURI)
228DEFINE_FUNCTION_CREATOR(Name)
229
230DEFINE_FUNCTION_CREATOR(String)
231DEFINE_FUNCTION_CREATOR(Concat)
232DEFINE_FUNCTION_CREATOR(StartsWith)
233DEFINE_FUNCTION_CREATOR(Contains)
234DEFINE_FUNCTION_CREATOR(SubstringBefore)
235DEFINE_FUNCTION_CREATOR(SubstringAfter)
236DEFINE_FUNCTION_CREATOR(Substring)
237DEFINE_FUNCTION_CREATOR(StringLength)
238DEFINE_FUNCTION_CREATOR(NormalizeSpace)
239DEFINE_FUNCTION_CREATOR(Translate)
240
241DEFINE_FUNCTION_CREATOR(Boolean)
242DEFINE_FUNCTION_CREATOR(Not)
243DEFINE_FUNCTION_CREATOR(True)
244DEFINE_FUNCTION_CREATOR(False)
245DEFINE_FUNCTION_CREATOR(Lang)
246
247DEFINE_FUNCTION_CREATOR(Number)
248DEFINE_FUNCTION_CREATOR(Sum)
249DEFINE_FUNCTION_CREATOR(Floor)
250DEFINE_FUNCTION_CREATOR(Ceiling)
251DEFINE_FUNCTION_CREATOR(Round)
252
253#undef DEFINE_FUNCTION_CREATOR
254
255inline Interval::Interval()
256    : m_min(Inf), m_max(Inf)
257{
258}
259
260inline Interval::Interval(int value)
261    : m_min(value), m_max(value)
262{
263}
264
265inline Interval::Interval(int min, int max)
266    : m_min(min), m_max(max)
267{
268}
269
270inline bool Interval::contains(int value) const
271{
272    if (m_min == Inf && m_max == Inf)
273        return true;
274
275    if (m_min == Inf)
276        return value <= m_max;
277
278    if (m_max == Inf)
279        return value >= m_min;
280
281    return value >= m_min && value <= m_max;
282}
283
284void Function::setArguments(const String& name, Vector<std::unique_ptr<Expression>> arguments)
285{
286    ASSERT(!subexpressionCount());
287
288    // Functions that use the context node as an implicit argument are context node sensitive when they
289    // have no arguments, but when explicit arguments are added, they are no longer context node sensitive.
290    // As of this writing, the only exception to this is the "lang" function.
291    if (name != "lang" && !arguments.isEmpty())
292        setIsContextNodeSensitive(false);
293
294    setSubexpressions(WTF::move(arguments));
295}
296
297Value FunLast::evaluate() const
298{
299    return Expression::evaluationContext().size;
300}
301
302Value FunPosition::evaluate() const
303{
304    return Expression::evaluationContext().position;
305}
306
307static AtomicString atomicSubstring(StringBuilder& builder, unsigned start, unsigned length)
308{
309    ASSERT(start <= builder.length());
310    ASSERT(length <= builder.length() - start);
311    if (builder.is8Bit())
312        return AtomicString(builder.characters8() + start, length);
313    return AtomicString(builder.characters16() + start, length);
314}
315
316Value FunId::evaluate() const
317{
318    Value a = argument(0).evaluate();
319    StringBuilder idList; // A whitespace-separated list of IDs
320
321    if (!a.isNodeSet())
322        idList.append(a.toString());
323    else {
324        for (auto& node : a.toNodeSet()) {
325            idList.append(stringValue(node.get()));
326            idList.append(' ');
327        }
328    }
329
330    TreeScope& contextScope = evaluationContext().node->treeScope();
331    NodeSet result;
332    HashSet<Node*> resultSet;
333
334    unsigned startPos = 0;
335    unsigned length = idList.length();
336    while (true) {
337        while (startPos < length && isWhitespace(idList[startPos]))
338            ++startPos;
339
340        if (startPos == length)
341            break;
342
343        size_t endPos = startPos;
344        while (endPos < length && !isWhitespace(idList[endPos]))
345            ++endPos;
346
347        // If there are several nodes with the same id, id() should return the first one.
348        // In WebKit, getElementById behaves so, too, although its behavior in this case is formally undefined.
349        Node* node = contextScope.getElementById(atomicSubstring(idList, startPos, endPos - startPos));
350        if (node && resultSet.add(node).isNewEntry)
351            result.append(node);
352
353        startPos = endPos;
354    }
355
356    result.markSorted(false);
357
358    return Value(WTF::move(result));
359}
360
361static inline String expandedNameLocalPart(Node* node)
362{
363    // The local part of an XPath expanded-name matches DOM local name for most node types, except for namespace nodes and processing instruction nodes.
364    ASSERT(node->nodeType() != Node::XPATH_NAMESPACE_NODE); // Not supported yet.
365    if (node->nodeType() == Node::PROCESSING_INSTRUCTION_NODE)
366        return toProcessingInstruction(node)->target();
367    return node->localName().string();
368}
369
370static inline String expandedName(Node* node)
371{
372    const AtomicString& prefix = node->prefix();
373    return prefix.isEmpty() ? expandedNameLocalPart(node) : prefix + ":" + expandedNameLocalPart(node);
374}
375
376Value FunLocalName::evaluate() const
377{
378    if (argumentCount() > 0) {
379        Value a = argument(0).evaluate();
380        if (!a.isNodeSet())
381            return emptyString();
382
383        Node* node = a.toNodeSet().firstNode();
384        return node ? expandedNameLocalPart(node) : emptyString();
385    }
386
387    return expandedNameLocalPart(evaluationContext().node.get());
388}
389
390Value FunNamespaceURI::evaluate() const
391{
392    if (argumentCount() > 0) {
393        Value a = argument(0).evaluate();
394        if (!a.isNodeSet())
395            return emptyString();
396
397        Node* node = a.toNodeSet().firstNode();
398        return node ? node->namespaceURI().string() : emptyString();
399    }
400
401    return evaluationContext().node->namespaceURI().string();
402}
403
404Value FunName::evaluate() const
405{
406    if (argumentCount() > 0) {
407        Value a = argument(0).evaluate();
408        if (!a.isNodeSet())
409            return emptyString();
410
411        Node* node = a.toNodeSet().firstNode();
412        return node ? expandedName(node) : emptyString();
413    }
414
415    return expandedName(evaluationContext().node.get());
416}
417
418Value FunCount::evaluate() const
419{
420    Value a = argument(0).evaluate();
421
422    return double(a.toNodeSet().size());
423}
424
425Value FunString::evaluate() const
426{
427    if (!argumentCount())
428        return Value(Expression::evaluationContext().node.get()).toString();
429    return argument(0).evaluate().toString();
430}
431
432Value FunConcat::evaluate() const
433{
434    StringBuilder result;
435    result.reserveCapacity(1024);
436
437    for (unsigned i = 0, count = argumentCount(); i < count; ++i) {
438        String str(argument(i).evaluate().toString());
439        result.append(str);
440    }
441
442    return result.toString();
443}
444
445Value FunStartsWith::evaluate() const
446{
447    String s1 = argument(0).evaluate().toString();
448    String s2 = argument(1).evaluate().toString();
449
450    if (s2.isEmpty())
451        return true;
452
453    return s1.startsWith(s2);
454}
455
456Value FunContains::evaluate() const
457{
458    String s1 = argument(0).evaluate().toString();
459    String s2 = argument(1).evaluate().toString();
460
461    if (s2.isEmpty())
462        return true;
463
464    return s1.contains(s2) != 0;
465}
466
467Value FunSubstringBefore::evaluate() const
468{
469    String s1 = argument(0).evaluate().toString();
470    String s2 = argument(1).evaluate().toString();
471
472    if (s2.isEmpty())
473        return emptyString();
474
475    size_t i = s1.find(s2);
476
477    if (i == notFound)
478        return emptyString();
479
480    return s1.left(i);
481}
482
483Value FunSubstringAfter::evaluate() const
484{
485    String s1 = argument(0).evaluate().toString();
486    String s2 = argument(1).evaluate().toString();
487
488    size_t i = s1.find(s2);
489    if (i == notFound)
490        return emptyString();
491
492    return s1.substring(i + s2.length());
493}
494
495Value FunSubstring::evaluate() const
496{
497    String s = argument(0).evaluate().toString();
498    double doublePos = argument(1).evaluate().toNumber();
499    if (std::isnan(doublePos))
500        return emptyString();
501    long pos = static_cast<long>(FunRound::round(doublePos));
502    bool haveLength = argumentCount() == 3;
503    long len = -1;
504    if (haveLength) {
505        double doubleLen = argument(2).evaluate().toNumber();
506        if (std::isnan(doubleLen))
507            return emptyString();
508        len = static_cast<long>(FunRound::round(doubleLen));
509    }
510
511    if (pos > long(s.length()))
512        return emptyString();
513
514    if (pos < 1) {
515        if (haveLength) {
516            len -= 1 - pos;
517            if (len < 1)
518                return emptyString();
519        }
520        pos = 1;
521    }
522
523    return s.substring(pos - 1, len);
524}
525
526Value FunStringLength::evaluate() const
527{
528    if (!argumentCount())
529        return Value(Expression::evaluationContext().node.get()).toString().length();
530    return argument(0).evaluate().toString().length();
531}
532
533Value FunNormalizeSpace::evaluate() const
534{
535    if (!argumentCount()) {
536        String s = Value(Expression::evaluationContext().node.get()).toString();
537        return s.simplifyWhiteSpace();
538    }
539
540    String s = argument(0).evaluate().toString();
541    return s.simplifyWhiteSpace();
542}
543
544Value FunTranslate::evaluate() const
545{
546    String s1 = argument(0).evaluate().toString();
547    String s2 = argument(1).evaluate().toString();
548    String s3 = argument(2).evaluate().toString();
549    StringBuilder result;
550
551    for (unsigned i1 = 0; i1 < s1.length(); ++i1) {
552        UChar ch = s1[i1];
553        size_t i2 = s2.find(ch);
554
555        if (i2 == notFound)
556            result.append(ch);
557        else if (i2 < s3.length())
558            result.append(s3[i2]);
559    }
560
561    return result.toString();
562}
563
564Value FunBoolean::evaluate() const
565{
566    return argument(0).evaluate().toBoolean();
567}
568
569Value FunNot::evaluate() const
570{
571    return !argument(0).evaluate().toBoolean();
572}
573
574Value FunTrue::evaluate() const
575{
576    return true;
577}
578
579Value FunLang::evaluate() const
580{
581    String lang = argument(0).evaluate().toString();
582
583    const Attribute* languageAttribute = 0;
584    Node* node = evaluationContext().node.get();
585    while (node) {
586        if (node->isElementNode()) {
587            Element* element = toElement(node);
588            if (element->hasAttributes())
589                languageAttribute = element->findAttributeByName(XMLNames::langAttr);
590        }
591        if (languageAttribute)
592            break;
593        node = node->parentNode();
594    }
595
596    if (!languageAttribute)
597        return false;
598
599    String langValue = languageAttribute->value();
600    while (true) {
601        if (equalIgnoringCase(langValue, lang))
602            return true;
603
604        // Remove suffixes one by one.
605        size_t index = langValue.reverseFind('-');
606        if (index == notFound)
607            break;
608        langValue = langValue.left(index);
609    }
610
611    return false;
612}
613
614Value FunFalse::evaluate() const
615{
616    return false;
617}
618
619Value FunNumber::evaluate() const
620{
621    if (!argumentCount())
622        return Value(Expression::evaluationContext().node.get()).toNumber();
623    return argument(0).evaluate().toNumber();
624}
625
626Value FunSum::evaluate() const
627{
628    Value a = argument(0).evaluate();
629    if (!a.isNodeSet())
630        return 0.0;
631
632    double sum = 0.0;
633    const NodeSet& nodes = a.toNodeSet();
634    // To be really compliant, we should sort the node-set, as floating point addition is not associative.
635    // However, this is unlikely to ever become a practical issue, and sorting is slow.
636
637    for (unsigned i = 0; i < nodes.size(); i++)
638        sum += Value(stringValue(nodes[i])).toNumber();
639
640    return sum;
641}
642
643Value FunFloor::evaluate() const
644{
645    return floor(argument(0).evaluate().toNumber());
646}
647
648Value FunCeiling::evaluate() const
649{
650    return ceil(argument(0).evaluate().toNumber());
651}
652
653double FunRound::round(double val)
654{
655    if (!std::isnan(val) && !std::isinf(val)) {
656        if (std::signbit(val) && val >= -0.5)
657            val *= 0; // negative zero
658        else
659            val = floor(val + 0.5);
660    }
661    return val;
662}
663
664Value FunRound::evaluate() const
665{
666    return round(argument(0).evaluate().toNumber());
667}
668
669struct FunctionMapValue {
670    std::unique_ptr<Function> (*creationFunction)();
671    Interval argumentCountInterval;
672};
673
674static void populateFunctionMap(HashMap<String, FunctionMapValue>& functionMap)
675{
676    struct FunctionMapping {
677        const char* name;
678        FunctionMapValue function;
679    };
680
681    static const FunctionMapping functions[] = {
682        { "boolean", { createFunctionBoolean, 1 } },
683        { "ceiling", { createFunctionCeiling, 1 } },
684        { "concat", { createFunctionConcat, Interval(2, Interval::Inf) } },
685        { "contains", { createFunctionContains, 2 } },
686        { "count", { createFunctionCount, 1 } },
687        { "false", { createFunctionFalse, 0 } },
688        { "floor", { createFunctionFloor, 1 } },
689        { "id", { createFunctionId, 1 } },
690        { "lang", { createFunctionLang, 1 } },
691        { "last", { createFunctionLast, 0 } },
692        { "local-name", { createFunctionLocalName, Interval(0, 1) } },
693        { "name", { createFunctionName, Interval(0, 1) } },
694        { "namespace-uri", { createFunctionNamespaceURI, Interval(0, 1) } },
695        { "normalize-space", { createFunctionNormalizeSpace, Interval(0, 1) } },
696        { "not", { createFunctionNot, 1 } },
697        { "number", { createFunctionNumber, Interval(0, 1) } },
698        { "position", { createFunctionPosition, 0 } },
699        { "round", { createFunctionRound, 1 } },
700        { "starts-with", { createFunctionStartsWith, 2 } },
701        { "string", { createFunctionString, Interval(0, 1) } },
702        { "string-length", { createFunctionStringLength, Interval(0, 1) } },
703        { "substring", { createFunctionSubstring, Interval(2, 3) } },
704        { "substring-after", { createFunctionSubstringAfter, 2 } },
705        { "substring-before", { createFunctionSubstringBefore, 2 } },
706        { "sum", { createFunctionSum, 1 } },
707        { "translate", { createFunctionTranslate, 3 } },
708        { "true", { createFunctionTrue, 0 } },
709    };
710
711    for (size_t i = 0; i < WTF_ARRAY_LENGTH(functions); ++i)
712        functionMap.add(functions[i].name, functions[i].function);
713}
714
715std::unique_ptr<Function> Function::create(const String& name, unsigned numArguments)
716{
717    static NeverDestroyed<HashMap<String, FunctionMapValue>> functionMap;
718    if (functionMap.get().isEmpty())
719        populateFunctionMap(functionMap);
720
721    auto it = functionMap.get().find(name);
722    if (it == functionMap.get().end())
723        return nullptr;
724
725    if (!it->value.argumentCountInterval.contains(numArguments))
726        return nullptr;
727
728    return it->value.creationFunction();
729}
730
731std::unique_ptr<Function> Function::create(const String& name)
732{
733    return create(name, 0);
734}
735
736std::unique_ptr<Function> Function::create(const String& name, Vector<std::unique_ptr<Expression>> arguments)
737{
738    std::unique_ptr<Function> function = create(name, arguments.size());
739    if (function)
740        function->setArguments(name, WTF::move(arguments));
741    return function;
742}
743
744}
745}
746