1/*
2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following disclaimer
13 * in the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. AND ITS CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC.
20 * OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30#include "ContentSearchUtilities.h"
31
32#if ENABLE(INSPECTOR)
33
34#include "InspectorJSTypeBuilders.h"
35#include "InspectorValues.h"
36#include "RegularExpression.h"
37#include "Yarr.h"
38#include <wtf/BumpPointerAllocator.h>
39#include <wtf/StdLibExtras.h>
40#include <wtf/text/StringBuilder.h>
41
42using namespace JSC::Yarr;
43
44namespace Inspector {
45namespace ContentSearchUtilities {
46
47static const char regexSpecialCharacters[] = "[](){}+-*.,?\\^$|";
48
49static String createSearchRegexSource(const String& text)
50{
51    StringBuilder result;
52
53    for (unsigned i = 0; i < text.length(); i++) {
54        UChar character = text[i];
55        if (isASCII(character) && strchr(regexSpecialCharacters, character))
56            result.append('\\');
57        result.append(character);
58    }
59
60    return result.toString();
61}
62
// Key extractor handed to approximateBinarySearch(): for an array of plain
// size_t values the element itself is the search key.
static inline size_t sizetExtractor(const size_t* value)
{
    const size_t& key = *value;
    return key;
}
67
68TextPosition textPositionFromOffset(size_t offset, const Vector<size_t>& lineEndings)
69{
70    const size_t* foundNextStart = approximateBinarySearch<size_t, size_t>(lineEndings, lineEndings.size(), offset, sizetExtractor);
71    size_t lineIndex = foundNextStart - &lineEndings.at(0);
72    if (offset >= *foundNextStart)
73        ++lineIndex;
74    size_t lineStartOffset = lineIndex > 0 ? lineEndings.at(lineIndex - 1) : 0;
75    size_t column = offset - lineStartOffset;
76    return TextPosition(OrdinalNumber::fromZeroBasedInt(lineIndex), OrdinalNumber::fromZeroBasedInt(column));
77}
78
79static Vector<std::pair<size_t, String>> getRegularExpressionMatchesByLines(const JSC::Yarr::RegularExpression& regex, const String& text)
80{
81    Vector<std::pair<size_t, String>> result;
82    if (text.isEmpty())
83        return result;
84
85    std::unique_ptr<Vector<size_t>> endings(lineEndings(text));
86    size_t size = endings->size();
87    size_t start = 0;
88
89    for (size_t lineNumber = 0; lineNumber < size; ++lineNumber) {
90        size_t nextStart = endings->at(lineNumber);
91        String line = text.substring(start, nextStart - start);
92
93        int matchLength;
94        if (regex.match(line, 0, &matchLength) != -1)
95            result.append(std::pair<size_t, String>(lineNumber, line));
96
97        start = nextStart;
98    }
99
100    return result;
101}
102
103std::unique_ptr<Vector<size_t>> lineEndings(const String& text)
104{
105    auto result = std::make_unique<Vector<size_t>>();
106
107    size_t start = 0;
108    while (start < text.length()) {
109        size_t nextStart = text.findNextLineStart(start);
110        if (nextStart == notFound) {
111            result->append(text.length());
112            break;
113        }
114
115        result->append(nextStart);
116        start = nextStart;
117    }
118
119    result->append(text.length());
120
121    return result;
122}
123
124static PassRefPtr<Inspector::TypeBuilder::GenericTypes::SearchMatch> buildObjectForSearchMatch(size_t lineNumber, const String& lineContent)
125{
126    return Inspector::TypeBuilder::GenericTypes::SearchMatch::create()
127        .setLineNumber(lineNumber)
128        .setLineContent(lineContent)
129        .release();
130}
131
132JSC::Yarr::RegularExpression createSearchRegex(const String& query, bool caseSensitive, bool isRegex)
133{
134    String regexSource = isRegex ? query : createSearchRegexSource(query);
135    return JSC::Yarr::RegularExpression(regexSource, caseSensitive ? TextCaseSensitive : TextCaseInsensitive);
136}
137
138int countRegularExpressionMatches(const JSC::Yarr::RegularExpression& regex, const String& content)
139{
140    if (content.isEmpty())
141        return 0;
142
143    int result = 0;
144    int position;
145    unsigned start = 0;
146    int matchLength;
147    while ((position = regex.match(content, start, &matchLength)) != -1) {
148        if (start >= content.length())
149            break;
150        if (matchLength > 0)
151            ++result;
152        start = position + 1;
153    }
154    return result;
155}
156
157PassRefPtr<Inspector::TypeBuilder::Array<Inspector::TypeBuilder::GenericTypes::SearchMatch>> searchInTextByLines(const String& text, const String& query, const bool caseSensitive, const bool isRegex)
158{
159    RefPtr<Inspector::TypeBuilder::Array<Inspector::TypeBuilder::GenericTypes::SearchMatch>> result = Inspector::TypeBuilder::Array<Inspector::TypeBuilder::GenericTypes::SearchMatch>::create();
160
161    JSC::Yarr::RegularExpression regex = ContentSearchUtilities::createSearchRegex(query, caseSensitive, isRegex);
162    Vector<std::pair<size_t, String>> matches = getRegularExpressionMatchesByLines(regex, text);
163
164    for (const auto& match : matches)
165        result->addItem(buildObjectForSearchMatch(match.first, match.second));
166
167    return result;
168}
169
170static String scriptCommentPattern(const String& name)
171{
172    // "//# <name>=<value>" and deprecated "//@"
173    return "//[#@][\040\t]" + name + "=[\040\t]*([^\\s\'\"]*)[\040\t]*$";
174}
175
176static String stylesheetCommentPattern(const String& name)
177{
178    // "/*# <name>=<value> */" and deprecated "/*@"
179    return "/\\*[#@][\040\t]" + name + "=[\040\t]*([^\\s\'\"]*)[\040\t]*\\*/";
180}
181
182static String findMagicComment(const String& content, const String& patternString)
183{
184    const char* error = nullptr;
185    JSC::Yarr::YarrPattern pattern(patternString, false, true, &error);
186    ASSERT(!error);
187    BumpPointerAllocator regexAllocator;
188    OwnPtr<JSC::Yarr::BytecodePattern> bytecodePattern = JSC::Yarr::byteCompile(pattern, &regexAllocator);
189    ASSERT(bytecodePattern);
190
191    ASSERT(pattern.m_numSubpatterns == 1);
192    Vector<int, 4> matches;
193    matches.resize(4);
194    unsigned result = JSC::Yarr::interpret(bytecodePattern.get(), content, 0, reinterpret_cast<unsigned*>(matches.data()));
195    if (result == JSC::Yarr::offsetNoMatch)
196        return String();
197
198    ASSERT(matches[2] > 0 && matches[3] > 0);
199    return content.substring(matches[2], matches[3] - matches[2]);
200}
201
202String findScriptSourceURL(const String& content)
203{
204    return findMagicComment(content, scriptCommentPattern(ASCIILiteral("sourceURL")));
205}
206
207String findScriptSourceMapURL(const String& content)
208{
209    return findMagicComment(content, scriptCommentPattern(ASCIILiteral("sourceMappingURL")));
210}
211
212String findStylesheetSourceMapURL(const String& content)
213{
214    return findMagicComment(content, stylesheetCommentPattern(ASCIILiteral("sourceMappingURL")));
215}
216
217} // namespace ContentSearchUtilities
218} // namespace Inspector
219
220#endif // ENABLE(INSPECTOR)
221