1/* 2 * Copyright (C) 2011 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above 12 * copyright notice, this list of conditions and the following disclaimer 13 * in the documentation and/or other materials provided with the 14 * distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. AND ITS CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. 20 * OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include "config.h" 30#include "ContentSearchUtilities.h" 31 32#if ENABLE(INSPECTOR) 33 34#include "InspectorJSTypeBuilders.h" 35#include "InspectorValues.h" 36#include "RegularExpression.h" 37#include "Yarr.h" 38#include <wtf/BumpPointerAllocator.h> 39#include <wtf/StdLibExtras.h> 40#include <wtf/text/StringBuilder.h> 41 42using namespace JSC::Yarr; 43 44namespace Inspector { 45namespace ContentSearchUtilities { 46 47static const char regexSpecialCharacters[] = "[](){}+-*.,?\\^$|"; 48 49static String createSearchRegexSource(const String& text) 50{ 51 StringBuilder result; 52 53 for (unsigned i = 0; i < text.length(); i++) { 54 UChar character = text[i]; 55 if (isASCII(character) && strchr(regexSpecialCharacters, character)) 56 result.append('\\'); 57 result.append(character); 58 } 59 60 return result.toString(); 61} 62 63static inline size_t sizetExtractor(const size_t* value) 64{ 65 return *value; 66} 67 68TextPosition textPositionFromOffset(size_t offset, const Vector<size_t>& lineEndings) 69{ 70 const size_t* foundNextStart = approximateBinarySearch<size_t, size_t>(lineEndings, lineEndings.size(), offset, sizetExtractor); 71 size_t lineIndex = foundNextStart - &lineEndings.at(0); 72 if (offset >= *foundNextStart) 73 ++lineIndex; 74 size_t lineStartOffset = lineIndex > 0 ? lineEndings.at(lineIndex - 1) : 0; 75 size_t column = offset - lineStartOffset; 76 return TextPosition(OrdinalNumber::fromZeroBasedInt(lineIndex), OrdinalNumber::fromZeroBasedInt(column)); 77} 78 79static Vector<std::pair<size_t, String>> getRegularExpressionMatchesByLines(const JSC::Yarr::RegularExpression& regex, const String& text) 80{ 81 Vector<std::pair<size_t, String>> result; 82 if (text.isEmpty()) 83 return result; 84 85 std::unique_ptr<Vector<size_t>> endings(lineEndings(text)); 86 size_t size = endings->size(); 87 size_t start = 0; 88 89 for (size_t lineNumber = 0; lineNumber < size; ++lineNumber) { 90 size_t nextStart = endings->at(lineNumber); 91 String line = text.substring(start, nextStart - start); 92 93 int matchLength; 94 if (regex.match(line, 0, &matchLength) != -1) 95 result.append(std::pair<size_t, String>(lineNumber, line)); 96 97 start = nextStart; 98 } 99 100 return result; 101} 102 103std::unique_ptr<Vector<size_t>> lineEndings(const String& text) 104{ 105 auto result = std::make_unique<Vector<size_t>>(); 106 107 size_t start = 0; 108 while (start < text.length()) { 109 size_t nextStart = text.findNextLineStart(start); 110 if (nextStart == notFound) { 111 result->append(text.length()); 112 break; 113 } 114 115 result->append(nextStart); 116 start = nextStart; 117 } 118 119 result->append(text.length()); 120 121 return result; 122} 123 124static PassRefPtr<Inspector::TypeBuilder::GenericTypes::SearchMatch> buildObjectForSearchMatch(size_t lineNumber, const String& lineContent) 125{ 126 return Inspector::TypeBuilder::GenericTypes::SearchMatch::create() 127 .setLineNumber(lineNumber) 128 .setLineContent(lineContent) 129 .release(); 130} 131 132JSC::Yarr::RegularExpression createSearchRegex(const String& query, bool caseSensitive, bool isRegex) 133{ 134 String regexSource = isRegex ? query : createSearchRegexSource(query); 135 return JSC::Yarr::RegularExpression(regexSource, caseSensitive ? TextCaseSensitive : TextCaseInsensitive); 136} 137 138int countRegularExpressionMatches(const JSC::Yarr::RegularExpression& regex, const String& content) 139{ 140 if (content.isEmpty()) 141 return 0; 142 143 int result = 0; 144 int position; 145 unsigned start = 0; 146 int matchLength; 147 while ((position = regex.match(content, start, &matchLength)) != -1) { 148 if (start >= content.length()) 149 break; 150 if (matchLength > 0) 151 ++result; 152 start = position + 1; 153 } 154 return result; 155} 156 157PassRefPtr<Inspector::TypeBuilder::Array<Inspector::TypeBuilder::GenericTypes::SearchMatch>> searchInTextByLines(const String& text, const String& query, const bool caseSensitive, const bool isRegex) 158{ 159 RefPtr<Inspector::TypeBuilder::Array<Inspector::TypeBuilder::GenericTypes::SearchMatch>> result = Inspector::TypeBuilder::Array<Inspector::TypeBuilder::GenericTypes::SearchMatch>::create(); 160 161 JSC::Yarr::RegularExpression regex = ContentSearchUtilities::createSearchRegex(query, caseSensitive, isRegex); 162 Vector<std::pair<size_t, String>> matches = getRegularExpressionMatchesByLines(regex, text); 163 164 for (const auto& match : matches) 165 result->addItem(buildObjectForSearchMatch(match.first, match.second)); 166 167 return result; 168} 169 170static String scriptCommentPattern(const String& name) 171{ 172 // "//# <name>=<value>" and deprecated "//@" 173 return "//[#@][\040\t]" + name + "=[\040\t]*([^\\s\'\"]*)[\040\t]*$"; 174} 175 176static String stylesheetCommentPattern(const String& name) 177{ 178 // "/*# <name>=<value> */" and deprecated "/*@" 179 return "/\\*[#@][\040\t]" + name + "=[\040\t]*([^\\s\'\"]*)[\040\t]*\\*/"; 180} 181 182static String findMagicComment(const String& content, const String& patternString) 183{ 184 const char* error = nullptr; 185 JSC::Yarr::YarrPattern pattern(patternString, false, true, &error); 186 ASSERT(!error); 187 BumpPointerAllocator regexAllocator; 188 OwnPtr<JSC::Yarr::BytecodePattern> bytecodePattern = JSC::Yarr::byteCompile(pattern, ®exAllocator); 189 ASSERT(bytecodePattern); 190 191 ASSERT(pattern.m_numSubpatterns == 1); 192 Vector<int, 4> matches; 193 matches.resize(4); 194 unsigned result = JSC::Yarr::interpret(bytecodePattern.get(), content, 0, reinterpret_cast<unsigned*>(matches.data())); 195 if (result == JSC::Yarr::offsetNoMatch) 196 return String(); 197 198 ASSERT(matches[2] > 0 && matches[3] > 0); 199 return content.substring(matches[2], matches[3] - matches[2]); 200} 201 202String findScriptSourceURL(const String& content) 203{ 204 return findMagicComment(content, scriptCommentPattern(ASCIILiteral("sourceURL"))); 205} 206 207String findScriptSourceMapURL(const String& content) 208{ 209 return findMagicComment(content, scriptCommentPattern(ASCIILiteral("sourceMappingURL"))); 210} 211 212String findStylesheetSourceMapURL(const String& content) 213{ 214 return findMagicComment(content, stylesheetCommentPattern(ASCIILiteral("sourceMappingURL"))); 215} 216 217} // namespace ContentSearchUtilities 218} // namespace Inspector 219 220#endif // ENABLE(INSPECTOR) 221