1/*
2 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 *  Copyright (C) 2003, 2007, 2008, 2012 Apple Inc. All Rights Reserved.
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 *
19 */
20
21#include "config.h"
22#include "RegExpObject.h"
23
24#include "ButterflyInlines.h"
25#include "CopiedSpaceInlines.h"
26#include "Error.h"
27#include "ExceptionHelpers.h"
28#include "JSArray.h"
29#include "JSGlobalObject.h"
30#include "JSString.h"
31#include "Lexer.h"
32#include "Lookup.h"
33#include "JSCInlines.h"
34#include "RegExpConstructor.h"
35#include "RegExpMatchesArray.h"
36#include "RegExpPrototype.h"
37#include <wtf/PassOwnPtr.h>
38#include <wtf/text/StringBuilder.h>
39
40namespace JSC {
41
42static EncodedJSValue regExpObjectGlobal(ExecState*, JSObject*, EncodedJSValue, PropertyName);
43static EncodedJSValue regExpObjectIgnoreCase(ExecState*, JSObject*, EncodedJSValue, PropertyName);
44static EncodedJSValue regExpObjectMultiline(ExecState*, JSObject*, EncodedJSValue, PropertyName);
45static EncodedJSValue regExpObjectSource(ExecState*, JSObject*, EncodedJSValue, PropertyName);
46
47} // namespace JSC
48
49#include "RegExpObject.lut.h"
50
51namespace JSC {
52
53STATIC_ASSERT_IS_TRIVIALLY_DESTRUCTIBLE(RegExpObject);
54
55const ClassInfo RegExpObject::s_info = { "RegExp", &Base::s_info, 0, ExecState::regExpTable, CREATE_METHOD_TABLE(RegExpObject) };
56
57/* Source for RegExpObject.lut.h
58@begin regExpTable
59    global        regExpObjectGlobal       DontDelete|ReadOnly|DontEnum
60    ignoreCase    regExpObjectIgnoreCase   DontDelete|ReadOnly|DontEnum
61    multiline     regExpObjectMultiline    DontDelete|ReadOnly|DontEnum
62    source        regExpObjectSource       DontDelete|ReadOnly|DontEnum
63@end
64*/
65
66RegExpObject::RegExpObject(VM& vm, Structure* structure, RegExp* regExp)
67    : JSNonFinalObject(vm, structure)
68    , m_regExp(vm, this, regExp)
69    , m_lastIndexIsWritable(true)
70{
71    m_lastIndex.setWithoutWriteBarrier(jsNumber(0));
72}
73
74void RegExpObject::finishCreation(VM& vm)
75{
76    Base::finishCreation(vm);
77    ASSERT(inherits(info()));
78}
79
80void RegExpObject::visitChildren(JSCell* cell, SlotVisitor& visitor)
81{
82    RegExpObject* thisObject = jsCast<RegExpObject*>(cell);
83    ASSERT_GC_OBJECT_INHERITS(thisObject, info());
84    COMPILE_ASSERT(StructureFlags & OverridesVisitChildren, OverridesVisitChildrenWithoutSettingFlag);
85    ASSERT(thisObject->structure()->typeInfo().overridesVisitChildren());
86
87    Base::visitChildren(thisObject, visitor);
88    visitor.append(&thisObject->m_regExp);
89    visitor.append(&thisObject->m_lastIndex);
90}
91
92bool RegExpObject::getOwnPropertySlot(JSObject* object, ExecState* exec, PropertyName propertyName, PropertySlot& slot)
93{
94    if (propertyName == exec->propertyNames().lastIndex) {
95        RegExpObject* regExp = asRegExpObject(object);
96        unsigned attributes = regExp->m_lastIndexIsWritable ? DontDelete | DontEnum : DontDelete | DontEnum | ReadOnly;
97        slot.setValue(regExp, attributes, regExp->getLastIndex());
98        return true;
99    }
100    return getStaticValueSlot<RegExpObject, JSObject>(exec, ExecState::regExpTable(exec->vm()), jsCast<RegExpObject*>(object), propertyName, slot);
101}
102
103bool RegExpObject::deleteProperty(JSCell* cell, ExecState* exec, PropertyName propertyName)
104{
105    if (propertyName == exec->propertyNames().lastIndex)
106        return false;
107    return Base::deleteProperty(cell, exec, propertyName);
108}
109
110void RegExpObject::getOwnNonIndexPropertyNames(JSObject* object, ExecState* exec, PropertyNameArray& propertyNames, EnumerationMode mode)
111{
112    if (mode == IncludeDontEnumProperties)
113        propertyNames.add(exec->propertyNames().lastIndex);
114    Base::getOwnNonIndexPropertyNames(object, exec, propertyNames, mode);
115}
116
117void RegExpObject::getPropertyNames(JSObject* object, ExecState* exec, PropertyNameArray& propertyNames, EnumerationMode mode)
118{
119    if (mode == IncludeDontEnumProperties)
120        propertyNames.add(exec->propertyNames().lastIndex);
121    Base::getPropertyNames(object, exec, propertyNames, mode);
122}
123
124static bool reject(ExecState* exec, bool throwException, const char* message)
125{
126    if (throwException)
127        throwTypeError(exec, ASCIILiteral(message));
128    return false;
129}
130
131bool RegExpObject::defineOwnProperty(JSObject* object, ExecState* exec, PropertyName propertyName, const PropertyDescriptor& descriptor, bool shouldThrow)
132{
133    if (propertyName == exec->propertyNames().lastIndex) {
134        RegExpObject* regExp = asRegExpObject(object);
135        if (descriptor.configurablePresent() && descriptor.configurable())
136            return reject(exec, shouldThrow, "Attempting to change configurable attribute of unconfigurable property.");
137        if (descriptor.enumerablePresent() && descriptor.enumerable())
138            return reject(exec, shouldThrow, "Attempting to change enumerable attribute of unconfigurable property.");
139        if (descriptor.isAccessorDescriptor())
140            return reject(exec, shouldThrow, "Attempting to change access mechanism for an unconfigurable property.");
141        if (!regExp->m_lastIndexIsWritable) {
142            if (descriptor.writablePresent() && descriptor.writable())
143                return reject(exec, shouldThrow, "Attempting to change writable attribute of unconfigurable property.");
144            if (!sameValue(exec, regExp->getLastIndex(), descriptor.value()))
145                return reject(exec, shouldThrow, "Attempting to change value of a readonly property.");
146            return true;
147        }
148        if (descriptor.writablePresent() && !descriptor.writable())
149            regExp->m_lastIndexIsWritable = false;
150        if (descriptor.value())
151            regExp->setLastIndex(exec, descriptor.value(), false);
152        return true;
153    }
154
155    return Base::defineOwnProperty(object, exec, propertyName, descriptor, shouldThrow);
156}
157
158EncodedJSValue regExpObjectGlobal(ExecState*, JSObject* slotBase, EncodedJSValue, PropertyName)
159{
160    return JSValue::encode(jsBoolean(asRegExpObject(slotBase)->regExp()->global()));
161}
162
163EncodedJSValue regExpObjectIgnoreCase(ExecState*, JSObject* slotBase, EncodedJSValue, PropertyName)
164{
165    return JSValue::encode(jsBoolean(asRegExpObject(slotBase)->regExp()->ignoreCase()));
166}
167
168EncodedJSValue regExpObjectMultiline(ExecState*, JSObject* slotBase, EncodedJSValue, PropertyName)
169{
170    return JSValue::encode(jsBoolean(asRegExpObject(slotBase)->regExp()->multiline()));
171}
172
173template <typename CharacterType>
174static inline void appendLineTerminatorEscape(StringBuilder&, CharacterType);
175
176template <>
177inline void appendLineTerminatorEscape<LChar>(StringBuilder& builder, LChar lineTerminator)
178{
179    if (lineTerminator == '\n')
180        builder.append('n');
181    else
182        builder.append('r');
183}
184
185template <>
186inline void appendLineTerminatorEscape<UChar>(StringBuilder& builder, UChar lineTerminator)
187{
188    if (lineTerminator == '\n')
189        builder.append('n');
190    else if (lineTerminator == '\r')
191        builder.append('r');
192    else if (lineTerminator == 0x2028)
193        builder.appendLiteral("u2028");
194    else
195        builder.appendLiteral("u2029");
196}
197
198template <typename CharacterType>
199static inline JSValue regExpObjectSourceInternal(ExecState* exec, String pattern, const CharacterType* characters, unsigned length)
200{
201    bool previousCharacterWasBackslash = false;
202    bool inBrackets = false;
203    bool shouldEscape = false;
204
205    // 15.10.6.4 specifies that RegExp.prototype.toString must return '/' + source + '/',
206    // and also states that the result must be a valid RegularExpressionLiteral. '//' is
207    // not a valid RegularExpressionLiteral (since it is a single line comment), and hence
208    // source cannot ever validly be "". If the source is empty, return a different Pattern
209    // that would match the same thing.
210    if (!length)
211        return jsNontrivialString(exec, ASCIILiteral("(?:)"));
212
213    // early return for strings that don't contain a forwards slash and LineTerminator
214    for (unsigned i = 0; i < length; ++i) {
215        CharacterType ch = characters[i];
216        if (!previousCharacterWasBackslash) {
217            if (inBrackets) {
218                if (ch == ']')
219                    inBrackets = false;
220            } else {
221                if (ch == '/') {
222                    shouldEscape = true;
223                    break;
224                }
225                if (ch == '[')
226                    inBrackets = true;
227            }
228        }
229
230        if (Lexer<CharacterType>::isLineTerminator(ch)) {
231            shouldEscape = true;
232            break;
233        }
234
235        if (previousCharacterWasBackslash)
236            previousCharacterWasBackslash = false;
237        else
238            previousCharacterWasBackslash = ch == '\\';
239    }
240
241    if (!shouldEscape)
242        return jsString(exec, pattern);
243
244    previousCharacterWasBackslash = false;
245    inBrackets = false;
246    StringBuilder result;
247    for (unsigned i = 0; i < length; ++i) {
248        CharacterType ch = characters[i];
249        if (!previousCharacterWasBackslash) {
250            if (inBrackets) {
251                if (ch == ']')
252                    inBrackets = false;
253            } else {
254                if (ch == '/')
255                    result.append('\\');
256                else if (ch == '[')
257                    inBrackets = true;
258            }
259        }
260
261        // escape LineTerminator
262        if (Lexer<CharacterType>::isLineTerminator(ch)) {
263            if (!previousCharacterWasBackslash)
264                result.append('\\');
265
266            appendLineTerminatorEscape<CharacterType>(result, ch);
267        } else
268            result.append(ch);
269
270        if (previousCharacterWasBackslash)
271            previousCharacterWasBackslash = false;
272        else
273            previousCharacterWasBackslash = ch == '\\';
274    }
275
276    return jsString(exec, result.toString());
277}
278
279
280
281EncodedJSValue regExpObjectSource(ExecState* exec, JSObject* slotBase, EncodedJSValue, PropertyName)
282{
283    String pattern = asRegExpObject(slotBase)->regExp()->pattern();
284    if (pattern.is8Bit())
285        return JSValue::encode(regExpObjectSourceInternal(exec, pattern, pattern.characters8(), pattern.length()));
286    return JSValue::encode(regExpObjectSourceInternal(exec, pattern, pattern.characters16(), pattern.length()));
287}
288
289void RegExpObject::put(JSCell* cell, ExecState* exec, PropertyName propertyName, JSValue value, PutPropertySlot& slot)
290{
291    if (propertyName == exec->propertyNames().lastIndex) {
292        asRegExpObject(cell)->setLastIndex(exec, value, slot.isStrictMode());
293        return;
294    }
295    Base::put(cell, exec, propertyName, value, slot);
296}
297
298JSValue RegExpObject::exec(ExecState* exec, JSString* string)
299{
300    if (MatchResult result = match(exec, string))
301        return RegExpMatchesArray::create(exec, string, regExp(), result);
302    return jsNull();
303}
304
305// Shared implementation used by test and exec.
306MatchResult RegExpObject::match(ExecState* exec, JSString* string)
307{
308    RegExp* regExp = this->regExp();
309    RegExpConstructor* regExpConstructor = exec->lexicalGlobalObject()->regExpConstructor();
310    String input = string->value(exec);
311    VM& vm = exec->vm();
312    if (!regExp->global())
313        return regExpConstructor->performMatch(vm, regExp, string, input, 0);
314
315    JSValue jsLastIndex = getLastIndex();
316    unsigned lastIndex;
317    if (LIKELY(jsLastIndex.isUInt32())) {
318        lastIndex = jsLastIndex.asUInt32();
319        if (lastIndex > input.length()) {
320            setLastIndex(exec, 0);
321            return MatchResult::failed();
322        }
323    } else {
324        double doubleLastIndex = jsLastIndex.toInteger(exec);
325        if (doubleLastIndex < 0 || doubleLastIndex > input.length()) {
326            setLastIndex(exec, 0);
327            return MatchResult::failed();
328        }
329        lastIndex = static_cast<unsigned>(doubleLastIndex);
330    }
331
332    MatchResult result = regExpConstructor->performMatch(vm, regExp, string, input, lastIndex);
333    setLastIndex(exec, result.end);
334    return result;
335}
336
337} // namespace JSC
338