/*
 *  Copyright (C) 2011 Apple Inc. All rights reserved.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */
20
21#include "config.h"
22#include "RegExp.h"
23
24#include "APIShims.h"
25#include <wtf/CurrentTime.h>
26#include "InitializeThreading.h"
27#include "JSGlobalObject.h"
28#include "Operations.h"
29#include <errno.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <wtf/text/StringBuilder.h>
34
35#if !OS(WINDOWS)
36#include <unistd.h>
37#endif
38
39#if HAVE(SYS_TIME_H)
40#include <sys/time.h>
41#endif
42
43#if COMPILER(MSVC) && !OS(WINCE)
44#include <crtdbg.h>
45#include <mmsystem.h>
46#include <windows.h>
47#endif
48
49#if PLATFORM(QT)
50#include <QCoreApplication>
51#include <QDateTime>
52#endif
53
// Maximum number of bytes fgets() is asked to read for a single line of a test
// data file; the line buffer is allocated one byte larger than this.
static const int MaxLineLength = 100 * 1024;
55
56using namespace JSC;
57using namespace WTF;
58
59struct CommandLine {
60    CommandLine()
61        : interactive(false)
62        , verbose(false)
63    {
64    }
65
66    bool interactive;
67    bool verbose;
68    Vector<String> arguments;
69    Vector<String> files;
70};
71
72class StopWatch {
73public:
74    void start();
75    void stop();
76    long getElapsedMS(); // call stop() first
77
78private:
79    double m_startTime;
80    double m_stopTime;
81};
82
83void StopWatch::start()
84{
85    m_startTime = currentTime();
86}
87
88void StopWatch::stop()
89{
90    m_stopTime = currentTime();
91}
92
93long StopWatch::getElapsedMS()
94{
95    return static_cast<long>((m_stopTime - m_startTime) * 1000);
96}
97
98struct RegExpTest {
99    RegExpTest()
100        : offset(0)
101        , result(0)
102    {
103    }
104
105    String subject;
106    int offset;
107    int result;
108    Vector<int, 32> expectVector;
109};
110
111class GlobalObject : public JSGlobalObject {
112private:
113    GlobalObject(VM&, Structure*, const Vector<String>& arguments);
114
115public:
116    typedef JSGlobalObject Base;
117
118    static GlobalObject* create(VM& vm, Structure* structure, const Vector<String>& arguments)
119    {
120        GlobalObject* globalObject = new (NotNull, allocateCell<GlobalObject>(vm.heap)) GlobalObject(vm, structure, arguments);
121        vm.heap.addFinalizer(globalObject, destroy);
122        return globalObject;
123    }
124
125    static const ClassInfo s_info;
126
127    static const bool needsDestructor = false;
128
129    static Structure* createStructure(VM& vm, JSValue prototype)
130    {
131        return Structure::create(vm, 0, prototype, TypeInfo(GlobalObjectType, StructureFlags), &s_info);
132    }
133
134protected:
135    void finishCreation(VM& vm, const Vector<String>& arguments)
136    {
137        Base::finishCreation(vm);
138        UNUSED_PARAM(arguments);
139    }
140};
141
142COMPILE_ASSERT(!IsInteger<GlobalObject>::value, WTF_IsInteger_GlobalObject_false);
143
144const ClassInfo GlobalObject::s_info = { "global", &JSGlobalObject::s_info, 0, ExecState::globalObjectTable, CREATE_METHOD_TABLE(GlobalObject) };
145
146GlobalObject::GlobalObject(VM& vm, Structure* structure, const Vector<String>& arguments)
147    : JSGlobalObject(vm, structure)
148{
149    finishCreation(vm, arguments);
150}
151
152// Use SEH for Release builds only to get rid of the crash report dialog
153// (luckily the same tests fail in Release and Debug builds so far). Need to
154// be in a separate main function because the realMain function requires object
155// unwinding.
156
157#if COMPILER(MSVC) && !COMPILER(INTEL) && !defined(_DEBUG) && !OS(WINCE)
158#define TRY       __try {
159#define EXCEPT(x) } __except (EXCEPTION_EXECUTE_HANDLER) { x; }
160#else
161#define TRY
162#define EXCEPT(x)
163#endif
164
165int realMain(int argc, char** argv);
166
167int main(int argc, char** argv)
168{
169#if OS(WINDOWS)
170#if !OS(WINCE)
171    // Cygwin calls ::SetErrorMode(SEM_FAILCRITICALERRORS), which we will inherit. This is bad for
172    // testing/debugging, as it causes the post-mortem debugger not to be invoked. We reset the
173    // error mode here to work around Cygwin's behavior. See <http://webkit.org/b/55222>.
174    ::SetErrorMode(0);
175#endif
176
177#if defined(_DEBUG)
178    _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
179    _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
180    _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
181    _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE);
182    _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
183    _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
184#endif
185
186    timeBeginPeriod(1);
187#endif
188
189#if PLATFORM(QT)
190    QCoreApplication app(argc, argv);
191#endif
192
193    // Initialize JSC before getting VM.
194    JSC::initializeThreading();
195
196    // We can't use destructors in the following code because it uses Windows
197    // Structured Exception Handling
198    int res = 0;
199    TRY
200        res = realMain(argc, argv);
201    EXCEPT(res = 3)
202    return res;
203}
204
205static bool testOneRegExp(VM& vm, RegExp* regexp, RegExpTest* regExpTest, bool verbose, unsigned int lineNumber)
206{
207    bool result = true;
208    Vector<int, 32> outVector;
209    outVector.resize(regExpTest->expectVector.size());
210    int matchResult = regexp->match(vm, regExpTest->subject, regExpTest->offset, outVector);
211
212    if (matchResult != regExpTest->result) {
213        result = false;
214        if (verbose)
215            printf("Line %d: results mismatch - expected %d got %d\n", lineNumber, regExpTest->result, matchResult);
216    } else if (matchResult != -1) {
217        if (outVector.size() != regExpTest->expectVector.size()) {
218            result = false;
219            if (verbose)
220                printf("Line %d: output vector size mismatch - expected %lu got %lu\n", lineNumber, regExpTest->expectVector.size(), outVector.size());
221        } else if (outVector.size() % 2) {
222            result = false;
223            if (verbose)
224                printf("Line %d: output vector size is odd (%lu), should be even\n", lineNumber, outVector.size());
225        } else {
226            // Check in pairs since the first value of the pair could be -1 in which case the second doesn't matter.
227            size_t pairCount = outVector.size() / 2;
228            for (size_t i = 0; i < pairCount; ++i) {
229                size_t startIndex = i*2;
230                if (outVector[startIndex] != regExpTest->expectVector[startIndex]) {
231                    result = false;
232                    if (verbose)
233                        printf("Line %d: output vector mismatch at index %lu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]);
234                }
235                if ((i > 0) && (regExpTest->expectVector[startIndex] != -1) && (outVector[startIndex+1] != regExpTest->expectVector[startIndex+1])) {
236                    result = false;
237                    if (verbose)
238                        printf("Line %d: output vector mismatch at index %lu - expected %d got %d\n", lineNumber, startIndex+1, regExpTest->expectVector[startIndex+1], outVector[startIndex+1]);
239                }
240            }
241        }
242    }
243
244    return result;
245}
246
247static int scanString(char* buffer, int bufferLength, StringBuilder& builder, char termChar)
248{
249    bool escape = false;
250
251    for (int i = 0; i < bufferLength; ++i) {
252        UChar c = buffer[i];
253
254        if (escape) {
255            switch (c) {
256            case '0':
257                c = '\0';
258                break;
259            case 'a':
260                c = '\a';
261                break;
262            case 'b':
263                c = '\b';
264                break;
265            case 'f':
266                c = '\f';
267                break;
268            case 'n':
269                c = '\n';
270                break;
271            case 'r':
272                c = '\r';
273                break;
274            case 't':
275                c = '\t';
276                break;
277            case 'v':
278                c = '\v';
279                break;
280            case '\\':
281                c = '\\';
282                break;
283            case '?':
284                c = '\?';
285                break;
286            case 'u':
287                if ((i + 4) >= bufferLength)
288                    return -1;
289                unsigned int charValue;
290                if (sscanf(buffer+i+1, "%04x", &charValue) != 1)
291                    return -1;
292                c = static_cast<UChar>(charValue);
293                i += 4;
294                break;
295            }
296
297            builder.append(c);
298            escape = false;
299        } else {
300            if (c == termChar)
301                return i;
302
303            if (c == '\\')
304                escape = true;
305            else
306                builder.append(c);
307        }
308    }
309
310    return -1;
311}
312
313static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength)
314{
315    StringBuilder pattern;
316
317    if (line[0] != '/')
318        return 0;
319
320    int i = scanString(line + 1, lineLength - 1, pattern, '/') + 1;
321
322    if ((i >= lineLength) || (line[i] != '/'))
323        return 0;
324
325    ++i;
326
327    return RegExp::create(vm, pattern.toString(), regExpFlags(line + i));
328}
329
330static RegExpTest* parseTestLine(char* line, int lineLength)
331{
332    StringBuilder subjectString;
333
334    if ((line[0] != ' ') || (line[1] != '"'))
335        return 0;
336
337    int i = scanString(line + 2, lineLength - 2, subjectString, '"') + 2;
338
339    if ((i >= (lineLength - 2)) || (line[i] != '"') || (line[i+1] != ',') || (line[i+2] != ' '))
340        return 0;
341
342    i += 3;
343
344    int offset;
345
346    if (sscanf(line + i, "%d, ", &offset) != 1)
347        return 0;
348
349    while (line[i] && line[i] != ' ')
350        ++i;
351
352    ++i;
353
354    int matchResult;
355
356    if (sscanf(line + i, "%d, ", &matchResult) != 1)
357        return 0;
358
359    while (line[i] && line[i] != ' ')
360        ++i;
361
362    ++i;
363
364    if (line[i++] != '(')
365        return 0;
366
367    int start, end;
368
369    RegExpTest* result = new RegExpTest();
370
371    result->subject = subjectString.toString();
372    result->offset = offset;
373    result->result = matchResult;
374
375    while (line[i] && line[i] != ')') {
376        if (sscanf(line + i, "%d, %d", &start, &end) != 2) {
377            delete result;
378            return 0;
379        }
380
381        result->expectVector.append(start);
382        result->expectVector.append(end);
383
384        while (line[i] && (line[i] != ',') && (line[i] != ')'))
385            i++;
386        i++;
387        while (line[i] && (line[i] != ',') && (line[i] != ')'))
388            i++;
389
390        if (line[i] == ')')
391            break;
392        if (!line[i] || (line[i] != ',')) {
393            delete result;
394            return 0;
395        }
396        i++;
397    }
398
399    return result;
400}
401
402static bool runFromFiles(GlobalObject* globalObject, const Vector<String>& files, bool verbose)
403{
404    String script;
405    String fileName;
406    Vector<char> scriptBuffer;
407    unsigned tests = 0;
408    unsigned failures = 0;
409    char* lineBuffer = new char[MaxLineLength + 1];
410
411    VM& vm = globalObject->vm();
412
413    bool success = true;
414    for (size_t i = 0; i < files.size(); i++) {
415        FILE* testCasesFile = fopen(files[i].utf8().data(), "rb");
416
417        if (!testCasesFile) {
418            printf("Unable to open test data file \"%s\"\n", files[i].utf8().data());
419            continue;
420        }
421
422        RegExp* regexp = 0;
423        size_t lineLength = 0;
424        char* linePtr = 0;
425        unsigned int lineNumber = 0;
426
427        while ((linePtr = fgets(&lineBuffer[0], MaxLineLength, testCasesFile))) {
428            lineLength = strlen(linePtr);
429            if (linePtr[lineLength - 1] == '\n') {
430                linePtr[lineLength - 1] = '\0';
431                --lineLength;
432            }
433            ++lineNumber;
434
435            if (linePtr[0] == '#')
436                continue;
437
438            if (linePtr[0] == '/') {
439                regexp = parseRegExpLine(vm, linePtr, lineLength);
440            } else if (linePtr[0] == ' ') {
441                RegExpTest* regExpTest = parseTestLine(linePtr, lineLength);
442
443                if (regexp && regExpTest) {
444                    ++tests;
445                    if (!testOneRegExp(vm, regexp, regExpTest, verbose, lineNumber)) {
446                        failures++;
447                        printf("Failure on line %u\n", lineNumber);
448                    }
449                }
450
451                if (regExpTest)
452                    delete regExpTest;
453            }
454        }
455
456        fclose(testCasesFile);
457    }
458
459    if (failures)
460        printf("%u tests run, %u failures\n", tests, failures);
461    else
462        printf("%u tests passed\n", tests);
463
464    delete[] lineBuffer;
465
466    vm.dumpSampleData(globalObject->globalExec());
467#if ENABLE(REGEXP_TRACING)
468    vm.dumpRegExpTrace();
469#endif
470    return success;
471}
472
473#define RUNNING_FROM_XCODE 0
474
475static NO_RETURN void printUsageStatement(bool help = false)
476{
477    fprintf(stderr, "Usage: regexp_test [options] file\n");
478    fprintf(stderr, "  -h|--help  Prints this help message\n");
479    fprintf(stderr, "  -v|--verbose  Verbose output\n");
480
481    exit(help ? EXIT_SUCCESS : EXIT_FAILURE);
482}
483
484static void parseArguments(int argc, char** argv, CommandLine& options)
485{
486    int i = 1;
487    for (; i < argc; ++i) {
488        const char* arg = argv[i];
489        if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
490            printUsageStatement(true);
491        if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
492            options.verbose = true;
493        else
494            options.files.append(argv[i]);
495    }
496
497    for (; i < argc; ++i)
498        options.arguments.append(argv[i]);
499}
500
501int realMain(int argc, char** argv)
502{
503    VM* vm = VM::create(LargeHeap).leakRef();
504    APIEntryShim shim(vm);
505
506    CommandLine options;
507    parseArguments(argc, argv, options);
508
509    GlobalObject* globalObject = GlobalObject::create(*vm, GlobalObject::createStructure(*vm, jsNull()), options.arguments);
510    bool success = runFromFiles(globalObject, options.files, options.verbose);
511
512    return success ? 0 : 3;
513}
514