1/*
2 *  Copyright (C) 2011 Apple Inc. All rights reserved.
3 *
4 *  This library is free software; you can redistribute it and/or
5 *  modify it under the terms of the GNU Library General Public
6 *  License as published by the Free Software Foundation; either
7 *  version 2 of the License, or (at your option) any later version.
8 *
9 *  This library is distributed in the hope that it will be useful,
10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 *  Library General Public License for more details.
13 *
14 *  You should have received a copy of the GNU Library General Public License
15 *  along with this library; see the file COPYING.LIB.  If not, write to
16 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 *  Boston, MA 02110-1301, USA.
18 *
19 */
20
21#include "config.h"
22#include "RegExp.h"
23
24#include <wtf/CurrentTime.h>
25#include "InitializeThreading.h"
26#include "JSCInlines.h"
27#include "JSGlobalObject.h"
28#include <errno.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <wtf/text/StringBuilder.h>
33
34#if !OS(WINDOWS)
35#include <unistd.h>
36#endif
37
38#if HAVE(SYS_TIME_H)
39#include <sys/time.h>
40#endif
41
42#if COMPILER(MSVC) && !OS(WINCE)
43#include <crtdbg.h>
44#include <mmsystem.h>
45#include <windows.h>
46#endif
47
// Size passed to fgets() when reading a test data file; runFromFiles() allocates
// a line buffer of MaxLineLength + 1 bytes.
const int MaxLineLength = 100 * 1024;
49
50using namespace JSC;
51using namespace WTF;
52
53struct CommandLine {
54    CommandLine()
55        : interactive(false)
56        , verbose(false)
57    {
58    }
59
60    bool interactive;
61    bool verbose;
62    Vector<String> arguments;
63    Vector<String> files;
64};
65
// Simple interval timer: call start(), then stop(), then getElapsedMS().
class StopWatch {
public:
    // Records the beginning of the measured interval.
    void start();
    // Records the end of the measured interval.
    void stop();
    // Returns the interval between the recorded start and stop, in milliseconds.
    long getElapsedMS(); // call stop() first

private:
    // Both readings are zero-initialized so that getElapsedMS() returns 0,
    // rather than reading indeterminate values, if it is called before
    // start()/stop().
    double m_startTime { 0 };
    double m_stopTime { 0 };
};
76
77void StopWatch::start()
78{
79    m_startTime = monotonicallyIncreasingTime();
80}
81
82void StopWatch::stop()
83{
84    m_stopTime = monotonicallyIncreasingTime();
85}
86
87long StopWatch::getElapsedMS()
88{
89    return static_cast<long>((m_stopTime - m_startTime) * 1000);
90}
91
92struct RegExpTest {
93    RegExpTest()
94        : offset(0)
95        , result(0)
96    {
97    }
98
99    String subject;
100    int offset;
101    int result;
102    Vector<int, 32> expectVector;
103};
104
105class GlobalObject : public JSGlobalObject {
106private:
107    GlobalObject(VM&, Structure*, const Vector<String>& arguments);
108
109public:
110    typedef JSGlobalObject Base;
111
112    static GlobalObject* create(VM& vm, Structure* structure, const Vector<String>& arguments)
113    {
114        GlobalObject* globalObject = new (NotNull, allocateCell<GlobalObject>(vm.heap)) GlobalObject(vm, structure, arguments);
115        vm.heap.addFinalizer(globalObject, destroy);
116        return globalObject;
117    }
118
119    DECLARE_INFO;
120
121    static const bool needsDestructor = false;
122
123    static Structure* createStructure(VM& vm, JSValue prototype)
124    {
125        return Structure::create(vm, 0, prototype, TypeInfo(GlobalObjectType, StructureFlags), info());
126    }
127
128protected:
129    void finishCreation(VM& vm, const Vector<String>& arguments)
130    {
131        Base::finishCreation(vm);
132        UNUSED_PARAM(arguments);
133    }
134};
135
// Class metadata for GlobalObject: named "global", with JSGlobalObject's ClassInfo as its parent.
const ClassInfo GlobalObject::s_info = { "global", &JSGlobalObject::s_info, 0, ExecState::globalObjectTable, CREATE_METHOD_TABLE(GlobalObject) };
137
// Forwards vm and structure to JSGlobalObject, then runs finishCreation()
// directly from the constructor body.
GlobalObject::GlobalObject(VM& vm, Structure* structure, const Vector<String>& arguments)
    : JSGlobalObject(vm, structure)
{
    finishCreation(vm, arguments);
}
143
// Use SEH for Release builds only to get rid of the crash report dialog
// (luckily the same tests fail in Release and Debug builds so far). Need to
// be in a separate main function because the realMain function requires object
// unwinding.
//
// Usage: TRY <statement> EXCEPT(<handler statement>). With MSVC release builds
// (non-WinCE) the pair expands to a __try/__except block that runs the handler
// for any structured exception; everywhere else both macros expand to nothing,
// so only the guarded statement remains.

#if COMPILER(MSVC) && !defined(_DEBUG) && !OS(WINCE)
#define TRY       __try {
#define EXCEPT(x) } __except (EXCEPTION_EXECUTE_HANDLER) { x; }
#else
#define TRY
#define EXCEPT(x)
#endif
156
157int realMain(int argc, char** argv);
158
// Process entry point. Performs the Windows-only process setup, initializes
// JSC threading, then delegates to realMain(). Returns realMain()'s result, or
// 3 when the EXCEPT handler runs (only possible where TRY/EXCEPT expand to
// __try/__except).
int main(int argc, char** argv)
{
#if OS(WINDOWS)
#if !OS(WINCE)
    // Cygwin calls ::SetErrorMode(SEM_FAILCRITICALERRORS), which we will inherit. This is bad for
    // testing/debugging, as it causes the post-mortem debugger not to be invoked. We reset the
    // error mode here to work around Cygwin's behavior. See <http://webkit.org/b/55222>.
    ::SetErrorMode(0);
#endif

#if defined(_DEBUG)
    // Route CRT warning, error and assertion reports to stderr.
    _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
    _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
    _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
    _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE);
    _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
    _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
#endif

    timeBeginPeriod(1);
#endif

    // Initialize JSC before getting VM.
    JSC::initializeThreading();

    // We can't use destructors in the following code because it uses Windows
    // Structured Exception Handling
    int res = 0;
    TRY
        res = realMain(argc, argv);
    EXCEPT(res = 3)
    return res;
}
192
193static bool testOneRegExp(VM& vm, RegExp* regexp, RegExpTest* regExpTest, bool verbose, unsigned int lineNumber)
194{
195    bool result = true;
196    Vector<int, 32> outVector;
197    outVector.resize(regExpTest->expectVector.size());
198    int matchResult = regexp->match(vm, regExpTest->subject, regExpTest->offset, outVector);
199
200    if (matchResult != regExpTest->result) {
201        result = false;
202        if (verbose)
203            printf("Line %d: results mismatch - expected %d got %d\n", lineNumber, regExpTest->result, matchResult);
204    } else if (matchResult != -1) {
205        if (outVector.size() != regExpTest->expectVector.size()) {
206            result = false;
207            if (verbose)
208                printf("Line %d: output vector size mismatch - expected %lu got %lu\n", lineNumber, regExpTest->expectVector.size(), outVector.size());
209        } else if (outVector.size() % 2) {
210            result = false;
211            if (verbose)
212                printf("Line %d: output vector size is odd (%lu), should be even\n", lineNumber, outVector.size());
213        } else {
214            // Check in pairs since the first value of the pair could be -1 in which case the second doesn't matter.
215            size_t pairCount = outVector.size() / 2;
216            for (size_t i = 0; i < pairCount; ++i) {
217                size_t startIndex = i*2;
218                if (outVector[startIndex] != regExpTest->expectVector[startIndex]) {
219                    result = false;
220                    if (verbose)
221                        printf("Line %d: output vector mismatch at index %lu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]);
222                }
223                if ((i > 0) && (regExpTest->expectVector[startIndex] != -1) && (outVector[startIndex+1] != regExpTest->expectVector[startIndex+1])) {
224                    result = false;
225                    if (verbose)
226                        printf("Line %d: output vector mismatch at index %lu - expected %d got %d\n", lineNumber, startIndex+1, regExpTest->expectVector[startIndex+1], outVector[startIndex+1]);
227                }
228            }
229        }
230    }
231
232    return result;
233}
234
235static int scanString(char* buffer, int bufferLength, StringBuilder& builder, char termChar)
236{
237    bool escape = false;
238
239    for (int i = 0; i < bufferLength; ++i) {
240        UChar c = buffer[i];
241
242        if (escape) {
243            switch (c) {
244            case '0':
245                c = '\0';
246                break;
247            case 'a':
248                c = '\a';
249                break;
250            case 'b':
251                c = '\b';
252                break;
253            case 'f':
254                c = '\f';
255                break;
256            case 'n':
257                c = '\n';
258                break;
259            case 'r':
260                c = '\r';
261                break;
262            case 't':
263                c = '\t';
264                break;
265            case 'v':
266                c = '\v';
267                break;
268            case '\\':
269                c = '\\';
270                break;
271            case '?':
272                c = '\?';
273                break;
274            case 'u':
275                if ((i + 4) >= bufferLength)
276                    return -1;
277                unsigned int charValue;
278                if (sscanf(buffer+i+1, "%04x", &charValue) != 1)
279                    return -1;
280                c = static_cast<UChar>(charValue);
281                i += 4;
282                break;
283            }
284
285            builder.append(c);
286            escape = false;
287        } else {
288            if (c == termChar)
289                return i;
290
291            if (c == '\\')
292                escape = true;
293            else
294                builder.append(c);
295        }
296    }
297
298    return -1;
299}
300
301static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength)
302{
303    StringBuilder pattern;
304
305    if (line[0] != '/')
306        return 0;
307
308    int i = scanString(line + 1, lineLength - 1, pattern, '/') + 1;
309
310    if ((i >= lineLength) || (line[i] != '/'))
311        return 0;
312
313    ++i;
314
315    return RegExp::create(vm, pattern.toString(), regExpFlags(line + i));
316}
317
318static RegExpTest* parseTestLine(char* line, int lineLength)
319{
320    StringBuilder subjectString;
321
322    if ((line[0] != ' ') || (line[1] != '"'))
323        return 0;
324
325    int i = scanString(line + 2, lineLength - 2, subjectString, '"') + 2;
326
327    if ((i >= (lineLength - 2)) || (line[i] != '"') || (line[i+1] != ',') || (line[i+2] != ' '))
328        return 0;
329
330    i += 3;
331
332    int offset;
333
334    if (sscanf(line + i, "%d, ", &offset) != 1)
335        return 0;
336
337    while (line[i] && line[i] != ' ')
338        ++i;
339
340    ++i;
341
342    int matchResult;
343
344    if (sscanf(line + i, "%d, ", &matchResult) != 1)
345        return 0;
346
347    while (line[i] && line[i] != ' ')
348        ++i;
349
350    ++i;
351
352    if (line[i++] != '(')
353        return 0;
354
355    int start, end;
356
357    RegExpTest* result = new RegExpTest();
358
359    result->subject = subjectString.toString();
360    result->offset = offset;
361    result->result = matchResult;
362
363    while (line[i] && line[i] != ')') {
364        if (sscanf(line + i, "%d, %d", &start, &end) != 2) {
365            delete result;
366            return 0;
367        }
368
369        result->expectVector.append(start);
370        result->expectVector.append(end);
371
372        while (line[i] && (line[i] != ',') && (line[i] != ')'))
373            i++;
374        i++;
375        while (line[i] && (line[i] != ',') && (line[i] != ')'))
376            i++;
377
378        if (line[i] == ')')
379            break;
380        if (!line[i] || (line[i] != ',')) {
381            delete result;
382            return 0;
383        }
384        i++;
385    }
386
387    return result;
388}
389
390static bool runFromFiles(GlobalObject* globalObject, const Vector<String>& files, bool verbose)
391{
392    String script;
393    String fileName;
394    Vector<char> scriptBuffer;
395    unsigned tests = 0;
396    unsigned failures = 0;
397    char* lineBuffer = new char[MaxLineLength + 1];
398
399    VM& vm = globalObject->vm();
400
401    bool success = true;
402    for (size_t i = 0; i < files.size(); i++) {
403        FILE* testCasesFile = fopen(files[i].utf8().data(), "rb");
404
405        if (!testCasesFile) {
406            printf("Unable to open test data file \"%s\"\n", files[i].utf8().data());
407            continue;
408        }
409
410        RegExp* regexp = 0;
411        size_t lineLength = 0;
412        char* linePtr = 0;
413        unsigned int lineNumber = 0;
414
415        while ((linePtr = fgets(&lineBuffer[0], MaxLineLength, testCasesFile))) {
416            lineLength = strlen(linePtr);
417            if (linePtr[lineLength - 1] == '\n') {
418                linePtr[lineLength - 1] = '\0';
419                --lineLength;
420            }
421            ++lineNumber;
422
423            if (linePtr[0] == '#')
424                continue;
425
426            if (linePtr[0] == '/') {
427                regexp = parseRegExpLine(vm, linePtr, lineLength);
428            } else if (linePtr[0] == ' ') {
429                RegExpTest* regExpTest = parseTestLine(linePtr, lineLength);
430
431                if (regexp && regExpTest) {
432                    ++tests;
433                    if (!testOneRegExp(vm, regexp, regExpTest, verbose, lineNumber)) {
434                        failures++;
435                        printf("Failure on line %u\n", lineNumber);
436                    }
437                }
438
439                if (regExpTest)
440                    delete regExpTest;
441            }
442        }
443
444        fclose(testCasesFile);
445    }
446
447    if (failures)
448        printf("%u tests run, %u failures\n", tests, failures);
449    else
450        printf("%u tests passed\n", tests);
451
452    delete[] lineBuffer;
453
454    vm.dumpSampleData(globalObject->globalExec());
455#if ENABLE(REGEXP_TRACING)
456    vm.dumpRegExpTrace();
457#endif
458    return success;
459}
460
461#define RUNNING_FROM_XCODE 0
462
463static NO_RETURN void printUsageStatement(bool help = false)
464{
465    fprintf(stderr, "Usage: regexp_test [options] file\n");
466    fprintf(stderr, "  -h|--help  Prints this help message\n");
467    fprintf(stderr, "  -v|--verbose  Verbose output\n");
468
469    exit(help ? EXIT_SUCCESS : EXIT_FAILURE);
470}
471
472static void parseArguments(int argc, char** argv, CommandLine& options)
473{
474    int i = 1;
475    for (; i < argc; ++i) {
476        const char* arg = argv[i];
477        if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
478            printUsageStatement(true);
479        if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
480            options.verbose = true;
481        else
482            options.files.append(argv[i]);
483    }
484
485    for (; i < argc; ++i)
486        options.arguments.append(argv[i]);
487}
488
489int realMain(int argc, char** argv)
490{
491    VM* vm = VM::create(LargeHeap).leakRef();
492    JSLockHolder locker(vm);
493
494    CommandLine options;
495    parseArguments(argc, argv, options);
496
497    GlobalObject* globalObject = GlobalObject::create(*vm, GlobalObject::createStructure(*vm, jsNull()), options.arguments);
498    bool success = runFromFiles(globalObject, options.files, options.verbose);
499
500    return success ? 0 : 3;
501}
502