1/* 2 bench.c - simple regex benchmark program 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7*/ 8 9#ifdef HAVE_CONFIG_H 10#include <config.h> 11#endif /* HAVE_CONFIG_H */ 12 13#include <stdio.h> 14#include <stdlib.h> 15#ifdef HAVE_GETOPT_H 16#include <getopt.h> 17#endif /* HAVE_GETOPT_H */ 18#include <time.h> 19#include <unistd.h> 20#include <math.h> 21#include <sys/types.h> 22 23#if 0 24#include <hackerlab/rx-posix/regex.h> 25#else 26#include <regex.h> 27#endif 28 29/* T distribution for alpha = 0.025 (for 95% confidence). XXX - is 30 this correct? */ 31double t_distribution[] = { 32 12.71, 33 4.303, 34 3.182, 35 2.776, 36 2.571, 37 2.447, 38 2.365, 39 2.306, 40 2.262, 41 2.228, 42 2.201, 43 2.179, 44 2.160, 45 2.145, 46 2.131, 47 2.120, 48 2.110, 49 2.101, 50 2.093, 51 2.086, 52 2.080, 53 2.074, 54 2.069, 55 2.064, 56 2.060, 57 2.056, 58 2.052, 59 2.048, 60 2.045, 61 2.042 62}; 63 64void 65stats(double *sample_data, int samples, int len) 66{ 67 double mean, tmp1, tmp2, variance, stddev, error, percent; 68 int i; 69 70 mean = 0; 71 for (i = 0; i < samples; i++) 72 mean += sample_data[i]; 73 mean = mean/i; 74 printf("# mean: %.5f\n", mean); 75 76 tmp1 = 0; 77 for (i = 0; i < samples; i++) { 78 tmp2 = sample_data[i] - mean; 79 tmp1 += tmp2*tmp2; 80 } 81 if (samples > 1) 82 variance = tmp1 / (samples-1); 83 else 84 variance = 0; 85 stddev = sqrt(variance); 86 printf("# variance: %.16f\n", variance); 87 printf("# standard deviation: %.16f\n", stddev); 88 89 error = t_distribution[samples-1] * stddev / sqrt(samples); 90 if (mean != 0) 91 percent = 100*error/mean; 92 else 93 percent = 0; 94 printf("# error: �%.16f (�%.4f%%)\n", error, percent); 95 96 printf("%d\t%.5f\t%.5f\n", len, mean, error); 97 98 fflush(stdout); 99} 100 101void 102run_tests(int len, int samples, double *sample_data, int repeats, 103 regex_t *reobj, char *str, char *tmpbuf) 104{ 105 int i, j, errcode; 106 clock_t c1, c2; 107 regmatch_t pmatch[10]; 108 109 110 printf("# len = %d\n", len); 111 fflush(stdout); 112 for (i = 0; i < samples; i++) { 113 c1 = clock(); 114 for (j = 0; j < repeats; j++) 115 if ((errcode = tre_regexec(reobj, str, 10, pmatch, 0))) { 116 tre_regerror(errcode, reobj, tmpbuf, 255); 117 printf("error: %s\n", tmpbuf); 118 } 119 c2 = clock(); 120 121 sample_data[i] = (double)(c2-c1)/(CLOCKS_PER_SEC*repeats); 122 123 printf("# sample: %.5f sec, clocks: %ld\n", 124 (double)(c2-c1)/(CLOCKS_PER_SEC*repeats), 125 (long)(c2-c1)); 126 fflush(stdout); 127 } 128 fflush(stdout); 129 130 for (i = 0; i < 10; i += 2) { 131 printf("# pmatch[%d].rm_so = %d\n", i/2, (int)pmatch[i/2].rm_so); 132 printf("# pmatch[%d].rm_eo = %d\n", i/2, (int)pmatch[i/2].rm_eo); 133 } 134} 135 136 137int 138main(int argc, char **argv) 139{ 140 regex_t reobj; 141 char *str; 142 char tmpbuf[256]; 143 int i, j; 144 int max_len = 1024*1024*10; 145 int steps = 20; 146 int repeats = 10; 147 int samples = 20; 148 int len; 149 clock_t c1, c2; 150 int opt; 151 double sample_data[30]; 152 153 int test_id = -1; 154 155 while ((opt = getopt(argc, argv, "r:l:s:j:t:")) != -1) { 156 switch (opt) { 157 case 't': 158 test_id = atoi(optarg); 159 break; 160 case 'l': 161 max_len = atoi(optarg); 162 break; 163 case 'j': 164 steps = atoi(optarg); 165 break; 166 case 's': 167 samples = atoi(optarg); 168 break; 169 case 'r': 170 repeats = atoi(optarg); 171 break; 172 default: 173 printf("P�lli.\n"); 174 return 1; 175 } 176 } 177 178 /* XXX - Check that the correct results are returned. For example, GNU 179 regex-0.12 returns incorrect results for very long strings in 180 test number 1. */ 181 182 switch (test_id) { 183 case 0: 184 printf("# pattern: \"a*\"\n"); 185 printf("# string: \"aaaaaa...\"\n"); 186 len = 0; 187 tre_regcomp(&reobj, "a*", REG_EXTENDED); 188 while (len <= max_len) { 189 190 str = malloc(sizeof(char) * (len+1)); 191 for (i = 0; i < len; i++) 192 str[i] = 'a'; 193 str[len-1] = '\0'; 194 195 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf); 196 stats(sample_data, samples, len); 197 len = len + (max_len/steps); 198 free(str); 199 } 200 break; 201 202 203 case 1: 204 printf("# pattern: \"(a)*\"\n"); 205 printf("# string: \"aaaaaa...\"\n"); 206 len = 0; 207 tre_regcomp(&reobj, "(a)*", REG_EXTENDED); 208 while (len <= max_len) { 209 210 str = malloc(sizeof(char) * (len+1)); 211 for (i = 0; i < len; i++) 212 str[i] = 'a'; 213 str[len-1] = '\0'; 214 215 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf); 216 stats(sample_data, samples, len); 217 len = len + (max_len/steps); 218 free(str); 219 } 220 break; 221 222 223 case 2: 224 printf("# pattern: \"(a*)\"\n"); 225 printf("# string: \"aaaaaa...\"\n"); len = 0; 226 tre_regcomp(&reobj, "(a*)", REG_EXTENDED); 227 while (len <= max_len) { 228 229 str = malloc(sizeof(char) * (len+1)); 230 for (i = 0; i < len; i++) 231 str[i] = 'a'; 232 str[len-1] = '\0'; 233 234 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf); 235 stats(sample_data, samples, len); 236 len = len + (max_len/steps); 237 free(str); 238 } 239 break; 240 241 case 3: 242 printf("# pattern: \"(a*)*|b*\"\n"); 243 printf("# string: \"aaaaaa...b\"\n"); 244 len = 0; 245 tre_regcomp(&reobj, "(a*)*|b*", REG_EXTENDED); 246 while (len <= max_len) { 247 str = malloc(sizeof(char) * (len+1)); 248 for (i = 0; i < len-1; i++) 249 str[i] = 'a'; 250 if (len > 0) 251 str[len-1] = 'b'; 252 str[len] = '\0'; 253 254 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf); 255 stats(sample_data, samples, len); 256 len = len + (max_len/steps); 257 free(str); 258 } 259 break; 260 261 case 4: 262 printf("# pattern: \"(a|a|a|...|a)\"\n"); 263 printf("# string: \"aaaaaa...\"\n"); 264 len = 1024*1024; 265 str = malloc(sizeof(char) * (len+1)); 266 for (i = 0; i < len-1; i++) 267 str[i] = 'a'; 268 str[len] = '\0'; 269 len = 0; 270 while (len <= max_len) { 271 tmpbuf[0] = '('; 272 for (i = 1; i < (len*2); i++) { 273 tmpbuf[i] = 'a'; 274 if (i < len*2-2) { 275 i++; 276 tmpbuf[i] = '|'; 277 } 278 } 279 printf("# i = %d\n", i); 280 tmpbuf[i] = ')'; 281 tmpbuf[i+1] = '*'; 282 tmpbuf[i+2] = '\0'; 283 printf("# pat = %s\n", tmpbuf); 284 tre_regcomp(&reobj, tmpbuf, REG_EXTENDED); 285 286 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf); 287 stats(sample_data, samples, len); 288 len = len + (max_len/steps); 289 tre_regfree(&reobj); 290 } 291 free(str); 292 break; 293 294 case 5: 295 printf("# pattern: \"foobar\"\n"); 296 printf("# string: \"aaaaaa...foobar\"\n"); 297 len = 0; 298 tre_regcomp(&reobj, "foobar", REG_EXTENDED); 299 while (len <= max_len) { 300 str = malloc(sizeof(char) * (len+7)); 301 for (i = 0; i < len; i++) { 302 if (i*i % 3) 303 str[i] = 'a'; 304 else 305 str[i] = 'a'; 306 } 307 str[len+0] = 'f'; 308 str[len+1] = 'o'; 309 str[len+2] = 'o'; 310 str[len+3] = 'b'; 311 str[len+4] = 'a'; 312 str[len+5] = 'r'; 313 str[len+6] = '\0'; 314 315 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf); 316 stats(sample_data, samples, len); 317 len = len + (max_len/steps); 318 free(str); 319 } 320 break; 321 322 323 case 6: 324 printf("# pattern: \"a*foobar\"\n"); 325 printf("# string: \"aaaaaa...foobar\"\n"); 326 len = 0; 327 tre_regcomp(&reobj, "a*foobar", REG_EXTENDED); 328 while (len <= max_len) { 329 str = malloc(sizeof(char) * (len+7)); 330 for (i = 0; i < len; i++) { 331 str[i] = 'a'; 332 } 333 str[len+0] = 'f'; 334 str[len+1] = 'o'; 335 str[len+2] = 'o'; 336 str[len+3] = 'b'; 337 str[len+4] = 'a'; 338 str[len+5] = 'r'; 339 str[len+6] = '\0'; 340 341 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf); 342 stats(sample_data, samples, len); 343 len = len + (max_len/steps); 344 free(str); 345 } 346 break; 347 348 349 case 7: 350 printf("# pattern: \"(a)*foobar\"\n"); 351 printf("# string: \"aaaaabbaaab...foobar\"\n"); 352 len = 0; 353 tre_regcomp(&reobj, "(a)*foobar", REG_EXTENDED); 354 while (len <= max_len) { 355 str = malloc(sizeof(char) * (len+7)); 356 for (i = 0; i < len; i++) { 357 /* Without this GNU regex won't find a match! */ 358 if (i*(i-1) % 3) 359 str[i] = 'b'; 360 else 361 str[i] = 'a'; 362 } 363 str[len+0] = 'f'; 364 str[len+1] = 'o'; 365 str[len+2] = 'o'; 366 str[len+3] = 'b'; 367 str[len+4] = 'a'; 368 str[len+5] = 'r'; 369 str[len+6] = '\0'; 370 371 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf); 372 stats(sample_data, samples, len); 373 len = len + (max_len/steps); 374 free(str); 375 } 376 break; 377 378 379 case 8: 380 printf("# pattern: \"(a|b)*foobar\"\n"); 381 printf("# string: \"aaaaabbaaab...foobar\"\n"); 382 len = 0; 383 tre_regcomp(&reobj, "(a|b)*foobar", REG_EXTENDED); 384 while (len <= max_len) { 385 str = malloc(sizeof(char) * (len+7)); 386 for (i = 0; i < len; i++) { 387 if (i*(i-1) % 3) 388 str[i] = 'b'; 389 else 390 str[i] = 'a'; 391 /* Without this GNU regex won't find a match! */ 392 if (i % (1024*1024*10 - 100)) 393 str[i] = 'f'; 394 } 395 str[len+0] = 'f'; 396 str[len+1] = 'o'; 397 str[len+2] = 'o'; 398 str[len+3] = 'b'; 399 str[len+4] = 'a'; 400 str[len+5] = 'r'; 401 str[len+6] = '\0'; 402 403 run_tests(len, samples, sample_data, repeats, &reobj, str, tmpbuf); 404 stats(sample_data, samples, len); 405 len = len + (max_len/steps); 406 free(str); 407 } 408 break; 409 410 411 case 9: 412 printf("# pattern: hand-coded a*\n"); 413 printf("# string: \"aaaaaa...\"\n"); 414 len = 0; 415 while (len <= max_len) { 416 printf("# len = %d\n", len); 417 418 str = malloc(sizeof(char)*(len+1)); 419 for (i = 0; i < len; i++) 420 str[i] = 'a'; 421 str[len-1] = '\0'; 422 423 for (i = 0; i < samples; i++) { 424 c1 = clock(); 425 for (j = 0; j < repeats; j++) { 426 char *s; 427 int l; 428 429 s = str; 430 l = 0; 431 432 433 while (s != '\0') { 434 if (*s == 'a') { 435 s++; 436 l++; 437 } else 438 break; 439 } 440 } 441 c2 = clock(); 442 sample_data[i] = (double)(c2-c1)/(CLOCKS_PER_SEC*repeats); 443 444 printf("# sample: %.5f sec, clocks: %ld\n", 445 (double)(c2-c1)/(CLOCKS_PER_SEC*repeats), 446 (long)(c2-c1)); 447 fflush(stdout); 448 } 449 fflush(stdout); 450 451 stats(sample_data, samples, len); 452 len = len + (max_len/steps); 453 free(str); 454 } 455 break; 456 457 458 default: 459 printf("Pelle.\n"); 460 return 1; 461 } 462 463 tre_regfree(&reobj); 464 465 return 0; 466} 467