1/* 2 * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved. 3 * Use is subject to license terms. 4 * 5 * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T 6 * All Rights Reserved 7 * 8 * University Copyright- Copyright (c) 1982, 1986, 1988 9 * The Regents of the University of California 10 * All Rights Reserved 11 * 12 * University Acknowledgment- Portions of this document are derived from 13 * software developed by the University of California, Berkeley, and its 14 * contributors. 15 * 16 * Licensed under the Apache License, Version 2.0 (the "License"); 17 * you may not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * http://www.apache.org/licenses/LICENSE-2.0. 20 * 21 * Unless required by applicable law or agreed to in writing, software 22 * distributed under the License is distributed on an "AS IS" BASIS, 23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 24 * or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29/* Code moved from regexp.h */ 30 31#include "apr.h" 32#include "apr_lib.h" 33#ifdef APR_HAVE_LIMITS_H 34#include <limits.h> 35#endif 36#if APR_HAVE_STDLIB_H 37#include <stdlib.h> 38#endif 39#include "libsed.h" 40#include "regexp.h" 41#include "sed.h" 42 43#define GETC() ((unsigned char)*sp++) 44#define PEEKC() ((unsigned char)*sp) 45#define UNGETC(c) (--sp) 46#define SEDCOMPILE_ERROR(c) { \ 47 regerrno = c; \ 48 goto out; \ 49 } 50#define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0) 51#define uletter(c) (isalpha(c) || c == '_') 52 53 54static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; 55 56static int regerr(sed_commands_t *commands, int err); 57static void comperr(sed_commands_t *commands, char *msg); 58static void getrnge(char *str, step_vars_storage *vars); 59static int _advance(char *, char *, step_vars_storage *); 60extern int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars); 61 62 63static void comperr(sed_commands_t *commands, char *msg) 64{ 65 command_errf(commands, msg, commands->linebuf); 66} 67 68/* 69*/ 70static int regerr(sed_commands_t *commands, int err) 71{ 72 switch(err) { 73 case 0: 74 /* No error */ 75 break; 76 case 11: 77 comperr(commands, "Range endpoint too large: %s"); 78 break; 79 80 case 16: 81 comperr(commands, "Bad number: %s"); 82 break; 83 84 case 25: 85 comperr(commands, "``\\digit'' out of range: %s"); 86 break; 87 88 case 36: 89 comperr(commands, "Illegal or missing delimiter: %s"); 90 break; 91 92 case 41: 93 comperr(commands, "No remembered search string: %s"); 94 break; 95 96 case 42: 97 comperr(commands, "\\( \\) imbalance: %s"); 98 break; 99 100 case 43: 101 comperr(commands, "Too many \\(: %s"); 102 break; 103 104 case 44: 105 comperr(commands, "More than 2 numbers given in \\{ \\}: %s"); 106 break; 107 108 case 45: 109 comperr(commands, "} expected after \\: %s"); 110 break; 111 112 case 46: 113 comperr(commands, "First number exceeds second in \\{ \\}: %s"); 114 break; 115 116 case 49: 117 comperr(commands, "[ ] imbalance: %s"); 118 break; 119 120 case 50: 121 comperr(commands, SEDERR_TMMES); 122 break; 123 124 default: 125 comperr(commands, "Unknown regexp error code %s\n"); 126 break; 127 } 128 return (0); 129} 130 131 132char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs, 133 char *ep, char *endbuf, int seof) 134{ 135 int c; 136 int eof = seof; 137 char *lastep; 138 int cclcnt; 139 char bracket[NBRA], *bracketp; 140 int closed; 141 int neg; 142 int lc; 143 int i, cflg; 144 int iflag; /* used for non-ascii characters in brackets */ 145 char *sp = commands->cp; 146 int regerrno = 0; 147 148 lastep = 0; 149 if ((c = GETC()) == eof || c == '\n') { 150 if (c == '\n') { 151 UNGETC(c); 152 } 153 commands->cp = sp; 154 goto out; 155 } 156 bracketp = bracket; 157 compargs->circf = closed = compargs->nbra = 0; 158 if (c == '^') 159 compargs->circf++; 160 else 161 UNGETC(c); 162 while (1) { 163 if (ep >= endbuf) 164 SEDCOMPILE_ERROR(50); 165 c = GETC(); 166 if (c != '*' && ((c != '\\') || (PEEKC() != '{'))) 167 lastep = ep; 168 if (c == eof) { 169 *ep++ = CCEOF; 170 if (bracketp != bracket) 171 SEDCOMPILE_ERROR(42); 172 commands->cp = sp; 173 goto out; 174 } 175 switch (c) { 176 177 case '.': 178 *ep++ = CDOT; 179 continue; 180 181 case '\n': 182 SEDCOMPILE_ERROR(36); 183 commands->cp = sp; 184 goto out; 185 case '*': 186 if (lastep == 0 || *lastep == CBRA || *lastep == CKET) 187 goto defchar; 188 *lastep |= STAR; 189 continue; 190 191 case '$': 192 if (PEEKC() != eof && PEEKC() != '\n') 193 goto defchar; 194 *ep++ = CDOL; 195 continue; 196 197 case '[': 198 if (&ep[17] >= endbuf) 199 SEDCOMPILE_ERROR(50); 200 201 *ep++ = CCL; 202 lc = 0; 203 for (i = 0; i < 16; i++) 204 ep[i] = 0; 205 206 neg = 0; 207 if ((c = GETC()) == '^') { 208 neg = 1; 209 c = GETC(); 210 } 211 iflag = 1; 212 do { 213 c &= 0377; 214 if (c == '\0' || c == '\n') 215 SEDCOMPILE_ERROR(49); 216 if ((c & 0200) && iflag) { 217 iflag = 0; 218 if (&ep[32] >= endbuf) 219 SEDCOMPILE_ERROR(50); 220 ep[-1] = CXCL; 221 for (i = 16; i < 32; i++) 222 ep[i] = 0; 223 } 224 if (c == '-' && lc != 0) { 225 if ((c = GETC()) == ']') { 226 PLACE('-'); 227 break; 228 } 229 if ((c & 0200) && iflag) { 230 iflag = 0; 231 if (&ep[32] >= endbuf) 232 SEDCOMPILE_ERROR(50); 233 ep[-1] = CXCL; 234 for (i = 16; i < 32; i++) 235 ep[i] = 0; 236 } 237 while (lc < c) { 238 PLACE(lc); 239 lc++; 240 } 241 } 242 lc = c; 243 PLACE(c); 244 } while ((c = GETC()) != ']'); 245 246 if (iflag) 247 iflag = 16; 248 else 249 iflag = 32; 250 251 if (neg) { 252 if (iflag == 32) { 253 for (cclcnt = 0; cclcnt < iflag; 254 cclcnt++) 255 ep[cclcnt] ^= 0377; 256 ep[0] &= 0376; 257 } else { 258 ep[-1] = NCCL; 259 /* make nulls match so test fails */ 260 ep[0] |= 01; 261 } 262 } 263 264 ep += iflag; 265 266 continue; 267 268 case '\\': 269 switch (c = GETC()) { 270 271 case '(': 272 if (compargs->nbra >= NBRA) 273 SEDCOMPILE_ERROR(43); 274 *bracketp++ = compargs->nbra; 275 *ep++ = CBRA; 276 *ep++ = compargs->nbra++; 277 continue; 278 279 case ')': 280 if (bracketp <= bracket) 281 SEDCOMPILE_ERROR(42); 282 *ep++ = CKET; 283 *ep++ = *--bracketp; 284 closed++; 285 continue; 286 287 case '{': 288 if (lastep == (char *) 0) 289 goto defchar; 290 *lastep |= RNGE; 291 cflg = 0; 292 nlim: 293 c = GETC(); 294 i = 0; 295 do { 296 if ('0' <= c && c <= '9') 297 i = 10 * i + c - '0'; 298 else 299 SEDCOMPILE_ERROR(16); 300 } while (((c = GETC()) != '\\') && (c != ',')); 301 if (i >= 255) 302 SEDCOMPILE_ERROR(11); 303 *ep++ = i; 304 if (c == ',') { 305 if (cflg++) 306 SEDCOMPILE_ERROR(44); 307 if ((c = GETC()) == '\\') 308 *ep++ = (char) 255; 309 else { 310 UNGETC(c); 311 goto nlim; 312 /* get 2'nd number */ 313 } 314 } 315 if (GETC() != '}') 316 SEDCOMPILE_ERROR(45); 317 if (!cflg) /* one number */ 318 *ep++ = i; 319 else if ((ep[-1] & 0377) < (ep[-2] & 0377)) 320 SEDCOMPILE_ERROR(46); 321 continue; 322 323 case '\n': 324 SEDCOMPILE_ERROR(36); 325 326 case 'n': 327 c = '\n'; 328 goto defchar; 329 330 default: 331 if (c >= '1' && c <= '9') { 332 if ((c -= '1') >= closed) 333 SEDCOMPILE_ERROR(25); 334 *ep++ = CBACK; 335 *ep++ = c; 336 continue; 337 } 338 } 339 /* Drop through to default to use \ to turn off special chars */ 340 341 defchar: 342 default: 343 lastep = ep; 344 *ep++ = CCHR; 345 *ep++ = c; 346 } 347 } 348out: 349 if (regerrno) { 350 regerr(commands, regerrno); 351 return (char*) NULL; 352 } 353 /* XXX : Basant : what extra */ 354 /* int reglength = (int)(ep - expbuf); */ 355 return ep; 356} 357 358int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars) 359{ 360 int c; 361 362 363 if (circf) { 364 vars->loc1 = p1; 365 return (_advance(p1, p2, vars)); 366 } 367 /* fast check for first character */ 368 if (*p2 == CCHR) { 369 c = p2[1]; 370 do { 371 if (*p1 != c) 372 continue; 373 if (_advance(p1, p2, vars)) { 374 vars->loc1 = p1; 375 return (1); 376 } 377 } while (*p1++); 378 return (0); 379 } 380 /* regular algorithm */ 381 do { 382 if (_advance(p1, p2, vars)) { 383 vars->loc1 = p1; 384 return (1); 385 } 386 } while (*p1++); 387 return (0); 388} 389 390static int _advance(char *lp, char *ep, step_vars_storage *vars) 391{ 392 char *curlp; 393 int c; 394 char *bbeg; 395 char neg; 396 int ct; 397 int epint; /* int value of *ep */ 398 399 while (1) { 400 neg = 0; 401 switch (*ep++) { 402 403 case CCHR: 404 if (*ep++ == *lp++) 405 continue; 406 return (0); 407 408 case CDOT: 409 if (*lp++) 410 continue; 411 return (0); 412 413 case CDOL: 414 if (*lp == 0) 415 continue; 416 return (0); 417 418 case CCEOF: 419 vars->loc2 = lp; 420 return (1); 421 422 case CXCL: 423 c = (unsigned char)*lp++; 424 if (ISTHERE(c)) { 425 ep += 32; 426 continue; 427 } 428 return (0); 429 430 case NCCL: 431 neg = 1; 432 433 case CCL: 434 c = *lp++; 435 if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) { 436 ep += 16; 437 continue; 438 } 439 return (0); 440 441 case CBRA: 442 epint = (int) *ep; 443 vars->braslist[epint] = lp; 444 ep++; 445 continue; 446 447 case CKET: 448 epint = (int) *ep; 449 vars->braelist[epint] = lp; 450 ep++; 451 continue; 452 453 case CCHR | RNGE: 454 c = *ep++; 455 getrnge(ep, vars); 456 while (vars->low--) 457 if (*lp++ != c) 458 return (0); 459 curlp = lp; 460 while (vars->size--) 461 if (*lp++ != c) 462 break; 463 if (vars->size < 0) 464 lp++; 465 ep += 2; 466 goto star; 467 468 case CDOT | RNGE: 469 getrnge(ep, vars); 470 while (vars->low--) 471 if (*lp++ == '\0') 472 return (0); 473 curlp = lp; 474 while (vars->size--) 475 if (*lp++ == '\0') 476 break; 477 if (vars->size < 0) 478 lp++; 479 ep += 2; 480 goto star; 481 482 case CXCL | RNGE: 483 getrnge(ep + 32, vars); 484 while (vars->low--) { 485 c = (unsigned char)*lp++; 486 if (!ISTHERE(c)) 487 return (0); 488 } 489 curlp = lp; 490 while (vars->size--) { 491 c = (unsigned char)*lp++; 492 if (!ISTHERE(c)) 493 break; 494 } 495 if (vars->size < 0) 496 lp++; 497 ep += 34; /* 32 + 2 */ 498 goto star; 499 500 case NCCL | RNGE: 501 neg = 1; 502 503 case CCL | RNGE: 504 getrnge(ep + 16, vars); 505 while (vars->low--) { 506 c = *lp++; 507 if (((c & 0200) || !ISTHERE(c)) ^ neg) 508 return (0); 509 } 510 curlp = lp; 511 while (vars->size--) { 512 c = *lp++; 513 if (((c & 0200) || !ISTHERE(c)) ^ neg) 514 break; 515 } 516 if (vars->size < 0) 517 lp++; 518 ep += 18; /* 16 + 2 */ 519 goto star; 520 521 case CBACK: 522 epint = (int) *ep; 523 bbeg = vars->braslist[epint]; 524 ct = vars->braelist[epint] - bbeg; 525 ep++; 526 527 if (ecmp(bbeg, lp, ct)) { 528 lp += ct; 529 continue; 530 } 531 return (0); 532 533 case CBACK | STAR: 534 epint = (int) *ep; 535 bbeg = vars->braslist[epint]; 536 ct = vars->braelist[epint] - bbeg; 537 ep++; 538 curlp = lp; 539 while (ecmp(bbeg, lp, ct)) 540 lp += ct; 541 542 while (lp >= curlp) { 543 if (_advance(lp, ep, vars)) 544 return (1); 545 lp -= ct; 546 } 547 return (0); 548 549 550 case CDOT | STAR: 551 curlp = lp; 552 while (*lp++); 553 goto star; 554 555 case CCHR | STAR: 556 curlp = lp; 557 while (*lp++ == *ep); 558 ep++; 559 goto star; 560 561 case CXCL | STAR: 562 curlp = lp; 563 do { 564 c = (unsigned char)*lp++; 565 } while (ISTHERE(c)); 566 ep += 32; 567 goto star; 568 569 case NCCL | STAR: 570 neg = 1; 571 572 case CCL | STAR: 573 curlp = lp; 574 do { 575 c = *lp++; 576 } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg); 577 ep += 16; 578 goto star; 579 580 star: 581 do { 582 if (--lp == vars->locs) 583 break; 584 if (_advance(lp, ep, vars)) 585 return (1); 586 } while (lp > curlp); 587 return (0); 588 589 } 590 } 591} 592 593static void getrnge(char *str, step_vars_storage *vars) 594{ 595 vars->low = *str++ & 0377; 596 vars->size = ((*str & 0377) == 255)? 20000: (*str &0377) - vars->low; 597} 598 599 600