/* vi: set sw=4 ts=4: */
/*
 * sed.c - very minimalist version of sed
 *
 * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
 * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
 * Copyright (C) 2002  Matt Kraai
 * Copyright (C) 2003 by Glenn McGrath <bug1@iinet.net.au>
 * Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>
 *
 * MAINTAINER: Rob Landley <rob@landley.net>
 *
 * Licensed under GPL version 2, see file LICENSE in this tarball for details.
 */

/* Code overview.

  Files are laid out to avoid unnecessary function declarations.  So for
  example, every function add_cmd calls occurs before add_cmd in this file.

  add_cmd() is called on each line of sed command text (from a file or from
  the command line).  It calls get_address() and parse_cmd_args().  The
  resulting sed_cmd_t structures are appended to a linked list
  (G.sed_cmd_head/G.sed_cmd_tail).

  add_input_file() adds a FILE * to the list of input files.  We need to
  know all input sources ahead of time to find the last line for the $ match.

  process_files() does actual sedding, reading data lines from each input FILE *
  (which could be stdin) and applying the sed command list (sed_cmd_head) to
  each of the resulting lines.

  sed_main() is where external code calls into this, with a command line.
34*/ 35 36 37/* 38 Supported features and commands in this version of sed: 39 40 - comments ('#') 41 - address matching: num|/matchstr/[,num|/matchstr/|$]command 42 - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags) 43 - edit commands: (a)ppend, (i)nsert, (c)hange 44 - file commands: (r)ead 45 - backreferences in substitution expressions (\0, \1, \2...\9) 46 - grouped commands: {cmd1;cmd2} 47 - transliteration (y/source-chars/dest-chars/) 48 - pattern space hold space storing / swapping (g, h, x) 49 - labels / branching (: label, b, t, T) 50 51 (Note: Specifying an address (range) to match is *optional*; commands 52 default to the whole pattern space if no specific address match was 53 requested.) 54 55 Todo: 56 - Create a wrapper around regex to make libc's regex conform with sed 57 58 Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html 59*/ 60 61#include "libbb.h" 62#include "xregex.h" 63 64/* Each sed command turns into one of these structures. */ 65typedef struct sed_cmd_s { 66 /* Ordered by alignment requirements: currently 36 bytes on x86 */ 67 struct sed_cmd_s *next; /* Next command (linked list, NULL terminated) */ 68 69 /* address storage */ 70 regex_t *beg_match; /* sed -e '/match/cmd' */ 71 regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */ 72 regex_t *sub_match; /* For 's/sub_match/string/' */ 73 int beg_line; /* 'sed 1p' 0 == apply commands to all lines */ 74 int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */ 75 76 FILE *sw_file; /* File (sw) command writes to, -1 for none. */ 77 char *string; /* Data string for (saicytb) commands. */ 78 79 unsigned short which_match; /* (s) Which match to replace (0 for all) */ 80 81 /* Bitfields (gcc won't group them if we don't) */ 82 unsigned invert:1; /* the '!' after the address */ 83 unsigned in_match:1; /* Next line also included in match? 
*/ 84 unsigned sub_p:1; /* (s) print option */ 85 86 char sw_last_char; /* Last line written by (sw) had no '\n' */ 87 88 /* GENERAL FIELDS */ 89 char cmd; /* The command char: abcdDgGhHilnNpPqrstwxy:={} */ 90} sed_cmd_t; 91 92static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v"; 93 94struct globals { 95 /* options */ 96 int be_quiet, regex_type; 97 FILE *nonstdout; 98 char *outname, *hold_space; 99 100 /* List of input files */ 101 int input_file_count, current_input_file; 102 FILE **input_file_list; 103 104 regmatch_t regmatch[10]; 105 regex_t *previous_regex_ptr; 106 107 /* linked list of sed commands */ 108 sed_cmd_t sed_cmd_head, *sed_cmd_tail; 109 110 /* Linked list of append lines */ 111 llist_t *append_head; 112 113 char *add_cmd_line; 114 115 struct pipeline { 116 char *buf; /* Space to hold string */ 117 int idx; /* Space used */ 118 int len; /* Space allocated */ 119 } pipeline; 120}; 121#define G (*(struct globals*)&bb_common_bufsiz1) 122void BUG_sed_globals_too_big(void); 123#define INIT_G() do { \ 124 if (sizeof(struct globals) > COMMON_BUFSIZE) \ 125 BUG_sed_globals_too_big(); \ 126 G.sed_cmd_tail = &G.sed_cmd_head; \ 127} while (0) 128 129 130#if ENABLE_FEATURE_CLEAN_UP 131static void sed_free_and_close_stuff(void) 132{ 133 sed_cmd_t *sed_cmd = G.sed_cmd_head.next; 134 135 llist_free(G.append_head, free); 136 137 while (sed_cmd) { 138 sed_cmd_t *sed_cmd_next = sed_cmd->next; 139 140 if (sed_cmd->sw_file) 141 xprint_and_close_file(sed_cmd->sw_file); 142 143 if (sed_cmd->beg_match) { 144 regfree(sed_cmd->beg_match); 145 free(sed_cmd->beg_match); 146 } 147 if (sed_cmd->end_match) { 148 regfree(sed_cmd->end_match); 149 free(sed_cmd->end_match); 150 } 151 if (sed_cmd->sub_match) { 152 regfree(sed_cmd->sub_match); 153 free(sed_cmd->sub_match); 154 } 155 free(sed_cmd->string); 156 free(sed_cmd); 157 sed_cmd = sed_cmd_next; 158 } 159 160 if (G.hold_space) free(G.hold_space); 161 162 while (G.current_input_file < G.input_file_count) 163 
fclose(G.input_file_list[G.current_input_file++]); 164} 165#else 166void sed_free_and_close_stuff(void); 167#endif 168 169/* If something bad happens during -i operation, delete temp file */ 170 171static void cleanup_outname(void) 172{ 173 if (G.outname) unlink(G.outname); 174} 175 176/* strdup, replacing "\n" with '\n', and "\delimiter" with 'delimiter' */ 177 178static void parse_escapes(char *dest, const char *string, int len, char from, char to) 179{ 180 int i = 0; 181 182 while (i < len) { 183 if (string[i] == '\\') { 184 if (!to || string[i+1] == from) { 185 *dest++ = to ? to : string[i+1]; 186 i += 2; 187 continue; 188 } 189 *dest++ = string[i++]; 190 } 191 *dest++ = string[i++]; 192 } 193 *dest = 0; 194} 195 196static char *copy_parsing_escapes(const char *string, int len) 197{ 198 char *dest = xmalloc(len + 1); 199 200 parse_escapes(dest, string, len, 'n', '\n'); 201 return dest; 202} 203 204 205/* 206 * index_of_next_unescaped_regexp_delim - walks left to right through a string 207 * beginning at a specified index and returns the index of the next regular 208 * expression delimiter (typically a forward * slash ('/')) not preceded by 209 * a backslash ('\'). A negative delimiter disables square bracket checking. 
210 */ 211static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str) 212{ 213 int bracket = -1; 214 int escaped = 0; 215 int idx = 0; 216 char ch; 217 218 if (delimiter < 0) { 219 bracket--; 220 delimiter = -delimiter; 221 } 222 223 for (; (ch = str[idx]); idx++) { 224 if (bracket >= 0) { 225 if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 226 && str[idx - 1] == '^'))) 227 bracket = -1; 228 } else if (escaped) 229 escaped = 0; 230 else if (ch == '\\') 231 escaped = 1; 232 else if (bracket == -1 && ch == '[') 233 bracket = idx; 234 else if (ch == delimiter) 235 return idx; 236 } 237 238 /* if we make it to here, we've hit the end of the string */ 239 bb_error_msg_and_die("unmatched '%c'", delimiter); 240} 241 242/* 243 * Returns the index of the third delimiter 244 */ 245static int parse_regex_delim(const char *cmdstr, char **match, char **replace) 246{ 247 const char *cmdstr_ptr = cmdstr; 248 char delimiter; 249 int idx = 0; 250 251 /* verify that the 's' or 'y' is followed by something. That something 252 * (typically a 'slash') is now our regexp delimiter... */ 253 if (*cmdstr == '\0') 254 bb_error_msg_and_die("bad format in substitution expression"); 255 delimiter = *cmdstr_ptr++; 256 257 /* save the match string */ 258 idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr); 259 *match = copy_parsing_escapes(cmdstr_ptr, idx); 260 261 /* save the replacement string */ 262 cmdstr_ptr += idx + 1; 263 idx = index_of_next_unescaped_regexp_delim(-delimiter, cmdstr_ptr); 264 *replace = copy_parsing_escapes(cmdstr_ptr, idx); 265 266 return ((cmdstr_ptr - cmdstr) + idx); 267} 268 269/* 270 * returns the index in the string just past where the address ends. 
271 */ 272static int get_address(const char *my_str, int *linenum, regex_t ** regex) 273{ 274 const char *pos = my_str; 275 276 if (isdigit(*my_str)) { 277 *linenum = strtol(my_str, (char**)&pos, 10); 278 /* endstr shouldnt ever equal NULL */ 279 } else if (*my_str == '$') { 280 *linenum = -1; 281 pos++; 282 } else if (*my_str == '/' || *my_str == '\\') { 283 int next; 284 char delimiter; 285 char *temp; 286 287 delimiter = '/'; 288 if (*my_str == '\\') delimiter = *++pos; 289 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); 290 temp = copy_parsing_escapes(pos, next); 291 *regex = xmalloc(sizeof(regex_t)); 292 xregcomp(*regex, temp, G.regex_type|REG_NEWLINE); 293 free(temp); 294 /* Move position to next character after last delimiter */ 295 pos += (next+1); 296 } 297 return pos - my_str; 298} 299 300/* Grab a filename. Whitespace at start is skipped, then goes to EOL. */ 301static int parse_file_cmd(sed_cmd_t *sed_cmd, const char *filecmdstr, char **retval) 302{ 303 int start = 0, idx, hack = 0; 304 305 /* Skip whitespace, then grab filename to end of line */ 306 while (isspace(filecmdstr[start])) 307 start++; 308 idx = start; 309 while (filecmdstr[idx] && filecmdstr[idx] != '\n') 310 idx++; 311 312 /* If lines glued together, put backslash back. 
*/ 313 if (filecmdstr[idx] == '\n') 314 hack = 1; 315 if (idx == start) 316 bb_error_msg_and_die("empty filename"); 317 *retval = xstrndup(filecmdstr+start, idx-start+hack+1); 318 if (hack) 319 (*retval)[idx] = '\\'; 320 321 return idx; 322} 323 324static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) 325{ 326 int cflags = G.regex_type; 327 char *match; 328 int idx; 329 330 /* 331 * A substitution command should look something like this: 332 * s/match/replace/ #gIpw 333 * || | ||| 334 * mandatory optional 335 */ 336 idx = parse_regex_delim(substr, &match, &sed_cmd->string); 337 338 /* determine the number of back references in the match string */ 339 /* Note: we compute this here rather than in the do_subst_command() 340 * function to save processor time, at the expense of a little more memory 341 * (4 bits) per sed_cmd */ 342 343 /* process the flags */ 344 345 sed_cmd->which_match = 1; 346 while (substr[++idx]) { 347 /* Parse match number */ 348 if (isdigit(substr[idx])) { 349 if (match[0] != '^') { 350 /* Match 0 treated as all, multiple matches we take the last one. 
*/ 351 const char *pos = substr + idx; 352 sed_cmd->which_match = (unsigned short)strtol(substr+idx, (char**) &pos, 10); 353 idx = pos - substr; 354 } 355 continue; 356 } 357 /* Skip spaces */ 358 if (isspace(substr[idx])) continue; 359 360 switch (substr[idx]) { 361 /* Replace all occurrences */ 362 case 'g': 363 if (match[0] != '^') sed_cmd->which_match = 0; 364 break; 365 /* Print pattern space */ 366 case 'p': 367 sed_cmd->sub_p = 1; 368 break; 369 /* Write to file */ 370 case 'w': 371 { 372 char *temp; 373 idx += parse_file_cmd(sed_cmd, substr+idx, &temp); 374 break; 375 } 376 /* Ignore case (gnu exension) */ 377 case 'I': 378 cflags |= REG_ICASE; 379 break; 380 /* Comment */ 381 case '#': 382 while (substr[++idx]) /*skip all*/; 383 /* Fall through */ 384 /* End of command */ 385 case ';': 386 case '}': 387 goto out; 388 default: 389 bb_error_msg_and_die("bad option in substitution expression"); 390 } 391 } 392out: 393 /* compile the match string into a regex */ 394 if (*match != '\0') { 395 /* If match is empty, we use last regex used at runtime */ 396 sed_cmd->sub_match = xmalloc(sizeof(regex_t)); 397 xregcomp(sed_cmd->sub_match, match, cflags); 398 } 399 free(match); 400 401 return idx; 402} 403 404/* 405 * Process the commands arguments 406 */ 407static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) 408{ 409 /* handle (s)ubstitution command */ 410 if (sed_cmd->cmd == 's') 411 cmdstr += parse_subst_cmd(sed_cmd, cmdstr); 412 /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ 413 else if (strchr("aic", sed_cmd->cmd)) { 414 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') 415 bb_error_msg_and_die 416 ("only a beginning address can be specified for edit commands"); 417 for (;;) { 418 if (*cmdstr == '\n' || *cmdstr == '\\') { 419 cmdstr++; 420 break; 421 } else if (isspace(*cmdstr)) 422 cmdstr++; 423 else 424 break; 425 } 426 sed_cmd->string = xstrdup(cmdstr); 427 parse_escapes(sed_cmd->string, sed_cmd->string, 
strlen(cmdstr), 0, 0); 428 cmdstr += strlen(cmdstr); 429 /* handle file cmds: (r)ead */ 430 } else if (strchr("rw", sed_cmd->cmd)) { 431 if (sed_cmd->end_line || sed_cmd->end_match) 432 bb_error_msg_and_die("command only uses one address"); 433 cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string); 434 if (sed_cmd->cmd == 'w') { 435 sed_cmd->sw_file = xfopen(sed_cmd->string, "w"); 436 sed_cmd->sw_last_char = '\n'; 437 } 438 /* handle branch commands */ 439 } else if (strchr(":btT", sed_cmd->cmd)) { 440 int length; 441 442 cmdstr = skip_whitespace(cmdstr); 443 length = strcspn(cmdstr, semicolon_whitespace); 444 if (length) { 445 sed_cmd->string = xstrndup(cmdstr, length); 446 cmdstr += length; 447 } 448 } 449 /* translation command */ 450 else if (sed_cmd->cmd == 'y') { 451 char *match, *replace; 452 int i = cmdstr[0]; 453 454 cmdstr += parse_regex_delim(cmdstr, &match, &replace)+1; 455 /* \n already parsed, but \delimiter needs unescaping. */ 456 parse_escapes(match, match, strlen(match), i, i); 457 parse_escapes(replace, replace, strlen(replace), i, i); 458 459 sed_cmd->string = xzalloc((strlen(match) + 1) * 2); 460 for (i = 0; match[i] && replace[i]; i++) { 461 sed_cmd->string[i*2] = match[i]; 462 sed_cmd->string[i*2+1] = replace[i]; 463 } 464 free(match); 465 free(replace); 466 } 467 /* if it wasnt a single-letter command that takes no arguments 468 * then it must be an invalid command. 469 */ 470 else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) { 471 bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd); 472 } 473 474 /* give back whatever's left over */ 475 return cmdstr; 476} 477 478 479/* Parse address+command sets, skipping comment lines. */ 480 481static void add_cmd(const char *cmdstr) 482{ 483 sed_cmd_t *sed_cmd; 484 int temp; 485 486 /* Append this line to any unfinished line from last time. 
*/ 487 if (G.add_cmd_line) { 488 char *tp = xasprintf("%s\n%s", G.add_cmd_line, cmdstr); 489 free(G.add_cmd_line); 490 cmdstr = G.add_cmd_line = tp; 491 } 492 493 /* If this line ends with backslash, request next line. */ 494 temp = strlen(cmdstr); 495 if (temp && cmdstr[--temp] == '\\') { 496 if (!G.add_cmd_line) 497 G.add_cmd_line = xstrdup(cmdstr); 498 G.add_cmd_line[temp] = '\0'; 499 return; 500 } 501 502 /* Loop parsing all commands in this line. */ 503 while (*cmdstr) { 504 /* Skip leading whitespace and semicolons */ 505 cmdstr += strspn(cmdstr, semicolon_whitespace); 506 507 /* If no more commands, exit. */ 508 if (!*cmdstr) break; 509 510 /* if this is a comment, jump past it and keep going */ 511 if (*cmdstr == '#') { 512 /* "#n" is the same as using -n on the command line */ 513 if (cmdstr[1] == 'n') 514 G.be_quiet++; 515 cmdstr = strpbrk(cmdstr, "\n\r"); 516 if (!cmdstr) break; 517 continue; 518 } 519 520 /* parse the command 521 * format is: [addr][,addr][!]cmd 522 * |----||-----||-| 523 * part1 part2 part3 524 */ 525 526 sed_cmd = xzalloc(sizeof(sed_cmd_t)); 527 528 /* first part (if present) is an address: either a '$', a number or a /regex/ */ 529 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); 530 531 /* second part (if present) will begin with a comma */ 532 if (*cmdstr == ',') { 533 int idx; 534 535 cmdstr++; 536 idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); 537 if (!idx) 538 bb_error_msg_and_die("no address after comma"); 539 cmdstr += idx; 540 } 541 542 /* skip whitespace before the command */ 543 cmdstr = skip_whitespace(cmdstr); 544 545 /* Check for inversion flag */ 546 if (*cmdstr == '!') { 547 sed_cmd->invert = 1; 548 cmdstr++; 549 550 /* skip whitespace before the command */ 551 cmdstr = skip_whitespace(cmdstr); 552 } 553 554 /* last part (mandatory) will be a command */ 555 if (!*cmdstr) 556 bb_error_msg_and_die("missing command"); 557 sed_cmd->cmd = *(cmdstr++); 558 cmdstr = 
parse_cmd_args(sed_cmd, cmdstr); 559 560 /* Add the command to the command array */ 561 G.sed_cmd_tail->next = sed_cmd; 562 G.sed_cmd_tail = G.sed_cmd_tail->next; 563 } 564 565 /* If we glued multiple lines together, free the memory. */ 566 free(G.add_cmd_line); 567 G.add_cmd_line = NULL; 568} 569 570/* Append to a string, reallocating memory as necessary. */ 571 572#define PIPE_GROW 64 573 574static void pipe_putc(char c) 575{ 576 if (G.pipeline.idx == G.pipeline.len) { 577 G.pipeline.buf = xrealloc(G.pipeline.buf, 578 G.pipeline.len + PIPE_GROW); 579 G.pipeline.len += PIPE_GROW; 580 } 581 G.pipeline.buf[G.pipeline.idx++] = c; 582} 583 584static void do_subst_w_backrefs(char *line, char *replace) 585{ 586 int i,j; 587 588 /* go through the replacement string */ 589 for (i = 0; replace[i]; i++) { 590 /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */ 591 if (replace[i] == '\\') { 592 unsigned backref = replace[++i] - '0'; 593 if (backref <= 9) { 594 /* print out the text held in G.regmatch[backref] */ 595 if (G.regmatch[backref].rm_so != -1) { 596 j = G.regmatch[backref].rm_so; 597 while (j < G.regmatch[backref].rm_eo) 598 pipe_putc(line[j++]); 599 } 600 continue; 601 } 602 /* I _think_ it is impossible to get '\' to be 603 * the last char in replace string. Thus we dont check 604 * for replace[i] == NUL. (counterexample anyone?) */ 605 /* if we find a backslash escaped character, print the character */ 606 pipe_putc(replace[i]); 607 continue; 608 } 609 /* if we find an unescaped '&' print out the whole matched text. */ 610 if (replace[i] == '&') { 611 j = G.regmatch[0].rm_so; 612 while (j < G.regmatch[0].rm_eo) 613 pipe_putc(line[j++]); 614 continue; 615 } 616 /* Otherwise just output the character. 
*/ 617 pipe_putc(replace[i]); 618 } 619} 620 621static int do_subst_command(sed_cmd_t *sed_cmd, char **line) 622{ 623 char *oldline = *line; 624 int altered = 0; 625 int match_count = 0; 626 regex_t *current_regex; 627 628 /* Handle empty regex. */ 629 if (sed_cmd->sub_match == NULL) { 630 current_regex = G.previous_regex_ptr; 631 if (!current_regex) 632 bb_error_msg_and_die("no previous regexp"); 633 } else 634 G.previous_regex_ptr = current_regex = sed_cmd->sub_match; 635 636 /* Find the first match */ 637 if (REG_NOMATCH == regexec(current_regex, oldline, 10, G.regmatch, 0)) 638 return 0; 639 640 /* Initialize temporary output buffer. */ 641 G.pipeline.buf = xmalloc(PIPE_GROW); 642 G.pipeline.len = PIPE_GROW; 643 G.pipeline.idx = 0; 644 645 /* Now loop through, substituting for matches */ 646 do { 647 int i; 648 649 if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) { 650 pipe_putc(*oldline++); 651 continue; 652 } 653 654 match_count++; 655 656 /* If we aren't interested in this match, output old line to 657 end of match and continue */ 658 if (sed_cmd->which_match && sed_cmd->which_match != match_count) { 659 for (i = 0; i < G.regmatch[0].rm_eo; i++) 660 pipe_putc(*oldline++); 661 continue; 662 } 663 664 /* print everything before the match */ 665 for (i = 0; i < G.regmatch[0].rm_so; i++) 666 pipe_putc(oldline[i]); 667 668 /* then print the substitution string */ 669 do_subst_w_backrefs(oldline, sed_cmd->string); 670 671 /* advance past the match */ 672 oldline += G.regmatch[0].rm_eo; 673 /* flag that something has changed */ 674 altered++; 675 676 /* if we're not doing this globally, get out now */ 677 if (sed_cmd->which_match) break; 678 } while (*oldline && (regexec(current_regex, oldline, 10, G.regmatch, 0) != REG_NOMATCH)); 679 680 /* Copy rest of string into output pipeline */ 681 682 while (*oldline) 683 pipe_putc(*oldline++); 684 pipe_putc(0); 685 686 free(*line); 687 *line = G.pipeline.buf; 688 return altered; 689} 690 691/* Set command 
pointer to point to this label. (Does not handle null label.) */ 692static sed_cmd_t *branch_to(char *label) 693{ 694 sed_cmd_t *sed_cmd; 695 696 for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { 697 if (sed_cmd->cmd == ':' && sed_cmd->string && !strcmp(sed_cmd->string, label)) { 698 return sed_cmd; 699 } 700 } 701 bb_error_msg_and_die("can't find label for jump to '%s'", label); 702} 703 704static void append(char *s) 705{ 706 llist_add_to_end(&G.append_head, xstrdup(s)); 707} 708 709static void flush_append(void) 710{ 711 char *data; 712 713 /* Output appended lines. */ 714 while ((data = (char *)llist_pop(&G.append_head))) { 715 fprintf(G.nonstdout, "%s\n", data); 716 free(data); 717 } 718} 719 720static void add_input_file(FILE *file) 721{ 722 G.input_file_list = xrealloc(G.input_file_list, 723 (G.input_file_count + 1) * sizeof(FILE *)); 724 G.input_file_list[G.input_file_count++] = file; 725} 726 727/* Get next line of input from G.input_file_list, flushing append buffer and 728 * noting if we ran out of files without a newline on the last line we read. 729 */ 730enum { 731 NO_EOL_CHAR = 1, 732 LAST_IS_NUL = 2, 733}; 734static char *get_next_line(char *gets_char) 735{ 736 char *temp = NULL; 737 int len; 738 char gc; 739 740 flush_append(); 741 742 /* will be returned if last line in the file 743 * doesn't end with either '\n' or '\0' */ 744 gc = NO_EOL_CHAR; 745 while (G.current_input_file < G.input_file_count) { 746 FILE *fp = G.input_file_list[G.current_input_file]; 747 /* Read line up to a newline or NUL byte, inclusive, 748 * return malloc'ed char[]. length of the chunk read 749 * is stored in len. 
NULL if EOF/error */ 750 temp = bb_get_chunk_from_file(fp, &len); 751 if (temp) { 752 /* len > 0 here, it's ok to do temp[len-1] */ 753 char c = temp[len-1]; 754 if (c == '\n' || c == '\0') { 755 temp[len-1] = '\0'; 756 gc = c; 757 if (c == '\0') { 758 int ch = fgetc(fp); 759 if (ch != EOF) 760 ungetc(ch, fp); 761 else 762 gc = LAST_IS_NUL; 763 } 764 } 765 /* else we put NO_EOL_CHAR into *gets_char */ 766 break; 767 768 /* NB: I had the idea of peeking next file(s) and returning 769 * NO_EOL_CHAR only if it is the *last* non-empty 770 * input file. But there is a case where this won't work: 771 * file1: "a woo\nb woo" 772 * file2: "c no\nd no" 773 * sed -ne 's/woo/bang/p' input1 input2 => "a bang\nb bang" 774 * (note: *no* newline after "b bang"!) */ 775 } 776 /* Close this file and advance to next one */ 777 fclose(fp); 778 G.current_input_file++; 779 } 780 *gets_char = gc; 781 return temp; 782} 783 784/* Output line of text. */ 785/* Note: 786 * The tricks with NO_EOL_CHAR and last_puts_char are there to emulate gnu sed. 787 * Without them, we had this: 788 * echo -n thingy >z1 789 * echo -n again >z2 790 * >znull 791 * sed "s/i/z/" z1 z2 znull | hexdump -vC 792 * output: 793 * gnu sed 4.1.5: 794 * 00000000 74 68 7a 6e 67 79 0a 61 67 61 7a 6e |thzngy.agazn| 795 * bbox: 796 * 00000000 74 68 7a 6e 67 79 61 67 61 7a 6e |thzngyagazn| 797 */ 798static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char) 799{ 800 char lpc = *last_puts_char; 801 802 /* Need to insert a '\n' between two files because first file's 803 * last line wasn't terminated? */ 804 if (lpc != '\n' && lpc != '\0') { 805 fputc('\n', file); 806 lpc = '\n'; 807 } 808 fputs(s, file); 809 810 /* 'x' - just something which is not '\n', '\0' or NO_EOL_CHAR */ 811 if (s[0]) 812 lpc = 'x'; 813 814 /* had trailing '\0' and it was last char of file? 
*/ 815 if (last_gets_char == LAST_IS_NUL) { 816 fputc('\0', file); 817 lpc = 'x'; /* */ 818 } else 819 /* had trailing '\n' or '\0'? */ 820 if (last_gets_char != NO_EOL_CHAR) { 821 fputc(last_gets_char, file); 822 lpc = last_gets_char; 823 } 824 825 if (ferror(file)) { 826 xfunc_error_retval = 4; /* It's what gnu sed exits with... */ 827 bb_error_msg_and_die(bb_msg_write_error); 828 } 829 *last_puts_char = lpc; 830} 831 832#define sed_puts(s, n) (puts_maybe_newline(s, G.nonstdout, &last_puts_char, n)) 833 834static int beg_match(sed_cmd_t *sed_cmd, const char *pattern_space) 835{ 836 int retval = sed_cmd->beg_match && !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0); 837 if (retval) 838 G.previous_regex_ptr = sed_cmd->beg_match; 839 return retval; 840} 841 842/* Process all the lines in all the files */ 843 844static void process_files(void) 845{ 846 char *pattern_space, *next_line; 847 int linenum = 0; 848 char last_puts_char = '\n'; 849 char last_gets_char, next_gets_char; 850 sed_cmd_t *sed_cmd; 851 int substituted; 852 853 /* Prime the pump */ 854 next_line = get_next_line(&next_gets_char); 855 856 /* go through every line in each file */ 857again: 858 substituted = 0; 859 860 /* Advance to next line. Stop if out of lines. */ 861 pattern_space = next_line; 862 if (!pattern_space) return; 863 last_gets_char = next_gets_char; 864 865 /* Read one line in advance so we can act on the last line, 866 * the '$' address */ 867 next_line = get_next_line(&next_gets_char); 868 linenum++; 869restart: 870 /* for every line, go through all the commands */ 871 for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { 872 int old_matched, matched; 873 874 old_matched = sed_cmd->in_match; 875 876 /* Determine if this command matches this line: */ 877 878 /* Are we continuing a previous multi-line match? */ 879 sed_cmd->in_match = sed_cmd->in_match 880 /* Or is no range necessary? 
*/ 881 || (!sed_cmd->beg_line && !sed_cmd->end_line 882 && !sed_cmd->beg_match && !sed_cmd->end_match) 883 /* Or did we match the start of a numerical range? */ 884 || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) 885 /* Or does this line match our begin address regex? */ 886 || (beg_match(sed_cmd, pattern_space)) 887 /* Or did we match last line of input? */ 888 || (sed_cmd->beg_line == -1 && next_line == NULL); 889 890 /* Snapshot the value */ 891 892 matched = sed_cmd->in_match; 893 894 /* Is this line the end of the current match? */ 895 896 if (matched) { 897 sed_cmd->in_match = !( 898 /* has the ending line come, or is this a single address command? */ 899 (sed_cmd->end_line ? 900 sed_cmd->end_line == -1 ? 901 !next_line 902 : (sed_cmd->end_line <= linenum) 903 : !sed_cmd->end_match 904 ) 905 /* or does this line matches our last address regex */ 906 || (sed_cmd->end_match && old_matched 907 && (regexec(sed_cmd->end_match, 908 pattern_space, 0, NULL, 0) == 0)) 909 ); 910 } 911 912 /* Skip blocks of commands we didn't match. */ 913 if (sed_cmd->cmd == '{') { 914 if (sed_cmd->invert ? matched : !matched) { 915 while (sed_cmd->cmd != '}') { 916 sed_cmd = sed_cmd->next; 917 if (!sed_cmd) 918 bb_error_msg_and_die("unterminated {"); 919 } 920 } 921 continue; 922 } 923 924 /* Okay, so did this line match? */ 925 if (sed_cmd->invert ? 
!matched : matched) { 926 /* Update last used regex in case a blank substitute BRE is found */ 927 if (sed_cmd->beg_match) { 928 G.previous_regex_ptr = sed_cmd->beg_match; 929 } 930 931 /* actual sedding */ 932 switch (sed_cmd->cmd) { 933 934 /* Print line number */ 935 case '=': 936 fprintf(G.nonstdout, "%d\n", linenum); 937 break; 938 939 /* Write the current pattern space up to the first newline */ 940 case 'P': 941 { 942 char *tmp = strchr(pattern_space, '\n'); 943 944 if (tmp) { 945 *tmp = '\0'; 946 /* TODO: explain why '\n' below */ 947 sed_puts(pattern_space, '\n'); 948 *tmp = '\n'; 949 break; 950 } 951 /* Fall Through */ 952 } 953 954 /* Write the current pattern space to output */ 955 case 'p': 956 /* NB: we print this _before_ the last line 957 * (of current file) is printed. Even if 958 * that line is nonterminated, we print 959 * '\n' here (gnu sed does the same) */ 960 sed_puts(pattern_space, '\n'); 961 break; 962 /* Delete up through first newline */ 963 case 'D': 964 { 965 char *tmp = strchr(pattern_space, '\n'); 966 967 if (tmp) { 968 tmp = xstrdup(tmp+1); 969 free(pattern_space); 970 pattern_space = tmp; 971 goto restart; 972 } 973 } 974 /* discard this line. */ 975 case 'd': 976 goto discard_line; 977 978 /* Substitute with regex */ 979 case 's': 980 if (!do_subst_command(sed_cmd, &pattern_space)) 981 break; 982 substituted |= 1; 983 984 /* handle p option */ 985 if (sed_cmd->sub_p) 986 sed_puts(pattern_space, last_gets_char); 987 /* handle w option */ 988 if (sed_cmd->sw_file) 989 puts_maybe_newline( 990 pattern_space, sed_cmd->sw_file, 991 &sed_cmd->sw_last_char, last_gets_char); 992 break; 993 994 /* Append line to linked list to be printed later */ 995 case 'a': 996 append(sed_cmd->string); 997 break; 998 999 /* Insert text before this line */ 1000 case 'i': 1001 sed_puts(sed_cmd->string, '\n'); 1002 break; 1003 1004 /* Cut and paste text (replace) */ 1005 case 'c': 1006 /* Only triggers on last line of a matching range. 
*/ 1007 if (!sed_cmd->in_match) 1008 sed_puts(sed_cmd->string, NO_EOL_CHAR); 1009 goto discard_line; 1010 1011 /* Read file, append contents to output */ 1012 case 'r': 1013 { 1014 FILE *rfile; 1015 1016 rfile = fopen(sed_cmd->string, "r"); 1017 if (rfile) { 1018 char *line; 1019 1020 while ((line = xmalloc_getline(rfile)) 1021 != NULL) 1022 append(line); 1023 xprint_and_close_file(rfile); 1024 } 1025 1026 break; 1027 } 1028 1029 /* Write pattern space to file. */ 1030 case 'w': 1031 puts_maybe_newline( 1032 pattern_space, sed_cmd->sw_file, 1033 &sed_cmd->sw_last_char, last_gets_char); 1034 break; 1035 1036 /* Read next line from input */ 1037 case 'n': 1038 if (!G.be_quiet) 1039 sed_puts(pattern_space, last_gets_char); 1040 if (next_line) { 1041 free(pattern_space); 1042 pattern_space = next_line; 1043 last_gets_char = next_gets_char; 1044 next_line = get_next_line(&next_gets_char); 1045 linenum++; 1046 break; 1047 } 1048 /* fall through */ 1049 1050 /* Quit. End of script, end of input. */ 1051 case 'q': 1052 /* Exit the outer while loop */ 1053 free(next_line); 1054 next_line = NULL; 1055 goto discard_commands; 1056 1057 /* Append the next line to the current line */ 1058 case 'N': 1059 { 1060 int len; 1061 /* If no next line, jump to end of script and exit. */ 1062 if (next_line == NULL) { 1063 /* Jump to end of script and exit */ 1064 free(next_line); 1065 next_line = NULL; 1066 goto discard_line; 1067 /* append next_line, read new next_line. 
*/ 1068 } 1069 len = strlen(pattern_space); 1070 pattern_space = realloc(pattern_space, len + strlen(next_line) + 2); 1071 pattern_space[len] = '\n'; 1072 strcpy(pattern_space + len+1, next_line); 1073 last_gets_char = next_gets_char; 1074 next_line = get_next_line(&next_gets_char); 1075 linenum++; 1076 break; 1077 } 1078 1079 /* Test/branch if substitution occurred */ 1080 case 't': 1081 if (!substituted) break; 1082 substituted = 0; 1083 /* Fall through */ 1084 /* Test/branch if substitution didn't occur */ 1085 case 'T': 1086 if (substituted) break; 1087 /* Fall through */ 1088 /* Branch to label */ 1089 case 'b': 1090 if (!sed_cmd->string) goto discard_commands; 1091 else sed_cmd = branch_to(sed_cmd->string); 1092 break; 1093 /* Transliterate characters */ 1094 case 'y': 1095 { 1096 int i, j; 1097 1098 for (i = 0; pattern_space[i]; i++) { 1099 for (j = 0; sed_cmd->string[j]; j += 2) { 1100 if (pattern_space[i] == sed_cmd->string[j]) { 1101 pattern_space[i] = sed_cmd->string[j + 1]; 1102 break; 1103 } 1104 } 1105 } 1106 1107 break; 1108 } 1109 case 'g': /* Replace pattern space with hold space */ 1110 free(pattern_space); 1111 pattern_space = xstrdup(G.hold_space ? 
G.hold_space : ""); 1112 break; 1113 case 'G': /* Append newline and hold space to pattern space */ 1114 { 1115 int pattern_space_size = 2; 1116 int hold_space_size = 0; 1117 1118 if (pattern_space) 1119 pattern_space_size += strlen(pattern_space); 1120 if (G.hold_space) 1121 hold_space_size = strlen(G.hold_space); 1122 pattern_space = xrealloc(pattern_space, 1123 pattern_space_size + hold_space_size); 1124 if (pattern_space_size == 2) 1125 pattern_space[0] = 0; 1126 strcat(pattern_space, "\n"); 1127 if (G.hold_space) 1128 strcat(pattern_space, G.hold_space); 1129 last_gets_char = '\n'; 1130 1131 break; 1132 } 1133 case 'h': /* Replace hold space with pattern space */ 1134 free(G.hold_space); 1135 G.hold_space = xstrdup(pattern_space); 1136 break; 1137 case 'H': /* Append newline and pattern space to hold space */ 1138 { 1139 int hold_space_size = 2; 1140 int pattern_space_size = 0; 1141 1142 if (G.hold_space) 1143 hold_space_size += strlen(G.hold_space); 1144 if (pattern_space) 1145 pattern_space_size = strlen(pattern_space); 1146 G.hold_space = xrealloc(G.hold_space, 1147 hold_space_size + pattern_space_size); 1148 1149 if (hold_space_size == 2) 1150 *G.hold_space = 0; 1151 strcat(G.hold_space, "\n"); 1152 if (pattern_space) 1153 strcat(G.hold_space, pattern_space); 1154 1155 break; 1156 } 1157 case 'x': /* Exchange hold and pattern space */ 1158 { 1159 char *tmp = pattern_space; 1160 pattern_space = G.hold_space ? : xzalloc(1); 1161 last_gets_char = '\n'; 1162 G.hold_space = tmp; 1163 break; 1164 } 1165 } 1166 } 1167 } 1168 1169 /* 1170 * exit point from sedding... 1171 */ 1172 discard_commands: 1173 /* we will print the line unless we were told to be quiet ('-n') 1174 or if the line was suppressed (ala 'd'elete) */ 1175 if (!G.be_quiet) 1176 sed_puts(pattern_space, last_gets_char); 1177 1178 /* Delete and such jump here. 
*/ 1179 discard_line: 1180 flush_append(); 1181 free(pattern_space); 1182 1183 goto again; 1184} 1185 1186/* It is possible to have a command line argument with embedded 1187 * newlines. This counts as multiple command lines. 1188 * However, newline can be escaped: 's/e/z\<newline>z/' 1189 * We check for this. 1190 */ 1191 1192static void add_cmd_block(char *cmdstr) 1193{ 1194 char *sv, *eol; 1195 1196 cmdstr = sv = xstrdup(cmdstr); 1197 do { 1198 eol = strchr(cmdstr, '\n'); 1199 next: 1200 if (eol) { 1201 /* Count preceding slashes */ 1202 int slashes = 0; 1203 char *sl = eol; 1204 1205 while (sl != cmdstr && *--sl == '\\') 1206 slashes++; 1207 /* Odd number of preceding slashes - newline is escaped */ 1208 if (slashes & 1) { 1209 strcpy(eol-1, eol); 1210 eol = strchr(eol, '\n'); 1211 goto next; 1212 } 1213 *eol = '\0'; 1214 } 1215 add_cmd(cmdstr); 1216 cmdstr = eol + 1; 1217 } while (eol); 1218 free(sv); 1219} 1220 1221int sed_main(int argc, char **argv); 1222int sed_main(int argc, char **argv) 1223{ 1224 enum { 1225 OPT_in_place = 1 << 0, 1226 }; 1227 unsigned opt; 1228 llist_t *opt_e, *opt_f; 1229 int status = EXIT_SUCCESS; 1230 1231 INIT_G(); 1232 1233 /* destroy command strings on exit */ 1234 if (ENABLE_FEATURE_CLEAN_UP) atexit(sed_free_and_close_stuff); 1235 1236 /* Lie to autoconf when it starts asking stupid questions. 
*/ 1237 if (argc == 2 && !strcmp(argv[1], "--version")) { 1238 puts("This is not GNU sed version 4.0"); 1239 return 0; 1240 } 1241 1242 /* do normal option parsing */ 1243 opt_e = opt_f = NULL; 1244 opt_complementary = "e::f::" /* can occur multiple times */ 1245 "nn"; /* count -n */ 1246 opt = getopt32(argv, "irne:f:", &opt_e, &opt_f, 1247 &G.be_quiet); /* counter for -n */ 1248 argc -= optind; 1249 argv += optind; 1250 if (opt & OPT_in_place) { // -i 1251 atexit(cleanup_outname); 1252 } 1253 if (opt & 0x2) G.regex_type |= REG_EXTENDED; // -r 1254 //if (opt & 0x4) G.be_quiet++; // -n 1255 while (opt_e) { // -e 1256 add_cmd_block(opt_e->data); 1257 opt_e = opt_e->link; 1258 /* we leak opt_e here... */ 1259 } 1260 while (opt_f) { // -f 1261 char *line; 1262 FILE *cmdfile; 1263 cmdfile = xfopen(opt_f->data, "r"); 1264 while ((line = xmalloc_getline(cmdfile)) != NULL) { 1265 add_cmd(line); 1266 free(line); 1267 } 1268 fclose(cmdfile); 1269 opt_f = opt_f->link; 1270 /* we leak opt_f here... */ 1271 } 1272 /* if we didn't get a pattern from -e or -f, use argv[0] */ 1273 if (!(opt & 0x18)) { 1274 if (!argc) 1275 bb_show_usage(); 1276 add_cmd_block(*argv++); 1277 argc--; 1278 } 1279 /* Flush any unfinished commands. */ 1280 add_cmd(""); 1281 1282 /* By default, we write to stdout */ 1283 G.nonstdout = stdout; 1284 1285 /* argv[0..(argc-1)] should be names of file to process. If no 1286 * files were specified or '-' was specified, take input from stdin. 1287 * Otherwise, we process all the files specified. 
*/ 1288 if (argv[0] == NULL) { 1289 if (opt & OPT_in_place) 1290 bb_error_msg_and_die(bb_msg_requires_arg, "-i"); 1291 add_input_file(stdin); 1292 process_files(); 1293 } else { 1294 int i; 1295 FILE *file; 1296 1297 for (i = 0; i < argc; i++) { 1298 struct stat statbuf; 1299 int nonstdoutfd; 1300 1301 if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) { 1302 add_input_file(stdin); 1303 process_files(); 1304 continue; 1305 } 1306 file = fopen_or_warn(argv[i], "r"); 1307 if (!file) { 1308 status = EXIT_FAILURE; 1309 continue; 1310 } 1311 if (!(opt & OPT_in_place)) { 1312 add_input_file(file); 1313 continue; 1314 } 1315 1316 G.outname = xasprintf("%sXXXXXX", argv[i]); 1317 nonstdoutfd = mkstemp(G.outname); 1318 if (-1 == nonstdoutfd) 1319 bb_perror_msg_and_die("cannot create temp file %s", G.outname); 1320 G.nonstdout = fdopen(nonstdoutfd, "w"); 1321 1322 /* Set permissions of output file */ 1323 1324 fstat(fileno(file), &statbuf); 1325 fchmod(nonstdoutfd, statbuf.st_mode); 1326 add_input_file(file); 1327 process_files(); 1328 fclose(G.nonstdout); 1329 1330 G.nonstdout = stdout; 1331 /* unlink(argv[i]); */ 1332 rename(G.outname, argv[i]); 1333 free(G.outname); 1334 G.outname = 0; 1335 } 1336 if (G.input_file_count > G.current_input_file) 1337 process_files(); 1338 } 1339 1340 return status; 1341} 1342