1/* This file is part of GNU tar. 2 Copyright (C) 2006 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify it 5 under the terms of the GNU General Public License as published by the 6 Free Software Foundation; either version 2, or (at your option) any later 7 version. 8 9 This program is distributed in the hope that it will be useful, but 10 WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 12 Public License for more details. 13 14 You should have received a copy of the GNU General Public License along 15 with this program; if not, write to the Free Software Foundation, Inc., 16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 17 18#include <system.h> 19#include <regex.h> 20#include "common.h" 21 22static enum transform_type 23 { 24 transform_none, 25 transform_first, 26 transform_global 27 } 28transform_type = transform_none; 29static unsigned match_number = 0; 30static regex_t regex; 31static struct obstack stk; 32 33enum replace_segm_type 34 { 35 segm_literal, /* Literal segment */ 36 segm_backref, /* Back-reference segment */ 37 segm_case_ctl /* Case control segment (GNU extension) */ 38 }; 39 40enum case_ctl_type 41 { 42 ctl_stop, /* Stop case conversion */ 43 ctl_upcase_next,/* Turn the next character to uppercase */ 44 ctl_locase_next,/* Turn the next character to lowercase */ 45 ctl_upcase, /* Turn the replacement to uppercase until ctl_stop */ 46 ctl_locase /* Turn the replacement to lowercase until ctl_stop */ 47 }; 48 49struct replace_segm 50{ 51 struct replace_segm *next; 52 enum replace_segm_type type; 53 union 54 { 55 struct 56 { 57 char *ptr; 58 size_t size; 59 } literal; /* type == segm_literal */ 60 size_t ref; /* type == segm_backref */ 61 enum case_ctl_type ctl; /* type == segm_case_ctl */ 62 } v; 63}; 64 65/* Compiled replacement expression */ 66static struct replace_segm *repl_head, *repl_tail; 67static size_t segm_count; /* Number of elements in the above list */ 68 69static struct replace_segm * 70add_segment (void) 71{ 72 struct replace_segm *segm = xmalloc (sizeof *segm); 73 segm->next = NULL; 74 if (repl_tail) 75 repl_tail->next = segm; 76 else 77 repl_head = segm; 78 repl_tail = segm; 79 segm_count++; 80 return segm; 81} 82 83static void 84add_literal_segment (char *str, char *end) 85{ 86 size_t len = end - str; 87 if (len) 88 { 89 struct replace_segm *segm = add_segment (); 90 segm->type = segm_literal; 91 segm->v.literal.ptr = xmalloc (len + 1); 92 memcpy (segm->v.literal.ptr, str, len); 93 segm->v.literal.ptr[len] = 0; 94 segm->v.literal.size = len; 95 } 96} 97 98static void 99add_char_segment (int chr) 100{ 101 struct replace_segm *segm = add_segment (); 102 segm->type = segm_literal; 103 segm->v.literal.ptr = xmalloc (2); 104 segm->v.literal.ptr[0] = chr; 105 segm->v.literal.ptr[1] = 0; 106 segm->v.literal.size = 1; 107} 108 109static void 110add_backref_segment (size_t ref) 111{ 112 struct replace_segm *segm = add_segment (); 113 segm->type = segm_backref; 114 segm->v.ref = ref; 115} 116 117static void 118add_case_ctl_segment (enum case_ctl_type ctl) 119{ 120 struct replace_segm *segm = add_segment (); 121 segm->type = segm_case_ctl; 122 segm->v.ctl = ctl; 123} 124 125void 126set_transform_expr (const char *expr) 127{ 128 int delim; 129 int i, j, rc; 130 char *str, *beg, *cur; 131 const char *p; 132 int cflags = 0; 133 134 if (transform_type == transform_none) 135 obstack_init (&stk); 136 else 137 { 138 /* Redefinition of the transform expression */ 139 regfree (®ex); 140 } 141 142 if (expr[0] != 's') 143 USAGE_ERROR ((0, 0, _("Invalid transform expression"))); 144 145 delim = expr[1]; 146 147 /* Scan regular expression */ 148 for (i = 2; expr[i] && expr[i] != delim; i++) 149 if (expr[i] == '\\' && expr[i+1]) 150 i++; 151 152 if (expr[i] != delim) 153 USAGE_ERROR ((0, 0, _("Invalid transform expression"))); 154 155 /* Scan replacement expression */ 156 for (j = i + 1; expr[j] && expr[j] != delim; j++) 157 if (expr[j] == '\\' && expr[j+1]) 158 j++; 159 160 if (expr[j] != delim) 161 USAGE_ERROR ((0, 0, _("Invalid transform expression"))); 162 163 /* Check flags */ 164 transform_type = transform_first; 165 for (p = expr + j + 1; *p; p++) 166 switch (*p) 167 { 168 case 'g': 169 transform_type = transform_global; 170 break; 171 172 case 'i': 173 cflags |= REG_ICASE; 174 break; 175 176 case 'x': 177 cflags |= REG_EXTENDED; 178 break; 179 180 case '0': case '1': case '2': case '3': case '4': 181 case '5': case '6': case '7': case '8': case '9': 182 match_number = strtoul (p, (char**) &p, 0); 183 p--; 184 break; 185 186 default: 187 USAGE_ERROR ((0, 0, _("Unknown flag in transform expression"))); 188 } 189 190 /* Extract and compile regex */ 191 str = xmalloc (i - 1); 192 memcpy (str, expr + 2, i - 2); 193 str[i - 2] = 0; 194 195 rc = regcomp (®ex, str, cflags); 196 197 if (rc) 198 { 199 char errbuf[512]; 200 regerror (rc, ®ex, errbuf, sizeof (errbuf)); 201 USAGE_ERROR ((0, 0, _("Invalid transform expression: %s"), errbuf)); 202 } 203 204 if (str[0] == '^' || str[strlen (str) - 1] == '$') 205 transform_type = transform_first; 206 207 free (str); 208 209 /* Extract and compile replacement expr */ 210 i++; 211 str = xmalloc (j - i + 1); 212 memcpy (str, expr + i, j - i); 213 str[j - i] = 0; 214 215 for (cur = beg = str; *cur;) 216 { 217 if (*cur == '\\') 218 { 219 size_t n; 220 221 add_literal_segment (beg, cur); 222 switch (*++cur) 223 { 224 case '0': case '1': case '2': case '3': case '4': 225 case '5': case '6': case '7': case '8': case '9': 226 n = strtoul (cur, &cur, 10); 227 if (n > regex.re_nsub) 228 USAGE_ERROR ((0, 0, _("Invalid transform replacement: back reference out of range"))); 229 add_backref_segment (n); 230 break; 231 232 case '\\': 233 add_char_segment ('\\'); 234 cur++; 235 break; 236 237 case 'a': 238 add_char_segment ('\a'); 239 cur++; 240 break; 241 242 case 'b': 243 add_char_segment ('\b'); 244 cur++; 245 break; 246 247 case 'f': 248 add_char_segment ('\f'); 249 cur++; 250 break; 251 252 case 'n': 253 add_char_segment ('\n'); 254 cur++; 255 break; 256 257 case 'r': 258 add_char_segment ('\r'); 259 cur++; 260 break; 261 262 case 't': 263 add_char_segment ('\t'); 264 cur++; 265 break; 266 267 case 'v': 268 add_char_segment ('\v'); 269 cur++; 270 break; 271 272 case '&': 273 add_char_segment ('&'); 274 cur++; 275 break; 276 277 case 'L': 278 /* Turn the replacement to lowercase until a `\U' or `\E' 279 is found, */ 280 add_case_ctl_segment (ctl_locase); 281 cur++; 282 break; 283 284 case 'l': 285 /* Turn the next character to lowercase, */ 286 add_case_ctl_segment (ctl_locase_next); 287 cur++; 288 break; 289 290 case 'U': 291 /* Turn the replacement to uppercase until a `\L' or `\E' 292 is found, */ 293 add_case_ctl_segment (ctl_upcase); 294 cur++; 295 break; 296 297 case 'u': 298 /* Turn the next character to uppercase, */ 299 add_case_ctl_segment (ctl_upcase_next); 300 cur++; 301 break; 302 303 case 'E': 304 /* Stop case conversion started by `\L' or `\U'. */ 305 add_case_ctl_segment (ctl_stop); 306 cur++; 307 break; 308 309 default: 310 /* Try to be nice */ 311 { 312 char buf[2]; 313 buf[0] = '\\'; 314 buf[1] = *cur; 315 add_literal_segment (buf, buf + 2); 316 } 317 cur++; 318 break; 319 } 320 beg = cur; 321 } 322 else if (*cur == '&') 323 { 324 add_literal_segment (beg, cur); 325 add_backref_segment (0); 326 beg = ++cur; 327 } 328 else 329 cur++; 330 } 331 add_literal_segment (beg, cur); 332 333} 334 335/* Run case conversion specified by CASE_CTL on array PTR of SIZE 336 characters. Returns pointer to statically allocated storage. */ 337static char * 338run_case_conv (enum case_ctl_type case_ctl, char *ptr, size_t size) 339{ 340 static char *case_ctl_buffer; 341 static size_t case_ctl_bufsize; 342 char *p; 343 344 if (case_ctl_bufsize < size) 345 { 346 case_ctl_bufsize = size; 347 case_ctl_buffer = xrealloc (case_ctl_buffer, case_ctl_bufsize); 348 } 349 memcpy (case_ctl_buffer, ptr, size); 350 switch (case_ctl) 351 { 352 case ctl_upcase_next: 353 case_ctl_buffer[0] = toupper (case_ctl_buffer[0]); 354 break; 355 356 case ctl_locase_next: 357 case_ctl_buffer[0] = tolower (case_ctl_buffer[0]); 358 break; 359 360 case ctl_upcase: 361 for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++) 362 *p = toupper (*p); 363 break; 364 365 case ctl_locase: 366 for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++) 367 *p = tolower (*p); 368 break; 369 370 case ctl_stop: 371 break; 372 } 373 return case_ctl_buffer; 374} 375 376bool 377_transform_name_to_obstack (char *input) 378{ 379 regmatch_t *rmp; 380 int rc; 381 size_t nmatches = 0; 382 enum case_ctl_type case_ctl = ctl_stop, /* Current case conversion op */ 383 save_ctl = ctl_stop; /* Saved case_ctl for \u and \l */ 384 385 /* Reset case conversion after a single-char operation */ 386#define CASE_CTL_RESET() if (case_ctl == ctl_upcase_next \ 387 || case_ctl == ctl_locase_next) \ 388 { \ 389 case_ctl = save_ctl; \ 390 save_ctl = ctl_stop; \ 391 } 392 393 if (transform_type == transform_none) 394 return false; 395 396 rmp = xmalloc ((regex.re_nsub + 1) * sizeof (*rmp)); 397 398 while (*input) 399 { 400 size_t disp; 401 char *ptr; 402 403 rc = regexec (®ex, input, regex.re_nsub + 1, rmp, 0); 404 405 if (rc == 0) 406 { 407 struct replace_segm *segm; 408 409 disp = rmp[0].rm_eo; 410 411 if (rmp[0].rm_so) 412 obstack_grow (&stk, input, rmp[0].rm_so); 413 414 nmatches++; 415 if (match_number && nmatches < match_number) 416 { 417 obstack_grow (&stk, input, disp); 418 input += disp; 419 continue; 420 } 421 422 for (segm = repl_head; segm; segm = segm->next) 423 { 424 switch (segm->type) 425 { 426 case segm_literal: /* Literal segment */ 427 if (case_ctl == ctl_stop) 428 ptr = segm->v.literal.ptr; 429 else 430 { 431 ptr = run_case_conv (case_ctl, 432 segm->v.literal.ptr, 433 segm->v.literal.size); 434 CASE_CTL_RESET(); 435 } 436 obstack_grow (&stk, ptr, segm->v.literal.size); 437 break; 438 439 case segm_backref: /* Back-reference segment */ 440 if (rmp[segm->v.ref].rm_so != -1 441 && rmp[segm->v.ref].rm_eo != -1) 442 { 443 size_t size = rmp[segm->v.ref].rm_eo 444 - rmp[segm->v.ref].rm_so; 445 ptr = input + rmp[segm->v.ref].rm_so; 446 if (case_ctl != ctl_stop) 447 { 448 ptr = run_case_conv (case_ctl, ptr, size); 449 CASE_CTL_RESET(); 450 } 451 452 obstack_grow (&stk, ptr, size); 453 } 454 break; 455 456 case segm_case_ctl: 457 switch (segm->v.ctl) 458 { 459 case ctl_upcase_next: 460 case ctl_locase_next: 461 switch (save_ctl) 462 { 463 case ctl_stop: 464 case ctl_upcase: 465 case ctl_locase: 466 save_ctl = case_ctl; 467 default: 468 break; 469 } 470 /*FALL THROUGH*/ 471 472 case ctl_upcase: 473 case ctl_locase: 474 case ctl_stop: 475 case_ctl = segm->v.ctl; 476 } 477 } 478 } 479 } 480 else 481 { 482 disp = strlen (input); 483 obstack_grow (&stk, input, disp); 484 } 485 486 input += disp; 487 488 if (transform_type == transform_first) 489 { 490 obstack_grow (&stk, input, strlen (input)); 491 break; 492 } 493 } 494 495 obstack_1grow (&stk, 0); 496 free (rmp); 497 return true; 498} 499 500bool 501transform_name_fp (char **pinput, char *(*fun)(char *, void *), void *dat) 502{ 503 char *str; 504 bool ret = _transform_name_to_obstack (*pinput); 505 if (ret) 506 { 507 str = obstack_finish (&stk); 508 assign_string (pinput, fun ? fun (str, dat) : str); 509 obstack_free (&stk, str); 510 } 511 else if (fun) 512 { 513 str = *pinput; 514 *pinput = NULL; 515 assign_string (pinput, fun (str, dat)); 516 free (str); 517 ret = true; 518 } 519 return ret; 520} 521 522bool 523transform_name (char **pinput) 524{ 525 return transform_name_fp (pinput, NULL, NULL); 526} 527 528