/* This file is part of GNU tar.
   Copyright (C) 2006 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any later
   version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
   Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17
18#include <system.h>
19#include <regex.h>
20#include "common.h"
21
/* How many matches of the regular expression get replaced. */
enum transform_type
  {
    transform_none,     /* No transform expression has been set */
    transform_first,    /* Replace a single match, then stop */
    transform_global    /* Replace every match (the `g' flag) */
  };

/* Mode selected by the current transform expression. */
static enum transform_type transform_type = transform_none;

/* Ordinal of the match to start replacing at; 0 means no restriction. */
static unsigned match_number = 0;

/* Compiled form of the regular expression part of the transform. */
static regex_t regex;

/* Obstack on which transformed names are assembled. */
static struct obstack stk;
32
/* Kinds of segment a compiled replacement expression is made of. */
enum replace_segm_type
  {
    /* Text copied to the output as is */
    segm_literal,
    /* Text of a parenthesized subexpression (or of the whole match) */
    segm_backref,
    /* Change of the case conversion mode (GNU extension) */
    segm_case_ctl
  };
39
/* Case conversion operations that may appear in a replacement. */
enum case_ctl_type
  {
    /* No conversion / end of a running conversion */
    ctl_stop,
    /* Convert only the next character to uppercase */
    ctl_upcase_next,
    /* Convert only the next character to lowercase */
    ctl_locase_next,
    /* Convert to uppercase until ctl_stop */
    ctl_upcase,
    /* Convert to lowercase until ctl_stop */
    ctl_locase
  };
48
49struct replace_segm
50{
51  struct replace_segm *next;
52  enum replace_segm_type type;
53  union
54  {
55    struct
56    {
57      char *ptr;
58      size_t size;
59    } literal;                /* type == segm_literal */
60    size_t ref;               /* type == segm_backref */
61    enum case_ctl_type ctl;   /* type == segm_case_ctl */
62  } v;
63};
64
/* Compiled replacement expression: first and last element of the
   segment list.  Both are NULL while the list is empty. */
static struct replace_segm *repl_head;
static struct replace_segm *repl_tail;

/* Number of elements in the above list */
static size_t segm_count;
68
69static struct replace_segm *
70add_segment (void)
71{
72  struct replace_segm *segm = xmalloc (sizeof *segm);
73  segm->next = NULL;
74  if (repl_tail)
75    repl_tail->next = segm;
76  else
77    repl_head = segm;
78  repl_tail = segm;
79  segm_count++;
80  return segm;
81}
82
83static void
84add_literal_segment (char *str, char *end)
85{
86  size_t len = end - str;
87  if (len)
88    {
89      struct replace_segm *segm = add_segment ();
90      segm->type = segm_literal;
91      segm->v.literal.ptr = xmalloc (len + 1);
92      memcpy (segm->v.literal.ptr, str, len);
93      segm->v.literal.ptr[len] = 0;
94      segm->v.literal.size = len;
95    }
96}
97
/* Append a literal segment consisting of the single character CHR. */
static void
add_char_segment (int chr)
{
  char one = chr;

  /* A one-byte range always yields a segment of size 1 whose text is
     CHR followed by a terminating NUL. */
  add_literal_segment (&one, &one + 1);
}
108
109static void
110add_backref_segment (size_t ref)
111{
112  struct replace_segm *segm = add_segment ();
113  segm->type = segm_backref;
114  segm->v.ref = ref;
115}
116
117static void
118add_case_ctl_segment (enum case_ctl_type ctl)
119{
120  struct replace_segm *segm = add_segment ();
121  segm->type = segm_case_ctl;
122  segm->v.ctl = ctl;
123}
124
125void
126set_transform_expr (const char *expr)
127{
128  int delim;
129  int i, j, rc;
130  char *str, *beg, *cur;
131  const char *p;
132  int cflags = 0;
133
134  if (transform_type == transform_none)
135    obstack_init (&stk);
136  else
137    {
138      /* Redefinition of the transform expression */
139      regfree (&regex);
140    }
141
142  if (expr[0] != 's')
143    USAGE_ERROR ((0, 0, _("Invalid transform expression")));
144
145  delim = expr[1];
146
147  /* Scan regular expression */
148  for (i = 2; expr[i] && expr[i] != delim; i++)
149    if (expr[i] == '\\' && expr[i+1])
150      i++;
151
152  if (expr[i] != delim)
153    USAGE_ERROR ((0, 0, _("Invalid transform expression")));
154
155  /* Scan replacement expression */
156  for (j = i + 1; expr[j] && expr[j] != delim; j++)
157    if (expr[j] == '\\' && expr[j+1])
158      j++;
159
160  if (expr[j] != delim)
161    USAGE_ERROR ((0, 0, _("Invalid transform expression")));
162
163  /* Check flags */
164  transform_type = transform_first;
165  for (p = expr + j + 1; *p; p++)
166    switch (*p)
167      {
168      case 'g':
169	transform_type = transform_global;
170	break;
171
172      case 'i':
173	cflags |= REG_ICASE;
174	break;
175
176      case 'x':
177	cflags |= REG_EXTENDED;
178	break;
179
180      case '0': case '1': case '2': case '3': case '4':
181      case '5': case '6': case '7': case '8': case '9':
182	match_number = strtoul (p, (char**) &p, 0);
183	p--;
184	break;
185
186      default:
187	USAGE_ERROR ((0, 0, _("Unknown flag in transform expression")));
188      }
189
190  /* Extract and compile regex */
191  str = xmalloc (i - 1);
192  memcpy (str, expr + 2, i - 2);
193  str[i - 2] = 0;
194
195  rc = regcomp (&regex, str, cflags);
196
197  if (rc)
198    {
199      char errbuf[512];
200      regerror (rc, &regex, errbuf, sizeof (errbuf));
201      USAGE_ERROR ((0, 0, _("Invalid transform expression: %s"), errbuf));
202    }
203
204  if (str[0] == '^' || str[strlen (str) - 1] == '$')
205    transform_type = transform_first;
206
207  free (str);
208
209  /* Extract and compile replacement expr */
210  i++;
211  str = xmalloc (j - i + 1);
212  memcpy (str, expr + i, j - i);
213  str[j - i] = 0;
214
215  for (cur = beg = str; *cur;)
216    {
217      if (*cur == '\\')
218	{
219	  size_t n;
220
221	  add_literal_segment (beg, cur);
222	  switch (*++cur)
223	    {
224	    case '0': case '1': case '2': case '3': case '4':
225	    case '5': case '6': case '7': case '8': case '9':
226	      n = strtoul (cur, &cur, 10);
227	      if (n > regex.re_nsub)
228		USAGE_ERROR ((0, 0, _("Invalid transform replacement: back reference out of range")));
229	      add_backref_segment (n);
230	      break;
231
232	    case '\\':
233	      add_char_segment ('\\');
234	      cur++;
235	      break;
236
237	    case 'a':
238	      add_char_segment ('\a');
239	      cur++;
240	      break;
241
242	    case 'b':
243	      add_char_segment ('\b');
244	      cur++;
245	      break;
246
247	    case 'f':
248	      add_char_segment ('\f');
249	      cur++;
250	      break;
251
252	    case 'n':
253	      add_char_segment ('\n');
254	      cur++;
255	      break;
256
257	    case 'r':
258	      add_char_segment ('\r');
259	      cur++;
260	      break;
261
262	    case 't':
263	      add_char_segment ('\t');
264	      cur++;
265	      break;
266
267	    case 'v':
268	      add_char_segment ('\v');
269	      cur++;
270	      break;
271
272	    case '&':
273	      add_char_segment ('&');
274	      cur++;
275	      break;
276
277	    case 'L':
278	      /* Turn the replacement to lowercase until a `\U' or `\E'
279		 is found, */
280	      add_case_ctl_segment (ctl_locase);
281	      cur++;
282	      break;
283
284	    case 'l':
285	      /* Turn the next character to lowercase, */
286	      add_case_ctl_segment (ctl_locase_next);
287	      cur++;
288	      break;
289
290	    case 'U':
291	      /* Turn the replacement to uppercase until a `\L' or `\E'
292		 is found, */
293	      add_case_ctl_segment (ctl_upcase);
294	      cur++;
295	      break;
296
297	    case 'u':
298	      /* Turn the next character to uppercase, */
299	      add_case_ctl_segment (ctl_upcase_next);
300	      cur++;
301	      break;
302
303	    case 'E':
304	      /* Stop case conversion started by `\L' or `\U'. */
305	      add_case_ctl_segment (ctl_stop);
306	      cur++;
307	      break;
308
309	    default:
310	      /* Try to be nice */
311	      {
312		char buf[2];
313		buf[0] = '\\';
314		buf[1] = *cur;
315		add_literal_segment (buf, buf + 2);
316	      }
317	      cur++;
318	      break;
319	    }
320	  beg = cur;
321	}
322      else if (*cur == '&')
323	{
324	  add_literal_segment (beg, cur);
325	  add_backref_segment (0);
326	  beg = ++cur;
327	}
328      else
329	cur++;
330    }
331  add_literal_segment (beg, cur);
332
333}
334
335/* Run case conversion specified by CASE_CTL on array PTR of SIZE
336   characters. Returns pointer to statically allocated storage. */
337static char *
338run_case_conv (enum case_ctl_type case_ctl, char *ptr, size_t size)
339{
340  static char *case_ctl_buffer;
341  static size_t case_ctl_bufsize;
342  char *p;
343
344  if (case_ctl_bufsize < size)
345    {
346      case_ctl_bufsize = size;
347      case_ctl_buffer = xrealloc (case_ctl_buffer, case_ctl_bufsize);
348    }
349  memcpy (case_ctl_buffer, ptr, size);
350  switch (case_ctl)
351    {
352    case ctl_upcase_next:
353      case_ctl_buffer[0] = toupper (case_ctl_buffer[0]);
354      break;
355
356    case ctl_locase_next:
357      case_ctl_buffer[0] = tolower (case_ctl_buffer[0]);
358      break;
359
360    case ctl_upcase:
361      for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
362	*p = toupper (*p);
363      break;
364
365    case ctl_locase:
366      for (p = case_ctl_buffer; p < case_ctl_buffer + size; p++)
367	*p = tolower (*p);
368      break;
369
370    case ctl_stop:
371      break;
372    }
373  return case_ctl_buffer;
374}
375
376bool
377_transform_name_to_obstack (char *input)
378{
379  regmatch_t *rmp;
380  int rc;
381  size_t nmatches = 0;
382  enum case_ctl_type case_ctl = ctl_stop,  /* Current case conversion op */
383                     save_ctl = ctl_stop;  /* Saved case_ctl for \u and \l */
384
385  /* Reset case conversion after a single-char operation */
386#define CASE_CTL_RESET()  if (case_ctl == ctl_upcase_next     \
387			      || case_ctl == ctl_locase_next) \
388                            {                                 \
389                              case_ctl = save_ctl;            \
390                              save_ctl = ctl_stop;            \
391			    }
392
393  if (transform_type == transform_none)
394    return false;
395
396  rmp = xmalloc ((regex.re_nsub + 1) * sizeof (*rmp));
397
398  while (*input)
399    {
400      size_t disp;
401      char *ptr;
402
403      rc = regexec (&regex, input, regex.re_nsub + 1, rmp, 0);
404
405      if (rc == 0)
406	{
407	  struct replace_segm *segm;
408
409	  disp = rmp[0].rm_eo;
410
411	  if (rmp[0].rm_so)
412	    obstack_grow (&stk, input, rmp[0].rm_so);
413
414	  nmatches++;
415	  if (match_number && nmatches < match_number)
416	    {
417	      obstack_grow (&stk, input, disp);
418	      input += disp;
419	      continue;
420	    }
421
422	  for (segm = repl_head; segm; segm = segm->next)
423	    {
424	      switch (segm->type)
425		{
426		case segm_literal:    /* Literal segment */
427		  if (case_ctl == ctl_stop)
428		    ptr = segm->v.literal.ptr;
429		  else
430		    {
431		      ptr = run_case_conv (case_ctl,
432					   segm->v.literal.ptr,
433					   segm->v.literal.size);
434		      CASE_CTL_RESET();
435		    }
436		  obstack_grow (&stk, ptr, segm->v.literal.size);
437		  break;
438
439		case segm_backref:    /* Back-reference segment */
440		  if (rmp[segm->v.ref].rm_so != -1
441		      && rmp[segm->v.ref].rm_eo != -1)
442		    {
443		      size_t size = rmp[segm->v.ref].rm_eo
444			              - rmp[segm->v.ref].rm_so;
445		      ptr = input + rmp[segm->v.ref].rm_so;
446		      if (case_ctl != ctl_stop)
447			{
448			  ptr = run_case_conv (case_ctl, ptr, size);
449			  CASE_CTL_RESET();
450			}
451
452		      obstack_grow (&stk, ptr, size);
453		    }
454		  break;
455
456		case segm_case_ctl:
457		  switch (segm->v.ctl)
458		    {
459		    case ctl_upcase_next:
460		    case ctl_locase_next:
461		      switch (save_ctl)
462			{
463			case ctl_stop:
464			case ctl_upcase:
465			case ctl_locase:
466			  save_ctl = case_ctl;
467			default:
468			  break;
469			}
470		      /*FALL THROUGH*/
471
472		    case ctl_upcase:
473		    case ctl_locase:
474		    case ctl_stop:
475		      case_ctl = segm->v.ctl;
476		    }
477		}
478	    }
479	}
480      else
481	{
482	  disp = strlen (input);
483	  obstack_grow (&stk, input, disp);
484	}
485
486      input += disp;
487
488      if (transform_type == transform_first)
489	{
490	  obstack_grow (&stk, input, strlen (input));
491	  break;
492	}
493    }
494
495  obstack_1grow (&stk, 0);
496  free (rmp);
497  return true;
498}
499
500bool
501transform_name_fp (char **pinput, char *(*fun)(char *, void *), void *dat)
502{
503    char *str;
504    bool ret = _transform_name_to_obstack (*pinput);
505    if (ret)
506      {
507	str = obstack_finish (&stk);
508	assign_string (pinput, fun ? fun (str, dat) : str);
509	obstack_free (&stk, str);
510      }
511    else if (fun)
512      {
513	str = *pinput;
514	*pinput = NULL;
515	assign_string (pinput, fun (str, dat));
516	free (str);
517	ret = true;
518      }
519    return ret;
520}
521
/* Transform the name *PINPUT in place with the current transform
   expression, without any post-processing function.  Returns whatever
   transform_name_fp returns. */
bool
transform_name (char **pinput)
{
  bool changed = transform_name_fp (pinput, NULL, NULL);

  return changed;
}
527
528