1/*
2 * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
3 * Use is subject to license terms.
4 *
5 *      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
6 *        All Rights Reserved
7 *
8 * University Copyright- Copyright (c) 1982, 1986, 1988
9 * The Regents of the University of California
10 * All Rights Reserved
11 *
12 * University Acknowledgment- Portions of this document are derived from
13 * software developed by the University of California, Berkeley, and its
14 * contributors.
15 *
16 * Licensed under the Apache License, Version 2.0 (the "License");
17 * you may not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *  http://www.apache.org/licenses/LICENSE-2.0.
20 *
21 * Unless required by applicable law or agreed to in writing, software
22 * distributed under the License is distributed on an "AS IS" BASIS,
23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
24 * or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29/* Code moved from regexp.h */
30
31#include "apr.h"
32#include "apr_lib.h"
33#ifdef APR_HAVE_LIMITS_H
34#include <limits.h>
35#endif
36#if APR_HAVE_STDLIB_H
37#include <stdlib.h>
38#endif
39#include "libsed.h"
40#include "regexp.h"
41#include "sed.h"
42
/*
 * Input-cursor helpers.  They operate on a local `char *sp` that must be
 * in scope wherever they are used.
 */
#define GETC() ((unsigned char)*sp++)   /* read the next byte and advance */
#define PEEKC() ((unsigned char)*sp)    /* look at the next byte */
#define UNGETC(c) (--sp)                /* step back one byte; c is ignored */

/*
 * Store error code c in the local `regerrno` and jump to the local `out`
 * label.  Wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define SEDCOMPILE_ERROR(c) do { \
            regerrno = (c); \
            goto out; \
            } while (0)

/* Non-zero when the first n bytes of s1 and s2 compare equal (strncmp). */
#define ecmp(s1, s2, n)    (strncmp((s1), (s2), (n)) == 0)

/* Non-zero for an alphabetic character or '_'.  Evaluates c twice. */
#define uletter(c) (isalpha(c) || (c) == '_')
52
53
54static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
55
56static int regerr(sed_commands_t *commands, int err);
57static void comperr(sed_commands_t *commands, char *msg);
58static void getrnge(char *str, step_vars_storage *vars);
59static int _advance(char *, char *, step_vars_storage *);
60extern int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars);
61
62
63static void comperr(sed_commands_t *commands, char *msg)
64{
65    command_errf(commands, msg, commands->linebuf);
66}
67
68/*
69*/
70static int regerr(sed_commands_t *commands, int err)
71{
72    switch(err) {
73    case 0:
74        /* No error */
75        break;
76    case 11:
77        comperr(commands, "Range endpoint too large: %s");
78        break;
79
80    case 16:
81        comperr(commands, "Bad number: %s");
82        break;
83
84    case 25:
85        comperr(commands, "``\\digit'' out of range: %s");
86        break;
87
88    case 36:
89        comperr(commands, "Illegal or missing delimiter: %s");
90        break;
91
92    case 41:
93        comperr(commands, "No remembered search string: %s");
94        break;
95
96    case 42:
97        comperr(commands, "\\( \\) imbalance: %s");
98        break;
99
100    case 43:
101        comperr(commands, "Too many \\(: %s");
102        break;
103
104    case 44:
105        comperr(commands, "More than 2 numbers given in \\{ \\}: %s");
106        break;
107
108    case 45:
109        comperr(commands, "} expected after \\: %s");
110        break;
111
112    case 46:
113        comperr(commands, "First number exceeds second in \\{ \\}: %s");
114        break;
115
116    case 49:
117        comperr(commands, "[ ] imbalance: %s");
118        break;
119
120    case 50:
121        comperr(commands, SEDERR_TMMES);
122        break;
123
124    default:
125        comperr(commands, "Unknown regexp error code %s\n");
126        break;
127    }
128    return (0);
129}
130
131
132char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs,
133                  char *ep, char *endbuf, int seof)
134{
135    int c;
136    int eof = seof;
137    char *lastep;
138    int cclcnt;
139    char bracket[NBRA], *bracketp;
140    int closed;
141    int neg;
142    int lc;
143    int i, cflg;
144    int iflag; /* used for non-ascii characters in brackets */
145    char *sp = commands->cp;
146    int regerrno = 0;
147
148    lastep = 0;
149    if ((c = GETC()) == eof || c == '\n') {
150        if (c == '\n') {
151            UNGETC(c);
152        }
153        commands->cp = sp;
154        goto out;
155    }
156    bracketp = bracket;
157    compargs->circf = closed = compargs->nbra = 0;
158    if (c == '^')
159        compargs->circf++;
160    else
161        UNGETC(c);
162    while (1) {
163        if (ep >= endbuf)
164            SEDCOMPILE_ERROR(50);
165        c = GETC();
166        if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
167            lastep = ep;
168        if (c == eof) {
169            *ep++ = CCEOF;
170            if (bracketp != bracket)
171                SEDCOMPILE_ERROR(42);
172            commands->cp = sp;
173            goto out;
174        }
175        switch (c) {
176
177        case '.':
178            *ep++ = CDOT;
179            continue;
180
181        case '\n':
182            SEDCOMPILE_ERROR(36);
183            commands->cp = sp;
184            goto out;
185        case '*':
186            if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
187                goto defchar;
188            *lastep |= STAR;
189            continue;
190
191        case '$':
192            if (PEEKC() != eof && PEEKC() != '\n')
193                goto defchar;
194            *ep++ = CDOL;
195            continue;
196
197        case '[':
198            if (&ep[17] >= endbuf)
199                SEDCOMPILE_ERROR(50);
200
201            *ep++ = CCL;
202            lc = 0;
203            for (i = 0; i < 16; i++)
204                ep[i] = 0;
205
206            neg = 0;
207            if ((c = GETC()) == '^') {
208                neg = 1;
209                c = GETC();
210            }
211            iflag = 1;
212            do {
213                c &= 0377;
214                if (c == '\0' || c == '\n')
215                    SEDCOMPILE_ERROR(49);
216                if ((c & 0200) && iflag) {
217                    iflag = 0;
218                    if (&ep[32] >= endbuf)
219                        SEDCOMPILE_ERROR(50);
220                    ep[-1] = CXCL;
221                    for (i = 16; i < 32; i++)
222                        ep[i] = 0;
223                }
224                if (c == '-' && lc != 0) {
225                    if ((c = GETC()) == ']') {
226                        PLACE('-');
227                        break;
228                    }
229                    if ((c & 0200) && iflag) {
230                        iflag = 0;
231                        if (&ep[32] >= endbuf)
232                            SEDCOMPILE_ERROR(50);
233                        ep[-1] = CXCL;
234                        for (i = 16; i < 32; i++)
235                            ep[i] = 0;
236                    }
237                    while (lc < c) {
238                        PLACE(lc);
239                        lc++;
240                    }
241                }
242                lc = c;
243                PLACE(c);
244            } while ((c = GETC()) != ']');
245
246            if (iflag)
247                iflag = 16;
248            else
249                iflag = 32;
250
251            if (neg) {
252                if (iflag == 32) {
253                    for (cclcnt = 0; cclcnt < iflag;
254                        cclcnt++)
255                        ep[cclcnt] ^= 0377;
256                    ep[0] &= 0376;
257                } else {
258                    ep[-1] = NCCL;
259                    /* make nulls match so test fails */
260                    ep[0] |= 01;
261                }
262            }
263
264            ep += iflag;
265
266            continue;
267
268        case '\\':
269            switch (c = GETC()) {
270
271            case '(':
272                if (compargs->nbra >= NBRA)
273                    SEDCOMPILE_ERROR(43);
274                *bracketp++ = compargs->nbra;
275                *ep++ = CBRA;
276                *ep++ = compargs->nbra++;
277                continue;
278
279            case ')':
280                if (bracketp <= bracket)
281                    SEDCOMPILE_ERROR(42);
282                *ep++ = CKET;
283                *ep++ = *--bracketp;
284                closed++;
285                continue;
286
287            case '{':
288                if (lastep == (char *) 0)
289                    goto defchar;
290                *lastep |= RNGE;
291                cflg = 0;
292            nlim:
293                c = GETC();
294                i = 0;
295                do {
296                    if ('0' <= c && c <= '9')
297                        i = 10 * i + c - '0';
298                    else
299                        SEDCOMPILE_ERROR(16);
300                } while (((c = GETC()) != '\\') && (c != ','));
301                if (i >= 255)
302                    SEDCOMPILE_ERROR(11);
303                *ep++ = i;
304                if (c == ',') {
305                    if (cflg++)
306                        SEDCOMPILE_ERROR(44);
307                    if ((c = GETC()) == '\\')
308                        *ep++ = (char) 255;
309                    else {
310                        UNGETC(c);
311                        goto nlim;
312                        /* get 2'nd number */
313                    }
314                }
315                if (GETC() != '}')
316                    SEDCOMPILE_ERROR(45);
317                if (!cflg)    /* one number */
318                    *ep++ = i;
319                else if ((ep[-1] & 0377) < (ep[-2] & 0377))
320                    SEDCOMPILE_ERROR(46);
321                continue;
322
323            case '\n':
324                SEDCOMPILE_ERROR(36);
325
326            case 'n':
327                c = '\n';
328                goto defchar;
329
330            default:
331                if (c >= '1' && c <= '9') {
332                    if ((c -= '1') >= closed)
333                        SEDCOMPILE_ERROR(25);
334                    *ep++ = CBACK;
335                    *ep++ = c;
336                    continue;
337                }
338            }
339    /* Drop through to default to use \ to turn off special chars */
340
341        defchar:
342        default:
343            lastep = ep;
344            *ep++ = CCHR;
345            *ep++ = c;
346        }
347    }
348out:
349    if (regerrno) {
350        regerr(commands, regerrno);
351        return (char*) NULL;
352    }
353    /* XXX : Basant : what extra */
354    /* int reglength = (int)(ep - expbuf); */
355    return ep;
356}
357
358int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars)
359{
360    int c;
361
362
363    if (circf) {
364        vars->loc1 = p1;
365        return (_advance(p1, p2, vars));
366    }
367    /* fast check for first character */
368    if (*p2 == CCHR) {
369        c = p2[1];
370        do {
371            if (*p1 != c)
372                continue;
373            if (_advance(p1, p2, vars)) {
374                vars->loc1 = p1;
375                return (1);
376            }
377        } while (*p1++);
378        return (0);
379    }
380        /* regular algorithm */
381    do {
382        if (_advance(p1, p2, vars)) {
383            vars->loc1 = p1;
384            return (1);
385        }
386    } while (*p1++);
387    return (0);
388}
389
390static int _advance(char *lp, char *ep, step_vars_storage *vars)
391{
392    char *curlp;
393    int c;
394    char *bbeg;
395    char neg;
396    int ct;
397    int epint; /* int value of *ep */
398
399    while (1) {
400        neg = 0;
401        switch (*ep++) {
402
403        case CCHR:
404            if (*ep++ == *lp++)
405                continue;
406            return (0);
407
408        case CDOT:
409            if (*lp++)
410                continue;
411            return (0);
412
413        case CDOL:
414            if (*lp == 0)
415                continue;
416            return (0);
417
418        case CCEOF:
419            vars->loc2 = lp;
420            return (1);
421
422        case CXCL:
423            c = (unsigned char)*lp++;
424            if (ISTHERE(c)) {
425                ep += 32;
426                continue;
427            }
428            return (0);
429
430        case NCCL:
431            neg = 1;
432
433        case CCL:
434            c = *lp++;
435            if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
436                ep += 16;
437                continue;
438            }
439            return (0);
440
441        case CBRA:
442            epint = (int) *ep;
443            vars->braslist[epint] = lp;
444            ep++;
445            continue;
446
447        case CKET:
448            epint = (int) *ep;
449            vars->braelist[epint] = lp;
450            ep++;
451            continue;
452
453        case CCHR | RNGE:
454            c = *ep++;
455            getrnge(ep, vars);
456            while (vars->low--)
457                if (*lp++ != c)
458                    return (0);
459            curlp = lp;
460            while (vars->size--)
461                if (*lp++ != c)
462                    break;
463            if (vars->size < 0)
464                lp++;
465            ep += 2;
466            goto star;
467
468        case CDOT | RNGE:
469            getrnge(ep, vars);
470            while (vars->low--)
471                if (*lp++ == '\0')
472                    return (0);
473            curlp = lp;
474            while (vars->size--)
475                if (*lp++ == '\0')
476                    break;
477            if (vars->size < 0)
478                lp++;
479            ep += 2;
480            goto star;
481
482        case CXCL | RNGE:
483            getrnge(ep + 32, vars);
484            while (vars->low--) {
485                c = (unsigned char)*lp++;
486                if (!ISTHERE(c))
487                    return (0);
488            }
489            curlp = lp;
490            while (vars->size--) {
491                c = (unsigned char)*lp++;
492                if (!ISTHERE(c))
493                    break;
494            }
495            if (vars->size < 0)
496                lp++;
497            ep += 34;        /* 32 + 2 */
498            goto star;
499
500        case NCCL | RNGE:
501            neg = 1;
502
503        case CCL | RNGE:
504            getrnge(ep + 16, vars);
505            while (vars->low--) {
506                c = *lp++;
507                if (((c & 0200) || !ISTHERE(c)) ^ neg)
508                    return (0);
509            }
510            curlp = lp;
511            while (vars->size--) {
512                c = *lp++;
513                if (((c & 0200) || !ISTHERE(c)) ^ neg)
514                    break;
515            }
516            if (vars->size < 0)
517                lp++;
518            ep += 18;         /* 16 + 2 */
519            goto star;
520
521        case CBACK:
522            epint = (int) *ep;
523            bbeg = vars->braslist[epint];
524            ct = vars->braelist[epint] - bbeg;
525            ep++;
526
527            if (ecmp(bbeg, lp, ct)) {
528                lp += ct;
529                continue;
530            }
531            return (0);
532
533        case CBACK | STAR:
534            epint = (int) *ep;
535            bbeg = vars->braslist[epint];
536            ct = vars->braelist[epint] - bbeg;
537            ep++;
538            curlp = lp;
539            while (ecmp(bbeg, lp, ct))
540                lp += ct;
541
542            while (lp >= curlp) {
543                if (_advance(lp, ep, vars))
544                    return (1);
545                lp -= ct;
546            }
547            return (0);
548
549
550        case CDOT | STAR:
551            curlp = lp;
552            while (*lp++);
553            goto star;
554
555        case CCHR | STAR:
556            curlp = lp;
557            while (*lp++ == *ep);
558            ep++;
559            goto star;
560
561        case CXCL | STAR:
562            curlp = lp;
563            do {
564                c = (unsigned char)*lp++;
565            } while (ISTHERE(c));
566            ep += 32;
567            goto star;
568
569        case NCCL | STAR:
570            neg = 1;
571
572        case CCL | STAR:
573            curlp = lp;
574            do {
575                c = *lp++;
576            } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
577            ep += 16;
578            goto star;
579
580        star:
581            do {
582                if (--lp == vars->locs)
583                    break;
584                if (_advance(lp, ep, vars))
585                    return (1);
586            } while (lp > curlp);
587            return (0);
588
589        }
590    }
591}
592
593static void getrnge(char *str, step_vars_storage *vars)
594{
595    vars->low = *str++ & 0377;
596    vars->size = ((*str & 0377) == 255)? 20000: (*str &0377) - vars->low;
597}
598
599
600