Deleted Added
full compact
regcomp.c (49094) regcomp.c (62232)
1/*-
2 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Henry Spencer.
8 *

--- 21 unchanged lines hidden (view full) ---

30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
1/*-
2 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Henry Spencer.
8 *

--- 21 unchanged lines hidden (view full) ---

30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
38 *
39 * $FreeBSD: head/lib/libc/regex/regcomp.c 62232 2000-06-29 04:48:34Z dcs $
38 */
39
40#if defined(LIBC_SCCS) && !defined(lint)
41static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
42#endif /* LIBC_SCCS and not lint */
43
44#include <sys/types.h>
45#include <stdio.h>

--- 71 unchanged lines hidden (view full) ---

117static void categorize __P((struct parse *p, struct re_guts *g));
118static sopno dupl __P((struct parse *p, sopno start, sopno finish));
119static void doemit __P((struct parse *p, sop op, size_t opnd));
120static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
121static void dofwd __P((struct parse *p, sopno pos, sop value));
122static void enlarge __P((struct parse *p, sopno size));
123static void stripsnug __P((struct parse *p, struct re_guts *g));
124static void findmust __P((struct parse *p, struct re_guts *g));
40 */
41
42#if defined(LIBC_SCCS) && !defined(lint)
43static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
44#endif /* LIBC_SCCS and not lint */
45
46#include <sys/types.h>
47#include <stdio.h>

--- 71 unchanged lines hidden (view full) ---

119static void categorize __P((struct parse *p, struct re_guts *g));
120static sopno dupl __P((struct parse *p, sopno start, sopno finish));
121static void doemit __P((struct parse *p, sop op, size_t opnd));
122static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
123static void dofwd __P((struct parse *p, sopno pos, sop value));
124static void enlarge __P((struct parse *p, sopno size));
125static void stripsnug __P((struct parse *p, struct re_guts *g));
126static void findmust __P((struct parse *p, struct re_guts *g));
127static void computejumps __P((struct parse *p, struct re_guts *g));
128static void computematchjumps __P((struct parse *p, struct re_guts *g));
125static sopno pluscount __P((struct parse *p, struct re_guts *g));
126
127#ifdef __cplusplus
128}
129#endif
130/* ========= end header generated by ./mkh ========= */
131
132static char nuls[10]; /* place to point scanner in event of error */

--- 29 unchanged lines hidden (view full) ---

162#define DROP(n) (p->slen -= (n))
163
164#ifndef NDEBUG
165static int never = 0; /* for use in asserts; shuts lint up */
166#else
167#define never 0 /* some <assert.h>s have bugs too */
168#endif
169
129static sopno pluscount __P((struct parse *p, struct re_guts *g));
130
131#ifdef __cplusplus
132}
133#endif
134/* ========= end header generated by ./mkh ========= */
135
136static char nuls[10]; /* place to point scanner in event of error */

--- 29 unchanged lines hidden (view full) ---

166#define DROP(n) (p->slen -= (n))
167
168#ifndef NDEBUG
169static int never = 0; /* for use in asserts; shuts lint up */
170#else
171#define never 0 /* some <assert.h>s have bugs too */
172#endif
173
174/* Macro used by computejump()/computematchjump() */
175#define MIN(a,b) ((a)<(b)?(a):(b))
176
170/*
171 - regcomp - interface for parser and compilation
172 = extern int regcomp(regex_t *, const char *, int);
173 = #define REG_BASIC 0000
174 = #define REG_EXTENDED 0001
175 = #define REG_ICASE 0002
176 = #define REG_NOSUB 0004
177 = #define REG_NEWLINE 0010

--- 79 unchanged lines hidden (view full) ---

257 p_bre(p, OUT, OUT);
258 EMIT(OEND, 0);
259 g->laststate = THERE();
260
261 /* tidy up loose ends and fill things in */
262 categorize(p, g);
263 stripsnug(p, g);
264 findmust(p, g);
177/*
178 - regcomp - interface for parser and compilation
179 = extern int regcomp(regex_t *, const char *, int);
180 = #define REG_BASIC 0000
181 = #define REG_EXTENDED 0001
182 = #define REG_ICASE 0002
183 = #define REG_NOSUB 0004
184 = #define REG_NEWLINE 0010

--- 79 unchanged lines hidden (view full) ---

264 p_bre(p, OUT, OUT);
265 EMIT(OEND, 0);
266 g->laststate = THERE();
267
268 /* tidy up loose ends and fill things in */
269 categorize(p, g);
270 stripsnug(p, g);
271 findmust(p, g);
272 /* only use Boyer-Moore algorithm if the pattern is bigger
273 * than three characters
274 */
275 if(g->mlen > 3) {
276 computejumps(p, g);
277 computematchjumps(p, g);
278 if(g->matchjump == NULL) {
279 free(g->charjump);
280 g->charjump = NULL;
281 }
282 }
265 g->nplus = pluscount(p, g);
266 g->magic = MAGIC2;
267 preg->re_nsub = g->nsub;
268 preg->re_g = g;
269 preg->re_magic = MAGIC1;
270#ifndef REDEBUG
271 /* not debugging, so can't rely on the assert() in regexec() */
272 if (g->iflags&BAD)

--- 1463 unchanged lines hidden (view full) ---

1736 assert(cp < g->must + g->mlen);
1737 *cp++ = (char)OPND(s);
1738 }
1739 assert(cp == g->must + g->mlen);
1740 *cp++ = '\0'; /* just on general principles */
1741}
1742
1743/*
283 g->nplus = pluscount(p, g);
284 g->magic = MAGIC2;
285 preg->re_nsub = g->nsub;
286 preg->re_g = g;
287 preg->re_magic = MAGIC1;
288#ifndef REDEBUG
289 /* not debugging, so can't rely on the assert() in regexec() */
290 if (g->iflags&BAD)

--- 1463 unchanged lines hidden (view full) ---

1754 assert(cp < g->must + g->mlen);
1755 *cp++ = (char)OPND(s);
1756 }
1757 assert(cp == g->must + g->mlen);
1758 *cp++ = '\0'; /* just on general principles */
1759}
1760
1761/*
1762 - computejumps - compute char jumps for BM scan
1763 == static void computejumps(register struct parse *p, register struct re_guts *g);
1764 *
1765 * This algorithm assumes g->must exists and is has size greater than
1766 * zero. It's based on the algorithm found on Computer Algorithms by
1767 * Sara Baase.
1768 *
1769 * A char jump is the number of characters one needs to jump based on
1770 * the value of the character from the text that was mismatched.
1771 */
1772static void
1773computejumps(p, g)
1774struct parse *p;
1775struct re_guts *g;
1776{
1777 int ch;
1778 int mindex;
1779
1780 /* Avoid making errors worse */
1781 if (p->error != 0)
1782 return;
1783
1784 g->charjump = malloc(256 * sizeof(int));
1785 if (g->charjump == NULL) /* Not a fatal error */
1786 return;
1787
1788 /* If the character does not exist in the pattern, the jump
1789 * is equal to the number of characters in the pattern.
1790 */
1791 for (ch = 0; ch < 256; ch++)
1792 g->charjump[ch] = g->mlen;
1793
1794 /* If the character does exist, compute the jump that would
1795 * take us to the last character in the pattern equal to it
1796 * (notice that we match right to left, so that last character
1797 * is the first one that would be matched).
1798 */
1799 for (mindex = 0; mindex < g->mlen; mindex++)
1800 g->charjump[g->must[mindex]] = g->mlen - mindex - 1;
1801}
1802
1803/*
1804 - computematchjumps - compute match jumps for BM scan
1805 == static void computematchjumps(register struct parse *p, register struct re_guts *g);
1806 *
1807 * This algorithm assumes g->must exists and is has size greater than
1808 * zero. It's based on the algorithm found on Computer Algorithms by
1809 * Sara Baase.
1810 *
1811 * A match jump is the number of characters one needs to advance based
1812 * on the already-matched suffix.
1813 * Notice that all values here are minus (g->mlen-1), because of the way
1814 * the search algorithm works.
1815 */
1816static void
1817computematchjumps(p, g)
1818struct parse *p;
1819struct re_guts *g;
1820{
1821 int mindex; /* General "must" iterator */
1822 int suffix; /* Keeps track of matching suffix */
1823 int ssuffix; /* Keeps track of suffixes' suffix */
1824 int* pmatches; /* pmatches[k] points to the next i
1825 * such that i+1...mlen is a substring
1826 * of k+1...k+mlen-i-1
1827 */
1828
1829 /* Avoid making errors worse */
1830 if (p->error != 0)
1831 return;
1832
1833 pmatches = malloc(g->mlen * sizeof(unsigned int));
1834 if (pmatches == NULL) {
1835 g->matchjump = NULL;
1836 return;
1837 }
1838
1839 g->matchjump = malloc(g->mlen * sizeof(unsigned int));
1840 if (g->matchjump == NULL) /* Not a fatal error */
1841 return;
1842
1843 /* Set maximum possible jump for each character in the pattern */
1844 for (mindex = 0; mindex < g->mlen; mindex++)
1845 g->matchjump[mindex] = 2*g->mlen - mindex - 1;
1846
1847 /* Compute pmatches[] */
1848 for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
1849 mindex--, suffix--) {
1850 pmatches[mindex] = suffix;
1851
1852 /* If a mismatch is found, interrupting the substring,
1853 * compute the matchjump for that position. If no
1854 * mismatch is found, then a text substring mismatched
1855 * against the suffix will also mismatch against the
1856 * substring.
1857 */
1858 while (suffix < g->mlen
1859 && g->must[mindex] != g->must[suffix]) {
1860 g->matchjump[suffix] = MIN(g->matchjump[suffix],
1861 g->mlen - mindex - 1);
1862 suffix = pmatches[suffix];
1863 }
1864 }
1865
1866 /* Compute the matchjump up to the last substring found to jump
1867 * to the beginning of the largest must pattern prefix matching
1868 * it's own suffix.
1869 */
1870 for (mindex = 0; mindex <= suffix; mindex++)
1871 g->matchjump[mindex] = MIN(g->matchjump[mindex],
1872 g->mlen + suffix - mindex);
1873
1874 ssuffix = pmatches[suffix];
1875 while(suffix < g->mlen) {
1876 while(suffix <= ssuffix) {
1877 g->matchjump[suffix] = MIN(g->matchjump[suffix],
1878 g->mlen + ssuffix - suffix);
1879 suffix++;
1880 }
1881 ssuffix = pmatches[ssuffix];
1882 }
1883
1884 free(pmatches);
1885}
1886
1887/*
1744 - pluscount - count + nesting
1745 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1746 */
1747static sopno /* nesting depth */
1748pluscount(p, g)
1749struct parse *p;
1750register struct re_guts *g;
1751{

--- 26 unchanged lines hidden ---
1888 - pluscount - count + nesting
1889 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1890 */
1891static sopno /* nesting depth */
1892pluscount(p, g)
1893struct parse *p;
1894register struct re_guts *g;
1895{

--- 26 unchanged lines hidden ---