Cross Reference: /freebsd-9.3-release/lib/libc/regex/regcomp.c

Deleted Added

sdiff udiff text old ( 49094 ) new ( 62232 )

full compact

regcomp.c (49094)	regcomp.c (62232)
1/- 2 Copyright (c) 1992, 1993, 1994 Henry Spencer. 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer. 8 * --- 21 unchanged lines hidden (view full) --- 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94	1/- 2 Copyright (c) 1992, 1993, 1994 Henry Spencer. 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer. 8 * --- 21 unchanged lines hidden (view full) --- 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
	38 * 39 * $FreeBSD: head/lib/libc/regex/regcomp.c 62232 2000-06-29 04:48:34Z dcs $
38 / 39 40#if defined(LIBC_SCCS) && !defined(lint) 41static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; 42#endif / LIBC_SCCS and not lint / 43 44#include <sys/types.h> 45#include <stdio.h> --- 71 unchanged lines hidden* (view full) --- 117static void categorize __P((struct parse p, struct re_guts g)); 118static sopno dupl __P((struct parse p, sopno start, sopno finish)); 119static void doemit __P((struct parse p, sop op, size_t opnd)); 120static void doinsert __P((struct parse p, sop op, size_t opnd, sopno pos)); 121static void dofwd __P((struct parse p, sopno pos, sop value)); 122static void enlarge __P((struct parse p, sopno size)); 123static void stripsnug __P((struct parse p, struct re_guts g)); 124static void findmust __P((struct parse p, struct re_guts *g));	40 / 41 42#if defined(LIBC_SCCS) && !defined(lint) 43static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; 44#endif / LIBC_SCCS and not lint / 45 46#include <sys/types.h> 47#include <stdio.h> --- 71 unchanged lines hidden* (view full) --- 119static void categorize __P((struct parse p, struct re_guts g)); 120static sopno dupl __P((struct parse p, sopno start, sopno finish)); 121static void doemit __P((struct parse p, sop op, size_t opnd)); 122static void doinsert __P((struct parse p, sop op, size_t opnd, sopno pos)); 123static void dofwd __P((struct parse p, sopno pos, sop value)); 124static void enlarge __P((struct parse p, sopno size)); 125static void stripsnug __P((struct parse p, struct re_guts g)); 126static void findmust __P((struct parse p, struct re_guts *g));
	127static void computejumps __P((struct parse p, struct re_guts g)); 128static void computematchjumps __P((struct parse p, struct re_guts g));
125static sopno pluscount __P((struct parse p, struct re_guts g)); 126 127#ifdef __cplusplus 128} 129#endif 130/* ========= end header generated by ./mkh ========= / 131* 132static char nuls[10]; /* place to point scanner in event of error / --- 29 unchanged lines hidden* (view full) --- 162#define DROP(n) (p->slen -= (n)) 163 164#ifndef NDEBUG 165static int never = 0; /* for use in asserts; shuts lint up / 166#else 167#define never 0 / some <assert.h>s have bugs too / 168#endif 169*	129static sopno pluscount __P((struct parse p, struct re_guts g)); 130 131#ifdef __cplusplus 132} 133#endif 134/* ========= end header generated by ./mkh ========= / 135* 136static char nuls[10]; /* place to point scanner in event of error / --- 29 unchanged lines hidden* (view full) --- 166#define DROP(n) (p->slen -= (n)) 167 168#ifndef NDEBUG 169static int never = 0; /* for use in asserts; shuts lint up / 170#else 171#define never 0 / some <assert.h>s have bugs too / 172#endif 173*
	174/* Macro used by computejump()/computematchjump() / 175#define MIN(a,b) ((a)<(b)?(a):(b)) 176*
170/* 171 - regcomp - interface for parser and compilation 172 = extern int regcomp(regex_t , const char , int); 173 = #define REG_BASIC 0000 174 = #define REG_EXTENDED 0001 175 = #define REG_ICASE 0002 176 = #define REG_NOSUB 0004 177 = #define REG_NEWLINE 0010 --- 79 unchanged lines hidden (view full) --- 257 p_bre(p, OUT, OUT); 258 EMIT(OEND, 0); 259 g->laststate = THERE(); 260 261 /* tidy up loose ends and fill things in / 262* categorize(p, g); 263 stripsnug(p, g); 264 findmust(p, g);	177/* 178 - regcomp - interface for parser and compilation 179 = extern int regcomp(regex_t , const char , int); 180 = #define REG_BASIC 0000 181 = #define REG_EXTENDED 0001 182 = #define REG_ICASE 0002 183 = #define REG_NOSUB 0004 184 = #define REG_NEWLINE 0010 --- 79 unchanged lines hidden (view full) --- 264 p_bre(p, OUT, OUT); 265 EMIT(OEND, 0); 266 g->laststate = THERE(); 267 268 /* tidy up loose ends and fill things in / 269* categorize(p, g); 270 stripsnug(p, g); 271 findmust(p, g);
	272 /* only use Boyer-Moore algorithm if the pattern is bigger 273 * than three characters 274 / 275* if(g->mlen > 3) { 276 computejumps(p, g); 277 computematchjumps(p, g); 278 if(g->matchjump == NULL) { 279 free(g->charjump); 280 g->charjump = NULL; 281 } 282 }
265 g->nplus = pluscount(p, g); 266 g->magic = MAGIC2; 267 preg->re_nsub = g->nsub; 268 preg->re_g = g; 269 preg->re_magic = MAGIC1; 270#ifndef REDEBUG 271 /* not debugging, so can't rely on the assert() in regexec() / 272* if (g->iflags&BAD) --- 1463 unchanged lines hidden (view full) --- 1736 assert(cp < g->must + g->mlen); 1737 cp++ = (char)OPND(s); 1738* } 1739 assert(cp == g->must + g->mlen); 1740 cp++ = '\0'; / just on general principles / 1741} 1742* 1743/*	283 g->nplus = pluscount(p, g); 284 g->magic = MAGIC2; 285 preg->re_nsub = g->nsub; 286 preg->re_g = g; 287 preg->re_magic = MAGIC1; 288#ifndef REDEBUG 289 /* not debugging, so can't rely on the assert() in regexec() / 290* if (g->iflags&BAD) --- 1463 unchanged lines hidden (view full) --- 1754 assert(cp < g->must + g->mlen); 1755 cp++ = (char)OPND(s); 1756* } 1757 assert(cp == g->must + g->mlen); 1758 cp++ = '\0'; / just on general principles / 1759} 1760* 1761/*
	1762 - computejumps - compute char jumps for BM scan 1763 == static void computejumps(register struct parse p, register struct re_guts g); 1764 * 1765 * This algorithm assumes g->must exists and is has size greater than 1766 * zero. It's based on the algorithm found on Computer Algorithms by 1767 * Sara Baase. 1768 * 1769 * A char jump is the number of characters one needs to jump based on 1770 * the value of the character from the text that was mismatched. 1771 / 1772static void 1773computejumps(p, g) 1774struct parse p; 1775struct re_guts g; 1776{ 1777* int ch; 1778 int mindex; 1779 1780 /* Avoid making errors worse / 1781* if (p->error != 0) 1782 return; 1783 1784 g->charjump = malloc(256 * sizeof(int)); 1785 if (g->charjump == NULL) /* Not a fatal error / 1786* return; 1787 1788 /* If the character does not exist in the pattern, the jump 1789 * is equal to the number of characters in the pattern. 1790 / 1791* for (ch = 0; ch < 256; ch++) 1792 g->charjump[ch] = g->mlen; 1793 1794 /* If the character does exist, compute the jump that would 1795 * take us to the last character in the pattern equal to it 1796 * (notice that we match right to left, so that last character 1797 * is the first one that would be matched). 1798 / 1799* for (mindex = 0; mindex < g->mlen; mindex++) 1800 g->charjump[g->must[mindex]] = g->mlen - mindex - 1; 1801} 1802 1803/* 1804 - computematchjumps - compute match jumps for BM scan 1805 == static void computematchjumps(register struct parse p, register struct re_guts g); 1806 * 1807 * This algorithm assumes g->must exists and is has size greater than 1808 * zero. It's based on the algorithm found on Computer Algorithms by 1809 * Sara Baase. 1810 * 1811 * A match jump is the number of characters one needs to advance based 1812 * on the already-matched suffix. 1813 * Notice that all values here are minus (g->mlen-1), because of the way 1814 * the search algorithm works. 1815 / 1816static void 1817computematchjumps(p, g) 1818struct parse p; 1819struct re_guts g; 1820{ 1821* int mindex; /* General "must" iterator / 1822* int suffix; /* Keeps track of matching suffix / 1823* int ssuffix; /* Keeps track of suffixes' suffix / 1824* int* pmatches; /* pmatches[k] points to the next i 1825 * such that i+1...mlen is a substring 1826 * of k+1...k+mlen-i-1 1827 / 1828* 1829 /* Avoid making errors worse / 1830* if (p->error != 0) 1831 return; 1832 1833 pmatches = malloc(g->mlen * sizeof(unsigned int)); 1834 if (pmatches == NULL) { 1835 g->matchjump = NULL; 1836 return; 1837 } 1838 1839 g->matchjump = malloc(g->mlen * sizeof(unsigned int)); 1840 if (g->matchjump == NULL) /* Not a fatal error / 1841* return; 1842 1843 /* Set maximum possible jump for each character in the pattern / 1844* for (mindex = 0; mindex < g->mlen; mindex++) 1845 g->matchjump[mindex] = 2g->mlen - mindex - 1; 1846* 1847 /* Compute pmatches[] / 1848* for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0; 1849 mindex--, suffix--) { 1850 pmatches[mindex] = suffix; 1851 1852 /* If a mismatch is found, interrupting the substring, 1853 * compute the matchjump for that position. If no 1854 * mismatch is found, then a text substring mismatched 1855 * against the suffix will also mismatch against the 1856 * substring. 1857 / 1858* while (suffix < g->mlen 1859 && g->must[mindex] != g->must[suffix]) { 1860 g->matchjump[suffix] = MIN(g->matchjump[suffix], 1861 g->mlen - mindex - 1); 1862 suffix = pmatches[suffix]; 1863 } 1864 } 1865 1866 /* Compute the matchjump up to the last substring found to jump 1867 * to the beginning of the largest must pattern prefix matching 1868 * it's own suffix. 1869 / 1870* for (mindex = 0; mindex <= suffix; mindex++) 1871 g->matchjump[mindex] = MIN(g->matchjump[mindex], 1872 g->mlen + suffix - mindex); 1873 1874 ssuffix = pmatches[suffix]; 1875 while(suffix < g->mlen) { 1876 while(suffix <= ssuffix) { 1877 g->matchjump[suffix] = MIN(g->matchjump[suffix], 1878 g->mlen + ssuffix - suffix); 1879 suffix++; 1880 } 1881 ssuffix = pmatches[ssuffix]; 1882 } 1883 1884 free(pmatches); 1885} 1886 1887/*
1744 - pluscount - count + nesting 1745 == static sopno pluscount(register struct parse p, register struct re_guts g); 1746 / 1747static sopno / nesting depth / 1748pluscount(p, g) 1749struct parse p; 1750register struct re_guts g; 1751{ --- 26 unchanged lines hidden* ---	1888 - pluscount - count + nesting 1889 == static sopno pluscount(register struct parse p, register struct re_guts g); 1890 / 1891static sopno / nesting depth / 1892pluscount(p, g) 1893struct parse p; 1894register struct re_guts g; 1895{ --- 26 unchanged lines hidden* ---