regcomp.c (49094) | regcomp.c (62232) |
---|---|
1/*- 2 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer. 8 * --- 21 unchanged lines hidden (view full) --- 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 | 1/*- 2 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Henry Spencer. 8 * --- 21 unchanged lines hidden (view full) --- 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 |
38 * 39 * $FreeBSD: head/lib/libc/regex/regcomp.c 62232 2000-06-29 04:48:34Z dcs $ |
|
38 */ 39 40#if defined(LIBC_SCCS) && !defined(lint) 41static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; 42#endif /* LIBC_SCCS and not lint */ 43 44#include <sys/types.h> 45#include <stdio.h> --- 71 unchanged lines hidden (view full) --- 117static void categorize __P((struct parse *p, struct re_guts *g)); 118static sopno dupl __P((struct parse *p, sopno start, sopno finish)); 119static void doemit __P((struct parse *p, sop op, size_t opnd)); 120static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos)); 121static void dofwd __P((struct parse *p, sopno pos, sop value)); 122static void enlarge __P((struct parse *p, sopno size)); 123static void stripsnug __P((struct parse *p, struct re_guts *g)); 124static void findmust __P((struct parse *p, struct re_guts *g)); | 40 */ 41 42#if defined(LIBC_SCCS) && !defined(lint) 43static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; 44#endif /* LIBC_SCCS and not lint */ 45 46#include <sys/types.h> 47#include <stdio.h> --- 71 unchanged lines hidden (view full) --- 119static void categorize __P((struct parse *p, struct re_guts *g)); 120static sopno dupl __P((struct parse *p, sopno start, sopno finish)); 121static void doemit __P((struct parse *p, sop op, size_t opnd)); 122static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos)); 123static void dofwd __P((struct parse *p, sopno pos, sop value)); 124static void enlarge __P((struct parse *p, sopno size)); 125static void stripsnug __P((struct parse *p, struct re_guts *g)); 126static void findmust __P((struct parse *p, struct re_guts *g)); |
127static void computejumps __P((struct parse *p, struct re_guts *g)); 128static void computematchjumps __P((struct parse *p, struct re_guts *g)); |
|
125static sopno pluscount __P((struct parse *p, struct re_guts *g)); 126 127#ifdef __cplusplus 128} 129#endif 130/* ========= end header generated by ./mkh ========= */ 131 132static char nuls[10]; /* place to point scanner in event of error */ --- 29 unchanged lines hidden (view full) --- 162#define DROP(n) (p->slen -= (n)) 163 164#ifndef NDEBUG 165static int never = 0; /* for use in asserts; shuts lint up */ 166#else 167#define never 0 /* some <assert.h>s have bugs too */ 168#endif 169 | 129static sopno pluscount __P((struct parse *p, struct re_guts *g)); 130 131#ifdef __cplusplus 132} 133#endif 134/* ========= end header generated by ./mkh ========= */ 135 136static char nuls[10]; /* place to point scanner in event of error */ --- 29 unchanged lines hidden (view full) --- 166#define DROP(n) (p->slen -= (n)) 167 168#ifndef NDEBUG 169static int never = 0; /* for use in asserts; shuts lint up */ 170#else 171#define never 0 /* some <assert.h>s have bugs too */ 172#endif 173 |
174/* Macro used by computejump()/computematchjump() */ 175#define MIN(a,b) ((a)<(b)?(a):(b)) 176 |
|
170/* 171 - regcomp - interface for parser and compilation 172 = extern int regcomp(regex_t *, const char *, int); 173 = #define REG_BASIC 0000 174 = #define REG_EXTENDED 0001 175 = #define REG_ICASE 0002 176 = #define REG_NOSUB 0004 177 = #define REG_NEWLINE 0010 --- 79 unchanged lines hidden (view full) --- 257 p_bre(p, OUT, OUT); 258 EMIT(OEND, 0); 259 g->laststate = THERE(); 260 261 /* tidy up loose ends and fill things in */ 262 categorize(p, g); 263 stripsnug(p, g); 264 findmust(p, g); | 177/* 178 - regcomp - interface for parser and compilation 179 = extern int regcomp(regex_t *, const char *, int); 180 = #define REG_BASIC 0000 181 = #define REG_EXTENDED 0001 182 = #define REG_ICASE 0002 183 = #define REG_NOSUB 0004 184 = #define REG_NEWLINE 0010 --- 79 unchanged lines hidden (view full) --- 264 p_bre(p, OUT, OUT); 265 EMIT(OEND, 0); 266 g->laststate = THERE(); 267 268 /* tidy up loose ends and fill things in */ 269 categorize(p, g); 270 stripsnug(p, g); 271 findmust(p, g); |
272 /* only use Boyer-Moore algorithm if the pattern is bigger 273 * than three characters 274 */ 275 if(g->mlen > 3) { 276 computejumps(p, g); 277 computematchjumps(p, g); 278 if(g->matchjump == NULL) { 279 free(g->charjump); 280 g->charjump = NULL; 281 } 282 } |
|
265 g->nplus = pluscount(p, g); 266 g->magic = MAGIC2; 267 preg->re_nsub = g->nsub; 268 preg->re_g = g; 269 preg->re_magic = MAGIC1; 270#ifndef REDEBUG 271 /* not debugging, so can't rely on the assert() in regexec() */ 272 if (g->iflags&BAD) --- 1463 unchanged lines hidden (view full) --- 1736 assert(cp < g->must + g->mlen); 1737 *cp++ = (char)OPND(s); 1738 } 1739 assert(cp == g->must + g->mlen); 1740 *cp++ = '\0'; /* just on general principles */ 1741} 1742 1743/* | 283 g->nplus = pluscount(p, g); 284 g->magic = MAGIC2; 285 preg->re_nsub = g->nsub; 286 preg->re_g = g; 287 preg->re_magic = MAGIC1; 288#ifndef REDEBUG 289 /* not debugging, so can't rely on the assert() in regexec() */ 290 if (g->iflags&BAD) --- 1463 unchanged lines hidden (view full) --- 1754 assert(cp < g->must + g->mlen); 1755 *cp++ = (char)OPND(s); 1756 } 1757 assert(cp == g->must + g->mlen); 1758 *cp++ = '\0'; /* just on general principles */ 1759} 1760 1761/* |
1762 - computejumps - compute char jumps for BM scan 1763 == static void computejumps(register struct parse *p, register struct re_guts *g); 1764 * 1765 * This algorithm assumes g->must exists and is has size greater than 1766 * zero. It's based on the algorithm found on Computer Algorithms by 1767 * Sara Baase. 1768 * 1769 * A char jump is the number of characters one needs to jump based on 1770 * the value of the character from the text that was mismatched. 1771 */ 1772static void 1773computejumps(p, g) 1774struct parse *p; 1775struct re_guts *g; 1776{ 1777 int ch; 1778 int mindex; 1779 1780 /* Avoid making errors worse */ 1781 if (p->error != 0) 1782 return; 1783 1784 g->charjump = malloc(256 * sizeof(int)); 1785 if (g->charjump == NULL) /* Not a fatal error */ 1786 return; 1787 1788 /* If the character does not exist in the pattern, the jump 1789 * is equal to the number of characters in the pattern. 1790 */ 1791 for (ch = 0; ch < 256; ch++) 1792 g->charjump[ch] = g->mlen; 1793 1794 /* If the character does exist, compute the jump that would 1795 * take us to the last character in the pattern equal to it 1796 * (notice that we match right to left, so that last character 1797 * is the first one that would be matched). 1798 */ 1799 for (mindex = 0; mindex < g->mlen; mindex++) 1800 g->charjump[g->must[mindex]] = g->mlen - mindex - 1; 1801} 1802 1803/* 1804 - computematchjumps - compute match jumps for BM scan 1805 == static void computematchjumps(register struct parse *p, register struct re_guts *g); 1806 * 1807 * This algorithm assumes g->must exists and is has size greater than 1808 * zero. It's based on the algorithm found on Computer Algorithms by 1809 * Sara Baase. 1810 * 1811 * A match jump is the number of characters one needs to advance based 1812 * on the already-matched suffix. 1813 * Notice that all values here are minus (g->mlen-1), because of the way 1814 * the search algorithm works. 1815 */ 1816static void 1817computematchjumps(p, g) 1818struct parse *p; 1819struct re_guts *g; 1820{ 1821 int mindex; /* General "must" iterator */ 1822 int suffix; /* Keeps track of matching suffix */ 1823 int ssuffix; /* Keeps track of suffixes' suffix */ 1824 int* pmatches; /* pmatches[k] points to the next i 1825 * such that i+1...mlen is a substring 1826 * of k+1...k+mlen-i-1 1827 */ 1828 1829 /* Avoid making errors worse */ 1830 if (p->error != 0) 1831 return; 1832 1833 pmatches = malloc(g->mlen * sizeof(unsigned int)); 1834 if (pmatches == NULL) { 1835 g->matchjump = NULL; 1836 return; 1837 } 1838 1839 g->matchjump = malloc(g->mlen * sizeof(unsigned int)); 1840 if (g->matchjump == NULL) /* Not a fatal error */ 1841 return; 1842 1843 /* Set maximum possible jump for each character in the pattern */ 1844 for (mindex = 0; mindex < g->mlen; mindex++) 1845 g->matchjump[mindex] = 2*g->mlen - mindex - 1; 1846 1847 /* Compute pmatches[] */ 1848 for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0; 1849 mindex--, suffix--) { 1850 pmatches[mindex] = suffix; 1851 1852 /* If a mismatch is found, interrupting the substring, 1853 * compute the matchjump for that position. If no 1854 * mismatch is found, then a text substring mismatched 1855 * against the suffix will also mismatch against the 1856 * substring. 1857 */ 1858 while (suffix < g->mlen 1859 && g->must[mindex] != g->must[suffix]) { 1860 g->matchjump[suffix] = MIN(g->matchjump[suffix], 1861 g->mlen - mindex - 1); 1862 suffix = pmatches[suffix]; 1863 } 1864 } 1865 1866 /* Compute the matchjump up to the last substring found to jump 1867 * to the beginning of the largest must pattern prefix matching 1868 * it's own suffix. 1869 */ 1870 for (mindex = 0; mindex <= suffix; mindex++) 1871 g->matchjump[mindex] = MIN(g->matchjump[mindex], 1872 g->mlen + suffix - mindex); 1873 1874 ssuffix = pmatches[suffix]; 1875 while(suffix < g->mlen) { 1876 while(suffix <= ssuffix) { 1877 g->matchjump[suffix] = MIN(g->matchjump[suffix], 1878 g->mlen + ssuffix - suffix); 1879 suffix++; 1880 } 1881 ssuffix = pmatches[ssuffix]; 1882 } 1883 1884 free(pmatches); 1885} 1886 1887/* |
|
1744 - pluscount - count + nesting 1745 == static sopno pluscount(register struct parse *p, register struct re_guts *g); 1746 */ 1747static sopno /* nesting depth */ 1748pluscount(p, g) 1749struct parse *p; 1750register struct re_guts *g; 1751{ --- 26 unchanged lines hidden --- | 1888 - pluscount - count + nesting 1889 == static sopno pluscount(register struct parse *p, register struct re_guts *g); 1890 */ 1891static sopno /* nesting depth */ 1892pluscount(p, g) 1893struct parse *p; 1894register struct re_guts *g; 1895{ --- 26 unchanged lines hidden --- |