1/* vi:set ts=8 sts=4 sw=4:
2 *
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4 *
5 * NOTICE:
6 *
7 * This is NOT the original regular expression code as written by Henry
8 * Spencer.  This code has been modified specifically for use with the VIM
9 * editor, and should not be used separately from Vim.  If you want a good
10 * regular expression library, get the original code.  The copyright notice
11 * that follows is from the original.
12 *
13 * END NOTICE
14 *
15 *	Copyright (c) 1986 by University of Toronto.
16 *	Written by Henry Spencer.  Not derived from licensed software.
17 *
18 *	Permission is granted to anyone to use this software for any
19 *	purpose on any computer system, and to redistribute it freely,
20 *	subject to the following restrictions:
21 *
22 *	1. The author is not responsible for the consequences of use of
23 *		this software, no matter how awful, even if they arise
24 *		from defects in it.
25 *
26 *	2. The origin of this software must not be misrepresented, either
27 *		by explicit claim or by omission.
28 *
29 *	3. Altered versions must be plainly marked as such, and must not
30 *		be misrepresented as being the original software.
31 *
32 * Beware that some of this code is subtly aware of the way operator
33 * precedence is structured in regular expressions.  Serious changes in
34 * regular-expression syntax might require a total rethink.
35 *
36 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37 * Webb, Ciaran McCreesh and Bram Moolenaar.
38 * Named character class support added by Walter Briscoe (1998 Jul 01)
39 */
40
41#include "vim.h"
42
43#undef DEBUG
44
45/*
46 * The "internal use only" fields in regexp.h are present to pass info from
47 * compile to execute that permits the execute phase to run lots faster on
48 * simple cases.  They are:
49 *
50 * regstart	char that must begin a match; NUL if none obvious; Can be a
51 *		multi-byte character.
52 * reganch	is the match anchored (at beginning-of-line only)?
53 * regmust	string (pointer into program) that match must include, or NULL
54 * regmlen	length of regmust string
55 * regflags	RF_ values or'ed together
56 *
57 * Regstart and reganch permit very fast decisions on suitable starting points
58 * for a match, cutting down the work a lot.  Regmust permits fast rejection
59 * of lines that cannot possibly match.  The regmust tests are costly enough
60 * that vim_regcomp() supplies a regmust only if the r.e. contains something
61 * potentially expensive (at present, the only such thing detected is * or +
62 * at the start of the r.e., which can involve a lot of backup).  Regmlen is
63 * supplied because the test in vim_regexec() needs it and vim_regcomp() is
64 * computing it anyway.
65 */
66
67/*
68 * Structure for regexp "program".  This is essentially a linear encoding
69 * of a nondeterministic finite-state machine (aka syntax charts or
70 * "railroad normal form" in parsing technology).  Each node is an opcode
71 * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies:	an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
 * node points to the node after the stuff to be repeated.
78 * The operand of some types of node is a literal string; for others, it is a
79 * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
80 * is the first node of the branch.
81 * (NB this is *not* a tree structure: the tail of the branch connects to the
82 * thing following the set of BRANCHes.)
83 *
84 * pattern	is coded like:
85 *
86 *			  +-----------------+
87 *			  |		    V
88 * <aa>\|<bb>	BRANCH <aa> BRANCH <bb> --> END
89 *		     |	    ^	 |	    ^
90 *		     +------+	 +----------+
91 *
92 *
93 *		       +------------------+
94 *		       V		  |
95 * <aa>*	BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
96 *		     |	    |		    ^			   ^
97 *		     |	    +---------------+			   |
98 *		     +---------------------------------------------+
99 *
100 *
101 *		       +----------------------+
102 *		       V		      |
103 * <aa>\+	BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
104 *		     |		     |		 ^			^
105 *		     |		     +-----------+			|
106 *		     +--------------------------------------------------+
107 *
108 *
109 *					+-------------------------+
110 *					V			  |
111 * <aa>\{}	BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
112 *		     |				    |		     ^
113 *		     |				    +----------------+
114 *		     +-----------------------------------------------+
115 *
116 *
117 * <aa>\@!<bb>	BRANCH NOMATCH <aa> --> END  <bb> --> END
118 *		     |	     |		      ^       ^
119 *		     |	     +----------------+       |
120 *		     +--------------------------------+
121 *
122 *						      +---------+
123 *						      |		V
124 * \z[abc]	BRANCH BRANCH  a  BRANCH  b  BRANCH  c	BRANCH	NOTHING --> END
125 *		     |	    |	       |	  |	^		    ^
126 *		     |	    |	       |	  +-----+		    |
127 *		     |	    |	       +----------------+		    |
128 *		     |	    +---------------------------+		    |
129 *		     +------------------------------------------------------+
130 *
131 * They all start with a BRANCH for "\|" alternatives, even when there is only
132 * one alternative.
133 */
134
135/*
136 * The opcodes are:
137 */
138
/*
 * Basic opcodes.  The opcode is the first char of a node and is read back
 * with OP().  The "opnd?" column tells what kind of operand, if any, follows
 * the node: "node" is a sub-program, "str" a literal string, "nr" a number.
 */
/* definition	number		   opnd?    meaning */
#define END		0	/*	End of program or NOMATCH operand. */
#define BOL		1	/*	Match "" at beginning of line. */
#define EOL		2	/*	Match "" at end of line. */
#define BRANCH		3	/* node Match this alternative, or the
				 *	next... */
#define BACK		4	/*	Match "", "next" ptr points backward. */
#define EXACTLY		5	/* str	Match this string. */
#define NOTHING		6	/*	Match empty string. */
#define STAR		7	/* node Match this (simple) thing 0 or more
				 *	times. */
#define PLUS		8	/* node Match this (simple) thing 1 or more
				 *	times. */
#define MATCH		9	/* node match the operand zero-width */
#define NOMATCH		10	/* node check for no match with operand */
#define BEHIND		11	/* node look behind for a match with operand */
#define NOBEHIND	12	/* node look behind for no match with operand */
#define SUBPAT		13	/* node match the operand here */
#define BRACE_SIMPLE	14	/* node Match this (simple) thing between m and
				 *	n times (\{m,n\}). */
#define BOW		15	/*	Match "" after [^a-zA-Z0-9_] */
#define EOW		16	/*	Match "" at    [^a-zA-Z0-9_] */
#define BRACE_LIMITS	17	/* nr nr  define the min & max for BRACE_SIMPLE
				 *	and BRACE_COMPLEX. */
#define NEWL		18	/*	Match line-break */
#define BHPOS		19	/*	End position for BEHIND or NOBEHIND */
165
166
/*
 * Character classes: 20-48 are the normal versions, 50-78 (the normal value
 * plus ADD_NL) are the versions that also match a line-break.
 */
#define ADD_NL		30
/* First opcode of the "with line-break" range.  Parenthesized so that the
 * sum stays intact when the macro is used inside a larger expression. */
#define FIRST_NL	(ANY + ADD_NL)
#define ANY		20	/*	Match any one character. */
#define ANYOF		21	/* str	Match any character in this string. */
#define ANYBUT		22	/* str	Match any character not in this
				 *	string. */
#define IDENT		23	/*	Match identifier char */
#define SIDENT		24	/*	Match identifier char but no digit */
#define KWORD		25	/*	Match keyword char */
#define SKWORD		26	/*	Match word char but no digit */
#define FNAME		27	/*	Match file name char */
#define SFNAME		28	/*	Match file name char but no digit */
#define PRINT		29	/*	Match printable char */
#define SPRINT		30	/*	Match printable char but no digit */
#define WHITE		31	/*	Match whitespace char */
#define NWHITE		32	/*	Match non-whitespace char */
#define DIGIT		33	/*	Match digit char */
#define NDIGIT		34	/*	Match non-digit char */
#define HEX		35	/*	Match hex char */
#define NHEX		36	/*	Match non-hex char */
#define OCTAL		37	/*	Match octal char */
#define NOCTAL		38	/*	Match non-octal char */
#define WORD		39	/*	Match word char */
#define NWORD		40	/*	Match non-word char */
#define HEAD		41	/*	Match head char */
#define NHEAD		42	/*	Match non-head char */
#define ALPHA		43	/*	Match alpha char */
#define NALPHA		44	/*	Match non-alpha char */
#define LOWER		45	/*	Match lowercase char */
#define NLOWER		46	/*	Match non-lowercase char */
#define UPPER		47	/*	Match uppercase char */
#define NUPPER		48	/*	Match non-uppercase char */
/* Last opcode of the "with line-break" range; parenthesized like FIRST_NL. */
#define LAST_NL		(NUPPER + ADD_NL)
/* Non-zero when "op" is a character class opcode of the kind that also
 * matches a line-break, i.e. lies in FIRST_NL .. LAST_NL. */
#define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
202
/*
 * Opcodes that come in groups of ten: the value used is the base defined
 * here plus an index.  The "-NN" at the start of a comment gives the last
 * value of the group.
 */
#define MOPEN		80  /* -89	 Mark this point in input as start of
				 *	 \( subexpr.  MOPEN + 0 marks start of
				 *	 match. */
#define MCLOSE		90  /* -99	 Analogous to MOPEN.  MCLOSE + 0 marks
				 *	 end of match. */
#define BACKREF		100 /* -109 node Match same string again \1-\9 */

#ifdef FEAT_SYN_HL
# define ZOPEN		110 /* -119	 Mark this point in input as start of
				 *	 \z( subexpr. */
# define ZCLOSE		120 /* -129	 Analogous to ZOPEN. */
# define ZREF		130 /* -139 node Match external submatch \z1-\z9 */
#endif

#define BRACE_COMPLEX	140 /* -149 node Match nodes between m & n times */

/* The remaining opcodes are single values again. */
#define NOPEN		150	/*	Mark this point in input as start of
				 *	\%( subexpr. */
#define NCLOSE		151	/*	Analogous to NOPEN. */

#define MULTIBYTECODE	200	/* mbc	Match one multi-byte character */
#define RE_BOF		201	/*	Match "" at beginning of file. */
#define RE_EOF		202	/*	Match "" at end of file. */
#define CURSOR		203	/*	Match location of cursor. */

#define RE_LNUM		204	/* nr cmp  Match line number */
#define RE_COL		205	/* nr cmp  Match column number */
#define RE_VCOL		206	/* nr cmp  Match virtual column number */

#define RE_MARK		207	/* mark cmp  Match mark position */
#define RE_VISUAL	208	/*	Match Visual area */
234
235/*
236 * Magic characters have a special meaning, they don't match literally.
237 * Magic characters are negative.  This separates them from literal characters
238 * (possibly multi-byte).  Only ASCII characters can be Magic.
239 */
/* Turn character "x" into its Magic counterpart by subtracting 256; the
 * result is negative for any "x" below 256. */
#define Magic(x)	((int)(x) - 256)
/* Inverse of Magic(): adds the 256 back. */
#define un_Magic(x)	((x) + 256)
/* Non-zero when "x" is a Magic value, i.e. when it is negative. */
#define is_Magic(x)	((x) < 0)
243
244static int no_Magic __ARGS((int x));
245static int toggle_Magic __ARGS((int x));
246
247    static int
248no_Magic(x)
249    int		x;
250{
251    if (is_Magic(x))
252	return un_Magic(x);
253    return x;
254}
255
256    static int
257toggle_Magic(x)
258    int		x;
259{
260    if (is_Magic(x))
261	return un_Magic(x);
262    return Magic(x);
263}
264
265/*
266 * The first byte of the regexp internal "program" is actually this magic
267 * number; the start node begins in the second byte.  It's used to catch the
268 * most severe mutilation of the program by the caller.
269 */
270
271#define REGMAGIC	0234
272
273/*
274 * Opcode notes:
275 *
276 * BRANCH	The set of branches constituting a single choice are hooked
277 *		together with their "next" pointers, since precedence prevents
278 *		anything being concatenated to any individual branch.  The
279 *		"next" pointer of the last BRANCH in a choice points to the
280 *		thing following the whole choice.  This is also where the
281 *		final "next" pointer of each individual branch points; each
282 *		branch starts with the operand node of a BRANCH node.
283 *
284 * BACK		Normal "next" pointers all implicitly point forward; BACK
285 *		exists to make loop structures possible.
286 *
287 * STAR,PLUS	'=', and complex '*' and '+', are implemented as circular
288 *		BRANCH structures using BACK.  Simple cases (one character
289 *		per match) are implemented with STAR and PLUS for speed
290 *		and to minimize recursive plunges.
291 *
292 * BRACE_LIMITS	This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
293 *		node, and defines the min and max limits to be used for that
294 *		node.
295 *
296 * MOPEN,MCLOSE	...are numbered at compile time.
297 * ZOPEN,ZCLOSE	...ditto
298 */
299
300/*
301 * A node is one char of opcode followed by two chars of "next" pointer.
302 * "Next" pointers are stored as two 8-bit bytes, high order first.  The
303 * value is a positive offset from the opcode of the node containing it.
304 * An operand, if any, simply follows the node.  (Note that much of the
305 * code generation knows about this implicit relationship.)
306 *
307 * Using two bytes for the "next" pointer is vast overkill for most things,
308 * but allows patterns to get big without disasters.
309 */
/* Opcode of node "p": the first char of the node. */
#define OP(p)		((int)*(p))
/* Raw "next" offset of node "p": the second char is the high byte, the third
 * char the low byte. */
#define NEXT(p)		(((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
/* Start of the operand of node "p": right after the three chars of opcode
 * and "next" pointer. */
#define OPERAND(p)	((p) + 3)
/* Obtain an operand that was stored as four bytes, MSB first. */
#define OPERAND_MIN(p)	(((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
			+ ((long)(p)[5] << 8) + (long)(p)[6])
/* Obtain a second operand stored as four bytes. */
#define OPERAND_MAX(p)	OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
#define OPERAND_CMP(p)	(p)[7]
320
321/*
322 * Utility definitions.
323 */
/* Fetch the char at "p" through a char_u pointer and widen it to int. */
#define UCHARAT(p)	((int)*(char_u *)(p))

/* Used for an error (down from) vim_regcomp(): give the error message, set
 * rc_did_emsg and return NULL.
 * Note that each of these macros expands to a "return" statement, thus using
 * one leaves the calling function. */
#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
/* Idem, with one argument for the message: an empty string when "c" is
 * non-zero, a backslash otherwise. */
#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
/* Like EMSG_RET_NULL() but the value returned is FAIL. */
#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
/* Give error E369; no backslash is put before "%[]" when reg_magic is
 * MAGIC_ALL. */
#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)

/* NOTE(review): presumably the upper limit for a \{} count - confirm where it
 * is used. */
#define MAX_LIMIT	(32767L << 16L)
334
335static int re_multi_type __ARGS((int));
336static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
337static char_u *cstrchr __ARGS((char_u *, int));
338
339#ifdef DEBUG
340static void	regdump __ARGS((char_u *, regprog_T *));
341static char_u	*regprop __ARGS((char_u *));
342#endif
343
344#define NOT_MULTI	0
345#define MULTI_ONE	1
346#define MULTI_MULT	2
347/*
348 * Return NOT_MULTI if c is not a "multi" operator.
349 * Return MULTI_ONE if c is a single "multi" operator.
350 * Return MULTI_MULT if c is a multi "multi" operator.
351 */
352    static int
353re_multi_type(c)
354    int c;
355{
356    if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
357	return MULTI_ONE;
358    if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
359	return MULTI_MULT;
360    return NOT_MULTI;
361}
362
/*
 * Flags to be passed up and down.
 * Every flag is a separate bit, thus they can be or'ed together.  WORST is
 * the value with none of the flags set.
 */
#define HASWIDTH	0x1	/* Known never to match null string. */
#define SIMPLE		0x2	/* Simple enough to be STAR/PLUS operand. */
#define SPSTART		0x4	/* Starts with * or +. */
#define HASNL		0x8	/* Contains some \n. */
#define HASLOOKBH	0x10	/* Contains "\@<=" or "\@<!". */
#define WORST		0	/* Worst case. */
372
373/*
374 * When regcode is set to this value, code is not emitted and size is computed
375 * instead.
376 */
377#define JUST_CALC_SIZE	((char_u *) -1)
378
379static char_u		*reg_prev_sub = NULL;
380
381/*
382 * REGEXP_INRANGE contains all characters which are always special in a []
383 * range after '\'.
384 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
385 * These are:
386 *  \n	- New line (NL).
387 *  \r	- Carriage Return (CR).
388 *  \t	- Tab (TAB).
389 *  \e	- Escape (ESC).
390 *  \b	- Backspace (Ctrl_H).
391 *  \d  - Character code in decimal, eg \d123
 *  \o	- Character code in octal, eg \o40
393 *  \x	- Character code in hex, eg \x4a
394 *  \u	- Multibyte character code, eg \u20ac
395 *  \U	- Long multibyte character code, eg \U12345678
396 */
397static char_u REGEXP_INRANGE[] = "]^-n\\";
398static char_u REGEXP_ABBR[] = "nrtebdoxuU";
399
400static int	backslash_trans __ARGS((int c));
401static int	get_char_class __ARGS((char_u **pp));
402static int	get_equi_class __ARGS((char_u **pp));
403static void	reg_equi_class __ARGS((int c));
404static int	get_coll_element __ARGS((char_u **pp));
405static char_u	*skip_anyof __ARGS((char_u *p));
406static void	init_class_tab __ARGS((void));
407
408/*
409 * Translate '\x' to its control character, except "\n", which is Magic.
410 */
411    static int
412backslash_trans(c)
413    int		c;
414{
415    switch (c)
416    {
417	case 'r':   return CAR;
418	case 't':   return TAB;
419	case 'e':   return ESC;
420	case 'b':   return BS;
421    }
422    return c;
423}
424
425/*
426 * Check for a character class name "[:name:]".  "pp" points to the '['.
427 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
428 * recognized.  Otherwise "pp" is advanced to after the item.
429 */
430    static int
431get_char_class(pp)
432    char_u	**pp;
433{
434    static const char *(class_names[]) =
435    {
436	"alnum:]",
437#define CLASS_ALNUM 0
438	"alpha:]",
439#define CLASS_ALPHA 1
440	"blank:]",
441#define CLASS_BLANK 2
442	"cntrl:]",
443#define CLASS_CNTRL 3
444	"digit:]",
445#define CLASS_DIGIT 4
446	"graph:]",
447#define CLASS_GRAPH 5
448	"lower:]",
449#define CLASS_LOWER 6
450	"print:]",
451#define CLASS_PRINT 7
452	"punct:]",
453#define CLASS_PUNCT 8
454	"space:]",
455#define CLASS_SPACE 9
456	"upper:]",
457#define CLASS_UPPER 10
458	"xdigit:]",
459#define CLASS_XDIGIT 11
460	"tab:]",
461#define CLASS_TAB 12
462	"return:]",
463#define CLASS_RETURN 13
464	"backspace:]",
465#define CLASS_BACKSPACE 14
466	"escape:]",
467#define CLASS_ESCAPE 15
468    };
469#define CLASS_NONE 99
470    int i;
471
472    if ((*pp)[1] == ':')
473    {
474	for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
475	    if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
476	    {
477		*pp += STRLEN(class_names[i]) + 2;
478		return i;
479	    }
480    }
481    return CLASS_NONE;
482}
483
484/*
485 * Specific version of character class functions.
486 * Using a table to keep this fast.
487 */
488static short	class_tab[256];
489
490#define	    RI_DIGIT	0x01
491#define	    RI_HEX	0x02
492#define	    RI_OCTAL	0x04
493#define	    RI_WORD	0x08
494#define	    RI_HEAD	0x10
495#define	    RI_ALPHA	0x20
496#define	    RI_LOWER	0x40
497#define	    RI_UPPER	0x80
498#define	    RI_WHITE	0x100
499
500    static void
501init_class_tab()
502{
503    int		i;
504    static int	done = FALSE;
505
506    if (done)
507	return;
508
509    for (i = 0; i < 256; ++i)
510    {
511	if (i >= '0' && i <= '7')
512	    class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
513	else if (i >= '8' && i <= '9')
514	    class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
515	else if (i >= 'a' && i <= 'f')
516	    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
517#ifdef EBCDIC
518	else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
519						    || (i >= 's' && i <= 'z'))
520#else
521	else if (i >= 'g' && i <= 'z')
522#endif
523	    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
524	else if (i >= 'A' && i <= 'F')
525	    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
526#ifdef EBCDIC
527	else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
528						    || (i >= 'S' && i <= 'Z'))
529#else
530	else if (i >= 'G' && i <= 'Z')
531#endif
532	    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
533	else if (i == '_')
534	    class_tab[i] = RI_WORD + RI_HEAD;
535	else
536	    class_tab[i] = 0;
537    }
538    class_tab[' '] |= RI_WHITE;
539    class_tab['\t'] |= RI_WHITE;
540    done = TRUE;
541}
542
/*
 * Test if character "c" is in a class, using the flags in class_tab[].
 * With FEAT_MBYTE "c" can be above 255; such a character is in none of the
 * classes and is rejected before it is used as an index.
 * "c" is evaluated more than once, it must not have side effects.
 * The argument is in parentheses so that an expression can be passed.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)	((c) < 0x100 && (class_tab[(c)] & RI_DIGIT))
# define ri_hex(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEX))
# define ri_octal(c)	((c) < 0x100 && (class_tab[(c)] & RI_OCTAL))
# define ri_word(c)	((c) < 0x100 && (class_tab[(c)] & RI_WORD))
# define ri_head(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEAD))
# define ri_alpha(c)	((c) < 0x100 && (class_tab[(c)] & RI_ALPHA))
# define ri_lower(c)	((c) < 0x100 && (class_tab[(c)] & RI_LOWER))
# define ri_upper(c)	((c) < 0x100 && (class_tab[(c)] & RI_UPPER))
# define ri_white(c)	((c) < 0x100 && (class_tab[(c)] & RI_WHITE))
#else
# define ri_digit(c)	(class_tab[(c)] & RI_DIGIT)
# define ri_hex(c)	(class_tab[(c)] & RI_HEX)
# define ri_octal(c)	(class_tab[(c)] & RI_OCTAL)
# define ri_word(c)	(class_tab[(c)] & RI_WORD)
# define ri_head(c)	(class_tab[(c)] & RI_HEAD)
# define ri_alpha(c)	(class_tab[(c)] & RI_ALPHA)
# define ri_lower(c)	(class_tab[(c)] & RI_LOWER)
# define ri_upper(c)	(class_tab[(c)] & RI_UPPER)
# define ri_white(c)	(class_tab[(c)] & RI_WHITE)
#endif
564
/* flags for regflags; every flag is a separate bit.  re_multiline() tests
 * RF_HASNL and re_lookbehind() tests RF_LOOKBH in prog->regflags. */
#define RF_ICASE    1	/* ignore case */
#define RF_NOICASE  2	/* don't ignore case */
#define RF_HASNL    4	/* can match a NL */
#define RF_ICOMBINE 8	/* ignore combining characters */
#define RF_LOOKBH   16	/* uses "\@<=" or "\@<!" */
571
572/*
573 * Global work variables for vim_regcomp().
574 */
575
576static char_u	*regparse;	/* Input-scan pointer. */
577static int	prevchr_len;	/* byte length of previous char */
578static int	num_complex_braces; /* Complex \{...} count */
579static int	regnpar;	/* () count. */
580#ifdef FEAT_SYN_HL
581static int	regnzpar;	/* \z() count. */
582static int	re_has_z;	/* \z item detected */
583#endif
584static char_u	*regcode;	/* Code-emit pointer, or JUST_CALC_SIZE */
585static long	regsize;	/* Code size. */
586static int	reg_toolong;	/* TRUE when offset out of range */
587static char_u	had_endbrace[NSUBEXP];	/* flags, TRUE if end of () found */
588static unsigned	regflags;	/* RF_ flags for prog */
589static long	brace_min[10];	/* Minimums for complex brace repeats */
590static long	brace_max[10];	/* Maximums for complex brace repeats */
591static int	brace_count[10]; /* Current counts for complex brace repeats */
592#if defined(FEAT_SYN_HL) || defined(PROTO)
593static int	had_eol;	/* TRUE when EOL found by vim_regcomp() */
594#endif
595static int	one_exactly = FALSE;	/* only do one char for EXACTLY */
596
597static int	reg_magic;	/* magicness of the pattern: */
598#define MAGIC_NONE	1	/* "\V" very unmagic */
599#define MAGIC_OFF	2	/* "\M" or 'magic' off */
600#define MAGIC_ON	3	/* "\m" or 'magic' */
601#define MAGIC_ALL	4	/* "\v" very magic */
602
603static int	reg_string;	/* matching with a string instead of a buffer
604				   line */
605static int	reg_strict;	/* "[abc" is illegal */
606
607/*
608 * META contains all characters that may be magic, except '^' and '$'.
609 */
610
#ifdef EBCDIC
/* For EBCDIC the characters are kept in a string. */
static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
#else
/* META[] is used often enough to justify turning it into a table.
 * META_flags[c] is 1 when character "c" may be magic and 0 when not.
 * There is one row for every 16 character codes; the comment above a row
 * names the characters that have a 1 in it.
 * The table has 127 entries, thus only 0 up to and including '~' (126) can
 * be used as an index. */
static char_u META_flags[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*		   %  &     (  )  *  +	      .    */
    0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
/*     1  2  3	4  5  6  7  8  9	<  =  >  ? */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
/*  @  A     C	D     F     H  I     K	L  M	 O */
    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
/*  P	     S	   U  V  W  X	  Z  [		 _ */
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
/*     a     c	d     f     h  i     k	l  m  n  o */
    0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
/*  p	     s	   u  v  w  x	  z  {	|     ~    */
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
};
#endif
632
633static int	curchr;
634
635/* arguments for reg() */
636#define REG_NOPAREN	0	/* toplevel reg() */
637#define REG_PAREN	1	/* \(\) */
638#define REG_ZPAREN	2	/* \z(\) */
639#define REG_NPAREN	3	/* \%(\) */
640
641/*
642 * Forward declarations for vim_regcomp()'s friends.
643 */
644static void	initchr __ARGS((char_u *));
645static int	getchr __ARGS((void));
646static void	skipchr_keepstart __ARGS((void));
647static int	peekchr __ARGS((void));
648static void	skipchr __ARGS((void));
649static void	ungetchr __ARGS((void));
650static int	gethexchrs __ARGS((int maxinputlen));
651static int	getoctchrs __ARGS((void));
652static int	getdecchrs __ARGS((void));
653static int	coll_get_char __ARGS((void));
654static void	regcomp_start __ARGS((char_u *expr, int flags));
655static char_u	*reg __ARGS((int, int *));
656static char_u	*regbranch __ARGS((int *flagp));
657static char_u	*regconcat __ARGS((int *flagp));
658static char_u	*regpiece __ARGS((int *));
659static char_u	*regatom __ARGS((int *));
660static char_u	*regnode __ARGS((int));
661#ifdef FEAT_MBYTE
662static int	use_multibytecode __ARGS((int c));
663#endif
664static int	prog_magic_wrong __ARGS((void));
665static char_u	*regnext __ARGS((char_u *));
666static void	regc __ARGS((int b));
667#ifdef FEAT_MBYTE
668static void	regmbc __ARGS((int c));
669#else
670# define regmbc(c) regc(c)
671#endif
672static void	reginsert __ARGS((int, char_u *));
673static void	reginsert_limits __ARGS((int, long, long, char_u *));
674static char_u	*re_put_long __ARGS((char_u *pr, long_u val));
675static int	read_limits __ARGS((long *, long *));
676static void	regtail __ARGS((char_u *, char_u *));
677static void	regoptail __ARGS((char_u *, char_u *));
678
679/*
680 * Return TRUE if compiled regular expression "prog" can match a line break.
681 */
682    int
683re_multiline(prog)
684    regprog_T *prog;
685{
686    return (prog->regflags & RF_HASNL);
687}
688
689/*
690 * Return TRUE if compiled regular expression "prog" looks before the start
691 * position (pattern contains "\@<=" or "\@<!").
692 */
693    int
694re_lookbehind(prog)
695    regprog_T *prog;
696{
697    return (prog->regflags & RF_LOOKBH);
698}
699
700/*
701 * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
702 * Returns a character representing the class. Zero means that no item was
703 * recognized.  Otherwise "pp" is advanced to after the item.
704 */
705    static int
706get_equi_class(pp)
707    char_u	**pp;
708{
709    int		c;
710    int		l = 1;
711    char_u	*p = *pp;
712
713    if (p[1] == '=')
714    {
715#ifdef FEAT_MBYTE
716	if (has_mbyte)
717	    l = (*mb_ptr2len)(p + 2);
718#endif
719	if (p[l + 2] == '=' && p[l + 3] == ']')
720	{
721#ifdef FEAT_MBYTE
722	    if (has_mbyte)
723		c = mb_ptr2char(p + 2);
724	    else
725#endif
726		c = p[2];
727	    *pp += l + 4;
728	    return c;
729	}
730    }
731    return 0;
732}
733
#ifdef EBCDIC
/*
 * Table for equivalence class "c". (IBM-1047)
 * Every string is one class.  reg_equi_class() looks for the string that
 * contains the character and then produces all the characters in that
 * string.
 */
char *EQUIVAL_CLASS_C[16] = {
    "A\x62\x63\x64\x65\x66\x67",
    "C\x68",
    "E\x71\x72\x73\x74",
    "I\x75\x76\x77\x78",
    "N\x69",
    "O\xEB\xEC\xED\xEE\xEF",
    "U\xFB\xFC\xFD\xFE",
    "Y\xBA",
    "a\x42\x43\x44\x45\x46\x47",
    "c\x48",
    "e\x51\x52\x53\x54",
    "i\x55\x56\x57\x58",
    "n\x49",
    "o\xCB\xCC\xCD\xCE\xCF",
    "u\xDB\xDC\xDD\xDE",
    "y\x8D\xDF",
};
#endif
757
/*
 * Produce the bytes for equivalence class "c".
 * Currently only handles latin1, latin9 and utf-8.
 * When "c" is in one of the known classes every character of that class is
 * produced with regmbc().  Otherwise only "c" itself is produced.
 */
    static void
reg_equi_class(c)
    int	    c;
{
#ifdef FEAT_MBYTE
    if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
					 || STRCMP(p_enc, "iso-8859-15") == 0)
#endif
    {
#ifdef EBCDIC
	int i;

	/* This might be slower than switch/case below. */
	for (i = 0; i < 16; i++)
	{
	    if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
	    {
		char *p = EQUIVAL_CLASS_C[i];

		while (*p != 0)
		    regmbc(*p++);
		return;
	    }
	}
#else
	/* The characters above 127 are written as octal numbers and not as
	 * character constants: '\300' is a negative number when "char" is
	 * signed.  It would then never be equal to a character code in the
	 * range 128 to 255 and a negative value would be passed to
	 * regmbc(). */
	switch (c)
	{
	    case 'A': case 0300: case 0301: case 0302:
	    case 0303: case 0304: case 0305:
		      regmbc('A'); regmbc(0300); regmbc(0301);
		      regmbc(0302); regmbc(0303); regmbc(0304);
		      regmbc(0305);
		      return;
	    case 'C': case 0307:
		      regmbc('C'); regmbc(0307);
		      return;
	    case 'E': case 0310: case 0311: case 0312: case 0313:
		      regmbc('E'); regmbc(0310); regmbc(0311);
		      regmbc(0312); regmbc(0313);
		      return;
	    case 'I': case 0314: case 0315: case 0316: case 0317:
		      regmbc('I'); regmbc(0314); regmbc(0315);
		      regmbc(0316); regmbc(0317);
		      return;
	    case 'N': case 0321:
		      regmbc('N'); regmbc(0321);
		      return;
	    case 'O': case 0322: case 0323: case 0324: case 0325:
	    case 0326:
		      regmbc('O'); regmbc(0322); regmbc(0323);
		      regmbc(0324); regmbc(0325); regmbc(0326);
		      return;
	    case 'U': case 0331: case 0332: case 0333: case 0334:
		      regmbc('U'); regmbc(0331); regmbc(0332);
		      regmbc(0333); regmbc(0334);
		      return;
	    case 'Y': case 0335:
		      regmbc('Y'); regmbc(0335);
		      return;
	    case 'a': case 0340: case 0341: case 0342:
	    case 0343: case 0344: case 0345:
		      regmbc('a'); regmbc(0340); regmbc(0341);
		      regmbc(0342); regmbc(0343); regmbc(0344);
		      regmbc(0345);
		      return;
	    case 'c': case 0347:
		      regmbc('c'); regmbc(0347);
		      return;
	    case 'e': case 0350: case 0351: case 0352: case 0353:
		      regmbc('e'); regmbc(0350); regmbc(0351);
		      regmbc(0352); regmbc(0353);
		      return;
	    case 'i': case 0354: case 0355: case 0356: case 0357:
		      regmbc('i'); regmbc(0354); regmbc(0355);
		      regmbc(0356); regmbc(0357);
		      return;
	    case 'n': case 0361:
		      regmbc('n'); regmbc(0361);
		      return;
	    case 'o': case 0362: case 0363: case 0364: case 0365:
	    case 0366:
		      regmbc('o'); regmbc(0362); regmbc(0363);
		      regmbc(0364); regmbc(0365); regmbc(0366);
		      return;
	    case 'u': case 0371: case 0372: case 0373: case 0374:
		      regmbc('u'); regmbc(0371); regmbc(0372);
		      regmbc(0373); regmbc(0374);
		      return;
	    case 'y': case 0375: case 0377:
		      regmbc('y'); regmbc(0375); regmbc(0377);
		      return;
	}
#endif
    }
    /* Not in a known class: the character only matches itself. */
    regmbc(c);
}
858
859/*
860 * Check for a collating element "[.a.]".  "pp" points to the '['.
861 * Returns a character. Zero means that no item was recognized.  Otherwise
862 * "pp" is advanced to after the item.
863 * Currently only single characters are recognized!
864 */
865    static int
866get_coll_element(pp)
867    char_u	**pp;
868{
869    int		c;
870    int		l = 1;
871    char_u	*p = *pp;
872
873    if (p[1] == '.')
874    {
875#ifdef FEAT_MBYTE
876	if (has_mbyte)
877	    l = (*mb_ptr2len)(p + 2);
878#endif
879	if (p[l + 2] == '.' && p[l + 3] == ']')
880	{
881#ifdef FEAT_MBYTE
882	    if (has_mbyte)
883		c = mb_ptr2char(p + 2);
884	    else
885#endif
886		c = p[2];
887	    *pp += l + 4;
888	    return c;
889	}
890    }
891    return 0;
892}
893
894
895/*
896 * Skip over a "[]" range.
897 * "p" must point to the character after the '['.
898 * The returned pointer is on the matching ']', or the terminating NUL.
899 */
900    static char_u *
901skip_anyof(p)
902    char_u	*p;
903{
904    int		cpo_lit;	/* 'cpoptions' contains 'l' flag */
905    int		cpo_bsl;	/* 'cpoptions' contains '\' flag */
906#ifdef FEAT_MBYTE
907    int		l;
908#endif
909
910    cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
911    cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
912
913    if (*p == '^')	/* Complement of range. */
914	++p;
915    if (*p == ']' || *p == '-')
916	++p;
917    while (*p != NUL && *p != ']')
918    {
919#ifdef FEAT_MBYTE
920	if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
921	    p += l;
922	else
923#endif
924	    if (*p == '-')
925	    {
926		++p;
927		if (*p != ']' && *p != NUL)
928		    mb_ptr_adv(p);
929	    }
930	else if (*p == '\\'
931		&& !cpo_bsl
932		&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
933		    || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
934	    p += 2;
935	else if (*p == '[')
936	{
937	    if (get_char_class(&p) == CLASS_NONE
938		    && get_equi_class(&p) == 0
939		    && get_coll_element(&p) == 0)
940		++p; /* It was not a class name */
941	}
942	else
943	    ++p;
944    }
945
946    return p;
947}
948
949/*
950 * Skip past regular expression.
951 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
952 * Take care of characters with a backslash in front of it.
953 * Skip strings inside [ and ].
954 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
955 * expression and change "\?" to "?".  If "*newp" is not NULL the expression
956 * is changed in-place.
957 */
958    char_u *
959skip_regexp(startp, dirc, magic, newp)
960    char_u	*startp;
961    int		dirc;
962    int		magic;
963    char_u	**newp;
964{
965    int		mymagic;
966    char_u	*p = startp;
967
968    if (magic)
969	mymagic = MAGIC_ON;
970    else
971	mymagic = MAGIC_OFF;
972
973    for (; p[0] != NUL; mb_ptr_adv(p))
974    {
975	if (p[0] == dirc)	/* found end of regexp */
976	    break;
977	if ((p[0] == '[' && mymagic >= MAGIC_ON)
978		|| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
979	{
980	    p = skip_anyof(p + 1);
981	    if (p[0] == NUL)
982		break;
983	}
984	else if (p[0] == '\\' && p[1] != NUL)
985	{
986	    if (dirc == '?' && newp != NULL && p[1] == '?')
987	    {
988		/* change "\?" to "?", make a copy first. */
989		if (*newp == NULL)
990		{
991		    *newp = vim_strsave(startp);
992		    if (*newp != NULL)
993			p = *newp + (p - startp);
994		}
995		if (*newp != NULL)
996		    STRMOVE(p, p + 1);
997		else
998		    ++p;
999	    }
1000	    else
1001		++p;    /* skip next character */
1002	    if (*p == 'v')
1003		mymagic = MAGIC_ALL;
1004	    else if (*p == 'V')
1005		mymagic = MAGIC_NONE;
1006	}
1007    }
1008    return p;
1009}
1010
1011/*
1012 * vim_regcomp() - compile a regular expression into internal code
1013 * Returns the program in allocated space.  Returns NULL for an error.
1014 *
1015 * We can't allocate space until we know how big the compiled form will be,
1016 * but we can't compile it (and thus know how big it is) until we've got a
1017 * place to put the code.  So we cheat:  we compile it twice, once with code
1018 * generation turned off and size counting turned on, and once "for real".
1019 * This also means that we don't allocate space until we are sure that the
1020 * thing really will compile successfully, and we never have to move the
1021 * code and thus invalidate pointers into it.  (Note that it has to be in
1022 * one piece because vim_free() must be able to free it all.)
1023 *
1024 * Whether upper/lower case is to be ignored is decided when executing the
1025 * program, it does not matter here.
1026 *
1027 * Beware that the optimization-preparation code in here knows about some
1028 * of the structure of the compiled regexp.
1029 * "re_flags": RE_MAGIC and/or RE_STRING.
1030 */
1031    regprog_T *
1032vim_regcomp(expr, re_flags)
1033    char_u	*expr;
1034    int		re_flags;
1035{
1036    regprog_T	*r;
1037    char_u	*scan;
1038    char_u	*longest;
1039    int		len;
1040    int		flags;
1041
1042    if (expr == NULL)
1043	EMSG_RET_NULL(_(e_null));
1044
1045    init_class_tab();
1046
1047    /*
1048     * First pass: determine size, legality.
1049     */
1050    regcomp_start(expr, re_flags);
1051    regcode = JUST_CALC_SIZE;
1052    regc(REGMAGIC);
1053    if (reg(REG_NOPAREN, &flags) == NULL)
1054	return NULL;
1055
1056    /* Small enough for pointer-storage convention? */
1057#ifdef SMALL_MALLOC		/* 16 bit storage allocation */
1058    if (regsize >= 65536L - 256L)
1059	EMSG_RET_NULL(_("E339: Pattern too long"));
1060#endif
1061
1062    /* Allocate space. */
1063    r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
1064    if (r == NULL)
1065	return NULL;
1066
1067    /*
1068     * Second pass: emit code.
1069     */
1070    regcomp_start(expr, re_flags);
1071    regcode = r->program;
1072    regc(REGMAGIC);
1073    if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
1074    {
1075	vim_free(r);
1076	if (reg_toolong)
1077	    EMSG_RET_NULL(_("E339: Pattern too long"));
1078	return NULL;
1079    }
1080
1081    /* Dig out information for optimizations. */
1082    r->regstart = NUL;		/* Worst-case defaults. */
1083    r->reganch = 0;
1084    r->regmust = NULL;
1085    r->regmlen = 0;
1086    r->regflags = regflags;
1087    if (flags & HASNL)
1088	r->regflags |= RF_HASNL;
1089    if (flags & HASLOOKBH)
1090	r->regflags |= RF_LOOKBH;
1091#ifdef FEAT_SYN_HL
1092    /* Remember whether this pattern has any \z specials in it. */
1093    r->reghasz = re_has_z;
1094#endif
1095    scan = r->program + 1;	/* First BRANCH. */
1096    if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
1097    {
1098	scan = OPERAND(scan);
1099
1100	/* Starting-point info. */
1101	if (OP(scan) == BOL || OP(scan) == RE_BOF)
1102	{
1103	    r->reganch++;
1104	    scan = regnext(scan);
1105	}
1106
1107	if (OP(scan) == EXACTLY)
1108	{
1109#ifdef FEAT_MBYTE
1110	    if (has_mbyte)
1111		r->regstart = (*mb_ptr2char)(OPERAND(scan));
1112	    else
1113#endif
1114		r->regstart = *OPERAND(scan);
1115	}
1116	else if ((OP(scan) == BOW
1117		    || OP(scan) == EOW
1118		    || OP(scan) == NOTHING
1119		    || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1120		    || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1121		 && OP(regnext(scan)) == EXACTLY)
1122	{
1123#ifdef FEAT_MBYTE
1124	    if (has_mbyte)
1125		r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1126	    else
1127#endif
1128		r->regstart = *OPERAND(regnext(scan));
1129	}
1130
1131	/*
1132	 * If there's something expensive in the r.e., find the longest
1133	 * literal string that must appear and make it the regmust.  Resolve
1134	 * ties in favor of later strings, since the regstart check works
1135	 * with the beginning of the r.e. and avoiding duplication
1136	 * strengthens checking.  Not a strong reason, but sufficient in the
1137	 * absence of others.
1138	 */
1139	/*
1140	 * When the r.e. starts with BOW, it is faster to look for a regmust
1141	 * first. Used a lot for "#" and "*" commands. (Added by mool).
1142	 */
1143	if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1144							  && !(flags & HASNL))
1145	{
1146	    longest = NULL;
1147	    len = 0;
1148	    for (; scan != NULL; scan = regnext(scan))
1149		if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1150		{
1151		    longest = OPERAND(scan);
1152		    len = (int)STRLEN(OPERAND(scan));
1153		}
1154	    r->regmust = longest;
1155	    r->regmlen = len;
1156	}
1157    }
1158#ifdef DEBUG
1159    regdump(expr, r);
1160#endif
1161    return r;
1162}
1163
1164/*
1165 * Setup to parse the regexp.  Used once to get the length and once to do it.
1166 */
1167    static void
1168regcomp_start(expr, re_flags)
1169    char_u	*expr;
1170    int		re_flags;	    /* see vim_regcomp() */
1171{
1172    initchr(expr);
1173    if (re_flags & RE_MAGIC)
1174	reg_magic = MAGIC_ON;
1175    else
1176	reg_magic = MAGIC_OFF;
1177    reg_string = (re_flags & RE_STRING);
1178    reg_strict = (re_flags & RE_STRICT);
1179
1180    num_complex_braces = 0;
1181    regnpar = 1;
1182    vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1183#ifdef FEAT_SYN_HL
1184    regnzpar = 1;
1185    re_has_z = 0;
1186#endif
1187    regsize = 0L;
1188    reg_toolong = FALSE;
1189    regflags = 0;
1190#if defined(FEAT_SYN_HL) || defined(PROTO)
1191    had_eol = FALSE;
1192#endif
1193}
1194
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag.  It is reset by regcomp_start() and set when
 * compiling finds the end-of-line item "$" or "\_$", thus the value tells
 * whether the pattern of the previous vim_regcomp() call contained one.
 * This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1206
1207/*
1208 * reg - regular expression, i.e. main body or parenthesized thing
1209 *
1210 * Caller must absorb opening parenthesis.
1211 *
1212 * Combining parenthesis handling with the base level of regular expression
1213 * is a trifle forced, but the need to tie the tails of the branches to what
1214 * follows makes it hard to avoid.
1215 */
1216    static char_u *
1217reg(paren, flagp)
1218    int		paren;	/* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1219    int		*flagp;
1220{
1221    char_u	*ret;
1222    char_u	*br;
1223    char_u	*ender;
1224    int		parno = 0;
1225    int		flags;
1226
1227    *flagp = HASWIDTH;		/* Tentatively. */
1228
1229#ifdef FEAT_SYN_HL
1230    if (paren == REG_ZPAREN)
1231    {
1232	/* Make a ZOPEN node. */
1233	if (regnzpar >= NSUBEXP)
1234	    EMSG_RET_NULL(_("E50: Too many \\z("));
1235	parno = regnzpar;
1236	regnzpar++;
1237	ret = regnode(ZOPEN + parno);
1238    }
1239    else
1240#endif
1241	if (paren == REG_PAREN)
1242    {
1243	/* Make a MOPEN node. */
1244	if (regnpar >= NSUBEXP)
1245	    EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
1246	parno = regnpar;
1247	++regnpar;
1248	ret = regnode(MOPEN + parno);
1249    }
1250    else if (paren == REG_NPAREN)
1251    {
1252	/* Make a NOPEN node. */
1253	ret = regnode(NOPEN);
1254    }
1255    else
1256	ret = NULL;
1257
1258    /* Pick up the branches, linking them together. */
1259    br = regbranch(&flags);
1260    if (br == NULL)
1261	return NULL;
1262    if (ret != NULL)
1263	regtail(ret, br);	/* [MZ]OPEN -> first. */
1264    else
1265	ret = br;
1266    /* If one of the branches can be zero-width, the whole thing can.
1267     * If one of the branches has * at start or matches a line-break, the
1268     * whole thing can. */
1269    if (!(flags & HASWIDTH))
1270	*flagp &= ~HASWIDTH;
1271    *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1272    while (peekchr() == Magic('|'))
1273    {
1274	skipchr();
1275	br = regbranch(&flags);
1276	if (br == NULL || reg_toolong)
1277	    return NULL;
1278	regtail(ret, br);	/* BRANCH -> BRANCH. */
1279	if (!(flags & HASWIDTH))
1280	    *flagp &= ~HASWIDTH;
1281	*flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1282    }
1283
1284    /* Make a closing node, and hook it on the end. */
1285    ender = regnode(
1286#ifdef FEAT_SYN_HL
1287	    paren == REG_ZPAREN ? ZCLOSE + parno :
1288#endif
1289	    paren == REG_PAREN ? MCLOSE + parno :
1290	    paren == REG_NPAREN ? NCLOSE : END);
1291    regtail(ret, ender);
1292
1293    /* Hook the tails of the branches to the closing node. */
1294    for (br = ret; br != NULL; br = regnext(br))
1295	regoptail(br, ender);
1296
1297    /* Check for proper termination. */
1298    if (paren != REG_NOPAREN && getchr() != Magic(')'))
1299    {
1300#ifdef FEAT_SYN_HL
1301	if (paren == REG_ZPAREN)
1302	    EMSG_RET_NULL(_("E52: Unmatched \\z("));
1303	else
1304#endif
1305	    if (paren == REG_NPAREN)
1306	    EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
1307	else
1308	    EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
1309    }
1310    else if (paren == REG_NOPAREN && peekchr() != NUL)
1311    {
1312	if (curchr == Magic(')'))
1313	    EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
1314	else
1315	    EMSG_RET_NULL(_(e_trailing));	/* "Can't happen". */
1316	/* NOTREACHED */
1317    }
1318    /*
1319     * Here we set the flag allowing back references to this set of
1320     * parentheses.
1321     */
1322    if (paren == REG_PAREN)
1323	had_endbrace[parno] = TRUE;	/* have seen the close paren */
1324    return ret;
1325}
1326
1327/*
1328 * Handle one alternative of an | operator.
1329 * Implements the & operator.
1330 */
1331    static char_u *
1332regbranch(flagp)
1333    int		*flagp;
1334{
1335    char_u	*ret;
1336    char_u	*chain = NULL;
1337    char_u	*latest;
1338    int		flags;
1339
1340    *flagp = WORST | HASNL;		/* Tentatively. */
1341
1342    ret = regnode(BRANCH);
1343    for (;;)
1344    {
1345	latest = regconcat(&flags);
1346	if (latest == NULL)
1347	    return NULL;
1348	/* If one of the branches has width, the whole thing has.  If one of
1349	 * the branches anchors at start-of-line, the whole thing does.
1350	 * If one of the branches uses look-behind, the whole thing does. */
1351	*flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1352	/* If one of the branches doesn't match a line-break, the whole thing
1353	 * doesn't. */
1354	*flagp &= ~HASNL | (flags & HASNL);
1355	if (chain != NULL)
1356	    regtail(chain, latest);
1357	if (peekchr() != Magic('&'))
1358	    break;
1359	skipchr();
1360	regtail(latest, regnode(END)); /* operand ends */
1361	if (reg_toolong)
1362	    break;
1363	reginsert(MATCH, latest);
1364	chain = latest;
1365    }
1366
1367    return ret;
1368}
1369
1370/*
1371 * Handle one alternative of an | or & operator.
1372 * Implements the concatenation operator.
1373 */
1374    static char_u *
1375regconcat(flagp)
1376    int		*flagp;
1377{
1378    char_u	*first = NULL;
1379    char_u	*chain = NULL;
1380    char_u	*latest;
1381    int		flags;
1382    int		cont = TRUE;
1383
1384    *flagp = WORST;		/* Tentatively. */
1385
1386    while (cont)
1387    {
1388	switch (peekchr())
1389	{
1390	    case NUL:
1391	    case Magic('|'):
1392	    case Magic('&'):
1393	    case Magic(')'):
1394			    cont = FALSE;
1395			    break;
1396	    case Magic('Z'):
1397#ifdef FEAT_MBYTE
1398			    regflags |= RF_ICOMBINE;
1399#endif
1400			    skipchr_keepstart();
1401			    break;
1402	    case Magic('c'):
1403			    regflags |= RF_ICASE;
1404			    skipchr_keepstart();
1405			    break;
1406	    case Magic('C'):
1407			    regflags |= RF_NOICASE;
1408			    skipchr_keepstart();
1409			    break;
1410	    case Magic('v'):
1411			    reg_magic = MAGIC_ALL;
1412			    skipchr_keepstart();
1413			    curchr = -1;
1414			    break;
1415	    case Magic('m'):
1416			    reg_magic = MAGIC_ON;
1417			    skipchr_keepstart();
1418			    curchr = -1;
1419			    break;
1420	    case Magic('M'):
1421			    reg_magic = MAGIC_OFF;
1422			    skipchr_keepstart();
1423			    curchr = -1;
1424			    break;
1425	    case Magic('V'):
1426			    reg_magic = MAGIC_NONE;
1427			    skipchr_keepstart();
1428			    curchr = -1;
1429			    break;
1430	    default:
1431			    latest = regpiece(&flags);
1432			    if (latest == NULL || reg_toolong)
1433				return NULL;
1434			    *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1435			    if (chain == NULL)	/* First piece. */
1436				*flagp |= flags & SPSTART;
1437			    else
1438				regtail(chain, latest);
1439			    chain = latest;
1440			    if (first == NULL)
1441				first = latest;
1442			    break;
1443	}
1444    }
1445    if (first == NULL)		/* Loop ran zero times. */
1446	first = regnode(NOTHING);
1447    return first;
1448}
1449
1450/*
1451 * regpiece - something followed by possible [*+=]
1452 *
1453 * Note that the branching code sequences used for = and the general cases
1454 * of * and + are somewhat optimized:  they use the same NOTHING node as
1455 * both the endmarker for their branch list and the body of the last branch.
1456 * It might seem that this node could be dispensed with entirely, but the
1457 * endmarker role is not redundant.
1458 */
1459    static char_u *
1460regpiece(flagp)
1461    int		    *flagp;
1462{
1463    char_u	    *ret;
1464    int		    op;
1465    char_u	    *next;
1466    int		    flags;
1467    long	    minval;
1468    long	    maxval;
1469
1470    ret = regatom(&flags);
1471    if (ret == NULL)
1472	return NULL;
1473
1474    op = peekchr();
1475    if (re_multi_type(op) == NOT_MULTI)
1476    {
1477	*flagp = flags;
1478	return ret;
1479    }
1480    /* default flags */
1481    *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1482
1483    skipchr();
1484    switch (op)
1485    {
1486	case Magic('*'):
1487	    if (flags & SIMPLE)
1488		reginsert(STAR, ret);
1489	    else
1490	    {
1491		/* Emit x* as (x&|), where & means "self". */
1492		reginsert(BRANCH, ret); /* Either x */
1493		regoptail(ret, regnode(BACK));	/* and loop */
1494		regoptail(ret, ret);	/* back */
1495		regtail(ret, regnode(BRANCH));	/* or */
1496		regtail(ret, regnode(NOTHING)); /* null. */
1497	    }
1498	    break;
1499
1500	case Magic('+'):
1501	    if (flags & SIMPLE)
1502		reginsert(PLUS, ret);
1503	    else
1504	    {
1505		/* Emit x+ as x(&|), where & means "self". */
1506		next = regnode(BRANCH); /* Either */
1507		regtail(ret, next);
1508		regtail(regnode(BACK), ret);	/* loop back */
1509		regtail(next, regnode(BRANCH)); /* or */
1510		regtail(ret, regnode(NOTHING)); /* null. */
1511	    }
1512	    *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1513	    break;
1514
1515	case Magic('@'):
1516	    {
1517		int	lop = END;
1518
1519		switch (no_Magic(getchr()))
1520		{
1521		    case '=': lop = MATCH; break;		  /* \@= */
1522		    case '!': lop = NOMATCH; break;		  /* \@! */
1523		    case '>': lop = SUBPAT; break;		  /* \@> */
1524		    case '<': switch (no_Magic(getchr()))
1525			      {
1526				  case '=': lop = BEHIND; break;   /* \@<= */
1527				  case '!': lop = NOBEHIND; break; /* \@<! */
1528			      }
1529		}
1530		if (lop == END)
1531		    EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
1532						      reg_magic == MAGIC_ALL);
1533		/* Look behind must match with behind_pos. */
1534		if (lop == BEHIND || lop == NOBEHIND)
1535		{
1536		    regtail(ret, regnode(BHPOS));
1537		    *flagp |= HASLOOKBH;
1538		}
1539		regtail(ret, regnode(END)); /* operand ends */
1540		reginsert(lop, ret);
1541		break;
1542	    }
1543
1544	case Magic('?'):
1545	case Magic('='):
1546	    /* Emit x= as (x|) */
1547	    reginsert(BRANCH, ret);		/* Either x */
1548	    regtail(ret, regnode(BRANCH));	/* or */
1549	    next = regnode(NOTHING);		/* null. */
1550	    regtail(ret, next);
1551	    regoptail(ret, next);
1552	    break;
1553
1554	case Magic('{'):
1555	    if (!read_limits(&minval, &maxval))
1556		return NULL;
1557	    if (flags & SIMPLE)
1558	    {
1559		reginsert(BRACE_SIMPLE, ret);
1560		reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1561	    }
1562	    else
1563	    {
1564		if (num_complex_braces >= 10)
1565		    EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
1566						      reg_magic == MAGIC_ALL);
1567		reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1568		regoptail(ret, regnode(BACK));
1569		regoptail(ret, ret);
1570		reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1571		++num_complex_braces;
1572	    }
1573	    if (minval > 0 && maxval > 0)
1574		*flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1575	    break;
1576    }
1577    if (re_multi_type(peekchr()) != NOT_MULTI)
1578    {
1579	/* Can't have a multi follow a multi. */
1580	if (peekchr() == Magic('*'))
1581	    sprintf((char *)IObuff, _("E61: Nested %s*"),
1582					    reg_magic >= MAGIC_ON ? "" : "\\");
1583	else
1584	    sprintf((char *)IObuff, _("E62: Nested %s%c"),
1585		reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1586	EMSG_RET_NULL(IObuff);
1587    }
1588
1589    return ret;
1590}
1591
1592/*
1593 * regatom - the lowest level
1594 *
1595 * Optimization:  gobbles an entire sequence of ordinary characters so that
1596 * it can turn them into a single node, which is smaller to store and
1597 * faster to run.  Don't do this when one_exactly is set.
1598 */
1599    static char_u *
1600regatom(flagp)
1601    int		   *flagp;
1602{
1603    char_u	    *ret;
1604    int		    flags;
1605    int		    cpo_lit;	    /* 'cpoptions' contains 'l' flag */
1606    int		    cpo_bsl;	    /* 'cpoptions' contains '\' flag */
1607    int		    c;
1608    static char_u   *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1609    static int	    classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1610				    FNAME, SFNAME, PRINT, SPRINT,
1611				    WHITE, NWHITE, DIGIT, NDIGIT,
1612				    HEX, NHEX, OCTAL, NOCTAL,
1613				    WORD, NWORD, HEAD, NHEAD,
1614				    ALPHA, NALPHA, LOWER, NLOWER,
1615				    UPPER, NUPPER
1616				    };
1617    char_u	    *p;
1618    int		    extra = 0;
1619
1620    *flagp = WORST;		/* Tentatively. */
1621    cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1622    cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1623
1624    c = getchr();
1625    switch (c)
1626    {
1627      case Magic('^'):
1628	ret = regnode(BOL);
1629	break;
1630
1631      case Magic('$'):
1632	ret = regnode(EOL);
1633#if defined(FEAT_SYN_HL) || defined(PROTO)
1634	had_eol = TRUE;
1635#endif
1636	break;
1637
1638      case Magic('<'):
1639	ret = regnode(BOW);
1640	break;
1641
1642      case Magic('>'):
1643	ret = regnode(EOW);
1644	break;
1645
1646      case Magic('_'):
1647	c = no_Magic(getchr());
1648	if (c == '^')		/* "\_^" is start-of-line */
1649	{
1650	    ret = regnode(BOL);
1651	    break;
1652	}
1653	if (c == '$')		/* "\_$" is end-of-line */
1654	{
1655	    ret = regnode(EOL);
1656#if defined(FEAT_SYN_HL) || defined(PROTO)
1657	    had_eol = TRUE;
1658#endif
1659	    break;
1660	}
1661
1662	extra = ADD_NL;
1663	*flagp |= HASNL;
1664
1665	/* "\_[" is character range plus newline */
1666	if (c == '[')
1667	    goto collection;
1668
1669	/* "\_x" is character class plus newline */
1670	/*FALLTHROUGH*/
1671
1672	/*
1673	 * Character classes.
1674	 */
1675      case Magic('.'):
1676      case Magic('i'):
1677      case Magic('I'):
1678      case Magic('k'):
1679      case Magic('K'):
1680      case Magic('f'):
1681      case Magic('F'):
1682      case Magic('p'):
1683      case Magic('P'):
1684      case Magic('s'):
1685      case Magic('S'):
1686      case Magic('d'):
1687      case Magic('D'):
1688      case Magic('x'):
1689      case Magic('X'):
1690      case Magic('o'):
1691      case Magic('O'):
1692      case Magic('w'):
1693      case Magic('W'):
1694      case Magic('h'):
1695      case Magic('H'):
1696      case Magic('a'):
1697      case Magic('A'):
1698      case Magic('l'):
1699      case Magic('L'):
1700      case Magic('u'):
1701      case Magic('U'):
1702	p = vim_strchr(classchars, no_Magic(c));
1703	if (p == NULL)
1704	    EMSG_RET_NULL(_("E63: invalid use of \\_"));
1705#ifdef FEAT_MBYTE
1706	/* When '.' is followed by a composing char ignore the dot, so that
1707	 * the composing char is matched here. */
1708	if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1709	{
1710	    c = getchr();
1711	    goto do_multibyte;
1712	}
1713#endif
1714	ret = regnode(classcodes[p - classchars] + extra);
1715	*flagp |= HASWIDTH | SIMPLE;
1716	break;
1717
1718      case Magic('n'):
1719	if (reg_string)
1720	{
1721	    /* In a string "\n" matches a newline character. */
1722	    ret = regnode(EXACTLY);
1723	    regc(NL);
1724	    regc(NUL);
1725	    *flagp |= HASWIDTH | SIMPLE;
1726	}
1727	else
1728	{
1729	    /* In buffer text "\n" matches the end of a line. */
1730	    ret = regnode(NEWL);
1731	    *flagp |= HASWIDTH | HASNL;
1732	}
1733	break;
1734
1735      case Magic('('):
1736	if (one_exactly)
1737	    EMSG_ONE_RET_NULL;
1738	ret = reg(REG_PAREN, &flags);
1739	if (ret == NULL)
1740	    return NULL;
1741	*flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1742	break;
1743
1744      case NUL:
1745      case Magic('|'):
1746      case Magic('&'):
1747      case Magic(')'):
1748	if (one_exactly)
1749	    EMSG_ONE_RET_NULL;
1750	EMSG_RET_NULL(_(e_internal));	/* Supposed to be caught earlier. */
1751	/* NOTREACHED */
1752
1753      case Magic('='):
1754      case Magic('?'):
1755      case Magic('+'):
1756      case Magic('@'):
1757      case Magic('{'):
1758      case Magic('*'):
1759	c = no_Magic(c);
1760	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1761		(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1762		? "" : "\\", c);
1763	EMSG_RET_NULL(IObuff);
1764	/* NOTREACHED */
1765
1766      case Magic('~'):		/* previous substitute pattern */
1767	    if (reg_prev_sub != NULL)
1768	    {
1769		char_u	    *lp;
1770
1771		ret = regnode(EXACTLY);
1772		lp = reg_prev_sub;
1773		while (*lp != NUL)
1774		    regc(*lp++);
1775		regc(NUL);
1776		if (*reg_prev_sub != NUL)
1777		{
1778		    *flagp |= HASWIDTH;
1779		    if ((lp - reg_prev_sub) == 1)
1780			*flagp |= SIMPLE;
1781		}
1782	    }
1783	    else
1784		EMSG_RET_NULL(_(e_nopresub));
1785	    break;
1786
1787      case Magic('1'):
1788      case Magic('2'):
1789      case Magic('3'):
1790      case Magic('4'):
1791      case Magic('5'):
1792      case Magic('6'):
1793      case Magic('7'):
1794      case Magic('8'):
1795      case Magic('9'):
1796	    {
1797		int		    refnum;
1798
1799		refnum = c - Magic('0');
1800		/*
1801		 * Check if the back reference is legal. We must have seen the
1802		 * close brace.
1803		 * TODO: Should also check that we don't refer to something
1804		 * that is repeated (+*=): what instance of the repetition
1805		 * should we match?
1806		 */
1807		if (!had_endbrace[refnum])
1808		{
1809		    /* Trick: check if "@<=" or "@<!" follows, in which case
1810		     * the \1 can appear before the referenced match. */
1811		    for (p = regparse; *p != NUL; ++p)
1812			if (p[0] == '@' && p[1] == '<'
1813					      && (p[2] == '!' || p[2] == '='))
1814			    break;
1815		    if (*p == NUL)
1816			EMSG_RET_NULL(_("E65: Illegal back reference"));
1817		}
1818		ret = regnode(BACKREF + refnum);
1819	    }
1820	    break;
1821
1822      case Magic('z'):
1823	{
1824	    c = no_Magic(getchr());
1825	    switch (c)
1826	    {
1827#ifdef FEAT_SYN_HL
1828		case '(': if (reg_do_extmatch != REX_SET)
1829			      EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1830			  if (one_exactly)
1831			      EMSG_ONE_RET_NULL;
1832			  ret = reg(REG_ZPAREN, &flags);
1833			  if (ret == NULL)
1834			      return NULL;
1835			  *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1836			  re_has_z = REX_SET;
1837			  break;
1838
1839		case '1':
1840		case '2':
1841		case '3':
1842		case '4':
1843		case '5':
1844		case '6':
1845		case '7':
1846		case '8':
1847		case '9': if (reg_do_extmatch != REX_USE)
1848			      EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1849			  ret = regnode(ZREF + c - '0');
1850			  re_has_z = REX_USE;
1851			  break;
1852#endif
1853
1854		case 's': ret = regnode(MOPEN + 0);
1855			  break;
1856
1857		case 'e': ret = regnode(MCLOSE + 0);
1858			  break;
1859
1860		default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1861	    }
1862	}
1863	break;
1864
1865      case Magic('%'):
1866	{
1867	    c = no_Magic(getchr());
1868	    switch (c)
1869	    {
1870		/* () without a back reference */
1871		case '(':
1872		    if (one_exactly)
1873			EMSG_ONE_RET_NULL;
1874		    ret = reg(REG_NPAREN, &flags);
1875		    if (ret == NULL)
1876			return NULL;
1877		    *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1878		    break;
1879
1880		/* Catch \%^ and \%$ regardless of where they appear in the
1881		 * pattern -- regardless of whether or not it makes sense. */
1882		case '^':
1883		    ret = regnode(RE_BOF);
1884		    break;
1885
1886		case '$':
1887		    ret = regnode(RE_EOF);
1888		    break;
1889
1890		case '#':
1891		    ret = regnode(CURSOR);
1892		    break;
1893
1894		case 'V':
1895		    ret = regnode(RE_VISUAL);
1896		    break;
1897
1898		/* \%[abc]: Emit as a list of branches, all ending at the last
1899		 * branch which matches nothing. */
1900		case '[':
1901			  if (one_exactly)	/* doesn't nest */
1902			      EMSG_ONE_RET_NULL;
1903			  {
1904			      char_u	*lastbranch;
1905			      char_u	*lastnode = NULL;
1906			      char_u	*br;
1907
1908			      ret = NULL;
1909			      while ((c = getchr()) != ']')
1910			      {
1911				  if (c == NUL)
1912				      EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1913						      reg_magic == MAGIC_ALL);
1914				  br = regnode(BRANCH);
1915				  if (ret == NULL)
1916				      ret = br;
1917				  else
1918				      regtail(lastnode, br);
1919
1920				  ungetchr();
1921				  one_exactly = TRUE;
1922				  lastnode = regatom(flagp);
1923				  one_exactly = FALSE;
1924				  if (lastnode == NULL)
1925				      return NULL;
1926			      }
1927			      if (ret == NULL)
1928				  EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1929						      reg_magic == MAGIC_ALL);
1930			      lastbranch = regnode(BRANCH);
1931			      br = regnode(NOTHING);
1932			      if (ret != JUST_CALC_SIZE)
1933			      {
1934				  regtail(lastnode, br);
1935				  regtail(lastbranch, br);
1936				  /* connect all branches to the NOTHING
1937				   * branch at the end */
1938				  for (br = ret; br != lastnode; )
1939				  {
1940				      if (OP(br) == BRANCH)
1941				      {
1942					  regtail(br, lastbranch);
1943					  br = OPERAND(br);
1944				      }
1945				      else
1946					  br = regnext(br);
1947				  }
1948			      }
1949			      *flagp &= ~(HASWIDTH | SIMPLE);
1950			      break;
1951			  }
1952
1953		case 'd':   /* %d123 decimal */
1954		case 'o':   /* %o123 octal */
1955		case 'x':   /* %xab hex 2 */
1956		case 'u':   /* %uabcd hex 4 */
1957		case 'U':   /* %U1234abcd hex 8 */
1958			  {
1959			      int i;
1960
1961			      switch (c)
1962			      {
1963				  case 'd': i = getdecchrs(); break;
1964				  case 'o': i = getoctchrs(); break;
1965				  case 'x': i = gethexchrs(2); break;
1966				  case 'u': i = gethexchrs(4); break;
1967				  case 'U': i = gethexchrs(8); break;
1968				  default:  i = -1; break;
1969			      }
1970
1971			      if (i < 0)
1972				  EMSG_M_RET_NULL(
1973					_("E678: Invalid character after %s%%[dxouU]"),
1974					reg_magic == MAGIC_ALL);
1975#ifdef FEAT_MBYTE
1976			      if (use_multibytecode(i))
1977				  ret = regnode(MULTIBYTECODE);
1978			      else
1979#endif
1980				  ret = regnode(EXACTLY);
1981			      if (i == 0)
1982				  regc(0x0a);
1983			      else
1984#ifdef FEAT_MBYTE
1985				  regmbc(i);
1986#else
1987				  regc(i);
1988#endif
1989			      regc(NUL);
1990			      *flagp |= HASWIDTH;
1991			      break;
1992			  }
1993
1994		default:
1995			  if (VIM_ISDIGIT(c) || c == '<' || c == '>'
1996								 || c == '\'')
1997			  {
1998			      long_u	n = 0;
1999			      int	cmp;
2000
2001			      cmp = c;
2002			      if (cmp == '<' || cmp == '>')
2003				  c = getchr();
2004			      while (VIM_ISDIGIT(c))
2005			      {
2006				  n = n * 10 + (c - '0');
2007				  c = getchr();
2008			      }
2009			      if (c == '\'' && n == 0)
2010			      {
2011				  /* "\%'m", "\%<'m" and "\%>'m": Mark */
2012				  c = getchr();
2013				  ret = regnode(RE_MARK);
2014				  if (ret == JUST_CALC_SIZE)
2015				      regsize += 2;
2016				  else
2017				  {
2018				      *regcode++ = c;
2019				      *regcode++ = cmp;
2020				  }
2021				  break;
2022			      }
2023			      else if (c == 'l' || c == 'c' || c == 'v')
2024			      {
2025				  if (c == 'l')
2026				      ret = regnode(RE_LNUM);
2027				  else if (c == 'c')
2028				      ret = regnode(RE_COL);
2029				  else
2030				      ret = regnode(RE_VCOL);
2031				  if (ret == JUST_CALC_SIZE)
2032				      regsize += 5;
2033				  else
2034				  {
2035				      /* put the number and the optional
2036				       * comparator after the opcode */
2037				      regcode = re_put_long(regcode, n);
2038				      *regcode++ = cmp;
2039				  }
2040				  break;
2041			      }
2042			  }
2043
2044			  EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
2045						      reg_magic == MAGIC_ALL);
2046	    }
2047	}
2048	break;
2049
2050      case Magic('['):
2051collection:
2052	{
2053	    char_u	*lp;
2054
2055	    /*
2056	     * If there is no matching ']', we assume the '[' is a normal
2057	     * character.  This makes 'incsearch' and ":help [" work.
2058	     */
2059	    lp = skip_anyof(regparse);
2060	    if (*lp == ']')	/* there is a matching ']' */
2061	    {
2062		int	startc = -1;	/* > 0 when next '-' is a range */
2063		int	endc;
2064
2065		/*
2066		 * In a character class, different parsing rules apply.
2067		 * Not even \ is special anymore, nothing is.
2068		 */
2069		if (*regparse == '^')	    /* Complement of range. */
2070		{
2071		    ret = regnode(ANYBUT + extra);
2072		    regparse++;
2073		}
2074		else
2075		    ret = regnode(ANYOF + extra);
2076
2077		/* At the start ']' and '-' mean the literal character. */
2078		if (*regparse == ']' || *regparse == '-')
2079		{
2080		    startc = *regparse;
2081		    regc(*regparse++);
2082		}
2083
2084		while (*regparse != NUL && *regparse != ']')
2085		{
2086		    if (*regparse == '-')
2087		    {
2088			++regparse;
2089			/* The '-' is not used for a range at the end and
2090			 * after or before a '\n'. */
2091			if (*regparse == ']' || *regparse == NUL
2092				|| startc == -1
2093				|| (regparse[0] == '\\' && regparse[1] == 'n'))
2094			{
2095			    regc('-');
2096			    startc = '-';	/* [--x] is a range */
2097			}
2098			else
2099			{
2100			    /* Also accept "a-[.z.]" */
2101			    endc = 0;
2102			    if (*regparse == '[')
2103				endc = get_coll_element(&regparse);
2104			    if (endc == 0)
2105			    {
2106#ifdef FEAT_MBYTE
2107				if (has_mbyte)
2108				    endc = mb_ptr2char_adv(&regparse);
2109				else
2110#endif
2111				    endc = *regparse++;
2112			    }
2113
2114			    /* Handle \o40, \x20 and \u20AC style sequences */
2115			    if (endc == '\\' && !cpo_lit && !cpo_bsl)
2116				endc = coll_get_char();
2117
2118			    if (startc > endc)
2119				EMSG_RET_NULL(_(e_invrange));
2120#ifdef FEAT_MBYTE
2121			    if (has_mbyte && ((*mb_char2len)(startc) > 1
2122						 || (*mb_char2len)(endc) > 1))
2123			    {
2124				/* Limit to a range of 256 chars */
2125				if (endc > startc + 256)
2126				    EMSG_RET_NULL(_(e_invrange));
2127				while (++startc <= endc)
2128				    regmbc(startc);
2129			    }
2130			    else
2131#endif
2132			    {
2133#ifdef EBCDIC
2134				int	alpha_only = FALSE;
2135
2136				/* for alphabetical range skip the gaps
2137				 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
2138				if (isalpha(startc) && isalpha(endc))
2139				    alpha_only = TRUE;
2140#endif
2141				while (++startc <= endc)
2142#ifdef EBCDIC
2143				    if (!alpha_only || isalpha(startc))
2144#endif
2145					regc(startc);
2146			    }
2147			    startc = -1;
2148			}
2149		    }
2150		    /*
2151		     * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
2152		     * accepts "\t", "\e", etc., but only when the 'l' flag in
2153		     * 'cpoptions' is not included.
2154		     * Posix doesn't recognize backslash at all.
2155		     */
2156		    else if (*regparse == '\\'
2157			    && !cpo_bsl
2158			    && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2159				|| (!cpo_lit
2160				    && vim_strchr(REGEXP_ABBR,
2161						       regparse[1]) != NULL)))
2162		    {
2163			regparse++;
2164			if (*regparse == 'n')
2165			{
2166			    /* '\n' in range: also match NL */
2167			    if (ret != JUST_CALC_SIZE)
2168			    {
2169				if (*ret == ANYBUT)
2170				    *ret = ANYBUT + ADD_NL;
2171				else if (*ret == ANYOF)
2172				    *ret = ANYOF + ADD_NL;
2173				/* else: must have had a \n already */
2174			    }
2175			    *flagp |= HASNL;
2176			    regparse++;
2177			    startc = -1;
2178			}
2179			else if (*regparse == 'd'
2180				|| *regparse == 'o'
2181				|| *regparse == 'x'
2182				|| *regparse == 'u'
2183				|| *regparse == 'U')
2184			{
2185			    startc = coll_get_char();
2186			    if (startc == 0)
2187				regc(0x0a);
2188			    else
2189#ifdef FEAT_MBYTE
2190				regmbc(startc);
2191#else
2192				regc(startc);
2193#endif
2194			}
2195			else
2196			{
2197			    startc = backslash_trans(*regparse++);
2198			    regc(startc);
2199			}
2200		    }
2201		    else if (*regparse == '[')
2202		    {
2203			int c_class;
2204			int cu;
2205
2206			c_class = get_char_class(&regparse);
2207			startc = -1;
2208			/* Characters assumed to be 8 bits! */
2209			switch (c_class)
2210			{
2211			    case CLASS_NONE:
2212				c_class = get_equi_class(&regparse);
2213				if (c_class != 0)
2214				{
2215				    /* produce equivalence class */
2216				    reg_equi_class(c_class);
2217				}
2218				else if ((c_class =
2219					    get_coll_element(&regparse)) != 0)
2220				{
2221				    /* produce a collating element */
2222				    regmbc(c_class);
2223				}
2224				else
2225				{
2226				    /* literal '[', allow [[-x] as a range */
2227				    startc = *regparse++;
2228				    regc(startc);
2229				}
2230				break;
2231			    case CLASS_ALNUM:
2232				for (cu = 1; cu <= 255; cu++)
2233				    if (isalnum(cu))
2234					regc(cu);
2235				break;
2236			    case CLASS_ALPHA:
2237				for (cu = 1; cu <= 255; cu++)
2238				    if (isalpha(cu))
2239					regc(cu);
2240				break;
2241			    case CLASS_BLANK:
2242				regc(' ');
2243				regc('\t');
2244				break;
2245			    case CLASS_CNTRL:
2246				for (cu = 1; cu <= 255; cu++)
2247				    if (iscntrl(cu))
2248					regc(cu);
2249				break;
2250			    case CLASS_DIGIT:
2251				for (cu = 1; cu <= 255; cu++)
2252				    if (VIM_ISDIGIT(cu))
2253					regc(cu);
2254				break;
2255			    case CLASS_GRAPH:
2256				for (cu = 1; cu <= 255; cu++)
2257				    if (isgraph(cu))
2258					regc(cu);
2259				break;
2260			    case CLASS_LOWER:
2261				for (cu = 1; cu <= 255; cu++)
2262				    if (MB_ISLOWER(cu))
2263					regc(cu);
2264				break;
2265			    case CLASS_PRINT:
2266				for (cu = 1; cu <= 255; cu++)
2267				    if (vim_isprintc(cu))
2268					regc(cu);
2269				break;
2270			    case CLASS_PUNCT:
2271				for (cu = 1; cu <= 255; cu++)
2272				    if (ispunct(cu))
2273					regc(cu);
2274				break;
2275			    case CLASS_SPACE:
2276				for (cu = 9; cu <= 13; cu++)
2277				    regc(cu);
2278				regc(' ');
2279				break;
2280			    case CLASS_UPPER:
2281				for (cu = 1; cu <= 255; cu++)
2282				    if (MB_ISUPPER(cu))
2283					regc(cu);
2284				break;
2285			    case CLASS_XDIGIT:
2286				for (cu = 1; cu <= 255; cu++)
2287				    if (vim_isxdigit(cu))
2288					regc(cu);
2289				break;
2290			    case CLASS_TAB:
2291				regc('\t');
2292				break;
2293			    case CLASS_RETURN:
2294				regc('\r');
2295				break;
2296			    case CLASS_BACKSPACE:
2297				regc('\b');
2298				break;
2299			    case CLASS_ESCAPE:
2300				regc('\033');
2301				break;
2302			}
2303		    }
2304		    else
2305		    {
2306#ifdef FEAT_MBYTE
2307			if (has_mbyte)
2308			{
2309			    int	len;
2310
2311			    /* produce a multibyte character, including any
2312			     * following composing characters */
2313			    startc = mb_ptr2char(regparse);
2314			    len = (*mb_ptr2len)(regparse);
2315			    if (enc_utf8 && utf_char2len(startc) != len)
2316				startc = -1;	/* composing chars */
2317			    while (--len >= 0)
2318				regc(*regparse++);
2319			}
2320			else
2321#endif
2322			{
2323			    startc = *regparse++;
2324			    regc(startc);
2325			}
2326		    }
2327		}
2328		regc(NUL);
2329		prevchr_len = 1;	/* last char was the ']' */
2330		if (*regparse != ']')
2331		    EMSG_RET_NULL(_(e_toomsbra));	/* Cannot happen? */
2332		skipchr();	    /* let's be friends with the lexer again */
2333		*flagp |= HASWIDTH | SIMPLE;
2334		break;
2335	    }
2336	    else if (reg_strict)
2337		EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
2338						       reg_magic > MAGIC_OFF);
2339	}
2340	/* FALLTHROUGH */
2341
2342      default:
2343	{
2344	    int		len;
2345
2346#ifdef FEAT_MBYTE
2347	    /* A multi-byte character is handled as a separate atom if it's
2348	     * before a multi and when it's a composing char. */
2349	    if (use_multibytecode(c))
2350	    {
2351do_multibyte:
2352		ret = regnode(MULTIBYTECODE);
2353		regmbc(c);
2354		*flagp |= HASWIDTH | SIMPLE;
2355		break;
2356	    }
2357#endif
2358
2359	    ret = regnode(EXACTLY);
2360
2361	    /*
2362	     * Append characters as long as:
2363	     * - there is no following multi, we then need the character in
2364	     *   front of it as a single character operand
2365	     * - not running into a Magic character
2366	     * - "one_exactly" is not set
2367	     * But always emit at least one character.  Might be a Multi,
2368	     * e.g., a "[" without matching "]".
2369	     */
2370	    for (len = 0; c != NUL && (len == 0
2371			|| (re_multi_type(peekchr()) == NOT_MULTI
2372			    && !one_exactly
2373			    && !is_Magic(c))); ++len)
2374	    {
2375		c = no_Magic(c);
2376#ifdef FEAT_MBYTE
2377		if (has_mbyte)
2378		{
2379		    regmbc(c);
2380		    if (enc_utf8)
2381		    {
2382			int	l;
2383
2384			/* Need to get composing character too. */
2385			for (;;)
2386			{
2387			    l = utf_ptr2len(regparse);
2388			    if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2389				break;
2390			    regmbc(utf_ptr2char(regparse));
2391			    skipchr();
2392			}
2393		    }
2394		}
2395		else
2396#endif
2397		    regc(c);
2398		c = getchr();
2399	    }
2400	    ungetchr();
2401
2402	    regc(NUL);
2403	    *flagp |= HASWIDTH;
2404	    if (len == 1)
2405		*flagp |= SIMPLE;
2406	}
2407	break;
2408    }
2409
2410    return ret;
2411}
2412
2413#ifdef FEAT_MBYTE
2414/*
2415 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2416 * character "c".
2417 */
2418    static int
2419use_multibytecode(c)
2420    int c;
2421{
2422    return has_mbyte && (*mb_char2len)(c) > 1
2423		     && (re_multi_type(peekchr()) != NOT_MULTI
2424			     || (enc_utf8 && utf_iscomposing(c)));
2425}
2426#endif
2427
2428/*
2429 * emit a node
2430 * Return pointer to generated code.
2431 */
2432    static char_u *
2433regnode(op)
2434    int		op;
2435{
2436    char_u  *ret;
2437
2438    ret = regcode;
2439    if (ret == JUST_CALC_SIZE)
2440	regsize += 3;
2441    else
2442    {
2443	*regcode++ = op;
2444	*regcode++ = NUL;		/* Null "next" pointer. */
2445	*regcode++ = NUL;
2446    }
2447    return ret;
2448}
2449
2450/*
2451 * Emit (if appropriate) a byte of code
2452 */
2453    static void
2454regc(b)
2455    int		b;
2456{
2457    if (regcode == JUST_CALC_SIZE)
2458	regsize++;
2459    else
2460	*regcode++ = b;
2461}
2462
2463#ifdef FEAT_MBYTE
2464/*
2465 * Emit (if appropriate) a multi-byte character of code
2466 */
2467    static void
2468regmbc(c)
2469    int		c;
2470{
2471    if (regcode == JUST_CALC_SIZE)
2472	regsize += (*mb_char2len)(c);
2473    else
2474	regcode += (*mb_char2bytes)(c, regcode);
2475}
2476#endif
2477
2478/*
2479 * reginsert - insert an operator in front of already-emitted operand
2480 *
2481 * Means relocating the operand.
2482 */
2483    static void
2484reginsert(op, opnd)
2485    int		op;
2486    char_u     *opnd;
2487{
2488    char_u	*src;
2489    char_u	*dst;
2490    char_u	*place;
2491
2492    if (regcode == JUST_CALC_SIZE)
2493    {
2494	regsize += 3;
2495	return;
2496    }
2497    src = regcode;
2498    regcode += 3;
2499    dst = regcode;
2500    while (src > opnd)
2501	*--dst = *--src;
2502
2503    place = opnd;		/* Op node, where operand used to be. */
2504    *place++ = op;
2505    *place++ = NUL;
2506    *place = NUL;
2507}
2508
2509/*
2510 * reginsert_limits - insert an operator in front of already-emitted operand.
2511 * The operator has the given limit values as operands.  Also set next pointer.
2512 *
2513 * Means relocating the operand.
2514 */
2515    static void
2516reginsert_limits(op, minval, maxval, opnd)
2517    int		op;
2518    long	minval;
2519    long	maxval;
2520    char_u	*opnd;
2521{
2522    char_u	*src;
2523    char_u	*dst;
2524    char_u	*place;
2525
2526    if (regcode == JUST_CALC_SIZE)
2527    {
2528	regsize += 11;
2529	return;
2530    }
2531    src = regcode;
2532    regcode += 11;
2533    dst = regcode;
2534    while (src > opnd)
2535	*--dst = *--src;
2536
2537    place = opnd;		/* Op node, where operand used to be. */
2538    *place++ = op;
2539    *place++ = NUL;
2540    *place++ = NUL;
2541    place = re_put_long(place, (long_u)minval);
2542    place = re_put_long(place, (long_u)maxval);
2543    regtail(opnd, place);
2544}
2545
2546/*
2547 * Write a long as four bytes at "p" and return pointer to the next char.
2548 */
2549    static char_u *
2550re_put_long(p, val)
2551    char_u	*p;
2552    long_u	val;
2553{
2554    *p++ = (char_u) ((val >> 24) & 0377);
2555    *p++ = (char_u) ((val >> 16) & 0377);
2556    *p++ = (char_u) ((val >> 8) & 0377);
2557    *p++ = (char_u) (val & 0377);
2558    return p;
2559}
2560
2561/*
2562 * regtail - set the next-pointer at the end of a node chain
2563 */
2564    static void
2565regtail(p, val)
2566    char_u	*p;
2567    char_u	*val;
2568{
2569    char_u	*scan;
2570    char_u	*temp;
2571    int		offset;
2572
2573    if (p == JUST_CALC_SIZE)
2574	return;
2575
2576    /* Find last node. */
2577    scan = p;
2578    for (;;)
2579    {
2580	temp = regnext(scan);
2581	if (temp == NULL)
2582	    break;
2583	scan = temp;
2584    }
2585
2586    if (OP(scan) == BACK)
2587	offset = (int)(scan - val);
2588    else
2589	offset = (int)(val - scan);
2590    /* When the offset uses more than 16 bits it can no longer fit in the two
2591     * bytes avaliable.  Use a global flag to avoid having to check return
2592     * values in too many places. */
2593    if (offset > 0xffff)
2594	reg_toolong = TRUE;
2595    else
2596    {
2597	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2598	*(scan + 2) = (char_u) (offset & 0377);
2599    }
2600}
2601
2602/*
2603 * regoptail - regtail on item after a BRANCH; nop if none
2604 */
2605    static void
2606regoptail(p, val)
2607    char_u	*p;
2608    char_u	*val;
2609{
2610    /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2611    if (p == NULL || p == JUST_CALC_SIZE
2612	    || (OP(p) != BRANCH
2613		&& (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2614	return;
2615    regtail(OPERAND(p), val);
2616}
2617
2618/*
2619 * getchr() - get the next character from the pattern. We know about
2620 * magic and such, so therefore we need a lexical analyzer.
2621 */
2622
/* static int	    curchr; */
/* History of the lexer, updated by skipchr() and rolled back by ungetchr(). */
static int	prevprevchr;	/* value "prevchr" had before the last
				 * skipchr() */
static int	prevchr;	/* value "curchr" had before the last
				 * skipchr() */
static int	nextchr;    /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
 * taken to be magic -- webb
 */
static int	at_start;	/* True when on the first character */
static int	prev_at_start;  /* True when on the second character */
2634
2635    static void
2636initchr(str)
2637    char_u *str;
2638{
2639    regparse = str;
2640    prevchr_len = 0;
2641    curchr = prevprevchr = prevchr = nextchr = -1;
2642    at_start = TRUE;
2643    prev_at_start = FALSE;
2644}
2645
/*
 * Return the next character of the pattern without advancing "regparse".
 * The result is kept in "curchr": when "curchr" is not -1 it is returned as
 * it is, otherwise the character at "regparse" is classified first.
 * A character that is special in the current context is returned as
 * Magic(c), which depends on "reg_magic", on what came before it and on a
 * preceding backslash.
 */
    static int
peekchr()
{
    /* Non-zero while classifying the character after a backslash, see the
     * recursive call below. */
    static int	after_slash = FALSE;

    if (curchr == -1)
    {
	switch (curchr = regparse[0])
	{
	case '.':
	case '[':
	case '~':
	    /* magic when 'magic' is on */
	    if (reg_magic >= MAGIC_ON)
		curchr = Magic(curchr);
	    break;
	case '(':
	case ')':
	case '{':
	case '%':
	case '+':
	case '=':
	case '?':
	case '@':
	case '!':
	case '&':
	case '|':
	case '<':
	case '>':
	case '#':	/* future ext. */
	case '"':	/* future ext. */
	case '\'':	/* future ext. */
	case ',':	/* future ext. */
	case '-':	/* future ext. */
	case ':':	/* future ext. */
	case ';':	/* future ext. */
	case '`':	/* future ext. */
	case '/':	/* Can't be used in / command */
	    /* magic only after "\v" */
	    if (reg_magic == MAGIC_ALL)
		curchr = Magic(curchr);
	    break;
	case '*':
	    /* * is not magic as the very first character, eg "?*ptr", when
	     * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
	     * "\(\*" is not magic, thus must be magic if "after_slash" */
	    if (reg_magic >= MAGIC_ON
		    && !at_start
		    && !(prev_at_start && prevchr == Magic('^'))
		    && (after_slash
			|| (prevchr != Magic('(')
			    && prevchr != Magic('&')
			    && prevchr != Magic('|'))))
		curchr = Magic('*');
	    break;
	case '^':
	    /* '^' is only magic as the very first character and if it's after
	     * "\(", "\|", "\&" or "\n".  It is also magic after "\%(" and
	     * always after "\v". */
	    if (reg_magic >= MAGIC_OFF
		    && (at_start
			|| reg_magic == MAGIC_ALL
			|| prevchr == Magic('(')
			|| prevchr == Magic('|')
			|| prevchr == Magic('&')
			|| prevchr == Magic('n')
			|| (no_Magic(prevchr) == '('
			    && prevprevchr == Magic('%'))))
	    {
		curchr = Magic('^');
		at_start = TRUE;
		prev_at_start = FALSE;
	    }
	    break;
	case '$':
	    /* '$' is only magic as the very last char and if it's in front of
	     * either "\|", "\)", "\&", or "\n".  It is always magic after
	     * "\v". */
	    if (reg_magic >= MAGIC_OFF)
	    {
		char_u *p = regparse + 1;

		/* ignore \c \C \m \M and \Z after '$' */
		while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
				|| p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
		    p += 2;
		if (p[0] == NUL
			|| (p[0] == '\\'
			    && (p[1] == '|' || p[1] == '&' || p[1] == ')'
				|| p[1] == 'n'))
			|| reg_magic == MAGIC_ALL)
		    curchr = Magic('$');
	    }
	    break;
	case '\\':
	    {
		int c = regparse[1];

		if (c == NUL)
		    curchr = '\\';	/* trailing '\' */
		else if (
#ifdef EBCDIC
			vim_strchr(META, c)
#else
			c <= '~' && META_flags[c]
#endif
			)
		{
		    /*
		     * META contains everything that may be magic sometimes,
		     * except ^ and $ ("\^" and "\$" are only magic after
		     * "\v").  We now fetch the next character and toggle its
		     * magicness.  Therefore, \ is so meta-magic that it is
		     * not in META.
		     *
		     * This is done by temporarily moving "regparse" past the
		     * backslash and calling peekchr() again; "regparse" and
		     * "after_slash" are put back afterwards.
		     */
		    curchr = -1;
		    prev_at_start = at_start;
		    at_start = FALSE;	/* be able to say "/\*ptr" */
		    ++regparse;
		    ++after_slash;
		    peekchr();
		    --regparse;
		    --after_slash;
		    curchr = toggle_Magic(curchr);
		}
		else if (vim_strchr(REGEXP_ABBR, c))
		{
		    /*
		     * Handle abbreviations, like "\t" for TAB -- webb
		     */
		    curchr = backslash_trans(c);
		}
		else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
		    curchr = toggle_Magic(c);
		else
		{
		    /*
		     * Next character can never be (made) magic?
		     * Then backslashing it won't do anything.
		     */
#ifdef FEAT_MBYTE
		    if (has_mbyte)
			curchr = (*mb_ptr2char)(regparse + 1);
		    else
#endif
			curchr = c;
		}
		break;
	    }

#ifdef FEAT_MBYTE
	default:
	    /* Not a special character: with a multi-byte encoding get the
	     * whole character instead of only the first byte. */
	    if (has_mbyte)
		curchr = (*mb_ptr2char)(regparse);
#endif
	}
    }

    return curchr;
}
2804
2805/*
2806 * Eat one lexed character.  Do this in a way that we can undo it.
2807 */
2808    static void
2809skipchr()
2810{
2811    /* peekchr() eats a backslash, do the same here */
2812    if (*regparse == '\\')
2813	prevchr_len = 1;
2814    else
2815	prevchr_len = 0;
2816    if (regparse[prevchr_len] != NUL)
2817    {
2818#ifdef FEAT_MBYTE
2819	if (enc_utf8)
2820	    /* exclude composing chars that mb_ptr2len does include */
2821	    prevchr_len += utf_ptr2len(regparse + prevchr_len);
2822	else if (has_mbyte)
2823	    prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
2824	else
2825#endif
2826	    ++prevchr_len;
2827    }
2828    regparse += prevchr_len;
2829    prev_at_start = at_start;
2830    at_start = FALSE;
2831    prevprevchr = prevchr;
2832    prevchr = curchr;
2833    curchr = nextchr;	    /* use previously unget char, or -1 */
2834    nextchr = -1;
2835}
2836
2837/*
2838 * Skip a character while keeping the value of prev_at_start for at_start.
2839 * prevchr and prevprevchr are also kept.
2840 */
2841    static void
2842skipchr_keepstart()
2843{
2844    int as = prev_at_start;
2845    int pr = prevchr;
2846    int prpr = prevprevchr;
2847
2848    skipchr();
2849    at_start = as;
2850    prevchr = pr;
2851    prevprevchr = prpr;
2852}
2853
/*
 * Get the next lexed character and move past it.
 * Returns what peekchr() returned before the character was skipped.
 */
    static int
getchr()
{
    int c;

    c = peekchr();
    skipchr();
    return c;
}
2862
2863/*
2864 * put character back.  Works only once!
2865 */
2866    static void
2867ungetchr()
2868{
2869    nextchr = curchr;
2870    curchr = prevchr;
2871    prevchr = prevprevchr;
2872    at_start = prev_at_start;
2873    prev_at_start = FALSE;
2874
2875    /* Backup regparse, so that it's at the same position as before the
2876     * getchr(). */
2877    regparse -= prevchr_len;
2878}
2879
2880/*
2881 * Get and return the value of the hex string at the current position.
2882 * Return -1 if there is no valid hex number.
2883 * The position is updated:
2884 *     blahblah\%x20asdf
2885 *	   before-^ ^-after
2886 * The parameter controls the maximum number of input characters. This will be
2887 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
2888 */
2889    static int
2890gethexchrs(maxinputlen)
2891    int		maxinputlen;
2892{
2893    int		nr = 0;
2894    int		c;
2895    int		i;
2896
2897    for (i = 0; i < maxinputlen; ++i)
2898    {
2899	c = regparse[0];
2900	if (!vim_isxdigit(c))
2901	    break;
2902	nr <<= 4;
2903	nr |= hex2nr(c);
2904	++regparse;
2905    }
2906
2907    if (i == 0)
2908	return -1;
2909    return nr;
2910}
2911
2912/*
2913 * get and return the value of the decimal string immediately after the
2914 * current position. Return -1 for invalid.  Consumes all digits.
2915 */
2916    static int
2917getdecchrs()
2918{
2919    int		nr = 0;
2920    int		c;
2921    int		i;
2922
2923    for (i = 0; ; ++i)
2924    {
2925	c = regparse[0];
2926	if (c < '0' || c > '9')
2927	    break;
2928	nr *= 10;
2929	nr += c - '0';
2930	++regparse;
2931    }
2932
2933    if (i == 0)
2934	return -1;
2935    return nr;
2936}
2937
2938/*
2939 * get and return the value of the octal string immediately after the current
2940 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2941 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2942 * treat 8 or 9 as recognised characters. Position is updated:
2943 *     blahblah\%o210asdf
2944 *	   before-^  ^-after
2945 */
2946    static int
2947getoctchrs()
2948{
2949    int		nr = 0;
2950    int		c;
2951    int		i;
2952
2953    for (i = 0; i < 3 && nr < 040; ++i)
2954    {
2955	c = regparse[0];
2956	if (c < '0' || c > '7')
2957	    break;
2958	nr <<= 3;
2959	nr |= hex2nr(c);
2960	++regparse;
2961    }
2962
2963    if (i == 0)
2964	return -1;
2965    return nr;
2966}
2967
2968/*
2969 * Get a number after a backslash that is inside [].
2970 * When nothing is recognized return a backslash.
2971 */
2972    static int
2973coll_get_char()
2974{
2975    int	    nr = -1;
2976
2977    switch (*regparse++)
2978    {
2979	case 'd': nr = getdecchrs(); break;
2980	case 'o': nr = getoctchrs(); break;
2981	case 'x': nr = gethexchrs(2); break;
2982	case 'u': nr = gethexchrs(4); break;
2983	case 'U': nr = gethexchrs(8); break;
2984    }
2985    if (nr < 0)
2986    {
2987	/* If getting the number fails be backwards compatible: the character
2988	 * is a backslash. */
2989	--regparse;
2990	nr = '\\';
2991    }
2992    return nr;
2993}
2994
2995/*
2996 * read_limits - Read two integers to be taken as a minimum and maximum.
2997 * If the first character is '-', then the range is reversed.
2998 * Should end with 'end'.  If minval is missing, zero is default, if maxval is
2999 * missing, a very big number is the default.
3000 */
3001    static int
3002read_limits(minval, maxval)
3003    long	*minval;
3004    long	*maxval;
3005{
3006    int		reverse = FALSE;
3007    char_u	*first_char;
3008    long	tmp;
3009
3010    if (*regparse == '-')
3011    {
3012	/* Starts with '-', so reverse the range later */
3013	regparse++;
3014	reverse = TRUE;
3015    }
3016    first_char = regparse;
3017    *minval = getdigits(&regparse);
3018    if (*regparse == ',')	    /* There is a comma */
3019    {
3020	if (vim_isdigit(*++regparse))
3021	    *maxval = getdigits(&regparse);
3022	else
3023	    *maxval = MAX_LIMIT;
3024    }
3025    else if (VIM_ISDIGIT(*first_char))
3026	*maxval = *minval;	    /* It was \{n} or \{-n} */
3027    else
3028	*maxval = MAX_LIMIT;	    /* It was \{} or \{-} */
3029    if (*regparse == '\\')
3030	regparse++;	/* Allow either \{...} or \{...\} */
3031    if (*regparse != '}')
3032    {
3033	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3034					  reg_magic == MAGIC_ALL ? "" : "\\");
3035	EMSG_RET_FAIL(IObuff);
3036    }
3037
3038    /*
3039     * Reverse the range if there was a '-', or make sure it is in the right
3040     * order otherwise.
3041     */
3042    if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3043    {
3044	tmp = *minval;
3045	*minval = *maxval;
3046	*maxval = tmp;
3047    }
3048    skipchr();		/* let's be friends with the lexer again */
3049    return OK;
3050}
3051
3052/*
3053 * vim_regexec and friends
3054 */
3055
3056/*
3057 * Global work variables for vim_regexec().
3058 */
3059
/* The current match-position is remembered with these variables: */
static linenr_T	reglnum;	/* line number, relative to first line */
static char_u	*regline;	/* start of current line */
static char_u	*reginput;	/* current input, points into "regline" */

/* Flags that tell clearing the stored sub-expression positions is still to
 * be done. */
static int	need_clear_subexpr;	/* subexpressions still need to be
					 * cleared */
#ifdef FEAT_SYN_HL
static int	need_clear_zsubexpr = FALSE;	/* extmatch subexpressions
						 * still need to be cleared */
#endif
3071
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 * Only one member of "rs_u" is in use, depending on whether a single-line or
 * a multi-line regexp is being matched.
 */
typedef struct
{
    union
    {
	char_u	*ptr;	/* reginput pointer, for single-line regexp */
	lpos_T	pos;	/* reginput pos, for multi-line regexp */
    } rs_u;
    int		rs_len;	/* length of "backpos" at the time of saving */
} regsave_T;
3086
/* struct to save start/end pointer/position in for \(\) */
/* Written by save_se() and read back by restore_se(), which pick the union
 * member with REG_MULTI. */
typedef struct
{
    union
    {
	char_u	*ptr;	/* used when REG_MULTI is not set */
	lpos_T	pos;	/* used when REG_MULTI is set */
    } se_u;
} save_se_T;
3096
/* used for BEHIND and NOBEHIND matching */
/* NOTE(review): the member comments below are derived from the names and
 * types only -- confirm against the code that fills this struct. */
typedef struct regbehind_S
{
    regsave_T	save_after;	/* a saved input state */
    regsave_T	save_behind;	/* a saved input state */
    int		save_need_clear_subexpr; /* presumably a copy of
					  * "need_clear_subexpr" */
    save_se_T   save_start[NSUBEXP];	/* one entry per sub-expression */
    save_se_T   save_end[NSUBEXP];	/* one entry per sub-expression */
} regbehind_T;
3106
3107static char_u	*reg_getline __ARGS((linenr_T lnum));
3108static long	vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
3109static long	regtry __ARGS((regprog_T *prog, colnr_T col));
3110static void	cleanup_subexpr __ARGS((void));
3111#ifdef FEAT_SYN_HL
3112static void	cleanup_zsubexpr __ARGS((void));
3113#endif
3114static void	save_subexpr __ARGS((regbehind_T *bp));
3115static void	restore_subexpr __ARGS((regbehind_T *bp));
3116static void	reg_nextline __ARGS((void));
3117static void	reg_save __ARGS((regsave_T *save, garray_T *gap));
3118static void	reg_restore __ARGS((regsave_T *save, garray_T *gap));
3119static int	reg_save_equal __ARGS((regsave_T *save));
3120static void	save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3121static void	save_se_one __ARGS((save_se_T *savep, char_u **pp));
3122
/* Save the sub-expressions before attempting a match. */
/* Expands to a conditional expression without parentheses around it: calls
 * save_se_multi() with "posp" when REG_MULTI is set, save_se_one() with "pp"
 * otherwise. */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/* After a failed match restore the sub-expressions. */
/* Expands to a block in braces, not to an expression: copies the saved
 * position to "*posp" when REG_MULTI is set, the saved pointer to "*pp"
 * otherwise. */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
	*(posp) = (savep)->se_u.pos; \
    else \
	*(pp) = (savep)->se_u.ptr; }
3133
3134static int	re_num_cmp __ARGS((long_u val, char_u *scan));
3135static int	regmatch __ARGS((char_u *prog));
3136static int	regrepeat __ARGS((char_u *p, long maxcount));
3137
3138#ifdef DEBUG
3139int		regnarrate = 0;
3140#endif
3141
/*
 * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
 * contains '\c' or '\C' the value is overruled.
 */
static int	ireg_ic;

#ifdef FEAT_MBYTE
/*
 * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
 * in the regexp.  Defaults to false, always.
 */
static int	ireg_icombine;
#endif

/*
 * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
 * there is no maximum.
 */
static colnr_T	ireg_maxcol;

/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in vim_regexec_both() when finished.
 */
static char_u	*reg_tofree = NULL;
/* NOTE(review): presumably the allocated size of "reg_tofree" -- confirm
 * where it is set. */
static unsigned	reg_tofreelen;
3170
/*
 * These variables are set when executing a regexp to speed up the execution.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *			single-line		multi-line
 * reg_match		&regmatch_T		NULL
 * reg_mmatch		NULL			&regmmatch_T
 * reg_startp		reg_match->startp	<invalid>
 * reg_endp		reg_match->endp		<invalid>
 * reg_startpos		<invalid>		reg_mmatch->startpos
 * reg_endpos		<invalid>		reg_mmatch->endpos
 * reg_win		NULL			window in which to search
 * reg_buf		<invalid>		buffer in which to search
 * reg_firstlnum	<invalid>		first line in which to search
 * reg_maxline		0			last line nr
 * reg_line_lbr		FALSE or TRUE		FALSE
 *
 * A variable marked <invalid> must not be used for that kind of match.
 */
static regmatch_T	*reg_match;
static regmmatch_T	*reg_mmatch;
static char_u		**reg_startp = NULL;
static char_u		**reg_endp = NULL;
static lpos_T		*reg_startpos = NULL;
static lpos_T		*reg_endpos = NULL;
static win_T		*reg_win;
static buf_T		*reg_buf;
static linenr_T		reg_firstlnum;
static linenr_T		reg_maxline;	    /* relative to "reg_firstlnum",
					     * see reg_getline() */
static int		reg_line_lbr;	    /* "\n" in string is line break */
3199
/* Values for rs_state in regitem_T. */
/* The comment after each value names the opcode(s) of the regexp program it
 * is used for.  The values are numbered from zero; RS_ZOPEN and RS_ZCLOSE
 * only exist with FEAT_SYN_HL, which shifts the values that follow. */
typedef enum regstate_E
{
    RS_NOPEN = 0	/* NOPEN and NCLOSE */
    , RS_MOPEN		/* MOPEN + [0-9] */
    , RS_MCLOSE		/* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    , RS_ZOPEN		/* ZOPEN + [0-9] */
    , RS_ZCLOSE		/* ZCLOSE + [0-9] */
#endif
    , RS_BRANCH		/* BRANCH */
    , RS_BRCPLX_MORE	/* BRACE_COMPLEX and trying one more match */
    , RS_BRCPLX_LONG	/* BRACE_COMPLEX and trying longest match */
    , RS_BRCPLX_SHORT	/* BRACE_COMPLEX and trying shortest match */
    , RS_NOMATCH	/* NOMATCH */
    , RS_BEHIND1	/* BEHIND / NOBEHIND matching rest */
    , RS_BEHIND2	/* BEHIND / NOBEHIND matching behind part */
    , RS_STAR_LONG	/* STAR/PLUS/BRACE_SIMPLE longest match */
    , RS_STAR_SHORT	/* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3220
/*
 * When there are alternatives a regitem_T is put on the regstack to remember
 * what we are doing; its "rs_state" tells which kind of item it is.
 * Before it may be another type of item, depending on rs_state, to remember
 * more things.
 */
typedef struct regitem_S
{
    regstate_T	rs_state;	/* what we are doing, one of RS_ above */
    char_u	*rs_scan;	/* current node in program */
    union
    {
	save_se_T  sesave;
	regsave_T  regsave;
    } rs_un;			/* room for saving reginput */
    short	rs_no;		/* submatch nr or BEHIND/NOBEHIND */
} regitem_T;
3238
3239static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3240static void regstack_pop __ARGS((char_u **scan));
3241
/* used for STAR, PLUS and BRACE_SIMPLE matching */
/* NOTE(review): the meaning of "count", "minval" and "maxval" is derived
 * from their names only -- confirm in regmatch(). */
typedef struct regstar_S
{
    int		nextb;		/* next byte */
    int		nextb_ic;	/* next byte reverse case */
    long	count;		/* presumably the number of matches */
    long	minval;		/* presumably the minimal nr of matches */
    long	maxval;		/* presumably the maximal nr of matches */
} regstar_T;
3251
/* used to store input position when a BACK was encountered, so that we know
 * if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u	*bp_scan;	/* "scan" where BACK was encountered */
    regsave_T	bp_pos;		/* last input position */
} backpos_T;
3259
/*
 * "regstack" and "backpos" are used by regmatch().  They are kept over calls
 * to avoid invoking malloc() and free() often.
 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
 * or regbehind_T.
 * "backpos" is a table with backpos_T items, one for each BACK encountered.
 */
static garray_T	regstack = {0, 0, 0, 0, NULL};
static garray_T	backpos = {0, 0, 0, 0, NULL};

/*
 * Both for regstack and backpos tables we use the following strategy of
 * allocation (to reduce malloc/free calls):
 * - Initial size is fairly small.
 * - When needed, the tables are grown bigger (8 times at first, double after
 *   that).
 * - After executing the match we free the memory only if the array has grown.
 *   Thus the memory is kept allocated when it's at the initial size.
 * This makes it fast while not keeping a lot of memory allocated.
 * A three times speed increase was observed when using many simple patterns.
 *
 * REGSTACK_INITIAL is a number of bytes: "regstack" is created with an item
 * size of one.  BACKPOS_INITIAL is a number of backpos_T items.
 */
#define REGSTACK_INITIAL	2048
#define BACKPOS_INITIAL		64
3283
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory that the regexp code keeps allocated between calls: the
 * "regstack" and "backpos" growarrays, "reg_tofree" and "reg_prev_sub".
 */
    void
free_regexp_stuff()
{
    /* The four items are independent, the order does not matter. */
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
3294
3295/*
3296 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3297 */
3298    static char_u *
3299reg_getline(lnum)
3300    linenr_T	lnum;
3301{
3302    /* when looking behind for a match/no-match lnum is negative.  But we
3303     * can't go before line 1 */
3304    if (reg_firstlnum + lnum < 1)
3305	return NULL;
3306    if (lnum > reg_maxline)
3307	/* Must have matched the "\n" in the last line. */
3308	return (char_u *)"";
3309    return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3310}
3311
/* NOTE(review): not used in this part of the file; presumably the saved input
 * position for look-behind matching - confirm. */
static regsave_T behind_pos;

#ifdef FEAT_SYN_HL
static char_u	*reg_startzp[NSUBEXP];	/* Workspace to mark beginning */
static char_u	*reg_endzp[NSUBEXP];	/*   and end of \z(...\) matches */
static lpos_T	reg_startzpos[NSUBEXP];	/* idem, beginning pos */
static lpos_T	reg_endzpos[NSUBEXP];	/* idem, end pos */
#endif

/* TRUE if using multi-line regexp: vim_regexec_multi() sets "reg_match" to
 * NULL, vim_regexec() and vim_regexec_nl() set it to their "rmp" argument. */
#define REG_MULTI	(reg_match == NULL)
3323
3324/*
3325 * Match a regexp against a string.
3326 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3327 * Uses curbuf for line count and 'iskeyword'.
3328 *
3329 * Return TRUE if there is a match, FALSE if not.
3330 */
3331    int
3332vim_regexec(rmp, line, col)
3333    regmatch_T	*rmp;
3334    char_u	*line;	/* string to match against */
3335    colnr_T	col;	/* column to start looking for match */
3336{
3337    reg_match = rmp;
3338    reg_mmatch = NULL;
3339    reg_maxline = 0;
3340    reg_line_lbr = FALSE;
3341    reg_win = NULL;
3342    ireg_ic = rmp->rm_ic;
3343#ifdef FEAT_MBYTE
3344    ireg_icombine = FALSE;
3345#endif
3346    ireg_maxcol = 0;
3347    return (vim_regexec_both(line, col, NULL) != 0);
3348}
3349
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Same as vim_regexec(), except that a "\n" in "line" is considered to be a
 * line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T	*rmp;
    char_u	*line;	/* string to match against */
    colnr_T	col;	/* column to start looking for match */
{
    long	nlines;

    /* Matching a string: no multi-line match info, no window, one line. */
    reg_mmatch = NULL;
    reg_match = rmp;
    reg_win = NULL;
    reg_maxline = 0;
    ireg_maxcol = 0;

    /* This is the only difference with vim_regexec(). */
    reg_line_lbr = TRUE;

    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif

    nlines = vim_regexec_both(line, col, NULL);
    return (nlines != 0);
}
#endif
3374
3375/*
3376 * Match a regexp against multiple lines.
3377 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3378 * Uses curbuf for line count and 'iskeyword'.
3379 *
3380 * Return zero if there is no match.  Return number of lines contained in the
3381 * match otherwise.
3382 */
3383    long
3384vim_regexec_multi(rmp, win, buf, lnum, col, tm)
3385    regmmatch_T	*rmp;
3386    win_T	*win;		/* window in which to search or NULL */
3387    buf_T	*buf;		/* buffer in which to search */
3388    linenr_T	lnum;		/* nr of line to start looking for match */
3389    colnr_T	col;		/* column to start looking for match */
3390    proftime_T	*tm;		/* timeout limit or NULL */
3391{
3392    long	r;
3393    buf_T	*save_curbuf = curbuf;
3394
3395    reg_match = NULL;
3396    reg_mmatch = rmp;
3397    reg_buf = buf;
3398    reg_win = win;
3399    reg_firstlnum = lnum;
3400    reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3401    reg_line_lbr = FALSE;
3402    ireg_ic = rmp->rmm_ic;
3403#ifdef FEAT_MBYTE
3404    ireg_icombine = FALSE;
3405#endif
3406    ireg_maxcol = rmp->rmm_maxcol;
3407
3408    /* Need to switch to buffer "buf" to make vim_iswordc() work. */
3409    curbuf = buf;
3410    r = vim_regexec_both(NULL, col, tm);
3411    curbuf = save_curbuf;
3412
3413    return r;
3414}
3415
/*
 * Match a regexp against a string ("line" points to the string) or multiple
 * lines ("line" is NULL, use reg_getline()).
 * The caller must have set up "reg_match" or "reg_mmatch" and the other
 * matching state, like vim_regexec(), vim_regexec_nl() and
 * vim_regexec_multi() do.
 * "tm" is only used when compiled with FEAT_RELTIME.
 *
 * Returns zero when there is no match or when a problem was detected.
 * Otherwise returns the result of regtry(): the number of lines contained in
 * the match.
 */
    static long
vim_regexec_both(line, col, tm)
    char_u	*line;
    colnr_T	col;		/* column to start looking for match */
    proftime_T	*tm UNUSED;	/* timeout limit or NULL */
{
    regprog_T	*prog;
    char_u	*s;
    long	retval = 0L;

    /* Create "regstack" and "backpos" if they are not allocated yet.
     * We allocate *_INITIAL amount of bytes first and then set the grow size
     * to much bigger value to avoid many malloc calls in case of deep regular
     * expressions.
     * NOTE(review): the result of ga_grow() is not checked here. */
    if (regstack.ga_data == NULL)
    {
	/* Use an item size of 1 byte, since we push different things
	 * onto the regstack. */
	ga_init2(&regstack, 1, REGSTACK_INITIAL);
	ga_grow(&regstack, REGSTACK_INITIAL);
	regstack.ga_growsize = REGSTACK_INITIAL * 8;
    }

    if (backpos.ga_data == NULL)
    {
	ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
	ga_grow(&backpos, BACKPOS_INITIAL);
	backpos.ga_growsize = BACKPOS_INITIAL * 8;
    }

    /* Get the program and the places where the match positions are stored.
     * For a multi-line match the text comes from the buffer. */
    if (REG_MULTI)
    {
	prog = reg_mmatch->regprog;
	line = reg_getline((linenr_T)0);
	reg_startpos = reg_mmatch->startpos;
	reg_endpos = reg_mmatch->endpos;
    }
    else
    {
	prog = reg_match->regprog;
	reg_startp = reg_match->startp;
	reg_endp = reg_match->endp;
    }

    /* Be paranoid... */
    if (prog == NULL || line == NULL)
    {
	EMSG(_(e_null));
	goto theend;
    }

    /* Check validity of program. */
    if (prog_magic_wrong())
	goto theend;

    /* If the start column is past the maximum column: no need to try. */
    if (ireg_maxcol > 0 && col >= ireg_maxcol)
	goto theend;

    /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
    if (prog->regflags & RF_ICASE)
	ireg_ic = TRUE;
    else if (prog->regflags & RF_NOICASE)
	ireg_ic = FALSE;

#ifdef FEAT_MBYTE
    /* If pattern contains "\Z" overrule value of ireg_icombine */
    if (prog->regflags & RF_ICOMBINE)
	ireg_icombine = TRUE;
#endif

    /* If there is a "must appear" string, look for it.  When it does not
     * appear anywhere from "col" on there can't be a match. */
    if (prog->regmust != NULL)
    {
	int c;

	/* "c" is the first character of the "must appear" string. */
#ifdef FEAT_MBYTE
	if (has_mbyte)
	    c = (*mb_ptr2char)(prog->regmust);
	else
#endif
	    c = *prog->regmust;
	s = line + col;

	/*
	 * This is used very often, esp. for ":global".  Use three versions of
	 * the loop to avoid overhead of conditions.
	 */
	/* 1. Not ignoring case and no multi-byte: search for a byte. */
	if (!ireg_ic
#ifdef FEAT_MBYTE
		&& !has_mbyte
#endif
		)
	    while ((s = vim_strbyte(s, c)) != NULL)
	    {
		if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
		    break;		/* Found it. */
		++s;
	    }
#ifdef FEAT_MBYTE
	/* 2. Multi-byte, and either not ignoring case or a multi-byte
	 * character in an encoding other than utf-8: search with
	 * vim_strchr(). */
	else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
	    while ((s = vim_strchr(s, c)) != NULL)
	    {
		if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
		    break;		/* Found it. */
		mb_ptr_adv(s);
	    }
#endif
	/* 3. All other cases: search with cstrchr(). */
	else
	    while ((s = cstrchr(s, c)) != NULL)
	    {
		if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
		    break;		/* Found it. */
		mb_ptr_adv(s);
	    }
	if (s == NULL)		/* Not present. */
	    goto theend;
    }

    regline = line;
    reglnum = 0;

    /* Simplest case: Anchored match need be tried only once. */
    if (prog->reganch)
    {
	int	c;

	/* "c" is the character at the start column. */
#ifdef FEAT_MBYTE
	if (has_mbyte)
	    c = (*mb_ptr2char)(regline + col);
	else
#endif
	    c = regline[col];
	/* Only try matching when there is no known start character or "c"
	 * equals it, possibly ignoring case. */
	if (prog->regstart == NUL
		|| prog->regstart == c
		|| (ireg_ic && ((
#ifdef FEAT_MBYTE
			(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
			|| (c < 255 && prog->regstart < 255 &&
#endif
			    MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
	    retval = regtry(prog, col);
	else
	    retval = 0;
    }
    else
    {
#ifdef FEAT_RELTIME
	int tm_count = 0;
#endif
	/* Messy cases:  unanchored match.  Try at every column until there
	 * is a match, the end of the line is reached, the maximum column is
	 * passed, the time is up or CTRL-C was typed. */
	while (!got_int)
	{
	    if (prog->regstart != NUL)
	    {
		/* Skip until the char we know it must start with.
		 * Used often, do some work to avoid call overhead. */
		if (!ireg_ic
#ifdef FEAT_MBYTE
			    && !has_mbyte
#endif
			    )
		    s = vim_strbyte(regline + col, prog->regstart);
		else
		    s = cstrchr(regline + col, prog->regstart);
		if (s == NULL)
		{
		    retval = 0;
		    break;
		}
		col = (int)(s - regline);
	    }

	    /* Check for maximum column to try. */
	    if (ireg_maxcol > 0 && col >= ireg_maxcol)
	    {
		retval = 0;
		break;
	    }

	    retval = regtry(prog, col);
	    if (retval > 0)
		break;

	    /* if not currently on the first line, get it again */
	    if (reglnum != 0)
	    {
		reglnum = 0;
		regline = reg_getline((linenr_T)0);
	    }
	    /* At the end of the line there is nothing more to try. */
	    if (regline[col] == NUL)
		break;
	    /* Advance to the next character. */
#ifdef FEAT_MBYTE
	    if (has_mbyte)
		col += (*mb_ptr2len)(regline + col);
	    else
#endif
		++col;
#ifdef FEAT_RELTIME
	    /* Check for timeout once every twenty tries to avoid overhead.
	     * The last regtry() failed, thus this gives up without a match. */
	    if (tm != NULL && ++tm_count == 20)
	    {
		tm_count = 0;
		if (profile_passed_limit(tm))
		    break;
	    }
#endif
	}
    }

theend:
    /* Free "reg_tofree" when it's a bit big.
     * Free regstack and backpos if they are bigger than their initial size. */
    if (reg_tofreelen > 400)
    {
	vim_free(reg_tofree);
	reg_tofree = NULL;
    }
    if (regstack.ga_maxlen > REGSTACK_INITIAL)
	ga_clear(&regstack);
    if (backpos.ga_maxlen > BACKPOS_INITIAL)
	ga_clear(&backpos);

    return retval;
}
3645
3646#ifdef FEAT_SYN_HL
3647static reg_extmatch_T *make_extmatch __ARGS((void));
3648
3649/*
3650 * Create a new extmatch and mark it as referenced once.
3651 */
3652    static reg_extmatch_T *
3653make_extmatch()
3654{
3655    reg_extmatch_T	*em;
3656
3657    em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3658    if (em != NULL)
3659	em->refcnt = 1;
3660    return em;
3661}
3662
3663/*
3664 * Add a reference to an extmatch.
3665 */
3666    reg_extmatch_T *
3667ref_extmatch(em)
3668    reg_extmatch_T	*em;
3669{
3670    if (em != NULL)
3671	em->refcnt++;
3672    return em;
3673}
3674
3675/*
3676 * Remove a reference to an extmatch.  If there are no references left, free
3677 * the info.
3678 */
3679    void
3680unref_extmatch(em)
3681    reg_extmatch_T	*em;
3682{
3683    int i;
3684
3685    if (em != NULL && --em->refcnt <= 0)
3686    {
3687	for (i = 0; i < NSUBEXP; ++i)
3688	    vim_free(em->matches[i]);
3689	vim_free(em);
3690    }
3691}
3692#endif
3693
3694/*
3695 * regtry - try match of "prog" with at regline["col"].
3696 * Returns 0 for failure, number of lines contained in the match otherwise.
3697 */
3698    static long
3699regtry(prog, col)
3700    regprog_T	*prog;
3701    colnr_T	col;
3702{
3703    reginput = regline + col;
3704    need_clear_subexpr = TRUE;
3705#ifdef FEAT_SYN_HL
3706    /* Clear the external match subpointers if necessary. */
3707    if (prog->reghasz == REX_SET)
3708	need_clear_zsubexpr = TRUE;
3709#endif
3710
3711    if (regmatch(prog->program + 1) == 0)
3712	return 0;
3713
3714    cleanup_subexpr();
3715    if (REG_MULTI)
3716    {
3717	if (reg_startpos[0].lnum < 0)
3718	{
3719	    reg_startpos[0].lnum = 0;
3720	    reg_startpos[0].col = col;
3721	}
3722	if (reg_endpos[0].lnum < 0)
3723	{
3724	    reg_endpos[0].lnum = reglnum;
3725	    reg_endpos[0].col = (int)(reginput - regline);
3726	}
3727	else
3728	    /* Use line number of "\ze". */
3729	    reglnum = reg_endpos[0].lnum;
3730    }
3731    else
3732    {
3733	if (reg_startp[0] == NULL)
3734	    reg_startp[0] = regline + col;
3735	if (reg_endp[0] == NULL)
3736	    reg_endp[0] = reginput;
3737    }
3738#ifdef FEAT_SYN_HL
3739    /* Package any found \z(...\) matches for export. Default is none. */
3740    unref_extmatch(re_extmatch_out);
3741    re_extmatch_out = NULL;
3742
3743    if (prog->reghasz == REX_SET)
3744    {
3745	int		i;
3746
3747	cleanup_zsubexpr();
3748	re_extmatch_out = make_extmatch();
3749	for (i = 0; i < NSUBEXP; i++)
3750	{
3751	    if (REG_MULTI)
3752	    {
3753		/* Only accept single line matches. */
3754		if (reg_startzpos[i].lnum >= 0
3755			&& reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3756		    re_extmatch_out->matches[i] =
3757			vim_strnsave(reg_getline(reg_startzpos[i].lnum)
3758						       + reg_startzpos[i].col,
3759				   reg_endzpos[i].col - reg_startzpos[i].col);
3760	    }
3761	    else
3762	    {
3763		if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3764		    re_extmatch_out->matches[i] =
3765			    vim_strnsave(reg_startzp[i],
3766					(int)(reg_endzp[i] - reg_startzp[i]));
3767	    }
3768	}
3769    }
3770#endif
3771    return 1 + reglnum;
3772}
3773
3774#ifdef FEAT_MBYTE
3775static int reg_prev_class __ARGS((void));
3776
3777/*
3778 * Get class of previous character.
3779 */
3780    static int
3781reg_prev_class()
3782{
3783    if (reginput > regline)
3784	return mb_get_class(reginput - 1
3785				     - (*mb_head_off)(regline, reginput - 1));
3786    return -1;
3787}
3788
3789#endif
/* Move "reginput" forward with mb_ptr_adv().  Used by regmatch() after the
 * character at "reginput" was accepted. */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)
3791
3792/*
3793 * The arguments from BRACE_LIMITS are stored here.  They are actually local
3794 * to regmatch(), but they are here to reduce the amount of stack space used
3795 * (it can be called recursively many times).
3796 */
3797static long	bl_minval;
3798static long	bl_maxval;
3799
3800/*
3801 * regmatch - main matching routine
3802 *
3803 * Conceptually the strategy is simple: Check to see whether the current node
3804 * matches, push an item onto the regstack and loop to see whether the rest
3805 * matches, and then act accordingly.  In practice we make some effort to
3806 * avoid using the regstack, in particular by going through "ordinary" nodes
3807 * (that don't need to know whether the rest of the match failed) by a nested
3808 * loop.
3809 *
3810 * Returns TRUE when there is a match.  Leaves reginput and reglnum just after
3811 * the last matched character.
3812 * Returns FALSE when there is no match.  Leaves reginput and reglnum in an
3813 * undefined state!
3814 */
3815    static int
3816regmatch(scan)
3817    char_u	*scan;		/* Current node. */
3818{
3819  char_u	*next;		/* Next node. */
3820  int		op;
3821  int		c;
3822  regitem_T	*rp;
3823  int		no;
3824  int		status;		/* one of the RA_ values: */
3825#define RA_FAIL		1	/* something failed, abort */
3826#define RA_CONT		2	/* continue in inner loop */
3827#define RA_BREAK	3	/* break inner loop */
3828#define RA_MATCH	4	/* successful match */
3829#define RA_NOMATCH	5	/* didn't match */
3830
3831  /* Make "regstack" and "backpos" empty.  They are allocated and freed in
3832   * vim_regexec_both() to reduce malloc()/free() calls. */
3833  regstack.ga_len = 0;
3834  backpos.ga_len = 0;
3835
3836  /*
3837   * Repeat until "regstack" is empty.
3838   */
3839  for (;;)
3840  {
    /* Some patterns may take a long time to match, even though they are not
     * illegal.  E.g., "\([a-z]\+\)\+Q".  Allow breaking them with CTRL-C. */
3843    fast_breakcheck();
3844
3845#ifdef DEBUG
3846    if (scan != NULL && regnarrate)
3847    {
3848	mch_errmsg(regprop(scan));
3849	mch_errmsg("(\n");
3850    }
3851#endif
3852
3853    /*
3854     * Repeat for items that can be matched sequentially, without using the
3855     * regstack.
3856     */
3857    for (;;)
3858    {
3859	if (got_int || scan == NULL)
3860	{
3861	    status = RA_FAIL;
3862	    break;
3863	}
3864	status = RA_CONT;
3865
3866#ifdef DEBUG
3867	if (regnarrate)
3868	{
3869	    mch_errmsg(regprop(scan));
3870	    mch_errmsg("...\n");
3871# ifdef FEAT_SYN_HL
3872	    if (re_extmatch_in != NULL)
3873	    {
3874		int i;
3875
3876		mch_errmsg(_("External submatches:\n"));
3877		for (i = 0; i < NSUBEXP; i++)
3878		{
3879		    mch_errmsg("    \"");
3880		    if (re_extmatch_in->matches[i] != NULL)
3881			mch_errmsg(re_extmatch_in->matches[i]);
3882		    mch_errmsg("\"\n");
3883		}
3884	    }
3885# endif
3886	}
3887#endif
3888	next = regnext(scan);
3889
3890	op = OP(scan);
3891	/* Check for character class with NL added. */
3892	if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
3893				&& *reginput == NUL && reglnum <= reg_maxline)
3894	{
3895	    reg_nextline();
3896	}
3897	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3898	{
3899	    ADVANCE_REGINPUT();
3900	}
3901	else
3902	{
3903	  if (WITH_NL(op))
3904	      op -= ADD_NL;
3905#ifdef FEAT_MBYTE
3906	  if (has_mbyte)
3907	      c = (*mb_ptr2char)(reginput);
3908	  else
3909#endif
3910	      c = *reginput;
3911	  switch (op)
3912	  {
3913	  case BOL:
3914	    if (reginput != regline)
3915		status = RA_NOMATCH;
3916	    break;
3917
3918	  case EOL:
3919	    if (c != NUL)
3920		status = RA_NOMATCH;
3921	    break;
3922
3923	  case RE_BOF:
3924	    /* We're not at the beginning of the file when below the first
3925	     * line where we started, not at the start of the line or we
3926	     * didn't start at the first line of the buffer. */
3927	    if (reglnum != 0 || reginput != regline
3928					  || (REG_MULTI && reg_firstlnum > 1))
3929		status = RA_NOMATCH;
3930	    break;
3931
3932	  case RE_EOF:
3933	    if (reglnum != reg_maxline || c != NUL)
3934		status = RA_NOMATCH;
3935	    break;
3936
3937	  case CURSOR:
3938	    /* Check if the buffer is in a window and compare the
3939	     * reg_win->w_cursor position to the match position. */
3940	    if (reg_win == NULL
3941		    || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3942		    || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
3943		status = RA_NOMATCH;
3944	    break;
3945
3946	  case RE_MARK:
3947	    /* Compare the mark position to the match position.  NOTE: Always
3948	     * uses the current buffer. */
3949	    {
3950		int	mark = OPERAND(scan)[0];
3951		int	cmp = OPERAND(scan)[1];
3952		pos_T	*pos;
3953
3954		pos = getmark(mark, FALSE);
3955		if (pos == NULL		     /* mark doesn't exist */
3956			|| pos->lnum <= 0    /* mark isn't set (in curbuf) */
3957			|| (pos->lnum == reglnum + reg_firstlnum
3958				? (pos->col == (colnr_T)(reginput - regline)
3959				    ? (cmp == '<' || cmp == '>')
3960				    : (pos->col < (colnr_T)(reginput - regline)
3961					? cmp != '>'
3962					: cmp != '<'))
3963				: (pos->lnum < reglnum + reg_firstlnum
3964				    ? cmp != '>'
3965				    : cmp != '<')))
3966		    status = RA_NOMATCH;
3967	    }
3968	    break;
3969
3970	  case RE_VISUAL:
3971#ifdef FEAT_VISUAL
3972	    /* Check if the buffer is the current buffer. and whether the
3973	     * position is inside the Visual area. */
3974	    if (reg_buf != curbuf || VIsual.lnum == 0)
3975		status = RA_NOMATCH;
3976	    else
3977	    {
3978		pos_T	    top, bot;
3979		linenr_T    lnum;
3980		colnr_T	    col;
3981		win_T	    *wp = reg_win == NULL ? curwin : reg_win;
3982		int	    mode;
3983
3984		if (VIsual_active)
3985		{
3986		    if (lt(VIsual, wp->w_cursor))
3987		    {
3988			top = VIsual;
3989			bot = wp->w_cursor;
3990		    }
3991		    else
3992		    {
3993			top = wp->w_cursor;
3994			bot = VIsual;
3995		    }
3996		    mode = VIsual_mode;
3997		}
3998		else
3999		{
4000		    if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
4001		    {
4002			top = curbuf->b_visual.vi_start;
4003			bot = curbuf->b_visual.vi_end;
4004		    }
4005		    else
4006		    {
4007			top = curbuf->b_visual.vi_end;
4008			bot = curbuf->b_visual.vi_start;
4009		    }
4010		    mode = curbuf->b_visual.vi_mode;
4011		}
4012		lnum = reglnum + reg_firstlnum;
4013		col = (colnr_T)(reginput - regline);
4014		if (lnum < top.lnum || lnum > bot.lnum)
4015		    status = RA_NOMATCH;
4016		else if (mode == 'v')
4017		{
4018		    if ((lnum == top.lnum && col < top.col)
4019			    || (lnum == bot.lnum
4020					 && col >= bot.col + (*p_sel != 'e')))
4021			status = RA_NOMATCH;
4022		}
4023		else if (mode == Ctrl_V)
4024		{
4025		    colnr_T	    start, end;
4026		    colnr_T	    start2, end2;
4027		    colnr_T	    cols;
4028
4029		    getvvcol(wp, &top, &start, NULL, &end);
4030		    getvvcol(wp, &bot, &start2, NULL, &end2);
4031		    if (start2 < start)
4032			start = start2;
4033		    if (end2 > end)
4034			end = end2;
4035		    if (top.col == MAXCOL || bot.col == MAXCOL)
4036			end = MAXCOL;
4037		    cols = win_linetabsize(wp,
4038				      regline, (colnr_T)(reginput - regline));
4039		    if (cols < start || cols > end - (*p_sel == 'e'))
4040			status = RA_NOMATCH;
4041		}
4042	    }
4043#else
4044	    status = RA_NOMATCH;
4045#endif
4046	    break;
4047
4048	  case RE_LNUM:
4049	    if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4050									scan))
4051		status = RA_NOMATCH;
4052	    break;
4053
4054	  case RE_COL:
4055	    if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
4056		status = RA_NOMATCH;
4057	    break;
4058
4059	  case RE_VCOL:
4060	    if (!re_num_cmp((long_u)win_linetabsize(
4061			    reg_win == NULL ? curwin : reg_win,
4062			    regline, (colnr_T)(reginput - regline)) + 1, scan))
4063		status = RA_NOMATCH;
4064	    break;
4065
4066	  case BOW:	/* \<word; reginput points to w */
4067	    if (c == NUL)	/* Can't match at end of line */
4068		status = RA_NOMATCH;
4069#ifdef FEAT_MBYTE
4070	    else if (has_mbyte)
4071	    {
4072		int this_class;
4073
4074		/* Get class of current and previous char (if it exists). */
4075		this_class = mb_get_class(reginput);
4076		if (this_class <= 1)
4077		    status = RA_NOMATCH;  /* not on a word at all */
4078		else if (reg_prev_class() == this_class)
4079		    status = RA_NOMATCH;  /* previous char is in same word */
4080	    }
4081#endif
4082	    else
4083	    {
4084		if (!vim_iswordc(c)
4085			|| (reginput > regline && vim_iswordc(reginput[-1])))
4086		    status = RA_NOMATCH;
4087	    }
4088	    break;
4089
4090	  case EOW:	/* word\>; reginput points after d */
4091	    if (reginput == regline)    /* Can't match at start of line */
4092		status = RA_NOMATCH;
4093#ifdef FEAT_MBYTE
4094	    else if (has_mbyte)
4095	    {
4096		int this_class, prev_class;
4097
4098		/* Get class of current and previous char (if it exists). */
4099		this_class = mb_get_class(reginput);
4100		prev_class = reg_prev_class();
4101		if (this_class == prev_class
4102			|| prev_class == 0 || prev_class == 1)
4103		    status = RA_NOMATCH;
4104	    }
4105#endif
4106	    else
4107	    {
4108		if (!vim_iswordc(reginput[-1])
4109			|| (reginput[0] != NUL && vim_iswordc(c)))
4110		    status = RA_NOMATCH;
4111	    }
4112	    break; /* Matched with EOW */
4113
4114	  case ANY:
4115	    if (c == NUL)
4116		status = RA_NOMATCH;
4117	    else
4118		ADVANCE_REGINPUT();
4119	    break;
4120
4121	  case IDENT:
4122	    if (!vim_isIDc(c))
4123		status = RA_NOMATCH;
4124	    else
4125		ADVANCE_REGINPUT();
4126	    break;
4127
4128	  case SIDENT:
4129	    if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
4130		status = RA_NOMATCH;
4131	    else
4132		ADVANCE_REGINPUT();
4133	    break;
4134
4135	  case KWORD:
4136	    if (!vim_iswordp(reginput))
4137		status = RA_NOMATCH;
4138	    else
4139		ADVANCE_REGINPUT();
4140	    break;
4141
4142	  case SKWORD:
4143	    if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
4144		status = RA_NOMATCH;
4145	    else
4146		ADVANCE_REGINPUT();
4147	    break;
4148
4149	  case FNAME:
4150	    if (!vim_isfilec(c))
4151		status = RA_NOMATCH;
4152	    else
4153		ADVANCE_REGINPUT();
4154	    break;
4155
4156	  case SFNAME:
4157	    if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
4158		status = RA_NOMATCH;
4159	    else
4160		ADVANCE_REGINPUT();
4161	    break;
4162
4163	  case PRINT:
4164	    if (ptr2cells(reginput) != 1)
4165		status = RA_NOMATCH;
4166	    else
4167		ADVANCE_REGINPUT();
4168	    break;
4169
4170	  case SPRINT:
4171	    if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
4172		status = RA_NOMATCH;
4173	    else
4174		ADVANCE_REGINPUT();
4175	    break;
4176
4177	  case WHITE:
4178	    if (!vim_iswhite(c))
4179		status = RA_NOMATCH;
4180	    else
4181		ADVANCE_REGINPUT();
4182	    break;
4183
4184	  case NWHITE:
4185	    if (c == NUL || vim_iswhite(c))
4186		status = RA_NOMATCH;
4187	    else
4188		ADVANCE_REGINPUT();
4189	    break;
4190
4191	  case DIGIT:
4192	    if (!ri_digit(c))
4193		status = RA_NOMATCH;
4194	    else
4195		ADVANCE_REGINPUT();
4196	    break;
4197
4198	  case NDIGIT:
4199	    if (c == NUL || ri_digit(c))
4200		status = RA_NOMATCH;
4201	    else
4202		ADVANCE_REGINPUT();
4203	    break;
4204
4205	  case HEX:
4206	    if (!ri_hex(c))
4207		status = RA_NOMATCH;
4208	    else
4209		ADVANCE_REGINPUT();
4210	    break;
4211
4212	  case NHEX:
4213	    if (c == NUL || ri_hex(c))
4214		status = RA_NOMATCH;
4215	    else
4216		ADVANCE_REGINPUT();
4217	    break;
4218
4219	  case OCTAL:
4220	    if (!ri_octal(c))
4221		status = RA_NOMATCH;
4222	    else
4223		ADVANCE_REGINPUT();
4224	    break;
4225
4226	  case NOCTAL:
4227	    if (c == NUL || ri_octal(c))
4228		status = RA_NOMATCH;
4229	    else
4230		ADVANCE_REGINPUT();
4231	    break;
4232
4233	  case WORD:
4234	    if (!ri_word(c))
4235		status = RA_NOMATCH;
4236	    else
4237		ADVANCE_REGINPUT();
4238	    break;
4239
4240	  case NWORD:
4241	    if (c == NUL || ri_word(c))
4242		status = RA_NOMATCH;
4243	    else
4244		ADVANCE_REGINPUT();
4245	    break;
4246
4247	  case HEAD:
4248	    if (!ri_head(c))
4249		status = RA_NOMATCH;
4250	    else
4251		ADVANCE_REGINPUT();
4252	    break;
4253
4254	  case NHEAD:
4255	    if (c == NUL || ri_head(c))
4256		status = RA_NOMATCH;
4257	    else
4258		ADVANCE_REGINPUT();
4259	    break;
4260
4261	  case ALPHA:
4262	    if (!ri_alpha(c))
4263		status = RA_NOMATCH;
4264	    else
4265		ADVANCE_REGINPUT();
4266	    break;
4267
4268	  case NALPHA:
4269	    if (c == NUL || ri_alpha(c))
4270		status = RA_NOMATCH;
4271	    else
4272		ADVANCE_REGINPUT();
4273	    break;
4274
4275	  case LOWER:
4276	    if (!ri_lower(c))
4277		status = RA_NOMATCH;
4278	    else
4279		ADVANCE_REGINPUT();
4280	    break;
4281
4282	  case NLOWER:
4283	    if (c == NUL || ri_lower(c))
4284		status = RA_NOMATCH;
4285	    else
4286		ADVANCE_REGINPUT();
4287	    break;
4288
4289	  case UPPER:
4290	    if (!ri_upper(c))
4291		status = RA_NOMATCH;
4292	    else
4293		ADVANCE_REGINPUT();
4294	    break;
4295
4296	  case NUPPER:
4297	    if (c == NUL || ri_upper(c))
4298		status = RA_NOMATCH;
4299	    else
4300		ADVANCE_REGINPUT();
4301	    break;
4302
4303	  case EXACTLY:
4304	    {
4305		int	len;
4306		char_u	*opnd;
4307
4308		opnd = OPERAND(scan);
4309		/* Inline the first byte, for speed. */
4310		if (*opnd != *reginput
4311			&& (!ireg_ic || (
4312#ifdef FEAT_MBYTE
4313			    !enc_utf8 &&
4314#endif
4315			    MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
4316		    status = RA_NOMATCH;
4317		else if (*opnd == NUL)
4318		{
4319		    /* match empty string always works; happens when "~" is
4320		     * empty. */
4321		}
4322		else if (opnd[1] == NUL
4323#ifdef FEAT_MBYTE
4324			    && !(enc_utf8 && ireg_ic)
4325#endif
4326			)
4327		    ++reginput;		/* matched a single char */
4328		else
4329		{
4330		    len = (int)STRLEN(opnd);
4331		    /* Need to match first byte again for multi-byte. */
4332		    if (cstrncmp(opnd, reginput, &len) != 0)
4333			status = RA_NOMATCH;
4334#ifdef FEAT_MBYTE
4335		    /* Check for following composing character. */
4336		    else if (enc_utf8
4337			       && UTF_COMPOSINGLIKE(reginput, reginput + len))
4338		    {
4339			/* raaron: This code makes a composing character get
4340			 * ignored, which is the correct behavior (sometimes)
4341			 * for voweled Hebrew texts. */
4342			if (!ireg_icombine)
4343			    status = RA_NOMATCH;
4344		    }
4345#endif
4346		    else
4347			reginput += len;
4348		}
4349	    }
4350	    break;
4351
4352	  case ANYOF:
4353	  case ANYBUT:
4354	    if (c == NUL)
4355		status = RA_NOMATCH;
4356	    else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4357		status = RA_NOMATCH;
4358	    else
4359		ADVANCE_REGINPUT();
4360	    break;
4361
4362#ifdef FEAT_MBYTE
4363	  case MULTIBYTECODE:
4364	    if (has_mbyte)
4365	    {
4366		int	i, len;
4367		char_u	*opnd;
4368		int	opndc = 0, inpc;
4369
4370		opnd = OPERAND(scan);
4371		/* Safety check (just in case 'encoding' was changed since
4372		 * compiling the program). */
4373		if ((len = (*mb_ptr2len)(opnd)) < 2)
4374		{
4375		    status = RA_NOMATCH;
4376		    break;
4377		}
4378		if (enc_utf8)
4379		    opndc = mb_ptr2char(opnd);
4380		if (enc_utf8 && utf_iscomposing(opndc))
4381		{
4382		    /* When only a composing char is given match at any
4383		     * position where that composing char appears. */
4384		    status = RA_NOMATCH;
4385		    for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
4386		    {
4387			inpc = mb_ptr2char(reginput + i);
4388			if (!utf_iscomposing(inpc))
4389			{
4390			    if (i > 0)
4391				break;
4392			}
4393			else if (opndc == inpc)
4394			{
4395			    /* Include all following composing chars. */
4396			    len = i + mb_ptr2len(reginput + i);
4397			    status = RA_MATCH;
4398			    break;
4399			}
4400		    }
4401		}
4402		else
4403		    for (i = 0; i < len; ++i)
4404			if (opnd[i] != reginput[i])
4405			{
4406			    status = RA_NOMATCH;
4407			    break;
4408			}
4409		reginput += len;
4410	    }
4411	    else
4412		status = RA_NOMATCH;
4413	    break;
4414#endif
4415
4416	  case NOTHING:
4417	    break;
4418
4419	  case BACK:
4420	    {
4421		int		i;
4422		backpos_T	*bp;
4423
4424		/*
4425		 * When we run into BACK we need to check if we don't keep
4426		 * looping without matching any input.  The second and later
4427		 * times a BACK is encountered it fails if the input is still
4428		 * at the same position as the previous time.
4429		 * The positions are stored in "backpos" and found by the
4430		 * current value of "scan", the position in the RE program.
4431		 */
4432		bp = (backpos_T *)backpos.ga_data;
4433		for (i = 0; i < backpos.ga_len; ++i)
4434		    if (bp[i].bp_scan == scan)
4435			break;
4436		if (i == backpos.ga_len)
4437		{
4438		    /* First time at this BACK, make room to store the pos. */
4439		    if (ga_grow(&backpos, 1) == FAIL)
4440			status = RA_FAIL;
4441		    else
4442		    {
4443			/* get "ga_data" again, it may have changed */
4444			bp = (backpos_T *)backpos.ga_data;
4445			bp[i].bp_scan = scan;
4446			++backpos.ga_len;
4447		    }
4448		}
4449		else if (reg_save_equal(&bp[i].bp_pos))
4450		    /* Still at same position as last time, fail. */
4451		    status = RA_NOMATCH;
4452
4453		if (status != RA_FAIL && status != RA_NOMATCH)
4454		    reg_save(&bp[i].bp_pos, &backpos);
4455	    }
4456	    break;
4457
4458	  case MOPEN + 0:   /* Match start: \zs */
4459	  case MOPEN + 1:   /* \( */
4460	  case MOPEN + 2:
4461	  case MOPEN + 3:
4462	  case MOPEN + 4:
4463	  case MOPEN + 5:
4464	  case MOPEN + 6:
4465	  case MOPEN + 7:
4466	  case MOPEN + 8:
4467	  case MOPEN + 9:
4468	    {
4469		no = op - MOPEN;
4470		cleanup_subexpr();
4471		rp = regstack_push(RS_MOPEN, scan);
4472		if (rp == NULL)
4473		    status = RA_FAIL;
4474		else
4475		{
4476		    rp->rs_no = no;
4477		    save_se(&rp->rs_un.sesave, &reg_startpos[no],
4478							     &reg_startp[no]);
4479		    /* We simply continue and handle the result when done. */
4480		}
4481	    }
4482	    break;
4483
4484	  case NOPEN:	    /* \%( */
4485	  case NCLOSE:	    /* \) after \%( */
4486		if (regstack_push(RS_NOPEN, scan) == NULL)
4487		    status = RA_FAIL;
4488		/* We simply continue and handle the result when done. */
4489		break;
4490
4491#ifdef FEAT_SYN_HL
4492	  case ZOPEN + 1:
4493	  case ZOPEN + 2:
4494	  case ZOPEN + 3:
4495	  case ZOPEN + 4:
4496	  case ZOPEN + 5:
4497	  case ZOPEN + 6:
4498	  case ZOPEN + 7:
4499	  case ZOPEN + 8:
4500	  case ZOPEN + 9:
4501	    {
4502		no = op - ZOPEN;
4503		cleanup_zsubexpr();
4504		rp = regstack_push(RS_ZOPEN, scan);
4505		if (rp == NULL)
4506		    status = RA_FAIL;
4507		else
4508		{
4509		    rp->rs_no = no;
4510		    save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4511							     &reg_startzp[no]);
4512		    /* We simply continue and handle the result when done. */
4513		}
4514	    }
4515	    break;
4516#endif
4517
4518	  case MCLOSE + 0:  /* Match end: \ze */
4519	  case MCLOSE + 1:  /* \) */
4520	  case MCLOSE + 2:
4521	  case MCLOSE + 3:
4522	  case MCLOSE + 4:
4523	  case MCLOSE + 5:
4524	  case MCLOSE + 6:
4525	  case MCLOSE + 7:
4526	  case MCLOSE + 8:
4527	  case MCLOSE + 9:
4528	    {
4529		no = op - MCLOSE;
4530		cleanup_subexpr();
4531		rp = regstack_push(RS_MCLOSE, scan);
4532		if (rp == NULL)
4533		    status = RA_FAIL;
4534		else
4535		{
4536		    rp->rs_no = no;
4537		    save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4538		    /* We simply continue and handle the result when done. */
4539		}
4540	    }
4541	    break;
4542
4543#ifdef FEAT_SYN_HL
4544	  case ZCLOSE + 1:  /* \) after \z( */
4545	  case ZCLOSE + 2:
4546	  case ZCLOSE + 3:
4547	  case ZCLOSE + 4:
4548	  case ZCLOSE + 5:
4549	  case ZCLOSE + 6:
4550	  case ZCLOSE + 7:
4551	  case ZCLOSE + 8:
4552	  case ZCLOSE + 9:
4553	    {
4554		no = op - ZCLOSE;
4555		cleanup_zsubexpr();
4556		rp = regstack_push(RS_ZCLOSE, scan);
4557		if (rp == NULL)
4558		    status = RA_FAIL;
4559		else
4560		{
4561		    rp->rs_no = no;
4562		    save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4563							      &reg_endzp[no]);
4564		    /* We simply continue and handle the result when done. */
4565		}
4566	    }
4567	    break;
4568#endif
4569
4570	  case BACKREF + 1:
4571	  case BACKREF + 2:
4572	  case BACKREF + 3:
4573	  case BACKREF + 4:
4574	  case BACKREF + 5:
4575	  case BACKREF + 6:
4576	  case BACKREF + 7:
4577	  case BACKREF + 8:
4578	  case BACKREF + 9:
4579	    {
4580		int		len;
4581		linenr_T	clnum;
4582		colnr_T		ccol;
4583		char_u		*p;
4584
4585		no = op - BACKREF;
4586		cleanup_subexpr();
4587		if (!REG_MULTI)		/* Single-line regexp */
4588		{
4589		    if (reg_startp[no] == NULL || reg_endp[no] == NULL)
4590		    {
4591			/* Backref was not set: Match an empty string. */
4592			len = 0;
4593		    }
4594		    else
4595		    {
4596			/* Compare current input with back-ref in the same
4597			 * line. */
4598			len = (int)(reg_endp[no] - reg_startp[no]);
4599			if (cstrncmp(reg_startp[no], reginput, &len) != 0)
4600			    status = RA_NOMATCH;
4601		    }
4602		}
4603		else				/* Multi-line regexp */
4604		{
4605		    if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
4606		    {
4607			/* Backref was not set: Match an empty string. */
4608			len = 0;
4609		    }
4610		    else
4611		    {
4612			if (reg_startpos[no].lnum == reglnum
4613				&& reg_endpos[no].lnum == reglnum)
4614			{
4615			    /* Compare back-ref within the current line. */
4616			    len = reg_endpos[no].col - reg_startpos[no].col;
4617			    if (cstrncmp(regline + reg_startpos[no].col,
4618							  reginput, &len) != 0)
4619				status = RA_NOMATCH;
4620			}
4621			else
4622			{
4623			    /* Messy situation: Need to compare between two
4624			     * lines. */
4625			    ccol = reg_startpos[no].col;
4626			    clnum = reg_startpos[no].lnum;
4627			    for (;;)
4628			    {
4629				/* Since getting one line may invalidate
4630				 * the other, need to make copy.  Slow! */
4631				if (regline != reg_tofree)
4632				{
4633				    len = (int)STRLEN(regline);
4634				    if (reg_tofree == NULL
4635						 || len >= (int)reg_tofreelen)
4636				    {
4637					len += 50;	/* get some extra */
4638					vim_free(reg_tofree);
4639					reg_tofree = alloc(len);
4640					if (reg_tofree == NULL)
4641					{
4642					    status = RA_FAIL; /* outof memory!*/
4643					    break;
4644					}
4645					reg_tofreelen = len;
4646				    }
4647				    STRCPY(reg_tofree, regline);
4648				    reginput = reg_tofree
4649						       + (reginput - regline);
4650				    regline = reg_tofree;
4651				}
4652
4653				/* Get the line to compare with. */
4654				p = reg_getline(clnum);
4655				if (clnum == reg_endpos[no].lnum)
4656				    len = reg_endpos[no].col - ccol;
4657				else
4658				    len = (int)STRLEN(p + ccol);
4659
4660				if (cstrncmp(p + ccol, reginput, &len) != 0)
4661				{
4662				    status = RA_NOMATCH;  /* doesn't match */
4663				    break;
4664				}
4665				if (clnum == reg_endpos[no].lnum)
4666				    break;		/* match and at end! */
4667				if (reglnum >= reg_maxline)
4668				{
4669				    status = RA_NOMATCH;  /* text too short */
4670				    break;
4671				}
4672
4673				/* Advance to next line. */
4674				reg_nextline();
4675				++clnum;
4676				ccol = 0;
4677				if (got_int)
4678				{
4679				    status = RA_FAIL;
4680				    break;
4681				}
4682			    }
4683
4684			    /* found a match!  Note that regline may now point
4685			     * to a copy of the line, that should not matter. */
4686			}
4687		    }
4688		}
4689
4690		/* Matched the backref, skip over it. */
4691		reginput += len;
4692	    }
4693	    break;
4694
4695#ifdef FEAT_SYN_HL
4696	  case ZREF + 1:
4697	  case ZREF + 2:
4698	  case ZREF + 3:
4699	  case ZREF + 4:
4700	  case ZREF + 5:
4701	  case ZREF + 6:
4702	  case ZREF + 7:
4703	  case ZREF + 8:
4704	  case ZREF + 9:
4705	    {
4706		int	len;
4707
4708		cleanup_zsubexpr();
4709		no = op - ZREF;
4710		if (re_extmatch_in != NULL
4711			&& re_extmatch_in->matches[no] != NULL)
4712		{
4713		    len = (int)STRLEN(re_extmatch_in->matches[no]);
4714		    if (cstrncmp(re_extmatch_in->matches[no],
4715							  reginput, &len) != 0)
4716			status = RA_NOMATCH;
4717		    else
4718			reginput += len;
4719		}
4720		else
4721		{
4722		    /* Backref was not set: Match an empty string. */
4723		}
4724	    }
4725	    break;
4726#endif
4727
4728	  case BRANCH:
4729	    {
4730		if (OP(next) != BRANCH) /* No choice. */
4731		    next = OPERAND(scan);	/* Avoid recursion. */
4732		else
4733		{
4734		    rp = regstack_push(RS_BRANCH, scan);
4735		    if (rp == NULL)
4736			status = RA_FAIL;
4737		    else
4738			status = RA_BREAK;	/* rest is below */
4739		}
4740	    }
4741	    break;
4742
4743	  case BRACE_LIMITS:
4744	    {
4745		if (OP(next) == BRACE_SIMPLE)
4746		{
4747		    bl_minval = OPERAND_MIN(scan);
4748		    bl_maxval = OPERAND_MAX(scan);
4749		}
4750		else if (OP(next) >= BRACE_COMPLEX
4751			&& OP(next) < BRACE_COMPLEX + 10)
4752		{
4753		    no = OP(next) - BRACE_COMPLEX;
4754		    brace_min[no] = OPERAND_MIN(scan);
4755		    brace_max[no] = OPERAND_MAX(scan);
4756		    brace_count[no] = 0;
4757		}
4758		else
4759		{
4760		    EMSG(_(e_internal));	    /* Shouldn't happen */
4761		    status = RA_FAIL;
4762		}
4763	    }
4764	    break;
4765
4766	  case BRACE_COMPLEX + 0:
4767	  case BRACE_COMPLEX + 1:
4768	  case BRACE_COMPLEX + 2:
4769	  case BRACE_COMPLEX + 3:
4770	  case BRACE_COMPLEX + 4:
4771	  case BRACE_COMPLEX + 5:
4772	  case BRACE_COMPLEX + 6:
4773	  case BRACE_COMPLEX + 7:
4774	  case BRACE_COMPLEX + 8:
4775	  case BRACE_COMPLEX + 9:
4776	    {
4777		no = op - BRACE_COMPLEX;
4778		++brace_count[no];
4779
4780		/* If not matched enough times yet, try one more */
4781		if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4782					     ? brace_min[no] : brace_max[no]))
4783		{
4784		    rp = regstack_push(RS_BRCPLX_MORE, scan);
4785		    if (rp == NULL)
4786			status = RA_FAIL;
4787		    else
4788		    {
4789			rp->rs_no = no;
4790			reg_save(&rp->rs_un.regsave, &backpos);
4791			next = OPERAND(scan);
4792			/* We continue and handle the result when done. */
4793		    }
4794		    break;
4795		}
4796
4797		/* If matched enough times, may try matching some more */
4798		if (brace_min[no] <= brace_max[no])
4799		{
4800		    /* Range is the normal way around, use longest match */
4801		    if (brace_count[no] <= brace_max[no])
4802		    {
4803			rp = regstack_push(RS_BRCPLX_LONG, scan);
4804			if (rp == NULL)
4805			    status = RA_FAIL;
4806			else
4807			{
4808			    rp->rs_no = no;
4809			    reg_save(&rp->rs_un.regsave, &backpos);
4810			    next = OPERAND(scan);
4811			    /* We continue and handle the result when done. */
4812			}
4813		    }
4814		}
4815		else
4816		{
4817		    /* Range is backwards, use shortest match first */
4818		    if (brace_count[no] <= brace_min[no])
4819		    {
4820			rp = regstack_push(RS_BRCPLX_SHORT, scan);
4821			if (rp == NULL)
4822			    status = RA_FAIL;
4823			else
4824			{
4825			    reg_save(&rp->rs_un.regsave, &backpos);
4826			    /* We continue and handle the result when done. */
4827			}
4828		    }
4829		}
4830	    }
4831	    break;
4832
4833	  case BRACE_SIMPLE:
4834	  case STAR:
4835	  case PLUS:
4836	    {
4837		regstar_T	rst;
4838
4839		/*
4840		 * Lookahead to avoid useless match attempts when we know
4841		 * what character comes next.
4842		 */
4843		if (OP(next) == EXACTLY)
4844		{
4845		    rst.nextb = *OPERAND(next);
4846		    if (ireg_ic)
4847		    {
4848			if (MB_ISUPPER(rst.nextb))
4849			    rst.nextb_ic = MB_TOLOWER(rst.nextb);
4850			else
4851			    rst.nextb_ic = MB_TOUPPER(rst.nextb);
4852		    }
4853		    else
4854			rst.nextb_ic = rst.nextb;
4855		}
4856		else
4857		{
4858		    rst.nextb = NUL;
4859		    rst.nextb_ic = NUL;
4860		}
4861		if (op != BRACE_SIMPLE)
4862		{
4863		    rst.minval = (op == STAR) ? 0 : 1;
4864		    rst.maxval = MAX_LIMIT;
4865		}
4866		else
4867		{
4868		    rst.minval = bl_minval;
4869		    rst.maxval = bl_maxval;
4870		}
4871
4872		/*
4873		 * When maxval > minval, try matching as much as possible, up
4874		 * to maxval.  When maxval < minval, try matching at least the
4875		 * minimal number (since the range is backwards, that's also
4876		 * maxval!).
4877		 */
4878		rst.count = regrepeat(OPERAND(scan), rst.maxval);
4879		if (got_int)
4880		{
4881		    status = RA_FAIL;
4882		    break;
4883		}
4884		if (rst.minval <= rst.maxval
4885			  ? rst.count >= rst.minval : rst.count >= rst.maxval)
4886		{
4887		    /* It could match.  Prepare for trying to match what
4888		     * follows.  The code is below.  Parameters are stored in
4889		     * a regstar_T on the regstack. */
4890		    if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4891		    {
4892			EMSG(_(e_maxmempat));
4893			status = RA_FAIL;
4894		    }
4895		    else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4896			status = RA_FAIL;
4897		    else
4898		    {
4899			regstack.ga_len += sizeof(regstar_T);
4900			rp = regstack_push(rst.minval <= rst.maxval
4901					? RS_STAR_LONG : RS_STAR_SHORT, scan);
4902			if (rp == NULL)
4903			    status = RA_FAIL;
4904			else
4905			{
4906			    *(((regstar_T *)rp) - 1) = rst;
4907			    status = RA_BREAK;	    /* skip the restore bits */
4908			}
4909		    }
4910		}
4911		else
4912		    status = RA_NOMATCH;
4913
4914	    }
4915	    break;
4916
4917	  case NOMATCH:
4918	  case MATCH:
4919	  case SUBPAT:
4920	    rp = regstack_push(RS_NOMATCH, scan);
4921	    if (rp == NULL)
4922		status = RA_FAIL;
4923	    else
4924	    {
4925		rp->rs_no = op;
4926		reg_save(&rp->rs_un.regsave, &backpos);
4927		next = OPERAND(scan);
4928		/* We continue and handle the result when done. */
4929	    }
4930	    break;
4931
4932	  case BEHIND:
4933	  case NOBEHIND:
4934	    /* Need a bit of room to store extra positions. */
4935	    if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4936	    {
4937		EMSG(_(e_maxmempat));
4938		status = RA_FAIL;
4939	    }
4940	    else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4941		status = RA_FAIL;
4942	    else
4943	    {
4944		regstack.ga_len += sizeof(regbehind_T);
4945		rp = regstack_push(RS_BEHIND1, scan);
4946		if (rp == NULL)
4947		    status = RA_FAIL;
4948		else
4949		{
4950		    /* Need to save the subexpr to be able to restore them
4951		     * when there is a match but we don't use it. */
4952		    save_subexpr(((regbehind_T *)rp) - 1);
4953
4954		    rp->rs_no = op;
4955		    reg_save(&rp->rs_un.regsave, &backpos);
4956		    /* First try if what follows matches.  If it does then we
4957		     * check the behind match by looping. */
4958		}
4959	    }
4960	    break;
4961
4962	  case BHPOS:
4963	    if (REG_MULTI)
4964	    {
4965		if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4966			|| behind_pos.rs_u.pos.lnum != reglnum)
4967		    status = RA_NOMATCH;
4968	    }
4969	    else if (behind_pos.rs_u.ptr != reginput)
4970		status = RA_NOMATCH;
4971	    break;
4972
4973	  case NEWL:
4974	    if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
4975			     || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
4976		status = RA_NOMATCH;
4977	    else if (reg_line_lbr)
4978		ADVANCE_REGINPUT();
4979	    else
4980		reg_nextline();
4981	    break;
4982
4983	  case END:
4984	    status = RA_MATCH;	/* Success! */
4985	    break;
4986
4987	  default:
4988	    EMSG(_(e_re_corr));
4989#ifdef DEBUG
4990	    printf("Illegal op code %d\n", op);
4991#endif
4992	    status = RA_FAIL;
4993	    break;
4994	  }
4995	}
4996
4997	/* If we can't continue sequentially, break the inner loop. */
4998	if (status != RA_CONT)
4999	    break;
5000
5001	/* Continue in inner loop, advance to next item. */
5002	scan = next;
5003
5004    } /* end of inner loop */
5005
5006    /*
5007     * If there is something on the regstack execute the code for the state.
5008     * If the state is popped then loop and use the older state.
5009     */
5010    while (regstack.ga_len > 0 && status != RA_FAIL)
5011    {
5012	rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5013	switch (rp->rs_state)
5014	{
5015	  case RS_NOPEN:
5016	    /* Result is passed on as-is, simply pop the state. */
5017	    regstack_pop(&scan);
5018	    break;
5019
5020	  case RS_MOPEN:
5021	    /* Pop the state.  Restore pointers when there is no match. */
5022	    if (status == RA_NOMATCH)
5023		restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
5024						  &reg_startp[rp->rs_no]);
5025	    regstack_pop(&scan);
5026	    break;
5027
5028#ifdef FEAT_SYN_HL
5029	  case RS_ZOPEN:
5030	    /* Pop the state.  Restore pointers when there is no match. */
5031	    if (status == RA_NOMATCH)
5032		restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5033						 &reg_startzp[rp->rs_no]);
5034	    regstack_pop(&scan);
5035	    break;
5036#endif
5037
5038	  case RS_MCLOSE:
5039	    /* Pop the state.  Restore pointers when there is no match. */
5040	    if (status == RA_NOMATCH)
5041		restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5042						    &reg_endp[rp->rs_no]);
5043	    regstack_pop(&scan);
5044	    break;
5045
5046#ifdef FEAT_SYN_HL
5047	  case RS_ZCLOSE:
5048	    /* Pop the state.  Restore pointers when there is no match. */
5049	    if (status == RA_NOMATCH)
5050		restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5051						   &reg_endzp[rp->rs_no]);
5052	    regstack_pop(&scan);
5053	    break;
5054#endif
5055
5056	  case RS_BRANCH:
5057	    if (status == RA_MATCH)
5058		/* this branch matched, use it */
5059		regstack_pop(&scan);
5060	    else
5061	    {
5062		if (status != RA_BREAK)
5063		{
5064		    /* After a non-matching branch: try next one. */
5065		    reg_restore(&rp->rs_un.regsave, &backpos);
5066		    scan = rp->rs_scan;
5067		}
5068		if (scan == NULL || OP(scan) != BRANCH)
5069		{
5070		    /* no more branches, didn't find a match */
5071		    status = RA_NOMATCH;
5072		    regstack_pop(&scan);
5073		}
5074		else
5075		{
5076		    /* Prepare to try a branch. */
5077		    rp->rs_scan = regnext(scan);
5078		    reg_save(&rp->rs_un.regsave, &backpos);
5079		    scan = OPERAND(scan);
5080		}
5081	    }
5082	    break;
5083
5084	  case RS_BRCPLX_MORE:
5085	    /* Pop the state.  Restore pointers when there is no match. */
5086	    if (status == RA_NOMATCH)
5087	    {
5088		reg_restore(&rp->rs_un.regsave, &backpos);
5089		--brace_count[rp->rs_no];	/* decrement match count */
5090	    }
5091	    regstack_pop(&scan);
5092	    break;
5093
5094	  case RS_BRCPLX_LONG:
5095	    /* Pop the state.  Restore pointers when there is no match. */
5096	    if (status == RA_NOMATCH)
5097	    {
5098		/* There was no match, but we did find enough matches. */
5099		reg_restore(&rp->rs_un.regsave, &backpos);
5100		--brace_count[rp->rs_no];
5101		/* continue with the items after "\{}" */
5102		status = RA_CONT;
5103	    }
5104	    regstack_pop(&scan);
5105	    if (status == RA_CONT)
5106		scan = regnext(scan);
5107	    break;
5108
5109	  case RS_BRCPLX_SHORT:
5110	    /* Pop the state.  Restore pointers when there is no match. */
5111	    if (status == RA_NOMATCH)
5112		/* There was no match, try to match one more item. */
5113		reg_restore(&rp->rs_un.regsave, &backpos);
5114	    regstack_pop(&scan);
5115	    if (status == RA_NOMATCH)
5116	    {
5117		scan = OPERAND(scan);
5118		status = RA_CONT;
5119	    }
5120	    break;
5121
5122	  case RS_NOMATCH:
5123	    /* Pop the state.  If the operand matches for NOMATCH or
5124	     * doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup,
5125	     * except for SUBPAT, and continue with the next item. */
5126	    if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5127		status = RA_NOMATCH;
5128	    else
5129	    {
5130		status = RA_CONT;
5131		if (rp->rs_no != SUBPAT)	/* zero-width */
5132		    reg_restore(&rp->rs_un.regsave, &backpos);
5133	    }
5134	    regstack_pop(&scan);
5135	    if (status == RA_CONT)
5136		scan = regnext(scan);
5137	    break;
5138
5139	  case RS_BEHIND1:
5140	    if (status == RA_NOMATCH)
5141	    {
5142		regstack_pop(&scan);
5143		regstack.ga_len -= sizeof(regbehind_T);
5144	    }
5145	    else
5146	    {
5147		/* The stuff after BEHIND/NOBEHIND matches.  Now try if
5148		 * the behind part does (not) match before the current
5149		 * position in the input.  This must be done at every
5150		 * position in the input and checking if the match ends at
5151		 * the current position. */
5152
5153		/* save the position after the found match for next */
5154		reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
5155
5156		/* start looking for a match with operand at the current
5157		 * position.  Go back one character until we find the
5158		 * result, hitting the start of the line or the previous
5159		 * line (for multi-line matching).
5160		 * Set behind_pos to where the match should end, BHPOS
5161		 * will match it.  Save the current value. */
5162		(((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5163		behind_pos = rp->rs_un.regsave;
5164
5165		rp->rs_state = RS_BEHIND2;
5166
5167		reg_restore(&rp->rs_un.regsave, &backpos);
5168		scan = OPERAND(rp->rs_scan);
5169	    }
5170	    break;
5171
5172	  case RS_BEHIND2:
5173	    /*
5174	     * Looping for BEHIND / NOBEHIND match.
5175	     */
5176	    if (status == RA_MATCH && reg_save_equal(&behind_pos))
5177	    {
5178		/* found a match that ends where "next" started */
5179		behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5180		if (rp->rs_no == BEHIND)
5181		    reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5182								    &backpos);
5183		else
5184		{
5185		    /* But we didn't want a match.  Need to restore the
5186		     * subexpr, because what follows matched, so they have
5187		     * been set. */
5188		    status = RA_NOMATCH;
5189		    restore_subexpr(((regbehind_T *)rp) - 1);
5190		}
5191		regstack_pop(&scan);
5192		regstack.ga_len -= sizeof(regbehind_T);
5193	    }
5194	    else
5195	    {
5196		/* No match or a match that doesn't end where we want it: Go
5197		 * back one character.  May go to previous line once. */
5198		no = OK;
5199		if (REG_MULTI)
5200		{
5201		    if (rp->rs_un.regsave.rs_u.pos.col == 0)
5202		    {
5203			if (rp->rs_un.regsave.rs_u.pos.lnum
5204					< behind_pos.rs_u.pos.lnum
5205				|| reg_getline(
5206					--rp->rs_un.regsave.rs_u.pos.lnum)
5207								  == NULL)
5208			    no = FAIL;
5209			else
5210			{
5211			    reg_restore(&rp->rs_un.regsave, &backpos);
5212			    rp->rs_un.regsave.rs_u.pos.col =
5213						 (colnr_T)STRLEN(regline);
5214			}
5215		    }
5216		    else
5217			--rp->rs_un.regsave.rs_u.pos.col;
5218		}
5219		else
5220		{
5221		    if (rp->rs_un.regsave.rs_u.ptr == regline)
5222			no = FAIL;
5223		    else
5224			--rp->rs_un.regsave.rs_u.ptr;
5225		}
5226		if (no == OK)
5227		{
5228		    /* Advanced, prepare for finding match again. */
5229		    reg_restore(&rp->rs_un.regsave, &backpos);
5230		    scan = OPERAND(rp->rs_scan);
5231		    if (status == RA_MATCH)
5232		    {
5233			/* We did match, so subexpr may have been changed,
5234			 * need to restore them for the next try. */
5235			status = RA_NOMATCH;
5236			restore_subexpr(((regbehind_T *)rp) - 1);
5237		    }
5238		}
5239		else
5240		{
5241		    /* Can't advance.  For NOBEHIND that's a match. */
5242		    behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5243		    if (rp->rs_no == NOBEHIND)
5244		    {
5245			reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5246								    &backpos);
5247			status = RA_MATCH;
5248		    }
5249		    else
5250		    {
5251			/* We do want a proper match.  Need to restore the
5252			 * subexpr if we had a match, because they may have
5253			 * been set. */
5254			if (status == RA_MATCH)
5255			{
5256			    status = RA_NOMATCH;
5257			    restore_subexpr(((regbehind_T *)rp) - 1);
5258			}
5259		    }
5260		    regstack_pop(&scan);
5261		    regstack.ga_len -= sizeof(regbehind_T);
5262		}
5263	    }
5264	    break;
5265
5266	  case RS_STAR_LONG:
5267	  case RS_STAR_SHORT:
5268	    {
5269		regstar_T	    *rst = ((regstar_T *)rp) - 1;
5270
5271		if (status == RA_MATCH)
5272		{
5273		    regstack_pop(&scan);
5274		    regstack.ga_len -= sizeof(regstar_T);
5275		    break;
5276		}
5277
5278		/* Tried once already, restore input pointers. */
5279		if (status != RA_BREAK)
5280		    reg_restore(&rp->rs_un.regsave, &backpos);
5281
5282		/* Repeat until we found a position where it could match. */
5283		for (;;)
5284		{
5285		    if (status != RA_BREAK)
5286		    {
5287			/* Tried first position already, advance. */
5288			if (rp->rs_state == RS_STAR_LONG)
5289			{
5290			    /* Trying for longest match, but couldn't or
5291			     * didn't match -- back up one char. */
5292			    if (--rst->count < rst->minval)
5293				break;
5294			    if (reginput == regline)
5295			    {
5296				/* backup to last char of previous line */
5297				--reglnum;
5298				regline = reg_getline(reglnum);
5299				/* Just in case regrepeat() didn't count
5300				 * right. */
5301				if (regline == NULL)
5302				    break;
5303				reginput = regline + STRLEN(regline);
5304				fast_breakcheck();
5305			    }
5306			    else
5307				mb_ptr_back(regline, reginput);
5308			}
5309			else
5310			{
5311			    /* Range is backwards, use shortest match first.
5312			     * Careful: maxval and minval are exchanged!
5313			     * Couldn't or didn't match: try advancing one
5314			     * char. */
5315			    if (rst->count == rst->minval
5316				  || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5317				break;
5318			    ++rst->count;
5319			}
5320			if (got_int)
5321			    break;
5322		    }
5323		    else
5324			status = RA_NOMATCH;
5325
5326		    /* If it could match, try it. */
5327		    if (rst->nextb == NUL || *reginput == rst->nextb
5328					     || *reginput == rst->nextb_ic)
5329		    {
5330			reg_save(&rp->rs_un.regsave, &backpos);
5331			scan = regnext(rp->rs_scan);
5332			status = RA_CONT;
5333			break;
5334		    }
5335		}
5336		if (status != RA_CONT)
5337		{
5338		    /* Failed. */
5339		    regstack_pop(&scan);
5340		    regstack.ga_len -= sizeof(regstar_T);
5341		    status = RA_NOMATCH;
5342		}
5343	    }
5344	    break;
5345	}
5346
5347	/* If we want to continue the inner loop or didn't pop a state
5348	 * continue matching loop */
5349	if (status == RA_CONT || rp == (regitem_T *)
5350			     ((char *)regstack.ga_data + regstack.ga_len) - 1)
5351	    break;
5352    }
5353
5354    /* May need to continue with the inner loop, starting at "scan". */
5355    if (status == RA_CONT)
5356	continue;
5357
5358    /*
5359     * If the regstack is empty or something failed we are done.
5360     */
5361    if (regstack.ga_len == 0 || status == RA_FAIL)
5362    {
5363	if (scan == NULL)
5364	{
5365	    /*
5366	     * We get here only if there's trouble -- normally "case END" is
5367	     * the terminating point.
5368	     */
5369	    EMSG(_(e_re_corr));
5370#ifdef DEBUG
5371	    printf("Premature EOL\n");
5372#endif
5373	}
5374	if (status == RA_FAIL)
5375	    got_int = TRUE;
5376	return (status == RA_MATCH);
5377    }
5378
5379  } /* End of loop until the regstack is empty. */
5380
5381  /* NOTREACHED */
5382}
5383
5384/*
5385 * Push an item onto the regstack.
5386 * Returns pointer to new item.  Returns NULL when out of memory.
5387 */
5388    static regitem_T *
5389regstack_push(state, scan)
5390    regstate_T	state;
5391    char_u	*scan;
5392{
5393    regitem_T	*rp;
5394
5395    if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5396    {
5397	EMSG(_(e_maxmempat));
5398	return NULL;
5399    }
5400    if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
5401	return NULL;
5402
5403    rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
5404    rp->rs_state = state;
5405    rp->rs_scan = scan;
5406
5407    regstack.ga_len += sizeof(regitem_T);
5408    return rp;
5409}
5410
5411/*
5412 * Pop an item from the regstack.
5413 */
5414    static void
5415regstack_pop(scan)
5416    char_u	**scan;
5417{
5418    regitem_T	*rp;
5419
5420    rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5421    *scan = rp->rs_scan;
5422
5423    regstack.ga_len -= sizeof(regitem_T);
5424}
5425
5426/*
5427 * regrepeat - repeatedly match something simple, return how many.
5428 * Advances reginput (and reglnum) to just after the matched chars.
5429 */
5430    static int
5431regrepeat(p, maxcount)
5432    char_u	*p;
5433    long	maxcount;   /* maximum number of matches allowed */
5434{
5435    long	count = 0;
5436    char_u	*scan;
5437    char_u	*opnd;
5438    int		mask;
5439    int		testval = 0;
5440
5441    scan = reginput;	    /* Make local copy of reginput for speed. */
5442    opnd = OPERAND(p);
5443    switch (OP(p))
5444    {
5445      case ANY:
5446      case ANY + ADD_NL:
5447	while (count < maxcount)
5448	{
5449	    /* Matching anything means we continue until end-of-line (or
5450	     * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5451	    while (*scan != NUL && count < maxcount)
5452	    {
5453		++count;
5454		mb_ptr_adv(scan);
5455	    }
5456	    if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5457					 || reg_line_lbr || count == maxcount)
5458		break;
5459	    ++count;		/* count the line-break */
5460	    reg_nextline();
5461	    scan = reginput;
5462	    if (got_int)
5463		break;
5464	}
5465	break;
5466
5467      case IDENT:
5468      case IDENT + ADD_NL:
5469	testval = TRUE;
5470	/*FALLTHROUGH*/
5471      case SIDENT:
5472      case SIDENT + ADD_NL:
5473	while (count < maxcount)
5474	{
5475	    if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5476	    {
5477		mb_ptr_adv(scan);
5478	    }
5479	    else if (*scan == NUL)
5480	    {
5481		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5482							      || reg_line_lbr)
5483		    break;
5484		reg_nextline();
5485		scan = reginput;
5486		if (got_int)
5487		    break;
5488	    }
5489	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5490		++scan;
5491	    else
5492		break;
5493	    ++count;
5494	}
5495	break;
5496
5497      case KWORD:
5498      case KWORD + ADD_NL:
5499	testval = TRUE;
5500	/*FALLTHROUGH*/
5501      case SKWORD:
5502      case SKWORD + ADD_NL:
5503	while (count < maxcount)
5504	{
5505	    if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
5506	    {
5507		mb_ptr_adv(scan);
5508	    }
5509	    else if (*scan == NUL)
5510	    {
5511		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5512							      || reg_line_lbr)
5513		    break;
5514		reg_nextline();
5515		scan = reginput;
5516		if (got_int)
5517		    break;
5518	    }
5519	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5520		++scan;
5521	    else
5522		break;
5523	    ++count;
5524	}
5525	break;
5526
5527      case FNAME:
5528      case FNAME + ADD_NL:
5529	testval = TRUE;
5530	/*FALLTHROUGH*/
5531      case SFNAME:
5532      case SFNAME + ADD_NL:
5533	while (count < maxcount)
5534	{
5535	    if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5536	    {
5537		mb_ptr_adv(scan);
5538	    }
5539	    else if (*scan == NUL)
5540	    {
5541		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5542							      || reg_line_lbr)
5543		    break;
5544		reg_nextline();
5545		scan = reginput;
5546		if (got_int)
5547		    break;
5548	    }
5549	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5550		++scan;
5551	    else
5552		break;
5553	    ++count;
5554	}
5555	break;
5556
5557      case PRINT:
5558      case PRINT + ADD_NL:
5559	testval = TRUE;
5560	/*FALLTHROUGH*/
5561      case SPRINT:
5562      case SPRINT + ADD_NL:
5563	while (count < maxcount)
5564	{
5565	    if (*scan == NUL)
5566	    {
5567		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5568							      || reg_line_lbr)
5569		    break;
5570		reg_nextline();
5571		scan = reginput;
5572		if (got_int)
5573		    break;
5574	    }
5575	    else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5576	    {
5577		mb_ptr_adv(scan);
5578	    }
5579	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5580		++scan;
5581	    else
5582		break;
5583	    ++count;
5584	}
5585	break;
5586
5587      case WHITE:
5588      case WHITE + ADD_NL:
5589	testval = mask = RI_WHITE;
5590do_class:
5591	while (count < maxcount)
5592	{
5593#ifdef FEAT_MBYTE
5594	    int		l;
5595#endif
5596	    if (*scan == NUL)
5597	    {
5598		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5599							      || reg_line_lbr)
5600		    break;
5601		reg_nextline();
5602		scan = reginput;
5603		if (got_int)
5604		    break;
5605	    }
5606#ifdef FEAT_MBYTE
5607	    else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
5608	    {
5609		if (testval != 0)
5610		    break;
5611		scan += l;
5612	    }
5613#endif
5614	    else if ((class_tab[*scan] & mask) == testval)
5615		++scan;
5616	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5617		++scan;
5618	    else
5619		break;
5620	    ++count;
5621	}
5622	break;
5623
5624      case NWHITE:
5625      case NWHITE + ADD_NL:
5626	mask = RI_WHITE;
5627	goto do_class;
5628      case DIGIT:
5629      case DIGIT + ADD_NL:
5630	testval = mask = RI_DIGIT;
5631	goto do_class;
5632      case NDIGIT:
5633      case NDIGIT + ADD_NL:
5634	mask = RI_DIGIT;
5635	goto do_class;
5636      case HEX:
5637      case HEX + ADD_NL:
5638	testval = mask = RI_HEX;
5639	goto do_class;
5640      case NHEX:
5641      case NHEX + ADD_NL:
5642	mask = RI_HEX;
5643	goto do_class;
5644      case OCTAL:
5645      case OCTAL + ADD_NL:
5646	testval = mask = RI_OCTAL;
5647	goto do_class;
5648      case NOCTAL:
5649      case NOCTAL + ADD_NL:
5650	mask = RI_OCTAL;
5651	goto do_class;
5652      case WORD:
5653      case WORD + ADD_NL:
5654	testval = mask = RI_WORD;
5655	goto do_class;
5656      case NWORD:
5657      case NWORD + ADD_NL:
5658	mask = RI_WORD;
5659	goto do_class;
5660      case HEAD:
5661      case HEAD + ADD_NL:
5662	testval = mask = RI_HEAD;
5663	goto do_class;
5664      case NHEAD:
5665      case NHEAD + ADD_NL:
5666	mask = RI_HEAD;
5667	goto do_class;
5668      case ALPHA:
5669      case ALPHA + ADD_NL:
5670	testval = mask = RI_ALPHA;
5671	goto do_class;
5672      case NALPHA:
5673      case NALPHA + ADD_NL:
5674	mask = RI_ALPHA;
5675	goto do_class;
5676      case LOWER:
5677      case LOWER + ADD_NL:
5678	testval = mask = RI_LOWER;
5679	goto do_class;
5680      case NLOWER:
5681      case NLOWER + ADD_NL:
5682	mask = RI_LOWER;
5683	goto do_class;
5684      case UPPER:
5685      case UPPER + ADD_NL:
5686	testval = mask = RI_UPPER;
5687	goto do_class;
5688      case NUPPER:
5689      case NUPPER + ADD_NL:
5690	mask = RI_UPPER;
5691	goto do_class;
5692
5693      case EXACTLY:
5694	{
5695	    int	    cu, cl;
5696
5697	    /* This doesn't do a multi-byte character, because a MULTIBYTECODE
5698	     * would have been used for it.  It does handle single-byte
5699	     * characters, such as latin1. */
5700	    if (ireg_ic)
5701	    {
5702		cu = MB_TOUPPER(*opnd);
5703		cl = MB_TOLOWER(*opnd);
5704		while (count < maxcount && (*scan == cu || *scan == cl))
5705		{
5706		    count++;
5707		    scan++;
5708		}
5709	    }
5710	    else
5711	    {
5712		cu = *opnd;
5713		while (count < maxcount && *scan == cu)
5714		{
5715		    count++;
5716		    scan++;
5717		}
5718	    }
5719	    break;
5720	}
5721
5722#ifdef FEAT_MBYTE
5723      case MULTIBYTECODE:
5724	{
5725	    int		i, len, cf = 0;
5726
5727	    /* Safety check (just in case 'encoding' was changed since
5728	     * compiling the program). */
5729	    if ((len = (*mb_ptr2len)(opnd)) > 1)
5730	    {
5731		if (ireg_ic && enc_utf8)
5732		    cf = utf_fold(utf_ptr2char(opnd));
5733		while (count < maxcount)
5734		{
5735		    for (i = 0; i < len; ++i)
5736			if (opnd[i] != scan[i])
5737			    break;
5738		    if (i < len && (!ireg_ic || !enc_utf8
5739					|| utf_fold(utf_ptr2char(scan)) != cf))
5740			break;
5741		    scan += len;
5742		    ++count;
5743		}
5744	    }
5745	}
5746	break;
5747#endif
5748
5749      case ANYOF:
5750      case ANYOF + ADD_NL:
5751	testval = TRUE;
5752	/*FALLTHROUGH*/
5753
5754      case ANYBUT:
5755      case ANYBUT + ADD_NL:
5756	while (count < maxcount)
5757	{
5758#ifdef FEAT_MBYTE
5759	    int len;
5760#endif
5761	    if (*scan == NUL)
5762	    {
5763		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5764							      || reg_line_lbr)
5765		    break;
5766		reg_nextline();
5767		scan = reginput;
5768		if (got_int)
5769		    break;
5770	    }
5771	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5772		++scan;
5773#ifdef FEAT_MBYTE
5774	    else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
5775	    {
5776		if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
5777		    break;
5778		scan += len;
5779	    }
5780#endif
5781	    else
5782	    {
5783		if ((cstrchr(opnd, *scan) == NULL) == testval)
5784		    break;
5785		++scan;
5786	    }
5787	    ++count;
5788	}
5789	break;
5790
5791      case NEWL:
5792	while (count < maxcount
5793		&& ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
5794			    && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
5795	{
5796	    count++;
5797	    if (reg_line_lbr)
5798		ADVANCE_REGINPUT();
5799	    else
5800		reg_nextline();
5801	    scan = reginput;
5802	    if (got_int)
5803		break;
5804	}
5805	break;
5806
5807      default:			/* Oh dear.  Called inappropriately. */
5808	EMSG(_(e_re_corr));
5809#ifdef DEBUG
5810	printf("Called regrepeat with op code %d\n", OP(p));
5811#endif
5812	break;
5813    }
5814
5815    reginput = scan;
5816
5817    return (int)count;
5818}
5819
5820/*
5821 * regnext - dig the "next" pointer out of a node
5822 * Returns NULL when calculating size, when there is no next item and when
5823 * there is an error.
5824 */
5825    static char_u *
5826regnext(p)
5827    char_u  *p;
5828{
5829    int	    offset;
5830
5831    if (p == JUST_CALC_SIZE || reg_toolong)
5832	return NULL;
5833
5834    offset = NEXT(p);
5835    if (offset == 0)
5836	return NULL;
5837
5838    if (OP(p) == BACK)
5839	return p - offset;
5840    else
5841	return p + offset;
5842}
5843
5844/*
5845 * Check the regexp program for its magic number.
5846 * Return TRUE if it's wrong.
5847 */
5848    static int
5849prog_magic_wrong()
5850{
5851    if (UCHARAT(REG_MULTI
5852		? reg_mmatch->regprog->program
5853		: reg_match->regprog->program) != REGMAGIC)
5854    {
5855	EMSG(_(e_re_corr));
5856	return TRUE;
5857    }
5858    return FALSE;
5859}
5860
5861/*
5862 * Cleanup the subexpressions, if this wasn't done yet.
5863 * This construction is used to clear the subexpressions only when they are
5864 * used (to increase speed).
5865 */
5866    static void
5867cleanup_subexpr()
5868{
5869    if (need_clear_subexpr)
5870    {
5871	if (REG_MULTI)
5872	{
5873	    /* Use 0xff to set lnum to -1 */
5874	    vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5875	    vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5876	}
5877	else
5878	{
5879	    vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5880	    vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5881	}
5882	need_clear_subexpr = FALSE;
5883    }
5884}
5885
#ifdef FEAT_SYN_HL
/*
 * Same as cleanup_subexpr(), but for the "z" subexpressions: reset the
 * start/end info when "need_clear_zsubexpr" is set.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
	return;

    if (REG_MULTI)
    {
	/* Filling with 0xff bytes sets every lnum to -1. */
	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
	/* Pointers are cleared with zero bytes. */
	vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
	vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
5907
5908/*
5909 * Save the current subexpr to "bp", so that they can be restored
5910 * later by restore_subexpr().
5911 */
5912    static void
5913save_subexpr(bp)
5914    regbehind_T *bp;
5915{
5916    int i;
5917
5918    /* When "need_clear_subexpr" is set we don't need to save the values, only
5919     * remember that this flag needs to be set again when restoring. */
5920    bp->save_need_clear_subexpr = need_clear_subexpr;
5921    if (!need_clear_subexpr)
5922    {
5923	for (i = 0; i < NSUBEXP; ++i)
5924	{
5925	    if (REG_MULTI)
5926	    {
5927		bp->save_start[i].se_u.pos = reg_startpos[i];
5928		bp->save_end[i].se_u.pos = reg_endpos[i];
5929	    }
5930	    else
5931	    {
5932		bp->save_start[i].se_u.ptr = reg_startp[i];
5933		bp->save_end[i].se_u.ptr = reg_endp[i];
5934	    }
5935	}
5936    }
5937}
5938
5939/*
5940 * Restore the subexpr from "bp".
5941 */
5942    static void
5943restore_subexpr(bp)
5944    regbehind_T *bp;
5945{
5946    int i;
5947
5948    /* Only need to restore saved values when they are not to be cleared. */
5949    need_clear_subexpr = bp->save_need_clear_subexpr;
5950    if (!need_clear_subexpr)
5951    {
5952	for (i = 0; i < NSUBEXP; ++i)
5953	{
5954	    if (REG_MULTI)
5955	    {
5956		reg_startpos[i] = bp->save_start[i].se_u.pos;
5957		reg_endpos[i] = bp->save_end[i].se_u.pos;
5958	    }
5959	    else
5960	    {
5961		reg_startp[i] = bp->save_start[i].se_u.ptr;
5962		reg_endp[i] = bp->save_end[i].se_u.ptr;
5963	    }
5964	}
5965    }
5966}
5967
5968/*
5969 * Advance reglnum, regline and reginput to the next line.
5970 */
5971    static void
5972reg_nextline()
5973{
5974    regline = reg_getline(++reglnum);
5975    reginput = regline;
5976    fast_breakcheck();
5977}
5978
5979/*
5980 * Save the input line and position in a regsave_T.
5981 */
5982    static void
5983reg_save(save, gap)
5984    regsave_T	*save;
5985    garray_T	*gap;
5986{
5987    if (REG_MULTI)
5988    {
5989	save->rs_u.pos.col = (colnr_T)(reginput - regline);
5990	save->rs_u.pos.lnum = reglnum;
5991    }
5992    else
5993	save->rs_u.ptr = reginput;
5994    save->rs_len = gap->ga_len;
5995}
5996
5997/*
5998 * Restore the input line and position from a regsave_T.
5999 */
6000    static void
6001reg_restore(save, gap)
6002    regsave_T	*save;
6003    garray_T	*gap;
6004{
6005    if (REG_MULTI)
6006    {
6007	if (reglnum != save->rs_u.pos.lnum)
6008	{
6009	    /* only call reg_getline() when the line number changed to save
6010	     * a bit of time */
6011	    reglnum = save->rs_u.pos.lnum;
6012	    regline = reg_getline(reglnum);
6013	}
6014	reginput = regline + save->rs_u.pos.col;
6015    }
6016    else
6017	reginput = save->rs_u.ptr;
6018    gap->ga_len = save->rs_len;
6019}
6020
6021/*
6022 * Return TRUE if current position is equal to saved position.
6023 */
6024    static int
6025reg_save_equal(save)
6026    regsave_T	*save;
6027{
6028    if (REG_MULTI)
6029	return reglnum == save->rs_u.pos.lnum
6030				  && reginput == regline + save->rs_u.pos.col;
6031    return reginput == save->rs_u.ptr;
6032}
6033
6034/*
6035 * Tentatively set the sub-expression start to the current position (after
6036 * calling regmatch() they will have changed).  Need to save the existing
6037 * values for when there is no match.
6038 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6039 * depending on REG_MULTI.
6040 */
6041    static void
6042save_se_multi(savep, posp)
6043    save_se_T	*savep;
6044    lpos_T	*posp;
6045{
6046    savep->se_u.pos = *posp;
6047    posp->lnum = reglnum;
6048    posp->col = (colnr_T)(reginput - regline);
6049}
6050
6051    static void
6052save_se_one(savep, pp)
6053    save_se_T	*savep;
6054    char_u	**pp;
6055{
6056    savep->se_u.ptr = *pp;
6057    *pp = reginput;
6058}
6059
6060/*
6061 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6062 */
6063    static int
6064re_num_cmp(val, scan)
6065    long_u	val;
6066    char_u	*scan;
6067{
6068    long_u  n = OPERAND_MIN(scan);
6069
6070    if (OPERAND_CMP(scan) == '>')
6071	return val > n;
6072    if (OPERAND_CMP(scan) == '<')
6073	return val < n;
6074    return val == n;
6075}
6076
6077
6078#ifdef DEBUG
6079
6080/*
6081 * regdump - dump a regexp onto stdout in vaguely comprehensible form
6082 */
6083    static void
6084regdump(pattern, r)
6085    char_u	*pattern;
6086    regprog_T	*r;
6087{
6088    char_u  *s;
6089    int	    op = EXACTLY;	/* Arbitrary non-END op. */
6090    char_u  *next;
6091    char_u  *end = NULL;
6092
6093    printf("\r\nregcomp(%s):\r\n", pattern);
6094
6095    s = r->program + 1;
6096    /*
6097     * Loop until we find the END that isn't before a referred next (an END
6098     * can also appear in a NOMATCH operand).
6099     */
6100    while (op != END || s <= end)
6101    {
6102	op = OP(s);
6103	printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
6104	next = regnext(s);
6105	if (next == NULL)	/* Next ptr. */
6106	    printf("(0)");
6107	else
6108	    printf("(%d)", (int)((s - r->program) + (next - s)));
6109	if (end < next)
6110	    end = next;
6111	if (op == BRACE_LIMITS)
6112	{
6113	    /* Two short ints */
6114	    printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
6115	    s += 8;
6116	}
6117	s += 3;
6118	if (op == ANYOF || op == ANYOF + ADD_NL
6119		|| op == ANYBUT || op == ANYBUT + ADD_NL
6120		|| op == EXACTLY)
6121	{
6122	    /* Literal string, where present. */
6123	    while (*s != NUL)
6124		printf("%c", *s++);
6125	    s++;
6126	}
6127	printf("\r\n");
6128    }
6129
6130    /* Header fields of interest. */
6131    if (r->regstart != NUL)
6132	printf("start `%s' 0x%x; ", r->regstart < 256
6133		? (char *)transchar(r->regstart)
6134		: "multibyte", r->regstart);
6135    if (r->reganch)
6136	printf("anchored; ");
6137    if (r->regmust != NULL)
6138	printf("must have \"%s\"", r->regmust);
6139    printf("\r\n");
6140}
6141
6142/*
6143 * regprop - printable representation of opcode
6144 */
6145    static char_u *
6146regprop(op)
6147    char_u	   *op;
6148{
6149    char_u	    *p;
6150    static char_u   buf[50];
6151
6152    (void) strcpy(buf, ":");
6153
6154    switch (OP(op))
6155    {
6156      case BOL:
6157	p = "BOL";
6158	break;
6159      case EOL:
6160	p = "EOL";
6161	break;
6162      case RE_BOF:
6163	p = "BOF";
6164	break;
6165      case RE_EOF:
6166	p = "EOF";
6167	break;
6168      case CURSOR:
6169	p = "CURSOR";
6170	break;
6171      case RE_VISUAL:
6172	p = "RE_VISUAL";
6173	break;
6174      case RE_LNUM:
6175	p = "RE_LNUM";
6176	break;
6177      case RE_MARK:
6178	p = "RE_MARK";
6179	break;
6180      case RE_COL:
6181	p = "RE_COL";
6182	break;
6183      case RE_VCOL:
6184	p = "RE_VCOL";
6185	break;
6186      case BOW:
6187	p = "BOW";
6188	break;
6189      case EOW:
6190	p = "EOW";
6191	break;
6192      case ANY:
6193	p = "ANY";
6194	break;
6195      case ANY + ADD_NL:
6196	p = "ANY+NL";
6197	break;
6198      case ANYOF:
6199	p = "ANYOF";
6200	break;
6201      case ANYOF + ADD_NL:
6202	p = "ANYOF+NL";
6203	break;
6204      case ANYBUT:
6205	p = "ANYBUT";
6206	break;
6207      case ANYBUT + ADD_NL:
6208	p = "ANYBUT+NL";
6209	break;
6210      case IDENT:
6211	p = "IDENT";
6212	break;
6213      case IDENT + ADD_NL:
6214	p = "IDENT+NL";
6215	break;
6216      case SIDENT:
6217	p = "SIDENT";
6218	break;
6219      case SIDENT + ADD_NL:
6220	p = "SIDENT+NL";
6221	break;
6222      case KWORD:
6223	p = "KWORD";
6224	break;
6225      case KWORD + ADD_NL:
6226	p = "KWORD+NL";
6227	break;
6228      case SKWORD:
6229	p = "SKWORD";
6230	break;
6231      case SKWORD + ADD_NL:
6232	p = "SKWORD+NL";
6233	break;
6234      case FNAME:
6235	p = "FNAME";
6236	break;
6237      case FNAME + ADD_NL:
6238	p = "FNAME+NL";
6239	break;
6240      case SFNAME:
6241	p = "SFNAME";
6242	break;
6243      case SFNAME + ADD_NL:
6244	p = "SFNAME+NL";
6245	break;
6246      case PRINT:
6247	p = "PRINT";
6248	break;
6249      case PRINT + ADD_NL:
6250	p = "PRINT+NL";
6251	break;
6252      case SPRINT:
6253	p = "SPRINT";
6254	break;
6255      case SPRINT + ADD_NL:
6256	p = "SPRINT+NL";
6257	break;
6258      case WHITE:
6259	p = "WHITE";
6260	break;
6261      case WHITE + ADD_NL:
6262	p = "WHITE+NL";
6263	break;
6264      case NWHITE:
6265	p = "NWHITE";
6266	break;
6267      case NWHITE + ADD_NL:
6268	p = "NWHITE+NL";
6269	break;
6270      case DIGIT:
6271	p = "DIGIT";
6272	break;
6273      case DIGIT + ADD_NL:
6274	p = "DIGIT+NL";
6275	break;
6276      case NDIGIT:
6277	p = "NDIGIT";
6278	break;
6279      case NDIGIT + ADD_NL:
6280	p = "NDIGIT+NL";
6281	break;
6282      case HEX:
6283	p = "HEX";
6284	break;
6285      case HEX + ADD_NL:
6286	p = "HEX+NL";
6287	break;
6288      case NHEX:
6289	p = "NHEX";
6290	break;
6291      case NHEX + ADD_NL:
6292	p = "NHEX+NL";
6293	break;
6294      case OCTAL:
6295	p = "OCTAL";
6296	break;
6297      case OCTAL + ADD_NL:
6298	p = "OCTAL+NL";
6299	break;
6300      case NOCTAL:
6301	p = "NOCTAL";
6302	break;
6303      case NOCTAL + ADD_NL:
6304	p = "NOCTAL+NL";
6305	break;
6306      case WORD:
6307	p = "WORD";
6308	break;
6309      case WORD + ADD_NL:
6310	p = "WORD+NL";
6311	break;
6312      case NWORD:
6313	p = "NWORD";
6314	break;
6315      case NWORD + ADD_NL:
6316	p = "NWORD+NL";
6317	break;
6318      case HEAD:
6319	p = "HEAD";
6320	break;
6321      case HEAD + ADD_NL:
6322	p = "HEAD+NL";
6323	break;
6324      case NHEAD:
6325	p = "NHEAD";
6326	break;
6327      case NHEAD + ADD_NL:
6328	p = "NHEAD+NL";
6329	break;
6330      case ALPHA:
6331	p = "ALPHA";
6332	break;
6333      case ALPHA + ADD_NL:
6334	p = "ALPHA+NL";
6335	break;
6336      case NALPHA:
6337	p = "NALPHA";
6338	break;
6339      case NALPHA + ADD_NL:
6340	p = "NALPHA+NL";
6341	break;
6342      case LOWER:
6343	p = "LOWER";
6344	break;
6345      case LOWER + ADD_NL:
6346	p = "LOWER+NL";
6347	break;
6348      case NLOWER:
6349	p = "NLOWER";
6350	break;
6351      case NLOWER + ADD_NL:
6352	p = "NLOWER+NL";
6353	break;
6354      case UPPER:
6355	p = "UPPER";
6356	break;
6357      case UPPER + ADD_NL:
6358	p = "UPPER+NL";
6359	break;
6360      case NUPPER:
6361	p = "NUPPER";
6362	break;
6363      case NUPPER + ADD_NL:
6364	p = "NUPPER+NL";
6365	break;
6366      case BRANCH:
6367	p = "BRANCH";
6368	break;
6369      case EXACTLY:
6370	p = "EXACTLY";
6371	break;
6372      case NOTHING:
6373	p = "NOTHING";
6374	break;
6375      case BACK:
6376	p = "BACK";
6377	break;
6378      case END:
6379	p = "END";
6380	break;
6381      case MOPEN + 0:
6382	p = "MATCH START";
6383	break;
6384      case MOPEN + 1:
6385      case MOPEN + 2:
6386      case MOPEN + 3:
6387      case MOPEN + 4:
6388      case MOPEN + 5:
6389      case MOPEN + 6:
6390      case MOPEN + 7:
6391      case MOPEN + 8:
6392      case MOPEN + 9:
6393	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6394	p = NULL;
6395	break;
6396      case MCLOSE + 0:
6397	p = "MATCH END";
6398	break;
6399      case MCLOSE + 1:
6400      case MCLOSE + 2:
6401      case MCLOSE + 3:
6402      case MCLOSE + 4:
6403      case MCLOSE + 5:
6404      case MCLOSE + 6:
6405      case MCLOSE + 7:
6406      case MCLOSE + 8:
6407      case MCLOSE + 9:
6408	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6409	p = NULL;
6410	break;
6411      case BACKREF + 1:
6412      case BACKREF + 2:
6413      case BACKREF + 3:
6414      case BACKREF + 4:
6415      case BACKREF + 5:
6416      case BACKREF + 6:
6417      case BACKREF + 7:
6418      case BACKREF + 8:
6419      case BACKREF + 9:
6420	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6421	p = NULL;
6422	break;
6423      case NOPEN:
6424	p = "NOPEN";
6425	break;
6426      case NCLOSE:
6427	p = "NCLOSE";
6428	break;
6429#ifdef FEAT_SYN_HL
6430      case ZOPEN + 1:
6431      case ZOPEN + 2:
6432      case ZOPEN + 3:
6433      case ZOPEN + 4:
6434      case ZOPEN + 5:
6435      case ZOPEN + 6:
6436      case ZOPEN + 7:
6437      case ZOPEN + 8:
6438      case ZOPEN + 9:
6439	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6440	p = NULL;
6441	break;
6442      case ZCLOSE + 1:
6443      case ZCLOSE + 2:
6444      case ZCLOSE + 3:
6445      case ZCLOSE + 4:
6446      case ZCLOSE + 5:
6447      case ZCLOSE + 6:
6448      case ZCLOSE + 7:
6449      case ZCLOSE + 8:
6450      case ZCLOSE + 9:
6451	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6452	p = NULL;
6453	break;
6454      case ZREF + 1:
6455      case ZREF + 2:
6456      case ZREF + 3:
6457      case ZREF + 4:
6458      case ZREF + 5:
6459      case ZREF + 6:
6460      case ZREF + 7:
6461      case ZREF + 8:
6462      case ZREF + 9:
6463	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6464	p = NULL;
6465	break;
6466#endif
6467      case STAR:
6468	p = "STAR";
6469	break;
6470      case PLUS:
6471	p = "PLUS";
6472	break;
6473      case NOMATCH:
6474	p = "NOMATCH";
6475	break;
6476      case MATCH:
6477	p = "MATCH";
6478	break;
6479      case BEHIND:
6480	p = "BEHIND";
6481	break;
6482      case NOBEHIND:
6483	p = "NOBEHIND";
6484	break;
6485      case SUBPAT:
6486	p = "SUBPAT";
6487	break;
6488      case BRACE_LIMITS:
6489	p = "BRACE_LIMITS";
6490	break;
6491      case BRACE_SIMPLE:
6492	p = "BRACE_SIMPLE";
6493	break;
6494      case BRACE_COMPLEX + 0:
6495      case BRACE_COMPLEX + 1:
6496      case BRACE_COMPLEX + 2:
6497      case BRACE_COMPLEX + 3:
6498      case BRACE_COMPLEX + 4:
6499      case BRACE_COMPLEX + 5:
6500      case BRACE_COMPLEX + 6:
6501      case BRACE_COMPLEX + 7:
6502      case BRACE_COMPLEX + 8:
6503      case BRACE_COMPLEX + 9:
6504	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6505	p = NULL;
6506	break;
6507#ifdef FEAT_MBYTE
6508      case MULTIBYTECODE:
6509	p = "MULTIBYTECODE";
6510	break;
6511#endif
6512      case NEWL:
6513	p = "NEWL";
6514	break;
6515      default:
6516	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6517	p = NULL;
6518	break;
6519    }
6520    if (p != NULL)
6521	(void) strcat(buf, p);
6522    return buf;
6523}
6524#endif
6525
6526#ifdef FEAT_MBYTE
6527static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6528
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decomposition of the characters 0xfb20 - 0xfb4f, indexed by the character
 * value minus 0xfb20.  Each entry holds up to three characters, unused
 * positions are zero.  The entries marked UNUSED map to the character
 * itself.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},		/* 0xfb20	alt ayin */
    {0x5d0,0,0},		/* 0xfb21	alt alef */
    {0x5d3,0,0},		/* 0xfb22	alt dalet */
    {0x5d4,0,0},		/* 0xfb23	alt he */
    {0x5db,0,0},		/* 0xfb24	alt kaf */
    {0x5dc,0,0},		/* 0xfb25	alt lamed */
    {0x5dd,0,0},		/* 0xfb26	alt mem-sofit */
    {0x5e8,0,0},		/* 0xfb27	alt resh */
    {0x5ea,0,0},		/* 0xfb28	alt tav */
    {'+', 0, 0},		/* 0xfb29	alt plus */
    {0x5e9, 0x5c1, 0},		/* 0xfb2a	shin+shin-dot */
    {0x5e9, 0x5c2, 0},		/* 0xfb2b	shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},	/* 0xfb2c	shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},	/* 0xfb2d	shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},		/* 0xfb2e	alef+patah */
    {0x5d0, 0x5b8, 0},		/* 0xfb2f	alef+qamats */
    {0x5d0, 0x5b4, 0},		/* 0xfb30	alef+hiriq */
    {0x5d1, 0x5bc, 0},		/* 0xfb31	bet+dagesh */
    {0x5d2, 0x5bc, 0},		/* 0xfb32	gimel+dagesh */
    {0x5d3, 0x5bc, 0},		/* 0xfb33	dalet+dagesh */
    {0x5d4, 0x5bc, 0},		/* 0xfb34	he+dagesh */
    {0x5d5, 0x5bc, 0},		/* 0xfb35	vav+dagesh */
    {0x5d6, 0x5bc, 0},		/* 0xfb36	zayin+dagesh */
    {0xfb37, 0, 0},		/* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},		/* 0xfb38	tet+dagesh */
    {0x5d9, 0x5bc, 0},		/* 0xfb39	yud+dagesh */
    {0x5da, 0x5bc, 0},		/* 0xfb3a	kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},		/* 0xfb3b	kaf+dagesh */
    {0x5dc, 0x5bc, 0},		/* 0xfb3c	lamed+dagesh */
    {0xfb3d, 0, 0},		/* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},		/* 0xfb3e	mem+dagesh */
    {0xfb3f, 0, 0},		/* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},		/* 0xfb40	nun+dagesh */
    {0x5e1, 0x5bc, 0},		/* 0xfb41	samech+dagesh */
    {0xfb42, 0, 0},		/* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},		/* 0xfb43	pe sofit+dagesh */
    {0x5e4, 0x5bc,0},		/* 0xfb44	pe+dagesh */
    {0xfb45, 0, 0},		/* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},		/* 0xfb46	tsadi+dagesh */
    {0x5e7, 0x5bc, 0},		/* 0xfb47	qof+dagesh */
    {0x5e8, 0x5bc, 0},		/* 0xfb48	resh+dagesh */
    {0x5e9, 0x5bc, 0},		/* 0xfb49	shin+dagesh */
    {0x5ea, 0x5bc, 0},		/* 0xfb4a	tav+dagesh */
    {0x5d5, 0x5b9, 0},		/* 0xfb4b	vav+holam */
    {0x5d1, 0x5bf, 0},		/* 0xfb4c	bet+rafe */
    {0x5db, 0x5bf, 0},		/* 0xfb4d	kaf+rafe */
    {0x5e4, 0x5bf, 0},		/* 0xfb4e	pe+rafe */
    {0x5d0, 0x5dc, 0}		/* 0xfb4f	alef-lamed */
};

/*
 * Decompose character "c" into up to three characters, returned in "*c1",
 * "*c2" and "*c3".
 * For a character in the range 0xfb20 - 0xfb4f the values come from
 * decomp_table[].  Any other character is returned unchanged in "*c1", with
 * "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T d;

    /* The range checked here must be exactly the range of decomp_table[],
     * otherwise the index below ends up outside of the table. */
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
	d = decomp_table[c - 0xfb20];
	*c1 = d.a;
	*c2 = d.b;
	*c3 = d.c;
    }
    else
    {
	*c1 = c;
	*c2 = *c3 = 0;
    }
}
6607#endif
6608
6609/*
6610 * Compare two strings, ignore case if ireg_ic set.
6611 * Return 0 if strings match, non-zero otherwise.
6612 * Correct the length "*n" when composing characters are ignored.
6613 */
6614    static int
6615cstrncmp(s1, s2, n)
6616    char_u	*s1, *s2;
6617    int		*n;
6618{
6619    int		result;
6620
6621    if (!ireg_ic)
6622	result = STRNCMP(s1, s2, *n);
6623    else
6624	result = MB_STRNICMP(s1, s2, *n);
6625
6626#ifdef FEAT_MBYTE
6627    /* if it failed and it's utf8 and we want to combineignore: */
6628    if (result != 0 && enc_utf8 && ireg_icombine)
6629    {
6630	char_u	*str1, *str2;
6631	int	c1, c2, c11, c12;
6632	int	junk;
6633
6634	/* we have to handle the strcmp ourselves, since it is necessary to
6635	 * deal with the composing characters by ignoring them: */
6636	str1 = s1;
6637	str2 = s2;
6638	c1 = c2 = 0;
6639	while ((int)(str1 - s1) < *n)
6640	{
6641	    c1 = mb_ptr2char_adv(&str1);
6642	    c2 = mb_ptr2char_adv(&str2);
6643
6644	    /* decompose the character if necessary, into 'base' characters
6645	     * because I don't care about Arabic, I will hard-code the Hebrew
6646	     * which I *do* care about!  So sue me... */
6647	    if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6648	    {
6649		/* decomposition necessary? */
6650		mb_decompose(c1, &c11, &junk, &junk);
6651		mb_decompose(c2, &c12, &junk, &junk);
6652		c1 = c11;
6653		c2 = c12;
6654		if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6655		    break;
6656	    }
6657	}
6658	result = c2 - c1;
6659	if (result == 0)
6660	    *n = (int)(str2 - s2);
6661    }
6662#endif
6663
6664    return result;
6665}
6666
6667/*
6668 * cstrchr: This function is used a lot for simple searches, keep it fast!
6669 */
6670    static char_u *
6671cstrchr(s, c)
6672    char_u	*s;
6673    int		c;
6674{
6675    char_u	*p;
6676    int		cc;
6677
6678    if (!ireg_ic
6679#ifdef FEAT_MBYTE
6680	    || (!enc_utf8 && mb_char2len(c) > 1)
6681#endif
6682	    )
6683	return vim_strchr(s, c);
6684
6685    /* tolower() and toupper() can be slow, comparing twice should be a lot
6686     * faster (esp. when using MS Visual C++!).
6687     * For UTF-8 need to use folded case. */
6688#ifdef FEAT_MBYTE
6689    if (enc_utf8 && c > 0x80)
6690	cc = utf_fold(c);
6691    else
6692#endif
6693	 if (MB_ISUPPER(c))
6694	cc = MB_TOLOWER(c);
6695    else if (MB_ISLOWER(c))
6696	cc = MB_TOUPPER(c);
6697    else
6698	return vim_strchr(s, c);
6699
6700#ifdef FEAT_MBYTE
6701    if (has_mbyte)
6702    {
6703	for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
6704	{
6705	    if (enc_utf8 && c > 0x80)
6706	    {
6707		if (utf_fold(utf_ptr2char(p)) == cc)
6708		    return p;
6709	    }
6710	    else if (*p == c || *p == cc)
6711		return p;
6712	}
6713    }
6714    else
6715#endif
6716	/* Faster version for when there are no multi-byte characters. */
6717	for (p = s; *p != NUL; ++p)
6718	    if (*p == c || *p == cc)
6719		return p;
6720
6721    return NULL;
6722}
6723
6724/***************************************************************
6725 *		      regsub stuff			       *
6726 ***************************************************************/
6727
6728/* This stuff below really confuses cc on an SGI -- webb */
6729#ifdef __sgi
6730# undef __ARGS
6731# define __ARGS(x)  ()
6732#endif
6733
6734/*
 * We should define fptr_T as a pointer to a function returning a pointer to
6736 * a function returning a pointer to a function ...
6737 * This is impossible, so we declare a pointer to a function returning a
6738 * pointer to a function returning void. This should work for all compilers.
6739 */
6740typedef void (*(*fptr_T) __ARGS((int *, int)))();
6741
6742static fptr_T do_upper __ARGS((int *, int));
6743static fptr_T do_Upper __ARGS((int *, int));
6744static fptr_T do_lower __ARGS((int *, int));
6745static fptr_T do_Lower __ARGS((int *, int));
6746
6747static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6748
6749    static fptr_T
6750do_upper(d, c)
6751    int		*d;
6752    int		c;
6753{
6754    *d = MB_TOUPPER(c);
6755
6756    return (fptr_T)NULL;
6757}
6758
6759    static fptr_T
6760do_Upper(d, c)
6761    int		*d;
6762    int		c;
6763{
6764    *d = MB_TOUPPER(c);
6765
6766    return (fptr_T)do_Upper;
6767}
6768
6769    static fptr_T
6770do_lower(d, c)
6771    int		*d;
6772    int		c;
6773{
6774    *d = MB_TOLOWER(c);
6775
6776    return (fptr_T)NULL;
6777}
6778
6779    static fptr_T
6780do_Lower(d, c)
6781    int		*d;
6782    int		c;
6783{
6784    *d = MB_TOLOWER(c);
6785
6786    return (fptr_T)do_Lower;
6787}
6788
6789/*
6790 * regtilde(): Replace tildes in the pattern by the old pattern.
6791 *
6792 * Short explanation of the tilde: It stands for the previous replacement
6793 * pattern.  If that previous pattern also contains a ~ we should go back a
6794 * step further...  But we insert the previous pattern into the current one
6795 * and remember that.
6796 * This still does not handle the case where "magic" changes.  So require the
6797 * user to keep his hands off of "magic".
6798 *
6799 * The tildes are parsed once before the first call to vim_regsub().
6800 */
6801    char_u *
6802regtilde(source, magic)
6803    char_u	*source;
6804    int		magic;
6805{
6806    char_u	*newsub = source;
6807    char_u	*tmpsub;
6808    char_u	*p;
6809    int		len;
6810    int		prevlen;
6811
6812    for (p = newsub; *p; ++p)
6813    {
6814	if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6815	{
6816	    if (reg_prev_sub != NULL)
6817	    {
6818		/* length = len(newsub) - 1 + len(prev_sub) + 1 */
6819		prevlen = (int)STRLEN(reg_prev_sub);
6820		tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6821		if (tmpsub != NULL)
6822		{
6823		    /* copy prefix */
6824		    len = (int)(p - newsub);	/* not including ~ */
6825		    mch_memmove(tmpsub, newsub, (size_t)len);
6826		    /* interpret tilde */
6827		    mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6828		    /* copy postfix */
6829		    if (!magic)
6830			++p;			/* back off \ */
6831		    STRCPY(tmpsub + len + prevlen, p + 1);
6832
6833		    if (newsub != source)	/* already allocated newsub */
6834			vim_free(newsub);
6835		    newsub = tmpsub;
6836		    p = newsub + len + prevlen;
6837		}
6838	    }
6839	    else if (magic)
6840		STRMOVE(p, p + 1);	/* remove '~' */
6841	    else
6842		STRMOVE(p, p + 2);	/* remove '\~' */
6843	    --p;
6844	}
6845	else
6846	{
6847	    if (*p == '\\' && p[1])		/* skip escaped characters */
6848		++p;
6849#ifdef FEAT_MBYTE
6850	    if (has_mbyte)
6851		p += (*mb_ptr2len)(p) - 1;
6852#endif
6853	}
6854    }
6855
6856    vim_free(reg_prev_sub);
6857    if (newsub != source)	/* newsub was allocated, just keep it */
6858	reg_prev_sub = newsub;
6859    else			/* no ~ found, need to save newsub  */
6860	reg_prev_sub = vim_strsave(newsub);
6861    return newsub;
6862}
6863
6864#ifdef FEAT_EVAL
6865static int can_f_submatch = FALSE;	/* TRUE when submatch() can be used */
6866
6867/* These pointers are used instead of reg_match and reg_mmatch for
6868 * reg_submatch().  Needed for when the substitution string is an expression
6869 * that contains a call to substitute() and submatch(). */
6870static regmatch_T	*submatch_match;
6871static regmmatch_T	*submatch_mmatch;
6872static linenr_T		submatch_firstlnum;
6873static linenr_T		submatch_maxline;
6874#endif
6875
6876#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
6877/*
6878 * vim_regsub() - perform substitutions after a vim_regexec() or
6879 * vim_regexec_multi() match.
6880 *
6881 * If "copy" is TRUE really copy into "dest".
6882 * If "copy" is FALSE nothing is copied, this is just to find out the length
6883 * of the result.
6884 *
6885 * If "backslash" is TRUE, a backslash will be removed later, need to double
6886 * them to keep them, and insert a backslash before a CR to avoid it being
6887 * replaced with a line break later.
6888 *
6889 * Note: The matched text must not change between the call of
6890 * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
6891 * references invalid!
6892 *
6893 * Returns the size of the replacement, including terminating NUL.
6894 */
6895    int
6896vim_regsub(rmp, source, dest, copy, magic, backslash)
6897    regmatch_T	*rmp;
6898    char_u	*source;
6899    char_u	*dest;
6900    int		copy;
6901    int		magic;
6902    int		backslash;
6903{
6904    reg_match = rmp;
6905    reg_mmatch = NULL;
6906    reg_maxline = 0;
6907    return vim_regsub_both(source, dest, copy, magic, backslash);
6908}
6909#endif
6910
6911    int
6912vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6913    regmmatch_T	*rmp;
6914    linenr_T	lnum;
6915    char_u	*source;
6916    char_u	*dest;
6917    int		copy;
6918    int		magic;
6919    int		backslash;
6920{
6921    reg_match = NULL;
6922    reg_mmatch = rmp;
6923    reg_buf = curbuf;		/* always works on the current buffer! */
6924    reg_firstlnum = lnum;
6925    reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6926    return vim_regsub_both(source, dest, copy, magic, backslash);
6927}
6928
/*
 * Common worker for vim_regsub() and vim_regsub_multi(): build the
 * replacement text for "source", using the match stored in reg_match or in
 * reg_mmatch (REG_MULTI decides which one is used).
 *
 * source	replacement pattern.  Recognized items: '&' (when "magic") or
 *		"\&" (when not "magic") and "\0" .. "\9" insert a sub-match;
 *		"\u" "\U" "\l" "\L" select a case conversion function and
 *		"\e" "\E" clear it; "\r" "\n" "\t" "\b" insert CAR, NL, TAB
 *		and Ctrl_H.  A leading "\=" makes the rest an expression.
 * dest		buffer for the result.  Only written when "copy" is TRUE, but
 *		it must not be NULL in either case.
 * copy		TRUE: store the result in "dest".
 *		FALSE: only compute the size of the result.
 * magic	selects between '&' and "\&" for the whole match, see above.
 * backslash	TRUE: a backslash will be removed from the result later, thus
 *		backslashes are kept or doubled and a backslash is put before
 *		a CR that comes from the matched text.
 *
 * Returns the size of the result including the terminating NUL.  Returns 0
 * when "source" or "dest" is NULL or when prog_magic_wrong() is TRUE.
 */
6929    static int
6930vim_regsub_both(source, dest, copy, magic, backslash)
6931    char_u	*source;
6932    char_u	*dest;
6933    int		copy;
6934    int		magic;
6935    int		backslash;
6936{
6937    char_u	*src;
6938    char_u	*dst;
6939    char_u	*s;
6940    int		c;
6941    int		cc;
    /* Number of the sub-match to insert; -1 while handling an ordinary
     * character. */
6942    int		no = -1;
    /* Case conversion function for the next character, NULL for none. */
6943    fptr_T	func = (fptr_T)NULL;
6944    linenr_T	clnum = 0;	/* init for GCC */
6945    int		len = 0;	/* init for GCC */
6946#ifdef FEAT_EVAL
    /* Result of the "\=" expression, kept from the call with "copy" FALSE
     * until the call with "copy" TRUE. */
6947    static char_u *eval_result = NULL;
6948#endif
6949
6950    /* Be paranoid... */
6951    if (source == NULL || dest == NULL)
6952    {
6953	EMSG(_(e_null));
6954	return 0;
6955    }
6956    if (prog_magic_wrong())
6957	return 0;
6958    src = source;
6959    dst = dest;
6960
6961    /*
6962     * When the substitute part starts with "\=" evaluate it as an expression.
6963     */
    /* Without FEAT_EVAL this branch is still taken for "\=", but it adds
     * nothing to the result. */
6964    if (source[0] == '\\' && source[1] == '='
6965#ifdef FEAT_EVAL
6966	    && !can_f_submatch	    /* can't do this recursively */
6967#endif
6968	    )
6969    {
6970#ifdef FEAT_EVAL
6971	/* To make sure that the length doesn't change between checking the
6972	 * length and copying the string, and to speed up things, the
6973	 * resulting string is saved from the call with "copy" == FALSE to the
6974	 * call with "copy" == TRUE. */
6975	if (copy)
6976	{
	    /* Hand over the saved result and forget it; when there is none
	     * the result is empty. */
6977	    if (eval_result != NULL)
6978	    {
6979		STRCPY(dest, eval_result);
6980		dst += STRLEN(eval_result);
6981		vim_free(eval_result);
6982		eval_result = NULL;
6983	    }
6984	}
6985	else
6986	{
6987	    win_T	*save_reg_win;
6988	    int		save_ireg_ic;
6989
	    /* Drop a result that was never fetched with "copy" TRUE. */
6990	    vim_free(eval_result);
6991
6992	    /* The expression may contain substitute(), which calls us
6993	     * recursively.  Make sure submatch() gets the text from the first
6994	     * level.  Don't need to save "reg_buf", because
6995	     * vim_regexec_multi() can't be called recursively. */
6996	    submatch_match = reg_match;
6997	    submatch_mmatch = reg_mmatch;
6998	    submatch_firstlnum = reg_firstlnum;
6999	    submatch_maxline = reg_maxline;
7000	    save_reg_win = reg_win;
7001	    save_ireg_ic = ireg_ic;
7002	    can_f_submatch = TRUE;
7003
7004	    eval_result = eval_to_string(source + 2, NULL, TRUE);
7005	    if (eval_result != NULL)
7006	    {
7007		int had_backslash = FALSE;
7008
7009		for (s = eval_result; *s != NUL; mb_ptr_adv(s))
7010		{
7011		    /* Change NL to CR, so that it becomes a line break.
7012		     * Skip over a backslashed character. */
7013		    if (*s == NL)
7014			*s = CAR;
7015		    else if (*s == '\\' && s[1] != NUL)
7016		    {
7017			++s;
7018			/* Change NL to CR here too, so that this works:
7019			 * :s/abc\\\ndef/\="aaa\\\nbbb"/  on text:
7020			 *   abc\
7021			 *   def
7022			 */
7023			if (*s == NL)
7024			    *s = CAR;
7025			had_backslash = TRUE;
7026		    }
7027		}
7028		if (had_backslash && backslash)
7029		{
7030		    /* Backslashes will be consumed, need to double them. */
7031		    s = vim_strsave_escaped(eval_result, (char_u *)"\\");
		    /* When s is NULL the unescaped result is kept. */
7032		    if (s != NULL)
7033		    {
7034			vim_free(eval_result);
7035			eval_result = s;
7036		    }
7037		}
7038
		/* Only count the length here; the text itself is stored by
		 * the later call with "copy" TRUE. */
7039		dst += STRLEN(eval_result);
7040	    }
7041
	    /* Put back the values that were saved before evaluating the
	     * expression. */
7042	    reg_match = submatch_match;
7043	    reg_mmatch = submatch_mmatch;
7044	    reg_firstlnum = submatch_firstlnum;
7045	    reg_maxline = submatch_maxline;
7046	    reg_win = save_reg_win;
7047	    ireg_ic = save_ireg_ic;
7048	    can_f_submatch = FALSE;
7049	}
7050#endif
7051    }
7052    else
      /* Go through "source" one item at a time.  Each round either sets "no"
       * to insert a sub-match, changes "func", or handles one ordinary
       * character. */
7053      while ((c = *src++) != NUL)
7054      {
7055	if (c == '&' && magic)
7056	    no = 0;
7057	else if (c == '\\' && *src != NUL)
7058	{
7059	    if (*src == '&' && !magic)
7060	    {
7061		++src;
7062		no = 0;
7063	    }
7064	    else if ('0' <= *src && *src <= '9')
7065	    {
7066		no = *src++ - '0';
7067	    }
7068	    else if (vim_strchr((char_u *)"uUlLeE", *src))
7069	    {
		/* Only changes the conversion function, there is no output:
		 * continue with the next item right away. */
7070		switch (*src++)
7071		{
7072		case 'u':   func = (fptr_T)do_upper;
7073			    continue;
7074		case 'U':   func = (fptr_T)do_Upper;
7075			    continue;
7076		case 'l':   func = (fptr_T)do_lower;
7077			    continue;
7078		case 'L':   func = (fptr_T)do_Lower;
7079			    continue;
7080		case 'e':
7081		case 'E':   func = (fptr_T)NULL;
7082			    continue;
7083		}
7084	    }
7085	}
7086	if (no < 0)	      /* Ordinary character. */
7087	{
7088	    if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7089	    {
7090		/* Copy a special key as-is. */
		/* It takes three bytes: K_SPECIAL and the two bytes after
		 * it.  No case conversion is done. */
7091		if (copy)
7092		{
7093		    *dst++ = c;
7094		    *dst++ = *src++;
7095		    *dst++ = *src++;
7096		}
7097		else
7098		{
7099		    dst += 3;
7100		    src += 2;
7101		}
7102		continue;
7103	    }
7104
7105	    if (c == '\\' && *src != NUL)
7106	    {
7107		/* Check for abbreviations -- webb */
7108		switch (*src)
7109		{
7110		    case 'r':	c = CAR;	++src;	break;
7111		    case 'n':	c = NL;		++src;	break;
7112		    case 't':	c = TAB;	++src;	break;
7113		 /* Oh no!  \e already has meaning in subst pat :-( */
7114		 /* case 'e':   c = ESC;	++src;	break; */
7115		    case 'b':	c = Ctrl_H;	++src;	break;
7116
7117		    /* If "backslash" is TRUE the backslash will be removed
7118		     * later.  Used to insert a literal CR. */
		    /* The backslash is only added to the output when
		     * "backslash" is TRUE; the byte after it is always
		     * used. */
7119		    default:	if (backslash)
7120				{
7121				    if (copy)
7122					*dst = '\\';
7123				    ++dst;
7124				}
7125				c = *src++;
7126		}
7127	    }
7128#ifdef FEAT_MBYTE
7129	    else if (has_mbyte)
		/* "c" is only the byte that was read; get the character
		 * through mb_ptr2char(). */
7130		c = mb_ptr2char(src - 1);
7131#endif
7132
7133	    /* Write to buffer, if copy is set. */
	    /* func() stores the converted character in "cc" and returns the
	     * function to be used for the next character. */
7134	    if (func == (fptr_T)NULL)	/* just copy */
7135		cc = c;
7136	    else
7137		/* Turbo C complains without the typecast */
7138		func = (fptr_T)(func(&cc, c));
7139
7140#ifdef FEAT_MBYTE
7141	    if (has_mbyte)
7142	    {
		/* Length in the source at the byte that was consumed last. */
7143		int totlen = mb_ptr2len(src - 1);
7144
7145		if (copy)
7146		    mb_char2bytes(cc, dst);
		/* Advance by one less than the length of "cc"; the "dst++"
		 * below does the last step. */
7147		dst += mb_char2len(cc) - 1;
7148		if (enc_utf8)
7149		{
7150		    int clen = utf_ptr2len(src - 1);
7151
7152		    /* If the character length is shorter than "totlen", there
7153		     * are composing characters; copy them as-is. */
7154		    if (clen < totlen)
7155		    {
			/* "dst + 1" is the first position after "cc". */
7156			if (copy)
7157			    mch_memmove(dst + 1, src - 1 + clen,
7158						     (size_t)(totlen - clen));
7159			dst += totlen - clen;
7160		    }
7161		}
		/* One byte was consumed already, skip the others. */
7162		src += totlen - 1;
7163	    }
7164	    else
7165#endif
7166		if (copy)
7167		    *dst = cc;
7168	    dst++;
7169	}
7170	else
7171	{
	    /* Insert sub-match "no".  Find its start "s" and the number of
	     * bytes "len" to take from the first line; "s" is NULL when the
	     * sub-match has no start or end. */
7172	    if (REG_MULTI)
7173	    {
7174		clnum = reg_mmatch->startpos[no].lnum;
7175		if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7176		    s = NULL;
7177		else
7178		{
7179		    s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7180		    if (reg_mmatch->endpos[no].lnum == clnum)
7181			len = reg_mmatch->endpos[no].col
7182					       - reg_mmatch->startpos[no].col;
7183		    else
7184			len = (int)STRLEN(s);
7185		}
7186	    }
7187	    else
7188	    {
7189		s = reg_match->startp[no];
7190		if (reg_match->endp[no] == NULL)
7191		    s = NULL;
7192		else
7193		    len = (int)(reg_match->endp[no] - s);
7194	    }
7195	    if (s != NULL)
7196	    {
7197		for (;;)
7198		{
		    /* "len" is the number of bytes still to be taken from
		     * the current line. */
7199		    if (len == 0)
7200		    {
7201			if (REG_MULTI)
7202			{
			    /* Done when this was the last line.  Otherwise
			     * add a CAR for the line break and go on with
			     * the next line. */
7203			    if (reg_mmatch->endpos[no].lnum == clnum)
7204				break;
7205			    if (copy)
7206				*dst = CAR;
7207			    ++dst;
7208			    s = reg_getline(++clnum);
7209			    if (reg_mmatch->endpos[no].lnum == clnum)
7210				len = reg_mmatch->endpos[no].col;
7211			    else
7212				len = (int)STRLEN(s);
7213			}
7214			else
7215			    break;
7216		    }
7217		    else if (*s == NUL) /* we hit NUL. */
7218		    {
			/* The text ends before "len" bytes were taken.  The
			 * message is only given in the "copy" pass.  Note
			 * that jumping to "exit" skips storing the
			 * terminating NUL. */
7219			if (copy)
7220			    EMSG(_(e_re_damg));
7221			goto exit;
7222		    }
7223		    else
7224		    {
7225			if (backslash && (*s == CAR || *s == '\\'))
7226			{
7227			    /*
7228			     * Insert a backslash in front of a CR, otherwise
7229			     * it will be replaced by a line break.
7230			     * Number of backslashes will be halved later,
7231			     * double them here.
7232			     */
			    /* "func" is not applied to these characters. */
7233			    if (copy)
7234			    {
7235				dst[0] = '\\';
7236				dst[1] = *s;
7237			    }
7238			    dst += 2;
7239			}
7240			else
7241			{
7242#ifdef FEAT_MBYTE
7243			    if (has_mbyte)
7244				c = mb_ptr2char(s);
7245			    else
7246#endif
7247				c = *s;
7248
			    /* Same case conversion as for an ordinary
			     * character above. */
7249			    if (func == (fptr_T)NULL)	/* just copy */
7250				cc = c;
7251			    else
7252				/* Turbo C complains without the typecast */
7253				func = (fptr_T)(func(&cc, c));
7254
7255#ifdef FEAT_MBYTE
7256			    if (has_mbyte)
7257			    {
7258				int l;
7259
7260				/* Copy composing characters separately, one
7261				 * at a time. */
7262				if (enc_utf8)
7263				    l = utf_ptr2len(s) - 1;
7264				else
7265				    l = mb_ptr2len(s) - 1;
7266
				/* Skip all but one byte of the character in
				 * the matched text; the "++s" and "--len"
				 * below handle the remaining one. */
7267				s += l;
7268				len -= l;
7269				if (copy)
7270				    mb_char2bytes(cc, dst);
7271				dst += mb_char2len(cc) - 1;
7272			    }
7273			    else
7274#endif
7275				if (copy)
7276				    *dst = cc;
7277			    dst++;
7278			}
7279
7280			++s;
7281			--len;
7282		    }
7283		}
7284	    }
	    /* Sub-match handled, the next item is an ordinary character again
	     * unless it sets "no". */
7285	    no = -1;
7286	}
7287      }
7288    if (copy)
7289	*dst = NUL;
7290
7291exit:
    /* The "+ 1" is for the terminating NUL. */
7292    return (int)((dst - dest) + 1);
7293}
7294
7295#ifdef FEAT_EVAL
7296static char_u *reg_getline_submatch __ARGS((linenr_T lnum));
7297
7298/*
7299 * Call reg_getline() with the line numbers from the submatch.  If a
7300 * substitute() was used the reg_maxline and other values have been
7301 * overwritten.
7302 */
7303    static char_u *
7304reg_getline_submatch(lnum)
7305    linenr_T	lnum;
7306{
7307    char_u *s;
7308    linenr_T save_first = reg_firstlnum;
7309    linenr_T save_max = reg_maxline;
7310
7311    reg_firstlnum = submatch_firstlnum;
7312    reg_maxline = submatch_maxline;
7313
7314    s = reg_getline(lnum);
7315
7316    reg_firstlnum = save_first;
7317    reg_maxline = save_max;
7318    return s;
7319}
7320
7321/*
7322 * Used for the submatch() function: get the string from the n'th submatch in
7323 * allocated memory.
7324 * Returns NULL when not in a ":s" command and for a non-existing submatch.
7325 */
7326    char_u *
7327reg_submatch(no)
7328    int		no;
7329{
7330    char_u	*retval = NULL;
7331    char_u	*s;
7332    int		len;
7333    int		round;
7334    linenr_T	lnum;
7335
7336    if (!can_f_submatch || no < 0)
7337	return NULL;
7338
7339    if (submatch_match == NULL)
7340    {
7341	/*
7342	 * First round: compute the length and allocate memory.
7343	 * Second round: copy the text.
7344	 */
7345	for (round = 1; round <= 2; ++round)
7346	{
7347	    lnum = submatch_mmatch->startpos[no].lnum;
7348	    if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7349		return NULL;
7350
7351	    s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
7352	    if (s == NULL)  /* anti-crash check, cannot happen? */
7353		break;
7354	    if (submatch_mmatch->endpos[no].lnum == lnum)
7355	    {
7356		/* Within one line: take form start to end col. */
7357		len = submatch_mmatch->endpos[no].col
7358					  - submatch_mmatch->startpos[no].col;
7359		if (round == 2)
7360		    vim_strncpy(retval, s, len);
7361		++len;
7362	    }
7363	    else
7364	    {
7365		/* Multiple lines: take start line from start col, middle
7366		 * lines completely and end line up to end col. */
7367		len = (int)STRLEN(s);
7368		if (round == 2)
7369		{
7370		    STRCPY(retval, s);
7371		    retval[len] = '\n';
7372		}
7373		++len;
7374		++lnum;
7375		while (lnum < submatch_mmatch->endpos[no].lnum)
7376		{
7377		    s = reg_getline_submatch(lnum++);
7378		    if (round == 2)
7379			STRCPY(retval + len, s);
7380		    len += (int)STRLEN(s);
7381		    if (round == 2)
7382			retval[len] = '\n';
7383		    ++len;
7384		}
7385		if (round == 2)
7386		    STRNCPY(retval + len, reg_getline_submatch(lnum),
7387					     submatch_mmatch->endpos[no].col);
7388		len += submatch_mmatch->endpos[no].col;
7389		if (round == 2)
7390		    retval[len] = NUL;
7391		++len;
7392	    }
7393
7394	    if (retval == NULL)
7395	    {
7396		retval = lalloc((long_u)len, TRUE);
7397		if (retval == NULL)
7398		    return NULL;
7399	    }
7400	}
7401    }
7402    else
7403    {
7404	s = submatch_match->startp[no];
7405	if (s == NULL || submatch_match->endp[no] == NULL)
7406	    retval = NULL;
7407	else
7408	    retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
7409    }
7410
7411    return retval;
7412}
7413#endif
7414