search.c revision 191930
1/* $FreeBSD: head/contrib/less/search.c 191930 2009-05-09 01:35:27Z delphij $ */
2/*
3 * Copyright (C) 1984-2008  Mark Nudelman
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Less License, as specified in the README file.
7 *
8 * For more information about less, or for information on how to
9 * contact the author, see the README file.
10 */
11
12
13/*
14 * Routines to search a file for a pattern.
15 */
16
17#include "less.h"
18#include "position.h"
19#include "charset.h"
20
/*
 * Smaller / larger of two values.
 * Each argument may be evaluated twice, so avoid arguments with
 * side effects.
 */
#define	MINPOS(a,b)	(((a) < (b)) ? (a) : (b))
#define	MAXPOS(a,b)	(((a) > (b)) ? (a) : (b))
23
24#if HAVE_POSIX_REGCOMP
25#include <regex.h>
26#ifdef REG_EXTENDED
27#define	REGCOMP_FLAG	(less_is_more ? 0 : REG_EXTENDED)
28#else
29#define	REGCOMP_FLAG	0
30#endif
31#endif
32#if HAVE_PCRE
33#include <pcre.h>
34#endif
35#if HAVE_RE_COMP
36char *re_comp();
37int re_exec();
38#endif
39#if HAVE_REGCMP
40char *regcmp();
41char *regex();
42extern char *__loc1;
43#endif
44#if HAVE_V8_REGCOMP
45#include "regexp.h"
46#endif
47
48static int match();
49
50extern int sigs;
51extern int how_search;
52extern int caseless;
53extern int linenums;
54extern int sc_height;
55extern int jump_sline;
56extern int bs_mode;
57extern int less_is_more;
58extern int ctldisp;
59extern int status_col;
60extern void * constant ml_search;
61extern POSITION start_attnpos;
62extern POSITION end_attnpos;
63extern int utf_mode;
64extern int screen_trashed;
#if HILITE_SEARCH
extern int hilite_search;
extern int size_linebuf;
extern int squished;
extern int can_goto_line;
static int hide_hilite;		/* Nonzero while highlights are hidden */
static int oldbot;
static POSITION prep_startpos;
static POSITION prep_endpos;

/*
 * One entry in a list of file position ranges.
 * The same structure is used for search highlights (hilite_anchor)
 * and for filtered lines (filter_anchor).
 */
struct hilite
{
	struct hilite *hl_next;		/* Next entry in the list */
	POSITION hl_startpos;		/* File position where the range starts */
	POSITION hl_endpos;		/* File position where the range ends */
};
/* List heads; the anchors themselves never hold a range. */
static struct hilite hilite_anchor = { NULL, NULL_POSITION, NULL_POSITION };
static struct hilite filter_anchor = { NULL, NULL_POSITION, NULL_POSITION };
/* On an anchor, hl_next is the first real entry of the list. */
#define	hl_first	hl_next
#endif
85
/*
 * These are the static variables that represent the "remembered"
 * search pattern and filter pattern.
 * DEFINE_PATTERN(name) expands to the declaration of a compiled-pattern
 * variable for whichever regex library is in use.  The expansion has
 * no trailing semicolon; the semicolon is supplied where the macro
 * is used.
 */
#if HAVE_POSIX_REGCOMP
#define DEFINE_PATTERN(name)  static regex_t *name = NULL
#endif
#if HAVE_PCRE
#define DEFINE_PATTERN(name)  pcre *name = NULL
#endif
#if HAVE_RE_COMP
#define DEFINE_PATTERN(name)  int name = 0
#endif
#if HAVE_REGCMP
#define DEFINE_PATTERN(name)  static char *name = NULL
#endif
#if HAVE_V8_REGCOMP
#define DEFINE_PATTERN(name)  static struct regexp *name = NULL
#endif

DEFINE_PATTERN(search_pattern);
DEFINE_PATTERN(filter_pattern);
108
static int is_caseless;		/* Nonzero if lines are lowercased before matching */
static int is_ucase_pattern;	/* Nonzero if the last pattern contained uppercase */
static int last_search_type;	/* search_type of the last compiled search pattern */
static int last_filter_type;	/* search_type of the last compiled filter pattern */
static char *last_pattern = NULL;	/* Text of the last compiled search pattern */

/* Conversion flags accepted by cvt_text. */
#define	CVT_TO_LC	01	/* Convert upper-case to lower-case */
#define	CVT_BS		02	/* Do backspace processing */
#define	CVT_CRLF	04	/* Remove a trailing CR from the converted text */
#define	CVT_ANSI	010	/* Remove ANSI escape sequences */
119
120/*
121 * Get the length of a buffer needed to convert a string.
122 */
123	static int
124cvt_length(len, ops)
125	int len;
126	int ops;
127{
128	if (utf_mode)
129		/*
130		 * Just copying a string in UTF-8 mode can cause it to grow
131		 * in length.
132		 * Six output bytes for one input byte is the worst case
133		 * (and unfortunately is far more than is needed in any
134		 * non-pathological situation, so this is very wasteful).
135		 */
136		len *= 6;
137	return len + 1;
138}
139
140/*
141 * Convert text.  Perform the transformations specified by ops.
142 */
143	static void
144cvt_text(odst, osrc, lenp, ops)
145	char *odst;
146	char *osrc;
147	int *lenp;
148	int ops;
149{
150	char *dst;
151	char *src;
152	register char *src_end;
153	LWCHAR ch;
154
155	if (lenp != NULL)
156		src_end = osrc + *lenp;
157	else
158		src_end = osrc + strlen(osrc);
159
160	for (src = osrc, dst = odst;  src < src_end;  )
161	{
162		ch = step_char(&src, +1, src_end);
163		if ((ops & CVT_TO_LC) && IS_UPPER(ch))
164		{
165			/* Convert uppercase to lowercase. */
166			put_wchar(&dst, TO_LOWER(ch));
167		} else if ((ops & CVT_BS) && ch == '\b' && dst > odst)
168		{
169			/* Delete backspace and preceding char. */
170			do {
171				dst--;
172			} while (dst > odst &&
173				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
174		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
175		{
176			/* Skip to end of ANSI escape sequence. */
177			src++;  /* skip the CSI start char */
178			while (src < src_end)
179				if (!is_ansi_middle(*src++))
180					break;
181		} else
182			/* Just copy. */
183			put_wchar(&dst, ch);
184	}
185	if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r')
186		dst--;
187	*dst = '\0';
188	if (lenp != NULL)
189		*lenp = dst - odst;
190}
191
192/*
193 * Determine which conversions to perform.
194 */
195	static int
196get_cvt_ops()
197{
198	int ops = 0;
199	if (is_caseless || bs_mode == BS_SPECIAL)
200	{
201		if (is_caseless)
202			ops |= CVT_TO_LC;
203		if (bs_mode == BS_SPECIAL)
204			ops |= CVT_BS;
205		if (bs_mode != BS_CONTROL)
206			ops |= CVT_CRLF;
207	} else if (bs_mode != BS_CONTROL)
208	{
209		ops |= CVT_CRLF;
210	}
211	if (ctldisp == OPT_ONPLUS)
212		ops |= CVT_ANSI;
213	return (ops);
214}
215
216/*
217 * Are there any uppercase letters in this string?
218 */
219	static int
220is_ucase(str)
221	char *str;
222{
223	char *str_end = str + strlen(str);
224	LWCHAR ch;
225
226	while (str < str_end)
227	{
228		ch = step_char(&str, +1, str_end);
229		if (IS_UPPER(ch))
230			return (1);
231	}
232	return (0);
233}
234
235/*
236 * Is there a previous (remembered) search pattern?
237 */
238	static int
239prev_pattern()
240{
241	if (last_search_type & SRCH_NO_REGEX)
242		return (last_pattern != NULL);
243#if HAVE_POSIX_REGCOMP
244	return (search_pattern != NULL);
245#endif
246#if HAVE_PCRE
247	return (search_pattern != NULL);
248#endif
249#if HAVE_RE_COMP
250	return (search_pattern != 0);
251#endif
252#if HAVE_REGCMP
253	return (search_pattern != NULL);
254#endif
255#if HAVE_V8_REGCOMP
256	return (search_pattern != NULL);
257#endif
258#if NO_REGEX
259	return (search_pattern != NULL);
260#endif
261}
262
263#if HILITE_SEARCH
/*
 * Repaint the lines currently displayed on the screen so that their
 * highlighting is up to date.
 * Every on-screen line is redrawn; the test that would restrict this
 * to highlighted lines is compiled out (see below).
 * If on==0, force all hilites off while repainting.
 * hide_hilite is restored to its entry value before returning,
 * except when on==0 and highlights were already hidden (no-op).
 */
	public void
repaint_hilite(on)
	int on;
{
	int slinenum;
	POSITION pos;
	POSITION epos;
	int save_hide_hilite;

	if (squished)
		repaint();

	save_hide_hilite = hide_hilite;
	if (!on)
	{
		/* Highlights are already hidden; nothing to do. */
		if (hide_hilite)
			return;
		hide_hilite = 1;
	}

	if (!can_goto_line)
	{
		/* Can't redraw individual lines; repaint the whole screen. */
		repaint();
		hide_hilite = save_hide_hilite;
		return;
	}

	for (slinenum = TOP;  slinenum < TOP + sc_height-1;  slinenum++)
	{
		pos = position(slinenum);
		if (pos == NULL_POSITION)
			continue;
		epos = position(slinenum+1);
#if 0
		/*
		 * If any character in the line is highlighted,
		 * repaint the line.
		 *
		 * {{ This doesn't work -- if line is drawn with highlights
		 * which should be erased (e.g. toggle -i with status column),
		 * we must redraw the line even if it has no highlights.
		 * For now, just repaint every line. }}
		 */
		if (is_hilited(pos, epos, 1, NULL))
#endif
		{
			/* Re-read the line, move to its screen row and redraw it. */
			(void) forw_line(pos);
			goto_line(slinenum);
			put_line();
		}
	}
	if (!oldbot)
		lower_left();
	hide_hilite = save_hide_hilite;
}
324
325/*
326 * Clear the attn hilite.
327 */
328	public void
329clear_attn()
330{
331	int slinenum;
332	POSITION old_start_attnpos;
333	POSITION old_end_attnpos;
334	POSITION pos;
335	POSITION epos;
336	int moved = 0;
337
338	if (start_attnpos == NULL_POSITION)
339		return;
340	old_start_attnpos = start_attnpos;
341	old_end_attnpos = end_attnpos;
342	start_attnpos = end_attnpos = NULL_POSITION;
343
344	if (!can_goto_line)
345	{
346		repaint();
347		return;
348	}
349	if (squished)
350		repaint();
351
352	for (slinenum = TOP;  slinenum < TOP + sc_height-1;  slinenum++)
353	{
354		pos = position(slinenum);
355		if (pos == NULL_POSITION)
356			continue;
357		epos = position(slinenum+1);
358		if (pos < old_end_attnpos &&
359		     (epos == NULL_POSITION || epos > old_start_attnpos))
360		{
361			(void) forw_line(pos);
362			goto_line(slinenum);
363			put_line();
364			moved = 1;
365		}
366	}
367	if (moved)
368		lower_left();
369}
370#endif
371
372/*
373 * Hide search string highlighting.
374 */
375	public void
376undo_search()
377{
378	if (!prev_pattern())
379	{
380		error("No previous regular expression", NULL_PARG);
381		return;
382	}
383#if HILITE_SEARCH
384	hide_hilite = !hide_hilite;
385	repaint_hilite(1);
386#endif
387}
388
389/*
390 * Compile a search pattern, for future use by match_pattern.
391 */
392	static int
393compile_pattern2(pattern, search_type, comp_pattern)
394	char *pattern;
395	int search_type;
396	void **comp_pattern;
397{
398	if ((search_type & SRCH_NO_REGEX) == 0)
399	{
400#if HAVE_POSIX_REGCOMP
401		regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
402		regex_t **pcomp = (regex_t **) comp_pattern;
403		if (regcomp(comp, pattern, REGCOMP_FLAG))
404		{
405			free(comp);
406			error("Invalid pattern", NULL_PARG);
407			return (-1);
408		}
409		if (*pcomp != NULL)
410			regfree(*pcomp);
411		*pcomp = comp;
412#endif
413#if HAVE_PCRE
414		pcre *comp;
415		pcre **pcomp = (pcre **) comp_pattern;
416		const char *errstring;
417		int erroffset;
418		PARG parg;
419		comp = pcre_compile(pattern, 0,
420				&errstring, &erroffset, NULL);
421		if (comp == NULL)
422		{
423			parg.p_string = (char *) errstring;
424			error("%s", &parg);
425			return (-1);
426		}
427		*pcomp = comp;
428#endif
429#if HAVE_RE_COMP
430		PARG parg;
431		int *pcomp = (int *) comp_pattern;
432		if ((parg.p_string = re_comp(pattern)) != NULL)
433		{
434			error("%s", &parg);
435			return (-1);
436		}
437		*pcomp = 1;
438#endif
439#if HAVE_REGCMP
440		char *comp;
441		char **pcomp = (char **) comp_pattern;
442		if ((comp = regcmp(pattern, 0)) == NULL)
443		{
444			error("Invalid pattern", NULL_PARG);
445			return (-1);
446		}
447		if (pcomp != NULL)
448			free(*pcomp);
449		*pcomp = comp;
450#endif
451#if HAVE_V8_REGCOMP
452		struct regexp *comp;
453		struct regexp **pcomp = (struct regexp **) comp_pattern;
454		if ((comp = regcomp(pattern)) == NULL)
455		{
456			/*
457			 * regcomp has already printed an error message
458			 * via regerror().
459			 */
460			return (-1);
461		}
462		if (*pcomp != NULL)
463			free(*pcomp);
464		*pcomp = comp;
465#endif
466	}
467
468	if (comp_pattern == (void **) &search_pattern)
469	{
470		if (last_pattern != NULL)
471			free(last_pattern);
472		last_pattern = (char *) calloc(1, strlen(pattern)+1);
473		if (last_pattern != NULL)
474			strcpy(last_pattern, pattern);
475		last_search_type = search_type;
476	} else
477	{
478		last_filter_type = search_type;
479	}
480	return (0);
481}
482
483/*
484 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
485 */
486	static int
487compile_pattern(pattern, search_type, comp_pattern)
488	char *pattern;
489	int search_type;
490	void **comp_pattern;
491{
492	char *cvt_pattern;
493	int result;
494
495	if (caseless != OPT_ONPLUS)
496		cvt_pattern = pattern;
497	else
498	{
499		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
500		cvt_text(cvt_pattern, pattern, (int *)NULL, CVT_TO_LC);
501	}
502	result = compile_pattern2(cvt_pattern, search_type, comp_pattern);
503	if (cvt_pattern != pattern)
504		free(cvt_pattern);
505	return (result);
506}
507
/*
 * Forget that we have a compiled pattern.
 * Releases the compiled pattern stored at *pattern (if any) and
 * resets the variable to its "no pattern" value.
 */
	static void
uncompile_pattern(pattern)
	void **pattern;
{
#if HAVE_POSIX_REGCOMP
	regex_t **pcomp = (regex_t **) pattern;
	if (*pcomp != NULL)
	{
		/*
		 * regfree releases only what regcomp allocated;
		 * the regex_t itself was allocated when the pattern
		 * was compiled and must be freed separately.
		 */
		regfree(*pcomp);
		free(*pcomp);
	}
	*pcomp = NULL;
#endif
#if HAVE_PCRE
	pcre **pcomp = (pcre **) pattern;
	if (*pcomp != NULL)
		pcre_free(*pcomp);
	*pcomp = NULL;
#endif
#if HAVE_RE_COMP
	int *pcomp = (int *) pattern;
	*pcomp = 0;
#endif
#if HAVE_REGCMP
	char **pcomp = (char **) pattern;
	if (*pcomp != NULL)
		free(*pcomp);
	*pcomp = NULL;
#endif
#if HAVE_V8_REGCOMP
	struct regexp **pcomp = (struct regexp **) pattern;
	if (*pcomp != NULL)
		free(*pcomp);
	*pcomp = NULL;
#endif
}
544
545	static void
546uncompile_search_pattern()
547{
548	uncompile_pattern(&search_pattern);
549	last_pattern = NULL;
550}
551
	static void
uncompile_filter_pattern()
{
	/* Forget the compiled filter pattern. */
	uncompile_pattern(&filter_pattern);
}
557
/*
 * Is a compiled pattern null?
 * Every supported regex library represents "no pattern" as a null
 * (zero) value, so a single test serves all configurations.
 * Returns 1 if pattern is null, 0 otherwise.
 */
	static int
is_null_pattern(pattern)
	void *pattern;
{
	return (pattern == NULL);
}
581
582/*
583 * Perform a pattern match with the previously compiled pattern.
584 * Set sp and ep to the start and end of the matched string.
585 */
586	static int
587match_pattern(pattern, line, line_len, sp, ep, notbol, search_type)
588	void *pattern;
589	char *line;
590	int line_len;
591	char **sp;
592	char **ep;
593	int notbol;
594	int search_type;
595{
596	int matched;
597#if HAVE_POSIX_REGCOMP
598	regex_t *spattern = (regex_t *) pattern;
599#endif
600#if HAVE_PCRE
601	pcre *spattern = (pcre *) pattern;
602#endif
603#if HAVE_RE_COMP
604	int spattern = (int) pattern;
605#endif
606#if HAVE_REGCMP
607	char *spattern = (char *) pattern;
608#endif
609#if HAVE_V8_REGCOMP
610	struct regexp *spattern = (struct regexp *) pattern;
611#endif
612
613	if (search_type & SRCH_NO_REGEX)
614		return (match(last_pattern, strlen(last_pattern), line, line_len, sp, ep));
615
616#if HAVE_POSIX_REGCOMP
617	{
618		regmatch_t rm;
619		int flags = (notbol) ? REG_NOTBOL : 0;
620		matched = !regexec(spattern, line, 1, &rm, flags);
621		if (matched)
622		{
623#ifndef __WATCOMC__
624			*sp = line + rm.rm_so;
625			*ep = line + rm.rm_eo;
626#else
627			*sp = rm.rm_sp;
628			*ep = rm.rm_ep;
629#endif
630		}
631	}
632#endif
633#if HAVE_PCRE
634	{
635		int flags = (notbol) ? PCRE_NOTBOL : 0;
636		int ovector[3];
637		matched = pcre_exec(spattern, NULL, line, line_len,
638			0, flags, ovector, 3) >= 0;
639		if (matched)
640		{
641			*sp = line + ovector[0];
642			*ep = line + ovector[1];
643		}
644	}
645#endif
646#if HAVE_RE_COMP
647	matched = (re_exec(line) == 1);
648	/*
649	 * re_exec doesn't seem to provide a way to get the matched string.
650	 */
651	*sp = *ep = NULL;
652#endif
653#if HAVE_REGCMP
654	*ep = regex(spattern, line);
655	matched = (*ep != NULL);
656	if (matched)
657		*sp = __loc1;
658#endif
659#if HAVE_V8_REGCOMP
660#if HAVE_REGEXEC2
661	matched = regexec2(spattern, line, notbol);
662#else
663	matched = regexec(spattern, line);
664#endif
665	if (matched)
666	{
667		*sp = spattern->startp[0];
668		*ep = spattern->endp[0];
669	}
670#endif
671#if NO_REGEX
672	matched = match(last_pattern, strlen(last_pattern), line, line_len, sp, ep);
673#endif
674	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
675			((search_type & SRCH_NO_MATCH) && !matched);
676	return (matched);
677}
678
679#if HILITE_SEARCH
680/*
681 * Clear the hilite list.
682 */
683	public void
684clr_hlist(anchor)
685	struct hilite *anchor;
686{
687	struct hilite *hl;
688	struct hilite *nexthl;
689
690	for (hl = anchor->hl_first;  hl != NULL;  hl = nexthl)
691	{
692		nexthl = hl->hl_next;
693		free((void*)hl);
694	}
695	anchor->hl_first = NULL;
696	prep_startpos = prep_endpos = NULL_POSITION;
697}
698
	public void
clr_hilite()
{
	/* Discard all search highlights. */
	clr_hlist(&hilite_anchor);
}
704
	public void
clr_filter()
{
	/* Discard the list of filtered lines. */
	clr_hlist(&filter_anchor);
}
710
711/*
712 * Should any characters in a specified range be highlighted?
713 */
714	static int
715is_hilited_range(pos, epos)
716	POSITION pos;
717	POSITION epos;
718{
719	struct hilite *hl;
720
721	/*
722	 * Look at each highlight and see if any part of it falls in the range.
723	 */
724	for (hl = hilite_anchor.hl_first;  hl != NULL;  hl = hl->hl_next)
725	{
726		if (hl->hl_endpos > pos &&
727		    (epos == NULL_POSITION || epos > hl->hl_startpos))
728			return (1);
729	}
730	return (0);
731}
732
733/*
734 * Is a line "filtered" -- that is, should it be hidden?
735 */
736	public int
737is_filtered(pos)
738	POSITION pos;
739{
740	struct hilite *hl;
741
742	if (ch_getflags() & CH_HELPFILE)
743		return (0);
744
745	/*
746	 * Look at each filter and see if the start position
747	 * equals the start position of the line.
748	 */
749	for (hl = filter_anchor.hl_first;  hl != NULL;  hl = hl->hl_next)
750	{
751		if (hl->hl_startpos == pos)
752			return (1);
753	}
754	return (0);
755}
756
757/*
758 * Should any characters in a specified range be highlighted?
759 * If nohide is nonzero, don't consider hide_hilite.
760 */
761	public int
762is_hilited(pos, epos, nohide, p_matches)
763	POSITION pos;
764	POSITION epos;
765	int nohide;
766	int *p_matches;
767{
768	int match;
769
770	if (p_matches != NULL)
771		*p_matches = 0;
772
773	if (!status_col &&
774	    start_attnpos != NULL_POSITION &&
775	    pos < end_attnpos &&
776	     (epos == NULL_POSITION || epos > start_attnpos))
777		/*
778		 * The attn line overlaps this range.
779		 */
780		return (1);
781
782	match = is_hilited_range(pos, epos);
783	if (!match)
784		return (0);
785
786	if (p_matches != NULL)
787		/*
788		 * Report matches, even if we're hiding highlights.
789		 */
790		*p_matches = 1;
791
792	if (hilite_search == 0)
793		/*
794		 * Not doing highlighting.
795		 */
796		return (0);
797
798	if (!nohide && hide_hilite)
799		/*
800		 * Highlighting is hidden.
801		 */
802		return (0);
803
804	return (1);
805}
806
807/*
808 * Add a new hilite to a hilite list.
809 */
810	static void
811add_hilite(anchor, hl)
812	struct hilite *anchor;
813	struct hilite *hl;
814{
815	struct hilite *ihl;
816
817	/*
818	 * Hilites are sorted in the list; find where new one belongs.
819	 * Insert new one after ihl.
820	 */
821	for (ihl = anchor;  ihl->hl_next != NULL;  ihl = ihl->hl_next)
822	{
823		if (ihl->hl_next->hl_startpos > hl->hl_startpos)
824			break;
825	}
826
827	/*
828	 * Truncate hilite so it doesn't overlap any existing ones
829	 * above and below it.
830	 */
831	if (ihl != anchor)
832		hl->hl_startpos = MAXPOS(hl->hl_startpos, ihl->hl_endpos);
833	if (ihl->hl_next != NULL)
834		hl->hl_endpos = MINPOS(hl->hl_endpos, ihl->hl_next->hl_startpos);
835	if (hl->hl_startpos >= hl->hl_endpos)
836	{
837		/*
838		 * Hilite was truncated out of existence.
839		 */
840		free(hl);
841		return;
842	}
843	hl->hl_next = ihl->hl_next;
844	ihl->hl_next = hl;
845}
846
/*
 * Adjust hl_startpos & hl_endpos to account for processing by cvt_text.
 *
 * anchor	List of hilites for one line, whose positions were
 *		computed from offsets in the converted line.
 * linepos	File position of the start of that line.
 * cvt_ops	The CVT_* flags that were used to convert the line.
 *
 * The raw line is walked one character at a time while two positions
 * are tracked: opos, the position in the converted text, and npos,
 * the position in the raw file.  Each hilite boundary expressed in
 * converted positions is replaced by the corresponding raw position.
 */
	static void
adj_hilite(anchor, linepos, cvt_ops)
	struct hilite *anchor;
	POSITION linepos;
	int cvt_ops;
{
	char *line;
	char *oline;
	int line_len;
	char *line_end;
	struct hilite *hl;
	int checkstart;
	POSITION opos;
	POSITION npos;
	POSITION hl_opos;
	POSITION hl_npos;
	LWCHAR ch;
	int ncwidth;

	/*
	 * The line was already scanned and hilites were added (in hilite_line).
	 * But it was assumed that each char position in the line
	 * corresponds to one char position in the file.
	 * This may not be true if cvt_text modified the line.
	 * Get the raw line again.  Look at each character.
	 */
	(void) forw_raw_line(linepos, &line, &line_len);
	line_end = line + line_len;
	opos = npos = linepos;
	hl = anchor->hl_first;
    /* Nothing to adjust if the list is empty. */
    if (hl == NULL)
        return;
    hl_opos = hl_npos = hl->hl_startpos;
	checkstart = TRUE;

	while (hl != NULL && line < line_end)
	{
		/*
		 * See if we need to adjust the current hl_startpos or
		 * hl_endpos.  After adjusting startpos[i], move to endpos[i].
		 * After adjusting endpos[i], move to startpos[i+1].
		 * The hilite list must be sorted thus:
		 * startpos[0] < endpos[0] <= startpos[1] < endpos[1] <= etc.
		 */
		oline = line;
		ch = step_char(&line, +1, line_end);
		ncwidth = line - oline;
		npos += ncwidth;

		/* Figure out how this char was processed by cvt_text. */
		if ((cvt_ops & CVT_BS) && ch == '\b')
		{
			/* Skip the backspace and the following char. */
			oline = line;
			ch = step_char(&line, +1, line_end);
			ncwidth = line - oline;
			npos += ncwidth;
		} else if ((cvt_ops & CVT_TO_LC) && IS_UPPER(ch))
		{
			/* Converted uppercase to lower.
			 * Note that this may have changed the number of bytes
			 * that the character occupies. */
			char dbuf[6];
			char *dst = dbuf;
			put_wchar(&dst, TO_LOWER(ch));
			opos += dst - dbuf;
		} else if ((cvt_ops & CVT_ANSI) && IS_CSI_START(ch))
		{
			/* Skip to end of ANSI escape sequence. */
			line++;  /* skip the CSI start char */
			npos++;
			while (line < line_end)
			{
				npos++;
				if (!is_ansi_middle(*line++))
					break;
			}
		} else
		{
			/* Ordinary unprocessed character. */
			opos += ncwidth;
		}

        if (opos == hl_opos) {
            /* Adjust highlight position. */
            hl_npos = npos;
        }
        if (opos > hl_opos)
        {
            /*
             * We've moved past the highlight position; store the
             * adjusted highlight position and move to the next highlight.
             */
            if (checkstart)
            {
                /* Start is done; the end of the same hilite is next. */
                hl->hl_startpos = hl_npos;
                hl_opos = hl->hl_endpos;
                checkstart = FALSE;
            } else
            {
                /* End is done; the start of the next hilite is next. */
                hl->hl_endpos = hl_npos;
                hl = hl->hl_next;
                if (hl != NULL)
                    hl_opos = hl->hl_startpos;
                checkstart = TRUE;
            }
            hl_npos = npos;
        }
	}
}
960
/*
 * Make a hilite for each string in a physical line which matches
 * the current pattern.
 *
 * linepos	File position of the start of the line.
 * line		The converted line text; line_len is its length.
 * sp,ep	Delimit the first match already found.  If either is
 *		NULL, nothing is highlighted.
 * cvt_ops	The CVT_* flags used to produce line, needed to map
 *		offsets in line back to file positions.
 */
	static void
hilite_line(linepos, line, line_len, sp, ep, cvt_ops)
	POSITION linepos;
	char *line;
	int line_len;
	char *sp;
	char *ep;
	int cvt_ops;
{
	char *searchp;
	char *line_end = line + line_len;
	struct hilite *hl;
	struct hilite hilites;

	if (sp == NULL || ep == NULL)
		return;
	/*
	 * sp and ep delimit the first match in the line.
	 * Mark the corresponding file positions, then
	 * look for further matches and mark them.
	 * {{ This technique, of calling match_pattern on subsequent
	 *    substrings of the line, may mark more than is correct
	 *    if the pattern starts with "^".  This bug is fixed
	 *    for those regex functions that accept a notbol parameter
	 *    (currently POSIX, PCRE and V8-with-regexec2). }}
	 */
	searchp = line;
	/*
	 * Put the hilites into a temporary list until they're adjusted.
	 */
	hilites.hl_first = NULL;
	do {
		/* An empty match produces no hilite. */
		if (ep > sp)
		{
			/*
			 * Assume that each char position in the "line"
			 * buffer corresponds to one char position in the file.
			 * This is not quite true; we need to adjust later.
			 */
			hl = (struct hilite *) ecalloc(1, sizeof(struct hilite));
			hl->hl_startpos = linepos + (sp-line);
			hl->hl_endpos = linepos + (ep-line);
			add_hilite(&hilites, hl);
		}
		/*
		 * If we matched more than zero characters,
		 * move to the first char after the string we matched.
		 * If we matched zero, just move to the next char.
		 */
		if (ep > searchp)
			searchp = ep;
		else if (searchp != line_end)
			searchp++;
		else /* end of line */
			break;
	} while (match_pattern(search_pattern, searchp, line_end - searchp, &sp, &ep, 1, last_search_type));

	/*
	 * If there were backspaces in the original line, they
	 * were removed, and hl_startpos/hl_endpos are not correct.
	 * {{ This is very ugly. }}
	 */
	adj_hilite(&hilites, linepos, cvt_ops);

	/*
	 * Now put the hilites into the real list.
	 */
	while ((hl = hilites.hl_next) != NULL)
	{
		/* Unlink from the temporary list before re-inserting. */
		hilites.hl_next = hl->hl_next;
		add_hilite(&hilite_anchor, hl);
	}
}
1039#endif
1040
1041/*
1042 * Change the caseless-ness of searches.
1043 * Updates the internal search state to reflect a change in the -i flag.
1044 */
1045	public void
1046chg_caseless()
1047{
1048	if (!is_ucase_pattern)
1049		/*
1050		 * Pattern did not have uppercase.
1051		 * Just set the search caselessness to the global caselessness.
1052		 */
1053		is_caseless = caseless;
1054	else
1055		/*
1056		 * Pattern did have uppercase.
1057		 * Discard the pattern; we can't change search caselessness now.
1058		 */
1059		uncompile_search_pattern();
1060}
1061
1062#if HILITE_SEARCH
1063/*
1064 * Find matching text which is currently on screen and highlight it.
1065 */
1066	static void
1067hilite_screen()
1068{
1069	struct scrpos scrpos;
1070
1071	get_scrpos(&scrpos);
1072	if (scrpos.pos == NULL_POSITION)
1073		return;
1074	prep_hilite(scrpos.pos, position(BOTTOM_PLUS_ONE), -1);
1075	repaint_hilite(1);
1076}
1077
1078/*
1079 * Change highlighting parameters.
1080 */
1081	public void
1082chg_hilite()
1083{
1084	/*
1085	 * Erase any highlights currently on screen.
1086	 */
1087	clr_hilite();
1088	hide_hilite = 0;
1089
1090	if (hilite_search == OPT_ONPLUS)
1091		/*
1092		 * Display highlights.
1093		 */
1094		hilite_screen();
1095}
1096#endif
1097
1098/*
1099 * Figure out where to start a search.
1100 */
1101	static POSITION
1102search_pos(search_type)
1103	int search_type;
1104{
1105	POSITION pos;
1106	int linenum;
1107
1108	if (empty_screen())
1109	{
1110		/*
1111		 * Start at the beginning (or end) of the file.
1112		 * The empty_screen() case is mainly for
1113		 * command line initiated searches;
1114		 * for example, "+/xyz" on the command line.
1115		 * Also for multi-file (SRCH_PAST_EOF) searches.
1116		 */
1117		if (search_type & SRCH_FORW)
1118		{
1119			return (ch_zero());
1120		} else
1121		{
1122			pos = ch_length();
1123			if (pos == NULL_POSITION)
1124			{
1125				(void) ch_end_seek();
1126				pos = ch_length();
1127			}
1128			return (pos);
1129		}
1130	}
1131	if (how_search)
1132	{
1133		/*
1134		 * Search does not include current screen.
1135		 */
1136		if (search_type & SRCH_FORW)
1137			linenum = BOTTOM_PLUS_ONE;
1138		else
1139			linenum = TOP;
1140		pos = position(linenum);
1141	} else
1142	{
1143		/*
1144		 * Search includes current screen.
1145		 * It starts at the jump target (if searching backwards),
1146		 * or at the jump target plus one (if forwards).
1147		 */
1148		linenum = adjsline(jump_sline);
1149		pos = position(linenum);
1150		if (search_type & SRCH_FORW)
1151		{
1152			pos = forw_raw_line(pos, (char **)NULL, (int *)NULL);
1153			while (pos == NULL_POSITION)
1154			{
1155				if (++linenum >= sc_height)
1156					break;
1157				pos = position(linenum);
1158			}
1159		} else
1160		{
1161			while (pos == NULL_POSITION)
1162			{
1163				if (--linenum < 0)
1164					break;
1165				pos = position(linenum);
1166			}
1167		}
1168	}
1169	return (pos);
1170}
1171
1172/*
1173 * Search a subset of the file, specified by start/end position.
1174 */
1175	static int
1176search_range(pos, endpos, search_type, matches, maxlines, plinepos, pendpos)
1177	POSITION pos;
1178	POSITION endpos;
1179	int search_type;
1180	int matches;
1181	int maxlines;
1182	POSITION *plinepos;
1183	POSITION *pendpos;
1184{
1185	char *line;
1186	char *cline;
1187	int line_len;
1188	LINENUM linenum;
1189	char *sp, *ep;
1190	int line_match;
1191	int cvt_ops;
1192	POSITION linepos, oldpos;
1193
1194	linenum = find_linenum(pos);
1195	oldpos = pos;
1196	for (;;)
1197	{
1198		/*
1199		 * Get lines until we find a matching one or until
1200		 * we hit end-of-file (or beginning-of-file if we're
1201		 * going backwards), or until we hit the end position.
1202		 */
1203		if (ABORT_SIGS())
1204		{
1205			/*
1206			 * A signal aborts the search.
1207			 */
1208			return (-1);
1209		}
1210
1211		if ((endpos != NULL_POSITION && pos >= endpos) || maxlines == 0)
1212		{
1213			/*
1214			 * Reached end position without a match.
1215			 */
1216			if (pendpos != NULL)
1217				*pendpos = pos;
1218			return (matches);
1219		}
1220		if (maxlines > 0)
1221			maxlines--;
1222
1223		if (search_type & SRCH_FORW)
1224		{
1225			/*
1226			 * Read the next line, and save the
1227			 * starting position of that line in linepos.
1228			 */
1229			linepos = pos;
1230			pos = forw_raw_line(pos, &line, &line_len);
1231			if (linenum != 0)
1232				linenum++;
1233		} else
1234		{
1235			/*
1236			 * Read the previous line and save the
1237			 * starting position of that line in linepos.
1238			 */
1239			pos = back_raw_line(pos, &line, &line_len);
1240			linepos = pos;
1241			if (linenum != 0)
1242				linenum--;
1243		}
1244
1245		if (pos == NULL_POSITION)
1246		{
1247			/*
1248			 * Reached EOF/BOF without a match.
1249			 */
1250			if (pendpos != NULL)
1251				*pendpos = oldpos;
1252			return (matches);
1253		}
1254
1255		/*
1256		 * If we're using line numbers, we might as well
1257		 * remember the information we have now (the position
1258		 * and line number of the current line).
1259		 * Don't do it for every line because it slows down
1260		 * the search.  Remember the line number only if
1261		 * we're "far" from the last place we remembered it.
1262		 */
1263		if (linenums && abs((int)(pos - oldpos)) > 1024)
1264			add_lnum(linenum, pos);
1265		oldpos = pos;
1266
1267		if (is_filtered(linepos))
1268			continue;
1269
1270		/*
1271		 * If it's a caseless search, convert the line to lowercase.
1272		 * If we're doing backspace processing, delete backspaces.
1273		 */
1274		cvt_ops = get_cvt_ops();
1275		cline = calloc(1, cvt_length(line_len, cvt_ops));
1276		cvt_text(cline, line, &line_len, cvt_ops);
1277
1278#if HILITE_SEARCH
1279		/*
1280		 * Check to see if the line matches the filter pattern.
1281		 * If so, add an entry to the filter list.
1282		 */
1283		if ((search_type & SRCH_FIND_ALL) &&
1284			!is_null_pattern(filter_pattern))
1285		{
1286			int line_filter = match_pattern(filter_pattern,
1287				cline, line_len, &sp, &ep, 0, last_filter_type);
1288			if (line_filter)
1289			{
1290				struct hilite *hl = (struct hilite *)
1291					ecalloc(1, sizeof(struct hilite));
1292				hl->hl_startpos = linepos;
1293				hl->hl_endpos = pos;
1294				add_hilite(&filter_anchor, hl);
1295			}
1296		}
1297#endif
1298
1299		/*
1300		 * Test the next line to see if we have a match.
1301		 * We are successful if we either want a match and got one,
1302		 * or if we want a non-match and got one.
1303		 */
1304		if (!is_null_pattern(search_pattern))
1305		{
1306			line_match = match_pattern(search_pattern,
1307				cline, line_len, &sp, &ep, 0, search_type);
1308			if (line_match)
1309			{
1310				/*
1311				 * Got a match.
1312				 */
1313				if (search_type & SRCH_FIND_ALL)
1314				{
1315#if HILITE_SEARCH
1316					/*
1317					 * We are supposed to find all matches in the range.
1318					 * Just add the matches in this line to the
1319					 * hilite list and keep searching.
1320					 */
1321					hilite_line(linepos, cline, line_len, sp, ep, cvt_ops);
1322#endif
1323				} else if (--matches <= 0)
1324				{
1325					/*
1326					 * Found the one match we're looking for.
1327					 * Return it.
1328					 */
1329#if HILITE_SEARCH
1330					if (hilite_search == OPT_ON)
1331					{
1332						/*
1333						 * Clear the hilite list and add only
1334						 * the matches in this one line.
1335						 */
1336						clr_hilite();
1337						hilite_line(linepos, cline, line_len, sp, ep, cvt_ops);
1338					}
1339#endif
1340					free(cline);
1341					if (plinepos != NULL)
1342						*plinepos = linepos;
1343					return (0);
1344				}
1345			}
1346		}
1347		free(cline);
1348	}
1349}
1350
/*
 * Fall back on the command history for a search pattern.
 * Fetches the pattern returned by cmd_lastpattern() and, if there is one,
 * compiles it into search_pattern using the given search_type.
 * Returns 1 if a pattern was found and compiled successfully.
 * Returns 0 if there is no such pattern, if it fails to compile,
 * or if less was built without CMD_HISTORY.
 */
	static int
hist_pattern(search_type)
	int search_type;
{
#if CMD_HISTORY
	char *last;

	set_mlist(ml_search, 0);
	last = cmd_lastpattern();
	if (last == NULL ||
	    compile_pattern(last, search_type, &search_pattern) < 0)
		return (0);

	/*
	 * Same case rule as an explicitly entered pattern:
	 * an uppercase pattern turns caseless matching off,
	 * unless caseless is OPT_ONPLUS.
	 */
	is_ucase_pattern = is_ucase(last);
	is_caseless = (is_ucase_pattern && caseless != OPT_ONPLUS) ?
			0 : caseless;

#if HILITE_SEARCH
	if (!hide_hilite && hilite_search == OPT_ONPLUS)
		hilite_screen();
#endif

	return (1);
#else /* CMD_HISTORY */
	return (0);
#endif /* CMD_HISTORY */
}
1385
1386/*
1387 * Search for the n-th occurrence of a specified pattern,
1388 * either forward or backward.
1389 * Return the number of matches not yet found in this file
1390 * (that is, n minus the number of matches found).
1391 * Return -1 if the search should be aborted.
1392 * Caller may continue the search in another file
1393 * if less than n matches are found in this file.
1394 */
1395	public int
1396search(search_type, pattern, n)
1397	int search_type;
1398	char *pattern;
1399	int n;
1400{
1401	POSITION pos;
1402
1403	if (pattern == NULL || *pattern == '\0')
1404	{
1405		/*
1406		 * A null pattern means use the previously compiled pattern.
1407		 */
1408		if (!prev_pattern() && !hist_pattern(search_type))
1409		{
1410			error("No previous regular expression", NULL_PARG);
1411			return (-1);
1412		}
1413		if ((search_type & SRCH_NO_REGEX) !=
1414		    (last_search_type & SRCH_NO_REGEX))
1415		{
1416			error("Please re-enter search pattern", NULL_PARG);
1417			return -1;
1418		}
1419#if HILITE_SEARCH
1420		if (hilite_search == OPT_ON)
1421		{
1422			/*
1423			 * Erase the highlights currently on screen.
1424			 * If the search fails, we'll redisplay them later.
1425			 */
1426			repaint_hilite(0);
1427		}
1428		if (hilite_search == OPT_ONPLUS && hide_hilite)
1429		{
1430			/*
1431			 * Highlight any matches currently on screen,
1432			 * before we actually start the search.
1433			 */
1434			hide_hilite = 0;
1435			hilite_screen();
1436		}
1437		hide_hilite = 0;
1438#endif
1439	} else
1440	{
1441		/*
1442		 * Compile the pattern.
1443		 */
1444		if (compile_pattern(pattern, search_type, &search_pattern) < 0)
1445			return (-1);
1446		/*
1447		 * Ignore case if -I is set OR
1448		 * -i is set AND the pattern is all lowercase.
1449		 */
1450		is_ucase_pattern = is_ucase(pattern);
1451		if (is_ucase_pattern && caseless != OPT_ONPLUS)
1452			is_caseless = 0;
1453		else
1454			is_caseless = caseless;
1455#if HILITE_SEARCH
1456		if (hilite_search)
1457		{
1458			/*
1459			 * Erase the highlights currently on screen.
1460			 * Also permanently delete them from the hilite list.
1461			 */
1462			repaint_hilite(0);
1463			hide_hilite = 0;
1464			clr_hilite();
1465		}
1466		if (hilite_search == OPT_ONPLUS)
1467		{
1468			/*
1469			 * Highlight any matches currently on screen,
1470			 * before we actually start the search.
1471			 */
1472			hilite_screen();
1473		}
1474#endif
1475	}
1476
1477	/*
1478	 * Figure out where to start the search.
1479	 */
1480	pos = search_pos(search_type);
1481	if (pos == NULL_POSITION)
1482	{
1483		/*
1484		 * Can't find anyplace to start searching from.
1485		 */
1486		if (search_type & SRCH_PAST_EOF)
1487			return (n);
1488		/* repaint(); -- why was this here? */
1489		error("Nothing to search", NULL_PARG);
1490		return (-1);
1491	}
1492
1493	n = search_range(pos, NULL_POSITION, search_type, n, -1,
1494			&pos, (POSITION*)NULL);
1495	if (n != 0)
1496	{
1497		/*
1498		 * Search was unsuccessful.
1499		 */
1500#if HILITE_SEARCH
1501		if (hilite_search == OPT_ON && n > 0)
1502			/*
1503			 * Redisplay old hilites.
1504			 */
1505			repaint_hilite(1);
1506#endif
1507		return (n);
1508	}
1509
1510	if (!(search_type & SRCH_NO_MOVE))
1511	{
1512		/*
1513		 * Go to the matching line.
1514		 */
1515		jump_loc(pos, jump_sline);
1516	}
1517
1518#if HILITE_SEARCH
1519	if (hilite_search == OPT_ON)
1520		/*
1521		 * Display new hilites in the matching line.
1522		 */
1523		repaint_hilite(1);
1524#endif
1525	return (0);
1526}
1527
1528
1529#if HILITE_SEARCH
1530/*
1531 * Prepare hilites in a given range of the file.
1532 *
1533 * The pair (prep_startpos,prep_endpos) delimits a contiguous region
1534 * of the file that has been "prepared"; that is, scanned for matches for
1535 * the current search pattern, and hilites have been created for such matches.
1536 * If prep_startpos == NULL_POSITION, the prep region is empty.
1537 * If prep_endpos == NULL_POSITION, the prep region extends to EOF.
1538 * prep_hilite asks that the range (spos,epos) be covered by the prep region.
1539 */
1540	public void
1541prep_hilite(spos, epos, maxlines)
1542	POSITION spos;
1543	POSITION epos;
1544	int maxlines;
1545{
1546	POSITION nprep_startpos = prep_startpos;
1547	POSITION nprep_endpos = prep_endpos;
1548	POSITION new_epos;
1549	POSITION max_epos;
1550	int result;
1551	int i;
1552/*
1553 * Search beyond where we're asked to search, so the prep region covers
1554 * more than we need.  Do one big search instead of a bunch of small ones.
1555 */
1556#define	SEARCH_MORE (3*size_linebuf)
1557
1558	if (!prev_pattern() && !is_filtering())
1559		return;
1560
1561	/*
1562	 * If we're limited to a max number of lines, figure out the
1563	 * file position we should stop at.
1564	 */
1565	if (maxlines < 0)
1566		max_epos = NULL_POSITION;
1567	else
1568	{
1569		max_epos = spos;
1570		for (i = 0;  i < maxlines;  i++)
1571			max_epos = forw_raw_line(max_epos, (char **)NULL, (int *)NULL);
1572	}
1573
1574	/*
1575	 * Find two ranges:
1576	 * The range that we need to search (spos,epos); and the range that
1577	 * the "prep" region will then cover (nprep_startpos,nprep_endpos).
1578	 */
1579
1580	if (prep_startpos == NULL_POSITION ||
1581	    (epos != NULL_POSITION && epos < prep_startpos) ||
1582	    spos > prep_endpos)
1583	{
1584		/*
1585		 * New range is not contiguous with old prep region.
1586		 * Discard the old prep region and start a new one.
1587		 */
1588		clr_hilite();
1589		clr_filter();
1590		if (epos != NULL_POSITION)
1591			epos += SEARCH_MORE;
1592		nprep_startpos = spos;
1593	} else
1594	{
1595		/*
1596		 * New range partially or completely overlaps old prep region.
1597		 */
1598		if (epos == NULL_POSITION)
1599		{
1600			/*
1601			 * New range goes to end of file.
1602			 */
1603			;
1604		} else if (epos > prep_endpos)
1605		{
1606			/*
1607			 * New range ends after old prep region.
1608			 * Extend prep region to end at end of new range.
1609			 */
1610			epos += SEARCH_MORE;
1611		} else /* (epos <= prep_endpos) */
1612		{
1613			/*
1614			 * New range ends within old prep region.
1615			 * Truncate search to end at start of old prep region.
1616			 */
1617			epos = prep_startpos;
1618		}
1619
1620		if (spos < prep_startpos)
1621		{
1622			/*
1623			 * New range starts before old prep region.
1624			 * Extend old prep region backwards to start at
1625			 * start of new range.
1626			 */
1627			if (spos < SEARCH_MORE)
1628				spos = 0;
1629			else
1630				spos -= SEARCH_MORE;
1631			nprep_startpos = spos;
1632		} else /* (spos >= prep_startpos) */
1633		{
1634			/*
1635			 * New range starts within or after old prep region.
1636			 * Trim search to start at end of old prep region.
1637			 */
1638			spos = prep_endpos;
1639		}
1640	}
1641
1642	if (epos != NULL_POSITION && max_epos != NULL_POSITION &&
1643	    epos > max_epos)
1644		/*
1645		 * Don't go past the max position we're allowed.
1646		 */
1647		epos = max_epos;
1648
1649	if (epos == NULL_POSITION || epos > spos)
1650	{
1651		result = search_range(spos, epos, SRCH_FORW|SRCH_FIND_ALL, 0,
1652				maxlines, (POSITION*)NULL, &new_epos);
1653		if (result < 0)
1654			return;
1655		if (prep_endpos == NULL_POSITION || new_epos > prep_endpos)
1656			nprep_endpos = new_epos;
1657	}
1658	prep_startpos = nprep_startpos;
1659	prep_endpos = nprep_endpos;
1660}
1661
1662/*
1663 * Set the pattern to be used for line filtering.
1664 */
1665	public void
1666set_filter_pattern(pattern, search_type)
1667	char *pattern;
1668	int search_type;
1669{
1670	clr_filter();
1671	if (pattern == NULL || *pattern == '\0')
1672		uncompile_filter_pattern();
1673	else
1674		compile_pattern(pattern, search_type, &filter_pattern);
1675	screen_trashed = 1;
1676}
1677
1678/*
1679 * Is there a line filter in effect?
1680 */
1681	public int
1682is_filtering()
1683{
1684	if (ch_getflags() & CH_HELPFILE)
1685		return (0);
1686	return !is_null_pattern(filter_pattern);
1687}
1688#endif
1689
/*
 * Simple pattern matching function.
 * It supports no metacharacters like *, etc.
 *
 * Looks for the first occurrence of the pattern_len bytes at pattern
 * within the buf_len bytes at buf.  Neither string has to be
 * NUL-terminated: only bytes inside the given lengths are examined.
 *
 * On a match, returns 1 and stores the start of the match in *pfound
 * and the position just past its end in *pend (either pointer may be
 * NULL if the caller does not need it).
 * Returns 0 if the pattern does not occur in the buffer.
 * An empty pattern matches at the start of any non-empty buffer;
 * nothing matches in an empty buffer.
 */
	static int
match(pattern, pattern_len, buf, buf_len, pfound, pend)
	char *pattern;
	int pattern_len;
	char *buf;
	int buf_len;
	char **pfound, **pend;
{
	register char *pp, *lp;
	register char *pattern_end = pattern + pattern_len;
	register char *buf_end = buf + buf_len;

	for ( ;  buf < buf_end;  buf++)
	{
		/*
		 * Advance while the bytes agree.  Both bounds are tested
		 * BEFORE the bytes are dereferenced, so neither
		 * pattern[pattern_len] nor buf[buf_len] is ever read.
		 */
		for (pp = pattern, lp = buf;
		     pp < pattern_end && lp < buf_end && *pp == *lp;
		     pp++, lp++)
			continue;
		if (pp == pattern_end)
		{
			/*
			 * The whole pattern was consumed: match at buf.
			 */
			if (pfound != NULL)
				*pfound = buf;
			if (pend != NULL)
				*pend = lp;
			return (1);
		}
	}
	return (0);
}
1722
1723#if HAVE_V8_REGCOMP
1724/*
1725 * This function is called by the V8 regcomp to report
1726 * errors in regular expressions.
1727 */
1728	void
1729regerror(s)
1730	char *s;
1731{
1732	PARG parg;
1733
1734	parg.p_string = s;
1735	error("%s", &parg);
1736}
1737#endif
1738
1739