1/*	$OpenBSD: re_search.c,v 1.37 2023/03/08 04:43:11 guenther Exp $	*/
2
3/* This file is in the public domain. */
4
5/*
6 *	regular expression search commands for Mg
7 *
8 * This file contains functions to implement several of gnuemacs's regular
9 * expression functions for Mg.  Several of the routines below are just minor
10 * re-arrangements of Mg's non-regular expression search functions.  Some of
11 * them are similar in structure to the original MicroEMACS, others are
12 * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
13 * them from scratch.
14 */
15
16#ifdef REGEX
17#include <sys/queue.h>
18#include <sys/types.h>
19#include <regex.h>
20#include <signal.h>
21#include <stdio.h>
22#include <string.h>
23
24#include "def.h"
25#include "macro.h"
26
/*
 * Search sub-codes.  Within this file only SRCH_FORW, SRCH_BACK and
 * SRCH_NOPR are used, as values of re_srch_lastdir.
 */
#define SRCH_BEGIN	(0)		/* search sub-codes		    */
#define SRCH_FORW	(-1)
#define SRCH_BACK	(-2)
#define SRCH_NOPR	(-3)
#define SRCH_ACCM	(-4)
#define SRCH_MARK	(-5)

/*
 * RE_NMATCH sizes the regex_match[] array handed to regexec(); REPLEN sizes
 * the buffer in which re_doreplace() builds the expanded replacement text.
 */
#define RE_NMATCH	10		/* max number of matches	    */
#define REPLEN		256		/* max length of replacement string */

/*
 * re_pat holds the text of the current pattern; an empty string means that
 * re_readpattern() will not offer a default.  re_srch_lastdir records the
 * direction of the last successful search for re_searchagain().
 */
char	re_pat[NPAT];			/* regex pattern		    */
int	re_srch_lastdir = SRCH_NOPR;	/* last search flags		    */
int	casefoldsearch = TRUE;		/* does search ignore case?	    */

/* File-local helpers, defined further down. */
static int	 re_doreplace(RSIZE, char *);
static int	 re_forwsrch(void);
static int	 re_backsrch(void);
static int	 re_readpattern(char *);
static int	 killmatches(int);
static int	 countmatches(int);
47
48/*
49 * Search forward.
50 * Get a search string from the user and search for it starting at ".".  If
51 * found, move "." to just after the matched characters.  display does all
52 * the hard stuff.  If not found, it just prints a message.
53 */
54int
55re_forwsearch(int f, int n)
56{
57	int	s;
58
59	if ((s = re_readpattern("RE Search")) != TRUE)
60		return (s);
61	if (re_forwsrch() == FALSE) {
62		dobeep();
63		ewprintf("Search failed: \"%s\"", re_pat);
64		return (FALSE);
65	}
66	re_srch_lastdir = SRCH_FORW;
67	return (TRUE);
68}
69
70/*
71 * Reverse search.
72 * Get a search string from the user, and search, starting at "."
73 * and proceeding toward the front of the buffer. If found "." is left
74 * pointing at the first character of the pattern [the last character that
75 * was matched].
76 */
77int
78re_backsearch(int f, int n)
79{
80	int	s;
81
82	if ((s = re_readpattern("RE Search backward")) != TRUE)
83		return (s);
84	if (re_backsrch() == FALSE) {
85		dobeep();
86		ewprintf("Search failed: \"%s\"", re_pat);
87		return (FALSE);
88	}
89	re_srch_lastdir = SRCH_BACK;
90	return (TRUE);
91}
92
93/*
94 * Search again, using the same search string and direction as the last search
95 * command.  The direction has been saved in "srch_lastdir", so you know which
96 * way to go.
97 *
98 * XXX: This code has problems -- some incompatibility(?) with extend.c causes
99 * match to fail when it should not.
100 */
101int
102re_searchagain(int f, int n)
103{
104	if (re_srch_lastdir == SRCH_NOPR) {
105		dobeep();
106		ewprintf("No last search");
107		return (FALSE);
108	}
109	if (re_srch_lastdir == SRCH_FORW) {
110		if (re_forwsrch() == FALSE) {
111			dobeep();
112			ewprintf("Search failed: \"%s\"", re_pat);
113			return (FALSE);
114		}
115		return (TRUE);
116	}
117	if (re_srch_lastdir == SRCH_BACK)
118		if (re_backsrch() == FALSE) {
119			dobeep();
120			ewprintf("Search failed: \"%s\"", re_pat);
121			return (FALSE);
122		}
123
124	return (TRUE);
125}
126
/*
 * Compiled form of the current pattern.  It is (re)built by regcomp() in
 * re_readpattern() only when the user enters a new pattern.
 */
static regex_t		regex_buff;
/*
 * Match offsets for regexec().  Because every call in this file passes
 * REG_STARTEND, slot [0] is also filled in beforehand with the range of the
 * line to be searched; after a successful call it holds the whole match.
 */
static regmatch_t	regex_match[RE_NMATCH];
130
131/*
132 * Re-Query Replace.
133 *	Replace strings selectively.  Does a search and replace operation.
134 */
135int
136re_queryrepl(int f, int n)
137{
138	int	rcnt = 0;		/* replacements made so far	*/
139	int	plen, s;		/* length of found string	*/
140	char	news[NPAT];		/* replacement string		*/
141
142	if ((s = re_readpattern("RE Query replace")) != TRUE)
143		return (s);
144	if (eread("Query replace %s with: ", news, NPAT,
145	    EFNUL | EFNEW | EFCR, re_pat) == NULL)
146		return (ABORT);
147	ewprintf("Query replacing %s with %s:", re_pat, news);
148
149	/*
150	 * Search forward repeatedly, checking each time whether to insert
151	 * or not.  The "!" case makes the check always true, so it gets put
152	 * into a tighter loop for efficiency.
153	 */
154	while (re_forwsrch() == TRUE) {
155retry:
156		update(CMODE);
157		switch (getkey(FALSE)) {
158		case ' ':
159			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
160			if (re_doreplace((RSIZE)plen, news) == FALSE)
161				return (FALSE);
162			rcnt++;
163			break;
164
165		case '.':
166			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
167			if (re_doreplace((RSIZE)plen, news) == FALSE)
168				return (FALSE);
169			rcnt++;
170			goto stopsearch;
171
172		case CCHR('G'):				/* ^G */
173			(void)ctrlg(FFRAND, 0);
174			goto stopsearch;
175		case CCHR('['):				/* ESC */
176		case '`':
177			goto stopsearch;
178		case '!':
179			do {
180				plen = regex_match[0].rm_eo - regex_match[0].rm_so;
181				if (re_doreplace((RSIZE)plen, news) == FALSE)
182					return (FALSE);
183				rcnt++;
184			} while (re_forwsrch() == TRUE);
185			goto stopsearch;
186
187		case CCHR('?'):				/* To not replace */
188			break;
189
190		default:
191			ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
192			goto retry;
193		}
194	}
195
196stopsearch:
197	curwp->w_rflag |= WFFULL;
198	update(CMODE);
199	if (!inmacro) {
200		if (rcnt == 0)
201			ewprintf("(No replacements done)");
202		else if (rcnt == 1)
203			ewprintf("(1 replacement done)");
204		else
205			ewprintf("(%d replacements done)", rcnt);
206	}
207	return (TRUE);
208}
209
210int
211re_repl(int f, int n)
212{
213	int     rcnt = 0;		/* replacements made so far     */
214	int     plen, s;		/* length of found string       */
215	char    news[NPAT];		/* replacement string           */
216
217	if ((s = re_readpattern("RE Replace")) != TRUE)
218		return (s);
219	if (eread("Replace %s with: ", news, NPAT,
220	    EFNUL | EFNEW | EFCR, re_pat) == NULL)
221                return (ABORT);
222
223	while (re_forwsrch() == TRUE) {
224		plen = regex_match[0].rm_eo - regex_match[0].rm_so;
225		if (re_doreplace((RSIZE)plen, news) == FALSE)
226			return (FALSE);
227		rcnt++;
228	}
229
230	curwp->w_rflag |= WFFULL;
231	update(CMODE);
232	if (!inmacro)
233		ewprintf("(%d replacement(s) done)", rcnt);
234
235	return(TRUE);
236}
237
238/*
239 * Routine re_doreplace calls lreplace to make replacements needed by
240 * re_query replace.  Its reason for existence is to deal with \1, \2. etc.
241 *  plen: length to remove
242 *  st:   replacement string
243 */
244static int
245re_doreplace(RSIZE plen, char *st)
246{
247	int	 j, k, s, more, num, state;
248	struct line	*clp;
249	char	 repstr[REPLEN];
250
251	clp = curwp->w_dotp;
252	more = TRUE;
253	j = 0;
254	state = 0;
255	num = 0;
256
257	/* The following FSA parses the replacement string */
258	while (more) {
259		switch (state) {
260		case 0:
261			if (*st == '\\') {
262				st++;
263				state = 1;
264			} else if (*st == '\0')
265				more = FALSE;
266			else {
267				repstr[j] = *st;
268				j++;
269				if (j >= REPLEN)
270					return (FALSE);
271				st++;
272			}
273			break;
274		case 1:
275			if (*st >= '0' && *st <= '9') {
276				num = *st - '0';
277				st++;
278				state = 2;
279			} else if (*st == '\0')
280				more = FALSE;
281			else {
282				repstr[j] = *st;
283				j++;
284				if (j >= REPLEN)
285					return (FALSE);
286				st++;
287				state = 0;
288			}
289			break;
290		case 2:
291			if (*st >= '0' && *st <= '9') {
292				num = 10 * num + *st - '0';
293				st++;
294			} else {
295				if (num >= RE_NMATCH)
296					return (FALSE);
297				k = regex_match[num].rm_eo - regex_match[num].rm_so;
298				if (j + k >= REPLEN)
299					return (FALSE);
300				bcopy(&(clp->l_text[regex_match[num].rm_so]),
301				    &repstr[j], k);
302				j += k;
303				if (*st == '\0')
304					more = FALSE;
305				if (*st == '\\') {
306					st++;
307					state = 1;
308				} else {
309					repstr[j] = *st;
310					j++;
311					if (j >= REPLEN)
312						return (FALSE);
313					st++;
314					state = 0;
315				}
316			}
317			break;
318		}		/* switch (state) */
319	}			/* while (more)   */
320
321	repstr[j] = '\0';
322	s = lreplace(plen, repstr);
323	return (s);
324}
325
326/*
327 * This routine does the real work of a forward search.  The pattern is
328 * sitting in the external variable "pat".  If found, dot is updated, the
329 * window system is notified of the change, and TRUE is returned.  If the
330 * string isn't found, FALSE is returned.
331 */
332static int
333re_forwsrch(void)
334{
335	int	 	 re_flags, tbo, tdotline, error;
336	struct line	*clp;
337
338	clp = curwp->w_dotp;
339	tbo = curwp->w_doto;
340	tdotline = curwp->w_dotline;
341
342	if (tbo == clp->l_used)
343		/*
344		 * Don't start matching past end of line -- must move to
345		 * beginning of next line, unless line is empty or at
346		 * end of file.
347		 */
348		if (clp != curbp->b_headp && llength(clp) != 0) {
349			clp = lforw(clp);
350			tdotline++;
351			tbo = 0;
352		}
353	/*
354	 * Note this loop does not process the last line, but this editor
355	 * always makes the last line empty so this is good.
356	 */
357	while (clp != (curbp->b_headp)) {
358		re_flags = REG_STARTEND;
359		if (tbo != 0)
360			re_flags |= REG_NOTBOL;
361		regex_match[0].rm_so = tbo;
362		regex_match[0].rm_eo = llength(clp);
363		error = regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
364		    RE_NMATCH, regex_match, re_flags);
365		if (error != 0) {
366			clp = lforw(clp);
367			tdotline++;
368			tbo = 0;
369		} else {
370			curwp->w_doto = regex_match[0].rm_eo;
371			curwp->w_dotp = clp;
372			curwp->w_dotline = tdotline;
373			curwp->w_rflag |= WFMOVE;
374			return (TRUE);
375		}
376	}
377	return (FALSE);
378}
379
380/*
381 * This routine does the real work of a backward search.  The pattern is sitting
382 * in the external variable "re_pat".  If found, dot is updated, the window
383 * system is notified of the change, and TRUE is returned.  If the string isn't
384 * found, FALSE is returned.
385 */
386static int
387re_backsrch(void)
388{
389	struct line		*clp;
390	int		 tbo, tdotline;
391	regmatch_t	 lastmatch;
392
393	clp = curwp->w_dotp;
394	tbo = curwp->w_doto;
395	tdotline = curwp->w_dotline;
396
397	/* Start search one position to the left of dot */
398	tbo = tbo - 1;
399	if (tbo < 0) {
400		/* must move up one line */
401		clp = lback(clp);
402		tdotline--;
403		tbo = llength(clp);
404	}
405
406	/*
407	 * Note this loop does not process the last line, but this editor
408	 * always makes the last line empty so this is good.
409	 */
410	while (clp != (curbp->b_headp)) {
411		regex_match[0].rm_so = 0;
412		regex_match[0].rm_eo = llength(clp);
413		lastmatch.rm_so = -1;
414		/*
415		 * Keep searching until we don't match any longer.  Assumes a
416		 * non-match does not modify the regex_match array.  We have to
417		 * do this character-by-character after the first match since
418		 * POSIX regexps don't give you a way to do reverse matches.
419		 */
420		while (!regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
421		    RE_NMATCH, regex_match, REG_STARTEND) &&
422		    regex_match[0].rm_so <= tbo) {
423			memcpy(&lastmatch, &regex_match[0], sizeof(regmatch_t));
424			regex_match[0].rm_so++;
425			regex_match[0].rm_eo = llength(clp);
426		}
427		if (lastmatch.rm_so == -1) {
428			clp = lback(clp);
429			tdotline--;
430			tbo = llength(clp);
431		} else {
432			memcpy(&regex_match[0], &lastmatch, sizeof(regmatch_t));
433			curwp->w_doto = regex_match[0].rm_so;
434			curwp->w_dotp = clp;
435			curwp->w_dotline = tdotline;
436			curwp->w_rflag |= WFMOVE;
437			return (TRUE);
438		}
439	}
440	return (FALSE);
441}
442
443/*
444 * Read a pattern.
445 * Stash it in the external variable "re_pat". The "pat" is
446 * not updated if the user types in an empty line. If the user typed
447 * an empty line, and there is no old pattern, it is an error.
448 * Display the old pattern, in the style of Jeff Lomicka. There is
449 * some do-it-yourself control expansion.
450 */
451static int
452re_readpattern(char *re_prompt)
453{
454	static int	dofree = 0;
455	int		flags, error, s;
456	char		tpat[NPAT], *rep;
457
458	if (re_pat[0] == '\0')
459		rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt);
460	else
461		rep = eread("%s (default %s): ", tpat, NPAT,
462		    EFNUL | EFNEW | EFCR, re_prompt, re_pat);
463	if (rep == NULL)
464		return (ABORT);
465	if (rep[0] != '\0') {
466		/* New pattern given */
467		(void)strlcpy(re_pat, tpat, sizeof(re_pat));
468		if (casefoldsearch)
469			flags = REG_EXTENDED | REG_ICASE;
470		else
471			flags = REG_EXTENDED;
472		if (dofree)
473			regfree(&regex_buff);
474		error = regcomp(&regex_buff, re_pat, flags);
475		if (error != 0) {
476			char	message[256];
477			regerror(error, &regex_buff, message, sizeof(message));
478			dobeep();
479			ewprintf("Regex Error: %s", message);
480			re_pat[0] = '\0';
481			return (FALSE);
482		}
483		dofree = 1;
484		s = TRUE;
485	} else if (rep[0] == '\0' && re_pat[0] != '\0')
486		/* Just using old pattern */
487		s = TRUE;
488	else
489		s = FALSE;
490	return (s);
491}
492
493/*
494 * Cause case to not matter in searches.  This is the default.	If called
495 * with argument cause case to matter.
496 */
497int
498setcasefold(int f, int n)
499{
500	if (f & FFARG) {
501		casefoldsearch = FALSE;
502		ewprintf("Case-fold-search unset");
503	} else {
504		casefoldsearch = TRUE;
505		ewprintf("Case-fold-search set");
506	}
507
508	/*
509	 * Invalidate the regular expression pattern since I'm too lazy to
510	 * recompile it.
511	 */
512	re_pat[0] = '\0';
513	return (TRUE);
514}
515
516/*
517 * Delete all lines after dot that contain a string matching regex.
518 */
519int
520delmatchlines(int f, int n)
521{
522	int	s;
523
524	if ((s = re_readpattern("Flush lines (containing match for regexp)"))
525	    != TRUE)
526		return (s);
527
528	s = killmatches(TRUE);
529	return (s);
530}
531
532/*
533 * Delete all lines after dot that don't contain a string matching regex.
534 */
535int
536delnonmatchlines(int f, int n)
537{
538	int	s;
539
540	if ((s = re_readpattern("Keep lines (containing match for regexp)"))
541	    != TRUE)
542		return (s);
543
544	s = killmatches(FALSE);
545	return (s);
546}
547
548/*
549 * This function does the work of deleting matching lines.
550 */
551static int
552killmatches(int cond)
553{
554	int	 s, error;
555	int	 count = 0;
556	struct line	*clp;
557
558	clp = curwp->w_dotp;
559	if (curwp->w_doto == llength(clp))
560		/* Consider dot on next line */
561		clp = lforw(clp);
562
563	while (clp != (curbp->b_headp)) {
564		/* see if line matches */
565		regex_match[0].rm_so = 0;
566		regex_match[0].rm_eo = llength(clp);
567		error = regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
568		    RE_NMATCH, regex_match, REG_STARTEND);
569
570		/* Delete line when appropriate */
571		if ((cond == FALSE && error) || (cond == TRUE && !error)) {
572			curwp->w_doto = 0;
573			curwp->w_dotp = clp;
574			count++;
575			s = ldelete(llength(clp) + 1, KNONE);
576			clp = curwp->w_dotp;
577			curwp->w_rflag |= WFMOVE;
578			if (s == FALSE)
579				return (FALSE);
580		} else
581			clp = lforw(clp);
582	}
583
584	ewprintf("%d line(s) deleted", count);
585	if (count > 0)
586		curwp->w_rflag |= WFMOVE;
587
588	return (TRUE);
589}
590
591/*
592 * Count lines matching regex.
593 */
594int
595cntmatchlines(int f, int n)
596{
597	int	s;
598
599	if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
600		return (s);
601	s = countmatches(TRUE);
602
603	return (s);
604}
605
606/*
607 * Count lines that fail to match regex.
608 */
609int
610cntnonmatchlines(int f, int n)
611{
612	int	s;
613
614	if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
615		return (s);
616	s = countmatches(FALSE);
617
618	return (s);
619}
620
621/*
622 * This function does the work of counting matching lines.
623 */
624int
625countmatches(int cond)
626{
627	int	 error;
628	int	 count = 0;
629	struct line	*clp;
630
631	clp = curwp->w_dotp;
632	if (curwp->w_doto == llength(clp))
633		/* Consider dot on next line */
634		clp = lforw(clp);
635
636	while (clp != (curbp->b_headp)) {
637		/* see if line matches */
638		regex_match[0].rm_so = 0;
639		regex_match[0].rm_eo = llength(clp);
640		error = regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
641		    RE_NMATCH, regex_match, REG_STARTEND);
642
643		/* Count line when appropriate */
644		if ((cond == FALSE && error) || (cond == TRUE && !error))
645			count++;
646		clp = lforw(clp);
647	}
648
649	if (cond)
650		ewprintf("Number of lines matching: %d", count);
651	else
652		ewprintf("Number of lines not matching: %d", count);
653
654	return (TRUE);
655}
656#endif	/* REGEX */
657