1/*	$NetBSD: pattern.c,v 1.4 2023/10/06 05:49:49 simonb Exp $	*/
2
3/*
4 * Copyright (C) 1984-2023  Mark Nudelman
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Less License, as specified in the README file.
8 *
9 * For more information, see the README file.
10 */
11
12/*
13 * Routines to do pattern matching.
14 */
15
16#include "less.h"
17
18extern int caseless;
19extern int is_caseless;
20extern int utf_mode;
21
22/*
23 * Compile a search pattern, for future use by match_pattern.
24 */
25static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
26{
27	if (search_type & SRCH_NO_REGEX)
28		return (0);
29  {
30#if HAVE_GNU_REGEX
31	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
32		ecalloc(1, sizeof(struct re_pattern_buffer));
33	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
34	if (re_compile_pattern(pattern, strlen(pattern), comp))
35	{
36		free(comp);
37		if (show_error)
38			error("Invalid pattern", NULL_PARG);
39		return (-1);
40	}
41	if (*comp_pattern != NULL)
42	{
43		regfree(*comp_pattern);
44		free(*comp_pattern);
45	}
46	*comp_pattern = comp;
47#endif
48#if HAVE_POSIX_REGCOMP
49	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
50	if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
51	{
52		free(comp);
53		if (show_error)
54			error("Invalid pattern", NULL_PARG);
55		return (-1);
56	}
57	if (*comp_pattern != NULL)
58	{
59		regfree(*comp_pattern);
60		free(*comp_pattern);
61	}
62	*comp_pattern = comp;
63#endif
64#if HAVE_PCRE
65	constant char *errstring;
66	int erroffset;
67	PARG parg;
68	pcre *comp = pcre_compile(pattern,
69			((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
70			(is_caseless ? PCRE_CASELESS : 0),
71			&errstring, &erroffset, NULL);
72	if (comp == NULL)
73	{
74		parg.p_string = (char *) errstring;
75		if (show_error)
76			error("%s", &parg);
77		return (-1);
78	}
79	*comp_pattern = comp;
80#endif
81#if HAVE_PCRE2
82	int errcode;
83	PCRE2_SIZE erroffset;
84	PARG parg;
85	pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
86			(is_caseless ? PCRE2_CASELESS : 0),
87			&errcode, &erroffset, NULL);
88	if (comp == NULL)
89	{
90		if (show_error)
91		{
92			char msg[160];
93			pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
94			parg.p_string = msg;
95			error("%s", &parg);
96		}
97		return (-1);
98	}
99	*comp_pattern = comp;
100#endif
101#if HAVE_RE_COMP
102	PARG parg;
103	if ((parg.p_string = re_comp(pattern)) != NULL)
104	{
105		if (show_error)
106			error("%s", &parg);
107		return (-1);
108	}
109	*comp_pattern = 1;
110#endif
111#if HAVE_REGCMP
112	char *comp;
113	if ((comp = regcmp(pattern, 0)) == NULL)
114	{
115		if (show_error)
116			error("Invalid pattern", NULL_PARG);
117		return (-1);
118	}
119	if (comp_pattern != NULL)
120		free(*comp_pattern);
121	*comp_pattern = comp;
122#endif
123#if HAVE_V8_REGCOMP
124	struct regexp *comp;
125	reg_show_error = show_error;
126	comp = regcomp(pattern);
127	reg_show_error = 1;
128	if (comp == NULL)
129	{
130		/*
131		 * regcomp has already printed an error message
132		 * via regerror().
133		 */
134		return (-1);
135	}
136	if (*comp_pattern != NULL)
137		free(*comp_pattern);
138	*comp_pattern = comp;
139#endif
140  }
141	return (0);
142}
143
144/*
145 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
146 */
147public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
148{
149	char *cvt_pattern;
150	int result;
151
152	if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
153		cvt_pattern = pattern;
154	else
155	{
156		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
157		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
158	}
159	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
160	if (cvt_pattern != pattern)
161		free(cvt_pattern);
162	return (result);
163}
164
165/*
166 * Forget that we have a compiled pattern.
167 */
168public void uncompile_pattern(PATTERN_TYPE *pattern)
169{
170#if HAVE_GNU_REGEX
171	if (*pattern != NULL)
172	{
173		regfree(*pattern);
174		free(*pattern);
175	}
176	*pattern = NULL;
177#endif
178#if HAVE_POSIX_REGCOMP
179	if (*pattern != NULL)
180	{
181		regfree(*pattern);
182		free(*pattern);
183	}
184	*pattern = NULL;
185#endif
186#if HAVE_PCRE
187	if (*pattern != NULL)
188		pcre_free(*pattern);
189	*pattern = NULL;
190#endif
191#if HAVE_PCRE2
192	if (*pattern != NULL)
193		pcre2_code_free(*pattern);
194	*pattern = NULL;
195#endif
196#if HAVE_RE_COMP
197	*pattern = 0;
198#endif
199#if HAVE_REGCMP
200	if (*pattern != NULL)
201		free(*pattern);
202	*pattern = NULL;
203#endif
204#if HAVE_V8_REGCOMP
205	if (*pattern != NULL)
206		free(*pattern);
207	*pattern = NULL;
208#endif
209}
210
#if 0
/*
 * Report whether a pattern compiles: returns 1 if it does, 0 if not.
 * The trial compilation is silent (no error is shown) and its result
 * is discarded again before returning.
 */
public int valid_pattern(char *pattern)
{
	PATTERN_TYPE trial;

	SET_NULL_PATTERN(trial);
	if (compile_pattern2(pattern, 0, &trial, 0) != 0)
		return (0);
	uncompile_pattern(&trial);
	return (1);
}
#endif
228
229/*
230 * Is a compiled pattern null?
231 */
232public int is_null_pattern(PATTERN_TYPE pattern)
233{
234#if HAVE_GNU_REGEX
235	return (pattern == NULL);
236#endif
237#if HAVE_POSIX_REGCOMP
238	return (pattern == NULL);
239#endif
240#if HAVE_PCRE
241	return (pattern == NULL);
242#endif
243#if HAVE_PCRE2
244	return (pattern == NULL);
245#endif
246#if HAVE_RE_COMP
247	return (pattern == 0);
248#endif
249#if HAVE_REGCMP
250	return (pattern == NULL);
251#endif
252#if HAVE_V8_REGCOMP
253	return (pattern == NULL);
254#endif
255#if NO_REGEX
256	return (pattern == NULL);
257#endif
258}
259/*
260 * Simple pattern matching function.
261 * It supports no metacharacters like *, etc.
262 */
263static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
264{
265	char *pp, *lp;
266	char *pattern_end = pattern + pattern_len;
267	char *buf_end = buf + buf_len;
268
269	for ( ;  buf < buf_end;  buf++)
270	{
271		for (pp = pattern, lp = buf;  ;  pp++, lp++)
272		{
273			char cp = *pp;
274			char cl = *lp;
275			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
276				cp = ASCII_TO_LOWER(cp);
277			if (cp != cl)
278				break;
279			if (pp == pattern_end || lp == buf_end)
280				break;
281		}
282		if (pp == pattern_end)
283		{
284			*(*sp)++ = buf;
285			*(*ep)++ = lp;
286			return (1);
287		}
288	}
289	**sp = **ep = NULL;
290	return (0);
291}
292
293/*
294 * Perform a pattern match with the previously compiled pattern.
295 * Set sp[0] and ep[0] to the start and end of the matched string.
296 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
297 * Subpatterns are defined by parentheses in the regex language.
298 */
299static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
300{
301	int matched;
302
303#if NO_REGEX
304	search_type |= SRCH_NO_REGEX;
305#endif
306	if (search_type & SRCH_NO_REGEX)
307		matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
308	else
309	{
310#if HAVE_GNU_REGEX
311	{
312		struct re_registers search_regs;
313		pattern->not_bol = notbol;
314		pattern->regs_allocated = REGS_UNALLOCATED;
315		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
316		if (matched)
317		{
318			*sp++ = line + search_regs.start[0];
319			*ep++ = line + search_regs.end[0];
320		}
321	}
322#endif
323#if HAVE_POSIX_REGCOMP
324	{
325		#define RM_COUNT (NUM_SEARCH_COLORS+2)
326		regmatch_t rm[RM_COUNT];
327		int flags = (notbol) ? REG_NOTBOL : 0;
328#ifdef REG_STARTEND
329		flags |= REG_STARTEND;
330		rm[0].rm_so = 0;
331		rm[0].rm_eo = line_len;
332#endif
333		matched = !regexec(pattern, line, RM_COUNT, rm, flags);
334		if (matched)
335		{
336			int i;
337			int ecount;
338			for (ecount = RM_COUNT;  ecount > 0;  ecount--)
339				if (rm[ecount-1].rm_so >= 0)
340					break;
341			if (ecount >= nsp)
342				ecount = nsp-1;
343			for (i = 0;  i < ecount;  i++)
344			{
345				if (rm[i].rm_so < 0)
346				{
347					*sp++ = *ep++ = line;
348				} else
349				{
350#ifndef __WATCOMC__
351					*sp++ = line + rm[i].rm_so;
352					*ep++ = line + rm[i].rm_eo;
353#else
354					*sp++ = rm[i].rm_sp;
355					*ep++ = rm[i].rm_ep;
356#endif
357				}
358			}
359		}
360	}
361#endif
362#if HAVE_PCRE
363	{
364		#define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
365		int ovector[OVECTOR_COUNT];
366		int flags = (notbol) ? PCRE_NOTBOL : 0;
367		int i;
368		int ecount;
369		int mcount = pcre_exec(pattern, NULL, line, line_len,
370			0, flags, ovector, OVECTOR_COUNT);
371		matched = (mcount > 0);
372		ecount = nsp-1;
373		if (ecount > mcount) ecount = mcount;
374		for (i = 0;  i < ecount*2; )
375		{
376			if (ovector[i] < 0 || ovector[i+1] < 0)
377			{
378				*sp++ = *ep++ = line;
379				i += 2;
380			} else
381			{
382				*sp++ = line + ovector[i++];
383				*ep++ = line + ovector[i++];
384			}
385		}
386	}
387#endif
388#if HAVE_PCRE2
389	{
390		int flags = (notbol) ? PCRE2_NOTBOL : 0;
391		pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
392		int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
393			0, flags, md, NULL);
394		matched = (mcount > 0);
395		if (matched)
396		{
397			PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
398			int i;
399			int ecount = nsp-1;
400			if (ecount > mcount) ecount = mcount;
401			for (i = 0;  i < ecount*2; )
402			{
403				if (ovector[i] < 0 || ovector[i+1] < 0)
404				{
405					*sp++ = *ep++ = line;
406					i += 2;
407				} else
408				{
409					*sp++ = line + ovector[i++];
410					*ep++ = line + ovector[i++];
411				}
412			}
413		}
414		pcre2_match_data_free(md);
415	}
416#endif
417#if HAVE_RE_COMP
418	matched = (re_exec(line) == 1);
419	/*
420	 * re_exec doesn't seem to provide a way to get the matched string.
421	 */
422#endif
423#if HAVE_REGCMP
424	matched = ((*ep++ = regex(pattern, line)) != NULL);
425	if (matched)
426		*sp++ = __loc1;
427#endif
428#if HAVE_V8_REGCOMP
429#if HAVE_REGEXEC2
430	matched = regexec2(pattern, line, notbol);
431#else
432	matched = regexec(pattern, line);
433#endif
434	if (matched)
435	{
436		*sp++ = pattern->startp[0];
437		*ep++ = pattern->endp[0];
438	}
439#endif
440	}
441	*sp = *ep = NULL;
442	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
443			((search_type & SRCH_NO_MATCH) && !matched);
444	return (matched);
445}
446
447public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
448{
449	int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
450	int i;
451	for (i = 1;  i <= NUM_SEARCH_COLORS;  i++)
452	{
453		if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
454			matched = 0;
455	}
456	return matched;
457}
458
459/*
460 * Return the name of the pattern matching library.
461 */
462public char * pattern_lib_name(void)
463{
464#if HAVE_GNU_REGEX
465	return ("GNU");
466#else
467#if HAVE_POSIX_REGCOMP
468	return ("POSIX");
469#else
470#if HAVE_PCRE2
471	return ("PCRE2");
472#else
473#if HAVE_PCRE
474	return ("PCRE");
475#else
476#if HAVE_RE_COMP
477	return ("BSD");
478#else
479#if HAVE_REGCMP
480	return ("V8");
481#else
482#if HAVE_V8_REGCOMP
483	return ("Spencer V8");
484#else
485	return ("no");
486#endif
487#endif
488#endif
489#endif
490#endif
491#endif
492#endif
493}
494