1/*
2 * Copyright (C) 1984-2021  Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information, see the README file.
8 */
9
10/*
11 * Routines to do pattern matching.
12 */
13
14#include "less.h"
15
16extern int caseless;
17extern int utf_mode;
18
19/*
20 * Compile a search pattern, for future use by match_pattern.
21 */
22	static int
23compile_pattern2(pattern, search_type, comp_pattern, show_error)
24	char *pattern;
25	int search_type;
26	PATTERN_TYPE *comp_pattern;
27	int show_error;
28{
29	if (search_type & SRCH_NO_REGEX)
30		return (0);
31  {
32#if HAVE_GNU_REGEX
33	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
34		ecalloc(1, sizeof(struct re_pattern_buffer));
35	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
36	if (re_compile_pattern(pattern, strlen(pattern), comp))
37	{
38		free(comp);
39		if (show_error)
40			error("Invalid pattern", NULL_PARG);
41		return (-1);
42	}
43	if (*comp_pattern != NULL)
44	{
45		regfree(*comp_pattern);
46		free(*comp_pattern);
47	}
48	*comp_pattern = comp;
49#endif
50#if HAVE_POSIX_REGCOMP
51	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
52	if (regcomp(comp, pattern, REGCOMP_FLAG))
53	{
54		free(comp);
55		if (show_error)
56			error("Invalid pattern", NULL_PARG);
57		return (-1);
58	}
59	if (*comp_pattern != NULL)
60	{
61		regfree(*comp_pattern);
62		free(*comp_pattern);
63	}
64	*comp_pattern = comp;
65#endif
66#if HAVE_PCRE
67	constant char *errstring;
68	int erroffset;
69	PARG parg;
70	pcre *comp = pcre_compile(pattern,
71			(utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0,
72			&errstring, &erroffset, NULL);
73	if (comp == NULL)
74	{
75		parg.p_string = (char *) errstring;
76		if (show_error)
77			error("%s", &parg);
78		return (-1);
79	}
80	*comp_pattern = comp;
81#endif
82#if HAVE_PCRE2
83	int errcode;
84	PCRE2_SIZE erroffset;
85	PARG parg;
86	pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
87			0, &errcode, &erroffset, NULL);
88	if (comp == NULL)
89	{
90		if (show_error)
91		{
92			char msg[160];
93			pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
94			parg.p_string = msg;
95			error("%s", &parg);
96		}
97		return (-1);
98	}
99	*comp_pattern = comp;
100#endif
101#if HAVE_RE_COMP
102	PARG parg;
103	if ((parg.p_string = re_comp(pattern)) != NULL)
104	{
105		if (show_error)
106			error("%s", &parg);
107		return (-1);
108	}
109	*comp_pattern = 1;
110#endif
111#if HAVE_REGCMP
112	char *comp;
113	if ((comp = regcmp(pattern, 0)) == NULL)
114	{
115		if (show_error)
116			error("Invalid pattern", NULL_PARG);
117		return (-1);
118	}
119	if (comp_pattern != NULL)
120		free(*comp_pattern);
121	*comp_pattern = comp;
122#endif
123#if HAVE_V8_REGCOMP
124	struct regexp *comp;
125	reg_show_error = show_error;
126	comp = regcomp(pattern);
127	reg_show_error = 1;
128	if (comp == NULL)
129	{
130		/*
131		 * regcomp has already printed an error message
132		 * via regerror().
133		 */
134		return (-1);
135	}
136	if (*comp_pattern != NULL)
137		free(*comp_pattern);
138	*comp_pattern = comp;
139#endif
140  }
141	return (0);
142}
143
144/*
145 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
146 */
147	public int
148compile_pattern(pattern, search_type, show_error, comp_pattern)
149	char *pattern;
150	int search_type;
151	int show_error;
152	PATTERN_TYPE *comp_pattern;
153{
154	char *cvt_pattern;
155	int result;
156
157	if (caseless != OPT_ONPLUS)
158		cvt_pattern = pattern;
159	else
160	{
161		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
162		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
163	}
164	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
165	if (cvt_pattern != pattern)
166		free(cvt_pattern);
167	return (result);
168}
169
170/*
171 * Forget that we have a compiled pattern.
172 */
173	public void
174uncompile_pattern(pattern)
175	PATTERN_TYPE *pattern;
176{
177#if HAVE_GNU_REGEX
178	if (*pattern != NULL)
179	{
180		regfree(*pattern);
181		free(*pattern);
182	}
183	*pattern = NULL;
184#endif
185#if HAVE_POSIX_REGCOMP
186	if (*pattern != NULL)
187	{
188		regfree(*pattern);
189		free(*pattern);
190	}
191	*pattern = NULL;
192#endif
193#if HAVE_PCRE
194	if (*pattern != NULL)
195		pcre_free(*pattern);
196	*pattern = NULL;
197#endif
198#if HAVE_PCRE2
199	if (*pattern != NULL)
200		pcre2_code_free(*pattern);
201	*pattern = NULL;
202#endif
203#if HAVE_RE_COMP
204	*pattern = 0;
205#endif
206#if HAVE_REGCMP
207	if (*pattern != NULL)
208		free(*pattern);
209	*pattern = NULL;
210#endif
211#if HAVE_V8_REGCOMP
212	if (*pattern != NULL)
213		free(*pattern);
214	*pattern = NULL;
215#endif
216}
217
#if 0
/*
 * Test whether a pattern compiles.
 * Returns 1 if it does, 0 if not.  Nothing is kept: the trial
 * compilation is discarded before returning, and no error is shown.
 */
	public int
valid_pattern(pattern)
	char *pattern;
{
	PATTERN_TYPE trial;

	SET_NULL_PATTERN(trial);
	if (compile_pattern2(pattern, 0, &trial, 0) != 0)
		return (0);
	uncompile_pattern(&trial);
	return (1);
}
#endif
237
238/*
239 * Is a compiled pattern null?
240 */
241	public int
242is_null_pattern(pattern)
243	PATTERN_TYPE pattern;
244{
245#if HAVE_GNU_REGEX
246	return (pattern == NULL);
247#endif
248#if HAVE_POSIX_REGCOMP
249	return (pattern == NULL);
250#endif
251#if HAVE_PCRE
252	return (pattern == NULL);
253#endif
254#if HAVE_PCRE2
255	return (pattern == NULL);
256#endif
257#if HAVE_RE_COMP
258	return (pattern == 0);
259#endif
260#if HAVE_REGCMP
261	return (pattern == NULL);
262#endif
263#if HAVE_V8_REGCOMP
264	return (pattern == NULL);
265#endif
266#if NO_REGEX
267	return (pattern == NULL);
268#endif
269}
270
271/*
272 * Simple pattern matching function.
273 * It supports no metacharacters like *, etc.
274 */
275	static int
276match(pattern, pattern_len, buf, buf_len, pfound, pend)
277	char *pattern;
278	int pattern_len;
279	char *buf;
280	int buf_len;
281	char **pfound, **pend;
282{
283	char *pp, *lp;
284	char *pattern_end = pattern + pattern_len;
285	char *buf_end = buf + buf_len;
286
287	for ( ;  buf < buf_end;  buf++)
288	{
289		for (pp = pattern, lp = buf;  ;  pp++, lp++)
290		{
291			char cp = *pp;
292			char cl = *lp;
293			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
294				cp = ASCII_TO_LOWER(cp);
295			if (cp != cl)
296				break;
297			if (pp == pattern_end || lp == buf_end)
298				break;
299		}
300		if (pp == pattern_end)
301		{
302			if (pfound != NULL)
303				*pfound = buf;
304			if (pend != NULL)
305				*pend = lp;
306			return (1);
307		}
308	}
309	return (0);
310}
311
312/*
313 * Perform a pattern match with the previously compiled pattern.
314 * Set sp and ep to the start and end of the matched string.
315 */
316	public int
317match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
318	PATTERN_TYPE pattern;
319	char *tpattern;
320	char *line;
321	int line_len;
322	char **sp;
323	char **ep;
324	int notbol;
325	int search_type;
326{
327	int matched;
328
329	*sp = *ep = NULL;
330#if NO_REGEX
331	search_type |= SRCH_NO_REGEX;
332#endif
333	if (search_type & SRCH_NO_REGEX)
334		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
335	else
336	{
337#if HAVE_GNU_REGEX
338	{
339		struct re_registers search_regs;
340		pattern->not_bol = notbol;
341		pattern->regs_allocated = REGS_UNALLOCATED;
342		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
343		if (matched)
344		{
345			*sp = line + search_regs.start[0];
346			*ep = line + search_regs.end[0];
347		}
348	}
349#endif
350#if HAVE_POSIX_REGCOMP
351	{
352		regmatch_t rm;
353		int flags = (notbol) ? REG_NOTBOL : 0;
354#ifdef REG_STARTEND
355		flags |= REG_STARTEND;
356		rm.rm_so = 0;
357		rm.rm_eo = line_len;
358#endif
359		matched = !regexec(pattern, line, 1, &rm, flags);
360		if (matched)
361		{
362#ifndef __WATCOMC__
363			*sp = line + rm.rm_so;
364			*ep = line + rm.rm_eo;
365#else
366			*sp = rm.rm_sp;
367			*ep = rm.rm_ep;
368#endif
369		}
370	}
371#endif
372#if HAVE_PCRE
373	{
374		int flags = (notbol) ? PCRE_NOTBOL : 0;
375		int ovector[3];
376		matched = pcre_exec(pattern, NULL, line, line_len,
377			0, flags, ovector, 3) >= 0;
378		if (matched)
379		{
380			*sp = line + ovector[0];
381			*ep = line + ovector[1];
382		}
383	}
384#endif
385#if HAVE_PCRE2
386	{
387		int flags = (notbol) ? PCRE2_NOTBOL : 0;
388		pcre2_match_data *md = pcre2_match_data_create(3, NULL);
389		matched = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
390			0, flags, md, NULL) >= 0;
391		if (matched)
392		{
393			PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
394			*sp = line + ovector[0];
395			*ep = line + ovector[1];
396		}
397		pcre2_match_data_free(md);
398	}
399#endif
400#if HAVE_RE_COMP
401	matched = (re_exec(line) == 1);
402	/*
403	 * re_exec doesn't seem to provide a way to get the matched string.
404	 */
405	*sp = *ep = NULL;
406#endif
407#if HAVE_REGCMP
408	*ep = regex(pattern, line);
409	matched = (*ep != NULL);
410	if (matched)
411		*sp = __loc1;
412#endif
413#if HAVE_V8_REGCOMP
414#if HAVE_REGEXEC2
415	matched = regexec2(pattern, line, notbol);
416#else
417	matched = regexec(pattern, line);
418#endif
419	if (matched)
420	{
421		*sp = pattern->startp[0];
422		*ep = pattern->endp[0];
423	}
424#endif
425	}
426	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
427			((search_type & SRCH_NO_MATCH) && !matched);
428	return (matched);
429}
430
431/*
432 * Return the name of the pattern matching library.
433 */
434	public char *
435pattern_lib_name(VOID_PARAM)
436{
437#if HAVE_GNU_REGEX
438	return ("GNU");
439#else
440#if HAVE_POSIX_REGCOMP
441	return ("POSIX");
442#else
443#if HAVE_PCRE2
444	return ("PCRE2");
445#else
446#if HAVE_PCRE
447	return ("PCRE");
448#else
449#if HAVE_RE_COMP
450	return ("BSD");
451#else
452#if HAVE_REGCMP
453	return ("V8");
454#else
455#if HAVE_V8_REGCOMP
456	return ("Spencer V8");
457#else
458	return ("no");
459#endif
460#endif
461#endif
462#endif
463#endif
464#endif
465#endif
466}
467