1/* $NetBSD: pattern.c,v 1.4 2023/10/06 05:49:49 simonb Exp $ */ 2 3/* 4 * Copyright (C) 1984-2023 Mark Nudelman 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Less License, as specified in the README file. 8 * 9 * For more information, see the README file. 10 */ 11 12/* 13 * Routines to do pattern matching. 14 */ 15 16#include "less.h" 17 18extern int caseless; 19extern int is_caseless; 20extern int utf_mode; 21 22/* 23 * Compile a search pattern, for future use by match_pattern. 24 */ 25static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error) 26{ 27 if (search_type & SRCH_NO_REGEX) 28 return (0); 29 { 30#if HAVE_GNU_REGEX 31 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 32 ecalloc(1, sizeof(struct re_pattern_buffer)); 33 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 34 if (re_compile_pattern(pattern, strlen(pattern), comp)) 35 { 36 free(comp); 37 if (show_error) 38 error("Invalid pattern", NULL_PARG); 39 return (-1); 40 } 41 if (*comp_pattern != NULL) 42 { 43 regfree(*comp_pattern); 44 free(*comp_pattern); 45 } 46 *comp_pattern = comp; 47#endif 48#if HAVE_POSIX_REGCOMP 49 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 50 if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0))) 51 { 52 free(comp); 53 if (show_error) 54 error("Invalid pattern", NULL_PARG); 55 return (-1); 56 } 57 if (*comp_pattern != NULL) 58 { 59 regfree(*comp_pattern); 60 free(*comp_pattern); 61 } 62 *comp_pattern = comp; 63#endif 64#if HAVE_PCRE 65 constant char *errstring; 66 int erroffset; 67 PARG parg; 68 pcre *comp = pcre_compile(pattern, 69 ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) | 70 (is_caseless ? PCRE_CASELESS : 0), 71 &errstring, &erroffset, NULL); 72 if (comp == NULL) 73 { 74 parg.p_string = (char *) errstring; 75 if (show_error) 76 error("%s", &parg); 77 return (-1); 78 } 79 *comp_pattern = comp; 80#endif 81#if HAVE_PCRE2 82 int errcode; 83 PCRE2_SIZE erroffset; 84 PARG parg; 85 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern), 86 (is_caseless ? PCRE2_CASELESS : 0), 87 &errcode, &erroffset, NULL); 88 if (comp == NULL) 89 { 90 if (show_error) 91 { 92 char msg[160]; 93 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg)); 94 parg.p_string = msg; 95 error("%s", &parg); 96 } 97 return (-1); 98 } 99 *comp_pattern = comp; 100#endif 101#if HAVE_RE_COMP 102 PARG parg; 103 if ((parg.p_string = re_comp(pattern)) != NULL) 104 { 105 if (show_error) 106 error("%s", &parg); 107 return (-1); 108 } 109 *comp_pattern = 1; 110#endif 111#if HAVE_REGCMP 112 char *comp; 113 if ((comp = regcmp(pattern, 0)) == NULL) 114 { 115 if (show_error) 116 error("Invalid pattern", NULL_PARG); 117 return (-1); 118 } 119 if (comp_pattern != NULL) 120 free(*comp_pattern); 121 *comp_pattern = comp; 122#endif 123#if HAVE_V8_REGCOMP 124 struct regexp *comp; 125 reg_show_error = show_error; 126 comp = regcomp(pattern); 127 reg_show_error = 1; 128 if (comp == NULL) 129 { 130 /* 131 * regcomp has already printed an error message 132 * via regerror(). 133 */ 134 return (-1); 135 } 136 if (*comp_pattern != NULL) 137 free(*comp_pattern); 138 *comp_pattern = comp; 139#endif 140 } 141 return (0); 142} 143 144/* 145 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 146 */ 147public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern) 148{ 149 char *cvt_pattern; 150 int result; 151 152 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX))) 153 cvt_pattern = pattern; 154 else 155 { 156 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 157 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 158 } 159 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error); 160 if (cvt_pattern != pattern) 161 free(cvt_pattern); 162 return (result); 163} 164 165/* 166 * Forget that we have a compiled pattern. 167 */ 168public void uncompile_pattern(PATTERN_TYPE *pattern) 169{ 170#if HAVE_GNU_REGEX 171 if (*pattern != NULL) 172 { 173 regfree(*pattern); 174 free(*pattern); 175 } 176 *pattern = NULL; 177#endif 178#if HAVE_POSIX_REGCOMP 179 if (*pattern != NULL) 180 { 181 regfree(*pattern); 182 free(*pattern); 183 } 184 *pattern = NULL; 185#endif 186#if HAVE_PCRE 187 if (*pattern != NULL) 188 pcre_free(*pattern); 189 *pattern = NULL; 190#endif 191#if HAVE_PCRE2 192 if (*pattern != NULL) 193 pcre2_code_free(*pattern); 194 *pattern = NULL; 195#endif 196#if HAVE_RE_COMP 197 *pattern = 0; 198#endif 199#if HAVE_REGCMP 200 if (*pattern != NULL) 201 free(*pattern); 202 *pattern = NULL; 203#endif 204#if HAVE_V8_REGCOMP 205 if (*pattern != NULL) 206 free(*pattern); 207 *pattern = NULL; 208#endif 209} 210 211#if 0 212/* 213 * Can a pattern be successfully compiled? 214 */ 215public int valid_pattern(char *pattern) 216{ 217 PATTERN_TYPE comp_pattern; 218 int result; 219 220 SET_NULL_PATTERN(comp_pattern); 221 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 222 if (result != 0) 223 return (0); 224 uncompile_pattern(&comp_pattern); 225 return (1); 226} 227#endif 228 229/* 230 * Is a compiled pattern null? 231 */ 232public int is_null_pattern(PATTERN_TYPE pattern) 233{ 234#if HAVE_GNU_REGEX 235 return (pattern == NULL); 236#endif 237#if HAVE_POSIX_REGCOMP 238 return (pattern == NULL); 239#endif 240#if HAVE_PCRE 241 return (pattern == NULL); 242#endif 243#if HAVE_PCRE2 244 return (pattern == NULL); 245#endif 246#if HAVE_RE_COMP 247 return (pattern == 0); 248#endif 249#if HAVE_REGCMP 250 return (pattern == NULL); 251#endif 252#if HAVE_V8_REGCOMP 253 return (pattern == NULL); 254#endif 255#if NO_REGEX 256 return (pattern == NULL); 257#endif 258} 259/* 260 * Simple pattern matching function. 261 * It supports no metacharacters like *, etc. 262 */ 263static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs) 264{ 265 char *pp, *lp; 266 char *pattern_end = pattern + pattern_len; 267 char *buf_end = buf + buf_len; 268 269 for ( ; buf < buf_end; buf++) 270 { 271 for (pp = pattern, lp = buf; ; pp++, lp++) 272 { 273 char cp = *pp; 274 char cl = *lp; 275 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 276 cp = ASCII_TO_LOWER(cp); 277 if (cp != cl) 278 break; 279 if (pp == pattern_end || lp == buf_end) 280 break; 281 } 282 if (pp == pattern_end) 283 { 284 *(*sp)++ = buf; 285 *(*ep)++ = lp; 286 return (1); 287 } 288 } 289 **sp = **ep = NULL; 290 return (0); 291} 292 293/* 294 * Perform a pattern match with the previously compiled pattern. 295 * Set sp[0] and ep[0] to the start and end of the matched string. 296 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern. 297 * Subpatterns are defined by parentheses in the regex language. 298 */ 299static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type) 300{ 301 int matched; 302 303#if NO_REGEX 304 search_type |= SRCH_NO_REGEX; 305#endif 306 if (search_type & SRCH_NO_REGEX) 307 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp); 308 else 309 { 310#if HAVE_GNU_REGEX 311 { 312 struct re_registers search_regs; 313 pattern->not_bol = notbol; 314 pattern->regs_allocated = REGS_UNALLOCATED; 315 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 316 if (matched) 317 { 318 *sp++ = line + search_regs.start[0]; 319 *ep++ = line + search_regs.end[0]; 320 } 321 } 322#endif 323#if HAVE_POSIX_REGCOMP 324 { 325 #define RM_COUNT (NUM_SEARCH_COLORS+2) 326 regmatch_t rm[RM_COUNT]; 327 int flags = (notbol) ? REG_NOTBOL : 0; 328#ifdef REG_STARTEND 329 flags |= REG_STARTEND; 330 rm[0].rm_so = 0; 331 rm[0].rm_eo = line_len; 332#endif 333 matched = !regexec(pattern, line, RM_COUNT, rm, flags); 334 if (matched) 335 { 336 int i; 337 int ecount; 338 for (ecount = RM_COUNT; ecount > 0; ecount--) 339 if (rm[ecount-1].rm_so >= 0) 340 break; 341 if (ecount >= nsp) 342 ecount = nsp-1; 343 for (i = 0; i < ecount; i++) 344 { 345 if (rm[i].rm_so < 0) 346 { 347 *sp++ = *ep++ = line; 348 } else 349 { 350#ifndef __WATCOMC__ 351 *sp++ = line + rm[i].rm_so; 352 *ep++ = line + rm[i].rm_eo; 353#else 354 *sp++ = rm[i].rm_sp; 355 *ep++ = rm[i].rm_ep; 356#endif 357 } 358 } 359 } 360 } 361#endif 362#if HAVE_PCRE 363 { 364 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3) 365 int ovector[OVECTOR_COUNT]; 366 int flags = (notbol) ? PCRE_NOTBOL : 0; 367 int i; 368 int ecount; 369 int mcount = pcre_exec(pattern, NULL, line, line_len, 370 0, flags, ovector, OVECTOR_COUNT); 371 matched = (mcount > 0); 372 ecount = nsp-1; 373 if (ecount > mcount) ecount = mcount; 374 for (i = 0; i < ecount*2; ) 375 { 376 if (ovector[i] < 0 || ovector[i+1] < 0) 377 { 378 *sp++ = *ep++ = line; 379 i += 2; 380 } else 381 { 382 *sp++ = line + ovector[i++]; 383 *ep++ = line + ovector[i++]; 384 } 385 } 386 } 387#endif 388#if HAVE_PCRE2 389 { 390 int flags = (notbol) ? PCRE2_NOTBOL : 0; 391 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL); 392 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 393 0, flags, md, NULL); 394 matched = (mcount > 0); 395 if (matched) 396 { 397 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); 398 int i; 399 int ecount = nsp-1; 400 if (ecount > mcount) ecount = mcount; 401 for (i = 0; i < ecount*2; ) 402 { 403 if (ovector[i] < 0 || ovector[i+1] < 0) 404 { 405 *sp++ = *ep++ = line; 406 i += 2; 407 } else 408 { 409 *sp++ = line + ovector[i++]; 410 *ep++ = line + ovector[i++]; 411 } 412 } 413 } 414 pcre2_match_data_free(md); 415 } 416#endif 417#if HAVE_RE_COMP 418 matched = (re_exec(line) == 1); 419 /* 420 * re_exec doesn't seem to provide a way to get the matched string. 421 */ 422#endif 423#if HAVE_REGCMP 424 matched = ((*ep++ = regex(pattern, line)) != NULL); 425 if (matched) 426 *sp++ = __loc1; 427#endif 428#if HAVE_V8_REGCOMP 429#if HAVE_REGEXEC2 430 matched = regexec2(pattern, line, notbol); 431#else 432 matched = regexec(pattern, line); 433#endif 434 if (matched) 435 { 436 *sp++ = pattern->startp[0]; 437 *ep++ = pattern->endp[0]; 438 } 439#endif 440 } 441 *sp = *ep = NULL; 442 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 443 ((search_type & SRCH_NO_MATCH) && !matched); 444 return (matched); 445} 446 447public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type) 448{ 449 int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type); 450 int i; 451 for (i = 1; i <= NUM_SEARCH_COLORS; i++) 452 { 453 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i]) 454 matched = 0; 455 } 456 return matched; 457} 458 459/* 460 * Return the name of the pattern matching library. 461 */ 462public char * pattern_lib_name(void) 463{ 464#if HAVE_GNU_REGEX 465 return ("GNU"); 466#else 467#if HAVE_POSIX_REGCOMP 468 return ("POSIX"); 469#else 470#if HAVE_PCRE2 471 return ("PCRE2"); 472#else 473#if HAVE_PCRE 474 return ("PCRE"); 475#else 476#if HAVE_RE_COMP 477 return ("BSD"); 478#else 479#if HAVE_REGCMP 480 return ("V8"); 481#else 482#if HAVE_V8_REGCOMP 483 return ("Spencer V8"); 484#else 485 return ("no"); 486#endif 487#endif 488#endif 489#endif 490#endif 491#endif 492#endif 493} 494