1/*- 2 * Copyright (c) 1988, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1989 by Berkeley Softworks 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Adam de Boor. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)str.c 5.8 (Berkeley) 6/1/90 39 */ 40 41#include <sys/cdefs.h> 42__FBSDID("$FreeBSD$"); 43 44#include <stdlib.h> 45#include <string.h> 46 47#include "buf.h" 48#include "str.h" 49#include "util.h" 50 51/** 52 * Initialize the argument array object. The array is initially 53 * eight positions, and will be expaned as neccessary. The first 54 * position is set to NULL since everything ignores it. We allocate 55 * (size + 1) since we need space for the terminating NULL. The 56 * buffer is set to NULL, since no common buffer is alloated yet. 57 */ 58void 59ArgArray_Init(ArgArray *aa) 60{ 61 62 aa->size = 8; 63 aa->argv = emalloc((aa->size + 1) * sizeof(char *)); 64 aa->argc = 0; 65 aa->argv[aa->argc++] = NULL; 66 aa->len = 0; 67 aa->buffer = NULL; 68} 69 70/** 71 * Cleanup the memory allocated for in the argument array object. 72 */ 73void 74ArgArray_Done(ArgArray *aa) 75{ 76 77 if (aa->buffer == NULL) { 78 int i; 79 /* args are individually allocated */ 80 for (i = 0; i < aa->argc; ++i) { 81 if (aa->argv[i]) { 82 free(aa->argv[i]); 83 aa->argv[i] = NULL; 84 } 85 } 86 } else { 87 /* args are part of a single allocation */ 88 free(aa->buffer); 89 aa->buffer = NULL; 90 } 91 free(aa->argv); 92 aa->argv = NULL; 93 aa->argc = 0; 94 aa->size = 0; 95} 96 97/*- 98 * str_concat -- 99 * concatenate the two strings, inserting a space or slash between them. 100 * 101 * returns -- 102 * the resulting string in allocated space. 103 */ 104char * 105str_concat(const char *s1, const char *s2, int flags) 106{ 107 int len1, len2; 108 char *result; 109 110 /* get the length of both strings */ 111 len1 = strlen(s1); 112 len2 = strlen(s2); 113 114 /* allocate length plus separator plus EOS */ 115 result = emalloc(len1 + len2 + 2); 116 117 /* copy first string into place */ 118 memcpy(result, s1, len1); 119 120 /* add separator character */ 121 if (flags & STR_ADDSPACE) { 122 result[len1] = ' '; 123 ++len1; 124 } else if (flags & STR_ADDSLASH) { 125 result[len1] = '/'; 126 ++len1; 127 } 128 129 /* copy second string plus EOS into place */ 130 memcpy(result + len1, s2, len2 + 1); 131 132 return (result); 133} 134 135/** 136 * Fracture a string into an array of words (as delineated by tabs or 137 * spaces) taking quotation marks into account. Leading tabs/spaces 138 * are ignored. 139 */ 140void 141brk_string(ArgArray *aa, const char str[], Boolean expand) 142{ 143 char inquote; 144 char *start; 145 char *arg; 146 147 /* skip leading space chars. */ 148 for (; *str == ' ' || *str == '\t'; ++str) 149 continue; 150 151 ArgArray_Init(aa); 152 153 aa->buffer = estrdup(str); 154 155 arg = aa->buffer; 156 start = arg; 157 inquote = '\0'; 158 159 /* 160 * copy the string; at the same time, parse backslashes, 161 * quotes and build the argument list. 162 */ 163 for (;;) { 164 switch (str[0]) { 165 case '"': 166 case '\'': 167 if (inquote == '\0') { 168 inquote = str[0]; 169 if (expand) 170 break; 171 if (start == NULL) 172 start = arg; 173 } else if (inquote == str[0]) { 174 inquote = '\0'; 175 /* Don't miss "" or '' */ 176 if (start == NULL) 177 start = arg; 178 if (expand) 179 break; 180 } else { 181 /* other type of quote found */ 182 if (start == NULL) 183 start = arg; 184 } 185 *arg++ = str[0]; 186 break; 187 case ' ': 188 case '\t': 189 case '\n': 190 if (inquote) { 191 if (start == NULL) 192 start = arg; 193 *arg++ = str[0]; 194 break; 195 } 196 if (start == NULL) 197 break; 198 /* FALLTHROUGH */ 199 case '\0': 200 /* 201 * end of a token -- make sure there's enough argv 202 * space and save off a pointer. 203 */ 204 if (aa->argc == aa->size) { 205 aa->size *= 2; /* ramp up fast */ 206 aa->argv = erealloc(aa->argv, 207 (aa->size + 1) * sizeof(char *)); 208 } 209 210 *arg++ = '\0'; 211 if (start == NULL) { 212 aa->argv[aa->argc] = start; 213 return; 214 } 215 if (str[0] == '\n' || str[0] == '\0') { 216 aa->argv[aa->argc++] = start; 217 aa->argv[aa->argc] = NULL; 218 return; 219 } else { 220 aa->argv[aa->argc++] = start; 221 start = NULL; 222 break; 223 } 224 case '\\': 225 if (start == NULL) 226 start = arg; 227 if (expand) { 228 switch (str[1]) { 229 case '\0': 230 case '\n': 231 /* hmmm; fix it up as best we can */ 232 *arg++ = '\\'; 233 break; 234 case 'b': 235 *arg++ = '\b'; 236 ++str; 237 break; 238 case 'f': 239 *arg++ = '\f'; 240 ++str; 241 break; 242 case 'n': 243 *arg++ = '\n'; 244 ++str; 245 break; 246 case 'r': 247 *arg++ = '\r'; 248 ++str; 249 break; 250 case 't': 251 *arg++ = '\t'; 252 ++str; 253 break; 254 default: 255 *arg++ = str[1]; 256 ++str; 257 break; 258 } 259 } else { 260 *arg++ = str[0]; 261 if (str[1] != '\0') { 262 ++str; 263 *arg++ = str[0]; 264 } 265 } 266 break; 267 default: 268 if (start == NULL) 269 start = arg; 270 *arg++ = str[0]; 271 break; 272 } 273 ++str; 274 } 275} 276 277/* 278 * Quote a string for appending it to MAKEFLAGS. According to Posix the 279 * kind of quoting here is implementation-defined. This quoting must ensure 280 * that the parsing of MAKEFLAGS's contents in a sub-shell yields the same 281 * options, option arguments and macro definitions as in the calling make. 282 * We simply quote all blanks, which according to Posix are space and tab 283 * in the POSIX locale. Don't use isblank because in that case makes with 284 * different locale settings could not communicate. We must also quote 285 * backslashes obviously. 286 */ 287char * 288MAKEFLAGS_quote(const char *str) 289{ 290 char *ret, *q; 291 const char *p; 292 293 /* assume worst case - everything has to be quoted */ 294 ret = emalloc(strlen(str) * 2 + 1); 295 296 p = str; 297 q = ret; 298 while (*p != '\0') { 299 switch (*p) { 300 301 case ' ': 302 case '\t': 303 *q++ = '\\'; 304 break; 305 306 default: 307 break; 308 } 309 *q++ = *p++; 310 } 311 *q++ = '\0'; 312 return (ret); 313} 314 315void 316MAKEFLAGS_break(ArgArray *aa, const char str[]) 317{ 318 char *arg; 319 char *start; 320 321 ArgArray_Init(aa); 322 323 aa->buffer = strdup(str); 324 325 arg = aa->buffer; 326 start = NULL; 327 328 for (;;) { 329 switch (str[0]) { 330 case ' ': 331 case '\t': 332 /* word separator */ 333 if (start == NULL) { 334 /* not in a word */ 335 str++; 336 continue; 337 } 338 /* FALLTHRU */ 339 case '\0': 340 if (aa->argc == aa->size) { 341 aa->size *= 2; 342 aa->argv = erealloc(aa->argv, 343 (aa->size + 1) * sizeof(char *)); 344 } 345 346 *arg++ = '\0'; 347 if (start == NULL) { 348 aa->argv[aa->argc] = start; 349 return; 350 } 351 if (str[0] == '\0') { 352 aa->argv[aa->argc++] = start; 353 aa->argv[aa->argc] = NULL; 354 return; 355 } else { 356 aa->argv[aa->argc++] = start; 357 start = NULL; 358 str++; 359 continue; 360 } 361 362 case '\\': 363 if (str[1] == ' ' || str[1] == '\t') 364 str++; 365 break; 366 367 default: 368 break; 369 } 370 if (start == NULL) 371 start = arg; 372 *arg++ = *str++; 373 } 374} 375 376/* 377 * Str_Match -- 378 * 379 * See if a particular string matches a particular pattern. 380 * 381 * Results: Non-zero is returned if string matches pattern, 0 otherwise. The 382 * matching operation permits the following special characters in the 383 * pattern: *?\[] (see the man page for details on what these mean). 384 * 385 * Side effects: None. 386 */ 387int 388Str_Match(const char *string, const char *pattern) 389{ 390 char c2; 391 392 for (;;) { 393 /* 394 * See if we're at the end of both the pattern and the 395 * string. If, we succeeded. If we're at the end of the 396 * pattern but not at the end of the string, we failed. 397 */ 398 if (*pattern == 0) 399 return (!*string); 400 if (*string == 0 && *pattern != '*') 401 return (0); 402 /* 403 * Check for a "*" as the next pattern character. It matches 404 * any substring. We handle this by calling ourselves 405 * recursively for each postfix of string, until either we 406 * match or we reach the end of the string. 407 */ 408 if (*pattern == '*') { 409 pattern += 1; 410 if (*pattern == 0) 411 return (1); 412 while (*string != 0) { 413 if (Str_Match(string, pattern)) 414 return (1); 415 ++string; 416 } 417 return (0); 418 } 419 /* 420 * Check for a "?" as the next pattern character. It matches 421 * any single character. 422 */ 423 if (*pattern == '?') 424 goto thisCharOK; 425 /* 426 * Check for a "[" as the next pattern character. It is 427 * followed by a list of characters that are acceptable, or 428 * by a range (two characters separated by "-"). 429 */ 430 if (*pattern == '[') { 431 ++pattern; 432 for (;;) { 433 if ((*pattern == ']') || (*pattern == 0)) 434 return (0); 435 if (*pattern == *string) 436 break; 437 if (pattern[1] == '-') { 438 c2 = pattern[2]; 439 if (c2 == 0) 440 return (0); 441 if ((*pattern <= *string) && 442 (c2 >= *string)) 443 break; 444 if ((*pattern >= *string) && 445 (c2 <= *string)) 446 break; 447 pattern += 2; 448 } 449 ++pattern; 450 } 451 while ((*pattern != ']') && (*pattern != 0)) 452 ++pattern; 453 goto thisCharOK; 454 } 455 /* 456 * If the next pattern character is '/', just strip off the 457 * '/' so we do exact matching on the character that follows. 458 */ 459 if (*pattern == '\\') { 460 ++pattern; 461 if (*pattern == 0) 462 return (0); 463 } 464 /* 465 * There's no special character. Just make sure that the 466 * next characters of each string match. 467 */ 468 if (*pattern != *string) 469 return (0); 470thisCharOK: ++pattern; 471 ++string; 472 } 473} 474 475 476/** 477 * Str_SYSVMatch 478 * Check word against pattern for a match (% is wild), 479 * 480 * Results: 481 * Returns the beginning position of a match or null. The number 482 * of characters matched is returned in len. 483 */ 484const char * 485Str_SYSVMatch(const char *word, const char *pattern, int *len) 486{ 487 const char *m, *p, *w; 488 489 p = pattern; 490 w = word; 491 492 if (*w == '\0') { 493 /* Zero-length word cannot be matched against */ 494 *len = 0; 495 return (NULL); 496 } 497 498 if (*p == '\0') { 499 /* Null pattern is the whole string */ 500 *len = strlen(w); 501 return (w); 502 } 503 504 if ((m = strchr(p, '%')) != NULL) { 505 /* check that the prefix matches */ 506 for (; p != m && *w && *w == *p; w++, p++) 507 continue; 508 509 if (p != m) 510 return (NULL); /* No match */ 511 512 if (*++p == '\0') { 513 /* No more pattern, return the rest of the string */ 514 *len = strlen(w); 515 return (w); 516 } 517 } 518 519 m = w; 520 521 /* Find a matching tail */ 522 do 523 if (strcmp(p, w) == 0) { 524 *len = w - m; 525 return (m); 526 } 527 while (*w++ != '\0'); 528 529 return (NULL); 530} 531 532 533/** 534 * Str_SYSVSubst 535 * Substitute '%' on the pattern with len characters from src. 536 * If the pattern does not contain a '%' prepend len characters 537 * from src. 538 * 539 * Side Effects: 540 * Places result on buf 541 */ 542void 543Str_SYSVSubst(Buffer *buf, const char *pat, const char *src, int len) 544{ 545 const char *m; 546 547 if ((m = strchr(pat, '%')) != NULL) { 548 /* Copy the prefix */ 549 Buf_AppendRange(buf, pat, m); 550 /* skip the % */ 551 pat = m + 1; 552 } 553 554 /* Copy the pattern */ 555 Buf_AddBytes(buf, len, (const Byte *)src); 556 557 /* append the rest */ 558 Buf_Append(buf, pat); 559} 560