/* str.c revision 141133 */
11590Srgrimes/*- 21590Srgrimes * Copyright (c) 1988, 1989, 1990, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * Copyright (c) 1989 by Berkeley Softworks 51590Srgrimes * All rights reserved. 61590Srgrimes * 71590Srgrimes * This code is derived from software contributed to Berkeley by 81590Srgrimes * Adam de Boor. 91590Srgrimes * 101590Srgrimes * Redistribution and use in source and binary forms, with or without 111590Srgrimes * modification, are permitted provided that the following conditions 121590Srgrimes * are met: 131590Srgrimes * 1. Redistributions of source code must retain the above copyright 141590Srgrimes * notice, this list of conditions and the following disclaimer. 151590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161590Srgrimes * notice, this list of conditions and the following disclaimer in the 171590Srgrimes * documentation and/or other materials provided with the distribution. 181590Srgrimes * 3. All advertising materials mentioning features or use of this software 191590Srgrimes * must display the following acknowledgement: 201590Srgrimes * This product includes software developed by the University of 211590Srgrimes * California, Berkeley and its contributors. 221590Srgrimes * 4. Neither the name of the University nor the names of its contributors 231590Srgrimes * may be used to endorse or promote products derived from this software 241590Srgrimes * without specific prior written permission. 251590Srgrimes * 261590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 271590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 281590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 291590Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 301590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 311590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 321590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 331590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 341590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 351590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 361590Srgrimes * SUCH DAMAGE. 3762833Swsanchez * 3862833Swsanchez * @(#)str.c 5.8 (Berkeley) 6/1/90 391590Srgrimes */ 401590Srgrimes 4162833Swsanchez#include <sys/cdefs.h> 4294587Sobrien__FBSDID("$FreeBSD: head/usr.bin/make/str.c 141133 2005-02-02 07:36:18Z harti $"); 431590Srgrimes 44141104Sharti#include <ctype.h> 45141104Sharti#include <stdlib.h> 46141104Sharti#include <string.h> 471590Srgrimes 48141133Sharti#include "buf.h" 49141104Sharti#include "globals.h" 50141104Sharti#include "str.h" 51141104Sharti#include "util.h" 52141104Sharti#include "var.h" 53141104Sharti 545814Sjkhstatic char **argv, *buffer; 555814Sjkhstatic int argmax, curlen; 565814Sjkh 575814Sjkh/* 585814Sjkh * str_init -- 595814Sjkh * Initialize the strings package 605814Sjkh * 615814Sjkh */ 625814Sjkhvoid 63104696Sjmallettstr_init(void) 645814Sjkh{ 655814Sjkh char *p1; 66138232Sharti 67138264Sharti argv = emalloc(((argmax = 50) + 1) * sizeof(char *)); 685814Sjkh argv[0] = Var_Value(".MAKE", VAR_GLOBAL, &p1); 695814Sjkh} 705814Sjkh 711590Srgrimes/*- 721590Srgrimes * str_concat -- 73138547Sharti * concatenate the two strings, inserting a space or slash between them. 741590Srgrimes * 751590Srgrimes * returns -- 761590Srgrimes * the resulting string in allocated space. 
771590Srgrimes */ 781590Srgrimeschar * 79138547Shartistr_concat(const char *s1, const char *s2, int flags) 801590Srgrimes{ 8194584Sobrien int len1, len2; 8294584Sobrien char *result; 831590Srgrimes 841590Srgrimes /* get the length of both strings */ 8594638Sobrien len1 = strlen(s1); 8694638Sobrien len2 = strlen(s2); 871590Srgrimes 881590Srgrimes /* allocate length plus separator plus EOS */ 89138264Sharti result = emalloc(len1 + len2 + 2); 901590Srgrimes 911590Srgrimes /* copy first string into place */ 9294638Sobrien memcpy(result, s1, len1); 931590Srgrimes 941590Srgrimes /* add separator character */ 9594638Sobrien if (flags & STR_ADDSPACE) { 9694638Sobrien result[len1] = ' '; 9794638Sobrien ++len1; 9894638Sobrien } else if (flags & STR_ADDSLASH) { 9994638Sobrien result[len1] = '/'; 10094638Sobrien ++len1; 1011590Srgrimes } 1021590Srgrimes 10394638Sobrien /* copy second string plus EOS into place */ 10494638Sobrien memcpy(result + len1, s2, len2 + 1); 1051590Srgrimes 106138232Sharti return (result); 1071590Srgrimes} 1081590Srgrimes 1091590Srgrimes/*- 1101590Srgrimes * brk_string -- 1111590Srgrimes * Fracture a string into an array of words (as delineated by tabs or 1121590Srgrimes * spaces) taking quotation marks into account. Leading tabs/spaces 1131590Srgrimes * are ignored. 1141590Srgrimes * 1151590Srgrimes * returns -- 1161590Srgrimes * Pointer to the array of pointers to the words. To make life easier, 1171590Srgrimes * the first word is always the value of the .MAKE variable. 1181590Srgrimes */ 1191590Srgrimeschar ** 120104696Sjmallettbrk_string(char *str, int *store_argc, Boolean expand) 1211590Srgrimes{ 12294584Sobrien int argc, ch; 12394584Sobrien char inquote, *p, *start, *t; 1241590Srgrimes int len; 1251590Srgrimes 1261590Srgrimes /* skip leading space chars. 
*/ 1271590Srgrimes for (; *str == ' ' || *str == '\t'; ++str) 1281590Srgrimes continue; 1291590Srgrimes 1301590Srgrimes /* allocate room for a copy of the string */ 1315814Sjkh if ((len = strlen(str) + 1) > curlen) { 1325814Sjkh if (buffer) 1335814Sjkh free(buffer); 1345814Sjkh buffer = emalloc(curlen = len); 1355814Sjkh } 1361590Srgrimes 1371590Srgrimes /* 1381590Srgrimes * copy the string; at the same time, parse backslashes, 1391590Srgrimes * quotes and build the argument list. 1401590Srgrimes */ 1411590Srgrimes argc = 1; 1421590Srgrimes inquote = '\0'; 1435814Sjkh for (p = str, start = t = buffer;; ++p) { 1441590Srgrimes switch(ch = *p) { 1451590Srgrimes case '"': 1461590Srgrimes case '\'': 14749938Shoek if (inquote) { 148124966Sru if (ch != inquote) 1491590Srgrimes break; 150124966Sru inquote = '\0'; 151124966Sru /* Don't miss "" or '' */ 152124966Sru if (!start) 153124966Sru start = t; 154124840Sru } else 155138264Sharti inquote = (char)ch; 156124840Sru if (expand) 157124840Sru continue; 158124840Sru break; 1591590Srgrimes case ' ': 1601590Srgrimes case '\t': 1615814Sjkh case '\n': 1621590Srgrimes if (inquote) 1631590Srgrimes break; 1641590Srgrimes if (!start) 1651590Srgrimes continue; 1661590Srgrimes /* FALLTHROUGH */ 1671590Srgrimes case '\0': 1681590Srgrimes /* 1691590Srgrimes * end of a token -- make sure there's enough argv 1701590Srgrimes * space and save off a pointer. 
1711590Srgrimes */ 1725814Sjkh if (!start) 1735814Sjkh goto done; 1745814Sjkh 1751590Srgrimes *t++ = '\0'; 1761590Srgrimes if (argc == argmax) { 1771590Srgrimes argmax *= 2; /* ramp up fast */ 178138264Sharti argv = erealloc(argv, 17918730Ssteve (argmax + 1) * sizeof(char *)); 1801590Srgrimes } 1811590Srgrimes argv[argc++] = start; 182138264Sharti start = NULL; 1831590Srgrimes if (ch == '\n' || ch == '\0') 1841590Srgrimes goto done; 1851590Srgrimes continue; 1861590Srgrimes case '\\': 1875814Sjkh if (!expand) { 1885814Sjkh if (!start) 1895814Sjkh start = t; 1905814Sjkh *t++ = '\\'; 1915814Sjkh ch = *++p; 1925814Sjkh break; 1935814Sjkh } 1948874Srgrimes 1951590Srgrimes switch (ch = *++p) { 1961590Srgrimes case '\0': 1971590Srgrimes case '\n': 1981590Srgrimes /* hmmm; fix it up as best we can */ 1991590Srgrimes ch = '\\'; 2001590Srgrimes --p; 2011590Srgrimes break; 2021590Srgrimes case 'b': 2031590Srgrimes ch = '\b'; 2041590Srgrimes break; 2051590Srgrimes case 'f': 2061590Srgrimes ch = '\f'; 2071590Srgrimes break; 2081590Srgrimes case 'n': 2091590Srgrimes ch = '\n'; 2101590Srgrimes break; 2111590Srgrimes case 'r': 2121590Srgrimes ch = '\r'; 2131590Srgrimes break; 2141590Srgrimes case 't': 2151590Srgrimes ch = '\t'; 2161590Srgrimes break; 217104108Sjmallett default: 218104108Sjmallett break; 2191590Srgrimes } 2201590Srgrimes break; 221104108Sjmallett default: 222104108Sjmallett break; 2231590Srgrimes } 2241590Srgrimes if (!start) 2251590Srgrimes start = t; 226138232Sharti *t++ = (char)ch; 2271590Srgrimes } 228138264Shartidone: argv[argc] = NULL; 2291590Srgrimes *store_argc = argc; 230138232Sharti return (argv); 2311590Srgrimes} 2321590Srgrimes 2331590Srgrimes/* 234140870Sharti * Quote a string for appending it to MAKEFLAGS. According to Posix the 235140870Sharti * kind of quoting here is implementation-defined. 
This quoting must ensure 236140870Sharti * that the parsing of MAKEFLAGS's contents in a sub-shell yields the same 237140870Sharti * options, option arguments and macro definitions as in the calling make. 238140870Sharti * We simply quote all blanks, which according to Posix are space and tab 239140870Sharti * in the POSIX locale. Don't use isblank because in that case makes with 240140870Sharti * different locale settings could not communicate. We must also quote 241140870Sharti * backslashes obviously. 242140870Sharti */ 243140870Shartichar * 244140870ShartiMAKEFLAGS_quote(const char *str) 245140870Sharti{ 246140870Sharti char *ret, *q; 247140870Sharti const char *p; 248140870Sharti 249140870Sharti /* assume worst case - everything has to be quoted */ 250140870Sharti ret = emalloc(strlen(str) * 2 + 1); 251140870Sharti 252140870Sharti p = str; 253140870Sharti q = ret; 254140870Sharti while (*p != '\0') { 255140870Sharti switch (*p) { 256140870Sharti 257140870Sharti case ' ': 258140870Sharti case '\t': 259140870Sharti *q++ = '\\'; 260140870Sharti break; 261140870Sharti 262140870Sharti default: 263140870Sharti break; 264140870Sharti } 265140870Sharti *q++ = *p++; 266140870Sharti } 267140870Sharti *q++ = '\0'; 268140870Sharti return (ret); 269140870Sharti} 270140870Sharti 271140870Shartichar ** 272140870ShartiMAKEFLAGS_break(const char *str, int *pargc) 273140870Sharti{ 274140870Sharti char *q, *start; 275140870Sharti int len; 276140870Sharti 277140870Sharti /* allocate room for a copy of the string */ 278140870Sharti if ((len = strlen(str) + 1) > curlen) 279140870Sharti buffer = erealloc(buffer, curlen = len); 280140870Sharti 281140870Sharti start = NULL; 282140870Sharti *pargc = 1; 283140870Sharti 284140870Sharti for (q = buffer;;) { 285140870Sharti switch (*str) { 286140870Sharti case ' ': 287140870Sharti case '\t': 288140870Sharti /* word separator */ 289140870Sharti if (start == NULL) { 290140870Sharti /* not in a word */ 291140870Sharti str++; 292140870Sharti 
continue; 293140870Sharti } 294140870Sharti /* FALLTHRU */ 295140870Sharti case '\0': 296140870Sharti if (start == NULL) 297140870Sharti goto done; 298140870Sharti 299140870Sharti /* finish word */ 300140870Sharti *q++ = '\0'; 301140870Sharti if (argmax == *pargc) { 302140870Sharti argmax *= 2; 303140870Sharti argv = erealloc(argv, 304140870Sharti sizeof(*argv) * (argmax + 1)); 305140870Sharti } 306140870Sharti argv[(*pargc)++] = start; 307140870Sharti start = NULL; 308140870Sharti 309140870Sharti if (*str++ == '\0') 310140870Sharti goto done; 311140870Sharti continue; 312140870Sharti 313140870Sharti case '\\': 314140870Sharti if (str[1] == ' ' || str[1] == '\t') 315140870Sharti /* was a quote */ 316140870Sharti str++; 317140870Sharti break; 318140870Sharti 319140870Sharti default: 320140870Sharti break; 321140870Sharti } 322140870Sharti if (start == NULL) 323140870Sharti /* start of new word */ 324140870Sharti start = q; 325140870Sharti *q++ = *str++; 326140870Sharti } 327140870Sharti done: 328140870Sharti argv[(*pargc)] = NULL; 329140870Sharti return (argv); 330140870Sharti} 331140870Sharti 332140870Sharti/* 3331590Srgrimes * Str_Match -- 3348874Srgrimes * 3351590Srgrimes * See if a particular string matches a particular pattern. 3368874Srgrimes * 3371590Srgrimes * Results: Non-zero is returned if string matches pattern, 0 otherwise. The 3381590Srgrimes * matching operation permits the following special characters in the 3391590Srgrimes * pattern: *?\[] (see the man page for details on what these mean). 3408874Srgrimes * 3411590Srgrimes * Side effects: None. 3421590Srgrimes */ 3431590Srgrimesint 344106106SjmallettStr_Match(const char *string, const char *pattern) 3451590Srgrimes{ 3461590Srgrimes char c2; 3471590Srgrimes 3481590Srgrimes for (;;) { 3491590Srgrimes /* 3501590Srgrimes * See if we're at the end of both the pattern and the 3511590Srgrimes * string. If, we succeeded. 
If we're at the end of the 3521590Srgrimes * pattern but not at the end of the string, we failed. 3531590Srgrimes */ 3541590Srgrimes if (*pattern == 0) 355138232Sharti return (!*string); 3561590Srgrimes if (*string == 0 && *pattern != '*') 357138232Sharti return (0); 3581590Srgrimes /* 3591590Srgrimes * Check for a "*" as the next pattern character. It matches 3601590Srgrimes * any substring. We handle this by calling ourselves 3611590Srgrimes * recursively for each postfix of string, until either we 3621590Srgrimes * match or we reach the end of the string. 3631590Srgrimes */ 3641590Srgrimes if (*pattern == '*') { 3651590Srgrimes pattern += 1; 3661590Srgrimes if (*pattern == 0) 367138232Sharti return (1); 3681590Srgrimes while (*string != 0) { 3691590Srgrimes if (Str_Match(string, pattern)) 370138232Sharti return (1); 3711590Srgrimes ++string; 3721590Srgrimes } 373138232Sharti return (0); 3741590Srgrimes } 3751590Srgrimes /* 3761590Srgrimes * Check for a "?" as the next pattern character. It matches 3771590Srgrimes * any single character. 3781590Srgrimes */ 3791590Srgrimes if (*pattern == '?') 3801590Srgrimes goto thisCharOK; 3811590Srgrimes /* 3821590Srgrimes * Check for a "[" as the next pattern character. It is 3831590Srgrimes * followed by a list of characters that are acceptable, or 3841590Srgrimes * by a range (two characters separated by "-"). 
3851590Srgrimes */ 3861590Srgrimes if (*pattern == '[') { 3871590Srgrimes ++pattern; 3881590Srgrimes for (;;) { 3891590Srgrimes if ((*pattern == ']') || (*pattern == 0)) 390138232Sharti return (0); 3911590Srgrimes if (*pattern == *string) 3921590Srgrimes break; 3931590Srgrimes if (pattern[1] == '-') { 3941590Srgrimes c2 = pattern[2]; 3951590Srgrimes if (c2 == 0) 396138232Sharti return (0); 3971590Srgrimes if ((*pattern <= *string) && 3981590Srgrimes (c2 >= *string)) 3991590Srgrimes break; 4001590Srgrimes if ((*pattern >= *string) && 4011590Srgrimes (c2 <= *string)) 4021590Srgrimes break; 4031590Srgrimes pattern += 2; 4041590Srgrimes } 4051590Srgrimes ++pattern; 4061590Srgrimes } 4071590Srgrimes while ((*pattern != ']') && (*pattern != 0)) 4081590Srgrimes ++pattern; 4091590Srgrimes goto thisCharOK; 4101590Srgrimes } 4111590Srgrimes /* 4121590Srgrimes * If the next pattern character is '/', just strip off the 4131590Srgrimes * '/' so we do exact matching on the character that follows. 4141590Srgrimes */ 4151590Srgrimes if (*pattern == '\\') { 4161590Srgrimes ++pattern; 4171590Srgrimes if (*pattern == 0) 418138232Sharti return (0); 4191590Srgrimes } 4201590Srgrimes /* 4211590Srgrimes * There's no special character. Just make sure that the 4221590Srgrimes * next characters of each string match. 4231590Srgrimes */ 4241590Srgrimes if (*pattern != *string) 425138232Sharti return (0); 4261590SrgrimesthisCharOK: ++pattern; 4271590Srgrimes ++string; 4281590Srgrimes } 4291590Srgrimes} 4301590Srgrimes 4311590Srgrimes 4321590Srgrimes/*- 4331590Srgrimes *----------------------------------------------------------------------- 4341590Srgrimes * Str_SYSVMatch -- 4358874Srgrimes * Check word against pattern for a match (% is wild), 4368874Srgrimes * 4371590Srgrimes * Results: 4381590Srgrimes * Returns the beginning position of a match or null. The number 4391590Srgrimes * of characters matched is returned in len. 
4401590Srgrimes * 4411590Srgrimes * Side Effects: 4421590Srgrimes * None 4431590Srgrimes * 4441590Srgrimes *----------------------------------------------------------------------- 4451590Srgrimes */ 446106106Sjmallettconst char * 447106106SjmallettStr_SYSVMatch(const char *word, const char *pattern, int *len) 4481590Srgrimes{ 449106106Sjmallett const char *m, *p, *w; 4501590Srgrimes 451106106Sjmallett p = pattern; 452106106Sjmallett w = word; 453106106Sjmallett 45496071Sjmallett if (*w == '\0') { 45596071Sjmallett /* Zero-length word cannot be matched against */ 45696071Sjmallett *len = 0; 457138232Sharti return (NULL); 45896071Sjmallett } 45996071Sjmallett 4601590Srgrimes if (*p == '\0') { 4611590Srgrimes /* Null pattern is the whole string */ 4621590Srgrimes *len = strlen(w); 463138232Sharti return (w); 4641590Srgrimes } 4651590Srgrimes 4661590Srgrimes if ((m = strchr(p, '%')) != NULL) { 4671590Srgrimes /* check that the prefix matches */ 4681590Srgrimes for (; p != m && *w && *w == *p; w++, p++) 4691590Srgrimes continue; 4701590Srgrimes 4711590Srgrimes if (p != m) 472138232Sharti return (NULL); /* No match */ 4731590Srgrimes 4741590Srgrimes if (*++p == '\0') { 4751590Srgrimes /* No more pattern, return the rest of the string */ 4761590Srgrimes *len = strlen(w); 477138232Sharti return (w); 4781590Srgrimes } 4791590Srgrimes } 4801590Srgrimes 4811590Srgrimes m = w; 4821590Srgrimes 4831590Srgrimes /* Find a matching tail */ 4841590Srgrimes do 4851590Srgrimes if (strcmp(p, w) == 0) { 4861590Srgrimes *len = w - m; 487138232Sharti return (m); 4881590Srgrimes } 4891590Srgrimes while (*w++ != '\0'); 4908874Srgrimes 491138232Sharti return (NULL); 4921590Srgrimes} 4931590Srgrimes 4941590Srgrimes 4951590Srgrimes/*- 4961590Srgrimes *----------------------------------------------------------------------- 4971590Srgrimes * Str_SYSVSubst -- 4981590Srgrimes * Substitute '%' on the pattern with len characters from src. 
4991590Srgrimes * If the pattern does not contain a '%' prepend len characters 5001590Srgrimes * from src. 5018874Srgrimes * 5021590Srgrimes * Results: 5031590Srgrimes * None 5041590Srgrimes * 5051590Srgrimes * Side Effects: 5061590Srgrimes * Places result on buf 5071590Srgrimes * 5081590Srgrimes *----------------------------------------------------------------------- 5091590Srgrimes */ 5101590Srgrimesvoid 511141133ShartiStr_SYSVSubst(Buffer *buf, const char *pat, const char *src, int len) 5121590Srgrimes{ 513106106Sjmallett const char *m; 5141590Srgrimes 5151590Srgrimes if ((m = strchr(pat, '%')) != NULL) { 5161590Srgrimes /* Copy the prefix */ 517138548Sharti Buf_AddBytes(buf, m - pat, (const Byte *)pat); 5181590Srgrimes /* skip the % */ 5191590Srgrimes pat = m + 1; 5201590Srgrimes } 5211590Srgrimes 5221590Srgrimes /* Copy the pattern */ 523138548Sharti Buf_AddBytes(buf, len, (const Byte *)src); 5241590Srgrimes 5251590Srgrimes /* append the rest */ 526138548Sharti Buf_AddBytes(buf, strlen(pat), (const Byte *)pat); 5271590Srgrimes} 528