str.c revision 104696
11590Srgrimes/*- 21590Srgrimes * Copyright (c) 1988, 1989, 1990, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * Copyright (c) 1989 by Berkeley Softworks 51590Srgrimes * All rights reserved. 61590Srgrimes * 71590Srgrimes * This code is derived from software contributed to Berkeley by 81590Srgrimes * Adam de Boor. 91590Srgrimes * 101590Srgrimes * Redistribution and use in source and binary forms, with or without 111590Srgrimes * modification, are permitted provided that the following conditions 121590Srgrimes * are met: 131590Srgrimes * 1. Redistributions of source code must retain the above copyright 141590Srgrimes * notice, this list of conditions and the following disclaimer. 151590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161590Srgrimes * notice, this list of conditions and the following disclaimer in the 171590Srgrimes * documentation and/or other materials provided with the distribution. 181590Srgrimes * 3. All advertising materials mentioning features or use of this software 191590Srgrimes * must display the following acknowledgement: 201590Srgrimes * This product includes software developed by the University of 211590Srgrimes * California, Berkeley and its contributors. 221590Srgrimes * 4. Neither the name of the University nor the names of its contributors 231590Srgrimes * may be used to endorse or promote products derived from this software 241590Srgrimes * without specific prior written permission. 251590Srgrimes * 261590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 271590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 281590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 291590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 301590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 311590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 321590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 331590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 341590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 351590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 361590Srgrimes * SUCH DAMAGE. 3762833Swsanchez * 3862833Swsanchez * @(#)str.c 5.8 (Berkeley) 6/1/90 391590Srgrimes */ 401590Srgrimes 4162833Swsanchez#include <sys/cdefs.h> 4294587Sobrien__FBSDID("$FreeBSD: head/usr.bin/make/str.c 104696 2002-10-09 03:42:10Z jmallett $"); 431590Srgrimes 441590Srgrimes#include "make.h" 451590Srgrimes 465814Sjkhstatic char **argv, *buffer; 475814Sjkhstatic int argmax, curlen; 485814Sjkh 495814Sjkh/* 505814Sjkh * str_init -- 515814Sjkh * Initialize the strings package 525814Sjkh * 535814Sjkh */ 545814Sjkhvoid 55104696Sjmallettstr_init(void) 565814Sjkh{ 575814Sjkh char *p1; 5818730Ssteve argv = (char **)emalloc(((argmax = 50) + 1) * sizeof(char *)); 595814Sjkh argv[0] = Var_Value(".MAKE", VAR_GLOBAL, &p1); 605814Sjkh} 615814Sjkh 625814Sjkh 635814Sjkh/* 645814Sjkh * str_end -- 655814Sjkh * Cleanup the strings package 665814Sjkh * 675814Sjkh */ 685814Sjkhvoid 69104696Sjmallettstr_end(void) 705814Sjkh{ 719254Sache if (argv) { 729254Sache if (argv[0]) 739254Sache free(argv[0]); 7469531Swill free(argv); 759254Sache } 765814Sjkh if (buffer) 775814Sjkh free(buffer); 785814Sjkh} 795814Sjkh 801590Srgrimes/*- 811590Srgrimes * str_concat -- 821590Srgrimes * concatenate the two strings, inserting a space or slash between them, 831590Srgrimes * freeing them if requested. 841590Srgrimes * 851590Srgrimes * returns -- 861590Srgrimes * the resulting string in allocated space. 871590Srgrimes */ 881590Srgrimeschar * 89104696Sjmallettstr_concat(char *s1, char *s2, int flags) 901590Srgrimes{ 9194584Sobrien int len1, len2; 9294584Sobrien char *result; 931590Srgrimes 941590Srgrimes /* get the length of both strings */ 9594638Sobrien len1 = strlen(s1); 9694638Sobrien len2 = strlen(s2); 971590Srgrimes 981590Srgrimes /* allocate length plus separator plus EOS */ 991590Srgrimes result = emalloc((u_int)(len1 + len2 + 2)); 1001590Srgrimes 1011590Srgrimes /* copy first string into place */ 10294638Sobrien memcpy(result, s1, len1); 1031590Srgrimes 1041590Srgrimes /* add separator character */ 10594638Sobrien if (flags & STR_ADDSPACE) { 10694638Sobrien result[len1] = ' '; 10794638Sobrien ++len1; 10894638Sobrien } else if (flags & STR_ADDSLASH) { 10994638Sobrien result[len1] = '/'; 11094638Sobrien ++len1; 1111590Srgrimes } 1121590Srgrimes 11394638Sobrien /* copy second string plus EOS into place */ 11494638Sobrien memcpy(result + len1, s2, len2 + 1); 1151590Srgrimes 1161590Srgrimes /* free original strings */ 1171590Srgrimes if (flags & STR_DOFREE) { 11897123Sjmallett (void)efree(s1); 11997123Sjmallett (void)efree(s2); 1201590Srgrimes } 1211590Srgrimes return(result); 1221590Srgrimes} 1231590Srgrimes 1241590Srgrimes/*- 1251590Srgrimes * brk_string -- 1261590Srgrimes * Fracture a string into an array of words (as delineated by tabs or 1271590Srgrimes * spaces) taking quotation marks into account. Leading tabs/spaces 1281590Srgrimes * are ignored. 1291590Srgrimes * 1301590Srgrimes * returns -- 1311590Srgrimes * Pointer to the array of pointers to the words. To make life easier, 1321590Srgrimes * the first word is always the value of the .MAKE variable. 1331590Srgrimes */ 1341590Srgrimeschar ** 135104696Sjmallettbrk_string(char *str, int *store_argc, Boolean expand) 1361590Srgrimes{ 13794584Sobrien int argc, ch; 13894584Sobrien char inquote, *p, *start, *t; 1391590Srgrimes int len; 1401590Srgrimes 1411590Srgrimes /* skip leading space chars. */ 1421590Srgrimes for (; *str == ' ' || *str == '\t'; ++str) 1431590Srgrimes continue; 1441590Srgrimes 1451590Srgrimes /* allocate room for a copy of the string */ 1465814Sjkh if ((len = strlen(str) + 1) > curlen) { 1475814Sjkh if (buffer) 1485814Sjkh free(buffer); 1495814Sjkh buffer = emalloc(curlen = len); 1505814Sjkh } 1511590Srgrimes 1521590Srgrimes /* 1531590Srgrimes * copy the string; at the same time, parse backslashes, 1541590Srgrimes * quotes and build the argument list. 1551590Srgrimes */ 1561590Srgrimes argc = 1; 1571590Srgrimes inquote = '\0'; 1585814Sjkh for (p = str, start = t = buffer;; ++p) { 1591590Srgrimes switch(ch = *p) { 1601590Srgrimes case '"': 1611590Srgrimes case '\'': 16249938Shoek if (inquote) { 1631590Srgrimes if (inquote == ch) 1641590Srgrimes inquote = '\0'; 1651590Srgrimes else 1661590Srgrimes break; 16749938Shoek } else { 1681590Srgrimes inquote = (char) ch; 1695814Sjkh /* Don't miss "" or '' */ 1705814Sjkh if (start == NULL && p[1] == inquote) { 1715814Sjkh start = t + 1; 1725814Sjkh break; 1735814Sjkh } 1742266Ssef } 1755814Sjkh if (!expand) { 1765814Sjkh if (!start) 1775814Sjkh start = t; 1785814Sjkh *t++ = ch; 1795814Sjkh } 1805814Sjkh continue; 1811590Srgrimes case ' ': 1821590Srgrimes case '\t': 1835814Sjkh case '\n': 1841590Srgrimes if (inquote) 1851590Srgrimes break; 1861590Srgrimes if (!start) 1871590Srgrimes continue; 1881590Srgrimes /* FALLTHROUGH */ 1891590Srgrimes case '\0': 1901590Srgrimes /* 1911590Srgrimes * end of a token -- make sure there's enough argv 1921590Srgrimes * space and save off a pointer. 1931590Srgrimes */ 1945814Sjkh if (!start) 1955814Sjkh goto done; 1965814Sjkh 1971590Srgrimes *t++ = '\0'; 1981590Srgrimes if (argc == argmax) { 1991590Srgrimes argmax *= 2; /* ramp up fast */ 20018730Ssteve argv = (char **)erealloc(argv, 20118730Ssteve (argmax + 1) * sizeof(char *)); 2021590Srgrimes } 2031590Srgrimes argv[argc++] = start; 2041590Srgrimes start = (char *)NULL; 2051590Srgrimes if (ch == '\n' || ch == '\0') 2061590Srgrimes goto done; 2071590Srgrimes continue; 2081590Srgrimes case '\\': 2095814Sjkh if (!expand) { 2105814Sjkh if (!start) 2115814Sjkh start = t; 2125814Sjkh *t++ = '\\'; 2135814Sjkh ch = *++p; 2145814Sjkh break; 2155814Sjkh } 2168874Srgrimes 2171590Srgrimes switch (ch = *++p) { 2181590Srgrimes case '\0': 2191590Srgrimes case '\n': 2201590Srgrimes /* hmmm; fix it up as best we can */ 2211590Srgrimes ch = '\\'; 2221590Srgrimes --p; 2231590Srgrimes break; 2241590Srgrimes case 'b': 2251590Srgrimes ch = '\b'; 2261590Srgrimes break; 2271590Srgrimes case 'f': 2281590Srgrimes ch = '\f'; 2291590Srgrimes break; 2301590Srgrimes case 'n': 2311590Srgrimes ch = '\n'; 2321590Srgrimes break; 2331590Srgrimes case 'r': 2341590Srgrimes ch = '\r'; 2351590Srgrimes break; 2361590Srgrimes case 't': 2371590Srgrimes ch = '\t'; 2381590Srgrimes break; 239104108Sjmallett default: 240104108Sjmallett break; 2411590Srgrimes } 2421590Srgrimes break; 243104108Sjmallett default: 244104108Sjmallett break; 2451590Srgrimes } 2461590Srgrimes if (!start) 2471590Srgrimes start = t; 2481590Srgrimes *t++ = (char) ch; 2491590Srgrimes } 2501590Srgrimesdone: argv[argc] = (char *)NULL; 2511590Srgrimes *store_argc = argc; 2521590Srgrimes return(argv); 2531590Srgrimes} 2541590Srgrimes 2551590Srgrimes/* 2561590Srgrimes * Str_FindSubstring -- See if a string contains a particular substring. 2578874Srgrimes * 2581590Srgrimes * Results: If string contains substring, the return value is the location of 2591590Srgrimes * the first matching instance of substring in string. If string doesn't 2601590Srgrimes * contain substring, the return value is NULL. Matching is done on an exact 2611590Srgrimes * character-for-character basis with no wildcards or special characters. 2628874Srgrimes * 2631590Srgrimes * Side effects: None. 264104696Sjmallett * 265104696Sjmallett * XXX should be strstr(3). 2661590Srgrimes */ 2671590Srgrimeschar * 268104696SjmallettStr_FindSubstring(char *string, char *substring) 2691590Srgrimes{ 27094584Sobrien char *a, *b; 2711590Srgrimes 2721590Srgrimes /* 2731590Srgrimes * First scan quickly through the two strings looking for a single- 2741590Srgrimes * character match. When it's found, then compare the rest of the 2751590Srgrimes * substring. 2761590Srgrimes */ 2771590Srgrimes 2781590Srgrimes for (b = substring; *string != 0; string += 1) { 2791590Srgrimes if (*string != *b) 2801590Srgrimes continue; 2811590Srgrimes a = string; 2821590Srgrimes for (;;) { 2831590Srgrimes if (*b == 0) 2841590Srgrimes return(string); 2851590Srgrimes if (*a++ != *b++) 2861590Srgrimes break; 2871590Srgrimes } 2881590Srgrimes b = substring; 2891590Srgrimes } 2901590Srgrimes return((char *) NULL); 2911590Srgrimes} 2921590Srgrimes 2931590Srgrimes/* 2941590Srgrimes * Str_Match -- 2958874Srgrimes * 2961590Srgrimes * See if a particular string matches a particular pattern. 2978874Srgrimes * 2981590Srgrimes * Results: Non-zero is returned if string matches pattern, 0 otherwise. The 2991590Srgrimes * matching operation permits the following special characters in the 3001590Srgrimes * pattern: *?\[] (see the man page for details on what these mean). 3018874Srgrimes * 3021590Srgrimes * Side effects: None. 3031590Srgrimes */ 3041590Srgrimesint 305104696SjmallettStr_Match(char *string, char *pattern) 3061590Srgrimes{ 3071590Srgrimes char c2; 3081590Srgrimes 3091590Srgrimes for (;;) { 3101590Srgrimes /* 3111590Srgrimes * See if we're at the end of both the pattern and the 3121590Srgrimes * string. If, we succeeded. If we're at the end of the 3131590Srgrimes * pattern but not at the end of the string, we failed. 3141590Srgrimes */ 3151590Srgrimes if (*pattern == 0) 3161590Srgrimes return(!*string); 3171590Srgrimes if (*string == 0 && *pattern != '*') 3181590Srgrimes return(0); 3191590Srgrimes /* 3201590Srgrimes * Check for a "*" as the next pattern character. It matches 3211590Srgrimes * any substring. We handle this by calling ourselves 3221590Srgrimes * recursively for each postfix of string, until either we 3231590Srgrimes * match or we reach the end of the string. 3241590Srgrimes */ 3251590Srgrimes if (*pattern == '*') { 3261590Srgrimes pattern += 1; 3271590Srgrimes if (*pattern == 0) 3281590Srgrimes return(1); 3291590Srgrimes while (*string != 0) { 3301590Srgrimes if (Str_Match(string, pattern)) 3311590Srgrimes return(1); 3321590Srgrimes ++string; 3331590Srgrimes } 3341590Srgrimes return(0); 3351590Srgrimes } 3361590Srgrimes /* 3371590Srgrimes * Check for a "?" as the next pattern character. It matches 3381590Srgrimes * any single character. 3391590Srgrimes */ 3401590Srgrimes if (*pattern == '?') 3411590Srgrimes goto thisCharOK; 3421590Srgrimes /* 3431590Srgrimes * Check for a "[" as the next pattern character. It is 3441590Srgrimes * followed by a list of characters that are acceptable, or 3451590Srgrimes * by a range (two characters separated by "-"). 3461590Srgrimes */ 3471590Srgrimes if (*pattern == '[') { 3481590Srgrimes ++pattern; 3491590Srgrimes for (;;) { 3501590Srgrimes if ((*pattern == ']') || (*pattern == 0)) 3511590Srgrimes return(0); 3521590Srgrimes if (*pattern == *string) 3531590Srgrimes break; 3541590Srgrimes if (pattern[1] == '-') { 3551590Srgrimes c2 = pattern[2]; 3561590Srgrimes if (c2 == 0) 3571590Srgrimes return(0); 3581590Srgrimes if ((*pattern <= *string) && 3591590Srgrimes (c2 >= *string)) 3601590Srgrimes break; 3611590Srgrimes if ((*pattern >= *string) && 3621590Srgrimes (c2 <= *string)) 3631590Srgrimes break; 3641590Srgrimes pattern += 2; 3651590Srgrimes } 3661590Srgrimes ++pattern; 3671590Srgrimes } 3681590Srgrimes while ((*pattern != ']') && (*pattern != 0)) 3691590Srgrimes ++pattern; 3701590Srgrimes goto thisCharOK; 3711590Srgrimes } 3721590Srgrimes /* 3731590Srgrimes * If the next pattern character is '/', just strip off the 3741590Srgrimes * '/' so we do exact matching on the character that follows. 3751590Srgrimes */ 3761590Srgrimes if (*pattern == '\\') { 3771590Srgrimes ++pattern; 3781590Srgrimes if (*pattern == 0) 3791590Srgrimes return(0); 3801590Srgrimes } 3811590Srgrimes /* 3821590Srgrimes * There's no special character. Just make sure that the 3831590Srgrimes * next characters of each string match. 3841590Srgrimes */ 3851590Srgrimes if (*pattern != *string) 3861590Srgrimes return(0); 3871590SrgrimesthisCharOK: ++pattern; 3881590Srgrimes ++string; 3891590Srgrimes } 3901590Srgrimes} 3911590Srgrimes 3921590Srgrimes 3931590Srgrimes/*- 3941590Srgrimes *----------------------------------------------------------------------- 3951590Srgrimes * Str_SYSVMatch -- 3968874Srgrimes * Check word against pattern for a match (% is wild), 3978874Srgrimes * 3981590Srgrimes * Results: 3991590Srgrimes * Returns the beginning position of a match or null. The number 4001590Srgrimes * of characters matched is returned in len. 4011590Srgrimes * 4021590Srgrimes * Side Effects: 4031590Srgrimes * None 4041590Srgrimes * 4051590Srgrimes *----------------------------------------------------------------------- 4061590Srgrimes */ 4071590Srgrimeschar * 408104696SjmallettStr_SYSVMatch(char *word, char *pattern, int *len) 4091590Srgrimes{ 4101590Srgrimes char *p = pattern; 4111590Srgrimes char *w = word; 4121590Srgrimes char *m; 4131590Srgrimes 41496071Sjmallett if (*w == '\0') { 41596071Sjmallett /* Zero-length word cannot be matched against */ 41696071Sjmallett *len = 0; 41796071Sjmallett return NULL; 41896071Sjmallett } 41996071Sjmallett 4201590Srgrimes if (*p == '\0') { 4211590Srgrimes /* Null pattern is the whole string */ 4221590Srgrimes *len = strlen(w); 4231590Srgrimes return w; 4241590Srgrimes } 4251590Srgrimes 4261590Srgrimes if ((m = strchr(p, '%')) != NULL) { 4271590Srgrimes /* check that the prefix matches */ 4281590Srgrimes for (; p != m && *w && *w == *p; w++, p++) 4291590Srgrimes continue; 4301590Srgrimes 4311590Srgrimes if (p != m) 4321590Srgrimes return NULL; /* No match */ 4331590Srgrimes 4341590Srgrimes if (*++p == '\0') { 4351590Srgrimes /* No more pattern, return the rest of the string */ 4361590Srgrimes *len = strlen(w); 4371590Srgrimes return w; 4381590Srgrimes } 4391590Srgrimes } 4401590Srgrimes 4411590Srgrimes m = w; 4421590Srgrimes 4431590Srgrimes /* Find a matching tail */ 4441590Srgrimes do 4451590Srgrimes if (strcmp(p, w) == 0) { 4461590Srgrimes *len = w - m; 4471590Srgrimes return m; 4481590Srgrimes } 4491590Srgrimes while (*w++ != '\0'); 4508874Srgrimes 4511590Srgrimes return NULL; 4521590Srgrimes} 4531590Srgrimes 4541590Srgrimes 4551590Srgrimes/*- 4561590Srgrimes *----------------------------------------------------------------------- 4571590Srgrimes * Str_SYSVSubst -- 4581590Srgrimes * Substitute '%' on the pattern with len characters from src. 4591590Srgrimes * If the pattern does not contain a '%' prepend len characters 4601590Srgrimes * from src. 4618874Srgrimes * 4621590Srgrimes * Results: 4631590Srgrimes * None 4641590Srgrimes * 4651590Srgrimes * Side Effects: 4661590Srgrimes * Places result on buf 4671590Srgrimes * 4681590Srgrimes *----------------------------------------------------------------------- 4691590Srgrimes */ 4701590Srgrimesvoid 471104696SjmallettStr_SYSVSubst(Buffer buf, char *pat, char *src, int len) 4721590Srgrimes{ 4731590Srgrimes char *m; 4741590Srgrimes 4751590Srgrimes if ((m = strchr(pat, '%')) != NULL) { 4761590Srgrimes /* Copy the prefix */ 4771590Srgrimes Buf_AddBytes(buf, m - pat, (Byte *) pat); 4781590Srgrimes /* skip the % */ 4791590Srgrimes pat = m + 1; 4801590Srgrimes } 4811590Srgrimes 4821590Srgrimes /* Copy the pattern */ 4831590Srgrimes Buf_AddBytes(buf, len, (Byte *) src); 4841590Srgrimes 4851590Srgrimes /* append the rest */ 4861590Srgrimes Buf_AddBytes(buf, strlen(pat), (Byte *) pat); 4871590Srgrimes} 488