/* str.c revision 144479 */
11590Srgrimes/*- 21590Srgrimes * Copyright (c) 1988, 1989, 1990, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * Copyright (c) 1989 by Berkeley Softworks 51590Srgrimes * All rights reserved. 61590Srgrimes * 71590Srgrimes * This code is derived from software contributed to Berkeley by 81590Srgrimes * Adam de Boor. 91590Srgrimes * 101590Srgrimes * Redistribution and use in source and binary forms, with or without 111590Srgrimes * modification, are permitted provided that the following conditions 121590Srgrimes * are met: 131590Srgrimes * 1. Redistributions of source code must retain the above copyright 141590Srgrimes * notice, this list of conditions and the following disclaimer. 151590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161590Srgrimes * notice, this list of conditions and the following disclaimer in the 171590Srgrimes * documentation and/or other materials provided with the distribution. 181590Srgrimes * 3. All advertising materials mentioning features or use of this software 191590Srgrimes * must display the following acknowledgement: 201590Srgrimes * This product includes software developed by the University of 211590Srgrimes * California, Berkeley and its contributors. 221590Srgrimes * 4. Neither the name of the University nor the names of its contributors 231590Srgrimes * may be used to endorse or promote products derived from this software 241590Srgrimes * without specific prior written permission. 251590Srgrimes * 261590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 271590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 281590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 291590Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 301590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 311590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 321590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 331590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 341590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 351590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 361590Srgrimes * SUCH DAMAGE. 3762833Swsanchez * 3862833Swsanchez * @(#)str.c 5.8 (Berkeley) 6/1/90 391590Srgrimes */ 401590Srgrimes 4162833Swsanchez#include <sys/cdefs.h> 4294587Sobrien__FBSDID("$FreeBSD: head/usr.bin/make/str.c 144479 2005-04-01 13:06:05Z harti $"); 431590Srgrimes 44141104Sharti#include <ctype.h> 45141104Sharti#include <stdlib.h> 46141104Sharti#include <string.h> 471590Srgrimes 48141133Sharti#include "buf.h" 49141104Sharti#include "globals.h" 50141104Sharti#include "str.h" 51141104Sharti#include "util.h" 52141104Sharti#include "var.h" 53141104Sharti 545814Sjkhstatic char **argv, *buffer; 555814Sjkhstatic int argmax, curlen; 565814Sjkh 575814Sjkh/* 585814Sjkh * str_init -- 595814Sjkh * Initialize the strings package 605814Sjkh * 615814Sjkh */ 625814Sjkhvoid 63104696Sjmallettstr_init(void) 645814Sjkh{ 655814Sjkh char *p1; 66138232Sharti 67138264Sharti argv = emalloc(((argmax = 50) + 1) * sizeof(char *)); 685814Sjkh argv[0] = Var_Value(".MAKE", VAR_GLOBAL, &p1); 695814Sjkh} 705814Sjkh 711590Srgrimes/*- 721590Srgrimes * str_concat -- 73138547Sharti * concatenate the two strings, inserting a space or slash between them. 741590Srgrimes * 751590Srgrimes * returns -- 761590Srgrimes * the resulting string in allocated space. 
771590Srgrimes */ 781590Srgrimeschar * 79138547Shartistr_concat(const char *s1, const char *s2, int flags) 801590Srgrimes{ 8194584Sobrien int len1, len2; 8294584Sobrien char *result; 831590Srgrimes 841590Srgrimes /* get the length of both strings */ 8594638Sobrien len1 = strlen(s1); 8694638Sobrien len2 = strlen(s2); 871590Srgrimes 881590Srgrimes /* allocate length plus separator plus EOS */ 89138264Sharti result = emalloc(len1 + len2 + 2); 901590Srgrimes 911590Srgrimes /* copy first string into place */ 9294638Sobrien memcpy(result, s1, len1); 931590Srgrimes 941590Srgrimes /* add separator character */ 9594638Sobrien if (flags & STR_ADDSPACE) { 9694638Sobrien result[len1] = ' '; 9794638Sobrien ++len1; 9894638Sobrien } else if (flags & STR_ADDSLASH) { 9994638Sobrien result[len1] = '/'; 10094638Sobrien ++len1; 1011590Srgrimes } 1021590Srgrimes 10394638Sobrien /* copy second string plus EOS into place */ 10494638Sobrien memcpy(result + len1, s2, len2 + 1); 1051590Srgrimes 106138232Sharti return (result); 1071590Srgrimes} 1081590Srgrimes 1091590Srgrimes/*- 1101590Srgrimes * brk_string -- 1111590Srgrimes * Fracture a string into an array of words (as delineated by tabs or 1121590Srgrimes * spaces) taking quotation marks into account. Leading tabs/spaces 1131590Srgrimes * are ignored. 1141590Srgrimes * 1151590Srgrimes * returns -- 1161590Srgrimes * Pointer to the array of pointers to the words. To make life easier, 1171590Srgrimes * the first word is always the value of the .MAKE variable. 1181590Srgrimes */ 1191590Srgrimeschar ** 120143253Shartibrk_string(const char *str, int *store_argc, Boolean expand) 1211590Srgrimes{ 12294584Sobrien int argc, ch; 123143253Sharti char inquote; 124143253Sharti const char *p; 125143253Sharti char *start, *t; 1261590Srgrimes int len; 1271590Srgrimes 1281590Srgrimes /* skip leading space chars. 
*/ 1291590Srgrimes for (; *str == ' ' || *str == '\t'; ++str) 1301590Srgrimes continue; 1311590Srgrimes 1321590Srgrimes /* allocate room for a copy of the string */ 1335814Sjkh if ((len = strlen(str) + 1) > curlen) { 1345814Sjkh if (buffer) 1355814Sjkh free(buffer); 1365814Sjkh buffer = emalloc(curlen = len); 1375814Sjkh } 1381590Srgrimes 1391590Srgrimes /* 1401590Srgrimes * copy the string; at the same time, parse backslashes, 1411590Srgrimes * quotes and build the argument list. 1421590Srgrimes */ 1431590Srgrimes argc = 1; 1441590Srgrimes inquote = '\0'; 1455814Sjkh for (p = str, start = t = buffer;; ++p) { 1461590Srgrimes switch(ch = *p) { 1471590Srgrimes case '"': 1481590Srgrimes case '\'': 14949938Shoek if (inquote) { 150124966Sru if (ch != inquote) 1511590Srgrimes break; 152124966Sru inquote = '\0'; 153124966Sru /* Don't miss "" or '' */ 154124966Sru if (!start) 155124966Sru start = t; 156124840Sru } else 157138264Sharti inquote = (char)ch; 158124840Sru if (expand) 159124840Sru continue; 160124840Sru break; 1611590Srgrimes case ' ': 1621590Srgrimes case '\t': 1635814Sjkh case '\n': 1641590Srgrimes if (inquote) 1651590Srgrimes break; 1661590Srgrimes if (!start) 1671590Srgrimes continue; 1681590Srgrimes /* FALLTHROUGH */ 1691590Srgrimes case '\0': 1701590Srgrimes /* 1711590Srgrimes * end of a token -- make sure there's enough argv 1721590Srgrimes * space and save off a pointer. 
1731590Srgrimes */ 1745814Sjkh if (!start) 1755814Sjkh goto done; 1765814Sjkh 1771590Srgrimes *t++ = '\0'; 1781590Srgrimes if (argc == argmax) { 1791590Srgrimes argmax *= 2; /* ramp up fast */ 180138264Sharti argv = erealloc(argv, 18118730Ssteve (argmax + 1) * sizeof(char *)); 1821590Srgrimes } 1831590Srgrimes argv[argc++] = start; 184138264Sharti start = NULL; 1851590Srgrimes if (ch == '\n' || ch == '\0') 1861590Srgrimes goto done; 1871590Srgrimes continue; 1881590Srgrimes case '\\': 1895814Sjkh if (!expand) { 1905814Sjkh if (!start) 1915814Sjkh start = t; 1925814Sjkh *t++ = '\\'; 1935814Sjkh ch = *++p; 1945814Sjkh break; 1955814Sjkh } 1968874Srgrimes 1971590Srgrimes switch (ch = *++p) { 1981590Srgrimes case '\0': 1991590Srgrimes case '\n': 2001590Srgrimes /* hmmm; fix it up as best we can */ 2011590Srgrimes ch = '\\'; 2021590Srgrimes --p; 2031590Srgrimes break; 2041590Srgrimes case 'b': 2051590Srgrimes ch = '\b'; 2061590Srgrimes break; 2071590Srgrimes case 'f': 2081590Srgrimes ch = '\f'; 2091590Srgrimes break; 2101590Srgrimes case 'n': 2111590Srgrimes ch = '\n'; 2121590Srgrimes break; 2131590Srgrimes case 'r': 2141590Srgrimes ch = '\r'; 2151590Srgrimes break; 2161590Srgrimes case 't': 2171590Srgrimes ch = '\t'; 2181590Srgrimes break; 219104108Sjmallett default: 220104108Sjmallett break; 2211590Srgrimes } 2221590Srgrimes break; 223104108Sjmallett default: 224104108Sjmallett break; 2251590Srgrimes } 2261590Srgrimes if (!start) 2271590Srgrimes start = t; 228138232Sharti *t++ = (char)ch; 2291590Srgrimes } 230138264Shartidone: argv[argc] = NULL; 231141269Sharti if (store_argc != NULL) 232141269Sharti *store_argc = argc; 233138232Sharti return (argv); 2341590Srgrimes} 2351590Srgrimes 2361590Srgrimes/* 237140870Sharti * Quote a string for appending it to MAKEFLAGS. According to Posix the 238140870Sharti * kind of quoting here is implementation-defined. 
This quoting must ensure 239140870Sharti * that the parsing of MAKEFLAGS's contents in a sub-shell yields the same 240140870Sharti * options, option arguments and macro definitions as in the calling make. 241140870Sharti * We simply quote all blanks, which according to Posix are space and tab 242140870Sharti * in the POSIX locale. Don't use isblank because in that case makes with 243140870Sharti * different locale settings could not communicate. We must also quote 244140870Sharti * backslashes obviously. 245140870Sharti */ 246140870Shartichar * 247140870ShartiMAKEFLAGS_quote(const char *str) 248140870Sharti{ 249140870Sharti char *ret, *q; 250140870Sharti const char *p; 251140870Sharti 252140870Sharti /* assume worst case - everything has to be quoted */ 253140870Sharti ret = emalloc(strlen(str) * 2 + 1); 254140870Sharti 255140870Sharti p = str; 256140870Sharti q = ret; 257140870Sharti while (*p != '\0') { 258140870Sharti switch (*p) { 259140870Sharti 260140870Sharti case ' ': 261140870Sharti case '\t': 262140870Sharti *q++ = '\\'; 263140870Sharti break; 264140870Sharti 265140870Sharti default: 266140870Sharti break; 267140870Sharti } 268140870Sharti *q++ = *p++; 269140870Sharti } 270140870Sharti *q++ = '\0'; 271140870Sharti return (ret); 272140870Sharti} 273140870Sharti 274140870Shartichar ** 275140870ShartiMAKEFLAGS_break(const char *str, int *pargc) 276140870Sharti{ 277140870Sharti char *q, *start; 278140870Sharti int len; 279140870Sharti 280140870Sharti /* allocate room for a copy of the string */ 281140870Sharti if ((len = strlen(str) + 1) > curlen) 282140870Sharti buffer = erealloc(buffer, curlen = len); 283140870Sharti 284140870Sharti start = NULL; 285140870Sharti *pargc = 1; 286140870Sharti 287140870Sharti for (q = buffer;;) { 288140870Sharti switch (*str) { 289140870Sharti case ' ': 290140870Sharti case '\t': 291140870Sharti /* word separator */ 292140870Sharti if (start == NULL) { 293140870Sharti /* not in a word */ 294140870Sharti str++; 295140870Sharti 
continue; 296140870Sharti } 297140870Sharti /* FALLTHRU */ 298140870Sharti case '\0': 299140870Sharti if (start == NULL) 300140870Sharti goto done; 301140870Sharti 302140870Sharti /* finish word */ 303140870Sharti *q++ = '\0'; 304140870Sharti if (argmax == *pargc) { 305140870Sharti argmax *= 2; 306140870Sharti argv = erealloc(argv, 307140870Sharti sizeof(*argv) * (argmax + 1)); 308140870Sharti } 309140870Sharti argv[(*pargc)++] = start; 310140870Sharti start = NULL; 311140870Sharti 312140870Sharti if (*str++ == '\0') 313140870Sharti goto done; 314140870Sharti continue; 315140870Sharti 316140870Sharti case '\\': 317140870Sharti if (str[1] == ' ' || str[1] == '\t') 318140870Sharti /* was a quote */ 319140870Sharti str++; 320140870Sharti break; 321140870Sharti 322140870Sharti default: 323140870Sharti break; 324140870Sharti } 325140870Sharti if (start == NULL) 326140870Sharti /* start of new word */ 327140870Sharti start = q; 328140870Sharti *q++ = *str++; 329140870Sharti } 330140870Sharti done: 331140870Sharti argv[(*pargc)] = NULL; 332140870Sharti return (argv); 333140870Sharti} 334140870Sharti 335140870Sharti/* 3361590Srgrimes * Str_Match -- 3378874Srgrimes * 3381590Srgrimes * See if a particular string matches a particular pattern. 3398874Srgrimes * 3401590Srgrimes * Results: Non-zero is returned if string matches pattern, 0 otherwise. The 3411590Srgrimes * matching operation permits the following special characters in the 3421590Srgrimes * pattern: *?\[] (see the man page for details on what these mean). 3438874Srgrimes * 3441590Srgrimes * Side effects: None. 3451590Srgrimes */ 3461590Srgrimesint 347106106SjmallettStr_Match(const char *string, const char *pattern) 3481590Srgrimes{ 3491590Srgrimes char c2; 3501590Srgrimes 3511590Srgrimes for (;;) { 3521590Srgrimes /* 3531590Srgrimes * See if we're at the end of both the pattern and the 3541590Srgrimes * string. If, we succeeded. 
If we're at the end of the 3551590Srgrimes * pattern but not at the end of the string, we failed. 3561590Srgrimes */ 3571590Srgrimes if (*pattern == 0) 358138232Sharti return (!*string); 3591590Srgrimes if (*string == 0 && *pattern != '*') 360138232Sharti return (0); 3611590Srgrimes /* 3621590Srgrimes * Check for a "*" as the next pattern character. It matches 3631590Srgrimes * any substring. We handle this by calling ourselves 3641590Srgrimes * recursively for each postfix of string, until either we 3651590Srgrimes * match or we reach the end of the string. 3661590Srgrimes */ 3671590Srgrimes if (*pattern == '*') { 3681590Srgrimes pattern += 1; 3691590Srgrimes if (*pattern == 0) 370138232Sharti return (1); 3711590Srgrimes while (*string != 0) { 3721590Srgrimes if (Str_Match(string, pattern)) 373138232Sharti return (1); 3741590Srgrimes ++string; 3751590Srgrimes } 376138232Sharti return (0); 3771590Srgrimes } 3781590Srgrimes /* 3791590Srgrimes * Check for a "?" as the next pattern character. It matches 3801590Srgrimes * any single character. 3811590Srgrimes */ 3821590Srgrimes if (*pattern == '?') 3831590Srgrimes goto thisCharOK; 3841590Srgrimes /* 3851590Srgrimes * Check for a "[" as the next pattern character. It is 3861590Srgrimes * followed by a list of characters that are acceptable, or 3871590Srgrimes * by a range (two characters separated by "-"). 
3881590Srgrimes */ 3891590Srgrimes if (*pattern == '[') { 3901590Srgrimes ++pattern; 3911590Srgrimes for (;;) { 3921590Srgrimes if ((*pattern == ']') || (*pattern == 0)) 393138232Sharti return (0); 3941590Srgrimes if (*pattern == *string) 3951590Srgrimes break; 3961590Srgrimes if (pattern[1] == '-') { 3971590Srgrimes c2 = pattern[2]; 3981590Srgrimes if (c2 == 0) 399138232Sharti return (0); 4001590Srgrimes if ((*pattern <= *string) && 4011590Srgrimes (c2 >= *string)) 4021590Srgrimes break; 4031590Srgrimes if ((*pattern >= *string) && 4041590Srgrimes (c2 <= *string)) 4051590Srgrimes break; 4061590Srgrimes pattern += 2; 4071590Srgrimes } 4081590Srgrimes ++pattern; 4091590Srgrimes } 4101590Srgrimes while ((*pattern != ']') && (*pattern != 0)) 4111590Srgrimes ++pattern; 4121590Srgrimes goto thisCharOK; 4131590Srgrimes } 4141590Srgrimes /* 4151590Srgrimes * If the next pattern character is '/', just strip off the 4161590Srgrimes * '/' so we do exact matching on the character that follows. 4171590Srgrimes */ 4181590Srgrimes if (*pattern == '\\') { 4191590Srgrimes ++pattern; 4201590Srgrimes if (*pattern == 0) 421138232Sharti return (0); 4221590Srgrimes } 4231590Srgrimes /* 4241590Srgrimes * There's no special character. Just make sure that the 4251590Srgrimes * next characters of each string match. 4261590Srgrimes */ 4271590Srgrimes if (*pattern != *string) 428138232Sharti return (0); 4291590SrgrimesthisCharOK: ++pattern; 4301590Srgrimes ++string; 4311590Srgrimes } 4321590Srgrimes} 4331590Srgrimes 4341590Srgrimes 435144479Sharti/** 436144479Sharti * Str_SYSVMatch 4378874Srgrimes * Check word against pattern for a match (% is wild), 4388874Srgrimes * 4391590Srgrimes * Results: 4401590Srgrimes * Returns the beginning position of a match or null. The number 4411590Srgrimes * of characters matched is returned in len. 
4421590Srgrimes */ 443106106Sjmallettconst char * 444106106SjmallettStr_SYSVMatch(const char *word, const char *pattern, int *len) 4451590Srgrimes{ 446144479Sharti const char *m, *p, *w; 4471590Srgrimes 448144479Sharti p = pattern; 449144479Sharti w = word; 450106106Sjmallett 451144479Sharti if (*w == '\0') { 452144479Sharti /* Zero-length word cannot be matched against */ 453144479Sharti *len = 0; 454144479Sharti return (NULL); 455144479Sharti } 45696071Sjmallett 457144479Sharti if (*p == '\0') { 458144479Sharti /* Null pattern is the whole string */ 459144479Sharti *len = strlen(w); 460144479Sharti return (w); 461144479Sharti } 4621590Srgrimes 463144479Sharti if ((m = strchr(p, '%')) != NULL) { 464144479Sharti /* check that the prefix matches */ 465144479Sharti for (; p != m && *w && *w == *p; w++, p++) 466144479Sharti continue; 4671590Srgrimes 468144479Sharti if (p != m) 469144479Sharti return (NULL); /* No match */ 4701590Srgrimes 471144479Sharti if (*++p == '\0') { 472144479Sharti /* No more pattern, return the rest of the string */ 473144479Sharti *len = strlen(w); 474144479Sharti return (w); 475144479Sharti } 4761590Srgrimes } 4771590Srgrimes 478144479Sharti m = w; 4791590Srgrimes 480144479Sharti /* Find a matching tail */ 481144479Sharti do 482144479Sharti if (strcmp(p, w) == 0) { 483144479Sharti *len = w - m; 484144479Sharti return (m); 485144479Sharti } 486144479Sharti while (*w++ != '\0'); 4878874Srgrimes 488144479Sharti return (NULL); 4891590Srgrimes} 4901590Srgrimes 4911590Srgrimes 492144479Sharti/** 493144479Sharti * Str_SYSVSubst 4941590Srgrimes * Substitute '%' on the pattern with len characters from src. 4951590Srgrimes * If the pattern does not contain a '%' prepend len characters 4961590Srgrimes * from src. 
4978874Srgrimes * 4981590Srgrimes * Side Effects: 4991590Srgrimes * Places result on buf 5001590Srgrimes */ 5011590Srgrimesvoid 502141133ShartiStr_SYSVSubst(Buffer *buf, const char *pat, const char *src, int len) 5031590Srgrimes{ 504144479Sharti const char *m; 5051590Srgrimes 506144479Sharti if ((m = strchr(pat, '%')) != NULL) { 507144479Sharti /* Copy the prefix */ 508144479Sharti Buf_AppendRange(buf, pat, m); 509144479Sharti /* skip the % */ 510144479Sharti pat = m + 1; 511144479Sharti } 5121590Srgrimes 513144479Sharti /* Copy the pattern */ 514144479Sharti Buf_AddBytes(buf, len, (const Byte *)src); 5151590Srgrimes 516144479Sharti /* append the rest */ 517144479Sharti Buf_Append(buf, pat); 5181590Srgrimes} 519