str.c revision 141133
11590Srgrimes/*-
21590Srgrimes * Copyright (c) 1988, 1989, 1990, 1993
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes * Copyright (c) 1989 by Berkeley Softworks
51590Srgrimes * All rights reserved.
61590Srgrimes *
71590Srgrimes * This code is derived from software contributed to Berkeley by
81590Srgrimes * Adam de Boor.
91590Srgrimes *
101590Srgrimes * Redistribution and use in source and binary forms, with or without
111590Srgrimes * modification, are permitted provided that the following conditions
121590Srgrimes * are met:
131590Srgrimes * 1. Redistributions of source code must retain the above copyright
141590Srgrimes *    notice, this list of conditions and the following disclaimer.
151590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161590Srgrimes *    notice, this list of conditions and the following disclaimer in the
171590Srgrimes *    documentation and/or other materials provided with the distribution.
181590Srgrimes * 3. All advertising materials mentioning features or use of this software
191590Srgrimes *    must display the following acknowledgement:
201590Srgrimes *	This product includes software developed by the University of
211590Srgrimes *	California, Berkeley and its contributors.
221590Srgrimes * 4. Neither the name of the University nor the names of its contributors
231590Srgrimes *    may be used to endorse or promote products derived from this software
241590Srgrimes *    without specific prior written permission.
251590Srgrimes *
261590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361590Srgrimes * SUCH DAMAGE.
3762833Swsanchez *
3862833Swsanchez * @(#)str.c	5.8 (Berkeley) 6/1/90
391590Srgrimes */
401590Srgrimes
4162833Swsanchez#include <sys/cdefs.h>
4294587Sobrien__FBSDID("$FreeBSD: head/usr.bin/make/str.c 141133 2005-02-02 07:36:18Z harti $");
431590Srgrimes
44141104Sharti#include <ctype.h>
45141104Sharti#include <stdlib.h>
46141104Sharti#include <string.h>
471590Srgrimes
48141133Sharti#include "buf.h"
49141104Sharti#include "globals.h"
50141104Sharti#include "str.h"
51141104Sharti#include "util.h"
52141104Sharti#include "var.h"
53141104Sharti
/*
 * State shared by the word-splitting routines in this file.
 *
 * All of it is reused from one call to the next, so the vector and the
 * words returned by a splitting routine are overwritten by the next call.
 */
static char **argv;	/* word vector; slot 0 is filled in by str_init() */
static char *buffer;	/* storage holding the words that argv points at */
static int argmax;	/* word slots in argv, not counting the final NULL */
static int curlen;	/* number of bytes currently allocated for buffer */
565814Sjkh
575814Sjkh/*
585814Sjkh * str_init --
595814Sjkh *	Initialize the strings package
605814Sjkh *
615814Sjkh */
625814Sjkhvoid
63104696Sjmallettstr_init(void)
645814Sjkh{
655814Sjkh    char *p1;
66138232Sharti
67138264Sharti    argv = emalloc(((argmax = 50) + 1) * sizeof(char *));
685814Sjkh    argv[0] = Var_Value(".MAKE", VAR_GLOBAL, &p1);
695814Sjkh}
705814Sjkh
711590Srgrimes/*-
721590Srgrimes * str_concat --
73138547Sharti *	concatenate the two strings, inserting a space or slash between them.
741590Srgrimes *
751590Srgrimes * returns --
761590Srgrimes *	the resulting string in allocated space.
771590Srgrimes */
781590Srgrimeschar *
79138547Shartistr_concat(const char *s1, const char *s2, int flags)
801590Srgrimes{
8194584Sobrien	int len1, len2;
8294584Sobrien	char *result;
831590Srgrimes
841590Srgrimes	/* get the length of both strings */
8594638Sobrien	len1 = strlen(s1);
8694638Sobrien	len2 = strlen(s2);
871590Srgrimes
881590Srgrimes	/* allocate length plus separator plus EOS */
89138264Sharti	result = emalloc(len1 + len2 + 2);
901590Srgrimes
911590Srgrimes	/* copy first string into place */
9294638Sobrien	memcpy(result, s1, len1);
931590Srgrimes
941590Srgrimes	/* add separator character */
9594638Sobrien	if (flags & STR_ADDSPACE) {
9694638Sobrien		result[len1] = ' ';
9794638Sobrien		++len1;
9894638Sobrien	} else if (flags & STR_ADDSLASH) {
9994638Sobrien		result[len1] = '/';
10094638Sobrien		++len1;
1011590Srgrimes	}
1021590Srgrimes
10394638Sobrien	/* copy second string plus EOS into place */
10494638Sobrien	memcpy(result + len1, s2, len2 + 1);
1051590Srgrimes
106138232Sharti	return (result);
1071590Srgrimes}
1081590Srgrimes
1091590Srgrimes/*-
1101590Srgrimes * brk_string --
1111590Srgrimes *	Fracture a string into an array of words (as delineated by tabs or
1121590Srgrimes *	spaces) taking quotation marks into account.  Leading tabs/spaces
1131590Srgrimes *	are ignored.
1141590Srgrimes *
1151590Srgrimes * returns --
1161590Srgrimes *	Pointer to the array of pointers to the words.  To make life easier,
1171590Srgrimes *	the first word is always the value of the .MAKE variable.
1181590Srgrimes */
1191590Srgrimeschar **
120104696Sjmallettbrk_string(char *str, int *store_argc, Boolean expand)
1211590Srgrimes{
12294584Sobrien	int argc, ch;
12394584Sobrien	char inquote, *p, *start, *t;
1241590Srgrimes	int len;
1251590Srgrimes
1261590Srgrimes	/* skip leading space chars. */
1271590Srgrimes	for (; *str == ' ' || *str == '\t'; ++str)
1281590Srgrimes		continue;
1291590Srgrimes
1301590Srgrimes	/* allocate room for a copy of the string */
1315814Sjkh	if ((len = strlen(str) + 1) > curlen) {
1325814Sjkh		if (buffer)
1335814Sjkh		    free(buffer);
1345814Sjkh		buffer = emalloc(curlen = len);
1355814Sjkh	}
1361590Srgrimes
1371590Srgrimes	/*
1381590Srgrimes	 * copy the string; at the same time, parse backslashes,
1391590Srgrimes	 * quotes and build the argument list.
1401590Srgrimes	 */
1411590Srgrimes	argc = 1;
1421590Srgrimes	inquote = '\0';
1435814Sjkh	for (p = str, start = t = buffer;; ++p) {
1441590Srgrimes		switch(ch = *p) {
1451590Srgrimes		case '"':
1461590Srgrimes		case '\'':
14749938Shoek			if (inquote) {
148124966Sru				if (ch != inquote)
1491590Srgrimes					break;
150124966Sru				inquote = '\0';
151124966Sru				/* Don't miss "" or '' */
152124966Sru				if (!start)
153124966Sru					start = t;
154124840Sru			} else
155138264Sharti				inquote = (char)ch;
156124840Sru			if (expand)
157124840Sru				continue;
158124840Sru			break;
1591590Srgrimes		case ' ':
1601590Srgrimes		case '\t':
1615814Sjkh		case '\n':
1621590Srgrimes			if (inquote)
1631590Srgrimes				break;
1641590Srgrimes			if (!start)
1651590Srgrimes				continue;
1661590Srgrimes			/* FALLTHROUGH */
1671590Srgrimes		case '\0':
1681590Srgrimes			/*
1691590Srgrimes			 * end of a token -- make sure there's enough argv
1701590Srgrimes			 * space and save off a pointer.
1711590Srgrimes			 */
1725814Sjkh			if (!start)
1735814Sjkh			    goto done;
1745814Sjkh
1751590Srgrimes			*t++ = '\0';
1761590Srgrimes			if (argc == argmax) {
1771590Srgrimes				argmax *= 2;		/* ramp up fast */
178138264Sharti				argv = erealloc(argv,
17918730Ssteve				    (argmax + 1) * sizeof(char *));
1801590Srgrimes			}
1811590Srgrimes			argv[argc++] = start;
182138264Sharti			start = NULL;
1831590Srgrimes			if (ch == '\n' || ch == '\0')
1841590Srgrimes				goto done;
1851590Srgrimes			continue;
1861590Srgrimes		case '\\':
1875814Sjkh			if (!expand) {
1885814Sjkh				if (!start)
1895814Sjkh					start = t;
1905814Sjkh				*t++ = '\\';
1915814Sjkh				ch = *++p;
1925814Sjkh				break;
1935814Sjkh			}
1948874Srgrimes
1951590Srgrimes			switch (ch = *++p) {
1961590Srgrimes			case '\0':
1971590Srgrimes			case '\n':
1981590Srgrimes				/* hmmm; fix it up as best we can */
1991590Srgrimes				ch = '\\';
2001590Srgrimes				--p;
2011590Srgrimes				break;
2021590Srgrimes			case 'b':
2031590Srgrimes				ch = '\b';
2041590Srgrimes				break;
2051590Srgrimes			case 'f':
2061590Srgrimes				ch = '\f';
2071590Srgrimes				break;
2081590Srgrimes			case 'n':
2091590Srgrimes				ch = '\n';
2101590Srgrimes				break;
2111590Srgrimes			case 'r':
2121590Srgrimes				ch = '\r';
2131590Srgrimes				break;
2141590Srgrimes			case 't':
2151590Srgrimes				ch = '\t';
2161590Srgrimes				break;
217104108Sjmallett			default:
218104108Sjmallett				break;
2191590Srgrimes			}
2201590Srgrimes			break;
221104108Sjmallett		default:
222104108Sjmallett			break;
2231590Srgrimes		}
2241590Srgrimes		if (!start)
2251590Srgrimes			start = t;
226138232Sharti		*t++ = (char)ch;
2271590Srgrimes	}
228138264Shartidone:	argv[argc] = NULL;
2291590Srgrimes	*store_argc = argc;
230138232Sharti	return (argv);
2311590Srgrimes}
2321590Srgrimes
/*
 * Quote a string so it can be appended to MAKEFLAGS.  Posix leaves the
 * kind of quoting implementation-defined; it only has to guarantee that
 * parsing MAKEFLAGS in a sub-make yields the same options, option
 * arguments and macro definitions as in the calling make.
 *
 * Every blank is preceded by a backslash.  Blanks are space and tab, as
 * in the POSIX locale; isblank() is deliberately not used so that makes
 * running under different locales can still communicate.
 *
 * NOTE(review): backslashes themselves are copied unchanged by this
 * code; only space and tab receive an escape.
 *
 * Returns the quoted copy in space obtained from emalloc().
 */
char *
MAKEFLAGS_quote(const char *str)
{
	const char *in;
	char *out, *quoted;

	/* worst case: every input byte needs a backslash, plus the NUL */
	quoted = emalloc(strlen(str) * 2 + 1);

	out = quoted;
	for (in = str; *in != '\0'; in++) {
		if (*in == ' ' || *in == '\t')
			*out++ = '\\';
		*out++ = *in;
	}
	*out = '\0';

	return (quoted);
}
270140870Sharti
271140870Shartichar **
272140870ShartiMAKEFLAGS_break(const char *str, int *pargc)
273140870Sharti{
274140870Sharti	char *q, *start;
275140870Sharti	int len;
276140870Sharti
277140870Sharti	/* allocate room for a copy of the string */
278140870Sharti	if ((len = strlen(str) + 1) > curlen)
279140870Sharti		buffer = erealloc(buffer, curlen = len);
280140870Sharti
281140870Sharti	start = NULL;
282140870Sharti	*pargc = 1;
283140870Sharti
284140870Sharti	for (q = buffer;;) {
285140870Sharti		switch (*str) {
286140870Sharti		  case ' ':
287140870Sharti		  case '\t':
288140870Sharti			/* word separator */
289140870Sharti			if (start == NULL) {
290140870Sharti				/* not in a word */
291140870Sharti				str++;
292140870Sharti				continue;
293140870Sharti			}
294140870Sharti			/* FALLTHRU */
295140870Sharti		  case '\0':
296140870Sharti			if (start == NULL)
297140870Sharti				goto done;
298140870Sharti
299140870Sharti			/* finish word */
300140870Sharti			*q++ = '\0';
301140870Sharti			if (argmax == *pargc) {
302140870Sharti				argmax *= 2;
303140870Sharti				argv = erealloc(argv,
304140870Sharti				    sizeof(*argv) * (argmax + 1));
305140870Sharti			}
306140870Sharti			argv[(*pargc)++] = start;
307140870Sharti			start = NULL;
308140870Sharti
309140870Sharti			if (*str++ == '\0')
310140870Sharti				goto done;
311140870Sharti			continue;
312140870Sharti
313140870Sharti		  case '\\':
314140870Sharti			if (str[1] == ' ' || str[1] == '\t')
315140870Sharti				/* was a quote */
316140870Sharti				str++;
317140870Sharti			break;
318140870Sharti
319140870Sharti		  default:
320140870Sharti			break;
321140870Sharti		}
322140870Sharti		if (start == NULL)
323140870Sharti			/* start of new word */
324140870Sharti			start = q;
325140870Sharti		*q++ = *str++;
326140870Sharti	}
327140870Sharti  done:
328140870Sharti	argv[(*pargc)] = NULL;
329140870Sharti	return (argv);
330140870Sharti}
331140870Sharti
/*
 * Str_Match --
 *
 * See if a particular string matches a particular pattern.
 *
 * The pattern may contain the following special characters:
 *	*	matches any substring, including the empty one
 *	?	matches any single character
 *	[...]	matches one character out of a list and/or ranges
 *		written as two characters separated by "-"
 *	\c	matches the character c literally
 *
 * Results: 1 is returned if string matches pattern, 0 otherwise.
 *
 * Side effects: None.
 */
int
Str_Match(const char *string, const char *pattern)
{
	char c2;

	for (;;) {
		/*
		 * See if we're at the end of both the pattern and the
		 * string.  If so, we succeeded.  If we're at the end of the
		 * pattern but not at the end of the string, we failed.
		 */
		if (*pattern == 0)
			return (!*string);
		if (*string == 0 && *pattern != '*')
			return (0);
		/*
		 * Check for a "*" as the next pattern character.  It matches
		 * any substring.  We handle this by calling ourselves
		 * recursively for each postfix of string, until either we
		 * match or we reach the end of the string.
		 */
		if (*pattern == '*') {
			/*
			 * A run of stars means the same as a single one.
			 * Collapsing it lets "a**" match "a" and avoids
			 * pointless nested recursion.
			 */
			while (*pattern == '*')
				++pattern;
			if (*pattern == 0)
				return (1);
			while (*string != 0) {
				if (Str_Match(string, pattern))
					return (1);
				++string;
			}
			return (0);
		}
		/*
		 * Check for a "?" as the next pattern character.  It matches
		 * any single character.
		 */
		if (*pattern == '?')
			goto thisCharOK;
		/*
		 * Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-").
		 */
		if (*pattern == '[') {
			++pattern;
			for (;;) {
				/* list exhausted without a hit */
				if ((*pattern == ']') || (*pattern == 0))
					return (0);
				if (*pattern == *string)
					break;
				if (pattern[1] == '-') {
					c2 = pattern[2];
					if (c2 == 0)
						return (0);
					/* ranges work in either direction */
					if ((*pattern <= *string) &&
					    (c2 >= *string))
						break;
					if ((*pattern >= *string) &&
					    (c2 <= *string))
						break;
					pattern += 2;
				}
				++pattern;
			}
			/* skip the remainder of the list */
			while ((*pattern != ']') && (*pattern != 0))
				++pattern;
			/*
			 * Unterminated list: step back so that the increment
			 * at thisCharOK lands on the NUL instead of moving
			 * past the end of the pattern.
			 */
			if (*pattern == 0)
				--pattern;
			goto thisCharOK;
		}
		/*
		 * If the next pattern character is '\', just strip off the
		 * '\' so we do exact matching on the character that follows.
		 */
		if (*pattern == '\\') {
			++pattern;
			if (*pattern == 0)
				return (0);
		}
		/*
		 * There's no special character.  Just make sure that the
		 * next characters of each string match.
		 */
		if (*pattern != *string)
			return (0);
thisCharOK:	++pattern;
		++string;
	}
}
4301590Srgrimes
4311590Srgrimes
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Check word against a System V style pattern, in which a single
 *	'%' acts as the wildcard.  The text before the '%' must be a
 *	prefix of word and the text after it must be a suffix.  A pattern
 *	without '%' is treated as a suffix only.
 *
 * Results:
 *	Returns a pointer into word where the wildcard part begins, or
 *	NULL if there is no match.  On success the number of characters
 *	covered by the wildcard is stored in *len.  An empty word never
 *	matches (and sets *len to 0); an empty pattern matches all of word.
 *
 * Side Effects:
 *	None
 *
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, int *len)
{
	const char *percent, *tail;

	if (word[0] == '\0') {
		/* Zero-length word cannot be matched against */
		*len = 0;
		return (NULL);
	}

	if (pattern[0] == '\0') {
		/* Null pattern is the whole string */
		*len = (int)strlen(word);
		return (word);
	}

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* consume the literal prefix in front of the '%' */
		while (pattern != percent && *word != '\0' &&
		    *word == *pattern) {
			word++;
			pattern++;
		}

		if (pattern != percent)
			return (NULL);	/* prefix differs */

		/* step over the '%' */
		pattern++;
		if (*pattern == '\0') {
			/* nothing after the '%': the rest of word matches */
			*len = (int)strlen(word);
			return (word);
		}
	}

	/* look for the position at which the remaining pattern is the tail */
	for (tail = word;; tail++) {
		if (strcmp(pattern, tail) == 0) {
			*len = (int)(tail - word);
			return (word);
		}
		if (*tail == '\0')
			break;
	}

	return (NULL);
}
4931590Srgrimes
4941590Srgrimes
4951590Srgrimes/*-
4961590Srgrimes *-----------------------------------------------------------------------
4971590Srgrimes * Str_SYSVSubst --
4981590Srgrimes *	Substitute '%' on the pattern with len characters from src.
4991590Srgrimes *	If the pattern does not contain a '%' prepend len characters
5001590Srgrimes *	from src.
5018874Srgrimes *
5021590Srgrimes * Results:
5031590Srgrimes *	None
5041590Srgrimes *
5051590Srgrimes * Side Effects:
5061590Srgrimes *	Places result on buf
5071590Srgrimes *
5081590Srgrimes *-----------------------------------------------------------------------
5091590Srgrimes */
5101590Srgrimesvoid
511141133ShartiStr_SYSVSubst(Buffer *buf, const char *pat, const char *src, int len)
5121590Srgrimes{
513106106Sjmallett    const char *m;
5141590Srgrimes
5151590Srgrimes    if ((m = strchr(pat, '%')) != NULL) {
5161590Srgrimes	/* Copy the prefix */
517138548Sharti	Buf_AddBytes(buf, m - pat, (const Byte *)pat);
5181590Srgrimes	/* skip the % */
5191590Srgrimes	pat = m + 1;
5201590Srgrimes    }
5211590Srgrimes
5221590Srgrimes    /* Copy the pattern */
523138548Sharti    Buf_AddBytes(buf, len, (const Byte *)src);
5241590Srgrimes
5251590Srgrimes    /* append the rest */
526138548Sharti    Buf_AddBytes(buf, strlen(pat), (const Byte *)pat);
5271590Srgrimes}
528