str.c revision 144479
11590Srgrimes/*-
21590Srgrimes * Copyright (c) 1988, 1989, 1990, 1993
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes * Copyright (c) 1989 by Berkeley Softworks
51590Srgrimes * All rights reserved.
61590Srgrimes *
71590Srgrimes * This code is derived from software contributed to Berkeley by
81590Srgrimes * Adam de Boor.
91590Srgrimes *
101590Srgrimes * Redistribution and use in source and binary forms, with or without
111590Srgrimes * modification, are permitted provided that the following conditions
121590Srgrimes * are met:
131590Srgrimes * 1. Redistributions of source code must retain the above copyright
141590Srgrimes *    notice, this list of conditions and the following disclaimer.
151590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161590Srgrimes *    notice, this list of conditions and the following disclaimer in the
171590Srgrimes *    documentation and/or other materials provided with the distribution.
181590Srgrimes * 3. All advertising materials mentioning features or use of this software
191590Srgrimes *    must display the following acknowledgement:
201590Srgrimes *	This product includes software developed by the University of
211590Srgrimes *	California, Berkeley and its contributors.
221590Srgrimes * 4. Neither the name of the University nor the names of its contributors
231590Srgrimes *    may be used to endorse or promote products derived from this software
241590Srgrimes *    without specific prior written permission.
251590Srgrimes *
261590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361590Srgrimes * SUCH DAMAGE.
3762833Swsanchez *
3862833Swsanchez * @(#)str.c	5.8 (Berkeley) 6/1/90
391590Srgrimes */
401590Srgrimes
4162833Swsanchez#include <sys/cdefs.h>
4294587Sobrien__FBSDID("$FreeBSD: head/usr.bin/make/str.c 144479 2005-04-01 13:06:05Z harti $");
431590Srgrimes
44141104Sharti#include <ctype.h>
45141104Sharti#include <stdlib.h>
46141104Sharti#include <string.h>
471590Srgrimes
48141133Sharti#include "buf.h"
49141104Sharti#include "globals.h"
50141104Sharti#include "str.h"
51141104Sharti#include "util.h"
52141104Sharti#include "var.h"
53141104Sharti
/*
 * Shared state for the word-splitting routines in this file.
 */
static char **argv;	/* vector of word pointers handed back to callers */
static char *buffer;	/* scratch copy of the string being split */
static int argmax;	/* number of word slots currently allocated in argv */
static int curlen;	/* current size of buffer in bytes */
565814Sjkh
575814Sjkh/*
585814Sjkh * str_init --
595814Sjkh *	Initialize the strings package
605814Sjkh *
615814Sjkh */
625814Sjkhvoid
63104696Sjmallettstr_init(void)
645814Sjkh{
655814Sjkh    char *p1;
66138232Sharti
67138264Sharti    argv = emalloc(((argmax = 50) + 1) * sizeof(char *));
685814Sjkh    argv[0] = Var_Value(".MAKE", VAR_GLOBAL, &p1);
695814Sjkh}
705814Sjkh
711590Srgrimes/*-
721590Srgrimes * str_concat --
73138547Sharti *	concatenate the two strings, inserting a space or slash between them.
741590Srgrimes *
751590Srgrimes * returns --
761590Srgrimes *	the resulting string in allocated space.
771590Srgrimes */
781590Srgrimeschar *
79138547Shartistr_concat(const char *s1, const char *s2, int flags)
801590Srgrimes{
8194584Sobrien	int len1, len2;
8294584Sobrien	char *result;
831590Srgrimes
841590Srgrimes	/* get the length of both strings */
8594638Sobrien	len1 = strlen(s1);
8694638Sobrien	len2 = strlen(s2);
871590Srgrimes
881590Srgrimes	/* allocate length plus separator plus EOS */
89138264Sharti	result = emalloc(len1 + len2 + 2);
901590Srgrimes
911590Srgrimes	/* copy first string into place */
9294638Sobrien	memcpy(result, s1, len1);
931590Srgrimes
941590Srgrimes	/* add separator character */
9594638Sobrien	if (flags & STR_ADDSPACE) {
9694638Sobrien		result[len1] = ' ';
9794638Sobrien		++len1;
9894638Sobrien	} else if (flags & STR_ADDSLASH) {
9994638Sobrien		result[len1] = '/';
10094638Sobrien		++len1;
1011590Srgrimes	}
1021590Srgrimes
10394638Sobrien	/* copy second string plus EOS into place */
10494638Sobrien	memcpy(result + len1, s2, len2 + 1);
1051590Srgrimes
106138232Sharti	return (result);
1071590Srgrimes}
1081590Srgrimes
1091590Srgrimes/*-
1101590Srgrimes * brk_string --
1111590Srgrimes *	Fracture a string into an array of words (as delineated by tabs or
1121590Srgrimes *	spaces) taking quotation marks into account.  Leading tabs/spaces
1131590Srgrimes *	are ignored.
1141590Srgrimes *
1151590Srgrimes * returns --
1161590Srgrimes *	Pointer to the array of pointers to the words.  To make life easier,
1171590Srgrimes *	the first word is always the value of the .MAKE variable.
1181590Srgrimes */
1191590Srgrimeschar **
120143253Shartibrk_string(const char *str, int *store_argc, Boolean expand)
1211590Srgrimes{
12294584Sobrien	int argc, ch;
123143253Sharti	char inquote;
124143253Sharti	const char *p;
125143253Sharti	char *start, *t;
1261590Srgrimes	int len;
1271590Srgrimes
1281590Srgrimes	/* skip leading space chars. */
1291590Srgrimes	for (; *str == ' ' || *str == '\t'; ++str)
1301590Srgrimes		continue;
1311590Srgrimes
1321590Srgrimes	/* allocate room for a copy of the string */
1335814Sjkh	if ((len = strlen(str) + 1) > curlen) {
1345814Sjkh		if (buffer)
1355814Sjkh		    free(buffer);
1365814Sjkh		buffer = emalloc(curlen = len);
1375814Sjkh	}
1381590Srgrimes
1391590Srgrimes	/*
1401590Srgrimes	 * copy the string; at the same time, parse backslashes,
1411590Srgrimes	 * quotes and build the argument list.
1421590Srgrimes	 */
1431590Srgrimes	argc = 1;
1441590Srgrimes	inquote = '\0';
1455814Sjkh	for (p = str, start = t = buffer;; ++p) {
1461590Srgrimes		switch(ch = *p) {
1471590Srgrimes		case '"':
1481590Srgrimes		case '\'':
14949938Shoek			if (inquote) {
150124966Sru				if (ch != inquote)
1511590Srgrimes					break;
152124966Sru				inquote = '\0';
153124966Sru				/* Don't miss "" or '' */
154124966Sru				if (!start)
155124966Sru					start = t;
156124840Sru			} else
157138264Sharti				inquote = (char)ch;
158124840Sru			if (expand)
159124840Sru				continue;
160124840Sru			break;
1611590Srgrimes		case ' ':
1621590Srgrimes		case '\t':
1635814Sjkh		case '\n':
1641590Srgrimes			if (inquote)
1651590Srgrimes				break;
1661590Srgrimes			if (!start)
1671590Srgrimes				continue;
1681590Srgrimes			/* FALLTHROUGH */
1691590Srgrimes		case '\0':
1701590Srgrimes			/*
1711590Srgrimes			 * end of a token -- make sure there's enough argv
1721590Srgrimes			 * space and save off a pointer.
1731590Srgrimes			 */
1745814Sjkh			if (!start)
1755814Sjkh			    goto done;
1765814Sjkh
1771590Srgrimes			*t++ = '\0';
1781590Srgrimes			if (argc == argmax) {
1791590Srgrimes				argmax *= 2;		/* ramp up fast */
180138264Sharti				argv = erealloc(argv,
18118730Ssteve				    (argmax + 1) * sizeof(char *));
1821590Srgrimes			}
1831590Srgrimes			argv[argc++] = start;
184138264Sharti			start = NULL;
1851590Srgrimes			if (ch == '\n' || ch == '\0')
1861590Srgrimes				goto done;
1871590Srgrimes			continue;
1881590Srgrimes		case '\\':
1895814Sjkh			if (!expand) {
1905814Sjkh				if (!start)
1915814Sjkh					start = t;
1925814Sjkh				*t++ = '\\';
1935814Sjkh				ch = *++p;
1945814Sjkh				break;
1955814Sjkh			}
1968874Srgrimes
1971590Srgrimes			switch (ch = *++p) {
1981590Srgrimes			case '\0':
1991590Srgrimes			case '\n':
2001590Srgrimes				/* hmmm; fix it up as best we can */
2011590Srgrimes				ch = '\\';
2021590Srgrimes				--p;
2031590Srgrimes				break;
2041590Srgrimes			case 'b':
2051590Srgrimes				ch = '\b';
2061590Srgrimes				break;
2071590Srgrimes			case 'f':
2081590Srgrimes				ch = '\f';
2091590Srgrimes				break;
2101590Srgrimes			case 'n':
2111590Srgrimes				ch = '\n';
2121590Srgrimes				break;
2131590Srgrimes			case 'r':
2141590Srgrimes				ch = '\r';
2151590Srgrimes				break;
2161590Srgrimes			case 't':
2171590Srgrimes				ch = '\t';
2181590Srgrimes				break;
219104108Sjmallett			default:
220104108Sjmallett				break;
2211590Srgrimes			}
2221590Srgrimes			break;
223104108Sjmallett		default:
224104108Sjmallett			break;
2251590Srgrimes		}
2261590Srgrimes		if (!start)
2271590Srgrimes			start = t;
228138232Sharti		*t++ = (char)ch;
2291590Srgrimes	}
230138264Shartidone:	argv[argc] = NULL;
231141269Sharti	if (store_argc != NULL)
232141269Sharti		*store_argc = argc;
233138232Sharti	return (argv);
2341590Srgrimes}
2351590Srgrimes
/*
 * Quote a string for appending it to MAKEFLAGS. According to Posix the
 * kind of quoting here is implementation-defined. This quoting must ensure
 * that the parsing of MAKEFLAGS's contents in a sub-shell yields the same
 * options, option arguments and macro definitions as in the calling make.
 *
 * Every blank is prefixed with a backslash.  Blanks are taken to be space
 * and tab only (the blanks of the POSIX locale) rather than whatever
 * isblank() reports, so that makes running with different locale settings
 * can still communicate.  All other characters, backslashes included, are
 * copied unchanged.
 *
 * Returns the quoted copy in space obtained from emalloc().
 */
char *
MAKEFLAGS_quote(const char *str)
{
	const char *in;
	char *out, *quoted;

	/* worst case: every character needs a backslash in front of it */
	quoted = emalloc(strlen(str) * 2 + 1);

	out = quoted;
	for (in = str; *in != '\0'; in++) {
		if (*in == ' ' || *in == '\t')
			*out++ = '\\';
		*out++ = *in;
	}
	*out = '\0';

	return (quoted);
}
273140870Sharti
274140870Shartichar **
275140870ShartiMAKEFLAGS_break(const char *str, int *pargc)
276140870Sharti{
277140870Sharti	char *q, *start;
278140870Sharti	int len;
279140870Sharti
280140870Sharti	/* allocate room for a copy of the string */
281140870Sharti	if ((len = strlen(str) + 1) > curlen)
282140870Sharti		buffer = erealloc(buffer, curlen = len);
283140870Sharti
284140870Sharti	start = NULL;
285140870Sharti	*pargc = 1;
286140870Sharti
287140870Sharti	for (q = buffer;;) {
288140870Sharti		switch (*str) {
289140870Sharti		  case ' ':
290140870Sharti		  case '\t':
291140870Sharti			/* word separator */
292140870Sharti			if (start == NULL) {
293140870Sharti				/* not in a word */
294140870Sharti				str++;
295140870Sharti				continue;
296140870Sharti			}
297140870Sharti			/* FALLTHRU */
298140870Sharti		  case '\0':
299140870Sharti			if (start == NULL)
300140870Sharti				goto done;
301140870Sharti
302140870Sharti			/* finish word */
303140870Sharti			*q++ = '\0';
304140870Sharti			if (argmax == *pargc) {
305140870Sharti				argmax *= 2;
306140870Sharti				argv = erealloc(argv,
307140870Sharti				    sizeof(*argv) * (argmax + 1));
308140870Sharti			}
309140870Sharti			argv[(*pargc)++] = start;
310140870Sharti			start = NULL;
311140870Sharti
312140870Sharti			if (*str++ == '\0')
313140870Sharti				goto done;
314140870Sharti			continue;
315140870Sharti
316140870Sharti		  case '\\':
317140870Sharti			if (str[1] == ' ' || str[1] == '\t')
318140870Sharti				/* was a quote */
319140870Sharti				str++;
320140870Sharti			break;
321140870Sharti
322140870Sharti		  default:
323140870Sharti			break;
324140870Sharti		}
325140870Sharti		if (start == NULL)
326140870Sharti			/* start of new word */
327140870Sharti			start = q;
328140870Sharti		*q++ = *str++;
329140870Sharti	}
330140870Sharti  done:
331140870Sharti	argv[(*pargc)] = NULL;
332140870Sharti	return (argv);
333140870Sharti}
334140870Sharti
/*
 * Str_Match --
 *
 * See if a particular string matches a particular pattern.
 *
 * Results: 1 is returned if string matches pattern, 0 otherwise. The
 * matching operation permits the following special characters in the
 * pattern:
 *	*	matches any substring, including the empty one;
 *	?	matches any single character;
 *	[...]	matches one character out of a list of characters and
 *		ranges (two characters separated by "-", in either order);
 *	\x	matches the character x literally.
 * A "[" list that is not closed by "]" ends at the end of the pattern.
 *
 * Side effects: None.
 */
int
Str_Match(const char *string, const char *pattern)
{
	char c2;

	for (;;) {
		/*
		 * See if we're at the end of both the pattern and the
		 * string. If so, we succeeded.  If we're at the end of the
		 * pattern but not at the end of the string, we failed.
		 */
		if (*pattern == 0)
			return (!*string);
		if (*string == 0 && *pattern != '*')
			return (0);
		/*
		 * Check for a "*" as the next pattern character.  It matches
		 * any substring.  We handle this by calling ourselves
		 * recursively for each postfix of string, until either we
		 * match or we reach the end of the string.
		 */
		if (*pattern == '*') {
			/*
			 * A run of "*" means the same as a single one.
			 * Skipping the whole run also lets trailing "**"
			 * match an empty remainder of the string.
			 */
			while (*pattern == '*')
				++pattern;
			if (*pattern == 0)
				return (1);
			while (*string != 0) {
				if (Str_Match(string, pattern))
					return (1);
				++string;
			}
			return (0);
		}
		/*
		 * Check for a "?" as the next pattern character.  It matches
		 * any single character.
		 */
		if (*pattern == '?')
			goto thisCharOK;
		/*
		 * Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-").
		 */
		if (*pattern == '[') {
			++pattern;
			for (;;) {
				if ((*pattern == ']') || (*pattern == 0))
					return (0);
				if (*pattern == *string)
					break;
				if (pattern[1] == '-') {
					c2 = pattern[2];
					if (c2 == 0)
						return (0);
					if ((*pattern <= *string) &&
					    (c2 >= *string))
						break;
					if ((*pattern >= *string) &&
					    (c2 <= *string))
						break;
					pattern += 2;
				}
				++pattern;
			}
			/* skip the rest of the list */
			while ((*pattern != ']') && (*pattern != 0))
				++pattern;
			/*
			 * Unterminated list: step back so that the
			 * increment below lands on the terminating NUL
			 * instead of running past the end of the pattern.
			 */
			if (*pattern == 0)
				--pattern;
			goto thisCharOK;
		}
		/*
		 * If the next pattern character is '\', just strip off the
		 * '\' so we do exact matching on the character that follows.
		 */
		if (*pattern == '\\') {
			++pattern;
			if (*pattern == 0)
				return (0);
		}
		/*
		 * There's no special character.  Just make sure that the
		 * next characters of each string match.
		 */
		if (*pattern != *string)
			return (0);
thisCharOK:	++pattern;
		++string;
	}
}
4331590Srgrimes
4341590Srgrimes
/**
 * Str_SYSVMatch
 *	Check word against pattern for a match (% is wild).
 *
 *	If the pattern contains a '%', the text in front of the first '%'
 *	has to be a prefix of word and the text behind it has to be a
 *	suffix of what remains.  Without a '%' the whole pattern has to be
 *	a suffix of word.
 *
 * Results:
 *	Returns the beginning position of a match or NULL. On a match the
 *	number of characters matched is returned in len. For an empty word
 *	NULL is returned with len set to 0; on the other NULL returns len
 *	is left untouched.
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, int *len)
{
	const char *percent, *pat, *cur, *stem;

	/* Zero-length word cannot be matched against */
	if (word[0] == '\0') {
		*len = 0;
		return (NULL);
	}

	/* Null pattern is the whole string */
	if (pattern[0] == '\0') {
		*len = strlen(word);
		return (word);
	}

	pat = pattern;
	cur = word;

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* walk over the literal prefix in front of the '%' */
		while (pat != percent && *cur != '\0' && *cur == *pat) {
			cur++;
			pat++;
		}

		/* stopped early: the prefix does not match */
		if (pat != percent)
			return (NULL);

		/* step over the '%' */
		pat++;
		if (*pat == '\0') {
			/* No more pattern, return the rest of the string */
			*len = strlen(cur);
			return (cur);
		}
	}

	/* the match starts here; now look for a matching tail */
	stem = cur;
	for (;;) {
		if (strcmp(pat, cur) == 0) {
			*len = cur - stem;
			return (stem);
		}
		if (*cur == '\0')
			break;
		cur++;
	}

	return (NULL);
}
4901590Srgrimes
4911590Srgrimes
492144479Sharti/**
493144479Sharti * Str_SYSVSubst
4941590Srgrimes *	Substitute '%' on the pattern with len characters from src.
4951590Srgrimes *	If the pattern does not contain a '%' prepend len characters
4961590Srgrimes *	from src.
4978874Srgrimes *
4981590Srgrimes * Side Effects:
4991590Srgrimes *	Places result on buf
5001590Srgrimes */
5011590Srgrimesvoid
502141133ShartiStr_SYSVSubst(Buffer *buf, const char *pat, const char *src, int len)
5031590Srgrimes{
504144479Sharti	const char *m;
5051590Srgrimes
506144479Sharti	if ((m = strchr(pat, '%')) != NULL) {
507144479Sharti		/* Copy the prefix */
508144479Sharti		Buf_AppendRange(buf, pat, m);
509144479Sharti		/* skip the % */
510144479Sharti		pat = m + 1;
511144479Sharti	}
5121590Srgrimes
513144479Sharti	/* Copy the pattern */
514144479Sharti	Buf_AddBytes(buf, len, (const Byte *)src);
5151590Srgrimes
516144479Sharti	/* append the rest */
517144479Sharti	Buf_Append(buf, pat);
5181590Srgrimes}
519