str.c revision 104696
11590Srgrimes/*-
21590Srgrimes * Copyright (c) 1988, 1989, 1990, 1993
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes * Copyright (c) 1989 by Berkeley Softworks
51590Srgrimes * All rights reserved.
61590Srgrimes *
71590Srgrimes * This code is derived from software contributed to Berkeley by
81590Srgrimes * Adam de Boor.
91590Srgrimes *
101590Srgrimes * Redistribution and use in source and binary forms, with or without
111590Srgrimes * modification, are permitted provided that the following conditions
121590Srgrimes * are met:
131590Srgrimes * 1. Redistributions of source code must retain the above copyright
141590Srgrimes *    notice, this list of conditions and the following disclaimer.
151590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161590Srgrimes *    notice, this list of conditions and the following disclaimer in the
171590Srgrimes *    documentation and/or other materials provided with the distribution.
181590Srgrimes * 3. All advertising materials mentioning features or use of this software
191590Srgrimes *    must display the following acknowledgement:
201590Srgrimes *	This product includes software developed by the University of
211590Srgrimes *	California, Berkeley and its contributors.
221590Srgrimes * 4. Neither the name of the University nor the names of its contributors
231590Srgrimes *    may be used to endorse or promote products derived from this software
241590Srgrimes *    without specific prior written permission.
251590Srgrimes *
261590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361590Srgrimes * SUCH DAMAGE.
3762833Swsanchez *
3862833Swsanchez * @(#)str.c	5.8 (Berkeley) 6/1/90
391590Srgrimes */
401590Srgrimes
4162833Swsanchez#include <sys/cdefs.h>
4294587Sobrien__FBSDID("$FreeBSD: head/usr.bin/make/str.c 104696 2002-10-09 03:42:10Z jmallett $");
431590Srgrimes
441590Srgrimes#include "make.h"
451590Srgrimes
/* Word vector returned by brk_string(); slot 0 is filled in by str_init(). */
static char **argv;
/* Scratch buffer holding the words that the entries of argv point to. */
static char *buffer;
/* Number of word slots in argv, not counting the extra slot for the NULL. */
static int argmax;
/* Number of bytes currently allocated for buffer. */
static int curlen;
485814Sjkh
495814Sjkh/*
505814Sjkh * str_init --
515814Sjkh *	Initialize the strings package
525814Sjkh *
535814Sjkh */
545814Sjkhvoid
55104696Sjmallettstr_init(void)
565814Sjkh{
575814Sjkh    char *p1;
5818730Ssteve    argv = (char **)emalloc(((argmax = 50) + 1) * sizeof(char *));
595814Sjkh    argv[0] = Var_Value(".MAKE", VAR_GLOBAL, &p1);
605814Sjkh}
615814Sjkh
625814Sjkh
635814Sjkh/*
645814Sjkh * str_end --
655814Sjkh *	Cleanup the strings package
665814Sjkh *
675814Sjkh */
685814Sjkhvoid
69104696Sjmallettstr_end(void)
705814Sjkh{
719254Sache    if (argv) {
729254Sache	if (argv[0])
739254Sache	    free(argv[0]);
7469531Swill	free(argv);
759254Sache    }
765814Sjkh    if (buffer)
775814Sjkh	free(buffer);
785814Sjkh}
795814Sjkh
801590Srgrimes/*-
811590Srgrimes * str_concat --
821590Srgrimes *	concatenate the two strings, inserting a space or slash between them,
831590Srgrimes *	freeing them if requested.
841590Srgrimes *
851590Srgrimes * returns --
861590Srgrimes *	the resulting string in allocated space.
871590Srgrimes */
881590Srgrimeschar *
89104696Sjmallettstr_concat(char *s1, char *s2, int flags)
901590Srgrimes{
9194584Sobrien	int len1, len2;
9294584Sobrien	char *result;
931590Srgrimes
941590Srgrimes	/* get the length of both strings */
9594638Sobrien	len1 = strlen(s1);
9694638Sobrien	len2 = strlen(s2);
971590Srgrimes
981590Srgrimes	/* allocate length plus separator plus EOS */
991590Srgrimes	result = emalloc((u_int)(len1 + len2 + 2));
1001590Srgrimes
1011590Srgrimes	/* copy first string into place */
10294638Sobrien	memcpy(result, s1, len1);
1031590Srgrimes
1041590Srgrimes	/* add separator character */
10594638Sobrien	if (flags & STR_ADDSPACE) {
10694638Sobrien		result[len1] = ' ';
10794638Sobrien		++len1;
10894638Sobrien	} else if (flags & STR_ADDSLASH) {
10994638Sobrien		result[len1] = '/';
11094638Sobrien		++len1;
1111590Srgrimes	}
1121590Srgrimes
11394638Sobrien	/* copy second string plus EOS into place */
11494638Sobrien	memcpy(result + len1, s2, len2 + 1);
1151590Srgrimes
1161590Srgrimes	/* free original strings */
1171590Srgrimes	if (flags & STR_DOFREE) {
11897123Sjmallett		(void)efree(s1);
11997123Sjmallett		(void)efree(s2);
1201590Srgrimes	}
1211590Srgrimes	return(result);
1221590Srgrimes}
1231590Srgrimes
1241590Srgrimes/*-
1251590Srgrimes * brk_string --
1261590Srgrimes *	Fracture a string into an array of words (as delineated by tabs or
1271590Srgrimes *	spaces) taking quotation marks into account.  Leading tabs/spaces
1281590Srgrimes *	are ignored.
1291590Srgrimes *
1301590Srgrimes * returns --
1311590Srgrimes *	Pointer to the array of pointers to the words.  To make life easier,
1321590Srgrimes *	the first word is always the value of the .MAKE variable.
1331590Srgrimes */
1341590Srgrimeschar **
135104696Sjmallettbrk_string(char *str, int *store_argc, Boolean expand)
1361590Srgrimes{
13794584Sobrien	int argc, ch;
13894584Sobrien	char inquote, *p, *start, *t;
1391590Srgrimes	int len;
1401590Srgrimes
1411590Srgrimes	/* skip leading space chars. */
1421590Srgrimes	for (; *str == ' ' || *str == '\t'; ++str)
1431590Srgrimes		continue;
1441590Srgrimes
1451590Srgrimes	/* allocate room for a copy of the string */
1465814Sjkh	if ((len = strlen(str) + 1) > curlen) {
1475814Sjkh		if (buffer)
1485814Sjkh		    free(buffer);
1495814Sjkh		buffer = emalloc(curlen = len);
1505814Sjkh	}
1511590Srgrimes
1521590Srgrimes	/*
1531590Srgrimes	 * copy the string; at the same time, parse backslashes,
1541590Srgrimes	 * quotes and build the argument list.
1551590Srgrimes	 */
1561590Srgrimes	argc = 1;
1571590Srgrimes	inquote = '\0';
1585814Sjkh	for (p = str, start = t = buffer;; ++p) {
1591590Srgrimes		switch(ch = *p) {
1601590Srgrimes		case '"':
1611590Srgrimes		case '\'':
16249938Shoek			if (inquote) {
1631590Srgrimes				if (inquote == ch)
1641590Srgrimes					inquote = '\0';
1651590Srgrimes				else
1661590Srgrimes					break;
16749938Shoek			} else {
1681590Srgrimes				inquote = (char) ch;
1695814Sjkh				/* Don't miss "" or '' */
1705814Sjkh				if (start == NULL && p[1] == inquote) {
1715814Sjkh					start = t + 1;
1725814Sjkh					break;
1735814Sjkh				}
1742266Ssef			}
1755814Sjkh			if (!expand) {
1765814Sjkh				if (!start)
1775814Sjkh					start = t;
1785814Sjkh				*t++ = ch;
1795814Sjkh			}
1805814Sjkh			continue;
1811590Srgrimes		case ' ':
1821590Srgrimes		case '\t':
1835814Sjkh		case '\n':
1841590Srgrimes			if (inquote)
1851590Srgrimes				break;
1861590Srgrimes			if (!start)
1871590Srgrimes				continue;
1881590Srgrimes			/* FALLTHROUGH */
1891590Srgrimes		case '\0':
1901590Srgrimes			/*
1911590Srgrimes			 * end of a token -- make sure there's enough argv
1921590Srgrimes			 * space and save off a pointer.
1931590Srgrimes			 */
1945814Sjkh			if (!start)
1955814Sjkh			    goto done;
1965814Sjkh
1971590Srgrimes			*t++ = '\0';
1981590Srgrimes			if (argc == argmax) {
1991590Srgrimes				argmax *= 2;		/* ramp up fast */
20018730Ssteve				argv = (char **)erealloc(argv,
20118730Ssteve				    (argmax + 1) * sizeof(char *));
2021590Srgrimes			}
2031590Srgrimes			argv[argc++] = start;
2041590Srgrimes			start = (char *)NULL;
2051590Srgrimes			if (ch == '\n' || ch == '\0')
2061590Srgrimes				goto done;
2071590Srgrimes			continue;
2081590Srgrimes		case '\\':
2095814Sjkh			if (!expand) {
2105814Sjkh				if (!start)
2115814Sjkh					start = t;
2125814Sjkh				*t++ = '\\';
2135814Sjkh				ch = *++p;
2145814Sjkh				break;
2155814Sjkh			}
2168874Srgrimes
2171590Srgrimes			switch (ch = *++p) {
2181590Srgrimes			case '\0':
2191590Srgrimes			case '\n':
2201590Srgrimes				/* hmmm; fix it up as best we can */
2211590Srgrimes				ch = '\\';
2221590Srgrimes				--p;
2231590Srgrimes				break;
2241590Srgrimes			case 'b':
2251590Srgrimes				ch = '\b';
2261590Srgrimes				break;
2271590Srgrimes			case 'f':
2281590Srgrimes				ch = '\f';
2291590Srgrimes				break;
2301590Srgrimes			case 'n':
2311590Srgrimes				ch = '\n';
2321590Srgrimes				break;
2331590Srgrimes			case 'r':
2341590Srgrimes				ch = '\r';
2351590Srgrimes				break;
2361590Srgrimes			case 't':
2371590Srgrimes				ch = '\t';
2381590Srgrimes				break;
239104108Sjmallett			default:
240104108Sjmallett				break;
2411590Srgrimes			}
2421590Srgrimes			break;
243104108Sjmallett		default:
244104108Sjmallett			break;
2451590Srgrimes		}
2461590Srgrimes		if (!start)
2471590Srgrimes			start = t;
2481590Srgrimes		*t++ = (char) ch;
2491590Srgrimes	}
2501590Srgrimesdone:	argv[argc] = (char *)NULL;
2511590Srgrimes	*store_argc = argc;
2521590Srgrimes	return(argv);
2531590Srgrimes}
2541590Srgrimes
/*
 * Str_FindSubstring -- See if a string contains a particular substring.
 *
 * Results: If string contains substring, the return value is the location of
 * the first matching instance of substring in string.  If string doesn't
 * contain substring, the return value is NULL.  Matching is done on an exact
 * character-for-character basis with no wildcards or special characters.
 * An empty substring is never found: the result is NULL in that case.
 *
 * Side effects: None.
 */
char *
Str_FindSubstring(char *string, char *substring)
{
	/*
	 * strstr(3) reports an empty needle as found at the start of the
	 * haystack, whereas this function reports it as not found, so
	 * that case is answered here.
	 */
	if (*substring == '\0')
		return ((char *) NULL);

	return (strstr(string, substring));
}
2921590Srgrimes
/*
 * Str_Match --
 *
 * See if a particular string matches a particular pattern.
 *
 * Results: Non-zero is returned if string matches pattern, 0 otherwise. The
 * matching operation permits the following special characters in the
 * pattern: *?\[] (see the man page for details on what these mean).
 *
 * A "[" set that is not closed by "]" is treated as if it ended where the
 * pattern ends.
 *
 * Side effects: None.
 */
int
Str_Match(char *string, char *pattern)
{
	char c2;

	for (;;) {
		/*
		 * See if we're at the end of both the pattern and the
		 * string.  If so, we succeeded.  If we're at the end of the
		 * pattern but not at the end of the string, we failed.
		 */
		if (*pattern == 0)
			return (!*string);
		if (*string == 0 && *pattern != '*')
			return (0);
		/*
		 * Check for a "*" as the next pattern character.  It matches
		 * any substring.  We handle this by calling ourselves
		 * recursively for each postfix of string, until either we
		 * match or we reach the end of the string.
		 */
		if (*pattern == '*') {
			/*
			 * A run of "*" means the same as a single one.
			 * Skipping the whole run lets "a**" match "a":
			 * otherwise the second "*" would be tried against
			 * an already exhausted string and fail.
			 */
			while (*pattern == '*')
				++pattern;
			if (*pattern == 0)
				return (1);
			while (*string != 0) {
				if (Str_Match(string, pattern))
					return (1);
				++string;
			}
			return (0);
		}
		/*
		 * Check for a "?" as the next pattern character.  It matches
		 * any single character.
		 */
		if (*pattern == '?')
			goto thisCharOK;
		/*
		 * Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-").  The bounds
		 * of a range may be given in either order.
		 */
		if (*pattern == '[') {
			++pattern;
			for (;;) {
				if ((*pattern == ']') || (*pattern == 0))
					return (0);
				if (*pattern == *string)
					break;
				if (pattern[1] == '-') {
					c2 = pattern[2];
					if (c2 == 0)
						return (0);
					if ((*pattern <= *string) &&
					    (c2 >= *string))
						break;
					if ((*pattern >= *string) &&
					    (c2 <= *string))
						break;
					pattern += 2;
				}
				++pattern;
			}
			/* matched: skip the remainder of the set */
			while ((*pattern != ']') && (*pattern != 0))
				++pattern;
			/*
			 * With no closing "]" we are now on the pattern's
			 * NUL.  Step back one character so that the
			 * increment at thisCharOK lands on the NUL rather
			 * than beyond it.
			 */
			if (*pattern == 0)
				--pattern;
			goto thisCharOK;
		}
		/*
		 * If the next pattern character is '\', just strip off the
		 * '\' so we do exact matching on the character that follows.
		 */
		if (*pattern == '\\') {
			++pattern;
			if (*pattern == 0)
				return (0);
		}
		/*
		 * There's no special character.  Just make sure that the
		 * next characters of each string match.
		 */
		if (*pattern != *string)
			return (0);
thisCharOK:	++pattern;
		++string;
	}
}
3911590Srgrimes
3921590Srgrimes
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Check word against pattern for a match (% is wild).
 *
 *	The part of pattern before the first '%' must be a prefix of word
 *	and the part after it must be a suffix of what remains.  Without
 *	a '%', pattern only has to be a suffix of word.
 *
 * Results:
 *	Returns the beginning position of a match or null. The number
 *	of characters matched is returned in len.  An empty word never
 *	matches (len is set to 0); an empty pattern matches all of word.
 *	On any other failure len is left untouched.
 *
 * Side Effects:
 *	None
 *
 *-----------------------------------------------------------------------
 */
char *
Str_SYSVMatch(char *word, char *pattern, int *len)
{
    char *percent;		/* first '%' in pattern, if any */
    char *stem = word;		/* where the wildcard part starts */
    char *tail = pattern;	/* what has to follow the wildcard part */
    size_t stemlen, taillen;

    if (*word == '\0') {
	/* Zero-length word cannot be matched against */
	*len = 0;
	return NULL;
    }

    if (*pattern == '\0') {
	/* Null pattern is the whole string */
	*len = strlen(word);
	return word;
    }

    percent = strchr(pattern, '%');
    if (percent != NULL) {
	size_t prefixlen = percent - pattern;

	/* the text in front of the '%' has to open the word */
	if (strncmp(word, pattern, prefixlen) != 0)
	    return NULL;	/* No match */

	stem = word + prefixlen;
	tail = percent + 1;
	if (*tail == '\0') {
	    /* No more pattern, return the rest of the string */
	    *len = strlen(stem);
	    return stem;
	}
    }

    /*
     * Find a matching tail: only the last strlen(tail) characters of
     * the remaining word can be equal to tail.
     */
    stemlen = strlen(stem);
    taillen = strlen(tail);
    if (taillen > stemlen || strcmp(stem + (stemlen - taillen), tail) != 0)
	return NULL;

    *len = stemlen - taillen;
    return stem;
}
4531590Srgrimes
4541590Srgrimes
4551590Srgrimes/*-
4561590Srgrimes *-----------------------------------------------------------------------
4571590Srgrimes * Str_SYSVSubst --
4581590Srgrimes *	Substitute '%' on the pattern with len characters from src.
4591590Srgrimes *	If the pattern does not contain a '%' prepend len characters
4601590Srgrimes *	from src.
4618874Srgrimes *
4621590Srgrimes * Results:
4631590Srgrimes *	None
4641590Srgrimes *
4651590Srgrimes * Side Effects:
4661590Srgrimes *	Places result on buf
4671590Srgrimes *
4681590Srgrimes *-----------------------------------------------------------------------
4691590Srgrimes */
4701590Srgrimesvoid
471104696SjmallettStr_SYSVSubst(Buffer buf, char *pat, char *src, int len)
4721590Srgrimes{
4731590Srgrimes    char *m;
4741590Srgrimes
4751590Srgrimes    if ((m = strchr(pat, '%')) != NULL) {
4761590Srgrimes	/* Copy the prefix */
4771590Srgrimes	Buf_AddBytes(buf, m - pat, (Byte *) pat);
4781590Srgrimes	/* skip the % */
4791590Srgrimes	pat = m + 1;
4801590Srgrimes    }
4811590Srgrimes
4821590Srgrimes    /* Copy the pattern */
4831590Srgrimes    Buf_AddBytes(buf, len, (Byte *) src);
4841590Srgrimes
4851590Srgrimes    /* append the rest */
4861590Srgrimes    Buf_AddBytes(buf, strlen(pat), (Byte *) pat);
4871590Srgrimes}
488