11590Srgrimes/*-
21590Srgrimes * Copyright (c) 1988, 1989, 1990, 1993
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes * Copyright (c) 1989 by Berkeley Softworks
51590Srgrimes * All rights reserved.
61590Srgrimes *
71590Srgrimes * This code is derived from software contributed to Berkeley by
81590Srgrimes * Adam de Boor.
91590Srgrimes *
101590Srgrimes * Redistribution and use in source and binary forms, with or without
111590Srgrimes * modification, are permitted provided that the following conditions
121590Srgrimes * are met:
131590Srgrimes * 1. Redistributions of source code must retain the above copyright
141590Srgrimes *    notice, this list of conditions and the following disclaimer.
151590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161590Srgrimes *    notice, this list of conditions and the following disclaimer in the
171590Srgrimes *    documentation and/or other materials provided with the distribution.
181590Srgrimes * 3. All advertising materials mentioning features or use of this software
191590Srgrimes *    must display the following acknowledgement:
201590Srgrimes *	This product includes software developed by the University of
211590Srgrimes *	California, Berkeley and its contributors.
221590Srgrimes * 4. Neither the name of the University nor the names of its contributors
231590Srgrimes *    may be used to endorse or promote products derived from this software
241590Srgrimes *    without specific prior written permission.
251590Srgrimes *
261590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361590Srgrimes * SUCH DAMAGE.
3762833Swsanchez *
3862833Swsanchez * @(#)str.c	5.8 (Berkeley) 6/1/90
391590Srgrimes */
401590Srgrimes
4162833Swsanchez#include <sys/cdefs.h>
4294587Sobrien__FBSDID("$FreeBSD$");
431590Srgrimes
44141104Sharti#include <stdlib.h>
45141104Sharti#include <string.h>
461590Srgrimes
47141133Sharti#include "buf.h"
48141104Sharti#include "str.h"
49141104Sharti#include "util.h"
50141104Sharti
51146345Sharti/**
52146345Sharti * Initialize the argument array object.  The array is initially
53146345Sharti * eight positions, and will be expaned as neccessary.  The first
54146345Sharti * position is set to NULL since everything ignores it.  We allocate
55146345Sharti * (size + 1) since we need space for the terminating NULL.  The
56146345Sharti * buffer is set to NULL, since no common buffer is alloated yet.
57146345Sharti */
58146543Shartivoid
59146345ShartiArgArray_Init(ArgArray *aa)
60146345Sharti{
615814Sjkh
62146345Sharti	aa->size = 8;
63146345Sharti	aa->argv = emalloc((aa->size + 1) * sizeof(char *));
64146345Sharti	aa->argc = 0;
65146345Sharti	aa->argv[aa->argc++] = NULL;
66146345Sharti	aa->len = 0;
67146345Sharti	aa->buffer = NULL;
68146345Sharti}
69146345Sharti
70146345Sharti/**
71146345Sharti * Cleanup the memory allocated for in the argument array object.
725814Sjkh */
735814Sjkhvoid
74146345ShartiArgArray_Done(ArgArray *aa)
755814Sjkh{
76138232Sharti
77146345Sharti	if (aa->buffer == NULL) {
78146345Sharti		int	i;
79146345Sharti		/* args are individually allocated */
80146345Sharti		for (i = 0; i < aa->argc; ++i) {
81146345Sharti			if (aa->argv[i]) {
82146345Sharti				free(aa->argv[i]);
83146345Sharti				aa->argv[i] = NULL;
84146345Sharti			}
85146345Sharti		}
86146345Sharti	} else {
87146345Sharti		/* args are part of a single allocation */
88146345Sharti		free(aa->buffer);
89146345Sharti		aa->buffer = NULL;
90146345Sharti	}
91146345Sharti	free(aa->argv);
92146345Sharti	aa->argv = NULL;
93146345Sharti	aa->argc = 0;
94146345Sharti	aa->size = 0;
955814Sjkh}
965814Sjkh
971590Srgrimes/*-
981590Srgrimes * str_concat --
99138547Sharti *	concatenate the two strings, inserting a space or slash between them.
1001590Srgrimes *
1011590Srgrimes * returns --
1021590Srgrimes *	the resulting string in allocated space.
1031590Srgrimes */
1041590Srgrimeschar *
105138547Shartistr_concat(const char *s1, const char *s2, int flags)
1061590Srgrimes{
10794584Sobrien	int len1, len2;
10894584Sobrien	char *result;
1091590Srgrimes
1101590Srgrimes	/* get the length of both strings */
11194638Sobrien	len1 = strlen(s1);
11294638Sobrien	len2 = strlen(s2);
1131590Srgrimes
1141590Srgrimes	/* allocate length plus separator plus EOS */
115138264Sharti	result = emalloc(len1 + len2 + 2);
1161590Srgrimes
1171590Srgrimes	/* copy first string into place */
11894638Sobrien	memcpy(result, s1, len1);
1191590Srgrimes
1201590Srgrimes	/* add separator character */
12194638Sobrien	if (flags & STR_ADDSPACE) {
12294638Sobrien		result[len1] = ' ';
12394638Sobrien		++len1;
12494638Sobrien	} else if (flags & STR_ADDSLASH) {
12594638Sobrien		result[len1] = '/';
12694638Sobrien		++len1;
1271590Srgrimes	}
1281590Srgrimes
12994638Sobrien	/* copy second string plus EOS into place */
13094638Sobrien	memcpy(result + len1, s2, len2 + 1);
1311590Srgrimes
132138232Sharti	return (result);
1331590Srgrimes}
1341590Srgrimes
135146345Sharti/**
136146345Sharti * Fracture a string into an array of words (as delineated by tabs or
137146345Sharti * spaces) taking quotation marks into account.  Leading tabs/spaces
138146345Sharti * are ignored.
1391590Srgrimes */
140146345Shartivoid
141146345Shartibrk_string(ArgArray *aa, const char str[], Boolean expand)
1421590Srgrimes{
143146345Sharti	char	inquote;
144146345Sharti	char	*start;
145146345Sharti	char	*arg;
1461590Srgrimes
1471590Srgrimes	/* skip leading space chars. */
1481590Srgrimes	for (; *str == ' ' || *str == '\t'; ++str)
1491590Srgrimes		continue;
1501590Srgrimes
151146345Sharti	ArgArray_Init(aa);
1521590Srgrimes
153209951Semaste	aa->buffer = estrdup(str);
154146345Sharti
155146345Sharti	arg = aa->buffer;
156146345Sharti	start = arg;
157146345Sharti	inquote = '\0';
158146345Sharti
1591590Srgrimes	/*
1601590Srgrimes	 * copy the string; at the same time, parse backslashes,
1611590Srgrimes	 * quotes and build the argument list.
1621590Srgrimes	 */
163146345Sharti	for (;;) {
164146345Sharti		switch (str[0]) {
1651590Srgrimes		case '"':
1661590Srgrimes		case '\'':
167146345Sharti			if (inquote == '\0') {
168146345Sharti				inquote = str[0];
169146345Sharti				if (expand)
1701590Srgrimes					break;
171146345Sharti				if (start == NULL)
172146345Sharti					start = arg;
173146345Sharti			} else if (inquote == str[0]) {
174124966Sru				inquote = '\0';
175124966Sru				/* Don't miss "" or '' */
176146345Sharti				if (start == NULL)
177146345Sharti					start = arg;
178146345Sharti				if (expand)
179146345Sharti					break;
180146345Sharti			} else {
181146345Sharti				/* other type of quote found */
182146345Sharti				if (start == NULL)
183146345Sharti					start = arg;
184146345Sharti			}
185146345Sharti			*arg++ = str[0];
186124840Sru			break;
1871590Srgrimes		case ' ':
1881590Srgrimes		case '\t':
1895814Sjkh		case '\n':
190146345Sharti			if (inquote) {
191146345Sharti				if (start == NULL)
192146345Sharti					start = arg;
193146345Sharti				*arg++ = str[0];
1941590Srgrimes				break;
195146345Sharti			}
196146345Sharti			if (start == NULL)
197146345Sharti				break;
1981590Srgrimes			/* FALLTHROUGH */
1991590Srgrimes		case '\0':
2001590Srgrimes			/*
2011590Srgrimes			 * end of a token -- make sure there's enough argv
2021590Srgrimes			 * space and save off a pointer.
2031590Srgrimes			 */
204146345Sharti			if (aa->argc == aa->size) {
205146345Sharti				aa->size *= 2;		/* ramp up fast */
206146345Sharti				aa->argv = erealloc(aa->argv,
207146345Sharti				    (aa->size + 1) * sizeof(char *));
208146345Sharti			}
2095814Sjkh
210146345Sharti			*arg++ = '\0';
211146345Sharti			if (start == NULL) {
212146345Sharti				aa->argv[aa->argc] = start;
213146345Sharti				return;
2141590Srgrimes			}
215146345Sharti			if (str[0] == '\n' || str[0] == '\0') {
216146345Sharti				aa->argv[aa->argc++] = start;
217146345Sharti				aa->argv[aa->argc] = NULL;
218146345Sharti				return;
219146345Sharti			} else {
220146345Sharti				aa->argv[aa->argc++] = start;
221146345Sharti				start = NULL;
2225814Sjkh				break;
2235814Sjkh			}
224146345Sharti		case '\\':
225146345Sharti			if (start == NULL)
226146345Sharti				start = arg;
227146345Sharti			if (expand) {
228146345Sharti				switch (str[1]) {
229146345Sharti				case '\0':
230146345Sharti				case '\n':
231146345Sharti					/* hmmm; fix it up as best we can */
232146345Sharti					*arg++ = '\\';
233146345Sharti					break;
234146345Sharti				case 'b':
235146345Sharti					*arg++ = '\b';
236146345Sharti					++str;
237146345Sharti					break;
238146345Sharti				case 'f':
239146345Sharti					*arg++ = '\f';
240146345Sharti					++str;
241146345Sharti					break;
242146345Sharti				case 'n':
243146345Sharti					*arg++ = '\n';
244146345Sharti					++str;
245146345Sharti					break;
246146345Sharti				case 'r':
247146345Sharti					*arg++ = '\r';
248146345Sharti					++str;
249146345Sharti					break;
250146345Sharti				case 't':
251146345Sharti					*arg++ = '\t';
252146345Sharti					++str;
253146345Sharti					break;
254146345Sharti				default:
255146345Sharti					*arg++ = str[1];
256146345Sharti					++str;
257146345Sharti					break;
258146345Sharti				}
259146345Sharti			} else {
260146345Sharti				*arg++ = str[0];
261163171Sru				if (str[1] != '\0') {
262163171Sru					++str;
263163171Sru					*arg++ = str[0];
264163171Sru				}
2651590Srgrimes			}
2661590Srgrimes			break;
267104108Sjmallett		default:
268146345Sharti			if (start == NULL)
269146345Sharti				start = arg;
270146345Sharti			*arg++ = str[0];
271104108Sjmallett			break;
2721590Srgrimes		}
273146345Sharti		++str;
2741590Srgrimes	}
2751590Srgrimes}
2761590Srgrimes
/*
 * Quote a string so that it can be appended to MAKEFLAGS.  Posix leaves
 * the kind of quoting implementation-defined; it only has to guarantee
 * that a sub-make parsing MAKEFLAGS sees the same options, option
 * arguments and macro definitions as the calling make.
 *
 * Every blank is prefixed with a backslash.  Blanks are space and tab,
 * as in the POSIX locale; isblank() is deliberately not used so that
 * makes running with different locale settings still understand each
 * other.  All other characters, backslashes included, are copied
 * unchanged.
 *
 * Returns the quoted copy in space obtained from emalloc().
 */
char *
MAKEFLAGS_quote(const char *str)
{
	const char *src;
	char *quoted, *dst;

	/* worst case: every character needs a backslash in front */
	quoted = emalloc(strlen(str) * 2 + 1);

	dst = quoted;
	for (src = str; *src != '\0'; src++) {
		if (*src == ' ' || *src == '\t')
			*dst++ = '\\';
		*dst++ = *src;
	}
	*dst = '\0';

	return (quoted);
}
314140870Sharti
315146345Shartivoid
316146345ShartiMAKEFLAGS_break(ArgArray *aa, const char str[])
317140870Sharti{
318146345Sharti	char	*arg;
319146345Sharti	char	*start;
320140870Sharti
321146345Sharti	ArgArray_Init(aa);
322140870Sharti
323146345Sharti	aa->buffer = strdup(str);
324146345Sharti
325146345Sharti	arg = aa->buffer;
326140870Sharti	start = NULL;
327140870Sharti
328146345Sharti	for (;;) {
329146345Sharti		switch (str[0]) {
330146345Sharti		case ' ':
331146345Sharti		case '\t':
332140870Sharti			/* word separator */
333140870Sharti			if (start == NULL) {
334140870Sharti				/* not in a word */
335140870Sharti				str++;
336140870Sharti				continue;
337140870Sharti			}
338140870Sharti			/* FALLTHRU */
339146345Sharti		case '\0':
340146345Sharti			if (aa->argc == aa->size) {
341146345Sharti				aa->size *= 2;
342146345Sharti				aa->argv = erealloc(aa->argv,
343146345Sharti 				    (aa->size + 1) * sizeof(char *));
344146345Sharti			}
345140870Sharti
346146345Sharti			*arg++ = '\0';
347146345Sharti			if (start == NULL) {
348146345Sharti				aa->argv[aa->argc] = start;
349146345Sharti				return;
350140870Sharti			}
351146345Sharti			if (str[0] == '\0') {
352146345Sharti				aa->argv[aa->argc++] = start;
353146345Sharti				aa->argv[aa->argc] = NULL;
354146345Sharti				return;
355146345Sharti			} else {
356146345Sharti				aa->argv[aa->argc++] = start;
357146345Sharti				start = NULL;
358146345Sharti				str++;
359146345Sharti				continue;
360146345Sharti			}
361140870Sharti
362146345Sharti		case '\\':
363140870Sharti			if (str[1] == ' ' || str[1] == '\t')
364140870Sharti				str++;
365140870Sharti			break;
366140870Sharti
367146345Sharti		default:
368140870Sharti			break;
369140870Sharti		}
370140870Sharti		if (start == NULL)
371146345Sharti			start = arg;
372146345Sharti		*arg++ = *str++;
373140870Sharti	}
374140870Sharti}
375140870Sharti
/*
 * Str_Match --
 *
 * See if a particular string matches a particular pattern.
 *
 * Results: Non-zero is returned if string matches pattern, 0 otherwise. The
 * matching operation permits the following special characters in the
 * pattern: *?\[] (see the man page for details on what these mean).
 * A character class that lacks its closing "]" is treated as if it
 * were closed at the end of the pattern.
 *
 * Side effects: None.
 */
int
Str_Match(const char *string, const char *pattern)
{
	char c2;

	for (;;) {
		/*
		 * See if we're at the end of both the pattern and the
		 * string. If so, we succeeded.  If we're at the end of the
		 * pattern but not at the end of the string, we failed.
		 */
		if (*pattern == 0)
			return (!*string);
		if (*string == 0 && *pattern != '*')
			return (0);
		/*
		 * Check for a "*" as the next pattern character.  It matches
		 * any substring.  We handle this by calling ourselves
		 * recursively for each postfix of string, until either we
		 * match or we reach the end of the string.
		 */
		if (*pattern == '*') {
			/*
			 * A run of stars means the same as a single one.
			 * Collapsing it lets "**" match the empty string
			 * and avoids needless nested recursion.
			 */
			do {
				++pattern;
			} while (*pattern == '*');
			if (*pattern == 0)
				return (1);
			while (*string != 0) {
				if (Str_Match(string, pattern))
					return (1);
				++string;
			}
			return (0);
		}
		/*
		 * Check for a "?" as the next pattern character.  It matches
		 * any single character.
		 */
		if (*pattern == '?')
			goto thisCharOK;
		/*
		 * Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-").
		 */
		if (*pattern == '[') {
			++pattern;
			for (;;) {
				if ((*pattern == ']') || (*pattern == 0))
					return (0);
				if (*pattern == *string)
					break;
				if (pattern[1] == '-') {
					c2 = pattern[2];
					if (c2 == 0)
						return (0);
					/* the range may be given in either order */
					if ((*pattern <= *string) &&
					    (c2 >= *string))
						break;
					if ((*pattern >= *string) &&
					    (c2 <= *string))
						break;
					pattern += 2;
				}
				++pattern;
			}
			/* matched: skip the remainder of the class */
			while ((*pattern != ']') && (*pattern != 0))
				++pattern;
			/*
			 * Unterminated class: back up so that the increment
			 * at thisCharOK lands on the NUL rather than on the
			 * byte after it.
			 */
			if (*pattern == 0)
				--pattern;
			goto thisCharOK;
		}
		/*
		 * If the next pattern character is a backslash, just strip
		 * it off so we do exact matching on the character that
		 * follows.
		 */
		if (*pattern == '\\') {
			++pattern;
			if (*pattern == 0)
				return (0);
		}
		/*
		 * There's no special character.  Just make sure that the
		 * next characters of each string match.
		 */
		if (*pattern != *string)
			return (0);
thisCharOK:	++pattern;
		++string;
	}
}
4741590Srgrimes
4751590Srgrimes
/**
 * Str_SYSVMatch
 *	Match a word against a System V style pattern in which a single
 *	'%' acts as the wildcard.
 *
 * Results:
 *	Returns a pointer to the part of word that the wildcard stands
 *	for, or NULL if there is no match; the number of characters
 *	matched is stored in *len.  An empty word never matches (*len is
 *	set to 0); an empty pattern matches the whole word.  On the other
 *	no-match paths *len is left untouched.
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, int *len)
{
	const char *percent, *tail, *base;

	if (*word == '\0') {
		/* Zero-length word cannot be matched against */
		*len = 0;
		return (NULL);
	}

	if (*pattern == '\0') {
		/* Null pattern is the whole string */
		*len = strlen(word);
		return (word);
	}

	tail = pattern;
	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* the text in front of the '%' has to match literally */
		while (tail != percent && *word != '\0' && *word == *tail) {
			word++;
			tail++;
		}
		if (tail != percent)
			return (NULL);	/* No match */

		/* step over the '%' */
		tail++;
		if (*tail == '\0') {
			/* No more pattern, return the rest of the string */
			*len = strlen(word);
			return (word);
		}
	}

	/* Find a suffix of word that is equal to the rest of the pattern */
	base = word;
	for (;;) {
		if (strcmp(tail, word) == 0) {
			*len = word - base;
			return (base);
		}
		if (*word == '\0')
			return (NULL);
		word++;
	}
}
5311590Srgrimes
5321590Srgrimes
533144479Sharti/**
534144479Sharti * Str_SYSVSubst
5351590Srgrimes *	Substitute '%' on the pattern with len characters from src.
5361590Srgrimes *	If the pattern does not contain a '%' prepend len characters
5371590Srgrimes *	from src.
5388874Srgrimes *
5391590Srgrimes * Side Effects:
5401590Srgrimes *	Places result on buf
5411590Srgrimes */
5421590Srgrimesvoid
543141133ShartiStr_SYSVSubst(Buffer *buf, const char *pat, const char *src, int len)
5441590Srgrimes{
545144479Sharti	const char *m;
5461590Srgrimes
547144479Sharti	if ((m = strchr(pat, '%')) != NULL) {
548144479Sharti		/* Copy the prefix */
549144479Sharti		Buf_AppendRange(buf, pat, m);
550144479Sharti		/* skip the % */
551144479Sharti		pat = m + 1;
552144479Sharti	}
5531590Srgrimes
554144479Sharti	/* Copy the pattern */
555144479Sharti	Buf_AddBytes(buf, len, (const Byte *)src);
5561590Srgrimes
557144479Sharti	/* append the rest */
558144479Sharti	Buf_Append(buf, pat);
5591590Srgrimes}
560