1/*	$NetBSD: str.c,v 1.34 2012/03/03 23:16:47 dholland Exp $	*/
2
3/*-
4 * Copyright (c) 1988, 1989, 1990, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Adam de Boor.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35/*-
36 * Copyright (c) 1989 by Berkeley Softworks
37 * All rights reserved.
38 *
39 * This code is derived from software contributed to Berkeley by
40 * Adam de Boor.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 *    must display the following acknowledgement:
52 *	This product includes software developed by the University of
53 *	California, Berkeley and its contributors.
54 * 4. Neither the name of the University nor the names of its contributors
55 *    may be used to endorse or promote products derived from this software
56 *    without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 */
70
71#ifndef MAKE_NATIVE
72static char rcsid[] = "$NetBSD: str.c,v 1.34 2012/03/03 23:16:47 dholland Exp $";
73#else
74#include <sys/cdefs.h>
75#ifndef lint
76#if 0
77static char     sccsid[] = "@(#)str.c	5.8 (Berkeley) 6/1/90";
78#else
79__RCSID("$NetBSD: str.c,v 1.34 2012/03/03 23:16:47 dholland Exp $");
80#endif
81#endif				/* not lint */
82#endif
83
84#include "make.h"
85
86/*-
87 * str_concat --
88 *	concatenate the two strings, inserting a space or slash between them,
89 *	freeing them if requested.
90 *
91 * returns --
92 *	the resulting string in allocated space.
93 */
94char *
95str_concat(const char *s1, const char *s2, int flags)
96{
97	int len1, len2;
98	char *result;
99
100	/* get the length of both strings */
101	len1 = strlen(s1);
102	len2 = strlen(s2);
103
104	/* allocate length plus separator plus EOS */
105	result = bmake_malloc((u_int)(len1 + len2 + 2));
106
107	/* copy first string into place */
108	memcpy(result, s1, len1);
109
110	/* add separator character */
111	if (flags & STR_ADDSPACE) {
112		result[len1] = ' ';
113		++len1;
114	} else if (flags & STR_ADDSLASH) {
115		result[len1] = '/';
116		++len1;
117	}
118
119	/* copy second string plus EOS into place */
120	memcpy(result + len1, s2, len2 + 1);
121
122	return(result);
123}
124
125/*-
126 * brk_string --
127 *	Fracture a string into an array of words (as delineated by tabs or
128 *	spaces) taking quotation marks into account.  Leading tabs/spaces
129 *	are ignored.
130 *
131 * If expand is TRUE, quotes are removed and escape sequences
132 *  such as \r, \t, etc... are expanded.
133 *
134 * returns --
135 *	Pointer to the array of pointers to the words.
136 *      Memory containing the actual words in *buffer.
137 *		Both of these must be free'd by the caller.
138 *      Number of words in *store_argc.
139 */
140char **
141brk_string(const char *str, int *store_argc, Boolean expand, char **buffer)
142{
143	int argc, ch;
144	char inquote, *start, *t;
145	const char *p;
146	int len;
147	int argmax = 50, curlen = 0;
148    	char **argv = bmake_malloc((argmax + 1) * sizeof(char *));
149
150	/* skip leading space chars. */
151	for (; *str == ' ' || *str == '\t'; ++str)
152		continue;
153
154	/* allocate room for a copy of the string */
155	if ((len = strlen(str) + 1) > curlen)
156		*buffer = bmake_malloc(curlen = len);
157
158	/*
159	 * copy the string; at the same time, parse backslashes,
160	 * quotes and build the argument list.
161	 */
162	argc = 0;
163	inquote = '\0';
164	for (p = str, start = t = *buffer;; ++p) {
165		switch(ch = *p) {
166		case '"':
167		case '\'':
168			if (inquote) {
169				if (inquote == ch)
170					inquote = '\0';
171				else
172					break;
173			}
174			else {
175				inquote = (char) ch;
176				/* Don't miss "" or '' */
177				if (start == NULL && p[1] == inquote) {
178					if (!expand) {
179						start = t;
180						*t++ = ch;
181					} else
182						start = t + 1;
183					p++;
184					inquote = '\0';
185					break;
186				}
187			}
188			if (!expand) {
189				if (!start)
190					start = t;
191				*t++ = ch;
192			}
193			continue;
194		case ' ':
195		case '\t':
196		case '\n':
197			if (inquote)
198				break;
199			if (!start)
200				continue;
201			/* FALLTHROUGH */
202		case '\0':
203			/*
204			 * end of a token -- make sure there's enough argv
205			 * space and save off a pointer.
206			 */
207			if (!start)
208			    goto done;
209
210			*t++ = '\0';
211			if (argc == argmax) {
212				argmax *= 2;		/* ramp up fast */
213				argv = (char **)bmake_realloc(argv,
214				    (argmax + 1) * sizeof(char *));
215			}
216			argv[argc++] = start;
217			start = NULL;
218			if (ch == '\n' || ch == '\0') {
219				if (expand && inquote) {
220					free(argv);
221					free(*buffer);
222					*buffer = NULL;
223					return NULL;
224				}
225				goto done;
226			}
227			continue;
228		case '\\':
229			if (!expand) {
230				if (!start)
231					start = t;
232				*t++ = '\\';
233				if (*(p+1) == '\0') /* catch '\' at end of line */
234					continue;
235				ch = *++p;
236				break;
237			}
238
239			switch (ch = *++p) {
240			case '\0':
241			case '\n':
242				/* hmmm; fix it up as best we can */
243				ch = '\\';
244				--p;
245				break;
246			case 'b':
247				ch = '\b';
248				break;
249			case 'f':
250				ch = '\f';
251				break;
252			case 'n':
253				ch = '\n';
254				break;
255			case 'r':
256				ch = '\r';
257				break;
258			case 't':
259				ch = '\t';
260				break;
261			}
262			break;
263		}
264		if (!start)
265			start = t;
266		*t++ = (char) ch;
267	}
268done:	argv[argc] = NULL;
269	*store_argc = argc;
270	return(argv);
271}
272
273/*
274 * Str_FindSubstring -- See if a string contains a particular substring.
275 *
276 * Input:
277 *	string		String to search.
278 *	substring	Substring to find in string.
279 *
280 * Results: If string contains substring, the return value is the location of
281 * the first matching instance of substring in string.  If string doesn't
282 * contain substring, the return value is NULL.  Matching is done on an exact
283 * character-for-character basis with no wildcards or special characters.
284 *
285 * Side effects: None.
286 */
287char *
288Str_FindSubstring(const char *string, const char *substring)
289{
290	const char *a, *b;
291
292	/*
293	 * First scan quickly through the two strings looking for a single-
294	 * character match.  When it's found, then compare the rest of the
295	 * substring.
296	 */
297
298	for (b = substring; *string != 0; string += 1) {
299		if (*string != *b)
300			continue;
301		a = string;
302		for (;;) {
303			if (*b == 0)
304				return UNCONST(string);
305			if (*a++ != *b++)
306				break;
307		}
308		b = substring;
309	}
310	return NULL;
311}
312
313/*
314 * Str_Match --
315 *
316 * See if a particular string matches a particular pattern.
317 *
318 * Results: Non-zero is returned if string matches pattern, 0 otherwise. The
319 * matching operation permits the following special characters in the
320 * pattern: *?\[] (see the man page for details on what these mean).
321 *
322 * XXX this function does not detect or report malformed patterns.
323 *
324 * Side effects: None.
325 */
326int
327Str_Match(const char *string, const char *pattern)
328{
329	char c2;
330
331	for (;;) {
332		/*
333		 * See if we're at the end of both the pattern and the
334		 * string. If, we succeeded.  If we're at the end of the
335		 * pattern but not at the end of the string, we failed.
336		 */
337		if (*pattern == 0)
338			return(!*string);
339		if (*string == 0 && *pattern != '*')
340			return(0);
341		/*
342		 * Check for a "*" as the next pattern character.  It matches
343		 * any substring.  We handle this by calling ourselves
344		 * recursively for each postfix of string, until either we
345		 * match or we reach the end of the string.
346		 */
347		if (*pattern == '*') {
348			pattern += 1;
349			if (*pattern == 0)
350				return(1);
351			while (*string != 0) {
352				if (Str_Match(string, pattern))
353					return(1);
354				++string;
355			}
356			return(0);
357		}
358		/*
359		 * Check for a "?" as the next pattern character.  It matches
360		 * any single character.
361		 */
362		if (*pattern == '?')
363			goto thisCharOK;
364		/*
365		 * Check for a "[" as the next pattern character.  It is
366		 * followed by a list of characters that are acceptable, or
367		 * by a range (two characters separated by "-").
368		 */
369		if (*pattern == '[') {
370			++pattern;
371			for (;;) {
372				if ((*pattern == ']') || (*pattern == 0))
373					return(0);
374				if (*pattern == *string)
375					break;
376				if (pattern[1] == '-') {
377					c2 = pattern[2];
378					if (c2 == 0)
379						return(0);
380					if ((*pattern <= *string) &&
381					    (c2 >= *string))
382						break;
383					if ((*pattern >= *string) &&
384					    (c2 <= *string))
385						break;
386					pattern += 2;
387				}
388				++pattern;
389			}
390			while ((*pattern != ']') && (*pattern != 0))
391				++pattern;
392			goto thisCharOK;
393		}
394		/*
395		 * If the next pattern character is '/', just strip off the
396		 * '/' so we do exact matching on the character that follows.
397		 */
398		if (*pattern == '\\') {
399			++pattern;
400			if (*pattern == 0)
401				return(0);
402		}
403		/*
404		 * There's no special character.  Just make sure that the
405		 * next characters of each string match.
406		 */
407		if (*pattern != *string)
408			return(0);
409thisCharOK:	++pattern;
410		++string;
411	}
412}
413
414
415/*-
416 *-----------------------------------------------------------------------
417 * Str_SYSVMatch --
418 *	Check word against pattern for a match (% is wild),
419 *
420 * Input:
421 *	word		Word to examine
422 *	pattern		Pattern to examine against
423 *	len		Number of characters to substitute
424 *
425 * Results:
426 *	Returns the beginning position of a match or null. The number
427 *	of characters matched is returned in len.
428 *
429 * Side Effects:
430 *	None
431 *
432 *-----------------------------------------------------------------------
433 */
434char *
435Str_SYSVMatch(const char *word, const char *pattern, int *len)
436{
437    const char *p = pattern;
438    const char *w = word;
439    const char *m;
440
441    if (*p == '\0') {
442	/* Null pattern is the whole string */
443	*len = strlen(w);
444	return UNCONST(w);
445    }
446
447    if ((m = strchr(p, '%')) != NULL) {
448	/* check that the prefix matches */
449	for (; p != m && *w && *w == *p; w++, p++)
450	     continue;
451
452	if (p != m)
453	    return NULL;	/* No match */
454
455	if (*++p == '\0') {
456	    /* No more pattern, return the rest of the string */
457	    *len = strlen(w);
458	    return UNCONST(w);
459	}
460    }
461
462    m = w;
463
464    /* Find a matching tail */
465    do
466	if (strcmp(p, w) == 0) {
467	    *len = w - m;
468	    return UNCONST(m);
469	}
470    while (*w++ != '\0');
471
472    return NULL;
473}
474
475
476/*-
477 *-----------------------------------------------------------------------
478 * Str_SYSVSubst --
479 *	Substitute '%' on the pattern with len characters from src.
480 *	If the pattern does not contain a '%' prepend len characters
481 *	from src.
482 *
483 * Results:
484 *	None
485 *
486 * Side Effects:
487 *	Places result on buf
488 *
489 *-----------------------------------------------------------------------
490 */
491void
492Str_SYSVSubst(Buffer *buf, char *pat, char *src, int len)
493{
494    char *m;
495
496    if ((m = strchr(pat, '%')) != NULL) {
497	/* Copy the prefix */
498	Buf_AddBytes(buf, m - pat, pat);
499	/* skip the % */
500	pat = m + 1;
501    }
502
503    /* Copy the pattern */
504    Buf_AddBytes(buf, len, src);
505
506    /* append the rest */
507    Buf_AddBytes(buf, strlen(pat), pat);
508}
509