tokenizer.c revision 84201
1/*- 2 * Copyright (c) 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Christos Zoulas of Cornell University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/lib/libedit/tokenizer.c 84201 2001-09-30 21:21:36Z dillon $"); 39#if !defined(lint) && !defined(SCCSID) 40static char sccsid[] = "@(#)tokenizer.c 8.1 (Berkeley) 6/4/93"; 41#endif /* not lint && not SCCSID */ 42 43/* 44 * tokenize.c: Bourne shell like tokenizer 45 */ 46#include "sys.h" 47#include <string.h> 48#include <stdlib.h> 49#include "tokenizer.h" 50 51typedef enum { Q_none, Q_single, Q_double, Q_one, Q_doubleone } quote_t; 52 53#define IFS "\t \n" 54 55#define TOK_KEEP 1 56#define TOK_EAT 2 57 58#define WINCR 20 59#define AINCR 10 60 61#define tok_malloc(a) malloc(a) 62#define tok_free(a) free(a) 63#define tok_realloc(a, b) realloc(a, b) 64#define tok_reallocf(a, b) reallocf(a, b) 65 66 67struct tokenizer { 68 char *ifs; /* In field separator */ 69 int argc, amax; /* Current and maximum number of args */ 70 char **argv; /* Argument list */ 71 char *wptr, *wmax; /* Space and limit on the word buffer */ 72 char *wstart; /* Beginning of next word */ 73 char *wspace; /* Space of word buffer */ 74 quote_t quote; /* Quoting state */ 75 int flags; /* flags; */ 76}; 77 78 79private void tok_finish __P((Tokenizer *)); 80 81 82/* tok_finish(): 83 * Finish a word in the tokenizer. 84 */ 85private void 86tok_finish(tok) 87 Tokenizer *tok; 88{ 89 *tok->wptr = '\0'; 90 if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) { 91 tok->argv[tok->argc++] = tok->wstart; 92 tok->argv[tok->argc] = NULL; 93 tok->wstart = ++tok->wptr; 94 } 95 tok->flags &= ~TOK_KEEP; 96} 97 98 99/* tok_init(): 100 * Initialize the tokenizer 101 */ 102public Tokenizer * 103tok_init(ifs) 104 const char *ifs; 105{ 106 Tokenizer* tok = (Tokenizer*) tok_malloc(sizeof(Tokenizer)); 107 108 tok->ifs = strdup(ifs ? ifs : IFS); 109 tok->argc = 0; 110 tok->amax = AINCR; 111 tok->argv = (char **) tok_malloc(sizeof(char *) * tok->amax); 112 tok->argv[0] = NULL; 113 tok->wspace = (char *) tok_malloc(WINCR); 114 tok->wmax = tok->wspace + WINCR; 115 tok->wstart = tok->wspace; 116 tok->wptr = tok->wspace; 117 tok->flags = 0; 118 tok->quote = Q_none; 119 120 return tok; 121} 122 123 124/* tok_reset(): 125 * Reset the tokenizer 126 */ 127public void 128tok_reset(tok) 129 Tokenizer *tok; 130{ 131 tok->argc = 0; 132 tok->wstart = tok->wspace; 133 tok->wptr = tok->wspace; 134 tok->flags = 0; 135 tok->quote = Q_none; 136} 137 138 139/* tok_end(): 140 * Clean up 141 */ 142public void 143tok_end(tok) 144 Tokenizer *tok; 145{ 146 tok_free((ptr_t) tok->ifs); 147 tok_free((ptr_t) tok->wspace); 148 tok_free((ptr_t) tok->argv); 149 tok_free((ptr_t) tok); 150} 151 152 153 154/* tok_line(): 155 * Bourne shell like tokenizing 156 * Return: 157 * -1: Internal error 158 * 3: Quoted return 159 * 2: Unmatched double quote 160 * 1: Unmatched single quote 161 * 0: Ok 162 */ 163public int 164tok_line(tok, line, argc, argv) 165 Tokenizer *tok; 166 const char* line; 167 int *argc; 168 char ***argv; 169{ 170 const char *ptr; 171 172 while (1) { 173 switch (*(ptr = line++)) { 174 case '\'': 175 tok->flags |= TOK_KEEP; 176 tok->flags &= ~TOK_EAT; 177 switch (tok->quote) { 178 case Q_none: 179 tok->quote = Q_single; /* Enter single quote mode */ 180 break; 181 182 case Q_single: /* Exit single quote mode */ 183 tok->quote = Q_none; 184 break; 185 186 case Q_one: /* Quote this ' */ 187 tok->quote = Q_none; 188 *tok->wptr++ = *ptr; 189 break; 190 191 case Q_double: /* Stay in double quote mode */ 192 *tok->wptr++ = *ptr; 193 break; 194 195 case Q_doubleone: /* Quote this ' */ 196 tok->quote = Q_double; 197 *tok->wptr++ = *ptr; 198 break; 199 200 default: 201 return(-1); 202 } 203 break; 204 205 case '"': 206 tok->flags &= ~TOK_EAT; 207 tok->flags |= TOK_KEEP; 208 switch (tok->quote) { 209 case Q_none: /* Enter double quote mode */ 210 tok->quote = Q_double; 211 break; 212 213 case Q_double: 214 tok->quote = Q_none; /* Exit double quote mode */ 215 break; 216 217 case Q_one: /* Quote this " */ 218 tok->quote = Q_none; 219 *tok->wptr++ = *ptr; 220 break; 221 222 case Q_single: /* Stay in single quote mode */ 223 *tok->wptr++ = *ptr; 224 break; 225 226 case Q_doubleone: /* Quote this " */ 227 tok->quote = Q_double; 228 *tok->wptr++ = *ptr; 229 break; 230 231 default: 232 return(-1); 233 } 234 break; 235 236 case '\\': 237 tok->flags |= TOK_KEEP; 238 tok->flags &= ~TOK_EAT; 239 switch (tok->quote) { 240 case Q_none: /* Quote next character */ 241 tok->quote = Q_one; 242 break; 243 244 case Q_double: 245 tok->quote = Q_doubleone;/* Quote next character */ 246 break; 247 248 case Q_one: 249 *tok->wptr++ = *ptr; 250 tok->quote = Q_none; /* Quote this, restore state */ 251 break; 252 253 case Q_single: /* Stay in single quote mode */ 254 *tok->wptr++ = *ptr; 255 break; 256 257 case Q_doubleone: /* Quote this \ */ 258 tok->quote = Q_double; 259 *tok->wptr++ = *ptr; 260 break; 261 262 default: 263 return(-1); 264 } 265 break; 266 267 case '\n': 268 tok->flags &= ~TOK_EAT; 269 switch (tok->quote) { 270 case Q_none: 271 tok_finish(tok); 272 *argv = tok->argv; 273 *argc = tok->argc; 274 return(0); 275 276 case Q_single: 277 case Q_double: 278 *tok->wptr++ = *ptr; /* Add the return */ 279 break; 280 281 case Q_doubleone: 282 tok->flags |= TOK_EAT; 283 tok->quote = Q_double; /* Back to double, eat the '\n' */ 284 break; 285 286 case Q_one: 287 tok->flags |= TOK_EAT; 288 tok->quote = Q_none; /* No quote, more eat the '\n' */ 289 break; 290 291 default: 292 return(0); 293 } 294 break; 295 296 case '\0': 297 switch (tok->quote) { 298 case Q_none: 299 /* Finish word and return */ 300 if (tok->flags & TOK_EAT) { 301 tok->flags &= ~TOK_EAT; 302 return 3; 303 } 304 tok_finish(tok); 305 *argv = tok->argv; 306 *argc = tok->argc; 307 return(0); 308 309 case Q_single: 310 return(1); 311 312 case Q_double: 313 return(2); 314 315 case Q_doubleone: 316 tok->quote = Q_double; 317 *tok->wptr++ = *ptr; 318 break; 319 320 case Q_one: 321 tok->quote = Q_none; 322 *tok->wptr++ = *ptr; 323 break; 324 325 default: 326 return(-1); 327 } 328 break; 329 330 default: 331 tok->flags &= ~TOK_EAT; 332 switch (tok->quote) { 333 case Q_none: 334 if (strchr(tok->ifs, *ptr) != NULL) 335 tok_finish(tok); 336 else 337 *tok->wptr++ = *ptr; 338 break; 339 340 case Q_single: 341 case Q_double: 342 *tok->wptr++ = *ptr; 343 break; 344 345 346 case Q_doubleone: 347 *tok->wptr++ = '\\'; 348 tok->quote = Q_double; 349 *tok->wptr++ = *ptr; 350 break; 351 352 case Q_one: 353 tok->quote = Q_none; 354 *tok->wptr++ = *ptr; 355 break; 356 357 default: 358 return(-1); 359 360 } 361 break; 362 } 363 364 if (tok->wptr >= tok->wmax - 4) { 365 size_t size = tok->wmax - tok->wspace + WINCR; 366 char *s = (char *) tok_realloc(tok->wspace, size); 367 /*SUPPRESS 22*/ 368 int offs = s - tok->wspace; 369 370 if (offs != 0) { 371 int i; 372 for (i = 0; i < tok->argc; i++) 373 tok->argv[i] = tok->argv[i] + offs; 374 tok->wptr = tok->wptr + offs; 375 tok->wstart = tok->wstart + offs; 376 tok->wmax = s + size; 377 tok->wspace = s; 378 } 379 } 380 381 if (tok->argc >= tok->amax - 4) { 382 tok->amax += AINCR; 383 tok->argv = (char **) tok_reallocf(tok->argv, 384 tok->amax * sizeof(char*)); 385 } 386 387 } 388} 389