1/*- 2 * Copyright (c) 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Christos Zoulas of Cornell University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * $NetBSD: tokenizer.c,v 1.15 2009/02/15 21:55:23 christos Exp $ 33 */ 34 35#if !defined(lint) && !defined(SCCSID) 36static char sccsid[] = "@(#)tokenizer.c 8.1 (Berkeley) 6/4/93"; 37#endif /* not lint && not SCCSID */ 38#include <sys/cdefs.h> 39__FBSDID("$FreeBSD$"); 40 41/* 42 * tokenize.c: Bourne shell like tokenizer 43 */ 44#include "sys.h" 45#include <string.h> 46#include <stdlib.h> 47#include "histedit.h" 48 49typedef enum { 50 Q_none, Q_single, Q_double, Q_one, Q_doubleone 51} quote_t; 52 53#define IFS "\t \n" 54 55#define TOK_KEEP 1 56#define TOK_EAT 2 57 58#define WINCR 20 59#define AINCR 10 60 61#define tok_strdup(a) strdup(a) 62#define tok_malloc(a) malloc(a) 63#define tok_free(a) free(a) 64#define tok_realloc(a, b) realloc(a, b) 65 66 67struct tokenizer { 68 char *ifs; /* In field separator */ 69 int argc, amax; /* Current and maximum number of args */ 70 char **argv; /* Argument list */ 71 char *wptr, *wmax; /* Space and limit on the word buffer */ 72 char *wstart; /* Beginning of next word */ 73 char *wspace; /* Space of word buffer */ 74 quote_t quote; /* Quoting state */ 75 int flags; /* flags; */ 76}; 77 78 79private void tok_finish(Tokenizer *); 80 81 82/* tok_finish(): 83 * Finish a word in the tokenizer. 84 */ 85private void 86tok_finish(Tokenizer *tok) 87{ 88 89 *tok->wptr = '\0'; 90 if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) { 91 tok->argv[tok->argc++] = tok->wstart; 92 tok->argv[tok->argc] = NULL; 93 tok->wstart = ++tok->wptr; 94 } 95 tok->flags &= ~TOK_KEEP; 96} 97 98 99/* tok_init(): 100 * Initialize the tokenizer 101 */ 102public Tokenizer * 103tok_init(const char *ifs) 104{ 105 Tokenizer *tok = (Tokenizer *) tok_malloc(sizeof(Tokenizer)); 106 107 if (tok == NULL) 108 return NULL; 109 tok->ifs = tok_strdup(ifs ? ifs : IFS); 110 if (tok->ifs == NULL) { 111 tok_free((ptr_t)tok); 112 return NULL; 113 } 114 tok->argc = 0; 115 tok->amax = AINCR; 116 tok->argv = (char **) tok_malloc(sizeof(char *) * tok->amax); 117 if (tok->argv == NULL) { 118 tok_free((ptr_t)tok->ifs); 119 tok_free((ptr_t)tok); 120 return NULL; 121 } 122 tok->argv[0] = NULL; 123 tok->wspace = (char *) tok_malloc(WINCR); 124 if (tok->wspace == NULL) { 125 tok_free((ptr_t)tok->argv); 126 tok_free((ptr_t)tok->ifs); 127 tok_free((ptr_t)tok); 128 return NULL; 129 } 130 tok->wmax = tok->wspace + WINCR; 131 tok->wstart = tok->wspace; 132 tok->wptr = tok->wspace; 133 tok->flags = 0; 134 tok->quote = Q_none; 135 136 return (tok); 137} 138 139 140/* tok_reset(): 141 * Reset the tokenizer 142 */ 143public void 144tok_reset(Tokenizer *tok) 145{ 146 147 tok->argc = 0; 148 tok->wstart = tok->wspace; 149 tok->wptr = tok->wspace; 150 tok->flags = 0; 151 tok->quote = Q_none; 152} 153 154 155/* tok_end(): 156 * Clean up 157 */ 158public void 159tok_end(Tokenizer *tok) 160{ 161 162 tok_free((ptr_t) tok->ifs); 163 tok_free((ptr_t) tok->wspace); 164 tok_free((ptr_t) tok->argv); 165 tok_free((ptr_t) tok); 166} 167 168 169 170/* tok_line(): 171 * Bourne shell (sh(1)) like tokenizing 172 * Arguments: 173 * tok current tokenizer state (setup with tok_init()) 174 * line line to parse 175 * Returns: 176 * -1 Internal error 177 * 3 Quoted return 178 * 2 Unmatched double quote 179 * 1 Unmatched single quote 180 * 0 Ok 181 * Modifies (if return value is 0): 182 * argc number of arguments 183 * argv argument array 184 * cursorc if !NULL, argv element containing cursor 185 * cursorv if !NULL, offset in argv[cursorc] of cursor 186 */ 187public int 188tok_line(Tokenizer *tok, const LineInfo *line, 189 int *argc, const char ***argv, int *cursorc, int *cursoro) 190{ 191 const char *ptr; 192 int cc, co; 193 194 cc = co = -1; 195 ptr = line->buffer; 196 for (ptr = line->buffer; ;ptr++) { 197 if (ptr >= line->lastchar) 198 ptr = ""; 199 if (ptr == line->cursor) { 200 cc = tok->argc; 201 co = (int)(tok->wptr - tok->wstart); 202 } 203 switch (*ptr) { 204 case '\'': 205 tok->flags |= TOK_KEEP; 206 tok->flags &= ~TOK_EAT; 207 switch (tok->quote) { 208 case Q_none: 209 tok->quote = Q_single; /* Enter single quote 210 * mode */ 211 break; 212 213 case Q_single: /* Exit single quote mode */ 214 tok->quote = Q_none; 215 break; 216 217 case Q_one: /* Quote this ' */ 218 tok->quote = Q_none; 219 *tok->wptr++ = *ptr; 220 break; 221 222 case Q_double: /* Stay in double quote mode */ 223 *tok->wptr++ = *ptr; 224 break; 225 226 case Q_doubleone: /* Quote this ' */ 227 tok->quote = Q_double; 228 *tok->wptr++ = *ptr; 229 break; 230 231 default: 232 return (-1); 233 } 234 break; 235 236 case '"': 237 tok->flags &= ~TOK_EAT; 238 tok->flags |= TOK_KEEP; 239 switch (tok->quote) { 240 case Q_none: /* Enter double quote mode */ 241 tok->quote = Q_double; 242 break; 243 244 case Q_double: /* Exit double quote mode */ 245 tok->quote = Q_none; 246 break; 247 248 case Q_one: /* Quote this " */ 249 tok->quote = Q_none; 250 *tok->wptr++ = *ptr; 251 break; 252 253 case Q_single: /* Stay in single quote mode */ 254 *tok->wptr++ = *ptr; 255 break; 256 257 case Q_doubleone: /* Quote this " */ 258 tok->quote = Q_double; 259 *tok->wptr++ = *ptr; 260 break; 261 262 default: 263 return (-1); 264 } 265 break; 266 267 case '\\': 268 tok->flags |= TOK_KEEP; 269 tok->flags &= ~TOK_EAT; 270 switch (tok->quote) { 271 case Q_none: /* Quote next character */ 272 tok->quote = Q_one; 273 break; 274 275 case Q_double: /* Quote next character */ 276 tok->quote = Q_doubleone; 277 break; 278 279 case Q_one: /* Quote this, restore state */ 280 *tok->wptr++ = *ptr; 281 tok->quote = Q_none; 282 break; 283 284 case Q_single: /* Stay in single quote mode */ 285 *tok->wptr++ = *ptr; 286 break; 287 288 case Q_doubleone: /* Quote this \ */ 289 tok->quote = Q_double; 290 *tok->wptr++ = *ptr; 291 break; 292 293 default: 294 return (-1); 295 } 296 break; 297 298 case '\n': 299 tok->flags &= ~TOK_EAT; 300 switch (tok->quote) { 301 case Q_none: 302 goto tok_line_outok; 303 304 case Q_single: 305 case Q_double: 306 *tok->wptr++ = *ptr; /* Add the return */ 307 break; 308 309 case Q_doubleone: /* Back to double, eat the '\n' */ 310 tok->flags |= TOK_EAT; 311 tok->quote = Q_double; 312 break; 313 314 case Q_one: /* No quote, more eat the '\n' */ 315 tok->flags |= TOK_EAT; 316 tok->quote = Q_none; 317 break; 318 319 default: 320 return (0); 321 } 322 break; 323 324 case '\0': 325 switch (tok->quote) { 326 case Q_none: 327 /* Finish word and return */ 328 if (tok->flags & TOK_EAT) { 329 tok->flags &= ~TOK_EAT; 330 return (3); 331 } 332 goto tok_line_outok; 333 334 case Q_single: 335 return (1); 336 337 case Q_double: 338 return (2); 339 340 case Q_doubleone: 341 tok->quote = Q_double; 342 *tok->wptr++ = *ptr; 343 break; 344 345 case Q_one: 346 tok->quote = Q_none; 347 *tok->wptr++ = *ptr; 348 break; 349 350 default: 351 return (-1); 352 } 353 break; 354 355 default: 356 tok->flags &= ~TOK_EAT; 357 switch (tok->quote) { 358 case Q_none: 359 if (strchr(tok->ifs, *ptr) != NULL) 360 tok_finish(tok); 361 else 362 *tok->wptr++ = *ptr; 363 break; 364 365 case Q_single: 366 case Q_double: 367 *tok->wptr++ = *ptr; 368 break; 369 370 371 case Q_doubleone: 372 *tok->wptr++ = '\\'; 373 tok->quote = Q_double; 374 *tok->wptr++ = *ptr; 375 break; 376 377 case Q_one: 378 tok->quote = Q_none; 379 *tok->wptr++ = *ptr; 380 break; 381 382 default: 383 return (-1); 384 385 } 386 break; 387 } 388 389 if (tok->wptr >= tok->wmax - 4) { 390 size_t size = tok->wmax - tok->wspace + WINCR; 391 char *s = (char *) tok_realloc(tok->wspace, size); 392 if (s == NULL) 393 return (-1); 394 395 if (s != tok->wspace) { 396 int i; 397 for (i = 0; i < tok->argc; i++) { 398 tok->argv[i] = 399 (tok->argv[i] - tok->wspace) + s; 400 } 401 tok->wptr = (tok->wptr - tok->wspace) + s; 402 tok->wstart = (tok->wstart - tok->wspace) + s; 403 tok->wspace = s; 404 } 405 tok->wmax = s + size; 406 } 407 if (tok->argc >= tok->amax - 4) { 408 char **p; 409 tok->amax += AINCR; 410 p = (char **) tok_realloc(tok->argv, 411 tok->amax * sizeof(char *)); 412 if (p == NULL) 413 return (-1); 414 tok->argv = p; 415 } 416 } 417 tok_line_outok: 418 if (cc == -1 && co == -1) { 419 cc = tok->argc; 420 co = (int)(tok->wptr - tok->wstart); 421 } 422 if (cursorc != NULL) 423 *cursorc = cc; 424 if (cursoro != NULL) 425 *cursoro = co; 426 tok_finish(tok); 427 *argv = (const char **)tok->argv; 428 *argc = tok->argc; 429 return (0); 430} 431 432/* tok_str(): 433 * Simpler version of tok_line, taking a NUL terminated line 434 * and splitting into words, ignoring cursor state. 435 */ 436public int 437tok_str(Tokenizer *tok, const char *line, int *argc, const char ***argv) 438{ 439 LineInfo li; 440 441 memset(&li, 0, sizeof(li)); 442 li.buffer = line; 443 li.cursor = li.lastchar = strchr(line, '\0'); 444 return (tok_line(tok, &li, argc, argv, NULL, NULL)); 445} 446