1/* $NetBSD: tokenizer.c,v 1.5 2005/06/09 16:48:58 lukem Exp $ */ 2/* from NetBSD: tokenizer.c,v 1.14 2003/12/05 13:37:48 lukem Exp */ 3 4/*- 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Christos Zoulas of Cornell University. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36#include "tnftp.h" 37#include "sys.h" 38 39/* 40 * tokenize.c: Bourne shell like tokenizer 41 */ 42#include <string.h> 43#include <stdlib.h> 44#include "histedit.h" 45 46typedef enum { 47 Q_none, Q_single, Q_double, Q_one, Q_doubleone 48} quote_t; 49 50#define IFS "\t \n" 51 52#define TOK_KEEP 1 53#define TOK_EAT 2 54 55#define WINCR 20 56#define AINCR 10 57 58#define tok_strdup(a) strdup(a) 59#define tok_malloc(a) malloc(a) 60#define tok_free(a) free(a) 61#define tok_realloc(a, b) realloc(a, b) 62 63 64struct tokenizer { 65 char *ifs; /* In field separator */ 66 int argc, amax; /* Current and maximum number of args */ 67 char **argv; /* Argument list */ 68 char *wptr, *wmax; /* Space and limit on the word buffer */ 69 char *wstart; /* Beginning of next word */ 70 char *wspace; /* Space of word buffer */ 71 quote_t quote; /* Quoting state */ 72 int flags; /* flags; */ 73}; 74 75 76private void tok_finish(Tokenizer *); 77 78 79/* tok_finish(): 80 * Finish a word in the tokenizer. 81 */ 82private void 83tok_finish(Tokenizer *tok) 84{ 85 86 *tok->wptr = '\0'; 87 if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) { 88 tok->argv[tok->argc++] = tok->wstart; 89 tok->argv[tok->argc] = NULL; 90 tok->wstart = ++tok->wptr; 91 } 92 tok->flags &= ~TOK_KEEP; 93} 94 95 96/* tok_init(): 97 * Initialize the tokenizer 98 */ 99public Tokenizer * 100tok_init(const char *ifs) 101{ 102 Tokenizer *tok = (Tokenizer *) tok_malloc(sizeof(Tokenizer)); 103 104 if (tok == NULL) 105 return NULL; 106 tok->ifs = tok_strdup(ifs ? ifs : IFS); 107 if (tok->ifs == NULL) { 108 tok_free((ptr_t)tok); 109 return NULL; 110 } 111 tok->argc = 0; 112 tok->amax = AINCR; 113 tok->argv = (char **) tok_malloc(sizeof(char *) * tok->amax); 114 if (tok->argv == NULL) { 115 tok_free((ptr_t)tok->ifs); 116 tok_free((ptr_t)tok); 117 return NULL; 118 } 119 tok->argv[0] = NULL; 120 tok->wspace = (char *) tok_malloc(WINCR); 121 if (tok->wspace == NULL) { 122 tok_free((ptr_t)tok->argv); 123 tok_free((ptr_t)tok->ifs); 124 tok_free((ptr_t)tok); 125 return NULL; 126 } 127 tok->wmax = tok->wspace + WINCR; 128 tok->wstart = tok->wspace; 129 tok->wptr = tok->wspace; 130 tok->flags = 0; 131 tok->quote = Q_none; 132 133 return (tok); 134} 135 136 137/* tok_reset(): 138 * Reset the tokenizer 139 */ 140public void 141tok_reset(Tokenizer *tok) 142{ 143 144 tok->argc = 0; 145 tok->wstart = tok->wspace; 146 tok->wptr = tok->wspace; 147 tok->flags = 0; 148 tok->quote = Q_none; 149} 150 151 152/* tok_end(): 153 * Clean up 154 */ 155public void 156tok_end(Tokenizer *tok) 157{ 158 159 tok_free((ptr_t) tok->ifs); 160 tok_free((ptr_t) tok->wspace); 161 tok_free((ptr_t) tok->argv); 162 tok_free((ptr_t) tok); 163} 164 165 166 167/* tok_line(): 168 * Bourne shell (sh(1)) like tokenizing 169 * Arguments: 170 * tok current tokenizer state (setup with tok_init()) 171 * line line to parse 172 * Returns: 173 * -1 Internal error 174 * 3 Quoted return 175 * 2 Unmatched double quote 176 * 1 Unmatched single quote 177 * 0 Ok 178 * Modifies (if return value is 0): 179 * argc number of arguments 180 * argv argument array 181 * cursorc if !NULL, argv element containing cursor 182 * cursorv if !NULL, offset in argv[cursorc] of cursor 183 */ 184public int 185tok_line(Tokenizer *tok, const LineInfo *line, 186 int *argc, const char ***argv, int *cursorc, int *cursoro) 187{ 188 const char *ptr; 189 int cc, co; 190 191 cc = co = -1; 192 ptr = line->buffer; 193 for (ptr = line->buffer; ;ptr++) { 194 if (ptr >= line->lastchar) 195 ptr = ""; 196 if (ptr == line->cursor) { 197 cc = tok->argc; 198 co = tok->wptr - tok->wstart; 199 } 200 switch (*ptr) { 201 case '\'': 202 tok->flags |= TOK_KEEP; 203 tok->flags &= ~TOK_EAT; 204 switch (tok->quote) { 205 case Q_none: 206 tok->quote = Q_single; /* Enter single quote 207 * mode */ 208 break; 209 210 case Q_single: /* Exit single quote mode */ 211 tok->quote = Q_none; 212 break; 213 214 case Q_one: /* Quote this ' */ 215 tok->quote = Q_none; 216 *tok->wptr++ = *ptr; 217 break; 218 219 case Q_double: /* Stay in double quote mode */ 220 *tok->wptr++ = *ptr; 221 break; 222 223 case Q_doubleone: /* Quote this ' */ 224 tok->quote = Q_double; 225 *tok->wptr++ = *ptr; 226 break; 227 228 default: 229 return (-1); 230 } 231 break; 232 233 case '"': 234 tok->flags &= ~TOK_EAT; 235 tok->flags |= TOK_KEEP; 236 switch (tok->quote) { 237 case Q_none: /* Enter double quote mode */ 238 tok->quote = Q_double; 239 break; 240 241 case Q_double: /* Exit double quote mode */ 242 tok->quote = Q_none; 243 break; 244 245 case Q_one: /* Quote this " */ 246 tok->quote = Q_none; 247 *tok->wptr++ = *ptr; 248 break; 249 250 case Q_single: /* Stay in single quote mode */ 251 *tok->wptr++ = *ptr; 252 break; 253 254 case Q_doubleone: /* Quote this " */ 255 tok->quote = Q_double; 256 *tok->wptr++ = *ptr; 257 break; 258 259 default: 260 return (-1); 261 } 262 break; 263 264 case '\\': 265 tok->flags |= TOK_KEEP; 266 tok->flags &= ~TOK_EAT; 267 switch (tok->quote) { 268 case Q_none: /* Quote next character */ 269 tok->quote = Q_one; 270 break; 271 272 case Q_double: /* Quote next character */ 273 tok->quote = Q_doubleone; 274 break; 275 276 case Q_one: /* Quote this, restore state */ 277 *tok->wptr++ = *ptr; 278 tok->quote = Q_none; 279 break; 280 281 case Q_single: /* Stay in single quote mode */ 282 *tok->wptr++ = *ptr; 283 break; 284 285 case Q_doubleone: /* Quote this \ */ 286 tok->quote = Q_double; 287 *tok->wptr++ = *ptr; 288 break; 289 290 default: 291 return (-1); 292 } 293 break; 294 295 case '\n': 296 tok->flags &= ~TOK_EAT; 297 switch (tok->quote) { 298 case Q_none: 299 goto tok_line_outok; 300 301 case Q_single: 302 case Q_double: 303 *tok->wptr++ = *ptr; /* Add the return */ 304 break; 305 306 case Q_doubleone: /* Back to double, eat the '\n' */ 307 tok->flags |= TOK_EAT; 308 tok->quote = Q_double; 309 break; 310 311 case Q_one: /* No quote, more eat the '\n' */ 312 tok->flags |= TOK_EAT; 313 tok->quote = Q_none; 314 break; 315 316 default: 317 return (0); 318 } 319 break; 320 321 case '\0': 322 switch (tok->quote) { 323 case Q_none: 324 /* Finish word and return */ 325 if (tok->flags & TOK_EAT) { 326 tok->flags &= ~TOK_EAT; 327 return (3); 328 } 329 goto tok_line_outok; 330 331 case Q_single: 332 return (1); 333 334 case Q_double: 335 return (2); 336 337 case Q_doubleone: 338 tok->quote = Q_double; 339 *tok->wptr++ = *ptr; 340 break; 341 342 case Q_one: 343 tok->quote = Q_none; 344 *tok->wptr++ = *ptr; 345 break; 346 347 default: 348 return (-1); 349 } 350 break; 351 352 default: 353 tok->flags &= ~TOK_EAT; 354 switch (tok->quote) { 355 case Q_none: 356 if (strchr(tok->ifs, *ptr) != NULL) 357 tok_finish(tok); 358 else 359 *tok->wptr++ = *ptr; 360 break; 361 362 case Q_single: 363 case Q_double: 364 *tok->wptr++ = *ptr; 365 break; 366 367 368 case Q_doubleone: 369 *tok->wptr++ = '\\'; 370 tok->quote = Q_double; 371 *tok->wptr++ = *ptr; 372 break; 373 374 case Q_one: 375 tok->quote = Q_none; 376 *tok->wptr++ = *ptr; 377 break; 378 379 default: 380 return (-1); 381 382 } 383 break; 384 } 385 386 if (tok->wptr >= tok->wmax - 4) { 387 size_t size = tok->wmax - tok->wspace + WINCR; 388 char *s = (char *) tok_realloc(tok->wspace, size); 389 if (s == NULL) 390 return (-1); 391 392 if (s != tok->wspace) { 393 int i; 394 for (i = 0; i < tok->argc; i++) { 395 tok->argv[i] = 396 (tok->argv[i] - tok->wspace) + s; 397 } 398 tok->wptr = (tok->wptr - tok->wspace) + s; 399 tok->wstart = (tok->wstart - tok->wspace) + s; 400 tok->wspace = s; 401 } 402 tok->wmax = s + size; 403 } 404 if (tok->argc >= tok->amax - 4) { 405 char **p; 406 tok->amax += AINCR; 407 p = (char **) tok_realloc(tok->argv, 408 tok->amax * sizeof(char *)); 409 if (p == NULL) 410 return (-1); 411 tok->argv = p; 412 } 413 } 414 tok_line_outok: 415 if (cc == -1 && co == -1) { 416 cc = tok->argc; 417 co = tok->wptr - tok->wstart; 418 } 419 if (cursorc != NULL) 420 *cursorc = cc; 421 if (cursoro != NULL) 422 *cursoro = co; 423 tok_finish(tok); 424 *argv = (const char **)tok->argv; 425 *argc = tok->argc; 426 return (0); 427} 428 429/* tok_str(): 430 * Simpler version of tok_line, taking a NUL terminated line 431 * and splitting into words, ignoring cursor state. 432 */ 433public int 434tok_str(Tokenizer *tok, const char *line, int *argc, const char ***argv) 435{ 436 LineInfo li; 437 438 memset(&li, 0, sizeof(li)); 439 li.buffer = line; 440 li.cursor = li.lastchar = strchr(line, '\0'); 441 return (tok_line(tok, &li, argc, argv, NULL, NULL)); 442} 443