tokenizer.c (238624) | tokenizer.c (276881) |
---|---|
1/* $NetBSD: tokenizer.c,v 1.21 2011/08/16 16:25:15 christos Exp $ */ 2 |
|
1/*- 2 * Copyright (c) 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Christos Zoulas of Cornell University. 7 * 8 * Redistribution and use in source and binary forms, with or without --- 14 unchanged lines hidden (view full) --- 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. | 3/*- 4 * Copyright (c) 1992, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Christos Zoulas of Cornell University. 9 * 10 * Redistribution and use in source and binary forms, with or without --- 14 unchanged lines hidden (view full) --- 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. |
31 * 32 * $NetBSD: tokenizer.c,v 1.15 2009/02/15 21:55:23 christos Exp $ | |
33 */ 34 | 33 */ 34 |
35#include "config.h" |
|
35#if !defined(lint) && !defined(SCCSID) | 36#if !defined(lint) && !defined(SCCSID) |
37#if 0 |
|
36static char sccsid[] = "@(#)tokenizer.c 8.1 (Berkeley) 6/4/93"; | 38static char sccsid[] = "@(#)tokenizer.c 8.1 (Berkeley) 6/4/93"; |
39#else 40__RCSID("$NetBSD: tokenizer.c,v 1.21 2011/08/16 16:25:15 christos Exp $"); 41#endif |
|
37#endif /* not lint && not SCCSID */ 38#include <sys/cdefs.h> | 42#endif /* not lint && not SCCSID */ 43#include <sys/cdefs.h> |
39__FBSDID("$FreeBSD: head/lib/libedit/tokenizer.c 238624 2012-07-19 19:15:47Z pfg $"); | 44__FBSDID("$FreeBSD: head/lib/libedit/tokenizer.c 276881 2015-01-09 07:40:56Z bapt $"); |
40 | 45 |
46/* We build this file twice, once as NARROW, once as WIDE. */ |
|
41/* 42 * tokenize.c: Bourne shell like tokenizer 43 */ | 47/* 48 * tokenize.c: Bourne shell like tokenizer 49 */ |
44#include "sys.h" | |
45#include <string.h> 46#include <stdlib.h> 47#include "histedit.h" | 50#include <string.h> 51#include <stdlib.h> 52#include "histedit.h" |
53#include "chartype.h" |
|
48 49typedef enum { 50 Q_none, Q_single, Q_double, Q_one, Q_doubleone 51} quote_t; 52 | 54 55typedef enum { 56 Q_none, Q_single, Q_double, Q_one, Q_doubleone 57} quote_t; 58 |
53#define IFS "\t \n" 54 | |
55#define TOK_KEEP 1 56#define TOK_EAT 2 57 58#define WINCR 20 59#define AINCR 10 60 | 59#define TOK_KEEP 1 60#define TOK_EAT 2 61 62#define WINCR 20 63#define AINCR 10 64 |
61#define tok_strdup(a) strdup(a) | 65#define IFS STR("\t \n") 66 |
62#define tok_malloc(a) malloc(a) 63#define tok_free(a) free(a) 64#define tok_realloc(a, b) realloc(a, b) | 67#define tok_malloc(a) malloc(a) 68#define tok_free(a) free(a) 69#define tok_realloc(a, b) realloc(a, b) |
70#define tok_strdup(a) Strdup(a) |
|
65 66 | 71 72 |
67struct tokenizer { 68 char *ifs; /* In field separator */ 69 int argc, amax; /* Current and maximum number of args */ 70 char **argv; /* Argument list */ 71 char *wptr, *wmax; /* Space and limit on the word buffer */ 72 char *wstart; /* Beginning of next word */ 73 char *wspace; /* Space of word buffer */ | 73struct TYPE(tokenizer) { 74 Char *ifs; /* In field separator */ 75 size_t argc, amax; /* Current and maximum number of args */ 76 Char **argv; /* Argument list */ 77 Char *wptr, *wmax; /* Space and limit on the word buffer */ 78 Char *wstart; /* Beginning of next word */ 79 Char *wspace; /* Space of word buffer */ |
74 quote_t quote; /* Quoting state */ 75 int flags; /* flags; */ 76}; 77 78 | 80 quote_t quote; /* Quoting state */ 81 int flags; /* flags; */ 82}; 83 84 |
79private void tok_finish(Tokenizer *); | 85private void FUN(tok,finish)(TYPE(Tokenizer) *); |
80 81 | 86 87 |
82/* tok_finish(): | 88/* FUN(tok,finish)(): |
83 * Finish a word in the tokenizer. 84 */ 85private void | 89 * Finish a word in the tokenizer. 90 */ 91private void |
86tok_finish(Tokenizer *tok) | 92FUN(tok,finish)(TYPE(Tokenizer) *tok) |
87{ 88 89 *tok->wptr = '\0'; 90 if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) { 91 tok->argv[tok->argc++] = tok->wstart; 92 tok->argv[tok->argc] = NULL; 93 tok->wstart = ++tok->wptr; 94 } 95 tok->flags &= ~TOK_KEEP; 96} 97 98 | 93{ 94 95 *tok->wptr = '\0'; 96 if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) { 97 tok->argv[tok->argc++] = tok->wstart; 98 tok->argv[tok->argc] = NULL; 99 tok->wstart = ++tok->wptr; 100 } 101 tok->flags &= ~TOK_KEEP; 102} 103 104 |
99/* tok_init(): | 105/* FUN(tok,init)(): |
100 * Initialize the tokenizer 101 */ | 106 * Initialize the tokenizer 107 */ |
102public Tokenizer * 103tok_init(const char *ifs) | 108public TYPE(Tokenizer) * 109FUN(tok,init)(const Char *ifs) |
104{ | 110{ |
105 Tokenizer *tok = (Tokenizer *) tok_malloc(sizeof(Tokenizer)); | 111 TYPE(Tokenizer) *tok = tok_malloc(sizeof(*tok)); |
106 107 if (tok == NULL) 108 return NULL; 109 tok->ifs = tok_strdup(ifs ? ifs : IFS); 110 if (tok->ifs == NULL) { | 112 113 if (tok == NULL) 114 return NULL; 115 tok->ifs = tok_strdup(ifs ? ifs : IFS); 116 if (tok->ifs == NULL) { |
111 tok_free((ptr_t)tok); | 117 tok_free(tok); |
112 return NULL; 113 } 114 tok->argc = 0; 115 tok->amax = AINCR; | 118 return NULL; 119 } 120 tok->argc = 0; 121 tok->amax = AINCR; |
116 tok->argv = (char **) tok_malloc(sizeof(char *) * tok->amax); | 122 tok->argv = tok_malloc(sizeof(*tok->argv) * tok->amax); |
117 if (tok->argv == NULL) { | 123 if (tok->argv == NULL) { |
118 tok_free((ptr_t)tok->ifs); 119 tok_free((ptr_t)tok); | 124 tok_free(tok->ifs); 125 tok_free(tok); |
120 return NULL; 121 } 122 tok->argv[0] = NULL; | 126 return NULL; 127 } 128 tok->argv[0] = NULL; |
123 tok->wspace = (char *) tok_malloc(WINCR); | 129 tok->wspace = tok_malloc(WINCR * sizeof(*tok->wspace)); |
124 if (tok->wspace == NULL) { | 130 if (tok->wspace == NULL) { |
125 tok_free((ptr_t)tok->argv); 126 tok_free((ptr_t)tok->ifs); 127 tok_free((ptr_t)tok); | 131 tok_free(tok->argv); 132 tok_free(tok->ifs); 133 tok_free(tok); |
128 return NULL; 129 } 130 tok->wmax = tok->wspace + WINCR; 131 tok->wstart = tok->wspace; 132 tok->wptr = tok->wspace; 133 tok->flags = 0; 134 tok->quote = Q_none; 135 | 134 return NULL; 135 } 136 tok->wmax = tok->wspace + WINCR; 137 tok->wstart = tok->wspace; 138 tok->wptr = tok->wspace; 139 tok->flags = 0; 140 tok->quote = Q_none; 141 |
136 return (tok); | 142 return tok; |
137} 138 139 | 143} 144 145 |
140/* tok_reset(): | 146/* FUN(tok,reset)(): |
141 * Reset the tokenizer 142 */ 143public void | 147 * Reset the tokenizer 148 */ 149public void |
144tok_reset(Tokenizer *tok) | 150FUN(tok,reset)(TYPE(Tokenizer) *tok) |
145{ 146 147 tok->argc = 0; 148 tok->wstart = tok->wspace; 149 tok->wptr = tok->wspace; 150 tok->flags = 0; 151 tok->quote = Q_none; 152} 153 154 | 151{ 152 153 tok->argc = 0; 154 tok->wstart = tok->wspace; 155 tok->wptr = tok->wspace; 156 tok->flags = 0; 157 tok->quote = Q_none; 158} 159 160 |
155/* tok_end(): | 161/* FUN(tok,end)(): |
156 * Clean up 157 */ 158public void | 162 * Clean up 163 */ 164public void |
159tok_end(Tokenizer *tok) | 165FUN(tok,end)(TYPE(Tokenizer) *tok) |
160{ 161 | 166{ 167 |
162 tok_free((ptr_t) tok->ifs); 163 tok_free((ptr_t) tok->wspace); 164 tok_free((ptr_t) tok->argv); 165 tok_free((ptr_t) tok); | 168 tok_free(tok->ifs); 169 tok_free(tok->wspace); 170 tok_free(tok->argv); 171 tok_free(tok); |
166} 167 168 169 | 172} 173 174 175 |
170/* tok_line(): | 176/* FUN(tok,line)(): |
171 * Bourne shell (sh(1)) like tokenizing 172 * Arguments: | 177 * Bourne shell (sh(1)) like tokenizing 178 * Arguments: |
173 * tok current tokenizer state (setup with tok_init()) | 179 * tok current tokenizer state (setup with FUN(tok,init)()) |
174 * line line to parse 175 * Returns: 176 * -1 Internal error 177 * 3 Quoted return 178 * 2 Unmatched double quote 179 * 1 Unmatched single quote 180 * 0 Ok 181 * Modifies (if return value is 0): 182 * argc number of arguments 183 * argv argument array 184 * cursorc if !NULL, argv element containing cursor 185 * cursorv if !NULL, offset in argv[cursorc] of cursor 186 */ 187public int | 180 * line line to parse 181 * Returns: 182 * -1 Internal error 183 * 3 Quoted return 184 * 2 Unmatched double quote 185 * 1 Unmatched single quote 186 * 0 Ok 187 * Modifies (if return value is 0): 188 * argc number of arguments 189 * argv argument array 190 * cursorc if !NULL, argv element containing cursor 191 * cursorv if !NULL, offset in argv[cursorc] of cursor 192 */ 193public int |
188tok_line(Tokenizer *tok, const LineInfo *line, 189 int *argc, const char ***argv, int *cursorc, int *cursoro) | 194FUN(tok,line)(TYPE(Tokenizer) *tok, const TYPE(LineInfo) *line, 195 int *argc, const Char ***argv, int *cursorc, int *cursoro) |
190{ | 196{ |
191 const char *ptr; | 197 const Char *ptr; |
192 int cc, co; 193 194 cc = co = -1; 195 ptr = line->buffer; 196 for (ptr = line->buffer; ;ptr++) { 197 if (ptr >= line->lastchar) | 198 int cc, co; 199 200 cc = co = -1; 201 ptr = line->buffer; 202 for (ptr = line->buffer; ;ptr++) { 203 if (ptr >= line->lastchar) |
198 ptr = ""; | 204 ptr = STR(""); |
199 if (ptr == line->cursor) { | 205 if (ptr == line->cursor) { |
200 cc = tok->argc; | 206 cc = (int)tok->argc; |
201 co = (int)(tok->wptr - tok->wstart); 202 } 203 switch (*ptr) { 204 case '\'': 205 tok->flags |= TOK_KEEP; 206 tok->flags &= ~TOK_EAT; 207 switch (tok->quote) { 208 case Q_none: --- 15 unchanged lines hidden (view full) --- 224 break; 225 226 case Q_doubleone: /* Quote this ' */ 227 tok->quote = Q_double; 228 *tok->wptr++ = *ptr; 229 break; 230 231 default: | 207 co = (int)(tok->wptr - tok->wstart); 208 } 209 switch (*ptr) { 210 case '\'': 211 tok->flags |= TOK_KEEP; 212 tok->flags &= ~TOK_EAT; 213 switch (tok->quote) { 214 case Q_none: --- 15 unchanged lines hidden (view full) --- 230 break; 231 232 case Q_doubleone: /* Quote this ' */ 233 tok->quote = Q_double; 234 *tok->wptr++ = *ptr; 235 break; 236 237 default: |
232 return (-1); | 238 return -1; |
233 } 234 break; 235 236 case '"': 237 tok->flags &= ~TOK_EAT; 238 tok->flags |= TOK_KEEP; 239 switch (tok->quote) { 240 case Q_none: /* Enter double quote mode */ --- 14 unchanged lines hidden (view full) --- 255 break; 256 257 case Q_doubleone: /* Quote this " */ 258 tok->quote = Q_double; 259 *tok->wptr++ = *ptr; 260 break; 261 262 default: | 239 } 240 break; 241 242 case '"': 243 tok->flags &= ~TOK_EAT; 244 tok->flags |= TOK_KEEP; 245 switch (tok->quote) { 246 case Q_none: /* Enter double quote mode */ --- 14 unchanged lines hidden (view full) --- 261 break; 262 263 case Q_doubleone: /* Quote this " */ 264 tok->quote = Q_double; 265 *tok->wptr++ = *ptr; 266 break; 267 268 default: |
263 return (-1); | 269 return -1; |
264 } 265 break; 266 267 case '\\': 268 tok->flags |= TOK_KEEP; 269 tok->flags &= ~TOK_EAT; 270 switch (tok->quote) { 271 case Q_none: /* Quote next character */ --- 14 unchanged lines hidden (view full) --- 286 break; 287 288 case Q_doubleone: /* Quote this \ */ 289 tok->quote = Q_double; 290 *tok->wptr++ = *ptr; 291 break; 292 293 default: | 270 } 271 break; 272 273 case '\\': 274 tok->flags |= TOK_KEEP; 275 tok->flags &= ~TOK_EAT; 276 switch (tok->quote) { 277 case Q_none: /* Quote next character */ --- 14 unchanged lines hidden (view full) --- 292 break; 293 294 case Q_doubleone: /* Quote this \ */ 295 tok->quote = Q_double; 296 *tok->wptr++ = *ptr; 297 break; 298 299 default: |
294 return (-1); | 300 return -1; |
295 } 296 break; 297 298 case '\n': 299 tok->flags &= ~TOK_EAT; 300 switch (tok->quote) { 301 case Q_none: 302 goto tok_line_outok; --- 9 unchanged lines hidden (view full) --- 312 break; 313 314 case Q_one: /* No quote, more eat the '\n' */ 315 tok->flags |= TOK_EAT; 316 tok->quote = Q_none; 317 break; 318 319 default: | 301 } 302 break; 303 304 case '\n': 305 tok->flags &= ~TOK_EAT; 306 switch (tok->quote) { 307 case Q_none: 308 goto tok_line_outok; --- 9 unchanged lines hidden (view full) --- 318 break; 319 320 case Q_one: /* No quote, more eat the '\n' */ 321 tok->flags |= TOK_EAT; 322 tok->quote = Q_none; 323 break; 324 325 default: |
320 return (0); | 326 return 0; |
321 } 322 break; 323 324 case '\0': 325 switch (tok->quote) { 326 case Q_none: 327 /* Finish word and return */ 328 if (tok->flags & TOK_EAT) { 329 tok->flags &= ~TOK_EAT; | 327 } 328 break; 329 330 case '\0': 331 switch (tok->quote) { 332 case Q_none: 333 /* Finish word and return */ 334 if (tok->flags & TOK_EAT) { 335 tok->flags &= ~TOK_EAT; |
330 return (3); | 336 return 3; |
331 } 332 goto tok_line_outok; 333 334 case Q_single: | 337 } 338 goto tok_line_outok; 339 340 case Q_single: |
335 return (1); | 341 return 1; |
336 337 case Q_double: | 342 343 case Q_double: |
338 return (2); | 344 return 2; |
339 340 case Q_doubleone: 341 tok->quote = Q_double; 342 *tok->wptr++ = *ptr; 343 break; 344 345 case Q_one: 346 tok->quote = Q_none; 347 *tok->wptr++ = *ptr; 348 break; 349 350 default: | 345 346 case Q_doubleone: 347 tok->quote = Q_double; 348 *tok->wptr++ = *ptr; 349 break; 350 351 case Q_one: 352 tok->quote = Q_none; 353 *tok->wptr++ = *ptr; 354 break; 355 356 default: |
351 return (-1); | 357 return -1; |
352 } 353 break; 354 355 default: 356 tok->flags &= ~TOK_EAT; 357 switch (tok->quote) { 358 case Q_none: | 358 } 359 break; 360 361 default: 362 tok->flags &= ~TOK_EAT; 363 switch (tok->quote) { 364 case Q_none: |
359 if (strchr(tok->ifs, *ptr) != NULL) 360 tok_finish(tok); | 365 if (Strchr(tok->ifs, *ptr) != NULL) 366 FUN(tok,finish)(tok); |
361 else 362 *tok->wptr++ = *ptr; 363 break; 364 365 case Q_single: 366 case Q_double: 367 *tok->wptr++ = *ptr; 368 break; --- 6 unchanged lines hidden (view full) --- 375 break; 376 377 case Q_one: 378 tok->quote = Q_none; 379 *tok->wptr++ = *ptr; 380 break; 381 382 default: | 367 else 368 *tok->wptr++ = *ptr; 369 break; 370 371 case Q_single: 372 case Q_double: 373 *tok->wptr++ = *ptr; 374 break; --- 6 unchanged lines hidden (view full) --- 381 break; 382 383 case Q_one: 384 tok->quote = Q_none; 385 *tok->wptr++ = *ptr; 386 break; 387 388 default: |
383 return (-1); | 389 return -1; |
384 385 } 386 break; 387 } 388 389 if (tok->wptr >= tok->wmax - 4) { | 390 391 } 392 break; 393 } 394 395 if (tok->wptr >= tok->wmax - 4) { |
390 size_t size = tok->wmax - tok->wspace + WINCR; 391 char *s = (char *) tok_realloc(tok->wspace, size); | 396 size_t size = (size_t)(tok->wmax - tok->wspace + WINCR); 397 Char *s = tok_realloc(tok->wspace, 398 size * sizeof(*s)); |
392 if (s == NULL) | 399 if (s == NULL) |
393 return (-1); | 400 return -1; |
394 395 if (s != tok->wspace) { | 401 402 if (s != tok->wspace) { |
396 int i; | 403 size_t i; |
397 for (i = 0; i < tok->argc; i++) { 398 tok->argv[i] = 399 (tok->argv[i] - tok->wspace) + s; 400 } 401 tok->wptr = (tok->wptr - tok->wspace) + s; 402 tok->wstart = (tok->wstart - tok->wspace) + s; 403 tok->wspace = s; 404 } 405 tok->wmax = s + size; 406 } 407 if (tok->argc >= tok->amax - 4) { | 404 for (i = 0; i < tok->argc; i++) { 405 tok->argv[i] = 406 (tok->argv[i] - tok->wspace) + s; 407 } 408 tok->wptr = (tok->wptr - tok->wspace) + s; 409 tok->wstart = (tok->wstart - tok->wspace) + s; 410 tok->wspace = s; 411 } 412 tok->wmax = s + size; 413 } 414 if (tok->argc >= tok->amax - 4) { |
408 char **p; | 415 Char **p; |
409 tok->amax += AINCR; | 416 tok->amax += AINCR; |
410 p = (char **) tok_realloc(tok->argv, 411 tok->amax * sizeof(char *)); | 417 p = tok_realloc(tok->argv, tok->amax * sizeof(*p)); |
412 if (p == NULL) | 418 if (p == NULL) |
413 return (-1); | 419 return -1; |
414 tok->argv = p; 415 } 416 } 417 tok_line_outok: 418 if (cc == -1 && co == -1) { | 420 tok->argv = p; 421 } 422 } 423 tok_line_outok: 424 if (cc == -1 && co == -1) { |
419 cc = tok->argc; | 425 cc = (int)tok->argc; |
420 co = (int)(tok->wptr - tok->wstart); 421 } 422 if (cursorc != NULL) 423 *cursorc = cc; 424 if (cursoro != NULL) 425 *cursoro = co; | 426 co = (int)(tok->wptr - tok->wstart); 427 } 428 if (cursorc != NULL) 429 *cursorc = cc; 430 if (cursoro != NULL) 431 *cursoro = co; |
426 tok_finish(tok); 427 *argv = (const char **)tok->argv; 428 *argc = tok->argc; 429 return (0); | 432 FUN(tok,finish)(tok); 433 *argv = (const Char **)tok->argv; 434 *argc = (int)tok->argc; 435 return 0; |
430} 431 | 436} 437 |
432/* tok_str(): | 438/* FUN(tok,str)(): |
433 * Simpler version of tok_line, taking a NUL terminated line 434 * and splitting into words, ignoring cursor state. 435 */ 436public int | 439 * Simpler version of tok_line, taking a NUL terminated line 440 * and splitting into words, ignoring cursor state. 441 */ 442public int |
437tok_str(Tokenizer *tok, const char *line, int *argc, const char ***argv) | 443FUN(tok,str)(TYPE(Tokenizer) *tok, const Char *line, int *argc, 444 const Char ***argv) |
438{ | 445{ |
439 LineInfo li; | 446 TYPE(LineInfo) li; |
440 441 memset(&li, 0, sizeof(li)); 442 li.buffer = line; | 447 448 memset(&li, 0, sizeof(li)); 449 li.buffer = line; |
443 li.cursor = li.lastchar = strchr(line, '\0'); 444 return (tok_line(tok, &li, argc, argv, NULL, NULL)); | 450 li.cursor = li.lastchar = Strchr(line, '\0'); 451 return FUN(tok,line(tok, &li, argc, argv, NULL, NULL)); |
445} | 452} |