1272343Sngie/* $NetBSD: split.c,v 1.1 2011/01/08 18:10:31 pgoyette Exp $ */ 2272343Sngie 3272343Sngie/*- 4272343Sngie * Copyright (c) 1993 The NetBSD Foundation, Inc. 5272343Sngie * All rights reserved. 6272343Sngie * 7272343Sngie * Redistribution and use in source and binary forms, with or without 8272343Sngie * modification, are permitted provided that the following conditions 9272343Sngie * are met: 10272343Sngie * 1. Redistributions of source code must retain the above copyright 11272343Sngie * notice, this list of conditions and the following disclaimer. 12272343Sngie * 2. Redistributions in binary form must reproduce the above copyright 13272343Sngie * notice, this list of conditions and the following disclaimer in the 14272343Sngie * documentation and/or other materials provided with the distribution. 15272343Sngie * 16272343Sngie * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17272343Sngie * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18272343Sngie * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19272343Sngie * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20272343Sngie * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21272343Sngie * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22272343Sngie * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23272343Sngie * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24272343Sngie * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25272343Sngie * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26272343Sngie * POSSIBILITY OF SUCH DAMAGE. 27272343Sngie */ 28272343Sngie 29272343Sngie#include <regex.h> 30272343Sngie#include <stdio.h> 31272343Sngie#include <string.h> 32272343Sngie 33272343Sngie#include "test_regex.h" 34272343Sngie 35272343Sngie/* 36272343Sngie * split - divide a string into fields, like awk split() 37272343Sngie * 38272343Sngie * returns number of fields, including overflow 39272343Sngie * 40272343Sngie * fields[] list is not NULL-terminated 41272343Sngie * nfields number of entries available in fields[] 42272343Sngie * sep "" white, "c" single char, "ab" [ab]+ 43272343Sngie */ 44272343Sngieint 45272343Sngiesplit(char *string, char *fields[], int nfields, const char *sep) 46272343Sngie{ 47272343Sngie char *p = string; 48272343Sngie char c; /* latest character */ 49272343Sngie char sepc = *sep; 50272343Sngie char sepc2; 51272343Sngie int fn; 52272343Sngie char **fp = fields; 53272343Sngie const char *sepp; 54272343Sngie int trimtrail; 55272343Sngie 56272343Sngie /* white space */ 57272343Sngie if (sepc == '\0') { 58272343Sngie while ((c = *p++) == ' ' || c == '\t') 59272343Sngie continue; 60272343Sngie p--; 61272343Sngie trimtrail = 1; 62272343Sngie sep = " \t"; /* note, code below knows this is 2 long */ 63272343Sngie sepc = ' '; 64272343Sngie } else 65272343Sngie trimtrail = 0; 66272343Sngie sepc2 = sep[1]; /* now we can safely pick this up */ 67272343Sngie 68272343Sngie /* catch empties */ 69272343Sngie if (*p == '\0') 70272343Sngie return(0); 71272343Sngie 72272343Sngie /* single separator */ 73272343Sngie if (sepc2 == '\0') { 74272343Sngie fn = nfields; 75272343Sngie for (;;) { 76272343Sngie *fp++ = p; 77272343Sngie fn--; 78272343Sngie if (fn == 0) 79272343Sngie break; 80272343Sngie while ((c = *p++) != sepc) 81272343Sngie if (c == '\0') 82272343Sngie return(nfields - fn); 83272343Sngie *(p-1) = '\0'; 84272343Sngie } 85272343Sngie /* we have overflowed the fields vector -- just count them */ 86272343Sngie fn = nfields; 87272343Sngie for (;;) { 88272343Sngie while ((c = *p++) != sepc) 89272343Sngie if (c == '\0') 90272343Sngie return(fn); 91272343Sngie fn++; 92272343Sngie } 93272343Sngie /* not reached */ 94272343Sngie } 95272343Sngie 96272343Sngie /* two separators */ 97272343Sngie if (sep[2] == '\0') { 98272343Sngie fn = nfields; 99272343Sngie for (;;) { 100272343Sngie *fp++ = p; 101272343Sngie fn--; 102272343Sngie while ((c = *p++) != sepc && c != sepc2) 103272343Sngie if (c == '\0') { 104272343Sngie if (trimtrail && **(fp-1) == '\0') 105272343Sngie fn++; 106272343Sngie return(nfields - fn); 107272343Sngie } 108272343Sngie if (fn == 0) 109272343Sngie break; 110272343Sngie *(p-1) = '\0'; 111272343Sngie while ((c = *p++) == sepc || c == sepc2) 112272343Sngie continue; 113272343Sngie p--; 114272343Sngie } 115272343Sngie /* we have overflowed the fields vector -- just count them */ 116272343Sngie fn = nfields; 117272343Sngie while (c != '\0') { 118272343Sngie while ((c = *p++) == sepc || c == sepc2) 119272343Sngie continue; 120272343Sngie p--; 121272343Sngie fn++; 122272343Sngie while ((c = *p++) != '\0' && c != sepc && c != sepc2) 123272343Sngie continue; 124272343Sngie } 125272343Sngie /* might have to trim trailing white space */ 126272343Sngie if (trimtrail) { 127272343Sngie p--; 128272343Sngie while ((c = *--p) == sepc || c == sepc2) 129272343Sngie continue; 130272343Sngie p++; 131272343Sngie if (*p != '\0') { 132272343Sngie if (fn == nfields+1) 133272343Sngie *p = '\0'; 134272343Sngie fn--; 135272343Sngie } 136272343Sngie } 137272343Sngie return(fn); 138272343Sngie } 139272343Sngie 140272343Sngie /* n separators */ 141272343Sngie fn = 0; 142272343Sngie for (;;) { 143272343Sngie if (fn < nfields) 144272343Sngie *fp++ = p; 145272343Sngie fn++; 146272343Sngie for (;;) { 147272343Sngie c = *p++; 148272343Sngie if (c == '\0') 149272343Sngie return(fn); 150272343Sngie sepp = sep; 151272343Sngie while ((sepc = *sepp++) != '\0' && sepc != c) 152272343Sngie continue; 153272343Sngie if (sepc != '\0') /* it was a separator */ 154272343Sngie break; 155272343Sngie } 156272343Sngie if (fn < nfields) 157272343Sngie *(p-1) = '\0'; 158272343Sngie for (;;) { 159272343Sngie c = *p++; 160272343Sngie sepp = sep; 161272343Sngie while ((sepc = *sepp++) != '\0' && sepc != c) 162272343Sngie continue; 163272343Sngie if (sepc == '\0') /* it wasn't a separator */ 164272343Sngie break; 165272343Sngie } 166272343Sngie p--; 167272343Sngie } 168272343Sngie 169272343Sngie /* not reached */ 170272343Sngie} 171272343Sngie 172272343Sngie#ifdef TEST_SPLIT 173272343Sngie 174272343Sngie 175272343Sngie/* 176272343Sngie * test program 177272343Sngie * pgm runs regression 178272343Sngie * pgm sep splits stdin lines by sep 179272343Sngie * pgm str sep splits str by sep 180272343Sngie * pgm str sep n splits str by sep n times 181272343Sngie */ 182272343Sngieint 183272343Sngiemain(int argc, char *argv[]) 184272343Sngie{ 185272343Sngie char buf[512]; 186272343Sngie int n; 187272343Sngie# define MNF 10 188272343Sngie char *fields[MNF]; 189272343Sngie 190272343Sngie if (argc > 4) 191272343Sngie for (n = atoi(argv[3]); n > 0; n--) { 192272343Sngie (void) strcpy(buf, argv[1]); 193272343Sngie } 194272343Sngie else if (argc > 3) 195272343Sngie for (n = atoi(argv[3]); n > 0; n--) { 196272343Sngie (void) strcpy(buf, argv[1]); 197272343Sngie (void) split(buf, fields, MNF, argv[2]); 198272343Sngie } 199272343Sngie else if (argc > 2) 200272343Sngie dosplit(argv[1], argv[2]); 201272343Sngie else if (argc > 1) 202272343Sngie while (fgets(buf, sizeof(buf), stdin) != NULL) { 203272343Sngie buf[strlen(buf)-1] = '\0'; /* stomp newline */ 204272343Sngie dosplit(buf, argv[1]); 205272343Sngie } 206272343Sngie else 207272343Sngie regress(); 208272343Sngie 209272343Sngie exit(0); 210272343Sngie} 211272343Sngie 212272343Sngievoid 213272343Sngiedosplit(char *string, char *seps) 214272343Sngie{ 215272343Sngie# define NF 5 216272343Sngie char *fields[NF]; 217272343Sngie int nf; 218272343Sngie 219272343Sngie nf = split(string, fields, NF, seps); 220272343Sngie print(nf, NF, fields); 221272343Sngie} 222272343Sngie 223272343Sngievoid 224272343Sngieprint(int nf, int nfp, char *fields) 225272343Sngie{ 226272343Sngie int fn; 227272343Sngie int bound; 228272343Sngie 229272343Sngie bound = (nf > nfp) ? nfp : nf; 230272343Sngie printf("%d:\t", nf); 231272343Sngie for (fn = 0; fn < bound; fn++) 232272343Sngie printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); 233272343Sngie} 234272343Sngie 235272343Sngie#define RNF 5 /* some table entries know this */ 236272343Sngiestruct { 237272343Sngie char *str; 238272343Sngie char *seps; 239272343Sngie int nf; 240272343Sngie char *fi[RNF]; 241272343Sngie} tests[] = { 242272343Sngie "", " ", 0, { "" }, 243272343Sngie " ", " ", 2, { "", "" }, 244272343Sngie "x", " ", 1, { "x" }, 245272343Sngie "xy", " ", 1, { "xy" }, 246272343Sngie "x y", " ", 2, { "x", "y" }, 247272343Sngie "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, 248272343Sngie " a bcd", " ", 4, { "", "", "a", "bcd" }, 249272343Sngie "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 250272343Sngie " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 251272343Sngie 252272343Sngie "", " _", 0, { "" }, 253272343Sngie " ", " _", 2, { "", "" }, 254272343Sngie "x", " _", 1, { "x" }, 255272343Sngie "x y", " _", 2, { "x", "y" }, 256272343Sngie "ab _ cd", " _", 2, { "ab", "cd" }, 257272343Sngie " a_b c ", " _", 5, { "", "a", "b", "c", "" }, 258272343Sngie "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, 259272343Sngie " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, 260272343Sngie 261272343Sngie "", " _~", 0, { "" }, 262272343Sngie " ", " _~", 2, { "", "" }, 263272343Sngie "x", " _~", 1, { "x" }, 264272343Sngie "x y", " _~", 2, { "x", "y" }, 265272343Sngie "ab _~ cd", " _~", 2, { "ab", "cd" }, 266272343Sngie " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, 267272343Sngie "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, 268272343Sngie "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, 269272343Sngie 270272343Sngie "", " _~-", 0, { "" }, 271272343Sngie " ", " _~-", 2, { "", "" }, 272272343Sngie "x", " _~-", 1, { "x" }, 273272343Sngie "x y", " _~-", 2, { "x", "y" }, 274272343Sngie "ab _~- cd", " _~-", 2, { "ab", "cd" }, 275272343Sngie " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, 276272343Sngie "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, 277272343Sngie "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, 278272343Sngie 279272343Sngie "", " ", 0, { "" }, 280272343Sngie " ", " ", 2, { "", "" }, 281272343Sngie "x", " ", 1, { "x" }, 282272343Sngie "xy", " ", 1, { "xy" }, 283272343Sngie "x y", " ", 2, { "x", "y" }, 284272343Sngie "abc def g ", " ", 4, { "abc", "def", "g", "" }, 285272343Sngie " a bcd", " ", 3, { "", "a", "bcd" }, 286272343Sngie "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 287272343Sngie " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 288272343Sngie 289272343Sngie "", "", 0, { "" }, 290272343Sngie " ", "", 0, { "" }, 291272343Sngie "x", "", 1, { "x" }, 292272343Sngie "xy", "", 1, { "xy" }, 293272343Sngie "x y", "", 2, { "x", "y" }, 294272343Sngie "abc def g ", "", 3, { "abc", "def", "g" }, 295272343Sngie "\t a bcd", "", 2, { "a", "bcd" }, 296272343Sngie " a \tb\t c ", "", 3, { "a", "b", "c" }, 297272343Sngie "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, 298272343Sngie "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, 299272343Sngie " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, 300272343Sngie 301272343Sngie NULL, NULL, 0, { NULL }, 302272343Sngie}; 303272343Sngie 304272343Sngievoid 305272343Sngieregress(void) 306272343Sngie{ 307272343Sngie char buf[512]; 308272343Sngie int n; 309272343Sngie char *fields[RNF+1]; 310272343Sngie int nf; 311272343Sngie int i; 312272343Sngie int printit; 313272343Sngie char *f; 314272343Sngie 315272343Sngie for (n = 0; tests[n].str != NULL; n++) { 316272343Sngie (void) strcpy(buf, tests[n].str); 317272343Sngie fields[RNF] = NULL; 318272343Sngie nf = split(buf, fields, RNF, tests[n].seps); 319272343Sngie printit = 0; 320272343Sngie if (nf != tests[n].nf) { 321272343Sngie printf("split `%s' by `%s' gave %d fields, not %d\n", 322272343Sngie tests[n].str, tests[n].seps, nf, tests[n].nf); 323272343Sngie printit = 1; 324272343Sngie } else if (fields[RNF] != NULL) { 325272343Sngie printf("split() went beyond array end\n"); 326272343Sngie printit = 1; 327272343Sngie } else { 328272343Sngie for (i = 0; i < nf && i < RNF; i++) { 329272343Sngie f = fields[i]; 330272343Sngie if (f == NULL) 331272343Sngie f = "(NULL)"; 332272343Sngie if (strcmp(f, tests[n].fi[i]) != 0) { 333272343Sngie printf("split `%s' by `%s', field %d is `%s', not `%s'\n", 334272343Sngie tests[n].str, tests[n].seps, 335272343Sngie i, fields[i], tests[n].fi[i]); 336272343Sngie printit = 1; 337272343Sngie } 338272343Sngie } 339272343Sngie } 340272343Sngie if (printit) 341272343Sngie print(nf, RNF, fields); 342272343Sngie } 343272343Sngie} 344272343Sngie#endif 345