#include <stdio.h>
#include <string.h>

/*
 * count_fields - count the fields in a string without storing pointers or
 * modifying the string.  Used by split() when fields[] has no room at all.
 *
 * p		first character to consider; must not be at the terminating
 *		NUL (in white-space mode, leading white space already skipped)
 * sep		separator set, at least one character long
 * trimtrail	nonzero in white-space mode, where a trailing run of
 *		separators does not introduce a final empty field
 *
 * Returns the same count split() produces for the same input.
 */
static int
count_fields(const char *p, const char *sep, int trimtrail)
{
	int fn = 1;		/* a non-empty string has at least one field */

	/* single separator: every occurrence starts a new field */
	if (sep[1] == '\0') {
		for (; *p != '\0'; p++)
			if (*p == sep[0])
				fn++;
		return(fn);
	}

	/* several separators: each run of them starts a new field */
	while (*p != '\0') {
		if (strchr(sep, *p) == NULL) {
			p++;
			continue;
		}
		/* test *p first: strchr() would also match the NUL */
		while (*p != '\0' && strchr(sep, *p) != NULL)
			p++;
		if (*p == '\0' && trimtrail)
			break;		/* trailing white space is not a field */
		fn++;
	}
	return(fn);
}

/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 */
/*
 * string	the text to split; it is modified in place, with the separator
 *		that ends each stored field overwritten by a NUL
 * fields	receives pointers into string, one per field; the list is not
 *		NULL-terminated
 * nfields	number of entries available in fields[]
 * sep		"" means white space (blanks and tabs, with leading and
 *		trailing white space ignored); "c" means the single character
 *		c, every occurrence of which delimits a field; "ab..." means
 *		any run of the listed characters delimits a field
 *
 * Returns the total number of fields found, which may exceed nfields.  In
 * that case only nfields pointers are stored and the last stored field is
 * left unsplit, so it holds the remainder of the string.  An empty string
 * (or, in white-space mode, one holding only white space) yields 0.
 *
 * If nfields is zero or negative, the fields are only counted: nothing is
 * stored through fields and string is left unmodified.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/*
	 * No room to store even one field.  The one- and two-separator
	 * loops below store before checking for room, so they must not be
	 * entered; just count instead.
	 */
	if (nfields <= 0)
		return(count_fields(p, sep, trimtrail));

	/* single separator */
	if (sepc2 == '\0') {
		fn = nfields;
		for (;;) {
			*fp++ = p;
			fn--;
			if (fn == 0)
				break;
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(nfields - fn);
			*(p-1) = '\0';
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		fn = nfields;
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/* an empty last field is only trailing white */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;
			*(p-1) = '\0';
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/* only cut the string if the white follows the last stored field */
				if (fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}

#ifdef TEST_SPLIT

#include <stdlib.h>

#define	MNF	10		/* fields[] size used by main's timing loop */
#define	NF	5		/* fields[] size used by dosplit() */

static void dosplit(char *string, char *seps);
static void print(int nf, int nfp, char *fields[]);
static void regress(void);

/*
 * test program
 * pgm			runs regression
 * pgm sep		splits stdin lines by sep
 * pgm str sep		splits str by sep
 * pgm str sep n	splits str by sep n times
 *
 * With a further argument after n, the loop only copies str n times and
 * never calls split() (presumably a baseline for timing -- confirm).
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
	char *fields[MNF];

	/* the repeat modes copy argv[1] into buf; refuse what will not fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "%s: string too long (limit %d)\n",
					argv[0], (int)sizeof(buf) - 1);
		return(2);
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/* stomp newline, if there is one; safe on empty lines */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	return(0);
}

/*
 * dosplit - split string by seps into at most NF stored fields and print
 * the result.  The string is modified by split().
 */
static void
dosplit(char *string, char *seps)
{
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
}

/*
 * print - show a split() result on stdout as  count:<tab>"f1", "f2", ...
 *
 * nf		field count returned by split()
 * nfp		number of entries that were available in fields[]
 * fields	the stored field pointers
 *
 * Only the first min(nf, nfp) fields are shown.  When nf exceeds nfp the
 * list ends with a dangling ", " to show that more fields existed.  The
 * line is always terminated with a newline.
 */
static void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", (fields[fn] != NULL) ? fields[fn] : "(NULL)",
						(fn+1 < nf) ? ", " : "");
	putchar('\n');
}

#define	RNF	5		/* some table entries know this */
/*
 * Regression table: input string, separator spec, expected return value,
 * and the expected contents of the first min(nf, RNF) fields.  The list
 * ends with an entry whose str is NULL.
 */
struct {
	char *str;
	char *seps;
	int nf;
	char *fi[RNF];
} tests[] = {
	/* single separator; adjacent separators give empty fields */
	"",		" ",	0,	{ "" },
	" ",		" ",	2,	{ "", "" },
	"x",		" ",	1,	{ "x" },
	"xy",		" ",	1,	{ "xy" },
	"x y",		" ",	2,	{ "x", "y" },
	"abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" },
	"  a bcd",	" ",	4,	{ "", "", "a", "bcd" },
	"a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" },
	" a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " },

	/* two separators; runs of them delimit */
	"",		" _",	0,	{ "" },
	" ",		" _",	2,	{ "", "" },
	"x",		" _",	1,	{ "x" },
	"x y",		" _",	2,	{ "x", "y" },
	"ab _ cd",	" _",	2,	{ "ab", "cd" },
	" a_b c ",	" _",	5,	{ "", "a", "b", "c", "" },
	"a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" },
	" a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " },

	/* three separators */
	"",		" _~",	0,	{ "" },
	" ",		" _~",	2,	{ "", "" },
	"x",		" _~",	1,	{ "x" },
	"x y",		" _~",	2,	{ "x", "y" },
	"ab _~ cd",	" _~",	2,	{ "ab", "cd" },
	" a_b c~",	" _~",	5,	{ "", "a", "b", "c", "" },
	"a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" },
	"~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " },

	/* four separators */
	"",		" _~-",	0,	{ "" },
	" ",		" _~-",	2,	{ "", "" },
	"x",		" _~-",	1,	{ "x" },
	"x y",		" _~-",	2,	{ "x", "y" },
	"ab _~- cd",	" _~-",	2,	{ "ab", "cd" },
	" a_b c~",	" _~-",	5,	{ "", "a", "b", "c", "" },
	"a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" },
	"~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " },

	/*
	 * NOTE(review): this group uses the same one-blank separator as the
	 * first group.  It may have been meant to use a two-blank separator
	 * (exercising the two-separator code with identical characters) --
	 * confirm against the upstream source before changing it.
	 */
	"",		" ",	0,	{ "" },
	" ",		" ",	2,	{ "", "" },
	"x",		" ",	1,	{ "x" },
	"xy",		" ",	1,	{ "xy" },
	"x y",		" ",	2,	{ "x", "y" },
	"abc def g ",	" ",	4,	{ "abc", "def", "g", "" },
	" a bcd",	" ",	3,	{ "", "a", "bcd" },
	"a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" },
	" a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " },

	/* white space */
	"",		"",	0,	{ "" },
	" ",		"",	0,	{ "" },
	"x",		"",	1,	{ "x" },
	"xy",		"",	1,	{ "xy" },
	"x y",		"",	2,	{ "x", "y" },
	"abc def g ",	"",	3,	{ "abc", "def", "g" },
	"\t a bcd",	"",	2,	{ "a", "bcd" },
	" a \tb\t c ",	"",	3,	{ "a", "b", "c" },
	"a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" },
	"a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" },
	" a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " },

	NULL,		NULL,	0,	{ NULL },
};

/*
 * regress - run split() over every entry of tests[] and report, on stdout,
 * any wrong field count, any write past the end of the fields array, and
 * any field whose text differs from the expected one.  Silent on success.
 */
static void
regress(void)
{
	char buf[512];
	int n;
	char *fields[RNF+1];	/* one spare slot to detect overruns */
	int nf;
	int i;
	int printit;
	const char *f;

	for (n = 0; tests[n].str != NULL; n++) {
		(void) strcpy(buf, tests[n].str);
		fields[RNF] = NULL;
		nf = split(buf, fields, RNF, tests[n].seps);
		printit = 0;
		if (nf != tests[n].nf) {
			printf("split `%s' by `%s' gave %d fields, not %d\n",
				tests[n].str, tests[n].seps, nf, tests[n].nf);
			printit = 1;
		} else if (fields[RNF] != NULL) {
			printf("split() went beyond array end\n");
			printit = 1;
		} else {
			for (i = 0; i < nf && i < RNF; i++) {
				f = fields[i];
				if (f == NULL)
					f = "(NULL)";
				if (strcmp(f, tests[n].fi[i]) != 0) {
					/* print f, not fields[i], which may be NULL */
					printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
						tests[n].str, tests[n].seps,
						i, f, tests[n].fi[i]);
					printit = 1;
				}
			}
		}
		if (printit)
			print(nf, RNF, fields);
	}
}
#endif