1/* vi: set sw=4 ts=4: */ 2/* 3 * cut.c - minimalist version of cut 4 * 5 * Copyright (C) 1999,2000,2001 by Lineo, inc. 6 * Written by Mark Whitley <markw@codepoet.org> 7 * debloated by Bernhard Fischer 8 * 9 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 10 */ 11 12#include "libbb.h" 13 14/* This is a NOEXEC applet. Be very careful! */ 15 16 17/* option vars */ 18static const char optstring[] ALIGN1 = "b:c:f:d:sn"; 19#define CUT_OPT_BYTE_FLGS (1<<0) 20#define CUT_OPT_CHAR_FLGS (1<<1) 21#define CUT_OPT_FIELDS_FLGS (1<<2) 22#define CUT_OPT_DELIM_FLGS (1<<3) 23#define CUT_OPT_SUPPRESS_FLGS (1<<4) 24 25static char delim = '\t'; /* delimiter, default is tab */ 26 27struct cut_list { 28 int startpos; 29 int endpos; 30}; 31 32enum { 33 BOL = 0, 34 EOL = INT_MAX, 35 NON_RANGE = -1 36}; 37 38/* growable array holding a series of lists */ 39static struct cut_list *cut_lists; 40static unsigned int nlists; /* number of elements in above list */ 41 42 43static int cmpfunc(const void *a, const void *b) 44{ 45 return (((struct cut_list *) a)->startpos - 46 ((struct cut_list *) b)->startpos); 47 48} 49 50static void cut_file(FILE * file) 51{ 52 char *line = NULL; 53 unsigned int linenum = 0; /* keep these zero-based to be consistent */ 54 55 /* go through every line in the file */ 56 while ((line = xmalloc_getline(file)) != NULL) { 57 58 /* set up a list so we can keep track of what's been printed */ 59 char * printed = xzalloc(strlen(line) * sizeof(char)); 60 char * orig_line = line; 61 unsigned int cl_pos = 0; 62 int spos; 63 64 if (option_mask32 & (CUT_OPT_CHAR_FLGS | CUT_OPT_BYTE_FLGS)) { 65 /* print the chars specified in each cut list */ 66 for (; cl_pos < nlists; cl_pos++) { 67 spos = cut_lists[cl_pos].startpos; 68 while (spos < strlen(line)) { 69 if (!printed[spos]) { 70 printed[spos] = 'X'; 71 putchar(line[spos]); 72 } 73 spos++; 74 if (spos > cut_lists[cl_pos].endpos 75 || cut_lists[cl_pos].endpos == NON_RANGE) 76 break; 77 } 78 } 79 } else if (delim == '\n') { /* cut by lines */ 80 spos = cut_lists[cl_pos].startpos; 81 82 /* get out if we have no more lists to process or if the lines 83 * are lower than what we're interested in */ 84 if (linenum < spos || cl_pos >= nlists) 85 goto next_line; 86 87 /* if the line we're looking for is lower than the one we were 88 * passed, it means we displayed it already, so move on */ 89 while (spos < linenum) { 90 spos++; 91 /* go to the next list if we're at the end of this one */ 92 if (spos > cut_lists[cl_pos].endpos 93 || cut_lists[cl_pos].endpos == NON_RANGE) { 94 cl_pos++; 95 /* get out if there's no more lists to process */ 96 if (cl_pos >= nlists) 97 goto next_line; 98 spos = cut_lists[cl_pos].startpos; 99 /* get out if the current line is lower than the one 100 * we just became interested in */ 101 if (linenum < spos) 102 goto next_line; 103 } 104 } 105 106 /* If we made it here, it means we've found the line we're 107 * looking for, so print it */ 108 puts(line); 109 goto next_line; 110 } else { /* cut by fields */ 111 int ndelim = -1; /* zero-based / one-based problem */ 112 int nfields_printed = 0; 113 char *field = NULL; 114 const char delimiter[2] = { delim, 0 }; 115 116 /* does this line contain any delimiters? */ 117 if (strchr(line, delim) == NULL) { 118 if (!(option_mask32 & CUT_OPT_SUPPRESS_FLGS)) 119 puts(line); 120 goto next_line; 121 } 122 123 /* process each list on this line, for as long as we've got 124 * a line to process */ 125 for (; cl_pos < nlists && line; cl_pos++) { 126 spos = cut_lists[cl_pos].startpos; 127 do { 128 /* find the field we're looking for */ 129 while (line && ndelim < spos) { 130 field = strsep(&line, delimiter); 131 ndelim++; 132 } 133 134 /* we found it, and it hasn't been printed yet */ 135 if (field && ndelim == spos && !printed[ndelim]) { 136 /* if this isn't our first time through, we need to 137 * print the delimiter after the last field that was 138 * printed */ 139 if (nfields_printed > 0) 140 putchar(delim); 141 fputs(field, stdout); 142 printed[ndelim] = 'X'; 143 nfields_printed++; /* shouldn't overflow.. */ 144 } 145 146 spos++; 147 148 /* keep going as long as we have a line to work with, 149 * this is a list, and we're not at the end of that 150 * list */ 151 } while (spos <= cut_lists[cl_pos].endpos && line 152 && cut_lists[cl_pos].endpos != NON_RANGE); 153 } 154 } 155 /* if we printed anything at all, we need to finish it with a 156 * newline cuz we were handed a chomped line */ 157 putchar('\n'); 158 next_line: 159 linenum++; 160 free(printed); 161 free(orig_line); 162 } 163} 164 165static const char _op_on_field[] ALIGN1 = " only when operating on fields"; 166 167int cut_main(int argc, char **argv); 168int cut_main(int argc, char **argv) 169{ 170 char *sopt, *ltok; 171 172 opt_complementary = "b--bcf:c--bcf:f--bcf"; 173 getopt32(argv, optstring, &sopt, &sopt, &sopt, <ok); 174// argc -= optind; 175 argv += optind; 176 if (!(option_mask32 & (CUT_OPT_BYTE_FLGS | CUT_OPT_CHAR_FLGS | CUT_OPT_FIELDS_FLGS))) 177 bb_error_msg_and_die("expected a list of bytes, characters, or fields"); 178 179 if (option_mask32 & CUT_OPT_DELIM_FLGS) { 180 if (strlen(ltok) > 1) { 181 bb_error_msg_and_die("the delimiter must be a single character"); 182 } 183 delim = ltok[0]; 184 } 185 186 /* non-field (char or byte) cutting has some special handling */ 187 if (!(option_mask32 & CUT_OPT_FIELDS_FLGS)) { 188 if (option_mask32 & CUT_OPT_SUPPRESS_FLGS) { 189 bb_error_msg_and_die 190 ("suppressing non-delimited lines makes sense%s", 191 _op_on_field); 192 } 193 if (delim != '\t') { 194 bb_error_msg_and_die 195 ("a delimiter may be specified%s", _op_on_field); 196 } 197 } 198 199 /* 200 * parse list and put values into startpos and endpos. 201 * valid list formats: N, N-, N-M, -M 202 * more than one list can be separated by commas 203 */ 204 { 205 char *ntok; 206 int s = 0, e = 0; 207 208 /* take apart the lists, one by one (they are separated with commas */ 209 while ((ltok = strsep(&sopt, ",")) != NULL) { 210 211 /* it's actually legal to pass an empty list */ 212 if (strlen(ltok) == 0) 213 continue; 214 215 /* get the start pos */ 216 ntok = strsep(<ok, "-"); 217 if (ntok == NULL) { 218 bb_error_msg 219 ("internal error: ntok is null for start pos!?\n"); 220 } else if (strlen(ntok) == 0) { 221 s = BOL; 222 } else { 223 s = xatoi_u(ntok); 224 /* account for the fact that arrays are zero based, while 225 * the user expects the first char on the line to be char #1 */ 226 if (s != 0) 227 s--; 228 } 229 230 /* get the end pos */ 231 ntok = strsep(<ok, "-"); 232 if (ntok == NULL) { 233 e = NON_RANGE; 234 } else if (strlen(ntok) == 0) { 235 e = EOL; 236 } else { 237 e = xatoi_u(ntok); 238 /* if the user specified and end position of 0, that means "til the 239 * end of the line */ 240 if (e == 0) 241 e = EOL; 242 e--; /* again, arrays are zero based, lines are 1 based */ 243 if (e == s) 244 e = NON_RANGE; 245 } 246 247 /* if there's something left to tokenize, the user passed 248 * an invalid list */ 249 if (ltok) 250 bb_error_msg_and_die("invalid byte or field list"); 251 252 /* add the new list */ 253 cut_lists = xrealloc(cut_lists, sizeof(struct cut_list) * (++nlists)); 254 cut_lists[nlists-1].startpos = s; 255 cut_lists[nlists-1].endpos = e; 256 } 257 258 /* make sure we got some cut positions out of all that */ 259 if (nlists == 0) 260 bb_error_msg_and_die("missing list of positions"); 261 262 /* now that the lists are parsed, we need to sort them to make life 263 * easier on us when it comes time to print the chars / fields / lines 264 */ 265 qsort(cut_lists, nlists, sizeof(struct cut_list), cmpfunc); 266 } 267 268 /* argv[0..argc-1] should be names of file to process. If no 269 * files were specified or '-' was specified, take input from stdin. 270 * Otherwise, we process all the files specified. */ 271 if (argv[0] == NULL || LONE_DASH(argv[0])) { 272 cut_file(stdin); 273 } else { 274 FILE *file; 275 276 do { 277 file = fopen_or_warn(argv[0], "r"); 278 if (file) { 279 cut_file(file); 280 fclose(file); 281 } 282 } while (*++argv); 283 } 284 if (ENABLE_FEATURE_CLEAN_UP) 285 free(cut_lists); 286 return EXIT_SUCCESS; 287} 288