176273Sbrian/*- 276273Sbrian * Copyright (c) 1999 The NetBSD Foundation, Inc. 376273Sbrian * All rights reserved. 476273Sbrian * 576273Sbrian * This code is derived from software contributed to The NetBSD Foundation 676273Sbrian * by Klaus Klein. 776273Sbrian * 876273Sbrian * Redistribution and use in source and binary forms, with or without 976273Sbrian * modification, are permitted provided that the following conditions 1076273Sbrian * are met: 1176273Sbrian * 1. Redistributions of source code must retain the above copyright 1276273Sbrian * notice, this list of conditions and the following disclaimer. 1376273Sbrian * 2. Redistributions in binary form must reproduce the above copyright 1476273Sbrian * notice, this list of conditions and the following disclaimer in the 1576273Sbrian * documentation and/or other materials provided with the distribution. 1676273Sbrian * 1776273Sbrian * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 1876273Sbrian * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 1976273Sbrian * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 2076273Sbrian * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 2176273Sbrian * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2276273Sbrian * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2376273Sbrian * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2476273Sbrian * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2576273Sbrian * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 2676273Sbrian * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 2776273Sbrian * POSSIBILITY OF SUCH DAMAGE. 2876273Sbrian */ 2976273Sbrian 3076273Sbrian#include <sys/cdefs.h> 3176273Sbrian#ifndef lint 3276273Sbrian__COPYRIGHT( 3376273Sbrian"@(#) Copyright (c) 1999\ 3476273Sbrian The NetBSD Foundation, Inc. All rights reserved."); 3576273Sbrian__RCSID("$FreeBSD$"); 3676273Sbrian#endif 3776273Sbrian 38189168Sdas#define _WITH_GETLINE 3976273Sbrian#include <sys/types.h> 4076273Sbrian 4197337Stjr#include <err.h> 4276273Sbrian#include <errno.h> 4376273Sbrian#include <limits.h> 4476273Sbrian#include <locale.h> 4576273Sbrian#include <regex.h> 4676273Sbrian#include <stdio.h> 4776273Sbrian#include <stdlib.h> 4876273Sbrian#include <string.h> 4976273Sbrian#include <unistd.h> 50132078Stjr#include <wchar.h> 5176273Sbrian 5276273Sbriantypedef enum { 5376273Sbrian number_all, /* number all lines */ 5476273Sbrian number_nonempty, /* number non-empty lines */ 5576273Sbrian number_none, /* no line numbering */ 5676273Sbrian number_regex /* number lines matching regular expression */ 5776273Sbrian} numbering_type; 5876273Sbrian 5976273Sbrianstruct numbering_property { 6076273Sbrian const char * const name; /* for diagnostics */ 6176273Sbrian numbering_type type; /* numbering type */ 6276273Sbrian regex_t expr; /* for type == number_regex */ 6376273Sbrian}; 6476273Sbrian 6576273Sbrian/* line numbering formats */ 6676273Sbrian#define FORMAT_LN "%-*d" /* left justified, leading zeros suppressed */ 6776273Sbrian#define FORMAT_RN "%*d" /* right justified, leading zeros suppressed */ 6876273Sbrian#define FORMAT_RZ "%0*d" /* right justified, leading zeros kept */ 6976273Sbrian 7076273Sbrian#define FOOTER 0 7176273Sbrian#define BODY 1 7276273Sbrian#define HEADER 2 7376273Sbrian#define NP_LAST HEADER 7476273Sbrian 7576273Sbrianstatic struct numbering_property numbering_properties[NP_LAST + 1] = { 7676273Sbrian { "footer", number_none }, 7776273Sbrian { "body", number_nonempty }, 7876273Sbrian { "header", number_none } 7976273Sbrian}; 8076273Sbrian 8176273Sbrian#define max(a, b) ((a) > (b) ? (a) : (b)) 8276273Sbrian 8376273Sbrian/* 8476273Sbrian * Maximum number of characters required for a decimal representation of a 8576273Sbrian * (signed) int; courtesy of tzcode. 8676273Sbrian */ 8776273Sbrian#define INT_STRLEN_MAXIMUM \ 8876273Sbrian ((sizeof (int) * CHAR_BIT - 1) * 302 / 1000 + 2) 8976273Sbrian 9092921Simpstatic void filter(void); 9192921Simpstatic void parse_numbering(const char *, int); 9292921Simpstatic void usage(void); 9376273Sbrian 9476273Sbrian/* 9576273Sbrian * Dynamically allocated buffer suitable for string representation of ints. 9676273Sbrian */ 9776273Sbrianstatic char *intbuffer; 9876273Sbrian 99132078Stjr/* delimiter characters that indicate the start of a logical page section */ 100132078Stjrstatic char delim[2 * MB_LEN_MAX]; 101132078Stjrstatic int delimlen; 102132078Stjr 10376273Sbrian/* 10476273Sbrian * Configurable parameters. 10576273Sbrian */ 10676273Sbrian 10776273Sbrian/* line numbering format */ 10876273Sbrianstatic const char *format = FORMAT_RN; 10976273Sbrian 11076273Sbrian/* increment value used to number logical page lines */ 11176273Sbrianstatic int incr = 1; 11276273Sbrian 11376273Sbrian/* number of adjacent blank lines to be considered (and numbered) as one */ 11476273Sbrianstatic unsigned int nblank = 1; 11576273Sbrian 11676273Sbrian/* whether to restart numbering at logical page delimiters */ 11776273Sbrianstatic int restart = 1; 11876273Sbrian 11976273Sbrian/* characters used in separating the line number and the corrsp. text line */ 12076273Sbrianstatic const char *sep = "\t"; 12176273Sbrian 12276273Sbrian/* initial value used to number logical page lines */ 12376273Sbrianstatic int startnum = 1; 12476273Sbrian 12576273Sbrian/* number of characters to be used for the line number */ 12676273Sbrian/* should be unsigned but required signed by `*' precision conversion */ 12776273Sbrianstatic int width = 6; 12876273Sbrian 12976273Sbrian 13076273Sbrianint 13176273Sbrianmain(argc, argv) 13276273Sbrian int argc; 13376273Sbrian char *argv[]; 13476273Sbrian{ 135144840Sstefanf int c; 13676273Sbrian long val; 13776273Sbrian unsigned long uval; 13876273Sbrian char *ep; 139132078Stjr size_t intbuffersize, clen; 140132078Stjr char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' }; 141132078Stjr size_t delim1len = 1, delim2len = 1; 14276273Sbrian 14376273Sbrian (void)setlocale(LC_ALL, ""); 14476273Sbrian 14576273Sbrian while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) { 14676273Sbrian switch (c) { 14776273Sbrian case 'p': 14876273Sbrian restart = 0; 14976273Sbrian break; 15076273Sbrian case 'b': 15176273Sbrian parse_numbering(optarg, BODY); 15276273Sbrian break; 15376273Sbrian case 'd': 154132078Stjr clen = mbrlen(optarg, MB_CUR_MAX, NULL); 155132078Stjr if (clen == (size_t)-1 || clen == (size_t)-2) 156132078Stjr errc(EXIT_FAILURE, EILSEQ, NULL); 157132078Stjr if (clen != 0) { 158132078Stjr memcpy(delim1, optarg, delim1len = clen); 159132078Stjr clen = mbrlen(optarg + delim1len, 160132078Stjr MB_CUR_MAX, NULL); 161132078Stjr if (clen == (size_t)-1 || 162132078Stjr clen == (size_t)-2) 163132078Stjr errc(EXIT_FAILURE, EILSEQ, NULL); 164132078Stjr if (clen != 0) { 165132078Stjr memcpy(delim2, optarg + delim1len, 166132078Stjr delim2len = clen); 167132078Stjr if (optarg[delim1len + clen] != '\0') 168132078Stjr errx(EXIT_FAILURE, 169132078Stjr "invalid delim argument -- %s", 170132078Stjr optarg); 171132078Stjr } 17276273Sbrian } 17376273Sbrian break; 17476273Sbrian case 'f': 17576273Sbrian parse_numbering(optarg, FOOTER); 17676273Sbrian break; 17776273Sbrian case 'h': 17876273Sbrian parse_numbering(optarg, HEADER); 17976273Sbrian break; 18076273Sbrian case 'i': 18176273Sbrian errno = 0; 18276273Sbrian val = strtol(optarg, &ep, 10); 18376273Sbrian if ((ep != NULL && *ep != '\0') || 18497338Stjr ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) 18597338Stjr errx(EXIT_FAILURE, 18697338Stjr "invalid incr argument -- %s", optarg); 18776273Sbrian incr = (int)val; 18876273Sbrian break; 18976273Sbrian case 'l': 19076273Sbrian errno = 0; 19176273Sbrian uval = strtoul(optarg, &ep, 10); 19276273Sbrian if ((ep != NULL && *ep != '\0') || 19397338Stjr (uval == ULONG_MAX && errno != 0)) 19497338Stjr errx(EXIT_FAILURE, 19597338Stjr "invalid num argument -- %s", optarg); 19676273Sbrian nblank = (unsigned int)uval; 19776273Sbrian break; 19876273Sbrian case 'n': 19976273Sbrian if (strcmp(optarg, "ln") == 0) { 20076273Sbrian format = FORMAT_LN; 20176273Sbrian } else if (strcmp(optarg, "rn") == 0) { 20276273Sbrian format = FORMAT_RN; 20376273Sbrian } else if (strcmp(optarg, "rz") == 0) { 20476273Sbrian format = FORMAT_RZ; 20597338Stjr } else 20697338Stjr errx(EXIT_FAILURE, 20797338Stjr "illegal format -- %s", optarg); 20876273Sbrian break; 20976273Sbrian case 's': 21076273Sbrian sep = optarg; 21176273Sbrian break; 21276273Sbrian case 'v': 21376273Sbrian errno = 0; 21476273Sbrian val = strtol(optarg, &ep, 10); 21576273Sbrian if ((ep != NULL && *ep != '\0') || 21697338Stjr ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) 21797338Stjr errx(EXIT_FAILURE, 21897338Stjr "invalid startnum value -- %s", optarg); 21976273Sbrian startnum = (int)val; 22076273Sbrian break; 22176273Sbrian case 'w': 22276273Sbrian errno = 0; 22376273Sbrian val = strtol(optarg, &ep, 10); 22476273Sbrian if ((ep != NULL && *ep != '\0') || 22597338Stjr ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) 22697338Stjr errx(EXIT_FAILURE, 22797338Stjr "invalid width value -- %s", optarg); 22876273Sbrian width = (int)val; 22997338Stjr if (!(width > 0)) 23097338Stjr errx(EXIT_FAILURE, 23197338Stjr "width argument must be > 0 -- %d", 23276273Sbrian width); 23376273Sbrian break; 23476273Sbrian case '?': 23576273Sbrian default: 23676273Sbrian usage(); 23776273Sbrian /* NOTREACHED */ 23876273Sbrian } 23976273Sbrian } 24076273Sbrian argc -= optind; 24176273Sbrian argv += optind; 24276273Sbrian 24376273Sbrian switch (argc) { 24476273Sbrian case 0: 24576273Sbrian break; 24676273Sbrian case 1: 24797337Stjr if (freopen(argv[0], "r", stdin) == NULL) 24897337Stjr err(EXIT_FAILURE, "%s", argv[0]); 24976273Sbrian break; 25076273Sbrian default: 25176273Sbrian usage(); 25276273Sbrian /* NOTREACHED */ 25376273Sbrian } 25476273Sbrian 255132078Stjr /* Generate the delimiter sequence */ 256132078Stjr memcpy(delim, delim1, delim1len); 257132078Stjr memcpy(delim + delim1len, delim2, delim2len); 258132078Stjr delimlen = delim1len + delim2len; 259132078Stjr 26076273Sbrian /* Allocate a buffer suitable for preformatting line number. */ 26176273Sbrian intbuffersize = max(INT_STRLEN_MAXIMUM, width) + 1; /* NUL */ 26297337Stjr if ((intbuffer = malloc(intbuffersize)) == NULL) 26397337Stjr err(EXIT_FAILURE, "cannot allocate preformatting buffer"); 26476273Sbrian 26576273Sbrian /* Do the work. */ 26676273Sbrian filter(); 26776273Sbrian 26876273Sbrian exit(EXIT_SUCCESS); 26976273Sbrian /* NOTREACHED */ 27076273Sbrian} 27176273Sbrian 27276273Sbrianstatic void 27376273Sbrianfilter() 27476273Sbrian{ 275189168Sdas char *buffer; 276189168Sdas size_t buffersize; 277189168Sdas ssize_t linelen; 27876273Sbrian int line; /* logical line number */ 27976273Sbrian int section; /* logical page section */ 28076273Sbrian unsigned int adjblank; /* adjacent blank lines */ 28176273Sbrian int consumed; /* intbuffer measurement */ 282165462Simp int donumber = 0, idx; 28376273Sbrian 28476273Sbrian adjblank = 0; 28576273Sbrian line = startnum; 28676273Sbrian section = BODY; 28776273Sbrian 288189168Sdas buffer = NULL; 289189168Sdas buffersize = 0; 290189168Sdas while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) { 29176273Sbrian for (idx = FOOTER; idx <= NP_LAST; idx++) { 29276273Sbrian /* Does it look like a delimiter? */ 293189168Sdas if (delimlen * (idx + 1) > linelen) 294189168Sdas break; 295132078Stjr if (memcmp(buffer + delimlen * idx, delim, 296189168Sdas delimlen) != 0) 29776273Sbrian break; 298189168Sdas /* Was this the whole line? */ 299189168Sdas if (buffer[delimlen * (idx + 1)] == '\n') { 300189168Sdas section = idx; 301189168Sdas adjblank = 0; 302189168Sdas if (restart) 303189168Sdas line = startnum; 304189168Sdas goto nextline; 30576273Sbrian } 30676273Sbrian } 30776273Sbrian 30876273Sbrian switch (numbering_properties[section].type) { 30976273Sbrian case number_all: 31076273Sbrian /* 31176273Sbrian * Doing this for number_all only is disputable, but 31276273Sbrian * the standard expresses an explicit dependency on 31376273Sbrian * `-b a' etc. 31476273Sbrian */ 31576273Sbrian if (buffer[0] == '\n' && ++adjblank < nblank) 31676273Sbrian donumber = 0; 31776273Sbrian else 31876273Sbrian donumber = 1, adjblank = 0; 31976273Sbrian break; 32076273Sbrian case number_nonempty: 32176273Sbrian donumber = (buffer[0] != '\n'); 32276273Sbrian break; 32376273Sbrian case number_none: 32476273Sbrian donumber = 0; 32576273Sbrian break; 32676273Sbrian case number_regex: 32776273Sbrian donumber = 32876273Sbrian (regexec(&numbering_properties[section].expr, 32976273Sbrian buffer, 0, NULL, 0) == 0); 33076273Sbrian break; 33176273Sbrian } 33276273Sbrian 33376273Sbrian if (donumber) { 33476273Sbrian /* Note: sprintf() is safe here. */ 33576273Sbrian consumed = sprintf(intbuffer, format, width, line); 33676273Sbrian (void)printf("%s", 33776273Sbrian intbuffer + max(0, consumed - width)); 33876273Sbrian line += incr; 33976273Sbrian } else { 34076273Sbrian (void)printf("%*s", width, ""); 34176273Sbrian } 342189168Sdas (void)fputs(sep, stdout); 343189168Sdas (void)fwrite(buffer, linelen, 1, stdout); 34476273Sbrian 34597337Stjr if (ferror(stdout)) 34697337Stjr err(EXIT_FAILURE, "output error"); 34776273Sbriannextline: 34876273Sbrian ; 34976273Sbrian } 35076273Sbrian 35197337Stjr if (ferror(stdin)) 35297337Stjr err(EXIT_FAILURE, "input error"); 353189168Sdas 354189168Sdas free(buffer); 35576273Sbrian} 35676273Sbrian 35776273Sbrian/* 35876273Sbrian * Various support functions. 35976273Sbrian */ 36076273Sbrian 36176273Sbrianstatic void 36276273Sbrianparse_numbering(argstr, section) 36376273Sbrian const char *argstr; 36476273Sbrian int section; 36576273Sbrian{ 36676273Sbrian int error; 36776273Sbrian char errorbuf[NL_TEXTMAX]; 36876273Sbrian 36976273Sbrian switch (argstr[0]) { 37076273Sbrian case 'a': 37176273Sbrian numbering_properties[section].type = number_all; 37276273Sbrian break; 37376273Sbrian case 'n': 37476273Sbrian numbering_properties[section].type = number_none; 37576273Sbrian break; 37676273Sbrian case 't': 37776273Sbrian numbering_properties[section].type = number_nonempty; 37876273Sbrian break; 37976273Sbrian case 'p': 38076273Sbrian /* If there was a previous expression, throw it away. */ 38176273Sbrian if (numbering_properties[section].type == number_regex) 38276273Sbrian regfree(&numbering_properties[section].expr); 38376273Sbrian else 38476273Sbrian numbering_properties[section].type = number_regex; 38576273Sbrian 38676273Sbrian /* Compile/validate the supplied regular expression. */ 38776273Sbrian if ((error = regcomp(&numbering_properties[section].expr, 38876273Sbrian &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) { 38976273Sbrian (void)regerror(error, 39076273Sbrian &numbering_properties[section].expr, 39176273Sbrian errorbuf, sizeof (errorbuf)); 39297338Stjr errx(EXIT_FAILURE, 39397338Stjr "%s expr: %s -- %s", 39476273Sbrian numbering_properties[section].name, errorbuf, 39576273Sbrian &argstr[1]); 39676273Sbrian } 39776273Sbrian break; 39876273Sbrian default: 39997338Stjr errx(EXIT_FAILURE, 40097338Stjr "illegal %s line numbering type -- %s", 40176273Sbrian numbering_properties[section].name, argstr); 40276273Sbrian } 40376273Sbrian} 40476273Sbrian 40576273Sbrianstatic void 40676273Sbrianusage() 40776273Sbrian{ 40876273Sbrian 409119025Stjr (void)fprintf(stderr, 410119025Stjr"usage: nl [-p] [-b type] [-d delim] [-f type] [-h type] [-i incr] [-l num]\n" 411119025Stjr" [-n format] [-s sep] [-v startnum] [-w width] [file]\n"); 41276273Sbrian exit(EXIT_FAILURE); 41376273Sbrian} 414