1/*	$OpenBSD: nl.c,v 1.8 2022/12/04 23:50:49 cheloha Exp $ */
2/*	$NetBSD: nl.c,v 1.11 2011/08/16 12:00:46 christos Exp $	*/
3
4/*-
5 * Copyright (c) 1999 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Klaus Klein.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <err.h>
34#include <errno.h>
35#include <limits.h>
36#include <locale.h>
37#include <regex.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include <unistd.h>
42#include <wchar.h>
43
/*
 * Line selection policy for one logical page section.
 */
typedef enum {
	number_all = 0,		/* every line receives a number */
	number_nonempty = 1,	/* only lines with content are numbered */
	number_none = 2,	/* nothing is numbered */
	number_regex = 3	/* lines matching a regular expression */
} numbering_type;
50
51struct numbering_property {
52	const char * const	name;		/* for diagnostics */
53	numbering_type		type;		/* numbering type */
54	regex_t			expr;		/* for type == number_regex */
55};
56
/*
 * printf(3) formats for the line number; each one consumes a field width
 * (`*') followed by the number itself.
 */
#define FORMAT_LN	"%-*d"	/* left justified, no leading zeros */
#define FORMAT_RN	"%*d"	/* right justified, no leading zeros */
#define FORMAT_RZ	"%0*d"	/* right justified, padded with zeros */

/*
 * Indices of the logical page sections in numbering_properties[].
 */
#define FOOTER		0
#define BODY		1
#define HEADER		2
#define NP_LAST		HEADER	/* highest valid section index */
66
67static struct numbering_property numbering_properties[NP_LAST + 1] = {
68	{ "footer",	number_none,	{ 0, 0, 0, 0 } },
69	{ "body",	number_nonempty, { 0, 0, 0, 0 } },
70	{ "header",	number_none,	{ 0, 0, 0, 0 } },
71};
72
73void		filter(void);
74void		parse_numbering(const char *, int);
75__dead void	usage(void);
76
77/*
78 * Delimiter characters that indicate the start of a logical page section.
79 */
80static char delim[2 * MB_LEN_MAX];
81static int delimlen;
82
83/*
84 * Configurable parameters.
85 */
86
87/* line numbering format */
88static const char *format = FORMAT_RN;
89
90/* increment value used to number logical page lines */
91static int incr = 1;
92
93/* number of adjacent blank lines to be considered (and numbered) as one */
94static unsigned int nblank = 1;
95
96/* whether to restart numbering at logical page delimiters */
97static int restart = 1;
98
99/* characters used in separating the line number and the corrsp. text line */
100static const char *sep = "\t";
101
102/* initial value used to number logical page lines */
103static int startnum = 1;
104
105/* number of characters to be used for the line number */
106/* should be unsigned but required signed by `*' precision conversion */
107static int width = 6;
108
109
110int
111main(int argc, char *argv[])
112{
113	int c;
114	size_t clen;
115	char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' };
116	size_t delim1len = 1, delim2len = 1;
117	const char *errstr;
118
119	(void)setlocale(LC_ALL, "");
120
121	if (pledge("stdio rpath", NULL) == -1)
122		err(1, "pledge");
123
124	while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) {
125		switch (c) {
126		case 'p':
127			restart = 0;
128			break;
129		case 'b':
130			parse_numbering(optarg, BODY);
131			break;
132		case 'd':
133			clen = mbrlen(optarg, MB_CUR_MAX, NULL);
134			if (clen == (size_t)-1 || clen == (size_t)-2)
135				errc(EXIT_FAILURE, EILSEQ, NULL);
136			if (clen != 0) {
137				memcpy(delim1, optarg, delim1len = clen);
138				clen = mbrlen(optarg + delim1len,
139				    MB_CUR_MAX, NULL);
140				if (clen == (size_t)-1 || clen == (size_t)-2)
141					errc(EXIT_FAILURE, EILSEQ, NULL);
142				if (clen != 0) {
143					memcpy(delim2, optarg + delim1len,
144					    delim2len = clen);
145					if (optarg[delim1len + clen] != '\0') {
146						errx(EXIT_FAILURE,
147						    "invalid delimiter: %s",
148						    optarg);
149					}
150				}
151			}
152			break;
153		case 'f':
154			parse_numbering(optarg, FOOTER);
155			break;
156		case 'h':
157			parse_numbering(optarg, HEADER);
158			break;
159		case 'i':
160			incr = strtonum(optarg, INT_MIN, INT_MAX, &errstr);
161			if (errstr)
162				errx(EXIT_FAILURE, "increment value is %s: %s",
163				    errstr, optarg);
164			break;
165		case 'l':
166			nblank = strtonum(optarg, 0, UINT_MAX, &errstr);
167			if (errstr)
168				errx(EXIT_FAILURE,
169				    "blank line value is %s: %s",
170				    errstr, optarg);
171			break;
172		case 'n':
173			if (strcmp(optarg, "ln") == 0) {
174				format = FORMAT_LN;
175			} else if (strcmp(optarg, "rn") == 0) {
176				format = FORMAT_RN;
177			} else if (strcmp(optarg, "rz") == 0) {
178				format = FORMAT_RZ;
179			} else
180				errx(EXIT_FAILURE,
181				    "illegal format -- %s", optarg);
182			break;
183		case 's':
184			sep = optarg;
185			break;
186		case 'v':
187			startnum = strtonum(optarg, INT_MIN, INT_MAX, &errstr);
188			if (errstr)
189				errx(EXIT_FAILURE,
190				    "initial logical page value is %s: %s",
191				    errstr, optarg);
192			break;
193		case 'w':
194			width = strtonum(optarg, 1, INT_MAX, &errstr);
195			if (errstr)
196				errx(EXIT_FAILURE, "width is %s: %s", errstr,
197				    optarg);
198			break;
199		default:
200			usage();
201			/* NOTREACHED */
202		}
203	}
204	argc -= optind;
205	argv += optind;
206
207	switch (argc) {
208	case 0:
209		break;
210	case 1:
211		if (strcmp(argv[0], "-") != 0 &&
212		    freopen(argv[0], "r", stdin) == NULL)
213			err(EXIT_FAILURE, "%s", argv[0]);
214		break;
215	default:
216		usage();
217		/* NOTREACHED */
218	}
219
220	if (pledge("stdio", NULL) == -1)
221		err(1, "pledge");
222
223	/* Generate the delimiter sequence */
224	memcpy(delim, delim1, delim1len);
225	memcpy(delim + delim1len, delim2, delim2len);
226	delimlen = delim1len + delim2len;
227
228	/* Do the work. */
229	filter();
230
231	exit(EXIT_SUCCESS);
232}
233
234void
235filter(void)
236{
237	char *buffer;
238	size_t buffersize;
239	ssize_t linelen;
240	int line;		/* logical line number */
241	int section;		/* logical page section */
242	unsigned int adjblank;	/* adjacent blank lines */
243	int donumber = 0, idx;
244
245	adjblank = 0;
246	line = startnum;
247	section = BODY;
248
249	buffer = NULL;
250	buffersize = 0;
251	while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) {
252		for (idx = FOOTER; idx <= NP_LAST; idx++) {
253			/* Does it look like a delimiter? */
254			if (delimlen * (idx + 1) > linelen)
255				break;
256			if (memcmp(buffer + delimlen * idx, delim,
257			    delimlen) != 0)
258				break;
259			/* Was this the whole line? */
260			if (buffer[delimlen * (idx + 1)] == '\n') {
261				section = idx;
262				adjblank = 0;
263				if (restart)
264					line = startnum;
265				goto nextline;
266			}
267		}
268
269		switch (numbering_properties[section].type) {
270		case number_all:
271			/*
272			 * Doing this for number_all only is disputable, but
273			 * the standard expresses an explicit dependency on
274			 * `-b a' etc.
275			 */
276			if (buffer[0] == '\n' && ++adjblank < nblank)
277				donumber = 0;
278			else
279				donumber = 1, adjblank = 0;
280			break;
281		case number_nonempty:
282			donumber = (buffer[0] != '\n');
283			break;
284		case number_none:
285			donumber = 0;
286			break;
287		case number_regex:
288			donumber =
289			    (regexec(&numbering_properties[section].expr,
290			    buffer, 0, NULL, 0) == 0);
291			break;
292		}
293
294		if (donumber) {
295			(void)printf(format, width, line);
296			line += incr;
297			(void)fputs(sep, stdout);
298		} else {
299			(void)printf("%*s", width, "");
300		}
301		(void)fwrite(buffer, linelen, 1, stdout);
302
303		if (ferror(stdout))
304			err(EXIT_FAILURE, "output error");
305nextline:
306		;
307	}
308
309	if (ferror(stdin))
310		err(EXIT_FAILURE, "input error");
311
312	free(buffer);
313}
314
315/*
316 * Various support functions.
317 */
318
319void
320parse_numbering(const char *argstr, int section)
321{
322	int error;
323	char errorbuf[NL_TEXTMAX];
324
325	switch (argstr[0]) {
326	case 'a':
327		numbering_properties[section].type = number_all;
328		break;
329	case 'n':
330		numbering_properties[section].type = number_none;
331		break;
332	case 't':
333		numbering_properties[section].type = number_nonempty;
334		break;
335	case 'p':
336		/* If there was a previous expression, throw it away. */
337		if (numbering_properties[section].type == number_regex)
338			regfree(&numbering_properties[section].expr);
339		else
340			numbering_properties[section].type = number_regex;
341
342		/* Compile/validate the supplied regular expression. */
343		if ((error = regcomp(&numbering_properties[section].expr,
344		    &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) {
345			(void)regerror(error,
346			    &numbering_properties[section].expr,
347			    errorbuf, sizeof(errorbuf));
348			errx(EXIT_FAILURE,
349			    "%s expr: %s -- %s",
350			    numbering_properties[section].name, errorbuf,
351			    &argstr[1]);
352		}
353		break;
354	default:
355		errx(EXIT_FAILURE,
356		    "illegal %s line numbering type -- %s",
357		    numbering_properties[section].name, argstr);
358	}
359}
360
361__dead void
362usage(void)
363{
364	(void)fprintf(stderr, "usage: %s [-p] [-b type] [-d delim] [-f type] "
365	    "[-h type] [-i incr] [-l num]\n\t[-n format] [-s sep] "
366	    "[-v startnum] [-w width] [file]\n", getprogname());
367	exit(EXIT_FAILURE);
368}
369