1/* nl -- number lines of files
2   Copyright (C) 1989, 1992, 1995-2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Written by Scott Bartram (nancy!scott@uunet.uu.net)
18   Revised by David MacKenzie (djm@gnu.ai.mit.edu) */
19
20#include <config.h>
21
22#include <stdio.h>
23#include <sys/types.h>
24#include <getopt.h>
25
26#include "system.h"
27
28#include <regex.h>
29
30#include "error.h"
31#include "linebuffer.h"
32#include "quote.h"
33#include "xstrtol.h"
34
/* Canonical name of this utility, independent of how the binary was
   installed (e.g., without any `g' prefix).  */
#define PROGRAM_NAME "nl"

/* Author list; main passes it, together with PROGRAM_NAME, to
   case_GETOPT_VERSION_CHAR.  */
#define AUTHORS                     \
  proper_name ("Scott Bartram"),    \
  proper_name ("David MacKenzie")
41
/* printf templates for the prefix of a numbered line.  Each template
   consumes, in order: an int field width, an intmax_t line number, and
   the separator string.  */

/* Number flush right, padded on the left with blanks.  */
static const char FORMAT_RIGHT_NOLZ[] = "%*" PRIdMAX "%s";

/* Number flush right, padded on the left with zeros.  */
static const char FORMAT_RIGHT_LZ[] = "%0*" PRIdMAX "%s";

/* Number flush left, without zero padding.  */
static const char FORMAT_LEFT[] = "%-*" PRIdMAX "%s";

/* Section delimiter characters used when -d is not given.  */
static const char DEFAULT_SECTION_DELIMITERS[] = "\\:";
56
/* Classification of one input line: a delimiter line that opens the
   header, body, or footer section, or ordinary text to be output.  */
enum section
{
  Header,
  Body,
  Footer,
  Text
};
63
/* Numbering style of each section.  Each pointer refers to a STYLE
   string whose first character (`a', `t', `n' or `p') selects the
   policy that proc_text applies.  */

/* Style for body lines (-b).  */
static const char *body_type = "t";

/* Style for header lines (-h).  */
static const char *header_type = "n";

/* Style for footer lines (-f).  */
static const char *footer_type = "n";

/* Style of the section currently being processed.  */
static const char *current_type;

/* Compiled pattern selecting body lines to number (-bp).  */
static struct re_pattern_buffer body_regex;

/* Compiled pattern selecting header lines to number (-hp).  */
static struct re_pattern_buffer header_regex;

/* Compiled pattern selecting footer lines to number (-fp).  */
static struct re_pattern_buffer footer_regex;

/* Fastmap storage, one table per pattern buffer above.  */
static char body_fastmap[UCHAR_MAX + 1];
static char header_fastmap[UCHAR_MAX + 1];
static char footer_fastmap[UCHAR_MAX + 1];

/* Pattern buffer of the section currently being processed.  */
static struct re_pattern_buffer *current_regex = NULL;
92
93/* Separator string to print after line number (-s).  */
94static char const *separator_str = "\t";
95
96/* Input section delimiter string (-d).  */
97static char const *section_del = DEFAULT_SECTION_DELIMITERS;
98
99/* Header delimiter string.  */
100static char *header_del = NULL;
101
102/* Header section delimiter length.  */
103static size_t header_del_len;
104
105/* Body delimiter string.  */
106static char *body_del = NULL;
107
108/* Body section delimiter length.  */
109static size_t body_del_len;
110
111/* Footer delimiter string.  */
112static char *footer_del = NULL;
113
114/* Footer section delimiter length.  */
115static size_t footer_del_len;
116
117/* Input buffer.  */
118static struct linebuffer line_buf;
119
120/* printf format string for unnumbered lines.  */
121static char *print_no_line_fmt = NULL;
122
123/* Starting line number on each page (-v).  */
124static intmax_t starting_line_number = 1;
125
126/* Line number increment (-i).  */
127static intmax_t page_incr = 1;
128
129/* If true, reset line number at start of each page (-p).  */
130static bool reset_numbers = true;
131
132/* Number of blank lines to consider to be one line for numbering (-l).  */
133static intmax_t blank_join = 1;
134
135/* Width of line numbers (-w).  */
136static int lineno_width = 6;
137
138/* Line number format (-n).  */
139static char const *lineno_format = FORMAT_RIGHT_NOLZ;
140
141/* Current print line number.  */
142static intmax_t line_no;
143
144/* True if we have ever read standard input.  */
145static bool have_read_stdin;
146
147enum
148{
149  PAGE_INCREMENT_OPTION_DEPRECATED = CHAR_MAX + 1
150};
151
152static struct option const longopts[] =
153{
154  {"header-numbering", required_argument, NULL, 'h'},
155  {"body-numbering", required_argument, NULL, 'b'},
156  {"footer-numbering", required_argument, NULL, 'f'},
157  {"starting-line-number", required_argument, NULL, 'v'},
158  {"line-increment", required_argument, NULL, 'i'},
159  /* FIXME: page-increment is deprecated, remove in dec-2011.  */
160  {"page-increment", required_argument, NULL, PAGE_INCREMENT_OPTION_DEPRECATED},
161  {"no-renumber", no_argument, NULL, 'p'},
162  {"join-blank-lines", required_argument, NULL, 'l'},
163  {"number-separator", required_argument, NULL, 's'},
164  {"number-width", required_argument, NULL, 'w'},
165  {"number-format", required_argument, NULL, 'n'},
166  {"section-delimiter", required_argument, NULL, 'd'},
167  {GETOPT_HELP_OPTION_DECL},
168  {GETOPT_VERSION_OPTION_DECL},
169  {NULL, 0, NULL, 0}
170};
171
172/* Print a usage message and quit. */
173
174void
175usage (int status)
176{
177  if (status != EXIT_SUCCESS)
178    fprintf (stderr, _("Try `%s --help' for more information.\n"),
179             program_name);
180  else
181    {
182      printf (_("\
183Usage: %s [OPTION]... [FILE]...\n\
184"),
185              program_name);
186      fputs (_("\
187Write each FILE to standard output, with line numbers added.\n\
188With no FILE, or when FILE is -, read standard input.\n\
189\n\
190"), stdout);
191      fputs (_("\
192Mandatory arguments to long options are mandatory for short options too.\n\
193"), stdout);
194      fputs (_("\
195  -b, --body-numbering=STYLE      use STYLE for numbering body lines\n\
196  -d, --section-delimiter=CC      use CC for separating logical pages\n\
197  -f, --footer-numbering=STYLE    use STYLE for numbering footer lines\n\
198"), stdout);
199      fputs (_("\
200  -h, --header-numbering=STYLE    use STYLE for numbering header lines\n\
201  -i, --line-increment=NUMBER     line number increment at each line\n\
202  -l, --join-blank-lines=NUMBER   group of NUMBER empty lines counted as one\n\
203  -n, --number-format=FORMAT      insert line numbers according to FORMAT\n\
204  -p, --no-renumber               do not reset line numbers at logical pages\n\
205  -s, --number-separator=STRING   add STRING after (possible) line number\n\
206"), stdout);
207      fputs (_("\
208  -v, --starting-line-number=NUMBER  first line number on each logical page\n\
209  -w, --number-width=NUMBER       use NUMBER columns for line numbers\n\
210"), stdout);
211      fputs (HELP_OPTION_DESCRIPTION, stdout);
212      fputs (VERSION_OPTION_DESCRIPTION, stdout);
213      fputs (_("\
214\n\
215By default, selects -v1 -i1 -l1 -sTAB -w6 -nrn -hn -bt -fn.  CC are\n\
216two delimiter characters for separating logical pages, a missing\n\
217second character implies :.  Type \\\\ for \\.  STYLE is one of:\n\
218"), stdout);
219      fputs (_("\
220\n\
221  a         number all lines\n\
222  t         number only nonempty lines\n\
223  n         number no lines\n\
224  pBRE      number only lines that contain a match for the basic regular\n\
225            expression, BRE\n\
226\n\
227FORMAT is one of:\n\
228\n\
229  ln   left justified, no leading zeros\n\
230  rn   right justified, no leading zeros\n\
231  rz   right justified, leading zeros\n\
232\n\
233"), stdout);
234      emit_ancillary_info ();
235    }
236  exit (status);
237}
238
239/* Set the command line flag TYPEP and possibly the regex pointer REGEXP,
240   according to `optarg'.  */
241
242static bool
243build_type_arg (char const **typep,
244                struct re_pattern_buffer *regexp, char *fastmap)
245{
246  char const *errmsg;
247  bool rval = true;
248
249  switch (*optarg)
250    {
251    case 'a':
252    case 't':
253    case 'n':
254      *typep = optarg;
255      break;
256    case 'p':
257      *typep = optarg++;
258      regexp->buffer = NULL;
259      regexp->allocated = 0;
260      regexp->fastmap = fastmap;
261      regexp->translate = NULL;
262      re_syntax_options =
263        RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
264      errmsg = re_compile_pattern (optarg, strlen (optarg), regexp);
265      if (errmsg)
266        error (EXIT_FAILURE, 0, "%s", errmsg);
267      break;
268    default:
269      rval = false;
270      break;
271    }
272  return rval;
273}
274
275/* Print the line number and separator; increment the line number. */
276
277static void
278print_lineno (void)
279{
280  intmax_t next_line_no;
281
282  printf (lineno_format, lineno_width, line_no, separator_str);
283
284  next_line_no = line_no + page_incr;
285  if (next_line_no < line_no)
286    error (EXIT_FAILURE, 0, _("line number overflow"));
287  line_no = next_line_no;
288}
289
290/* Switch to a header section. */
291
292static void
293proc_header (void)
294{
295  current_type = header_type;
296  current_regex = &header_regex;
297  if (reset_numbers)
298    line_no = starting_line_number;
299  putchar ('\n');
300}
301
302/* Switch to a body section. */
303
304static void
305proc_body (void)
306{
307  current_type = body_type;
308  current_regex = &body_regex;
309  putchar ('\n');
310}
311
312/* Switch to a footer section. */
313
314static void
315proc_footer (void)
316{
317  current_type = footer_type;
318  current_regex = &footer_regex;
319  putchar ('\n');
320}
321
322/* Process a regular text line in `line_buf'. */
323
324static void
325proc_text (void)
326{
327  static intmax_t blank_lines = 0;	/* Consecutive blank lines so far. */
328
329  switch (*current_type)
330    {
331    case 'a':
332      if (blank_join > 1)
333        {
334          if (1 < line_buf.length || ++blank_lines == blank_join)
335            {
336              print_lineno ();
337              blank_lines = 0;
338            }
339          else
340            fputs (print_no_line_fmt, stdout);
341        }
342      else
343        print_lineno ();
344      break;
345    case 't':
346      if (1 < line_buf.length)
347        print_lineno ();
348      else
349        fputs (print_no_line_fmt, stdout);
350      break;
351    case 'n':
352      fputs (print_no_line_fmt, stdout);
353      break;
354    case 'p':
355      switch (re_search (current_regex, line_buf.buffer, line_buf.length - 1,
356                         0, line_buf.length - 1, NULL))
357        {
358        case -2:
359          error (EXIT_FAILURE, errno, _("error in regular expression search"));
360
361        case -1:
362          fputs (print_no_line_fmt, stdout);
363          break;
364
365        default:
366          print_lineno ();
367          break;
368        }
369    }
370  fwrite (line_buf.buffer, sizeof (char), line_buf.length, stdout);
371}
372
373/* Return the type of line in `line_buf'. */
374
375static enum section
376check_section (void)
377{
378  size_t len = line_buf.length - 1;
379
380  if (len < 2 || memcmp (line_buf.buffer, section_del, 2))
381    return Text;
382  if (len == header_del_len
383      && !memcmp (line_buf.buffer, header_del, header_del_len))
384    return Header;
385  if (len == body_del_len
386      && !memcmp (line_buf.buffer, body_del, body_del_len))
387    return Body;
388  if (len == footer_del_len
389      && !memcmp (line_buf.buffer, footer_del, footer_del_len))
390    return Footer;
391  return Text;
392}
393
394/* Read and process the file pointed to by FP. */
395
396static void
397process_file (FILE *fp)
398{
399  while (readlinebuffer (&line_buf, fp))
400    {
401      switch (check_section ())
402        {
403        case Header:
404          proc_header ();
405          break;
406        case Body:
407          proc_body ();
408          break;
409        case Footer:
410          proc_footer ();
411          break;
412        case Text:
413          proc_text ();
414          break;
415        }
416    }
417}
418
419/* Process file FILE to standard output.
420   Return true if successful.  */
421
422static bool
423nl_file (char const *file)
424{
425  FILE *stream;
426
427  if (STREQ (file, "-"))
428    {
429      have_read_stdin = true;
430      stream = stdin;
431    }
432  else
433    {
434      stream = fopen (file, "r");
435      if (stream == NULL)
436        {
437          error (0, errno, "%s", file);
438          return false;
439        }
440    }
441
442  process_file (stream);
443
444  if (ferror (stream))
445    {
446      error (0, errno, "%s", file);
447      return false;
448    }
449  if (STREQ (file, "-"))
450    clearerr (stream);		/* Also clear EOF. */
451  else if (fclose (stream) == EOF)
452    {
453      error (0, errno, "%s", file);
454      return false;
455    }
456  return true;
457}
458
459int
460main (int argc, char **argv)
461{
462  int c;
463  size_t len;
464  bool ok = true;
465
466  initialize_main (&argc, &argv);
467  set_program_name (argv[0]);
468  setlocale (LC_ALL, "");
469  bindtextdomain (PACKAGE, LOCALEDIR);
470  textdomain (PACKAGE);
471
472  atexit (close_stdout);
473
474  have_read_stdin = false;
475
476  while ((c = getopt_long (argc, argv, "h:b:f:v:i:pl:s:w:n:d:", longopts,
477                           NULL)) != -1)
478    {
479      switch (c)
480        {
481        case 'h':
482          if (! build_type_arg (&header_type, &header_regex, header_fastmap))
483            {
484              error (0, 0, _("invalid header numbering style: %s"),
485                     quote (optarg));
486              ok = false;
487            }
488          break;
489        case 'b':
490          if (! build_type_arg (&body_type, &body_regex, body_fastmap))
491            {
492              error (0, 0, _("invalid body numbering style: %s"),
493                     quote (optarg));
494              ok = false;
495            }
496          break;
497        case 'f':
498          if (! build_type_arg (&footer_type, &footer_regex, footer_fastmap))
499            {
500              error (0, 0, _("invalid footer numbering style: %s"),
501                     quote (optarg));
502              ok = false;
503            }
504          break;
505        case 'v':
506          if (xstrtoimax (optarg, NULL, 10, &starting_line_number, "")
507              != LONGINT_OK)
508            {
509              error (0, 0, _("invalid starting line number: %s"),
510                     quote (optarg));
511              ok = false;
512            }
513          break;
514  case PAGE_INCREMENT_OPTION_DEPRECATED:
515    error (0, 0, _("WARNING: --page-increment is deprecated; "
516                   "use --line-increment instead"));
517    /* fall through */
518        case 'i':
519          if (! (xstrtoimax (optarg, NULL, 10, &page_incr, "") == LONGINT_OK
520                 && 0 < page_incr))
521            {
522              error (0, 0, _("invalid line number increment: %s"),
523                     quote (optarg));
524              ok = false;
525            }
526          break;
527        case 'p':
528          reset_numbers = false;
529          break;
530        case 'l':
531          if (! (xstrtoimax (optarg, NULL, 10, &blank_join, "") == LONGINT_OK
532                 && 0 < blank_join))
533            {
534              error (0, 0, _("invalid number of blank lines: %s"),
535                     quote (optarg));
536              ok = false;
537            }
538          break;
539        case 's':
540          separator_str = optarg;
541          break;
542        case 'w':
543          {
544            long int tmp_long;
545            if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
546                || tmp_long <= 0 || tmp_long > INT_MAX)
547              {
548                error (0, 0, _("invalid line number field width: %s"),
549                       quote (optarg));
550                ok = false;
551              }
552            else
553              {
554                lineno_width = tmp_long;
555              }
556          }
557          break;
558        case 'n':
559          if (STREQ (optarg, "ln"))
560            lineno_format = FORMAT_LEFT;
561          else if (STREQ (optarg, "rn"))
562            lineno_format = FORMAT_RIGHT_NOLZ;
563          else if (STREQ (optarg, "rz"))
564            lineno_format = FORMAT_RIGHT_LZ;
565          else
566            {
567              error (0, 0, _("invalid line numbering format: %s"),
568                     quote (optarg));
569              ok = false;
570            }
571          break;
572        case 'd':
573          section_del = optarg;
574          break;
575        case_GETOPT_HELP_CHAR;
576        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
577        default:
578          ok = false;
579          break;
580        }
581    }
582
583  if (!ok)
584    usage (EXIT_FAILURE);
585
586  /* Initialize the section delimiters.  */
587  len = strlen (section_del);
588
589  header_del_len = len * 3;
590  header_del = xmalloc (header_del_len + 1);
591  stpcpy (stpcpy (stpcpy (header_del, section_del), section_del), section_del);
592
593  body_del_len = len * 2;
594  body_del = xmalloc (body_del_len + 1);
595  stpcpy (stpcpy (body_del, section_del), section_del);
596
597  footer_del_len = len;
598  footer_del = xmalloc (footer_del_len + 1);
599  stpcpy (footer_del, section_del);
600
601  /* Initialize the input buffer.  */
602  initbuffer (&line_buf);
603
604  /* Initialize the printf format for unnumbered lines. */
605  len = strlen (separator_str);
606  print_no_line_fmt = xmalloc (lineno_width + len + 1);
607  memset (print_no_line_fmt, ' ', lineno_width + len);
608  print_no_line_fmt[lineno_width + len] = '\0';
609
610  line_no = starting_line_number;
611  current_type = body_type;
612  current_regex = &body_regex;
613
614  /* Main processing. */
615
616  if (optind == argc)
617    ok = nl_file ("-");
618  else
619    for (; optind < argc; optind++)
620      ok &= nl_file (argv[optind]);
621
622  if (have_read_stdin && fclose (stdin) == EOF)
623    error (EXIT_FAILURE, errno, "-");
624
625  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
626}
627