1/* od -- dump files in octal and other formats
2   Copyright (C) 92, 1995-2004 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software Foundation,
16   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
17
18/* Written by Jim Meyering.  */
19
20/* Busyboxed by Denis Vlasenko
21
22Based on od.c from coreutils-5.2.1
23Top bloat sources:
24
2500000073 t parse_old_offset
260000007b t get_lcm
2700000090 r long_options
2800000092 t print_named_ascii
29000000bf t print_ascii
3000000168 t write_block
3100000366 t decode_format_string
3200000a71 T od_main
33
34Tested for compat with coreutils 6.3
35using this script. Minor differences fixed.
36
37#!/bin/sh
38echo STD
39time /path/to/coreutils/od \
40...params... \
41>std
42echo Exit code $?
43echo BBOX
44time ./busybox od \
45...params... \
46>bbox
47echo Exit code $?
48diff -u -a std bbox >bbox.diff || { echo Different!; sleep 1; }
49
50*/
51
52#include "libbb.h"
53#include <getopt.h>
54
/* Assertions are compiled out: every assert() in this file expands to a
   no-op expression, so its argument is never evaluated.  */
#define assert(a) ((void)0)

/* Check for 0x7f is a coreutils 6.3 addition */
/* Note: the argument is evaluated twice.  For a non-negative argument,
   everything from ' ' upwards except 0x7f counts as printable.  */
#define ISPRINT(c) (((c)>=' ') && (c) != 0x7f)

/* Short aliases for the widest floating-point and integer types used here.  */
typedef long double longdouble_t;
typedef unsigned long long ulonglong_t;
typedef long long llong;

/* Select the suffix-aware offset parser: xstrtoull_sfx when ENABLE_LFS is
   set, xstrtoul_sfx otherwise.  */
#if ENABLE_LFS
# define xstrtooff_sfx xstrtoull_sfx
#else
# define xstrtooff_sfx xstrtoul_sfx
#endif

/* The default number of input bytes per output line.  */
#define DEFAULT_BYTES_PER_BLOCK 16

/* The number of decimal digits of precision in a float.  */
/* (Fallback value, used only when the headers did not define FLT_DIG.)  */
#ifndef FLT_DIG
# define FLT_DIG 7
#endif

/* The number of decimal digits of precision in a double.  */
/* (Fallback value, used only when the headers did not define DBL_DIG.)  */
#ifndef DBL_DIG
# define DBL_DIG 15
#endif

/* The number of decimal digits of precision in a long double.  */
/* (Falls back to the double precision when LDBL_DIG is not defined.)  */
#ifndef LDBL_DIG
# define LDBL_DIG DBL_DIG
#endif
87
/* Identifies the C type used to interpret each datum of a format spec.
   The values index the width_bytes[] table, so the order of the members
   must match the order of that table's initializers.  NO_SIZE is zero,
   which is also the value of every entry left unset in the
   integral_type_size[] and fp_type_size[] lookup tables.  */
enum size_spec {
	NO_SIZE,
	CHAR,
	SHORT,
	INT,
	LONG,
	LONG_LONG,
	FLOAT_SINGLE,
	FLOAT_DOUBLE,
	FLOAT_LONG_DOUBLE,
	N_SIZE_SPECS	/* number of members above; not a real size */
};
100
/* The kind of rendering selected by the type letter of a format spec:
   d, u, o, x, f, a and c respectively (see decode_one_format).  */
enum output_format {
	SIGNED_DECIMAL,
	UNSIGNED_DECIMAL,
	OCTAL,
	HEXADECIMAL,
	FLOATING_POINT,
	NAMED_CHARACTER,
	CHARACTER
};
110
/* Each output format specification (from '-t spec' or from
   old-style options) is represented by one of these structures.  */
struct tspec {
	/* Rendering kind and datum type chosen for this spec.  */
	enum output_format fmt;
	enum size_spec size;
	/* Prints a block: arguments are the byte count, the block itself,
	   and fmt_string below.  */
	void (*print_function) (size_t, const char *, const char *);
	/* printf format for one datum; left NULL for the 'a' and 'c' types,
	   whose printers ignore it.  */
	char *fmt_string;
	/* Non-zero if the spec ended in 'z': append a ">...<" dump of the
	   printable characters to each output line.  */
	int hexl_mode_trailer;
	/* Width of one printed datum, not counting the leading space.  */
	int field_width;
};
121
/* Convert the number of 8-bit bytes of a binary representation to
   the number of characters (digits + sign if the type is signed)
   required to represent the same quantity in the specified base/type.
   For example, a 32-bit (4-byte) quantity may require a field width
   as wide as the following for these types:
   11	unsigned octal
   11	signed decimal
   10	unsigned decimal
   8	unsigned hexadecimal
   Each table is indexed by the byte count and has entries for 0..16.  */

static const uint8_t bytes_to_oct_digits[] ALIGN1 =
{0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};

static const uint8_t bytes_to_signed_dec_digits[] ALIGN1 =
{1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40};

static const uint8_t bytes_to_unsigned_dec_digits[] ALIGN1 =
{0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39};

static const uint8_t bytes_to_hex_digits[] ALIGN1 =
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
143
/* Convert enum size_spec to the size of the named type.
   Indexed by enum size_spec; the initializers are in enum order.  */
static const signed char width_bytes[] ALIGN1 = {
	-1,			/* NO_SIZE */
	sizeof(char),		/* CHAR */
	sizeof(short),		/* SHORT */
	sizeof(int),		/* INT */
	sizeof(long),		/* LONG */
	sizeof(ulonglong_t),	/* LONG_LONG */
	sizeof(float),		/* FLOAT_SINGLE */
	sizeof(double),		/* FLOAT_DOUBLE */
	sizeof(longdouble_t)	/* FLOAT_LONG_DOUBLE */
};
/* Ensure that for each member of 'enum size_spec' there is an
   initializer in the width_bytes array.  This is a compile-time check:
   a mismatch yields an array of negative size, which does not compile.  */
struct ERR_width_bytes_has_bad_size {
	char ERR_width_bytes_has_bad_size[ARRAY_SIZE(width_bytes) == N_SIZE_SPECS ? 1 : -1];
};
161
/* Non-zero when strings mode is in effect (presumably set by option
   parsing, which is outside this part of the file -- TODO confirm).  */
static smallint flag_dump_strings;
/* Non-zero if an old-style 'pseudo-address' was specified.  */
static smallint flag_pseudo_start;
/* When zero, MAX_BYTES_TO_FORMAT and END_OFFSET are ignored, and all
   input is formatted.  */
static smallint limit_bytes_to_format;
/* When zero and two or more consecutive blocks are equal, format
   only the first block and output an asterisk alone on the following
   line to indicate that identical blocks have been elided.  */
static smallint verbose;
/* Set to 1 whenever an open, read, seek or write error is detected.  */
static smallint ioerror;

/* Minimum length of a string reported in strings mode.  */
static size_t string_min;

/* An array of specs describing how to format each input block.  */
static size_t n_specs;
static struct tspec *spec;

/* Function that accepts an address and an optional following char,
   and prints the address and char to stdout.  */
static void (*format_address)(off_t, char);
/* The difference between the old-style pseudo starting address and
   the number of bytes to skip.  */
static off_t pseudo_offset;
/* The number of input bytes to skip before formatting and writing.  */
static off_t n_bytes_to_skip;
/* The maximum number of bytes that will be formatted.  */
static off_t max_bytes_to_format;
/* The offset of the first byte after the last byte to be formatted.  */
static off_t end_offset;

/* The number of input bytes formatted per output line.  It must be
   a multiple of the least common multiple of the sizes associated with
   the specified output types.  It should be as large as possible, but
   no larger than 16 -- unless specified with the -w option.  */
static size_t bytes_per_block;

/* Human-readable representation of *file_list (for error messages).
   It differs from *file_list only when *file_list is "-".  */
static char const *input_filename;

/* A NULL-terminated list of the file-arguments from the command line.  */
static char const *const *file_list;

/* Initializer for file_list if no file-arguments
   were specified on the command line.  */
static char const *const default_file_list[] = { "-", NULL };

/* The input stream associated with the current file.
   NULL once the stream has been closed by check_and_close().  */
static FILE *in_stream;
212
/* Largest integral datum size, in bytes, that can be requested.  */
#define MAX_INTEGRAL_TYPE_SIZE sizeof(ulonglong_t)
/* Map a byte count to the enum size_spec of the integral type of that
   size.  Entries not initialized below are zero, i.e. NO_SIZE, meaning
   "no integral type has this size".  Each #if skips a type whose range
   equals that of the next narrower type, so that the narrower type keeps
   the slot for that size.  */
static unsigned char integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1] ALIGN1 = {
	[sizeof(char)] = CHAR,
#if USHRT_MAX != UCHAR_MAX
	[sizeof(short)] = SHORT,
#endif
#if UINT_MAX != USHRT_MAX
	[sizeof(int)] = INT,
#endif
#if ULONG_MAX != UINT_MAX
	[sizeof(long)] = LONG,
#endif
#if ULLONG_MAX != ULONG_MAX
	[sizeof(ulonglong_t)] = LONG_LONG,
#endif
};
229
/* Largest floating-point datum size, in bytes, that can be requested.  */
#define MAX_FP_TYPE_SIZE sizeof(longdouble_t)
/* Map a byte count to the enum size_spec of the floating-point type of
   that size.  Entries not initialized below are zero, i.e. NO_SIZE.
   The widest type is listed first so that, when two types share a size,
   the narrower type listed later wins the slot.  */
static unsigned char fp_type_size[MAX_FP_TYPE_SIZE + 1] ALIGN1 = {
	/* gcc seems to allow repeated indexes. Last one stays */
	[sizeof(longdouble_t)] = FLOAT_LONG_DOUBLE,
	[sizeof(double)] = FLOAT_DOUBLE,
	[sizeof(float)] = FLOAT_SINGLE
};
237
238
/* Return the greatest common divisor of U and V, computed with Euclid's
   algorithm.  gcd(x, 0) and gcd(0, x) are both x, so gcd(0, 0) is 0.  */
static unsigned
gcd(unsigned u, unsigned v)
{
	for (;;) {
		unsigned rem;

		if (v == 0)
			return u;
		rem = u % v;
		u = v;
		v = rem;
	}
}
250
/* Compute the least common multiple of U and V.
   Returns 0 when both arguments are 0 (their gcd is 0), and also when
   exactly one of them is 0.  */
static unsigned
lcm(unsigned u, unsigned v) {
	unsigned t = gcd(u, v);
	if (t == 0)
		return 0;
	/* Divide before multiplying: U is an exact multiple of T, so the
	   result is the same, but the intermediate value cannot wrap around
	   unless the least common multiple itself is unrepresentable.  */
	return u / t * v;
}
259
/* Print each of the N_BYTES bytes at BLOCK as a signed char.  Every value
   is converted to int and formatted on stdout with the printf format
   FMT_STRING.  */
static void
print_s_char(size_t n_bytes, const char *block, const char *fmt_string)
{
	const signed char *item = (const signed char *) block;
	size_t i;

	for (i = 0; i < n_bytes; i++)
		printf(fmt_string, (int) item[i]);
}
269
/* Print each of the N_BYTES bytes at BLOCK as an unsigned char.  Every
   value is converted to unsigned int and formatted on stdout with the
   printf format FMT_STRING.  */
static void
print_char(size_t n_bytes, const char *block, const char *fmt_string)
{
	const unsigned char *item = (const unsigned char *) block;
	size_t i;

	for (i = 0; i < n_bytes; i++)
		printf(fmt_string, (unsigned) item[i]);
}
279
/* Interpret the N_BYTES bytes at BLOCK as an array of signed shorts and
   print each one (converted to int) on stdout with the printf format
   FMT_STRING.  Trailing bytes that do not fill a whole short are ignored.  */
static void
print_s_short(size_t n_bytes, const char *block, const char *fmt_string)
{
	const signed short *item = (const signed short *) block;
	size_t count = n_bytes / sizeof(*item);
	size_t i;

	for (i = 0; i < count; i++)
		printf(fmt_string, (int) item[i]);
}
290
/* Interpret the N_BYTES bytes at BLOCK as an array of unsigned shorts and
   print each one (converted to unsigned int) on stdout with the printf
   format FMT_STRING.  Trailing bytes that do not fill a whole short are
   ignored.  */
static void
print_short(size_t n_bytes, const char *block, const char *fmt_string)
{
	const unsigned short *item = (const unsigned short *) block;
	size_t count = n_bytes / sizeof(*item);
	size_t i;

	for (i = 0; i < count; i++)
		printf(fmt_string, (unsigned) item[i]);
}
301
/* Interpret the N_BYTES bytes at BLOCK as an array of unsigned ints and
   print each one on stdout with the printf format FMT_STRING.  The same
   printer serves signed formats: the value is passed as unsigned and the
   conversion in FMT_STRING decides how it is shown.  Trailing bytes that
   do not fill a whole int are ignored.  */
static void
print_int(size_t n_bytes, const char *block, const char *fmt_string)
{
	const unsigned *item = (const unsigned *) block;
	size_t count = n_bytes / sizeof(*item);
	size_t i;

	for (i = 0; i < count; i++)
		printf(fmt_string, item[i]);
}
312
#if UINT_MAX == ULONG_MAX
/* long has the same range as int here, so the int printer is reused.  */
# define print_long print_int
#else
/* Interpret the N_BYTES bytes at BLOCK as an array of unsigned longs and
   print each one on stdout with the printf format FMT_STRING.  Trailing
   bytes that do not fill a whole long are ignored.  */
static void
print_long(size_t n_bytes, const char *block, const char *fmt_string)
{
	const unsigned long *item = (const unsigned long *) block;
	size_t count = n_bytes / sizeof(*item);
	size_t i;

	for (i = 0; i < count; i++)
		printf(fmt_string, item[i]);
}
#endif
327
#if ULONG_MAX == ULLONG_MAX
/* long long has the same range as long here, so the long printer is reused.  */
# define print_long_long print_long
#else
/* Interpret the N_BYTES bytes at BLOCK as an array of unsigned long longs
   and print each one on stdout with the printf format FMT_STRING.
   Trailing bytes that do not fill a whole long long are ignored.  */
static void
print_long_long(size_t n_bytes, const char *block, const char *fmt_string)
{
	const ulonglong_t *item = (const ulonglong_t *) block;
	size_t count = n_bytes / sizeof(*item);
	size_t i;

	for (i = 0; i < count; i++)
		printf(fmt_string, item[i]);
}
#endif
342
/* Interpret the N_BYTES bytes at BLOCK as an array of floats and print
   each one on stdout with the printf format FMT_STRING.  Each value is
   passed to printf as a double, as variadic calls require.  Trailing bytes
   that do not fill a whole float are ignored.  */
static void
print_float(size_t n_bytes, const char *block, const char *fmt_string)
{
	const float *item = (const float *) block;
	size_t count = n_bytes / sizeof(*item);
	size_t i;

	for (i = 0; i < count; i++)
		printf(fmt_string, (double) item[i]);
}
353
/* Interpret the N_BYTES bytes at BLOCK as an array of doubles and print
   each one on stdout with the printf format FMT_STRING.  Trailing bytes
   that do not fill a whole double are ignored.  */
static void
print_double(size_t n_bytes, const char *block, const char *fmt_string)
{
	const double *item = (const double *) block;
	size_t count = n_bytes / sizeof(*item);
	size_t i;

	for (i = 0; i < count; i++)
		printf(fmt_string, item[i]);
}
364
365static void
366print_long_double(size_t n_bytes, const char *block, const char *fmt_string)
367{
368	n_bytes /= sizeof(longdouble_t);
369	while (n_bytes--) {
370		longdouble_t tmp = *(longdouble_t *) block;
371		printf(fmt_string, tmp);
372		block += sizeof(longdouble_t);
373	}
374}
375
376/* print_[named]_ascii are optimized for speed.
377 * Remember, someday you may want to pump gigabytes thru this thing.
378 * Saving a dozen of .text bytes here is counter-productive */
379
380static void
381print_named_ascii(size_t n_bytes, const char *block,
382		const char *unused_fmt_string ATTRIBUTE_UNUSED)
383{
384	/* Names for some non-printing characters.  */
385	static const char charname[33][3] ALIGN1 = {
386		"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
387		" bs", " ht", " nl", " vt", " ff", " cr", " so", " si",
388		"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
389		"can", " em", "sub", "esc", " fs", " gs", " rs", " us",
390		" sp"
391	};
392	// buf[N] pos:  01234 56789
393	char buf[12] = "   x\0 0xx\0";
394	// [12] because we take three 32bit stack slots anyway, and
395	// gcc is too dumb to initialize with constant stores,
396	// it copies initializer from rodata. Oh well.
397
398	while (n_bytes--) {
399		unsigned masked_c = *(unsigned char *) block++;
400
401		masked_c &= 0x7f;
402		if (masked_c == 0x7f) {
403			fputs(" del", stdout);
404			continue;
405		}
406		if (masked_c > ' ') {
407			buf[3] = masked_c;
408			fputs(buf, stdout);
409			continue;
410		}
411		/* Why? Because printf(" %3.3s") is much slower... */
412		buf[6] = charname[masked_c][0];
413		buf[7] = charname[masked_c][1];
414		buf[8] = charname[masked_c][2];
415		fputs(buf+5, stdout);
416	}
417}
418
419static void
420print_ascii(size_t n_bytes, const char *block,
421		const char *unused_fmt_string ATTRIBUTE_UNUSED)
422{
423	// buf[N] pos:  01234 56789
424	char buf[12] = "   x\0 0xx\0";
425
426	while (n_bytes--) {
427		const char *s;
428		unsigned c = *(unsigned char *) block++;
429
430		if (ISPRINT(c)) {
431			buf[3] = c;
432			fputs(buf, stdout);
433			continue;
434		}
435		switch (c) {
436		case '\0':
437			s = "  \\0";
438			break;
439		case '\007':
440			s = "  \\a";
441			break;
442		case '\b':
443			s = "  \\b";
444			break;
445		case '\f':
446			s = "  \\f";
447			break;
448		case '\n':
449			s = "  \\n";
450			break;
451		case '\r':
452			s = "  \\r";
453			break;
454		case '\t':
455			s = "  \\t";
456			break;
457		case '\v':
458			s = "  \\v";
459			break;
460		case '\x7f':
461			s = " 177";
462			break;
463		default: /* c is never larger than 040 */
464			buf[7] = (c >> 3) + '0';
465			buf[8] = (c & 7) + '0';
466			s = buf + 5;
467		}
468		fputs(s, stdout);
469	}
470}
471
472/* Given a list of one or more input filenames FILE_LIST, set the global
473   file pointer IN_STREAM and the global string INPUT_FILENAME to the
474   first one that can be successfully opened. Modify FILE_LIST to
475   reference the next filename in the list.  A file name of "-" is
476   interpreted as standard input.  If any file open fails, give an error
477   message and return nonzero.  */
478
479static void
480open_next_file(void)
481{
482	while (1) {
483		input_filename = *file_list;
484		if (!input_filename)
485			return;
486		file_list++;
487		in_stream = fopen_or_warn_stdin(input_filename);
488		if (in_stream) {
489			if (in_stream == stdin)
490				input_filename = bb_msg_standard_input;
491			break;
492		}
493		ioerror = 1;
494	}
495
496	if (limit_bytes_to_format && !flag_dump_strings)
497		setbuf(in_stream, NULL);
498}
499
500/* Test whether there have been errors on in_stream, and close it if
501   it is not standard input.  Return nonzero if there has been an error
502   on in_stream or stdout; return zero otherwise.  This function will
503   report more than one error only if both a read and a write error
504   have occurred.  IN_ERRNO, if nonzero, is the error number
505   corresponding to the most recent action for IN_STREAM.  */
506
507static void
508check_and_close(void)
509{
510	if (in_stream) {
511		if (ferror(in_stream))	{
512			bb_error_msg("%s: read error", input_filename);
513			ioerror = 1;
514		}
515		fclose_if_not_stdin(in_stream);
516		in_stream = NULL;
517	}
518
519	if (ferror(stdout)) {
520		bb_error_msg("write error");
521		ioerror = 1;
522	}
523}
524
525/* If S points to a single valid modern od format string, put
526   a description of that format in *TSPEC, make *NEXT point at the
527   character following the just-decoded format (if *NEXT is non-NULL),
528   and return zero.  For example, if S were "d4afL"
529   *NEXT would be set to "afL" and *TSPEC would be
530	{
531		fmt = SIGNED_DECIMAL;
532		size = INT or LONG; (whichever integral_type_size[4] resolves to)
533		print_function = print_int; (assuming size == INT)
534		fmt_string = "%011d%c";
535	}
536   S_ORIG is solely for reporting errors.  It should be the full format
537   string argument. */
538
539static void
540decode_one_format(const char *s_orig, const char *s, const char **next,
541					   struct tspec *tspec)
542{
543	enum size_spec size_spec;
544	unsigned size;
545	enum output_format fmt;
546	const char *p;
547	char *end;
548	char *fmt_string = NULL;
549	void (*print_function) (size_t, const char *, const char *);
550	unsigned c;
551	unsigned field_width = 0;
552	int pos;
553
554	assert(tspec != NULL);
555
556	switch (*s) {
557	case 'd':
558	case 'o':
559	case 'u':
560	case 'x': {
561		static const char CSIL[] ALIGN1 = "CSIL";
562
563		c = *s++;
564		p = strchr(CSIL, *s);
565		if (!p) {
566			size = sizeof(int);
567			if (isdigit(s[0])) {
568				size = bb_strtou(s, &end, 0);
569				if (errno == ERANGE
570				 || MAX_INTEGRAL_TYPE_SIZE < size
571				 || integral_type_size[size] == NO_SIZE
572				) {
573					bb_error_msg_and_die("invalid type string '%s'; "
574						"%u-byte %s type is not supported",
575						s_orig, size, "integral");
576				}
577				s = end;
578			}
579		} else {
580			static const uint8_t CSIL_sizeof[] = {
581				sizeof(char),
582				sizeof(short),
583				sizeof(int),
584				sizeof(long),
585			};
586			size = CSIL_sizeof[p - CSIL];
587		}
588
589#define ISPEC_TO_FORMAT(Spec, Min_format, Long_format, Max_format) \
590	((Spec) == LONG_LONG ? (Max_format) \
591	: ((Spec) == LONG ? (Long_format) : (Min_format)))
592
593#define FMT_BYTES_ALLOCATED 9
594		size_spec = integral_type_size[size];
595
596		{
597			static const char doux[] ALIGN1 = "doux";
598			static const char doux_fmt_letter[][4] = {
599				"lld", "llo", "llu", "llx"
600			};
601			static const enum output_format doux_fmt[] = {
602				SIGNED_DECIMAL,
603				OCTAL,
604				UNSIGNED_DECIMAL,
605				HEXADECIMAL,
606			};
607			static const uint8_t *const doux_bytes_to_XXX[] = {
608				bytes_to_signed_dec_digits,
609				bytes_to_oct_digits,
610				bytes_to_unsigned_dec_digits,
611				bytes_to_hex_digits,
612			};
613			static const char doux_fmtstring[][sizeof(" %%0%u%s")] = {
614				" %%%u%s",
615				" %%0%u%s",
616				" %%%u%s",
617				" %%0%u%s",
618			};
619
620			pos = strchr(doux, c) - doux;
621			fmt = doux_fmt[pos];
622			field_width = doux_bytes_to_XXX[pos][size];
623			p = doux_fmt_letter[pos] + 2;
624			if (size_spec == LONG) p--;
625			if (size_spec == LONG_LONG) p -= 2;
626			fmt_string = xasprintf(doux_fmtstring[pos], field_width, p);
627		}
628
629		switch (size_spec) {
630		case CHAR:
631			print_function = (fmt == SIGNED_DECIMAL
632				    ? print_s_char
633				    : print_char);
634			break;
635		case SHORT:
636			print_function = (fmt == SIGNED_DECIMAL
637				    ? print_s_short
638				    : print_short);
639			break;
640		case INT:
641			print_function = print_int;
642			break;
643		case LONG:
644			print_function = print_long;
645			break;
646		default: /* case LONG_LONG: */
647			print_function = print_long_long;
648			break;
649		}
650		break;
651	}
652
653	case 'f': {
654		static const char FDL[] ALIGN1 = "FDL";
655
656		fmt = FLOATING_POINT;
657		++s;
658		p = strchr(FDL, *s);
659		if (!p) {
660			size = sizeof(double);
661			if (isdigit(s[0])) {
662				size = bb_strtou(s, &end, 0);
663				if (errno == ERANGE || size > MAX_FP_TYPE_SIZE
664				 || fp_type_size[size] == NO_SIZE
665				) {
666					bb_error_msg_and_die("invalid type string '%s'; "
667						"%u-byte %s type is not supported",
668						s_orig, size, "floating point");
669				}
670				s = end;
671			}
672		} else {
673			static const uint8_t FDL_sizeof[] = {
674				sizeof(float),
675				sizeof(double),
676				sizeof(longdouble_t),
677			};
678
679			size = FDL_sizeof[p - FDL];
680		}
681
682		size_spec = fp_type_size[size];
683
684		switch (size_spec) {
685		case FLOAT_SINGLE:
686			print_function = print_float;
687			field_width = FLT_DIG + 8;
688			/* Don't use %#e; not all systems support it.  */
689			fmt_string = xasprintf(" %%%d.%de", field_width, FLT_DIG);
690			break;
691		case FLOAT_DOUBLE:
692			print_function = print_double;
693			field_width = DBL_DIG + 8;
694			fmt_string = xasprintf(" %%%d.%de", field_width, DBL_DIG);
695			break;
696		default: /* case FLOAT_LONG_DOUBLE: */
697			print_function = print_long_double;
698			field_width = LDBL_DIG + 8;
699			fmt_string = xasprintf(" %%%d.%dLe", field_width, LDBL_DIG);
700			break;
701		}
702		break;
703	}
704
705	case 'a':
706		++s;
707		fmt = NAMED_CHARACTER;
708		size_spec = CHAR;
709		print_function = print_named_ascii;
710		field_width = 3;
711		break;
712	case 'c':
713		++s;
714		fmt = CHARACTER;
715		size_spec = CHAR;
716		print_function = print_ascii;
717		field_width = 3;
718		break;
719	default:
720		bb_error_msg_and_die("invalid character '%c' "
721				"in type string '%s'", *s, s_orig);
722	}
723
724	tspec->size = size_spec;
725	tspec->fmt = fmt;
726	tspec->print_function = print_function;
727	tspec->fmt_string = fmt_string;
728
729	tspec->field_width = field_width;
730	tspec->hexl_mode_trailer = (*s == 'z');
731	if (tspec->hexl_mode_trailer)
732		s++;
733
734	if (next != NULL)
735		*next = s;
736}
737
738/* Decode the modern od format string S.  Append the decoded
739   representation to the global array SPEC, reallocating SPEC if
740   necessary.  Return zero if S is valid, nonzero otherwise.  */
741
742static void
743decode_format_string(const char *s)
744{
745	const char *s_orig = s;
746
747	while (*s != '\0') {
748		struct tspec tspec;
749		const char *next;
750
751		decode_one_format(s_orig, s, &next, &tspec);
752
753		assert(s != next);
754		s = next;
755		n_specs++;
756		spec = xrealloc(spec, n_specs * sizeof(*spec));
757		memcpy(&spec[n_specs-1], &tspec, sizeof *spec);
758	}
759}
760
761/* Given a list of one or more input filenames FILE_LIST, set the global
762   file pointer IN_STREAM to position N_SKIP in the concatenation of
763   those files.  If any file operation fails or if there are fewer than
764   N_SKIP bytes in the combined input, give an error message and return
765   nonzero.  When possible, use seek rather than read operations to
766   advance IN_STREAM.  */
767
768static void
769skip(off_t n_skip)
770{
771	if (n_skip == 0)
772		return;
773
774	while (in_stream) { /* !EOF */
775		struct stat file_stats;
776
777		/* First try seeking.  For large offsets, this extra work is
778		   worthwhile.  If the offset is below some threshold it may be
779		   more efficient to move the pointer by reading.  There are two
780		   issues when trying to seek:
781			- the file must be seekable.
782			- before seeking to the specified position, make sure
783			  that the new position is in the current file.
784			  Try to do that by getting file's size using fstat.
785			  But that will work only for regular files.  */
786
787			/* The st_size field is valid only for regular files
788			   (and for symbolic links, which cannot occur here).
789			   If the number of bytes left to skip is at least
790			   as large as the size of the current file, we can
791			   decrement n_skip and go on to the next file.  */
792		if (fstat(fileno(in_stream), &file_stats) == 0
793		 && S_ISREG(file_stats.st_mode) && file_stats.st_size >= 0
794		) {
795			if (file_stats.st_size < n_skip) {
796				n_skip -= file_stats.st_size;
797				/* take check&close / open_next route */
798			} else {
799				if (fseeko(in_stream, n_skip, SEEK_CUR) != 0)
800					ioerror = 1;
801				return;
802			}
803		} else {
804			/* If it's not a regular file with nonnegative size,
805			   position the file pointer by reading.  */
806			char buf[BUFSIZ];
807			size_t n_bytes_read, n_bytes_to_read = BUFSIZ;
808
809			while (n_skip > 0) {
810				if (n_skip < n_bytes_to_read)
811					n_bytes_to_read = n_skip;
812				n_bytes_read = fread(buf, 1, n_bytes_to_read, in_stream);
813				n_skip -= n_bytes_read;
814				if (n_bytes_read != n_bytes_to_read)
815					break; /* EOF on this file or error */
816			}
817		}
818		if (n_skip == 0)
819			return;
820
821		check_and_close();
822		open_next_file();
823	}
824
825	if (n_skip)
826		bb_error_msg_and_die("cannot skip past end of combined input");
827}
828
829
/* Signature of the address printers below: ADDRESS is the offset to show,
   C an optional character to print after it.  */
typedef void FN_format_address(off_t address, char c);
831
/* Address printer that prints nothing: both arguments are ignored.
   (Presumably installed in format_address when address output is turned
   off; the selection code is not in this part of the file.)  */
static void
format_address_none(off_t address ATTRIBUTE_UNUSED, char c ATTRIBUTE_UNUSED)
{
}
836
/* printf format used to print an address.  It is a writable template:
   'n' is a placeholder for the pad-width digit, 'x' for the conversion
   (base) character, and the final 'c' for the character printed after
   the address.  The macros below name the first two slots; the third is
   patched by format_address_std() on every call.  */
static char address_fmt[] ALIGN1 = "%0n"OFF_FMT"xc";
/* Corresponds to 'x' above */
#define address_base_char address_fmt[sizeof(address_fmt)-3]
/* Corresponds to 'n' above */
#define address_pad_len_char address_fmt[2]
842
843static void
844format_address_std(off_t address, char c)
845{
846	/* Corresponds to 'c' */
847	address_fmt[sizeof(address_fmt)-2] = c;
848	printf(address_fmt, address);
849}
850
851#if ENABLE_GETOPT_LONG
/* only used with --traditional */
/* Print ADDRESS enclosed in parentheses, then C unless C is '\0'.  */
static void
format_address_paren(off_t address, char c)
{
	putchar('(');
	format_address_std(address, ')');
	if (c != '\0')
		putchar(c);
}
860
861static void
862format_address_label(off_t address, char c)
863{
864	format_address_std(address, ' ');
865	format_address_paren(address + pseudo_offset, c);
866}
867#endif
868
/* Print the character trailer for one output line: two spaces and '>',
   then the N_BYTES bytes at BLOCK with every byte rejected by ISPRINT
   replaced by '.', then '<'.  */
static void
dump_hexl_mode_trailer(size_t n_bytes, const char *block)
{
	const unsigned char *src = (const unsigned char *) block;
	size_t i;

	fputs("  >", stdout);
	for (i = 0; i < n_bytes; i++) {
		unsigned c = src[i];

		if (!ISPRINT(c))
			c = '.';
		putchar(c);
	}
	putchar('<');
}
880
881/* Write N_BYTES bytes from CURR_BLOCK to standard output once for each
882   of the N_SPEC format specs.  CURRENT_OFFSET is the byte address of
883   CURR_BLOCK in the concatenation of input files, and it is printed
884   (optionally) only before the output line associated with the first
885   format spec.  When duplicate blocks are being abbreviated, the output
886   for a sequence of identical input blocks is the output for the first
887   block followed by an asterisk alone on a line.  It is valid to compare
888   the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
889   That condition may be false only for the last input block -- and then
890   only when it has not been padded to length BYTES_PER_BLOCK.  */
891
892static void
893write_block(off_t current_offset, size_t n_bytes,
894		const char *prev_block, const char *curr_block)
895{
896	static char first = 1;
897	static char prev_pair_equal = 0;
898	size_t i;
899
900	if (!verbose && !first
901	 && n_bytes == bytes_per_block
902	 && memcmp(prev_block, curr_block, bytes_per_block) == 0
903	) {
904		if (prev_pair_equal) {
905			/* The two preceding blocks were equal, and the current
906			   block is the same as the last one, so print nothing.  */
907		} else {
908			puts("*");
909			prev_pair_equal = 1;
910		}
911	} else {
912		first = 0;
913		prev_pair_equal = 0;
914		for (i = 0; i < n_specs; i++) {
915			if (i == 0)
916				format_address(current_offset, '\0');
917			else
918				printf("%*s", address_pad_len_char - '0', "");
919			(*spec[i].print_function) (n_bytes, curr_block, spec[i].fmt_string);
920			if (spec[i].hexl_mode_trailer) {
921				/* space-pad out to full line width, then dump the trailer */
922				int datum_width = width_bytes[spec[i].size];
923				int blank_fields = (bytes_per_block - n_bytes) / datum_width;
924				int field_width = spec[i].field_width + 1;
925				printf("%*s", blank_fields * field_width, "");
926				dump_hexl_mode_trailer(n_bytes, curr_block);
927			}
928			putchar('\n');
929		}
930	}
931}
932
933static void
934read_block(size_t n, char *block, size_t *n_bytes_in_buffer)
935{
936	assert(0 < n && n <= bytes_per_block);
937
938	*n_bytes_in_buffer = 0;
939
940	if (n == 0)
941		return;
942
943	while (in_stream != NULL) { /* EOF.  */
944		size_t n_needed;
945		size_t n_read;
946
947		n_needed = n - *n_bytes_in_buffer;
948		n_read = fread(block + *n_bytes_in_buffer, 1, n_needed, in_stream);
949		*n_bytes_in_buffer += n_read;
950		if (n_read == n_needed)
951			break;
952		/* error check is done in check_and_close */
953		check_and_close();
954		open_next_file();
955	}
956}
957
958/* Return the least common multiple of the sizes associated
959   with the format specs.  */
960
961static int
962get_lcm(void)
963{
964	size_t i;
965	int l_c_m = 1;
966
967	for (i = 0; i < n_specs; i++)
968		l_c_m = lcm(l_c_m, width_bytes[(int) spec[i].size]);
969	return l_c_m;
970}
971
972#if ENABLE_GETOPT_LONG
973/* If S is a valid traditional offset specification with an optional
974   leading '+' return nonzero and set *OFFSET to the offset it denotes.  */
975
976static int
977parse_old_offset(const char *s, off_t *offset)
978{
979	static const struct suffix_mult Bb[] = {
980		{ "B", 1024 },
981		{ "b", 512 },
982		{ }
983	};
984	char *p;
985	int radix;
986
987	/* Skip over any leading '+'. */
988	if (s[0] == '+') ++s;
989
990	/* Determine the radix we'll use to interpret S.  If there is a '.',
991	 * it's decimal, otherwise, if the string begins with '0X'or '0x',
992	 * it's hexadecimal, else octal.  */
993	p = strchr(s, '.');
994	radix = 8;
995	if (p) {
996		p[0] = '\0'; /* cheating */
997		radix = 10;
998	} else if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
999		radix = 16;
1000
1001	*offset = xstrtooff_sfx(s, radix, Bb);
1002	if (p) p[0] = '.';
1003
1004	return (*offset >= 0);
1005}
1006#endif
1007
1008/* Read a chunk of size BYTES_PER_BLOCK from the input files, write the
1009   formatted block to standard output, and repeat until the specified
1010   maximum number of bytes has been read or until all input has been
1011   processed.  If the last block read is smaller than BYTES_PER_BLOCK
1012   and its size is not a multiple of the size associated with a format
1013   spec, extend the input block with zero bytes until its length is a
1014   multiple of all format spec sizes.  Write the final block.  Finally,
1015   write on a line by itself the offset of the byte after the last byte
1016   read.  Accumulate return values from calls to read_block and
1017   check_and_close, and if any was nonzero, return nonzero.
1018   Otherwise, return zero.  */
1019
1020static void
1021dump(void)
1022{
1023	char *block[2];
1024	off_t current_offset;
1025	int idx;
1026	size_t n_bytes_read;
1027
1028	block[0] = xmalloc(2*bytes_per_block);
1029	block[1] = block[0] + bytes_per_block;
1030
1031	current_offset = n_bytes_to_skip;
1032
1033	idx = 0;
1034	if (limit_bytes_to_format) {
1035		while (1) {
1036			size_t n_needed;
1037			if (current_offset >= end_offset) {
1038				n_bytes_read = 0;
1039				break;
1040			}
1041			n_needed = MIN(end_offset - current_offset,
1042				(off_t) bytes_per_block);
1043			read_block(n_needed, block[idx], &n_bytes_read);
1044			if (n_bytes_read < bytes_per_block)
1045				break;
1046			assert(n_bytes_read == bytes_per_block);
1047			write_block(current_offset, n_bytes_read,
1048			       block[!idx], block[idx]);
1049			current_offset += n_bytes_read;
1050			idx = !idx;
1051		}
1052	} else {
1053		while (1) {
1054			read_block(bytes_per_block, block[idx], &n_bytes_read);
1055			if (n_bytes_read < bytes_per_block)
1056				break;
1057			assert(n_bytes_read == bytes_per_block);
1058			write_block(current_offset, n_bytes_read,
1059			       block[!idx], block[idx]);
1060			current_offset += n_bytes_read;
1061			idx = !idx;
1062		}
1063	}
1064
1065	if (n_bytes_read > 0) {
1066		int l_c_m;
1067		size_t bytes_to_write;
1068
1069		l_c_m = get_lcm();
1070
1071		/* Make bytes_to_write the smallest multiple of l_c_m that
1072			 is at least as large as n_bytes_read.  */
1073		bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m);
1074
1075		memset(block[idx] + n_bytes_read, 0, bytes_to_write - n_bytes_read);
1076		write_block(current_offset, bytes_to_write,
1077				   block[!idx], block[idx]);
1078		current_offset += n_bytes_read;
1079	}
1080
1081	format_address(current_offset, '\n');
1082
1083	if (limit_bytes_to_format && current_offset >= end_offset)
1084		check_and_close();
1085
1086	free(block[0]);
1087}
1088
1089/* Read a single byte into *C from the concatenation of the input files
1090   named in the global array FILE_LIST.  On the first call to this
1091   function, the global variable IN_STREAM is expected to be an open
1092   stream associated with the input file INPUT_FILENAME.  If IN_STREAM
1093   is at end-of-file, close it and update the global variables IN_STREAM
1094   and INPUT_FILENAME so they correspond to the next file in the list.
1095   Then try to read a byte from the newly opened file.  Repeat if
1096   necessary until EOF is reached for the last file in FILE_LIST, then
1097   set *C to EOF and return.  Subsequent calls do likewise.  The return
1098   value is nonzero if any errors occured, zero otherwise.  */
1099
1100static void
1101read_char(int *c)
1102{
1103	while (in_stream) { /* !EOF */
1104		*c = fgetc(in_stream);
1105		if (*c != EOF)
1106			return;
1107		check_and_close();
1108		open_next_file();
1109	}
1110	*c = EOF;
1111}
1112
1113/* Read N bytes into BLOCK from the concatenation of the input files
1114   named in the global array FILE_LIST.  On the first call to this
1115   function, the global variable IN_STREAM is expected to be an open
1116   stream associated with the input file INPUT_FILENAME.  If all N
1117   bytes cannot be read from IN_STREAM, close IN_STREAM and update
1118   the global variables IN_STREAM and INPUT_FILENAME.  Then try to
1119   read the remaining bytes from the newly opened file.  Repeat if
1120   necessary until EOF is reached for the last file in FILE_LIST.
1121   On subsequent calls, don't modify BLOCK and return zero.  Set
1122   *N_BYTES_IN_BUFFER to the number of bytes read.  If an error occurs,
1123   it will be detected through ferror when the stream is about to be
1124   closed.  If there is an error, give a message but continue reading
1125   as usual and return nonzero.  Otherwise return zero.  */
1126
1127/* STRINGS mode.  Find each "string constant" in the input.
1128   A string constant is a run of at least 'string_min' ASCII
1129   graphic (or formatting) characters terminated by a null.
1130   Based on a function written by Richard Stallman for a
1131   traditional version of od.  Return nonzero if an error
1132   occurs.  Otherwise, return zero.  */
1133
1134static void
1135dump_strings(void)
1136{
1137	size_t bufsize = MAX(100, string_min);
1138	char *buf = xmalloc(bufsize);
1139	off_t address = n_bytes_to_skip;
1140
1141	while (1) {
1142		size_t i;
1143		int c;
1144
1145		/* See if the next 'string_min' chars are all printing chars.  */
1146 tryline:
1147		if (limit_bytes_to_format && (end_offset - string_min <= address))
1148			break;
1149		i = 0;
1150		while (!limit_bytes_to_format || address < end_offset) {
1151			if (i == bufsize) {
1152				bufsize += bufsize/8;
1153				buf = xrealloc(buf, bufsize);
1154			}
1155			read_char(&c);
1156			if (c < 0) { /* EOF */
1157				free(buf);
1158				return;
1159			}
1160			address++;
1161			if (!c)
1162				break;
1163			if (!ISPRINT(c))
1164				goto tryline;	/* It isn't; give up on this string.  */
1165			buf[i++] = c;		/* String continues; store it all.  */
1166		}
1167
1168		if (i < string_min)		/* Too short! */
1169			goto tryline;
1170
1171		/* If we get here, the string is all printable and null-terminated,
1172		 * so print it.  It is all in 'buf' and 'i' is its length.  */
1173		buf[i] = 0;
1174		format_address(address - i - 1, ' ');
1175
1176		for (i = 0; (c = buf[i]); i++) {
1177			switch (c) {
1178			case '\007': fputs("\\a", stdout); break;
1179			case '\b': fputs("\\b", stdout); break;
1180			case '\f': fputs("\\f", stdout); break;
1181			case '\n': fputs("\\n", stdout); break;
1182			case '\r': fputs("\\r", stdout); break;
1183			case '\t': fputs("\\t", stdout); break;
1184			case '\v': fputs("\\v", stdout); break;
1185			default: putc(c, stdout);
1186			}
1187		}
1188		putchar('\n');
1189	}
1190
1191	/* We reach this point only if we search through
1192	   (max_bytes_to_format - string_min) bytes before reaching EOF.  */
1193	free(buf);
1194
1195	check_and_close();
1196}
1197
1198int od_main(int argc, char **argv);
1199int od_main(int argc, char **argv)
1200{
1201	static const struct suffix_mult bkm[] = {
1202		{ "b", 512 },
1203		{ "k", 1024 },
1204		{ "m", 1024*1024 },
1205		{ }
1206	};
1207	unsigned opt;
1208	int l_c_m;
1209	/* The old-style 'pseudo starting address' to be printed in parentheses
1210	   after any true address.  */
1211	off_t pseudo_start = 0; // only for gcc
1212	enum {
1213		OPT_A = 1 << 0,
1214		OPT_N = 1 << 1,
1215		OPT_a = 1 << 2,
1216		OPT_b = 1 << 3,
1217		OPT_c = 1 << 4,
1218		OPT_d = 1 << 5,
1219		OPT_f = 1 << 6,
1220		OPT_h = 1 << 7,
1221		OPT_i = 1 << 8,
1222		OPT_j = 1 << 9,
1223		OPT_l = 1 << 10,
1224		OPT_o = 1 << 11,
1225		OPT_t = 1 << 12,
1226		OPT_v = 1 << 13,
1227		OPT_x = 1 << 14,
1228		OPT_s = 1 << 15,
1229		OPT_S = 1 << 16,
1230		OPT_w = 1 << 17,
1231		OPT_traditional = (1 << 18) * ENABLE_GETOPT_LONG,
1232	};
1233#if ENABLE_GETOPT_LONG
1234	static const char od_longopts[] ALIGN1 =
1235		"skip-bytes\0"        Required_argument "j"
1236		"address-radix\0"     Required_argument "A"
1237		"read-bytes\0"        Required_argument "N"
1238		"format\0"            Required_argument "t"
1239		"output-duplicates\0" No_argument       "v"
1240		"strings\0"           Optional_argument "S"
1241		"width\0"             Optional_argument "w"
1242		"traditional\0"       No_argument       "\xff"
1243		;
1244#endif
1245	char *str_A, *str_N, *str_j, *str_S;
1246	char *str_w = NULL;
1247	llist_t *lst_t = NULL;
1248
1249	spec = NULL;
1250	format_address = format_address_std;
1251	address_base_char = 'o';
1252	address_pad_len_char = '7';
1253	/* flag_dump_strings = 0; - already is */
1254
1255	/* Parse command line */
1256	opt_complementary = "t::"; // list
1257#if ENABLE_GETOPT_LONG
1258	applet_long_options = od_longopts;
1259#endif
1260	opt = getopt32(argv, "A:N:abcdfhij:lot:vxsS:"
1261		"w::", // -w with optional param
1262		// -S was -s and also had optional parameter
1263		// but in coreutils 6.3 it was renamed and now has
1264		// _mandatory_ parameter
1265		&str_A, &str_N, &str_j, &lst_t, &str_S, &str_w);
1266	argc -= optind;
1267	argv += optind;
1268	if (opt & OPT_A) {
1269		static const char doxn[] ALIGN1 = "doxn";
1270		static const char doxn_address_base_char[] ALIGN1 = {
1271			'u', 'o', 'x', /* '?' fourth one is not important */
1272		};
1273		static const uint8_t doxn_address_pad_len_char[] ALIGN1 = {
1274			'7', '7', '6', /* '?' */
1275		};
1276		char *p;
1277		int pos;
1278		p = strchr(doxn, str_A[0]);
1279		if (!p)
1280			bb_error_msg_and_die("bad output address radix "
1281				"'%c' (must be [doxn])", str_A[0]);
1282		pos = p - doxn;
1283		if (pos == 3) format_address = format_address_none;
1284		address_base_char = doxn_address_base_char[pos];
1285		address_pad_len_char = doxn_address_pad_len_char[pos];
1286	}
1287	if (opt & OPT_N) {
1288		limit_bytes_to_format = 1;
1289		max_bytes_to_format = xstrtooff_sfx(str_N, 0, bkm);
1290	}
1291	if (opt & OPT_a) decode_format_string("a");
1292	if (opt & OPT_b) decode_format_string("oC");
1293	if (opt & OPT_c) decode_format_string("c");
1294	if (opt & OPT_d) decode_format_string("u2");
1295	if (opt & OPT_f) decode_format_string("fF");
1296	if (opt & OPT_h) decode_format_string("x2");
1297	if (opt & OPT_i) decode_format_string("d2");
1298	if (opt & OPT_j) n_bytes_to_skip = xstrtooff_sfx(str_j, 0, bkm);
1299	if (opt & OPT_l) decode_format_string("d4");
1300	if (opt & OPT_o) decode_format_string("o2");
1301	//if (opt & OPT_t)...
1302	while (lst_t) {
1303		decode_format_string(lst_t->data);
1304		lst_t = lst_t->link;
1305	}
1306	if (opt & OPT_v) verbose = 1;
1307	if (opt & OPT_x) decode_format_string("x2");
1308	if (opt & OPT_s) decode_format_string("d2");
1309	if (opt & OPT_S) {
1310		string_min = 3;
1311		string_min = xstrtou_sfx(str_S, 0, bkm);
1312		flag_dump_strings = 1;
1313	}
1314	//if (opt & OPT_w)...
1315	//if (opt & OPT_traditional)...
1316
1317	if (flag_dump_strings && n_specs > 0)
1318		bb_error_msg_and_die("no type may be specified when dumping strings");
1319
1320
1321#if ENABLE_GETOPT_LONG
1322	if (opt & OPT_traditional) {
1323		off_t o1, o2;
1324
1325		if (argc == 1) {
1326			if (parse_old_offset(argv[0], &o1)) {
1327				n_bytes_to_skip = o1;
1328				--argc;
1329				++argv;
1330			}
1331		} else if (argc == 2) {
1332			if (parse_old_offset(argv[0], &o1)
1333			 && parse_old_offset(argv[1], &o2)
1334			) {
1335				n_bytes_to_skip = o1;
1336				flag_pseudo_start = 1;
1337				pseudo_start = o2;
1338				argv += 2;
1339				argc -= 2;
1340			} else if (parse_old_offset(argv[1], &o2)) {
1341				n_bytes_to_skip = o2;
1342				--argc;
1343				argv[1] = argv[0];
1344				++argv;
1345			} else {
1346				bb_error_msg_and_die("invalid second operand "
1347					"in compatibility mode '%s'", argv[1]);
1348			}
1349		} else if (argc == 3) {
1350			if (parse_old_offset(argv[1], &o1)
1351			 && parse_old_offset(argv[2], &o2)
1352			) {
1353				n_bytes_to_skip = o1;
1354				flag_pseudo_start = 1;
1355				pseudo_start = o2;
1356				argv[2] = argv[0];
1357				argv += 2;
1358				argc -= 2;
1359			} else {
1360				bb_error_msg_and_die("in compatibility mode "
1361					"the last two arguments must be offsets");
1362			}
1363		} else if (argc > 3)	{
1364			bb_error_msg_and_die("compatibility mode supports "
1365				"at most three arguments");
1366		}
1367
1368		if (flag_pseudo_start) {
1369			if (format_address == format_address_none) {
1370				address_base_char = 'o';
1371				address_pad_len_char = '7';
1372				format_address = format_address_paren;
1373			} else
1374				format_address = format_address_label;
1375		}
1376	}
1377#endif
1378
1379	if (limit_bytes_to_format) {
1380		end_offset = n_bytes_to_skip + max_bytes_to_format;
1381		if (end_offset < n_bytes_to_skip)
1382			bb_error_msg_and_die("skip-bytes + read-bytes is too large");
1383	}
1384
1385	if (n_specs == 0) {
1386		decode_format_string("o2");
1387		n_specs = 1;
1388	}
1389
1390	/* If no files were listed on the command line,
1391	   set the global pointer FILE_LIST so that it
1392	   references the null-terminated list of one name: "-".  */
1393	file_list = default_file_list;
1394	if (argc > 0) {
1395		/* Set the global pointer FILE_LIST so that it
1396		   references the first file-argument on the command-line.  */
1397		file_list = (char const *const *) argv;
1398	}
1399
1400	/* open the first input file */
1401	open_next_file();
1402	/* skip over any unwanted header bytes */
1403	skip(n_bytes_to_skip);
1404	if (!in_stream)
1405		return 1;
1406
1407	pseudo_offset = (flag_pseudo_start ? pseudo_start - n_bytes_to_skip : 0);
1408
1409	/* Compute output block length.  */
1410	l_c_m = get_lcm();
1411
1412	if (opt & OPT_w) { /* -w: width */
1413		bytes_per_block = 32;
1414		if (str_w)
1415			bytes_per_block = xatou(str_w);
1416		if (!bytes_per_block || bytes_per_block % l_c_m != 0) {
1417			bb_error_msg("warning: invalid width %zu; using %d instead",
1418					bytes_per_block, l_c_m);
1419			bytes_per_block = l_c_m;
1420		}
1421	} else {
1422		bytes_per_block = l_c_m;
1423		if (l_c_m < DEFAULT_BYTES_PER_BLOCK)
1424			bytes_per_block *= DEFAULT_BYTES_PER_BLOCK / l_c_m;
1425	}
1426
1427#ifdef DEBUG
1428	for (i = 0; i < n_specs; i++) {
1429		printf("%d: fmt=\"%s\" width=%d\n",
1430			i, spec[i].fmt_string, width_bytes[spec[i].size]);
1431	}
1432#endif
1433
1434	if (flag_dump_strings)
1435		dump_strings();
1436	else
1437		dump();
1438
1439	if (fclose(stdin) == EOF)
1440		bb_perror_msg_and_die(bb_msg_standard_input);
1441
1442	return ioerror;
1443}
1444