/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1992-2012 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                                                                      *
***********************************************************************/
21#pragma prototyped
/*
 * David Korn
 * Glenn Fowler
 * AT&T Bell Laboratories
 *
 * cat
 */
29
30#include <cmd.h>
31#include <fcntl.h>
32
33static const char usage[] =
34"[-?\n@(#)$Id: cat (AT&T Research) 2012-05-31 $\n]"
35USAGE_LICENSE
36"[+NAME?cat - concatenate files]"
37"[+DESCRIPTION?\bcat\b copies each \afile\a in sequence to the standard"
38"	output. If no \afile\a is given, or if the \afile\a is \b-\b,"
39"	\bcat\b copies from standard input starting at the current location.]"
40
41"[b:number-nonblank?Number lines as with \b-n\b but omit line numbers from"
42"	blank lines.]"
43"[d:dos-input?Input files are opened in \atext\amode which removes carriage"
44"	returns in front of new-lines on some systems.]"
45"[e?Equivalent to \b-vE\b.]"
46"[n:number?Causes a line number to be inserted at the beginning of each line.]"
47"[s?Equivalent to \b-S\b for \aatt\a universe and \b-B\b otherwise.]"
48"[t?Equivalent to \b-vT\b.]"
49"[u:unbuffer?The output is not delayed by buffering.]"
50"[v:show-nonprinting|print-chars?Print characters as follows: space and "
51    "printable characters as themselves; control characters as \b^\b "
52    "followed by a letter of the alphabet; and characters with the high bit "
53    "set as the lower 7 bit character prefixed by \bM^\b for 7 bit "
54    "non-printable characters and \bM-\b for all other characters. If the 7 "
55    "bit character encoding is not ASCII then the characters are converted "
56    "to ASCII to determine \ahigh bit set\a, and if set it is cleared and "
57    "converted back to the native encoding. Multibyte characters in the "
58    "current locale are treated as printable characters.]"
59"[A:show-all?Equivalent to \b-vET\b.]"
60"[B:squeeze-blank?Multiple adjacent new-line characters are replace by one"
61"	new-line.]"
62"[D:dos-output?Output files are opened in \atext\amode which inserts carriage"
63"	returns in front of new-lines on some systems.]"
64"[E:show-ends?Causes a \b$\b to be inserted before each new-line.]"
65"[R:regress?Regression test defaults: \b-v\b buffer size 4.]"
66"[S:silent?\bcat\b is silent about non-existent files.]"
67"[T:show-blank?Causes tabs to be copied as \b^I\b and formfeeds as \b^L\b.]"
68
69"\n"
70"\n[file ...]\n"
71"\n"
72
73"[+SEE ALSO?\bcp\b(1), \bgetconf\b(1), \bpr\b(1)]"
74;
75
#define RUBOUT	0177		/* the DEL character code */

/*
 * control flags
 *
 * bits collected by b_cat() from the command line options
 */
#define B_FLAG		(1<<0)	/* -b: number lines, leave blank lines unnumbered */
#define E_FLAG		(1<<1)	/* -E: show $ before each new-line */
#define F_FLAG		(1<<2)	/* -S: no message when a file cannot be opened */
#define N_FLAG		(1<<3)	/* -n: number lines */
#define S_FLAG		(1<<4)	/* -B: squeeze adjacent new-lines */
#define T_FLAG		(1<<5)	/* -T: show tabs as control characters */
#define U_FLAG		(1<<6)	/* -u: unbuffered */
#define V_FLAG		(1<<7)	/* -v: show non-printing characters */
#define D_FLAG		(1<<8)	/* tested before reopening stdin in the input mode */
#define d_FLAG		(1<<9)	/* -D: reopen stdout in text mode */

/*
 * character types
 *
 * values stored in the states[] table indexed by byte value; 0 marks a
 * byte that is copied unchanged. Types below T_CONTROL always end the scan
 * in vcat(); T_ERROR, T_EOF and T_ENDBUF are only ever stored in states[0].
 */
#define T_ERROR		1	/* sentinel: the input read failed */
#define T_EOF		2	/* sentinel: no more input */
#define T_ENDBUF	3	/* sentinel: end of the current buffer */
#define T_NEWLINE	4	/* new-line */
#define T_CONTROL	5	/* written as ^ followed by printof(c) */
#define T_EIGHTBIT	6	/* written as M- followed by the low 7 bits */
#define T_CNTL8BIT	7	/* written as M^ followed by printof(low 7 bits) */

/* toggle bit 0100: maps a control code to its display letter (1 -> 'A') */
#define printof(c)	((c)^0100)
100
101typedef void* (*Reserve_f)(Sfio_t*, ssize_t, int);
102
103#ifndef sfvalue
104#define sfvalue(f)	((f)->_val)
105#endif
106
107static void*
108regress(Sfio_t* sp, ssize_t n, int f)
109{
110	void*	r;
111
112	if (!(r = sfreserve(sp, 4, f)))
113		r = sfreserve(sp, n, f);
114	else if (sfvalue(sp) > 4)
115		sfvalue(sp) = 4;
116	return r;
117}
118
119/*
120 * called for any special output processing
121 */
122
123static int
124vcat(register char* states, Sfio_t* ip, Sfio_t* op, Reserve_f reserve, int flags)
125{
126	register unsigned char*	cp;
127	register unsigned char*	pp;
128	unsigned char*		cur;
129	unsigned char*		end;
130	unsigned char*		buf;
131	unsigned char*		nxt;
132	register int		n;
133	register int		line;
134	register int		raw;
135	int			last;
136	int			c;
137	int			m;
138	int			any;
139	int			header;
140
141	unsigned char		meta[3];
142	unsigned char		tmp[32];
143
144	meta[0] = 'M';
145	last = -1;
146	*(cp = buf = end = tmp) = 0;
147	any = 0;
148	header = flags & (B_FLAG|N_FLAG);
149	line = 1;
150	states[0] = T_ENDBUF;
151	raw = !mbwide();
152	for (;;)
153	{
154		cur = cp;
155		if (raw)
156			while (!(n = states[*cp++]));
157		else
158			for (;;)
159			{
160				while (!(n = states[*cp++]));
161				if (n < T_CONTROL)
162					break;
163				if ((m = mbsize(pp = cp - 1)) > 1)
164					cp += m - 1;
165				else
166				{
167					if (m <= 0)
168					{
169						if (cur == pp)
170						{
171							if (last > 0)
172							{
173								*end = last;
174								last = -1;
175								c = end - pp + 1;
176								if ((m = mbsize(pp)) == c)
177								{
178									any = 1;
179									if (header)
180									{
181										header = 0;
182										sfprintf(op, "%6d\t", line);
183									}
184									sfwrite(op, cur, m);
185									*(cp = cur = end) = 0;
186								}
187								else
188								{
189									memcpy(tmp, pp, c);
190									if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0)))
191									{
192										states[0] = sfvalue(ip) ? T_ERROR : T_EOF;
193										*(cp = end = tmp + sizeof(tmp) - 1) = 0;
194										last = -1;
195									}
196									else if ((n = sfvalue(ip)) <= 0)
197									{
198										states[0] = n ? T_ERROR : T_EOF;
199										*(cp = end = tmp + sizeof(tmp) - 1) = 0;
200										last = -1;
201									}
202									else
203									{
204										cp = buf = nxt;
205										end = buf + n - 1;
206										last = *end;
207										*end = 0;
208									}
209 mb:
210									if ((n = end - cp + 1) >= (sizeof(tmp) - c))
211										n = sizeof(tmp) - c - 1;
212									memcpy(tmp + c, cp, n);
213									if ((m = mbsize(tmp)) >= c)
214									{
215										any = 1;
216										if (header)
217										{
218											header = 0;
219											sfprintf(op, "%6d\t", line);
220										}
221										sfwrite(op, tmp, m);
222										cur = cp += m - c;
223									}
224								}
225								continue;
226							}
227						}
228						else
229						{
230							cp = pp + 1;
231							n = 0;
232						}
233					}
234					break;
235				}
236			}
237		c = *--cp;
238		if ((m = cp - cur) || n >= T_CONTROL)
239		{
240 flush:
241			any = 1;
242			if (header)
243			{
244				header = 0;
245				sfprintf(op, "%6d\t", line);
246			}
247			if (m)
248				sfwrite(op, cur, m);
249		}
250 special:
251		switch (n)
252		{
253		case T_ERROR:
254			if (cp < end)
255			{
256				n = T_CONTROL;
257				goto flush;
258			}
259			return -1;
260		case T_EOF:
261			if (cp < end)
262			{
263				n = T_CONTROL;
264				goto flush;
265			}
266			return 0;
267		case T_ENDBUF:
268			if (cp < end)
269			{
270				n = T_CONTROL;
271				goto flush;
272			}
273			c = last;
274			if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0)))
275			{
276				*(cp = end = tmp + sizeof(tmp) - 1) = 0;
277				states[0] = (m = sfvalue(ip)) ? T_ERROR : T_EOF;
278				last = -1;
279			}
280			else if ((m = sfvalue(ip)) <= 0)
281			{
282				*(cp = end = tmp + sizeof(tmp) - 1) = 0;
283				states[0] = m ? T_ERROR : T_EOF;
284				last = -1;
285			}
286			else
287			{
288				buf = nxt;
289				end = buf + m - 1;
290				last = *end;
291				*end = 0;
292				cp = buf;
293			}
294			if (c >= 0)
295			{
296				if (!(n = states[c]))
297				{
298					*(cur = tmp) = c;
299					m = 1;
300					goto flush;
301				}
302				if (raw || n < T_CONTROL)
303				{
304					cp--;
305					goto special;
306				}
307				tmp[0] = c;
308				c = 1;
309				goto mb;
310			}
311			break;
312		case T_CONTROL:
313			do
314			{
315				sfputc(op, '^');
316				sfputc(op, printof(c));
317			} while (states[c = *++cp] == T_CONTROL);
318			break;
319		case T_CNTL8BIT:
320			meta[1] = '^';
321			do
322			{
323				n = c & ~0200;
324				meta[2] = printof(n);
325				sfwrite(op, (char*)meta, 3);
326			} while (states[c = *++cp] == T_CNTL8BIT && raw);
327			break;
328		case T_EIGHTBIT:
329			meta[1] = '-';
330			do
331			{
332				meta[2] = c & ~0200;
333				sfwrite(op, (char*)meta, 3);
334			} while (states[c = *++cp] == T_EIGHTBIT && raw);
335			break;
336		case T_NEWLINE:
337			if (header && !(flags & B_FLAG))
338				sfprintf(op, "%6d\t", line);
339			if (flags & E_FLAG)
340				sfputc(op, '$');
341			sfputc(op, '\n');
342			if (!header || !(flags & B_FLAG))
343				line++;
344			header = !(flags & S_FLAG);
345			for (;;)
346			{
347				if ((n = states[*++cp]) == T_ENDBUF)
348				{
349					if (cp < end || last != '\n')
350						break;
351					if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0)))
352					{
353						states[0] = sfvalue(ip) ? T_ERROR : T_EOF;
354						cp = end = tmp;
355						*cp-- = 0;
356						last = -1;
357					}
358					else if ((n = sfvalue(ip)) <= 0)
359					{
360						states[0] = n ? T_ERROR : T_EOF;
361						cp = end = tmp;
362						*cp-- = 0;
363						last = -1;
364					}
365					else
366					{
367						buf = nxt;
368						end = buf + n - 1;
369						last = *end;
370						*end = 0;
371						cp = buf - 1;
372					}
373				}
374				else if (n != T_NEWLINE)
375					break;
376				if (!(flags & S_FLAG) || any || header)
377				{
378					any = 0;
379					header = 0;
380					if ((flags & (B_FLAG|N_FLAG)) == N_FLAG)
381						sfprintf(op, "%6d\t", line);
382					if (flags & E_FLAG)
383						sfputc(op, '$');
384					sfputc(op, '\n');
385				}
386				if (!(flags & B_FLAG))
387					line++;
388			}
389			header = flags & (B_FLAG|N_FLAG);
390			break;
391		}
392	}
393}
394
395int
396b_cat(int argc, char** argv, Shbltin_t* context)
397{
398	register int		n;
399	register int		flags = 0;
400	register char*		cp;
401	register Sfio_t*	fp;
402	char*			mode;
403	Reserve_f		reserve = sfreserve;
404	int			att;
405	int			dovcat = 0;
406	char			states[UCHAR_MAX+1];
407
408	cmdinit(argc, argv, context, ERROR_CATALOG, 0);
409	att = !strcmp(astconf("UNIVERSE", NiL, NiL), "att");
410	mode = "r";
411	for (;;)
412	{
413		n = 0;
414		switch (optget(argv, usage))
415		{
416		case 'A':
417			n = T_FLAG|E_FLAG|V_FLAG;
418			break;
419		case 'B':
420			n = S_FLAG;
421			break;
422		case 'b':
423			n = B_FLAG;
424			break;
425		case 'd':
426			mode = opt_info.num ? "rt" : "r";
427			continue;
428		case 'D':
429			n = d_FLAG;
430			break;
431		case 'E':
432			n = E_FLAG;
433			break;
434		case 'e':
435			n = E_FLAG|V_FLAG;
436			break;
437		case 'n':
438			n = N_FLAG;
439			break;
440		case 'R':
441			reserve = opt_info.num ? regress : sfreserve;
442			continue;
443		case 's':
444			n = att ? F_FLAG : S_FLAG;
445			break;
446		case 'S':
447			n = F_FLAG;
448			break;
449		case 'T':
450			n = T_FLAG;
451			break;
452		case 't':
453			n = T_FLAG|V_FLAG;
454			break;
455		case 'u':
456			n = U_FLAG;
457			break;
458		case 'v':
459			n = V_FLAG;
460			break;
461		case ':':
462			error(2, "%s", opt_info.arg);
463			break;
464		case '?':
465			error(ERROR_usage(2), "%s", opt_info.arg);
466			break;
467		}
468		if (!n)
469			break;
470		if (opt_info.num)
471			flags |= n;
472		else
473			flags &= ~n;
474	}
475	argv += opt_info.index;
476	if (error_info.errors)
477		error(ERROR_usage(2), "%s", optusage(NiL));
478	memset(states, 0, sizeof(states));
479	if (flags&V_FLAG)
480	{
481		memset(states, T_CONTROL, ' ');
482		states[RUBOUT] = T_CONTROL;
483		memset(states+0200, T_EIGHTBIT, 0200);
484		memset(states+0200, T_CNTL8BIT, ' ');
485		states[RUBOUT|0200] = T_CNTL8BIT;
486		states['\n'] = 0;
487	}
488	if (flags&T_FLAG)
489		states['\t'] = T_CONTROL;
490	states[0] = T_ENDBUF;
491	if (att)
492	{
493		if (flags&V_FLAG)
494		{
495			states['\n'|0200] = T_EIGHTBIT;
496			if (!(flags&T_FLAG))
497			{
498				states['\t'] = states['\f'] = 0;
499				states['\t'|0200] = states['\f'|0200] = T_EIGHTBIT;
500			}
501		}
502	}
503	else if (flags)
504	{
505		if (!(flags&T_FLAG))
506			states['\t'] = 0;
507	}
508	if (flags&(V_FLAG|T_FLAG|N_FLAG|E_FLAG|B_FLAG|S_FLAG))
509	{
510		states['\n'] = T_NEWLINE;
511		dovcat = 1;
512	}
513	if (flags&d_FLAG)
514		sfopen(sfstdout, NiL, "wt");
515	if (cp = *argv)
516		argv++;
517	do
518	{
519		if (!cp || streq(cp, "-"))
520		{
521			fp = sfstdin;
522			if (flags&D_FLAG)
523				sfopen(fp, NiL, mode);
524		}
525		else if (!(fp = sfopen(NiL, cp, mode)))
526		{
527			if (!(flags&F_FLAG))
528				error(ERROR_system(0), "%s: cannot open", cp);
529			error_info.errors = 1;
530			continue;
531		}
532		if (flags&U_FLAG)
533			sfsetbuf(fp, (void*)fp, -1);
534		if (dovcat)
535			n = vcat(states, fp, sfstdout, reserve, flags);
536		else if (sfmove(fp, sfstdout, SF_UNBOUND, -1) >= 0 && sfeof(fp))
537			n = 0;
538		else
539			n = -1;
540		if (fp != sfstdin)
541			sfclose(fp);
542		if (n < 0 && !ERROR_PIPE(errno) && errno != EINTR)
543		{
544			if (cp)
545				error(ERROR_system(0), "%s: read error", cp);
546			else
547				error(ERROR_system(0), "read error");
548		}
549		if (sferror(sfstdout))
550			break;
551	} while (cp = *argv++);
552	if (sfsync(sfstdout))
553		error(ERROR_system(0), "write error");
554	if (flags&d_FLAG)
555		sfopen(sfstdout, NiL, "w");
556	return error_info.errors;
557}
558