ch.c revision 172468
1/*
2 * Copyright (C) 1984-2007  Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information about less, or for information on how to
8 * contact the author, see the README file.
9 */
10
11
12/*
13 * Low level character input from the input file.
14 * We use these special purpose routines which optimize moving
15 * both forward and backward from the current read pointer.
16 */
17
18#include "less.h"
19#if MSDOS_COMPILER==WIN32C
20#include <errno.h>
21#include <windows.h>
22#endif
23
24typedef POSITION BLOCKNUM;
25
26public int ignore_eoi;
27
28/*
29 * Pool of buffers holding the most recently used blocks of the input file.
30 * The buffer pool is kept as a doubly-linked circular list,
31 * in order from most- to least-recently used.
32 * The circular list is anchored by the file state "thisfile".
33 */
34#define	LBUFSIZE	8192
35struct buf {
36	struct buf *next, *prev;
37	struct buf *hnext, *hprev;
38	BLOCKNUM block;
39	unsigned int datasize;
40	unsigned char data[LBUFSIZE];
41};
42
43struct buflist {
44	/* -- Following members must match struct buf */
45	struct buf *buf_next, *buf_prev;
46	struct buf *buf_hnext, *buf_hprev;
47};
48
49/*
50 * The file state is maintained in a filestate structure.
51 * A pointer to the filestate is kept in the ifile structure.
52 */
53#define	BUFHASH_SIZE	64
54struct filestate {
55	struct buf *buf_next, *buf_prev;
56	struct buflist hashtbl[BUFHASH_SIZE];
57	int file;
58	int flags;
59	POSITION fpos;
60	int nbufs;
61	BLOCKNUM block;
62	unsigned int offset;
63	POSITION fsize;
64};
65
/*
 * Shorthand for the members of the current file state ("thisfile").
 * These may only be used while thisfile is non-NULL.
 */
#define	ch_bufhead	thisfile->buf_next
#define	ch_buftail	thisfile->buf_prev
#define	ch_nbufs	thisfile->nbufs
#define	ch_block	thisfile->block
#define	ch_offset	thisfile->offset
#define	ch_fpos		thisfile->fpos
#define	ch_fsize	thisfile->fsize
#define	ch_flags	thisfile->flags
#define	ch_file		thisfile->file

/*
 * End-of-chain markers.  The anchors (the filestate itself and each
 * hashtbl entry) are viewed as a struct buf so that they can be linked
 * into the circular lists; only the link members may be accessed.
 */
#define	END_OF_CHAIN	((struct buf *)&thisfile->buf_next)
#define	END_OF_HCHAIN(h) ((struct buf *)&thisfile->hashtbl[h])

/* Hash a block number to a hash chain index (BUFHASH_SIZE is a power of 2). */
#define BUFHASH(blk)	((blk) & (BUFHASH_SIZE-1))

/* Iterate bp over every buffer in hash chain h. */
#define	FOR_BUFS_IN_CHAIN(h,bp) \
	for ((bp) = thisfile->hashtbl[h].buf_hnext;  \
	     (bp) != END_OF_HCHAIN(h);  (bp) = (bp)->hnext)

/*
 * Unlink bp from its hash chain.
 * Wrapped in do/while(0) so that the macro behaves as a single
 * statement, e.g. as the body of an unbraced "if".
 */
#define	HASH_RM(bp) \
	do { \
		(bp)->hnext->hprev = (bp)->hprev; \
		(bp)->hprev->hnext = (bp)->hnext; \
	} while (0)

/*
 * Link bp in at the head of hash chain h.
 * Wrapped in do/while(0) for the same reason as HASH_RM.
 */
#define	HASH_INS(bp,h) \
	do { \
		(bp)->hnext = thisfile->hashtbl[h].buf_hnext; \
		(bp)->hprev = END_OF_HCHAIN(h); \
		thisfile->hashtbl[h].buf_hnext->hprev = (bp); \
		thisfile->hashtbl[h].buf_hnext = (bp); \
	} while (0)
93
/* State of the file currently being read; NULL if there is none. */
static struct filestate *thisfile;

/* One pushed-back character (see ch_ungetchar), or -1 if there is none. */
static int ch_ungotchar = -1;

/* Limit on buffers per file (see ch_setbufspace); negative means no limit. */
static int maxbufs = -1;
97
98extern int autobuf;
99extern int sigs;
100extern int secure;
101extern constant char helpdata[];
102extern constant int size_helpdata;
103extern IFILE curr_ifile;
104#if LOGFILE
105extern int logfile;
106extern char *namelogfile;
107#endif
108
109static int ch_addbuf();
110
111
112/*
113 * Get the character pointed to by the read pointer.
114 * ch_get() is a macro which is more efficient to call
115 * than fch_get (the function), in the usual case
116 * that the block desired is at the head of the chain.
117 */
118#define	ch_get()   ((ch_block == ch_bufhead->block && \
119		     ch_offset < ch_bufhead->datasize) ? \
120			ch_bufhead->data[ch_offset] : fch_get())
121	int
122fch_get()
123{
124	register struct buf *bp;
125	register int n;
126	register int slept;
127	register int h;
128	POSITION pos;
129	POSITION len;
130
131	if (thisfile == NULL)
132		return (EOI);
133
134	slept = FALSE;
135
136	/*
137	 * Look for a buffer holding the desired block.
138	 */
139	h = BUFHASH(ch_block);
140	FOR_BUFS_IN_CHAIN(h, bp)
141	{
142		if (bp->block == ch_block)
143		{
144			if (ch_offset >= bp->datasize)
145				/*
146				 * Need more data in this buffer.
147				 */
148				goto read_more;
149			goto found;
150		}
151	}
152	/*
153	 * Block is not in a buffer.
154	 * Take the least recently used buffer
155	 * and read the desired block into it.
156	 * If the LRU buffer has data in it,
157	 * then maybe allocate a new buffer.
158	 */
159	if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1)
160	{
161		/*
162		 * There is no empty buffer to use.
163		 * Allocate a new buffer if:
164		 * 1. We can't seek on this file and -b is not in effect; or
165		 * 2. We haven't allocated the max buffers for this file yet.
166		 */
167		if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
168		    (maxbufs < 0 || ch_nbufs < maxbufs))
169			if (ch_addbuf())
170				/*
171				 * Allocation failed: turn off autobuf.
172				 */
173				autobuf = OPT_OFF;
174	}
175	bp = ch_buftail;
176	HASH_RM(bp); /* Remove from old hash chain. */
177	bp->block = ch_block;
178	bp->datasize = 0;
179	HASH_INS(bp, h); /* Insert into new hash chain. */
180
181    read_more:
182	pos = (ch_block * LBUFSIZE) + bp->datasize;
183	if ((len = ch_length()) != NULL_POSITION && pos >= len)
184		/*
185		 * At end of file.
186		 */
187		return (EOI);
188
189	if (pos != ch_fpos)
190	{
191		/*
192		 * Not at the correct position: must seek.
193		 * If input is a pipe, we're in trouble (can't seek on a pipe).
194		 * Some data has been lost: just return "?".
195		 */
196		if (!(ch_flags & CH_CANSEEK))
197			return ('?');
198		if (lseek(ch_file, (off_t)pos, 0) == BAD_LSEEK)
199		{
200 			error("seek error", NULL_PARG);
201			clear_eol();
202			return (EOI);
203 		}
204 		ch_fpos = pos;
205 	}
206
207	/*
208	 * Read the block.
209	 * If we read less than a full block, that's ok.
210	 * We use partial block and pick up the rest next time.
211	 */
212	if (ch_ungotchar != -1)
213	{
214		bp->data[bp->datasize] = ch_ungotchar;
215		n = 1;
216		ch_ungotchar = -1;
217	} else if (ch_flags & CH_HELPFILE)
218	{
219		bp->data[bp->datasize] = helpdata[ch_fpos];
220		n = 1;
221	} else
222	{
223		n = iread(ch_file, &bp->data[bp->datasize],
224			(unsigned int)(LBUFSIZE - bp->datasize));
225	}
226
227	if (n == READ_INTR)
228		return (EOI);
229	if (n < 0)
230	{
231#if MSDOS_COMPILER==WIN32C
232		if (errno != EPIPE)
233#endif
234		{
235			error("read error", NULL_PARG);
236			clear_eol();
237		}
238		n = 0;
239	}
240
241#if LOGFILE
242	/*
243	 * If we have a log file, write the new data to it.
244	 */
245	if (!secure && logfile >= 0 && n > 0)
246		write(logfile, (char *) &bp->data[bp->datasize], n);
247#endif
248
249	ch_fpos += n;
250	bp->datasize += n;
251
252	/*
253	 * If we have read to end of file, set ch_fsize to indicate
254	 * the position of the end of file.
255	 */
256	if (n == 0)
257	{
258		ch_fsize = pos;
259		if (ignore_eoi)
260		{
261			/*
262			 * We are ignoring EOF.
263			 * Wait a while, then try again.
264			 */
265			if (!slept)
266			{
267				PARG parg;
268				parg.p_string = wait_message();
269				ierror("%s", &parg);
270			}
271#if !MSDOS_COMPILER
272	 		sleep(1);
273#else
274#if MSDOS_COMPILER==WIN32C
275			Sleep(1000);
276#endif
277#endif
278			slept = TRUE;
279		}
280		if (sigs)
281			return (EOI);
282	}
283
284    found:
285	if (ch_bufhead != bp)
286	{
287		/*
288		 * Move the buffer to the head of the buffer chain.
289		 * This orders the buffer chain, most- to least-recently used.
290		 */
291		bp->next->prev = bp->prev;
292		bp->prev->next = bp->next;
293		bp->next = ch_bufhead;
294		bp->prev = END_OF_CHAIN;
295		ch_bufhead->prev = bp;
296		ch_bufhead = bp;
297
298		/*
299		 * Move to head of hash chain too.
300		 */
301		HASH_RM(bp);
302		HASH_INS(bp, h);
303	}
304
305	if (ch_offset >= bp->datasize)
306		/*
307		 * After all that, we still don't have enough data.
308		 * Go back and try again.
309		 */
310		goto read_more;
311
312	return (bp->data[ch_offset]);
313}
314
315/*
316 * ch_ungetchar is a rather kludgy and limited way to push
317 * a single char onto an input file descriptor.
318 */
319	public void
320ch_ungetchar(c)
321	int c;
322{
323	if (c != -1 && ch_ungotchar != -1)
324		error("ch_ungetchar overrun", NULL_PARG);
325	ch_ungotchar = c;
326}
327
328#if LOGFILE
329/*
330 * Close the logfile.
331 * If we haven't read all of standard input into it, do that now.
332 */
333	public void
334end_logfile()
335{
336	static int tried = FALSE;
337
338	if (logfile < 0)
339		return;
340	if (!tried && ch_fsize == NULL_POSITION)
341	{
342		tried = TRUE;
343		ierror("Finishing logfile", NULL_PARG);
344		while (ch_forw_get() != EOI)
345			if (ABORT_SIGS())
346				break;
347	}
348	close(logfile);
349	logfile = -1;
350	namelogfile = NULL;
351}
352
353/*
354 * Start a log file AFTER less has already been running.
355 * Invoked from the - command; see toggle_option().
356 * Write all the existing buffered data to the log file.
357 */
358	public void
359sync_logfile()
360{
361	register struct buf *bp;
362	int warned = FALSE;
363	BLOCKNUM block;
364	BLOCKNUM nblocks;
365
366	nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
367	for (block = 0;  block < nblocks;  block++)
368	{
369		for (bp = ch_bufhead;  ;  bp = bp->next)
370		{
371			if (bp == END_OF_CHAIN)
372			{
373				if (!warned)
374				{
375					error("Warning: log file is incomplete",
376						NULL_PARG);
377					warned = TRUE;
378				}
379				break;
380			}
381			if (bp->block == block)
382			{
383				write(logfile, (char *) bp->data, bp->datasize);
384				break;
385			}
386		}
387	}
388}
389
390#endif
391
392/*
393 * Determine if a specific block is currently in one of the buffers.
394 */
395	static int
396buffered(block)
397	BLOCKNUM block;
398{
399	register struct buf *bp;
400	register int h;
401
402	h = BUFHASH(block);
403	FOR_BUFS_IN_CHAIN(h, bp)
404	{
405		if (bp->block == block)
406			return (TRUE);
407	}
408	return (FALSE);
409}
410
411/*
412 * Seek to a specified position in the file.
413 * Return 0 if successful, non-zero if can't seek there.
414 */
415	public int
416ch_seek(pos)
417	register POSITION pos;
418{
419	BLOCKNUM new_block;
420	POSITION len;
421
422	if (thisfile == NULL)
423		return (0);
424
425	len = ch_length();
426	if (pos < ch_zero() || (len != NULL_POSITION && pos > len))
427		return (1);
428
429	new_block = pos / LBUFSIZE;
430	if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block))
431	{
432		if (ch_fpos > pos)
433			return (1);
434		while (ch_fpos < pos)
435		{
436			if (ch_forw_get() == EOI)
437				return (1);
438			if (ABORT_SIGS())
439				return (1);
440		}
441		return (0);
442	}
443	/*
444	 * Set read pointer.
445	 */
446	ch_block = new_block;
447	ch_offset = pos % LBUFSIZE;
448	return (0);
449}
450
451/*
452 * Seek to the end of the file.
453 */
454	public int
455ch_end_seek()
456{
457	POSITION len;
458
459	if (thisfile == NULL)
460		return (0);
461
462	if (ch_flags & CH_CANSEEK)
463		ch_fsize = filesize(ch_file);
464
465	len = ch_length();
466	if (len != NULL_POSITION)
467		return (ch_seek(len));
468
469	/*
470	 * Do it the slow way: read till end of data.
471	 */
472	while (ch_forw_get() != EOI)
473		if (ABORT_SIGS())
474			return (1);
475	return (0);
476}
477
478/*
479 * Seek to the beginning of the file, or as close to it as we can get.
480 * We may not be able to seek there if input is a pipe and the
481 * beginning of the pipe is no longer buffered.
482 */
483	public int
484ch_beg_seek()
485{
486	register struct buf *bp, *firstbp;
487
488	/*
489	 * Try a plain ch_seek first.
490	 */
491	if (ch_seek(ch_zero()) == 0)
492		return (0);
493
494	/*
495	 * Can't get to position 0.
496	 * Look thru the buffers for the one closest to position 0.
497	 */
498	firstbp = bp = ch_bufhead;
499	if (bp == END_OF_CHAIN)
500		return (1);
501	while ((bp = bp->next) != END_OF_CHAIN)
502		if (bp->block < firstbp->block)
503			firstbp = bp;
504	ch_block = firstbp->block;
505	ch_offset = 0;
506	return (0);
507}
508
509/*
510 * Return the length of the file, if known.
511 */
512	public POSITION
513ch_length()
514{
515	if (thisfile == NULL)
516		return (NULL_POSITION);
517	if (ignore_eoi)
518		return (NULL_POSITION);
519	if (ch_flags & CH_HELPFILE)
520		return (size_helpdata);
521	return (ch_fsize);
522}
523
524/*
525 * Return the current position in the file.
526 */
527	public POSITION
528ch_tell()
529{
530	if (thisfile == NULL)
531		return (NULL_POSITION);
532	return (ch_block * LBUFSIZE) + ch_offset;
533}
534
535/*
536 * Get the current char and post-increment the read pointer.
537 */
538	public int
539ch_forw_get()
540{
541	register int c;
542
543	if (thisfile == NULL)
544		return (EOI);
545	c = ch_get();
546	if (c == EOI)
547		return (EOI);
548	if (ch_offset < LBUFSIZE-1)
549		ch_offset++;
550	else
551	{
552		ch_block ++;
553		ch_offset = 0;
554	}
555	return (c);
556}
557
558/*
559 * Pre-decrement the read pointer and get the new current char.
560 */
561	public int
562ch_back_get()
563{
564	if (thisfile == NULL)
565		return (EOI);
566	if (ch_offset > 0)
567		ch_offset --;
568	else
569	{
570		if (ch_block <= 0)
571			return (EOI);
572		if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
573			return (EOI);
574		ch_block--;
575		ch_offset = LBUFSIZE-1;
576	}
577	return (ch_get());
578}
579
580/*
581 * Set max amount of buffer space.
582 * bufspace is in units of 1024 bytes.  -1 mean no limit.
583 */
584	public void
585ch_setbufspace(bufspace)
586	int bufspace;
587{
588	if (bufspace < 0)
589		maxbufs = -1;
590	else
591	{
592		maxbufs = ((bufspace * 1024) + LBUFSIZE-1) / LBUFSIZE;
593		if (maxbufs < 1)
594			maxbufs = 1;
595	}
596}
597
598/*
599 * Flush (discard) any saved file state, including buffer contents.
600 */
601	public void
602ch_flush()
603{
604	register struct buf *bp;
605
606	if (thisfile == NULL)
607		return;
608
609	if (!(ch_flags & CH_CANSEEK))
610	{
611		/*
612		 * If input is a pipe, we don't flush buffer contents,
613		 * since the contents can't be recovered.
614		 */
615		ch_fsize = NULL_POSITION;
616		return;
617	}
618
619	/*
620	 * Initialize all the buffers.
621	 */
622	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
623		bp->block = -1;
624
625	/*
626	 * Figure out the size of the file, if we can.
627	 */
628	ch_fsize = filesize(ch_file);
629
630	/*
631	 * Seek to a known position: the beginning of the file.
632	 */
633	ch_fpos = 0;
634	ch_block = 0; /* ch_fpos / LBUFSIZE; */
635	ch_offset = 0; /* ch_fpos % LBUFSIZE; */
636
637#if 1
638	/*
639	 * This is a kludge to workaround a Linux kernel bug: files in
640	 * /proc have a size of 0 according to fstat() but have readable
641	 * data.  They are sometimes, but not always, seekable.
642	 * Force them to be non-seekable here.
643	 */
644	if (ch_fsize == 0)
645	{
646		ch_fsize = NULL_POSITION;
647		ch_flags &= ~CH_CANSEEK;
648	}
649#endif
650
651	if (lseek(ch_file, (off_t)0, 0) == BAD_LSEEK)
652	{
653		/*
654		 * Warning only; even if the seek fails for some reason,
655		 * there's a good chance we're at the beginning anyway.
656		 * {{ I think this is bogus reasoning. }}
657		 */
658		error("seek error to 0", NULL_PARG);
659	}
660}
661
662/*
663 * Allocate a new buffer.
664 * The buffer is added to the tail of the buffer chain.
665 */
666	static int
667ch_addbuf()
668{
669	register struct buf *bp;
670
671	/*
672	 * Allocate and initialize a new buffer and link it
673	 * onto the tail of the buffer list.
674	 */
675	bp = (struct buf *) calloc(1, sizeof(struct buf));
676	if (bp == NULL)
677		return (1);
678	ch_nbufs++;
679	bp->block = -1;
680	bp->next = END_OF_CHAIN;
681	bp->prev = ch_buftail;
682	ch_buftail->next = bp;
683	ch_buftail = bp;
684	HASH_INS(bp, 0);
685	return (0);
686}
687
688/*
689 *
690 */
691	static void
692init_hashtbl()
693{
694	register int h;
695
696	for (h = 0;  h < BUFHASH_SIZE;  h++)
697	{
698		thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h);
699		thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h);
700	}
701}
702
703/*
704 * Delete all buffers for this file.
705 */
706	static void
707ch_delbufs()
708{
709	register struct buf *bp;
710
711	while (ch_bufhead != END_OF_CHAIN)
712	{
713		bp = ch_bufhead;
714		bp->next->prev = bp->prev;;
715		bp->prev->next = bp->next;
716		free(bp);
717	}
718	ch_nbufs = 0;
719	init_hashtbl();
720}
721
722/*
723 * Is it possible to seek on a file descriptor?
724 */
725	public int
726seekable(f)
727	int f;
728{
729#if MSDOS_COMPILER
730	extern int fd0;
731	if (f == fd0 && !isatty(fd0))
732	{
733		/*
734		 * In MS-DOS, pipes are seekable.  Check for
735		 * standard input, and pretend it is not seekable.
736		 */
737		return (0);
738	}
739#endif
740	return (lseek(f, (off_t)1, 0) != BAD_LSEEK);
741}
742
743/*
744 * Initialize file state for a new file.
745 */
746	public void
747ch_init(f, flags)
748	int f;
749	int flags;
750{
751	/*
752	 * See if we already have a filestate for this file.
753	 */
754	thisfile = (struct filestate *) get_filestate(curr_ifile);
755	if (thisfile == NULL)
756	{
757		/*
758		 * Allocate and initialize a new filestate.
759		 */
760		thisfile = (struct filestate *)
761				calloc(1, sizeof(struct filestate));
762		thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN;
763		thisfile->nbufs = 0;
764		thisfile->flags = 0;
765		thisfile->fpos = 0;
766		thisfile->block = 0;
767		thisfile->offset = 0;
768		thisfile->file = -1;
769		thisfile->fsize = NULL_POSITION;
770		ch_flags = flags;
771		init_hashtbl();
772		/*
773		 * Try to seek; set CH_CANSEEK if it works.
774		 */
775		if ((flags & CH_CANSEEK) && !seekable(f))
776			ch_flags &= ~CH_CANSEEK;
777		set_filestate(curr_ifile, (void *) thisfile);
778	}
779	if (thisfile->file == -1)
780		thisfile->file = f;
781	ch_flush();
782}
783
784/*
785 * Close a filestate.
786 */
787	public void
788ch_close()
789{
790	int keepstate = FALSE;
791
792	if (thisfile == NULL)
793		return;
794
795	if (ch_flags & (CH_CANSEEK|CH_POPENED|CH_HELPFILE))
796	{
797		/*
798		 * We can seek or re-open, so we don't need to keep buffers.
799		 */
800		ch_delbufs();
801	} else
802		keepstate = TRUE;
803	if (!(ch_flags & CH_KEEPOPEN))
804	{
805		/*
806		 * We don't need to keep the file descriptor open
807		 * (because we can re-open it.)
808		 * But don't really close it if it was opened via popen(),
809		 * because pclose() wants to close it.
810		 */
811		if (!(ch_flags & (CH_POPENED|CH_HELPFILE)))
812			close(ch_file);
813		ch_file = -1;
814	} else
815		keepstate = TRUE;
816	if (!keepstate)
817	{
818		/*
819		 * We don't even need to keep the filestate structure.
820		 */
821		free(thisfile);
822		thisfile = NULL;
823		set_filestate(curr_ifile, (void *) NULL);
824	}
825}
826
827/*
828 * Return ch_flags for the current file.
829 */
830	public int
831ch_getflags()
832{
833	if (thisfile == NULL)
834		return (0);
835	return (ch_flags);
836}
837
838#if 0
839	public void
840ch_dump(struct filestate *fs)
841{
842	struct buf *bp;
843	unsigned char *s;
844
845	if (fs == NULL)
846	{
847		printf(" --no filestate\n");
848		return;
849	}
850	printf(" file %d, flags %x, fpos %x, fsize %x, blk/off %x/%x\n",
851		fs->file, fs->flags, fs->fpos,
852		fs->fsize, fs->block, fs->offset);
853	printf(" %d bufs:\n", fs->nbufs);
854	for (bp = fs->buf_next; bp != (struct buf *)fs;  bp = bp->next)
855	{
856		printf("%x: blk %x, size %x \"",
857			bp, bp->block, bp->datasize);
858		for (s = bp->data;  s < bp->data + 30;  s++)
859			if (*s >= ' ' && *s < 0x7F)
860				printf("%c", *s);
861			else
862				printf(".");
863		printf("\"\n");
864	}
865}
866#endif
867