ch.c revision 89019
1/*
2 * Copyright (C) 1984-2000  Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information about less, or for information on how to
8 * contact the author, see the README file.
9 */
10
11
12/*
13 * Low level character input from the input file.
14 * We use these special purpose routines which optimize moving
15 * both forward and backward from the current read pointer.
16 */
17
18#include "less.h"
19#if MSDOS_COMPILER==WIN32C
20#include <errno.h>
21#include <windows.h>
22#endif
23
24typedef POSITION BLOCKNUM;
25
26public int ignore_eoi;
27
28/*
29 * Pool of buffers holding the most recently used blocks of the input file.
30 * The buffer pool is kept as a doubly-linked circular list,
31 * in order from most- to least-recently used.
32 * The circular list is anchored by the file state "thisfile".
33 */
34#define	LBUFSIZE	8192
35struct buf {
36	struct buf *next, *prev;
37	struct buf *hnext, *hprev;
38	BLOCKNUM block;
39	unsigned int datasize;
40	unsigned char data[LBUFSIZE];
41};
42
struct buflist {
	/* -- Layout must mirror the leading members of struct buf */
	struct buf *buf_next;
	struct buf *buf_prev;
	struct buf *buf_hnext;
	struct buf *buf_hprev;
};
48
49/*
50 * The file state is maintained in a filestate structure.
51 * A pointer to the filestate is kept in the ifile structure.
52 */
53#define	BUFHASH_SIZE	64
54struct filestate {
55	struct buf *buf_next, *buf_prev;
56	struct buflist hashtbl[BUFHASH_SIZE];
57	int file;
58	int flags;
59	POSITION fpos;
60	int nbufs;
61	BLOCKNUM block;
62	unsigned int offset;
63	POSITION fsize;
64};
65
/*
 * Shorthand for members of the current file state: first the ends
 * of the buffer list, then the read pointer, then the file itself.
 */
#define	ch_bufhead	thisfile->buf_next
#define	ch_buftail	thisfile->buf_prev
#define	ch_nbufs	thisfile->nbufs

#define	ch_block	thisfile->block
#define	ch_offset	thisfile->offset

#define	ch_file		thisfile->file
#define	ch_flags	thisfile->flags
#define	ch_fpos		thisfile->fpos
#define	ch_fsize	thisfile->fsize

/*
 * List sentinels: the anchors inside the file state, viewed as a
 * struct buf so they can be compared with (and linked to) buffers.
 */
#define	END_OF_CHAIN	((struct buf *)&thisfile->buf_next)
#define	END_OF_HCHAIN(h) ((struct buf *)&thisfile->hashtbl[h])

/* Index of the hash chain used for block number blk. */
#define BUFHASH(blk)	((blk) & (BUFHASH_SIZE-1))
79
/*
 * Walk bp over every buffer on hash chain h of the current file.
 */
#define	FOR_BUFS_IN_CHAIN(h,bp) \
	for (bp = thisfile->hashtbl[h].buf_hnext;  \
	     bp != END_OF_HCHAIN(h);  bp = bp->hnext)

/*
 * Unlink buffer bp from whichever hash chain it is on.
 * Wrapped in do/while(0) so that "HASH_RM(bp);" is a single statement
 * and stays correct under an unbraced if/else.
 */
#define	HASH_RM(bp) \
	do { \
		(bp)->hnext->hprev = (bp)->hprev; \
		(bp)->hprev->hnext = (bp)->hnext; \
	} while (0)

/*
 * Link buffer bp in at the front of hash chain h.
 * Wrapped in do/while(0) for the same reason as HASH_RM.
 */
#define	HASH_INS(bp,h) \
	do { \
		(bp)->hnext = thisfile->hashtbl[h].buf_hnext; \
		(bp)->hprev = END_OF_HCHAIN(h); \
		thisfile->hashtbl[h].buf_hnext->hprev = (bp); \
		thisfile->hashtbl[h].buf_hnext = (bp); \
	} while (0)
93
94static struct filestate *thisfile;
95static int ch_ungotchar = -1;
96
97extern int autobuf;
98extern int sigs;
99extern int cbufs;
100extern int secure;
101extern constant char helpdata[];
102extern constant int size_helpdata;
103extern IFILE curr_ifile;
104#if LOGFILE
105extern int logfile;
106extern char *namelogfile;
107#endif
108
109static int ch_addbuf();
110
111
112/*
113 * Get the character pointed to by the read pointer.
114 * ch_get() is a macro which is more efficient to call
115 * than fch_get (the function), in the usual case
116 * that the block desired is at the head of the chain.
117 */
118#define	ch_get()   ((ch_block == ch_bufhead->block && \
119		     ch_offset < ch_bufhead->datasize) ? \
120			ch_bufhead->data[ch_offset] : fch_get())
121	int
122fch_get()
123{
124	register struct buf *bp;
125	register int n;
126	register int slept;
127	register int h;
128	POSITION pos;
129	POSITION len;
130
131	slept = FALSE;
132
133	/*
134	 * Look for a buffer holding the desired block.
135	 */
136	h = BUFHASH(ch_block);
137	FOR_BUFS_IN_CHAIN(h, bp)
138	{
139		if (bp->block == ch_block)
140		{
141			if (ch_offset >= bp->datasize)
142				/*
143				 * Need more data in this buffer.
144				 */
145				goto read_more;
146			goto found;
147		}
148	}
149	/*
150	 * Block is not in a buffer.
151	 * Take the least recently used buffer
152	 * and read the desired block into it.
153	 * If the LRU buffer has data in it,
154	 * then maybe allocate a new buffer.
155	 */
156	if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1)
157	{
158		/*
159		 * There is no empty buffer to use.
160		 * Allocate a new buffer if:
161		 * 1. We can't seek on this file and -b is not in effect; or
162		 * 2. We haven't allocated the max buffers for this file yet.
163		 */
164		if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
165		    (cbufs == -1 || ch_nbufs < cbufs))
166			if (ch_addbuf())
167				/*
168				 * Allocation failed: turn off autobuf.
169				 */
170				autobuf = OPT_OFF;
171	}
172	bp = ch_buftail;
173	HASH_RM(bp); /* Remove from old hash chain. */
174	bp->block = ch_block;
175	bp->datasize = 0;
176	HASH_INS(bp, h); /* Insert into new hash chain. */
177
178    read_more:
179	pos = (ch_block * LBUFSIZE) + bp->datasize;
180	if ((len = ch_length()) != NULL_POSITION && pos >= len)
181		/*
182		 * At end of file.
183		 */
184		return (EOI);
185
186	if (pos != ch_fpos)
187	{
188		/*
189		 * Not at the correct position: must seek.
190		 * If input is a pipe, we're in trouble (can't seek on a pipe).
191		 * Some data has been lost: just return "?".
192		 */
193		if (!(ch_flags & CH_CANSEEK))
194			return ('?');
195		if (lseek(ch_file, (off_t)pos, 0) == BAD_LSEEK)
196		{
197 			error("seek error", NULL_PARG);
198			clear_eol();
199			return (EOI);
200 		}
201 		ch_fpos = pos;
202 	}
203
204	/*
205	 * Read the block.
206	 * If we read less than a full block, that's ok.
207	 * We use partial block and pick up the rest next time.
208	 */
209	if (ch_ungotchar != -1)
210	{
211		bp->data[bp->datasize] = ch_ungotchar;
212		n = 1;
213		ch_ungotchar = -1;
214	} else if (ch_flags & CH_HELPFILE)
215	{
216		bp->data[bp->datasize] = helpdata[ch_fpos];
217		n = 1;
218	} else
219	{
220		n = iread(ch_file, &bp->data[bp->datasize],
221			(unsigned int)(LBUFSIZE - bp->datasize));
222	}
223
224	if (n == READ_INTR)
225		return (EOI);
226	if (n < 0)
227	{
228#if MSDOS_COMPILER==WIN32C
229		if (errno != EPIPE)
230#endif
231		{
232			error("read error", NULL_PARG);
233			clear_eol();
234		}
235		n = 0;
236	}
237
238#if LOGFILE
239	/*
240	 * If we have a log file, write the new data to it.
241	 */
242	if (!secure && logfile >= 0 && n > 0)
243		write(logfile, (char *) &bp->data[bp->datasize], n);
244#endif
245
246	ch_fpos += n;
247	bp->datasize += n;
248
249	/*
250	 * If we have read to end of file, set ch_fsize to indicate
251	 * the position of the end of file.
252	 */
253	if (n == 0)
254	{
255		ch_fsize = pos;
256		if (ignore_eoi)
257		{
258			/*
259			 * We are ignoring EOF.
260			 * Wait a while, then try again.
261			 */
262			if (!slept)
263			{
264				PARG parg;
265				parg.p_string = wait_message();
266				ierror("%s", &parg);
267			}
268#if !MSDOS_COMPILER
269	 		sleep(1);
270#else
271#if MSDOS_COMPILER==WIN32C
272			Sleep(1000);
273#endif
274#endif
275			slept = TRUE;
276		}
277		if (sigs)
278			return (EOI);
279	}
280
281    found:
282	if (ch_bufhead != bp)
283	{
284		/*
285		 * Move the buffer to the head of the buffer chain.
286		 * This orders the buffer chain, most- to least-recently used.
287		 */
288		bp->next->prev = bp->prev;
289		bp->prev->next = bp->next;
290		bp->next = ch_bufhead;
291		bp->prev = END_OF_CHAIN;
292		ch_bufhead->prev = bp;
293		ch_bufhead = bp;
294
295		/*
296		 * Move to head of hash chain too.
297		 */
298		HASH_RM(bp);
299		HASH_INS(bp, h);
300	}
301
302	if (ch_offset >= bp->datasize)
303		/*
304		 * After all that, we still don't have enough data.
305		 * Go back and try again.
306		 */
307		goto read_more;
308
309	return (bp->data[ch_offset]);
310}
311
312/*
313 * ch_ungetchar is a rather kludgy and limited way to push
314 * a single char onto an input file descriptor.
315 */
316	public void
317ch_ungetchar(c)
318	int c;
319{
320	if (c != -1 && ch_ungotchar != -1)
321		error("ch_ungetchar overrun", NULL_PARG);
322	ch_ungotchar = c;
323}
324
325#if LOGFILE
326/*
327 * Close the logfile.
328 * If we haven't read all of standard input into it, do that now.
329 */
330	public void
331end_logfile()
332{
333	static int tried = FALSE;
334
335	if (logfile < 0)
336		return;
337	if (!tried && ch_fsize == NULL_POSITION)
338	{
339		tried = TRUE;
340		ierror("Finishing logfile", NULL_PARG);
341		while (ch_forw_get() != EOI)
342			if (ABORT_SIGS())
343				break;
344	}
345	close(logfile);
346	logfile = -1;
347	namelogfile = NULL;
348}
349
350/*
351 * Start a log file AFTER less has already been running.
352 * Invoked from the - command; see toggle_option().
353 * Write all the existing buffered data to the log file.
354 */
355	public void
356sync_logfile()
357{
358	register struct buf *bp;
359	int warned = FALSE;
360	BLOCKNUM block;
361	BLOCKNUM nblocks;
362
363	nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
364	for (block = 0;  block < nblocks;  block++)
365	{
366		for (bp = ch_bufhead;  ;  bp = bp->next)
367		{
368			if (bp == END_OF_CHAIN)
369			{
370				if (!warned)
371				{
372					error("Warning: log file is incomplete",
373						NULL_PARG);
374					warned = TRUE;
375				}
376				break;
377			}
378			if (bp->block == block)
379			{
380				write(logfile, (char *) bp->data, bp->datasize);
381				break;
382			}
383		}
384	}
385}
386
387#endif
388
389/*
390 * Determine if a specific block is currently in one of the buffers.
391 */
392	static int
393buffered(block)
394	BLOCKNUM block;
395{
396	register struct buf *bp;
397	register int h;
398
399	h = BUFHASH(block);
400	FOR_BUFS_IN_CHAIN(h, bp)
401	{
402		if (bp->block == block)
403			return (TRUE);
404	}
405	return (FALSE);
406}
407
408/*
409 * Seek to a specified position in the file.
410 * Return 0 if successful, non-zero if can't seek there.
411 */
412	public int
413ch_seek(pos)
414	register POSITION pos;
415{
416	BLOCKNUM new_block;
417	POSITION len;
418
419	len = ch_length();
420	if (pos < ch_zero() || (len != NULL_POSITION && pos > len))
421		return (1);
422
423	new_block = pos / LBUFSIZE;
424	if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block))
425	{
426		if (ch_fpos > pos)
427			return (1);
428		while (ch_fpos < pos)
429		{
430			if (ch_forw_get() == EOI)
431				return (1);
432			if (ABORT_SIGS())
433				return (1);
434		}
435		return (0);
436	}
437	/*
438	 * Set read pointer.
439	 */
440	ch_block = new_block;
441	ch_offset = pos % LBUFSIZE;
442	return (0);
443}
444
445/*
446 * Seek to the end of the file.
447 */
448	public int
449ch_end_seek()
450{
451	POSITION len;
452
453	if (ch_flags & CH_CANSEEK)
454		ch_fsize = filesize(ch_file);
455
456	len = ch_length();
457	if (len != NULL_POSITION)
458		return (ch_seek(len));
459
460	/*
461	 * Do it the slow way: read till end of data.
462	 */
463	while (ch_forw_get() != EOI)
464		if (ABORT_SIGS())
465			return (1);
466	return (0);
467}
468
469/*
470 * Seek to the beginning of the file, or as close to it as we can get.
471 * We may not be able to seek there if input is a pipe and the
472 * beginning of the pipe is no longer buffered.
473 */
474	public int
475ch_beg_seek()
476{
477	register struct buf *bp, *firstbp;
478
479	/*
480	 * Try a plain ch_seek first.
481	 */
482	if (ch_seek(ch_zero()) == 0)
483		return (0);
484
485	/*
486	 * Can't get to position 0.
487	 * Look thru the buffers for the one closest to position 0.
488	 */
489	firstbp = bp = ch_bufhead;
490	if (bp == END_OF_CHAIN)
491		return (1);
492	while ((bp = bp->next) != END_OF_CHAIN)
493		if (bp->block < firstbp->block)
494			firstbp = bp;
495	ch_block = firstbp->block;
496	ch_offset = 0;
497	return (0);
498}
499
500/*
501 * Return the length of the file, if known.
502 */
503	public POSITION
504ch_length()
505{
506	if (ignore_eoi)
507		return (NULL_POSITION);
508	if (ch_flags & CH_HELPFILE)
509		return (size_helpdata);
510	return (ch_fsize);
511}
512
513/*
514 * Return the current position in the file.
515 */
516	public POSITION
517ch_tell()
518{
519	return (ch_block * LBUFSIZE) + ch_offset;
520}
521
522/*
523 * Get the current char and post-increment the read pointer.
524 */
525	public int
526ch_forw_get()
527{
528	register int c;
529
530	c = ch_get();
531	if (c == EOI)
532		return (EOI);
533	if (ch_offset < LBUFSIZE-1)
534		ch_offset++;
535	else
536	{
537		ch_block ++;
538		ch_offset = 0;
539	}
540	return (c);
541}
542
543/*
544 * Pre-decrement the read pointer and get the new current char.
545 */
546	public int
547ch_back_get()
548{
549	if (ch_offset > 0)
550		ch_offset --;
551	else
552	{
553		if (ch_block <= 0)
554			return (EOI);
555		if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
556			return (EOI);
557		ch_block--;
558		ch_offset = LBUFSIZE-1;
559	}
560	return (ch_get());
561}
562
563/*
564 * Allocate buffers.
565 * Caller wants us to have a total of at least want_nbufs buffers.
566 */
567	public int
568ch_nbuf(want_nbufs)
569	int want_nbufs;
570{
571	PARG parg;
572
573	while (ch_nbufs < want_nbufs)
574	{
575		if (ch_addbuf())
576		{
577			/*
578			 * Cannot allocate enough buffers.
579			 * If we don't have ANY, then quit.
580			 * Otherwise, just report the error and return.
581			 */
582			parg.p_int = want_nbufs - ch_nbufs;
583			error("Cannot allocate %d buffers", &parg);
584			if (ch_nbufs == 0)
585				quit(QUIT_ERROR);
586			break;
587		}
588	}
589	return (ch_nbufs);
590}
591
592/*
593 * Flush (discard) any saved file state, including buffer contents.
594 */
595	public void
596ch_flush()
597{
598	register struct buf *bp;
599
600	if (!(ch_flags & CH_CANSEEK))
601	{
602		/*
603		 * If input is a pipe, we don't flush buffer contents,
604		 * since the contents can't be recovered.
605		 */
606		ch_fsize = NULL_POSITION;
607		return;
608	}
609
610	/*
611	 * Initialize all the buffers.
612	 */
613	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
614		bp->block = -1;
615
616	/*
617	 * Figure out the size of the file, if we can.
618	 */
619	ch_fsize = filesize(ch_file);
620
621	/*
622	 * Seek to a known position: the beginning of the file.
623	 */
624	ch_fpos = 0;
625	ch_block = 0; /* ch_fpos / LBUFSIZE; */
626	ch_offset = 0; /* ch_fpos % LBUFSIZE; */
627
628#if 1
629	/*
630	 * This is a kludge to workaround a Linux kernel bug: files in
631	 * /proc have a size of 0 according to fstat() but have readable
632	 * data.  They are sometimes, but not always, seekable.
633	 * Force them to be non-seekable here.
634	 */
635	if (ch_fsize == 0)
636	{
637		ch_fsize = NULL_POSITION;
638		ch_flags &= ~CH_CANSEEK;
639	}
640#endif
641
642	if (lseek(ch_file, (off_t)0, 0) == BAD_LSEEK)
643	{
644		/*
645		 * Warning only; even if the seek fails for some reason,
646		 * there's a good chance we're at the beginning anyway.
647		 * {{ I think this is bogus reasoning. }}
648		 */
649		error("seek error to 0", NULL_PARG);
650	}
651}
652
653/*
654 * Allocate a new buffer.
655 * The buffer is added to the tail of the buffer chain.
656 */
657	static int
658ch_addbuf()
659{
660	register struct buf *bp;
661
662	/*
663	 * Allocate and initialize a new buffer and link it
664	 * onto the tail of the buffer list.
665	 */
666	bp = (struct buf *) calloc(1, sizeof(struct buf));
667	if (bp == NULL)
668		return (1);
669	ch_nbufs++;
670	bp->block = -1;
671	bp->next = END_OF_CHAIN;
672	bp->prev = ch_buftail;
673	ch_buftail->next = bp;
674	ch_buftail = bp;
675	HASH_INS(bp, 0);
676	return (0);
677}
678
679/*
680 *
681 */
682	static void
683init_hashtbl()
684{
685	register int h;
686
687	for (h = 0;  h < BUFHASH_SIZE;  h++)
688	{
689		thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h);
690		thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h);
691	}
692}
693
694/*
695 * Delete all buffers for this file.
696 */
697	static void
698ch_delbufs()
699{
700	register struct buf *bp;
701
702	while (ch_bufhead != END_OF_CHAIN)
703	{
704		bp = ch_bufhead;
705		bp->next->prev = bp->prev;;
706		bp->prev->next = bp->next;
707		free(bp);
708	}
709	ch_nbufs = 0;
710	init_hashtbl();
711}
712
713/*
714 * Is it possible to seek on a file descriptor?
715 */
716	public int
717seekable(f)
718	int f;
719{
720#if MSDOS_COMPILER
721	extern int fd0;
722	if (f == fd0 && !isatty(fd0))
723	{
724		/*
725		 * In MS-DOS, pipes are seekable.  Check for
726		 * standard input, and pretend it is not seekable.
727		 */
728		return (0);
729	}
730#endif
731	return (lseek(f, (off_t)1, 0) != BAD_LSEEK);
732}
733
734/*
735 * Initialize file state for a new file.
736 */
737	public void
738ch_init(f, flags)
739	int f;
740	int flags;
741{
742	/*
743	 * See if we already have a filestate for this file.
744	 */
745	thisfile = (struct filestate *) get_filestate(curr_ifile);
746	if (thisfile == NULL)
747	{
748		/*
749		 * Allocate and initialize a new filestate.
750		 */
751		thisfile = (struct filestate *)
752				calloc(1, sizeof(struct filestate));
753		thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN;
754		thisfile->nbufs = 0;
755		thisfile->flags = 0;
756		thisfile->fpos = 0;
757		thisfile->block = 0;
758		thisfile->offset = 0;
759		thisfile->file = -1;
760		thisfile->fsize = NULL_POSITION;
761		ch_flags = flags;
762		init_hashtbl();
763		/*
764		 * Try to seek; set CH_CANSEEK if it works.
765		 */
766		if ((flags & CH_CANSEEK) && !seekable(f))
767			ch_flags &= ~CH_CANSEEK;
768		set_filestate(curr_ifile, (void *) thisfile);
769	}
770	if (thisfile->file == -1)
771		thisfile->file = f;
772	ch_flush();
773}
774
775/*
776 * Close a filestate.
777 */
778	public void
779ch_close()
780{
781	int keepstate = FALSE;
782
783	if (ch_flags & (CH_CANSEEK|CH_POPENED|CH_HELPFILE))
784	{
785		/*
786		 * We can seek or re-open, so we don't need to keep buffers.
787		 */
788		ch_delbufs();
789	} else
790		keepstate = TRUE;
791	if (!(ch_flags & CH_KEEPOPEN))
792	{
793		/*
794		 * We don't need to keep the file descriptor open
795		 * (because we can re-open it.)
796		 * But don't really close it if it was opened via popen(),
797		 * because pclose() wants to close it.
798		 */
799		if (!(ch_flags & (CH_POPENED|CH_HELPFILE)))
800			close(ch_file);
801		ch_file = -1;
802	} else
803		keepstate = TRUE;
804	if (!keepstate)
805	{
806		/*
807		 * We don't even need to keep the filestate structure.
808		 */
809		free(thisfile);
810		thisfile = NULL;
811		set_filestate(curr_ifile, (void *) NULL);
812	}
813}
814
815/*
816 * Return ch_flags for the current file.
817 */
818	public int
819ch_getflags()
820{
821	return (ch_flags);
822}
823
824#if 0
825	public void
826ch_dump(struct filestate *fs)
827{
828	struct buf *bp;
829	unsigned char *s;
830
831	if (fs == NULL)
832	{
833		printf(" --no filestate\n");
834		return;
835	}
836	printf(" file %d, flags %x, fpos %x, fsize %x, blk/off %x/%x\n",
837		fs->file, fs->flags, fs->fpos,
838		fs->fsize, fs->block, fs->offset);
839	printf(" %d bufs:\n", fs->nbufs);
840	for (bp = fs->buf_next; bp != (struct buf *)fs;  bp = bp->next)
841	{
842		printf("%x: blk %x, size %x \"",
843			bp, bp->block, bp->datasize);
844		for (s = bp->data;  s < bp->data + 30;  s++)
845			if (*s >= ' ' && *s < 0x7F)
846				printf("%c", *s);
847			else
848				printf(".");
849		printf("\"\n");
850	}
851}
852#endif
853