ch.c revision 161475
1/*
2 * Copyright (C) 1984-2004  Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information about less, or for information on how to
8 * contact the author, see the README file.
9 */
10
11
12/*
13 * Low level character input from the input file.
14 * We use these special purpose routines which optimize moving
15 * both forward and backward from the current read pointer.
16 */
17
18#include "less.h"
19#if MSDOS_COMPILER==WIN32C
20#include <errno.h>
21#include <windows.h>
22#endif
23
24typedef POSITION BLOCKNUM;
25
26public int ignore_eoi;
27
28/*
29 * Pool of buffers holding the most recently used blocks of the input file.
30 * The buffer pool is kept as a doubly-linked circular list,
31 * in order from most- to least-recently used.
32 * The circular list is anchored by the file state "thisfile".
33 */
34#define	LBUFSIZE	8192
35struct buf {
36	struct buf *next, *prev;
37	struct buf *hnext, *hprev;
38	BLOCKNUM block;
39	unsigned int datasize;
40	unsigned char data[LBUFSIZE];
41};
42
/*
 * Anchor of one hash chain.  It is accessed through a struct buf pointer
 * (see END_OF_HCHAIN), so the members must match the leading members of
 * struct buf in type and order.
 */
struct buflist {
	/* -- Following members must match struct buf */
	struct buf *buf_next;
	struct buf *buf_prev;
	struct buf *buf_hnext;
	struct buf *buf_hprev;
};
48
49/*
50 * The file state is maintained in a filestate structure.
51 * A pointer to the filestate is kept in the ifile structure.
52 */
53#define	BUFHASH_SIZE	64
54struct filestate {
55	struct buf *buf_next, *buf_prev;
56	struct buflist hashtbl[BUFHASH_SIZE];
57	int file;
58	int flags;
59	POSITION fpos;
60	int nbufs;
61	BLOCKNUM block;
62	unsigned int offset;
63	POSITION fsize;
64};
65
/*
 * Shorthand for the members of the current file's state.
 * Each one expands to an lvalue.
 */
#define	ch_bufhead	(thisfile->buf_next)
#define	ch_buftail	(thisfile->buf_prev)
#define	ch_nbufs	(thisfile->nbufs)
#define	ch_block	(thisfile->block)
#define	ch_offset	(thisfile->offset)
#define	ch_fpos		(thisfile->fpos)
#define	ch_fsize	(thisfile->fsize)
#define	ch_flags	(thisfile->flags)
#define	ch_file		(thisfile->file)

/*
 * Chain terminators.  The anchor of the LRU chain is the filestate itself
 * and the anchor of hash chain h is hashtbl[h]; both are viewed as a
 * struct buf so that only the link members are ever touched through them.
 */
#define	END_OF_CHAIN		((struct buf *)&thisfile->buf_next)
#define	END_OF_HCHAIN(h)	((struct buf *)&thisfile->hashtbl[h])

/* Hash chain index for a block number (BUFHASH_SIZE is a power of two). */
#define	BUFHASH(blk)		((blk) & (BUFHASH_SIZE-1))
79
/*
 * Loop header: step bp over every buffer on hash chain h,
 * starting with the one at the head of the chain.
 */
#define	FOR_BUFS_IN_CHAIN(h,bp) \
	for ((bp) = thisfile->hashtbl[h].buf_hnext;  \
	     (bp) != END_OF_HCHAIN(h);  \
	     (bp) = (bp)->hnext)
83
/*
 * Unlink buffer bp from the hash chain it is currently on.
 * bp's own hnext/hprev are left unchanged.
 * Wrapped in do/while(0) so that "HASH_RM(bp);" is a single statement
 * and is safe as the body of an unbraced if/else.
 */
#define	HASH_RM(bp) \
	do { \
		(bp)->hnext->hprev = (bp)->hprev; \
		(bp)->hprev->hnext = (bp)->hnext; \
	} while (0)
87
/*
 * Link buffer bp in at the head of hash chain h of the current file.
 * Uses thisfile and END_OF_HCHAIN.
 * Wrapped in do/while(0) so that "HASH_INS(bp, h);" is a single statement
 * and is safe as the body of an unbraced if/else.
 */
#define	HASH_INS(bp,h) \
	do { \
		(bp)->hnext = thisfile->hashtbl[h].buf_hnext; \
		(bp)->hprev = END_OF_HCHAIN(h); \
		thisfile->hashtbl[h].buf_hnext->hprev = (bp); \
		thisfile->hashtbl[h].buf_hnext = (bp); \
	} while (0)
93
94static struct filestate *thisfile;
95static int ch_ungotchar = -1;
96static int maxbufs = -1;
97
98extern int autobuf;
99extern int sigs;
100extern int secure;
101extern constant char helpdata[];
102extern constant int size_helpdata;
103extern IFILE curr_ifile;
104#if LOGFILE
105extern int logfile;
106extern char *namelogfile;
107#endif
108
109static int ch_addbuf();
110
111
/*
 * Get the character at the read pointer.
 * ch_get() is the fast path: when the wanted block is the buffer at the
 * head of the chain and the offset lies within its valid data, the
 * character is fetched directly.  Otherwise the function fch_get()
 * does the full lookup.
 */
#define	ch_get() \
	((ch_block == ch_bufhead->block && ch_offset < ch_bufhead->datasize) \
		? ch_bufhead->data[ch_offset] \
		: fch_get())
121	int
122fch_get()
123{
124	register struct buf *bp;
125	register int n;
126	register int slept;
127	register int h;
128	POSITION pos;
129	POSITION len;
130
131	slept = FALSE;
132
133	/*
134	 * Look for a buffer holding the desired block.
135	 */
136	h = BUFHASH(ch_block);
137	FOR_BUFS_IN_CHAIN(h, bp)
138	{
139		if (bp->block == ch_block)
140		{
141			if (ch_offset >= bp->datasize)
142				/*
143				 * Need more data in this buffer.
144				 */
145				goto read_more;
146			goto found;
147		}
148	}
149	/*
150	 * Block is not in a buffer.
151	 * Take the least recently used buffer
152	 * and read the desired block into it.
153	 * If the LRU buffer has data in it,
154	 * then maybe allocate a new buffer.
155	 */
156	if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1)
157	{
158		/*
159		 * There is no empty buffer to use.
160		 * Allocate a new buffer if:
161		 * 1. We can't seek on this file and -b is not in effect; or
162		 * 2. We haven't allocated the max buffers for this file yet.
163		 */
164		if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
165		    (maxbufs < 0 || ch_nbufs < maxbufs))
166			if (ch_addbuf())
167				/*
168				 * Allocation failed: turn off autobuf.
169				 */
170				autobuf = OPT_OFF;
171	}
172	bp = ch_buftail;
173	HASH_RM(bp); /* Remove from old hash chain. */
174	bp->block = ch_block;
175	bp->datasize = 0;
176	HASH_INS(bp, h); /* Insert into new hash chain. */
177
178    read_more:
179	pos = (ch_block * LBUFSIZE) + bp->datasize;
180	if ((len = ch_length()) != NULL_POSITION && pos >= len)
181		/*
182		 * At end of file.
183		 */
184		return (EOI);
185
186	if (pos != ch_fpos)
187	{
188		/*
189		 * Not at the correct position: must seek.
190		 * If input is a pipe, we're in trouble (can't seek on a pipe).
191		 * Some data has been lost: just return "?".
192		 */
193		if (!(ch_flags & CH_CANSEEK))
194			return ('?');
195		if (lseek(ch_file, (off_t)pos, 0) == BAD_LSEEK)
196		{
197 			error("seek error", NULL_PARG);
198			clear_eol();
199			return (EOI);
200 		}
201 		ch_fpos = pos;
202 	}
203
204	/*
205	 * Read the block.
206	 * If we read less than a full block, that's ok.
207	 * We use partial block and pick up the rest next time.
208	 */
209	if (ch_ungotchar != -1)
210	{
211		bp->data[bp->datasize] = ch_ungotchar;
212		n = 1;
213		ch_ungotchar = -1;
214	} else if (ch_flags & CH_HELPFILE)
215	{
216		bp->data[bp->datasize] = helpdata[ch_fpos];
217		n = 1;
218	} else
219	{
220		n = iread(ch_file, &bp->data[bp->datasize],
221			(unsigned int)(LBUFSIZE - bp->datasize));
222	}
223
224	if (n == READ_INTR)
225		return (EOI);
226	if (n < 0)
227	{
228#if MSDOS_COMPILER==WIN32C
229		if (errno != EPIPE)
230#endif
231		{
232			error("read error", NULL_PARG);
233			clear_eol();
234		}
235		n = 0;
236	}
237
238#if LOGFILE
239	/*
240	 * If we have a log file, write the new data to it.
241	 */
242	if (!secure && logfile >= 0 && n > 0)
243		write(logfile, (char *) &bp->data[bp->datasize], n);
244#endif
245
246	ch_fpos += n;
247	bp->datasize += n;
248
249	/*
250	 * If we have read to end of file, set ch_fsize to indicate
251	 * the position of the end of file.
252	 */
253	if (n == 0)
254	{
255		ch_fsize = pos;
256		if (ignore_eoi)
257		{
258			/*
259			 * We are ignoring EOF.
260			 * Wait a while, then try again.
261			 */
262			if (!slept)
263			{
264				PARG parg;
265				parg.p_string = wait_message();
266				ierror("%s", &parg);
267			}
268#if !MSDOS_COMPILER
269	 		sleep(1);
270#else
271#if MSDOS_COMPILER==WIN32C
272			Sleep(1000);
273#endif
274#endif
275			slept = TRUE;
276		}
277		if (sigs)
278			return (EOI);
279	}
280
281    found:
282	if (ch_bufhead != bp)
283	{
284		/*
285		 * Move the buffer to the head of the buffer chain.
286		 * This orders the buffer chain, most- to least-recently used.
287		 */
288		bp->next->prev = bp->prev;
289		bp->prev->next = bp->next;
290		bp->next = ch_bufhead;
291		bp->prev = END_OF_CHAIN;
292		ch_bufhead->prev = bp;
293		ch_bufhead = bp;
294
295		/*
296		 * Move to head of hash chain too.
297		 */
298		HASH_RM(bp);
299		HASH_INS(bp, h);
300	}
301
302	if (ch_offset >= bp->datasize)
303		/*
304		 * After all that, we still don't have enough data.
305		 * Go back and try again.
306		 */
307		goto read_more;
308
309	return (bp->data[ch_offset]);
310}
311
312/*
313 * ch_ungetchar is a rather kludgy and limited way to push
314 * a single char onto an input file descriptor.
315 */
316	public void
317ch_ungetchar(c)
318	int c;
319{
320	if (c != -1 && ch_ungotchar != -1)
321		error("ch_ungetchar overrun", NULL_PARG);
322	ch_ungotchar = c;
323}
324
325#if LOGFILE
326/*
327 * Close the logfile.
328 * If we haven't read all of standard input into it, do that now.
329 */
330	public void
331end_logfile()
332{
333	static int tried = FALSE;
334
335	if (logfile < 0)
336		return;
337	if (!tried && ch_fsize == NULL_POSITION)
338	{
339		tried = TRUE;
340		ierror("Finishing logfile", NULL_PARG);
341		while (ch_forw_get() != EOI)
342			if (ABORT_SIGS())
343				break;
344	}
345	close(logfile);
346	logfile = -1;
347	namelogfile = NULL;
348}
349
350/*
351 * Start a log file AFTER less has already been running.
352 * Invoked from the - command; see toggle_option().
353 * Write all the existing buffered data to the log file.
354 */
355	public void
356sync_logfile()
357{
358	register struct buf *bp;
359	int warned = FALSE;
360	BLOCKNUM block;
361	BLOCKNUM nblocks;
362
363	nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
364	for (block = 0;  block < nblocks;  block++)
365	{
366		for (bp = ch_bufhead;  ;  bp = bp->next)
367		{
368			if (bp == END_OF_CHAIN)
369			{
370				if (!warned)
371				{
372					error("Warning: log file is incomplete",
373						NULL_PARG);
374					warned = TRUE;
375				}
376				break;
377			}
378			if (bp->block == block)
379			{
380				write(logfile, (char *) bp->data, bp->datasize);
381				break;
382			}
383		}
384	}
385}
386
387#endif
388
389/*
390 * Determine if a specific block is currently in one of the buffers.
391 */
392	static int
393buffered(block)
394	BLOCKNUM block;
395{
396	register struct buf *bp;
397	register int h;
398
399	h = BUFHASH(block);
400	FOR_BUFS_IN_CHAIN(h, bp)
401	{
402		if (bp->block == block)
403			return (TRUE);
404	}
405	return (FALSE);
406}
407
408/*
409 * Seek to a specified position in the file.
410 * Return 0 if successful, non-zero if can't seek there.
411 */
412	public int
413ch_seek(pos)
414	register POSITION pos;
415{
416	BLOCKNUM new_block;
417	POSITION len;
418
419	len = ch_length();
420	if (pos < ch_zero() || (len != NULL_POSITION && pos > len))
421		return (1);
422
423	new_block = pos / LBUFSIZE;
424	if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block))
425	{
426		if (ch_fpos > pos)
427			return (1);
428		while (ch_fpos < pos)
429		{
430			if (ch_forw_get() == EOI)
431				return (1);
432			if (ABORT_SIGS())
433				return (1);
434		}
435		return (0);
436	}
437	/*
438	 * Set read pointer.
439	 */
440	ch_block = new_block;
441	ch_offset = pos % LBUFSIZE;
442	return (0);
443}
444
445/*
446 * Seek to the end of the file.
447 */
448	public int
449ch_end_seek()
450{
451	POSITION len;
452
453	if (ch_flags & CH_CANSEEK)
454		ch_fsize = filesize(ch_file);
455
456	len = ch_length();
457	if (len != NULL_POSITION)
458		return (ch_seek(len));
459
460	/*
461	 * Do it the slow way: read till end of data.
462	 */
463	while (ch_forw_get() != EOI)
464		if (ABORT_SIGS())
465			return (1);
466	return (0);
467}
468
469/*
470 * Seek to the beginning of the file, or as close to it as we can get.
471 * We may not be able to seek there if input is a pipe and the
472 * beginning of the pipe is no longer buffered.
473 */
474	public int
475ch_beg_seek()
476{
477	register struct buf *bp, *firstbp;
478
479	/*
480	 * Try a plain ch_seek first.
481	 */
482	if (ch_seek(ch_zero()) == 0)
483		return (0);
484
485	/*
486	 * Can't get to position 0.
487	 * Look thru the buffers for the one closest to position 0.
488	 */
489	firstbp = bp = ch_bufhead;
490	if (bp == END_OF_CHAIN)
491		return (1);
492	while ((bp = bp->next) != END_OF_CHAIN)
493		if (bp->block < firstbp->block)
494			firstbp = bp;
495	ch_block = firstbp->block;
496	ch_offset = 0;
497	return (0);
498}
499
500/*
501 * Return the length of the file, if known.
502 */
503	public POSITION
504ch_length()
505{
506	if (ignore_eoi)
507		return (NULL_POSITION);
508	if (ch_flags & CH_HELPFILE)
509		return (size_helpdata);
510	return (ch_fsize);
511}
512
513/*
514 * Return the current position in the file.
515 */
516	public POSITION
517ch_tell()
518{
519	return (ch_block * LBUFSIZE) + ch_offset;
520}
521
522/*
523 * Get the current char and post-increment the read pointer.
524 */
525	public int
526ch_forw_get()
527{
528	register int c;
529
530	c = ch_get();
531	if (c == EOI)
532		return (EOI);
533	if (ch_offset < LBUFSIZE-1)
534		ch_offset++;
535	else
536	{
537		ch_block ++;
538		ch_offset = 0;
539	}
540	return (c);
541}
542
543/*
544 * Pre-decrement the read pointer and get the new current char.
545 */
546	public int
547ch_back_get()
548{
549	if (ch_offset > 0)
550		ch_offset --;
551	else
552	{
553		if (ch_block <= 0)
554			return (EOI);
555		if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
556			return (EOI);
557		ch_block--;
558		ch_offset = LBUFSIZE-1;
559	}
560	return (ch_get());
561}
562
563/*
564 * Set max amount of buffer space.
565 * bufspace is in units of 1024 bytes.  -1 mean no limit.
566 */
567	public void
568ch_setbufspace(bufspace)
569	int bufspace;
570{
571	if (bufspace < 0)
572		maxbufs = -1;
573	else
574	{
575		maxbufs = ((bufspace * 1024) + LBUFSIZE-1) / LBUFSIZE;
576		if (maxbufs < 1)
577			maxbufs = 1;
578	}
579}
580
581/*
582 * Flush (discard) any saved file state, including buffer contents.
583 */
584	public void
585ch_flush()
586{
587	register struct buf *bp;
588
589	if (!(ch_flags & CH_CANSEEK))
590	{
591		/*
592		 * If input is a pipe, we don't flush buffer contents,
593		 * since the contents can't be recovered.
594		 */
595		ch_fsize = NULL_POSITION;
596		return;
597	}
598
599	/*
600	 * Initialize all the buffers.
601	 */
602	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
603		bp->block = -1;
604
605	/*
606	 * Figure out the size of the file, if we can.
607	 */
608	ch_fsize = filesize(ch_file);
609
610	/*
611	 * Seek to a known position: the beginning of the file.
612	 */
613	ch_fpos = 0;
614	ch_block = 0; /* ch_fpos / LBUFSIZE; */
615	ch_offset = 0; /* ch_fpos % LBUFSIZE; */
616
617#if 1
618	/*
619	 * This is a kludge to workaround a Linux kernel bug: files in
620	 * /proc have a size of 0 according to fstat() but have readable
621	 * data.  They are sometimes, but not always, seekable.
622	 * Force them to be non-seekable here.
623	 */
624	if (ch_fsize == 0)
625	{
626		ch_fsize = NULL_POSITION;
627		ch_flags &= ~CH_CANSEEK;
628	}
629#endif
630
631	if (lseek(ch_file, (off_t)0, 0) == BAD_LSEEK)
632	{
633		/*
634		 * Warning only; even if the seek fails for some reason,
635		 * there's a good chance we're at the beginning anyway.
636		 * {{ I think this is bogus reasoning. }}
637		 */
638		error("seek error to 0", NULL_PARG);
639	}
640}
641
642/*
643 * Allocate a new buffer.
644 * The buffer is added to the tail of the buffer chain.
645 */
646	static int
647ch_addbuf()
648{
649	register struct buf *bp;
650
651	/*
652	 * Allocate and initialize a new buffer and link it
653	 * onto the tail of the buffer list.
654	 */
655	bp = (struct buf *) calloc(1, sizeof(struct buf));
656	if (bp == NULL)
657		return (1);
658	ch_nbufs++;
659	bp->block = -1;
660	bp->next = END_OF_CHAIN;
661	bp->prev = ch_buftail;
662	ch_buftail->next = bp;
663	ch_buftail = bp;
664	HASH_INS(bp, 0);
665	return (0);
666}
667
668/*
669 *
670 */
671	static void
672init_hashtbl()
673{
674	register int h;
675
676	for (h = 0;  h < BUFHASH_SIZE;  h++)
677	{
678		thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h);
679		thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h);
680	}
681}
682
683/*
684 * Delete all buffers for this file.
685 */
686	static void
687ch_delbufs()
688{
689	register struct buf *bp;
690
691	while (ch_bufhead != END_OF_CHAIN)
692	{
693		bp = ch_bufhead;
694		bp->next->prev = bp->prev;;
695		bp->prev->next = bp->next;
696		free(bp);
697	}
698	ch_nbufs = 0;
699	init_hashtbl();
700}
701
702/*
703 * Is it possible to seek on a file descriptor?
704 */
705	public int
706seekable(f)
707	int f;
708{
709#if MSDOS_COMPILER
710	extern int fd0;
711	if (f == fd0 && !isatty(fd0))
712	{
713		/*
714		 * In MS-DOS, pipes are seekable.  Check for
715		 * standard input, and pretend it is not seekable.
716		 */
717		return (0);
718	}
719#endif
720	return (lseek(f, (off_t)1, 0) != BAD_LSEEK);
721}
722
723/*
724 * Initialize file state for a new file.
725 */
726	public void
727ch_init(f, flags)
728	int f;
729	int flags;
730{
731	/*
732	 * See if we already have a filestate for this file.
733	 */
734	thisfile = (struct filestate *) get_filestate(curr_ifile);
735	if (thisfile == NULL)
736	{
737		/*
738		 * Allocate and initialize a new filestate.
739		 */
740		thisfile = (struct filestate *)
741				calloc(1, sizeof(struct filestate));
742		thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN;
743		thisfile->nbufs = 0;
744		thisfile->flags = 0;
745		thisfile->fpos = 0;
746		thisfile->block = 0;
747		thisfile->offset = 0;
748		thisfile->file = -1;
749		thisfile->fsize = NULL_POSITION;
750		ch_flags = flags;
751		init_hashtbl();
752		/*
753		 * Try to seek; set CH_CANSEEK if it works.
754		 */
755		if ((flags & CH_CANSEEK) && !seekable(f))
756			ch_flags &= ~CH_CANSEEK;
757		set_filestate(curr_ifile, (void *) thisfile);
758	}
759	if (thisfile->file == -1)
760		thisfile->file = f;
761	ch_flush();
762}
763
764/*
765 * Close a filestate.
766 */
767	public void
768ch_close()
769{
770	int keepstate = FALSE;
771
772	if (ch_flags & (CH_CANSEEK|CH_POPENED|CH_HELPFILE))
773	{
774		/*
775		 * We can seek or re-open, so we don't need to keep buffers.
776		 */
777		ch_delbufs();
778	} else
779		keepstate = TRUE;
780	if (!(ch_flags & CH_KEEPOPEN))
781	{
782		/*
783		 * We don't need to keep the file descriptor open
784		 * (because we can re-open it.)
785		 * But don't really close it if it was opened via popen(),
786		 * because pclose() wants to close it.
787		 */
788		if (!(ch_flags & (CH_POPENED|CH_HELPFILE)))
789			close(ch_file);
790		ch_file = -1;
791	} else
792		keepstate = TRUE;
793	if (!keepstate)
794	{
795		/*
796		 * We don't even need to keep the filestate structure.
797		 */
798		free(thisfile);
799		thisfile = NULL;
800		set_filestate(curr_ifile, (void *) NULL);
801	}
802}
803
804/*
805 * Return ch_flags for the current file.
806 */
807	public int
808ch_getflags()
809{
810	return (ch_flags);
811}
812
813#if 0
814	public void
815ch_dump(struct filestate *fs)
816{
817	struct buf *bp;
818	unsigned char *s;
819
820	if (fs == NULL)
821	{
822		printf(" --no filestate\n");
823		return;
824	}
825	printf(" file %d, flags %x, fpos %x, fsize %x, blk/off %x/%x\n",
826		fs->file, fs->flags, fs->fpos,
827		fs->fsize, fs->block, fs->offset);
828	printf(" %d bufs:\n", fs->nbufs);
829	for (bp = fs->buf_next; bp != (struct buf *)fs;  bp = bp->next)
830	{
831		printf("%x: blk %x, size %x \"",
832			bp, bp->block, bp->datasize);
833		for (s = bp->data;  s < bp->data + 30;  s++)
834			if (*s >= ' ' && *s < 0x7F)
835				printf("%c", *s);
836			else
837				printf(".");
838		printf("\"\n");
839	}
840}
841#endif
842