/* ch.c revision 173683 */
/*
 * Copyright (C) 1984-2007  Mark Nudelman
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information about less, or for information on how to
 * contact the author, see the README file.
 */


/*
 * Low level character input from the input file.
 * We use these special purpose routines which optimize moving
 * both forward and backward from the current read pointer.
 */

18#include "less.h"
19#if MSDOS_COMPILER==WIN32C
20#include <errno.h>
21#include <windows.h>
22#endif
23
24#if HAVE_STAT_INO
25#include <sys/stat.h>
26extern dev_t curr_dev;
27extern ino_t curr_ino;
28#endif
29
30typedef POSITION BLOCKNUM;
31
32public int ignore_eoi;
33
34/*
35 * Pool of buffers holding the most recently used blocks of the input file.
36 * The buffer pool is kept as a doubly-linked circular list,
37 * in order from most- to least-recently used.
38 * The circular list is anchored by the file state "thisfile".
39 */
40#define	LBUFSIZE	8192
41struct buf {
42	struct buf *next, *prev;
43	struct buf *hnext, *hprev;
44	BLOCKNUM block;
45	unsigned int datasize;
46	unsigned char data[LBUFSIZE];
47};
48
/*
 * Head of one hash chain.
 * END_OF_HCHAIN casts the address of this structure to a
 * struct buf pointer, so its members must line up with the
 * leading members of struct buf.
 */
struct buflist {
	/* -- Following members must match struct buf */
	struct buf *buf_next, *buf_prev;
	struct buf *buf_hnext, *buf_hprev;
};

55/*
56 * The file state is maintained in a filestate structure.
57 * A pointer to the filestate is kept in the ifile structure.
58 */
59#define	BUFHASH_SIZE	64
60struct filestate {
61	struct buf *buf_next, *buf_prev;
62	struct buflist hashtbl[BUFHASH_SIZE];
63	int file;
64	int flags;
65	POSITION fpos;
66	int nbufs;
67	BLOCKNUM block;
68	unsigned int offset;
69	POSITION fsize;
70};
71
/*
 * Shorthand for the members of the current file state.
 * None of these may be used while thisfile is NULL.
 */
#define	ch_bufhead	thisfile->buf_next
#define	ch_buftail	thisfile->buf_prev
#define	ch_nbufs	thisfile->nbufs
#define	ch_block	thisfile->block
#define	ch_offset	thisfile->offset
#define	ch_fpos		thisfile->fpos
#define	ch_fsize	thisfile->fsize
#define	ch_flags	thisfile->flags
#define	ch_file		thisfile->file

/*
 * Sentinels marking the end of the LRU chain and of hash chain h.
 * Each is the address of a list anchor cast to a struct buf pointer;
 * only the link members may be accessed through them.
 */
#define	END_OF_CHAIN	((struct buf *)&thisfile->buf_next)
#define	END_OF_HCHAIN(h) ((struct buf *)&thisfile->hashtbl[h])

/* Map a block number to its hash chain index. */
#define BUFHASH(blk)	((blk) & (BUFHASH_SIZE-1))

/* Iterate bp over every buffer in hash chain h. */
#define	FOR_BUFS_IN_CHAIN(h,bp) \
	for (bp = thisfile->hashtbl[h].buf_hnext;  \
	     bp != END_OF_HCHAIN(h);  bp = bp->hnext)

/*
 * Unlink bp from whatever hash chain it is on.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define	HASH_RM(bp) \
	do { \
		(bp)->hnext->hprev = (bp)->hprev; \
		(bp)->hprev->hnext = (bp)->hnext; \
	} while (0)

/*
 * Link bp in at the head of hash chain h.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define	HASH_INS(bp,h) \
	do { \
		(bp)->hnext = thisfile->hashtbl[h].buf_hnext; \
		(bp)->hprev = END_OF_HCHAIN(h); \
		thisfile->hashtbl[h].buf_hnext->hprev = (bp); \
		thisfile->hashtbl[h].buf_hnext = (bp); \
	} while (0)

100static struct filestate *thisfile;
101static int ch_ungotchar = -1;
102static int maxbufs = -1;
103
104extern int autobuf;
105extern int sigs;
106extern int secure;
107extern int screen_trashed;
108extern int follow_mode;
109extern constant char helpdata[];
110extern constant int size_helpdata;
111extern IFILE curr_ifile;
112#if LOGFILE
113extern int logfile;
114extern char *namelogfile;
115#endif
116
117static int ch_addbuf();
118
119
120/*
121 * Get the character pointed to by the read pointer.
122 * ch_get() is a macro which is more efficient to call
123 * than fch_get (the function), in the usual case
124 * that the block desired is at the head of the chain.
125 */
126#define	ch_get()   ((ch_block == ch_bufhead->block && \
127		     ch_offset < ch_bufhead->datasize) ? \
128			ch_bufhead->data[ch_offset] : fch_get())
129	int
130fch_get()
131{
132	register struct buf *bp;
133	register int n;
134	register int slept;
135	register int h;
136	POSITION pos;
137	POSITION len;
138
139	if (thisfile == NULL)
140		return (EOI);
141
142	slept = FALSE;
143
144	/*
145	 * Look for a buffer holding the desired block.
146	 */
147	h = BUFHASH(ch_block);
148	FOR_BUFS_IN_CHAIN(h, bp)
149	{
150		if (bp->block == ch_block)
151		{
152			if (ch_offset >= bp->datasize)
153				/*
154				 * Need more data in this buffer.
155				 */
156				goto read_more;
157			goto found;
158		}
159	}
160	/*
161	 * Block is not in a buffer.
162	 * Take the least recently used buffer
163	 * and read the desired block into it.
164	 * If the LRU buffer has data in it,
165	 * then maybe allocate a new buffer.
166	 */
167	if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1)
168	{
169		/*
170		 * There is no empty buffer to use.
171		 * Allocate a new buffer if:
172		 * 1. We can't seek on this file and -b is not in effect; or
173		 * 2. We haven't allocated the max buffers for this file yet.
174		 */
175		if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
176		    (maxbufs < 0 || ch_nbufs < maxbufs))
177			if (ch_addbuf())
178				/*
179				 * Allocation failed: turn off autobuf.
180				 */
181				autobuf = OPT_OFF;
182	}
183	bp = ch_buftail;
184	HASH_RM(bp); /* Remove from old hash chain. */
185	bp->block = ch_block;
186	bp->datasize = 0;
187	HASH_INS(bp, h); /* Insert into new hash chain. */
188
189    read_more:
190	pos = (ch_block * LBUFSIZE) + bp->datasize;
191	if ((len = ch_length()) != NULL_POSITION && pos >= len)
192		/*
193		 * At end of file.
194		 */
195		return (EOI);
196
197	if (pos != ch_fpos)
198	{
199		/*
200		 * Not at the correct position: must seek.
201		 * If input is a pipe, we're in trouble (can't seek on a pipe).
202		 * Some data has been lost: just return "?".
203		 */
204		if (!(ch_flags & CH_CANSEEK))
205			return ('?');
206		if (lseek(ch_file, (off_t)pos, SEEK_SET) == BAD_LSEEK)
207		{
208 			error("seek error", NULL_PARG);
209			clear_eol();
210			return (EOI);
211 		}
212 		ch_fpos = pos;
213 	}
214
215	/*
216	 * Read the block.
217	 * If we read less than a full block, that's ok.
218	 * We use partial block and pick up the rest next time.
219	 */
220	if (ch_ungotchar != -1)
221	{
222		bp->data[bp->datasize] = ch_ungotchar;
223		n = 1;
224		ch_ungotchar = -1;
225	} else if (ch_flags & CH_HELPFILE)
226	{
227		bp->data[bp->datasize] = helpdata[ch_fpos];
228		n = 1;
229	} else
230	{
231		n = iread(ch_file, &bp->data[bp->datasize],
232			(unsigned int)(LBUFSIZE - bp->datasize));
233	}
234
235	if (n == READ_INTR)
236		return (EOI);
237	if (n < 0)
238	{
239#if MSDOS_COMPILER==WIN32C
240		if (errno != EPIPE)
241#endif
242		{
243			error("read error", NULL_PARG);
244			clear_eol();
245		}
246		n = 0;
247	}
248
249#if LOGFILE
250	/*
251	 * If we have a log file, write the new data to it.
252	 */
253	if (!secure && logfile >= 0 && n > 0)
254		write(logfile, (char *) &bp->data[bp->datasize], n);
255#endif
256
257	ch_fpos += n;
258	bp->datasize += n;
259
260	/*
261	 * If we have read to end of file, set ch_fsize to indicate
262	 * the position of the end of file.
263	 */
264	if (n == 0)
265	{
266		ch_fsize = pos;
267		if (ignore_eoi)
268		{
269			/*
270			 * We are ignoring EOF.
271			 * Wait a while, then try again.
272			 */
273			if (!slept)
274			{
275				PARG parg;
276				parg.p_string = wait_message();
277				ierror("%s", &parg);
278			}
279#if !MSDOS_COMPILER
280	 		sleep(1);
281#else
282#if MSDOS_COMPILER==WIN32C
283			Sleep(1000);
284#endif
285#endif
286			slept = TRUE;
287
288#if HAVE_STAT_INO
289			if (follow_mode == FOLLOW_NAME)
290			{
291				/* See whether the file's i-number has changed.
292				 * If so, force the file to be closed and
293				 * reopened. */
294				struct stat st;
295				int r = stat(get_filename(curr_ifile), &st);
296				if (r == 0 && (st.st_ino != curr_ino ||
297					st.st_dev != curr_dev))
298				{
299					/* screen_trashed=2 causes
300					 * make_display to reopen the file. */
301					screen_trashed = 2;
302					return (EOI);
303				}
304			}
305#endif
306		}
307		if (sigs)
308			return (EOI);
309	}
310
311    found:
312	if (ch_bufhead != bp)
313	{
314		/*
315		 * Move the buffer to the head of the buffer chain.
316		 * This orders the buffer chain, most- to least-recently used.
317		 */
318		bp->next->prev = bp->prev;
319		bp->prev->next = bp->next;
320		bp->next = ch_bufhead;
321		bp->prev = END_OF_CHAIN;
322		ch_bufhead->prev = bp;
323		ch_bufhead = bp;
324
325		/*
326		 * Move to head of hash chain too.
327		 */
328		HASH_RM(bp);
329		HASH_INS(bp, h);
330	}
331
332	if (ch_offset >= bp->datasize)
333		/*
334		 * After all that, we still don't have enough data.
335		 * Go back and try again.
336		 */
337		goto read_more;
338
339	return (bp->data[ch_offset]);
340}
341
342/*
343 * ch_ungetchar is a rather kludgy and limited way to push
344 * a single char onto an input file descriptor.
345 */
346	public void
347ch_ungetchar(c)
348	int c;
349{
350	if (c != -1 && ch_ungotchar != -1)
351		error("ch_ungetchar overrun", NULL_PARG);
352	ch_ungotchar = c;
353}
354
355#if LOGFILE
356/*
357 * Close the logfile.
358 * If we haven't read all of standard input into it, do that now.
359 */
360	public void
361end_logfile()
362{
363	static int tried = FALSE;
364
365	if (logfile < 0)
366		return;
367	if (!tried && ch_fsize == NULL_POSITION)
368	{
369		tried = TRUE;
370		ierror("Finishing logfile", NULL_PARG);
371		while (ch_forw_get() != EOI)
372			if (ABORT_SIGS())
373				break;
374	}
375	close(logfile);
376	logfile = -1;
377	namelogfile = NULL;
378}
379
380/*
381 * Start a log file AFTER less has already been running.
382 * Invoked from the - command; see toggle_option().
383 * Write all the existing buffered data to the log file.
384 */
385	public void
386sync_logfile()
387{
388	register struct buf *bp;
389	int warned = FALSE;
390	BLOCKNUM block;
391	BLOCKNUM nblocks;
392
393	nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
394	for (block = 0;  block < nblocks;  block++)
395	{
396		for (bp = ch_bufhead;  ;  bp = bp->next)
397		{
398			if (bp == END_OF_CHAIN)
399			{
400				if (!warned)
401				{
402					error("Warning: log file is incomplete",
403						NULL_PARG);
404					warned = TRUE;
405				}
406				break;
407			}
408			if (bp->block == block)
409			{
410				write(logfile, (char *) bp->data, bp->datasize);
411				break;
412			}
413		}
414	}
415}
416
417#endif
418
419/*
420 * Determine if a specific block is currently in one of the buffers.
421 */
422	static int
423buffered(block)
424	BLOCKNUM block;
425{
426	register struct buf *bp;
427	register int h;
428
429	h = BUFHASH(block);
430	FOR_BUFS_IN_CHAIN(h, bp)
431	{
432		if (bp->block == block)
433			return (TRUE);
434	}
435	return (FALSE);
436}
437
438/*
439 * Seek to a specified position in the file.
440 * Return 0 if successful, non-zero if can't seek there.
441 */
442	public int
443ch_seek(pos)
444	register POSITION pos;
445{
446	BLOCKNUM new_block;
447	POSITION len;
448
449	if (thisfile == NULL)
450		return (0);
451
452	len = ch_length();
453	if (pos < ch_zero() || (len != NULL_POSITION && pos > len))
454		return (1);
455
456	new_block = pos / LBUFSIZE;
457	if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block))
458	{
459		if (ch_fpos > pos)
460			return (1);
461		while (ch_fpos < pos)
462		{
463			if (ch_forw_get() == EOI)
464				return (1);
465			if (ABORT_SIGS())
466				return (1);
467		}
468		return (0);
469	}
470	/*
471	 * Set read pointer.
472	 */
473	ch_block = new_block;
474	ch_offset = pos % LBUFSIZE;
475	return (0);
476}
477
478/*
479 * Seek to the end of the file.
480 */
481	public int
482ch_end_seek()
483{
484	POSITION len;
485
486	if (thisfile == NULL)
487		return (0);
488
489	if (ch_flags & CH_CANSEEK)
490		ch_fsize = filesize(ch_file);
491
492	len = ch_length();
493	if (len != NULL_POSITION)
494		return (ch_seek(len));
495
496	/*
497	 * Do it the slow way: read till end of data.
498	 */
499	while (ch_forw_get() != EOI)
500		if (ABORT_SIGS())
501			return (1);
502	return (0);
503}
504
505/*
506 * Seek to the beginning of the file, or as close to it as we can get.
507 * We may not be able to seek there if input is a pipe and the
508 * beginning of the pipe is no longer buffered.
509 */
510	public int
511ch_beg_seek()
512{
513	register struct buf *bp, *firstbp;
514
515	/*
516	 * Try a plain ch_seek first.
517	 */
518	if (ch_seek(ch_zero()) == 0)
519		return (0);
520
521	/*
522	 * Can't get to position 0.
523	 * Look thru the buffers for the one closest to position 0.
524	 */
525	firstbp = bp = ch_bufhead;
526	if (bp == END_OF_CHAIN)
527		return (1);
528	while ((bp = bp->next) != END_OF_CHAIN)
529		if (bp->block < firstbp->block)
530			firstbp = bp;
531	ch_block = firstbp->block;
532	ch_offset = 0;
533	return (0);
534}
535
536/*
537 * Return the length of the file, if known.
538 */
539	public POSITION
540ch_length()
541{
542	if (thisfile == NULL)
543		return (NULL_POSITION);
544	if (ignore_eoi)
545		return (NULL_POSITION);
546	if (ch_flags & CH_HELPFILE)
547		return (size_helpdata);
548	return (ch_fsize);
549}
550
551/*
552 * Return the current position in the file.
553 */
554	public POSITION
555ch_tell()
556{
557	if (thisfile == NULL)
558		return (NULL_POSITION);
559	return (ch_block * LBUFSIZE) + ch_offset;
560}
561
562/*
563 * Get the current char and post-increment the read pointer.
564 */
565	public int
566ch_forw_get()
567{
568	register int c;
569
570	if (thisfile == NULL)
571		return (EOI);
572	c = ch_get();
573	if (c == EOI)
574		return (EOI);
575	if (ch_offset < LBUFSIZE-1)
576		ch_offset++;
577	else
578	{
579		ch_block ++;
580		ch_offset = 0;
581	}
582	return (c);
583}
584
585/*
586 * Pre-decrement the read pointer and get the new current char.
587 */
588	public int
589ch_back_get()
590{
591	if (thisfile == NULL)
592		return (EOI);
593	if (ch_offset > 0)
594		ch_offset --;
595	else
596	{
597		if (ch_block <= 0)
598			return (EOI);
599		if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
600			return (EOI);
601		ch_block--;
602		ch_offset = LBUFSIZE-1;
603	}
604	return (ch_get());
605}
606
607/*
608 * Set max amount of buffer space.
609 * bufspace is in units of 1024 bytes.  -1 mean no limit.
610 */
611	public void
612ch_setbufspace(bufspace)
613	int bufspace;
614{
615	if (bufspace < 0)
616		maxbufs = -1;
617	else
618	{
619		maxbufs = ((bufspace * 1024) + LBUFSIZE-1) / LBUFSIZE;
620		if (maxbufs < 1)
621			maxbufs = 1;
622	}
623}
624
625/*
626 * Flush (discard) any saved file state, including buffer contents.
627 */
628	public void
629ch_flush()
630{
631	register struct buf *bp;
632
633	if (thisfile == NULL)
634		return;
635
636	if (!(ch_flags & CH_CANSEEK))
637	{
638		/*
639		 * If input is a pipe, we don't flush buffer contents,
640		 * since the contents can't be recovered.
641		 */
642		ch_fsize = NULL_POSITION;
643		return;
644	}
645
646	/*
647	 * Initialize all the buffers.
648	 */
649	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
650		bp->block = -1;
651
652	/*
653	 * Figure out the size of the file, if we can.
654	 */
655	ch_fsize = filesize(ch_file);
656
657	/*
658	 * Seek to a known position: the beginning of the file.
659	 */
660	ch_fpos = 0;
661	ch_block = 0; /* ch_fpos / LBUFSIZE; */
662	ch_offset = 0; /* ch_fpos % LBUFSIZE; */
663
664#if 1
665	/*
666	 * This is a kludge to workaround a Linux kernel bug: files in
667	 * /proc have a size of 0 according to fstat() but have readable
668	 * data.  They are sometimes, but not always, seekable.
669	 * Force them to be non-seekable here.
670	 */
671	if (ch_fsize == 0)
672	{
673		ch_fsize = NULL_POSITION;
674		ch_flags &= ~CH_CANSEEK;
675	}
676#endif
677
678	if (lseek(ch_file, (off_t)0, SEEK_SET) == BAD_LSEEK)
679	{
680		/*
681		 * Warning only; even if the seek fails for some reason,
682		 * there's a good chance we're at the beginning anyway.
683		 * {{ I think this is bogus reasoning. }}
684		 */
685		error("seek error to 0", NULL_PARG);
686	}
687}
688
689/*
690 * Allocate a new buffer.
691 * The buffer is added to the tail of the buffer chain.
692 */
693	static int
694ch_addbuf()
695{
696	register struct buf *bp;
697
698	/*
699	 * Allocate and initialize a new buffer and link it
700	 * onto the tail of the buffer list.
701	 */
702	bp = (struct buf *) calloc(1, sizeof(struct buf));
703	if (bp == NULL)
704		return (1);
705	ch_nbufs++;
706	bp->block = -1;
707	bp->next = END_OF_CHAIN;
708	bp->prev = ch_buftail;
709	ch_buftail->next = bp;
710	ch_buftail = bp;
711	HASH_INS(bp, 0);
712	return (0);
713}
714
715/*
716 *
717 */
718	static void
719init_hashtbl()
720{
721	register int h;
722
723	for (h = 0;  h < BUFHASH_SIZE;  h++)
724	{
725		thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h);
726		thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h);
727	}
728}
729
730/*
731 * Delete all buffers for this file.
732 */
733	static void
734ch_delbufs()
735{
736	register struct buf *bp;
737
738	while (ch_bufhead != END_OF_CHAIN)
739	{
740		bp = ch_bufhead;
741		bp->next->prev = bp->prev;
742		bp->prev->next = bp->next;
743		free(bp);
744	}
745	ch_nbufs = 0;
746	init_hashtbl();
747}
748
749/*
750 * Is it possible to seek on a file descriptor?
751 */
752	public int
753seekable(f)
754	int f;
755{
756#if MSDOS_COMPILER
757	extern int fd0;
758	if (f == fd0 && !isatty(fd0))
759	{
760		/*
761		 * In MS-DOS, pipes are seekable.  Check for
762		 * standard input, and pretend it is not seekable.
763		 */
764		return (0);
765	}
766#endif
767	return (lseek(f, (off_t)1, SEEK_SET) != BAD_LSEEK);
768}
769
770/*
771 * Initialize file state for a new file.
772 */
773	public void
774ch_init(f, flags)
775	int f;
776	int flags;
777{
778	/*
779	 * See if we already have a filestate for this file.
780	 */
781	thisfile = (struct filestate *) get_filestate(curr_ifile);
782	if (thisfile == NULL)
783	{
784		/*
785		 * Allocate and initialize a new filestate.
786		 */
787		thisfile = (struct filestate *)
788				calloc(1, sizeof(struct filestate));
789		thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN;
790		thisfile->nbufs = 0;
791		thisfile->flags = 0;
792		thisfile->fpos = 0;
793		thisfile->block = 0;
794		thisfile->offset = 0;
795		thisfile->file = -1;
796		thisfile->fsize = NULL_POSITION;
797		ch_flags = flags;
798		init_hashtbl();
799		/*
800		 * Try to seek; set CH_CANSEEK if it works.
801		 */
802		if ((flags & CH_CANSEEK) && !seekable(f))
803			ch_flags &= ~CH_CANSEEK;
804		set_filestate(curr_ifile, (void *) thisfile);
805	}
806	if (thisfile->file == -1)
807		thisfile->file = f;
808	ch_flush();
809}
810
811/*
812 * Close a filestate.
813 */
814	public void
815ch_close()
816{
817	int keepstate = FALSE;
818
819	if (thisfile == NULL)
820		return;
821
822	if (ch_flags & (CH_CANSEEK|CH_POPENED|CH_HELPFILE))
823	{
824		/*
825		 * We can seek or re-open, so we don't need to keep buffers.
826		 */
827		ch_delbufs();
828	} else
829		keepstate = TRUE;
830	if (!(ch_flags & CH_KEEPOPEN))
831	{
832		/*
833		 * We don't need to keep the file descriptor open
834		 * (because we can re-open it.)
835		 * But don't really close it if it was opened via popen(),
836		 * because pclose() wants to close it.
837		 */
838		if (!(ch_flags & (CH_POPENED|CH_HELPFILE)))
839			close(ch_file);
840		ch_file = -1;
841	} else
842		keepstate = TRUE;
843	if (!keepstate)
844	{
845		/*
846		 * We don't even need to keep the filestate structure.
847		 */
848		free(thisfile);
849		thisfile = NULL;
850		set_filestate(curr_ifile, (void *) NULL);
851	}
852}
853
854/*
855 * Return ch_flags for the current file.
856 */
857	public int
858ch_getflags()
859{
860	if (thisfile == NULL)
861		return (0);
862	return (ch_flags);
863}
864
#if 0
/*
 * Debugging aid (compiled out): print a file state and the first
 * 30 bytes of each of its buffers, with non-printable bytes shown as '.'.
 */
	public void
ch_dump(struct filestate *fs)
{
	struct buf *bp;
	unsigned char *s;

	if (fs == NULL)
	{
		printf(" --no filestate\n");
		return;
	}
	/* Cast so that each argument matches its conversion specifier. */
	printf(" file %d, flags %x, fpos %lx, fsize %lx, blk/off %lx/%x\n",
		fs->file, (unsigned int) fs->flags, (unsigned long) fs->fpos,
		(unsigned long) fs->fsize, (unsigned long) fs->block,
		fs->offset);
	printf(" %d bufs:\n", fs->nbufs);
	/* The filestate itself is the anchor that ends the chain. */
	for (bp = fs->buf_next; bp != (struct buf *)fs;  bp = bp->next)
	{
		printf("%p: blk %lx, size %x \"",
			(void *) bp, (unsigned long) bp->block, bp->datasize);
		for (s = bp->data;  s < bp->data + 30;  s++)
			if (*s >= ' ' && *s < 0x7F)
				printf("%c", *s);
			else
				printf(".");
		printf("\"\n");
	}
}
#endif
