1/*
2 * Socket and pipe I/O utilities used in rsync.
3 *
4 * Copyright (C) 1996-2001 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2001, 2002 Martin Pool <mbp@samba.org>
7 * Copyright (C) 2003, 2004, 2005, 2006 Wayne Davison
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
22 */
23
24/* Rsync provides its own multiplexing system, which is used to send
25 * stderr and stdout over a single socket.
26 *
27 * For historical reasons this is off during the start of the
28 * connection, but it's switched on quite early using
29 * io_start_multiplex_out() and io_start_multiplex_in(). */
30
31#include "rsync.h"
32/** If no timeout is specified then use a 60 second select timeout */
33#define SELECT_TIMEOUT 60
34
35extern int bwlimit;
36extern size_t bwlimit_writemax;
37extern int io_timeout;
38extern int allowed_lull;
39extern int am_server;
40extern int am_daemon;
41extern int am_sender;
42extern int am_generator;
43extern int eol_nulls;
44extern int read_batch;
45extern int csum_length;
46extern int checksum_seed;
47extern int protocol_version;
48extern int remove_source_files;
49extern int preserve_hard_links;
50extern char *filesfrom_host;
51extern struct stats stats;
52extern struct file_list *the_file_list;
53
const char phase_unknown[] = "unknown";
/* When non-zero, check_timeout() returns without enforcing io_timeout. */
int ignore_timeout = 0;
/* Descriptor that readfd()/writefd() copy data to when the fd they are
 * handling matches write_batch_monitor_in/write_batch_monitor_out. */
int batch_fd = -1;
int batch_gen_fd = -1;

/* Ignore an EOF error if non-zero. See whine_about_eof(). */
int kluge_around_eof = 0;

/* Descriptors for MSG_* traffic; set by set_msg_fd_in()/set_msg_fd_out(). */
int msg_fd_in = -1;
int msg_fd_out = -1;
/* Main socket descriptors; set by io_set_sock_fds(). */
int sock_f_in = -1;
int sock_f_out = -1;

/* io_flush() sends buffered output through mplex_write() when
 * io_multiplexing_out is set; readfd_unbuffered() parses message headers
 * from the socket unless it is filling its buffer in the
 * !io_multiplexing_in case. */
static int io_multiplexing_out;
static int io_multiplexing_in;
/* Timestamps recorded by read_timeout() and writefd_unbuffered() after
 * successful I/O on the socket descriptors. */
static time_t last_io_in;
static time_t last_io_out;
/* While non-zero, io_flush() leaves the output buffer unwritten; it is
 * incremented for the duration of writefd_unbuffered(). */
static int no_flush;

/* Descriptors compared against in readfd()/writefd() to decide whether the
 * data should also be copied to batch_fd. */
static int write_batch_monitor_in = -1;
static int write_batch_monitor_out = -1;

/* State for the --files-from forwarding done inside read_timeout(); it is
 * (re)initialised by io_set_filesfrom_fds(). */
static int io_filesfrom_f_in = -1;
static int io_filesfrom_f_out = -1;
static char io_filesfrom_buf[2048];
static char *io_filesfrom_bp;
static char io_filesfrom_lastchar;
static int io_filesfrom_buflen;
/* When non-zero, send_msg() queues messages on msg2sndr instead of writing
 * them right away, and msg2sndr_flush() does nothing. */
static int defer_forwarding_messages = 0;
/* Timeout (in seconds) handed to select(); see set_io_timeout(). */
static int select_timeout = SELECT_TIMEOUT;
/* Counters maintained by increment_active_files() and
 * decrement_active_files(). */
static int active_filecnt = 0;
static OFF_T active_bytecnt = 0;

/* Forward declaration: defined further down, used by read_msg_fd(). */
static void read_loop(int fd, char *buf, size_t len);
88
/* One node of a singly linked queue of file-list indexes. */
struct flist_ndx_item {
	struct flist_ndx_item *next;	/* following node, or NULL at the end */
	int ndx;			/* the queued index value */
};

/* Queue of file-list indexes: flist_ndx_push() appends at the tail and
 * flist_ndx_pop() removes from the head. */
struct flist_ndx_list {
	struct flist_ndx_item *head;
	struct flist_ndx_item *tail;
};

/* Indexes queued by read_msg_fd() for redo and hard-link processing. */
static struct flist_ndx_list redo_list;
static struct flist_ndx_list hlink_list;
99
/* One queued MSG_* message.  The node is over-allocated by msg_list_add()
 * so that buf holds the 4-byte header followed by the payload. */
struct msg_list_item {
	struct msg_list_item *next;	/* following node, or NULL at the end */
	int len;			/* bytes stored in buf (header + payload) */
	char buf[1];			/* first byte of the variable-length data */
};

/* Queue of pending messages, appended at the tail and drained from the
 * head. */
struct msg_list {
	struct msg_list_item *head;
	struct msg_list_item *tail;
};

/* msg2genr is drained by msg2genr_flush(); msg2sndr by msg2sndr_flush(). */
static struct msg_list msg2genr;
static struct msg_list msg2sndr;
111
112static void flist_ndx_push(struct flist_ndx_list *lp, int ndx)
113{
114	struct flist_ndx_item *item;
115
116	if (!(item = new(struct flist_ndx_item)))
117		out_of_memory("flist_ndx_push");
118	item->next = NULL;
119	item->ndx = ndx;
120	if (lp->tail)
121		lp->tail->next = item;
122	else
123		lp->head = item;
124	lp->tail = item;
125}
126
127static int flist_ndx_pop(struct flist_ndx_list *lp)
128{
129	struct flist_ndx_item *next;
130	int ndx;
131
132	if (!lp->head)
133		return -1;
134
135	ndx = lp->head->ndx;
136	next = lp->head->next;
137	free(lp->head);
138	lp->head = next;
139	if (!next)
140		lp->tail = NULL;
141
142	return ndx;
143}
144
145static void check_timeout(void)
146{
147	time_t t;
148
149	if (!io_timeout || ignore_timeout)
150		return;
151
152	if (!last_io_in) {
153		last_io_in = time(NULL);
154		return;
155	}
156
157	t = time(NULL);
158
159	if (t - last_io_in >= io_timeout) {
160		if (!am_server && !am_daemon) {
161			rprintf(FERROR, "io timeout after %d seconds -- exiting\n",
162				(int)(t-last_io_in));
163		}
164		exit_cleanup(RERR_TIMEOUT);
165	}
166}
167
168/* Note the fds used for the main socket (which might really be a pipe
169 * for a local transfer, but we can ignore that). */
170void io_set_sock_fds(int f_in, int f_out)
171{
172	sock_f_in = f_in;
173	sock_f_out = f_out;
174}
175
176void set_io_timeout(int secs)
177{
178	io_timeout = secs;
179
180	if (!io_timeout || io_timeout > SELECT_TIMEOUT)
181		select_timeout = SELECT_TIMEOUT;
182	else
183		select_timeout = io_timeout;
184
185	allowed_lull = read_batch ? 0 : (io_timeout + 1) / 2;
186}
187
188/* Setup the fd used to receive MSG_* messages.  Only needed during the
189 * early stages of being a local sender (up through the sending of the
190 * file list) or when we're the generator (to fetch the messages from
191 * the receiver). */
192void set_msg_fd_in(int fd)
193{
194	msg_fd_in = fd;
195}
196
197/* Setup the fd used to send our MSG_* messages.  Only needed when
198 * we're the receiver (to send our messages to the generator). */
199void set_msg_fd_out(int fd)
200{
201	msg_fd_out = fd;
202	set_nonblocking(msg_fd_out);
203}
204
205/* Add a message to the pending MSG_* list. */
206static void msg_list_add(struct msg_list *lst, int code, char *buf, int len)
207{
208	struct msg_list_item *m;
209	int sz = len + 4 + sizeof m[0] - 1;
210
211	if (!(m = (struct msg_list_item *)new_array(char, sz)))
212		out_of_memory("msg_list_add");
213	m->next = NULL;
214	m->len = len + 4;
215	SIVAL(m->buf, 0, ((code+MPLEX_BASE)<<24) | len);
216	memcpy(m->buf + 4, buf, len);
217	if (lst->tail)
218		lst->tail->next = m;
219	else
220		lst->head = m;
221	lst->tail = m;
222}
223
224/* Read a message from the MSG_* fd and handle it.  This is called either
225 * during the early stages of being a local sender (up through the sending
226 * of the file list) or when we're the generator (to fetch the messages
227 * from the receiver). */
228static void read_msg_fd(void)
229{
230	char buf[2048];
231	size_t n;
232	int fd = msg_fd_in;
233	int tag, len;
234
235	/* Temporarily disable msg_fd_in.  This is needed to avoid looping back
236	 * to this routine from writefd_unbuffered(). */
237	msg_fd_in = -1;
238
239	memset(buf, 0, 4);
240	read_loop(fd, buf, 4);
241	tag = IVAL(buf, 0);
242
243	len = tag & 0xFFFFFF;
244	tag = (tag >> 24) - MPLEX_BASE;
245
246	switch (tag) {
247	case MSG_DONE:
248		if (len != 0 || !am_generator) {
249			rprintf(FERROR, "invalid message %d:%d\n", tag, len);
250			exit_cleanup(RERR_STREAMIO);
251		}
252		flist_ndx_push(&redo_list, -1);
253		break;
254	case MSG_REDO:
255		if (len != 4 || !am_generator) {
256			rprintf(FERROR, "invalid message %d:%d\n", tag, len);
257			exit_cleanup(RERR_STREAMIO);
258		}
259		read_loop(fd, buf, 4);
260		if (remove_source_files)
261			decrement_active_files(IVAL(buf,0));
262		flist_ndx_push(&redo_list, IVAL(buf,0));
263		break;
264	case MSG_DELETED:
265		if (len >= (int)sizeof buf || !am_generator) {
266			rprintf(FERROR, "invalid message %d:%d\n", tag, len);
267			exit_cleanup(RERR_STREAMIO);
268		}
269		read_loop(fd, buf, len);
270		send_msg(MSG_DELETED, buf, len);
271		break;
272	case MSG_SUCCESS:
273		if (len != 4 || !am_generator) {
274			rprintf(FERROR, "invalid message %d:%d\n", tag, len);
275			exit_cleanup(RERR_STREAMIO);
276		}
277		read_loop(fd, buf, len);
278		if (remove_source_files) {
279			decrement_active_files(IVAL(buf,0));
280			send_msg(MSG_SUCCESS, buf, len);
281		}
282		if (preserve_hard_links)
283			flist_ndx_push(&hlink_list, IVAL(buf,0));
284		break;
285	case MSG_SOCKERR:
286		if (!am_generator) {
287			rprintf(FERROR, "invalid message %d:%d\n", tag, len);
288			exit_cleanup(RERR_STREAMIO);
289		}
290		close_multiplexing_out();
291		/* FALL THROUGH */
292	case MSG_INFO:
293	case MSG_ERROR:
294	case MSG_LOG:
295		while (len) {
296			n = len;
297			if (n >= sizeof buf)
298				n = sizeof buf - 1;
299			read_loop(fd, buf, n);
300			rwrite(tag, buf, n);
301			len -= n;
302		}
303		break;
304	default:
305		rprintf(FERROR, "unknown message %d:%d [%s]\n",
306			tag, len, who_am_i());
307		exit_cleanup(RERR_STREAMIO);
308	}
309
310	msg_fd_in = fd;
311}
312
313/* This is used by the generator to limit how many file transfers can
314 * be active at once when --remove-source-files is specified.  Without
315 * this, sender-side deletions were mostly happening at the end. */
316void increment_active_files(int ndx, int itemizing, enum logcode code)
317{
318	/* TODO: tune these limits? */
319	while (active_filecnt >= (active_bytecnt >= 128*1024 ? 10 : 50)) {
320		if (hlink_list.head)
321			check_for_finished_hlinks(itemizing, code);
322		read_msg_fd();
323	}
324
325	active_filecnt++;
326	active_bytecnt += the_file_list->files[ndx]->length;
327}
328
329void decrement_active_files(int ndx)
330{
331	active_filecnt--;
332	active_bytecnt -= the_file_list->files[ndx]->length;
333}
334
335/* Try to push messages off the list onto the wire.  If we leave with more
336 * to do, return 0.  On error, return -1.  If everything flushed, return 1.
337 * This is only active in the receiver. */
338static int msg2genr_flush(int flush_it_all)
339{
340	static int written = 0;
341	struct timeval tv;
342	fd_set fds;
343
344	if (msg_fd_out < 0)
345		return -1;
346
347	while (msg2genr.head) {
348		struct msg_list_item *m = msg2genr.head;
349		int n = write(msg_fd_out, m->buf + written, m->len - written);
350		if (n < 0) {
351			if (errno == EINTR)
352				continue;
353			if (errno != EWOULDBLOCK && errno != EAGAIN)
354				return -1;
355			if (!flush_it_all)
356				return 0;
357			FD_ZERO(&fds);
358			FD_SET(msg_fd_out, &fds);
359			tv.tv_sec = select_timeout;
360			tv.tv_usec = 0;
361			if (!select(msg_fd_out+1, NULL, &fds, NULL, &tv))
362				check_timeout();
363		} else if ((written += n) == m->len) {
364			msg2genr.head = m->next;
365			if (!msg2genr.head)
366				msg2genr.tail = NULL;
367			free(m);
368			written = 0;
369		}
370	}
371	return 1;
372}
373
374int send_msg(enum msgcode code, char *buf, int len)
375{
376	if (msg_fd_out < 0) {
377		if (!defer_forwarding_messages)
378			return io_multiplex_write(code, buf, len);
379		if (!io_multiplexing_out)
380			return 0;
381		msg_list_add(&msg2sndr, code, buf, len);
382		return 1;
383	}
384	msg_list_add(&msg2genr, code, buf, len);
385	msg2genr_flush(NORMAL_FLUSH);
386	return 1;
387}
388
389int get_redo_num(int itemizing, enum logcode code)
390{
391	while (1) {
392		if (hlink_list.head)
393			check_for_finished_hlinks(itemizing, code);
394		if (redo_list.head)
395			break;
396		read_msg_fd();
397	}
398
399	return flist_ndx_pop(&redo_list);
400}
401
402int get_hlink_num(void)
403{
404	return flist_ndx_pop(&hlink_list);
405}
406
407/**
408 * When we're the receiver and we have a local --files-from list of names
409 * that needs to be sent over the socket to the sender, we have to do two
410 * things at the same time: send the sender a list of what files we're
411 * processing and read the incoming file+info list from the sender.  We do
412 * this by augmenting the read_timeout() function to copy this data.  It
413 * uses the io_filesfrom_buf to read a block of data from f_in (when it is
414 * ready, since it might be a pipe) and then blast it out f_out (when it
415 * is ready to receive more data).
416 */
417void io_set_filesfrom_fds(int f_in, int f_out)
418{
419	io_filesfrom_f_in = f_in;
420	io_filesfrom_f_out = f_out;
421	io_filesfrom_bp = io_filesfrom_buf;
422	io_filesfrom_lastchar = '\0';
423	io_filesfrom_buflen = 0;
424}
425
426/* It's almost always an error to get an EOF when we're trying to read from the
427 * network, because the protocol is (for the most part) self-terminating.
428 *
429 * There is one case for the receiver when it is at the end of the transfer
430 * (hanging around reading any keep-alive packets that might come its way): if
431 * the sender dies before the generator's kill-signal comes through, we can end
432 * up here needing to loop until the kill-signal arrives.  In this situation,
433 * kluge_around_eof will be < 0.
434 *
435 * There is another case for older protocol versions (< 24) where the module
436 * listing was not terminated, so we must ignore an EOF error in that case and
437 * exit.  In this situation, kluge_around_eof will be > 0. */
438static void whine_about_eof(int fd)
439{
440	if (kluge_around_eof && fd == sock_f_in) {
441		int i;
442		if (kluge_around_eof > 0)
443			exit_cleanup(0);
444		/* If we're still here after 10 seconds, exit with an error. */
445		for (i = 10*1000/20; i--; )
446			msleep(20);
447	}
448
449	rprintf(FERROR, RSYNC_NAME ": connection unexpectedly closed "
450		"(%.0f bytes received so far) [%s]\n",
451		(double)stats.total_read, who_am_i());
452
453	exit_cleanup(RERR_STREAMIO);
454}
455
/**
 * Read from a socket with I/O timeout. return the number of bytes
 * read. If no bytes can be read then exit, never return a number <= 0.
 *
 * Before reading, pending output is flushed with io_flush().  While
 * waiting for @p fd to become readable, the same select() loop is also
 * used to push queued msg2genr messages out on msg_fd_out and to forward
 * --files-from data from io_filesfrom_f_in to io_filesfrom_f_out.
 *
 * @param fd   descriptor to read from
 * @param buf  destination buffer
 * @param len  maximum number of bytes to read
 * @return the number of bytes read (always > 0)
 *
 * TODO: If the remote shell connection fails, then current versions
 * actually report an "unexpected EOF" error here.  Since it's a
 * fairly common mistake to try to use rsh when ssh is required, we
 * should trap that: if we fail to read any data at all, we should
 * give a better explanation.  We can tell whether the connection has
 * started by looking e.g. at whether the remote version is known yet.
 */
static int read_timeout(int fd, char *buf, size_t len)
{
	int n, cnt = 0;

	io_flush(NORMAL_FLUSH);

	while (cnt == 0) {
		/* until we manage to read *something* */
		fd_set r_fds, w_fds;
		struct timeval tv;
		int maxfd = fd;
		int count;

		FD_ZERO(&r_fds);
		FD_ZERO(&w_fds);
		FD_SET(fd, &r_fds);
		/* Messages are queued for the generator: also wait for
		 * msg_fd_out to become writable. */
		if (msg2genr.head) {
			FD_SET(msg_fd_out, &w_fds);
			if (msg_fd_out > maxfd)
				maxfd = msg_fd_out;
		}
		/* Files-from forwarding is active: wait for input when the
		 * buffer is empty, otherwise for room to write it out. */
		if (io_filesfrom_f_out >= 0) {
			int new_fd;
			if (io_filesfrom_buflen == 0) {
				if (io_filesfrom_f_in >= 0) {
					FD_SET(io_filesfrom_f_in, &r_fds);
					new_fd = io_filesfrom_f_in;
				} else {
					/* Nothing buffered and no input fd
					 * left: forwarding is finished. */
					io_filesfrom_f_out = -1;
					new_fd = -1;
				}
			} else {
				FD_SET(io_filesfrom_f_out, &w_fds);
				new_fd = io_filesfrom_f_out;
			}
			if (new_fd > maxfd)
				maxfd = new_fd;
		}

		tv.tv_sec = select_timeout;
		tv.tv_usec = 0;

		/* Cleared so that the EBADF test below only sees an error
		 * set by this select() call. */
		errno = 0;

		count = select(maxfd + 1, &r_fds, &w_fds, NULL, &tv);

		if (count <= 0) {
			if (errno == EBADF)
				exit_cleanup(RERR_SOCKETIO);
			check_timeout();
			continue;
		}

		if (msg2genr.head && FD_ISSET(msg_fd_out, &w_fds))
			msg2genr_flush(NORMAL_FLUSH);

		if (io_filesfrom_f_out >= 0) {
			if (io_filesfrom_buflen) {
				/* Buffered files-from data: write as much of
				 * it as the output fd accepts. */
				if (FD_ISSET(io_filesfrom_f_out, &w_fds)) {
					int l = write(io_filesfrom_f_out,
						      io_filesfrom_bp,
						      io_filesfrom_buflen);
					if (l > 0) {
						if (!(io_filesfrom_buflen -= l))
							io_filesfrom_bp = io_filesfrom_buf;
						else
							io_filesfrom_bp += l;
					} else {
						/* XXX should we complain? */
						io_filesfrom_f_out = -1;
					}
				}
			} else if (io_filesfrom_f_in >= 0) {
				/* Buffer is empty: refill it from the input
				 * fd if that is readable. */
				if (FD_ISSET(io_filesfrom_f_in, &r_fds)) {
					int l = read(io_filesfrom_f_in,
						     io_filesfrom_buf,
						     sizeof io_filesfrom_buf);
					if (l <= 0) {
						/* Send end-of-file marker */
						/* Two '\0' bytes if the data
						 * sent so far did not end in
						 * '\0', otherwise just one. */
						io_filesfrom_buf[0] = '\0';
						io_filesfrom_buf[1] = '\0';
						io_filesfrom_buflen = io_filesfrom_lastchar? 2 : 1;
						io_filesfrom_f_in = -1;
					} else {
						if (!eol_nulls) {
							char *s = io_filesfrom_buf + l;
							/* Transform CR and/or LF into '\0' */
							while (s-- > io_filesfrom_buf) {
								if (*s == '\n' || *s == '\r')
									*s = '\0';
							}
						}
						if (!io_filesfrom_lastchar) {
							/* Last buf ended with a '\0', so don't
							 * let this buf start with one. */
							while (l && !*io_filesfrom_bp)
								io_filesfrom_bp++, l--;
						}
						if (!l)
							io_filesfrom_bp = io_filesfrom_buf;
						else {
							/* f reads and t writes within
							 * the same buffer, so the
							 * data is compacted in place
							 * and l shrinks by each
							 * byte dropped. */
							char *f = io_filesfrom_bp;
							char *t = f;
							char *eob = f + l;
							/* Eliminate any multi-'\0' runs. */
							while (f != eob) {
								if (!(*t++ = *f++)) {
									while (f != eob && !*f)
										f++, l--;
								}
							}
							io_filesfrom_lastchar = f[-1];
						}
						io_filesfrom_buflen = l;
					}
				}
			}
		}

		/* The select() was satisfied by one of the other fds only. */
		if (!FD_ISSET(fd, &r_fds))
			continue;

		n = read(fd, buf, len);

		if (n <= 0) {
			if (n == 0)
				whine_about_eof(fd); /* Doesn't return. */
			if (errno == EINTR || errno == EWOULDBLOCK
			    || errno == EAGAIN)
				continue;

			/* Don't write errors on a dead socket. */
			if (fd == sock_f_in) {
				close_multiplexing_out();
				rsyserr(FSOCKERR, errno, "read error");
			} else
				rsyserr(FERROR, errno, "read error");
			exit_cleanup(RERR_STREAMIO);
		}

		buf += n;
		len -= n;
		cnt += n;

		/* Note the time of this read for check_timeout(). */
		if (fd == sock_f_in && io_timeout)
			last_io_in = time(NULL);
	}

	return cnt;
}
617
618/**
619 * Read a line into the "fname" buffer (which must be at least MAXPATHLEN
620 * characters long).
621 */
622int read_filesfrom_line(int fd, char *fname)
623{
624	char ch, *s, *eob = fname + MAXPATHLEN - 1;
625	int cnt;
626	int reading_remotely = filesfrom_host != NULL;
627	int nulls = eol_nulls || reading_remotely;
628
629  start:
630	s = fname;
631	while (1) {
632		cnt = read(fd, &ch, 1);
633		if (cnt < 0 && (errno == EWOULDBLOCK
634		  || errno == EINTR || errno == EAGAIN)) {
635			struct timeval tv;
636			fd_set r_fds, e_fds;
637			FD_ZERO(&r_fds);
638			FD_SET(fd, &r_fds);
639			FD_ZERO(&e_fds);
640			FD_SET(fd, &e_fds);
641			tv.tv_sec = select_timeout;
642			tv.tv_usec = 0;
643			if (!select(fd+1, &r_fds, NULL, &e_fds, &tv))
644				check_timeout();
645			if (FD_ISSET(fd, &e_fds)) {
646				rsyserr(FINFO, errno,
647					"select exception on fd %d", fd);
648			}
649			continue;
650		}
651		if (cnt != 1)
652			break;
653		if (nulls? !ch : (ch == '\r' || ch == '\n')) {
654			/* Skip empty lines if reading locally. */
655			if (!reading_remotely && s == fname)
656				continue;
657			break;
658		}
659		if (s < eob)
660			*s++ = ch;
661	}
662	*s = '\0';
663
664	/* Dump comments. */
665	if (*fname == '#' || *fname == ';')
666		goto start;
667
668	return s - fname;
669}
670
671static char *iobuf_out;
672static int iobuf_out_cnt;
673
674void io_start_buffering_out(void)
675{
676	if (iobuf_out)
677		return;
678	if (!(iobuf_out = new_array(char, IO_BUFFER_SIZE)))
679		out_of_memory("io_start_buffering_out");
680	iobuf_out_cnt = 0;
681}
682
683static char *iobuf_in;
684static size_t iobuf_in_siz;
685
686void io_start_buffering_in(void)
687{
688	if (iobuf_in)
689		return;
690	iobuf_in_siz = 2 * IO_BUFFER_SIZE;
691	if (!(iobuf_in = new_array(char, iobuf_in_siz)))
692		out_of_memory("io_start_buffering_in");
693}
694
695void io_end_buffering(void)
696{
697	io_flush(NORMAL_FLUSH);
698	if (!io_multiplexing_out) {
699		free(iobuf_out);
700		iobuf_out = NULL;
701	}
702}
703
704void maybe_flush_socket(void)
705{
706	if (iobuf_out && iobuf_out_cnt && time(NULL) - last_io_out >= 5)
707		io_flush(NORMAL_FLUSH);
708}
709
710void maybe_send_keepalive(void)
711{
712	if (time(NULL) - last_io_out >= allowed_lull) {
713		if (!iobuf_out || !iobuf_out_cnt) {
714			if (protocol_version < 29)
715				return; /* there's nothing we can do */
716			write_int(sock_f_out, the_file_list->count);
717			write_shortint(sock_f_out, ITEM_IS_NEW);
718		}
719		if (iobuf_out)
720			io_flush(NORMAL_FLUSH);
721	}
722}
723
/**
 * Keep calling read_timeout() until exactly len bytes have been stored
 * in buf; does not return before then.
 **/
static void read_loop(int fd, char *buf, size_t len)
{
	int got;

	for ( ; len; buf += got, len -= got)
		got = read_timeout(fd, buf, len);
}
737
/**
 * Read from the file descriptor handling multiplexing - return number
 * of bytes read.
 *
 * When there is no input buffer, or @p fd is not the main socket, this is
 * just read_timeout().  Otherwise data is served out of iobuf_in, which
 * is refilled either with raw socket data (multiplexing off) or with the
 * payload of the next MSG_DATA message; other message types are handled
 * as they arrive.
 *
 * Never returns <= 0.
 */
static int readfd_unbuffered(int fd, char *buf, size_t len)
{
	/* Unread bytes left in iobuf_in, and the offset of the first one.
	 * Both persist across calls. */
	static size_t remaining;
	static size_t iobuf_in_ndx;
	size_t msg_bytes;
	int tag, cnt = 0;
	char line[BIGPATHBUFLEN];

	if (!iobuf_in || fd != sock_f_in)
		return read_timeout(fd, buf, len);

	/* Not multiplexed: refill the buffer with whatever is available. */
	if (!io_multiplexing_in && remaining == 0) {
		remaining = read_timeout(fd, iobuf_in, iobuf_in_siz);
		iobuf_in_ndx = 0;
	}

	while (cnt == 0) {
		/* Serve the caller from buffered data when there is some. */
		if (remaining) {
			len = MIN(len, remaining);
			memcpy(buf, iobuf_in + iobuf_in_ndx, len);
			iobuf_in_ndx += len;
			remaining -= len;
			cnt = len;
			break;
		}

		/* Read the 4-byte message header: the low 24 bits hold the
		 * payload length, the top byte MPLEX_BASE + the tag. */
		read_loop(fd, line, 4);
		tag = IVAL(line, 0);

		msg_bytes = tag & 0xFFFFFF;
		tag = (tag >> 24) - MPLEX_BASE;

		switch (tag) {
		case MSG_DATA:
			/* Grow the buffer if the payload would not fit. */
			if (msg_bytes > iobuf_in_siz) {
				if (!(iobuf_in = realloc_array(iobuf_in, char,
							       msg_bytes)))
					out_of_memory("readfd_unbuffered");
				iobuf_in_siz = msg_bytes;
			}
			read_loop(fd, iobuf_in, msg_bytes);
			remaining = msg_bytes;
			iobuf_in_ndx = 0;
			break;
		case MSG_DELETED:
			/* Shares the overflow handling in the MSG_INFO/
			 * MSG_ERROR case below. */
			if (msg_bytes >= sizeof line)
				goto overflow;
			read_loop(fd, line, msg_bytes);
			/* A directory name was sent with the trailing null */
			if (msg_bytes > 0 && !line[msg_bytes-1])
				log_delete(line, S_IFDIR);
			else {
				line[msg_bytes] = '\0';
				log_delete(line, S_IFREG);
			}
			break;
		case MSG_SUCCESS:
			/* The payload must be exactly one 4-byte value. */
			if (msg_bytes != 4) {
				rprintf(FERROR, "invalid multi-message %d:%ld [%s]\n",
					tag, (long)msg_bytes, who_am_i());
				exit_cleanup(RERR_STREAMIO);
			}
			read_loop(fd, line, msg_bytes);
			successful_send(IVAL(line, 0));
			break;
		case MSG_INFO:
		case MSG_ERROR:
			if (msg_bytes >= sizeof line) {
			    overflow:
				rprintf(FERROR,
					"multiplexing overflow %d:%ld [%s]\n",
					tag, (long)msg_bytes, who_am_i());
				exit_cleanup(RERR_STREAMIO);
			}
			read_loop(fd, line, msg_bytes);
			rwrite((enum logcode)tag, line, msg_bytes);
			break;
		default:
			rprintf(FERROR, "unexpected tag %d [%s]\n",
				tag, who_am_i());
			exit_cleanup(RERR_STREAMIO);
		}
	}

	/* The input buffer has been drained: flush pending output. */
	if (remaining == 0)
		io_flush(NORMAL_FLUSH);

	return cnt;
}
833
834/**
835 * Do a buffered read from @p fd.  Don't return until all @p n bytes
836 * have been read.  If all @p n can't be read then exit with an
837 * error.
838 **/
839static void readfd(int fd, char *buffer, size_t N)
840{
841	int  cnt;
842	size_t total = 0;
843
844	while (total < N) {
845		cnt = readfd_unbuffered(fd, buffer + total, N-total);
846		total += cnt;
847	}
848
849	if (fd == write_batch_monitor_in) {
850		if ((size_t)write(batch_fd, buffer, total) != total)
851			exit_cleanup(RERR_FILEIO);
852	}
853
854	if (fd == sock_f_in)
855		stats.total_read += total;
856}
857
858int read_shortint(int f)
859{
860	uchar b[2];
861	readfd(f, (char *)b, 2);
862	return (b[1] << 8) + b[0];
863}
864
865int32 read_int(int f)
866{
867	char b[4];
868	int32 num;
869
870	readfd(f,b,4);
871	num = IVAL(b,0);
872	if (num == (int32)0xffffffff)
873		return -1;
874	return num;
875}
876
877int64 read_longint(int f)
878{
879	int64 num;
880	char b[8];
881	num = read_int(f);
882
883	if ((int32)num != (int32)0xffffffff)
884		return num;
885
886#if SIZEOF_INT64 < 8
887	rprintf(FERROR, "Integer overflow: attempted 64-bit offset\n");
888	exit_cleanup(RERR_UNSUPPORTED);
889#else
890	readfd(f,b,8);
891	num = IVAL(b,0) | (((int64)IVAL(b,4))<<32);
892#endif
893
894	return num;
895}
896
/* Read exactly len bytes from f into buf. */
void read_buf(int f, char *buf, size_t len)
{
	readfd(f, buf, len);
}
901
/* Read exactly len bytes from f into buf and add a terminating '\0', so
 * buf must have room for len + 1 bytes. */
void read_sbuf(int f, char *buf, size_t len)
{
	readfd(f, buf, len);
	*(buf + len) = '\0';
}
907
908uchar read_byte(int f)
909{
910	uchar c;
911	readfd(f, (char *)&c, 1);
912	return c;
913}
914
915int read_vstring(int f, char *buf, int bufsize)
916{
917	int len = read_byte(f);
918
919	if (len & 0x80)
920		len = (len & ~0x80) * 0x100 + read_byte(f);
921
922	if (len >= bufsize) {
923		rprintf(FERROR, "over-long vstring received (%d > %d)\n",
924			len, bufsize - 1);
925		return -1;
926	}
927
928	if (len)
929		readfd(f, buf, len);
930	buf[len] = '\0';
931	return len;
932}
933
934/* Populate a sum_struct with values from the socket.  This is
935 * called by both the sender and the receiver. */
936void read_sum_head(int f, struct sum_struct *sum)
937{
938	sum->count = read_int(f);
939	if (sum->count < 0) {
940		rprintf(FERROR, "Invalid checksum count %ld [%s]\n",
941			(long)sum->count, who_am_i());
942		exit_cleanup(RERR_PROTOCOL);
943	}
944	sum->blength = read_int(f);
945	if (sum->blength < 0 || sum->blength > MAX_BLOCK_SIZE) {
946		rprintf(FERROR, "Invalid block length %ld [%s]\n",
947			(long)sum->blength, who_am_i());
948		exit_cleanup(RERR_PROTOCOL);
949	}
950	sum->s2length = protocol_version < 27 ? csum_length : (int)read_int(f);
951	if (sum->s2length < 0 || sum->s2length > MD4_SUM_LENGTH) {
952		rprintf(FERROR, "Invalid checksum length %d [%s]\n",
953			sum->s2length, who_am_i());
954		exit_cleanup(RERR_PROTOCOL);
955	}
956	sum->remainder = read_int(f);
957	if (sum->remainder < 0 || sum->remainder > sum->blength) {
958		rprintf(FERROR, "Invalid remainder length %ld [%s]\n",
959			(long)sum->remainder, who_am_i());
960		exit_cleanup(RERR_PROTOCOL);
961	}
962}
963
964/* Send the values from a sum_struct over the socket.  Set sum to
965 * NULL if there are no checksums to send.  This is called by both
966 * the generator and the sender. */
967void write_sum_head(int f, struct sum_struct *sum)
968{
969	static struct sum_struct null_sum;
970
971	if (sum == NULL)
972		sum = &null_sum;
973
974	write_int(f, sum->count);
975	write_int(f, sum->blength);
976	if (protocol_version >= 27)
977		write_int(f, sum->s2length);
978	write_int(f, sum->remainder);
979}
980
981/**
982 * Sleep after writing to limit I/O bandwidth usage.
983 *
984 * @todo Rather than sleeping after each write, it might be better to
985 * use some kind of averaging.  The current algorithm seems to always
986 * use a bit less bandwidth than specified, because it doesn't make up
987 * for slow periods.  But arguably this is a feature.  In addition, we
988 * ought to take the time used to write the data into account.
989 *
990 * During some phases of big transfers (file FOO is uptodate) this is
991 * called with a small bytes_written every time.  As the kernel has to
992 * round small waits up to guarantee that we actually wait at least the
993 * requested number of microseconds, this can become grossly inaccurate.
994 * We therefore keep track of the bytes we've written over time and only
995 * sleep when the accumulated delay is at least 1 tenth of a second.
996 **/
997static void sleep_for_bwlimit(int bytes_written)
998{
999	static struct timeval prior_tv;
1000	static long total_written = 0;
1001	struct timeval tv, start_tv;
1002	long elapsed_usec, sleep_usec;
1003
1004#define ONE_SEC	1000000L /* # of microseconds in a second */
1005
1006	if (!bwlimit_writemax)
1007		return;
1008
1009	total_written += bytes_written;
1010
1011	gettimeofday(&start_tv, NULL);
1012	if (prior_tv.tv_sec) {
1013		elapsed_usec = (start_tv.tv_sec - prior_tv.tv_sec) * ONE_SEC
1014			     + (start_tv.tv_usec - prior_tv.tv_usec);
1015		total_written -= elapsed_usec * bwlimit / (ONE_SEC/1024);
1016		if (total_written < 0)
1017			total_written = 0;
1018	}
1019
1020	sleep_usec = total_written * (ONE_SEC/1024) / bwlimit;
1021	if (sleep_usec < ONE_SEC / 10) {
1022		prior_tv = start_tv;
1023		return;
1024	}
1025
1026	tv.tv_sec  = sleep_usec / ONE_SEC;
1027	tv.tv_usec = sleep_usec % ONE_SEC;
1028	select(0, NULL, NULL, NULL, &tv);
1029
1030	gettimeofday(&prior_tv, NULL);
1031	elapsed_usec = (prior_tv.tv_sec - start_tv.tv_sec) * ONE_SEC
1032		     + (prior_tv.tv_usec - start_tv.tv_usec);
1033	total_written = (sleep_usec - elapsed_usec) * bwlimit / (ONE_SEC/1024);
1034}
1035
/* Write len bytes to the file descriptor fd, looping as necessary to get
 * the job done and also (in certain circumstances) reading any data on
 * msg_fd_in to avoid deadlock.
 *
 * no_flush is raised for the duration of the call, and
 * defer_forwarding_messages is set once some data has been written and
 * restored to its entry value before returning.  A failed write ends the
 * run with RERR_STREAMIO.
 *
 * This function underlies the multiplexing system.  The body of the
 * application never calls this function directly. */
static void writefd_unbuffered(int fd,char *buf,size_t len)
{
	size_t n, total = 0;
	fd_set w_fds, r_fds, e_fds;
	int maxfd, count, cnt, using_r_fds;
	int defer_save = defer_forwarding_messages;
	struct timeval tv;

	/* Makes io_flush() skip writing the output buffer while we run. */
	no_flush++;

	while (total < len) {
		FD_ZERO(&w_fds);
		FD_SET(fd, &w_fds);
		FD_ZERO(&e_fds);
		FD_SET(fd, &e_fds);
		maxfd = fd;

		/* Also watch the message fd (if one is set) for input. */
		if (msg_fd_in >= 0) {
			FD_ZERO(&r_fds);
			FD_SET(msg_fd_in, &r_fds);
			if (msg_fd_in > maxfd)
				maxfd = msg_fd_in;
			using_r_fds = 1;
		} else
			using_r_fds = 0;

		tv.tv_sec = select_timeout;
		tv.tv_usec = 0;

		errno = 0;
		count = select(maxfd + 1, using_r_fds ? &r_fds : NULL,
			       &w_fds, &e_fds, &tv);

		if (count <= 0) {
			if (count < 0 && errno == EBADF)
				exit_cleanup(RERR_SOCKETIO);
			check_timeout();
			continue;
		}

		if (FD_ISSET(fd, &e_fds)) {
			rsyserr(FINFO, errno,
				"select exception on fd %d", fd);
		}

		if (using_r_fds && FD_ISSET(msg_fd_in, &r_fds))
			read_msg_fd();

		if (!FD_ISSET(fd, &w_fds))
			continue;

		/* Write what is left, limited to bwlimit_writemax bytes per
		 * write() when that is set. */
		n = len - total;
		if (bwlimit_writemax && n > bwlimit_writemax)
			n = bwlimit_writemax;
		cnt = write(fd, buf + total, n);

		if (cnt <= 0) {
			if (cnt < 0) {
				if (errno == EINTR)
					continue;
				if (errno == EWOULDBLOCK || errno == EAGAIN) {
					msleep(1);
					continue;
				}
			}

			/* Don't try to write errors back across the stream. */
			if (fd == sock_f_out)
				close_multiplexing_out();
			rsyserr(FERROR, errno,
				"writefd_unbuffered failed to write %ld bytes [%s]",
				(long)len, who_am_i());
			/* If the other side is sending us error messages, try
			 * to grab any messages they sent before they died. */
			/* The loop condition never changes here, so this only
			 * ends when something below it exits the program
			 * (NOTE(review): presumably the EOF/timeout handling
			 * in the read path -- confirm).  io_filesfrom_buf is
			 * just used as scratch space for the data read. */
			while (fd == sock_f_out && io_multiplexing_in) {
				set_io_timeout(30);
				ignore_timeout = 0;
				readfd_unbuffered(sock_f_in, io_filesfrom_buf,
						  sizeof io_filesfrom_buf);
			}
			exit_cleanup(RERR_STREAMIO);
		}

		total += cnt;
		/* From here until we return, send_msg() queues messages on
		 * msg2sndr instead of writing them. */
		defer_forwarding_messages = 1;

		if (fd == sock_f_out) {
			if (io_timeout || am_generator)
				last_io_out = time(NULL);
			sleep_for_bwlimit(cnt);
		}
	}

	defer_forwarding_messages = defer_save;
	no_flush--;
}
1138
1139static void msg2sndr_flush(void)
1140{
1141	if (defer_forwarding_messages)
1142		return;
1143
1144	while (msg2sndr.head && io_multiplexing_out) {
1145		struct msg_list_item *m = msg2sndr.head;
1146		if (!(msg2sndr.head = m->next))
1147			msg2sndr.tail = NULL;
1148		stats.total_written += m->len;
1149		defer_forwarding_messages = 1;
1150		writefd_unbuffered(sock_f_out, m->buf, m->len);
1151		defer_forwarding_messages = 0;
1152		free(m);
1153	}
1154}
1155
1156/**
1157 * Write an message to a multiplexed stream. If this fails then rsync
1158 * exits.
1159 **/
1160static void mplex_write(enum msgcode code, char *buf, size_t len)
1161{
1162	char buffer[1024];
1163	size_t n = len;
1164
1165	SIVAL(buffer, 0, ((MPLEX_BASE + (int)code)<<24) + len);
1166
1167	if (n > sizeof buffer - 4)
1168		n = 0;
1169	else
1170		memcpy(buffer + 4, buf, n);
1171
1172	writefd_unbuffered(sock_f_out, buffer, n+4);
1173
1174	len -= n;
1175	buf += n;
1176
1177	if (len) {
1178		defer_forwarding_messages = 1;
1179		writefd_unbuffered(sock_f_out, buf, len);
1180		defer_forwarding_messages = 0;
1181		msg2sndr_flush();
1182	}
1183}
1184
1185void io_flush(int flush_it_all)
1186{
1187	msg2genr_flush(flush_it_all);
1188	msg2sndr_flush();
1189
1190	if (!iobuf_out_cnt || no_flush)
1191		return;
1192
1193	if (io_multiplexing_out)
1194		mplex_write(MSG_DATA, iobuf_out, iobuf_out_cnt);
1195	else
1196		writefd_unbuffered(sock_f_out, iobuf_out, iobuf_out_cnt);
1197	iobuf_out_cnt = 0;
1198}
1199
1200static void writefd(int fd,char *buf,size_t len)
1201{
1202	if (fd == msg_fd_out) {
1203		rprintf(FERROR, "Internal error: wrong write used in receiver.\n");
1204		exit_cleanup(RERR_PROTOCOL);
1205	}
1206
1207	if (fd == sock_f_out)
1208		stats.total_written += len;
1209
1210	if (fd == write_batch_monitor_out) {
1211		if ((size_t)write(batch_fd, buf, len) != len)
1212			exit_cleanup(RERR_FILEIO);
1213	}
1214
1215	if (!iobuf_out || fd != sock_f_out) {
1216		writefd_unbuffered(fd, buf, len);
1217		return;
1218	}
1219
1220	while (len) {
1221		int n = MIN((int)len, IO_BUFFER_SIZE - iobuf_out_cnt);
1222		if (n > 0) {
1223			memcpy(iobuf_out+iobuf_out_cnt, buf, n);
1224			buf += n;
1225			len -= n;
1226			iobuf_out_cnt += n;
1227		}
1228
1229		if (iobuf_out_cnt == IO_BUFFER_SIZE)
1230			io_flush(NORMAL_FLUSH);
1231	}
1232}
1233
1234void write_shortint(int f, int x)
1235{
1236	uchar b[2];
1237	b[0] = x;
1238	b[1] = x >> 8;
1239	writefd(f, (char *)b, 2);
1240}
1241
1242void write_int(int f,int32 x)
1243{
1244	char b[4];
1245	SIVAL(b,0,x);
1246	writefd(f,b,4);
1247}
1248
1249/*
1250 * Note: int64 may actually be a 32-bit type if ./configure couldn't find any
1251 * 64-bit types on this platform.
1252 */
1253void write_longint(int f, int64 x)
1254{
1255	char b[8];
1256
1257	if (x <= 0x7FFFFFFF) {
1258		write_int(f, (int)x);
1259		return;
1260	}
1261
1262#if SIZEOF_INT64 < 8
1263	rprintf(FERROR, "Integer overflow: attempted 64-bit offset\n");
1264	exit_cleanup(RERR_UNSUPPORTED);
1265#else
1266	write_int(f, (int32)0xFFFFFFFF);
1267	SIVAL(b,0,(x&0xFFFFFFFF));
1268	SIVAL(b,4,((x>>32)&0xFFFFFFFF));
1269
1270	writefd(f,b,8);
1271#endif
1272}
1273
/* Write len bytes starting at buf to f; a plain pass-through to writefd(). */
void write_buf(int f,char *buf,size_t len)
{
	writefd(f, buf, len);
}
1278
/** Write the NUL-terminated string buf to the connection; the
 * terminating NUL itself is not sent. */
void write_sbuf(int f, char *buf)
{
	size_t nbytes = strlen(buf);

	writefd(f, buf, nbytes);
}
1284
1285void write_byte(int f, uchar c)
1286{
1287	writefd(f, (char *)&c, 1);
1288}
1289
1290void write_vstring(int f, char *str, int len)
1291{
1292	uchar lenbuf[3], *lb = lenbuf;
1293
1294	if (len > 0x7F) {
1295		if (len > 0x7FFF) {
1296			rprintf(FERROR,
1297				"attempting to send over-long vstring (%d > %d)\n",
1298				len, 0x7FFF);
1299			exit_cleanup(RERR_PROTOCOL);
1300		}
1301		*lb++ = len / 0x100 + 0x80;
1302	}
1303	*lb = len;
1304
1305	writefd(f, (char*)lenbuf, lb - lenbuf + 1);
1306	if (len)
1307		writefd(f, str, len);
1308}
1309
/**
 * Read one line from @p f into @p buf, one byte at a time, storing at
 * most @p maxlen characters plus a terminating null.  Carriage returns
 * are discarded and the newline that ends the line is not stored.
 *
 * @return 1 when a newline was seen with room to spare; 0 when a zero
 * byte was left in the buffer after a read (the slot is cleared before
 * each read) or when @p maxlen characters were stored before a newline
 * arrived.
 **/
int read_line(int f, char *buf, size_t maxlen)
{
	char *p = buf;
	size_t room;

	for (room = maxlen; room != 0; ) {
		*p = 0;
		read_buf(f, p, 1);
		if (*p == 0)
			return 0;
		if (*p == '\n')
			break;
		if (*p == '\r')
			continue;	/* not kept: the next read reuses this slot */
		p++;
		room--;
	}

	*p = '\0';
	return room > 0;
}
1334
1335void io_printf(int fd, const char *format, ...)
1336{
1337	va_list ap;
1338	char buf[BIGPATHBUFLEN];
1339	int len;
1340
1341	va_start(ap, format);
1342	len = vsnprintf(buf, sizeof buf, format, ap);
1343	va_end(ap);
1344
1345	if (len < 0)
1346		exit_cleanup(RERR_STREAMIO);
1347
1348	if (len > (int)sizeof buf) {
1349		rprintf(FERROR, "io_printf() was too long for the buffer.\n");
1350		exit_cleanup(RERR_STREAMIO);
1351	}
1352
1353	write_sbuf(fd, buf);
1354}
1355
1356/** Setup for multiplexing a MSG_* stream with the data stream. */
1357void io_start_multiplex_out(void)
1358{
1359	io_flush(NORMAL_FLUSH);
1360	io_start_buffering_out();
1361	io_multiplexing_out = 1;
1362}
1363
1364/** Setup for multiplexing a MSG_* stream with the data stream. */
1365void io_start_multiplex_in(void)
1366{
1367	io_flush(NORMAL_FLUSH);
1368	io_start_buffering_in();
1369	io_multiplexing_in = 1;
1370}
1371
1372/** Write an message to the multiplexed data stream. */
1373int io_multiplex_write(enum msgcode code, char *buf, size_t len)
1374{
1375	if (!io_multiplexing_out)
1376		return 0;
1377
1378	io_flush(NORMAL_FLUSH);
1379	stats.total_written += (len+4);
1380	mplex_write(code, buf, len);
1381	return 1;
1382}
1383
1384void close_multiplexing_in(void)
1385{
1386	io_multiplexing_in = 0;
1387}
1388
1389/** Stop output multiplexing. */
1390void close_multiplexing_out(void)
1391{
1392	io_multiplexing_out = 0;
1393}
1394
1395void start_write_batch(int fd)
1396{
1397	write_stream_flags(batch_fd);
1398
1399	/* Some communication has already taken place, but we don't
1400	 * enable batch writing until here so that we can write a
1401	 * canonical record of the communication even though the
1402	 * actual communication so far depends on whether a daemon
1403	 * is involved. */
1404	write_int(batch_fd, protocol_version);
1405	write_int(batch_fd, checksum_seed);
1406
1407	if (am_sender)
1408		write_batch_monitor_out = fd;
1409	else
1410		write_batch_monitor_in = fd;
1411}
1412
1413void stop_write_batch(void)
1414{
1415	write_batch_monitor_out = -1;
1416	write_batch_monitor_in = -1;
1417}
1418