slave.c revision 1554
1/*-
2 * Copyright (c) 1985, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
/*
 * SCCS identification string, compiled in unless building under lint.
 * It is never written to, so it is declared const.
 */
#ifndef lint
static const char sccsid[] = "@(#)slave.c	8.1 (Berkeley) 6/6/93";
#endif /* not lint */

#ifdef sgi
#ident "$Revision: 1.20 $"
#endif
41
42#include "globals.h"
43#include <setjmp.h>
44#include "pathnames.h"
45
46extern jmp_buf jmpenv;
47extern int Mflag;
48extern int justquit;
49
50extern u_short sequence;
51
52static char master_name[MAXHOSTNAMELEN+1];
53static struct netinfo *old_slavenet;
54static int old_status;
55
56static void schgdate __P((struct tsp *, char *));
57static void setmaster __P((struct tsp *));
58static void answerdelay __P((void));
59
60#ifdef sgi
61extern void logwtmp __P((struct timeval *, struct timeval *));
62#else
63extern void logwtmp __P((char *, char *, char *));
64#endif /* sgi */
65
66int
67slave()
68{
69	int tries;
70	long electiontime, refusetime, looktime, looptime, adjtime;
71	u_short seq;
72	long fastelection;
73#define FASTTOUT 3
74	struct in_addr cadr;
75	struct timeval otime;
76	struct sockaddr_in taddr;
77	char tname[MAXHOSTNAMELEN];
78	struct tsp *msg, to;
79	struct timeval ntime, wait;
80	struct tsp *answer;
81	int timeout();
82	char olddate[32];
83	char newdate[32];
84	struct netinfo *ntp;
85	struct hosttbl *htp;
86
87
88	old_slavenet = 0;
89	seq = 0;
90	refusetime = 0;
91	adjtime = 0;
92
93	(void)gettimeofday(&ntime, 0);
94	electiontime = ntime.tv_sec + delay2;
95	fastelection = ntime.tv_sec + FASTTOUT;
96	if (justquit)
97		looktime = electiontime;
98	else
99		looktime = fastelection;
100	looptime = fastelection;
101
102	if (slavenet)
103		xmit(TSP_SLAVEUP, 0, &slavenet->dest_addr);
104	if (status & MASTER) {
105		for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
106			if (ntp->status == MASTER)
107				masterup(ntp);
108		}
109	}
110
111loop:
112	get_goodgroup(0);
113	(void)gettimeofday(&ntime, (struct timezone *)0);
114	if (ntime.tv_sec > electiontime) {
115		if (trace)
116			fprintf(fd, "election timer expired\n");
117		longjmp(jmpenv, 1);
118	}
119
120	if (ntime.tv_sec >= looktime) {
121		if (trace)
122			fprintf(fd, "Looking for nets to master\n");
123
124		if (Mflag && nignorednets > 0) {
125			for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
126				if (ntp->status == IGNORE
127				    || ntp->status == NOMASTER) {
128					lookformaster(ntp);
129					if (ntp->status == MASTER) {
130						masterup(ntp);
131					} else if (ntp->status == MASTER) {
132						ntp->status = NOMASTER;
133					}
134				}
135				if (ntp->status == MASTER
136				    && --ntp->quit_count < 0)
137					ntp->quit_count = 0;
138			}
139			makeslave(slavenet);	/* prune extras */
140			setstatus();
141		}
142		(void)gettimeofday(&ntime, 0);
143		looktime = ntime.tv_sec + delay2;
144	}
145	if (ntime.tv_sec >= looptime) {
146		if (trace)
147			fprintf(fd, "Looking for loops\n");
148		for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
149		    if (ntp->status == MASTER) {
150			to.tsp_type = TSP_LOOP;
151			to.tsp_vers = TSPVERSION;
152			to.tsp_seq = sequence++;
153			to.tsp_hopcnt = MAX_HOPCNT;
154			(void)strcpy(to.tsp_name, hostname);
155			bytenetorder(&to);
156			if (sendto(sock, (char *)&to, sizeof(struct tsp), 0,
157				   (struct sockaddr*)&ntp->dest_addr,
158				   sizeof(ntp->dest_addr)) < 0) {
159				trace_sendto_err(ntp->dest_addr.sin_addr);
160			}
161		    }
162		}
163		(void)gettimeofday(&ntime, 0);
164		looptime = ntime.tv_sec + delay2;
165	}
166
167	wait.tv_sec = min(electiontime,min(looktime,looptime)) - ntime.tv_sec;
168	if (wait.tv_sec < 0)
169		wait.tv_sec = 0;
170	wait.tv_sec += FASTTOUT;
171	wait.tv_usec = 0;
172	msg = readmsg(TSP_ANY, ANYADDR, &wait, 0);
173
174	if (msg != NULL) {
175		/*
176		 * filter stuff not for us
177		 */
178		switch (msg->tsp_type) {
179		case TSP_SETDATE:
180		case TSP_TRACEOFF:
181		case TSP_TRACEON:
182			/*
183			 * XXX check to see they are from ourself
184			 */
185			break;
186
187		case TSP_TEST:
188		case TSP_MSITE:
189			break;
190
191		case TSP_MASTERUP:
192			if (!fromnet) {
193				if (trace) {
194					fprintf(fd, "slave ignored: ");
195					print(msg, &from);
196				}
197				goto loop;
198			}
199			break;
200
201		default:
202			if (!fromnet
203			    || fromnet->status == IGNORE
204			    || fromnet->status == NOMASTER) {
205				if (trace) {
206					fprintf(fd, "slave ignored: ");
207					print(msg, &from);
208				}
209				goto loop;
210			}
211			break;
212		}
213
214
215		/*
216		 * now process the message
217		 */
218		switch (msg->tsp_type) {
219
220		case TSP_ADJTIME:
221			if (fromnet != slavenet)
222				break;
223			if (!good_host_name(msg->tsp_name)) {
224				syslog(LOG_NOTICE,
225				   "attempted time adjustment by %s",
226				       msg->tsp_name);
227				suppress(&from, msg->tsp_name, fromnet);
228				break;
229			}
230			/*
231			 * Speed up loop detection in case we have a loop.
232			 * Otherwise the clocks can race until the loop
233			 * is found.
234			 */
235			(void)gettimeofday(&otime, 0);
236			if (adjtime < otime.tv_sec)
237				looptime -= (looptime-otime.tv_sec)/2 + 1;
238
239			setmaster(msg);
240			if (seq != msg->tsp_seq) {
241				seq = msg->tsp_seq;
242				synch(tvtomsround(msg->tsp_time));
243			}
244			(void)gettimeofday(&ntime, 0);
245			electiontime = ntime.tv_sec + delay2;
246			fastelection = ntime.tv_sec + FASTTOUT;
247			adjtime = ntime.tv_sec + SAMPLEINTVL*2;
248			break;
249
250		case TSP_SETTIME:
251			if (fromnet != slavenet)
252				break;
253			if (seq == msg->tsp_seq)
254				break;
255			seq = msg->tsp_seq;
256
257			/* adjust time for residence on the queue */
258			(void)gettimeofday(&otime, 0);
259			adj_msg_time(msg,&otime);
260#ifdef sgi
261			(void)cftime(newdate, "%D %T", &msg->tsp_time.tv_sec);
262			(void)cftime(olddate, "%D %T", &otime.tv_sec);
263#else
264			/*
265			 * the following line is necessary due to syslog
266			 * calling ctime() which clobbers the static buffer
267			 */
268			(void)strcpy(olddate, date());
269			(void)strcpy(newdate, ctime(&msg->tsp_time.tv_sec));
270#endif /* sgi */
271
272			if (!good_host_name(msg->tsp_name)) {
273				syslog(LOG_NOTICE,
274			    "attempted time setting by untrusted %s to %s",
275				       msg->tsp_name, newdate);
276				suppress(&from, msg->tsp_name, fromnet);
277				break;
278			}
279
280			setmaster(msg);
281			timevalsub(&ntime, &msg->tsp_time, &otime);
282			if (ntime.tv_sec < MAXADJ && ntime.tv_sec > -MAXADJ) {
283				/*
284				 * do not change the clock if we can adjust it
285				 */
286				synch(tvtomsround(ntime));
287			} else {
288#ifdef sgi
289				if (0 > settimeofday(&msg->tsp_time, 0)) {
290					syslog(LOG_ERR,"settimeofdate(): %m");
291					break;
292				}
293				logwtmp(&otime, &msg->tsp_time);
294#else
295				logwtmp("|", "date", "");
296				(void)settimeofday(&msg->tsp_time, 0);
297				logwtmp("}", "date", "");
298#endif /* sgi */
299				syslog(LOG_NOTICE,
300				       "date changed by %s from %s",
301					msg->tsp_name, olddate);
302				if (status & MASTER)
303					spreadtime();
304			}
305			(void)gettimeofday(&ntime, 0);
306			electiontime = ntime.tv_sec + delay2;
307			fastelection = ntime.tv_sec + FASTTOUT;
308
309/* This patches a bad protocol bug.  Imagine a system with several networks,
310 * where there are a pair of redundant gateways between a pair of networks,
311 * each running timed.  Assume that we start with a third machine mastering
312 * one of the networks, and one of the gateways mastering the other.
313 * Imagine that the third machine goes away and the non-master gateway
314 * decides to replace it.  If things are timed just 'right,' we will have
315 * each gateway mastering one network for a little while.  If a SETTIME
316 * message gets into the network at that time, perhaps from the newly
317 * masterful gateway as it was taking control, the SETTIME will loop
318 * forever.  Each time a gateway receives it on its slave side, it will
319 * call spreadtime to forward it on its mastered network.  We are now in
320 * a permanent loop, since the SETTIME msgs will keep any clock
321 * in the network from advancing.  Normally, the 'LOOP' stuff will detect
322 * and correct the situation.  However, with the clocks stopped, the
323 * 'looptime' timer cannot expire.  While they are in this state, the
324 * masters will try to saturate the network with SETTIME packets.
325 */
326			looptime = ntime.tv_sec + (looptime-otime.tv_sec)/2-1;
327			break;
328
329		case TSP_MASTERUP:
330			if (slavenet && fromnet != slavenet)
331				break;
332			if (!good_host_name(msg->tsp_name)) {
333				suppress(&from, msg->tsp_name, fromnet);
334				if (electiontime > fastelection)
335					electiontime = fastelection;
336				break;
337			}
338			makeslave(fromnet);
339			setmaster(msg);
340			setstatus();
341			answerdelay();
342			xmit(TSP_SLAVEUP, 0, &from);
343			(void)gettimeofday(&ntime, 0);
344			electiontime = ntime.tv_sec + delay2;
345			fastelection = ntime.tv_sec + FASTTOUT;
346			refusetime = 0;
347			break;
348
349		case TSP_MASTERREQ:
350			if (fromnet->status != SLAVE)
351				break;
352			(void)gettimeofday(&ntime, 0);
353			electiontime = ntime.tv_sec + delay2;
354			break;
355
356		case TSP_SETDATE:
357#ifdef sgi
358			(void)cftime(newdate, "%D %T", &msg->tsp_time.tv_sec);
359#else
360			(void)strcpy(newdate, ctime(&msg->tsp_time.tv_sec));
361#endif /* sgi */
362			schgdate(msg, newdate);
363			break;
364
365		case TSP_SETDATEREQ:
366			if (fromnet->status != MASTER)
367				break;
368#ifdef sgi
369			(void)cftime(newdate, "%D %T", &msg->tsp_time.tv_sec);
370#else
371			(void)strcpy(newdate, ctime(&msg->tsp_time.tv_sec));
372#endif /* sgi */
373			htp = findhost(msg->tsp_name);
374			if (0 == htp) {
375				syslog(LOG_WARNING,
376				       "DATEREQ from uncontrolled machine");
377				break;
378			}
379			if (!htp->good) {
380				syslog(LOG_WARNING,
381				"attempted date change by untrusted %s to %s",
382				       htp->name, newdate);
383				spreadtime();
384				break;
385			}
386			schgdate(msg, newdate);
387			break;
388
389		case TSP_TRACEON:
390			traceon();
391			break;
392
393		case TSP_TRACEOFF:
394			traceoff("Tracing ended at %s\n");
395			break;
396
397		case TSP_SLAVEUP:
398			newslave(msg);
399			break;
400
401		case TSP_ELECTION:
402			if (fromnet->status == SLAVE) {
403				(void)gettimeofday(&ntime, 0);
404				electiontime = ntime.tv_sec + delay2;
405				fastelection = ntime.tv_sec + FASTTOUT;
406				seq = 0;
407				if (!good_host_name(msg->tsp_name)) {
408					syslog(LOG_NOTICE,
409					       "suppress election of %s",
410					       msg->tsp_name);
411					to.tsp_type = TSP_QUIT;
412					electiontime = fastelection;
413				} else if (cadr.s_addr != from.sin_addr.s_addr
414					   && ntime.tv_sec < refusetime) {
415/* if the candidate has to repeat itself, the old code would refuse it
416 * the second time.  That would prevent elections.
417 */
418					to.tsp_type = TSP_REFUSE;
419				} else {
420					cadr.s_addr = from.sin_addr.s_addr;
421					to.tsp_type = TSP_ACCEPT;
422					refusetime = ntime.tv_sec + 30;
423				}
424				taddr = from;
425				(void)strcpy(tname, msg->tsp_name);
426				(void)strcpy(to.tsp_name, hostname);
427				answerdelay();
428				if (!acksend(&to, &taddr, tname,
429					     TSP_ACK, 0, 0))
430					syslog(LOG_WARNING,
431					     "no answer from candidate %s\n",
432					       tname);
433
434			} else {	/* fromnet->status == MASTER */
435				htp = addmach(msg->tsp_name, &from,fromnet);
436				to.tsp_type = TSP_QUIT;
437				(void)strcpy(to.tsp_name, hostname);
438				if (!acksend(&to, &htp->addr, htp->name,
439					     TSP_ACK, 0, htp->noanswer)) {
440					syslog(LOG_ERR,
441					  "no reply from %s to ELECTION-QUIT",
442					       htp->name);
443					(void)remmach(htp);
444				}
445			}
446			break;
447
448		case TSP_CONFLICT:
449			if (fromnet->status != MASTER)
450				break;
451			/*
452			 * After a network partition, there can be
453			 * more than one master: the first slave to
454			 * come up will notify here the situation.
455			 */
456			(void)strcpy(to.tsp_name, hostname);
457
458			/* The other master often gets into the same state,
459			 * with boring results.
460			 */
461			ntp = fromnet;	/* (acksend() can leave fromnet=0 */
462			for (tries = 0; tries < 3; tries++) {
463				to.tsp_type = TSP_RESOLVE;
464				answer = acksend(&to, &ntp->dest_addr,
465						 ANYADDR, TSP_MASTERACK,
466						 ntp, 0);
467				if (answer == NULL)
468					break;
469				htp = addmach(answer->tsp_name,&from,ntp);
470				to.tsp_type = TSP_QUIT;
471				answer = acksend(&to, &htp->addr, htp->name,
472						 TSP_ACK, 0, htp->noanswer);
473				if (!answer) {
474					syslog(LOG_WARNING,
475				  "conflict error: no reply from %s to QUIT",
476						htp->name);
477					(void)remmach(htp);
478				}
479			}
480			masterup(ntp);
481			break;
482
483		case TSP_MSITE:
484			if (!slavenet)
485				break;
486			taddr = from;
487			to.tsp_type = TSP_MSITEREQ;
488			to.tsp_vers = TSPVERSION;
489			to.tsp_seq = 0;
490			(void)strcpy(to.tsp_name, hostname);
491			answer = acksend(&to, &slavenet->dest_addr,
492					 ANYADDR, TSP_ACK,
493					 slavenet, 0);
494			if (answer != NULL
495			    && good_host_name(answer->tsp_name)) {
496				setmaster(answer);
497				to.tsp_type = TSP_ACK;
498				(void)strcpy(to.tsp_name, answer->tsp_name);
499				bytenetorder(&to);
500				if (sendto(sock, (char *)&to,
501					   sizeof(struct tsp), 0,
502					   (struct sockaddr*)&taddr, sizeof(taddr)) < 0) {
503					trace_sendto_err(taddr.sin_addr);
504				}
505			}
506			break;
507
508		case TSP_MSITEREQ:
509			break;
510
511		case TSP_ACCEPT:
512		case TSP_REFUSE:
513		case TSP_RESOLVE:
514			break;
515
516		case TSP_QUIT:
517			doquit(msg);		/* become a slave */
518			break;
519
520		case TSP_TEST:
521			electiontime = 0;
522			break;
523
524		case TSP_LOOP:
525			/* looking for loops of masters */
526			if (!(status & MASTER))
527				break;
528			if (fromnet->status == SLAVE) {
529			    if (!strcmp(msg->tsp_name, hostname)) {
530				/*
531				 * Someone forwarded our message back to
532				 * us.  There must be a loop.  Tell the
533				 * master of this network to quit.
534				 *
535				 * The other master often gets into
536				 * the same state, with boring results.
537				 */
538				ntp = fromnet;
539				for (tries = 0; tries < 3; tries++) {
540				    to.tsp_type = TSP_RESOLVE;
541				    answer = acksend(&to, &ntp->dest_addr,
542						     ANYADDR, TSP_MASTERACK,
543						     ntp,0);
544				    if (answer == NULL)
545					break;
546				    taddr = from;
547				    (void)strcpy(tname, answer->tsp_name);
548				    to.tsp_type = TSP_QUIT;
549				    (void)strcpy(to.tsp_name, hostname);
550				    if (!acksend(&to, &taddr, tname,
551						 TSP_ACK, 0, 1)) {
552					syslog(LOG_ERR,
553					"no reply from %s to slave LOOP-QUIT",
554						 tname);
555				    } else {
556					electiontime = 0;
557				    }
558				}
559				(void)gettimeofday(&ntime, 0);
560				looptime = ntime.tv_sec + FASTTOUT;
561			    } else {
562				if (msg->tsp_hopcnt-- < 1)
563				    break;
564				bytenetorder(msg);
565				for (ntp = nettab; ntp != 0; ntp = ntp->next) {
566				    if (ntp->status == MASTER
567					&& 0 > sendto(sock, (char *)msg,
568						      sizeof(struct tsp), 0,
569					      (struct sockaddr*)&ntp->dest_addr,
570						      sizeof(ntp->dest_addr)))
571				    trace_sendto_err(ntp->dest_addr.sin_addr);
572				}
573			    }
574			} else {	/* fromnet->status == MASTER */
575			    /*
576			     * We should not have received this from a net
577			     * we are master on.  There must be two masters,
578			     * unless the packet was really from us.
579			     */
580			    if (from.sin_addr.s_addr
581				== fromnet->my_addr.s_addr) {
582				if (trace)
583				    fprintf(fd,"discarding forwarded LOOP\n");
584				break;
585			    }
586
587			    /*
588			     * The other master often gets into the same
589			     * state, with boring results.
590			     */
591			    ntp = fromnet;
592			    for (tries = 0; tries < 3; tries++) {
593				to.tsp_type = TSP_RESOLVE;
594				answer = acksend(&to, &ntp->dest_addr,
595						 ANYADDR, TSP_MASTERACK,
596						ntp,0);
597				if (!answer)
598					break;
599				htp = addmach(answer->tsp_name,
600					      &from,ntp);
601				to.tsp_type = TSP_QUIT;
602				(void)strcpy(to.tsp_name, hostname);
603				if (!acksend(&to,&htp->addr,htp->name,
604					     TSP_ACK, 0, htp->noanswer)) {
605					syslog(LOG_ERR,
606				    "no reply from %s to master LOOP-QUIT",
607					       htp->name);
608					(void)remmach(htp);
609				}
610			    }
611			    (void)gettimeofday(&ntime, 0);
612			    looptime = ntime.tv_sec + FASTTOUT;
613			}
614			break;
615		default:
616			if (trace) {
617				fprintf(fd, "garbage message: ");
618				print(msg, &from);
619			}
620			break;
621		}
622	}
623	goto loop;
624}
625
626
627/*
628 * tell the world who our master is
629 */
630static void
631setmaster(msg)
632	struct tsp *msg;
633{
634	if (slavenet
635	    && (slavenet != old_slavenet
636		|| strcmp(msg->tsp_name, master_name)
637		|| old_status != status)) {
638		(void)strcpy(master_name, msg->tsp_name);
639		old_slavenet = slavenet;
640		old_status = status;
641
642		if (status & MASTER) {
643			syslog(LOG_NOTICE, "submaster to %s", master_name);
644			if (trace)
645				fprintf(fd, "submaster to %s\n", master_name);
646
647		} else {
648			syslog(LOG_NOTICE, "slave to %s", master_name);
649			if (trace)
650				fprintf(fd, "slave to %s\n", master_name);
651		}
652	}
653}
654
655
656
657/*
658 * handle date change request on a slave
659 */
660static void
661schgdate(msg, newdate)
662	struct tsp *msg;
663	char *newdate;
664{
665	struct tsp to;
666	u_short seq;
667	struct sockaddr_in taddr;
668	struct timeval otime;
669
670	if (!slavenet)
671		return;			/* no where to forward */
672
673	taddr = from;
674	seq = msg->tsp_seq;
675
676	syslog(LOG_INFO,
677	       "forwarding date change by %s to %s",
678	       msg->tsp_name, newdate);
679
680	/* adjust time for residence on the queue */
681	(void)gettimeofday(&otime, 0);
682	adj_msg_time(msg, &otime);
683
684	to.tsp_type = TSP_SETDATEREQ;
685	to.tsp_time = msg->tsp_time;
686	(void)strcpy(to.tsp_name, hostname);
687	if (!acksend(&to, &slavenet->dest_addr,
688		     ANYADDR, TSP_DATEACK,
689		     slavenet, 0))
690		return;			/* no answer */
691
692	xmit(TSP_DATEACK, seq, &taddr);
693}
694
695
696/*
697 * Used before answering a broadcast message to avoid network
698 * contention and likely collisions.
699 */
700static void
701answerdelay()
702{
703#ifdef sgi
704	sginap(delay1);
705#else
706	struct timeval timeout;
707
708	timeout.tv_sec = 0;
709	timeout.tv_usec = delay1;
710
711	(void)select(0, (fd_set *)NULL, (fd_set *)NULL, (fd_set *)NULL,
712	    &timeout);
713	return;
714#endif /* sgi */
715}
716