ntp_proto.c revision 293896
1/*
2 * ntp_proto.c - NTP version 4 protocol machinery
3 *
4 * ATTENTION: Get approval from Dave Mills on all changes to this file!
5 *
6 */
7#ifdef HAVE_CONFIG_H
8#include <config.h>
9#endif
10
11#include "ntpd.h"
12#include "ntp_stdlib.h"
13#include "ntp_unixtime.h"
14#include "ntp_control.h"
15#include "ntp_string.h"
16#include "ntp_leapsec.h"
17#include "refidsmear.h"
18#include "lib_strbuf.h"
19
20#include <stdio.h>
21#ifdef HAVE_LIBSCF_H
22#include <libscf.h>
23#endif
24#ifdef HAVE_UNISTD_H
25#include <unistd.h>
26#endif
27
/*
 * Authentication outcomes.
 */
#define	AUTH_NONE	0	/* authentication not required */
#define	AUTH_OK		1	/* authentication OK */
#define	AUTH_ERROR	2	/* authentication error */
#define	AUTH_CRYPTO	3	/* crypto_NAK */

/*
 * AUTH(x, y) filters the authentication outcome y. If x is nonzero
 * authentication is required and only AUTH_OK is acceptable; otherwise
 * it is optional and both AUTH_OK and AUTH_NONE are acceptable.
 */
#define	AUTH(x, y)	((x) ? ((y) == AUTH_OK) \
			     : ((y) == AUTH_OK || (y) == AUTH_NONE))
39
/*
 * Kiss code classifications, as returned by kiss_code_check().
 */
enum kiss_codes {
	NOKISS		= 0,	/* No Kiss Code */
	RATEKISS	= 1,	/* Rate limit Kiss Code */
	DENYKISS	= 2,	/* Deny Kiss */
	RSTRKISS	= 3,	/* Restricted Kiss */
	XKISS		= 4,	/* Experimental Kiss */
	UNKNOWNKISS	= 5	/* Unknown Kiss Code */
};
52
/*
 * Traffic shaping parameters.
 */
#define	NTP_IBURST		6	/* packets in iburst */
#define	RESP_DELAY		1	/* refclock burst delay (s) */

/*
 * Pool soliciting restriction duration (s).
 */
#define	POOL_SOLICIT_WINDOW	8
63
/*
 * peer_select bundles the per-peer statistics that clock_select() and
 * clock_cluster() work with.
 */
typedef struct peer_select_tag {
	struct peer	*peer;		/* the peer these figures belong to */
	double		 synch;		/* sync distance */
	double		 error;		/* jitter */
	double		 seljit;	/* selection jitter */
} peer_select;
74
75/*
76 * System variables are declared here. Unless specified otherwise, all
77 * times are in seconds.
78 */
79u_char	sys_leap;		/* system leap indicator, use set_sys_leap() to change this */
80u_char	xmt_leap;		/* leap indicator sent in client requests, set up by set_sys_leap() */
81u_char	sys_stratum;		/* system stratum */
82s_char	sys_precision;		/* local clock precision (log2 s) */
83double	sys_rootdelay;		/* roundtrip delay to primary source */
84double	sys_rootdisp;		/* dispersion to primary source */
85u_int32 sys_refid;		/* reference id (network byte order) */
86l_fp	sys_reftime;		/* last update time */
87struct	peer *sys_peer;		/* current peer */
88
89#ifdef LEAP_SMEAR
90struct leap_smear_info leap_smear;
91#endif
92int leap_sec_in_progress;
93
94/*
95 * Rate controls. Leaky buckets are used to throttle the packet
96 * transmission rates in order to protect busy servers such as at NIST
97 * and USNO. There is a counter for each association and another for KoD
98 * packets. The association counter decrements each second, but not
99 * below zero. Each time a packet is sent the counter is incremented by
100 * a configurable value representing the average interval between
101 * packets. A packet is delayed as long as the counter is greater than
102 * zero. Note this does not affect the time value computations.
103 */
104/*
105 * Nonspecified system state variables
106 */
107int	sys_bclient;		/* broadcast client enable */
108double	sys_bdelay;		/* broadcast client default delay */
109int	sys_authenticate;	/* requre authentication for config */
110l_fp	sys_authdelay;		/* authentication delay */
111double	sys_offset;	/* current local clock offset */
112double	sys_mindisp = MINDISPERSE; /* minimum distance (s) */
113double	sys_maxdist = MAXDISTANCE; /* selection threshold */
114double	sys_jitter;		/* system jitter */
115u_long	sys_epoch;		/* last clock update time */
116static	double sys_clockhop;	/* clockhop threshold */
117static int leap_vote_ins;	/* leap consensus for insert */
118static int leap_vote_del;	/* leap consensus for delete */
119keyid_t	sys_private;		/* private value for session seed */
120int	sys_manycastserver;	/* respond to manycast client pkts */
121int	ntp_mode7;		/* respond to ntpdc (mode7) */
122int	peer_ntpdate;		/* active peers in ntpdate mode */
123int	sys_survivors;		/* truest of the truechimers */
124char	*sys_ident = NULL;	/* identity scheme */
125
126/*
127 * TOS and multicast mapping stuff
128 */
129int	sys_floor = 0;		/* cluster stratum floor */
130int	sys_ceiling = STRATUM_UNSPEC - 1; /* cluster stratum ceiling */
131int	sys_minsane = 1;	/* minimum candidates */
132int	sys_minclock = NTP_MINCLOCK; /* minimum candidates */
133int	sys_maxclock = NTP_MAXCLOCK; /* maximum candidates */
134int	sys_cohort = 0;		/* cohort switch */
135int	sys_orphan = STRATUM_UNSPEC + 1; /* orphan stratum */
136int	sys_orphwait = NTP_ORPHWAIT; /* orphan wait */
137int	sys_beacon = BEACON;	/* manycast beacon interval */
138int	sys_ttlmax;		/* max ttl mapping vector index */
139u_char	sys_ttl[MAX_TTL];	/* ttl mapping vector */
140
141/*
142 * Statistics counters - first the good, then the bad
143 */
144u_long	sys_stattime;		/* elapsed time */
145u_long	sys_received;		/* packets received */
146u_long	sys_processed;		/* packets for this host */
147u_long	sys_newversion;		/* current version */
148u_long	sys_oldversion;		/* old version */
149u_long	sys_restricted;		/* access denied */
150u_long	sys_badlength;		/* bad length or format */
151u_long	sys_badauth;		/* bad authentication */
152u_long	sys_declined;		/* declined */
153u_long	sys_limitrejected;	/* rate exceeded */
154u_long	sys_kodsent;		/* KoD sent */
155
156static int kiss_code_check(u_char hisleap, u_char hisstratum, u_char hismode, u_int32 refid);
157static	double	root_distance	(struct peer *);
158static	void	clock_combine	(peer_select *, int, int);
159static	void	peer_xmit	(struct peer *);
160static	void	fast_xmit	(struct recvbuf *, int, keyid_t, int);
161static	void	pool_xmit	(struct peer *);
162static	void	clock_update	(struct peer *);
163static	void	measure_precision(void);
164static	double	measure_tick_fuzz(void);
165static	int	local_refid	(struct peer *);
166static	int	peer_unfit	(struct peer *);
167#ifdef AUTOKEY
168static	int	group_test	(char *, char *);
169#endif /* AUTOKEY */
170#ifdef WORKER
171void	pool_name_resolved	(int, int, void *, const char *,
172				 const char *, const struct addrinfo *,
173				 const struct addrinfo *);
174#endif /* WORKER */
175
176const char *	amtoa		(int am);
177
178
179void
180set_sys_leap(
181	u_char new_sys_leap
182	)
183{
184	sys_leap = new_sys_leap;
185	xmt_leap = sys_leap;
186
187	/*
188	 * Under certain conditions we send faked leap bits to clients, so
189	 * eventually change xmt_leap below, but never change LEAP_NOTINSYNC.
190	 */
191	if (xmt_leap != LEAP_NOTINSYNC) {
192		if (leap_sec_in_progress) {
193			/* always send "not sync" */
194			xmt_leap = LEAP_NOTINSYNC;
195		}
196#ifdef LEAP_SMEAR
197		else {
198			/*
199			 * If leap smear is enabled in general we must
200			 * never send a leap second warning to clients,
201			 * so make sure we only send "in sync".
202			 */
203			if (leap_smear.enabled)
204				xmt_leap = LEAP_NOWARNING;
205		}
206#endif	/* LEAP_SMEAR */
207	}
208}
209
210
211/*
212 * Kiss Code check
213 */
214int
215kiss_code_check(
216	u_char hisleap,
217	u_char hisstratum,
218	u_char hismode,
219	u_int32 refid
220	)
221{
222
223	if (   hismode == MODE_SERVER
224	    && hisleap == LEAP_NOTINSYNC
225	    && hisstratum == STRATUM_UNSPEC) {
226		if(memcmp(&refid,"RATE", 4) == 0) {
227			return (RATEKISS);
228		} else if(memcmp(&refid,"DENY", 4) == 0) {
229			return (DENYKISS);
230		} else if(memcmp(&refid,"RSTR", 4) == 0) {
231			return (RSTRKISS);
232		} else if(memcmp(&refid,"X", 1) == 0) {
233			return (XKISS);
234		} else {
235			return (UNKNOWNKISS);
236		}
237	} else {
238		return (NOKISS);
239	}
240}
241
242
243/*
244 * transmit - transmit procedure called by poll timeout
245 */
246void
247transmit(
248	struct peer *peer	/* peer structure pointer */
249	)
250{
251	u_char	hpoll;
252
253	/*
254	 * The polling state machine. There are two kinds of machines,
255	 * those that never expect a reply (broadcast and manycast
256	 * server modes) and those that do (all other modes). The dance
257	 * is intricate...
258	 */
259	hpoll = peer->hpoll;
260
261	/*
262	 * In broadcast mode the poll interval is never changed from
263	 * minpoll.
264	 */
265	if (peer->cast_flags & (MDF_BCAST | MDF_MCAST)) {
266		peer->outdate = current_time;
267		if (sys_leap != LEAP_NOTINSYNC)
268			peer_xmit(peer);
269		poll_update(peer, hpoll);
270		return;
271	}
272
273	/*
274	 * In manycast mode we start with unity ttl. The ttl is
275	 * increased by one for each poll until either sys_maxclock
276	 * servers have been found or the maximum ttl is reached. When
277	 * sys_maxclock servers are found we stop polling until one or
278	 * more servers have timed out or until less than sys_minclock
279	 * associations turn up. In this case additional better servers
280	 * are dragged in and preempt the existing ones.  Once every
281	 * sys_beacon seconds we are to transmit unconditionally, but
282	 * this code is not quite right -- peer->unreach counts polls
283	 * and is being compared with sys_beacon, so the beacons happen
284	 * every sys_beacon polls.
285	 */
286	if (peer->cast_flags & MDF_ACAST) {
287		peer->outdate = current_time;
288		if (peer->unreach > sys_beacon) {
289			peer->unreach = 0;
290			peer->ttl = 0;
291			peer_xmit(peer);
292		} else if (   sys_survivors < sys_minclock
293			   || peer_associations < sys_maxclock) {
294			if (peer->ttl < (u_int32)sys_ttlmax)
295				peer->ttl++;
296			peer_xmit(peer);
297		}
298		peer->unreach++;
299		poll_update(peer, hpoll);
300		return;
301	}
302
303	/*
304	 * Pool associations transmit unicast solicitations when there
305	 * are less than a hard limit of 2 * sys_maxclock associations,
306	 * and either less than sys_minclock survivors or less than
307	 * sys_maxclock associations.  The hard limit prevents unbounded
308	 * growth in associations if the system clock or network quality
309	 * result in survivor count dipping below sys_minclock often.
310	 * This was observed testing with pool, where sys_maxclock == 12
311	 * resulted in 60 associations without the hard limit.  A
312	 * similar hard limit on manycastclient ephemeral associations
313	 * may be appropriate.
314	 */
315	if (peer->cast_flags & MDF_POOL) {
316		peer->outdate = current_time;
317		if (   (peer_associations <= 2 * sys_maxclock)
318		    && (   peer_associations < sys_maxclock
319			|| sys_survivors < sys_minclock))
320			pool_xmit(peer);
321		poll_update(peer, hpoll);
322		return;
323	}
324
325	/*
326	 * In unicast modes the dance is much more intricate. It is
327	 * designed to back off whenever possible to minimize network
328	 * traffic.
329	 */
330	if (peer->burst == 0) {
331		u_char oreach;
332
333		/*
334		 * Update the reachability status. If not heard for
335		 * three consecutive polls, stuff infinity in the clock
336		 * filter.
337		 */
338		oreach = peer->reach;
339		peer->outdate = current_time;
340		peer->unreach++;
341		peer->reach <<= 1;
342		if (!peer->reach) {
343
344			/*
345			 * Here the peer is unreachable. If it was
346			 * previously reachable raise a trap. Send a
347			 * burst if enabled.
348			 */
349			clock_filter(peer, 0., 0., MAXDISPERSE);
350			if (oreach) {
351				peer_unfit(peer);
352				report_event(PEVNT_UNREACH, peer, NULL);
353			}
354			if (   (peer->flags & FLAG_IBURST)
355			    && peer->retry == 0)
356				peer->retry = NTP_RETRY;
357		} else {
358
359			/*
360			 * Here the peer is reachable. Send a burst if
361			 * enabled and the peer is fit.  Reset unreach
362			 * for persistent and ephemeral associations.
363			 * Unreach is also reset for survivors in
364			 * clock_select().
365			 */
366			hpoll = sys_poll;
367			if (!(peer->flags & FLAG_PREEMPT))
368				peer->unreach = 0;
369			if (   (peer->flags & FLAG_BURST)
370			    && peer->retry == 0
371			    && !peer_unfit(peer))
372				peer->retry = NTP_RETRY;
373		}
374
375		/*
376		 * Watch for timeout.  If ephemeral, toss the rascal;
377		 * otherwise, bump the poll interval. Note the
378		 * poll_update() routine will clamp it to maxpoll.
379		 * If preemptible and we have more peers than maxclock,
380		 * and this peer has the minimum score of preemptibles,
381		 * demobilize.
382		 */
383		if (peer->unreach >= NTP_UNREACH) {
384			hpoll++;
385			/* ephemeral: no FLAG_CONFIG nor FLAG_PREEMPT */
386			if (!(peer->flags & (FLAG_CONFIG | FLAG_PREEMPT))) {
387				report_event(PEVNT_RESTART, peer, "timeout");
388				peer_clear(peer, "TIME");
389				unpeer(peer);
390				return;
391			}
392			if (   (peer->flags & FLAG_PREEMPT)
393			    && (peer_associations > sys_maxclock)
394			    && score_all(peer)) {
395				report_event(PEVNT_RESTART, peer, "timeout");
396				peer_clear(peer, "TIME");
397				unpeer(peer);
398				return;
399			}
400		}
401	} else {
402		peer->burst--;
403		if (peer->burst == 0) {
404
405			/*
406			 * If ntpdate mode and the clock has not been
407			 * set and all peers have completed the burst,
408			 * we declare a successful failure.
409			 */
410			if (mode_ntpdate) {
411				peer_ntpdate--;
412				if (peer_ntpdate == 0) {
413					msyslog(LOG_NOTICE,
414					    "ntpd: no servers found");
415					if (!msyslog_term)
416						printf(
417						    "ntpd: no servers found\n");
418					exit (0);
419				}
420			}
421		}
422	}
423	if (peer->retry > 0)
424		peer->retry--;
425
426	/*
427	 * Do not transmit if in broadcast client mode.
428	 */
429	if (peer->hmode != MODE_BCLIENT)
430		peer_xmit(peer);
431	poll_update(peer, hpoll);
432
433	return;
434}
435
436
437const char *
438amtoa(
439	int am
440	)
441{
442	char *bp;
443
444	switch(am) {
445	    case AM_ERR:	return "AM_ERR";
446	    case AM_NOMATCH:	return "AM_NOMATCH";
447	    case AM_PROCPKT:	return "AM_PROCPKT";
448	    case AM_BCST:	return "AM_BCST";
449	    case AM_FXMIT:	return "AM_FXMIT";
450	    case AM_MANYCAST:	return "AM_MANYCAST";
451	    case AM_NEWPASS:	return "AM_NEWPASS";
452	    case AM_NEWBCL:	return "AM_NEWBCL";
453	    case AM_POSSBCL:	return "AM_POSSBCL";
454	    default:
455		LIB_GETBUF(bp);
456		snprintf(bp, LIB_BUFLENGTH, "AM_#%d", am);
457		return bp;
458	}
459}
460
461
462/*
463 * receive - receive procedure called for each packet received
464 */
465void
466receive(
467	struct recvbuf *rbufp
468	)
469{
470	register struct peer *peer;	/* peer structure pointer */
471	register struct pkt *pkt;	/* receive packet pointer */
472	u_char	hisversion;		/* packet version */
473	u_char	hisleap;		/* packet leap indicator */
474	u_char	hismode;		/* packet mode */
475	u_char	hisstratum;		/* packet stratum */
476	u_short	restrict_mask;		/* restrict bits */
477	const char *hm_str;		/* hismode string */
478	const char *am_str;		/* association match string */
479	int	kissCode = NOKISS;	/* Kiss Code */
480	int	has_mac;		/* length of MAC field */
481	int	authlen;		/* offset of MAC field */
482	int	is_authentic = 0;	/* cryptosum ok */
483	int	retcode = AM_NOMATCH;	/* match code */
484	keyid_t	skeyid = 0;		/* key IDs */
485	u_int32	opcode = 0;		/* extension field opcode */
486	sockaddr_u *dstadr_sin;		/* active runway */
487	struct peer *peer2;		/* aux peer structure pointer */
488	endpt	*match_ep;		/* newpeer() local address */
489	l_fp	p_org;			/* origin timestamp */
490	l_fp	p_rec;			/* receive timestamp */
491	l_fp	p_xmt;			/* transmit timestamp */
492#ifdef AUTOKEY
493	char	hostname[NTP_MAXSTRLEN + 1];
494	char	*groupname = NULL;
495	struct autokey *ap;		/* autokey structure pointer */
496	int	rval;			/* cookie snatcher */
497	keyid_t	pkeyid = 0, tkeyid = 0;	/* key IDs */
498#endif	/* AUTOKEY */
499#ifdef HAVE_NTP_SIGND
500	static unsigned char zero_key[16];
501#endif /* HAVE_NTP_SIGND */
502
503	/*
504	 * Monitor the packet and get restrictions. Note that the packet
505	 * length for control and private mode packets must be checked
506	 * by the service routines. Some restrictions have to be handled
507	 * later in order to generate a kiss-o'-death packet.
508	 */
509	/*
510	 * Bogus port check is before anything, since it probably
511	 * reveals a clogging attack.
512	 */
513	sys_received++;
514	if (0 == SRCPORT(&rbufp->recv_srcadr)) {
515		sys_badlength++;
516		return;				/* bogus port */
517	}
518	restrict_mask = restrictions(&rbufp->recv_srcadr);
519	pkt = &rbufp->recv_pkt;
520	DPRINTF(2, ("receive: at %ld %s<-%s flags %x restrict %03x org %#010x.%08x xmt %#010x.%08x\n",
521		    current_time, stoa(&rbufp->dstadr->sin),
522		    stoa(&rbufp->recv_srcadr), rbufp->dstadr->flags,
523		    restrict_mask, ntohl(pkt->org.l_ui), ntohl(pkt->org.l_uf),
524		    ntohl(pkt->xmt.l_ui), ntohl(pkt->xmt.l_uf)));
525	hisversion = PKT_VERSION(pkt->li_vn_mode);
526	hisleap = PKT_LEAP(pkt->li_vn_mode);
527	hismode = (int)PKT_MODE(pkt->li_vn_mode);
528	hisstratum = PKT_TO_STRATUM(pkt->stratum);
529	if (restrict_mask & RES_IGNORE) {
530		sys_restricted++;
531		return;				/* ignore everything */
532	}
533	if (hismode == MODE_PRIVATE) {
534		if (!ntp_mode7 || (restrict_mask & RES_NOQUERY)) {
535			sys_restricted++;
536			return;			/* no query private */
537		}
538		process_private(rbufp, ((restrict_mask &
539		    RES_NOMODIFY) == 0));
540		return;
541	}
542	if (hismode == MODE_CONTROL) {
543		if (restrict_mask & RES_NOQUERY) {
544			sys_restricted++;
545			return;			/* no query control */
546		}
547		process_control(rbufp, restrict_mask);
548		return;
549	}
550	if (restrict_mask & RES_DONTSERVE) {
551		sys_restricted++;
552		return;				/* no time serve */
553	}
554
555	/*
556	 * This is for testing. If restricted drop ten percent of
557	 * surviving packets.
558	 */
559	if (restrict_mask & RES_FLAKE) {
560		if ((double)ntp_random() / 0x7fffffff < .1) {
561			sys_restricted++;
562			return;			/* no flakeway */
563		}
564	}
565
566	/*
567	 * Version check must be after the query packets, since they
568	 * intentionally use an early version.
569	 */
570	if (hisversion == NTP_VERSION) {
571		sys_newversion++;		/* new version */
572	} else if (   !(restrict_mask & RES_VERSION)
573		   && hisversion >= NTP_OLDVERSION) {
574		sys_oldversion++;		/* previous version */
575	} else {
576		sys_badlength++;
577		return;				/* old version */
578	}
579
580	/*
581	 * Figure out his mode and validate the packet. This has some
582	 * legacy raunch that probably should be removed. In very early
583	 * NTP versions mode 0 was equivalent to what later versions
584	 * would interpret as client mode.
585	 */
586	if (hismode == MODE_UNSPEC) {
587		if (hisversion == NTP_OLDVERSION) {
588			hismode = MODE_CLIENT;
589		} else {
590			sys_badlength++;
591			return;                 /* invalid mode */
592		}
593	}
594
595	/*
596	 * Parse the extension field if present. We figure out whether
597	 * an extension field is present by measuring the MAC size. If
598	 * the number of words following the packet header is 0, no MAC
599	 * is present and the packet is not authenticated. If 1, the
600	 * packet is a crypto-NAK; if 3, the packet is authenticated
601	 * with DES; if 5, the packet is authenticated with MD5; if 6,
	 * the packet is authenticated with SHA. If 2 or 4, the packet
603	 * is a runt and discarded forthwith. If greater than 6, an
604	 * extension field is present, so we subtract the length of the
605	 * field and go around again.
606	 */
607	authlen = LEN_PKT_NOMAC;
608	has_mac = rbufp->recv_length - authlen;
609	while (has_mac > 0) {
610		u_int32	len;
611#ifdef AUTOKEY
612		u_int32	hostlen;
613		struct exten *ep;
614#endif /*AUTOKEY */
615
616		if (has_mac % 4 != 0 || has_mac < (int)MIN_MAC_LEN) {
617			sys_badlength++;
618			return;			/* bad length */
619		}
620		if (has_mac <= (int)MAX_MAC_LEN) {
621			skeyid = ntohl(((u_int32 *)pkt)[authlen / 4]);
622			break;
623
624		} else {
625			opcode = ntohl(((u_int32 *)pkt)[authlen / 4]);
626			len = opcode & 0xffff;
627			if (   len % 4 != 0
628			    || len < 4
629			    || (int)len + authlen > rbufp->recv_length) {
630				sys_badlength++;
631				return;		/* bad length */
632			}
633#ifdef AUTOKEY
634			/*
635			 * Extract calling group name for later.  If
636			 * sys_groupname is non-NULL, there must be
637			 * a group name provided to elicit a response.
638			 */
639			if (   (opcode & 0x3fff0000) == CRYPTO_ASSOC
640			    && sys_groupname != NULL) {
641				ep = (struct exten *)&((u_int32 *)pkt)[authlen / 4];
642				hostlen = ntohl(ep->vallen);
643				if (   hostlen >= sizeof(hostname)
644				    || hostlen > len -
645						offsetof(struct exten, pkt)) {
646					sys_badlength++;
647					return;		/* bad length */
648				}
649				memcpy(hostname, &ep->pkt, hostlen);
650				hostname[hostlen] = '\0';
651				groupname = strchr(hostname, '@');
652				if (groupname == NULL) {
653					sys_declined++;
654					return;
655				}
656				groupname++;
657			}
658#endif /* AUTOKEY */
659			authlen += len;
660			has_mac -= len;
661		}
662	}
663
664	/*
665	 * If has_mac is < 0 we had a malformed packet.
666	 */
667	if (has_mac < 0) {
668		sys_badlength++;
669		return;		/* bad length */
670	}
671
672	/*
673	 * If authentication required, a MAC must be present.
674	 */
675	if (restrict_mask & RES_DONTTRUST && has_mac == 0) {
676		sys_restricted++;
677		return;				/* access denied */
678	}
679
680	/*
681	 * Update the MRU list and finger the cloggers. It can be a
682	 * little expensive, so turn it off for production use.
683	 * RES_LIMITED and RES_KOD will be cleared in the returned
684	 * restrict_mask unless one or both actions are warranted.
685	 */
686	restrict_mask = ntp_monitor(rbufp, restrict_mask);
687	if (restrict_mask & RES_LIMITED) {
688		sys_limitrejected++;
689		if (   !(restrict_mask & RES_KOD)
690		    || MODE_BROADCAST == hismode
691		    || MODE_SERVER == hismode) {
692			if (MODE_SERVER == hismode)
693				DPRINTF(1, ("Possibly self-induced rate limiting of MODE_SERVER from %s\n",
694					stoa(&rbufp->recv_srcadr)));
695			return;			/* rate exceeded */
696		}
697		if (hismode == MODE_CLIENT)
698			fast_xmit(rbufp, MODE_SERVER, skeyid,
699			    restrict_mask);
700		else
701			fast_xmit(rbufp, MODE_ACTIVE, skeyid,
702			    restrict_mask);
703		return;				/* rate exceeded */
704	}
705	restrict_mask &= ~RES_KOD;
706
707	/*
708	 * We have tossed out as many buggy packets as possible early in
709	 * the game to reduce the exposure to a clogging attack. Now we
710	 * have to burn some cycles to find the association and
711	 * authenticate the packet if required. Note that we burn only
712	 * digest cycles, again to reduce exposure. There may be no
713	 * matching association and that's okay.
714	 *
715	 * More on the autokey mambo. Normally the local interface is
716	 * found when the association was mobilized with respect to a
717	 * designated remote address. We assume packets arriving from
718	 * the remote address arrive via this interface and the local
719	 * address used to construct the autokey is the unicast address
720	 * of the interface. However, if the sender is a broadcaster,
721	 * the interface broadcast address is used instead.
722	 * Notwithstanding this technobabble, if the sender is a
723	 * multicaster, the broadcast address is null, so we use the
724	 * unicast address anyway. Don't ask.
725	 */
726	peer = findpeer(rbufp,  hismode, &retcode);
727	dstadr_sin = &rbufp->dstadr->sin;
728	NTOHL_FP(&pkt->org, &p_org);
729	NTOHL_FP(&pkt->rec, &p_rec);
730	NTOHL_FP(&pkt->xmt, &p_xmt);
731	hm_str = modetoa(hismode);
732	am_str = amtoa(retcode);
733
734	/*
735	 * Authentication is conditioned by three switches:
736	 *
737	 * NOPEER  (RES_NOPEER) do not mobilize an association unless
738	 *         authenticated
739	 * NOTRUST (RES_DONTTRUST) do not allow access unless
740	 *         authenticated (implies NOPEER)
741	 * enable  (sys_authenticate) master NOPEER switch, by default
742	 *         on
743	 *
744	 * The NOPEER and NOTRUST can be specified on a per-client basis
745	 * using the restrict command. The enable switch if on implies
746	 * NOPEER for all clients. There are four outcomes:
747	 *
748	 * NONE    The packet has no MAC.
749	 * OK      the packet has a MAC and authentication succeeds
750	 * ERROR   the packet has a MAC and authentication fails
751	 * CRYPTO  crypto-NAK. The MAC has four octets only.
752	 *
753	 * Note: The AUTH(x, y) macro is used to filter outcomes. If x
754	 * is zero, acceptable outcomes of y are NONE and OK. If x is
755	 * one, the only acceptable outcome of y is OK.
756	 */
757
758	if (has_mac == 0) {
759		restrict_mask &= ~RES_MSSNTP;
760		is_authentic = AUTH_NONE; /* not required */
761		DPRINTF(2, ("receive: at %ld %s<-%s mode %d/%s:%s len %d org %#010x.%08x xmt %#010x.%08x NOMAC\n",
762			    current_time, stoa(dstadr_sin),
763			    stoa(&rbufp->recv_srcadr), hismode, hm_str, am_str,
764			    authlen,
765			    ntohl(pkt->org.l_ui), ntohl(pkt->org.l_uf),
766			    ntohl(pkt->xmt.l_ui), ntohl(pkt->xmt.l_uf)));
767	} else if (has_mac == 4) {
768		restrict_mask &= ~RES_MSSNTP;
769		is_authentic = AUTH_CRYPTO; /* crypto-NAK */
770		DPRINTF(2, ("receive: at %ld %s<-%s mode %d/%s:%s keyid %08x len %d auth %d org %#010x.%08x xmt %#010x.%08x MAC4\n",
771			    current_time, stoa(dstadr_sin),
772			    stoa(&rbufp->recv_srcadr), hismode, hm_str, am_str,
773			    skeyid, authlen + has_mac, is_authentic,
774			    ntohl(pkt->org.l_ui), ntohl(pkt->org.l_uf),
775			    ntohl(pkt->xmt.l_ui), ntohl(pkt->xmt.l_uf)));
776
777#ifdef HAVE_NTP_SIGND
778		/*
779		 * If the signature is 20 bytes long, the last 16 of
780		 * which are zero, then this is a Microsoft client
781		 * wanting AD-style authentication of the server's
782		 * reply.
783		 *
784		 * This is described in Microsoft's WSPP docs, in MS-SNTP:
785		 * http://msdn.microsoft.com/en-us/library/cc212930.aspx
786		 */
787	} else if (   has_mac == MAX_MD5_LEN
788		   && (restrict_mask & RES_MSSNTP)
789		   && (retcode == AM_FXMIT || retcode == AM_NEWPASS)
790		   && (memcmp(zero_key, (char *)pkt + authlen + 4,
791			      MAX_MD5_LEN - 4) == 0)) {
792		is_authentic = AUTH_NONE;
793#endif /* HAVE_NTP_SIGND */
794
795	} else {
796		restrict_mask &= ~RES_MSSNTP;
797#ifdef AUTOKEY
798		/*
799		 * For autokey modes, generate the session key
800		 * and install in the key cache. Use the socket
801		 * broadcast or unicast address as appropriate.
802		 */
803		if (crypto_flags && skeyid > NTP_MAXKEY) {
804
805			/*
806			 * More on the autokey dance (AKD). A cookie is
807			 * constructed from public and private values.
808			 * For broadcast packets, the cookie is public
809			 * (zero). For packets that match no
810			 * association, the cookie is hashed from the
811			 * addresses and private value. For server
812			 * packets, the cookie was previously obtained
813			 * from the server. For symmetric modes, the
814			 * cookie was previously constructed using an
815			 * agreement protocol; however, should PKI be
816			 * unavailable, we construct a fake agreement as
817			 * the EXOR of the peer and host cookies.
818			 *
819			 * hismode	ephemeral	persistent
820			 * =======================================
821			 * active	0		cookie#
822			 * passive	0%		cookie#
823			 * client	sys cookie	0%
824			 * server	0%		sys cookie
825			 * broadcast	0		0
826			 *
827			 * # if unsync, 0
828			 * % can't happen
829			 */
830			if (has_mac < (int)MAX_MD5_LEN) {
831				sys_badauth++;
832				return;
833			}
834			if (hismode == MODE_BROADCAST) {
835
836				/*
837				 * For broadcaster, use the interface
838				 * broadcast address when available;
839				 * otherwise, use the unicast address
840				 * found when the association was
841				 * mobilized. However, if this is from
842				 * the wildcard interface, game over.
843				 */
844				if (   crypto_flags
845				    && rbufp->dstadr ==
846				       ANY_INTERFACE_CHOOSE(&rbufp->recv_srcadr)) {
847					sys_restricted++;
848					return;	     /* no wildcard */
849				}
850				pkeyid = 0;
851				if (!SOCK_UNSPEC(&rbufp->dstadr->bcast))
852					dstadr_sin =
853					    &rbufp->dstadr->bcast;
854			} else if (peer == NULL) {
855				pkeyid = session_key(
856				    &rbufp->recv_srcadr, dstadr_sin, 0,
857				    sys_private, 0);
858			} else {
859				pkeyid = peer->pcookie;
860			}
861
862			/*
863			 * The session key includes both the public
864			 * values and cookie. In case of an extension
865			 * field, the cookie used for authentication
866			 * purposes is zero. Note the hash is saved for
867			 * use later in the autokey mambo.
868			 */
869			if (authlen > (int)LEN_PKT_NOMAC && pkeyid != 0) {
870				session_key(&rbufp->recv_srcadr,
871				    dstadr_sin, skeyid, 0, 2);
872				tkeyid = session_key(
873				    &rbufp->recv_srcadr, dstadr_sin,
874				    skeyid, pkeyid, 0);
875			} else {
876				tkeyid = session_key(
877				    &rbufp->recv_srcadr, dstadr_sin,
878				    skeyid, pkeyid, 2);
879			}
880
881		}
882#endif	/* AUTOKEY */
883
884		/*
885		 * Compute the cryptosum. Note a clogging attack may
886		 * succeed in bloating the key cache. If an autokey,
887		 * purge it immediately, since we won't be needing it
888		 * again. If the packet is authentic, it can mobilize an
889		 * association. Note that there is no key zero.
890		 */
891		if (!authdecrypt(skeyid, (u_int32 *)pkt, authlen,
892		    has_mac))
893			is_authentic = AUTH_ERROR;
894		else
895			is_authentic = AUTH_OK;
896#ifdef AUTOKEY
897		if (crypto_flags && skeyid > NTP_MAXKEY)
898			authtrust(skeyid, 0);
899#endif	/* AUTOKEY */
900		DPRINTF(2, ("receive: at %ld %s<-%s mode %d/%s:%s keyid %08x len %d auth %d org %#010x.%08x xmt %#010x.%08x\n",
901			    current_time, stoa(dstadr_sin),
902			    stoa(&rbufp->recv_srcadr), hismode, hm_str, am_str,
903			    skeyid, authlen + has_mac, is_authentic,
904			    ntohl(pkt->org.l_ui), ntohl(pkt->org.l_uf),
905			    ntohl(pkt->xmt.l_ui), ntohl(pkt->xmt.l_uf)));
906	}
907
908	/*
909	 * The association matching rules are implemented by a set of
910	 * routines and an association table. A packet matching an
911	 * association is processed by the peer process for that
912	 * association. If there are no errors, an ephemeral association
913	 * is mobilized: a broadcast packet mobilizes a broadcast client
	 * association; a manycast server packet mobilizes a manycast
915	 * client association; a symmetric active packet mobilizes a
916	 * symmetric passive association.
917	 */
918	switch (retcode) {
919
920	/*
921	 * This is a client mode packet not matching any association. If
922	 * an ordinary client, simply toss a server mode packet back
923	 * over the fence. If a manycast client, we have to work a
924	 * little harder.
925	 */
926	case AM_FXMIT:
927
928		/*
929		 * If authentication OK, send a server reply; otherwise,
930		 * send a crypto-NAK.
931		 */
932		if (!(rbufp->dstadr->flags & INT_MCASTOPEN)) {
933			if (AUTH(restrict_mask & RES_DONTTRUST,
934			   is_authentic)) {
935				fast_xmit(rbufp, MODE_SERVER, skeyid,
936				    restrict_mask);
937			} else if (is_authentic == AUTH_ERROR) {
938				fast_xmit(rbufp, MODE_SERVER, 0,
939				    restrict_mask);
940				sys_badauth++;
941			} else {
942				sys_restricted++;
943			}
944			return;			/* hooray */
945		}
946
947		/*
948		 * This must be manycast. Do not respond if not
949		 * configured as a manycast server.
950		 */
951		if (!sys_manycastserver) {
952			sys_restricted++;
953			return;			/* not enabled */
954		}
955
956#ifdef AUTOKEY
957		/*
958		 * Do not respond if not the same group.
959		 */
960		if (group_test(groupname, NULL)) {
961			sys_declined++;
962			return;
963		}
964#endif /* AUTOKEY */
965
966		/*
967		 * Do not respond if we are not synchronized or our
968		 * stratum is greater than the manycaster or the
969		 * manycaster has already synchronized to us.
970		 */
971		if (   sys_leap == LEAP_NOTINSYNC
972		    || sys_stratum >= hisstratum
973		    || (!sys_cohort && sys_stratum == hisstratum + 1)
974		    || rbufp->dstadr->addr_refid == pkt->refid) {
975			sys_declined++;
976			return;			/* no help */
977		}
978
979		/*
980		 * Respond only if authentication succeeds. Don't do a
981		 * crypto-NAK, as that would not be useful.
982		 */
983		if (AUTH(restrict_mask & RES_DONTTRUST, is_authentic))
984			fast_xmit(rbufp, MODE_SERVER, skeyid,
985			    restrict_mask);
986		return;				/* hooray */
987
988	/*
989	 * This is a server mode packet returned in response to a client
990	 * mode packet sent to a multicast group address (for
991	 * manycastclient) or to a unicast address (for pool). The
992	 * origin timestamp is a good nonce to reliably associate the
993	 * reply with what was sent. If there is no match, that's
994	 * curious and could be an intruder attempting to clog, so we
995	 * just ignore it.
996	 *
997	 * If the packet is authentic and the manycastclient or pool
998	 * association is found, we mobilize a client association and
999	 * copy pertinent variables from the manycastclient or pool
1000	 * association to the new client association. If not, just
1001	 * ignore the packet.
1002	 *
1003	 * There is an implosion hazard at the manycast client, since
1004	 * the manycast servers send the server packet immediately. If
1005	 * the guy is already here, don't fire up a duplicate.
1006	 */
1007	case AM_MANYCAST:
1008
1009#ifdef AUTOKEY
1010		/*
1011		 * Do not respond if not the same group.
1012		 */
1013		if (group_test(groupname, NULL)) {
1014			sys_declined++;
1015			return;
1016		}
1017#endif /* AUTOKEY */
1018		if ((peer2 = findmanycastpeer(rbufp)) == NULL) {
1019			sys_restricted++;
1020			return;			/* not enabled */
1021		}
1022		if (!AUTH(  (!(peer2->cast_flags & MDF_POOL)
1023			     && sys_authenticate)
1024			  || (restrict_mask & (RES_NOPEER |
1025			      RES_DONTTRUST)), is_authentic)) {
1026			sys_restricted++;
1027			return;			/* access denied */
1028		}
1029
1030		/*
1031		 * Do not respond if unsynchronized or stratum is below
1032		 * the floor or at or above the ceiling.
1033		 */
1034		if (   hisleap == LEAP_NOTINSYNC
1035		    || hisstratum < sys_floor
1036		    || hisstratum >= sys_ceiling) {
1037			sys_declined++;
1038			return;			/* no help */
1039		}
1040		peer = newpeer(&rbufp->recv_srcadr, NULL, rbufp->dstadr,
1041			       MODE_CLIENT, hisversion, peer2->minpoll,
1042			       peer2->maxpoll, FLAG_PREEMPT |
1043			       (FLAG_IBURST & peer2->flags), MDF_UCAST |
1044			       MDF_UCLNT, 0, skeyid, sys_ident);
1045		if (NULL == peer) {
1046			sys_declined++;
1047			return;			/* ignore duplicate  */
1048		}
1049
1050		/*
1051		 * After each ephemeral pool association is spun,
1052		 * accelerate the next poll for the pool solicitor so
1053		 * the pool will fill promptly.
1054		 */
1055		if (peer2->cast_flags & MDF_POOL)
1056			peer2->nextdate = current_time + 1;
1057
1058		/*
1059		 * Further processing of the solicitation response would
1060		 * simply detect its origin timestamp as bogus for the
1061		 * brand-new association (it matches the prototype
1062		 * association) and tinker with peer->nextdate delaying
1063		 * first sync.
1064		 */
1065		return;		/* solicitation response handled */
1066
1067	/*
1068	 * This is the first packet received from a broadcast server. If
1069	 * the packet is authentic and we are enabled as broadcast
1070	 * client, mobilize a broadcast client association. We don't
1071	 * kiss any frogs here.
1072	 */
1073	case AM_NEWBCL:
1074
1075#ifdef AUTOKEY
1076		/*
1077		 * Do not respond if not the same group.
1078		 */
1079		if (group_test(groupname, sys_ident)) {
1080			sys_declined++;
1081			return;
1082		}
1083#endif /* AUTOKEY */
1084		if (sys_bclient == 0) {
1085			sys_restricted++;
1086			return;			/* not enabled */
1087		}
1088		if (!AUTH(sys_authenticate | (restrict_mask &
1089		    (RES_NOPEER | RES_DONTTRUST)), is_authentic)) {
1090			sys_restricted++;
1091			return;			/* access denied */
1092		}
1093
1094		/*
1095		 * Do not respond if unsynchronized or stratum is below
1096		 * the floor or at or above the ceiling.
1097		 */
1098		if (   hisleap == LEAP_NOTINSYNC
1099		    || hisstratum < sys_floor
1100		    || hisstratum >= sys_ceiling) {
1101			sys_declined++;
1102			return;			/* no help */
1103		}
1104
1105#ifdef AUTOKEY
1106		/*
1107		 * Do not respond if Autokey and the opcode is not a
1108		 * CRYPTO_ASSOC response with association ID.
1109		 */
1110		if (   crypto_flags && skeyid > NTP_MAXKEY
1111		    && (opcode & 0xffff0000) != (CRYPTO_ASSOC | CRYPTO_RESP)) {
1112			sys_declined++;
1113			return;			/* protocol error */
1114		}
1115#endif	/* AUTOKEY */
1116
1117		/*
1118		 * Broadcasts received via a multicast address may
1119		 * arrive after a unicast volley has begun
1120		 * with the same remote address.  newpeer() will not
1121		 * find duplicate associations on other local endpoints
1122		 * if a non-NULL endpoint is supplied.  multicastclient
1123		 * ephemeral associations are unique across all local
1124		 * endpoints.
1125		 */
1126		if (!(INT_MCASTOPEN & rbufp->dstadr->flags))
1127			match_ep = rbufp->dstadr;
1128		else
1129			match_ep = NULL;
1130
1131		/*
1132		 * Determine whether to execute the initial volley.
1133		 */
1134		if (sys_bdelay != 0) {
1135#ifdef AUTOKEY
1136			/*
1137			 * If a two-way exchange is not possible,
1138			 * neither is Autokey.
1139			 */
1140			if (crypto_flags && skeyid > NTP_MAXKEY) {
1141				sys_restricted++;
1142				return;		/* no autokey */
1143			}
1144#endif	/* AUTOKEY */
1145
1146			/*
1147			 * Do not execute the volley. Start out in
1148			 * broadcast client mode.
1149			 */
1150			peer = newpeer(&rbufp->recv_srcadr, NULL,
1151			    match_ep, MODE_BCLIENT, hisversion,
1152			    pkt->ppoll, pkt->ppoll, FLAG_PREEMPT,
1153			    MDF_BCLNT, 0, skeyid, sys_ident);
1154			if (NULL == peer) {
1155				sys_restricted++;
1156				return;		/* ignore duplicate */
1157
1158			} else {
1159				peer->delay = sys_bdelay;
1160			}
1161			break;
1162		}
1163
1164		/*
1165		 * Execute the initial volley in order to calibrate the
1166		 * propagation delay and run the Autokey protocol.
1167		 *
1168		 * Note that the minpoll is taken from the broadcast
1169		 * packet, normally 6 (64 s) and that the poll interval
1170		 * is fixed at this value.
1171		 */
1172		peer = newpeer(&rbufp->recv_srcadr, NULL, match_ep,
1173		    MODE_CLIENT, hisversion, pkt->ppoll, pkt->ppoll,
1174		    FLAG_BC_VOL | FLAG_IBURST | FLAG_PREEMPT, MDF_BCLNT,
1175		    0, skeyid, sys_ident);
1176		if (NULL == peer) {
1177			sys_restricted++;
1178			return;			/* ignore duplicate */
1179		}
1180#ifdef AUTOKEY
1181		if (skeyid > NTP_MAXKEY)
1182			crypto_recv(peer, rbufp);
1183#endif	/* AUTOKEY */
1184
1185		return;				/* hooray */
1186
1187	/*
1188	 * This is the first packet received from a symmetric active
1189	 * peer. If the packet is authentic and the first he sent,
1190	 * mobilize a passive association. If not, kiss the frog.
1191	 */
1192	case AM_NEWPASS:
1193
1194#ifdef AUTOKEY
1195		/*
1196		 * Do not respond if not the same group.
1197		 */
1198		if (group_test(groupname, sys_ident)) {
1199			sys_declined++;
1200			return;
1201		}
1202#endif /* AUTOKEY */
1203		if (!AUTH(sys_authenticate | (restrict_mask &
1204		    (RES_NOPEER | RES_DONTTRUST)), is_authentic)) {
1205
1206			/*
1207			 * If authenticated but cannot mobilize an
1208			 * association, send a symmetric passive
1209			 * response without mobilizing an association.
1210			 * This is for drat broken Windows clients. See
1211			 * Microsoft KB 875424 for preferred workaround.
1212			 */
1213			if (AUTH(restrict_mask & RES_DONTTRUST,
1214			    is_authentic)) {
1215				fast_xmit(rbufp, MODE_PASSIVE, skeyid,
1216				    restrict_mask);
1217				return;			/* hooray */
1218			}
1219			if (is_authentic == AUTH_ERROR) {
1220				fast_xmit(rbufp, MODE_ACTIVE, 0,
1221				    restrict_mask);
1222				sys_restricted++;
1223				return;
1224			}
1225			/* [Bug 2941]
1226			 * If we got here, the packet isn't part of an
1227			 * existing association, it isn't correctly
1228			 * authenticated, and it didn't meet either of
1229			 * the previous two special cases so we should
1230			 * just drop it on the floor.  For example,
1231			 * crypto-NAKs (is_authentic == AUTH_CRYPTO)
1232			 * will make it this far.  This is just
1233			 * debug-printed and not logged to avoid log
1234			 * flooding.
1235			 */
1236			DPRINTF(2, ("receive: at %ld refusing to mobilize passive association"
1237				    " with unknown peer %s mode %d/%s:%s keyid %08x len %d auth %d\n",
1238				    current_time, stoa(&rbufp->recv_srcadr),
1239				    hismode, hm_str, am_str, skeyid,
1240				    (authlen + has_mac), is_authentic));
1241			sys_declined++;
1242			return;
1243		}
1244
1245		/*
1246		 * Do not respond if synchronized and if stratum is
1247		 * below the floor or at or above the ceiling. Note,
1248		 * this allows an unsynchronized peer to synchronize to
1249		 * us. It would be very strange if he did and then was
1250		 * nipped, but that could only happen if we were
1251		 * operating at the top end of the range.  It also means
1252		 * we will spin an ephemeral association in response to
1253		 * MODE_ACTIVE KoDs, which will time out eventually.
1254		 */
1255		if (   hisleap != LEAP_NOTINSYNC
1256		    && (hisstratum < sys_floor || hisstratum >= sys_ceiling)) {
1257			sys_declined++;
1258			return;			/* no help */
1259		}
1260
1261		/*
1262		 * The message is correctly authenticated and allowed.
1263		 * Mobilize a symmetric passive association.
1264		 */
1265		if ((peer = newpeer(&rbufp->recv_srcadr, NULL,
1266		    rbufp->dstadr, MODE_PASSIVE, hisversion, pkt->ppoll,
1267		    NTP_MAXDPOLL, 0, MDF_UCAST, 0, skeyid,
1268		    sys_ident)) == NULL) {
1269			sys_declined++;
1270			return;			/* ignore duplicate */
1271		}
1272		break;
1273
1274
1275	/*
1276	 * Process regular packet. Nothing special.
1277	 */
1278	case AM_PROCPKT:
1279
1280#ifdef AUTOKEY
1281		/*
1282		 * Do not respond if not the same group.
1283		 */
1284		if (group_test(groupname, peer->ident)) {
1285			sys_declined++;
1286			return;
1287		}
1288#endif /* AUTOKEY */
1289		break;
1290
1291	/*
1292	 * A passive packet matches a passive association. This is
1293	 * usually the result of reconfiguring a client on the fly. As
1294	 * this association might be legitimate and this packet an
1295	 * attempt to deny service, just ignore it.
1296	 */
1297	case AM_ERR:
1298		sys_declined++;
1299		return;
1300
1301	/*
1302	 * For everything else there is the bit bucket.
1303	 */
1304	default:
1305		sys_declined++;
1306		return;
1307	}
1308
1309#ifdef AUTOKEY
1310	/*
1311	 * If the association is configured for Autokey, the packet must
1312	 * have a public key ID; if not, the packet must have a
1313	 * symmetric key ID.
1314	 */
1315	if (   is_authentic != AUTH_CRYPTO
1316	    && (   ((peer->flags & FLAG_SKEY) && skeyid <= NTP_MAXKEY)
1317	        || (!(peer->flags & FLAG_SKEY) && skeyid > NTP_MAXKEY))) {
1318		sys_badauth++;
1319		return;
1320	}
1321#endif	/* AUTOKEY */
1322	peer->received++;
1323	peer->flash &= ~PKT_TEST_MASK;
1324	if (peer->flags & FLAG_XBOGUS) {
1325		peer->flags &= ~FLAG_XBOGUS;
1326		peer->flash |= TEST3;
1327	}
1328
1329	/*
1330	 * Next comes a rigorous schedule of timestamp checking. If the
1331	 * transmit timestamp is zero, the server has not initialized in
1332	 * interleaved modes or is horribly broken.
1333	 */
1334	if (L_ISZERO(&p_xmt)) {
1335		peer->flash |= TEST3;			/* unsynch */
1336
1337	/*
1338	 * If the transmit timestamp duplicates a previous one, the
1339	 * packet is a replay. This prevents the bad guys from replaying
1340	 * the most recent packet, authenticated or not.
1341	 */
1342	} else if (L_ISEQU(&peer->xmt, &p_xmt)) {
1343		peer->flash |= TEST1;			/* duplicate */
1344		peer->oldpkt++;
1345		return;
1346
1347	/*
1348	 * If this is a broadcast mode packet, skip further checking. If
1349	 * an initial volley, bail out now and let the client do its
1350	 * stuff. If the origin timestamp is nonzero, this is an
1351	 * interleaved broadcast. so restart the protocol.
1352	 */
1353	} else if (hismode == MODE_BROADCAST) {
1354		if (!L_ISZERO(&p_org) && !(peer->flags & FLAG_XB)) {
1355			peer->flags |= FLAG_XB;
1356			peer->aorg = p_xmt;
1357			peer->borg = rbufp->recv_time;
1358			report_event(PEVNT_XLEAVE, peer, NULL);
1359			return;
1360		}
1361
1362	/*
1363	 * Basic mode checks:
1364	 *
1365	 * If there is no origin timestamp, it's an initial packet.
1366	 *
1367	 * Otherwise, check for bogus packet in basic mode.
1368	 * If it is bogus, switch to interleaved mode and resynchronize,
1369	 * but only after confirming the packet is not bogus in
1370	 * symmetric interleaved mode.
1371	 *
1372	 * This could also mean somebody is forging packets claiming to
1373	 * be from us, attempting to cause our server to KoD us.
1374	 */
1375	} else if (peer->flip == 0) {
1376		if (0 < hisstratum && L_ISZERO(&p_org)) {
1377			L_CLR(&peer->aorg);
1378		} else if (!L_ISEQU(&p_org, &peer->aorg)) {
1379			peer->bogusorg++;
1380			peer->flash |= TEST2;	/* bogus */
1381			msyslog(LOG_INFO,
1382				"receive: Unexpected origin timestamp %#010x.%08x from %s xmt %#010x.%08x",
1383				ntohl(pkt->org.l_ui), ntohl(pkt->org.l_uf),
1384				ntoa(&peer->srcadr),
1385				ntohl(pkt->xmt.l_ui), ntohl(pkt->xmt.l_uf));
1386			if (  !L_ISZERO(&peer->dst)
1387			    && L_ISEQU(&p_org, &peer->dst)) {
1388				/* Might be the start of an interleave */
1389				peer->flip = 1;
1390				report_event(PEVNT_XLEAVE, peer, NULL);
1391			}
1392			return; /* Bogus or possible interleave packet */
1393		} else {
1394			L_CLR(&peer->aorg);
1395		}
1396
1397	/*
1398	 * Check for valid nonzero timestamp fields.
1399	 */
1400	} else if (L_ISZERO(&p_org) || L_ISZERO(&p_rec) ||
1401	    L_ISZERO(&peer->dst)) {
1402		peer->flash |= TEST3;		/* unsynch */
1403
1404	/*
1405	 * Check for bogus packet in interleaved symmetric mode. This
1406	 * can happen if a packet is lost, duplicated or crossed. If
1407	 * found, flip and resynchronize.
1408	 */
1409	} else if (   !L_ISZERO(&peer->dst)
1410		   && !L_ISEQU(&p_org, &peer->dst)) {
1411		peer->bogusorg++;
1412		peer->flags |= FLAG_XBOGUS;
1413		peer->flash |= TEST2;		/* bogus */
1414		return; /* Bogus packet, we are done */
1415	}
1416
1417	/*
1418	 * If this is a crypto_NAK, the server cannot authenticate a
1419	 * client packet. The server might have just changed keys. Clear
1420	 * the association and restart the protocol.
1421	 */
1422	if (is_authentic == AUTH_CRYPTO) {
1423		report_event(PEVNT_AUTH, peer, "crypto_NAK");
1424		peer->flash |= TEST5;		/* bad auth */
1425		peer->badauth++;
1426		if (peer->flags & FLAG_PREEMPT) {
1427			unpeer(peer);
1428			return;
1429		}
1430#ifdef AUTOKEY
1431		if (peer->crypto)
1432			peer_clear(peer, "AUTH");
1433#endif	/* AUTOKEY */
1434		return;
1435
1436	/*
1437	 * If the digest fails or it's missing for authenticated
1438	 * associations, the client cannot authenticate a server
1439	 * reply to a client packet previously sent. The loopback check
1440	 * is designed to avoid a bait-and-switch attack, which was
1441	 * possible in past versions. If symmetric modes, return a
1442	 * crypto-NAK. The peer should restart the protocol.
1443	 */
1444	} else if (!AUTH(peer->keyid || has_mac ||
1445			 (restrict_mask & RES_DONTTRUST), is_authentic)) {
1446		report_event(PEVNT_AUTH, peer, "digest");
1447		peer->flash |= TEST5;		/* bad auth */
1448		peer->badauth++;
1449		if (   has_mac
1450		    && (hismode == MODE_ACTIVE || hismode == MODE_PASSIVE))
1451			fast_xmit(rbufp, MODE_ACTIVE, 0, restrict_mask);
1452		if (peer->flags & FLAG_PREEMPT) {
1453			unpeer(peer);
1454			return;
1455		}
1456#ifdef AUTOKEY
1457		if (peer->crypto)
1458			peer_clear(peer, "AUTH");
1459#endif	/* AUTOKEY */
1460		return;
1461	}
1462
1463	/*
1464	 * Update the state variables.
1465	 */
1466	if (peer->flip == 0) {
1467		if (hismode != MODE_BROADCAST)
1468			peer->rec = p_xmt;
1469		peer->dst = rbufp->recv_time;
1470	}
1471	peer->xmt = p_xmt;
1472
1473	/*
1474	 * Set the peer ppoll to the maximum of the packet ppoll and the
1475	 * peer minpoll. If a kiss-o'-death, set the peer minpoll to
1476	 * this maximum and advance the headway to give the sender some
1477	 * headroom. Very intricate.
1478	 */
1479
1480	/*
1481	 * Check for any kiss codes. Note this is only used when a server
1482	 * responds to a packet request
1483	 */
1484
1485	kissCode = kiss_code_check(hisleap, hisstratum, hismode, pkt->refid);
1486
1487	/*
1488	 * Check to see if this is a RATE Kiss Code
1489	 * Currently this kiss code will accept whatever poll
1490	 * rate that the server sends
1491	 */
1492	peer->ppoll = max(peer->minpoll, pkt->ppoll);
1493	if (kissCode == RATEKISS) {
1494		peer->selbroken++;	/* Increment the KoD count */
1495		report_event(PEVNT_RATE, peer, NULL);
1496		if (pkt->ppoll > peer->minpoll)
1497			peer->minpoll = peer->ppoll;
1498		peer->burst = peer->retry = 0;
1499		peer->throttle = (NTP_SHIFT + 1) * (1 << peer->minpoll);
1500		poll_update(peer, pkt->ppoll);
1501		return;				/* kiss-o'-death */
1502	}
1503	if (kissCode != NOKISS) {
1504		peer->selbroken++;	/* Increment the KoD count */
1505		return;		/* Drop any other kiss code packets */
1506	}
1507
1508
1509	/*
1510	 * That was hard and I am sweaty, but the packet is squeaky
1511	 * clean. Get on with real work.
1512	 */
1513	peer->timereceived = current_time;
1514	if (is_authentic == AUTH_OK)
1515		peer->flags |= FLAG_AUTHENTIC;
1516	else
1517		peer->flags &= ~FLAG_AUTHENTIC;
1518
1519#ifdef AUTOKEY
1520	/*
1521	 * More autokey dance. The rules of the cha-cha are as follows:
1522	 *
1523	 * 1. If there is no key or the key is not auto, do nothing.
1524	 *
1525	 * 2. If this packet is in response to the one just previously
1526	 *    sent or from a broadcast server, do the extension fields.
1527	 *    Otherwise, assume bogosity and bail out.
1528	 *
1529	 * 3. If an extension field contains a verified signature, it is
1530	 *    self-authenticated and we sit the dance.
1531	 *
1532	 * 4. If this is a server reply, check only to see that the
1533	 *    transmitted key ID matches the received key ID.
1534	 *
1535	 * 5. Check to see that one or more hashes of the current key ID
1536	 *    matches the previous key ID or ultimate original key ID
1537	 *    obtained from the broadcaster or symmetric peer. If no
1538	 *    match, sit the dance and call for new autokey values.
1539	 *
1540	 * In case of crypto error, fire the orchestra, stop dancing and
1541	 * restart the protocol.
1542	 */
1543	if (peer->flags & FLAG_SKEY) {
1544		/*
1545		 * Decrement remaining autokey hashes. This isn't
1546		 * perfect if a packet is lost, but results in no harm.
1547		 */
1548		ap = (struct autokey *)peer->recval.ptr;
1549		if (ap != NULL) {
1550			if (ap->seq > 0)
1551				ap->seq--;
1552		}
1553		peer->flash |= TEST8;
1554		rval = crypto_recv(peer, rbufp);
1555		if (rval == XEVNT_OK) {
1556			peer->unreach = 0;
1557		} else {
1558			if (rval == XEVNT_ERR) {
1559				report_event(PEVNT_RESTART, peer,
1560				    "crypto error");
1561				peer_clear(peer, "CRYP");
1562				peer->flash |= TEST9;	/* bad crypt */
1563				if (peer->flags & FLAG_PREEMPT)
1564					unpeer(peer);
1565			}
1566			return;
1567		}
1568
1569		/*
1570		 * If server mode, verify the receive key ID matches
1571		 * the transmit key ID.
1572		 */
1573		if (hismode == MODE_SERVER) {
1574			if (skeyid == peer->keyid)
1575				peer->flash &= ~TEST8;
1576
1577		/*
1578		 * If an extension field is present, verify only that it
1579		 * has been correctly signed. We don't need a sequence
1580		 * check here, but the sequence continues.
1581		 */
1582		} else if (!(peer->flash & TEST8)) {
1583			peer->pkeyid = skeyid;
1584
1585		/*
1586		 * Now the fun part. Here, skeyid is the current ID in
1587		 * the packet, pkeyid is the ID in the last packet and
1588		 * tkeyid is the hash of skeyid. If the autokey values
1589		 * have not been received, this is an automatic error.
1590		 * If so, check that the tkeyid matches pkeyid. If not,
1591		 * hash tkeyid and try again. If the number of hashes
1592		 * exceeds the number remaining in the sequence, declare
1593		 * a successful failure and refresh the autokey values.
1594		 */
1595		} else if (ap != NULL) {
1596			int i;
1597
1598			for (i = 0; ; i++) {
1599				if (   tkeyid == peer->pkeyid
1600				    || tkeyid == ap->key) {
1601					peer->flash &= ~TEST8;
1602					peer->pkeyid = skeyid;
1603					ap->seq -= i;
1604					break;
1605				}
1606				if (i > ap->seq) {
1607					peer->crypto &=
1608					    ~CRYPTO_FLAG_AUTO;
1609					break;
1610				}
1611				tkeyid = session_key(
1612				    &rbufp->recv_srcadr, dstadr_sin,
1613				    tkeyid, pkeyid, 0);
1614			}
1615			if (peer->flash & TEST8)
1616				report_event(PEVNT_AUTH, peer, "keylist");
1617		}
1618		if (!(peer->crypto & CRYPTO_FLAG_PROV)) /* test 9 */
1619			peer->flash |= TEST8;	/* bad autokey */
1620
1621		/*
1622		 * The maximum lifetime of the protocol is about one
1623		 * week before restarting the Autokey protocol to
1624		 * refresh certificates and leapseconds values.
1625		 */
1626		if (current_time > peer->refresh) {
1627			report_event(PEVNT_RESTART, peer,
1628			    "crypto refresh");
1629			peer_clear(peer, "TIME");
1630			return;
1631		}
1632	}
1633#endif	/* AUTOKEY */
1634
1635	/*
1636	 * The dance is complete and the flash bits have been lit. Toss
1637	 * the packet over the fence for processing, which may light up
1638	 * more flashers.
1639	 */
1640	process_packet(peer, pkt, rbufp->recv_length);
1641
1642	/*
1643	 * In interleaved mode update the state variables. Also adjust the
1644	 * transmit phase to avoid crossover.
1645	 */
1646	if (peer->flip != 0) {
1647		peer->rec = p_rec;
1648		peer->dst = rbufp->recv_time;
1649		if (peer->nextdate - current_time < (1U << min(peer->ppoll,
1650		    peer->hpoll)) / 2)
1651			peer->nextdate++;
1652		else
1653			peer->nextdate--;
1654	}
1655}
1656
1657
1658/*
1659 * process_packet - Packet Procedure, a la Section 3.4.4 of the
1660 *	specification. Or almost, at least. If we're in here we have a
1661 *	reasonable expectation that we will be having a long term
1662 *	relationship with this host.
1663 */
1664void
1665process_packet(
1666	register struct peer *peer,
1667	register struct pkt *pkt,
1668	u_int	len
1669	)
1670{
1671	double	t34, t21;
1672	double	p_offset, p_del, p_disp;
1673	l_fp	p_rec, p_xmt, p_org, p_reftime, ci;
1674	u_char	pmode, pleap, pversion, pstratum;
1675	char	statstr[NTP_MAXSTRLEN];
1676#ifdef ASSYM
1677	int	itemp;
1678	double	etemp, ftemp, td;
1679#endif /* ASSYM */
1680
1681	sys_processed++;
1682	peer->processed++;
1683	p_del = FPTOD(NTOHS_FP(pkt->rootdelay));
1684	p_offset = 0;
1685	p_disp = FPTOD(NTOHS_FP(pkt->rootdisp));
1686	NTOHL_FP(&pkt->reftime, &p_reftime);
1687	NTOHL_FP(&pkt->org, &p_org);
1688	NTOHL_FP(&pkt->rec, &p_rec);
1689	NTOHL_FP(&pkt->xmt, &p_xmt);
1690	pmode = PKT_MODE(pkt->li_vn_mode);
1691	pleap = PKT_LEAP(pkt->li_vn_mode);
1692	pversion = PKT_VERSION(pkt->li_vn_mode);
1693	pstratum = PKT_TO_STRATUM(pkt->stratum);
1694
1695	/*
1696	 * Capture the header values in the client/peer association..
1697	 */
1698	record_raw_stats(&peer->srcadr, peer->dstadr ?
1699	    &peer->dstadr->sin : NULL,
1700	    &p_org, &p_rec, &p_xmt, &peer->dst,
1701	    pleap, pversion, pmode, pstratum, pkt->ppoll, pkt->precision,
1702	    p_del, p_disp, pkt->refid);
1703	peer->leap = pleap;
1704	peer->stratum = min(pstratum, STRATUM_UNSPEC);
1705	peer->pmode = pmode;
1706	peer->precision = pkt->precision;
1707	peer->rootdelay = p_del;
1708	peer->rootdisp = p_disp;
1709	peer->refid = pkt->refid;		/* network byte order */
1710	peer->reftime = p_reftime;
1711
1712	/*
1713	 * First, if either burst mode is armed, enable the burst.
1714	 * Compute the headway for the next packet and delay if
1715	 * necessary to avoid exceeding the threshold.
1716	 */
1717	if (peer->retry > 0) {
1718		peer->retry = 0;
1719		if (peer->reach)
1720			peer->burst = min(1 << (peer->hpoll -
1721			    peer->minpoll), NTP_SHIFT) - 1;
1722		else
1723			peer->burst = NTP_IBURST - 1;
1724		if (peer->burst > 0)
1725			peer->nextdate = current_time;
1726	}
1727	poll_update(peer, peer->hpoll);
1728
1729	/*
1730	 * Verify the server is synchronized; that is, the leap bits,
1731	 * stratum and root distance are valid.
1732	 */
1733	if (   pleap == LEAP_NOTINSYNC		/* test 6 */
1734	    || pstratum < sys_floor || pstratum >= sys_ceiling)
1735		peer->flash |= TEST6;		/* bad synch or strat */
1736	if (p_del / 2 + p_disp >= MAXDISPERSE)	/* test 7 */
1737		peer->flash |= TEST7;		/* bad header */
1738
1739	/*
1740	 * If any tests fail at this point, the packet is discarded.
1741	 * Note that some flashers may have already been set in the
1742	 * receive() routine.
1743	 */
1744	if (peer->flash & PKT_TEST_MASK) {
1745		peer->seldisptoolarge++;
1746		DPRINTF(1, ("packet: flash header %04x\n",
1747			    peer->flash));
1748		return;
1749	}
1750
1751	/*
1752	 * If the peer was previously unreachable, raise a trap. In any
1753	 * case, mark it reachable.
1754	 */
1755	if (!peer->reach) {
1756		report_event(PEVNT_REACH, peer, NULL);
1757		peer->timereachable = current_time;
1758	}
1759	peer->reach |= 1;
1760
1761	/*
1762	 * For a client/server association, calculate the clock offset,
1763	 * roundtrip delay and dispersion. The equations are reordered
1764	 * from the spec for more efficient use of temporaries. For a
1765	 * broadcast association, offset the last measurement by the
1766	 * computed delay during the client/server volley. Note the
1767	 * computation of dispersion includes the system precision plus
1768	 * that due to the frequency error since the origin time.
1769	 *
1770	 * It is very important to respect the hazards of overflow. The
1771	 * only permitted operation on raw timestamps is subtraction,
1772	 * where the result is a signed quantity spanning from 68 years
1773	 * in the past to 68 years in the future. To avoid loss of
1774	 * precision, these calculations are done using 64-bit integer
1775	 * arithmetic. However, the offset and delay calculations are
1776	 * sums and differences of these first-order differences, which
1777	 * if done using 64-bit integer arithmetic, would be valid over
1778	 * only half that span. Since the typical first-order
1779	 * differences are usually very small, they are converted to 64-
1780	 * bit doubles and all remaining calculations done in floating-
1781	 * double arithmetic. This preserves the accuracy while
1782	 * retaining the 68-year span.
1783	 *
1784	 * There are three interleaving schemes, basic, interleaved
1785	 * symmetric and interleaved broadcast. The timestamps are
1786	 * idioscyncratically different. See the onwire briefing/white
1787	 * paper at www.eecis.udel.edu/~mills for details.
1788	 *
1789	 * Interleaved symmetric mode
1790	 * t1 = peer->aorg/borg, t2 = peer->rec, t3 = p_xmt,
1791	 * t4 = peer->dst
1792	 */
1793	if (peer->flip != 0) {
1794		ci = p_xmt;				/* t3 - t4 */
1795		L_SUB(&ci, &peer->dst);
1796		LFPTOD(&ci, t34);
1797		ci = p_rec;				/* t2 - t1 */
1798		if (peer->flip > 0)
1799			L_SUB(&ci, &peer->borg);
1800		else
1801			L_SUB(&ci, &peer->aorg);
1802		LFPTOD(&ci, t21);
1803		p_del = t21 - t34;
1804		p_offset = (t21 + t34) / 2.;
1805		if (p_del < 0 || p_del > 1.) {
1806			snprintf(statstr, sizeof(statstr),
1807			    "t21 %.6f t34 %.6f", t21, t34);
1808			report_event(PEVNT_XERR, peer, statstr);
1809			return;
1810		}
1811
1812	/*
1813	 * Broadcast modes
1814	 */
1815	} else if (peer->pmode == MODE_BROADCAST) {
1816
1817		/*
1818		 * Interleaved broadcast mode. Use interleaved timestamps.
1819		 * t1 = peer->borg, t2 = p_org, t3 = p_org, t4 = aorg
1820		 */
1821		if (peer->flags & FLAG_XB) {
1822			ci = p_org;			/* delay */
1823			L_SUB(&ci, &peer->aorg);
1824			LFPTOD(&ci, t34);
1825			ci = p_org;			/* t2 - t1 */
1826			L_SUB(&ci, &peer->borg);
1827			LFPTOD(&ci, t21);
1828			peer->aorg = p_xmt;
1829			peer->borg = peer->dst;
1830			if (t34 < 0 || t34 > 1.) {
1831				snprintf(statstr, sizeof(statstr),
1832				    "offset %.6f delay %.6f", t21, t34);
1833				report_event(PEVNT_XERR, peer, statstr);
1834				return;
1835			}
1836			p_offset = t21;
1837			peer->xleave = t34;
1838
1839		/*
1840		 * Basic broadcast - use direct timestamps.
1841		 * t3 = p_xmt, t4 = peer->dst
1842		 */
1843		} else {
1844			ci = p_xmt;		/* t3 - t4 */
1845			L_SUB(&ci, &peer->dst);
1846			LFPTOD(&ci, t34);
1847			p_offset = t34;
1848		}
1849
1850		/*
1851		 * When calibration is complete and the clock is
1852		 * synchronized, the bias is calculated as the difference
1853		 * between the unicast timestamp and the broadcast
1854		 * timestamp. This works for both basic and interleaved
1855		 * modes.
1856		 */
1857		if (FLAG_BC_VOL & peer->flags) {
1858			peer->flags &= ~FLAG_BC_VOL;
1859			peer->delay = fabs(peer->offset - p_offset) * 2;
1860		}
1861		p_del = peer->delay;
1862		p_offset += p_del / 2;
1863
1864
1865	/*
1866	 * Basic mode, otherwise known as the old fashioned way.
1867	 *
1868	 * t1 = p_org, t2 = p_rec, t3 = p_xmt, t4 = peer->dst
1869	 */
1870	} else {
1871		ci = p_xmt;				/* t3 - t4 */
1872		L_SUB(&ci, &peer->dst);
1873		LFPTOD(&ci, t34);
1874		ci = p_rec;				/* t2 - t1 */
1875		L_SUB(&ci, &p_org);
1876		LFPTOD(&ci, t21);
1877		p_del = fabs(t21 - t34);
1878		p_offset = (t21 + t34) / 2.;
1879	}
1880	p_del = max(p_del, LOGTOD(sys_precision));
1881	p_disp = LOGTOD(sys_precision) + LOGTOD(peer->precision) +
1882	    clock_phi * p_del;
1883
1884#if ASSYM
1885	/*
1886	 * This code calculates the outbound and inbound data rates by
1887	 * measuring the differences between timestamps at different
1888	 * packet lengths. This is helpful in cases of large asymmetric
1889	 * delays commonly experienced on deep space communication
1890	 * links.
1891	 */
1892	if (peer->t21_last > 0 && peer->t34_bytes > 0) {
1893		itemp = peer->t21_bytes - peer->t21_last;
1894		if (itemp > 25) {
1895			etemp = t21 - peer->t21;
1896			if (fabs(etemp) > 1e-6) {
1897				ftemp = itemp / etemp;
1898				if (ftemp > 1000.)
1899					peer->r21 = ftemp;
1900			}
1901		}
1902		itemp = len - peer->t34_bytes;
1903		if (itemp > 25) {
1904			etemp = -t34 - peer->t34;
1905			if (fabs(etemp) > 1e-6) {
1906				ftemp = itemp / etemp;
1907				if (ftemp > 1000.)
1908					peer->r34 = ftemp;
1909			}
1910		}
1911	}
1912
1913	/*
1914	 * The following section compensates for different data rates on
1915	 * the outbound (d21) and inbound (t34) directions. To do this,
1916	 * it finds t such that r21 * t - r34 * (d - t) = 0, where d is
1917	 * the roundtrip delay. Then it calculates the correction as a
1918	 * fraction of d.
1919	 */
1920	peer->t21 = t21;
1921	peer->t21_last = peer->t21_bytes;
1922	peer->t34 = -t34;
1923	peer->t34_bytes = len;
1924	DPRINTF(2, ("packet: t21 %.9lf %d t34 %.9lf %d\n", peer->t21,
1925		    peer->t21_bytes, peer->t34, peer->t34_bytes));
1926	if (peer->r21 > 0 && peer->r34 > 0 && p_del > 0) {
1927		if (peer->pmode != MODE_BROADCAST)
1928			td = (peer->r34 / (peer->r21 + peer->r34) -
1929			    .5) * p_del;
1930		else
1931			td = 0;
1932
1933		/*
1934		 * Unfortunately, in many cases the errors are
1935		 * unacceptable, so for the present the rates are not
1936		 * used. In future, we might find conditions where the
1937		 * calculations are useful, so this should be considered
1938		 * a work in progress.
1939		 */
1940		t21 -= td;
1941		t34 -= td;
1942		DPRINTF(2, ("packet: del %.6lf r21 %.1lf r34 %.1lf %.6lf\n",
1943			    p_del, peer->r21 / 1e3, peer->r34 / 1e3,
1944			    td));
1945	}
1946#endif /* ASSYM */
1947
1948	/*
1949	 * That was awesome. Now hand off to the clock filter.
1950	 */
1951	clock_filter(peer, p_offset + peer->bias, p_del, p_disp);
1952
1953	/*
1954	 * If we are in broadcast calibrate mode, return to broadcast
1955	 * client mode when the client is fit and the autokey dance is
1956	 * complete.
1957	 */
1958	if (   (FLAG_BC_VOL & peer->flags)
1959	    && MODE_CLIENT == peer->hmode
1960	    && !(TEST11 & peer_unfit(peer))) {	/* distance exceeded */
1961#ifdef AUTOKEY
1962		if (peer->flags & FLAG_SKEY) {
1963			if (!(~peer->crypto & CRYPTO_FLAG_ALL))
1964				peer->hmode = MODE_BCLIENT;
1965		} else {
1966			peer->hmode = MODE_BCLIENT;
1967		}
1968#else	/* !AUTOKEY follows */
1969		peer->hmode = MODE_BCLIENT;
1970#endif	/* !AUTOKEY */
1971	}
1972}
1973
1974
1975/*
1976 * clock_update - Called at system process update intervals.
1977 */
1978static void
1979clock_update(
1980	struct peer *peer	/* peer structure pointer */
1981	)
1982{
1983	double	dtemp;
1984	l_fp	now;
1985#ifdef HAVE_LIBSCF_H
1986	char	*fmri;
1987#endif /* HAVE_LIBSCF_H */
1988
1989	/*
1990	 * Update the system state variables. We do this very carefully,
1991	 * as the poll interval might need to be clamped differently.
1992	 */
1993	sys_peer = peer;
1994	sys_epoch = peer->epoch;
1995	if (sys_poll < peer->minpoll)
1996		sys_poll = peer->minpoll;
1997	if (sys_poll > peer->maxpoll)
1998		sys_poll = peer->maxpoll;
1999	poll_update(peer, sys_poll);
2000	sys_stratum = min(peer->stratum + 1, STRATUM_UNSPEC);
2001	if (   peer->stratum == STRATUM_REFCLOCK
2002	    || peer->stratum == STRATUM_UNSPEC)
2003		sys_refid = peer->refid;
2004	else
2005		sys_refid = addr2refid(&peer->srcadr);
2006	/*
2007	 * Root Dispersion (E) is defined (in RFC 5905) as:
2008	 *
2009	 * E = p.epsilon_r + p.epsilon + p.psi + PHI*(s.t - p.t) + |THETA|
2010	 *
2011	 * where:
2012	 *  p.epsilon_r is the PollProc's root dispersion
2013	 *  p.epsilon   is the PollProc's dispersion
2014	 *  p.psi       is the PollProc's jitter
2015	 *  THETA       is the combined offset
2016	 *
2017	 * NB: Think Hard about where these numbers come from and
2018	 * what they mean.  When did peer->update happen?  Has anything
2019	 * interesting happened since then?  What values are the most
2020	 * defensible?  Why?
2021	 *
2022	 * DLM thinks this equation is probably the best of all worse choices.
2023	 */
2024	dtemp	= peer->rootdisp
2025		+ peer->disp
2026		+ sys_jitter
2027		+ clock_phi * (current_time - peer->update)
2028		+ fabs(sys_offset);
2029
2030	if (dtemp > sys_mindisp)
2031		sys_rootdisp = dtemp;
2032	else
2033		sys_rootdisp = sys_mindisp;
2034	sys_rootdelay = peer->delay + peer->rootdelay;
2035	sys_reftime = peer->dst;
2036
2037	DPRINTF(1, ("clock_update: at %lu sample %lu associd %d\n",
2038		    current_time, peer->epoch, peer->associd));
2039
2040	/*
2041	 * Comes now the moment of truth. Crank the clock discipline and
2042	 * see what comes out.
2043	 */
2044	switch (local_clock(peer, sys_offset)) {
2045
2046	/*
2047	 * Clock exceeds panic threshold. Life as we know it ends.
2048	 */
2049	case -1:
2050#ifdef HAVE_LIBSCF_H
2051		/*
2052		 * For Solaris enter the maintenance mode.
2053		 */
2054		if ((fmri = getenv("SMF_FMRI")) != NULL) {
2055			if (smf_maintain_instance(fmri, 0) < 0) {
2056				printf("smf_maintain_instance: %s\n",
2057				    scf_strerror(scf_error()));
2058				exit(1);
2059			}
2060			/*
2061			 * Sleep until SMF kills us.
2062			 */
2063			for (;;)
2064				pause();
2065		}
2066#endif /* HAVE_LIBSCF_H */
2067		exit (-1);
2068		/* not reached */
2069
2070	/*
2071	 * Clock was stepped. Flush all time values of all peers.
2072	 */
2073	case 2:
2074		clear_all();
2075		set_sys_leap(LEAP_NOTINSYNC);
2076		sys_stratum = STRATUM_UNSPEC;
2077		memcpy(&sys_refid, "STEP", 4);
2078		sys_rootdelay = 0;
2079		sys_rootdisp = 0;
2080		L_CLR(&sys_reftime);
2081		sys_jitter = LOGTOD(sys_precision);
2082		leapsec_reset_frame();
2083		break;
2084
2085	/*
2086	 * Clock was slewed. Handle the leapsecond stuff.
2087	 */
2088	case 1:
2089
2090		/*
2091		 * If this is the first time the clock is set, reset the
2092		 * leap bits. If crypto, the timer will goose the setup
2093		 * process.
2094		 */
2095		if (sys_leap == LEAP_NOTINSYNC) {
2096			set_sys_leap(LEAP_NOWARNING);
2097#ifdef AUTOKEY
2098			if (crypto_flags)
2099				crypto_update();
2100#endif	/* AUTOKEY */
2101			/*
2102			 * If our parent process is waiting for the
2103			 * first clock sync, send them home satisfied.
2104			 */
2105#ifdef HAVE_WORKING_FORK
2106			if (waitsync_fd_to_close != -1) {
2107				close(waitsync_fd_to_close);
2108				waitsync_fd_to_close = -1;
2109				DPRINTF(1, ("notified parent --wait-sync is done\n"));
2110			}
2111#endif /* HAVE_WORKING_FORK */
2112
2113		}
2114
2115		/*
2116		 * If there is no leap second pending and the number of
2117		 * survivor leap bits is greater than half the number of
2118		 * survivors, try to schedule a leap for the end of the
2119		 * current month. (This only works if no leap second for
2120		 * that range is in the table, so doing this more than
2121		 * once is mostly harmless.)
2122		 */
2123		if (leapsec == LSPROX_NOWARN) {
2124			if (   leap_vote_ins > leap_vote_del
2125			    && leap_vote_ins > sys_survivors / 2) {
2126				get_systime(&now);
2127				leapsec_add_dyn(TRUE, now.l_ui, NULL);
2128			}
2129			if (   leap_vote_del > leap_vote_ins
2130			    && leap_vote_del > sys_survivors / 2) {
2131				get_systime(&now);
2132				leapsec_add_dyn(FALSE, now.l_ui, NULL);
2133			}
2134		}
2135		break;
2136
2137	/*
2138	 * Popcorn spike or step threshold exceeded. Pretend it never
2139	 * happened.
2140	 */
2141	default:
2142		break;
2143	}
2144}
2145
2146
2147/*
2148 * poll_update - update peer poll interval
2149 */
2150void
2151poll_update(
2152	struct peer *peer,	/* peer structure pointer */
2153	u_char	mpoll
2154	)
2155{
2156	u_long	next, utemp;
2157	u_char	hpoll;
2158
2159	/*
2160	 * This routine figures out when the next poll should be sent.
2161	 * That turns out to be wickedly complicated. One problem is
2162	 * that sometimes the time for the next poll is in the past when
2163	 * the poll interval is reduced. We watch out for races here
2164	 * between the receive process and the poll process.
2165	 *
2166	 * Clamp the poll interval between minpoll and maxpoll.
2167	 */
2168	hpoll = max(min(peer->maxpoll, mpoll), peer->minpoll);
2169
2170#ifdef AUTOKEY
2171	/*
2172	 * If during the crypto protocol the poll interval has changed,
2173	 * the lifetimes in the key list are probably bogus. Purge the
2174	 * the key list and regenerate it later.
2175	 */
2176	if ((peer->flags & FLAG_SKEY) && hpoll != peer->hpoll)
2177		key_expire(peer);
2178#endif	/* AUTOKEY */
2179	peer->hpoll = hpoll;
2180
2181	/*
2182	 * There are three variables important for poll scheduling, the
2183	 * current time (current_time), next scheduled time (nextdate)
2184	 * and the earliest time (utemp). The earliest time is 2 s
2185	 * seconds, but could be more due to rate management. When
2186	 * sending in a burst, use the earliest time. When not in a
2187	 * burst but with a reply pending, send at the earliest time
2188	 * unless the next scheduled time has not advanced. This can
2189	 * only happen if multiple replies are pending in the same
2190	 * response interval. Otherwise, send at the later of the next
2191	 * scheduled time and the earliest time.
2192	 *
2193	 * Now we figure out if there is an override. If a burst is in
2194	 * progress and we get called from the receive process, just
2195	 * slink away. If called from the poll process, delay 1 s for a
2196	 * reference clock, otherwise 2 s.
2197	 */
2198	utemp = current_time + max(peer->throttle - (NTP_SHIFT - 1) *
2199	    (1 << peer->minpoll), ntp_minpkt);
2200	if (peer->burst > 0) {
2201		if (peer->nextdate > current_time)
2202			return;
2203#ifdef REFCLOCK
2204		else if (peer->flags & FLAG_REFCLOCK)
2205			peer->nextdate = current_time + RESP_DELAY;
2206#endif /* REFCLOCK */
2207		else
2208			peer->nextdate = utemp;
2209
2210#ifdef AUTOKEY
2211	/*
2212	 * If a burst is not in progress and a crypto response message
2213	 * is pending, delay 2 s, but only if this is a new interval.
2214	 */
2215	} else if (peer->cmmd != NULL) {
2216		if (peer->nextdate > current_time) {
2217			if (peer->nextdate + ntp_minpkt != utemp)
2218				peer->nextdate = utemp;
2219		} else {
2220			peer->nextdate = utemp;
2221		}
2222#endif	/* AUTOKEY */
2223
2224	/*
2225	 * The ordinary case. If a retry, use minpoll; if unreachable,
2226	 * use host poll; otherwise, use the minimum of host and peer
2227	 * polls; In other words, oversampling is okay but
2228	 * understampling is evil. Use the maximum of this value and the
2229	 * headway. If the average headway is greater than the headway
2230	 * threshold, increase the headway by the minimum interval.
2231	 */
2232	} else {
2233		if (peer->retry > 0)
2234			hpoll = peer->minpoll;
2235		else if (!(peer->reach))
2236			hpoll = peer->hpoll;
2237		else
2238			hpoll = min(peer->ppoll, peer->hpoll);
2239#ifdef REFCLOCK
2240		if (peer->flags & FLAG_REFCLOCK)
2241			next = 1 << hpoll;
2242		else
2243#endif /* REFCLOCK */
2244			next = ((0x1000UL | (ntp_random() & 0x0ff)) <<
2245			    hpoll) >> 12;
2246		next += peer->outdate;
2247		if (next > utemp)
2248			peer->nextdate = next;
2249		else
2250			peer->nextdate = utemp;
2251		if (peer->throttle > (1 << peer->minpoll))
2252			peer->nextdate += ntp_minpkt;
2253	}
2254	DPRINTF(2, ("poll_update: at %lu %s poll %d burst %d retry %d head %d early %lu next %lu\n",
2255		    current_time, ntoa(&peer->srcadr), peer->hpoll,
2256		    peer->burst, peer->retry, peer->throttle,
2257		    utemp - current_time, peer->nextdate -
2258		    current_time));
2259}
2260
2261
2262/*
2263 * peer_clear - clear peer filter registers.  See Section 3.4.8 of the
2264 * spec.
2265 */
2266void
2267peer_clear(
2268	struct peer *peer,		/* peer structure */
2269	const char *ident		/* tally lights */
2270	)
2271{
2272	u_char	u;
2273
2274#ifdef AUTOKEY
2275	/*
2276	 * If cryptographic credentials have been acquired, toss them to
2277	 * Valhalla. Note that autokeys are ephemeral, in that they are
2278	 * tossed immediately upon use. Therefore, the keylist can be
2279	 * purged anytime without needing to preserve random keys. Note
2280	 * that, if the peer is purged, the cryptographic variables are
2281	 * purged, too. This makes it much harder to sneak in some
2282	 * unauthenticated data in the clock filter.
2283	 */
2284	key_expire(peer);
2285	if (peer->iffval != NULL)
2286		BN_free(peer->iffval);
2287	value_free(&peer->cookval);
2288	value_free(&peer->recval);
2289	value_free(&peer->encrypt);
2290	value_free(&peer->sndval);
2291	if (peer->cmmd != NULL)
2292		free(peer->cmmd);
2293	if (peer->subject != NULL)
2294		free(peer->subject);
2295	if (peer->issuer != NULL)
2296		free(peer->issuer);
2297#endif /* AUTOKEY */
2298
2299	/*
2300	 * Clear all values, including the optional crypto values above.
2301	 */
2302	memset(CLEAR_TO_ZERO(peer), 0, LEN_CLEAR_TO_ZERO(peer));
2303	peer->ppoll = peer->maxpoll;
2304	peer->hpoll = peer->minpoll;
2305	peer->disp = MAXDISPERSE;
2306	peer->flash = peer_unfit(peer);
2307	peer->jitter = LOGTOD(sys_precision);
2308
2309	/*
2310	 * If interleave mode, initialize the alternate origin switch.
2311	 */
2312	if (peer->flags & FLAG_XLEAVE)
2313		peer->flip = 1;
2314	for (u = 0; u < NTP_SHIFT; u++) {
2315		peer->filter_order[u] = u;
2316		peer->filter_disp[u] = MAXDISPERSE;
2317	}
2318#ifdef REFCLOCK
2319	if (!(peer->flags & FLAG_REFCLOCK)) {
2320#endif
2321		peer->leap = LEAP_NOTINSYNC;
2322		peer->stratum = STRATUM_UNSPEC;
2323		memcpy(&peer->refid, ident, 4);
2324#ifdef REFCLOCK
2325	}
2326#endif
2327
2328	/*
2329	 * During initialization use the association count to spread out
2330	 * the polls at one-second intervals. Passive associations'
2331	 * first poll is delayed by the "discard minimum" to avoid rate
2332	 * limiting. Other post-startup new or cleared associations
2333	 * randomize the first poll over the minimum poll interval to
2334	 * avoid implosion.
2335	 */
2336	peer->nextdate = peer->update = peer->outdate = current_time;
2337	if (initializing) {
2338		peer->nextdate += peer_associations;
2339	} else if (MODE_PASSIVE == peer->hmode) {
2340		peer->nextdate += ntp_minpkt;
2341	} else {
2342		peer->nextdate += ntp_random() % peer->minpoll;
2343	}
2344#ifdef AUTOKEY
2345	peer->refresh = current_time + (1 << NTP_REFRESH);
2346#endif	/* AUTOKEY */
2347	DPRINTF(1, ("peer_clear: at %ld next %ld associd %d refid %s\n",
2348		    current_time, peer->nextdate, peer->associd,
2349		    ident));
2350}
2351
2352
2353/*
2354 * clock_filter - add incoming clock sample to filter register and run
2355 *		  the filter procedure to find the best sample.
2356 */
2357void
2358clock_filter(
2359	struct peer *peer,		/* peer structure pointer */
2360	double	sample_offset,		/* clock offset */
2361	double	sample_delay,		/* roundtrip delay */
2362	double	sample_disp		/* dispersion */
2363	)
2364{
2365	double	dst[NTP_SHIFT];		/* distance vector */
2366	int	ord[NTP_SHIFT];		/* index vector */
2367	int	i, j, k, m;
2368	double	dtemp, etemp;
2369	char	tbuf[80];
2370
2371	/*
2372	 * A sample consists of the offset, delay, dispersion and epoch
2373	 * of arrival. The offset and delay are determined by the on-
2374	 * wire protocol. The dispersion grows from the last outbound
2375	 * packet to the arrival of this one increased by the sum of the
2376	 * peer precision and the system precision as required by the
2377	 * error budget. First, shift the new arrival into the shift
2378	 * register discarding the oldest one.
2379	 */
2380	j = peer->filter_nextpt;
2381	peer->filter_offset[j] = sample_offset;
2382	peer->filter_delay[j] = sample_delay;
2383	peer->filter_disp[j] = sample_disp;
2384	peer->filter_epoch[j] = current_time;
2385	j = (j + 1) % NTP_SHIFT;
2386	peer->filter_nextpt = j;
2387
2388	/*
2389	 * Update dispersions since the last update and at the same
2390	 * time initialize the distance and index lists. Since samples
2391	 * become increasingly uncorrelated beyond the Allan intercept,
2392	 * only under exceptional cases will an older sample be used.
2393	 * Therefore, the distance list uses a compound metric. If the
2394	 * dispersion is greater than the maximum dispersion, clamp the
2395	 * distance at that value. If the time since the last update is
2396	 * less than the Allan intercept use the delay; otherwise, use
2397	 * the sum of the delay and dispersion.
2398	 */
2399	dtemp = clock_phi * (current_time - peer->update);
2400	peer->update = current_time;
2401	for (i = NTP_SHIFT - 1; i >= 0; i--) {
2402		if (i != 0)
2403			peer->filter_disp[j] += dtemp;
2404		if (peer->filter_disp[j] >= MAXDISPERSE) {
2405			peer->filter_disp[j] = MAXDISPERSE;
2406			dst[i] = MAXDISPERSE;
2407		} else if (peer->update - peer->filter_epoch[j] >
2408		    (u_long)ULOGTOD(allan_xpt)) {
2409			dst[i] = peer->filter_delay[j] +
2410			    peer->filter_disp[j];
2411		} else {
2412			dst[i] = peer->filter_delay[j];
2413		}
2414		ord[i] = j;
2415		j = (j + 1) % NTP_SHIFT;
2416	}
2417
2418	/*
2419	 * If the clock has stabilized, sort the samples by distance.
2420	 */
2421	if (freq_cnt == 0) {
2422		for (i = 1; i < NTP_SHIFT; i++) {
2423			for (j = 0; j < i; j++) {
2424				if (dst[j] > dst[i]) {
2425					k = ord[j];
2426					ord[j] = ord[i];
2427					ord[i] = k;
2428					etemp = dst[j];
2429					dst[j] = dst[i];
2430					dst[i] = etemp;
2431				}
2432			}
2433		}
2434	}
2435
2436	/*
2437	 * Copy the index list to the association structure so ntpq
2438	 * can see it later. Prune the distance list to leave only
2439	 * samples less than the maximum dispersion, which disfavors
2440	 * uncorrelated samples older than the Allan intercept. To
2441	 * further improve the jitter estimate, of the remainder leave
2442	 * only samples less than the maximum distance, but keep at
2443	 * least two samples for jitter calculation.
2444	 */
2445	m = 0;
2446	for (i = 0; i < NTP_SHIFT; i++) {
2447		peer->filter_order[i] = (u_char) ord[i];
2448		if (   dst[i] >= MAXDISPERSE
2449		    || (m >= 2 && dst[i] >= sys_maxdist))
2450			continue;
2451		m++;
2452	}
2453
2454	/*
2455	 * Compute the dispersion and jitter. The dispersion is weighted
2456	 * exponentially by NTP_FWEIGHT (0.5) so it is normalized close
2457	 * to 1.0. The jitter is the RMS differences relative to the
2458	 * lowest delay sample.
2459	 */
2460	peer->disp = peer->jitter = 0;
2461	k = ord[0];
2462	for (i = NTP_SHIFT - 1; i >= 0; i--) {
2463		j = ord[i];
2464		peer->disp = NTP_FWEIGHT * (peer->disp +
2465		    peer->filter_disp[j]);
2466		if (i < m)
2467			peer->jitter += DIFF(peer->filter_offset[j],
2468			    peer->filter_offset[k]);
2469	}
2470
2471	/*
2472	 * If no acceptable samples remain in the shift register,
2473	 * quietly tiptoe home leaving only the dispersion. Otherwise,
2474	 * save the offset, delay and jitter. Note the jitter must not
2475	 * be less than the precision.
2476	 */
2477	if (m == 0) {
2478		clock_select();
2479		return;
2480	}
2481	etemp = fabs(peer->offset - peer->filter_offset[k]);
2482	peer->offset = peer->filter_offset[k];
2483	peer->delay = peer->filter_delay[k];
2484	if (m > 1)
2485		peer->jitter /= m - 1;
2486	peer->jitter = max(SQRT(peer->jitter), LOGTOD(sys_precision));
2487
2488	/*
2489	 * If the the new sample and the current sample are both valid
2490	 * and the difference between their offsets exceeds CLOCK_SGATE
2491	 * (3) times the jitter and the interval between them is less
2492	 * than twice the host poll interval, consider the new sample
2493	 * a popcorn spike and ignore it.
2494	 */
2495	if (   peer->disp < sys_maxdist
2496	    && peer->filter_disp[k] < sys_maxdist
2497	    && etemp > CLOCK_SGATE * peer->jitter
2498	    && peer->filter_epoch[k] - peer->epoch
2499	       < 2. * ULOGTOD(peer->hpoll)) {
2500		snprintf(tbuf, sizeof(tbuf), "%.6f s", etemp);
2501		report_event(PEVNT_POPCORN, peer, tbuf);
2502		return;
2503	}
2504
2505	/*
2506	 * A new minimum sample is useful only if it is later than the
2507	 * last one used. In this design the maximum lifetime of any
2508	 * sample is not greater than eight times the poll interval, so
2509	 * the maximum interval between minimum samples is eight
2510	 * packets.
2511	 */
2512	if (peer->filter_epoch[k] <= peer->epoch) {
2513	DPRINTF(2, ("clock_filter: old sample %lu\n", current_time -
2514		    peer->filter_epoch[k]));
2515		return;
2516	}
2517	peer->epoch = peer->filter_epoch[k];
2518
2519	/*
2520	 * The mitigated sample statistics are saved for later
2521	 * processing. If not synchronized or not in a burst, tickle the
2522	 * clock select algorithm.
2523	 */
2524	record_peer_stats(&peer->srcadr, ctlpeerstatus(peer),
2525	    peer->offset, peer->delay, peer->disp, peer->jitter);
2526	DPRINTF(1, ("clock_filter: n %d off %.6f del %.6f dsp %.6f jit %.6f\n",
2527		    m, peer->offset, peer->delay, peer->disp,
2528		    peer->jitter));
2529	if (peer->burst == 0 || sys_leap == LEAP_NOTINSYNC)
2530		clock_select();
2531}
2532
2533
2534/*
2535 * clock_select - find the pick-of-the-litter clock
2536 *
2537 * LOCKCLOCK: (1) If the local clock is the prefer peer, it will always
2538 * be enabled, even if declared falseticker, (2) only the prefer peer
2539 * can be selected as the system peer, (3) if the external source is
2540 * down, the system leap bits are set to 11 and the stratum set to
2541 * infinity.
2542 */
2543void
2544clock_select(void)
2545{
2546	struct peer *peer;
2547	int	i, j, k, n;
2548	int	nlist, nl2;
2549	int	allow;
2550	int	speer;
2551	double	d, e, f, g;
2552	double	high, low;
2553	double	speermet;
2554	double	orphmet = 2.0 * U_INT32_MAX; /* 2x is greater than */
2555	struct endpoint endp;
2556	struct peer *osys_peer;
2557	struct peer *sys_prefer = NULL;	/* prefer peer */
2558	struct peer *typesystem = NULL;
2559	struct peer *typeorphan = NULL;
2560#ifdef REFCLOCK
2561	struct peer *typeacts = NULL;
2562	struct peer *typelocal = NULL;
2563	struct peer *typepps = NULL;
2564#endif /* REFCLOCK */
2565	static struct endpoint *endpoint = NULL;
2566	static int *indx = NULL;
2567	static peer_select *peers = NULL;
2568	static u_int endpoint_size = 0;
2569	static u_int peers_size = 0;
2570	static u_int indx_size = 0;
2571	size_t octets;
2572
2573	/*
2574	 * Initialize and create endpoint, index and peer lists big
2575	 * enough to handle all associations.
2576	 */
2577	osys_peer = sys_peer;
2578	sys_survivors = 0;
2579#ifdef LOCKCLOCK
2580	set_sys_leap(LEAP_NOTINSYNC);
2581	sys_stratum = STRATUM_UNSPEC;
2582	memcpy(&sys_refid, "DOWN", 4);
2583#endif /* LOCKCLOCK */
2584
2585	/*
2586	 * Allocate dynamic space depending on the number of
2587	 * associations.
2588	 */
2589	nlist = 1;
2590	for (peer = peer_list; peer != NULL; peer = peer->p_link)
2591		nlist++;
2592	endpoint_size = ALIGNED_SIZE(nlist * 2 * sizeof(*endpoint));
2593	peers_size = ALIGNED_SIZE(nlist * sizeof(*peers));
2594	indx_size = ALIGNED_SIZE(nlist * 2 * sizeof(*indx));
2595	octets = endpoint_size + peers_size + indx_size;
2596	endpoint = erealloc(endpoint, octets);
2597	peers = INC_ALIGNED_PTR(endpoint, endpoint_size);
2598	indx = INC_ALIGNED_PTR(peers, peers_size);
2599
2600	/*
2601	 * Initially, we populate the island with all the rifraff peers
2602	 * that happen to be lying around. Those with seriously
2603	 * defective clocks are immediately booted off the island. Then,
2604	 * the falsetickers are culled and put to sea. The truechimers
2605	 * remaining are subject to repeated rounds where the most
2606	 * unpopular at each round is kicked off. When the population
2607	 * has dwindled to sys_minclock, the survivors split a million
2608	 * bucks and collectively crank the chimes.
2609	 */
2610	nlist = nl2 = 0;	/* none yet */
2611	for (peer = peer_list; peer != NULL; peer = peer->p_link) {
2612		peer->new_status = CTL_PST_SEL_REJECT;
2613
2614		/*
2615		 * Leave the island immediately if the peer is
2616		 * unfit to synchronize.
2617		 */
2618		if (peer_unfit(peer))
2619			continue;
2620
2621		/*
2622		 * If this peer is an orphan parent, elect the
2623		 * one with the lowest metric defined as the
2624		 * IPv4 address or the first 64 bits of the
2625		 * hashed IPv6 address.  To ensure convergence
2626		 * on the same selected orphan, consider as
2627		 * well that this system may have the lowest
2628		 * metric and be the orphan parent.  If this
2629		 * system wins, sys_peer will be NULL to trigger
2630		 * orphan mode in timer().
2631		 */
2632		if (peer->stratum == sys_orphan) {
2633			u_int32	localmet;
2634			u_int32 peermet;
2635
2636			if (peer->dstadr != NULL)
2637				localmet = ntohl(peer->dstadr->addr_refid);
2638			else
2639				localmet = U_INT32_MAX;
2640			peermet = ntohl(addr2refid(&peer->srcadr));
2641			if (peermet < localmet && peermet < orphmet) {
2642				typeorphan = peer;
2643				orphmet = peermet;
2644			}
2645			continue;
2646		}
2647
2648		/*
2649		 * If this peer could have the orphan parent
2650		 * as a synchronization ancestor, exclude it
2651		 * from selection to avoid forming a
2652		 * synchronization loop within the orphan mesh,
2653		 * triggering stratum climb to infinity
2654		 * instability.  Peers at stratum higher than
2655		 * the orphan stratum could have the orphan
2656		 * parent in ancestry so are excluded.
2657		 * See http://bugs.ntp.org/2050
2658		 */
2659		if (peer->stratum > sys_orphan)
2660			continue;
2661#ifdef REFCLOCK
2662		/*
2663		 * The following are special cases. We deal
2664		 * with them later.
2665		 */
2666		if (!(peer->flags & FLAG_PREFER)) {
2667			switch (peer->refclktype) {
2668			case REFCLK_LOCALCLOCK:
2669				if (   current_time > orphwait
2670				    && typelocal == NULL)
2671					typelocal = peer;
2672				continue;
2673
2674			case REFCLK_ACTS:
2675				if (   current_time > orphwait
2676				    && typeacts == NULL)
2677					typeacts = peer;
2678				continue;
2679			}
2680		}
2681#endif /* REFCLOCK */
2682
2683		/*
2684		 * If we get this far, the peer can stay on the
2685		 * island, but does not yet have the immunity
2686		 * idol.
2687		 */
2688		peer->new_status = CTL_PST_SEL_SANE;
2689		f = root_distance(peer);
2690		peers[nlist].peer = peer;
2691		peers[nlist].error = peer->jitter;
2692		peers[nlist].synch = f;
2693		nlist++;
2694
2695		/*
2696		 * Insert each interval endpoint on the unsorted
2697		 * endpoint[] list.
2698		 */
2699		e = peer->offset;
2700		endpoint[nl2].type = -1;	/* lower end */
2701		endpoint[nl2].val = e - f;
2702		nl2++;
2703		endpoint[nl2].type = 1;		/* upper end */
2704		endpoint[nl2].val = e + f;
2705		nl2++;
2706	}
2707	/*
2708	 * Construct sorted indx[] of endpoint[] indexes ordered by
2709	 * offset.
2710	 */
2711	for (i = 0; i < nl2; i++)
2712		indx[i] = i;
2713	for (i = 0; i < nl2; i++) {
2714		endp = endpoint[indx[i]];
2715		e = endp.val;
2716		k = i;
2717		for (j = i + 1; j < nl2; j++) {
2718			endp = endpoint[indx[j]];
2719			if (endp.val < e) {
2720				e = endp.val;
2721				k = j;
2722			}
2723		}
2724		if (k != i) {
2725			j = indx[k];
2726			indx[k] = indx[i];
2727			indx[i] = j;
2728		}
2729	}
2730	for (i = 0; i < nl2; i++)
2731		DPRINTF(3, ("select: endpoint %2d %.6f\n",
2732			endpoint[indx[i]].type, endpoint[indx[i]].val));
2733
2734	/*
2735	 * This is the actual algorithm that cleaves the truechimers
2736	 * from the falsetickers. The original algorithm was described
2737	 * in Keith Marzullo's dissertation, but has been modified for
2738	 * better accuracy.
2739	 *
2740	 * Briefly put, we first assume there are no falsetickers, then
2741	 * scan the candidate list first from the low end upwards and
2742	 * then from the high end downwards. The scans stop when the
2743	 * number of intersections equals the number of candidates less
2744	 * the number of falsetickers. If this doesn't happen for a
2745	 * given number of falsetickers, we bump the number of
2746	 * falsetickers and try again. If the number of falsetickers
2747	 * becomes equal to or greater than half the number of
2748	 * candidates, the Albanians have won the Byzantine wars and
2749	 * correct synchronization is not possible.
2750	 *
2751	 * Here, nlist is the number of candidates and allow is the
2752	 * number of falsetickers. Upon exit, the truechimers are the
2753	 * survivors with offsets not less than low and not greater than
2754	 * high. There may be none of them.
2755	 */
2756	low = 1e9;
2757	high = -1e9;
2758	for (allow = 0; 2 * allow < nlist; allow++) {
2759
2760		/*
2761		 * Bound the interval (low, high) as the smallest
2762		 * interval containing points from the most sources.
2763		 */
2764		n = 0;
2765		for (i = 0; i < nl2; i++) {
2766			low = endpoint[indx[i]].val;
2767			n -= endpoint[indx[i]].type;
2768			if (n >= nlist - allow)
2769				break;
2770		}
2771		n = 0;
2772		for (j = nl2 - 1; j >= 0; j--) {
2773			high = endpoint[indx[j]].val;
2774			n += endpoint[indx[j]].type;
2775			if (n >= nlist - allow)
2776				break;
2777		}
2778
2779		/*
2780		 * If an interval containing truechimers is found, stop.
2781		 * If not, increase the number of falsetickers and go
2782		 * around again.
2783		 */
2784		if (high > low)
2785			break;
2786	}
2787
2788	/*
2789	 * Clustering algorithm. Whittle candidate list of falsetickers,
2790	 * who leave the island immediately. The TRUE peer is always a
2791	 * truechimer. We must leave at least one peer to collect the
2792	 * million bucks.
2793	 *
2794	 * We assert the correct time is contained in the interval, but
2795	 * the best offset estimate for the interval might not be
2796	 * contained in the interval. For this purpose, a truechimer is
2797	 * defined as the midpoint of an interval that overlaps the
2798	 * intersection interval.
2799	 */
2800	j = 0;
2801	for (i = 0; i < nlist; i++) {
2802		double	h;
2803
2804		peer = peers[i].peer;
2805		h = peers[i].synch;
2806		if ((   high <= low
2807		     || peer->offset + h < low
2808		     || peer->offset - h > high
2809		    ) && !(peer->flags & FLAG_TRUE))
2810			continue;
2811
2812#ifdef REFCLOCK
2813		/*
2814		 * Eligible PPS peers must survive the intersection
2815		 * algorithm. Use the first one found, but don't
2816		 * include any of them in the cluster population.
2817		 */
2818		if (peer->flags & FLAG_PPS) {
2819			if (typepps == NULL)
2820				typepps = peer;
2821			if (!(peer->flags & FLAG_TSTAMP_PPS))
2822				continue;
2823		}
2824#endif /* REFCLOCK */
2825
2826		if (j != i)
2827			peers[j] = peers[i];
2828		j++;
2829	}
2830	nlist = j;
2831
2832	/*
2833	 * If no survivors remain at this point, check if the modem
2834	 * driver, local driver or orphan parent in that order. If so,
2835	 * nominate the first one found as the only survivor.
2836	 * Otherwise, give up and leave the island to the rats.
2837	 */
2838	if (nlist == 0) {
2839		peers[0].error = 0;
2840		peers[0].synch = sys_mindisp;
2841#ifdef REFCLOCK
2842		if (typeacts != NULL) {
2843			peers[0].peer = typeacts;
2844			nlist = 1;
2845		} else if (typelocal != NULL) {
2846			peers[0].peer = typelocal;
2847			nlist = 1;
2848		} else
2849#endif /* REFCLOCK */
2850		if (typeorphan != NULL) {
2851			peers[0].peer = typeorphan;
2852			nlist = 1;
2853		}
2854	}
2855
2856	/*
2857	 * Mark the candidates at this point as truechimers.
2858	 */
2859	for (i = 0; i < nlist; i++) {
2860		peers[i].peer->new_status = CTL_PST_SEL_SELCAND;
2861		DPRINTF(2, ("select: survivor %s %f\n",
2862			stoa(&peers[i].peer->srcadr), peers[i].synch));
2863	}
2864
2865	/*
2866	 * Now, vote outliers off the island by select jitter weighted
2867	 * by root distance. Continue voting as long as there are more
2868	 * than sys_minclock survivors and the select jitter of the peer
2869	 * with the worst metric is greater than the minimum peer
2870	 * jitter. Stop if we are about to discard a TRUE or PREFER
2871	 * peer, who of course have the immunity idol.
2872	 */
2873	while (1) {
2874		d = 1e9;
2875		e = -1e9;
2876		g = 0;
2877		k = 0;
2878		for (i = 0; i < nlist; i++) {
2879			if (peers[i].error < d)
2880				d = peers[i].error;
2881			peers[i].seljit = 0;
2882			if (nlist > 1) {
2883				f = 0;
2884				for (j = 0; j < nlist; j++)
2885					f += DIFF(peers[j].peer->offset,
2886					    peers[i].peer->offset);
2887				peers[i].seljit = SQRT(f / (nlist - 1));
2888			}
2889			if (peers[i].seljit * peers[i].synch > e) {
2890				g = peers[i].seljit;
2891				e = peers[i].seljit * peers[i].synch;
2892				k = i;
2893			}
2894		}
2895		g = max(g, LOGTOD(sys_precision));
2896		if (   nlist <= max(1, sys_minclock)
2897		    || g <= d
2898		    || ((FLAG_TRUE | FLAG_PREFER) & peers[k].peer->flags))
2899			break;
2900
2901		DPRINTF(3, ("select: drop %s seljit %.6f jit %.6f\n",
2902			ntoa(&peers[k].peer->srcadr), g, d));
2903		if (nlist > sys_maxclock)
2904			peers[k].peer->new_status = CTL_PST_SEL_EXCESS;
2905		for (j = k + 1; j < nlist; j++)
2906			peers[j - 1] = peers[j];
2907		nlist--;
2908	}
2909
2910	/*
2911	 * What remains is a list usually not greater than sys_minclock
2912	 * peers. Note that unsynchronized peers cannot survive this
2913	 * far.  Count and mark these survivors.
2914	 *
2915	 * While at it, count the number of leap warning bits found.
2916	 * This will be used later to vote the system leap warning bit.
2917	 * If a leap warning bit is found on a reference clock, the vote
2918	 * is always won.
2919	 *
2920	 * Choose the system peer using a hybrid metric composed of the
2921	 * selection jitter scaled by the root distance augmented by
2922	 * stratum scaled by sys_mindisp (.001 by default). The goal of
2923	 * the small stratum factor is to avoid clockhop between a
2924	 * reference clock and a network peer which has a refclock and
2925	 * is using an older ntpd, which does not floor sys_rootdisp at
2926	 * sys_mindisp.
2927	 *
2928	 * In contrast, ntpd 4.2.6 and earlier used stratum primarily
2929	 * in selecting the system peer, using a weight of 1 second of
2930	 * additional root distance per stratum.  This heavy bias is no
2931	 * longer appropriate, as the scaled root distance provides a
2932	 * more rational metric carrying the cumulative error budget.
2933	 */
2934	e = 1e9;
2935	speer = 0;
2936	leap_vote_ins = 0;
2937	leap_vote_del = 0;
2938	for (i = 0; i < nlist; i++) {
2939		peer = peers[i].peer;
2940		peer->unreach = 0;
2941		peer->new_status = CTL_PST_SEL_SYNCCAND;
2942		sys_survivors++;
2943		if (peer->leap == LEAP_ADDSECOND) {
2944			if (peer->flags & FLAG_REFCLOCK)
2945				leap_vote_ins = nlist;
2946			else if (leap_vote_ins < nlist)
2947				leap_vote_ins++;
2948		}
2949		if (peer->leap == LEAP_DELSECOND) {
2950			if (peer->flags & FLAG_REFCLOCK)
2951				leap_vote_del = nlist;
2952			else if (leap_vote_del < nlist)
2953				leap_vote_del++;
2954		}
2955		if (peer->flags & FLAG_PREFER)
2956			sys_prefer = peer;
2957		speermet = peers[i].seljit * peers[i].synch +
2958		    peer->stratum * sys_mindisp;
2959		if (speermet < e) {
2960			e = speermet;
2961			speer = i;
2962		}
2963	}
2964
2965	/*
2966	 * Unless there are at least sys_misane survivors, leave the
2967	 * building dark. Otherwise, do a clockhop dance. Ordinarily,
2968	 * use the selected survivor speer. However, if the current
2969	 * system peer is not speer, stay with the current system peer
2970	 * as long as it doesn't get too old or too ugly.
2971	 */
2972	if (nlist > 0 && nlist >= sys_minsane) {
2973		double	x;
2974
2975		typesystem = peers[speer].peer;
2976		if (osys_peer == NULL || osys_peer == typesystem) {
2977			sys_clockhop = 0;
2978		} else if ((x = fabs(typesystem->offset -
2979		    osys_peer->offset)) < sys_mindisp) {
2980			if (sys_clockhop == 0)
2981				sys_clockhop = sys_mindisp;
2982			else
2983				sys_clockhop *= .5;
2984			DPRINTF(1, ("select: clockhop %d %.6f %.6f\n",
2985				j, x, sys_clockhop));
2986			if (fabs(x) < sys_clockhop)
2987				typesystem = osys_peer;
2988			else
2989				sys_clockhop = 0;
2990		} else {
2991			sys_clockhop = 0;
2992		}
2993	}
2994
2995	/*
2996	 * Mitigation rules of the game. We have the pick of the
2997	 * litter in typesystem if any survivors are left. If
2998	 * there is a prefer peer, use its offset and jitter.
2999	 * Otherwise, use the combined offset and jitter of all kitters.
3000	 */
3001	if (typesystem != NULL) {
3002		if (sys_prefer == NULL) {
3003			typesystem->new_status = CTL_PST_SEL_SYSPEER;
3004			clock_combine(peers, sys_survivors, speer);
3005		} else {
3006			typesystem = sys_prefer;
3007			sys_clockhop = 0;
3008			typesystem->new_status = CTL_PST_SEL_SYSPEER;
3009			sys_offset = typesystem->offset;
3010			sys_jitter = typesystem->jitter;
3011		}
3012		DPRINTF(1, ("select: combine offset %.9f jitter %.9f\n",
3013			sys_offset, sys_jitter));
3014	}
3015#ifdef REFCLOCK
3016	/*
3017	 * If a PPS driver is lit and the combined offset is less than
3018	 * 0.4 s, select the driver as the PPS peer and use its offset
3019	 * and jitter. However, if this is the atom driver, use it only
3020	 * if there is a prefer peer or there are no survivors and none
3021	 * are required.
3022	 */
3023	if (   typepps != NULL
3024	    && fabs(sys_offset) < 0.4
3025	    && (   typepps->refclktype != REFCLK_ATOM_PPS
3026		|| (   typepps->refclktype == REFCLK_ATOM_PPS
3027		    && (   sys_prefer != NULL
3028			|| (typesystem == NULL && sys_minsane == 0))))) {
3029		typesystem = typepps;
3030		sys_clockhop = 0;
3031		typesystem->new_status = CTL_PST_SEL_PPS;
3032		sys_offset = typesystem->offset;
3033		sys_jitter = typesystem->jitter;
3034		DPRINTF(1, ("select: pps offset %.9f jitter %.9f\n",
3035			sys_offset, sys_jitter));
3036	}
3037#endif /* REFCLOCK */
3038
3039	/*
3040	 * If there are no survivors at this point, there is no
3041	 * system peer. If so and this is an old update, keep the
3042	 * current statistics, but do not update the clock.
3043	 */
3044	if (typesystem == NULL) {
3045		if (osys_peer != NULL) {
3046			if (sys_orphwait > 0)
3047				orphwait = current_time + sys_orphwait;
3048			report_event(EVNT_NOPEER, NULL, NULL);
3049		}
3050		sys_peer = NULL;
3051		for (peer = peer_list; peer != NULL; peer = peer->p_link)
3052			peer->status = peer->new_status;
3053		return;
3054	}
3055
3056	/*
3057	 * Do not use old data, as this may mess up the clock discipline
3058	 * stability.
3059	 */
3060	if (typesystem->epoch <= sys_epoch)
3061		return;
3062
3063	/*
3064	 * We have found the alpha male. Wind the clock.
3065	 */
3066	if (osys_peer != typesystem)
3067		report_event(PEVNT_NEWPEER, typesystem, NULL);
3068	for (peer = peer_list; peer != NULL; peer = peer->p_link)
3069		peer->status = peer->new_status;
3070	clock_update(typesystem);
3071}
3072
3073
3074static void
3075clock_combine(
3076	peer_select *	peers,	/* survivor list */
3077	int		npeers,	/* number of survivors */
3078	int		syspeer	/* index of sys.peer */
3079	)
3080{
3081	int	i;
3082	double	x, y, z, w;
3083
3084	y = z = w = 0;
3085	for (i = 0; i < npeers; i++) {
3086		x = 1. / peers[i].synch;
3087		y += x;
3088		z += x * peers[i].peer->offset;
3089		w += x * DIFF(peers[i].peer->offset,
3090		    peers[syspeer].peer->offset);
3091	}
3092	sys_offset = z / y;
3093	sys_jitter = SQRT(w / y + SQUARE(peers[syspeer].seljit));
3094}
3095
3096
3097/*
3098 * root_distance - compute synchronization distance from peer to root
3099 */
3100static double
3101root_distance(
3102	struct peer *peer	/* peer structure pointer */
3103	)
3104{
3105	double	dtemp;
3106
3107	/*
3108	 * Root Distance (LAMBDA) is defined as:
3109	 * (delta + DELTA)/2 + epsilon + EPSILON + phi
3110	 *
3111	 * where:
3112	 *  delta   is the round-trip delay
3113	 *  DELTA   is the root delay
3114	 *  epsilon is the remote server precision + local precision
3115	 *	    + (15 usec each second)
3116	 *  EPSILON is the root dispersion
3117	 *  phi     is the peer jitter statistic
3118	 *
3119	 * NB: Think hard about why we are using these values, and what
3120	 * the alternatives are, and the various pros/cons.
3121	 *
3122	 * DLM thinks these are probably the best choices from any of the
3123	 * other worse choices.
3124	 */
3125	dtemp = (peer->delay + peer->rootdelay) / 2
3126		+ LOGTOD(peer->precision)
3127		  + LOGTOD(sys_precision)
3128		  + clock_phi * (current_time - peer->update)
3129		+ peer->rootdisp
3130		+ peer->jitter;
3131	/*
3132	 * Careful squeak here. The value returned must be greater than
3133	 * the minimum root dispersion in order to avoid clockhop with
3134	 * highly precise reference clocks. Note that the root distance
3135	 * cannot exceed the sys_maxdist, as this is the cutoff by the
3136	 * selection algorithm.
3137	 */
3138	if (dtemp < sys_mindisp)
3139		dtemp = sys_mindisp;
3140	return (dtemp);
3141}
3142
3143
3144/*
3145 * peer_xmit - send packet for persistent association.
3146 */
3147static void
3148peer_xmit(
3149	struct peer *peer	/* peer structure pointer */
3150	)
3151{
3152	struct pkt xpkt;	/* transmit packet */
3153	size_t	sendlen, authlen;
3154	keyid_t	xkeyid = 0;	/* transmit key ID */
3155	l_fp	xmt_tx, xmt_ty;
3156
3157	if (!peer->dstadr)	/* drop peers without interface */
3158		return;
3159
3160	xpkt.li_vn_mode = PKT_LI_VN_MODE(sys_leap, peer->version,
3161	    peer->hmode);
3162	xpkt.stratum = STRATUM_TO_PKT(sys_stratum);
3163	xpkt.ppoll = peer->hpoll;
3164	xpkt.precision = sys_precision;
3165	xpkt.refid = sys_refid;
3166	xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay));
3167	xpkt.rootdisp =  HTONS_FP(DTOUFP(sys_rootdisp));
3168	HTONL_FP(&sys_reftime, &xpkt.reftime);
3169	HTONL_FP(&peer->rec, &xpkt.org);
3170	HTONL_FP(&peer->dst, &xpkt.rec);
3171
3172	/*
3173	 * If the received packet contains a MAC, the transmitted packet
3174	 * is authenticated and contains a MAC. If not, the transmitted
3175	 * packet is not authenticated.
3176	 *
3177	 * It is most important when autokey is in use that the local
3178	 * interface IP address be known before the first packet is
3179	 * sent. Otherwise, it is not possible to compute a correct MAC
3180	 * the recipient will accept. Thus, the I/O semantics have to do
3181	 * a little more work. In particular, the wildcard interface
3182	 * might not be usable.
3183	 */
3184	sendlen = LEN_PKT_NOMAC;
3185	if (
3186#ifdef AUTOKEY
3187	    !(peer->flags & FLAG_SKEY) &&
3188#endif	/* !AUTOKEY */
3189	    peer->keyid == 0) {
3190
3191		/*
3192		 * Transmit a-priori timestamps
3193		 */
3194		get_systime(&xmt_tx);
3195		if (peer->flip == 0) {	/* basic mode */
3196			peer->aorg = xmt_tx;
3197			HTONL_FP(&xmt_tx, &xpkt.xmt);
3198		} else {		/* interleaved modes */
3199			if (peer->hmode == MODE_BROADCAST) { /* bcst */
3200				HTONL_FP(&xmt_tx, &xpkt.xmt);
3201				if (peer->flip > 0)
3202					HTONL_FP(&peer->borg,
3203					    &xpkt.org);
3204				else
3205					HTONL_FP(&peer->aorg,
3206					    &xpkt.org);
3207			} else {	/* symmetric */
3208				if (peer->flip > 0)
3209					HTONL_FP(&peer->borg,
3210					    &xpkt.xmt);
3211				else
3212					HTONL_FP(&peer->aorg,
3213					    &xpkt.xmt);
3214			}
3215		}
3216		peer->t21_bytes = sendlen;
3217		sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl],
3218		    &xpkt, sendlen);
3219		peer->sent++;
3220		peer->throttle += (1 << peer->minpoll) - 2;
3221
3222		/*
3223		 * Capture a-posteriori timestamps
3224		 */
3225		get_systime(&xmt_ty);
3226		if (peer->flip != 0) {		/* interleaved modes */
3227			if (peer->flip > 0)
3228				peer->aorg = xmt_ty;
3229			else
3230				peer->borg = xmt_ty;
3231			peer->flip = -peer->flip;
3232		}
3233		L_SUB(&xmt_ty, &xmt_tx);
3234		LFPTOD(&xmt_ty, peer->xleave);
3235		DPRINTF(1, ("peer_xmit: at %ld %s->%s mode %d len %zu xmt %#010x.%08x\n",
3236			    current_time,
3237			    peer->dstadr ? stoa(&peer->dstadr->sin) : "-",
3238		            stoa(&peer->srcadr), peer->hmode, sendlen,
3239			    xmt_tx.l_ui, xmt_tx.l_uf));
3240		return;
3241	}
3242
3243	/*
3244	 * Authentication is enabled, so the transmitted packet must be
3245	 * authenticated. If autokey is enabled, fuss with the various
3246	 * modes; otherwise, symmetric key cryptography is used.
3247	 */
3248#ifdef AUTOKEY
3249	if (peer->flags & FLAG_SKEY) {
3250		struct exten *exten;	/* extension field */
3251
3252		/*
3253		 * The Public Key Dance (PKD): Cryptographic credentials
3254		 * are contained in extension fields, each including a
3255		 * 4-octet length/code word followed by a 4-octet
3256		 * association ID and optional additional data. Optional
3257		 * data includes a 4-octet data length field followed by
3258		 * the data itself. Request messages are sent from a
3259		 * configured association; response messages can be sent
3260		 * from a configured association or can take the fast
3261		 * path without ever matching an association. Response
3262		 * messages have the same code as the request, but have
3263		 * a response bit and possibly an error bit set. In this
3264		 * implementation, a message may contain no more than
3265		 * one command and one or more responses.
3266		 *
3267		 * Cryptographic session keys include both a public and
3268		 * a private componet. Request and response messages
3269		 * using extension fields are always sent with the
3270		 * private component set to zero. Packets without
3271		 * extension fields indlude the private component when
3272		 * the session key is generated.
3273		 */
3274		while (1) {
3275
3276			/*
3277			 * Allocate and initialize a keylist if not
3278			 * already done. Then, use the list in inverse
3279			 * order, discarding keys once used. Keep the
3280			 * latest key around until the next one, so
3281			 * clients can use client/server packets to
3282			 * compute propagation delay.
3283			 *
3284			 * Note that once a key is used from the list,
3285			 * it is retained in the key cache until the
3286			 * next key is used. This is to allow a client
3287			 * to retrieve the encrypted session key
3288			 * identifier to verify authenticity.
3289			 *
3290			 * If for some reason a key is no longer in the
3291			 * key cache, a birthday has happened or the key
3292			 * has expired, so the pseudo-random sequence is
3293			 * broken. In that case, purge the keylist and
3294			 * regenerate it.
3295			 */
3296			if (peer->keynumber == 0)
3297				make_keylist(peer, peer->dstadr);
3298			else
3299				peer->keynumber--;
3300			xkeyid = peer->keylist[peer->keynumber];
3301			if (authistrusted(xkeyid))
3302				break;
3303			else
3304				key_expire(peer);
3305		}
3306		peer->keyid = xkeyid;
3307		exten = NULL;
3308		switch (peer->hmode) {
3309
3310		/*
3311		 * In broadcast server mode the autokey values are
3312		 * required by the broadcast clients. Push them when a
3313		 * new keylist is generated; otherwise, push the
3314		 * association message so the client can request them at
3315		 * other times.
3316		 */
3317		case MODE_BROADCAST:
3318			if (peer->flags & FLAG_ASSOC)
3319				exten = crypto_args(peer, CRYPTO_AUTO |
3320				    CRYPTO_RESP, peer->associd, NULL);
3321			else
3322				exten = crypto_args(peer, CRYPTO_ASSOC |
3323				    CRYPTO_RESP, peer->associd, NULL);
3324			break;
3325
3326		/*
3327		 * In symmetric modes the parameter, certificate,
3328		 * identity, cookie and autokey exchanges are
3329		 * required. The leapsecond exchange is optional. But, a
3330		 * peer will not believe the other peer until the other
3331		 * peer has synchronized, so the certificate exchange
3332		 * might loop until then. If a peer finds a broken
3333		 * autokey sequence, it uses the autokey exchange to
3334		 * retrieve the autokey values. In any case, if a new
3335		 * keylist is generated, the autokey values are pushed.
3336		 */
3337		case MODE_ACTIVE:
3338		case MODE_PASSIVE:
3339
3340			/*
3341			 * Parameter, certificate and identity.
3342			 */
3343			if (!peer->crypto)
3344				exten = crypto_args(peer, CRYPTO_ASSOC,
3345				    peer->associd, hostval.ptr);
3346			else if (!(peer->crypto & CRYPTO_FLAG_CERT))
3347				exten = crypto_args(peer, CRYPTO_CERT,
3348				    peer->associd, peer->issuer);
3349			else if (!(peer->crypto & CRYPTO_FLAG_VRFY))
3350				exten = crypto_args(peer,
3351				    crypto_ident(peer), peer->associd,
3352				    NULL);
3353
3354			/*
3355			 * Cookie and autokey. We request the cookie
3356			 * only when the this peer and the other peer
3357			 * are synchronized. But, this peer needs the
3358			 * autokey values when the cookie is zero. Any
3359			 * time we regenerate the key list, we offer the
3360			 * autokey values without being asked. If for
3361			 * some reason either peer finds a broken
3362			 * autokey sequence, the autokey exchange is
3363			 * used to retrieve the autokey values.
3364			 */
3365			else if (   sys_leap != LEAP_NOTINSYNC
3366				 && peer->leap != LEAP_NOTINSYNC
3367				 && !(peer->crypto & CRYPTO_FLAG_COOK))
3368				exten = crypto_args(peer, CRYPTO_COOK,
3369				    peer->associd, NULL);
3370			else if (!(peer->crypto & CRYPTO_FLAG_AUTO))
3371				exten = crypto_args(peer, CRYPTO_AUTO,
3372				    peer->associd, NULL);
3373			else if (   peer->flags & FLAG_ASSOC
3374				 && peer->crypto & CRYPTO_FLAG_SIGN)
3375				exten = crypto_args(peer, CRYPTO_AUTO |
3376				    CRYPTO_RESP, peer->assoc, NULL);
3377
3378			/*
3379			 * Wait for clock sync, then sign the
3380			 * certificate and retrieve the leapsecond
3381			 * values.
3382			 */
3383			else if (sys_leap == LEAP_NOTINSYNC)
3384				break;
3385
3386			else if (!(peer->crypto & CRYPTO_FLAG_SIGN))
3387				exten = crypto_args(peer, CRYPTO_SIGN,
3388				    peer->associd, hostval.ptr);
3389			else if (!(peer->crypto & CRYPTO_FLAG_LEAP))
3390				exten = crypto_args(peer, CRYPTO_LEAP,
3391				    peer->associd, NULL);
3392			break;
3393
3394		/*
3395		 * In client mode the parameter, certificate, identity,
3396		 * cookie and sign exchanges are required. The
3397		 * leapsecond exchange is optional. If broadcast client
3398		 * mode the same exchanges are required, except that the
3399		 * autokey exchange is substitutes for the cookie
3400		 * exchange, since the cookie is always zero. If the
3401		 * broadcast client finds a broken autokey sequence, it
3402		 * uses the autokey exchange to retrieve the autokey
3403		 * values.
3404		 */
3405		case MODE_CLIENT:
3406
3407			/*
3408			 * Parameter, certificate and identity.
3409			 */
3410			if (!peer->crypto)
3411				exten = crypto_args(peer, CRYPTO_ASSOC,
3412				    peer->associd, hostval.ptr);
3413			else if (!(peer->crypto & CRYPTO_FLAG_CERT))
3414				exten = crypto_args(peer, CRYPTO_CERT,
3415				    peer->associd, peer->issuer);
3416			else if (!(peer->crypto & CRYPTO_FLAG_VRFY))
3417				exten = crypto_args(peer,
3418				    crypto_ident(peer), peer->associd,
3419				    NULL);
3420
3421			/*
3422			 * Cookie and autokey. These are requests, but
3423			 * we use the peer association ID with autokey
3424			 * rather than our own.
3425			 */
3426			else if (!(peer->crypto & CRYPTO_FLAG_COOK))
3427				exten = crypto_args(peer, CRYPTO_COOK,
3428				    peer->associd, NULL);
3429			else if (!(peer->crypto & CRYPTO_FLAG_AUTO))
3430				exten = crypto_args(peer, CRYPTO_AUTO,
3431				    peer->assoc, NULL);
3432
3433			/*
3434			 * Wait for clock sync, then sign the
3435			 * certificate and retrieve the leapsecond
3436			 * values.
3437			 */
3438			else if (sys_leap == LEAP_NOTINSYNC)
3439				break;
3440
3441			else if (!(peer->crypto & CRYPTO_FLAG_SIGN))
3442				exten = crypto_args(peer, CRYPTO_SIGN,
3443				    peer->associd, hostval.ptr);
3444			else if (!(peer->crypto & CRYPTO_FLAG_LEAP))
3445				exten = crypto_args(peer, CRYPTO_LEAP,
3446				    peer->associd, NULL);
3447			break;
3448		}
3449
3450		/*
3451		 * Add a queued extension field if present. This is
3452		 * always a request message, so the reply ID is already
3453		 * in the message. If an error occurs, the error bit is
3454		 * lit in the response.
3455		 */
3456		if (peer->cmmd != NULL) {
3457			u_int32 temp32;
3458
3459			temp32 = CRYPTO_RESP;
3460			peer->cmmd->opcode |= htonl(temp32);
3461			sendlen += crypto_xmit(peer, &xpkt, NULL,
3462			    sendlen, peer->cmmd, 0);
3463			free(peer->cmmd);
3464			peer->cmmd = NULL;
3465		}
3466
3467		/*
3468		 * Add an extension field created above. All but the
3469		 * autokey response message are request messages.
3470		 */
3471		if (exten != NULL) {
3472			if (exten->opcode != 0)
3473				sendlen += crypto_xmit(peer, &xpkt,
3474				    NULL, sendlen, exten, 0);
3475			free(exten);
3476		}
3477
3478		/*
3479		 * Calculate the next session key. Since extension
3480		 * fields are present, the cookie value is zero.
3481		 */
3482		if (sendlen > (int)LEN_PKT_NOMAC) {
3483			session_key(&peer->dstadr->sin, &peer->srcadr,
3484			    xkeyid, 0, 2);
3485		}
3486	}
3487#endif	/* AUTOKEY */
3488
3489	/*
3490	 * Transmit a-priori timestamps
3491	 */
3492	get_systime(&xmt_tx);
3493	if (peer->flip == 0) {		/* basic mode */
3494		peer->aorg = xmt_tx;
3495		HTONL_FP(&xmt_tx, &xpkt.xmt);
3496	} else {			/* interleaved modes */
3497		if (peer->hmode == MODE_BROADCAST) { /* bcst */
3498			HTONL_FP(&xmt_tx, &xpkt.xmt);
3499			if (peer->flip > 0)
3500				HTONL_FP(&peer->borg, &xpkt.org);
3501			else
3502				HTONL_FP(&peer->aorg, &xpkt.org);
3503		} else {		/* symmetric */
3504			if (peer->flip > 0)
3505				HTONL_FP(&peer->borg, &xpkt.xmt);
3506			else
3507				HTONL_FP(&peer->aorg, &xpkt.xmt);
3508		}
3509	}
3510	xkeyid = peer->keyid;
3511	authlen = authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen);
3512	if (authlen == 0) {
3513		report_event(PEVNT_AUTH, peer, "no key");
3514		peer->flash |= TEST5;		/* auth error */
3515		peer->badauth++;
3516		return;
3517	}
3518	sendlen += authlen;
3519#ifdef AUTOKEY
3520	if (xkeyid > NTP_MAXKEY)
3521		authtrust(xkeyid, 0);
3522#endif	/* AUTOKEY */
3523	if (sendlen > sizeof(xpkt)) {
3524		msyslog(LOG_ERR, "peer_xmit: buffer overflow %zu", sendlen);
3525		exit (-1);
3526	}
3527	peer->t21_bytes = sendlen;
3528	sendpkt(&peer->srcadr, peer->dstadr, sys_ttl[peer->ttl], &xpkt,
3529	    sendlen);
3530	peer->sent++;
3531	peer->throttle += (1 << peer->minpoll) - 2;
3532
3533	/*
3534	 * Capture a-posteriori timestamps
3535	 */
3536	get_systime(&xmt_ty);
3537	if (peer->flip != 0) {			/* interleaved modes */
3538		if (peer->flip > 0)
3539			peer->aorg = xmt_ty;
3540		else
3541			peer->borg = xmt_ty;
3542		peer->flip = -peer->flip;
3543	}
3544	L_SUB(&xmt_ty, &xmt_tx);
3545	LFPTOD(&xmt_ty, peer->xleave);
3546#ifdef AUTOKEY
3547	DPRINTF(1, ("peer_xmit: at %ld %s->%s mode %d keyid %08x len %zu index %d\n",
3548		    current_time, latoa(peer->dstadr),
3549		    ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen,
3550		    peer->keynumber));
3551#else	/* !AUTOKEY follows */
3552	DPRINTF(1, ("peer_xmit: at %ld %s->%s mode %d keyid %08x len %d\n",
3553		    current_time, peer->dstadr ?
3554		    ntoa(&peer->dstadr->sin) : "-",
3555		    ntoa(&peer->srcadr), peer->hmode, xkeyid, sendlen));
3556#endif	/* !AUTOKEY */
3557
3558	return;
3559}
3560
3561
#ifdef LEAP_SMEAR

/*
 * leap_smear_add_offs - add the current leap smear offset
 * (leap_smear.offset) to the timestamp *t, in place.
 *
 * The second argument is accepted but not used.
 */
static void
leap_smear_add_offs(
	l_fp *t,	/* timestamp to adjust */
	l_fp *t_recv	/* not used */
	)
{
	(void)t_recv;

	L_ADD(t, &leap_smear.offset);
}

#endif  /* LEAP_SMEAR */
3577
3578
3579/*
3580 * fast_xmit - Send packet for nonpersistent association. Note that
3581 * neither the source or destination can be a broadcast address.
3582 */
3583static void
3584fast_xmit(
3585	struct recvbuf *rbufp,	/* receive packet pointer */
3586	int	xmode,		/* receive mode */
3587	keyid_t	xkeyid,		/* transmit key ID */
3588	int	flags		/* restrict mask */
3589	)
3590{
3591	struct pkt xpkt;	/* transmit packet structure */
3592	struct pkt *rpkt;	/* receive packet structure */
3593	l_fp	xmt_tx, xmt_ty;
3594	size_t	sendlen;
3595#ifdef AUTOKEY
3596	u_int32	temp32;
3597#endif
3598
3599	/*
3600	 * Initialize transmit packet header fields from the receive
3601	 * buffer provided. We leave the fields intact as received, but
3602	 * set the peer poll at the maximum of the receive peer poll and
3603	 * the system minimum poll (ntp_minpoll). This is for KoD rate
3604	 * control and not strictly specification compliant, but doesn't
3605	 * break anything.
3606	 *
3607	 * If the gazinta was from a multicast address, the gazoutta
3608	 * must go out another way.
3609	 */
3610	rpkt = &rbufp->recv_pkt;
3611	if (rbufp->dstadr->flags & INT_MCASTOPEN)
3612		rbufp->dstadr = findinterface(&rbufp->recv_srcadr);
3613
3614	/*
3615	 * If this is a kiss-o'-death (KoD) packet, show leap
3616	 * unsynchronized, stratum zero, reference ID the four-character
3617	 * kiss code and system root delay. Note we don't reveal the
3618	 * local time, so these packets can't be used for
3619	 * synchronization.
3620	 */
3621	if (flags & RES_KOD) {
3622		sys_kodsent++;
3623		xpkt.li_vn_mode = PKT_LI_VN_MODE(LEAP_NOTINSYNC,
3624		    PKT_VERSION(rpkt->li_vn_mode), xmode);
3625		xpkt.stratum = STRATUM_PKT_UNSPEC;
3626		xpkt.ppoll = max(rpkt->ppoll, ntp_minpoll);
3627		xpkt.precision = rpkt->precision;
3628		memcpy(&xpkt.refid, "RATE", 4);
3629		xpkt.rootdelay = rpkt->rootdelay;
3630		xpkt.rootdisp = rpkt->rootdisp;
3631		xpkt.reftime = rpkt->reftime;
3632		xpkt.org = rpkt->xmt;
3633		xpkt.rec = rpkt->xmt;
3634		xpkt.xmt = rpkt->xmt;
3635
3636	/*
3637	 * This is a normal packet. Use the system variables.
3638	 */
3639	} else {
3640#ifdef LEAP_SMEAR
3641		/*
3642		 * Make copies of the variables which can be affected by smearing.
3643		 */
3644		l_fp this_ref_time;
3645		l_fp this_recv_time;
3646#endif
3647
3648		/*
3649		 * If we are inside the leap smear interval we add the current smear offset to
3650		 * the packet receive time, to the packet transmit time, and eventually to the
3651		 * reftime to make sure the reftime isn't later than the transmit/receive times.
3652		 */
3653		xpkt.li_vn_mode = PKT_LI_VN_MODE(xmt_leap,
3654		    PKT_VERSION(rpkt->li_vn_mode), xmode);
3655
3656		xpkt.stratum = STRATUM_TO_PKT(sys_stratum);
3657		xpkt.ppoll = max(rpkt->ppoll, ntp_minpoll);
3658		xpkt.precision = sys_precision;
3659		xpkt.refid = sys_refid;
3660		xpkt.rootdelay = HTONS_FP(DTOFP(sys_rootdelay));
3661		xpkt.rootdisp = HTONS_FP(DTOUFP(sys_rootdisp));
3662
3663#ifdef LEAP_SMEAR
3664		this_ref_time = sys_reftime;
3665		if (leap_smear.in_progress) {
3666			leap_smear_add_offs(&this_ref_time, NULL);
3667			xpkt.refid = convertLFPToRefID(leap_smear.offset);
3668			DPRINTF(2, ("fast_xmit: leap_smear.in_progress: refid %8x, smear %s\n",
3669				ntohl(xpkt.refid),
3670				lfptoa(&leap_smear.offset, 8)
3671				));
3672		}
3673		HTONL_FP(&this_ref_time, &xpkt.reftime);
3674#else
3675		HTONL_FP(&sys_reftime, &xpkt.reftime);
3676#endif
3677
3678		xpkt.org = rpkt->xmt;
3679
3680#ifdef LEAP_SMEAR
3681		this_recv_time = rbufp->recv_time;
3682		if (leap_smear.in_progress)
3683			leap_smear_add_offs(&this_recv_time, NULL);
3684		HTONL_FP(&this_recv_time, &xpkt.rec);
3685#else
3686		HTONL_FP(&rbufp->recv_time, &xpkt.rec);
3687#endif
3688
3689		get_systime(&xmt_tx);
3690#ifdef LEAP_SMEAR
3691		if (leap_smear.in_progress)
3692			leap_smear_add_offs(&xmt_tx, &this_recv_time);
3693#endif
3694		HTONL_FP(&xmt_tx, &xpkt.xmt);
3695	}
3696
3697#ifdef HAVE_NTP_SIGND
3698	if (flags & RES_MSSNTP) {
3699		send_via_ntp_signd(rbufp, xmode, xkeyid, flags, &xpkt);
3700		return;
3701	}
3702#endif /* HAVE_NTP_SIGND */
3703
3704	/*
3705	 * If the received packet contains a MAC, the transmitted packet
3706	 * is authenticated and contains a MAC. If not, the transmitted
3707	 * packet is not authenticated.
3708	 */
3709	sendlen = LEN_PKT_NOMAC;
3710	if (rbufp->recv_length == sendlen) {
3711		sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt,
3712		    sendlen);
3713		DPRINTF(1, ("fast_xmit: at %ld %s->%s mode %d len %lu\n",
3714			    current_time, stoa(&rbufp->dstadr->sin),
3715			    stoa(&rbufp->recv_srcadr), xmode,
3716			    (u_long)sendlen));
3717		return;
3718	}
3719
3720	/*
3721	 * The received packet contains a MAC, so the transmitted packet
3722	 * must be authenticated. For symmetric key cryptography, use
3723	 * the predefined and trusted symmetric keys to generate the
3724	 * cryptosum. For autokey cryptography, use the server private
3725	 * value to generate the cookie, which is unique for every
3726	 * source-destination-key ID combination.
3727	 */
3728#ifdef AUTOKEY
3729	if (xkeyid > NTP_MAXKEY) {
3730		keyid_t cookie;
3731
3732		/*
3733		 * The only way to get here is a reply to a legitimate
3734		 * client request message, so the mode must be
3735		 * MODE_SERVER. If an extension field is present, there
3736		 * can be only one and that must be a command. Do what
3737		 * needs, but with private value of zero so the poor
3738		 * jerk can decode it. If no extension field is present,
3739		 * use the cookie to generate the session key.
3740		 */
3741		cookie = session_key(&rbufp->recv_srcadr,
3742		    &rbufp->dstadr->sin, 0, sys_private, 0);
3743		if ((size_t)rbufp->recv_length > sendlen + MAX_MAC_LEN) {
3744			session_key(&rbufp->dstadr->sin,
3745			    &rbufp->recv_srcadr, xkeyid, 0, 2);
3746			temp32 = CRYPTO_RESP;
3747			rpkt->exten[0] |= htonl(temp32);
3748			sendlen += crypto_xmit(NULL, &xpkt, rbufp,
3749			    sendlen, (struct exten *)rpkt->exten,
3750			    cookie);
3751		} else {
3752			session_key(&rbufp->dstadr->sin,
3753			    &rbufp->recv_srcadr, xkeyid, cookie, 2);
3754		}
3755	}
3756#endif	/* AUTOKEY */
3757	get_systime(&xmt_tx);
3758	sendlen += authencrypt(xkeyid, (u_int32 *)&xpkt, sendlen);
3759#ifdef AUTOKEY
3760	if (xkeyid > NTP_MAXKEY)
3761		authtrust(xkeyid, 0);
3762#endif	/* AUTOKEY */
3763	sendpkt(&rbufp->recv_srcadr, rbufp->dstadr, 0, &xpkt, sendlen);
3764	get_systime(&xmt_ty);
3765	L_SUB(&xmt_ty, &xmt_tx);
3766	sys_authdelay = xmt_ty;
3767	DPRINTF(1, ("fast_xmit: at %ld %s->%s mode %d keyid %08x len %lu\n",
3768		    current_time, ntoa(&rbufp->dstadr->sin),
3769		    ntoa(&rbufp->recv_srcadr), xmode, xkeyid,
3770		    (u_long)sendlen));
3771}
3772
3773
/*
 * pool_xmit - resolve hostname or send unicast solicitation for pool.
 *
 * With no address pending (pool->ai is NULL) any previous address
 * list is released and a DNS lookup of pool->hostname is started;
 * pool_name_resolved() is registered as its completion callback.
 * Otherwise the pending addresses are consumed until one is found
 * that findexistingpeer() does not already know in client mode, and a
 * client-mode packet without MAC is sent to it. The whole body is
 * compiled only when WORKER is defined.
 */
static void
pool_xmit(
	struct peer *pool	/* pool solicitor association */
	)
{
#ifdef WORKER
	struct pkt		solicit;	/* transmit packet structure */
	struct addrinfo		want;		/* lookup hints */
	struct interface *	ep;		/* local endpoint */
	sockaddr_u *		target;		/* server to solicit */
	struct peer *		known;		/* existing association */
	l_fp			now;
	int			mask;
	int			err;

	if (NULL == pool->ai) {
		/* free() is used with copy_addrinfo_list() */
		if (pool->addrs != NULL) {
			free(pool->addrs);
			pool->addrs = NULL;
		}
		ZERO(want);
		want.ai_family = AF(&pool->srcadr);
		want.ai_socktype = SOCK_DGRAM;
		want.ai_protocol = IPPROTO_UDP;
		/* ignore getaddrinfo_sometime() errors, we will retry */
		err = getaddrinfo_sometime(
			pool->hostname,
			"ntp",
			&want,
			0,			/* no retry */
			&pool_name_resolved,
			(void *)(intptr_t)pool->associd);
		if (err != 0) {
			msyslog(LOG_ERR,
				"unable to start pool DNS %s: %m",
				pool->hostname);
		} else {
			DPRINTF(1, ("pool DNS lookup %s started\n",
				pool->hostname));
		}
		return;
	}

	/*
	 * Step through the resolved addresses until one turns up that
	 * is not an existing client association, or the list runs dry.
	 */
	for (;;) {
		/* copy_addrinfo_list ai_addr points to a sockaddr_u */
		target = (sockaddr_u *)(void *)pool->ai->ai_addr;
		pool->ai = pool->ai->ai_next;
		known = findexistingpeer(target, NULL, NULL, MODE_CLIENT,
					 0);
		if (NULL == known || NULL == pool->ai)
			break;
	}
	if (known != NULL)
		return;	/* out of addresses, re-query DNS next poll */

	mask = restrictions(target);
	if (RES_FLAGS & mask)
		restrict_source(target, 0,
				current_time + POOL_SOLICIT_WINDOW + 1);
	ep = findinterface(target);

	/* build the client-mode solicitation from the system variables */
	memset(&solicit, 0, sizeof(solicit));
	solicit.li_vn_mode = PKT_LI_VN_MODE(sys_leap, pool->version,
					    MODE_CLIENT);
	solicit.stratum = STRATUM_TO_PKT(sys_stratum);
	solicit.ppoll = pool->hpoll;
	solicit.precision = sys_precision;
	solicit.refid = sys_refid;
	solicit.rootdelay = HTONS_FP(DTOFP(sys_rootdelay));
	solicit.rootdisp = HTONS_FP(DTOUFP(sys_rootdisp));
	HTONL_FP(&sys_reftime, &solicit.reftime);
	get_systime(&now);
	pool->aorg = now;
	HTONL_FP(&now, &solicit.xmt);

	sendpkt(target, ep, sys_ttl[pool->ttl], &solicit,
		LEN_PKT_NOMAC);
	pool->sent++;
	pool->throttle += (1 << pool->minpoll) - 2;
	DPRINTF(1, ("pool_xmit: at %ld %s->%s pool\n",
		    current_time, latoa(ep), stoa(target)));
	msyslog(LOG_INFO, "Soliciting pool server %s", stoa(target));
#endif	/* WORKER */
}
3855
3856
#ifdef AUTOKEY
/*
 * group_test - test if this is the same group
 *
 * Returns 0 when grp is NULL, when grp equals sys_groupname, or when
 * grp equals ident. Returns 1 when grp matches neither, including the
 * case where ident is NULL.
 *
 * Decision table carried over from the original comment
 * (NOTE(review): how the host/assoc columns map onto grp, ident and
 * sys_groupname is not evident from this function - confirm against
 * the callers):
 *
 * host		assoc		return		action
 * none		none		0		mobilize *
 * none		group		0		mobilize *
 * group	none		0		mobilize *
 * group	group		1		mobilize
 * group	different	1		ignore
 * * ignore if notrust
 */
int
group_test(
	char	*grp,
	char	*ident
	)
{
	/* no group given, or it is our own group */
	if (grp == NULL || strcmp(grp, sys_groupname) == 0)
		return (0);

	/* a foreign group with nothing to compare it against */
	if (ident == NULL)
		return (1);

	return (strcmp(grp, ident) != 0);
}
#endif /* AUTOKEY */
3890
#ifdef WORKER
/*
 * pool_name_resolved - completion callback for the pool DNS lookup
 * started by pool_xmit()
 *
 * context carries the association ID of the pool solicitor. On a
 * nonzero rescode the failure is logged and nothing else happens. On
 * success the result list is copied into the association and
 * pool_xmit() is called to solicit the first address. gai_errno,
 * service and hints are not used here.
 */
void
pool_name_resolved(
	int			rescode,
	int			gai_errno,
	void *			context,
	const char *		name,
	const char *		service,
	const struct addrinfo *	hints,
	const struct addrinfo *	res
	)
{
	struct peer *	solicitor;	/* pool solicitor association */
	associd_t	id;

	(void)gai_errno;
	(void)service;
	(void)hints;

	if (rescode != 0) {
		msyslog(LOG_ERR,
			"error resolving pool %s: %s (%d)",
			name, gai_strerror(rescode), rescode);
		return;
	}

	id = (associd_t)(intptr_t)context;
	solicitor = findpeerbyassoc(id);
	if (solicitor != NULL) {
		DPRINTF(1, ("pool DNS %s completed\n", name));
		solicitor->addrs = copy_addrinfo_list(res);
		solicitor->ai = solicitor->addrs;
		pool_xmit(solicitor);
		return;
	}

	/* the association is gone */
	msyslog(LOG_ERR,
		"Could not find assoc %u for pool DNS %s",
		id, name);
}
#endif	/* WORKER */
3928
3929
#ifdef AUTOKEY
/*
 * key_expire - purge the key list
 *
 * Calls authtrust(key, 0) for entries 0 through peer->keynumber of
 * the key list, frees the list, releases peer->sndval, resets the key
 * number to zero and clears FLAG_ASSOC.
 */
void
key_expire(
	struct peer *peer	/* peer structure pointer */
	)
{
	int idx;

	if (peer->keylist != NULL) {
		idx = 0;
		while (idx <= peer->keynumber) {
			authtrust(peer->keylist[idx], 0);
			idx++;
		}
		free(peer->keylist);
		peer->keylist = NULL;
	}
	value_free(&peer->sndval);
	peer->flags &= ~FLAG_ASSOC;
	peer->keynumber = 0;
	DPRINTF(1, ("key_expire: at %lu associd %d\n", current_time,
		    peer->associd));
}
#endif	/* AUTOKEY */
3954
3955
3956/*
3957 * local_refid(peer) - check peer refid to avoid selecting peers
3958 *		       currently synced to this ntpd.
3959 */
3960static int
3961local_refid(
3962	struct peer *	p
3963	)
3964{
3965	endpt *	unicast_ep;
3966
3967	if (p->dstadr != NULL && !(INT_MCASTIF & p->dstadr->flags))
3968		unicast_ep = p->dstadr;
3969	else
3970		unicast_ep = findinterface(&p->srcadr);
3971
3972	if (unicast_ep != NULL && p->refid == unicast_ep->addr_refid)
3973		return TRUE;
3974	else
3975		return FALSE;
3976}
3977
3978
3979/*
3980 * Determine if the peer is unfit for synchronization
3981 *
3982 * A peer is unfit for synchronization if
3983 * > TEST10 bad leap or stratum below floor or at or above ceiling
3984 * > TEST11 root distance exceeded for remote peer
3985 * > TEST12 a direct or indirect synchronization loop would form
3986 * > TEST13 unreachable or noselect
3987 */
3988int				/* FALSE if fit, TRUE if unfit */
3989peer_unfit(
3990	struct peer *peer	/* peer structure pointer */
3991	)
3992{
3993	int	rval = 0;
3994
3995	/*
3996	 * A stratum error occurs if (1) the server has never been
3997	 * synchronized, (2) the server stratum is below the floor or
3998	 * greater than or equal to the ceiling.
3999	 */
4000	if (   peer->leap == LEAP_NOTINSYNC
4001	    || peer->stratum < sys_floor
4002	    || peer->stratum >= sys_ceiling)
4003		rval |= TEST10;		/* bad synch or stratum */
4004
4005	/*
4006	 * A distance error for a remote peer occurs if the root
4007	 * distance is greater than or equal to the distance threshold
4008	 * plus the increment due to one host poll interval.
4009	 */
4010	if (   !(peer->flags & FLAG_REFCLOCK)
4011	    && root_distance(peer) >= sys_maxdist
4012				      + clock_phi * ULOGTOD(peer->hpoll))
4013		rval |= TEST11;		/* distance exceeded */
4014
4015	/*
4016	 * A loop error occurs if the remote peer is synchronized to the
4017	 * local peer or if the remote peer is synchronized to the same
4018	 * server as the local peer but only if the remote peer is
4019	 * neither a reference clock nor an orphan.
4020	 */
4021	if (peer->stratum > 1 && local_refid(peer))
4022		rval |= TEST12;		/* synchronization loop */
4023
4024	/*
4025	 * An unreachable error occurs if the server is unreachable or
4026	 * the noselect bit is set.
4027	 */
4028	if (!peer->reach || (peer->flags & FLAG_NOSELECT))
4029		rval |= TEST13;		/* unreachable */
4030
4031	peer->flash &= ~PEER_TEST_MASK;
4032	peer->flash |= rval;
4033	return (rval);
4034}
4035
4036
/*
 * Find the precision of this particular machine
 *
 * Tuning constants for measure_tick_fuzz():
 *   MINSTEP    - a difference between two successive clock readings
 *                only counts as a clock change when it is greater
 *                than this many seconds
 *   MAXSTEP    - starting value for the tick estimate; observed
 *                changes can only lower it
 *   MINCHANGES - number of clock changes that must be observed before
 *                the measurement loop ends
 *   MAXLOOPS   - upper bound on the number of clock readings taken,
 *                1 / MINSTEP (50,000,000 with the value below)
 */
#define MINSTEP		20e-9	/* smallest difference counted as a change (s) */
#define MAXSTEP		1	/* initial (largest) tick estimate (s) */
#define MINCHANGES	12	/* clock changes required for a measurement */
#define MAXLOOPS	((int)(1. / MINSTEP))	/* cap on clock readings, avoids infinite loop */
4044
4045/*
4046 * This routine measures the system precision defined as the minimum of
4047 * a sequence of differences between successive readings of the system
4048 * clock. However, if a difference is less than MINSTEP, the clock has
4049 * been read more than once during a clock tick and the difference is
4050 * ignored. We set MINSTEP greater than zero in case something happens
4051 * like a cache miss, and to tolerate underlying system clocks which
4052 * ensure each reading is strictly greater than prior readings while
4053 * using an underlying stepping (not interpolated) clock.
4054 *
4055 * sys_tick and sys_precision represent the time to read the clock for
4056 * systems with high-precision clocks, and the tick interval or step
4057 * size for lower-precision stepping clocks.
4058 *
4059 * This routine also measures the time to read the clock on stepping
4060 * system clocks by counting the number of readings between changes of
4061 * the underlying clock.  With either type of clock, the minimum time
4062 * to read the clock is saved as sys_fuzz, and used to ensure the
4063 * get_systime() readings always increase and are fuzzed below sys_fuzz.
4064 */
4065void
4066measure_precision(void)
4067{
4068	/*
4069	 * With sys_fuzz set to zero, get_systime() fuzzing of low bits
4070	 * is effectively disabled.  trunc_os_clock is FALSE to disable
4071	 * get_ostime() simulation of a low-precision system clock.
4072	 */
4073	set_sys_fuzz(0.);
4074	trunc_os_clock = FALSE;
4075	measured_tick = measure_tick_fuzz();
4076	set_sys_tick_precision(measured_tick);
4077	msyslog(LOG_INFO, "proto: precision = %.3f usec (%d)",
4078		sys_tick * 1e6, sys_precision);
4079	if (sys_fuzz < sys_tick) {
4080		msyslog(LOG_NOTICE, "proto: fuzz beneath %.3f usec",
4081			sys_fuzz * 1e6);
4082	}
4083}
4084
4085
4086/*
4087 * measure_tick_fuzz()
4088 *
4089 * measures the minimum time to read the clock (stored in sys_fuzz)
4090 * and returns the tick, the larger of the minimum increment observed
4091 * between successive clock readings and the time to read the clock.
4092 */
4093double
4094measure_tick_fuzz(void)
4095{
4096	l_fp	minstep;	/* MINSTEP as l_fp */
4097	l_fp	val;		/* current seconds fraction */
4098	l_fp	last;		/* last seconds fraction */
4099	l_fp	ldiff;		/* val - last */
4100	double	tick;		/* computed tick value */
4101	double	diff;
4102	long	repeats;
4103	long	max_repeats;
4104	int	changes;
4105	int	i;		/* log2 precision */
4106
4107	tick = MAXSTEP;
4108	max_repeats = 0;
4109	repeats = 0;
4110	changes = 0;
4111	DTOLFP(MINSTEP, &minstep);
4112	get_systime(&last);
4113	for (i = 0; i < MAXLOOPS && changes < MINCHANGES; i++) {
4114		get_systime(&val);
4115		ldiff = val;
4116		L_SUB(&ldiff, &last);
4117		last = val;
4118		if (L_ISGT(&ldiff, &minstep)) {
4119			max_repeats = max(repeats, max_repeats);
4120			repeats = 0;
4121			changes++;
4122			LFPTOD(&ldiff, diff);
4123			tick = min(diff, tick);
4124		} else {
4125			repeats++;
4126		}
4127	}
4128	if (changes < MINCHANGES) {
4129		msyslog(LOG_ERR, "Fatal error: precision could not be measured (MINSTEP too large?)");
4130		exit(1);
4131	}
4132
4133	if (0 == max_repeats) {
4134		set_sys_fuzz(tick);
4135	} else {
4136		set_sys_fuzz(tick / max_repeats);
4137	}
4138
4139	return tick;
4140}
4141
4142
4143void
4144set_sys_tick_precision(
4145	double tick
4146	)
4147{
4148	int i;
4149
4150	if (tick > 1.) {
4151		msyslog(LOG_ERR,
4152			"unsupported tick %.3f > 1s ignored", tick);
4153		return;
4154	}
4155	if (tick < measured_tick) {
4156		msyslog(LOG_ERR,
4157			"proto: tick %.3f less than measured tick %.3f, ignored",
4158			tick, measured_tick);
4159		return;
4160	} else if (tick > measured_tick) {
4161		trunc_os_clock = TRUE;
4162		msyslog(LOG_NOTICE,
4163			"proto: truncating system clock to multiples of %.9f",
4164			tick);
4165	}
4166	sys_tick = tick;
4167
4168	/*
4169	 * Find the nearest power of two.
4170	 */
4171	for (i = 0; tick <= 1; i--)
4172		tick *= 2;
4173	if (tick - 1 > 1 - tick / 2)
4174		i++;
4175
4176	sys_precision = (s_char)i;
4177}
4178
4179
4180/*
4181 * init_proto - initialize the protocol module's data
4182 */
4183void
4184init_proto(void)
4185{
4186	l_fp	dummy;
4187	int	i;
4188
4189	/*
4190	 * Fill in the sys_* stuff.  Default is don't listen to
4191	 * broadcasting, require authentication.
4192	 */
4193	set_sys_leap(LEAP_NOTINSYNC);
4194	sys_stratum = STRATUM_UNSPEC;
4195	memcpy(&sys_refid, "INIT", 4);
4196	sys_peer = NULL;
4197	sys_rootdelay = 0;
4198	sys_rootdisp = 0;
4199	L_CLR(&sys_reftime);
4200	sys_jitter = 0;
4201	measure_precision();
4202	get_systime(&dummy);
4203	sys_survivors = 0;
4204	sys_manycastserver = 0;
4205	sys_bclient = 0;
4206	sys_bdelay = 0;
4207	sys_authenticate = 1;
4208	sys_stattime = current_time;
4209	orphwait = current_time + sys_orphwait;
4210	proto_clr_stats();
4211	for (i = 0; i < MAX_TTL; i++) {
4212		sys_ttl[i] = (u_char)((i * 256) / MAX_TTL);
4213		sys_ttlmax = i;
4214	}
4215	hardpps_enable = 0;
4216	stats_control = 1;
4217}
4218
4219
4220/*
4221 * proto_config - configure the protocol module
4222 */
4223void
4224proto_config(
4225	int	item,
4226	u_long	value,
4227	double	dvalue,
4228	sockaddr_u *svalue
4229	)
4230{
4231	/*
4232	 * Figure out what he wants to change, then do it
4233	 */
4234	DPRINTF(2, ("proto_config: code %d value %lu dvalue %lf\n",
4235		    item, value, dvalue));
4236
4237	switch (item) {
4238
4239	/*
4240	 * enable and disable commands - arguments are Boolean.
4241	 */
4242	case PROTO_AUTHENTICATE: /* authentication (auth) */
4243		sys_authenticate = value;
4244		break;
4245
4246	case PROTO_BROADCLIENT: /* broadcast client (bclient) */
4247		sys_bclient = (int)value;
4248		if (sys_bclient == 0)
4249			io_unsetbclient();
4250		else
4251			io_setbclient();
4252		break;
4253
4254#ifdef REFCLOCK
4255	case PROTO_CAL:		/* refclock calibrate (calibrate) */
4256		cal_enable = value;
4257		break;
4258#endif /* REFCLOCK */
4259
4260	case PROTO_KERNEL:	/* kernel discipline (kernel) */
4261		select_loop(value);
4262		break;
4263
4264	case PROTO_MONITOR:	/* monitoring (monitor) */
4265		if (value)
4266			mon_start(MON_ON);
4267		else {
4268			mon_stop(MON_ON);
4269			if (mon_enabled)
4270				msyslog(LOG_WARNING,
4271					"restrict: 'monitor' cannot be disabled while 'limited' is enabled");
4272		}
4273		break;
4274
4275	case PROTO_NTP:		/* NTP discipline (ntp) */
4276		ntp_enable = value;
4277		break;
4278
4279	case PROTO_MODE7:	/* mode7 management (ntpdc) */
4280		ntp_mode7 = value;
4281		break;
4282
4283	case PROTO_PPS:		/* PPS discipline (pps) */
4284		hardpps_enable = value;
4285		break;
4286
4287	case PROTO_FILEGEN:	/* statistics (stats) */
4288		stats_control = value;
4289		break;
4290
4291	/*
4292	 * tos command - arguments are double, sometimes cast to int
4293	 */
4294	case PROTO_BEACON:	/* manycast beacon (beacon) */
4295		sys_beacon = (int)dvalue;
4296		break;
4297
4298	case PROTO_BROADDELAY:	/* default broadcast delay (bdelay) */
4299		sys_bdelay = dvalue;
4300		break;
4301
4302	case PROTO_CEILING:	/* stratum ceiling (ceiling) */
4303		sys_ceiling = (int)dvalue;
4304		break;
4305
4306	case PROTO_COHORT:	/* cohort switch (cohort) */
4307		sys_cohort = (int)dvalue;
4308		break;
4309
4310	case PROTO_FLOOR:	/* stratum floor (floor) */
4311		sys_floor = (int)dvalue;
4312		break;
4313
4314	case PROTO_MAXCLOCK:	/* maximum candidates (maxclock) */
4315		sys_maxclock = (int)dvalue;
4316		break;
4317
4318	case PROTO_MAXDIST:	/* select threshold (maxdist) */
4319		sys_maxdist = dvalue;
4320		break;
4321
4322	case PROTO_CALLDELAY:	/* modem call delay (mdelay) */
4323		break;		/* NOT USED */
4324
4325	case PROTO_MINCLOCK:	/* minimum candidates (minclock) */
4326		sys_minclock = (int)dvalue;
4327		break;
4328
4329	case PROTO_MINDISP:	/* minimum distance (mindist) */
4330		sys_mindisp = dvalue;
4331		break;
4332
4333	case PROTO_MINSANE:	/* minimum survivors (minsane) */
4334		sys_minsane = (int)dvalue;
4335		break;
4336
4337	case PROTO_ORPHAN:	/* orphan stratum (orphan) */
4338		sys_orphan = (int)dvalue;
4339		break;
4340
4341	case PROTO_ORPHWAIT:	/* orphan wait (orphwait) */
4342		orphwait -= sys_orphwait;
4343		sys_orphwait = (int)dvalue;
4344		orphwait += sys_orphwait;
4345		break;
4346
4347	/*
4348	 * Miscellaneous commands
4349	 */
4350	case PROTO_MULTICAST_ADD: /* add group address */
4351		if (svalue != NULL)
4352			io_multicast_add(svalue);
4353		sys_bclient = 1;
4354		break;
4355
4356	case PROTO_MULTICAST_DEL: /* delete group address */
4357		if (svalue != NULL)
4358			io_multicast_del(svalue);
4359		break;
4360
4361	default:
4362		msyslog(LOG_NOTICE,
4363		    "proto: unsupported option %d", item);
4364	}
4365}
4366
4367
4368/*
4369 * proto_clr_stats - clear protocol stat counters
4370 */
4371void
4372proto_clr_stats(void)
4373{
4374	sys_stattime = current_time;
4375	sys_received = 0;
4376	sys_processed = 0;
4377	sys_newversion = 0;
4378	sys_oldversion = 0;
4379	sys_declined = 0;
4380	sys_restricted = 0;
4381	sys_badlength = 0;
4382	sys_badauth = 0;
4383	sys_limitrejected = 0;
4384	sys_kodsent = 0;
4385}
4386