1/*
2 * NTP client/server, based on OpenNTPD 3.9p1
3 *
4 * Author: Adam Tkac <vonsch@gmail.com>
5 *
6 * Licensed under GPLv2, see file LICENSE in this tarball for details.
7 */
8#include "libbb.h"
9#include <netinet/ip.h> /* For IPTOS_LOWDELAY definition */
10#ifndef IPTOS_LOWDELAY
11# define IPTOS_LOWDELAY 0x10
12#endif
13#ifndef IP_PKTINFO
14# error "Sorry, your kernel has to support IP_PKTINFO"
15#endif
16
17
/* Base delay between two queries to the same peer, in seconds.
 * Which one is used depends on the peer's current trust level. */
#define INTERVAL_QUERY_NORMAL    30
#define INTERVAL_QUERY_PATHETIC  60
#define INTERVAL_QUERY_AGRESSIVE  5

/* Trust level thresholds.
 * A peer counts as bad while its level is *below* TRUSTLEVEL_BADPEER. */
#define TRUSTLEVEL_BADPEER        6
#define TRUSTLEVEL_PATHETIC       2
#define TRUSTLEVEL_AGRESSIVE      8
#define TRUSTLEVEL_MAX           10

/* Absolute clock offsets (seconds) that bound the poll interval scale */
#define QSCALE_OFF_MIN         0.05
#define QSCALE_OFF_MAX         0.50

/* Longest time we wait for the reply to one query, in seconds */
#define QUERYTIME_MAX            15
/* Smallest offset that makes us step (settime) the clock at start.
 * "man ntpd" says it's 128 ms */
#define STEPTIME_MIN_OFFSET   0.128
36
/* 64-bit fixed-point timestamp as it appears in a packet:
 * integer part followed by fraction, both 32 bits wide.
 * Converted from/to host doubles by lfp_to_d() and d_to_lfp(). */
typedef struct {
	uint32_t int_partl, fractionl;
} l_fixedpt_t;
41
/* 32-bit fixed-point value as it appears in a packet:
 * integer part followed by fraction, both 16 bits wide. */
typedef struct {
	uint16_t int_parts, fractions;
} s_fixedpt_t;
46
/* Packet sizes in bytes */
enum {
	NTP_DIGESTSIZE     = 16,
	/* message without the key id / digest trailer */
	NTP_MSGSIZE_NOAUTH = 48,
	/* message + 4-byte key id + digest */
	NTP_MSGSIZE        = NTP_MSGSIZE_NOAUTH + 4 + NTP_DIGESTSIZE,
};
52
53typedef struct {
54	uint8_t     m_status;     /* status of local clock and leap info */
55	uint8_t     m_stratum;    /* stratum level */
56	uint8_t     m_ppoll;      /* poll value */
57	int8_t      m_precision_exp;
58	s_fixedpt_t m_rootdelay;
59	s_fixedpt_t m_dispersion;
60	uint32_t    m_refid;
61	l_fixedpt_t m_reftime;
62	l_fixedpt_t m_orgtime;
63	l_fixedpt_t m_rectime;
64	l_fixedpt_t m_xmttime;
65	uint32_t    m_keyid;
66	uint8_t     m_digest[NTP_DIGESTSIZE];
67} msg_t;
68
/* Protocol constants, mostly describing the bit fields of msg_t.m_status */
enum {
	NTP_VERSION     = 4,
	NTP_MAXSTRATUM  = 15,

	/* Fields inside m_status */
	VERSION_SHIFT   = 3,
	MODE_MASK       = 0x07,
	VERSION_MASK    = 0x07 << VERSION_SHIFT,
	LI_MASK         = 0xc0,

	/* Leap Second Codes (high order two bits of m_status) */
	LI_NOWARNING    = 0x00,    /* no warning */
	LI_PLUSSEC      = 0x40,    /* add a second (61 seconds) */
	LI_MINUSSEC     = 0x80,    /* minus a second (59 seconds) */
	LI_ALARM        = 0xc0,    /* alarm condition */

	/* Mode values, numbered consecutively from 0 */
	MODE_RES0       = 0,    /* reserved */
	MODE_SYM_ACT,           /* 1: symmetric active */
	MODE_SYM_PAS,           /* 2: symmetric passive */
	MODE_CLIENT,            /* 3: client */
	MODE_SERVER,            /* 4: server */
	MODE_BROADCAST,         /* 5: broadcast */
	MODE_RES1,              /* 6: reserved for NTP control message */
	MODE_RES2,              /* 7: reserved for private use */
};
95
/* Seconds between 1900-01-01 and 1970-01-01; added to gettimeofday() time
 * to obtain the 1900-based time used in NTP timestamps */
#define OFFSET_1900_1970 2208988800UL
97
/* One measurement taken from a single reply of a peer */
typedef struct {
	double   d_offset;    /* ((T2 - T1) + (T3 - T4)) / 2 */
	double   d_delay;     /* (T4 - T1) - (T3 - T2) */
	time_t   d_rcv_time;  /* local time the reply was received */
	uint32_t d_refid4;
	uint8_t  d_leap;      /* LI_xxx bits of the reply */
	uint8_t  d_stratum;   /* m_stratum of the reply */
	uint8_t  d_good;      /* nonzero while this datapoint is usable */
} datapoint_t;
108
109#define NUM_DATAPOINTS  8
110typedef struct {
111	len_and_sockaddr *p_lsa;
112	char             *p_dotted;
113	/* When to send new query (if p_fd == -1)
114	 * or when receive times out (if p_fd >= 0): */
115	time_t           next_action_time;
116	int              p_fd;
117	uint8_t          p_datapoint_idx;
118	uint8_t          p_trustlevel;
119	double           p_xmttime;
120	datapoint_t      update;
121	datapoint_t      p_datapoint[NUM_DATAPOINTS];
122	msg_t            p_xmt_msg;
123} peer_t;
124
125enum {
126	OPT_n = (1 << 0),
127	OPT_q = (1 << 1),
128	OPT_N = (1 << 2),
129	OPT_x = (1 << 3),
130	/* Insert new options above this line. */
131	/* Non-compat options: */
132	OPT_p = (1 << 4),
133	OPT_l = (1 << 5) * ENABLE_FEATURE_NTPD_SERVER,
134};
135
136
137struct globals {
138	/* total round trip delay to currently selected reference clock */
139	double   rootdelay;
140	/* reference timestamp: time when the system clock was last set or corrected */
141	double   reftime;
142	llist_t  *ntp_peers;
143#if ENABLE_FEATURE_NTPD_SERVER
144	int      listen_fd;
145#endif
146	unsigned verbose;
147	unsigned peer_cnt;
148	unsigned scale;
149	uint32_t refid;
150	uint32_t refid4;
151	uint8_t  synced;
152	uint8_t  leap;
153#define G_precision_exp -6
154//	int8_t   precision_exp;
155	uint8_t  stratum;
156	uint8_t  time_was_stepped;
157	uint8_t  first_adj_done;
158};
159#define G (*ptr_to_globals)
160
/* setsockopt(IP_TOS) takes the option value by address, so it needs an object */
static const int const_IPTOS_LOWDELAY = IPTOS_LOWDELAY;
162
163
164static void
165set_next(peer_t *p, unsigned t)
166{
167	p->next_action_time = time(NULL) + t;
168}
169
170static void
171add_peers(char *s)
172{
173	peer_t *p;
174
175	p = xzalloc(sizeof(*p));
176	p->p_lsa = xhost2sockaddr(s, 123);
177	p->p_dotted = xmalloc_sockaddr2dotted_noport(&p->p_lsa->u.sa);
178	p->p_fd = -1;
179	p->p_xmt_msg.m_status = MODE_CLIENT | (NTP_VERSION << 3);
180	p->p_trustlevel = TRUSTLEVEL_PATHETIC;
181	p->next_action_time = time(NULL); /* = set_next(p, 0); */
182
183	llist_add_to(&G.ntp_peers, p);
184	G.peer_cnt++;
185}
186
187static double
188gettime1900d(void)
189{
190	struct timeval tv;
191	gettimeofday(&tv, NULL); /* never fails */
192	return (tv.tv_sec + 1.0e-6 * tv.tv_usec + OFFSET_1900_1970);
193}
194
/* Split d (seconds) into whole seconds and microseconds.
 * The integer part is truncated toward zero, therefore a negative d
 * yields a negative (or zero) tv_sec and a negative tv_usec. */
static void
d_to_tv(double d, struct timeval *tv)
{
	long whole_secs = (long)d;

	tv->tv_sec = whole_secs;
	tv->tv_usec = (d - tv->tv_sec) * 1000000;
}
201
202static double
203lfp_to_d(l_fixedpt_t lfp)
204{
205	double ret;
206	lfp.int_partl = ntohl(lfp.int_partl);
207	lfp.fractionl = ntohl(lfp.fractionl);
208	ret = (double)lfp.int_partl + ((double)lfp.fractionl / UINT_MAX);
209	return ret;
210}
211
212#if 0 //UNUSED
213static double
214sfp_to_d(s_fixedpt_t sfp)
215{
216	double ret;
217	sfp.int_parts = ntohs(sfp.int_parts);
218	sfp.fractions = ntohs(sfp.fractions);
219	ret = (double)sfp.int_parts + ((double)sfp.fractions / USHRT_MAX);
220	return ret;
221}
222#endif
223
224#if ENABLE_FEATURE_NTPD_SERVER
225static l_fixedpt_t
226d_to_lfp(double d)
227{
228	l_fixedpt_t lfp;
229	lfp.int_partl = (uint32_t)d;
230	lfp.fractionl = (uint32_t)((d - lfp.int_partl) * UINT_MAX);
231	lfp.int_partl = htonl(lfp.int_partl);
232	lfp.fractionl = htonl(lfp.fractionl);
233	return lfp;
234}
235
236static s_fixedpt_t
237d_to_sfp(double d)
238{
239	s_fixedpt_t sfp;
240	sfp.int_parts = (uint16_t)d;
241	sfp.fractions = (uint16_t)((d - sfp.int_parts) * USHRT_MAX);
242	sfp.int_parts = htons(sfp.int_parts);
243	sfp.fractions = htons(sfp.fractions);
244	return sfp;
245}
246#endif
247
248static unsigned
249error_interval(void)
250{
251	unsigned interval, r;
252	interval = INTERVAL_QUERY_PATHETIC * QSCALE_OFF_MAX / QSCALE_OFF_MIN;
253	r = (unsigned)random() % (unsigned)(interval / 10);
254	return (interval + r);
255}
256
257static int
258do_sendto(int fd,
259		const struct sockaddr *from, const struct sockaddr *to, socklen_t addrlen,
260		msg_t *msg, ssize_t len)
261{
262	ssize_t ret;
263
264	errno = 0;
265	if (!from) {
266		ret = sendto(fd, msg, len, MSG_DONTWAIT, to, addrlen);
267	} else {
268		ret = send_to_from(fd, msg, len, MSG_DONTWAIT, to, from, addrlen);
269	}
270	if (ret != len) {
271		bb_perror_msg("send failed");
272		return -1;
273	}
274	return 0;
275}
276
277static int
278send_query_to_peer(peer_t *p)
279{
280	// Why do we need to bind()?
281	// See what happens when we don't bind:
282	//
283	// socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 3
284	// setsockopt(3, SOL_IP, IP_TOS, [16], 4) = 0
285	// gettimeofday({1259071266, 327885}, NULL) = 0
286	// sendto(3, "xxx", 48, MSG_DONTWAIT, {sa_family=AF_INET, sin_port=htons(123), sin_addr=inet_addr("10.34.32.125")}, 16) = 48
287	// ^^^ we sent it from some source port picked by kernel.
288	// time(NULL)              = 1259071266
289	// write(2, "ntpd: entering poll 15 secs\n", 28) = 28
290	// poll([{fd=3, events=POLLIN}], 1, 15000) = 1 ([{fd=3, revents=POLLIN}])
291	// recv(3, "yyy", 68, MSG_DONTWAIT) = 48
292	// ^^^ this recv will receive packets to any local port!
293	//
294	// Uncomment this and use strace to see it in action:
295#define PROBE_LOCAL_ADDR // { len_and_sockaddr lsa; lsa.len = LSA_SIZEOF_SA; getsockname(p->query.fd, &lsa.u.sa, &lsa.len); }
296
297	if (p->p_fd == -1) {
298		int fd, family;
299		len_and_sockaddr *local_lsa;
300
301		family = p->p_lsa->u.sa.sa_family;
302		p->p_fd = fd = xsocket_type(&local_lsa, family, SOCK_DGRAM);
303		/* local_lsa has "null" address and port 0 now.
304		 * bind() ensures we have a *particular port* selected by kernel
305		 * and remembered in p->p_fd, thus later recv(p->p_fd)
306		 * receives only packets sent to this port.
307		 */
308		PROBE_LOCAL_ADDR
309		xbind(fd, &local_lsa->u.sa, local_lsa->len);
310		PROBE_LOCAL_ADDR
311#if ENABLE_FEATURE_IPV6
312		if (family == AF_INET)
313#endif
314			setsockopt(fd, IPPROTO_IP, IP_TOS, &const_IPTOS_LOWDELAY, sizeof(const_IPTOS_LOWDELAY));
315		free(local_lsa);
316	}
317
318	/*
319	 * Send out a random 64-bit number as our transmit time.  The NTP
320	 * server will copy said number into the originate field on the
321	 * response that it sends us.  This is totally legal per the SNTP spec.
322	 *
323	 * The impact of this is two fold: we no longer send out the current
324	 * system time for the world to see (which may aid an attacker), and
325	 * it gives us a (not very secure) way of knowing that we're not
326	 * getting spoofed by an attacker that can't capture our traffic
327	 * but can spoof packets from the NTP server we're communicating with.
328	 *
329	 * Save the real transmit timestamp locally.
330	 */
331	p->p_xmt_msg.m_xmttime.int_partl = random();
332	p->p_xmt_msg.m_xmttime.fractionl = random();
333	p->p_xmttime = gettime1900d();
334
335	if (do_sendto(p->p_fd, /*from:*/ NULL, /*to:*/ &p->p_lsa->u.sa, /*addrlen:*/ p->p_lsa->len,
336			&p->p_xmt_msg, NTP_MSGSIZE_NOAUTH) == -1
337	) {
338		close(p->p_fd);
339		p->p_fd = -1;
340		set_next(p, INTERVAL_QUERY_PATHETIC);
341		return -1;
342	}
343
344	if (G.verbose)
345		bb_error_msg("sent query to %s", p->p_dotted);
346	set_next(p, QUERYTIME_MAX);
347
348	return 0;
349}
350
351
352/* Time is stepped only once, when the first packet from a peer is received.
353 */
354static void
355step_time_once(double offset)
356{
357	double dtime;
358	llist_t *item;
359	struct timeval tv;
360	char buf[80];
361	time_t tval;
362
363	if (G.time_was_stepped)
364		goto bail;
365	G.time_was_stepped = 1;
366
367	/* if the offset is small, don't step, slew (later) */
368	if (offset < STEPTIME_MIN_OFFSET && offset > -STEPTIME_MIN_OFFSET)
369		goto bail;
370
371	gettimeofday(&tv, NULL); /* never fails */
372	dtime = offset + tv.tv_sec;
373	dtime += 1.0e-6 * tv.tv_usec;
374	d_to_tv(dtime, &tv);
375
376	if (settimeofday(&tv, NULL) == -1)
377		bb_perror_msg_and_die("settimeofday");
378
379	tval = tv.tv_sec;
380	strftime(buf, sizeof(buf), "%a %b %e %H:%M:%S %Z %Y", localtime(&tval));
381
382	bb_error_msg("setting clock to %s (offset %fs)", buf, offset);
383
384	for (item = G.ntp_peers; item != NULL; item = item->link) {
385		peer_t *p = (peer_t *) item->data;
386		p->next_action_time -= (time_t)offset;
387	}
388
389 bail:
390	if (option_mask32 & OPT_q)
391		exit(0);
392}
393
394
395/* Time is periodically slewed when we collect enough
396 * good data points.
397 */
398static int
399compare_offsets(const void *aa, const void *bb)
400{
401	const peer_t *const *a = aa;
402	const peer_t *const *b = bb;
403	if ((*a)->update.d_offset < (*b)->update.d_offset)
404		return -1;
405	return ((*a)->update.d_offset > (*b)->update.d_offset);
406}
407static unsigned
408updated_scale(double offset)
409{
410	if (offset < 0)
411		offset = -offset;
412	if (offset > QSCALE_OFF_MAX)
413		return 1;
414	if (offset < QSCALE_OFF_MIN)
415		return QSCALE_OFF_MAX / QSCALE_OFF_MIN;
416	return QSCALE_OFF_MAX / offset;
417}
418static void
419slew_time(void)
420{
421	llist_t *item;
422	double offset_median;
423	struct timeval tv;
424
425	{
426		peer_t **peers = xzalloc(sizeof(peers[0]) * G.peer_cnt);
427		unsigned goodpeer_cnt = 0;
428		unsigned middle;
429
430		for (item = G.ntp_peers; item != NULL; item = item->link) {
431			peer_t *p = (peer_t *) item->data;
432			if (p->p_trustlevel < TRUSTLEVEL_BADPEER)
433				continue;
434			if (!p->update.d_good) {
435				free(peers);
436				return;
437			}
438			peers[goodpeer_cnt++] = p;
439		}
440
441		if (goodpeer_cnt == 0) {
442			free(peers);
443			goto clear_good;
444		}
445
446		qsort(peers, goodpeer_cnt, sizeof(peers[0]), compare_offsets);
447
448		middle = goodpeer_cnt / 2;
449		if (middle != 0 && (goodpeer_cnt & 1) == 0) {
450			offset_median = (peers[middle-1]->update.d_offset + peers[middle]->update.d_offset) / 2;
451			G.rootdelay = (peers[middle-1]->update.d_delay + peers[middle]->update.d_delay) / 2;
452			G.stratum = 1 + MAX(peers[middle-1]->update.d_stratum, peers[middle]->update.d_stratum);
453		} else {
454			offset_median = peers[middle]->update.d_offset;
455			G.rootdelay = peers[middle]->update.d_delay;
456			G.stratum = 1 + peers[middle]->update.d_stratum;
457		}
458		G.leap = peers[middle]->update.d_leap;
459		G.refid4 = peers[middle]->update.d_refid4;
460		G.refid =
461#if ENABLE_FEATURE_IPV6
462			peers[middle]->p_lsa->u.sa.sa_family != AF_INET ?
463				G.refid4 :
464#endif
465				peers[middle]->p_lsa->u.sin.sin_addr.s_addr;
466		free(peers);
467	}
468//TODO: if (offset_median > BIG) step_time(offset_median)?
469
470	G.scale = updated_scale(offset_median);
471
472	bb_error_msg("adjusting clock by %fs, our stratum is %u, time scale %u",
473			offset_median, G.stratum, G.scale);
474
475	errno = 0;
476	d_to_tv(offset_median, &tv);
477	if (adjtime(&tv, &tv) == -1)
478		bb_perror_msg_and_die("adjtime failed");
479	if (G.verbose >= 2)
480		bb_error_msg("old adjust: %d.%06u", (int)tv.tv_sec, (unsigned)tv.tv_usec);
481
482	if (G.first_adj_done) {
483		uint8_t synced = (tv.tv_sec == 0 && tv.tv_usec == 0);
484		if (synced != G.synced) {
485			G.synced = synced;
486			bb_error_msg("clock is %ssynced", synced ? "" : "un");
487		}
488	}
489	G.first_adj_done = 1;
490
491	G.reftime = gettime1900d();
492
493 clear_good:
494	for (item = G.ntp_peers; item != NULL; item = item->link) {
495		peer_t *p = (peer_t *) item->data;
496		p->update.d_good = 0;
497	}
498}
499
500static void
501update_peer_data(peer_t *p)
502{
503	/* Clock filter.
504	 * Find the datapoint with the lowest delay.
505	 * Use that as the peer update.
506	 * Invalidate it and all older ones.
507	 */
508	int i;
509	int best = -1;
510	int good = 0;
511
512	for (i = 0; i < NUM_DATAPOINTS; i++) {
513		if (p->p_datapoint[i].d_good) {
514			good++;
515			if (best < 0 || p->p_datapoint[i].d_delay < p->p_datapoint[best].d_delay)
516				best = i;
517		}
518	}
519
520	if (good < 8) //FIXME: was it meant to be NUM_DATAPOINTS, not 8?
521		return;
522
523	p->update = p->p_datapoint[best]; /* struct copy */
524	slew_time();
525
526	for (i = 0; i < NUM_DATAPOINTS; i++)
527		if (p->p_datapoint[i].d_rcv_time <= p->p_datapoint[best].d_rcv_time)
528			p->p_datapoint[i].d_good = 0;
529}
530
531static unsigned
532scale_interval(unsigned requested)
533{
534	unsigned interval, r;
535	interval = requested * G.scale;
536	r = (unsigned)random() % (unsigned)(MAX(5, interval / 10));
537	return (interval + r);
538}
539static void
540recv_and_process_peer_pkt(peer_t *p)
541{
542	ssize_t     size;
543	msg_t       msg;
544	double      T1, T2, T3, T4;
545	unsigned    interval;
546	datapoint_t *datapoint;
547
548	/* We can recvfrom here and check from.IP, but some multihomed
549	 * ntp servers reply from their *other IP*.
550	 * TODO: maybe we should check at least what we can: from.port == 123?
551	 */
552	size = recv(p->p_fd, &msg, sizeof(msg), MSG_DONTWAIT);
553	if (size == -1) {
554		bb_perror_msg("recv(%s) error", p->p_dotted);
555		if (errno == EHOSTUNREACH || errno == EHOSTDOWN
556		 || errno == ENETUNREACH || errno == ENETDOWN
557		 || errno == ECONNREFUSED || errno == EADDRNOTAVAIL
558		 || errno == EAGAIN
559		) {
560//TODO: always do this?
561			set_next(p, error_interval());
562			goto close_sock;
563		}
564		xfunc_die();
565	}
566
567	if (size != NTP_MSGSIZE_NOAUTH && size != NTP_MSGSIZE) {
568		bb_error_msg("malformed packet received from %s", p->p_dotted);
569		goto bail;
570	}
571
572	if (msg.m_orgtime.int_partl != p->p_xmt_msg.m_xmttime.int_partl
573	 || msg.m_orgtime.fractionl != p->p_xmt_msg.m_xmttime.fractionl
574	) {
575		goto bail;
576	}
577
578	if ((msg.m_status & LI_ALARM) == LI_ALARM
579	 || msg.m_stratum == 0
580	 || msg.m_stratum > NTP_MAXSTRATUM
581	) {
582// TODO: stratum 0 responses may have commands in 32-bit m_refid field:
583// "DENY", "RSTR" - peer does not like us at all
584// "RATE" - peer is overloaded, reduce polling freq
585		interval = error_interval();
586		bb_error_msg("reply from %s: not synced, next query in %us", p->p_dotted, interval);
587		goto close_sock;
588	}
589
590	/*
591	 * From RFC 2030 (with a correction to the delay math):
592	 *
593	 * Timestamp Name          ID   When Generated
594	 * ------------------------------------------------------------
595	 * Originate Timestamp     T1   time request sent by client
596	 * Receive Timestamp       T2   time request received by server
597	 * Transmit Timestamp      T3   time reply sent by server
598	 * Destination Timestamp   T4   time reply received by client
599	 *
600	 * The roundtrip delay and local clock offset are defined as
601	 *
602	 * delay = (T4 - T1) - (T3 - T2); offset = ((T2 - T1) + (T3 - T4)) / 2
603	 */
604	T1 = p->p_xmttime;
605	T2 = lfp_to_d(msg.m_rectime);
606	T3 = lfp_to_d(msg.m_xmttime);
607	T4 = gettime1900d();
608
609	datapoint = &p->p_datapoint[p->p_datapoint_idx];
610
611	datapoint->d_offset = ((T2 - T1) + (T3 - T4)) / 2;
612	datapoint->d_delay = (T4 - T1) - (T3 - T2);
613	if (datapoint->d_delay < 0) {
614		bb_error_msg("reply from %s: negative delay %f", p->p_dotted, datapoint->d_delay);
615		interval = error_interval();
616		set_next(p, interval);
617		goto close_sock;
618	}
619	//UNUSED: datapoint->d_error = (T2 - T1) - (T3 - T4);
620	datapoint->d_rcv_time = (time_t)(T4 - OFFSET_1900_1970); /* = time(NULL); */
621	datapoint->d_good = 1;
622
623	datapoint->d_leap = (msg.m_status & LI_MASK);
624	//UNUSED: datapoint->o_precision = msg.m_precision_exp;
625	//UNUSED: datapoint->o_rootdelay = sfp_to_d(msg.m_rootdelay);
626	//UNUSED: datapoint->o_rootdispersion = sfp_to_d(msg.m_dispersion);
627	//UNUSED: datapoint->d_refid = ntohl(msg.m_refid);
628	datapoint->d_refid4 = msg.m_xmttime.fractionl;
629	//UNUSED: datapoint->o_reftime = lfp_to_d(msg.m_reftime);
630	//UNUSED: datapoint->o_poll = msg.m_ppoll;
631	datapoint->d_stratum = msg.m_stratum;
632
633	if (p->p_trustlevel < TRUSTLEVEL_PATHETIC)
634		interval = scale_interval(INTERVAL_QUERY_PATHETIC);
635	else if (p->p_trustlevel < TRUSTLEVEL_AGRESSIVE)
636		interval = scale_interval(INTERVAL_QUERY_AGRESSIVE);
637	else
638		interval = scale_interval(INTERVAL_QUERY_NORMAL);
639
640	set_next(p, interval);
641
642	/* Every received reply which we do not discard increases trust */
643	if (p->p_trustlevel < TRUSTLEVEL_MAX) {
644		p->p_trustlevel++;
645		if (p->p_trustlevel == TRUSTLEVEL_BADPEER)
646			bb_error_msg("peer %s now valid", p->p_dotted);
647	}
648
649	if (G.verbose)
650		bb_error_msg("reply from %s: offset %f delay %f, next query in %us", p->p_dotted,
651			datapoint->d_offset, datapoint->d_delay, interval);
652
653	update_peer_data(p);
654//TODO: do it after all peers had a chance to return at least one reply?
655	step_time_once(datapoint->d_offset);
656
657	p->p_datapoint_idx++;
658	if (p->p_datapoint_idx >= NUM_DATAPOINTS)
659		p->p_datapoint_idx = 0;
660
661 close_sock:
662	/* We do not expect any more packets from this peer for now.
663	 * Closing the socket informs kernel about it.
664	 * We open a new socket when we send a new query.
665	 */
666	close(p->p_fd);
667	p->p_fd = -1;
668 bail:
669	return;
670}
671
672#if ENABLE_FEATURE_NTPD_SERVER
673static void
674recv_and_process_client_pkt(void /*int fd*/)
675{
676	ssize_t          size;
677	uint8_t          version;
678	double           rectime;
679	len_and_sockaddr *to;
680	struct sockaddr  *from;
681	msg_t            msg;
682	uint8_t          query_status;
683	uint8_t          query_ppoll;
684	l_fixedpt_t      query_xmttime;
685
686	to = get_sock_lsa(G.listen_fd);
687	from = xzalloc(to->len);
688
689	size = recv_from_to(G.listen_fd, &msg, sizeof(msg), MSG_DONTWAIT, from, &to->u.sa, to->len);
690	if (size != NTP_MSGSIZE_NOAUTH && size != NTP_MSGSIZE) {
691		char *addr;
692		if (size < 0) {
693			if (errno == EAGAIN)
694				goto bail;
695			bb_perror_msg_and_die("recv");
696		}
697		addr = xmalloc_sockaddr2dotted_noport(from);
698		bb_error_msg("malformed packet received from %s: size %u", addr, (int)size);
699		free(addr);
700		goto bail;
701	}
702
703	query_status = msg.m_status;
704	query_ppoll = msg.m_ppoll;
705	query_xmttime = msg.m_xmttime;
706
707	/* Build a reply packet */
708	memset(&msg, 0, sizeof(msg));
709	msg.m_status = G.synced ? G.leap : LI_ALARM;
710	msg.m_status |= (query_status & VERSION_MASK);
711	msg.m_status |= ((query_status & MODE_MASK) == MODE_CLIENT) ?
712			 MODE_SERVER : MODE_SYM_PAS;
713	msg.m_stratum = G.stratum;
714	msg.m_ppoll = query_ppoll;
715	msg.m_precision_exp = G_precision_exp;
716	rectime = gettime1900d();
717	msg.m_xmttime = msg.m_rectime = d_to_lfp(rectime);
718	msg.m_reftime = d_to_lfp(G.reftime);
719	//msg.m_xmttime = d_to_lfp(gettime1900d()); // = msg.m_rectime
720	msg.m_orgtime = query_xmttime;
721	msg.m_rootdelay = d_to_sfp(G.rootdelay);
722	version = (query_status & VERSION_MASK); /* ... >> VERSION_SHIFT - done below instead */
723	msg.m_refid = (version > (3 << VERSION_SHIFT)) ? G.refid4 : G.refid;
724
725	/* We reply from the local address packet was sent to,
726	 * this makes to/from look swapped here: */
727	do_sendto(G.listen_fd,
728		/*from:*/ &to->u.sa, /*to:*/ from, /*addrlen:*/ to->len,
729		&msg, size);
730
731 bail:
732	free(to);
733	free(from);
734}
735#endif
736
737/* Upstream ntpd's options:
738 *
739 * -4   Force DNS resolution of host names to the IPv4 namespace.
740 * -6   Force DNS resolution of host names to the IPv6 namespace.
741 * -a   Require cryptographic authentication for broadcast client,
742 *      multicast client and symmetric passive associations.
743 *      This is the default.
744 * -A   Do not require cryptographic authentication for broadcast client,
745 *      multicast client and symmetric passive associations.
746 *      This is almost never a good idea.
747 * -b   Enable the client to synchronize to broadcast servers.
748 * -c conffile
749 *      Specify the name and path of the configuration file,
750 *      default /etc/ntp.conf
751 * -d   Specify debugging mode. This option may occur more than once,
752 *      with each occurrence indicating greater detail of display.
753 * -D level
754 *      Specify debugging level directly.
755 * -f driftfile
756 *      Specify the name and path of the frequency file.
757 *      This is the same operation as the "driftfile FILE"
758 *      configuration command.
759 * -g   Normally, ntpd exits with a message to the system log
760 *      if the offset exceeds the panic threshold, which is 1000 s
761 *      by default. This option allows the time to be set to any value
762 *      without restriction; however, this can happen only once.
763 *      If the threshold is exceeded after that, ntpd will exit
764 *      with a message to the system log. This option can be used
765 *      with the -q and -x options. See the tinker command for other options.
766 * -i jaildir
767 *      Chroot the server to the directory jaildir. This option also implies
768 *      that the server attempts to drop root privileges at startup
769 *      (otherwise, chroot gives very little additional security).
770 *      You may need to also specify a -u option.
771 * -k keyfile
772 *      Specify the name and path of the symmetric key file,
773 *      default /etc/ntp/keys. This is the same operation
774 *      as the "keys FILE" configuration command.
775 * -l logfile
776 *      Specify the name and path of the log file. The default
777 *      is the system log file. This is the same operation as
778 *      the "logfile FILE" configuration command.
779 * -L   Do not listen to virtual IPs. The default is to listen.
780 * -n   Don't fork.
781 * -N   To the extent permitted by the operating system,
782 *      run the ntpd at the highest priority.
783 * -p pidfile
784 *      Specify the name and path of the file used to record the ntpd
785 *      process ID. This is the same operation as the "pidfile FILE"
786 *      configuration command.
787 * -P priority
788 *      To the extent permitted by the operating system,
789 *      run the ntpd at the specified priority.
790 * -q   Exit the ntpd just after the first time the clock is set.
791 *      This behavior mimics that of the ntpdate program, which is
792 *      to be retired. The -g and -x options can be used with this option.
793 *      Note: The kernel time discipline is disabled with this option.
794 * -r broadcastdelay
795 *      Specify the default propagation delay from the broadcast/multicast
796 *      server to this client. This is necessary only if the delay
797 *      cannot be computed automatically by the protocol.
798 * -s statsdir
799 *      Specify the directory path for files created by the statistics
800 *      facility. This is the same operation as the "statsdir DIR"
801 *      configuration command.
802 * -t key
803 *      Add a key number to the trusted key list. This option can occur
804 *      more than once.
805 * -u user[:group]
806 *      Specify a user, and optionally a group, to switch to.
807 * -v variable
808 * -V variable
809 *      Add a system variable listed by default.
810 * -x   Normally, the time is slewed if the offset is less than the step
811 *      threshold, which is 128 ms by default, and stepped if above
812 *      the threshold. This option sets the threshold to 600 s, which is
813 *      well within the accuracy window to set the clock manually.
814 *      Note: since the slew rate of typical Unix kernels is limited
815 *      to 0.5 ms/s, each second of adjustment requires an amortization
816 *      interval of 2000 s. Thus, an adjustment as much as 600 s
817 *      will take almost 14 days to complete. This option can be used
818 *      with the -g and -q options. See the tinker command for other options.
819 *      Note: The kernel time discipline is disabled with this option.
820 */
821
822/* By doing init in a separate function we decrease stack usage
823 * in main loop.
824 */
825static NOINLINE void ntp_init(char **argv)
826{
827	unsigned opts;
828	llist_t *peers;
829
830	srandom(getpid());
831
832	if (getuid())
833		bb_error_msg_and_die(bb_msg_you_must_be_root);
834
835	peers = NULL;
836	opt_complementary = "dd:p::"; /* d: counter, p: list */
837	opts = getopt32(argv,
838			"nqNx" /* compat */
839			"p:"IF_FEATURE_NTPD_SERVER("l") /* NOT compat */
840			"d" /* compat */
841			"46aAbgL", /* compat, ignored */
842			&peers, &G.verbose);
843	if (!(opts & (OPT_p|OPT_l)))
844		bb_show_usage();
845	if (opts & OPT_x) /* disable stepping, only slew is allowed */
846		G.time_was_stepped = 1;
847	while (peers)
848		add_peers(llist_pop(&peers));
849	if (!(opts & OPT_n)) {
850		bb_daemonize_or_rexec(DAEMON_DEVNULL_STDIO, argv);
851		logmode = LOGMODE_NONE;
852	}
853#if ENABLE_FEATURE_NTPD_SERVER
854	G.listen_fd = -1;
855	if (opts & OPT_l) {
856		G.listen_fd = create_and_bind_dgram_or_die(NULL, 123);
857		socket_want_pktinfo(G.listen_fd);
858		setsockopt(G.listen_fd, IPPROTO_IP, IP_TOS, &const_IPTOS_LOWDELAY, sizeof(const_IPTOS_LOWDELAY));
859	}
860#endif
861	/* I hesitate to set -20 prio. -15 should be high enough for timekeeping */
862	if (opts & OPT_N)
863		setpriority(PRIO_PROCESS, 0, -15);
864
865	/* Set some globals */
866#if 0
867	/* With constant b = 100, G.precision_exp is also constant -6.
868	 * Uncomment this and you'll see */
869	{
870		int prec = 0;
871		int b;
872# if 0
873		struct timespec	tp;
874		/* We can use sys_clock_getres but assuming 10ms tick should be fine */
875		clock_getres(CLOCK_REALTIME, &tp);
876		tp.tv_sec = 0;
877		tp.tv_nsec = 10000000;
878		b = 1000000000 / tp.tv_nsec;  /* convert to Hz */
879# else
880		b = 100; /* b = 1000000000/10000000 = 100 */
881# endif
882		while (b > 1)
883			prec--, b >>= 1;
884		//G.precision_exp = prec;
885		bb_error_msg("G.precision_exp:%d", prec); /* -6 */
886	}
887#endif
888	G.scale = 1;
889
890	bb_signals((1 << SIGTERM) | (1 << SIGINT), record_signo);
891	bb_signals((1 << SIGPIPE) | (1 << SIGHUP), SIG_IGN);
892}
893
894int ntpd_main(int argc UNUSED_PARAM, char **argv) MAIN_EXTERNALLY_VISIBLE;
895int ntpd_main(int argc UNUSED_PARAM, char **argv)
896{
897	struct globals g;
898	struct pollfd *pfd;
899	peer_t **idx2peer;
900
901	memset(&g, 0, sizeof(g));
902	SET_PTR_TO_GLOBALS(&g);
903
904	ntp_init(argv);
905
906	{
907		/* if ENABLE_FEATURE_NTPD_SERVER, + 1 for listen_fd: */
908		unsigned cnt = g.peer_cnt + ENABLE_FEATURE_NTPD_SERVER;
909		idx2peer = xzalloc(sizeof(idx2peer[0]) * cnt);
910		pfd = xzalloc(sizeof(pfd[0]) * cnt);
911	}
912
913	while (!bb_got_signal) {
914		llist_t *item;
915		unsigned i, j;
916		unsigned sent_cnt, trial_cnt;
917		int nfds, timeout;
918		time_t cur_time, nextaction;
919
920		/* Nothing between here and poll() blocks for any significant time */
921
922		cur_time = time(NULL);
923		nextaction = cur_time + 3600;
924
925		i = 0;
926#if ENABLE_FEATURE_NTPD_SERVER
927		if (g.listen_fd != -1) {
928			pfd[0].fd = g.listen_fd;
929			pfd[0].events = POLLIN;
930			i++;
931		}
932#endif
933		/* Pass over peer list, send requests, time out on receives */
934		sent_cnt = trial_cnt = 0;
935		for (item = g.ntp_peers; item != NULL; item = item->link) {
936			peer_t *p = (peer_t *) item->data;
937
938			/* Overflow-safe "if (p->next_action_time <= cur_time) ..." */
939			if ((int)(cur_time - p->next_action_time) >= 0) {
940				if (p->p_fd == -1) {
941					/* Time to send new req */
942					trial_cnt++;
943					if (send_query_to_peer(p) == 0)
944						sent_cnt++;
945				} else {
946					/* Timed out waiting for reply */
947					close(p->p_fd);
948					p->p_fd = -1;
949					timeout = error_interval();
950					bb_error_msg("timed out waiting for %s, "
951							"next query in %us", p->p_dotted, timeout);
952					if (p->p_trustlevel >= TRUSTLEVEL_BADPEER) {
953						p->p_trustlevel /= 2;
954						if (p->p_trustlevel < TRUSTLEVEL_BADPEER)
955							bb_error_msg("peer %s now invalid", p->p_dotted);
956					}
957					set_next(p, timeout);
958				}
959			}
960
961			if (p->next_action_time < nextaction)
962				nextaction = p->next_action_time;
963
964			if (p->p_fd >= 0) {
965				/* Wait for reply from this peer */
966				pfd[i].fd = p->p_fd;
967				pfd[i].events = POLLIN;
968				idx2peer[i] = p;
969				i++;
970			}
971		}
972
973		if ((trial_cnt > 0 && sent_cnt == 0) || g.peer_cnt == 0)
974			step_time_once(0); /* no good peers, don't wait */
975
976		timeout = nextaction - cur_time;
977		if (timeout < 1)
978			timeout = 1;
979
980		/* Here we may block */
981		if (g.verbose >= 2)
982			bb_error_msg("poll %us, sockets:%u", timeout, i);
983		nfds = poll(pfd, i, timeout * 1000);
984		if (nfds <= 0)
985			continue;
986
987		/* Process any received packets */
988		j = 0;
989#if ENABLE_FEATURE_NTPD_SERVER
990		if (g.listen_fd != -1) {
991			if (pfd[0].revents /* & (POLLIN|POLLERR)*/) {
992				nfds--;
993				recv_and_process_client_pkt(/*g.listen_fd*/);
994			}
995			j = 1;
996		}
997#endif
998		for (; nfds != 0 && j < i; j++) {
999			if (pfd[j].revents /* & (POLLIN|POLLERR)*/) {
1000				nfds--;
1001				recv_and_process_peer_pkt(idx2peer[j]);
1002			}
1003		}
1004	} /* while (!bb_got_signal) */
1005
1006	kill_myself_with_sig(bb_got_signal);
1007}
1008