1/*
2 * ntp_io.c - input/output routines for ntpd.	The socket-opening code
3 *		   was shamelessly stolen from ntpd.
4 */
5
6#ifdef HAVE_CONFIG_H
7# include <config.h>
8#endif
9
10#include <stdio.h>
11#include <signal.h>
12#ifdef HAVE_FNMATCH_H
13# include <fnmatch.h>
14# if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE)
15#  define FNM_CASEFOLD FNM_IGNORECASE
16# endif
17#endif
18#ifdef HAVE_SYS_PARAM_H
19# include <sys/param.h>
20#endif
21#ifdef HAVE_SYS_IOCTL_H
22# include <sys/ioctl.h>
23#endif
24#ifdef HAVE_SYS_SOCKIO_H	/* UXPV: SIOC* #defines (Frank Vance <fvance@waii.com>) */
25# include <sys/sockio.h>
26#endif
27#ifdef HAVE_SYS_UIO_H
28# include <sys/uio.h>
29#endif
30
31#include "ntp_machine.h"
32#include "ntpd.h"
33#include "ntp_io.h"
34#include "iosignal.h"
35#include "ntp_lists.h"
36#include "ntp_refclock.h"
37#include "ntp_stdlib.h"
38#include "ntp_worker.h"
39#include "ntp_request.h"
40#include "ntp_assert.h"
41#include "timevalops.h"
42#include "timespecops.h"
43#include "ntpd-opts.h"
44#include "safecast.h"
45
46/* Don't include ISC's version of IPv6 variables and structures */
47#define ISC_IPV6_H 1
48#include <isc/mem.h>
49#include <isc/interfaceiter.h>
50#include <isc/netaddr.h>
51#include <isc/result.h>
52#include <isc/sockaddr.h>
53
54#ifdef SIM
55#include "ntpsim.h"
56#endif
57
58#ifdef HAS_ROUTING_SOCKET
59# include <net/route.h>
60# ifdef HAVE_RTNETLINK
61#  include <linux/rtnetlink.h>
62# endif
63#endif
64
65/*
66 * setsockopt does not always have the same arg declaration
67 * across all platforms. If it's not defined we make it empty
68 */
69
70#ifndef SETSOCKOPT_ARG_CAST
71#define SETSOCKOPT_ARG_CAST
72#endif
73
74extern int listen_to_virtual_ips;
75
76#ifndef IPTOS_DSCP_EF
77#define IPTOS_DSCP_EF 0xb8
78#endif
79int qos = IPTOS_DSCP_EF;	/* QoS RFC3246 */
80
81#ifdef LEAP_SMEAR
/* TODO burnicki: This should be moved to ntp_timer.c, but if we do so
 * we get a linker error. Since we're running out of time before the leap
 * second occurs, we leave it here, where it just works.
 */
86int leap_smear_intv;
87#endif
88
89/*
90 * NIC rule entry
91 */
typedef struct nic_rule_tag nic_rule;

/* one entry of the singly-linked nic_rule_list */
struct nic_rule_tag {
	nic_rule *	next;		/* next rule in the list */
	nic_rule_action	action;		/* action returned when the rule matches */
	nic_rule_match	match_type;	/* which criterion the rule tests */
	char *		if_name;	/* copy of the name, set for MATCH_IFNAME rules */
	sockaddr_u	addr;		/* parsed address, set for MATCH_IFADDR rules */
	int		prefixlen;	/* prefix length for MATCH_IFADDR; -1 is special-cased by interface_action() */
};
102
103/*
104 * NIC rule listhead.  Entries are added at the head so that the first
105 * match in the list is the last matching rule specified.
106 */
107nic_rule *nic_rule_list;
108
109
110#if defined(SO_BINTIME) && defined(SCM_BINTIME) && defined(CMSG_FIRSTHDR)
111#  define HAVE_PACKET_TIMESTAMP
112#  define HAVE_BINTIME
113#  ifdef BINTIME_CTLMSGBUF_SIZE
114#   define CMSG_BUFSIZE BINTIME_CTLMSGBUF_SIZE
115#  else
116#   define CMSG_BUFSIZE  1536 /* moderate default */
117#  endif
118#elif defined(SO_TIMESTAMPNS) && defined(SCM_TIMESTAMPNS) && defined(CMSG_FIRSTHDR)
119#  define HAVE_PACKET_TIMESTAMP
120#  define HAVE_TIMESTAMPNS
121#  ifdef TIMESTAMPNS_CTLMSGBUF_SIZE
122#   define CMSG_BUFSIZE TIMESTAMPNS_CTLMSGBUF_SIZE
123#  else
124#   define CMSG_BUFSIZE  1536 /* moderate default */
125#  endif
126#elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP) && defined(CMSG_FIRSTHDR)
127#  define HAVE_PACKET_TIMESTAMP
128#  define HAVE_TIMESTAMP
129#  ifdef TIMESTAMP_CTLMSGBUF_SIZE
130#   define CMSG_BUFSIZE TIMESTAMP_CTLMSGBUF_SIZE
131#  else
132#   define CMSG_BUFSIZE  1536 /* moderate default */
133#  endif
134#else
135/* fill in for old/other timestamp interfaces */
136#endif
137
138#if defined(SYS_WINNT)
139#include "win32_io.h"
140#include <isc/win32os.h>
141#endif
142
143/*
144 * We do asynchronous input using the SIGIO facility.  A number of
145 * recvbuf buffers are preallocated for input.	In the signal
146 * handler we poll to see which sockets are ready and read the
147 * packets from them into the recvbuf's along with a time stamp and
148 * an indication of the source host and the interface it was received
149 * through.  This allows us to get as accurate receive time stamps
150 * as possible independent of other processing going on.
151 *
152 * We watch the number of recvbufs available to the signal handler
153 * and allocate more when this number drops below the low water
154 * mark.  If the signal handler should run out of buffers in the
155 * interim it will drop incoming frames, the idea being that it is
156 * better to drop a packet than to be inaccurate.
157 */
158
159
160/*
161 * Other statistics of possible interest
162 */
163volatile u_long packets_dropped;	/* total number of packets dropped on reception */
164volatile u_long packets_ignored;	/* packets received on wild card interface */
165volatile u_long packets_received;	/* total number of packets received */
166	 u_long packets_sent;		/* total number of packets sent */
167	 u_long packets_notsent;	/* total number of packets which couldn't be sent */
168
169volatile u_long handler_calls;	/* number of calls to interrupt handler */
170volatile u_long handler_pkts;	/* number of pkts received by handler */
171u_long io_timereset;		/* time counters were reset */
172
173/*
174 * Interface stuff
175 */
176endpt *	any_interface;		/* wildcard ipv4 interface */
177endpt *	any6_interface;		/* wildcard ipv6 interface */
178endpt *	loopback_interface;	/* loopback ipv4 interface */
179
180static isc_boolean_t broadcast_client_enabled;
181u_int sys_ifnum;			/* next .ifnum to assign */
182int ninterfaces;			/* Total number of interfaces */
183
184int no_periodic_scan;		/* network endpoint scans */
185int scan_addrs_once;		/* because dropped privs */
186int nonlocal_v4_addr_up;	/* should we try IPv4 pool? */
187int nonlocal_v6_addr_up;	/* should we try IPv6 pool? */
188
189#ifdef REFCLOCK
190/*
191 * Refclock stuff.	We keep a chain of structures with data concerning
192 * the guys we are doing I/O for.
193 */
194static	struct refclockio *refio;
195#endif /* REFCLOCK */
196
197/*
198 * File descriptor masks etc. for call to select
199 * Not needed for I/O Completion Ports or anything outside this file
200 */
201static fd_set activefds;
202static int maxactivefd;
203
204/*
205 * bit alternating value to detect verified interfaces during an update cycle
206 */
207static  u_short		sys_interphase = 0;
208
209static endpt *	new_interface(endpt *);
210static void	add_interface(endpt *);
211static int	update_interfaces(u_short, interface_receiver_t,
212				  void *);
213static void	remove_interface(endpt *);
214static endpt *	create_interface(u_short, endpt *);
215
216static inline int is_wildcard_addr(const sockaddr_u *psau);
217
218/*
219 * Multicast functions
220 */
221static	isc_boolean_t	addr_ismulticast	(sockaddr_u *);
222static	isc_boolean_t	is_anycast		(sockaddr_u *,
223						 const char *);
224
225/*
226 * Not all platforms support multicast
227 */
228#ifdef MCAST
229static	isc_boolean_t	socket_multicast_enable	(endpt *, sockaddr_u *);
230static	isc_boolean_t	socket_multicast_disable(endpt *, sockaddr_u *);
231#endif
232
233#ifdef DEBUG
234static void interface_dump	(const endpt *);
235static void print_interface	(const endpt *, const char *, const char *);
236#define DPRINT_INTERFACE(level, args) do { if (debug >= (level)) { print_interface args; } } while (0)
237#else
238#define DPRINT_INTERFACE(level, args) do {} while (0)
239#endif
240
typedef struct vsock vsock_t;
/* kind of descriptor recorded in a vsock entry */
enum desc_type { FD_TYPE_SOCKET, FD_TYPE_FILE };

/* singly-linked list node pairing an open descriptor with its kind */
struct vsock {
	vsock_t	*	link;	/* next entry in the list */
	SOCKET		fd;	/* the descriptor itself */
	enum desc_type	type;	/* FD_TYPE_SOCKET or FD_TYPE_FILE */
};

/* presumably the list head maintained by add_fd_to_list() -- confirm */
vsock_t	*fd_list;
251
252#if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
253/*
254 * async notification processing (e. g. routing sockets)
255 */
/*
 * support for receiving data on an fd that is neither a refclock
 * nor a socket, e.g. a routing socket.  Entries are kept on
 * asyncio_reader_list.
 */
struct asyncio_reader {
	struct asyncio_reader *link;		    /* the list this is being kept in */
	SOCKET fd;				    /* fd to be read */
	void  *data;				    /* possibly local data */
	void (*receiver)(struct asyncio_reader *);  /* input handler */
};
266
267struct asyncio_reader *asyncio_reader_list;
268
269static void delete_asyncio_reader (struct asyncio_reader *);
270static struct asyncio_reader *new_asyncio_reader (void);
271static void add_asyncio_reader (struct asyncio_reader *, enum desc_type);
272static void remove_asyncio_reader (struct asyncio_reader *);
273
274#endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
275
276static void init_async_notifications (void);
277
278static	int	addr_eqprefix	(const sockaddr_u *, const sockaddr_u *,
279				 int);
280static int	addr_samesubnet	(const sockaddr_u *, const sockaddr_u *,
281				 const sockaddr_u *, const sockaddr_u *);
282static	int	create_sockets	(u_short);
283static	SOCKET	open_socket	(sockaddr_u *, int, int, endpt *);
284static	void	set_reuseaddr	(int);
285static	isc_boolean_t	socket_broadcast_enable	 (endpt *, SOCKET, sockaddr_u *);
286
287#if !defined(HAVE_IO_COMPLETION_PORT) && !defined(HAVE_SIGNALED_IO)
288static	char *	fdbits		(int, const fd_set *);
289#endif
290#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
291static	isc_boolean_t	socket_broadcast_disable (endpt *, sockaddr_u *);
292#endif
293
typedef struct remaddr remaddr_t;

/* singly-linked list node associating an address with an endpoint */
struct remaddr {
	remaddr_t *		link;	/* next entry in the list */
	sockaddr_u		addr;	/* the address being tracked */
	endpt *			ep;	/* endpoint associated with addr */
};
301
302remaddr_t *	remoteaddr_list;
303endpt *		ep_list;	/* complete endpt list */
304endpt *		mc4_list;	/* IPv4 mcast-capable unicast endpts */
305endpt *		mc6_list;	/* IPv6 mcast-capable unicast endpts */
306
307static endpt *	wildipv4;
308static endpt *	wildipv6;
309
310#define		RFC3927_ADDR	0xa9fe0000	/* 169.254. */
311#define		RFC3927_MASK	0xffff0000
312#define		IS_AUTOCONF(addr4)					\
313		((SRCADR(addr4) & RFC3927_MASK) == RFC3927_ADDR)
314
315#ifdef SYS_WINNT
316int accept_wildcard_if_for_winnt;
317#else
318const int accept_wildcard_if_for_winnt = FALSE;
319#define		init_io_completion_port()	do {} while (FALSE)
320#endif
321
322static void	add_fd_to_list		(SOCKET, enum desc_type);
323static endpt *	find_addr_in_list	(sockaddr_u *);
324static endpt *	find_flagged_addr_in_list(sockaddr_u *, u_int32);
325static void	delete_addr_from_list	(sockaddr_u *);
326static void	delete_interface_from_list(endpt *);
327static void	close_and_delete_fd_from_list(SOCKET, endpt *);
328static void	add_addr_to_list	(sockaddr_u *, endpt *);
329static void	create_wildcards	(u_short);
330static endpt *	findlocalinterface	(sockaddr_u *, int, int);
331static endpt *	findclosestinterface	(sockaddr_u *, int);
332#ifdef DEBUG
333static const char *	action_text	(nic_rule_action);
334#endif
335static nic_rule_action	interface_action(char *, sockaddr_u *, u_int32);
336static void		convert_isc_if	(isc_interface_t *,
337					 endpt *, u_short);
338static void		calc_addr_distance(sockaddr_u *,
339					   const sockaddr_u *,
340					   const sockaddr_u *);
341static int		cmp_addr_distance(const sockaddr_u *,
342					  const sockaddr_u *);
343
344/*
345 * Routines to read the ntp packets
346 */
347#if !defined(HAVE_IO_COMPLETION_PORT)
348static inline int	read_network_packet	(SOCKET, endpt *, l_fp);
349static void		ntpd_addremove_io_fd	(int, int, int);
350static void 		input_handler_scan	(const l_fp*, const fd_set*);
351static int/*BOOL*/	sanitize_fdset		(int errc);
352#ifdef REFCLOCK
353static inline int	read_refclock_packet	(SOCKET, struct refclockio *, l_fp);
354#endif
355#ifdef HAVE_SIGNALED_IO
356static void 		input_handler		(l_fp*);
357#endif
358#endif
359
360
361#ifndef HAVE_IO_COMPLETION_PORT
362void
363maintain_activefds(
364	int fd,
365	int closing
366	)
367{
368	int i;
369
370	if (fd < 0 || fd >= FD_SETSIZE) {
371		msyslog(LOG_ERR,
372			"Too many sockets in use, FD_SETSIZE %d exceeded by fd %d",
373			FD_SETSIZE, fd);
374		exit(1);
375	}
376
377	if (!closing) {
378		FD_SET(fd, &activefds);
379		maxactivefd = max(fd, maxactivefd);
380	} else {
381		FD_CLR(fd, &activefds);
382		if (maxactivefd && fd == maxactivefd) {
383			for (i = maxactivefd - 1; i >= 0; i--)
384				if (FD_ISSET(i, &activefds)) {
385					maxactivefd = i;
386					break;
387				}
388			INSIST(fd != maxactivefd);
389		}
390	}
391}
392#endif	/* !HAVE_IO_COMPLETION_PORT */
393
394
395#ifdef DEBUG_TIMING
396/*
397 * collect timing information for various processing
398 * paths. currently we only pass them on to the file
399 * for later processing. this could also do histogram
400 * based analysis in other to reduce the load (and skew)
401 * dur to the file output
402 */
403void
404collect_timing(struct recvbuf *rb, const char *tag, int count, l_fp *dts)
405{
406	char buf[256];
407
408	snprintf(buf, sizeof(buf), "%s %d %s %s",
409		 (rb != NULL)
410		     ? ((rb->dstadr != NULL)
411			    ? stoa(&rb->recv_srcadr)
412			    : "-REFCLOCK-")
413		     : "-",
414		 count, lfptoa(dts, 9), tag);
415	record_timing_stats(buf);
416}
417#endif
418
419/*
420 * About dynamic interfaces, sockets, reception and more...
421 *
422 * the code solves following tasks:
423 *
424 *   - keep a current list of active interfaces in order
 *     to bind to the interface address on NTP_PORT so that
426 *     all wild and specific bindings for NTP_PORT are taken by ntpd
427 *     to avoid other daemons messing with the time or sockets.
428 *   - all interfaces keep a list of peers that are referencing
429 *     the interface in order to quickly re-assign the peers to
430 *     new interface in case an interface is deleted (=> gone from system or
431 *     down)
432 *   - have a preconfigured socket ready with the right local address
433 *     for transmission and reception
434 *   - have an address list for all destination addresses used within ntpd
435 *     to find the "right" preconfigured socket.
436 *   - facilitate updating the internal interface list with respect to
437 *     the current kernel state
438 *
439 * special issues:
440 *
441 *   - mapping of multicast addresses to the interface affected is not always
442 *     one to one - especially on hosts with multiple interfaces
443 *     the code here currently allocates a separate interface entry for those
444 *     multicast addresses
445 *     iff it is able to bind to a *new* socket with the multicast address (flags |= MCASTIF)
446 *     in case of failure the multicast address is bound to an existing interface.
447 *   - on some systems it is perfectly legal to assign the same address to
448 *     multiple interfaces. Therefore this code does not keep a list of interfaces
449 *     but a list of interfaces that represent a unique address as determined by the kernel
450 *     by the procedure in findlocalinterface. Thus it is perfectly legal to see only
451 *     one representative of a group of real interfaces if they share the same address.
452 *
453 * Frank Kardel 20050910
454 */
455
456/*
457 * init_io - initialize I/O module.
458 */
459void
460init_io(void)
461{
462	/* Init buffer free list and stat counters */
463	init_recvbuff(RECV_INIT);
464	/* update interface every 5 minutes as default */
465	endpt_scan_period = 301;
466
467#ifdef WORK_PIPE
468	addremove_io_fd = &ntpd_addremove_io_fd;
469#endif
470
471	init_io_completion_port();
472#if defined(HAVE_SIGNALED_IO)
473	(void) set_signal(input_handler);
474#endif
475}
476
477
/*
 * ntpd_addremove_io_fd - start or stop watching a descriptor
 *
 * fd		the descriptor
 * is_pipe	unused here
 * remove_it	zero to add fd, nonzero to remove it
 *
 * init_io() installs this as the addremove_io_fd hook when WORK_PIPE
 * is defined.
 */
static void
ntpd_addremove_io_fd(
	int	fd,
	int	is_pipe,
	int	remove_it
	)
{
	UNUSED_ARG(is_pipe);

#ifdef HAVE_SIGNALED_IO
	/* a descriptor being added must first be set up for signaled I/O */
	if (0 == remove_it) {
		init_socket_sig(fd);
	}
#endif /* HAVE_SIGNALED_IO */

	maintain_activefds(fd, remove_it);
}
494
495
496/*
497 * io_open_sockets - call socket creation routine
498 */
499void
500io_open_sockets(void)
501{
502	static int already_opened;
503
504	if (already_opened || HAVE_OPT( SAVECONFIGQUIT ))
505		return;
506
507	already_opened = 1;
508
509	/*
510	 * Create the sockets
511	 */
512	BLOCKIO();
513	create_sockets(NTP_PORT);
514	UNBLOCKIO();
515
516	init_async_notifications();
517
518	DPRINTF(3, ("io_open_sockets: maxactivefd %d\n", maxactivefd));
519}
520
521
522#ifdef DEBUG
523/*
524 * function to dump the contents of the interface structure
525 * for debugging use only.
526 * We face a dilemma here -- sockets are FDs under POSIX and
527 * actually HANDLES under Windows. So we use '%lld' as format
528 * and cast the value to 'long long'; this should not hurt
529 * with UNIX-like systems and does not truncate values on Win64.
530 */
531void
532interface_dump(const endpt *itf)
533{
534	printf("Dumping interface: %p\n", itf);
535	printf("fd = %lld\n", (long long)itf->fd);
536	printf("bfd = %lld\n", (long long)itf->bfd);
537	printf("sin = %s,\n", stoa(&itf->sin));
538	printf("bcast = %s,\n", stoa(&itf->bcast));
539	printf("mask = %s,\n", stoa(&itf->mask));
540	printf("name = %s\n", itf->name);
541	printf("flags = 0x%08x\n", itf->flags);
542	printf("last_ttl = %d\n", itf->last_ttl);
543	printf("addr_refid = %08x\n", itf->addr_refid);
544	printf("num_mcast = %d\n", itf->num_mcast);
545	printf("received = %ld\n", itf->received);
546	printf("sent = %ld\n", itf->sent);
547	printf("notsent = %ld\n", itf->notsent);
548	printf("ifindex = %u\n", itf->ifindex);
549	printf("peercnt = %u\n", itf->peercnt);
550	printf("phase = %u\n", itf->phase);
551}
552
553
554/*
555 * print_interface - helper to output debug information
556 */
557static void
558print_interface(const endpt *iface, const char *pfx, const char *sfx)
559{
560	printf("%sinterface #%d: fd=%lld, bfd=%lld, name=%s, flags=0x%x, ifindex=%u, sin=%s",
561	       pfx,
562	       iface->ifnum,
563	       (long long)iface->fd,
564	       (long long)iface->bfd,
565	       iface->name,
566	       iface->flags,
567	       iface->ifindex,
568	       stoa(&iface->sin));
569	if (AF_INET == iface->family) {
570		if (iface->flags & INT_BROADCAST)
571			printf(", bcast=%s", stoa(&iface->bcast));
572		printf(", mask=%s", stoa(&iface->mask));
573	}
574	printf(", %s:%s",
575	       (iface->ignore_packets)
576		   ? "Disabled"
577		   : "Enabled",
578	       sfx);
579	if (debug > 4)	/* in-depth debugging only */
580		interface_dump(iface);
581}
582#endif
583
584#if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
585/*
586 * create an asyncio_reader structure
587 */
588static struct asyncio_reader *
589new_asyncio_reader(void)
590{
591	struct asyncio_reader *reader;
592
593	reader = emalloc_zero(sizeof(*reader));
594	reader->fd = INVALID_SOCKET;
595
596	return reader;
597}
598
/*
 * delete_asyncio_reader - release the storage of a reader
 *
 * Only frees the structure; the list and the descriptor are not
 * touched here (see remove_asyncio_reader() for that).
 */
static void
delete_asyncio_reader(
	struct asyncio_reader *reader
	)
{
	free(reader);
}
609
/*
 * add_asyncio_reader - link a reader into asyncio_reader_list and
 * record its descriptor, with the given type, via add_fd_to_list()
 */
static void
add_asyncio_reader(
	struct asyncio_reader *	reader,
	enum desc_type		type)
{
	LINK_SLIST(asyncio_reader_list, reader, link);
	add_fd_to_list(reader->fd, type);
}
621
622/*
623 * remove asyncio_reader
624 */
625static void
626remove_asyncio_reader(
627	struct asyncio_reader *reader
628	)
629{
630	struct asyncio_reader *unlinked;
631
632	UNLINK_SLIST(unlinked, asyncio_reader_list, reader, link,
633	    struct asyncio_reader);
634
635	if (reader->fd != INVALID_SOCKET) {
636		close_and_delete_fd_from_list(reader->fd, NULL);
637	}
638	reader->fd = INVALID_SOCKET;
639}
640#endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
641
642
643/* compare two sockaddr prefixes */
644static int
645addr_eqprefix(
646	const sockaddr_u *	a,
647	const sockaddr_u *	b,
648	int			prefixlen
649	)
650{
651	isc_netaddr_t		isc_a;
652	isc_netaddr_t		isc_b;
653	isc_sockaddr_t		isc_sa;
654
655	ZERO(isc_sa);
656	memcpy(&isc_sa.type, a, min(sizeof(isc_sa.type), sizeof(*a)));
657	isc_netaddr_fromsockaddr(&isc_a, &isc_sa);
658
659	ZERO(isc_sa);
660	memcpy(&isc_sa.type, b, min(sizeof(isc_sa.type), sizeof(*b)));
661	isc_netaddr_fromsockaddr(&isc_b, &isc_sa);
662
663	return (int)isc_netaddr_eqprefix(&isc_a, &isc_b,
664					 (u_int)prefixlen);
665}
666
667
668static int
669addr_samesubnet(
670	const sockaddr_u *	a,
671	const sockaddr_u *	a_mask,
672	const sockaddr_u *	b,
673	const sockaddr_u *	b_mask
674	)
675{
676	const u_int32 *	pa;
677	const u_int32 *	pa_limit;
678	const u_int32 *	pb;
679	const u_int32 *	pm;
680	size_t		loops;
681
682	REQUIRE(AF(a) == AF(a_mask));
683	REQUIRE(AF(b) == AF(b_mask));
684	/*
685	 * With address and mask families verified to match, comparing
686	 * the masks also validates the address's families match.
687	 */
688	if (!SOCK_EQ(a_mask, b_mask))
689		return FALSE;
690
691	if (IS_IPV6(a)) {
692		loops = sizeof(NSRCADR6(a)) / sizeof(*pa);
693		pa = (const void *)&NSRCADR6(a);
694		pb = (const void *)&NSRCADR6(b);
695		pm = (const void *)&NSRCADR6(a_mask);
696	} else {
697		loops = sizeof(NSRCADR(a)) / sizeof(*pa);
698		pa = (const void *)&NSRCADR(a);
699		pb = (const void *)&NSRCADR(b);
700		pm = (const void *)&NSRCADR(a_mask);
701	}
702	for (pa_limit = pa + loops; pa < pa_limit; pa++, pb++, pm++)
703		if ((*pa & *pm) != (*pb & *pm))
704			return FALSE;
705
706	return TRUE;
707}
708
709
710/*
711 * interface list enumerator - visitor pattern
712 */
713void
714interface_enumerate(
715	interface_receiver_t	receiver,
716	void *			data
717	)
718{
719	interface_info_t ifi;
720
721	ifi.action = IFS_EXISTS;
722	for (ifi.ep = ep_list; ifi.ep != NULL; ifi.ep = ifi.ep->elink)
723		(*receiver)(data, &ifi);
724}
725
/*
 * init_interface - standard initialization of an interface structure
 *
 * Clears the whole structure, marks both descriptors as not open
 * and stamps the endpoint with the current sys_interphase value.
 */
static inline void
init_interface(
	endpt *ep
	)
{
	ZERO(*ep);
	/* zero is not INVALID_SOCKET, so set the descriptors explicitly */
	ep->fd = INVALID_SOCKET;
	ep->bfd = INVALID_SOCKET;
	ep->phase = sys_interphase;
}
739
740
741/*
742 * create new interface structure initialize from
743 * template structure or via standard initialization
744 * function
745 */
746static endpt *
747new_interface(
748	endpt *protot
749	)
750{
751	endpt *	iface;
752
753	iface = emalloc(sizeof(*iface));
754	if (NULL == protot) {
755		ZERO(*iface);
756	} else {
757		memcpy(iface, protot, sizeof(*iface));
758	}
759	/* count every new instance of an interface in the system */
760	iface->ifnum = sys_ifnum++;
761	iface->starttime = current_time;
762
763#   ifdef HAVE_IO_COMPLETION_PORT
764	if (!io_completion_port_add_interface(iface)) {
765		msyslog(LOG_EMERG, "cannot register interface with IO engine -- will exit now");
766		exit(1);
767	}
768#   endif
769	return iface;
770}
771
772
/*
 * delete_interface - return interface storage to the free memory pool
 *
 * With HAVE_IO_COMPLETION_PORT the endpoint is first deregistered
 * from the IO engine.  The endpoint must not be used afterwards.
 */
static void
delete_interface(
	endpt *ep
	)
{
#    ifdef HAVE_IO_COMPLETION_PORT
	io_completion_port_remove_interface(ep);
#    endif
	free(ep);
}
786
787
788/*
789 * link interface into list of known interfaces
790 */
791static void
792add_interface(
793	endpt *	ep
794	)
795{
796	endpt **	pmclisthead;
797	endpt *		scan;
798	endpt *		scan_next;
799	int		same_subnet;
800	int		rc;
801
802	/* Calculate the refid */
803	ep->addr_refid = addr2refid(&ep->sin);
804#    ifdef WORDS_BIGENDIAN
805	if (IS_IPV6(&ep->sin)) {
806		ep->old_refid = BYTESWAP32(ep->addr_refid);
807	}
808#    endif
809	/* link at tail so ntpdc -c ifstats index increases each row */
810	LINK_TAIL_SLIST(ep_list, ep, elink, endpt);
811	ninterfaces++;
812#ifdef MCAST
813	/* the rest is for enabled multicast-capable addresses only */
814	if (ep->ignore_packets || !(INT_MULTICAST & ep->flags) ||
815	    INT_LOOPBACK & ep->flags)
816		return;
817# ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
818	if (AF_INET6 == ep->family)
819		return;
820# endif
821	pmclisthead = (AF_INET == ep->family)
822			 ? &mc4_list
823			 : &mc6_list;
824
825	/*
826	 * If we have multiple global addresses from the same prefix
827	 * on the same network interface, multicast from one.
828	 */
829	for (scan = *pmclisthead; scan != NULL; scan = scan_next) {
830		scan_next = scan->mclink;
831		if (   ep->family != scan->family
832		    || ep->ifindex != scan->ifindex) {
833			continue;
834		}
835		same_subnet = addr_samesubnet(&ep->sin, &ep->mask,
836					      &scan->sin, &scan->mask);
837		if (same_subnet) {
838			DPRINTF(4, ("did not add %s to multicast-capable list"
839				    "which already has %s\n",
840				    stoa(&ep->sin), stoa(&scan->sin)));
841			return;
842		}
843	}
844	LINK_SLIST(*pmclisthead, ep, mclink);
845	if (INVALID_SOCKET == ep->fd)
846		return;
847
848	/*
849	 * select the local address from which to send to multicast.
850	 */
851	switch (AF(&ep->sin)) {
852
853	case AF_INET :
854		rc = setsockopt(ep->fd, IPPROTO_IP,
855				IP_MULTICAST_IF,
856				(void *)&NSRCADR(&ep->sin),
857				sizeof(NSRCADR(&ep->sin)));
858		if (rc)
859			msyslog(LOG_ERR,
860				"setsockopt IP_MULTICAST_IF %s fails: %m",
861				stoa(&ep->sin));
862		break;
863
864# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
865	case AF_INET6 :
866		rc = setsockopt(ep->fd, IPPROTO_IPV6,
867				 IPV6_MULTICAST_IF,
868				 (void *)&ep->ifindex,
869				 sizeof(ep->ifindex));
870		/* do not complain if bound addr scope is ifindex */
871		if (rc && ep->ifindex != SCOPE(&ep->sin))
872			msyslog(LOG_ERR,
873				"setsockopt IPV6_MULTICAST_IF %u for %s fails: %m",
874				ep->ifindex, stoa(&ep->sin));
875		break;
876# endif
877	}
878#endif	/* MCAST */
879}
880
881
/*
 * remove_interface - remove interface from known interface list and
 * clean up associated resources
 *
 * Unlinks ep from ep_list and, for an enabled multicast-flagged
 * endpoint, from the matching mc4/mc6 list; drops it from the address
 * list; closes its unicast and broadcast sockets if open (logging
 * each); decrements ninterfaces; clears monitor references and removes
 * the endpoint's self-restriction entry.  The structure itself is not
 * freed here.
 */
static void
remove_interface(
	endpt *	ep
	)
{
	endpt *		unlinked;
	endpt **	pmclisthead;
	sockaddr_u	resmask;
	int/*BOOL*/	success;

	UNLINK_SLIST(unlinked, ep_list, ep, elink, endpt);
	if (!ep->ignore_packets && INT_MULTICAST & ep->flags) {
		pmclisthead = (AF_INET == ep->family)
				 ? &mc4_list
				 : &mc6_list;
		UNLINK_SLIST(unlinked, *pmclisthead, ep, mclink, endpt);
		/* unlinked is NULL when ep was not on that list */
		DPRINTF(4, ("%s %s IPv%s multicast-capable unicast local address list\n",
			stoa(&ep->sin),
			(unlinked != NULL)
			    ? "removed from"
			    : "not found on",
			(AF_INET == ep->family)
			    ? "4"
			    : "6"));
	}
	delete_interface_from_list(ep);

	/* unicast socket: log final statistics, then close */
	if (ep->fd != INVALID_SOCKET) {
		msyslog(LOG_INFO,
			"Deleting %d %s, [%s]:%hd, stats:"
			" received=%ld, sent=%ld, dropped=%ld,"
			" active_time=%ld secs",
			ep->ifnum,
			ep->name,
			stoa(&ep->sin),
			SRCPORT(&ep->sin),
			ep->received,
			ep->sent,
			ep->notsent,
			current_time - ep->starttime);
		close_and_delete_fd_from_list(ep->fd, ep);
		ep->fd = INVALID_SOCKET;
	}

	/* broadcast socket, if one was opened */
	if (ep->bfd != INVALID_SOCKET) {
		msyslog(LOG_INFO,
			"stop listening for broadcasts to %s on interface #%d %s",
			stoa(&ep->bcast), ep->ifnum, ep->name);
		close_and_delete_fd_from_list(ep->bfd, ep);
		ep->bfd = INVALID_SOCKET;
	}
#   ifdef HAVE_IO_COMPLETION_PORT
	io_completion_port_remove_interface(ep);
#   endif

	ninterfaces--;
	mon_clearinterface(ep);

	/* remove restrict interface entry */
	SET_HOSTMASK(&resmask, AF(&ep->sin));
	success = hack_restrict(RESTRICT_REMOVEIF, &ep->sin, &resmask, 0,
				RESM_NTPONLY | RESM_INTERFACE, 0, 0);
	if (!success) {
		msyslog(LOG_ERR,
			"unable to remove self-restriction for %s",
			stoa(&ep->sin));
	}

}
955
956
957static void
958log_listen_address(
959	endpt *	ep
960	)
961{
962	msyslog(LOG_INFO, "%s on %d %s %s",
963		(ep->ignore_packets)
964		    ? "Listen and drop"
965		    : "Listen normally",
966		ep->ifnum,
967		ep->name,
968		sptoa(&ep->sin));
969}
970
971
/*
 * create_wildcards - create the wildcard pseudo-interfaces
 *
 * port		port to bind the wildcard sockets to
 *
 * For IPv6 (when compiled in and ipv6_works) and then IPv4 (when
 * ipv4_works), consults the nic rules for the wildcard address.
 * Unless the action is ACTION_IGNORE, a pseudo-interface is created
 * and bound; ACTION_DROP makes it ignore received packets.  Failure
 * to bind a wildcard socket is logged and terminates the process.
 */
static void
create_wildcards(
	u_short	port
	)
{
	int			v4wild;
#ifdef INCLUDE_IPV6_SUPPORT
	int			v6wild;
#endif
	sockaddr_u		wildaddr;
	nic_rule_action		action;
	endpt *			wildif;

	/*
	 * silence "potentially uninitialized" warnings from VC9
	 * failing to follow the logic.  Ideally action could remain
	 * uninitialized, and the memset be the first statement under
	 * the first address-family test below.
	 */
	action = ACTION_LISTEN;
	ZERO(wildaddr);

#ifdef INCLUDE_IPV6_SUPPORT
	/*
	 * create pseudo-interface with wildcard IPv6 address
	 */
	v6wild = ipv6_works;
	if (v6wild) {
		/* set wildaddr to the v6 wildcard address :: */
		ZERO(wildaddr);
		AF(&wildaddr) = AF_INET6;
		SET_ADDR6N(&wildaddr, in6addr_any);
		SET_PORT(&wildaddr, port);
		SET_SCOPE(&wildaddr, 0);

		/* check for interface/nic rules affecting the wildcard */
		action = interface_action(NULL, &wildaddr, 0);
		v6wild = (ACTION_IGNORE != action);
	}
	if (v6wild) {
		wildif = new_interface(NULL);

		strlcpy(wildif->name, "v6wildcard", sizeof(wildif->name));
		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
		wildif->family = AF_INET6;
		AF(&wildif->mask) = AF_INET6;
		SET_ONESMASK(&wildif->mask);

		wildif->flags = INT_UP | INT_WILDCARD;
		wildif->ignore_packets = (ACTION_DROP == action);

		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);

		if (wildif->fd != INVALID_SOCKET) {
			/* publish the endpoint only once it is bound */
			wildipv6 = wildif;
			any6_interface = wildif;
			add_addr_to_list(&wildif->sin, wildif);
			add_interface(wildif);
			log_listen_address(wildif);
		} else {
			msyslog(LOG_ERR,
				"unable to bind to wildcard address %s - another process may be running - EXITING",
				stoa(&wildif->sin));
			exit(1);
		}
		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
	}
#endif

	/*
	 * create pseudo-interface with wildcard IPv4 address
	 */
	v4wild = ipv4_works;
	if (v4wild) {
		/* set wildaddr to the v4 wildcard address 0.0.0.0 */
		AF(&wildaddr) = AF_INET;
		SET_ADDR4N(&wildaddr, INADDR_ANY);
		SET_PORT(&wildaddr, port);

		/* check for interface/nic rules affecting the wildcard */
		action = interface_action(NULL, &wildaddr, 0);
		v4wild = (ACTION_IGNORE != action);
	}
	if (v4wild) {
		wildif = new_interface(NULL);

		strlcpy(wildif->name, "v4wildcard", sizeof(wildif->name));
		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
		wildif->family = AF_INET;
		AF(&wildif->mask) = AF_INET;
		SET_ONESMASK(&wildif->mask);

		wildif->flags = INT_BROADCAST | INT_UP | INT_WILDCARD;
		wildif->ignore_packets = (ACTION_DROP == action);
#if defined(MCAST)
		/*
		 * enable multicast reception on the broadcast socket
		 */
		AF(&wildif->bcast) = AF_INET;
		SET_ADDR4N(&wildif->bcast, INADDR_ANY);
		SET_PORT(&wildif->bcast, port);
#endif /* MCAST */
		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);

		if (wildif->fd != INVALID_SOCKET) {
			/* publish the endpoint only once it is bound */
			wildipv4 = wildif;
			any_interface = wildif;

			add_addr_to_list(&wildif->sin, wildif);
			add_interface(wildif);
			log_listen_address(wildif);
		} else {
			msyslog(LOG_ERR,
				"unable to bind to wildcard address %s - another process may be running - EXITING",
				stoa(&wildif->sin));
			exit(1);
		}
		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
	}
}
1092
1093
1094/*
1095 * add_nic_rule() -- insert a rule entry at the head of nic_rule_list.
1096 */
1097void
1098add_nic_rule(
1099	nic_rule_match	match_type,
1100	const char *	if_name,	/* interface name or numeric address */
1101	int		prefixlen,
1102	nic_rule_action	action
1103	)
1104{
1105	nic_rule *	rule;
1106	isc_boolean_t	is_ip;
1107
1108	rule = emalloc_zero(sizeof(*rule));
1109	rule->match_type = match_type;
1110	rule->prefixlen = prefixlen;
1111	rule->action = action;
1112
1113	if (MATCH_IFNAME == match_type) {
1114		REQUIRE(NULL != if_name);
1115		rule->if_name = estrdup(if_name);
1116	} else if (MATCH_IFADDR == match_type) {
1117		REQUIRE(NULL != if_name);
1118		/* set rule->addr */
1119		is_ip = is_ip_address(if_name, AF_UNSPEC, &rule->addr);
1120		REQUIRE(is_ip);
1121	} else
1122		REQUIRE(NULL == if_name);
1123
1124	LINK_SLIST(nic_rule_list, rule, next);
1125}
1126
1127
#ifdef DEBUG
/*
 * action_text() - debug-only helper mapping a nic_rule_action to the
 *		   keyword printed in trace output.
 *
 * An unrecognized value is traced and then trips ENSURE(0); "ERROR"
 * is returned should execution continue past the assertion.
 */
static const char *
action_text(
	nic_rule_action	action
	)
{
	switch (action) {

	case ACTION_LISTEN:
		return "listen";

	case ACTION_IGNORE:
		return "ignore";

	case ACTION_DROP:
		return "drop";

	default:
		break;
	}

	/* not one of the known actions */
	DPRINTF(1, ("fatal: unknown nic_rule_action %d\n",
		    action));
	ENSURE(0);

	return "ERROR";
}
#endif	/* DEBUG */
1161
1162
1163static nic_rule_action
1164interface_action(
1165	char *		if_name,
1166	sockaddr_u *	if_addr,
1167	u_int32		if_flags
1168	)
1169{
1170	nic_rule *	rule;
1171	int		isloopback;
1172	int		iswildcard;
1173
1174	DPRINTF(4, ("interface_action: interface %s ",
1175		    (if_name != NULL) ? if_name : "wildcard"));
1176
1177	iswildcard = is_wildcard_addr(if_addr);
1178	isloopback = !!(INT_LOOPBACK & if_flags);
1179
1180	/*
1181	 * Find any matching NIC rule from --interface / -I or ntp.conf
1182	 * interface/nic rules.
1183	 */
1184	for (rule = nic_rule_list; rule != NULL; rule = rule->next) {
1185
1186		switch (rule->match_type) {
1187
1188		case MATCH_ALL:
1189			/* loopback and wildcard excluded from "all" */
1190			if (isloopback || iswildcard)
1191				break;
1192			DPRINTF(4, ("nic all %s\n",
1193			    action_text(rule->action)));
1194			return rule->action;
1195
1196		case MATCH_IPV4:
1197			if (IS_IPV4(if_addr)) {
1198				DPRINTF(4, ("nic ipv4 %s\n",
1199				    action_text(rule->action)));
1200				return rule->action;
1201			}
1202			break;
1203
1204		case MATCH_IPV6:
1205			if (IS_IPV6(if_addr)) {
1206				DPRINTF(4, ("nic ipv6 %s\n",
1207				    action_text(rule->action)));
1208				return rule->action;
1209			}
1210			break;
1211
1212		case MATCH_WILDCARD:
1213			if (iswildcard) {
1214				DPRINTF(4, ("nic wildcard %s\n",
1215				    action_text(rule->action)));
1216				return rule->action;
1217			}
1218			break;
1219
1220		case MATCH_IFADDR:
1221			if (rule->prefixlen != -1) {
1222				if (addr_eqprefix(if_addr, &rule->addr,
1223						  rule->prefixlen)) {
1224
1225					DPRINTF(4, ("subnet address match - %s\n",
1226					    action_text(rule->action)));
1227					return rule->action;
1228				}
1229			} else
1230				if (SOCK_EQ(if_addr, &rule->addr)) {
1231
1232					DPRINTF(4, ("address match - %s\n",
1233					    action_text(rule->action)));
1234					return rule->action;
1235				}
1236			break;
1237
1238		case MATCH_IFNAME:
1239			if (if_name != NULL
1240#if defined(HAVE_FNMATCH) && defined(FNM_CASEFOLD)
1241			    && !fnmatch(rule->if_name, if_name, FNM_CASEFOLD)
1242#else
1243			    && !strcasecmp(if_name, rule->if_name)
1244#endif
1245			    ) {
1246
1247				DPRINTF(4, ("interface name match - %s\n",
1248				    action_text(rule->action)));
1249				return rule->action;
1250			}
1251			break;
1252		}
1253	}
1254
1255	/*
1256	 * Unless explicitly disabled such as with "nic ignore ::1"
1257	 * listen on loopback addresses.  Since ntpq and ntpdc query
1258	 * "localhost" by default, which typically resolves to ::1 and
1259	 * 127.0.0.1, it's useful to default to listening on both.
1260	 */
1261	if (isloopback) {
1262		DPRINTF(4, ("default loopback listen\n"));
1263		return ACTION_LISTEN;
1264	}
1265
1266	/*
1267	 * Treat wildcard addresses specially.  If there is no explicit
1268	 * "nic ... wildcard" or "nic ... 0.0.0.0" or "nic ... ::" rule
1269	 * default to drop.
1270	 */
1271	if (iswildcard) {
1272		DPRINTF(4, ("default wildcard drop\n"));
1273		return ACTION_DROP;
1274	}
1275
1276	/*
1277	 * Check for "virtual IP" (colon in the interface name) after
1278	 * the rules so that "ntpd --interface eth0:1 -novirtualips"
1279	 * does indeed listen on eth0:1's addresses.
1280	 */
1281	if (!listen_to_virtual_ips && if_name != NULL
1282	    && (strchr(if_name, ':') != NULL)) {
1283
1284		DPRINTF(4, ("virtual ip - ignore\n"));
1285		return ACTION_IGNORE;
1286	}
1287
1288	/*
1289	 * If there are no --interface/-I command-line options and no
1290	 * interface/nic rules in ntp.conf, the default action is to
1291	 * listen.  In the presence of rules from either, the default
1292	 * is to ignore.  This implements ntpd's traditional listen-
1293	 * every default with no interface listen configuration, and
1294	 * ensures a single -I eth0 or "nic listen eth0" means do not
1295	 * listen on any other addresses.
1296	 */
1297	if (NULL == nic_rule_list) {
1298		DPRINTF(4, ("default listen\n"));
1299		return ACTION_LISTEN;
1300	}
1301
1302	DPRINTF(4, ("implicit ignore\n"));
1303	return ACTION_IGNORE;
1304}
1305
1306
1307static void
1308convert_isc_if(
1309	isc_interface_t *isc_if,
1310	endpt *itf,
1311	u_short port
1312	)
1313{
1314	strlcpy(itf->name, isc_if->name, sizeof(itf->name));
1315	itf->ifindex = isc_if->ifindex;
1316	itf->family = (u_short)isc_if->af;
1317	AF(&itf->sin) = itf->family;
1318	AF(&itf->mask) = itf->family;
1319	AF(&itf->bcast) = itf->family;
1320	SET_PORT(&itf->sin, port);
1321	SET_PORT(&itf->mask, port);
1322	SET_PORT(&itf->bcast, port);
1323
1324	if (IS_IPV4(&itf->sin)) {
1325		NSRCADR(&itf->sin) = isc_if->address.type.in.s_addr;
1326		NSRCADR(&itf->mask) = isc_if->netmask.type.in.s_addr;
1327
1328		if (isc_if->flags & INTERFACE_F_BROADCAST) {
1329			itf->flags |= INT_BROADCAST;
1330			NSRCADR(&itf->bcast) =
1331			    isc_if->broadcast.type.in.s_addr;
1332		}
1333	}
1334#ifdef INCLUDE_IPV6_SUPPORT
1335	else if (IS_IPV6(&itf->sin)) {
1336		SET_ADDR6N(&itf->sin, isc_if->address.type.in6);
1337		SET_ADDR6N(&itf->mask, isc_if->netmask.type.in6);
1338
1339		SET_SCOPE(&itf->sin, isc_if->address.zone);
1340	}
1341#endif /* INCLUDE_IPV6_SUPPORT */
1342
1343
1344	/* Process the rest of the flags */
1345
1346	itf->flags |=
1347		  ((INTERFACE_F_UP & isc_if->flags)
1348			? INT_UP : 0)
1349		| ((INTERFACE_F_LOOPBACK & isc_if->flags)
1350			? INT_LOOPBACK : 0)
1351		| ((INTERFACE_F_POINTTOPOINT & isc_if->flags)
1352			? INT_PPP : 0)
1353		| ((INTERFACE_F_MULTICAST & isc_if->flags)
1354			? INT_MULTICAST : 0)
1355		| ((INTERFACE_F_PRIVACY & isc_if->flags)
1356			? INT_PRIVACY : 0)
1357		;
1358
1359	/*
1360	 * Clear the loopback flag if the address is not localhost.
1361	 * http://bugs.ntp.org/1683
1362	 */
1363	if ((INT_LOOPBACK & itf->flags) && !IS_LOOPBACK_ADDR(&itf->sin)) {
1364		itf->flags &= ~INT_LOOPBACK;
1365	}
1366}
1367
1368
1369/*
1370 * refresh_interface
1371 *
1372 * some OSes have been observed to keep
1373 * cached routes even when more specific routes
1374 * become available.
1375 * this can be mitigated by re-binding
1376 * the socket.
1377 */
1378static int
1379refresh_interface(
1380	endpt *	iface
1381	)
1382{
1383#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
1384	if (iface->fd != INVALID_SOCKET) {
1385		int bcast = (iface->flags & INT_BCASTXMIT) != 0;
1386		/* as we forcibly close() the socket remove the
1387		   broadcast permission indication */
1388		if (bcast)
1389			socket_broadcast_disable(iface, &iface->sin);
1390
1391		close_and_delete_fd_from_list(iface->fd);
1392
1393		/* create new socket picking up a new first hop binding
1394		   at connect() time */
1395		iface->fd = open_socket(&iface->sin,
1396					    bcast, 0, iface);
1397		 /*
1398		  * reset TTL indication so TTL is is set again
1399		  * next time around
1400		  */
1401		iface->last_ttl = 0;
1402		return (iface->fd != INVALID_SOCKET);
1403	} else
1404		return 0;	/* invalid sockets are not refreshable */
1405#else /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1406	return (iface->fd != INVALID_SOCKET);
1407#endif /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1408}
1409
1410/*
1411 * interface_update - externally callable update function
1412 */
1413void
1414interface_update(
1415	interface_receiver_t	receiver,
1416	void *			data
1417	)
1418{
1419	int new_interface_found;
1420
1421	if (scan_addrs_once) {
1422		return;
1423	}
1424	BLOCKIO();
1425	new_interface_found = update_interfaces(NTP_PORT, receiver, data);
1426	UNBLOCKIO();
1427
1428	if (!new_interface_found) {
1429		return;
1430	}
1431#ifdef DEBUG
1432	msyslog(LOG_DEBUG, "new interface(s) found: waking up resolver");
1433#endif
1434	interrupt_worker_sleep();
1435}
1436
1437
1438/*
1439 * sau_from_netaddr() - convert network address on-wire formats.
1440 * Convert from libisc's isc_netaddr_t to NTP's sockaddr_u
1441 */
1442void
1443sau_from_netaddr(
1444	sockaddr_u *psau,
1445	const isc_netaddr_t *pna
1446	)
1447{
1448	ZERO_SOCK(psau);
1449	AF(psau) = (u_short)pna->family;
1450	switch (pna->family) {
1451
1452	case AF_INET:
1453		psau->sa4.sin_addr = pna->type.in;
1454		break;
1455
1456	case AF_INET6:
1457		psau->sa6.sin6_addr = pna->type.in6;
1458		break;
1459	}
1460}
1461
1462
1463static int
1464is_wildcard_addr(
1465	const sockaddr_u *psau
1466	)
1467{
1468	if (IS_IPV4(psau) && !NSRCADR(psau))
1469		return 1;
1470
1471#ifdef INCLUDE_IPV6_SUPPORT
1472	if (IS_IPV6(psau) && S_ADDR6_EQ(psau, &in6addr_any))
1473		return 1;
1474#endif
1475
1476	return 0;
1477}
1478
1479
1480isc_boolean_t
1481is_linklocal(
1482	sockaddr_u *		psau
1483)
1484{
1485	struct in6_addr *	p6addr;
1486
1487	if (IS_IPV6(psau)) {
1488		p6addr = &psau->sa6.sin6_addr;
1489		if (   IN6_IS_ADDR_LINKLOCAL(p6addr)
1490		    || IN6_IS_ADDR_SITELOCAL(p6addr)) {
1491
1492			return TRUE;
1493		}
1494	} else if (IS_IPV4(psau)) {
1495		/* autoconf are link-local 169.254.0.0/16 */
1496		if (IS_AUTOCONF(psau)) {
1497			return TRUE;
1498		}
1499	}
1500	return FALSE;
1501}
1502
1503
#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
/*
 * set_wildcard_reuse() - switch SO_REUSEADDR on or off on the wildcard
 * socket of the given address family.
 *
 * Nothing happens when that family has no wildcard endpoint or the
 * endpoint has no socket.  A setsockopt() failure is logged only.
 */
static void
set_wildcard_reuse(
	u_short	family,
	int	on
	)
{
	endpt *		wild;
	const char *	state;

	wild = ANY_INTERFACE_BYFAM(family);
	if (NULL == wild || INVALID_SOCKET == wild->fd)
		return;

	/* computed before the syscall so %m still sees its errno */
	state = on ? "on" : "off";

	if (setsockopt(wild->fd, SOL_SOCKET, SO_REUSEADDR,
		       (void *)&on, sizeof(on)))
		msyslog(LOG_ERR,
			"set_wildcard_reuse: setsockopt(SO_REUSEADDR, %s) failed: %m",
			state);

	DPRINTF(4, ("set SO_REUSEADDR to %s on %s\n",
		    state,
		    stoa(&wild->sin)));
}
#endif /* OS_NEEDS_REUSEADDR_FOR_IFADDRBIND */
1534
1535static isc_boolean_t
1536check_flags(
1537	sockaddr_u *psau,
1538	const char *name,
1539	u_int32 flags
1540	)
1541{
1542#if defined(SIOCGIFAFLAG_IN)
1543	struct ifreq ifr;
1544	int fd;
1545
1546	if (psau->sa.sa_family != AF_INET)
1547		return ISC_FALSE;
1548	if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1549		return ISC_FALSE;
1550	ZERO(ifr);
1551	memcpy(&ifr.ifr_addr, &psau->sa, sizeof(ifr.ifr_addr));
1552	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1553	if (ioctl(fd, SIOCGIFAFLAG_IN, &ifr) < 0) {
1554		close(fd);
1555		return ISC_FALSE;
1556	}
1557	close(fd);
1558	if ((ifr.ifr_addrflags & flags) != 0)
1559		return ISC_TRUE;
1560#endif	/* SIOCGIFAFLAG_IN */
1561	return ISC_FALSE;
1562}
1563
1564static isc_boolean_t
1565check_flags6(
1566	sockaddr_u *psau,
1567	const char *name,
1568	u_int32 flags6
1569	)
1570{
1571#if defined(INCLUDE_IPV6_SUPPORT) && defined(SIOCGIFAFLAG_IN6)
1572	struct in6_ifreq ifr6;
1573	int fd;
1574
1575	if (psau->sa.sa_family != AF_INET6)
1576		return ISC_FALSE;
1577	if ((fd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
1578		return ISC_FALSE;
1579	ZERO(ifr6);
1580	memcpy(&ifr6.ifr_addr, &psau->sa6, sizeof(ifr6.ifr_addr));
1581	strlcpy(ifr6.ifr_name, name, sizeof(ifr6.ifr_name));
1582	if (ioctl(fd, SIOCGIFAFLAG_IN6, &ifr6) < 0) {
1583		close(fd);
1584		return ISC_FALSE;
1585	}
1586	close(fd);
1587	if ((ifr6.ifr_ifru.ifru_flags6 & flags6) != 0)
1588		return ISC_TRUE;
1589#endif	/* INCLUDE_IPV6_SUPPORT && SIOCGIFAFLAG_IN6 */
1590	return ISC_FALSE;
1591}
1592
1593static isc_boolean_t
1594is_anycast(
1595	sockaddr_u *psau,
1596	const char *name
1597	)
1598{
1599#ifdef IN6_IFF_ANYCAST
1600	return check_flags6(psau, name, IN6_IFF_ANYCAST);
1601#else
1602	return ISC_FALSE;
1603#endif
1604}
1605
1606static isc_boolean_t
1607is_valid(
1608	sockaddr_u *psau,
1609	const char *name
1610	)
1611{
1612	u_int32 flags;
1613
1614	flags = 0;
1615	switch (psau->sa.sa_family) {
1616	case AF_INET:
1617#ifdef IN_IFF_DETACHED
1618		flags |= IN_IFF_DETACHED;
1619#endif
1620#ifdef IN_IFF_TENTATIVE
1621		flags |= IN_IFF_TENTATIVE;
1622#endif
1623		return check_flags(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1624	case AF_INET6:
1625#ifdef IN6_IFF_DEPARTED
1626		flags |= IN6_IFF_DEPARTED;
1627#endif
1628#ifdef IN6_IFF_DETACHED
1629		flags |= IN6_IFF_DETACHED;
1630#endif
1631#ifdef IN6_IFF_TENTATIVE
1632		flags |= IN6_IFF_TENTATIVE;
1633#endif
1634		return check_flags6(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1635	default:
1636		return ISC_FALSE;
1637	}
1638}
1639
1640/*
1641 * update_interface strategy
1642 *
1643 * toggle configuration phase
1644 *
1645 * Phase 1a:
1646 * forall currently existing interfaces
1647 *   if address is known:
1648 *	drop socket - rebind again
1649 *
1650 *   if address is NOT known:
1651 *	Add address to list of new addresses
1652 *
1653 * Phase 1b:
1654 *	Scan the list of new addresses marking IPv6 link-local addresses
1655 *	   which also have a global v6 address using the same OS ifindex.
1656 *	Attempt to create a new interface entry
1657 *
1658 * Phase 2:
1659 * forall currently known non MCAST and WILDCARD interfaces
1660 *   if interface does not match configuration phase (not seen in phase 1):
1661 *	remove interface from known interface list
1662 *	forall peers associated with this interface
1663 *         disconnect peer from this interface
1664 *
1665 * Phase 3:
1666 *   attempt to re-assign interfaces to peers
1667 *
1668 */
1669
1670static int
1671update_interfaces(
1672	u_short			port,
1673	interface_receiver_t	receiver,
1674	void *			data
1675	)
1676{
1677	isc_mem_t *		mctx = (void *)-1;
1678	interface_info_t	ifi;
1679	isc_interfaceiter_t *	iter;
1680	isc_result_t		result;
1681	isc_interface_t		isc_if;
1682	int			new_interface_found;
1683	unsigned int		family;
1684	endpt			enumep;
1685	endpt *			ep;
1686	endpt *			next_ep;
1687	endpt *			newaddrs;
1688	endpt *			newaddrs_tail;
1689	endpt *			ep2;
1690
1691	DPRINTF(3, ("update_interfaces(%d)\n", port));
1692
1693	/*
1694	 * phase 1a - scan OS local addresses
1695	 * - update those that ntpd already knows
1696	 * - build a list of newly-discovered addresses.
1697	 */
1698
1699	new_interface_found = FALSE;
1700	nonlocal_v4_addr_up = nonlocal_v6_addr_up = FALSE;
1701	iter = NULL;
1702	newaddrs = newaddrs_tail = NULL;
1703	result = isc_interfaceiter_create(mctx, &iter);
1704
1705	if (result != ISC_R_SUCCESS)
1706		return 0;
1707
1708	/*
1709	 * Toggle system interface scan phase to find untouched
1710	 * interfaces to be deleted.
1711	 */
1712	sys_interphase ^= 0x1;
1713
1714	for (result = isc_interfaceiter_first(iter);
1715	     ISC_R_SUCCESS == result;
1716	     result = isc_interfaceiter_next(iter)) {
1717
1718		result = isc_interfaceiter_current(iter, &isc_if);
1719
1720		if (result != ISC_R_SUCCESS) {
1721			break;
1722		}
1723		/* See if we have a valid family to use */
1724		family = isc_if.address.family;
1725		if (AF_INET != family && AF_INET6 != family)
1726			continue;
1727		if (AF_INET == family && !ipv4_works)
1728			continue;
1729		if (AF_INET6 == family && !ipv6_works)
1730			continue;
1731
1732		/* create prototype */
1733		init_interface(&enumep);
1734
1735		convert_isc_if(&isc_if, &enumep, port);
1736
1737		DPRINT_INTERFACE(4, (&enumep, "examining ", "\n"));
1738
1739		/*
1740		 * Check if and how we are going to use the interface.
1741		 */
1742		switch (interface_action(enumep.name, &enumep.sin,
1743					 enumep.flags)) {
1744
1745		case ACTION_IGNORE:
1746			DPRINTF(4, ("ignoring interface %s (%s) - by nic rules\n",
1747				    enumep.name, stoa(&enumep.sin)));
1748			continue;
1749
1750		case ACTION_LISTEN:
1751			DPRINTF(4, ("listen interface %s (%s) - by nic rules\n",
1752				    enumep.name, stoa(&enumep.sin)));
1753			enumep.ignore_packets = ISC_FALSE;
1754			break;
1755
1756		case ACTION_DROP:
1757			DPRINTF(4, ("drop on interface %s (%s) - by nic rules\n",
1758				    enumep.name, stoa(&enumep.sin)));
1759			enumep.ignore_packets = ISC_TRUE;
1760			break;
1761		}
1762
1763		 /* interfaces must be UP to be usable */
1764		if (!(enumep.flags & INT_UP)) {
1765			DPRINTF(4, ("skipping interface %s (%s) - DOWN\n",
1766				    enumep.name, stoa(&enumep.sin)));
1767			continue;
1768		}
1769
1770		/*
1771		 * skip any interfaces UP and bound to a wildcard
1772		 * address - some dhcp clients produce that in the
1773		 * wild
1774		 */
1775		if (is_wildcard_addr(&enumep.sin))
1776			continue;
1777
1778		if (is_anycast(&enumep.sin, isc_if.name))
1779			continue;
1780
1781		/*
1782		 * skip any address that is an invalid state to be used
1783		 */
1784		if (!is_valid(&enumep.sin, isc_if.name))
1785			continue;
1786
1787		/*
1788		 * Keep track of having non-linklocal connectivity
1789		 * for IPv4 and IPv6 so we don't solicit pool hosts
1790		 * when it can't work.
1791		 */
1792		if (   !(INT_LOOPBACK & enumep.flags)
1793		    && !is_linklocal(&enumep.sin)) {
1794			if (IS_IPV6(&enumep.sin)) {
1795				nonlocal_v6_addr_up = TRUE;
1796			} else {
1797				nonlocal_v4_addr_up = TRUE;
1798			}
1799		}
1800		/*
1801		 * map to local *address* in order to map all duplicate
1802		 * interfaces to an endpt structure with the appropriate
1803		 * socket.  Our name space is (ip-address), NOT
1804		 * (interface name, ip-address).
1805		 */
1806		ep = getinterface(&enumep.sin, INT_WILDCARD);
1807
1808		if (NULL == ep) {
1809			ep = emalloc(sizeof(*ep));
1810			memcpy(ep, &enumep, sizeof(*ep));
1811			if (NULL != newaddrs_tail) {
1812				newaddrs_tail->elink = ep;
1813				newaddrs_tail = ep;
1814			} else {
1815				newaddrs_tail = newaddrs = ep;
1816			}
1817			continue;
1818		}
1819
1820		if (!refresh_interface(ep)) {
1821			/*
1822			 * Refreshing failed, we will delete the endpt
1823			 * in phase 2 because it was not marked current.
1824			 * We can bind to the address as the refresh
1825			 * code already closed the endpt's socket.
1826			*/
1827			continue;
1828		}
1829		/*
1830		 * found existing and up to date interface -
1831		 * mark present.
1832		 */
1833		if (ep->phase != sys_interphase) {
1834			/*
1835			 * On a new round we reset the name so
1836			 * the interface name shows up again if
1837			 * this address is no longer shared.
1838			 * We reset ignore_packets from the
1839			 * new prototype to respect any runtime
1840			 * changes to the nic rules.
1841			 */
1842			strlcpy(ep->name, enumep.name, sizeof(ep->name));
1843			ep->ignore_packets = enumep.ignore_packets;
1844		} else {
1845			/*
1846			 * DLH: else branch might be dead code from
1847			 * when both address and name were compared.
1848			 */
1849			msyslog(LOG_INFO, "%s on %u %s -> *multiple*",
1850				stoa(&ep->sin), ep->ifnum, ep->name);
1851			/* name collision - rename interface */
1852			strlcpy(ep->name, "*multiple*", sizeof(ep->name));
1853		}
1854
1855		DPRINT_INTERFACE(4, (ep, "updating ", " present\n"));
1856
1857		if (ep->ignore_packets != enumep.ignore_packets) {
1858			/*
1859			 * We have conflicting configurations for the
1860			 * address. This can happen with
1861			 * -I <interfacename> on the command line for an
1862			 *  interface that shares its address with other
1863			 * interfaces. We cannot disambiguate incoming
1864			 * packets delivered to this socket without extra
1865			 * syscalls/features.  Note this is an unusual
1866			 * configuration where several interfaces share
1867			 * an address but filtering via interface name is
1868			 * attempted.  We resolve the config conflict by
1869			 * disabling the processing of received packets.
1870			 * This leads to no service on the address where
1871			 * the conflict occurs.
1872			 */
1873			msyslog(LOG_WARNING,
1874				"conflicting listen configuration between"
1875				" %s and %s for %s, disabled",
1876				enumep.name, ep->name, stoa(&enumep.sin));
1877
1878			ep->ignore_packets = TRUE;
1879		}
1880
1881		ep->phase = sys_interphase;
1882
1883		ifi.action = IFS_EXISTS;
1884		ifi.ep = ep;
1885		if (receiver != NULL) {
1886			(*receiver)(data, &ifi);
1887		}
1888	}
1889
1890	isc_interfaceiter_destroy(&iter);
1891
1892	/*
1893	 * Phase 1b
1894	 */
1895	for (ep = newaddrs; ep != NULL; ep = ep->elink) {
1896		if (IS_IPV6(&ep->sin) && is_linklocal(&ep->sin)) {
1897			for (ep2 = newaddrs; ep2 != NULL; ep2 = ep2->elink) {
1898				if (   IS_IPV6(&ep2->sin)
1899				    && ep != ep2
1900				    && !is_linklocal(&ep2->sin)) {
1901
1902					ep->flags |= INT_LL_OF_GLOB;
1903					break;
1904				}
1905			}
1906		}
1907	}
1908	for (ep2 = newaddrs; ep2 != NULL; ep2 = next_ep) {
1909		next_ep = ep2->elink;
1910		ep2->elink = NULL;
1911		ep = create_interface(port, ep2);
1912		if (ep != NULL) {
1913			ifi.action = IFS_CREATED;
1914			ifi.ep = ep;
1915			if (receiver != NULL) {
1916				(*receiver)(data, &ifi);
1917			}
1918			new_interface_found = TRUE;
1919			DPRINT_INTERFACE(3,
1920				(ep, "updating ", " new - created\n"));
1921		}
1922		else {
1923			DPRINT_INTERFACE(3,
1924				(ep, "updating ", " new - FAILED"));
1925
1926			msyslog(LOG_ERR,
1927				"cannot bind address %s",
1928				stoa(&ep->sin));
1929		}
1930		free(ep2);
1931	}
1932
1933	/*
1934	 * phase 2 - delete gone interfaces - reassigning peers to
1935	 * other interfaces
1936	 */
1937	for (ep = ep_list; ep != NULL; ep = next_ep) {
1938		next_ep = ep->elink;
1939
1940		/*
1941		 * if phase does not match sys_phase this interface was
1942		 * not enumerated during the last interface scan - so it
1943		 * is gone and will be deleted here unless it did not
1944		 * originate from interface enumeration (INT_WILDCARD,
1945		 * INT_MCASTIF).
1946		 */
1947		if (((INT_WILDCARD | INT_MCASTIF) & ep->flags) ||
1948		    ep->phase == sys_interphase)
1949			continue;
1950
1951		DPRINT_INTERFACE(3, (ep, "updating ",
1952				     "GONE - deleting\n"));
1953		remove_interface(ep);
1954
1955		ifi.action = IFS_DELETED;
1956		ifi.ep = ep;
1957		if (receiver != NULL) {
1958			(*receiver)(data, &ifi);
1959		}
1960		/* disconnect peers from deleted endpt. */
1961		while (ep->peers != NULL) {
1962			set_peerdstadr(ep->peers, NULL);
1963		}
1964		/*
1965		 * update globals in case we lose
1966		 * a loopback interface
1967		 */
1968		if (ep == loopback_interface) {
1969			loopback_interface = NULL;
1970		}
1971		delete_interface(ep);
1972	}
1973
1974	/*
1975	 * phase 3 - re-configure as the world has possibly changed
1976	 *
1977	 * never ever make this conditional again - it is needed to track
1978	 * routing updates. see bug #2506
1979	 */
1980	refresh_all_peerinterfaces();
1981
1982	if (sys_bclient) {
1983		io_setbclient();
1984	}
1985#ifdef MCAST
1986	/*
1987	 * Check multicast interfaces and try to join multicast groups if
1988	 * not joined yet.
1989	 */
1990	for (ep = ep_list; ep != NULL; ep = ep->elink) {
1991		remaddr_t *entry;
1992
1993		if (!(INT_MCASTIF & ep->flags) || (INT_MCASTOPEN & ep->flags)) {
1994			continue;
1995		}
1996		/* Find remote address that was linked to this interface */
1997		for (entry = remoteaddr_list;
1998		     entry != NULL;
1999		     entry = entry->link) {
2000			if (entry->ep == ep) {
2001				if (socket_multicast_enable(ep, &entry->addr)) {
2002					msyslog(LOG_INFO,
2003						"Joined %s socket to multicast group %s",
2004						stoa(&ep->sin),
2005						stoa(&entry->addr));
2006				}
2007				break;
2008			}
2009		}
2010	}
2011#endif /* MCAST */
2012
2013	return new_interface_found;
2014}
2015
2016
2017/*
2018 * create_sockets - create a socket for each interface plus a default
2019 *			socket for when we don't know where to send
2020 */
2021static int
2022create_sockets(
2023	u_short port
2024	)
2025{
2026#ifndef HAVE_IO_COMPLETION_PORT
2027	/*
2028	 * I/O Completion Ports don't care about the select and FD_SET
2029	 */
2030	maxactivefd = 0;
2031	FD_ZERO(&activefds);
2032#endif
2033
2034	DPRINTF(2, ("create_sockets(%d)\n", port));
2035
2036	create_wildcards(port);
2037
2038	update_interfaces(port, NULL, NULL);
2039
2040	/*
2041	 * Now that we have opened all the sockets, turn off the reuse
2042	 * flag for security.
2043	 */
2044	set_reuseaddr(0);
2045
2046	DPRINTF(2, ("create_sockets: Total interfaces = %d\n", ninterfaces));
2047
2048	return ninterfaces;
2049}
2050
2051/*
2052 * create_interface - create a new interface for a given prototype
2053 *		      binding the socket.
2054 */
2055static endpt *
2056create_interface(
2057	u_short	port,
2058	endpt *	protot
2059	)
2060{
2061	sockaddr_u	resmask;
2062	endpt *		iface;
2063	int/*BOOL*/	success;
2064#if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2065	remaddr_t *	entry;
2066	remaddr_t *	next_entry;
2067#endif
2068	DPRINTF(2, ("create_interface(%s)\n", sptoa(&protot->sin)));
2069
2070	/* build an interface */
2071	iface = new_interface(protot);
2072
2073	/*
2074	 * create socket
2075	 */
2076	iface->fd = open_socket(&iface->sin, 0, 0, iface);
2077
2078	if (iface->fd != INVALID_SOCKET)
2079		log_listen_address(iface);
2080
2081	if ((INT_BROADCAST & iface->flags)
2082	    && iface->bfd != INVALID_SOCKET)
2083		msyslog(LOG_INFO, "Listening on broadcast address %s",
2084			sptoa(&iface->bcast));
2085
2086	if (INVALID_SOCKET == iface->fd
2087	    && INVALID_SOCKET == iface->bfd) {
2088		msyslog(LOG_ERR, "unable to create socket on %s (%d) for %s",
2089			iface->name,
2090			iface->ifnum,
2091			sptoa(&iface->sin));
2092		delete_interface(iface);
2093		return NULL;
2094	}
2095
2096	/*
2097	 * Blacklist our own addresses, no use talking to ourself
2098	 */
2099	SET_HOSTMASK(&resmask, AF(&iface->sin));
2100	success = hack_restrict(RESTRICT_FLAGS, &iface->sin, &resmask,
2101				-4, RESM_NTPONLY | RESM_INTERFACE,
2102				RES_IGNORE, 0);
2103	if (!success) {
2104		msyslog(LOG_ERR,
2105			"unable to self-restrict %s", stoa(&iface->sin));
2106	}
2107
2108	/*
2109	 * set globals with the first found
2110	 * loopback interface of the appropriate class
2111	 */
2112	if (NULL == loopback_interface && AF_INET == iface->family
2113	    && (INT_LOOPBACK & iface->flags))
2114		loopback_interface = iface;
2115
2116	/*
2117	 * put into our interface list
2118	 */
2119	add_addr_to_list(&iface->sin, iface);
2120	add_interface(iface);
2121
2122#if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2123	/*
2124	 * Join any previously-configured compatible multicast groups.
2125	 */
2126	if (INT_MULTICAST & iface->flags &&
2127	    !((INT_LOOPBACK | INT_WILDCARD) & iface->flags) &&
2128	    !iface->ignore_packets) {
2129		for (entry = remoteaddr_list;
2130		     entry != NULL;
2131		     entry = next_entry) {
2132			next_entry = entry->link;
2133			if (AF(&iface->sin) != AF(&entry->addr) ||
2134			    !IS_MCAST(&entry->addr))
2135				continue;
2136			if (socket_multicast_enable(iface,
2137						    &entry->addr))
2138				msyslog(LOG_INFO,
2139					"Joined %s socket to multicast group %s",
2140					stoa(&iface->sin),
2141					stoa(&entry->addr));
2142			else
2143				msyslog(LOG_ERR,
2144					"Failed to join %s socket to multicast group %s",
2145					stoa(&iface->sin),
2146					stoa(&entry->addr));
2147		}
2148	}
2149#endif	/* MCAST && MCAST_NONEWSOCKET */
2150
2151	DPRINT_INTERFACE(2, (iface, "created ", "\n"));
2152	return iface;
2153}
2154
2155
#ifdef DEBUG
/*
 * iflags_str() - debug-only rendering of an INT_* flag word as text.
 *
 * The result lives in a buffer obtained with LIB_GETBUF().  Each known
 * flag that is set contributes its label through append_flagstr(), in
 * the order of the table below.  DEBUG_INVARIANT() then checks that no
 * unknown bits are left over.
 */
const char *
iflags_str(
	u_int32 iflags
)
{
	static const struct {
		u_int32		bit;
		const char *	label;
	} known[] = {
		{ INT_UP,		"up" },
		{ INT_PPP,		"ppp" },
		{ INT_LOOPBACK,		"loopback" },
		{ INT_BROADCAST,	"broadcast" },
		{ INT_MULTICAST,	"multicast" },
		{ INT_BCASTOPEN,	"bcastopen" },
		{ INT_MCASTOPEN,	"mcastopen" },
		{ INT_WILDCARD,		"wildcard" },
		{ INT_MCASTIF,		"mcastif" },
		{ INT_PRIVACY,		"IPv6privacy" },
		{ INT_BCASTXMIT,	"bcastxmit" },
		{ INT_LL_OF_GLOB,	"linklocal-w-global" }
	};
	const size_t	sz = LIB_BUFLENGTH;
	char *		ifs;
	size_t		i;

	LIB_GETBUF(ifs);
	ifs[0] = '\0';

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (iflags & known[i].bit) {
			CLEAR_BIT_IF_DEBUG(known[i].bit, iflags);
			append_flagstr(ifs, sz, known[i].label);
		}
	}

	DEBUG_INVARIANT(!iflags);

	return ifs;
}
#endif	/* DEBUG */
2233
2234
#ifdef SO_EXCLUSIVEADDRUSE
/*
 * set_excladdruse() - request SO_EXCLUSIVEADDRUSE on a socket.
 *
 * A failure is logged, except for the Windows case described in the
 * body.
 */
static void
set_excladdruse(
	SOCKET fd
	)
{
	int	enable = 1;
#ifdef SYS_WINNT
	DWORD	winerr;
#endif

	if (0 == setsockopt(fd, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
			    (void *)&enable, sizeof(enable)))
		return;

#ifdef SYS_WINNT
	/*
	 * Prior to Windows XP setting SO_EXCLUSIVEADDRUSE can fail with
	 * error WSAINVAL depending on service pack level and whether
	 * the user account is in the Administrators group.  Do not
	 * complain if it fails that way on versions prior to XP (5.1).
	 */
	winerr = GetLastError();

	if (WSAEINVAL == winerr
	    && isc_win32os_versioncheck(5, 1, 0, 0) < 0)	/* < 5.1/XP */
		return;

	/* put the error code back for the %m below */
	SetLastError(winerr);
#endif
	msyslog(LOG_ERR,
		"setsockopt(%d, SO_EXCLUSIVEADDRUSE, on): %m",
		(int)fd);
}
#endif  /* SO_EXCLUSIVEADDRUSE */
2273
2274
2275/*
2276 * set_reuseaddr() - set/clear REUSEADDR on all sockets
2277 *			NB possible hole - should we be doing this on broadcast
2278 *			fd's also?
2279 */
2280static void
2281set_reuseaddr(
2282	int flag
2283	)
2284{
2285#ifndef SO_EXCLUSIVEADDRUSE
2286	endpt *ep;
2287
2288	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2289		if (ep->flags & INT_WILDCARD)
2290			continue;
2291
2292		/*
2293		 * if ep->fd  is INVALID_SOCKET, we might have a adapter
2294		 * configured but not present
2295		 */
2296		DPRINTF(4, ("setting SO_REUSEADDR on %.16s@%s to %s\n",
2297			    ep->name, stoa(&ep->sin),
2298			    flag ? "on" : "off"));
2299
2300		if (ep->fd != INVALID_SOCKET) {
2301			if (setsockopt(ep->fd, SOL_SOCKET, SO_REUSEADDR,
2302				       (void *)&flag, sizeof(flag))) {
2303				msyslog(LOG_ERR, "set_reuseaddr: setsockopt(%s, SO_REUSEADDR, %s) failed: %m",
2304					stoa(&ep->sin), flag ? "on" : "off");
2305			}
2306		}
2307	}
2308#endif /* ! SO_EXCLUSIVEADDRUSE */
2309}
2310
2311/*
2312 * This is just a wrapper around an internal function so we can
2313 * make other changes as necessary later on
2314 */
2315void
2316enable_broadcast(
2317	endpt *		iface,
2318	sockaddr_u *	baddr
2319	)
2320{
2321#ifdef OPEN_BCAST_SOCKET
2322	socket_broadcast_enable(iface, iface->fd, baddr);
2323#endif
2324}
2325
2326#ifdef OPEN_BCAST_SOCKET
2327/*
2328 * Enable a broadcast address to a given socket
2329 * The socket is in the ep_list all we need to do is enable
2330 * broadcasting. It is not this function's job to select the socket
2331 */
2332static isc_boolean_t
2333socket_broadcast_enable(
2334	endpt *		iface,
2335	SOCKET		fd,
2336	sockaddr_u *	baddr
2337	)
2338{
2339#ifdef SO_BROADCAST
2340	int on = 1;
2341
2342	if (IS_IPV4(baddr)) {
2343		/* if this interface can support broadcast, set SO_BROADCAST */
2344		if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
2345			       (void *)&on, sizeof(on)))
2346			msyslog(LOG_ERR,
2347				"setsockopt(SO_BROADCAST) enable failure on address %s: %m",
2348				stoa(baddr));
2349		else
2350			DPRINTF(2, ("Broadcast enabled on socket %d for address %s\n",
2351				    fd, stoa(baddr)));
2352	}
2353	iface->flags |= INT_BCASTXMIT;
2354	return ISC_TRUE;
2355#else
2356	return ISC_FALSE;
2357#endif /* SO_BROADCAST */
2358}
2359
2360#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
2361/*
2362 * Remove a broadcast address from a given socket
2363 * The socket is in the ep_list all we need to do is disable
2364 * broadcasting. It is not this function's job to select the socket
2365 */
2366static isc_boolean_t
2367socket_broadcast_disable(
2368	endpt *	iface,
2369	sockaddr_u *		baddr
2370	)
2371{
2372#ifdef SO_BROADCAST
2373	int off = 0;	/* This seems to be OK as an int */
2374
2375	if (IS_IPV4(baddr) && setsockopt(iface->fd, SOL_SOCKET,
2376	    SO_BROADCAST, (void *)&off, sizeof(off)))
2377		msyslog(LOG_ERR,
2378			"setsockopt(SO_BROADCAST) disable failure on address %s: %m",
2379			stoa(baddr));
2380
2381	iface->flags &= ~INT_BCASTXMIT;
2382	return ISC_TRUE;
2383#else
2384	return ISC_FALSE;
2385#endif /* SO_BROADCAST */
2386}
2387#endif /* OS_MISSES_SPECIFIC_ROUTE_UPDATES */
2388
2389#endif /* OPEN_BCAST_SOCKET */
2390
2391
2392/*
2393 * Check to see if the address is a multicast address
2394 */
2395static isc_boolean_t
2396addr_ismulticast(
2397	sockaddr_u *maddr
2398	)
2399{
2400	isc_boolean_t result;
2401
2402#ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
2403	/*
2404	 * If we don't have IPV6 support any IPV6 addr is not multicast
2405	 */
2406	if (IS_IPV6(maddr))
2407		result = ISC_FALSE;
2408	else
2409#endif
2410		result = IS_MCAST(maddr);
2411
2412	if (!result)
2413		DPRINTF(4, ("address %s is not multicast\n",
2414			    stoa(maddr)));
2415
2416	return result;
2417}
2418
2419/*
2420 * Multicast servers need to set the appropriate Multicast interface
2421 * socket option in order for it to know which interface to use for
2422 * send the multicast packet.
2423 */
2424void
2425enable_multicast_if(
2426	endpt *		iface,
2427	sockaddr_u *	maddr
2428	)
2429{
2430#ifdef MCAST
2431#ifdef IP_MULTICAST_LOOP
2432	TYPEOF_IP_MULTICAST_LOOP off = 0;
2433#endif
2434#if defined(INCLUDE_IPV6_MULTICAST_SUPPORT) && defined(IPV6_MULTICAST_LOOP)
2435	u_int off6 = 0;
2436#endif
2437
2438	REQUIRE(AF(maddr) == AF(&iface->sin));
2439
2440	switch (AF(&iface->sin)) {
2441
2442	case AF_INET:
2443#ifdef IP_MULTICAST_LOOP
2444		/*
2445		 * Don't send back to itself, but allow failure to set
2446		 */
2447		if (setsockopt(iface->fd, IPPROTO_IP,
2448			       IP_MULTICAST_LOOP,
2449			       (void *)&off,
2450			       sizeof(off))) {
2451
2452			msyslog(LOG_ERR,
2453				"setsockopt IP_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2454				iface->fd, stoa(&iface->sin),
2455				stoa(maddr));
2456		}
2457#endif
2458		break;
2459
2460	case AF_INET6:
2461#ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2462#ifdef IPV6_MULTICAST_LOOP
2463		/*
2464		 * Don't send back to itself, but allow failure to set
2465		 */
2466		if (setsockopt(iface->fd, IPPROTO_IPV6,
2467			       IPV6_MULTICAST_LOOP,
2468			       (void *) &off6, sizeof(off6))) {
2469
2470			msyslog(LOG_ERR,
2471				"setsockopt IPV6_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2472				iface->fd, stoa(&iface->sin),
2473				stoa(maddr));
2474		}
2475#endif
2476		break;
2477#else
2478		return;
2479#endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2480	}
2481	return;
2482#endif
2483}
2484
2485/*
2486 * Add a multicast address to a given socket
2487 * The socket is in the ep_list all we need to do is enable
2488 * multicasting. It is not this function's job to select the socket
2489 */
2490#if defined(MCAST)
2491static isc_boolean_t
2492socket_multicast_enable(
2493	endpt *		iface,
2494	sockaddr_u *	maddr
2495	)
2496{
2497	struct ip_mreq		mreq;
2498# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2499	struct ipv6_mreq	mreq6;
2500# endif
2501	switch (AF(maddr)) {
2502
2503	case AF_INET:
2504		ZERO(mreq);
2505		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
2506		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
2507		if (setsockopt(iface->fd,
2508			       IPPROTO_IP,
2509			       IP_ADD_MEMBERSHIP,
2510			       (void *)&mreq,
2511			       sizeof(mreq))) {
2512			DPRINTF(2, (
2513				"setsockopt IP_ADD_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
2514				iface->fd, stoa(&iface->sin),
2515				mreq.imr_multiaddr.s_addr,
2516				mreq.imr_interface.s_addr,
2517				stoa(maddr)));
2518			return ISC_FALSE;
2519		}
2520		DPRINTF(4, ("Added IPv4 multicast membership on socket %d, addr %s for %x / %x (%s)\n",
2521			    iface->fd, stoa(&iface->sin),
2522			    mreq.imr_multiaddr.s_addr,
2523			    mreq.imr_interface.s_addr, stoa(maddr)));
2524		break;
2525
2526	case AF_INET6:
2527# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2528		/*
2529		 * Enable reception of multicast packets.
2530		 * If the address is link-local we can get the
2531		 * interface index from the scope id. Don't do this
2532		 * for other types of multicast addresses. For now let
2533		 * the kernel figure it out.
2534		 */
2535		ZERO(mreq6);
2536		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
2537		mreq6.ipv6mr_interface = iface->ifindex;
2538
2539		if (setsockopt(iface->fd, IPPROTO_IPV6,
2540			       IPV6_JOIN_GROUP, (void *)&mreq6,
2541			       sizeof(mreq6))) {
2542			DPRINTF(2, (
2543				"setsockopt IPV6_JOIN_GROUP failed: %m on socket %d, addr %s for interface %u (%s)",
2544				iface->fd, stoa(&iface->sin),
2545				mreq6.ipv6mr_interface, stoa(maddr)));
2546			return ISC_FALSE;
2547		}
2548		DPRINTF(4, ("Added IPv6 multicast group on socket %d, addr %s for interface %u (%s)\n",
2549			    iface->fd, stoa(&iface->sin),
2550			    mreq6.ipv6mr_interface, stoa(maddr)));
2551# else
2552		return ISC_FALSE;
2553# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2554	}
2555	iface->flags |= INT_MCASTOPEN;
2556	iface->num_mcast++;
2557
2558	return ISC_TRUE;
2559}
2560#endif	/* MCAST */
2561
2562
2563/*
2564 * Remove a multicast address from a given socket
2565 * The socket is in the ep_list all we need to do is disable
2566 * multicasting. It is not this function's job to select the socket
2567 */
2568#ifdef MCAST
2569static isc_boolean_t
2570socket_multicast_disable(
2571	endpt *	iface,
2572	sockaddr_u *		maddr
2573	)
2574{
2575# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2576	struct ipv6_mreq mreq6;
2577# endif
2578	struct ip_mreq mreq;
2579
2580	if (find_addr_in_list(maddr) == NULL) {
2581		DPRINTF(4, ("socket_multicast_disable(%s): not found\n",
2582			    stoa(maddr)));
2583		return ISC_TRUE;
2584	}
2585
2586	switch (AF(maddr)) {
2587
2588	case AF_INET:
2589		ZERO(mreq);
2590		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
2591		mreq.imr_interface = SOCK_ADDR4(&iface->sin);
2592		if (setsockopt(iface->fd, IPPROTO_IP,
2593			       IP_DROP_MEMBERSHIP, (void *)&mreq,
2594			       sizeof(mreq))) {
2595
2596			msyslog(LOG_ERR,
2597				"setsockopt IP_DROP_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
2598				iface->fd, stoa(&iface->sin),
2599				SRCADR(maddr), SRCADR(&iface->sin),
2600				stoa(maddr));
2601			return ISC_FALSE;
2602		}
2603		break;
2604	case AF_INET6:
2605# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2606		/*
2607		 * Disable reception of multicast packets
2608		 * If the address is link-local we can get the
2609		 * interface index from the scope id.  Don't do this
2610		 * for other types of multicast addresses. For now let
2611		 * the kernel figure it out.
2612		 */
2613		ZERO(mreq6);
2614		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
2615		mreq6.ipv6mr_interface = iface->ifindex;
2616
2617		if (setsockopt(iface->fd, IPPROTO_IPV6,
2618			       IPV6_LEAVE_GROUP, (void *)&mreq6,
2619			       sizeof(mreq6))) {
2620
2621			msyslog(LOG_ERR,
2622				"setsockopt IPV6_LEAVE_GROUP failure: %m on socket %d, addr %s for %d (%s)",
2623				iface->fd, stoa(&iface->sin),
2624				iface->ifindex, stoa(maddr));
2625			return ISC_FALSE;
2626		}
2627		break;
2628# else
2629		return ISC_FALSE;
2630# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2631	}
2632
2633	iface->num_mcast--;
2634	if (iface->num_mcast <= 0) {
2635		iface->flags &= ~INT_MCASTOPEN;
2636	}
2637	return ISC_TRUE;
2638}
2639#endif	/* MCAST */
2640
2641
2642/*
2643 * io_setbclient - open the broadcast client sockets
2644 */
2645void
2646io_setbclient(void)
2647{
2648#ifdef OPEN_BCAST_SOCKET
2649	endpt *		ep;
2650	unsigned int	nif, ni4;
2651
2652	nif = ni4 = 0;
2653	set_reuseaddr(1);
2654
2655	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2656		/* count IPv4 interfaces. Needed later to decide
2657		 * if we should log an error or not.
2658		 */
2659		if (AF_INET == ep->family) {
2660			++ni4;
2661		}
2662
2663		if (ep->flags & (INT_WILDCARD | INT_LOOPBACK))
2664			continue;
2665
2666		/* use only allowed addresses */
2667		if (ep->ignore_packets)
2668			continue;
2669
2670		/* Need a broadcast-capable interface */
2671		if (!(ep->flags & INT_BROADCAST))
2672			continue;
2673
2674		/* Only IPv4 addresses are valid for broadcast */
2675		REQUIRE(IS_IPV4(&ep->bcast));
2676
2677		/* Do we already have the broadcast address open? */
2678		if (ep->flags & INT_BCASTOPEN) {
2679			/*
2680			 * account for already open interfaces to avoid
2681			 * misleading warning below
2682			 */
2683			nif++;
2684			continue;
2685		}
2686
2687		/*
2688		 * Try to open the broadcast address
2689		 */
2690		ep->family = AF_INET;
2691		ep->bfd = open_socket(&ep->bcast, 1, 0, ep);
2692
2693		/*
2694		 * If we succeeded then we use it otherwise enable
2695		 * broadcast on the interface address
2696		 */
2697		if (ep->bfd != INVALID_SOCKET) {
2698			nif++;
2699			ep->flags |= INT_BCASTOPEN;
2700			msyslog(LOG_INFO,
2701				"Listen for broadcasts to %s on interface #%d %s",
2702				stoa(&ep->bcast), ep->ifnum, ep->name);
2703		} else switch (errno) {
2704			/* Silently ignore EADDRINUSE as we probably
2705			 * opened the socket already for an address in
2706			 * the same network */
2707		case EADDRINUSE:
2708			/* Some systems cannot bind a socket to a broadcast
2709			 * address, as that is not a valid host address. */
2710		case EADDRNOTAVAIL:
2711#		    ifdef SYS_WINNT	/*TODO: use for other systems, too? */
2712			/* avoid recurrence here -- if we already have a
2713			 * regular socket, it's quite useless to try this
2714			 * again.
2715			 */
2716			if (ep->fd != INVALID_SOCKET) {
2717				ep->flags |= INT_BCASTOPEN;
2718				nif++;
2719			}
2720#		    endif
2721			break;
2722
2723		default:
2724			msyslog(LOG_INFO,
2725				"failed to listen for broadcasts to %s on interface #%d %s",
2726				stoa(&ep->bcast), ep->ifnum, ep->name);
2727			break;
2728		}
2729	}
2730	set_reuseaddr(0);
2731	if (nif != 0) {
2732		broadcast_client_enabled = ISC_TRUE;
2733		DPRINTF(1, ("io_setbclient: listening to %d broadcast addresses\n", nif));
2734	} else {
2735		broadcast_client_enabled = ISC_FALSE;
2736		/* This is expected when having only IPv6 interfaces
2737		 * and no IPv4 interfaces at all. We suppress the error
2738		 * log in that case... everything else should work!
2739		 */
2740		if (ni4) {
2741			msyslog(LOG_ERR,
2742				"Unable to listen for broadcasts, no broadcast interfaces available");
2743		}
2744	}
2745#else
2746	msyslog(LOG_ERR,
2747		"io_setbclient: Broadcast Client disabled by build");
2748#endif	/* OPEN_BCAST_SOCKET */
2749}
2750
2751
2752/*
2753 * io_unsetbclient - close the broadcast client sockets
2754 */
2755void
2756io_unsetbclient(void)
2757{
2758	endpt *ep;
2759
2760	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2761		if (INT_WILDCARD & ep->flags)
2762			continue;
2763		if (!(INT_BCASTOPEN & ep->flags))
2764			continue;
2765
2766		if (ep->bfd != INVALID_SOCKET) {
2767			/* destroy broadcast listening socket */
2768			msyslog(LOG_INFO,
2769				"stop listening for broadcasts to %s on interface #%d %s",
2770				stoa(&ep->bcast), ep->ifnum, ep->name);
2771			close_and_delete_fd_from_list(ep->bfd, ep);
2772			ep->bfd = INVALID_SOCKET;
2773		}
2774		ep->flags &= ~INT_BCASTOPEN;
2775	}
2776	broadcast_client_enabled = ISC_FALSE;
2777}
2778
2779
2780/*
2781 * io_multicast_add() - add multicast group address
2782 */
2783void
2784io_multicast_add(
2785	sockaddr_u *addr
2786	)
2787{
2788#ifdef MCAST
2789	endpt *	ep;
2790	endpt *	one_ep;
2791
2792	/*
2793	 * Check to see if this is a multicast address
2794	 */
2795	if (!addr_ismulticast(addr))
2796		return;
2797
2798	/* If we already have it we can just return */
2799	if (NULL != find_flagged_addr_in_list(addr, INT_MCASTOPEN)) {
2800		return;
2801	}
2802
2803# ifndef MULTICAST_NONEWSOCKET
2804	ep = new_interface(NULL);
2805
2806	/*
2807	 * Open a new socket for the multicast address
2808	 */
2809	ep->sin = *addr;
2810	SET_PORT(&ep->sin, NTP_PORT);
2811	ep->family = AF(&ep->sin);
2812	AF(&ep->mask) = ep->family;
2813	SET_ONESMASK(&ep->mask);
2814
2815	set_reuseaddr(1);
2816	ep->bfd = INVALID_SOCKET;
2817	ep->fd = open_socket(&ep->sin, 0, 0, ep);
2818	if (ep->fd != INVALID_SOCKET) {
2819		ep->ignore_packets = ISC_FALSE;
2820		ep->flags |= INT_MCASTIF;
2821		ep->ifindex = SCOPE(addr);
2822
2823		strlcpy(ep->name, "multicast", sizeof(ep->name));
2824		DPRINT_INTERFACE(2, (ep, "multicast add ", "\n"));
2825		add_interface(ep);
2826		log_listen_address(ep);
2827	} else {
2828		/* bind failed, re-use wildcard interface */
2829		delete_interface(ep);
2830
2831		if (IS_IPV4(addr))
2832			ep = wildipv4;
2833		else if (IS_IPV6(addr))
2834			ep = wildipv6;
2835		else
2836			ep = NULL;
2837
2838		if (ep != NULL) {
2839			/* HACK ! -- stuff in an address */
2840			/* because we don't bind addr? DH */
2841			ep->bcast = *addr;
2842			msyslog(LOG_ERR,
2843				"multicast address %s using wildcard interface #%d %s",
2844				stoa(addr), ep->ifnum, ep->name);
2845		} else {
2846			msyslog(LOG_ERR,
2847				"No multicast socket available to use for address %s",
2848				stoa(addr));
2849			return;
2850		}
2851	}
2852	{	/* in place of the { following for in #else clause */
2853		one_ep = ep;
2854# else	/* MULTICAST_NONEWSOCKET follows */
2855	/*
2856	 * For the case where we can't use a separate socket (Windows)
2857	 * join each applicable endpoint socket to the group address.
2858	 */
2859	if (IS_IPV4(addr))
2860		one_ep = wildipv4;
2861	else
2862		one_ep = wildipv6;
2863	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2864		if (ep->ignore_packets || AF(&ep->sin) != AF(addr) ||
2865		    !(INT_MULTICAST & ep->flags) ||
2866		    (INT_LOOPBACK | INT_WILDCARD) & ep->flags)
2867			continue;
2868		one_ep = ep;
2869# endif	/* MULTICAST_NONEWSOCKET */
2870		if (socket_multicast_enable(ep, addr))
2871			msyslog(LOG_INFO,
2872				"Joined %s socket to multicast group %s",
2873				stoa(&ep->sin),
2874				stoa(addr));
2875	}
2876
2877	add_addr_to_list(addr, one_ep);
2878#else	/* !MCAST  follows*/
2879	msyslog(LOG_ERR,
2880		"Can not add multicast address %s: no multicast support",
2881		stoa(addr));
2882#endif
2883	return;
2884}
2885
2886
2887/*
2888 * io_multicast_del() - delete multicast group address
2889 */
2890void
2891io_multicast_del(
2892	sockaddr_u *	addr
2893	)
2894{
2895#ifdef MCAST
2896	endpt *iface;
2897
2898	/*
2899	 * Check to see if this is a multicast address
2900	 */
2901	if (!addr_ismulticast(addr)) {
2902		msyslog(LOG_ERR, "invalid multicast address %s",
2903			stoa(addr));
2904		return;
2905	}
2906
2907	/*
2908	 * Disable reception of multicast packets
2909	 */
2910	while ((iface = find_flagged_addr_in_list(addr, INT_MCASTOPEN))
2911	       != NULL)
2912		socket_multicast_disable(iface, addr);
2913
2914	delete_addr_from_list(addr);
2915
2916#else /* not MCAST */
2917	msyslog(LOG_ERR,
2918		"Can not delete multicast address %s: no multicast support",
2919		stoa(addr));
2920#endif /* not MCAST */
2921}
2922
2923
2924/*
2925 * open_socket - open a socket, returning the file descriptor
2926 */
2927
2928static SOCKET
2929open_socket(
2930	sockaddr_u *	addr,
2931	int		bcast,
2932	int		turn_off_reuse,
2933	endpt *		interf
2934	)
2935{
2936	SOCKET	fd;
2937	int	errval;
2938	/*
2939	 * int is OK for REUSEADR per
2940	 * http://www.kohala.com/start/mcast.api.txt
2941	 */
2942	int	on = 1;
2943	int	off = 0;
2944
2945	if (IS_IPV6(addr) && !ipv6_works)
2946		return INVALID_SOCKET;
2947
2948	/* create a datagram (UDP) socket */
2949	fd = socket(AF(addr), SOCK_DGRAM, 0);
2950	if (INVALID_SOCKET == fd) {
2951		errval = socket_errno();
2952		msyslog(LOG_ERR,
2953			"socket(AF_INET%s, SOCK_DGRAM, 0) failed on address %s: %m",
2954			IS_IPV6(addr) ? "6" : "", stoa(addr));
2955
2956		if (errval == EPROTONOSUPPORT ||
2957		    errval == EAFNOSUPPORT ||
2958		    errval == EPFNOSUPPORT)
2959			return (INVALID_SOCKET);
2960
2961		errno = errval;
2962		msyslog(LOG_ERR,
2963			"unexpected socket() error %m code %d (not EPROTONOSUPPORT nor EAFNOSUPPORT nor EPFNOSUPPORT) - exiting",
2964			errno);
2965		exit(1);
2966	}
2967
2968#ifdef SYS_WINNT
2969	connection_reset_fix(fd, addr);
2970#endif
2971	/*
2972	 * Fixup the file descriptor for some systems
2973	 * See bug #530 for details of the issue.
2974	 */
2975	fd = move_fd(fd);
2976
2977	/*
2978	 * set SO_REUSEADDR since we will be binding the same port
2979	 * number on each interface according to turn_off_reuse.
2980	 * This is undesirable on Windows versions starting with
2981	 * Windows XP (numeric version 5.1).
2982	 */
2983#ifdef SYS_WINNT
2984	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0)  /* before 5.1 */
2985#endif
2986		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
2987			       (void *)((turn_off_reuse)
2988					    ? &off
2989					    : &on),
2990			       sizeof(on))) {
2991
2992			msyslog(LOG_ERR,
2993				"setsockopt SO_REUSEADDR %s fails for address %s: %m",
2994				(turn_off_reuse)
2995				    ? "off"
2996				    : "on",
2997				stoa(addr));
2998			closesocket(fd);
2999			return INVALID_SOCKET;
3000		}
3001#ifdef SO_EXCLUSIVEADDRUSE
3002	/*
3003	 * setting SO_EXCLUSIVEADDRUSE on the wildcard we open
3004	 * first will cause more specific binds to fail.
3005	 */
3006	if (!(interf->flags & INT_WILDCARD))
3007		set_excladdruse(fd);
3008#endif
3009
3010	/*
3011	 * IPv4 specific options go here
3012	 */
3013	if (IS_IPV4(addr)) {
3014#if defined(IPPROTO_IP) && defined(IP_TOS)
3015		if (setsockopt(fd, IPPROTO_IP, IP_TOS, (void *)&qos,
3016			       sizeof(qos)))
3017			msyslog(LOG_ERR,
3018				"setsockopt IP_TOS (%02x) fails on address %s: %m",
3019				qos, stoa(addr));
3020#endif /* IPPROTO_IP && IP_TOS */
3021		if (bcast)
3022			socket_broadcast_enable(interf, fd, addr);
3023	}
3024
3025	/*
3026	 * IPv6 specific options go here
3027	 */
3028	if (IS_IPV6(addr)) {
3029#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
3030		if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, (void *)&qos,
3031			       sizeof(qos)))
3032			msyslog(LOG_ERR,
3033				"setsockopt IPV6_TCLASS (%02x) fails on address %s: %m",
3034				qos, stoa(addr));
3035#endif /* IPPROTO_IPV6 && IPV6_TCLASS */
3036#ifdef IPV6_V6ONLY
3037		if (isc_net_probe_ipv6only() == ISC_R_SUCCESS
3038		    && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
3039		    (void *)&on, sizeof(on)))
3040			msyslog(LOG_ERR,
3041				"setsockopt IPV6_V6ONLY on fails on address %s: %m",
3042				stoa(addr));
3043#endif
3044#ifdef IPV6_BINDV6ONLY
3045		if (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDV6ONLY,
3046		    (void *)&on, sizeof(on)))
3047			msyslog(LOG_ERR,
3048				"setsockopt IPV6_BINDV6ONLY on fails on address %s: %m",
3049				stoa(addr));
3050#endif
3051	}
3052
3053#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3054	/*
3055	 * some OSes don't allow binding to more specific
3056	 * addresses if a wildcard address already bound
3057	 * to the port and SO_REUSEADDR is not set
3058	 */
3059	if (!is_wildcard_addr(addr))
3060		set_wildcard_reuse(AF(addr), 1);
3061#endif
3062
3063	/*
3064	 * bind the local address.
3065	 */
3066	errval = bind(fd, &addr->sa, SOCKLEN(addr));
3067
3068#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3069	if (!is_wildcard_addr(addr))
3070		set_wildcard_reuse(AF(addr), 0);
3071#endif
3072
3073	if (errval < 0) {
3074		/*
3075		 * Don't log this under all conditions
3076		 */
3077		if (turn_off_reuse == 0
3078#ifdef DEBUG
3079		    || debug > 1
3080#endif
3081		    ) {
3082			msyslog(LOG_ERR,
3083				"bind(%d) AF_INET%s %s%s flags 0x%x failed: %m",
3084				fd, IS_IPV6(addr) ? "6" : "",
3085				sptoa(addr),
3086				IS_MCAST(addr) ? " (multicast)" : "",
3087				interf->flags);
3088		}
3089
3090		closesocket(fd);
3091
3092		return INVALID_SOCKET;
3093	}
3094
3095#ifdef HAVE_TIMESTAMP
3096	{
3097		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP,
3098			       (void *)&on, sizeof(on)))
3099			msyslog(LOG_DEBUG,
3100				"setsockopt SO_TIMESTAMP on fails on address %s: %m",
3101				stoa(addr));
3102		else
3103			DPRINTF(4, ("setsockopt SO_TIMESTAMP enabled on fd %d address %s\n",
3104				    fd, stoa(addr)));
3105	}
3106#endif
3107#ifdef HAVE_TIMESTAMPNS
3108	{
3109		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS,
3110			       (void *)&on, sizeof(on)))
3111			msyslog(LOG_DEBUG,
3112				"setsockopt SO_TIMESTAMPNS on fails on address %s: %m",
3113				stoa(addr));
3114		else
3115			DPRINTF(4, ("setsockopt SO_TIMESTAMPNS enabled on fd %d address %s\n",
3116				    fd, stoa(addr)));
3117	}
3118#endif
3119#ifdef HAVE_BINTIME
3120	{
3121		if (setsockopt(fd, SOL_SOCKET, SO_BINTIME,
3122			       (void *)&on, sizeof(on)))
3123			msyslog(LOG_DEBUG,
3124				"setsockopt SO_BINTIME on fails on address %s: %m",
3125				stoa(addr));
3126		else
3127			DPRINTF(4, ("setsockopt SO_BINTIME enabled on fd %d address %s\n",
3128				    fd, stoa(addr)));
3129	}
3130#endif
3131
3132	DPRINTF(4, ("bind(%d) addr %s, flags 0x%x\n",
3133		    fd, sptoa(addr), interf->flags));
3134
3135	make_socket_nonblocking(fd);
3136
3137#ifdef HAVE_SIGNALED_IO
3138	init_socket_sig(fd);
3139#endif /* not HAVE_SIGNALED_IO */
3140
3141	add_fd_to_list(fd, FD_TYPE_SOCKET);
3142
3143#if !defined(SYS_WINNT) && !defined(VMS)
3144	DPRINTF(4, ("flags for fd %d: 0x%x\n", fd,
3145		    fcntl(fd, F_GETFL, 0)));
3146#endif /* SYS_WINNT || VMS */
3147
3148#if defined(HAVE_IO_COMPLETION_PORT)
3149/*
3150 * Add the socket to the completion port
3151 */
3152	if (!io_completion_port_add_socket(fd, interf, bcast)) {
3153		msyslog(LOG_ERR, "unable to set up io completion port - EXITING");
3154		exit(1);
3155	}
3156#endif
3157	return fd;
3158}
3159
3160
3161
3162/* XXX ELIMINATE sendpkt similar in ntpq.c, ntpdc.c, ntp_io.c, ntptrace.c */
3163/*
3164 * sendpkt - send a packet to the specified destination from the given endpt
3165 *	     except for multicast, which may be sent from several addresses.
3166 */
3167void
3168sendpkt(
3169	sockaddr_u *	dest,
3170	endpt *		ep,
3171	int		ttl,
3172	struct pkt *	pkt,
3173	int		len
3174	)
3175{
3176	endpt *	src;
3177	int	ismcast;
3178	int	cc;
3179	int	rc;
3180	u_char	cttl;
3181	l_fp	fp_zero = { { 0 }, 0 };
3182	l_fp	org, rec, xmt;
3183
3184	ismcast = IS_MCAST(dest);
3185	if (!ismcast) {
3186		src = ep;
3187	} else {
3188#ifndef MCAST
3189		return;
3190#endif
3191		src = (IS_IPV4(dest))
3192			? mc4_list
3193			: mc6_list;
3194	}
3195
3196	if (NULL == src) {
3197		/*
3198		 * unbound peer - drop request and wait for better
3199		 * network conditions
3200		 */
3201		DPRINTF(2, ("%ssendpkt(dst=%s, ttl=%d, len=%d): no interface - IGNORED\n",
3202			    ismcast ? "\tMCAST\t***** " : "",
3203			    stoa(dest), ttl, len));
3204		return;
3205	}
3206
3207	do {
3208		if (INT_LL_OF_GLOB & src->flags) {
3209			/* avoid duplicate multicasts on same IPv6 net */
3210			goto loop;
3211		}
3212		DPRINTF(2, ("%ssendpkt(%d, dst=%s, src=%s, ttl=%d, len=%d)\n",
3213			    ismcast ? "\tMCAST\t***** " : "", src->fd,
3214			    stoa(dest), stoa(&src->sin), ttl, len));
3215#ifdef MCAST
3216		if (ismcast && ttl > 0 && ttl != src->last_ttl) {
3217			/*
3218			 * set the multicast ttl for outgoing packets
3219			 */
3220			switch (AF(&src->sin)) {
3221
3222			case AF_INET :
3223				cttl = (u_char)ttl;
3224				rc = setsockopt(src->fd, IPPROTO_IP,
3225						IP_MULTICAST_TTL,
3226						(void *)&cttl,
3227						sizeof(cttl));
3228				break;
3229
3230# ifdef INCLUDE_IPV6_SUPPORT
3231			case AF_INET6 :
3232				rc = setsockopt(src->fd, IPPROTO_IPV6,
3233						 IPV6_MULTICAST_HOPS,
3234						 (void *)&ttl,
3235						 sizeof(ttl));
3236				break;
3237# endif	/* INCLUDE_IPV6_SUPPORT */
3238
3239			default:
3240				rc = 0;
3241			}
3242
3243			if (!rc)
3244				src->last_ttl = ttl;
3245			else
3246				msyslog(LOG_ERR,
3247					"setsockopt IP_MULTICAST_TTL/IPV6_MULTICAST_HOPS fails on address %s: %m",
3248					stoa(&src->sin));
3249		}
3250#endif	/* MCAST */
3251
3252#ifdef SIM
3253		cc = simulate_server(dest, src, pkt);
3254#elif defined(HAVE_IO_COMPLETION_PORT)
3255		cc = io_completion_port_sendto(src, src->fd, pkt,
3256			(size_t)len, dest);
3257#else
3258		cc = sendto(src->fd, (char *)pkt, (u_int)len, 0,
3259			    &dest->sa, SOCKLEN(dest));
3260#endif
3261		if (cc == -1) {
3262			src->notsent++;
3263			packets_notsent++;
3264		} else	{
3265			src->sent++;
3266			packets_sent++;
3267		}
3268	    loop:
3269		if (ismcast)
3270			src = src->mclink;
3271	} while (ismcast && src != NULL);
3272
3273	/* HMS: pkt->rootdisp is usually random here */
3274	NTOHL_FP(&pkt->org, &org);
3275	NTOHL_FP(&pkt->rec, &rec);
3276	NTOHL_FP(&pkt->xmt, &xmt);
3277	record_raw_stats(src ? &src->sin : NULL, dest,
3278			&org, &rec, &xmt, &fp_zero,
3279			PKT_LEAP(pkt->li_vn_mode),
3280			PKT_VERSION(pkt->li_vn_mode),
3281			PKT_MODE(pkt->li_vn_mode),
3282			pkt->stratum,
3283			pkt->ppoll, pkt->precision,
3284			FPTOD(NTOHS_FP(pkt->rootdelay)),
3285			FPTOD(NTOHS_FP(pkt->rootdisp)),  pkt->refid,
3286			len - MIN_V4_PKT_LEN, (u_char *)&pkt->exten);
3287}
3288
3289
3290#if !defined(HAVE_IO_COMPLETION_PORT)
3291#if !defined(HAVE_SIGNALED_IO)
3292/*
3293 * fdbits - generate ascii representation of fd_set (FAU debug support)
3294 * HFDF format - highest fd first.
3295 */
3296static char *
3297fdbits(
3298	int		count,
3299	const fd_set*	set
3300	)
3301{
3302	static char buffer[256];
3303	char * buf = buffer;
3304
3305	count = min(count,  sizeof(buffer) - 1);
3306
3307	while (count >= 0) {
3308		*buf++ = FD_ISSET(count, set) ? '#' : '-';
3309		count--;
3310	}
3311	*buf = '\0';
3312
3313	return buffer;
3314}
3315#endif
3316
3317#ifdef REFCLOCK
3318/*
3319 * Routine to read the refclock packets for a specific interface
3320 * Return the number of bytes read. That way we know if we should
3321 * read it again or go on to the next one if no bytes returned
3322 */
3323static inline int
3324read_refclock_packet(
3325	SOCKET			fd,
3326	struct refclockio *	rp,
3327	l_fp			ts
3328	)
3329{
3330	u_int			read_count;
3331	int			buflen;
3332	int			saved_errno;
3333	int			consumed;
3334	struct recvbuf *	rb;
3335
3336	rb = get_free_recv_buffer(TRUE);
3337
3338	if (NULL == rb) {
3339		/*
3340		 * No buffer space available - just drop the 'packet'.
3341		 * Since this is a non-blocking character stream we read
3342		 * all data that we can.
3343		 *
3344		 * ...hmmmm... what about "tcflush(fd,TCIFLUSH)" here?!?
3345		 */
3346		char buf[128];
3347		do
3348			buflen = read(fd, buf, sizeof(buf));
3349		while (buflen > 0);
3350		packets_dropped++;
3351		return (buflen);
3352	}
3353
3354	/* TALOS-CAN-0064: avoid signed/unsigned clashes that can lead
3355	 * to buffer overrun and memory corruption
3356	 */
3357	if (rp->datalen <= 0 || (size_t)rp->datalen > sizeof(rb->recv_space))
3358		read_count = sizeof(rb->recv_space);
3359	else
3360		read_count = (u_int)rp->datalen;
3361	do {
3362		buflen = read(fd, (char *)&rb->recv_space, read_count);
3363	} while (buflen < 0 && EINTR == errno);
3364
3365	if (buflen <= 0) {
3366		saved_errno = errno;
3367		freerecvbuf(rb);
3368		errno = saved_errno;
3369		return buflen;
3370	}
3371
3372	/*
3373	 * Got one. Mark how and when it got here,
3374	 * put it on the full list and do bookkeeping.
3375	 */
3376	rb->recv_length = buflen;
3377	rb->recv_peer = rp->srcclock;
3378	rb->dstadr = NULL;
3379	rb->fd = fd;
3380	rb->recv_time = ts;
3381	rb->receiver = rp->clock_recv;
3382
3383	consumed = indicate_refclock_packet(rp, rb);
3384	if (!consumed) {
3385		rp->recvcount++;
3386		packets_received++;
3387	}
3388
3389	return buflen;
3390}
3391#endif	/* REFCLOCK */
3392
3393
3394#ifdef HAVE_PACKET_TIMESTAMP
3395/*
3396 * extract timestamps from control message buffer
3397 */
3398static l_fp
3399fetch_timestamp(
3400	struct recvbuf *	rb,
3401	struct msghdr *		msghdr,
3402	l_fp			ts
3403	)
3404{
3405	struct cmsghdr *	cmsghdr;
3406	unsigned long		ticks;
3407	double			fuzz;
3408	l_fp			lfpfuzz;
3409	l_fp			nts;
3410#ifdef DEBUG_TIMING
3411	l_fp			dts;
3412#endif
3413
3414	cmsghdr = CMSG_FIRSTHDR(msghdr);
3415	while (cmsghdr != NULL) {
3416		switch (cmsghdr->cmsg_type)
3417		{
3418#ifdef HAVE_BINTIME
3419		case SCM_BINTIME:
3420#endif  /* HAVE_BINTIME */
3421#ifdef HAVE_TIMESTAMPNS
3422		case SCM_TIMESTAMPNS:
3423#endif	/* HAVE_TIMESTAMPNS */
3424#ifdef HAVE_TIMESTAMP
3425		case SCM_TIMESTAMP:
3426#endif	/* HAVE_TIMESTAMP */
3427#if defined(HAVE_BINTIME) || defined (HAVE_TIMESTAMPNS) || defined(HAVE_TIMESTAMP)
3428			switch (cmsghdr->cmsg_type)
3429			{
3430#ifdef HAVE_BINTIME
3431			case SCM_BINTIME:
3432				{
3433					struct bintime	pbt;
3434					memcpy(&pbt, CMSG_DATA(cmsghdr), sizeof(pbt));
3435					/*
3436					 * bintime documentation is at http://phk.freebsd.dk/pubs/timecounter.pdf
3437					 */
3438					nts.l_i = pbt.sec + JAN_1970;
3439					nts.l_uf = (u_int32)(pbt.frac >> 32);
3440					if (sys_tick > measured_tick &&
3441					    sys_tick > 1e-9) {
3442						ticks = (unsigned long)(nts.l_uf / (unsigned long)(sys_tick * FRAC));
3443						nts.l_uf = (unsigned long)(ticks * (unsigned long)(sys_tick * FRAC));
3444					}
3445					DPRINTF(4, ("fetch_timestamp: system bintime network time stamp: %ld.%09lu\n",
3446						    (long)pbt.sec, (u_long)((nts.l_uf / FRAC) * 1e9)));
3447				}
3448				break;
3449#endif  /* HAVE_BINTIME */
3450#ifdef HAVE_TIMESTAMPNS
3451			case SCM_TIMESTAMPNS:
3452				{
3453					struct timespec	pts;
3454					memcpy(&pts, CMSG_DATA(cmsghdr), sizeof(pts));
3455					if (sys_tick > measured_tick &&
3456					    sys_tick > 1e-9) {
3457						ticks = (unsigned long)((pts.tv_nsec * 1e-9) /
3458									sys_tick);
3459						pts.tv_nsec = (long)(ticks * 1e9 *
3460								     sys_tick);
3461					}
3462					DPRINTF(4, ("fetch_timestamp: system nsec network time stamp: %ld.%09ld\n",
3463						    pts.tv_sec, pts.tv_nsec));
3464					nts = tspec_stamp_to_lfp(pts);
3465				}
3466				break;
3467#endif	/* HAVE_TIMESTAMPNS */
3468#ifdef HAVE_TIMESTAMP
3469			case SCM_TIMESTAMP:
3470				{
3471					struct timeval	ptv;
3472					memcpy(&ptv, CMSG_DATA(cmsghdr), sizeof(ptv));
3473					if (sys_tick > measured_tick &&
3474					    sys_tick > 1e-6) {
3475						ticks = (unsigned long)((ptv.tv_usec * 1e-6) /
3476									sys_tick);
3477						ptv.tv_usec = (long)(ticks * 1e6 *
3478								    sys_tick);
3479					}
3480					DPRINTF(4, ("fetch_timestamp: system usec network time stamp: %jd.%06ld\n",
3481						    (intmax_t)ptv.tv_sec, (long)ptv.tv_usec));
3482					nts = tval_stamp_to_lfp(ptv);
3483				}
3484				break;
3485#endif  /* HAVE_TIMESTAMP */
3486			}
3487			fuzz = ntp_uurandom() * sys_fuzz;
3488			DTOLFP(fuzz, &lfpfuzz);
3489			L_ADD(&nts, &lfpfuzz);
3490#ifdef DEBUG_TIMING
3491			dts = ts;
3492			L_SUB(&dts, &nts);
3493			collect_timing(rb, "input processing delay", 1,
3494				       &dts);
3495			DPRINTF(4, ("fetch_timestamp: timestamp delta: %s (incl. fuzz)\n",
3496				    lfptoa(&dts, 9)));
3497#endif	/* DEBUG_TIMING */
3498			ts = nts;  /* network time stamp */
3499			break;
3500#endif	/* HAVE_BINTIME || HAVE_TIMESTAMPNS || HAVE_TIMESTAMP */
3501
3502		default:
3503			DPRINTF(4, ("fetch_timestamp: skipping control message 0x%x\n",
3504				    cmsghdr->cmsg_type));
3505		}
3506		cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr);
3507	}
3508	return ts;
3509}
3510#endif	/* HAVE_PACKET_TIMESTAMP */
3511
3512
3513/*
3514 * Routine to read the network NTP packets for a specific interface
3515 * Return the number of bytes read. That way we know if we should
3516 * read it again or go on to the next one if no bytes returned
3517 */
3518static inline int
3519read_network_packet(
3520	SOCKET		fd,
3521	endpt *		itf,
3522	l_fp		ts
3523	)
3524{
3525	GETSOCKNAME_SOCKLEN_TYPE fromlen;
3526	int buflen;
3527	register struct recvbuf *rb;
3528#ifdef HAVE_PACKET_TIMESTAMP
3529	struct msghdr msghdr;
3530	struct iovec iovec;
3531	char control[CMSG_BUFSIZE];
3532#endif
3533
3534	/*
3535	 * Get a buffer and read the frame.  If we haven't got a buffer,
3536	 * or this is received on a disallowed socket, just dump the
3537	 * packet.
3538	 */
3539
3540	rb = itf->ignore_packets ? NULL : get_free_recv_buffer(FALSE);
3541	if (NULL == rb) {
3542		/* A partial read on a UDP socket truncates the data and
3543		 * removes the message from the queue. So there's no
3544		 * need to have a full buffer here on the stack.
3545		 */
3546		char buf[16];
3547		sockaddr_u from;
3548
3549		if (rb != NULL)
3550			freerecvbuf(rb);
3551
3552		fromlen = sizeof(from);
3553		buflen = recvfrom(fd, buf, sizeof(buf), 0,
3554				  &from.sa, &fromlen);
3555		DPRINTF(4, ("%s on (%lu) fd=%d from %s\n",
3556			(itf->ignore_packets)
3557			    ? "ignore"
3558			    : "drop",
3559			free_recvbuffs(), fd, stoa(&from)));
3560		if (itf->ignore_packets)
3561			packets_ignored++;
3562		else
3563			packets_dropped++;
3564		return (buflen);
3565	}
3566
3567	fromlen = sizeof(rb->recv_srcadr);
3568
3569#ifndef HAVE_PACKET_TIMESTAMP
3570	rb->recv_length = recvfrom(fd, (char *)&rb->recv_space,
3571				   sizeof(rb->recv_space), 0,
3572				   &rb->recv_srcadr.sa, &fromlen);
3573#else
3574	iovec.iov_base        = &rb->recv_space;
3575	iovec.iov_len         = sizeof(rb->recv_space);
3576	msghdr.msg_name       = &rb->recv_srcadr;
3577	msghdr.msg_namelen    = fromlen;
3578	msghdr.msg_iov        = &iovec;
3579	msghdr.msg_iovlen     = 1;
3580	msghdr.msg_control    = (void *)&control;
3581	msghdr.msg_controllen = sizeof(control);
3582	msghdr.msg_flags      = 0;
3583	rb->recv_length       = recvmsg(fd, &msghdr, 0);
3584#endif
3585
3586	buflen = rb->recv_length;
3587
3588	if (buflen == 0 || (buflen == -1 &&
3589	    (EWOULDBLOCK == errno
3590#ifdef EAGAIN
3591	     || EAGAIN == errno
3592#endif
3593	     ))) {
3594		freerecvbuf(rb);
3595		return (buflen);
3596	} else if (buflen < 0) {
3597		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: %m",
3598			stoa(&rb->recv_srcadr), fd);
3599		DPRINTF(5, ("read_network_packet: fd=%d dropped (bad recvfrom)\n",
3600			    fd));
3601		freerecvbuf(rb);
3602		return (buflen);
3603	}
3604
3605	DPRINTF(3, ("read_network_packet: fd=%d length %d from %s\n",
3606		    fd, buflen, stoa(&rb->recv_srcadr)));
3607
3608#ifdef ENABLE_BUG3020_FIX
3609	if (ISREFCLOCKADR(&rb->recv_srcadr)) {
3610		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: refclock srcadr on a network interface!",
3611			stoa(&rb->recv_srcadr), fd);
3612		DPRINTF(1, ("read_network_packet: fd=%d dropped (refclock srcadr))\n",
3613			    fd));
3614		packets_dropped++;
3615		freerecvbuf(rb);
3616		return (buflen);
3617	}
3618#endif
3619
3620	/*
3621	** Bug 2672: Some OSes (MacOSX and Linux) don't block spoofed ::1
3622	*/
3623
3624	if (   IS_IPV6(&rb->recv_srcadr)
3625	    && IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr))
3626	    && !(INT_LOOPBACK & itf->flags)) {
3627
3628		packets_dropped++;
3629		DPRINTF(2, ("DROPPING pkt with spoofed ::1 source on %s\n", latoa(itf)));
3630		freerecvbuf(rb);
3631		return -1;
3632	}
3633
3634	/*
3635	 * Got one.  Mark how and when it got here,
3636	 * put it on the full list and do bookkeeping.
3637	 */
3638	rb->dstadr = itf;
3639	rb->fd = fd;
3640#ifdef HAVE_PACKET_TIMESTAMP
3641	/* pick up a network time stamp if possible */
3642	ts = fetch_timestamp(rb, &msghdr, ts);
3643#endif
3644	rb->recv_time = ts;
3645	rb->receiver = receive;
3646
3647	add_full_recv_buffer(rb);
3648
3649	itf->received++;
3650	packets_received++;
3651	return (buflen);
3652}
3653
/*
 * io_handler - attempt to handle io (select()/signaled IO)
 *
 * Without HAVE_SIGNALED_IO this waits in select() on the global
 * activefds set and passes any ready descriptors to
 * input_handler_scan().  With HAVE_SIGNALED_IO it only calls
 * wait_for_signal().
 */
void
io_handler(void)
{
#  ifndef HAVE_SIGNALED_IO
	fd_set rdfdes;
	int nfound;

	/*
	 * Use select() on all input fd's for unlimited
	 * time.  select() will terminate on SIGALARM or on the
	 * reception of input.  Using select() means we can't do
	 * robust signal handling and we get a potential race
	 * between checking for alarms and doing the select().
	 * Mostly harmless, I think.
	 */
	/*
	 * On VMS, I suspect that select() can't be interrupted
	 * by a "signal" either, so I take the easy way out and
	 * have select() time out after one second.
	 * System clock updates really aren't time-critical,
	 * and - lacking a hardware reference clock - I have
	 * yet to learn about anything else that is.
	 */
	++handler_calls;
	/* select() overwrites its set argument, so work on a copy */
	rdfdes = activefds;
#   if !defined(VMS) && !defined(SYS_VXWORKS)
	nfound = select(maxactivefd + 1, &rdfdes, NULL,
			NULL, NULL);
#   else	/* VMS, VxWorks */
	/* make select() wake up after one second */
	{
		struct timeval t1;
		t1.tv_sec  = 1;
		t1.tv_usec = 0;
		nfound = select(maxactivefd + 1,
				&rdfdes, NULL, NULL,
				&t1);
	}
#   endif	/* VMS, VxWorks */
	/*
	 * select() failed: let sanitize_fdset() inspect the error.  If
	 * it reports the set was rescanned, poll once more with a zero
	 * timeout using a fresh copy of activefds.
	 */
	if (nfound < 0 && sanitize_fdset(errno)) {
		struct timeval t1;
		t1.tv_sec  = 0;
		t1.tv_usec = 0;
		rdfdes = activefds;
		nfound = select(maxactivefd + 1,
				&rdfdes, NULL, NULL,
				&t1);
	}

	if (nfound > 0) {
		l_fp ts;

		/* timestamp handed to the readers as the receive time */
		get_systime(&ts);

		input_handler_scan(&ts, &rdfdes);
	} else if (nfound == -1 && errno != EINTR) {
		msyslog(LOG_ERR, "select() error: %m");
	}
	/* the DEBUG section below continues the if/else chain above */
#   ifdef DEBUG
	else if (debug > 4) {
		msyslog(LOG_DEBUG, "select(): nfound=%d, error: %m", nfound);
	} else {
		DPRINTF(3, ("select() returned %d: %m\n", nfound));
	}
#   endif /* DEBUG */
#  else /* HAVE_SIGNALED_IO */
	wait_for_signal();
#  endif /* HAVE_SIGNALED_IO */
}
3726
3727#ifdef HAVE_SIGNALED_IO
3728/*
3729 * input_handler - receive packets asynchronously
3730 *
3731 * ALWAYS IN SIGNAL HANDLER CONTEXT -- only async-safe functions allowed!
3732 */
3733static RETSIGTYPE
3734input_handler(
3735	l_fp *	cts
3736	)
3737{
3738	int		n;
3739	struct timeval	tvzero;
3740	fd_set		fds;
3741
3742	++handler_calls;
3743
3744	/*
3745	 * Do a poll to see who has data
3746	 */
3747
3748	fds = activefds;
3749	tvzero.tv_sec = tvzero.tv_usec = 0;
3750
3751	n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3752	if (n < 0 && sanitize_fdset(errno)) {
3753		fds = activefds;
3754		tvzero.tv_sec = tvzero.tv_usec = 0;
3755		n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3756	}
3757	if (n > 0)
3758		input_handler_scan(cts, &fds);
3759}
3760#endif /* HAVE_SIGNALED_IO */
3761
3762
3763/*
3764 * Try to sanitize the global FD set
3765 *
3766 * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3767 */
3768static int/*BOOL*/
3769sanitize_fdset(
3770	int	errc
3771	)
3772{
3773	int j, b, maxscan;
3774
3775#  ifndef HAVE_SIGNALED_IO
3776	/*
3777	 * extended FAU debugging output
3778	 */
3779	if (errc != EINTR) {
3780		msyslog(LOG_ERR,
3781			"select(%d, %s, 0L, 0L, &0.0) error: %m",
3782			maxactivefd + 1,
3783			fdbits(maxactivefd, &activefds));
3784	}
3785#   endif
3786
3787	if (errc != EBADF)
3788		return FALSE;
3789
3790	/* if we have oviously bad FDs, try to sanitize the FD set. */
3791	for (j = 0, maxscan = 0; j <= maxactivefd; j++) {
3792		if (FD_ISSET(j, &activefds)) {
3793			if (-1 != read(j, &b, 0)) {
3794				maxscan = j;
3795				continue;
3796			}
3797#		    ifndef HAVE_SIGNALED_IO
3798			msyslog(LOG_ERR,
3799				"Removing bad file descriptor %d from select set",
3800				j);
3801#		    endif
3802			FD_CLR(j, &activefds);
3803		}
3804	}
3805	if (maxactivefd != maxscan)
3806		maxactivefd = maxscan;
3807	return TRUE;
3808}
3809
/*
 * input_handler_scan - scan the known FDs (clocks, servers, ...) for
 * presence in a 'fd_set' and read from every one that is set.
 *
 * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
 *
 * cts	timestamp supplied by the caller; passed to the packet readers
 * pfds	set of descriptors to service (the callers in this file pass
 *	the set filled in by select())
 *
 * The order of service is: reference clocks (REFCLOCK), network
 * endpoints, asyncio readers (HAS_ROUTING_SOCKET), and finally the
 * response pipes of blocking children, which are only counted here.
 */
static void
input_handler_scan(
	const l_fp *	cts,
	const fd_set *	pfds
	)
{
	int		buflen;
	u_int		idx;
	int		doing;
	SOCKET		fd;
	blocking_child *c;
	l_fp		ts;	/* local copy of *cts, taken on entry */

#if defined(DEBUG_TIMING)
	l_fp		ts_e;	/* time at the end of this scan */
#endif
	endpt *		ep;
#ifdef REFCLOCK
	struct refclockio *rp;
	int		saved_errno;
	const char *	clk;
#endif
#ifdef HAS_ROUTING_SOCKET
	struct asyncio_reader *	asyncio_reader;
	struct asyncio_reader *	next_asyncio_reader;
#endif

	++handler_pkts;
	ts = *cts;

#ifdef REFCLOCK
	/*
	 * Check out the reference clocks first, if any
	 */

	for (rp = refio; rp != NULL; rp = rp->next) {
		fd = rp->fd;

		if (!FD_ISSET(fd, pfds))
			continue;
		buflen = read_refclock_packet(fd, rp, ts);
		/*
		 * The first read must succeed after select() indicates
		 * readability, or we've reached a permanent EOF.
		 * http://bugs.ntp.org/1732 reported ntpd munching CPU
		 * after a USB GPS was unplugged because select was
		 * indicating EOF but ntpd didn't remove the descriptor
		 * from the activefds set.
		 */
		if (buflen < 0 && EAGAIN != errno) {
			/*
			 * errno is saved and restored around refnumtoa()
			 * so that %m below still reports the read error.
			 */
			saved_errno = errno;
			clk = refnumtoa(&rp->srcclock->srcadr);
			errno = saved_errno;
			msyslog(LOG_ERR, "%s read: %m", clk);
			maintain_activefds(fd, TRUE);
		} else if (0 == buflen) {
			clk = refnumtoa(&rp->srcclock->srcadr);
			msyslog(LOG_ERR, "%s read EOF", clk);
			maintain_activefds(fd, TRUE);
		} else {
			/* drain any remaining refclock input */
			do {
				buflen = read_refclock_packet(fd, rp, ts);
			} while (buflen > 0);
		}
	}
#endif /* REFCLOCK */

	/*
	 * Loop through the interfaces looking for data to read.
	 * Pass 0 of the inner loop handles the endpoint's main socket
	 * (ep->fd), pass 1 its broadcast socket (ep->bfd), which is
	 * only looked at when INT_BCASTOPEN is set.
	 */
	for (ep = ep_list; ep != NULL; ep = ep->elink) {
		for (doing = 0; doing < 2; doing++) {
			if (!doing) {
				fd = ep->fd;
			} else {
				if (!(ep->flags & INT_BCASTOPEN))
					break;
				fd = ep->bfd;
			}
			if (fd < 0)
				continue;
			/* keep reading until no more data is returned */
			if (FD_ISSET(fd, pfds))
				do {
					buflen = read_network_packet(
							fd, ep, ts);
				} while (buflen > 0);
			/* Check more interfaces */
		}
	}

#ifdef HAS_ROUTING_SOCKET
	/*
	 * scan list of asyncio readers - currently only used for routing sockets
	 */
	asyncio_reader = asyncio_reader_list;

	while (asyncio_reader != NULL) {
		/* callback may unlink and free asyncio_reader */
		next_asyncio_reader = asyncio_reader->link;
		if (FD_ISSET(asyncio_reader->fd, pfds))
			(*asyncio_reader->receiver)(asyncio_reader);
		asyncio_reader = next_asyncio_reader;
	}
#endif /* HAS_ROUTING_SOCKET */

	/*
	 * Check for a response from a blocking child.  Nothing is read
	 * here; only the "ready seen" counters are incremented.
	 */
	for (idx = 0; idx < blocking_children_alloc; idx++) {
		c = blocking_children[idx];
		if (NULL == c || -1 == c->resp_read_pipe)
			continue;
		if (FD_ISSET(c->resp_read_pipe, pfds)) {
			++c->resp_ready_seen;
			++blocking_child_ready_seen;
		}
	}

	/* We've done our work */
#if defined(DEBUG_TIMING)
	get_systime(&ts_e);
	/*
	 * (ts_e - ts) is the amount of time we spent
	 * processing this gob of file descriptors.  Log
	 * it.
	 */
	L_SUB(&ts_e, &ts);
	collect_timing(NULL, "input handler", 1, &ts_e);
	if (debug > 3)
		msyslog(LOG_DEBUG,
			"input_handler: Processed a gob of fd's in %s msec",
			lfptoms(&ts_e, 6));
#endif /* DEBUG_TIMING */
}
3950#endif /* !HAVE_IO_COMPLETION_PORT */
3951
3952/*
3953 * find an interface suitable for the src address
3954 */
3955endpt *
3956select_peerinterface(
3957	struct peer *	peer,
3958	sockaddr_u *	srcadr,
3959	endpt *		dstadr
3960	)
3961{
3962	endpt *ep;
3963#ifndef SIM
3964	endpt *wild;
3965
3966	wild = ANY_INTERFACE_CHOOSE(srcadr);
3967
3968	/*
3969	 * Initialize the peer structure and dance the interface jig.
3970	 * Reference clocks step the loopback waltz, the others
3971	 * squaredance around the interface list looking for a buddy. If
3972	 * the dance peters out, there is always the wildcard interface.
3973	 * This might happen in some systems and would preclude proper
3974	 * operation with public key cryptography.
3975	 */
3976	if (ISREFCLOCKADR(srcadr)) {
3977		ep = loopback_interface;
3978	} else if (peer->cast_flags &
3979		   (MDF_BCLNT | MDF_ACAST | MDF_MCAST | MDF_BCAST)) {
3980		ep = findbcastinter(srcadr);
3981		if (ep != NULL)
3982			DPRINTF(4, ("Found *-cast interface %s for address %s\n",
3983				stoa(&ep->sin), stoa(srcadr)));
3984		else
3985			DPRINTF(4, ("No *-cast local address found for address %s\n",
3986				stoa(srcadr)));
3987	} else {
3988		ep = dstadr;
3989		if (NULL == ep) {
3990			ep = wild;
3991		}
3992	}
3993	/*
3994	 * If it is a multicast address, findbcastinter() may not find
3995	 * it.  For unicast, we get to find the interface when dstadr is
3996	 * given to us as the wildcard (ANY_INTERFACE_CHOOSE).  Either
3997	 * way, try a little harder.
3998	 */
3999	if (wild == ep) {
4000		ep = findinterface(srcadr);
4001	}
4002	/*
4003	 * we do not bind to the wildcard interfaces for output
4004	 * as our (network) source address would be undefined and
4005	 * crypto will not work without knowing the own transmit address
4006	 */
4007	if (ep != NULL && (INT_WILDCARD & ep->flags)) {
4008		if (!accept_wildcard_if_for_winnt) {
4009			ep = NULL;
4010		}
4011	}
4012#else	/* SIM follows */
4013	ep = loopback_interface;
4014#endif
4015
4016	return ep;
4017}
4018
4019
4020/*
4021 * findinterface - find local interface corresponding to address
4022 */
4023endpt *
4024findinterface(
4025	sockaddr_u *addr
4026	)
4027{
4028	endpt *iface;
4029
4030	iface = findlocalinterface(addr, INT_WILDCARD, 0);
4031
4032	if (NULL == iface) {
4033		DPRINTF(4, ("Found no interface for address %s - returning wildcard\n",
4034			    stoa(addr)));
4035
4036		iface = ANY_INTERFACE_CHOOSE(addr);
4037	} else
4038		DPRINTF(4, ("Found interface #%d %s for address %s\n",
4039			    iface->ifnum, iface->name, stoa(addr)));
4040
4041	return iface;
4042}
4043
4044/*
4045 * findlocalinterface - find local interface corresponding to addr,
4046 * which does not have any of flags set.  If bcast is nonzero, addr is
4047 * a broadcast address.
4048 *
4049 * This code attempts to find the local sending address for an outgoing
4050 * address by connecting a new socket to destinationaddress:NTP_PORT
4051 * and reading the sockname of the resulting connect.
4052 * the complicated sequence simulates the routing table lookup
4053 * for to first hop without duplicating any of the routing logic into
4054 * ntpd. preferably we would have used an API call - but its not there -
4055 * so this is the best we can do here short of duplicating to entire routing
4056 * logic in ntpd which would be a silly and really unportable thing to do.
4057 *
4058 */
4059static endpt *
4060findlocalinterface(
4061	sockaddr_u *	addr,
4062	int		flags,
4063	int		bcast
4064	)
4065{
4066	GETSOCKNAME_SOCKLEN_TYPE	sockaddrlen;
4067	endpt *				iface;
4068	sockaddr_u			saddr;
4069	SOCKET				s;
4070	int				rtn;
4071	int				on;
4072
4073	DPRINTF(4, ("Finding interface for addr %s in list of addresses\n",
4074		    stoa(addr)));
4075
4076	/* [Bug 3437] The prototype POOL peer can be AF_UNSPEC.
4077	 * This is bound to fail, but on the way to nowhere it
4078	 * triggers a security incident on SELinux.
4079	 *
4080	 * Checking the condition and failing early is probably good
4081	 * advice, and even saves us some syscalls in that case.
4082	 * Thanks to Miroslav Lichvar for finding this.
4083	 */
4084	if (AF_UNSPEC == AF(addr)) {
4085		return NULL;
4086	}
4087	s = socket(AF(addr), SOCK_DGRAM, 0);
4088	if (INVALID_SOCKET == s) {
4089		return NULL;
4090	}
4091	/*
4092	 * If we are looking for broadcast interface we need to set this
4093	 * socket to allow broadcast
4094	 */
4095	if (bcast) {
4096		on = 1;
4097		if (SOCKET_ERROR == setsockopt(s, SOL_SOCKET,
4098						SO_BROADCAST,
4099						(void *)&on,
4100						sizeof(on))) {
4101			closesocket(s);
4102			return NULL;
4103		}
4104	}
4105
4106	rtn = connect(s, &addr->sa, SOCKLEN(addr));
4107	if (SOCKET_ERROR == rtn) {
4108		closesocket(s);
4109		return NULL;
4110	}
4111
4112	sockaddrlen = sizeof(saddr);
4113	rtn = getsockname(s, &saddr.sa, &sockaddrlen);
4114	closesocket(s);
4115	if (SOCKET_ERROR == rtn)
4116		return NULL;
4117
4118	DPRINTF(4, ("findlocalinterface: kernel maps %s to %s\n",
4119		    stoa(addr), stoa(&saddr)));
4120
4121	iface = getinterface(&saddr, flags);
4122
4123	/*
4124	 * if we didn't find an exact match on saddr, find the closest
4125	 * available local address.  This handles the case of the
4126	 * address suggested by the kernel being excluded by nic rules
4127	 * or the user's -I and -L options to ntpd.
4128	 * See http://bugs.ntp.org/1184 and http://bugs.ntp.org/1683
4129	 * for more background.
4130	 */
4131	if (NULL == iface || iface->ignore_packets) {
4132		iface = findclosestinterface(&saddr,
4133					     flags | INT_LOOPBACK);
4134	}
4135	/*
4136	 * Don't select an interface which will ignore replies, or one
4137	 * dedicated to multicast receive.
4138	 */
4139	if (   iface != NULL
4140	    && (iface->ignore_packets || (INT_MCASTIF & iface->flags))) {
4141		iface = NULL;
4142	}
4143	return iface;
4144}
4145
4146
4147/*
4148 * findclosestinterface
4149 *
4150 * If there are -I/--interface or -L/novirtualips command-line options,
4151 * or "nic" or "interface" rules in ntp.conf, findlocalinterface() may
4152 * find the kernel's preferred local address for a given peer address is
4153 * administratively unavailable to ntpd, and punt to this routine's more
4154 * expensive search.
4155 *
4156 * Find the numerically closest local address to the one connect()
4157 * suggested.  This matches an address on the same subnet first, as
4158 * needed by Bug 1184, and provides a consistent choice if there are
4159 * multiple feasible local addresses, regardless of the order ntpd
4160 * enumerated them.
4161 */
4162endpt *
4163findclosestinterface(
4164	sockaddr_u *	addr,
4165	int		flags
4166	)
4167{
4168	endpt *		ep;
4169	endpt *		winner;
4170	sockaddr_u	addr_dist;
4171	sockaddr_u	min_dist;
4172
4173	ZERO_SOCK(&min_dist);
4174	winner = NULL;
4175
4176	for (ep = ep_list; ep != NULL; ep = ep->elink) {
4177		if (ep->ignore_packets ||
4178		    AF(addr) != ep->family ||
4179		    flags & ep->flags)
4180			continue;
4181
4182		calc_addr_distance(&addr_dist, addr, &ep->sin);
4183		if (NULL == winner ||
4184		    -1 == cmp_addr_distance(&addr_dist, &min_dist)) {
4185			min_dist = addr_dist;
4186			winner = ep;
4187		}
4188	}
4189	if (NULL == winner)
4190		DPRINTF(4, ("findclosestinterface(%s) failed\n",
4191			    stoa(addr)));
4192	else
4193		DPRINTF(4, ("findclosestinterface(%s) -> %s\n",
4194			    stoa(addr), stoa(&winner->sin)));
4195
4196	return winner;
4197}
4198
4199
4200/*
4201 * calc_addr_distance - calculate the distance between two addresses,
4202 *			the absolute value of the difference between
4203 *			the addresses numerically, stored as an address.
4204 */
4205static void
4206calc_addr_distance(
4207	sockaddr_u *		dist,
4208	const sockaddr_u *	a1,
4209	const sockaddr_u *	a2
4210	)
4211{
4212	u_char *	pdist;
4213	const u_char *	p1;
4214	const u_char *	p2;
4215	size_t		cb;
4216	int		different;
4217	int		a1_greater;
4218	u_int		u;
4219
4220	REQUIRE(AF(a1) == AF(a2));
4221
4222	ZERO_SOCK(dist);
4223	AF(dist) = AF(a1);
4224
4225	if (IS_IPV4(a1)) {
4226		pdist = (      u_char *)&NSRCADR(dist);
4227		p1 =	(const u_char *)&NSRCADR(a1);
4228		p2 =	(const u_char *)&NSRCADR(a2);
4229	} else {
4230		pdist = (      u_char *)&NSRCADR(dist);
4231		p1 =	(const u_char *)&NSRCADR(a1);
4232		p2 =	(const u_char *)&NSRCADR(a2);
4233	}
4234	cb = SIZEOF_INADDR(AF(dist));
4235	different = FALSE;
4236	a1_greater = FALSE;
4237	for (u = 0; u < cb; u++) {
4238		if (!different && p1[u] != p2[u]) {
4239			a1_greater = (p1[u] > p2[u]);
4240			different = TRUE;
4241		}
4242		if (a1_greater) {
4243			pdist[u] = p1[u] - p2[u];
4244		} else {
4245			pdist[u] = p2[u] - p1[u];
4246		}
4247	}
4248}
4249
4250
4251/*
4252 * cmp_addr_distance - compare two address distances, returning -1, 0,
4253 *		       1 to indicate their relationship.
4254 */
4255static int
4256cmp_addr_distance(
4257	const sockaddr_u *	d1,
4258	const sockaddr_u *	d2
4259	)
4260{
4261	int	i;
4262
4263	REQUIRE(AF(d1) == AF(d2));
4264
4265	if (IS_IPV4(d1)) {
4266		if (SRCADR(d1) < SRCADR(d2))
4267			return -1;
4268		else if (SRCADR(d1) == SRCADR(d2))
4269			return 0;
4270		else
4271			return 1;
4272	}
4273
4274	for (i = 0; i < (int)sizeof(NSRCADR6(d1)); i++) {
4275		if (NSRCADR6(d1)[i] < NSRCADR6(d2)[i])
4276			return -1;
4277		else if (NSRCADR6(d1)[i] > NSRCADR6(d2)[i])
4278			return 1;
4279	}
4280
4281	return 0;
4282}
4283
4284
4285
4286/*
4287 * fetch an interface structure the matches the
4288 * address and has the given flags NOT set
4289 */
4290endpt *
4291getinterface(
4292	sockaddr_u *	addr,
4293	u_int32		flags
4294	)
4295{
4296	endpt *iface;
4297
4298	iface = find_addr_in_list(addr);
4299
4300	if (iface != NULL && (iface->flags & flags))
4301		iface = NULL;
4302
4303	return iface;
4304}
4305
4306
/*
 * findbcastinter - find broadcast interface corresponding to address
 *
 * addr	broadcast or multicast address to find an endpoint for
 *
 * First asks findlocalinterface() (kernel lookup with broadcast
 * enabled, loopback and wildcard endpoints excluded).  If that fails,
 * ep_list is searched for a plausible endpoint.  When nothing is
 * found, or the lookup code is compiled out, the wildcard endpoint
 * for the address is returned.
 */
endpt *
findbcastinter(
	sockaddr_u *addr
	)
{
	endpt *	iface;

	iface = NULL;
#if !defined(MPE) && (defined(SIOCGIFCONF) || defined(SYS_WINNT))
	DPRINTF(4, ("Finding broadcast/multicast interface for addr %s in list of addresses\n",
		    stoa(addr)));

	iface = findlocalinterface(addr, INT_LOOPBACK | INT_WILDCARD,
				   1);
	if (iface != NULL) {
		DPRINTF(4, ("Easily found bcast-/mcast- interface index #%d %s\n",
			    iface->ifnum, iface->name));
		return iface;
	}

	/*
	 * plan B - try to find something reasonable in our lists in
	 * case kernel lookup doesn't help.  The loop leaves via 'break'
	 * with iface pointing at the winner, or runs off the end of the
	 * list with iface == NULL.
	 */
	for (iface = ep_list; iface != NULL; iface = iface->elink) {
		if (iface->flags & INT_WILDCARD)
			continue;

		/* Don't bother with ignored interfaces */
		if (iface->ignore_packets)
			continue;

		/*
		 * First look if this is the correct family
		 */
		if(AF(&iface->sin) != AF(addr))
			continue;

		/* Skip the loopback addresses */
		if (iface->flags & INT_LOOPBACK)
			continue;

		/*
		 * If we are looking to match a multicast address and
		 * this interface is one...
		 */
		if (addr_ismulticast(addr)
		    && (iface->flags & INT_MULTICAST)) {
#ifdef INCLUDE_IPV6_SUPPORT
			/*
			 * ...it is the winner unless we're looking for
			 * an interface to use for link-local multicast
			 * and its address is not link-local.
			 */
			if (IS_IPV6(addr)
			    && IN6_IS_ADDR_MC_LINKLOCAL(PSOCK_ADDR6(addr))
			    && !IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&iface->sin)))
				continue;
#endif
			break;
		}

		/*
		 * We match only those interfaces marked as
		 * broadcastable and either the explicit broadcast
		 * address or the network portion of the IP address.
		 * Sloppy.
		 */
		if (IS_IPV4(addr)) {
			if (SOCK_EQ(&iface->bcast, addr))
				break;

			/* same network once the interface mask is applied */
			if ((NSRCADR(&iface->sin) & NSRCADR(&iface->mask))
			    == (NSRCADR(addr)	  & NSRCADR(&iface->mask)))
				break;
		}
#ifdef INCLUDE_IPV6_SUPPORT
		else if (IS_IPV6(addr)) {
			if (SOCK_EQ(&iface->bcast, addr))
				break;

			if (SOCK_EQ(netof(&iface->sin), netof(addr)))
				break;
		}
#endif
	}
#endif /* SIOCGIFCONF */
	if (NULL == iface) {
		DPRINTF(4, ("No bcast interface found for %s\n",
			    stoa(addr)));
		/* last resort: the wildcard endpoint for this address */
		iface = ANY_INTERFACE_CHOOSE(addr);
	} else {
		DPRINTF(4, ("Found bcast-/mcast- interface index #%d %s\n",
			    iface->ifnum, iface->name));
	}

	return iface;
}
4408
4409
4410/*
4411 * io_clr_stats - clear I/O module statistics
4412 */
4413void
4414io_clr_stats(void)
4415{
4416	packets_dropped = 0;
4417	packets_ignored = 0;
4418	packets_received = 0;
4419	packets_sent = 0;
4420	packets_notsent = 0;
4421
4422	handler_calls = 0;
4423	handler_pkts = 0;
4424	io_timereset = current_time;
4425}
4426
4427
4428#ifdef REFCLOCK
4429/*
4430 * io_addclock - add a reference clock to the list and arrange that we
4431 *				 get SIGIO interrupts from it.
4432 */
4433int
4434io_addclock(
4435	struct refclockio *rio
4436	)
4437{
4438	BLOCKIO();
4439
4440	/*
4441	 * Stuff the I/O structure in the list and mark the descriptor
4442	 * in use.  There is a harmless (I hope) race condition here.
4443	 */
4444	rio->active = TRUE;
4445
4446# ifdef HAVE_SIGNALED_IO
4447	if (init_clock_sig(rio)) {
4448		UNBLOCKIO();
4449		return 0;
4450	}
4451# elif defined(HAVE_IO_COMPLETION_PORT)
4452	if (!io_completion_port_add_clock_io(rio)) {
4453		UNBLOCKIO();
4454		return 0;
4455	}
4456# endif
4457
4458	/*
4459	 * enqueue
4460	 */
4461	LINK_SLIST(refio, rio, next);
4462
4463	/*
4464	 * register fd
4465	 */
4466	add_fd_to_list(rio->fd, FD_TYPE_FILE);
4467
4468	UNBLOCKIO();
4469	return 1;
4470}
4471
4472
4473/*
4474 * io_closeclock - close the clock in the I/O structure given
4475 */
4476void
4477io_closeclock(
4478	struct refclockio *rio
4479	)
4480{
4481	struct refclockio *unlinked;
4482
4483	BLOCKIO();
4484
4485	/*
4486	 * Remove structure from the list
4487	 */
4488	rio->active = FALSE;
4489	UNLINK_SLIST(unlinked, refio, rio, next, struct refclockio);
4490	if (NULL != unlinked) {
4491		/* Close the descriptor. The order of operations is
4492		 * important here in case of async / overlapped IO:
4493		 * only after we have removed the clock from the
4494		 * IO completion port we can be sure no further
4495		 * input is queued. So...
4496		 *  - we first disable feeding to the queu by removing
4497		 *    the clock from the IO engine
4498		 *  - close the file (which brings down any IO on it)
4499		 *  - clear the buffer from results for this fd
4500		 */
4501#	    ifdef HAVE_IO_COMPLETION_PORT
4502		io_completion_port_remove_clock_io(rio);
4503#	    endif
4504		close_and_delete_fd_from_list(rio->fd, NULL);
4505		purge_recv_buffers_for_fd(rio->fd);
4506		rio->fd = -1;
4507	}
4508
4509	UNBLOCKIO();
4510}
4511#endif	/* REFCLOCK */
4512
4513
4514/*
4515 * On NT a SOCKET is an unsigned int so we cannot possibly keep it in
4516 * an array. So we use one of the ISC_LIST functions to hold the
4517 * socket value and use that when we want to enumerate it.
4518 *
4519 * This routine is called by the forked intres child process to close
4520 * all open sockets.  On Windows there's no need as intres runs in
4521 * the same process as a thread.
4522 */
4523#ifndef SYS_WINNT
4524void
4525kill_asyncio(
4526	int	startfd
4527	)
4528{
4529	BLOCKIO();
4530
4531	/*
4532	 * In the child process we do not maintain activefds and
4533	 * maxactivefd.  Zeroing maxactivefd disables code which
4534	 * maintains it in close_and_delete_fd_from_list().
4535	 */
4536	maxactivefd = 0;
4537
4538	while (fd_list != NULL)
4539		close_and_delete_fd_from_list(fd_list->fd, NULL);
4540
4541	UNBLOCKIO();
4542}
4543#endif	/* !SYS_WINNT */
4544
4545
4546/*
4547 * Add and delete functions for the list of input file descriptors
4548 */
4549static void
4550add_fd_to_list(
4551	SOCKET fd,
4552	enum desc_type type
4553	)
4554{
4555	vsock_t *lsock = emalloc(sizeof(*lsock));
4556
4557	lsock->fd = fd;
4558	lsock->type = type;
4559
4560	LINK_SLIST(fd_list, lsock, link);
4561	maintain_activefds(fd, 0);
4562}
4563
4564
4565static void
4566close_and_delete_fd_from_list(
4567	SOCKET fd,
4568	endpt *ep	/* req. if fd is in struct endpt */
4569	)
4570{
4571	vsock_t *lsock;
4572
4573	UNLINK_EXPR_SLIST(lsock, fd_list, fd ==
4574	    UNLINK_EXPR_SLIST_CURRENT()->fd, link, vsock_t);
4575
4576	if (NULL == lsock)
4577		return;
4578
4579	switch (lsock->type) {
4580
4581	case FD_TYPE_SOCKET:
4582	    #ifdef HAVE_IO_COMPLETION_PORT
4583		if (ep != NULL) {
4584			io_completion_port_remove_socket(fd, ep);
4585		}
4586	    #endif
4587		closesocket(lsock->fd);
4588		break;
4589
4590	case FD_TYPE_FILE:
4591		closeserial((int)lsock->fd);
4592		break;
4593
4594	default:
4595		msyslog(LOG_ERR,
4596			"internal error - illegal descriptor type %d - EXITING",
4597			(int)lsock->type);
4598		exit(1);
4599	}
4600
4601	free(lsock);
4602	/*
4603	 * remove from activefds
4604	 */
4605	maintain_activefds(fd, 1);
4606}
4607
4608
4609static void
4610add_addr_to_list(
4611	sockaddr_u *	addr,
4612	endpt *		ep
4613	)
4614{
4615	remaddr_t *laddr;
4616
4617#ifdef DEBUG
4618	if (find_addr_in_list(addr) == NULL) {
4619#endif
4620		/* not there yet - add to list */
4621		laddr = emalloc(sizeof(*laddr));
4622		laddr->addr = *addr;
4623		laddr->ep = ep;
4624
4625		LINK_SLIST(remoteaddr_list, laddr, link);
4626
4627		DPRINTF(4, ("Added addr %s to list of addresses\n",
4628			    stoa(addr)));
4629#ifdef DEBUG
4630	} else
4631		DPRINTF(4, ("WARNING: Attempt to add duplicate addr %s to address list\n",
4632			    stoa(addr)));
4633#endif
4634}
4635
4636
4637static void
4638delete_addr_from_list(
4639	sockaddr_u *addr
4640	)
4641{
4642	remaddr_t *unlinked;
4643
4644	UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, SOCK_EQ(addr,
4645		&(UNLINK_EXPR_SLIST_CURRENT()->addr)), link, remaddr_t);
4646
4647	if (unlinked != NULL) {
4648		DPRINTF(4, ("Deleted addr %s from list of addresses\n",
4649			stoa(addr)));
4650		free(unlinked);
4651	}
4652}
4653
4654
4655static void
4656delete_interface_from_list(
4657	endpt *iface
4658	)
4659{
4660	remaddr_t *unlinked;
4661
4662	for (;;) {
4663		UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, iface ==
4664		    UNLINK_EXPR_SLIST_CURRENT()->ep, link,
4665		    remaddr_t);
4666
4667		if (unlinked == NULL)
4668			break;
4669		DPRINTF(4, ("Deleted addr %s for interface #%d %s from list of addresses\n",
4670			    stoa(&unlinked->addr), iface->ifnum,
4671			    iface->name));
4672		free(unlinked);
4673	}
4674}
4675
4676
4677static endpt *
4678find_addr_in_list(
4679	sockaddr_u *addr
4680	)
4681{
4682	remaddr_t *entry;
4683
4684	DPRINTF(4, ("Searching for addr %s in list of addresses - ",
4685		    stoa(addr)));
4686
4687	for (entry = remoteaddr_list;
4688	     entry != NULL;
4689	     entry = entry->link)
4690		if (SOCK_EQ(&entry->addr, addr)) {
4691			DPRINTF(4, ("FOUND\n"));
4692			return entry->ep;
4693		}
4694
4695	DPRINTF(4, ("NOT FOUND\n"));
4696	return NULL;
4697}
4698
4699
4700/*
4701 * Find the given address with the all given flags set in the list
4702 */
4703static endpt *
4704find_flagged_addr_in_list(
4705	sockaddr_u *	addr,
4706	u_int32		flags
4707	)
4708{
4709	remaddr_t *entry;
4710
4711	DPRINTF(4, ("Finding addr %s with flags %d in list: ",
4712		    stoa(addr), flags));
4713
4714	for (entry = remoteaddr_list;
4715	     entry != NULL;
4716	     entry = entry->link)
4717
4718		if (SOCK_EQ(&entry->addr, addr)
4719		    && (entry->ep->flags & flags) == flags) {
4720
4721			DPRINTF(4, ("FOUND\n"));
4722			return entry->ep;
4723		}
4724
4725	DPRINTF(4, ("NOT FOUND\n"));
4726	return NULL;
4727}
4728
4729
4730const char *
4731localaddrtoa(
4732	endpt *la
4733	)
4734{
4735	return (NULL == la)
4736		   ? "<null>"
4737		   : stoa(&la->sin);
4738}
4739
4740
4741#ifdef HAS_ROUTING_SOCKET
4742# ifndef UPDATE_GRACE
4743#  define UPDATE_GRACE	3	/* min. UPDATE_GRACE - 1 seconds before scanning */
4744# endif
4745
4746static void
4747process_routing_msgs(struct asyncio_reader *reader)
4748{
4749	static void *	buffer;
4750	static size_t	buffsz = 8192;
4751	int		cnt, new, msg_type;
4752	socklen_t	len;
4753#ifdef HAVE_RTNETLINK
4754	struct nlmsghdr *nh;
4755#else
4756	struct rt_msghdr rtm;
4757	char *p;
4758	char *endp;
4759#endif
4760
4761	if (scan_addrs_once) {
4762		/*
4763		 * discard ourselves if we are not needed any more
4764		 * usually happens when running unprivileged
4765		 */
4766		goto disable;
4767	}
4768
4769	if (NULL == buffer) {
4770		buffer = emalloc(buffsz);
4771	}
4772
4773	cnt = read(reader->fd, buffer, buffsz);
4774
4775	if (cnt < 0) {
4776		if (errno == ENOBUFS) {
4777			/* increase socket buffer by 25% */
4778			len = sizeof cnt;
4779			if (0 > getsockopt(reader->fd, SOL_SOCKET, SO_RCVBUF, &cnt, &len) ||
4780			    sizeof cnt != len) {
4781				msyslog(LOG_ERR,
4782					"routing getsockopt SO_RCVBUF %u %u: %m - disabling",
4783					(u_int)cnt, (u_int)sizeof cnt);
4784				goto disable;
4785			}
4786			new = cnt + (cnt / 4);
4787			if (0 > setsockopt(reader->fd, SOL_SOCKET, SO_RCVBUF, &new, sizeof new)) {
4788				msyslog(LOG_ERR,
4789					"routing setsockopt SO_RCVBUF %d -> %d: %m - disabling",
4790					cnt, new);
4791				goto disable;
4792			}
4793		} else {
4794			msyslog(LOG_ERR,
4795				"routing socket reports: %m - disabling");
4796		    disable:
4797			remove_asyncio_reader(reader);
4798			delete_asyncio_reader(reader);
4799			return;
4800		}
4801	}
4802
4803	/*
4804	 * process routing message
4805	 */
4806#ifdef HAVE_RTNETLINK
4807	for (nh = buffer; NLMSG_OK(nh, cnt); nh = NLMSG_NEXT(nh, cnt))
4808	{
4809		msg_type = nh->nlmsg_type;
4810#else
4811	for (p = buffer, endp = p + cnt;
4812	     (p + sizeof(struct rt_msghdr)) <= endp;
4813	     p += rtm.rtm_msglen)
4814	{
4815		memcpy(&rtm, p, sizeof(rtm));
4816		if (rtm.rtm_version != RTM_VERSION) {
4817			msyslog(LOG_ERR,
4818				"version mismatch (got %d - expected %d) on routing socket - disabling",
4819				rtm.rtm_version, RTM_VERSION);
4820
4821			remove_asyncio_reader(reader);
4822			delete_asyncio_reader(reader);
4823			return;
4824		}
4825		msg_type = rtm.rtm_type;
4826#endif	/* !HAVE_RTNETLINK */
4827		switch (msg_type) {
4828#ifdef RTM_NEWADDR
4829		case RTM_NEWADDR:
4830#endif
4831#ifdef RTM_DELADDR
4832		case RTM_DELADDR:
4833#endif
4834#ifdef RTM_ADD
4835		case RTM_ADD:
4836#endif
4837#ifdef RTM_DELETE
4838		case RTM_DELETE:
4839#endif
4840#ifdef RTM_REDIRECT
4841		case RTM_REDIRECT:
4842#endif
4843#ifdef RTM_CHANGE
4844		case RTM_CHANGE:
4845#endif
4846#ifdef RTM_LOSING
4847		case RTM_LOSING:
4848#endif
4849#ifdef RTM_IFINFO
4850		case RTM_IFINFO:
4851#endif
4852#ifdef RTM_IFANNOUNCE
4853		case RTM_IFANNOUNCE:
4854#endif
4855#ifdef RTM_NEWLINK
4856		case RTM_NEWLINK:
4857#endif
4858#ifdef RTM_DELLINK
4859		case RTM_DELLINK:
4860#endif
4861#ifdef RTM_NEWROUTE
4862		case RTM_NEWROUTE:
4863#endif
4864#ifdef RTM_DELROUTE
4865		case RTM_DELROUTE:
4866#endif
4867			/*
4868			 * we are keen on new and deleted addresses and
4869			 * if an interface goes up and down or routing
4870			 * changes
4871			 */
4872			DPRINTF(3, ("routing message op = %d: scheduling interface update\n",
4873				    msg_type));
4874			endpt_scan_timer = UPDATE_GRACE + current_time;
4875			break;
4876#ifdef HAVE_RTNETLINK
4877		case NLMSG_DONE:
4878			/* end of multipart message */
4879			return;
4880#endif
4881		default:
4882			/*
4883			 * the rest doesn't bother us.
4884			 */
4885			DPRINTF(4, ("routing message op = %d: ignored\n",
4886				    msg_type));
4887			break;
4888		}
4889	}
4890}
4891
4892/*
4893 * set up routing notifications
4894 */
4895static void
4896init_async_notifications(void)
4897{
4898	struct asyncio_reader *reader;
4899#ifdef HAVE_RTNETLINK
4900	int fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
4901	struct sockaddr_nl sa;
4902#else
4903	int fd = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
4904#endif
4905	if (fd < 0) {
4906		msyslog(LOG_ERR,
4907			"unable to open routing socket (%m) - using polled interface update");
4908		return;
4909	}
4910
4911	fd = move_fd(fd);
4912#ifdef HAVE_RTNETLINK
4913	ZERO(sa);
4914	sa.nl_family = PF_NETLINK;
4915	sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR
4916		       | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_ROUTE
4917		       | RTMGRP_IPV4_MROUTE | RTMGRP_IPV6_ROUTE
4918		       | RTMGRP_IPV6_MROUTE;
4919	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
4920		msyslog(LOG_ERR,
4921			"bind failed on routing socket (%m) - using polled interface update");
4922		return;
4923	}
4924#endif
4925	make_socket_nonblocking(fd);
4926#if defined(HAVE_SIGNALED_IO)
4927	init_socket_sig(fd);
4928#endif /* HAVE_SIGNALED_IO */
4929
4930	reader = new_asyncio_reader();
4931
4932	reader->fd = fd;
4933	reader->receiver = process_routing_msgs;
4934
4935	add_asyncio_reader(reader, FD_TYPE_SOCKET);
4936	msyslog(LOG_INFO,
4937		"Listening on routing socket on fd #%d for interface updates",
4938		fd);
4939}
4940#else
/*
 * HAS_ROUTING_SOCKET not defined:
 * init_async_notifications - placeholder that sets up nothing, so
 * callers need no conditional compilation of their own.
 */
static void
init_async_notifications(void)
{
	/* no routing socket available: nothing to register */
	return;
}
4946#endif
4947