ntp_io.c revision 358659
1/*
2 * ntp_io.c - input/output routines for ntpd.	The socket-opening code
3 *		   was shamelessly stolen from ntpd.
4 */
5
6#ifdef HAVE_CONFIG_H
7# include <config.h>
8#endif
9
10#include <stdio.h>
11#include <signal.h>
12#ifdef HAVE_FNMATCH_H
13# include <fnmatch.h>
14# if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE)
15#  define FNM_CASEFOLD FNM_IGNORECASE
16# endif
17#endif
18#ifdef HAVE_SYS_PARAM_H
19# include <sys/param.h>
20#endif
21#ifdef HAVE_SYS_IOCTL_H
22# include <sys/ioctl.h>
23#endif
24#ifdef HAVE_SYS_SOCKIO_H	/* UXPV: SIOC* #defines (Frank Vance <fvance@waii.com>) */
25# include <sys/sockio.h>
26#endif
27#ifdef HAVE_SYS_UIO_H
28# include <sys/uio.h>
29#endif
30
31#include "ntp_machine.h"
32#include "ntpd.h"
33#include "ntp_io.h"
34#include "iosignal.h"
35#include "ntp_lists.h"
36#include "ntp_refclock.h"
37#include "ntp_stdlib.h"
38#include "ntp_worker.h"
39#include "ntp_request.h"
40#include "ntp_assert.h"
41#include "timevalops.h"
42#include "timespecops.h"
43#include "ntpd-opts.h"
44#include "safecast.h"
45
46/* Don't include ISC's version of IPv6 variables and structures */
47#define ISC_IPV6_H 1
48#include <isc/mem.h>
49#include <isc/interfaceiter.h>
50#include <isc/netaddr.h>
51#include <isc/result.h>
52#include <isc/sockaddr.h>
53
54#ifdef SIM
55#include "ntpsim.h"
56#endif
57
58#ifdef HAS_ROUTING_SOCKET
59# include <net/route.h>
60# ifdef HAVE_RTNETLINK
61#  include <linux/rtnetlink.h>
62# endif
63#endif
64
65/*
66 * setsockopt does not always have the same arg declaration
67 * across all platforms. If it's not defined we make it empty
68 */
69
70#ifndef SETSOCKOPT_ARG_CAST
71#define SETSOCKOPT_ARG_CAST
72#endif
73
74extern int listen_to_virtual_ips;
75
76#ifndef IPTOS_DSCP_EF
77#define IPTOS_DSCP_EF 0xb8
78#endif
79int qos = IPTOS_DSCP_EF;	/* QoS RFC3246 */
80
81#ifdef LEAP_SMEAR
82/* TODO burnicki: This should be moved to ntp_timer.c, but if we do so
83 * we get a linker error. Since we're running out of time before the leap
 * second occurs, we leave it here where it just works.
85 */
86int leap_smear_intv;
87#endif
88
89/*
90 * NIC rule entry
91 */
92typedef struct nic_rule_tag nic_rule;
93
94struct nic_rule_tag {
95	nic_rule *	next;
96	nic_rule_action	action;
97	nic_rule_match	match_type;
98	char *		if_name;
99	sockaddr_u	addr;
100	int		prefixlen;
101};
102
103/*
104 * NIC rule listhead.  Entries are added at the head so that the first
105 * match in the list is the last matching rule specified.
106 */
107nic_rule *nic_rule_list;
108
109
110#if defined(SO_BINTIME) && defined(SCM_BINTIME) && defined(CMSG_FIRSTHDR)
111#  define HAVE_PACKET_TIMESTAMP
112#  define HAVE_BINTIME
113#  ifdef BINTIME_CTLMSGBUF_SIZE
114#   define CMSG_BUFSIZE BINTIME_CTLMSGBUF_SIZE
115#  else
116#   define CMSG_BUFSIZE  1536 /* moderate default */
117#  endif
118#elif defined(SO_TIMESTAMPNS) && defined(SCM_TIMESTAMPNS) && defined(CMSG_FIRSTHDR)
119#  define HAVE_PACKET_TIMESTAMP
120#  define HAVE_TIMESTAMPNS
121#  ifdef TIMESTAMPNS_CTLMSGBUF_SIZE
122#   define CMSG_BUFSIZE TIMESTAMPNS_CTLMSGBUF_SIZE
123#  else
124#   define CMSG_BUFSIZE  1536 /* moderate default */
125#  endif
126#elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP) && defined(CMSG_FIRSTHDR)
127#  define HAVE_PACKET_TIMESTAMP
128#  define HAVE_TIMESTAMP
129#  ifdef TIMESTAMP_CTLMSGBUF_SIZE
130#   define CMSG_BUFSIZE TIMESTAMP_CTLMSGBUF_SIZE
131#  else
132#   define CMSG_BUFSIZE  1536 /* moderate default */
133#  endif
134#else
135/* fill in for old/other timestamp interfaces */
136#endif
137
138#if defined(SYS_WINNT)
139#include "win32_io.h"
140#include <isc/win32os.h>
141#endif
142
143/*
144 * We do asynchronous input using the SIGIO facility.  A number of
145 * recvbuf buffers are preallocated for input.	In the signal
146 * handler we poll to see which sockets are ready and read the
147 * packets from them into the recvbuf's along with a time stamp and
148 * an indication of the source host and the interface it was received
149 * through.  This allows us to get as accurate receive time stamps
150 * as possible independent of other processing going on.
151 *
152 * We watch the number of recvbufs available to the signal handler
153 * and allocate more when this number drops below the low water
154 * mark.  If the signal handler should run out of buffers in the
155 * interim it will drop incoming frames, the idea being that it is
156 * better to drop a packet than to be inaccurate.
157 */
158
159
160/*
161 * Other statistics of possible interest
162 */
163volatile u_long packets_dropped;	/* total number of packets dropped on reception */
164volatile u_long packets_ignored;	/* packets received on wild card interface */
165volatile u_long packets_received;	/* total number of packets received */
166	 u_long packets_sent;		/* total number of packets sent */
167	 u_long packets_notsent;	/* total number of packets which couldn't be sent */
168
169volatile u_long handler_calls;	/* number of calls to interrupt handler */
170volatile u_long handler_pkts;	/* number of pkts received by handler */
171u_long io_timereset;		/* time counters were reset */
172
173/*
174 * Interface stuff
175 */
176endpt *	any_interface;		/* wildcard ipv4 interface */
177endpt *	any6_interface;		/* wildcard ipv6 interface */
178endpt *	loopback_interface;	/* loopback ipv4 interface */
179
180static isc_boolean_t broadcast_client_enabled;	/* is broadcast client enabled */
181u_int sys_ifnum;			/* next .ifnum to assign */
182int ninterfaces;			/* Total number of interfaces */
183
184int disable_dynamic_updates;		/* scan interfaces once only */
185
186#ifdef REFCLOCK
187/*
188 * Refclock stuff.	We keep a chain of structures with data concerning
189 * the guys we are doing I/O for.
190 */
191static	struct refclockio *refio;
192#endif /* REFCLOCK */
193
194/*
195 * File descriptor masks etc. for call to select
196 * Not needed for I/O Completion Ports or anything outside this file
197 */
198static fd_set activefds;
199static int maxactivefd;
200
201/*
202 * bit alternating value to detect verified interfaces during an update cycle
203 */
204static  u_short		sys_interphase = 0;
205
206static endpt *	new_interface(endpt *);
207static void	add_interface(endpt *);
208static int	update_interfaces(u_short, interface_receiver_t,
209				  void *);
210static void	remove_interface(endpt *);
211static endpt *	create_interface(u_short, endpt *);
212
213static int	is_wildcard_addr	(const sockaddr_u *);
214
215/*
216 * Multicast functions
217 */
218static	isc_boolean_t	addr_ismulticast	(sockaddr_u *);
219static	isc_boolean_t	is_anycast		(sockaddr_u *,
220						 const char *);
221
222/*
223 * Not all platforms support multicast
224 */
225#ifdef MCAST
226static	isc_boolean_t	socket_multicast_enable	(endpt *, sockaddr_u *);
227static	isc_boolean_t	socket_multicast_disable(endpt *, sockaddr_u *);
228#endif
229
230#ifdef DEBUG
231static void interface_dump	(const endpt *);
232static void sockaddr_dump	(const sockaddr_u *);
233static void print_interface	(const endpt *, const char *, const char *);
234#define DPRINT_INTERFACE(level, args) do { if (debug >= (level)) { print_interface args; } } while (0)
235#else
236#define DPRINT_INTERFACE(level, args) do {} while (0)
237#endif
238
239typedef struct vsock vsock_t;
240enum desc_type { FD_TYPE_SOCKET, FD_TYPE_FILE };
241
242struct vsock {
243	vsock_t	*	link;
244	SOCKET		fd;
245	enum desc_type	type;
246};
247
248vsock_t	*fd_list;
249
250#if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
251/*
252 * async notification processing (e. g. routing sockets)
253 */
254/*
255 * support for receiving data on fd that is not a refclock or a socket
256 * like e. g. routing sockets
257 */
258struct asyncio_reader {
259	struct asyncio_reader *link;		    /* the list this is being kept in */
260	SOCKET fd;				    /* fd to be read */
261	void  *data;				    /* possibly local data */
262	void (*receiver)(struct asyncio_reader *);  /* input handler */
263};
264
265struct asyncio_reader *asyncio_reader_list;
266
267static void delete_asyncio_reader (struct asyncio_reader *);
268static struct asyncio_reader *new_asyncio_reader (void);
269static void add_asyncio_reader (struct asyncio_reader *, enum desc_type);
270static void remove_asyncio_reader (struct asyncio_reader *);
271
272#endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
273
274static void init_async_notifications (void);
275
276static	int	addr_eqprefix	(const sockaddr_u *, const sockaddr_u *,
277				 int);
278static int	addr_samesubnet	(const sockaddr_u *, const sockaddr_u *,
279				 const sockaddr_u *, const sockaddr_u *);
280static	int	create_sockets	(u_short);
281static	SOCKET	open_socket	(sockaddr_u *, int, int, endpt *);
282static	void	set_reuseaddr	(int);
283static	isc_boolean_t	socket_broadcast_enable	 (struct interface *, SOCKET, sockaddr_u *);
284
285#if !defined(HAVE_IO_COMPLETION_PORT) && !defined(HAVE_SIGNALED_IO)
286static	char *	fdbits		(int, const fd_set *);
287#endif
288#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
289static	isc_boolean_t	socket_broadcast_disable (struct interface *, sockaddr_u *);
290#endif
291
292typedef struct remaddr remaddr_t;
293
294struct remaddr {
295	remaddr_t *		link;
296	sockaddr_u		addr;
297	endpt *			ep;
298};
299
300remaddr_t *	remoteaddr_list;
301endpt *		ep_list;	/* complete endpt list */
302endpt *		mc4_list;	/* IPv4 mcast-capable unicast endpts */
303endpt *		mc6_list;	/* IPv6 mcast-capable unicast endpts */
304
305static endpt *	wildipv4;
306static endpt *	wildipv6;
307
308#ifdef SYS_WINNT
309int accept_wildcard_if_for_winnt;
310#else
311const int accept_wildcard_if_for_winnt = FALSE;
312#endif
313
314static void	add_fd_to_list		(SOCKET, enum desc_type);
315static endpt *	find_addr_in_list	(sockaddr_u *);
316static endpt *	find_flagged_addr_in_list(sockaddr_u *, u_int32);
317static void	delete_addr_from_list	(sockaddr_u *);
318static void	delete_interface_from_list(endpt *);
319static void	close_and_delete_fd_from_list(SOCKET);
320static void	add_addr_to_list	(sockaddr_u *, endpt *);
321static void	create_wildcards	(u_short);
322static endpt *	findlocalinterface	(sockaddr_u *, int, int);
323static endpt *	findclosestinterface	(sockaddr_u *, int);
324#ifdef DEBUG
325static const char *	action_text	(nic_rule_action);
326#endif
327static nic_rule_action	interface_action(char *, sockaddr_u *, u_int32);
328static void		convert_isc_if	(isc_interface_t *,
329					 endpt *, u_short);
330static void		calc_addr_distance(sockaddr_u *,
331					   const sockaddr_u *,
332					   const sockaddr_u *);
333static int		cmp_addr_distance(const sockaddr_u *,
334					  const sockaddr_u *);
335
336/*
337 * Routines to read the ntp packets
338 */
339#if !defined(HAVE_IO_COMPLETION_PORT)
340static inline int	read_network_packet	(SOCKET, struct interface *, l_fp);
341static void		ntpd_addremove_io_fd	(int, int, int);
342static void 		input_handler_scan	(const l_fp*, const fd_set*);
343static int/*BOOL*/	sanitize_fdset		(int errc);
344#ifdef REFCLOCK
345static inline int	read_refclock_packet	(SOCKET, struct refclockio *, l_fp);
346#endif
347#ifdef HAVE_SIGNALED_IO
348static void 		input_handler		(l_fp*);
349#endif
350#endif
351
352
353#ifndef HAVE_IO_COMPLETION_PORT
354void
355maintain_activefds(
356	int fd,
357	int closing
358	)
359{
360	int i;
361
362	if (fd < 0 || fd >= FD_SETSIZE) {
363		msyslog(LOG_ERR,
364			"Too many sockets in use, FD_SETSIZE %d exceeded by fd %d",
365			FD_SETSIZE, fd);
366		exit(1);
367	}
368
369	if (!closing) {
370		FD_SET(fd, &activefds);
371		maxactivefd = max(fd, maxactivefd);
372	} else {
373		FD_CLR(fd, &activefds);
374		if (maxactivefd && fd == maxactivefd) {
375			for (i = maxactivefd - 1; i >= 0; i--)
376				if (FD_ISSET(i, &activefds)) {
377					maxactivefd = i;
378					break;
379				}
380			INSIST(fd != maxactivefd);
381		}
382	}
383}
384#endif	/* !HAVE_IO_COMPLETION_PORT */
385
386
387#ifdef DEBUG_TIMING
388/*
389 * collect timing information for various processing
390 * paths. currently we only pass them on to the file
391 * for later processing. this could also do histogram
392 * based analysis in other to reduce the load (and skew)
393 * dur to the file output
394 */
395void
396collect_timing(struct recvbuf *rb, const char *tag, int count, l_fp *dts)
397{
398	char buf[256];
399
400	snprintf(buf, sizeof(buf), "%s %d %s %s",
401		 (rb != NULL)
402		     ? ((rb->dstadr != NULL)
403			    ? stoa(&rb->recv_srcadr)
404			    : "-REFCLOCK-")
405		     : "-",
406		 count, lfptoa(dts, 9), tag);
407	record_timing_stats(buf);
408}
409#endif
410
411/*
412 * About dynamic interfaces, sockets, reception and more...
413 *
414 * the code solves following tasks:
415 *
416 *   - keep a current list of active interfaces in order
 *     to bind to the interface address on NTP_PORT so that
418 *     all wild and specific bindings for NTP_PORT are taken by ntpd
419 *     to avoid other daemons messing with the time or sockets.
420 *   - all interfaces keep a list of peers that are referencing
421 *     the interface in order to quickly re-assign the peers to
422 *     new interface in case an interface is deleted (=> gone from system or
423 *     down)
424 *   - have a preconfigured socket ready with the right local address
425 *     for transmission and reception
426 *   - have an address list for all destination addresses used within ntpd
427 *     to find the "right" preconfigured socket.
428 *   - facilitate updating the internal interface list with respect to
429 *     the current kernel state
430 *
431 * special issues:
432 *
433 *   - mapping of multicast addresses to the interface affected is not always
434 *     one to one - especially on hosts with multiple interfaces
435 *     the code here currently allocates a separate interface entry for those
436 *     multicast addresses
437 *     iff it is able to bind to a *new* socket with the multicast address (flags |= MCASTIF)
438 *     in case of failure the multicast address is bound to an existing interface.
439 *   - on some systems it is perfectly legal to assign the same address to
440 *     multiple interfaces. Therefore this code does not keep a list of interfaces
441 *     but a list of interfaces that represent a unique address as determined by the kernel
442 *     by the procedure in findlocalinterface. Thus it is perfectly legal to see only
443 *     one representative of a group of real interfaces if they share the same address.
444 *
445 * Frank Kardel 20050910
446 */
447
448/*
449 * init_io - initialize I/O module.
450 */
451void
452init_io(void)
453{
454	/* Init buffer free list and stat counters */
455	init_recvbuff(RECV_INIT);
456	/* update interface every 5 minutes as default */
457	interface_interval = 300;
458
459#ifdef WORK_PIPE
460	addremove_io_fd = &ntpd_addremove_io_fd;
461#endif
462
463#if defined(SYS_WINNT)
464	init_io_completion_port();
465#elif defined(HAVE_SIGNALED_IO)
466	(void) set_signal(input_handler);
467#endif
468}
469
470
/*
 * ntpd_addremove_io_fd - add a descriptor to, or remove it from, the
 * set of descriptors ntpd watches for input.
 *
 *	fd		descriptor concerned
 *	is_pipe		unused here
 *	remove_it	nonzero to stop watching fd, zero to start
 *
 * With signaled I/O, a descriptor being added is first set up for
 * signal delivery.
 */
static void
ntpd_addremove_io_fd(
	int	fd,
	int	is_pipe,
	int	remove_it
	)
{
	UNUSED_ARG(is_pipe);

#ifdef HAVE_SIGNALED_IO
	if (0 == remove_it)
		init_socket_sig(fd);
#endif /* HAVE_SIGNALED_IO */

	maintain_activefds(fd, remove_it);
}
487
488
489/*
490 * io_open_sockets - call socket creation routine
491 */
492void
493io_open_sockets(void)
494{
495	static int already_opened;
496
497	if (already_opened || HAVE_OPT( SAVECONFIGQUIT ))
498		return;
499
500	already_opened = 1;
501
502	/*
503	 * Create the sockets
504	 */
505	BLOCKIO();
506	create_sockets(NTP_PORT);
507	UNBLOCKIO();
508
509	init_async_notifications();
510
511	DPRINTF(3, ("io_open_sockets: maxactivefd %d\n", maxactivefd));
512}
513
514
515#ifdef DEBUG
516/*
517 * function to dump the contents of the interface structure
518 * for debugging use only.
519 * We face a dilemma here -- sockets are FDs under POSIX and
520 * actually HANDLES under Windows. So we use '%lld' as format
521 * and cast the value to 'long long'; this should not hurt
522 * with UNIX-like systems and does not truncate values on Win64.
523 */
524void
525interface_dump(const endpt *itf)
526{
527	printf("Dumping interface: %p\n", itf);
528	printf("fd = %lld\n", (long long)itf->fd);
529	printf("bfd = %lld\n", (long long)itf->bfd);
530	printf("sin = %s,\n", stoa(&itf->sin));
531	sockaddr_dump(&itf->sin);
532	printf("bcast = %s,\n", stoa(&itf->bcast));
533	sockaddr_dump(&itf->bcast);
534	printf("mask = %s,\n", stoa(&itf->mask));
535	sockaddr_dump(&itf->mask);
536	printf("name = %s\n", itf->name);
537	printf("flags = 0x%08x\n", itf->flags);
538	printf("last_ttl = %d\n", itf->last_ttl);
539	printf("addr_refid = %08x\n", itf->addr_refid);
540	printf("num_mcast = %d\n", itf->num_mcast);
541	printf("received = %ld\n", itf->received);
542	printf("sent = %ld\n", itf->sent);
543	printf("notsent = %ld\n", itf->notsent);
544	printf("ifindex = %u\n", itf->ifindex);
545	printf("peercnt = %u\n", itf->peercnt);
546	printf("phase = %u\n", itf->phase);
547}
548
549/*
550 * sockaddr_dump - hex dump the start of a sockaddr_u
551 */
552static void
553sockaddr_dump(const sockaddr_u *psau)
554{
555	/* Limit the size of the sockaddr_in6 hex dump */
556	const int maxsize = min(32, sizeof(psau->sa6));
557	const u_char *	cp;
558	int		i;
559
560	/* XXX: Should we limit maxsize based on psau->saX.sin_family? */
561	cp = (const void *)&psau->sa6;
562
563	for(i = 0; i < maxsize; i++) {
564		printf("%02x", *cp++);
565		if (!((i + 1) % 4))
566			printf(" ");
567	}
568	printf("\n");
569}
570
571/*
572 * print_interface - helper to output debug information
573 */
574static void
575print_interface(const endpt *iface, const char *pfx, const char *sfx)
576{
577	printf("%sinterface #%d: fd=%lld, bfd=%lld, name=%s, flags=0x%x, ifindex=%u, sin=%s",
578	       pfx,
579	       iface->ifnum,
580	       (long long)iface->fd,
581	       (long long)iface->bfd,
582	       iface->name,
583	       iface->flags,
584	       iface->ifindex,
585	       stoa(&iface->sin));
586	if (AF_INET == iface->family) {
587		if (iface->flags & INT_BROADCAST)
588			printf(", bcast=%s", stoa(&iface->bcast));
589		printf(", mask=%s", stoa(&iface->mask));
590	}
591	printf(", %s:%s",
592	       (iface->ignore_packets)
593		   ? "Disabled"
594		   : "Enabled",
595	       sfx);
596	if (debug > 4)	/* in-depth debugging only */
597		interface_dump(iface);
598}
599#endif
600
601#if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
602/*
603 * create an asyncio_reader structure
604 */
605static struct asyncio_reader *
606new_asyncio_reader(void)
607{
608	struct asyncio_reader *reader;
609
610	reader = emalloc_zero(sizeof(*reader));
611	reader->fd = INVALID_SOCKET;
612
613	return reader;
614}
615
/*
 * delete_asyncio_reader - release the storage of a reader.
 *
 * Only frees the structure; it neither unlinks the reader nor closes
 * its descriptor.
 */
static void
delete_asyncio_reader(
	struct asyncio_reader *reader
	)
{
	free(reader);
}
626
627/*
628 * add asynchio_reader
629 */
630static void
631add_asyncio_reader(
632	struct asyncio_reader *	reader,
633	enum desc_type		type)
634{
635	LINK_SLIST(asyncio_reader_list, reader, link);
636	add_fd_to_list(reader->fd, type);
637}
638
639/*
640 * remove asynchio_reader
641 */
642static void
643remove_asyncio_reader(
644	struct asyncio_reader *reader
645	)
646{
647	struct asyncio_reader *unlinked;
648
649	UNLINK_SLIST(unlinked, asyncio_reader_list, reader, link,
650	    struct asyncio_reader);
651
652	if (reader->fd != INVALID_SOCKET)
653		close_and_delete_fd_from_list(reader->fd);
654
655	reader->fd = INVALID_SOCKET;
656}
657#endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
658
659
660/* compare two sockaddr prefixes */
661static int
662addr_eqprefix(
663	const sockaddr_u *	a,
664	const sockaddr_u *	b,
665	int			prefixlen
666	)
667{
668	isc_netaddr_t		isc_a;
669	isc_netaddr_t		isc_b;
670	isc_sockaddr_t		isc_sa;
671
672	ZERO(isc_sa);
673	memcpy(&isc_sa.type, a, min(sizeof(isc_sa.type), sizeof(*a)));
674	isc_netaddr_fromsockaddr(&isc_a, &isc_sa);
675
676	ZERO(isc_sa);
677	memcpy(&isc_sa.type, b, min(sizeof(isc_sa.type), sizeof(*b)));
678	isc_netaddr_fromsockaddr(&isc_b, &isc_sa);
679
680	return (int)isc_netaddr_eqprefix(&isc_a, &isc_b,
681					 (u_int)prefixlen);
682}
683
684
685static int
686addr_samesubnet(
687	const sockaddr_u *	a,
688	const sockaddr_u *	a_mask,
689	const sockaddr_u *	b,
690	const sockaddr_u *	b_mask
691	)
692{
693	const u_int32 *	pa;
694	const u_int32 *	pa_limit;
695	const u_int32 *	pb;
696	const u_int32 *	pm;
697	size_t		loops;
698
699	REQUIRE(AF(a) == AF(a_mask));
700	REQUIRE(AF(b) == AF(b_mask));
701	/*
702	 * With address and mask families verified to match, comparing
703	 * the masks also validates the address's families match.
704	 */
705	if (!SOCK_EQ(a_mask, b_mask))
706		return FALSE;
707
708	if (IS_IPV6(a)) {
709		loops = sizeof(NSRCADR6(a)) / sizeof(*pa);
710		pa = (const void *)&NSRCADR6(a);
711		pb = (const void *)&NSRCADR6(b);
712		pm = (const void *)&NSRCADR6(a_mask);
713	} else {
714		loops = sizeof(NSRCADR(a)) / sizeof(*pa);
715		pa = (const void *)&NSRCADR(a);
716		pb = (const void *)&NSRCADR(b);
717		pm = (const void *)&NSRCADR(a_mask);
718	}
719	for (pa_limit = pa + loops; pa < pa_limit; pa++, pb++, pm++)
720		if ((*pa & *pm) != (*pb & *pm))
721			return FALSE;
722
723	return TRUE;
724}
725
726
727/*
728 * interface list enumerator - visitor pattern
729 */
730void
731interface_enumerate(
732	interface_receiver_t	receiver,
733	void *			data
734	)
735{
736	interface_info_t ifi;
737
738	ifi.action = IFS_EXISTS;
739	for (ifi.ep = ep_list; ifi.ep != NULL; ifi.ep = ifi.ep->elink)
740		(*receiver)(data, &ifi);
741}
742
743/*
744 * do standard initialization of interface structure
745 */
746static void
747init_interface(
748	endpt *ep
749	)
750{
751	ZERO(*ep);
752	ep->fd = INVALID_SOCKET;
753	ep->bfd = INVALID_SOCKET;
754	ep->phase = sys_interphase;
755}
756
757
758/*
759 * create new interface structure initialize from
760 * template structure or via standard initialization
761 * function
762 */
763static struct interface *
764new_interface(
765	struct interface *interface
766	)
767{
768	struct interface *	iface;
769
770	iface = emalloc(sizeof(*iface));
771
772	if (NULL == interface)
773		init_interface(iface);
774	else				/* use the template */
775		memcpy(iface, interface, sizeof(*iface));
776
777	/* count every new instance of an interface in the system */
778	iface->ifnum = sys_ifnum++;
779	iface->starttime = current_time;
780
781#   ifdef HAVE_IO_COMPLETION_PORT
782	if (!io_completion_port_add_interface(iface)) {
783		msyslog(LOG_EMERG, "cannot register interface with IO engine -- will exit now");
784		exit(1);
785	}
786#   endif
787	return iface;
788}
789
790
791/*
792 * return interface storage into free memory pool
793 */
794static void
795delete_interface(
796	endpt *ep
797	)
798{
799#    ifdef HAVE_IO_COMPLETION_PORT
800	io_completion_port_remove_interface(ep);
801#    endif
802	free(ep);
803}
804
805
806/*
807 * link interface into list of known interfaces
808 */
809static void
810add_interface(
811	endpt *	ep
812	)
813{
814	endpt **	pmclisthead;
815	endpt *		scan;
816	endpt *		scan_next;
817	endpt *		unlinked;
818	sockaddr_u *	addr;
819	int		ep_local;
820	int		scan_local;
821	int		same_subnet;
822	int		ep_univ_iid;	/* iface ID from MAC address */
823	int		scan_univ_iid;	/* see RFC 4291 */
824	int		ep_privacy;	/* random local iface ID */
825	int		scan_privacy;	/* see RFC 4941 */
826	int		rc;
827
828	/* Calculate the refid */
829	ep->addr_refid = addr2refid(&ep->sin);
830	/* link at tail so ntpdc -c ifstats index increases each row */
831	LINK_TAIL_SLIST(ep_list, ep, elink, endpt);
832	ninterfaces++;
833#ifdef MCAST
834	/* the rest is for enabled multicast-capable addresses only */
835	if (ep->ignore_packets || !(INT_MULTICAST & ep->flags) ||
836	    INT_LOOPBACK & ep->flags)
837		return;
838# ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
839	if (AF_INET6 == ep->family)
840		return;
841# endif
842	pmclisthead = (AF_INET == ep->family)
843			 ? &mc4_list
844			 : &mc6_list;
845
846	if (AF_INET6 == ep->family) {
847		ep_local =
848		    IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&ep->sin)) ||
849		    IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(&ep->sin));
850		ep_univ_iid = IS_IID_UNIV(&ep->sin);
851		ep_privacy = !!(INT_PRIVACY & ep->flags);
852	} else {
853		ep_local = FALSE;
854		ep_univ_iid = FALSE;
855		ep_privacy = FALSE;
856	}
857	DPRINTF(4, ("add_interface mcast-capable %s%s%s%s\n",
858		    stoa(&ep->sin),
859		    (ep_local) ? " link/scope-local" : "",
860		    (ep_univ_iid) ? " univ-IID" : "",
861		    (ep_privacy) ? " privacy" : ""));
862	/*
863	 * If we have multiple local addresses on the same network
864	 * interface, and some are link- or site-local, do not multicast
865	 * out from the link-/site-local addresses by default, to avoid
866	 * duplicate manycastclient associations between v6 peers using
867	 * link-local and global addresses.  link-local can still be
868	 * chosen using "nic ignore myv6globalprefix::/64".
869	 * Similarly, if we have multiple global addresses from the same
870	 * prefix on the same network interface, multicast from one,
871	 * preferring EUI-64, then static, then least RFC 4941 privacy
872	 * addresses.
873	 */
874	for (scan = *pmclisthead; scan != NULL; scan = scan_next) {
875		scan_next = scan->mclink;
876		if (ep->family != scan->family)
877			continue;
878		if (strcmp(ep->name, scan->name))
879			continue;
880		same_subnet = addr_samesubnet(&ep->sin, &ep->mask,
881					      &scan->sin, &scan->mask);
882		if (AF_INET6 == ep->family) {
883			addr = &scan->sin;
884			scan_local =
885			    IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(addr)) ||
886			    IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(addr));
887			scan_univ_iid = IS_IID_UNIV(addr);
888			scan_privacy = !!(INT_PRIVACY & scan->flags);
889		} else {
890			scan_local = FALSE;
891			scan_univ_iid = FALSE;
892			scan_privacy = FALSE;
893		}
894		DPRINTF(4, ("add_interface mcast-capable scan %s%s%s%s\n",
895			    stoa(&scan->sin),
896			    (scan_local) ? " link/scope-local" : "",
897			    (scan_univ_iid) ? " univ-IID" : "",
898			    (scan_privacy) ? " privacy" : ""));
899		if ((ep_local && !scan_local) || (same_subnet &&
900		    ((ep_privacy && !scan_privacy) ||
901		     (!ep_univ_iid && scan_univ_iid)))) {
902			DPRINTF(4, ("did not add %s to %s of IPv6 multicast-capable list which already has %s\n",
903				stoa(&ep->sin),
904				(ep_local)
905				    ? "tail"
906				    : "head",
907				stoa(&scan->sin)));
908			return;
909		}
910		if ((scan_local && !ep_local) || (same_subnet &&
911		    ((scan_privacy && !ep_privacy) ||
912		     (!scan_univ_iid && ep_univ_iid)))) {
913			UNLINK_SLIST(unlinked, *pmclisthead,
914				     scan, mclink, endpt);
915			DPRINTF(4, ("%s %s from IPv6 multicast-capable list to add %s\n",
916				(unlinked != scan)
917				    ? "Failed to remove"
918				    : "removed",
919				stoa(&scan->sin), stoa(&ep->sin)));
920		}
921	}
922	/*
923	 * Add link/site local at the tail of the multicast-
924	 * capable unicast interfaces list, so that ntpd will
925	 * send from global addresses before link-/site-local
926	 * ones.
927	 */
928	if (ep_local)
929		LINK_TAIL_SLIST(*pmclisthead, ep, mclink, endpt);
930	else
931		LINK_SLIST(*pmclisthead, ep, mclink);
932	DPRINTF(4, ("added %s to %s of IPv%s multicast-capable unicast local address list\n",
933		stoa(&ep->sin),
934		(ep_local)
935		    ? "tail"
936		    : "head",
937		(AF_INET == ep->family)
938		    ? "4"
939		    : "6"));
940
941	if (INVALID_SOCKET == ep->fd)
942		return;
943
944	/*
945	 * select the local address from which to send to multicast.
946	 */
947	switch (AF(&ep->sin)) {
948
949	case AF_INET :
950		rc = setsockopt(ep->fd, IPPROTO_IP,
951				IP_MULTICAST_IF,
952				(void *)&NSRCADR(&ep->sin),
953				sizeof(NSRCADR(&ep->sin)));
954		if (rc)
955			msyslog(LOG_ERR,
956				"setsockopt IP_MULTICAST_IF %s fails: %m",
957				stoa(&ep->sin));
958		break;
959
960# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
961	case AF_INET6 :
962		rc = setsockopt(ep->fd, IPPROTO_IPV6,
963				 IPV6_MULTICAST_IF,
964				 (void *)&ep->ifindex,
965				 sizeof(ep->ifindex));
966		/* do not complain if bound addr scope is ifindex */
967		if (rc && ep->ifindex != SCOPE(&ep->sin))
968			msyslog(LOG_ERR,
969				"setsockopt IPV6_MULTICAST_IF %u for %s fails: %m",
970				ep->ifindex, stoa(&ep->sin));
971		break;
972# endif
973	}
974#endif	/* MCAST */
975}
976
977
978/*
979 * remove interface from known interface list and clean up
980 * associated resources
981 */
982static void
983remove_interface(
984	endpt *	ep
985	)
986{
987	endpt *		unlinked;
988	endpt **	pmclisthead;
989	sockaddr_u	resmask;
990
991	UNLINK_SLIST(unlinked, ep_list, ep, elink, endpt);
992	if (!ep->ignore_packets && INT_MULTICAST & ep->flags) {
993		pmclisthead = (AF_INET == ep->family)
994				 ? &mc4_list
995				 : &mc6_list;
996		UNLINK_SLIST(unlinked, *pmclisthead, ep, mclink, endpt);
997		DPRINTF(4, ("%s %s IPv%s multicast-capable unicast local address list\n",
998			stoa(&ep->sin),
999			(unlinked != NULL)
1000			    ? "removed from"
1001			    : "not found on",
1002			(AF_INET == ep->family)
1003			    ? "4"
1004			    : "6"));
1005	}
1006	delete_interface_from_list(ep);
1007
1008	if (ep->fd != INVALID_SOCKET) {
1009		msyslog(LOG_INFO,
1010			"Deleting interface #%d %s, %s#%d, interface stats: received=%ld, sent=%ld, dropped=%ld, active_time=%ld secs",
1011			ep->ifnum,
1012			ep->name,
1013			stoa(&ep->sin),
1014			SRCPORT(&ep->sin),
1015			ep->received,
1016			ep->sent,
1017			ep->notsent,
1018			current_time - ep->starttime);
1019#	    ifdef HAVE_IO_COMPLETION_PORT
1020		io_completion_port_remove_socket(ep->fd, ep);
1021#	    endif
1022		close_and_delete_fd_from_list(ep->fd);
1023		ep->fd = INVALID_SOCKET;
1024	}
1025
1026	if (ep->bfd != INVALID_SOCKET) {
1027		msyslog(LOG_INFO,
1028			"stop listening for broadcasts to %s on interface #%d %s",
1029			stoa(&ep->bcast), ep->ifnum, ep->name);
1030#	    ifdef HAVE_IO_COMPLETION_PORT
1031		io_completion_port_remove_socket(ep->bfd, ep);
1032#	    endif
1033		close_and_delete_fd_from_list(ep->bfd);
1034		ep->bfd = INVALID_SOCKET;
1035	}
1036#   ifdef HAVE_IO_COMPLETION_PORT
1037	io_completion_port_remove_interface(ep);
1038#   endif
1039
1040	ninterfaces--;
1041	mon_clearinterface(ep);
1042
1043	/* remove restrict interface entry */
1044	SET_HOSTMASK(&resmask, AF(&ep->sin));
1045	hack_restrict(RESTRICT_REMOVEIF, &ep->sin, &resmask,
1046		      -3, RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
1047}
1048
1049
1050static void
1051log_listen_address(
1052	endpt *	ep
1053	)
1054{
1055	msyslog(LOG_INFO, "%s on %d %s %s",
1056		(ep->ignore_packets)
1057		    ? "Listen and drop"
1058		    : "Listen normally",
1059		ep->ifnum,
1060		ep->name,
1061		sptoa(&ep->sin));
1062}
1063
1064
1065static void
1066create_wildcards(
1067	u_short	port
1068	)
1069{
1070	int			v4wild;
1071#ifdef INCLUDE_IPV6_SUPPORT
1072	int			v6wild;
1073#endif
1074	sockaddr_u		wildaddr;
1075	nic_rule_action		action;
1076	struct interface *	wildif;
1077
1078	/*
1079	 * silence "potentially uninitialized" warnings from VC9
1080	 * failing to follow the logic.  Ideally action could remain
1081	 * uninitialized, and the memset be the first statement under
1082	 * the first if (v4wild).
1083	 */
1084	action = ACTION_LISTEN;
1085	ZERO(wildaddr);
1086
1087#ifdef INCLUDE_IPV6_SUPPORT
1088	/*
1089	 * create pseudo-interface with wildcard IPv6 address
1090	 */
1091	v6wild = ipv6_works;
1092	if (v6wild) {
1093		/* set wildaddr to the v6 wildcard address :: */
1094		ZERO(wildaddr);
1095		AF(&wildaddr) = AF_INET6;
1096		SET_ADDR6N(&wildaddr, in6addr_any);
1097		SET_PORT(&wildaddr, port);
1098		SET_SCOPE(&wildaddr, 0);
1099
1100		/* check for interface/nic rules affecting the wildcard */
1101		action = interface_action(NULL, &wildaddr, 0);
1102		v6wild = (ACTION_IGNORE != action);
1103	}
1104	if (v6wild) {
1105		wildif = new_interface(NULL);
1106
1107		strlcpy(wildif->name, "v6wildcard", sizeof(wildif->name));
1108		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1109		wildif->family = AF_INET6;
1110		AF(&wildif->mask) = AF_INET6;
1111		SET_ONESMASK(&wildif->mask);
1112
1113		wildif->flags = INT_UP | INT_WILDCARD;
1114		wildif->ignore_packets = (ACTION_DROP == action);
1115
1116		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1117
1118		if (wildif->fd != INVALID_SOCKET) {
1119			wildipv6 = wildif;
1120			any6_interface = wildif;
1121			add_addr_to_list(&wildif->sin, wildif);
1122			add_interface(wildif);
1123			log_listen_address(wildif);
1124		} else {
1125			msyslog(LOG_ERR,
1126				"unable to bind to wildcard address %s - another process may be running - EXITING",
1127				stoa(&wildif->sin));
1128			exit(1);
1129		}
1130		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1131	}
1132#endif
1133
1134	/*
1135	 * create pseudo-interface with wildcard IPv4 address
1136	 */
1137	v4wild = ipv4_works;
1138	if (v4wild) {
1139		/* set wildaddr to the v4 wildcard address 0.0.0.0 */
1140		AF(&wildaddr) = AF_INET;
1141		SET_ADDR4N(&wildaddr, INADDR_ANY);
1142		SET_PORT(&wildaddr, port);
1143
1144		/* check for interface/nic rules affecting the wildcard */
1145		action = interface_action(NULL, &wildaddr, 0);
1146		v4wild = (ACTION_IGNORE != action);
1147	}
1148	if (v4wild) {
1149		wildif = new_interface(NULL);
1150
1151		strlcpy(wildif->name, "v4wildcard", sizeof(wildif->name));
1152		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1153		wildif->family = AF_INET;
1154		AF(&wildif->mask) = AF_INET;
1155		SET_ONESMASK(&wildif->mask);
1156
1157		wildif->flags = INT_BROADCAST | INT_UP | INT_WILDCARD;
1158		wildif->ignore_packets = (ACTION_DROP == action);
1159#if defined(MCAST)
1160		/*
1161		 * enable multicast reception on the broadcast socket
1162		 */
1163		AF(&wildif->bcast) = AF_INET;
1164		SET_ADDR4N(&wildif->bcast, INADDR_ANY);
1165		SET_PORT(&wildif->bcast, port);
1166#endif /* MCAST */
1167		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1168
1169		if (wildif->fd != INVALID_SOCKET) {
1170			wildipv4 = wildif;
1171			any_interface = wildif;
1172
1173			add_addr_to_list(&wildif->sin, wildif);
1174			add_interface(wildif);
1175			log_listen_address(wildif);
1176		} else {
1177			msyslog(LOG_ERR,
1178				"unable to bind to wildcard address %s - another process may be running - EXITING",
1179				stoa(&wildif->sin));
1180			exit(1);
1181		}
1182		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1183	}
1184}
1185
1186
1187/*
1188 * add_nic_rule() -- insert a rule entry at the head of nic_rule_list.
1189 */
1190void
1191add_nic_rule(
1192	nic_rule_match	match_type,
1193	const char *	if_name,	/* interface name or numeric address */
1194	int		prefixlen,
1195	nic_rule_action	action
1196	)
1197{
1198	nic_rule *	rule;
1199	isc_boolean_t	is_ip;
1200
1201	rule = emalloc_zero(sizeof(*rule));
1202	rule->match_type = match_type;
1203	rule->prefixlen = prefixlen;
1204	rule->action = action;
1205
1206	if (MATCH_IFNAME == match_type) {
1207		REQUIRE(NULL != if_name);
1208		rule->if_name = estrdup(if_name);
1209	} else if (MATCH_IFADDR == match_type) {
1210		REQUIRE(NULL != if_name);
1211		/* set rule->addr */
1212		is_ip = is_ip_address(if_name, AF_UNSPEC, &rule->addr);
1213		REQUIRE(is_ip);
1214	} else
1215		REQUIRE(NULL == if_name);
1216
1217	LINK_SLIST(nic_rule_list, rule, next);
1218}
1219
1220
#ifdef DEBUG
/*
 * action_text - debug helper mapping a nic_rule_action to a short
 * lowercase word.  An unknown value is reported via DPRINTF and trips
 * ENSURE(0); "ERROR" is returned should execution continue.
 */
static const char *
action_text(
	nic_rule_action	action
	)
{
	switch (action) {

	case ACTION_LISTEN:
		return "listen";

	case ACTION_IGNORE:
		return "ignore";

	case ACTION_DROP:
		return "drop";

	default:
		break;
	}

	DPRINTF(1, ("fatal: unknown nic_rule_action %d\n",
		    action));
	ENSURE(0);

	return "ERROR";
}
#endif	/* DEBUG */
1254
1255
1256static nic_rule_action
1257interface_action(
1258	char *		if_name,
1259	sockaddr_u *	if_addr,
1260	u_int32		if_flags
1261	)
1262{
1263	nic_rule *	rule;
1264	int		isloopback;
1265	int		iswildcard;
1266
1267	DPRINTF(4, ("interface_action: interface %s ",
1268		    (if_name != NULL) ? if_name : "wildcard"));
1269
1270	iswildcard = is_wildcard_addr(if_addr);
1271	isloopback = !!(INT_LOOPBACK & if_flags);
1272
1273	/*
1274	 * Find any matching NIC rule from --interface / -I or ntp.conf
1275	 * interface/nic rules.
1276	 */
1277	for (rule = nic_rule_list; rule != NULL; rule = rule->next) {
1278
1279		switch (rule->match_type) {
1280
1281		case MATCH_ALL:
1282			/* loopback and wildcard excluded from "all" */
1283			if (isloopback || iswildcard)
1284				break;
1285			DPRINTF(4, ("nic all %s\n",
1286			    action_text(rule->action)));
1287			return rule->action;
1288
1289		case MATCH_IPV4:
1290			if (IS_IPV4(if_addr)) {
1291				DPRINTF(4, ("nic ipv4 %s\n",
1292				    action_text(rule->action)));
1293				return rule->action;
1294			}
1295			break;
1296
1297		case MATCH_IPV6:
1298			if (IS_IPV6(if_addr)) {
1299				DPRINTF(4, ("nic ipv6 %s\n",
1300				    action_text(rule->action)));
1301				return rule->action;
1302			}
1303			break;
1304
1305		case MATCH_WILDCARD:
1306			if (iswildcard) {
1307				DPRINTF(4, ("nic wildcard %s\n",
1308				    action_text(rule->action)));
1309				return rule->action;
1310			}
1311			break;
1312
1313		case MATCH_IFADDR:
1314			if (rule->prefixlen != -1) {
1315				if (addr_eqprefix(if_addr, &rule->addr,
1316						  rule->prefixlen)) {
1317
1318					DPRINTF(4, ("subnet address match - %s\n",
1319					    action_text(rule->action)));
1320					return rule->action;
1321				}
1322			} else
1323				if (SOCK_EQ(if_addr, &rule->addr)) {
1324
1325					DPRINTF(4, ("address match - %s\n",
1326					    action_text(rule->action)));
1327					return rule->action;
1328				}
1329			break;
1330
1331		case MATCH_IFNAME:
1332			if (if_name != NULL
1333#if defined(HAVE_FNMATCH) && defined(FNM_CASEFOLD)
1334			    && !fnmatch(rule->if_name, if_name, FNM_CASEFOLD)
1335#else
1336			    && !strcasecmp(if_name, rule->if_name)
1337#endif
1338			    ) {
1339
1340				DPRINTF(4, ("interface name match - %s\n",
1341				    action_text(rule->action)));
1342				return rule->action;
1343			}
1344			break;
1345		}
1346	}
1347
1348	/*
1349	 * Unless explicitly disabled such as with "nic ignore ::1"
1350	 * listen on loopback addresses.  Since ntpq and ntpdc query
1351	 * "localhost" by default, which typically resolves to ::1 and
1352	 * 127.0.0.1, it's useful to default to listening on both.
1353	 */
1354	if (isloopback) {
1355		DPRINTF(4, ("default loopback listen\n"));
1356		return ACTION_LISTEN;
1357	}
1358
1359	/*
1360	 * Treat wildcard addresses specially.  If there is no explicit
1361	 * "nic ... wildcard" or "nic ... 0.0.0.0" or "nic ... ::" rule
1362	 * default to drop.
1363	 */
1364	if (iswildcard) {
1365		DPRINTF(4, ("default wildcard drop\n"));
1366		return ACTION_DROP;
1367	}
1368
1369	/*
1370	 * Check for "virtual IP" (colon in the interface name) after
1371	 * the rules so that "ntpd --interface eth0:1 -novirtualips"
1372	 * does indeed listen on eth0:1's addresses.
1373	 */
1374	if (!listen_to_virtual_ips && if_name != NULL
1375	    && (strchr(if_name, ':') != NULL)) {
1376
1377		DPRINTF(4, ("virtual ip - ignore\n"));
1378		return ACTION_IGNORE;
1379	}
1380
1381	/*
1382	 * If there are no --interface/-I command-line options and no
1383	 * interface/nic rules in ntp.conf, the default action is to
1384	 * listen.  In the presence of rules from either, the default
1385	 * is to ignore.  This implements ntpd's traditional listen-
1386	 * every default with no interface listen configuration, and
1387	 * ensures a single -I eth0 or "nic listen eth0" means do not
1388	 * listen on any other addresses.
1389	 */
1390	if (NULL == nic_rule_list) {
1391		DPRINTF(4, ("default listen\n"));
1392		return ACTION_LISTEN;
1393	}
1394
1395	DPRINTF(4, ("implicit ignore\n"));
1396	return ACTION_IGNORE;
1397}
1398
1399
1400static void
1401convert_isc_if(
1402	isc_interface_t *isc_if,
1403	endpt *itf,
1404	u_short port
1405	)
1406{
1407	const u_char v6loop[16] = {0, 0, 0, 0, 0, 0, 0, 0,
1408				   0, 0, 0, 0, 0, 0, 0, 1};
1409
1410	strlcpy(itf->name, isc_if->name, sizeof(itf->name));
1411	itf->ifindex = isc_if->ifindex;
1412	itf->family = (u_short)isc_if->af;
1413	AF(&itf->sin) = itf->family;
1414	AF(&itf->mask) = itf->family;
1415	AF(&itf->bcast) = itf->family;
1416	SET_PORT(&itf->sin, port);
1417	SET_PORT(&itf->mask, port);
1418	SET_PORT(&itf->bcast, port);
1419
1420	if (IS_IPV4(&itf->sin)) {
1421		NSRCADR(&itf->sin) = isc_if->address.type.in.s_addr;
1422		NSRCADR(&itf->mask) = isc_if->netmask.type.in.s_addr;
1423
1424		if (isc_if->flags & INTERFACE_F_BROADCAST) {
1425			itf->flags |= INT_BROADCAST;
1426			NSRCADR(&itf->bcast) =
1427			    isc_if->broadcast.type.in.s_addr;
1428		}
1429	}
1430#ifdef INCLUDE_IPV6_SUPPORT
1431	else if (IS_IPV6(&itf->sin)) {
1432		SET_ADDR6N(&itf->sin, isc_if->address.type.in6);
1433		SET_ADDR6N(&itf->mask, isc_if->netmask.type.in6);
1434
1435		SET_SCOPE(&itf->sin, isc_if->address.zone);
1436	}
1437#endif /* INCLUDE_IPV6_SUPPORT */
1438
1439
1440	/* Process the rest of the flags */
1441
1442	itf->flags |=
1443		  ((INTERFACE_F_UP & isc_if->flags)
1444			? INT_UP : 0)
1445		| ((INTERFACE_F_LOOPBACK & isc_if->flags)
1446			? INT_LOOPBACK : 0)
1447		| ((INTERFACE_F_POINTTOPOINT & isc_if->flags)
1448			? INT_PPP : 0)
1449		| ((INTERFACE_F_MULTICAST & isc_if->flags)
1450			? INT_MULTICAST : 0)
1451		| ((INTERFACE_F_PRIVACY & isc_if->flags)
1452			? INT_PRIVACY : 0)
1453		;
1454
1455	/*
1456	 * Clear the loopback flag if the address is not localhost.
1457	 * http://bugs.ntp.org/1683
1458	 */
1459	if (INT_LOOPBACK & itf->flags) {
1460		if (AF_INET == itf->family) {
1461			if (127 != (SRCADR(&itf->sin) >> 24))
1462				itf->flags &= ~INT_LOOPBACK;
1463		} else {
1464			if (memcmp(v6loop, NSRCADR6(&itf->sin),
1465				   sizeof(NSRCADR6(&itf->sin))))
1466				itf->flags &= ~INT_LOOPBACK;
1467		}
1468	}
1469}
1470
1471
1472/*
1473 * refresh_interface
1474 *
1475 * some OSes have been observed to keep
1476 * cached routes even when more specific routes
1477 * become available.
1478 * this can be mitigated by re-binding
1479 * the socket.
1480 */
1481static int
1482refresh_interface(
1483	struct interface * interface
1484	)
1485{
1486#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
1487	if (interface->fd != INVALID_SOCKET) {
1488		int bcast = (interface->flags & INT_BCASTXMIT) != 0;
1489		/* as we forcibly close() the socket remove the
1490		   broadcast permission indication */
1491		if (bcast)
1492			socket_broadcast_disable(interface, &interface->sin);
1493
1494		close_and_delete_fd_from_list(interface->fd);
1495
1496		/* create new socket picking up a new first hop binding
1497		   at connect() time */
1498		interface->fd = open_socket(&interface->sin,
1499					    bcast, 0, interface);
1500		 /*
1501		  * reset TTL indication so TTL is is set again
1502		  * next time around
1503		  */
1504		interface->last_ttl = 0;
1505		return (interface->fd != INVALID_SOCKET);
1506	} else
1507		return 0;	/* invalid sockets are not refreshable */
1508#else /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1509	return (interface->fd != INVALID_SOCKET);
1510#endif /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1511}
1512
1513/*
1514 * interface_update - externally callable update function
1515 */
1516void
1517interface_update(
1518	interface_receiver_t	receiver,
1519	void *			data)
1520{
1521	int new_interface_found;
1522
1523	if (disable_dynamic_updates)
1524		return;
1525
1526	BLOCKIO();
1527	new_interface_found = update_interfaces(NTP_PORT, receiver, data);
1528	UNBLOCKIO();
1529
1530	if (!new_interface_found)
1531		return;
1532
1533#ifdef DEBUG
1534	msyslog(LOG_DEBUG, "new interface(s) found: waking up resolver");
1535#endif
1536	interrupt_worker_sleep();
1537}
1538
1539
1540/*
1541 * sau_from_netaddr() - convert network address on-wire formats.
1542 * Convert from libisc's isc_netaddr_t to NTP's sockaddr_u
1543 */
1544void
1545sau_from_netaddr(
1546	sockaddr_u *psau,
1547	const isc_netaddr_t *pna
1548	)
1549{
1550	ZERO_SOCK(psau);
1551	AF(psau) = (u_short)pna->family;
1552	switch (pna->family) {
1553
1554	case AF_INET:
1555		memcpy(&psau->sa4.sin_addr, &pna->type.in,
1556		       sizeof(psau->sa4.sin_addr));
1557		break;
1558
1559	case AF_INET6:
1560		memcpy(&psau->sa6.sin6_addr, &pna->type.in6,
1561		       sizeof(psau->sa6.sin6_addr));
1562		break;
1563	}
1564}
1565
1566
1567static int
1568is_wildcard_addr(
1569	const sockaddr_u *psau
1570	)
1571{
1572	if (IS_IPV4(psau) && !NSRCADR(psau))
1573		return 1;
1574
1575#ifdef INCLUDE_IPV6_SUPPORT
1576	if (IS_IPV6(psau) && S_ADDR6_EQ(psau, &in6addr_any))
1577		return 1;
1578#endif
1579
1580	return 0;
1581}
1582
1583
#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
/*
 * set_wildcard_reuse - switch SO_REUSEADDR on or off for the wildcard
 * socket of the given address family.
 *
 * Nothing happens when ANY_INTERFACE_BYFAM() yields no interface or
 * the interface has no valid socket.  A setsockopt() failure is
 * logged and otherwise ignored.
 */
static void
set_wildcard_reuse(
	u_short	family,
	int	on
	)
{
	struct interface *wild;

	wild = ANY_INTERFACE_BYFAM(family);
	if (NULL == wild || INVALID_SOCKET == wild->fd)
		return;

	if (setsockopt(wild->fd, SOL_SOCKET, SO_REUSEADDR,
		       (void *)&on, sizeof(on)))
		msyslog(LOG_ERR,
			"set_wildcard_reuse: setsockopt(SO_REUSEADDR, %s) failed: %m",
			on ? "on" : "off");

	DPRINTF(4, ("set SO_REUSEADDR to %s on %s\n",
		    on ? "on" : "off",
		    stoa(&wild->sin)));
}
#endif /* OS_NEEDS_REUSEADDR_FOR_IFADDRBIND */
1614
1615static isc_boolean_t
1616check_flags(
1617	sockaddr_u *psau,
1618	const char *name,
1619	u_int32 flags
1620	)
1621{
1622#if defined(SIOCGIFAFLAG_IN)
1623	struct ifreq ifr;
1624	int fd;
1625
1626	if (psau->sa.sa_family != AF_INET)
1627		return ISC_FALSE;
1628	if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1629		return ISC_FALSE;
1630	ZERO(ifr);
1631	memcpy(&ifr.ifr_addr, &psau->sa, sizeof(ifr.ifr_addr));
1632	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1633	if (ioctl(fd, SIOCGIFAFLAG_IN, &ifr) < 0) {
1634		close(fd);
1635		return ISC_FALSE;
1636	}
1637	close(fd);
1638	if ((ifr.ifr_addrflags & flags) != 0)
1639		return ISC_TRUE;
1640#endif	/* SIOCGIFAFLAG_IN */
1641	return ISC_FALSE;
1642}
1643
1644static isc_boolean_t
1645check_flags6(
1646	sockaddr_u *psau,
1647	const char *name,
1648	u_int32 flags6
1649	)
1650{
1651#if defined(INCLUDE_IPV6_SUPPORT) && defined(SIOCGIFAFLAG_IN6)
1652	struct in6_ifreq ifr6;
1653	int fd;
1654
1655	if (psau->sa.sa_family != AF_INET6)
1656		return ISC_FALSE;
1657	if ((fd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
1658		return ISC_FALSE;
1659	ZERO(ifr6);
1660	memcpy(&ifr6.ifr_addr, &psau->sa6, sizeof(ifr6.ifr_addr));
1661	strlcpy(ifr6.ifr_name, name, sizeof(ifr6.ifr_name));
1662	if (ioctl(fd, SIOCGIFAFLAG_IN6, &ifr6) < 0) {
1663		close(fd);
1664		return ISC_FALSE;
1665	}
1666	close(fd);
1667	if ((ifr6.ifr_ifru.ifru_flags6 & flags6) != 0)
1668		return ISC_TRUE;
1669#endif	/* INCLUDE_IPV6_SUPPORT && SIOCGIFAFLAG_IN6 */
1670	return ISC_FALSE;
1671}
1672
1673static isc_boolean_t
1674is_anycast(
1675	sockaddr_u *psau,
1676	const char *name
1677	)
1678{
1679#ifdef IN6_IFF_ANYCAST
1680	return check_flags6(psau, name, IN6_IFF_ANYCAST);
1681#else
1682	return ISC_FALSE;
1683#endif
1684}
1685
1686static isc_boolean_t
1687is_valid(
1688	sockaddr_u *psau,
1689	const char *name
1690	)
1691{
1692	u_int32 flags;
1693
1694	flags = 0;
1695	switch (psau->sa.sa_family) {
1696	case AF_INET:
1697#ifdef IN_IFF_DETACHED
1698		flags |= IN_IFF_DETACHED;
1699#endif
1700#ifdef IN_IFF_TENTATIVE
1701		flags |= IN_IFF_TENTATIVE;
1702#endif
1703		return check_flags(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1704	case AF_INET6:
1705#ifdef IN6_IFF_DEPARTED
1706		flags |= IN6_IFF_DEPARTED;
1707#endif
1708#ifdef IN6_IFF_DETACHED
1709		flags |= IN6_IFF_DETACHED;
1710#endif
1711#ifdef IN6_IFF_TENTATIVE
1712		flags |= IN6_IFF_TENTATIVE;
1713#endif
1714		return check_flags6(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1715	default:
1716		return ISC_FALSE;
1717	}
1718}
1719
1720/*
1721 * update_interface strategy
1722 *
1723 * toggle configuration phase
1724 *
1725 * Phase 1:
1726 * forall currently existing interfaces
1727 *   if address is known:
1728 *	drop socket - rebind again
1729 *
1730 *   if address is NOT known:
1731 *	attempt to create a new interface entry
1732 *
1733 * Phase 2:
1734 * forall currently known non MCAST and WILDCARD interfaces
1735 *   if interface does not match configuration phase (not seen in phase 1):
1736 *	remove interface from known interface list
1737 *	forall peers associated with this interface
1738 *         disconnect peer from this interface
1739 *
1740 * Phase 3:
1741 *   attempt to re-assign interfaces to peers
1742 *
1743 */
1744
1745static int
1746update_interfaces(
1747	u_short			port,
1748	interface_receiver_t	receiver,
1749	void *			data
1750	)
1751{
1752	isc_mem_t *		mctx = (void *)-1;
1753	interface_info_t	ifi;
1754	isc_interfaceiter_t *	iter;
1755	isc_result_t		result;
1756	isc_interface_t		isc_if;
1757	int			new_interface_found;
1758	unsigned int		family;
1759	endpt			enumep;
1760	endpt *			ep;
1761	endpt *			next_ep;
1762
1763	DPRINTF(3, ("update_interfaces(%d)\n", port));
1764
1765	/*
1766	 * phase one - scan interfaces
1767	 * - create those that are not found
1768	 * - update those that are found
1769	 */
1770
1771	new_interface_found = FALSE;
1772	iter = NULL;
1773	result = isc_interfaceiter_create(mctx, &iter);
1774
1775	if (result != ISC_R_SUCCESS)
1776		return 0;
1777
1778	/*
1779	 * Toggle system interface scan phase to find untouched
1780	 * interfaces to be deleted.
1781	 */
1782	sys_interphase ^= 0x1;
1783
1784	for (result = isc_interfaceiter_first(iter);
1785	     ISC_R_SUCCESS == result;
1786	     result = isc_interfaceiter_next(iter)) {
1787
1788		result = isc_interfaceiter_current(iter, &isc_if);
1789
1790		if (result != ISC_R_SUCCESS)
1791			break;
1792
1793		/* See if we have a valid family to use */
1794		family = isc_if.address.family;
1795		if (AF_INET != family && AF_INET6 != family)
1796			continue;
1797		if (AF_INET == family && !ipv4_works)
1798			continue;
1799		if (AF_INET6 == family && !ipv6_works)
1800			continue;
1801
1802		/* create prototype */
1803		init_interface(&enumep);
1804
1805		convert_isc_if(&isc_if, &enumep, port);
1806
1807		DPRINT_INTERFACE(4, (&enumep, "examining ", "\n"));
1808
1809		/*
1810		 * Check if and how we are going to use the interface.
1811		 */
1812		switch (interface_action(enumep.name, &enumep.sin,
1813					 enumep.flags)) {
1814
1815		case ACTION_IGNORE:
1816			DPRINTF(4, ("ignoring interface %s (%s) - by nic rules\n",
1817				    enumep.name, stoa(&enumep.sin)));
1818			continue;
1819
1820		case ACTION_LISTEN:
1821			DPRINTF(4, ("listen interface %s (%s) - by nic rules\n",
1822				    enumep.name, stoa(&enumep.sin)));
1823			enumep.ignore_packets = ISC_FALSE;
1824			break;
1825
1826		case ACTION_DROP:
1827			DPRINTF(4, ("drop on interface %s (%s) - by nic rules\n",
1828				    enumep.name, stoa(&enumep.sin)));
1829			enumep.ignore_packets = ISC_TRUE;
1830			break;
1831		}
1832
1833		 /* interfaces must be UP to be usable */
1834		if (!(enumep.flags & INT_UP)) {
1835			DPRINTF(4, ("skipping interface %s (%s) - DOWN\n",
1836				    enumep.name, stoa(&enumep.sin)));
1837			continue;
1838		}
1839
1840		/*
1841		 * skip any interfaces UP and bound to a wildcard
1842		 * address - some dhcp clients produce that in the
1843		 * wild
1844		 */
1845		if (is_wildcard_addr(&enumep.sin))
1846			continue;
1847
1848		if (is_anycast(&enumep.sin, isc_if.name))
1849			continue;
1850
1851		/*
1852		 * skip any address that is an invalid state to be used
1853		 */
1854		if (!is_valid(&enumep.sin, isc_if.name))
1855			continue;
1856
1857		/*
1858		 * map to local *address* in order to map all duplicate
1859		 * interfaces to an endpt structure with the appropriate
1860		 * socket.  Our name space is (ip-address), NOT
1861		 * (interface name, ip-address).
1862		 */
1863		ep = getinterface(&enumep.sin, INT_WILDCARD);
1864
1865		if (ep != NULL && refresh_interface(ep)) {
1866			/*
1867			 * found existing and up to date interface -
1868			 * mark present.
1869			 */
1870			if (ep->phase != sys_interphase) {
1871				/*
1872				 * On a new round we reset the name so
1873				 * the interface name shows up again if
1874				 * this address is no longer shared.
1875				 * We reset ignore_packets from the
1876				 * new prototype to respect any runtime
1877				 * changes to the nic rules.
1878				 */
1879				strlcpy(ep->name, enumep.name,
1880					sizeof(ep->name));
1881				ep->ignore_packets =
1882					    enumep.ignore_packets;
1883			} else {
1884				/* name collision - rename interface */
1885				strlcpy(ep->name, "*multiple*",
1886					sizeof(ep->name));
1887			}
1888
1889			DPRINT_INTERFACE(4, (ep, "updating ",
1890					     " present\n"));
1891
1892			if (ep->ignore_packets !=
1893			    enumep.ignore_packets) {
1894				/*
1895				 * We have conflicting configurations
1896				 * for the interface address. This is
1897				 * caused by using -I <interfacename>
1898				 * for an interface that shares its
1899				 * address with other interfaces. We
1900				 * can not disambiguate incoming
1901				 * packets delivered to this socket
1902				 * without extra syscalls/features.
1903				 * These are not (commonly) available.
1904				 * Note this is a more unusual
1905				 * configuration where several
1906				 * interfaces share an address but
1907				 * filtering via interface name is
1908				 * attempted.  We resolve the
1909				 * configuration conflict by disabling
1910				 * the processing of received packets.
1911				 * This leads to no service on the
1912				 * interface address where the conflict
1913				 * occurs.
1914				 */
1915				msyslog(LOG_ERR,
1916					"WARNING: conflicting enable configuration for interfaces %s and %s for address %s - unsupported configuration - address DISABLED",
1917					enumep.name, ep->name,
1918					stoa(&enumep.sin));
1919
1920				ep->ignore_packets = ISC_TRUE;
1921			}
1922
1923			ep->phase = sys_interphase;
1924
1925			ifi.action = IFS_EXISTS;
1926			ifi.ep = ep;
1927			if (receiver != NULL)
1928				(*receiver)(data, &ifi);
1929		} else {
1930			/*
1931			 * This is new or refreshing failed - add to
1932			 * our interface list.  If refreshing failed we
1933			 * will delete the interface structure in phase
1934			 * 2 as the interface was not marked current.
1935			 * We can bind to the address as the refresh
1936			 * code already closed the offending socket
1937			 */
1938			ep = create_interface(port, &enumep);
1939
1940			if (ep != NULL) {
1941				ifi.action = IFS_CREATED;
1942				ifi.ep = ep;
1943				if (receiver != NULL)
1944					(*receiver)(data, &ifi);
1945
1946				new_interface_found = TRUE;
1947				DPRINT_INTERFACE(3,
1948					(ep, "updating ",
1949					 " new - created\n"));
1950			} else {
1951				DPRINT_INTERFACE(3,
1952					(&enumep, "updating ",
1953					 " new - creation FAILED"));
1954
1955				msyslog(LOG_INFO,
1956					"failed to init interface for address %s",
1957					stoa(&enumep.sin));
1958				continue;
1959			}
1960		}
1961	}
1962
1963	isc_interfaceiter_destroy(&iter);
1964
1965	/*
1966	 * phase 2 - delete gone interfaces - reassigning peers to
1967	 * other interfaces
1968	 */
1969	for (ep = ep_list; ep != NULL; ep = next_ep) {
1970		next_ep = ep->elink;
1971
1972		/*
1973		 * if phase does not match sys_phase this interface was
1974		 * not enumerated during the last interface scan - so it
1975		 * is gone and will be deleted here unless it did not
1976		 * originate from interface enumeration (INT_WILDCARD,
1977		 * INT_MCASTIF).
1978		 */
1979		if (((INT_WILDCARD | INT_MCASTIF) & ep->flags) ||
1980		    ep->phase == sys_interphase)
1981			continue;
1982
1983		DPRINT_INTERFACE(3, (ep, "updating ",
1984				     "GONE - deleting\n"));
1985		remove_interface(ep);
1986
1987		ifi.action = IFS_DELETED;
1988		ifi.ep = ep;
1989		if (receiver != NULL)
1990			(*receiver)(data, &ifi);
1991
1992		/* disconnect peers from deleted endpt. */
1993		while (ep->peers != NULL)
1994			set_peerdstadr(ep->peers, NULL);
1995
1996		/*
1997		 * update globals in case we lose
1998		 * a loopback interface
1999		 */
2000		if (ep == loopback_interface)
2001			loopback_interface = NULL;
2002
2003		delete_interface(ep);
2004	}
2005
2006	/*
2007	 * phase 3 - re-configure as the world has possibly changed
2008	 *
2009	 * never ever make this conditional again - it is needed to track
2010	 * routing updates. see bug #2506
2011	 */
2012	refresh_all_peerinterfaces();
2013
2014	if (broadcast_client_enabled || sys_bclient)
2015		io_setbclient();
2016
2017#ifdef MCAST
2018	/*
2019	 * Check multicast interfaces and try to join multicast groups if
2020         * not joined yet.
2021         */
2022	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2023		remaddr_t *entry;
2024
2025		if (!(INT_MCASTIF & ep->flags) || (INT_MCASTOPEN & ep->flags))
2026			continue;
2027
2028		/* Find remote address that was linked to this interface */
2029		for (entry = remoteaddr_list;
2030		     entry != NULL;
2031		     entry = entry->link) {
2032			if (entry->ep == ep) {
2033				if (socket_multicast_enable(ep, &entry->addr)) {
2034					msyslog(LOG_INFO,
2035						"Joined %s socket to multicast group %s",
2036						stoa(&ep->sin),
2037						stoa(&entry->addr));
2038				}
2039				break;
2040			}
2041		}
2042	}
2043#endif /* MCAST */
2044
2045	return new_interface_found;
2046}
2047
2048
2049/*
2050 * create_sockets - create a socket for each interface plus a default
2051 *			socket for when we don't know where to send
2052 */
2053static int
2054create_sockets(
2055	u_short port
2056	)
2057{
2058#ifndef HAVE_IO_COMPLETION_PORT
2059	/*
2060	 * I/O Completion Ports don't care about the select and FD_SET
2061	 */
2062	maxactivefd = 0;
2063	FD_ZERO(&activefds);
2064#endif
2065
2066	DPRINTF(2, ("create_sockets(%d)\n", port));
2067
2068	create_wildcards(port);
2069
2070	update_interfaces(port, NULL, NULL);
2071
2072	/*
2073	 * Now that we have opened all the sockets, turn off the reuse
2074	 * flag for security.
2075	 */
2076	set_reuseaddr(0);
2077
2078	DPRINTF(2, ("create_sockets: Total interfaces = %d\n", ninterfaces));
2079
2080	return ninterfaces;
2081}
2082
2083/*
2084 * create_interface - create a new interface for a given prototype
2085 *		      binding the socket.
2086 */
2087static struct interface *
2088create_interface(
2089	u_short			port,
2090	struct interface *	protot
2091	)
2092{
2093	sockaddr_u	resmask;
2094	endpt *		iface;
2095#if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2096	remaddr_t *	entry;
2097	remaddr_t *	next_entry;
2098#endif
2099	DPRINTF(2, ("create_interface(%s#%d)\n", stoa(&protot->sin),
2100		    port));
2101
2102	/* build an interface */
2103	iface = new_interface(protot);
2104
2105	/*
2106	 * create socket
2107	 */
2108	iface->fd = open_socket(&iface->sin, 0, 0, iface);
2109
2110	if (iface->fd != INVALID_SOCKET)
2111		log_listen_address(iface);
2112
2113	if ((INT_BROADCAST & iface->flags)
2114	    && iface->bfd != INVALID_SOCKET)
2115		msyslog(LOG_INFO, "Listening on broadcast address %s#%d",
2116			stoa((&iface->bcast)), port);
2117
2118	if (INVALID_SOCKET == iface->fd
2119	    && INVALID_SOCKET == iface->bfd) {
2120		msyslog(LOG_ERR, "unable to create socket on %s (%d) for %s#%d",
2121			iface->name,
2122			iface->ifnum,
2123			stoa((&iface->sin)),
2124			port);
2125		delete_interface(iface);
2126		return NULL;
2127	}
2128
2129	/*
2130	 * Blacklist our own addresses, no use talking to ourself
2131	 */
2132	SET_HOSTMASK(&resmask, AF(&iface->sin));
2133	hack_restrict(RESTRICT_FLAGS, &iface->sin, &resmask,
2134		      -4, RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
2135
2136	/*
2137	 * set globals with the first found
2138	 * loopback interface of the appropriate class
2139	 */
2140	if (NULL == loopback_interface && AF_INET == iface->family
2141	    && (INT_LOOPBACK & iface->flags))
2142		loopback_interface = iface;
2143
2144	/*
2145	 * put into our interface list
2146	 */
2147	add_addr_to_list(&iface->sin, iface);
2148	add_interface(iface);
2149
2150#if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2151	/*
2152	 * Join any previously-configured compatible multicast groups.
2153	 */
2154	if (INT_MULTICAST & iface->flags &&
2155	    !((INT_LOOPBACK | INT_WILDCARD) & iface->flags) &&
2156	    !iface->ignore_packets) {
2157		for (entry = remoteaddr_list;
2158		     entry != NULL;
2159		     entry = next_entry) {
2160			next_entry = entry->link;
2161			if (AF(&iface->sin) != AF(&entry->addr) ||
2162			    !IS_MCAST(&entry->addr))
2163				continue;
2164			if (socket_multicast_enable(iface,
2165						    &entry->addr))
2166				msyslog(LOG_INFO,
2167					"Joined %s socket to multicast group %s",
2168					stoa(&iface->sin),
2169					stoa(&entry->addr));
2170			else
2171				msyslog(LOG_ERR,
2172					"Failed to join %s socket to multicast group %s",
2173					stoa(&iface->sin),
2174					stoa(&entry->addr));
2175		}
2176	}
2177#endif	/* MCAST && MCAST_NONEWSOCKET */
2178
2179	DPRINT_INTERFACE(2, (iface, "created ", "\n"));
2180	return iface;
2181}
2182
2183
#ifdef SO_EXCLUSIVEADDRUSE
/*
 * set_excladdruse - turn on SO_EXCLUSIVEADDRUSE for a socket.
 *
 * A failure is logged, except for the tolerated pre-XP Windows case
 * described in the body.
 */
static void
set_excladdruse(
	SOCKET fd
	)
{
	int enable = 1;
#ifdef SYS_WINNT
	DWORD err;
#endif

	if (0 == setsockopt(fd, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
			    (void *)&enable, sizeof(enable)))
		return;

#ifdef SYS_WINNT
	/*
	 * Prior to Windows XP setting SO_EXCLUSIVEADDRUSE can fail with
	 * error WSAEINVAL depending on service pack level and whether
	 * the user account is in the Administrators group.  Do not
	 * complain if it fails that way on versions prior to XP (5.1).
	 */
	err = GetLastError();

	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0	/* < 5.1/XP */
	    && WSAEINVAL == err)
		return;

	/* put the error code back for the %m below */
	SetLastError(err);
#endif
	msyslog(LOG_ERR,
		"setsockopt(%d, SO_EXCLUSIVEADDRUSE, on): %m",
		(int)fd);
}
#endif  /* SO_EXCLUSIVEADDRUSE */
2222
2223
2224/*
2225 * set_reuseaddr() - set/clear REUSEADDR on all sockets
2226 *			NB possible hole - should we be doing this on broadcast
2227 *			fd's also?
2228 */
2229static void
2230set_reuseaddr(
2231	int flag
2232	)
2233{
2234#ifndef SO_EXCLUSIVEADDRUSE
2235	endpt *ep;
2236
2237	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2238		if (ep->flags & INT_WILDCARD)
2239			continue;
2240
2241		/*
2242		 * if ep->fd  is INVALID_SOCKET, we might have a adapter
2243		 * configured but not present
2244		 */
2245		DPRINTF(4, ("setting SO_REUSEADDR on %.16s@%s to %s\n",
2246			    ep->name, stoa(&ep->sin),
2247			    flag ? "on" : "off"));
2248
2249		if (ep->fd != INVALID_SOCKET) {
2250			if (setsockopt(ep->fd, SOL_SOCKET, SO_REUSEADDR,
2251				       (void *)&flag, sizeof(flag))) {
2252				msyslog(LOG_ERR, "set_reuseaddr: setsockopt(%s, SO_REUSEADDR, %s) failed: %m",
2253					stoa(&ep->sin), flag ? "on" : "off");
2254			}
2255		}
2256	}
2257#endif /* ! SO_EXCLUSIVEADDRUSE */
2258}
2259
2260/*
2261 * This is just a wrapper around an internal function so we can
2262 * make other changes as necessary later on
2263 */
2264void
2265enable_broadcast(
2266	struct interface *	iface,
2267	sockaddr_u *		baddr
2268	)
2269{
2270#ifdef OPEN_BCAST_SOCKET
2271	socket_broadcast_enable(iface, iface->fd, baddr);
2272#endif
2273}
2274
2275#ifdef OPEN_BCAST_SOCKET
2276/*
2277 * Enable a broadcast address to a given socket
2278 * The socket is in the ep_list all we need to do is enable
2279 * broadcasting. It is not this function's job to select the socket
2280 */
2281static isc_boolean_t
2282socket_broadcast_enable(
2283	struct interface *	iface,
2284	SOCKET			fd,
2285	sockaddr_u *		baddr
2286	)
2287{
2288#ifdef SO_BROADCAST
2289	int on = 1;
2290
2291	if (IS_IPV4(baddr)) {
2292		/* if this interface can support broadcast, set SO_BROADCAST */
2293		if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
2294			       (void *)&on, sizeof(on)))
2295			msyslog(LOG_ERR,
2296				"setsockopt(SO_BROADCAST) enable failure on address %s: %m",
2297				stoa(baddr));
2298		else
2299			DPRINTF(2, ("Broadcast enabled on socket %d for address %s\n",
2300				    fd, stoa(baddr)));
2301	}
2302	iface->flags |= INT_BCASTXMIT;
2303	return ISC_TRUE;
2304#else
2305	return ISC_FALSE;
2306#endif /* SO_BROADCAST */
2307}
2308
2309#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
2310/*
2311 * Remove a broadcast address from a given socket
2312 * The socket is in the ep_list all we need to do is disable
2313 * broadcasting. It is not this function's job to select the socket
2314 */
2315static isc_boolean_t
2316socket_broadcast_disable(
2317	struct interface *	iface,
2318	sockaddr_u *		baddr
2319	)
2320{
2321#ifdef SO_BROADCAST
2322	int off = 0;	/* This seems to be OK as an int */
2323
2324	if (IS_IPV4(baddr) && setsockopt(iface->fd, SOL_SOCKET,
2325	    SO_BROADCAST, (void *)&off, sizeof(off)))
2326		msyslog(LOG_ERR,
2327			"setsockopt(SO_BROADCAST) disable failure on address %s: %m",
2328			stoa(baddr));
2329
2330	iface->flags &= ~INT_BCASTXMIT;
2331	return ISC_TRUE;
2332#else
2333	return ISC_FALSE;
2334#endif /* SO_BROADCAST */
2335}
2336#endif /* OS_MISSES_SPECIFIC_ROUTE_UPDATES */
2337
2338#endif /* OPEN_BCAST_SOCKET */
2339
2340/*
2341 * return the broadcast client flag value
2342 */
2343/*isc_boolean_t
2344get_broadcastclient_flag(void)
2345{
2346	return (broadcast_client_enabled);
2347}
2348*/
2349
2350/*
2351 * Check to see if the address is a multicast address
2352 */
2353static isc_boolean_t
2354addr_ismulticast(
2355	sockaddr_u *maddr
2356	)
2357{
2358	isc_boolean_t result;
2359
2360#ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
2361	/*
2362	 * If we don't have IPV6 support any IPV6 addr is not multicast
2363	 */
2364	if (IS_IPV6(maddr))
2365		result = ISC_FALSE;
2366	else
2367#endif
2368		result = IS_MCAST(maddr);
2369
2370	if (!result)
2371		DPRINTF(4, ("address %s is not multicast\n",
2372			    stoa(maddr)));
2373
2374	return result;
2375}
2376
2377/*
2378 * Multicast servers need to set the appropriate Multicast interface
2379 * socket option in order for it to know which interface to use for
2380 * send the multicast packet.
2381 */
2382void
2383enable_multicast_if(
2384	struct interface *	iface,
2385	sockaddr_u *		maddr
2386	)
2387{
2388#ifdef MCAST
2389#ifdef IP_MULTICAST_LOOP
2390	TYPEOF_IP_MULTICAST_LOOP off = 0;
2391#endif
2392#if defined(INCLUDE_IPV6_MULTICAST_SUPPORT) && defined(IPV6_MULTICAST_LOOP)
2393	u_int off6 = 0;
2394#endif
2395
2396	REQUIRE(AF(maddr) == AF(&iface->sin));
2397
2398	switch (AF(&iface->sin)) {
2399
2400	case AF_INET:
2401#ifdef IP_MULTICAST_LOOP
2402		/*
2403		 * Don't send back to itself, but allow failure to set
2404		 */
2405		if (setsockopt(iface->fd, IPPROTO_IP,
2406			       IP_MULTICAST_LOOP,
2407			       (void *)&off,
2408			       sizeof(off))) {
2409
2410			msyslog(LOG_ERR,
2411				"setsockopt IP_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2412				iface->fd, stoa(&iface->sin),
2413				stoa(maddr));
2414		}
2415#endif
2416		break;
2417
2418	case AF_INET6:
2419#ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2420#ifdef IPV6_MULTICAST_LOOP
2421		/*
2422		 * Don't send back to itself, but allow failure to set
2423		 */
2424		if (setsockopt(iface->fd, IPPROTO_IPV6,
2425			       IPV6_MULTICAST_LOOP,
2426			       (void *) &off6, sizeof(off6))) {
2427
2428			msyslog(LOG_ERR,
2429				"setsockopt IPV6_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2430				iface->fd, stoa(&iface->sin),
2431				stoa(maddr));
2432		}
2433#endif
2434		break;
2435#else
2436		return;
2437#endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2438	}
2439	return;
2440#endif
2441}
2442
/*
 * socket_multicast_enable() - join an endpoint's socket to a
 * multicast group.
 *
 * The socket is already in the ep_list; all we need to do is enable
 * multicasting.  It is not this function's job to select the socket.
 *
 * iface	endpoint whose fd joins the group.  On success its
 *		INT_MCASTOPEN flag is set and num_mcast is incremented.
 * maddr	multicast group address to join.
 *
 * Returns ISC_TRUE on success.  Returns ISC_FALSE if the join
 * setsockopt() fails (reported through DPRINTF only), or for an IPv6
 * group when built without INCLUDE_IPV6_MULTICAST_SUPPORT.  Address
 * families other than AF_INET/AF_INET6 are not rejected here.
 */
#if defined(MCAST)
static isc_boolean_t
socket_multicast_enable(
	endpt *		iface,
	sockaddr_u *	maddr
	)
{
	struct ip_mreq		mreq;
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq	mreq6;
# endif
	switch (AF(maddr)) {

	case AF_INET:
		ZERO(mreq);
		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
		/* no specific local interface is named for the join */
		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
		if (setsockopt(iface->fd,
			       IPPROTO_IP,
			       IP_ADD_MEMBERSHIP,
			       (void *)&mreq,
			       sizeof(mreq))) {
			/* newline-terminated like every other debug trace */
			DPRINTF(2, (
				"setsockopt IP_ADD_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)\n",
				iface->fd, stoa(&iface->sin),
				mreq.imr_multiaddr.s_addr,
				mreq.imr_interface.s_addr,
				stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv4 multicast membership on socket %d, addr %s for %x / %x (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq.imr_multiaddr.s_addr,
			    mreq.imr_interface.s_addr, stoa(maddr)));
		break;

	case AF_INET6:
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Enable reception of multicast packets.
		 * If the address is link-local we can get the
		 * interface index from the scope id. Don't do this
		 * for other types of multicast addresses. For now let
		 * the kernel figure it out.
		 */
		ZERO(mreq6);
		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		mreq6.ipv6mr_interface = iface->ifindex;

		if (setsockopt(iface->fd, IPPROTO_IPV6,
			       IPV6_JOIN_GROUP, (void *)&mreq6,
			       sizeof(mreq6))) {
			/* newline-terminated like every other debug trace */
			DPRINTF(2, (
				"setsockopt IPV6_JOIN_GROUP failed: %m on socket %d, addr %s for interface %u (%s)\n",
				iface->fd, stoa(&iface->sin),
				mreq6.ipv6mr_interface, stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv6 multicast group on socket %d, addr %s for interface %u (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq6.ipv6mr_interface, stoa(maddr)));
		break;
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}

	/* record the membership on the endpoint */
	iface->flags |= INT_MCASTOPEN;
	iface->num_mcast++;

	return ISC_TRUE;
}
#endif	/* MCAST */
2519
2520
/*
 * socket_multicast_disable() - drop an endpoint's membership in a
 * multicast group.
 *
 * The socket is already in the ep_list; all we need to do is disable
 * multicasting.  It is not this function's job to select the socket.
 *
 * iface	endpoint whose fd leaves the group.  On success
 *		num_mcast is decremented and INT_MCASTOPEN is cleared
 *		once the count reaches zero.
 * maddr	multicast group address to leave.
 *
 * Returns ISC_TRUE when the group was left, or when maddr is not in
 * the address list at all (nothing to do).  Returns ISC_FALSE, with
 * the endpoint left unchanged, if the setsockopt() call fails or for
 * an IPv6 group when built without INCLUDE_IPV6_MULTICAST_SUPPORT.
 */
#ifdef MCAST
static isc_boolean_t
socket_multicast_disable(
	struct interface *	iface,
	sockaddr_u *		maddr
	)
{
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq mreq6;
# endif
	struct ip_mreq mreq;

	ZERO(mreq);

	if (find_addr_in_list(maddr) == NULL) {
		DPRINTF(4, ("socket_multicast_disable(%s): not found\n",
			    stoa(maddr)));
		return ISC_TRUE;
	}

	switch (AF(maddr)) {

	case AF_INET:
		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
		/*
		 * The membership was added with INADDR_ANY as the
		 * interface (see socket_multicast_enable()), so name
		 * the same interface when dropping it.  Passing
		 * iface->sin here can make the kernel fail to find
		 * the membership.
		 */
		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
		if (setsockopt(iface->fd, IPPROTO_IP,
			       IP_DROP_MEMBERSHIP, (void *)&mreq,
			       sizeof(mreq))) {

			msyslog(LOG_ERR,
				"setsockopt IP_DROP_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
				iface->fd, stoa(&iface->sin),
				SRCADR(maddr), SRCADR(&iface->sin),
				stoa(maddr));
			return ISC_FALSE;
		}
		break;
	case AF_INET6:
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Disable reception of multicast packets
		 * If the address is link-local we can get the
		 * interface index from the scope id.  Don't do this
		 * for other types of multicast addresses. For now let
		 * the kernel figure it out.
		 */
		ZERO(mreq6);	/* same initialisation as the join */
		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		mreq6.ipv6mr_interface = iface->ifindex;

		if (setsockopt(iface->fd, IPPROTO_IPV6,
			       IPV6_LEAVE_GROUP, (void *)&mreq6,
			       sizeof(mreq6))) {

			msyslog(LOG_ERR,
				"setsockopt IPV6_LEAVE_GROUP failure: %m on socket %d, addr %s for %d (%s)",
				iface->fd, stoa(&iface->sin),
				iface->ifindex, stoa(maddr));
			return ISC_FALSE;
		}
		break;
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}

	/* one membership fewer; clear the flag with the last one */
	iface->num_mcast--;
	if (!iface->num_mcast)
		iface->flags &= ~INT_MCASTOPEN;

	return ISC_TRUE;
}
#endif	/* MCAST */
2598
/*
 * io_setbclient - open the broadcast client sockets
 *
 * Walks ep_list and, for every broadcast-capable endpoint that is
 * neither wildcard nor loopback and is not ignoring packets, tries to
 * open a socket bound to the endpoint's broadcast address (ep->bfd).
 * SO_REUSEADDR is switched on for the duration of the walk via
 * set_reuseaddr() and switched off again afterwards.
 *
 * broadcast_client_enabled ends up ISC_TRUE if at least one endpoint
 * is counted as listening (already open, newly opened, or - on
 * SYS_WINNT - falling back to its regular socket), else ISC_FALSE.
 *
 * When built without OPEN_BCAST_SOCKET only an error is logged.
 */
void
io_setbclient(void)
{
#ifdef OPEN_BCAST_SOCKET
	endpt *		ep;
	unsigned int	nif, ni4, ni6;	/* listening / IPv4 / IPv6 endpoint counts */

	nif = ni4 = ni6 = 0;
	set_reuseaddr(1);

	for (ep = ep_list; ep != NULL; ep = ep->elink) {
		/* count IPv6 vs IPv4 interfaces. Needed later to decide
		 * if we should log an error or not.  The count is taken
		 * before any filtering, so wildcard and loopback
		 * endpoints are included.
		 */
		switch (ep->family) {
		case AF_INET : ++ni4; break;
		case AF_INET6: ++ni6; break;
		default      :        break;
		}

		if (ep->flags & (INT_WILDCARD | INT_LOOPBACK))
			continue;

		/* use only allowed addresses */
		if (ep->ignore_packets)
			continue;

		/* Need a broadcast-capable interface */
		if (!(ep->flags & INT_BROADCAST))
			continue;

		/* Only IPv4 addresses are valid for broadcast */
		REQUIRE(IS_IPV4(&ep->bcast));

		/* Do we already have the broadcast address open? */
		if (ep->flags & INT_BCASTOPEN) {
			/*
			 * account for already open interfaces to avoid
			 * misleading warning below
			 */
			nif++;
			continue;
		}

		/*
		 * Try to open the broadcast address
		 */
		ep->family = AF_INET;
		ep->bfd = open_socket(&ep->bcast, 1, 0, ep);

		/*
		 * If we succeeded then we use it otherwise enable
		 * broadcast on the interface address.
		 * The failure branch dispatches on whatever errno
		 * holds once open_socket() has returned.
		 */
		if (ep->bfd != INVALID_SOCKET) {
			nif++;
			ep->flags |= INT_BCASTOPEN;
			msyslog(LOG_INFO,
				"Listen for broadcasts to %s on interface #%d %s",
				stoa(&ep->bcast), ep->ifnum, ep->name);
		} else switch (errno) {
			/* Silently ignore EADDRINUSE as we probably
			 * opened the socket already for an address in
			 * the same network */
		case EADDRINUSE:
			/* Some systems cannot bind a socket to a broadcast
			 * address, as that is not a valid host address. */
		case EADDRNOTAVAIL:
#		    ifdef SYS_WINNT	/*TODO: use for other systems, too? */
			/* avoid recurrence here -- if we already have a
			 * regular socket, it's quite useless to try this
			 * again.
			 */
			if (ep->fd != INVALID_SOCKET) {
				ep->flags |= INT_BCASTOPEN;
				nif++;
			}
#		    endif
			break;

		default:
			msyslog(LOG_INFO,
				"failed to listen for broadcasts to %s on interface #%d %s",
				stoa(&ep->bcast), ep->ifnum, ep->name);
			break;
		}
	}
	set_reuseaddr(0);
	if (nif != 0) {
		broadcast_client_enabled = ISC_TRUE;
		DPRINTF(1, ("io_setbclient: listening to %d broadcast addresses\n", nif));
	} else {
		broadcast_client_enabled = ISC_FALSE;
		/* This is expected when having only IPv6 interfaces
		 * and no IPv4 interfaces at all. We suppress the error
		 * log in that case... everything else should work!
		 *
		 * NOTE(review): as written the error is logged only
		 * when IPv4 endpoints were counted and no IPv6 endpoint
		 * was, so it is also suppressed when both families are
		 * present - confirm this matches the intent above.
		 */
		if (ni4 && !ni6) {
			msyslog(LOG_ERR,
				"Unable to listen for broadcasts, no broadcast interfaces available");
		}
	}
#else
	msyslog(LOG_ERR,
		"io_setbclient: Broadcast Client disabled by build");
#endif	/* OPEN_BCAST_SOCKET */
}
2709
2710/*
2711 * io_unsetbclient - close the broadcast client sockets
2712 */
2713void
2714io_unsetbclient(void)
2715{
2716	endpt *ep;
2717
2718	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2719		if (INT_WILDCARD & ep->flags)
2720			continue;
2721		if (!(INT_BCASTOPEN & ep->flags))
2722			continue;
2723
2724		if (ep->bfd != INVALID_SOCKET) {
2725			/* destroy broadcast listening socket */
2726			msyslog(LOG_INFO,
2727				"stop listening for broadcasts to %s on interface #%d %s",
2728				stoa(&ep->bcast), ep->ifnum, ep->name);
2729#		    ifdef HAVE_IO_COMPLETION_PORT
2730			io_completion_port_remove_socket(ep->bfd, ep);
2731#		    endif
2732			close_and_delete_fd_from_list(ep->bfd);
2733			ep->bfd = INVALID_SOCKET;
2734		}
2735		ep->flags &= ~INT_BCASTOPEN;
2736	}
2737	broadcast_client_enabled = ISC_FALSE;
2738}
2739
/*
 * io_multicast_add() - add multicast group address
 *
 * addr		multicast group to start listening on.
 *
 * Silently returns if addr is not multicast, and logs and returns if
 * an endpoint flagged INT_MCASTOPEN is already registered for it.
 *
 * Two build variants follow:
 *  - default: a new endpoint is created and a socket bound to the
 *    group address; if that fails the matching wildcard endpoint is
 *    used instead.  The chosen endpoint then joins the group.
 *  - MULTICAST_NONEWSOCKET: every eligible existing endpoint of the
 *    same family joins the group on its own socket.
 * In both variants addr is finally recorded with add_addr_to_list().
 *
 * Without MCAST support only an error is logged.
 */
void
io_multicast_add(
	sockaddr_u *addr
	)
{
#ifdef MCAST
	endpt *	ep;
	endpt *	one_ep;		/* endpoint recorded for addr in the address list */

	/*
	 * Check to see if this is a multicast address
	 */
	if (!addr_ismulticast(addr))
		return;

	/* If we already have it we can just return */
	if (NULL != find_flagged_addr_in_list(addr, INT_MCASTOPEN)) {
		msyslog(LOG_INFO,
			"Duplicate request found for multicast address %s",
			stoa(addr));
		return;
	}

# ifndef MULTICAST_NONEWSOCKET
	ep = new_interface(NULL);

	/*
	 * Open a new socket for the multicast address
	 */
	ep->sin = *addr;
	SET_PORT(&ep->sin, NTP_PORT);
	ep->family = AF(&ep->sin);
	AF(&ep->mask) = ep->family;
	SET_ONESMASK(&ep->mask);

	/*
	 * NOTE(review): unlike io_setbclient() there is no matching
	 * set_reuseaddr(0) after the socket is opened - confirm that
	 * leaving SO_REUSEADDR set on the endpoint sockets is intended.
	 */
	set_reuseaddr(1);
	ep->bfd = INVALID_SOCKET;
	ep->fd = open_socket(&ep->sin, 0, 0, ep);
	if (ep->fd != INVALID_SOCKET) {
		ep->ignore_packets = ISC_FALSE;
		ep->flags |= INT_MCASTIF;
		ep->ifindex = SCOPE(addr);

		strlcpy(ep->name, "multicast", sizeof(ep->name));
		DPRINT_INTERFACE(2, (ep, "multicast add ", "\n"));
		add_interface(ep);
		log_listen_address(ep);
	} else {
		/* bind failed, re-use wildcard interface */
		delete_interface(ep);

		if (IS_IPV4(addr))
			ep = wildipv4;
		else if (IS_IPV6(addr))
			ep = wildipv6;
		else
			ep = NULL;

		if (ep != NULL) {
			/* HACK ! -- stuff in an address */
			/* because we don't bind addr? DH */
			ep->bcast = *addr;
			msyslog(LOG_ERR,
				"multicast address %s using wildcard interface #%d %s",
				stoa(addr), ep->ifnum, ep->name);
		} else {
			msyslog(LOG_ERR,
				"No multicast socket available to use for address %s",
				stoa(addr));
			return;
		}
	}
	/*
	 * The brace opened below is closed by the shared "}" after the
	 * join call; in this variant it is a plain block, in the
	 * MULTICAST_NONEWSOCKET variant it is the body of the for loop.
	 */
	{	/* in place of the { following for in #else clause */
		one_ep = ep;
# else	/* MULTICAST_NONEWSOCKET follows */
	/*
	 * For the case where we can't use a separate socket (Windows)
	 * join each applicable endpoint socket to the group address.
	 * one_ep defaults to the wildcard of the matching family and is
	 * overwritten by each applicable endpoint visited.
	 */
	if (IS_IPV4(addr))
		one_ep = wildipv4;
	else
		one_ep = wildipv6;
	for (ep = ep_list; ep != NULL; ep = ep->elink) {
		if (ep->ignore_packets || AF(&ep->sin) != AF(addr) ||
		    !(INT_MULTICAST & ep->flags) ||
		    (INT_LOOPBACK | INT_WILDCARD) & ep->flags)
			continue;
		one_ep = ep;
# endif	/* MULTICAST_NONEWSOCKET */
		/* shared by both variants: join ep's socket to the group */
		if (socket_multicast_enable(ep, addr))
			msyslog(LOG_INFO,
				"Joined %s socket to multicast group %s",
				stoa(&ep->sin),
				stoa(addr));
	}

	add_addr_to_list(addr, one_ep);
#else	/* !MCAST  follows*/
	msyslog(LOG_ERR,
		"Can not add multicast address %s: no multicast support",
		stoa(addr));
#endif
	return;
}
2848
2849
2850/*
2851 * io_multicast_del() - delete multicast group address
2852 */
2853void
2854io_multicast_del(
2855	sockaddr_u *	addr
2856	)
2857{
2858#ifdef MCAST
2859	endpt *iface;
2860
2861	/*
2862	 * Check to see if this is a multicast address
2863	 */
2864	if (!addr_ismulticast(addr)) {
2865		msyslog(LOG_ERR, "invalid multicast address %s",
2866			stoa(addr));
2867		return;
2868	}
2869
2870	/*
2871	 * Disable reception of multicast packets
2872	 */
2873	while ((iface = find_flagged_addr_in_list(addr, INT_MCASTOPEN))
2874	       != NULL)
2875		socket_multicast_disable(iface, addr);
2876
2877	delete_addr_from_list(addr);
2878
2879#else /* not MCAST */
2880	msyslog(LOG_ERR,
2881		"Can not delete multicast address %s: no multicast support",
2882		stoa(addr));
2883#endif /* not MCAST */
2884}
2885
2886
2887/*
2888 * open_socket - open a socket, returning the file descriptor
2889 */
2890
2891static SOCKET
2892open_socket(
2893	sockaddr_u *	addr,
2894	int		bcast,
2895	int		turn_off_reuse,
2896	endpt *		interf
2897	)
2898{
2899	SOCKET	fd;
2900	int	errval;
2901	/*
2902	 * int is OK for REUSEADR per
2903	 * http://www.kohala.com/start/mcast.api.txt
2904	 */
2905	int	on = 1;
2906	int	off = 0;
2907
2908	if (IS_IPV6(addr) && !ipv6_works)
2909		return INVALID_SOCKET;
2910
2911	/* create a datagram (UDP) socket */
2912	fd = socket(AF(addr), SOCK_DGRAM, 0);
2913	if (INVALID_SOCKET == fd) {
2914		errval = socket_errno();
2915		msyslog(LOG_ERR,
2916			"socket(AF_INET%s, SOCK_DGRAM, 0) failed on address %s: %m",
2917			IS_IPV6(addr) ? "6" : "", stoa(addr));
2918
2919		if (errval == EPROTONOSUPPORT ||
2920		    errval == EAFNOSUPPORT ||
2921		    errval == EPFNOSUPPORT)
2922			return (INVALID_SOCKET);
2923
2924		errno = errval;
2925		msyslog(LOG_ERR,
2926			"unexpected socket() error %m code %d (not EPROTONOSUPPORT nor EAFNOSUPPORT nor EPFNOSUPPORT) - exiting",
2927			errno);
2928		exit(1);
2929	}
2930
2931#ifdef SYS_WINNT
2932	connection_reset_fix(fd, addr);
2933#endif
2934	/*
2935	 * Fixup the file descriptor for some systems
2936	 * See bug #530 for details of the issue.
2937	 */
2938	fd = move_fd(fd);
2939
2940	/*
2941	 * set SO_REUSEADDR since we will be binding the same port
2942	 * number on each interface according to turn_off_reuse.
2943	 * This is undesirable on Windows versions starting with
2944	 * Windows XP (numeric version 5.1).
2945	 */
2946#ifdef SYS_WINNT
2947	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0)  /* before 5.1 */
2948#endif
2949		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
2950			       (void *)((turn_off_reuse)
2951					    ? &off
2952					    : &on),
2953			       sizeof(on))) {
2954
2955			msyslog(LOG_ERR,
2956				"setsockopt SO_REUSEADDR %s fails for address %s: %m",
2957				(turn_off_reuse)
2958				    ? "off"
2959				    : "on",
2960				stoa(addr));
2961			closesocket(fd);
2962			return INVALID_SOCKET;
2963		}
2964#ifdef SO_EXCLUSIVEADDRUSE
2965	/*
2966	 * setting SO_EXCLUSIVEADDRUSE on the wildcard we open
2967	 * first will cause more specific binds to fail.
2968	 */
2969	if (!(interf->flags & INT_WILDCARD))
2970		set_excladdruse(fd);
2971#endif
2972
2973	/*
2974	 * IPv4 specific options go here
2975	 */
2976	if (IS_IPV4(addr)) {
2977#if defined(IPPROTO_IP) && defined(IP_TOS)
2978		if (setsockopt(fd, IPPROTO_IP, IP_TOS, (void *)&qos,
2979			       sizeof(qos)))
2980			msyslog(LOG_ERR,
2981				"setsockopt IP_TOS (%02x) fails on address %s: %m",
2982				qos, stoa(addr));
2983#endif /* IPPROTO_IP && IP_TOS */
2984		if (bcast)
2985			socket_broadcast_enable(interf, fd, addr);
2986	}
2987
2988	/*
2989	 * IPv6 specific options go here
2990	 */
2991	if (IS_IPV6(addr)) {
2992#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
2993		if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, (void *)&qos,
2994			       sizeof(qos)))
2995			msyslog(LOG_ERR,
2996				"setsockopt IPV6_TCLASS (%02x) fails on address %s: %m",
2997				qos, stoa(addr));
2998#endif /* IPPROTO_IPV6 && IPV6_TCLASS */
2999#ifdef IPV6_V6ONLY
3000		if (isc_net_probe_ipv6only() == ISC_R_SUCCESS
3001		    && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
3002		    (void *)&on, sizeof(on)))
3003			msyslog(LOG_ERR,
3004				"setsockopt IPV6_V6ONLY on fails on address %s: %m",
3005				stoa(addr));
3006#endif
3007#ifdef IPV6_BINDV6ONLY
3008		if (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDV6ONLY,
3009		    (void *)&on, sizeof(on)))
3010			msyslog(LOG_ERR,
3011				"setsockopt IPV6_BINDV6ONLY on fails on address %s: %m",
3012				stoa(addr));
3013#endif
3014	}
3015
3016#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3017	/*
3018	 * some OSes don't allow binding to more specific
3019	 * addresses if a wildcard address already bound
3020	 * to the port and SO_REUSEADDR is not set
3021	 */
3022	if (!is_wildcard_addr(addr))
3023		set_wildcard_reuse(AF(addr), 1);
3024#endif
3025
3026	/*
3027	 * bind the local address.
3028	 */
3029	errval = bind(fd, &addr->sa, SOCKLEN(addr));
3030
3031#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3032	if (!is_wildcard_addr(addr))
3033		set_wildcard_reuse(AF(addr), 0);
3034#endif
3035
3036	if (errval < 0) {
3037		/*
3038		 * Don't log this under all conditions
3039		 */
3040		if (turn_off_reuse == 0
3041#ifdef DEBUG
3042		    || debug > 1
3043#endif
3044		    ) {
3045			msyslog(LOG_ERR,
3046				"bind(%d) AF_INET%s %s#%d%s flags 0x%x failed: %m",
3047				fd, IS_IPV6(addr) ? "6" : "",
3048				stoa(addr), SRCPORT(addr),
3049				IS_MCAST(addr) ? " (multicast)" : "",
3050				interf->flags);
3051		}
3052
3053		closesocket(fd);
3054
3055		return INVALID_SOCKET;
3056	}
3057
3058#ifdef HAVE_TIMESTAMP
3059	{
3060		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP,
3061			       (void *)&on, sizeof(on)))
3062			msyslog(LOG_DEBUG,
3063				"setsockopt SO_TIMESTAMP on fails on address %s: %m",
3064				stoa(addr));
3065		else
3066			DPRINTF(4, ("setsockopt SO_TIMESTAMP enabled on fd %d address %s\n",
3067				    fd, stoa(addr)));
3068	}
3069#endif
3070#ifdef HAVE_TIMESTAMPNS
3071	{
3072		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS,
3073			       (void *)&on, sizeof(on)))
3074			msyslog(LOG_DEBUG,
3075				"setsockopt SO_TIMESTAMPNS on fails on address %s: %m",
3076				stoa(addr));
3077		else
3078			DPRINTF(4, ("setsockopt SO_TIMESTAMPNS enabled on fd %d address %s\n",
3079				    fd, stoa(addr)));
3080	}
3081#endif
3082#ifdef HAVE_BINTIME
3083	{
3084		if (setsockopt(fd, SOL_SOCKET, SO_BINTIME,
3085			       (void *)&on, sizeof(on)))
3086			msyslog(LOG_DEBUG,
3087				"setsockopt SO_BINTIME on fails on address %s: %m",
3088				stoa(addr));
3089		else
3090			DPRINTF(4, ("setsockopt SO_BINTIME enabled on fd %d address %s\n",
3091				    fd, stoa(addr)));
3092	}
3093#endif
3094
3095	DPRINTF(4, ("bind(%d) AF_INET%s, addr %s%%%d#%d, flags 0x%x\n",
3096		   fd, IS_IPV6(addr) ? "6" : "", stoa(addr),
3097		   SCOPE(addr), SRCPORT(addr), interf->flags));
3098
3099	make_socket_nonblocking(fd);
3100
3101#ifdef HAVE_SIGNALED_IO
3102	init_socket_sig(fd);
3103#endif /* not HAVE_SIGNALED_IO */
3104
3105	add_fd_to_list(fd, FD_TYPE_SOCKET);
3106
3107#if !defined(SYS_WINNT) && !defined(VMS)
3108	DPRINTF(4, ("flags for fd %d: 0x%x\n", fd,
3109		    fcntl(fd, F_GETFL, 0)));
3110#endif /* SYS_WINNT || VMS */
3111
3112#if defined(HAVE_IO_COMPLETION_PORT)
3113/*
3114 * Add the socket to the completion port
3115 */
3116	if (!io_completion_port_add_socket(fd, interf, bcast)) {
3117		msyslog(LOG_ERR, "unable to set up io completion port - EXITING");
3118		exit(1);
3119	}
3120#endif
3121	return fd;
3122}
3123
3124
3125
3126/* XXX ELIMINATE sendpkt similar in ntpq.c, ntpdc.c, ntp_io.c, ntptrace.c */
3127/*
3128 * sendpkt - send a packet to the specified destination. Maintain a
3129 * send error cache so that only the first consecutive error for a
3130 * destination is logged.
3131 */
3132void
3133sendpkt(
3134	sockaddr_u *		dest,
3135	struct interface *	ep,
3136	int			ttl,
3137	struct pkt *		pkt,
3138	int			len
3139	)
3140{
3141	endpt *	src;
3142	int	ismcast;
3143	int	cc;
3144	int	rc;
3145	u_char	cttl;
3146	l_fp	fp_zero = { { 0 }, 0 };
3147	l_fp	org, rec, xmt;
3148
3149	ismcast = IS_MCAST(dest);
3150	if (!ismcast)
3151		src = ep;
3152	else
3153		src = (IS_IPV4(dest))
3154			  ? mc4_list
3155			  : mc6_list;
3156
3157	if (NULL == src) {
3158		/*
3159		 * unbound peer - drop request and wait for better
3160		 * network conditions
3161		 */
3162		DPRINTF(2, ("%ssendpkt(dst=%s, ttl=%d, len=%d): no interface - IGNORED\n",
3163			    ismcast ? "\tMCAST\t***** " : "",
3164			    stoa(dest), ttl, len));
3165		return;
3166	}
3167
3168	do {
3169		DPRINTF(2, ("%ssendpkt(%d, dst=%s, src=%s, ttl=%d, len=%d)\n",
3170			    ismcast ? "\tMCAST\t***** " : "", src->fd,
3171			    stoa(dest), stoa(&src->sin), ttl, len));
3172#ifdef MCAST
3173		/*
3174		 * for the moment we use the bcast option to set multicast ttl
3175		 */
3176		if (ismcast && ttl > 0 && ttl != src->last_ttl) {
3177			/*
3178			 * set the multicast ttl for outgoing packets
3179			 */
3180			switch (AF(&src->sin)) {
3181
3182			case AF_INET :
3183				cttl = (u_char)ttl;
3184				rc = setsockopt(src->fd, IPPROTO_IP,
3185						IP_MULTICAST_TTL,
3186						(void *)&cttl,
3187						sizeof(cttl));
3188				break;
3189
3190# ifdef INCLUDE_IPV6_SUPPORT
3191			case AF_INET6 :
3192				rc = setsockopt(src->fd, IPPROTO_IPV6,
3193						 IPV6_MULTICAST_HOPS,
3194						 (void *)&ttl,
3195						 sizeof(ttl));
3196				break;
3197# endif	/* INCLUDE_IPV6_SUPPORT */
3198
3199			default:
3200				rc = 0;
3201			}
3202
3203			if (!rc)
3204				src->last_ttl = ttl;
3205			else
3206				msyslog(LOG_ERR,
3207					"setsockopt IP_MULTICAST_TTL/IPV6_MULTICAST_HOPS fails on address %s: %m",
3208					stoa(&src->sin));
3209		}
3210#endif	/* MCAST */
3211
3212#ifdef SIM
3213		cc = simulate_server(dest, src, pkt);
3214#elif defined(HAVE_IO_COMPLETION_PORT)
3215		cc = io_completion_port_sendto(src, src->fd, pkt,
3216			(size_t)len, (sockaddr_u *)&dest->sa);
3217#else
3218		cc = sendto(src->fd, (char *)pkt, (u_int)len, 0,
3219			    &dest->sa, SOCKLEN(dest));
3220#endif
3221		if (cc == -1) {
3222			src->notsent++;
3223			packets_notsent++;
3224		} else	{
3225			src->sent++;
3226			packets_sent++;
3227		}
3228		if (ismcast)
3229			src = src->mclink;
3230	} while (ismcast && src != NULL);
3231
3232	/* HMS: pkt->rootdisp is usually random here */
3233	NTOHL_FP(&pkt->org, &org);
3234	NTOHL_FP(&pkt->rec, &rec);
3235	NTOHL_FP(&pkt->xmt, &xmt);
3236	record_raw_stats(src ? &src->sin : NULL, dest,
3237			&org, &rec, &xmt, &fp_zero,
3238			PKT_LEAP(pkt->li_vn_mode),
3239			PKT_VERSION(pkt->li_vn_mode),
3240			PKT_MODE(pkt->li_vn_mode),
3241			pkt->stratum,
3242			pkt->ppoll, pkt->precision,
3243			pkt->rootdelay, pkt->rootdisp, pkt->refid,
3244			len - MIN_V4_PKT_LEN, (u_char *)&pkt->exten);
3245
3246	return;
3247}
3248
3249
3250#if !defined(HAVE_IO_COMPLETION_PORT)
3251#if !defined(HAVE_SIGNALED_IO)
/*
 * fdbits - generate ascii representation of fd_set (FAU debug support)
 * HFDF format - highest fd first.
 *
 * count	highest descriptor number to show; larger values are
 *		clamped so the text always fits the buffer.
 * set		descriptor set to render.
 *
 * Returns a pointer to a static, NUL-terminated buffer holding one
 * character per descriptor from count down to 0: '#' if the
 * descriptor is in the set, '-' if not.  A negative count yields an
 * empty string.  The buffer is overwritten by the next call.
 */
static char *
fdbits(
	int		count,
	const fd_set*	set
	)
{
	static char buffer[256];
	char * buf = buffer;
	/*
	 * Descriptors count..0 need count + 1 characters plus the
	 * terminating NUL, so the highest index that still fits is
	 * sizeof(buffer) - 2.
	 */
	const int highest = (int)sizeof(buffer) - 2;

	if (count > highest)
		count = highest;

	while (count >= 0) {
		*buf++ = FD_ISSET(count, set) ? '#' : '-';
		count--;
	}
	*buf = '\0';

	return buffer;
}
3275#endif
3276
#ifdef REFCLOCK
/*
 * read_refclock_packet() - read pending refclock data from fd
 *
 * fd	descriptor to read from.
 * rp	refclock I/O descriptor supplying datalen, srcclock and
 *	clock_recv.
 * ts	receive timestamp stored with the data.
 *
 * Returns the result of read(): the number of bytes read, zero, or
 * a negative value on error (errno is kept intact across the buffer
 * release).  The caller uses this to decide whether to read again
 * or move on.  Without a free receive buffer the data is read into
 * a scratch area and counted as dropped.
 */
static inline int
read_refclock_packet(
	SOCKET			fd,
	struct refclockio *	rp,
	l_fp			ts
	)
{
	struct recvbuf *	rb;
	u_int			want;
	int			got;
	int			err;

	rb = get_free_recv_buffer();
	if (NULL == rb) {
		/* no buffer space available - just drop the packet */
		char scratch[RX_BUFF_SIZE];

		got = read(fd, scratch, sizeof(scratch));
		packets_dropped++;
		return got;
	}

	/*
	 * TALOS-CAN-0064: avoid signed/unsigned clashes that can lead
	 * to buffer overrun and memory corruption.  Anything outside
	 * 1..sizeof(recv_space) falls back to the full buffer size.
	 */
	want = sizeof(rb->recv_space);
	if (rp->datalen > 0 &&
	    (size_t)rp->datalen <= sizeof(rb->recv_space))
		want = (u_int)rp->datalen;

	/* retry when interrupted by a signal */
	do {
		got = read(fd, (char *)&rb->recv_space, want);
	} while (got < 0 && EINTR == errno);

	if (got <= 0) {
		err = errno;
		freerecvbuf(rb);
		errno = err;
		return got;
	}

	/*
	 * Got one. Mark how and when it got here,
	 * then pass it on and do the bookkeeping.
	 */
	rb->recv_length = got;
	rb->recv_peer = rp->srcclock;
	rb->dstadr = 0;
	rb->fd = fd;
	rb->recv_time = ts;
	rb->receiver = rp->clock_recv;

	if (!indicate_refclock_packet(rp, rb)) {
		rp->recvcount++;
		packets_received++;
	}

	return got;
}
#endif	/* REFCLOCK */
3347
3348
#ifdef HAVE_PACKET_TIMESTAMP
/*
 * fetch_timestamp() - extract timestamps from control message buffer
 *
 * rb		receive buffer; only used for collect_timing() under
 *		DEBUG_TIMING.
 * msghdr	message header whose control messages are scanned.
 * ts		timestamp to fall back on.
 *
 * Walks every control message attached to msghdr.  For each
 * SCM_BINTIME, SCM_TIMESTAMPNS or SCM_TIMESTAMP message (whichever
 * the build supports) the carried time is converted to l_fp format,
 * random fuzz scaled by sys_fuzz is added, and the result replaces
 * ts.  Other control messages are skipped.
 *
 * Returns the timestamp of the last matching control message, or ts
 * unchanged if there was none.
 *
 * NOTE(review): messages are selected by cmsg_type alone; cmsg_level
 * is not examined - confirm that is sufficient.
 */
static l_fp
fetch_timestamp(
	struct recvbuf *	rb,
	struct msghdr *		msghdr,
	l_fp			ts
	)
{
	struct cmsghdr *	cmsghdr;
	unsigned long		ticks;
	double			fuzz;
	l_fp			lfpfuzz;
	l_fp			nts;
#ifdef DEBUG_TIMING
	l_fp			dts;
#endif

	cmsghdr = CMSG_FIRSTHDR(msghdr);
	while (cmsghdr != NULL) {
		/*
		 * The outer switch funnels all supported timestamp
		 * types into one shared tail (fuzz, timing, ts = nts);
		 * the inner switch does the per-type conversion.
		 */
		switch (cmsghdr->cmsg_type)
		{
#ifdef HAVE_BINTIME
		case SCM_BINTIME:
#endif  /* HAVE_BINTIME */
#ifdef HAVE_TIMESTAMPNS
		case SCM_TIMESTAMPNS:
#endif	/* HAVE_TIMESTAMPNS */
#ifdef HAVE_TIMESTAMP
		case SCM_TIMESTAMP:
#endif	/* HAVE_TIMESTAMP */
#if defined(HAVE_BINTIME) || defined (HAVE_TIMESTAMPNS) || defined(HAVE_TIMESTAMP)
			switch (cmsghdr->cmsg_type)
			{
#ifdef HAVE_BINTIME
			case SCM_BINTIME:
				{
					struct bintime	pbt;
					memcpy(&pbt, CMSG_DATA(cmsghdr), sizeof(pbt));
					/*
					 * bintime documentation is at http://phk.freebsd.dk/pubs/timecounter.pdf
					 */
					nts.l_i = pbt.sec + JAN_1970;
					nts.l_uf = (u_int32)(pbt.frac >> 32);
					/*
					 * round the fraction down to a whole
					 * number of sys_tick units
					 */
					if (sys_tick > measured_tick &&
					    sys_tick > 1e-9) {
						ticks = (unsigned long)(nts.l_uf / (unsigned long)(sys_tick * FRAC));
						nts.l_uf = (unsigned long)(ticks * (unsigned long)(sys_tick * FRAC));
					}
					DPRINTF(4, ("fetch_timestamp: system bintime network time stamp: %ld.%09lu\n",
						    pbt.sec, (unsigned long)((nts.l_uf / FRAC) * 1e9)));
				}
				break;
#endif  /* HAVE_BINTIME */
#ifdef HAVE_TIMESTAMPNS
			case SCM_TIMESTAMPNS:
				{
					struct timespec	pts;
					memcpy(&pts, CMSG_DATA(cmsghdr), sizeof(pts));
					/*
					 * round the nanoseconds down to a
					 * whole number of sys_tick units
					 */
					if (sys_tick > measured_tick &&
					    sys_tick > 1e-9) {
						ticks = (unsigned long)((pts.tv_nsec * 1e-9) /
									sys_tick);
						pts.tv_nsec = (long)(ticks * 1e9 *
								     sys_tick);
					}
					DPRINTF(4, ("fetch_timestamp: system nsec network time stamp: %ld.%09ld\n",
						    pts.tv_sec, pts.tv_nsec));
					nts = tspec_stamp_to_lfp(pts);
				}
				break;
#endif	/* HAVE_TIMESTAMPNS */
#ifdef HAVE_TIMESTAMP
			case SCM_TIMESTAMP:
				{
					struct timeval	ptv;
					memcpy(&ptv, CMSG_DATA(cmsghdr), sizeof(ptv));
					/*
					 * round the microseconds down to a
					 * whole number of sys_tick units
					 */
					if (sys_tick > measured_tick &&
					    sys_tick > 1e-6) {
						ticks = (unsigned long)((ptv.tv_usec * 1e-6) /
									sys_tick);
						ptv.tv_usec = (long)(ticks * 1e6 *
								    sys_tick);
					}
					DPRINTF(4, ("fetch_timestamp: system usec network time stamp: %jd.%06ld\n",
						    (intmax_t)ptv.tv_sec, (long)ptv.tv_usec));
					nts = tval_stamp_to_lfp(ptv);
				}
				break;
#endif  /* HAVE_TIMESTAMP */
			}
			/* shared tail: add random fuzz scaled by sys_fuzz */
			fuzz = ntp_random() * 2. / FRAC * sys_fuzz;
			DTOLFP(fuzz, &lfpfuzz);
			L_ADD(&nts, &lfpfuzz);
#ifdef DEBUG_TIMING
			/* difference between the passed-in and the network stamp */
			dts = ts;
			L_SUB(&dts, &nts);
			collect_timing(rb, "input processing delay", 1,
				       &dts);
			DPRINTF(4, ("fetch_timestamp: timestamp delta: %s (incl. fuzz)\n",
				    lfptoa(&dts, 9)));
#endif	/* DEBUG_TIMING */
			ts = nts;  /* network time stamp */
			break;
#endif	/* HAVE_BINTIME || HAVE_TIMESTAMPNS || HAVE_TIMESTAMP */

		default:
			DPRINTF(4, ("fetch_timestamp: skipping control message 0x%x\n",
				    cmsghdr->cmsg_type));
		}
		cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr);
	}
	return ts;
}
#endif	/* HAVE_PACKET_TIMESTAMP */
3466
3467
3468/*
3469 * Routine to read the network NTP packets for a specific interface
3470 * Return the number of bytes read. That way we know if we should
3471 * read it again or go on to the next one if no bytes returned
3472 */
3473static inline int
3474read_network_packet(
3475	SOCKET			fd,
3476	struct interface *	itf,
3477	l_fp			ts
3478	)
3479{
3480	GETSOCKNAME_SOCKLEN_TYPE fromlen;
3481	int buflen;
3482	register struct recvbuf *rb;
3483#ifdef HAVE_PACKET_TIMESTAMP
3484	struct msghdr msghdr;
3485	struct iovec iovec;
3486	char control[CMSG_BUFSIZE];
3487#endif
3488
3489	/*
3490	 * Get a buffer and read the frame.  If we
3491	 * haven't got a buffer, or this is received
3492	 * on a disallowed socket, just dump the
3493	 * packet.
3494	 */
3495
3496	rb = get_free_recv_buffer();
3497	if (NULL == rb || itf->ignore_packets) {
3498		char buf[RX_BUFF_SIZE];
3499		sockaddr_u from;
3500
3501		if (rb != NULL)
3502			freerecvbuf(rb);
3503
3504		fromlen = sizeof(from);
3505		buflen = recvfrom(fd, buf, sizeof(buf), 0,
3506				  &from.sa, &fromlen);
3507		DPRINTF(4, ("%s on (%lu) fd=%d from %s\n",
3508			(itf->ignore_packets)
3509			    ? "ignore"
3510			    : "drop",
3511			free_recvbuffs(), fd, stoa(&from)));
3512		if (itf->ignore_packets)
3513			packets_ignored++;
3514		else
3515			packets_dropped++;
3516		return (buflen);
3517	}
3518
3519	fromlen = sizeof(rb->recv_srcadr);
3520
3521#ifndef HAVE_PACKET_TIMESTAMP
3522	rb->recv_length = recvfrom(fd, (char *)&rb->recv_space,
3523				   sizeof(rb->recv_space), 0,
3524				   &rb->recv_srcadr.sa, &fromlen);
3525#else
3526	iovec.iov_base        = &rb->recv_space;
3527	iovec.iov_len         = sizeof(rb->recv_space);
3528	msghdr.msg_name       = &rb->recv_srcadr;
3529	msghdr.msg_namelen    = fromlen;
3530	msghdr.msg_iov        = &iovec;
3531	msghdr.msg_iovlen     = 1;
3532	msghdr.msg_control    = (void *)&control;
3533	msghdr.msg_controllen = sizeof(control);
3534	msghdr.msg_flags      = 0;
3535	rb->recv_length       = recvmsg(fd, &msghdr, 0);
3536#endif
3537
3538	buflen = rb->recv_length;
3539
3540	if (buflen == 0 || (buflen == -1 &&
3541	    (EWOULDBLOCK == errno
3542#ifdef EAGAIN
3543	     || EAGAIN == errno
3544#endif
3545	     ))) {
3546		freerecvbuf(rb);
3547		return (buflen);
3548	} else if (buflen < 0) {
3549		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: %m",
3550			stoa(&rb->recv_srcadr), fd);
3551		DPRINTF(5, ("read_network_packet: fd=%d dropped (bad recvfrom)\n",
3552			    fd));
3553		freerecvbuf(rb);
3554		return (buflen);
3555	}
3556
3557	DPRINTF(3, ("read_network_packet: fd=%d length %d from %s\n",
3558		    fd, buflen, stoa(&rb->recv_srcadr)));
3559
3560#ifdef ENABLE_BUG3020_FIX
3561	if (ISREFCLOCKADR(&rb->recv_srcadr)) {
3562		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: refclock srcadr on a network interface!",
3563			stoa(&rb->recv_srcadr), fd);
3564		DPRINTF(1, ("read_network_packet: fd=%d dropped (refclock srcadr))\n",
3565			    fd));
3566		packets_dropped++;
3567		freerecvbuf(rb);
3568		return (buflen);
3569	}
3570#endif
3571
3572	/*
3573	** Bug 2672: Some OSes (MacOSX and Linux) don't block spoofed ::1
3574	*/
3575
3576	if (AF_INET6 == itf->family) {
3577		DPRINTF(2, ("Got an IPv6 packet, from <%s> (%d) to <%s> (%d)\n",
3578			stoa(&rb->recv_srcadr),
3579			IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr)),
3580			stoa(&itf->sin),
3581			!IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3582			));
3583
3584		if (   IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr))
3585		    && !IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3586		   ) {
3587			packets_dropped++;
3588			DPRINTF(2, ("DROPPING that packet\n"));
3589			freerecvbuf(rb);
3590			return buflen;
3591		}
3592		DPRINTF(2, ("processing that packet\n"));
3593	}
3594
3595	/*
3596	 * Got one.  Mark how and when it got here,
3597	 * put it on the full list and do bookkeeping.
3598	 */
3599	rb->dstadr = itf;
3600	rb->fd = fd;
3601#ifdef HAVE_PACKET_TIMESTAMP
3602	/* pick up a network time stamp if possible */
3603	ts = fetch_timestamp(rb, &msghdr, ts);
3604#endif
3605	rb->recv_time = ts;
3606	rb->receiver = receive;
3607
3608	add_full_recv_buffer(rb);
3609
3610	itf->received++;
3611	packets_received++;
3612	return (buflen);
3613}
3614
3615/*
3616 * attempt to handle io (select()/signaled IO)
3617 */
3618void
3619io_handler(void)
3620{
3621#  ifndef HAVE_SIGNALED_IO
3622	fd_set rdfdes;
3623	int nfound;
3624
3625	/*
3626	 * Use select() on all on all input fd's for unlimited
3627	 * time.  select() will terminate on SIGALARM or on the
3628	 * reception of input.	Using select() means we can't do
3629	 * robust signal handling and we get a potential race
3630	 * between checking for alarms and doing the select().
3631	 * Mostly harmless, I think.
3632	 */
3633	/*
3634	 * On VMS, I suspect that select() can't be interrupted
3635	 * by a "signal" either, so I take the easy way out and
3636	 * have select() time out after one second.
3637	 * System clock updates really aren't time-critical,
3638	 * and - lacking a hardware reference clock - I have
3639	 * yet to learn about anything else that is.
3640	 */
3641	++handler_calls;
3642	rdfdes = activefds;
3643#   if !defined(VMS) && !defined(SYS_VXWORKS)
3644	nfound = select(maxactivefd + 1, &rdfdes, NULL,
3645			NULL, NULL);
3646#   else	/* VMS, VxWorks */
3647	/* make select() wake up after one second */
3648	{
3649		struct timeval t1;
3650		t1.tv_sec  = 1;
3651		t1.tv_usec = 0;
3652		nfound = select(maxactivefd + 1,
3653				&rdfdes, NULL, NULL,
3654				&t1);
3655	}
3656#   endif	/* VMS, VxWorks */
3657	if (nfound < 0 && sanitize_fdset(errno)) {
3658		struct timeval t1;
3659		t1.tv_sec  = 0;
3660		t1.tv_usec = 0;
3661		rdfdes = activefds;
3662		nfound = select(maxactivefd + 1,
3663				&rdfdes, NULL, NULL,
3664				&t1);
3665	}
3666
3667	if (nfound > 0) {
3668		l_fp ts;
3669
3670		get_systime(&ts);
3671
3672		input_handler_scan(&ts, &rdfdes);
3673	} else if (nfound == -1 && errno != EINTR) {
3674		msyslog(LOG_ERR, "select() error: %m");
3675	}
3676#   ifdef DEBUG
3677	else if (debug > 4) {
3678		msyslog(LOG_DEBUG, "select(): nfound=%d, error: %m", nfound);
3679	} else {
3680		DPRINTF(3, ("select() returned %d: %m\n", nfound));
3681	}
3682#   endif /* DEBUG */
3683#  else /* HAVE_SIGNALED_IO */
3684	wait_for_signal();
3685#  endif /* HAVE_SIGNALED_IO */
3686}
3687
3688#ifdef HAVE_SIGNALED_IO
3689/*
3690 * input_handler - receive packets asynchronously
3691 *
3692 * ALWAYS IN SIGNAL HANDLER CONTEXT -- only async-safe functions allowed!
3693 */
3694static RETSIGTYPE
3695input_handler(
3696	l_fp *	cts
3697	)
3698{
3699	int		n;
3700	struct timeval	tvzero;
3701	fd_set		fds;
3702
3703	++handler_calls;
3704
3705	/*
3706	 * Do a poll to see who has data
3707	 */
3708
3709	fds = activefds;
3710	tvzero.tv_sec = tvzero.tv_usec = 0;
3711
3712	n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3713	if (n < 0 && sanitize_fdset(errno)) {
3714		fds = activefds;
3715		tvzero.tv_sec = tvzero.tv_usec = 0;
3716		n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3717	}
3718	if (n > 0)
3719		input_handler_scan(cts, &fds);
3720}
3721#endif /* HAVE_SIGNALED_IO */
3722
3723
3724/*
3725 * Try to sanitize the global FD set
3726 *
3727 * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3728 */
3729static int/*BOOL*/
3730sanitize_fdset(
3731	int	errc
3732	)
3733{
3734	int j, b, maxscan;
3735
3736#  ifndef HAVE_SIGNALED_IO
3737	/*
3738	 * extended FAU debugging output
3739	 */
3740	if (errc != EINTR) {
3741		msyslog(LOG_ERR,
3742			"select(%d, %s, 0L, 0L, &0.0) error: %m",
3743			maxactivefd + 1,
3744			fdbits(maxactivefd, &activefds));
3745	}
3746#   endif
3747
3748	if (errc != EBADF)
3749		return FALSE;
3750
3751	/* if we have oviously bad FDs, try to sanitize the FD set. */
3752	for (j = 0, maxscan = 0; j <= maxactivefd; j++) {
3753		if (FD_ISSET(j, &activefds)) {
3754			if (-1 != read(j, &b, 0)) {
3755				maxscan = j;
3756				continue;
3757			}
3758#		    ifndef HAVE_SIGNALED_IO
3759			msyslog(LOG_ERR,
3760				"Removing bad file descriptor %d from select set",
3761				j);
3762#		    endif
3763			FD_CLR(j, &activefds);
3764		}
3765	}
3766	if (maxactivefd != maxscan)
3767		maxactivefd = maxscan;
3768	return TRUE;
3769}
3770
3771/*
3772 * scan the known FDs (clocks, servers, ...) for presence in a 'fd_set'.
3773 *
3774 * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3775 */
3776static void
3777input_handler_scan(
3778	const l_fp *	cts,
3779	const fd_set *	pfds
3780	)
3781{
3782	int		buflen;
3783	u_int		idx;
3784	int		doing;
3785	SOCKET		fd;
3786	blocking_child *c;
3787	l_fp		ts;	/* Timestamp at BOselect() gob */
3788
3789#if defined(DEBUG_TIMING)
3790	l_fp		ts_e;	/* Timestamp at EOselect() gob */
3791#endif
3792	endpt *		ep;
3793#ifdef REFCLOCK
3794	struct refclockio *rp;
3795	int		saved_errno;
3796	const char *	clk;
3797#endif
3798#ifdef HAS_ROUTING_SOCKET
3799	struct asyncio_reader *	asyncio_reader;
3800	struct asyncio_reader *	next_asyncio_reader;
3801#endif
3802
3803	++handler_pkts;
3804	ts = *cts;
3805
3806#ifdef REFCLOCK
3807	/*
3808	 * Check out the reference clocks first, if any
3809	 */
3810
3811	for (rp = refio; rp != NULL; rp = rp->next) {
3812		fd = rp->fd;
3813
3814		if (!FD_ISSET(fd, pfds))
3815			continue;
3816		buflen = read_refclock_packet(fd, rp, ts);
3817		/*
3818		 * The first read must succeed after select() indicates
3819		 * readability, or we've reached a permanent EOF.
3820		 * http://bugs.ntp.org/1732 reported ntpd munching CPU
3821		 * after a USB GPS was unplugged because select was
3822		 * indicating EOF but ntpd didn't remove the descriptor
3823		 * from the activefds set.
3824		 */
3825		if (buflen < 0 && EAGAIN != errno) {
3826			saved_errno = errno;
3827			clk = refnumtoa(&rp->srcclock->srcadr);
3828			errno = saved_errno;
3829			msyslog(LOG_ERR, "%s read: %m", clk);
3830			maintain_activefds(fd, TRUE);
3831		} else if (0 == buflen) {
3832			clk = refnumtoa(&rp->srcclock->srcadr);
3833			msyslog(LOG_ERR, "%s read EOF", clk);
3834			maintain_activefds(fd, TRUE);
3835		} else {
3836			/* drain any remaining refclock input */
3837			do {
3838				buflen = read_refclock_packet(fd, rp, ts);
3839			} while (buflen > 0);
3840		}
3841	}
3842#endif /* REFCLOCK */
3843
3844	/*
3845	 * Loop through the interfaces looking for data to read.
3846	 */
3847	for (ep = ep_list; ep != NULL; ep = ep->elink) {
3848		for (doing = 0; doing < 2; doing++) {
3849			if (!doing) {
3850				fd = ep->fd;
3851			} else {
3852				if (!(ep->flags & INT_BCASTOPEN))
3853					break;
3854				fd = ep->bfd;
3855			}
3856			if (fd < 0)
3857				continue;
3858			if (FD_ISSET(fd, pfds))
3859				do {
3860					buflen = read_network_packet(
3861							fd, ep, ts);
3862				} while (buflen > 0);
3863			/* Check more interfaces */
3864		}
3865	}
3866
3867#ifdef HAS_ROUTING_SOCKET
3868	/*
3869	 * scan list of asyncio readers - currently only used for routing sockets
3870	 */
3871	asyncio_reader = asyncio_reader_list;
3872
3873	while (asyncio_reader != NULL) {
3874		/* callback may unlink and free asyncio_reader */
3875		next_asyncio_reader = asyncio_reader->link;
3876		if (FD_ISSET(asyncio_reader->fd, pfds))
3877			(*asyncio_reader->receiver)(asyncio_reader);
3878		asyncio_reader = next_asyncio_reader;
3879	}
3880#endif /* HAS_ROUTING_SOCKET */
3881
3882	/*
3883	 * Check for a response from a blocking child
3884	 */
3885	for (idx = 0; idx < blocking_children_alloc; idx++) {
3886		c = blocking_children[idx];
3887		if (NULL == c || -1 == c->resp_read_pipe)
3888			continue;
3889		if (FD_ISSET(c->resp_read_pipe, pfds)) {
3890			++c->resp_ready_seen;
3891			++blocking_child_ready_seen;
3892		}
3893	}
3894
3895	/* We've done our work */
3896#if defined(DEBUG_TIMING)
3897	get_systime(&ts_e);
3898	/*
3899	 * (ts_e - ts) is the amount of time we spent
3900	 * processing this gob of file descriptors.  Log
3901	 * it.
3902	 */
3903	L_SUB(&ts_e, &ts);
3904	collect_timing(NULL, "input handler", 1, &ts_e);
3905	if (debug > 3)
3906		msyslog(LOG_DEBUG,
3907			"input_handler: Processed a gob of fd's in %s msec",
3908			lfptoms(&ts_e, 6));
3909#endif /* DEBUG_TIMING */
3910}
3911#endif /* !HAVE_IO_COMPLETION_PORT */
3912
3913/*
3914 * find an interface suitable for the src address
3915 */
3916endpt *
3917select_peerinterface(
3918	struct peer *	peer,
3919	sockaddr_u *	srcadr,
3920	endpt *		dstadr
3921	)
3922{
3923	endpt *ep;
3924#ifndef SIM
3925	endpt *wild;
3926
3927	wild = ANY_INTERFACE_CHOOSE(srcadr);
3928
3929	/*
3930	 * Initialize the peer structure and dance the interface jig.
3931	 * Reference clocks step the loopback waltz, the others
3932	 * squaredance around the interface list looking for a buddy. If
3933	 * the dance peters out, there is always the wildcard interface.
3934	 * This might happen in some systems and would preclude proper
3935	 * operation with public key cryptography.
3936	 */
3937	if (ISREFCLOCKADR(srcadr)) {
3938		ep = loopback_interface;
3939	} else if (peer->cast_flags &
3940		   (MDF_BCLNT | MDF_ACAST | MDF_MCAST | MDF_BCAST)) {
3941		ep = findbcastinter(srcadr);
3942		if (ep != NULL)
3943			DPRINTF(4, ("Found *-cast interface %s for address %s\n",
3944				stoa(&ep->sin), stoa(srcadr)));
3945		else
3946			DPRINTF(4, ("No *-cast local address found for address %s\n",
3947				stoa(srcadr)));
3948	} else {
3949		ep = dstadr;
3950		if (NULL == ep)
3951			ep = wild;
3952	}
3953	/*
3954	 * If it is a multicast address, findbcastinter() may not find
3955	 * it.  For unicast, we get to find the interface when dstadr is
3956	 * given to us as the wildcard (ANY_INTERFACE_CHOOSE).  Either
3957	 * way, try a little harder.
3958	 */
3959	if (wild == ep)
3960		ep = findinterface(srcadr);
3961	/*
3962	 * we do not bind to the wildcard interfaces for output
3963	 * as our (network) source address would be undefined and
3964	 * crypto will not work without knowing the own transmit address
3965	 */
3966	if (ep != NULL && INT_WILDCARD & ep->flags)
3967		if (!accept_wildcard_if_for_winnt)
3968			ep = NULL;
3969#else	/* SIM follows */
3970	ep = loopback_interface;
3971#endif
3972
3973	return ep;
3974}
3975
3976
3977/*
3978 * findinterface - find local interface corresponding to address
3979 */
3980endpt *
3981findinterface(
3982	sockaddr_u *addr
3983	)
3984{
3985	endpt *iface;
3986
3987	iface = findlocalinterface(addr, INT_WILDCARD, 0);
3988
3989	if (NULL == iface) {
3990		DPRINTF(4, ("Found no interface for address %s - returning wildcard\n",
3991			    stoa(addr)));
3992
3993		iface = ANY_INTERFACE_CHOOSE(addr);
3994	} else
3995		DPRINTF(4, ("Found interface #%d %s for address %s\n",
3996			    iface->ifnum, iface->name, stoa(addr)));
3997
3998	return iface;
3999}
4000
4001/*
4002 * findlocalinterface - find local interface corresponding to addr,
4003 * which does not have any of flags set.  If bast is nonzero, addr is
4004 * a broadcast address.
4005 *
4006 * This code attempts to find the local sending address for an outgoing
4007 * address by connecting a new socket to destinationaddress:NTP_PORT
4008 * and reading the sockname of the resulting connect.
4009 * the complicated sequence simulates the routing table lookup
4010 * for to first hop without duplicating any of the routing logic into
4011 * ntpd. preferably we would have used an API call - but its not there -
4012 * so this is the best we can do here short of duplicating to entire routing
4013 * logic in ntpd which would be a silly and really unportable thing to do.
4014 *
4015 */
4016static endpt *
4017findlocalinterface(
4018	sockaddr_u *	addr,
4019	int		flags,
4020	int		bcast
4021	)
4022{
4023	GETSOCKNAME_SOCKLEN_TYPE	sockaddrlen;
4024	endpt *				iface;
4025	sockaddr_u			saddr;
4026	SOCKET				s;
4027	int				rtn;
4028	int				on;
4029
4030	DPRINTF(4, ("Finding interface for addr %s in list of addresses\n",
4031		    stoa(addr)));
4032
4033	/* [Bug 3437] The dummy POOL peer comes in with an AF of
4034	 * zero. This is bound to fail, but on the way to nowhere it
4035	 * triggers a security incident on SELinux.
4036	 *
4037	 * Checking the condition and failing early is probably a good
4038	 * advice, and even saves us some syscalls in that case.
4039	 * Thanks to Miroslav Lichvar for finding this.
4040	 */
4041	if (AF_UNSPEC == AF(addr))
4042		return NULL;
4043
4044	s = socket(AF(addr), SOCK_DGRAM, 0);
4045	if (INVALID_SOCKET == s)
4046		return NULL;
4047
4048	/*
4049	 * If we are looking for broadcast interface we need to set this
4050	 * socket to allow broadcast
4051	 */
4052	if (bcast) {
4053		on = 1;
4054		if (SOCKET_ERROR == setsockopt(s, SOL_SOCKET,
4055						SO_BROADCAST,
4056						(void *)&on,
4057						sizeof(on))) {
4058			closesocket(s);
4059			return NULL;
4060		}
4061	}
4062
4063	rtn = connect(s, &addr->sa, SOCKLEN(addr));
4064	if (SOCKET_ERROR == rtn) {
4065		closesocket(s);
4066		return NULL;
4067	}
4068
4069	sockaddrlen = sizeof(saddr);
4070	rtn = getsockname(s, &saddr.sa, &sockaddrlen);
4071	closesocket(s);
4072	if (SOCKET_ERROR == rtn)
4073		return NULL;
4074
4075	DPRINTF(4, ("findlocalinterface: kernel maps %s to %s\n",
4076		    stoa(addr), stoa(&saddr)));
4077
4078	iface = getinterface(&saddr, flags);
4079
4080	/*
4081	 * if we didn't find an exact match on saddr, find the closest
4082	 * available local address.  This handles the case of the
4083	 * address suggested by the kernel being excluded by nic rules
4084	 * or the user's -I and -L options to ntpd.
4085	 * See http://bugs.ntp.org/1184 and http://bugs.ntp.org/1683
4086	 * for more background.
4087	 */
4088	if (NULL == iface || iface->ignore_packets)
4089		iface = findclosestinterface(&saddr,
4090					     flags | INT_LOOPBACK);
4091
4092	/* Don't use an interface which will ignore replies */
4093	if (iface != NULL && iface->ignore_packets)
4094		iface = NULL;
4095
4096	return iface;
4097}
4098
4099
4100/*
4101 * findclosestinterface
4102 *
4103 * If there are -I/--interface or -L/novirtualips command-line options,
4104 * or "nic" or "interface" rules in ntp.conf, findlocalinterface() may
4105 * find the kernel's preferred local address for a given peer address is
4106 * administratively unavailable to ntpd, and punt to this routine's more
4107 * expensive search.
4108 *
4109 * Find the numerically closest local address to the one connect()
4110 * suggested.  This matches an address on the same subnet first, as
4111 * needed by Bug 1184, and provides a consistent choice if there are
4112 * multiple feasible local addresses, regardless of the order ntpd
4113 * enumerated them.
4114 */
4115endpt *
4116findclosestinterface(
4117	sockaddr_u *	addr,
4118	int		flags
4119	)
4120{
4121	endpt *		ep;
4122	endpt *		winner;
4123	sockaddr_u	addr_dist;
4124	sockaddr_u	min_dist;
4125
4126	ZERO_SOCK(&min_dist);
4127	winner = NULL;
4128
4129	for (ep = ep_list; ep != NULL; ep = ep->elink) {
4130		if (ep->ignore_packets ||
4131		    AF(addr) != ep->family ||
4132		    flags & ep->flags)
4133			continue;
4134
4135		calc_addr_distance(&addr_dist, addr, &ep->sin);
4136		if (NULL == winner ||
4137		    -1 == cmp_addr_distance(&addr_dist, &min_dist)) {
4138			min_dist = addr_dist;
4139			winner = ep;
4140		}
4141	}
4142	if (NULL == winner)
4143		DPRINTF(4, ("findclosestinterface(%s) failed\n",
4144			    stoa(addr)));
4145	else
4146		DPRINTF(4, ("findclosestinterface(%s) -> %s\n",
4147			    stoa(addr), stoa(&winner->sin)));
4148
4149	return winner;
4150}
4151
4152
4153/*
4154 * calc_addr_distance - calculate the distance between two addresses,
4155 *			the absolute value of the difference between
4156 *			the addresses numerically, stored as an address.
4157 */
4158static void
4159calc_addr_distance(
4160	sockaddr_u *		dist,
4161	const sockaddr_u *	a1,
4162	const sockaddr_u *	a2
4163	)
4164{
4165	u_int32	a1val;
4166	u_int32	a2val;
4167	u_int32	v4dist;
4168	int	found_greater;
4169	int	a1_greater;
4170	int	i;
4171
4172	REQUIRE(AF(a1) == AF(a2));
4173
4174	ZERO_SOCK(dist);
4175	AF(dist) = AF(a1);
4176
4177	/* v4 can be done a bit simpler */
4178	if (IS_IPV4(a1)) {
4179		a1val = SRCADR(a1);
4180		a2val = SRCADR(a2);
4181		v4dist = (a1val > a2val)
4182			     ? a1val - a2val
4183			     : a2val - a1val;
4184		SET_ADDR4(dist, v4dist);
4185
4186		return;
4187	}
4188
4189	found_greater = FALSE;
4190	a1_greater = FALSE;	/* suppress pot. uninit. warning */
4191	for (i = 0; i < (int)sizeof(NSRCADR6(a1)); i++) {
4192		if (!found_greater &&
4193		    NSRCADR6(a1)[i] != NSRCADR6(a2)[i]) {
4194			found_greater = TRUE;
4195			a1_greater = (NSRCADR6(a1)[i] > NSRCADR6(a2)[i]);
4196		}
4197		if (!found_greater) {
4198			NSRCADR6(dist)[i] = 0;
4199		} else {
4200			if (a1_greater)
4201				NSRCADR6(dist)[i] = NSRCADR6(a1)[i] -
4202						    NSRCADR6(a2)[i];
4203			else
4204				NSRCADR6(dist)[i] = NSRCADR6(a2)[i] -
4205						    NSRCADR6(a1)[i];
4206		}
4207	}
4208}
4209
4210
4211/*
4212 * cmp_addr_distance - compare two address distances, returning -1, 0,
4213 *		       1 to indicate their relationship.
4214 */
4215static int
4216cmp_addr_distance(
4217	const sockaddr_u *	d1,
4218	const sockaddr_u *	d2
4219	)
4220{
4221	int	i;
4222
4223	REQUIRE(AF(d1) == AF(d2));
4224
4225	if (IS_IPV4(d1)) {
4226		if (SRCADR(d1) < SRCADR(d2))
4227			return -1;
4228		else if (SRCADR(d1) == SRCADR(d2))
4229			return 0;
4230		else
4231			return 1;
4232	}
4233
4234	for (i = 0; i < (int)sizeof(NSRCADR6(d1)); i++) {
4235		if (NSRCADR6(d1)[i] < NSRCADR6(d2)[i])
4236			return -1;
4237		else if (NSRCADR6(d1)[i] > NSRCADR6(d2)[i])
4238			return 1;
4239	}
4240
4241	return 0;
4242}
4243
4244
4245
4246/*
4247 * fetch an interface structure the matches the
4248 * address and has the given flags NOT set
4249 */
4250endpt *
4251getinterface(
4252	sockaddr_u *	addr,
4253	u_int32		flags
4254	)
4255{
4256	endpt *iface;
4257
4258	iface = find_addr_in_list(addr);
4259
4260	if (iface != NULL && (iface->flags & flags))
4261		iface = NULL;
4262
4263	return iface;
4264}
4265
4266
4267/*
4268 * findbcastinter - find broadcast interface corresponding to address
4269 */
4270endpt *
4271findbcastinter(
4272	sockaddr_u *addr
4273	)
4274{
4275	endpt *	iface;
4276
4277	iface = NULL;
4278#if !defined(MPE) && (defined(SIOCGIFCONF) || defined(SYS_WINNT))
4279	DPRINTF(4, ("Finding broadcast/multicast interface for addr %s in list of addresses\n",
4280		    stoa(addr)));
4281
4282	iface = findlocalinterface(addr, INT_LOOPBACK | INT_WILDCARD,
4283				   1);
4284	if (iface != NULL) {
4285		DPRINTF(4, ("Easily found bcast-/mcast- interface index #%d %s\n",
4286			    iface->ifnum, iface->name));
4287		return iface;
4288	}
4289
4290	/*
4291	 * plan B - try to find something reasonable in our lists in
4292	 * case kernel lookup doesn't help
4293	 */
4294	for (iface = ep_list; iface != NULL; iface = iface->elink) {
4295		if (iface->flags & INT_WILDCARD)
4296			continue;
4297
4298		/* Don't bother with ignored interfaces */
4299		if (iface->ignore_packets)
4300			continue;
4301
4302		/*
4303		 * First look if this is the correct family
4304		 */
4305		if(AF(&iface->sin) != AF(addr))
4306			continue;
4307
4308		/* Skip the loopback addresses */
4309		if (iface->flags & INT_LOOPBACK)
4310			continue;
4311
4312		/*
4313		 * If we are looking to match a multicast address and
4314		 * this interface is one...
4315		 */
4316		if (addr_ismulticast(addr)
4317		    && (iface->flags & INT_MULTICAST)) {
4318#ifdef INCLUDE_IPV6_SUPPORT
4319			/*
4320			 * ...it is the winner unless we're looking for
4321			 * an interface to use for link-local multicast
4322			 * and its address is not link-local.
4323			 */
4324			if (IS_IPV6(addr)
4325			    && IN6_IS_ADDR_MC_LINKLOCAL(PSOCK_ADDR6(addr))
4326			    && !IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&iface->sin)))
4327				continue;
4328#endif
4329			break;
4330		}
4331
4332		/*
4333		 * We match only those interfaces marked as
4334		 * broadcastable and either the explicit broadcast
4335		 * address or the network portion of the IP address.
4336		 * Sloppy.
4337		 */
4338		if (IS_IPV4(addr)) {
4339			if (SOCK_EQ(&iface->bcast, addr))
4340				break;
4341
4342			if ((NSRCADR(&iface->sin) & NSRCADR(&iface->mask))
4343			    == (NSRCADR(addr)	  & NSRCADR(&iface->mask)))
4344				break;
4345		}
4346#ifdef INCLUDE_IPV6_SUPPORT
4347		else if (IS_IPV6(addr)) {
4348			if (SOCK_EQ(&iface->bcast, addr))
4349				break;
4350
4351			if (SOCK_EQ(netof(&iface->sin), netof(addr)))
4352				break;
4353		}
4354#endif
4355	}
4356#endif /* SIOCGIFCONF */
4357	if (NULL == iface) {
4358		DPRINTF(4, ("No bcast interface found for %s\n",
4359			    stoa(addr)));
4360		iface = ANY_INTERFACE_CHOOSE(addr);
4361	} else {
4362		DPRINTF(4, ("Found bcast-/mcast- interface index #%d %s\n",
4363			    iface->ifnum, iface->name));
4364	}
4365
4366	return iface;
4367}
4368
4369
4370/*
4371 * io_clr_stats - clear I/O module statistics
4372 */
4373void
4374io_clr_stats(void)
4375{
4376	packets_dropped = 0;
4377	packets_ignored = 0;
4378	packets_received = 0;
4379	packets_sent = 0;
4380	packets_notsent = 0;
4381
4382	handler_calls = 0;
4383	handler_pkts = 0;
4384	io_timereset = current_time;
4385}
4386
4387
4388#ifdef REFCLOCK
4389/*
4390 * io_addclock - add a reference clock to the list and arrange that we
4391 *				 get SIGIO interrupts from it.
4392 */
4393int
4394io_addclock(
4395	struct refclockio *rio
4396	)
4397{
4398	BLOCKIO();
4399
4400	/*
4401	 * Stuff the I/O structure in the list and mark the descriptor
4402	 * in use.  There is a harmless (I hope) race condition here.
4403	 */
4404	rio->active = TRUE;
4405
4406# ifdef HAVE_SIGNALED_IO
4407	if (init_clock_sig(rio)) {
4408		UNBLOCKIO();
4409		return 0;
4410	}
4411# elif defined(HAVE_IO_COMPLETION_PORT)
4412	if (!io_completion_port_add_clock_io(rio)) {
4413		UNBLOCKIO();
4414		return 0;
4415	}
4416# endif
4417
4418	/*
4419	 * enqueue
4420	 */
4421	LINK_SLIST(refio, rio, next);
4422
4423	/*
4424	 * register fd
4425	 */
4426	add_fd_to_list(rio->fd, FD_TYPE_FILE);
4427
4428	UNBLOCKIO();
4429	return 1;
4430}
4431
4432
4433/*
4434 * io_closeclock - close the clock in the I/O structure given
4435 */
4436void
4437io_closeclock(
4438	struct refclockio *rio
4439	)
4440{
4441	struct refclockio *unlinked;
4442
4443	BLOCKIO();
4444
4445	/*
4446	 * Remove structure from the list
4447	 */
4448	rio->active = FALSE;
4449	UNLINK_SLIST(unlinked, refio, rio, next, struct refclockio);
4450	if (NULL != unlinked) {
4451		/* Close the descriptor. The order of operations is
4452		 * important here in case of async / overlapped IO:
4453		 * only after we have removed the clock from the
4454		 * IO completion port we can be sure no further
4455		 * input is queued. So...
4456		 *  - we first disable feeding to the queu by removing
4457		 *    the clock from the IO engine
4458		 *  - close the file (which brings down any IO on it)
4459		 *  - clear the buffer from results for this fd
4460		 */
4461#	    ifdef HAVE_IO_COMPLETION_PORT
4462		io_completion_port_remove_clock_io(rio);
4463#	    endif
4464		close_and_delete_fd_from_list(rio->fd);
4465		purge_recv_buffers_for_fd(rio->fd);
4466		rio->fd = -1;
4467	}
4468
4469	UNBLOCKIO();
4470}
4471#endif	/* REFCLOCK */
4472
4473
4474/*
4475 * On NT a SOCKET is an unsigned int so we cannot possibly keep it in
4476 * an array. So we use one of the ISC_LIST functions to hold the
4477 * socket value and use that when we want to enumerate it.
4478 *
4479 * This routine is called by the forked intres child process to close
4480 * all open sockets.  On Windows there's no need as intres runs in
4481 * the same process as a thread.
4482 */
4483#ifndef SYS_WINNT
4484void
4485kill_asyncio(
4486	int	startfd
4487	)
4488{
4489	BLOCKIO();
4490
4491	/*
4492	 * In the child process we do not maintain activefds and
4493	 * maxactivefd.  Zeroing maxactivefd disables code which
4494	 * maintains it in close_and_delete_fd_from_list().
4495	 */
4496	maxactivefd = 0;
4497
4498	while (fd_list != NULL)
4499		close_and_delete_fd_from_list(fd_list->fd);
4500
4501	UNBLOCKIO();
4502}
4503#endif	/* !SYS_WINNT */
4504
4505
4506/*
4507 * Add and delete functions for the list of open sockets
4508 */
4509static void
4510add_fd_to_list(
4511	SOCKET fd,
4512	enum desc_type type
4513	)
4514{
4515	vsock_t *lsock = emalloc(sizeof(*lsock));
4516
4517	lsock->fd = fd;
4518	lsock->type = type;
4519
4520	LINK_SLIST(fd_list, lsock, link);
4521	maintain_activefds(fd, 0);
4522}
4523
4524
4525static void
4526close_and_delete_fd_from_list(
4527	SOCKET fd
4528	)
4529{
4530	vsock_t *lsock;
4531
4532	UNLINK_EXPR_SLIST(lsock, fd_list, fd ==
4533	    UNLINK_EXPR_SLIST_CURRENT()->fd, link, vsock_t);
4534
4535	if (NULL == lsock)
4536		return;
4537
4538	switch (lsock->type) {
4539
4540	case FD_TYPE_SOCKET:
4541		closesocket(lsock->fd);
4542		break;
4543
4544	case FD_TYPE_FILE:
4545		closeserial((int)lsock->fd);
4546		break;
4547
4548	default:
4549		msyslog(LOG_ERR,
4550			"internal error - illegal descriptor type %d - EXITING",
4551			(int)lsock->type);
4552		exit(1);
4553	}
4554
4555	free(lsock);
4556	/*
4557	 * remove from activefds
4558	 */
4559	maintain_activefds(fd, 1);
4560}
4561
4562
4563static void
4564add_addr_to_list(
4565	sockaddr_u *	addr,
4566	endpt *		ep
4567	)
4568{
4569	remaddr_t *laddr;
4570
4571#ifdef DEBUG
4572	if (find_addr_in_list(addr) == NULL) {
4573#endif
4574		/* not there yet - add to list */
4575		laddr = emalloc(sizeof(*laddr));
4576		laddr->addr = *addr;
4577		laddr->ep = ep;
4578
4579		LINK_SLIST(remoteaddr_list, laddr, link);
4580
4581		DPRINTF(4, ("Added addr %s to list of addresses\n",
4582			    stoa(addr)));
4583#ifdef DEBUG
4584	} else
4585		DPRINTF(4, ("WARNING: Attempt to add duplicate addr %s to address list\n",
4586			    stoa(addr)));
4587#endif
4588}
4589
4590
4591static void
4592delete_addr_from_list(
4593	sockaddr_u *addr
4594	)
4595{
4596	remaddr_t *unlinked;
4597
4598	UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, SOCK_EQ(addr,
4599		&(UNLINK_EXPR_SLIST_CURRENT()->addr)), link, remaddr_t);
4600
4601	if (unlinked != NULL) {
4602		DPRINTF(4, ("Deleted addr %s from list of addresses\n",
4603			stoa(addr)));
4604		free(unlinked);
4605	}
4606}
4607
4608
4609static void
4610delete_interface_from_list(
4611	endpt *iface
4612	)
4613{
4614	remaddr_t *unlinked;
4615
4616	for (;;) {
4617		UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, iface ==
4618		    UNLINK_EXPR_SLIST_CURRENT()->ep, link,
4619		    remaddr_t);
4620
4621		if (unlinked == NULL)
4622			break;
4623		DPRINTF(4, ("Deleted addr %s for interface #%d %s from list of addresses\n",
4624			    stoa(&unlinked->addr), iface->ifnum,
4625			    iface->name));
4626		free(unlinked);
4627	}
4628}
4629
4630
4631static struct interface *
4632find_addr_in_list(
4633	sockaddr_u *addr
4634	)
4635{
4636	remaddr_t *entry;
4637
4638	DPRINTF(4, ("Searching for addr %s in list of addresses - ",
4639		    stoa(addr)));
4640
4641	for (entry = remoteaddr_list;
4642	     entry != NULL;
4643	     entry = entry->link)
4644		if (SOCK_EQ(&entry->addr, addr)) {
4645			DPRINTF(4, ("FOUND\n"));
4646			return entry->ep;
4647		}
4648
4649	DPRINTF(4, ("NOT FOUND\n"));
4650	return NULL;
4651}
4652
4653
4654/*
4655 * Find the given address with the all given flags set in the list
4656 */
4657static endpt *
4658find_flagged_addr_in_list(
4659	sockaddr_u *	addr,
4660	u_int32		flags
4661	)
4662{
4663	remaddr_t *entry;
4664
4665	DPRINTF(4, ("Finding addr %s with flags %d in list: ",
4666		    stoa(addr), flags));
4667
4668	for (entry = remoteaddr_list;
4669	     entry != NULL;
4670	     entry = entry->link)
4671
4672		if (SOCK_EQ(&entry->addr, addr)
4673		    && (entry->ep->flags & flags) == flags) {
4674
4675			DPRINTF(4, ("FOUND\n"));
4676			return entry->ep;
4677		}
4678
4679	DPRINTF(4, ("NOT FOUND\n"));
4680	return NULL;
4681}
4682
4683
4684const char *
4685localaddrtoa(
4686	endpt *la
4687	)
4688{
4689	return (NULL == la)
4690		   ? "<null>"
4691		   : stoa(&la->sin);
4692}
4693
4694
4695#ifdef HAS_ROUTING_SOCKET
4696# ifndef UPDATE_GRACE
4697#  define UPDATE_GRACE	2	/* wait UPDATE_GRACE seconds before scanning */
4698# endif
4699
4700static void
4701process_routing_msgs(struct asyncio_reader *reader)
4702{
4703	char buffer[5120];
4704	int cnt, msg_type;
4705#ifdef HAVE_RTNETLINK
4706	struct nlmsghdr *nh;
4707#else
4708	struct rt_msghdr rtm;
4709	char *p;
4710#endif
4711
4712	if (disable_dynamic_updates) {
4713		/*
4714		 * discard ourselves if we are not needed any more
4715		 * usually happens when running unprivileged
4716		 */
4717		remove_asyncio_reader(reader);
4718		delete_asyncio_reader(reader);
4719		return;
4720	}
4721
4722	cnt = read(reader->fd, buffer, sizeof(buffer));
4723
4724	if (cnt < 0) {
4725		if (errno == ENOBUFS) {
4726			msyslog(LOG_ERR,
4727				"routing socket reports: %m");
4728		} else {
4729			msyslog(LOG_ERR,
4730				"routing socket reports: %m - disabling");
4731			remove_asyncio_reader(reader);
4732			delete_asyncio_reader(reader);
4733		}
4734		return;
4735	}
4736
4737	/*
4738	 * process routing message
4739	 */
4740#ifdef HAVE_RTNETLINK
4741	for (nh = UA_PTR(struct nlmsghdr, buffer);
4742	     NLMSG_OK(nh, cnt);
4743	     nh = NLMSG_NEXT(nh, cnt)) {
4744		msg_type = nh->nlmsg_type;
4745#else
4746	for (p = buffer;
4747	     (p + sizeof(struct rt_msghdr)) <= (buffer + cnt);
4748	     p += rtm.rtm_msglen) {
4749		memcpy(&rtm, p, sizeof(rtm));
4750		if (rtm.rtm_version != RTM_VERSION) {
4751			msyslog(LOG_ERR,
4752				"version mismatch (got %d - expected %d) on routing socket - disabling",
4753				rtm.rtm_version, RTM_VERSION);
4754
4755			remove_asyncio_reader(reader);
4756			delete_asyncio_reader(reader);
4757			return;
4758		}
4759		msg_type = rtm.rtm_type;
4760#endif
4761		switch (msg_type) {
4762#ifdef RTM_NEWADDR
4763		case RTM_NEWADDR:
4764#endif
4765#ifdef RTM_DELADDR
4766		case RTM_DELADDR:
4767#endif
4768#ifdef RTM_ADD
4769		case RTM_ADD:
4770#endif
4771#ifdef RTM_DELETE
4772		case RTM_DELETE:
4773#endif
4774#ifdef RTM_REDIRECT
4775		case RTM_REDIRECT:
4776#endif
4777#ifdef RTM_CHANGE
4778		case RTM_CHANGE:
4779#endif
4780#ifdef RTM_LOSING
4781		case RTM_LOSING:
4782#endif
4783#ifdef RTM_IFINFO
4784		case RTM_IFINFO:
4785#endif
4786#ifdef RTM_IFANNOUNCE
4787		case RTM_IFANNOUNCE:
4788#endif
4789#ifdef RTM_NEWLINK
4790		case RTM_NEWLINK:
4791#endif
4792#ifdef RTM_DELLINK
4793		case RTM_DELLINK:
4794#endif
4795#ifdef RTM_NEWROUTE
4796		case RTM_NEWROUTE:
4797#endif
4798#ifdef RTM_DELROUTE
4799		case RTM_DELROUTE:
4800#endif
4801			/*
4802			 * we are keen on new and deleted addresses and
4803			 * if an interface goes up and down or routing
4804			 * changes
4805			 */
4806			DPRINTF(3, ("routing message op = %d: scheduling interface update\n",
4807				    msg_type));
4808			timer_interfacetimeout(current_time + UPDATE_GRACE);
4809			break;
4810#ifdef HAVE_RTNETLINK
4811		case NLMSG_DONE:
4812			/* end of multipart message */
4813			return;
4814#endif
4815		default:
4816			/*
4817			 * the rest doesn't bother us.
4818			 */
4819			DPRINTF(4, ("routing message op = %d: ignored\n",
4820				    msg_type));
4821			break;
4822		}
4823	}
4824}
4825
4826/*
4827 * set up routing notifications
4828 */
4829static void
4830init_async_notifications()
4831{
4832	struct asyncio_reader *reader;
4833#ifdef HAVE_RTNETLINK
4834	int fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
4835	struct sockaddr_nl sa;
4836#else
4837	int fd = socket(PF_ROUTE, SOCK_RAW, 0);
4838#endif
4839	if (fd < 0) {
4840		msyslog(LOG_ERR,
4841			"unable to open routing socket (%m) - using polled interface update");
4842		return;
4843	}
4844
4845	fd = move_fd(fd);
4846#ifdef HAVE_RTNETLINK
4847	ZERO(sa);
4848	sa.nl_family = PF_NETLINK;
4849	sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR
4850		       | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_ROUTE
4851		       | RTMGRP_IPV4_MROUTE | RTMGRP_IPV6_ROUTE
4852		       | RTMGRP_IPV6_MROUTE;
4853	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
4854		msyslog(LOG_ERR,
4855			"bind failed on routing socket (%m) - using polled interface update");
4856		return;
4857	}
4858#endif
4859	make_socket_nonblocking(fd);
4860#if defined(HAVE_SIGNALED_IO)
4861	init_socket_sig(fd);
4862#endif /* HAVE_SIGNALED_IO */
4863
4864	reader = new_asyncio_reader();
4865
4866	reader->fd = fd;
4867	reader->receiver = process_routing_msgs;
4868
4869	add_asyncio_reader(reader, FD_TYPE_SOCKET);
4870	msyslog(LOG_INFO,
4871		"Listening on routing socket on fd #%d for interface updates",
4872		fd);
4873}
4874#else
4875/* HAS_ROUTING_SOCKET not defined */
static void
init_async_notifications(void)
{
	/*
	 * Built without routing socket support: there is nothing to
	 * set up here.
	 */
	return;
}
4880#endif
4881
4882