ntp_io.c revision 293896
1/*
2 * ntp_io.c - input/output routines for ntpd.	The socket-opening code
3 *		   was shamelessly stolen from ntpd.
4 */
5
6#ifdef HAVE_CONFIG_H
7# include <config.h>
8#endif
9
10#include <stdio.h>
11#include <signal.h>
12#ifdef HAVE_FNMATCH_H
13# include <fnmatch.h>
14# if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE)
15#  define FNM_CASEFOLD FNM_IGNORECASE
16# endif
17#endif
18#ifdef HAVE_SYS_PARAM_H
19# include <sys/param.h>
20#endif
21#ifdef HAVE_SYS_IOCTL_H
22# include <sys/ioctl.h>
23#endif
24#ifdef HAVE_SYS_SOCKIO_H	/* UXPV: SIOC* #defines (Frank Vance <fvance@waii.com>) */
25# include <sys/sockio.h>
26#endif
27#ifdef HAVE_SYS_UIO_H
28# include <sys/uio.h>
29#endif
30
31#include "ntp_machine.h"
32#include "ntpd.h"
33#include "ntp_io.h"
34#include "iosignal.h"
35#include "ntp_lists.h"
36#include "ntp_refclock.h"
37#include "ntp_stdlib.h"
38#include "ntp_worker.h"
39#include "ntp_request.h"
40#include "ntp_assert.h"
41#include "timevalops.h"
42#include "timespecops.h"
43#include "ntpd-opts.h"
44#include "safecast.h"
45
46/* Don't include ISC's version of IPv6 variables and structures */
47#define ISC_IPV6_H 1
48#include <isc/mem.h>
49#include <isc/interfaceiter.h>
50#include <isc/netaddr.h>
51#include <isc/result.h>
52#include <isc/sockaddr.h>
53
54#ifdef SIM
55#include "ntpsim.h"
56#endif
57
58#ifdef HAS_ROUTING_SOCKET
59# include <net/route.h>
60# ifdef HAVE_RTNETLINK
61#  include <linux/rtnetlink.h>
62# endif
63#endif
64
65
66/*
67 * setsockopt does not always have the same arg declaration
68 * across all platforms. If it's not defined we make it empty
69 */
70
71#ifndef SETSOCKOPT_ARG_CAST
72#define SETSOCKOPT_ARG_CAST
73#endif
74
75extern int listen_to_virtual_ips;
76
77#ifndef IPTOS_DSCP_EF
78#define IPTOS_DSCP_EF 0xb8
79#endif
80int qos = IPTOS_DSCP_EF;	/* QoS RFC3246 */
81
82#ifdef LEAP_SMEAR
83/* TODO burnicki: This should be moved to ntp_timer.c, but if we do so
84 * we get a linker error. Since we're running out of time before the leap
85 * second occurs, we let it here where it just works.
86 */
87int leap_smear_intv;
88#endif
89
/*
 * NIC rule entry
 *
 * One node of the singly linked list headed by nic_rule_list.
 */
typedef struct nic_rule_tag nic_rule;

struct nic_rule_tag {
	nic_rule *	next;		/* next rule in the list */
	nic_rule_action	action;		/* presumably what to do on a match (cf. ACTION_* values) - confirm */
	nic_rule_match	match_type;	/* presumably selects which field below is compared - confirm */
	char *		if_name;	/* presumably interface name/pattern to match - confirm */
	sockaddr_u	addr;		/* presumably address (or network) to match - confirm */
	int		prefixlen;	/* presumably prefix length applied to addr - confirm */
};
103
104/*
105 * NIC rule listhead.  Entries are added at the head so that the first
106 * match in the list is the last matching rule specified.
107 */
108nic_rule *nic_rule_list;
109
110
111#if defined(SO_BINTIME) && defined(SCM_BINTIME) && defined(CMSG_FIRSTHDR)
112#  define HAVE_PACKET_TIMESTAMP
113#  define HAVE_BINTIME
114#  ifdef BINTIME_CTLMSGBUF_SIZE
115#   define CMSG_BUFSIZE BINTIME_CTLMSGBUF_SIZE
116#  else
117#   define CMSG_BUFSIZE  1536 /* moderate default */
118#  endif
119#elif defined(SO_TIMESTAMPNS) && defined(SCM_TIMESTAMPNS) && defined(CMSG_FIRSTHDR)
120#  define HAVE_PACKET_TIMESTAMP
121#  define HAVE_TIMESTAMPNS
122#  ifdef TIMESTAMPNS_CTLMSGBUF_SIZE
123#   define CMSG_BUFSIZE TIMESTAMPNS_CTLMSGBUF_SIZE
124#  else
125#   define CMSG_BUFSIZE  1536 /* moderate default */
126#  endif
127#elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP) && defined(CMSG_FIRSTHDR)
128#  define HAVE_PACKET_TIMESTAMP
129#  define HAVE_TIMESTAMP
130#  ifdef TIMESTAMP_CTLMSGBUF_SIZE
131#   define CMSG_BUFSIZE TIMESTAMP_CTLMSGBUF_SIZE
132#  else
133#   define CMSG_BUFSIZE  1536 /* moderate default */
134#  endif
135#else
136/* fill in for old/other timestamp interfaces */
137#endif
138
139#if defined(SYS_WINNT)
140#include "win32_io.h"
141#include <isc/win32os.h>
142#endif
143
144/*
145 * We do asynchronous input using the SIGIO facility.  A number of
146 * recvbuf buffers are preallocated for input.	In the signal
147 * handler we poll to see which sockets are ready and read the
148 * packets from them into the recvbuf's along with a time stamp and
149 * an indication of the source host and the interface it was received
150 * through.  This allows us to get as accurate receive time stamps
151 * as possible independent of other processing going on.
152 *
153 * We watch the number of recvbufs available to the signal handler
154 * and allocate more when this number drops below the low water
155 * mark.  If the signal handler should run out of buffers in the
156 * interim it will drop incoming frames, the idea being that it is
157 * better to drop a packet than to be inaccurate.
158 */
159
160
161/*
162 * Other statistics of possible interest
163 */
164volatile u_long packets_dropped;	/* total number of packets dropped on reception */
165volatile u_long packets_ignored;	/* packets received on wild card interface */
166volatile u_long packets_received;	/* total number of packets received */
167	 u_long packets_sent;		/* total number of packets sent */
168	 u_long packets_notsent;	/* total number of packets which couldn't be sent */
169
170volatile u_long handler_calls;	/* number of calls to interrupt handler */
171volatile u_long handler_pkts;	/* number of pkts received by handler */
172u_long io_timereset;		/* time counters were reset */
173
174/*
175 * Interface stuff
176 */
177endpt *	any_interface;		/* wildcard ipv4 interface */
178endpt *	any6_interface;		/* wildcard ipv6 interface */
179endpt *	loopback_interface;	/* loopback ipv4 interface */
180
181isc_boolean_t broadcast_client_enabled;	/* is broadcast client enabled */
182u_int sys_ifnum;			/* next .ifnum to assign */
183int ninterfaces;			/* Total number of interfaces */
184
185int disable_dynamic_updates;		/* scan interfaces once only */
186
187#ifdef REFCLOCK
188/*
189 * Refclock stuff.	We keep a chain of structures with data concerning
190 * the guys we are doing I/O for.
191 */
192static	struct refclockio *refio;
193#endif /* REFCLOCK */
194
195/*
196 * File descriptor masks etc. for call to select
197 * Not needed for I/O Completion Ports or anything outside this file
198 */
199static fd_set activefds;
200static int maxactivefd;
201
202/*
203 * bit alternating value to detect verified interfaces during an update cycle
204 */
205static  u_short		sys_interphase = 0;
206
207static endpt *	new_interface(endpt *);
208static void	add_interface(endpt *);
209static int	update_interfaces(u_short, interface_receiver_t,
210				  void *);
211static void	remove_interface(endpt *);
212static endpt *	create_interface(u_short, endpt *);
213
214static int	is_wildcard_addr	(const sockaddr_u *);
215
216/*
217 * Multicast functions
218 */
219static	isc_boolean_t	addr_ismulticast	(sockaddr_u *);
220static	isc_boolean_t	is_anycast		(sockaddr_u *,
221						 const char *);
222
223/*
224 * Not all platforms support multicast
225 */
226#ifdef MCAST
227static	isc_boolean_t	socket_multicast_enable	(endpt *, sockaddr_u *);
228static	isc_boolean_t	socket_multicast_disable(endpt *, sockaddr_u *);
229#endif
230
231#ifdef DEBUG
232static void interface_dump	(const endpt *);
233static void sockaddr_dump	(const sockaddr_u *);
234static void print_interface	(const endpt *, const char *, const char *);
235#define DPRINT_INTERFACE(level, args) do { if (debug >= (level)) { print_interface args; } } while (0)
236#else
237#define DPRINT_INTERFACE(level, args) do {} while (0)
238#endif
239
/*
 * vsock - one tracked descriptor, kept on the singly linked list
 * headed by fd_list.
 */
typedef struct vsock vsock_t;
/* kind of descriptor recorded in a vsock entry */
enum desc_type { FD_TYPE_SOCKET, FD_TYPE_FILE };

struct vsock {
	vsock_t	*	link;	/* next entry in the list */
	SOCKET		fd;	/* the descriptor itself */
	enum desc_type	type;	/* socket or file, see enum desc_type */
};
248
249vsock_t	*fd_list;
250
251#if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
252/*
253 * async notification processing (e. g. routing sockets)
254 */
255/*
256 * support for receiving data on fd that is not a refclock or a socket
257 * like e. g. routing sockets
258 */
259struct asyncio_reader {
260	struct asyncio_reader *link;		    /* the list this is being kept in */
261	SOCKET fd;				    /* fd to be read */
262	void  *data;				    /* possibly local data */
263	void (*receiver)(struct asyncio_reader *);  /* input handler */
264};
265
266struct asyncio_reader *asyncio_reader_list;
267
268static void delete_asyncio_reader (struct asyncio_reader *);
269static struct asyncio_reader *new_asyncio_reader (void);
270static void add_asyncio_reader (struct asyncio_reader *, enum desc_type);
271static void remove_asyncio_reader (struct asyncio_reader *);
272
273#endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
274
275static void init_async_notifications (void);
276
277static	int	addr_eqprefix	(const sockaddr_u *, const sockaddr_u *,
278				 int);
279static int	addr_samesubnet	(const sockaddr_u *, const sockaddr_u *,
280				 const sockaddr_u *, const sockaddr_u *);
281static	int	create_sockets	(u_short);
282static	SOCKET	open_socket	(sockaddr_u *, int, int, endpt *);
283static	char *	fdbits		(int, fd_set *);
284static	void	set_reuseaddr	(int);
285static	isc_boolean_t	socket_broadcast_enable	 (struct interface *, SOCKET, sockaddr_u *);
286#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
287static	isc_boolean_t	socket_broadcast_disable (struct interface *, sockaddr_u *);
288#endif
289
/*
 * remaddr - one node of the singly linked list headed by
 * remoteaddr_list, pairing an address with an endpoint.
 */
typedef struct remaddr remaddr_t;

struct remaddr {
	remaddr_t *		link;	/* next entry in the list */
	sockaddr_u		addr;	/* the recorded address */
	endpt *			ep;	/* endpoint associated with addr */
};
297
298remaddr_t *	remoteaddr_list;
299endpt *		ep_list;	/* complete endpt list */
300endpt *		mc4_list;	/* IPv4 mcast-capable unicast endpts */
301endpt *		mc6_list;	/* IPv6 mcast-capable unicast endpts */
302
303static endpt *	wildipv4;
304static endpt *	wildipv6;
305
306#ifdef SYS_WINNT
307int accept_wildcard_if_for_winnt;
308#else
309const int accept_wildcard_if_for_winnt = FALSE;
310#endif
311
312static void	add_fd_to_list		(SOCKET, enum desc_type);
313static endpt *	find_addr_in_list	(sockaddr_u *);
314static endpt *	find_flagged_addr_in_list(sockaddr_u *, u_int32);
315static void	delete_addr_from_list	(sockaddr_u *);
316static void	delete_interface_from_list(endpt *);
317static void	close_and_delete_fd_from_list(SOCKET);
318static void	add_addr_to_list	(sockaddr_u *, endpt *);
319static void	create_wildcards	(u_short);
320static endpt *	findlocalinterface	(sockaddr_u *, int, int);
321static endpt *	findclosestinterface	(sockaddr_u *, int);
322#ifdef DEBUG
323static const char *	action_text	(nic_rule_action);
324#endif
325static nic_rule_action	interface_action(char *, sockaddr_u *, u_int32);
326static void		convert_isc_if	(isc_interface_t *,
327					 endpt *, u_short);
328static void		calc_addr_distance(sockaddr_u *,
329					   const sockaddr_u *,
330					   const sockaddr_u *);
331static int		cmp_addr_distance(const sockaddr_u *,
332					  const sockaddr_u *);
333
334/*
335 * Routines to read the ntp packets
336 */
337#if !defined(HAVE_IO_COMPLETION_PORT)
338static inline int	read_network_packet	(SOCKET, struct interface *, l_fp);
339static void		ntpd_addremove_io_fd	(int, int, int);
340static input_handler_t  input_handler;
341#ifdef REFCLOCK
342static inline int	read_refclock_packet	(SOCKET, struct refclockio *, l_fp);
343#endif
344#endif
345
346
347
348#ifndef HAVE_IO_COMPLETION_PORT
349void
350maintain_activefds(
351	int fd,
352	int closing
353	)
354{
355	int i;
356
357	if (fd < 0 || fd >= FD_SETSIZE) {
358		msyslog(LOG_ERR,
359			"Too many sockets in use, FD_SETSIZE %d exceeded by fd %d",
360			FD_SETSIZE, fd);
361		exit(1);
362	}
363
364	if (!closing) {
365		FD_SET(fd, &activefds);
366		maxactivefd = max(fd, maxactivefd);
367	} else {
368		FD_CLR(fd, &activefds);
369		if (maxactivefd && fd == maxactivefd) {
370			for (i = maxactivefd - 1; i >= 0; i--)
371				if (FD_ISSET(i, &activefds)) {
372					maxactivefd = i;
373					break;
374				}
375			INSIST(fd != maxactivefd);
376		}
377	}
378}
379#endif	/* !HAVE_IO_COMPLETION_PORT */
380
381
382#ifdef DEBUG_TIMING
383/*
384 * collect timing information for various processing
385 * paths. currently we only pass them on to the file
386 * for later processing. this could also do histogram
387 * based analysis in other to reduce the load (and skew)
388 * dur to the file output
389 */
390void
391collect_timing(struct recvbuf *rb, const char *tag, int count, l_fp *dts)
392{
393	char buf[256];
394
395	snprintf(buf, sizeof(buf), "%s %d %s %s",
396		 (rb != NULL)
397		     ? ((rb->dstadr != NULL)
398			    ? stoa(&rb->recv_srcadr)
399			    : "-REFCLOCK-")
400		     : "-",
401		 count, lfptoa(dts, 9), tag);
402	record_timing_stats(buf);
403}
404#endif
405
406/*
407 * About dynamic interfaces, sockets, reception and more...
408 *
409 * the code solves following tasks:
410 *
 *   - keep a current list of active interfaces in order
 *     to bind to the interface address on NTP_PORT so that
 *     all wild and specific bindings for NTP_PORT are taken by ntpd
 *     to avoid other daemons messing with the time or sockets.
415 *   - all interfaces keep a list of peers that are referencing
416 *     the interface in order to quickly re-assign the peers to
417 *     new interface in case an interface is deleted (=> gone from system or
418 *     down)
419 *   - have a preconfigured socket ready with the right local address
420 *     for transmission and reception
421 *   - have an address list for all destination addresses used within ntpd
422 *     to find the "right" preconfigured socket.
423 *   - facilitate updating the internal interface list with respect to
424 *     the current kernel state
425 *
426 * special issues:
427 *
428 *   - mapping of multicast addresses to the interface affected is not always
429 *     one to one - especially on hosts with multiple interfaces
430 *     the code here currently allocates a separate interface entry for those
431 *     multicast addresses
432 *     iff it is able to bind to a *new* socket with the multicast address (flags |= MCASTIF)
433 *     in case of failure the multicast address is bound to an existing interface.
434 *   - on some systems it is perfectly legal to assign the same address to
435 *     multiple interfaces. Therefore this code does not keep a list of interfaces
436 *     but a list of interfaces that represent a unique address as determined by the kernel
437 *     by the procedure in findlocalinterface. Thus it is perfectly legal to see only
438 *     one representative of a group of real interfaces if they share the same address.
439 *
440 * Frank Kardel 20050910
441 */
442
443/*
444 * init_io - initialize I/O module.
445 */
446void
447init_io(void)
448{
449	/* Init buffer free list and stat counters */
450	init_recvbuff(RECV_INIT);
451	/* update interface every 5 minutes as default */
452	interface_interval = 300;
453
454#ifdef WORK_PIPE
455	addremove_io_fd = &ntpd_addremove_io_fd;
456#endif
457
458#ifdef SYS_WINNT
459	init_io_completion_port();
460#endif
461
462#if defined(HAVE_SIGNALED_IO)
463	(void) set_signal(input_handler);
464#endif
465}
466
467
/*
 * ntpd_addremove_io_fd - start or stop watching a descriptor.
 *
 * fd		descriptor to add to or remove from the active set.
 * is_pipe	unused here.
 * remove_it	zero to add fd, nonzero to remove it.
 *
 * With signal-driven I/O, signal delivery is armed only when the
 * descriptor is being added; a descriptor that is being removed is
 * about to be closed and must not be (re)configured.
 */
static void
ntpd_addremove_io_fd(
	int	fd,
	int	is_pipe,
	int	remove_it
	)
{
	UNUSED_ARG(is_pipe);

#ifdef HAVE_SIGNALED_IO
	if (!remove_it)
		init_socket_sig(fd);
#endif /* HAVE_SIGNALED_IO */

	maintain_activefds(fd, remove_it);
}
483
484
485/*
486 * io_open_sockets - call socket creation routine
487 */
488void
489io_open_sockets(void)
490{
491	static int already_opened;
492
493	if (already_opened || HAVE_OPT( SAVECONFIGQUIT ))
494		return;
495
496	already_opened = 1;
497
498	/*
499	 * Create the sockets
500	 */
501	BLOCKIO();
502	create_sockets(NTP_PORT);
503	UNBLOCKIO();
504
505	init_async_notifications();
506
507	DPRINTF(3, ("io_open_sockets: maxactivefd %d\n", maxactivefd));
508}
509
510
511#ifdef DEBUG
512/*
513 * function to dump the contents of the interface structure
514 * for debugging use only.
515 */
516void
517interface_dump(const endpt *itf)
518{
519	printf("Dumping interface: %p\n", itf);
520	printf("fd = %d\n", itf->fd);
521	printf("bfd = %d\n", itf->bfd);
522	printf("sin = %s,\n", stoa(&itf->sin));
523	sockaddr_dump(&itf->sin);
524	printf("bcast = %s,\n", stoa(&itf->bcast));
525	sockaddr_dump(&itf->bcast);
526	printf("mask = %s,\n", stoa(&itf->mask));
527	sockaddr_dump(&itf->mask);
528	printf("name = %s\n", itf->name);
529	printf("flags = 0x%08x\n", itf->flags);
530	printf("last_ttl = %d\n", itf->last_ttl);
531	printf("addr_refid = %08x\n", itf->addr_refid);
532	printf("num_mcast = %d\n", itf->num_mcast);
533	printf("received = %ld\n", itf->received);
534	printf("sent = %ld\n", itf->sent);
535	printf("notsent = %ld\n", itf->notsent);
536	printf("ifindex = %u\n", itf->ifindex);
537	printf("peercnt = %u\n", itf->peercnt);
538	printf("phase = %u\n", itf->phase);
539}
540
541/*
542 * sockaddr_dump - hex dump the start of a sockaddr_u
543 */
544static void
545sockaddr_dump(const sockaddr_u *psau)
546{
547	/* Limit the size of the sockaddr_in6 hex dump */
548	const int maxsize = min(32, sizeof(psau->sa6));
549	const u_char *	cp;
550	int		i;
551
552	/* XXX: Should we limit maxsize based on psau->saX.sin_family? */
553	cp = (const void *)&psau->sa6;
554
555	for(i = 0; i < maxsize; i++) {
556		printf("%02x", *cp++);
557		if (!((i + 1) % 4))
558			printf(" ");
559	}
560	printf("\n");
561}
562
563/*
564 * print_interface - helper to output debug information
565 */
566static void
567print_interface(const endpt *iface, const char *pfx, const char *sfx)
568{
569	printf("%sinterface #%d: fd=%d, bfd=%d, name=%s, flags=0x%x, ifindex=%u, sin=%s",
570	       pfx,
571	       iface->ifnum,
572	       iface->fd,
573	       iface->bfd,
574	       iface->name,
575	       iface->flags,
576	       iface->ifindex,
577	       stoa(&iface->sin));
578	if (AF_INET == iface->family) {
579		if (iface->flags & INT_BROADCAST)
580			printf(", bcast=%s", stoa(&iface->bcast));
581		printf(", mask=%s", stoa(&iface->mask));
582	}
583	printf(", %s:%s",
584	       (iface->ignore_packets)
585		   ? "Disabled"
586		   : "Enabled",
587	       sfx);
588	if (debug > 4)	/* in-depth debugging only */
589		interface_dump(iface);
590}
591#endif
592
593#if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
594/*
595 * create an asyncio_reader structure
596 */
597static struct asyncio_reader *
598new_asyncio_reader(void)
599{
600	struct asyncio_reader *reader;
601
602	reader = emalloc_zero(sizeof(*reader));
603	reader->fd = INVALID_SOCKET;
604
605	return reader;
606}
607
/*
 * delete_asyncio_reader - release the storage of a reader.
 *
 * Only the structure itself is freed; this function neither unlinks
 * the reader from asyncio_reader_list nor closes its descriptor.
 */
static void
delete_asyncio_reader(struct asyncio_reader *reader)
{
	free(reader);
}
618
619/*
620 * add asynchio_reader
621 */
622static void
623add_asyncio_reader(
624	struct asyncio_reader *	reader,
625	enum desc_type		type)
626{
627	LINK_SLIST(asyncio_reader_list, reader, link);
628	add_fd_to_list(reader->fd, type);
629}
630
631/*
632 * remove asynchio_reader
633 */
634static void
635remove_asyncio_reader(
636	struct asyncio_reader *reader
637	)
638{
639	struct asyncio_reader *unlinked;
640
641	UNLINK_SLIST(unlinked, asyncio_reader_list, reader, link,
642	    struct asyncio_reader);
643
644	if (reader->fd != INVALID_SOCKET)
645		close_and_delete_fd_from_list(reader->fd);
646
647	reader->fd = INVALID_SOCKET;
648}
649#endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
650
651
652/* compare two sockaddr prefixes */
653static int
654addr_eqprefix(
655	const sockaddr_u *	a,
656	const sockaddr_u *	b,
657	int			prefixlen
658	)
659{
660	isc_netaddr_t		isc_a;
661	isc_netaddr_t		isc_b;
662	isc_sockaddr_t		isc_sa;
663
664	ZERO(isc_sa);
665	memcpy(&isc_sa.type, a, min(sizeof(isc_sa.type), sizeof(*a)));
666	isc_netaddr_fromsockaddr(&isc_a, &isc_sa);
667
668	ZERO(isc_sa);
669	memcpy(&isc_sa.type, b, min(sizeof(isc_sa.type), sizeof(*b)));
670	isc_netaddr_fromsockaddr(&isc_b, &isc_sa);
671
672	return (int)isc_netaddr_eqprefix(&isc_a, &isc_b,
673					 (u_int)prefixlen);
674}
675
676
677static int
678addr_samesubnet(
679	const sockaddr_u *	a,
680	const sockaddr_u *	a_mask,
681	const sockaddr_u *	b,
682	const sockaddr_u *	b_mask
683	)
684{
685	const u_int32 *	pa;
686	const u_int32 *	pa_limit;
687	const u_int32 *	pb;
688	const u_int32 *	pm;
689	size_t		loops;
690
691	REQUIRE(AF(a) == AF(a_mask));
692	REQUIRE(AF(b) == AF(b_mask));
693	/*
694	 * With address and mask families verified to match, comparing
695	 * the masks also validates the address's families match.
696	 */
697	if (!SOCK_EQ(a_mask, b_mask))
698		return FALSE;
699
700	if (IS_IPV6(a)) {
701		loops = sizeof(NSRCADR6(a)) / sizeof(*pa);
702		pa = (const void *)&NSRCADR6(a);
703		pb = (const void *)&NSRCADR6(b);
704		pm = (const void *)&NSRCADR6(a_mask);
705	} else {
706		loops = sizeof(NSRCADR(a)) / sizeof(*pa);
707		pa = (const void *)&NSRCADR(a);
708		pb = (const void *)&NSRCADR(b);
709		pm = (const void *)&NSRCADR(a_mask);
710	}
711	for (pa_limit = pa + loops; pa < pa_limit; pa++, pb++, pm++)
712		if ((*pa & *pm) != (*pb & *pm))
713			return FALSE;
714
715	return TRUE;
716}
717
718
719/*
720 * Code to tell if we have an IP address
721 * If we have then return the sockaddr structure
722 * and set the return value
723 * see the bind9/getaddresses.c for details
724 */
725int
726is_ip_address(
727	const char *	host,
728	u_short		af,
729	sockaddr_u *	addr
730	)
731{
732	struct in_addr in4;
733	struct addrinfo hints;
734	struct addrinfo *result;
735	struct sockaddr_in6 *resaddr6;
736	char tmpbuf[128];
737	char *pch;
738
739	REQUIRE(host != NULL);
740	REQUIRE(addr != NULL);
741
742	ZERO_SOCK(addr);
743
744	/*
745	 * Try IPv4, then IPv6.  In order to handle the extended format
746	 * for IPv6 scoped addresses (address%scope_ID), we'll use a local
747	 * working buffer of 128 bytes.  The length is an ad-hoc value, but
748	 * should be enough for this purpose; the buffer can contain a string
749	 * of at least 80 bytes for scope_ID in addition to any IPv6 numeric
750	 * addresses (up to 46 bytes), the delimiter character and the
751	 * terminating NULL character.
752	 */
753	if (AF_UNSPEC == af || AF_INET == af)
754		if (inet_pton(AF_INET, host, &in4) == 1) {
755			AF(addr) = AF_INET;
756			SET_ADDR4N(addr, in4.s_addr);
757
758			return TRUE;
759		}
760
761	if (AF_UNSPEC == af || AF_INET6 == af)
762		if (sizeof(tmpbuf) > strlen(host)) {
763			if ('[' == host[0]) {
764				strlcpy(tmpbuf, &host[1], sizeof(tmpbuf));
765				pch = strchr(tmpbuf, ']');
766				if (pch != NULL)
767					*pch = '\0';
768			} else {
769				strlcpy(tmpbuf, host, sizeof(tmpbuf));
770			}
771			ZERO(hints);
772			hints.ai_family = AF_INET6;
773			hints.ai_flags |= AI_NUMERICHOST;
774			if (getaddrinfo(tmpbuf, NULL, &hints, &result) == 0) {
775				AF(addr) = AF_INET6;
776				resaddr6 = UA_PTR(struct sockaddr_in6, result->ai_addr);
777				SET_ADDR6N(addr, resaddr6->sin6_addr);
778				SET_SCOPE(addr, resaddr6->sin6_scope_id);
779
780				freeaddrinfo(result);
781				return TRUE;
782			}
783		}
784	/*
785	 * If we got here it was not an IP address
786	 */
787	return FALSE;
788}
789
790
791/*
792 * interface list enumerator - visitor pattern
793 */
794void
795interface_enumerate(
796	interface_receiver_t	receiver,
797	void *			data
798	)
799{
800	interface_info_t ifi;
801
802	ifi.action = IFS_EXISTS;
803	for (ifi.ep = ep_list; ifi.ep != NULL; ifi.ep = ifi.ep->elink)
804		(*receiver)(data, &ifi);
805}
806
807/*
808 * do standard initialization of interface structure
809 */
810static void
811init_interface(
812	endpt *ep
813	)
814{
815	ZERO(*ep);
816	ep->fd = INVALID_SOCKET;
817	ep->bfd = INVALID_SOCKET;
818	ep->phase = sys_interphase;
819}
820
821
822/*
823 * create new interface structure initialize from
824 * template structure or via standard initialization
825 * function
826 */
827static struct interface *
828new_interface(
829	struct interface *interface
830	)
831{
832	struct interface *	iface;
833
834	iface = emalloc(sizeof(*iface));
835
836	if (NULL == interface)
837		init_interface(iface);
838	else				/* use the template */
839		memcpy(iface, interface, sizeof(*iface));
840
841	/* count every new instance of an interface in the system */
842	iface->ifnum = sys_ifnum++;
843	iface->starttime = current_time;
844
845	return iface;
846}
847
848
849/*
850 * return interface storage into free memory pool
851 */
852static inline void
853delete_interface(
854	endpt *ep
855	)
856{
857	free(ep);
858}
859
860
/*
 * add_interface - link an endpoint into the list of known interfaces.
 *
 * The endpoint's refid is computed, the endpoint is appended to
 * ep_list and ninterfaces is incremented.
 *
 * When MCAST is defined and the endpoint is enabled, has
 * INT_MULTICAST set and is not a loopback, it is additionally
 * considered for the per-family list of multicast-capable unicast
 * endpoints (mc4_list or mc6_list).  Entries already on that list
 * with the same family and interface name may cause the new endpoint
 * to be left off the list, or may themselves be unlinked in its
 * favor; see the comment ahead of the scan loop.  If the endpoint
 * ends up on the list and has an open socket, the socket's outgoing
 * multicast interface is set.
 */
static void
add_interface(
	endpt *	ep
	)
{
	/* everything below except ep is used only when MCAST is defined */
	endpt **	pmclisthead;
	endpt *		scan;
	endpt *		scan_next;
	endpt *		unlinked;
	sockaddr_u *	addr;
	int		ep_local;
	int		scan_local;
	int		same_subnet;
	int		ep_univ_iid;	/* iface ID from MAC address */
	int		scan_univ_iid;	/* see RFC 4291 */
	int		ep_privacy;	/* random local iface ID */
	int		scan_privacy;	/* see RFC 4941 */
	int		rc;

	/* Calculate the refid */
	ep->addr_refid = addr2refid(&ep->sin);
	/* link at tail so ntpdc -c ifstats index increases each row */
	LINK_TAIL_SLIST(ep_list, ep, elink, endpt);
	ninterfaces++;
#ifdef MCAST
	/* the rest is for enabled multicast-capable addresses only */
	if (ep->ignore_packets || !(INT_MULTICAST & ep->flags) ||
	    INT_LOOPBACK & ep->flags)
		return;
# ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
	if (AF_INET6 == ep->family)
		return;
# endif
	pmclisthead = (AF_INET == ep->family)
			 ? &mc4_list
			 : &mc6_list;

	/* classify the new endpoint; the attributes only apply to IPv6 */
	if (AF_INET6 == ep->family) {
		ep_local =
		    IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&ep->sin)) ||
		    IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(&ep->sin));
		ep_univ_iid = IS_IID_UNIV(&ep->sin);
		ep_privacy = !!(INT_PRIVACY & ep->flags);
	} else {
		ep_local = FALSE;
		ep_univ_iid = FALSE;
		ep_privacy = FALSE;
	}
	DPRINTF(4, ("add_interface mcast-capable %s%s%s%s\n",
		    stoa(&ep->sin),
		    (ep_local) ? " link/scope-local" : "",
		    (ep_univ_iid) ? " univ-IID" : "",
		    (ep_privacy) ? " privacy" : ""));
	/*
	 * If we have multiple local addresses on the same network
	 * interface, and some are link- or site-local, do not multicast
	 * out from the link-/site-local addresses by default, to avoid
	 * duplicate manycastclient associations between v6 peers using
	 * link-local and global addresses.  link-local can still be
	 * chosen using "nic ignore myv6globalprefix::/64".
	 * Similarly, if we have multiple global addresses from the same
	 * prefix on the same network interface, multicast from one,
	 * preferring EUI-64, then static, then least RFC 4941 privacy
	 * addresses.
	 */
	for (scan = *pmclisthead; scan != NULL; scan = scan_next) {
		/* fetch the successor first: scan may be unlinked below */
		scan_next = scan->mclink;
		if (ep->family != scan->family)
			continue;
		/* only entries on the same named interface compete */
		if (strcmp(ep->name, scan->name))
			continue;
		same_subnet = addr_samesubnet(&ep->sin, &ep->mask,
					      &scan->sin, &scan->mask);
		if (AF_INET6 == ep->family) {
			addr = &scan->sin;
			scan_local =
			    IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(addr)) ||
			    IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(addr));
			scan_univ_iid = IS_IID_UNIV(addr);
			scan_privacy = !!(INT_PRIVACY & scan->flags);
		} else {
			scan_local = FALSE;
			scan_univ_iid = FALSE;
			scan_privacy = FALSE;
		}
		DPRINTF(4, ("add_interface mcast-capable scan %s%s%s%s\n",
			    stoa(&scan->sin),
			    (scan_local) ? " link/scope-local" : "",
			    (scan_univ_iid) ? " univ-IID" : "",
			    (scan_privacy) ? " privacy" : ""));
		/*
		 * The existing entry wins: the new endpoint stays on
		 * ep_list but is not put on the multicast list.
		 */
		if ((ep_local && !scan_local) || (same_subnet &&
		    ((ep_privacy && !scan_privacy) ||
		     (!ep_univ_iid && scan_univ_iid)))) {
			DPRINTF(4, ("did not add %s to %s of IPv6 multicast-capable list which already has %s\n",
				stoa(&ep->sin),
				(ep_local)
				    ? "tail"
				    : "head",
				stoa(&scan->sin)));
			return;
		}
		/* the new endpoint wins: drop the existing entry */
		if ((scan_local && !ep_local) || (same_subnet &&
		    ((scan_privacy && !ep_privacy) ||
		     (!scan_univ_iid && ep_univ_iid)))) {
			UNLINK_SLIST(unlinked, *pmclisthead,
				     scan, mclink, endpt);
			DPRINTF(4, ("%s %s from IPv6 multicast-capable list to add %s\n",
				(unlinked != scan)
				    ? "Failed to remove"
				    : "removed",
				stoa(&scan->sin), stoa(&ep->sin)));
		}
	}
	/*
	 * Add link/site local at the tail of the multicast-
	 * capable unicast interfaces list, so that ntpd will
	 * send from global addresses before link-/site-local
	 * ones.
	 */
	if (ep_local)
		LINK_TAIL_SLIST(*pmclisthead, ep, mclink, endpt);
	else
		LINK_SLIST(*pmclisthead, ep, mclink);
	DPRINTF(4, ("added %s to %s of IPv%s multicast-capable unicast local address list\n",
		stoa(&ep->sin),
		(ep_local)
		    ? "tail"
		    : "head",
		(AF_INET == ep->family)
		    ? "4"
		    : "6"));

	/* without an open socket there is nothing to configure */
	if (INVALID_SOCKET == ep->fd)
		return;

	/*
	 * select the local address from which to send to multicast.
	 */
	switch (AF(&ep->sin)) {

	case AF_INET :
		rc = setsockopt(ep->fd, IPPROTO_IP,
				IP_MULTICAST_IF,
				(void *)&NSRCADR(&ep->sin),
				sizeof(NSRCADR(&ep->sin)));
		if (rc)
			msyslog(LOG_ERR,
				"setsockopt IP_MULTICAST_IF %s fails: %m",
				stoa(&ep->sin));
		break;

# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	case AF_INET6 :
		rc = setsockopt(ep->fd, IPPROTO_IPV6,
				 IPV6_MULTICAST_IF,
				 (void *)&ep->ifindex,
				 sizeof(ep->ifindex));
		/* do not complain if bound addr scope is ifindex */
		if (rc && ep->ifindex != SCOPE(&ep->sin))
			msyslog(LOG_ERR,
				"setsockopt IPV6_MULTICAST_IF %u for %s fails: %m",
				ep->ifindex, stoa(&ep->sin));
		break;
# endif
	}
#endif	/* MCAST */
}
1031
1032
1033/*
1034 * remove interface from known interface list and clean up
1035 * associated resources
1036 */
1037static void
1038remove_interface(
1039	endpt *	ep
1040	)
1041{
1042	endpt *		unlinked;
1043	endpt **	pmclisthead;
1044	sockaddr_u	resmask;
1045
1046	UNLINK_SLIST(unlinked, ep_list, ep, elink, endpt);
1047	if (!ep->ignore_packets && INT_MULTICAST & ep->flags) {
1048		pmclisthead = (AF_INET == ep->family)
1049				 ? &mc4_list
1050				 : &mc6_list;
1051		UNLINK_SLIST(unlinked, *pmclisthead, ep, mclink, endpt);
1052		DPRINTF(4, ("%s %s IPv%s multicast-capable unicast local address list\n",
1053			stoa(&ep->sin),
1054			(unlinked != NULL)
1055			    ? "removed from"
1056			    : "not found on",
1057			(AF_INET == ep->family)
1058			    ? "4"
1059			    : "6"));
1060	}
1061	delete_interface_from_list(ep);
1062
1063	if (ep->fd != INVALID_SOCKET) {
1064		msyslog(LOG_INFO,
1065			"Deleting interface #%d %s, %s#%d, interface stats: received=%ld, sent=%ld, dropped=%ld, active_time=%ld secs",
1066			ep->ifnum,
1067			ep->name,
1068			stoa(&ep->sin),
1069			SRCPORT(&ep->sin),
1070			ep->received,
1071			ep->sent,
1072			ep->notsent,
1073			current_time - ep->starttime);
1074		close_and_delete_fd_from_list(ep->fd);
1075		ep->fd = INVALID_SOCKET;
1076	}
1077
1078	if (ep->bfd != INVALID_SOCKET) {
1079		msyslog(LOG_INFO,
1080			"stop listening for broadcasts to %s on interface #%d %s",
1081			stoa(&ep->bcast), ep->ifnum, ep->name);
1082		close_and_delete_fd_from_list(ep->bfd);
1083		ep->bfd = INVALID_SOCKET;
1084		ep->flags &= ~INT_BCASTOPEN;
1085	}
1086
1087	ninterfaces--;
1088	mon_clearinterface(ep);
1089
1090	/* remove restrict interface entry */
1091	SET_HOSTMASK(&resmask, AF(&ep->sin));
1092	hack_restrict(RESTRICT_REMOVEIF, &ep->sin, &resmask,
1093		      RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
1094}
1095
1096
1097static void
1098log_listen_address(
1099	endpt *	ep
1100	)
1101{
1102	msyslog(LOG_INFO, "%s on %d %s %s",
1103		(ep->ignore_packets)
1104		    ? "Listen and drop"
1105		    : "Listen normally",
1106		ep->ifnum,
1107		ep->name,
1108		sptoa(&ep->sin));
1109}
1110
1111
1112static void
1113create_wildcards(
1114	u_short	port
1115	)
1116{
1117	int			v4wild;
1118#ifdef INCLUDE_IPV6_SUPPORT
1119	int			v6wild;
1120#endif
1121	sockaddr_u		wildaddr;
1122	nic_rule_action		action;
1123	struct interface *	wildif;
1124
1125	/*
1126	 * silence "potentially uninitialized" warnings from VC9
1127	 * failing to follow the logic.  Ideally action could remain
1128	 * uninitialized, and the memset be the first statement under
1129	 * the first if (v4wild).
1130	 */
1131	action = ACTION_LISTEN;
1132	ZERO(wildaddr);
1133
1134#ifdef INCLUDE_IPV6_SUPPORT
1135	/*
1136	 * create pseudo-interface with wildcard IPv6 address
1137	 */
1138	v6wild = ipv6_works;
1139	if (v6wild) {
1140		/* set wildaddr to the v6 wildcard address :: */
1141		ZERO(wildaddr);
1142		AF(&wildaddr) = AF_INET6;
1143		SET_ADDR6N(&wildaddr, in6addr_any);
1144		SET_PORT(&wildaddr, port);
1145		SET_SCOPE(&wildaddr, 0);
1146
1147		/* check for interface/nic rules affecting the wildcard */
1148		action = interface_action(NULL, &wildaddr, 0);
1149		v6wild = (ACTION_IGNORE != action);
1150	}
1151	if (v6wild) {
1152		wildif = new_interface(NULL);
1153
1154		strlcpy(wildif->name, "v6wildcard", sizeof(wildif->name));
1155		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1156		wildif->family = AF_INET6;
1157		AF(&wildif->mask) = AF_INET6;
1158		SET_ONESMASK(&wildif->mask);
1159
1160		wildif->flags = INT_UP | INT_WILDCARD;
1161		wildif->ignore_packets = (ACTION_DROP == action);
1162
1163		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1164
1165		if (wildif->fd != INVALID_SOCKET) {
1166			wildipv6 = wildif;
1167			any6_interface = wildif;
1168			add_addr_to_list(&wildif->sin, wildif);
1169			add_interface(wildif);
1170			log_listen_address(wildif);
1171		} else {
1172			msyslog(LOG_ERR,
1173				"unable to bind to wildcard address %s - another process may be running - EXITING",
1174				stoa(&wildif->sin));
1175			exit(1);
1176		}
1177		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1178	}
1179#endif
1180
1181	/*
1182	 * create pseudo-interface with wildcard IPv4 address
1183	 */
1184	v4wild = ipv4_works;
1185	if (v4wild) {
1186		/* set wildaddr to the v4 wildcard address 0.0.0.0 */
1187		AF(&wildaddr) = AF_INET;
1188		SET_ADDR4N(&wildaddr, INADDR_ANY);
1189		SET_PORT(&wildaddr, port);
1190
1191		/* check for interface/nic rules affecting the wildcard */
1192		action = interface_action(NULL, &wildaddr, 0);
1193		v4wild = (ACTION_IGNORE != action);
1194	}
1195	if (v4wild) {
1196		wildif = new_interface(NULL);
1197
1198		strlcpy(wildif->name, "v4wildcard", sizeof(wildif->name));
1199		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1200		wildif->family = AF_INET;
1201		AF(&wildif->mask) = AF_INET;
1202		SET_ONESMASK(&wildif->mask);
1203
1204		wildif->flags = INT_BROADCAST | INT_UP | INT_WILDCARD;
1205		wildif->ignore_packets = (ACTION_DROP == action);
1206#if defined(MCAST)
1207		/*
1208		 * enable multicast reception on the broadcast socket
1209		 */
1210		AF(&wildif->bcast) = AF_INET;
1211		SET_ADDR4N(&wildif->bcast, INADDR_ANY);
1212		SET_PORT(&wildif->bcast, port);
1213#endif /* MCAST */
1214		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1215
1216		if (wildif->fd != INVALID_SOCKET) {
1217			wildipv4 = wildif;
1218			any_interface = wildif;
1219
1220			add_addr_to_list(&wildif->sin, wildif);
1221			add_interface(wildif);
1222			log_listen_address(wildif);
1223		} else {
1224			msyslog(LOG_ERR,
1225				"unable to bind to wildcard address %s - another process may be running - EXITING",
1226				stoa(&wildif->sin));
1227			exit(1);
1228		}
1229		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1230	}
1231}
1232
1233
1234/*
1235 * add_nic_rule() -- insert a rule entry at the head of nic_rule_list.
1236 */
1237void
1238add_nic_rule(
1239	nic_rule_match	match_type,
1240	const char *	if_name,	/* interface name or numeric address */
1241	int		prefixlen,
1242	nic_rule_action	action
1243	)
1244{
1245	nic_rule *	rule;
1246	isc_boolean_t	is_ip;
1247
1248	rule = emalloc_zero(sizeof(*rule));
1249	rule->match_type = match_type;
1250	rule->prefixlen = prefixlen;
1251	rule->action = action;
1252
1253	if (MATCH_IFNAME == match_type) {
1254		REQUIRE(NULL != if_name);
1255		rule->if_name = estrdup(if_name);
1256	} else if (MATCH_IFADDR == match_type) {
1257		REQUIRE(NULL != if_name);
1258		/* set rule->addr */
1259		is_ip = is_ip_address(if_name, AF_UNSPEC, &rule->addr);
1260		REQUIRE(is_ip);
1261	} else
1262		REQUIRE(NULL == if_name);
1263
1264	LINK_SLIST(nic_rule_list, rule, next);
1265}
1266
1267
#ifdef DEBUG
/*
 * action_text - debug helper mapping a nic_rule_action to a short
 * lowercase word.  An unknown value is reported through DPRINTF and
 * ENSURE(0); should execution continue, "ERROR" is returned.
 */
static const char *
action_text(
	nic_rule_action	action
	)
{
	switch (action) {

	case ACTION_LISTEN:
		return "listen";

	case ACTION_IGNORE:
		return "ignore";

	case ACTION_DROP:
		return "drop";

	default:
		break;
	}

	DPRINTF(1, ("fatal: unknown nic_rule_action %d\n",
		    action));
	ENSURE(0);

	return "ERROR";
}
#endif	/* DEBUG */
1301
1302
1303static nic_rule_action
1304interface_action(
1305	char *		if_name,
1306	sockaddr_u *	if_addr,
1307	u_int32		if_flags
1308	)
1309{
1310	nic_rule *	rule;
1311	int		isloopback;
1312	int		iswildcard;
1313
1314	DPRINTF(4, ("interface_action: interface %s ",
1315		    (if_name != NULL) ? if_name : "wildcard"));
1316
1317	iswildcard = is_wildcard_addr(if_addr);
1318	isloopback = !!(INT_LOOPBACK & if_flags);
1319
1320	/*
1321	 * Find any matching NIC rule from --interface / -I or ntp.conf
1322	 * interface/nic rules.
1323	 */
1324	for (rule = nic_rule_list; rule != NULL; rule = rule->next) {
1325
1326		switch (rule->match_type) {
1327
1328		case MATCH_ALL:
1329			/* loopback and wildcard excluded from "all" */
1330			if (isloopback || iswildcard)
1331				break;
1332			DPRINTF(4, ("nic all %s\n",
1333			    action_text(rule->action)));
1334			return rule->action;
1335
1336		case MATCH_IPV4:
1337			if (IS_IPV4(if_addr)) {
1338				DPRINTF(4, ("nic ipv4 %s\n",
1339				    action_text(rule->action)));
1340				return rule->action;
1341			}
1342			break;
1343
1344		case MATCH_IPV6:
1345			if (IS_IPV6(if_addr)) {
1346				DPRINTF(4, ("nic ipv6 %s\n",
1347				    action_text(rule->action)));
1348				return rule->action;
1349			}
1350			break;
1351
1352		case MATCH_WILDCARD:
1353			if (iswildcard) {
1354				DPRINTF(4, ("nic wildcard %s\n",
1355				    action_text(rule->action)));
1356				return rule->action;
1357			}
1358			break;
1359
1360		case MATCH_IFADDR:
1361			if (rule->prefixlen != -1) {
1362				if (addr_eqprefix(if_addr, &rule->addr,
1363						  rule->prefixlen)) {
1364
1365					DPRINTF(4, ("subnet address match - %s\n",
1366					    action_text(rule->action)));
1367					return rule->action;
1368				}
1369			} else
1370				if (SOCK_EQ(if_addr, &rule->addr)) {
1371
1372					DPRINTF(4, ("address match - %s\n",
1373					    action_text(rule->action)));
1374					return rule->action;
1375				}
1376			break;
1377
1378		case MATCH_IFNAME:
1379			if (if_name != NULL
1380#if defined(HAVE_FNMATCH) && defined(FNM_CASEFOLD)
1381			    && !fnmatch(rule->if_name, if_name, FNM_CASEFOLD)
1382#else
1383			    && !strcasecmp(if_name, rule->if_name)
1384#endif
1385			    ) {
1386
1387				DPRINTF(4, ("interface name match - %s\n",
1388				    action_text(rule->action)));
1389				return rule->action;
1390			}
1391			break;
1392		}
1393	}
1394
1395	/*
1396	 * Unless explicitly disabled such as with "nic ignore ::1"
1397	 * listen on loopback addresses.  Since ntpq and ntpdc query
1398	 * "localhost" by default, which typically resolves to ::1 and
1399	 * 127.0.0.1, it's useful to default to listening on both.
1400	 */
1401	if (isloopback) {
1402		DPRINTF(4, ("default loopback listen\n"));
1403		return ACTION_LISTEN;
1404	}
1405
1406	/*
1407	 * Treat wildcard addresses specially.  If there is no explicit
1408	 * "nic ... wildcard" or "nic ... 0.0.0.0" or "nic ... ::" rule
1409	 * default to drop.
1410	 */
1411	if (iswildcard) {
1412		DPRINTF(4, ("default wildcard drop\n"));
1413		return ACTION_DROP;
1414	}
1415
1416	/*
1417	 * Check for "virtual IP" (colon in the interface name) after
1418	 * the rules so that "ntpd --interface eth0:1 -novirtualips"
1419	 * does indeed listen on eth0:1's addresses.
1420	 */
1421	if (!listen_to_virtual_ips && if_name != NULL
1422	    && (strchr(if_name, ':') != NULL)) {
1423
1424		DPRINTF(4, ("virtual ip - ignore\n"));
1425		return ACTION_IGNORE;
1426	}
1427
1428	/*
1429	 * If there are no --interface/-I command-line options and no
1430	 * interface/nic rules in ntp.conf, the default action is to
1431	 * listen.  In the presence of rules from either, the default
1432	 * is to ignore.  This implements ntpd's traditional listen-
1433	 * every default with no interface listen configuration, and
1434	 * ensures a single -I eth0 or "nic listen eth0" means do not
1435	 * listen on any other addresses.
1436	 */
1437	if (NULL == nic_rule_list) {
1438		DPRINTF(4, ("default listen\n"));
1439		return ACTION_LISTEN;
1440	}
1441
1442	DPRINTF(4, ("implicit ignore\n"));
1443	return ACTION_IGNORE;
1444}
1445
1446
1447static void
1448convert_isc_if(
1449	isc_interface_t *isc_if,
1450	endpt *itf,
1451	u_short port
1452	)
1453{
1454	const u_char v6loop[16] = {0, 0, 0, 0, 0, 0, 0, 0,
1455				   0, 0, 0, 0, 0, 0, 0, 1};
1456
1457	strlcpy(itf->name, isc_if->name, sizeof(itf->name));
1458	itf->ifindex = isc_if->ifindex;
1459	itf->family = (u_short)isc_if->af;
1460	AF(&itf->sin) = itf->family;
1461	AF(&itf->mask) = itf->family;
1462	AF(&itf->bcast) = itf->family;
1463	SET_PORT(&itf->sin, port);
1464	SET_PORT(&itf->mask, port);
1465	SET_PORT(&itf->bcast, port);
1466
1467	if (IS_IPV4(&itf->sin)) {
1468		NSRCADR(&itf->sin) = isc_if->address.type.in.s_addr;
1469		NSRCADR(&itf->mask) = isc_if->netmask.type.in.s_addr;
1470
1471		if (isc_if->flags & INTERFACE_F_BROADCAST) {
1472			itf->flags |= INT_BROADCAST;
1473			NSRCADR(&itf->bcast) =
1474			    isc_if->broadcast.type.in.s_addr;
1475		}
1476	}
1477#ifdef INCLUDE_IPV6_SUPPORT
1478	else if (IS_IPV6(&itf->sin)) {
1479		SET_ADDR6N(&itf->sin, isc_if->address.type.in6);
1480		SET_ADDR6N(&itf->mask, isc_if->netmask.type.in6);
1481
1482		SET_SCOPE(&itf->sin, isc_if->address.zone);
1483	}
1484#endif /* INCLUDE_IPV6_SUPPORT */
1485
1486
1487	/* Process the rest of the flags */
1488
1489	itf->flags |=
1490		  ((INTERFACE_F_UP & isc_if->flags)
1491			? INT_UP : 0)
1492		| ((INTERFACE_F_LOOPBACK & isc_if->flags)
1493			? INT_LOOPBACK : 0)
1494		| ((INTERFACE_F_POINTTOPOINT & isc_if->flags)
1495			? INT_PPP : 0)
1496		| ((INTERFACE_F_MULTICAST & isc_if->flags)
1497			? INT_MULTICAST : 0)
1498		| ((INTERFACE_F_PRIVACY & isc_if->flags)
1499			? INT_PRIVACY : 0)
1500		;
1501
1502	/*
1503	 * Clear the loopback flag if the address is not localhost.
1504	 * http://bugs.ntp.org/1683
1505	 */
1506	if (INT_LOOPBACK & itf->flags) {
1507		if (AF_INET == itf->family) {
1508			if (127 != (SRCADR(&itf->sin) >> 24))
1509				itf->flags &= ~INT_LOOPBACK;
1510		} else {
1511			if (memcmp(v6loop, NSRCADR6(&itf->sin),
1512				   sizeof(NSRCADR6(&itf->sin))))
1513				itf->flags &= ~INT_LOOPBACK;
1514		}
1515	}
1516}
1517
1518
1519/*
1520 * refresh_interface
1521 *
1522 * some OSes have been observed to keep
1523 * cached routes even when more specific routes
1524 * become available.
1525 * this can be mitigated by re-binding
1526 * the socket.
1527 */
1528static int
1529refresh_interface(
1530	struct interface * interface
1531	)
1532{
1533#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
1534	if (interface->fd != INVALID_SOCKET) {
1535		int bcast = (interface->flags & INT_BCASTXMIT) != 0;
1536		/* as we forcibly close() the socket remove the
1537		   broadcast permission indication */
1538		if (bcast)
1539			socket_broadcast_disable(interface, &interface->sin);
1540
1541		close_and_delete_fd_from_list(interface->fd);
1542
1543		/* create new socket picking up a new first hop binding
1544		   at connect() time */
1545		interface->fd = open_socket(&interface->sin,
1546					    bcast, 0, interface);
1547		 /*
1548		  * reset TTL indication so TTL is is set again
1549		  * next time around
1550		  */
1551		interface->last_ttl = 0;
1552		return (interface->fd != INVALID_SOCKET);
1553	} else
1554		return 0;	/* invalid sockets are not refreshable */
1555#else /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1556	return (interface->fd != INVALID_SOCKET);
1557#endif /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1558}
1559
1560/*
1561 * interface_update - externally callable update function
1562 */
1563void
1564interface_update(
1565	interface_receiver_t	receiver,
1566	void *			data)
1567{
1568	int new_interface_found;
1569
1570	if (disable_dynamic_updates)
1571		return;
1572
1573	BLOCKIO();
1574	new_interface_found = update_interfaces(NTP_PORT, receiver, data);
1575	UNBLOCKIO();
1576
1577	if (!new_interface_found)
1578		return;
1579
1580#ifdef DEBUG
1581	msyslog(LOG_DEBUG, "new interface(s) found: waking up resolver");
1582#endif
1583	interrupt_worker_sleep();
1584}
1585
1586
1587/*
1588 * sau_from_netaddr() - convert network address on-wire formats.
1589 * Convert from libisc's isc_netaddr_t to NTP's sockaddr_u
1590 */
1591void
1592sau_from_netaddr(
1593	sockaddr_u *psau,
1594	const isc_netaddr_t *pna
1595	)
1596{
1597	ZERO_SOCK(psau);
1598	AF(psau) = (u_short)pna->family;
1599	switch (pna->family) {
1600
1601	case AF_INET:
1602		memcpy(&psau->sa4.sin_addr, &pna->type.in,
1603		       sizeof(psau->sa4.sin_addr));
1604		break;
1605
1606	case AF_INET6:
1607		memcpy(&psau->sa6.sin6_addr, &pna->type.in6,
1608		       sizeof(psau->sa6.sin6_addr));
1609		break;
1610	}
1611}
1612
1613
1614static int
1615is_wildcard_addr(
1616	const sockaddr_u *psau
1617	)
1618{
1619	if (IS_IPV4(psau) && !NSRCADR(psau))
1620		return 1;
1621
1622#ifdef INCLUDE_IPV6_SUPPORT
1623	if (IS_IPV6(psau) && S_ADDR6_EQ(psau, &in6addr_any))
1624		return 1;
1625#endif
1626
1627	return 0;
1628}
1629
1630
#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
/*
 * set_wildcard_reuse - switch SO_REUSEADDR on or off for the
 * wildcard socket of the given address family.
 *
 * Nothing is done when ANY_INTERFACE_BYFAM() yields no endpoint or
 * the endpoint has no valid socket.  A setsockopt() failure is
 * logged and otherwise ignored.
 */
static void
set_wildcard_reuse(
	u_short	family,
	int	on
	)
{
	struct interface *wild;

	wild = ANY_INTERFACE_BYFAM(family);
	if (NULL == wild || INVALID_SOCKET == wild->fd)
		return;

	if (setsockopt(wild->fd, SOL_SOCKET, SO_REUSEADDR,
		       (char *)&on, sizeof(on)))
		msyslog(LOG_ERR,
			"set_wildcard_reuse: setsockopt(SO_REUSEADDR, %s) failed: %m",
			on ? "on" : "off");

	DPRINTF(4, ("set SO_REUSEADDR to %s on %s\n",
		    on ? "on" : "off",
		    stoa(&wild->sin)));
}
#endif /* OS_NEEDS_REUSEADDR_FOR_IFADDRBIND */
1661
1662
1663static isc_boolean_t
1664check_flags6(
1665	sockaddr_u *psau,
1666	const char *name,
1667	u_int32 flags6
1668	)
1669{
1670#if defined(INCLUDE_IPV6_SUPPORT) && defined(SIOCGIFAFLAG_IN6)
1671	struct in6_ifreq ifr6;
1672	int fd;
1673
1674	if (psau->sa.sa_family != AF_INET6)
1675		return ISC_FALSE;
1676	if ((fd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
1677		return ISC_FALSE;
1678	ZERO(ifr6);
1679	memcpy(&ifr6.ifr_addr, &psau->sa6, sizeof(ifr6.ifr_addr));
1680	strlcpy(ifr6.ifr_name, name, sizeof(ifr6.ifr_name));
1681	if (ioctl(fd, SIOCGIFAFLAG_IN6, &ifr6) < 0) {
1682		close(fd);
1683		return ISC_FALSE;
1684	}
1685	close(fd);
1686	if ((ifr6.ifr_ifru.ifru_flags6 & flags6) != 0)
1687		return ISC_TRUE;
1688#endif	/* INCLUDE_IPV6_SUPPORT && SIOCGIFAFLAG_IN6 */
1689	return ISC_FALSE;
1690}
1691
1692static isc_boolean_t
1693is_anycast(
1694	sockaddr_u *psau,
1695	const char *name
1696	)
1697{
1698#ifdef IN6_IFF_ANYCAST
1699	return check_flags6(psau, name, IN6_IFF_ANYCAST);
1700#else
1701	return ISC_FALSE;
1702#endif
1703}
1704
1705static isc_boolean_t
1706is_valid(
1707	sockaddr_u *psau,
1708	const char *name
1709	)
1710{
1711	u_int32 flags6;
1712
1713	flags6 = 0;
1714#ifdef IN6_IFF_DEPARTED
1715	flags6 |= IN6_IFF_DEPARTED;
1716#endif
1717#ifdef IN6_IFF_DETACHED
1718	flags6 |= IN6_IFF_DETACHED;
1719#endif
1720#ifdef IN6_IFF_TENTATIVE
1721	flags6 |= IN6_IFF_TENTATIVE;
1722#endif
1723	return check_flags6(psau, name, flags6) ? ISC_FALSE : ISC_TRUE;
1724}
1725
/*
 * update_interfaces - rescan the system's interface addresses and
 * bring the endpoint list in line with them.
 *
 * port		port used for the prototype endpoints and passed on
 *		to create_interface()
 * receiver	optional callback (may be NULL) invoked with data and
 *		an interface_info_t for every endpoint found to
 *		exist (IFS_EXISTS), created (IFS_CREATED) or deleted
 *		(IFS_DELETED)
 * data		opaque pointer handed to receiver
 *
 * Returns TRUE if at least one endpoint was created during this
 * scan, FALSE otherwise.  Returns 0 without changing anything if
 * the interface iterator cannot be created.
 *
 * update_interface strategy
 *
 * toggle configuration phase
 *
 * Phase 1:
 * forall currently existing interfaces
 *   if address is known:
 *	drop socket - rebind again
 *
 *   if address is NOT known:
 *	attempt to create a new interface entry
 *
 * Phase 2:
 * forall currently known non MCAST and WILDCARD interfaces
 *   if interface does not match configuration phase (not seen in phase 1):
 *	remove interface from known interface list
 *	forall peers associated with this interface
 *         disconnect peer from this interface
 *
 * Phase 3:
 *   attempt to re-assign interfaces to peers
 *
 */

static int
update_interfaces(
	u_short			port,
	interface_receiver_t	receiver,
	void *			data
	)
{
	/*
	 * NOTE(review): mctx is a dummy non-NULL value handed to
	 * isc_interfaceiter_create(); presumably the memory context
	 * is never dereferenced - confirm against the iterator
	 * implementation in use.
	 */
	isc_mem_t *		mctx = (void *)-1;
	interface_info_t	ifi;
	isc_interfaceiter_t *	iter;
	isc_result_t		result;
	isc_interface_t		isc_if;
	int			new_interface_found;
	unsigned int		family;
	endpt			enumep;		/* prototype for the address under examination */
	endpt *			ep;
	endpt *			next_ep;

	DPRINTF(3, ("update_interfaces(%d)\n", port));

	/*
	 * phase one - scan interfaces
	 * - create those that are not found
	 * - update those that are found
	 */

	new_interface_found = FALSE;
	iter = NULL;
	result = isc_interfaceiter_create(mctx, &iter);

	if (result != ISC_R_SUCCESS)
		return 0;

	/*
	 * Toggle system interface scan phase to find untouched
	 * interfaces to be deleted.
	 */
	sys_interphase ^= 0x1;

	for (result = isc_interfaceiter_first(iter);
	     ISC_R_SUCCESS == result;
	     result = isc_interfaceiter_next(iter)) {

		result = isc_interfaceiter_current(iter, &isc_if);

		if (result != ISC_R_SUCCESS)
			break;

		/* See if we have a valid family to use */
		family = isc_if.address.family;
		if (AF_INET != family && AF_INET6 != family)
			continue;
		if (AF_INET == family && !ipv4_works)
			continue;
		if (AF_INET6 == family && !ipv6_works)
			continue;

		/* create prototype */
		init_interface(&enumep);

		convert_isc_if(&isc_if, &enumep, port);

		DPRINT_INTERFACE(4, (&enumep, "examining ", "\n"));

		/*
		 * Check if and how we are going to use the interface.
		 * Note the "continue" below acts on the enclosing for
		 * loop, skipping the address altogether.
		 */
		switch (interface_action(enumep.name, &enumep.sin,
					 enumep.flags)) {

		case ACTION_IGNORE:
			DPRINTF(4, ("ignoring interface %s (%s) - by nic rules\n",
				    enumep.name, stoa(&enumep.sin)));
			continue;

		case ACTION_LISTEN:
			DPRINTF(4, ("listen interface %s (%s) - by nic rules\n",
				    enumep.name, stoa(&enumep.sin)));
			enumep.ignore_packets = ISC_FALSE;
			break;

		case ACTION_DROP:
			DPRINTF(4, ("drop on interface %s (%s) - by nic rules\n",
				    enumep.name, stoa(&enumep.sin)));
			enumep.ignore_packets = ISC_TRUE;
			break;
		}

		 /* interfaces must be UP to be usable */
		if (!(enumep.flags & INT_UP)) {
			DPRINTF(4, ("skipping interface %s (%s) - DOWN\n",
				    enumep.name, stoa(&enumep.sin)));
			continue;
		}

		/*
		 * skip any interfaces UP and bound to a wildcard
		 * address - some dhcp clients produce that in the
		 * wild
		 */
		if (is_wildcard_addr(&enumep.sin))
			continue;

		/* anycast addresses are skipped as well */
		if (is_anycast(&enumep.sin, isc_if.name))
			continue;

		/*
		 * skip any address that is an invalid state to be used
		 */
		if (!is_valid(&enumep.sin, isc_if.name))
			continue;

		/*
		 * map to local *address* in order to map all duplicate
		 * interfaces to an endpt structure with the appropriate
		 * socket.  Our name space is (ip-address), NOT
		 * (interface name, ip-address).
		 *
		 * NOTE(review): INT_WILDCARD is presumably a mask of
		 * endpoint flags to exclude from the lookup - confirm
		 * against getinterface().
		 */
		ep = getinterface(&enumep.sin, INT_WILDCARD);

		if (ep != NULL && refresh_interface(ep)) {
			/*
			 * found existing and up to date interface -
			 * mark present.
			 */
			if (ep->phase != sys_interphase) {
				/*
				 * On a new round we reset the name so
				 * the interface name shows up again if
				 * this address is no longer shared.
				 * We reset ignore_packets from the
				 * new prototype to respect any runtime
				 * changes to the nic rules.
				 */
				strlcpy(ep->name, enumep.name,
					sizeof(ep->name));
				ep->ignore_packets =
					    enumep.ignore_packets;
			} else {
				/*
				 * Already seen in this scan: the
				 * address is configured on more than
				 * one interface.
				 */
				/* name collision - rename interface */
				strlcpy(ep->name, "*multiple*",
					sizeof(ep->name));
			}

			DPRINT_INTERFACE(4, (ep, "updating ",
					     " present\n"));

			if (ep->ignore_packets !=
			    enumep.ignore_packets) {
				/*
				 * We have conflicting configurations
				 * for the interface address. This is
				 * caused by using -I <interfacename>
				 * for an interface that shares its
				 * address with other interfaces. We
				 * can not disambiguate incoming
				 * packets delivered to this socket
				 * without extra syscalls/features.
				 * These are not (commonly) available.
				 * Note this is a more unusual
				 * configuration where several
				 * interfaces share an address but
				 * filtering via interface name is
				 * attempted.  We resolve the
				 * configuration conflict by disabling
				 * the processing of received packets.
				 * This leads to no service on the
				 * interface address where the conflict
				 * occurs.
				 */
				msyslog(LOG_ERR,
					"WARNING: conflicting enable configuration for interfaces %s and %s for address %s - unsupported configuration - address DISABLED",
					enumep.name, ep->name,
					stoa(&enumep.sin));

				ep->ignore_packets = ISC_TRUE;
			}

			/* mark the endpoint as seen in this scan */
			ep->phase = sys_interphase;

			ifi.action = IFS_EXISTS;
			ifi.ep = ep;
			if (receiver != NULL)
				(*receiver)(data, &ifi);
		} else {
			/*
			 * This is new or refreshing failed - add to
			 * our interface list.  If refreshing failed we
			 * will delete the interface structure in phase
			 * 2 as the interface was not marked current.
			 * We can bind to the address as the refresh
			 * code already closed the offending socket
			 */
			ep = create_interface(port, &enumep);

			if (ep != NULL) {
				ifi.action = IFS_CREATED;
				ifi.ep = ep;
				if (receiver != NULL)
					(*receiver)(data, &ifi);

				new_interface_found = TRUE;
				DPRINT_INTERFACE(3,
					(ep, "updating ",
					 " new - created\n"));
			} else {
				DPRINT_INTERFACE(3,
					(&enumep, "updating ",
					 " new - creation FAILED"));

				msyslog(LOG_INFO,
					"failed to init interface for address %s",
					stoa(&enumep.sin));
				continue;
			}
		}
	}

	isc_interfaceiter_destroy(&iter);

	/*
	 * phase 2 - delete gone interfaces - reassigning peers to
	 * other interfaces
	 */
	for (ep = ep_list; ep != NULL; ep = next_ep) {
		/* fetch the successor first: ep may be deleted below */
		next_ep = ep->elink;

		/*
		 * if phase does not match sys_phase this interface was
		 * not enumerated during the last interface scan - so it
		 * is gone and will be deleted here unless it did not
		 * originate from interface enumeration (INT_WILDCARD,
		 * INT_MCASTIF).
		 */
		if (((INT_WILDCARD | INT_MCASTIF) & ep->flags) ||
		    ep->phase == sys_interphase)
			continue;

		DPRINT_INTERFACE(3, (ep, "updating ",
				     "GONE - deleting\n"));
		remove_interface(ep);

		/* the receiver is told before the endpt is deleted */
		ifi.action = IFS_DELETED;
		ifi.ep = ep;
		if (receiver != NULL)
			(*receiver)(data, &ifi);

		/* disconnect peers from deleted endpt. */
		while (ep->peers != NULL)
			set_peerdstadr(ep->peers, NULL);

		/*
		 * update globals in case we lose
		 * a loopback interface
		 */
		if (ep == loopback_interface)
			loopback_interface = NULL;

		delete_interface(ep);
	}

	/*
	 * phase 3 - re-configure as the world has possibly changed
	 *
	 * never ever make this conditional again - it is needed to track
	 * routing updates. see bug #2506
	 */
	refresh_all_peerinterfaces();

	/*
	 * NOTE(review): io_setbclient() runs twice when both
	 * broadcast_client_enabled and sys_bclient are set.
	 */
	if (broadcast_client_enabled)
		io_setbclient();

	if (sys_bclient)
		io_setbclient();

#ifdef MCAST
	/*
	 * Check multicast interfaces and try to join multicast groups if
	 * not joined yet.
	 */
	for (ep = ep_list; ep != NULL; ep = ep->elink) {
		remaddr_t *entry;

		/* only multicast endpoints that are not open yet */
		if (!(INT_MCASTIF & ep->flags) || (INT_MCASTOPEN & ep->flags))
			continue;

		/*
		 * Find remote address that was linked to this interface;
		 * only the first match is tried.
		 */
		for (entry = remoteaddr_list;
		     entry != NULL;
		     entry = entry->link) {
			if (entry->ep == ep) {
				if (socket_multicast_enable(ep, &entry->addr)) {
					msyslog(LOG_INFO,
						"Joined %s socket to multicast group %s",
						stoa(&ep->sin),
						stoa(&entry->addr));
				}
				break;
			}
		}
	}
#endif /* MCAST */

	return new_interface_found;
}
2056
2057
2058/*
2059 * create_sockets - create a socket for each interface plus a default
2060 *			socket for when we don't know where to send
2061 */
2062static int
2063create_sockets(
2064	u_short port
2065	)
2066{
2067#ifndef HAVE_IO_COMPLETION_PORT
2068	/*
2069	 * I/O Completion Ports don't care about the select and FD_SET
2070	 */
2071	maxactivefd = 0;
2072	FD_ZERO(&activefds);
2073#endif
2074
2075	DPRINTF(2, ("create_sockets(%d)\n", port));
2076
2077	create_wildcards(port);
2078
2079	update_interfaces(port, NULL, NULL);
2080
2081	/*
2082	 * Now that we have opened all the sockets, turn off the reuse
2083	 * flag for security.
2084	 */
2085	set_reuseaddr(0);
2086
2087	DPRINTF(2, ("create_sockets: Total interfaces = %d\n", ninterfaces));
2088
2089	return ninterfaces;
2090}
2091
2092/*
2093 * create_interface - create a new interface for a given prototype
2094 *		      binding the socket.
2095 */
2096static struct interface *
2097create_interface(
2098	u_short			port,
2099	struct interface *	protot
2100	)
2101{
2102	sockaddr_u	resmask;
2103	endpt *		iface;
2104#if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2105	remaddr_t *	entry;
2106	remaddr_t *	next_entry;
2107#endif
2108	DPRINTF(2, ("create_interface(%s#%d)\n", stoa(&protot->sin),
2109		    port));
2110
2111	/* build an interface */
2112	iface = new_interface(protot);
2113
2114	/*
2115	 * create socket
2116	 */
2117	iface->fd = open_socket(&iface->sin, 0, 0, iface);
2118
2119	if (iface->fd != INVALID_SOCKET)
2120		log_listen_address(iface);
2121
2122	if ((INT_BROADCAST & iface->flags)
2123	    && iface->bfd != INVALID_SOCKET)
2124		msyslog(LOG_INFO, "Listening on broadcast address %s#%d",
2125			stoa((&iface->bcast)), port);
2126
2127	if (INVALID_SOCKET == iface->fd
2128	    && INVALID_SOCKET == iface->bfd) {
2129		msyslog(LOG_ERR, "unable to create socket on %s (%d) for %s#%d",
2130			iface->name,
2131			iface->ifnum,
2132			stoa((&iface->sin)),
2133			port);
2134		delete_interface(iface);
2135		return NULL;
2136	}
2137
2138	/*
2139	 * Blacklist our own addresses, no use talking to ourself
2140	 */
2141	SET_HOSTMASK(&resmask, AF(&iface->sin));
2142	hack_restrict(RESTRICT_FLAGS, &iface->sin, &resmask,
2143		      RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
2144
2145	/*
2146	 * set globals with the first found
2147	 * loopback interface of the appropriate class
2148	 */
2149	if (NULL == loopback_interface && AF_INET == iface->family
2150	    && (INT_LOOPBACK & iface->flags))
2151		loopback_interface = iface;
2152
2153	/*
2154	 * put into our interface list
2155	 */
2156	add_addr_to_list(&iface->sin, iface);
2157	add_interface(iface);
2158
2159#if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2160	/*
2161	 * Join any previously-configured compatible multicast groups.
2162	 */
2163	if (INT_MULTICAST & iface->flags &&
2164	    !((INT_LOOPBACK | INT_WILDCARD) & iface->flags) &&
2165	    !iface->ignore_packets) {
2166		for (entry = remoteaddr_list;
2167		     entry != NULL;
2168		     entry = next_entry) {
2169			next_entry = entry->link;
2170			if (AF(&iface->sin) != AF(&entry->addr) ||
2171			    !IS_MCAST(&entry->addr))
2172				continue;
2173			if (socket_multicast_enable(iface,
2174						    &entry->addr))
2175				msyslog(LOG_INFO,
2176					"Joined %s socket to multicast group %s",
2177					stoa(&iface->sin),
2178					stoa(&entry->addr));
2179			else
2180				msyslog(LOG_ERR,
2181					"Failed to join %s socket to multicast group %s",
2182					stoa(&iface->sin),
2183					stoa(&entry->addr));
2184		}
2185	}
2186#endif	/* MCAST && MCAST_NONEWSOCKET */
2187
2188	DPRINT_INTERFACE(2, (iface, "created ", "\n"));
2189	return iface;
2190}
2191
2192
#ifdef SO_EXCLUSIVEADDRUSE
/*
 * set_excladdruse - enable SO_EXCLUSIVEADDRUSE on a socket.
 *
 * A failure is logged, except for the WSAEINVAL failure on Windows
 * versions older than 5.1 (XP), which is silently tolerated.
 */
static void
set_excladdruse(
	SOCKET fd
	)
{
	int enable = 1;
#ifdef SYS_WINNT
	DWORD winerr;
#endif

	if (0 == setsockopt(fd, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
			    (char *)&enable, sizeof(enable)))
		return;

#ifdef SYS_WINNT
	/*
	 * Prior to Windows XP setting SO_EXCLUSIVEADDRUSE can fail with
	 * error WSAEINVAL depending on service pack level and whether
	 * the user account is in the Administrators group.  Such a
	 * failure on versions prior to XP (5.1) is not reported.
	 */
	winerr = GetLastError();

	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0	/* < 5.1/XP */
	    && WSAEINVAL == winerr)
		return;

	/* put the error code back in place for msyslog's %m */
	SetLastError(winerr);
#endif
	msyslog(LOG_ERR,
		"setsockopt(%d, SO_EXCLUSIVEADDRUSE, on): %m",
		(int)fd);
}
#endif  /* SO_EXCLUSIVEADDRUSE */
2231
2232
2233/*
2234 * set_reuseaddr() - set/clear REUSEADDR on all sockets
2235 *			NB possible hole - should we be doing this on broadcast
2236 *			fd's also?
2237 */
2238static void
2239set_reuseaddr(
2240	int flag
2241	)
2242{
2243#ifndef SO_EXCLUSIVEADDRUSE
2244	endpt *ep;
2245
2246	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2247		if (ep->flags & INT_WILDCARD)
2248			continue;
2249
2250		/*
2251		 * if ep->fd  is INVALID_SOCKET, we might have a adapter
2252		 * configured but not present
2253		 */
2254		DPRINTF(4, ("setting SO_REUSEADDR on %.16s@%s to %s\n",
2255			    ep->name, stoa(&ep->sin),
2256			    flag ? "on" : "off"));
2257
2258		if (ep->fd != INVALID_SOCKET) {
2259			if (setsockopt(ep->fd, SOL_SOCKET, SO_REUSEADDR,
2260				       (char *)&flag, sizeof(flag))) {
2261				msyslog(LOG_ERR, "set_reuseaddr: setsockopt(%s, SO_REUSEADDR, %s) failed: %m",
2262					stoa(&ep->sin), flag ? "on" : "off");
2263			}
2264		}
2265	}
2266#endif /* ! SO_EXCLUSIVEADDRUSE */
2267}
2268
2269/*
2270 * This is just a wrapper around an internal function so we can
2271 * make other changes as necessary later on
2272 */
2273void
2274enable_broadcast(
2275	struct interface *	iface,
2276	sockaddr_u *		baddr
2277	)
2278{
2279#ifdef OPEN_BCAST_SOCKET
2280	socket_broadcast_enable(iface, iface->fd, baddr);
2281#endif
2282}
2283
2284#ifdef OPEN_BCAST_SOCKET
2285/*
2286 * Enable a broadcast address to a given socket
2287 * The socket is in the ep_list all we need to do is enable
2288 * broadcasting. It is not this function's job to select the socket
2289 */
2290static isc_boolean_t
2291socket_broadcast_enable(
2292	struct interface *	iface,
2293	SOCKET			fd,
2294	sockaddr_u *		baddr
2295	)
2296{
2297#ifdef SO_BROADCAST
2298	int on = 1;
2299
2300	if (IS_IPV4(baddr)) {
2301		/* if this interface can support broadcast, set SO_BROADCAST */
2302		if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
2303			       (char *)&on, sizeof(on)))
2304			msyslog(LOG_ERR,
2305				"setsockopt(SO_BROADCAST) enable failure on address %s: %m",
2306				stoa(baddr));
2307		else
2308			DPRINTF(2, ("Broadcast enabled on socket %d for address %s\n",
2309				    fd, stoa(baddr)));
2310	}
2311	iface->flags |= INT_BCASTXMIT;
2312	return ISC_TRUE;
2313#else
2314	return ISC_FALSE;
2315#endif /* SO_BROADCAST */
2316}
2317
2318#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
2319/*
2320 * Remove a broadcast address from a given socket
2321 * The socket is in the ep_list all we need to do is disable
2322 * broadcasting. It is not this function's job to select the socket
2323 */
2324static isc_boolean_t
2325socket_broadcast_disable(
2326	struct interface *	iface,
2327	sockaddr_u *		baddr
2328	)
2329{
2330#ifdef SO_BROADCAST
2331	int off = 0;	/* This seems to be OK as an int */
2332
2333	if (IS_IPV4(baddr) && setsockopt(iface->fd, SOL_SOCKET,
2334	    SO_BROADCAST, (char *)&off, sizeof(off)))
2335		msyslog(LOG_ERR,
2336			"setsockopt(SO_BROADCAST) disable failure on address %s: %m",
2337			stoa(baddr));
2338
2339	iface->flags &= ~INT_BCASTXMIT;
2340	return ISC_TRUE;
2341#else
2342	return ISC_FALSE;
2343#endif /* SO_BROADCAST */
2344}
2345#endif /* OS_MISSES_SPECIFIC_ROUTE_UPDATES */
2346
2347#endif /* OPEN_BCAST_SOCKET */
2348
2349/*
2350 * return the broadcast client flag value
2351 */
2352isc_boolean_t
2353get_broadcastclient_flag(void)
2354{
2355	return (broadcast_client_enabled);
2356}
2357/*
2358 * Check to see if the address is a multicast address
2359 */
2360static isc_boolean_t
2361addr_ismulticast(
2362	sockaddr_u *maddr
2363	)
2364{
2365	isc_boolean_t result;
2366
2367#ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
2368	/*
2369	 * If we don't have IPV6 support any IPV6 addr is not multicast
2370	 */
2371	if (IS_IPV6(maddr))
2372		result = ISC_FALSE;
2373	else
2374#endif
2375		result = IS_MCAST(maddr);
2376
2377	if (!result)
2378		DPRINTF(4, ("address %s is not multicast\n",
2379			    stoa(maddr)));
2380
2381	return result;
2382}
2383
2384/*
2385 * Multicast servers need to set the appropriate Multicast interface
2386 * socket option in order for it to know which interface to use for
2387 * send the multicast packet.
2388 */
2389void
2390enable_multicast_if(
2391	struct interface *	iface,
2392	sockaddr_u *		maddr
2393	)
2394{
2395#ifdef MCAST
2396#ifdef IP_MULTICAST_LOOP
2397	TYPEOF_IP_MULTICAST_LOOP off = 0;
2398#endif
2399#if defined(INCLUDE_IPV6_MULTICAST_SUPPORT) && defined(IPV6_MULTICAST_LOOP)
2400	u_int off6 = 0;
2401#endif
2402
2403	REQUIRE(AF(maddr) == AF(&iface->sin));
2404
2405	switch (AF(&iface->sin)) {
2406
2407	case AF_INET:
2408#ifdef IP_MULTICAST_LOOP
2409		/*
2410		 * Don't send back to itself, but allow failure to set
2411		 */
2412		if (setsockopt(iface->fd, IPPROTO_IP,
2413			       IP_MULTICAST_LOOP,
2414			       SETSOCKOPT_ARG_CAST &off,
2415			       sizeof(off))) {
2416
2417			msyslog(LOG_ERR,
2418				"setsockopt IP_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2419				iface->fd, stoa(&iface->sin),
2420				stoa(maddr));
2421		}
2422#endif
2423		break;
2424
2425	case AF_INET6:
2426#ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2427#ifdef IPV6_MULTICAST_LOOP
2428		/*
2429		 * Don't send back to itself, but allow failure to set
2430		 */
2431		if (setsockopt(iface->fd, IPPROTO_IPV6,
2432			       IPV6_MULTICAST_LOOP,
2433			       (char *) &off6, sizeof(off6))) {
2434
2435			msyslog(LOG_ERR,
2436				"setsockopt IPV6_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2437				iface->fd, stoa(&iface->sin),
2438				stoa(maddr));
2439		}
2440#endif
2441		break;
2442#else
2443		return;
2444#endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2445	}
2446	return;
2447#endif
2448}
2449
/*
 * socket_multicast_enable() - join the socket of an endpoint to a
 * multicast group.
 *
 * iface  endpoint whose socket (iface->fd) joins the group
 * maddr  multicast group address, IPv4 or IPv6
 *
 * The endpoint is already in the ep_list; selecting it is the caller's
 * job.  Returns ISC_TRUE when the membership was added; the endpoint
 * is then flagged INT_MCASTOPEN and its num_mcast counter is
 * incremented.  Returns ISC_FALSE and leaves the endpoint untouched
 * when setsockopt() fails, when IPv6 multicast support is not compiled
 * in, or when the address family is neither AF_INET nor AF_INET6.
 */
#if defined(MCAST)
static isc_boolean_t
socket_multicast_enable(
	endpt *		iface,
	sockaddr_u *	maddr
	)
{
	struct ip_mreq		mreq;
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq	mreq6;
# endif

	switch (AF(maddr)) {

	case AF_INET:
		/* join on INADDR_ANY, i.e. no specific local interface */
		ZERO(mreq);
		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
		if (setsockopt(iface->fd,
			       IPPROTO_IP,
			       IP_ADD_MEMBERSHIP,
			       (char *)&mreq,
			       sizeof(mreq))) {
			DPRINTF(2, (
				"setsockopt IP_ADD_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)\n",
				iface->fd, stoa(&iface->sin),
				mreq.imr_multiaddr.s_addr,
				mreq.imr_interface.s_addr,
				stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv4 multicast membership on socket %d, addr %s for %x / %x (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq.imr_multiaddr.s_addr,
			    mreq.imr_interface.s_addr, stoa(maddr)));
		break;

	case AF_INET6:
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Enable reception of multicast packets.  The group is
		 * joined on the interface given by iface->ifindex.
		 */
		ZERO(mreq6);
		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		mreq6.ipv6mr_interface = iface->ifindex;

		if (setsockopt(iface->fd, IPPROTO_IPV6,
			       IPV6_JOIN_GROUP, (char *)&mreq6,
			       sizeof(mreq6))) {
			DPRINTF(2, (
				"setsockopt IPV6_JOIN_GROUP failed: %m on socket %d, addr %s for interface %u (%s)\n",
				iface->fd, stoa(&iface->sin),
				mreq6.ipv6mr_interface, stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv6 multicast group on socket %d, addr %s for interface %u (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq6.ipv6mr_interface, stoa(maddr)));
		break;
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */

	default:
		/* never mark an endpoint as joined for an unknown family */
		return ISC_FALSE;
	}

	/* membership is in place, account for it on the endpoint */
	iface->flags |= INT_MCASTOPEN;
	iface->num_mcast++;

	return ISC_TRUE;
}
#endif	/* MCAST */
2526
2527
/*
 * socket_multicast_disable() - make the socket of an endpoint leave a
 * multicast group.
 *
 * An address that find_addr_in_list() does not know is reported as
 * success without touching the socket.  Otherwise the membership is
 * dropped with setsockopt(); a failure is logged and yields ISC_FALSE,
 * as does an IPv6 address in a build without IPv6 multicast support.
 * On success num_mcast is decremented and INT_MCASTOPEN is cleared
 * once the counter reaches zero.  Choosing the socket is left to the
 * caller.
 */
#ifdef MCAST
static isc_boolean_t
socket_multicast_disable(
	struct interface *	iface,
	sockaddr_u *		maddr
	)
{
	struct ip_mreq		drop4;
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq	drop6;
# endif

	ZERO(drop4);

	if (NULL == find_addr_in_list(maddr)) {
		DPRINTF(4, ("socket_multicast_disable(%s): not found\n",
			    stoa(maddr)));
		return ISC_TRUE;
	}

	switch (AF(maddr)) {

	case AF_INET:
		drop4.imr_multiaddr = SOCK_ADDR4(maddr);
		drop4.imr_interface = SOCK_ADDR4(&iface->sin);
		if (0 != setsockopt(iface->fd, IPPROTO_IP,
				    IP_DROP_MEMBERSHIP, (char *)&drop4,
				    sizeof(drop4))) {
			msyslog(LOG_ERR,
				"setsockopt IP_DROP_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
				iface->fd, stoa(&iface->sin),
				SRCADR(maddr), SRCADR(&iface->sin),
				stoa(maddr));
			return ISC_FALSE;
		}
		break;

	case AF_INET6:
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Stop receiving the group on the interface given by
		 * iface->ifindex.
		 */
		drop6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		drop6.ipv6mr_interface = iface->ifindex;

		if (0 != setsockopt(iface->fd, IPPROTO_IPV6,
				    IPV6_LEAVE_GROUP, (char *)&drop6,
				    sizeof(drop6))) {
			msyslog(LOG_ERR,
				"setsockopt IPV6_LEAVE_GROUP failure: %m on socket %d, addr %s for %d (%s)",
				iface->fd, stoa(&iface->sin),
				iface->ifindex, stoa(maddr));
			return ISC_FALSE;
		}
		break;
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}

	/* one membership less; the flag goes with the last one */
	if (0 == --iface->num_mcast)
		iface->flags &= ~INT_MCASTOPEN;

	return ISC_TRUE;
}
#endif	/* MCAST */
2605
/*
 * io_setbclient - open the sockets used to receive broadcasts
 *
 * Every endpoint that is broadcast capable, accepts packets and is
 * neither a wildcard nor a loopback gets a socket opened on its
 * broadcast address (kept in bfd), unless it is already flagged
 * INT_BCASTOPEN.  set_reuseaddr(1) is called before the scan and
 * set_reuseaddr(0) after it.  broadcast_client_enabled ends up
 * telling whether at least one broadcast socket is open.  Builds
 * without OPEN_BCAST_SOCKET only log an error.
 */
void
io_setbclient(void)
{
#ifndef OPEN_BCAST_SOCKET
	msyslog(LOG_ERR,
		"io_setbclient: Broadcast Client disabled by build");
#else
	endpt *	ep;
	int	open_count = 0;

	set_reuseaddr(1);

	for (ep = ep_list; ep != NULL; ep = ep->elink) {

		/*
		 * Not a candidate: wildcard or loopback endpoint,
		 * endpoint that ignores packets, or one that can not
		 * do broadcast at all.
		 */
		if ((ep->flags & (INT_WILDCARD | INT_LOOPBACK)) ||
		    ep->ignore_packets ||
		    !(ep->flags & INT_BROADCAST))
			continue;

		/* Only IPv4 addresses are valid for broadcast */
		REQUIRE(IS_IPV4(&ep->sin));

		if (!(ep->flags & INT_BCASTOPEN)) {
			/* Try to open the broadcast address */
			ep->family = AF_INET;
			ep->bfd = open_socket(&ep->bcast, 1, 0, ep);

			if (INVALID_SOCKET == ep->bfd) {
				/*
				 * EADDRINUSE is not reported: the
				 * socket was probably opened already
				 * for an address in the same network.
				 */
				if (EADDRINUSE != errno)
					msyslog(LOG_INFO,
						"failed to listen for broadcasts to %s on interface #%d %s",
						stoa(&ep->bcast), ep->ifnum, ep->name);
				continue;
			}

			ep->flags |= INT_BCASTOPEN;
			msyslog(LOG_INFO,
				"Listen for broadcasts to %s on interface #%d %s",
				stoa(&ep->bcast), ep->ifnum, ep->name);
		}

		/*
		 * Sockets that were open before count as well, which
		 * avoids a misleading warning below.
		 */
		open_count++;
	}

	set_reuseaddr(0);

	if (open_count > 0) {
		broadcast_client_enabled = ISC_TRUE;
		DPRINTF(1, ("io_setbclient: listening to %d broadcast addresses\n", open_count));
	} else if (0 == open_count) {
		broadcast_client_enabled = ISC_FALSE;
		msyslog(LOG_ERR,
			"Unable to listen for broadcasts, no broadcast interfaces available");
	}
#endif	/* OPEN_BCAST_SOCKET */
}
2687
2688/*
2689 * io_unsetbclient - close the broadcast client sockets
2690 */
2691void
2692io_unsetbclient(void)
2693{
2694	endpt *ep;
2695
2696	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2697		if (INT_WILDCARD & ep->flags)
2698			continue;
2699		if (!(INT_BCASTOPEN & ep->flags))
2700			continue;
2701
2702		if (ep->bfd != INVALID_SOCKET) {
2703			/* destroy broadcast listening socket */
2704			msyslog(LOG_INFO,
2705				"stop listening for broadcasts to %s on interface #%d %s",
2706				stoa(&ep->bcast), ep->ifnum, ep->name);
2707			close_and_delete_fd_from_list(ep->bfd);
2708			ep->bfd = INVALID_SOCKET;
2709			ep->flags &= ~INT_BCASTOPEN;
2710		}
2711	}
2712	broadcast_client_enabled = ISC_FALSE;
2713}
2714
/*
 * io_multicast_add() - add multicast group address
 *
 * addr  multicast group address to listen on
 *
 * Nothing happens for a non-multicast address.  A group that is
 * already found with INT_MCASTOPEN is logged as a duplicate and
 * skipped.
 *
 * Without MULTICAST_NONEWSOCKET a new endpoint is created and a socket
 * is opened on the group address (port NTP_PORT).  If that fails the
 * matching wildcard endpoint is used instead, and if there is none the
 * function gives up.  With MULTICAST_NONEWSOCKET every eligible
 * endpoint in ep_list is joined to the group.
 *
 * The group address is recorded with add_addr_to_list() whether or not
 * socket_multicast_enable() succeeded.  Builds without MCAST only log
 * an error.
 */
void
io_multicast_add(
	sockaddr_u *addr
	)
{
#ifdef MCAST
	endpt *	ep;
	endpt *	one_ep;		/* endpoint recorded for addr in the address list */

	/*
	 * Check to see if this is a multicast address
	 */
	if (!addr_ismulticast(addr))
		return;

	/* If we already have it we can just return */
	if (NULL != find_flagged_addr_in_list(addr, INT_MCASTOPEN)) {
		msyslog(LOG_INFO,
			"Duplicate request found for multicast address %s",
			stoa(addr));
		return;
	}

# ifndef MULTICAST_NONEWSOCKET
	ep = new_interface(NULL);

	/*
	 * Open a new socket for the multicast address
	 */
	ep->sin = *addr;
	SET_PORT(&ep->sin, NTP_PORT);
	ep->family = AF(&ep->sin);
	AF(&ep->mask) = ep->family;
	SET_ONESMASK(&ep->mask);

	/*
	 * NOTE(review): there is no matching set_reuseaddr(0) in this
	 * function, unlike io_setbclient() - confirm this is intended.
	 */
	set_reuseaddr(1);
	ep->bfd = INVALID_SOCKET;
	ep->fd = open_socket(&ep->sin, 0, 0, ep);
	if (ep->fd != INVALID_SOCKET) {
		ep->ignore_packets = ISC_FALSE;
		ep->flags |= INT_MCASTIF;

		strlcpy(ep->name, "multicast", sizeof(ep->name));
		DPRINT_INTERFACE(2, (ep, "multicast add ", "\n"));
		add_interface(ep);
		log_listen_address(ep);
	} else {
		/* bind failed, re-use wildcard interface */
		delete_interface(ep);

		if (IS_IPV4(addr))
			ep = wildipv4;
		else if (IS_IPV6(addr))
			ep = wildipv6;
		else
			ep = NULL;

		if (ep != NULL) {
			/* HACK ! -- stuff in an address */
			/* because we don't bind addr? DH */
			ep->bcast = *addr;
			msyslog(LOG_ERR,
				"multicast address %s using wildcard interface #%d %s",
				stoa(addr), ep->ifnum, ep->name);
		} else {
			msyslog(LOG_ERR,
				"No multicast socket available to use for address %s",
				stoa(addr));
			return;
		}
	}
	/*
	 * The brace opened here is closed by the shared '}' after the
	 * socket_multicast_enable() call below; in the #else branch the
	 * same '}' closes the for loop.
	 */
	{	/* in place of the { following for in #else clause */
		one_ep = ep;
# else	/* MULTICAST_NONEWSOCKET follows */
	/*
	 * For the case where we can't use a separate socket (Windows)
	 * join each applicable endpoint socket to the group address.
	 */
	/* fall back to the wildcard endpoint if the loop joins nothing */
	if (IS_IPV4(addr))
		one_ep = wildipv4;
	else
		one_ep = wildipv6;
	for (ep = ep_list; ep != NULL; ep = ep->elink) {
		if (ep->ignore_packets || AF(&ep->sin) != AF(addr) ||
		    !(INT_MULTICAST & ep->flags) ||
		    (INT_LOOPBACK | INT_WILDCARD) & ep->flags)
			continue;
		one_ep = ep;
# endif	/* MULTICAST_NONEWSOCKET */
		/* a failed join is not reported at this level */
		if (socket_multicast_enable(ep, addr))
			msyslog(LOG_INFO,
				"Joined %s socket to multicast group %s",
				stoa(&ep->sin),
				stoa(addr));
	}

	add_addr_to_list(addr, one_ep);
#else	/* !MCAST  follows*/
	msyslog(LOG_ERR,
		"Can not add multicast address %s: no multicast support",
		stoa(addr));
#endif
	return;
}
2822
2823
2824/*
2825 * io_multicast_del() - delete multicast group address
2826 */
2827void
2828io_multicast_del(
2829	sockaddr_u *	addr
2830	)
2831{
2832#ifdef MCAST
2833	endpt *iface;
2834
2835	/*
2836	 * Check to see if this is a multicast address
2837	 */
2838	if (!addr_ismulticast(addr)) {
2839		msyslog(LOG_ERR, "invalid multicast address %s",
2840			stoa(addr));
2841		return;
2842	}
2843
2844	/*
2845	 * Disable reception of multicast packets
2846	 */
2847	while ((iface = find_flagged_addr_in_list(addr, INT_MCASTOPEN))
2848	       != NULL)
2849		socket_multicast_disable(iface, addr);
2850
2851	delete_addr_from_list(addr);
2852
2853#else /* not MCAST */
2854	msyslog(LOG_ERR,
2855		"Can not delete multicast address %s: no multicast support",
2856		stoa(addr));
2857#endif /* not MCAST */
2858}
2859
2860
/*
 * open_socket - open a socket, returning the file descriptor
 *
 * addr            local address (and port) the socket is bound to
 * bcast           non-zero: also enable broadcast on the new socket
 *                 (IPv4 only)
 * turn_off_reuse  non-zero: set SO_REUSEADDR to off instead of on;
 *                 bind() failures are then logged only at debug > 1
 * interf          endpoint the socket belongs to
 *
 * Creates a UDP socket, applies the socket options, binds it to addr,
 * makes it non-blocking and registers it with add_fd_to_list().
 *
 * Returns the descriptor, or INVALID_SOCKET when IPv6 is requested
 * but not working, when socket() reports an unsupported protocol or
 * address family, when SO_REUSEADDR can not be set, or when bind()
 * fails.  Any other socket() error and a failure to attach the socket
 * to the I/O completion port terminate the process.
 */

static SOCKET
open_socket(
	sockaddr_u *	addr,
	int		bcast,
	int		turn_off_reuse,
	endpt *		interf
	)
{
	SOCKET	fd;
	int	errval;
	/*
	 * int is OK for REUSEADR per
	 * http://www.kohala.com/start/mcast.api.txt
	 */
	int	on = 1;
	int	off = 0;

	/* no point in trying IPv6 when it is known not to work */
	if (IS_IPV6(addr) && !ipv6_works)
		return INVALID_SOCKET;

	/* create a datagram (UDP) socket */
	fd = socket(AF(addr), SOCK_DGRAM, 0);
	if (INVALID_SOCKET == fd) {
		/* keep the error code, msyslog() may change errno */
		errval = socket_errno();
		msyslog(LOG_ERR,
			"socket(AF_INET%s, SOCK_DGRAM, 0) failed on address %s: %m",
			IS_IPV6(addr) ? "6" : "", stoa(addr));

		/* an unsupported family/protocol is not fatal */
		if (errval == EPROTONOSUPPORT ||
		    errval == EAFNOSUPPORT ||
		    errval == EPFNOSUPPORT)
			return (INVALID_SOCKET);

		errno = errval;
		msyslog(LOG_ERR,
			"unexpected socket() error %m code %d (not EPROTONOSUPPORT nor EAFNOSUPPORT nor EPFNOSUPPORT) - exiting",
			errno);
		exit(1);
	}

#ifdef SYS_WINNT
	connection_reset_fix(fd, addr);
#endif
	/*
	 * Fixup the file descriptor for some systems
	 * See bug #530 for details of the issue.
	 */
	fd = move_fd(fd);

	/*
	 * set SO_REUSEADDR since we will be binding the same port
	 * number on each interface according to turn_off_reuse.
	 * This is undesirable on Windows versions starting with
	 * Windows XP (numeric version 5.1).
	 */
#ifdef SYS_WINNT
	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0)  /* before 5.1 */
#endif
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
			       (char *)((turn_off_reuse)
					    ? &off
					    : &on),
			       sizeof(on))) {

			msyslog(LOG_ERR,
				"setsockopt SO_REUSEADDR %s fails for address %s: %m",
				(turn_off_reuse)
				    ? "off"
				    : "on",
				stoa(addr));
			closesocket(fd);
			return INVALID_SOCKET;
		}
#ifdef SO_EXCLUSIVEADDRUSE
	/*
	 * setting SO_EXCLUSIVEADDRUSE on the wildcard we open
	 * first will cause more specific binds to fail.
	 */
	if (!(interf->flags & INT_WILDCARD))
		set_excladdruse(fd);
#endif

	/*
	 * IPv4 specific options go here
	 */
	if (IS_IPV4(addr)) {
#if defined(IPPROTO_IP) && defined(IP_TOS)
		/* a failure to set the QoS value is only logged */
		if (setsockopt(fd, IPPROTO_IP, IP_TOS, (char*)&qos,
			       sizeof(qos)))
			msyslog(LOG_ERR,
				"setsockopt IP_TOS (%02x) fails on address %s: %m",
				qos, stoa(addr));
#endif /* IPPROTO_IP && IP_TOS */
		if (bcast)
			socket_broadcast_enable(interf, fd, addr);
	}

	/*
	 * IPv6 specific options go here
	 */
	if (IS_IPV6(addr)) {
#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
		/* a failure to set the traffic class is only logged */
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, (char*)&qos,
			       sizeof(qos)))
			msyslog(LOG_ERR,
				"setsockopt IPV6_TCLASS (%02x) fails on address %s: %m",
				qos, stoa(addr));
#endif /* IPPROTO_IPV6 && IPV6_TCLASS */
#ifdef IPV6_V6ONLY
		/* IPV6_V6ONLY is set only when the probe reports success */
		if (isc_net_probe_ipv6only() == ISC_R_SUCCESS
		    && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
		    (char*)&on, sizeof(on)))
			msyslog(LOG_ERR,
				"setsockopt IPV6_V6ONLY on fails on address %s: %m",
				stoa(addr));
#endif
#ifdef IPV6_BINDV6ONLY
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDV6ONLY,
		    (char*)&on, sizeof(on)))
			msyslog(LOG_ERR,
				"setsockopt IPV6_BINDV6ONLY on fails on address %s: %m",
				stoa(addr));
#endif
	}

#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
	/*
	 * some OSes don't allow binding to more specific
	 * addresses if a wildcard address already bound
	 * to the port and SO_REUSEADDR is not set
	 */
	if (!is_wildcard_addr(addr))
		set_wildcard_reuse(AF(addr), 1);
#endif

	/*
	 * bind the local address.
	 */
	errval = bind(fd, &addr->sa, SOCKLEN(addr));

#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
	/*
	 * undo the temporary wildcard setting from above
	 * NOTE(review): this runs before the bind() result is logged
	 * with %m and may alter errno - confirm.
	 */
	if (!is_wildcard_addr(addr))
		set_wildcard_reuse(AF(addr), 0);
#endif

	if (errval < 0) {
		/*
		 * Don't log this under all conditions
		 */
		if (turn_off_reuse == 0
#ifdef DEBUG
		    || debug > 1
#endif
		    ) {
			msyslog(LOG_ERR,
				"bind(%d) AF_INET%s %s#%d%s flags 0x%x failed: %m",
				fd, IS_IPV6(addr) ? "6" : "",
				stoa(addr), SRCPORT(addr),
				IS_MCAST(addr) ? " (multicast)" : "",
				interf->flags);
		}

		closesocket(fd);

		return INVALID_SOCKET;
	}

	/*
	 * Ask for receive time stamps in whatever flavors the build
	 * supports.  Failures are logged at LOG_DEBUG and are not fatal.
	 */
#ifdef HAVE_TIMESTAMP
	{
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP,
			       (char*)&on, sizeof(on)))
			msyslog(LOG_DEBUG,
				"setsockopt SO_TIMESTAMP on fails on address %s: %m",
				stoa(addr));
		else
			DPRINTF(4, ("setsockopt SO_TIMESTAMP enabled on fd %d address %s\n",
				    fd, stoa(addr)));
	}
#endif
#ifdef HAVE_TIMESTAMPNS
	{
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS,
			       (char*)&on, sizeof(on)))
			msyslog(LOG_DEBUG,
				"setsockopt SO_TIMESTAMPNS on fails on address %s: %m",
				stoa(addr));
		else
			DPRINTF(4, ("setsockopt SO_TIMESTAMPNS enabled on fd %d address %s\n",
				    fd, stoa(addr)));
	}
#endif
#ifdef HAVE_BINTIME
	{
		if (setsockopt(fd, SOL_SOCKET, SO_BINTIME,
			       (char*)&on, sizeof(on)))
			msyslog(LOG_DEBUG,
				"setsockopt SO_BINTIME on fails on address %s: %m",
				stoa(addr));
		else
			DPRINTF(4, ("setsockopt SO_BINTIME enabled on fd %d address %s\n",
				    fd, stoa(addr)));
	}
#endif

	DPRINTF(4, ("bind(%d) AF_INET%s, addr %s%%%d#%d, flags 0x%x\n",
		   fd, IS_IPV6(addr) ? "6" : "", stoa(addr),
		   SCOPE(addr), SRCPORT(addr), interf->flags));

	make_socket_nonblocking(fd);

#ifdef HAVE_SIGNALED_IO
	init_socket_sig(fd);
#endif /* HAVE_SIGNALED_IO */

	/* register the new descriptor in the list of open descriptors */
	add_fd_to_list(fd, FD_TYPE_SOCKET);

#if !defined(SYS_WINNT) && !defined(VMS)
	DPRINTF(4, ("flags for fd %d: 0x%x\n", fd,
		    fcntl(fd, F_GETFL, 0)));
#endif /* !SYS_WINNT && !VMS */

#if defined (HAVE_IO_COMPLETION_PORT)
/*
 * Add the socket to the completion port
 */
	if (io_completion_port_add_socket(fd, interf)) {
		msyslog(LOG_ERR, "unable to set up io completion port - EXITING");
		exit(1);
	}
#endif
	return fd;
}
3097
3098
#ifdef SYS_WINNT
/*
 * On Windows every sendto() call that follows in this file is routed
 * to io_completion_port_sendto().  The macro drops the flags and
 * destsz arguments and passes dest on as a sockaddr_u pointer.
 */
#define sendto(fd, buf, len, flags, dest, destsz)	\
	io_completion_port_sendto(fd, buf, len, (sockaddr_u *)(dest))
#endif
3103
3104/* XXX ELIMINATE sendpkt similar in ntpq.c, ntpdc.c, ntp_io.c, ntptrace.c */
3105/*
3106 * sendpkt - send a packet to the specified destination. Maintain a
3107 * send error cache so that only the first consecutive error for a
3108 * destination is logged.
3109 */
3110void
3111sendpkt(
3112	sockaddr_u *		dest,
3113	struct interface *	ep,
3114	int			ttl,
3115	struct pkt *		pkt,
3116	int			len
3117	)
3118{
3119	endpt *	src;
3120	int	ismcast;
3121	int	cc;
3122	int	rc;
3123	u_char	cttl;
3124
3125	ismcast = IS_MCAST(dest);
3126	if (!ismcast)
3127		src = ep;
3128	else
3129		src = (IS_IPV4(dest))
3130			  ? mc4_list
3131			  : mc6_list;
3132
3133	if (NULL == src) {
3134		/*
3135		 * unbound peer - drop request and wait for better
3136		 * network conditions
3137		 */
3138		DPRINTF(2, ("%ssendpkt(dst=%s, ttl=%d, len=%d): no interface - IGNORED\n",
3139			    ismcast ? "\tMCAST\t***** " : "",
3140			    stoa(dest), ttl, len));
3141		return;
3142	}
3143
3144	do {
3145		DPRINTF(2, ("%ssendpkt(%d, dst=%s, src=%s, ttl=%d, len=%d)\n",
3146			    ismcast ? "\tMCAST\t***** " : "", src->fd,
3147			    stoa(dest), stoa(&src->sin), ttl, len));
3148#ifdef MCAST
3149		/*
3150		 * for the moment we use the bcast option to set multicast ttl
3151		 */
3152		if (ismcast && ttl > 0 && ttl != src->last_ttl) {
3153			/*
3154			 * set the multicast ttl for outgoing packets
3155			 */
3156			switch (AF(&src->sin)) {
3157
3158			case AF_INET :
3159				cttl = (u_char)ttl;
3160				rc = setsockopt(src->fd, IPPROTO_IP,
3161						IP_MULTICAST_TTL,
3162						(void *)&cttl,
3163						sizeof(cttl));
3164				break;
3165
3166# ifdef INCLUDE_IPV6_SUPPORT
3167			case AF_INET6 :
3168				rc = setsockopt(src->fd, IPPROTO_IPV6,
3169						 IPV6_MULTICAST_HOPS,
3170						 (void *)&ttl,
3171						 sizeof(ttl));
3172				break;
3173# endif	/* INCLUDE_IPV6_SUPPORT */
3174
3175			default:
3176				rc = 0;
3177			}
3178
3179			if (!rc)
3180				src->last_ttl = ttl;
3181			else
3182				msyslog(LOG_ERR,
3183					"setsockopt IP_MULTICAST_TTL/IPV6_MULTICAST_HOPS fails on address %s: %m",
3184					stoa(&src->sin));
3185		}
3186#endif	/* MCAST */
3187
3188#ifdef SIM
3189		cc = simulate_server(dest, src, pkt);
3190#else
3191		cc = sendto(src->fd, (char *)pkt, (u_int)len, 0,
3192			    &dest->sa, SOCKLEN(dest));
3193#endif
3194		if (cc == -1) {
3195			src->notsent++;
3196			packets_notsent++;
3197		} else	{
3198			src->sent++;
3199			packets_sent++;
3200		}
3201		if (ismcast)
3202			src = src->mclink;
3203	} while (ismcast && src != NULL);
3204}
3205
3206
3207#if !defined(HAVE_IO_COMPLETION_PORT)
/*
 * fdbits - generate ascii representation of fd_set (FAU debug support)
 * HFDF format - highest fd first.
 *
 * count  highest descriptor number to show
 * set    descriptor set to render
 *
 * Produces one character per descriptor from count down to 0, '#' for
 * a member of the set and '-' otherwise.  The text lives in a static
 * buffer which is overwritten by the next call.  count is clamped so
 * that the characters plus the terminating NUL always fit into that
 * buffer; a negative count yields an empty string.
 */
static char *
fdbits(
	int count,
	fd_set *set
	)
{
	static char buffer[256];
	char * buf = buffer;

	/*
	 * count + 1 characters and the NUL are stored, so the largest
	 * usable count is two less than the buffer size.
	 */
	if (count > (int)sizeof(buffer) - 2)
		count = (int)sizeof(buffer) - 2;

	while (count >= 0) {
		*buf++ = FD_ISSET(count, set) ? '#' : '-';
		count--;
	}
	*buf = '\0';

	return buffer;
}
3231
3232
3233#ifdef REFCLOCK
3234/*
3235 * Routine to read the refclock packets for a specific interface
3236 * Return the number of bytes read. That way we know if we should
3237 * read it again or go on to the next one if no bytes returned
3238 */
3239static inline int
3240read_refclock_packet(
3241	SOCKET			fd,
3242	struct refclockio *	rp,
3243	l_fp			ts
3244	)
3245{
3246	u_int			read_count;
3247	int			buflen;
3248	int			saved_errno;
3249	int			consumed;
3250	struct recvbuf *	rb;
3251
3252	rb = get_free_recv_buffer();
3253
3254	if (NULL == rb) {
3255		/*
3256		 * No buffer space available - just drop the packet
3257		 */
3258		char buf[RX_BUFF_SIZE];
3259
3260		buflen = read(fd, buf, sizeof buf);
3261		packets_dropped++;
3262		return (buflen);
3263	}
3264
3265	/* TALOS-CAN-0064: avoid signed/unsigned clashes that can lead
3266	 * to buffer overrun and memory corruption
3267	 */
3268	if (rp->datalen <= 0 || rp->datalen > sizeof(rb->recv_space))
3269		read_count = sizeof(rb->recv_space);
3270	else
3271		read_count = (u_int)rp->datalen;
3272	do {
3273		buflen = read(fd, (char *)&rb->recv_space, read_count);
3274	} while (buflen < 0 && EINTR == errno);
3275
3276	if (buflen <= 0) {
3277		saved_errno = errno;
3278		freerecvbuf(rb);
3279		errno = saved_errno;
3280		return buflen;
3281	}
3282
3283	/*
3284	 * Got one. Mark how and when it got here,
3285	 * put it on the full list and do bookkeeping.
3286	 */
3287	rb->recv_length = buflen;
3288	rb->recv_peer = rp->srcclock;
3289	rb->dstadr = 0;
3290	rb->fd = fd;
3291	rb->recv_time = ts;
3292	rb->receiver = rp->clock_recv;
3293
3294	consumed = indicate_refclock_packet(rp, rb);
3295	if (!consumed) {
3296		rp->recvcount++;
3297		packets_received++;
3298	}
3299
3300	return buflen;
3301}
3302#endif	/* REFCLOCK */
3303
3304
3305#ifdef HAVE_PACKET_TIMESTAMP
/*
 * extract timestamps from control message buffer
 *
 * rb      receive buffer of the packet (only used with DEBUG_TIMING)
 * msghdr  message header whose control messages are examined
 * ts      time stamp returned when no usable control message is found
 *
 * Walks all control messages.  An SCM_BINTIME, SCM_TIMESTAMPNS or
 * SCM_TIMESTAMP message (whichever the build supports) is converted
 * to an l_fp, truncated to a multiple of sys_tick when sys_tick is
 * larger than both measured_tick and the resolution of the time stamp
 * format, and a random fuzz scaled by sys_fuzz is added.  The result
 * replaces ts; other control messages are skipped.  With several
 * time stamp messages the last one wins.
 *
 * NOTE(review): only cmsg_type is examined, cmsg_level is not checked.
 */
static l_fp
fetch_timestamp(
	struct recvbuf *	rb,
	struct msghdr *		msghdr,
	l_fp			ts
	)
{
	struct cmsghdr *	cmsghdr;
#ifdef HAVE_BINTIME
	struct bintime *	btp;
#endif
#ifdef HAVE_TIMESTAMPNS
	struct timespec *	tsp;
#endif
#ifdef HAVE_TIMESTAMP
	struct timeval *	tvp;
#endif
	unsigned long		ticks;
	double			fuzz;
	l_fp			lfpfuzz;
	l_fp			nts;
#ifdef DEBUG_TIMING
	l_fp			dts;
#endif

	cmsghdr = CMSG_FIRSTHDR(msghdr);
	while (cmsghdr != NULL) {
		/*
		 * The outer switch selects the time stamp messages the
		 * build knows about, the inner switch converts the
		 * particular format to nts.
		 */
		switch (cmsghdr->cmsg_type)
		{
#ifdef HAVE_BINTIME
		case SCM_BINTIME:
#endif  /* HAVE_BINTIME */
#ifdef HAVE_TIMESTAMPNS
		case SCM_TIMESTAMPNS:
#endif	/* HAVE_TIMESTAMPNS */
#ifdef HAVE_TIMESTAMP
		case SCM_TIMESTAMP:
#endif	/* HAVE_TIMESTAMP */
#if defined(HAVE_BINTIME) || defined (HAVE_TIMESTAMPNS) || defined(HAVE_TIMESTAMP)
			switch (cmsghdr->cmsg_type)
			{
#ifdef HAVE_BINTIME
			case SCM_BINTIME:
				btp = (struct bintime *)CMSG_DATA(cmsghdr);
				/*
				 * bintime documentation is at http://phk.freebsd.dk/pubs/timecounter.pdf
				 */
				nts.l_i = btp->sec + JAN_1970;
				nts.l_uf = (u_int32)(btp->frac >> 32);
				/* truncate the fraction to a multiple of sys_tick */
				if (sys_tick > measured_tick &&
				    sys_tick > 1e-9) {
					ticks = (unsigned long)(nts.l_uf / (unsigned long)(sys_tick * FRAC));
					nts.l_uf = (unsigned long)(ticks * (unsigned long)(sys_tick * FRAC));
				}
                                DPRINTF(4, ("fetch_timestamp: system bintime network time stamp: %ld.%09lu\n",
                                            btp->sec, (unsigned long)((nts.l_uf / FRAC) * 1e9)));
				break;
#endif  /* HAVE_BINTIME */
#ifdef HAVE_TIMESTAMPNS
			case SCM_TIMESTAMPNS:
				tsp = UA_PTR(struct timespec, CMSG_DATA(cmsghdr));
				/* truncate the nanoseconds to a multiple of sys_tick */
				if (sys_tick > measured_tick &&
				    sys_tick > 1e-9) {
					ticks = (unsigned long)((tsp->tv_nsec * 1e-9) /
						       sys_tick);
					tsp->tv_nsec = (long)(ticks * 1e9 *
							      sys_tick);
				}
				DPRINTF(4, ("fetch_timestamp: system nsec network time stamp: %ld.%09ld\n",
					    tsp->tv_sec, tsp->tv_nsec));
				nts = tspec_stamp_to_lfp(*tsp);
				break;
#endif	/* HAVE_TIMESTAMPNS */
#ifdef HAVE_TIMESTAMP
			case SCM_TIMESTAMP:
				tvp = (struct timeval *)CMSG_DATA(cmsghdr);
				/* truncate the microseconds to a multiple of sys_tick */
				if (sys_tick > measured_tick &&
				    sys_tick > 1e-6) {
					ticks = (unsigned long)((tvp->tv_usec * 1e-6) /
						       sys_tick);
					tvp->tv_usec = (long)(ticks * 1e6 *
							      sys_tick);
				}
				DPRINTF(4, ("fetch_timestamp: system usec network time stamp: %jd.%06ld\n",
					    (intmax_t)tvp->tv_sec, (long)tvp->tv_usec));
				nts = tval_stamp_to_lfp(*tvp);
				break;
#endif  /* HAVE_TIMESTAMP */
			}
			/* add a random fuzz scaled by sys_fuzz to the converted stamp */
			fuzz = ntp_random() * 2. / FRAC * sys_fuzz;
			DTOLFP(fuzz, &lfpfuzz);
			L_ADD(&nts, &lfpfuzz);
#ifdef DEBUG_TIMING
			/* difference between the caller's stamp and the network stamp */
			dts = ts;
			L_SUB(&dts, &nts);
			collect_timing(rb, "input processing delay", 1,
				       &dts);
			DPRINTF(4, ("fetch_timestamp: timestamp delta: %s (incl. fuzz)\n",
				    lfptoa(&dts, 9)));
#endif	/* DEBUG_TIMING */
			ts = nts;  /* network time stamp */
			break;
#endif	/* HAVE_BINTIME || HAVE_TIMESTAMPNS || HAVE_TIMESTAMP */

		default:
			DPRINTF(4, ("fetch_timestamp: skipping control message 0x%x\n",
				    cmsghdr->cmsg_type));
		}
		cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr);
	}
	return ts;
}
3421#endif	/* HAVE_PACKET_TIMESTAMP */
3422
3423
3424/*
3425 * Routine to read the network NTP packets for a specific interface
3426 * Return the number of bytes read. That way we know if we should
3427 * read it again or go on to the next one if no bytes returned
3428 */
3429static inline int
3430read_network_packet(
3431	SOCKET			fd,
3432	struct interface *	itf,
3433	l_fp			ts
3434	)
3435{
3436	GETSOCKNAME_SOCKLEN_TYPE fromlen;
3437	int buflen;
3438	register struct recvbuf *rb;
3439#ifdef HAVE_PACKET_TIMESTAMP
3440	struct msghdr msghdr;
3441	struct iovec iovec;
3442	char control[CMSG_BUFSIZE];
3443#endif
3444
3445	/*
3446	 * Get a buffer and read the frame.  If we
3447	 * haven't got a buffer, or this is received
3448	 * on a disallowed socket, just dump the
3449	 * packet.
3450	 */
3451
3452	rb = get_free_recv_buffer();
3453	if (NULL == rb || itf->ignore_packets) {
3454		char buf[RX_BUFF_SIZE];
3455		sockaddr_u from;
3456
3457		if (rb != NULL)
3458			freerecvbuf(rb);
3459
3460		fromlen = sizeof(from);
3461		buflen = recvfrom(fd, buf, sizeof(buf), 0,
3462				  &from.sa, &fromlen);
3463		DPRINTF(4, ("%s on (%lu) fd=%d from %s\n",
3464			(itf->ignore_packets)
3465			    ? "ignore"
3466			    : "drop",
3467			free_recvbuffs(), fd, stoa(&from)));
3468		if (itf->ignore_packets)
3469			packets_ignored++;
3470		else
3471			packets_dropped++;
3472		return (buflen);
3473	}
3474
3475	fromlen = sizeof(rb->recv_srcadr);
3476
3477#ifndef HAVE_PACKET_TIMESTAMP
3478	rb->recv_length = recvfrom(fd, (char *)&rb->recv_space,
3479				   sizeof(rb->recv_space), 0,
3480				   &rb->recv_srcadr.sa, &fromlen);
3481#else
3482	iovec.iov_base        = &rb->recv_space;
3483	iovec.iov_len         = sizeof(rb->recv_space);
3484	msghdr.msg_name       = &rb->recv_srcadr;
3485	msghdr.msg_namelen    = fromlen;
3486	msghdr.msg_iov        = &iovec;
3487	msghdr.msg_iovlen     = 1;
3488	msghdr.msg_control    = (void *)&control;
3489	msghdr.msg_controllen = sizeof(control);
3490	msghdr.msg_flags      = 0;
3491	rb->recv_length       = recvmsg(fd, &msghdr, 0);
3492#endif
3493
3494	buflen = rb->recv_length;
3495
3496	if (buflen == 0 || (buflen == -1 &&
3497	    (EWOULDBLOCK == errno
3498#ifdef EAGAIN
3499	     || EAGAIN == errno
3500#endif
3501	     ))) {
3502		freerecvbuf(rb);
3503		return (buflen);
3504	} else if (buflen < 0) {
3505		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: %m",
3506			stoa(&rb->recv_srcadr), fd);
3507		DPRINTF(5, ("read_network_packet: fd=%d dropped (bad recvfrom)\n",
3508			    fd));
3509		freerecvbuf(rb);
3510		return (buflen);
3511	}
3512
3513	DPRINTF(3, ("read_network_packet: fd=%d length %d from %s\n",
3514		    fd, buflen, stoa(&rb->recv_srcadr)));
3515
3516	/*
3517	** Bug 2672: Some OSes (MacOSX and Linux) don't block spoofed ::1
3518	*/
3519
3520	if (AF_INET6 == itf->family) {
3521		DPRINTF(2, ("Got an IPv6 packet, from <%s> (%d) to <%s> (%d)\n",
3522			stoa(&rb->recv_srcadr),
3523			IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr)),
3524			stoa(&itf->sin),
3525			!IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3526			));
3527
3528		if (   IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr))
3529		    && !IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3530		   ) {
3531			packets_dropped++;
3532			DPRINTF(2, ("DROPPING that packet\n"));
3533			freerecvbuf(rb);
3534			return buflen;
3535		}
3536		DPRINTF(2, ("processing that packet\n"));
3537	}
3538
3539	/*
3540	 * Got one.  Mark how and when it got here,
3541	 * put it on the full list and do bookkeeping.
3542	 */
3543	rb->dstadr = itf;
3544	rb->fd = fd;
3545#ifdef HAVE_PACKET_TIMESTAMP
3546	/* pick up a network time stamp if possible */
3547	ts = fetch_timestamp(rb, &msghdr, ts);
3548#endif
3549	rb->recv_time = ts;
3550	rb->receiver = receive;
3551
3552	add_full_recv_buffer(rb);
3553
3554	itf->received++;
3555	packets_received++;
3556	return (buflen);
3557}
3558
3559/*
3560 * attempt to handle io (select()/signaled IO)
3561 */
3562void
3563io_handler(void)
3564{
3565#  ifndef HAVE_SIGNALED_IO
3566	fd_set rdfdes;
3567	int nfound;
3568
3569	/*
3570	 * Use select() on all on all input fd's for unlimited
3571	 * time.  select() will terminate on SIGALARM or on the
3572	 * reception of input.	Using select() means we can't do
3573	 * robust signal handling and we get a potential race
3574	 * between checking for alarms and doing the select().
3575	 * Mostly harmless, I think.
3576	 */
3577	/*
3578	 * On VMS, I suspect that select() can't be interrupted
3579	 * by a "signal" either, so I take the easy way out and
3580	 * have select() time out after one second.
3581	 * System clock updates really aren't time-critical,
3582	 * and - lacking a hardware reference clock - I have
3583	 * yet to learn about anything else that is.
3584	 */
3585	rdfdes = activefds;
3586#   if !defined(VMS) && !defined(SYS_VXWORKS)
3587	nfound = select(maxactivefd + 1, &rdfdes, NULL,
3588			NULL, NULL);
3589#   else	/* VMS, VxWorks */
3590	/* make select() wake up after one second */
3591	{
3592		struct timeval t1;
3593
3594		t1.tv_sec = 1;
3595		t1.tv_usec = 0;
3596		nfound = select(maxactivefd + 1,
3597				&rdfdes, NULL, NULL,
3598				&t1);
3599	}
3600#   endif	/* VMS, VxWorks */
3601	if (nfound > 0) {
3602		l_fp ts;
3603
3604		get_systime(&ts);
3605
3606		input_handler(&ts);
3607	} else if (nfound == -1 && errno != EINTR) {
3608		msyslog(LOG_ERR, "select() error: %m");
3609	}
3610#   ifdef DEBUG
3611	else if (debug > 4) {
3612		msyslog(LOG_DEBUG, "select(): nfound=%d, error: %m", nfound);
3613	} else {
3614		DPRINTF(3, ("select() returned %d: %m\n", nfound));
3615	}
3616#   endif /* DEBUG */
3617#  else /* HAVE_SIGNALED_IO */
3618	wait_for_signal();
3619#  endif /* HAVE_SIGNALED_IO */
3620}
3621
3622/*
3623 * input_handler - receive packets asynchronously
3624 */
3625static void
3626input_handler(
3627	l_fp *	cts
3628	)
3629{
3630	int		buflen;
3631	int		n;
3632	u_int		idx;
3633	int		doing;
3634	SOCKET		fd;
3635	blocking_child *c;
3636	struct timeval	tvzero;
3637	l_fp		ts;	/* Timestamp at BOselect() gob */
3638#ifdef DEBUG_TIMING
3639	l_fp		ts_e;	/* Timestamp at EOselect() gob */
3640#endif
3641	fd_set		fds;
3642	size_t		select_count;
3643	endpt *		ep;
3644#ifdef REFCLOCK
3645	struct refclockio *rp;
3646	int		saved_errno;
3647	const char *	clk;
3648#endif
3649#ifdef HAS_ROUTING_SOCKET
3650	struct asyncio_reader *	asyncio_reader;
3651	struct asyncio_reader *	next_asyncio_reader;
3652#endif
3653
3654	handler_calls++;
3655	select_count = 0;
3656
3657	/*
3658	 * If we have something to do, freeze a timestamp.
3659	 * See below for the other cases (nothing left to do or error)
3660	 */
3661	ts = *cts;
3662
3663	/*
3664	 * Do a poll to see who has data
3665	 */
3666
3667	fds = activefds;
3668	tvzero.tv_sec = tvzero.tv_usec = 0;
3669
3670	n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3671
3672	/*
3673	 * If there are no packets waiting just return
3674	 */
3675	if (n < 0) {
3676		int err = errno;
3677		int j, b, prior;
3678		/*
3679		 * extended FAU debugging output
3680		 */
3681		if (err != EINTR)
3682			msyslog(LOG_ERR,
3683				"select(%d, %s, 0L, 0L, &0.0) error: %m",
3684				maxactivefd + 1,
3685				fdbits(maxactivefd, &activefds));
3686		if (err != EBADF)
3687			goto ih_return;
3688		for (j = 0, prior = 0; j <= maxactivefd; j++) {
3689			if (FD_ISSET(j, &activefds)) {
3690				if (-1 != read(j, &b, 0)) {
3691					prior = j;
3692					continue;
3693				}
3694				msyslog(LOG_ERR,
3695					"Removing bad file descriptor %d from select set",
3696					j);
3697				FD_CLR(j, &activefds);
3698				if (j == maxactivefd)
3699					maxactivefd = prior;
3700			}
3701		}
3702		goto ih_return;
3703	}
3704	else if (n == 0)
3705		goto ih_return;
3706
3707	++handler_pkts;
3708
3709#ifdef REFCLOCK
3710	/*
3711	 * Check out the reference clocks first, if any
3712	 */
3713
3714	if (refio != NULL) {
3715		for (rp = refio; rp != NULL; rp = rp->next) {
3716			fd = rp->fd;
3717
3718			if (!FD_ISSET(fd, &fds))
3719				continue;
3720			++select_count;
3721			buflen = read_refclock_packet(fd, rp, ts);
3722			/*
3723			 * The first read must succeed after select()
3724			 * indicates readability, or we've reached
3725			 * a permanent EOF.  http://bugs.ntp.org/1732
3726			 * reported ntpd munching CPU after a USB GPS
3727			 * was unplugged because select was indicating
3728			 * EOF but ntpd didn't remove the descriptor
3729			 * from the activefds set.
3730			 */
3731			if (buflen < 0 && EAGAIN != errno) {
3732				saved_errno = errno;
3733				clk = refnumtoa(&rp->srcclock->srcadr);
3734				errno = saved_errno;
3735				msyslog(LOG_ERR, "%s read: %m", clk);
3736				maintain_activefds(fd, TRUE);
3737			} else if (0 == buflen) {
3738				clk = refnumtoa(&rp->srcclock->srcadr);
3739				msyslog(LOG_ERR, "%s read EOF", clk);
3740				maintain_activefds(fd, TRUE);
3741			} else {
3742				/* drain any remaining refclock input */
3743				do {
3744					buflen = read_refclock_packet(fd, rp, ts);
3745				} while (buflen > 0);
3746			}
3747		}
3748	}
3749#endif /* REFCLOCK */
3750
3751	/*
3752	 * Loop through the interfaces looking for data to read.
3753	 */
3754	for (ep = ep_list; ep != NULL; ep = ep->elink) {
3755		for (doing = 0; doing < 2; doing++) {
3756			if (!doing) {
3757				fd = ep->fd;
3758			} else {
3759				if (!(ep->flags & INT_BCASTOPEN))
3760					break;
3761				fd = ep->bfd;
3762			}
3763			if (fd < 0)
3764				continue;
3765			if (FD_ISSET(fd, &fds))
3766				do {
3767					++select_count;
3768					buflen = read_network_packet(
3769							fd, ep, ts);
3770				} while (buflen > 0);
3771			/* Check more interfaces */
3772		}
3773	}
3774
3775#ifdef HAS_ROUTING_SOCKET
3776	/*
3777	 * scan list of asyncio readers - currently only used for routing sockets
3778	 */
3779	asyncio_reader = asyncio_reader_list;
3780
3781	while (asyncio_reader != NULL) {
3782		/* callback may unlink and free asyncio_reader */
3783		next_asyncio_reader = asyncio_reader->link;
3784		if (FD_ISSET(asyncio_reader->fd, &fds)) {
3785			++select_count;
3786			(*asyncio_reader->receiver)(asyncio_reader);
3787		}
3788		asyncio_reader = next_asyncio_reader;
3789	}
3790#endif /* HAS_ROUTING_SOCKET */
3791
3792	/*
3793	 * Check for a response from a blocking child
3794	 */
3795	for (idx = 0; idx < blocking_children_alloc; idx++) {
3796		c = blocking_children[idx];
3797		if (NULL == c || -1 == c->resp_read_pipe)
3798			continue;
3799		if (FD_ISSET(c->resp_read_pipe, &fds)) {
3800			select_count++;
3801			process_blocking_resp(c);
3802		}
3803	}
3804
3805	/*
3806	 * Done everything from that select.
3807	 * If nothing to do, just return.
3808	 * If an error occurred, complain and return.
3809	 */
3810	if (select_count == 0) { /* We really had nothing to do */
3811#ifdef DEBUG
3812		if (debug)
3813			msyslog(LOG_DEBUG, "input_handler: select() returned 0");
3814#endif /* DEBUG */
3815		goto ih_return;
3816	}
3817	/* We've done our work */
3818#ifdef DEBUG_TIMING
3819	get_systime(&ts_e);
3820	/*
3821	 * (ts_e - ts) is the amount of time we spent
3822	 * processing this gob of file descriptors.  Log
3823	 * it.
3824	 */
3825	L_SUB(&ts_e, &ts);
3826	collect_timing(NULL, "input handler", 1, &ts_e);
3827	if (debug > 3)
3828		msyslog(LOG_DEBUG,
3829			"input_handler: Processed a gob of fd's in %s msec",
3830			lfptoms(&ts_e, 6));
3831#endif /* DEBUG_TIMING */
3832	/* We're done... */
3833    ih_return:
3834	return;
3835}
3836#endif /* !HAVE_IO_COMPLETION_PORT */
3837
3838
3839/*
3840 * find an interface suitable for the src address
3841 */
3842endpt *
3843select_peerinterface(
3844	struct peer *	peer,
3845	sockaddr_u *	srcadr,
3846	endpt *		dstadr
3847	)
3848{
3849	endpt *ep;
3850#ifndef SIM
3851	endpt *wild;
3852
3853	wild = ANY_INTERFACE_CHOOSE(srcadr);
3854
3855	/*
3856	 * Initialize the peer structure and dance the interface jig.
3857	 * Reference clocks step the loopback waltz, the others
3858	 * squaredance around the interface list looking for a buddy. If
3859	 * the dance peters out, there is always the wildcard interface.
3860	 * This might happen in some systems and would preclude proper
3861	 * operation with public key cryptography.
3862	 */
3863	if (ISREFCLOCKADR(srcadr)) {
3864		ep = loopback_interface;
3865	} else if (peer->cast_flags &
3866		   (MDF_BCLNT | MDF_ACAST | MDF_MCAST | MDF_BCAST)) {
3867		ep = findbcastinter(srcadr);
3868		if (ep != NULL)
3869			DPRINTF(4, ("Found *-cast interface %s for address %s\n",
3870				stoa(&ep->sin), stoa(srcadr)));
3871		else
3872			DPRINTF(4, ("No *-cast local address found for address %s\n",
3873				stoa(srcadr)));
3874	} else {
3875		ep = dstadr;
3876		if (NULL == ep)
3877			ep = wild;
3878	}
3879	/*
3880	 * If it is a multicast address, findbcastinter() may not find
3881	 * it.  For unicast, we get to find the interface when dstadr is
3882	 * given to us as the wildcard (ANY_INTERFACE_CHOOSE).  Either
3883	 * way, try a little harder.
3884	 */
3885	if (wild == ep)
3886		ep = findinterface(srcadr);
3887	/*
3888	 * we do not bind to the wildcard interfaces for output
3889	 * as our (network) source address would be undefined and
3890	 * crypto will not work without knowing the own transmit address
3891	 */
3892	if (ep != NULL && INT_WILDCARD & ep->flags)
3893		if (!accept_wildcard_if_for_winnt)
3894			ep = NULL;
3895#else	/* SIM follows */
3896	ep = loopback_interface;
3897#endif
3898
3899	return ep;
3900}
3901
3902
3903/*
3904 * findinterface - find local interface corresponding to address
3905 */
3906endpt *
3907findinterface(
3908	sockaddr_u *addr
3909	)
3910{
3911	endpt *iface;
3912
3913	iface = findlocalinterface(addr, INT_WILDCARD, 0);
3914
3915	if (NULL == iface) {
3916		DPRINTF(4, ("Found no interface for address %s - returning wildcard\n",
3917			    stoa(addr)));
3918
3919		iface = ANY_INTERFACE_CHOOSE(addr);
3920	} else
3921		DPRINTF(4, ("Found interface #%d %s for address %s\n",
3922			    iface->ifnum, iface->name, stoa(addr)));
3923
3924	return iface;
3925}
3926
3927/*
3928 * findlocalinterface - find local interface corresponding to addr,
3929 * which does not have any of flags set.  If bast is nonzero, addr is
3930 * a broadcast address.
3931 *
3932 * This code attempts to find the local sending address for an outgoing
3933 * address by connecting a new socket to destinationaddress:NTP_PORT
3934 * and reading the sockname of the resulting connect.
3935 * the complicated sequence simulates the routing table lookup
3936 * for to first hop without duplicating any of the routing logic into
3937 * ntpd. preferably we would have used an API call - but its not there -
3938 * so this is the best we can do here short of duplicating to entire routing
3939 * logic in ntpd which would be a silly and really unportable thing to do.
3940 *
3941 */
3942static endpt *
3943findlocalinterface(
3944	sockaddr_u *	addr,
3945	int		flags,
3946	int		bcast
3947	)
3948{
3949	GETSOCKNAME_SOCKLEN_TYPE	sockaddrlen;
3950	endpt *				iface;
3951	sockaddr_u			saddr;
3952	SOCKET				s;
3953	int				rtn;
3954	int				on;
3955
3956	DPRINTF(4, ("Finding interface for addr %s in list of addresses\n",
3957		    stoa(addr)));
3958
3959	s = socket(AF(addr), SOCK_DGRAM, 0);
3960	if (INVALID_SOCKET == s)
3961		return NULL;
3962
3963	/*
3964	 * If we are looking for broadcast interface we need to set this
3965	 * socket to allow broadcast
3966	 */
3967	if (bcast) {
3968		on = 1;
3969		if (SOCKET_ERROR == setsockopt(s, SOL_SOCKET,
3970						SO_BROADCAST,
3971						(char *)&on,
3972						sizeof(on))) {
3973			closesocket(s);
3974			return NULL;
3975		}
3976	}
3977
3978	rtn = connect(s, &addr->sa, SOCKLEN(addr));
3979	if (SOCKET_ERROR == rtn) {
3980		closesocket(s);
3981		return NULL;
3982	}
3983
3984	sockaddrlen = sizeof(saddr);
3985	rtn = getsockname(s, &saddr.sa, &sockaddrlen);
3986	closesocket(s);
3987	if (SOCKET_ERROR == rtn)
3988		return NULL;
3989
3990	DPRINTF(4, ("findlocalinterface: kernel maps %s to %s\n",
3991		    stoa(addr), stoa(&saddr)));
3992
3993	iface = getinterface(&saddr, flags);
3994
3995	/*
3996	 * if we didn't find an exact match on saddr, find the closest
3997	 * available local address.  This handles the case of the
3998	 * address suggested by the kernel being excluded by nic rules
3999	 * or the user's -I and -L options to ntpd.
4000	 * See http://bugs.ntp.org/1184 and http://bugs.ntp.org/1683
4001	 * for more background.
4002	 */
4003	if (NULL == iface || iface->ignore_packets)
4004		iface = findclosestinterface(&saddr,
4005					     flags | INT_LOOPBACK);
4006
4007	/* Don't use an interface which will ignore replies */
4008	if (iface != NULL && iface->ignore_packets)
4009		iface = NULL;
4010
4011	return iface;
4012}
4013
4014
4015/*
4016 * findclosestinterface
4017 *
4018 * If there are -I/--interface or -L/novirtualips command-line options,
4019 * or "nic" or "interface" rules in ntp.conf, findlocalinterface() may
4020 * find the kernel's preferred local address for a given peer address is
4021 * administratively unavailable to ntpd, and punt to this routine's more
4022 * expensive search.
4023 *
4024 * Find the numerically closest local address to the one connect()
4025 * suggested.  This matches an address on the same subnet first, as
4026 * needed by Bug 1184, and provides a consistent choice if there are
4027 * multiple feasible local addresses, regardless of the order ntpd
4028 * enumerated them.
4029 */
4030endpt *
4031findclosestinterface(
4032	sockaddr_u *	addr,
4033	int		flags
4034	)
4035{
4036	endpt *		ep;
4037	endpt *		winner;
4038	sockaddr_u	addr_dist;
4039	sockaddr_u	min_dist;
4040
4041	ZERO_SOCK(&min_dist);
4042	winner = NULL;
4043
4044	for (ep = ep_list; ep != NULL; ep = ep->elink) {
4045		if (ep->ignore_packets ||
4046		    AF(addr) != ep->family ||
4047		    flags & ep->flags)
4048			continue;
4049
4050		calc_addr_distance(&addr_dist, addr, &ep->sin);
4051		if (NULL == winner ||
4052		    -1 == cmp_addr_distance(&addr_dist, &min_dist)) {
4053			min_dist = addr_dist;
4054			winner = ep;
4055		}
4056	}
4057	if (NULL == winner)
4058		DPRINTF(4, ("findclosestinterface(%s) failed\n",
4059			    stoa(addr)));
4060	else
4061		DPRINTF(4, ("findclosestinterface(%s) -> %s\n",
4062			    stoa(addr), stoa(&winner->sin)));
4063
4064	return winner;
4065}
4066
4067
4068/*
4069 * calc_addr_distance - calculate the distance between two addresses,
4070 *			the absolute value of the difference between
4071 *			the addresses numerically, stored as an address.
4072 */
4073static void
4074calc_addr_distance(
4075	sockaddr_u *		dist,
4076	const sockaddr_u *	a1,
4077	const sockaddr_u *	a2
4078	)
4079{
4080	u_int32	a1val;
4081	u_int32	a2val;
4082	u_int32	v4dist;
4083	int	found_greater;
4084	int	a1_greater;
4085	int	i;
4086
4087	REQUIRE(AF(a1) == AF(a2));
4088
4089	ZERO_SOCK(dist);
4090	AF(dist) = AF(a1);
4091
4092	/* v4 can be done a bit simpler */
4093	if (IS_IPV4(a1)) {
4094		a1val = SRCADR(a1);
4095		a2val = SRCADR(a2);
4096		v4dist = (a1val > a2val)
4097			     ? a1val - a2val
4098			     : a2val - a1val;
4099		SET_ADDR4(dist, v4dist);
4100
4101		return;
4102	}
4103
4104	found_greater = FALSE;
4105	a1_greater = FALSE;	/* suppress pot. uninit. warning */
4106	for (i = 0; i < (int)sizeof(NSRCADR6(a1)); i++) {
4107		if (!found_greater &&
4108		    NSRCADR6(a1)[i] != NSRCADR6(a2)[i]) {
4109			found_greater = TRUE;
4110			a1_greater = (NSRCADR6(a1)[i] > NSRCADR6(a2)[i]);
4111		}
4112		if (!found_greater) {
4113			NSRCADR6(dist)[i] = 0;
4114		} else {
4115			if (a1_greater)
4116				NSRCADR6(dist)[i] = NSRCADR6(a1)[i] -
4117						    NSRCADR6(a2)[i];
4118			else
4119				NSRCADR6(dist)[i] = NSRCADR6(a2)[i] -
4120						    NSRCADR6(a1)[i];
4121		}
4122	}
4123}
4124
4125
4126/*
4127 * cmp_addr_distance - compare two address distances, returning -1, 0,
4128 *		       1 to indicate their relationship.
4129 */
4130static int
4131cmp_addr_distance(
4132	const sockaddr_u *	d1,
4133	const sockaddr_u *	d2
4134	)
4135{
4136	int	i;
4137
4138	REQUIRE(AF(d1) == AF(d2));
4139
4140	if (IS_IPV4(d1)) {
4141		if (SRCADR(d1) < SRCADR(d2))
4142			return -1;
4143		else if (SRCADR(d1) == SRCADR(d2))
4144			return 0;
4145		else
4146			return 1;
4147	}
4148
4149	for (i = 0; i < (int)sizeof(NSRCADR6(d1)); i++) {
4150		if (NSRCADR6(d1)[i] < NSRCADR6(d2)[i])
4151			return -1;
4152		else if (NSRCADR6(d1)[i] > NSRCADR6(d2)[i])
4153			return 1;
4154	}
4155
4156	return 0;
4157}
4158
4159
4160
4161/*
4162 * fetch an interface structure the matches the
4163 * address and has the given flags NOT set
4164 */
4165endpt *
4166getinterface(
4167	sockaddr_u *	addr,
4168	u_int32		flags
4169	)
4170{
4171	endpt *iface;
4172
4173	iface = find_addr_in_list(addr);
4174
4175	if (iface != NULL && (iface->flags & flags))
4176		iface = NULL;
4177
4178	return iface;
4179}
4180
4181
4182/*
4183 * findbcastinter - find broadcast interface corresponding to address
4184 */
4185endpt *
4186findbcastinter(
4187	sockaddr_u *addr
4188	)
4189{
4190	endpt *	iface;
4191
4192	iface = NULL;
4193#if !defined(MPE) && (defined(SIOCGIFCONF) || defined(SYS_WINNT))
4194	DPRINTF(4, ("Finding broadcast/multicast interface for addr %s in list of addresses\n",
4195		    stoa(addr)));
4196
4197	iface = findlocalinterface(addr, INT_LOOPBACK | INT_WILDCARD,
4198				   1);
4199	if (iface != NULL) {
4200		DPRINTF(4, ("Easily found bcast-/mcast- interface index #%d %s\n",
4201			    iface->ifnum, iface->name));
4202		return iface;
4203	}
4204
4205	/*
4206	 * plan B - try to find something reasonable in our lists in
4207	 * case kernel lookup doesn't help
4208	 */
4209	for (iface = ep_list; iface != NULL; iface = iface->elink) {
4210		if (iface->flags & INT_WILDCARD)
4211			continue;
4212
4213		/* Don't bother with ignored interfaces */
4214		if (iface->ignore_packets)
4215			continue;
4216
4217		/*
4218		 * First look if this is the correct family
4219		 */
4220		if(AF(&iface->sin) != AF(addr))
4221			continue;
4222
4223		/* Skip the loopback addresses */
4224		if (iface->flags & INT_LOOPBACK)
4225			continue;
4226
4227		/*
4228		 * If we are looking to match a multicast address and
4229		 * this interface is one...
4230		 */
4231		if (addr_ismulticast(addr)
4232		    && (iface->flags & INT_MULTICAST)) {
4233#ifdef INCLUDE_IPV6_SUPPORT
4234			/*
4235			 * ...it is the winner unless we're looking for
4236			 * an interface to use for link-local multicast
4237			 * and its address is not link-local.
4238			 */
4239			if (IS_IPV6(addr)
4240			    && IN6_IS_ADDR_MC_LINKLOCAL(PSOCK_ADDR6(addr))
4241			    && !IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&iface->sin)))
4242				continue;
4243#endif
4244			break;
4245		}
4246
4247		/*
4248		 * We match only those interfaces marked as
4249		 * broadcastable and either the explicit broadcast
4250		 * address or the network portion of the IP address.
4251		 * Sloppy.
4252		 */
4253		if (IS_IPV4(addr)) {
4254			if (SOCK_EQ(&iface->bcast, addr))
4255				break;
4256
4257			if ((NSRCADR(&iface->sin) & NSRCADR(&iface->mask))
4258			    == (NSRCADR(addr)	  & NSRCADR(&iface->mask)))
4259				break;
4260		}
4261#ifdef INCLUDE_IPV6_SUPPORT
4262		else if (IS_IPV6(addr)) {
4263			if (SOCK_EQ(&iface->bcast, addr))
4264				break;
4265
4266			if (SOCK_EQ(netof(&iface->sin), netof(addr)))
4267				break;
4268		}
4269#endif
4270	}
4271#endif /* SIOCGIFCONF */
4272	if (NULL == iface) {
4273		DPRINTF(4, ("No bcast interface found for %s\n",
4274			    stoa(addr)));
4275		iface = ANY_INTERFACE_CHOOSE(addr);
4276	} else {
4277		DPRINTF(4, ("Found bcast-/mcast- interface index #%d %s\n",
4278			    iface->ifnum, iface->name));
4279	}
4280
4281	return iface;
4282}
4283
4284
4285/*
4286 * io_clr_stats - clear I/O module statistics
4287 */
4288void
4289io_clr_stats(void)
4290{
4291	packets_dropped = 0;
4292	packets_ignored = 0;
4293	packets_received = 0;
4294	packets_sent = 0;
4295	packets_notsent = 0;
4296
4297	handler_calls = 0;
4298	handler_pkts = 0;
4299	io_timereset = current_time;
4300}
4301
4302
4303#ifdef REFCLOCK
4304/*
4305 * io_addclock - add a reference clock to the list and arrange that we
4306 *				 get SIGIO interrupts from it.
4307 */
4308int
4309io_addclock(
4310	struct refclockio *rio
4311	)
4312{
4313	BLOCKIO();
4314
4315	/*
4316	 * Stuff the I/O structure in the list and mark the descriptor
4317	 * in use.  There is a harmless (I hope) race condition here.
4318	 */
4319	rio->active = TRUE;
4320
4321# ifdef HAVE_SIGNALED_IO
4322	if (init_clock_sig(rio)) {
4323		UNBLOCKIO();
4324		return 0;
4325	}
4326# elif defined(HAVE_IO_COMPLETION_PORT)
4327	if (io_completion_port_add_clock_io(rio)) {
4328		UNBLOCKIO();
4329		return 0;
4330	}
4331# endif
4332
4333	/*
4334	 * enqueue
4335	 */
4336	LINK_SLIST(refio, rio, next);
4337
4338	/*
4339	 * register fd
4340	 */
4341	add_fd_to_list(rio->fd, FD_TYPE_FILE);
4342
4343	UNBLOCKIO();
4344	return 1;
4345}
4346
4347
4348/*
4349 * io_closeclock - close the clock in the I/O structure given
4350 */
4351void
4352io_closeclock(
4353	struct refclockio *rio
4354	)
4355{
4356	struct refclockio *unlinked;
4357
4358	BLOCKIO();
4359
4360	/*
4361	 * Remove structure from the list
4362	 */
4363	rio->active = FALSE;
4364	UNLINK_SLIST(unlinked, refio, rio, next, struct refclockio);
4365	if (NULL != unlinked) {
4366		purge_recv_buffers_for_fd(rio->fd);
4367		/*
4368		 * Close the descriptor.
4369		 */
4370		close_and_delete_fd_from_list(rio->fd);
4371	}
4372	rio->fd = -1;
4373
4374	UNBLOCKIO();
4375}
4376#endif	/* REFCLOCK */
4377
4378
4379/*
4380 * On NT a SOCKET is an unsigned int so we cannot possibly keep it in
4381 * an array. So we use one of the ISC_LIST functions to hold the
4382 * socket value and use that when we want to enumerate it.
4383 *
4384 * This routine is called by the forked intres child process to close
4385 * all open sockets.  On Windows there's no need as intres runs in
4386 * the same process as a thread.
4387 */
4388#ifndef SYS_WINNT
4389void
4390kill_asyncio(
4391	int	startfd
4392	)
4393{
4394	BLOCKIO();
4395
4396	/*
4397	 * In the child process we do not maintain activefds and
4398	 * maxactivefd.  Zeroing maxactivefd disables code which
4399	 * maintains it in close_and_delete_fd_from_list().
4400	 */
4401	maxactivefd = 0;
4402
4403	while (fd_list != NULL)
4404		close_and_delete_fd_from_list(fd_list->fd);
4405
4406	UNBLOCKIO();
4407}
4408#endif	/* !SYS_WINNT */
4409
4410
4411/*
4412 * Add and delete functions for the list of open sockets
4413 */
4414static void
4415add_fd_to_list(
4416	SOCKET fd,
4417	enum desc_type type
4418	)
4419{
4420	vsock_t *lsock = emalloc(sizeof(*lsock));
4421
4422	lsock->fd = fd;
4423	lsock->type = type;
4424
4425	LINK_SLIST(fd_list, lsock, link);
4426	maintain_activefds(fd, 0);
4427}
4428
4429
4430static void
4431close_and_delete_fd_from_list(
4432	SOCKET fd
4433	)
4434{
4435	vsock_t *lsock;
4436
4437	UNLINK_EXPR_SLIST(lsock, fd_list, fd ==
4438	    UNLINK_EXPR_SLIST_CURRENT()->fd, link, vsock_t);
4439
4440	if (NULL == lsock)
4441		return;
4442
4443	switch (lsock->type) {
4444
4445	case FD_TYPE_SOCKET:
4446		closesocket(lsock->fd);
4447		break;
4448
4449	case FD_TYPE_FILE:
4450		closeserial((int)lsock->fd);
4451		break;
4452
4453	default:
4454		msyslog(LOG_ERR,
4455			"internal error - illegal descriptor type %d - EXITING",
4456			(int)lsock->type);
4457		exit(1);
4458	}
4459
4460	free(lsock);
4461	/*
4462	 * remove from activefds
4463	 */
4464	maintain_activefds(fd, 1);
4465}
4466
4467
4468static void
4469add_addr_to_list(
4470	sockaddr_u *	addr,
4471	endpt *		ep
4472	)
4473{
4474	remaddr_t *laddr;
4475
4476#ifdef DEBUG
4477	if (find_addr_in_list(addr) == NULL) {
4478#endif
4479		/* not there yet - add to list */
4480		laddr = emalloc(sizeof(*laddr));
4481		laddr->addr = *addr;
4482		laddr->ep = ep;
4483
4484		LINK_SLIST(remoteaddr_list, laddr, link);
4485
4486		DPRINTF(4, ("Added addr %s to list of addresses\n",
4487			    stoa(addr)));
4488#ifdef DEBUG
4489	} else
4490		DPRINTF(4, ("WARNING: Attempt to add duplicate addr %s to address list\n",
4491			    stoa(addr)));
4492#endif
4493}
4494
4495
4496static void
4497delete_addr_from_list(
4498	sockaddr_u *addr
4499	)
4500{
4501	remaddr_t *unlinked;
4502
4503	UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, SOCK_EQ(addr,
4504		&(UNLINK_EXPR_SLIST_CURRENT()->addr)), link, remaddr_t);
4505
4506	if (unlinked != NULL) {
4507		DPRINTF(4, ("Deleted addr %s from list of addresses\n",
4508			stoa(addr)));
4509		free(unlinked);
4510	}
4511}
4512
4513
4514static void
4515delete_interface_from_list(
4516	endpt *iface
4517	)
4518{
4519	remaddr_t *unlinked;
4520
4521	for (;;) {
4522		UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, iface ==
4523		    UNLINK_EXPR_SLIST_CURRENT()->ep, link,
4524		    remaddr_t);
4525
4526		if (unlinked == NULL)
4527			break;
4528		DPRINTF(4, ("Deleted addr %s for interface #%d %s from list of addresses\n",
4529			    stoa(&unlinked->addr), iface->ifnum,
4530			    iface->name));
4531		free(unlinked);
4532	}
4533}
4534
4535
4536static struct interface *
4537find_addr_in_list(
4538	sockaddr_u *addr
4539	)
4540{
4541	remaddr_t *entry;
4542
4543	DPRINTF(4, ("Searching for addr %s in list of addresses - ",
4544		    stoa(addr)));
4545
4546	for (entry = remoteaddr_list;
4547	     entry != NULL;
4548	     entry = entry->link)
4549		if (SOCK_EQ(&entry->addr, addr)) {
4550			DPRINTF(4, ("FOUND\n"));
4551			return entry->ep;
4552		}
4553
4554	DPRINTF(4, ("NOT FOUND\n"));
4555	return NULL;
4556}
4557
4558
4559/*
4560 * Find the given address with the all given flags set in the list
4561 */
4562static endpt *
4563find_flagged_addr_in_list(
4564	sockaddr_u *	addr,
4565	u_int32		flags
4566	)
4567{
4568	remaddr_t *entry;
4569
4570	DPRINTF(4, ("Finding addr %s with flags %d in list: ",
4571		    stoa(addr), flags));
4572
4573	for (entry = remoteaddr_list;
4574	     entry != NULL;
4575	     entry = entry->link)
4576
4577		if (SOCK_EQ(&entry->addr, addr)
4578		    && (entry->ep->flags & flags) == flags) {
4579
4580			DPRINTF(4, ("FOUND\n"));
4581			return entry->ep;
4582		}
4583
4584	DPRINTF(4, ("NOT FOUND\n"));
4585	return NULL;
4586}
4587
4588
4589const char *
4590localaddrtoa(
4591	endpt *la
4592	)
4593{
4594	return (NULL == la)
4595		   ? "<null>"
4596		   : stoa(&la->sin);
4597}
4598
4599
4600#ifdef HAS_ROUTING_SOCKET
4601# ifndef UPDATE_GRACE
4602#  define UPDATE_GRACE	2	/* wait UPDATE_GRACE seconds before scanning */
4603# endif
4604
4605static void
4606process_routing_msgs(struct asyncio_reader *reader)
4607{
4608	char buffer[5120];
4609	int cnt, msg_type;
4610#ifdef HAVE_RTNETLINK
4611	struct nlmsghdr *nh;
4612#else
4613	struct rt_msghdr rtm;
4614	char *p;
4615#endif
4616
4617	if (disable_dynamic_updates) {
4618		/*
4619		 * discard ourselves if we are not needed any more
4620		 * usually happens when running unprivileged
4621		 */
4622		remove_asyncio_reader(reader);
4623		delete_asyncio_reader(reader);
4624		return;
4625	}
4626
4627	cnt = read(reader->fd, buffer, sizeof(buffer));
4628
4629	if (cnt < 0) {
4630		if (errno == ENOBUFS) {
4631			msyslog(LOG_ERR,
4632				"routing socket reports: %m");
4633		} else {
4634			msyslog(LOG_ERR,
4635				"routing socket reports: %m - disabling");
4636			remove_asyncio_reader(reader);
4637			delete_asyncio_reader(reader);
4638		}
4639		return;
4640	}
4641
4642	/*
4643	 * process routing message
4644	 */
4645#ifdef HAVE_RTNETLINK
4646	for (nh = UA_PTR(struct nlmsghdr, buffer);
4647	     NLMSG_OK(nh, cnt);
4648	     nh = NLMSG_NEXT(nh, cnt)) {
4649		msg_type = nh->nlmsg_type;
4650#else
4651	for (p = buffer;
4652	     (p + sizeof(struct rt_msghdr)) <= (buffer + cnt);
4653	     p += rtm.rtm_msglen) {
4654		memcpy(&rtm, p, sizeof(rtm));
4655		if (rtm.rtm_version != RTM_VERSION) {
4656			msyslog(LOG_ERR,
4657				"version mismatch (got %d - expected %d) on routing socket - disabling",
4658				rtm.rtm_version, RTM_VERSION);
4659
4660			remove_asyncio_reader(reader);
4661			delete_asyncio_reader(reader);
4662			return;
4663		}
4664		msg_type = rtm.rtm_type;
4665#endif
4666		switch (msg_type) {
4667#ifdef RTM_NEWADDR
4668		case RTM_NEWADDR:
4669#endif
4670#ifdef RTM_DELADDR
4671		case RTM_DELADDR:
4672#endif
4673#ifdef RTM_ADD
4674		case RTM_ADD:
4675#endif
4676#ifdef RTM_DELETE
4677		case RTM_DELETE:
4678#endif
4679#ifdef RTM_REDIRECT
4680		case RTM_REDIRECT:
4681#endif
4682#ifdef RTM_CHANGE
4683		case RTM_CHANGE:
4684#endif
4685#ifdef RTM_LOSING
4686		case RTM_LOSING:
4687#endif
4688#ifdef RTM_IFINFO
4689		case RTM_IFINFO:
4690#endif
4691#ifdef RTM_IFANNOUNCE
4692		case RTM_IFANNOUNCE:
4693#endif
4694#ifdef RTM_NEWLINK
4695		case RTM_NEWLINK:
4696#endif
4697#ifdef RTM_DELLINK
4698		case RTM_DELLINK:
4699#endif
4700#ifdef RTM_NEWROUTE
4701		case RTM_NEWROUTE:
4702#endif
4703#ifdef RTM_DELROUTE
4704		case RTM_DELROUTE:
4705#endif
4706			/*
4707			 * we are keen on new and deleted addresses and
4708			 * if an interface goes up and down or routing
4709			 * changes
4710			 */
4711			DPRINTF(3, ("routing message op = %d: scheduling interface update\n",
4712				    msg_type));
4713			timer_interfacetimeout(current_time + UPDATE_GRACE);
4714			break;
4715#ifdef HAVE_RTNETLINK
4716		case NLMSG_DONE:
4717			/* end of multipart message */
4718			return;
4719#endif
4720		default:
4721			/*
4722			 * the rest doesn't bother us.
4723			 */
4724			DPRINTF(4, ("routing message op = %d: ignored\n",
4725				    msg_type));
4726			break;
4727		}
4728	}
4729}
4730
4731/*
4732 * set up routing notifications
4733 */
4734static void
4735init_async_notifications()
4736{
4737	struct asyncio_reader *reader;
4738#ifdef HAVE_RTNETLINK
4739	int fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
4740	struct sockaddr_nl sa;
4741#else
4742	int fd = socket(PF_ROUTE, SOCK_RAW, 0);
4743#endif
4744	if (fd < 0) {
4745		msyslog(LOG_ERR,
4746			"unable to open routing socket (%m) - using polled interface update");
4747		return;
4748	}
4749
4750	fd = move_fd(fd);
4751#ifdef HAVE_RTNETLINK
4752	ZERO(sa);
4753	sa.nl_family = PF_NETLINK;
4754	sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR
4755		       | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_ROUTE
4756		       | RTMGRP_IPV4_MROUTE | RTMGRP_IPV6_ROUTE
4757		       | RTMGRP_IPV6_MROUTE;
4758	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
4759		msyslog(LOG_ERR,
4760			"bind failed on routing socket (%m) - using polled interface update");
4761		return;
4762	}
4763#endif
4764	make_socket_nonblocking(fd);
4765#if defined(HAVE_SIGNALED_IO)
4766	init_socket_sig(fd);
4767#endif /* HAVE_SIGNALED_IO */
4768
4769	reader = new_asyncio_reader();
4770
4771	reader->fd = fd;
4772	reader->receiver = process_routing_msgs;
4773
4774	add_asyncio_reader(reader, FD_TYPE_SOCKET);
4775	msyslog(LOG_INFO,
4776		"Listening on routing socket on fd #%d for interface updates",
4777		fd);
4778}
4779#else
4780/* HAS_ROUTING_SOCKET not defined */
/*
 * init_async_notifications - stub used when there is no routing
 *			      socket support (HAS_ROUTING_SOCKET is
 *			      not defined); does nothing.
 */
static void
init_async_notifications(void)
{
	return;
}
4785#endif
4786
4787