listen_dnsport.c revision 291767
1/*
2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file has functions to get queries from clients.
40 */
41#include "config.h"
42#ifdef HAVE_SYS_TYPES_H
43#  include <sys/types.h>
44#endif
45#include <sys/time.h>
46#include "services/listen_dnsport.h"
47#include "services/outside_network.h"
48#include "util/netevent.h"
49#include "util/log.h"
50#include "util/config_file.h"
51#include "util/net_help.h"
52#include "sldns/sbuffer.h"
53
54#ifdef HAVE_NETDB_H
55#include <netdb.h>
56#endif
57#include <fcntl.h>
58
59#ifdef HAVE_SYS_UN_H
60#include <sys/un.h>
61#endif
62
63/** number of queued TCP connections for listen() */
64#define TCP_BACKLOG 256
65
66/**
67 * Debug print of the getaddrinfo returned address.
68 * @param addr: the address returned.
69 */
70static void
71verbose_print_addr(struct addrinfo *addr)
72{
73	if(verbosity >= VERB_ALGO) {
74		char buf[100];
75		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
76#ifdef INET6
77		if(addr->ai_family == AF_INET6)
78			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
79				sin6_addr;
80#endif /* INET6 */
81		if(inet_ntop(addr->ai_family, sinaddr, buf,
82			(socklen_t)sizeof(buf)) == 0) {
83			(void)strlcpy(buf, "(null)", sizeof(buf));
84		}
85		buf[sizeof(buf)-1] = 0;
86		verbose(VERB_ALGO, "creating %s%s socket %s %d",
87			addr->ai_socktype==SOCK_DGRAM?"udp":
88			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
89			addr->ai_family==AF_INET?"4":
90			addr->ai_family==AF_INET6?"6":
91			"_otherfam", buf,
92			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
93	}
94}
95
96int
97create_udp_sock(int family, int socktype, struct sockaddr* addr,
98        socklen_t addrlen, int v6only, int* inuse, int* noproto,
99	int rcv, int snd, int listen, int* reuseport, int transparent)
100{
101	int s;
102#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT)
103	int on=1;
104#endif
105#ifdef IPV6_MTU
106	int mtu = IPV6_MIN_MTU;
107#endif
108#if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
109	(void)rcv;
110#endif
111#if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
112	(void)snd;
113#endif
114#ifndef IPV6_V6ONLY
115	(void)v6only;
116#endif
117#ifndef IP_TRANSPARENT
118	(void)transparent;
119#endif
120	if((s = socket(family, socktype, 0)) == -1) {
121		*inuse = 0;
122#ifndef USE_WINSOCK
123		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
124			*noproto = 1;
125			return -1;
126		}
127		log_err("can't create socket: %s", strerror(errno));
128#else
129		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
130			WSAGetLastError() == WSAEPROTONOSUPPORT) {
131			*noproto = 1;
132			return -1;
133		}
134		log_err("can't create socket: %s",
135			wsa_strerror(WSAGetLastError()));
136#endif
137		*noproto = 0;
138		return -1;
139	}
140	if(listen) {
141#ifdef SO_REUSEADDR
142		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
143			(socklen_t)sizeof(on)) < 0) {
144#ifndef USE_WINSOCK
145			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
146				strerror(errno));
147			if(errno != ENOSYS) {
148				close(s);
149				*noproto = 0;
150				*inuse = 0;
151				return -1;
152			}
153#else
154			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
155				wsa_strerror(WSAGetLastError()));
156			closesocket(s);
157			*noproto = 0;
158			*inuse = 0;
159			return -1;
160#endif
161		}
162#endif /* SO_REUSEADDR */
163#ifdef SO_REUSEPORT
164		/* try to set SO_REUSEPORT so that incoming
165		 * queries are distributed evenly among the receiving threads.
166		 * Each thread must have its own socket bound to the same port,
167		 * with SO_REUSEPORT set on each socket.
168		 */
169		if (reuseport && *reuseport &&
170		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
171			(socklen_t)sizeof(on)) < 0) {
172#ifdef ENOPROTOOPT
173			if(errno != ENOPROTOOPT || verbosity >= 3)
174				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
175					strerror(errno));
176#endif
177			/* this option is not essential, we can continue */
178			*reuseport = 0;
179		}
180#else
181		(void)reuseport;
182#endif /* defined(SO_REUSEPORT) */
183#ifdef IP_TRANSPARENT
184		if (transparent &&
185		    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
186		    (socklen_t)sizeof(on)) < 0) {
187			log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
188			strerror(errno));
189		}
190#endif /* IP_TRANSPARENT */
191	}
192	if(rcv) {
193#ifdef SO_RCVBUF
194		int got;
195		socklen_t slen = (socklen_t)sizeof(got);
196#  ifdef SO_RCVBUFFORCE
197		/* Linux specific: try to use root permission to override
198		 * system limits on rcvbuf. The limit is stored in
199		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
200		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
201			(socklen_t)sizeof(rcv)) < 0) {
202			if(errno != EPERM) {
203#    ifndef USE_WINSOCK
204				log_err("setsockopt(..., SO_RCVBUFFORCE, "
205					"...) failed: %s", strerror(errno));
206				close(s);
207#    else
208				log_err("setsockopt(..., SO_RCVBUFFORCE, "
209					"...) failed: %s",
210					wsa_strerror(WSAGetLastError()));
211				closesocket(s);
212#    endif
213				*noproto = 0;
214				*inuse = 0;
215				return -1;
216			}
217#  endif /* SO_RCVBUFFORCE */
218			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
219				(socklen_t)sizeof(rcv)) < 0) {
220#  ifndef USE_WINSOCK
221				log_err("setsockopt(..., SO_RCVBUF, "
222					"...) failed: %s", strerror(errno));
223				close(s);
224#  else
225				log_err("setsockopt(..., SO_RCVBUF, "
226					"...) failed: %s",
227					wsa_strerror(WSAGetLastError()));
228				closesocket(s);
229#  endif
230				*noproto = 0;
231				*inuse = 0;
232				return -1;
233			}
234			/* check if we got the right thing or if system
235			 * reduced to some system max.  Warn if so */
236			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
237				&slen) >= 0 && got < rcv/2) {
238				log_warn("so-rcvbuf %u was not granted. "
239					"Got %u. To fix: start with "
240					"root permissions(linux) or sysctl "
241					"bigger net.core.rmem_max(linux) or "
242					"kern.ipc.maxsockbuf(bsd) values.",
243					(unsigned)rcv, (unsigned)got);
244			}
245#  ifdef SO_RCVBUFFORCE
246		}
247#  endif
248#endif /* SO_RCVBUF */
249	}
250	/* first do RCVBUF as the receive buffer is more important */
251	if(snd) {
252#ifdef SO_SNDBUF
253		int got;
254		socklen_t slen = (socklen_t)sizeof(got);
255#  ifdef SO_SNDBUFFORCE
256		/* Linux specific: try to use root permission to override
257		 * system limits on sndbuf. The limit is stored in
258		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
259		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
260			(socklen_t)sizeof(snd)) < 0) {
261			if(errno != EPERM) {
262#    ifndef USE_WINSOCK
263				log_err("setsockopt(..., SO_SNDBUFFORCE, "
264					"...) failed: %s", strerror(errno));
265				close(s);
266#    else
267				log_err("setsockopt(..., SO_SNDBUFFORCE, "
268					"...) failed: %s",
269					wsa_strerror(WSAGetLastError()));
270				closesocket(s);
271#    endif
272				*noproto = 0;
273				*inuse = 0;
274				return -1;
275			}
276#  endif /* SO_SNDBUFFORCE */
277			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
278				(socklen_t)sizeof(snd)) < 0) {
279#  ifndef USE_WINSOCK
280				log_err("setsockopt(..., SO_SNDBUF, "
281					"...) failed: %s", strerror(errno));
282				close(s);
283#  else
284				log_err("setsockopt(..., SO_SNDBUF, "
285					"...) failed: %s",
286					wsa_strerror(WSAGetLastError()));
287				closesocket(s);
288#  endif
289				*noproto = 0;
290				*inuse = 0;
291				return -1;
292			}
293			/* check if we got the right thing or if system
294			 * reduced to some system max.  Warn if so */
295			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
296				&slen) >= 0 && got < snd/2) {
297				log_warn("so-sndbuf %u was not granted. "
298					"Got %u. To fix: start with "
299					"root permissions(linux) or sysctl "
300					"bigger net.core.wmem_max(linux) or "
301					"kern.ipc.maxsockbuf(bsd) values.",
302					(unsigned)snd, (unsigned)got);
303			}
304#  ifdef SO_SNDBUFFORCE
305		}
306#  endif
307#endif /* SO_SNDBUF */
308	}
309	if(family == AF_INET6) {
310# if defined(IPV6_V6ONLY)
311		if(v6only) {
312			int val=(v6only==2)?0:1;
313			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
314				(void*)&val, (socklen_t)sizeof(val)) < 0) {
315#ifndef USE_WINSOCK
316				log_err("setsockopt(..., IPV6_V6ONLY"
317					", ...) failed: %s", strerror(errno));
318				close(s);
319#else
320				log_err("setsockopt(..., IPV6_V6ONLY"
321					", ...) failed: %s",
322					wsa_strerror(WSAGetLastError()));
323				closesocket(s);
324#endif
325				*noproto = 0;
326				*inuse = 0;
327				return -1;
328			}
329		}
330# endif
331# if defined(IPV6_USE_MIN_MTU)
332		/*
333		 * There is no fragmentation of IPv6 datagrams
334		 * during forwarding in the network. Therefore
335		 * we do not send UDP datagrams larger than
336		 * the minimum IPv6 MTU of 1280 octets. The
337		 * EDNS0 message length can be larger if the
338		 * network stack supports IPV6_USE_MIN_MTU.
339		 */
340		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
341			(void*)&on, (socklen_t)sizeof(on)) < 0) {
342#  ifndef USE_WINSOCK
343			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
344				"...) failed: %s", strerror(errno));
345			close(s);
346#  else
347			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
348				"...) failed: %s",
349				wsa_strerror(WSAGetLastError()));
350			closesocket(s);
351#  endif
352			*noproto = 0;
353			*inuse = 0;
354			return -1;
355		}
356# elif defined(IPV6_MTU)
357		/*
358		 * On Linux, to send no larger than 1280, the PMTUD is
359		 * disabled by default for datagrams anyway, so we set
360		 * the MTU to use.
361		 */
362		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
363			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
364#  ifndef USE_WINSOCK
365			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
366				strerror(errno));
367			close(s);
368#  else
369			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
370				wsa_strerror(WSAGetLastError()));
371			closesocket(s);
372#  endif
373			*noproto = 0;
374			*inuse = 0;
375			return -1;
376		}
377# endif /* IPv6 MTU */
378	} else if(family == AF_INET) {
379#  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
380/* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
381 * PMTU information is not accepted, but fragmentation is allowed
382 * if and only if the packet size exceeds the outgoing interface MTU
383 * (and also uses the interface mtu to determine the size of the packets).
384 * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
385 * FreeBSD already has same semantics without setting the option. */
386		int omit_set = 0;
387		int action;
388#   if defined(IP_PMTUDISC_OMIT)
389		action = IP_PMTUDISC_OMIT;
390		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
391			&action, (socklen_t)sizeof(action)) < 0) {
392
393			if (errno != EINVAL) {
394				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
395					strerror(errno));
396
397#    ifndef USE_WINSOCK
398				close(s);
399#    else
400				closesocket(s);
401#    endif
402				*noproto = 0;
403				*inuse = 0;
404				return -1;
405			}
406		}
407		else
408		{
409		    omit_set = 1;
410		}
411#   endif
412		if (omit_set == 0) {
413   			action = IP_PMTUDISC_DONT;
414			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
415				&action, (socklen_t)sizeof(action)) < 0) {
416				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
417					strerror(errno));
418#    ifndef USE_WINSOCK
419				close(s);
420#    else
421				closesocket(s);
422#    endif
423				*noproto = 0;
424				*inuse = 0;
425				return -1;
426			}
427		}
428#  elif defined(IP_DONTFRAG)
429		int off = 0;
430		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
431			&off, (socklen_t)sizeof(off)) < 0) {
432			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
433				strerror(errno));
434#    ifndef USE_WINSOCK
435			close(s);
436#    else
437			closesocket(s);
438#    endif
439			*noproto = 0;
440			*inuse = 0;
441			return -1;
442		}
443#  endif /* IPv4 MTU */
444	}
445	if(bind(s, (struct sockaddr*)addr, addrlen) != 0) {
446		*noproto = 0;
447		*inuse = 0;
448#ifndef USE_WINSOCK
449#ifdef EADDRINUSE
450		*inuse = (errno == EADDRINUSE);
451		/* detect freebsd jail with no ipv6 permission */
452		if(family==AF_INET6 && errno==EINVAL)
453			*noproto = 1;
454		else if(errno != EADDRINUSE) {
455			log_err_addr("can't bind socket", strerror(errno),
456				(struct sockaddr_storage*)addr, addrlen);
457		}
458#endif /* EADDRINUSE */
459		close(s);
460#else /* USE_WINSOCK */
461		if(WSAGetLastError() != WSAEADDRINUSE &&
462			WSAGetLastError() != WSAEADDRNOTAVAIL) {
463			log_err_addr("can't bind socket",
464				wsa_strerror(WSAGetLastError()),
465				(struct sockaddr_storage*)addr, addrlen);
466		}
467		closesocket(s);
468#endif
469		return -1;
470	}
471	if(!fd_set_nonblock(s)) {
472		*noproto = 0;
473		*inuse = 0;
474#ifndef USE_WINSOCK
475		close(s);
476#else
477		closesocket(s);
478#endif
479		return -1;
480	}
481	return s;
482}
483
484int
485create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
486	int* reuseport, int transparent)
487{
488	int s;
489#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT)
490	int on = 1;
491#endif
492#ifndef IP_TRANSPARENT
493	(void)transparent;
494#endif
495	verbose_print_addr(addr);
496	*noproto = 0;
497	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
498#ifndef USE_WINSOCK
499		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
500			*noproto = 1;
501			return -1;
502		}
503		log_err("can't create socket: %s", strerror(errno));
504#else
505		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
506			WSAGetLastError() == WSAEPROTONOSUPPORT) {
507			*noproto = 1;
508			return -1;
509		}
510		log_err("can't create socket: %s",
511			wsa_strerror(WSAGetLastError()));
512#endif
513		return -1;
514	}
515#ifdef SO_REUSEADDR
516	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
517		(socklen_t)sizeof(on)) < 0) {
518#ifndef USE_WINSOCK
519		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
520			strerror(errno));
521		close(s);
522#else
523		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
524			wsa_strerror(WSAGetLastError()));
525		closesocket(s);
526#endif
527		return -1;
528	}
529#endif /* SO_REUSEADDR */
530#ifdef SO_REUSEPORT
531	/* try to set SO_REUSEPORT so that incoming
532	 * connections are distributed evenly among the receiving threads.
533	 * Each thread must have its own socket bound to the same port,
534	 * with SO_REUSEPORT set on each socket.
535	 */
536	if (reuseport && *reuseport &&
537		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
538		(socklen_t)sizeof(on)) < 0) {
539#ifdef ENOPROTOOPT
540		if(errno != ENOPROTOOPT || verbosity >= 3)
541			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
542				strerror(errno));
543#endif
544		/* this option is not essential, we can continue */
545		*reuseport = 0;
546	}
547#else
548	(void)reuseport;
549#endif /* defined(SO_REUSEPORT) */
550#if defined(IPV6_V6ONLY)
551	if(addr->ai_family == AF_INET6 && v6only) {
552		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
553			(void*)&on, (socklen_t)sizeof(on)) < 0) {
554#ifndef USE_WINSOCK
555			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
556				strerror(errno));
557			close(s);
558#else
559			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
560				wsa_strerror(WSAGetLastError()));
561			closesocket(s);
562#endif
563			return -1;
564		}
565	}
566#else
567	(void)v6only;
568#endif /* IPV6_V6ONLY */
569#ifdef IP_TRANSPARENT
570	if (transparent &&
571	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
572	    (socklen_t)sizeof(on)) < 0) {
573		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
574			strerror(errno));
575	}
576#endif /* IP_TRANSPARENT */
577	if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
578#ifndef USE_WINSOCK
579		/* detect freebsd jail with no ipv6 permission */
580		if(addr->ai_family==AF_INET6 && errno==EINVAL)
581			*noproto = 1;
582		else {
583			log_err_addr("can't bind socket", strerror(errno),
584				(struct sockaddr_storage*)addr->ai_addr,
585				addr->ai_addrlen);
586		}
587		close(s);
588#else
589		log_err_addr("can't bind socket",
590			wsa_strerror(WSAGetLastError()),
591			(struct sockaddr_storage*)addr->ai_addr,
592			addr->ai_addrlen);
593		closesocket(s);
594#endif
595		return -1;
596	}
597	if(!fd_set_nonblock(s)) {
598#ifndef USE_WINSOCK
599		close(s);
600#else
601		closesocket(s);
602#endif
603		return -1;
604	}
605	if(listen(s, TCP_BACKLOG) == -1) {
606#ifndef USE_WINSOCK
607		log_err("can't listen: %s", strerror(errno));
608		close(s);
609#else
610		log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
611		closesocket(s);
612#endif
613		return -1;
614	}
615	return s;
616}
617
/**
 * Create a nonblocking, listening unix domain stream socket at a path.
 * An existing filesystem entry at the path is unlinked first.
 * @param path: filesystem path for the socket. It is copied into the
 * 	fixed-size sun_path member with strlcpy.
 * @param noproto: set to true when local sockets are not supported by
 * 	this build; otherwise unused.
 * @return the socket, or -1 on failure. On failure no descriptor is
 * 	left open.
 */
int
create_local_accept_sock(const char *path, int* noproto)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;

	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (socklen_t)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	/* from here on every failure must release the descriptor */
	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto fail;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto fail;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto fail;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto fail;
	}

	(void)noproto; /*unused*/
	return s;

fail:
#ifndef USE_WINSOCK
	close(s);
#else
	closesocket(s);
#endif
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
673
674
/**
 * Create socket from getaddrinfo results.
 * The name and port are resolved with the given hints and a UDP or TCP
 * accept socket is opened on the first result.
 * @param stype: SOCK_DGRAM for a UDP socket, otherwise a TCP accept
 * 	socket is made. Stored into hints->ai_socktype.
 * @param ifname: node name for getaddrinfo, may be NULL.
 * @param port: service (port number) string for getaddrinfo.
 * @param hints: hints for getaddrinfo; ai_socktype is overwritten.
 * @param v6only: passed to the socket creation routine.
 * @param noip6: set to true if the failure was because IPv6 is not
 * 	available, otherwise set to false.
 * @param rcv: receive buffer size for UDP.
 * @param snd: send buffer size for UDP.
 * @param reuseport: passed to the socket creation routine.
 * @param transparent: passed to the socket creation routine.
 * @return the socket, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent)
{
	struct addrinfo *ai = NULL;
	int gai, fd, inuse, noproto;

	hints->ai_socktype = stype;
	*noip6 = 0;
	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || !ai) {
		const char* syserr = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* EAI_SYSTEM means the real cause is in errno */
		if(gai == EAI_SYSTEM)
			syserr = (char*)strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &noproto, reuseport,
			transparent);
		if(fd == -1 && noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent);
		if(fd == -1) {
			if(inuse)
				log_err("bind: address already in use");
			else if(noproto && hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	}
	freeaddrinfo(ai);
	return fd;
}
725
/**
 * Make socket, and first see if ifname contains port override info.
 * An ifname of the form addr@port is split; the part after the '@'
 * replaces the port argument.
 * @return the socket, or -1 on failure (noip6 is set to false when the
 * 	address or port part does not fit the local buffers).
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent)
{
	char portbuf[16];
	char ifbuf[128];
	size_t iflen, portlen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* no override present, use the arguments as they are */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent);
	}

	/* override port with ifspec@port */
	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifbuf)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	portlen = strlen(at+1);
	if(portlen >= sizeof(portbuf)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* both lengths were checked above, so the copies fit */
	memcpy(ifbuf, ifname, iflen);
	ifbuf[iflen] = 0;
	memcpy(portbuf, at+1, portlen+1);
	return make_sock(stype, ifbuf, portbuf, hints, v6only, noip6,
		rcv, snd, reuseport, transparent);
}
757
758/**
759 * Add port to open ports list.
760 * @param list: list head. changed.
761 * @param s: fd.
762 * @param ftype: if fd is UDP.
763 * @return false on failure. list in unchanged then.
764 */
765static int
766port_insert(struct listen_port** list, int s, enum listen_type ftype)
767{
768	struct listen_port* item = (struct listen_port*)malloc(
769		sizeof(struct listen_port));
770	if(!item)
771		return 0;
772	item->next = *list;
773	item->fd = s;
774	item->ftype = ftype;
775	*list = item;
776	return 1;
777}
778
/**
 * Set fd to receive source address packet info.
 * @param s: the socket.
 * @param family: AF_INET or AF_INET6; other families are left alone.
 * @return false if the option could not be set or no suitable option
 * 	exists on this platform, true otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int on = 1;
#else
	(void)s;
#endif
	if(family == AF_INET) {
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic in config");
		return 0;
#endif /* IP_PKTINFO */
	} else if(family == AF_INET6) {
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#else
		log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
			"disable interface-automatic in config");
		return 0;
#endif /* defined IPV6_RECVPKTINFO */
	}
	return 1;
}
833
834/**
835 * Helper for ports_open. Creates one interface (or NULL for default).
836 * @param ifname: The interface ip address.
837 * @param do_auto: use automatic interface detection.
838 * 	If enabled, then ifname must be the wildcard name.
839 * @param do_udp: if udp should be used.
840 * @param do_tcp: if udp should be used.
841 * @param hints: for getaddrinfo. family and flags have to be set by caller.
842 * @param port: Port number to use (as string).
843 * @param list: list of open ports, appended to, changed to point to list head.
844 * @param rcv: receive buffer size for UDP
845 * @param snd: send buffer size for UDP
846 * @param ssl_port: ssl service port number
847 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
848 * 	set to false on exit if reuseport failed due to no kernel support.
849 * @param transparent: set IP_TRANSPARENT socket option.
850 * @return: returns false on error.
851 */
852static int
853ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
854	struct addrinfo *hints, const char* port, struct listen_port** list,
855	size_t rcv, size_t snd, int ssl_port, int* reuseport, int transparent)
856{
857	int s, noip6=0;
858	if(!do_udp && !do_tcp)
859		return 0;
860	if(do_auto) {
861		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
862			&noip6, rcv, snd, reuseport, transparent)) == -1) {
863			if(noip6) {
864				log_warn("IPv6 protocol not available");
865				return 1;
866			}
867			return 0;
868		}
869		/* getting source addr packet info is highly non-portable */
870		if(!set_recvpktinfo(s, hints->ai_family)) {
871#ifndef USE_WINSOCK
872			close(s);
873#else
874			closesocket(s);
875#endif
876			return 0;
877		}
878		if(!port_insert(list, s, listen_type_udpancil)) {
879#ifndef USE_WINSOCK
880			close(s);
881#else
882			closesocket(s);
883#endif
884			return 0;
885		}
886	} else if(do_udp) {
887		/* regular udp socket */
888		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
889			&noip6, rcv, snd, reuseport, transparent)) == -1) {
890			if(noip6) {
891				log_warn("IPv6 protocol not available");
892				return 1;
893			}
894			return 0;
895		}
896		if(!port_insert(list, s, listen_type_udp)) {
897#ifndef USE_WINSOCK
898			close(s);
899#else
900			closesocket(s);
901#endif
902			return 0;
903		}
904	}
905	if(do_tcp) {
906		int is_ssl = ((strchr(ifname, '@') &&
907			atoi(strchr(ifname, '@')+1) == ssl_port) ||
908			(!strchr(ifname, '@') && atoi(port) == ssl_port));
909		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
910			&noip6, 0, 0, reuseport, transparent)) == -1) {
911			if(noip6) {
912				/*log_warn("IPv6 protocol not available");*/
913				return 1;
914			}
915			return 0;
916		}
917		if(is_ssl)
918			verbose(VERB_ALGO, "setup TCP for SSL service");
919		if(!port_insert(list, s, is_ssl?listen_type_ssl:
920			listen_type_tcp)) {
921#ifndef USE_WINSOCK
922			close(s);
923#else
924			closesocket(s);
925#endif
926			return 0;
927		}
928	}
929	return 1;
930}
931
932/**
933 * Add items to commpoint list in front.
934 * @param c: commpoint to add.
935 * @param front: listen struct.
936 * @return: false on failure.
937 */
938static int
939listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
940{
941	struct listen_list* item = (struct listen_list*)malloc(
942		sizeof(struct listen_list));
943	if(!item)
944		return 0;
945	item->com = c;
946	item->next = front->cps;
947	front->cps = item;
948	return 1;
949}
950
951struct listen_dnsport*
952listen_create(struct comm_base* base, struct listen_port* ports,
953	size_t bufsize, int tcp_accept_count, void* sslctx,
954	struct dt_env* dtenv, comm_point_callback_t* cb, void *cb_arg)
955{
956	struct listen_dnsport* front = (struct listen_dnsport*)
957		malloc(sizeof(struct listen_dnsport));
958	if(!front)
959		return NULL;
960	front->cps = NULL;
961	front->udp_buff = sldns_buffer_new(bufsize);
962	if(!front->udp_buff) {
963		free(front);
964		return NULL;
965	}
966
967	/* create comm points as needed */
968	while(ports) {
969		struct comm_point* cp = NULL;
970		if(ports->ftype == listen_type_udp)
971			cp = comm_point_create_udp(base, ports->fd,
972				front->udp_buff, cb, cb_arg);
973		else if(ports->ftype == listen_type_tcp)
974			cp = comm_point_create_tcp(base, ports->fd,
975				tcp_accept_count, bufsize, cb, cb_arg);
976		else if(ports->ftype == listen_type_ssl) {
977			cp = comm_point_create_tcp(base, ports->fd,
978				tcp_accept_count, bufsize, cb, cb_arg);
979			cp->ssl = sslctx;
980		} else if(ports->ftype == listen_type_udpancil)
981			cp = comm_point_create_udp_ancil(base, ports->fd,
982				front->udp_buff, cb, cb_arg);
983		if(!cp) {
984			log_err("can't create commpoint");
985			listen_delete(front);
986			return NULL;
987		}
988		cp->dtenv = dtenv;
989		cp->do_not_close = 1;
990		if(!listen_cp_insert(cp, front)) {
991			log_err("malloc failed");
992			comm_point_delete(cp);
993			listen_delete(front);
994			return NULL;
995		}
996		ports = ports->next;
997	}
998	if(!front->cps) {
999		log_err("Could not open sockets to accept queries.");
1000		listen_delete(front);
1001		return NULL;
1002	}
1003
1004	return front;
1005}
1006
1007void
1008listen_list_delete(struct listen_list* list)
1009{
1010	struct listen_list *p = list, *pn;
1011	while(p) {
1012		pn = p->next;
1013		comm_point_delete(p->com);
1014		free(p);
1015		p = pn;
1016	}
1017}
1018
1019void
1020listen_delete(struct listen_dnsport* front)
1021{
1022	if(!front)
1023		return;
1024	listen_list_delete(front->cps);
1025	sldns_buffer_free(front->udp_buff);
1026	free(front);
1027}
1028
1029struct listen_port*
1030listening_ports_open(struct config_file* cfg, int* reuseport)
1031{
1032	struct listen_port* list = NULL;
1033	struct addrinfo hints;
1034	int i, do_ip4, do_ip6;
1035	int do_tcp, do_auto;
1036	char portbuf[32];
1037	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1038	do_ip4 = cfg->do_ip4;
1039	do_ip6 = cfg->do_ip6;
1040	do_tcp = cfg->do_tcp;
1041	do_auto = cfg->if_automatic && cfg->do_udp;
1042	if(cfg->incoming_num_tcp == 0)
1043		do_tcp = 0;
1044
1045	/* getaddrinfo */
1046	memset(&hints, 0, sizeof(hints));
1047	hints.ai_flags = AI_PASSIVE;
1048	/* no name lookups on our listening ports */
1049	if(cfg->num_ifs > 0)
1050		hints.ai_flags |= AI_NUMERICHOST;
1051	hints.ai_family = AF_UNSPEC;
1052#ifndef INET6
1053	do_ip6 = 0;
1054#endif
1055	if(!do_ip4 && !do_ip6) {
1056		return NULL;
1057	}
1058	/* create ip4 and ip6 ports so that return addresses are nice. */
1059	if(do_auto || cfg->num_ifs == 0) {
1060		if(do_ip6) {
1061			hints.ai_family = AF_INET6;
1062			if(!ports_create_if(do_auto?"::0":"::1",
1063				do_auto, cfg->do_udp, do_tcp,
1064				&hints, portbuf, &list,
1065				cfg->so_rcvbuf, cfg->so_sndbuf,
1066				cfg->ssl_port, reuseport,
1067				cfg->ip_transparent)) {
1068				listening_ports_free(list);
1069				return NULL;
1070			}
1071		}
1072		if(do_ip4) {
1073			hints.ai_family = AF_INET;
1074			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1075				do_auto, cfg->do_udp, do_tcp,
1076				&hints, portbuf, &list,
1077				cfg->so_rcvbuf, cfg->so_sndbuf,
1078				cfg->ssl_port, reuseport,
1079				cfg->ip_transparent)) {
1080				listening_ports_free(list);
1081				return NULL;
1082			}
1083		}
1084	} else for(i = 0; i<cfg->num_ifs; i++) {
1085		if(str_is_ip6(cfg->ifs[i])) {
1086			if(!do_ip6)
1087				continue;
1088			hints.ai_family = AF_INET6;
1089			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1090				do_tcp, &hints, portbuf, &list,
1091				cfg->so_rcvbuf, cfg->so_sndbuf,
1092				cfg->ssl_port, reuseport,
1093				cfg->ip_transparent)) {
1094				listening_ports_free(list);
1095				return NULL;
1096			}
1097		} else {
1098			if(!do_ip4)
1099				continue;
1100			hints.ai_family = AF_INET;
1101			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1102				do_tcp, &hints, portbuf, &list,
1103				cfg->so_rcvbuf, cfg->so_sndbuf,
1104				cfg->ssl_port, reuseport,
1105				cfg->ip_transparent)) {
1106				listening_ports_free(list);
1107				return NULL;
1108			}
1109		}
1110	}
1111	return list;
1112}
1113
1114void listening_ports_free(struct listen_port* list)
1115{
1116	struct listen_port* nx;
1117	while(list) {
1118		nx = list->next;
1119		if(list->fd != -1) {
1120#ifndef USE_WINSOCK
1121			close(list->fd);
1122#else
1123			closesocket(list->fd);
1124#endif
1125		}
1126		free(list);
1127		list = nx;
1128	}
1129}
1130
1131size_t listen_get_mem(struct listen_dnsport* listen)
1132{
1133	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1134		sizeof(*listen->udp_buff) +
1135		sldns_buffer_capacity(listen->udp_buff);
1136	struct listen_list* p;
1137	for(p = listen->cps; p; p = p->next) {
1138		s += sizeof(*p);
1139		s += comm_point_get_mem(p->com);
1140	}
1141	return s;
1142}
1143
1144void listen_stop_accept(struct listen_dnsport* listen)
1145{
1146	/* do not stop the ones that have no tcp_free list
1147	 * (they have already stopped listening) */
1148	struct listen_list* p;
1149	for(p=listen->cps; p; p=p->next) {
1150		if(p->com->type == comm_tcp_accept &&
1151			p->com->tcp_free != NULL) {
1152			comm_point_stop_listening(p->com);
1153		}
1154	}
1155}
1156
1157void listen_start_accept(struct listen_dnsport* listen)
1158{
1159	/* do not start the ones that have no tcp_free list, it is no
1160	 * use to listen to them because they have no free tcp handlers */
1161	struct listen_list* p;
1162	for(p=listen->cps; p; p=p->next) {
1163		if(p->com->type == comm_tcp_accept &&
1164			p->com->tcp_free != NULL) {
1165			comm_point_start_listening(p->com, -1, -1);
1166		}
1167	}
1168}
1169
1170