1/* 2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36/** 37 * \file 38 * 39 * This file has functions to get queries from clients. 
40 */ 41#include "config.h" 42#ifdef HAVE_SYS_TYPES_H 43# include <sys/types.h> 44#endif 45#include <sys/time.h> 46#include <limits.h> 47#ifdef USE_TCP_FASTOPEN 48#include <netinet/tcp.h> 49#endif 50#include <ctype.h> 51#include "services/listen_dnsport.h" 52#include "services/outside_network.h" 53#include "util/netevent.h" 54#include "util/log.h" 55#include "util/config_file.h" 56#include "util/net_help.h" 57#include "sldns/sbuffer.h" 58#include "sldns/parseutil.h" 59#include "services/mesh.h" 60#include "util/fptr_wlist.h" 61#include "util/locks.h" 62 63#ifdef HAVE_NETDB_H 64#include <netdb.h> 65#endif 66#include <fcntl.h> 67 68#ifdef HAVE_SYS_UN_H 69#include <sys/un.h> 70#endif 71 72#ifdef HAVE_SYSTEMD 73#include <systemd/sd-daemon.h> 74#endif 75 76#ifdef HAVE_IFADDRS_H 77#include <ifaddrs.h> 78#endif 79#ifdef HAVE_NET_IF_H 80#include <net/if.h> 81#endif 82#ifdef HAVE_LINUX_NET_TSTAMP_H 83#include <linux/net_tstamp.h> 84#endif 85/** number of queued TCP connections for listen() */ 86#define TCP_BACKLOG 256 87 88#ifndef THREADS_DISABLED 89/** lock on the counter of stream buffer memory */ 90static lock_basic_type stream_wait_count_lock; 91/** lock on the counter of HTTP2 query buffer memory */ 92static lock_basic_type http2_query_buffer_count_lock; 93/** lock on the counter of HTTP2 response buffer memory */ 94static lock_basic_type http2_response_buffer_count_lock; 95#endif 96/** size (in bytes) of stream wait buffers */ 97static size_t stream_wait_count = 0; 98/** is the lock initialised for stream wait buffers */ 99static int stream_wait_lock_inited = 0; 100/** size (in bytes) of HTTP2 query buffers */ 101static size_t http2_query_buffer_count = 0; 102/** is the lock initialised for HTTP2 query buffers */ 103static int http2_query_buffer_lock_inited = 0; 104/** size (in bytes) of HTTP2 response buffers */ 105static size_t http2_response_buffer_count = 0; 106/** is the lock initialised for HTTP2 response buffers */ 107static int 
http2_response_buffer_lock_inited = 0; 108 109/** 110 * Debug print of the getaddrinfo returned address. 111 * @param addr: the address returned. 112 */ 113static void 114verbose_print_addr(struct addrinfo *addr) 115{ 116 if(verbosity >= VERB_ALGO) { 117 char buf[100]; 118 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr; 119#ifdef INET6 120 if(addr->ai_family == AF_INET6) 121 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)-> 122 sin6_addr; 123#endif /* INET6 */ 124 if(inet_ntop(addr->ai_family, sinaddr, buf, 125 (socklen_t)sizeof(buf)) == 0) { 126 (void)strlcpy(buf, "(null)", sizeof(buf)); 127 } 128 buf[sizeof(buf)-1] = 0; 129 verbose(VERB_ALGO, "creating %s%s socket %s %d", 130 addr->ai_socktype==SOCK_DGRAM?"udp": 131 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", 132 addr->ai_family==AF_INET?"4": 133 addr->ai_family==AF_INET6?"6": 134 "_otherfam", buf, 135 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port)); 136 } 137} 138 139void 140verbose_print_unbound_socket(struct unbound_socket* ub_sock) 141{ 142 if(verbosity >= VERB_ALGO) { 143 char buf[256]; 144 log_info("listing of unbound_socket structure:"); 145 addr_to_str((void*)ub_sock->addr, ub_sock->addrlen, buf, 146 sizeof(buf)); 147 log_info("%s s is: %d, fam is: %s, acl: %s", buf, ub_sock->s, 148 ub_sock->fam == AF_INET?"AF_INET":"AF_INET6", 149 ub_sock->acl?"yes":"no"); 150 } 151} 152 153#ifdef HAVE_SYSTEMD 154static int 155systemd_get_activated(int family, int socktype, int listen, 156 struct sockaddr *addr, socklen_t addrlen, 157 const char *path) 158{ 159 int i = 0; 160 int r = 0; 161 int s = -1; 162 const char* listen_pid, *listen_fds; 163 164 /* We should use "listen" option only for stream protocols. 
For UDP it should be -1 */ 165 166 if((r = sd_booted()) < 1) { 167 if(r == 0) 168 log_warn("systemd is not running"); 169 else 170 log_err("systemd sd_booted(): %s", strerror(-r)); 171 return -1; 172 } 173 174 listen_pid = getenv("LISTEN_PID"); 175 listen_fds = getenv("LISTEN_FDS"); 176 177 if (!listen_pid) { 178 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID"); 179 return -1; 180 } 181 182 if (!listen_fds) { 183 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS"); 184 return -1; 185 } 186 187 if((r = sd_listen_fds(0)) < 1) { 188 if(r == 0) 189 log_warn("systemd: did not return socket, check unit configuration"); 190 else 191 log_err("systemd sd_listen_fds(): %s", strerror(-r)); 192 return -1; 193 } 194 195 for(i = 0; i < r; i++) { 196 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) { 197 s = SD_LISTEN_FDS_START + i; 198 break; 199 } 200 } 201 if (s == -1) { 202 if (addr) 203 log_err_addr("systemd sd_listen_fds()", 204 "no such socket", 205 (struct sockaddr_storage *)addr, addrlen); 206 else 207 log_err("systemd sd_listen_fds(): %s", path); 208 } 209 return s; 210} 211#endif 212 213int 214create_udp_sock(int family, int socktype, struct sockaddr* addr, 215 socklen_t addrlen, int v6only, int* inuse, int* noproto, 216 int rcv, int snd, int listen, int* reuseport, int transparent, 217 int freebind, int use_systemd, int dscp) 218{ 219 int s; 220 char* err; 221#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY) 222 int on=1; 223#endif 224#ifdef IPV6_MTU 225 int mtu = IPV6_MIN_MTU; 226#endif 227#if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF) 228 (void)rcv; 229#endif 230#if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF) 231 (void)snd; 232#endif 233#ifndef IPV6_V6ONLY 234 (void)v6only; 235#endif 236#if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY) 237 
(void)transparent; 238#endif 239#if !defined(IP_FREEBIND) 240 (void)freebind; 241#endif 242#ifdef HAVE_SYSTEMD 243 int got_fd_from_systemd = 0; 244 245 if (!use_systemd 246 || (use_systemd 247 && (s = systemd_get_activated(family, socktype, -1, addr, 248 addrlen, NULL)) == -1)) { 249#else 250 (void)use_systemd; 251#endif 252 if((s = socket(family, socktype, 0)) == -1) { 253 *inuse = 0; 254#ifndef USE_WINSOCK 255 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 256 *noproto = 1; 257 return -1; 258 } 259#else 260 if(WSAGetLastError() == WSAEAFNOSUPPORT || 261 WSAGetLastError() == WSAEPROTONOSUPPORT) { 262 *noproto = 1; 263 return -1; 264 } 265#endif 266 log_err("can't create socket: %s", sock_strerror(errno)); 267 *noproto = 0; 268 return -1; 269 } 270#ifdef HAVE_SYSTEMD 271 } else { 272 got_fd_from_systemd = 1; 273 } 274#endif 275 if(listen) { 276#ifdef SO_REUSEADDR 277 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 278 (socklen_t)sizeof(on)) < 0) { 279 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", 280 sock_strerror(errno)); 281#ifndef USE_WINSOCK 282 if(errno != ENOSYS) { 283 close(s); 284 *noproto = 0; 285 *inuse = 0; 286 return -1; 287 } 288#else 289 closesocket(s); 290 *noproto = 0; 291 *inuse = 0; 292 return -1; 293#endif 294 } 295#endif /* SO_REUSEADDR */ 296#ifdef SO_REUSEPORT 297# ifdef SO_REUSEPORT_LB 298 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance 299 * like SO_REUSEPORT on Linux. This is what the users want 300 * with the config option in unbound.conf; if we actually 301 * need local address and port reuse they'll also need to 302 * have SO_REUSEPORT set for them, assume it was _LB they want. 303 */ 304 if (reuseport && *reuseport && 305 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on, 306 (socklen_t)sizeof(on)) < 0) { 307#ifdef ENOPROTOOPT 308 if(errno != ENOPROTOOPT || verbosity >= 3) 309 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) 
failed: %s", 310 strerror(errno)); 311#endif 312 /* this option is not essential, we can continue */ 313 *reuseport = 0; 314 } 315# else /* no SO_REUSEPORT_LB */ 316 317 /* try to set SO_REUSEPORT so that incoming 318 * queries are distributed evenly among the receiving threads. 319 * Each thread must have its own socket bound to the same port, 320 * with SO_REUSEPORT set on each socket. 321 */ 322 if (reuseport && *reuseport && 323 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 324 (socklen_t)sizeof(on)) < 0) { 325#ifdef ENOPROTOOPT 326 if(errno != ENOPROTOOPT || verbosity >= 3) 327 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", 328 strerror(errno)); 329#endif 330 /* this option is not essential, we can continue */ 331 *reuseport = 0; 332 } 333# endif /* SO_REUSEPORT_LB */ 334#else 335 (void)reuseport; 336#endif /* defined(SO_REUSEPORT) */ 337#ifdef IP_TRANSPARENT 338 if (transparent && 339 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 340 (socklen_t)sizeof(on)) < 0) { 341 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s", 342 strerror(errno)); 343 } 344#elif defined(IP_BINDANY) 345 if (transparent && 346 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 347 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 348 (void*)&on, (socklen_t)sizeof(on)) < 0) { 349 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 350 (family==AF_INET6?"V6":""), strerror(errno)); 351 } 352#elif defined(SO_BINDANY) 353 if (transparent && 354 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, 355 (socklen_t)sizeof(on)) < 0) { 356 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s", 357 strerror(errno)); 358 } 359#endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 360 } 361#ifdef IP_FREEBIND 362 if(freebind && 363 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 364 (socklen_t)sizeof(on)) < 0) { 365 log_warn("setsockopt(.. IP_FREEBIND ..) 
failed: %s", 366 strerror(errno)); 367 } 368#endif /* IP_FREEBIND */ 369 if(rcv) { 370#ifdef SO_RCVBUF 371 int got; 372 socklen_t slen = (socklen_t)sizeof(got); 373# ifdef SO_RCVBUFFORCE 374 /* Linux specific: try to use root permission to override 375 * system limits on rcvbuf. The limit is stored in 376 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */ 377 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 378 (socklen_t)sizeof(rcv)) < 0) { 379 if(errno != EPERM) { 380 log_err("setsockopt(..., SO_RCVBUFFORCE, " 381 "...) failed: %s", sock_strerror(errno)); 382 sock_close(s); 383 *noproto = 0; 384 *inuse = 0; 385 return -1; 386 } 387# endif /* SO_RCVBUFFORCE */ 388 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 389 (socklen_t)sizeof(rcv)) < 0) { 390 log_err("setsockopt(..., SO_RCVBUF, " 391 "...) failed: %s", sock_strerror(errno)); 392 sock_close(s); 393 *noproto = 0; 394 *inuse = 0; 395 return -1; 396 } 397 /* check if we got the right thing or if system 398 * reduced to some system max. Warn if so */ 399 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 400 &slen) >= 0 && got < rcv/2) { 401 log_warn("so-rcvbuf %u was not granted. " 402 "Got %u. To fix: start with " 403 "root permissions(linux) or sysctl " 404 "bigger net.core.rmem_max(linux) or " 405 "kern.ipc.maxsockbuf(bsd) values.", 406 (unsigned)rcv, (unsigned)got); 407 } 408# ifdef SO_RCVBUFFORCE 409 } 410# endif 411#endif /* SO_RCVBUF */ 412 } 413 /* first do RCVBUF as the receive buffer is more important */ 414 if(snd) { 415#ifdef SO_SNDBUF 416 int got; 417 socklen_t slen = (socklen_t)sizeof(got); 418# ifdef SO_SNDBUFFORCE 419 /* Linux specific: try to use root permission to override 420 * system limits on sndbuf. The limit is stored in 421 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */ 422 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 423 (socklen_t)sizeof(snd)) < 0) { 424 if(errno != EPERM) { 425 log_err("setsockopt(..., SO_SNDBUFFORCE, " 426 "...) 
failed: %s", sock_strerror(errno)); 427 sock_close(s); 428 *noproto = 0; 429 *inuse = 0; 430 return -1; 431 } 432# endif /* SO_SNDBUFFORCE */ 433 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 434 (socklen_t)sizeof(snd)) < 0) { 435 log_err("setsockopt(..., SO_SNDBUF, " 436 "...) failed: %s", sock_strerror(errno)); 437 sock_close(s); 438 *noproto = 0; 439 *inuse = 0; 440 return -1; 441 } 442 /* check if we got the right thing or if system 443 * reduced to some system max. Warn if so */ 444 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 445 &slen) >= 0 && got < snd/2) { 446 log_warn("so-sndbuf %u was not granted. " 447 "Got %u. To fix: start with " 448 "root permissions(linux) or sysctl " 449 "bigger net.core.wmem_max(linux) or " 450 "kern.ipc.maxsockbuf(bsd) values.", 451 (unsigned)snd, (unsigned)got); 452 } 453# ifdef SO_SNDBUFFORCE 454 } 455# endif 456#endif /* SO_SNDBUF */ 457 } 458 err = set_ip_dscp(s, family, dscp); 459 if(err != NULL) 460 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err); 461 if(family == AF_INET6) { 462# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 463 int omit6_set = 0; 464 int action; 465# endif 466# if defined(IPV6_V6ONLY) 467 if(v6only 468# ifdef HAVE_SYSTEMD 469 /* Systemd wants to control if the socket is v6 only 470 * or both, with BindIPv6Only=default, ipv6-only or 471 * both in systemd.socket, so it is not set here. */ 472 && !got_fd_from_systemd 473# endif 474 ) { 475 int val=(v6only==2)?0:1; 476 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 477 (void*)&val, (socklen_t)sizeof(val)) < 0) { 478 log_err("setsockopt(..., IPV6_V6ONLY" 479 ", ...) failed: %s", sock_strerror(errno)); 480 sock_close(s); 481 *noproto = 0; 482 *inuse = 0; 483 return -1; 484 } 485 } 486# endif 487# if defined(IPV6_USE_MIN_MTU) 488 /* 489 * There is no fragmentation of IPv6 datagrams 490 * during forwarding in the network. 
Therefore 491 * we do not send UDP datagrams larger than 492 * the minimum IPv6 MTU of 1280 octets. The 493 * EDNS0 message length can be larger if the 494 * network stack supports IPV6_USE_MIN_MTU. 495 */ 496 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 497 (void*)&on, (socklen_t)sizeof(on)) < 0) { 498 log_err("setsockopt(..., IPV6_USE_MIN_MTU, " 499 "...) failed: %s", sock_strerror(errno)); 500 sock_close(s); 501 *noproto = 0; 502 *inuse = 0; 503 return -1; 504 } 505# elif defined(IPV6_MTU) 506# ifndef USE_WINSOCK 507 /* 508 * On Linux, to send no larger than 1280, the PMTUD is 509 * disabled by default for datagrams anyway, so we set 510 * the MTU to use. 511 */ 512 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU, 513 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 514 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 515 sock_strerror(errno)); 516 sock_close(s); 517 *noproto = 0; 518 *inuse = 0; 519 return -1; 520 } 521# elif defined(IPV6_USER_MTU) 522 /* As later versions of the mingw crosscompiler define 523 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU 524 * instead which is writable; IPV6_MTU is readonly there. */ 525 if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU, 526 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 527 if (WSAGetLastError() != WSAENOPROTOOPT) { 528 log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s", 529 wsa_strerror(WSAGetLastError())); 530 sock_close(s); 531 *noproto = 0; 532 *inuse = 0; 533 return -1; 534 } 535 } 536# endif /* USE_WINSOCK */ 537# endif /* IPv6 MTU */ 538# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 539# if defined(IP_PMTUDISC_OMIT) 540 action = IP_PMTUDISC_OMIT; 541 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 542 &action, (socklen_t)sizeof(action)) < 0) { 543 544 if (errno != EINVAL) { 545 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) 
failed: %s", 546 strerror(errno)); 547 sock_close(s); 548 *noproto = 0; 549 *inuse = 0; 550 return -1; 551 } 552 } 553 else 554 { 555 omit6_set = 1; 556 } 557# endif 558 if (omit6_set == 0) { 559 action = IP_PMTUDISC_DONT; 560 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 561 &action, (socklen_t)sizeof(action)) < 0) { 562 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 563 strerror(errno)); 564 sock_close(s); 565 *noproto = 0; 566 *inuse = 0; 567 return -1; 568 } 569 } 570# endif /* IPV6_MTU_DISCOVER */ 571 } else if(family == AF_INET) { 572# if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 573/* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that 574 * PMTU information is not accepted, but fragmentation is allowed 575 * if and only if the packet size exceeds the outgoing interface MTU 576 * (and also uses the interface mtu to determine the size of the packets). 577 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks. 578 * FreeBSD already has same semantics without setting the option. */ 579 int omit_set = 0; 580 int action; 581# if defined(IP_PMTUDISC_OMIT) 582 action = IP_PMTUDISC_OMIT; 583 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 584 &action, (socklen_t)sizeof(action)) < 0) { 585 586 if (errno != EINVAL) { 587 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 588 strerror(errno)); 589 sock_close(s); 590 *noproto = 0; 591 *inuse = 0; 592 return -1; 593 } 594 } 595 else 596 { 597 omit_set = 1; 598 } 599# endif 600 if (omit_set == 0) { 601 action = IP_PMTUDISC_DONT; 602 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 603 &action, (socklen_t)sizeof(action)) < 0) { 604 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) 
failed: %s", 605 strerror(errno)); 606 sock_close(s); 607 *noproto = 0; 608 *inuse = 0; 609 return -1; 610 } 611 } 612# elif defined(IP_DONTFRAG) && !defined(__APPLE__) 613 /* the IP_DONTFRAG option if defined in the 11.0 OSX headers, 614 * but does not work on that version, so we exclude it */ 615 /* a nonzero value disables fragmentation, according to 616 * docs.oracle.com for ip(4). */ 617 int off = 1; 618 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 619 &off, (socklen_t)sizeof(off)) < 0) { 620 log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s", 621 strerror(errno)); 622 sock_close(s); 623 *noproto = 0; 624 *inuse = 0; 625 return -1; 626 } 627# endif /* IPv4 MTU */ 628 } 629 if( 630#ifdef HAVE_SYSTEMD 631 !got_fd_from_systemd && 632#endif 633 bind(s, (struct sockaddr*)addr, addrlen) != 0) { 634 *noproto = 0; 635 *inuse = 0; 636#ifndef USE_WINSOCK 637#ifdef EADDRINUSE 638 *inuse = (errno == EADDRINUSE); 639 /* detect freebsd jail with no ipv6 permission */ 640 if(family==AF_INET6 && errno==EINVAL) 641 *noproto = 1; 642 else if(errno != EADDRINUSE && 643 !(errno == EACCES && verbosity < 4 && !listen) 644#ifdef EADDRNOTAVAIL 645 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen) 646#endif 647 ) { 648 log_err_addr("can't bind socket", strerror(errno), 649 (struct sockaddr_storage*)addr, addrlen); 650 } 651#endif /* EADDRINUSE */ 652#else /* USE_WINSOCK */ 653 if(WSAGetLastError() != WSAEADDRINUSE && 654 WSAGetLastError() != WSAEADDRNOTAVAIL && 655 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) { 656 log_err_addr("can't bind socket", 657 wsa_strerror(WSAGetLastError()), 658 (struct sockaddr_storage*)addr, addrlen); 659 } 660#endif /* USE_WINSOCK */ 661 sock_close(s); 662 return -1; 663 } 664 if(!fd_set_nonblock(s)) { 665 *noproto = 0; 666 *inuse = 0; 667 sock_close(s); 668 return -1; 669 } 670 return s; 671} 672 673int 674create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, 675 int* reuseport, int transparent, int mss, int 
nodelay, int freebind,
	int use_systemd, int dscp)
{
	int s;
	char* err;
#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
	int on = 1;
#endif
#ifdef HAVE_SYSTEMD
	int got_fd_from_systemd = 0;
#endif
#ifdef USE_TCP_FASTOPEN
	int qlen;
#endif
#if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
	(void)transparent;
#endif
#if !defined(IP_FREEBIND)
	(void)freebind;
#endif
	verbose_print_addr(addr);
	*noproto = 0;
#ifdef HAVE_SYSTEMD
	/* create a socket ourselves unless systemd hands one over */
	if (!use_systemd ||
	    (use_systemd
	     && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
					   addr->ai_addr, addr->ai_addrlen,
					   NULL)) == -1)) {
#else
	(void)use_systemd;
#endif
	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
#ifndef USE_WINSOCK
		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#else
		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
			WSAGetLastError() == WSAEPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#endif
		log_err("can't create socket: %s", sock_strerror(errno));
		return -1;
	}
	if(nodelay) {
#if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
		if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
			#ifndef USE_WINSOCK
			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
				strerror(errno));
			#else
			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
				wsa_strerror(WSAGetLastError()));
			#endif
		}
#else
		log_warn(" setsockopt(TCP_NODELAY) unsupported");
#endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
	}
	if (mss > 0) {
#if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
		if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
			(socklen_t)sizeof(mss)) < 0) {
			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
				sock_strerror(errno));
		} else {
			verbose(VERB_ALGO,
				" tcp socket mss set to %d", mss);
		}
#else
		log_warn(" setsockopt(TCP_MAXSEG) unsupported");
#endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
	}
#ifdef HAVE_SYSTEMD
	} else {
		got_fd_from_systemd = 1;
	}
#endif
#ifdef SO_REUSEADDR
	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
			sock_strerror(errno));
		sock_close(s);
		return -1;
	}
#endif /* SO_REUSEADDR */
#ifdef IP_FREEBIND
	if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_FREEBIND */
#ifdef SO_REUSEPORT
	/* try to set SO_REUSEPORT so that incoming
	 * connections are distributed evenly among the receiving threads.
	 * Each thread must have its own socket bound to the same port,
	 * with SO_REUSEPORT set on each socket.
	 */
	if (reuseport && *reuseport &&
		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
		if(errno != ENOPROTOOPT || verbosity >= 3)
			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
				strerror(errno));
#endif
		/* this option is not essential, we can continue */
		*reuseport = 0;
	}
#else
	(void)reuseport;
#endif /* defined(SO_REUSEPORT) */
#if defined(IPV6_V6ONLY)
	if(addr->ai_family == AF_INET6 && v6only
#  ifdef HAVE_SYSTEMD
		/* Systemd wants to control if the socket is v6 only
		 * or both, with BindIPv6Only=default, ipv6-only or
		 * both in systemd.socket, so it is not set here. */
		&& !got_fd_from_systemd
#  endif
		) {
		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
				sock_strerror(errno));
			sock_close(s);
			return -1;
		}
	}
#else
	(void)v6only;
#endif /* IPV6_V6ONLY */
#ifdef IP_TRANSPARENT
	if (transparent &&
	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
			strerror(errno));
	}
#elif defined(IP_BINDANY)
	if (transparent &&
	    setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
	    (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
	    (void*)&on, (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
		(addr->ai_family==AF_INET6?"V6":""), strerror(errno));
	}
#elif defined(SO_BINDANY)
	if (transparent &&
	    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
	    sizeof(on)) < 0) {
		log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
	err = set_ip_dscp(s, addr->ai_family, dscp);
	if(err != NULL)
		log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
	/* a socket handed over by systemd is not bound again here */
	if(
#ifdef HAVE_SYSTEMD
		!got_fd_from_systemd &&
#endif
		bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
#ifndef USE_WINSOCK
		/* detect freebsd jail with no ipv6 permission */
		if(addr->ai_family==AF_INET6 && errno==EINVAL)
			*noproto = 1;
		else {
			log_err_addr("can't bind socket", strerror(errno),
				(struct sockaddr_storage*)addr->ai_addr,
				addr->ai_addrlen);
		}
#else
		log_err_addr("can't bind socket",
			wsa_strerror(WSAGetLastError()),
			(struct sockaddr_storage*)addr->ai_addr,
			addr->ai_addrlen);
#endif
		sock_close(s);
		return -1;
	}
	if(!fd_set_nonblock(s)) {
		sock_close(s);
		return -1;
	}
	if(listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", sock_strerror(errno));
		sock_close(s);
		return -1;
	}
#ifdef USE_TCP_FASTOPEN
	/* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
	   against IP spoofing attacks as suggested in RFC7413 */
#ifdef __APPLE__
	/* OS X implementation only supports qlen of 1 via this call. Actual
	   value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
	qlen = 1;
#else
	/* 5 is recommended on linux */
	qlen = 5;
#endif
	if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
		  sizeof(qlen))) == -1 ) {
#ifdef ENOPROTOOPT
		/* squelch ENOPROTOOPT: freebsd server mode with kernel support
		   disabled, except when verbosity enabled for debugging */
		if(errno != ENOPROTOOPT || verbosity >= 3) {
#endif
		  if(errno == EPERM) {
			log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
		  } else {
			log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
		  }
#ifdef ENOPROTOOPT
		}
#endif
	}
#endif
	return s;
}

/*
 * Set the DiffServ codepoint on a socket. The codepoint is shifted left
 * by two bits and written with IPV6_TCLASS for AF_INET6 and with IP_TOS
 * for every other family. A dscp of 0 leaves the socket untouched.
 * Returns NULL on success, otherwise an error string.
 */
char*
set_ip_dscp(int socket, int addrfamily, int dscp)
{
	int ds;

	if(dscp == 0)
		return NULL;
	ds = dscp << 2;
	switch(addrfamily) {
	case AF_INET6:
	#ifdef IPV6_TCLASS
		if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds,
			sizeof(ds)) < 0)
			return sock_strerror(errno);
		break;
	#else
		return "IPV6_TCLASS not defined on this system";
	#endif
	default:
		if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
			return sock_strerror(errno);
		break;
	}
	return NULL;
}

/*
 * Create a listening local (unix domain) stream socket at path, or take
 * one over from systemd. An existing file at path is unlinked first.
 * Returns the fd, or -1 on failure; *noproto is set to 1 when local
 * sockets are not supported in this build.
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYSTEMD
	int ret;

	if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1)
		return ret;
	else {
#endif
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#ifndef HAVE_SYSTEMD
	(void)use_systemd;
#endif

	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	/* refuse a path that does not fit: a truncated name would bind a
	 * different file than the one that is unlinked below */
	if(strlcpy(usock.sun_path, path, sizeof(usock.sun_path)) >=
		sizeof(usock.sun_path)) {
		log_err("local socket path too long: %s", path);
		return -1;
	}

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;

#ifdef HAVE_SYSTEMD
	}
#endif
#else
	(void)use_systemd;
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}


/**
 * Create socket from getaddrinfo results.
 * Resolves ifname and port, creates a UDP socket for SOCK_DGRAM or a
 * listening TCP socket otherwise, and on success records a copy of the
 * address, the fd and the address family in ub_sock.
 * @return the fd, or -1 on failure. On failure ub_sock is not modified and
 *	*noip6 is set to 1 if the cause was that IPv6 is not available.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	struct addrinfo *res = NULL;
	int r, s, inuse, noproto;
	hints->ai_socktype = stype;
	*noip6 = 0;
	if((r=getaddrinfo(ifname, port, hints, &res)) != 0 || !res) {
#ifdef USE_WINSOCK
		if(r == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(r),
#ifdef EAI_SYSTEM
			(r==EAI_SYSTEM?(char*)strerror(errno):"")
#else
			""
#endif
		);
		return -1;
	}
	/* the address is dereferenced while the socket is created, so it
	 * has to be checked before that, not after */
	if(!res->ai_addr) {
		log_err("getaddrinfo returned no address");
		freeaddrinfo(res);
		return -1;
	}
	if(stype == SOCK_DGRAM) {
		verbose_print_addr(res);
		s = create_udp_sock(res->ai_family, res->ai_socktype,
			(struct sockaddr*)res->ai_addr, res->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd, dscp);
		if(s == -1 && inuse) {
			log_err("bind: address already in use");
		} else if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	} else {
		s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
			transparent, tcp_mss, nodelay, freebind, use_systemd,
			dscp);
		if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	}
	if(s == -1) {
		/* no socket was created: there is nothing to record in
		 * ub_sock and no fd to close */
		freeaddrinfo(res);
		return -1;
	}

	ub_sock->addr = memdup(res->ai_addr, res->ai_addrlen);
	ub_sock->addrlen = res->ai_addrlen;
	if(!ub_sock->addr) {
		log_err("out of memory: allocate listening address");
		freeaddrinfo(res);
		sock_close(s);
		return -1;
	}
	freeaddrinfo(res);

	ub_sock->s = s;
	ub_sock->fam = hints->ai_family;
	ub_sock->acl = NULL;

	return s;
}

/** make socket and first see if ifname contains port override info */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	char* s = strchr(ifname, '@');
	if(s) {
		/* override port with ifspec@port */
		char p[16];
		char newif[128];
		if((size_t)(s-ifname) >= sizeof(newif)) {
			log_err("ifname too long: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		if(strlen(s+1) >= sizeof(p)) {
			log_err("portnumber too long: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		/* split "ifspec@port" into the two buffers */
		(void)strlcpy(newif, ifname, sizeof(newif));
		newif[s-ifname] = 0;
		(void)strlcpy(p, s+1, sizeof(p));
		p[strlen(s+1)]=0;
		return make_sock(stype, newif, p, hints, v6only, noip6, rcv,
			snd, reuseport, transparent, tcp_mss, nodelay, freebind,
			use_systemd, dscp, ub_sock);
	}
	return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
		reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd,
		dscp, ub_sock);
}

/**
 * Add port to open ports list.
 * @param list: list head. changed.
 * @param s: fd.
 * @param ftype: the listening type of the fd.
 * @param pp2_enabled: if PROXYv2 is enabled for this port.
 * @param ub_sock: socket with address.
 * @return false on failure. list is unchanged then.
 */
static int
port_insert(struct listen_port** list, int s, enum listen_type ftype,
	int pp2_enabled, struct unbound_socket* ub_sock)
{
	struct listen_port* item = (struct listen_port*)malloc(
		sizeof(struct listen_port));
	if(!item)
		return 0;
	/* prepend the new item to the list */
	item->next = *list;
	item->fd = s;
	item->ftype = ftype;
	item->pp2_enabled = pp2_enabled;
	item->socket = ub_sock;
	*list = item;
	return 1;
}

/**
 * set fd to receive software timestamps
 * @return false on failure, and also when the platform lacks
 *	linux/net_tstamp.h.
 */
static int
set_recvtimestamp(int s)
{
#ifdef HAVE_LINUX_NET_TSTAMP_H
	int opt = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;
	if (setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&opt, (socklen_t)sizeof(opt)) < 0) {
		log_err("setsockopt(..., SO_TIMESTAMPNS, ...) failed: %s",
			strerror(errno));
		return 0;
	}
	return 1;
#else
	log_err("packets timestamping is not supported on this platform");
	(void)s;
	return 0;
#endif
}

/** set fd to receive source address packet info */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int on = 1;
#else
	(void)s;
#endif
	if(family == AF_INET6) {
#           ifdef IPV6_RECVPKTINFO
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) 
failed: %s", 1178 strerror(errno)); 1179 return 0; 1180 } 1181# else 1182 log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please " 1183 "disable interface-automatic or do-ip6 in config"); 1184 return 0; 1185# endif /* defined IPV6_RECVPKTINFO */ 1186 1187 } else if(family == AF_INET) { 1188# ifdef IP_PKTINFO 1189 if(setsockopt(s, IPPROTO_IP, IP_PKTINFO, 1190 (void*)&on, (socklen_t)sizeof(on)) < 0) { 1191 log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s", 1192 strerror(errno)); 1193 return 0; 1194 } 1195# elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR) 1196 if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR, 1197 (void*)&on, (socklen_t)sizeof(on)) < 0) { 1198 log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s", 1199 strerror(errno)); 1200 return 0; 1201 } 1202# else 1203 log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable " 1204 "interface-automatic or do-ip4 in config"); 1205 return 0; 1206# endif /* IP_PKTINFO */ 1207 1208 } 1209 return 1; 1210} 1211 1212/** see if interface is ssl, its port number == the ssl port number */ 1213static int 1214if_is_ssl(const char* ifname, const char* port, int ssl_port, 1215 struct config_strlist* tls_additional_port) 1216{ 1217 struct config_strlist* s; 1218 char* p = strchr(ifname, '@'); 1219 if(!p && atoi(port) == ssl_port) 1220 return 1; 1221 if(p && atoi(p+1) == ssl_port) 1222 return 1; 1223 for(s = tls_additional_port; s; s = s->next) { 1224 if(p && atoi(p+1) == atoi(s->str)) 1225 return 1; 1226 if(!p && atoi(port) == atoi(s->str)) 1227 return 1; 1228 } 1229 return 0; 1230} 1231 1232/** 1233 * Helper for ports_open. Creates one interface (or NULL for default). 1234 * @param ifname: The interface ip address. 1235 * @param do_auto: use automatic interface detection. 1236 * If enabled, then ifname must be the wildcard name. 1237 * @param do_udp: if udp should be used. 1238 * @param do_tcp: if tcp should be used. 1239 * @param hints: for getaddrinfo. family and flags have to be set by caller. 
1240 * @param port: Port number to use (as string). 1241 * @param list: list of open ports, appended to, changed to point to list head. 1242 * @param rcv: receive buffer size for UDP 1243 * @param snd: send buffer size for UDP 1244 * @param ssl_port: ssl service port number 1245 * @param tls_additional_port: list of additional ssl service port numbers. 1246 * @param https_port: DoH service port number 1247 * @param proxy_protocol_port: list of PROXYv2 port numbers. 1248 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true. 1249 * set to false on exit if reuseport failed due to no kernel support. 1250 * @param transparent: set IP_TRANSPARENT socket option. 1251 * @param tcp_mss: maximum segment size of tcp socket. default if zero. 1252 * @param freebind: set IP_FREEBIND socket option. 1253 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection 1254 * @param use_systemd: if true, fetch sockets from systemd. 1255 * @param dnscrypt_port: dnscrypt service port number 1256 * @param dscp: DSCP to use. 1257 * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to 1258 * wait to discard if UDP packets have waited for long in the socket 1259 * buffer. 1260 * @return: returns false on error. 
1261 */ 1262static int 1263ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, 1264 struct addrinfo *hints, const char* port, struct listen_port** list, 1265 size_t rcv, size_t snd, int ssl_port, 1266 struct config_strlist* tls_additional_port, int https_port, 1267 struct config_strlist* proxy_protocol_port, 1268 int* reuseport, int transparent, int tcp_mss, int freebind, 1269 int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp, 1270 int sock_queue_timeout) 1271{ 1272 int s, noip6=0; 1273 int is_https = if_is_https(ifname, port, https_port); 1274 int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port); 1275 int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port); 1276 int nodelay = is_https && http2_nodelay; 1277 struct unbound_socket* ub_sock; 1278 1279 if(!do_udp && !do_tcp) 1280 return 0; 1281 1282 if(is_pp2) { 1283 if(is_dnscrypt) { 1284 fatal_exit("PROXYv2 and DNSCrypt combination not " 1285 "supported!"); 1286 } else if(is_https) { 1287 fatal_exit("PROXYv2 and DoH combination not " 1288 "supported!"); 1289 } 1290 } 1291 1292 if(do_auto) { 1293 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1294 if(!ub_sock) 1295 return 0; 1296 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 1297 &noip6, rcv, snd, reuseport, transparent, 1298 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { 1299 free(ub_sock->addr); 1300 free(ub_sock); 1301 if(noip6) { 1302 log_warn("IPv6 protocol not available"); 1303 return 1; 1304 } 1305 return 0; 1306 } 1307 /* getting source addr packet info is highly non-portable */ 1308 if(!set_recvpktinfo(s, hints->ai_family)) { 1309 sock_close(s); 1310 free(ub_sock->addr); 1311 free(ub_sock); 1312 return 0; 1313 } 1314 if (sock_queue_timeout && !set_recvtimestamp(s)) { 1315 log_warn("socket timestamping is not available"); 1316 } 1317 if(!port_insert(list, s, is_dnscrypt 1318 ?listen_type_udpancil_dnscrypt:listen_type_udpancil, 1319 is_pp2, ub_sock)) { 1320 sock_close(s); 1321 
free(ub_sock->addr); 1322 free(ub_sock); 1323 return 0; 1324 } 1325 } else if(do_udp) { 1326 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1327 if(!ub_sock) 1328 return 0; 1329 /* regular udp socket */ 1330 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 1331 &noip6, rcv, snd, reuseport, transparent, 1332 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { 1333 free(ub_sock->addr); 1334 free(ub_sock); 1335 if(noip6) { 1336 log_warn("IPv6 protocol not available"); 1337 return 1; 1338 } 1339 return 0; 1340 } 1341 if (sock_queue_timeout && !set_recvtimestamp(s)) { 1342 log_warn("socket timestamping is not available"); 1343 } 1344 if(!port_insert(list, s, is_dnscrypt 1345 ?listen_type_udp_dnscrypt : 1346 (sock_queue_timeout ? 1347 listen_type_udpancil:listen_type_udp), 1348 is_pp2, ub_sock)) { 1349 sock_close(s); 1350 free(ub_sock->addr); 1351 free(ub_sock); 1352 return 0; 1353 } 1354 } 1355 if(do_tcp) { 1356 int is_ssl = if_is_ssl(ifname, port, ssl_port, 1357 tls_additional_port); 1358 enum listen_type port_type; 1359 ub_sock = calloc(1, sizeof(struct unbound_socket)); 1360 if(!ub_sock) 1361 return 0; 1362 if(is_ssl) 1363 port_type = listen_type_ssl; 1364 else if(is_https) 1365 port_type = listen_type_http; 1366 else if(is_dnscrypt) 1367 port_type = listen_type_tcp_dnscrypt; 1368 else 1369 port_type = listen_type_tcp; 1370 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, 1371 &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay, 1372 freebind, use_systemd, dscp, ub_sock)) == -1) { 1373 free(ub_sock->addr); 1374 free(ub_sock); 1375 if(noip6) { 1376 /*log_warn("IPv6 protocol not available");*/ 1377 return 1; 1378 } 1379 return 0; 1380 } 1381 if(is_ssl) 1382 verbose(VERB_ALGO, "setup TCP for SSL service"); 1383 if(!port_insert(list, s, port_type, is_pp2, ub_sock)) { 1384 sock_close(s); 1385 free(ub_sock->addr); 1386 free(ub_sock); 1387 return 0; 1388 } 1389 } 1390 return 1; 1391} 1392 1393/** 1394 * Add items to commpoint list 
in front. 1395 * @param c: commpoint to add. 1396 * @param front: listen struct. 1397 * @return: false on failure. 1398 */ 1399static int 1400listen_cp_insert(struct comm_point* c, struct listen_dnsport* front) 1401{ 1402 struct listen_list* item = (struct listen_list*)malloc( 1403 sizeof(struct listen_list)); 1404 if(!item) 1405 return 0; 1406 item->com = c; 1407 item->next = front->cps; 1408 front->cps = item; 1409 return 1; 1410} 1411 1412void listen_setup_locks(void) 1413{ 1414 if(!stream_wait_lock_inited) { 1415 lock_basic_init(&stream_wait_count_lock); 1416 stream_wait_lock_inited = 1; 1417 } 1418 if(!http2_query_buffer_lock_inited) { 1419 lock_basic_init(&http2_query_buffer_count_lock); 1420 http2_query_buffer_lock_inited = 1; 1421 } 1422 if(!http2_response_buffer_lock_inited) { 1423 lock_basic_init(&http2_response_buffer_count_lock); 1424 http2_response_buffer_lock_inited = 1; 1425 } 1426} 1427 1428void listen_desetup_locks(void) 1429{ 1430 if(stream_wait_lock_inited) { 1431 stream_wait_lock_inited = 0; 1432 lock_basic_destroy(&stream_wait_count_lock); 1433 } 1434 if(http2_query_buffer_lock_inited) { 1435 http2_query_buffer_lock_inited = 0; 1436 lock_basic_destroy(&http2_query_buffer_count_lock); 1437 } 1438 if(http2_response_buffer_lock_inited) { 1439 http2_response_buffer_lock_inited = 0; 1440 lock_basic_destroy(&http2_response_buffer_count_lock); 1441 } 1442} 1443 1444struct listen_dnsport* 1445listen_create(struct comm_base* base, struct listen_port* ports, 1446 size_t bufsize, int tcp_accept_count, int tcp_idle_timeout, 1447 int harden_large_queries, uint32_t http_max_streams, 1448 char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit, 1449 void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb, 1450 void *cb_arg) 1451{ 1452 struct listen_dnsport* front = (struct listen_dnsport*) 1453 malloc(sizeof(struct listen_dnsport)); 1454 if(!front) 1455 return NULL; 1456 front->cps = NULL; 1457 front->udp_buff = 
sldns_buffer_new(bufsize); 1458#ifdef USE_DNSCRYPT 1459 front->dnscrypt_udp_buff = NULL; 1460#endif 1461 if(!front->udp_buff) { 1462 free(front); 1463 return NULL; 1464 } 1465 1466 /* create comm points as needed */ 1467 while(ports) { 1468 struct comm_point* cp = NULL; 1469 if(ports->ftype == listen_type_udp || 1470 ports->ftype == listen_type_udp_dnscrypt) { 1471 cp = comm_point_create_udp(base, ports->fd, 1472 front->udp_buff, ports->pp2_enabled, cb, 1473 cb_arg, ports->socket); 1474 } else if(ports->ftype == listen_type_tcp || 1475 ports->ftype == listen_type_tcp_dnscrypt) { 1476 cp = comm_point_create_tcp(base, ports->fd, 1477 tcp_accept_count, tcp_idle_timeout, 1478 harden_large_queries, 0, NULL, 1479 tcp_conn_limit, bufsize, front->udp_buff, 1480 ports->ftype, ports->pp2_enabled, cb, cb_arg, 1481 ports->socket); 1482 } else if(ports->ftype == listen_type_ssl || 1483 ports->ftype == listen_type_http) { 1484 cp = comm_point_create_tcp(base, ports->fd, 1485 tcp_accept_count, tcp_idle_timeout, 1486 harden_large_queries, 1487 http_max_streams, http_endpoint, 1488 tcp_conn_limit, bufsize, front->udp_buff, 1489 ports->ftype, ports->pp2_enabled, cb, cb_arg, 1490 ports->socket); 1491 if(ports->ftype == listen_type_http) { 1492 if(!sslctx && !http_notls) { 1493 log_warn("HTTPS port configured, but " 1494 "no TLS tls-service-key or " 1495 "tls-service-pem set"); 1496 } 1497#ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB 1498 if(!http_notls) { 1499 log_warn("Unbound is not compiled " 1500 "with an OpenSSL version " 1501 "supporting ALPN " 1502 "(OpenSSL >= 1.0.2). This " 1503 "is required to use " 1504 "DNS-over-HTTPS"); 1505 } 1506#endif 1507#ifndef HAVE_NGHTTP2_NGHTTP2_H 1508 log_warn("Unbound is not compiled with " 1509 "nghttp2. 
This is required to use " 1510 "DNS-over-HTTPS."); 1511#endif 1512 } 1513 } else if(ports->ftype == listen_type_udpancil || 1514 ports->ftype == listen_type_udpancil_dnscrypt) { 1515#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 1516 cp = comm_point_create_udp_ancil(base, ports->fd, 1517 front->udp_buff, ports->pp2_enabled, cb, 1518 cb_arg, ports->socket); 1519#else 1520 log_warn("This system does not support UDP ancilliary data."); 1521#endif 1522 } 1523 if(!cp) { 1524 log_err("can't create commpoint"); 1525 listen_delete(front); 1526 return NULL; 1527 } 1528 if((http_notls && ports->ftype == listen_type_http) || 1529 (ports->ftype == listen_type_tcp) || 1530 (ports->ftype == listen_type_udp) || 1531 (ports->ftype == listen_type_udpancil) || 1532 (ports->ftype == listen_type_tcp_dnscrypt) || 1533 (ports->ftype == listen_type_udp_dnscrypt) || 1534 (ports->ftype == listen_type_udpancil_dnscrypt)) 1535 cp->ssl = NULL; 1536 else 1537 cp->ssl = sslctx; 1538 cp->dtenv = dtenv; 1539 cp->do_not_close = 1; 1540#ifdef USE_DNSCRYPT 1541 if (ports->ftype == listen_type_udp_dnscrypt || 1542 ports->ftype == listen_type_tcp_dnscrypt || 1543 ports->ftype == listen_type_udpancil_dnscrypt) { 1544 cp->dnscrypt = 1; 1545 cp->dnscrypt_buffer = sldns_buffer_new(bufsize); 1546 if(!cp->dnscrypt_buffer) { 1547 log_err("can't alloc dnscrypt_buffer"); 1548 comm_point_delete(cp); 1549 listen_delete(front); 1550 return NULL; 1551 } 1552 front->dnscrypt_udp_buff = cp->dnscrypt_buffer; 1553 } 1554#endif 1555 if(!listen_cp_insert(cp, front)) { 1556 log_err("malloc failed"); 1557 comm_point_delete(cp); 1558 listen_delete(front); 1559 return NULL; 1560 } 1561 ports = ports->next; 1562 } 1563 if(!front->cps) { 1564 log_err("Could not open sockets to accept queries."); 1565 listen_delete(front); 1566 return NULL; 1567 } 1568 1569 return front; 1570} 1571 1572void 1573listen_list_delete(struct listen_list* list) 1574{ 1575 struct listen_list *p = list, *pn; 1576 while(p) { 
1577 pn = p->next; 1578 comm_point_delete(p->com); 1579 free(p); 1580 p = pn; 1581 } 1582} 1583 1584void 1585listen_delete(struct listen_dnsport* front) 1586{ 1587 if(!front) 1588 return; 1589 listen_list_delete(front->cps); 1590#ifdef USE_DNSCRYPT 1591 if(front->dnscrypt_udp_buff && 1592 front->udp_buff != front->dnscrypt_udp_buff) { 1593 sldns_buffer_free(front->dnscrypt_udp_buff); 1594 } 1595#endif 1596 sldns_buffer_free(front->udp_buff); 1597 free(front); 1598} 1599 1600#ifdef HAVE_GETIFADDRS 1601static int 1602resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size) 1603{ 1604 struct ifaddrs *ifa; 1605 void *tmpbuf; 1606 int last_ip_addresses_size = *ip_addresses_size; 1607 1608 for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) { 1609 sa_family_t family; 1610 const char* atsign; 1611#ifdef INET6 /* | address ip | % | ifa name | @ | port | nul */ 1612 char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1]; 1613#else 1614 char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1]; 1615#endif 1616 1617 if((atsign=strrchr(search_ifa, '@')) != NULL) { 1618 if(strlen(ifa->ifa_name) != (size_t)(atsign-search_ifa) 1619 || strncmp(ifa->ifa_name, search_ifa, 1620 atsign-search_ifa) != 0) 1621 continue; 1622 } else { 1623 if(strcmp(ifa->ifa_name, search_ifa) != 0) 1624 continue; 1625 atsign = ""; 1626 } 1627 1628 if(ifa->ifa_addr == NULL) 1629 continue; 1630 1631 family = ifa->ifa_addr->sa_family; 1632 if(family == AF_INET) { 1633 char a4[INET_ADDRSTRLEN + 1]; 1634 struct sockaddr_in *in4 = (struct sockaddr_in *) 1635 ifa->ifa_addr; 1636 if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) { 1637 log_err("inet_ntop failed"); 1638 return 0; 1639 } 1640 snprintf(addr_buf, sizeof(addr_buf), "%s%s", 1641 a4, atsign); 1642 } 1643#ifdef INET6 1644 else if(family == AF_INET6) { 1645 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) 1646 ifa->ifa_addr; 1647 char a6[INET6_ADDRSTRLEN + 1]; 1648 char if_index_name[IF_NAMESIZE + 1]; 
1649 if_index_name[0] = 0; 1650 if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) { 1651 log_err("inet_ntop failed"); 1652 return 0; 1653 } 1654 (void)if_indextoname(in6->sin6_scope_id, 1655 (char *)if_index_name); 1656 if (strlen(if_index_name) != 0) { 1657 snprintf(addr_buf, sizeof(addr_buf), 1658 "%s%%%s%s", a6, if_index_name, atsign); 1659 } else { 1660 snprintf(addr_buf, sizeof(addr_buf), "%s%s", 1661 a6, atsign); 1662 } 1663 } 1664#endif 1665 else { 1666 continue; 1667 } 1668 verbose(4, "interface %s has address %s", search_ifa, addr_buf); 1669 1670 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1)); 1671 if(!tmpbuf) { 1672 log_err("realloc failed: out of memory"); 1673 return 0; 1674 } else { 1675 *ip_addresses = tmpbuf; 1676 } 1677 (*ip_addresses)[*ip_addresses_size] = strdup(addr_buf); 1678 if(!(*ip_addresses)[*ip_addresses_size]) { 1679 log_err("strdup failed: out of memory"); 1680 return 0; 1681 } 1682 (*ip_addresses_size)++; 1683 } 1684 1685 if (*ip_addresses_size == last_ip_addresses_size) { 1686 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1)); 1687 if(!tmpbuf) { 1688 log_err("realloc failed: out of memory"); 1689 return 0; 1690 } else { 1691 *ip_addresses = tmpbuf; 1692 } 1693 (*ip_addresses)[*ip_addresses_size] = strdup(search_ifa); 1694 if(!(*ip_addresses)[*ip_addresses_size]) { 1695 log_err("strdup failed: out of memory"); 1696 return 0; 1697 } 1698 (*ip_addresses_size)++; 1699 } 1700 return 1; 1701} 1702#endif /* HAVE_GETIFADDRS */ 1703 1704int resolve_interface_names(char** ifs, int num_ifs, 1705 struct config_strlist* list, char*** resif, int* num_resif) 1706{ 1707#ifdef HAVE_GETIFADDRS 1708 struct ifaddrs *addrs = NULL; 1709 if(num_ifs == 0 && list == NULL) { 1710 *resif = NULL; 1711 *num_resif = 0; 1712 return 1; 1713 } 1714 if(getifaddrs(&addrs) == -1) { 1715 log_err("failed to list interfaces: getifaddrs: %s", 1716 strerror(errno)); 1717 freeifaddrs(addrs); 1718 return 0; 1719 } 1720 
if(ifs) { 1721 int i; 1722 for(i=0; i<num_ifs; i++) { 1723 if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) { 1724 freeifaddrs(addrs); 1725 config_del_strarray(*resif, *num_resif); 1726 *resif = NULL; 1727 *num_resif = 0; 1728 return 0; 1729 } 1730 } 1731 } 1732 if(list) { 1733 struct config_strlist* p; 1734 for(p = list; p; p = p->next) { 1735 if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) { 1736 freeifaddrs(addrs); 1737 config_del_strarray(*resif, *num_resif); 1738 *resif = NULL; 1739 *num_resif = 0; 1740 return 0; 1741 } 1742} 1743 } 1744 freeifaddrs(addrs); 1745 return 1; 1746#else 1747 struct config_strlist* p; 1748 if(num_ifs == 0 && list == NULL) { 1749 *resif = NULL; 1750 *num_resif = 0; 1751 return 1; 1752 } 1753 *num_resif = num_ifs; 1754 for(p = list; p; p = p->next) { 1755 (*num_resif)++; 1756 } 1757 *resif = calloc(*num_resif, sizeof(**resif)); 1758 if(!*resif) { 1759 log_err("out of memory"); 1760 return 0; 1761 } 1762 if(ifs) { 1763 int i; 1764 for(i=0; i<num_ifs; i++) { 1765 (*resif)[i] = strdup(ifs[i]); 1766 if(!((*resif)[i])) { 1767 log_err("out of memory"); 1768 config_del_strarray(*resif, *num_resif); 1769 *resif = NULL; 1770 *num_resif = 0; 1771 return 0; 1772 } 1773 } 1774 } 1775 if(list) { 1776 int idx = num_ifs; 1777 for(p = list; p; p = p->next) { 1778 (*resif)[idx] = strdup(p->str); 1779 if(!((*resif)[idx])) { 1780 log_err("out of memory"); 1781 config_del_strarray(*resif, *num_resif); 1782 *resif = NULL; 1783 *num_resif = 0; 1784 return 0; 1785 } 1786 idx++; 1787 } 1788 } 1789 return 1; 1790#endif /* HAVE_GETIFADDRS */ 1791} 1792 1793struct listen_port* 1794listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, 1795 int* reuseport) 1796{ 1797 struct listen_port* list = NULL; 1798 struct addrinfo hints; 1799 int i, do_ip4, do_ip6; 1800 int do_tcp, do_auto; 1801 char portbuf[32]; 1802 snprintf(portbuf, sizeof(portbuf), "%d", cfg->port); 1803 do_ip4 = cfg->do_ip4; 1804 do_ip6 = cfg->do_ip6; 1805 do_tcp = 
cfg->do_tcp; 1806 do_auto = cfg->if_automatic && cfg->do_udp; 1807 if(cfg->incoming_num_tcp == 0) 1808 do_tcp = 0; 1809 1810 /* getaddrinfo */ 1811 memset(&hints, 0, sizeof(hints)); 1812 hints.ai_flags = AI_PASSIVE; 1813 /* no name lookups on our listening ports */ 1814 if(num_ifs > 0) 1815 hints.ai_flags |= AI_NUMERICHOST; 1816 hints.ai_family = AF_UNSPEC; 1817#ifndef INET6 1818 do_ip6 = 0; 1819#endif 1820 if(!do_ip4 && !do_ip6) { 1821 return NULL; 1822 } 1823 /* create ip4 and ip6 ports so that return addresses are nice. */ 1824 if(do_auto || num_ifs == 0) { 1825 if(do_auto && cfg->if_automatic_ports && 1826 cfg->if_automatic_ports[0]!=0) { 1827 char* now = cfg->if_automatic_ports; 1828 while(now && *now) { 1829 char* after; 1830 int extraport; 1831 while(isspace((unsigned char)*now)) 1832 now++; 1833 if(!*now) 1834 break; 1835 after = now; 1836 extraport = (int)strtol(now, &after, 10); 1837 if(extraport < 0 || extraport > 65535) { 1838 log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); 1839 listening_ports_free(list); 1840 return NULL; 1841 } 1842 if(extraport == 0 && now == after) { 1843 log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); 1844 listening_ports_free(list); 1845 return NULL; 1846 } 1847 now = after; 1848 snprintf(portbuf, sizeof(portbuf), "%d", extraport); 1849 if(do_ip6) { 1850 hints.ai_family = AF_INET6; 1851 if(!ports_create_if("::0", 1852 do_auto, cfg->do_udp, do_tcp, 1853 &hints, portbuf, &list, 1854 cfg->so_rcvbuf, cfg->so_sndbuf, 1855 cfg->ssl_port, cfg->tls_additional_port, 1856 cfg->https_port, 1857 cfg->proxy_protocol_port, 1858 reuseport, cfg->ip_transparent, 1859 cfg->tcp_mss, cfg->ip_freebind, 1860 cfg->http_nodelay, cfg->use_systemd, 1861 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1862 listening_ports_free(list); 1863 return 
NULL; 1864 } 1865 } 1866 if(do_ip4) { 1867 hints.ai_family = AF_INET; 1868 if(!ports_create_if("0.0.0.0", 1869 do_auto, cfg->do_udp, do_tcp, 1870 &hints, portbuf, &list, 1871 cfg->so_rcvbuf, cfg->so_sndbuf, 1872 cfg->ssl_port, cfg->tls_additional_port, 1873 cfg->https_port, 1874 cfg->proxy_protocol_port, 1875 reuseport, cfg->ip_transparent, 1876 cfg->tcp_mss, cfg->ip_freebind, 1877 cfg->http_nodelay, cfg->use_systemd, 1878 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1879 listening_ports_free(list); 1880 return NULL; 1881 } 1882 } 1883 } 1884 return list; 1885 } 1886 if(do_ip6) { 1887 hints.ai_family = AF_INET6; 1888 if(!ports_create_if(do_auto?"::0":"::1", 1889 do_auto, cfg->do_udp, do_tcp, 1890 &hints, portbuf, &list, 1891 cfg->so_rcvbuf, cfg->so_sndbuf, 1892 cfg->ssl_port, cfg->tls_additional_port, 1893 cfg->https_port, cfg->proxy_protocol_port, 1894 reuseport, cfg->ip_transparent, 1895 cfg->tcp_mss, cfg->ip_freebind, 1896 cfg->http_nodelay, cfg->use_systemd, 1897 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1898 listening_ports_free(list); 1899 return NULL; 1900 } 1901 } 1902 if(do_ip4) { 1903 hints.ai_family = AF_INET; 1904 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", 1905 do_auto, cfg->do_udp, do_tcp, 1906 &hints, portbuf, &list, 1907 cfg->so_rcvbuf, cfg->so_sndbuf, 1908 cfg->ssl_port, cfg->tls_additional_port, 1909 cfg->https_port, cfg->proxy_protocol_port, 1910 reuseport, cfg->ip_transparent, 1911 cfg->tcp_mss, cfg->ip_freebind, 1912 cfg->http_nodelay, cfg->use_systemd, 1913 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1914 listening_ports_free(list); 1915 return NULL; 1916 } 1917 } 1918 } else for(i = 0; i<num_ifs; i++) { 1919 if(str_is_ip6(ifs[i])) { 1920 if(!do_ip6) 1921 continue; 1922 hints.ai_family = AF_INET6; 1923 if(!ports_create_if(ifs[i], 0, cfg->do_udp, 1924 do_tcp, &hints, portbuf, &list, 1925 cfg->so_rcvbuf, cfg->so_sndbuf, 1926 cfg->ssl_port, cfg->tls_additional_port, 1927 
cfg->https_port, cfg->proxy_protocol_port, 1928 reuseport, cfg->ip_transparent, 1929 cfg->tcp_mss, cfg->ip_freebind, 1930 cfg->http_nodelay, cfg->use_systemd, 1931 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1932 listening_ports_free(list); 1933 return NULL; 1934 } 1935 } else { 1936 if(!do_ip4) 1937 continue; 1938 hints.ai_family = AF_INET; 1939 if(!ports_create_if(ifs[i], 0, cfg->do_udp, 1940 do_tcp, &hints, portbuf, &list, 1941 cfg->so_rcvbuf, cfg->so_sndbuf, 1942 cfg->ssl_port, cfg->tls_additional_port, 1943 cfg->https_port, cfg->proxy_protocol_port, 1944 reuseport, cfg->ip_transparent, 1945 cfg->tcp_mss, cfg->ip_freebind, 1946 cfg->http_nodelay, cfg->use_systemd, 1947 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { 1948 listening_ports_free(list); 1949 return NULL; 1950 } 1951 } 1952 } 1953 1954 return list; 1955} 1956 1957void listening_ports_free(struct listen_port* list) 1958{ 1959 struct listen_port* nx; 1960 while(list) { 1961 nx = list->next; 1962 if(list->fd != -1) { 1963 sock_close(list->fd); 1964 } 1965 /* rc_ports don't have ub_socket */ 1966 if(list->socket) { 1967 free(list->socket->addr); 1968 free(list->socket); 1969 } 1970 free(list); 1971 list = nx; 1972 } 1973} 1974 1975size_t listen_get_mem(struct listen_dnsport* listen) 1976{ 1977 struct listen_list* p; 1978 size_t s = sizeof(*listen) + sizeof(*listen->base) + 1979 sizeof(*listen->udp_buff) + 1980 sldns_buffer_capacity(listen->udp_buff); 1981#ifdef USE_DNSCRYPT 1982 s += sizeof(*listen->dnscrypt_udp_buff); 1983 if(listen->udp_buff != listen->dnscrypt_udp_buff){ 1984 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff); 1985 } 1986#endif 1987 for(p = listen->cps; p; p = p->next) { 1988 s += sizeof(*p); 1989 s += comm_point_get_mem(p->com); 1990 } 1991 return s; 1992} 1993 1994void listen_stop_accept(struct listen_dnsport* listen) 1995{ 1996 /* do not stop the ones that have no tcp_free list 1997 * (they have already stopped listening) */ 1998 struct 
listen_list* p; 1999 for(p=listen->cps; p; p=p->next) { 2000 if(p->com->type == comm_tcp_accept && 2001 p->com->tcp_free != NULL) { 2002 comm_point_stop_listening(p->com); 2003 } 2004 } 2005} 2006 2007void listen_start_accept(struct listen_dnsport* listen) 2008{ 2009 /* do not start the ones that have no tcp_free list, it is no 2010 * use to listen to them because they have no free tcp handlers */ 2011 struct listen_list* p; 2012 for(p=listen->cps; p; p=p->next) { 2013 if(p->com->type == comm_tcp_accept && 2014 p->com->tcp_free != NULL) { 2015 comm_point_start_listening(p->com, -1, -1); 2016 } 2017 } 2018} 2019 2020struct tcp_req_info* 2021tcp_req_info_create(struct sldns_buffer* spoolbuf) 2022{ 2023 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req)); 2024 if(!req) { 2025 log_err("malloc failure for new stream outoforder processing structure"); 2026 return NULL; 2027 } 2028 memset(req, 0, sizeof(*req)); 2029 req->spool_buffer = spoolbuf; 2030 return req; 2031} 2032 2033void 2034tcp_req_info_delete(struct tcp_req_info* req) 2035{ 2036 if(!req) return; 2037 tcp_req_info_clear(req); 2038 /* cp is pointer back to commpoint that owns this struct and 2039 * called delete on us */ 2040 /* spool_buffer is shared udp buffer, not deleted here */ 2041 free(req); 2042} 2043 2044void tcp_req_info_clear(struct tcp_req_info* req) 2045{ 2046 struct tcp_req_open_item* open, *nopen; 2047 struct tcp_req_done_item* item, *nitem; 2048 if(!req) return; 2049 2050 /* free outstanding request mesh reply entries */ 2051 open = req->open_req_list; 2052 while(open) { 2053 nopen = open->next; 2054 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp); 2055 free(open); 2056 open = nopen; 2057 } 2058 req->open_req_list = NULL; 2059 req->num_open_req = 0; 2060 2061 /* free pending writable result packets */ 2062 item = req->done_req_list; 2063 while(item) { 2064 nitem = item->next; 2065 lock_basic_lock(&stream_wait_count_lock); 2066 stream_wait_count -= (sizeof(struct 
tcp_req_done_item) 2067 +item->len); 2068 lock_basic_unlock(&stream_wait_count_lock); 2069 free(item->buf); 2070 free(item); 2071 item = nitem; 2072 } 2073 req->done_req_list = NULL; 2074 req->num_done_req = 0; 2075 req->read_is_closed = 0; 2076} 2077 2078void 2079tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m) 2080{ 2081 struct tcp_req_open_item* open, *prev = NULL; 2082 if(!req || !m) return; 2083 open = req->open_req_list; 2084 while(open) { 2085 if(open->mesh_state == m) { 2086 struct tcp_req_open_item* next; 2087 if(prev) prev->next = open->next; 2088 else req->open_req_list = open->next; 2089 /* caller has to manage the mesh state reply entry */ 2090 next = open->next; 2091 free(open); 2092 req->num_open_req --; 2093 2094 /* prev = prev; */ 2095 open = next; 2096 continue; 2097 } 2098 prev = open; 2099 open = open->next; 2100 } 2101} 2102 2103/** setup listening for read or write */ 2104static void 2105tcp_req_info_setup_listen(struct tcp_req_info* req) 2106{ 2107 int wr = 0; 2108 int rd = 0; 2109 2110 if(req->cp->tcp_byte_count != 0) { 2111 /* cannot change, halfway through */ 2112 return; 2113 } 2114 2115 if(!req->cp->tcp_is_reading) 2116 wr = 1; 2117 if(!req->read_is_closed) 2118 rd = 1; 2119 2120 if(wr) { 2121 req->cp->tcp_is_reading = 0; 2122 comm_point_stop_listening(req->cp); 2123 comm_point_start_listening(req->cp, -1, 2124 adjusted_tcp_timeout(req->cp)); 2125 } else if(rd) { 2126 req->cp->tcp_is_reading = 1; 2127 comm_point_stop_listening(req->cp); 2128 comm_point_start_listening(req->cp, -1, 2129 adjusted_tcp_timeout(req->cp)); 2130 /* and also read it (from SSL stack buffers), so 2131 * no event read event is expected since the remainder of 2132 * the TLS frame is sitting in the buffers. 
*/ 2133 req->read_again = 1; 2134 } else { 2135 comm_point_stop_listening(req->cp); 2136 comm_point_start_listening(req->cp, -1, 2137 adjusted_tcp_timeout(req->cp)); 2138 comm_point_listen_for_rw(req->cp, 0, 0); 2139 } 2140} 2141 2142/** remove first item from list of pending results */ 2143static struct tcp_req_done_item* 2144tcp_req_info_pop_done(struct tcp_req_info* req) 2145{ 2146 struct tcp_req_done_item* item; 2147 log_assert(req->num_done_req > 0 && req->done_req_list); 2148 item = req->done_req_list; 2149 lock_basic_lock(&stream_wait_count_lock); 2150 stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len); 2151 lock_basic_unlock(&stream_wait_count_lock); 2152 req->done_req_list = req->done_req_list->next; 2153 req->num_done_req --; 2154 return item; 2155} 2156 2157/** Send given buffer and setup to write */ 2158static void 2159tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf, 2160 size_t len) 2161{ 2162 sldns_buffer_clear(req->cp->buffer); 2163 sldns_buffer_write(req->cp->buffer, buf, len); 2164 sldns_buffer_flip(req->cp->buffer); 2165 2166 req->cp->tcp_is_reading = 0; /* we are now writing */ 2167} 2168 2169/** pick up the next result and start writing it to the channel */ 2170static void 2171tcp_req_pickup_next_result(struct tcp_req_info* req) 2172{ 2173 if(req->num_done_req > 0) { 2174 /* unlist the done item from the list of pending results */ 2175 struct tcp_req_done_item* item = tcp_req_info_pop_done(req); 2176 tcp_req_info_start_write_buf(req, item->buf, item->len); 2177 free(item->buf); 2178 free(item); 2179 } 2180} 2181 2182/** the read channel has closed */ 2183int 2184tcp_req_info_handle_read_close(struct tcp_req_info* req) 2185{ 2186 verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd); 2187 /* reset byte count for (potential) partial read */ 2188 req->cp->tcp_byte_count = 0; 2189 /* if we still have results to write, pick up next and write it */ 2190 if(req->num_done_req != 0) { 2191 
tcp_req_pickup_next_result(req); 2192 tcp_req_info_setup_listen(req); 2193 return 1; 2194 } 2195 /* if nothing to do, this closes the connection */ 2196 if(req->num_open_req == 0 && req->num_done_req == 0) 2197 return 0; 2198 /* otherwise, we must be waiting for dns resolve, wait with timeout */ 2199 req->read_is_closed = 1; 2200 tcp_req_info_setup_listen(req); 2201 return 1; 2202} 2203 2204void 2205tcp_req_info_handle_writedone(struct tcp_req_info* req) 2206{ 2207 /* back to reading state, we finished this write event */ 2208 sldns_buffer_clear(req->cp->buffer); 2209 if(req->num_done_req == 0 && req->read_is_closed) { 2210 /* no more to write and nothing to read, close it */ 2211 comm_point_drop_reply(&req->cp->repinfo); 2212 return; 2213 } 2214 req->cp->tcp_is_reading = 1; 2215 /* see if another result needs writing */ 2216 tcp_req_pickup_next_result(req); 2217 2218 /* see if there is more to write, if not stop_listening for writing */ 2219 /* see if new requests are allowed, if so, start_listening 2220 * for reading */ 2221 tcp_req_info_setup_listen(req); 2222} 2223 2224void 2225tcp_req_info_handle_readdone(struct tcp_req_info* req) 2226{ 2227 struct comm_point* c = req->cp; 2228 2229 /* we want to read up several requests, unless there are 2230 * pending answers */ 2231 2232 req->is_drop = 0; 2233 req->is_reply = 0; 2234 req->in_worker_handle = 1; 2235 sldns_buffer_set_limit(req->spool_buffer, 0); 2236 /* handle the current request */ 2237 /* this calls the worker handle request routine that could give 2238 * a cache response, or localdata response, or drop the reply, 2239 * or schedule a mesh entry for later */ 2240 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2241 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 2242 req->in_worker_handle = 0; 2243 /* there is an answer, put it up. It is already in the 2244 * c->buffer, just send it. 
*/ 2245 /* since we were just reading a query, the channel is 2246 * clear to write to */ 2247 send_it: 2248 c->tcp_is_reading = 0; 2249 comm_point_stop_listening(c); 2250 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 2251 return; 2252 } 2253 req->in_worker_handle = 0; 2254 /* it should be waiting in the mesh for recursion. 2255 * If mesh failed to add a new entry and called commpoint_drop_reply. 2256 * Then the mesh state has been cleared. */ 2257 if(req->is_drop) { 2258 /* the reply has been dropped, stream has been closed. */ 2259 return; 2260 } 2261 /* If mesh failed(mallocfail) and called commpoint_send_reply with 2262 * something like servfail then we pick up that reply below. */ 2263 if(req->is_reply) { 2264 goto send_it; 2265 } 2266 2267 sldns_buffer_clear(c->buffer); 2268 /* if pending answers, pick up an answer and start sending it */ 2269 tcp_req_pickup_next_result(req); 2270 2271 /* if answers pending, start sending answers */ 2272 /* read more requests if we can have more requests */ 2273 tcp_req_info_setup_listen(req); 2274} 2275 2276int 2277tcp_req_info_add_meshstate(struct tcp_req_info* req, 2278 struct mesh_area* mesh, struct mesh_state* m) 2279{ 2280 struct tcp_req_open_item* item; 2281 log_assert(req && mesh && m); 2282 item = (struct tcp_req_open_item*)malloc(sizeof(*item)); 2283 if(!item) return 0; 2284 item->next = req->open_req_list; 2285 item->mesh = mesh; 2286 item->mesh_state = m; 2287 req->open_req_list = item; 2288 req->num_open_req++; 2289 return 1; 2290} 2291 2292/** Add a result to the result list. At the end. 
*/ 2293static int 2294tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len) 2295{ 2296 struct tcp_req_done_item* last = NULL; 2297 struct tcp_req_done_item* item; 2298 size_t space; 2299 2300 /* see if we have space */ 2301 space = sizeof(struct tcp_req_done_item) + len; 2302 lock_basic_lock(&stream_wait_count_lock); 2303 if(stream_wait_count + space > stream_wait_max) { 2304 lock_basic_unlock(&stream_wait_count_lock); 2305 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size"); 2306 return 0; 2307 } 2308 stream_wait_count += space; 2309 lock_basic_unlock(&stream_wait_count_lock); 2310 2311 /* find last element */ 2312 last = req->done_req_list; 2313 while(last && last->next) 2314 last = last->next; 2315 2316 /* create new element */ 2317 item = (struct tcp_req_done_item*)malloc(sizeof(*item)); 2318 if(!item) { 2319 log_err("malloc failure, for stream result list"); 2320 return 0; 2321 } 2322 item->next = NULL; 2323 item->len = len; 2324 item->buf = memdup(buf, len); 2325 if(!item->buf) { 2326 free(item); 2327 log_err("malloc failure, adding reply to stream result list"); 2328 return 0; 2329 } 2330 2331 /* link in */ 2332 if(last) last->next = item; 2333 else req->done_req_list = item; 2334 req->num_done_req++; 2335 return 1; 2336} 2337 2338void 2339tcp_req_info_send_reply(struct tcp_req_info* req) 2340{ 2341 if(req->in_worker_handle) { 2342 /* reply from mesh is in the spool_buffer */ 2343 /* copy now, so that the spool buffer is free for other tasks 2344 * before the callback is done */ 2345 sldns_buffer_clear(req->cp->buffer); 2346 sldns_buffer_write(req->cp->buffer, 2347 sldns_buffer_begin(req->spool_buffer), 2348 sldns_buffer_limit(req->spool_buffer)); 2349 sldns_buffer_flip(req->cp->buffer); 2350 req->is_reply = 1; 2351 return; 2352 } 2353 /* now that the query has been handled, that mesh_reply entry 2354 * should be removed, from the tcp_req_info list, 2355 * the mesh state cleanup removes then with region_cleanup 
and 2356 * replies_sent true. */ 2357 /* see if we can send it straight away (we are not doing 2358 * anything else). If so, copy to buffer and start */ 2359 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) { 2360 /* buffer is free, and was ready to read new query into, 2361 * but we are now going to use it to send this answer */ 2362 tcp_req_info_start_write_buf(req, 2363 sldns_buffer_begin(req->spool_buffer), 2364 sldns_buffer_limit(req->spool_buffer)); 2365 /* switch to listen to write events */ 2366 comm_point_stop_listening(req->cp); 2367 comm_point_start_listening(req->cp, -1, 2368 adjusted_tcp_timeout(req->cp)); 2369 return; 2370 } 2371 /* queue up the answer behind the others already pending */ 2372 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer), 2373 sldns_buffer_limit(req->spool_buffer))) { 2374 /* drop the connection, we are out of resources */ 2375 comm_point_drop_reply(&req->cp->repinfo); 2376 } 2377} 2378 2379size_t tcp_req_info_get_stream_buffer_size(void) 2380{ 2381 size_t s; 2382 if(!stream_wait_lock_inited) 2383 return stream_wait_count; 2384 lock_basic_lock(&stream_wait_count_lock); 2385 s = stream_wait_count; 2386 lock_basic_unlock(&stream_wait_count_lock); 2387 return s; 2388} 2389 2390size_t http2_get_query_buffer_size(void) 2391{ 2392 size_t s; 2393 if(!http2_query_buffer_lock_inited) 2394 return http2_query_buffer_count; 2395 lock_basic_lock(&http2_query_buffer_count_lock); 2396 s = http2_query_buffer_count; 2397 lock_basic_unlock(&http2_query_buffer_count_lock); 2398 return s; 2399} 2400 2401size_t http2_get_response_buffer_size(void) 2402{ 2403 size_t s; 2404 if(!http2_response_buffer_lock_inited) 2405 return http2_response_buffer_count; 2406 lock_basic_lock(&http2_response_buffer_count_lock); 2407 s = http2_response_buffer_count; 2408 lock_basic_unlock(&http2_response_buffer_count_lock); 2409 return s; 2410} 2411 2412#ifdef HAVE_NGHTTP2 2413/** nghttp2 callback. 
Used to copy response from rbuffer to nghttp2 session */ 2414static ssize_t http2_submit_response_read_callback( 2415 nghttp2_session* ATTR_UNUSED(session), 2416 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags, 2417 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg)) 2418{ 2419 struct http2_stream* h2_stream; 2420 struct http2_session* h2_session = source->ptr; 2421 size_t copylen = length; 2422 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2423 h2_session->session, stream_id))) { 2424 verbose(VERB_QUERY, "http2: cannot get stream data, closing " 2425 "stream"); 2426 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2427 } 2428 if(!h2_stream->rbuffer || 2429 sldns_buffer_remaining(h2_stream->rbuffer) == 0) { 2430 verbose(VERB_QUERY, "http2: cannot submit buffer. No data " 2431 "available in rbuffer"); 2432 /* rbuffer will be free'd in frame close cb */ 2433 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2434 } 2435 2436 if(copylen > sldns_buffer_remaining(h2_stream->rbuffer)) 2437 copylen = sldns_buffer_remaining(h2_stream->rbuffer); 2438 if(copylen > SSIZE_MAX) 2439 copylen = SSIZE_MAX; /* will probably never happen */ 2440 2441 memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen); 2442 sldns_buffer_skip(h2_stream->rbuffer, copylen); 2443 2444 if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) { 2445 *data_flags |= NGHTTP2_DATA_FLAG_EOF; 2446 lock_basic_lock(&http2_response_buffer_count_lock); 2447 http2_response_buffer_count -= 2448 sldns_buffer_capacity(h2_stream->rbuffer); 2449 lock_basic_unlock(&http2_response_buffer_count_lock); 2450 sldns_buffer_free(h2_stream->rbuffer); 2451 h2_stream->rbuffer = NULL; 2452 } 2453 2454 return copylen; 2455} 2456 2457/** 2458 * Send RST_STREAM frame for stream. 
2459 * @param h2_session: http2 session to submit frame to 2460 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM 2461 * @return 0 on error, 1 otherwise 2462 */ 2463static int http2_submit_rst_stream(struct http2_session* h2_session, 2464 struct http2_stream* h2_stream) 2465{ 2466 int ret = nghttp2_submit_rst_stream(h2_session->session, 2467 NGHTTP2_FLAG_NONE, h2_stream->stream_id, 2468 NGHTTP2_INTERNAL_ERROR); 2469 if(ret) { 2470 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, " 2471 "error: %s", nghttp2_strerror(ret)); 2472 return 0; 2473 } 2474 return 1; 2475} 2476 2477/** 2478 * DNS response ready to be submitted to nghttp2, to be prepared for sending 2479 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer 2480 * might be used before this will be sent out. 2481 * @param h2_session: http2 session, containing c->buffer which contains answer 2482 * @return 0 on error, 1 otherwise 2483 */ 2484int http2_submit_dns_response(struct http2_session* h2_session) 2485{ 2486 int ret; 2487 nghttp2_data_provider data_prd; 2488 char status[4]; 2489 nghttp2_nv headers[3]; 2490 struct http2_stream* h2_stream = h2_session->c->h2_stream; 2491 size_t rlen; 2492 char rlen_str[32]; 2493 2494 if(h2_stream->rbuffer) { 2495 log_err("http2 submit response error: rbuffer already " 2496 "exists"); 2497 return 0; 2498 } 2499 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) { 2500 log_err("http2 submit response error: c->buffer not complete"); 2501 return 0; 2502 } 2503 2504 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2505 verbose(VERB_QUERY, "http2: submit response error: " 2506 "invalid status"); 2507 return 0; 2508 } 2509 2510 rlen = sldns_buffer_remaining(h2_session->c->buffer); 2511 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen); 2512 2513 lock_basic_lock(&http2_response_buffer_count_lock); 2514 if(http2_response_buffer_count + rlen > http2_response_buffer_max) { 2515 
lock_basic_unlock(&http2_response_buffer_count_lock); 2516 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2517 "in https-response-buffer-size"); 2518 return http2_submit_rst_stream(h2_session, h2_stream); 2519 } 2520 http2_response_buffer_count += rlen; 2521 lock_basic_unlock(&http2_response_buffer_count_lock); 2522 2523 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) { 2524 lock_basic_lock(&http2_response_buffer_count_lock); 2525 http2_response_buffer_count -= rlen; 2526 lock_basic_unlock(&http2_response_buffer_count_lock); 2527 log_err("http2 submit response error: malloc failure"); 2528 return 0; 2529 } 2530 2531 headers[0].name = (uint8_t*)":status"; 2532 headers[0].namelen = 7; 2533 headers[0].value = (uint8_t*)status; 2534 headers[0].valuelen = 3; 2535 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2536 2537 headers[1].name = (uint8_t*)"content-type"; 2538 headers[1].namelen = 12; 2539 headers[1].value = (uint8_t*)"application/dns-message"; 2540 headers[1].valuelen = 23; 2541 headers[1].flags = NGHTTP2_NV_FLAG_NONE; 2542 2543 headers[2].name = (uint8_t*)"content-length"; 2544 headers[2].namelen = 14; 2545 headers[2].value = (uint8_t*)rlen_str; 2546 headers[2].valuelen = strlen(rlen_str); 2547 headers[2].flags = NGHTTP2_NV_FLAG_NONE; 2548 2549 sldns_buffer_write(h2_stream->rbuffer, 2550 sldns_buffer_current(h2_session->c->buffer), 2551 sldns_buffer_remaining(h2_session->c->buffer)); 2552 sldns_buffer_flip(h2_stream->rbuffer); 2553 2554 data_prd.source.ptr = h2_session; 2555 data_prd.read_callback = http2_submit_response_read_callback; 2556 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2557 headers, 3, &data_prd); 2558 if(ret) { 2559 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2560 "error: %s", nghttp2_strerror(ret)); 2561 return 0; 2562 } 2563 return 1; 2564} 2565#else 2566int http2_submit_dns_response(void* ATTR_UNUSED(v)) 2567{ 2568 return 0; 2569} 2570#endif 2571 2572#ifdef HAVE_NGHTTP2 2573/** HTTP status to 
descriptive string */ 2574static char* http_status_to_str(enum http_status s) 2575{ 2576 switch(s) { 2577 case HTTP_STATUS_OK: 2578 return "OK"; 2579 case HTTP_STATUS_BAD_REQUEST: 2580 return "Bad Request"; 2581 case HTTP_STATUS_NOT_FOUND: 2582 return "Not Found"; 2583 case HTTP_STATUS_PAYLOAD_TOO_LARGE: 2584 return "Payload Too Large"; 2585 case HTTP_STATUS_URI_TOO_LONG: 2586 return "URI Too Long"; 2587 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE: 2588 return "Unsupported Media Type"; 2589 case HTTP_STATUS_NOT_IMPLEMENTED: 2590 return "Not Implemented"; 2591 } 2592 return "Status Unknown"; 2593} 2594 2595/** nghttp2 callback. Used to copy error message to nghttp2 session */ 2596static ssize_t http2_submit_error_read_callback( 2597 nghttp2_session* ATTR_UNUSED(session), 2598 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags, 2599 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg)) 2600{ 2601 struct http2_stream* h2_stream; 2602 struct http2_session* h2_session = source->ptr; 2603 char* msg; 2604 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2605 h2_session->session, stream_id))) { 2606 verbose(VERB_QUERY, "http2: cannot get stream data, closing " 2607 "stream"); 2608 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2609 } 2610 *data_flags |= NGHTTP2_DATA_FLAG_EOF; 2611 msg = http_status_to_str(h2_stream->status); 2612 if(length < strlen(msg)) 2613 return 0; /* not worth trying over multiple frames */ 2614 memcpy(buf, msg, strlen(msg)); 2615 return strlen(msg); 2616 2617} 2618 2619/** 2620 * HTTP error response ready to be submitted to nghttp2, to be prepared for 2621 * sending out. Message body will contain descriptive string for HTTP status. 
2622 * @param h2_session: http2 session to submit to 2623 * @param h2_stream: http2 stream containing HTTP status to use for error 2624 * @return 0 on error, 1 otherwise 2625 */ 2626static int http2_submit_error(struct http2_session* h2_session, 2627 struct http2_stream* h2_stream) 2628{ 2629 int ret; 2630 char status[4]; 2631 nghttp2_data_provider data_prd; 2632 nghttp2_nv headers[1]; /* will be copied by nghttp */ 2633 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2634 verbose(VERB_QUERY, "http2: submit error failed, " 2635 "invalid status"); 2636 return 0; 2637 } 2638 headers[0].name = (uint8_t*)":status"; 2639 headers[0].namelen = 7; 2640 headers[0].value = (uint8_t*)status; 2641 headers[0].valuelen = 3; 2642 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2643 2644 data_prd.source.ptr = h2_session; 2645 data_prd.read_callback = http2_submit_error_read_callback; 2646 2647 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2648 headers, 1, &data_prd); 2649 if(ret) { 2650 verbose(VERB_QUERY, "http2: submit error failed, " 2651 "error: %s", nghttp2_strerror(ret)); 2652 return 0; 2653 } 2654 return 1; 2655} 2656 2657/** 2658 * Start query handling. Query is stored in the stream, and will be free'd here. 2659 * @param h2_session: http2 session, containing comm point 2660 * @param h2_stream: stream containing buffered query 2661 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no 2662 * reply available (yet). 
2663 */ 2664static int http2_query_read_done(struct http2_session* h2_session, 2665 struct http2_stream* h2_stream) 2666{ 2667 log_assert(h2_stream->qbuffer); 2668 2669 if(h2_session->c->h2_stream) { 2670 verbose(VERB_ALGO, "http2_query_read_done failure: shared " 2671 "buffer already assigned to stream"); 2672 return -1; 2673 } 2674 2675 /* the c->buffer might be used by mesh_send_reply and no be cleard 2676 * need to be cleared before use */ 2677 sldns_buffer_clear(h2_session->c->buffer); 2678 if(sldns_buffer_remaining(h2_session->c->buffer) < 2679 sldns_buffer_remaining(h2_stream->qbuffer)) { 2680 /* qbuffer will be free'd in frame close cb */ 2681 sldns_buffer_clear(h2_session->c->buffer); 2682 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit " 2683 "qbuffer in c->buffer"); 2684 return -1; 2685 } 2686 2687 sldns_buffer_write(h2_session->c->buffer, 2688 sldns_buffer_current(h2_stream->qbuffer), 2689 sldns_buffer_remaining(h2_stream->qbuffer)); 2690 2691 lock_basic_lock(&http2_query_buffer_count_lock); 2692 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer); 2693 lock_basic_unlock(&http2_query_buffer_count_lock); 2694 sldns_buffer_free(h2_stream->qbuffer); 2695 h2_stream->qbuffer = NULL; 2696 2697 sldns_buffer_flip(h2_session->c->buffer); 2698 h2_session->c->h2_stream = h2_stream; 2699 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback)); 2700 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg, 2701 NETEVENT_NOERROR, &h2_session->c->repinfo)) { 2702 return 1; /* answer in c->buffer */ 2703 } 2704 sldns_buffer_clear(h2_session->c->buffer); 2705 h2_session->c->h2_stream = NULL; 2706 return 0; /* mesh state added, or dropped */ 2707} 2708 2709/** nghttp2 callback. Used to check if the received frame indicates the end of a 2710 * stream. Gather collected request data and start query handling. 
*/ 2711static int http2_req_frame_recv_cb(nghttp2_session* session, 2712 const nghttp2_frame* frame, void* cb_arg) 2713{ 2714 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2715 struct http2_stream* h2_stream; 2716 int query_read_done; 2717 2718 if((frame->hd.type != NGHTTP2_DATA && 2719 frame->hd.type != NGHTTP2_HEADERS) || 2720 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) { 2721 return 0; 2722 } 2723 2724 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2725 session, frame->hd.stream_id))) 2726 return 0; 2727 2728 if(h2_stream->invalid_endpoint) { 2729 h2_stream->status = HTTP_STATUS_NOT_FOUND; 2730 goto submit_http_error; 2731 } 2732 2733 if(h2_stream->invalid_content_type) { 2734 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE; 2735 goto submit_http_error; 2736 } 2737 2738 if(h2_stream->http_method != HTTP_METHOD_GET && 2739 h2_stream->http_method != HTTP_METHOD_POST) { 2740 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED; 2741 goto submit_http_error; 2742 } 2743 2744 if(h2_stream->query_too_large) { 2745 if(h2_stream->http_method == HTTP_METHOD_POST) 2746 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE; 2747 else 2748 h2_stream->status = HTTP_STATUS_URI_TOO_LONG; 2749 goto submit_http_error; 2750 } 2751 2752 if(!h2_stream->qbuffer) { 2753 h2_stream->status = HTTP_STATUS_BAD_REQUEST; 2754 goto submit_http_error; 2755 } 2756 2757 if(h2_stream->status) { 2758submit_http_error: 2759 verbose(VERB_QUERY, "http2 request invalid, returning :status=" 2760 "%d", h2_stream->status); 2761 if(!http2_submit_error(h2_session, h2_stream)) { 2762 return NGHTTP2_ERR_CALLBACK_FAILURE; 2763 } 2764 return 0; 2765 } 2766 h2_stream->status = HTTP_STATUS_OK; 2767 2768 sldns_buffer_flip(h2_stream->qbuffer); 2769 h2_session->postpone_drop = 1; 2770 query_read_done = http2_query_read_done(h2_session, h2_stream); 2771 if(query_read_done < 0) 2772 return NGHTTP2_ERR_CALLBACK_FAILURE; 2773 else if(!query_read_done) { 2774 if(h2_session->is_drop) { 2775 /* 
connection needs to be closed. Return failure to make 2776 * sure no other action are taken anymore on comm point. 2777 * failure will result in reclaiming (and closing) 2778 * of comm point. */ 2779 verbose(VERB_QUERY, "http2 query dropped in worker cb"); 2780 h2_session->postpone_drop = 0; 2781 return NGHTTP2_ERR_CALLBACK_FAILURE; 2782 } 2783 /* nothing to submit right now, query added to mesh. */ 2784 h2_session->postpone_drop = 0; 2785 return 0; 2786 } 2787 if(!http2_submit_dns_response(h2_session)) { 2788 sldns_buffer_clear(h2_session->c->buffer); 2789 h2_session->c->h2_stream = NULL; 2790 return NGHTTP2_ERR_CALLBACK_FAILURE; 2791 } 2792 verbose(VERB_QUERY, "http2 query submitted to session"); 2793 sldns_buffer_clear(h2_session->c->buffer); 2794 h2_session->c->h2_stream = NULL; 2795 return 0; 2796} 2797 2798/** nghttp2 callback. Used to detect start of new streams. */ 2799static int http2_req_begin_headers_cb(nghttp2_session* session, 2800 const nghttp2_frame* frame, void* cb_arg) 2801{ 2802 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2803 struct http2_stream* h2_stream; 2804 int ret; 2805 if(frame->hd.type != NGHTTP2_HEADERS || 2806 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 2807 /* only interested in request headers */ 2808 return 0; 2809 } 2810 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) { 2811 log_err("malloc failure while creating http2 stream"); 2812 return NGHTTP2_ERR_CALLBACK_FAILURE; 2813 } 2814 http2_session_add_stream(h2_session, h2_stream); 2815 ret = nghttp2_session_set_stream_user_data(session, 2816 frame->hd.stream_id, h2_stream); 2817 if(ret) { 2818 /* stream does not exist */ 2819 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2820 "error: %s", nghttp2_strerror(ret)); 2821 return NGHTTP2_ERR_CALLBACK_FAILURE; 2822 } 2823 2824 return 0; 2825} 2826 2827/** 2828 * base64url decode, store in qbuffer 2829 * @param h2_session: http2 session 2830 * @param h2_stream: http2 stream 2831 * @param 
start: start of the base64 string 2832 * @param length: length of the base64 string 2833 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer, 2834 * buffer will be NULL is unparseble. 2835 */ 2836static int http2_buffer_uri_query(struct http2_session* h2_session, 2837 struct http2_stream* h2_stream, const uint8_t* start, size_t length) 2838{ 2839 size_t expectb64len; 2840 int b64len; 2841 if(h2_stream->http_method == HTTP_METHOD_POST) 2842 return 1; 2843 if(length == 0) 2844 return 1; 2845 if(h2_stream->qbuffer) { 2846 verbose(VERB_ALGO, "http2_req_header fail, " 2847 "qbuffer already set"); 2848 return 0; 2849 } 2850 2851 /* calculate size, might be a bit bigger than the real 2852 * decoded buffer size */ 2853 expectb64len = sldns_b64_pton_calculate_size(length); 2854 log_assert(expectb64len > 0); 2855 if(expectb64len > 2856 h2_session->c->http2_stream_max_qbuffer_size) { 2857 h2_stream->query_too_large = 1; 2858 return 1; 2859 } 2860 2861 lock_basic_lock(&http2_query_buffer_count_lock); 2862 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) { 2863 lock_basic_unlock(&http2_query_buffer_count_lock); 2864 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2865 "in http2-query-buffer-size"); 2866 return http2_submit_rst_stream(h2_session, h2_stream); 2867 } 2868 http2_query_buffer_count += expectb64len; 2869 lock_basic_unlock(&http2_query_buffer_count_lock); 2870 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) { 2871 lock_basic_lock(&http2_query_buffer_count_lock); 2872 http2_query_buffer_count -= expectb64len; 2873 lock_basic_unlock(&http2_query_buffer_count_lock); 2874 log_err("http2_req_header fail, qbuffer " 2875 "malloc failure"); 2876 return 0; 2877 } 2878 2879 if(sldns_b64_contains_nonurl((char const*)start, length)) { 2880 char buf[65536+4]; 2881 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding"); 2882 /* copy to the scratch buffer temporarily to terminate the 2883 * string with a zero 
*/ 2884 if(length+1 > sizeof(buf)) { 2885 /* too long */ 2886 lock_basic_lock(&http2_query_buffer_count_lock); 2887 http2_query_buffer_count -= expectb64len; 2888 lock_basic_unlock(&http2_query_buffer_count_lock); 2889 sldns_buffer_free(h2_stream->qbuffer); 2890 h2_stream->qbuffer = NULL; 2891 return 1; 2892 } 2893 memmove(buf, start, length); 2894 buf[length] = 0; 2895 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current( 2896 h2_stream->qbuffer), expectb64len)) || b64len < 0) { 2897 lock_basic_lock(&http2_query_buffer_count_lock); 2898 http2_query_buffer_count -= expectb64len; 2899 lock_basic_unlock(&http2_query_buffer_count_lock); 2900 sldns_buffer_free(h2_stream->qbuffer); 2901 h2_stream->qbuffer = NULL; 2902 return 1; 2903 } 2904 } else { 2905 if(!(b64len = sldns_b64url_pton( 2906 (char const *)start, length, 2907 sldns_buffer_current(h2_stream->qbuffer), 2908 expectb64len)) || b64len < 0) { 2909 lock_basic_lock(&http2_query_buffer_count_lock); 2910 http2_query_buffer_count -= expectb64len; 2911 lock_basic_unlock(&http2_query_buffer_count_lock); 2912 sldns_buffer_free(h2_stream->qbuffer); 2913 h2_stream->qbuffer = NULL; 2914 /* return without error, method can be an 2915 * unknown POST */ 2916 return 1; 2917 } 2918 } 2919 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len); 2920 return 1; 2921} 2922 2923/** nghttp2 callback. Used to parse headers from HEADER frames. 
*/ 2924static int http2_req_header_cb(nghttp2_session* session, 2925 const nghttp2_frame* frame, const uint8_t* name, size_t namelen, 2926 const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags), 2927 void* cb_arg) 2928{ 2929 struct http2_stream* h2_stream = NULL; 2930 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2931 /* nghttp2 deals with CONTINUATION frames and provides them as part of 2932 * the HEADER */ 2933 if(frame->hd.type != NGHTTP2_HEADERS || 2934 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 2935 /* only interested in request headers */ 2936 return 0; 2937 } 2938 if(!(h2_stream = nghttp2_session_get_stream_user_data(session, 2939 frame->hd.stream_id))) 2940 return 0; 2941 2942 /* earlier checks already indicate we can stop handling this query */ 2943 if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED || 2944 h2_stream->invalid_content_type || 2945 h2_stream->invalid_endpoint) 2946 return 0; 2947 2948 2949 /* nghttp2 performs some sanity checks in the headers, including: 2950 * name and value are guaranteed to be null terminated 2951 * name is guaranteed to be lowercase 2952 * content-length value is guaranteed to contain digits 2953 */ 2954 2955 if(!h2_stream->http_method && namelen == 7 && 2956 memcmp(":method", name, namelen) == 0) { 2957 /* Case insensitive check on :method value to be on the safe 2958 * side. I failed to find text about case sensitivity in specs. 
2959 */ 2960 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0) 2961 h2_stream->http_method = HTTP_METHOD_GET; 2962 else if(valuelen == 4 && 2963 strcasecmp("POST", (const char*)value) == 0) { 2964 h2_stream->http_method = HTTP_METHOD_POST; 2965 if(h2_stream->qbuffer) { 2966 /* POST method uses query from DATA frames */ 2967 lock_basic_lock(&http2_query_buffer_count_lock); 2968 http2_query_buffer_count -= 2969 sldns_buffer_capacity(h2_stream->qbuffer); 2970 lock_basic_unlock(&http2_query_buffer_count_lock); 2971 sldns_buffer_free(h2_stream->qbuffer); 2972 h2_stream->qbuffer = NULL; 2973 } 2974 } else 2975 h2_stream->http_method = HTTP_METHOD_UNSUPPORTED; 2976 return 0; 2977 } 2978 if(namelen == 5 && memcmp(":path", name, namelen) == 0) { 2979 /* :path may contain DNS query, depending on method. Method might 2980 * not be known yet here, so check after finishing receiving 2981 * stream. */ 2982#define HTTP_QUERY_PARAM "?dns=" 2983 size_t el = strlen(h2_session->c->http_endpoint); 2984 size_t qpl = strlen(HTTP_QUERY_PARAM); 2985 2986 if(valuelen < el || memcmp(h2_session->c->http_endpoint, 2987 value, el) != 0) { 2988 h2_stream->invalid_endpoint = 1; 2989 return 0; 2990 } 2991 /* larger than endpoint only allowed if it is for the query 2992 * parameter */ 2993 if(valuelen <= el+qpl || 2994 memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) { 2995 if(valuelen != el) 2996 h2_stream->invalid_endpoint = 1; 2997 return 0; 2998 } 2999 3000 if(!http2_buffer_uri_query(h2_session, h2_stream, 3001 value+(el+qpl), valuelen-(el+qpl))) { 3002 return NGHTTP2_ERR_CALLBACK_FAILURE; 3003 } 3004 return 0; 3005 } 3006 /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST, 3007 * and not needed when using GET. Don't enfore. 3008 * If set only allow lowercase "application/dns-message". 3009 * 3010 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST 3011 * be able to handle "application/dns-message". 
Since that is the only 3012 * content-type supported we can ignore the accept header. 3013 */ 3014 if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) { 3015 if(valuelen != 23 || memcmp("application/dns-message", value, 3016 valuelen) != 0) { 3017 h2_stream->invalid_content_type = 1; 3018 } 3019 } 3020 3021 /* Only interested in content-lentg for POST (on not yet known) method. 3022 */ 3023 if((!h2_stream->http_method || 3024 h2_stream->http_method == HTTP_METHOD_POST) && 3025 !h2_stream->content_length && namelen == 14 && 3026 memcmp("content-length", name, namelen) == 0) { 3027 if(valuelen > 5) { 3028 h2_stream->query_too_large = 1; 3029 return 0; 3030 } 3031 /* guaranteed to only contain digits and be null terminated */ 3032 h2_stream->content_length = atoi((const char*)value); 3033 if(h2_stream->content_length > 3034 h2_session->c->http2_stream_max_qbuffer_size) { 3035 h2_stream->query_too_large = 1; 3036 return 0; 3037 } 3038 } 3039 return 0; 3040} 3041 3042/** nghttp2 callback. Used to get data from DATA frames, which can contain 3043 * queries in POST requests. */ 3044static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session), 3045 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data, 3046 size_t len, void* cb_arg) 3047{ 3048 struct http2_session* h2_session = (struct http2_session*)cb_arg; 3049 struct http2_stream* h2_stream; 3050 size_t qlen = 0; 3051 3052 if(!(h2_stream = nghttp2_session_get_stream_user_data( 3053 h2_session->session, stream_id))) { 3054 return 0; 3055 } 3056 3057 if(h2_stream->query_too_large) 3058 return 0; 3059 3060 if(!h2_stream->qbuffer) { 3061 if(h2_stream->content_length) { 3062 if(h2_stream->content_length < len) 3063 /* getting more data in DATA frame than 3064 * advertised in content-length header. 
*/ 3065 return NGHTTP2_ERR_CALLBACK_FAILURE; 3066 qlen = h2_stream->content_length; 3067 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) { 3068 /* setting this to msg-buffer-size can result in a lot 3069 * of memory consuption. Most queries should fit in a 3070 * single DATA frame, and most POST queries will 3071 * contain content-length which does not impose this 3072 * limit. */ 3073 qlen = len; 3074 } 3075 } 3076 if(!h2_stream->qbuffer && qlen) { 3077 lock_basic_lock(&http2_query_buffer_count_lock); 3078 if(http2_query_buffer_count + qlen > http2_query_buffer_max) { 3079 lock_basic_unlock(&http2_query_buffer_count_lock); 3080 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 3081 "in http2-query-buffer-size"); 3082 return http2_submit_rst_stream(h2_session, h2_stream); 3083 } 3084 http2_query_buffer_count += qlen; 3085 lock_basic_unlock(&http2_query_buffer_count_lock); 3086 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) { 3087 lock_basic_lock(&http2_query_buffer_count_lock); 3088 http2_query_buffer_count -= qlen; 3089 lock_basic_unlock(&http2_query_buffer_count_lock); 3090 } 3091 } 3092 3093 if(!h2_stream->qbuffer || 3094 sldns_buffer_remaining(h2_stream->qbuffer) < len) { 3095 verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough " 3096 "buffer space for POST query. 
Can happen on multi " 3097 "frame requests without content-length header"); 3098 h2_stream->query_too_large = 1; 3099 return 0; 3100 } 3101 3102 sldns_buffer_write(h2_stream->qbuffer, data, len); 3103 3104 return 0; 3105} 3106 3107void http2_req_stream_clear(struct http2_stream* h2_stream) 3108{ 3109 if(h2_stream->qbuffer) { 3110 lock_basic_lock(&http2_query_buffer_count_lock); 3111 http2_query_buffer_count -= 3112 sldns_buffer_capacity(h2_stream->qbuffer); 3113 lock_basic_unlock(&http2_query_buffer_count_lock); 3114 sldns_buffer_free(h2_stream->qbuffer); 3115 h2_stream->qbuffer = NULL; 3116 } 3117 if(h2_stream->rbuffer) { 3118 lock_basic_lock(&http2_response_buffer_count_lock); 3119 http2_response_buffer_count -= 3120 sldns_buffer_capacity(h2_stream->rbuffer); 3121 lock_basic_unlock(&http2_response_buffer_count_lock); 3122 sldns_buffer_free(h2_stream->rbuffer); 3123 h2_stream->rbuffer = NULL; 3124 } 3125} 3126 3127nghttp2_session_callbacks* http2_req_callbacks_create(void) 3128{ 3129 nghttp2_session_callbacks *callbacks; 3130 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) { 3131 log_err("failed to initialize nghttp2 callback"); 3132 return NULL; 3133 } 3134 /* reception of header block started, used to create h2_stream */ 3135 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks, 3136 http2_req_begin_headers_cb); 3137 /* complete frame received, used to get data from stream if frame 3138 * has end stream flag, and start processing query */ 3139 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks, 3140 http2_req_frame_recv_cb); 3141 /* get request info from headers */ 3142 nghttp2_session_callbacks_set_on_header_callback(callbacks, 3143 http2_req_header_cb); 3144 /* get data from DATA frames, containing POST query */ 3145 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks, 3146 http2_req_data_chunk_recv_cb); 3147 3148 /* generic HTTP2 callbacks */ 3149 
nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb); 3150 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb); 3151 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks, 3152 http2_stream_close_cb); 3153 3154 return callbacks; 3155} 3156#endif /* HAVE_NGHTTP2 */ 3157