netevent.c revision 368693
1/* 2 * util/netevent.c - event notification 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36/** 37 * \file 38 * 39 * This file contains event notification functions. 40 */ 41#include "config.h" 42#include "util/netevent.h" 43#include "util/ub_event.h" 44#include "util/log.h" 45#include "util/net_help.h" 46#include "util/tcp_conn_limit.h" 47#include "util/fptr_wlist.h" 48#include "sldns/pkthdr.h" 49#include "sldns/sbuffer.h" 50#include "sldns/str2wire.h" 51#include "dnstap/dnstap.h" 52#include "dnscrypt/dnscrypt.h" 53#include "services/listen_dnsport.h" 54#ifdef HAVE_OPENSSL_SSL_H 55#include <openssl/ssl.h> 56#endif 57#ifdef HAVE_OPENSSL_ERR_H 58#include <openssl/err.h> 59#endif 60 61/* -------- Start of local definitions -------- */ 62/** if CMSG_ALIGN is not defined on this platform, a workaround */ 63#ifndef CMSG_ALIGN 64# ifdef __CMSG_ALIGN 65# define CMSG_ALIGN(n) __CMSG_ALIGN(n) 66# elif defined(CMSG_DATA_ALIGN) 67# define CMSG_ALIGN _CMSG_DATA_ALIGN 68# else 69# define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1)) 70# endif 71#endif 72 73/** if CMSG_LEN is not defined on this platform, a workaround */ 74#ifndef CMSG_LEN 75# define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len)) 76#endif 77 78/** if CMSG_SPACE is not defined on this platform, a workaround */ 79#ifndef CMSG_SPACE 80# ifdef _CMSG_HDR_ALIGN 81# define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr))) 82# else 83# define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr))) 84# endif 85#endif 86 87/** The TCP writing query timeout in milliseconds */ 88#define TCP_QUERY_TIMEOUT 120000 89/** The minimum actual TCP timeout to use, regardless of what we advertise, 90 * in msec */ 91#define TCP_QUERY_TIMEOUT_MINIMUM 200 92 93#ifndef NONBLOCKING_IS_BROKEN 94/** number of UDP reads to perform per read indication from select */ 95#define NUM_UDP_PER_SELECT 100 96#else 97#define NUM_UDP_PER_SELECT 1 98#endif 99 100/** 101 * The internal event structure for keeping ub_event info for the event. 102 * Possibly other structures (list, tree) this is part of. 103 */ 104struct internal_event { 105 /** the comm base */ 106 struct comm_base* base; 107 /** ub_event event type */ 108 struct ub_event* ev; 109}; 110 111/** 112 * Internal base structure, so that every thread has its own events. 113 */ 114struct internal_base { 115 /** ub_event event_base type. */ 116 struct ub_event_base* base; 117 /** seconds time pointer points here */ 118 time_t secs; 119 /** timeval with current time */ 120 struct timeval now; 121 /** the event used for slow_accept timeouts */ 122 struct ub_event* slow_accept; 123 /** true if slow_accept is enabled */ 124 int slow_accept_enabled; 125}; 126 127/** 128 * Internal timer structure, to store timer event in. 129 */ 130struct internal_timer { 131 /** the super struct from which derived */ 132 struct comm_timer super; 133 /** the comm base */ 134 struct comm_base* base; 135 /** ub_event event type */ 136 struct ub_event* ev; 137 /** is timer enabled */ 138 uint8_t enabled; 139}; 140 141/** 142 * Internal signal structure, to store signal event in. 143 */ 144struct internal_signal { 145 /** ub_event event type */ 146 struct ub_event* ev; 147 /** next in signal list */ 148 struct internal_signal* next; 149}; 150 151/** create a tcp handler with a parent */ 152static struct comm_point* comm_point_create_tcp_handler( 153 struct comm_base *base, struct comm_point* parent, size_t bufsize, 154 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 155 void* callback_arg); 156 157/* -------- End of local definitions -------- */ 158 159struct comm_base* 160comm_base_create(int sigs) 161{ 162 struct comm_base* b = (struct comm_base*)calloc(1, 163 sizeof(struct comm_base)); 164 const char *evnm="event", *evsys="", *evmethod=""; 165 166 if(!b) 167 return NULL; 168 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 169 if(!b->eb) { 170 free(b); 171 return NULL; 172 } 173 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now); 174 if(!b->eb->base) { 175 free(b->eb); 176 free(b); 177 return NULL; 178 } 179 ub_comm_base_now(b); 180 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod); 181 verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod); 182 return b; 183} 184 185struct comm_base* 186comm_base_create_event(struct ub_event_base* base) 187{ 188 struct comm_base* b = (struct comm_base*)calloc(1, 189 sizeof(struct comm_base)); 190 if(!b) 191 return NULL; 192 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 193 if(!b->eb) { 194 free(b); 195 return NULL; 196 } 197 b->eb->base = base; 198 ub_comm_base_now(b); 199 return b; 200} 201 202void 203comm_base_delete(struct comm_base* b) 204{ 205 if(!b) 206 return; 207 if(b->eb->slow_accept_enabled) { 208 if(ub_event_del(b->eb->slow_accept) != 0) { 209 log_err("could not event_del slow_accept"); 210 } 211 ub_event_free(b->eb->slow_accept); 212 } 213 ub_event_base_free(b->eb->base); 214 b->eb->base = NULL; 215 free(b->eb); 216 free(b); 217} 218 219void 220comm_base_delete_no_base(struct comm_base* b) 221{ 222 if(!b) 223 return; 224 if(b->eb->slow_accept_enabled) { 225 if(ub_event_del(b->eb->slow_accept) != 0) { 226 log_err("could not event_del slow_accept"); 227 } 228 ub_event_free(b->eb->slow_accept); 229 } 230 b->eb->base = NULL; 231 free(b->eb); 232 free(b); 233} 234 235void 236comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv) 237{ 238 *tt = &b->eb->secs; 239 *tv = &b->eb->now; 240} 241 242void 243comm_base_dispatch(struct comm_base* b) 244{ 245 int retval; 246 retval = ub_event_base_dispatch(b->eb->base); 247 if(retval < 0) { 248 fatal_exit("event_dispatch returned error %d, " 249 "errno is %s", retval, strerror(errno)); 250 } 251} 252 253void comm_base_exit(struct comm_base* b) 254{ 255 if(ub_event_base_loopexit(b->eb->base) != 0) { 256 log_err("Could not loopexit"); 257 } 258} 259 260void comm_base_set_slow_accept_handlers(struct comm_base* b, 261 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg) 262{ 263 b->stop_accept = stop_acc; 264 b->start_accept = start_acc; 265 b->cb_arg = arg; 266} 267 268struct ub_event_base* comm_base_internal(struct comm_base* b) 269{ 270 return b->eb->base; 271} 272 273/** see if errno for udp has to be logged or not uses globals */ 274static int 275udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 276{ 277 /* do not log transient errors (unless high verbosity) */ 278#if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN) 279 switch(errno) { 280# ifdef ENETUNREACH 281 case ENETUNREACH: 282# endif 283# ifdef EHOSTDOWN 284 case EHOSTDOWN: 285# endif 286# ifdef EHOSTUNREACH 287 case EHOSTUNREACH: 288# endif 289# ifdef ENETDOWN 290 case ENETDOWN: 291# endif 292 if(verbosity < VERB_ALGO) 293 return 0; 294 default: 295 break; 296 } 297#endif 298 /* permission denied is gotten for every send if the 299 * network is disconnected (on some OS), squelch it */ 300 if( ((errno == EPERM) 301# ifdef EADDRNOTAVAIL 302 /* 'Cannot assign requested address' also when disconnected */ 303 || (errno == EADDRNOTAVAIL) 304# endif 305 ) && verbosity < VERB_DETAIL) 306 return 0; 307# ifdef EADDRINUSE 308 /* If SO_REUSEADDR is set, we could try to connect to the same server 309 * from the same source port twice. */ 310 if(errno == EADDRINUSE && verbosity < VERB_DETAIL) 311 return 0; 312# endif 313 /* squelch errors where people deploy AAAA ::ffff:bla for 314 * authority servers, which we try for intranets. */ 315 if(errno == EINVAL && addr_is_ip4mapped( 316 (struct sockaddr_storage*)addr, addrlen) && 317 verbosity < VERB_DETAIL) 318 return 0; 319 /* SO_BROADCAST sockopt can give access to 255.255.255.255, 320 * but a dns cache does not need it. */ 321 if(errno == EACCES && addr_is_broadcast( 322 (struct sockaddr_storage*)addr, addrlen) && 323 verbosity < VERB_DETAIL) 324 return 0; 325 return 1; 326} 327 328int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 329{ 330 return udp_send_errno_needs_log(addr, addrlen); 331} 332 333/* send a UDP reply */ 334int 335comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, 336 struct sockaddr* addr, socklen_t addrlen) 337{ 338 ssize_t sent; 339 log_assert(c->fd != -1); 340#ifdef UNBOUND_DEBUG 341 if(sldns_buffer_remaining(packet) == 0) 342 log_err("error: send empty UDP packet"); 343#endif 344 if(addr) { 345 log_assert(addr && addrlen > 0); 346 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 347 sldns_buffer_remaining(packet), 0, 348 addr, addrlen); 349 } else { 350 sent = send(c->fd, (void*)sldns_buffer_begin(packet), 351 sldns_buffer_remaining(packet), 0); 352 } 353 if(sent == -1) { 354 /* try again and block, waiting for IO to complete, 355 * we want to send the answer, and we will wait for 356 * the ethernet interface buffer to have space. */ 357#ifndef USE_WINSOCK 358 if(errno == EAGAIN || 359# ifdef EWOULDBLOCK 360 errno == EWOULDBLOCK || 361# endif 362 errno == ENOBUFS) { 363#else 364 if(WSAGetLastError() == WSAEINPROGRESS || 365 WSAGetLastError() == WSAENOBUFS || 366 WSAGetLastError() == WSAEWOULDBLOCK) { 367#endif 368 int e; 369 fd_set_block(c->fd); 370 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 371 sldns_buffer_remaining(packet), 0, 372 addr, addrlen); 373 e = errno; 374 fd_set_nonblock(c->fd); 375 errno = e; 376 } 377 } 378 if(sent == -1) { 379 if(!udp_send_errno_needs_log(addr, addrlen)) 380 return 0; 381 verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno)); 382 log_addr(VERB_OPS, "remote address is", 383 (struct sockaddr_storage*)addr, addrlen); 384 return 0; 385 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 386 log_err("sent %d in place of %d bytes", 387 (int)sent, (int)sldns_buffer_remaining(packet)); 388 return 0; 389 } 390 return 1; 391} 392 393#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG)) 394/** print debug ancillary info */ 395static void p_ancil(const char* str, struct comm_reply* r) 396{ 397 if(r->srctype != 4 && r->srctype != 6) { 398 log_info("%s: unknown srctype %d", str, r->srctype); 399 return; 400 } 401 if(r->srctype == 6) { 402 char buf[1024]; 403 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 404 buf, (socklen_t)sizeof(buf)) == 0) { 405 (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf)); 406 } 407 buf[sizeof(buf)-1]=0; 408 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex); 409 } else if(r->srctype == 4) { 410#ifdef IP_PKTINFO 411 char buf1[1024], buf2[1024]; 412 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 413 buf1, (socklen_t)sizeof(buf1)) == 0) { 414 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 415 } 416 buf1[sizeof(buf1)-1]=0; 417#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 418 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 419 buf2, (socklen_t)sizeof(buf2)) == 0) { 420 (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2)); 421 } 422 buf2[sizeof(buf2)-1]=0; 423#else 424 buf2[0]=0; 425#endif 426 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex, 427 buf1, buf2); 428#elif defined(IP_RECVDSTADDR) 429 char buf1[1024]; 430 if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 431 buf1, (socklen_t)sizeof(buf1)) == 0) { 432 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 433 } 434 buf1[sizeof(buf1)-1]=0; 435 log_info("%s: %s", str, buf1); 436#endif /* IP_PKTINFO or PI_RECVDSTDADDR */ 437 } 438} 439#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */ 440 441/** send a UDP reply over specified interface*/ 442static int 443comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, 444 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 445{ 446#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG) 447 ssize_t sent; 448 struct msghdr msg; 449 struct iovec iov[1]; 450 union { 451 struct cmsghdr hdr; 452 char buf[256]; 453 } control; 454#ifndef S_SPLINT_S 455 struct cmsghdr *cmsg; 456#endif /* S_SPLINT_S */ 457 458 log_assert(c->fd != -1); 459#ifdef UNBOUND_DEBUG 460 if(sldns_buffer_remaining(packet) == 0) 461 log_err("error: send empty UDP packet"); 462#endif 463 log_assert(addr && addrlen > 0); 464 465 msg.msg_name = addr; 466 msg.msg_namelen = addrlen; 467 iov[0].iov_base = sldns_buffer_begin(packet); 468 iov[0].iov_len = sldns_buffer_remaining(packet); 469 msg.msg_iov = iov; 470 msg.msg_iovlen = 1; 471 msg.msg_control = control.buf; 472#ifndef S_SPLINT_S 473 msg.msg_controllen = sizeof(control.buf); 474#endif /* S_SPLINT_S */ 475 msg.msg_flags = 0; 476 477#ifndef S_SPLINT_S 478 cmsg = CMSG_FIRSTHDR(&msg); 479 if(r->srctype == 4) { 480#ifdef IP_PKTINFO 481 void* cmsg_data; 482 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 483 log_assert(msg.msg_controllen <= sizeof(control.buf)); 484 cmsg->cmsg_level = IPPROTO_IP; 485 cmsg->cmsg_type = IP_PKTINFO; 486 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info, 487 sizeof(struct in_pktinfo)); 488 /* unset the ifindex to not bypass the routing tables */ 489 cmsg_data = CMSG_DATA(cmsg); 490 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0; 491 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 492#elif defined(IP_SENDSRCADDR) 493 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 494 log_assert(msg.msg_controllen <= sizeof(control.buf)); 495 cmsg->cmsg_level = IPPROTO_IP; 496 cmsg->cmsg_type = IP_SENDSRCADDR; 497 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr, 498 sizeof(struct in_addr)); 499 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 500#else 501 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR"); 502 msg.msg_control = NULL; 503#endif /* IP_PKTINFO or IP_SENDSRCADDR */ 504 } else if(r->srctype == 6) { 505 void* cmsg_data; 506 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 507 log_assert(msg.msg_controllen <= sizeof(control.buf)); 508 cmsg->cmsg_level = IPPROTO_IPV6; 509 cmsg->cmsg_type = IPV6_PKTINFO; 510 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info, 511 sizeof(struct in6_pktinfo)); 512 /* unset the ifindex to not bypass the routing tables */ 513 cmsg_data = CMSG_DATA(cmsg); 514 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0; 515 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 516 } else { 517 /* try to pass all 0 to use default route */ 518 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 519 log_assert(msg.msg_controllen <= sizeof(control.buf)); 520 cmsg->cmsg_level = IPPROTO_IPV6; 521 cmsg->cmsg_type = IPV6_PKTINFO; 522 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo)); 523 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 524 } 525#endif /* S_SPLINT_S */ 526 if(verbosity >= VERB_ALGO) 527 p_ancil("send_udp over interface", r); 528 sent = sendmsg(c->fd, &msg, 0); 529 if(sent == -1) { 530 /* try again and block, waiting for IO to complete, 531 * we want to send the answer, and we will wait for 532 * the ethernet interface buffer to have space. */ 533#ifndef USE_WINSOCK 534 if(errno == EAGAIN || 535# ifdef EWOULDBLOCK 536 errno == EWOULDBLOCK || 537# endif 538 errno == ENOBUFS) { 539#else 540 if(WSAGetLastError() == WSAEINPROGRESS || 541 WSAGetLastError() == WSAENOBUFS || 542 WSAGetLastError() == WSAEWOULDBLOCK) { 543#endif 544 int e; 545 fd_set_block(c->fd); 546 sent = sendmsg(c->fd, &msg, 0); 547 e = errno; 548 fd_set_nonblock(c->fd); 549 errno = e; 550 } 551 } 552 if(sent == -1) { 553 if(!udp_send_errno_needs_log(addr, addrlen)) 554 return 0; 555 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno)); 556 log_addr(VERB_OPS, "remote address is", 557 (struct sockaddr_storage*)addr, addrlen); 558#ifdef __NetBSD__ 559 /* netbsd 7 has IP_PKTINFO for recv but not send */ 560 if(errno == EINVAL && r->srctype == 4) 561 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). " 562 "Please disable interface-automatic"); 563#endif 564 return 0; 565 } else if((size_t)sent != sldns_buffer_remaining(packet)) { 566 log_err("sent %d in place of %d bytes", 567 (int)sent, (int)sldns_buffer_remaining(packet)); 568 return 0; 569 } 570 return 1; 571#else 572 (void)c; 573 (void)packet; 574 (void)addr; 575 (void)addrlen; 576 (void)r; 577 log_err("sendmsg: IPV6_PKTINFO not supported"); 578 return 0; 579#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */ 580} 581 582/** return true is UDP receive error needs to be logged */ 583static int udp_recv_needs_log(int err) 584{ 585 switch(err) { 586 case ECONNREFUSED: 587# ifdef ENETUNREACH 588 case ENETUNREACH: 589# endif 590# ifdef EHOSTDOWN 591 case EHOSTDOWN: 592# endif 593# ifdef EHOSTUNREACH 594 case EHOSTUNREACH: 595# endif 596# ifdef ENETDOWN 597 case ENETDOWN: 598# endif 599 if(verbosity >= VERB_ALGO) 600 return 1; 601 return 0; 602 default: 603 break; 604 } 605 return 1; 606} 607 608void 609comm_point_udp_ancil_callback(int fd, short event, void* arg) 610{ 611#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 612 struct comm_reply rep; 613 struct msghdr msg; 614 struct iovec iov[1]; 615 ssize_t rcv; 616 union { 617 struct cmsghdr hdr; 618 char buf[256]; 619 } ancil; 620 int i; 621#ifndef S_SPLINT_S 622 struct cmsghdr* cmsg; 623#endif /* S_SPLINT_S */ 624 625 rep.c = (struct comm_point*)arg; 626 log_assert(rep.c->type == comm_udp); 627 628 if(!(event&UB_EV_READ)) 629 return; 630 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 631 ub_comm_base_now(rep.c->ev->base); 632 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 633 sldns_buffer_clear(rep.c->buffer); 634 rep.addrlen = (socklen_t)sizeof(rep.addr); 635 log_assert(fd != -1); 636 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 637 msg.msg_name = &rep.addr; 638 msg.msg_namelen = (socklen_t)sizeof(rep.addr); 639 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer); 640 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer); 641 msg.msg_iov = iov; 642 msg.msg_iovlen = 1; 643 msg.msg_control = ancil.buf; 644#ifndef S_SPLINT_S 645 msg.msg_controllen = sizeof(ancil.buf); 646#endif /* S_SPLINT_S */ 647 msg.msg_flags = 0; 648 rcv = recvmsg(fd, &msg, 0); 649 if(rcv == -1) { 650 if(errno != EAGAIN && errno != EINTR 651 && udp_recv_needs_log(errno)) { 652 log_err("recvmsg failed: %s", strerror(errno)); 653 } 654 return; 655 } 656 rep.addrlen = msg.msg_namelen; 657 sldns_buffer_skip(rep.c->buffer, rcv); 658 sldns_buffer_flip(rep.c->buffer); 659 rep.srctype = 0; 660#ifndef S_SPLINT_S 661 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; 662 cmsg = CMSG_NXTHDR(&msg, cmsg)) { 663 if( cmsg->cmsg_level == IPPROTO_IPV6 && 664 cmsg->cmsg_type == IPV6_PKTINFO) { 665 rep.srctype = 6; 666 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg), 667 sizeof(struct in6_pktinfo)); 668 break; 669#ifdef IP_PKTINFO 670 } else if( cmsg->cmsg_level == IPPROTO_IP && 671 cmsg->cmsg_type == IP_PKTINFO) { 672 rep.srctype = 4; 673 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg), 674 sizeof(struct in_pktinfo)); 675 break; 676#elif defined(IP_RECVDSTADDR) 677 } else if( cmsg->cmsg_level == IPPROTO_IP && 678 cmsg->cmsg_type == IP_RECVDSTADDR) { 679 rep.srctype = 4; 680 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg), 681 sizeof(struct in_addr)); 682 break; 683#endif /* IP_PKTINFO or IP_RECVDSTADDR */ 684 } 685 } 686 if(verbosity >= VERB_ALGO) 687 p_ancil("receive_udp on interface", &rep); 688#endif /* S_SPLINT_S */ 689 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 690 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 691 /* send back immediate reply */ 692 (void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer, 693 (struct sockaddr*)&rep.addr, rep.addrlen, &rep); 694 } 695 if(!rep.c || rep.c->fd == -1) /* commpoint closed */ 696 break; 697 } 698#else 699 (void)fd; 700 (void)event; 701 (void)arg; 702 fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. " 703 "Please disable interface-automatic"); 704#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */ 705} 706 707void 708comm_point_udp_callback(int fd, short event, void* arg) 709{ 710 struct comm_reply rep; 711 ssize_t rcv; 712 int i; 713 struct sldns_buffer *buffer; 714 715 rep.c = (struct comm_point*)arg; 716 log_assert(rep.c->type == comm_udp); 717 718 if(!(event&UB_EV_READ)) 719 return; 720 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd); 721 ub_comm_base_now(rep.c->ev->base); 722 for(i=0; i<NUM_UDP_PER_SELECT; i++) { 723 sldns_buffer_clear(rep.c->buffer); 724 rep.addrlen = (socklen_t)sizeof(rep.addr); 725 log_assert(fd != -1); 726 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0); 727 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 728 sldns_buffer_remaining(rep.c->buffer), 0, 729 (struct sockaddr*)&rep.addr, &rep.addrlen); 730 if(rcv == -1) { 731#ifndef USE_WINSOCK 732 if(errno != EAGAIN && errno != EINTR 733 && udp_recv_needs_log(errno)) 734 log_err("recvfrom %d failed: %s", 735 fd, strerror(errno)); 736#else 737 if(WSAGetLastError() != WSAEINPROGRESS && 738 WSAGetLastError() != WSAECONNRESET && 739 WSAGetLastError()!= WSAEWOULDBLOCK) 740 log_err("recvfrom failed: %s", 741 wsa_strerror(WSAGetLastError())); 742#endif 743 return; 744 } 745 sldns_buffer_skip(rep.c->buffer, rcv); 746 sldns_buffer_flip(rep.c->buffer); 747 rep.srctype = 0; 748 fptr_ok(fptr_whitelist_comm_point(rep.c->callback)); 749 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) { 750 /* send back immediate reply */ 751#ifdef USE_DNSCRYPT 752 buffer = rep.c->dnscrypt_buffer; 753#else 754 buffer = rep.c->buffer; 755#endif 756 (void)comm_point_send_udp_msg(rep.c, buffer, 757 (struct sockaddr*)&rep.addr, rep.addrlen); 758 } 759 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for 760 another UDP port. Note rep.c cannot be reused with TCP fd. */ 761 break; 762 } 763} 764 765/** Use a new tcp handler for new query fd, set to read query */ 766static void 767setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 768{ 769 int handler_usage; 770 log_assert(c->type == comm_tcp || c->type == comm_http); 771 log_assert(c->fd == -1); 772 sldns_buffer_clear(c->buffer); 773#ifdef USE_DNSCRYPT 774 if (c->dnscrypt) 775 sldns_buffer_clear(c->dnscrypt_buffer); 776#endif 777 c->tcp_is_reading = 1; 778 c->tcp_byte_count = 0; 779 /* if more than half the tcp handlers are in use, use a shorter 780 * timeout for this TCP connection, we need to make space for 781 * other connections to be able to get attention */ 782 /* If > 50% TCP handler structures in use, set timeout to 1/100th 783 * configured value. 784 * If > 65%TCP handler structures in use, set to 1/500th configured 785 * value. 786 * If > 80% TCP handler structures in use, set to 0. 787 * 788 * If the timeout to use falls below 200 milliseconds, an actual 789 * timeout of 200ms is used. 790 */ 791 handler_usage = (cur * 100) / max; 792 if(handler_usage > 50 && handler_usage <= 65) 793 c->tcp_timeout_msec /= 100; 794 else if (handler_usage > 65 && handler_usage <= 80) 795 c->tcp_timeout_msec /= 500; 796 else if (handler_usage > 80) 797 c->tcp_timeout_msec = 0; 798 comm_point_start_listening(c, fd, 799 c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM 800 ? TCP_QUERY_TIMEOUT_MINIMUM 801 : c->tcp_timeout_msec); 802} 803 804void comm_base_handle_slow_accept(int ATTR_UNUSED(fd), 805 short ATTR_UNUSED(event), void* arg) 806{ 807 struct comm_base* b = (struct comm_base*)arg; 808 /* timeout for the slow accept, re-enable accepts again */ 809 if(b->start_accept) { 810 verbose(VERB_ALGO, "wait is over, slow accept disabled"); 811 fptr_ok(fptr_whitelist_start_accept(b->start_accept)); 812 (*b->start_accept)(b->cb_arg); 813 b->eb->slow_accept_enabled = 0; 814 } 815} 816 817int comm_point_perform_accept(struct comm_point* c, 818 struct sockaddr_storage* addr, socklen_t* addrlen) 819{ 820 int new_fd; 821 *addrlen = (socklen_t)sizeof(*addr); 822#ifndef HAVE_ACCEPT4 823 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen); 824#else 825 /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */ 826 new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK); 827#endif 828 if(new_fd == -1) { 829#ifndef USE_WINSOCK 830 /* EINTR is signal interrupt. others are closed connection. */ 831 if( errno == EINTR || errno == EAGAIN 832#ifdef EWOULDBLOCK 833 || errno == EWOULDBLOCK 834#endif 835#ifdef ECONNABORTED 836 || errno == ECONNABORTED 837#endif 838#ifdef EPROTO 839 || errno == EPROTO 840#endif /* EPROTO */ 841 ) 842 return -1; 843#if defined(ENFILE) && defined(EMFILE) 844 if(errno == ENFILE || errno == EMFILE) { 845 /* out of file descriptors, likely outside of our 846 * control. stop accept() calls for some time */ 847 if(c->ev->base->stop_accept) { 848 struct comm_base* b = c->ev->base; 849 struct timeval tv; 850 verbose(VERB_ALGO, "out of file descriptors: " 851 "slow accept"); 852 b->eb->slow_accept_enabled = 1; 853 fptr_ok(fptr_whitelist_stop_accept( 854 b->stop_accept)); 855 (*b->stop_accept)(b->cb_arg); 856 /* set timeout, no mallocs */ 857 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000; 858 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000; 859 b->eb->slow_accept = ub_event_new(b->eb->base, 860 -1, UB_EV_TIMEOUT, 861 comm_base_handle_slow_accept, b); 862 if(b->eb->slow_accept == NULL) { 863 /* we do not want to log here, because 864 * that would spam the logfiles. 865 * error: "event_base_set failed." */ 866 } 867 else if(ub_event_add(b->eb->slow_accept, &tv) 868 != 0) { 869 /* we do not want to log here, 870 * error: "event_add failed." */ 871 } 872 } 873 return -1; 874 } 875#endif 876#else /* USE_WINSOCK */ 877 if(WSAGetLastError() == WSAEINPROGRESS || 878 WSAGetLastError() == WSAECONNRESET) 879 return -1; 880 if(WSAGetLastError() == WSAEWOULDBLOCK) { 881 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 882 return -1; 883 } 884#endif 885 log_err_addr("accept failed", sock_strerror(errno), addr, 886 *addrlen); 887 return -1; 888 } 889 if(c->tcp_conn_limit && c->type == comm_tcp_accept) { 890 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen); 891 if(!tcl_new_connection(c->tcl_addr)) { 892 if(verbosity >= 3) 893 log_err_addr("accept rejected", 894 "connection limit exceeded", addr, *addrlen); 895 close(new_fd); 896 return -1; 897 } 898 } 899#ifndef HAVE_ACCEPT4 900 fd_set_nonblock(new_fd); 901#endif 902 return new_fd; 903} 904 905#ifdef USE_WINSOCK 906static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp), 907 int ATTR_UNUSED(argi), long argl, long retvalue) 908{ 909 int wsa_err = WSAGetLastError(); /* store errcode before it is gone */ 910 verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper, 911 (oper&BIO_CB_RETURN)?"return":"before", 912 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"), 913 wsa_err==WSAEWOULDBLOCK?"wsawb":""); 914 /* on windows, check if previous operation caused EWOULDBLOCK */ 915 if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) || 916 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) { 917 if(wsa_err == WSAEWOULDBLOCK) 918 ub_winsock_tcp_wouldblock((struct ub_event*) 919 BIO_get_callback_arg(b), UB_EV_READ); 920 } 921 if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) || 922 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) { 923 if(wsa_err == WSAEWOULDBLOCK) 924 ub_winsock_tcp_wouldblock((struct ub_event*) 925 BIO_get_callback_arg(b), UB_EV_WRITE); 926 } 927 /* return original return value */ 928 return retvalue; 929} 930 931/** set win bio callbacks for nonblocking operations */ 932void 933comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl) 934{ 935 SSL* ssl = (SSL*)thessl; 936 /* set them both just in case, but usually they are the same BIO */ 937 BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb); 938 BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev); 939 BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb); 940 BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev); 941} 942#endif 943 944#ifdef HAVE_NGHTTP2 945/** Create http2 session server. Per connection, after TCP accepted.*/ 946static int http2_session_server_create(struct http2_session* h2_session) 947{ 948 log_assert(h2_session->callbacks); 949 h2_session->is_drop = 0; 950 if(nghttp2_session_server_new(&h2_session->session, 951 h2_session->callbacks, 952 h2_session) == NGHTTP2_ERR_NOMEM) { 953 log_err("failed to create nghttp2 session server"); 954 return 0; 955 } 956 957 return 1; 958} 959 960/** Submit http2 setting to session. Once per session. */ 961static int http2_submit_settings(struct http2_session* h2_session) 962{ 963 int ret; 964 nghttp2_settings_entry settings[1] = { 965 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 966 h2_session->c->http2_max_streams}}; 967 968 ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE, 969 settings, 1); 970 if(ret) { 971 verbose(VERB_QUERY, "http2: submit_settings failed, " 972 "error: %s", nghttp2_strerror(ret)); 973 return 0; 974 } 975 return 1; 976} 977#endif /* HAVE_NGHTTP2 */ 978 979 980void 981comm_point_tcp_accept_callback(int fd, short event, void* arg) 982{ 983 struct comm_point* c = (struct comm_point*)arg, *c_hdl; 984 int new_fd; 985 log_assert(c->type == comm_tcp_accept); 986 if(!(event & UB_EV_READ)) { 987 log_info("ignoring tcp accept event %d", (int)event); 988 return; 989 } 990 ub_comm_base_now(c->ev->base); 991 /* find free tcp handler. */ 992 if(!c->tcp_free) { 993 log_warn("accepted too many tcp, connections full"); 994 return; 995 } 996 /* accept incoming connection. */ 997 c_hdl = c->tcp_free; 998 /* clear leftover flags from previous use, and then set the 999 * correct event base for the event structure for libevent */ 1000 ub_event_free(c_hdl->ev->ev); 1001 if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) || 1002 c_hdl->type == comm_local || c_hdl->type == comm_raw) 1003 c_hdl->tcp_do_toggle_rw = 0; 1004 else c_hdl->tcp_do_toggle_rw = 1; 1005 1006 if(c_hdl->type == comm_http) { 1007#ifdef HAVE_NGHTTP2 1008 if(!c_hdl->h2_session || 1009 !http2_session_server_create(c_hdl->h2_session)) { 1010 log_warn("failed to create nghttp2"); 1011 return; 1012 } 1013 if(!c_hdl->h2_session || 1014 !http2_submit_settings(c_hdl->h2_session)) { 1015 log_warn("failed to submit http2 settings"); 1016 return; 1017 } 1018 if(!c->ssl) { 1019 c_hdl->tcp_do_toggle_rw = 0; 1020 c_hdl->use_h2 = 1; 1021 } 1022#endif 1023 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1024 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1025 comm_point_http_handle_callback, c_hdl); 1026 } else { 1027 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 1028 UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 1029 comm_point_tcp_handle_callback, c_hdl); 1030 } 1031 if(!c_hdl->ev->ev) { 1032 log_warn("could not ub_event_new, dropped tcp"); 1033 return; 1034 } 1035 log_assert(fd != -1); 1036 (void)fd; 1037 new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr, 1038 &c_hdl->repinfo.addrlen); 1039 if(new_fd == -1) 1040 return; 1041 if(c->ssl) { 1042 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd); 1043 if(!c_hdl->ssl) { 1044 c_hdl->fd = new_fd; 1045 comm_point_close(c_hdl); 1046 return; 1047 } 1048 c_hdl->ssl_shake_state = comm_ssl_shake_read; 1049#ifdef USE_WINSOCK 1050 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl); 1051#endif 1052 } 1053 1054 /* grab the tcp handler buffers */ 1055 c->cur_tcp_count++; 1056 c->tcp_free = c_hdl->tcp_free; 1057 if(!c->tcp_free) { 1058 /* stop accepting incoming queries for now. */ 1059 comm_point_stop_listening(c); 1060 } 1061 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count); 1062} 1063 1064/** Make tcp handler free for next assignment */ 1065static void 1066reclaim_tcp_handler(struct comm_point* c) 1067{ 1068 log_assert(c->type == comm_tcp); 1069 if(c->ssl) { 1070#ifdef HAVE_SSL 1071 SSL_shutdown(c->ssl); 1072 SSL_free(c->ssl); 1073 c->ssl = NULL; 1074#endif 1075 } 1076 comm_point_close(c); 1077 if(c->tcp_parent) { 1078 c->tcp_parent->cur_tcp_count--; 1079 c->tcp_free = c->tcp_parent->tcp_free; 1080 c->tcp_parent->tcp_free = c; 1081 if(!c->tcp_free) { 1082 /* re-enable listening on accept socket */ 1083 comm_point_start_listening(c->tcp_parent, -1, -1); 1084 } 1085 } 1086 c->tcp_more_read_again = NULL; 1087 c->tcp_more_write_again = NULL; 1088} 1089 1090/** do the callback when writing is done */ 1091static void 1092tcp_callback_writer(struct comm_point* c) 1093{ 1094 log_assert(c->type == comm_tcp); 1095 if(!c->tcp_write_and_read) { 1096 sldns_buffer_clear(c->buffer); 1097 c->tcp_byte_count = 0; 1098 } 1099 if(c->tcp_do_toggle_rw) 1100 c->tcp_is_reading = 1; 1101 /* switch from listening(write) to listening(read) */ 1102 if(c->tcp_req_info) { 1103 tcp_req_info_handle_writedone(c->tcp_req_info); 1104 } else { 1105 comm_point_stop_listening(c); 1106 if(c->tcp_write_and_read) { 1107 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1108 if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN, 1109 &c->repinfo) ) { 1110 comm_point_start_listening(c, -1, 1111 c->tcp_timeout_msec); 1112 } 1113 } else { 1114 comm_point_start_listening(c, -1, c->tcp_timeout_msec); 1115 } 1116 } 1117} 1118 1119/** do the callback when reading is done */ 1120static void 1121tcp_callback_reader(struct comm_point* c) 1122{ 1123 log_assert(c->type == comm_tcp || c->type == comm_local); 1124 sldns_buffer_flip(c->buffer); 1125 if(c->tcp_do_toggle_rw) 1126 c->tcp_is_reading = 0; 1127 c->tcp_byte_count = 0; 1128 if(c->tcp_req_info) { 1129 tcp_req_info_handle_readdone(c->tcp_req_info); 1130 } else { 1131 if(c->type == comm_tcp) 1132 comm_point_stop_listening(c); 1133 fptr_ok(fptr_whitelist_comm_point(c->callback)); 1134 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 1135 comm_point_start_listening(c, -1, c->tcp_timeout_msec); 1136 } 1137 } 1138} 1139 1140#ifdef HAVE_SSL 1141/** true if the ssl handshake error has to be squelched from the logs */ 1142int 1143squelch_err_ssl_handshake(unsigned long err) 1144{ 1145 if(verbosity >= VERB_QUERY) 1146 return 0; /* only squelch on low verbosity */ 1147 /* this is very specific, we could filter on ERR_GET_REASON() 1148 * (the third element in ERR_PACK) */ 1149 if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) || 1150 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) || 1151 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) || 1152 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE) 1153#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1154 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER) 1155#endif 1156#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1157 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL) 1158 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL) 1159# ifdef SSL_R_VERSION_TOO_LOW 1160 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW) 1161# endif 1162#endif 1163 ) 1164 return 1; 1165 return 0; 1166} 1167#endif /* HAVE_SSL */ 1168 1169/** continue ssl handshake */ 1170#ifdef HAVE_SSL 1171static int 1172ssl_handshake(struct comm_point* c) 1173{ 1174 int r; 1175 if(c->ssl_shake_state == comm_ssl_shake_hs_read) { 1176 /* read condition satisfied back to writing */ 1177 comm_point_listen_for_rw(c, 1, 1); 1178 c->ssl_shake_state = comm_ssl_shake_none; 1179 return 1; 1180 } 1181 if(c->ssl_shake_state == comm_ssl_shake_hs_write) { 1182 /* write condition satisfied, back to reading */ 1183 comm_point_listen_for_rw(c, 1, 0); 1184 c->ssl_shake_state = comm_ssl_shake_none; 1185 return 1; 1186 } 1187 1188 ERR_clear_error(); 1189 r = SSL_do_handshake(c->ssl); 1190 if(r != 1) { 1191 int want = SSL_get_error(c->ssl, r); 1192 if(want == SSL_ERROR_WANT_READ) { 1193 if(c->ssl_shake_state == comm_ssl_shake_read) 1194 return 1; 1195 c->ssl_shake_state = comm_ssl_shake_read; 1196 comm_point_listen_for_rw(c, 1, 0); 1197 return 1; 1198 } else if(want == SSL_ERROR_WANT_WRITE) { 1199 if(c->ssl_shake_state == comm_ssl_shake_write) 1200 return 1; 1201 c->ssl_shake_state = comm_ssl_shake_write; 1202 comm_point_listen_for_rw(c, 0, 1); 1203 return 1; 1204 } else if(r == 0) { 1205 return 0; /* closed */ 1206 } else if(want == SSL_ERROR_SYSCALL) { 1207 /* SYSCALL and errno==0 means closed uncleanly */ 1208#ifdef EPIPE 1209 if(errno == EPIPE && verbosity < 2) 1210 return 0; /* silence 'broken pipe' */ 1211#endif 1212#ifdef ECONNRESET 1213 if(errno == ECONNRESET && verbosity < 2) 1214 return 0; /* silence reset by peer */ 1215#endif 1216 if(errno != 0) 1217 log_err("SSL_handshake syscall: %s", 1218 strerror(errno)); 1219 return 0; 1220 } else { 1221 unsigned long err = ERR_get_error(); 1222 if(!squelch_err_ssl_handshake(err)) { 1223 log_crypto_err_code("ssl handshake failed", err); 1224 log_addr(VERB_OPS, "ssl handshake failed", &c->repinfo.addr, 1225 c->repinfo.addrlen); 1226 } 1227 return 0; 1228 } 1229 } 1230 /* this is where peer verification could take place */ 1231 if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) { 1232 /* verification */ 1233 if(SSL_get_verify_result(c->ssl) == X509_V_OK) { 1234 X509* x = SSL_get_peer_certificate(c->ssl); 1235 if(!x) { 1236 log_addr(VERB_ALGO, "SSL connection failed: " 1237 "no certificate", 1238 &c->repinfo.addr, c->repinfo.addrlen); 1239 return 0; 1240 } 1241 log_cert(VERB_ALGO, "peer certificate", x); 1242#ifdef HAVE_SSL_GET0_PEERNAME 1243 if(SSL_get0_peername(c->ssl)) { 1244 char buf[255]; 1245 snprintf(buf, sizeof(buf), "SSL connection " 1246 "to %s authenticated", 1247 SSL_get0_peername(c->ssl)); 1248 log_addr(VERB_ALGO, buf, &c->repinfo.addr, 1249 c->repinfo.addrlen); 1250 } else { 1251#endif 1252 log_addr(VERB_ALGO, "SSL connection " 1253 "authenticated", &c->repinfo.addr, 1254 c->repinfo.addrlen); 1255#ifdef HAVE_SSL_GET0_PEERNAME 1256 } 1257#endif 1258 X509_free(x); 1259 } else { 1260 X509* x = SSL_get_peer_certificate(c->ssl); 1261 if(x) { 1262 log_cert(VERB_ALGO, "peer certificate", x); 1263 X509_free(x); 1264 } 1265 log_addr(VERB_ALGO, "SSL connection failed: " 1266 "failed to authenticate", 1267 &c->repinfo.addr, c->repinfo.addrlen); 1268 return 0; 1269 } 1270 } else { 1271 /* unauthenticated, the verify peer flag was not set 1272 * in c->ssl when the ssl object was created from ssl_ctx */ 1273 log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr, 1274 c->repinfo.addrlen); 1275 } 1276 1277 /* check if http2 use is negotiated */ 1278 if(c->type == comm_http && c->h2_session) { 1279 const unsigned char *alpn; 1280 unsigned int alpnlen = 0; 1281 SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen); 1282 if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) { 1283 /* connection upgraded to HTTP2 */ 1284 c->tcp_do_toggle_rw = 0; 1285 c->use_h2 = 1; 1286 } 1287 } 1288 1289 /* setup listen rw correctly */ 1290 if(c->tcp_is_reading) { 1291 if(c->ssl_shake_state != comm_ssl_shake_read) 1292 comm_point_listen_for_rw(c, 1, 0); 1293 } else { 1294 comm_point_listen_for_rw(c, 1, 1); 1295 } 1296 c->ssl_shake_state = comm_ssl_shake_none; 1297 return 1; 1298} 1299#endif /* HAVE_SSL */ 1300 1301/** ssl read callback on TCP */ 1302static int 1303ssl_handle_read(struct comm_point* c) 1304{ 1305#ifdef HAVE_SSL 1306 int r; 1307 if(c->ssl_shake_state != comm_ssl_shake_none) { 1308 if(!ssl_handshake(c)) 1309 return 0; 1310 if(c->ssl_shake_state != comm_ssl_shake_none) 1311 return 1; 1312 } 1313 if(c->tcp_byte_count < sizeof(uint16_t)) { 1314 /* read length bytes */ 1315 ERR_clear_error(); 1316 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer, 1317 c->tcp_byte_count), (int)(sizeof(uint16_t) - 1318 c->tcp_byte_count))) <= 0) { 1319 int want = SSL_get_error(c->ssl, r); 1320 if(want == SSL_ERROR_ZERO_RETURN) { 1321 if(c->tcp_req_info) 1322 return tcp_req_info_handle_read_close(c->tcp_req_info); 1323 return 0; /* shutdown, closed */ 1324 } else if(want == SSL_ERROR_WANT_READ) { 1325 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1326 return 1; /* read more later */ 1327 } else if(want == SSL_ERROR_WANT_WRITE) { 1328 c->ssl_shake_state = comm_ssl_shake_hs_write; 1329 comm_point_listen_for_rw(c, 0, 1); 1330 return 1; 1331 } else if(want == SSL_ERROR_SYSCALL) { 1332#ifdef ECONNRESET 1333 if(errno == ECONNRESET && verbosity < 2) 1334 return 0; /* silence reset by peer */ 1335#endif 1336 if(errno != 0) 1337 log_err("SSL_read syscall: %s", 1338 strerror(errno)); 1339 return 0; 1340 } 1341 log_crypto_err("could not SSL_read"); 1342 return 0; 1343 } 1344 c->tcp_byte_count += r; 1345 if(c->tcp_byte_count < sizeof(uint16_t)) 1346 return 1; 1347 if(sldns_buffer_read_u16_at(c->buffer, 0) > 1348 sldns_buffer_capacity(c->buffer)) { 1349 verbose(VERB_QUERY, "ssl: dropped larger than buffer"); 1350 return 0; 1351 } 1352 sldns_buffer_set_limit(c->buffer, 1353 sldns_buffer_read_u16_at(c->buffer, 0)); 1354 if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 1355 verbose(VERB_QUERY, "ssl: dropped bogus too short."); 1356 return 0; 1357 } 1358 sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t))); 1359 verbose(VERB_ALGO, "Reading ssl tcp query of length %d", 1360 (int)sldns_buffer_limit(c->buffer)); 1361 } 1362 if(sldns_buffer_remaining(c->buffer) > 0) { 1363 ERR_clear_error(); 1364 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 1365 (int)sldns_buffer_remaining(c->buffer)); 1366 if(r <= 0) { 1367 int want = SSL_get_error(c->ssl, r); 1368 if(want == SSL_ERROR_ZERO_RETURN) { 1369 if(c->tcp_req_info) 1370 return tcp_req_info_handle_read_close(c->tcp_req_info); 1371 return 0; /* shutdown, closed */ 1372 } else if(want == SSL_ERROR_WANT_READ) { 1373 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1374 return 1; /* read more later */ 1375 } else if(want == SSL_ERROR_WANT_WRITE) { 1376 c->ssl_shake_state = comm_ssl_shake_hs_write; 1377 comm_point_listen_for_rw(c, 0, 1); 1378 return 1; 1379 } else if(want == SSL_ERROR_SYSCALL) { 1380#ifdef ECONNRESET 1381 if(errno == ECONNRESET && verbosity < 2) 1382 return 0; /* silence reset by peer */ 1383#endif 1384 if(errno != 0) 1385 log_err("SSL_read syscall: %s", 1386 strerror(errno)); 1387 return 0; 1388 } 1389 log_crypto_err("could not SSL_read"); 1390 return 0; 1391 } 1392 sldns_buffer_skip(c->buffer, (ssize_t)r); 1393 } 1394 if(sldns_buffer_remaining(c->buffer) <= 0) { 1395 tcp_callback_reader(c); 1396 } 1397 return 1; 1398#else 1399 (void)c; 1400 return 0; 1401#endif /* HAVE_SSL */ 1402} 1403 1404/** ssl write callback on TCP */ 1405static int 1406ssl_handle_write(struct comm_point* c) 1407{ 1408#ifdef HAVE_SSL 1409 int r; 1410 if(c->ssl_shake_state != comm_ssl_shake_none) { 1411 if(!ssl_handshake(c)) 1412 return 0; 1413 if(c->ssl_shake_state != comm_ssl_shake_none) 1414 return 1; 1415 } 1416 /* ignore return, if fails we may simply block */ 1417 (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE); 1418 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 1419 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer)); 1420 ERR_clear_error(); 1421 if(c->tcp_write_and_read) { 1422 if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) { 1423 /* combine the tcp length and the query for 1424 * write, this emulates writev */ 1425 uint8_t buf[LDNS_RR_BUF_SIZE]; 1426 memmove(buf, &len, sizeof(uint16_t)); 1427 memmove(buf+sizeof(uint16_t), 1428 c->tcp_write_pkt, 1429 c->tcp_write_pkt_len); 1430 r = SSL_write(c->ssl, 1431 (void*)(buf+c->tcp_write_byte_count), 1432 c->tcp_write_pkt_len + 2 - 1433 c->tcp_write_byte_count); 1434 } else { 1435 r = SSL_write(c->ssl, 1436 (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 1437 (int)(sizeof(uint16_t)-c->tcp_write_byte_count)); 1438 } 1439 } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) < 1440 LDNS_RR_BUF_SIZE) { 1441 /* combine the tcp length and the query for write, 1442 * this emulates writev */ 1443 uint8_t buf[LDNS_RR_BUF_SIZE]; 1444 memmove(buf, &len, sizeof(uint16_t)); 1445 memmove(buf+sizeof(uint16_t), 1446 sldns_buffer_current(c->buffer), 1447 sldns_buffer_remaining(c->buffer)); 1448 r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count), 1449 (int)(sizeof(uint16_t)+ 1450 sldns_buffer_remaining(c->buffer) 1451 - c->tcp_byte_count)); 1452 } else { 1453 r = SSL_write(c->ssl, 1454 (void*)(((uint8_t*)&len)+c->tcp_byte_count), 1455 (int)(sizeof(uint16_t)-c->tcp_byte_count)); 1456 } 1457 if(r <= 0) { 1458 int want = SSL_get_error(c->ssl, r); 1459 if(want == SSL_ERROR_ZERO_RETURN) { 1460 return 0; /* closed */ 1461 } else if(want == SSL_ERROR_WANT_READ) { 1462 c->ssl_shake_state = comm_ssl_shake_hs_read; 1463 comm_point_listen_for_rw(c, 1, 0); 1464 return 1; /* wait for read condition */ 1465 } else if(want == SSL_ERROR_WANT_WRITE) { 1466 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1467 return 1; /* write more later */ 1468 } else if(want == SSL_ERROR_SYSCALL) { 1469#ifdef EPIPE 1470 if(errno == EPIPE && verbosity < 2) 1471 return 0; /* silence 'broken pipe' */ 1472#endif 1473 if(errno != 0) 1474 log_err("SSL_write syscall: %s", 1475 strerror(errno)); 1476 return 0; 1477 } 1478 log_crypto_err("could not SSL_write"); 1479 return 0; 1480 } 1481 if(c->tcp_write_and_read) { 1482 c->tcp_write_byte_count += r; 1483 if(c->tcp_write_byte_count < sizeof(uint16_t)) 1484 return 1; 1485 } else { 1486 c->tcp_byte_count += r; 1487 if(c->tcp_byte_count < sizeof(uint16_t)) 1488 return 1; 1489 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 1490 sizeof(uint16_t)); 1491 } 1492 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1493 tcp_callback_writer(c); 1494 return 1; 1495 } 1496 } 1497 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0); 1498 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 1499 ERR_clear_error(); 1500 if(c->tcp_write_and_read) { 1501 r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 1502 (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count)); 1503 } else { 1504 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 1505 (int)sldns_buffer_remaining(c->buffer)); 1506 } 1507 if(r <= 0) { 1508 int want = SSL_get_error(c->ssl, r); 1509 if(want == SSL_ERROR_ZERO_RETURN) { 1510 return 0; /* closed */ 1511 } else if(want == SSL_ERROR_WANT_READ) { 1512 c->ssl_shake_state = comm_ssl_shake_hs_read; 1513 comm_point_listen_for_rw(c, 1, 0); 1514 return 1; /* wait for read condition */ 1515 } else if(want == SSL_ERROR_WANT_WRITE) { 1516 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1517 return 1; /* write more later */ 1518 } else if(want == SSL_ERROR_SYSCALL) { 1519#ifdef EPIPE 1520 if(errno == EPIPE && verbosity < 2) 1521 return 0; /* silence 'broken pipe' */ 1522#endif 1523 if(errno != 0) 1524 log_err("SSL_write syscall: %s", 1525 strerror(errno)); 1526 return 0; 1527 } 1528 log_crypto_err("could not SSL_write"); 1529 return 0; 1530 } 1531 if(c->tcp_write_and_read) { 1532 c->tcp_write_byte_count += r; 1533 } else { 1534 sldns_buffer_skip(c->buffer, (ssize_t)r); 1535 } 1536 1537 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1538 tcp_callback_writer(c); 1539 } 1540 return 1; 1541#else 1542 (void)c; 1543 return 0; 1544#endif /* HAVE_SSL */ 1545} 1546 1547/** handle ssl tcp connection with dns contents */ 1548static int 1549ssl_handle_it(struct comm_point* c, int is_write) 1550{ 1551 /* handle case where renegotiation wants read during write call 1552 * or write during read calls */ 1553 if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write) 1554 return ssl_handle_read(c); 1555 else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read) 1556 return ssl_handle_write(c); 1557 /* handle read events for read operation and write events for a 1558 * write operation */ 1559 else if(!is_write) 1560 return ssl_handle_read(c); 1561 return ssl_handle_write(c); 1562} 1563 1564/** Handle tcp reading callback. 1565 * @param fd: file descriptor of socket. 1566 * @param c: comm point to read from into buffer. 1567 * @param short_ok: if true, very short packets are OK (for comm_local). 1568 * @return: 0 on error 1569 */ 1570static int 1571comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok) 1572{ 1573 ssize_t r; 1574 log_assert(c->type == comm_tcp || c->type == comm_local); 1575 if(c->ssl) 1576 return ssl_handle_it(c, 0); 1577 if(!c->tcp_is_reading && !c->tcp_write_and_read) 1578 return 0; 1579 1580 log_assert(fd != -1); 1581 if(c->tcp_byte_count < sizeof(uint16_t)) { 1582 /* read length bytes */ 1583 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count), 1584 sizeof(uint16_t)-c->tcp_byte_count, 0); 1585 if(r == 0) { 1586 if(c->tcp_req_info) 1587 return tcp_req_info_handle_read_close(c->tcp_req_info); 1588 return 0; 1589 } else if(r == -1) { 1590#ifndef USE_WINSOCK 1591 if(errno == EINTR || errno == EAGAIN) 1592 return 1; 1593#ifdef ECONNRESET 1594 if(errno == ECONNRESET && verbosity < 2) 1595 return 0; /* silence reset by peer */ 1596#endif 1597#else /* USE_WINSOCK */ 1598 if(WSAGetLastError() == WSAECONNRESET) 1599 return 0; 1600 if(WSAGetLastError() == WSAEINPROGRESS) 1601 return 1; 1602 if(WSAGetLastError() == WSAEWOULDBLOCK) { 1603 ub_winsock_tcp_wouldblock(c->ev->ev, 1604 UB_EV_READ); 1605 return 1; 1606 } 1607#endif 1608 log_err_addr("read (in tcp s)", sock_strerror(errno), 1609 &c->repinfo.addr, c->repinfo.addrlen); 1610 return 0; 1611 } 1612 c->tcp_byte_count += r; 1613 if(c->tcp_byte_count != sizeof(uint16_t)) 1614 return 1; 1615 if(sldns_buffer_read_u16_at(c->buffer, 0) > 1616 sldns_buffer_capacity(c->buffer)) { 1617 verbose(VERB_QUERY, "tcp: dropped larger than buffer"); 1618 return 0; 1619 } 1620 sldns_buffer_set_limit(c->buffer, 1621 sldns_buffer_read_u16_at(c->buffer, 0)); 1622 if(!short_ok && 1623 sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 1624 verbose(VERB_QUERY, "tcp: dropped bogus too short."); 1625 return 0; 1626 } 1627 verbose(VERB_ALGO, "Reading tcp query of length %d", 1628 (int)sldns_buffer_limit(c->buffer)); 1629 } 1630 1631 log_assert(sldns_buffer_remaining(c->buffer) > 0); 1632 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 1633 sldns_buffer_remaining(c->buffer), 0); 1634 if(r == 0) { 1635 if(c->tcp_req_info) 1636 return tcp_req_info_handle_read_close(c->tcp_req_info); 1637 return 0; 1638 } else if(r == -1) { 1639#ifndef USE_WINSOCK 1640 if(errno == EINTR || errno == EAGAIN) 1641 return 1; 1642#else /* USE_WINSOCK */ 1643 if(WSAGetLastError() == WSAECONNRESET) 1644 return 0; 1645 if(WSAGetLastError() == WSAEINPROGRESS) 1646 return 1; 1647 if(WSAGetLastError() == WSAEWOULDBLOCK) { 1648 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1649 return 1; 1650 } 1651#endif 1652 log_err_addr("read (in tcp r)", sock_strerror(errno), 1653 &c->repinfo.addr, c->repinfo.addrlen); 1654 return 0; 1655 } 1656 sldns_buffer_skip(c->buffer, r); 1657 if(sldns_buffer_remaining(c->buffer) <= 0) { 1658 tcp_callback_reader(c); 1659 } 1660 return 1; 1661} 1662 1663/** 1664 * Handle tcp writing callback. 1665 * @param fd: file descriptor of socket. 1666 * @param c: comm point to write buffer out of. 1667 * @return: 0 on error 1668 */ 1669static int 1670comm_point_tcp_handle_write(int fd, struct comm_point* c) 1671{ 1672 ssize_t r; 1673 struct sldns_buffer *buffer; 1674 log_assert(c->type == comm_tcp); 1675#ifdef USE_DNSCRYPT 1676 buffer = c->dnscrypt_buffer; 1677#else 1678 buffer = c->buffer; 1679#endif 1680 if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read) 1681 return 0; 1682 log_assert(fd != -1); 1683 if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) { 1684 /* check for pending error from nonblocking connect */ 1685 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/ 1686 int error = 0; 1687 socklen_t len = (socklen_t)sizeof(error); 1688 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 1689 &len) < 0){ 1690#ifndef USE_WINSOCK 1691 error = errno; /* on solaris errno is error */ 1692#else /* USE_WINSOCK */ 1693 error = WSAGetLastError(); 1694#endif 1695 } 1696#ifndef USE_WINSOCK 1697#if defined(EINPROGRESS) && defined(EWOULDBLOCK) 1698 if(error == EINPROGRESS || error == EWOULDBLOCK) 1699 return 1; /* try again later */ 1700 else 1701#endif 1702 if(error != 0 && verbosity < 2) 1703 return 0; /* silence lots of chatter in the logs */ 1704 else if(error != 0) { 1705 log_err_addr("tcp connect", strerror(error), 1706 &c->repinfo.addr, c->repinfo.addrlen); 1707#else /* USE_WINSOCK */ 1708 /* examine error */ 1709 if(error == WSAEINPROGRESS) 1710 return 1; 1711 else if(error == WSAEWOULDBLOCK) { 1712 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1713 return 1; 1714 } else if(error != 0 && verbosity < 2) 1715 return 0; 1716 else if(error != 0) { 1717 log_err_addr("tcp connect", wsa_strerror(error), 1718 &c->repinfo.addr, c->repinfo.addrlen); 1719#endif /* USE_WINSOCK */ 1720 return 0; 1721 } 1722 } 1723 if(c->ssl) 1724 return ssl_handle_it(c, 1); 1725 1726#ifdef USE_MSG_FASTOPEN 1727 /* Only try this on first use of a connection that uses tfo, 1728 otherwise fall through to normal write */ 1729 /* Also, TFO support on WINDOWS not implemented at the moment */ 1730 if(c->tcp_do_fastopen == 1) { 1731 /* this form of sendmsg() does both a connect() and send() so need to 1732 look for various flavours of error*/ 1733 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 1734 struct msghdr msg; 1735 struct iovec iov[2]; 1736 c->tcp_do_fastopen = 0; 1737 memset(&msg, 0, sizeof(msg)); 1738 if(c->tcp_write_and_read) { 1739 iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 1740 iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 1741 iov[1].iov_base = c->tcp_write_pkt; 1742 iov[1].iov_len = c->tcp_write_pkt_len; 1743 } else { 1744 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 1745 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 1746 iov[1].iov_base = sldns_buffer_begin(buffer); 1747 iov[1].iov_len = sldns_buffer_limit(buffer); 1748 } 1749 log_assert(iov[0].iov_len > 0); 1750 msg.msg_name = &c->repinfo.addr; 1751 msg.msg_namelen = c->repinfo.addrlen; 1752 msg.msg_iov = iov; 1753 msg.msg_iovlen = 2; 1754 r = sendmsg(fd, &msg, MSG_FASTOPEN); 1755 if (r == -1) { 1756#if defined(EINPROGRESS) && defined(EWOULDBLOCK) 1757 /* Handshake is underway, maybe because no TFO cookie available. 1758 Come back to write the message*/ 1759 if(errno == EINPROGRESS || errno == EWOULDBLOCK) 1760 return 1; 1761#endif 1762 if(errno == EINTR || errno == EAGAIN) 1763 return 1; 1764 /* Not handling EISCONN here as shouldn't ever hit that case.*/ 1765 if(errno != EPIPE && errno != 0 && verbosity < 2) 1766 return 0; /* silence lots of chatter in the logs */ 1767 if(errno != EPIPE && errno != 0) { 1768 log_err_addr("tcp sendmsg", strerror(errno), 1769 &c->repinfo.addr, c->repinfo.addrlen); 1770 return 0; 1771 } 1772 /* fallthrough to nonFASTOPEN 1773 * (MSG_FASTOPEN on Linux 3 produces EPIPE) 1774 * we need to perform connect() */ 1775 if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) { 1776#ifdef EINPROGRESS 1777 if(errno == EINPROGRESS) 1778 return 1; /* wait until connect done*/ 1779#endif 1780#ifdef USE_WINSOCK 1781 if(WSAGetLastError() == WSAEINPROGRESS || 1782 WSAGetLastError() == WSAEWOULDBLOCK) 1783 return 1; /* wait until connect done*/ 1784#endif 1785 if(tcp_connect_errno_needs_log( 1786 (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) { 1787 log_err_addr("outgoing tcp: connect after EPIPE for fastopen", 1788 strerror(errno), &c->repinfo.addr, c->repinfo.addrlen); 1789 } 1790 return 0; 1791 } 1792 1793 } else { 1794 if(c->tcp_write_and_read) { 1795 c->tcp_write_byte_count += r; 1796 if(c->tcp_write_byte_count < sizeof(uint16_t)) 1797 return 1; 1798 } else { 1799 c->tcp_byte_count += r; 1800 if(c->tcp_byte_count < sizeof(uint16_t)) 1801 return 1; 1802 sldns_buffer_set_position(buffer, c->tcp_byte_count - 1803 sizeof(uint16_t)); 1804 } 1805 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1806 tcp_callback_writer(c); 1807 return 1; 1808 } 1809 } 1810 } 1811#endif /* USE_MSG_FASTOPEN */ 1812 1813 if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 1814 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 1815#ifdef HAVE_WRITEV 1816 struct iovec iov[2]; 1817 if(c->tcp_write_and_read) { 1818 iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 1819 iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 1820 iov[1].iov_base = c->tcp_write_pkt; 1821 iov[1].iov_len = c->tcp_write_pkt_len; 1822 } else { 1823 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 1824 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 1825 iov[1].iov_base = sldns_buffer_begin(buffer); 1826 iov[1].iov_len = sldns_buffer_limit(buffer); 1827 } 1828 log_assert(iov[0].iov_len > 0); 1829 r = writev(fd, iov, 2); 1830#else /* HAVE_WRITEV */ 1831 if(c->tcp_write_and_read) { 1832 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 1833 sizeof(uint16_t)-c->tcp_write_byte_count, 0); 1834 } else { 1835 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count), 1836 sizeof(uint16_t)-c->tcp_byte_count, 0); 1837 } 1838#endif /* HAVE_WRITEV */ 1839 if(r == -1) { 1840#ifndef USE_WINSOCK 1841# ifdef EPIPE 1842 if(errno == EPIPE && verbosity < 2) 1843 return 0; /* silence 'broken pipe' */ 1844 #endif 1845 if(errno == EINTR || errno == EAGAIN) 1846 return 1; 1847#ifdef ECONNRESET 1848 if(errno == ECONNRESET && verbosity < 2) 1849 return 0; /* silence reset by peer */ 1850#endif 1851# ifdef HAVE_WRITEV 1852 log_err_addr("tcp writev", strerror(errno), 1853 &c->repinfo.addr, c->repinfo.addrlen); 1854# else /* HAVE_WRITEV */ 1855 log_err_addr("tcp send s", strerror(errno), 1856 &c->repinfo.addr, c->repinfo.addrlen); 1857# endif /* HAVE_WRITEV */ 1858#else 1859 if(WSAGetLastError() == WSAENOTCONN) 1860 return 1; 1861 if(WSAGetLastError() == WSAEINPROGRESS) 1862 return 1; 1863 if(WSAGetLastError() == WSAEWOULDBLOCK) { 1864 ub_winsock_tcp_wouldblock(c->ev->ev, 1865 UB_EV_WRITE); 1866 return 1; 1867 } 1868 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 1869 return 0; /* silence reset by peer */ 1870 log_err_addr("tcp send s", 1871 wsa_strerror(WSAGetLastError()), 1872 &c->repinfo.addr, c->repinfo.addrlen); 1873#endif 1874 return 0; 1875 } 1876 if(c->tcp_write_and_read) { 1877 c->tcp_write_byte_count += r; 1878 if(c->tcp_write_byte_count < sizeof(uint16_t)) 1879 return 1; 1880 } else { 1881 c->tcp_byte_count += r; 1882 if(c->tcp_byte_count < sizeof(uint16_t)) 1883 return 1; 1884 sldns_buffer_set_position(buffer, c->tcp_byte_count - 1885 sizeof(uint16_t)); 1886 } 1887 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1888 tcp_callback_writer(c); 1889 return 1; 1890 } 1891 } 1892 log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0); 1893 log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 1894 if(c->tcp_write_and_read) { 1895 r = send(fd, (void*)c->tcp_write_pkt + c->tcp_write_byte_count - 2, 1896 c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0); 1897 } else { 1898 r = send(fd, (void*)sldns_buffer_current(buffer), 1899 sldns_buffer_remaining(buffer), 0); 1900 } 1901 if(r == -1) { 1902#ifndef USE_WINSOCK 1903 if(errno == EINTR || errno == EAGAIN) 1904 return 1; 1905#ifdef ECONNRESET 1906 if(errno == ECONNRESET && verbosity < 2) 1907 return 0; /* silence reset by peer */ 1908#endif 1909#else 1910 if(WSAGetLastError() == WSAEINPROGRESS) 1911 return 1; 1912 if(WSAGetLastError() == WSAEWOULDBLOCK) { 1913 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 1914 return 1; 1915 } 1916 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 1917 return 0; /* silence reset by peer */ 1918#endif 1919 log_err_addr("tcp send r", sock_strerror(errno), 1920 &c->repinfo.addr, c->repinfo.addrlen); 1921 return 0; 1922 } 1923 if(c->tcp_write_and_read) { 1924 c->tcp_write_byte_count += r; 1925 } else { 1926 sldns_buffer_skip(buffer, r); 1927 } 1928 1929 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 1930 tcp_callback_writer(c); 1931 } 1932 1933 return 1; 1934} 1935 1936/** read again to drain buffers when there could be more to read */ 1937static void 1938tcp_req_info_read_again(int fd, struct comm_point* c) 1939{ 1940 while(c->tcp_req_info->read_again) { 1941 int r; 1942 c->tcp_req_info->read_again = 0; 1943 if(c->tcp_is_reading) 1944 r = comm_point_tcp_handle_read(fd, c, 0); 1945 else r = comm_point_tcp_handle_write(fd, c); 1946 if(!r) { 1947 reclaim_tcp_handler(c); 1948 if(!c->tcp_do_close) { 1949 fptr_ok(fptr_whitelist_comm_point( 1950 c->callback)); 1951 (void)(*c->callback)(c, c->cb_arg, 1952 NETEVENT_CLOSED, NULL); 1953 } 1954 return; 1955 } 1956 } 1957} 1958 1959/** read again to drain buffers when there could be more to read */ 1960static void 1961tcp_more_read_again(int fd, struct comm_point* c) 1962{ 1963 /* if the packet is done, but another one could be waiting on 1964 * the connection, the callback signals this, and we try again */ 1965 /* this continues until the read routines get EAGAIN or so, 1966 * and thus does not call the callback, and the bool is 0 */ 1967 int* moreread = c->tcp_more_read_again; 1968 while(moreread && *moreread) { 1969 *moreread = 0; 1970 if(!comm_point_tcp_handle_read(fd, c, 0)) { 1971 reclaim_tcp_handler(c); 1972 if(!c->tcp_do_close) { 1973 fptr_ok(fptr_whitelist_comm_point( 1974 c->callback)); 1975 (void)(*c->callback)(c, c->cb_arg, 1976 NETEVENT_CLOSED, NULL); 1977 } 1978 return; 1979 } 1980 } 1981} 1982 1983/** write again to fill up when there could be more to write */ 1984static void 1985tcp_more_write_again(int fd, struct comm_point* c) 1986{ 1987 /* if the packet is done, but another is waiting to be written, 1988 * the callback signals it and we try again. */ 1989 /* this continues until the write routines get EAGAIN or so, 1990 * and thus does not call the callback, and the bool is 0 */ 1991 int* morewrite = c->tcp_more_write_again; 1992 while(morewrite && *morewrite) { 1993 *morewrite = 0; 1994 if(!comm_point_tcp_handle_write(fd, c)) { 1995 reclaim_tcp_handler(c); 1996 if(!c->tcp_do_close) { 1997 fptr_ok(fptr_whitelist_comm_point( 1998 c->callback)); 1999 (void)(*c->callback)(c, c->cb_arg, 2000 NETEVENT_CLOSED, NULL); 2001 } 2002 return; 2003 } 2004 } 2005} 2006 2007void 2008comm_point_tcp_handle_callback(int fd, short event, void* arg) 2009{ 2010 struct comm_point* c = (struct comm_point*)arg; 2011 log_assert(c->type == comm_tcp); 2012 ub_comm_base_now(c->ev->base); 2013 2014#ifdef USE_DNSCRYPT 2015 /* Initialize if this is a dnscrypt socket */ 2016 if(c->tcp_parent) { 2017 c->dnscrypt = c->tcp_parent->dnscrypt; 2018 } 2019 if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) { 2020 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer)); 2021 if(!c->dnscrypt_buffer) { 2022 log_err("Could not allocate dnscrypt buffer"); 2023 reclaim_tcp_handler(c); 2024 if(!c->tcp_do_close) { 2025 fptr_ok(fptr_whitelist_comm_point( 2026 c->callback)); 2027 (void)(*c->callback)(c, c->cb_arg, 2028 NETEVENT_CLOSED, NULL); 2029 } 2030 return; 2031 } 2032 } 2033#endif 2034 2035 if(event&UB_EV_TIMEOUT) { 2036 verbose(VERB_QUERY, "tcp took too long, dropped"); 2037 reclaim_tcp_handler(c); 2038 if(!c->tcp_do_close) { 2039 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2040 (void)(*c->callback)(c, c->cb_arg, 2041 NETEVENT_TIMEOUT, NULL); 2042 } 2043 return; 2044 } 2045 if(event&UB_EV_READ 2046#ifdef USE_MSG_FASTOPEN 2047 && !(c->tcp_do_fastopen && (event&UB_EV_WRITE)) 2048#endif 2049 ) { 2050 int has_tcpq = (c->tcp_req_info != NULL); 2051 int* moreread = c->tcp_more_read_again; 2052 if(!comm_point_tcp_handle_read(fd, c, 0)) { 2053 reclaim_tcp_handler(c); 2054 if(!c->tcp_do_close) { 2055 fptr_ok(fptr_whitelist_comm_point( 2056 c->callback)); 2057 (void)(*c->callback)(c, c->cb_arg, 2058 NETEVENT_CLOSED, NULL); 2059 } 2060 return; 2061 } 2062 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2063 tcp_req_info_read_again(fd, c); 2064 if(moreread && *moreread) 2065 tcp_more_read_again(fd, c); 2066 return; 2067 } 2068 if(event&UB_EV_WRITE) { 2069 int has_tcpq = (c->tcp_req_info != NULL); 2070 int* morewrite = c->tcp_more_write_again; 2071 if(!comm_point_tcp_handle_write(fd, c)) { 2072 reclaim_tcp_handler(c); 2073 if(!c->tcp_do_close) { 2074 fptr_ok(fptr_whitelist_comm_point( 2075 c->callback)); 2076 (void)(*c->callback)(c, c->cb_arg, 2077 NETEVENT_CLOSED, NULL); 2078 } 2079 return; 2080 } 2081 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) 2082 tcp_req_info_read_again(fd, c); 2083 if(morewrite && *morewrite) 2084 tcp_more_write_again(fd, c); 2085 return; 2086 } 2087 log_err("Ignored event %d for tcphdl.", event); 2088} 2089 2090/** Make http handler free for next assignment */ 2091static void 2092reclaim_http_handler(struct comm_point* c) 2093{ 2094 log_assert(c->type == comm_http); 2095 if(c->ssl) { 2096#ifdef HAVE_SSL 2097 SSL_shutdown(c->ssl); 2098 SSL_free(c->ssl); 2099 c->ssl = NULL; 2100#endif 2101 } 2102 comm_point_close(c); 2103 if(c->tcp_parent) { 2104 c->tcp_parent->cur_tcp_count--; 2105 c->tcp_free = c->tcp_parent->tcp_free; 2106 c->tcp_parent->tcp_free = c; 2107 if(!c->tcp_free) { 2108 /* re-enable listening on accept socket */ 2109 comm_point_start_listening(c->tcp_parent, -1, -1); 2110 } 2111 } 2112} 2113 2114/** read more data for http (with ssl) */ 2115static int 2116ssl_http_read_more(struct comm_point* c) 2117{ 2118#ifdef HAVE_SSL 2119 int r; 2120 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2121 ERR_clear_error(); 2122 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 2123 (int)sldns_buffer_remaining(c->buffer)); 2124 if(r <= 0) { 2125 int want = SSL_get_error(c->ssl, r); 2126 if(want == SSL_ERROR_ZERO_RETURN) { 2127 return 0; /* shutdown, closed */ 2128 } else if(want == SSL_ERROR_WANT_READ) { 2129 return 1; /* read more later */ 2130 } else if(want == SSL_ERROR_WANT_WRITE) { 2131 c->ssl_shake_state = comm_ssl_shake_hs_write; 2132 comm_point_listen_for_rw(c, 0, 1); 2133 return 1; 2134 } else if(want == SSL_ERROR_SYSCALL) { 2135#ifdef ECONNRESET 2136 if(errno == ECONNRESET && verbosity < 2) 2137 return 0; /* silence reset by peer */ 2138#endif 2139 if(errno != 0) 2140 log_err("SSL_read syscall: %s", 2141 strerror(errno)); 2142 return 0; 2143 } 2144 log_crypto_err("could not SSL_read"); 2145 return 0; 2146 } 2147 sldns_buffer_skip(c->buffer, (ssize_t)r); 2148 return 1; 2149#else 2150 (void)c; 2151 return 0; 2152#endif /* HAVE_SSL */ 2153} 2154 2155/** read more data for http */ 2156static int 2157http_read_more(int fd, struct comm_point* c) 2158{ 2159 ssize_t r; 2160 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2161 r = recv(fd, (void*)sldns_buffer_current(c->buffer), 2162 sldns_buffer_remaining(c->buffer), 0); 2163 if(r == 0) { 2164 return 0; 2165 } else if(r == -1) { 2166#ifndef USE_WINSOCK 2167 if(errno == EINTR || errno == EAGAIN) 2168 return 1; 2169#else /* USE_WINSOCK */ 2170 if(WSAGetLastError() == WSAECONNRESET) 2171 return 0; 2172 if(WSAGetLastError() == WSAEINPROGRESS) 2173 return 1; 2174 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2175 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 2176 return 1; 2177 } 2178#endif 2179 log_err_addr("read (in http r)", sock_strerror(errno), 2180 &c->repinfo.addr, c->repinfo.addrlen); 2181 return 0; 2182 } 2183 sldns_buffer_skip(c->buffer, r); 2184 return 1; 2185} 2186 2187/** return true if http header has been read (one line complete) */ 2188static int 2189http_header_done(sldns_buffer* buf) 2190{ 2191 size_t i; 2192 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2193 /* there was a \r before the \n, but we ignore that */ 2194 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') 2195 return 1; 2196 } 2197 return 0; 2198} 2199 2200/** return character string into buffer for header line, moves buffer 2201 * past that line and puts zero terminator into linefeed-newline */ 2202static char* 2203http_header_line(sldns_buffer* buf) 2204{ 2205 char* result = (char*)sldns_buffer_current(buf); 2206 size_t i; 2207 for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 2208 /* terminate the string on the \r */ 2209 if((char)sldns_buffer_read_u8_at(buf, i) == '\r') 2210 sldns_buffer_write_u8_at(buf, i, 0); 2211 /* terminate on the \n and skip past the it and done */ 2212 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') { 2213 sldns_buffer_write_u8_at(buf, i, 0); 2214 sldns_buffer_set_position(buf, i+1); 2215 return result; 2216 } 2217 } 2218 return NULL; 2219} 2220 2221/** move unread buffer to start and clear rest for putting the rest into it */ 2222static void 2223http_moveover_buffer(sldns_buffer* buf) 2224{ 2225 size_t pos = sldns_buffer_position(buf); 2226 size_t len = sldns_buffer_remaining(buf); 2227 sldns_buffer_clear(buf); 2228 memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len); 2229 sldns_buffer_set_position(buf, len); 2230} 2231 2232/** a http header is complete, process it */ 2233static int 2234http_process_initial_header(struct comm_point* c) 2235{ 2236 char* line = http_header_line(c->buffer); 2237 if(!line) return 1; 2238 verbose(VERB_ALGO, "http header: %s", line); 2239 if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) { 2240 /* check returncode */ 2241 if(line[9] != '2') { 2242 verbose(VERB_ALGO, "http bad status %s", line+9); 2243 return 0; 2244 } 2245 } else if(strncasecmp(line, "Content-Length: ", 16) == 0) { 2246 if(!c->http_is_chunked) 2247 c->tcp_byte_count = (size_t)atoi(line+16); 2248 } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) { 2249 c->tcp_byte_count = 0; 2250 c->http_is_chunked = 1; 2251 } else if(line[0] == 0) { 2252 /* end of initial headers */ 2253 c->http_in_headers = 0; 2254 if(c->http_is_chunked) 2255 c->http_in_chunk_headers = 1; 2256 /* remove header text from front of buffer 2257 * the buffer is going to be used to return the data segment 2258 * itself and we don't want the header to get returned 2259 * prepended with it */ 2260 http_moveover_buffer(c->buffer); 2261 sldns_buffer_flip(c->buffer); 2262 return 1; 2263 } 2264 /* ignore other headers */ 2265 return 1; 2266} 2267 2268/** a chunk header is complete, process it, return 0=fail, 1=continue next 2269 * header line, 2=done with chunked transfer*/ 2270static int 2271http_process_chunk_header(struct comm_point* c) 2272{ 2273 char* line = http_header_line(c->buffer); 2274 if(!line) return 1; 2275 if(c->http_in_chunk_headers == 3) { 2276 verbose(VERB_ALGO, "http chunk trailer: %s", line); 2277 /* are we done ? */ 2278 if(line[0] == 0 && c->tcp_byte_count == 0) { 2279 /* callback of http reader when NETEVENT_DONE, 2280 * end of data, with no data in buffer */ 2281 sldns_buffer_set_position(c->buffer, 0); 2282 sldns_buffer_set_limit(c->buffer, 0); 2283 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2284 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2285 /* return that we are done */ 2286 return 2; 2287 } 2288 if(line[0] == 0) { 2289 /* continue with header of the next chunk */ 2290 c->http_in_chunk_headers = 1; 2291 /* remove header text from front of buffer */ 2292 http_moveover_buffer(c->buffer); 2293 sldns_buffer_flip(c->buffer); 2294 return 1; 2295 } 2296 /* ignore further trail headers */ 2297 return 1; 2298 } 2299 verbose(VERB_ALGO, "http chunk header: %s", line); 2300 if(c->http_in_chunk_headers == 1) { 2301 /* read chunked start line */ 2302 char* end = NULL; 2303 c->tcp_byte_count = (size_t)strtol(line, &end, 16); 2304 if(end == line) 2305 return 0; 2306 c->http_in_chunk_headers = 0; 2307 /* remove header text from front of buffer */ 2308 http_moveover_buffer(c->buffer); 2309 sldns_buffer_flip(c->buffer); 2310 if(c->tcp_byte_count == 0) { 2311 /* done with chunks, process chunk_trailer lines */ 2312 c->http_in_chunk_headers = 3; 2313 } 2314 return 1; 2315 } 2316 /* ignore other headers */ 2317 return 1; 2318} 2319 2320/** handle nonchunked data segment */ 2321static int 2322http_nonchunk_segment(struct comm_point* c) 2323{ 2324 /* c->buffer at position..limit has new data we read in. 2325 * the buffer itself is full of nonchunked data. 2326 * we are looking to read tcp_byte_count more data 2327 * and then the transfer is done. */ 2328 size_t remainbufferlen; 2329 size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored; 2330 if(c->tcp_byte_count <= got_now) { 2331 /* done, this is the last data fragment */ 2332 c->http_stored = 0; 2333 sldns_buffer_set_position(c->buffer, 0); 2334 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2335 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 2336 return 1; 2337 } 2338 c->tcp_byte_count -= got_now; 2339 /* if we have the buffer space, 2340 * read more data collected into the buffer */ 2341 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2342 sldns_buffer_limit(c->buffer); 2343 if(remainbufferlen >= c->tcp_byte_count || 2344 remainbufferlen >= 2048) { 2345 size_t total = sldns_buffer_limit(c->buffer); 2346 sldns_buffer_clear(c->buffer); 2347 sldns_buffer_set_position(c->buffer, total); 2348 c->http_stored = total; 2349 /* return and wait to read more */ 2350 return 1; 2351 } 2352 /* call callback with this data amount, then 2353 * wait for more */ 2354 c->http_stored = 0; 2355 sldns_buffer_set_position(c->buffer, 0); 2356 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2357 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2358 /* c->callback has to buffer_clear(c->buffer). */ 2359 /* return and wait to read more */ 2360 return 1; 2361} 2362 2363/** handle nonchunked data segment, return 0=fail, 1=wait, 2=process more */ 2364static int 2365http_chunked_segment(struct comm_point* c) 2366{ 2367 /* the c->buffer has from position..limit new data we read. */ 2368 /* the current chunk has length tcp_byte_count. 2369 * once we read that read more chunk headers. 2370 */ 2371 size_t remainbufferlen; 2372 size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored; 2373 if(c->tcp_byte_count <= got_now) { 2374 /* the chunk has completed (with perhaps some extra data 2375 * from next chunk header and next chunk) */ 2376 /* save too much info into temp buffer */ 2377 size_t fraglen; 2378 struct comm_reply repinfo; 2379 c->http_stored = 0; 2380 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count); 2381 sldns_buffer_clear(c->http_temp); 2382 sldns_buffer_write(c->http_temp, 2383 sldns_buffer_current(c->buffer), 2384 sldns_buffer_remaining(c->buffer)); 2385 sldns_buffer_flip(c->http_temp); 2386 2387 /* callback with this fragment */ 2388 fraglen = sldns_buffer_position(c->buffer); 2389 sldns_buffer_set_position(c->buffer, 0); 2390 sldns_buffer_set_limit(c->buffer, fraglen); 2391 repinfo = c->repinfo; 2392 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2393 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo); 2394 /* c->callback has to buffer_clear(). */ 2395 2396 /* is commpoint deleted? */ 2397 if(!repinfo.c) { 2398 return 1; 2399 } 2400 /* copy waiting info */ 2401 sldns_buffer_clear(c->buffer); 2402 sldns_buffer_write(c->buffer, 2403 sldns_buffer_begin(c->http_temp), 2404 sldns_buffer_remaining(c->http_temp)); 2405 sldns_buffer_flip(c->buffer); 2406 /* process end of chunk trailer header lines, until 2407 * an empty line */ 2408 c->http_in_chunk_headers = 3; 2409 /* process more data in buffer (if any) */ 2410 return 2; 2411 } 2412 c->tcp_byte_count -= got_now; 2413 2414 /* if we have the buffer space, 2415 * read more data collected into the buffer */ 2416 remainbufferlen = sldns_buffer_capacity(c->buffer) - 2417 sldns_buffer_limit(c->buffer); 2418 if(remainbufferlen >= c->tcp_byte_count || 2419 remainbufferlen >= 2048) { 2420 size_t total = sldns_buffer_limit(c->buffer); 2421 sldns_buffer_clear(c->buffer); 2422 sldns_buffer_set_position(c->buffer, total); 2423 c->http_stored = total; 2424 /* return and wait to read more */ 2425 return 1; 2426 } 2427 2428 /* callback of http reader for a new part of the data */ 2429 c->http_stored = 0; 2430 sldns_buffer_set_position(c->buffer, 0); 2431 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2432 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 2433 /* c->callback has to buffer_clear(c->buffer). */ 2434 /* return and wait to read more */ 2435 return 1; 2436} 2437 2438#ifdef HAVE_NGHTTP2 2439/** Create new http2 session. Called when creating handling comm point. */ 2440struct http2_session* http2_session_create(struct comm_point* c) 2441{ 2442 struct http2_session* session = calloc(1, sizeof(*session)); 2443 if(!session) { 2444 log_err("malloc failure while creating http2 session"); 2445 return NULL; 2446 } 2447 session->c = c; 2448 2449 return session; 2450} 2451#endif 2452 2453/** Delete http2 session. After closing connection or on error */ 2454void http2_session_delete(struct http2_session* h2_session) 2455{ 2456#ifdef HAVE_NGHTTP2 2457 if(h2_session->callbacks) 2458 nghttp2_session_callbacks_del(h2_session->callbacks); 2459 free(h2_session); 2460#else 2461 (void)h2_session; 2462#endif 2463} 2464 2465#ifdef HAVE_NGHTTP2 2466struct http2_stream* http2_stream_create(int32_t stream_id) 2467{ 2468 struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream)); 2469 if(!h2_stream) { 2470 log_err("malloc failure while creating http2 stream"); 2471 return NULL; 2472 } 2473 h2_stream->stream_id = stream_id; 2474 return h2_stream; 2475} 2476 2477/** Delete http2 stream. After session delete or stream close callback */ 2478static void http2_stream_delete(struct http2_session* h2_session, 2479 struct http2_stream* h2_stream) 2480{ 2481 if(h2_stream->mesh_state) { 2482 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state, 2483 h2_session->c); 2484 h2_stream->mesh_state = NULL; 2485 } 2486 http2_req_stream_clear(h2_stream); 2487 free(h2_stream); 2488} 2489#endif 2490 2491void http2_stream_add_meshstate(struct http2_stream* h2_stream, 2492 struct mesh_area* mesh, struct mesh_state* m) 2493{ 2494 h2_stream->mesh = mesh; 2495 h2_stream->mesh_state = m; 2496} 2497 2498/** delete http2 session server. After closing connection. */ 2499static void http2_session_server_delete(struct http2_session* h2_session) 2500{ 2501#ifdef HAVE_NGHTTP2 2502 struct http2_stream* h2_stream, *next; 2503 nghttp2_session_del(h2_session->session); /* NULL input is fine */ 2504 h2_session->session = NULL; 2505 for(h2_stream = h2_session->first_stream; h2_stream;) { 2506 next = h2_stream->next; 2507 http2_stream_delete(h2_session, h2_stream); 2508 h2_stream = next; 2509 } 2510 h2_session->first_stream = NULL; 2511 h2_session->is_drop = 0; 2512 h2_session->postpone_drop = 0; 2513 h2_session->c->h2_stream = NULL; 2514#endif 2515 (void)h2_session; 2516} 2517 2518#ifdef HAVE_NGHTTP2 2519void http2_session_add_stream(struct http2_session* h2_session, 2520 struct http2_stream* h2_stream) 2521{ 2522 if(h2_session->first_stream) 2523 h2_session->first_stream->prev = h2_stream; 2524 h2_stream->next = h2_session->first_stream; 2525 h2_session->first_stream = h2_stream; 2526} 2527 2528/** remove stream from session linked list. After stream close callback or 2529 * closing connection */ 2530void http2_session_remove_stream(struct http2_session* h2_session, 2531 struct http2_stream* h2_stream) 2532{ 2533 if(h2_stream->prev) 2534 h2_stream->prev->next = h2_stream->next; 2535 else 2536 h2_session->first_stream = h2_stream->next; 2537 if(h2_stream->next) 2538 h2_stream->next->prev = h2_stream->prev; 2539 2540} 2541 2542int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session), 2543 int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg) 2544{ 2545 struct http2_stream* h2_stream; 2546 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2547 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2548 h2_session->session, stream_id))) { 2549 return 0; 2550 } 2551 http2_session_remove_stream(h2_session, h2_stream); 2552 http2_stream_delete(h2_session, h2_stream); 2553 return 0; 2554} 2555 2556ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf, 2557 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 2558{ 2559 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2560 ssize_t ret; 2561 2562 log_assert(h2_session->c->type == comm_http); 2563 log_assert(h2_session->c->h2_session); 2564 2565#ifdef HAVE_SSL 2566 if(h2_session->c->ssl) { 2567 int r; 2568 ERR_clear_error(); 2569 r = SSL_read(h2_session->c->ssl, buf, len); 2570 if(r <= 0) { 2571 int want = SSL_get_error(h2_session->c->ssl, r); 2572 if(want == SSL_ERROR_ZERO_RETURN) { 2573 return NGHTTP2_ERR_EOF; 2574 } else if(want == SSL_ERROR_WANT_READ) { 2575 return NGHTTP2_ERR_WOULDBLOCK; 2576 } else if(want == SSL_ERROR_WANT_WRITE) { 2577 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write; 2578 comm_point_listen_for_rw(h2_session->c, 0, 1); 2579 return NGHTTP2_ERR_WOULDBLOCK; 2580 } else if(want == SSL_ERROR_SYSCALL) { 2581#ifdef ECONNRESET 2582 if(errno == ECONNRESET && verbosity < 2) 2583 return NGHTTP2_ERR_CALLBACK_FAILURE; 2584#endif 2585 if(errno != 0) 2586 log_err("SSL_read syscall: %s", 2587 strerror(errno)); 2588 return NGHTTP2_ERR_CALLBACK_FAILURE; 2589 } 2590 log_crypto_err("could not SSL_read"); 2591 return NGHTTP2_ERR_CALLBACK_FAILURE; 2592 } 2593 return r; 2594 } 2595#endif /* HAVE_SSL */ 2596 2597 ret = recv(h2_session->c->fd, buf, len, 0); 2598 if(ret == 0) { 2599 return NGHTTP2_ERR_EOF; 2600 } else if(ret < 0) { 2601#ifndef USE_WINSOCK 2602 if(errno == EINTR || errno == EAGAIN) 2603 return NGHTTP2_ERR_WOULDBLOCK; 2604#ifdef ECONNRESET 2605 if(errno == ECONNRESET && verbosity < 2) 2606 return NGHTTP2_ERR_CALLBACK_FAILURE; 2607#endif 2608 log_err_addr("could not http2 recv: %s", strerror(errno), 2609 &h2_session->c->repinfo.addr, 2610 h2_session->c->repinfo.addrlen); 2611#else /* USE_WINSOCK */ 2612 if(WSAGetLastError() == WSAECONNRESET) 2613 return NGHTTP2_ERR_CALLBACK_FAILURE; 2614 if(WSAGetLastError() == WSAEINPROGRESS) 2615 return NGHTTP2_ERR_WOULDBLOCK; 2616 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2617 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 2618 UB_EV_READ); 2619 return NGHTTP2_ERR_WOULDBLOCK; 2620 } 2621 log_err_addr("could not http2 recv: %s", 2622 wsa_strerror(WSAGetLastError()), 2623 &h2_session->c->repinfo.addr, 2624 h2_session->c->repinfo.addrlen); 2625#endif 2626 return NGHTTP2_ERR_CALLBACK_FAILURE; 2627 } 2628 return ret; 2629} 2630#endif /* HAVE_NGHTTP2 */ 2631 2632/** Handle http2 read */ 2633static int 2634comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c) 2635{ 2636#ifdef HAVE_NGHTTP2 2637 int ret; 2638 log_assert(c->h2_session); 2639 2640 /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */ 2641 ret = nghttp2_session_recv(c->h2_session->session); 2642 if(ret) { 2643 if(ret != NGHTTP2_ERR_EOF && 2644 ret != NGHTTP2_ERR_CALLBACK_FAILURE) { 2645 char a[256]; 2646 addr_to_str(&c->repinfo.addr, c->repinfo.addrlen, 2647 a, sizeof(a)); 2648 verbose(VERB_QUERY, "http2: session_recv from %s failed, " 2649 "error: %s", a, nghttp2_strerror(ret)); 2650 } 2651 return 0; 2652 } 2653 if(nghttp2_session_want_write(c->h2_session->session)) { 2654 c->tcp_is_reading = 0; 2655 comm_point_stop_listening(c); 2656 comm_point_start_listening(c, -1, c->tcp_timeout_msec); 2657 } else if(!nghttp2_session_want_read(c->h2_session->session)) 2658 return 0; /* connection can be closed */ 2659 return 1; 2660#else 2661 (void)c; 2662 return 0; 2663#endif 2664} 2665 2666/** 2667 * Handle http reading callback. 2668 * @param fd: file descriptor of socket. 2669 * @param c: comm point to read from into buffer. 2670 * @return: 0 on error 2671 */ 2672static int 2673comm_point_http_handle_read(int fd, struct comm_point* c) 2674{ 2675 log_assert(c->type == comm_http); 2676 log_assert(fd != -1); 2677 2678 /* if we are in ssl handshake, handle SSL handshake */ 2679#ifdef HAVE_SSL 2680 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 2681 if(!ssl_handshake(c)) 2682 return 0; 2683 if(c->ssl_shake_state != comm_ssl_shake_none) 2684 return 1; 2685 } 2686#endif /* HAVE_SSL */ 2687 2688 if(!c->tcp_is_reading) 2689 return 1; 2690 2691 if(c->use_h2) { 2692 return comm_point_http2_handle_read(fd, c); 2693 } 2694 2695 /* http version is <= http/1.1 */ 2696 2697 if(c->http_min_version >= http_version_2) { 2698 /* HTTP/2 failed, not allowed to use lower version. */ 2699 return 0; 2700 } 2701 2702 /* read more data */ 2703 if(c->ssl) { 2704 if(!ssl_http_read_more(c)) 2705 return 0; 2706 } else { 2707 if(!http_read_more(fd, c)) 2708 return 0; 2709 } 2710 2711 sldns_buffer_flip(c->buffer); 2712 2713 while(sldns_buffer_remaining(c->buffer) > 0) { 2714 /* Handle HTTP/1.x data */ 2715 /* if we are reading headers, read more headers */ 2716 if(c->http_in_headers || c->http_in_chunk_headers) { 2717 /* if header is done, process the header */ 2718 if(!http_header_done(c->buffer)) { 2719 /* copy remaining data to front of buffer 2720 * and set rest for writing into it */ 2721 http_moveover_buffer(c->buffer); 2722 /* return and wait to read more */ 2723 return 1; 2724 } 2725 if(!c->http_in_chunk_headers) { 2726 /* process initial headers */ 2727 if(!http_process_initial_header(c)) 2728 return 0; 2729 } else { 2730 /* process chunk headers */ 2731 int r = http_process_chunk_header(c); 2732 if(r == 0) return 0; 2733 if(r == 2) return 1; /* done */ 2734 /* r == 1, continue */ 2735 } 2736 /* see if we have more to process */ 2737 continue; 2738 } 2739 2740 if(!c->http_is_chunked) { 2741 /* if we are reading nonchunks, process that*/ 2742 return http_nonchunk_segment(c); 2743 } else { 2744 /* if we are reading chunks, read the chunk */ 2745 int r = http_chunked_segment(c); 2746 if(r == 0) return 0; 2747 if(r == 1) return 1; 2748 continue; 2749 } 2750 } 2751 /* broke out of the loop; could not process header instead need 2752 * to read more */ 2753 /* moveover any remaining data and read more data */ 2754 http_moveover_buffer(c->buffer); 2755 /* return and wait to read more */ 2756 return 1; 2757} 2758 2759/** check pending connect for http */ 2760static int 2761http_check_connect(int fd, struct comm_point* c) 2762{ 2763 /* check for pending error from nonblocking connect */ 2764 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/ 2765 int error = 0; 2766 socklen_t len = (socklen_t)sizeof(error); 2767 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 2768 &len) < 0){ 2769#ifndef USE_WINSOCK 2770 error = errno; /* on solaris errno is error */ 2771#else /* USE_WINSOCK */ 2772 error = WSAGetLastError(); 2773#endif 2774 } 2775#ifndef USE_WINSOCK 2776#if defined(EINPROGRESS) && defined(EWOULDBLOCK) 2777 if(error == EINPROGRESS || error == EWOULDBLOCK) 2778 return 1; /* try again later */ 2779 else 2780#endif 2781 if(error != 0 && verbosity < 2) 2782 return 0; /* silence lots of chatter in the logs */ 2783 else if(error != 0) { 2784 log_err_addr("http connect", strerror(error), 2785 &c->repinfo.addr, c->repinfo.addrlen); 2786#else /* USE_WINSOCK */ 2787 /* examine error */ 2788 if(error == WSAEINPROGRESS) 2789 return 1; 2790 else if(error == WSAEWOULDBLOCK) { 2791 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2792 return 1; 2793 } else if(error != 0 && verbosity < 2) 2794 return 0; 2795 else if(error != 0) { 2796 log_err_addr("http connect", wsa_strerror(error), 2797 &c->repinfo.addr, c->repinfo.addrlen); 2798#endif /* USE_WINSOCK */ 2799 return 0; 2800 } 2801 /* keep on processing this socket */ 2802 return 2; 2803} 2804 2805/** write more data for http (with ssl) */ 2806static int 2807ssl_http_write_more(struct comm_point* c) 2808{ 2809#ifdef HAVE_SSL 2810 int r; 2811 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2812 ERR_clear_error(); 2813 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 2814 (int)sldns_buffer_remaining(c->buffer)); 2815 if(r <= 0) { 2816 int want = SSL_get_error(c->ssl, r); 2817 if(want == SSL_ERROR_ZERO_RETURN) { 2818 return 0; /* closed */ 2819 } else if(want == SSL_ERROR_WANT_READ) { 2820 c->ssl_shake_state = comm_ssl_shake_hs_read; 2821 comm_point_listen_for_rw(c, 1, 0); 2822 return 1; /* wait for read condition */ 2823 } else if(want == SSL_ERROR_WANT_WRITE) { 2824 return 1; /* write more later */ 2825 } else if(want == SSL_ERROR_SYSCALL) { 2826#ifdef EPIPE 2827 if(errno == EPIPE && verbosity < 2) 2828 return 0; /* silence 'broken pipe' */ 2829#endif 2830 if(errno != 0) 2831 log_err("SSL_write syscall: %s", 2832 strerror(errno)); 2833 return 0; 2834 } 2835 log_crypto_err("could not SSL_write"); 2836 return 0; 2837 } 2838 sldns_buffer_skip(c->buffer, (ssize_t)r); 2839 return 1; 2840#else 2841 (void)c; 2842 return 0; 2843#endif /* HAVE_SSL */ 2844} 2845 2846/** write more data for http */ 2847static int 2848http_write_more(int fd, struct comm_point* c) 2849{ 2850 ssize_t r; 2851 log_assert(sldns_buffer_remaining(c->buffer) > 0); 2852 r = send(fd, (void*)sldns_buffer_current(c->buffer), 2853 sldns_buffer_remaining(c->buffer), 0); 2854 if(r == -1) { 2855#ifndef USE_WINSOCK 2856 if(errno == EINTR || errno == EAGAIN) 2857 return 1; 2858#else 2859 if(WSAGetLastError() == WSAEINPROGRESS) 2860 return 1; 2861 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2862 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2863 return 1; 2864 } 2865#endif 2866 log_err_addr("http send r", sock_strerror(errno), 2867 &c->repinfo.addr, c->repinfo.addrlen); 2868 return 0; 2869 } 2870 sldns_buffer_skip(c->buffer, r); 2871 return 1; 2872} 2873 2874#ifdef HAVE_NGHTTP2 2875ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf, 2876 size_t len, int ATTR_UNUSED(flags), void* cb_arg) 2877{ 2878 ssize_t ret; 2879 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2880 log_assert(h2_session->c->type == comm_http); 2881 log_assert(h2_session->c->h2_session); 2882 2883#ifdef HAVE_SSL 2884 if(h2_session->c->ssl) { 2885 int r; 2886 ERR_clear_error(); 2887 r = SSL_write(h2_session->c->ssl, buf, len); 2888 if(r <= 0) { 2889 int want = SSL_get_error(h2_session->c->ssl, r); 2890 if(want == SSL_ERROR_ZERO_RETURN) { 2891 return NGHTTP2_ERR_CALLBACK_FAILURE; 2892 } else if(want == SSL_ERROR_WANT_READ) { 2893 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_read; 2894 comm_point_listen_for_rw(h2_session->c, 1, 0); 2895 return NGHTTP2_ERR_WOULDBLOCK; 2896 } else if(want == SSL_ERROR_WANT_WRITE) { 2897 return NGHTTP2_ERR_WOULDBLOCK; 2898 } else if(want == SSL_ERROR_SYSCALL) { 2899#ifdef EPIPE 2900 if(errno == EPIPE && verbosity < 2) 2901 return NGHTTP2_ERR_CALLBACK_FAILURE; 2902#endif 2903 if(errno != 0) 2904 log_err("SSL_write syscall: %s", 2905 strerror(errno)); 2906 return NGHTTP2_ERR_CALLBACK_FAILURE; 2907 } 2908 log_crypto_err("could not SSL_write"); 2909 return NGHTTP2_ERR_CALLBACK_FAILURE; 2910 } 2911 return r; 2912 } 2913#endif /* HAVE_SSL */ 2914 2915 ret = send(h2_session->c->fd, buf, len, 0); 2916 if(ret == 0) { 2917 return NGHTTP2_ERR_CALLBACK_FAILURE; 2918 } else if(ret < 0) { 2919#ifndef USE_WINSOCK 2920 if(errno == EINTR || errno == EAGAIN) 2921 return NGHTTP2_ERR_WOULDBLOCK; 2922#ifdef EPIPE 2923 if(errno == EPIPE && verbosity < 2) 2924 return NGHTTP2_ERR_CALLBACK_FAILURE; 2925#endif 2926#ifdef ECONNRESET 2927 if(errno == ECONNRESET && verbosity < 2) 2928 return NGHTTP2_ERR_CALLBACK_FAILURE; 2929#endif 2930 log_err_addr("could not http2 write: %s", strerror(errno), 2931 &h2_session->c->repinfo.addr, 2932 h2_session->c->repinfo.addrlen); 2933#else /* USE_WINSOCK */ 2934 if(WSAGetLastError() == WSAENOTCONN) 2935 return NGHTTP2_ERR_WOULDBLOCK; 2936 if(WSAGetLastError() == WSAEINPROGRESS) 2937 return NGHTTP2_ERR_WOULDBLOCK; 2938 if(WSAGetLastError() == WSAEWOULDBLOCK) { 2939 ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 2940 UB_EV_WRITE); 2941 return NGHTTP2_ERR_WOULDBLOCK; 2942 } 2943 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 2944 return NGHTTP2_ERR_CALLBACK_FAILURE; 2945 log_err_addr("could not http2 write: %s", 2946 wsa_strerror(WSAGetLastError()), 2947 &h2_session->c->repinfo.addr, 2948 h2_session->c->repinfo.addrlen); 2949#endif 2950 return NGHTTP2_ERR_CALLBACK_FAILURE; 2951 } 2952 return ret; 2953} 2954#endif /* HAVE_NGHTTP2 */ 2955 2956/** Handle http2 writing */ 2957static int 2958comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c) 2959{ 2960#ifdef HAVE_NGHTTP2 2961 int ret; 2962 log_assert(c->h2_session); 2963 2964 ret = nghttp2_session_send(c->h2_session->session); 2965 if(ret) { 2966 verbose(VERB_QUERY, "http2: session_send failed, " 2967 "error: %s", nghttp2_strerror(ret)); 2968 return 0; 2969 } 2970 2971 if(nghttp2_session_want_read(c->h2_session->session)) { 2972 c->tcp_is_reading = 1; 2973 comm_point_stop_listening(c); 2974 comm_point_start_listening(c, -1, c->tcp_timeout_msec); 2975 } else if(!nghttp2_session_want_write(c->h2_session->session)) 2976 return 0; /* connection can be closed */ 2977 return 1; 2978#else 2979 (void)c; 2980 return 0; 2981#endif 2982} 2983 2984/** 2985 * Handle http writing callback. 2986 * @param fd: file descriptor of socket. 2987 * @param c: comm point to write buffer out of. 2988 * @return: 0 on error 2989 */ 2990static int 2991comm_point_http_handle_write(int fd, struct comm_point* c) 2992{ 2993 log_assert(c->type == comm_http); 2994 log_assert(fd != -1); 2995 2996 /* check pending connect errors, if that fails, we wait for more, 2997 * or we can continue to write contents */ 2998 if(c->tcp_check_nb_connect) { 2999 int r = http_check_connect(fd, c); 3000 if(r == 0) return 0; 3001 if(r == 1) return 1; 3002 c->tcp_check_nb_connect = 0; 3003 } 3004 /* if we are in ssl handshake, handle SSL handshake */ 3005#ifdef HAVE_SSL 3006 if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3007 if(!ssl_handshake(c)) 3008 return 0; 3009 if(c->ssl_shake_state != comm_ssl_shake_none) 3010 return 1; 3011 } 3012#endif /* HAVE_SSL */ 3013 if(c->tcp_is_reading) 3014 return 1; 3015 3016 if(c->use_h2) { 3017 return comm_point_http2_handle_write(fd, c); 3018 } 3019 3020 /* http version is <= http/1.1 */ 3021 3022 if(c->http_min_version >= http_version_2) { 3023 /* HTTP/2 failed, not allowed to use lower version. */ 3024 return 0; 3025 } 3026 3027 /* if we are writing, write more */ 3028 if(c->ssl) { 3029 if(!ssl_http_write_more(c)) 3030 return 0; 3031 } else { 3032 if(!http_write_more(fd, c)) 3033 return 0; 3034 } 3035 3036 /* we write a single buffer contents, that can contain 3037 * the http request, and then flip to read the results */ 3038 /* see if write is done */ 3039 if(sldns_buffer_remaining(c->buffer) == 0) { 3040 sldns_buffer_clear(c->buffer); 3041 if(c->tcp_do_toggle_rw) 3042 c->tcp_is_reading = 1; 3043 c->tcp_byte_count = 0; 3044 /* switch from listening(write) to listening(read) */ 3045 comm_point_stop_listening(c); 3046 comm_point_start_listening(c, -1, -1); 3047 } 3048 return 1; 3049} 3050 3051void 3052comm_point_http_handle_callback(int fd, short event, void* arg) 3053{ 3054 struct comm_point* c = (struct comm_point*)arg; 3055 log_assert(c->type == comm_http); 3056 ub_comm_base_now(c->ev->base); 3057 3058 if(event&UB_EV_TIMEOUT) { 3059 verbose(VERB_QUERY, "http took too long, dropped"); 3060 reclaim_http_handler(c); 3061 if(!c->tcp_do_close) { 3062 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3063 (void)(*c->callback)(c, c->cb_arg, 3064 NETEVENT_TIMEOUT, NULL); 3065 } 3066 return; 3067 } 3068 if(event&UB_EV_READ) { 3069 if(!comm_point_http_handle_read(fd, c)) { 3070 reclaim_http_handler(c); 3071 if(!c->tcp_do_close) { 3072 fptr_ok(fptr_whitelist_comm_point( 3073 c->callback)); 3074 (void)(*c->callback)(c, c->cb_arg, 3075 NETEVENT_CLOSED, NULL); 3076 } 3077 } 3078 return; 3079 } 3080 if(event&UB_EV_WRITE) { 3081 if(!comm_point_http_handle_write(fd, c)) { 3082 reclaim_http_handler(c); 3083 if(!c->tcp_do_close) { 3084 fptr_ok(fptr_whitelist_comm_point( 3085 c->callback)); 3086 (void)(*c->callback)(c, c->cb_arg, 3087 NETEVENT_CLOSED, NULL); 3088 } 3089 } 3090 return; 3091 } 3092 log_err("Ignored event %d for httphdl.", event); 3093} 3094 3095void comm_point_local_handle_callback(int fd, short event, void* arg) 3096{ 3097 struct comm_point* c = (struct comm_point*)arg; 3098 log_assert(c->type == comm_local); 3099 ub_comm_base_now(c->ev->base); 3100 3101 if(event&UB_EV_READ) { 3102 if(!comm_point_tcp_handle_read(fd, c, 1)) { 3103 fptr_ok(fptr_whitelist_comm_point(c->callback)); 3104 (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 3105 NULL); 3106 } 3107 return; 3108 } 3109 log_err("Ignored event %d for localhdl.", event); 3110} 3111 3112void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 3113 short event, void* arg) 3114{ 3115 struct comm_point* c = (struct comm_point*)arg; 3116 int err = NETEVENT_NOERROR; 3117 log_assert(c->type == comm_raw); 3118 ub_comm_base_now(c->ev->base); 3119 3120 if(event&UB_EV_TIMEOUT) 3121 err = NETEVENT_TIMEOUT; 3122 fptr_ok(fptr_whitelist_comm_point_raw(c->callback)); 3123 (void)(*c->callback)(c, c->cb_arg, err, NULL); 3124} 3125 3126struct comm_point* 3127comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer, 3128 comm_point_callback_type* callback, void* callback_arg) 3129{ 3130 struct comm_point* c = (struct comm_point*)calloc(1, 3131 sizeof(struct comm_point)); 3132 short evbits; 3133 if(!c) 3134 return NULL; 3135 c->ev = (struct internal_event*)calloc(1, 3136 sizeof(struct internal_event)); 3137 if(!c->ev) { 3138 free(c); 3139 return NULL; 3140 } 3141 c->ev->base = base; 3142 c->fd = fd; 3143 c->buffer = buffer; 3144 c->timeout = NULL; 3145 c->tcp_is_reading = 0; 3146 c->tcp_byte_count = 0; 3147 c->tcp_parent = NULL; 3148 c->max_tcp_count = 0; 3149 c->cur_tcp_count = 0; 3150 c->tcp_handlers = NULL; 3151 c->tcp_free = NULL; 3152 c->type = comm_udp; 3153 c->tcp_do_close = 0; 3154 c->do_not_close = 0; 3155 c->tcp_do_toggle_rw = 0; 3156 c->tcp_check_nb_connect = 0; 3157#ifdef USE_MSG_FASTOPEN 3158 c->tcp_do_fastopen = 0; 3159#endif 3160#ifdef USE_DNSCRYPT 3161 c->dnscrypt = 0; 3162 c->dnscrypt_buffer = buffer; 3163#endif 3164 c->inuse = 0; 3165 c->callback = callback; 3166 c->cb_arg = callback_arg; 3167 evbits = UB_EV_READ | UB_EV_PERSIST; 3168 /* ub_event stuff */ 3169 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3170 comm_point_udp_callback, c); 3171 if(c->ev->ev == NULL) { 3172 log_err("could not baseset udp event"); 3173 comm_point_delete(c); 3174 return NULL; 3175 } 3176 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3177 log_err("could not add udp event"); 3178 comm_point_delete(c); 3179 return NULL; 3180 } 3181 return c; 3182} 3183 3184struct comm_point* 3185comm_point_create_udp_ancil(struct comm_base *base, int fd, 3186 sldns_buffer* buffer, 3187 comm_point_callback_type* callback, void* callback_arg) 3188{ 3189 struct comm_point* c = (struct comm_point*)calloc(1, 3190 sizeof(struct comm_point)); 3191 short evbits; 3192 if(!c) 3193 return NULL; 3194 c->ev = (struct internal_event*)calloc(1, 3195 sizeof(struct internal_event)); 3196 if(!c->ev) { 3197 free(c); 3198 return NULL; 3199 } 3200 c->ev->base = base; 3201 c->fd = fd; 3202 c->buffer = buffer; 3203 c->timeout = NULL; 3204 c->tcp_is_reading = 0; 3205 c->tcp_byte_count = 0; 3206 c->tcp_parent = NULL; 3207 c->max_tcp_count = 0; 3208 c->cur_tcp_count = 0; 3209 c->tcp_handlers = NULL; 3210 c->tcp_free = NULL; 3211 c->type = comm_udp; 3212 c->tcp_do_close = 0; 3213 c->do_not_close = 0; 3214#ifdef USE_DNSCRYPT 3215 c->dnscrypt = 0; 3216 c->dnscrypt_buffer = buffer; 3217#endif 3218 c->inuse = 0; 3219 c->tcp_do_toggle_rw = 0; 3220 c->tcp_check_nb_connect = 0; 3221#ifdef USE_MSG_FASTOPEN 3222 c->tcp_do_fastopen = 0; 3223#endif 3224 c->callback = callback; 3225 c->cb_arg = callback_arg; 3226 evbits = UB_EV_READ | UB_EV_PERSIST; 3227 /* ub_event stuff */ 3228 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3229 comm_point_udp_ancil_callback, c); 3230 if(c->ev->ev == NULL) { 3231 log_err("could not baseset udp event"); 3232 comm_point_delete(c); 3233 return NULL; 3234 } 3235 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 3236 log_err("could not add udp event"); 3237 comm_point_delete(c); 3238 return NULL; 3239 } 3240 return c; 3241} 3242 3243static struct comm_point* 3244comm_point_create_tcp_handler(struct comm_base *base, 3245 struct comm_point* parent, size_t bufsize, 3246 struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 3247 void* callback_arg) 3248{ 3249 struct comm_point* c = (struct comm_point*)calloc(1, 3250 sizeof(struct comm_point)); 3251 short evbits; 3252 if(!c) 3253 return NULL; 3254 c->ev = (struct internal_event*)calloc(1, 3255 sizeof(struct internal_event)); 3256 if(!c->ev) { 3257 free(c); 3258 return NULL; 3259 } 3260 c->ev->base = base; 3261 c->fd = -1; 3262 c->buffer = sldns_buffer_new(bufsize); 3263 if(!c->buffer) { 3264 free(c->ev); 3265 free(c); 3266 return NULL; 3267 } 3268 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 3269 if(!c->timeout) { 3270 sldns_buffer_free(c->buffer); 3271 free(c->ev); 3272 free(c); 3273 return NULL; 3274 } 3275 c->tcp_is_reading = 0; 3276 c->tcp_byte_count = 0; 3277 c->tcp_parent = parent; 3278 c->tcp_timeout_msec = parent->tcp_timeout_msec; 3279 c->tcp_conn_limit = parent->tcp_conn_limit; 3280 c->tcl_addr = NULL; 3281 c->tcp_keepalive = 0; 3282 c->max_tcp_count = 0; 3283 c->cur_tcp_count = 0; 3284 c->tcp_handlers = NULL; 3285 c->tcp_free = NULL; 3286 c->type = comm_tcp; 3287 c->tcp_do_close = 0; 3288 c->do_not_close = 0; 3289 c->tcp_do_toggle_rw = 1; 3290 c->tcp_check_nb_connect = 0; 3291#ifdef USE_MSG_FASTOPEN 3292 c->tcp_do_fastopen = 0; 3293#endif 3294#ifdef USE_DNSCRYPT 3295 c->dnscrypt = 0; 3296 /* We don't know just yet if this is a dnscrypt channel. Allocation 3297 * will be done when handling the callback. */ 3298 c->dnscrypt_buffer = c->buffer; 3299#endif 3300 c->repinfo.c = c; 3301 c->callback = callback; 3302 c->cb_arg = callback_arg; 3303 if(spoolbuf) { 3304 c->tcp_req_info = tcp_req_info_create(spoolbuf); 3305 if(!c->tcp_req_info) { 3306 log_err("could not create tcp commpoint"); 3307 sldns_buffer_free(c->buffer); 3308 free(c->timeout); 3309 free(c->ev); 3310 free(c); 3311 return NULL; 3312 } 3313 c->tcp_req_info->cp = c; 3314 c->tcp_do_close = 1; 3315 c->tcp_do_toggle_rw = 0; 3316 } 3317 /* add to parent free list */ 3318 c->tcp_free = parent->tcp_free; 3319 parent->tcp_free = c; 3320 /* ub_event stuff */ 3321 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 3322 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3323 comm_point_tcp_handle_callback, c); 3324 if(c->ev->ev == NULL) 3325 { 3326 log_err("could not basetset tcphdl event"); 3327 parent->tcp_free = c->tcp_free; 3328 tcp_req_info_delete(c->tcp_req_info); 3329 sldns_buffer_free(c->buffer); 3330 free(c->timeout); 3331 free(c->ev); 3332 free(c); 3333 return NULL; 3334 } 3335 return c; 3336} 3337 3338static struct comm_point* 3339comm_point_create_http_handler(struct comm_base *base, 3340 struct comm_point* parent, size_t bufsize, int harden_large_queries, 3341 uint32_t http_max_streams, char* http_endpoint, 3342 comm_point_callback_type* callback, void* callback_arg) 3343{ 3344 struct comm_point* c = (struct comm_point*)calloc(1, 3345 sizeof(struct comm_point)); 3346 short evbits; 3347 if(!c) 3348 return NULL; 3349 c->ev = (struct internal_event*)calloc(1, 3350 sizeof(struct internal_event)); 3351 if(!c->ev) { 3352 free(c); 3353 return NULL; 3354 } 3355 c->ev->base = base; 3356 c->fd = -1; 3357 c->buffer = sldns_buffer_new(bufsize); 3358 if(!c->buffer) { 3359 free(c->ev); 3360 free(c); 3361 return NULL; 3362 } 3363 c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 3364 if(!c->timeout) { 3365 sldns_buffer_free(c->buffer); 3366 free(c->ev); 3367 free(c); 3368 return NULL; 3369 } 3370 c->tcp_is_reading = 0; 3371 c->tcp_byte_count = 0; 3372 c->tcp_parent = parent; 3373 c->tcp_timeout_msec = parent->tcp_timeout_msec; 3374 c->tcp_conn_limit = parent->tcp_conn_limit; 3375 c->tcl_addr = NULL; 3376 c->tcp_keepalive = 0; 3377 c->max_tcp_count = 0; 3378 c->cur_tcp_count = 0; 3379 c->tcp_handlers = NULL; 3380 c->tcp_free = NULL; 3381 c->type = comm_http; 3382 c->tcp_do_close = 1; 3383 c->do_not_close = 0; 3384 c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */ 3385 c->tcp_check_nb_connect = 0; 3386#ifdef USE_MSG_FASTOPEN 3387 c->tcp_do_fastopen = 0; 3388#endif 3389#ifdef USE_DNSCRYPT 3390 c->dnscrypt = 0; 3391 c->dnscrypt_buffer = NULL; 3392#endif 3393 c->repinfo.c = c; 3394 c->callback = callback; 3395 c->cb_arg = callback_arg; 3396 3397 c->http_min_version = http_version_2; 3398 c->http2_stream_max_qbuffer_size = bufsize; 3399 if(harden_large_queries && bufsize > 512) 3400 c->http2_stream_max_qbuffer_size = 512; 3401 c->http2_max_streams = http_max_streams; 3402 if(!(c->http_endpoint = strdup(http_endpoint))) { 3403 log_err("could not strdup http_endpoint"); 3404 sldns_buffer_free(c->buffer); 3405 free(c->timeout); 3406 free(c->ev); 3407 free(c); 3408 return NULL; 3409 } 3410 c->use_h2 = 0; 3411#ifdef HAVE_NGHTTP2 3412 if(!(c->h2_session = http2_session_create(c))) { 3413 log_err("could not create http2 session"); 3414 free(c->http_endpoint); 3415 sldns_buffer_free(c->buffer); 3416 free(c->timeout); 3417 free(c->ev); 3418 free(c); 3419 return NULL; 3420 } 3421 if(!(c->h2_session->callbacks = http2_req_callbacks_create())) { 3422 log_err("could not create http2 callbacks"); 3423 http2_session_delete(c->h2_session); 3424 free(c->http_endpoint); 3425 sldns_buffer_free(c->buffer); 3426 free(c->timeout); 3427 free(c->ev); 3428 free(c); 3429 return NULL; 3430 } 3431#endif 3432 3433 /* add to parent free list */ 3434 c->tcp_free = parent->tcp_free; 3435 parent->tcp_free = c; 3436 /* ub_event stuff */ 3437 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 3438 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3439 comm_point_http_handle_callback, c); 3440 if(c->ev->ev == NULL) 3441 { 3442 log_err("could not set http handler event"); 3443 parent->tcp_free = c->tcp_free; 3444 http2_session_delete(c->h2_session); 3445 sldns_buffer_free(c->buffer); 3446 free(c->timeout); 3447 free(c->ev); 3448 free(c); 3449 return NULL; 3450 } 3451 return c; 3452} 3453 3454struct comm_point* 3455comm_point_create_tcp(struct comm_base *base, int fd, int num, 3456 int idle_timeout, int harden_large_queries, 3457 uint32_t http_max_streams, char* http_endpoint, 3458 struct tcl_list* tcp_conn_limit, size_t bufsize, 3459 struct sldns_buffer* spoolbuf, enum listen_type port_type, 3460 comm_point_callback_type* callback, void* callback_arg) 3461{ 3462 struct comm_point* c = (struct comm_point*)calloc(1, 3463 sizeof(struct comm_point)); 3464 short evbits; 3465 int i; 3466 /* first allocate the TCP accept listener */ 3467 if(!c) 3468 return NULL; 3469 c->ev = (struct internal_event*)calloc(1, 3470 sizeof(struct internal_event)); 3471 if(!c->ev) { 3472 free(c); 3473 return NULL; 3474 } 3475 c->ev->base = base; 3476 c->fd = fd; 3477 c->buffer = NULL; 3478 c->timeout = NULL; 3479 c->tcp_is_reading = 0; 3480 c->tcp_byte_count = 0; 3481 c->tcp_timeout_msec = idle_timeout; 3482 c->tcp_conn_limit = tcp_conn_limit; 3483 c->tcl_addr = NULL; 3484 c->tcp_keepalive = 0; 3485 c->tcp_parent = NULL; 3486 c->max_tcp_count = num; 3487 c->cur_tcp_count = 0; 3488 c->tcp_handlers = (struct comm_point**)calloc((size_t)num, 3489 sizeof(struct comm_point*)); 3490 if(!c->tcp_handlers) { 3491 free(c->ev); 3492 free(c); 3493 return NULL; 3494 } 3495 c->tcp_free = NULL; 3496 c->type = comm_tcp_accept; 3497 c->tcp_do_close = 0; 3498 c->do_not_close = 0; 3499 c->tcp_do_toggle_rw = 0; 3500 c->tcp_check_nb_connect = 0; 3501#ifdef USE_MSG_FASTOPEN 3502 c->tcp_do_fastopen = 0; 3503#endif 3504#ifdef USE_DNSCRYPT 3505 c->dnscrypt = 0; 3506 c->dnscrypt_buffer = NULL; 3507#endif 3508 c->callback = NULL; 3509 c->cb_arg = NULL; 3510 evbits = UB_EV_READ | UB_EV_PERSIST; 3511 /* ub_event stuff */ 3512 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3513 comm_point_tcp_accept_callback, c); 3514 if(c->ev->ev == NULL) { 3515 log_err("could not baseset tcpacc event"); 3516 comm_point_delete(c); 3517 return NULL; 3518 } 3519 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3520 log_err("could not add tcpacc event"); 3521 comm_point_delete(c); 3522 return NULL; 3523 } 3524 /* now prealloc the handlers */ 3525 for(i=0; i<num; i++) { 3526 if(port_type == listen_type_tcp || 3527 port_type == listen_type_ssl || 3528 port_type == listen_type_tcp_dnscrypt) { 3529 c->tcp_handlers[i] = comm_point_create_tcp_handler(base, 3530 c, bufsize, spoolbuf, callback, callback_arg); 3531 } else if(port_type == listen_type_http) { 3532 c->tcp_handlers[i] = comm_point_create_http_handler( 3533 base, c, bufsize, harden_large_queries, 3534 http_max_streams, http_endpoint, 3535 callback, callback_arg); 3536 } 3537 else { 3538 log_err("could not create tcp handler, unknown listen " 3539 "type"); 3540 return NULL; 3541 } 3542 if(!c->tcp_handlers[i]) { 3543 comm_point_delete(c); 3544 return NULL; 3545 } 3546 } 3547 3548 return c; 3549} 3550 3551struct comm_point* 3552comm_point_create_tcp_out(struct comm_base *base, size_t bufsize, 3553 comm_point_callback_type* callback, void* callback_arg) 3554{ 3555 struct comm_point* c = (struct comm_point*)calloc(1, 3556 sizeof(struct comm_point)); 3557 short evbits; 3558 if(!c) 3559 return NULL; 3560 c->ev = (struct internal_event*)calloc(1, 3561 sizeof(struct internal_event)); 3562 if(!c->ev) { 3563 free(c); 3564 return NULL; 3565 } 3566 c->ev->base = base; 3567 c->fd = -1; 3568 c->buffer = sldns_buffer_new(bufsize); 3569 if(!c->buffer) { 3570 free(c->ev); 3571 free(c); 3572 return NULL; 3573 } 3574 c->timeout = NULL; 3575 c->tcp_is_reading = 0; 3576 c->tcp_byte_count = 0; 3577 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT; 3578 c->tcp_conn_limit = NULL; 3579 c->tcl_addr = NULL; 3580 c->tcp_keepalive = 0; 3581 c->tcp_parent = NULL; 3582 c->max_tcp_count = 0; 3583 c->cur_tcp_count = 0; 3584 c->tcp_handlers = NULL; 3585 c->tcp_free = NULL; 3586 c->type = comm_tcp; 3587 c->tcp_do_close = 0; 3588 c->do_not_close = 0; 3589 c->tcp_do_toggle_rw = 1; 3590 c->tcp_check_nb_connect = 1; 3591#ifdef USE_MSG_FASTOPEN 3592 c->tcp_do_fastopen = 1; 3593#endif 3594#ifdef USE_DNSCRYPT 3595 c->dnscrypt = 0; 3596 c->dnscrypt_buffer = c->buffer; 3597#endif 3598 c->repinfo.c = c; 3599 c->callback = callback; 3600 c->cb_arg = callback_arg; 3601 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3602 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3603 comm_point_tcp_handle_callback, c); 3604 if(c->ev->ev == NULL) 3605 { 3606 log_err("could not baseset tcpout event"); 3607 sldns_buffer_free(c->buffer); 3608 free(c->ev); 3609 free(c); 3610 return NULL; 3611 } 3612 3613 return c; 3614} 3615 3616struct comm_point* 3617comm_point_create_http_out(struct comm_base *base, size_t bufsize, 3618 comm_point_callback_type* callback, void* callback_arg, 3619 sldns_buffer* temp) 3620{ 3621 struct comm_point* c = (struct comm_point*)calloc(1, 3622 sizeof(struct comm_point)); 3623 short evbits; 3624 if(!c) 3625 return NULL; 3626 c->ev = (struct internal_event*)calloc(1, 3627 sizeof(struct internal_event)); 3628 if(!c->ev) { 3629 free(c); 3630 return NULL; 3631 } 3632 c->ev->base = base; 3633 c->fd = -1; 3634 c->buffer = sldns_buffer_new(bufsize); 3635 if(!c->buffer) { 3636 free(c->ev); 3637 free(c); 3638 return NULL; 3639 } 3640 c->timeout = NULL; 3641 c->tcp_is_reading = 0; 3642 c->tcp_byte_count = 0; 3643 c->tcp_parent = NULL; 3644 c->max_tcp_count = 0; 3645 c->cur_tcp_count = 0; 3646 c->tcp_handlers = NULL; 3647 c->tcp_free = NULL; 3648 c->type = comm_http; 3649 c->tcp_do_close = 0; 3650 c->do_not_close = 0; 3651 c->tcp_do_toggle_rw = 1; 3652 c->tcp_check_nb_connect = 1; 3653 c->http_in_headers = 1; 3654 c->http_in_chunk_headers = 0; 3655 c->http_is_chunked = 0; 3656 c->http_temp = temp; 3657#ifdef USE_MSG_FASTOPEN 3658 c->tcp_do_fastopen = 1; 3659#endif 3660#ifdef USE_DNSCRYPT 3661 c->dnscrypt = 0; 3662 c->dnscrypt_buffer = c->buffer; 3663#endif 3664 c->repinfo.c = c; 3665 c->callback = callback; 3666 c->cb_arg = callback_arg; 3667 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3668 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3669 comm_point_http_handle_callback, c); 3670 if(c->ev->ev == NULL) 3671 { 3672 log_err("could not baseset tcpout event"); 3673#ifdef HAVE_SSL 3674 SSL_free(c->ssl); 3675#endif 3676 sldns_buffer_free(c->buffer); 3677 free(c->ev); 3678 free(c); 3679 return NULL; 3680 } 3681 3682 return c; 3683} 3684 3685struct comm_point* 3686comm_point_create_local(struct comm_base *base, int fd, size_t bufsize, 3687 comm_point_callback_type* callback, void* callback_arg) 3688{ 3689 struct comm_point* c = (struct comm_point*)calloc(1, 3690 sizeof(struct comm_point)); 3691 short evbits; 3692 if(!c) 3693 return NULL; 3694 c->ev = (struct internal_event*)calloc(1, 3695 sizeof(struct internal_event)); 3696 if(!c->ev) { 3697 free(c); 3698 return NULL; 3699 } 3700 c->ev->base = base; 3701 c->fd = fd; 3702 c->buffer = sldns_buffer_new(bufsize); 3703 if(!c->buffer) { 3704 free(c->ev); 3705 free(c); 3706 return NULL; 3707 } 3708 c->timeout = NULL; 3709 c->tcp_is_reading = 1; 3710 c->tcp_byte_count = 0; 3711 c->tcp_parent = NULL; 3712 c->max_tcp_count = 0; 3713 c->cur_tcp_count = 0; 3714 c->tcp_handlers = NULL; 3715 c->tcp_free = NULL; 3716 c->type = comm_local; 3717 c->tcp_do_close = 0; 3718 c->do_not_close = 1; 3719 c->tcp_do_toggle_rw = 0; 3720 c->tcp_check_nb_connect = 0; 3721#ifdef USE_MSG_FASTOPEN 3722 c->tcp_do_fastopen = 0; 3723#endif 3724#ifdef USE_DNSCRYPT 3725 c->dnscrypt = 0; 3726 c->dnscrypt_buffer = c->buffer; 3727#endif 3728 c->callback = callback; 3729 c->cb_arg = callback_arg; 3730 /* ub_event stuff */ 3731 evbits = UB_EV_PERSIST | UB_EV_READ; 3732 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3733 comm_point_local_handle_callback, c); 3734 if(c->ev->ev == NULL) { 3735 log_err("could not baseset localhdl event"); 3736 free(c->ev); 3737 free(c); 3738 return NULL; 3739 } 3740 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3741 log_err("could not add localhdl event"); 3742 ub_event_free(c->ev->ev); 3743 free(c->ev); 3744 free(c); 3745 return NULL; 3746 } 3747 return c; 3748} 3749 3750struct comm_point* 3751comm_point_create_raw(struct comm_base* base, int fd, int writing, 3752 comm_point_callback_type* callback, void* callback_arg) 3753{ 3754 struct comm_point* c = (struct comm_point*)calloc(1, 3755 sizeof(struct comm_point)); 3756 short evbits; 3757 if(!c) 3758 return NULL; 3759 c->ev = (struct internal_event*)calloc(1, 3760 sizeof(struct internal_event)); 3761 if(!c->ev) { 3762 free(c); 3763 return NULL; 3764 } 3765 c->ev->base = base; 3766 c->fd = fd; 3767 c->buffer = NULL; 3768 c->timeout = NULL; 3769 c->tcp_is_reading = 0; 3770 c->tcp_byte_count = 0; 3771 c->tcp_parent = NULL; 3772 c->max_tcp_count = 0; 3773 c->cur_tcp_count = 0; 3774 c->tcp_handlers = NULL; 3775 c->tcp_free = NULL; 3776 c->type = comm_raw; 3777 c->tcp_do_close = 0; 3778 c->do_not_close = 1; 3779 c->tcp_do_toggle_rw = 0; 3780 c->tcp_check_nb_connect = 0; 3781#ifdef USE_MSG_FASTOPEN 3782 c->tcp_do_fastopen = 0; 3783#endif 3784#ifdef USE_DNSCRYPT 3785 c->dnscrypt = 0; 3786 c->dnscrypt_buffer = c->buffer; 3787#endif 3788 c->callback = callback; 3789 c->cb_arg = callback_arg; 3790 /* ub_event stuff */ 3791 if(writing) 3792 evbits = UB_EV_PERSIST | UB_EV_WRITE; 3793 else evbits = UB_EV_PERSIST | UB_EV_READ; 3794 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 3795 comm_point_raw_handle_callback, c); 3796 if(c->ev->ev == NULL) { 3797 log_err("could not baseset rawhdl event"); 3798 free(c->ev); 3799 free(c); 3800 return NULL; 3801 } 3802 if (ub_event_add(c->ev->ev, c->timeout) != 0) { 3803 log_err("could not add rawhdl event"); 3804 ub_event_free(c->ev->ev); 3805 free(c->ev); 3806 free(c); 3807 return NULL; 3808 } 3809 return c; 3810} 3811 3812void 3813comm_point_close(struct comm_point* c) 3814{ 3815 if(!c) 3816 return; 3817 if(c->fd != -1) { 3818 verbose(5, "comm_point_close of %d: event_del", c->fd); 3819 if(ub_event_del(c->ev->ev) != 0) { 3820 log_err("could not event_del on close"); 3821 } 3822 } 3823 tcl_close_connection(c->tcl_addr); 3824 if(c->tcp_req_info) 3825 tcp_req_info_clear(c->tcp_req_info); 3826 if(c->h2_session) 3827 http2_session_server_delete(c->h2_session); 3828 3829 /* close fd after removing from event lists, or epoll.. is messed up */ 3830 if(c->fd != -1 && !c->do_not_close) { 3831 if(c->type == comm_tcp || c->type == comm_http) { 3832 /* delete sticky events for the fd, it gets closed */ 3833 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 3834 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3835 } 3836 verbose(VERB_ALGO, "close fd %d", c->fd); 3837 sock_close(c->fd); 3838 } 3839 c->fd = -1; 3840} 3841 3842void 3843comm_point_delete(struct comm_point* c) 3844{ 3845 if(!c) 3846 return; 3847 if((c->type == comm_tcp || c->type == comm_http) && c->ssl) { 3848#ifdef HAVE_SSL 3849 SSL_shutdown(c->ssl); 3850 SSL_free(c->ssl); 3851#endif 3852 } 3853 if(c->type == comm_http && c->http_endpoint) { 3854 free(c->http_endpoint); 3855 c->http_endpoint = NULL; 3856 } 3857 comm_point_close(c); 3858 if(c->tcp_handlers) { 3859 int i; 3860 for(i=0; i<c->max_tcp_count; i++) 3861 comm_point_delete(c->tcp_handlers[i]); 3862 free(c->tcp_handlers); 3863 } 3864 free(c->timeout); 3865 if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) { 3866 sldns_buffer_free(c->buffer); 3867#ifdef USE_DNSCRYPT 3868 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) { 3869 sldns_buffer_free(c->dnscrypt_buffer); 3870 } 3871#endif 3872 if(c->tcp_req_info) { 3873 tcp_req_info_delete(c->tcp_req_info); 3874 } 3875 if(c->h2_session) { 3876 http2_session_delete(c->h2_session); 3877 } 3878 } 3879 ub_event_free(c->ev->ev); 3880 free(c->ev); 3881 free(c); 3882} 3883 3884void 3885comm_point_send_reply(struct comm_reply *repinfo) 3886{ 3887 struct sldns_buffer* buffer; 3888 log_assert(repinfo && repinfo->c); 3889#ifdef USE_DNSCRYPT 3890 buffer = repinfo->c->dnscrypt_buffer; 3891 if(!dnsc_handle_uncurved_request(repinfo)) { 3892 return; 3893 } 3894#else 3895 buffer = repinfo->c->buffer; 3896#endif 3897 if(repinfo->c->type == comm_udp) { 3898 if(repinfo->srctype) 3899 comm_point_send_udp_msg_if(repinfo->c, 3900 buffer, (struct sockaddr*)&repinfo->addr, 3901 repinfo->addrlen, repinfo); 3902 else 3903 comm_point_send_udp_msg(repinfo->c, buffer, 3904 (struct sockaddr*)&repinfo->addr, repinfo->addrlen); 3905#ifdef USE_DNSTAP 3906 if(repinfo->c->dtenv != NULL && 3907 repinfo->c->dtenv->log_client_response_messages) 3908 dt_msg_send_client_response(repinfo->c->dtenv, 3909 &repinfo->addr, repinfo->c->type, repinfo->c->buffer); 3910#endif 3911 } else { 3912#ifdef USE_DNSTAP 3913 if(repinfo->c->tcp_parent->dtenv != NULL && 3914 repinfo->c->tcp_parent->dtenv->log_client_response_messages) 3915 dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, 3916 &repinfo->addr, repinfo->c->type, 3917 ( repinfo->c->tcp_req_info 3918 ? repinfo->c->tcp_req_info->spool_buffer 3919 : repinfo->c->buffer )); 3920#endif 3921 if(repinfo->c->tcp_req_info) { 3922 tcp_req_info_send_reply(repinfo->c->tcp_req_info); 3923 } else if(repinfo->c->use_h2) { 3924 if(!http2_submit_dns_response(repinfo->c->h2_session)) { 3925 comm_point_drop_reply(repinfo); 3926 return; 3927 } 3928 repinfo->c->h2_stream = NULL; 3929 repinfo->c->tcp_is_reading = 0; 3930 comm_point_stop_listening(repinfo->c); 3931 comm_point_start_listening(repinfo->c, -1, 3932 repinfo->c->tcp_timeout_msec); 3933 return; 3934 } else { 3935 comm_point_start_listening(repinfo->c, -1, 3936 repinfo->c->tcp_timeout_msec); 3937 } 3938 } 3939} 3940 3941void 3942comm_point_drop_reply(struct comm_reply* repinfo) 3943{ 3944 if(!repinfo) 3945 return; 3946 log_assert(repinfo->c); 3947 log_assert(repinfo->c->type != comm_tcp_accept); 3948 if(repinfo->c->type == comm_udp) 3949 return; 3950 if(repinfo->c->tcp_req_info) 3951 repinfo->c->tcp_req_info->is_drop = 1; 3952 if(repinfo->c->type == comm_http) { 3953 if(repinfo->c->h2_session) { 3954 repinfo->c->h2_session->is_drop = 1; 3955 if(!repinfo->c->h2_session->postpone_drop) 3956 reclaim_http_handler(repinfo->c); 3957 return; 3958 } 3959 reclaim_http_handler(repinfo->c); 3960 return; 3961 } 3962 reclaim_tcp_handler(repinfo->c); 3963} 3964 3965void 3966comm_point_stop_listening(struct comm_point* c) 3967{ 3968 verbose(VERB_ALGO, "comm point stop listening %d", c->fd); 3969 if(ub_event_del(c->ev->ev) != 0) { 3970 log_err("event_del error to stoplisten"); 3971 } 3972} 3973 3974void 3975comm_point_start_listening(struct comm_point* c, int newfd, int msec) 3976{ 3977 verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 3978 c->fd==-1?newfd:c->fd, msec); 3979 if(c->type == comm_tcp_accept && !c->tcp_free) { 3980 /* no use to start listening no free slots. */ 3981 return; 3982 } 3983 if(msec != -1 && msec != 0) { 3984 if(!c->timeout) { 3985 c->timeout = (struct timeval*)malloc(sizeof( 3986 struct timeval)); 3987 if(!c->timeout) { 3988 log_err("cpsl: malloc failed. No net read."); 3989 return; 3990 } 3991 } 3992 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT); 3993#ifndef S_SPLINT_S /* splint fails on struct timeval. */ 3994 c->timeout->tv_sec = msec/1000; 3995 c->timeout->tv_usec = (msec%1000)*1000; 3996#endif /* S_SPLINT_S */ 3997 } 3998 if(c->type == comm_tcp || c->type == comm_http) { 3999 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4000 if(c->tcp_write_and_read) { 4001 verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd)); 4002 ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4003 } else if(c->tcp_is_reading) { 4004 verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd)); 4005 ub_event_add_bits(c->ev->ev, UB_EV_READ); 4006 } else { 4007 verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd)); 4008 ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4009 } 4010 } 4011 if(newfd != -1) { 4012 if(c->fd != -1 && c->fd != newfd) { 4013 verbose(5, "cpsl close of fd %d for %d", c->fd, newfd); 4014 sock_close(c->fd); 4015 } 4016 c->fd = newfd; 4017 ub_event_set_fd(c->ev->ev, c->fd); 4018 } 4019 if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) { 4020 log_err("event_add failed. in cpsl."); 4021 } 4022} 4023 4024void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr) 4025{ 4026 verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr); 4027 if(ub_event_del(c->ev->ev) != 0) { 4028 log_err("event_del error to cplf"); 4029 } 4030 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4031 if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ); 4032 if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4033 if(ub_event_add(c->ev->ev, c->timeout) != 0) { 4034 log_err("event_add failed. in cplf."); 4035 } 4036} 4037 4038size_t comm_point_get_mem(struct comm_point* c) 4039{ 4040 size_t s; 4041 if(!c) 4042 return 0; 4043 s = sizeof(*c) + sizeof(*c->ev); 4044 if(c->timeout) 4045 s += sizeof(*c->timeout); 4046 if(c->type == comm_tcp || c->type == comm_local) { 4047 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer); 4048#ifdef USE_DNSCRYPT 4049 s += sizeof(*c->dnscrypt_buffer); 4050 if(c->buffer != c->dnscrypt_buffer) { 4051 s += sldns_buffer_capacity(c->dnscrypt_buffer); 4052 } 4053#endif 4054 } 4055 if(c->type == comm_tcp_accept) { 4056 int i; 4057 for(i=0; i<c->max_tcp_count; i++) 4058 s += comm_point_get_mem(c->tcp_handlers[i]); 4059 } 4060 return s; 4061} 4062 4063struct comm_timer* 4064comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg) 4065{ 4066 struct internal_timer *tm = (struct internal_timer*)calloc(1, 4067 sizeof(struct internal_timer)); 4068 if(!tm) { 4069 log_err("malloc failed"); 4070 return NULL; 4071 } 4072 tm->super.ev_timer = tm; 4073 tm->base = base; 4074 tm->super.callback = cb; 4075 tm->super.cb_arg = cb_arg; 4076 tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 4077 comm_timer_callback, &tm->super); 4078 if(tm->ev == NULL) { 4079 log_err("timer_create: event_base_set failed."); 4080 free(tm); 4081 return NULL; 4082 } 4083 return &tm->super; 4084} 4085 4086void 4087comm_timer_disable(struct comm_timer* timer) 4088{ 4089 if(!timer) 4090 return; 4091 ub_timer_del(timer->ev_timer->ev); 4092 timer->ev_timer->enabled = 0; 4093} 4094 4095void 4096comm_timer_set(struct comm_timer* timer, struct timeval* tv) 4097{ 4098 log_assert(tv); 4099 if(timer->ev_timer->enabled) 4100 comm_timer_disable(timer); 4101 if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base, 4102 comm_timer_callback, timer, tv) != 0) 4103 log_err("comm_timer_set: evtimer_add failed."); 4104 timer->ev_timer->enabled = 1; 4105} 4106 4107void 4108comm_timer_delete(struct comm_timer* timer) 4109{ 4110 if(!timer) 4111 return; 4112 comm_timer_disable(timer); 4113 /* Free the sub struct timer->ev_timer derived from the super struct timer. 4114 * i.e. assert(timer == timer->ev_timer) 4115 */ 4116 ub_event_free(timer->ev_timer->ev); 4117 free(timer->ev_timer); 4118} 4119 4120void 4121comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg) 4122{ 4123 struct comm_timer* tm = (struct comm_timer*)arg; 4124 if(!(event&UB_EV_TIMEOUT)) 4125 return; 4126 ub_comm_base_now(tm->ev_timer->base); 4127 tm->ev_timer->enabled = 0; 4128 fptr_ok(fptr_whitelist_comm_timer(tm->callback)); 4129 (*tm->callback)(tm->cb_arg); 4130} 4131 4132int 4133comm_timer_is_set(struct comm_timer* timer) 4134{ 4135 return (int)timer->ev_timer->enabled; 4136} 4137 4138size_t 4139comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer)) 4140{ 4141 return sizeof(struct internal_timer); 4142} 4143 4144struct comm_signal* 4145comm_signal_create(struct comm_base* base, 4146 void (*callback)(int, void*), void* cb_arg) 4147{ 4148 struct comm_signal* com = (struct comm_signal*)malloc( 4149 sizeof(struct comm_signal)); 4150 if(!com) { 4151 log_err("malloc failed"); 4152 return NULL; 4153 } 4154 com->base = base; 4155 com->callback = callback; 4156 com->cb_arg = cb_arg; 4157 com->ev_signal = NULL; 4158 return com; 4159} 4160 4161void 4162comm_signal_callback(int sig, short event, void* arg) 4163{ 4164 struct comm_signal* comsig = (struct comm_signal*)arg; 4165 if(!(event & UB_EV_SIGNAL)) 4166 return; 4167 ub_comm_base_now(comsig->base); 4168 fptr_ok(fptr_whitelist_comm_signal(comsig->callback)); 4169 (*comsig->callback)(sig, comsig->cb_arg); 4170} 4171 4172int 4173comm_signal_bind(struct comm_signal* comsig, int sig) 4174{ 4175 struct internal_signal* entry = (struct internal_signal*)calloc(1, 4176 sizeof(struct internal_signal)); 4177 if(!entry) { 4178 log_err("malloc failed"); 4179 return 0; 4180 } 4181 log_assert(comsig); 4182 /* add signal event */ 4183 entry->ev = ub_signal_new(comsig->base->eb->base, sig, 4184 comm_signal_callback, comsig); 4185 if(entry->ev == NULL) { 4186 log_err("Could not create signal event"); 4187 free(entry); 4188 return 0; 4189 } 4190 if(ub_signal_add(entry->ev, NULL) != 0) { 4191 log_err("Could not add signal handler"); 4192 ub_event_free(entry->ev); 4193 free(entry); 4194 return 0; 4195 } 4196 /* link into list */ 4197 entry->next = comsig->ev_signal; 4198 comsig->ev_signal = entry; 4199 return 1; 4200} 4201 4202void 4203comm_signal_delete(struct comm_signal* comsig) 4204{ 4205 struct internal_signal* p, *np; 4206 if(!comsig) 4207 return; 4208 p=comsig->ev_signal; 4209 while(p) { 4210 np = p->next; 4211 ub_signal_del(p->ev); 4212 ub_event_free(p->ev); 4213 free(p); 4214 p = np; 4215 } 4216 free(comsig); 4217} 4218