/* $NetBSD: netmgr-int.h,v 1.1 2024/02/18 20:57:55 christos Exp $ */

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#pragma once

#include <unistd.h>
#include <uv.h>

#include <openssl/err.h>
#include <openssl/ssl.h>

#include <isc/astack.h>
#include <isc/atomic.h>
#include <isc/barrier.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/quota.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/rwlock.h>
#include <isc/sockaddr.h>
#include <isc/stats.h>
#include <isc/thread.h>
#include <isc/util.h>

#include "uv-compat.h"

#define ISC_NETMGR_TID_UNKNOWN -1

/* Must be different from ISC_NETMGR_TID_UNKNOWN */
#define ISC_NETMGR_NON_INTERLOCKED -2

/*
 * Receive buffers
 */
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
/*
 * The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv
 * source; libuv will not receive more than 20 datagrams in a single
 * recvmmsg call.
 */
#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
#else
/*
 * A single DNS message size
 */
#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
#endif

/*
 * The TCP receive buffer can fit one maximum-sized DNS message plus its
 * length prefix; this receive buffer affects TCP, DoT and DoH.
 */
#define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)

/* Pick the larger buffer */
#define ISC_NETMGR_RECVBUF_SIZE \
        (ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \
                 ? ISC_NETMGR_UDP_RECVBUF_SIZE \
                 : ISC_NETMGR_TCP_RECVBUF_SIZE)

/*
 * Send buffer
 */
#define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)

/*%
 * Regular TCP buffer size.
 */
#define NM_REG_BUF 4096

/*%
 * Larger buffer for when the regular one isn't enough; this will
 * hold two full DNS packets with lengths. netmgr receives 64k at
 * most in TCPDNS connections, so there's no risk of overrun
 * when using a buffer this size.
 */
#define NM_BIG_BUF (ISC_NETMGR_TCP_RECVBUF_SIZE * 2)
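/*
 * Worked example (informative only): with UINT16_MAX == 65535 and
 * sizeof(uint16_t) == 2, the sizes above come out to:
 *
 *      ISC_NETMGR_UDP_RECVBUF_SIZE = 20 * 65535 = 1310700 (recvmmsg case)
 *      ISC_NETMGR_TCP_RECVBUF_SIZE =  2 + 65535 =   65537
 *      ISC_NETMGR_RECVBUF_SIZE     = the larger = 1310700
 *      NM_BIG_BUF                  =  2 * 65537 =  131074
 */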
/*
 * Define NETMGR_TRACE to activate tracing of handles and sockets.
 * This will impair performance, but if netmgr resources haven't been
 * cleaned up on shutdown, it enables us to quickly determine which
 * ones are still in use.
 */
#ifdef NETMGR_TRACE
#define TRACE_SIZE 8

void
isc__nm_dump_active(isc_nm_t *nm);

#if defined(__linux__)
#include <syscall.h>
#define gettid() (uint32_t) syscall(SYS_gettid)
#elif defined(_WIN32)
#define gettid() (uint32_t) GetCurrentThreadId()
#else
#define gettid() (uint32_t) pthread_self()
#endif

#ifdef NETMGR_TRACE_VERBOSE
#define NETMGR_TRACE_LOG(format, ...) \
        fprintf(stderr, "%" PRIu32 ":%d:%s:%u:%s:" format, gettid(), \
                isc_nm_tid(), file, line, func, __VA_ARGS__)
#else
#define NETMGR_TRACE_LOG(format, ...) \
        (void)file; \
        (void)line; \
        (void)func;
#endif

#define FLARG_PASS , file, line, func
#define FLARG \
        , const char *file __attribute__((unused)), \
                unsigned int line __attribute__((unused)), \
                const char *func __attribute__((unused))
#define FLARG_IEVENT(ievent) \
        const char *file = ievent->file; \
        unsigned int line = ievent->line; \
        const char *func = ievent->func;
#define FLARG_IEVENT_PASS(ievent) \
        ievent->file = file; \
        ievent->line = line; \
        ievent->func = func;
#define isc__nm_uvreq_get(req, sock) \
        isc___nm_uvreq_get(req, sock, __FILE__, __LINE__, __func__)
#define isc__nm_uvreq_put(req, sock) \
        isc___nm_uvreq_put(req, sock, __FILE__, __LINE__, __func__)
#define isc__nmsocket_init(sock, mgr, type, iface) \
        isc___nmsocket_init(sock, mgr, type, iface, __FILE__, __LINE__, \
                            __func__)
#define isc__nmsocket_put(sockp) \
        isc___nmsocket_put(sockp, __FILE__, __LINE__, __func__)
#define isc__nmsocket_attach(sock, target) \
        isc___nmsocket_attach(sock, target, __FILE__, __LINE__, __func__)
#define isc__nmsocket_detach(socketp) \
        isc___nmsocket_detach(socketp, __FILE__, __LINE__, __func__)
#define isc__nmsocket_close(socketp) \
        isc___nmsocket_close(socketp, __FILE__, __LINE__, __func__)
#define isc__nmhandle_get(sock, peer, local) \
        isc___nmhandle_get(sock, peer, local, __FILE__, __LINE__, __func__)
#define isc__nmsocket_prep_destroy(sock) \
        isc___nmsocket_prep_destroy(sock, __FILE__, __LINE__, __func__)
#else
#define NETMGR_TRACE_LOG(format, ...)

#define FLARG_PASS
#define FLARG
#define FLARG_IEVENT(ievent)
#define FLARG_IEVENT_PASS(ievent)
#define isc__nm_uvreq_get(req, sock) isc___nm_uvreq_get(req, sock)
#define isc__nm_uvreq_put(req, sock) isc___nm_uvreq_put(req, sock)
#define isc__nmsocket_init(sock, mgr, type, iface) \
        isc___nmsocket_init(sock, mgr, type, iface)
#define isc__nmsocket_put(sockp) isc___nmsocket_put(sockp)
#define isc__nmsocket_attach(sock, target) isc___nmsocket_attach(sock, target)
#define isc__nmsocket_detach(socketp) isc___nmsocket_detach(socketp)
#define isc__nmsocket_close(socketp) isc___nmsocket_close(socketp)
#define isc__nmhandle_get(sock, peer, local) \
        isc___nmhandle_get(sock, peer, local)
#define isc__nmsocket_prep_destroy(sock) isc___nmsocket_prep_destroy(sock)
#endif
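/*
 * Usage sketch (informative; isc___nm_example() and isc___nm_other() are
 * hypothetical names): an internal function is declared with FLARG so that,
 * when NETMGR_TRACE is defined, the caller's file/line/function are threaded
 * through it, and forwarded onward with FLARG_PASS:
 *
 *      void
 *      isc___nm_example(isc_nmsocket_t *sock FLARG) {
 *              NETMGR_TRACE_LOG("example %p\n", sock);
 *              isc___nm_other(sock FLARG_PASS);
 *      }
 *
 * The public name is then a macro that injects __FILE__, __LINE__ and
 * __func__ at the call site, mirroring the isc__nm_uvreq_get() wrapper
 * above.
 */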
/*
 * Queue types in the order of processing priority.
 */
typedef enum {
        NETIEVENT_PRIORITY = 0,
        NETIEVENT_PRIVILEGED = 1,
        NETIEVENT_TASK = 2,
        NETIEVENT_NORMAL = 3,
        NETIEVENT_MAX = 4,
} netievent_type_t;

typedef struct isc__nm_uvreq isc__nm_uvreq_t;
typedef struct isc__netievent isc__netievent_t;

typedef ISC_LIST(isc__netievent_t) isc__netievent_list_t;

typedef struct ievent {
        isc_mutex_t lock;
        isc_condition_t cond;
        isc__netievent_list_t list;
} ievent_t;
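/*
 * Illustrative sketch only (the real loop lives in netmgr.c, and
 * process_queue() is a hypothetical helper): each worker keeps one ievent_t
 * queue per netievent_type_t (see isc__networker_t below) and drains them in
 * enum order, so NETIEVENT_PRIORITY events are always processed first:
 *
 *      for (netievent_type_t t = NETIEVENT_PRIORITY; t < NETIEVENT_MAX; t++) {
 *              process_queue(worker, t);
 *      }
 */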
/*
 * Single network event loop worker.
 */
typedef struct isc__networker {
        isc_nm_t *mgr;
        int id;           /* thread id */
        uv_loop_t loop;   /* libuv loop structure */
        uv_async_t async; /* async channel to send
                           * data to this networker */
        bool paused;
        bool finished;
        isc_thread_t thread;
        ievent_t ievents[NETIEVENT_MAX];

        isc_refcount_t references;
        atomic_int_fast64_t pktcount;
        char *recvbuf;
        char *sendbuf;
        bool recvbuf_inuse;
} isc__networker_t;

/*
 * A general handle for a connection bound to a networker. For UDP
 * connections we have the peer address here, so both TCP and UDP can be
 * handled with a simple send-like function.
 */
#define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D')
#define VALID_NMHANDLE(t) \
        (ISC_MAGIC_VALID(t, NMHANDLE_MAGIC) && \
         atomic_load(&(t)->references) > 0)

typedef void (*isc__nm_closecb)(isc_nmhandle_t *);

struct isc_nmhandle {
        int magic;
        isc_refcount_t references;

        /*
         * The socket is not 'attached' in the traditional
         * reference-counting sense. Instead, we keep all handles in an
         * array in the socket object. This way, we don't have circular
         * dependencies and we can close all handles when we're destroying
         * the socket.
         */
        isc_nmsocket_t *sock;

        isc_sockaddr_t peer;
        isc_sockaddr_t local;
        isc_nm_opaquecb_t doreset; /* reset extra callback, external */
        isc_nm_opaquecb_t dofree;  /* free extra callback, external */
#ifdef NETMGR_TRACE
        void *backtrace[TRACE_SIZE];
        int backtrace_size;
        LINK(isc_nmhandle_t) active_link;
#endif
        void *opaque;
        char extra[];
};

typedef enum isc__netievent_type {
        netievent_udpconnect,
        netievent_udpclose,
        netievent_udpsend,
        netievent_udpread,
        netievent_udpcancel,

        netievent_tcpconnect,
        netievent_tcpclose,
        netievent_tcpsend,
        netievent_tcpstartread,
        netievent_tcppauseread,
        netievent_tcpaccept,
        netievent_tcpcancel,

        netievent_tcpdnsaccept,
        netievent_tcpdnsconnect,
        netievent_tcpdnsclose,
        netievent_tcpdnssend,
        netievent_tcpdnsread,
        netievent_tcpdnscancel,

        netievent_shutdown,
        netievent_stop,
        netievent_pause,

        netievent_connectcb,
        netievent_readcb,
        netievent_sendcb,

        netievent_task,
        netievent_privilegedtask,

        /*
         * Event type values higher than this will be treated
         * as high-priority events, which can be processed
         * while the netmgr is pausing or paused.
         */
        netievent_prio = 0xff,

        netievent_udplisten,
        netievent_udpstop,
        netievent_tcplisten,
        netievent_tcpstop,
        netievent_tcpdnslisten,
        netievent_tcpdnsstop,

        netievent_resume,
        netievent_detach,
        netievent_close,
} isc__netievent_type;

typedef union {
        isc_nm_recv_cb_t recv;
        isc_nm_cb_t send;
        isc_nm_cb_t connect;
        isc_nm_accept_cb_t accept;
} isc__nm_cb_t;

/*
 * Wrapper around uv_req_t with 'our' fields in it. req->data should
 * always point to its parent. Note that we always allocate more than
 * sizeof(struct) because we make room for different req types.
 */
#define UVREQ_MAGIC ISC_MAGIC('N', 'M', 'U', 'R')
#define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC)

struct isc__nm_uvreq {
        int magic;
        isc_nmsocket_t *sock;
        isc_nmhandle_t *handle;
        char tcplen[2];        /* The TCP DNS message length */
        uv_buf_t uvbuf;        /* translated isc_region_t, to be
                                * sent or received */
        isc_sockaddr_t local;  /* local address */
        isc_sockaddr_t peer;   /* peer address */
        isc__nm_cb_t cb;       /* callback */
        void *cbarg;           /* callback argument */
        isc_nm_timer_t *timer; /* TCP write timer */

        union {
                uv_handle_t handle;
                uv_req_t req;
                uv_getaddrinfo_t getaddrinfo;
                uv_getnameinfo_t getnameinfo;
                uv_shutdown_t shutdown;
                uv_write_t write;
                uv_connect_t connect;
                uv_udp_send_t udp_send;
                uv_fs_t fs;
                uv_work_t work;
        } uv_req;
        ISC_LINK(isc__nm_uvreq_t) link;
};

struct isc_nm_timer {
        isc_refcount_t references;
        uv_timer_t timer;
        isc_nmhandle_t *handle;
        isc_nm_timer_cb cb;
        void *cbarg;
};

void *
isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type);
/*%<
 * Allocate an ievent and set the type.
 */
void
isc__nm_put_netievent(isc_nm_t *mgr, void *ievent);

/*
 * The macros here are used to simulate "inheritance" in C: there's the base
 * netievent structure that contains just its own type and socket, and there
 * are extended netievent types that also have handles or requests or other
 * data.
 *
 * The macros here ensure that:
 *
 * 1. every netievent type has a matching definition, declaration and
 *    implementation
 *
 * 2. we handle all the netievent types of the same subclass the same way,
 *    e.g. if the extended netievent contains a handle, we always attach to
 *    the handle in the ctor and detach from the handle in the dtor.
 *
 * There are three macros here for each netievent subclass:
 *
 * 1. NETIEVENT_*_TYPE(type) creates the typedef for each type; used below in
 *    this header
 *
 * 2. NETIEVENT_*_DECL(type) generates the declaration of the get and put
 *    functions (isc__nm_get_netievent_* and isc__nm_put_netievent_*); used
 *    below in this header
 *
 * 3. NETIEVENT_*_DEF(type) generates the definition of the functions; used
 *    either in netmgr.c or the matching protocol file (e.g. udp.c, tcp.c,
 *    etc.)
 */
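/*
 * Expansion sketch (informative) for a hypothetical netievent type "foo"
 * using the NETIEVENT_SOCKET_* family defined below:
 *
 *      NETIEVENT_SOCKET_TYPE(foo);
 *       -> typedef isc__netievent__socket_t isc__netievent_foo_t;
 *
 *      NETIEVENT_SOCKET_DECL(foo);
 *       -> isc__netievent_foo_t *
 *          isc__nm_get_netievent_foo(isc_nm_t *nm, isc_nmsocket_t *sock);
 *          void
 *          isc__nm_put_netievent_foo(isc_nm_t *nm,
 *                                    isc__netievent_foo_t *ievent);
 *
 * NETIEVENT_SOCKET_DEF(foo) then generates the matching bodies, which
 * attach the socket in the "get" constructor and detach it in the "put"
 * destructor.
 */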
#define NETIEVENT__SOCKET \
        isc__netievent_type type; \
        ISC_LINK(isc__netievent_t) link; \
        isc_nmsocket_t *sock; \
        const char *file; \
        unsigned int line; \
        const char *func

typedef struct isc__netievent__socket {
        NETIEVENT__SOCKET;
} isc__netievent__socket_t;

#define NETIEVENT_SOCKET_TYPE(type) \
        typedef isc__netievent__socket_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_DECL(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock); \
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_DEF(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock) { \
                isc__netievent_##type##_t *ievent = \
                        isc__nm_get_netievent(nm, netievent_##type); \
                isc__nmsocket_attach(sock, &ievent->sock); \
\
                return (ievent); \
        } \
\
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent) { \
                isc__nmsocket_detach(&ievent->sock); \
                isc__nm_put_netievent(nm, ievent); \
        }

typedef struct isc__netievent__socket_req {
        NETIEVENT__SOCKET;
        isc__nm_uvreq_t *req;
} isc__netievent__socket_req_t;

#define NETIEVENT_SOCKET_REQ_TYPE(type) \
        typedef isc__netievent__socket_req_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_REQ_DECL(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req); \
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_REQ_DEF(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { \
                isc__netievent_##type##_t *ievent = \
                        isc__nm_get_netievent(nm, netievent_##type); \
                isc__nmsocket_attach(sock, &ievent->sock); \
                ievent->req = req; \
\
                return (ievent); \
        } \
\
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent) { \
                isc__nmsocket_detach(&ievent->sock); \
                isc__nm_put_netievent(nm, ievent); \
        }

typedef struct isc__netievent__socket_req_result {
        NETIEVENT__SOCKET;
        isc__nm_uvreq_t *req;
        isc_result_t result;
} isc__netievent__socket_req_result_t;

#define NETIEVENT_SOCKET_REQ_RESULT_TYPE(type) \
        typedef isc__netievent__socket_req_result_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_REQ_RESULT_DECL(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \
                isc_result_t result); \
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_REQ_RESULT_DEF(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \
                isc_result_t result) { \
                isc__netievent_##type##_t *ievent = \
                        isc__nm_get_netievent(nm, netievent_##type); \
                isc__nmsocket_attach(sock, &ievent->sock); \
                ievent->req = req; \
                ievent->result = result; \
\
                return (ievent); \
        } \
\
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent) { \
                isc__nmsocket_detach(&ievent->sock); \
                isc__nm_put_netievent(nm, ievent); \
        }
typedef struct isc__netievent__socket_handle {
        NETIEVENT__SOCKET;
        isc_nmhandle_t *handle;
} isc__netievent__socket_handle_t;

#define NETIEVENT_SOCKET_HANDLE_TYPE(type) \
        typedef isc__netievent__socket_handle_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_HANDLE_DECL(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle); \
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_HANDLE_DEF(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle) { \
                isc__netievent_##type##_t *ievent = \
                        isc__nm_get_netievent(nm, netievent_##type); \
                isc__nmsocket_attach(sock, &ievent->sock); \
                isc_nmhandle_attach(handle, &ievent->handle); \
\
                return (ievent); \
        } \
\
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent) { \
                isc__nmsocket_detach(&ievent->sock); \
                isc_nmhandle_detach(&ievent->handle); \
                isc__nm_put_netievent(nm, ievent); \
        }

typedef struct isc__netievent__socket_quota {
        NETIEVENT__SOCKET;
        isc_quota_t *quota;
} isc__netievent__socket_quota_t;

#define NETIEVENT_SOCKET_QUOTA_TYPE(type) \
        typedef isc__netievent__socket_quota_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_QUOTA_DECL(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota); \
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_QUOTA_DEF(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota) { \
                isc__netievent_##type##_t *ievent = \
                        isc__nm_get_netievent(nm, netievent_##type); \
                isc__nmsocket_attach(sock, &ievent->sock); \
                ievent->quota = quota; \
\
                return (ievent); \
        } \
\
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent) { \
                isc__nmsocket_detach(&ievent->sock); \
                isc__nm_put_netievent(nm, ievent); \
        }

typedef struct isc__netievent__task {
        isc__netievent_type type;
        ISC_LINK(isc__netievent_t) link;
        isc_task_t *task;
} isc__netievent__task_t;

#define NETIEVENT_TASK_TYPE(type) \
        typedef isc__netievent__task_t isc__netievent_##type##_t;

#define NETIEVENT_TASK_DECL(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_task_t *task); \
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent);

#define NETIEVENT_TASK_DEF(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm, isc_task_t *task) { \
                isc__netievent_##type##_t *ievent = \
                        isc__nm_get_netievent(nm, netievent_##type); \
                ievent->task = task; \
\
                return (ievent); \
        } \
\
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent) { \
                ievent->task = NULL; \
                isc__nm_put_netievent(nm, ievent); \
        }

typedef struct isc__netievent_udpsend {
        NETIEVENT__SOCKET;
        isc_sockaddr_t peer;
        isc__nm_uvreq_t *req;
} isc__netievent_udpsend_t;

struct isc__netievent {
        isc__netievent_type type;
        ISC_LINK(isc__netievent_t) link;
};
#define NETIEVENT_TYPE(type) \
        typedef isc__netievent_t isc__netievent_##type##_t

#define NETIEVENT_DECL(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type(isc_nm_t *nm); \
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent)

#define NETIEVENT_DEF(type) \
        isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
                isc_nm_t *nm) { \
                isc__netievent_##type##_t *ievent = \
                        isc__nm_get_netievent(nm, netievent_##type); \
\
                return (ievent); \
        } \
\
        void isc__nm_put_netievent_##type(isc_nm_t *nm, \
                                          isc__netievent_##type##_t *ievent) { \
                isc__nm_put_netievent(nm, ievent); \
        }

typedef union {
        isc__netievent_t ni;
        isc__netievent__socket_t nis;
        isc__netievent__socket_req_t nisr;
        isc__netievent_udpsend_t nius;
        isc__netievent__socket_quota_t nisq;
} isc__netievent_storage_t;
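/*
 * Informative note: isc__netievent_storage_t exists so that any netievent
 * variant fits in a single fixed-size allocation; sizeof() of the union is
 * the size of its largest member, so one free list can recycle events of
 * every type. A sketch of the idea (the real allocator is
 * isc__nm_get_netievent(), declared above and defined in netmgr.c):
 *
 *      isc__netievent_storage_t *storage =
 *              isc_mem_get(mgr->mctx, sizeof(*storage));
 */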
/*
 * Work item for a uv_work threadpool.
 */
typedef struct isc__nm_work {
        isc_nm_t *netmgr;
        uv_work_t req;
        isc_nm_workcb_t cb;
        isc_nm_after_workcb_t after_cb;
        void *data;
} isc__nm_work_t;

/*
 * Network manager
 */
#define NM_MAGIC ISC_MAGIC('N', 'E', 'T', 'M')
#define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC)

struct isc_nm {
        int magic;
        isc_refcount_t references;
        isc_mem_t *mctx;
        int nworkers;
        int nlisteners;
        isc_mutex_t lock;
        isc_condition_t wkstatecond;
        isc_condition_t wkpausecond;
        isc__networker_t *workers;

        isc_stats_t *stats;

        uint_fast32_t workers_running;
        atomic_uint_fast32_t workers_paused;
        atomic_uint_fast32_t maxudp;

        bool load_balance_sockets;

        atomic_bool paused;

        /*
         * Active connections are being closed and new connections are
         * no longer allowed.
         */
        atomic_bool closing;

        /*
         * A worker is actively waiting for other workers, for example to
         * stop listening; that means no other thread can do the same thing
         * or pause, or we'll deadlock. We have to either re-enqueue our
         * event or wait for the other one to finish if we want to pause.
         */
        atomic_int interlocked;

        /*
         * Timeout values for TCP connections, corresponding to
         * tcp-initial-timeout, tcp-idle-timeout, tcp-keepalive-timeout,
         * and tcp-advertised-timeout. Note that these are stored in
         * milliseconds so they can be used directly with the libuv timer,
         * but they are configured in tenths of seconds (e.g. a configured
         * value of 300 is stored as 30000 ms).
         */
        atomic_uint_fast32_t init;
        atomic_uint_fast32_t idle;
        atomic_uint_fast32_t keepalive;
        atomic_uint_fast32_t advertised;

        isc_barrier_t pausing;
        isc_barrier_t resuming;

#ifdef NETMGR_TRACE
        ISC_LIST(isc_nmsocket_t) active_sockets;
#endif
};

typedef enum isc_nmsocket_type {
        isc_nm_udpsocket,
        isc_nm_udplistener, /* Aggregate of nm_udpsocks */
        isc_nm_tcpsocket,
        isc_nm_tcplistener,
        isc_nm_tcpdnslistener,
        isc_nm_tcpdnssocket,
} isc_nmsocket_type;

/*%
 * A universal structure for either a single socket or a group of
 * dup'd/SO_REUSEPORT-using sockets listening on the same interface.
 */
#define NMSOCK_MAGIC ISC_MAGIC('N', 'M', 'S', 'K')
#define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC)

/*%
 * Index into socket stat counter arrays.
 */
enum {
        STATID_OPEN = 0,
        STATID_OPENFAIL = 1,
        STATID_CLOSE = 2,
        STATID_BINDFAIL = 3,
        STATID_CONNECTFAIL = 4,
        STATID_CONNECT = 5,
        STATID_ACCEPTFAIL = 6,
        STATID_ACCEPT = 7,
        STATID_SENDFAIL = 8,
        STATID_RECVFAIL = 9,
        STATID_ACTIVE = 10
};

typedef void (*isc_nm_closehandlecb_t)(void *arg);
/*%<
 * Opaque callback function, used for isc_nmhandle 'reset' and 'free'
 * callbacks.
 */

struct isc_nmsocket {
        /*% Unlocked, RO */
        int magic;
        int tid;
        isc_nmsocket_type type;
        isc_nm_t *mgr;

        /*% Parent socket for multithreaded listeners */
        isc_nmsocket_t *parent;
        /*% Listener socket this connection was accepted on */
        isc_nmsocket_t *listener;
        /*% Self socket */
        isc_nmsocket_t *self;

        isc_barrier_t startlistening;
        isc_barrier_t stoplistening;

        /*%
         * quota is the TCP client quota, attached when a TCP connection
         * is established. pquota is a non-attached pointer to the
         * TCP client quota, stored in listening sockets but only
         * attached in connected sockets.
         */
        isc_quota_t *quota;
        isc_quota_t *pquota;
        isc_quota_cb_t quotacb;

        /*%
         * Socket statistics
         */
        const isc_statscounter_t *statsindex;

        /*%
         * TCP read/connect timeout timers.
         */
        uv_timer_t read_timer;
        uint64_t read_timeout;
        uint64_t connect_timeout;

        /*%
         * TCP write timeout timer.
         */
        uint64_t write_timeout;

        /*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */
        isc_nmsocket_t *outer;

        /*% server socket for connections */
        isc_nmsocket_t *server;

        /*% Child sockets for multi-socket setups */
        isc_nmsocket_t *children;
        uint_fast32_t nchildren;
        isc_sockaddr_t iface;
        isc_nmhandle_t *statichandle;
        isc_nmhandle_t *outerhandle;

        /*% Extra data allocated at the end of each isc_nmhandle_t */
        size_t extrahandlesize;

        /*% TCP backlog */
        int backlog;

        /*% libuv data */
        uv_os_sock_t fd;
        union uv_any_handle uv_handle;

        /*% Peer address */
        isc_sockaddr_t peer;

        /* Atomic */
        /*% Number of running (e.g. listening) child sockets */
        atomic_uint_fast32_t rchildren;

        /*%
         * Socket is active if it's listening, working, etc. If it's
         * closing, then it doesn't make sense, for example, to
         * push handles or reqs for reuse.
         */
        atomic_bool active;
        atomic_bool destroying;

        /*%
         * Socket is closed if it's not active and all the possible
         * callbacks were fired, there are no active handles, etc.
         * If active==false but closed==false, that means the socket
         * is closing.
         */
        atomic_bool closing;
        atomic_bool closed;
        atomic_bool listening;
        atomic_bool connecting;
        atomic_bool connected;
        bool accepting;
        bool reading;
        atomic_bool timedout;
        isc_refcount_t references;

        /*%
         * Established an outgoing connection, i.e. acting as a client,
         * not a server.
         */
        atomic_bool client;

        /*%
         * TCPDNS socket has been set not to pipeline.
         */
        atomic_bool sequential;

        /*%
         * The socket is processing its read callback; this guards
         * against reading more data before the read callback returns.
         */
        bool processing;

        /*%
         * A TCP socket has had isc_nm_pauseread() called.
         */
        atomic_bool readpaused;
        /*%
         * A TCP or TCPDNS socket has been set to use the keepalive
         * timeout instead of the default idle timeout.
         */
        atomic_bool keepalive;

        /*%
         * 'spare' handles that can be reused to avoid allocations,
         * for UDP.
         */
        isc_astack_t *inactivehandles;
        isc_astack_t *inactivereqs;

        /*%
         * Used to wait for TCP listening events to complete, and
         * for the number of running children to reach zero during
         * shutdown.
         *
         * We use two condition variables to prevent the race where the
         * netmgr threads would be able to finish and destroy the socket
         * before it's unlocked by the isc_nm_listen<proto>() function.
         * The flow is as follows:
         *
         *   1. the parent thread creates all the children sockets and
         *      passes them to the netthreads, then looks at the signaling
         *      variable and WAIT(cond) until the children are done
         *      initializing
         *
         *   2. the events are picked up by the netthreads, which call the
         *      libuv API (and either succeed or fail) and WAIT(scond)
         *      until all the other children sockets in the netthreads are
         *      initialized and the listening socket lock is unlocked
         *
         *   3. control is given back to the parent thread, which now
         *      either returns success or shuts down the listener if an
         *      error occurred in a children netthread
         *
         * NOTE: The other approach would be doing an extra attach to the
         * parent listening socket, and then detaching it in the parent
         * thread, but that breaks the promise that once the libuv socket
         * is initialized on the nmsocket, the nmsocket must be handled
         * only by the matching netthread; that would add complexity in
         * that isc__nmsocket_detach() would have to be converted to use
         * an asynchronous netievent.
         */
        isc_mutex_t lock;
        isc_condition_t cond;
        isc_condition_t scond;

        /*%
         * Used to pass a result back from listen or connect events.
         */
        isc_result_t result;

        /*%
         * Current number of active handles.
         */
        atomic_int_fast32_t ah;

        /*% Buffer for TCPDNS processing */
        size_t buf_size;
        size_t buf_len;
        unsigned char *buf;

        /*%
         * This function will be called with handle->sock
         * as the argument whenever a handle's references drop
         * to zero, after its reset callback has been called.
         */
        isc_nm_closehandlecb_t closehandle_cb;

        isc_nmhandle_t *recv_handle;
        isc_nm_recv_cb_t recv_cb;
        void *recv_cbarg;
        bool recv_read;

        isc_nm_cb_t connect_cb;
        void *connect_cbarg;

        isc_nm_accept_cb_t accept_cb;
        void *accept_cbarg;

        atomic_int_fast32_t active_child_connections;

#ifdef NETMGR_TRACE
        void *backtrace[TRACE_SIZE];
        int backtrace_size;
        LINK(isc_nmsocket_t) active_link;
        ISC_LIST(isc_nmhandle_t) active_handles;
#endif
};

bool
isc__nm_in_netthread(void);
/*%
 * Returns 'true' if we're in the network thread.
 */

void
isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
/*%<
 * If the caller is already in the matching netthread, process the netievent
 * directly; if not, enqueue it using isc__nm_enqueue_ievent().
 */

void
isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
/*%<
 * Enqueue an ievent onto a specific worker queue. (This is the only safe
 * way to use an isc__networker_t from another thread.)
 */
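/*
 * Dispatch sketch (informative): isc__nm_maybe_enqueue_ievent() behaves
 * roughly as
 *
 *      if (worker->id == isc_nm_tid()) {
 *              ... process the event in place ...
 *      } else {
 *              isc__nm_enqueue_ievent(worker, event);
 *      }
 *
 * where isc_nm_tid() returns the current netthread's id (presumably
 * ISC_NETMGR_TID_UNKNOWN when called from outside the netmgr).
 */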
void
isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf);
/*%<
 * Free a buffer allocated for a receive operation.
 *
 * Note that as currently implemented, this doesn't actually free
 * anything; it just marks the isc__networker's UDP receive buffer
 * as "not in use".
 */

isc_nmhandle_t *
isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
                   isc_sockaddr_t *local FLARG);
/*%<
 * Get a handle for the socket 'sock', allocating a new one
 * if there isn't one available in 'sock->inactivehandles'.
 *
 * If 'peer' is not NULL, set the handle's peer address to 'peer',
 * otherwise set it to 'sock->peer'.
 *
 * If 'local' is not NULL, set the handle's local address to 'local',
 * otherwise set it to 'sock->iface'.
 *
 * 'sock' will be attached to 'handle->sock'. The caller may need
 * to detach the socket afterward.
 */

isc__nm_uvreq_t *
isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG);
/*%<
 * Get a UV request structure for the socket 'sock', allocating a
 * new one if there isn't one available in 'sock->inactivereqs'.
 */

void
isc___nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock FLARG);
/*%<
 * Completes the use of a UV request structure, setting '*req' to NULL.
 *
 * The UV request is pushed onto the 'sock->inactivereqs' stack or,
 * if that doesn't work, freed.
 */

void
isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
                    isc_sockaddr_t *iface FLARG);
/*%<
 * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type'
 * and its interface to 'iface'.
 */

void
isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG);
/*%<
 * Attach to a socket, increasing the refcount.
 */

void
isc___nmsocket_detach(isc_nmsocket_t **socketp FLARG);
/*%<
 * Detach from a socket, decreasing the refcount and possibly destroying
 * the socket if it's no longer referenced.
 */
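/*
 * Usage sketch (informative; it is assumed here, per the usual ISC
 * attach/detach convention, that detach resets the pointer to NULL):
 *
 *      isc_nmsocket_t *ref = NULL;
 *      isc__nmsocket_attach(sock, &ref);  (references++)
 *      ...
 *      isc__nmsocket_detach(&ref);        (references--, may destroy sock)
 */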
void
isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG);
/*%<
 * Mark 'sock' as inactive, close it if necessary, and destroy it
 * if there are no remaining references or active handles.
 */

void
isc__nmsocket_shutdown(isc_nmsocket_t *sock);
/*%<
 * Initiate the socket shutdown, which actively calls the active
 * callbacks.
 */

bool
isc__nmsocket_active(isc_nmsocket_t *sock);
/*%<
 * Determine whether 'sock' is active by checking 'sock->active'
 * or, for child sockets, 'sock->parent->active'.
 */

bool
isc__nmsocket_deactivate(isc_nmsocket_t *sock);
/*%<
 * @brief Deactivate an active socket
 *
 * Atomically deactivate the socket by setting @p sock->active or, for child
 * sockets, @p sock->parent->active to @c false
 *
 * @param[in] sock - valid nmsocket
 * @return @c false if the socket was already inactive, @c true otherwise
 */

void
isc__nmsocket_clearcb(isc_nmsocket_t *sock);
/*%<
 * Clear the recv and accept callbacks in 'sock'.
 */

void
isc__nmsocket_timer_stop(isc_nmsocket_t *sock);
void
isc__nmsocket_timer_start(isc_nmsocket_t *sock);
void
isc__nmsocket_timer_restart(isc_nmsocket_t *sock);
bool
isc__nmsocket_timer_running(isc_nmsocket_t *sock);
/*%<
 * Start/stop/restart/check the timeout on the socket.
 */

void
isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
                  isc_result_t eresult, bool async);

void
isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Issue a connect callback on the socket, used to call the callback
 * on failed conditions when the event can't be scheduled on the uv loop.
 */

void
isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
               isc_result_t eresult);
void
isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Issue a read callback on the socket, used to call the callback
 * on failed conditions when the event can't be scheduled on the uv loop.
 */

void
isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
               isc_result_t eresult, bool async);
void
isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Issue a write callback on the socket, used to call the callback
 * on failed conditions when the event can't be scheduled on the uv loop.
 */

void
isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Walk through all uv handles, get the underlying sockets and issue
 * close on them.
 */

void
isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region,
                 isc_nm_cb_t cb, void *cbarg);
/*%<
 * Back-end implementation of isc_nm_send() for UDP handles.
 */

void
isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*
 * Back-end implementation of isc_nm_read() for UDP handles.
 */

void
isc__nm_udp_close(isc_nmsocket_t *sock);
/*%<
 * Close a UDP socket.
 */

void
isc__nm_udp_cancelread(isc_nmhandle_t *handle);
/*%<
 * Stop reading on a connected UDP handle.
 */

void
isc__nm_udp_shutdown(isc_nmsocket_t *sock);
/*%<
 * Called during the shutdown process to close and clean up connected
 * sockets.
 */

void
isc__nm_udp_stoplistening(isc_nmsocket_t *sock);
/*%<
 * Stop listening on 'sock'.
 */

void
isc__nm_udp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
/*%<
 * Set or clear the recv timeout for the UDP socket associated with 'handle'.
 */

void
isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Callback handlers for asynchronous UDP events (listen, stoplisten, send).
 */
void
isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
                 isc_nm_cb_t cb, void *cbarg);
/*%<
 * Back-end implementation of isc_nm_send() for TCP handles.
 */

void
isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*
 * Back-end implementation of isc_nm_read() for TCP handles.
 */

void
isc__nm_tcp_close(isc_nmsocket_t *sock);
/*%<
 * Close a TCP socket.
 */

void
isc__nm_tcp_pauseread(isc_nmhandle_t *handle);
/*%<
 * Pause reading on this handle while still remembering the callback.
 */

void
isc__nm_tcp_resumeread(isc_nmhandle_t *handle);
/*%<
 * Resume reading from the socket.
 */

void
isc__nm_tcp_shutdown(isc_nmsocket_t *sock);
/*%<
 * Called during the shutdown process to close and clean up connected
 * sockets.
 */

void
isc__nm_tcp_cancelread(isc_nmhandle_t *handle);
/*%<
 * Stop reading on a connected TCP handle.
 */

void
isc__nm_tcp_stoplistening(isc_nmsocket_t *sock);
/*%<
 * Stop listening on 'sock'.
 */

int_fast32_t
isc__nm_tcp_listener_nactive(isc_nmsocket_t *sock);
/*%<
 * Returns the number of active connections for the TCP listener socket.
 */

void
isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
/*%<
 * Set the read timeout for the TCP socket associated with 'handle'.
 */

void
isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Callback handlers for asynchronous TCP events (connect, listen,
 * stoplisten, send, read, pause, close).
 */

void
isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
                    isc_nm_cb_t cb, void *cbarg);
/*%<
 * Back-end implementation of isc_nm_send() for TCPDNS handles.
 */

void
isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock);

void
isc__nm_tcpdns_close(isc_nmsocket_t *sock);
/*%<
 * Close a TCPDNS socket.
 */

void
isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock);
/*%<
 * Stop listening on 'sock'.
 */
void
isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
/*%<
 * Set the read timeout and reset the timer for the TCPDNS socket
 * associated with 'handle', and the TCP socket it wraps around.
 */

void
isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Callback handlers for asynchronous TCPDNS events.
 */

void
isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*
 * Back-end implementation of isc_nm_read() for TCPDNS handles.
 */

void
isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle);
/*%<
 * Stop reading on a connected TCPDNS handle.
 */

#define isc__nm_uverr2result(x) \
        isc___nm_uverr2result(x, true, __FILE__, __LINE__, __func__)
isc_result_t
isc___nm_uverr2result(int uverr, bool dolog, const char *file,
                      unsigned int line, const char *func);
/*%<
 * Convert a libuv error value into an isc_result_t. The
 * list of supported error values is not complete; new users
 * of this function should add any expected errors that are
 * not already there.
 */
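/*
 * Usage sketch (informative), converting a libuv return code at a call
 * site; uv_listen() is used purely for illustration:
 *
 *      int r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, backlog, cb);
 *      if (r != 0) {
 *              return (isc__nm_uverr2result(r));
 *      }
 */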
bool
isc__nm_acquire_interlocked(isc_nm_t *mgr);
/*%<
 * Try to acquire interlocked state; return true if successful.
 */

void
isc__nm_drop_interlocked(isc_nm_t *mgr);
/*%<
 * Drop interlocked state; signal waiters.
 */

void
isc__nm_acquire_interlocked_force(isc_nm_t *mgr);
/*%<
 * Actively wait for interlocked state.
 */

void
isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid);
/*%<
 * Increment socket-related statistics counters.
 */

void
isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid);
/*%<
 * Decrement socket-related statistics counters.
 */

isc_result_t
isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp);
/*%<
 * Platform-independent socket() version
 */

void
isc__nm_closesocket(uv_os_sock_t sock);
/*%<
 * Platform-independent closesocket() version
 */

isc_result_t
isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family);
/*%<
 * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle
 */

isc_result_t
isc__nm_socket_reuse(uv_os_sock_t fd);
/*%<
 * Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd
 */

isc_result_t
isc__nm_socket_reuse_lb(uv_os_sock_t fd);
/*%<
 * Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd
 */

isc_result_t
isc__nm_socket_incoming_cpu(uv_os_sock_t fd);
/*%<
 * Set the SO_INCOMING_CPU socket option on the fd if available
 */

isc_result_t
isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family);
/*%<
 * Disable Path MTU Discovery, either by disabling the IP(V6)_DONTFRAG
 * socket option or by setting the IP(V6)_MTU_DISCOVER socket option to
 * IP_PMTUDISC_OMIT.
 */

isc_result_t
isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms);
/*%<
 * Set the connection timeout in milliseconds; on non-Linux platforms
 * the minimum value is 1000 ms (1 second).
 */

isc_result_t
isc__nm_socket_tcp_nodelay(uv_os_sock_t fd);
/*%<
 * Disable Nagle's algorithm on a TCP socket (sets TCP_NODELAY).
 */
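/*
 * Setup sketch (informative): a socket might be prepared with the helpers
 * above roughly as follows; error handling is elided and the exact option
 * set is illustrative, not normative:
 *
 *      uv_os_sock_t fd;
 *      (void)isc__nm_socket(AF_INET6, SOCK_DGRAM, 0, &fd);
 *      (void)isc__nm_socket_reuse(fd);
 *      (void)isc__nm_socket_reuse_lb(fd);
 *      (void)isc__nm_socket_freebind(fd, AF_INET6);
 *      (void)isc__nm_socket_disable_pmtud(fd, AF_INET6);
 */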
/*
 * typedef all the netievent types
 */

NETIEVENT_SOCKET_TYPE(close);
NETIEVENT_SOCKET_TYPE(tcpclose);
NETIEVENT_SOCKET_TYPE(tcplisten);
NETIEVENT_SOCKET_TYPE(tcppauseread);
NETIEVENT_SOCKET_TYPE(tcpstop);
NETIEVENT_SOCKET_TYPE(udpclose);
NETIEVENT_SOCKET_TYPE(udplisten);
NETIEVENT_SOCKET_TYPE(udpread);
/* NETIEVENT_SOCKET_TYPE(udpsend); */ /* unique type, defined independently */
NETIEVENT_SOCKET_TYPE(udpstop);

NETIEVENT_SOCKET_TYPE(tcpdnsclose);
NETIEVENT_SOCKET_TYPE(tcpdnsread);
NETIEVENT_SOCKET_TYPE(tcpdnsstop);
NETIEVENT_SOCKET_TYPE(tcpdnslisten);
NETIEVENT_SOCKET_REQ_TYPE(tcpdnsconnect);
NETIEVENT_SOCKET_REQ_TYPE(tcpdnssend);
NETIEVENT_SOCKET_HANDLE_TYPE(tcpdnscancel);
NETIEVENT_SOCKET_QUOTA_TYPE(tcpdnsaccept);

NETIEVENT_SOCKET_REQ_TYPE(tcpconnect);
NETIEVENT_SOCKET_REQ_TYPE(tcpsend);
NETIEVENT_SOCKET_TYPE(tcpstartread);
NETIEVENT_SOCKET_REQ_TYPE(udpconnect);

NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb);
NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb);
NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb);

NETIEVENT_SOCKET_HANDLE_TYPE(detach);
NETIEVENT_SOCKET_HANDLE_TYPE(tcpcancel);
NETIEVENT_SOCKET_HANDLE_TYPE(udpcancel);

NETIEVENT_SOCKET_QUOTA_TYPE(tcpaccept);

NETIEVENT_TYPE(pause);
NETIEVENT_TYPE(resume);
NETIEVENT_TYPE(shutdown);
NETIEVENT_TYPE(stop);

NETIEVENT_TASK_TYPE(task);
NETIEVENT_TASK_TYPE(privilegedtask);

/* Now declare the helper functions */

NETIEVENT_SOCKET_DECL(close);
NETIEVENT_SOCKET_DECL(tcpclose);
NETIEVENT_SOCKET_DECL(tcplisten);
NETIEVENT_SOCKET_DECL(tcppauseread);
NETIEVENT_SOCKET_DECL(tcpstartread);
NETIEVENT_SOCKET_DECL(tcpstop);
NETIEVENT_SOCKET_DECL(udpclose);
NETIEVENT_SOCKET_DECL(udplisten);
NETIEVENT_SOCKET_DECL(udpread);
NETIEVENT_SOCKET_DECL(udpsend);
NETIEVENT_SOCKET_DECL(udpstop);

NETIEVENT_SOCKET_DECL(tcpdnsclose);
NETIEVENT_SOCKET_DECL(tcpdnsread);
NETIEVENT_SOCKET_DECL(tcpdnsstop);
NETIEVENT_SOCKET_DECL(tcpdnslisten);
NETIEVENT_SOCKET_REQ_DECL(tcpdnsconnect);
NETIEVENT_SOCKET_REQ_DECL(tcpdnssend);
NETIEVENT_SOCKET_HANDLE_DECL(tcpdnscancel);
NETIEVENT_SOCKET_QUOTA_DECL(tcpdnsaccept);

NETIEVENT_SOCKET_REQ_DECL(tcpconnect);
NETIEVENT_SOCKET_REQ_DECL(tcpsend);
NETIEVENT_SOCKET_REQ_DECL(udpconnect);

NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb);
NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb);
NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb);

NETIEVENT_SOCKET_HANDLE_DECL(udpcancel);
NETIEVENT_SOCKET_HANDLE_DECL(tcpcancel);
NETIEVENT_SOCKET_DECL(detach);

NETIEVENT_SOCKET_QUOTA_DECL(tcpaccept);

NETIEVENT_DECL(pause);
NETIEVENT_DECL(resume);
NETIEVENT_DECL(shutdown);
NETIEVENT_DECL(stop);

NETIEVENT_TASK_DECL(task);
NETIEVENT_TASK_DECL(privilegedtask);

void
isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
void
isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
void
isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);

isc_result_t
isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock);

isc__nm_uvreq_t *
isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr);

void
isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf);

void
isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
                    const struct sockaddr *addr, unsigned flags);
void
isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
void
isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);

isc_result_t
isc__nm_start_reading(isc_nmsocket_t *sock);
void
isc__nm_stop_reading(isc_nmsocket_t *sock);
isc_result_t
isc__nm_process_sock_buffer(isc_nmsocket_t *sock);
void
isc__nm_resume_processing(void *arg);
bool
isc__nmsocket_closing(isc_nmsocket_t *sock);
bool
isc__nm_closing(isc_nmsocket_t *sock);

void
isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len);

void
isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
                       isc_result_t eresult);
void
isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult);
void
isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
                          isc_result_t eresult, bool async);
void
isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async);

void
isc__nm_accept_connection_log(isc_result_t result, bool can_log_quota);

/*
 * Timeout callbacks
 */
void
isc__nmsocket_connecttimeout_cb(uv_timer_t *timer);
void
isc__nmsocket_readtimeout_cb(uv_timer_t *timer);
void
isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult);

/*%<
 * Maximum number of simultaneous handles in flight supported for a single
 * connected TCPDNS socket. This value was chosen arbitrarily and may be
 * changed in the future.
 */
#define STREAM_CLIENTS_PER_CONN 23

#define UV_RUNTIME_CHECK(func, ret) \
        if (ret != 0) { \
                isc_error_fatal(__FILE__, __LINE__, "%s failed: %s\n", #func, \
                                uv_strerror(ret)); \
        }
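/*
 * Usage sketch (informative): the macro fatally aborts via isc_error_fatal()
 * on any nonzero libuv return code, e.g.:
 *
 *      int r = uv_timer_init(&worker->loop, &sock->read_timer);
 *      UV_RUNTIME_CHECK(uv_timer_init, r);
 */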