/*
 * Copyright (C) 2004-2009, 2011-2014  Internet Systems Consortium, Inc. ("ISC")
 * Copyright (C) 1999-2003  Internet Software Consortium.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

/* $Id: dispatch.c,v 1.175 2011/11/29 01:03:47 marka Exp $ */

/*! \file */

#include <config.h>

#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>

#include <isc/entropy.h>
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/portset.h>
#include <isc/print.h>
#include <isc/random.h>
#include <isc/socket.h>
#include <isc/stats.h>
#include <isc/string.h>
#include <isc/task.h>
#include <isc/time.h>
#include <isc/util.h>

#include <dns/acl.h>
#include <dns/dispatch.h>
#include <dns/events.h>
#include <dns/log.h>
#include <dns/message.h>
#include <dns/portlist.h>
#include <dns/stats.h>
#include <dns/tcpmsg.h>
#include <dns/types.h>

typedef ISC_LIST(dns_dispentry_t)	dns_displist_t;

typedef struct dispsocket		dispsocket_t;
typedef ISC_LIST(dispsocket_t)		dispsocketlist_t;

typedef struct dispportentry		dispportentry_t;
typedef ISC_LIST(dispportentry_t)	dispportlist_t;

/* ARC4 Random generator state */
typedef struct arc4ctx {
	isc_uint8_t	i;		/*%< first index into s[] */
	isc_uint8_t	j;		/*%< second index into s[] */
	isc_uint8_t	s[256];		/*%< state table, initialised to 0..255 */
	int		count;		/*%< output budget; a stir is forced
					     once it drops to zero or below */
	isc_entropy_t	*entropy;	/*%< entropy source for ARC4 */
	isc_mutex_t	*lock;		/*%< taken around generation when
					     non-NULL */
} arc4ctx_t;

/*%
 * Hash tables keyed by dns_hash(): one of outstanding dispatch entries and
 * one of dispatch sockets, both protected by 'lock'.
 */
typedef struct dns_qid {
	unsigned int	magic;
	unsigned int	qid_nbuckets;	/*%< hash table size */
	unsigned int	qid_increment;	/*%< id increment on collision */
	isc_mutex_t	lock;
	dns_displist_t	*qid_table;	/*%< the table itself */
	dispsocketlist_t *sock_table;	/*%< socket table */
} dns_qid_t;

struct dns_dispatchmgr {
	/* Unlocked. */
	unsigned int			magic;
	isc_mem_t		       *mctx;
	dns_acl_t		       *blackhole;
	dns_portlist_t		       *portlist;
	isc_stats_t		       *stats;
	isc_entropy_t		       *entropy; /*%< entropy source */

	/* Locked by "lock". */
	isc_mutex_t			lock;
	unsigned int			state;
	ISC_LIST(dns_dispatch_t)	list;

	/* Locked by arc4_lock. */
	isc_mutex_t			arc4_lock;
	arc4ctx_t			arc4ctx;    /*%< ARC4 context for QID */

	/* locked by buffer lock */
	dns_qid_t			*qid;
	isc_mutex_t			buffer_lock;
	unsigned int			buffers;    /*%< allocated buffers */
	unsigned int			buffersize; /*%< size of each buffer */
	unsigned int			maxbuffers; /*%< max buffers */

	/* Locked internally. */
	isc_mutex_t			depool_lock;
	isc_mempool_t		       *depool;	/*%< pool for dispatch events */
	isc_mutex_t			rpool_lock;
	isc_mempool_t		       *rpool;	/*%< pool for replies */
	isc_mutex_t			dpool_lock;
	isc_mempool_t		       *dpool;  /*%< dispatch allocations */
	isc_mutex_t			bpool_lock;
	isc_mempool_t		       *bpool;	/*%< pool for buffers */
	isc_mutex_t			spool_lock;
	isc_mempool_t		       *spool;	/*%< pool for dispsocks */

	/*%
	 * Locked by qid->lock if qid exists; otherwise, can be used without
	 * being locked.
	 * Memory footprint considerations: this is a simple implementation of
	 * available ports, i.e., an ordered array of the actual port numbers.
	 * This will require about 256KB of memory in the worst case (128KB for
	 * each of IPv4 and IPv6).  We could reduce it by representing it in a
	 * more sophisticated way such as a list (or array) of ranges that are
	 * searched to identify a specific port.  Our decision here is the saved
	 * memory isn't worth the implementation complexity, considering the
	 * fact that the whole BIND9 process (which is mainly named) already
	 * requires a pretty large memory footprint.  We may, however, have to
	 * revisit the decision when we want to use it as a separate module for
	 * an environment where the memory requirement is more severe.
	 */
	in_port_t	*v4ports;	/*%< available ports for IPv4 */
	unsigned int	nv4ports;	/*%< # of available ports for IPv4 */
	in_port_t	*v6ports;	/*%< available ports for IPv6 */
	unsigned int	nv6ports;	/*%< # of available ports for IPv6 */
};

/*% Bit in dns_dispatchmgr.state, tested by MGR_IS_SHUTTINGDOWN(). */
#define MGR_SHUTTINGDOWN		0x00000001U
#define MGR_IS_SHUTTINGDOWN(l)	(((l)->state & MGR_SHUTTINGDOWN) != 0)

/*% True when the dispatch carries DNS_DISPATCHATTR_PRIVATE. */
#define IS_PRIVATE(d)	(((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)

/*%
 * One entry in a dns_qid_t's qid_table.  entry_search() matches entries on
 * 'id', 'host' and 'port'; 'bucket' is the table slot the entry lives in.
 */
struct dns_dispentry {
	unsigned int			magic;
	dns_dispatch_t		       *disp;
	dns_messageid_t			id;
	in_port_t			port;
	unsigned int			bucket;
	isc_sockaddr_t			host;
	isc_task_t		       *task;
	isc_taskaction_t		action;
	void			       *arg;
	isc_boolean_t			item_out;
	dispsocket_t			*dispsocket;
	ISC_LIST(dns_dispatchevent_t)	items;
	ISC_LINK(dns_dispentry_t)	link;
};

/*%
 * Maximum number of dispatch sockets that can be pooled for reuse.  The
 * appropriate value may vary, but experiments have shown a busy caching server
 * may need more than 1000 sockets concurrently opened.  The maximum allowable
 * number of dispatch sockets (per manager) will be set to the double of this
 * value.
 */
#ifndef DNS_DISPATCH_POOLSOCKS
#define DNS_DISPATCH_POOLSOCKS			2048
#endif

/*%
 * Quota to control the number of dispatch sockets.
 * If a dispatch has more
 * than the quota of sockets, new queries will purge oldest ones, so that
 * a massive number of outstanding queries won't prevent subsequent queries
 * (especially if the older ones take longer time and result in timeout).
 */
#ifndef DNS_DISPATCH_SOCKSQUOTA
#define DNS_DISPATCH_SOCKSQUOTA			3072
#endif

/*%
 * A dedicated socket used by a dispatch for a single destination.
 * 'link' chains it on the owning dispatch's active/inactive socket lists;
 * 'blink' chains it in the qid sock_table slot numbered 'bucket'.
 */
struct dispsocket {
	unsigned int			magic;
	isc_socket_t			*socket;
	dns_dispatch_t			*disp;
	isc_sockaddr_t			host;
	in_port_t			localport; /* XXX: should be removed later */
	dispportentry_t			*portentry;
	dns_dispentry_t			*resp;
	isc_task_t			*task;
	ISC_LINK(dispsocket_t)		link;
	unsigned int			bucket;
	ISC_LINK(dispsocket_t)		blink;
};

/*%
 * A port table entry.  We remember every port we first open in a table with a
 * reference counter so that we can 'reuse' the same port (with different
 * destination addresses) using the SO_REUSEADDR socket option.
 */
struct dispportentry {
	in_port_t			port;
	unsigned int			refs;
	ISC_LINK(struct dispportentry)	link;
};

/*% Number of slots in a dispatch's port_table (indexed by port modulo). */
#ifndef DNS_DISPATCH_PORTTABLESIZE
#define DNS_DISPATCH_PORTTABLESIZE		1024
#endif

#define INVALID_BUCKET		(0xffffdead)

/*%
 * Number of tasks for each dispatch that use separate sockets for different
 * transactions.  This must be a power of 2 as it will divide 32 bit numbers
 * to get an uniformly random tasks selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS	64

struct dns_dispatch {
	/* Unlocked. */
	unsigned int		magic;		/*%< magic */
	dns_dispatchmgr_t      *mgr;		/*%< dispatch manager */
	int			ntasks;		/*%< number of valid task[] slots */
	/*%
	 * internal task buckets.  We use multiple tasks to distribute various
	 * socket events well when using separate dispatch sockets.  We use the
	 * 1st task (task[0]) for internal control events.
	 */
	isc_task_t	       *task[MAX_INTERNAL_TASKS];
	isc_socket_t	       *socket;		/*%< isc socket attached to */
	isc_sockaddr_t		local;		/*%< local address */
	in_port_t		localport;	/*%< local UDP port */
	unsigned int		maxrequests;	/*%< max requests */
	isc_event_t	       *ctlevent;

	isc_mutex_t		sepool_lock;
	isc_mempool_t	       *sepool;		/*%< pool for socket events */

	/*% Locked by mgr->lock. */
	ISC_LINK(dns_dispatch_t) link;

	/* Locked by "lock". */
	isc_mutex_t		lock;		/*%< locks all below */
	isc_sockettype_t	socktype;
	unsigned int		attributes;
	unsigned int		refcount;	/*%< number of users */
	dns_dispatchevent_t    *failsafe_ev;	/*%< failsafe cancel event */
	unsigned int		shutting_down : 1,
				shutdown_out : 1,
				connected : 1,
				tcpmsg_valid : 1,
				recv_pending : 1; /*%< is a recv() pending? */
	isc_result_t		shutdown_why;
	ISC_LIST(dispsocket_t)	activesockets;
	ISC_LIST(dispsocket_t)	inactivesockets;
	unsigned int		nsockets;
	unsigned int		requests;	/*%< how many requests we have */
	unsigned int		tcpbuffers;	/*%< allocated buffers */
	dns_tcpmsg_t		tcpmsg;		/*%< for tcp streams */
	dns_qid_t		*qid;
	arc4ctx_t		arc4ctx;	/*%< for QID/UDP port num */
	dispportlist_t		*port_table;	/*%< hold ports 'owned' by us */
	isc_mempool_t		*portpool;	/*%< port table entries */
};

/*
 * Magic numbers and validity checks for the structures defined in this file.
 */
#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)

#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

#define DISPSOCK_MAGIC		ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e)	ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)

279#define DNS_QID(disp) ((disp)->socktype == isc_sockettype_tcp) ? \ 280 (disp)->qid : (disp)->mgr->qid 281#define DISP_ARC4CTX(disp) ((disp)->socktype == isc_sockettype_udp) ? \ 282 (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx) 283 284/*% 285 * Locking a query port buffer is a bit tricky. We access the buffer without 286 * locking until qid is created. Technically, there is a possibility of race 287 * between the creation of qid and access to the port buffer; in practice, 288 * however, this should be safe because qid isn't created until the first 289 * dispatch is created and there should be no contending situation until then. 290 */ 291#define PORTBUFLOCK(mgr) if ((mgr)->qid != NULL) LOCK(&((mgr)->qid->lock)) 292#define PORTBUFUNLOCK(mgr) if ((mgr)->qid != NULL) UNLOCK((&(mgr)->qid->lock)) 293 294/* 295 * Statics. 296 */ 297static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *, 298 dns_messageid_t, in_port_t, unsigned int); 299static isc_boolean_t destroy_disp_ok(dns_dispatch_t *); 300static void destroy_disp(isc_task_t *task, isc_event_t *event); 301static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **); 302static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *); 303static void udp_exrecv(isc_task_t *, isc_event_t *); 304static void udp_shrecv(isc_task_t *, isc_event_t *); 305static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *); 306static void tcp_recv(isc_task_t *, isc_event_t *); 307static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *); 308static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t, 309 in_port_t); 310static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len); 311static void *allocate_udp_buffer(dns_dispatch_t *disp); 312static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev); 313static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp); 314static void do_cancel(dns_dispatch_t *disp); 315static dns_dispentry_t 
*linear_first(dns_qid_t *disp); 316static dns_dispentry_t *linear_next(dns_qid_t *disp, 317 dns_dispentry_t *resp); 318static void dispatch_free(dns_dispatch_t **dispp); 319static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr, 320 dns_dispatch_t *disp, 321 isc_socketmgr_t *sockmgr, 322 isc_sockaddr_t *localaddr, 323 isc_socket_t **sockp, 324 isc_socket_t *dup_socket); 325static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, 326 isc_socketmgr_t *sockmgr, 327 isc_taskmgr_t *taskmgr, 328 isc_sockaddr_t *localaddr, 329 unsigned int maxrequests, 330 unsigned int attributes, 331 dns_dispatch_t **dispp, 332 isc_socket_t *dup_socket); 333static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr); 334static void destroy_mgr(dns_dispatchmgr_t **mgrp); 335static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, 336 unsigned int increment, dns_qid_t **qidp, 337 isc_boolean_t needaddrtable); 338static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); 339static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, 340 unsigned int options, isc_socket_t **sockp, 341 isc_socket_t *dup_socket); 342static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, 343 isc_sockaddr_t *sockaddrp); 344 345#define LVL(x) ISC_LOG_DEBUG(x) 346 347static void 348mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) 349 ISC_FORMAT_PRINTF(3, 4); 350 351static void 352mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) { 353 char msgbuf[2048]; 354 va_list ap; 355 356 if (! 
isc_log_wouldlog(dns_lctx, level)) 357 return; 358 359 va_start(ap, fmt); 360 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 361 va_end(ap); 362 363 isc_log_write(dns_lctx, 364 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH, 365 level, "dispatchmgr %p: %s", mgr, msgbuf); 366} 367 368static inline void 369inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { 370 if (mgr->stats != NULL) 371 isc_stats_increment(mgr->stats, counter); 372} 373 374static void 375dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) 376 ISC_FORMAT_PRINTF(3, 4); 377 378static void 379dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) { 380 char msgbuf[2048]; 381 va_list ap; 382 383 if (! isc_log_wouldlog(dns_lctx, level)) 384 return; 385 386 va_start(ap, fmt); 387 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 388 va_end(ap); 389 390 isc_log_write(dns_lctx, 391 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH, 392 level, "dispatch %p: %s", disp, msgbuf); 393} 394 395static void 396request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, 397 int level, const char *fmt, ...) 398 ISC_FORMAT_PRINTF(4, 5); 399 400static void 401request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, 402 int level, const char *fmt, ...) 403{ 404 char msgbuf[2048]; 405 char peerbuf[256]; 406 va_list ap; 407 408 if (! isc_log_wouldlog(dns_lctx, level)) 409 return; 410 411 va_start(ap, fmt); 412 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 413 va_end(ap); 414 415 if (VALID_RESPONSE(resp)) { 416 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf)); 417 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 418 DNS_LOGMODULE_DISPATCH, level, 419 "dispatch %p response %p %s: %s", disp, resp, 420 peerbuf, msgbuf); 421 } else { 422 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 423 DNS_LOGMODULE_DISPATCH, level, 424 "dispatch %p req/resp %p: %s", disp, resp, 425 msgbuf); 426 } 427} 428 429/*% 430 * ARC4 random number generator derived from OpenBSD. 
431 * Only dispatch_random() and dispatch_uniformrandom() are expected 432 * to be called from general dispatch routines; the rest of them are subroutines 433 * for these two. 434 * 435 * The original copyright follows: 436 * Copyright (c) 1996, David Mazieres <dm@uun.org> 437 * Copyright (c) 2008, Damien Miller <djm@openbsd.org> 438 * 439 * Permission to use, copy, modify, and distribute this software for any 440 * purpose with or without fee is hereby granted, provided that the above 441 * copyright notice and this permission notice appear in all copies. 442 * 443 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 444 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 445 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 446 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 447 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 448 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 449 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
450 */ 451#ifdef BIND9 452static void 453dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy, 454 isc_mutex_t *lock) 455{ 456 int n; 457 for (n = 0; n < 256; n++) 458 actx->s[n] = n; 459 actx->i = 0; 460 actx->j = 0; 461 actx->count = 0; 462 actx->entropy = entropy; /* don't have to attach */ 463 actx->lock = lock; 464} 465 466static void 467dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) { 468 int n; 469 isc_uint8_t si; 470 471 actx->i--; 472 for (n = 0; n < 256; n++) { 473 actx->i = (actx->i + 1); 474 si = actx->s[actx->i]; 475 actx->j = (actx->j + si + dat[n % datlen]); 476 actx->s[actx->i] = actx->s[actx->j]; 477 actx->s[actx->j] = si; 478 } 479 actx->j = actx->i; 480} 481 482static inline isc_uint8_t 483dispatch_arc4get8(arc4ctx_t *actx) { 484 isc_uint8_t si, sj; 485 486 actx->i = (actx->i + 1); 487 si = actx->s[actx->i]; 488 actx->j = (actx->j + si); 489 sj = actx->s[actx->j]; 490 actx->s[actx->i] = sj; 491 actx->s[actx->j] = si; 492 493 return (actx->s[(si + sj) & 0xff]); 494} 495 496static inline isc_uint16_t 497dispatch_arc4get16(arc4ctx_t *actx) { 498 isc_uint16_t val; 499 500 val = dispatch_arc4get8(actx) << 8; 501 val |= dispatch_arc4get8(actx); 502 503 return (val); 504} 505 506static void 507dispatch_arc4stir(arc4ctx_t *actx) { 508 int i; 509 union { 510 unsigned char rnd[128]; 511 isc_uint32_t rnd32[32]; 512 } rnd; 513 isc_result_t result; 514 515 if (actx->entropy != NULL) { 516 /* 517 * We accept any quality of random data to avoid blocking. 
518 */ 519 result = isc_entropy_getdata(actx->entropy, rnd.rnd, 520 sizeof(rnd), NULL, 0); 521 RUNTIME_CHECK(result == ISC_R_SUCCESS); 522 } else { 523 for (i = 0; i < 32; i++) 524 isc_random_get(&rnd.rnd32[i]); 525 } 526 dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd)); 527 528 /* 529 * Discard early keystream, as per recommendations in: 530 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps 531 */ 532 for (i = 0; i < 256; i++) 533 (void)dispatch_arc4get8(actx); 534 535 /* 536 * Derived from OpenBSD's implementation. The rationale is not clear, 537 * but should be conservative enough in safety, and reasonably large 538 * for efficiency. 539 */ 540 actx->count = 1600000; 541} 542 543static isc_uint16_t 544dispatch_random(arc4ctx_t *actx) { 545 isc_uint16_t result; 546 547 if (actx->lock != NULL) 548 LOCK(actx->lock); 549 550 actx->count -= sizeof(isc_uint16_t); 551 if (actx->count <= 0) 552 dispatch_arc4stir(actx); 553 result = dispatch_arc4get16(actx); 554 555 if (actx->lock != NULL) 556 UNLOCK(actx->lock); 557 558 return (result); 559} 560#else 561/* 562 * For general purpose library, we don't have to be too strict about the 563 * quality of random values. Performance doesn't matter much, either. 564 * So we simply use the isc_random module to keep the library as small as 565 * possible. 
566 */ 567 568static void 569dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy, 570 isc_mutex_t *lock) 571{ 572 UNUSED(actx); 573 UNUSED(entropy); 574 UNUSED(lock); 575 576 return; 577} 578 579static isc_uint16_t 580dispatch_random(arc4ctx_t *actx) { 581 isc_uint32_t r; 582 583 UNUSED(actx); 584 585 isc_random_get(&r); 586 return (r & 0xffff); 587} 588#endif /* BIND9 */ 589 590static isc_uint16_t 591dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) { 592 isc_uint16_t min, r; 593 594 if (upper_bound < 2) 595 return (0); 596 597 /* 598 * Ensure the range of random numbers [min, 0xffff] be a multiple of 599 * upper_bound and contain at least a half of the 16 bit range. 600 */ 601 602 if (upper_bound > 0x8000) 603 min = 1 + ~upper_bound; /* 0x8000 - upper_bound */ 604 else 605 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound); 606 607 /* 608 * This could theoretically loop forever but each retry has 609 * p > 0.5 (worst case, usually far better) of selecting a 610 * number inside the range we need, so it should rarely need 611 * to re-roll. 612 */ 613 for (;;) { 614 r = dispatch_random(actx); 615 if (r >= min) 616 break; 617 } 618 619 return (r % upper_bound); 620} 621 622/* 623 * Return a hash of the destination and message id. 624 */ 625static isc_uint32_t 626dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id, 627 in_port_t port) 628{ 629 unsigned int ret; 630 631 ret = isc_sockaddr_hash(dest, ISC_TRUE); 632 ret ^= (id << 16) | port; 633 ret %= qid->qid_nbuckets; 634 635 INSIST(ret < qid->qid_nbuckets); 636 637 return (ret); 638} 639 640/* 641 * Find the first entry in 'qid'. Returns NULL if there are no entries. 
642 */ 643static dns_dispentry_t * 644linear_first(dns_qid_t *qid) { 645 dns_dispentry_t *ret; 646 unsigned int bucket; 647 648 bucket = 0; 649 650 while (bucket < qid->qid_nbuckets) { 651 ret = ISC_LIST_HEAD(qid->qid_table[bucket]); 652 if (ret != NULL) 653 return (ret); 654 bucket++; 655 } 656 657 return (NULL); 658} 659 660/* 661 * Find the next entry after 'resp' in 'qid'. Return NULL if there are 662 * no more entries. 663 */ 664static dns_dispentry_t * 665linear_next(dns_qid_t *qid, dns_dispentry_t *resp) { 666 dns_dispentry_t *ret; 667 unsigned int bucket; 668 669 ret = ISC_LIST_NEXT(resp, link); 670 if (ret != NULL) 671 return (ret); 672 673 bucket = resp->bucket; 674 bucket++; 675 while (bucket < qid->qid_nbuckets) { 676 ret = ISC_LIST_HEAD(qid->qid_table[bucket]); 677 if (ret != NULL) 678 return (ret); 679 bucket++; 680 } 681 682 return (NULL); 683} 684 685/* 686 * The dispatch must be locked. 687 */ 688static isc_boolean_t 689destroy_disp_ok(dns_dispatch_t *disp) 690{ 691 if (disp->refcount != 0) 692 return (ISC_FALSE); 693 694 if (disp->recv_pending != 0) 695 return (ISC_FALSE); 696 697 if (!ISC_LIST_EMPTY(disp->activesockets)) 698 return (ISC_FALSE); 699 700 if (disp->shutting_down == 0) 701 return (ISC_FALSE); 702 703 return (ISC_TRUE); 704} 705 706/* 707 * Called when refcount reaches 0 (and safe to destroy). 708 * 709 * The dispatcher must be locked. 710 * The manager must not be locked. 
711 */ 712static void 713destroy_disp(isc_task_t *task, isc_event_t *event) { 714 dns_dispatch_t *disp; 715 dns_dispatchmgr_t *mgr; 716 isc_boolean_t killmgr; 717 dispsocket_t *dispsocket; 718 int i; 719 720 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL); 721 722 UNUSED(task); 723 724 disp = event->ev_arg; 725 mgr = disp->mgr; 726 727 LOCK(&mgr->lock); 728 ISC_LIST_UNLINK(mgr->list, disp, link); 729 730 dispatch_log(disp, LVL(90), 731 "shutting down; detaching from sock %p, task %p", 732 disp->socket, disp->task[0]); /* XXXX */ 733 734 if (disp->sepool != NULL) { 735 isc_mempool_destroy(&disp->sepool); 736 (void)isc_mutex_destroy(&disp->sepool_lock); 737 } 738 739 if (disp->socket != NULL) 740 isc_socket_detach(&disp->socket); 741 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) { 742 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link); 743 destroy_dispsocket(disp, &dispsocket); 744 } 745 for (i = 0; i < disp->ntasks; i++) 746 isc_task_detach(&disp->task[i]); 747 isc_event_free(&event); 748 749 dispatch_free(&disp); 750 751 killmgr = destroy_mgr_ok(mgr); 752 UNLOCK(&mgr->lock); 753 if (killmgr) 754 destroy_mgr(&mgr); 755} 756 757/*% 758 * Manipulate port table per dispatch: find an entry for a given port number, 759 * create a new entry, and decrement a given entry with possible clean-up. 
760 */ 761static dispportentry_t * 762port_search(dns_dispatch_t *disp, in_port_t port) { 763 dispportentry_t *portentry; 764 765 REQUIRE(disp->port_table != NULL); 766 767 portentry = ISC_LIST_HEAD(disp->port_table[port % 768 DNS_DISPATCH_PORTTABLESIZE]); 769 while (portentry != NULL) { 770 if (portentry->port == port) 771 return (portentry); 772 portentry = ISC_LIST_NEXT(portentry, link); 773 } 774 775 return (NULL); 776} 777 778static dispportentry_t * 779new_portentry(dns_dispatch_t *disp, in_port_t port) { 780 dispportentry_t *portentry; 781 dns_qid_t *qid; 782 783 REQUIRE(disp->port_table != NULL); 784 785 portentry = isc_mempool_get(disp->portpool); 786 if (portentry == NULL) 787 return (portentry); 788 789 portentry->port = port; 790 portentry->refs = 1; 791 ISC_LINK_INIT(portentry, link); 792 qid = DNS_QID(disp); 793 LOCK(&qid->lock); 794 ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE], 795 portentry, link); 796 UNLOCK(&qid->lock); 797 798 return (portentry); 799} 800 801/*% 802 * The caller must not hold the qid->lock. 803 */ 804static void 805deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) { 806 dispportentry_t *portentry = *portentryp; 807 dns_qid_t *qid; 808 809 REQUIRE(disp->port_table != NULL); 810 REQUIRE(portentry != NULL && portentry->refs > 0); 811 812 qid = DNS_QID(disp); 813 LOCK(&qid->lock); 814 portentry->refs--; 815 816 if (portentry->refs == 0) { 817 ISC_LIST_UNLINK(disp->port_table[portentry->port % 818 DNS_DISPATCH_PORTTABLESIZE], 819 portentry, link); 820 isc_mempool_put(disp->portpool, portentry); 821 } 822 UNLOCK(&qid->lock); 823 824 *portentryp = NULL; 825} 826 827/*% 828 * Find a dispsocket for socket address 'dest', and port number 'port'. 829 * Return NULL if no such entry exists. 
830 */ 831static dispsocket_t * 832socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port, 833 unsigned int bucket) 834{ 835 dispsocket_t *dispsock; 836 837 REQUIRE(VALID_QID(qid)); 838 REQUIRE(bucket < qid->qid_nbuckets); 839 840 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]); 841 842 while (dispsock != NULL) { 843 if (dispsock->portentry != NULL && 844 dispsock->portentry->port == port && 845 isc_sockaddr_equal(dest, &dispsock->host)) 846 return (dispsock); 847 dispsock = ISC_LIST_NEXT(dispsock, blink); 848 } 849 850 return (NULL); 851} 852 853/*% 854 * Make a new socket for a single dispatch with a random port number. 855 * The caller must hold the disp->lock 856 */ 857static isc_result_t 858get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest, 859 isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp, 860 in_port_t *portp) 861{ 862 int i; 863 isc_uint32_t r; 864 dns_dispatchmgr_t *mgr = disp->mgr; 865 isc_socket_t *sock = NULL; 866 isc_result_t result = ISC_R_FAILURE; 867 in_port_t port; 868 isc_sockaddr_t localaddr; 869 unsigned int bucket = 0; 870 dispsocket_t *dispsock; 871 unsigned int nports; 872 in_port_t *ports; 873 unsigned int bindoptions; 874 dispportentry_t *portentry = NULL; 875 dns_qid_t *qid; 876 877 if (isc_sockaddr_pf(&disp->local) == AF_INET) { 878 nports = disp->mgr->nv4ports; 879 ports = disp->mgr->v4ports; 880 } else { 881 nports = disp->mgr->nv6ports; 882 ports = disp->mgr->v6ports; 883 } 884 if (nports == 0) 885 return (ISC_R_ADDRNOTAVAIL); 886 887 dispsock = ISC_LIST_HEAD(disp->inactivesockets); 888 if (dispsock != NULL) { 889 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link); 890 sock = dispsock->socket; 891 dispsock->socket = NULL; 892 } else { 893 dispsock = isc_mempool_get(mgr->spool); 894 if (dispsock == NULL) 895 return (ISC_R_NOMEMORY); 896 897 disp->nsockets++; 898 dispsock->socket = NULL; 899 dispsock->disp = disp; 900 dispsock->resp = NULL; 901 dispsock->portentry = NULL; 902 isc_random_get(&r); 903 
dispsock->task = NULL; 904 isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task); 905 ISC_LINK_INIT(dispsock, link); 906 ISC_LINK_INIT(dispsock, blink); 907 dispsock->magic = DISPSOCK_MAGIC; 908 } 909 910 /* 911 * Pick up a random UDP port and open a new socket with it. Avoid 912 * choosing ports that share the same destination because it will be 913 * very likely to fail in bind(2) or connect(2). 914 */ 915 localaddr = disp->local; 916 qid = DNS_QID(disp); 917 918 for (i = 0; i < 64; i++) { 919 port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp), 920 nports)]; 921 isc_sockaddr_setport(&localaddr, port); 922 923 LOCK(&qid->lock); 924 bucket = dns_hash(qid, dest, 0, port); 925 if (socket_search(qid, dest, port, bucket) != NULL) { 926 UNLOCK(&qid->lock); 927 continue; 928 } 929 UNLOCK(&qid->lock); 930 bindoptions = 0; 931 portentry = port_search(disp, port); 932 933 if (portentry != NULL) 934 bindoptions |= ISC_SOCKET_REUSEADDRESS; 935 result = open_socket(sockmgr, &localaddr, bindoptions, &sock, 936 NULL); 937 if (result == ISC_R_SUCCESS) { 938 if (portentry == NULL) { 939 portentry = new_portentry(disp, port); 940 if (portentry == NULL) { 941 result = ISC_R_NOMEMORY; 942 break; 943 } 944 } else { 945 LOCK(&qid->lock); 946 portentry->refs++; 947 UNLOCK(&qid->lock); 948 } 949 break; 950 } else if (result == ISC_R_NOPERM) { 951 char buf[ISC_SOCKADDR_FORMATSIZE]; 952 isc_sockaddr_format(&localaddr, buf, sizeof(buf)); 953 dispatch_log(disp, ISC_LOG_WARNING, 954 "open_socket(%s) -> %s: continuing", 955 buf, isc_result_totext(result)); 956 } else if (result != ISC_R_ADDRINUSE) 957 break; 958 } 959 960 if (result == ISC_R_SUCCESS) { 961 dispsock->socket = sock; 962 dispsock->host = *dest; 963 dispsock->portentry = portentry; 964 dispsock->bucket = bucket; 965 LOCK(&qid->lock); 966 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink); 967 UNLOCK(&qid->lock); 968 *dispsockp = dispsock; 969 *portp = port; 970 } else { 971 /* 972 * We could keep it in the 
inactive list, but since this should 973 * be an exceptional case and might be resource shortage, we'd 974 * rather destroy it. 975 */ 976 if (sock != NULL) 977 isc_socket_detach(&sock); 978 destroy_dispsocket(disp, &dispsock); 979 } 980 981 return (result); 982} 983 984/*% 985 * Destroy a dedicated dispatch socket. 986 */ 987static void 988destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) { 989 dispsocket_t *dispsock; 990 dns_qid_t *qid; 991 992 /* 993 * The dispatch must be locked. 994 */ 995 996 REQUIRE(dispsockp != NULL && *dispsockp != NULL); 997 dispsock = *dispsockp; 998 REQUIRE(!ISC_LINK_LINKED(dispsock, link)); 999 1000 disp->nsockets--; 1001 dispsock->magic = 0; 1002 if (dispsock->portentry != NULL) 1003 deref_portentry(disp, &dispsock->portentry); 1004 if (dispsock->socket != NULL) 1005 isc_socket_detach(&dispsock->socket); 1006 if (ISC_LINK_LINKED(dispsock, blink)) { 1007 qid = DNS_QID(disp); 1008 LOCK(&qid->lock); 1009 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, 1010 blink); 1011 UNLOCK(&qid->lock); 1012 } 1013 if (dispsock->task != NULL) 1014 isc_task_detach(&dispsock->task); 1015 isc_mempool_put(disp->mgr->spool, dispsock); 1016 1017 *dispsockp = NULL; 1018} 1019 1020/*% 1021 * Deactivate a dedicated dispatch socket. Move it to the inactive list for 1022 * future reuse unless the total number of sockets are exceeding the maximum. 1023 */ 1024static void 1025deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) { 1026 isc_result_t result; 1027 dns_qid_t *qid; 1028 1029 /* 1030 * The dispatch must be locked. 
1031 */ 1032 ISC_LIST_UNLINK(disp->activesockets, dispsock, link); 1033 if (dispsock->resp != NULL) { 1034 INSIST(dispsock->resp->dispsocket == dispsock); 1035 dispsock->resp->dispsocket = NULL; 1036 } 1037 1038 INSIST(dispsock->portentry != NULL); 1039 deref_portentry(disp, &dispsock->portentry); 1040 1041#ifdef BIND9 1042 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS) 1043 destroy_dispsocket(disp, &dispsock); 1044 else { 1045 result = isc_socket_close(dispsock->socket); 1046 1047 qid = DNS_QID(disp); 1048 LOCK(&qid->lock); 1049 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, 1050 blink); 1051 UNLOCK(&qid->lock); 1052 1053 if (result == ISC_R_SUCCESS) 1054 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link); 1055 else { 1056 /* 1057 * If the underlying system does not allow this 1058 * optimization, destroy this temporary structure (and 1059 * create a new one for a new transaction). 1060 */ 1061 INSIST(result == ISC_R_NOTIMPLEMENTED); 1062 destroy_dispsocket(disp, &dispsock); 1063 } 1064 } 1065#else 1066 /* This kind of optimization isn't necessary for normal use */ 1067 UNUSED(qid); 1068 UNUSED(result); 1069 1070 destroy_dispsocket(disp, &dispsock); 1071#endif 1072} 1073 1074/* 1075 * Find an entry for query ID 'id', socket address 'dest', and port number 1076 * 'port'. 1077 * Return NULL if no such entry exists. 
1078 */ 1079static dns_dispentry_t * 1080entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id, 1081 in_port_t port, unsigned int bucket) 1082{ 1083 dns_dispentry_t *res; 1084 1085 REQUIRE(VALID_QID(qid)); 1086 REQUIRE(bucket < qid->qid_nbuckets); 1087 1088 res = ISC_LIST_HEAD(qid->qid_table[bucket]); 1089 1090 while (res != NULL) { 1091 if (res->id == id && isc_sockaddr_equal(dest, &res->host) && 1092 res->port == port) { 1093 return (res); 1094 } 1095 res = ISC_LIST_NEXT(res, link); 1096 } 1097 1098 return (NULL); 1099} 1100 1101static void 1102free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) { 1103 isc_mempool_t *bpool; 1104 INSIST(buf != NULL && len != 0); 1105 1106 1107 switch (disp->socktype) { 1108 case isc_sockettype_tcp: 1109 INSIST(disp->tcpbuffers > 0); 1110 disp->tcpbuffers--; 1111 isc_mem_put(disp->mgr->mctx, buf, len); 1112 break; 1113 case isc_sockettype_udp: 1114 LOCK(&disp->mgr->buffer_lock); 1115 INSIST(disp->mgr->buffers > 0); 1116 INSIST(len == disp->mgr->buffersize); 1117 disp->mgr->buffers--; 1118 bpool = disp->mgr->bpool; 1119 UNLOCK(&disp->mgr->buffer_lock); 1120 isc_mempool_put(bpool, buf); 1121 break; 1122 default: 1123 INSIST(0); 1124 break; 1125 } 1126} 1127 1128static void * 1129allocate_udp_buffer(dns_dispatch_t *disp) { 1130 isc_mempool_t *bpool; 1131 void *temp; 1132 1133 LOCK(&disp->mgr->buffer_lock); 1134 bpool = disp->mgr->bpool; 1135 disp->mgr->buffers++; 1136 UNLOCK(&disp->mgr->buffer_lock); 1137 1138 temp = isc_mempool_get(bpool); 1139 1140 if (temp == NULL) { 1141 LOCK(&disp->mgr->buffer_lock); 1142 disp->mgr->buffers--; 1143 UNLOCK(&disp->mgr->buffer_lock); 1144 } 1145 1146 return (temp); 1147} 1148 1149static inline void 1150free_sevent(isc_event_t *ev) { 1151 isc_mempool_t *pool = ev->ev_destroy_arg; 1152 isc_socketevent_t *sev = (isc_socketevent_t *) ev; 1153 isc_mempool_put(pool, sev); 1154} 1155 1156static inline isc_socketevent_t * 1157allocate_sevent(dns_dispatch_t *disp, isc_socket_t 
*socket, 1158 isc_eventtype_t type, isc_taskaction_t action, const void *arg) 1159{ 1160 isc_socketevent_t *ev; 1161 void *deconst_arg; 1162 1163 ev = isc_mempool_get(disp->sepool); 1164 if (ev == NULL) 1165 return (NULL); 1166 DE_CONST(arg, deconst_arg); 1167 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type, 1168 action, deconst_arg, socket, 1169 free_sevent, disp->sepool); 1170 ev->result = ISC_R_UNSET; 1171 ISC_LINK_INIT(ev, ev_link); 1172 ISC_LIST_INIT(ev->bufferlist); 1173 ev->region.base = NULL; 1174 ev->n = 0; 1175 ev->offset = 0; 1176 ev->attributes = 0; 1177 1178 return (ev); 1179} 1180 1181 1182static inline void 1183free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) { 1184 if (disp->failsafe_ev == ev) { 1185 INSIST(disp->shutdown_out == 1); 1186 disp->shutdown_out = 0; 1187 1188 return; 1189 } 1190 1191 isc_mempool_put(disp->mgr->depool, ev); 1192} 1193 1194static inline dns_dispatchevent_t * 1195allocate_devent(dns_dispatch_t *disp) { 1196 dns_dispatchevent_t *ev; 1197 1198 ev = isc_mempool_get(disp->mgr->depool); 1199 if (ev == NULL) 1200 return (NULL); 1201 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0, 1202 NULL, NULL, NULL, NULL, NULL); 1203 1204 return (ev); 1205} 1206 1207static void 1208udp_exrecv(isc_task_t *task, isc_event_t *ev) { 1209 dispsocket_t *dispsock = ev->ev_arg; 1210 1211 UNUSED(task); 1212 1213 REQUIRE(VALID_DISPSOCK(dispsock)); 1214 udp_recv(ev, dispsock->disp, dispsock); 1215} 1216 1217static void 1218udp_shrecv(isc_task_t *task, isc_event_t *ev) { 1219 dns_dispatch_t *disp = ev->ev_arg; 1220 1221 UNUSED(task); 1222 1223 REQUIRE(VALID_DISPATCH(disp)); 1224 udp_recv(ev, disp, NULL); 1225} 1226 1227/* 1228 * General flow: 1229 * 1230 * If I/O result == CANCELED or error, free the buffer. 1231 * 1232 * If query, free the buffer, restart. 1233 * 1234 * If response: 1235 * Allocate event, fill in details. 1236 * If cannot allocate, free buffer, restart. 1237 * find target. If not found, free buffer, restart. 
1238 * if event queue is not empty, queue. else, send. 1239 * restart. 1240 */ 1241static void 1242udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) { 1243 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in; 1244 dns_messageid_t id; 1245 isc_result_t dres; 1246 isc_buffer_t source; 1247 unsigned int flags; 1248 dns_dispentry_t *resp = NULL; 1249 dns_dispatchevent_t *rev; 1250 unsigned int bucket; 1251 isc_boolean_t killit; 1252 isc_boolean_t queue_response; 1253 dns_dispatchmgr_t *mgr; 1254 dns_qid_t *qid; 1255 isc_netaddr_t netaddr; 1256 int match; 1257 int result; 1258 isc_boolean_t qidlocked = ISC_FALSE; 1259 1260 LOCK(&disp->lock); 1261 1262 mgr = disp->mgr; 1263 qid = mgr->qid; 1264 1265 dispatch_log(disp, LVL(90), 1266 "got packet: requests %d, buffers %d, recvs %d", 1267 disp->requests, disp->mgr->buffers, disp->recv_pending); 1268 1269 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) { 1270 /* 1271 * Unless the receive event was imported from a listening 1272 * interface, in which case the event type is 1273 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending. 1274 */ 1275 INSIST(disp->recv_pending != 0); 1276 disp->recv_pending = 0; 1277 } 1278 1279 if (dispsock != NULL && 1280 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) { 1281 /* 1282 * dispsock->resp can be NULL if this transaction was canceled 1283 * just after receiving a response. Since this socket is 1284 * exclusively used and there should be at most one receive 1285 * event the canceled event should have been no effect. So 1286 * we can (and should) deactivate the socket right now. 1287 */ 1288 deactivate_dispsocket(disp, dispsock); 1289 dispsock = NULL; 1290 } 1291 1292 if (disp->shutting_down) { 1293 /* 1294 * This dispatcher is shutting down. 
1295 */ 1296 free_buffer(disp, ev->region.base, ev->region.length); 1297 1298 isc_event_free(&ev_in); 1299 ev = NULL; 1300 1301 killit = destroy_disp_ok(disp); 1302 UNLOCK(&disp->lock); 1303 if (killit) 1304 isc_task_send(disp->task[0], &disp->ctlevent); 1305 1306 return; 1307 } 1308 1309 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 1310 if (dispsock != NULL) { 1311 resp = dispsock->resp; 1312 id = resp->id; 1313 if (ev->result != ISC_R_SUCCESS) { 1314 /* 1315 * This is most likely a network error on a 1316 * connected socket. It makes no sense to 1317 * check the address or parse the packet, but it 1318 * will help to return the error to the caller. 1319 */ 1320 goto sendresponse; 1321 } 1322 } else { 1323 free_buffer(disp, ev->region.base, ev->region.length); 1324 1325 UNLOCK(&disp->lock); 1326 isc_event_free(&ev_in); 1327 return; 1328 } 1329 } else if (ev->result != ISC_R_SUCCESS) { 1330 free_buffer(disp, ev->region.base, ev->region.length); 1331 1332 if (ev->result != ISC_R_CANCELED) 1333 dispatch_log(disp, ISC_LOG_ERROR, 1334 "odd socket result in udp_recv(): %s", 1335 isc_result_totext(ev->result)); 1336 1337 UNLOCK(&disp->lock); 1338 isc_event_free(&ev_in); 1339 return; 1340 } 1341 1342 /* 1343 * If this is from a blackholed address, drop it. 1344 */ 1345 isc_netaddr_fromsockaddr(&netaddr, &ev->address); 1346 if (disp->mgr->blackhole != NULL && 1347 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole, 1348 NULL, &match, NULL) == ISC_R_SUCCESS && 1349 match > 0) 1350 { 1351 if (isc_log_wouldlog(dns_lctx, LVL(10))) { 1352 char netaddrstr[ISC_NETADDR_FORMATSIZE]; 1353 isc_netaddr_format(&netaddr, netaddrstr, 1354 sizeof(netaddrstr)); 1355 dispatch_log(disp, LVL(10), 1356 "blackholed packet from %s", 1357 netaddrstr); 1358 } 1359 free_buffer(disp, ev->region.base, ev->region.length); 1360 goto restart; 1361 } 1362 1363 /* 1364 * Peek into the buffer to see what we can see. 
1365 */ 1366 isc_buffer_init(&source, ev->region.base, ev->region.length); 1367 isc_buffer_add(&source, ev->n); 1368 dres = dns_message_peekheader(&source, &id, &flags); 1369 if (dres != ISC_R_SUCCESS) { 1370 free_buffer(disp, ev->region.base, ev->region.length); 1371 dispatch_log(disp, LVL(10), "got garbage packet"); 1372 goto restart; 1373 } 1374 1375 dispatch_log(disp, LVL(92), 1376 "got valid DNS message header, /QR %c, id %u", 1377 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id); 1378 1379 /* 1380 * Look at flags. If query, drop it. If response, 1381 * look to see where it goes. 1382 */ 1383 if ((flags & DNS_MESSAGEFLAG_QR) == 0) { 1384 /* query */ 1385 free_buffer(disp, ev->region.base, ev->region.length); 1386 goto restart; 1387 } 1388 1389 /* 1390 * Search for the corresponding response. If we are using an exclusive 1391 * socket, we've already identified it and we can skip the search; but 1392 * the ID and the address must match the expected ones. 1393 */ 1394 if (resp == NULL) { 1395 bucket = dns_hash(qid, &ev->address, id, disp->localport); 1396 LOCK(&qid->lock); 1397 qidlocked = ISC_TRUE; 1398 resp = entry_search(qid, &ev->address, id, disp->localport, 1399 bucket); 1400 dispatch_log(disp, LVL(90), 1401 "search for response in bucket %d: %s", 1402 bucket, (resp == NULL ? "not found" : "found")); 1403 1404 if (resp == NULL) { 1405 inc_stats(mgr, dns_resstatscounter_mismatch); 1406 free_buffer(disp, ev->region.base, ev->region.length); 1407 goto unlock; 1408 } 1409 } else if (resp->id != id || !isc_sockaddr_equal(&ev->address, 1410 &resp->host)) { 1411 dispatch_log(disp, LVL(90), 1412 "response to an exclusive socket doesn't match"); 1413 inc_stats(mgr, dns_resstatscounter_mismatch); 1414 free_buffer(disp, ev->region.base, ev->region.length); 1415 goto unlock; 1416 } 1417 1418 /* 1419 * Now that we have the original dispatch the query was sent 1420 * from check that the address and port the response was 1421 * sent to make sense. 
1422 */ 1423 if (disp != resp->disp) { 1424 isc_sockaddr_t a1; 1425 isc_sockaddr_t a2; 1426 1427 /* 1428 * Check that the socket types and ports match. 1429 */ 1430 if (disp->socktype != resp->disp->socktype || 1431 isc_sockaddr_getport(&disp->local) != 1432 isc_sockaddr_getport(&resp->disp->local)) { 1433 free_buffer(disp, ev->region.base, ev->region.length); 1434 goto unlock; 1435 } 1436 1437 /* 1438 * If each dispatch is bound to a different address 1439 * then fail. 1440 * 1441 * Note under Linux a packet can be sent out via IPv4 socket 1442 * and the response be received via a IPv6 socket. 1443 * 1444 * Requests sent out via IPv6 should always come back in 1445 * via IPv6. 1446 */ 1447 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 && 1448 isc_sockaddr_pf(&disp->local) != PF_INET6) { 1449 free_buffer(disp, ev->region.base, ev->region.length); 1450 goto unlock; 1451 } 1452 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local)); 1453 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local)); 1454 if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) && 1455 !isc_sockaddr_eqaddr(&a1, &resp->disp->local) && 1456 !isc_sockaddr_eqaddr(&a2, &disp->local)) { 1457 free_buffer(disp, ev->region.base, ev->region.length); 1458 goto unlock; 1459 } 1460 } 1461 1462 sendresponse: 1463 queue_response = resp->item_out; 1464 rev = allocate_devent(resp->disp); 1465 if (rev == NULL) { 1466 free_buffer(disp, ev->region.base, ev->region.length); 1467 goto unlock; 1468 } 1469 1470 /* 1471 * At this point, rev contains the event we want to fill in, and 1472 * resp contains the information on the place to send it to. 1473 * Send the event off. 
1474 */ 1475 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length); 1476 isc_buffer_add(&rev->buffer, ev->n); 1477 rev->result = ev->result; 1478 rev->id = id; 1479 rev->addr = ev->address; 1480 rev->pktinfo = ev->pktinfo; 1481 rev->attributes = ev->attributes; 1482 if (queue_response) { 1483 ISC_LIST_APPEND(resp->items, rev, ev_link); 1484 } else { 1485 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, 1486 DNS_EVENT_DISPATCH, 1487 resp->action, resp->arg, resp, NULL, NULL); 1488 request_log(disp, resp, LVL(90), 1489 "[a] Sent event %p buffer %p len %d to task %p", 1490 rev, rev->buffer.base, rev->buffer.length, 1491 resp->task); 1492 resp->item_out = ISC_TRUE; 1493 isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); 1494 } 1495 unlock: 1496 if (qidlocked) 1497 UNLOCK(&qid->lock); 1498 1499 /* 1500 * Restart recv() to get the next packet. 1501 */ 1502 restart: 1503 result = startrecv(disp, dispsock); 1504 if (result != ISC_R_SUCCESS && dispsock != NULL) { 1505 /* 1506 * XXX: wired. There seems to be no recovery process other than 1507 * deactivate this socket anyway (since we cannot start 1508 * receiving, we won't be able to receive a cancel event 1509 * from the user). 1510 */ 1511 deactivate_dispsocket(disp, dispsock); 1512 } 1513 UNLOCK(&disp->lock); 1514 1515 isc_event_free(&ev_in); 1516} 1517 1518/* 1519 * General flow: 1520 * 1521 * If I/O result == CANCELED, EOF, or error, notify everyone as the 1522 * various queues drain. 1523 * 1524 * If query, restart. 1525 * 1526 * If response: 1527 * Allocate event, fill in details. 1528 * If cannot allocate, restart. 1529 * find target. If not found, restart. 1530 * if event queue is not empty, queue. else, send. 1531 * restart. 
1532 */ 1533static void 1534tcp_recv(isc_task_t *task, isc_event_t *ev_in) { 1535 dns_dispatch_t *disp = ev_in->ev_arg; 1536 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg; 1537 dns_messageid_t id; 1538 isc_result_t dres; 1539 unsigned int flags; 1540 dns_dispentry_t *resp; 1541 dns_dispatchevent_t *rev; 1542 unsigned int bucket; 1543 isc_boolean_t killit; 1544 isc_boolean_t queue_response; 1545 dns_qid_t *qid; 1546 int level; 1547 char buf[ISC_SOCKADDR_FORMATSIZE]; 1548 1549 UNUSED(task); 1550 1551 REQUIRE(VALID_DISPATCH(disp)); 1552 1553 qid = disp->qid; 1554 1555 dispatch_log(disp, LVL(90), 1556 "got TCP packet: requests %d, buffers %d, recvs %d", 1557 disp->requests, disp->tcpbuffers, disp->recv_pending); 1558 1559 LOCK(&disp->lock); 1560 1561 INSIST(disp->recv_pending != 0); 1562 disp->recv_pending = 0; 1563 1564 if (disp->refcount == 0) { 1565 /* 1566 * This dispatcher is shutting down. Force cancelation. 1567 */ 1568 tcpmsg->result = ISC_R_CANCELED; 1569 } 1570 1571 if (tcpmsg->result != ISC_R_SUCCESS) { 1572 switch (tcpmsg->result) { 1573 case ISC_R_CANCELED: 1574 break; 1575 1576 case ISC_R_EOF: 1577 dispatch_log(disp, LVL(90), "shutting down on EOF"); 1578 do_cancel(disp); 1579 break; 1580 1581 case ISC_R_CONNECTIONRESET: 1582 level = ISC_LOG_INFO; 1583 goto logit; 1584 1585 default: 1586 level = ISC_LOG_ERROR; 1587 logit: 1588 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf)); 1589 dispatch_log(disp, level, "shutting down due to TCP " 1590 "receive error: %s: %s", buf, 1591 isc_result_totext(tcpmsg->result)); 1592 do_cancel(disp); 1593 break; 1594 } 1595 1596 /* 1597 * The event is statically allocated in the tcpmsg 1598 * structure, and destroy_disp() frees the tcpmsg, so we must 1599 * free the event *before* calling destroy_disp(). 1600 */ 1601 isc_event_free(&ev_in); 1602 1603 disp->shutting_down = 1; 1604 disp->shutdown_why = tcpmsg->result; 1605 1606 /* 1607 * If the recv() was canceled pass the word on. 
1608 */ 1609 killit = destroy_disp_ok(disp); 1610 UNLOCK(&disp->lock); 1611 if (killit) 1612 isc_task_send(disp->task[0], &disp->ctlevent); 1613 return; 1614 } 1615 1616 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p", 1617 tcpmsg->result, 1618 tcpmsg->buffer.length, tcpmsg->buffer.base); 1619 1620 /* 1621 * Peek into the buffer to see what we can see. 1622 */ 1623 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags); 1624 if (dres != ISC_R_SUCCESS) { 1625 dispatch_log(disp, LVL(10), "got garbage packet"); 1626 goto restart; 1627 } 1628 1629 dispatch_log(disp, LVL(92), 1630 "got valid DNS message header, /QR %c, id %u", 1631 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id); 1632 1633 /* 1634 * Allocate an event to send to the query or response client, and 1635 * allocate a new buffer for our use. 1636 */ 1637 1638 /* 1639 * Look at flags. If query, drop it. If response, 1640 * look to see where it goes. 1641 */ 1642 if ((flags & DNS_MESSAGEFLAG_QR) == 0) { 1643 /* 1644 * Query. 1645 */ 1646 goto restart; 1647 } 1648 1649 /* 1650 * Response. 1651 */ 1652 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport); 1653 LOCK(&qid->lock); 1654 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket); 1655 dispatch_log(disp, LVL(90), 1656 "search for response in bucket %d: %s", 1657 bucket, (resp == NULL ? "not found" : "found")); 1658 1659 if (resp == NULL) 1660 goto unlock; 1661 queue_response = resp->item_out; 1662 rev = allocate_devent(disp); 1663 if (rev == NULL) 1664 goto unlock; 1665 1666 /* 1667 * At this point, rev contains the event we want to fill in, and 1668 * resp contains the information on the place to send it to. 1669 * Send the event off. 
1670 */ 1671 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer); 1672 disp->tcpbuffers++; 1673 rev->result = ISC_R_SUCCESS; 1674 rev->id = id; 1675 rev->addr = tcpmsg->address; 1676 if (queue_response) { 1677 ISC_LIST_APPEND(resp->items, rev, ev_link); 1678 } else { 1679 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, 1680 resp->action, resp->arg, resp, NULL, NULL); 1681 request_log(disp, resp, LVL(90), 1682 "[b] Sent event %p buffer %p len %d to task %p", 1683 rev, rev->buffer.base, rev->buffer.length, 1684 resp->task); 1685 resp->item_out = ISC_TRUE; 1686 isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); 1687 } 1688 unlock: 1689 UNLOCK(&qid->lock); 1690 1691 /* 1692 * Restart recv() to get the next packet. 1693 */ 1694 restart: 1695 (void)startrecv(disp, NULL); 1696 1697 UNLOCK(&disp->lock); 1698 1699 isc_event_free(&ev_in); 1700} 1701 1702/* 1703 * disp must be locked. 1704 */ 1705static isc_result_t 1706startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) { 1707 isc_result_t res; 1708 isc_region_t region; 1709 isc_socket_t *socket; 1710 1711 if (disp->shutting_down == 1) 1712 return (ISC_R_SUCCESS); 1713 1714 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) 1715 return (ISC_R_SUCCESS); 1716 1717 if (disp->recv_pending != 0 && dispsock == NULL) 1718 return (ISC_R_SUCCESS); 1719 1720 if (disp->mgr->buffers >= disp->mgr->maxbuffers) 1721 return (ISC_R_NOMEMORY); 1722 1723 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && 1724 dispsock == NULL) 1725 return (ISC_R_SUCCESS); 1726 1727 if (dispsock != NULL) 1728 socket = dispsock->socket; 1729 else 1730 socket = disp->socket; 1731 INSIST(socket != NULL); 1732 1733 switch (disp->socktype) { 1734 /* 1735 * UDP reads are always maximal. 
1736 */ 1737 case isc_sockettype_udp: 1738 region.length = disp->mgr->buffersize; 1739 region.base = allocate_udp_buffer(disp); 1740 if (region.base == NULL) 1741 return (ISC_R_NOMEMORY); 1742 if (dispsock != NULL) { 1743 isc_task_t *dt = dispsock->task; 1744 isc_socketevent_t *sev = 1745 allocate_sevent(disp, socket, 1746 ISC_SOCKEVENT_RECVDONE, 1747 udp_exrecv, dispsock); 1748 if (sev == NULL) { 1749 free_buffer(disp, region.base, region.length); 1750 return (ISC_R_NOMEMORY); 1751 } 1752 1753 res = isc_socket_recv2(socket, ®ion, 1, dt, sev, 0); 1754 if (res != ISC_R_SUCCESS) { 1755 free_buffer(disp, region.base, region.length); 1756 return (res); 1757 } 1758 } else { 1759 isc_task_t *dt = disp->task[0]; 1760 isc_socketevent_t *sev = 1761 allocate_sevent(disp, socket, 1762 ISC_SOCKEVENT_RECVDONE, 1763 udp_shrecv, disp); 1764 if (sev == NULL) { 1765 free_buffer(disp, region.base, region.length); 1766 return (ISC_R_NOMEMORY); 1767 } 1768 1769 res = isc_socket_recv2(socket, ®ion, 1, dt, sev, 0); 1770 if (res != ISC_R_SUCCESS) { 1771 free_buffer(disp, region.base, region.length); 1772 disp->shutdown_why = res; 1773 disp->shutting_down = 1; 1774 do_cancel(disp); 1775 return (ISC_R_SUCCESS); /* recover by cancel */ 1776 } 1777 INSIST(disp->recv_pending == 0); 1778 disp->recv_pending = 1; 1779 } 1780 break; 1781 1782 case isc_sockettype_tcp: 1783 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0], 1784 tcp_recv, disp); 1785 if (res != ISC_R_SUCCESS) { 1786 disp->shutdown_why = res; 1787 disp->shutting_down = 1; 1788 do_cancel(disp); 1789 return (ISC_R_SUCCESS); /* recover by cancel */ 1790 } 1791 INSIST(disp->recv_pending == 0); 1792 disp->recv_pending = 1; 1793 break; 1794 default: 1795 INSIST(0); 1796 break; 1797 } 1798 1799 return (ISC_R_SUCCESS); 1800} 1801 1802/* 1803 * Mgr must be locked when calling this function. 
1804 */ 1805static isc_boolean_t 1806destroy_mgr_ok(dns_dispatchmgr_t *mgr) { 1807 mgr_log(mgr, LVL(90), 1808 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, " 1809 "depool=%d, rpool=%d, dpool=%d", 1810 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list), 1811 isc_mempool_getallocated(mgr->depool), 1812 isc_mempool_getallocated(mgr->rpool), 1813 isc_mempool_getallocated(mgr->dpool)); 1814 if (!MGR_IS_SHUTTINGDOWN(mgr)) 1815 return (ISC_FALSE); 1816 if (!ISC_LIST_EMPTY(mgr->list)) 1817 return (ISC_FALSE); 1818 if (isc_mempool_getallocated(mgr->depool) != 0) 1819 return (ISC_FALSE); 1820 if (isc_mempool_getallocated(mgr->rpool) != 0) 1821 return (ISC_FALSE); 1822 if (isc_mempool_getallocated(mgr->dpool) != 0) 1823 return (ISC_FALSE); 1824 1825 return (ISC_TRUE); 1826} 1827 1828/* 1829 * Mgr must be unlocked when calling this function. 1830 */ 1831static void 1832destroy_mgr(dns_dispatchmgr_t **mgrp) { 1833 isc_mem_t *mctx; 1834 dns_dispatchmgr_t *mgr; 1835 1836 mgr = *mgrp; 1837 *mgrp = NULL; 1838 1839 mctx = mgr->mctx; 1840 1841 mgr->magic = 0; 1842 mgr->mctx = NULL; 1843 DESTROYLOCK(&mgr->lock); 1844 mgr->state = 0; 1845 1846 DESTROYLOCK(&mgr->arc4_lock); 1847 1848 isc_mempool_destroy(&mgr->depool); 1849 isc_mempool_destroy(&mgr->rpool); 1850 isc_mempool_destroy(&mgr->dpool); 1851 if (mgr->bpool != NULL) 1852 isc_mempool_destroy(&mgr->bpool); 1853 if (mgr->spool != NULL) 1854 isc_mempool_destroy(&mgr->spool); 1855 1856 DESTROYLOCK(&mgr->spool_lock); 1857 DESTROYLOCK(&mgr->bpool_lock); 1858 DESTROYLOCK(&mgr->dpool_lock); 1859 DESTROYLOCK(&mgr->rpool_lock); 1860 DESTROYLOCK(&mgr->depool_lock); 1861 1862#ifdef BIND9 1863 if (mgr->entropy != NULL) 1864 isc_entropy_detach(&mgr->entropy); 1865#endif /* BIND9 */ 1866 if (mgr->qid != NULL) 1867 qid_destroy(mctx, &mgr->qid); 1868 1869 DESTROYLOCK(&mgr->buffer_lock); 1870 1871 if (mgr->blackhole != NULL) 1872 dns_acl_detach(&mgr->blackhole); 1873 1874 if (mgr->stats != NULL) 1875 isc_stats_detach(&mgr->stats); 1876 1877 
if (mgr->v4ports != NULL) { 1878 isc_mem_put(mctx, mgr->v4ports, 1879 mgr->nv4ports * sizeof(in_port_t)); 1880 } 1881 if (mgr->v6ports != NULL) { 1882 isc_mem_put(mctx, mgr->v6ports, 1883 mgr->nv6ports * sizeof(in_port_t)); 1884 } 1885 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t)); 1886 isc_mem_detach(&mctx); 1887} 1888 1889static isc_result_t 1890open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, 1891 unsigned int options, isc_socket_t **sockp, 1892 isc_socket_t *dup_socket) 1893{ 1894 isc_socket_t *sock; 1895 isc_result_t result; 1896 1897 sock = *sockp; 1898 if (sock != NULL) { 1899#ifdef BIND9 1900 result = isc_socket_open(sock); 1901 if (result != ISC_R_SUCCESS) 1902 return (result); 1903#else 1904 INSIST(0); 1905#endif 1906 } else if (dup_socket != NULL) { 1907 result = isc_socket_dup(dup_socket, &sock); 1908 if (result != ISC_R_SUCCESS) 1909 return (result); 1910 1911 isc_socket_setname(sock, "dispatcher", NULL); 1912 *sockp = sock; 1913 return (ISC_R_SUCCESS); 1914 } else { 1915 result = isc_socket_create(mgr, isc_sockaddr_pf(local), 1916 isc_sockettype_udp, &sock); 1917 if (result != ISC_R_SUCCESS) 1918 return (result); 1919 } 1920 1921 isc_socket_setname(sock, "dispatcher", NULL); 1922 1923#ifndef ISC_ALLOW_MAPPED 1924 isc_socket_ipv6only(sock, ISC_TRUE); 1925#endif 1926 result = isc_socket_bind(sock, local, options); 1927 if (result != ISC_R_SUCCESS) { 1928 if (*sockp == NULL) 1929 isc_socket_detach(&sock); 1930 else { 1931#ifdef BIND9 1932 isc_socket_close(sock); 1933#else 1934 INSIST(0); 1935#endif 1936 } 1937 return (result); 1938 } 1939 1940 *sockp = sock; 1941 return (ISC_R_SUCCESS); 1942} 1943 1944/*% 1945 * Create a temporary port list to set the initial default set of dispatch 1946 * ports: [1024, 65535]. This is almost meaningless as the application will 1947 * normally set the ports explicitly, but is provided to fill some minor corner 1948 * cases. 
1949 */ 1950static isc_result_t 1951create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) { 1952 isc_result_t result; 1953 1954 result = isc_portset_create(mctx, portsetp); 1955 if (result != ISC_R_SUCCESS) 1956 return (result); 1957 isc_portset_addrange(*portsetp, 1024, 65535); 1958 1959 return (ISC_R_SUCCESS); 1960} 1961 1962/* 1963 * Publics. 1964 */ 1965 1966isc_result_t 1967dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy, 1968 dns_dispatchmgr_t **mgrp) 1969{ 1970 dns_dispatchmgr_t *mgr; 1971 isc_result_t result; 1972 isc_portset_t *v4portset = NULL; 1973 isc_portset_t *v6portset = NULL; 1974 1975 REQUIRE(mctx != NULL); 1976 REQUIRE(mgrp != NULL && *mgrp == NULL); 1977 1978 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t)); 1979 if (mgr == NULL) 1980 return (ISC_R_NOMEMORY); 1981 1982 mgr->mctx = NULL; 1983 isc_mem_attach(mctx, &mgr->mctx); 1984 1985 mgr->blackhole = NULL; 1986 mgr->stats = NULL; 1987 1988 result = isc_mutex_init(&mgr->lock); 1989 if (result != ISC_R_SUCCESS) 1990 goto deallocate; 1991 1992 result = isc_mutex_init(&mgr->arc4_lock); 1993 if (result != ISC_R_SUCCESS) 1994 goto kill_lock; 1995 1996 result = isc_mutex_init(&mgr->buffer_lock); 1997 if (result != ISC_R_SUCCESS) 1998 goto kill_arc4_lock; 1999 2000 result = isc_mutex_init(&mgr->depool_lock); 2001 if (result != ISC_R_SUCCESS) 2002 goto kill_buffer_lock; 2003 2004 result = isc_mutex_init(&mgr->rpool_lock); 2005 if (result != ISC_R_SUCCESS) 2006 goto kill_depool_lock; 2007 2008 result = isc_mutex_init(&mgr->dpool_lock); 2009 if (result != ISC_R_SUCCESS) 2010 goto kill_rpool_lock; 2011 2012 result = isc_mutex_init(&mgr->bpool_lock); 2013 if (result != ISC_R_SUCCESS) 2014 goto kill_dpool_lock; 2015 2016 result = isc_mutex_init(&mgr->spool_lock); 2017 if (result != ISC_R_SUCCESS) 2018 goto kill_bpool_lock; 2019 2020 mgr->depool = NULL; 2021 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t), 2022 &mgr->depool) != ISC_R_SUCCESS) { 2023 result = 
ISC_R_NOMEMORY; 2024 goto kill_spool_lock; 2025 } 2026 2027 mgr->rpool = NULL; 2028 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t), 2029 &mgr->rpool) != ISC_R_SUCCESS) { 2030 result = ISC_R_NOMEMORY; 2031 goto kill_depool; 2032 } 2033 2034 mgr->dpool = NULL; 2035 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t), 2036 &mgr->dpool) != ISC_R_SUCCESS) { 2037 result = ISC_R_NOMEMORY; 2038 goto kill_rpool; 2039 } 2040 2041 isc_mempool_setname(mgr->depool, "dispmgr_depool"); 2042 isc_mempool_setmaxalloc(mgr->depool, 32768); 2043 isc_mempool_setfreemax(mgr->depool, 32768); 2044 isc_mempool_associatelock(mgr->depool, &mgr->depool_lock); 2045 isc_mempool_setfillcount(mgr->depool, 256); 2046 2047 isc_mempool_setname(mgr->rpool, "dispmgr_rpool"); 2048 isc_mempool_setmaxalloc(mgr->rpool, 32768); 2049 isc_mempool_setfreemax(mgr->rpool, 32768); 2050 isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock); 2051 isc_mempool_setfillcount(mgr->rpool, 256); 2052 2053 isc_mempool_setname(mgr->dpool, "dispmgr_dpool"); 2054 isc_mempool_setmaxalloc(mgr->dpool, 32768); 2055 isc_mempool_setfreemax(mgr->dpool, 32768); 2056 isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock); 2057 isc_mempool_setfillcount(mgr->dpool, 256); 2058 2059 mgr->buffers = 0; 2060 mgr->buffersize = 0; 2061 mgr->maxbuffers = 0; 2062 mgr->bpool = NULL; 2063 mgr->spool = NULL; 2064 mgr->entropy = NULL; 2065 mgr->qid = NULL; 2066 mgr->state = 0; 2067 ISC_LIST_INIT(mgr->list); 2068 mgr->v4ports = NULL; 2069 mgr->v6ports = NULL; 2070 mgr->nv4ports = 0; 2071 mgr->nv6ports = 0; 2072 mgr->magic = DNS_DISPATCHMGR_MAGIC; 2073 2074 result = create_default_portset(mctx, &v4portset); 2075 if (result == ISC_R_SUCCESS) { 2076 result = create_default_portset(mctx, &v6portset); 2077 if (result == ISC_R_SUCCESS) { 2078 result = dns_dispatchmgr_setavailports(mgr, 2079 v4portset, 2080 v6portset); 2081 } 2082 } 2083 if (v4portset != NULL) 2084 isc_portset_destroy(mctx, &v4portset); 2085 if (v6portset != NULL) 2086 
isc_portset_destroy(mctx, &v6portset); 2087 if (result != ISC_R_SUCCESS) 2088 goto kill_dpool; 2089 2090#ifdef BIND9 2091 if (entropy != NULL) 2092 isc_entropy_attach(entropy, &mgr->entropy); 2093#else 2094 UNUSED(entropy); 2095#endif 2096 2097 dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock); 2098 2099 *mgrp = mgr; 2100 return (ISC_R_SUCCESS); 2101 2102 kill_dpool: 2103 isc_mempool_destroy(&mgr->dpool); 2104 kill_rpool: 2105 isc_mempool_destroy(&mgr->rpool); 2106 kill_depool: 2107 isc_mempool_destroy(&mgr->depool); 2108 kill_spool_lock: 2109 DESTROYLOCK(&mgr->spool_lock); 2110 kill_bpool_lock: 2111 DESTROYLOCK(&mgr->bpool_lock); 2112 kill_dpool_lock: 2113 DESTROYLOCK(&mgr->dpool_lock); 2114 kill_rpool_lock: 2115 DESTROYLOCK(&mgr->rpool_lock); 2116 kill_depool_lock: 2117 DESTROYLOCK(&mgr->depool_lock); 2118 kill_buffer_lock: 2119 DESTROYLOCK(&mgr->buffer_lock); 2120 kill_arc4_lock: 2121 DESTROYLOCK(&mgr->arc4_lock); 2122 kill_lock: 2123 DESTROYLOCK(&mgr->lock); 2124 deallocate: 2125 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t)); 2126 isc_mem_detach(&mctx); 2127 2128 return (result); 2129} 2130 2131void 2132dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) { 2133 REQUIRE(VALID_DISPATCHMGR(mgr)); 2134 if (mgr->blackhole != NULL) 2135 dns_acl_detach(&mgr->blackhole); 2136 dns_acl_attach(blackhole, &mgr->blackhole); 2137} 2138 2139dns_acl_t * 2140dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) { 2141 REQUIRE(VALID_DISPATCHMGR(mgr)); 2142 return (mgr->blackhole); 2143} 2144 2145void 2146dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr, 2147 dns_portlist_t *portlist) 2148{ 2149 REQUIRE(VALID_DISPATCHMGR(mgr)); 2150 UNUSED(portlist); 2151 2152 /* This function is deprecated: use dns_dispatchmgr_setavailports(). 
*/ 2153 return; 2154} 2155 2156dns_portlist_t * 2157dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) { 2158 REQUIRE(VALID_DISPATCHMGR(mgr)); 2159 return (NULL); /* this function is deprecated */ 2160} 2161 2162isc_result_t 2163dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset, 2164 isc_portset_t *v6portset) 2165{ 2166 in_port_t *v4ports, *v6ports, p; 2167 unsigned int nv4ports, nv6ports, i4, i6; 2168 2169 REQUIRE(VALID_DISPATCHMGR(mgr)); 2170 2171 nv4ports = isc_portset_nports(v4portset); 2172 nv6ports = isc_portset_nports(v6portset); 2173 2174 v4ports = NULL; 2175 if (nv4ports != 0) { 2176 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports); 2177 if (v4ports == NULL) 2178 return (ISC_R_NOMEMORY); 2179 } 2180 v6ports = NULL; 2181 if (nv6ports != 0) { 2182 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports); 2183 if (v6ports == NULL) { 2184 if (v4ports != NULL) { 2185 isc_mem_put(mgr->mctx, v4ports, 2186 sizeof(in_port_t) * 2187 isc_portset_nports(v4portset)); 2188 } 2189 return (ISC_R_NOMEMORY); 2190 } 2191 } 2192 2193 p = 0; 2194 i4 = 0; 2195 i6 = 0; 2196 do { 2197 if (isc_portset_isset(v4portset, p)) { 2198 INSIST(i4 < nv4ports); 2199 v4ports[i4++] = p; 2200 } 2201 if (isc_portset_isset(v6portset, p)) { 2202 INSIST(i6 < nv6ports); 2203 v6ports[i6++] = p; 2204 } 2205 } while (p++ < 65535); 2206 INSIST(i4 == nv4ports && i6 == nv6ports); 2207 2208 PORTBUFLOCK(mgr); 2209 if (mgr->v4ports != NULL) { 2210 isc_mem_put(mgr->mctx, mgr->v4ports, 2211 mgr->nv4ports * sizeof(in_port_t)); 2212 } 2213 mgr->v4ports = v4ports; 2214 mgr->nv4ports = nv4ports; 2215 2216 if (mgr->v6ports != NULL) { 2217 isc_mem_put(mgr->mctx, mgr->v6ports, 2218 mgr->nv6ports * sizeof(in_port_t)); 2219 } 2220 mgr->v6ports = v6ports; 2221 mgr->nv6ports = nv6ports; 2222 PORTBUFUNLOCK(mgr); 2223 2224 return (ISC_R_SUCCESS); 2225} 2226 2227static isc_result_t 2228dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr, 2229 unsigned int buffersize, 
unsigned int maxbuffers, 2230 unsigned int maxrequests, unsigned int buckets, 2231 unsigned int increment) 2232{ 2233 isc_result_t result; 2234 2235 REQUIRE(VALID_DISPATCHMGR(mgr)); 2236 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); 2237 REQUIRE(maxbuffers > 0); 2238 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ 2239 REQUIRE(increment > buckets); 2240 2241 /* 2242 * Keep some number of items around. This should be a config 2243 * option. For now, keep 8, but later keep at least two even 2244 * if the caller wants less. This allows us to ensure certain 2245 * things, like an event can be "freed" and the next allocation 2246 * will always succeed. 2247 * 2248 * Note that if limits are placed on anything here, we use one 2249 * event internally, so the actual limit should be "wanted + 1." 2250 * 2251 * XXXMLG 2252 */ 2253 2254 if (maxbuffers < 8) 2255 maxbuffers = 8; 2256 2257 LOCK(&mgr->buffer_lock); 2258 2259 /* Create or adjust buffer pool */ 2260 if (mgr->bpool != NULL) { 2261 /* 2262 * We only increase the maxbuffers to avoid accidental buffer 2263 * shortage. Ideally we'd separate the manager-wide maximum 2264 * from per-dispatch limits and respect the latter within the 2265 * global limit. But at this moment that's deemed to be 2266 * overkilling and isn't worth additional implementation 2267 * complexity. 
2268 */ 2269 if (maxbuffers > mgr->maxbuffers) { 2270 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); 2271 isc_mempool_setfreemax(mgr->bpool, maxbuffers); 2272 mgr->maxbuffers = maxbuffers; 2273 } 2274 } else { 2275 result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool); 2276 if (result != ISC_R_SUCCESS) { 2277 UNLOCK(&mgr->buffer_lock); 2278 return (result); 2279 } 2280 isc_mempool_setname(mgr->bpool, "dispmgr_bpool"); 2281 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); 2282 isc_mempool_setfreemax(mgr->bpool, maxbuffers); 2283 isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock); 2284 isc_mempool_setfillcount(mgr->bpool, 256); 2285 } 2286 2287 /* Create or adjust socket pool */ 2288 if (mgr->spool != NULL) { 2289 if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2) 2290 isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2); 2291 isc_mempool_setfreemax(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2); 2292 UNLOCK(&mgr->buffer_lock); 2293 return (ISC_R_SUCCESS); 2294 } 2295 result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t), 2296 &mgr->spool); 2297 if (result != ISC_R_SUCCESS) { 2298 UNLOCK(&mgr->buffer_lock); 2299 goto cleanup; 2300 } 2301 isc_mempool_setname(mgr->spool, "dispmgr_spool"); 2302 isc_mempool_setmaxalloc(mgr->spool, maxrequests); 2303 isc_mempool_setfreemax(mgr->spool, maxrequests); 2304 isc_mempool_associatelock(mgr->spool, &mgr->spool_lock); 2305 isc_mempool_setfillcount(mgr->spool, 256); 2306 2307 result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE); 2308 if (result != ISC_R_SUCCESS) 2309 goto cleanup; 2310 2311 mgr->buffersize = buffersize; 2312 mgr->maxbuffers = maxbuffers; 2313 UNLOCK(&mgr->buffer_lock); 2314 return (ISC_R_SUCCESS); 2315 2316 cleanup: 2317 isc_mempool_destroy(&mgr->bpool); 2318 if (mgr->spool != NULL) 2319 isc_mempool_destroy(&mgr->spool); 2320 UNLOCK(&mgr->buffer_lock); 2321 return (result); 2322} 2323 2324void 2325dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) { 2326 dns_dispatchmgr_t *mgr; 
2327 isc_boolean_t killit; 2328 2329 REQUIRE(mgrp != NULL); 2330 REQUIRE(VALID_DISPATCHMGR(*mgrp)); 2331 2332 mgr = *mgrp; 2333 *mgrp = NULL; 2334 2335 LOCK(&mgr->lock); 2336 mgr->state |= MGR_SHUTTINGDOWN; 2337 2338 killit = destroy_mgr_ok(mgr); 2339 UNLOCK(&mgr->lock); 2340 2341 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit); 2342 2343 if (killit) 2344 destroy_mgr(&mgr); 2345} 2346 2347void 2348dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { 2349 REQUIRE(VALID_DISPATCHMGR(mgr)); 2350 REQUIRE(ISC_LIST_EMPTY(mgr->list)); 2351 REQUIRE(mgr->stats == NULL); 2352 2353 isc_stats_attach(stats, &mgr->stats); 2354} 2355 2356static int 2357port_cmp(const void *key, const void *ent) { 2358 in_port_t p1 = *(const in_port_t *)key; 2359 in_port_t p2 = *(const in_port_t *)ent; 2360 2361 if (p1 < p2) 2362 return (-1); 2363 else if (p1 == p2) 2364 return (0); 2365 else 2366 return (1); 2367} 2368 2369static isc_boolean_t 2370portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, 2371 isc_sockaddr_t *sockaddrp) 2372{ 2373 isc_sockaddr_t sockaddr; 2374 isc_result_t result; 2375 in_port_t *ports, port; 2376 unsigned int nports; 2377 isc_boolean_t available = ISC_FALSE; 2378 2379 REQUIRE(sock != NULL || sockaddrp != NULL); 2380 2381 PORTBUFLOCK(mgr); 2382 if (sock != NULL) { 2383 sockaddrp = &sockaddr; 2384 result = isc_socket_getsockname(sock, sockaddrp); 2385 if (result != ISC_R_SUCCESS) 2386 goto unlock; 2387 } 2388 2389 if (isc_sockaddr_pf(sockaddrp) == AF_INET) { 2390 ports = mgr->v4ports; 2391 nports = mgr->nv4ports; 2392 } else { 2393 ports = mgr->v6ports; 2394 nports = mgr->nv6ports; 2395 } 2396 if (ports == NULL) 2397 goto unlock; 2398 2399 port = isc_sockaddr_getport(sockaddrp); 2400 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL) 2401 available = ISC_TRUE; 2402 2403unlock: 2404 PORTBUFUNLOCK(mgr); 2405 return (available); 2406} 2407 2408#define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask))) 2409 
2410static isc_boolean_t 2411local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) { 2412 isc_sockaddr_t sockaddr; 2413 isc_result_t result; 2414 2415 REQUIRE(disp->socket != NULL); 2416 2417 if (addr == NULL) 2418 return (ISC_TRUE); 2419 2420 /* 2421 * Don't match wildcard ports unless the port is available in the 2422 * current configuration. 2423 */ 2424 if (isc_sockaddr_getport(addr) == 0 && 2425 isc_sockaddr_getport(&disp->local) == 0 && 2426 !portavailable(disp->mgr, disp->socket, NULL)) { 2427 return (ISC_FALSE); 2428 } 2429 2430 /* 2431 * Check if we match the binding <address,port>. 2432 * Wildcard ports match/fail here. 2433 */ 2434 if (isc_sockaddr_equal(&disp->local, addr)) 2435 return (ISC_TRUE); 2436 if (isc_sockaddr_getport(addr) == 0) 2437 return (ISC_FALSE); 2438 2439 /* 2440 * Check if we match a bound wildcard port <address,port>. 2441 */ 2442 if (!isc_sockaddr_eqaddr(&disp->local, addr)) 2443 return (ISC_FALSE); 2444 result = isc_socket_getsockname(disp->socket, &sockaddr); 2445 if (result != ISC_R_SUCCESS) 2446 return (ISC_FALSE); 2447 2448 return (isc_sockaddr_equal(&sockaddr, addr)); 2449} 2450 2451/* 2452 * Requires mgr be locked. 2453 * 2454 * No dispatcher can be locked by this thread when calling this function. 2455 * 2456 * 2457 * NOTE: 2458 * If a matching dispatcher is found, it is locked after this function 2459 * returns, and must be unlocked by the caller. 2460 */ 2461static isc_result_t 2462dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local, 2463 unsigned int attributes, unsigned int mask, 2464 dns_dispatch_t **dispp) 2465{ 2466 dns_dispatch_t *disp; 2467 isc_result_t result; 2468 2469 /* 2470 * Make certain that we will not match a private or exclusive dispatch. 
2471 */ 2472 attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE); 2473 mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE); 2474 2475 disp = ISC_LIST_HEAD(mgr->list); 2476 while (disp != NULL) { 2477 LOCK(&disp->lock); 2478 if ((disp->shutting_down == 0) 2479 && ATTRMATCH(disp->attributes, attributes, mask) 2480 && local_addr_match(disp, local)) 2481 break; 2482 UNLOCK(&disp->lock); 2483 disp = ISC_LIST_NEXT(disp, link); 2484 } 2485 2486 if (disp == NULL) { 2487 result = ISC_R_NOTFOUND; 2488 goto out; 2489 } 2490 2491 *dispp = disp; 2492 result = ISC_R_SUCCESS; 2493 out: 2494 2495 return (result); 2496} 2497 2498static isc_result_t 2499qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, 2500 unsigned int increment, dns_qid_t **qidp, 2501 isc_boolean_t needsocktable) 2502{ 2503 dns_qid_t *qid; 2504 unsigned int i; 2505 isc_result_t result; 2506 2507 REQUIRE(VALID_DISPATCHMGR(mgr)); 2508 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ 2509 REQUIRE(increment > buckets); 2510 REQUIRE(qidp != NULL && *qidp == NULL); 2511 2512 qid = isc_mem_get(mgr->mctx, sizeof(*qid)); 2513 if (qid == NULL) 2514 return (ISC_R_NOMEMORY); 2515 2516 qid->qid_table = isc_mem_get(mgr->mctx, 2517 buckets * sizeof(dns_displist_t)); 2518 if (qid->qid_table == NULL) { 2519 isc_mem_put(mgr->mctx, qid, sizeof(*qid)); 2520 return (ISC_R_NOMEMORY); 2521 } 2522 2523 qid->sock_table = NULL; 2524 if (needsocktable) { 2525 qid->sock_table = isc_mem_get(mgr->mctx, buckets * 2526 sizeof(dispsocketlist_t)); 2527 if (qid->sock_table == NULL) { 2528 isc_mem_put(mgr->mctx, qid->qid_table, 2529 buckets * sizeof(dns_displist_t)); 2530 isc_mem_put(mgr->mctx, qid, sizeof(*qid)); 2531 return (ISC_R_NOMEMORY); 2532 } 2533 } 2534 2535 result = isc_mutex_init(&qid->lock); 2536 if (result != ISC_R_SUCCESS) { 2537 if (qid->sock_table != NULL) { 2538 isc_mem_put(mgr->mctx, qid->sock_table, 2539 buckets * sizeof(dispsocketlist_t)); 2540 } 2541 isc_mem_put(mgr->mctx, 
qid->qid_table, 2542 buckets * sizeof(dns_displist_t)); 2543 isc_mem_put(mgr->mctx, qid, sizeof(*qid)); 2544 return (result); 2545 } 2546 2547 for (i = 0; i < buckets; i++) { 2548 ISC_LIST_INIT(qid->qid_table[i]); 2549 if (qid->sock_table != NULL) 2550 ISC_LIST_INIT(qid->sock_table[i]); 2551 } 2552 2553 qid->qid_nbuckets = buckets; 2554 qid->qid_increment = increment; 2555 qid->magic = QID_MAGIC; 2556 *qidp = qid; 2557 return (ISC_R_SUCCESS); 2558} 2559 2560static void 2561qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) { 2562 dns_qid_t *qid; 2563 2564 REQUIRE(qidp != NULL); 2565 qid = *qidp; 2566 2567 REQUIRE(VALID_QID(qid)); 2568 2569 *qidp = NULL; 2570 qid->magic = 0; 2571 isc_mem_put(mctx, qid->qid_table, 2572 qid->qid_nbuckets * sizeof(dns_displist_t)); 2573 if (qid->sock_table != NULL) { 2574 isc_mem_put(mctx, qid->sock_table, 2575 qid->qid_nbuckets * sizeof(dispsocketlist_t)); 2576 } 2577 DESTROYLOCK(&qid->lock); 2578 isc_mem_put(mctx, qid, sizeof(*qid)); 2579} 2580 2581/* 2582 * Allocate and set important limits. 2583 */ 2584static isc_result_t 2585dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests, 2586 dns_dispatch_t **dispp) 2587{ 2588 dns_dispatch_t *disp; 2589 isc_result_t result; 2590 2591 REQUIRE(VALID_DISPATCHMGR(mgr)); 2592 REQUIRE(dispp != NULL && *dispp == NULL); 2593 2594 /* 2595 * Set up the dispatcher, mostly. Don't bother setting some of 2596 * the options that are controlled by tcp vs. udp, etc. 
2597 */ 2598 2599 disp = isc_mempool_get(mgr->dpool); 2600 if (disp == NULL) 2601 return (ISC_R_NOMEMORY); 2602 2603 disp->magic = 0; 2604 disp->mgr = mgr; 2605 disp->maxrequests = maxrequests; 2606 disp->attributes = 0; 2607 ISC_LINK_INIT(disp, link); 2608 disp->refcount = 1; 2609 disp->recv_pending = 0; 2610 memset(&disp->local, 0, sizeof(disp->local)); 2611 disp->localport = 0; 2612 disp->shutting_down = 0; 2613 disp->shutdown_out = 0; 2614 disp->connected = 0; 2615 disp->tcpmsg_valid = 0; 2616 disp->shutdown_why = ISC_R_UNEXPECTED; 2617 disp->requests = 0; 2618 disp->tcpbuffers = 0; 2619 disp->qid = NULL; 2620 ISC_LIST_INIT(disp->activesockets); 2621 ISC_LIST_INIT(disp->inactivesockets); 2622 disp->nsockets = 0; 2623 dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL); 2624 disp->port_table = NULL; 2625 disp->portpool = NULL; 2626 2627 result = isc_mutex_init(&disp->lock); 2628 if (result != ISC_R_SUCCESS) 2629 goto deallocate; 2630 2631 disp->failsafe_ev = allocate_devent(disp); 2632 if (disp->failsafe_ev == NULL) { 2633 result = ISC_R_NOMEMORY; 2634 goto kill_lock; 2635 } 2636 2637 disp->magic = DISPATCH_MAGIC; 2638 2639 *dispp = disp; 2640 return (ISC_R_SUCCESS); 2641 2642 /* 2643 * error returns 2644 */ 2645 kill_lock: 2646 DESTROYLOCK(&disp->lock); 2647 deallocate: 2648 isc_mempool_put(mgr->dpool, disp); 2649 2650 return (result); 2651} 2652 2653 2654/* 2655 * MUST be unlocked, and not used by anything. 
2656 */ 2657static void 2658dispatch_free(dns_dispatch_t **dispp) { 2659 dns_dispatch_t *disp; 2660 dns_dispatchmgr_t *mgr; 2661 int i; 2662 2663 REQUIRE(VALID_DISPATCH(*dispp)); 2664 disp = *dispp; 2665 *dispp = NULL; 2666 2667 mgr = disp->mgr; 2668 REQUIRE(VALID_DISPATCHMGR(mgr)); 2669 2670 if (disp->tcpmsg_valid) { 2671 dns_tcpmsg_invalidate(&disp->tcpmsg); 2672 disp->tcpmsg_valid = 0; 2673 } 2674 2675 INSIST(disp->tcpbuffers == 0); 2676 INSIST(disp->requests == 0); 2677 INSIST(disp->recv_pending == 0); 2678 INSIST(ISC_LIST_EMPTY(disp->activesockets)); 2679 INSIST(ISC_LIST_EMPTY(disp->inactivesockets)); 2680 2681 isc_mempool_put(mgr->depool, disp->failsafe_ev); 2682 disp->failsafe_ev = NULL; 2683 2684 if (disp->qid != NULL) 2685 qid_destroy(mgr->mctx, &disp->qid); 2686 2687 if (disp->port_table != NULL) { 2688 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) 2689 INSIST(ISC_LIST_EMPTY(disp->port_table[i])); 2690 isc_mem_put(mgr->mctx, disp->port_table, 2691 sizeof(disp->port_table[0]) * 2692 DNS_DISPATCH_PORTTABLESIZE); 2693 } 2694 2695 if (disp->portpool != NULL) 2696 isc_mempool_destroy(&disp->portpool); 2697 2698 disp->mgr = NULL; 2699 DESTROYLOCK(&disp->lock); 2700 disp->magic = 0; 2701 isc_mempool_put(mgr->dpool, disp); 2702} 2703 2704isc_result_t 2705dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, 2706 isc_taskmgr_t *taskmgr, unsigned int buffersize, 2707 unsigned int maxbuffers, unsigned int maxrequests, 2708 unsigned int buckets, unsigned int increment, 2709 unsigned int attributes, dns_dispatch_t **dispp) 2710{ 2711 isc_result_t result; 2712 dns_dispatch_t *disp; 2713 2714 UNUSED(maxbuffers); 2715 UNUSED(buffersize); 2716 2717 REQUIRE(VALID_DISPATCHMGR(mgr)); 2718 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp); 2719 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0); 2720 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0); 2721 2722 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */ 2723 2724 LOCK(&mgr->lock); 2725 2726 /* 
2727 * dispatch_allocate() checks mgr for us. 2728 * qid_allocate() checks buckets and increment for us. 2729 */ 2730 disp = NULL; 2731 result = dispatch_allocate(mgr, maxrequests, &disp); 2732 if (result != ISC_R_SUCCESS) { 2733 UNLOCK(&mgr->lock); 2734 return (result); 2735 } 2736 2737 result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE); 2738 if (result != ISC_R_SUCCESS) 2739 goto deallocate_dispatch; 2740 2741 disp->socktype = isc_sockettype_tcp; 2742 disp->socket = NULL; 2743 isc_socket_attach(sock, &disp->socket); 2744 2745 disp->sepool = NULL; 2746 2747 disp->ntasks = 1; 2748 disp->task[0] = NULL; 2749 result = isc_task_create(taskmgr, 0, &disp->task[0]); 2750 if (result != ISC_R_SUCCESS) 2751 goto kill_socket; 2752 2753 disp->ctlevent = isc_event_allocate(mgr->mctx, disp, 2754 DNS_EVENT_DISPATCHCONTROL, 2755 destroy_disp, disp, 2756 sizeof(isc_event_t)); 2757 if (disp->ctlevent == NULL) { 2758 result = ISC_R_NOMEMORY; 2759 goto kill_task; 2760 } 2761 2762 isc_task_setname(disp->task[0], "tcpdispatch", disp); 2763 2764 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg); 2765 disp->tcpmsg_valid = 1; 2766 2767 disp->attributes = attributes; 2768 2769 /* 2770 * Append it to the dispatcher list. 2771 */ 2772 ISC_LIST_APPEND(mgr->list, disp, link); 2773 UNLOCK(&mgr->lock); 2774 2775 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp); 2776 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); 2777 2778 *dispp = disp; 2779 2780 return (ISC_R_SUCCESS); 2781 2782 /* 2783 * Error returns. 
2784 */ 2785 kill_task: 2786 isc_task_detach(&disp->task[0]); 2787 kill_socket: 2788 isc_socket_detach(&disp->socket); 2789 deallocate_dispatch: 2790 dispatch_free(&disp); 2791 2792 UNLOCK(&mgr->lock); 2793 2794 return (result); 2795} 2796 2797isc_result_t 2798dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 2799 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, 2800 unsigned int buffersize, 2801 unsigned int maxbuffers, unsigned int maxrequests, 2802 unsigned int buckets, unsigned int increment, 2803 unsigned int attributes, unsigned int mask, 2804 dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch) 2805{ 2806 isc_result_t result; 2807 dns_dispatch_t *disp = NULL; 2808 2809 REQUIRE(VALID_DISPATCHMGR(mgr)); 2810 REQUIRE(sockmgr != NULL); 2811 REQUIRE(localaddr != NULL); 2812 REQUIRE(taskmgr != NULL); 2813 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); 2814 REQUIRE(maxbuffers > 0); 2815 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ 2816 REQUIRE(increment > buckets); 2817 REQUIRE(dispp != NULL && *dispp == NULL); 2818 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0); 2819 2820 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers, 2821 maxrequests, buckets, increment); 2822 if (result != ISC_R_SUCCESS) 2823 return (result); 2824 2825 LOCK(&mgr->lock); 2826 2827 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 2828 REQUIRE(isc_sockaddr_getport(localaddr) == 0); 2829 goto createudp; 2830 } 2831 2832 /* 2833 * See if we have a dispatcher that matches. 
2834 */ 2835 if (dup_dispatch == NULL) { 2836 result = dispatch_find(mgr, localaddr, attributes, mask, &disp); 2837 if (result == ISC_R_SUCCESS) { 2838 disp->refcount++; 2839 2840 if (disp->maxrequests < maxrequests) 2841 disp->maxrequests = maxrequests; 2842 2843 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 2844 && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) 2845 { 2846 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; 2847 if (disp->recv_pending != 0) 2848 isc_socket_cancel(disp->socket, 2849 disp->task[0], 2850 ISC_SOCKCANCEL_RECV); 2851 } 2852 2853 UNLOCK(&disp->lock); 2854 UNLOCK(&mgr->lock); 2855 2856 *dispp = disp; 2857 2858 return (ISC_R_SUCCESS); 2859 } 2860 } 2861 2862 createudp: 2863 /* 2864 * Nope, create one. 2865 */ 2866 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr, 2867 maxrequests, attributes, &disp, 2868 dup_dispatch == NULL 2869 ? NULL 2870 : dup_dispatch->socket); 2871 2872 if (result != ISC_R_SUCCESS) { 2873 UNLOCK(&mgr->lock); 2874 return (result); 2875 } 2876 2877 UNLOCK(&mgr->lock); 2878 *dispp = disp; 2879 2880 return (ISC_R_SUCCESS); 2881} 2882 2883isc_result_t 2884dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 2885 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, 2886 unsigned int buffersize, 2887 unsigned int maxbuffers, unsigned int maxrequests, 2888 unsigned int buckets, unsigned int increment, 2889 unsigned int attributes, unsigned int mask, 2890 dns_dispatch_t **dispp) 2891{ 2892 return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr, 2893 buffersize, maxbuffers, maxrequests, 2894 buckets, increment, attributes, 2895 mask, dispp, NULL)); 2896} 2897 2898/* 2899 * mgr should be locked. 
2900 */ 2901 2902#ifndef DNS_DISPATCH_HELD 2903#define DNS_DISPATCH_HELD 20U 2904#endif 2905 2906static isc_result_t 2907get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, 2908 isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr, 2909 isc_socket_t **sockp, isc_socket_t *dup_socket) 2910{ 2911 unsigned int i, j; 2912 isc_socket_t *held[DNS_DISPATCH_HELD]; 2913 isc_sockaddr_t localaddr_bound; 2914 isc_socket_t *sock = NULL; 2915 isc_result_t result = ISC_R_SUCCESS; 2916 isc_boolean_t anyport; 2917 2918 INSIST(sockp != NULL && *sockp == NULL); 2919 2920 localaddr_bound = *localaddr; 2921 anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0); 2922 2923 if (anyport) { 2924 unsigned int nports; 2925 in_port_t *ports; 2926 2927 /* 2928 * If no port is specified, we first try to pick up a random 2929 * port by ourselves. 2930 */ 2931 if (isc_sockaddr_pf(localaddr) == AF_INET) { 2932 nports = disp->mgr->nv4ports; 2933 ports = disp->mgr->v4ports; 2934 } else { 2935 nports = disp->mgr->nv6ports; 2936 ports = disp->mgr->v6ports; 2937 } 2938 if (nports == 0) 2939 return (ISC_R_ADDRNOTAVAIL); 2940 2941 for (i = 0; i < 1024; i++) { 2942 in_port_t prt; 2943 2944 prt = ports[dispatch_uniformrandom( 2945 DISP_ARC4CTX(disp), 2946 nports)]; 2947 isc_sockaddr_setport(&localaddr_bound, prt); 2948 result = open_socket(sockmgr, &localaddr_bound, 2949 0, &sock, NULL); 2950 /* 2951 * Continue if the port choosen is already in use 2952 * or the OS has reserved it. 2953 */ 2954 if (result == ISC_R_NOPERM || 2955 result == ISC_R_ADDRINUSE) 2956 continue; 2957 disp->localport = prt; 2958 *sockp = sock; 2959 return (result); 2960 } 2961 2962 /* 2963 * If this fails 1024 times, we then ask the kernel for 2964 * choosing one. 2965 */ 2966 } else { 2967 /* Allow to reuse address for non-random ports. 
*/ 2968 result = open_socket(sockmgr, localaddr, 2969 ISC_SOCKET_REUSEADDRESS, &sock, 2970 dup_socket); 2971 2972 if (result == ISC_R_SUCCESS) 2973 *sockp = sock; 2974 2975 return (result); 2976 } 2977 2978 memset(held, 0, sizeof(held)); 2979 i = 0; 2980 2981 for (j = 0; j < 0xffffU; j++) { 2982 result = open_socket(sockmgr, localaddr, 0, &sock, NULL); 2983 if (result != ISC_R_SUCCESS) 2984 goto end; 2985 else if (portavailable(mgr, sock, NULL)) 2986 break; 2987 if (held[i] != NULL) 2988 isc_socket_detach(&held[i]); 2989 held[i++] = sock; 2990 sock = NULL; 2991 if (i == DNS_DISPATCH_HELD) 2992 i = 0; 2993 } 2994 if (j == 0xffffU) { 2995 mgr_log(mgr, ISC_LOG_ERROR, 2996 "avoid-v%s-udp-ports: unable to allocate " 2997 "an available port", 2998 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6"); 2999 result = ISC_R_FAILURE; 3000 goto end; 3001 } 3002 *sockp = sock; 3003 3004end: 3005 for (i = 0; i < DNS_DISPATCH_HELD; i++) { 3006 if (held[i] != NULL) 3007 isc_socket_detach(&held[i]); 3008 } 3009 3010 return (result); 3011} 3012 3013static isc_result_t 3014dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 3015 isc_taskmgr_t *taskmgr, 3016 isc_sockaddr_t *localaddr, 3017 unsigned int maxrequests, 3018 unsigned int attributes, 3019 dns_dispatch_t **dispp, 3020 isc_socket_t *dup_socket) 3021{ 3022 isc_result_t result; 3023 dns_dispatch_t *disp; 3024 isc_socket_t *sock = NULL; 3025 int i = 0; 3026 3027 /* 3028 * dispatch_allocate() checks mgr for us. 
3029 */ 3030 disp = NULL; 3031 result = dispatch_allocate(mgr, maxrequests, &disp); 3032 if (result != ISC_R_SUCCESS) 3033 return (result); 3034 3035 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) { 3036 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock, 3037 dup_socket); 3038 if (result != ISC_R_SUCCESS) 3039 goto deallocate_dispatch; 3040 3041 if (isc_log_wouldlog(dns_lctx, 90)) { 3042 char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 3043 3044 isc_sockaddr_format(localaddr, addrbuf, 3045 ISC_SOCKADDR_FORMATSIZE); 3046 mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created" 3047 " UDP dispatch for %s with socket fd %d\n", 3048 addrbuf, isc_socket_getfd(sock)); 3049 } 3050 3051 } else { 3052 isc_sockaddr_t sa_any; 3053 3054 /* 3055 * For dispatches using exclusive sockets with a specific 3056 * source address, we only check if the specified address is 3057 * available on the system. Query sockets will be created later 3058 * on demand. 3059 */ 3060 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr)); 3061 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) { 3062 result = open_socket(sockmgr, localaddr, 0, &sock, NULL); 3063 if (sock != NULL) 3064 isc_socket_detach(&sock); 3065 if (result != ISC_R_SUCCESS) 3066 goto deallocate_dispatch; 3067 } 3068 3069 disp->port_table = isc_mem_get(mgr->mctx, 3070 sizeof(disp->port_table[0]) * 3071 DNS_DISPATCH_PORTTABLESIZE); 3072 if (disp->port_table == NULL) 3073 goto deallocate_dispatch; 3074 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) 3075 ISC_LIST_INIT(disp->port_table[i]); 3076 3077 result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t), 3078 &disp->portpool); 3079 if (result != ISC_R_SUCCESS) 3080 goto deallocate_dispatch; 3081 isc_mempool_setname(disp->portpool, "disp_portpool"); 3082 isc_mempool_setfreemax(disp->portpool, 128); 3083 } 3084 disp->socktype = isc_sockettype_udp; 3085 disp->socket = sock; 3086 disp->local = *localaddr; 3087 3088 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) 3089 
disp->ntasks = MAX_INTERNAL_TASKS; 3090 else 3091 disp->ntasks = 1; 3092 for (i = 0; i < disp->ntasks; i++) { 3093 disp->task[i] = NULL; 3094 result = isc_task_create(taskmgr, 0, &disp->task[i]); 3095 if (result != ISC_R_SUCCESS) { 3096 while (--i >= 0) { 3097 isc_task_shutdown(disp->task[i]); 3098 isc_task_detach(&disp->task[i]); 3099 } 3100 goto kill_socket; 3101 } 3102 isc_task_setname(disp->task[i], "udpdispatch", disp); 3103 } 3104 3105 disp->ctlevent = isc_event_allocate(mgr->mctx, disp, 3106 DNS_EVENT_DISPATCHCONTROL, 3107 destroy_disp, disp, 3108 sizeof(isc_event_t)); 3109 if (disp->ctlevent == NULL) { 3110 result = ISC_R_NOMEMORY; 3111 goto kill_task; 3112 } 3113 3114 disp->sepool = NULL; 3115 if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t), 3116 &disp->sepool) != ISC_R_SUCCESS) 3117 { 3118 result = ISC_R_NOMEMORY; 3119 goto kill_ctlevent; 3120 } 3121 3122 result = isc_mutex_init(&disp->sepool_lock); 3123 if (result != ISC_R_SUCCESS) 3124 goto kill_sepool; 3125 3126 isc_mempool_setname(disp->sepool, "disp_sepool"); 3127 isc_mempool_setmaxalloc(disp->sepool, 32768); 3128 isc_mempool_setfreemax(disp->sepool, 32768); 3129 isc_mempool_associatelock(disp->sepool, &disp->sepool_lock); 3130 isc_mempool_setfillcount(disp->sepool, 16); 3131 3132 attributes &= ~DNS_DISPATCHATTR_TCP; 3133 attributes |= DNS_DISPATCHATTR_UDP; 3134 disp->attributes = attributes; 3135 3136 /* 3137 * Append it to the dispatcher list. 3138 */ 3139 ISC_LIST_APPEND(mgr->list, disp, link); 3140 3141 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp); 3142 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */ 3143 if (disp->socket != NULL) 3144 dispatch_log(disp, LVL(90), "created socket %p", disp->socket); 3145 3146 *dispp = disp; 3147 3148 return (result); 3149 3150 /* 3151 * Error returns. 
3152 */ 3153 kill_sepool: 3154 isc_mempool_destroy(&disp->sepool); 3155 kill_ctlevent: 3156 isc_event_free(&disp->ctlevent); 3157 kill_task: 3158 for (i = 0; i < disp->ntasks; i++) 3159 isc_task_detach(&disp->task[i]); 3160 kill_socket: 3161 if (disp->socket != NULL) 3162 isc_socket_detach(&disp->socket); 3163 deallocate_dispatch: 3164 dispatch_free(&disp); 3165 3166 return (result); 3167} 3168 3169void 3170dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) { 3171 REQUIRE(VALID_DISPATCH(disp)); 3172 REQUIRE(dispp != NULL && *dispp == NULL); 3173 3174 LOCK(&disp->lock); 3175 disp->refcount++; 3176 UNLOCK(&disp->lock); 3177 3178 *dispp = disp; 3179} 3180 3181/* 3182 * It is important to lock the manager while we are deleting the dispatch, 3183 * since dns_dispatch_getudp will call dispatch_find, which returns to 3184 * the caller a dispatch but does not attach to it until later. _getudp 3185 * locks the manager, however, so locking it here will keep us from attaching 3186 * to a dispatcher that is in the process of going away. 
3187 */ 3188void 3189dns_dispatch_detach(dns_dispatch_t **dispp) { 3190 dns_dispatch_t *disp; 3191 dispsocket_t *dispsock; 3192 isc_boolean_t killit; 3193 3194 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp)); 3195 3196 disp = *dispp; 3197 *dispp = NULL; 3198 3199 LOCK(&disp->lock); 3200 3201 INSIST(disp->refcount > 0); 3202 disp->refcount--; 3203 if (disp->refcount == 0) { 3204 if (disp->recv_pending > 0) 3205 isc_socket_cancel(disp->socket, disp->task[0], 3206 ISC_SOCKCANCEL_RECV); 3207 for (dispsock = ISC_LIST_HEAD(disp->activesockets); 3208 dispsock != NULL; 3209 dispsock = ISC_LIST_NEXT(dispsock, link)) { 3210 isc_socket_cancel(dispsock->socket, dispsock->task, 3211 ISC_SOCKCANCEL_RECV); 3212 } 3213 disp->shutting_down = 1; 3214 } 3215 3216 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount); 3217 3218 killit = destroy_disp_ok(disp); 3219 UNLOCK(&disp->lock); 3220 if (killit) 3221 isc_task_send(disp->task[0], &disp->ctlevent); 3222} 3223 3224isc_result_t 3225dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest, 3226 isc_task_t *task, isc_taskaction_t action, void *arg, 3227 dns_messageid_t *idp, dns_dispentry_t **resp, 3228 isc_socketmgr_t *sockmgr) 3229{ 3230 dns_dispentry_t *res; 3231 unsigned int bucket; 3232 in_port_t localport = 0; 3233 dns_messageid_t id; 3234 int i; 3235 isc_boolean_t ok; 3236 dns_qid_t *qid; 3237 dispsocket_t *dispsocket = NULL; 3238 isc_result_t result; 3239 3240 REQUIRE(VALID_DISPATCH(disp)); 3241 REQUIRE(task != NULL); 3242 REQUIRE(dest != NULL); 3243 REQUIRE(resp != NULL && *resp == NULL); 3244 REQUIRE(idp != NULL); 3245 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) 3246 REQUIRE(sockmgr != NULL); 3247 3248 LOCK(&disp->lock); 3249 3250 if (disp->shutting_down == 1) { 3251 UNLOCK(&disp->lock); 3252 return (ISC_R_SHUTTINGDOWN); 3253 } 3254 3255 if (disp->requests >= disp->maxrequests) { 3256 UNLOCK(&disp->lock); 3257 return (ISC_R_QUOTA); 3258 } 3259 3260 if ((disp->attributes & 
DNS_DISPATCHATTR_EXCLUSIVE) != 0 && 3261 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) { 3262 dispsocket_t *oldestsocket; 3263 dns_dispentry_t *oldestresp; 3264 dns_dispatchevent_t *rev; 3265 3266 /* 3267 * Kill oldest outstanding query if the number of sockets 3268 * exceeds the quota to keep the room for new queries. 3269 */ 3270 oldestsocket = ISC_LIST_HEAD(disp->activesockets); 3271 oldestresp = oldestsocket->resp; 3272 if (oldestresp != NULL && !oldestresp->item_out) { 3273 rev = allocate_devent(oldestresp->disp); 3274 if (rev != NULL) { 3275 rev->buffer.base = NULL; 3276 rev->result = ISC_R_CANCELED; 3277 rev->id = oldestresp->id; 3278 ISC_EVENT_INIT(rev, sizeof(*rev), 0, 3279 NULL, DNS_EVENT_DISPATCH, 3280 oldestresp->action, 3281 oldestresp->arg, oldestresp, 3282 NULL, NULL); 3283 oldestresp->item_out = ISC_TRUE; 3284 isc_task_send(oldestresp->task, 3285 ISC_EVENT_PTR(&rev)); 3286 inc_stats(disp->mgr, 3287 dns_resstatscounter_dispabort); 3288 } 3289 } 3290 3291 /* 3292 * Move this entry to the tail so that it won't (easily) be 3293 * examined before actually being canceled. 3294 */ 3295 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link); 3296 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link); 3297 } 3298 3299 qid = DNS_QID(disp); 3300 3301 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 3302 /* 3303 * Get a separate UDP socket with a random port number. 3304 */ 3305 result = get_dispsocket(disp, dest, sockmgr, &dispsocket, 3306 &localport); 3307 if (result != ISC_R_SUCCESS) { 3308 UNLOCK(&disp->lock); 3309 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); 3310 return (result); 3311 } 3312 } else { 3313 localport = disp->localport; 3314 } 3315 3316 /* 3317 * Try somewhat hard to find an unique ID. 
3318 */ 3319 LOCK(&qid->lock); 3320 id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp)); 3321 ok = ISC_FALSE; 3322 i = 0; 3323 do { 3324 bucket = dns_hash(qid, dest, id, localport); 3325 if (entry_search(qid, dest, id, localport, bucket) == NULL) { 3326 ok = ISC_TRUE; 3327 break; 3328 } 3329 id += qid->qid_increment; 3330 id &= 0x0000ffff; 3331 } while (i++ < 64); 3332 UNLOCK(&qid->lock); 3333 3334 if (!ok) { 3335 UNLOCK(&disp->lock); 3336 return (ISC_R_NOMORE); 3337 } 3338 3339 res = isc_mempool_get(disp->mgr->rpool); 3340 if (res == NULL) { 3341 if (dispsocket != NULL) 3342 destroy_dispsocket(disp, &dispsocket); 3343 UNLOCK(&disp->lock); 3344 return (ISC_R_NOMEMORY); 3345 } 3346 3347 disp->refcount++; 3348 disp->requests++; 3349 res->task = NULL; 3350 isc_task_attach(task, &res->task); 3351 res->disp = disp; 3352 res->id = id; 3353 res->port = localport; 3354 res->bucket = bucket; 3355 res->host = *dest; 3356 res->action = action; 3357 res->arg = arg; 3358 res->dispsocket = dispsocket; 3359 if (dispsocket != NULL) 3360 dispsocket->resp = res; 3361 res->item_out = ISC_FALSE; 3362 ISC_LIST_INIT(res->items); 3363 ISC_LINK_INIT(res, link); 3364 res->magic = RESPONSE_MAGIC; 3365 3366 LOCK(&qid->lock); 3367 ISC_LIST_APPEND(qid->qid_table[bucket], res, link); 3368 UNLOCK(&qid->lock); 3369 3370 request_log(disp, res, LVL(90), 3371 "attached to task %p", res->task); 3372 3373 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) || 3374 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) { 3375 result = startrecv(disp, dispsocket); 3376 if (result != ISC_R_SUCCESS) { 3377 LOCK(&qid->lock); 3378 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); 3379 UNLOCK(&qid->lock); 3380 3381 if (dispsocket != NULL) 3382 destroy_dispsocket(disp, &dispsocket); 3383 3384 disp->refcount--; 3385 disp->requests--; 3386 3387 UNLOCK(&disp->lock); 3388 isc_task_detach(&res->task); 3389 isc_mempool_put(disp->mgr->rpool, res); 3390 return (result); 3391 } 3392 } 3393 3394 if 
(dispsocket != NULL) 3395 ISC_LIST_APPEND(disp->activesockets, dispsocket, link); 3396 3397 UNLOCK(&disp->lock); 3398 3399 *idp = id; 3400 *resp = res; 3401 3402 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) 3403 INSIST(res->dispsocket != NULL); 3404 3405 return (ISC_R_SUCCESS); 3406} 3407 3408isc_result_t 3409dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, 3410 isc_task_t *task, isc_taskaction_t action, void *arg, 3411 dns_messageid_t *idp, dns_dispentry_t **resp) 3412{ 3413 REQUIRE(VALID_DISPATCH(disp)); 3414 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0); 3415 3416 return (dns_dispatch_addresponse2(disp, dest, task, action, arg, 3417 idp, resp, NULL)); 3418} 3419 3420void 3421dns_dispatch_starttcp(dns_dispatch_t *disp) { 3422 3423 REQUIRE(VALID_DISPATCH(disp)); 3424 3425 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]); 3426 3427 LOCK(&disp->lock); 3428 disp->attributes |= DNS_DISPATCHATTR_CONNECTED; 3429 (void)startrecv(disp, NULL); 3430 UNLOCK(&disp->lock); 3431} 3432 3433void 3434dns_dispatch_removeresponse(dns_dispentry_t **resp, 3435 dns_dispatchevent_t **sockevent) 3436{ 3437 dns_dispatchmgr_t *mgr; 3438 dns_dispatch_t *disp; 3439 dns_dispentry_t *res; 3440 dispsocket_t *dispsock; 3441 dns_dispatchevent_t *ev; 3442 unsigned int bucket; 3443 isc_boolean_t killit; 3444 unsigned int n; 3445 isc_eventlist_t events; 3446 dns_qid_t *qid; 3447 3448 REQUIRE(resp != NULL); 3449 REQUIRE(VALID_RESPONSE(*resp)); 3450 3451 res = *resp; 3452 *resp = NULL; 3453 3454 disp = res->disp; 3455 REQUIRE(VALID_DISPATCH(disp)); 3456 mgr = disp->mgr; 3457 REQUIRE(VALID_DISPATCHMGR(mgr)); 3458 3459 qid = DNS_QID(disp); 3460 3461 if (sockevent != NULL) { 3462 REQUIRE(*sockevent != NULL); 3463 ev = *sockevent; 3464 *sockevent = NULL; 3465 } else { 3466 ev = NULL; 3467 } 3468 3469 LOCK(&disp->lock); 3470 3471 INSIST(disp->requests > 0); 3472 disp->requests--; 3473 INSIST(disp->refcount > 0); 3474 disp->refcount--; 3475 if 
(disp->refcount == 0) { 3476 if (disp->recv_pending > 0) 3477 isc_socket_cancel(disp->socket, disp->task[0], 3478 ISC_SOCKCANCEL_RECV); 3479 for (dispsock = ISC_LIST_HEAD(disp->activesockets); 3480 dispsock != NULL; 3481 dispsock = ISC_LIST_NEXT(dispsock, link)) { 3482 isc_socket_cancel(dispsock->socket, dispsock->task, 3483 ISC_SOCKCANCEL_RECV); 3484 } 3485 disp->shutting_down = 1; 3486 } 3487 3488 bucket = res->bucket; 3489 3490 LOCK(&qid->lock); 3491 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); 3492 UNLOCK(&qid->lock); 3493 3494 if (ev == NULL && res->item_out) { 3495 /* 3496 * We've posted our event, but the caller hasn't gotten it 3497 * yet. Take it back. 3498 */ 3499 ISC_LIST_INIT(events); 3500 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH, 3501 NULL, &events); 3502 /* 3503 * We had better have gotten it back. 3504 */ 3505 INSIST(n == 1); 3506 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events); 3507 } 3508 3509 if (ev != NULL) { 3510 REQUIRE(res->item_out == ISC_TRUE); 3511 res->item_out = ISC_FALSE; 3512 if (ev->buffer.base != NULL) 3513 free_buffer(disp, ev->buffer.base, ev->buffer.length); 3514 free_devent(disp, ev); 3515 } 3516 3517 request_log(disp, res, LVL(90), "detaching from task %p", res->task); 3518 isc_task_detach(&res->task); 3519 3520 if (res->dispsocket != NULL) { 3521 isc_socket_cancel(res->dispsocket->socket, 3522 res->dispsocket->task, ISC_SOCKCANCEL_RECV); 3523 res->dispsocket->resp = NULL; 3524 } 3525 3526 /* 3527 * Free any buffered requests as well 3528 */ 3529 ev = ISC_LIST_HEAD(res->items); 3530 while (ev != NULL) { 3531 ISC_LIST_UNLINK(res->items, ev, ev_link); 3532 if (ev->buffer.base != NULL) 3533 free_buffer(disp, ev->buffer.base, ev->buffer.length); 3534 free_devent(disp, ev); 3535 ev = ISC_LIST_HEAD(res->items); 3536 } 3537 res->magic = 0; 3538 isc_mempool_put(disp->mgr->rpool, res); 3539 if (disp->shutting_down == 1) 3540 do_cancel(disp); 3541 else 3542 (void)startrecv(disp, NULL); 3543 3544 killit = 
destroy_disp_ok(disp); 3545 UNLOCK(&disp->lock); 3546 if (killit) 3547 isc_task_send(disp->task[0], &disp->ctlevent); 3548} 3549 3550static void 3551do_cancel(dns_dispatch_t *disp) { 3552 dns_dispatchevent_t *ev; 3553 dns_dispentry_t *resp; 3554 dns_qid_t *qid; 3555 3556 if (disp->shutdown_out == 1) 3557 return; 3558 3559 qid = DNS_QID(disp); 3560 3561 /* 3562 * Search for the first response handler without packets outstanding 3563 * unless a specific hander is given. 3564 */ 3565 LOCK(&qid->lock); 3566 for (resp = linear_first(qid); 3567 resp != NULL && resp->item_out; 3568 /* Empty. */) 3569 resp = linear_next(qid, resp); 3570 3571 /* 3572 * No one to send the cancel event to, so nothing to do. 3573 */ 3574 if (resp == NULL) 3575 goto unlock; 3576 3577 /* 3578 * Send the shutdown failsafe event to this resp. 3579 */ 3580 ev = disp->failsafe_ev; 3581 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH, 3582 resp->action, resp->arg, resp, NULL, NULL); 3583 ev->result = disp->shutdown_why; 3584 ev->buffer.base = NULL; 3585 ev->buffer.length = 0; 3586 disp->shutdown_out = 1; 3587 request_log(disp, resp, LVL(10), 3588 "cancel: failsafe event %p -> task %p", 3589 ev, resp->task); 3590 resp->item_out = ISC_TRUE; 3591 isc_task_send(resp->task, ISC_EVENT_PTR(&ev)); 3592 unlock: 3593 UNLOCK(&qid->lock); 3594} 3595 3596isc_socket_t * 3597dns_dispatch_getsocket(dns_dispatch_t *disp) { 3598 REQUIRE(VALID_DISPATCH(disp)); 3599 3600 return (disp->socket); 3601} 3602 3603isc_socket_t * 3604dns_dispatch_getentrysocket(dns_dispentry_t *resp) { 3605 REQUIRE(VALID_RESPONSE(resp)); 3606 3607 if (resp->dispsocket != NULL) 3608 return (resp->dispsocket->socket); 3609 else 3610 return (NULL); 3611} 3612 3613isc_result_t 3614dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) { 3615 3616 REQUIRE(VALID_DISPATCH(disp)); 3617 REQUIRE(addrp != NULL); 3618 3619 if (disp->socktype == isc_sockettype_udp) { 3620 *addrp = disp->local; 3621 return (ISC_R_SUCCESS); 
3622 } 3623 return (ISC_R_NOTIMPLEMENTED); 3624} 3625 3626void 3627dns_dispatch_cancel(dns_dispatch_t *disp) { 3628 REQUIRE(VALID_DISPATCH(disp)); 3629 3630 LOCK(&disp->lock); 3631 3632 if (disp->shutting_down == 1) { 3633 UNLOCK(&disp->lock); 3634 return; 3635 } 3636 3637 disp->shutdown_why = ISC_R_CANCELED; 3638 disp->shutting_down = 1; 3639 do_cancel(disp); 3640 3641 UNLOCK(&disp->lock); 3642 3643 return; 3644} 3645 3646unsigned int 3647dns_dispatch_getattributes(dns_dispatch_t *disp) { 3648 REQUIRE(VALID_DISPATCH(disp)); 3649 3650 /* 3651 * We don't bother locking disp here; it's the caller's responsibility 3652 * to use only non volatile flags. 3653 */ 3654 return (disp->attributes); 3655} 3656 3657void 3658dns_dispatch_changeattributes(dns_dispatch_t *disp, 3659 unsigned int attributes, unsigned int mask) 3660{ 3661 REQUIRE(VALID_DISPATCH(disp)); 3662 /* Exclusive attribute can only be set on creation */ 3663 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0); 3664 /* Also, a dispatch with randomport specified cannot start listening */ 3665 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 || 3666 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0); 3667 3668 /* XXXMLG 3669 * Should check for valid attributes here! 
3670 */ 3671 3672 LOCK(&disp->lock); 3673 3674 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) { 3675 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 && 3676 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) { 3677 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN; 3678 (void)startrecv(disp, NULL); 3679 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) 3680 == 0 && 3681 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) { 3682 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; 3683 if (disp->recv_pending != 0) 3684 isc_socket_cancel(disp->socket, disp->task[0], 3685 ISC_SOCKCANCEL_RECV); 3686 } 3687 } 3688 3689 disp->attributes &= ~mask; 3690 disp->attributes |= (attributes & mask); 3691 UNLOCK(&disp->lock); 3692} 3693 3694void 3695dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) { 3696 void *buf; 3697 isc_socketevent_t *sevent, *newsevent; 3698 3699 REQUIRE(VALID_DISPATCH(disp)); 3700 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0); 3701 REQUIRE(event != NULL); 3702 3703 sevent = (isc_socketevent_t *)event; 3704 3705 INSIST(sevent->n <= disp->mgr->buffersize); 3706 newsevent = (isc_socketevent_t *) 3707 isc_event_allocate(disp->mgr->mctx, NULL, 3708 DNS_EVENT_IMPORTRECVDONE, udp_shrecv, 3709 disp, sizeof(isc_socketevent_t)); 3710 if (newsevent == NULL) 3711 return; 3712 3713 buf = allocate_udp_buffer(disp); 3714 if (buf == NULL) { 3715 isc_event_free(ISC_EVENT_PTR(&newsevent)); 3716 return; 3717 } 3718 memmove(buf, sevent->region.base, sevent->n); 3719 newsevent->region.base = buf; 3720 newsevent->region.length = disp->mgr->buffersize; 3721 newsevent->n = sevent->n; 3722 newsevent->result = sevent->result; 3723 newsevent->address = sevent->address; 3724 newsevent->timestamp = sevent->timestamp; 3725 newsevent->pktinfo = sevent->pktinfo; 3726 newsevent->attributes = sevent->attributes; 3727 3728 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent)); 3729} 3730 3731dns_dispatch_t * 3732dns_dispatchset_get(dns_dispatchset_t *dset) { 
3733 dns_dispatch_t *disp; 3734 3735 /* check that dispatch set is configured */ 3736 if (dset == NULL || dset->ndisp == 0) 3737 return (NULL); 3738 3739 LOCK(&dset->lock); 3740 disp = dset->dispatches[dset->cur]; 3741 dset->cur++; 3742 if (dset->cur == dset->ndisp) 3743 dset->cur = 0; 3744 UNLOCK(&dset->lock); 3745 3746 return (disp); 3747} 3748 3749isc_result_t 3750dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr, 3751 isc_taskmgr_t *taskmgr, dns_dispatch_t *source, 3752 dns_dispatchset_t **dsetp, int n) 3753{ 3754 isc_result_t result; 3755 dns_dispatchset_t *dset; 3756 dns_dispatchmgr_t *mgr; 3757 int i, j; 3758 3759 REQUIRE(VALID_DISPATCH(source)); 3760 REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0); 3761 REQUIRE(dsetp != NULL && *dsetp == NULL); 3762 3763 mgr = source->mgr; 3764 3765 dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t)); 3766 if (dset == NULL) 3767 return (ISC_R_NOMEMORY); 3768 memset(dset, 0, sizeof(*dset)); 3769 3770 result = isc_mutex_init(&dset->lock); 3771 if (result != ISC_R_SUCCESS) 3772 goto fail_alloc; 3773 3774 dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n); 3775 if (dset == NULL) { 3776 result = ISC_R_NOMEMORY; 3777 goto fail_lock; 3778 } 3779 3780 isc_mem_attach(mctx, &dset->mctx); 3781 dset->ndisp = n; 3782 dset->cur = 0; 3783 3784 dset->dispatches[0] = NULL; 3785 dns_dispatch_attach(source, &dset->dispatches[0]); 3786 3787 LOCK(&mgr->lock); 3788 for (i = 1; i < n; i++) { 3789 dset->dispatches[i] = NULL; 3790 result = dispatch_createudp(mgr, sockmgr, taskmgr, 3791 &source->local, 3792 source->maxrequests, 3793 source->attributes, 3794 &dset->dispatches[i], 3795 source->socket); 3796 if (result != ISC_R_SUCCESS) 3797 goto fail; 3798 } 3799 3800 UNLOCK(&mgr->lock); 3801 *dsetp = dset; 3802 3803 return (ISC_R_SUCCESS); 3804 3805 fail: 3806 UNLOCK(&mgr->lock); 3807 3808 for (j = 0; j < i; j++) 3809 dns_dispatch_detach(&(dset->dispatches[j])); 3810 isc_mem_put(mctx, dset->dispatches, 
sizeof(dns_dispatch_t *) * n); 3811 if (dset->mctx == mctx) 3812 isc_mem_detach(&dset->mctx); 3813 3814 fail_lock: 3815 DESTROYLOCK(&dset->lock); 3816 3817 fail_alloc: 3818 isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t)); 3819 return (result); 3820} 3821 3822void 3823dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) { 3824 int i; 3825 3826 REQUIRE(dset != NULL); 3827 3828 for (i = 0; i < dset->ndisp; i++) { 3829 isc_socket_t *sock; 3830 sock = dns_dispatch_getsocket(dset->dispatches[i]); 3831 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL); 3832 } 3833} 3834 3835void 3836dns_dispatchset_destroy(dns_dispatchset_t **dsetp) { 3837 dns_dispatchset_t *dset; 3838 int i; 3839 3840 REQUIRE(dsetp != NULL && *dsetp != NULL); 3841 3842 dset = *dsetp; 3843 for (i = 0; i < dset->ndisp; i++) 3844 dns_dispatch_detach(&(dset->dispatches[i])); 3845 isc_mem_put(dset->mctx, dset->dispatches, 3846 sizeof(dns_dispatch_t *) * dset->ndisp); 3847 DESTROYLOCK(&dset->lock); 3848 isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t)); 3849 3850 *dsetp = NULL; 3851} 3852 3853#if 0 3854void 3855dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) { 3856 dns_dispatch_t *disp; 3857 char foo[1024]; 3858 3859 disp = ISC_LIST_HEAD(mgr->list); 3860 while (disp != NULL) { 3861 isc_sockaddr_format(&disp->local, foo, sizeof(foo)); 3862 printf("\tdispatch %p, addr %s\n", disp, foo); 3863 disp = ISC_LIST_NEXT(disp, link); 3864 } 3865} 3866#endif 3867