dispatch.c revision 193149
1/*
2 * Copyright (C) 2004-2009  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id: dispatch.c,v 1.155.12.7 2009/04/28 21:39:45 jinmei Exp $ */
19
20/*! \file */
21
22#include <config.h>
23
24#include <stdlib.h>
25#include <sys/types.h>
26#include <unistd.h>
27#include <stdlib.h>
28
29#include <isc/entropy.h>
30#include <isc/mem.h>
31#include <isc/mutex.h>
32#include <isc/portset.h>
33#include <isc/print.h>
34#include <isc/random.h>
35#include <isc/stats.h>
36#include <isc/string.h>
37#include <isc/task.h>
38#include <isc/time.h>
39#include <isc/util.h>
40
41#include <dns/acl.h>
42#include <dns/dispatch.h>
43#include <dns/events.h>
44#include <dns/log.h>
45#include <dns/message.h>
46#include <dns/portlist.h>
47#include <dns/stats.h>
48#include <dns/tcpmsg.h>
49#include <dns/types.h>
50
51typedef ISC_LIST(dns_dispentry_t)	dns_displist_t;
52
53typedef struct dispsocket		dispsocket_t;
54typedef ISC_LIST(dispsocket_t)		dispsocketlist_t;
55
56typedef struct dispportentry		dispportentry_t;
57typedef ISC_LIST(dispportentry_t)	dispportlist_t;
58
/*%
 * ARC4 random generator state.
 */
typedef struct arc4ctx {
	isc_uint8_t	i;		/*%< first index into s[] */
	isc_uint8_t	j;		/*%< second index into s[] */
	isc_uint8_t	s[256];		/*%< permutation table */
	int		count;		/*%< decremented per draw; a value
					 *   <= 0 triggers a re-stir */
	isc_entropy_t	*entropy;	/*%< entropy source for ARC4 */
	isc_mutex_t	*lock;		/*%< held around each draw when
					 *   non-NULL */
} arc4ctx_t;
68
/*%
 * Hash tables of outstanding dispatch entries and of dispatch sockets.
 * Both tables are indexed by the bucket number returned by dns_hash().
 */
typedef struct dns_qid {
	unsigned int	magic;
	unsigned int	qid_nbuckets;	/*%< hash table size */
	unsigned int	qid_increment;	/*%< id increment on collision */
	isc_mutex_t	lock;		/*%< taken around sock_table updates */
	dns_displist_t	*qid_table;	/*%< the table itself */
	dispsocketlist_t *sock_table;	/*%< socket table */
} dns_qid_t;
77
struct dns_dispatchmgr {
	/* Unlocked. */
	unsigned int			magic;
	isc_mem_t		       *mctx;
	dns_acl_t		       *blackhole;
	dns_portlist_t		       *portlist;
	isc_stats_t		       *stats;	/*%< may be NULL; see inc_stats() */
	isc_entropy_t		       *entropy; /*%< entropy source */

	/* Locked by "lock". */
	isc_mutex_t			lock;
	unsigned int			state;	/*%< MGR_* flag bits */
	ISC_LIST(dns_dispatch_t)	list;

	/* Locked by arc4_lock. */
	isc_mutex_t			arc4_lock;
	arc4ctx_t			arc4ctx;    /*%< ARC4 context for QID */

	/* locked by buffer lock */
	dns_qid_t			*qid;
	isc_mutex_t			buffer_lock;
	unsigned int			buffers;    /*%< allocated buffers */
	unsigned int			buffersize; /*%< size of each buffer */
	unsigned int			maxbuffers; /*%< max buffers */

	/* Locked internally. */
	isc_mutex_t			pool_lock;
	isc_mempool_t		       *epool;	/*%< memory pool for events */
	isc_mempool_t		       *rpool;	/*%< memory pool for replies */
	isc_mempool_t		       *dpool;  /*%< dispatch allocations */
	isc_mempool_t		       *bpool;	/*%< memory pool for buffers */
	isc_mempool_t		       *spool;	/*%< memory pool for dispsocs */

	/*%
	 * Locked by qid->lock if qid exists; otherwise, can be used without
	 * being locked.
	 * Memory footprint considerations: this is a simple implementation of
	 * available ports, i.e., an ordered array of the actual port numbers.
	 * This will require about 256KB of memory in the worst case (128KB for
	 * each of IPv4 and IPv6).  We could reduce it by representing it in a
	 * more sophisticated way such as a list (or array) of ranges that are
	 * searched to identify a specific port.  Our decision here is that the
	 * saved memory isn't worth the implementation complexity, considering
	 * the fact that the whole BIND9 process (which is mainly named) already
	 * requires a pretty large memory footprint.  We may, however, have to
	 * revisit the decision when we want to use it as a separate module for
	 * an environment where memory requirements are more severe.
	 */
	in_port_t	*v4ports;	/*%< available ports for IPv4 */
	unsigned int	nv4ports;	/*%< # of available ports for IPv4 */
	in_port_t	*v6ports;	/*%< available ports for IPv6 */
	unsigned int	nv6ports;	/*%< # of available ports for IPv6 */
};
131
/*% Bit in dns_dispatchmgr.state, and the test for it. */
#define MGR_SHUTTINGDOWN		0x00000001U
#define MGR_IS_SHUTTINGDOWN(l)	(((l)->state & MGR_SHUTTINGDOWN) != 0)

/*% True when dispatch 'd' has the DNS_DISPATCHATTR_PRIVATE attribute set. */
#define IS_PRIVATE(d)	(((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
136
/*%
 * One outstanding request/response registration.  entry_search() matches
 * entries on the (id, host, port) triple.
 */
struct dns_dispentry {
	unsigned int			magic;
	dns_dispatch_t		       *disp;
	dns_messageid_t			id;
	in_port_t			port;
	unsigned int			bucket;	/*%< index into qid_table */
	isc_sockaddr_t			host;
	isc_task_t		       *task;
	isc_taskaction_t		action;
	void			       *arg;
	isc_boolean_t			item_out;
	dispsocket_t			*dispsocket; /*%< dedicated socket, if
						      *   any; its 'resp' points
						      *   back here */
	ISC_LIST(dns_dispatchevent_t)	items;
	ISC_LINK(dns_dispentry_t)	link;	/*%< qid_table bucket linkage */
};
152
/*%
 * Maximum number of dispatch sockets that can be pooled for reuse.  The
 * appropriate value may vary, but experiments have shown that a busy caching
 * server may need more than 1000 sockets open concurrently.  The maximum
 * allowable number of dispatch sockets (per manager) will be set to double
 * this value.
 */
#ifndef DNS_DISPATCH_POOLSOCKS
#define DNS_DISPATCH_POOLSOCKS			2048
#endif

/*%
 * Quota to control the number of dispatch sockets.  If a dispatch has more
 * sockets than the quota, new queries will purge the oldest ones, so that
 * a massive number of outstanding queries won't prevent subsequent queries
 * (especially if the older ones take a longer time and result in timeouts).
 */
#ifndef DNS_DISPATCH_SOCKSQUOTA
#define DNS_DISPATCH_SOCKSQUOTA			3072
#endif
173
/*%
 * A dedicated socket used by a dispatch for a single transaction.
 */
struct dispsocket {
	unsigned int			magic;
	isc_socket_t			*socket;
	dns_dispatch_t			*disp;	/*%< owning dispatch */
	isc_sockaddr_t			host;	/*%< destination address */
	in_port_t			localport; /* XXX: should be removed later */
	dispportentry_t			*portentry; /*%< local port entry; NULL
						     *   once deactivated */
	dns_dispentry_t			*resp;
	isc_task_t			*task;	/*%< one of disp->task[] */
	ISC_LINK(dispsocket_t)		link;	/*%< active/inactive list */
	unsigned int			bucket;	/*%< index into sock_table */
	ISC_LINK(dispsocket_t)		blink;	/*%< sock_table bucket list */
};
187
/*%
 * A port table entry.  We remember every port we first open in a table with a
 * reference counter so that we can 'reuse' the same port (with different
 * destination addresses) using the SO_REUSEADDR socket option.
 */
struct dispportentry {
	in_port_t			port;	/*%< local port number */
	unsigned int			refs;	/*%< entry is freed when this
						 *   drops to 0 */
	ISC_LINK(struct dispportentry)	link;	/*%< port_table bucket list */
};
198
/*% Number of buckets in each dispatch's port table (indexed by port modulo). */
#ifndef DNS_DISPATCH_PORTTABLESIZE
#define DNS_DISPATCH_PORTTABLESIZE	1024
#endif

#define INVALID_BUCKET		(0xffffdead)

/*%
 * Number of tasks for each dispatch that use separate sockets for different
 * transactions.  This must be a power of 2 as it will divide 32 bit numbers
 * to get a uniformly random task selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS	64
211
struct dns_dispatch {
	/* Unlocked. */
	unsigned int		magic;		/*%< magic */
	dns_dispatchmgr_t      *mgr;		/*%< dispatch manager */
	int			ntasks;		/*%< # of task[] slots in use */
	/*%
	 * internal task buckets.  We use multiple tasks to distribute various
	 * socket events well when using separate dispatch sockets.  We use the
	 * 1st task (task[0]) for internal control events.
	 */
	isc_task_t	       *task[MAX_INTERNAL_TASKS];
	isc_socket_t	       *socket;		/*%< isc socket attached to */
	isc_sockaddr_t		local;		/*%< local address */
	in_port_t		localport;	/*%< local UDP port */
	unsigned int		maxrequests;	/*%< max requests */
	isc_event_t	       *ctlevent;

	/*% Locked by mgr->lock. */
	ISC_LINK(dns_dispatch_t) link;

	/* Locked by "lock". */
	isc_mutex_t		lock;		/*%< locks all below */
	isc_sockettype_t	socktype;
	unsigned int		attributes;
	unsigned int		refcount;	/*%< number of users */
	dns_dispatchevent_t    *failsafe_ev;	/*%< failsafe cancel event */
	unsigned int		shutting_down : 1,
				shutdown_out : 1,
				connected : 1,
				tcpmsg_valid : 1,
				recv_pending : 1; /*%< is a recv() pending? */
	isc_result_t		shutdown_why;
	ISC_LIST(dispsocket_t)	activesockets;	/*%< sockets in use */
	ISC_LIST(dispsocket_t)	inactivesockets; /*%< sockets kept for reuse */
	unsigned int		nsockets;	/*%< # of dispsockets allocated */
	unsigned int		requests;	/*%< how many requests we have */
	unsigned int		tcpbuffers;	/*%< allocated buffers */
	dns_tcpmsg_t		tcpmsg;		/*%< for tcp streams */
	dns_qid_t		*qid;
	arc4ctx_t		arc4ctx;	/*%< for QID/UDP port num */
	dispportlist_t		*port_table;	/*%< hold ports 'owned' by us */
	isc_mempool_t		*portpool;	/*%< port table entries  */
};
255
/*%
 * Magic numbers and the matching validity checks for each structure type
 * defined in this file.
 */
#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)

#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

#define DISPSOCK_MAGIC		ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e)	ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
270
/*%
 * DNS_QID(disp) yields the QID table used by 'disp': the dispatch's own table
 * for a TCP dispatch, the manager-wide table otherwise.
 *
 * DISP_ARC4CTX(disp) yields a pointer to the ARC4 context used by 'disp': the
 * dispatch's own context for a UDP dispatch, the manager-wide one otherwise.
 *
 * Each expansion is wrapped in parentheses so that the conditional expression
 * cannot be split apart by operators surrounding the macro invocation.
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)
#define DISP_ARC4CTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
275
/*%
 * Locking a query port buffer is a bit tricky.  We access the buffer without
 * locking until qid is created.  Technically, there is a possibility of a race
 * between the creation of qid and access to the port buffer; in practice,
 * however, this should be safe because qid isn't created until the first
 * dispatch is created and there should be no contending situation until then.
 *
 * Both macros are wrapped in do { } while (0) so that each behaves as a
 * single statement; in particular an 'else' following the invocation cannot
 * bind to the 'if' inside the macro.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
285
286/*
287 * Statics.
288 */
289static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
290				     dns_messageid_t, in_port_t, unsigned int);
291static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
292static void destroy_disp(isc_task_t *task, isc_event_t *event);
293static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
294static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
295static void udp_exrecv(isc_task_t *, isc_event_t *);
296static void udp_shrecv(isc_task_t *, isc_event_t *);
297static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
298static void tcp_recv(isc_task_t *, isc_event_t *);
299static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
300static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
301			     in_port_t);
302static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
303static void *allocate_udp_buffer(dns_dispatch_t *disp);
304static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
305static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
306static void do_cancel(dns_dispatch_t *disp);
307static dns_dispentry_t *linear_first(dns_qid_t *disp);
308static dns_dispentry_t *linear_next(dns_qid_t *disp,
309				    dns_dispentry_t *resp);
310static void dispatch_free(dns_dispatch_t **dispp);
311static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
312				  dns_dispatch_t *disp,
313				  isc_socketmgr_t *sockmgr,
314				  isc_sockaddr_t *localaddr,
315				  isc_socket_t **sockp);
316static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
317				       isc_socketmgr_t *sockmgr,
318				       isc_taskmgr_t *taskmgr,
319				       isc_sockaddr_t *localaddr,
320				       unsigned int maxrequests,
321				       unsigned int attributes,
322				       dns_dispatch_t **dispp);
323static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
324static void destroy_mgr(dns_dispatchmgr_t **mgrp);
325static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
326				 unsigned int increment, dns_qid_t **qidp,
327				 isc_boolean_t needaddrtable);
328static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
329static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
330				unsigned int options, isc_socket_t **sockp);
331static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
332				   isc_sockaddr_t *sockaddrp);
333
334#define LVL(x) ISC_LOG_DEBUG(x)
335
336static void
337mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
338     ISC_FORMAT_PRINTF(3, 4);
339
340static void
341mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
342	char msgbuf[2048];
343	va_list ap;
344
345	if (! isc_log_wouldlog(dns_lctx, level))
346		return;
347
348	va_start(ap, fmt);
349	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
350	va_end(ap);
351
352	isc_log_write(dns_lctx,
353		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
354		      level, "dispatchmgr %p: %s", mgr, msgbuf);
355}
356
357static inline void
358inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
359	if (mgr->stats != NULL)
360		isc_stats_increment(mgr->stats, counter);
361}
362
363static void
364dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
365     ISC_FORMAT_PRINTF(3, 4);
366
367static void
368dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
369	char msgbuf[2048];
370	va_list ap;
371
372	if (! isc_log_wouldlog(dns_lctx, level))
373		return;
374
375	va_start(ap, fmt);
376	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
377	va_end(ap);
378
379	isc_log_write(dns_lctx,
380		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
381		      level, "dispatch %p: %s", disp, msgbuf);
382}
383
384static void
385request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
386	    int level, const char *fmt, ...)
387     ISC_FORMAT_PRINTF(4, 5);
388
389static void
390request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
391	    int level, const char *fmt, ...)
392{
393	char msgbuf[2048];
394	char peerbuf[256];
395	va_list ap;
396
397	if (! isc_log_wouldlog(dns_lctx, level))
398		return;
399
400	va_start(ap, fmt);
401	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
402	va_end(ap);
403
404	if (VALID_RESPONSE(resp)) {
405		isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
406		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
407			      DNS_LOGMODULE_DISPATCH, level,
408			      "dispatch %p response %p %s: %s", disp, resp,
409			      peerbuf, msgbuf);
410	} else {
411		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
412			      DNS_LOGMODULE_DISPATCH, level,
413			      "dispatch %p req/resp %p: %s", disp, resp,
414			      msgbuf);
415	}
416}
417
418/*%
419 * ARC4 random number generator derived from OpenBSD.
420 * Only dispatch_arc4random() and dispatch_arc4uniformrandom() are expected
421 * to be called from general dispatch routines; the rest of them are subroutines
422 * for these two.
423 *
424 * The original copyright follows:
425 * Copyright (c) 1996, David Mazieres <dm@uun.org>
426 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
427 *
428 * Permission to use, copy, modify, and distribute this software for any
429 * purpose with or without fee is hereby granted, provided that the above
430 * copyright notice and this permission notice appear in all copies.
431 *
432 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
433 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
434 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
435 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
436 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
437 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
438 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
439 */
440static void
441dispatch_arc4init(arc4ctx_t *actx, isc_entropy_t *entropy, isc_mutex_t *lock) {
442	int n;
443	for (n = 0; n < 256; n++)
444		actx->s[n] = n;
445	actx->i = 0;
446	actx->j = 0;
447	actx->count = 0;
448	actx->entropy = entropy; /* don't have to attach */
449	actx->lock = lock;
450}
451
452static void
453dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
454	int n;
455	isc_uint8_t si;
456
457	actx->i--;
458	for (n = 0; n < 256; n++) {
459		actx->i = (actx->i + 1);
460		si = actx->s[actx->i];
461		actx->j = (actx->j + si + dat[n % datlen]);
462		actx->s[actx->i] = actx->s[actx->j];
463		actx->s[actx->j] = si;
464	}
465	actx->j = actx->i;
466}
467
468static inline isc_uint8_t
469dispatch_arc4get8(arc4ctx_t *actx) {
470	isc_uint8_t si, sj;
471
472	actx->i = (actx->i + 1);
473	si = actx->s[actx->i];
474	actx->j = (actx->j + si);
475	sj = actx->s[actx->j];
476	actx->s[actx->i] = sj;
477	actx->s[actx->j] = si;
478
479	return (actx->s[(si + sj) & 0xff]);
480}
481
482static inline isc_uint16_t
483dispatch_arc4get16(arc4ctx_t *actx) {
484	isc_uint16_t val;
485
486	val = dispatch_arc4get8(actx) << 8;
487	val |= dispatch_arc4get8(actx);
488
489	return (val);
490}
491
492static void
493dispatch_arc4stir(arc4ctx_t *actx) {
494	int i;
495	union {
496		unsigned char rnd[128];
497		isc_uint32_t rnd32[32];
498	} rnd;
499	isc_result_t result;
500
501	if (actx->entropy != NULL) {
502		/*
503		 * We accept any quality of random data to avoid blocking.
504		 */
505		result = isc_entropy_getdata(actx->entropy, rnd.rnd,
506					     sizeof(rnd), NULL, 0);
507		RUNTIME_CHECK(result == ISC_R_SUCCESS);
508	} else {
509		for (i = 0; i < 32; i++)
510			isc_random_get(&rnd.rnd32[i]);
511	}
512	dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
513
514	/*
515	 * Discard early keystream, as per recommendations in:
516	 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
517	 */
518	for (i = 0; i < 256; i++)
519		(void)dispatch_arc4get8(actx);
520
521	/*
522	 * Derived from OpenBSD's implementation.  The rationale is not clear,
523	 * but should be conservative enough in safety, and reasonably large
524	 * for efficiency.
525	 */
526	actx->count = 1600000;
527}
528
529static isc_uint16_t
530dispatch_arc4random(arc4ctx_t *actx) {
531	isc_uint16_t result;
532
533	if (actx->lock != NULL)
534		LOCK(actx->lock);
535
536	actx->count -= sizeof(isc_uint16_t);
537	if (actx->count <= 0)
538		dispatch_arc4stir(actx);
539	result = dispatch_arc4get16(actx);
540
541	if (actx->lock != NULL)
542		UNLOCK(actx->lock);
543
544	return (result);
545}
546
547static isc_uint16_t
548dispatch_arc4uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
549	isc_uint16_t min, r;
550
551	if (upper_bound < 2)
552		return (0);
553
554	/*
555	 * Ensure the range of random numbers [min, 0xffff] be a multiple of
556	 * upper_bound and contain at least a half of the 16 bit range.
557	 */
558
559	if (upper_bound > 0x8000)
560		min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
561	else
562		min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
563
564	/*
565	 * This could theoretically loop forever but each retry has
566	 * p > 0.5 (worst case, usually far better) of selecting a
567	 * number inside the range we need, so it should rarely need
568	 * to re-roll.
569	 */
570	for (;;) {
571		r = dispatch_arc4random(actx);
572		if (r >= min)
573			break;
574	}
575
576	return (r % upper_bound);
577}
578
579/*
580 * Return a hash of the destination and message id.
581 */
582static isc_uint32_t
583dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
584	 in_port_t port)
585{
586	unsigned int ret;
587
588	ret = isc_sockaddr_hash(dest, ISC_TRUE);
589	ret ^= (id << 16) | port;
590	ret %= qid->qid_nbuckets;
591
592	INSIST(ret < qid->qid_nbuckets);
593
594	return (ret);
595}
596
597/*
598 * Find the first entry in 'qid'.  Returns NULL if there are no entries.
599 */
600static dns_dispentry_t *
601linear_first(dns_qid_t *qid) {
602	dns_dispentry_t *ret;
603	unsigned int bucket;
604
605	bucket = 0;
606
607	while (bucket < qid->qid_nbuckets) {
608		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
609		if (ret != NULL)
610			return (ret);
611		bucket++;
612	}
613
614	return (NULL);
615}
616
617/*
618 * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
619 * no more entries.
620 */
621static dns_dispentry_t *
622linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
623	dns_dispentry_t *ret;
624	unsigned int bucket;
625
626	ret = ISC_LIST_NEXT(resp, link);
627	if (ret != NULL)
628		return (ret);
629
630	bucket = resp->bucket;
631	bucket++;
632	while (bucket < qid->qid_nbuckets) {
633		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
634		if (ret != NULL)
635			return (ret);
636		bucket++;
637	}
638
639	return (NULL);
640}
641
642/*
643 * The dispatch must be locked.
644 */
645static isc_boolean_t
646destroy_disp_ok(dns_dispatch_t *disp)
647{
648	if (disp->refcount != 0)
649		return (ISC_FALSE);
650
651	if (disp->recv_pending != 0)
652		return (ISC_FALSE);
653
654	if (!ISC_LIST_EMPTY(disp->activesockets))
655		return (ISC_FALSE);
656
657	if (disp->shutting_down == 0)
658		return (ISC_FALSE);
659
660	return (ISC_TRUE);
661}
662
663/*
664 * Called when refcount reaches 0 (and safe to destroy).
665 *
666 * The dispatcher must not be locked.
667 * The manager must be locked.
668 */
669static void
670destroy_disp(isc_task_t *task, isc_event_t *event) {
671	dns_dispatch_t *disp;
672	dns_dispatchmgr_t *mgr;
673	isc_boolean_t killmgr;
674	dispsocket_t *dispsocket;
675	int i;
676
677	INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
678
679	UNUSED(task);
680
681	disp = event->ev_arg;
682	mgr = disp->mgr;
683
684	LOCK(&mgr->lock);
685	ISC_LIST_UNLINK(mgr->list, disp, link);
686
687	dispatch_log(disp, LVL(90),
688		     "shutting down; detaching from sock %p, task %p",
689		     disp->socket, disp->task[0]); /* XXXX */
690
691	if (disp->socket != NULL)
692		isc_socket_detach(&disp->socket);
693	while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
694		ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
695		destroy_dispsocket(disp, &dispsocket);
696	}
697	for (i = 0; i < disp->ntasks; i++)
698		isc_task_detach(&disp->task[i]);
699	isc_event_free(&event);
700
701	dispatch_free(&disp);
702
703	killmgr = destroy_mgr_ok(mgr);
704	UNLOCK(&mgr->lock);
705	if (killmgr)
706		destroy_mgr(&mgr);
707}
708
709/*%
710 * Manipulate port table per dispatch: find an entry for a given port number,
711 * create a new entry, and decrement a given entry with possible clean-up.
712 */
713static dispportentry_t *
714port_search(dns_dispatch_t *disp, in_port_t port) {
715	dispportentry_t *portentry;
716
717	REQUIRE(disp->port_table != NULL);
718
719	portentry = ISC_LIST_HEAD(disp->port_table[port %
720						   DNS_DISPATCH_PORTTABLESIZE]);
721	while (portentry != NULL) {
722		if (portentry->port == port)
723			return (portentry);
724		portentry = ISC_LIST_NEXT(portentry, link);
725	}
726
727	return (NULL);
728}
729
730static dispportentry_t *
731new_portentry(dns_dispatch_t *disp, in_port_t port) {
732	dispportentry_t *portentry;
733
734	REQUIRE(disp->port_table != NULL);
735
736	portentry = isc_mempool_get(disp->portpool);
737	if (portentry == NULL)
738		return (portentry);
739
740	portentry->port = port;
741	portentry->refs = 0;
742	ISC_LINK_INIT(portentry, link);
743	ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
744			portentry, link);
745
746	return (portentry);
747}
748
749static void
750deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
751	dispportentry_t *portentry = *portentryp;
752
753	REQUIRE(disp->port_table != NULL);
754	REQUIRE(portentry != NULL && portentry->refs > 0);
755
756	portentry->refs--;
757	if (portentry->refs == 0) {
758		ISC_LIST_UNLINK(disp->port_table[portentry->port %
759						 DNS_DISPATCH_PORTTABLESIZE],
760				portentry, link);
761		isc_mempool_put(disp->portpool, portentry);
762	}
763
764	*portentryp = NULL;
765}
766
767/*%
768 * Find a dispsocket for socket address 'dest', and port number 'port'.
769 * Return NULL if no such entry exists.
770 */
771static dispsocket_t *
772socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
773	      unsigned int bucket)
774{
775	dispsocket_t *dispsock;
776
777	REQUIRE(bucket < qid->qid_nbuckets);
778
779	dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
780
781	while (dispsock != NULL) {
782		if (isc_sockaddr_equal(dest, &dispsock->host) &&
783		    dispsock->portentry->port == port)
784			return (dispsock);
785		dispsock = ISC_LIST_NEXT(dispsock, blink);
786	}
787
788	return (NULL);
789}
790
/*%
 * Make a new socket for a single dispatch with a random port number.
 * The caller must hold the disp->lock and qid->lock.
 *
 * A dispsocket is taken from the dispatch's inactive list when one is
 * available, and allocated from the manager's pool otherwise.  Up to 64
 * randomly chosen local ports are then tried.
 *
 * On success, '*dispsockp' is set to the dispsocket (now linked into the
 * socket table of 'qid'), '*portp' to the chosen local port, and
 * ISC_R_SUCCESS is returned.  On failure the dispsocket is destroyed and an
 * error is returned: ISC_R_ADDRNOTAVAIL if no ports are configured for the
 * address family of the dispatch, ISC_R_NOMEMORY on allocation failure, or
 * otherwise the result of the last open_socket() attempt (ISC_R_FAILURE if
 * no attempt was made).
 */
static isc_result_t
get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
	       isc_socketmgr_t *sockmgr, dns_qid_t *qid,
	       dispsocket_t **dispsockp, in_port_t *portp)
{
	int i;
	isc_uint32_t r;
	dns_dispatchmgr_t *mgr = disp->mgr;
	isc_socket_t *sock = NULL;
	isc_result_t result = ISC_R_FAILURE;
	in_port_t port;
	isc_sockaddr_t localaddr;
	unsigned int bucket = 0;
	dispsocket_t *dispsock;
	unsigned int nports;
	in_port_t *ports;
	unsigned int bindoptions;
	dispportentry_t *portentry = NULL;

	/* Select the port list matching the dispatch's address family. */
	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
		nports = disp->mgr->nv4ports;
		ports = disp->mgr->v4ports;
	} else {
		nports = disp->mgr->nv6ports;
		ports = disp->mgr->v6ports;
	}
	if (nports == 0)
		return (ISC_R_ADDRNOTAVAIL);

	dispsock = ISC_LIST_HEAD(disp->inactivesockets);
	if (dispsock != NULL) {
		/*
		 * Reuse a pooled dispsocket; its socket object is handed to
		 * open_socket() below through 'sock'.
		 */
		ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
		sock = dispsock->socket;
		dispsock->socket = NULL;
	} else {
		dispsock = isc_mempool_get(mgr->spool);
		if (dispsock == NULL)
			return (ISC_R_NOMEMORY);

		disp->nsockets++;
		dispsock->socket = NULL;
		dispsock->disp = disp;
		dispsock->resp = NULL;
		dispsock->portentry = NULL;
		/* Attach to a randomly selected internal task. */
		isc_random_get(&r);
		dispsock->task = NULL;
		isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
		ISC_LINK_INIT(dispsock, link);
		ISC_LINK_INIT(dispsock, blink);
		dispsock->magic = DISPSOCK_MAGIC;
	}

	/*
	 * Pick up a random UDP port and open a new socket with it.  Avoid
	 * choosing ports that share the same destination because it will be
	 * very likely to fail in bind(2) or connect(2).
	 */
	localaddr = disp->local;
	for (i = 0; i < 64; i++) {
		/*
		 * NOTE(review): 'nports' is an unsigned int but the callee
		 * takes an isc_uint16_t upper bound; confirm the port list
		 * can never hold 0x10000 entries.
		 */
		port = ports[dispatch_arc4uniformrandom(DISP_ARC4CTX(disp),
							nports)];
		isc_sockaddr_setport(&localaddr, port);

		bucket = dns_hash(qid, dest, 0, port);
		if (socket_search(qid, dest, port, bucket) != NULL)
			continue;
		/*
		 * If this dispatch already has the port in its port table,
		 * ask for address reuse when binding.
		 */
		bindoptions = 0;
		portentry = port_search(disp, port);
		if (portentry != NULL)
			bindoptions |= ISC_SOCKET_REUSEADDRESS;
		result = open_socket(sockmgr, &localaddr, bindoptions, &sock);
		if (result == ISC_R_SUCCESS) {
			if (portentry == NULL) {
				portentry = new_portentry(disp, port);
				if (portentry == NULL) {
					result = ISC_R_NOMEMORY;
					break;
				}
			}
			portentry->refs++;
			break;
		} else if (result != ISC_R_ADDRINUSE)
			break;	/* only "address in use" is worth a retry */
	}

	if (result == ISC_R_SUCCESS) {
		dispsock->socket = sock;
		dispsock->host = *dest;
		dispsock->portentry = portentry;
		dispsock->bucket = bucket;
		ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
		*dispsockp = dispsock;
		*portp = port;
	} else {
		/*
		 * We could keep it in the inactive list, but since this should
		 * be an exceptional case and might be resource shortage, we'd
		 * rather destroy it.
		 */
		if (sock != NULL)
			isc_socket_detach(&sock);
		destroy_dispsocket(disp, &dispsock);
	}

	return (result);
}
901
902/*%
903 * Destroy a dedicated dispatch socket.
904 */
905static void
906destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
907	dispsocket_t *dispsock;
908	dns_qid_t *qid;
909
910	/*
911	 * The dispatch must be locked.
912	 */
913
914	REQUIRE(dispsockp != NULL && *dispsockp != NULL);
915	dispsock = *dispsockp;
916	REQUIRE(!ISC_LINK_LINKED(dispsock, link));
917
918	disp->nsockets--;
919	dispsock->magic = 0;
920	if (dispsock->portentry != NULL)
921		deref_portentry(disp, &dispsock->portentry);
922	if (dispsock->socket != NULL)
923		isc_socket_detach(&dispsock->socket);
924	if (ISC_LINK_LINKED(dispsock, blink)) {
925		qid = DNS_QID(disp);
926		LOCK(&qid->lock);
927		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
928				blink);
929		UNLOCK(&qid->lock);
930	}
931	if (dispsock->task != NULL)
932		isc_task_detach(&dispsock->task);
933	isc_mempool_put(disp->mgr->spool, dispsock);
934
935	*dispsockp = NULL;
936}
937
938/*%
939 * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
940 * future reuse unless the total number of sockets are exceeding the maximum.
941 */
942static void
943deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
944	isc_result_t result;
945	dns_qid_t *qid;
946
947	/*
948	 * The dispatch must be locked.
949	 */
950	ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
951	if (dispsock->resp != NULL) {
952		INSIST(dispsock->resp->dispsocket == dispsock);
953		dispsock->resp->dispsocket = NULL;
954	}
955
956	INSIST(dispsock->portentry != NULL);
957	deref_portentry(disp, &dispsock->portentry);
958
959	if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
960		destroy_dispsocket(disp, &dispsock);
961	else {
962		result = isc_socket_close(dispsock->socket);
963
964		qid = DNS_QID(disp);
965		LOCK(&qid->lock);
966		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
967				blink);
968		UNLOCK(&qid->lock);
969
970		if (result == ISC_R_SUCCESS)
971			ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
972		else {
973			/*
974			 * If the underlying system does not allow this
975			 * optimization, destroy this temporary structure (and
976			 * create a new one for a new transaction).
977			 */
978			INSIST(result == ISC_R_NOTIMPLEMENTED);
979			destroy_dispsocket(disp, &dispsock);
980		}
981	}
982}
983
984/*
985 * Find an entry for query ID 'id', socket address 'dest', and port number
986 * 'port'.
987 * Return NULL if no such entry exists.
988 */
989static dns_dispentry_t *
990entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
991	     in_port_t port, unsigned int bucket)
992{
993	dns_dispentry_t *res;
994
995	REQUIRE(bucket < qid->qid_nbuckets);
996
997	res = ISC_LIST_HEAD(qid->qid_table[bucket]);
998
999	while (res != NULL) {
1000		if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1001		    res->port == port) {
1002			return (res);
1003		}
1004		res = ISC_LIST_NEXT(res, link);
1005	}
1006
1007	return (NULL);
1008}
1009
1010static void
1011free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1012	INSIST(buf != NULL && len != 0);
1013
1014
1015	switch (disp->socktype) {
1016	case isc_sockettype_tcp:
1017		INSIST(disp->tcpbuffers > 0);
1018		disp->tcpbuffers--;
1019		isc_mem_put(disp->mgr->mctx, buf, len);
1020		break;
1021	case isc_sockettype_udp:
1022		LOCK(&disp->mgr->buffer_lock);
1023		INSIST(disp->mgr->buffers > 0);
1024		INSIST(len == disp->mgr->buffersize);
1025		disp->mgr->buffers--;
1026		isc_mempool_put(disp->mgr->bpool, buf);
1027		UNLOCK(&disp->mgr->buffer_lock);
1028		break;
1029	default:
1030		INSIST(0);
1031		break;
1032	}
1033}
1034
1035static void *
1036allocate_udp_buffer(dns_dispatch_t *disp) {
1037	void *temp;
1038
1039	LOCK(&disp->mgr->buffer_lock);
1040	temp = isc_mempool_get(disp->mgr->bpool);
1041
1042	if (temp != NULL)
1043		disp->mgr->buffers++;
1044	UNLOCK(&disp->mgr->buffer_lock);
1045
1046	return (temp);
1047}
1048
1049static inline void
1050free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1051	if (disp->failsafe_ev == ev) {
1052		INSIST(disp->shutdown_out == 1);
1053		disp->shutdown_out = 0;
1054
1055		return;
1056	}
1057
1058	isc_mempool_put(disp->mgr->epool, ev);
1059}
1060
1061static inline dns_dispatchevent_t *
1062allocate_event(dns_dispatch_t *disp) {
1063	dns_dispatchevent_t *ev;
1064
1065	ev = isc_mempool_get(disp->mgr->epool);
1066	if (ev == NULL)
1067		return (NULL);
1068	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1069		       NULL, NULL, NULL, NULL, NULL);
1070
1071	return (ev);
1072}
1073
1074static void
1075udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1076	dispsocket_t *dispsock = ev->ev_arg;
1077
1078	UNUSED(task);
1079
1080	REQUIRE(VALID_DISPSOCK(dispsock));
1081	udp_recv(ev, dispsock->disp, dispsock);
1082}
1083
1084static void
1085udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1086	dns_dispatch_t *disp = ev->ev_arg;
1087
1088	UNUSED(task);
1089
1090	REQUIRE(VALID_DISPATCH(disp));
1091	udp_recv(ev, disp, NULL);
1092}
1093
1094/*
1095 * General flow:
1096 *
1097 * If I/O result == CANCELED or error, free the buffer.
1098 *
1099 * If query, free the buffer, restart.
1100 *
1101 * If response:
1102 *	Allocate event, fill in details.
1103 *		If cannot allocate, free buffer, restart.
1104 *	find target.  If not found, free buffer, restart.
1105 *	if event queue is not empty, queue.  else, send.
1106 *	restart.
1107 */
1108static void
1109udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1110	isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1111	dns_messageid_t id;
1112	isc_result_t dres;
1113	isc_buffer_t source;
1114	unsigned int flags;
1115	dns_dispentry_t *resp = NULL;
1116	dns_dispatchevent_t *rev;
1117	unsigned int bucket;
1118	isc_boolean_t killit;
1119	isc_boolean_t queue_response;
1120	dns_dispatchmgr_t *mgr;
1121	dns_qid_t *qid;
1122	isc_netaddr_t netaddr;
1123	int match;
1124	int result;
1125	isc_boolean_t qidlocked = ISC_FALSE;
1126
1127	LOCK(&disp->lock);
1128
1129	mgr = disp->mgr;
1130	qid = mgr->qid;
1131
1132	dispatch_log(disp, LVL(90),
1133		     "got packet: requests %d, buffers %d, recvs %d",
1134		     disp->requests, disp->mgr->buffers, disp->recv_pending);
1135
1136	if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1137		/*
1138		 * Unless the receive event was imported from a listening
1139		 * interface, in which case the event type is
1140		 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1141		 */
1142		INSIST(disp->recv_pending != 0);
1143		disp->recv_pending = 0;
1144	}
1145
1146	if (dispsock != NULL &&
1147	    (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1148		/*
1149		 * dispsock->resp can be NULL if this transaction was canceled
1150		 * just after receiving a response.  Since this socket is
1151		 * exclusively used and there should be at most one receive
1152		 * event the canceled event should have been no effect.  So
1153		 * we can (and should) deactivate the socket right now.
1154		 */
1155		deactivate_dispsocket(disp, dispsock);
1156		dispsock = NULL;
1157	}
1158
1159	if (disp->shutting_down) {
1160		/*
1161		 * This dispatcher is shutting down.
1162		 */
1163		free_buffer(disp, ev->region.base, ev->region.length);
1164
1165		isc_event_free(&ev_in);
1166		ev = NULL;
1167
1168		killit = destroy_disp_ok(disp);
1169		UNLOCK(&disp->lock);
1170		if (killit)
1171			isc_task_send(disp->task[0], &disp->ctlevent);
1172
1173		return;
1174	}
1175
1176	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1177		if (dispsock != NULL) {
1178			resp = dispsock->resp;
1179			id = resp->id;
1180			if (ev->result != ISC_R_SUCCESS) {
1181				/*
1182				 * This is most likely a network error on a
1183				 * connected socket.  It makes no sense to
1184				 * check the address or parse the packet, but it
1185				 * will help to return the error to the caller.
1186				 */
1187				goto sendresponse;
1188			}
1189		} else {
1190			free_buffer(disp, ev->region.base, ev->region.length);
1191
1192			UNLOCK(&disp->lock);
1193			isc_event_free(&ev_in);
1194			return;
1195		}
1196	} else if (ev->result != ISC_R_SUCCESS) {
1197		free_buffer(disp, ev->region.base, ev->region.length);
1198
1199		if (ev->result != ISC_R_CANCELED)
1200			dispatch_log(disp, ISC_LOG_ERROR,
1201				     "odd socket result in udp_recv(): %s",
1202				     isc_result_totext(ev->result));
1203
1204		UNLOCK(&disp->lock);
1205		isc_event_free(&ev_in);
1206		return;
1207	}
1208
1209	/*
1210	 * If this is from a blackholed address, drop it.
1211	 */
1212	isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1213	if (disp->mgr->blackhole != NULL &&
1214	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1215			  NULL, &match, NULL) == ISC_R_SUCCESS &&
1216	    match > 0)
1217	{
1218		if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1219			char netaddrstr[ISC_NETADDR_FORMATSIZE];
1220			isc_netaddr_format(&netaddr, netaddrstr,
1221					   sizeof(netaddrstr));
1222			dispatch_log(disp, LVL(10),
1223				     "blackholed packet from %s",
1224				     netaddrstr);
1225		}
1226		free_buffer(disp, ev->region.base, ev->region.length);
1227		goto restart;
1228	}
1229
1230	/*
1231	 * Peek into the buffer to see what we can see.
1232	 */
1233	isc_buffer_init(&source, ev->region.base, ev->region.length);
1234	isc_buffer_add(&source, ev->n);
1235	dres = dns_message_peekheader(&source, &id, &flags);
1236	if (dres != ISC_R_SUCCESS) {
1237		free_buffer(disp, ev->region.base, ev->region.length);
1238		dispatch_log(disp, LVL(10), "got garbage packet");
1239		goto restart;
1240	}
1241
1242	dispatch_log(disp, LVL(92),
1243		     "got valid DNS message header, /QR %c, id %u",
1244		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1245
1246	/*
1247	 * Look at flags.  If query, drop it. If response,
1248	 * look to see where it goes.
1249	 */
1250	queue_response = ISC_FALSE;
1251	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1252		/* query */
1253		free_buffer(disp, ev->region.base, ev->region.length);
1254		goto restart;
1255	}
1256
1257	/*
1258	 * Search for the corresponding response.  If we are using an exclusive
1259	 * socket, we've already identified it and we can skip the search; but
1260	 * the ID and the address must match the expected ones.
1261	 */
1262	if (resp == NULL) {
1263		bucket = dns_hash(qid, &ev->address, id, disp->localport);
1264		LOCK(&qid->lock);
1265		qidlocked = ISC_TRUE;
1266		resp = entry_search(qid, &ev->address, id, disp->localport,
1267				    bucket);
1268		dispatch_log(disp, LVL(90),
1269			     "search for response in bucket %d: %s",
1270			     bucket, (resp == NULL ? "not found" : "found"));
1271
1272		if (resp == NULL) {
1273			inc_stats(mgr, dns_resstatscounter_mismatch);
1274			free_buffer(disp, ev->region.base, ev->region.length);
1275			goto unlock;
1276		}
1277	} else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1278							 &resp->host)) {
1279		dispatch_log(disp, LVL(90),
1280			     "response to an exclusive socket doesn't match");
1281		inc_stats(mgr, dns_resstatscounter_mismatch);
1282		free_buffer(disp, ev->region.base, ev->region.length);
1283		goto unlock;
1284	}
1285
1286	/*
1287	 * Now that we have the original dispatch the query was sent
1288	 * from check that the address and port the response was
1289	 * sent to make sense.
1290	 */
1291	if (disp != resp->disp) {
1292		isc_sockaddr_t a1;
1293		isc_sockaddr_t a2;
1294
1295		/*
1296		 * Check that the socket types and ports match.
1297		 */
1298		if (disp->socktype != resp->disp->socktype ||
1299		    isc_sockaddr_getport(&disp->local) !=
1300		    isc_sockaddr_getport(&resp->disp->local)) {
1301			free_buffer(disp, ev->region.base, ev->region.length);
1302			goto unlock;
1303		}
1304
1305		/*
1306		 * If both dispatches are bound to an address then fail as
1307		 * the addresses can't be equal (enforced by the IP stack).
1308		 *
1309		 * Note under Linux a packet can be sent out via IPv4 socket
1310		 * and the response be received via a IPv6 socket.
1311		 *
1312		 * Requests sent out via IPv6 should always come back in
1313		 * via IPv6.
1314		 */
1315		if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1316		    isc_sockaddr_pf(&disp->local) != PF_INET6) {
1317			free_buffer(disp, ev->region.base, ev->region.length);
1318			goto unlock;
1319		}
1320		isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1321		isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1322		if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1323		    !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1324			free_buffer(disp, ev->region.base, ev->region.length);
1325			goto unlock;
1326		}
1327	}
1328
1329  sendresponse:
1330	queue_response = resp->item_out;
1331	rev = allocate_event(resp->disp);
1332	if (rev == NULL) {
1333		free_buffer(disp, ev->region.base, ev->region.length);
1334		goto unlock;
1335	}
1336
1337	/*
1338	 * At this point, rev contains the event we want to fill in, and
1339	 * resp contains the information on the place to send it to.
1340	 * Send the event off.
1341	 */
1342	isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1343	isc_buffer_add(&rev->buffer, ev->n);
1344	rev->result = ev->result;
1345	rev->id = id;
1346	rev->addr = ev->address;
1347	rev->pktinfo = ev->pktinfo;
1348	rev->attributes = ev->attributes;
1349	if (queue_response) {
1350		ISC_LIST_APPEND(resp->items, rev, ev_link);
1351	} else {
1352		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1353			       DNS_EVENT_DISPATCH,
1354			       resp->action, resp->arg, resp, NULL, NULL);
1355		request_log(disp, resp, LVL(90),
1356			    "[a] Sent event %p buffer %p len %d to task %p",
1357			    rev, rev->buffer.base, rev->buffer.length,
1358			    resp->task);
1359		resp->item_out = ISC_TRUE;
1360		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1361	}
1362 unlock:
1363	if (qidlocked)
1364		UNLOCK(&qid->lock);
1365
1366	/*
1367	 * Restart recv() to get the next packet.
1368	 */
1369 restart:
1370	result = startrecv(disp, dispsock);
1371	if (result != ISC_R_SUCCESS && dispsock != NULL) {
1372		/*
1373		 * XXX: wired. There seems to be no recovery process other than
1374		 * deactivate this socket anyway (since we cannot start
1375		 * receiving, we won't be able to receive a cancel event
1376		 * from the user).
1377		 */
1378		deactivate_dispsocket(disp, dispsock);
1379	}
1380	UNLOCK(&disp->lock);
1381
1382	isc_event_free(&ev_in);
1383}
1384
1385/*
1386 * General flow:
1387 *
1388 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1389 * various queues drain.
1390 *
1391 * If query, restart.
1392 *
1393 * If response:
1394 *	Allocate event, fill in details.
1395 *		If cannot allocate, restart.
1396 *	find target.  If not found, restart.
1397 *	if event queue is not empty, queue.  else, send.
1398 *	restart.
1399 */
1400static void
1401tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1402	dns_dispatch_t *disp = ev_in->ev_arg;
1403	dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1404	dns_messageid_t id;
1405	isc_result_t dres;
1406	unsigned int flags;
1407	dns_dispentry_t *resp;
1408	dns_dispatchevent_t *rev;
1409	unsigned int bucket;
1410	isc_boolean_t killit;
1411	isc_boolean_t queue_response;
1412	dns_qid_t *qid;
1413	int level;
1414	char buf[ISC_SOCKADDR_FORMATSIZE];
1415
1416	UNUSED(task);
1417
1418	REQUIRE(VALID_DISPATCH(disp));
1419
1420	qid = disp->qid;
1421
1422	dispatch_log(disp, LVL(90),
1423		     "got TCP packet: requests %d, buffers %d, recvs %d",
1424		     disp->requests, disp->tcpbuffers, disp->recv_pending);
1425
1426	LOCK(&disp->lock);
1427
1428	INSIST(disp->recv_pending != 0);
1429	disp->recv_pending = 0;
1430
1431	if (disp->refcount == 0) {
1432		/*
1433		 * This dispatcher is shutting down.  Force cancelation.
1434		 */
1435		tcpmsg->result = ISC_R_CANCELED;
1436	}
1437
1438	if (tcpmsg->result != ISC_R_SUCCESS) {
1439		switch (tcpmsg->result) {
1440		case ISC_R_CANCELED:
1441			break;
1442
1443		case ISC_R_EOF:
1444			dispatch_log(disp, LVL(90), "shutting down on EOF");
1445			do_cancel(disp);
1446			break;
1447
1448		case ISC_R_CONNECTIONRESET:
1449			level = ISC_LOG_INFO;
1450			goto logit;
1451
1452		default:
1453			level = ISC_LOG_ERROR;
1454		logit:
1455			isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1456			dispatch_log(disp, level, "shutting down due to TCP "
1457				     "receive error: %s: %s", buf,
1458				     isc_result_totext(tcpmsg->result));
1459			do_cancel(disp);
1460			break;
1461		}
1462
1463		/*
1464		 * The event is statically allocated in the tcpmsg
1465		 * structure, and destroy_disp() frees the tcpmsg, so we must
1466		 * free the event *before* calling destroy_disp().
1467		 */
1468		isc_event_free(&ev_in);
1469
1470		disp->shutting_down = 1;
1471		disp->shutdown_why = tcpmsg->result;
1472
1473		/*
1474		 * If the recv() was canceled pass the word on.
1475		 */
1476		killit = destroy_disp_ok(disp);
1477		UNLOCK(&disp->lock);
1478		if (killit)
1479			isc_task_send(disp->task[0], &disp->ctlevent);
1480		return;
1481	}
1482
1483	dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1484		     tcpmsg->result,
1485		     tcpmsg->buffer.length, tcpmsg->buffer.base);
1486
1487	/*
1488	 * Peek into the buffer to see what we can see.
1489	 */
1490	dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1491	if (dres != ISC_R_SUCCESS) {
1492		dispatch_log(disp, LVL(10), "got garbage packet");
1493		goto restart;
1494	}
1495
1496	dispatch_log(disp, LVL(92),
1497		     "got valid DNS message header, /QR %c, id %u",
1498		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1499
1500	/*
1501	 * Allocate an event to send to the query or response client, and
1502	 * allocate a new buffer for our use.
1503	 */
1504
1505	/*
1506	 * Look at flags.  If query, drop it. If response,
1507	 * look to see where it goes.
1508	 */
1509	queue_response = ISC_FALSE;
1510	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1511		/*
1512		 * Query.
1513		 */
1514		goto restart;
1515	}
1516
1517	/*
1518	 * Response.
1519	 */
1520	bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1521	LOCK(&qid->lock);
1522	resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1523	dispatch_log(disp, LVL(90),
1524		     "search for response in bucket %d: %s",
1525		     bucket, (resp == NULL ? "not found" : "found"));
1526
1527	if (resp == NULL)
1528		goto unlock;
1529	queue_response = resp->item_out;
1530	rev = allocate_event(disp);
1531	if (rev == NULL)
1532		goto unlock;
1533
1534	/*
1535	 * At this point, rev contains the event we want to fill in, and
1536	 * resp contains the information on the place to send it to.
1537	 * Send the event off.
1538	 */
1539	dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1540	disp->tcpbuffers++;
1541	rev->result = ISC_R_SUCCESS;
1542	rev->id = id;
1543	rev->addr = tcpmsg->address;
1544	if (queue_response) {
1545		ISC_LIST_APPEND(resp->items, rev, ev_link);
1546	} else {
1547		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1548			       resp->action, resp->arg, resp, NULL, NULL);
1549		request_log(disp, resp, LVL(90),
1550			    "[b] Sent event %p buffer %p len %d to task %p",
1551			    rev, rev->buffer.base, rev->buffer.length,
1552			    resp->task);
1553		resp->item_out = ISC_TRUE;
1554		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1555	}
1556 unlock:
1557	UNLOCK(&qid->lock);
1558
1559	/*
1560	 * Restart recv() to get the next packet.
1561	 */
1562 restart:
1563	(void)startrecv(disp, NULL);
1564
1565	UNLOCK(&disp->lock);
1566
1567	isc_event_free(&ev_in);
1568}
1569
1570/*
1571 * disp must be locked.
1572 */
1573static isc_result_t
1574startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1575	isc_result_t res;
1576	isc_region_t region;
1577	isc_socket_t *socket;
1578
1579	if (disp->shutting_down == 1)
1580		return (ISC_R_SUCCESS);
1581
1582	if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1583		return (ISC_R_SUCCESS);
1584
1585	if (disp->recv_pending != 0 && dispsock == NULL)
1586		return (ISC_R_SUCCESS);
1587
1588	if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1589		return (ISC_R_NOMEMORY);
1590
1591	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1592	    dispsock == NULL)
1593		return (ISC_R_SUCCESS);
1594
1595	if (dispsock != NULL)
1596		socket = dispsock->socket;
1597	else
1598		socket = disp->socket;
1599	INSIST(socket != NULL);
1600
1601	switch (disp->socktype) {
1602		/*
1603		 * UDP reads are always maximal.
1604		 */
1605	case isc_sockettype_udp:
1606		region.length = disp->mgr->buffersize;
1607		region.base = allocate_udp_buffer(disp);
1608		if (region.base == NULL)
1609			return (ISC_R_NOMEMORY);
1610		if (dispsock != NULL) {
1611			res = isc_socket_recv(socket, &region, 1,
1612					      dispsock->task, udp_exrecv,
1613					      dispsock);
1614			if (res != ISC_R_SUCCESS) {
1615				free_buffer(disp, region.base, region.length);
1616				return (res);
1617			}
1618		} else {
1619			res = isc_socket_recv(socket, &region, 1,
1620					      disp->task[0], udp_shrecv, disp);
1621			if (res != ISC_R_SUCCESS) {
1622				free_buffer(disp, region.base, region.length);
1623				disp->shutdown_why = res;
1624				disp->shutting_down = 1;
1625				do_cancel(disp);
1626				return (ISC_R_SUCCESS); /* recover by cancel */
1627			}
1628			INSIST(disp->recv_pending == 0);
1629			disp->recv_pending = 1;
1630		}
1631		break;
1632
1633	case isc_sockettype_tcp:
1634		res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1635					     tcp_recv, disp);
1636		if (res != ISC_R_SUCCESS) {
1637			disp->shutdown_why = res;
1638			disp->shutting_down = 1;
1639			do_cancel(disp);
1640			return (ISC_R_SUCCESS); /* recover by cancel */
1641		}
1642		INSIST(disp->recv_pending == 0);
1643		disp->recv_pending = 1;
1644		break;
1645	default:
1646		INSIST(0);
1647		break;
1648	}
1649
1650	return (ISC_R_SUCCESS);
1651}
1652
1653/*
1654 * Mgr must be locked when calling this function.
1655 */
1656static isc_boolean_t
1657destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1658	mgr_log(mgr, LVL(90),
1659		"destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1660		"epool=%d, rpool=%d, dpool=%d",
1661		MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1662		isc_mempool_getallocated(mgr->epool),
1663		isc_mempool_getallocated(mgr->rpool),
1664		isc_mempool_getallocated(mgr->dpool));
1665	if (!MGR_IS_SHUTTINGDOWN(mgr))
1666		return (ISC_FALSE);
1667	if (!ISC_LIST_EMPTY(mgr->list))
1668		return (ISC_FALSE);
1669	if (isc_mempool_getallocated(mgr->epool) != 0)
1670		return (ISC_FALSE);
1671	if (isc_mempool_getallocated(mgr->rpool) != 0)
1672		return (ISC_FALSE);
1673	if (isc_mempool_getallocated(mgr->dpool) != 0)
1674		return (ISC_FALSE);
1675
1676	return (ISC_TRUE);
1677}
1678
1679/*
1680 * Mgr must be unlocked when calling this function.
1681 */
1682static void
1683destroy_mgr(dns_dispatchmgr_t **mgrp) {
1684	isc_mem_t *mctx;
1685	dns_dispatchmgr_t *mgr;
1686
1687	mgr = *mgrp;
1688	*mgrp = NULL;
1689
1690	mctx = mgr->mctx;
1691
1692	mgr->magic = 0;
1693	mgr->mctx = NULL;
1694	DESTROYLOCK(&mgr->lock);
1695	mgr->state = 0;
1696
1697	DESTROYLOCK(&mgr->arc4_lock);
1698
1699	isc_mempool_destroy(&mgr->epool);
1700	isc_mempool_destroy(&mgr->rpool);
1701	isc_mempool_destroy(&mgr->dpool);
1702	isc_mempool_destroy(&mgr->bpool);
1703	isc_mempool_destroy(&mgr->spool);
1704
1705	DESTROYLOCK(&mgr->pool_lock);
1706
1707	if (mgr->entropy != NULL)
1708		isc_entropy_detach(&mgr->entropy);
1709	if (mgr->qid != NULL)
1710		qid_destroy(mctx, &mgr->qid);
1711
1712	DESTROYLOCK(&mgr->buffer_lock);
1713
1714	if (mgr->blackhole != NULL)
1715		dns_acl_detach(&mgr->blackhole);
1716
1717	if (mgr->stats != NULL)
1718		isc_stats_detach(&mgr->stats);
1719
1720	if (mgr->v4ports != NULL) {
1721		isc_mem_put(mctx, mgr->v4ports,
1722			    mgr->nv4ports * sizeof(in_port_t));
1723	}
1724	if (mgr->v6ports != NULL) {
1725		isc_mem_put(mctx, mgr->v6ports,
1726			    mgr->nv6ports * sizeof(in_port_t));
1727	}
1728	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1729	isc_mem_detach(&mctx);
1730}
1731
1732static isc_result_t
1733open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1734	    unsigned int options, isc_socket_t **sockp)
1735{
1736	isc_socket_t *sock;
1737	isc_result_t result;
1738
1739	sock = *sockp;
1740	if (sock == NULL) {
1741		result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1742					   isc_sockettype_udp, &sock);
1743		if (result != ISC_R_SUCCESS)
1744			return (result);
1745		isc_socket_setname(sock, "dispatcher", NULL);
1746	} else {
1747		result = isc_socket_open(sock);
1748		if (result != ISC_R_SUCCESS)
1749			return (result);
1750	}
1751
1752#ifndef ISC_ALLOW_MAPPED
1753	isc_socket_ipv6only(sock, ISC_TRUE);
1754#endif
1755	result = isc_socket_bind(sock, local, options);
1756	if (result != ISC_R_SUCCESS) {
1757		if (*sockp == NULL)
1758			isc_socket_detach(&sock);
1759		else
1760			isc_socket_close(sock);
1761		return (result);
1762	}
1763
1764	*sockp = sock;
1765	return (ISC_R_SUCCESS);
1766}
1767
1768/*%
1769 * Create a temporary port list to set the initial default set of dispatch
1770 * ports: [1024, 65535].  This is almost meaningless as the application will
1771 * normally set the ports explicitly, but is provided to fill some minor corner
1772 * cases.
1773 */
1774static isc_result_t
1775create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1776	isc_result_t result;
1777
1778	result = isc_portset_create(mctx, portsetp);
1779	if (result != ISC_R_SUCCESS)
1780		return (result);
1781	isc_portset_addrange(*portsetp, 1024, 65535);
1782
1783	return (ISC_R_SUCCESS);
1784}
1785
1786/*
1787 * Publics.
1788 */
1789
1790isc_result_t
1791dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1792		       dns_dispatchmgr_t **mgrp)
1793{
1794	dns_dispatchmgr_t *mgr;
1795	isc_result_t result;
1796	isc_portset_t *v4portset = NULL;
1797	isc_portset_t *v6portset = NULL;
1798
1799	REQUIRE(mctx != NULL);
1800	REQUIRE(mgrp != NULL && *mgrp == NULL);
1801
1802	mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1803	if (mgr == NULL)
1804		return (ISC_R_NOMEMORY);
1805
1806	mgr->mctx = NULL;
1807	isc_mem_attach(mctx, &mgr->mctx);
1808
1809	mgr->blackhole = NULL;
1810	mgr->stats = NULL;
1811
1812	result = isc_mutex_init(&mgr->lock);
1813	if (result != ISC_R_SUCCESS)
1814		goto deallocate;
1815
1816	result = isc_mutex_init(&mgr->arc4_lock);
1817	if (result != ISC_R_SUCCESS)
1818		goto kill_lock;
1819
1820	result = isc_mutex_init(&mgr->buffer_lock);
1821	if (result != ISC_R_SUCCESS)
1822		goto kill_arc4_lock;
1823
1824	result = isc_mutex_init(&mgr->pool_lock);
1825	if (result != ISC_R_SUCCESS)
1826		goto kill_buffer_lock;
1827
1828	mgr->epool = NULL;
1829	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1830			       &mgr->epool) != ISC_R_SUCCESS) {
1831		result = ISC_R_NOMEMORY;
1832		goto kill_pool_lock;
1833	}
1834
1835	mgr->rpool = NULL;
1836	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1837			       &mgr->rpool) != ISC_R_SUCCESS) {
1838		result = ISC_R_NOMEMORY;
1839		goto kill_epool;
1840	}
1841
1842	mgr->dpool = NULL;
1843	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1844			       &mgr->dpool) != ISC_R_SUCCESS) {
1845		result = ISC_R_NOMEMORY;
1846		goto kill_rpool;
1847	}
1848
1849	isc_mempool_setname(mgr->epool, "dispmgr_epool");
1850	isc_mempool_setfreemax(mgr->epool, 1024);
1851	isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1852
1853	isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1854	isc_mempool_setfreemax(mgr->rpool, 1024);
1855	isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1856
1857	isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1858	isc_mempool_setfreemax(mgr->dpool, 1024);
1859	isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1860
1861	mgr->buffers = 0;
1862	mgr->buffersize = 0;
1863	mgr->maxbuffers = 0;
1864	mgr->bpool = NULL;
1865	mgr->spool = NULL;
1866	mgr->entropy = NULL;
1867	mgr->qid = NULL;
1868	mgr->state = 0;
1869	ISC_LIST_INIT(mgr->list);
1870	mgr->v4ports = NULL;
1871	mgr->v6ports = NULL;
1872	mgr->nv4ports = 0;
1873	mgr->nv6ports = 0;
1874	mgr->magic = DNS_DISPATCHMGR_MAGIC;
1875
1876	result = create_default_portset(mctx, &v4portset);
1877	if (result == ISC_R_SUCCESS) {
1878		result = create_default_portset(mctx, &v6portset);
1879		if (result == ISC_R_SUCCESS) {
1880			result = dns_dispatchmgr_setavailports(mgr,
1881							       v4portset,
1882							       v6portset);
1883		}
1884	}
1885	if (v4portset != NULL)
1886		isc_portset_destroy(mctx, &v4portset);
1887	if (v6portset != NULL)
1888		isc_portset_destroy(mctx, &v6portset);
1889	if (result != ISC_R_SUCCESS)
1890		goto kill_dpool;
1891
1892	if (entropy != NULL)
1893		isc_entropy_attach(entropy, &mgr->entropy);
1894
1895	dispatch_arc4init(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
1896
1897	*mgrp = mgr;
1898	return (ISC_R_SUCCESS);
1899
1900 kill_dpool:
1901	isc_mempool_destroy(&mgr->dpool);
1902 kill_rpool:
1903	isc_mempool_destroy(&mgr->rpool);
1904 kill_epool:
1905	isc_mempool_destroy(&mgr->epool);
1906 kill_pool_lock:
1907	DESTROYLOCK(&mgr->pool_lock);
1908 kill_buffer_lock:
1909	DESTROYLOCK(&mgr->buffer_lock);
1910 kill_arc4_lock:
1911	DESTROYLOCK(&mgr->arc4_lock);
1912 kill_lock:
1913	DESTROYLOCK(&mgr->lock);
1914 deallocate:
1915	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1916	isc_mem_detach(&mctx);
1917
1918	return (result);
1919}
1920
1921void
1922dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1923	REQUIRE(VALID_DISPATCHMGR(mgr));
1924	if (mgr->blackhole != NULL)
1925		dns_acl_detach(&mgr->blackhole);
1926	dns_acl_attach(blackhole, &mgr->blackhole);
1927}
1928
1929dns_acl_t *
1930dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1931	REQUIRE(VALID_DISPATCHMGR(mgr));
1932	return (mgr->blackhole);
1933}
1934
1935void
1936dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1937				 dns_portlist_t *portlist)
1938{
1939	REQUIRE(VALID_DISPATCHMGR(mgr));
1940	UNUSED(portlist);
1941
1942	/* This function is deprecated: use dns_dispatchmgr_setavailports(). */
1943	return;
1944}
1945
1946dns_portlist_t *
1947dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1948	REQUIRE(VALID_DISPATCHMGR(mgr));
1949	return (NULL);		/* this function is deprecated */
1950}
1951
1952isc_result_t
1953dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
1954			      isc_portset_t *v6portset)
1955{
1956	in_port_t *v4ports, *v6ports, p;
1957	unsigned int nv4ports, nv6ports, i4, i6;
1958
1959	REQUIRE(VALID_DISPATCHMGR(mgr));
1960
1961	nv4ports = isc_portset_nports(v4portset);
1962	nv6ports = isc_portset_nports(v6portset);
1963
1964	v4ports = NULL;
1965	if (nv4ports != 0) {
1966		v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
1967		if (v4ports == NULL)
1968			return (ISC_R_NOMEMORY);
1969	}
1970	v6ports = NULL;
1971	if (nv6ports != 0) {
1972		v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
1973		if (v6ports == NULL) {
1974			if (v4ports != NULL) {
1975				isc_mem_put(mgr->mctx, v4ports,
1976					    sizeof(in_port_t) *
1977					    isc_portset_nports(v4portset));
1978			}
1979			return (ISC_R_NOMEMORY);
1980		}
1981	}
1982
1983	p = 0;
1984	i4 = 0;
1985	i6 = 0;
1986	do {
1987		if (isc_portset_isset(v4portset, p)) {
1988			INSIST(i4 < nv4ports);
1989			v4ports[i4++] = p;
1990		}
1991		if (isc_portset_isset(v6portset, p)) {
1992			INSIST(i6 < nv6ports);
1993			v6ports[i6++] = p;
1994		}
1995	} while (p++ < 65535);
1996	INSIST(i4 == nv4ports && i6 == nv6ports);
1997
1998	PORTBUFLOCK(mgr);
1999	if (mgr->v4ports != NULL) {
2000		isc_mem_put(mgr->mctx, mgr->v4ports,
2001			    mgr->nv4ports * sizeof(in_port_t));
2002	}
2003	mgr->v4ports = v4ports;
2004	mgr->nv4ports = nv4ports;
2005
2006	if (mgr->v6ports != NULL) {
2007		isc_mem_put(mgr->mctx, mgr->v6ports,
2008			    mgr->nv6ports * sizeof(in_port_t));
2009	}
2010	mgr->v6ports = v6ports;
2011	mgr->nv6ports = nv6ports;
2012	PORTBUFUNLOCK(mgr);
2013
2014	return (ISC_R_SUCCESS);
2015}
2016
2017static isc_result_t
2018dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2019		       unsigned int buffersize, unsigned int maxbuffers,
2020		       unsigned int maxrequests, unsigned int buckets,
2021		       unsigned int increment)
2022{
2023	isc_result_t result;
2024
2025	REQUIRE(VALID_DISPATCHMGR(mgr));
2026	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2027	REQUIRE(maxbuffers > 0);
2028	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2029	REQUIRE(increment > buckets);
2030
2031	/*
2032	 * Keep some number of items around.  This should be a config
2033	 * option.  For now, keep 8, but later keep at least two even
2034	 * if the caller wants less.  This allows us to ensure certain
2035	 * things, like an event can be "freed" and the next allocation
2036	 * will always succeed.
2037	 *
2038	 * Note that if limits are placed on anything here, we use one
2039	 * event internally, so the actual limit should be "wanted + 1."
2040	 *
2041	 * XXXMLG
2042	 */
2043
2044	if (maxbuffers < 8)
2045		maxbuffers = 8;
2046
2047	LOCK(&mgr->buffer_lock);
2048
2049	/* Create or adjust buffer pool */
2050	if (mgr->bpool != NULL) {
2051		isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2052		mgr->maxbuffers = maxbuffers;
2053	} else {
2054		result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2055		if (result != ISC_R_SUCCESS) {
2056			UNLOCK(&mgr->buffer_lock);
2057			return (result);
2058		}
2059		isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2060		isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2061		isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
2062	}
2063
2064	/* Create or adjust socket pool */
2065	if (mgr->spool != NULL) {
2066		isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2067		UNLOCK(&mgr->buffer_lock);
2068		return (ISC_R_SUCCESS);
2069	}
2070	result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2071				    &mgr->spool);
2072	if (result != ISC_R_SUCCESS) {
2073		UNLOCK(&mgr->buffer_lock);
2074		goto cleanup;
2075	}
2076	isc_mempool_setname(mgr->spool, "dispmgr_spool");
2077	isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2078	isc_mempool_associatelock(mgr->spool, &mgr->pool_lock);
2079
2080	result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2081	if (result != ISC_R_SUCCESS)
2082		goto cleanup;
2083
2084	mgr->buffersize = buffersize;
2085	mgr->maxbuffers = maxbuffers;
2086	UNLOCK(&mgr->buffer_lock);
2087	return (ISC_R_SUCCESS);
2088
2089 cleanup:
2090	isc_mempool_destroy(&mgr->bpool);
2091	if (mgr->spool != NULL)
2092		isc_mempool_destroy(&mgr->spool);
2093	UNLOCK(&mgr->buffer_lock);
2094	return (result);
2095}
2096
2097void
2098dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2099	dns_dispatchmgr_t *mgr;
2100	isc_boolean_t killit;
2101
2102	REQUIRE(mgrp != NULL);
2103	REQUIRE(VALID_DISPATCHMGR(*mgrp));
2104
2105	mgr = *mgrp;
2106	*mgrp = NULL;
2107
2108	LOCK(&mgr->lock);
2109	mgr->state |= MGR_SHUTTINGDOWN;
2110
2111	killit = destroy_mgr_ok(mgr);
2112	UNLOCK(&mgr->lock);
2113
2114	mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2115
2116	if (killit)
2117		destroy_mgr(&mgr);
2118}
2119
2120void
2121dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2122	REQUIRE(VALID_DISPATCHMGR(mgr));
2123	REQUIRE(ISC_LIST_EMPTY(mgr->list));
2124	REQUIRE(mgr->stats == NULL);
2125
2126	isc_stats_attach(stats, &mgr->stats);
2127}
2128
/*
 * bsearch() comparison callback for arrays of in_port_t.
 *
 * Returns -1, 0 or 1 when the port at 'key' is respectively smaller than,
 * equal to, or greater than the port at 'ent'.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t wanted = *(const in_port_t *)key;
	const in_port_t candidate = *(const in_port_t *)ent;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return ((wanted > candidate) - (wanted < candidate));
}
2141
2142static isc_boolean_t
2143portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2144	      isc_sockaddr_t *sockaddrp)
2145{
2146	isc_sockaddr_t sockaddr;
2147	isc_result_t result;
2148	in_port_t *ports, port;
2149	unsigned int nports;
2150	isc_boolean_t available = ISC_FALSE;
2151
2152	REQUIRE(sock != NULL || sockaddrp != NULL);
2153
2154	PORTBUFLOCK(mgr);
2155	if (sock != NULL) {
2156		sockaddrp = &sockaddr;
2157		result = isc_socket_getsockname(sock, sockaddrp);
2158		if (result != ISC_R_SUCCESS)
2159			goto unlock;
2160	}
2161
2162	if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2163		ports = mgr->v4ports;
2164		nports = mgr->nv4ports;
2165	} else {
2166		ports = mgr->v6ports;
2167		nports = mgr->nv6ports;
2168	}
2169	if (ports == NULL)
2170		goto unlock;
2171
2172	port = isc_sockaddr_getport(sockaddrp);
2173	if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2174		available = ISC_TRUE;
2175
2176unlock:
2177	PORTBUFUNLOCK(mgr);
2178	return (available);
2179}
2180
/* True when _a1 and _a2 agree on every bit selected by _mask. */
#define ATTRMATCH(_a1, _a2, _mask) ((((_a1) ^ (_a2)) & (_mask)) == 0)
2182
2183static isc_boolean_t
2184local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2185	isc_sockaddr_t sockaddr;
2186	isc_result_t result;
2187
2188	REQUIRE(disp->socket != NULL);
2189
2190	if (addr == NULL)
2191		return (ISC_TRUE);
2192
2193	/*
2194	 * Don't match wildcard ports unless the port is available in the
2195	 * current configuration.
2196	 */
2197	if (isc_sockaddr_getport(addr) == 0 &&
2198	    isc_sockaddr_getport(&disp->local) == 0 &&
2199	    !portavailable(disp->mgr, disp->socket, NULL)) {
2200		return (ISC_FALSE);
2201	}
2202
2203	/*
2204	 * Check if we match the binding <address,port>.
2205	 * Wildcard ports match/fail here.
2206	 */
2207	if (isc_sockaddr_equal(&disp->local, addr))
2208		return (ISC_TRUE);
2209	if (isc_sockaddr_getport(addr) == 0)
2210		return (ISC_FALSE);
2211
2212	/*
2213	 * Check if we match a bound wildcard port <address,port>.
2214	 */
2215	if (!isc_sockaddr_eqaddr(&disp->local, addr))
2216		return (ISC_FALSE);
2217	result = isc_socket_getsockname(disp->socket, &sockaddr);
2218	if (result != ISC_R_SUCCESS)
2219		return (ISC_FALSE);
2220
2221	return (isc_sockaddr_equal(&sockaddr, addr));
2222}
2223
2224/*
2225 * Requires mgr be locked.
2226 *
2227 * No dispatcher can be locked by this thread when calling this function.
2228 *
2229 *
2230 * NOTE:
2231 *	If a matching dispatcher is found, it is locked after this function
2232 *	returns, and must be unlocked by the caller.
2233 */
2234static isc_result_t
2235dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2236	      unsigned int attributes, unsigned int mask,
2237	      dns_dispatch_t **dispp)
2238{
2239	dns_dispatch_t *disp;
2240	isc_result_t result;
2241
2242	/*
2243	 * Make certain that we will not match a private or exclusive dispatch.
2244	 */
2245	attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2246	mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2247
2248	disp = ISC_LIST_HEAD(mgr->list);
2249	while (disp != NULL) {
2250		LOCK(&disp->lock);
2251		if ((disp->shutting_down == 0)
2252		    && ATTRMATCH(disp->attributes, attributes, mask)
2253		    && local_addr_match(disp, local))
2254			break;
2255		UNLOCK(&disp->lock);
2256		disp = ISC_LIST_NEXT(disp, link);
2257	}
2258
2259	if (disp == NULL) {
2260		result = ISC_R_NOTFOUND;
2261		goto out;
2262	}
2263
2264	*dispp = disp;
2265	result = ISC_R_SUCCESS;
2266 out:
2267
2268	return (result);
2269}
2270
2271static isc_result_t
2272qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2273	     unsigned int increment, dns_qid_t **qidp,
2274	     isc_boolean_t needsocktable)
2275{
2276	dns_qid_t *qid;
2277	unsigned int i;
2278	isc_result_t result;
2279
2280	REQUIRE(VALID_DISPATCHMGR(mgr));
2281	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2282	REQUIRE(increment > buckets);
2283	REQUIRE(qidp != NULL && *qidp == NULL);
2284
2285	qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2286	if (qid == NULL)
2287		return (ISC_R_NOMEMORY);
2288
2289	qid->qid_table = isc_mem_get(mgr->mctx,
2290				     buckets * sizeof(dns_displist_t));
2291	if (qid->qid_table == NULL) {
2292		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2293		return (ISC_R_NOMEMORY);
2294	}
2295
2296	qid->sock_table = NULL;
2297	if (needsocktable) {
2298		qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2299					      sizeof(dispsocketlist_t));
2300		if (qid->sock_table == NULL) {
2301			isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2302			isc_mem_put(mgr->mctx, qid->qid_table,
2303				    buckets * sizeof(dns_displist_t));
2304			return (ISC_R_NOMEMORY);
2305		}
2306	}
2307
2308	result = isc_mutex_init(&qid->lock);
2309	if (result != ISC_R_SUCCESS) {
2310		if (qid->sock_table != NULL) {
2311			isc_mem_put(mgr->mctx, qid->sock_table,
2312				    buckets * sizeof(dispsocketlist_t));
2313		}
2314		isc_mem_put(mgr->mctx, qid->qid_table,
2315			    buckets * sizeof(dns_displist_t));
2316		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2317		return (result);
2318	}
2319
2320	for (i = 0; i < buckets; i++) {
2321		ISC_LIST_INIT(qid->qid_table[i]);
2322		if (qid->sock_table != NULL)
2323			ISC_LIST_INIT(qid->sock_table[i]);
2324	}
2325
2326	qid->qid_nbuckets = buckets;
2327	qid->qid_increment = increment;
2328	qid->magic = QID_MAGIC;
2329	*qidp = qid;
2330	return (ISC_R_SUCCESS);
2331}
2332
2333static void
2334qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2335	dns_qid_t *qid;
2336
2337	REQUIRE(qidp != NULL);
2338	qid = *qidp;
2339
2340	REQUIRE(VALID_QID(qid));
2341
2342	*qidp = NULL;
2343	qid->magic = 0;
2344	isc_mem_put(mctx, qid->qid_table,
2345		    qid->qid_nbuckets * sizeof(dns_displist_t));
2346	if (qid->sock_table != NULL) {
2347		isc_mem_put(mctx, qid->sock_table,
2348			    qid->qid_nbuckets * sizeof(dispsocketlist_t));
2349	}
2350	DESTROYLOCK(&qid->lock);
2351	isc_mem_put(mctx, qid, sizeof(*qid));
2352}
2353
2354/*
2355 * Allocate and set important limits.
2356 */
2357static isc_result_t
2358dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2359		  dns_dispatch_t **dispp)
2360{
2361	dns_dispatch_t *disp;
2362	isc_result_t result;
2363
2364	REQUIRE(VALID_DISPATCHMGR(mgr));
2365	REQUIRE(dispp != NULL && *dispp == NULL);
2366
2367	/*
2368	 * Set up the dispatcher, mostly.  Don't bother setting some of
2369	 * the options that are controlled by tcp vs. udp, etc.
2370	 */
2371
2372	disp = isc_mempool_get(mgr->dpool);
2373	if (disp == NULL)
2374		return (ISC_R_NOMEMORY);
2375
2376	disp->magic = 0;
2377	disp->mgr = mgr;
2378	disp->maxrequests = maxrequests;
2379	disp->attributes = 0;
2380	ISC_LINK_INIT(disp, link);
2381	disp->refcount = 1;
2382	disp->recv_pending = 0;
2383	memset(&disp->local, 0, sizeof(disp->local));
2384	disp->localport = 0;
2385	disp->shutting_down = 0;
2386	disp->shutdown_out = 0;
2387	disp->connected = 0;
2388	disp->tcpmsg_valid = 0;
2389	disp->shutdown_why = ISC_R_UNEXPECTED;
2390	disp->requests = 0;
2391	disp->tcpbuffers = 0;
2392	disp->qid = NULL;
2393	ISC_LIST_INIT(disp->activesockets);
2394	ISC_LIST_INIT(disp->inactivesockets);
2395	disp->nsockets = 0;
2396	dispatch_arc4init(&disp->arc4ctx, mgr->entropy, NULL);
2397	disp->port_table = NULL;
2398	disp->portpool = NULL;
2399
2400	result = isc_mutex_init(&disp->lock);
2401	if (result != ISC_R_SUCCESS)
2402		goto deallocate;
2403
2404	disp->failsafe_ev = allocate_event(disp);
2405	if (disp->failsafe_ev == NULL) {
2406		result = ISC_R_NOMEMORY;
2407		goto kill_lock;
2408	}
2409
2410	disp->magic = DISPATCH_MAGIC;
2411
2412	*dispp = disp;
2413	return (ISC_R_SUCCESS);
2414
2415	/*
2416	 * error returns
2417	 */
2418 kill_lock:
2419	DESTROYLOCK(&disp->lock);
2420 deallocate:
2421	isc_mempool_put(mgr->dpool, disp);
2422
2423	return (result);
2424}
2425
2426
2427/*
2428 * MUST be unlocked, and not used by anything.
2429 */
2430static void
2431dispatch_free(dns_dispatch_t **dispp)
2432{
2433	dns_dispatch_t *disp;
2434	dns_dispatchmgr_t *mgr;
2435	int i;
2436
2437	REQUIRE(VALID_DISPATCH(*dispp));
2438	disp = *dispp;
2439	*dispp = NULL;
2440
2441	mgr = disp->mgr;
2442	REQUIRE(VALID_DISPATCHMGR(mgr));
2443
2444	if (disp->tcpmsg_valid) {
2445		dns_tcpmsg_invalidate(&disp->tcpmsg);
2446		disp->tcpmsg_valid = 0;
2447	}
2448
2449	INSIST(disp->tcpbuffers == 0);
2450	INSIST(disp->requests == 0);
2451	INSIST(disp->recv_pending == 0);
2452	INSIST(ISC_LIST_EMPTY(disp->activesockets));
2453	INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2454
2455	isc_mempool_put(mgr->epool, disp->failsafe_ev);
2456	disp->failsafe_ev = NULL;
2457
2458	if (disp->qid != NULL)
2459		qid_destroy(mgr->mctx, &disp->qid);
2460
2461	if (disp->port_table != NULL) {
2462		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2463			INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2464		isc_mem_put(mgr->mctx, disp->port_table,
2465			    sizeof(disp->port_table[0]) *
2466			    DNS_DISPATCH_PORTTABLESIZE);
2467	}
2468
2469	if (disp->portpool != NULL)
2470		isc_mempool_destroy(&disp->portpool);
2471
2472	disp->mgr = NULL;
2473	DESTROYLOCK(&disp->lock);
2474	disp->magic = 0;
2475	isc_mempool_put(mgr->dpool, disp);
2476}
2477
2478isc_result_t
2479dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2480		       isc_taskmgr_t *taskmgr, unsigned int buffersize,
2481		       unsigned int maxbuffers, unsigned int maxrequests,
2482		       unsigned int buckets, unsigned int increment,
2483		       unsigned int attributes, dns_dispatch_t **dispp)
2484{
2485	isc_result_t result;
2486	dns_dispatch_t *disp;
2487
2488	UNUSED(maxbuffers);
2489	UNUSED(buffersize);
2490
2491	REQUIRE(VALID_DISPATCHMGR(mgr));
2492	REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2493	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2494	REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2495
2496	attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
2497
2498	LOCK(&mgr->lock);
2499
2500	/*
2501	 * dispatch_allocate() checks mgr for us.
2502	 * qid_allocate() checks buckets and increment for us.
2503	 */
2504	disp = NULL;
2505	result = dispatch_allocate(mgr, maxrequests, &disp);
2506	if (result != ISC_R_SUCCESS) {
2507		UNLOCK(&mgr->lock);
2508		return (result);
2509	}
2510
2511	result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2512	if (result != ISC_R_SUCCESS)
2513		goto deallocate_dispatch;
2514
2515	disp->socktype = isc_sockettype_tcp;
2516	disp->socket = NULL;
2517	isc_socket_attach(sock, &disp->socket);
2518
2519	disp->ntasks = 1;
2520	disp->task[0] = NULL;
2521	result = isc_task_create(taskmgr, 0, &disp->task[0]);
2522	if (result != ISC_R_SUCCESS)
2523		goto kill_socket;
2524
2525	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2526					    DNS_EVENT_DISPATCHCONTROL,
2527					    destroy_disp, disp,
2528					    sizeof(isc_event_t));
2529	if (disp->ctlevent == NULL) {
2530		result = ISC_R_NOMEMORY;
2531		goto kill_task;
2532	}
2533
2534	isc_task_setname(disp->task[0], "tcpdispatch", disp);
2535
2536	dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2537	disp->tcpmsg_valid = 1;
2538
2539	disp->attributes = attributes;
2540
2541	/*
2542	 * Append it to the dispatcher list.
2543	 */
2544	ISC_LIST_APPEND(mgr->list, disp, link);
2545	UNLOCK(&mgr->lock);
2546
2547	mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2548	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2549
2550	*dispp = disp;
2551
2552	return (ISC_R_SUCCESS);
2553
2554	/*
2555	 * Error returns.
2556	 */
2557 kill_task:
2558	isc_task_detach(&disp->task[0]);
2559 kill_socket:
2560	isc_socket_detach(&disp->socket);
2561 deallocate_dispatch:
2562	dispatch_free(&disp);
2563
2564	UNLOCK(&mgr->lock);
2565
2566	return (result);
2567}
2568
2569isc_result_t
2570dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2571		    isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2572		    unsigned int buffersize,
2573		    unsigned int maxbuffers, unsigned int maxrequests,
2574		    unsigned int buckets, unsigned int increment,
2575		    unsigned int attributes, unsigned int mask,
2576		    dns_dispatch_t **dispp)
2577{
2578	isc_result_t result;
2579	dns_dispatch_t *disp = NULL;
2580
2581	REQUIRE(VALID_DISPATCHMGR(mgr));
2582	REQUIRE(sockmgr != NULL);
2583	REQUIRE(localaddr != NULL);
2584	REQUIRE(taskmgr != NULL);
2585	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2586	REQUIRE(maxbuffers > 0);
2587	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2588	REQUIRE(increment > buckets);
2589	REQUIRE(dispp != NULL && *dispp == NULL);
2590	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2591
2592	result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2593					maxrequests, buckets, increment);
2594	if (result != ISC_R_SUCCESS)
2595		return (result);
2596
2597	LOCK(&mgr->lock);
2598
2599	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2600		REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2601		goto createudp;
2602	}
2603
2604	/*
2605	 * See if we have a dispatcher that matches.
2606	 */
2607	result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2608	if (result == ISC_R_SUCCESS) {
2609		disp->refcount++;
2610
2611		if (disp->maxrequests < maxrequests)
2612			disp->maxrequests = maxrequests;
2613
2614		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
2615		    (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2616		{
2617			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2618			if (disp->recv_pending != 0)
2619				isc_socket_cancel(disp->socket, disp->task[0],
2620						  ISC_SOCKCANCEL_RECV);
2621		}
2622
2623		UNLOCK(&disp->lock);
2624		UNLOCK(&mgr->lock);
2625
2626		*dispp = disp;
2627
2628		return (ISC_R_SUCCESS);
2629	}
2630
2631 createudp:
2632	/*
2633	 * Nope, create one.
2634	 */
2635	result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2636				    maxrequests, attributes, &disp);
2637	if (result != ISC_R_SUCCESS) {
2638		UNLOCK(&mgr->lock);
2639		return (result);
2640	}
2641
2642	UNLOCK(&mgr->lock);
2643	*dispp = disp;
2644	return (ISC_R_SUCCESS);
2645}
2646
2647/*
2648 * mgr should be locked.
2649 */
2650
2651#ifndef DNS_DISPATCH_HELD
2652#define DNS_DISPATCH_HELD 20U
2653#endif
2654
2655static isc_result_t
2656get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2657	      isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2658	      isc_socket_t **sockp)
2659{
2660	unsigned int i, j;
2661	isc_socket_t *held[DNS_DISPATCH_HELD];
2662	isc_sockaddr_t localaddr_bound;
2663	isc_socket_t *sock = NULL;
2664	isc_result_t result = ISC_R_SUCCESS;
2665	isc_boolean_t anyport;
2666
2667	INSIST(sockp != NULL && *sockp == NULL);
2668
2669	localaddr_bound = *localaddr;
2670	anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2671
2672	if (anyport) {
2673		unsigned int nports;
2674		in_port_t *ports;
2675
2676		/*
2677		 * If no port is specified, we first try to pick up a random
2678		 * port by ourselves.
2679		 */
2680		if (isc_sockaddr_pf(&disp->local) == AF_INET) {
2681			nports = disp->mgr->nv4ports;
2682			ports = disp->mgr->v4ports;
2683		} else {
2684			nports = disp->mgr->nv6ports;
2685			ports = disp->mgr->v6ports;
2686		}
2687		if (nports == 0)
2688			return (ISC_R_ADDRNOTAVAIL);
2689
2690		for (i = 0; i < 1024; i++) {
2691			in_port_t prt;
2692
2693			prt = ports[dispatch_arc4uniformrandom(
2694					DISP_ARC4CTX(disp),
2695					nports)];
2696			isc_sockaddr_setport(&localaddr_bound, prt);
2697			result = open_socket(sockmgr, &localaddr_bound,
2698					     0, &sock);
2699			if (result == ISC_R_SUCCESS ||
2700			    result != ISC_R_ADDRINUSE) {
2701				disp->localport = prt;
2702				*sockp = sock;
2703				return (result);
2704			}
2705		}
2706
2707		/*
2708		 * If this fails 1024 times, we then ask the kernel for
2709		 * choosing one.
2710		 */
2711	} else {
2712		/* Allow to reuse address for non-random ports. */
2713		result = open_socket(sockmgr, localaddr,
2714				     ISC_SOCKET_REUSEADDRESS, &sock);
2715
2716		if (result == ISC_R_SUCCESS)
2717			*sockp = sock;
2718
2719		return (result);
2720	}
2721
2722	memset(held, 0, sizeof(held));
2723	i = 0;
2724
2725	for (j = 0; j < 0xffffU; j++) {
2726		result = open_socket(sockmgr, localaddr, 0, &sock);
2727		if (result != ISC_R_SUCCESS)
2728			goto end;
2729		else if (!anyport)
2730			break;
2731		else if (portavailable(mgr, sock, NULL))
2732			break;
2733		if (held[i] != NULL)
2734			isc_socket_detach(&held[i]);
2735		held[i++] = sock;
2736		sock = NULL;
2737		if (i == DNS_DISPATCH_HELD)
2738			i = 0;
2739	}
2740	if (j == 0xffffU) {
2741		mgr_log(mgr, ISC_LOG_ERROR,
2742			"avoid-v%s-udp-ports: unable to allocate "
2743			"an available port",
2744			isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2745		result = ISC_R_FAILURE;
2746		goto end;
2747	}
2748	*sockp = sock;
2749
2750end:
2751	for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2752		if (held[i] != NULL)
2753			isc_socket_detach(&held[i]);
2754	}
2755
2756	return (result);
2757}
2758
2759static isc_result_t
2760dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2761		   isc_taskmgr_t *taskmgr,
2762		   isc_sockaddr_t *localaddr,
2763		   unsigned int maxrequests,
2764		   unsigned int attributes,
2765		   dns_dispatch_t **dispp)
2766{
2767	isc_result_t result;
2768	dns_dispatch_t *disp;
2769	isc_socket_t *sock = NULL;
2770	int i = 0;
2771
2772	/*
2773	 * dispatch_allocate() checks mgr for us.
2774	 */
2775	disp = NULL;
2776	result = dispatch_allocate(mgr, maxrequests, &disp);
2777	if (result != ISC_R_SUCCESS)
2778		return (result);
2779
2780	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
2781		result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock);
2782		if (result != ISC_R_SUCCESS)
2783			goto deallocate_dispatch;
2784	} else {
2785		isc_sockaddr_t sa_any;
2786
2787		/*
2788		 * For dispatches using exclusive sockets with a specific
2789		 * source address, we only check if the specified address is
2790		 * available on the system.  Query sockets will be created later
2791		 * on demand.
2792		 */
2793		isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
2794		if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
2795			result = open_socket(sockmgr, localaddr, 0, &sock);
2796			if (sock != NULL)
2797				isc_socket_detach(&sock);
2798			if (result != ISC_R_SUCCESS)
2799				goto deallocate_dispatch;
2800		}
2801
2802		disp->port_table = isc_mem_get(mgr->mctx,
2803					       sizeof(disp->port_table[0]) *
2804					       DNS_DISPATCH_PORTTABLESIZE);
2805		if (disp->port_table == NULL)
2806			goto deallocate_dispatch;
2807		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2808			ISC_LIST_INIT(disp->port_table[i]);
2809
2810		result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
2811					    &disp->portpool);
2812		if (result != ISC_R_SUCCESS)
2813			goto deallocate_dispatch;
2814		isc_mempool_setname(disp->portpool, "disp_portpool");
2815		isc_mempool_setfreemax(disp->portpool, 128);
2816	}
2817	disp->socktype = isc_sockettype_udp;
2818	disp->socket = sock;
2819	disp->local = *localaddr;
2820
2821	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2822		disp->ntasks = MAX_INTERNAL_TASKS;
2823	else
2824		disp->ntasks = 1;
2825	for (i = 0; i < disp->ntasks; i++) {
2826		disp->task[i] = NULL;
2827		result = isc_task_create(taskmgr, 0, &disp->task[i]);
2828		if (result != ISC_R_SUCCESS) {
2829			while (--i >= 0)
2830				isc_task_destroy(&disp->task[i]);
2831			goto kill_socket;
2832		}
2833		isc_task_setname(disp->task[i], "udpdispatch", disp);
2834	}
2835
2836	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2837					    DNS_EVENT_DISPATCHCONTROL,
2838					    destroy_disp, disp,
2839					    sizeof(isc_event_t));
2840	if (disp->ctlevent == NULL) {
2841		result = ISC_R_NOMEMORY;
2842		goto kill_task;
2843	}
2844
2845	attributes &= ~DNS_DISPATCHATTR_TCP;
2846	attributes |= DNS_DISPATCHATTR_UDP;
2847	disp->attributes = attributes;
2848
2849	/*
2850	 * Append it to the dispatcher list.
2851	 */
2852	ISC_LIST_APPEND(mgr->list, disp, link);
2853
2854	mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2855	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
2856	if (disp->socket != NULL)
2857		dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
2858
2859	*dispp = disp;
2860	return (result);
2861
2862	/*
2863	 * Error returns.
2864	 */
2865 kill_task:
2866	for (i = 0; i < disp->ntasks; i++)
2867		isc_task_detach(&disp->task[i]);
2868 kill_socket:
2869	if (disp->socket != NULL)
2870		isc_socket_detach(&disp->socket);
2871 deallocate_dispatch:
2872	dispatch_free(&disp);
2873
2874	return (result);
2875}
2876
2877void
2878dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2879	REQUIRE(VALID_DISPATCH(disp));
2880	REQUIRE(dispp != NULL && *dispp == NULL);
2881
2882	LOCK(&disp->lock);
2883	disp->refcount++;
2884	UNLOCK(&disp->lock);
2885
2886	*dispp = disp;
2887}
2888
2889/*
2890 * It is important to lock the manager while we are deleting the dispatch,
2891 * since dns_dispatch_getudp will call dispatch_find, which returns to
2892 * the caller a dispatch but does not attach to it until later.  _getudp
2893 * locks the manager, however, so locking it here will keep us from attaching
2894 * to a dispatcher that is in the process of going away.
2895 */
2896void
2897dns_dispatch_detach(dns_dispatch_t **dispp) {
2898	dns_dispatch_t *disp;
2899	dispsocket_t *dispsock;
2900	isc_boolean_t killit;
2901
2902	REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2903
2904	disp = *dispp;
2905	*dispp = NULL;
2906
2907	LOCK(&disp->lock);
2908
2909	INSIST(disp->refcount > 0);
2910	disp->refcount--;
2911	killit = ISC_FALSE;
2912	if (disp->refcount == 0) {
2913		if (disp->recv_pending > 0)
2914			isc_socket_cancel(disp->socket, disp->task[0],
2915					  ISC_SOCKCANCEL_RECV);
2916		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
2917		     dispsock != NULL;
2918		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
2919			isc_socket_cancel(dispsock->socket, dispsock->task,
2920					  ISC_SOCKCANCEL_RECV);
2921		}
2922		disp->shutting_down = 1;
2923	}
2924
2925	dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
2926
2927	killit = destroy_disp_ok(disp);
2928	UNLOCK(&disp->lock);
2929	if (killit)
2930		isc_task_send(disp->task[0], &disp->ctlevent);
2931}
2932
2933isc_result_t
2934dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2935			  isc_task_t *task, isc_taskaction_t action, void *arg,
2936			  dns_messageid_t *idp, dns_dispentry_t **resp,
2937			  isc_socketmgr_t *sockmgr)
2938{
2939	dns_dispentry_t *res;
2940	unsigned int bucket;
2941	in_port_t localport = 0;
2942	dns_messageid_t id;
2943	int i;
2944	isc_boolean_t ok;
2945	dns_qid_t *qid;
2946	dispsocket_t *dispsocket = NULL;
2947	isc_result_t result;
2948
2949	REQUIRE(VALID_DISPATCH(disp));
2950	REQUIRE(task != NULL);
2951	REQUIRE(dest != NULL);
2952	REQUIRE(resp != NULL && *resp == NULL);
2953	REQUIRE(idp != NULL);
2954	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2955		REQUIRE(sockmgr != NULL);
2956
2957	LOCK(&disp->lock);
2958
2959	if (disp->shutting_down == 1) {
2960		UNLOCK(&disp->lock);
2961		return (ISC_R_SHUTTINGDOWN);
2962	}
2963
2964	if (disp->requests >= disp->maxrequests) {
2965		UNLOCK(&disp->lock);
2966		return (ISC_R_QUOTA);
2967	}
2968
2969	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
2970	    disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
2971		dispsocket_t *oldestsocket;
2972		dns_dispentry_t *oldestresp;
2973		dns_dispatchevent_t *rev;
2974
2975		/*
2976		 * Kill oldest outstanding query if the number of sockets
2977		 * exceeds the quota to keep the room for new queries.
2978		 */
2979		oldestsocket = ISC_LIST_HEAD(disp->activesockets);
2980		oldestresp = oldestsocket->resp;
2981		if (oldestresp != NULL && !oldestresp->item_out) {
2982			rev = allocate_event(oldestresp->disp);
2983			if (rev != NULL) {
2984				rev->buffer.base = NULL;
2985				rev->result = ISC_R_CANCELED;
2986				rev->id = oldestresp->id;
2987				ISC_EVENT_INIT(rev, sizeof(*rev), 0,
2988					       NULL, DNS_EVENT_DISPATCH,
2989					       oldestresp->action,
2990					       oldestresp->arg, oldestresp,
2991					       NULL, NULL);
2992				oldestresp->item_out = ISC_TRUE;
2993				isc_task_send(oldestresp->task,
2994					      ISC_EVENT_PTR(&rev));
2995				inc_stats(disp->mgr,
2996					  dns_resstatscounter_dispabort);
2997			}
2998		}
2999
3000		/*
3001		 * Move this entry to the tail so that it won't (easily) be
3002		 * examined before actually being canceled.
3003		 */
3004		ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3005		ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3006	}
3007
3008	qid = DNS_QID(disp);
3009	LOCK(&qid->lock);
3010
3011	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3012		/*
3013		 * Get a separate UDP socket with a random port number.
3014		 */
3015		result = get_dispsocket(disp, dest, sockmgr, qid, &dispsocket,
3016					&localport);
3017		if (result != ISC_R_SUCCESS) {
3018			UNLOCK(&qid->lock);
3019			UNLOCK(&disp->lock);
3020			inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3021			return (result);
3022		}
3023	} else {
3024		localport = disp->localport;
3025	}
3026
3027	/*
3028	 * Try somewhat hard to find an unique ID.
3029	 */
3030	id = (dns_messageid_t)dispatch_arc4random(DISP_ARC4CTX(disp));
3031	bucket = dns_hash(qid, dest, id, localport);
3032	ok = ISC_FALSE;
3033	for (i = 0; i < 64; i++) {
3034		if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3035			ok = ISC_TRUE;
3036			break;
3037		}
3038		id += qid->qid_increment;
3039		id &= 0x0000ffff;
3040		bucket = dns_hash(qid, dest, id, localport);
3041	}
3042
3043	if (!ok) {
3044		UNLOCK(&qid->lock);
3045		UNLOCK(&disp->lock);
3046		return (ISC_R_NOMORE);
3047	}
3048
3049	res = isc_mempool_get(disp->mgr->rpool);
3050	if (res == NULL) {
3051		UNLOCK(&qid->lock);
3052		UNLOCK(&disp->lock);
3053		if (dispsocket != NULL)
3054			destroy_dispsocket(disp, &dispsocket);
3055		return (ISC_R_NOMEMORY);
3056	}
3057
3058	disp->refcount++;
3059	disp->requests++;
3060	res->task = NULL;
3061	isc_task_attach(task, &res->task);
3062	res->disp = disp;
3063	res->id = id;
3064	res->port = localport;
3065	res->bucket = bucket;
3066	res->host = *dest;
3067	res->action = action;
3068	res->arg = arg;
3069	res->dispsocket = dispsocket;
3070	if (dispsocket != NULL)
3071		dispsocket->resp = res;
3072	res->item_out = ISC_FALSE;
3073	ISC_LIST_INIT(res->items);
3074	ISC_LINK_INIT(res, link);
3075	res->magic = RESPONSE_MAGIC;
3076	ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3077	UNLOCK(&qid->lock);
3078
3079	request_log(disp, res, LVL(90),
3080		    "attached to task %p", res->task);
3081
3082	if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3083	    ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3084		result = startrecv(disp, dispsocket);
3085		if (result != ISC_R_SUCCESS) {
3086			LOCK(&qid->lock);
3087			ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3088			UNLOCK(&qid->lock);
3089
3090			if (dispsocket != NULL)
3091				destroy_dispsocket(disp, &dispsocket);
3092
3093			disp->refcount--;
3094			disp->requests--;
3095
3096			UNLOCK(&disp->lock);
3097			isc_task_detach(&res->task);
3098			isc_mempool_put(disp->mgr->rpool, res);
3099			return (result);
3100		}
3101	}
3102
3103	if (dispsocket != NULL)
3104		ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3105
3106	UNLOCK(&disp->lock);
3107
3108	*idp = id;
3109	*resp = res;
3110
3111	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3112		INSIST(res->dispsocket != NULL);
3113
3114	return (ISC_R_SUCCESS);
3115}
3116
3117isc_result_t
3118dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3119			 isc_task_t *task, isc_taskaction_t action, void *arg,
3120			 dns_messageid_t *idp, dns_dispentry_t **resp)
3121{
3122	REQUIRE(VALID_DISPATCH(disp));
3123	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3124
3125	return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
3126					  idp, resp, NULL));
3127}
3128
3129void
3130dns_dispatch_starttcp(dns_dispatch_t *disp) {
3131
3132	REQUIRE(VALID_DISPATCH(disp));
3133
3134	dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3135
3136	LOCK(&disp->lock);
3137	disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3138	(void)startrecv(disp, NULL);
3139	UNLOCK(&disp->lock);
3140}
3141
3142void
3143dns_dispatch_removeresponse(dns_dispentry_t **resp,
3144			    dns_dispatchevent_t **sockevent)
3145{
3146	dns_dispatchmgr_t *mgr;
3147	dns_dispatch_t *disp;
3148	dns_dispentry_t *res;
3149	dispsocket_t *dispsock;
3150	dns_dispatchevent_t *ev;
3151	unsigned int bucket;
3152	isc_boolean_t killit;
3153	unsigned int n;
3154	isc_eventlist_t events;
3155	dns_qid_t *qid;
3156
3157	REQUIRE(resp != NULL);
3158	REQUIRE(VALID_RESPONSE(*resp));
3159
3160	res = *resp;
3161	*resp = NULL;
3162
3163	disp = res->disp;
3164	REQUIRE(VALID_DISPATCH(disp));
3165	mgr = disp->mgr;
3166	REQUIRE(VALID_DISPATCHMGR(mgr));
3167
3168	qid = DNS_QID(disp);
3169
3170	if (sockevent != NULL) {
3171		REQUIRE(*sockevent != NULL);
3172		ev = *sockevent;
3173		*sockevent = NULL;
3174	} else {
3175		ev = NULL;
3176	}
3177
3178	LOCK(&disp->lock);
3179
3180	INSIST(disp->requests > 0);
3181	disp->requests--;
3182	INSIST(disp->refcount > 0);
3183	disp->refcount--;
3184	killit = ISC_FALSE;
3185	if (disp->refcount == 0) {
3186		if (disp->recv_pending > 0)
3187			isc_socket_cancel(disp->socket, disp->task[0],
3188					  ISC_SOCKCANCEL_RECV);
3189		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3190		     dispsock != NULL;
3191		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
3192			isc_socket_cancel(dispsock->socket, dispsock->task,
3193					  ISC_SOCKCANCEL_RECV);
3194		}
3195		disp->shutting_down = 1;
3196	}
3197
3198	bucket = res->bucket;
3199
3200	LOCK(&qid->lock);
3201	ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3202	UNLOCK(&qid->lock);
3203
3204	if (ev == NULL && res->item_out) {
3205		/*
3206		 * We've posted our event, but the caller hasn't gotten it
3207		 * yet.  Take it back.
3208		 */
3209		ISC_LIST_INIT(events);
3210		n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3211				    NULL, &events);
3212		/*
3213		 * We had better have gotten it back.
3214		 */
3215		INSIST(n == 1);
3216		ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3217	}
3218
3219	if (ev != NULL) {
3220		REQUIRE(res->item_out == ISC_TRUE);
3221		res->item_out = ISC_FALSE;
3222		if (ev->buffer.base != NULL)
3223			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3224		free_event(disp, ev);
3225	}
3226
3227	request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3228	isc_task_detach(&res->task);
3229
3230	if (res->dispsocket != NULL) {
3231		isc_socket_cancel(res->dispsocket->socket,
3232				  res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3233		res->dispsocket->resp = NULL;
3234	}
3235
3236	/*
3237	 * Free any buffered requests as well
3238	 */
3239	ev = ISC_LIST_HEAD(res->items);
3240	while (ev != NULL) {
3241		ISC_LIST_UNLINK(res->items, ev, ev_link);
3242		if (ev->buffer.base != NULL)
3243			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3244		free_event(disp, ev);
3245		ev = ISC_LIST_HEAD(res->items);
3246	}
3247	res->magic = 0;
3248	isc_mempool_put(disp->mgr->rpool, res);
3249	if (disp->shutting_down == 1)
3250		do_cancel(disp);
3251	else
3252		(void)startrecv(disp, NULL);
3253
3254	killit = destroy_disp_ok(disp);
3255	UNLOCK(&disp->lock);
3256	if (killit)
3257		isc_task_send(disp->task[0], &disp->ctlevent);
3258}
3259
3260static void
3261do_cancel(dns_dispatch_t *disp) {
3262	dns_dispatchevent_t *ev;
3263	dns_dispentry_t *resp;
3264	dns_qid_t *qid;
3265
3266	if (disp->shutdown_out == 1)
3267		return;
3268
3269	qid = DNS_QID(disp);
3270
3271	/*
3272	 * Search for the first response handler without packets outstanding
3273	 * unless a specific hander is given.
3274	 */
3275	LOCK(&qid->lock);
3276	for (resp = linear_first(qid);
3277	     resp != NULL && resp->item_out;
3278	     /* Empty. */)
3279		resp = linear_next(qid, resp);
3280
3281	/*
3282	 * No one to send the cancel event to, so nothing to do.
3283	 */
3284	if (resp == NULL)
3285		goto unlock;
3286
3287	/*
3288	 * Send the shutdown failsafe event to this resp.
3289	 */
3290	ev = disp->failsafe_ev;
3291	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3292		       resp->action, resp->arg, resp, NULL, NULL);
3293	ev->result = disp->shutdown_why;
3294	ev->buffer.base = NULL;
3295	ev->buffer.length = 0;
3296	disp->shutdown_out = 1;
3297	request_log(disp, resp, LVL(10),
3298		    "cancel: failsafe event %p -> task %p",
3299		    ev, resp->task);
3300	resp->item_out = ISC_TRUE;
3301	isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3302 unlock:
3303	UNLOCK(&qid->lock);
3304}
3305
3306isc_socket_t *
3307dns_dispatch_getsocket(dns_dispatch_t *disp) {
3308	REQUIRE(VALID_DISPATCH(disp));
3309
3310	return (disp->socket);
3311}
3312
3313isc_socket_t *
3314dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3315	REQUIRE(VALID_RESPONSE(resp));
3316
3317	if (resp->dispsocket != NULL)
3318		return (resp->dispsocket->socket);
3319	else
3320		return (NULL);
3321}
3322
3323isc_result_t
3324dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3325
3326	REQUIRE(VALID_DISPATCH(disp));
3327	REQUIRE(addrp != NULL);
3328
3329	if (disp->socktype == isc_sockettype_udp) {
3330		*addrp = disp->local;
3331		return (ISC_R_SUCCESS);
3332	}
3333	return (ISC_R_NOTIMPLEMENTED);
3334}
3335
3336void
3337dns_dispatch_cancel(dns_dispatch_t *disp) {
3338	REQUIRE(VALID_DISPATCH(disp));
3339
3340	LOCK(&disp->lock);
3341
3342	if (disp->shutting_down == 1) {
3343		UNLOCK(&disp->lock);
3344		return;
3345	}
3346
3347	disp->shutdown_why = ISC_R_CANCELED;
3348	disp->shutting_down = 1;
3349	do_cancel(disp);
3350
3351	UNLOCK(&disp->lock);
3352
3353	return;
3354}
3355
3356unsigned int
3357dns_dispatch_getattributes(dns_dispatch_t *disp) {
3358	REQUIRE(VALID_DISPATCH(disp));
3359
3360	/*
3361	 * We don't bother locking disp here; it's the caller's responsibility
3362	 * to use only non volatile flags.
3363	 */
3364	return (disp->attributes);
3365}
3366
3367void
3368dns_dispatch_changeattributes(dns_dispatch_t *disp,
3369			      unsigned int attributes, unsigned int mask)
3370{
3371	REQUIRE(VALID_DISPATCH(disp));
3372	/* Exclusive attribute can only be set on creation */
3373	REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3374	/* Also, a dispatch with randomport specified cannot start listening */
3375	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3376		(attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3377
3378	/* XXXMLG
3379	 * Should check for valid attributes here!
3380	 */
3381
3382	LOCK(&disp->lock);
3383
3384	if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3385		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3386		    (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3387			disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3388			(void)startrecv(disp, NULL);
3389		} else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3390			   == 0 &&
3391			   (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3392			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3393			if (disp->recv_pending != 0)
3394				isc_socket_cancel(disp->socket, disp->task[0],
3395						  ISC_SOCKCANCEL_RECV);
3396		}
3397	}
3398
3399	disp->attributes &= ~mask;
3400	disp->attributes |= (attributes & mask);
3401	UNLOCK(&disp->lock);
3402}
3403
3404void
3405dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3406	void *buf;
3407	isc_socketevent_t *sevent, *newsevent;
3408
3409	REQUIRE(VALID_DISPATCH(disp));
3410	REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3411	REQUIRE(event != NULL);
3412
3413	sevent = (isc_socketevent_t *)event;
3414
3415	INSIST(sevent->n <= disp->mgr->buffersize);
3416	newsevent = (isc_socketevent_t *)
3417		    isc_event_allocate(disp->mgr->mctx, NULL,
3418				      DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3419				      disp, sizeof(isc_socketevent_t));
3420	if (newsevent == NULL)
3421		return;
3422
3423	buf = allocate_udp_buffer(disp);
3424	if (buf == NULL) {
3425		isc_event_free(ISC_EVENT_PTR(&newsevent));
3426		return;
3427	}
3428	memcpy(buf, sevent->region.base, sevent->n);
3429	newsevent->region.base = buf;
3430	newsevent->region.length = disp->mgr->buffersize;
3431	newsevent->n = sevent->n;
3432	newsevent->result = sevent->result;
3433	newsevent->address = sevent->address;
3434	newsevent->timestamp = sevent->timestamp;
3435	newsevent->pktinfo = sevent->pktinfo;
3436	newsevent->attributes = sevent->attributes;
3437
3438	isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3439}
3440
#if 0
/*
 * Debugging aid (compiled out): print every dispatch on the manager's
 * list together with its formatted local address to stdout.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
	dns_dispatch_t *disp;
	char addrtext[1024];

	for (disp = ISC_LIST_HEAD(mgr->list);
	     disp != NULL;
	     disp = ISC_LIST_NEXT(disp, link))
	{
		isc_sockaddr_format(&disp->local, addrtext, sizeof(addrtext));
		printf("\tdispatch %p, addr %s\n", disp, addrtext);
	}
}
#endif
3455