dispatch.c revision 254402
193787Sdes/*
294691Sdes * Copyright (C) 2004-2009, 2011-2013  Internet Systems Consortium, Inc. ("ISC")
393787Sdes * Copyright (C) 1999-2003  Internet Software Consortium.
493787Sdes *
593787Sdes * Permission to use, copy, modify, and/or distribute this software for any
693787Sdes * purpose with or without fee is hereby granted, provided that the above
793787Sdes * copyright notice and this permission notice appear in all copies.
893787Sdes *
993787Sdes * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
1093787Sdes * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
1193787Sdes * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
1293787Sdes * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
1393787Sdes * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
1493787Sdes * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
1593787Sdes * PERFORMANCE OF THIS SOFTWARE.
1693787Sdes */
1793787Sdes
1893787Sdes/* $Id$ */
1993787Sdes
2093787Sdes/*! \file */
2193787Sdes
2293787Sdes#include <config.h>
2393787Sdes
2493787Sdes#include <stdlib.h>
2593787Sdes#include <sys/types.h>
2693787Sdes#include <unistd.h>
2793787Sdes#include <stdlib.h>
2893787Sdes
2993787Sdes#include <isc/entropy.h>
3093787Sdes#include <isc/mem.h>
3193787Sdes#include <isc/mutex.h>
3293787Sdes#include <isc/portset.h>
3393787Sdes#include <isc/print.h>
3493787Sdes#include <isc/random.h>
3593787Sdes#include <isc/stats.h>
3693787Sdes#include <isc/string.h>
3793787Sdes#include <isc/task.h>
3893787Sdes#include <isc/time.h>
3993787Sdes#include <isc/util.h>
4094691Sdes
4194691Sdes#include <dns/acl.h>
4293787Sdes#include <dns/dispatch.h>
4393787Sdes#include <dns/events.h>
4493787Sdes#include <dns/log.h>
4593787Sdes#include <dns/message.h>
4693787Sdes#include <dns/portlist.h>
4793787Sdes#include <dns/stats.h>
4893787Sdes#include <dns/tcpmsg.h>
4993787Sdes#include <dns/types.h>
5093787Sdes
/*% List of response entries; dns_qid_t keeps one such list per hash bucket. */
typedef ISC_LIST(dns_dispentry_t)	dns_displist_t;

/*% Per-transaction dispatch socket, and the list type used to chain them. */
typedef struct dispsocket		dispsocket_t;
typedef ISC_LIST(dispsocket_t)		dispsocketlist_t;

/*% Local port table entry, and the list type used to chain them. */
typedef struct dispportentry		dispportentry_t;
typedef ISC_LIST(dispportentry_t)	dispportlist_t;
5893787Sdes
/* ARC4 Random generator state */
typedef struct arc4ctx {
	isc_uint8_t	i;		/*%< first index into s[] */
	isc_uint8_t	j;		/*%< second index into s[] */
	isc_uint8_t	s[256];		/*%< permutation table */
	/*%
	 * Output budget: decremented by dispatch_random() on every draw and
	 * reset by dispatch_arc4stir(); a value <= 0 triggers a re-stir.
	 */
	int		count;
	isc_entropy_t	*entropy;	/*%< entropy source for ARC4 */
	/*%
	 * Optional mutex; when non-NULL, dispatch_random() holds it for the
	 * duration of a draw.
	 */
	isc_mutex_t	*lock;
} arc4ctx_t;
6893787Sdes
/*%
 * Query-ID bookkeeping: two hash tables of 'qid_nbuckets' buckets each, one
 * of outstanding response entries and one of dispatch sockets.
 */
typedef struct dns_qid {
	unsigned int	magic;
	unsigned int	qid_nbuckets;	/*%< hash table size */
	unsigned int	qid_increment;	/*%< id increment on collision */
	/*%
	 * Taken by deref_portentry(), destroy_dispsocket() and
	 * deactivate_dispsocket() around port table / sock_table updates.
	 */
	isc_mutex_t	lock;
	dns_displist_t	*qid_table;	/*%< the table itself */
	dispsocketlist_t *sock_table;	/*%< socket table */
} dns_qid_t;
7793787Sdes
/*%
 * Dispatch manager: owns the list of dispatches, the shared memory pools,
 * the shared QID table and the sets of local ports available for queries.
 */
struct dns_dispatchmgr {
	/* Unlocked. */
	unsigned int			magic;
	isc_mem_t		       *mctx;
	dns_acl_t		       *blackhole;
	dns_portlist_t		       *portlist;
	isc_stats_t		       *stats;	/*%< may be NULL; see inc_stats() */
	isc_entropy_t		       *entropy; /*%< entropy source */

	/* Locked by "lock". */
	isc_mutex_t			lock;
	unsigned int			state;	/*%< MGR_* flag bits */
	ISC_LIST(dns_dispatch_t)	list;

	/* Locked by arc4_lock. */
	isc_mutex_t			arc4_lock;
	arc4ctx_t			arc4ctx;    /*%< ARC4 context for QID */

	/* locked by buffer lock */
	dns_qid_t			*qid;
	isc_mutex_t			buffer_lock;
	unsigned int			buffers;    /*%< allocated buffers */
	unsigned int			buffersize; /*%< size of each buffer */
	unsigned int			maxbuffers; /*%< max buffers */

	/* Locked internally. */
	isc_mutex_t			pool_lock;
	isc_mempool_t		       *epool;	/*%< memory pool for events */
	isc_mempool_t		       *rpool;	/*%< memory pool for replies */
	isc_mempool_t		       *dpool;  /*%< dispatch allocations */
	isc_mempool_t		       *bpool;	/*%< memory pool for buffers */
	isc_mempool_t		       *spool;	/*%< memory pool for dispsocs */

	/*%
	 * Locked by qid->lock if qid exists; otherwise, can be used without
	 * being locked.
	 * Memory footprint considerations: this is a simple implementation of
	 * available ports, i.e., an ordered array of the actual port numbers.
	 * This will require about 256KB of memory in the worst case (128KB for
	 * each of IPv4 and IPv6).  We could reduce it by representing it as a
	 * more sophisticated way such as a list (or array) of ranges that are
	 * searched to identify a specific port.  Our decision here is the saved
	 * memory isn't worth the implementation complexity, considering the
	 * fact that the whole BIND9 process (which is mainly named) already
	 * requires a pretty large memory footprint.  We may, however, have to
	 * revisit the decision when we want to use it as a separate module for
	 * an environment where memory requirement is severer.
	 */
	in_port_t	*v4ports;	/*%< available ports for IPv4 */
	unsigned int	nv4ports;	/*%< # of available ports for IPv4 */
	in_port_t	*v6ports;	/*%< available ports for IPv6 */
	unsigned int	nv6ports;	/*%< # of available ports for IPv6 */
};
131
/*% Bit in dns_dispatchmgr 'state', and the predicate that tests it. */
#define MGR_SHUTTINGDOWN		0x00000001U
#define MGR_IS_SHUTTINGDOWN(l)	(((l)->state & MGR_SHUTTINGDOWN) != 0)

/*% True when the dispatch 'd' has DNS_DISPATCHATTR_PRIVATE set. */
#define IS_PRIVATE(d)	(((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
136
/*%
 * One outstanding request/response registration.  entry_search() matches
 * entries on the (id, host, port) triple.
 */
struct dns_dispentry {
	unsigned int			magic;
	dns_dispatch_t		       *disp;
	dns_messageid_t			id;	/*%< query ID */
	in_port_t			port;
	unsigned int			bucket;	/*%< index into qid_table */
	isc_sockaddr_t			host;	/*%< peer; see request_log() */
	isc_task_t		       *task;
	isc_taskaction_t		action;
	void			       *arg;
	isc_boolean_t			item_out;
	dispsocket_t			*dispsocket; /*%< may be NULL */
	ISC_LIST(dns_dispatchevent_t)	items;
	ISC_LINK(dns_dispentry_t)	link;	/*%< qid_table bucket chain */
};
152
/*%
 * Maximum number of dispatch sockets that can be pooled for reuse.  The
 * appropriate value may vary, but experiments have shown a busy caching server
 * may need more than 1000 sockets concurrently opened.  The maximum allowable
 * number of dispatch sockets (per manager) will be set to the double of this
 * value.
 *
 * deactivate_dispsocket() destroys a socket instead of pooling it once the
 * dispatch holds more than this many.
 */
#ifndef DNS_DISPATCH_POOLSOCKS
#define DNS_DISPATCH_POOLSOCKS			2048
#endif

/*%
 * Quota to control the number of dispatch sockets.  If a dispatch has more
 * than the quota of sockets, new queries will purge the oldest ones, so that
 * a massive number of outstanding queries won't prevent subsequent queries
 * (especially if the older ones take a longer time and result in timeout).
 */
#ifndef DNS_DISPATCH_SOCKSQUOTA
#define DNS_DISPATCH_SOCKSQUOTA			3072
#endif
173
/*%
 * A socket dedicated to a single transaction.  It lives on one of the owning
 * dispatch's activesockets/inactivesockets lists (via 'link') and, while
 * bound, in a bucket of the QID socket table (via 'blink').
 */
struct dispsocket {
	unsigned int			magic;
	isc_socket_t			*socket;
	dns_dispatch_t			*disp;	/*%< owning dispatch */
	isc_sockaddr_t			host;	/*%< destination address */
	in_port_t			localport; /* XXX: should be removed later */
	dispportentry_t			*portentry; /*%< NULL when unbound */
	dns_dispentry_t			*resp;
	isc_task_t			*task;
	ISC_LINK(dispsocket_t)		link;	/*%< active/inactive list */
	unsigned int			bucket;	/*%< index into sock_table */
	ISC_LINK(dispsocket_t)		blink;	/*%< sock_table bucket chain */
};
187
/*%
 * A port table entry.  We remember every port we first open in a table with a
 * reference counter so that we can 'reuse' the same port (with different
 * destination addresses) using the SO_REUSEADDR socket option.
 */
struct dispportentry {
	in_port_t			port;
	/*%
	 * Number of dispatch sockets using this port; the entry is released
	 * by deref_portentry() when this drops to zero.
	 */
	unsigned int			refs;
	ISC_LINK(struct dispportentry)	link;	/*%< port_table bucket chain */
};
198
/*% Number of buckets in a dispatch's port table (indexed by port % size). */
#ifndef DNS_DISPATCH_PORTTABLESIZE
#define DNS_DISPATCH_PORTTABLESIZE	1024
#endif

#define INVALID_BUCKET		(0xffffdead)

/*%
 * Number of tasks for each dispatch that use separate sockets for different
 * transactions.  This must be a power of 2 as it will divide 32 bit numbers
 * to get a uniformly random task selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS	64
211
/*%
 * A dispatch: a local endpoint (shared socket and/or a set of per-transaction
 * sockets) through which queries are sent and responses are demultiplexed.
 */
struct dns_dispatch {
	/* Unlocked. */
	unsigned int		magic;		/*%< magic */
	dns_dispatchmgr_t      *mgr;		/*%< dispatch manager */
	int			ntasks;		/*%< # of valid task[] slots */
	/*%
	 * internal task buckets.  We use multiple tasks to distribute various
	 * socket events well when using separate dispatch sockets.  We use the
	 * 1st task (task[0]) for internal control events.
	 */
	isc_task_t	       *task[MAX_INTERNAL_TASKS];
	isc_socket_t	       *socket;		/*%< isc socket attached to */
	isc_sockaddr_t		local;		/*%< local address */
	in_port_t		localport;	/*%< local UDP port */
	unsigned int		maxrequests;	/*%< max requests */
	isc_event_t	       *ctlevent;

	/*% Locked by mgr->lock. */
	ISC_LINK(dns_dispatch_t) link;

	/* Locked by "lock". */
	isc_mutex_t		lock;		/*%< locks all below */
	isc_sockettype_t	socktype;
	unsigned int		attributes;
	unsigned int		refcount;	/*%< number of users */
	dns_dispatchevent_t    *failsafe_ev;	/*%< failsafe cancel event */
	unsigned int		shutting_down : 1,
				shutdown_out : 1,
				connected : 1,
				tcpmsg_valid : 1,
				recv_pending : 1; /*%< is a recv() pending? */
	isc_result_t		shutdown_why;
	ISC_LIST(dispsocket_t)	activesockets;
	ISC_LIST(dispsocket_t)	inactivesockets; /*%< pooled for reuse */
	unsigned int		nsockets;	/*%< # of dispsockets owned */
	unsigned int		requests;	/*%< how many requests we have */
	unsigned int		tcpbuffers;	/*%< allocated buffers */
	dns_tcpmsg_t		tcpmsg;		/*%< for tcp streams */
	dns_qid_t		*qid;		/*%< used for TCP; see DNS_QID() */
	arc4ctx_t		arc4ctx;	/*%< for QID/UDP port num */
	dispportlist_t		*port_table;	/*%< hold ports 'owned' by us */
	isc_mempool_t		*portpool;	/*%< port table entries  */
};
255
/*%
 * Magic numbers and validity predicates for the structures in this file.
 */
#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)

#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

#define DISPSOCK_MAGIC		ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e)	ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
270
/*%
 * DNS_QID(disp) yields the QID table used by 'disp': a TCP dispatch uses its
 * own table, any other socket type uses the manager's.
 * DISP_ARC4CTX(disp) yields a pointer to the ARC4 context used by 'disp': a
 * UDP dispatch uses its own context, any other socket type uses the manager's.
 *
 * Both expansions are wrapped in an outer pair of parentheses so that the
 * conditional expression keeps its meaning when the macro is used as an
 * operand of a larger expression.
 */
#define DNS_QID(disp) (((disp)->socktype == isc_sockettype_tcp) ? \
		       (disp)->qid : (disp)->mgr->qid)
#define DISP_ARC4CTX(disp) (((disp)->socktype == isc_sockettype_udp) ? \
			(&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))

/*%
 * Locking a query port buffer is a bit tricky.  We access the buffer without
 * locking until qid is created.  Technically, there is a possibility of race
 * between the creation of qid and access to the port buffer; in practice,
 * however, this should be safe because qid isn't created until the first
 * dispatch is created and there should be no contending situation until then.
 *
 * The macros expand to a single do { ... } while (0) statement so that they
 * can be used safely as the body of an if/else without capturing the 'else'.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
285
/*
 * Statics.
 *
 * Forward declarations for the file-local helpers defined below.
 */
static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
				     dns_messageid_t, in_port_t, unsigned int);
static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
static void destroy_disp(isc_task_t *task, isc_event_t *event);
static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
static void udp_exrecv(isc_task_t *, isc_event_t *);
static void udp_shrecv(isc_task_t *, isc_event_t *);
static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
static void tcp_recv(isc_task_t *, isc_event_t *);
static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
			     in_port_t);
static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
static void *allocate_udp_buffer(dns_dispatch_t *disp);
static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
static void do_cancel(dns_dispatch_t *disp);
/* NOTE: the first parameter of linear_first/linear_next is a QID table. */
static dns_dispentry_t *linear_first(dns_qid_t *disp);
static dns_dispentry_t *linear_next(dns_qid_t *disp,
				    dns_dispentry_t *resp);
static void dispatch_free(dns_dispatch_t **dispp);
static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
				  dns_dispatch_t *disp,
				  isc_socketmgr_t *sockmgr,
				  isc_sockaddr_t *localaddr,
				  isc_socket_t **sockp);
static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
				       isc_socketmgr_t *sockmgr,
				       isc_taskmgr_t *taskmgr,
				       isc_sockaddr_t *localaddr,
				       unsigned int maxrequests,
				       unsigned int attributes,
				       dns_dispatch_t **dispp);
static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
static void destroy_mgr(dns_dispatchmgr_t **mgrp);
static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
				 unsigned int increment, dns_qid_t **qidp,
				 isc_boolean_t needaddrtable);
static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
				unsigned int options, isc_socket_t **sockp);
static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
				   isc_sockaddr_t *sockaddrp);
333
/*% Shorthand for a debug log level, e.g. LVL(90). */
#define LVL(x) ISC_LOG_DEBUG(x)
335
336static void
337mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
338     ISC_FORMAT_PRINTF(3, 4);
339
340static void
341mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
342	char msgbuf[2048];
343	va_list ap;
344
345	if (! isc_log_wouldlog(dns_lctx, level))
346		return;
347
348	va_start(ap, fmt);
349	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
350	va_end(ap);
351
352	isc_log_write(dns_lctx,
353		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
354		      level, "dispatchmgr %p: %s", mgr, msgbuf);
355}
356
357static inline void
358inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
359	if (mgr->stats != NULL)
360		isc_stats_increment(mgr->stats, counter);
361}
362
363static void
364dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
365     ISC_FORMAT_PRINTF(3, 4);
366
367static void
368dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
369	char msgbuf[2048];
370	va_list ap;
371
372	if (! isc_log_wouldlog(dns_lctx, level))
373		return;
374
375	va_start(ap, fmt);
376	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
377	va_end(ap);
378
379	isc_log_write(dns_lctx,
380		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
381		      level, "dispatch %p: %s", disp, msgbuf);
382}
383
384static void
385request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
386	    int level, const char *fmt, ...)
387     ISC_FORMAT_PRINTF(4, 5);
388
389static void
390request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
391	    int level, const char *fmt, ...)
392{
393	char msgbuf[2048];
394	char peerbuf[256];
395	va_list ap;
396
397	if (! isc_log_wouldlog(dns_lctx, level))
398		return;
399
400	va_start(ap, fmt);
401	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
402	va_end(ap);
403
404	if (VALID_RESPONSE(resp)) {
405		isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
406		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
407			      DNS_LOGMODULE_DISPATCH, level,
408			      "dispatch %p response %p %s: %s", disp, resp,
409			      peerbuf, msgbuf);
410	} else {
411		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
412			      DNS_LOGMODULE_DISPATCH, level,
413			      "dispatch %p req/resp %p: %s", disp, resp,
414			      msgbuf);
415	}
416}
417
418/*%
419 * ARC4 random number generator derived from OpenBSD.
420 * Only dispatch_random() and dispatch_uniformrandom() are expected
421 * to be called from general dispatch routines; the rest of them are subroutines
422 * for these two.
423 *
424 * The original copyright follows:
425 * Copyright (c) 1996, David Mazieres <dm@uun.org>
426 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
427 *
428 * Permission to use, copy, modify, and distribute this software for any
429 * purpose with or without fee is hereby granted, provided that the above
430 * copyright notice and this permission notice appear in all copies.
431 *
432 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
433 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
434 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
435 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
436 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
437 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
438 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
439 */
440#ifdef BIND9
441static void
442dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
443		    isc_mutex_t *lock)
444{
445	int n;
446	for (n = 0; n < 256; n++)
447		actx->s[n] = n;
448	actx->i = 0;
449	actx->j = 0;
450	actx->count = 0;
451	actx->entropy = entropy; /* don't have to attach */
452	actx->lock = lock;
453}
454
/*%
 * Mix 'datlen' bytes from 'dat' into the permutation table of 'actx'.
 * The loop makes 256 swaps, cycling through 'dat' as key material.
 * 'datlen' must be non-zero (it is used as a modulus).
 */
static void
dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
	int n;
	isc_uint8_t si;

	/* Pre-decrement so the increment at the top of the loop starts at i. */
	actx->i--;
	for (n = 0; n < 256; n++) {
		/* i and j are 8-bit, so these additions wrap modulo 256. */
		actx->i = (actx->i + 1);
		si = actx->s[actx->i];
		actx->j = (actx->j + si + dat[n % datlen]);
		actx->s[actx->i] = actx->s[actx->j];
		actx->s[actx->j] = si;
	}
	actx->j = actx->i;
}
470
471static inline isc_uint8_t
472dispatch_arc4get8(arc4ctx_t *actx) {
473	isc_uint8_t si, sj;
474
475	actx->i = (actx->i + 1);
476	si = actx->s[actx->i];
477	actx->j = (actx->j + si);
478	sj = actx->s[actx->j];
479	actx->s[actx->i] = sj;
480	actx->s[actx->j] = si;
481
482	return (actx->s[(si + sj) & 0xff]);
483}
484
485static inline isc_uint16_t
486dispatch_arc4get16(arc4ctx_t *actx) {
487	isc_uint16_t val;
488
489	val = dispatch_arc4get8(actx) << 8;
490	val |= dispatch_arc4get8(actx);
491
492	return (val);
493}
494
/*%
 * (Re)key the generator: gather 128 bytes of seed material, mix them into
 * the state, discard the first 256 output bytes and reset the output budget.
 *
 * The seed comes from the context's entropy source when one is set, and from
 * isc_random_get() otherwise.
 */
static void
dispatch_arc4stir(arc4ctx_t *actx) {
	int i;
	/* The union lets the same 128 bytes be filled bytewise or wordwise. */
	union {
		unsigned char rnd[128];
		isc_uint32_t rnd32[32];
	} rnd;
	isc_result_t result;

	if (actx->entropy != NULL) {
		/*
		 * We accept any quality of random data to avoid blocking.
		 */
		result = isc_entropy_getdata(actx->entropy, rnd.rnd,
					     sizeof(rnd), NULL, 0);
		RUNTIME_CHECK(result == ISC_R_SUCCESS);
	} else {
		for (i = 0; i < 32; i++)
			isc_random_get(&rnd.rnd32[i]);
	}
	dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));

	/*
	 * Discard early keystream, as per recommendations in:
	 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
	 */
	for (i = 0; i < 256; i++)
		(void)dispatch_arc4get8(actx);

	/*
	 * Derived from OpenBSD's implementation.  The rationale is not clear,
	 * but should be conservative enough in safety, and reasonably large
	 * for efficiency.
	 */
	actx->count = 1600000;
}
531
532static isc_uint16_t
533dispatch_random(arc4ctx_t *actx) {
534	isc_uint16_t result;
535
536	if (actx->lock != NULL)
537		LOCK(actx->lock);
538
539	actx->count -= sizeof(isc_uint16_t);
540	if (actx->count <= 0)
541		dispatch_arc4stir(actx);
542	result = dispatch_arc4get16(actx);
543
544	if (actx->lock != NULL)
545		UNLOCK(actx->lock);
546
547	return (result);
548}
549#else
550/*
551 * For general purpose library, we don't have to be too strict about the
552 * quality of random values.  Performance doesn't matter much, either.
553 * So we simply use the isc_random module to keep the library as small as
554 * possible.
555 */
556
557static void
558dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
559		    isc_mutex_t *lock)
560{
561	UNUSED(actx);
562	UNUSED(entropy);
563	UNUSED(lock);
564
565	return;
566}
567
568static isc_uint16_t
569dispatch_random(arc4ctx_t *actx) {
570	isc_uint32_t r;
571
572	UNUSED(actx);
573
574	isc_random_get(&r);
575	return (r & 0xffff);
576}
577#endif	/* BIND9 */
578
579static isc_uint16_t
580dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
581	isc_uint16_t min, r;
582
583	if (upper_bound < 2)
584		return (0);
585
586	/*
587	 * Ensure the range of random numbers [min, 0xffff] be a multiple of
588	 * upper_bound and contain at least a half of the 16 bit range.
589	 */
590
591	if (upper_bound > 0x8000)
592		min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
593	else
594		min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
595
596	/*
597	 * This could theoretically loop forever but each retry has
598	 * p > 0.5 (worst case, usually far better) of selecting a
599	 * number inside the range we need, so it should rarely need
600	 * to re-roll.
601	 */
602	for (;;) {
603		r = dispatch_random(actx);
604		if (r >= min)
605			break;
606	}
607
608	return (r % upper_bound);
609}
610
611/*
612 * Return a hash of the destination and message id.
613 */
614static isc_uint32_t
615dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
616	 in_port_t port)
617{
618	unsigned int ret;
619
620	ret = isc_sockaddr_hash(dest, ISC_TRUE);
621	ret ^= (id << 16) | port;
622	ret %= qid->qid_nbuckets;
623
624	INSIST(ret < qid->qid_nbuckets);
625
626	return (ret);
627}
628
629/*
630 * Find the first entry in 'qid'.  Returns NULL if there are no entries.
631 */
632static dns_dispentry_t *
633linear_first(dns_qid_t *qid) {
634	dns_dispentry_t *ret;
635	unsigned int bucket;
636
637	bucket = 0;
638
639	while (bucket < qid->qid_nbuckets) {
640		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
641		if (ret != NULL)
642			return (ret);
643		bucket++;
644	}
645
646	return (NULL);
647}
648
649/*
650 * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
651 * no more entries.
652 */
653static dns_dispentry_t *
654linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
655	dns_dispentry_t *ret;
656	unsigned int bucket;
657
658	ret = ISC_LIST_NEXT(resp, link);
659	if (ret != NULL)
660		return (ret);
661
662	bucket = resp->bucket;
663	bucket++;
664	while (bucket < qid->qid_nbuckets) {
665		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
666		if (ret != NULL)
667			return (ret);
668		bucket++;
669	}
670
671	return (NULL);
672}
673
674/*
675 * The dispatch must be locked.
676 */
677static isc_boolean_t
678destroy_disp_ok(dns_dispatch_t *disp)
679{
680	if (disp->refcount != 0)
681		return (ISC_FALSE);
682
683	if (disp->recv_pending != 0)
684		return (ISC_FALSE);
685
686	if (!ISC_LIST_EMPTY(disp->activesockets))
687		return (ISC_FALSE);
688
689	if (disp->shutting_down == 0)
690		return (ISC_FALSE);
691
692	return (ISC_TRUE);
693}
694
/*
 * Called when refcount reaches 0 (and safe to destroy).
 *
 * Task action for a DNS_EVENT_DISPATCHCONTROL event whose ev_arg is the
 * dispatch to destroy.  Unlinks the dispatch from its manager, releases its
 * sockets and tasks, frees the event and the dispatch, and finally destroys
 * the manager too if destroy_mgr_ok() allows it.
 *
 * The dispatcher must not be locked.
 * This function acquires mgr->lock itself, so the caller presumably must not
 * already hold it.
 */
static void
destroy_disp(isc_task_t *task, isc_event_t *event) {
	dns_dispatch_t *disp;
	dns_dispatchmgr_t *mgr;
	isc_boolean_t killmgr;
	dispsocket_t *dispsocket;
	int i;

	INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);

	UNUSED(task);

	disp = event->ev_arg;
	mgr = disp->mgr;

	LOCK(&mgr->lock);
	ISC_LIST_UNLINK(mgr->list, disp, link);

	dispatch_log(disp, LVL(90),
		     "shutting down; detaching from sock %p, task %p",
		     disp->socket, disp->task[0]); /* XXXX */

	if (disp->socket != NULL)
		isc_socket_detach(&disp->socket);
	/* Drain the pool of inactive per-transaction sockets. */
	while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
		ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
		destroy_dispsocket(disp, &dispsocket);
	}
	for (i = 0; i < disp->ntasks; i++)
		isc_task_detach(&disp->task[i]);
	isc_event_free(&event);

	dispatch_free(&disp);

	/* Decide under the lock, but destroy the manager after releasing it. */
	killmgr = destroy_mgr_ok(mgr);
	UNLOCK(&mgr->lock);
	if (killmgr)
		destroy_mgr(&mgr);
}
740
741/*%
742 * Manipulate port table per dispatch: find an entry for a given port number,
743 * create a new entry, and decrement a given entry with possible clean-up.
744 */
745static dispportentry_t *
746port_search(dns_dispatch_t *disp, in_port_t port) {
747	dispportentry_t *portentry;
748
749	REQUIRE(disp->port_table != NULL);
750
751	portentry = ISC_LIST_HEAD(disp->port_table[port %
752						   DNS_DISPATCH_PORTTABLESIZE]);
753	while (portentry != NULL) {
754		if (portentry->port == port)
755			return (portentry);
756		portentry = ISC_LIST_NEXT(portentry, link);
757	}
758
759	return (NULL);
760}
761
762static dispportentry_t *
763new_portentry(dns_dispatch_t *disp, in_port_t port) {
764	dispportentry_t *portentry;
765
766	REQUIRE(disp->port_table != NULL);
767
768	portentry = isc_mempool_get(disp->portpool);
769	if (portentry == NULL)
770		return (portentry);
771
772	portentry->port = port;
773	portentry->refs = 0;
774	ISC_LINK_INIT(portentry, link);
775	ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
776			portentry, link);
777
778	return (portentry);
779}
780
781/*%
782 * The caller must not hold the qid->lock.
783 */
784static void
785deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
786	dispportentry_t *portentry = *portentryp;
787	dns_qid_t *qid;
788
789	REQUIRE(disp->port_table != NULL);
790	REQUIRE(portentry != NULL && portentry->refs > 0);
791
792	qid = DNS_QID(disp);
793	LOCK(&qid->lock);
794	portentry->refs--;
795	if (portentry->refs == 0) {
796		ISC_LIST_UNLINK(disp->port_table[portentry->port %
797						 DNS_DISPATCH_PORTTABLESIZE],
798				portentry, link);
799		isc_mempool_put(disp->portpool, portentry);
800	}
801
802	*portentryp = NULL;
803	UNLOCK(&qid->lock);
804}
805
806/*%
807 * Find a dispsocket for socket address 'dest', and port number 'port'.
808 * Return NULL if no such entry exists.
809 */
810static dispsocket_t *
811socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
812	      unsigned int bucket)
813{
814	dispsocket_t *dispsock;
815
816	REQUIRE(bucket < qid->qid_nbuckets);
817
818	dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
819
820	while (dispsock != NULL) {
821		if (dispsock->portentry != NULL &&
822		    dispsock->portentry->port == port &&
823		    isc_sockaddr_equal(dest, &dispsock->host))
824			return (dispsock);
825		dispsock = ISC_LIST_NEXT(dispsock, blink);
826	}
827
828	return (NULL);
829}
830
/*%
 * Make a new socket for a single dispatch with a random port number.
 * The caller must hold the disp->lock and qid->lock.
 *
 * A dispsocket is taken from the dispatch's inactive list when one is
 * available, otherwise a new one is allocated from the manager's pool.  Up
 * to 64 randomly chosen local ports are then tried.
 *
 * On success, '*dispsockp' is set to the socket (already linked into the QID
 * socket table) and '*portp' to the chosen local port.  On failure the
 * dispsocket is destroyed and neither output is written.
 *
 * Returns:
 *\li	ISC_R_SUCCESS
 *\li	ISC_R_ADDRNOTAVAIL	no ports are configured for the address family
 *\li	ISC_R_NOMEMORY		a pool allocation failed
 *\li	ISC_R_FAILURE		every candidate port was already in use for
 *				'dest' in the socket table
 *\li	otherwise, the result of the last open_socket() attempt
 */
static isc_result_t
get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
	       isc_socketmgr_t *sockmgr, dns_qid_t *qid,
	       dispsocket_t **dispsockp, in_port_t *portp)
{
	int i;
	isc_uint32_t r;
	dns_dispatchmgr_t *mgr = disp->mgr;
	isc_socket_t *sock = NULL;
	isc_result_t result = ISC_R_FAILURE;
	in_port_t port;
	isc_sockaddr_t localaddr;
	unsigned int bucket = 0;
	dispsocket_t *dispsock;
	unsigned int nports;
	in_port_t *ports;
	unsigned int bindoptions;
	dispportentry_t *portentry = NULL;

	/* Pick the port set that matches the dispatch's address family. */
	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
		nports = disp->mgr->nv4ports;
		ports = disp->mgr->v4ports;
	} else {
		nports = disp->mgr->nv6ports;
		ports = disp->mgr->v6ports;
	}
	if (nports == 0)
		return (ISC_R_ADDRNOTAVAIL);

	dispsock = ISC_LIST_HEAD(disp->inactivesockets);
	if (dispsock != NULL) {
		/*
		 * Reuse a pooled dispsocket.  Its socket handle is moved to
		 * 'sock' and handed to open_socket() below.
		 */
		ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
		sock = dispsock->socket;
		dispsock->socket = NULL;
	} else {
		dispsock = isc_mempool_get(mgr->spool);
		if (dispsock == NULL)
			return (ISC_R_NOMEMORY);

		disp->nsockets++;
		dispsock->socket = NULL;
		dispsock->disp = disp;
		dispsock->resp = NULL;
		dispsock->portentry = NULL;
		/* Assign one of the dispatch's tasks at random. */
		isc_random_get(&r);
		dispsock->task = NULL;
		isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
		ISC_LINK_INIT(dispsock, link);
		ISC_LINK_INIT(dispsock, blink);
		dispsock->magic = DISPSOCK_MAGIC;
	}

	/*
	 * Pick up a random UDP port and open a new socket with it.  Avoid
	 * choosing ports that share the same destination because it will be
	 * very likely to fail in bind(2) or connect(2).
	 */
	localaddr = disp->local;
	for (i = 0; i < 64; i++) {
		port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
							nports)];
		isc_sockaddr_setport(&localaddr, port);

		bucket = dns_hash(qid, dest, 0, port);
		if (socket_search(qid, dest, port, bucket) != NULL)
			continue;
		/*
		 * If this dispatch already owns the port, ask for address
		 * reuse so the bind can succeed.
		 */
		bindoptions = 0;
		portentry = port_search(disp, port);
		if (portentry != NULL)
			bindoptions |= ISC_SOCKET_REUSEADDRESS;
		result = open_socket(sockmgr, &localaddr, bindoptions, &sock);
		if (result == ISC_R_SUCCESS) {
			if (portentry == NULL) {
				portentry = new_portentry(disp, port);
				if (portentry == NULL) {
					result = ISC_R_NOMEMORY;
					break;
				}
			}
			portentry->refs++;
			break;
		} else if (result == ISC_R_NOPERM) {
			/* Log and try another port. */
			char buf[ISC_SOCKADDR_FORMATSIZE];
			isc_sockaddr_format(&localaddr, buf, sizeof(buf));
			dispatch_log(disp, ISC_LOG_WARNING,
				     "open_socket(%s) -> %s: continuing",
				     buf, isc_result_totext(result));
		} else if (result != ISC_R_ADDRINUSE)
			break;
		/* ISC_R_ADDRINUSE: silently try another port. */
	}

	if (result == ISC_R_SUCCESS) {
		dispsock->socket = sock;
		dispsock->host = *dest;
		dispsock->portentry = portentry;
		dispsock->bucket = bucket;
		ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
		*dispsockp = dispsock;
		*portp = port;
	} else {
		/*
		 * We could keep it in the inactive list, but since this should
		 * be an exceptional case and might be resource shortage, we'd
		 * rather destroy it.
		 */
		if (sock != NULL)
			isc_socket_detach(&sock);
		destroy_dispsocket(disp, &dispsock);
	}

	return (result);
}
947
/*%
 * Destroy a dedicated dispatch socket.
 *
 * Releases the port entry reference, the socket and the task (each only if
 * present), removes the dispsocket from the QID socket table if it is still
 * linked there, returns it to the manager's pool and sets '*dispsockp' to
 * NULL.  The dispsocket must already be off the dispatch's active/inactive
 * list.
 */
static void
destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
	dispsocket_t *dispsock;
	dns_qid_t *qid;

	/*
	 * The dispatch must be locked.
	 */

	REQUIRE(dispsockp != NULL && *dispsockp != NULL);
	dispsock = *dispsockp;
	REQUIRE(!ISC_LINK_LINKED(dispsock, link));

	disp->nsockets--;
	dispsock->magic = 0;
	/* deref_portentry() takes the QID lock itself. */
	if (dispsock->portentry != NULL)
		deref_portentry(disp, &dispsock->portentry);
	if (dispsock->socket != NULL)
		isc_socket_detach(&dispsock->socket);
	if (ISC_LINK_LINKED(dispsock, blink)) {
		qid = DNS_QID(disp);
		LOCK(&qid->lock);
		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
				blink);
		UNLOCK(&qid->lock);
	}
	if (dispsock->task != NULL)
		isc_task_detach(&dispsock->task);
	isc_mempool_put(disp->mgr->spool, dispsock);

	*dispsockp = NULL;
}
983
/*%
 * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
 * future reuse unless the total number of sockets are exceeding the maximum.
 *
 * The dispsocket is removed from the active list, detached from its response
 * entry (if any) and its port entry reference is dropped.  In non-BIND9
 * builds the dispsocket is always destroyed.
 */
static void
deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
	isc_result_t result;
	dns_qid_t *qid;

	/*
	 * The dispatch must be locked.
	 */
	ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
	if (dispsock->resp != NULL) {
		INSIST(dispsock->resp->dispsocket == dispsock);
		dispsock->resp->dispsocket = NULL;
	}

	INSIST(dispsock->portentry != NULL);
	deref_portentry(disp, &dispsock->portentry);

#ifdef BIND9
	/* Pool is full: destroy instead of keeping the socket for reuse. */
	if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
		destroy_dispsocket(disp, &dispsock);
	else {
		result = isc_socket_close(dispsock->socket);

		qid = DNS_QID(disp);
		LOCK(&qid->lock);
		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
				blink);
		UNLOCK(&qid->lock);

		if (result == ISC_R_SUCCESS)
			ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
		else {
			/*
			 * If the underlying system does not allow this
			 * optimization, destroy this temporary structure (and
			 * create a new one for a new transaction).
			 */
			INSIST(result == ISC_R_NOTIMPLEMENTED);
			destroy_dispsocket(disp, &dispsock);
		}
	}
#else
	/* This kind of optimization isn't necessary for normal use */
	UNUSED(qid);
	UNUSED(result);

	destroy_dispsocket(disp, &dispsock);
#endif
}
1037
1038/*
1039 * Find an entry for query ID 'id', socket address 'dest', and port number
1040 * 'port'.
1041 * Return NULL if no such entry exists.
1042 */
1043static dns_dispentry_t *
1044entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1045	     in_port_t port, unsigned int bucket)
1046{
1047	dns_dispentry_t *res;
1048
1049	REQUIRE(bucket < qid->qid_nbuckets);
1050
1051	res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1052
1053	while (res != NULL) {
1054		if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1055		    res->port == port) {
1056			return (res);
1057		}
1058		res = ISC_LIST_NEXT(res, link);
1059	}
1060
1061	return (NULL);
1062}
1063
1064static void
1065free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1066	INSIST(buf != NULL && len != 0);
1067
1068
1069	switch (disp->socktype) {
1070	case isc_sockettype_tcp:
1071		INSIST(disp->tcpbuffers > 0);
1072		disp->tcpbuffers--;
1073		isc_mem_put(disp->mgr->mctx, buf, len);
1074		break;
1075	case isc_sockettype_udp:
1076		LOCK(&disp->mgr->buffer_lock);
1077		INSIST(disp->mgr->buffers > 0);
1078		INSIST(len == disp->mgr->buffersize);
1079		disp->mgr->buffers--;
1080		isc_mempool_put(disp->mgr->bpool, buf);
1081		UNLOCK(&disp->mgr->buffer_lock);
1082		break;
1083	default:
1084		INSIST(0);
1085		break;
1086	}
1087}
1088
1089static void *
1090allocate_udp_buffer(dns_dispatch_t *disp) {
1091	void *temp;
1092
1093	LOCK(&disp->mgr->buffer_lock);
1094	temp = isc_mempool_get(disp->mgr->bpool);
1095
1096	if (temp != NULL)
1097		disp->mgr->buffers++;
1098	UNLOCK(&disp->mgr->buffer_lock);
1099
1100	return (temp);
1101}
1102
1103static inline void
1104free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1105	if (disp->failsafe_ev == ev) {
1106		INSIST(disp->shutdown_out == 1);
1107		disp->shutdown_out = 0;
1108
1109		return;
1110	}
1111
1112	isc_mempool_put(disp->mgr->epool, ev);
1113}
1114
1115static inline dns_dispatchevent_t *
1116allocate_event(dns_dispatch_t *disp) {
1117	dns_dispatchevent_t *ev;
1118
1119	ev = isc_mempool_get(disp->mgr->epool);
1120	if (ev == NULL)
1121		return (NULL);
1122	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1123		       NULL, NULL, NULL, NULL, NULL);
1124
1125	return (ev);
1126}
1127
1128static void
1129udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1130	dispsocket_t *dispsock = ev->ev_arg;
1131
1132	UNUSED(task);
1133
1134	REQUIRE(VALID_DISPSOCK(dispsock));
1135	udp_recv(ev, dispsock->disp, dispsock);
1136}
1137
1138static void
1139udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1140	dns_dispatch_t *disp = ev->ev_arg;
1141
1142	UNUSED(task);
1143
1144	REQUIRE(VALID_DISPATCH(disp));
1145	udp_recv(ev, disp, NULL);
1146}
1147
1148/*
1149 * General flow:
1150 *
1151 * If I/O result == CANCELED or error, free the buffer.
1152 *
1153 * If query, free the buffer, restart.
1154 *
1155 * If response:
1156 *	Allocate event, fill in details.
1157 *		If cannot allocate, free buffer, restart.
1158 *	find target.  If not found, free buffer, restart.
1159 *	if event queue is not empty, queue.  else, send.
1160 *	restart.
1161 */
1162static void
1163udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1164	isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1165	dns_messageid_t id;
1166	isc_result_t dres;
1167	isc_buffer_t source;
1168	unsigned int flags;
1169	dns_dispentry_t *resp = NULL;
1170	dns_dispatchevent_t *rev;
1171	unsigned int bucket;
1172	isc_boolean_t killit;
1173	isc_boolean_t queue_response;
1174	dns_dispatchmgr_t *mgr;
1175	dns_qid_t *qid;
1176	isc_netaddr_t netaddr;
1177	int match;
1178	int result;
1179	isc_boolean_t qidlocked = ISC_FALSE;
1180
1181	LOCK(&disp->lock);
1182
1183	mgr = disp->mgr;
1184	qid = mgr->qid;
1185
1186	dispatch_log(disp, LVL(90),
1187		     "got packet: requests %d, buffers %d, recvs %d",
1188		     disp->requests, disp->mgr->buffers, disp->recv_pending);
1189
1190	if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1191		/*
1192		 * Unless the receive event was imported from a listening
1193		 * interface, in which case the event type is
1194		 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1195		 */
1196		INSIST(disp->recv_pending != 0);
1197		disp->recv_pending = 0;
1198	}
1199
1200	if (dispsock != NULL &&
1201	    (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1202		/*
1203		 * dispsock->resp can be NULL if this transaction was canceled
1204		 * just after receiving a response.  Since this socket is
1205		 * exclusively used and there should be at most one receive
1206		 * event the canceled event should have been no effect.  So
1207		 * we can (and should) deactivate the socket right now.
1208		 */
1209		deactivate_dispsocket(disp, dispsock);
1210		dispsock = NULL;
1211	}
1212
1213	if (disp->shutting_down) {
1214		/*
1215		 * This dispatcher is shutting down.
1216		 */
1217		free_buffer(disp, ev->region.base, ev->region.length);
1218
1219		isc_event_free(&ev_in);
1220		ev = NULL;
1221
1222		killit = destroy_disp_ok(disp);
1223		UNLOCK(&disp->lock);
1224		if (killit)
1225			isc_task_send(disp->task[0], &disp->ctlevent);
1226
1227		return;
1228	}
1229
1230	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1231		if (dispsock != NULL) {
1232			resp = dispsock->resp;
1233			id = resp->id;
1234			if (ev->result != ISC_R_SUCCESS) {
1235				/*
1236				 * This is most likely a network error on a
1237				 * connected socket.  It makes no sense to
1238				 * check the address or parse the packet, but it
1239				 * will help to return the error to the caller.
1240				 */
1241				goto sendresponse;
1242			}
1243		} else {
1244			free_buffer(disp, ev->region.base, ev->region.length);
1245
1246			UNLOCK(&disp->lock);
1247			isc_event_free(&ev_in);
1248			return;
1249		}
1250	} else if (ev->result != ISC_R_SUCCESS) {
1251		free_buffer(disp, ev->region.base, ev->region.length);
1252
1253		if (ev->result != ISC_R_CANCELED)
1254			dispatch_log(disp, ISC_LOG_ERROR,
1255				     "odd socket result in udp_recv(): %s",
1256				     isc_result_totext(ev->result));
1257
1258		UNLOCK(&disp->lock);
1259		isc_event_free(&ev_in);
1260		return;
1261	}
1262
1263	/*
1264	 * If this is from a blackholed address, drop it.
1265	 */
1266	isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1267	if (disp->mgr->blackhole != NULL &&
1268	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1269			  NULL, &match, NULL) == ISC_R_SUCCESS &&
1270	    match > 0)
1271	{
1272		if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1273			char netaddrstr[ISC_NETADDR_FORMATSIZE];
1274			isc_netaddr_format(&netaddr, netaddrstr,
1275					   sizeof(netaddrstr));
1276			dispatch_log(disp, LVL(10),
1277				     "blackholed packet from %s",
1278				     netaddrstr);
1279		}
1280		free_buffer(disp, ev->region.base, ev->region.length);
1281		goto restart;
1282	}
1283
1284	/*
1285	 * Peek into the buffer to see what we can see.
1286	 */
1287	isc_buffer_init(&source, ev->region.base, ev->region.length);
1288	isc_buffer_add(&source, ev->n);
1289	dres = dns_message_peekheader(&source, &id, &flags);
1290	if (dres != ISC_R_SUCCESS) {
1291		free_buffer(disp, ev->region.base, ev->region.length);
1292		dispatch_log(disp, LVL(10), "got garbage packet");
1293		goto restart;
1294	}
1295
1296	dispatch_log(disp, LVL(92),
1297		     "got valid DNS message header, /QR %c, id %u",
1298		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1299
1300	/*
1301	 * Look at flags.  If query, drop it. If response,
1302	 * look to see where it goes.
1303	 */
1304	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1305		/* query */
1306		free_buffer(disp, ev->region.base, ev->region.length);
1307		goto restart;
1308	}
1309
1310	/*
1311	 * Search for the corresponding response.  If we are using an exclusive
1312	 * socket, we've already identified it and we can skip the search; but
1313	 * the ID and the address must match the expected ones.
1314	 */
1315	if (resp == NULL) {
1316		bucket = dns_hash(qid, &ev->address, id, disp->localport);
1317		LOCK(&qid->lock);
1318		qidlocked = ISC_TRUE;
1319		resp = entry_search(qid, &ev->address, id, disp->localport,
1320				    bucket);
1321		dispatch_log(disp, LVL(90),
1322			     "search for response in bucket %d: %s",
1323			     bucket, (resp == NULL ? "not found" : "found"));
1324
1325		if (resp == NULL) {
1326			inc_stats(mgr, dns_resstatscounter_mismatch);
1327			free_buffer(disp, ev->region.base, ev->region.length);
1328			goto unlock;
1329		}
1330	} else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1331							 &resp->host)) {
1332		dispatch_log(disp, LVL(90),
1333			     "response to an exclusive socket doesn't match");
1334		inc_stats(mgr, dns_resstatscounter_mismatch);
1335		free_buffer(disp, ev->region.base, ev->region.length);
1336		goto unlock;
1337	}
1338
1339	/*
1340	 * Now that we have the original dispatch the query was sent
1341	 * from check that the address and port the response was
1342	 * sent to make sense.
1343	 */
1344	if (disp != resp->disp) {
1345		isc_sockaddr_t a1;
1346		isc_sockaddr_t a2;
1347
1348		/*
1349		 * Check that the socket types and ports match.
1350		 */
1351		if (disp->socktype != resp->disp->socktype ||
1352		    isc_sockaddr_getport(&disp->local) !=
1353		    isc_sockaddr_getport(&resp->disp->local)) {
1354			free_buffer(disp, ev->region.base, ev->region.length);
1355			goto unlock;
1356		}
1357
1358		/*
1359		 * If both dispatches are bound to an address then fail as
1360		 * the addresses can't be equal (enforced by the IP stack).
1361		 *
1362		 * Note under Linux a packet can be sent out via IPv4 socket
1363		 * and the response be received via a IPv6 socket.
1364		 *
1365		 * Requests sent out via IPv6 should always come back in
1366		 * via IPv6.
1367		 */
1368		if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1369		    isc_sockaddr_pf(&disp->local) != PF_INET6) {
1370			free_buffer(disp, ev->region.base, ev->region.length);
1371			goto unlock;
1372		}
1373		isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1374		isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1375		if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1376		    !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1377			free_buffer(disp, ev->region.base, ev->region.length);
1378			goto unlock;
1379		}
1380	}
1381
1382  sendresponse:
1383	queue_response = resp->item_out;
1384	rev = allocate_event(resp->disp);
1385	if (rev == NULL) {
1386		free_buffer(disp, ev->region.base, ev->region.length);
1387		goto unlock;
1388	}
1389
1390	/*
1391	 * At this point, rev contains the event we want to fill in, and
1392	 * resp contains the information on the place to send it to.
1393	 * Send the event off.
1394	 */
1395	isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1396	isc_buffer_add(&rev->buffer, ev->n);
1397	rev->result = ev->result;
1398	rev->id = id;
1399	rev->addr = ev->address;
1400	rev->pktinfo = ev->pktinfo;
1401	rev->attributes = ev->attributes;
1402	if (queue_response) {
1403		ISC_LIST_APPEND(resp->items, rev, ev_link);
1404	} else {
1405		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1406			       DNS_EVENT_DISPATCH,
1407			       resp->action, resp->arg, resp, NULL, NULL);
1408		request_log(disp, resp, LVL(90),
1409			    "[a] Sent event %p buffer %p len %d to task %p",
1410			    rev, rev->buffer.base, rev->buffer.length,
1411			    resp->task);
1412		resp->item_out = ISC_TRUE;
1413		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1414	}
1415 unlock:
1416	if (qidlocked)
1417		UNLOCK(&qid->lock);
1418
1419	/*
1420	 * Restart recv() to get the next packet.
1421	 */
1422 restart:
1423	result = startrecv(disp, dispsock);
1424	if (result != ISC_R_SUCCESS && dispsock != NULL) {
1425		/*
1426		 * XXX: wired. There seems to be no recovery process other than
1427		 * deactivate this socket anyway (since we cannot start
1428		 * receiving, we won't be able to receive a cancel event
1429		 * from the user).
1430		 */
1431		deactivate_dispsocket(disp, dispsock);
1432	}
1433	UNLOCK(&disp->lock);
1434
1435	isc_event_free(&ev_in);
1436}
1437
1438/*
1439 * General flow:
1440 *
1441 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1442 * various queues drain.
1443 *
1444 * If query, restart.
1445 *
1446 * If response:
1447 *	Allocate event, fill in details.
1448 *		If cannot allocate, restart.
1449 *	find target.  If not found, restart.
1450 *	if event queue is not empty, queue.  else, send.
1451 *	restart.
1452 */
1453static void
1454tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1455	dns_dispatch_t *disp = ev_in->ev_arg;
1456	dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1457	dns_messageid_t id;
1458	isc_result_t dres;
1459	unsigned int flags;
1460	dns_dispentry_t *resp;
1461	dns_dispatchevent_t *rev;
1462	unsigned int bucket;
1463	isc_boolean_t killit;
1464	isc_boolean_t queue_response;
1465	dns_qid_t *qid;
1466	int level;
1467	char buf[ISC_SOCKADDR_FORMATSIZE];
1468
1469	UNUSED(task);
1470
1471	REQUIRE(VALID_DISPATCH(disp));
1472
1473	qid = disp->qid;
1474
1475	dispatch_log(disp, LVL(90),
1476		     "got TCP packet: requests %d, buffers %d, recvs %d",
1477		     disp->requests, disp->tcpbuffers, disp->recv_pending);
1478
1479	LOCK(&disp->lock);
1480
1481	INSIST(disp->recv_pending != 0);
1482	disp->recv_pending = 0;
1483
1484	if (disp->refcount == 0) {
1485		/*
1486		 * This dispatcher is shutting down.  Force cancelation.
1487		 */
1488		tcpmsg->result = ISC_R_CANCELED;
1489	}
1490
1491	if (tcpmsg->result != ISC_R_SUCCESS) {
1492		switch (tcpmsg->result) {
1493		case ISC_R_CANCELED:
1494			break;
1495
1496		case ISC_R_EOF:
1497			dispatch_log(disp, LVL(90), "shutting down on EOF");
1498			do_cancel(disp);
1499			break;
1500
1501		case ISC_R_CONNECTIONRESET:
1502			level = ISC_LOG_INFO;
1503			goto logit;
1504
1505		default:
1506			level = ISC_LOG_ERROR;
1507		logit:
1508			isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1509			dispatch_log(disp, level, "shutting down due to TCP "
1510				     "receive error: %s: %s", buf,
1511				     isc_result_totext(tcpmsg->result));
1512			do_cancel(disp);
1513			break;
1514		}
1515
1516		/*
1517		 * The event is statically allocated in the tcpmsg
1518		 * structure, and destroy_disp() frees the tcpmsg, so we must
1519		 * free the event *before* calling destroy_disp().
1520		 */
1521		isc_event_free(&ev_in);
1522
1523		disp->shutting_down = 1;
1524		disp->shutdown_why = tcpmsg->result;
1525
1526		/*
1527		 * If the recv() was canceled pass the word on.
1528		 */
1529		killit = destroy_disp_ok(disp);
1530		UNLOCK(&disp->lock);
1531		if (killit)
1532			isc_task_send(disp->task[0], &disp->ctlevent);
1533		return;
1534	}
1535
1536	dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1537		     tcpmsg->result,
1538		     tcpmsg->buffer.length, tcpmsg->buffer.base);
1539
1540	/*
1541	 * Peek into the buffer to see what we can see.
1542	 */
1543	dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1544	if (dres != ISC_R_SUCCESS) {
1545		dispatch_log(disp, LVL(10), "got garbage packet");
1546		goto restart;
1547	}
1548
1549	dispatch_log(disp, LVL(92),
1550		     "got valid DNS message header, /QR %c, id %u",
1551		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1552
1553	/*
1554	 * Allocate an event to send to the query or response client, and
1555	 * allocate a new buffer for our use.
1556	 */
1557
1558	/*
1559	 * Look at flags.  If query, drop it. If response,
1560	 * look to see where it goes.
1561	 */
1562	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1563		/*
1564		 * Query.
1565		 */
1566		goto restart;
1567	}
1568
1569	/*
1570	 * Response.
1571	 */
1572	bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1573	LOCK(&qid->lock);
1574	resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1575	dispatch_log(disp, LVL(90),
1576		     "search for response in bucket %d: %s",
1577		     bucket, (resp == NULL ? "not found" : "found"));
1578
1579	if (resp == NULL)
1580		goto unlock;
1581	queue_response = resp->item_out;
1582	rev = allocate_event(disp);
1583	if (rev == NULL)
1584		goto unlock;
1585
1586	/*
1587	 * At this point, rev contains the event we want to fill in, and
1588	 * resp contains the information on the place to send it to.
1589	 * Send the event off.
1590	 */
1591	dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1592	disp->tcpbuffers++;
1593	rev->result = ISC_R_SUCCESS;
1594	rev->id = id;
1595	rev->addr = tcpmsg->address;
1596	if (queue_response) {
1597		ISC_LIST_APPEND(resp->items, rev, ev_link);
1598	} else {
1599		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1600			       resp->action, resp->arg, resp, NULL, NULL);
1601		request_log(disp, resp, LVL(90),
1602			    "[b] Sent event %p buffer %p len %d to task %p",
1603			    rev, rev->buffer.base, rev->buffer.length,
1604			    resp->task);
1605		resp->item_out = ISC_TRUE;
1606		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1607	}
1608 unlock:
1609	UNLOCK(&qid->lock);
1610
1611	/*
1612	 * Restart recv() to get the next packet.
1613	 */
1614 restart:
1615	(void)startrecv(disp, NULL);
1616
1617	UNLOCK(&disp->lock);
1618
1619	isc_event_free(&ev_in);
1620}
1621
1622/*
1623 * disp must be locked.
1624 */
1625static isc_result_t
1626startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1627	isc_result_t res;
1628	isc_region_t region;
1629	isc_socket_t *socket;
1630
1631	if (disp->shutting_down == 1)
1632		return (ISC_R_SUCCESS);
1633
1634	if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1635		return (ISC_R_SUCCESS);
1636
1637	if (disp->recv_pending != 0 && dispsock == NULL)
1638		return (ISC_R_SUCCESS);
1639
1640	if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1641		return (ISC_R_NOMEMORY);
1642
1643	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1644	    dispsock == NULL)
1645		return (ISC_R_SUCCESS);
1646
1647	if (dispsock != NULL)
1648		socket = dispsock->socket;
1649	else
1650		socket = disp->socket;
1651	INSIST(socket != NULL);
1652
1653	switch (disp->socktype) {
1654		/*
1655		 * UDP reads are always maximal.
1656		 */
1657	case isc_sockettype_udp:
1658		region.length = disp->mgr->buffersize;
1659		region.base = allocate_udp_buffer(disp);
1660		if (region.base == NULL)
1661			return (ISC_R_NOMEMORY);
1662		if (dispsock != NULL) {
1663			res = isc_socket_recv(socket, &region, 1,
1664					      dispsock->task, udp_exrecv,
1665					      dispsock);
1666			if (res != ISC_R_SUCCESS) {
1667				free_buffer(disp, region.base, region.length);
1668				return (res);
1669			}
1670		} else {
1671			res = isc_socket_recv(socket, &region, 1,
1672					      disp->task[0], udp_shrecv, disp);
1673			if (res != ISC_R_SUCCESS) {
1674				free_buffer(disp, region.base, region.length);
1675				disp->shutdown_why = res;
1676				disp->shutting_down = 1;
1677				do_cancel(disp);
1678				return (ISC_R_SUCCESS); /* recover by cancel */
1679			}
1680			INSIST(disp->recv_pending == 0);
1681			disp->recv_pending = 1;
1682		}
1683		break;
1684
1685	case isc_sockettype_tcp:
1686		res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1687					     tcp_recv, disp);
1688		if (res != ISC_R_SUCCESS) {
1689			disp->shutdown_why = res;
1690			disp->shutting_down = 1;
1691			do_cancel(disp);
1692			return (ISC_R_SUCCESS); /* recover by cancel */
1693		}
1694		INSIST(disp->recv_pending == 0);
1695		disp->recv_pending = 1;
1696		break;
1697	default:
1698		INSIST(0);
1699		break;
1700	}
1701
1702	return (ISC_R_SUCCESS);
1703}
1704
1705/*
1706 * Mgr must be locked when calling this function.
1707 */
1708static isc_boolean_t
1709destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1710	mgr_log(mgr, LVL(90),
1711		"destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1712		"epool=%d, rpool=%d, dpool=%d",
1713		MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1714		isc_mempool_getallocated(mgr->epool),
1715		isc_mempool_getallocated(mgr->rpool),
1716		isc_mempool_getallocated(mgr->dpool));
1717	if (!MGR_IS_SHUTTINGDOWN(mgr))
1718		return (ISC_FALSE);
1719	if (!ISC_LIST_EMPTY(mgr->list))
1720		return (ISC_FALSE);
1721	if (isc_mempool_getallocated(mgr->epool) != 0)
1722		return (ISC_FALSE);
1723	if (isc_mempool_getallocated(mgr->rpool) != 0)
1724		return (ISC_FALSE);
1725	if (isc_mempool_getallocated(mgr->dpool) != 0)
1726		return (ISC_FALSE);
1727
1728	return (ISC_TRUE);
1729}
1730
1731/*
1732 * Mgr must be unlocked when calling this function.
1733 */
1734static void
1735destroy_mgr(dns_dispatchmgr_t **mgrp) {
1736	isc_mem_t *mctx;
1737	dns_dispatchmgr_t *mgr;
1738
1739	mgr = *mgrp;
1740	*mgrp = NULL;
1741
1742	mctx = mgr->mctx;
1743
1744	mgr->magic = 0;
1745	mgr->mctx = NULL;
1746	DESTROYLOCK(&mgr->lock);
1747	mgr->state = 0;
1748
1749	DESTROYLOCK(&mgr->arc4_lock);
1750
1751	isc_mempool_destroy(&mgr->epool);
1752	isc_mempool_destroy(&mgr->rpool);
1753	isc_mempool_destroy(&mgr->dpool);
1754	if (mgr->bpool != NULL)
1755		isc_mempool_destroy(&mgr->bpool);
1756	if (mgr->spool != NULL)
1757		isc_mempool_destroy(&mgr->spool);
1758
1759	DESTROYLOCK(&mgr->pool_lock);
1760
1761#ifdef BIND9
1762	if (mgr->entropy != NULL)
1763		isc_entropy_detach(&mgr->entropy);
1764#endif /* BIND9 */
1765	if (mgr->qid != NULL)
1766		qid_destroy(mctx, &mgr->qid);
1767
1768	DESTROYLOCK(&mgr->buffer_lock);
1769
1770	if (mgr->blackhole != NULL)
1771		dns_acl_detach(&mgr->blackhole);
1772
1773	if (mgr->stats != NULL)
1774		isc_stats_detach(&mgr->stats);
1775
1776	if (mgr->v4ports != NULL) {
1777		isc_mem_put(mctx, mgr->v4ports,
1778			    mgr->nv4ports * sizeof(in_port_t));
1779	}
1780	if (mgr->v6ports != NULL) {
1781		isc_mem_put(mctx, mgr->v6ports,
1782			    mgr->nv6ports * sizeof(in_port_t));
1783	}
1784	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1785	isc_mem_detach(&mctx);
1786}
1787
1788static isc_result_t
1789open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1790	    unsigned int options, isc_socket_t **sockp)
1791{
1792	isc_socket_t *sock;
1793	isc_result_t result;
1794
1795	sock = *sockp;
1796	if (sock == NULL) {
1797		result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1798					   isc_sockettype_udp, &sock);
1799		if (result != ISC_R_SUCCESS)
1800			return (result);
1801		isc_socket_setname(sock, "dispatcher", NULL);
1802	} else {
1803#ifdef BIND9
1804		result = isc_socket_open(sock);
1805		if (result != ISC_R_SUCCESS)
1806			return (result);
1807#else
1808		INSIST(0);
1809#endif
1810	}
1811
1812#ifndef ISC_ALLOW_MAPPED
1813	isc_socket_ipv6only(sock, ISC_TRUE);
1814#endif
1815	result = isc_socket_bind(sock, local, options);
1816	if (result != ISC_R_SUCCESS) {
1817		if (*sockp == NULL)
1818			isc_socket_detach(&sock);
1819		else {
1820#ifdef BIND9
1821			isc_socket_close(sock);
1822#else
1823			INSIST(0);
1824#endif
1825		}
1826		return (result);
1827	}
1828
1829	*sockp = sock;
1830	return (ISC_R_SUCCESS);
1831}
1832
1833/*%
1834 * Create a temporary port list to set the initial default set of dispatch
1835 * ports: [1024, 65535].  This is almost meaningless as the application will
1836 * normally set the ports explicitly, but is provided to fill some minor corner
1837 * cases.
1838 */
1839static isc_result_t
1840create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1841	isc_result_t result;
1842
1843	result = isc_portset_create(mctx, portsetp);
1844	if (result != ISC_R_SUCCESS)
1845		return (result);
1846	isc_portset_addrange(*portsetp, 1024, 65535);
1847
1848	return (ISC_R_SUCCESS);
1849}
1850
1851/*
1852 * Publics.
1853 */
1854
1855isc_result_t
1856dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1857		       dns_dispatchmgr_t **mgrp)
1858{
1859	dns_dispatchmgr_t *mgr;
1860	isc_result_t result;
1861	isc_portset_t *v4portset = NULL;
1862	isc_portset_t *v6portset = NULL;
1863
1864	REQUIRE(mctx != NULL);
1865	REQUIRE(mgrp != NULL && *mgrp == NULL);
1866
1867	mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1868	if (mgr == NULL)
1869		return (ISC_R_NOMEMORY);
1870
1871	mgr->mctx = NULL;
1872	isc_mem_attach(mctx, &mgr->mctx);
1873
1874	mgr->blackhole = NULL;
1875	mgr->stats = NULL;
1876
1877	result = isc_mutex_init(&mgr->lock);
1878	if (result != ISC_R_SUCCESS)
1879		goto deallocate;
1880
1881	result = isc_mutex_init(&mgr->arc4_lock);
1882	if (result != ISC_R_SUCCESS)
1883		goto kill_lock;
1884
1885	result = isc_mutex_init(&mgr->buffer_lock);
1886	if (result != ISC_R_SUCCESS)
1887		goto kill_arc4_lock;
1888
1889	result = isc_mutex_init(&mgr->pool_lock);
1890	if (result != ISC_R_SUCCESS)
1891		goto kill_buffer_lock;
1892
1893	mgr->epool = NULL;
1894	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1895			       &mgr->epool) != ISC_R_SUCCESS) {
1896		result = ISC_R_NOMEMORY;
1897		goto kill_pool_lock;
1898	}
1899
1900	mgr->rpool = NULL;
1901	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1902			       &mgr->rpool) != ISC_R_SUCCESS) {
1903		result = ISC_R_NOMEMORY;
1904		goto kill_epool;
1905	}
1906
1907	mgr->dpool = NULL;
1908	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1909			       &mgr->dpool) != ISC_R_SUCCESS) {
1910		result = ISC_R_NOMEMORY;
1911		goto kill_rpool;
1912	}
1913
1914	isc_mempool_setname(mgr->epool, "dispmgr_epool");
1915	isc_mempool_setfreemax(mgr->epool, 1024);
1916	isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1917
1918	isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1919	isc_mempool_setfreemax(mgr->rpool, 1024);
1920	isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1921
1922	isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1923	isc_mempool_setfreemax(mgr->dpool, 1024);
1924	isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1925
1926	mgr->buffers = 0;
1927	mgr->buffersize = 0;
1928	mgr->maxbuffers = 0;
1929	mgr->bpool = NULL;
1930	mgr->spool = NULL;
1931	mgr->entropy = NULL;
1932	mgr->qid = NULL;
1933	mgr->state = 0;
1934	ISC_LIST_INIT(mgr->list);
1935	mgr->v4ports = NULL;
1936	mgr->v6ports = NULL;
1937	mgr->nv4ports = 0;
1938	mgr->nv6ports = 0;
1939	mgr->magic = DNS_DISPATCHMGR_MAGIC;
1940
1941	result = create_default_portset(mctx, &v4portset);
1942	if (result == ISC_R_SUCCESS) {
1943		result = create_default_portset(mctx, &v6portset);
1944		if (result == ISC_R_SUCCESS) {
1945			result = dns_dispatchmgr_setavailports(mgr,
1946							       v4portset,
1947							       v6portset);
1948		}
1949	}
1950	if (v4portset != NULL)
1951		isc_portset_destroy(mctx, &v4portset);
1952	if (v6portset != NULL)
1953		isc_portset_destroy(mctx, &v6portset);
1954	if (result != ISC_R_SUCCESS)
1955		goto kill_dpool;
1956
1957#ifdef BIND9
1958	if (entropy != NULL)
1959		isc_entropy_attach(entropy, &mgr->entropy);
1960#else
1961	UNUSED(entropy);
1962#endif
1963
1964	dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
1965
1966	*mgrp = mgr;
1967	return (ISC_R_SUCCESS);
1968
1969 kill_dpool:
1970	isc_mempool_destroy(&mgr->dpool);
1971 kill_rpool:
1972	isc_mempool_destroy(&mgr->rpool);
1973 kill_epool:
1974	isc_mempool_destroy(&mgr->epool);
1975 kill_pool_lock:
1976	DESTROYLOCK(&mgr->pool_lock);
1977 kill_buffer_lock:
1978	DESTROYLOCK(&mgr->buffer_lock);
1979 kill_arc4_lock:
1980	DESTROYLOCK(&mgr->arc4_lock);
1981 kill_lock:
1982	DESTROYLOCK(&mgr->lock);
1983 deallocate:
1984	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1985	isc_mem_detach(&mctx);
1986
1987	return (result);
1988}
1989
1990void
1991dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1992	REQUIRE(VALID_DISPATCHMGR(mgr));
1993	if (mgr->blackhole != NULL)
1994		dns_acl_detach(&mgr->blackhole);
1995	dns_acl_attach(blackhole, &mgr->blackhole);
1996}
1997
1998dns_acl_t *
1999dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2000	REQUIRE(VALID_DISPATCHMGR(mgr));
2001	return (mgr->blackhole);
2002}
2003
2004void
2005dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2006				 dns_portlist_t *portlist)
2007{
2008	REQUIRE(VALID_DISPATCHMGR(mgr));
2009	UNUSED(portlist);
2010
2011	/* This function is deprecated: use dns_dispatchmgr_setavailports(). */
2012	return;
2013}
2014
2015dns_portlist_t *
2016dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2017	REQUIRE(VALID_DISPATCHMGR(mgr));
2018	return (NULL);		/* this function is deprecated */
2019}
2020
2021isc_result_t
2022dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2023			      isc_portset_t *v6portset)
2024{
2025	in_port_t *v4ports, *v6ports, p;
2026	unsigned int nv4ports, nv6ports, i4, i6;
2027
2028	REQUIRE(VALID_DISPATCHMGR(mgr));
2029
2030	nv4ports = isc_portset_nports(v4portset);
2031	nv6ports = isc_portset_nports(v6portset);
2032
2033	v4ports = NULL;
2034	if (nv4ports != 0) {
2035		v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2036		if (v4ports == NULL)
2037			return (ISC_R_NOMEMORY);
2038	}
2039	v6ports = NULL;
2040	if (nv6ports != 0) {
2041		v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2042		if (v6ports == NULL) {
2043			if (v4ports != NULL) {
2044				isc_mem_put(mgr->mctx, v4ports,
2045					    sizeof(in_port_t) *
2046					    isc_portset_nports(v4portset));
2047			}
2048			return (ISC_R_NOMEMORY);
2049		}
2050	}
2051
2052	p = 0;
2053	i4 = 0;
2054	i6 = 0;
2055	do {
2056		if (isc_portset_isset(v4portset, p)) {
2057			INSIST(i4 < nv4ports);
2058			v4ports[i4++] = p;
2059		}
2060		if (isc_portset_isset(v6portset, p)) {
2061			INSIST(i6 < nv6ports);
2062			v6ports[i6++] = p;
2063		}
2064	} while (p++ < 65535);
2065	INSIST(i4 == nv4ports && i6 == nv6ports);
2066
2067	PORTBUFLOCK(mgr);
2068	if (mgr->v4ports != NULL) {
2069		isc_mem_put(mgr->mctx, mgr->v4ports,
2070			    mgr->nv4ports * sizeof(in_port_t));
2071	}
2072	mgr->v4ports = v4ports;
2073	mgr->nv4ports = nv4ports;
2074
2075	if (mgr->v6ports != NULL) {
2076		isc_mem_put(mgr->mctx, mgr->v6ports,
2077			    mgr->nv6ports * sizeof(in_port_t));
2078	}
2079	mgr->v6ports = v6ports;
2080	mgr->nv6ports = nv6ports;
2081	PORTBUFUNLOCK(mgr);
2082
2083	return (ISC_R_SUCCESS);
2084}
2085
2086static isc_result_t
2087dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2088		       unsigned int buffersize, unsigned int maxbuffers,
2089		       unsigned int maxrequests, unsigned int buckets,
2090		       unsigned int increment)
2091{
2092	isc_result_t result;
2093
2094	REQUIRE(VALID_DISPATCHMGR(mgr));
2095	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2096	REQUIRE(maxbuffers > 0);
2097	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2098	REQUIRE(increment > buckets);
2099
2100	/*
2101	 * Keep some number of items around.  This should be a config
2102	 * option.  For now, keep 8, but later keep at least two even
2103	 * if the caller wants less.  This allows us to ensure certain
2104	 * things, like an event can be "freed" and the next allocation
2105	 * will always succeed.
2106	 *
2107	 * Note that if limits are placed on anything here, we use one
2108	 * event internally, so the actual limit should be "wanted + 1."
2109	 *
2110	 * XXXMLG
2111	 */
2112
2113	if (maxbuffers < 8)
2114		maxbuffers = 8;
2115
2116	LOCK(&mgr->buffer_lock);
2117
2118	/* Create or adjust buffer pool */
2119	if (mgr->bpool != NULL) {
2120		/*
2121		 * We only increase the maxbuffers to avoid accidental buffer
2122		 * shortage.  Ideally we'd separate the manager-wide maximum
2123		 * from per-dispatch limits and respect the latter within the
2124		 * global limit.  But at this moment that's deemed to be
2125		 * overkilling and isn't worth additional implementation
2126		 * complexity.
2127		 */
2128		if (maxbuffers > mgr->maxbuffers) {
2129			isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2130			mgr->maxbuffers = maxbuffers;
2131		}
2132	} else {
2133		result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2134		if (result != ISC_R_SUCCESS) {
2135			UNLOCK(&mgr->buffer_lock);
2136			return (result);
2137		}
2138		isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2139		isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2140		isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
2141	}
2142
2143	/* Create or adjust socket pool */
2144	if (mgr->spool != NULL) {
2145		isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2146		UNLOCK(&mgr->buffer_lock);
2147		return (ISC_R_SUCCESS);
2148	}
2149	result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2150				    &mgr->spool);
2151	if (result != ISC_R_SUCCESS) {
2152		UNLOCK(&mgr->buffer_lock);
2153		goto cleanup;
2154	}
2155	isc_mempool_setname(mgr->spool, "dispmgr_spool");
2156	isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2157	isc_mempool_associatelock(mgr->spool, &mgr->pool_lock);
2158
2159	result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2160	if (result != ISC_R_SUCCESS)
2161		goto cleanup;
2162
2163	mgr->buffersize = buffersize;
2164	mgr->maxbuffers = maxbuffers;
2165	UNLOCK(&mgr->buffer_lock);
2166	return (ISC_R_SUCCESS);
2167
2168 cleanup:
2169	isc_mempool_destroy(&mgr->bpool);
2170	if (mgr->spool != NULL)
2171		isc_mempool_destroy(&mgr->spool);
2172	UNLOCK(&mgr->buffer_lock);
2173	return (result);
2174}
2175
2176void
2177dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2178	dns_dispatchmgr_t *mgr;
2179	isc_boolean_t killit;
2180
2181	REQUIRE(mgrp != NULL);
2182	REQUIRE(VALID_DISPATCHMGR(*mgrp));
2183
2184	mgr = *mgrp;
2185	*mgrp = NULL;
2186
2187	LOCK(&mgr->lock);
2188	mgr->state |= MGR_SHUTTINGDOWN;
2189
2190	killit = destroy_mgr_ok(mgr);
2191	UNLOCK(&mgr->lock);
2192
2193	mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2194
2195	if (killit)
2196		destroy_mgr(&mgr);
2197}
2198
2199void
2200dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2201	REQUIRE(VALID_DISPATCHMGR(mgr));
2202	REQUIRE(ISC_LIST_EMPTY(mgr->list));
2203	REQUIRE(mgr->stats == NULL);
2204
2205	isc_stats_attach(stats, &mgr->stats);
2206}
2207
/*
 * bsearch() comparison callback for arrays of in_port_t.
 *
 * Returns -1, 0 or 1 when the port at 'key' is respectively smaller than,
 * equal to, or greater than the port at 'ent'.
 */
static int
port_cmp(const void *key, const void *ent)
{
	const in_port_t wanted = *(const in_port_t *)key;
	const in_port_t candidate = *(const in_port_t *)ent;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return ((wanted > candidate) - (wanted < candidate));
}
2220
2221static isc_boolean_t
2222portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2223	      isc_sockaddr_t *sockaddrp)
2224{
2225	isc_sockaddr_t sockaddr;
2226	isc_result_t result;
2227	in_port_t *ports, port;
2228	unsigned int nports;
2229	isc_boolean_t available = ISC_FALSE;
2230
2231	REQUIRE(sock != NULL || sockaddrp != NULL);
2232
2233	PORTBUFLOCK(mgr);
2234	if (sock != NULL) {
2235		sockaddrp = &sockaddr;
2236		result = isc_socket_getsockname(sock, sockaddrp);
2237		if (result != ISC_R_SUCCESS)
2238			goto unlock;
2239	}
2240
2241	if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2242		ports = mgr->v4ports;
2243		nports = mgr->nv4ports;
2244	} else {
2245		ports = mgr->v6ports;
2246		nports = mgr->nv6ports;
2247	}
2248	if (ports == NULL)
2249		goto unlock;
2250
2251	port = isc_sockaddr_getport(sockaddrp);
2252	if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2253		available = ISC_TRUE;
2254
2255unlock:
2256	PORTBUFUNLOCK(mgr);
2257	return (available);
2258}
2259
/* True when _a1 and _a2 agree on every bit that is set in _mask. */
#define ATTRMATCH(_a1, _a2, _mask) (((_mask) & (_a1)) == ((_mask) & (_a2)))
2261
2262static isc_boolean_t
2263local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2264	isc_sockaddr_t sockaddr;
2265	isc_result_t result;
2266
2267	REQUIRE(disp->socket != NULL);
2268
2269	if (addr == NULL)
2270		return (ISC_TRUE);
2271
2272	/*
2273	 * Don't match wildcard ports unless the port is available in the
2274	 * current configuration.
2275	 */
2276	if (isc_sockaddr_getport(addr) == 0 &&
2277	    isc_sockaddr_getport(&disp->local) == 0 &&
2278	    !portavailable(disp->mgr, disp->socket, NULL)) {
2279		return (ISC_FALSE);
2280	}
2281
2282	/*
2283	 * Check if we match the binding <address,port>.
2284	 * Wildcard ports match/fail here.
2285	 */
2286	if (isc_sockaddr_equal(&disp->local, addr))
2287		return (ISC_TRUE);
2288	if (isc_sockaddr_getport(addr) == 0)
2289		return (ISC_FALSE);
2290
2291	/*
2292	 * Check if we match a bound wildcard port <address,port>.
2293	 */
2294	if (!isc_sockaddr_eqaddr(&disp->local, addr))
2295		return (ISC_FALSE);
2296	result = isc_socket_getsockname(disp->socket, &sockaddr);
2297	if (result != ISC_R_SUCCESS)
2298		return (ISC_FALSE);
2299
2300	return (isc_sockaddr_equal(&sockaddr, addr));
2301}
2302
2303/*
2304 * Requires mgr be locked.
2305 *
2306 * No dispatcher can be locked by this thread when calling this function.
2307 *
2308 *
2309 * NOTE:
2310 *	If a matching dispatcher is found, it is locked after this function
2311 *	returns, and must be unlocked by the caller.
2312 */
2313static isc_result_t
2314dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2315	      unsigned int attributes, unsigned int mask,
2316	      dns_dispatch_t **dispp)
2317{
2318	dns_dispatch_t *disp;
2319	isc_result_t result;
2320
2321	/*
2322	 * Make certain that we will not match a private or exclusive dispatch.
2323	 */
2324	attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2325	mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2326
2327	disp = ISC_LIST_HEAD(mgr->list);
2328	while (disp != NULL) {
2329		LOCK(&disp->lock);
2330		if ((disp->shutting_down == 0)
2331		    && ATTRMATCH(disp->attributes, attributes, mask)
2332		    && local_addr_match(disp, local))
2333			break;
2334		UNLOCK(&disp->lock);
2335		disp = ISC_LIST_NEXT(disp, link);
2336	}
2337
2338	if (disp == NULL) {
2339		result = ISC_R_NOTFOUND;
2340		goto out;
2341	}
2342
2343	*dispp = disp;
2344	result = ISC_R_SUCCESS;
2345 out:
2346
2347	return (result);
2348}
2349
2350static isc_result_t
2351qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2352	     unsigned int increment, dns_qid_t **qidp,
2353	     isc_boolean_t needsocktable)
2354{
2355	dns_qid_t *qid;
2356	unsigned int i;
2357	isc_result_t result;
2358
2359	REQUIRE(VALID_DISPATCHMGR(mgr));
2360	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2361	REQUIRE(increment > buckets);
2362	REQUIRE(qidp != NULL && *qidp == NULL);
2363
2364	qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2365	if (qid == NULL)
2366		return (ISC_R_NOMEMORY);
2367
2368	qid->qid_table = isc_mem_get(mgr->mctx,
2369				     buckets * sizeof(dns_displist_t));
2370	if (qid->qid_table == NULL) {
2371		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2372		return (ISC_R_NOMEMORY);
2373	}
2374
2375	qid->sock_table = NULL;
2376	if (needsocktable) {
2377		qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2378					      sizeof(dispsocketlist_t));
2379		if (qid->sock_table == NULL) {
2380			isc_mem_put(mgr->mctx, qid->qid_table,
2381				    buckets * sizeof(dns_displist_t));
2382			isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2383			return (ISC_R_NOMEMORY);
2384		}
2385	}
2386
2387	result = isc_mutex_init(&qid->lock);
2388	if (result != ISC_R_SUCCESS) {
2389		if (qid->sock_table != NULL) {
2390			isc_mem_put(mgr->mctx, qid->sock_table,
2391				    buckets * sizeof(dispsocketlist_t));
2392		}
2393		isc_mem_put(mgr->mctx, qid->qid_table,
2394			    buckets * sizeof(dns_displist_t));
2395		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2396		return (result);
2397	}
2398
2399	for (i = 0; i < buckets; i++) {
2400		ISC_LIST_INIT(qid->qid_table[i]);
2401		if (qid->sock_table != NULL)
2402			ISC_LIST_INIT(qid->sock_table[i]);
2403	}
2404
2405	qid->qid_nbuckets = buckets;
2406	qid->qid_increment = increment;
2407	qid->magic = QID_MAGIC;
2408	*qidp = qid;
2409	return (ISC_R_SUCCESS);
2410}
2411
2412static void
2413qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2414	dns_qid_t *qid;
2415
2416	REQUIRE(qidp != NULL);
2417	qid = *qidp;
2418
2419	REQUIRE(VALID_QID(qid));
2420
2421	*qidp = NULL;
2422	qid->magic = 0;
2423	isc_mem_put(mctx, qid->qid_table,
2424		    qid->qid_nbuckets * sizeof(dns_displist_t));
2425	if (qid->sock_table != NULL) {
2426		isc_mem_put(mctx, qid->sock_table,
2427			    qid->qid_nbuckets * sizeof(dispsocketlist_t));
2428	}
2429	DESTROYLOCK(&qid->lock);
2430	isc_mem_put(mctx, qid, sizeof(*qid));
2431}
2432
2433/*
2434 * Allocate and set important limits.
2435 */
2436static isc_result_t
2437dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2438		  dns_dispatch_t **dispp)
2439{
2440	dns_dispatch_t *disp;
2441	isc_result_t result;
2442
2443	REQUIRE(VALID_DISPATCHMGR(mgr));
2444	REQUIRE(dispp != NULL && *dispp == NULL);
2445
2446	/*
2447	 * Set up the dispatcher, mostly.  Don't bother setting some of
2448	 * the options that are controlled by tcp vs. udp, etc.
2449	 */
2450
2451	disp = isc_mempool_get(mgr->dpool);
2452	if (disp == NULL)
2453		return (ISC_R_NOMEMORY);
2454
2455	disp->magic = 0;
2456	disp->mgr = mgr;
2457	disp->maxrequests = maxrequests;
2458	disp->attributes = 0;
2459	ISC_LINK_INIT(disp, link);
2460	disp->refcount = 1;
2461	disp->recv_pending = 0;
2462	memset(&disp->local, 0, sizeof(disp->local));
2463	disp->localport = 0;
2464	disp->shutting_down = 0;
2465	disp->shutdown_out = 0;
2466	disp->connected = 0;
2467	disp->tcpmsg_valid = 0;
2468	disp->shutdown_why = ISC_R_UNEXPECTED;
2469	disp->requests = 0;
2470	disp->tcpbuffers = 0;
2471	disp->qid = NULL;
2472	ISC_LIST_INIT(disp->activesockets);
2473	ISC_LIST_INIT(disp->inactivesockets);
2474	disp->nsockets = 0;
2475	dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2476	disp->port_table = NULL;
2477	disp->portpool = NULL;
2478
2479	result = isc_mutex_init(&disp->lock);
2480	if (result != ISC_R_SUCCESS)
2481		goto deallocate;
2482
2483	disp->failsafe_ev = allocate_event(disp);
2484	if (disp->failsafe_ev == NULL) {
2485		result = ISC_R_NOMEMORY;
2486		goto kill_lock;
2487	}
2488
2489	disp->magic = DISPATCH_MAGIC;
2490
2491	*dispp = disp;
2492	return (ISC_R_SUCCESS);
2493
2494	/*
2495	 * error returns
2496	 */
2497 kill_lock:
2498	DESTROYLOCK(&disp->lock);
2499 deallocate:
2500	isc_mempool_put(mgr->dpool, disp);
2501
2502	return (result);
2503}
2504
2505
2506/*
2507 * MUST be unlocked, and not used by anything.
2508 */
2509static void
2510dispatch_free(dns_dispatch_t **dispp)
2511{
2512	dns_dispatch_t *disp;
2513	dns_dispatchmgr_t *mgr;
2514	int i;
2515
2516	REQUIRE(VALID_DISPATCH(*dispp));
2517	disp = *dispp;
2518	*dispp = NULL;
2519
2520	mgr = disp->mgr;
2521	REQUIRE(VALID_DISPATCHMGR(mgr));
2522
2523	if (disp->tcpmsg_valid) {
2524		dns_tcpmsg_invalidate(&disp->tcpmsg);
2525		disp->tcpmsg_valid = 0;
2526	}
2527
2528	INSIST(disp->tcpbuffers == 0);
2529	INSIST(disp->requests == 0);
2530	INSIST(disp->recv_pending == 0);
2531	INSIST(ISC_LIST_EMPTY(disp->activesockets));
2532	INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2533
2534	isc_mempool_put(mgr->epool, disp->failsafe_ev);
2535	disp->failsafe_ev = NULL;
2536
2537	if (disp->qid != NULL)
2538		qid_destroy(mgr->mctx, &disp->qid);
2539
2540	if (disp->port_table != NULL) {
2541		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2542			INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2543		isc_mem_put(mgr->mctx, disp->port_table,
2544			    sizeof(disp->port_table[0]) *
2545			    DNS_DISPATCH_PORTTABLESIZE);
2546	}
2547
2548	if (disp->portpool != NULL)
2549		isc_mempool_destroy(&disp->portpool);
2550
2551	disp->mgr = NULL;
2552	DESTROYLOCK(&disp->lock);
2553	disp->magic = 0;
2554	isc_mempool_put(mgr->dpool, disp);
2555}
2556
2557isc_result_t
2558dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2559		       isc_taskmgr_t *taskmgr, unsigned int buffersize,
2560		       unsigned int maxbuffers, unsigned int maxrequests,
2561		       unsigned int buckets, unsigned int increment,
2562		       unsigned int attributes, dns_dispatch_t **dispp)
2563{
2564	isc_result_t result;
2565	dns_dispatch_t *disp;
2566
2567	UNUSED(maxbuffers);
2568	UNUSED(buffersize);
2569
2570	REQUIRE(VALID_DISPATCHMGR(mgr));
2571	REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2572	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2573	REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2574
2575	attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
2576
2577	LOCK(&mgr->lock);
2578
2579	/*
2580	 * dispatch_allocate() checks mgr for us.
2581	 * qid_allocate() checks buckets and increment for us.
2582	 */
2583	disp = NULL;
2584	result = dispatch_allocate(mgr, maxrequests, &disp);
2585	if (result != ISC_R_SUCCESS) {
2586		UNLOCK(&mgr->lock);
2587		return (result);
2588	}
2589
2590	result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2591	if (result != ISC_R_SUCCESS)
2592		goto deallocate_dispatch;
2593
2594	disp->socktype = isc_sockettype_tcp;
2595	disp->socket = NULL;
2596	isc_socket_attach(sock, &disp->socket);
2597
2598	disp->ntasks = 1;
2599	disp->task[0] = NULL;
2600	result = isc_task_create(taskmgr, 0, &disp->task[0]);
2601	if (result != ISC_R_SUCCESS)
2602		goto kill_socket;
2603
2604	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2605					    DNS_EVENT_DISPATCHCONTROL,
2606					    destroy_disp, disp,
2607					    sizeof(isc_event_t));
2608	if (disp->ctlevent == NULL) {
2609		result = ISC_R_NOMEMORY;
2610		goto kill_task;
2611	}
2612
2613	isc_task_setname(disp->task[0], "tcpdispatch", disp);
2614
2615	dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2616	disp->tcpmsg_valid = 1;
2617
2618	disp->attributes = attributes;
2619
2620	/*
2621	 * Append it to the dispatcher list.
2622	 */
2623	ISC_LIST_APPEND(mgr->list, disp, link);
2624	UNLOCK(&mgr->lock);
2625
2626	mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2627	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2628
2629	*dispp = disp;
2630
2631	return (ISC_R_SUCCESS);
2632
2633	/*
2634	 * Error returns.
2635	 */
2636 kill_task:
2637	isc_task_detach(&disp->task[0]);
2638 kill_socket:
2639	isc_socket_detach(&disp->socket);
2640 deallocate_dispatch:
2641	dispatch_free(&disp);
2642
2643	UNLOCK(&mgr->lock);
2644
2645	return (result);
2646}
2647
2648isc_result_t
2649dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2650		    isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2651		    unsigned int buffersize,
2652		    unsigned int maxbuffers, unsigned int maxrequests,
2653		    unsigned int buckets, unsigned int increment,
2654		    unsigned int attributes, unsigned int mask,
2655		    dns_dispatch_t **dispp)
2656{
2657	isc_result_t result;
2658	dns_dispatch_t *disp = NULL;
2659
2660	REQUIRE(VALID_DISPATCHMGR(mgr));
2661	REQUIRE(sockmgr != NULL);
2662	REQUIRE(localaddr != NULL);
2663	REQUIRE(taskmgr != NULL);
2664	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2665	REQUIRE(maxbuffers > 0);
2666	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2667	REQUIRE(increment > buckets);
2668	REQUIRE(dispp != NULL && *dispp == NULL);
2669	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2670
2671	result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2672					maxrequests, buckets, increment);
2673	if (result != ISC_R_SUCCESS)
2674		return (result);
2675
2676	LOCK(&mgr->lock);
2677
2678	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2679		REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2680		goto createudp;
2681	}
2682
2683	/*
2684	 * See if we have a dispatcher that matches.
2685	 */
2686	result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2687	if (result == ISC_R_SUCCESS) {
2688		disp->refcount++;
2689
2690		if (disp->maxrequests < maxrequests)
2691			disp->maxrequests = maxrequests;
2692
2693		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
2694		    (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2695		{
2696			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2697			if (disp->recv_pending != 0)
2698				isc_socket_cancel(disp->socket, disp->task[0],
2699						  ISC_SOCKCANCEL_RECV);
2700		}
2701
2702		UNLOCK(&disp->lock);
2703		UNLOCK(&mgr->lock);
2704
2705		*dispp = disp;
2706
2707		return (ISC_R_SUCCESS);
2708	}
2709
2710 createudp:
2711	/*
2712	 * Nope, create one.
2713	 */
2714	result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2715				    maxrequests, attributes, &disp);
2716	if (result != ISC_R_SUCCESS) {
2717		UNLOCK(&mgr->lock);
2718		return (result);
2719	}
2720
2721	UNLOCK(&mgr->lock);
2722	*dispp = disp;
2723	return (ISC_R_SUCCESS);
2724}
2725
2726/*
2727 * mgr should be locked.
2728 */
2729
2730#ifndef DNS_DISPATCH_HELD
2731#define DNS_DISPATCH_HELD 20U
2732#endif
2733
2734static isc_result_t
2735get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2736	      isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2737	      isc_socket_t **sockp)
2738{
2739	unsigned int i, j;
2740	isc_socket_t *held[DNS_DISPATCH_HELD];
2741	isc_sockaddr_t localaddr_bound;
2742	isc_socket_t *sock = NULL;
2743	isc_result_t result = ISC_R_SUCCESS;
2744	isc_boolean_t anyport;
2745
2746	INSIST(sockp != NULL && *sockp == NULL);
2747
2748	localaddr_bound = *localaddr;
2749	anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2750
2751	if (anyport) {
2752		unsigned int nports;
2753		in_port_t *ports;
2754
2755		/*
2756		 * If no port is specified, we first try to pick up a random
2757		 * port by ourselves.
2758		 */
2759		if (isc_sockaddr_pf(localaddr) == AF_INET) {
2760			nports = disp->mgr->nv4ports;
2761			ports = disp->mgr->v4ports;
2762		} else {
2763			nports = disp->mgr->nv6ports;
2764			ports = disp->mgr->v6ports;
2765		}
2766		if (nports == 0)
2767			return (ISC_R_ADDRNOTAVAIL);
2768
2769		for (i = 0; i < 1024; i++) {
2770			in_port_t prt;
2771
2772			prt = ports[dispatch_uniformrandom(
2773					DISP_ARC4CTX(disp),
2774					nports)];
2775			isc_sockaddr_setport(&localaddr_bound, prt);
2776			result = open_socket(sockmgr, &localaddr_bound,
2777					     0, &sock);
2778			/*
2779			 * Continue if the port choosen is already in use
2780			 * or the OS has reserved it.
2781			 */
2782			if (result == ISC_R_NOPERM ||
2783			    result == ISC_R_ADDRINUSE)
2784				continue;
2785			disp->localport = prt;
2786			*sockp = sock;
2787			return (result);
2788		}
2789
2790		/*
2791		 * If this fails 1024 times, we then ask the kernel for
2792		 * choosing one.
2793		 */
2794	} else {
2795		/* Allow to reuse address for non-random ports. */
2796		result = open_socket(sockmgr, localaddr,
2797				     ISC_SOCKET_REUSEADDRESS, &sock);
2798
2799		if (result == ISC_R_SUCCESS)
2800			*sockp = sock;
2801
2802		return (result);
2803	}
2804
2805	memset(held, 0, sizeof(held));
2806	i = 0;
2807
2808	for (j = 0; j < 0xffffU; j++) {
2809		result = open_socket(sockmgr, localaddr, 0, &sock);
2810		if (result != ISC_R_SUCCESS)
2811			goto end;
2812		else if (portavailable(mgr, sock, NULL))
2813			break;
2814		if (held[i] != NULL)
2815			isc_socket_detach(&held[i]);
2816		held[i++] = sock;
2817		sock = NULL;
2818		if (i == DNS_DISPATCH_HELD)
2819			i = 0;
2820	}
2821	if (j == 0xffffU) {
2822		mgr_log(mgr, ISC_LOG_ERROR,
2823			"avoid-v%s-udp-ports: unable to allocate "
2824			"an available port",
2825			isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2826		result = ISC_R_FAILURE;
2827		goto end;
2828	}
2829	*sockp = sock;
2830
2831end:
2832	for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2833		if (held[i] != NULL)
2834			isc_socket_detach(&held[i]);
2835	}
2836
2837	return (result);
2838}
2839
2840static isc_result_t
2841dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2842		   isc_taskmgr_t *taskmgr,
2843		   isc_sockaddr_t *localaddr,
2844		   unsigned int maxrequests,
2845		   unsigned int attributes,
2846		   dns_dispatch_t **dispp)
2847{
2848	isc_result_t result;
2849	dns_dispatch_t *disp;
2850	isc_socket_t *sock = NULL;
2851	int i = 0;
2852
2853	/*
2854	 * dispatch_allocate() checks mgr for us.
2855	 */
2856	disp = NULL;
2857	result = dispatch_allocate(mgr, maxrequests, &disp);
2858	if (result != ISC_R_SUCCESS)
2859		return (result);
2860
2861	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
2862		result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock);
2863		if (result != ISC_R_SUCCESS)
2864			goto deallocate_dispatch;
2865	} else {
2866		isc_sockaddr_t sa_any;
2867
2868		/*
2869		 * For dispatches using exclusive sockets with a specific
2870		 * source address, we only check if the specified address is
2871		 * available on the system.  Query sockets will be created later
2872		 * on demand.
2873		 */
2874		isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
2875		if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
2876			result = open_socket(sockmgr, localaddr, 0, &sock);
2877			if (sock != NULL)
2878				isc_socket_detach(&sock);
2879			if (result != ISC_R_SUCCESS)
2880				goto deallocate_dispatch;
2881		}
2882
2883		disp->port_table = isc_mem_get(mgr->mctx,
2884					       sizeof(disp->port_table[0]) *
2885					       DNS_DISPATCH_PORTTABLESIZE);
2886		if (disp->port_table == NULL)
2887			goto deallocate_dispatch;
2888		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2889			ISC_LIST_INIT(disp->port_table[i]);
2890
2891		result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
2892					    &disp->portpool);
2893		if (result != ISC_R_SUCCESS)
2894			goto deallocate_dispatch;
2895		isc_mempool_setname(disp->portpool, "disp_portpool");
2896		isc_mempool_setfreemax(disp->portpool, 128);
2897	}
2898	disp->socktype = isc_sockettype_udp;
2899	disp->socket = sock;
2900	disp->local = *localaddr;
2901
2902	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2903		disp->ntasks = MAX_INTERNAL_TASKS;
2904	else
2905		disp->ntasks = 1;
2906	for (i = 0; i < disp->ntasks; i++) {
2907		disp->task[i] = NULL;
2908		result = isc_task_create(taskmgr, 0, &disp->task[i]);
2909		if (result != ISC_R_SUCCESS) {
2910			while (--i >= 0) {
2911				isc_task_shutdown(disp->task[i]);
2912				isc_task_detach(&disp->task[i]);
2913			}
2914			goto kill_socket;
2915		}
2916		isc_task_setname(disp->task[i], "udpdispatch", disp);
2917	}
2918
2919	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2920					    DNS_EVENT_DISPATCHCONTROL,
2921					    destroy_disp, disp,
2922					    sizeof(isc_event_t));
2923	if (disp->ctlevent == NULL) {
2924		result = ISC_R_NOMEMORY;
2925		goto kill_task;
2926	}
2927
2928	attributes &= ~DNS_DISPATCHATTR_TCP;
2929	attributes |= DNS_DISPATCHATTR_UDP;
2930	disp->attributes = attributes;
2931
2932	/*
2933	 * Append it to the dispatcher list.
2934	 */
2935	ISC_LIST_APPEND(mgr->list, disp, link);
2936
2937	mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2938	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
2939	if (disp->socket != NULL)
2940		dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
2941
2942	*dispp = disp;
2943	return (result);
2944
2945	/*
2946	 * Error returns.
2947	 */
2948 kill_task:
2949	for (i = 0; i < disp->ntasks; i++)
2950		isc_task_detach(&disp->task[i]);
2951 kill_socket:
2952	if (disp->socket != NULL)
2953		isc_socket_detach(&disp->socket);
2954 deallocate_dispatch:
2955	dispatch_free(&disp);
2956
2957	return (result);
2958}
2959
2960void
2961dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2962	REQUIRE(VALID_DISPATCH(disp));
2963	REQUIRE(dispp != NULL && *dispp == NULL);
2964
2965	LOCK(&disp->lock);
2966	disp->refcount++;
2967	UNLOCK(&disp->lock);
2968
2969	*dispp = disp;
2970}
2971
2972/*
2973 * It is important to lock the manager while we are deleting the dispatch,
2974 * since dns_dispatch_getudp will call dispatch_find, which returns to
2975 * the caller a dispatch but does not attach to it until later.  _getudp
2976 * locks the manager, however, so locking it here will keep us from attaching
2977 * to a dispatcher that is in the process of going away.
2978 */
2979void
2980dns_dispatch_detach(dns_dispatch_t **dispp) {
2981	dns_dispatch_t *disp;
2982	dispsocket_t *dispsock;
2983	isc_boolean_t killit;
2984
2985	REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2986
2987	disp = *dispp;
2988	*dispp = NULL;
2989
2990	LOCK(&disp->lock);
2991
2992	INSIST(disp->refcount > 0);
2993	disp->refcount--;
2994	if (disp->refcount == 0) {
2995		if (disp->recv_pending > 0)
2996			isc_socket_cancel(disp->socket, disp->task[0],
2997					  ISC_SOCKCANCEL_RECV);
2998		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
2999		     dispsock != NULL;
3000		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
3001			isc_socket_cancel(dispsock->socket, dispsock->task,
3002					  ISC_SOCKCANCEL_RECV);
3003		}
3004		disp->shutting_down = 1;
3005	}
3006
3007	dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
3008
3009	killit = destroy_disp_ok(disp);
3010	UNLOCK(&disp->lock);
3011	if (killit)
3012		isc_task_send(disp->task[0], &disp->ctlevent);
3013}
3014
3015isc_result_t
3016dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3017			  isc_task_t *task, isc_taskaction_t action, void *arg,
3018			  dns_messageid_t *idp, dns_dispentry_t **resp,
3019			  isc_socketmgr_t *sockmgr)
3020{
3021	dns_dispentry_t *res;
3022	unsigned int bucket;
3023	in_port_t localport = 0;
3024	dns_messageid_t id;
3025	int i;
3026	isc_boolean_t ok;
3027	dns_qid_t *qid;
3028	dispsocket_t *dispsocket = NULL;
3029	isc_result_t result;
3030
3031	REQUIRE(VALID_DISPATCH(disp));
3032	REQUIRE(task != NULL);
3033	REQUIRE(dest != NULL);
3034	REQUIRE(resp != NULL && *resp == NULL);
3035	REQUIRE(idp != NULL);
3036	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3037		REQUIRE(sockmgr != NULL);
3038
3039	LOCK(&disp->lock);
3040
3041	if (disp->shutting_down == 1) {
3042		UNLOCK(&disp->lock);
3043		return (ISC_R_SHUTTINGDOWN);
3044	}
3045
3046	if (disp->requests >= disp->maxrequests) {
3047		UNLOCK(&disp->lock);
3048		return (ISC_R_QUOTA);
3049	}
3050
3051	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3052	    disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3053		dispsocket_t *oldestsocket;
3054		dns_dispentry_t *oldestresp;
3055		dns_dispatchevent_t *rev;
3056
3057		/*
3058		 * Kill oldest outstanding query if the number of sockets
3059		 * exceeds the quota to keep the room for new queries.
3060		 */
3061		oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3062		oldestresp = oldestsocket->resp;
3063		if (oldestresp != NULL && !oldestresp->item_out) {
3064			rev = allocate_event(oldestresp->disp);
3065			if (rev != NULL) {
3066				rev->buffer.base = NULL;
3067				rev->result = ISC_R_CANCELED;
3068				rev->id = oldestresp->id;
3069				ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3070					       NULL, DNS_EVENT_DISPATCH,
3071					       oldestresp->action,
3072					       oldestresp->arg, oldestresp,
3073					       NULL, NULL);
3074				oldestresp->item_out = ISC_TRUE;
3075				isc_task_send(oldestresp->task,
3076					      ISC_EVENT_PTR(&rev));
3077				inc_stats(disp->mgr,
3078					  dns_resstatscounter_dispabort);
3079			}
3080		}
3081
3082		/*
3083		 * Move this entry to the tail so that it won't (easily) be
3084		 * examined before actually being canceled.
3085		 */
3086		ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3087		ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3088	}
3089
3090	qid = DNS_QID(disp);
3091	LOCK(&qid->lock);
3092
3093	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3094		/*
3095		 * Get a separate UDP socket with a random port number.
3096		 */
3097		result = get_dispsocket(disp, dest, sockmgr, qid, &dispsocket,
3098					&localport);
3099		if (result != ISC_R_SUCCESS) {
3100			UNLOCK(&qid->lock);
3101			UNLOCK(&disp->lock);
3102			inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3103			return (result);
3104		}
3105	} else {
3106		localport = disp->localport;
3107	}
3108
3109	/*
3110	 * Try somewhat hard to find an unique ID.
3111	 */
3112	id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3113	bucket = dns_hash(qid, dest, id, localport);
3114	ok = ISC_FALSE;
3115	for (i = 0; i < 64; i++) {
3116		if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3117			ok = ISC_TRUE;
3118			break;
3119		}
3120		id += qid->qid_increment;
3121		id &= 0x0000ffff;
3122		bucket = dns_hash(qid, dest, id, localport);
3123	}
3124
3125	if (!ok) {
3126		UNLOCK(&qid->lock);
3127		UNLOCK(&disp->lock);
3128		return (ISC_R_NOMORE);
3129	}
3130
3131	res = isc_mempool_get(disp->mgr->rpool);
3132	if (res == NULL) {
3133		UNLOCK(&qid->lock);
3134		UNLOCK(&disp->lock);
3135		if (dispsocket != NULL)
3136			destroy_dispsocket(disp, &dispsocket);
3137		return (ISC_R_NOMEMORY);
3138	}
3139
3140	disp->refcount++;
3141	disp->requests++;
3142	res->task = NULL;
3143	isc_task_attach(task, &res->task);
3144	res->disp = disp;
3145	res->id = id;
3146	res->port = localport;
3147	res->bucket = bucket;
3148	res->host = *dest;
3149	res->action = action;
3150	res->arg = arg;
3151	res->dispsocket = dispsocket;
3152	if (dispsocket != NULL)
3153		dispsocket->resp = res;
3154	res->item_out = ISC_FALSE;
3155	ISC_LIST_INIT(res->items);
3156	ISC_LINK_INIT(res, link);
3157	res->magic = RESPONSE_MAGIC;
3158	ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3159	UNLOCK(&qid->lock);
3160
3161	request_log(disp, res, LVL(90),
3162		    "attached to task %p", res->task);
3163
3164	if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3165	    ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3166		result = startrecv(disp, dispsocket);
3167		if (result != ISC_R_SUCCESS) {
3168			LOCK(&qid->lock);
3169			ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3170			UNLOCK(&qid->lock);
3171
3172			if (dispsocket != NULL)
3173				destroy_dispsocket(disp, &dispsocket);
3174
3175			disp->refcount--;
3176			disp->requests--;
3177
3178			UNLOCK(&disp->lock);
3179			isc_task_detach(&res->task);
3180			isc_mempool_put(disp->mgr->rpool, res);
3181			return (result);
3182		}
3183	}
3184
3185	if (dispsocket != NULL)
3186		ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3187
3188	UNLOCK(&disp->lock);
3189
3190	*idp = id;
3191	*resp = res;
3192
3193	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3194		INSIST(res->dispsocket != NULL);
3195
3196	return (ISC_R_SUCCESS);
3197}
3198
3199isc_result_t
3200dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3201			 isc_task_t *task, isc_taskaction_t action, void *arg,
3202			 dns_messageid_t *idp, dns_dispentry_t **resp)
3203{
3204	REQUIRE(VALID_DISPATCH(disp));
3205	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3206
3207	return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
3208					  idp, resp, NULL));
3209}
3210
3211void
3212dns_dispatch_starttcp(dns_dispatch_t *disp) {
3213
3214	REQUIRE(VALID_DISPATCH(disp));
3215
3216	dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3217
3218	LOCK(&disp->lock);
3219	disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3220	(void)startrecv(disp, NULL);
3221	UNLOCK(&disp->lock);
3222}
3223
3224void
3225dns_dispatch_removeresponse(dns_dispentry_t **resp,
3226			    dns_dispatchevent_t **sockevent)
3227{
3228	dns_dispatchmgr_t *mgr;
3229	dns_dispatch_t *disp;
3230	dns_dispentry_t *res;
3231	dispsocket_t *dispsock;
3232	dns_dispatchevent_t *ev;
3233	unsigned int bucket;
3234	isc_boolean_t killit;
3235	unsigned int n;
3236	isc_eventlist_t events;
3237	dns_qid_t *qid;
3238
3239	REQUIRE(resp != NULL);
3240	REQUIRE(VALID_RESPONSE(*resp));
3241
3242	res = *resp;
3243	*resp = NULL;
3244
3245	disp = res->disp;
3246	REQUIRE(VALID_DISPATCH(disp));
3247	mgr = disp->mgr;
3248	REQUIRE(VALID_DISPATCHMGR(mgr));
3249
3250	qid = DNS_QID(disp);
3251
3252	if (sockevent != NULL) {
3253		REQUIRE(*sockevent != NULL);
3254		ev = *sockevent;
3255		*sockevent = NULL;
3256	} else {
3257		ev = NULL;
3258	}
3259
3260	LOCK(&disp->lock);
3261
3262	INSIST(disp->requests > 0);
3263	disp->requests--;
3264	INSIST(disp->refcount > 0);
3265	disp->refcount--;
3266	if (disp->refcount == 0) {
3267		if (disp->recv_pending > 0)
3268			isc_socket_cancel(disp->socket, disp->task[0],
3269					  ISC_SOCKCANCEL_RECV);
3270		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3271		     dispsock != NULL;
3272		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
3273			isc_socket_cancel(dispsock->socket, dispsock->task,
3274					  ISC_SOCKCANCEL_RECV);
3275		}
3276		disp->shutting_down = 1;
3277	}
3278
3279	bucket = res->bucket;
3280
3281	LOCK(&qid->lock);
3282	ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3283	UNLOCK(&qid->lock);
3284
3285	if (ev == NULL && res->item_out) {
3286		/*
3287		 * We've posted our event, but the caller hasn't gotten it
3288		 * yet.  Take it back.
3289		 */
3290		ISC_LIST_INIT(events);
3291		n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3292				    NULL, &events);
3293		/*
3294		 * We had better have gotten it back.
3295		 */
3296		INSIST(n == 1);
3297		ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3298	}
3299
3300	if (ev != NULL) {
3301		REQUIRE(res->item_out == ISC_TRUE);
3302		res->item_out = ISC_FALSE;
3303		if (ev->buffer.base != NULL)
3304			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3305		free_event(disp, ev);
3306	}
3307
3308	request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3309	isc_task_detach(&res->task);
3310
3311	if (res->dispsocket != NULL) {
3312		isc_socket_cancel(res->dispsocket->socket,
3313				  res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3314		res->dispsocket->resp = NULL;
3315	}
3316
3317	/*
3318	 * Free any buffered requests as well
3319	 */
3320	ev = ISC_LIST_HEAD(res->items);
3321	while (ev != NULL) {
3322		ISC_LIST_UNLINK(res->items, ev, ev_link);
3323		if (ev->buffer.base != NULL)
3324			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3325		free_event(disp, ev);
3326		ev = ISC_LIST_HEAD(res->items);
3327	}
3328	res->magic = 0;
3329	isc_mempool_put(disp->mgr->rpool, res);
3330	if (disp->shutting_down == 1)
3331		do_cancel(disp);
3332	else
3333		(void)startrecv(disp, NULL);
3334
3335	killit = destroy_disp_ok(disp);
3336	UNLOCK(&disp->lock);
3337	if (killit)
3338		isc_task_send(disp->task[0], &disp->ctlevent);
3339}
3340
3341static void
3342do_cancel(dns_dispatch_t *disp) {
3343	dns_dispatchevent_t *ev;
3344	dns_dispentry_t *resp;
3345	dns_qid_t *qid;
3346
3347	if (disp->shutdown_out == 1)
3348		return;
3349
3350	qid = DNS_QID(disp);
3351
3352	/*
3353	 * Search for the first response handler without packets outstanding
3354	 * unless a specific hander is given.
3355	 */
3356	LOCK(&qid->lock);
3357	for (resp = linear_first(qid);
3358	     resp != NULL && resp->item_out;
3359	     /* Empty. */)
3360		resp = linear_next(qid, resp);
3361
3362	/*
3363	 * No one to send the cancel event to, so nothing to do.
3364	 */
3365	if (resp == NULL)
3366		goto unlock;
3367
3368	/*
3369	 * Send the shutdown failsafe event to this resp.
3370	 */
3371	ev = disp->failsafe_ev;
3372	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3373		       resp->action, resp->arg, resp, NULL, NULL);
3374	ev->result = disp->shutdown_why;
3375	ev->buffer.base = NULL;
3376	ev->buffer.length = 0;
3377	disp->shutdown_out = 1;
3378	request_log(disp, resp, LVL(10),
3379		    "cancel: failsafe event %p -> task %p",
3380		    ev, resp->task);
3381	resp->item_out = ISC_TRUE;
3382	isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3383 unlock:
3384	UNLOCK(&qid->lock);
3385}
3386
3387isc_socket_t *
3388dns_dispatch_getsocket(dns_dispatch_t *disp) {
3389	REQUIRE(VALID_DISPATCH(disp));
3390
3391	return (disp->socket);
3392}
3393
3394isc_socket_t *
3395dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3396	REQUIRE(VALID_RESPONSE(resp));
3397
3398	if (resp->dispsocket != NULL)
3399		return (resp->dispsocket->socket);
3400	else
3401		return (NULL);
3402}
3403
3404isc_result_t
3405dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3406
3407	REQUIRE(VALID_DISPATCH(disp));
3408	REQUIRE(addrp != NULL);
3409
3410	if (disp->socktype == isc_sockettype_udp) {
3411		*addrp = disp->local;
3412		return (ISC_R_SUCCESS);
3413	}
3414	return (ISC_R_NOTIMPLEMENTED);
3415}
3416
3417void
3418dns_dispatch_cancel(dns_dispatch_t *disp) {
3419	REQUIRE(VALID_DISPATCH(disp));
3420
3421	LOCK(&disp->lock);
3422
3423	if (disp->shutting_down == 1) {
3424		UNLOCK(&disp->lock);
3425		return;
3426	}
3427
3428	disp->shutdown_why = ISC_R_CANCELED;
3429	disp->shutting_down = 1;
3430	do_cancel(disp);
3431
3432	UNLOCK(&disp->lock);
3433
3434	return;
3435}
3436
3437unsigned int
3438dns_dispatch_getattributes(dns_dispatch_t *disp) {
3439	REQUIRE(VALID_DISPATCH(disp));
3440
3441	/*
3442	 * We don't bother locking disp here; it's the caller's responsibility
3443	 * to use only non volatile flags.
3444	 */
3445	return (disp->attributes);
3446}
3447
3448void
3449dns_dispatch_changeattributes(dns_dispatch_t *disp,
3450			      unsigned int attributes, unsigned int mask)
3451{
3452	REQUIRE(VALID_DISPATCH(disp));
3453	/* Exclusive attribute can only be set on creation */
3454	REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3455	/* Also, a dispatch with randomport specified cannot start listening */
3456	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3457		(attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3458
3459	/* XXXMLG
3460	 * Should check for valid attributes here!
3461	 */
3462
3463	LOCK(&disp->lock);
3464
3465	if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3466		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3467		    (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3468			disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3469			(void)startrecv(disp, NULL);
3470		} else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3471			   == 0 &&
3472			   (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3473			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3474			if (disp->recv_pending != 0)
3475				isc_socket_cancel(disp->socket, disp->task[0],
3476						  ISC_SOCKCANCEL_RECV);
3477		}
3478	}
3479
3480	disp->attributes &= ~mask;
3481	disp->attributes |= (attributes & mask);
3482	UNLOCK(&disp->lock);
3483}
3484
3485void
3486dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3487	void *buf;
3488	isc_socketevent_t *sevent, *newsevent;
3489
3490	REQUIRE(VALID_DISPATCH(disp));
3491	REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3492	REQUIRE(event != NULL);
3493
3494	sevent = (isc_socketevent_t *)event;
3495
3496	INSIST(sevent->n <= disp->mgr->buffersize);
3497	newsevent = (isc_socketevent_t *)
3498		    isc_event_allocate(disp->mgr->mctx, NULL,
3499				      DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3500				      disp, sizeof(isc_socketevent_t));
3501	if (newsevent == NULL)
3502		return;
3503
3504	buf = allocate_udp_buffer(disp);
3505	if (buf == NULL) {
3506		isc_event_free(ISC_EVENT_PTR(&newsevent));
3507		return;
3508	}
3509	memcpy(buf, sevent->region.base, sevent->n);
3510	newsevent->region.base = buf;
3511	newsevent->region.length = disp->mgr->buffersize;
3512	newsevent->n = sevent->n;
3513	newsevent->result = sevent->result;
3514	newsevent->address = sevent->address;
3515	newsevent->timestamp = sevent->timestamp;
3516	newsevent->pktinfo = sevent->pktinfo;
3517	newsevent->attributes = sevent->attributes;
3518
3519	isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3520}
3521
#if 0
/*
 * Debugging aid (compiled out): print every dispatch on the manager's
 * list together with its formatted local address.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
	dns_dispatch_t *disp;
	char addrtext[1024];

	for (disp = ISC_LIST_HEAD(mgr->list);
	     disp != NULL;
	     disp = ISC_LIST_NEXT(disp, link)) {
		isc_sockaddr_format(&disp->local, addrtext, sizeof(addrtext));
		printf("\tdispatch %p, addr %s\n", disp, addrtext);
	}
}
#endif
3536