dispatch.c revision 254402
1135446Strhodes/*
2254402Serwin * Copyright (C) 2004-2009, 2011-2013  Internet Systems Consortium, Inc. ("ISC")
3135446Strhodes * Copyright (C) 1999-2003  Internet Software Consortium.
4135446Strhodes *
5174187Sdougb * Permission to use, copy, modify, and/or distribute this software for any
6135446Strhodes * purpose with or without fee is hereby granted, provided that the above
7135446Strhodes * copyright notice and this permission notice appear in all copies.
8135446Strhodes *
9135446Strhodes * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10135446Strhodes * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11135446Strhodes * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12135446Strhodes * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13135446Strhodes * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14135446Strhodes * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15135446Strhodes * PERFORMANCE OF THIS SOFTWARE.
16135446Strhodes */
17135446Strhodes
18234010Sdougb/* $Id$ */
19135446Strhodes
20170222Sdougb/*! \file */
21170222Sdougb
22135446Strhodes#include <config.h>
23135446Strhodes
24135446Strhodes#include <stdlib.h>
25171577Sdougb#include <sys/types.h>
26171577Sdougb#include <unistd.h>
27186462Sdougb#include <stdlib.h>
28135446Strhodes
29135446Strhodes#include <isc/entropy.h>
30135446Strhodes#include <isc/mem.h>
31135446Strhodes#include <isc/mutex.h>
32186462Sdougb#include <isc/portset.h>
33135446Strhodes#include <isc/print.h>
34180477Sdougb#include <isc/random.h>
35193149Sdougb#include <isc/stats.h>
36135446Strhodes#include <isc/string.h>
37135446Strhodes#include <isc/task.h>
38171577Sdougb#include <isc/time.h>
39135446Strhodes#include <isc/util.h>
40135446Strhodes
41135446Strhodes#include <dns/acl.h>
42135446Strhodes#include <dns/dispatch.h>
43135446Strhodes#include <dns/events.h>
44135446Strhodes#include <dns/log.h>
45135446Strhodes#include <dns/message.h>
46135446Strhodes#include <dns/portlist.h>
47193149Sdougb#include <dns/stats.h>
48135446Strhodes#include <dns/tcpmsg.h>
49135446Strhodes#include <dns/types.h>
50135446Strhodes
51135446Strhodestypedef ISC_LIST(dns_dispentry_t)	dns_displist_t;
52135446Strhodes
53193149Sdougbtypedef struct dispsocket		dispsocket_t;
54186462Sdougbtypedef ISC_LIST(dispsocket_t)		dispsocketlist_t;
55135446Strhodes
56193149Sdougbtypedef struct dispportentry		dispportentry_t;
57193149Sdougbtypedef ISC_LIST(dispportentry_t)	dispportlist_t;
58193149Sdougb
59180477Sdougb/* ARC4 Random generator state */
60180477Sdougbtypedef struct arc4ctx {
61180477Sdougb	isc_uint8_t	i;
62180477Sdougb	isc_uint8_t	j;
63180477Sdougb	isc_uint8_t	s[256];
64180477Sdougb	int		count;
65186462Sdougb	isc_entropy_t	*entropy;	/*%< entropy source for ARC4 */
66186462Sdougb	isc_mutex_t	*lock;
67180477Sdougb} arc4ctx_t;
68180477Sdougb
69186462Sdougbtypedef struct dns_qid {
70186462Sdougb	unsigned int	magic;
71186462Sdougb	unsigned int	qid_nbuckets;	/*%< hash table size */
72186462Sdougb	unsigned int	qid_increment;	/*%< id increment on collision */
73186462Sdougb	isc_mutex_t	lock;
74186462Sdougb	dns_displist_t	*qid_table;	/*%< the table itself */
75186462Sdougb	dispsocketlist_t *sock_table;	/*%< socket table */
76186462Sdougb} dns_qid_t;
77186462Sdougb
78135446Strhodesstruct dns_dispatchmgr {
79135446Strhodes	/* Unlocked. */
80135446Strhodes	unsigned int			magic;
81135446Strhodes	isc_mem_t		       *mctx;
82135446Strhodes	dns_acl_t		       *blackhole;
83135446Strhodes	dns_portlist_t		       *portlist;
84193149Sdougb	isc_stats_t		       *stats;
85186462Sdougb	isc_entropy_t		       *entropy; /*%< entropy source */
86135446Strhodes
87135446Strhodes	/* Locked by "lock". */
88135446Strhodes	isc_mutex_t			lock;
89135446Strhodes	unsigned int			state;
90135446Strhodes	ISC_LIST(dns_dispatch_t)	list;
91135446Strhodes
92180477Sdougb	/* Locked by arc4_lock. */
93180477Sdougb	isc_mutex_t			arc4_lock;
94180477Sdougb	arc4ctx_t			arc4ctx;    /*%< ARC4 context for QID */
95180477Sdougb
96135446Strhodes	/* locked by buffer lock */
97135446Strhodes	dns_qid_t			*qid;
98135446Strhodes	isc_mutex_t			buffer_lock;
99170222Sdougb	unsigned int			buffers;    /*%< allocated buffers */
100170222Sdougb	unsigned int			buffersize; /*%< size of each buffer */
101170222Sdougb	unsigned int			maxbuffers; /*%< max buffers */
102135446Strhodes
103135446Strhodes	/* Locked internally. */
104135446Strhodes	isc_mutex_t			pool_lock;
105170222Sdougb	isc_mempool_t		       *epool;	/*%< memory pool for events */
106170222Sdougb	isc_mempool_t		       *rpool;	/*%< memory pool for replies */
107170222Sdougb	isc_mempool_t		       *dpool;  /*%< dispatch allocations */
108170222Sdougb	isc_mempool_t		       *bpool;	/*%< memory pool for buffers */
109186462Sdougb	isc_mempool_t		       *spool;	/*%< memory pool for dispsocs */
110135446Strhodes
111186462Sdougb	/*%
112186462Sdougb	 * Locked by qid->lock if qid exists; otherwise, can be used without
113186462Sdougb	 * being locked.
114186462Sdougb	 * Memory footprint considerations: this is a simple implementation of
115186462Sdougb	 * available ports, i.e., an ordered array of the actual port numbers.
116186462Sdougb	 * This will require about 256KB of memory in the worst case (128KB for
117186462Sdougb	 * each of IPv4 and IPv6).  We could reduce it by representing it as a
118186462Sdougb	 * more sophisticated way such as a list (or array) of ranges that are
119186462Sdougb	 * searched to identify a specific port.  Our decision here is the saved
120186462Sdougb	 * memory isn't worth the implementation complexity, considering the
121186462Sdougb	 * fact that the whole BIND9 process (which is mainly named) already
122186462Sdougb	 * requires a pretty large memory footprint.  We may, however, have to
123186462Sdougb	 * revisit the decision when we want to use it as a separate module for
124186462Sdougb	 * an environment where memory requirement is severer.
125186462Sdougb	 */
126186462Sdougb	in_port_t	*v4ports;	/*%< available ports for IPv4 */
127186462Sdougb	unsigned int	nv4ports;	/*%< # of available ports for IPv4 */
128186462Sdougb	in_port_t	*v6ports;	/*%< available ports for IPv4 */
129186462Sdougb	unsigned int	nv6ports;	/*%< # of available ports for IPv4 */
130135446Strhodes};
131135446Strhodes
/*% Bit in dns_dispatchmgr.state, and the predicate that tests it. */
#define MGR_SHUTTINGDOWN	0x00000001U
#define MGR_IS_SHUTTINGDOWN(l)	(((l)->state & MGR_SHUTTINGDOWN) != 0)

/*% True when dispatch 'd' has DNS_DISPATCHATTR_PRIVATE set. */
#define IS_PRIVATE(d)		(((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
136135446Strhodes
137135446Strhodesstruct dns_dispentry {
138135446Strhodes	unsigned int			magic;
139135446Strhodes	dns_dispatch_t		       *disp;
140135446Strhodes	dns_messageid_t			id;
141180477Sdougb	in_port_t			port;
142135446Strhodes	unsigned int			bucket;
143135446Strhodes	isc_sockaddr_t			host;
144135446Strhodes	isc_task_t		       *task;
145135446Strhodes	isc_taskaction_t		action;
146135446Strhodes	void			       *arg;
147135446Strhodes	isc_boolean_t			item_out;
148186462Sdougb	dispsocket_t			*dispsocket;
149135446Strhodes	ISC_LIST(dns_dispatchevent_t)	items;
150135446Strhodes	ISC_LINK(dns_dispentry_t)	link;
151135446Strhodes};
152135446Strhodes
/*%
 * Upper limit on the number of dispatch sockets kept pooled for reuse.  The
 * right value may vary, but experiments have shown that a busy caching server
 * may need more than 1000 sockets open concurrently.  The maximum allowable
 * number of dispatch sockets (per manager) will be set to double this value.
 * May be overridden at build time.
 */
#if !defined(DNS_DISPATCH_POOLSOCKS)
#define DNS_DISPATCH_POOLSOCKS			2048
#endif

/*%
 * Quota on the number of dispatch sockets.  Once a dispatch holds more
 * sockets than this, new queries purge the oldest ones, so that a massive
 * number of outstanding queries cannot starve subsequent queries
 * (especially when the older ones are slow and end in a timeout).
 * May be overridden at build time.
 */
#if !defined(DNS_DISPATCH_SOCKSQUOTA)
#define DNS_DISPATCH_SOCKSQUOTA			3072
#endif
173186462Sdougb
174186462Sdougbstruct dispsocket {
175186462Sdougb	unsigned int			magic;
176186462Sdougb	isc_socket_t			*socket;
177186462Sdougb	dns_dispatch_t			*disp;
178186462Sdougb	isc_sockaddr_t			host;
179193149Sdougb	in_port_t			localport; /* XXX: should be removed later */
180193149Sdougb	dispportentry_t			*portentry;
181186462Sdougb	dns_dispentry_t			*resp;
182186462Sdougb	isc_task_t			*task;
183186462Sdougb	ISC_LINK(dispsocket_t)		link;
184186462Sdougb	unsigned int			bucket;
185186462Sdougb	ISC_LINK(dispsocket_t)		blink;
186186462Sdougb};
187186462Sdougb
188193149Sdougb/*%
189193149Sdougb * A port table entry.  We remember every port we first open in a table with a
190193149Sdougb * reference counter so that we can 'reuse' the same port (with different
191193149Sdougb * destination addresses) using the SO_REUSEADDR socket option.
192193149Sdougb */
193193149Sdougbstruct dispportentry {
194193149Sdougb	in_port_t			port;
195193149Sdougb	unsigned int			refs;
196193149Sdougb	ISC_LINK(struct dispportentry)	link;
197193149Sdougb};
198193149Sdougb
/*% Number of slots in a per-dispatch port table (see port_search()). */
#if !defined(DNS_DISPATCH_PORTTABLESIZE)
#define DNS_DISPATCH_PORTTABLESIZE	1024
#endif

/*% Sentinel bucket value. */
#define INVALID_BUCKET			(0xffffdead)

/*%
 * Number of tasks for each dispatch that uses separate sockets for different
 * transactions.  This must be a power of 2, as it is used to divide 32-bit
 * numbers to obtain a uniformly random task selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS		64
211186462Sdougb
212135446Strhodesstruct dns_dispatch {
213135446Strhodes	/* Unlocked. */
214170222Sdougb	unsigned int		magic;		/*%< magic */
215170222Sdougb	dns_dispatchmgr_t      *mgr;		/*%< dispatch manager */
216186462Sdougb	int			ntasks;
217186462Sdougb	/*%
218186462Sdougb	 * internal task buckets.  We use multiple tasks to distribute various
219186462Sdougb	 * socket events well when using separate dispatch sockets.  We use the
220186462Sdougb	 * 1st task (task[0]) for internal control events.
221186462Sdougb	 */
222186462Sdougb	isc_task_t	       *task[MAX_INTERNAL_TASKS];
223170222Sdougb	isc_socket_t	       *socket;		/*%< isc socket attached to */
224170222Sdougb	isc_sockaddr_t		local;		/*%< local address */
225180477Sdougb	in_port_t		localport;	/*%< local UDP port */
226170222Sdougb	unsigned int		maxrequests;	/*%< max requests */
227135446Strhodes	isc_event_t	       *ctlevent;
228135446Strhodes
229170222Sdougb	/*% Locked by mgr->lock. */
230135446Strhodes	ISC_LINK(dns_dispatch_t) link;
231135446Strhodes
232135446Strhodes	/* Locked by "lock". */
233170222Sdougb	isc_mutex_t		lock;		/*%< locks all below */
234135446Strhodes	isc_sockettype_t	socktype;
235135446Strhodes	unsigned int		attributes;
236170222Sdougb	unsigned int		refcount;	/*%< number of users */
237170222Sdougb	dns_dispatchevent_t    *failsafe_ev;	/*%< failsafe cancel event */
238135446Strhodes	unsigned int		shutting_down : 1,
239135446Strhodes				shutdown_out : 1,
240135446Strhodes				connected : 1,
241135446Strhodes				tcpmsg_valid : 1,
242170222Sdougb				recv_pending : 1; /*%< is a recv() pending? */
243135446Strhodes	isc_result_t		shutdown_why;
244186462Sdougb	ISC_LIST(dispsocket_t)	activesockets;
245186462Sdougb	ISC_LIST(dispsocket_t)	inactivesockets;
246186462Sdougb	unsigned int		nsockets;
247170222Sdougb	unsigned int		requests;	/*%< how many requests we have */
248170222Sdougb	unsigned int		tcpbuffers;	/*%< allocated buffers */
249170222Sdougb	dns_tcpmsg_t		tcpmsg;		/*%< for tcp streams */
250135446Strhodes	dns_qid_t		*qid;
251186462Sdougb	arc4ctx_t		arc4ctx;	/*%< for QID/UDP port num */
252193149Sdougb	dispportlist_t		*port_table;	/*%< hold ports 'owned' by us */
253193149Sdougb	isc_mempool_t		*portpool;	/*%< port table entries  */
254135446Strhodes};
255135446Strhodes
/*%
 * Magic numbers and validity checks for the structures used in this file.
 */
#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
#define DISPSOCK_MAGIC		ISC_MAGIC('D', 's', 'o', 'c')
#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')

#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)
#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
#define VALID_DISPSOCK(e)	ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
270135446Strhodes
/*%
 * DNS_QID(disp): the QID table to use for 'disp' -- the dispatch's own table
 * for a TCP dispatch, the manager's table otherwise.
 *
 * DISP_ARC4CTX(disp): pointer to the ARC4 context to use for 'disp' -- the
 * dispatch's own context for a UDP dispatch, the manager's otherwise.
 *
 * Both expansions are fully parenthesized so that they behave as a single
 * operand inside a larger expression (e.g. with unary '*' or a binary
 * operator next to the macro call).
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)
#define DISP_ARC4CTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
275186462Sdougb
/*%
 * Locking a query port buffer is a bit tricky.  We access the buffer without
 * locking until qid is created.  Technically, there is a possibility of race
 * between the creation of qid and access to the port buffer; in practice,
 * however, this should be safe because qid isn't created until the first
 * dispatch is created and there should be no contending situation until then.
 *
 * Both macros take (or release) mgr->qid->lock only when mgr->qid is
 * non-NULL.  They are wrapped in do { } while (0) so that each expands to
 * exactly one statement and cannot capture an 'else' that follows the call;
 * invoke them with a trailing semicolon.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
285186462Sdougb
286135446Strhodes/*
287135446Strhodes * Statics.
288135446Strhodes */
289186462Sdougbstatic dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
290186462Sdougb				     dns_messageid_t, in_port_t, unsigned int);
291135446Strhodesstatic isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
292135446Strhodesstatic void destroy_disp(isc_task_t *task, isc_event_t *event);
293186462Sdougbstatic void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
294186462Sdougbstatic void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
295186462Sdougbstatic void udp_exrecv(isc_task_t *, isc_event_t *);
296186462Sdougbstatic void udp_shrecv(isc_task_t *, isc_event_t *);
297186462Sdougbstatic void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
298135446Strhodesstatic void tcp_recv(isc_task_t *, isc_event_t *);
299186462Sdougbstatic isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
300180477Sdougbstatic isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
301180477Sdougb			     in_port_t);
302135446Strhodesstatic void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
303135446Strhodesstatic void *allocate_udp_buffer(dns_dispatch_t *disp);
304135446Strhodesstatic inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
305135446Strhodesstatic inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
306135446Strhodesstatic void do_cancel(dns_dispatch_t *disp);
307135446Strhodesstatic dns_dispentry_t *linear_first(dns_qid_t *disp);
308135446Strhodesstatic dns_dispentry_t *linear_next(dns_qid_t *disp,
309135446Strhodes				    dns_dispentry_t *resp);
310135446Strhodesstatic void dispatch_free(dns_dispatch_t **dispp);
311186462Sdougbstatic isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
312186462Sdougb				  dns_dispatch_t *disp,
313186462Sdougb				  isc_socketmgr_t *sockmgr,
314186462Sdougb				  isc_sockaddr_t *localaddr,
315186462Sdougb				  isc_socket_t **sockp);
316135446Strhodesstatic isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
317135446Strhodes				       isc_socketmgr_t *sockmgr,
318135446Strhodes				       isc_taskmgr_t *taskmgr,
319135446Strhodes				       isc_sockaddr_t *localaddr,
320135446Strhodes				       unsigned int maxrequests,
321135446Strhodes				       unsigned int attributes,
322135446Strhodes				       dns_dispatch_t **dispp);
323135446Strhodesstatic isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
324135446Strhodesstatic void destroy_mgr(dns_dispatchmgr_t **mgrp);
325135446Strhodesstatic isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
326186462Sdougb				 unsigned int increment, dns_qid_t **qidp,
327186462Sdougb				 isc_boolean_t needaddrtable);
328135446Strhodesstatic void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
329186462Sdougbstatic isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
330186462Sdougb				unsigned int options, isc_socket_t **sockp);
331186462Sdougbstatic isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
332186462Sdougb				   isc_sockaddr_t *sockaddrp);
333135446Strhodes
334135446Strhodes#define LVL(x) ISC_LOG_DEBUG(x)
335135446Strhodes
336135446Strhodesstatic void
337135446Strhodesmgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
338135446Strhodes     ISC_FORMAT_PRINTF(3, 4);
339135446Strhodes
340135446Strhodesstatic void
341135446Strhodesmgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
342135446Strhodes	char msgbuf[2048];
343135446Strhodes	va_list ap;
344135446Strhodes
345135446Strhodes	if (! isc_log_wouldlog(dns_lctx, level))
346135446Strhodes		return;
347135446Strhodes
348135446Strhodes	va_start(ap, fmt);
349135446Strhodes	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
350135446Strhodes	va_end(ap);
351135446Strhodes
352135446Strhodes	isc_log_write(dns_lctx,
353135446Strhodes		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
354135446Strhodes		      level, "dispatchmgr %p: %s", mgr, msgbuf);
355135446Strhodes}
356135446Strhodes
357193149Sdougbstatic inline void
358193149Sdougbinc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
359193149Sdougb	if (mgr->stats != NULL)
360193149Sdougb		isc_stats_increment(mgr->stats, counter);
361193149Sdougb}
362193149Sdougb
363135446Strhodesstatic void
364135446Strhodesdispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
365135446Strhodes     ISC_FORMAT_PRINTF(3, 4);
366135446Strhodes
367135446Strhodesstatic void
368135446Strhodesdispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
369135446Strhodes	char msgbuf[2048];
370135446Strhodes	va_list ap;
371135446Strhodes
372135446Strhodes	if (! isc_log_wouldlog(dns_lctx, level))
373135446Strhodes		return;
374135446Strhodes
375135446Strhodes	va_start(ap, fmt);
376135446Strhodes	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
377135446Strhodes	va_end(ap);
378135446Strhodes
379135446Strhodes	isc_log_write(dns_lctx,
380135446Strhodes		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
381135446Strhodes		      level, "dispatch %p: %s", disp, msgbuf);
382135446Strhodes}
383135446Strhodes
384135446Strhodesstatic void
385135446Strhodesrequest_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
386135446Strhodes	    int level, const char *fmt, ...)
387135446Strhodes     ISC_FORMAT_PRINTF(4, 5);
388135446Strhodes
389135446Strhodesstatic void
390135446Strhodesrequest_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
391135446Strhodes	    int level, const char *fmt, ...)
392135446Strhodes{
393135446Strhodes	char msgbuf[2048];
394135446Strhodes	char peerbuf[256];
395135446Strhodes	va_list ap;
396135446Strhodes
397135446Strhodes	if (! isc_log_wouldlog(dns_lctx, level))
398135446Strhodes		return;
399135446Strhodes
400135446Strhodes	va_start(ap, fmt);
401135446Strhodes	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
402135446Strhodes	va_end(ap);
403135446Strhodes
404135446Strhodes	if (VALID_RESPONSE(resp)) {
405135446Strhodes		isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
406135446Strhodes		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
407135446Strhodes			      DNS_LOGMODULE_DISPATCH, level,
408135446Strhodes			      "dispatch %p response %p %s: %s", disp, resp,
409135446Strhodes			      peerbuf, msgbuf);
410135446Strhodes	} else {
411135446Strhodes		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
412135446Strhodes			      DNS_LOGMODULE_DISPATCH, level,
413135446Strhodes			      "dispatch %p req/resp %p: %s", disp, resp,
414135446Strhodes			      msgbuf);
415135446Strhodes	}
416135446Strhodes}
417135446Strhodes
418186462Sdougb/*%
419182645Sdougb * ARC4 random number generator derived from OpenBSD.
420224092Sdougb * Only dispatch_random() and dispatch_uniformrandom() are expected
421182645Sdougb * to be called from general dispatch routines; the rest of them are subroutines
422182645Sdougb * for these two.
423182645Sdougb *
424182645Sdougb * The original copyright follows:
425182645Sdougb * Copyright (c) 1996, David Mazieres <dm@uun.org>
426182645Sdougb * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
427182645Sdougb *
428182645Sdougb * Permission to use, copy, modify, and distribute this software for any
429182645Sdougb * purpose with or without fee is hereby granted, provided that the above
430182645Sdougb * copyright notice and this permission notice appear in all copies.
431182645Sdougb *
432182645Sdougb * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
433182645Sdougb * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
434182645Sdougb * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
435182645Sdougb * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
436182645Sdougb * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
437182645Sdougb * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
438182645Sdougb * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
439135446Strhodes */
440224092Sdougb#ifdef BIND9
441180477Sdougbstatic void
442224092Sdougbdispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
443224092Sdougb		    isc_mutex_t *lock)
444224092Sdougb{
445180477Sdougb	int n;
446180477Sdougb	for (n = 0; n < 256; n++)
447180477Sdougb		actx->s[n] = n;
448180477Sdougb	actx->i = 0;
449180477Sdougb	actx->j = 0;
450180477Sdougb	actx->count = 0;
451186462Sdougb	actx->entropy = entropy; /* don't have to attach */
452186462Sdougb	actx->lock = lock;
453180477Sdougb}
454135446Strhodes
455180477Sdougbstatic void
456180477Sdougbdispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
457180477Sdougb	int n;
458180477Sdougb	isc_uint8_t si;
459135446Strhodes
460180477Sdougb	actx->i--;
461180477Sdougb	for (n = 0; n < 256; n++) {
462180477Sdougb		actx->i = (actx->i + 1);
463180477Sdougb		si = actx->s[actx->i];
464180477Sdougb		actx->j = (actx->j + si + dat[n % datlen]);
465180477Sdougb		actx->s[actx->i] = actx->s[actx->j];
466180477Sdougb		actx->s[actx->j] = si;
467180477Sdougb	}
468180477Sdougb	actx->j = actx->i;
469135446Strhodes}
470135446Strhodes
471180477Sdougbstatic inline isc_uint8_t
472180477Sdougbdispatch_arc4get8(arc4ctx_t *actx) {
473180477Sdougb	isc_uint8_t si, sj;
474180477Sdougb
475180477Sdougb	actx->i = (actx->i + 1);
476180477Sdougb	si = actx->s[actx->i];
477180477Sdougb	actx->j = (actx->j + si);
478180477Sdougb	sj = actx->s[actx->j];
479180477Sdougb	actx->s[actx->i] = sj;
480180477Sdougb	actx->s[actx->j] = si;
481180477Sdougb
482180477Sdougb	return (actx->s[(si + sj) & 0xff]);
483180477Sdougb}
484180477Sdougb
485180477Sdougbstatic inline isc_uint16_t
486180477Sdougbdispatch_arc4get16(arc4ctx_t *actx) {
487180477Sdougb	isc_uint16_t val;
488180477Sdougb
489180477Sdougb	val = dispatch_arc4get8(actx) << 8;
490180477Sdougb	val |= dispatch_arc4get8(actx);
491180477Sdougb
492180477Sdougb	return (val);
493180477Sdougb}
494180477Sdougb
495180477Sdougbstatic void
496186462Sdougbdispatch_arc4stir(arc4ctx_t *actx) {
497180477Sdougb	int i;
498180477Sdougb	union {
499180477Sdougb		unsigned char rnd[128];
500180477Sdougb		isc_uint32_t rnd32[32];
501180477Sdougb	} rnd;
502180477Sdougb	isc_result_t result;
503180477Sdougb
504186462Sdougb	if (actx->entropy != NULL) {
505180477Sdougb		/*
506180477Sdougb		 * We accept any quality of random data to avoid blocking.
507180477Sdougb		 */
508186462Sdougb		result = isc_entropy_getdata(actx->entropy, rnd.rnd,
509180477Sdougb					     sizeof(rnd), NULL, 0);
510180477Sdougb		RUNTIME_CHECK(result == ISC_R_SUCCESS);
511180477Sdougb	} else {
512180477Sdougb		for (i = 0; i < 32; i++)
513180477Sdougb			isc_random_get(&rnd.rnd32[i]);
514180477Sdougb	}
515186462Sdougb	dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
516180477Sdougb
517180477Sdougb	/*
518180477Sdougb	 * Discard early keystream, as per recommendations in:
519180477Sdougb	 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
520180477Sdougb	 */
521180477Sdougb	for (i = 0; i < 256; i++)
522186462Sdougb		(void)dispatch_arc4get8(actx);
523180477Sdougb
524180477Sdougb	/*
525180477Sdougb	 * Derived from OpenBSD's implementation.  The rationale is not clear,
526180477Sdougb	 * but should be conservative enough in safety, and reasonably large
527180477Sdougb	 * for efficiency.
528180477Sdougb	 */
529186462Sdougb	actx->count = 1600000;
530180477Sdougb}
531180477Sdougb
532180477Sdougbstatic isc_uint16_t
533224092Sdougbdispatch_random(arc4ctx_t *actx) {
534180477Sdougb	isc_uint16_t result;
535180477Sdougb
536186462Sdougb	if (actx->lock != NULL)
537186462Sdougb		LOCK(actx->lock);
538186462Sdougb
539186462Sdougb	actx->count -= sizeof(isc_uint16_t);
540186462Sdougb	if (actx->count <= 0)
541186462Sdougb		dispatch_arc4stir(actx);
542186462Sdougb	result = dispatch_arc4get16(actx);
543186462Sdougb
544186462Sdougb	if (actx->lock != NULL)
545186462Sdougb		UNLOCK(actx->lock);
546186462Sdougb
547180477Sdougb	return (result);
548180477Sdougb}
549224092Sdougb#else
550224092Sdougb/*
551224092Sdougb * For general purpose library, we don't have to be too strict about the
552224092Sdougb * quality of random values.  Performance doesn't matter much, either.
553224092Sdougb * So we simply use the isc_random module to keep the library as small as
554224092Sdougb * possible.
555224092Sdougb */
556180477Sdougb
557224092Sdougbstatic void
558224092Sdougbdispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
559224092Sdougb		    isc_mutex_t *lock)
560224092Sdougb{
561224092Sdougb	UNUSED(actx);
562224092Sdougb	UNUSED(entropy);
563224092Sdougb	UNUSED(lock);
564224092Sdougb
565224092Sdougb	return;
566224092Sdougb}
567224092Sdougb
568180477Sdougbstatic isc_uint16_t
569224092Sdougbdispatch_random(arc4ctx_t *actx) {
570224092Sdougb	isc_uint32_t r;
571224092Sdougb
572224092Sdougb	UNUSED(actx);
573224092Sdougb
574224092Sdougb	isc_random_get(&r);
575224092Sdougb	return (r & 0xffff);
576224092Sdougb}
577224092Sdougb#endif	/* BIND9 */
578224092Sdougb
579224092Sdougbstatic isc_uint16_t
580224092Sdougbdispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
581180477Sdougb	isc_uint16_t min, r;
582180477Sdougb
583180477Sdougb	if (upper_bound < 2)
584180477Sdougb		return (0);
585180477Sdougb
586180477Sdougb	/*
587180477Sdougb	 * Ensure the range of random numbers [min, 0xffff] be a multiple of
588180477Sdougb	 * upper_bound and contain at least a half of the 16 bit range.
589180477Sdougb	 */
590180477Sdougb
591180477Sdougb	if (upper_bound > 0x8000)
592180477Sdougb		min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
593180477Sdougb	else
594180477Sdougb		min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
595180477Sdougb
596180477Sdougb	/*
597180477Sdougb	 * This could theoretically loop forever but each retry has
598180477Sdougb	 * p > 0.5 (worst case, usually far better) of selecting a
599180477Sdougb	 * number inside the range we need, so it should rarely need
600180477Sdougb	 * to re-roll.
601180477Sdougb	 */
602180477Sdougb	for (;;) {
603224092Sdougb		r = dispatch_random(actx);
604180477Sdougb		if (r >= min)
605180477Sdougb			break;
606180477Sdougb	}
607180477Sdougb
608180477Sdougb	return (r % upper_bound);
609180477Sdougb}
610180477Sdougb
611135446Strhodes/*
612135446Strhodes * Return a hash of the destination and message id.
613135446Strhodes */
614135446Strhodesstatic isc_uint32_t
615180477Sdougbdns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
616180477Sdougb	 in_port_t port)
617180477Sdougb{
618135446Strhodes	unsigned int ret;
619135446Strhodes
620135446Strhodes	ret = isc_sockaddr_hash(dest, ISC_TRUE);
621180477Sdougb	ret ^= (id << 16) | port;
622135446Strhodes	ret %= qid->qid_nbuckets;
623135446Strhodes
624135446Strhodes	INSIST(ret < qid->qid_nbuckets);
625135446Strhodes
626135446Strhodes	return (ret);
627135446Strhodes}
628135446Strhodes
629135446Strhodes/*
630135446Strhodes * Find the first entry in 'qid'.  Returns NULL if there are no entries.
631135446Strhodes */
632135446Strhodesstatic dns_dispentry_t *
633135446Strhodeslinear_first(dns_qid_t *qid) {
634135446Strhodes	dns_dispentry_t *ret;
635135446Strhodes	unsigned int bucket;
636135446Strhodes
637135446Strhodes	bucket = 0;
638135446Strhodes
639135446Strhodes	while (bucket < qid->qid_nbuckets) {
640135446Strhodes		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
641135446Strhodes		if (ret != NULL)
642135446Strhodes			return (ret);
643135446Strhodes		bucket++;
644135446Strhodes	}
645135446Strhodes
646135446Strhodes	return (NULL);
647135446Strhodes}
648135446Strhodes
649135446Strhodes/*
650135446Strhodes * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
651135446Strhodes * no more entries.
652135446Strhodes */
653135446Strhodesstatic dns_dispentry_t *
654135446Strhodeslinear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
655135446Strhodes	dns_dispentry_t *ret;
656135446Strhodes	unsigned int bucket;
657135446Strhodes
658135446Strhodes	ret = ISC_LIST_NEXT(resp, link);
659135446Strhodes	if (ret != NULL)
660135446Strhodes		return (ret);
661135446Strhodes
662135446Strhodes	bucket = resp->bucket;
663135446Strhodes	bucket++;
664135446Strhodes	while (bucket < qid->qid_nbuckets) {
665135446Strhodes		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
666135446Strhodes		if (ret != NULL)
667135446Strhodes			return (ret);
668135446Strhodes		bucket++;
669135446Strhodes	}
670135446Strhodes
671135446Strhodes	return (NULL);
672135446Strhodes}
673135446Strhodes
674135446Strhodes/*
675135446Strhodes * The dispatch must be locked.
676135446Strhodes */
677135446Strhodesstatic isc_boolean_t
678135446Strhodesdestroy_disp_ok(dns_dispatch_t *disp)
679135446Strhodes{
680135446Strhodes	if (disp->refcount != 0)
681135446Strhodes		return (ISC_FALSE);
682135446Strhodes
683135446Strhodes	if (disp->recv_pending != 0)
684135446Strhodes		return (ISC_FALSE);
685135446Strhodes
686186462Sdougb	if (!ISC_LIST_EMPTY(disp->activesockets))
687186462Sdougb		return (ISC_FALSE);
688186462Sdougb
689135446Strhodes	if (disp->shutting_down == 0)
690135446Strhodes		return (ISC_FALSE);
691135446Strhodes
692135446Strhodes	return (ISC_TRUE);
693135446Strhodes}
694135446Strhodes
695135446Strhodes/*
696135446Strhodes * Called when refcount reaches 0 (and safe to destroy).
697135446Strhodes *
698135446Strhodes * The dispatcher must not be locked.
699135446Strhodes * The manager must be locked.
700135446Strhodes */
701135446Strhodesstatic void
702135446Strhodesdestroy_disp(isc_task_t *task, isc_event_t *event) {
703135446Strhodes	dns_dispatch_t *disp;
704135446Strhodes	dns_dispatchmgr_t *mgr;
705135446Strhodes	isc_boolean_t killmgr;
706186462Sdougb	dispsocket_t *dispsocket;
707186462Sdougb	int i;
708135446Strhodes
709135446Strhodes	INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
710135446Strhodes
711135446Strhodes	UNUSED(task);
712135446Strhodes
713135446Strhodes	disp = event->ev_arg;
714135446Strhodes	mgr = disp->mgr;
715135446Strhodes
716135446Strhodes	LOCK(&mgr->lock);
717135446Strhodes	ISC_LIST_UNLINK(mgr->list, disp, link);
718135446Strhodes
719135446Strhodes	dispatch_log(disp, LVL(90),
720135446Strhodes		     "shutting down; detaching from sock %p, task %p",
721186462Sdougb		     disp->socket, disp->task[0]); /* XXXX */
722135446Strhodes
723186462Sdougb	if (disp->socket != NULL)
724186462Sdougb		isc_socket_detach(&disp->socket);
725186462Sdougb	while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
726186462Sdougb		ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
727186462Sdougb		destroy_dispsocket(disp, &dispsocket);
728186462Sdougb	}
729186462Sdougb	for (i = 0; i < disp->ntasks; i++)
730186462Sdougb		isc_task_detach(&disp->task[i]);
731135446Strhodes	isc_event_free(&event);
732135446Strhodes
733135446Strhodes	dispatch_free(&disp);
734135446Strhodes
735135446Strhodes	killmgr = destroy_mgr_ok(mgr);
736135446Strhodes	UNLOCK(&mgr->lock);
737135446Strhodes	if (killmgr)
738135446Strhodes		destroy_mgr(&mgr);
739135446Strhodes}
740135446Strhodes
741186462Sdougb/*%
742193149Sdougb * Manipulate port table per dispatch: find an entry for a given port number,
743193149Sdougb * create a new entry, and decrement a given entry with possible clean-up.
744193149Sdougb */
745193149Sdougbstatic dispportentry_t *
746193149Sdougbport_search(dns_dispatch_t *disp, in_port_t port) {
747193149Sdougb	dispportentry_t *portentry;
748193149Sdougb
749193149Sdougb	REQUIRE(disp->port_table != NULL);
750193149Sdougb
751193149Sdougb	portentry = ISC_LIST_HEAD(disp->port_table[port %
752193149Sdougb						   DNS_DISPATCH_PORTTABLESIZE]);
753193149Sdougb	while (portentry != NULL) {
754193149Sdougb		if (portentry->port == port)
755193149Sdougb			return (portentry);
756193149Sdougb		portentry = ISC_LIST_NEXT(portentry, link);
757193149Sdougb	}
758193149Sdougb
759193149Sdougb	return (NULL);
760193149Sdougb}
761193149Sdougb
762193149Sdougbstatic dispportentry_t *
763193149Sdougbnew_portentry(dns_dispatch_t *disp, in_port_t port) {
764193149Sdougb	dispportentry_t *portentry;
765193149Sdougb
766193149Sdougb	REQUIRE(disp->port_table != NULL);
767193149Sdougb
768193149Sdougb	portentry = isc_mempool_get(disp->portpool);
769193149Sdougb	if (portentry == NULL)
770193149Sdougb		return (portentry);
771193149Sdougb
772193149Sdougb	portentry->port = port;
773193149Sdougb	portentry->refs = 0;
774193149Sdougb	ISC_LINK_INIT(portentry, link);
775193149Sdougb	ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
776193149Sdougb			portentry, link);
777193149Sdougb
778193149Sdougb	return (portentry);
779193149Sdougb}
780193149Sdougb
781204619Sdougb/*%
782204619Sdougb * The caller must not hold the qid->lock.
783204619Sdougb */
784193149Sdougbstatic void
785193149Sdougbderef_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
786193149Sdougb	dispportentry_t *portentry = *portentryp;
787204619Sdougb	dns_qid_t *qid;
788193149Sdougb
789193149Sdougb	REQUIRE(disp->port_table != NULL);
790193149Sdougb	REQUIRE(portentry != NULL && portentry->refs > 0);
791193149Sdougb
792204619Sdougb	qid = DNS_QID(disp);
793204619Sdougb	LOCK(&qid->lock);
794193149Sdougb	portentry->refs--;
795193149Sdougb	if (portentry->refs == 0) {
796193149Sdougb		ISC_LIST_UNLINK(disp->port_table[portentry->port %
797193149Sdougb						 DNS_DISPATCH_PORTTABLESIZE],
798193149Sdougb				portentry, link);
799193149Sdougb		isc_mempool_put(disp->portpool, portentry);
800193149Sdougb	}
801193149Sdougb
802193149Sdougb	*portentryp = NULL;
803204619Sdougb	UNLOCK(&qid->lock);
804193149Sdougb}
805193149Sdougb
806193149Sdougb/*%
807186462Sdougb * Find a dispsocket for socket address 'dest', and port number 'port'.
808186462Sdougb * Return NULL if no such entry exists.
809186462Sdougb */
810186462Sdougbstatic dispsocket_t *
811186462Sdougbsocket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
812186462Sdougb	      unsigned int bucket)
813186462Sdougb{
814186462Sdougb	dispsocket_t *dispsock;
815135446Strhodes
816186462Sdougb	REQUIRE(bucket < qid->qid_nbuckets);
817186462Sdougb
818186462Sdougb	dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
819186462Sdougb
820186462Sdougb	while (dispsock != NULL) {
821204619Sdougb		if (dispsock->portentry != NULL &&
822204619Sdougb		    dispsock->portentry->port == port &&
823204619Sdougb		    isc_sockaddr_equal(dest, &dispsock->host))
824186462Sdougb			return (dispsock);
825186462Sdougb		dispsock = ISC_LIST_NEXT(dispsock, blink);
826186462Sdougb	}
827186462Sdougb
828186462Sdougb	return (NULL);
829186462Sdougb}
830186462Sdougb
831186462Sdougb/*%
832186462Sdougb * Make a new socket for a single dispatch with a random port number.
833186462Sdougb * The caller must hold the disp->lock and qid->lock.
834186462Sdougb */
835186462Sdougbstatic isc_result_t
836186462Sdougbget_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
837186462Sdougb	       isc_socketmgr_t *sockmgr, dns_qid_t *qid,
838186462Sdougb	       dispsocket_t **dispsockp, in_port_t *portp)
839186462Sdougb{
840186462Sdougb	int i;
841186462Sdougb	isc_uint32_t r;
842186462Sdougb	dns_dispatchmgr_t *mgr = disp->mgr;
843186462Sdougb	isc_socket_t *sock = NULL;
844186462Sdougb	isc_result_t result = ISC_R_FAILURE;
845186462Sdougb	in_port_t port;
846186462Sdougb	isc_sockaddr_t localaddr;
847186462Sdougb	unsigned int bucket = 0;
848186462Sdougb	dispsocket_t *dispsock;
849186462Sdougb	unsigned int nports;
850186462Sdougb	in_port_t *ports;
851193149Sdougb	unsigned int bindoptions;
852193149Sdougb	dispportentry_t *portentry = NULL;
853186462Sdougb
854186462Sdougb	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
855186462Sdougb		nports = disp->mgr->nv4ports;
856186462Sdougb		ports = disp->mgr->v4ports;
857186462Sdougb	} else {
858186462Sdougb		nports = disp->mgr->nv6ports;
859186462Sdougb		ports = disp->mgr->v6ports;
860186462Sdougb	}
861186462Sdougb	if (nports == 0)
862186462Sdougb		return (ISC_R_ADDRNOTAVAIL);
863186462Sdougb
864186462Sdougb	dispsock = ISC_LIST_HEAD(disp->inactivesockets);
865186462Sdougb	if (dispsock != NULL) {
866186462Sdougb		ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
867186462Sdougb		sock = dispsock->socket;
868186462Sdougb		dispsock->socket = NULL;
869186462Sdougb	} else {
870186462Sdougb		dispsock = isc_mempool_get(mgr->spool);
871186462Sdougb		if (dispsock == NULL)
872186462Sdougb			return (ISC_R_NOMEMORY);
873186462Sdougb
874186462Sdougb		disp->nsockets++;
875186462Sdougb		dispsock->socket = NULL;
876186462Sdougb		dispsock->disp = disp;
877186462Sdougb		dispsock->resp = NULL;
878193149Sdougb		dispsock->portentry = NULL;
879186462Sdougb		isc_random_get(&r);
880186462Sdougb		dispsock->task = NULL;
881186462Sdougb		isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
882186462Sdougb		ISC_LINK_INIT(dispsock, link);
883186462Sdougb		ISC_LINK_INIT(dispsock, blink);
884186462Sdougb		dispsock->magic = DISPSOCK_MAGIC;
885186462Sdougb	}
886186462Sdougb
887186462Sdougb	/*
888186462Sdougb	 * Pick up a random UDP port and open a new socket with it.  Avoid
889186462Sdougb	 * choosing ports that share the same destination because it will be
890186462Sdougb	 * very likely to fail in bind(2) or connect(2).
891186462Sdougb	 */
892186462Sdougb	localaddr = disp->local;
893186462Sdougb	for (i = 0; i < 64; i++) {
894224092Sdougb		port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
895186462Sdougb							nports)];
896186462Sdougb		isc_sockaddr_setport(&localaddr, port);
897186462Sdougb
898186462Sdougb		bucket = dns_hash(qid, dest, 0, port);
899186462Sdougb		if (socket_search(qid, dest, port, bucket) != NULL)
900186462Sdougb			continue;
901193149Sdougb		bindoptions = 0;
902193149Sdougb		portentry = port_search(disp, port);
903193149Sdougb		if (portentry != NULL)
904193149Sdougb			bindoptions |= ISC_SOCKET_REUSEADDRESS;
905193149Sdougb		result = open_socket(sockmgr, &localaddr, bindoptions, &sock);
906193149Sdougb		if (result == ISC_R_SUCCESS) {
907193149Sdougb			if (portentry == NULL) {
908193149Sdougb				portentry = new_portentry(disp, port);
909193149Sdougb				if (portentry == NULL) {
910193149Sdougb					result = ISC_R_NOMEMORY;
911193149Sdougb					break;
912193149Sdougb				}
913193149Sdougb			}
914193149Sdougb			portentry->refs++;
915186462Sdougb			break;
916225361Sdougb		} else if (result == ISC_R_NOPERM) {
917225361Sdougb			char buf[ISC_SOCKADDR_FORMATSIZE];
918225361Sdougb			isc_sockaddr_format(&localaddr, buf, sizeof(buf));
919225361Sdougb			dispatch_log(disp, ISC_LOG_WARNING,
920225361Sdougb				     "open_socket(%s) -> %s: continuing",
921225361Sdougb				     buf, isc_result_totext(result));
922193149Sdougb		} else if (result != ISC_R_ADDRINUSE)
923193149Sdougb			break;
924186462Sdougb	}
925186462Sdougb
926186462Sdougb	if (result == ISC_R_SUCCESS) {
927186462Sdougb		dispsock->socket = sock;
928186462Sdougb		dispsock->host = *dest;
929193149Sdougb		dispsock->portentry = portentry;
930186462Sdougb		dispsock->bucket = bucket;
931186462Sdougb		ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
932186462Sdougb		*dispsockp = dispsock;
933186462Sdougb		*portp = port;
934186462Sdougb	} else {
935186462Sdougb		/*
936186462Sdougb		 * We could keep it in the inactive list, but since this should
937186462Sdougb		 * be an exceptional case and might be resource shortage, we'd
938186462Sdougb		 * rather destroy it.
939186462Sdougb		 */
940186462Sdougb		if (sock != NULL)
941186462Sdougb			isc_socket_detach(&sock);
942186462Sdougb		destroy_dispsocket(disp, &dispsock);
943186462Sdougb	}
944186462Sdougb
945186462Sdougb	return (result);
946186462Sdougb}
947186462Sdougb
948186462Sdougb/*%
949186462Sdougb * Destroy a dedicated dispatch socket.
950186462Sdougb */
951186462Sdougbstatic void
952186462Sdougbdestroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
953186462Sdougb	dispsocket_t *dispsock;
954186462Sdougb	dns_qid_t *qid;
955186462Sdougb
956186462Sdougb	/*
957186462Sdougb	 * The dispatch must be locked.
958186462Sdougb	 */
959186462Sdougb
960186462Sdougb	REQUIRE(dispsockp != NULL && *dispsockp != NULL);
961186462Sdougb	dispsock = *dispsockp;
962186462Sdougb	REQUIRE(!ISC_LINK_LINKED(dispsock, link));
963186462Sdougb
964186462Sdougb	disp->nsockets--;
965186462Sdougb	dispsock->magic = 0;
966193149Sdougb	if (dispsock->portentry != NULL)
967193149Sdougb		deref_portentry(disp, &dispsock->portentry);
968186462Sdougb	if (dispsock->socket != NULL)
969186462Sdougb		isc_socket_detach(&dispsock->socket);
970186462Sdougb	if (ISC_LINK_LINKED(dispsock, blink)) {
971186462Sdougb		qid = DNS_QID(disp);
972186462Sdougb		LOCK(&qid->lock);
973186462Sdougb		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
974186462Sdougb				blink);
975186462Sdougb		UNLOCK(&qid->lock);
976186462Sdougb	}
977186462Sdougb	if (dispsock->task != NULL)
978186462Sdougb		isc_task_detach(&dispsock->task);
979186462Sdougb	isc_mempool_put(disp->mgr->spool, dispsock);
980186462Sdougb
981186462Sdougb	*dispsockp = NULL;
982186462Sdougb}
983186462Sdougb
984186462Sdougb/*%
985186462Sdougb * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
986186462Sdougb * future reuse unless the total number of sockets are exceeding the maximum.
987186462Sdougb */
988186462Sdougbstatic void
989186462Sdougbdeactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
990186462Sdougb	isc_result_t result;
991186462Sdougb	dns_qid_t *qid;
992186462Sdougb
993186462Sdougb	/*
994186462Sdougb	 * The dispatch must be locked.
995186462Sdougb	 */
996186462Sdougb	ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
997186462Sdougb	if (dispsock->resp != NULL) {
998186462Sdougb		INSIST(dispsock->resp->dispsocket == dispsock);
999186462Sdougb		dispsock->resp->dispsocket = NULL;
1000186462Sdougb	}
1001186462Sdougb
1002193149Sdougb	INSIST(dispsock->portentry != NULL);
1003193149Sdougb	deref_portentry(disp, &dispsock->portentry);
1004193149Sdougb
1005224092Sdougb#ifdef BIND9
1006186462Sdougb	if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1007186462Sdougb		destroy_dispsocket(disp, &dispsock);
1008186462Sdougb	else {
1009186462Sdougb		result = isc_socket_close(dispsock->socket);
1010186462Sdougb
1011186462Sdougb		qid = DNS_QID(disp);
1012186462Sdougb		LOCK(&qid->lock);
1013186462Sdougb		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1014186462Sdougb				blink);
1015186462Sdougb		UNLOCK(&qid->lock);
1016186462Sdougb
1017186462Sdougb		if (result == ISC_R_SUCCESS)
1018186462Sdougb			ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1019186462Sdougb		else {
1020186462Sdougb			/*
1021186462Sdougb			 * If the underlying system does not allow this
1022186462Sdougb			 * optimization, destroy this temporary structure (and
1023186462Sdougb			 * create a new one for a new transaction).
1024186462Sdougb			 */
1025186462Sdougb			INSIST(result == ISC_R_NOTIMPLEMENTED);
1026186462Sdougb			destroy_dispsocket(disp, &dispsock);
1027186462Sdougb		}
1028186462Sdougb	}
1029224092Sdougb#else
1030224092Sdougb	/* This kind of optimization isn't necessary for normal use */
1031224092Sdougb	UNUSED(qid);
1032224092Sdougb	UNUSED(result);
1033224092Sdougb
1034224092Sdougb	destroy_dispsocket(disp, &dispsock);
1035224092Sdougb#endif
1036186462Sdougb}
1037186462Sdougb
1038135446Strhodes/*
1039186462Sdougb * Find an entry for query ID 'id', socket address 'dest', and port number
1040186462Sdougb * 'port'.
1041135446Strhodes * Return NULL if no such entry exists.
1042135446Strhodes */
1043135446Strhodesstatic dns_dispentry_t *
1044186462Sdougbentry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1045186462Sdougb	     in_port_t port, unsigned int bucket)
1046135446Strhodes{
1047135446Strhodes	dns_dispentry_t *res;
1048135446Strhodes
1049135446Strhodes	REQUIRE(bucket < qid->qid_nbuckets);
1050135446Strhodes
1051135446Strhodes	res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1052135446Strhodes
1053135446Strhodes	while (res != NULL) {
1054186462Sdougb		if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1055180477Sdougb		    res->port == port) {
1056135446Strhodes			return (res);
1057180477Sdougb		}
1058135446Strhodes		res = ISC_LIST_NEXT(res, link);
1059135446Strhodes	}
1060135446Strhodes
1061135446Strhodes	return (NULL);
1062135446Strhodes}
1063135446Strhodes
1064135446Strhodesstatic void
1065135446Strhodesfree_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1066135446Strhodes	INSIST(buf != NULL && len != 0);
1067135446Strhodes
1068135446Strhodes
1069135446Strhodes	switch (disp->socktype) {
1070135446Strhodes	case isc_sockettype_tcp:
1071135446Strhodes		INSIST(disp->tcpbuffers > 0);
1072135446Strhodes		disp->tcpbuffers--;
1073135446Strhodes		isc_mem_put(disp->mgr->mctx, buf, len);
1074135446Strhodes		break;
1075135446Strhodes	case isc_sockettype_udp:
1076135446Strhodes		LOCK(&disp->mgr->buffer_lock);
1077135446Strhodes		INSIST(disp->mgr->buffers > 0);
1078135446Strhodes		INSIST(len == disp->mgr->buffersize);
1079135446Strhodes		disp->mgr->buffers--;
1080135446Strhodes		isc_mempool_put(disp->mgr->bpool, buf);
1081135446Strhodes		UNLOCK(&disp->mgr->buffer_lock);
1082135446Strhodes		break;
1083135446Strhodes	default:
1084135446Strhodes		INSIST(0);
1085135446Strhodes		break;
1086135446Strhodes	}
1087135446Strhodes}
1088135446Strhodes
1089135446Strhodesstatic void *
1090135446Strhodesallocate_udp_buffer(dns_dispatch_t *disp) {
1091135446Strhodes	void *temp;
1092135446Strhodes
1093135446Strhodes	LOCK(&disp->mgr->buffer_lock);
1094135446Strhodes	temp = isc_mempool_get(disp->mgr->bpool);
1095135446Strhodes
1096135446Strhodes	if (temp != NULL)
1097135446Strhodes		disp->mgr->buffers++;
1098135446Strhodes	UNLOCK(&disp->mgr->buffer_lock);
1099135446Strhodes
1100135446Strhodes	return (temp);
1101135446Strhodes}
1102135446Strhodes
1103135446Strhodesstatic inline void
1104135446Strhodesfree_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1105135446Strhodes	if (disp->failsafe_ev == ev) {
1106135446Strhodes		INSIST(disp->shutdown_out == 1);
1107135446Strhodes		disp->shutdown_out = 0;
1108135446Strhodes
1109135446Strhodes		return;
1110135446Strhodes	}
1111135446Strhodes
1112135446Strhodes	isc_mempool_put(disp->mgr->epool, ev);
1113135446Strhodes}
1114135446Strhodes
1115135446Strhodesstatic inline dns_dispatchevent_t *
1116135446Strhodesallocate_event(dns_dispatch_t *disp) {
1117135446Strhodes	dns_dispatchevent_t *ev;
1118135446Strhodes
1119135446Strhodes	ev = isc_mempool_get(disp->mgr->epool);
1120135446Strhodes	if (ev == NULL)
1121135446Strhodes		return (NULL);
1122135446Strhodes	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1123135446Strhodes		       NULL, NULL, NULL, NULL, NULL);
1124135446Strhodes
1125135446Strhodes	return (ev);
1126135446Strhodes}
1127135446Strhodes
1128186462Sdougbstatic void
1129186462Sdougbudp_exrecv(isc_task_t *task, isc_event_t *ev) {
1130186462Sdougb	dispsocket_t *dispsock = ev->ev_arg;
1131186462Sdougb
1132186462Sdougb	UNUSED(task);
1133186462Sdougb
1134186462Sdougb	REQUIRE(VALID_DISPSOCK(dispsock));
1135186462Sdougb	udp_recv(ev, dispsock->disp, dispsock);
1136186462Sdougb}
1137186462Sdougb
1138186462Sdougbstatic void
1139186462Sdougbudp_shrecv(isc_task_t *task, isc_event_t *ev) {
1140186462Sdougb	dns_dispatch_t *disp = ev->ev_arg;
1141186462Sdougb
1142186462Sdougb	UNUSED(task);
1143186462Sdougb
1144186462Sdougb	REQUIRE(VALID_DISPATCH(disp));
1145186462Sdougb	udp_recv(ev, disp, NULL);
1146186462Sdougb}
1147186462Sdougb
1148135446Strhodes/*
 * Handle a completed receive on a UDP dispatch.
 *
 * 'ev_in' is the socket event, 'disp' the dispatch it belongs to, and
 * 'dispsock' the dedicated socket the event arrived on (udp_exrecv()), or
 * NULL when there is none (udp_shrecv()).  disp->lock is taken on entry
 * and released before returning; qid->lock is taken only when the response
 * entry has to be looked up with entry_search().  'ev_in' is freed on
 * every path.
 *
1149135446Strhodes * General flow:
1150135446Strhodes *
1151135446Strhodes * If I/O result == CANCELED or error, free the buffer.
1152135446Strhodes *
1153135446Strhodes * If query, free the buffer, restart.
1154135446Strhodes *
1155135446Strhodes * If response:
1156135446Strhodes *	Allocate event, fill in details.
1157135446Strhodes *		If cannot allocate, free buffer, restart.
1158135446Strhodes *	find target.  If not found, free buffer, restart.
1159135446Strhodes *	if event queue is not empty, queue.  else, send.
1160135446Strhodes *	restart.
1161135446Strhodes */
1162135446Strhodesstatic void
1163186462Sdougbudp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1164135446Strhodes	isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1165135446Strhodes	dns_messageid_t id;
1166135446Strhodes	isc_result_t dres;
1167135446Strhodes	isc_buffer_t source;
1168135446Strhodes	unsigned int flags;
1169186462Sdougb	dns_dispentry_t *resp = NULL;
1170135446Strhodes	dns_dispatchevent_t *rev;
1171135446Strhodes	unsigned int bucket;
1172135446Strhodes	isc_boolean_t killit;
1173135446Strhodes	isc_boolean_t queue_response;
1174135446Strhodes	dns_dispatchmgr_t *mgr;
1175135446Strhodes	dns_qid_t *qid;
1176135446Strhodes	isc_netaddr_t netaddr;
1177135446Strhodes	int match;
1178186462Sdougb	int result;
	/* Set once qid->lock has been taken, so 'unlock' knows to drop it. */
1179186462Sdougb	isc_boolean_t qidlocked = ISC_FALSE;
1180135446Strhodes
1181135446Strhodes	LOCK(&disp->lock);
1182135446Strhodes
1183135446Strhodes	mgr = disp->mgr;
1184135446Strhodes	qid = mgr->qid;
1185135446Strhodes
1186135446Strhodes	dispatch_log(disp, LVL(90),
1187135446Strhodes		     "got packet: requests %d, buffers %d, recvs %d",
1188135446Strhodes		     disp->requests, disp->mgr->buffers, disp->recv_pending);
1189135446Strhodes
1190186462Sdougb	if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1191135446Strhodes		/*
1192135446Strhodes		 * Unless the receive event was imported from a listening
1193135446Strhodes		 * interface, in which case the event type is
1194135446Strhodes		 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1195135446Strhodes		 */
1196135446Strhodes		INSIST(disp->recv_pending != 0);
1197135446Strhodes		disp->recv_pending = 0;
1198135446Strhodes	}
1199135446Strhodes
1200186462Sdougb	if (dispsock != NULL &&
1201186462Sdougb	    (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1202186462Sdougb		/*
1203186462Sdougb		 * dispsock->resp can be NULL if this transaction was canceled
1204186462Sdougb		 * just after receiving a response.  Since this socket is
1205186462Sdougb		 * exclusively used and there should be at most one receive
1206186462Sdougb		 * event the canceled event should have been no effect.  So
1207186462Sdougb		 * we can (and should) deactivate the socket right now.
1208186462Sdougb		 */
1209186462Sdougb		deactivate_dispsocket(disp, dispsock);
1210186462Sdougb		dispsock = NULL;
1211186462Sdougb	}
1212186462Sdougb
1213135446Strhodes	if (disp->shutting_down) {
1214135446Strhodes		/*
1215135446Strhodes		 * This dispatcher is shutting down.
1216135446Strhodes		 */
1217135446Strhodes		free_buffer(disp, ev->region.base, ev->region.length);
1218135446Strhodes
1219135446Strhodes		isc_event_free(&ev_in);
1220135446Strhodes		ev = NULL;
1221135446Strhodes
		/*
		 * If nothing else keeps the dispatch alive, post its control
		 * event once the dispatch lock has been released.
		 */
1222135446Strhodes		killit = destroy_disp_ok(disp);
1223135446Strhodes		UNLOCK(&disp->lock);
1224135446Strhodes		if (killit)
1225186462Sdougb			isc_task_send(disp->task[0], &disp->ctlevent);
1226135446Strhodes
1227135446Strhodes		return;
1228135446Strhodes	}
1229135446Strhodes
1230186462Sdougb	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1231186462Sdougb		if (dispsock != NULL) {
			/*
			 * The response entry is known from the socket; no
			 * table lookup (and no qid->lock) is needed.
			 */
1232186462Sdougb			resp = dispsock->resp;
1233186462Sdougb			id = resp->id;
1234186462Sdougb			if (ev->result != ISC_R_SUCCESS) {
1235186462Sdougb				/*
1236186462Sdougb				 * This is most likely a network error on a
1237186462Sdougb				 * connected socket.  It makes no sense to
1238186462Sdougb				 * check the address or parse the packet, but it
1239186462Sdougb				 * will help to return the error to the caller.
1240186462Sdougb				 */
1241186462Sdougb				goto sendresponse;
1242186462Sdougb			}
1243186462Sdougb		} else {
			/*
			 * No dedicated socket (none was passed in, or it was
			 * deactivated above): drop the packet without
			 * restarting the receive.
			 */
1244186462Sdougb			free_buffer(disp, ev->region.base, ev->region.length);
1245186462Sdougb
1246186462Sdougb			UNLOCK(&disp->lock);
1247186462Sdougb			isc_event_free(&ev_in);
1248186462Sdougb			return;
1249186462Sdougb		}
1250186462Sdougb	} else if (ev->result != ISC_R_SUCCESS) {
1251135446Strhodes		free_buffer(disp, ev->region.base, ev->region.length);
1252135446Strhodes
1253135446Strhodes		if (ev->result != ISC_R_CANCELED)
1254135446Strhodes			dispatch_log(disp, ISC_LOG_ERROR,
1255135446Strhodes				     "odd socket result in udp_recv(): %s",
1256135446Strhodes				     isc_result_totext(ev->result));
1257135446Strhodes
1258135446Strhodes		UNLOCK(&disp->lock);
1259135446Strhodes		isc_event_free(&ev_in);
1260135446Strhodes		return;
1261135446Strhodes	}
1262135446Strhodes
1263135446Strhodes	/*
1264135446Strhodes	 * If this is from a blackholed address, drop it.
1265135446Strhodes	 */
1266135446Strhodes	isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1267135446Strhodes	if (disp->mgr->blackhole != NULL &&
1268135446Strhodes	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1269186462Sdougb			  NULL, &match, NULL) == ISC_R_SUCCESS &&
1270135446Strhodes	    match > 0)
1271135446Strhodes	{
1272135446Strhodes		if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1273135446Strhodes			char netaddrstr[ISC_NETADDR_FORMATSIZE];
1274135446Strhodes			isc_netaddr_format(&netaddr, netaddrstr,
1275135446Strhodes					   sizeof(netaddrstr));
1276135446Strhodes			dispatch_log(disp, LVL(10),
1277135446Strhodes				     "blackholed packet from %s",
1278135446Strhodes				     netaddrstr);
1279135446Strhodes		}
1280135446Strhodes		free_buffer(disp, ev->region.base, ev->region.length);
1281135446Strhodes		goto restart;
1282135446Strhodes	}
1283135446Strhodes
1284135446Strhodes	/*
1285135446Strhodes	 * Peek into the buffer to see what we can see.
1286135446Strhodes	 */
1287135446Strhodes	isc_buffer_init(&source, ev->region.base, ev->region.length);
1288135446Strhodes	isc_buffer_add(&source, ev->n);
1289135446Strhodes	dres = dns_message_peekheader(&source, &id, &flags);
1290135446Strhodes	if (dres != ISC_R_SUCCESS) {
1291135446Strhodes		free_buffer(disp, ev->region.base, ev->region.length);
1292135446Strhodes		dispatch_log(disp, LVL(10), "got garbage packet");
1293135446Strhodes		goto restart;
1294135446Strhodes	}
1295135446Strhodes
1296135446Strhodes	dispatch_log(disp, LVL(92),
1297135446Strhodes		     "got valid DNS message header, /QR %c, id %u",
1298135446Strhodes		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1299135446Strhodes
1300135446Strhodes	/*
1301135446Strhodes	 * Look at flags.  If query, drop it. If response,
1302135446Strhodes	 * look to see where it goes.
1303135446Strhodes	 */
1304135446Strhodes	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1305135446Strhodes		/* query */
1306135446Strhodes		free_buffer(disp, ev->region.base, ev->region.length);
1307135446Strhodes		goto restart;
1308135446Strhodes	}
1309135446Strhodes
1310186462Sdougb	/*
1311186462Sdougb	 * Search for the corresponding response.  If we are using an exclusive
1312186462Sdougb	 * socket, we've already identified it and we can skip the search; but
1313186462Sdougb	 * the ID and the address must match the expected ones.
1314186462Sdougb	 */
1315186462Sdougb	if (resp == NULL) {
1316186462Sdougb		bucket = dns_hash(qid, &ev->address, id, disp->localport);
1317186462Sdougb		LOCK(&qid->lock);
1318186462Sdougb		qidlocked = ISC_TRUE;
1319186462Sdougb		resp = entry_search(qid, &ev->address, id, disp->localport,
1320186462Sdougb				    bucket);
		/*
		 * NOTE(review): 'bucket' is unsigned int but is logged with
		 * %d below.
		 */
1321186462Sdougb		dispatch_log(disp, LVL(90),
1322186462Sdougb			     "search for response in bucket %d: %s",
1323186462Sdougb			     bucket, (resp == NULL ? "not found" : "found"));
1324135446Strhodes
1325186462Sdougb		if (resp == NULL) {
1326193149Sdougb			inc_stats(mgr, dns_resstatscounter_mismatch);
1327186462Sdougb			free_buffer(disp, ev->region.base, ev->region.length);
1328186462Sdougb			goto unlock;
1329186462Sdougb		}
1330186462Sdougb	} else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1331186462Sdougb							 &resp->host)) {
1332186462Sdougb		dispatch_log(disp, LVL(90),
1333186462Sdougb			     "response to an exclusive socket doesn't match");
1334193149Sdougb		inc_stats(mgr, dns_resstatscounter_mismatch);
1335135446Strhodes		free_buffer(disp, ev->region.base, ev->region.length);
1336135446Strhodes		goto unlock;
1337186462Sdougb	}
1338165071Sdougb
1339165071Sdougb	/*
1340165071Sdougb	 * Now that we have the original dispatch the query was sent
1341165071Sdougb	 * from check that the address and port the response was
1342165071Sdougb	 * sent to make sense.
1343165071Sdougb	 */
1344165071Sdougb	if (disp != resp->disp) {
1345165071Sdougb		isc_sockaddr_t a1;
1346165071Sdougb		isc_sockaddr_t a2;
1347186462Sdougb
1348165071Sdougb		/*
1349165071Sdougb		 * Check that the socket types and ports match.
1350165071Sdougb		 */
1351165071Sdougb		if (disp->socktype != resp->disp->socktype ||
1352165071Sdougb		    isc_sockaddr_getport(&disp->local) !=
1353165071Sdougb		    isc_sockaddr_getport(&resp->disp->local)) {
1354165071Sdougb			free_buffer(disp, ev->region.base, ev->region.length);
1355165071Sdougb			goto unlock;
1356165071Sdougb		}
1357165071Sdougb
1358165071Sdougb		/*
1359165071Sdougb		 * If both dispatches are bound to an address then fail as
1360186462Sdougb		 * the addresses can't be equal (enforced by the IP stack).
1361165071Sdougb		 *
1362165071Sdougb		 * Note under Linux a packet can be sent out via IPv4 socket
1363165071Sdougb		 * and the response be received via a IPv6 socket.
1364186462Sdougb		 *
1365165071Sdougb		 * Requests sent out via IPv6 should always come back in
1366165071Sdougb		 * via IPv6.
1367165071Sdougb		 */
1368165071Sdougb		if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1369165071Sdougb		    isc_sockaddr_pf(&disp->local) != PF_INET6) {
1370165071Sdougb			free_buffer(disp, ev->region.base, ev->region.length);
1371165071Sdougb			goto unlock;
1372165071Sdougb		}
1373165071Sdougb		isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1374165071Sdougb		isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1375165071Sdougb		if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1376165071Sdougb		    !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1377165071Sdougb			free_buffer(disp, ev->region.base, ev->region.length);
1378165071Sdougb			goto unlock;
1379165071Sdougb		}
1380165071Sdougb	}
1381165071Sdougb
	/*
	 * 'resp' is the entry to notify.  This label is also reached directly
	 * from the exclusive-socket error path above, in which case the
	 * packet has not been parsed and rev->result carries the error.
	 */
1382186462Sdougb  sendresponse:
1383135446Strhodes	queue_response = resp->item_out;
1384135446Strhodes	rev = allocate_event(resp->disp);
1385135446Strhodes	if (rev == NULL) {
1386135446Strhodes		free_buffer(disp, ev->region.base, ev->region.length);
1387135446Strhodes		goto unlock;
1388135446Strhodes	}
1389135446Strhodes
1390135446Strhodes	/*
1391135446Strhodes	 * At this point, rev contains the event we want to fill in, and
1392135446Strhodes	 * resp contains the information on the place to send it to.
1393135446Strhodes	 * Send the event off.
1394135446Strhodes	 */
1395135446Strhodes	isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1396135446Strhodes	isc_buffer_add(&rev->buffer, ev->n);
1397186462Sdougb	rev->result = ev->result;
1398135446Strhodes	rev->id = id;
1399135446Strhodes	rev->addr = ev->address;
1400135446Strhodes	rev->pktinfo = ev->pktinfo;
1401135446Strhodes	rev->attributes = ev->attributes;
	/*
	 * If an event is already outstanding for this entry (item_out was
	 * set), queue this one on resp->items; otherwise send it now and
	 * mark the entry as having an event out.
	 */
1402135446Strhodes	if (queue_response) {
1403135446Strhodes		ISC_LIST_APPEND(resp->items, rev, ev_link);
1404135446Strhodes	} else {
1405135446Strhodes		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1406135446Strhodes			       DNS_EVENT_DISPATCH,
1407135446Strhodes			       resp->action, resp->arg, resp, NULL, NULL);
1408135446Strhodes		request_log(disp, resp, LVL(90),
1409135446Strhodes			    "[a] Sent event %p buffer %p len %d to task %p",
1410135446Strhodes			    rev, rev->buffer.base, rev->buffer.length,
1411135446Strhodes			    resp->task);
1412135446Strhodes		resp->item_out = ISC_TRUE;
1413135446Strhodes		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1414135446Strhodes	}
1415135446Strhodes unlock:
1416186462Sdougb	if (qidlocked)
1417186462Sdougb		UNLOCK(&qid->lock);
1418135446Strhodes
1419135446Strhodes	/*
1420135446Strhodes	 * Restart recv() to get the next packet.
1421135446Strhodes	 */
1422135446Strhodes restart:
1423186462Sdougb	result = startrecv(disp, dispsock);
1424186462Sdougb	if (result != ISC_R_SUCCESS && dispsock != NULL) {
1425186462Sdougb		/*
1426186462Sdougb		 * XXX: weird. There seems to be no recovery process other than
1427186462Sdougb		 * deactivate this socket anyway (since we cannot start
1428186462Sdougb		 * receiving, we won't be able to receive a cancel event
1429186462Sdougb		 * from the user).
1430186462Sdougb		 */
1431186462Sdougb		deactivate_dispsocket(disp, dispsock);
1432186462Sdougb	}
1433135446Strhodes	UNLOCK(&disp->lock);
1434135446Strhodes
1435135446Strhodes	isc_event_free(&ev_in);
1436135446Strhodes}
1437135446Strhodes
1438135446Strhodes/*
1439135446Strhodes * General flow:
1440135446Strhodes *
1441135446Strhodes * If I/O result == CANCELED, EOF, or error, notify everyone as the
1442135446Strhodes * various queues drain.
1443135446Strhodes *
1444135446Strhodes * If query, restart.
1445135446Strhodes *
1446135446Strhodes * If response:
1447135446Strhodes *	Allocate event, fill in details.
1448135446Strhodes *		If cannot allocate, restart.
1449135446Strhodes *	find target.  If not found, restart.
1450135446Strhodes *	if event queue is not empty, queue.  else, send.
1451135446Strhodes *	restart.
1452135446Strhodes */
1453135446Strhodesstatic void
1454135446Strhodestcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1455135446Strhodes	dns_dispatch_t *disp = ev_in->ev_arg;
1456135446Strhodes	dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1457135446Strhodes	dns_messageid_t id;
1458135446Strhodes	isc_result_t dres;
1459135446Strhodes	unsigned int flags;
1460135446Strhodes	dns_dispentry_t *resp;
1461135446Strhodes	dns_dispatchevent_t *rev;
1462135446Strhodes	unsigned int bucket;
1463135446Strhodes	isc_boolean_t killit;
1464135446Strhodes	isc_boolean_t queue_response;
1465135446Strhodes	dns_qid_t *qid;
1466135446Strhodes	int level;
1467135446Strhodes	char buf[ISC_SOCKADDR_FORMATSIZE];
1468135446Strhodes
1469135446Strhodes	UNUSED(task);
1470135446Strhodes
1471135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
1472135446Strhodes
1473135446Strhodes	qid = disp->qid;
1474135446Strhodes
1475135446Strhodes	dispatch_log(disp, LVL(90),
1476135446Strhodes		     "got TCP packet: requests %d, buffers %d, recvs %d",
1477135446Strhodes		     disp->requests, disp->tcpbuffers, disp->recv_pending);
1478135446Strhodes
1479135446Strhodes	LOCK(&disp->lock);
1480135446Strhodes
1481135446Strhodes	INSIST(disp->recv_pending != 0);
1482135446Strhodes	disp->recv_pending = 0;
1483135446Strhodes
1484135446Strhodes	if (disp->refcount == 0) {
1485135446Strhodes		/*
1486135446Strhodes		 * This dispatcher is shutting down.  Force cancelation.
1487135446Strhodes		 */
1488135446Strhodes		tcpmsg->result = ISC_R_CANCELED;
1489135446Strhodes	}
1490135446Strhodes
1491135446Strhodes	if (tcpmsg->result != ISC_R_SUCCESS) {
1492135446Strhodes		switch (tcpmsg->result) {
1493135446Strhodes		case ISC_R_CANCELED:
1494135446Strhodes			break;
1495186462Sdougb
1496135446Strhodes		case ISC_R_EOF:
1497135446Strhodes			dispatch_log(disp, LVL(90), "shutting down on EOF");
1498135446Strhodes			do_cancel(disp);
1499135446Strhodes			break;
1500135446Strhodes
1501135446Strhodes		case ISC_R_CONNECTIONRESET:
1502135446Strhodes			level = ISC_LOG_INFO;
1503135446Strhodes			goto logit;
1504135446Strhodes
1505135446Strhodes		default:
1506135446Strhodes			level = ISC_LOG_ERROR;
1507135446Strhodes		logit:
1508135446Strhodes			isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1509135446Strhodes			dispatch_log(disp, level, "shutting down due to TCP "
1510135446Strhodes				     "receive error: %s: %s", buf,
1511135446Strhodes				     isc_result_totext(tcpmsg->result));
1512135446Strhodes			do_cancel(disp);
1513135446Strhodes			break;
1514135446Strhodes		}
1515135446Strhodes
1516135446Strhodes		/*
1517135446Strhodes		 * The event is statically allocated in the tcpmsg
1518135446Strhodes		 * structure, and destroy_disp() frees the tcpmsg, so we must
1519135446Strhodes		 * free the event *before* calling destroy_disp().
1520135446Strhodes		 */
1521135446Strhodes		isc_event_free(&ev_in);
1522135446Strhodes
1523135446Strhodes		disp->shutting_down = 1;
1524135446Strhodes		disp->shutdown_why = tcpmsg->result;
1525135446Strhodes
1526135446Strhodes		/*
1527135446Strhodes		 * If the recv() was canceled pass the word on.
1528135446Strhodes		 */
1529135446Strhodes		killit = destroy_disp_ok(disp);
1530135446Strhodes		UNLOCK(&disp->lock);
1531135446Strhodes		if (killit)
1532186462Sdougb			isc_task_send(disp->task[0], &disp->ctlevent);
1533135446Strhodes		return;
1534135446Strhodes	}
1535135446Strhodes
1536135446Strhodes	dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1537135446Strhodes		     tcpmsg->result,
1538135446Strhodes		     tcpmsg->buffer.length, tcpmsg->buffer.base);
1539135446Strhodes
1540135446Strhodes	/*
1541135446Strhodes	 * Peek into the buffer to see what we can see.
1542135446Strhodes	 */
1543135446Strhodes	dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1544135446Strhodes	if (dres != ISC_R_SUCCESS) {
1545135446Strhodes		dispatch_log(disp, LVL(10), "got garbage packet");
1546135446Strhodes		goto restart;
1547135446Strhodes	}
1548135446Strhodes
1549135446Strhodes	dispatch_log(disp, LVL(92),
1550135446Strhodes		     "got valid DNS message header, /QR %c, id %u",
1551135446Strhodes		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1552135446Strhodes
1553135446Strhodes	/*
1554135446Strhodes	 * Allocate an event to send to the query or response client, and
1555135446Strhodes	 * allocate a new buffer for our use.
1556135446Strhodes	 */
1557135446Strhodes
1558135446Strhodes	/*
1559135446Strhodes	 * Look at flags.  If query, drop it. If response,
1560135446Strhodes	 * look to see where it goes.
1561135446Strhodes	 */
1562135446Strhodes	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1563135446Strhodes		/*
1564135446Strhodes		 * Query.
1565135446Strhodes		 */
1566135446Strhodes		goto restart;
1567135446Strhodes	}
1568135446Strhodes
1569135446Strhodes	/*
1570135446Strhodes	 * Response.
1571135446Strhodes	 */
1572180477Sdougb	bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1573135446Strhodes	LOCK(&qid->lock);
1574186462Sdougb	resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1575135446Strhodes	dispatch_log(disp, LVL(90),
1576135446Strhodes		     "search for response in bucket %d: %s",
1577135446Strhodes		     bucket, (resp == NULL ? "not found" : "found"));
1578135446Strhodes
1579135446Strhodes	if (resp == NULL)
1580135446Strhodes		goto unlock;
1581135446Strhodes	queue_response = resp->item_out;
1582135446Strhodes	rev = allocate_event(disp);
1583135446Strhodes	if (rev == NULL)
1584135446Strhodes		goto unlock;
1585135446Strhodes
1586135446Strhodes	/*
1587135446Strhodes	 * At this point, rev contains the event we want to fill in, and
1588135446Strhodes	 * resp contains the information on the place to send it to.
1589135446Strhodes	 * Send the event off.
1590135446Strhodes	 */
1591135446Strhodes	dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1592135446Strhodes	disp->tcpbuffers++;
1593135446Strhodes	rev->result = ISC_R_SUCCESS;
1594135446Strhodes	rev->id = id;
1595135446Strhodes	rev->addr = tcpmsg->address;
1596135446Strhodes	if (queue_response) {
1597135446Strhodes		ISC_LIST_APPEND(resp->items, rev, ev_link);
1598135446Strhodes	} else {
1599135446Strhodes		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1600135446Strhodes			       resp->action, resp->arg, resp, NULL, NULL);
1601135446Strhodes		request_log(disp, resp, LVL(90),
1602135446Strhodes			    "[b] Sent event %p buffer %p len %d to task %p",
1603135446Strhodes			    rev, rev->buffer.base, rev->buffer.length,
1604135446Strhodes			    resp->task);
1605135446Strhodes		resp->item_out = ISC_TRUE;
1606135446Strhodes		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1607135446Strhodes	}
1608135446Strhodes unlock:
1609135446Strhodes	UNLOCK(&qid->lock);
1610135446Strhodes
1611135446Strhodes	/*
1612135446Strhodes	 * Restart recv() to get the next packet.
1613135446Strhodes	 */
1614135446Strhodes restart:
1615186462Sdougb	(void)startrecv(disp, NULL);
1616135446Strhodes
1617135446Strhodes	UNLOCK(&disp->lock);
1618135446Strhodes
1619135446Strhodes	isc_event_free(&ev_in);
1620135446Strhodes}
1621135446Strhodes
1622135446Strhodes/*
1623135446Strhodes * disp must be locked.
1624135446Strhodes */
1625186462Sdougbstatic isc_result_t
1626186462Sdougbstartrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1627135446Strhodes	isc_result_t res;
1628135446Strhodes	isc_region_t region;
1629186462Sdougb	isc_socket_t *socket;
1630135446Strhodes
1631135446Strhodes	if (disp->shutting_down == 1)
1632186462Sdougb		return (ISC_R_SUCCESS);
1633135446Strhodes
1634135446Strhodes	if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1635186462Sdougb		return (ISC_R_SUCCESS);
1636135446Strhodes
1637186462Sdougb	if (disp->recv_pending != 0 && dispsock == NULL)
1638186462Sdougb		return (ISC_R_SUCCESS);
1639135446Strhodes
1640135446Strhodes	if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1641186462Sdougb		return (ISC_R_NOMEMORY);
1642135446Strhodes
1643186462Sdougb	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1644186462Sdougb	    dispsock == NULL)
1645186462Sdougb		return (ISC_R_SUCCESS);
1646186462Sdougb
1647186462Sdougb	if (dispsock != NULL)
1648186462Sdougb		socket = dispsock->socket;
1649186462Sdougb	else
1650186462Sdougb		socket = disp->socket;
1651186462Sdougb	INSIST(socket != NULL);
1652186462Sdougb
1653135446Strhodes	switch (disp->socktype) {
1654135446Strhodes		/*
1655135446Strhodes		 * UDP reads are always maximal.
1656135446Strhodes		 */
1657135446Strhodes	case isc_sockettype_udp:
1658135446Strhodes		region.length = disp->mgr->buffersize;
1659135446Strhodes		region.base = allocate_udp_buffer(disp);
1660135446Strhodes		if (region.base == NULL)
1661186462Sdougb			return (ISC_R_NOMEMORY);
1662186462Sdougb		if (dispsock != NULL) {
1663186462Sdougb			res = isc_socket_recv(socket, &region, 1,
1664186462Sdougb					      dispsock->task, udp_exrecv,
1665186462Sdougb					      dispsock);
1666186462Sdougb			if (res != ISC_R_SUCCESS) {
1667186462Sdougb				free_buffer(disp, region.base, region.length);
1668186462Sdougb				return (res);
1669186462Sdougb			}
1670186462Sdougb		} else {
1671186462Sdougb			res = isc_socket_recv(socket, &region, 1,
1672186462Sdougb					      disp->task[0], udp_shrecv, disp);
1673186462Sdougb			if (res != ISC_R_SUCCESS) {
1674186462Sdougb				free_buffer(disp, region.base, region.length);
1675186462Sdougb				disp->shutdown_why = res;
1676186462Sdougb				disp->shutting_down = 1;
1677186462Sdougb				do_cancel(disp);
1678186462Sdougb				return (ISC_R_SUCCESS); /* recover by cancel */
1679186462Sdougb			}
1680186462Sdougb			INSIST(disp->recv_pending == 0);
1681186462Sdougb			disp->recv_pending = 1;
1682135446Strhodes		}
1683135446Strhodes		break;
1684135446Strhodes
1685135446Strhodes	case isc_sockettype_tcp:
1686186462Sdougb		res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1687135446Strhodes					     tcp_recv, disp);
1688135446Strhodes		if (res != ISC_R_SUCCESS) {
1689135446Strhodes			disp->shutdown_why = res;
1690135446Strhodes			disp->shutting_down = 1;
1691135446Strhodes			do_cancel(disp);
1692186462Sdougb			return (ISC_R_SUCCESS); /* recover by cancel */
1693135446Strhodes		}
1694135446Strhodes		INSIST(disp->recv_pending == 0);
1695135446Strhodes		disp->recv_pending = 1;
1696135446Strhodes		break;
1697170222Sdougb	default:
1698170222Sdougb		INSIST(0);
1699170222Sdougb		break;
1700135446Strhodes	}
1701186462Sdougb
1702186462Sdougb	return (ISC_R_SUCCESS);
1703135446Strhodes}
1704135446Strhodes
1705135446Strhodes/*
1706135446Strhodes * Mgr must be locked when calling this function.
1707135446Strhodes */
1708135446Strhodesstatic isc_boolean_t
1709135446Strhodesdestroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1710135446Strhodes	mgr_log(mgr, LVL(90),
1711135446Strhodes		"destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1712135446Strhodes		"epool=%d, rpool=%d, dpool=%d",
1713135446Strhodes		MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1714135446Strhodes		isc_mempool_getallocated(mgr->epool),
1715135446Strhodes		isc_mempool_getallocated(mgr->rpool),
1716135446Strhodes		isc_mempool_getallocated(mgr->dpool));
1717135446Strhodes	if (!MGR_IS_SHUTTINGDOWN(mgr))
1718135446Strhodes		return (ISC_FALSE);
1719135446Strhodes	if (!ISC_LIST_EMPTY(mgr->list))
1720135446Strhodes		return (ISC_FALSE);
1721135446Strhodes	if (isc_mempool_getallocated(mgr->epool) != 0)
1722135446Strhodes		return (ISC_FALSE);
1723135446Strhodes	if (isc_mempool_getallocated(mgr->rpool) != 0)
1724135446Strhodes		return (ISC_FALSE);
1725135446Strhodes	if (isc_mempool_getallocated(mgr->dpool) != 0)
1726135446Strhodes		return (ISC_FALSE);
1727135446Strhodes
1728135446Strhodes	return (ISC_TRUE);
1729135446Strhodes}
1730135446Strhodes
1731135446Strhodes/*
1732135446Strhodes * Mgr must be unlocked when calling this function.
1733135446Strhodes */
1734135446Strhodesstatic void
1735135446Strhodesdestroy_mgr(dns_dispatchmgr_t **mgrp) {
1736135446Strhodes	isc_mem_t *mctx;
1737135446Strhodes	dns_dispatchmgr_t *mgr;
1738135446Strhodes
1739135446Strhodes	mgr = *mgrp;
1740135446Strhodes	*mgrp = NULL;
1741135446Strhodes
1742135446Strhodes	mctx = mgr->mctx;
1743135446Strhodes
1744135446Strhodes	mgr->magic = 0;
1745135446Strhodes	mgr->mctx = NULL;
1746135446Strhodes	DESTROYLOCK(&mgr->lock);
1747135446Strhodes	mgr->state = 0;
1748135446Strhodes
1749180477Sdougb	DESTROYLOCK(&mgr->arc4_lock);
1750180477Sdougb
1751135446Strhodes	isc_mempool_destroy(&mgr->epool);
1752135446Strhodes	isc_mempool_destroy(&mgr->rpool);
1753135446Strhodes	isc_mempool_destroy(&mgr->dpool);
1754224092Sdougb	if (mgr->bpool != NULL)
1755224092Sdougb		isc_mempool_destroy(&mgr->bpool);
1756224092Sdougb	if (mgr->spool != NULL)
1757224092Sdougb		isc_mempool_destroy(&mgr->spool);
1758135446Strhodes
1759135446Strhodes	DESTROYLOCK(&mgr->pool_lock);
1760135446Strhodes
1761224092Sdougb#ifdef BIND9
1762135446Strhodes	if (mgr->entropy != NULL)
1763135446Strhodes		isc_entropy_detach(&mgr->entropy);
1764224092Sdougb#endif /* BIND9 */
1765135446Strhodes	if (mgr->qid != NULL)
1766135446Strhodes		qid_destroy(mctx, &mgr->qid);
1767135446Strhodes
1768135446Strhodes	DESTROYLOCK(&mgr->buffer_lock);
1769135446Strhodes
1770135446Strhodes	if (mgr->blackhole != NULL)
1771135446Strhodes		dns_acl_detach(&mgr->blackhole);
1772135446Strhodes
1773193149Sdougb	if (mgr->stats != NULL)
1774193149Sdougb		isc_stats_detach(&mgr->stats);
1775193149Sdougb
1776186462Sdougb	if (mgr->v4ports != NULL) {
1777186462Sdougb		isc_mem_put(mctx, mgr->v4ports,
1778186462Sdougb			    mgr->nv4ports * sizeof(in_port_t));
1779186462Sdougb	}
1780186462Sdougb	if (mgr->v6ports != NULL) {
1781186462Sdougb		isc_mem_put(mctx, mgr->v6ports,
1782186462Sdougb			    mgr->nv6ports * sizeof(in_port_t));
1783186462Sdougb	}
1784135446Strhodes	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1785135446Strhodes	isc_mem_detach(&mctx);
1786135446Strhodes}
1787135446Strhodes
1788135446Strhodesstatic isc_result_t
1789186462Sdougbopen_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1790186462Sdougb	    unsigned int options, isc_socket_t **sockp)
1791135446Strhodes{
1792135446Strhodes	isc_socket_t *sock;
1793135446Strhodes	isc_result_t result;
1794135446Strhodes
1795186462Sdougb	sock = *sockp;
1796186462Sdougb	if (sock == NULL) {
1797186462Sdougb		result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1798186462Sdougb					   isc_sockettype_udp, &sock);
1799186462Sdougb		if (result != ISC_R_SUCCESS)
1800186462Sdougb			return (result);
1801193149Sdougb		isc_socket_setname(sock, "dispatcher", NULL);
1802186462Sdougb	} else {
1803224092Sdougb#ifdef BIND9
1804186462Sdougb		result = isc_socket_open(sock);
1805186462Sdougb		if (result != ISC_R_SUCCESS)
1806186462Sdougb			return (result);
1807224092Sdougb#else
1808224092Sdougb		INSIST(0);
1809224092Sdougb#endif
1810186462Sdougb	}
1811135446Strhodes
1812135446Strhodes#ifndef ISC_ALLOW_MAPPED
1813135446Strhodes	isc_socket_ipv6only(sock, ISC_TRUE);
1814135446Strhodes#endif
1815182645Sdougb	result = isc_socket_bind(sock, local, options);
1816135446Strhodes	if (result != ISC_R_SUCCESS) {
1817186462Sdougb		if (*sockp == NULL)
1818186462Sdougb			isc_socket_detach(&sock);
1819224092Sdougb		else {
1820224092Sdougb#ifdef BIND9
1821186462Sdougb			isc_socket_close(sock);
1822224092Sdougb#else
1823224092Sdougb			INSIST(0);
1824224092Sdougb#endif
1825224092Sdougb		}
1826135446Strhodes		return (result);
1827135446Strhodes	}
1828135446Strhodes
1829135446Strhodes	*sockp = sock;
1830135446Strhodes	return (ISC_R_SUCCESS);
1831135446Strhodes}
1832135446Strhodes
1833186462Sdougb/*%
1834186462Sdougb * Create a temporary port list to set the initial default set of dispatch
1835186462Sdougb * ports: [1024, 65535].  This is almost meaningless as the application will
1836186462Sdougb * normally set the ports explicitly, but is provided to fill some minor corner
1837186462Sdougb * cases.
1838186462Sdougb */
1839186462Sdougbstatic isc_result_t
1840186462Sdougbcreate_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1841186462Sdougb	isc_result_t result;
1842186462Sdougb
1843186462Sdougb	result = isc_portset_create(mctx, portsetp);
1844186462Sdougb	if (result != ISC_R_SUCCESS)
1845186462Sdougb		return (result);
1846186462Sdougb	isc_portset_addrange(*portsetp, 1024, 65535);
1847186462Sdougb
1848186462Sdougb	return (ISC_R_SUCCESS);
1849186462Sdougb}
1850186462Sdougb
1851135446Strhodes/*
1852135446Strhodes * Publics.
1853135446Strhodes */
1854135446Strhodes
1855135446Strhodesisc_result_t
1856135446Strhodesdns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1857135446Strhodes		       dns_dispatchmgr_t **mgrp)
1858135446Strhodes{
1859135446Strhodes	dns_dispatchmgr_t *mgr;
1860135446Strhodes	isc_result_t result;
1861186462Sdougb	isc_portset_t *v4portset = NULL;
1862186462Sdougb	isc_portset_t *v6portset = NULL;
1863135446Strhodes
1864135446Strhodes	REQUIRE(mctx != NULL);
1865135446Strhodes	REQUIRE(mgrp != NULL && *mgrp == NULL);
1866135446Strhodes
1867135446Strhodes	mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1868135446Strhodes	if (mgr == NULL)
1869135446Strhodes		return (ISC_R_NOMEMORY);
1870135446Strhodes
1871135446Strhodes	mgr->mctx = NULL;
1872135446Strhodes	isc_mem_attach(mctx, &mgr->mctx);
1873135446Strhodes
1874135446Strhodes	mgr->blackhole = NULL;
1875193149Sdougb	mgr->stats = NULL;
1876135446Strhodes
1877135446Strhodes	result = isc_mutex_init(&mgr->lock);
1878135446Strhodes	if (result != ISC_R_SUCCESS)
1879135446Strhodes		goto deallocate;
1880135446Strhodes
1881180477Sdougb	result = isc_mutex_init(&mgr->arc4_lock);
1882135446Strhodes	if (result != ISC_R_SUCCESS)
1883135446Strhodes		goto kill_lock;
1884135446Strhodes
1885180477Sdougb	result = isc_mutex_init(&mgr->buffer_lock);
1886180477Sdougb	if (result != ISC_R_SUCCESS)
1887180477Sdougb		goto kill_arc4_lock;
1888180477Sdougb
1889135446Strhodes	result = isc_mutex_init(&mgr->pool_lock);
1890135446Strhodes	if (result != ISC_R_SUCCESS)
1891135446Strhodes		goto kill_buffer_lock;
1892135446Strhodes
1893135446Strhodes	mgr->epool = NULL;
1894135446Strhodes	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1895135446Strhodes			       &mgr->epool) != ISC_R_SUCCESS) {
1896135446Strhodes		result = ISC_R_NOMEMORY;
1897135446Strhodes		goto kill_pool_lock;
1898135446Strhodes	}
1899135446Strhodes
1900135446Strhodes	mgr->rpool = NULL;
1901135446Strhodes	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1902135446Strhodes			       &mgr->rpool) != ISC_R_SUCCESS) {
1903135446Strhodes		result = ISC_R_NOMEMORY;
1904135446Strhodes		goto kill_epool;
1905135446Strhodes	}
1906135446Strhodes
1907135446Strhodes	mgr->dpool = NULL;
1908135446Strhodes	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1909135446Strhodes			       &mgr->dpool) != ISC_R_SUCCESS) {
1910135446Strhodes		result = ISC_R_NOMEMORY;
1911135446Strhodes		goto kill_rpool;
1912135446Strhodes	}
1913135446Strhodes
1914135446Strhodes	isc_mempool_setname(mgr->epool, "dispmgr_epool");
1915135446Strhodes	isc_mempool_setfreemax(mgr->epool, 1024);
1916135446Strhodes	isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1917135446Strhodes
1918135446Strhodes	isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1919135446Strhodes	isc_mempool_setfreemax(mgr->rpool, 1024);
1920135446Strhodes	isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1921135446Strhodes
1922135446Strhodes	isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1923135446Strhodes	isc_mempool_setfreemax(mgr->dpool, 1024);
1924135446Strhodes	isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1925135446Strhodes
1926135446Strhodes	mgr->buffers = 0;
1927135446Strhodes	mgr->buffersize = 0;
1928135446Strhodes	mgr->maxbuffers = 0;
1929135446Strhodes	mgr->bpool = NULL;
1930186462Sdougb	mgr->spool = NULL;
1931135446Strhodes	mgr->entropy = NULL;
1932135446Strhodes	mgr->qid = NULL;
1933135446Strhodes	mgr->state = 0;
1934135446Strhodes	ISC_LIST_INIT(mgr->list);
1935186462Sdougb	mgr->v4ports = NULL;
1936186462Sdougb	mgr->v6ports = NULL;
1937186462Sdougb	mgr->nv4ports = 0;
1938186462Sdougb	mgr->nv6ports = 0;
1939135446Strhodes	mgr->magic = DNS_DISPATCHMGR_MAGIC;
1940135446Strhodes
1941186462Sdougb	result = create_default_portset(mctx, &v4portset);
1942186462Sdougb	if (result == ISC_R_SUCCESS) {
1943186462Sdougb		result = create_default_portset(mctx, &v6portset);
1944186462Sdougb		if (result == ISC_R_SUCCESS) {
1945186462Sdougb			result = dns_dispatchmgr_setavailports(mgr,
1946186462Sdougb							       v4portset,
1947186462Sdougb							       v6portset);
1948186462Sdougb		}
1949186462Sdougb	}
1950186462Sdougb	if (v4portset != NULL)
1951186462Sdougb		isc_portset_destroy(mctx, &v4portset);
1952186462Sdougb	if (v6portset != NULL)
1953186462Sdougb		isc_portset_destroy(mctx, &v6portset);
1954186462Sdougb	if (result != ISC_R_SUCCESS)
1955186462Sdougb		goto kill_dpool;
1956186462Sdougb
1957224092Sdougb#ifdef BIND9
1958135446Strhodes	if (entropy != NULL)
1959135446Strhodes		isc_entropy_attach(entropy, &mgr->entropy);
1960224092Sdougb#else
1961224092Sdougb	UNUSED(entropy);
1962224092Sdougb#endif
1963135446Strhodes
1964224092Sdougb	dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
1965180477Sdougb
1966135446Strhodes	*mgrp = mgr;
1967135446Strhodes	return (ISC_R_SUCCESS);
1968135446Strhodes
1969186462Sdougb kill_dpool:
1970186462Sdougb	isc_mempool_destroy(&mgr->dpool);
1971135446Strhodes kill_rpool:
1972135446Strhodes	isc_mempool_destroy(&mgr->rpool);
1973135446Strhodes kill_epool:
1974135446Strhodes	isc_mempool_destroy(&mgr->epool);
1975135446Strhodes kill_pool_lock:
1976135446Strhodes	DESTROYLOCK(&mgr->pool_lock);
1977135446Strhodes kill_buffer_lock:
1978135446Strhodes	DESTROYLOCK(&mgr->buffer_lock);
1979180477Sdougb kill_arc4_lock:
1980180477Sdougb	DESTROYLOCK(&mgr->arc4_lock);
1981135446Strhodes kill_lock:
1982135446Strhodes	DESTROYLOCK(&mgr->lock);
1983135446Strhodes deallocate:
1984135446Strhodes	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1985135446Strhodes	isc_mem_detach(&mctx);
1986135446Strhodes
1987135446Strhodes	return (result);
1988135446Strhodes}
1989135446Strhodes
1990135446Strhodesvoid
1991135446Strhodesdns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1992135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
1993135446Strhodes	if (mgr->blackhole != NULL)
1994135446Strhodes		dns_acl_detach(&mgr->blackhole);
1995135446Strhodes	dns_acl_attach(blackhole, &mgr->blackhole);
1996135446Strhodes}
1997135446Strhodes
1998135446Strhodesdns_acl_t *
1999135446Strhodesdns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
2000135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
2001135446Strhodes	return (mgr->blackhole);
2002135446Strhodes}
2003135446Strhodes
2004135446Strhodesvoid
2005135446Strhodesdns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
2006135446Strhodes				 dns_portlist_t *portlist)
2007135446Strhodes{
2008135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
2009186462Sdougb	UNUSED(portlist);
2010186462Sdougb
2011186462Sdougb	/* This function is deprecated: use dns_dispatchmgr_setavailports(). */
2012186462Sdougb	return;
2013135446Strhodes}
2014135446Strhodes
2015135446Strhodesdns_portlist_t *
2016135446Strhodesdns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
2017135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
2018186462Sdougb	return (NULL);		/* this function is deprecated */
2019135446Strhodes}
2020135446Strhodes
2021186462Sdougbisc_result_t
2022186462Sdougbdns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
2023186462Sdougb			      isc_portset_t *v6portset)
2024186462Sdougb{
2025186462Sdougb	in_port_t *v4ports, *v6ports, p;
2026186462Sdougb	unsigned int nv4ports, nv6ports, i4, i6;
2027186462Sdougb
2028186462Sdougb	REQUIRE(VALID_DISPATCHMGR(mgr));
2029186462Sdougb
2030186462Sdougb	nv4ports = isc_portset_nports(v4portset);
2031186462Sdougb	nv6ports = isc_portset_nports(v6portset);
2032186462Sdougb
2033186462Sdougb	v4ports = NULL;
2034186462Sdougb	if (nv4ports != 0) {
2035186462Sdougb		v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
2036186462Sdougb		if (v4ports == NULL)
2037186462Sdougb			return (ISC_R_NOMEMORY);
2038186462Sdougb	}
2039186462Sdougb	v6ports = NULL;
2040186462Sdougb	if (nv6ports != 0) {
2041186462Sdougb		v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
2042186462Sdougb		if (v6ports == NULL) {
2043186462Sdougb			if (v4ports != NULL) {
2044186462Sdougb				isc_mem_put(mgr->mctx, v4ports,
2045186462Sdougb					    sizeof(in_port_t) *
2046186462Sdougb					    isc_portset_nports(v4portset));
2047186462Sdougb			}
2048186462Sdougb			return (ISC_R_NOMEMORY);
2049186462Sdougb		}
2050186462Sdougb	}
2051186462Sdougb
2052186462Sdougb	p = 0;
2053186462Sdougb	i4 = 0;
2054186462Sdougb	i6 = 0;
2055186462Sdougb	do {
2056186462Sdougb		if (isc_portset_isset(v4portset, p)) {
2057186462Sdougb			INSIST(i4 < nv4ports);
2058186462Sdougb			v4ports[i4++] = p;
2059186462Sdougb		}
2060186462Sdougb		if (isc_portset_isset(v6portset, p)) {
2061186462Sdougb			INSIST(i6 < nv6ports);
2062186462Sdougb			v6ports[i6++] = p;
2063186462Sdougb		}
2064186462Sdougb	} while (p++ < 65535);
2065186462Sdougb	INSIST(i4 == nv4ports && i6 == nv6ports);
2066186462Sdougb
2067186462Sdougb	PORTBUFLOCK(mgr);
2068186462Sdougb	if (mgr->v4ports != NULL) {
2069186462Sdougb		isc_mem_put(mgr->mctx, mgr->v4ports,
2070186462Sdougb			    mgr->nv4ports * sizeof(in_port_t));
2071186462Sdougb	}
2072186462Sdougb	mgr->v4ports = v4ports;
2073186462Sdougb	mgr->nv4ports = nv4ports;
2074186462Sdougb
2075186462Sdougb	if (mgr->v6ports != NULL) {
2076186462Sdougb		isc_mem_put(mgr->mctx, mgr->v6ports,
2077186462Sdougb			    mgr->nv6ports * sizeof(in_port_t));
2078186462Sdougb	}
2079186462Sdougb	mgr->v6ports = v6ports;
2080186462Sdougb	mgr->nv6ports = nv6ports;
2081186462Sdougb	PORTBUFUNLOCK(mgr);
2082186462Sdougb
2083186462Sdougb	return (ISC_R_SUCCESS);
2084186462Sdougb}
2085186462Sdougb
2086135446Strhodesstatic isc_result_t
2087135446Strhodesdns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
2088186462Sdougb		       unsigned int buffersize, unsigned int maxbuffers,
2089186462Sdougb		       unsigned int maxrequests, unsigned int buckets,
2090186462Sdougb		       unsigned int increment)
2091135446Strhodes{
2092135446Strhodes	isc_result_t result;
2093135446Strhodes
2094135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
2095135446Strhodes	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2096135446Strhodes	REQUIRE(maxbuffers > 0);
2097135446Strhodes	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2098135446Strhodes	REQUIRE(increment > buckets);
2099135446Strhodes
2100135446Strhodes	/*
2101135446Strhodes	 * Keep some number of items around.  This should be a config
2102135446Strhodes	 * option.  For now, keep 8, but later keep at least two even
2103135446Strhodes	 * if the caller wants less.  This allows us to ensure certain
2104135446Strhodes	 * things, like an event can be "freed" and the next allocation
2105135446Strhodes	 * will always succeed.
2106135446Strhodes	 *
2107135446Strhodes	 * Note that if limits are placed on anything here, we use one
2108135446Strhodes	 * event internally, so the actual limit should be "wanted + 1."
2109135446Strhodes	 *
2110135446Strhodes	 * XXXMLG
2111135446Strhodes	 */
2112135446Strhodes
2113135446Strhodes	if (maxbuffers < 8)
2114135446Strhodes		maxbuffers = 8;
2115135446Strhodes
2116135446Strhodes	LOCK(&mgr->buffer_lock);
2117186462Sdougb
2118186462Sdougb	/* Create or adjust buffer pool */
2119135446Strhodes	if (mgr->bpool != NULL) {
2120204619Sdougb		/*
2121204619Sdougb		 * We only increase the maxbuffers to avoid accidental buffer
2122204619Sdougb		 * shortage.  Ideally we'd separate the manager-wide maximum
2123204619Sdougb		 * from per-dispatch limits and respect the latter within the
2124204619Sdougb		 * global limit.  But at this moment that's deemed to be
2125204619Sdougb		 * overkilling and isn't worth additional implementation
2126204619Sdougb		 * complexity.
2127204619Sdougb		 */
2128204619Sdougb		if (maxbuffers > mgr->maxbuffers) {
2129204619Sdougb			isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2130204619Sdougb			mgr->maxbuffers = maxbuffers;
2131204619Sdougb		}
2132186462Sdougb	} else {
2133186462Sdougb		result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
2134186462Sdougb		if (result != ISC_R_SUCCESS) {
2135186462Sdougb			UNLOCK(&mgr->buffer_lock);
2136186462Sdougb			return (result);
2137186462Sdougb		}
2138186462Sdougb		isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
2139186462Sdougb		isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
2140186462Sdougb		isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
2141186462Sdougb	}
2142186462Sdougb
2143186462Sdougb	/* Create or adjust socket pool */
2144186462Sdougb	if (mgr->spool != NULL) {
2145186462Sdougb		isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
2146135446Strhodes		UNLOCK(&mgr->buffer_lock);
2147135446Strhodes		return (ISC_R_SUCCESS);
2148135446Strhodes	}
2149186462Sdougb	result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
2150186462Sdougb				    &mgr->spool);
2151186462Sdougb	if (result != ISC_R_SUCCESS) {
2152170222Sdougb		UNLOCK(&mgr->buffer_lock);
2153186462Sdougb		goto cleanup;
2154135446Strhodes	}
2155186462Sdougb	isc_mempool_setname(mgr->spool, "dispmgr_spool");
2156186462Sdougb	isc_mempool_setmaxalloc(mgr->spool, maxrequests);
2157186462Sdougb	isc_mempool_associatelock(mgr->spool, &mgr->pool_lock);
2158135446Strhodes
2159186462Sdougb	result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
2160135446Strhodes	if (result != ISC_R_SUCCESS)
2161135446Strhodes		goto cleanup;
2162135446Strhodes
2163135446Strhodes	mgr->buffersize = buffersize;
2164135446Strhodes	mgr->maxbuffers = maxbuffers;
2165135446Strhodes	UNLOCK(&mgr->buffer_lock);
2166135446Strhodes	return (ISC_R_SUCCESS);
2167135446Strhodes
2168135446Strhodes cleanup:
2169135446Strhodes	isc_mempool_destroy(&mgr->bpool);
2170186462Sdougb	if (mgr->spool != NULL)
2171186462Sdougb		isc_mempool_destroy(&mgr->spool);
2172135446Strhodes	UNLOCK(&mgr->buffer_lock);
2173186462Sdougb	return (result);
2174135446Strhodes}
2175135446Strhodes
2176135446Strhodesvoid
2177135446Strhodesdns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
2178135446Strhodes	dns_dispatchmgr_t *mgr;
2179135446Strhodes	isc_boolean_t killit;
2180135446Strhodes
2181135446Strhodes	REQUIRE(mgrp != NULL);
2182135446Strhodes	REQUIRE(VALID_DISPATCHMGR(*mgrp));
2183135446Strhodes
2184135446Strhodes	mgr = *mgrp;
2185135446Strhodes	*mgrp = NULL;
2186135446Strhodes
2187135446Strhodes	LOCK(&mgr->lock);
2188135446Strhodes	mgr->state |= MGR_SHUTTINGDOWN;
2189135446Strhodes
2190135446Strhodes	killit = destroy_mgr_ok(mgr);
2191135446Strhodes	UNLOCK(&mgr->lock);
2192135446Strhodes
2193135446Strhodes	mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
2194135446Strhodes
2195135446Strhodes	if (killit)
2196135446Strhodes		destroy_mgr(&mgr);
2197135446Strhodes}
2198135446Strhodes
2199193149Sdougbvoid
2200193149Sdougbdns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
2201193149Sdougb	REQUIRE(VALID_DISPATCHMGR(mgr));
2202193149Sdougb	REQUIRE(ISC_LIST_EMPTY(mgr->list));
2203193149Sdougb	REQUIRE(mgr->stats == NULL);
2204193149Sdougb
2205193149Sdougb	isc_stats_attach(stats, &mgr->stats);
2206193149Sdougb}
2207193149Sdougb
/*
 * bsearch() comparison callback for in_port_t values: returns -1, 0 or 1
 * when the port at 'key' is less than, equal to or greater than the port
 * at 'ent'.
 */
static int
port_cmp(const void *key, const void *ent) {
	const in_port_t *wanted = key;
	const in_port_t *entry = ent;

	return ((*wanted > *entry) - (*wanted < *entry));
}
2220186462Sdougb
2221135446Strhodesstatic isc_boolean_t
2222186462Sdougbportavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2223186462Sdougb	      isc_sockaddr_t *sockaddrp)
2224180477Sdougb{
2225135446Strhodes	isc_sockaddr_t sockaddr;
2226135446Strhodes	isc_result_t result;
2227186462Sdougb	in_port_t *ports, port;
2228186462Sdougb	unsigned int nports;
2229186462Sdougb	isc_boolean_t available = ISC_FALSE;
2230135446Strhodes
2231180477Sdougb	REQUIRE(sock != NULL || sockaddrp != NULL);
2232180477Sdougb
2233186462Sdougb	PORTBUFLOCK(mgr);
2234180477Sdougb	if (sock != NULL) {
2235180477Sdougb		sockaddrp = &sockaddr;
2236180477Sdougb		result = isc_socket_getsockname(sock, sockaddrp);
2237180477Sdougb		if (result != ISC_R_SUCCESS)
2238186462Sdougb			goto unlock;
2239180477Sdougb	}
2240135446Strhodes
2241186462Sdougb	if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
2242186462Sdougb		ports = mgr->v4ports;
2243186462Sdougb		nports = mgr->nv4ports;
2244186462Sdougb	} else {
2245186462Sdougb		ports = mgr->v6ports;
2246186462Sdougb		nports = mgr->nv6ports;
2247186462Sdougb	}
2248186462Sdougb	if (ports == NULL)
2249186462Sdougb		goto unlock;
2250186462Sdougb
2251186462Sdougb	port = isc_sockaddr_getport(sockaddrp);
2252186462Sdougb	if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
2253186462Sdougb		available = ISC_TRUE;
2254186462Sdougb
2255186462Sdougbunlock:
2256186462Sdougb	PORTBUFUNLOCK(mgr);
2257186462Sdougb	return (available);
2258135446Strhodes}
2259135446Strhodes
2260135446Strhodes#define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
2261135446Strhodes
2262135446Strhodesstatic isc_boolean_t
2263135446Strhodeslocal_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
2264135446Strhodes	isc_sockaddr_t sockaddr;
2265135446Strhodes	isc_result_t result;
2266135446Strhodes
2267186462Sdougb	REQUIRE(disp->socket != NULL);
2268186462Sdougb
2269135446Strhodes	if (addr == NULL)
2270135446Strhodes		return (ISC_TRUE);
2271135446Strhodes
2272135446Strhodes	/*
2273186462Sdougb	 * Don't match wildcard ports unless the port is available in the
2274186462Sdougb	 * current configuration.
2275135446Strhodes	 */
2276186462Sdougb	if (isc_sockaddr_getport(addr) == 0 &&
2277135446Strhodes	    isc_sockaddr_getport(&disp->local) == 0 &&
2278186462Sdougb	    !portavailable(disp->mgr, disp->socket, NULL)) {
2279135446Strhodes		return (ISC_FALSE);
2280186462Sdougb	}
2281135446Strhodes
2282135446Strhodes	/*
2283135446Strhodes	 * Check if we match the binding <address,port>.
2284135446Strhodes	 * Wildcard ports match/fail here.
2285135446Strhodes	 */
2286135446Strhodes	if (isc_sockaddr_equal(&disp->local, addr))
2287135446Strhodes		return (ISC_TRUE);
2288135446Strhodes	if (isc_sockaddr_getport(addr) == 0)
2289135446Strhodes		return (ISC_FALSE);
2290135446Strhodes
2291135446Strhodes	/*
2292135446Strhodes	 * Check if we match a bound wildcard port <address,port>.
2293135446Strhodes	 */
2294135446Strhodes	if (!isc_sockaddr_eqaddr(&disp->local, addr))
2295135446Strhodes		return (ISC_FALSE);
2296135446Strhodes	result = isc_socket_getsockname(disp->socket, &sockaddr);
2297135446Strhodes	if (result != ISC_R_SUCCESS)
2298135446Strhodes		return (ISC_FALSE);
2299135446Strhodes
2300135446Strhodes	return (isc_sockaddr_equal(&sockaddr, addr));
2301135446Strhodes}
2302135446Strhodes
2303135446Strhodes/*
2304135446Strhodes * Requires mgr be locked.
2305135446Strhodes *
2306135446Strhodes * No dispatcher can be locked by this thread when calling this function.
2307135446Strhodes *
2308135446Strhodes *
2309135446Strhodes * NOTE:
2310135446Strhodes *	If a matching dispatcher is found, it is locked after this function
2311135446Strhodes *	returns, and must be unlocked by the caller.
2312135446Strhodes */
2313135446Strhodesstatic isc_result_t
2314135446Strhodesdispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
2315135446Strhodes	      unsigned int attributes, unsigned int mask,
2316135446Strhodes	      dns_dispatch_t **dispp)
2317135446Strhodes{
2318135446Strhodes	dns_dispatch_t *disp;
2319135446Strhodes	isc_result_t result;
2320135446Strhodes
2321135446Strhodes	/*
2322186462Sdougb	 * Make certain that we will not match a private or exclusive dispatch.
2323135446Strhodes	 */
2324186462Sdougb	attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2325186462Sdougb	mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
2326135446Strhodes
2327135446Strhodes	disp = ISC_LIST_HEAD(mgr->list);
2328135446Strhodes	while (disp != NULL) {
2329135446Strhodes		LOCK(&disp->lock);
2330135446Strhodes		if ((disp->shutting_down == 0)
2331135446Strhodes		    && ATTRMATCH(disp->attributes, attributes, mask)
2332135446Strhodes		    && local_addr_match(disp, local))
2333135446Strhodes			break;
2334135446Strhodes		UNLOCK(&disp->lock);
2335135446Strhodes		disp = ISC_LIST_NEXT(disp, link);
2336135446Strhodes	}
2337135446Strhodes
2338135446Strhodes	if (disp == NULL) {
2339135446Strhodes		result = ISC_R_NOTFOUND;
2340135446Strhodes		goto out;
2341135446Strhodes	}
2342135446Strhodes
2343135446Strhodes	*dispp = disp;
2344135446Strhodes	result = ISC_R_SUCCESS;
2345135446Strhodes out:
2346135446Strhodes
2347135446Strhodes	return (result);
2348135446Strhodes}
2349135446Strhodes
2350135446Strhodesstatic isc_result_t
2351135446Strhodesqid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
2352186462Sdougb	     unsigned int increment, dns_qid_t **qidp,
2353186462Sdougb	     isc_boolean_t needsocktable)
2354135446Strhodes{
2355135446Strhodes	dns_qid_t *qid;
2356135446Strhodes	unsigned int i;
2357170222Sdougb	isc_result_t result;
2358135446Strhodes
2359135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
2360135446Strhodes	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2361135446Strhodes	REQUIRE(increment > buckets);
2362135446Strhodes	REQUIRE(qidp != NULL && *qidp == NULL);
2363135446Strhodes
2364135446Strhodes	qid = isc_mem_get(mgr->mctx, sizeof(*qid));
2365135446Strhodes	if (qid == NULL)
2366135446Strhodes		return (ISC_R_NOMEMORY);
2367135446Strhodes
2368135446Strhodes	qid->qid_table = isc_mem_get(mgr->mctx,
2369135446Strhodes				     buckets * sizeof(dns_displist_t));
2370135446Strhodes	if (qid->qid_table == NULL) {
2371135446Strhodes		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2372135446Strhodes		return (ISC_R_NOMEMORY);
2373135446Strhodes	}
2374135446Strhodes
2375186462Sdougb	qid->sock_table = NULL;
2376186462Sdougb	if (needsocktable) {
2377186462Sdougb		qid->sock_table = isc_mem_get(mgr->mctx, buckets *
2378186462Sdougb					      sizeof(dispsocketlist_t));
2379186462Sdougb		if (qid->sock_table == NULL) {
2380186462Sdougb			isc_mem_put(mgr->mctx, qid->qid_table,
2381186462Sdougb				    buckets * sizeof(dns_displist_t));
2382225361Sdougb			isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2383186462Sdougb			return (ISC_R_NOMEMORY);
2384186462Sdougb		}
2385186462Sdougb	}
2386186462Sdougb
2387170222Sdougb	result = isc_mutex_init(&qid->lock);
2388170222Sdougb	if (result != ISC_R_SUCCESS) {
2389186462Sdougb		if (qid->sock_table != NULL) {
2390186462Sdougb			isc_mem_put(mgr->mctx, qid->sock_table,
2391186462Sdougb				    buckets * sizeof(dispsocketlist_t));
2392186462Sdougb		}
2393135446Strhodes		isc_mem_put(mgr->mctx, qid->qid_table,
2394135446Strhodes			    buckets * sizeof(dns_displist_t));
2395135446Strhodes		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
2396170222Sdougb		return (result);
2397135446Strhodes	}
2398135446Strhodes
2399186462Sdougb	for (i = 0; i < buckets; i++) {
2400135446Strhodes		ISC_LIST_INIT(qid->qid_table[i]);
2401186462Sdougb		if (qid->sock_table != NULL)
2402186462Sdougb			ISC_LIST_INIT(qid->sock_table[i]);
2403186462Sdougb	}
2404135446Strhodes
2405135446Strhodes	qid->qid_nbuckets = buckets;
2406135446Strhodes	qid->qid_increment = increment;
2407135446Strhodes	qid->magic = QID_MAGIC;
2408135446Strhodes	*qidp = qid;
2409135446Strhodes	return (ISC_R_SUCCESS);
2410135446Strhodes}
2411135446Strhodes
2412135446Strhodesstatic void
2413135446Strhodesqid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
2414135446Strhodes	dns_qid_t *qid;
2415135446Strhodes
2416135446Strhodes	REQUIRE(qidp != NULL);
2417135446Strhodes	qid = *qidp;
2418135446Strhodes
2419135446Strhodes	REQUIRE(VALID_QID(qid));
2420135446Strhodes
2421135446Strhodes	*qidp = NULL;
2422135446Strhodes	qid->magic = 0;
2423135446Strhodes	isc_mem_put(mctx, qid->qid_table,
2424135446Strhodes		    qid->qid_nbuckets * sizeof(dns_displist_t));
2425186462Sdougb	if (qid->sock_table != NULL) {
2426186462Sdougb		isc_mem_put(mctx, qid->sock_table,
2427186462Sdougb			    qid->qid_nbuckets * sizeof(dispsocketlist_t));
2428186462Sdougb	}
2429135446Strhodes	DESTROYLOCK(&qid->lock);
2430135446Strhodes	isc_mem_put(mctx, qid, sizeof(*qid));
2431135446Strhodes}
2432135446Strhodes
2433135446Strhodes/*
2434135446Strhodes * Allocate and set important limits.
2435135446Strhodes */
2436135446Strhodesstatic isc_result_t
2437135446Strhodesdispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
2438135446Strhodes		  dns_dispatch_t **dispp)
2439135446Strhodes{
2440135446Strhodes	dns_dispatch_t *disp;
2441170222Sdougb	isc_result_t result;
2442135446Strhodes
2443135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
2444135446Strhodes	REQUIRE(dispp != NULL && *dispp == NULL);
2445135446Strhodes
2446135446Strhodes	/*
2447135446Strhodes	 * Set up the dispatcher, mostly.  Don't bother setting some of
2448135446Strhodes	 * the options that are controlled by tcp vs. udp, etc.
2449135446Strhodes	 */
2450135446Strhodes
2451135446Strhodes	disp = isc_mempool_get(mgr->dpool);
2452135446Strhodes	if (disp == NULL)
2453135446Strhodes		return (ISC_R_NOMEMORY);
2454135446Strhodes
2455135446Strhodes	disp->magic = 0;
2456135446Strhodes	disp->mgr = mgr;
2457135446Strhodes	disp->maxrequests = maxrequests;
2458135446Strhodes	disp->attributes = 0;
2459135446Strhodes	ISC_LINK_INIT(disp, link);
2460135446Strhodes	disp->refcount = 1;
2461135446Strhodes	disp->recv_pending = 0;
2462135446Strhodes	memset(&disp->local, 0, sizeof(disp->local));
2463180477Sdougb	disp->localport = 0;
2464135446Strhodes	disp->shutting_down = 0;
2465135446Strhodes	disp->shutdown_out = 0;
2466135446Strhodes	disp->connected = 0;
2467135446Strhodes	disp->tcpmsg_valid = 0;
2468135446Strhodes	disp->shutdown_why = ISC_R_UNEXPECTED;
2469135446Strhodes	disp->requests = 0;
2470135446Strhodes	disp->tcpbuffers = 0;
2471135446Strhodes	disp->qid = NULL;
2472186462Sdougb	ISC_LIST_INIT(disp->activesockets);
2473186462Sdougb	ISC_LIST_INIT(disp->inactivesockets);
2474186462Sdougb	disp->nsockets = 0;
2475224092Sdougb	dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
2476193149Sdougb	disp->port_table = NULL;
2477193149Sdougb	disp->portpool = NULL;
2478135446Strhodes
2479170222Sdougb	result = isc_mutex_init(&disp->lock);
2480170222Sdougb	if (result != ISC_R_SUCCESS)
2481135446Strhodes		goto deallocate;
2482135446Strhodes
2483135446Strhodes	disp->failsafe_ev = allocate_event(disp);
2484135446Strhodes	if (disp->failsafe_ev == NULL) {
2485170222Sdougb		result = ISC_R_NOMEMORY;
2486135446Strhodes		goto kill_lock;
2487135446Strhodes	}
2488135446Strhodes
2489135446Strhodes	disp->magic = DISPATCH_MAGIC;
2490135446Strhodes
2491135446Strhodes	*dispp = disp;
2492135446Strhodes	return (ISC_R_SUCCESS);
2493135446Strhodes
2494135446Strhodes	/*
2495135446Strhodes	 * error returns
2496135446Strhodes	 */
2497135446Strhodes kill_lock:
2498135446Strhodes	DESTROYLOCK(&disp->lock);
2499135446Strhodes deallocate:
2500135446Strhodes	isc_mempool_put(mgr->dpool, disp);
2501135446Strhodes
2502170222Sdougb	return (result);
2503135446Strhodes}
2504135446Strhodes
2505135446Strhodes
2506135446Strhodes/*
2507193149Sdougb * MUST be unlocked, and not used by anything.
2508135446Strhodes */
2509135446Strhodesstatic void
2510135446Strhodesdispatch_free(dns_dispatch_t **dispp)
2511135446Strhodes{
2512135446Strhodes	dns_dispatch_t *disp;
2513135446Strhodes	dns_dispatchmgr_t *mgr;
2514193149Sdougb	int i;
2515135446Strhodes
2516135446Strhodes	REQUIRE(VALID_DISPATCH(*dispp));
2517135446Strhodes	disp = *dispp;
2518135446Strhodes	*dispp = NULL;
2519135446Strhodes
2520135446Strhodes	mgr = disp->mgr;
2521135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
2522135446Strhodes
2523135446Strhodes	if (disp->tcpmsg_valid) {
2524135446Strhodes		dns_tcpmsg_invalidate(&disp->tcpmsg);
2525135446Strhodes		disp->tcpmsg_valid = 0;
2526135446Strhodes	}
2527135446Strhodes
2528135446Strhodes	INSIST(disp->tcpbuffers == 0);
2529135446Strhodes	INSIST(disp->requests == 0);
2530135446Strhodes	INSIST(disp->recv_pending == 0);
2531186462Sdougb	INSIST(ISC_LIST_EMPTY(disp->activesockets));
2532186462Sdougb	INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
2533135446Strhodes
2534135446Strhodes	isc_mempool_put(mgr->epool, disp->failsafe_ev);
2535135446Strhodes	disp->failsafe_ev = NULL;
2536135446Strhodes
2537135446Strhodes	if (disp->qid != NULL)
2538135446Strhodes		qid_destroy(mgr->mctx, &disp->qid);
2539193149Sdougb
2540193149Sdougb	if (disp->port_table != NULL) {
2541193149Sdougb		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2542193149Sdougb			INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
2543193149Sdougb		isc_mem_put(mgr->mctx, disp->port_table,
2544193149Sdougb			    sizeof(disp->port_table[0]) *
2545193149Sdougb			    DNS_DISPATCH_PORTTABLESIZE);
2546193149Sdougb	}
2547193149Sdougb
2548193149Sdougb	if (disp->portpool != NULL)
2549193149Sdougb		isc_mempool_destroy(&disp->portpool);
2550193149Sdougb
2551135446Strhodes	disp->mgr = NULL;
2552135446Strhodes	DESTROYLOCK(&disp->lock);
2553135446Strhodes	disp->magic = 0;
2554135446Strhodes	isc_mempool_put(mgr->dpool, disp);
2555135446Strhodes}
2556135446Strhodes
2557135446Strhodesisc_result_t
2558135446Strhodesdns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
2559135446Strhodes		       isc_taskmgr_t *taskmgr, unsigned int buffersize,
2560135446Strhodes		       unsigned int maxbuffers, unsigned int maxrequests,
2561135446Strhodes		       unsigned int buckets, unsigned int increment,
2562135446Strhodes		       unsigned int attributes, dns_dispatch_t **dispp)
2563135446Strhodes{
2564135446Strhodes	isc_result_t result;
2565135446Strhodes	dns_dispatch_t *disp;
2566135446Strhodes
2567135446Strhodes	UNUSED(maxbuffers);
2568135446Strhodes	UNUSED(buffersize);
2569135446Strhodes
2570135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
2571135446Strhodes	REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
2572135446Strhodes	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
2573135446Strhodes	REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
2574135446Strhodes
2575135446Strhodes	attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
2576135446Strhodes
2577135446Strhodes	LOCK(&mgr->lock);
2578135446Strhodes
2579135446Strhodes	/*
2580135446Strhodes	 * dispatch_allocate() checks mgr for us.
2581135446Strhodes	 * qid_allocate() checks buckets and increment for us.
2582135446Strhodes	 */
2583135446Strhodes	disp = NULL;
2584135446Strhodes	result = dispatch_allocate(mgr, maxrequests, &disp);
2585135446Strhodes	if (result != ISC_R_SUCCESS) {
2586135446Strhodes		UNLOCK(&mgr->lock);
2587135446Strhodes		return (result);
2588135446Strhodes	}
2589135446Strhodes
2590186462Sdougb	result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
2591135446Strhodes	if (result != ISC_R_SUCCESS)
2592135446Strhodes		goto deallocate_dispatch;
2593135446Strhodes
2594135446Strhodes	disp->socktype = isc_sockettype_tcp;
2595135446Strhodes	disp->socket = NULL;
2596135446Strhodes	isc_socket_attach(sock, &disp->socket);
2597135446Strhodes
2598186462Sdougb	disp->ntasks = 1;
2599186462Sdougb	disp->task[0] = NULL;
2600186462Sdougb	result = isc_task_create(taskmgr, 0, &disp->task[0]);
2601135446Strhodes	if (result != ISC_R_SUCCESS)
2602135446Strhodes		goto kill_socket;
2603135446Strhodes
2604135446Strhodes	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2605135446Strhodes					    DNS_EVENT_DISPATCHCONTROL,
2606135446Strhodes					    destroy_disp, disp,
2607135446Strhodes					    sizeof(isc_event_t));
2608174187Sdougb	if (disp->ctlevent == NULL) {
2609174187Sdougb		result = ISC_R_NOMEMORY;
2610135446Strhodes		goto kill_task;
2611174187Sdougb	}
2612135446Strhodes
2613186462Sdougb	isc_task_setname(disp->task[0], "tcpdispatch", disp);
2614135446Strhodes
2615135446Strhodes	dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
2616135446Strhodes	disp->tcpmsg_valid = 1;
2617135446Strhodes
2618135446Strhodes	disp->attributes = attributes;
2619135446Strhodes
2620135446Strhodes	/*
2621135446Strhodes	 * Append it to the dispatcher list.
2622135446Strhodes	 */
2623135446Strhodes	ISC_LIST_APPEND(mgr->list, disp, link);
2624135446Strhodes	UNLOCK(&mgr->lock);
2625135446Strhodes
2626135446Strhodes	mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
2627186462Sdougb	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
2628135446Strhodes
2629135446Strhodes	*dispp = disp;
2630135446Strhodes
2631135446Strhodes	return (ISC_R_SUCCESS);
2632135446Strhodes
2633135446Strhodes	/*
2634135446Strhodes	 * Error returns.
2635135446Strhodes	 */
2636135446Strhodes kill_task:
2637186462Sdougb	isc_task_detach(&disp->task[0]);
2638135446Strhodes kill_socket:
2639135446Strhodes	isc_socket_detach(&disp->socket);
2640135446Strhodes deallocate_dispatch:
2641135446Strhodes	dispatch_free(&disp);
2642135446Strhodes
2643135446Strhodes	UNLOCK(&mgr->lock);
2644135446Strhodes
2645135446Strhodes	return (result);
2646135446Strhodes}
2647135446Strhodes
2648135446Strhodesisc_result_t
2649135446Strhodesdns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2650135446Strhodes		    isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
2651135446Strhodes		    unsigned int buffersize,
2652135446Strhodes		    unsigned int maxbuffers, unsigned int maxrequests,
2653135446Strhodes		    unsigned int buckets, unsigned int increment,
2654135446Strhodes		    unsigned int attributes, unsigned int mask,
2655135446Strhodes		    dns_dispatch_t **dispp)
2656135446Strhodes{
2657135446Strhodes	isc_result_t result;
2658180477Sdougb	dns_dispatch_t *disp = NULL;
2659135446Strhodes
2660135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
2661135446Strhodes	REQUIRE(sockmgr != NULL);
2662135446Strhodes	REQUIRE(localaddr != NULL);
2663135446Strhodes	REQUIRE(taskmgr != NULL);
2664135446Strhodes	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
2665135446Strhodes	REQUIRE(maxbuffers > 0);
2666135446Strhodes	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
2667135446Strhodes	REQUIRE(increment > buckets);
2668135446Strhodes	REQUIRE(dispp != NULL && *dispp == NULL);
2669135446Strhodes	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
2670135446Strhodes
2671135446Strhodes	result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
2672186462Sdougb					maxrequests, buckets, increment);
2673135446Strhodes	if (result != ISC_R_SUCCESS)
2674135446Strhodes		return (result);
2675135446Strhodes
2676135446Strhodes	LOCK(&mgr->lock);
2677135446Strhodes
2678186462Sdougb	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
2679180477Sdougb		REQUIRE(isc_sockaddr_getport(localaddr) == 0);
2680180477Sdougb		goto createudp;
2681180477Sdougb	}
2682180477Sdougb
2683135446Strhodes	/*
2684193149Sdougb	 * See if we have a dispatcher that matches.
2685135446Strhodes	 */
2686135446Strhodes	result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
2687135446Strhodes	if (result == ISC_R_SUCCESS) {
2688135446Strhodes		disp->refcount++;
2689135446Strhodes
2690135446Strhodes		if (disp->maxrequests < maxrequests)
2691135446Strhodes			disp->maxrequests = maxrequests;
2692135446Strhodes
2693135446Strhodes		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
2694135446Strhodes		    (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
2695135446Strhodes		{
2696135446Strhodes			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2697135446Strhodes			if (disp->recv_pending != 0)
2698186462Sdougb				isc_socket_cancel(disp->socket, disp->task[0],
2699135446Strhodes						  ISC_SOCKCANCEL_RECV);
2700135446Strhodes		}
2701135446Strhodes
2702135446Strhodes		UNLOCK(&disp->lock);
2703135446Strhodes		UNLOCK(&mgr->lock);
2704135446Strhodes
2705135446Strhodes		*dispp = disp;
2706135446Strhodes
2707135446Strhodes		return (ISC_R_SUCCESS);
2708135446Strhodes	}
2709135446Strhodes
2710180477Sdougb createudp:
2711135446Strhodes	/*
2712135446Strhodes	 * Nope, create one.
2713135446Strhodes	 */
2714135446Strhodes	result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
2715135446Strhodes				    maxrequests, attributes, &disp);
2716135446Strhodes	if (result != ISC_R_SUCCESS) {
2717135446Strhodes		UNLOCK(&mgr->lock);
2718135446Strhodes		return (result);
2719135446Strhodes	}
2720135446Strhodes
2721135446Strhodes	UNLOCK(&mgr->lock);
2722135446Strhodes	*dispp = disp;
2723135446Strhodes	return (ISC_R_SUCCESS);
2724135446Strhodes}
2725135446Strhodes
2726135446Strhodes/*
2727135446Strhodes * mgr should be locked.
2728135446Strhodes */
2729165071Sdougb
2730165071Sdougb#ifndef DNS_DISPATCH_HELD
2731165071Sdougb#define DNS_DISPATCH_HELD 20U
2732165071Sdougb#endif
2733165071Sdougb
2734135446Strhodesstatic isc_result_t
2735186462Sdougbget_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
2736186462Sdougb	      isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
2737186462Sdougb	      isc_socket_t **sockp)
2738186462Sdougb{
2739186462Sdougb	unsigned int i, j;
2740186462Sdougb	isc_socket_t *held[DNS_DISPATCH_HELD];
2741186462Sdougb	isc_sockaddr_t localaddr_bound;
2742186462Sdougb	isc_socket_t *sock = NULL;
2743186462Sdougb	isc_result_t result = ISC_R_SUCCESS;
2744186462Sdougb	isc_boolean_t anyport;
2745186462Sdougb
2746186462Sdougb	INSIST(sockp != NULL && *sockp == NULL);
2747186462Sdougb
2748186462Sdougb	localaddr_bound = *localaddr;
2749186462Sdougb	anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
2750186462Sdougb
2751186462Sdougb	if (anyport) {
2752186462Sdougb		unsigned int nports;
2753186462Sdougb		in_port_t *ports;
2754186462Sdougb
2755186462Sdougb		/*
2756186462Sdougb		 * If no port is specified, we first try to pick up a random
2757186462Sdougb		 * port by ourselves.
2758186462Sdougb		 */
2759254402Serwin		if (isc_sockaddr_pf(localaddr) == AF_INET) {
2760186462Sdougb			nports = disp->mgr->nv4ports;
2761186462Sdougb			ports = disp->mgr->v4ports;
2762186462Sdougb		} else {
2763186462Sdougb			nports = disp->mgr->nv6ports;
2764186462Sdougb			ports = disp->mgr->v6ports;
2765186462Sdougb		}
2766186462Sdougb		if (nports == 0)
2767186462Sdougb			return (ISC_R_ADDRNOTAVAIL);
2768186462Sdougb
2769186462Sdougb		for (i = 0; i < 1024; i++) {
2770186462Sdougb			in_port_t prt;
2771186462Sdougb
2772224092Sdougb			prt = ports[dispatch_uniformrandom(
2773186462Sdougb					DISP_ARC4CTX(disp),
2774186462Sdougb					nports)];
2775186462Sdougb			isc_sockaddr_setport(&localaddr_bound, prt);
2776186462Sdougb			result = open_socket(sockmgr, &localaddr_bound,
2777186462Sdougb					     0, &sock);
2778254402Serwin			/*
2779254402Serwin			 * Continue if the port choosen is already in use
2780254402Serwin			 * or the OS has reserved it.
2781254402Serwin			 */
2782254402Serwin			if (result == ISC_R_NOPERM ||
2783254402Serwin			    result == ISC_R_ADDRINUSE)
2784254402Serwin				continue;
2785254402Serwin			disp->localport = prt;
2786254402Serwin			*sockp = sock;
2787254402Serwin			return (result);
2788186462Sdougb		}
2789186462Sdougb
2790186462Sdougb		/*
2791186462Sdougb		 * If this fails 1024 times, we then ask the kernel for
2792186462Sdougb		 * choosing one.
2793186462Sdougb		 */
2794193149Sdougb	} else {
2795193149Sdougb		/* Allow to reuse address for non-random ports. */
2796193149Sdougb		result = open_socket(sockmgr, localaddr,
2797193149Sdougb				     ISC_SOCKET_REUSEADDRESS, &sock);
2798193149Sdougb
2799193149Sdougb		if (result == ISC_R_SUCCESS)
2800193149Sdougb			*sockp = sock;
2801193149Sdougb
2802193149Sdougb		return (result);
2803186462Sdougb	}
2804186462Sdougb
2805186462Sdougb	memset(held, 0, sizeof(held));
2806186462Sdougb	i = 0;
2807186462Sdougb
2808186462Sdougb	for (j = 0; j < 0xffffU; j++) {
2809186462Sdougb		result = open_socket(sockmgr, localaddr, 0, &sock);
2810186462Sdougb		if (result != ISC_R_SUCCESS)
2811186462Sdougb			goto end;
2812186462Sdougb		else if (portavailable(mgr, sock, NULL))
2813186462Sdougb			break;
2814186462Sdougb		if (held[i] != NULL)
2815186462Sdougb			isc_socket_detach(&held[i]);
2816186462Sdougb		held[i++] = sock;
2817186462Sdougb		sock = NULL;
2818186462Sdougb		if (i == DNS_DISPATCH_HELD)
2819186462Sdougb			i = 0;
2820186462Sdougb	}
2821186462Sdougb	if (j == 0xffffU) {
2822186462Sdougb		mgr_log(mgr, ISC_LOG_ERROR,
2823186462Sdougb			"avoid-v%s-udp-ports: unable to allocate "
2824186462Sdougb			"an available port",
2825186462Sdougb			isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
2826186462Sdougb		result = ISC_R_FAILURE;
2827186462Sdougb		goto end;
2828186462Sdougb	}
2829186462Sdougb	*sockp = sock;
2830186462Sdougb
2831186462Sdougbend:
2832186462Sdougb	for (i = 0; i < DNS_DISPATCH_HELD; i++) {
2833186462Sdougb		if (held[i] != NULL)
2834186462Sdougb			isc_socket_detach(&held[i]);
2835186462Sdougb	}
2836186462Sdougb
2837186462Sdougb	return (result);
2838186462Sdougb}
2839186462Sdougb
2840186462Sdougbstatic isc_result_t
2841135446Strhodesdispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
2842135446Strhodes		   isc_taskmgr_t *taskmgr,
2843135446Strhodes		   isc_sockaddr_t *localaddr,
2844135446Strhodes		   unsigned int maxrequests,
2845135446Strhodes		   unsigned int attributes,
2846135446Strhodes		   dns_dispatch_t **dispp)
2847135446Strhodes{
2848135446Strhodes	isc_result_t result;
2849135446Strhodes	dns_dispatch_t *disp;
2850165071Sdougb	isc_socket_t *sock = NULL;
2851186462Sdougb	int i = 0;
2852135446Strhodes
2853135446Strhodes	/*
2854135446Strhodes	 * dispatch_allocate() checks mgr for us.
2855135446Strhodes	 */
2856135446Strhodes	disp = NULL;
2857135446Strhodes	result = dispatch_allocate(mgr, maxrequests, &disp);
2858135446Strhodes	if (result != ISC_R_SUCCESS)
2859135446Strhodes		return (result);
2860135446Strhodes
2861186462Sdougb	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
2862186462Sdougb		result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock);
2863186462Sdougb		if (result != ISC_R_SUCCESS)
2864186462Sdougb			goto deallocate_dispatch;
2865186462Sdougb	} else {
2866186462Sdougb		isc_sockaddr_t sa_any;
2867180477Sdougb
2868186462Sdougb		/*
2869186462Sdougb		 * For dispatches using exclusive sockets with a specific
2870186462Sdougb		 * source address, we only check if the specified address is
2871186462Sdougb		 * available on the system.  Query sockets will be created later
2872186462Sdougb		 * on demand.
2873186462Sdougb		 */
2874186462Sdougb		isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
2875186462Sdougb		if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
2876186462Sdougb			result = open_socket(sockmgr, localaddr, 0, &sock);
2877186462Sdougb			if (sock != NULL)
2878186462Sdougb				isc_socket_detach(&sock);
2879186462Sdougb			if (result != ISC_R_SUCCESS)
2880186462Sdougb				goto deallocate_dispatch;
2881180477Sdougb		}
2882193149Sdougb
2883193149Sdougb		disp->port_table = isc_mem_get(mgr->mctx,
2884193149Sdougb					       sizeof(disp->port_table[0]) *
2885193149Sdougb					       DNS_DISPATCH_PORTTABLESIZE);
2886193149Sdougb		if (disp->port_table == NULL)
2887193149Sdougb			goto deallocate_dispatch;
2888193149Sdougb		for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
2889193149Sdougb			ISC_LIST_INIT(disp->port_table[i]);
2890193149Sdougb
2891193149Sdougb		result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
2892193149Sdougb					    &disp->portpool);
2893193149Sdougb		if (result != ISC_R_SUCCESS)
2894193149Sdougb			goto deallocate_dispatch;
2895193149Sdougb		isc_mempool_setname(disp->portpool, "disp_portpool");
2896193149Sdougb		isc_mempool_setfreemax(disp->portpool, 128);
2897135446Strhodes	}
2898135446Strhodes	disp->socktype = isc_sockettype_udp;
2899135446Strhodes	disp->socket = sock;
2900135446Strhodes	disp->local = *localaddr;
2901135446Strhodes
2902186462Sdougb	if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
2903186462Sdougb		disp->ntasks = MAX_INTERNAL_TASKS;
2904186462Sdougb	else
2905186462Sdougb		disp->ntasks = 1;
2906186462Sdougb	for (i = 0; i < disp->ntasks; i++) {
2907186462Sdougb		disp->task[i] = NULL;
2908186462Sdougb		result = isc_task_create(taskmgr, 0, &disp->task[i]);
2909186462Sdougb		if (result != ISC_R_SUCCESS) {
2910224092Sdougb			while (--i >= 0) {
2911224092Sdougb				isc_task_shutdown(disp->task[i]);
2912224092Sdougb				isc_task_detach(&disp->task[i]);
2913224092Sdougb			}
2914186462Sdougb			goto kill_socket;
2915186462Sdougb		}
2916186462Sdougb		isc_task_setname(disp->task[i], "udpdispatch", disp);
2917186462Sdougb	}
2918135446Strhodes
2919135446Strhodes	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
2920135446Strhodes					    DNS_EVENT_DISPATCHCONTROL,
2921135446Strhodes					    destroy_disp, disp,
2922135446Strhodes					    sizeof(isc_event_t));
2923174187Sdougb	if (disp->ctlevent == NULL) {
2924174187Sdougb		result = ISC_R_NOMEMORY;
2925135446Strhodes		goto kill_task;
2926174187Sdougb	}
2927135446Strhodes
2928135446Strhodes	attributes &= ~DNS_DISPATCHATTR_TCP;
2929135446Strhodes	attributes |= DNS_DISPATCHATTR_UDP;
2930135446Strhodes	disp->attributes = attributes;
2931135446Strhodes
2932135446Strhodes	/*
2933135446Strhodes	 * Append it to the dispatcher list.
2934135446Strhodes	 */
2935135446Strhodes	ISC_LIST_APPEND(mgr->list, disp, link);
2936135446Strhodes
2937135446Strhodes	mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2938186462Sdougb	dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
2939186462Sdougb	if (disp->socket != NULL)
2940186462Sdougb		dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
2941135446Strhodes
2942135446Strhodes	*dispp = disp;
2943186462Sdougb	return (result);
2944135446Strhodes
2945135446Strhodes	/*
2946135446Strhodes	 * Error returns.
2947135446Strhodes	 */
2948135446Strhodes kill_task:
2949186462Sdougb	for (i = 0; i < disp->ntasks; i++)
2950186462Sdougb		isc_task_detach(&disp->task[i]);
2951135446Strhodes kill_socket:
2952186462Sdougb	if (disp->socket != NULL)
2953186462Sdougb		isc_socket_detach(&disp->socket);
2954135446Strhodes deallocate_dispatch:
2955135446Strhodes	dispatch_free(&disp);
2956186462Sdougb
2957135446Strhodes	return (result);
2958135446Strhodes}
2959135446Strhodes
2960135446Strhodesvoid
2961135446Strhodesdns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2962135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
2963135446Strhodes	REQUIRE(dispp != NULL && *dispp == NULL);
2964135446Strhodes
2965135446Strhodes	LOCK(&disp->lock);
2966135446Strhodes	disp->refcount++;
2967135446Strhodes	UNLOCK(&disp->lock);
2968135446Strhodes
2969135446Strhodes	*dispp = disp;
2970135446Strhodes}
2971135446Strhodes
2972135446Strhodes/*
2973135446Strhodes * It is important to lock the manager while we are deleting the dispatch,
2974135446Strhodes * since dns_dispatch_getudp will call dispatch_find, which returns to
2975135446Strhodes * the caller a dispatch but does not attach to it until later.  _getudp
2976135446Strhodes * locks the manager, however, so locking it here will keep us from attaching
2977135446Strhodes * to a dispatcher that is in the process of going away.
2978135446Strhodes */
2979135446Strhodesvoid
2980135446Strhodesdns_dispatch_detach(dns_dispatch_t **dispp) {
2981135446Strhodes	dns_dispatch_t *disp;
2982186462Sdougb	dispsocket_t *dispsock;
2983135446Strhodes	isc_boolean_t killit;
2984135446Strhodes
2985135446Strhodes	REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2986135446Strhodes
2987135446Strhodes	disp = *dispp;
2988135446Strhodes	*dispp = NULL;
2989135446Strhodes
2990135446Strhodes	LOCK(&disp->lock);
2991135446Strhodes
2992135446Strhodes	INSIST(disp->refcount > 0);
2993135446Strhodes	disp->refcount--;
2994135446Strhodes	if (disp->refcount == 0) {
2995135446Strhodes		if (disp->recv_pending > 0)
2996186462Sdougb			isc_socket_cancel(disp->socket, disp->task[0],
2997135446Strhodes					  ISC_SOCKCANCEL_RECV);
2998186462Sdougb		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
2999186462Sdougb		     dispsock != NULL;
3000186462Sdougb		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
3001186462Sdougb			isc_socket_cancel(dispsock->socket, dispsock->task,
3002186462Sdougb					  ISC_SOCKCANCEL_RECV);
3003186462Sdougb		}
3004135446Strhodes		disp->shutting_down = 1;
3005135446Strhodes	}
3006135446Strhodes
3007135446Strhodes	dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
3008135446Strhodes
3009135446Strhodes	killit = destroy_disp_ok(disp);
3010135446Strhodes	UNLOCK(&disp->lock);
3011135446Strhodes	if (killit)
3012186462Sdougb		isc_task_send(disp->task[0], &disp->ctlevent);
3013135446Strhodes}
3014135446Strhodes
3015135446Strhodesisc_result_t
3016186462Sdougbdns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3017186462Sdougb			  isc_task_t *task, isc_taskaction_t action, void *arg,
3018186462Sdougb			  dns_messageid_t *idp, dns_dispentry_t **resp,
3019186462Sdougb			  isc_socketmgr_t *sockmgr)
3020135446Strhodes{
3021135446Strhodes	dns_dispentry_t *res;
3022135446Strhodes	unsigned int bucket;
3023186462Sdougb	in_port_t localport = 0;
3024135446Strhodes	dns_messageid_t id;
3025135446Strhodes	int i;
3026135446Strhodes	isc_boolean_t ok;
3027135446Strhodes	dns_qid_t *qid;
3028186462Sdougb	dispsocket_t *dispsocket = NULL;
3029186462Sdougb	isc_result_t result;
3030135446Strhodes
3031135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
3032135446Strhodes	REQUIRE(task != NULL);
3033135446Strhodes	REQUIRE(dest != NULL);
3034135446Strhodes	REQUIRE(resp != NULL && *resp == NULL);
3035135446Strhodes	REQUIRE(idp != NULL);
3036186462Sdougb	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3037186462Sdougb		REQUIRE(sockmgr != NULL);
3038135446Strhodes
3039135446Strhodes	LOCK(&disp->lock);
3040135446Strhodes
3041135446Strhodes	if (disp->shutting_down == 1) {
3042135446Strhodes		UNLOCK(&disp->lock);
3043135446Strhodes		return (ISC_R_SHUTTINGDOWN);
3044135446Strhodes	}
3045135446Strhodes
3046135446Strhodes	if (disp->requests >= disp->maxrequests) {
3047135446Strhodes		UNLOCK(&disp->lock);
3048135446Strhodes		return (ISC_R_QUOTA);
3049135446Strhodes	}
3050135446Strhodes
3051186462Sdougb	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
3052186462Sdougb	    disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
3053186462Sdougb		dispsocket_t *oldestsocket;
3054186462Sdougb		dns_dispentry_t *oldestresp;
3055186462Sdougb		dns_dispatchevent_t *rev;
3056186462Sdougb
3057186462Sdougb		/*
3058186462Sdougb		 * Kill oldest outstanding query if the number of sockets
3059186462Sdougb		 * exceeds the quota to keep the room for new queries.
3060186462Sdougb		 */
3061186462Sdougb		oldestsocket = ISC_LIST_HEAD(disp->activesockets);
3062186462Sdougb		oldestresp = oldestsocket->resp;
3063186462Sdougb		if (oldestresp != NULL && !oldestresp->item_out) {
3064186462Sdougb			rev = allocate_event(oldestresp->disp);
3065186462Sdougb			if (rev != NULL) {
3066186462Sdougb				rev->buffer.base = NULL;
3067186462Sdougb				rev->result = ISC_R_CANCELED;
3068186462Sdougb				rev->id = oldestresp->id;
3069186462Sdougb				ISC_EVENT_INIT(rev, sizeof(*rev), 0,
3070186462Sdougb					       NULL, DNS_EVENT_DISPATCH,
3071186462Sdougb					       oldestresp->action,
3072186462Sdougb					       oldestresp->arg, oldestresp,
3073186462Sdougb					       NULL, NULL);
3074186462Sdougb				oldestresp->item_out = ISC_TRUE;
3075186462Sdougb				isc_task_send(oldestresp->task,
3076186462Sdougb					      ISC_EVENT_PTR(&rev));
3077193149Sdougb				inc_stats(disp->mgr,
3078193149Sdougb					  dns_resstatscounter_dispabort);
3079186462Sdougb			}
3080186462Sdougb		}
3081186462Sdougb
3082186462Sdougb		/*
3083186462Sdougb		 * Move this entry to the tail so that it won't (easily) be
3084186462Sdougb		 * examined before actually being canceled.
3085186462Sdougb		 */
3086186462Sdougb		ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
3087186462Sdougb		ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
3088186462Sdougb	}
3089186462Sdougb
3090186462Sdougb	qid = DNS_QID(disp);
3091186462Sdougb	LOCK(&qid->lock);
3092186462Sdougb
3093186462Sdougb	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
3094186462Sdougb		/*
3095186462Sdougb		 * Get a separate UDP socket with a random port number.
3096186462Sdougb		 */
3097186462Sdougb		result = get_dispsocket(disp, dest, sockmgr, qid, &dispsocket,
3098186462Sdougb					&localport);
3099186462Sdougb		if (result != ISC_R_SUCCESS) {
3100186462Sdougb			UNLOCK(&qid->lock);
3101186462Sdougb			UNLOCK(&disp->lock);
3102193149Sdougb			inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
3103186462Sdougb			return (result);
3104186462Sdougb		}
3105186462Sdougb	} else {
3106186462Sdougb		localport = disp->localport;
3107186462Sdougb	}
3108186462Sdougb
3109135446Strhodes	/*
3110135446Strhodes	 * Try somewhat hard to find an unique ID.
3111135446Strhodes	 */
3112224092Sdougb	id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
3113186462Sdougb	bucket = dns_hash(qid, dest, id, localport);
3114135446Strhodes	ok = ISC_FALSE;
3115135446Strhodes	for (i = 0; i < 64; i++) {
3116186462Sdougb		if (entry_search(qid, dest, id, localport, bucket) == NULL) {
3117135446Strhodes			ok = ISC_TRUE;
3118135446Strhodes			break;
3119135446Strhodes		}
3120135446Strhodes		id += qid->qid_increment;
3121135446Strhodes		id &= 0x0000ffff;
3122186462Sdougb		bucket = dns_hash(qid, dest, id, localport);
3123135446Strhodes	}
3124135446Strhodes
3125135446Strhodes	if (!ok) {
3126135446Strhodes		UNLOCK(&qid->lock);
3127135446Strhodes		UNLOCK(&disp->lock);
3128135446Strhodes		return (ISC_R_NOMORE);
3129135446Strhodes	}
3130135446Strhodes
3131135446Strhodes	res = isc_mempool_get(disp->mgr->rpool);
3132135446Strhodes	if (res == NULL) {
3133135446Strhodes		UNLOCK(&qid->lock);
3134135446Strhodes		UNLOCK(&disp->lock);
3135186462Sdougb		if (dispsocket != NULL)
3136186462Sdougb			destroy_dispsocket(disp, &dispsocket);
3137135446Strhodes		return (ISC_R_NOMEMORY);
3138135446Strhodes	}
3139135446Strhodes
3140135446Strhodes	disp->refcount++;
3141135446Strhodes	disp->requests++;
3142135446Strhodes	res->task = NULL;
3143135446Strhodes	isc_task_attach(task, &res->task);
3144135446Strhodes	res->disp = disp;
3145135446Strhodes	res->id = id;
3146186462Sdougb	res->port = localport;
3147135446Strhodes	res->bucket = bucket;
3148135446Strhodes	res->host = *dest;
3149135446Strhodes	res->action = action;
3150135446Strhodes	res->arg = arg;
3151186462Sdougb	res->dispsocket = dispsocket;
3152186462Sdougb	if (dispsocket != NULL)
3153186462Sdougb		dispsocket->resp = res;
3154135446Strhodes	res->item_out = ISC_FALSE;
3155135446Strhodes	ISC_LIST_INIT(res->items);
3156135446Strhodes	ISC_LINK_INIT(res, link);
3157135446Strhodes	res->magic = RESPONSE_MAGIC;
3158135446Strhodes	ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
3159135446Strhodes	UNLOCK(&qid->lock);
3160135446Strhodes
3161135446Strhodes	request_log(disp, res, LVL(90),
3162135446Strhodes		    "attached to task %p", res->task);
3163135446Strhodes
3164135446Strhodes	if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
3165186462Sdougb	    ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
3166186462Sdougb		result = startrecv(disp, dispsocket);
3167186462Sdougb		if (result != ISC_R_SUCCESS) {
3168186462Sdougb			LOCK(&qid->lock);
3169186462Sdougb			ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3170186462Sdougb			UNLOCK(&qid->lock);
3171135446Strhodes
3172186462Sdougb			if (dispsocket != NULL)
3173186462Sdougb				destroy_dispsocket(disp, &dispsocket);
3174186462Sdougb
3175186462Sdougb			disp->refcount--;
3176186462Sdougb			disp->requests--;
3177186462Sdougb
3178186462Sdougb			UNLOCK(&disp->lock);
3179186462Sdougb			isc_task_detach(&res->task);
3180186462Sdougb			isc_mempool_put(disp->mgr->rpool, res);
3181186462Sdougb			return (result);
3182186462Sdougb		}
3183186462Sdougb	}
3184186462Sdougb
3185186462Sdougb	if (dispsocket != NULL)
3186186462Sdougb		ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
3187186462Sdougb
3188135446Strhodes	UNLOCK(&disp->lock);
3189135446Strhodes
3190135446Strhodes	*idp = id;
3191135446Strhodes	*resp = res;
3192135446Strhodes
3193186462Sdougb	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
3194186462Sdougb		INSIST(res->dispsocket != NULL);
3195186462Sdougb
3196135446Strhodes	return (ISC_R_SUCCESS);
3197135446Strhodes}
3198135446Strhodes
3199186462Sdougbisc_result_t
3200186462Sdougbdns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
3201186462Sdougb			 isc_task_t *task, isc_taskaction_t action, void *arg,
3202186462Sdougb			 dns_messageid_t *idp, dns_dispentry_t **resp)
3203186462Sdougb{
3204186462Sdougb	REQUIRE(VALID_DISPATCH(disp));
3205186462Sdougb	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3206186462Sdougb
3207186462Sdougb	return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
3208186462Sdougb					  idp, resp, NULL));
3209186462Sdougb}
3210186462Sdougb
3211135446Strhodesvoid
3212135446Strhodesdns_dispatch_starttcp(dns_dispatch_t *disp) {
3213135446Strhodes
3214135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
3215135446Strhodes
3216186462Sdougb	dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
3217135446Strhodes
3218135446Strhodes	LOCK(&disp->lock);
3219135446Strhodes	disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
3220186462Sdougb	(void)startrecv(disp, NULL);
3221135446Strhodes	UNLOCK(&disp->lock);
3222135446Strhodes}
3223135446Strhodes
3224135446Strhodesvoid
3225135446Strhodesdns_dispatch_removeresponse(dns_dispentry_t **resp,
3226135446Strhodes			    dns_dispatchevent_t **sockevent)
3227135446Strhodes{
3228135446Strhodes	dns_dispatchmgr_t *mgr;
3229135446Strhodes	dns_dispatch_t *disp;
3230135446Strhodes	dns_dispentry_t *res;
3231186462Sdougb	dispsocket_t *dispsock;
3232135446Strhodes	dns_dispatchevent_t *ev;
3233135446Strhodes	unsigned int bucket;
3234135446Strhodes	isc_boolean_t killit;
3235135446Strhodes	unsigned int n;
3236135446Strhodes	isc_eventlist_t events;
3237135446Strhodes	dns_qid_t *qid;
3238135446Strhodes
3239135446Strhodes	REQUIRE(resp != NULL);
3240135446Strhodes	REQUIRE(VALID_RESPONSE(*resp));
3241135446Strhodes
3242135446Strhodes	res = *resp;
3243135446Strhodes	*resp = NULL;
3244135446Strhodes
3245135446Strhodes	disp = res->disp;
3246135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
3247135446Strhodes	mgr = disp->mgr;
3248135446Strhodes	REQUIRE(VALID_DISPATCHMGR(mgr));
3249135446Strhodes
3250135446Strhodes	qid = DNS_QID(disp);
3251135446Strhodes
3252135446Strhodes	if (sockevent != NULL) {
3253135446Strhodes		REQUIRE(*sockevent != NULL);
3254135446Strhodes		ev = *sockevent;
3255135446Strhodes		*sockevent = NULL;
3256135446Strhodes	} else {
3257135446Strhodes		ev = NULL;
3258135446Strhodes	}
3259135446Strhodes
3260135446Strhodes	LOCK(&disp->lock);
3261135446Strhodes
3262135446Strhodes	INSIST(disp->requests > 0);
3263135446Strhodes	disp->requests--;
3264135446Strhodes	INSIST(disp->refcount > 0);
3265135446Strhodes	disp->refcount--;
3266135446Strhodes	if (disp->refcount == 0) {
3267135446Strhodes		if (disp->recv_pending > 0)
3268186462Sdougb			isc_socket_cancel(disp->socket, disp->task[0],
3269135446Strhodes					  ISC_SOCKCANCEL_RECV);
3270186462Sdougb		for (dispsock = ISC_LIST_HEAD(disp->activesockets);
3271186462Sdougb		     dispsock != NULL;
3272186462Sdougb		     dispsock = ISC_LIST_NEXT(dispsock, link)) {
3273186462Sdougb			isc_socket_cancel(dispsock->socket, dispsock->task,
3274186462Sdougb					  ISC_SOCKCANCEL_RECV);
3275186462Sdougb		}
3276135446Strhodes		disp->shutting_down = 1;
3277135446Strhodes	}
3278135446Strhodes
3279135446Strhodes	bucket = res->bucket;
3280135446Strhodes
3281135446Strhodes	LOCK(&qid->lock);
3282135446Strhodes	ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
3283135446Strhodes	UNLOCK(&qid->lock);
3284135446Strhodes
3285135446Strhodes	if (ev == NULL && res->item_out) {
3286135446Strhodes		/*
3287135446Strhodes		 * We've posted our event, but the caller hasn't gotten it
3288135446Strhodes		 * yet.  Take it back.
3289135446Strhodes		 */
3290135446Strhodes		ISC_LIST_INIT(events);
3291135446Strhodes		n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
3292135446Strhodes				    NULL, &events);
3293135446Strhodes		/*
3294135446Strhodes		 * We had better have gotten it back.
3295135446Strhodes		 */
3296135446Strhodes		INSIST(n == 1);
3297135446Strhodes		ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
3298135446Strhodes	}
3299135446Strhodes
3300135446Strhodes	if (ev != NULL) {
3301135446Strhodes		REQUIRE(res->item_out == ISC_TRUE);
3302135446Strhodes		res->item_out = ISC_FALSE;
3303135446Strhodes		if (ev->buffer.base != NULL)
3304135446Strhodes			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3305135446Strhodes		free_event(disp, ev);
3306135446Strhodes	}
3307135446Strhodes
3308135446Strhodes	request_log(disp, res, LVL(90), "detaching from task %p", res->task);
3309135446Strhodes	isc_task_detach(&res->task);
3310135446Strhodes
3311186462Sdougb	if (res->dispsocket != NULL) {
3312186462Sdougb		isc_socket_cancel(res->dispsocket->socket,
3313186462Sdougb				  res->dispsocket->task, ISC_SOCKCANCEL_RECV);
3314186462Sdougb		res->dispsocket->resp = NULL;
3315186462Sdougb	}
3316186462Sdougb
3317135446Strhodes	/*
3318135446Strhodes	 * Free any buffered requests as well
3319135446Strhodes	 */
3320135446Strhodes	ev = ISC_LIST_HEAD(res->items);
3321135446Strhodes	while (ev != NULL) {
3322135446Strhodes		ISC_LIST_UNLINK(res->items, ev, ev_link);
3323135446Strhodes		if (ev->buffer.base != NULL)
3324135446Strhodes			free_buffer(disp, ev->buffer.base, ev->buffer.length);
3325135446Strhodes		free_event(disp, ev);
3326135446Strhodes		ev = ISC_LIST_HEAD(res->items);
3327135446Strhodes	}
3328135446Strhodes	res->magic = 0;
3329135446Strhodes	isc_mempool_put(disp->mgr->rpool, res);
3330135446Strhodes	if (disp->shutting_down == 1)
3331135446Strhodes		do_cancel(disp);
3332135446Strhodes	else
3333186462Sdougb		(void)startrecv(disp, NULL);
3334135446Strhodes
3335135446Strhodes	killit = destroy_disp_ok(disp);
3336135446Strhodes	UNLOCK(&disp->lock);
3337135446Strhodes	if (killit)
3338186462Sdougb		isc_task_send(disp->task[0], &disp->ctlevent);
3339135446Strhodes}
3340135446Strhodes
3341135446Strhodesstatic void
3342135446Strhodesdo_cancel(dns_dispatch_t *disp) {
3343135446Strhodes	dns_dispatchevent_t *ev;
3344135446Strhodes	dns_dispentry_t *resp;
3345135446Strhodes	dns_qid_t *qid;
3346135446Strhodes
3347135446Strhodes	if (disp->shutdown_out == 1)
3348135446Strhodes		return;
3349135446Strhodes
3350135446Strhodes	qid = DNS_QID(disp);
3351135446Strhodes
3352135446Strhodes	/*
3353186462Sdougb	 * Search for the first response handler without packets outstanding
3354186462Sdougb	 * unless a specific hander is given.
3355135446Strhodes	 */
3356135446Strhodes	LOCK(&qid->lock);
3357135446Strhodes	for (resp = linear_first(qid);
3358186462Sdougb	     resp != NULL && resp->item_out;
3359135446Strhodes	     /* Empty. */)
3360135446Strhodes		resp = linear_next(qid, resp);
3361186462Sdougb
3362135446Strhodes	/*
3363135446Strhodes	 * No one to send the cancel event to, so nothing to do.
3364135446Strhodes	 */
3365135446Strhodes	if (resp == NULL)
3366135446Strhodes		goto unlock;
3367135446Strhodes
3368135446Strhodes	/*
3369135446Strhodes	 * Send the shutdown failsafe event to this resp.
3370135446Strhodes	 */
3371135446Strhodes	ev = disp->failsafe_ev;
3372135446Strhodes	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
3373135446Strhodes		       resp->action, resp->arg, resp, NULL, NULL);
3374135446Strhodes	ev->result = disp->shutdown_why;
3375135446Strhodes	ev->buffer.base = NULL;
3376135446Strhodes	ev->buffer.length = 0;
3377135446Strhodes	disp->shutdown_out = 1;
3378135446Strhodes	request_log(disp, resp, LVL(10),
3379135446Strhodes		    "cancel: failsafe event %p -> task %p",
3380135446Strhodes		    ev, resp->task);
3381135446Strhodes	resp->item_out = ISC_TRUE;
3382135446Strhodes	isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
3383135446Strhodes unlock:
3384135446Strhodes	UNLOCK(&qid->lock);
3385135446Strhodes}
3386135446Strhodes
3387135446Strhodesisc_socket_t *
3388135446Strhodesdns_dispatch_getsocket(dns_dispatch_t *disp) {
3389135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
3390135446Strhodes
3391135446Strhodes	return (disp->socket);
3392135446Strhodes}
3393135446Strhodes
3394186462Sdougbisc_socket_t *
3395186462Sdougbdns_dispatch_getentrysocket(dns_dispentry_t *resp) {
3396186462Sdougb	REQUIRE(VALID_RESPONSE(resp));
3397186462Sdougb
3398186462Sdougb	if (resp->dispsocket != NULL)
3399186462Sdougb		return (resp->dispsocket->socket);
3400186462Sdougb	else
3401186462Sdougb		return (NULL);
3402186462Sdougb}
3403186462Sdougb
3404135446Strhodesisc_result_t
3405135446Strhodesdns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
3406135446Strhodes
3407135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
3408135446Strhodes	REQUIRE(addrp != NULL);
3409135446Strhodes
3410135446Strhodes	if (disp->socktype == isc_sockettype_udp) {
3411135446Strhodes		*addrp = disp->local;
3412135446Strhodes		return (ISC_R_SUCCESS);
3413135446Strhodes	}
3414135446Strhodes	return (ISC_R_NOTIMPLEMENTED);
3415135446Strhodes}
3416135446Strhodes
3417135446Strhodesvoid
3418135446Strhodesdns_dispatch_cancel(dns_dispatch_t *disp) {
3419135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
3420135446Strhodes
3421135446Strhodes	LOCK(&disp->lock);
3422135446Strhodes
3423135446Strhodes	if (disp->shutting_down == 1) {
3424135446Strhodes		UNLOCK(&disp->lock);
3425135446Strhodes		return;
3426135446Strhodes	}
3427135446Strhodes
3428135446Strhodes	disp->shutdown_why = ISC_R_CANCELED;
3429135446Strhodes	disp->shutting_down = 1;
3430135446Strhodes	do_cancel(disp);
3431135446Strhodes
3432135446Strhodes	UNLOCK(&disp->lock);
3433135446Strhodes
3434135446Strhodes	return;
3435135446Strhodes}
3436135446Strhodes
3437186462Sdougbunsigned int
3438186462Sdougbdns_dispatch_getattributes(dns_dispatch_t *disp) {
3439186462Sdougb	REQUIRE(VALID_DISPATCH(disp));
3440186462Sdougb
3441186462Sdougb	/*
3442186462Sdougb	 * We don't bother locking disp here; it's the caller's responsibility
3443186462Sdougb	 * to use only non volatile flags.
3444186462Sdougb	 */
3445186462Sdougb	return (disp->attributes);
3446186462Sdougb}
3447186462Sdougb
3448135446Strhodesvoid
3449135446Strhodesdns_dispatch_changeattributes(dns_dispatch_t *disp,
3450135446Strhodes			      unsigned int attributes, unsigned int mask)
3451135446Strhodes{
3452135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
3453186462Sdougb	/* Exclusive attribute can only be set on creation */
3454186462Sdougb	REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
3455186462Sdougb	/* Also, a dispatch with randomport specified cannot start listening */
3456186462Sdougb	REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
3457186462Sdougb		(attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
3458135446Strhodes
3459135446Strhodes	/* XXXMLG
3460135446Strhodes	 * Should check for valid attributes here!
3461135446Strhodes	 */
3462135446Strhodes
3463135446Strhodes	LOCK(&disp->lock);
3464135446Strhodes
3465135446Strhodes	if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3466135446Strhodes		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
3467135446Strhodes		    (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
3468135446Strhodes			disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
3469186462Sdougb			(void)startrecv(disp, NULL);
3470135446Strhodes		} else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
3471135446Strhodes			   == 0 &&
3472135446Strhodes			   (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
3473135446Strhodes			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
3474135446Strhodes			if (disp->recv_pending != 0)
3475186462Sdougb				isc_socket_cancel(disp->socket, disp->task[0],
3476135446Strhodes						  ISC_SOCKCANCEL_RECV);
3477135446Strhodes		}
3478135446Strhodes	}
3479135446Strhodes
3480135446Strhodes	disp->attributes &= ~mask;
3481135446Strhodes	disp->attributes |= (attributes & mask);
3482135446Strhodes	UNLOCK(&disp->lock);
3483135446Strhodes}
3484135446Strhodes
3485135446Strhodesvoid
3486135446Strhodesdns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
3487135446Strhodes	void *buf;
3488135446Strhodes	isc_socketevent_t *sevent, *newsevent;
3489135446Strhodes
3490135446Strhodes	REQUIRE(VALID_DISPATCH(disp));
3491135446Strhodes	REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
3492135446Strhodes	REQUIRE(event != NULL);
3493135446Strhodes
3494135446Strhodes	sevent = (isc_socketevent_t *)event;
3495135446Strhodes
3496135446Strhodes	INSIST(sevent->n <= disp->mgr->buffersize);
3497135446Strhodes	newsevent = (isc_socketevent_t *)
3498135446Strhodes		    isc_event_allocate(disp->mgr->mctx, NULL,
3499186462Sdougb				      DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
3500135446Strhodes				      disp, sizeof(isc_socketevent_t));
3501135446Strhodes	if (newsevent == NULL)
3502135446Strhodes		return;
3503135446Strhodes
3504135446Strhodes	buf = allocate_udp_buffer(disp);
3505135446Strhodes	if (buf == NULL) {
3506135446Strhodes		isc_event_free(ISC_EVENT_PTR(&newsevent));
3507135446Strhodes		return;
3508135446Strhodes	}
3509135446Strhodes	memcpy(buf, sevent->region.base, sevent->n);
3510135446Strhodes	newsevent->region.base = buf;
3511135446Strhodes	newsevent->region.length = disp->mgr->buffersize;
3512135446Strhodes	newsevent->n = sevent->n;
3513135446Strhodes	newsevent->result = sevent->result;
3514135446Strhodes	newsevent->address = sevent->address;
3515135446Strhodes	newsevent->timestamp = sevent->timestamp;
3516135446Strhodes	newsevent->pktinfo = sevent->pktinfo;
3517135446Strhodes	newsevent->attributes = sevent->attributes;
3518186462Sdougb
3519186462Sdougb	isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
3520135446Strhodes}
3521135446Strhodes
#if 0
/*
 * Debugging aid (compiled out): print every dispatch on the manager's
 * list together with its local address.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
	dns_dispatch_t *disp;
	char addrtext[1024];

	for (disp = ISC_LIST_HEAD(mgr->list);
	     disp != NULL;
	     disp = ISC_LIST_NEXT(disp, link)) {
		isc_sockaddr_format(&disp->local, addrtext, sizeof(addrtext));
		printf("\tdispatch %p, addr %s\n", disp, addrtext);
	}
}
#endif
3536