resolver.c revision 308205
1/*
2 * Copyright (C) 2004-2014  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id$ */
19
20/*! \file */
21
22#include <config.h>
23
24#include <isc/log.h>
25#include <isc/platform.h>
26#include <isc/print.h>
27#include <isc/string.h>
28#include <isc/random.h>
29#include <isc/socket.h>
30#include <isc/stats.h>
31#include <isc/task.h>
32#include <isc/timer.h>
33#include <isc/util.h>
34
35#include <dns/acl.h>
36#include <dns/adb.h>
37#include <dns/cache.h>
38#include <dns/db.h>
39#include <dns/dispatch.h>
40#include <dns/ds.h>
41#include <dns/events.h>
42#include <dns/forward.h>
43#include <dns/keytable.h>
44#include <dns/log.h>
45#include <dns/message.h>
46#include <dns/ncache.h>
47#include <dns/nsec.h>
48#include <dns/nsec3.h>
49#include <dns/opcode.h>
50#include <dns/peer.h>
51#include <dns/rbt.h>
52#include <dns/rcode.h>
53#include <dns/rdata.h>
54#include <dns/rdataclass.h>
55#include <dns/rdatalist.h>
56#include <dns/rdataset.h>
57#include <dns/rdatastruct.h>
58#include <dns/rdatatype.h>
59#include <dns/resolver.h>
60#include <dns/result.h>
61#include <dns/rootns.h>
62#include <dns/stats.h>
63#include <dns/tsig.h>
64#include <dns/validator.h>
65
/*
 * Debug tracing helpers.
 *
 * DNS_RESOLVER_TRACE is defined unconditionally on the next line, so the
 * logging variants are the ones in effect; the empty variants in the
 * #else branch only apply if that define is removed.
 *
 * Every logging variant calls isc_log_write() on dns_lctx with category
 * DNS_LOGCATEGORY_RESOLVER, module DNS_LOGMODULE_RESOLVER and level
 * ISC_LOG_DEBUG(3).  Except for RRTRACE, which takes the resolver as an
 * argument, each macro picks up a variable from the calling scope by
 * name: 'res' (RTRACE), 'fctx' (FCTXTRACE, FCTXTRACE2), 'fetch' (FTRACE)
 * or 'query' (QTRACE).
 */
#define DNS_RESOLVER_TRACE
#ifdef DNS_RESOLVER_TRACE
#define RTRACE(m)       isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "res %p: %s", res, (m))
#define RRTRACE(r, m)   isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "res %p: %s", (r), (m))
#define FCTXTRACE(m)    isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fctx %p(%s): %s", fctx, fctx->info, (m))
#define FCTXTRACE2(m1, m2) \
			isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fctx %p(%s): %s %s", \
				      fctx, fctx->info, (m1), (m2))
#define FTRACE(m)       isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fetch %p (fctx %p(%s)): %s", \
				      fetch, fetch->private, \
				      fetch->private->info, (m))
#define QTRACE(m)       isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "resquery %p (fctx %p(%s)): %s", \
				      query, query->fctx, \
				      query->fctx->info, (m))
#else
/* Tracing compiled out: every macro expands to nothing. */
#define RTRACE(m)
#define RRTRACE(r, m)
#define FCTXTRACE(m)
#define FCTXTRACE2(m1, m2)
#define FTRACE(m)
#define QTRACE(m)
#endif
112
/* Microseconds per second; used to convert the timeouts below. */
#define US_PER_SEC 1000000U
/*
 * The maximum time we will wait for a single query, in seconds, and the
 * same limit expressed in microseconds.
 */
#define MAX_SINGLE_QUERY_TIMEOUT 9U
#define MAX_SINGLE_QUERY_TIMEOUT_US (MAX_SINGLE_QUERY_TIMEOUT*US_PER_SEC)

/*
 * We need to allow an individual query time to complete or time out, so
 * the minimum whole-fetch timeout is one second longer than the longest
 * single-query wait.
 */
#define MINIMUM_QUERY_TIMEOUT (MAX_SINGLE_QUERY_TIMEOUT + 1U)

/* The default time in seconds for the whole query to live. */
#ifndef DEFAULT_QUERY_TIMEOUT
#define DEFAULT_QUERY_TIMEOUT MINIMUM_QUERY_TIMEOUT
#endif

#ifndef MAXIMUM_QUERY_TIMEOUT
#define MAXIMUM_QUERY_TIMEOUT 30 /* The maximum time in seconds for the whole query to live. */
#endif

/* The default maximum number of recursions to follow before giving up. */
#ifndef DEFAULT_RECURSION_DEPTH
#define DEFAULT_RECURSION_DEPTH 7
#endif

/* The default maximum number of iterative queries to allow before giving up. */
#ifndef DEFAULT_MAX_QUERIES
#define DEFAULT_MAX_QUERIES 50
#endif

/*%
 * Maximum EDNS0 input packet size.
 */
#define RECV_BUFFER_SIZE                4096            /* XXXRTH  Constant. */
/* NOTE(review): presumably the number of EDNS option slots - confirm at use. */
#define EDNSOPTS			2

/*%
 * This defines the maximum number of timeouts we will permit before we
 * disable EDNS0 on the query.
 */
#define MAX_EDNS0_TIMEOUTS      3
155
156typedef struct fetchctx fetchctx_t;
157
158typedef struct query {
159	/* Locked by task event serialization. */
160	unsigned int			magic;
161	fetchctx_t *			fctx;
162	isc_mem_t *			mctx;
163	dns_dispatchmgr_t *		dispatchmgr;
164	dns_dispatch_t *		dispatch;
165	isc_boolean_t			exclusivesocket;
166	dns_adbaddrinfo_t *		addrinfo;
167	isc_socket_t *			tcpsocket;
168	isc_time_t			start;
169	dns_messageid_t			id;
170	dns_dispentry_t *		dispentry;
171	ISC_LINK(struct query)		link;
172	isc_buffer_t			buffer;
173	isc_buffer_t			*tsig;
174	dns_tsigkey_t			*tsigkey;
175	isc_socketevent_t		sendevent;
176	unsigned int			options;
177	unsigned int			attributes;
178	unsigned int			sends;
179	unsigned int			connects;
180	unsigned char			data[512];
181} resquery_t;
182
183#define QUERY_MAGIC			ISC_MAGIC('Q', '!', '!', '!')
184#define VALID_QUERY(query)		ISC_MAGIC_VALID(query, QUERY_MAGIC)
185
186#define RESQUERY_ATTR_CANCELED          0x02
187
188#define RESQUERY_CONNECTING(q)          ((q)->connects > 0)
189#define RESQUERY_CANCELED(q)            (((q)->attributes & \
190					  RESQUERY_ATTR_CANCELED) != 0)
191#define RESQUERY_SENDING(q)             ((q)->sends > 0)
192
/* Life-cycle state of a fetch context (fetchctx.state). */
typedef enum {
	fetchstate_init = 0,            /*%< Start event has not run yet. */
	fetchstate_active,
	fetchstate_done                 /*%< FETCHDONE events posted. */
} fetchstate;

/*
 * Category of failure handed to add_bad() as its 'badtype' argument.
 */
typedef enum {
	badns_unreachable = 0,
	badns_response,
	badns_validation
} badnstype_t;
204
/*
 * Fetch context: the state kept for one fetch.
 *
 * The fields are grouped by the locking discipline stated in the section
 * comments below.
 */
struct fetchctx {
	/*% Not locked. */
	unsigned int			magic;
	dns_resolver_t *		res;
	dns_name_t			name;
	dns_rdatatype_t			type;
	unsigned int			options;
	unsigned int			bucketnum;
	char *				info;	/*%< label printed by the FCTXTRACE macros */
	isc_mem_t *			mctx;

	/*% Locked by appropriate bucket lock. */
	fetchstate			state;
	isc_boolean_t			want_shutdown;
	isc_boolean_t			cloned;
	isc_boolean_t			spilled;
	unsigned int			references;
	isc_event_t			control_event;
	ISC_LINK(struct fetchctx)       link;
	ISC_LIST(dns_fetchevent_t)      events;
	/*% Locked by task event serialization. */
	dns_name_t			domain;
	dns_rdataset_t			nameservers;
	unsigned int			attributes;	/*%< FCTX_ATTR_* bits */
	isc_timer_t *			timer;		/*%< driven by fctx_starttimer() and friends */
	isc_time_t			expires;	/*%< expiry time handed to isc_timer_reset() */
	isc_interval_t			interval;
	dns_message_t *			qmessage;
	dns_message_t *			rmessage;
	ISC_LIST(resquery_t)		queries;	/*%< outstanding queries */
	dns_adbfindlist_t		finds;
	dns_adbfind_t *			find;
	dns_adbfindlist_t		altfinds;
	dns_adbfind_t *			altfind;
	dns_adbaddrinfolist_t		forwaddrs;
	dns_adbaddrinfolist_t		altaddrs;
	isc_sockaddrlist_t		forwarders;
	dns_fwdpolicy_t			fwdpolicy;
	isc_sockaddrlist_t		bad;
	isc_sockaddrlist_t		edns;
	isc_sockaddrlist_t		edns512;
	isc_sockaddrlist_t		bad_edns;
	dns_validator_t			*validator;	/*%< the non-deferred validator, if any */
	ISC_LIST(dns_validator_t)       validators;	/*%< every validator made by valcreate() */
	dns_db_t *			cache;
	dns_adb_t *			adb;
	isc_boolean_t			ns_ttl_ok;
	isc_uint32_t			ns_ttl;

	/*%
	 * The number of events we're waiting for.
	 */
	unsigned int			pending;

	/*%
	 * The number of times we've "restarted" the current
	 * nameserver set.  This acts as a failsafe to prevent
	 * us from pounding constantly on a particular set of
	 * servers that, for whatever reason, are not giving
	 * us useful responses, but are responding in such a
	 * way that they are not marked "bad".
	 */
	unsigned int			restarts;

	/*%
	 * The number of timeouts that have occurred since we
	 * last successfully received a response packet.  This
	 * is used for EDNS0 black hole detection.
	 */
	unsigned int			timeouts;

	/*%
	 * Look aside state for DS lookups.
	 */
	dns_name_t 			nsname;
	dns_fetch_t *			nsfetch;
	dns_rdataset_t			nsrrset;

	/*%
	 * Number of queries that reference this context.
	 * Decremented by resquery_destroy().
	 */
	unsigned int			nqueries;

	/*%
	 * The reason to print when logging a successful
	 * response to a query.
	 */
	const char *			reason;

	/*%
	 * Random numbers to use for mixing up server addresses.
	 */
	isc_uint32_t                    rand_buf;
	isc_uint32_t                    rand_bits;

	/*%
	 * Fetch-local statistics for detailed logging.
	 */
	isc_result_t			result; /*%< fetch result  */
	isc_result_t			vresult; /*%< validation result  */
	int				exitline;
	isc_time_t			start;
	isc_uint64_t			duration;
	isc_boolean_t			logged;
	unsigned int			querysent;
	unsigned int			totalqueries;
	unsigned int			referrals;
	unsigned int			lamecount;
	unsigned int			neterr;
	unsigned int			badresp;
	unsigned int			adberr;
	unsigned int			findfail;
	unsigned int			valfail;
	isc_boolean_t			timeout;
	dns_adbaddrinfo_t 		*addrinfo;
	isc_sockaddr_t			*client;
	unsigned int			depth;
};

#define FCTX_MAGIC			ISC_MAGIC('F', '!', '!', '!')
#define VALID_FCTX(fctx)		ISC_MAGIC_VALID(fctx, FCTX_MAGIC)

/*
 * Bits for fetchctx.attributes.
 */
#define FCTX_ATTR_HAVEANSWER            0x0001
#define FCTX_ATTR_GLUING                0x0002
#define FCTX_ATTR_ADDRWAIT              0x0004
#define FCTX_ATTR_SHUTTINGDOWN          0x0008
#define FCTX_ATTR_WANTCACHE             0x0010
#define FCTX_ATTR_WANTNCACHE            0x0020
#define FCTX_ATTR_NEEDEDNS0             0x0040
#define FCTX_ATTR_TRIEDFIND             0x0080
#define FCTX_ATTR_TRIEDALT              0x0100

/*
 * One predicate per attribute bit; each is true when the bit is set in
 * (f)->attributes.
 */
#define HAVE_ANSWER(f)          (((f)->attributes & FCTX_ATTR_HAVEANSWER) != \
				 0)
#define GLUING(f)               (((f)->attributes & FCTX_ATTR_GLUING) != \
				 0)
#define ADDRWAIT(f)             (((f)->attributes & FCTX_ATTR_ADDRWAIT) != \
				 0)
#define SHUTTINGDOWN(f)         (((f)->attributes & FCTX_ATTR_SHUTTINGDOWN) \
				 != 0)
#define WANTCACHE(f)            (((f)->attributes & FCTX_ATTR_WANTCACHE) != 0)
#define WANTNCACHE(f)           (((f)->attributes & FCTX_ATTR_WANTNCACHE) != 0)
#define NEEDEDNS0(f)            (((f)->attributes & FCTX_ATTR_NEEDEDNS0) != 0)
#define TRIEDFIND(f)            (((f)->attributes & FCTX_ATTR_TRIEDFIND) != 0)
#define TRIEDALT(f)             (((f)->attributes & FCTX_ATTR_TRIEDALT) != 0)
350
/*
 * Argument block handed to the validator callback.  valcreate() allocates
 * one per validator and passes it to dns_validator_create() together with
 * validated().
 */
typedef struct {
	dns_adbaddrinfo_t *		addrinfo;
	fetchctx_t *			fctx;
} dns_valarg_t;

/*
 * Definition of the dns_fetch handle: a magic number plus a pointer to
 * the fetch context behind it.
 */
struct dns_fetch {
	unsigned int			magic;
	fetchctx_t *			private;
};

#define DNS_FETCH_MAGIC			ISC_MAGIC('F', 't', 'c', 'h')
#define DNS_FETCH_VALID(fetch)		ISC_MAGIC_VALID(fetch, DNS_FETCH_MAGIC)
363
/*
 * One bucket of fetch contexts, with its own task, lock and memory
 * context.  dns_resolver.buckets points at these.
 */
typedef struct fctxbucket {
	isc_task_t *			task;
	isc_mutex_t			lock;
	ISC_LIST(fetchctx_t)		fctxs;
	isc_boolean_t			exiting;
	isc_mem_t *			mctx;
} fctxbucket_t;

/*
 * An entry on dns_resolver.alternates.  'isaddress' is presumably the
 * discriminator for the union ('_u.addr' when true, '_u._n' name/port
 * otherwise) - confirm against the code that fills it in.
 */
typedef struct alternate {
	isc_boolean_t			isaddress;
	union   {
		isc_sockaddr_t		addr;
		struct {
			dns_name_t      name;
			in_port_t       port;
		} _n;
	} _u;
	ISC_LINK(struct alternate)      link;
} alternate_t;
383
/*
 * Entry in the resolver's bad cache (dns_resolver.badcache): a name/type
 * pair with an expiry time, chained through 'next'.
 */
typedef struct dns_badcache dns_badcache_t;
struct dns_badcache {
	dns_badcache_t *	next;
	dns_rdatatype_t 	type;
	isc_time_t		expire;
	unsigned int		hashval;
	dns_name_t		name;
};
/* NOTE(review): presumably the initial hash table size - confirm at use. */
#define DNS_BADCACHE_SIZE 1021
/* Bad cache TTL: the resolver's lame_ttl, but never less than 30. */
#define DNS_BADCACHE_TTL(fctx) \
	(((fctx)->res->lame_ttl > 30 ) ? (fctx)->res->lame_ttl : 30)
395
/*
 * The resolver object.  Fields are grouped by the lock that protects
 * them, as stated in the section comments.
 *
 * NOTE(review): USE_ALGLOCK and USE_MBSLOCK are not defined in the part
 * of this file visible here; if they are not defined elsewhere, the
 * corresponding rwlock members are compiled out.
 */
struct dns_resolver {
	/* Unlocked. */
	unsigned int			magic;
	isc_mem_t *			mctx;
	isc_mutex_t			lock;
	isc_mutex_t			nlock;
	isc_mutex_t			primelock;
	dns_rdataclass_t		rdclass;
	isc_socketmgr_t *		socketmgr;
	isc_timermgr_t *		timermgr;
	isc_taskmgr_t *			taskmgr;
	dns_view_t *			view;	/* view->resstats receives inc_stats() counts */
	isc_boolean_t			frozen;
	unsigned int			options;
	dns_dispatchmgr_t *		dispatchmgr;
	dns_dispatchset_t *		dispatches4;
	isc_boolean_t			exclusivev4;
	dns_dispatchset_t *		dispatches6;
	isc_boolean_t			exclusivev6;
	unsigned int			nbuckets;
	fctxbucket_t *			buckets;
	isc_uint32_t			lame_ttl;	/* floor of 30 applied by DNS_BADCACHE_TTL() */
	ISC_LIST(alternate_t)		alternates;
	isc_uint16_t			udpsize;
#if USE_ALGLOCK
	isc_rwlock_t			alglock;
#endif
	dns_rbt_t *			algorithms;
#if USE_MBSLOCK
	isc_rwlock_t			mbslock;
#endif
	dns_rbt_t *			mustbesecure;
	unsigned int			spillatmax;
	unsigned int			spillatmin;
	isc_timer_t *			spillattimer;
	isc_boolean_t			zero_no_soa_ttl;
	unsigned int			query_timeout;
	unsigned int			maxdepth;

	/* Locked by lock. */
	unsigned int			references;
	isc_boolean_t			exiting;
	isc_eventlist_t			whenshutdown;
	unsigned int			activebuckets;
	isc_boolean_t			priming;
	unsigned int			spillat;	/* clients-per-query */

	/* Bad cache. */
	dns_badcache_t  ** 		badcache;
	unsigned int 			badcount;
	unsigned int 			badhash;
	unsigned int 			badsweep;

	/* Locked by primelock. */
	dns_fetch_t *			primefetch;
	/* Locked by nlock. */
	unsigned int			nfctx;
};

#define RES_MAGIC			ISC_MAGIC('R', 'e', 's', '!')
#define VALID_RESOLVER(res)		ISC_MAGIC_VALID(res, RES_MAGIC)
457
/*%
 * Private addrinfo flags.  These must not conflict with DNS_FETCHOPT_NOEDNS0,
 * which we also use as an addrinfo flag.
 *
 * FCTX_ADDRINFO_TRIED is set on a server's entry by fctx_cancelquery();
 * entries without FCTX_ADDRINFO_MARK are the ones whose SRTT that
 * function ages.
 */
#define FCTX_ADDRINFO_MARK              0x0001
#define FCTX_ADDRINFO_FORWARDER         0x1000
#define FCTX_ADDRINFO_TRIED             0x2000
#define UNMARKED(a)                     (((a)->flags & FCTX_ADDRINFO_MARK) \
					 == 0)
#define ISFORWARDER(a)                  (((a)->flags & \
					 FCTX_ADDRINFO_FORWARDER) != 0)
#define TRIED(a)                        (((a)->flags & \
					 FCTX_ADDRINFO_TRIED) != 0)

/* Predicates on an rdataset's 'attributes' bits. */
#define NXDOMAIN(r) (((r)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
#define NEGATIVE(r) (((r)->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
474
475static void destroy(dns_resolver_t *res);
476static void empty_bucket(dns_resolver_t *res);
477static isc_result_t resquery_send(resquery_t *query);
478static void resquery_response(isc_task_t *task, isc_event_t *event);
479static void resquery_connected(isc_task_t *task, isc_event_t *event);
480static void fctx_try(fetchctx_t *fctx, isc_boolean_t retrying,
481		     isc_boolean_t badcache);
482static void fctx_destroy(fetchctx_t *fctx);
483static isc_boolean_t fctx_unlink(fetchctx_t *fctx);
484static isc_result_t ncache_adderesult(dns_message_t *message,
485				      dns_db_t *cache, dns_dbnode_t *node,
486				      dns_rdatatype_t covers,
487				      isc_stdtime_t now, dns_ttl_t maxttl,
488				      isc_boolean_t optout,
489				      isc_boolean_t secure,
490				      dns_rdataset_t *ardataset,
491				      isc_result_t *eresultp);
492static void validated(isc_task_t *task, isc_event_t *event);
493static isc_boolean_t maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked);
494static void add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
495		    isc_result_t reason, badnstype_t badtype);
496static inline isc_result_t findnoqname(fetchctx_t *fctx, dns_name_t *name,
497				       dns_rdatatype_t type,
498				       dns_name_t **noqname);
499
500/*%
501 * Increment resolver-related statistics counters.
502 */
503static inline void
504inc_stats(dns_resolver_t *res, isc_statscounter_t counter) {
505	if (res->view->resstats != NULL)
506		isc_stats_increment(res->view->resstats, counter);
507}
508
509static isc_result_t
510valcreate(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, dns_name_t *name,
511	  dns_rdatatype_t type, dns_rdataset_t *rdataset,
512	  dns_rdataset_t *sigrdataset, unsigned int valoptions,
513	  isc_task_t *task)
514{
515	dns_validator_t *validator = NULL;
516	dns_valarg_t *valarg;
517	isc_result_t result;
518
519	valarg = isc_mem_get(fctx->mctx, sizeof(*valarg));
520	if (valarg == NULL)
521		return (ISC_R_NOMEMORY);
522
523	valarg->fctx = fctx;
524	valarg->addrinfo = addrinfo;
525
526	if (!ISC_LIST_EMPTY(fctx->validators))
527		valoptions |= DNS_VALIDATOR_DEFER;
528	else
529		valoptions &= ~DNS_VALIDATOR_DEFER;
530
531	result = dns_validator_create(fctx->res->view, name, type, rdataset,
532				      sigrdataset, fctx->rmessage,
533				      valoptions, task, validated, valarg,
534				      &validator);
535	if (result == ISC_R_SUCCESS) {
536		inc_stats(fctx->res, dns_resstatscounter_val);
537		if ((valoptions & DNS_VALIDATOR_DEFER) == 0) {
538			INSIST(fctx->validator == NULL);
539			fctx->validator = validator;
540		}
541		ISC_LIST_APPEND(fctx->validators, validator, link);
542	} else
543		isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
544	return (result);
545}
546
547static isc_boolean_t
548rrsig_fromchildzone(fetchctx_t *fctx, dns_rdataset_t *rdataset) {
549	dns_namereln_t namereln;
550	dns_rdata_rrsig_t rrsig;
551	dns_rdata_t rdata = DNS_RDATA_INIT;
552	int order;
553	isc_result_t result;
554	unsigned int labels;
555
556	for (result = dns_rdataset_first(rdataset);
557	     result == ISC_R_SUCCESS;
558	     result = dns_rdataset_next(rdataset)) {
559		dns_rdataset_current(rdataset, &rdata);
560		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
561		RUNTIME_CHECK(result == ISC_R_SUCCESS);
562		namereln = dns_name_fullcompare(&rrsig.signer, &fctx->domain,
563						&order, &labels);
564		if (namereln == dns_namereln_subdomain)
565			return (ISC_TRUE);
566		dns_rdata_reset(&rdata);
567	}
568	return (ISC_FALSE);
569}
570
571static isc_boolean_t
572fix_mustbedelegationornxdomain(dns_message_t *message, fetchctx_t *fctx) {
573	dns_name_t *name;
574	dns_name_t *domain = &fctx->domain;
575	dns_rdataset_t *rdataset;
576	dns_rdatatype_t type;
577	isc_result_t result;
578	isc_boolean_t keep_auth = ISC_FALSE;
579
580	if (message->rcode == dns_rcode_nxdomain)
581		return (ISC_FALSE);
582
583	/*
584	 * A DS RRset can appear anywhere in a zone, even for a delegation-only
585	 * zone.  So a response to an explicit query for this type should be
586	 * excluded from delegation-only fixup.
587	 *
588	 * SOA, NS, and DNSKEY can only exist at a zone apex, so a postive
589	 * response to a query for these types can never violate the
590	 * delegation-only assumption: if the query name is below a
591	 * zone cut, the response should normally be a referral, which should
592	 * be accepted; if the query name is below a zone cut but the server
593	 * happens to have authority for the zone of the query name, the
594	 * response is a (non-referral) answer.  But this does not violate
595	 * delegation-only because the query name must be in a different zone
596	 * due to the "apex-only" nature of these types.  Note that if the
597	 * remote server happens to have authority for a child zone of a
598	 * delegation-only zone, we may still incorrectly "fix" the response
599	 * with NXDOMAIN for queries for other types.  Unfortunately it's
600	 * generally impossible to differentiate this case from violation of
601	 * the delegation-only assumption.  Once the resolver learns the
602	 * correct zone cut, possibly via a separate query for an "apex-only"
603	 * type, queries for other types will be resolved correctly.
604	 *
605	 * A query for type ANY will be accepted if it hits an exceptional
606	 * type above in the answer section as it should be from a child
607	 * zone.
608	 *
609	 * Also accept answers with RRSIG records from the child zone.
610	 * Direct queries for RRSIG records should not be answered from
611	 * the parent zone.
612	 */
613
614	if (message->counts[DNS_SECTION_ANSWER] != 0 &&
615	    (fctx->type == dns_rdatatype_ns ||
616	     fctx->type == dns_rdatatype_ds ||
617	     fctx->type == dns_rdatatype_soa ||
618	     fctx->type == dns_rdatatype_any ||
619	     fctx->type == dns_rdatatype_rrsig ||
620	     fctx->type == dns_rdatatype_dnskey)) {
621		result = dns_message_firstname(message, DNS_SECTION_ANSWER);
622		while (result == ISC_R_SUCCESS) {
623			name = NULL;
624			dns_message_currentname(message, DNS_SECTION_ANSWER,
625						&name);
626			for (rdataset = ISC_LIST_HEAD(name->list);
627			     rdataset != NULL;
628			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
629				if (!dns_name_equal(name, &fctx->name))
630					continue;
631				type = rdataset->type;
632				/*
633				 * RRsig from child?
634				 */
635				if (type == dns_rdatatype_rrsig &&
636				    rrsig_fromchildzone(fctx, rdataset))
637					return (ISC_FALSE);
638				/*
639				 * Direct query for apex records or DS.
640				 */
641				if (fctx->type == type &&
642				    (type == dns_rdatatype_ds ||
643				     type == dns_rdatatype_ns ||
644				     type == dns_rdatatype_soa ||
645				     type == dns_rdatatype_dnskey))
646					return (ISC_FALSE);
647				/*
648				 * Indirect query for apex records or DS.
649				 */
650				if (fctx->type == dns_rdatatype_any &&
651				    (type == dns_rdatatype_ns ||
652				     type == dns_rdatatype_ds ||
653				     type == dns_rdatatype_soa ||
654				     type == dns_rdatatype_dnskey))
655					return (ISC_FALSE);
656			}
657			result = dns_message_nextname(message,
658						      DNS_SECTION_ANSWER);
659		}
660	}
661
662	/*
663	 * A NODATA response to a DS query?
664	 */
665	if (fctx->type == dns_rdatatype_ds &&
666	    message->counts[DNS_SECTION_ANSWER] == 0)
667		return (ISC_FALSE);
668
669	/* Look for referral or indication of answer from child zone? */
670	if (message->counts[DNS_SECTION_AUTHORITY] == 0)
671		goto munge;
672
673	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
674	while (result == ISC_R_SUCCESS) {
675		name = NULL;
676		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
677		for (rdataset = ISC_LIST_HEAD(name->list);
678		     rdataset != NULL;
679		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
680			type = rdataset->type;
681			if (type == dns_rdatatype_soa &&
682			    dns_name_equal(name, domain))
683				keep_auth = ISC_TRUE;
684
685			if (type != dns_rdatatype_ns &&
686			    type != dns_rdatatype_soa &&
687			    type != dns_rdatatype_rrsig)
688				continue;
689
690			if (type == dns_rdatatype_rrsig) {
691				if (rrsig_fromchildzone(fctx, rdataset))
692					return (ISC_FALSE);
693				else
694					continue;
695			}
696
697			/* NS or SOA records. */
698			if (dns_name_equal(name, domain)) {
699				/*
700				 * If a query for ANY causes a negative
701				 * response, we can be sure that this is
702				 * an empty node.  For other type of queries
703				 * we cannot differentiate an empty node
704				 * from a node that just doesn't have that
705				 * type of record.  We only accept the former
706				 * case.
707				 */
708				if (message->counts[DNS_SECTION_ANSWER] == 0 &&
709				    fctx->type == dns_rdatatype_any)
710					return (ISC_FALSE);
711			} else if (dns_name_issubdomain(name, domain)) {
712				/* Referral or answer from child zone. */
713				return (ISC_FALSE);
714			}
715		}
716		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
717	}
718
719 munge:
720	message->rcode = dns_rcode_nxdomain;
721	message->counts[DNS_SECTION_ANSWER] = 0;
722	if (!keep_auth)
723		message->counts[DNS_SECTION_AUTHORITY] = 0;
724	message->counts[DNS_SECTION_ADDITIONAL] = 0;
725	return (ISC_TRUE);
726}
727
728static inline isc_result_t
729fctx_starttimer(fetchctx_t *fctx) {
730	/*
731	 * Start the lifetime timer for fctx.
732	 *
733	 * This is also used for stopping the idle timer; in that
734	 * case we must purge events already posted to ensure that
735	 * no further idle events are delivered.
736	 */
737	return (isc_timer_reset(fctx->timer, isc_timertype_once,
738				&fctx->expires, NULL, ISC_TRUE));
739}
740
741static inline void
742fctx_stoptimer(fetchctx_t *fctx) {
743	isc_result_t result;
744
745	/*
746	 * We don't return a result if resetting the timer to inactive fails
747	 * since there's nothing to be done about it.  Resetting to inactive
748	 * should never fail anyway, since the code as currently written
749	 * cannot fail in that case.
750	 */
751	result = isc_timer_reset(fctx->timer, isc_timertype_inactive,
752				  NULL, NULL, ISC_TRUE);
753	if (result != ISC_R_SUCCESS) {
754		UNEXPECTED_ERROR(__FILE__, __LINE__,
755				 "isc_timer_reset(): %s",
756				 isc_result_totext(result));
757	}
758}
759
760
761static inline isc_result_t
762fctx_startidletimer(fetchctx_t *fctx, isc_interval_t *interval) {
763	/*
764	 * Start the idle timer for fctx.  The lifetime timer continues
765	 * to be in effect.
766	 */
767	return (isc_timer_reset(fctx->timer, isc_timertype_once,
768				&fctx->expires, interval, ISC_FALSE));
769}
770
/*
 * Stopping the idle timer is equivalent to calling fctx_starttimer(), but
 * we use fctx_stopidletimer for readability in the code below.  Being a
 * plain alias, it takes the same argument and yields the same
 * isc_result_t as fctx_starttimer().
 */
#define fctx_stopidletimer      fctx_starttimer
776
777
778static inline void
779resquery_destroy(resquery_t **queryp) {
780	resquery_t *query;
781
782	REQUIRE(queryp != NULL);
783	query = *queryp;
784	REQUIRE(!ISC_LINK_LINKED(query, link));
785
786	INSIST(query->tcpsocket == NULL);
787
788	query->fctx->nqueries--;
789	if (SHUTTINGDOWN(query->fctx)) {
790		dns_resolver_t *res = query->fctx->res;
791		if (maybe_destroy(query->fctx, ISC_FALSE))
792			empty_bucket(res);
793	}
794	query->magic = 0;
795	isc_mem_put(query->mctx, query, sizeof(*query));
796	*queryp = NULL;
797}
798
799static void
800fctx_cancelquery(resquery_t **queryp, dns_dispatchevent_t **deventp,
801		 isc_time_t *finish, isc_boolean_t no_response)
802{
803	fetchctx_t *fctx;
804	resquery_t *query;
805	unsigned int rtt, rttms;
806	unsigned int factor;
807	dns_adbfind_t *find;
808	dns_adbaddrinfo_t *addrinfo;
809	isc_socket_t *socket;
810
811	query = *queryp;
812	fctx = query->fctx;
813
814	FCTXTRACE("cancelquery");
815
816	REQUIRE(!RESQUERY_CANCELED(query));
817
818	query->attributes |= RESQUERY_ATTR_CANCELED;
819
820	/*
821	 * Should we update the RTT?
822	 */
823	if (finish != NULL || no_response) {
824		if (finish != NULL) {
825			/*
826			 * We have both the start and finish times for this
827			 * packet, so we can compute a real RTT.
828			 */
829			rtt = (unsigned int)isc_time_microdiff(finish,
830							       &query->start);
831			factor = DNS_ADB_RTTADJDEFAULT;
832
833			rttms = rtt / 1000;
834			if (rttms < DNS_RESOLVER_QRYRTTCLASS0) {
835				inc_stats(fctx->res,
836					  dns_resstatscounter_queryrtt0);
837			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS1) {
838				inc_stats(fctx->res,
839					  dns_resstatscounter_queryrtt1);
840			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS2) {
841				inc_stats(fctx->res,
842					  dns_resstatscounter_queryrtt2);
843			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS3) {
844				inc_stats(fctx->res,
845					  dns_resstatscounter_queryrtt3);
846			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS4) {
847				inc_stats(fctx->res,
848					  dns_resstatscounter_queryrtt4);
849			} else {
850				inc_stats(fctx->res,
851					  dns_resstatscounter_queryrtt5);
852			}
853		} else {
854			/*
855			 * We don't have an RTT for this query.  Maybe the
856			 * packet was lost, or maybe this server is very
857			 * slow.  We don't know.  Increase the RTT.
858			 */
859			INSIST(no_response);
860			rtt = query->addrinfo->srtt + 200000;
861			if (rtt > MAX_SINGLE_QUERY_TIMEOUT_US)
862				rtt = MAX_SINGLE_QUERY_TIMEOUT_US;
863			/*
864			 * Replace the current RTT with our value.
865			 */
866			factor = DNS_ADB_RTTADJREPLACE;
867		}
868		dns_adb_adjustsrtt(fctx->adb, query->addrinfo, rtt, factor);
869	}
870
871	/* Remember that the server has been tried. */
872	if (!TRIED(query->addrinfo)) {
873		dns_adb_changeflags(fctx->adb, query->addrinfo,
874				    FCTX_ADDRINFO_TRIED, FCTX_ADDRINFO_TRIED);
875	}
876
877	/*
878	 * Age RTTs of servers not tried.
879	 */
880	factor = DNS_ADB_RTTADJAGE;
881	if (finish != NULL)
882		for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
883		     addrinfo != NULL;
884		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
885			if (UNMARKED(addrinfo))
886				dns_adb_adjustsrtt(fctx->adb, addrinfo,
887						   0, factor);
888
889	if (finish != NULL && TRIEDFIND(fctx))
890		for (find = ISC_LIST_HEAD(fctx->finds);
891		     find != NULL;
892		     find = ISC_LIST_NEXT(find, publink))
893			for (addrinfo = ISC_LIST_HEAD(find->list);
894			     addrinfo != NULL;
895			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
896				if (UNMARKED(addrinfo))
897					dns_adb_adjustsrtt(fctx->adb, addrinfo,
898							   0, factor);
899
900	if (finish != NULL && TRIEDALT(fctx)) {
901		for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
902		     addrinfo != NULL;
903		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
904			if (UNMARKED(addrinfo))
905				dns_adb_adjustsrtt(fctx->adb, addrinfo,
906						   0, factor);
907		for (find = ISC_LIST_HEAD(fctx->altfinds);
908		     find != NULL;
909		     find = ISC_LIST_NEXT(find, publink))
910			for (addrinfo = ISC_LIST_HEAD(find->list);
911			     addrinfo != NULL;
912			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
913				if (UNMARKED(addrinfo))
914					dns_adb_adjustsrtt(fctx->adb, addrinfo,
915							   0, factor);
916	}
917
918	/*
919	 * Check for any outstanding socket events.  If they exist, cancel
920	 * them and let the event handlers finish the cleanup.  The resolver
921	 * only needs to worry about managing the connect and send events;
922	 * the dispatcher manages the recv events.
923	 */
924	if (RESQUERY_CONNECTING(query)) {
925		/*
926		 * Cancel the connect.
927		 */
928		if (query->tcpsocket != NULL) {
929			isc_socket_cancel(query->tcpsocket, NULL,
930					  ISC_SOCKCANCEL_CONNECT);
931		} else if (query->dispentry != NULL) {
932			INSIST(query->exclusivesocket);
933			socket = dns_dispatch_getentrysocket(query->dispentry);
934			if (socket != NULL)
935				isc_socket_cancel(socket, NULL,
936						  ISC_SOCKCANCEL_CONNECT);
937		}
938	} else if (RESQUERY_SENDING(query)) {
939		/*
940		 * Cancel the pending send.
941		 */
942		if (query->exclusivesocket && query->dispentry != NULL)
943			socket = dns_dispatch_getentrysocket(query->dispentry);
944		else
945			socket = dns_dispatch_getsocket(query->dispatch);
946		if (socket != NULL)
947			isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND);
948	}
949
950	if (query->dispentry != NULL)
951		dns_dispatch_removeresponse(&query->dispentry, deventp);
952
953	ISC_LIST_UNLINK(fctx->queries, query, link);
954
955	if (query->tsig != NULL)
956		isc_buffer_free(&query->tsig);
957
958	if (query->tsigkey != NULL)
959		dns_tsigkey_detach(&query->tsigkey);
960
961	if (query->dispatch != NULL)
962		dns_dispatch_detach(&query->dispatch);
963
964	if (! (RESQUERY_CONNECTING(query) || RESQUERY_SENDING(query)))
965		/*
966		 * It's safe to destroy the query now.
967		 */
968		resquery_destroy(&query);
969}
970
971static void
972fctx_cancelqueries(fetchctx_t *fctx, isc_boolean_t no_response) {
973	resquery_t *query, *next_query;
974
975	FCTXTRACE("cancelqueries");
976
977	for (query = ISC_LIST_HEAD(fctx->queries);
978	     query != NULL;
979	     query = next_query) {
980		next_query = ISC_LIST_NEXT(query, link);
981		fctx_cancelquery(&query, NULL, NULL, no_response);
982	}
983}
984
985static void
986fctx_cleanupfinds(fetchctx_t *fctx) {
987	dns_adbfind_t *find, *next_find;
988
989	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
990
991	for (find = ISC_LIST_HEAD(fctx->finds);
992	     find != NULL;
993	     find = next_find) {
994		next_find = ISC_LIST_NEXT(find, publink);
995		ISC_LIST_UNLINK(fctx->finds, find, publink);
996		dns_adb_destroyfind(&find);
997	}
998	fctx->find = NULL;
999}
1000
1001static void
1002fctx_cleanupaltfinds(fetchctx_t *fctx) {
1003	dns_adbfind_t *find, *next_find;
1004
1005	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1006
1007	for (find = ISC_LIST_HEAD(fctx->altfinds);
1008	     find != NULL;
1009	     find = next_find) {
1010		next_find = ISC_LIST_NEXT(find, publink);
1011		ISC_LIST_UNLINK(fctx->altfinds, find, publink);
1012		dns_adb_destroyfind(&find);
1013	}
1014	fctx->altfind = NULL;
1015}
1016
1017static void
1018fctx_cleanupforwaddrs(fetchctx_t *fctx) {
1019	dns_adbaddrinfo_t *addr, *next_addr;
1020
1021	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1022
1023	for (addr = ISC_LIST_HEAD(fctx->forwaddrs);
1024	     addr != NULL;
1025	     addr = next_addr) {
1026		next_addr = ISC_LIST_NEXT(addr, publink);
1027		ISC_LIST_UNLINK(fctx->forwaddrs, addr, publink);
1028		dns_adb_freeaddrinfo(fctx->adb, &addr);
1029	}
1030}
1031
1032static void
1033fctx_cleanupaltaddrs(fetchctx_t *fctx) {
1034	dns_adbaddrinfo_t *addr, *next_addr;
1035
1036	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1037
1038	for (addr = ISC_LIST_HEAD(fctx->altaddrs);
1039	     addr != NULL;
1040	     addr = next_addr) {
1041		next_addr = ISC_LIST_NEXT(addr, publink);
1042		ISC_LIST_UNLINK(fctx->altaddrs, addr, publink);
1043		dns_adb_freeaddrinfo(fctx->adb, &addr);
1044	}
1045}
1046
1047static inline void
1048fctx_stopeverything(fetchctx_t *fctx, isc_boolean_t no_response) {
1049	FCTXTRACE("stopeverything");
1050	fctx_cancelqueries(fctx, no_response);
1051	fctx_cleanupfinds(fctx);
1052	fctx_cleanupaltfinds(fctx);
1053	fctx_cleanupforwaddrs(fctx);
1054	fctx_cleanupaltaddrs(fctx);
1055	fctx_stoptimer(fctx);
1056}
1057
1058static inline void
1059fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) {
1060	dns_fetchevent_t *event, *next_event;
1061	isc_task_t *task;
1062	unsigned int count = 0;
1063	isc_interval_t i;
1064	isc_boolean_t logit = ISC_FALSE;
1065	isc_time_t now;
1066	unsigned int old_spillat;
1067	unsigned int new_spillat = 0;	/* initialized to silence
1068					   compiler warnings */
1069
1070	/*
1071	 * Caller must be holding the appropriate bucket lock.
1072	 */
1073	REQUIRE(fctx->state == fetchstate_done);
1074
1075	FCTXTRACE("sendevents");
1076
1077	/*
1078	 * Keep some record of fetch result for logging later (if required).
1079	 */
1080	fctx->result = result;
1081	fctx->exitline = line;
1082	TIME_NOW(&now);
1083	fctx->duration = isc_time_microdiff(&now, &fctx->start);
1084
1085	for (event = ISC_LIST_HEAD(fctx->events);
1086	     event != NULL;
1087	     event = next_event) {
1088		next_event = ISC_LIST_NEXT(event, ev_link);
1089		ISC_LIST_UNLINK(fctx->events, event, ev_link);
1090		task = event->ev_sender;
1091		event->ev_sender = fctx;
1092		event->vresult = fctx->vresult;
1093		if (!HAVE_ANSWER(fctx))
1094			event->result = result;
1095
1096		INSIST(result != ISC_R_SUCCESS ||
1097		       dns_rdataset_isassociated(event->rdataset) ||
1098		       fctx->type == dns_rdatatype_any ||
1099		       fctx->type == dns_rdatatype_rrsig ||
1100		       fctx->type == dns_rdatatype_sig);
1101
1102		/*
1103		 * Negative results must be indicated in event->result.
1104		 */
1105		if (dns_rdataset_isassociated(event->rdataset) &&
1106		    NEGATIVE(event->rdataset)) {
1107			INSIST(event->result == DNS_R_NCACHENXDOMAIN ||
1108			       event->result == DNS_R_NCACHENXRRSET);
1109		}
1110
1111		event->qtotal = fctx->totalqueries;
1112		isc_task_sendanddetach(&task, ISC_EVENT_PTR(&event));
1113		count++;
1114	}
1115
1116	if ((fctx->attributes & FCTX_ATTR_HAVEANSWER) != 0 &&
1117	    fctx->spilled &&
1118	    (count < fctx->res->spillatmax || fctx->res->spillatmax == 0)) {
1119		LOCK(&fctx->res->lock);
1120		if (count == fctx->res->spillat && !fctx->res->exiting) {
1121			old_spillat = fctx->res->spillat;
1122			fctx->res->spillat += 5;
1123			if (fctx->res->spillat > fctx->res->spillatmax &&
1124			    fctx->res->spillatmax != 0)
1125				fctx->res->spillat = fctx->res->spillatmax;
1126			new_spillat = fctx->res->spillat;
1127			if (new_spillat != old_spillat) {
1128				logit = ISC_TRUE;
1129			}
1130			isc_interval_set(&i, 20 * 60, 0);
1131			result = isc_timer_reset(fctx->res->spillattimer,
1132						 isc_timertype_ticker, NULL,
1133						 &i, ISC_TRUE);
1134			RUNTIME_CHECK(result == ISC_R_SUCCESS);
1135		}
1136		UNLOCK(&fctx->res->lock);
1137		if (logit)
1138			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
1139				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
1140				      "clients-per-query increased to %u",
1141				      new_spillat);
1142	}
1143}
1144
1145static inline void
1146log_edns(fetchctx_t *fctx) {
1147	char domainbuf[DNS_NAME_FORMATSIZE];
1148
1149	if (fctx->reason == NULL)
1150		return;
1151
1152	/*
1153	 * We do not know if fctx->domain is the actual domain the record
1154	 * lives in or a parent domain so we have a '?' after it.
1155	 */
1156	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
1157	isc_log_write(dns_lctx, DNS_LOGCATEGORY_EDNS_DISABLED,
1158		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1159		      "success resolving '%s' (in '%s'?) after %s",
1160		      fctx->info, domainbuf, fctx->reason);
1161
1162	fctx->reason = NULL;
1163}
1164
1165static void
1166fctx_done(fetchctx_t *fctx, isc_result_t result, int line) {
1167	dns_resolver_t *res;
1168	isc_boolean_t no_response;
1169
1170	REQUIRE(line >= 0);
1171
1172	FCTXTRACE("done");
1173
1174	res = fctx->res;
1175
1176	if (result == ISC_R_SUCCESS) {
1177		/*%
1178		 * Log any deferred EDNS timeout messages.
1179		 */
1180		log_edns(fctx);
1181		no_response = ISC_TRUE;
1182	 } else
1183		no_response = ISC_FALSE;
1184
1185	fctx->reason = NULL;
1186	fctx_stopeverything(fctx, no_response);
1187
1188	LOCK(&res->buckets[fctx->bucketnum].lock);
1189
1190	fctx->state = fetchstate_done;
1191	fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1192	fctx_sendevents(fctx, result, line);
1193
1194	UNLOCK(&res->buckets[fctx->bucketnum].lock);
1195}
1196
1197static void
1198process_sendevent(resquery_t *query, isc_event_t *event) {
1199	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
1200	isc_boolean_t retry = ISC_FALSE;
1201	isc_result_t result;
1202	fetchctx_t *fctx;
1203
1204	fctx = query->fctx;
1205
1206	if (RESQUERY_CANCELED(query)) {
1207		if (query->sends == 0 && query->connects == 0) {
1208			/*
1209			 * This query was canceled while the
1210			 * isc_socket_sendto/connect() was in progress.
1211			 */
1212			if (query->tcpsocket != NULL)
1213				isc_socket_detach(&query->tcpsocket);
1214			resquery_destroy(&query);
1215		}
1216	} else {
1217		switch (sevent->result) {
1218		case ISC_R_SUCCESS:
1219			break;
1220
1221		case ISC_R_HOSTUNREACH:
1222		case ISC_R_NETUNREACH:
1223		case ISC_R_NOPERM:
1224		case ISC_R_ADDRNOTAVAIL:
1225		case ISC_R_CONNREFUSED:
1226
1227			/*
1228			 * No route to remote.
1229			 */
1230			add_bad(fctx, query->addrinfo, sevent->result,
1231				badns_unreachable);
1232			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
1233			retry = ISC_TRUE;
1234			break;
1235
1236		default:
1237			fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
1238			break;
1239		}
1240	}
1241
1242	if (event->ev_type == ISC_SOCKEVENT_CONNECT)
1243		isc_event_free(&event);
1244
1245	if (retry) {
1246		/*
1247		 * Behave as if the idle timer has expired.  For TCP
1248		 * this may not actually reflect the latest timer.
1249		 */
1250		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1251		result = fctx_stopidletimer(fctx);
1252		if (result != ISC_R_SUCCESS)
1253			fctx_done(fctx, result, __LINE__);
1254		else
1255			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
1256	}
1257}
1258
1259static void
1260resquery_udpconnected(isc_task_t *task, isc_event_t *event) {
1261	resquery_t *query = event->ev_arg;
1262
1263	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
1264
1265	QTRACE("udpconnected");
1266
1267	UNUSED(task);
1268
1269	INSIST(RESQUERY_CONNECTING(query));
1270
1271	query->connects--;
1272
1273	process_sendevent(query, event);
1274}
1275
1276static void
1277resquery_senddone(isc_task_t *task, isc_event_t *event) {
1278	resquery_t *query = event->ev_arg;
1279
1280	REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE);
1281
1282	QTRACE("senddone");
1283
1284	/*
1285	 * XXXRTH
1286	 *
1287	 * Currently we don't wait for the senddone event before retrying
1288	 * a query.  This means that if we get really behind, we may end
1289	 * up doing extra work!
1290	 */
1291
1292	UNUSED(task);
1293
1294	INSIST(RESQUERY_SENDING(query));
1295
1296	query->sends--;
1297
1298	process_sendevent(query, event);
1299}
1300
1301static inline isc_result_t
1302fctx_addopt(dns_message_t *message, unsigned int version,
1303	    isc_uint16_t udpsize, dns_ednsopt_t *ednsopts, size_t count)
1304{
1305	dns_rdataset_t *rdataset = NULL;
1306	isc_result_t result;
1307
1308	result = dns_message_buildopt(message, &rdataset, version, udpsize,
1309				      DNS_MESSAGEEXTFLAG_DO, ednsopts, count);
1310	if (result != ISC_R_SUCCESS)
1311		return (result);
1312	return (dns_message_setopt(message, rdataset));
1313}
1314
1315static inline void
1316fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
1317	unsigned int seconds;
1318	unsigned int us;
1319
1320	/*
1321	 * We retry every .8 seconds the first two times through the address
1322	 * list, and then we do exponential back-off.
1323	 */
1324	if (fctx->restarts < 3)
1325		us = 800000;
1326	else
1327		us = (800000 << (fctx->restarts - 2));
1328
1329	/*
1330	 * Add a fudge factor to the expected rtt based on the current
1331	 * estimate.
1332	 */
1333	if (rtt < 50000)
1334		rtt += 50000;
1335	else if (rtt < 100000)
1336		rtt += 100000;
1337	else
1338		rtt += 200000;
1339
1340	/*
1341	 * Always wait for at least the expected rtt.
1342	 */
1343	if (us < rtt)
1344		us = rtt;
1345
1346	/*
1347	 * But don't ever wait for more than 10 seconds.
1348	 */
1349	if (us > MAX_SINGLE_QUERY_TIMEOUT_US)
1350		us = MAX_SINGLE_QUERY_TIMEOUT_US;
1351
1352	seconds = us / US_PER_SEC;
1353	us -= seconds * US_PER_SEC;
1354	isc_interval_set(&fctx->interval, seconds, us * 1000);
1355}
1356
1357static isc_result_t
1358fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
1359	   unsigned int options)
1360{
1361	dns_resolver_t *res;
1362	isc_task_t *task;
1363	isc_result_t result;
1364	resquery_t *query;
1365	isc_sockaddr_t addr;
1366	isc_boolean_t have_addr = ISC_FALSE;
1367	unsigned int srtt;
1368
1369	FCTXTRACE("query");
1370
1371	res = fctx->res;
1372	task = res->buckets[fctx->bucketnum].task;
1373
1374	srtt = addrinfo->srtt;
1375
1376	/*
1377	 * A forwarder needs to make multiple queries. Give it at least
1378	 * a second to do these in.
1379	 */
1380	if (ISFORWARDER(addrinfo) && srtt < 1000000)
1381		srtt = 1000000;
1382
1383	fctx_setretryinterval(fctx, srtt);
1384	result = fctx_startidletimer(fctx, &fctx->interval);
1385	if (result != ISC_R_SUCCESS)
1386		return (result);
1387
1388	INSIST(ISC_LIST_EMPTY(fctx->validators));
1389
1390	dns_message_reset(fctx->rmessage, DNS_MESSAGE_INTENTPARSE);
1391
1392	query = isc_mem_get(fctx->mctx, sizeof(*query));
1393	if (query == NULL) {
1394		result = ISC_R_NOMEMORY;
1395		goto stop_idle_timer;
1396	}
1397	query->mctx = fctx->mctx;
1398	query->options = options;
1399	query->attributes = 0;
1400	query->sends = 0;
1401	query->connects = 0;
1402	/*
1403	 * Note that the caller MUST guarantee that 'addrinfo' will remain
1404	 * valid until this query is canceled.
1405	 */
1406	query->addrinfo = addrinfo;
1407	TIME_NOW(&query->start);
1408
1409	/*
1410	 * If this is a TCP query, then we need to make a socket and
1411	 * a dispatch for it here.  Otherwise we use the resolver's
1412	 * shared dispatch.
1413	 */
1414	query->dispatchmgr = res->dispatchmgr;
1415	query->dispatch = NULL;
1416	query->exclusivesocket = ISC_FALSE;
1417	query->tcpsocket = NULL;
1418	if (res->view->peers != NULL) {
1419		dns_peer_t *peer = NULL;
1420		isc_netaddr_t dstip;
1421		isc_netaddr_fromsockaddr(&dstip, &addrinfo->sockaddr);
1422		result = dns_peerlist_peerbyaddr(res->view->peers,
1423						 &dstip, &peer);
1424		if (result == ISC_R_SUCCESS) {
1425			result = dns_peer_getquerysource(peer, &addr);
1426			if (result == ISC_R_SUCCESS)
1427				have_addr = ISC_TRUE;
1428		}
1429	}
1430
1431	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1432		int pf;
1433
1434		pf = isc_sockaddr_pf(&addrinfo->sockaddr);
1435		if (!have_addr) {
1436			switch (pf) {
1437			case PF_INET:
1438				result = dns_dispatch_getlocaladdress(
1439					      res->dispatches4->dispatches[0],
1440					      &addr);
1441				break;
1442			case PF_INET6:
1443				result = dns_dispatch_getlocaladdress(
1444					      res->dispatches6->dispatches[0],
1445					      &addr);
1446				break;
1447			default:
1448				result = ISC_R_NOTIMPLEMENTED;
1449				break;
1450			}
1451			if (result != ISC_R_SUCCESS)
1452				goto cleanup_query;
1453		}
1454		isc_sockaddr_setport(&addr, 0);
1455
1456		result = isc_socket_create(res->socketmgr, pf,
1457					   isc_sockettype_tcp,
1458					   &query->tcpsocket);
1459		if (result != ISC_R_SUCCESS)
1460			goto cleanup_query;
1461
1462#ifndef BROKEN_TCP_BIND_BEFORE_CONNECT
1463		result = isc_socket_bind(query->tcpsocket, &addr, 0);
1464		if (result != ISC_R_SUCCESS)
1465			goto cleanup_socket;
1466#endif
1467
1468		/*
1469		 * A dispatch will be created once the connect succeeds.
1470		 */
1471	} else {
1472		if (have_addr) {
1473			unsigned int attrs, attrmask;
1474			attrs = DNS_DISPATCHATTR_UDP;
1475			switch (isc_sockaddr_pf(&addr)) {
1476			case AF_INET:
1477				attrs |= DNS_DISPATCHATTR_IPV4;
1478				break;
1479			case AF_INET6:
1480				attrs |= DNS_DISPATCHATTR_IPV6;
1481				break;
1482			default:
1483				result = ISC_R_NOTIMPLEMENTED;
1484				goto cleanup_query;
1485			}
1486			attrmask = DNS_DISPATCHATTR_UDP;
1487			attrmask |= DNS_DISPATCHATTR_TCP;
1488			attrmask |= DNS_DISPATCHATTR_IPV4;
1489			attrmask |= DNS_DISPATCHATTR_IPV6;
1490			result = dns_dispatch_getudp(res->dispatchmgr,
1491						     res->socketmgr,
1492						     res->taskmgr, &addr,
1493						     4096, 1000, 32768, 16411,
1494						     16433, attrs, attrmask,
1495						     &query->dispatch);
1496			if (result != ISC_R_SUCCESS)
1497				goto cleanup_query;
1498		} else {
1499			switch (isc_sockaddr_pf(&addrinfo->sockaddr)) {
1500			case PF_INET:
1501				dns_dispatch_attach(
1502				    dns_resolver_dispatchv4(res),
1503				    &query->dispatch);
1504				query->exclusivesocket = res->exclusivev4;
1505				break;
1506			case PF_INET6:
1507				dns_dispatch_attach(
1508				    dns_resolver_dispatchv6(res),
1509				    &query->dispatch);
1510				query->exclusivesocket = res->exclusivev6;
1511				break;
1512			default:
1513				result = ISC_R_NOTIMPLEMENTED;
1514				goto cleanup_query;
1515			}
1516		}
1517		/*
1518		 * We should always have a valid dispatcher here.  If we
1519		 * don't support a protocol family, then its dispatcher
1520		 * will be NULL, but we shouldn't be finding addresses for
1521		 * protocol types we don't support, so the dispatcher
1522		 * we found should never be NULL.
1523		 */
1524		INSIST(query->dispatch != NULL);
1525	}
1526
1527	query->dispentry = NULL;
1528	query->fctx = fctx;
1529	query->tsig = NULL;
1530	query->tsigkey = NULL;
1531	ISC_LINK_INIT(query, link);
1532	query->magic = QUERY_MAGIC;
1533
1534	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1535		/*
1536		 * Connect to the remote server.
1537		 *
1538		 * XXXRTH  Should we attach to the socket?
1539		 */
1540		result = isc_socket_connect(query->tcpsocket,
1541					    &addrinfo->sockaddr, task,
1542					    resquery_connected, query);
1543		if (result != ISC_R_SUCCESS)
1544			goto cleanup_socket;
1545		query->connects++;
1546		QTRACE("connecting via TCP");
1547	} else {
1548		result = resquery_send(query);
1549		if (result != ISC_R_SUCCESS)
1550			goto cleanup_dispatch;
1551	}
1552
1553	fctx->querysent++;
1554	fctx->totalqueries++;
1555
1556	ISC_LIST_APPEND(fctx->queries, query, link);
1557	query->fctx->nqueries++;
1558	if (isc_sockaddr_pf(&addrinfo->sockaddr) == PF_INET)
1559		inc_stats(res, dns_resstatscounter_queryv4);
1560	else
1561		inc_stats(res, dns_resstatscounter_queryv6);
1562	if (res->view->resquerystats != NULL)
1563		dns_rdatatypestats_increment(res->view->resquerystats,
1564					     fctx->type);
1565
1566	return (ISC_R_SUCCESS);
1567
1568 cleanup_socket:
1569	isc_socket_detach(&query->tcpsocket);
1570
1571 cleanup_dispatch:
1572	if (query->dispatch != NULL)
1573		dns_dispatch_detach(&query->dispatch);
1574
1575 cleanup_query:
1576	if (query->connects == 0) {
1577		query->magic = 0;
1578		isc_mem_put(fctx->mctx, query, sizeof(*query));
1579	}
1580
1581 stop_idle_timer:
1582	RUNTIME_CHECK(fctx_stopidletimer(fctx) == ISC_R_SUCCESS);
1583
1584	return (result);
1585}
1586
1587static isc_boolean_t
1588bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1589	isc_sockaddr_t *sa;
1590
1591	for (sa = ISC_LIST_HEAD(fctx->bad_edns);
1592	     sa != NULL;
1593	     sa = ISC_LIST_NEXT(sa, link)) {
1594		if (isc_sockaddr_equal(sa, address))
1595			return (ISC_TRUE);
1596	}
1597
1598	return (ISC_FALSE);
1599}
1600
1601static void
1602add_bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1603	isc_sockaddr_t *sa;
1604
1605	if (bad_edns(fctx, address))
1606		return;
1607
1608	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1609	if (sa == NULL)
1610		return;
1611
1612	*sa = *address;
1613	ISC_LIST_INITANDAPPEND(fctx->bad_edns, sa, link);
1614}
1615
1616static isc_boolean_t
1617triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1618	isc_sockaddr_t *sa;
1619
1620	for (sa = ISC_LIST_HEAD(fctx->edns);
1621	     sa != NULL;
1622	     sa = ISC_LIST_NEXT(sa, link)) {
1623		if (isc_sockaddr_equal(sa, address))
1624			return (ISC_TRUE);
1625	}
1626
1627	return (ISC_FALSE);
1628}
1629
1630static void
1631add_triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1632	isc_sockaddr_t *sa;
1633
1634	if (triededns(fctx, address))
1635		return;
1636
1637	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1638	if (sa == NULL)
1639		return;
1640
1641	*sa = *address;
1642	ISC_LIST_INITANDAPPEND(fctx->edns, sa, link);
1643}
1644
1645static isc_boolean_t
1646triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1647	isc_sockaddr_t *sa;
1648
1649	for (sa = ISC_LIST_HEAD(fctx->edns512);
1650	     sa != NULL;
1651	     sa = ISC_LIST_NEXT(sa, link)) {
1652		if (isc_sockaddr_equal(sa, address))
1653			return (ISC_TRUE);
1654	}
1655
1656	return (ISC_FALSE);
1657}
1658
1659static void
1660add_triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1661	isc_sockaddr_t *sa;
1662
1663	if (triededns512(fctx, address))
1664		return;
1665
1666	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1667	if (sa == NULL)
1668		return;
1669
1670	*sa = *address;
1671	ISC_LIST_INITANDAPPEND(fctx->edns512, sa, link);
1672}
1673
1674static isc_result_t
1675resquery_send(resquery_t *query) {
1676	fetchctx_t *fctx;
1677	isc_result_t result;
1678	dns_name_t *qname = NULL;
1679	dns_rdataset_t *qrdataset = NULL;
1680	isc_region_t r;
1681	dns_resolver_t *res;
1682	isc_task_t *task;
1683	isc_socket_t *socket;
1684	isc_buffer_t tcpbuffer;
1685	isc_sockaddr_t *address;
1686	isc_buffer_t *buffer;
1687	isc_netaddr_t ipaddr;
1688	dns_tsigkey_t *tsigkey = NULL;
1689	dns_peer_t *peer = NULL;
1690	isc_boolean_t useedns;
1691	dns_compress_t cctx;
1692	isc_boolean_t cleanup_cctx = ISC_FALSE;
1693	isc_boolean_t secure_domain;
1694	isc_boolean_t connecting = ISC_FALSE;
1695	dns_ednsopt_t ednsopts[EDNSOPTS];
1696	unsigned ednsopt = 0;
1697
1698	fctx = query->fctx;
1699	QTRACE("send");
1700
1701	res = fctx->res;
1702	task = res->buckets[fctx->bucketnum].task;
1703	address = NULL;
1704
1705	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1706		/*
1707		 * Reserve space for the TCP message length.
1708		 */
1709		isc_buffer_init(&tcpbuffer, query->data, sizeof(query->data));
1710		isc_buffer_init(&query->buffer, query->data + 2,
1711				sizeof(query->data) - 2);
1712		buffer = &tcpbuffer;
1713	} else {
1714		isc_buffer_init(&query->buffer, query->data,
1715				sizeof(query->data));
1716		buffer = &query->buffer;
1717	}
1718
1719	result = dns_message_gettempname(fctx->qmessage, &qname);
1720	if (result != ISC_R_SUCCESS)
1721		goto cleanup_temps;
1722	result = dns_message_gettemprdataset(fctx->qmessage, &qrdataset);
1723	if (result != ISC_R_SUCCESS)
1724		goto cleanup_temps;
1725
1726	/*
1727	 * Get a query id from the dispatch.
1728	 */
1729	result = dns_dispatch_addresponse2(query->dispatch,
1730					   &query->addrinfo->sockaddr,
1731					   task,
1732					   resquery_response,
1733					   query,
1734					   &query->id,
1735					   &query->dispentry,
1736					   res->socketmgr);
1737	if (result != ISC_R_SUCCESS)
1738		goto cleanup_temps;
1739
1740	fctx->qmessage->opcode = dns_opcode_query;
1741
1742	/*
1743	 * Set up question.
1744	 */
1745	dns_name_init(qname, NULL);
1746	dns_name_clone(&fctx->name, qname);
1747	dns_rdataset_init(qrdataset);
1748	dns_rdataset_makequestion(qrdataset, res->rdclass, fctx->type);
1749	ISC_LIST_APPEND(qname->list, qrdataset, link);
1750	dns_message_addname(fctx->qmessage, qname, DNS_SECTION_QUESTION);
1751	qname = NULL;
1752	qrdataset = NULL;
1753
1754	/*
1755	 * Set RD if the client has requested that we do a recursive query,
1756	 * or if we're sending to a forwarder.
1757	 */
1758	if ((query->options & DNS_FETCHOPT_RECURSIVE) != 0 ||
1759	    ISFORWARDER(query->addrinfo))
1760		fctx->qmessage->flags |= DNS_MESSAGEFLAG_RD;
1761
1762	/*
1763	 * Set CD if the client says don't validate or the question is
1764	 * under a secure entry point.
1765	 */
1766	if ((query->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
1767		fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1768	} else if (res->view->enablevalidation) {
1769		result = dns_view_issecuredomain(res->view, &fctx->name,
1770						 &secure_domain);
1771		if (result != ISC_R_SUCCESS)
1772			secure_domain = ISC_FALSE;
1773		if (res->view->dlv != NULL)
1774			secure_domain = ISC_TRUE;
1775		if (secure_domain)
1776			fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1777	}
1778
1779	/*
1780	 * We don't have to set opcode because it defaults to query.
1781	 */
1782	fctx->qmessage->id = query->id;
1783
1784	/*
1785	 * Convert the question to wire format.
1786	 */
1787	result = dns_compress_init(&cctx, -1, fctx->res->mctx);
1788	if (result != ISC_R_SUCCESS)
1789		goto cleanup_message;
1790	cleanup_cctx = ISC_TRUE;
1791
1792	result = dns_message_renderbegin(fctx->qmessage, &cctx,
1793					 &query->buffer);
1794	if (result != ISC_R_SUCCESS)
1795		goto cleanup_message;
1796
1797	result = dns_message_rendersection(fctx->qmessage,
1798					   DNS_SECTION_QUESTION, 0);
1799	if (result != ISC_R_SUCCESS)
1800		goto cleanup_message;
1801
1802	peer = NULL;
1803	isc_netaddr_fromsockaddr(&ipaddr, &query->addrinfo->sockaddr);
1804	(void) dns_peerlist_peerbyaddr(fctx->res->view->peers, &ipaddr, &peer);
1805
1806	/*
1807	 * The ADB does not know about servers with "edns no".  Check this,
1808	 * and then inform the ADB for future use.
1809	 */
1810	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0 &&
1811	    peer != NULL &&
1812	    dns_peer_getsupportedns(peer, &useedns) == ISC_R_SUCCESS &&
1813	    !useedns)
1814	{
1815		query->options |= DNS_FETCHOPT_NOEDNS0;
1816		dns_adb_changeflags(fctx->adb, query->addrinfo,
1817				    DNS_FETCHOPT_NOEDNS0,
1818				    DNS_FETCHOPT_NOEDNS0);
1819	}
1820
1821	/* Sync NOEDNS0 flag in addrinfo->flags and options now. */
1822	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) != 0)
1823		query->options |= DNS_FETCHOPT_NOEDNS0;
1824
1825	/*
1826	 * Handle timeouts by reducing the UDP response size to 512 bytes
1827	 * then if that doesn't work disabling EDNS (includes DO) and CD.
1828	 *
1829	 * These timeout can be due to:
1830	 *	* broken nameservers that don't respond to EDNS queries.
1831	 *	* broken/misconfigured firewalls and NAT implementations
1832	 *	  that don't handle IP fragmentation.
1833	 *	* broken/misconfigured firewalls that don't handle responses
1834	 *	  greater than 512 bytes.
1835	 *	* broken/misconfigured firewalls that don't handle EDNS, DO
1836	 *	  or CD.
1837	 *	* packet loss / link outage.
1838	 */
1839	if (fctx->timeout) {
1840		if ((triededns512(fctx, &query->addrinfo->sockaddr) ||
1841		     fctx->timeouts >= (MAX_EDNS0_TIMEOUTS * 2)) &&
1842		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1843			query->options |= DNS_FETCHOPT_NOEDNS0;
1844			fctx->reason = "disabling EDNS";
1845		} else if ((triededns(fctx, &query->addrinfo->sockaddr) ||
1846			    fctx->timeouts >= MAX_EDNS0_TIMEOUTS) &&
1847			   (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1848			query->options |= DNS_FETCHOPT_EDNS512;
1849			fctx->reason = "reducing the advertised EDNS UDP "
1850				       "packet size to 512 octets";
1851		}
1852		fctx->timeout = ISC_FALSE;
1853	}
1854
1855	/*
1856	 * Use EDNS0, unless the caller doesn't want it, or we know that
1857	 * the remote server doesn't like it.
1858	 */
1859	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1860		if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0) {
1861			unsigned int version = 0;       /* Default version. */
1862			unsigned int flags;
1863			isc_uint16_t udpsize = res->udpsize;
1864			isc_boolean_t reqnsid = res->view->requestnsid;
1865
1866			flags = query->addrinfo->flags;
1867			if ((flags & DNS_FETCHOPT_EDNSVERSIONSET) != 0) {
1868				version = flags & DNS_FETCHOPT_EDNSVERSIONMASK;
1869				version >>= DNS_FETCHOPT_EDNSVERSIONSHIFT;
1870			}
1871			if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
1872				udpsize = 512;
1873			else if (peer != NULL)
1874				(void)dns_peer_getudpsize(peer, &udpsize);
1875
1876			/* request NSID for current view or peer? */
1877			if (peer != NULL)
1878				(void) dns_peer_getrequestnsid(peer, &reqnsid);
1879			if (reqnsid) {
1880				INSIST(ednsopt < EDNSOPTS);
1881				ednsopts[ednsopt].code = DNS_OPT_NSID;
1882				ednsopts[ednsopt].length = 0;
1883				ednsopts[ednsopt].value = NULL;
1884				ednsopt++;
1885			}
1886			result = fctx_addopt(fctx->qmessage, version,
1887					     udpsize, ednsopts, ednsopt);
1888			if (reqnsid && result == ISC_R_SUCCESS) {
1889				query->options |= DNS_FETCHOPT_WANTNSID;
1890			} else if (result != ISC_R_SUCCESS) {
1891				/*
1892				 * We couldn't add the OPT, but we'll press on.
1893				 * We're not using EDNS0, so set the NOEDNS0
1894				 * bit.
1895				 */
1896				query->options |= DNS_FETCHOPT_NOEDNS0;
1897			}
1898		} else {
1899			/*
1900			 * We know this server doesn't like EDNS0, so we
1901			 * won't use it.  Set the NOEDNS0 bit since we're
1902			 * not using EDNS0.
1903			 */
1904			query->options |= DNS_FETCHOPT_NOEDNS0;
1905		}
1906	}
1907
1908	/*
1909	 * If we need EDNS0 to do this query and aren't using it, we lose.
1910	 */
1911	if (NEEDEDNS0(fctx) && (query->options & DNS_FETCHOPT_NOEDNS0) != 0) {
1912		result = DNS_R_SERVFAIL;
1913		goto cleanup_message;
1914	}
1915
1916	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0)
1917		add_triededns(fctx, &query->addrinfo->sockaddr);
1918
1919	if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
1920		add_triededns512(fctx, &query->addrinfo->sockaddr);
1921
1922	/*
1923	 * Clear CD if EDNS is not in use.
1924	 */
1925	if ((query->options & DNS_FETCHOPT_NOEDNS0) != 0)
1926		fctx->qmessage->flags &= ~DNS_MESSAGEFLAG_CD;
1927
1928	/*
1929	 * Add TSIG record tailored to the current recipient.
1930	 */
1931	result = dns_view_getpeertsig(fctx->res->view, &ipaddr, &tsigkey);
1932	if (result != ISC_R_SUCCESS && result != ISC_R_NOTFOUND)
1933		goto cleanup_message;
1934
1935	if (tsigkey != NULL) {
1936		result = dns_message_settsigkey(fctx->qmessage, tsigkey);
1937		dns_tsigkey_detach(&tsigkey);
1938		if (result != ISC_R_SUCCESS)
1939			goto cleanup_message;
1940	}
1941
1942	result = dns_message_rendersection(fctx->qmessage,
1943					   DNS_SECTION_ADDITIONAL, 0);
1944	if (result != ISC_R_SUCCESS)
1945		goto cleanup_message;
1946
1947	result = dns_message_renderend(fctx->qmessage);
1948	if (result != ISC_R_SUCCESS)
1949		goto cleanup_message;
1950
1951	dns_compress_invalidate(&cctx);
1952	cleanup_cctx = ISC_FALSE;
1953
1954	if (dns_message_gettsigkey(fctx->qmessage) != NULL) {
1955		dns_tsigkey_attach(dns_message_gettsigkey(fctx->qmessage),
1956				   &query->tsigkey);
1957		result = dns_message_getquerytsig(fctx->qmessage,
1958						  fctx->res->mctx,
1959						  &query->tsig);
1960		if (result != ISC_R_SUCCESS)
1961			goto cleanup_message;
1962	}
1963
1964	/*
1965	 * If using TCP, write the length of the message at the beginning
1966	 * of the buffer.
1967	 */
1968	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1969		isc_buffer_usedregion(&query->buffer, &r);
1970		isc_buffer_putuint16(&tcpbuffer, (isc_uint16_t)r.length);
1971		isc_buffer_add(&tcpbuffer, r.length);
1972	}
1973
1974	/*
1975	 * We're now done with the query message.
1976	 */
1977	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
1978
1979	if (query->exclusivesocket)
1980		socket = dns_dispatch_getentrysocket(query->dispentry);
1981	else
1982		socket = dns_dispatch_getsocket(query->dispatch);
1983	/*
1984	 * Send the query!
1985	 */
1986	if ((query->options & DNS_FETCHOPT_TCP) == 0) {
1987		address = &query->addrinfo->sockaddr;
1988		if (query->exclusivesocket) {
1989			result = isc_socket_connect(socket, address, task,
1990						    resquery_udpconnected,
1991						    query);
1992			if (result != ISC_R_SUCCESS)
1993				goto cleanup_message;
1994			connecting = ISC_TRUE;
1995			query->connects++;
1996		}
1997	}
1998	isc_buffer_usedregion(buffer, &r);
1999
2000	/*
2001	 * XXXRTH  Make sure we don't send to ourselves!  We should probably
2002	 *		prune out these addresses when we get them from the ADB.
2003	 */
2004	ISC_EVENT_INIT(&query->sendevent, sizeof(query->sendevent), 0, NULL,
2005		       ISC_SOCKEVENT_SENDDONE, resquery_senddone, query,
2006		       NULL, NULL, NULL);
2007	result = isc_socket_sendto2(socket, &r, task, address, NULL,
2008				    &query->sendevent, 0);
2009	if (result != ISC_R_SUCCESS) {
2010		if (connecting) {
2011			/*
2012			 * This query is still connecting.
2013			 * Mark it as canceled so that it will just be
2014			 * cleaned up when the connected event is received.
2015			 * Keep fctx around until the event is processed.
2016			 */
2017			query->fctx->nqueries++;
2018			query->attributes |= RESQUERY_ATTR_CANCELED;
2019		}
2020		goto cleanup_message;
2021	}
2022
2023	query->sends++;
2024
2025	QTRACE("sent");
2026
2027	return (ISC_R_SUCCESS);
2028
2029 cleanup_message:
2030	if (cleanup_cctx)
2031		dns_compress_invalidate(&cctx);
2032
2033	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
2034
2035	/*
2036	 * Stop the dispatcher from listening.
2037	 */
2038	dns_dispatch_removeresponse(&query->dispentry, NULL);
2039
2040 cleanup_temps:
2041	if (qname != NULL)
2042		dns_message_puttempname(fctx->qmessage, &qname);
2043	if (qrdataset != NULL)
2044		dns_message_puttemprdataset(fctx->qmessage, &qrdataset);
2045
2046	return (result);
2047}
2048
2049static void
2050resquery_connected(isc_task_t *task, isc_event_t *event) {
2051	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
2052	resquery_t *query = event->ev_arg;
2053	isc_boolean_t retry = ISC_FALSE;
2054	isc_interval_t interval;
2055	isc_result_t result;
2056	unsigned int attrs;
2057	fetchctx_t *fctx;
2058
2059	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
2060	REQUIRE(VALID_QUERY(query));
2061
2062	QTRACE("connected");
2063
2064	UNUSED(task);
2065
2066	/*
2067	 * XXXRTH
2068	 *
2069	 * Currently we don't wait for the connect event before retrying
2070	 * a query.  This means that if we get really behind, we may end
2071	 * up doing extra work!
2072	 */
2073
2074	query->connects--;
2075	fctx = query->fctx;
2076
2077	if (RESQUERY_CANCELED(query)) {
2078		/*
2079		 * This query was canceled while the connect() was in
2080		 * progress.
2081		 */
2082		isc_socket_detach(&query->tcpsocket);
2083		resquery_destroy(&query);
2084	} else {
2085		switch (sevent->result) {
2086		case ISC_R_SUCCESS:
2087
2088			/*
2089			 * Extend the idle timer for TCP.  20 seconds
2090			 * should be long enough for a TCP connection to be
2091			 * established, a single DNS request to be sent,
2092			 * and the response received.
2093			 */
2094			isc_interval_set(&interval, 20, 0);
2095			result = fctx_startidletimer(query->fctx, &interval);
2096			if (result != ISC_R_SUCCESS) {
2097				fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
2098				fctx_done(fctx, result, __LINE__);
2099				break;
2100			}
2101			/*
2102			 * We are connected.  Create a dispatcher and
2103			 * send the query.
2104			 */
2105			attrs = 0;
2106			attrs |= DNS_DISPATCHATTR_TCP;
2107			attrs |= DNS_DISPATCHATTR_PRIVATE;
2108			attrs |= DNS_DISPATCHATTR_CONNECTED;
2109			if (isc_sockaddr_pf(&query->addrinfo->sockaddr) ==
2110			    AF_INET)
2111				attrs |= DNS_DISPATCHATTR_IPV4;
2112			else
2113				attrs |= DNS_DISPATCHATTR_IPV6;
2114			attrs |= DNS_DISPATCHATTR_MAKEQUERY;
2115
2116			result = dns_dispatch_createtcp(query->dispatchmgr,
2117						     query->tcpsocket,
2118						     query->fctx->res->taskmgr,
2119						     4096, 2, 1, 1, 3, attrs,
2120						     &query->dispatch);
2121
2122			/*
2123			 * Regardless of whether dns_dispatch_create()
2124			 * succeeded or not, we don't need our reference
2125			 * to the socket anymore.
2126			 */
2127			isc_socket_detach(&query->tcpsocket);
2128
2129			if (result == ISC_R_SUCCESS)
2130				result = resquery_send(query);
2131
2132			if (result != ISC_R_SUCCESS) {
2133				fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
2134				fctx_done(fctx, result, __LINE__);
2135			}
2136			break;
2137
2138		case ISC_R_NETUNREACH:
2139		case ISC_R_HOSTUNREACH:
2140		case ISC_R_CONNREFUSED:
2141		case ISC_R_NOPERM:
2142		case ISC_R_ADDRNOTAVAIL:
2143		case ISC_R_CONNECTIONRESET:
2144			/*
2145			 * No route to remote.
2146			 */
2147			isc_socket_detach(&query->tcpsocket);
2148			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
2149			retry = ISC_TRUE;
2150			break;
2151
2152		default:
2153			isc_socket_detach(&query->tcpsocket);
2154			fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
2155			break;
2156		}
2157	}
2158
2159	isc_event_free(&event);
2160
2161	if (retry) {
2162		/*
2163		 * Behave as if the idle timer has expired.  For TCP
2164		 * connections this may not actually reflect the latest timer.
2165		 */
2166		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
2167		result = fctx_stopidletimer(fctx);
2168		if (result != ISC_R_SUCCESS)
2169			fctx_done(fctx, result, __LINE__);
2170		else
2171			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
2172	}
2173}
2174
2175static void
2176fctx_finddone(isc_task_t *task, isc_event_t *event) {
2177	fetchctx_t *fctx;
2178	dns_adbfind_t *find;
2179	dns_resolver_t *res;
2180	isc_boolean_t want_try = ISC_FALSE;
2181	isc_boolean_t want_done = ISC_FALSE;
2182	isc_boolean_t bucket_empty = ISC_FALSE;
2183	unsigned int bucketnum;
2184	isc_boolean_t destroy = ISC_FALSE;
2185
2186	find = event->ev_sender;
2187	fctx = event->ev_arg;
2188	REQUIRE(VALID_FCTX(fctx));
2189	res = fctx->res;
2190
2191	UNUSED(task);
2192
2193	FCTXTRACE("finddone");
2194
2195	bucketnum = fctx->bucketnum;
2196	LOCK(&res->buckets[bucketnum].lock);
2197
2198	INSIST(fctx->pending > 0);
2199	fctx->pending--;
2200
2201	if (ADDRWAIT(fctx)) {
2202		/*
2203		 * The fetch is waiting for a name to be found.
2204		 */
2205		INSIST(!SHUTTINGDOWN(fctx));
2206		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
2207		if (event->ev_type == DNS_EVENT_ADBMOREADDRESSES) {
2208			want_try = ISC_TRUE;
2209			fctx->totalqueries += find->qtotal;
2210		} else {
2211			fctx->findfail++;
2212			if (fctx->pending == 0) {
2213				/*
2214				 * We've got nothing else to wait for and don't
2215				 * know the answer.  There's nothing to do but
2216				 * fail the fctx.
2217				 */
2218				want_done = ISC_TRUE;
2219			}
2220		}
2221	} else if (SHUTTINGDOWN(fctx) && fctx->pending == 0 &&
2222		   fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) {
2223
2224		if (fctx->references == 0) {
2225			bucket_empty = fctx_unlink(fctx);
2226			destroy = ISC_TRUE;
2227		}
2228	}
2229	UNLOCK(&res->buckets[bucketnum].lock);
2230
2231	isc_event_free(&event);
2232	dns_adb_destroyfind(&find);
2233
2234	if (want_try)
2235		fctx_try(fctx, ISC_TRUE, ISC_FALSE);
2236	else if (want_done)
2237		fctx_done(fctx, ISC_R_FAILURE, __LINE__);
2238	else if (destroy) {
2239			fctx_destroy(fctx);
2240		if (bucket_empty)
2241			empty_bucket(res);
2242	}
2243}
2244
2245
2246static inline isc_boolean_t
2247bad_server(fetchctx_t *fctx, isc_sockaddr_t *address) {
2248	isc_sockaddr_t *sa;
2249
2250	for (sa = ISC_LIST_HEAD(fctx->bad);
2251	     sa != NULL;
2252	     sa = ISC_LIST_NEXT(sa, link)) {
2253		if (isc_sockaddr_equal(sa, address))
2254			return (ISC_TRUE);
2255	}
2256
2257	return (ISC_FALSE);
2258}
2259
2260static inline isc_boolean_t
2261mark_bad(fetchctx_t *fctx) {
2262	dns_adbfind_t *curr;
2263	dns_adbaddrinfo_t *addrinfo;
2264	isc_boolean_t all_bad = ISC_TRUE;
2265
2266	/*
2267	 * Mark all known bad servers, so we don't try to talk to them
2268	 * again.
2269	 */
2270
2271	/*
2272	 * Mark any bad nameservers.
2273	 */
2274	for (curr = ISC_LIST_HEAD(fctx->finds);
2275	     curr != NULL;
2276	     curr = ISC_LIST_NEXT(curr, publink)) {
2277		for (addrinfo = ISC_LIST_HEAD(curr->list);
2278		     addrinfo != NULL;
2279		     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2280			if (bad_server(fctx, &addrinfo->sockaddr))
2281				addrinfo->flags |= FCTX_ADDRINFO_MARK;
2282			else
2283				all_bad = ISC_FALSE;
2284		}
2285	}
2286
2287	/*
2288	 * Mark any bad forwarders.
2289	 */
2290	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
2291	     addrinfo != NULL;
2292	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2293		if (bad_server(fctx, &addrinfo->sockaddr))
2294			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2295		else
2296			all_bad = ISC_FALSE;
2297	}
2298
2299	/*
2300	 * Mark any bad alternates.
2301	 */
2302	for (curr = ISC_LIST_HEAD(fctx->altfinds);
2303	     curr != NULL;
2304	     curr = ISC_LIST_NEXT(curr, publink)) {
2305		for (addrinfo = ISC_LIST_HEAD(curr->list);
2306		     addrinfo != NULL;
2307		     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2308			if (bad_server(fctx, &addrinfo->sockaddr))
2309				addrinfo->flags |= FCTX_ADDRINFO_MARK;
2310			else
2311				all_bad = ISC_FALSE;
2312		}
2313	}
2314
2315	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
2316	     addrinfo != NULL;
2317	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2318		if (bad_server(fctx, &addrinfo->sockaddr))
2319			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2320		else
2321			all_bad = ISC_FALSE;
2322	}
2323
2324	return (all_bad);
2325}
2326
2327static void
2328add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_result_t reason,
2329	badnstype_t badtype)
2330{
2331	char namebuf[DNS_NAME_FORMATSIZE];
2332	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
2333	char classbuf[64];
2334	char typebuf[64];
2335	char code[64];
2336	isc_buffer_t b;
2337	isc_sockaddr_t *sa;
2338	const char *spc = "";
2339	isc_sockaddr_t *address = &addrinfo->sockaddr;
2340
2341	if (reason == DNS_R_LAME)
2342		fctx->lamecount++;
2343	else {
2344		switch (badtype) {
2345		case badns_unreachable:
2346			fctx->neterr++;
2347			break;
2348		case badns_response:
2349			fctx->badresp++;
2350			break;
2351		case badns_validation:
2352			break;	/* counted as 'valfail' */
2353		}
2354	}
2355
2356	if (bad_server(fctx, address)) {
2357		/*
2358		 * We already know this server is bad.
2359		 */
2360		return;
2361	}
2362
2363	FCTXTRACE("add_bad");
2364
2365	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
2366	if (sa == NULL)
2367		return;
2368	*sa = *address;
2369	ISC_LIST_INITANDAPPEND(fctx->bad, sa, link);
2370
2371	if (reason == DNS_R_LAME)       /* already logged */
2372		return;
2373
2374	if (reason == DNS_R_UNEXPECTEDRCODE &&
2375	    fctx->rmessage->rcode == dns_rcode_servfail &&
2376	    ISFORWARDER(addrinfo))
2377		return;
2378
2379	if (reason == DNS_R_UNEXPECTEDRCODE) {
2380		isc_buffer_init(&b, code, sizeof(code) - 1);
2381		dns_rcode_totext(fctx->rmessage->rcode, &b);
2382		code[isc_buffer_usedlength(&b)] = '\0';
2383		spc = " ";
2384	} else if (reason == DNS_R_UNEXPECTEDOPCODE) {
2385		isc_buffer_init(&b, code, sizeof(code) - 1);
2386		dns_opcode_totext((dns_opcode_t)fctx->rmessage->opcode, &b);
2387		code[isc_buffer_usedlength(&b)] = '\0';
2388		spc = " ";
2389	} else {
2390		code[0] = '\0';
2391	}
2392	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
2393	dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
2394	dns_rdataclass_format(fctx->res->rdclass, classbuf, sizeof(classbuf));
2395	isc_sockaddr_format(address, addrbuf, sizeof(addrbuf));
2396	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
2397		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
2398		      "error (%s%s%s) resolving '%s/%s/%s': %s",
2399		      dns_result_totext(reason), spc, code,
2400		      namebuf, typebuf, classbuf, addrbuf);
2401}
2402
2403/*
2404 * Sort addrinfo list by RTT.
2405 */
2406static void
2407sort_adbfind(dns_adbfind_t *find) {
2408	dns_adbaddrinfo_t *best, *curr;
2409	dns_adbaddrinfolist_t sorted;
2410
2411	/* Lame N^2 bubble sort. */
2412	ISC_LIST_INIT(sorted);
2413	while (!ISC_LIST_EMPTY(find->list)) {
2414		best = ISC_LIST_HEAD(find->list);
2415		curr = ISC_LIST_NEXT(best, publink);
2416		while (curr != NULL) {
2417			if (curr->srtt < best->srtt)
2418				best = curr;
2419			curr = ISC_LIST_NEXT(curr, publink);
2420		}
2421		ISC_LIST_UNLINK(find->list, best, publink);
2422		ISC_LIST_APPEND(sorted, best, publink);
2423	}
2424	find->list = sorted;
2425}
2426
2427/*
2428 * Sort a list of finds by server RTT.
2429 */
2430static void
2431sort_finds(dns_adbfindlist_t *findlist) {
2432	dns_adbfind_t *best, *curr;
2433	dns_adbfindlist_t sorted;
2434	dns_adbaddrinfo_t *addrinfo, *bestaddrinfo;
2435
2436	/* Sort each find's addrinfo list by SRTT. */
2437	for (curr = ISC_LIST_HEAD(*findlist);
2438	     curr != NULL;
2439	     curr = ISC_LIST_NEXT(curr, publink))
2440		sort_adbfind(curr);
2441
2442	/* Lame N^2 bubble sort. */
2443	ISC_LIST_INIT(sorted);
2444	while (!ISC_LIST_EMPTY(*findlist)) {
2445		best = ISC_LIST_HEAD(*findlist);
2446		bestaddrinfo = ISC_LIST_HEAD(best->list);
2447		INSIST(bestaddrinfo != NULL);
2448		curr = ISC_LIST_NEXT(best, publink);
2449		while (curr != NULL) {
2450			addrinfo = ISC_LIST_HEAD(curr->list);
2451			INSIST(addrinfo != NULL);
2452			if (addrinfo->srtt < bestaddrinfo->srtt) {
2453				best = curr;
2454				bestaddrinfo = addrinfo;
2455			}
2456			curr = ISC_LIST_NEXT(curr, publink);
2457		}
2458		ISC_LIST_UNLINK(*findlist, best, publink);
2459		ISC_LIST_APPEND(sorted, best, publink);
2460	}
2461	*findlist = sorted;
2462}
2463
2464static void
2465findname(fetchctx_t *fctx, dns_name_t *name, in_port_t port,
2466	 unsigned int options, unsigned int flags, isc_stdtime_t now,
2467	 isc_boolean_t *need_alternate)
2468{
2469	dns_adbaddrinfo_t *ai;
2470	dns_adbfind_t *find;
2471	dns_resolver_t *res;
2472	isc_boolean_t unshared;
2473	isc_result_t result;
2474
2475	res = fctx->res;
2476	unshared = ISC_TF((fctx->options & DNS_FETCHOPT_UNSHARED) != 0);
2477	/*
2478	 * If this name is a subdomain of the query domain, tell
2479	 * the ADB to start looking using zone/hint data. This keeps us
2480	 * from getting stuck if the nameserver is beneath the zone cut
2481	 * and we don't know its address (e.g. because the A record has
2482	 * expired).
2483	 */
2484	if (dns_name_issubdomain(name, &fctx->domain))
2485		options |= DNS_ADBFIND_STARTATZONE;
2486	options |= DNS_ADBFIND_GLUEOK;
2487	options |= DNS_ADBFIND_HINTOK;
2488
2489	/*
2490	 * See what we know about this address.
2491	 */
2492	find = NULL;
2493	result = dns_adb_createfind2(fctx->adb,
2494				     res->buckets[fctx->bucketnum].task,
2495				     fctx_finddone, fctx, name,
2496				     &fctx->name, fctx->type,
2497				     options, now, NULL,
2498				     res->view->dstport,
2499				     fctx->depth + 1, &find);
2500	if (result != ISC_R_SUCCESS) {
2501		if (result == DNS_R_ALIAS) {
2502			/*
2503			 * XXXRTH  Follow the CNAME/DNAME chain?
2504			 */
2505			dns_adb_destroyfind(&find);
2506			fctx->adberr++;
2507		}
2508	} else if (!ISC_LIST_EMPTY(find->list)) {
2509		/*
2510		 * We have at least some of the addresses for the
2511		 * name.
2512		 */
2513		INSIST((find->options & DNS_ADBFIND_WANTEVENT) == 0);
2514		if (flags != 0 || port != 0) {
2515			for (ai = ISC_LIST_HEAD(find->list);
2516			     ai != NULL;
2517			     ai = ISC_LIST_NEXT(ai, publink)) {
2518				ai->flags |= flags;
2519				if (port != 0)
2520					isc_sockaddr_setport(&ai->sockaddr,
2521							     port);
2522			}
2523		}
2524		if ((flags & FCTX_ADDRINFO_FORWARDER) != 0)
2525			ISC_LIST_APPEND(fctx->altfinds, find, publink);
2526		else
2527			ISC_LIST_APPEND(fctx->finds, find, publink);
2528	} else {
2529		/*
2530		 * We don't know any of the addresses for this
2531		 * name.
2532		 */
2533		if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
2534			/*
2535			 * We're looking for them and will get an
2536			 * event about it later.
2537			 */
2538			fctx->pending++;
2539			/*
2540			 * Bootstrap.
2541			 */
2542			if (need_alternate != NULL &&
2543			    !*need_alternate && unshared &&
2544			    ((res->dispatches4 == NULL &&
2545			      find->result_v6 != DNS_R_NXDOMAIN) ||
2546			     (res->dispatches6 == NULL &&
2547			      find->result_v4 != DNS_R_NXDOMAIN)))
2548				*need_alternate = ISC_TRUE;
2549		} else {
2550			if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0)
2551				fctx->lamecount++; /* cached lame server */
2552			else
2553				fctx->adberr++; /* unreachable server, etc. */
2554
2555			/*
2556			 * If we know there are no addresses for
2557			 * the family we are using then try to add
2558			 * an alternative server.
2559			 */
2560			if (need_alternate != NULL && !*need_alternate &&
2561			    ((res->dispatches4 == NULL &&
2562			      find->result_v6 == DNS_R_NXRRSET) ||
2563			     (res->dispatches6 == NULL &&
2564			      find->result_v4 == DNS_R_NXRRSET)))
2565				*need_alternate = ISC_TRUE;
2566			dns_adb_destroyfind(&find);
2567		}
2568	}
2569}
2570
2571static isc_boolean_t
2572isstrictsubdomain(dns_name_t *name1, dns_name_t *name2) {
2573	int order;
2574	unsigned int nlabels;
2575	dns_namereln_t namereln;
2576
2577	namereln = dns_name_fullcompare(name1, name2, &order, &nlabels);
2578	return (ISC_TF(namereln == dns_namereln_subdomain));
2579}
2580
2581static isc_result_t
2582fctx_getaddresses(fetchctx_t *fctx, isc_boolean_t badcache) {
2583	dns_rdata_t rdata = DNS_RDATA_INIT;
2584	isc_result_t result;
2585	dns_resolver_t *res;
2586	isc_stdtime_t now;
2587	unsigned int stdoptions = 0;
2588	isc_sockaddr_t *sa;
2589	dns_adbaddrinfo_t *ai;
2590	isc_boolean_t all_bad;
2591	dns_rdata_ns_t ns;
2592	isc_boolean_t need_alternate = ISC_FALSE;
2593
2594	FCTXTRACE("getaddresses");
2595
2596	/*
2597	 * Don't pound on remote servers.  (Failsafe!)
2598	 */
2599	fctx->restarts++;
2600	if (fctx->restarts > 10) {
2601		FCTXTRACE("too many restarts");
2602		return (DNS_R_SERVFAIL);
2603	}
2604
2605	res = fctx->res;
2606
2607	if (fctx->depth > res->maxdepth) {
2608		FCTXTRACE("too much NS indirection");
2609		return (DNS_R_SERVFAIL);
2610	}
2611
2612	/*
2613	 * Forwarders.
2614	 */
2615
2616	INSIST(ISC_LIST_EMPTY(fctx->forwaddrs));
2617	INSIST(ISC_LIST_EMPTY(fctx->altaddrs));
2618
2619	/*
2620	 * If this fctx has forwarders, use them; otherwise use any
2621	 * selective forwarders specified in the view; otherwise use the
2622	 * resolver's forwarders (if any).
2623	 */
2624	sa = ISC_LIST_HEAD(fctx->forwarders);
2625	if (sa == NULL) {
2626		dns_forwarders_t *forwarders = NULL;
2627		dns_name_t *name = &fctx->name;
2628		dns_name_t suffix;
2629		unsigned int labels;
2630		dns_fixedname_t fixed;
2631		dns_name_t *domain;
2632
2633		/*
2634		 * DS records are found in the parent server.
2635		 * Strip label to get the correct forwarder (if any).
2636		 */
2637		if (dns_rdatatype_atparent(fctx->type) &&
2638		    dns_name_countlabels(name) > 1) {
2639			dns_name_init(&suffix, NULL);
2640			labels = dns_name_countlabels(name);
2641			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
2642			name = &suffix;
2643		}
2644
2645		dns_fixedname_init(&fixed);
2646		domain = dns_fixedname_name(&fixed);
2647		result = dns_fwdtable_find2(fctx->res->view->fwdtable, name,
2648					    domain, &forwarders);
2649		if (result == ISC_R_SUCCESS) {
2650			sa = ISC_LIST_HEAD(forwarders->addrs);
2651			fctx->fwdpolicy = forwarders->fwdpolicy;
2652			if (fctx->fwdpolicy == dns_fwdpolicy_only &&
2653			    isstrictsubdomain(domain, &fctx->domain)) {
2654				dns_name_free(&fctx->domain, fctx->mctx);
2655				dns_name_init(&fctx->domain, NULL);
2656				result = dns_name_dup(domain, fctx->mctx,
2657						      &fctx->domain);
2658				if (result != ISC_R_SUCCESS)
2659					return (result);
2660			}
2661		}
2662	}
2663
2664	while (sa != NULL) {
2665		if ((isc_sockaddr_pf(sa) == AF_INET &&
2666			 fctx->res->dispatches4 == NULL) ||
2667		    (isc_sockaddr_pf(sa) == AF_INET6 &&
2668			fctx->res->dispatches6 == NULL)) {
2669				sa = ISC_LIST_NEXT(sa, link);
2670				continue;
2671		}
2672		ai = NULL;
2673		result = dns_adb_findaddrinfo(fctx->adb,
2674					      sa, &ai, 0);  /* XXXMLG */
2675		if (result == ISC_R_SUCCESS) {
2676			dns_adbaddrinfo_t *cur;
2677			ai->flags |= FCTX_ADDRINFO_FORWARDER;
2678			cur = ISC_LIST_HEAD(fctx->forwaddrs);
2679			while (cur != NULL && cur->srtt < ai->srtt)
2680				cur = ISC_LIST_NEXT(cur, publink);
2681			if (cur != NULL)
2682				ISC_LIST_INSERTBEFORE(fctx->forwaddrs, cur,
2683						      ai, publink);
2684			else
2685				ISC_LIST_APPEND(fctx->forwaddrs, ai, publink);
2686		}
2687		sa = ISC_LIST_NEXT(sa, link);
2688	}
2689
2690	/*
2691	 * If the forwarding policy is "only", we don't need the addresses
2692	 * of the nameservers.
2693	 */
2694	if (fctx->fwdpolicy == dns_fwdpolicy_only)
2695		goto out;
2696
2697	/*
2698	 * Normal nameservers.
2699	 */
2700
2701	stdoptions = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_EMPTYEVENT;
2702	if (fctx->restarts == 1) {
2703		/*
2704		 * To avoid sending out a flood of queries likely to
2705		 * result in NXRRSET, we suppress fetches for address
2706		 * families we don't have the first time through,
2707		 * provided that we have addresses in some family we
2708		 * can use.
2709		 *
2710		 * We don't want to set this option all the time, since
2711		 * if fctx->restarts > 1, we've clearly been having trouble
2712		 * with the addresses we had, so getting more could help.
2713		 */
2714		stdoptions |= DNS_ADBFIND_AVOIDFETCHES;
2715	}
2716	if (res->dispatches4 != NULL)
2717		stdoptions |= DNS_ADBFIND_INET;
2718	if (res->dispatches6 != NULL)
2719		stdoptions |= DNS_ADBFIND_INET6;
2720	isc_stdtime_get(&now);
2721
2722	INSIST(ISC_LIST_EMPTY(fctx->finds));
2723	INSIST(ISC_LIST_EMPTY(fctx->altfinds));
2724
2725	for (result = dns_rdataset_first(&fctx->nameservers);
2726	     result == ISC_R_SUCCESS;
2727	     result = dns_rdataset_next(&fctx->nameservers))
2728	{
2729		dns_rdataset_current(&fctx->nameservers, &rdata);
2730		/*
2731		 * Extract the name from the NS record.
2732		 */
2733		result = dns_rdata_tostruct(&rdata, &ns, NULL);
2734		if (result != ISC_R_SUCCESS)
2735			continue;
2736
2737		findname(fctx, &ns.name, 0, stdoptions, 0, now,
2738			 &need_alternate);
2739		dns_rdata_reset(&rdata);
2740		dns_rdata_freestruct(&ns);
2741	}
2742	if (result != ISC_R_NOMORE)
2743		return (result);
2744
2745	/*
2746	 * Do we need to use 6 to 4?
2747	 */
2748	if (need_alternate) {
2749		int family;
2750		alternate_t *a;
2751		family = (res->dispatches6 != NULL) ? AF_INET6 : AF_INET;
2752		for (a = ISC_LIST_HEAD(fctx->res->alternates);
2753		     a != NULL;
2754		     a = ISC_LIST_NEXT(a, link)) {
2755			if (!a->isaddress) {
2756				findname(fctx, &a->_u._n.name, a->_u._n.port,
2757					 stdoptions, FCTX_ADDRINFO_FORWARDER,
2758					 now, NULL);
2759				continue;
2760			}
2761			if (isc_sockaddr_pf(&a->_u.addr) != family)
2762				continue;
2763			ai = NULL;
2764			result = dns_adb_findaddrinfo(fctx->adb, &a->_u.addr,
2765						      &ai, 0);
2766			if (result == ISC_R_SUCCESS) {
2767				dns_adbaddrinfo_t *cur;
2768				ai->flags |= FCTX_ADDRINFO_FORWARDER;
2769				cur = ISC_LIST_HEAD(fctx->altaddrs);
2770				while (cur != NULL && cur->srtt < ai->srtt)
2771					cur = ISC_LIST_NEXT(cur, publink);
2772				if (cur != NULL)
2773					ISC_LIST_INSERTBEFORE(fctx->altaddrs,
2774							      cur, ai, publink);
2775				else
2776					ISC_LIST_APPEND(fctx->altaddrs, ai,
2777							publink);
2778			}
2779		}
2780	}
2781
2782 out:
2783	/*
2784	 * Mark all known bad servers.
2785	 */
2786	all_bad = mark_bad(fctx);
2787
2788	/*
2789	 * How are we doing?
2790	 */
2791	if (all_bad) {
2792		/*
2793		 * We've got no addresses.
2794		 */
2795		if (fctx->pending > 0) {
2796			/*
2797			 * We're fetching the addresses, but don't have any
2798			 * yet.   Tell the caller to wait for an answer.
2799			 */
2800			result = DNS_R_WAIT;
2801		} else {
2802			isc_time_t expire;
2803			isc_interval_t i;
2804			/*
2805			 * We've lost completely.  We don't know any
2806			 * addresses, and the ADB has told us it can't get
2807			 * them.
2808			 */
2809			FCTXTRACE("no addresses");
2810			isc_interval_set(&i, DNS_BADCACHE_TTL(fctx), 0);
2811			result = isc_time_nowplusinterval(&expire, &i);
2812			if (badcache &&
2813			    (fctx->type == dns_rdatatype_dnskey ||
2814			     fctx->type == dns_rdatatype_dlv ||
2815			     fctx->type == dns_rdatatype_ds) &&
2816			     result == ISC_R_SUCCESS)
2817				dns_resolver_addbadcache(fctx->res,
2818							 &fctx->name,
2819							 fctx->type, &expire);
2820			result = ISC_R_FAILURE;
2821		}
2822	} else {
2823		/*
2824		 * We've found some addresses.  We might still be looking
2825		 * for more addresses.
2826		 */
2827		sort_finds(&fctx->finds);
2828		sort_finds(&fctx->altfinds);
2829		result = ISC_R_SUCCESS;
2830	}
2831
2832	return (result);
2833}
2834
2835static inline void
2836possibly_mark(fetchctx_t *fctx, dns_adbaddrinfo_t *addr)
2837{
2838	isc_netaddr_t na;
2839	char buf[ISC_NETADDR_FORMATSIZE];
2840	isc_sockaddr_t *sa;
2841	isc_boolean_t aborted = ISC_FALSE;
2842	isc_boolean_t bogus;
2843	dns_acl_t *blackhole;
2844	isc_netaddr_t ipaddr;
2845	dns_peer_t *peer = NULL;
2846	dns_resolver_t *res;
2847	const char *msg = NULL;
2848
2849	sa = &addr->sockaddr;
2850
2851	res = fctx->res;
2852	isc_netaddr_fromsockaddr(&ipaddr, sa);
2853	blackhole = dns_dispatchmgr_getblackhole(res->dispatchmgr);
2854	(void) dns_peerlist_peerbyaddr(res->view->peers, &ipaddr, &peer);
2855
2856	if (blackhole != NULL) {
2857		int match;
2858
2859		if (dns_acl_match(&ipaddr, NULL, blackhole,
2860				  &res->view->aclenv,
2861				  &match, NULL) == ISC_R_SUCCESS &&
2862		    match > 0)
2863			aborted = ISC_TRUE;
2864	}
2865
2866	if (peer != NULL &&
2867	    dns_peer_getbogus(peer, &bogus) == ISC_R_SUCCESS &&
2868	    bogus)
2869		aborted = ISC_TRUE;
2870
2871	if (aborted) {
2872		addr->flags |= FCTX_ADDRINFO_MARK;
2873		msg = "ignoring blackholed / bogus server: ";
2874	} else if (isc_sockaddr_ismulticast(sa)) {
2875		addr->flags |= FCTX_ADDRINFO_MARK;
2876		msg = "ignoring multicast address: ";
2877	} else if (isc_sockaddr_isexperimental(sa)) {
2878		addr->flags |= FCTX_ADDRINFO_MARK;
2879		msg = "ignoring experimental address: ";
2880	} else if (sa->type.sa.sa_family != AF_INET6) {
2881		return;
2882	} else if (IN6_IS_ADDR_V4MAPPED(&sa->type.sin6.sin6_addr)) {
2883		addr->flags |= FCTX_ADDRINFO_MARK;
2884		msg = "ignoring IPv6 mapped IPV4 address: ";
2885	} else if (IN6_IS_ADDR_V4COMPAT(&sa->type.sin6.sin6_addr)) {
2886		addr->flags |= FCTX_ADDRINFO_MARK;
2887		msg = "ignoring IPv6 compatibility IPV4 address: ";
2888	} else
2889		return;
2890
2891	if (!isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(3)))
2892		return;
2893
2894	isc_netaddr_fromsockaddr(&na, sa);
2895	isc_netaddr_format(&na, buf, sizeof(buf));
2896	FCTXTRACE2(msg, buf);
2897}
2898
2899static inline dns_adbaddrinfo_t *
2900fctx_nextaddress(fetchctx_t *fctx) {
2901	dns_adbfind_t *find, *start;
2902	dns_adbaddrinfo_t *addrinfo;
2903	dns_adbaddrinfo_t *faddrinfo;
2904
2905	/*
2906	 * Return the next untried address, if any.
2907	 */
2908
2909	/*
2910	 * Find the first unmarked forwarder (if any).
2911	 */
2912	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
2913	     addrinfo != NULL;
2914	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2915		if (!UNMARKED(addrinfo))
2916			continue;
2917		possibly_mark(fctx, addrinfo);
2918		if (UNMARKED(addrinfo)) {
2919			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2920			fctx->find = NULL;
2921			return (addrinfo);
2922		}
2923	}
2924
2925	/*
2926	 * No forwarders.  Move to the next find.
2927	 */
2928
2929	fctx->attributes |= FCTX_ATTR_TRIEDFIND;
2930
2931	find = fctx->find;
2932	if (find == NULL)
2933		find = ISC_LIST_HEAD(fctx->finds);
2934	else {
2935		find = ISC_LIST_NEXT(find, publink);
2936		if (find == NULL)
2937			find = ISC_LIST_HEAD(fctx->finds);
2938	}
2939
2940	/*
2941	 * Find the first unmarked addrinfo.
2942	 */
2943	addrinfo = NULL;
2944	if (find != NULL) {
2945		start = find;
2946		do {
2947			for (addrinfo = ISC_LIST_HEAD(find->list);
2948			     addrinfo != NULL;
2949			     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2950				if (!UNMARKED(addrinfo))
2951					continue;
2952				possibly_mark(fctx, addrinfo);
2953				if (UNMARKED(addrinfo)) {
2954					addrinfo->flags |= FCTX_ADDRINFO_MARK;
2955					break;
2956				}
2957			}
2958			if (addrinfo != NULL)
2959				break;
2960			find = ISC_LIST_NEXT(find, publink);
2961			if (find == NULL)
2962				find = ISC_LIST_HEAD(fctx->finds);
2963		} while (find != start);
2964	}
2965
2966	fctx->find = find;
2967	if (addrinfo != NULL)
2968		return (addrinfo);
2969
2970	/*
2971	 * No nameservers left.  Try alternates.
2972	 */
2973
2974	fctx->attributes |= FCTX_ATTR_TRIEDALT;
2975
2976	find = fctx->altfind;
2977	if (find == NULL)
2978		find = ISC_LIST_HEAD(fctx->altfinds);
2979	else {
2980		find = ISC_LIST_NEXT(find, publink);
2981		if (find == NULL)
2982			find = ISC_LIST_HEAD(fctx->altfinds);
2983	}
2984
2985	/*
2986	 * Find the first unmarked addrinfo.
2987	 */
2988	addrinfo = NULL;
2989	if (find != NULL) {
2990		start = find;
2991		do {
2992			for (addrinfo = ISC_LIST_HEAD(find->list);
2993			     addrinfo != NULL;
2994			     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2995				if (!UNMARKED(addrinfo))
2996					continue;
2997				possibly_mark(fctx, addrinfo);
2998				if (UNMARKED(addrinfo)) {
2999					addrinfo->flags |= FCTX_ADDRINFO_MARK;
3000					break;
3001				}
3002			}
3003			if (addrinfo != NULL)
3004				break;
3005			find = ISC_LIST_NEXT(find, publink);
3006			if (find == NULL)
3007				find = ISC_LIST_HEAD(fctx->altfinds);
3008		} while (find != start);
3009	}
3010
3011	faddrinfo = addrinfo;
3012
3013	/*
3014	 * See if we have a better alternate server by address.
3015	 */
3016
3017	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
3018	     addrinfo != NULL;
3019	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
3020		if (!UNMARKED(addrinfo))
3021			continue;
3022		possibly_mark(fctx, addrinfo);
3023		if (UNMARKED(addrinfo) &&
3024		    (faddrinfo == NULL ||
3025		     addrinfo->srtt < faddrinfo->srtt)) {
3026			if (faddrinfo != NULL)
3027				faddrinfo->flags &= ~FCTX_ADDRINFO_MARK;
3028			addrinfo->flags |= FCTX_ADDRINFO_MARK;
3029			break;
3030		}
3031	}
3032
3033	if (addrinfo == NULL) {
3034		addrinfo = faddrinfo;
3035		fctx->altfind = find;
3036	}
3037
3038	return (addrinfo);
3039}
3040
3041static void
3042fctx_try(fetchctx_t *fctx, isc_boolean_t retrying, isc_boolean_t badcache) {
3043	isc_result_t result;
3044	dns_adbaddrinfo_t *addrinfo;
3045
3046	FCTXTRACE("try");
3047
3048	REQUIRE(!ADDRWAIT(fctx));
3049
3050	if (fctx->totalqueries > DEFAULT_MAX_QUERIES)
3051		fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
3052
3053	addrinfo = fctx_nextaddress(fctx);
3054	if (addrinfo == NULL) {
3055		/*
3056		 * We have no more addresses.  Start over.
3057		 */
3058		fctx_cancelqueries(fctx, ISC_TRUE);
3059		fctx_cleanupfinds(fctx);
3060		fctx_cleanupaltfinds(fctx);
3061		fctx_cleanupforwaddrs(fctx);
3062		fctx_cleanupaltaddrs(fctx);
3063		result = fctx_getaddresses(fctx, badcache);
3064		if (result == DNS_R_WAIT) {
3065			/*
3066			 * Sleep waiting for addresses.
3067			 */
3068			FCTXTRACE("addrwait");
3069			fctx->attributes |= FCTX_ATTR_ADDRWAIT;
3070			return;
3071		} else if (result != ISC_R_SUCCESS) {
3072			/*
3073			 * Something bad happened.
3074			 */
3075			fctx_done(fctx, result, __LINE__);
3076			return;
3077		}
3078
3079		addrinfo = fctx_nextaddress(fctx);
3080		/*
3081		 * While we may have addresses from the ADB, they
3082		 * might be bad ones.  In this case, return SERVFAIL.
3083		 */
3084		if (addrinfo == NULL) {
3085			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
3086			return;
3087		}
3088	}
3089
3090	result = fctx_query(fctx, addrinfo, fctx->options);
3091	if (result != ISC_R_SUCCESS)
3092		fctx_done(fctx, result, __LINE__);
3093	else if (retrying)
3094		inc_stats(fctx->res, dns_resstatscounter_retry);
3095}
3096
3097static isc_boolean_t
3098fctx_unlink(fetchctx_t *fctx) {
3099	dns_resolver_t *res;
3100	unsigned int bucketnum;
3101
3102	/*
3103	 * Caller must be holding the bucket lock.
3104	 */
3105
3106	REQUIRE(VALID_FCTX(fctx));
3107	REQUIRE(fctx->state == fetchstate_done ||
3108		fctx->state == fetchstate_init);
3109	REQUIRE(ISC_LIST_EMPTY(fctx->events));
3110	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
3111	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
3112	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
3113	REQUIRE(fctx->pending == 0);
3114	REQUIRE(fctx->references == 0);
3115	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
3116
3117	FCTXTRACE("unlink");
3118
3119	res = fctx->res;
3120	bucketnum = fctx->bucketnum;
3121
3122	ISC_LIST_UNLINK(res->buckets[bucketnum].fctxs, fctx, link);
3123
3124	LOCK(&res->nlock);
3125	res->nfctx--;
3126	UNLOCK(&res->nlock);
3127
3128	if (res->buckets[bucketnum].exiting &&
3129	    ISC_LIST_EMPTY(res->buckets[bucketnum].fctxs))
3130		return (ISC_TRUE);
3131
3132	return (ISC_FALSE);
3133}
3134
3135static void
3136fctx_destroy(fetchctx_t *fctx) {
3137	isc_sockaddr_t *sa, *next_sa;
3138
3139	REQUIRE(VALID_FCTX(fctx));
3140	REQUIRE(fctx->state == fetchstate_done ||
3141		fctx->state == fetchstate_init);
3142	REQUIRE(ISC_LIST_EMPTY(fctx->events));
3143	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
3144	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
3145	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
3146	REQUIRE(fctx->pending == 0);
3147	REQUIRE(fctx->references == 0);
3148	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
3149	REQUIRE(!ISC_LINK_LINKED(fctx, link));
3150
3151	FCTXTRACE("destroy");
3152
3153	/*
3154	 * Free bad.
3155	 */
3156	for (sa = ISC_LIST_HEAD(fctx->bad);
3157	     sa != NULL;
3158	     sa = next_sa) {
3159		next_sa = ISC_LIST_NEXT(sa, link);
3160		ISC_LIST_UNLINK(fctx->bad, sa, link);
3161		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3162	}
3163
3164	for (sa = ISC_LIST_HEAD(fctx->edns);
3165	     sa != NULL;
3166	     sa = next_sa) {
3167		next_sa = ISC_LIST_NEXT(sa, link);
3168		ISC_LIST_UNLINK(fctx->edns, sa, link);
3169		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3170	}
3171
3172	for (sa = ISC_LIST_HEAD(fctx->edns512);
3173	     sa != NULL;
3174	     sa = next_sa) {
3175		next_sa = ISC_LIST_NEXT(sa, link);
3176		ISC_LIST_UNLINK(fctx->edns512, sa, link);
3177		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3178	}
3179
3180	for (sa = ISC_LIST_HEAD(fctx->bad_edns);
3181	     sa != NULL;
3182	     sa = next_sa) {
3183		next_sa = ISC_LIST_NEXT(sa, link);
3184		ISC_LIST_UNLINK(fctx->bad_edns, sa, link);
3185		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3186	}
3187
3188	isc_timer_detach(&fctx->timer);
3189	dns_message_destroy(&fctx->rmessage);
3190	dns_message_destroy(&fctx->qmessage);
3191	if (dns_name_countlabels(&fctx->domain) > 0)
3192		dns_name_free(&fctx->domain, fctx->mctx);
3193	if (dns_rdataset_isassociated(&fctx->nameservers))
3194		dns_rdataset_disassociate(&fctx->nameservers);
3195	dns_name_free(&fctx->name, fctx->mctx);
3196	dns_db_detach(&fctx->cache);
3197	dns_adb_detach(&fctx->adb);
3198	isc_mem_free(fctx->mctx, fctx->info);
3199	isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx));
3200}
3201
3202/*
3203 * Fetch event handlers.
3204 */
3205
3206static void
3207fctx_timeout(isc_task_t *task, isc_event_t *event) {
3208	fetchctx_t *fctx = event->ev_arg;
3209	isc_timerevent_t *tevent = (isc_timerevent_t *)event;
3210	resquery_t *query;
3211
3212	REQUIRE(VALID_FCTX(fctx));
3213
3214	UNUSED(task);
3215
3216	FCTXTRACE("timeout");
3217
3218	inc_stats(fctx->res, dns_resstatscounter_querytimeout);
3219
3220	if (event->ev_type == ISC_TIMEREVENT_LIFE) {
3221		fctx->reason = NULL;
3222		fctx_done(fctx, ISC_R_TIMEDOUT, __LINE__);
3223	} else {
3224		isc_result_t result;
3225
3226		fctx->timeouts++;
3227		fctx->timeout = ISC_TRUE;
3228		/*
3229		 * We could cancel the running queries here, or we could let
3230		 * them keep going.  Since we normally use separate sockets for
3231		 * different queries, we adopt the former approach to reduce
3232		 * the number of open sockets: cancel the oldest query if it
3233		 * expired after the query had started (this is usually the
3234		 * case but is not always so, depending on the task schedule
3235		 * timing).
3236		 */
3237		query = ISC_LIST_HEAD(fctx->queries);
3238		if (query != NULL &&
3239		    isc_time_compare(&tevent->due, &query->start) >= 0) {
3240			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
3241		}
3242		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
3243		/*
3244		 * Our timer has triggered.  Reestablish the fctx lifetime
3245		 * timer.
3246		 */
3247		result = fctx_starttimer(fctx);
3248		if (result != ISC_R_SUCCESS)
3249			fctx_done(fctx, result, __LINE__);
3250		else
3251			/*
3252			 * Keep trying.
3253			 */
3254			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
3255	}
3256
3257	isc_event_free(&event);
3258}
3259
3260static void
3261fctx_shutdown(fetchctx_t *fctx) {
3262	isc_event_t *cevent;
3263
3264	/*
3265	 * Start the shutdown process for fctx, if it isn't already underway.
3266	 */
3267
3268	FCTXTRACE("shutdown");
3269
3270	/*
3271	 * The caller must be holding the appropriate bucket lock.
3272	 */
3273
3274	if (fctx->want_shutdown)
3275		return;
3276
3277	fctx->want_shutdown = ISC_TRUE;
3278
3279	/*
3280	 * Unless we're still initializing (in which case the
3281	 * control event is still outstanding), we need to post
3282	 * the control event to tell the fetch we want it to
3283	 * exit.
3284	 */
3285	if (fctx->state != fetchstate_init) {
3286		cevent = &fctx->control_event;
3287		isc_task_send(fctx->res->buckets[fctx->bucketnum].task,
3288			      &cevent);
3289	}
3290}
3291
3292static void
3293fctx_doshutdown(isc_task_t *task, isc_event_t *event) {
3294	fetchctx_t *fctx = event->ev_arg;
3295	isc_boolean_t bucket_empty = ISC_FALSE;
3296	dns_resolver_t *res;
3297	unsigned int bucketnum;
3298	dns_validator_t *validator;
3299	isc_boolean_t destroy = ISC_FALSE;
3300
3301	REQUIRE(VALID_FCTX(fctx));
3302
3303	UNUSED(task);
3304
3305	res = fctx->res;
3306	bucketnum = fctx->bucketnum;
3307
3308	FCTXTRACE("doshutdown");
3309
3310	/*
3311	 * An fctx that is shutting down is no longer in ADDRWAIT mode.
3312	 */
3313	fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
3314
3315	/*
3316	 * Cancel all pending validators.  Note that this must be done
3317	 * without the bucket lock held, since that could cause deadlock.
3318	 */
3319	validator = ISC_LIST_HEAD(fctx->validators);
3320	while (validator != NULL) {
3321		dns_validator_cancel(validator);
3322		validator = ISC_LIST_NEXT(validator, link);
3323	}
3324
3325	if (fctx->nsfetch != NULL)
3326		dns_resolver_cancelfetch(fctx->nsfetch);
3327
3328	/*
3329	 * Shut down anything that is still running on behalf of this
3330	 * fetch.  To avoid deadlock with the ADB, we must do this
3331	 * before we lock the bucket lock.
3332	 */
3333	fctx_stopeverything(fctx, ISC_FALSE);
3334
3335	LOCK(&res->buckets[bucketnum].lock);
3336
3337	fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
3338
3339	INSIST(fctx->state == fetchstate_active ||
3340	       fctx->state == fetchstate_done);
3341	INSIST(fctx->want_shutdown);
3342
3343	if (fctx->state != fetchstate_done) {
3344		fctx->state = fetchstate_done;
3345		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
3346	}
3347
3348	if (fctx->references == 0 && fctx->pending == 0 &&
3349	    fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) {
3350		bucket_empty = fctx_unlink(fctx);
3351		destroy = ISC_TRUE;
3352	}
3353
3354	UNLOCK(&res->buckets[bucketnum].lock);
3355
3356	if (destroy) {
3357		fctx_destroy(fctx);
3358		if (bucket_empty)
3359			empty_bucket(res);
3360	}
3361}
3362
3363static void
3364fctx_start(isc_task_t *task, isc_event_t *event) {
3365	fetchctx_t *fctx = event->ev_arg;
3366	isc_boolean_t done = ISC_FALSE, bucket_empty = ISC_FALSE;
3367	dns_resolver_t *res;
3368	unsigned int bucketnum;
3369	isc_boolean_t destroy = ISC_FALSE;
3370
3371	REQUIRE(VALID_FCTX(fctx));
3372
3373	UNUSED(task);
3374
3375	res = fctx->res;
3376	bucketnum = fctx->bucketnum;
3377
3378	FCTXTRACE("start");
3379
3380	LOCK(&res->buckets[bucketnum].lock);
3381
3382	INSIST(fctx->state == fetchstate_init);
3383	if (fctx->want_shutdown) {
3384		/*
3385		 * We haven't started this fctx yet, and we've been requested
3386		 * to shut it down.
3387		 */
3388		fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
3389		fctx->state = fetchstate_done;
3390		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
3391		/*
3392		 * Since we haven't started, we INSIST that we have no
3393		 * pending ADB finds and no pending validations.
3394		 */
3395		INSIST(fctx->pending == 0);
3396		INSIST(fctx->nqueries == 0);
3397		INSIST(ISC_LIST_EMPTY(fctx->validators));
3398		if (fctx->references == 0) {
3399			/*
3400			 * It's now safe to destroy this fctx.
3401			 */
3402			bucket_empty = fctx_unlink(fctx);
3403			destroy = ISC_TRUE;
3404		}
3405		done = ISC_TRUE;
3406	} else {
3407		/*
3408		 * Normal fctx startup.
3409		 */
3410		fctx->state = fetchstate_active;
3411		fctx->totalqueries = 0;
3412		/*
3413		 * Reset the control event for later use in shutting down
3414		 * the fctx.
3415		 */
3416		ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
3417			       DNS_EVENT_FETCHCONTROL, fctx_doshutdown, fctx,
3418			       NULL, NULL, NULL);
3419	}
3420
3421	UNLOCK(&res->buckets[bucketnum].lock);
3422
3423	if (!done) {
3424		isc_result_t result;
3425
3426		INSIST(!destroy);
3427
3428		/*
3429		 * All is well.  Start working on the fetch.
3430		 */
3431		result = fctx_starttimer(fctx);
3432		if (result != ISC_R_SUCCESS)
3433			fctx_done(fctx, result, __LINE__);
3434		else
3435			fctx_try(fctx, ISC_FALSE, ISC_FALSE);
3436	} else if (destroy) {
3437			fctx_destroy(fctx);
3438		if (bucket_empty)
3439			empty_bucket(res);
3440	}
3441}
3442
3443/*
3444 * Fetch Creation, Joining, and Cancelation.
3445 */
3446
3447static inline isc_result_t
3448fctx_join(fetchctx_t *fctx, isc_task_t *task, isc_sockaddr_t *client,
3449	  dns_messageid_t id, isc_taskaction_t action, void *arg,
3450	  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
3451	  dns_fetch_t *fetch)
3452{
3453	isc_task_t *clone;
3454	dns_fetchevent_t *event;
3455
3456	FCTXTRACE("join");
3457
3458	/*
3459	 * We store the task we're going to send this event to in the
3460	 * sender field.  We'll make the fetch the sender when we actually
3461	 * send the event.
3462	 */
3463	clone = NULL;
3464	isc_task_attach(task, &clone);
3465	event = (dns_fetchevent_t *)
3466		isc_event_allocate(fctx->res->mctx, clone, DNS_EVENT_FETCHDONE,
3467				   action, arg, sizeof(*event));
3468	if (event == NULL) {
3469		isc_task_detach(&clone);
3470		return (ISC_R_NOMEMORY);
3471	}
3472	event->result = DNS_R_SERVFAIL;
3473	event->qtype = fctx->type;
3474	event->db = NULL;
3475	event->node = NULL;
3476	event->rdataset = rdataset;
3477	event->sigrdataset = sigrdataset;
3478	event->fetch = fetch;
3479	event->client = client;
3480	event->id = id;
3481	event->qtotal = 0;
3482	dns_fixedname_init(&event->foundname);
3483
3484	/*
3485	 * Make sure that we can store the sigrdataset in the
3486	 * first event if it is needed by any of the events.
3487	 */
3488	if (event->sigrdataset != NULL)
3489		ISC_LIST_PREPEND(fctx->events, event, ev_link);
3490	else
3491		ISC_LIST_APPEND(fctx->events, event, ev_link);
3492	fctx->references++;
3493	fctx->client = client;
3494
3495	fetch->magic = DNS_FETCH_MAGIC;
3496	fetch->private = fctx;
3497
3498	return (ISC_R_SUCCESS);
3499}
3500
3501static inline void
3502log_ns_ttl(fetchctx_t *fctx, const char *where) {
3503	char namebuf[DNS_NAME_FORMATSIZE];
3504	char domainbuf[DNS_NAME_FORMATSIZE];
3505
3506	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3507	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
3508	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3509		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
3510		      "log_ns_ttl: fctx %p: %s: %s (in '%s'?): %u %u",
3511		      fctx, where, namebuf, domainbuf,
3512		      fctx->ns_ttl_ok, fctx->ns_ttl);
3513}
3514
3515static isc_result_t
3516fctx_create(dns_resolver_t *res, dns_name_t *name, dns_rdatatype_t type,
3517	    dns_name_t *domain, dns_rdataset_t *nameservers,
3518	    unsigned int options, unsigned int bucketnum, unsigned int depth,
3519	    fetchctx_t **fctxp)
3520{
3521	fetchctx_t *fctx;
3522	isc_result_t result;
3523	isc_result_t iresult;
3524	isc_interval_t interval;
3525	dns_fixedname_t fixed;
3526	unsigned int findoptions = 0;
3527	char buf[DNS_NAME_FORMATSIZE + DNS_RDATATYPE_FORMATSIZE];
3528	char typebuf[DNS_RDATATYPE_FORMATSIZE];
3529	dns_name_t suffix;
3530	isc_mem_t *mctx;
3531
3532	/*
3533	 * Caller must be holding the lock for bucket number 'bucketnum'.
3534	 */
3535	REQUIRE(fctxp != NULL && *fctxp == NULL);
3536
3537	mctx = res->buckets[bucketnum].mctx;
3538	fctx = isc_mem_get(mctx, sizeof(*fctx));
3539	if (fctx == NULL)
3540		return (ISC_R_NOMEMORY);
3541	dns_name_format(name, buf, sizeof(buf));
3542	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
3543	strcat(buf, "/");       /* checked */
3544	strcat(buf, typebuf);   /* checked */
3545	fctx->info = isc_mem_strdup(mctx, buf);
3546	if (fctx->info == NULL) {
3547		result = ISC_R_NOMEMORY;
3548		goto cleanup_fetch;
3549	}
3550	FCTXTRACE("create");
3551	dns_name_init(&fctx->name, NULL);
3552	result = dns_name_dup(name, mctx, &fctx->name);
3553	if (result != ISC_R_SUCCESS)
3554		goto cleanup_info;
3555	dns_name_init(&fctx->domain, NULL);
3556	dns_rdataset_init(&fctx->nameservers);
3557
3558	fctx->type = type;
3559	fctx->options = options;
3560	/*
3561	 * Note!  We do not attach to the task.  We are relying on the
3562	 * resolver to ensure that this task doesn't go away while we are
3563	 * using it.
3564	 */
3565	fctx->res = res;
3566	fctx->references = 0;
3567	fctx->bucketnum = bucketnum;
3568	fctx->state = fetchstate_init;
3569	fctx->want_shutdown = ISC_FALSE;
3570	fctx->cloned = ISC_FALSE;
3571	fctx->depth = depth;
3572	ISC_LIST_INIT(fctx->queries);
3573	ISC_LIST_INIT(fctx->finds);
3574	ISC_LIST_INIT(fctx->altfinds);
3575	ISC_LIST_INIT(fctx->forwaddrs);
3576	ISC_LIST_INIT(fctx->altaddrs);
3577	ISC_LIST_INIT(fctx->forwarders);
3578	fctx->fwdpolicy = dns_fwdpolicy_none;
3579	ISC_LIST_INIT(fctx->bad);
3580	ISC_LIST_INIT(fctx->edns);
3581	ISC_LIST_INIT(fctx->edns512);
3582	ISC_LIST_INIT(fctx->bad_edns);
3583	ISC_LIST_INIT(fctx->validators);
3584	fctx->validator = NULL;
3585	fctx->find = NULL;
3586	fctx->altfind = NULL;
3587	fctx->pending = 0;
3588	fctx->restarts = 0;
3589	fctx->querysent = 0;
3590	fctx->totalqueries = 0;
3591	fctx->referrals = 0;
3592	TIME_NOW(&fctx->start);
3593	fctx->timeouts = 0;
3594	fctx->lamecount = 0;
3595	fctx->adberr = 0;
3596	fctx->neterr = 0;
3597	fctx->badresp = 0;
3598	fctx->findfail = 0;
3599	fctx->valfail = 0;
3600	fctx->result = ISC_R_FAILURE;
3601	fctx->vresult = ISC_R_SUCCESS;
3602	fctx->exitline = -1;	/* sentinel */
3603	fctx->logged = ISC_FALSE;
3604	fctx->attributes = 0;
3605	fctx->spilled = ISC_FALSE;
3606	fctx->nqueries = 0;
3607	fctx->reason = NULL;
3608	fctx->rand_buf = 0;
3609	fctx->rand_bits = 0;
3610	fctx->timeout = ISC_FALSE;
3611	fctx->addrinfo = NULL;
3612	fctx->client = NULL;
3613	fctx->ns_ttl = 0;
3614	fctx->ns_ttl_ok = ISC_FALSE;
3615
3616	dns_name_init(&fctx->nsname, NULL);
3617	fctx->nsfetch = NULL;
3618	dns_rdataset_init(&fctx->nsrrset);
3619
3620	if (domain == NULL) {
3621		dns_forwarders_t *forwarders = NULL;
3622		unsigned int labels;
3623		dns_name_t *fwdname = name;
3624
3625		/*
3626		 * DS records are found in the parent server.
3627		 * Strip label to get the correct forwarder (if any).
3628		 */
3629		if (dns_rdatatype_atparent(fctx->type) &&
3630		    dns_name_countlabels(name) > 1) {
3631			dns_name_init(&suffix, NULL);
3632			labels = dns_name_countlabels(name);
3633			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
3634			fwdname = &suffix;
3635		}
3636		dns_fixedname_init(&fixed);
3637		domain = dns_fixedname_name(&fixed);
3638		result = dns_fwdtable_find2(fctx->res->view->fwdtable, fwdname,
3639					    domain, &forwarders);
3640		if (result == ISC_R_SUCCESS)
3641			fctx->fwdpolicy = forwarders->fwdpolicy;
3642
3643		if (fctx->fwdpolicy != dns_fwdpolicy_only) {
3644			/*
3645			 * The caller didn't supply a query domain and
3646			 * nameservers, and we're not in forward-only mode,
3647			 * so find the best nameservers to use.
3648			 */
3649			if (dns_rdatatype_atparent(fctx->type))
3650				findoptions |= DNS_DBFIND_NOEXACT;
3651			result = dns_view_findzonecut(res->view, fwdname,
3652						      domain, 0, findoptions,
3653						      ISC_TRUE,
3654						      &fctx->nameservers,
3655						      NULL);
3656			if (result != ISC_R_SUCCESS)
3657				goto cleanup_name;
3658
3659			result = dns_name_dup(domain, mctx, &fctx->domain);
3660			if (result != ISC_R_SUCCESS) {
3661				dns_rdataset_disassociate(&fctx->nameservers);
3662				goto cleanup_name;
3663			}
3664			fctx->ns_ttl = fctx->nameservers.ttl;
3665			fctx->ns_ttl_ok = ISC_TRUE;
3666		} else {
3667			/*
3668			 * We're in forward-only mode.  Set the query domain.
3669			 */
3670			result = dns_name_dup(domain, mctx, &fctx->domain);
3671			if (result != ISC_R_SUCCESS)
3672				goto cleanup_name;
3673		}
3674	} else {
3675		result = dns_name_dup(domain, mctx, &fctx->domain);
3676		if (result != ISC_R_SUCCESS)
3677			goto cleanup_name;
3678		dns_rdataset_clone(nameservers, &fctx->nameservers);
3679		fctx->ns_ttl = fctx->nameservers.ttl;
3680		fctx->ns_ttl_ok = ISC_TRUE;
3681	}
3682
3683	log_ns_ttl(fctx, "fctx_create");
3684
3685	INSIST(dns_name_issubdomain(&fctx->name, &fctx->domain));
3686
3687	fctx->qmessage = NULL;
3688	result = dns_message_create(mctx, DNS_MESSAGE_INTENTRENDER,
3689				    &fctx->qmessage);
3690
3691	if (result != ISC_R_SUCCESS)
3692		goto cleanup_domain;
3693
3694	fctx->rmessage = NULL;
3695	result = dns_message_create(mctx, DNS_MESSAGE_INTENTPARSE,
3696				    &fctx->rmessage);
3697
3698	if (result != ISC_R_SUCCESS)
3699		goto cleanup_qmessage;
3700
3701	/*
3702	 * Compute an expiration time for the entire fetch.
3703	 */
3704	isc_interval_set(&interval, res->query_timeout, 0);
3705	iresult = isc_time_nowplusinterval(&fctx->expires, &interval);
3706	if (iresult != ISC_R_SUCCESS) {
3707		UNEXPECTED_ERROR(__FILE__, __LINE__,
3708				 "isc_time_nowplusinterval: %s",
3709				 isc_result_totext(iresult));
3710		result = ISC_R_UNEXPECTED;
3711		goto cleanup_rmessage;
3712	}
3713
3714	/*
3715	 * Default retry interval initialization.  We set the interval now
3716	 * mostly so it won't be uninitialized.  It will be set to the
3717	 * correct value before a query is issued.
3718	 */
3719	isc_interval_set(&fctx->interval, 2, 0);
3720
3721	/*
3722	 * Create an inactive timer.  It will be made active when the fetch
3723	 * is actually started.
3724	 */
3725	fctx->timer = NULL;
3726	iresult = isc_timer_create(res->timermgr, isc_timertype_inactive,
3727				   NULL, NULL,
3728				   res->buckets[bucketnum].task, fctx_timeout,
3729				   fctx, &fctx->timer);
3730	if (iresult != ISC_R_SUCCESS) {
3731		UNEXPECTED_ERROR(__FILE__, __LINE__,
3732				 "isc_timer_create: %s",
3733				 isc_result_totext(iresult));
3734		result = ISC_R_UNEXPECTED;
3735		goto cleanup_rmessage;
3736	}
3737
3738	/*
3739	 * Attach to the view's cache and adb.
3740	 */
3741	fctx->cache = NULL;
3742	dns_db_attach(res->view->cachedb, &fctx->cache);
3743	fctx->adb = NULL;
3744	dns_adb_attach(res->view->adb, &fctx->adb);
3745	fctx->mctx = NULL;
3746	isc_mem_attach(mctx, &fctx->mctx);
3747
3748	ISC_LIST_INIT(fctx->events);
3749	ISC_LINK_INIT(fctx, link);
3750	fctx->magic = FCTX_MAGIC;
3751
3752	ISC_LIST_APPEND(res->buckets[bucketnum].fctxs, fctx, link);
3753
3754	LOCK(&res->nlock);
3755	res->nfctx++;
3756	UNLOCK(&res->nlock);
3757
3758	*fctxp = fctx;
3759
3760	return (ISC_R_SUCCESS);
3761
3762 cleanup_rmessage:
3763	dns_message_destroy(&fctx->rmessage);
3764
3765 cleanup_qmessage:
3766	dns_message_destroy(&fctx->qmessage);
3767
3768 cleanup_domain:
3769	if (dns_name_countlabels(&fctx->domain) > 0)
3770		dns_name_free(&fctx->domain, mctx);
3771	if (dns_rdataset_isassociated(&fctx->nameservers))
3772		dns_rdataset_disassociate(&fctx->nameservers);
3773
3774 cleanup_name:
3775	dns_name_free(&fctx->name, mctx);
3776
3777 cleanup_info:
3778	isc_mem_free(mctx, fctx->info);
3779
3780 cleanup_fetch:
3781	isc_mem_put(mctx, fctx, sizeof(*fctx));
3782
3783	return (result);
3784}
3785
3786/*
3787 * Handle Responses
3788 */
3789static inline isc_boolean_t
3790is_lame(fetchctx_t *fctx) {
3791	dns_message_t *message = fctx->rmessage;
3792	dns_name_t *name;
3793	dns_rdataset_t *rdataset;
3794	isc_result_t result;
3795
3796	if (message->rcode != dns_rcode_noerror &&
3797	    message->rcode != dns_rcode_nxdomain)
3798		return (ISC_FALSE);
3799
3800	if (message->counts[DNS_SECTION_ANSWER] != 0)
3801		return (ISC_FALSE);
3802
3803	if (message->counts[DNS_SECTION_AUTHORITY] == 0)
3804		return (ISC_FALSE);
3805
3806	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
3807	while (result == ISC_R_SUCCESS) {
3808		name = NULL;
3809		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
3810		for (rdataset = ISC_LIST_HEAD(name->list);
3811		     rdataset != NULL;
3812		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
3813			dns_namereln_t namereln;
3814			int order;
3815			unsigned int labels;
3816			if (rdataset->type != dns_rdatatype_ns)
3817				continue;
3818			namereln = dns_name_fullcompare(name, &fctx->domain,
3819							&order, &labels);
3820			if (namereln == dns_namereln_equal &&
3821			    (message->flags & DNS_MESSAGEFLAG_AA) != 0)
3822				return (ISC_FALSE);
3823			if (namereln == dns_namereln_subdomain)
3824				return (ISC_FALSE);
3825			return (ISC_TRUE);
3826		}
3827		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
3828	}
3829
3830	return (ISC_FALSE);
3831}
3832
3833static inline void
3834log_lame(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo) {
3835	char namebuf[DNS_NAME_FORMATSIZE];
3836	char domainbuf[DNS_NAME_FORMATSIZE];
3837	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3838
3839	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3840	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
3841	isc_sockaddr_format(&addrinfo->sockaddr, addrbuf, sizeof(addrbuf));
3842	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
3843		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
3844		      "lame server resolving '%s' (in '%s'?): %s",
3845		      namebuf, domainbuf, addrbuf);
3846}
3847
3848static inline void
3849log_formerr(fetchctx_t *fctx, const char *format, ...) {
3850	char nsbuf[ISC_SOCKADDR_FORMATSIZE];
3851	char clbuf[ISC_SOCKADDR_FORMATSIZE];
3852	const char *clmsg = "";
3853	char msgbuf[2048];
3854	va_list args;
3855
3856	va_start(args, format);
3857	vsnprintf(msgbuf, sizeof(msgbuf), format, args);
3858	va_end(args);
3859
3860	isc_sockaddr_format(&fctx->addrinfo->sockaddr, nsbuf, sizeof(nsbuf));
3861
3862	if (fctx->client != NULL) {
3863		clmsg = " for client ";
3864		isc_sockaddr_format(fctx->client, clbuf, sizeof(clbuf));
3865	} else {
3866		clbuf[0] = '\0';
3867	}
3868
3869	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3870		      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
3871		      "DNS format error from %s resolving %s%s%s: %s",
3872		      nsbuf, fctx->info, clmsg, clbuf, msgbuf);
3873}
3874
3875static inline isc_result_t
3876same_question(fetchctx_t *fctx) {
3877	isc_result_t result;
3878	dns_message_t *message = fctx->rmessage;
3879	dns_name_t *name;
3880	dns_rdataset_t *rdataset;
3881
3882	/*
3883	 * Caller must be holding the fctx lock.
3884	 */
3885
3886	/*
3887	 * XXXRTH  Currently we support only one question.
3888	 */
3889	if (message->counts[DNS_SECTION_QUESTION] != 1) {
3890		log_formerr(fctx, "too many questions");
3891		return (DNS_R_FORMERR);
3892	}
3893
3894	result = dns_message_firstname(message, DNS_SECTION_QUESTION);
3895	if (result != ISC_R_SUCCESS)
3896		return (result);
3897	name = NULL;
3898	dns_message_currentname(message, DNS_SECTION_QUESTION, &name);
3899	rdataset = ISC_LIST_HEAD(name->list);
3900	INSIST(rdataset != NULL);
3901	INSIST(ISC_LIST_NEXT(rdataset, link) == NULL);
3902
3903	if (fctx->type != rdataset->type ||
3904	    fctx->res->rdclass != rdataset->rdclass ||
3905	    !dns_name_equal(&fctx->name, name)) {
3906		char namebuf[DNS_NAME_FORMATSIZE];
3907		char class[DNS_RDATACLASS_FORMATSIZE];
3908		char type[DNS_RDATATYPE_FORMATSIZE];
3909
3910		dns_name_format(name, namebuf, sizeof(namebuf));
3911		dns_rdataclass_format(rdataset->rdclass, class, sizeof(class));
3912		dns_rdatatype_format(rdataset->type, type, sizeof(type));
3913		log_formerr(fctx, "question section mismatch: got %s/%s/%s",
3914			    namebuf, class, type);
3915		return (DNS_R_FORMERR);
3916	}
3917
3918	return (ISC_R_SUCCESS);
3919}
3920
3921static void
3922clone_results(fetchctx_t *fctx) {
3923	dns_fetchevent_t *event, *hevent;
3924	isc_result_t result;
3925	dns_name_t *name, *hname;
3926
3927	FCTXTRACE("clone_results");
3928
3929	/*
3930	 * Set up any other events to have the same data as the first
3931	 * event.
3932	 *
3933	 * Caller must be holding the appropriate lock.
3934	 */
3935
3936	fctx->cloned = ISC_TRUE;
3937	hevent = ISC_LIST_HEAD(fctx->events);
3938	if (hevent == NULL)
3939		return;
3940	hname = dns_fixedname_name(&hevent->foundname);
3941	for (event = ISC_LIST_NEXT(hevent, ev_link);
3942	     event != NULL;
3943	     event = ISC_LIST_NEXT(event, ev_link)) {
3944		name = dns_fixedname_name(&event->foundname);
3945		result = dns_name_copy(hname, name, NULL);
3946		if (result != ISC_R_SUCCESS)
3947			event->result = result;
3948		else
3949			event->result = hevent->result;
3950		dns_db_attach(hevent->db, &event->db);
3951		dns_db_attachnode(hevent->db, hevent->node, &event->node);
3952		INSIST(hevent->rdataset != NULL);
3953		INSIST(event->rdataset != NULL);
3954		if (dns_rdataset_isassociated(hevent->rdataset))
3955			dns_rdataset_clone(hevent->rdataset, event->rdataset);
3956		INSIST(! (hevent->sigrdataset == NULL &&
3957			  event->sigrdataset != NULL));
3958		if (hevent->sigrdataset != NULL &&
3959		    dns_rdataset_isassociated(hevent->sigrdataset) &&
3960		    event->sigrdataset != NULL)
3961			dns_rdataset_clone(hevent->sigrdataset,
3962					   event->sigrdataset);
3963	}
3964}
3965
/*
 * Predicates on rdataset 'r': each one is true when the corresponding
 * DNS_RDATASETATTR_* bit is set in r->attributes.
 */
#define RDATASET_ATTRSET(r, a)	(((r)->attributes & (a)) != 0)

#define CACHE(r)	RDATASET_ATTRSET(r, DNS_RDATASETATTR_CACHE)
#define ANSWER(r)	RDATASET_ATTRSET(r, DNS_RDATASETATTR_ANSWER)
#define ANSWERSIG(r)	RDATASET_ATTRSET(r, DNS_RDATASETATTR_ANSWERSIG)
#define EXTERNAL(r)	RDATASET_ATTRSET(r, DNS_RDATASETATTR_EXTERNAL)
#define CHAINING(r)	RDATASET_ATTRSET(r, DNS_RDATASETATTR_CHAINING)
#define CHASE(r)	RDATASET_ATTRSET(r, DNS_RDATASETATTR_CHASE)
#define CHECKNAMES(r)	RDATASET_ATTRSET(r, DNS_RDATASETATTR_CHECKNAMES)
3973
3974
3975/*
3976 * Destroy '*fctx' if it is ready to be destroyed (i.e., if it has
3977 * no references and is no longer waiting for any events).
3978 *
3979 * Requires:
3980 *      '*fctx' is shutting down.
3981 *
3982 * Returns:
3983 *	true if the resolver is exiting and this is the last fctx in the bucket.
3984 */
3985static isc_boolean_t
3986maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked) {
3987	unsigned int bucketnum;
3988	isc_boolean_t bucket_empty = ISC_FALSE;
3989	dns_resolver_t *res = fctx->res;
3990	dns_validator_t *validator, *next_validator;
3991	isc_boolean_t destroy = ISC_FALSE;
3992
3993	REQUIRE(SHUTTINGDOWN(fctx));
3994
3995	bucketnum = fctx->bucketnum;
3996	if (!locked)
3997		LOCK(&res->buckets[bucketnum].lock);
3998	if (fctx->pending != 0 || fctx->nqueries != 0)
3999		goto unlock;
4000
4001	for (validator = ISC_LIST_HEAD(fctx->validators);
4002	     validator != NULL; validator = next_validator) {
4003		next_validator = ISC_LIST_NEXT(validator, link);
4004		dns_validator_cancel(validator);
4005	}
4006
4007	if (fctx->references == 0 && ISC_LIST_EMPTY(fctx->validators)) {
4008		bucket_empty = fctx_unlink(fctx);
4009		destroy = ISC_TRUE;
4010	}
4011 unlock:
4012	if (!locked)
4013		UNLOCK(&res->buckets[bucketnum].lock);
4014	if (destroy)
4015		fctx_destroy(fctx);
4016	return (bucket_empty);
4017}
4018
4019/*
4020 * The validator has finished.
4021 */
4022static void
4023validated(isc_task_t *task, isc_event_t *event) {
4024	dns_adbaddrinfo_t *addrinfo;
4025	dns_dbnode_t *node = NULL;
4026	dns_dbnode_t *nsnode = NULL;
4027	dns_fetchevent_t *hevent;
4028	dns_name_t *name;
4029	dns_rdataset_t *ardataset = NULL;
4030	dns_rdataset_t *asigrdataset = NULL;
4031	dns_rdataset_t *rdataset;
4032	dns_rdataset_t *sigrdataset;
4033	dns_resolver_t *res;
4034	dns_valarg_t *valarg;
4035	dns_validatorevent_t *vevent;
4036	fetchctx_t *fctx;
4037	isc_boolean_t chaining;
4038	isc_boolean_t negative;
4039	isc_boolean_t sentresponse;
4040	isc_result_t eresult = ISC_R_SUCCESS;
4041	isc_result_t result = ISC_R_SUCCESS;
4042	isc_stdtime_t now;
4043	isc_uint32_t ttl;
4044
4045	UNUSED(task); /* for now */
4046
4047	REQUIRE(event->ev_type == DNS_EVENT_VALIDATORDONE);
4048	valarg = event->ev_arg;
4049	fctx = valarg->fctx;
4050	res = fctx->res;
4051	addrinfo = valarg->addrinfo;
4052	REQUIRE(VALID_FCTX(fctx));
4053	REQUIRE(!ISC_LIST_EMPTY(fctx->validators));
4054
4055	vevent = (dns_validatorevent_t *)event;
4056	fctx->vresult = vevent->result;
4057
4058	FCTXTRACE("received validation completion event");
4059
4060	LOCK(&res->buckets[fctx->bucketnum].lock);
4061
4062	ISC_LIST_UNLINK(fctx->validators, vevent->validator, link);
4063	fctx->validator = NULL;
4064
4065	/*
4066	 * Destroy the validator early so that we can
4067	 * destroy the fctx if necessary.
4068	 */
4069	dns_validator_destroy(&vevent->validator);
4070	isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
4071
4072	negative = ISC_TF(vevent->rdataset == NULL);
4073
4074	sentresponse = ISC_TF((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0);
4075
4076	/*
4077	 * If shutting down, ignore the results.  Check to see if we're
4078	 * done waiting for validator completions and ADB pending events; if
4079	 * so, destroy the fctx.
4080	 */
4081	if (SHUTTINGDOWN(fctx) && !sentresponse) {
4082		isc_uint32_t bucketnum = fctx->bucketnum;
4083		isc_boolean_t bucket_empty;
4084		bucket_empty = maybe_destroy(fctx, ISC_TRUE);
4085		UNLOCK(&res->buckets[bucketnum].lock);
4086		if (bucket_empty)
4087			empty_bucket(res);
4088		goto cleanup_event;
4089	}
4090
4091	isc_stdtime_get(&now);
4092
4093	/*
4094	 * If chaining, we need to make sure that the right result code is
4095	 * returned, and that the rdatasets are bound.
4096	 */
4097	if (vevent->result == ISC_R_SUCCESS &&
4098	    !negative &&
4099	    vevent->rdataset != NULL &&
4100	    CHAINING(vevent->rdataset))
4101	{
4102		if (vevent->rdataset->type == dns_rdatatype_cname)
4103			eresult = DNS_R_CNAME;
4104		else {
4105			INSIST(vevent->rdataset->type == dns_rdatatype_dname);
4106			eresult = DNS_R_DNAME;
4107		}
4108		chaining = ISC_TRUE;
4109	} else
4110		chaining = ISC_FALSE;
4111
4112	/*
4113	 * Either we're not shutting down, or we are shutting down but want
4114	 * to cache the result anyway (if this was a validation started by
4115	 * a query with cd set)
4116	 */
4117
4118	hevent = ISC_LIST_HEAD(fctx->events);
4119	if (hevent != NULL) {
4120		if (!negative && !chaining &&
4121		    (fctx->type == dns_rdatatype_any ||
4122		     fctx->type == dns_rdatatype_rrsig ||
4123		     fctx->type == dns_rdatatype_sig)) {
4124			/*
4125			 * Don't bind rdatasets; the caller
4126			 * will iterate the node.
4127			 */
4128		} else {
4129			ardataset = hevent->rdataset;
4130			asigrdataset = hevent->sigrdataset;
4131		}
4132	}
4133
4134	if (vevent->result != ISC_R_SUCCESS) {
4135		FCTXTRACE("validation failed");
4136		inc_stats(res, dns_resstatscounter_valfail);
4137		fctx->valfail++;
4138		fctx->vresult = vevent->result;
4139		if (fctx->vresult != DNS_R_BROKENCHAIN) {
4140			result = ISC_R_NOTFOUND;
4141			if (vevent->rdataset != NULL)
4142				result = dns_db_findnode(fctx->cache,
4143							 vevent->name,
4144							 ISC_TRUE, &node);
4145			if (result == ISC_R_SUCCESS)
4146				(void)dns_db_deleterdataset(fctx->cache, node,
4147							     NULL,
4148							    vevent->type, 0);
4149			if (result == ISC_R_SUCCESS &&
4150			     vevent->sigrdataset != NULL)
4151				(void)dns_db_deleterdataset(fctx->cache, node,
4152							    NULL,
4153							    dns_rdatatype_rrsig,
4154							    vevent->type);
4155			if (result == ISC_R_SUCCESS)
4156				dns_db_detachnode(fctx->cache, &node);
4157		}
4158		if (fctx->vresult == DNS_R_BROKENCHAIN && !negative) {
4159			/*
4160			 * Cache the data as pending for later validation.
4161			 */
4162			result = ISC_R_NOTFOUND;
4163			if (vevent->rdataset != NULL)
4164				result = dns_db_findnode(fctx->cache,
4165							 vevent->name,
4166							 ISC_TRUE, &node);
4167			if (result == ISC_R_SUCCESS) {
4168				(void)dns_db_addrdataset(fctx->cache, node,
4169							 NULL, now,
4170							 vevent->rdataset, 0,
4171							 NULL);
4172			}
4173			if (result == ISC_R_SUCCESS &&
4174			    vevent->sigrdataset != NULL)
4175				(void)dns_db_addrdataset(fctx->cache, node,
4176							 NULL, now,
4177							 vevent->sigrdataset,
4178							 0, NULL);
4179			if (result == ISC_R_SUCCESS)
4180				dns_db_detachnode(fctx->cache, &node);
4181		}
4182		result = fctx->vresult;
4183		add_bad(fctx, addrinfo, result, badns_validation);
4184		isc_event_free(&event);
4185		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4186		INSIST(fctx->validator == NULL);
4187		fctx->validator = ISC_LIST_HEAD(fctx->validators);
4188		if (fctx->validator != NULL)
4189			dns_validator_send(fctx->validator);
4190		else if (sentresponse)
4191			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4192		else if (result == DNS_R_BROKENCHAIN) {
4193			isc_result_t tresult;
4194			isc_time_t expire;
4195			isc_interval_t i;
4196
4197			isc_interval_set(&i, DNS_BADCACHE_TTL(fctx), 0);
4198			tresult = isc_time_nowplusinterval(&expire, &i);
4199			if (negative &&
4200			    (fctx->type == dns_rdatatype_dnskey ||
4201			     fctx->type == dns_rdatatype_dlv ||
4202			     fctx->type == dns_rdatatype_ds) &&
4203			     tresult == ISC_R_SUCCESS)
4204				dns_resolver_addbadcache(res, &fctx->name,
4205							 fctx->type, &expire);
4206			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4207		} else
4208			fctx_try(fctx, ISC_TRUE, ISC_TRUE); /* Locks bucket. */
4209		return;
4210	}
4211
4212
4213	if (negative) {
4214		dns_rdatatype_t covers;
4215		FCTXTRACE("nonexistence validation OK");
4216
4217		inc_stats(res, dns_resstatscounter_valnegsuccess);
4218
4219		if (fctx->rmessage->rcode == dns_rcode_nxdomain)
4220			covers = dns_rdatatype_any;
4221		else
4222			covers = fctx->type;
4223
4224		result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE,
4225					 &node);
4226		if (result != ISC_R_SUCCESS)
4227			goto noanswer_response;
4228
4229		/*
		 * If we are asking for a SOA record, set the cache time
		 * to zero to facilitate locating the containing zone of
		 * an arbitrary zone.
4233		 */
4234		ttl = res->view->maxncachettl;
4235		if (fctx->type == dns_rdatatype_soa &&
4236		    covers == dns_rdatatype_any && res->zero_no_soa_ttl)
4237			ttl = 0;
4238
4239		result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
4240					   covers, now, ttl, vevent->optout,
4241					   vevent->secure, ardataset, &eresult);
4242		if (result != ISC_R_SUCCESS)
4243			goto noanswer_response;
4244		goto answer_response;
4245	} else
4246		inc_stats(res, dns_resstatscounter_valsuccess);
4247
4248	FCTXTRACE("validation OK");
4249
4250	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL) {
4251		result = dns_rdataset_addnoqname(vevent->rdataset,
4252				   vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF]);
4253		RUNTIME_CHECK(result == ISC_R_SUCCESS);
4254		INSIST(vevent->sigrdataset != NULL);
4255		vevent->sigrdataset->ttl = vevent->rdataset->ttl;
4256		if (vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER] != NULL) {
4257			result = dns_rdataset_addclosest(vevent->rdataset,
4258				 vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER]);
4259			RUNTIME_CHECK(result == ISC_R_SUCCESS);
4260		}
4261	} else if (vevent->rdataset->trust == dns_trust_answer &&
4262		   vevent->rdataset->type != dns_rdatatype_rrsig)
4263	{
4264		isc_result_t tresult;
4265		dns_name_t *noqname = NULL;
4266		tresult = findnoqname(fctx, vevent->name,
4267				      vevent->rdataset->type, &noqname);
4268		if (tresult == ISC_R_SUCCESS && noqname != NULL) {
4269			tresult = dns_rdataset_addnoqname(vevent->rdataset,
4270							  noqname);
4271			RUNTIME_CHECK(tresult == ISC_R_SUCCESS);
4272		}
4273	}
4274
4275	/*
4276	 * The data was already cached as pending data.
4277	 * Re-cache it as secure and bind the cached
4278	 * rdatasets to the first event on the fetch
4279	 * event list.
4280	 */
4281	result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE, &node);
4282	if (result != ISC_R_SUCCESS)
4283		goto noanswer_response;
4284
4285	result = dns_db_addrdataset(fctx->cache, node, NULL, now,
4286				    vevent->rdataset, 0, ardataset);
4287	if (result != ISC_R_SUCCESS &&
4288	    result != DNS_R_UNCHANGED)
4289		goto noanswer_response;
4290	if (ardataset != NULL && NEGATIVE(ardataset)) {
4291		if (NXDOMAIN(ardataset))
4292			eresult = DNS_R_NCACHENXDOMAIN;
4293		else
4294			eresult = DNS_R_NCACHENXRRSET;
4295	} else if (vevent->sigrdataset != NULL) {
4296		result = dns_db_addrdataset(fctx->cache, node, NULL, now,
4297					    vevent->sigrdataset, 0,
4298					    asigrdataset);
4299		if (result != ISC_R_SUCCESS &&
4300		    result != DNS_R_UNCHANGED)
4301			goto noanswer_response;
4302	}
4303
4304	if (sentresponse) {
4305		isc_boolean_t bucket_empty = ISC_FALSE;
4306		/*
4307		 * If we only deferred the destroy because we wanted to cache
4308		 * the data, destroy now.
4309		 */
4310		dns_db_detachnode(fctx->cache, &node);
4311		if (SHUTTINGDOWN(fctx))
4312			bucket_empty = maybe_destroy(fctx, ISC_TRUE);
4313		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4314		if (bucket_empty)
4315			empty_bucket(res);
4316		goto cleanup_event;
4317	}
4318
4319	if (!ISC_LIST_EMPTY(fctx->validators)) {
4320		INSIST(!negative);
4321		INSIST(fctx->type == dns_rdatatype_any ||
4322		       fctx->type == dns_rdatatype_rrsig ||
4323		       fctx->type == dns_rdatatype_sig);
4324		/*
4325		 * Don't send a response yet - we have
4326		 * more rdatasets that still need to
4327		 * be validated.
4328		 */
4329		dns_db_detachnode(fctx->cache, &node);
4330		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4331		dns_validator_send(ISC_LIST_HEAD(fctx->validators));
4332		goto cleanup_event;
4333	}
4334
4335 answer_response:
4336	/*
4337	 * Cache any NS/NSEC records that happened to be validated.
4338	 */
4339	result = dns_message_firstname(fctx->rmessage, DNS_SECTION_AUTHORITY);
4340	while (result == ISC_R_SUCCESS) {
4341		name = NULL;
4342		dns_message_currentname(fctx->rmessage, DNS_SECTION_AUTHORITY,
4343					&name);
4344		for (rdataset = ISC_LIST_HEAD(name->list);
4345		     rdataset != NULL;
4346		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
4347			if ((rdataset->type != dns_rdatatype_ns &&
4348			     rdataset->type != dns_rdatatype_nsec) ||
4349			    rdataset->trust != dns_trust_secure)
4350				continue;
4351			for (sigrdataset = ISC_LIST_HEAD(name->list);
4352			     sigrdataset != NULL;
4353			     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4354				if (sigrdataset->type != dns_rdatatype_rrsig ||
4355				    sigrdataset->covers != rdataset->type)
4356					continue;
4357				break;
4358			}
4359			if (sigrdataset == NULL ||
4360			    sigrdataset->trust != dns_trust_secure)
4361				continue;
4362			result = dns_db_findnode(fctx->cache, name, ISC_TRUE,
4363						 &nsnode);
4364			if (result != ISC_R_SUCCESS)
4365				continue;
4366
4367			result = dns_db_addrdataset(fctx->cache, nsnode, NULL,
4368						    now, rdataset, 0, NULL);
4369			if (result == ISC_R_SUCCESS)
4370				result = dns_db_addrdataset(fctx->cache, nsnode,
4371							    NULL, now,
4372							    sigrdataset, 0,
4373							    NULL);
4374			dns_db_detachnode(fctx->cache, &nsnode);
4375			if (result != ISC_R_SUCCESS)
4376				continue;
4377		}
4378		result = dns_message_nextname(fctx->rmessage,
4379					      DNS_SECTION_AUTHORITY);
4380	}
4381
4382	result = ISC_R_SUCCESS;
4383
4384	/*
4385	 * Respond with an answer, positive or negative,
4386	 * as opposed to an error.  'node' must be non-NULL.
4387	 */
4388
4389	fctx->attributes |= FCTX_ATTR_HAVEANSWER;
4390
4391	if (hevent != NULL) {
4392		/*
4393		 * Negative results must be indicated in event->result.
4394		 */
4395		if (dns_rdataset_isassociated(hevent->rdataset) &&
4396		    NEGATIVE(hevent->rdataset)) {
4397			INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
4398			       eresult == DNS_R_NCACHENXRRSET);
4399		}
4400		hevent->result = eresult;
4401		RUNTIME_CHECK(dns_name_copy(vevent->name,
4402			      dns_fixedname_name(&hevent->foundname), NULL)
4403			      == ISC_R_SUCCESS);
4404		dns_db_attach(fctx->cache, &hevent->db);
4405		dns_db_transfernode(fctx->cache, &node, &hevent->node);
4406		clone_results(fctx);
4407	}
4408
4409 noanswer_response:
4410	if (node != NULL)
4411		dns_db_detachnode(fctx->cache, &node);
4412
4413	UNLOCK(&res->buckets[fctx->bucketnum].lock);
4414	fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4415
4416 cleanup_event:
4417	INSIST(node == NULL);
4418	isc_event_free(&event);
4419}
4420
4421static void
4422fctx_log(void *arg, int level, const char *fmt, ...) {
4423	char msgbuf[2048];
4424	va_list args;
4425	fetchctx_t *fctx = arg;
4426
4427	va_start(args, fmt);
4428	vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
4429	va_end(args);
4430
4431	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4432		      DNS_LOGMODULE_RESOLVER, level,
4433		      "fctx %p(%s): %s", fctx, fctx->info, msgbuf);
4434}
4435
4436static inline isc_result_t
4437findnoqname(fetchctx_t *fctx, dns_name_t *name, dns_rdatatype_t type,
4438	    dns_name_t **noqnamep)
4439{
4440	dns_rdataset_t *nrdataset, *next, *sigrdataset;
4441	dns_rdata_rrsig_t rrsig;
4442	isc_result_t result;
4443	unsigned int labels;
4444	dns_section_t section;
4445	dns_name_t *zonename;
4446	dns_fixedname_t fzonename;
4447	dns_name_t *closest;
4448	dns_fixedname_t fclosest;
4449	dns_name_t *nearest;
4450	dns_fixedname_t fnearest;
4451	dns_rdatatype_t found = dns_rdatatype_none;
4452	dns_name_t *noqname = NULL;
4453
4454	FCTXTRACE("findnoqname");
4455
4456	REQUIRE(noqnamep != NULL && *noqnamep == NULL);
4457
4458	/*
4459	 * Find the SIG for this rdataset, if we have it.
4460	 */
4461	for (sigrdataset = ISC_LIST_HEAD(name->list);
4462	     sigrdataset != NULL;
4463	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4464		if (sigrdataset->type == dns_rdatatype_rrsig &&
4465		    sigrdataset->covers == type)
4466			break;
4467	}
4468
4469	if (sigrdataset == NULL)
4470		return (ISC_R_NOTFOUND);
4471
4472	labels = dns_name_countlabels(name);
4473
4474	for (result = dns_rdataset_first(sigrdataset);
4475	     result == ISC_R_SUCCESS;
4476	     result = dns_rdataset_next(sigrdataset)) {
4477		dns_rdata_t rdata = DNS_RDATA_INIT;
4478		dns_rdataset_current(sigrdataset, &rdata);
4479		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
4480		RUNTIME_CHECK(result == ISC_R_SUCCESS);
4481		/* Wildcard has rrsig.labels < labels - 1. */
4482		if (rrsig.labels + 1U >= labels)
4483			continue;
4484		break;
4485	}
4486
4487	if (result == ISC_R_NOMORE)
4488		return (ISC_R_NOTFOUND);
4489	if (result != ISC_R_SUCCESS)
4490		return (result);
4491
4492	dns_fixedname_init(&fzonename);
4493	zonename = dns_fixedname_name(&fzonename);
4494	dns_fixedname_init(&fclosest);
4495	closest = dns_fixedname_name(&fclosest);
4496	dns_fixedname_init(&fnearest);
4497	nearest = dns_fixedname_name(&fnearest);
4498
4499#define NXND(x) ((x) == ISC_R_SUCCESS)
4500
4501	section = DNS_SECTION_AUTHORITY;
4502	for (result = dns_message_firstname(fctx->rmessage, section);
4503	     result == ISC_R_SUCCESS;
4504	     result = dns_message_nextname(fctx->rmessage, section)) {
4505		dns_name_t *nsec = NULL;
4506		dns_message_currentname(fctx->rmessage, section, &nsec);
4507		for (nrdataset = ISC_LIST_HEAD(nsec->list);
4508		      nrdataset != NULL; nrdataset = next) {
4509			isc_boolean_t data = ISC_FALSE, exists = ISC_FALSE;
4510			isc_boolean_t optout = ISC_FALSE, unknown = ISC_FALSE;
4511			isc_boolean_t setclosest = ISC_FALSE;
4512			isc_boolean_t setnearest = ISC_FALSE;
4513
4514			next = ISC_LIST_NEXT(nrdataset, link);
4515			if (nrdataset->type != dns_rdatatype_nsec &&
4516			    nrdataset->type != dns_rdatatype_nsec3)
4517				continue;
4518
4519			if (nrdataset->type == dns_rdatatype_nsec &&
4520			    NXND(dns_nsec_noexistnodata(type, name, nsec,
4521							nrdataset, &exists,
4522							&data, NULL, fctx_log,
4523							fctx)))
4524			{
4525				if (!exists) {
4526					noqname = nsec;
4527					found = dns_rdatatype_nsec;
4528				}
4529			}
4530
4531			if (nrdataset->type == dns_rdatatype_nsec3 &&
4532			    NXND(dns_nsec3_noexistnodata(type, name, nsec,
4533							 nrdataset, zonename,
4534							 &exists, &data,
4535							 &optout, &unknown,
4536							 &setclosest,
4537							 &setnearest,
4538							 closest, nearest,
4539							 fctx_log, fctx)))
4540			{
4541				if (!exists && setnearest) {
4542					noqname = nsec;
4543					found = dns_rdatatype_nsec3;
4544				}
4545			}
4546		}
4547	}
4548	if (result == ISC_R_NOMORE)
4549		result = ISC_R_SUCCESS;
4550	if (noqname != NULL) {
4551		for (sigrdataset = ISC_LIST_HEAD(noqname->list);
4552		     sigrdataset != NULL;
4553		     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4554			if (sigrdataset->type == dns_rdatatype_rrsig &&
4555			    sigrdataset->covers == found)
4556				break;
4557		}
4558		if (sigrdataset != NULL)
4559			*noqnamep = noqname;
4560	}
4561	return (result);
4562}
4563
/*
 * Process every cacheable rdataset attached to 'name' (one owner name taken
 * from the response message): add it to fctx->cache and/or hand it to a
 * DNSSEC validator.
 *
 * 'addrinfo' is only passed through to valcreate(); 'now' is the time given
 * to dns_db_addrdataset().
 *
 * If 'name' is flagged DNS_NAMEATTR_ANSWER and validation is not needed,
 * the first fetch event (if any) is filled in with the cache database,
 * node, found name and result code, FCTX_ATTR_HAVEANSWER is set and
 * clone_results() is called.
 *
 * Returns ISC_R_SUCCESS, DNS_R_BADNAME when an answer rdataset fails
 * check-names while DNS_RESOLVER_CHECKNAMESFAIL is set, or the failure
 * result of dns_view_issecuredomain(), dns_name_copy(), dns_db_findnode(),
 * dns_db_addrdataset() or valcreate().
 */
static inline isc_result_t
cache_name(fetchctx_t *fctx, dns_name_t *name, dns_adbaddrinfo_t *addrinfo,
	   isc_stdtime_t now)
{
	dns_rdataset_t *rdataset, *sigrdataset;
	dns_rdataset_t *addedrdataset, *ardataset, *asigrdataset;
	dns_rdataset_t *valrdataset = NULL, *valsigrdataset = NULL;
	dns_dbnode_t *node, **anodep;
	dns_db_t **adbp;
	dns_name_t *aname;
	dns_resolver_t *res;
	isc_boolean_t need_validation, secure_domain, have_answer;
	isc_result_t result, eresult;
	dns_fetchevent_t *event;
	unsigned int options;
	isc_task_t *task;
	isc_boolean_t fail;
	unsigned int valoptions = 0;

	/*
	 * The appropriate bucket lock must be held.
	 */

	res = fctx->res;
	need_validation = ISC_FALSE;
	POST(need_validation);
	secure_domain = ISC_FALSE;
	have_answer = ISC_FALSE;
	eresult = ISC_R_SUCCESS;
	task = res->buckets[fctx->bucketnum].task;

	/*
	 * Is DNSSEC validation required for this name?
	 */
	if (res->view->enablevalidation) {
		result = dns_view_issecuredomain(res->view, name,
						 &secure_domain);
		if (result != ISC_R_SUCCESS)
			return (result);

		/*
		 * Not a secure domain, but the view has 'dlv' set: treat
		 * the name as secure and pass DNS_VALIDATOR_DLV to any
		 * validator created below.
		 */
		if (!secure_domain && res->view->dlv != NULL) {
			valoptions = DNS_VALIDATOR_DLV;
			secure_domain = ISC_TRUE;
		}
	}

	/*
	 * DNS_FETCHOPT_NOVALIDATE suppresses validation, but
	 * 'secure_domain' is left as is, so such data is still cached
	 * with pending trust below.
	 */
	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
		need_validation = ISC_FALSE;
	else
		need_validation = secure_domain;

	/*
	 * The a* variables point into the first fetch event and stay NULL
	 * unless this name is an answer that can be delivered without
	 * validation.
	 */
	adbp = NULL;
	aname = NULL;
	anodep = NULL;
	ardataset = NULL;
	asigrdataset = NULL;
	event = NULL;
	if ((name->attributes & DNS_NAMEATTR_ANSWER) != 0 &&
	    !need_validation) {
		have_answer = ISC_TRUE;
		event = ISC_LIST_HEAD(fctx->events);
		if (event != NULL) {
			adbp = &event->db;
			aname = dns_fixedname_name(&event->foundname);
			result = dns_name_copy(name, aname, NULL);
			if (result != ISC_R_SUCCESS)
				return (result);
			anodep = &event->node;
			/*
			 * If this is an ANY, SIG or RRSIG query, we're not
			 * going to return any rdatasets, unless we encountered
			 * a CNAME or DNAME as "the answer".  In this case,
			 * we're going to return DNS_R_CNAME or DNS_R_DNAME
			 * and we must set up the rdatasets.
			 */
			if ((fctx->type != dns_rdatatype_any &&
			     fctx->type != dns_rdatatype_rrsig &&
			     fctx->type != dns_rdatatype_sig) ||
			    (name->attributes & DNS_NAMEATTR_CHAINING) != 0) {
				ardataset = event->rdataset;
				asigrdataset = event->sigrdataset;
			}
		}
	}

	/*
	 * Find or create the cache node.
	 */
	node = NULL;
	result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
	if (result != ISC_R_SUCCESS)
		return (result);

	/*
	 * Cache or validate each cacheable rdataset.
	 *
	 * 'fail' selects whether a check-names violation is treated as a
	 * failure or merely logged as a warning.
	 */
	fail = ISC_TF((fctx->res->options & DNS_RESOLVER_CHECKNAMESFAIL) != 0);
	for (rdataset = ISC_LIST_HEAD(name->list);
	     rdataset != NULL;
	     rdataset = ISC_LIST_NEXT(rdataset, link)) {
		if (!CACHE(rdataset))
			continue;
		if (CHECKNAMES(rdataset)) {
			char namebuf[DNS_NAME_FORMATSIZE];
			char typebuf[DNS_RDATATYPE_FORMATSIZE];
			/*
			 * NOTE(review): classbuf holds a class name but is
			 * sized with the rdatatype constant -- confirm it is
			 * at least as large as the rdataclass format size.
			 */
			char classbuf[DNS_RDATATYPE_FORMATSIZE];

			dns_name_format(name, namebuf, sizeof(namebuf));
			dns_rdatatype_format(rdataset->type, typebuf,
					     sizeof(typebuf));
			dns_rdataclass_format(rdataset->rdclass, classbuf,
					      sizeof(classbuf));
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
				      "check-names %s %s/%s/%s",
				      fail ? "failure" : "warning",
				      namebuf, typebuf, classbuf);
			if (fail) {
				/*
				 * A failing answer rdataset aborts the whole
				 * name; any other failing rdataset is simply
				 * not cached.
				 */
				if (ANSWER(rdataset)) {
					dns_db_detachnode(fctx->cache, &node);
					return (DNS_R_BADNAME);
				}
				continue;
			}
		}

		/*
		 * Enforce the configured maximum cache TTL.
		 */
		if (rdataset->ttl > res->view->maxcachettl)
			rdataset->ttl = res->view->maxcachettl;

		/*
		 * Find the SIG for this rdataset, if we have it.
		 * 'sigrdataset' is NULL after the loop if there is none.
		 */
		for (sigrdataset = ISC_LIST_HEAD(name->list);
		     sigrdataset != NULL;
		     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
			if (sigrdataset->type == dns_rdatatype_rrsig &&
			    sigrdataset->covers == rdataset->type)
				break;
		}

		/*
		 * If this RRset is in a secure domain, is in bailiwick,
		 * and is not glue, attempt DNSSEC validation.	(We do not
		 * attempt to validate glue or out-of-bailiwick data--even
		 * though there might be some performance benefit to doing
		 * so--because it makes it simpler and safer to ensure that
		 * records from a secure domain are only cached if validated
		 * within the context of a query to the domain that owns
		 * them.)
		 */
		if (secure_domain && rdataset->trust != dns_trust_glue &&
		    !EXTERNAL(rdataset)) {
			dns_trust_t trust;

			/*
			 * RRSIGs are validated as part of validating the
			 * type they cover.
			 */
			if (rdataset->type == dns_rdatatype_rrsig)
				continue;

			if (sigrdataset == NULL) {
				if (!ANSWER(rdataset) && need_validation) {
					/*
					 * Ignore non-answer rdatasets that
					 * are missing signatures.
					 */
					continue;
				}
			}

			/*
			 * Normalize the rdataset and sigrdataset TTLs.
			 */
			if (sigrdataset != NULL) {
				rdataset->ttl = ISC_MIN(rdataset->ttl,
							sigrdataset->ttl);
				sigrdataset->ttl = rdataset->ttl;
			}

			/*
			 * Cache this rdataset/sigrdataset pair as
			 * pending data.  Track whether it was additional
			 * or not.
			 */
			if (rdataset->trust == dns_trust_additional)
				trust = dns_trust_pending_additional;
			else
				trust = dns_trust_pending_answer;

			rdataset->trust = trust;
			if (sigrdataset != NULL)
				sigrdataset->trust = trust;
			/*
			 * Everything except an answer that is about to be
			 * validated is added to the cache right away with
			 * the pending trust set above.
			 */
			if (!need_validation || !ANSWER(rdataset)) {
				if (ANSWER(rdataset) &&
				   rdataset->type != dns_rdatatype_rrsig) {
					isc_result_t tresult;
					dns_name_t *noqname = NULL;
					tresult = findnoqname(fctx, name,
							      rdataset->type,
							      &noqname);
					if (tresult == ISC_R_SUCCESS &&
					    noqname != NULL) {
						tresult =
						     dns_rdataset_addnoqname(
							    rdataset, noqname);
						RUNTIME_CHECK(tresult ==
							      ISC_R_SUCCESS);
					}
				}
				addedrdataset = ardataset;
				result = dns_db_addrdataset(fctx->cache, node,
							    NULL, now, rdataset,
							    0, addedrdataset);
				if (result == DNS_R_UNCHANGED) {
					result = ISC_R_SUCCESS;
					if (!need_validation &&
					    ardataset != NULL &&
					    NEGATIVE(ardataset)) {
						/*
						 * The answer in the cache is
						 * better than the answer we
						 * found, and is a negative
						 * cache entry, so we must set
						 * eresult appropriately.
						 */
						if (NXDOMAIN(ardataset))
							eresult =
							   DNS_R_NCACHENXDOMAIN;
						else
							eresult =
							   DNS_R_NCACHENXRRSET;
						/*
						 * We have a negative response
						 * from the cache so don't
						 * attempt to add the RRSIG
						 * rrset.
						 */
						continue;
					}
				}
				if (result != ISC_R_SUCCESS)
					break;
				if (sigrdataset != NULL) {
					addedrdataset = asigrdataset;
					result = dns_db_addrdataset(fctx->cache,
								node, NULL, now,
								sigrdataset, 0,
								addedrdataset);
					if (result == DNS_R_UNCHANGED)
						result = ISC_R_SUCCESS;
					if (result != ISC_R_SUCCESS)
						break;
				} else if (!ANSWER(rdataset))
					/*
					 * Unsigned non-answer data: nothing
					 * more to do for this rdataset.
					 */
					continue;
			}

			if (ANSWER(rdataset) && need_validation) {
				if (fctx->type != dns_rdatatype_any &&
				    fctx->type != dns_rdatatype_rrsig &&
				    fctx->type != dns_rdatatype_sig) {
					/*
					 * This is The Answer.  We will
					 * validate it, but first we cache
					 * the rest of the response - it may
					 * contain useful keys.
					 */
					INSIST(valrdataset == NULL &&
					       valsigrdataset == NULL);
					valrdataset = rdataset;
					valsigrdataset = sigrdataset;
				} else {
					/*
					 * This is one of (potentially)
					 * multiple answers to an ANY
					 * or SIG query.  To keep things
					 * simple, we just start the
					 * validator right away rather
					 * than caching first and
					 * having to remember which
					 * rdatasets needed validation.
					 */
					result = valcreate(fctx, addrinfo,
							   name, rdataset->type,
							   rdataset,
							   sigrdataset,
							   valoptions, task);
				}
			} else if (CHAINING(rdataset)) {
				/*
				 * Report a CNAME/DNAME through eresult
				 * rather than plain success.
				 */
				if (rdataset->type == dns_rdatatype_cname)
					eresult = DNS_R_CNAME;
				else {
					INSIST(rdataset->type ==
					       dns_rdatatype_dname);
					eresult = DNS_R_DNAME;
				}
			}
		} else if (!EXTERNAL(rdataset)) {
			/*
			 * It's OK to cache this rdataset now.
			 *
			 * 'addedrdataset' selects which of the event's
			 * rdatasets (if any) is bound to the cached data.
			 */
			if (ANSWER(rdataset))
				addedrdataset = ardataset;
			else if (ANSWERSIG(rdataset))
				addedrdataset = asigrdataset;
			else
				addedrdataset = NULL;
			if (CHAINING(rdataset)) {
				if (rdataset->type == dns_rdatatype_cname)
					eresult = DNS_R_CNAME;
				else {
					INSIST(rdataset->type ==
					       dns_rdatatype_dname);
					eresult = DNS_R_DNAME;
				}
			}
			if (rdataset->trust == dns_trust_glue &&
			    (rdataset->type == dns_rdatatype_ns ||
			     (rdataset->type == dns_rdatatype_rrsig &&
			      rdataset->covers == dns_rdatatype_ns))) {
				/*
				 * If the trust level is 'dns_trust_glue'
				 * then we are adding data from a referral
				 * we got while executing the search algorithm.
				 * New referral data always takes precedence
				 * over the existing cache contents.
				 */
				options = DNS_DBADD_FORCE;
			} else
				options = 0;

			if (ANSWER(rdataset) &&
			   rdataset->type != dns_rdatatype_rrsig) {
				isc_result_t tresult;
				dns_name_t *noqname = NULL;
				tresult = findnoqname(fctx, name,
						      rdataset->type, &noqname);
				if (tresult == ISC_R_SUCCESS &&
				    noqname != NULL) {
					tresult = dns_rdataset_addnoqname(
							    rdataset, noqname);
					RUNTIME_CHECK(tresult == ISC_R_SUCCESS);
				}
			}

			/*
			 * Now we can add the rdataset.
			 */
			result = dns_db_addrdataset(fctx->cache,
						    node, NULL, now,
						    rdataset,
						    options,
						    addedrdataset);

			if (result == DNS_R_UNCHANGED) {
				if (ANSWER(rdataset) &&
				    ardataset != NULL &&
				    NEGATIVE(ardataset)) {
					/*
					 * The answer in the cache is better
					 * than the answer we found, and is
					 * a negative cache entry, so we
					 * must set eresult appropriately.
					 */
					if (NXDOMAIN(ardataset))
						eresult = DNS_R_NCACHENXDOMAIN;
					else
						eresult = DNS_R_NCACHENXRRSET;
				}
				result = ISC_R_SUCCESS;
			} else if (result != ISC_R_SUCCESS)
				break;
		}
		/*
		 * External (out-of-bailiwick) rdatasets that were not
		 * handled by the secure-domain branch are not cached.
		 */
	}

	/*
	 * Start validating "The Answer" now that the rest of the name's
	 * rdatasets have been processed.
	 */
	if (valrdataset != NULL)
		result = valcreate(fctx, addrinfo, name, fctx->type,
				   valrdataset, valsigrdataset, valoptions,
				   task);

	if (result == ISC_R_SUCCESS && have_answer) {
		fctx->attributes |= FCTX_ATTR_HAVEANSWER;
		if (event != NULL) {
			/*
			 * Negative results must be indicated in event->result.
			 */
			if (dns_rdataset_isassociated(event->rdataset) &&
			    NEGATIVE(event->rdataset)) {
				INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
				       eresult == DNS_R_NCACHENXRRSET);
			}
			event->result = eresult;
			/*
			 * Release any database/node the event already
			 * holds before binding it to the cache node.
			 */
			if (adbp != NULL && *adbp != NULL) {
				if (anodep != NULL && *anodep != NULL)
					dns_db_detachnode(*adbp, anodep);
				dns_db_detach(adbp);
			}
			dns_db_attach(fctx->cache, adbp);
			dns_db_transfernode(fctx->cache, &node, anodep);
			clone_results(fctx);
		}
	}

	/*
	 * Detach the node unless it is already NULL.
	 */
	if (node != NULL)
		dns_db_detachnode(fctx->cache, &node);

	return (result);
}
4975
4976static inline isc_result_t
4977cache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_stdtime_t now)
4978{
4979	isc_result_t result;
4980	dns_section_t section;
4981	dns_name_t *name;
4982
4983	FCTXTRACE("cache_message");
4984
4985	fctx->attributes &= ~FCTX_ATTR_WANTCACHE;
4986
4987	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4988
4989	for (section = DNS_SECTION_ANSWER;
4990	     section <= DNS_SECTION_ADDITIONAL;
4991	     section++) {
4992		result = dns_message_firstname(fctx->rmessage, section);
4993		while (result == ISC_R_SUCCESS) {
4994			name = NULL;
4995			dns_message_currentname(fctx->rmessage, section,
4996						&name);
4997			if ((name->attributes & DNS_NAMEATTR_CACHE) != 0) {
4998				result = cache_name(fctx, name, addrinfo, now);
4999				if (result != ISC_R_SUCCESS)
5000					break;
5001			}
5002			result = dns_message_nextname(fctx->rmessage, section);
5003		}
5004		if (result != ISC_R_NOMORE)
5005			break;
5006	}
5007	if (result == ISC_R_NOMORE)
5008		result = ISC_R_SUCCESS;
5009
5010	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
5011
5012	return (result);
5013}
5014
5015/*
5016 * Do what dns_ncache_addoptout() does, and then compute an appropriate eresult.
5017 */
5018static isc_result_t
5019ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node,
5020		  dns_rdatatype_t covers, isc_stdtime_t now, dns_ttl_t maxttl,
5021		  isc_boolean_t optout, isc_boolean_t secure,
5022		  dns_rdataset_t *ardataset, isc_result_t *eresultp)
5023{
5024	isc_result_t result;
5025	dns_rdataset_t rdataset;
5026
5027	if (ardataset == NULL) {
5028		dns_rdataset_init(&rdataset);
5029		ardataset = &rdataset;
5030	}
5031	if (secure)
5032		result = dns_ncache_addoptout(message, cache, node, covers,
5033					      now, maxttl, optout, ardataset);
5034	else
5035		result = dns_ncache_add(message, cache, node, covers, now,
5036					maxttl, ardataset);
5037	if (result == DNS_R_UNCHANGED || result == ISC_R_SUCCESS) {
5038		/*
5039		 * If the cache now contains a negative entry and we
5040		 * care about whether it is DNS_R_NCACHENXDOMAIN or
5041		 * DNS_R_NCACHENXRRSET then extract it.
5042		 */
5043		if (NEGATIVE(ardataset)) {
5044			/*
5045			 * The cache data is a negative cache entry.
5046			 */
5047			if (NXDOMAIN(ardataset))
5048				*eresultp = DNS_R_NCACHENXDOMAIN;
5049			else
5050				*eresultp = DNS_R_NCACHENXRRSET;
5051		} else {
5052			/*
5053			 * Either we don't care about the nature of the
5054			 * cache rdataset (because no fetch is interested
5055			 * in the outcome), or the cache rdataset is not
5056			 * a negative cache entry.  Whichever case it is,
5057			 * we can return success.
5058			 *
5059			 * XXXRTH  There's a CNAME/DNAME problem here.
5060			 */
5061			*eresultp = ISC_R_SUCCESS;
5062		}
5063		result = ISC_R_SUCCESS;
5064	}
5065	if (ardataset == &rdataset && dns_rdataset_isassociated(ardataset))
5066		dns_rdataset_disassociate(ardataset);
5067
5068	return (result);
5069}
5070
5071static inline isc_result_t
5072ncache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
5073	       dns_rdatatype_t covers, isc_stdtime_t now)
5074{
5075	isc_result_t result, eresult;
5076	dns_name_t *name;
5077	dns_resolver_t *res;
5078	dns_db_t **adbp;
5079	dns_dbnode_t *node, **anodep;
5080	dns_rdataset_t *ardataset;
5081	isc_boolean_t need_validation, secure_domain;
5082	dns_name_t *aname;
5083	dns_fetchevent_t *event;
5084	isc_uint32_t ttl;
5085	unsigned int valoptions = 0;
5086
5087	FCTXTRACE("ncache_message");
5088
5089	fctx->attributes &= ~FCTX_ATTR_WANTNCACHE;
5090
5091	res = fctx->res;
5092	need_validation = ISC_FALSE;
5093	POST(need_validation);
5094	secure_domain = ISC_FALSE;
5095	eresult = ISC_R_SUCCESS;
5096	name = &fctx->name;
5097	node = NULL;
5098
5099	/*
5100	 * XXXMPA remove when we follow cnames and adjust the setting
5101	 * of FCTX_ATTR_WANTNCACHE in noanswer_response().
5102	 */
5103	INSIST(fctx->rmessage->counts[DNS_SECTION_ANSWER] == 0);
5104
5105	/*
5106	 * Is DNSSEC validation required for this name?
5107	 */
5108	if (fctx->res->view->enablevalidation) {
5109		result = dns_view_issecuredomain(res->view, name,
5110						 &secure_domain);
5111		if (result != ISC_R_SUCCESS)
5112			return (result);
5113
5114		if (!secure_domain && res->view->dlv != NULL) {
5115			valoptions = DNS_VALIDATOR_DLV;
5116			secure_domain = ISC_TRUE;
5117		}
5118	}
5119
5120	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
5121		need_validation = ISC_FALSE;
5122	else
5123		need_validation = secure_domain;
5124
5125	if (secure_domain) {
5126		/*
5127		 * Mark all rdatasets as pending.
5128		 */
5129		dns_rdataset_t *trdataset;
5130		dns_name_t *tname;
5131
5132		result = dns_message_firstname(fctx->rmessage,
5133					       DNS_SECTION_AUTHORITY);
5134		while (result == ISC_R_SUCCESS) {
5135			tname = NULL;
5136			dns_message_currentname(fctx->rmessage,
5137						DNS_SECTION_AUTHORITY,
5138						&tname);
5139			for (trdataset = ISC_LIST_HEAD(tname->list);
5140			     trdataset != NULL;
5141			     trdataset = ISC_LIST_NEXT(trdataset, link))
5142				trdataset->trust = dns_trust_pending_answer;
5143			result = dns_message_nextname(fctx->rmessage,
5144						      DNS_SECTION_AUTHORITY);
5145		}
5146		if (result != ISC_R_NOMORE)
5147			return (result);
5148
5149	}
5150
5151	if (need_validation) {
5152		/*
5153		 * Do negative response validation.
5154		 */
5155		result = valcreate(fctx, addrinfo, name, fctx->type,
5156				   NULL, NULL, valoptions,
5157				   res->buckets[fctx->bucketnum].task);
5158		/*
5159		 * If validation is necessary, return now.  Otherwise continue
5160		 * to process the message, letting the validation complete
5161		 * in its own good time.
5162		 */
5163		return (result);
5164	}
5165
5166	LOCK(&res->buckets[fctx->bucketnum].lock);
5167
5168	adbp = NULL;
5169	aname = NULL;
5170	anodep = NULL;
5171	ardataset = NULL;
5172	if (!HAVE_ANSWER(fctx)) {
5173		event = ISC_LIST_HEAD(fctx->events);
5174		if (event != NULL) {
5175			adbp = &event->db;
5176			aname = dns_fixedname_name(&event->foundname);
5177			result = dns_name_copy(name, aname, NULL);
5178			if (result != ISC_R_SUCCESS)
5179				goto unlock;
5180			anodep = &event->node;
5181			ardataset = event->rdataset;
5182		}
5183	} else
5184		event = NULL;
5185
5186	result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
5187	if (result != ISC_R_SUCCESS)
5188		goto unlock;
5189
5190	/*
5191	 * If we are asking for a SOA record set the cache time
5192	 * to zero to facilitate locating the containing zone of
5193	 * a arbitrary zone.
5194	 */
5195	ttl = fctx->res->view->maxncachettl;
5196	if (fctx->type == dns_rdatatype_soa &&
5197	    covers == dns_rdatatype_any &&
5198	    fctx->res->zero_no_soa_ttl)
5199		ttl = 0;
5200
5201	result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
5202				   covers, now, ttl, ISC_FALSE,
5203				   ISC_FALSE, ardataset, &eresult);
5204	if (result != ISC_R_SUCCESS)
5205		goto unlock;
5206
5207	if (!HAVE_ANSWER(fctx)) {
5208		fctx->attributes |= FCTX_ATTR_HAVEANSWER;
5209		if (event != NULL) {
5210			event->result = eresult;
5211			if (adbp != NULL && *adbp != NULL) {
5212				if (anodep != NULL && *anodep != NULL)
5213					dns_db_detachnode(*adbp, anodep);
5214				dns_db_detach(adbp);
5215			}
5216			dns_db_attach(fctx->cache, adbp);
5217			dns_db_transfernode(fctx->cache, &node, anodep);
5218			clone_results(fctx);
5219		}
5220	}
5221
5222 unlock:
5223	UNLOCK(&res->buckets[fctx->bucketnum].lock);
5224
5225	if (node != NULL)
5226		dns_db_detachnode(fctx->cache, &node);
5227
5228	return (result);
5229}
5230
5231static inline void
5232mark_related(dns_name_t *name, dns_rdataset_t *rdataset,
5233	     isc_boolean_t external, isc_boolean_t gluing)
5234{
5235	name->attributes |= DNS_NAMEATTR_CACHE;
5236	if (gluing) {
5237		rdataset->trust = dns_trust_glue;
5238		/*
5239		 * Glue with 0 TTL causes problems.  We force the TTL to
5240		 * 1 second to prevent this.
5241		 */
5242		if (rdataset->ttl == 0)
5243			rdataset->ttl = 1;
5244	} else
5245		rdataset->trust = dns_trust_additional;
5246	/*
5247	 * Avoid infinite loops by only marking new rdatasets.
5248	 */
5249	if (!CACHE(rdataset)) {
5250		name->attributes |= DNS_NAMEATTR_CHASE;
5251		rdataset->attributes |= DNS_RDATASETATTR_CHASE;
5252	}
5253	rdataset->attributes |= DNS_RDATASETATTR_CACHE;
5254	if (external)
5255		rdataset->attributes |= DNS_RDATASETATTR_EXTERNAL;
5256}
5257
5258static isc_result_t
5259check_section(void *arg, dns_name_t *addname, dns_rdatatype_t type,
5260	      dns_section_t section)
5261{
5262	fetchctx_t *fctx = arg;
5263	isc_result_t result;
5264	dns_name_t *name;
5265	dns_rdataset_t *rdataset;
5266	isc_boolean_t external;
5267	dns_rdatatype_t rtype;
5268	isc_boolean_t gluing;
5269
5270	REQUIRE(VALID_FCTX(fctx));
5271
5272#if CHECK_FOR_GLUE_IN_ANSWER
5273	if (section == DNS_SECTION_ANSWER && type != dns_rdatatype_a)
5274		return (ISC_R_SUCCESS);
5275#endif
5276
5277	if (GLUING(fctx))
5278		gluing = ISC_TRUE;
5279	else
5280		gluing = ISC_FALSE;
5281	name = NULL;
5282	rdataset = NULL;
5283	result = dns_message_findname(fctx->rmessage, section, addname,
5284				      dns_rdatatype_any, 0, &name, NULL);
5285	if (result == ISC_R_SUCCESS) {
5286		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
5287		if (type == dns_rdatatype_a) {
5288			for (rdataset = ISC_LIST_HEAD(name->list);
5289			     rdataset != NULL;
5290			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5291				if (rdataset->type == dns_rdatatype_rrsig)
5292					rtype = rdataset->covers;
5293				else
5294					rtype = rdataset->type;
5295				if (rtype == dns_rdatatype_a ||
5296				    rtype == dns_rdatatype_aaaa)
5297					mark_related(name, rdataset, external,
5298						     gluing);
5299			}
5300		} else {
5301			result = dns_message_findtype(name, type, 0,
5302						      &rdataset);
5303			if (result == ISC_R_SUCCESS) {
5304				mark_related(name, rdataset, external, gluing);
5305				/*
5306				 * Do we have its SIG too?
5307				 */
5308				rdataset = NULL;
5309				result = dns_message_findtype(name,
5310						      dns_rdatatype_rrsig,
5311						      type, &rdataset);
5312				if (result == ISC_R_SUCCESS)
5313					mark_related(name, rdataset, external,
5314						     gluing);
5315			}
5316		}
5317	}
5318
5319	return (ISC_R_SUCCESS);
5320}
5321
5322static isc_result_t
5323check_related(void *arg, dns_name_t *addname, dns_rdatatype_t type) {
5324	return (check_section(arg, addname, type, DNS_SECTION_ADDITIONAL));
5325}
5326
/*
 * CHECK_FOR_GLUE_IN_ANSWER defaults to 0 (disabled) unless it was
 * already defined.
 */
#if !defined(CHECK_FOR_GLUE_IN_ANSWER)
#define CHECK_FOR_GLUE_IN_ANSWER 0
#endif
#if CHECK_FOR_GLUE_IN_ANSWER
/*
 * Additional-data callback: run check_section() against the answer
 * section of the response.
 */
static isc_result_t
check_answer(void *arg, dns_name_t *addname, dns_rdatatype_t type) {
	const dns_section_t section = DNS_SECTION_ANSWER;

	return (check_section(arg, addname, type, section));
}
#endif
5336
5337static void
5338chase_additional(fetchctx_t *fctx) {
5339	isc_boolean_t rescan;
5340	dns_section_t section = DNS_SECTION_ADDITIONAL;
5341	isc_result_t result;
5342
5343 again:
5344	rescan = ISC_FALSE;
5345
5346	for (result = dns_message_firstname(fctx->rmessage, section);
5347	     result == ISC_R_SUCCESS;
5348	     result = dns_message_nextname(fctx->rmessage, section)) {
5349		dns_name_t *name = NULL;
5350		dns_rdataset_t *rdataset;
5351		dns_message_currentname(fctx->rmessage, DNS_SECTION_ADDITIONAL,
5352					&name);
5353		if ((name->attributes & DNS_NAMEATTR_CHASE) == 0)
5354			continue;
5355		name->attributes &= ~DNS_NAMEATTR_CHASE;
5356		for (rdataset = ISC_LIST_HEAD(name->list);
5357		     rdataset != NULL;
5358		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5359			if (CHASE(rdataset)) {
5360				rdataset->attributes &= ~DNS_RDATASETATTR_CHASE;
5361				(void)dns_rdataset_additionaldata(rdataset,
5362								  check_related,
5363								  fctx);
5364				rescan = ISC_TRUE;
5365			}
5366		}
5367	}
5368	if (rescan)
5369		goto again;
5370}
5371
5372static inline isc_result_t
5373cname_target(dns_rdataset_t *rdataset, dns_name_t *tname) {
5374	isc_result_t result;
5375	dns_rdata_t rdata = DNS_RDATA_INIT;
5376	dns_rdata_cname_t cname;
5377
5378	result = dns_rdataset_first(rdataset);
5379	if (result != ISC_R_SUCCESS)
5380		return (result);
5381	dns_rdataset_current(rdataset, &rdata);
5382	result = dns_rdata_tostruct(&rdata, &cname, NULL);
5383	if (result != ISC_R_SUCCESS)
5384		return (result);
5385	dns_name_init(tname, NULL);
5386	dns_name_clone(&cname.cname, tname);
5387	dns_rdata_freestruct(&cname);
5388
5389	return (ISC_R_SUCCESS);
5390}
5391
5392static inline isc_result_t
5393dname_target(dns_rdataset_t *rdataset, dns_name_t *qname,
5394	     unsigned int nlabels, dns_fixedname_t *fixeddname)
5395{
5396	isc_result_t result;
5397	dns_rdata_t rdata = DNS_RDATA_INIT;
5398	dns_rdata_dname_t dname;
5399	dns_fixedname_t prefix;
5400
5401	/*
5402	 * Get the target name of the DNAME.
5403	 */
5404	result = dns_rdataset_first(rdataset);
5405	if (result != ISC_R_SUCCESS)
5406		return (result);
5407	dns_rdataset_current(rdataset, &rdata);
5408	result = dns_rdata_tostruct(&rdata, &dname, NULL);
5409	if (result != ISC_R_SUCCESS)
5410		return (result);
5411
5412	dns_fixedname_init(&prefix);
5413	dns_name_split(qname, nlabels, dns_fixedname_name(&prefix), NULL);
5414	dns_fixedname_init(fixeddname);
5415	result = dns_name_concatenate(dns_fixedname_name(&prefix),
5416				      &dname.dname,
5417				      dns_fixedname_name(fixeddname), NULL);
5418	dns_rdata_freestruct(&dname);
5419	return (result);
5420}
5421
5422static isc_boolean_t
5423is_answeraddress_allowed(dns_view_t *view, dns_name_t *name,
5424			 dns_rdataset_t *rdataset)
5425{
5426	isc_result_t result;
5427	dns_rdata_t rdata = DNS_RDATA_INIT;
5428	struct in_addr ina;
5429	struct in6_addr in6a;
5430	isc_netaddr_t netaddr;
5431	char addrbuf[ISC_NETADDR_FORMATSIZE];
5432	char namebuf[DNS_NAME_FORMATSIZE];
5433	char classbuf[64];
5434	char typebuf[64];
5435	int match;
5436
5437	/* By default, we allow any addresses. */
5438	if (view->denyansweracl == NULL)
5439		return (ISC_TRUE);
5440
5441	/*
5442	 * If the owner name matches one in the exclusion list, either exactly
5443	 * or partially, allow it.
5444	 */
5445	if (view->answeracl_exclude != NULL) {
5446		dns_rbtnode_t *node = NULL;
5447
5448		result = dns_rbt_findnode(view->answeracl_exclude, name, NULL,
5449					  &node, NULL, 0, NULL, NULL);
5450
5451		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
5452			return (ISC_TRUE);
5453	}
5454
5455	/*
5456	 * Otherwise, search the filter list for a match for each address
5457	 * record.  If a match is found, the address should be filtered,
5458	 * so should the entire answer.
5459	 */
5460	for (result = dns_rdataset_first(rdataset);
5461	     result == ISC_R_SUCCESS;
5462	     result = dns_rdataset_next(rdataset)) {
5463		dns_rdata_reset(&rdata);
5464		dns_rdataset_current(rdataset, &rdata);
5465		if (rdataset->type == dns_rdatatype_a) {
5466			INSIST(rdata.length == sizeof(ina.s_addr));
5467			memmove(&ina.s_addr, rdata.data, sizeof(ina.s_addr));
5468			isc_netaddr_fromin(&netaddr, &ina);
5469		} else {
5470			INSIST(rdata.length == sizeof(in6a.s6_addr));
5471			memmove(in6a.s6_addr, rdata.data, sizeof(in6a.s6_addr));
5472			isc_netaddr_fromin6(&netaddr, &in6a);
5473		}
5474
5475		result = dns_acl_match(&netaddr, NULL, view->denyansweracl,
5476				       &view->aclenv, &match, NULL);
5477
5478		if (result == ISC_R_SUCCESS && match > 0) {
5479			isc_netaddr_format(&netaddr, addrbuf, sizeof(addrbuf));
5480			dns_name_format(name, namebuf, sizeof(namebuf));
5481			dns_rdatatype_format(rdataset->type, typebuf,
5482					     sizeof(typebuf));
5483			dns_rdataclass_format(rdataset->rdclass, classbuf,
5484					      sizeof(classbuf));
5485			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5486				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5487				      "answer address %s denied for %s/%s/%s",
5488				      addrbuf, namebuf, typebuf, classbuf);
5489			return (ISC_FALSE);
5490		}
5491	}
5492
5493	return (ISC_TRUE);
5494}
5495
5496static isc_boolean_t
5497is_answertarget_allowed(dns_view_t *view, dns_name_t *name,
5498			dns_rdatatype_t type, dns_name_t *tname,
5499			dns_name_t *domain)
5500{
5501	isc_result_t result;
5502	dns_rbtnode_t *node = NULL;
5503	char qnamebuf[DNS_NAME_FORMATSIZE];
5504	char tnamebuf[DNS_NAME_FORMATSIZE];
5505	char classbuf[64];
5506	char typebuf[64];
5507
5508	/* By default, we allow any target name. */
5509	if (view->denyanswernames == NULL)
5510		return (ISC_TRUE);
5511
5512	/*
5513	 * If the owner name matches one in the exclusion list, either exactly
5514	 * or partially, allow it.
5515	 */
5516	if (view->answernames_exclude != NULL) {
5517		result = dns_rbt_findnode(view->answernames_exclude, name, NULL,
5518					  &node, NULL, 0, NULL, NULL);
5519		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
5520			return (ISC_TRUE);
5521	}
5522
5523	/*
5524	 * If the target name is a subdomain of the search domain, allow it.
5525	 */
5526	if (dns_name_issubdomain(tname, domain))
5527		return (ISC_TRUE);
5528
5529	/*
5530	 * Otherwise, apply filters.
5531	 */
5532	result = dns_rbt_findnode(view->denyanswernames, tname, NULL, &node,
5533				  NULL, 0, NULL, NULL);
5534	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
5535		dns_name_format(name, qnamebuf, sizeof(qnamebuf));
5536		dns_name_format(tname, tnamebuf, sizeof(tnamebuf));
5537		dns_rdatatype_format(type, typebuf, sizeof(typebuf));
5538		dns_rdataclass_format(view->rdclass, classbuf,
5539				      sizeof(classbuf));
5540		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5541			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5542			      "%s target %s denied for %s/%s",
5543			      typebuf, tnamebuf, qnamebuf, classbuf);
5544		return (ISC_FALSE);
5545	}
5546
5547	return (ISC_TRUE);
5548}
5549
5550static void
5551trim_ns_ttl(fetchctx_t *fctx, dns_name_t *name, dns_rdataset_t *rdataset) {
5552	char ns_namebuf[DNS_NAME_FORMATSIZE];
5553	char namebuf[DNS_NAME_FORMATSIZE];
5554	char tbuf[DNS_RDATATYPE_FORMATSIZE];
5555
5556	if (fctx->ns_ttl_ok && rdataset->ttl > fctx->ns_ttl) {
5557		dns_name_format(name, ns_namebuf, sizeof(ns_namebuf));
5558		dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
5559		dns_rdatatype_format(fctx->type, tbuf, sizeof(tbuf));
5560
5561		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5562			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
5563			      "fctx %p: trimming ttl of %s/NS for %s/%s: "
5564			      "%u -> %u", fctx, ns_namebuf, namebuf, tbuf,
5565			      rdataset->ttl, fctx->ns_ttl);
5566		rdataset->ttl = fctx->ns_ttl;
5567	}
5568}
5569
5570/*
5571 * Handle a no-answer response (NXDOMAIN, NXRRSET, or referral).
5572 * If look_in_options has LOOK_FOR_NS_IN_ANSWER then we look in the answer
5573 * section for the NS RRset if the query type is NS; if it has
5574 * LOOK_FOR_GLUE_IN_ANSWER we look for glue incorrectly returned in the answer
5575 * section for A and AAAA queries.
5576 */
5577#define LOOK_FOR_NS_IN_ANSWER 0x1
5578#define LOOK_FOR_GLUE_IN_ANSWER 0x2
5579
5580static isc_result_t
5581noanswer_response(fetchctx_t *fctx, dns_name_t *oqname,
5582		  unsigned int look_in_options)
5583{
5584	isc_result_t result;
5585	dns_message_t *message;
5586	dns_name_t *name, *qname, *ns_name, *soa_name, *ds_name, *save_name;
5587	dns_rdataset_t *rdataset, *ns_rdataset;
5588	isc_boolean_t aa, negative_response;
5589	dns_rdatatype_t type, save_type;
5590	dns_section_t section;
5591
5592	FCTXTRACE("noanswer_response");
5593
5594	if ((look_in_options & LOOK_FOR_NS_IN_ANSWER) != 0) {
5595		INSIST(fctx->type == dns_rdatatype_ns);
5596		section = DNS_SECTION_ANSWER;
5597	} else
5598		section = DNS_SECTION_AUTHORITY;
5599
5600	message = fctx->rmessage;
5601
5602	/*
5603	 * Setup qname.
5604	 */
5605	if (oqname == NULL) {
5606		/*
5607		 * We have a normal, non-chained negative response or
5608		 * referral.
5609		 */
5610		if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
5611			aa = ISC_TRUE;
5612		else
5613			aa = ISC_FALSE;
5614		qname = &fctx->name;
5615	} else {
5616		/*
5617		 * We're being invoked by answer_response() after it has
5618		 * followed a CNAME/DNAME chain.
5619		 */
5620		qname = oqname;
5621		aa = ISC_FALSE;
5622		/*
5623		 * If the current qname is not a subdomain of the query
5624		 * domain, there's no point in looking at the authority
5625		 * section without doing DNSSEC validation.
5626		 *
5627		 * Until we do that validation, we'll just return success
5628		 * in this case.
5629		 */
5630		if (!dns_name_issubdomain(qname, &fctx->domain))
5631			return (ISC_R_SUCCESS);
5632	}
5633
5634	/*
5635	 * We have to figure out if this is a negative response, or a
5636	 * referral.
5637	 */
5638
5639	/*
5640	 * Sometimes we can tell if its a negative response by looking at
5641	 * the message header.
5642	 */
5643	negative_response = ISC_FALSE;
5644	if (message->rcode == dns_rcode_nxdomain ||
5645	    (message->counts[DNS_SECTION_ANSWER] == 0 &&
5646	     message->counts[DNS_SECTION_AUTHORITY] == 0))
5647		negative_response = ISC_TRUE;
5648
5649	/*
5650	 * Process the authority section.
5651	 */
5652	ns_name = NULL;
5653	ns_rdataset = NULL;
5654	soa_name = NULL;
5655	ds_name = NULL;
5656	save_name = NULL;
5657	save_type = dns_rdatatype_none;
5658	result = dns_message_firstname(message, section);
5659	while (result == ISC_R_SUCCESS) {
5660		name = NULL;
5661		dns_message_currentname(message, section, &name);
5662		if (dns_name_issubdomain(name, &fctx->domain)) {
5663			/*
5664			 * Look for NS/SOA RRsets first.
5665			 */
5666			for (rdataset = ISC_LIST_HEAD(name->list);
5667			     rdataset != NULL;
5668			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5669				type = rdataset->type;
5670				if (type == dns_rdatatype_rrsig)
5671					type = rdataset->covers;
5672				if (((type == dns_rdatatype_ns ||
5673				      type == dns_rdatatype_soa) &&
5674				     !dns_name_issubdomain(qname, name))) {
5675					char qbuf[DNS_NAME_FORMATSIZE];
5676					char nbuf[DNS_NAME_FORMATSIZE];
5677					char tbuf[DNS_RDATATYPE_FORMATSIZE];
5678					dns_rdatatype_format(fctx->type, tbuf,
5679							     sizeof(tbuf));
5680					dns_name_format(name, nbuf,
5681							     sizeof(nbuf));
5682					dns_name_format(qname, qbuf,
5683							     sizeof(qbuf));
5684					log_formerr(fctx,
5685						    "unrelated %s %s in "
5686						    "%s authority section",
5687						    tbuf, qbuf, nbuf);
5688					return (DNS_R_FORMERR);
5689				}
5690				if (type == dns_rdatatype_ns) {
5691					/*
5692					 * NS or RRSIG NS.
5693					 *
5694					 * Only one set of NS RRs is allowed.
5695					 */
5696					if (rdataset->type ==
5697					    dns_rdatatype_ns) {
5698						if (ns_name != NULL &&
5699						    name != ns_name) {
5700							log_formerr(fctx,
5701								"multiple NS "
5702								"RRsets in "
5703								"authority "
5704								"section");
5705							return (DNS_R_FORMERR);
5706						}
5707						ns_name = name;
5708						ns_rdataset = rdataset;
5709					}
5710					name->attributes |=
5711						DNS_NAMEATTR_CACHE;
5712					rdataset->attributes |=
5713						DNS_RDATASETATTR_CACHE;
5714					rdataset->trust = dns_trust_glue;
5715				}
5716				if (type == dns_rdatatype_soa) {
5717					/*
5718					 * SOA, or RRSIG SOA.
5719					 *
5720					 * Only one SOA is allowed.
5721					 */
5722					if (rdataset->type ==
5723					    dns_rdatatype_soa) {
5724						if (soa_name != NULL &&
5725						    name != soa_name) {
5726							log_formerr(fctx,
5727								"multiple SOA "
5728								"RRs in "
5729								"authority "
5730								"section");
5731							return (DNS_R_FORMERR);
5732						}
5733						soa_name = name;
5734					}
5735					name->attributes |=
5736						DNS_NAMEATTR_NCACHE;
5737					rdataset->attributes |=
5738						DNS_RDATASETATTR_NCACHE;
5739					if (aa)
5740						rdataset->trust =
5741						    dns_trust_authauthority;
5742					else if (ISFORWARDER(fctx->addrinfo))
5743						rdataset->trust =
5744							dns_trust_answer;
5745					else
5746						rdataset->trust =
5747							dns_trust_additional;
5748				}
5749			}
5750		}
5751		result = dns_message_nextname(message, section);
5752		if (result == ISC_R_NOMORE)
5753			break;
5754		else if (result != ISC_R_SUCCESS)
5755			return (result);
5756	}
5757
5758	log_ns_ttl(fctx, "noanswer_response");
5759
5760	if (ns_rdataset != NULL && dns_name_equal(&fctx->domain, ns_name) &&
5761	    !dns_name_equal(ns_name, dns_rootname))
5762		trim_ns_ttl(fctx, ns_name, ns_rdataset);
5763
5764	/*
5765	 * A negative response has a SOA record (Type 2)
5766	 * and a optional NS RRset (Type 1) or it has neither
5767	 * a SOA or a NS RRset (Type 3, handled above) or
5768	 * rcode is NXDOMAIN (handled above) in which case
5769	 * the NS RRset is allowed (Type 4).
5770	 */
5771	if (soa_name != NULL)
5772		negative_response = ISC_TRUE;
5773
5774	result = dns_message_firstname(message, section);
5775	while (result == ISC_R_SUCCESS) {
5776		name = NULL;
5777		dns_message_currentname(message, section, &name);
5778		if (dns_name_issubdomain(name, &fctx->domain)) {
5779			for (rdataset = ISC_LIST_HEAD(name->list);
5780			     rdataset != NULL;
5781			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5782				type = rdataset->type;
5783				if (type == dns_rdatatype_rrsig)
5784					type = rdataset->covers;
5785				if (type == dns_rdatatype_nsec ||
5786				    type == dns_rdatatype_nsec3) {
5787					/*
5788					 * NSEC or RRSIG NSEC.
5789					 */
5790					if (negative_response) {
5791						name->attributes |=
5792							DNS_NAMEATTR_NCACHE;
5793						rdataset->attributes |=
5794							DNS_RDATASETATTR_NCACHE;
5795					} else if (type == dns_rdatatype_nsec) {
5796						name->attributes |=
5797							DNS_NAMEATTR_CACHE;
5798						rdataset->attributes |=
5799							DNS_RDATASETATTR_CACHE;
5800					}
5801					if (aa)
5802						rdataset->trust =
5803						    dns_trust_authauthority;
5804					else if (ISFORWARDER(fctx->addrinfo))
5805						rdataset->trust =
5806							dns_trust_answer;
5807					else
5808						rdataset->trust =
5809							dns_trust_additional;
5810					/*
5811					 * No additional data needs to be
5812					 * marked.
5813					 */
5814				} else if (type == dns_rdatatype_ds) {
5815					/*
5816					 * DS or SIG DS.
5817					 *
5818					 * These should only be here if
5819					 * this is a referral, and there
5820					 * should only be one DS RRset.
5821					 */
5822					if (ns_name == NULL) {
5823						log_formerr(fctx,
5824							    "DS with no "
5825							    "referral");
5826						return (DNS_R_FORMERR);
5827					}
5828					if (rdataset->type ==
5829					    dns_rdatatype_ds) {
5830						if (ds_name != NULL &&
5831						    name != ds_name) {
5832							log_formerr(fctx,
5833								"DS doesn't "
5834								"match "
5835								"referral "
5836								"(NS)");
5837							return (DNS_R_FORMERR);
5838						}
5839						ds_name = name;
5840					}
5841					name->attributes |=
5842						DNS_NAMEATTR_CACHE;
5843					rdataset->attributes |=
5844						DNS_RDATASETATTR_CACHE;
5845					if (aa)
5846						rdataset->trust =
5847						    dns_trust_authauthority;
5848					else if (ISFORWARDER(fctx->addrinfo))
5849						rdataset->trust =
5850							dns_trust_answer;
5851					else
5852						rdataset->trust =
5853							dns_trust_additional;
5854				}
5855			}
5856		} else {
5857			save_name = name;
5858			save_type = ISC_LIST_HEAD(name->list)->type;
5859		}
5860		result = dns_message_nextname(message, section);
5861		if (result == ISC_R_NOMORE)
5862			break;
5863		else if (result != ISC_R_SUCCESS)
5864			return (result);
5865	}
5866
5867	/*
5868	 * Trigger lookups for DNS nameservers.
5869	 */
5870	if (negative_response && message->rcode == dns_rcode_noerror &&
5871	    fctx->type == dns_rdatatype_ds && soa_name != NULL &&
5872	    dns_name_equal(soa_name, qname) &&
5873	    !dns_name_equal(qname, dns_rootname))
5874		return (DNS_R_CHASEDSSERVERS);
5875
5876	/*
5877	 * Did we find anything?
5878	 */
5879	if (!negative_response && ns_name == NULL) {
5880		/*
5881		 * Nope.
5882		 */
5883		if (oqname != NULL) {
5884			/*
5885			 * We've already got a partial CNAME/DNAME chain,
5886			 * and haven't found else anything useful here, but
5887			 * no error has occurred since we have an answer.
5888			 */
5889			return (ISC_R_SUCCESS);
5890		} else {
5891			/*
5892			 * The responder is insane.
5893			 */
5894			if (save_name == NULL) {
5895				log_formerr(fctx, "invalid response");
5896				return (DNS_R_FORMERR);
5897			}
5898			if (!dns_name_issubdomain(save_name, &fctx->domain)) {
5899				char nbuf[DNS_NAME_FORMATSIZE];
5900				char dbuf[DNS_NAME_FORMATSIZE];
5901				char tbuf[DNS_RDATATYPE_FORMATSIZE];
5902
5903				dns_rdatatype_format(save_type, tbuf,
5904					sizeof(tbuf));
5905				dns_name_format(save_name, nbuf, sizeof(nbuf));
5906				dns_name_format(&fctx->domain, dbuf,
5907					sizeof(dbuf));
5908
5909				log_formerr(fctx, "Name %s (%s) not subdomain"
5910					" of zone %s -- invalid response",
5911					nbuf, tbuf, dbuf);
5912			} else {
5913				log_formerr(fctx, "invalid response");
5914			}
5915			return (DNS_R_FORMERR);
5916		}
5917	}
5918
5919	/*
5920	 * If we found both NS and SOA, they should be the same name.
5921	 */
5922	if (ns_name != NULL && soa_name != NULL && ns_name != soa_name) {
5923		log_formerr(fctx, "NS/SOA mismatch");
5924		return (DNS_R_FORMERR);
5925	}
5926
5927	/*
5928	 * Do we have a referral?  (We only want to follow a referral if
5929	 * we're not following a chain.)
5930	 */
5931	if (!negative_response && ns_name != NULL && oqname == NULL) {
5932		/*
5933		 * We already know ns_name is a subdomain of fctx->domain.
5934		 * If ns_name is equal to fctx->domain, we're not making
5935		 * progress.  We return DNS_R_FORMERR so that we'll keep
5936		 * trying other servers.
5937		 */
5938		if (dns_name_equal(ns_name, &fctx->domain)) {
5939			log_formerr(fctx, "non-improving referral");
5940			return (DNS_R_FORMERR);
5941		}
5942
5943		/*
5944		 * If the referral name is not a parent of the query
5945		 * name, consider the responder insane.
5946		 */
5947		if (! dns_name_issubdomain(&fctx->name, ns_name)) {
5948			/* Logged twice */
5949			log_formerr(fctx, "referral to non-parent");
5950			FCTXTRACE("referral to non-parent");
5951			return (DNS_R_FORMERR);
5952		}
5953
5954		/*
5955		 * Mark any additional data related to this rdataset.
5956		 * It's important that we do this before we change the
5957		 * query domain.
5958		 */
5959		INSIST(ns_rdataset != NULL);
5960		fctx->attributes |= FCTX_ATTR_GLUING;
5961		(void)dns_rdataset_additionaldata(ns_rdataset, check_related,
5962						  fctx);
5963#if CHECK_FOR_GLUE_IN_ANSWER
5964		/*
5965		 * Look in the answer section for "glue" that is incorrectly
5966		 * returned as a answer.  This is needed if the server also
5967		 * minimizes the response size by not adding records to the
5968		 * additional section that are in the answer section or if
5969		 * the record gets dropped due to message size constraints.
5970		 */
5971		if ((look_in_options & LOOK_FOR_GLUE_IN_ANSWER) != 0 &&
5972		    (fctx->type == dns_rdatatype_aaaa ||
5973		     fctx->type == dns_rdatatype_a))
5974			(void)dns_rdataset_additionaldata(ns_rdataset,
5975							  check_answer, fctx);
5976#endif
5977		fctx->attributes &= ~FCTX_ATTR_GLUING;
5978		/*
5979		 * NS rdatasets with 0 TTL cause problems.
5980		 * dns_view_findzonecut() will not find them when we
5981		 * try to follow the referral, and we'll SERVFAIL
5982		 * because the best nameservers are now above QDOMAIN.
5983		 * We force the TTL to 1 second to prevent this.
5984		 */
5985		if (ns_rdataset->ttl == 0)
5986			ns_rdataset->ttl = 1;
5987		/*
5988		 * Set the current query domain to the referral name.
5989		 *
5990		 * XXXRTH  We should check if we're in forward-only mode, and
5991		 *		if so we should bail out.
5992		 */
5993		INSIST(dns_name_countlabels(&fctx->domain) > 0);
5994		dns_name_free(&fctx->domain, fctx->mctx);
5995		if (dns_rdataset_isassociated(&fctx->nameservers))
5996			dns_rdataset_disassociate(&fctx->nameservers);
5997		dns_name_init(&fctx->domain, NULL);
5998		result = dns_name_dup(ns_name, fctx->mctx, &fctx->domain);
5999		if (result != ISC_R_SUCCESS)
6000			return (result);
6001		fctx->attributes |= FCTX_ATTR_WANTCACHE;
6002		fctx->ns_ttl_ok = ISC_FALSE;
6003		log_ns_ttl(fctx, "DELEGATION");
6004		return (DNS_R_DELEGATION);
6005	}
6006
6007	/*
6008	 * Since we're not doing a referral, we don't want to cache any
6009	 * NS RRs we may have found.
6010	 */
6011	if (ns_name != NULL)
6012		ns_name->attributes &= ~DNS_NAMEATTR_CACHE;
6013
6014	if (negative_response && oqname == NULL)
6015		fctx->attributes |= FCTX_ATTR_WANTNCACHE;
6016
6017	return (ISC_R_SUCCESS);
6018}
6019
6020static isc_result_t
6021answer_response(fetchctx_t *fctx) {
6022	isc_result_t result;
6023	dns_message_t *message;
6024	dns_name_t *name, *dname = NULL, *qname, *dqname, tname, *ns_name;
6025	dns_name_t *cname = NULL;
6026	dns_rdataset_t *rdataset, *ns_rdataset;
6027	isc_boolean_t done, external, chaining, aa, found, want_chaining;
6028	isc_boolean_t have_answer, found_cname, found_dname, found_type;
6029	isc_boolean_t wanted_chaining;
6030	unsigned int aflag;
6031	dns_rdatatype_t type;
6032	dns_fixedname_t fdname, fqname, fqdname;
6033	dns_view_t *view;
6034
6035	FCTXTRACE("answer_response");
6036
6037	message = fctx->rmessage;
6038
6039	/*
6040	 * Examine the answer section, marking those rdatasets which are
6041	 * part of the answer and should be cached.
6042	 */
6043
6044	done = ISC_FALSE;
6045	found_cname = ISC_FALSE;
6046	found_dname = ISC_FALSE;
6047	found_type = ISC_FALSE;
6048	chaining = ISC_FALSE;
6049	have_answer = ISC_FALSE;
6050	want_chaining = ISC_FALSE;
6051	POST(want_chaining);
6052	if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
6053		aa = ISC_TRUE;
6054	else
6055		aa = ISC_FALSE;
6056	dqname = qname = &fctx->name;
6057	type = fctx->type;
6058	view = fctx->res->view;
6059	dns_fixedname_init(&fqdname);
6060	result = dns_message_firstname(message, DNS_SECTION_ANSWER);
6061	while (!done && result == ISC_R_SUCCESS) {
6062		dns_namereln_t namereln, dnamereln;
6063		int order;
6064		unsigned int nlabels;
6065
6066		name = NULL;
6067		dns_message_currentname(message, DNS_SECTION_ANSWER, &name);
6068		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
6069		namereln = dns_name_fullcompare(qname, name, &order, &nlabels);
6070		dnamereln = dns_name_fullcompare(dqname, name, &order,
6071						 &nlabels);
6072		if (namereln == dns_namereln_equal) {
6073			wanted_chaining = ISC_FALSE;
6074			for (rdataset = ISC_LIST_HEAD(name->list);
6075			     rdataset != NULL;
6076			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6077				found = ISC_FALSE;
6078				want_chaining = ISC_FALSE;
6079				aflag = 0;
6080				if (rdataset->type == dns_rdatatype_nsec3) {
6081					/*
6082					 * NSEC3 records are not allowed to
6083					 * appear in the answer section.
6084					 */
6085					log_formerr(fctx, "NSEC3 in answer");
6086					return (DNS_R_FORMERR);
6087				}
6088
6089				/*
6090				 * Apply filters, if given, on answers to reject
6091				 * a malicious attempt of rebinding.
6092				 */
6093				if ((rdataset->type == dns_rdatatype_a ||
6094				     rdataset->type == dns_rdatatype_aaaa) &&
6095				    !is_answeraddress_allowed(view, name,
6096							      rdataset)) {
6097					return (DNS_R_SERVFAIL);
6098				}
6099
6100				if (rdataset->type == type && !found_cname) {
6101					/*
6102					 * We've found an ordinary answer.
6103					 */
6104					found = ISC_TRUE;
6105					found_type = ISC_TRUE;
6106					done = ISC_TRUE;
6107					aflag = DNS_RDATASETATTR_ANSWER;
6108				} else if (type == dns_rdatatype_any) {
6109					/*
6110					 * We've found an answer matching
6111					 * an ANY query.  There may be
6112					 * more.
6113					 */
6114					found = ISC_TRUE;
6115					aflag = DNS_RDATASETATTR_ANSWER;
6116				} else if (rdataset->type == dns_rdatatype_rrsig
6117					   && rdataset->covers == type
6118					   && !found_cname) {
6119					/*
6120					 * We've found a signature that
6121					 * covers the type we're looking for.
6122					 */
6123					found = ISC_TRUE;
6124					found_type = ISC_TRUE;
6125					aflag = DNS_RDATASETATTR_ANSWERSIG;
6126				} else if (rdataset->type ==
6127					   dns_rdatatype_cname
6128					   && !found_type) {
6129					/*
6130					 * We're looking for something else,
6131					 * but we found a CNAME.
6132					 *
6133					 * Getting a CNAME response for some
6134					 * query types is an error, see
6135					 * RFC 4035, Section 2.5.
6136					 */
6137					if (type == dns_rdatatype_rrsig ||
6138					    type == dns_rdatatype_key ||
6139					    type == dns_rdatatype_nsec) {
6140						char buf[DNS_RDATATYPE_FORMATSIZE];
6141						dns_rdatatype_format(fctx->type,
6142							      buf, sizeof(buf));
6143						log_formerr(fctx,
6144							    "CNAME response "
6145							    "for %s RR", buf);
6146						return (DNS_R_FORMERR);
6147					}
6148					found = ISC_TRUE;
6149					found_cname = ISC_TRUE;
6150					want_chaining = ISC_TRUE;
6151					aflag = DNS_RDATASETATTR_ANSWER;
6152					result = cname_target(rdataset,
6153							      &tname);
6154					if (result != ISC_R_SUCCESS)
6155						return (result);
6156					/* Apply filters on the target name. */
6157					if (!is_answertarget_allowed(view,
6158							name,
6159							rdataset->type,
6160							&tname,
6161							&fctx->domain)) {
6162						return (DNS_R_SERVFAIL);
6163					}
6164				} else if (rdataset->type == dns_rdatatype_rrsig
6165					   && rdataset->covers ==
6166					      dns_rdatatype_cname
6167					   && !found_type) {
6168					/*
6169					 * We're looking for something else,
6170					 * but we found a SIG CNAME.
6171					 */
6172					found = ISC_TRUE;
6173					found_cname = ISC_TRUE;
6174					aflag = DNS_RDATASETATTR_ANSWERSIG;
6175				}
6176
6177				if (found) {
6178					/*
6179					 * We've found an answer to our
6180					 * question.
6181					 */
6182					name->attributes |=
6183						DNS_NAMEATTR_CACHE;
6184					rdataset->attributes |=
6185						DNS_RDATASETATTR_CACHE;
6186					rdataset->trust = dns_trust_answer;
6187					if (!chaining) {
6188						/*
6189						 * This data is "the" answer
6190						 * to our question only if
6191						 * we're not chaining (i.e.
6192						 * if we haven't followed
6193						 * a CNAME or DNAME).
6194						 */
6195						INSIST(!external);
6196						if ((rdataset->type !=
6197						     dns_rdatatype_cname) ||
6198						    !found_dname ||
6199						    (aflag ==
6200						     DNS_RDATASETATTR_ANSWER))
6201						{
6202							have_answer = ISC_TRUE;
6203							if (rdataset->type ==
6204							    dns_rdatatype_cname)
6205								cname = name;
6206							name->attributes |=
6207							    DNS_NAMEATTR_ANSWER;
6208						}
6209						rdataset->attributes |= aflag;
6210						if (aa)
6211							rdataset->trust =
6212							  dns_trust_authanswer;
6213					} else if (external) {
6214						/*
6215						 * This data is outside of
6216						 * our query domain, and
6217						 * may not be cached.
6218						 */
6219						rdataset->attributes |=
6220						    DNS_RDATASETATTR_EXTERNAL;
6221					}
6222
6223					/*
6224					 * Mark any additional data related
6225					 * to this rdataset.
6226					 */
6227					(void)dns_rdataset_additionaldata(
6228							rdataset,
6229							check_related,
6230							fctx);
6231
6232					/*
6233					 * CNAME chaining.
6234					 */
6235					if (want_chaining) {
6236						wanted_chaining = ISC_TRUE;
6237						name->attributes |=
6238							DNS_NAMEATTR_CHAINING;
6239						rdataset->attributes |=
6240						    DNS_RDATASETATTR_CHAINING;
6241						qname = &tname;
6242					}
6243				}
6244				/*
6245				 * We could add an "else" clause here and
6246				 * log that we're ignoring this rdataset.
6247				 */
6248			}
6249			/*
6250			 * If wanted_chaining is true, we've done
6251			 * some chaining as the result of processing
6252			 * this node, and thus we need to set
6253			 * chaining to true.
6254			 *
6255			 * We don't set chaining inside of the
6256			 * rdataset loop because doing that would
6257			 * cause us to ignore the signatures of
6258			 * CNAMEs.
6259			 */
6260			if (wanted_chaining)
6261				chaining = ISC_TRUE;
6262		} else {
6263			dns_rdataset_t *dnameset = NULL;
6264
6265			/*
6266			 * Look for a DNAME (or its SIG).  Anything else is
6267			 * ignored.
6268			 */
6269			wanted_chaining = ISC_FALSE;
6270			for (rdataset = ISC_LIST_HEAD(name->list);
6271			     rdataset != NULL;
6272			     rdataset = ISC_LIST_NEXT(rdataset, link))
6273			{
6274				/*
6275				 * Only pass DNAME or RRSIG(DNAME).
6276				 */
6277				if (rdataset->type != dns_rdatatype_dname &&
6278				    (rdataset->type != dns_rdatatype_rrsig ||
6279				     rdataset->covers != dns_rdatatype_dname))
6280					continue;
6281
6282				/*
6283				 * If we're not chaining, then the DNAME and
6284				 * its signature should not be external.
6285				 */
6286				if (!chaining && external) {
6287					char qbuf[DNS_NAME_FORMATSIZE];
6288					char obuf[DNS_NAME_FORMATSIZE];
6289
6290					dns_name_format(name, qbuf,
6291							sizeof(qbuf));
6292					dns_name_format(&fctx->domain, obuf,
6293							sizeof(obuf));
6294					log_formerr(fctx, "external DNAME or "
6295						    "RRSIG covering DNAME "
6296						    "in answer: %s is "
6297						    "not in %s", qbuf, obuf);
6298					return (DNS_R_FORMERR);
6299				}
6300
6301				if (dnamereln != dns_namereln_subdomain) {
6302					char qbuf[DNS_NAME_FORMATSIZE];
6303					char obuf[DNS_NAME_FORMATSIZE];
6304
6305					dns_name_format(dqname, qbuf,
6306							sizeof(qbuf));
6307					dns_name_format(name, obuf,
6308							sizeof(obuf));
6309					log_formerr(fctx, "unrelated DNAME "
6310						    "in answer: %s is "
6311						    "not in %s", qbuf, obuf);
6312					return (DNS_R_FORMERR);
6313				}
6314
6315				aflag = 0;
6316				if (rdataset->type == dns_rdatatype_dname) {
6317					want_chaining = ISC_TRUE;
6318					POST(want_chaining);
6319					aflag = DNS_RDATASETATTR_ANSWER;
6320					result = dname_target(rdataset, dqname,
6321							      nlabels, &fdname);
6322					if (result == ISC_R_NOSPACE) {
6323						/*
6324						 * We can't construct the
6325						 * DNAME target.  Do not
6326						 * try to continue.
6327						 */
6328						want_chaining = ISC_FALSE;
6329						POST(want_chaining);
6330					} else if (result != ISC_R_SUCCESS)
6331						return (result);
6332					else
6333						dnameset = rdataset;
6334
6335					dname = dns_fixedname_name(&fdname);
6336					if (!is_answertarget_allowed(view,
6337						     dqname, rdataset->type,
6338						     dname, &fctx->domain))
6339					{
6340						return (DNS_R_SERVFAIL);
6341					}
6342					dqname = dns_fixedname_name(&fqdname);
6343					dns_name_copy(dname, dqname, NULL);
6344				} else {
6345					/*
6346					 * We've found a signature that
6347					 * covers the DNAME.
6348					 */
6349					aflag = DNS_RDATASETATTR_ANSWERSIG;
6350				}
6351
6352				/*
6353				 * We've found an answer to our
6354				 * question.
6355				 */
6356				name->attributes |= DNS_NAMEATTR_CACHE;
6357				rdataset->attributes |= DNS_RDATASETATTR_CACHE;
6358				rdataset->trust = dns_trust_answer;
6359				if (!chaining) {
6360					/*
6361					 * This data is "the" answer to
6362					 * our question only if we're
6363					 * not chaining.
6364					 */
6365					INSIST(!external);
6366					if (aflag == DNS_RDATASETATTR_ANSWER) {
6367						have_answer = ISC_TRUE;
6368						found_dname = ISC_TRUE;
6369						if (cname != NULL)
6370							cname->attributes &=
6371							   ~DNS_NAMEATTR_ANSWER;
6372						name->attributes |=
6373							DNS_NAMEATTR_ANSWER;
6374					}
6375					rdataset->attributes |= aflag;
6376					if (aa)
6377						rdataset->trust =
6378						  dns_trust_authanswer;
6379				} else if (external) {
6380					rdataset->attributes |=
6381					    DNS_RDATASETATTR_EXTERNAL;
6382				}
6383			}
6384
6385			/*
6386			 * DNAME chaining.
6387			 */
6388			if (dnameset != NULL) {
6389				/*
6390				 * Copy the dname into the qname fixed name.
6391				 *
6392				 * Although we check for failure of the copy
6393				 * operation, in practice it should never fail
6394				 * since we already know that the  result fits
6395				 * in a fixedname.
6396				 */
6397				dns_fixedname_init(&fqname);
6398				qname = dns_fixedname_name(&fqname);
6399				result = dns_name_copy(dname, qname, NULL);
6400				if (result != ISC_R_SUCCESS)
6401					return (result);
6402				wanted_chaining = ISC_TRUE;
6403				name->attributes |= DNS_NAMEATTR_CHAINING;
6404				dnameset->attributes |=
6405					    DNS_RDATASETATTR_CHAINING;
6406			}
6407			if (wanted_chaining)
6408				chaining = ISC_TRUE;
6409		}
6410		result = dns_message_nextname(message, DNS_SECTION_ANSWER);
6411	}
6412	if (result == ISC_R_NOMORE)
6413		result = ISC_R_SUCCESS;
6414	if (result != ISC_R_SUCCESS)
6415		return (result);
6416
6417	/*
6418	 * We should have found an answer.
6419	 */
6420	if (!have_answer) {
6421		log_formerr(fctx, "reply has no answer");
6422		return (DNS_R_FORMERR);
6423	}
6424
6425	/*
6426	 * This response is now potentially cacheable.
6427	 */
6428	fctx->attributes |= FCTX_ATTR_WANTCACHE;
6429
6430	/*
6431	 * Did chaining end before we got the final answer?
6432	 */
6433	if (chaining) {
6434		/*
6435		 * Yes.  This may be a negative reply, so hand off
6436		 * authority section processing to the noanswer code.
6437		 * If it isn't a noanswer response, no harm will be
6438		 * done.
6439		 */
6440		return (noanswer_response(fctx, qname, 0));
6441	}
6442
6443	/*
6444	 * We didn't end with an incomplete chain, so the rcode should be
6445	 * "no error".
6446	 */
6447	if (message->rcode != dns_rcode_noerror) {
6448		log_formerr(fctx, "CNAME/DNAME chain complete, but RCODE "
6449				  "indicates error");
6450		return (DNS_R_FORMERR);
6451	}
6452
6453	/*
6454	 * Examine the authority section (if there is one).
6455	 *
6456	 * We expect there to be only one owner name for all the rdatasets
6457	 * in this section, and we expect that it is not external.
6458	 */
6459	done = ISC_FALSE;
6460	ns_name = NULL;
6461	ns_rdataset = NULL;
6462	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6463	while (!done && result == ISC_R_SUCCESS) {
6464		name = NULL;
6465		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
6466		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
6467		if (!external) {
6468			/*
6469			 * We expect to find NS or SIG NS rdatasets, and
6470			 * nothing else.
6471			 */
6472			for (rdataset = ISC_LIST_HEAD(name->list);
6473			     rdataset != NULL;
6474			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6475				if (rdataset->type == dns_rdatatype_ns ||
6476				    (rdataset->type == dns_rdatatype_rrsig &&
6477				     rdataset->covers == dns_rdatatype_ns)) {
6478					name->attributes |=
6479						DNS_NAMEATTR_CACHE;
6480					rdataset->attributes |=
6481						DNS_RDATASETATTR_CACHE;
6482					if (aa && !chaining)
6483						rdataset->trust =
6484						    dns_trust_authauthority;
6485					else
6486						rdataset->trust =
6487						    dns_trust_additional;
6488
6489					if (rdataset->type == dns_rdatatype_ns) {
6490						ns_name = name;
6491						ns_rdataset = rdataset;
6492					}
6493					/*
6494					 * Mark any additional data related
6495					 * to this rdataset.
6496					 */
6497					(void)dns_rdataset_additionaldata(
6498							rdataset,
6499							check_related,
6500							fctx);
6501					done = ISC_TRUE;
6502				}
6503			}
6504		}
6505		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
6506	}
6507	if (result == ISC_R_NOMORE)
6508		result = ISC_R_SUCCESS;
6509
6510	log_ns_ttl(fctx, "answer_response");
6511
6512	if (ns_rdataset != NULL && dns_name_equal(&fctx->domain, ns_name) &&
6513	    !dns_name_equal(ns_name, dns_rootname))
6514		trim_ns_ttl(fctx, ns_name, ns_rdataset);
6515
6516	return (result);
6517}
6518
6519static isc_boolean_t
6520fctx_decreference(fetchctx_t *fctx) {
6521	isc_boolean_t bucket_empty = ISC_FALSE;
6522
6523	INSIST(fctx->references > 0);
6524	fctx->references--;
6525	if (fctx->references == 0) {
6526		/*
6527		 * No one cares about the result of this fetch anymore.
6528		 */
6529		if (fctx->pending == 0 && fctx->nqueries == 0 &&
6530		    ISC_LIST_EMPTY(fctx->validators) && SHUTTINGDOWN(fctx)) {
6531			/*
6532			 * This fctx is already shutdown; we were just
6533			 * waiting for the last reference to go away.
6534			 */
6535			bucket_empty = fctx_unlink(fctx);
6536			fctx_destroy(fctx);
6537		} else {
6538			/*
6539			 * Initiate shutdown.
6540			 */
6541			fctx_shutdown(fctx);
6542		}
6543	}
6544	return (bucket_empty);
6545}
6546
6547static void
6548resume_dslookup(isc_task_t *task, isc_event_t *event) {
6549	dns_fetchevent_t *fevent;
6550	dns_resolver_t *res;
6551	fetchctx_t *fctx;
6552	isc_result_t result;
6553	isc_boolean_t bucket_empty;
6554	isc_boolean_t locked = ISC_FALSE;
6555	unsigned int bucketnum;
6556	dns_rdataset_t nameservers;
6557	dns_fixedname_t fixed;
6558	dns_name_t *domain;
6559
6560	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
6561	fevent = (dns_fetchevent_t *)event;
6562	fctx = event->ev_arg;
6563	REQUIRE(VALID_FCTX(fctx));
6564	res = fctx->res;
6565
6566	UNUSED(task);
6567	FCTXTRACE("resume_dslookup");
6568
6569	if (fevent->node != NULL)
6570		dns_db_detachnode(fevent->db, &fevent->node);
6571	if (fevent->db != NULL)
6572		dns_db_detach(&fevent->db);
6573
6574	dns_rdataset_init(&nameservers);
6575
6576	bucketnum = fctx->bucketnum;
6577	if (fevent->result == ISC_R_CANCELED) {
6578		dns_resolver_destroyfetch(&fctx->nsfetch);
6579		fctx_done(fctx, ISC_R_CANCELED, __LINE__);
6580	} else if (fevent->result == ISC_R_SUCCESS) {
6581
6582		FCTXTRACE("resuming DS lookup");
6583
6584		dns_resolver_destroyfetch(&fctx->nsfetch);
6585		if (dns_rdataset_isassociated(&fctx->nameservers))
6586			dns_rdataset_disassociate(&fctx->nameservers);
6587		dns_rdataset_clone(fevent->rdataset, &fctx->nameservers);
6588		fctx->ns_ttl = fctx->nameservers.ttl;
6589		fctx->ns_ttl_ok = ISC_TRUE;
6590		log_ns_ttl(fctx, "resume_dslookup");
6591		dns_name_free(&fctx->domain, fctx->mctx);
6592		dns_name_init(&fctx->domain, NULL);
6593		result = dns_name_dup(&fctx->nsname, fctx->mctx, &fctx->domain);
6594		if (result != ISC_R_SUCCESS) {
6595			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
6596			goto cleanup;
6597		}
6598		/*
6599		 * Try again.
6600		 */
6601		fctx_try(fctx, ISC_TRUE, ISC_FALSE);
6602	} else {
6603		unsigned int n;
6604		dns_rdataset_t *nsrdataset = NULL;
6605
6606		/*
6607		 * Retrieve state from fctx->nsfetch before we destroy it.
6608		 */
6609		dns_fixedname_init(&fixed);
6610		domain = dns_fixedname_name(&fixed);
6611		dns_name_copy(&fctx->nsfetch->private->domain, domain, NULL);
6612		if (dns_name_equal(&fctx->nsname, domain)) {
6613			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
6614			dns_resolver_destroyfetch(&fctx->nsfetch);
6615			goto cleanup;
6616		}
6617		if (dns_rdataset_isassociated(
6618		    &fctx->nsfetch->private->nameservers)) {
6619			dns_rdataset_clone(
6620			    &fctx->nsfetch->private->nameservers,
6621			    &nameservers);
6622			nsrdataset = &nameservers;
6623		} else
6624			domain = NULL;
6625		dns_resolver_destroyfetch(&fctx->nsfetch);
6626		n = dns_name_countlabels(&fctx->nsname);
6627		dns_name_getlabelsequence(&fctx->nsname, 1, n - 1,
6628					  &fctx->nsname);
6629
6630		if (dns_rdataset_isassociated(fevent->rdataset))
6631			dns_rdataset_disassociate(fevent->rdataset);
6632		FCTXTRACE("continuing to look for parent's NS records");
6633		result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
6634						  dns_rdatatype_ns, domain,
6635						  nsrdataset, NULL, 0, task,
6636						  resume_dslookup, fctx,
6637						  &fctx->nsrrset, NULL,
6638						  &fctx->nsfetch);
6639		if (result != ISC_R_SUCCESS)
6640			fctx_done(fctx, result, __LINE__);
6641		else {
6642			LOCK(&res->buckets[bucketnum].lock);
6643			locked = ISC_TRUE;
6644			fctx->references++;
6645		}
6646	}
6647
6648 cleanup:
6649	if (dns_rdataset_isassociated(&nameservers))
6650		dns_rdataset_disassociate(&nameservers);
6651	if (dns_rdataset_isassociated(fevent->rdataset))
6652		dns_rdataset_disassociate(fevent->rdataset);
6653	INSIST(fevent->sigrdataset == NULL);
6654	isc_event_free(&event);
6655	if (!locked)
6656		LOCK(&res->buckets[bucketnum].lock);
6657	bucket_empty = fctx_decreference(fctx);
6658	UNLOCK(&res->buckets[bucketnum].lock);
6659	if (bucket_empty)
6660		empty_bucket(res);
6661}
6662
6663static inline void
6664checknamessection(dns_message_t *message, dns_section_t section) {
6665	isc_result_t result;
6666	dns_name_t *name;
6667	dns_rdata_t rdata = DNS_RDATA_INIT;
6668	dns_rdataset_t *rdataset;
6669
6670	for (result = dns_message_firstname(message, section);
6671	     result == ISC_R_SUCCESS;
6672	     result = dns_message_nextname(message, section))
6673	{
6674		name = NULL;
6675		dns_message_currentname(message, section, &name);
6676		for (rdataset = ISC_LIST_HEAD(name->list);
6677		     rdataset != NULL;
6678		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6679			for (result = dns_rdataset_first(rdataset);
6680			     result == ISC_R_SUCCESS;
6681			     result = dns_rdataset_next(rdataset)) {
6682				dns_rdataset_current(rdataset, &rdata);
6683				if (!dns_rdata_checkowner(name, rdata.rdclass,
6684							  rdata.type,
6685							  ISC_FALSE) ||
6686				    !dns_rdata_checknames(&rdata, name, NULL))
6687				{
6688					rdataset->attributes |=
6689						DNS_RDATASETATTR_CHECKNAMES;
6690				}
6691				dns_rdata_reset(&rdata);
6692			}
6693		}
6694	}
6695}
6696
6697static void
6698checknames(dns_message_t *message) {
6699
6700	checknamessection(message, DNS_SECTION_ANSWER);
6701	checknamessection(message, DNS_SECTION_AUTHORITY);
6702	checknamessection(message, DNS_SECTION_ADDITIONAL);
6703}
6704
6705/*
6706 * Log server NSID at log level 'level'
6707 */
6708static void
6709log_nsid(isc_buffer_t *opt, size_t nsid_len, resquery_t *query,
6710	 int level, isc_mem_t *mctx)
6711{
6712	static const char hex[17] = "0123456789abcdef";
6713	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
6714	isc_uint16_t buflen, i;
6715	unsigned char *p, *buf, *nsid;
6716
6717	/* Allocate buffer for storing hex version of the NSID */
6718	buflen = (isc_uint16_t)nsid_len * 2 + 1;
6719	buf = isc_mem_get(mctx, buflen);
6720	if (buf == NULL)
6721		return;
6722
6723	/* Convert to hex */
6724	p = buf;
6725	nsid = isc_buffer_current(opt);
6726	for (i = 0; i < nsid_len; i++) {
6727		*p++ = hex[(nsid[0] >> 4) & 0xf];
6728		*p++ = hex[nsid[0] & 0xf];
6729		nsid++;
6730	}
6731	*p = '\0';
6732
6733	isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
6734			    sizeof(addrbuf));
6735	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6736		      DNS_LOGMODULE_RESOLVER, level,
6737		      "received NSID '%s' from %s", buf, addrbuf);
6738
6739	/* Clean up */
6740	isc_mem_put(mctx, buf, buflen);
6741	return;
6742}
6743
6744static void
6745log_packet(dns_message_t *message, int level, isc_mem_t *mctx) {
6746	isc_buffer_t buffer;
6747	char *buf = NULL;
6748	int len = 1024;
6749	isc_result_t result;
6750
6751	if (! isc_log_wouldlog(dns_lctx, level))
6752		return;
6753
6754	/*
6755	 * Note that these are multiline debug messages.  We want a newline
6756	 * to appear in the log after each message.
6757	 */
6758
6759	do {
6760		buf = isc_mem_get(mctx, len);
6761		if (buf == NULL)
6762			break;
6763		isc_buffer_init(&buffer, buf, len);
6764		result = dns_message_totext(message, &dns_master_style_debug,
6765					    0, &buffer);
6766		if (result == ISC_R_NOSPACE) {
6767			isc_mem_put(mctx, buf, len);
6768			len += 1024;
6769		} else if (result == ISC_R_SUCCESS)
6770			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6771				      DNS_LOGMODULE_RESOLVER, level,
6772				      "received packet:\n%.*s",
6773				      (int)isc_buffer_usedlength(&buffer),
6774				      buf);
6775	} while (result == ISC_R_NOSPACE);
6776
6777	if (buf != NULL)
6778		isc_mem_put(mctx, buf, len);
6779}
6780
6781static isc_boolean_t
6782iscname(fetchctx_t *fctx) {
6783	isc_result_t result;
6784
6785	result = dns_message_findname(fctx->rmessage, DNS_SECTION_ANSWER,
6786				      &fctx->name, dns_rdatatype_cname, 0,
6787				      NULL, NULL);
6788	return (result == ISC_R_SUCCESS ? ISC_TRUE : ISC_FALSE);
6789}
6790
6791static isc_boolean_t
6792betterreferral(fetchctx_t *fctx) {
6793	isc_result_t result;
6794	dns_name_t *name;
6795	dns_rdataset_t *rdataset;
6796	dns_message_t *message = fctx->rmessage;
6797
6798	for (result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6799	     result == ISC_R_SUCCESS;
6800	     result = dns_message_nextname(message, DNS_SECTION_AUTHORITY)) {
6801		name = NULL;
6802		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
6803		if (!isstrictsubdomain(name, &fctx->domain))
6804			continue;
6805		for (rdataset = ISC_LIST_HEAD(name->list);
6806		     rdataset != NULL;
6807		     rdataset = ISC_LIST_NEXT(rdataset, link))
6808			if (rdataset->type == dns_rdatatype_ns)
6809				return (ISC_TRUE);
6810	}
6811	return (ISC_FALSE);
6812}
6813
6814static void
6815process_opt(resquery_t *query, dns_rdataset_t *opt) {
6816	dns_rdata_t rdata;
6817	isc_buffer_t optbuf;
6818	isc_result_t result;
6819	isc_uint16_t optcode;
6820	isc_uint16_t optlen;
6821
6822	result = dns_rdataset_first(opt);
6823	if (result == ISC_R_SUCCESS) {
6824		dns_rdata_init(&rdata);
6825		dns_rdataset_current(opt, &rdata);
6826		isc_buffer_init(&optbuf, rdata.data, rdata.length);
6827		isc_buffer_add(&optbuf, rdata.length);
6828		while (isc_buffer_remaininglength(&optbuf) >= 4) {
6829			optcode = isc_buffer_getuint16(&optbuf);
6830			optlen = isc_buffer_getuint16(&optbuf);
6831			INSIST(optlen <= isc_buffer_remaininglength(&optbuf));
6832			switch (optcode) {
6833			case DNS_OPT_NSID:
6834				if (query->options & DNS_FETCHOPT_WANTNSID)
6835					log_nsid(&optbuf, optlen, query,
6836						 ISC_LOG_INFO,
6837						 query->fctx->res->mctx);
6838				isc_buffer_forward(&optbuf, optlen);
6839				break;
6840			default:
6841				isc_buffer_forward(&optbuf, optlen);
6842				break;
6843			}
6844		}
6845		INSIST(isc_buffer_remaininglength(&optbuf) == 0U);
6846	}
6847}
6848
6849static void
6850resquery_response(isc_task_t *task, isc_event_t *event) {
6851	isc_result_t result = ISC_R_SUCCESS;
6852	resquery_t *query = event->ev_arg;
6853	dns_dispatchevent_t *devent = (dns_dispatchevent_t *)event;
6854	isc_boolean_t keep_trying, get_nameservers, resend;
6855	isc_boolean_t truncated;
6856	dns_message_t *message;
6857	dns_rdataset_t *opt;
6858	fetchctx_t *fctx;
6859	dns_name_t *fname;
6860	dns_fixedname_t foundname;
6861	isc_stdtime_t now;
6862	isc_time_t tnow, *finish;
6863	dns_adbaddrinfo_t *addrinfo;
6864	unsigned int options;
6865	unsigned int findoptions;
6866	isc_result_t broken_server;
6867	badnstype_t broken_type = badns_response;
6868	isc_boolean_t no_response;
6869
6870	REQUIRE(VALID_QUERY(query));
6871	fctx = query->fctx;
6872	options = query->options;
6873	REQUIRE(VALID_FCTX(fctx));
6874	REQUIRE(event->ev_type == DNS_EVENT_DISPATCH);
6875
6876	QTRACE("response");
6877
6878	if (isc_sockaddr_pf(&query->addrinfo->sockaddr) == PF_INET)
6879		inc_stats(fctx->res, dns_resstatscounter_responsev4);
6880	else
6881		inc_stats(fctx->res, dns_resstatscounter_responsev6);
6882
6883	(void)isc_timer_touch(fctx->timer);
6884
6885	keep_trying = ISC_FALSE;
6886	broken_server = ISC_R_SUCCESS;
6887	get_nameservers = ISC_FALSE;
6888	resend = ISC_FALSE;
6889	truncated = ISC_FALSE;
6890	finish = NULL;
6891	no_response = ISC_FALSE;
6892
6893	if (fctx->res->exiting) {
6894		result = ISC_R_SHUTTINGDOWN;
6895		goto done;
6896	}
6897
6898	fctx->timeouts = 0;
6899	fctx->timeout = ISC_FALSE;
6900	fctx->addrinfo = query->addrinfo;
6901
6902	/*
6903	 * XXXRTH  We should really get the current time just once.  We
6904	 *		need a routine to convert from an isc_time_t to an
6905	 *		isc_stdtime_t.
6906	 */
6907	TIME_NOW(&tnow);
6908	finish = &tnow;
6909	isc_stdtime_get(&now);
6910
6911	/*
6912	 * Did the dispatcher have a problem?
6913	 */
6914	if (devent->result != ISC_R_SUCCESS) {
6915		if (devent->result == ISC_R_EOF &&
6916		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
6917			/*
6918			 * The problem might be that they
6919			 * don't understand EDNS0.  Turn it
6920			 * off and try again.
6921			 */
6922			options |= DNS_FETCHOPT_NOEDNS0;
6923			resend = ISC_TRUE;
6924			add_bad_edns(fctx, &query->addrinfo->sockaddr);
6925		} else {
6926			/*
6927			 * There's no hope for this query.
6928			 */
6929			keep_trying = ISC_TRUE;
6930
6931			/*
6932			 * If this is a network error on an exclusive query
6933			 * socket, mark the server as bad so that we won't try
6934			 * it for this fetch again.  Also adjust finish and
6935			 * no_response so that we penalize this address in SRTT
6936			 * adjustment later.
6937			 */
6938			if (query->exclusivesocket &&
6939			    (devent->result == ISC_R_HOSTUNREACH ||
6940			     devent->result == ISC_R_NETUNREACH ||
6941			     devent->result == ISC_R_CONNREFUSED ||
6942			     devent->result == ISC_R_CANCELED)) {
6943				    broken_server = devent->result;
6944				    broken_type = badns_unreachable;
6945				    finish = NULL;
6946				    no_response = ISC_TRUE;
6947			}
6948		}
6949		goto done;
6950	}
6951
6952	message = fctx->rmessage;
6953
6954	if (query->tsig != NULL) {
6955		result = dns_message_setquerytsig(message, query->tsig);
6956		if (result != ISC_R_SUCCESS)
6957			goto done;
6958	}
6959
6960	if (query->tsigkey) {
6961		result = dns_message_settsigkey(message, query->tsigkey);
6962		if (result != ISC_R_SUCCESS)
6963			goto done;
6964	}
6965
6966	dns_message_setclass(message, fctx->res->rdclass);
6967
6968	result = dns_message_parse(message, &devent->buffer, 0);
6969	if (result != ISC_R_SUCCESS) {
6970		switch (result) {
6971		case ISC_R_UNEXPECTEDEND:
6972			if (!message->question_ok ||
6973			    (message->flags & DNS_MESSAGEFLAG_TC) == 0 ||
6974			    (options & DNS_FETCHOPT_TCP) != 0) {
6975				/*
6976				 * Either the message ended prematurely,
6977				 * and/or wasn't marked as being truncated,
6978				 * and/or this is a response to a query we
6979				 * sent over TCP.  In all of these cases,
6980				 * something is wrong with the remote
6981				 * server and we don't want to retry using
6982				 * TCP.
6983				 */
6984				if ((query->options & DNS_FETCHOPT_NOEDNS0)
6985				    == 0) {
6986					/*
6987					 * The problem might be that they
6988					 * don't understand EDNS0.  Turn it
6989					 * off and try again.
6990					 */
6991					options |= DNS_FETCHOPT_NOEDNS0;
6992					resend = ISC_TRUE;
6993					add_bad_edns(fctx,
6994						    &query->addrinfo->sockaddr);
6995					inc_stats(fctx->res,
6996						 dns_resstatscounter_edns0fail);
6997				} else {
6998					broken_server = result;
6999					keep_trying = ISC_TRUE;
7000				}
7001				goto done;
7002			}
7003			/*
7004			 * We defer retrying via TCP for a bit so we can
7005			 * check out this message further.
7006			 */
7007			truncated = ISC_TRUE;
7008			break;
7009		case DNS_R_FORMERR:
7010			if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
7011				/*
7012				 * The problem might be that they
7013				 * don't understand EDNS0.  Turn it
7014				 * off and try again.
7015				 */
7016				options |= DNS_FETCHOPT_NOEDNS0;
7017				resend = ISC_TRUE;
7018				add_bad_edns(fctx, &query->addrinfo->sockaddr);
7019				inc_stats(fctx->res,
7020						 dns_resstatscounter_edns0fail);
7021			} else {
7022				broken_server = DNS_R_UNEXPECTEDRCODE;
7023				keep_trying = ISC_TRUE;
7024			}
7025			goto done;
7026		default:
7027			/*
7028			 * Something bad has happened.
7029			 */
7030			goto done;
7031		}
7032	}
7033
7034
7035	/*
7036	 * Log the incoming packet.
7037	 */
7038	log_packet(message, ISC_LOG_DEBUG(10), fctx->res->mctx);
7039
7040	if (message->rdclass != fctx->res->rdclass) {
7041		resend = ISC_TRUE;
7042		FCTXTRACE("bad class");
7043		goto done;
7044	}
7045
7046	/*
7047	 * Process receive opt record.
7048	 */
7049	opt = dns_message_getopt(message);
7050	if (opt != NULL)
7051		process_opt(query, opt);
7052
7053	/*
7054	 * If the message is signed, check the signature.  If not, this
7055	 * returns success anyway.
7056	 */
7057	result = dns_message_checksig(message, fctx->res->view);
7058	if (result != ISC_R_SUCCESS)
7059		goto done;
7060
7061	/*
7062	 * The dispatcher should ensure we only get responses with QR set.
7063	 */
7064	INSIST((message->flags & DNS_MESSAGEFLAG_QR) != 0);
7065	/*
7066	 * INSIST() that the message comes from the place we sent it to,
7067	 * since the dispatch code should ensure this.
7068	 *
7069	 * INSIST() that the message id is correct (this should also be
7070	 * ensured by the dispatch code).
7071	 */
7072
7073	/*
7074	 * We have an affirmative response to the query and we have
7075	 * previously got a response from this server which indicated
7076	 * EDNS may not be supported so we can now cache the lack of
7077	 * EDNS support.
7078	 */
7079	if (opt == NULL &&
7080	    (message->rcode == dns_rcode_noerror ||
7081	     message->rcode == dns_rcode_nxdomain ||
7082	     message->rcode == dns_rcode_refused ||
7083	     message->rcode == dns_rcode_yxdomain) &&
7084	     bad_edns(fctx, &query->addrinfo->sockaddr)) {
7085		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
7086		isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
7087				    sizeof(addrbuf));
7088		dns_adb_changeflags(fctx->adb, query->addrinfo,
7089				    DNS_FETCHOPT_NOEDNS0,
7090				    DNS_FETCHOPT_NOEDNS0);
7091	}
7092
7093	/*
7094	 * Deal with truncated responses by retrying using TCP.
7095	 */
7096	if ((message->flags & DNS_MESSAGEFLAG_TC) != 0)
7097		truncated = ISC_TRUE;
7098
7099	if (truncated) {
7100		inc_stats(fctx->res, dns_resstatscounter_truncated);
7101		if ((options & DNS_FETCHOPT_TCP) != 0) {
7102			broken_server = DNS_R_TRUNCATEDTCP;
7103			keep_trying = ISC_TRUE;
7104		} else {
7105			options |= DNS_FETCHOPT_TCP;
7106			resend = ISC_TRUE;
7107		}
7108		goto done;
7109	}
7110
7111	/*
7112	 * Is it a query response?
7113	 */
7114	if (message->opcode != dns_opcode_query) {
7115		/* XXXRTH Log */
7116		broken_server = DNS_R_UNEXPECTEDOPCODE;
7117		keep_trying = ISC_TRUE;
7118		goto done;
7119	}
7120
7121	/*
7122	 * Update statistics about erroneous responses.
7123	 */
7124	if (message->rcode != dns_rcode_noerror) {
7125		switch (message->rcode) {
7126		case dns_rcode_nxdomain:
7127			inc_stats(fctx->res, dns_resstatscounter_nxdomain);
7128			break;
7129		case dns_rcode_servfail:
7130			inc_stats(fctx->res, dns_resstatscounter_servfail);
7131			break;
7132		case dns_rcode_formerr:
7133			inc_stats(fctx->res, dns_resstatscounter_formerr);
7134			break;
7135		default:
7136			inc_stats(fctx->res, dns_resstatscounter_othererror);
7137			break;
7138		}
7139	}
7140
7141	/*
7142	 * Is the remote server broken, or does it dislike us?
7143	 */
7144	if (message->rcode != dns_rcode_noerror &&
7145	    message->rcode != dns_rcode_nxdomain) {
7146		if (((message->rcode == dns_rcode_formerr ||
7147		      message->rcode == dns_rcode_notimp) ||
7148		     (message->rcode == dns_rcode_servfail &&
7149		      dns_message_getopt(message) == NULL)) &&
7150		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
7151			/*
7152			 * It's very likely they don't like EDNS0.
7153			 * If the response code is SERVFAIL, also check if the
7154			 * response contains an OPT RR and don't cache the
7155			 * failure since it can be returned for various other
7156			 * reasons.
7157			 *
7158			 * XXXRTH  We should check if the question
7159			 *		we're asking requires EDNS0, and
7160			 *		if so, we should bail out.
7161			 */
7162			options |= DNS_FETCHOPT_NOEDNS0;
7163			resend = ISC_TRUE;
7164			/*
7165			 * Remember that they may not like EDNS0.
7166			 */
7167			add_bad_edns(fctx, &query->addrinfo->sockaddr);
7168			inc_stats(fctx->res, dns_resstatscounter_edns0fail);
7169		} else if (message->rcode == dns_rcode_formerr) {
7170			if (ISFORWARDER(query->addrinfo)) {
7171				/*
7172				 * This forwarder doesn't understand us,
7173				 * but other forwarders might.  Keep trying.
7174				 */
7175				broken_server = DNS_R_REMOTEFORMERR;
7176				keep_trying = ISC_TRUE;
7177			} else {
7178				/*
7179				 * The server doesn't understand us.  Since
7180				 * all servers for a zone need similar
7181				 * capabilities, we assume that we will get
7182				 * FORMERR from all servers, and thus we
7183				 * cannot make any more progress with this
7184				 * fetch.
7185				 */
7186				log_formerr(fctx, "server sent FORMERR");
7187				result = DNS_R_FORMERR;
7188			}
7189		} else if (message->rcode == dns_rcode_yxdomain) {
7190			/*
7191			 * DNAME mapping failed because the new name
7192			 * was too long.  There's no chance of success
7193			 * for this fetch.
7194			 */
7195			result = DNS_R_YXDOMAIN;
7196		} else if (message->rcode == dns_rcode_badvers) {
7197			unsigned int flags, mask;
7198			unsigned int version;
7199
7200			resend = ISC_TRUE;
7201			INSIST(opt != NULL);
7202			version = (opt->ttl >> 16) & 0xff;
7203			flags = (version << DNS_FETCHOPT_EDNSVERSIONSHIFT) |
7204				DNS_FETCHOPT_EDNSVERSIONSET;
7205			mask = DNS_FETCHOPT_EDNSVERSIONMASK |
7206			       DNS_FETCHOPT_EDNSVERSIONSET;
7207			switch (version) {
7208			case 0:
7209				dns_adb_changeflags(fctx->adb, query->addrinfo,
7210						    flags, mask);
7211				break;
7212			default:
7213				broken_server = DNS_R_BADVERS;
7214				keep_trying = ISC_TRUE;
7215				break;
7216			}
7217		} else {
7218			/*
7219			 * XXXRTH log.
7220			 */
7221			broken_server = DNS_R_UNEXPECTEDRCODE;
7222			INSIST(broken_server != ISC_R_SUCCESS);
7223			keep_trying = ISC_TRUE;
7224		}
7225		goto done;
7226	}
7227
7228	/*
7229	 * Is the question the same as the one we asked?
7230	 */
7231	result = same_question(fctx);
7232	if (result != ISC_R_SUCCESS) {
7233		/* XXXRTH Log */
7234		if (result == DNS_R_FORMERR)
7235			keep_trying = ISC_TRUE;
7236		goto done;
7237	}
7238
7239	/*
7240	 * Is the server lame?
7241	 */
7242	if (fctx->res->lame_ttl != 0 && !ISFORWARDER(query->addrinfo) &&
7243	    is_lame(fctx)) {
7244		inc_stats(fctx->res, dns_resstatscounter_lame);
7245		log_lame(fctx, query->addrinfo);
7246		result = dns_adb_marklame(fctx->adb, query->addrinfo,
7247					  &fctx->name, fctx->type,
7248					  now + fctx->res->lame_ttl);
7249		if (result != ISC_R_SUCCESS)
7250			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7251				      DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR,
7252				      "could not mark server as lame: %s",
7253				      isc_result_totext(result));
7254		broken_server = DNS_R_LAME;
7255		keep_trying = ISC_TRUE;
7256		goto done;
7257	}
7258
7259	/*
7260	 * Enforce delegations only zones like NET and COM.
7261	 */
7262	if (!ISFORWARDER(query->addrinfo) &&
7263	    dns_view_isdelegationonly(fctx->res->view, &fctx->domain) &&
7264	    !dns_name_equal(&fctx->domain, &fctx->name) &&
7265	    fix_mustbedelegationornxdomain(message, fctx)) {
7266		char namebuf[DNS_NAME_FORMATSIZE];
7267		char domainbuf[DNS_NAME_FORMATSIZE];
7268		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
7269		char classbuf[64];
7270		char typebuf[64];
7271
7272		dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
7273		dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
7274		dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
7275		dns_rdataclass_format(fctx->res->rdclass, classbuf,
7276				      sizeof(classbuf));
7277		isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
7278				    sizeof(addrbuf));
7279
7280		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DELEGATION_ONLY,
7281			     DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7282			     "enforced delegation-only for '%s' (%s/%s/%s) "
7283			     "from %s",
7284			     domainbuf, namebuf, typebuf, classbuf, addrbuf);
7285	}
7286
7287	if ((fctx->res->options & DNS_RESOLVER_CHECKNAMES) != 0)
7288		checknames(message);
7289
7290	/*
7291	 * Clear cache bits.
7292	 */
7293	fctx->attributes &= ~(FCTX_ATTR_WANTNCACHE | FCTX_ATTR_WANTCACHE);
7294
7295	/*
7296	 * Did we get any answers?
7297	 */
7298	if (message->counts[DNS_SECTION_ANSWER] > 0 &&
7299	    (message->rcode == dns_rcode_noerror ||
7300	     message->rcode == dns_rcode_nxdomain)) {
7301		/*
7302		 * [normal case]
7303		 * We've got answers.  If it has an authoritative answer or an
7304		 * answer from a forwarder, we're done.
7305		 */
7306		if ((message->flags & DNS_MESSAGEFLAG_AA) != 0 ||
7307		    ISFORWARDER(query->addrinfo))
7308			result = answer_response(fctx);
7309		else if (iscname(fctx) &&
7310			 fctx->type != dns_rdatatype_any &&
7311			 fctx->type != dns_rdatatype_cname) {
7312			/*
7313			 * A BIND8 server could return a non-authoritative
7314			 * answer when a CNAME is followed.  We should treat
7315			 * it as a valid answer.
7316			 */
7317			result = answer_response(fctx);
7318		} else if (fctx->type != dns_rdatatype_ns &&
7319			   !betterreferral(fctx)) {
7320			/*
7321			 * Lame response !!!.
7322			 */
7323			result = answer_response(fctx);
7324		} else {
7325			if (fctx->type == dns_rdatatype_ns) {
7326				/*
7327				 * A BIND 8 server could incorrectly return a
7328				 * non-authoritative answer to an NS query
7329				 * instead of a referral. Since this answer
7330				 * lacks the SIGs necessary to do DNSSEC
7331				 * validation, we must invoke the following
7332				 * special kludge to treat it as a referral.
7333				 */
7334				result = noanswer_response(fctx, NULL,
7335						   LOOK_FOR_NS_IN_ANSWER);
7336			} else {
7337				/*
7338				 * Some other servers may still somehow include
7339				 * an answer when it should return a referral
7340				 * with an empty answer.  Check to see if we can
7341				 * treat this as a referral by ignoring the
7342				 * answer.  Further more, there may be an
7343				 * implementation that moves A/AAAA glue records
7344				 * to the answer section for that type of
7345				 * delegation when the query is for that glue
7346				 * record.  LOOK_FOR_GLUE_IN_ANSWER will handle
7347				 * such a corner case.
7348				 */
7349				result = noanswer_response(fctx, NULL,
7350						   LOOK_FOR_GLUE_IN_ANSWER);
7351			}
7352			if (result != DNS_R_DELEGATION) {
7353				/*
7354				 * At this point, AA is not set, the response
7355				 * is not a referral, and the server is not a
7356				 * forwarder.  It is technically lame and it's
7357				 * easier to treat it as such than to figure out
7358				 * some more elaborate course of action.
7359				 */
7360				broken_server = DNS_R_LAME;
7361				keep_trying = ISC_TRUE;
7362				goto done;
7363			}
7364			goto force_referral;
7365		}
7366		if (result != ISC_R_SUCCESS) {
7367			if (result == DNS_R_FORMERR)
7368				keep_trying = ISC_TRUE;
7369			goto done;
7370		}
7371	} else if (message->counts[DNS_SECTION_AUTHORITY] > 0 ||
7372		   message->rcode == dns_rcode_noerror ||
7373		   message->rcode == dns_rcode_nxdomain) {
7374		/*
7375		 * NXDOMAIN, NXRDATASET, or referral.
7376		 */
7377		result = noanswer_response(fctx, NULL, 0);
7378		switch (result) {
7379		case ISC_R_SUCCESS:
7380		case DNS_R_CHASEDSSERVERS:
7381			break;
7382		case DNS_R_DELEGATION:
7383 force_referral:
7384			/*
7385			 * We don't have the answer, but we know a better
7386			 * place to look.
7387			 */
7388			get_nameservers = ISC_TRUE;
7389			keep_trying = ISC_TRUE;
7390			/*
7391			 * We have a new set of name servers, and it
7392			 * has not experienced any restarts yet.
7393			 */
7394			fctx->restarts = 0;
7395
7396			/*
7397			 * Update local statistics counters collected for each
7398			 * new zone.
7399			 */
7400			fctx->referrals++;
7401			fctx->querysent = 0;
7402			fctx->lamecount = 0;
7403			fctx->neterr = 0;
7404			fctx->badresp = 0;
7405			fctx->adberr = 0;
7406
7407			result = ISC_R_SUCCESS;
7408			break;
7409		default:
7410			/*
7411			 * Something has gone wrong.
7412			 */
7413			if (result == DNS_R_FORMERR)
7414				keep_trying = ISC_TRUE;
7415			goto done;
7416		}
7417	} else {
7418		/*
7419		 * The server is insane.
7420		 */
7421		/* XXXRTH Log */
7422		broken_server = DNS_R_UNEXPECTEDRCODE;
7423		keep_trying = ISC_TRUE;
7424		goto done;
7425	}
7426
7427	/*
7428	 * Follow additional section data chains.
7429	 */
7430	chase_additional(fctx);
7431
7432	/*
7433	 * Cache the cacheable parts of the message.  This may also cause
7434	 * work to be queued to the DNSSEC validator.
7435	 */
7436	if (WANTCACHE(fctx)) {
7437		result = cache_message(fctx, query->addrinfo, now);
7438		if (result != ISC_R_SUCCESS)
7439			goto done;
7440	}
7441
7442	/*
7443	 * Ncache the negatively cacheable parts of the message.  This may
7444	 * also cause work to be queued to the DNSSEC validator.
7445	 */
7446	if (WANTNCACHE(fctx)) {
7447		dns_rdatatype_t covers;
7448		if (message->rcode == dns_rcode_nxdomain)
7449			covers = dns_rdatatype_any;
7450		else
7451			covers = fctx->type;
7452
7453		/*
7454		 * Cache any negative cache entries in the message.
7455		 */
7456		result = ncache_message(fctx, query->addrinfo, covers, now);
7457	}
7458
7459 done:
7460	/*
7461	 * Remember the query's addrinfo, in case we need to mark the
7462	 * server as broken.
7463	 */
7464	addrinfo = query->addrinfo;
7465
7466	/*
7467	 * Cancel the query.
7468	 *
7469	 * XXXRTH  Don't cancel the query if waiting for validation?
7470	 */
7471	fctx_cancelquery(&query, &devent, finish, no_response);
7472
7473	if (keep_trying) {
7474		if (result == DNS_R_FORMERR)
7475			broken_server = DNS_R_FORMERR;
7476		if (broken_server != ISC_R_SUCCESS) {
7477			/*
7478			 * Add this server to the list of bad servers for
7479			 * this fctx.
7480			 */
7481			add_bad(fctx, addrinfo, broken_server, broken_type);
7482		}
7483
7484		if (get_nameservers) {
7485			dns_name_t *name;
7486			dns_fixedname_init(&foundname);
7487			fname = dns_fixedname_name(&foundname);
7488			if (result != ISC_R_SUCCESS) {
7489				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7490				return;
7491			}
7492			findoptions = 0;
7493			if (dns_rdatatype_atparent(fctx->type))
7494				findoptions |= DNS_DBFIND_NOEXACT;
7495			if ((options & DNS_FETCHOPT_UNSHARED) == 0)
7496				name = &fctx->name;
7497			else
7498				name = &fctx->domain;
7499			result = dns_view_findzonecut(fctx->res->view,
7500						      name, fname,
7501						      now, findoptions,
7502						      ISC_TRUE,
7503						      &fctx->nameservers,
7504						      NULL);
7505			if (result != ISC_R_SUCCESS) {
7506				FCTXTRACE("couldn't find a zonecut");
7507				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7508				return;
7509			}
7510			if (!dns_name_issubdomain(fname, &fctx->domain)) {
7511				/*
7512				 * The best nameservers are now above our
7513				 * QDOMAIN.
7514				 */
7515				FCTXTRACE("nameservers now above QDOMAIN");
7516				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7517				return;
7518			}
7519			dns_name_free(&fctx->domain, fctx->mctx);
7520			dns_name_init(&fctx->domain, NULL);
7521			result = dns_name_dup(fname, fctx->mctx, &fctx->domain);
7522			if (result != ISC_R_SUCCESS) {
7523				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7524				return;
7525			}
7526			fctx->ns_ttl = fctx->nameservers.ttl;
7527			fctx->ns_ttl_ok = ISC_TRUE;
7528			fctx_cancelqueries(fctx, ISC_TRUE);
7529			fctx_cleanupfinds(fctx);
7530			fctx_cleanupaltfinds(fctx);
7531			fctx_cleanupforwaddrs(fctx);
7532			fctx_cleanupaltaddrs(fctx);
7533		}
7534		/*
7535		 * Try again.
7536		 */
7537		fctx_try(fctx, !get_nameservers, ISC_FALSE);
7538	} else if (resend) {
7539		/*
7540		 * Resend (probably with changed options).
7541		 */
7542		FCTXTRACE("resend");
7543		inc_stats(fctx->res, dns_resstatscounter_retry);
7544		result = fctx_query(fctx, addrinfo, options);
7545		if (result != ISC_R_SUCCESS)
7546			fctx_done(fctx, result, __LINE__);
7547	} else if (result == ISC_R_SUCCESS && !HAVE_ANSWER(fctx)) {
7548		/*
7549		 * All has gone well so far, but we are waiting for the
7550		 * DNSSEC validator to validate the answer.
7551		 */
7552		FCTXTRACE("wait for validator");
7553		fctx_cancelqueries(fctx, ISC_TRUE);
7554		/*
7555		 * We must not retransmit while the validator is working;
7556		 * it has references to the current rmessage.
7557		 */
7558		result = fctx_stopidletimer(fctx);
7559		if (result != ISC_R_SUCCESS)
7560			fctx_done(fctx, result, __LINE__);
7561	} else if (result == DNS_R_CHASEDSSERVERS) {
7562		unsigned int n;
7563		add_bad(fctx, addrinfo, result, broken_type);
7564		fctx_cancelqueries(fctx, ISC_TRUE);
7565		fctx_cleanupfinds(fctx);
7566		fctx_cleanupforwaddrs(fctx);
7567
7568		n = dns_name_countlabels(&fctx->name);
7569		dns_name_getlabelsequence(&fctx->name, 1, n - 1, &fctx->nsname);
7570
7571		FCTXTRACE("suspending DS lookup to find parent's NS records");
7572
7573		result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
7574						  dns_rdatatype_ns,
7575						  NULL, NULL, NULL, 0, task,
7576						  resume_dslookup, fctx,
7577						  &fctx->nsrrset, NULL,
7578						  &fctx->nsfetch);
7579		if (result != ISC_R_SUCCESS)
7580			fctx_done(fctx, result, __LINE__);
7581		else {
7582			LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
7583			fctx->references++;
7584			UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
7585			result = fctx_stopidletimer(fctx);
7586			if (result != ISC_R_SUCCESS)
7587				fctx_done(fctx, result, __LINE__);
7588		}
7589	} else {
7590		/*
7591		 * We're done.
7592		 */
7593		fctx_done(fctx, result, __LINE__);
7594	}
7595}
7596
7597
7598/***
7599 *** Resolver Methods
7600 ***/
7601static void
7602destroy_badcache(dns_resolver_t *res) {
7603	dns_badcache_t *bad, *next;
7604	unsigned int i;
7605
7606	if (res->badcache != NULL) {
7607		for (i = 0; i < res->badhash; i++)
7608			for (bad = res->badcache[i]; bad != NULL;
7609			     bad = next) {
7610				next = bad->next;
7611				isc_mem_put(res->mctx, bad, sizeof(*bad) +
7612					    bad->name.length);
7613				res->badcount--;
7614			}
7615		isc_mem_put(res->mctx, res->badcache,
7616			    sizeof(*res->badcache) * res->badhash);
7617		res->badcache = NULL;
7618		res->badhash = 0;
7619		INSIST(res->badcount == 0);
7620	}
7621}
7622
7623static void
7624destroy(dns_resolver_t *res) {
7625	unsigned int i;
7626	alternate_t *a;
7627
7628	REQUIRE(res->references == 0);
7629	REQUIRE(!res->priming);
7630	REQUIRE(res->primefetch == NULL);
7631
7632	RTRACE("destroy");
7633
7634	INSIST(res->nfctx == 0);
7635
7636	DESTROYLOCK(&res->primelock);
7637	DESTROYLOCK(&res->nlock);
7638	DESTROYLOCK(&res->lock);
7639	for (i = 0; i < res->nbuckets; i++) {
7640		INSIST(ISC_LIST_EMPTY(res->buckets[i].fctxs));
7641		isc_task_shutdown(res->buckets[i].task);
7642		isc_task_detach(&res->buckets[i].task);
7643		DESTROYLOCK(&res->buckets[i].lock);
7644		isc_mem_detach(&res->buckets[i].mctx);
7645	}
7646	isc_mem_put(res->mctx, res->buckets,
7647		    res->nbuckets * sizeof(fctxbucket_t));
7648	if (res->dispatches4 != NULL)
7649		dns_dispatchset_destroy(&res->dispatches4);
7650	if (res->dispatches6 != NULL)
7651		dns_dispatchset_destroy(&res->dispatches6);
7652	while ((a = ISC_LIST_HEAD(res->alternates)) != NULL) {
7653		ISC_LIST_UNLINK(res->alternates, a, link);
7654		if (!a->isaddress)
7655			dns_name_free(&a->_u._n.name, res->mctx);
7656		isc_mem_put(res->mctx, a, sizeof(*a));
7657	}
7658	dns_resolver_reset_algorithms(res);
7659	destroy_badcache(res);
7660	dns_resolver_resetmustbesecure(res);
7661#if USE_ALGLOCK
7662	isc_rwlock_destroy(&res->alglock);
7663#endif
7664#if USE_MBSLOCK
7665	isc_rwlock_destroy(&res->mbslock);
7666#endif
7667	isc_timer_detach(&res->spillattimer);
7668	res->magic = 0;
7669	isc_mem_put(res->mctx, res, sizeof(*res));
7670}
7671
7672static void
7673send_shutdown_events(dns_resolver_t *res) {
7674	isc_event_t *event, *next_event;
7675	isc_task_t *etask;
7676
7677	/*
7678	 * Caller must be holding the resolver lock.
7679	 */
7680
7681	for (event = ISC_LIST_HEAD(res->whenshutdown);
7682	     event != NULL;
7683	     event = next_event) {
7684		next_event = ISC_LIST_NEXT(event, ev_link);
7685		ISC_LIST_UNLINK(res->whenshutdown, event, ev_link);
7686		etask = event->ev_sender;
7687		event->ev_sender = res;
7688		isc_task_sendanddetach(&etask, &event);
7689	}
7690}
7691
7692static void
7693empty_bucket(dns_resolver_t *res) {
7694	RTRACE("empty_bucket");
7695
7696	LOCK(&res->lock);
7697
7698	INSIST(res->activebuckets > 0);
7699	res->activebuckets--;
7700	if (res->activebuckets == 0)
7701		send_shutdown_events(res);
7702
7703	UNLOCK(&res->lock);
7704}
7705
7706static void
7707spillattimer_countdown(isc_task_t *task, isc_event_t *event) {
7708	dns_resolver_t *res = event->ev_arg;
7709	isc_result_t result;
7710	unsigned int count;
7711	isc_boolean_t logit = ISC_FALSE;
7712
7713	REQUIRE(VALID_RESOLVER(res));
7714
7715	UNUSED(task);
7716
7717	LOCK(&res->lock);
7718	INSIST(!res->exiting);
7719	if (res->spillat > res->spillatmin) {
7720		res->spillat--;
7721		logit = ISC_TRUE;
7722	}
7723	if (res->spillat <= res->spillatmin) {
7724		result = isc_timer_reset(res->spillattimer,
7725					 isc_timertype_inactive, NULL,
7726					 NULL, ISC_TRUE);
7727		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7728	}
7729	count = res->spillat;
7730	UNLOCK(&res->lock);
7731	if (logit)
7732		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7733			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7734			      "clients-per-query decreased to %u", count);
7735
7736	isc_event_free(&event);
7737}
7738
7739isc_result_t
7740dns_resolver_create(dns_view_t *view,
7741		    isc_taskmgr_t *taskmgr,
7742		    unsigned int ntasks, unsigned int ndisp,
7743		    isc_socketmgr_t *socketmgr,
7744		    isc_timermgr_t *timermgr,
7745		    unsigned int options,
7746		    dns_dispatchmgr_t *dispatchmgr,
7747		    dns_dispatch_t *dispatchv4,
7748		    dns_dispatch_t *dispatchv6,
7749		    dns_resolver_t **resp)
7750{
7751	dns_resolver_t *res;
7752	isc_result_t result = ISC_R_SUCCESS;
7753	unsigned int i, buckets_created = 0;
7754	isc_task_t *task = NULL;
7755	char name[16];
7756	unsigned dispattr;
7757
7758	/*
7759	 * Create a resolver.
7760	 */
7761
7762	REQUIRE(DNS_VIEW_VALID(view));
7763	REQUIRE(ntasks > 0);
7764	REQUIRE(ndisp > 0);
7765	REQUIRE(resp != NULL && *resp == NULL);
7766	REQUIRE(dispatchmgr != NULL);
7767	REQUIRE(dispatchv4 != NULL || dispatchv6 != NULL);
7768
7769	res = isc_mem_get(view->mctx, sizeof(*res));
7770	if (res == NULL)
7771		return (ISC_R_NOMEMORY);
7772	RTRACE("create");
7773	res->mctx = view->mctx;
7774	res->rdclass = view->rdclass;
7775	res->socketmgr = socketmgr;
7776	res->timermgr = timermgr;
7777	res->taskmgr = taskmgr;
7778	res->dispatchmgr = dispatchmgr;
7779	res->view = view;
7780	res->options = options;
7781	res->lame_ttl = 0;
7782	ISC_LIST_INIT(res->alternates);
7783	res->udpsize = RECV_BUFFER_SIZE;
7784	res->algorithms = NULL;
7785	res->badcache = NULL;
7786	res->badcount = 0;
7787	res->badhash = 0;
7788	res->badsweep = 0;
7789	res->mustbesecure = NULL;
7790	res->spillatmin = res->spillat = 10;
7791	res->spillatmax = 100;
7792	res->spillattimer = NULL;
7793	res->zero_no_soa_ttl = ISC_FALSE;
7794	res->query_timeout = DEFAULT_QUERY_TIMEOUT;
7795	res->maxdepth = DEFAULT_RECURSION_DEPTH;
7796	res->nbuckets = ntasks;
7797	res->activebuckets = ntasks;
7798	res->buckets = isc_mem_get(view->mctx,
7799				   ntasks * sizeof(fctxbucket_t));
7800	if (res->buckets == NULL) {
7801		result = ISC_R_NOMEMORY;
7802		goto cleanup_res;
7803	}
7804	for (i = 0; i < ntasks; i++) {
7805		result = isc_mutex_init(&res->buckets[i].lock);
7806		if (result != ISC_R_SUCCESS)
7807			goto cleanup_buckets;
7808		res->buckets[i].task = NULL;
7809		result = isc_task_create(taskmgr, 0, &res->buckets[i].task);
7810		if (result != ISC_R_SUCCESS) {
7811			DESTROYLOCK(&res->buckets[i].lock);
7812			goto cleanup_buckets;
7813		}
7814		res->buckets[i].mctx = NULL;
7815		snprintf(name, sizeof(name), "res%u", i);
7816#ifdef ISC_PLATFORM_USETHREADS
7817		/*
7818		 * Use a separate memory context for each bucket to reduce
7819		 * contention among multiple threads.  Do this only when
7820		 * enabling threads because it will be require more memory.
7821		 */
7822		result = isc_mem_create(0, 0, &res->buckets[i].mctx);
7823		if (result != ISC_R_SUCCESS) {
7824			isc_task_detach(&res->buckets[i].task);
7825			DESTROYLOCK(&res->buckets[i].lock);
7826			goto cleanup_buckets;
7827		}
7828		isc_mem_setname(res->buckets[i].mctx, name, NULL);
7829#else
7830		isc_mem_attach(view->mctx, &res->buckets[i].mctx);
7831#endif
7832		isc_task_setname(res->buckets[i].task, name, res);
7833		ISC_LIST_INIT(res->buckets[i].fctxs);
7834		res->buckets[i].exiting = ISC_FALSE;
7835		buckets_created++;
7836	}
7837
7838	res->dispatches4 = NULL;
7839	if (dispatchv4 != NULL) {
7840		dns_dispatchset_create(view->mctx, socketmgr, taskmgr,
7841				       dispatchv4, &res->dispatches4, ndisp);
7842		dispattr = dns_dispatch_getattributes(dispatchv4);
7843		res->exclusivev4 =
7844			ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
7845	}
7846
7847	res->dispatches6 = NULL;
7848	if (dispatchv6 != NULL) {
7849		dns_dispatchset_create(view->mctx, socketmgr, taskmgr,
7850				       dispatchv6, &res->dispatches6, ndisp);
7851		dispattr = dns_dispatch_getattributes(dispatchv6);
7852		res->exclusivev6 =
7853			ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
7854	}
7855
7856	res->references = 1;
7857	res->exiting = ISC_FALSE;
7858	res->frozen = ISC_FALSE;
7859	ISC_LIST_INIT(res->whenshutdown);
7860	res->priming = ISC_FALSE;
7861	res->primefetch = NULL;
7862	res->nfctx = 0;
7863
7864	result = isc_mutex_init(&res->lock);
7865	if (result != ISC_R_SUCCESS)
7866		goto cleanup_dispatches;
7867
7868	result = isc_mutex_init(&res->nlock);
7869	if (result != ISC_R_SUCCESS)
7870		goto cleanup_lock;
7871
7872	result = isc_mutex_init(&res->primelock);
7873	if (result != ISC_R_SUCCESS)
7874		goto cleanup_nlock;
7875
7876	task = NULL;
7877	result = isc_task_create(taskmgr, 0, &task);
7878	if (result != ISC_R_SUCCESS)
7879		goto cleanup_primelock;
7880
7881	result = isc_timer_create(timermgr, isc_timertype_inactive, NULL, NULL,
7882				  task, spillattimer_countdown, res,
7883				  &res->spillattimer);
7884	isc_task_detach(&task);
7885	if (result != ISC_R_SUCCESS)
7886		goto cleanup_primelock;
7887
7888#if USE_ALGLOCK
7889	result = isc_rwlock_init(&res->alglock, 0, 0);
7890	if (result != ISC_R_SUCCESS)
7891		goto cleanup_spillattimer;
7892#endif
7893#if USE_MBSLOCK
7894	result = isc_rwlock_init(&res->mbslock, 0, 0);
7895	if (result != ISC_R_SUCCESS)
7896		goto cleanup_alglock;
7897#endif
7898
7899	res->magic = RES_MAGIC;
7900
7901	*resp = res;
7902
7903	return (ISC_R_SUCCESS);
7904
7905#if USE_MBSLOCK
7906 cleanup_alglock:
7907#if USE_ALGLOCK
7908	isc_rwlock_destroy(&res->alglock);
7909#endif
7910#endif
7911#if USE_ALGLOCK || USE_MBSLOCK
7912 cleanup_spillattimer:
7913	isc_timer_detach(&res->spillattimer);
7914#endif
7915
7916 cleanup_primelock:
7917	DESTROYLOCK(&res->primelock);
7918
7919 cleanup_nlock:
7920	DESTROYLOCK(&res->nlock);
7921
7922 cleanup_lock:
7923	DESTROYLOCK(&res->lock);
7924
7925 cleanup_dispatches:
7926	if (res->dispatches6 != NULL)
7927		dns_dispatchset_destroy(&res->dispatches6);
7928	if (res->dispatches4 != NULL)
7929		dns_dispatchset_destroy(&res->dispatches4);
7930
7931 cleanup_buckets:
7932	for (i = 0; i < buckets_created; i++) {
7933		isc_mem_detach(&res->buckets[i].mctx);
7934		DESTROYLOCK(&res->buckets[i].lock);
7935		isc_task_shutdown(res->buckets[i].task);
7936		isc_task_detach(&res->buckets[i].task);
7937	}
7938	isc_mem_put(view->mctx, res->buckets,
7939		    res->nbuckets * sizeof(fctxbucket_t));
7940
7941 cleanup_res:
7942	isc_mem_put(view->mctx, res, sizeof(*res));
7943
7944	return (result);
7945}
7946
7947#ifdef BIND9
7948static void
7949prime_done(isc_task_t *task, isc_event_t *event) {
7950	dns_resolver_t *res;
7951	dns_fetchevent_t *fevent;
7952	dns_fetch_t *fetch;
7953	dns_db_t *db = NULL;
7954
7955	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
7956	fevent = (dns_fetchevent_t *)event;
7957	res = event->ev_arg;
7958	REQUIRE(VALID_RESOLVER(res));
7959
7960	UNUSED(task);
7961
7962	LOCK(&res->lock);
7963
7964	INSIST(res->priming);
7965	res->priming = ISC_FALSE;
7966	LOCK(&res->primelock);
7967	fetch = res->primefetch;
7968	res->primefetch = NULL;
7969	UNLOCK(&res->primelock);
7970
7971	UNLOCK(&res->lock);
7972
7973	if (fevent->result == ISC_R_SUCCESS &&
7974	    res->view->cache != NULL && res->view->hints != NULL) {
7975		dns_cache_attachdb(res->view->cache, &db);
7976		dns_root_checkhints(res->view, res->view->hints, db);
7977		dns_db_detach(&db);
7978	}
7979
7980	if (fevent->node != NULL)
7981		dns_db_detachnode(fevent->db, &fevent->node);
7982	if (fevent->db != NULL)
7983		dns_db_detach(&fevent->db);
7984	if (dns_rdataset_isassociated(fevent->rdataset))
7985		dns_rdataset_disassociate(fevent->rdataset);
7986	INSIST(fevent->sigrdataset == NULL);
7987
7988	isc_mem_put(res->mctx, fevent->rdataset, sizeof(*fevent->rdataset));
7989
7990	isc_event_free(&event);
7991	dns_resolver_destroyfetch(&fetch);
7992}
7993
7994void
7995dns_resolver_prime(dns_resolver_t *res) {
7996	isc_boolean_t want_priming = ISC_FALSE;
7997	dns_rdataset_t *rdataset;
7998	isc_result_t result;
7999
8000	REQUIRE(VALID_RESOLVER(res));
8001	REQUIRE(res->frozen);
8002
8003	RTRACE("dns_resolver_prime");
8004
8005	LOCK(&res->lock);
8006
8007	if (!res->exiting && !res->priming) {
8008		INSIST(res->primefetch == NULL);
8009		res->priming = ISC_TRUE;
8010		want_priming = ISC_TRUE;
8011	}
8012
8013	UNLOCK(&res->lock);
8014
8015	if (want_priming) {
8016		/*
8017		 * To avoid any possible recursive locking problems, we
8018		 * start the priming fetch like any other fetch, and holding
8019		 * no resolver locks.  No one else will try to start it
8020		 * because we're the ones who set res->priming to true.
8021		 * Any other callers of dns_resolver_prime() while we're
8022		 * running will see that res->priming is already true and
8023		 * do nothing.
8024		 */
8025		RTRACE("priming");
8026		rdataset = isc_mem_get(res->mctx, sizeof(*rdataset));
8027		if (rdataset == NULL) {
8028			LOCK(&res->lock);
8029			INSIST(res->priming);
8030			INSIST(res->primefetch == NULL);
8031			res->priming = ISC_FALSE;
8032			UNLOCK(&res->lock);
8033			return;
8034		}
8035		dns_rdataset_init(rdataset);
8036		LOCK(&res->primelock);
8037		result = dns_resolver_createfetch(res, dns_rootname,
8038						  dns_rdatatype_ns,
8039						  NULL, NULL, NULL, 0,
8040						  res->buckets[0].task,
8041						  prime_done,
8042						  res, rdataset, NULL,
8043						  &res->primefetch);
8044		UNLOCK(&res->primelock);
8045		if (result != ISC_R_SUCCESS) {
8046			LOCK(&res->lock);
8047			INSIST(res->priming);
8048			res->priming = ISC_FALSE;
8049			UNLOCK(&res->lock);
8050		}
8051	}
8052}
8053#endif /* BIND9 */
8054
8055void
8056dns_resolver_freeze(dns_resolver_t *res) {
8057	/*
8058	 * Freeze resolver.
8059	 */
8060
8061	REQUIRE(VALID_RESOLVER(res));
8062
8063	res->frozen = ISC_TRUE;
8064}
8065
8066void
8067dns_resolver_attach(dns_resolver_t *source, dns_resolver_t **targetp) {
8068	REQUIRE(VALID_RESOLVER(source));
8069	REQUIRE(targetp != NULL && *targetp == NULL);
8070
8071	RRTRACE(source, "attach");
8072	LOCK(&source->lock);
8073	REQUIRE(!source->exiting);
8074
8075	INSIST(source->references > 0);
8076	source->references++;
8077	INSIST(source->references != 0);
8078	UNLOCK(&source->lock);
8079
8080	*targetp = source;
8081}
8082
8083void
8084dns_resolver_whenshutdown(dns_resolver_t *res, isc_task_t *task,
8085			  isc_event_t **eventp)
8086{
8087	isc_task_t *clone;
8088	isc_event_t *event;
8089
8090	REQUIRE(VALID_RESOLVER(res));
8091	REQUIRE(eventp != NULL);
8092
8093	event = *eventp;
8094	*eventp = NULL;
8095
8096	LOCK(&res->lock);
8097
8098	if (res->exiting && res->activebuckets == 0) {
8099		/*
8100		 * We're already shutdown.  Send the event.
8101		 */
8102		event->ev_sender = res;
8103		isc_task_send(task, &event);
8104	} else {
8105		clone = NULL;
8106		isc_task_attach(task, &clone);
8107		event->ev_sender = clone;
8108		ISC_LIST_APPEND(res->whenshutdown, event, ev_link);
8109	}
8110
8111	UNLOCK(&res->lock);
8112}
8113
8114void
8115dns_resolver_shutdown(dns_resolver_t *res) {
8116	unsigned int i;
8117	fetchctx_t *fctx;
8118	isc_result_t result;
8119
8120	REQUIRE(VALID_RESOLVER(res));
8121
8122	RTRACE("shutdown");
8123
8124	LOCK(&res->lock);
8125
8126	if (!res->exiting) {
8127		RTRACE("exiting");
8128		res->exiting = ISC_TRUE;
8129
8130		for (i = 0; i < res->nbuckets; i++) {
8131			LOCK(&res->buckets[i].lock);
8132			for (fctx = ISC_LIST_HEAD(res->buckets[i].fctxs);
8133			     fctx != NULL;
8134			     fctx = ISC_LIST_NEXT(fctx, link))
8135				fctx_shutdown(fctx);
8136			if (res->dispatches4 != NULL && !res->exclusivev4) {
8137				dns_dispatchset_cancelall(res->dispatches4,
8138							  res->buckets[i].task);
8139			}
8140			if (res->dispatches6 != NULL && !res->exclusivev6) {
8141				dns_dispatchset_cancelall(res->dispatches6,
8142							  res->buckets[i].task);
8143			}
8144			res->buckets[i].exiting = ISC_TRUE;
8145			if (ISC_LIST_EMPTY(res->buckets[i].fctxs)) {
8146				INSIST(res->activebuckets > 0);
8147				res->activebuckets--;
8148			}
8149			UNLOCK(&res->buckets[i].lock);
8150		}
8151		if (res->activebuckets == 0)
8152			send_shutdown_events(res);
8153		result = isc_timer_reset(res->spillattimer,
8154					 isc_timertype_inactive, NULL,
8155					 NULL, ISC_TRUE);
8156		RUNTIME_CHECK(result == ISC_R_SUCCESS);
8157	}
8158
8159	UNLOCK(&res->lock);
8160}
8161
8162void
8163dns_resolver_detach(dns_resolver_t **resp) {
8164	dns_resolver_t *res;
8165	isc_boolean_t need_destroy = ISC_FALSE;
8166
8167	REQUIRE(resp != NULL);
8168	res = *resp;
8169	REQUIRE(VALID_RESOLVER(res));
8170
8171	RTRACE("detach");
8172
8173	LOCK(&res->lock);
8174
8175	INSIST(res->references > 0);
8176	res->references--;
8177	if (res->references == 0) {
8178		INSIST(res->exiting && res->activebuckets == 0);
8179		need_destroy = ISC_TRUE;
8180	}
8181
8182	UNLOCK(&res->lock);
8183
8184	if (need_destroy)
8185		destroy(res);
8186
8187	*resp = NULL;
8188}
8189
8190static inline isc_boolean_t
8191fctx_match(fetchctx_t *fctx, dns_name_t *name, dns_rdatatype_t type,
8192	   unsigned int options)
8193{
8194	/*
8195	 * Don't match fetch contexts that are shutting down.
8196	 */
8197	if (fctx->cloned || fctx->state == fetchstate_done ||
8198	    ISC_LIST_EMPTY(fctx->events))
8199		return (ISC_FALSE);
8200
8201	if (fctx->type != type || fctx->options != options)
8202		return (ISC_FALSE);
8203	return (dns_name_equal(&fctx->name, name));
8204}
8205
8206static inline void
8207log_fetch(dns_name_t *name, dns_rdatatype_t type) {
8208	char namebuf[DNS_NAME_FORMATSIZE];
8209	char typebuf[DNS_RDATATYPE_FORMATSIZE];
8210	int level = ISC_LOG_DEBUG(1);
8211
8212	if (! isc_log_wouldlog(dns_lctx, level))
8213		return;
8214
8215	dns_name_format(name, namebuf, sizeof(namebuf));
8216	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
8217
8218	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
8219		      DNS_LOGMODULE_RESOLVER, level,
8220		      "createfetch: %s %s", namebuf, typebuf);
8221}
8222
8223isc_result_t
8224dns_resolver_createfetch(dns_resolver_t *res, dns_name_t *name,
8225			 dns_rdatatype_t type,
8226			 dns_name_t *domain, dns_rdataset_t *nameservers,
8227			 dns_forwarders_t *forwarders,
8228			 unsigned int options, isc_task_t *task,
8229			 isc_taskaction_t action, void *arg,
8230			 dns_rdataset_t *rdataset,
8231			 dns_rdataset_t *sigrdataset,
8232			 dns_fetch_t **fetchp)
8233{
8234	return (dns_resolver_createfetch3(res, name, type, domain,
8235					  nameservers, forwarders, NULL, 0,
8236					  options, 0, task, action, arg,
8237					  rdataset, sigrdataset, fetchp));
8238}
8239
8240isc_result_t
8241dns_resolver_createfetch2(dns_resolver_t *res, dns_name_t *name,
8242			  dns_rdatatype_t type,
8243			  dns_name_t *domain, dns_rdataset_t *nameservers,
8244			  dns_forwarders_t *forwarders,
8245			  isc_sockaddr_t *client, dns_messageid_t id,
8246			  unsigned int options, isc_task_t *task,
8247			  isc_taskaction_t action, void *arg,
8248			  dns_rdataset_t *rdataset,
8249			  dns_rdataset_t *sigrdataset,
8250			  dns_fetch_t **fetchp)
8251{
8252	return (dns_resolver_createfetch3(res, name, type, domain,
8253					  nameservers, forwarders, client, id,
8254					  options, 0, task, action, arg,
8255					  rdataset, sigrdataset, fetchp));
8256}
8257
8258isc_result_t
8259dns_resolver_createfetch3(dns_resolver_t *res, dns_name_t *name,
8260			  dns_rdatatype_t type,
8261			  dns_name_t *domain, dns_rdataset_t *nameservers,
8262			  dns_forwarders_t *forwarders,
8263			  isc_sockaddr_t *client, dns_messageid_t id,
8264			  unsigned int options, unsigned int depth,
8265			  isc_task_t *task,
8266			  isc_taskaction_t action, void *arg,
8267			  dns_rdataset_t *rdataset,
8268			  dns_rdataset_t *sigrdataset,
8269			  dns_fetch_t **fetchp)
8270{
8271	dns_fetch_t *fetch;
8272	fetchctx_t *fctx = NULL;
8273	isc_result_t result = ISC_R_SUCCESS;
8274	unsigned int bucketnum;
8275	isc_boolean_t new_fctx = ISC_FALSE;
8276	isc_event_t *event;
8277	unsigned int count = 0;
8278	unsigned int spillat;
8279	unsigned int spillatmin;
8280	isc_boolean_t destroy = ISC_FALSE;
8281
8282	UNUSED(forwarders);
8283
8284	REQUIRE(VALID_RESOLVER(res));
8285	REQUIRE(res->frozen);
8286	/* XXXRTH  Check for meta type */
8287	if (domain != NULL) {
8288		REQUIRE(DNS_RDATASET_VALID(nameservers));
8289		REQUIRE(nameservers->type == dns_rdatatype_ns);
8290	} else
8291		REQUIRE(nameservers == NULL);
8292	REQUIRE(forwarders == NULL);
8293	REQUIRE(!dns_rdataset_isassociated(rdataset));
8294	REQUIRE(sigrdataset == NULL ||
8295		!dns_rdataset_isassociated(sigrdataset));
8296	REQUIRE(fetchp != NULL && *fetchp == NULL);
8297
8298	log_fetch(name, type);
8299
8300	/*
8301	 * XXXRTH  use a mempool?
8302	 */
8303	fetch = isc_mem_get(res->mctx, sizeof(*fetch));
8304	if (fetch == NULL)
8305		return (ISC_R_NOMEMORY);
8306
8307	bucketnum = dns_name_fullhash(name, ISC_FALSE) % res->nbuckets;
8308
8309	LOCK(&res->lock);
8310	spillat = res->spillat;
8311	spillatmin = res->spillatmin;
8312	UNLOCK(&res->lock);
8313	LOCK(&res->buckets[bucketnum].lock);
8314
8315	if (res->buckets[bucketnum].exiting) {
8316		result = ISC_R_SHUTTINGDOWN;
8317		goto unlock;
8318	}
8319
8320	if ((options & DNS_FETCHOPT_UNSHARED) == 0) {
8321		for (fctx = ISC_LIST_HEAD(res->buckets[bucketnum].fctxs);
8322		     fctx != NULL;
8323		     fctx = ISC_LIST_NEXT(fctx, link)) {
8324			if (fctx_match(fctx, name, type, options))
8325				break;
8326		}
8327	}
8328
8329	/*
8330	 * Is this a duplicate?
8331	 */
8332	if (fctx != NULL && client != NULL) {
8333		dns_fetchevent_t *fevent;
8334		for (fevent = ISC_LIST_HEAD(fctx->events);
8335		     fevent != NULL;
8336		     fevent = ISC_LIST_NEXT(fevent, ev_link)) {
8337			if (fevent->client != NULL && fevent->id == id &&
8338			    isc_sockaddr_equal(fevent->client, client)) {
8339				result = DNS_R_DUPLICATE;
8340				goto unlock;
8341			}
8342			count++;
8343		}
8344	}
8345	if (count >= spillatmin && spillatmin != 0) {
8346		INSIST(fctx != NULL);
8347		if (count >= spillat)
8348			fctx->spilled = ISC_TRUE;
8349		if (fctx->spilled) {
8350			result = DNS_R_DROP;
8351			goto unlock;
8352		}
8353	}
8354
8355	if (fctx == NULL) {
8356		result = fctx_create(res, name, type, domain, nameservers,
8357				     options, bucketnum, depth, &fctx);
8358		if (result != ISC_R_SUCCESS)
8359			goto unlock;
8360		new_fctx = ISC_TRUE;
8361	} else if (fctx->depth > depth)
8362		fctx->depth = depth;
8363
8364	result = fctx_join(fctx, task, client, id, action, arg,
8365			   rdataset, sigrdataset, fetch);
8366	if (new_fctx) {
8367		if (result == ISC_R_SUCCESS) {
8368			/*
8369			 * Launch this fctx.
8370			 */
8371			event = &fctx->control_event;
8372			ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
8373				       DNS_EVENT_FETCHCONTROL,
8374				       fctx_start, fctx, NULL,
8375				       NULL, NULL);
8376			isc_task_send(res->buckets[bucketnum].task, &event);
8377		} else {
8378			/*
8379			 * We don't care about the result of fctx_unlink()
8380			 * since we know we're not exiting.
8381			 */
8382			(void)fctx_unlink(fctx);
8383			destroy = ISC_TRUE;
8384		}
8385	}
8386
8387 unlock:
8388	UNLOCK(&res->buckets[bucketnum].lock);
8389
8390	if (destroy)
8391		fctx_destroy(fctx);
8392
8393	if (result == ISC_R_SUCCESS) {
8394		FTRACE("created");
8395		*fetchp = fetch;
8396	} else
8397		isc_mem_put(res->mctx, fetch, sizeof(*fetch));
8398
8399	return (result);
8400}
8401
8402void
8403dns_resolver_cancelfetch(dns_fetch_t *fetch) {
8404	fetchctx_t *fctx;
8405	dns_resolver_t *res;
8406	dns_fetchevent_t *event, *next_event;
8407	isc_task_t *etask;
8408
8409	REQUIRE(DNS_FETCH_VALID(fetch));
8410	fctx = fetch->private;
8411	REQUIRE(VALID_FCTX(fctx));
8412	res = fctx->res;
8413
8414	FTRACE("cancelfetch");
8415
8416	LOCK(&res->buckets[fctx->bucketnum].lock);
8417
8418	/*
8419	 * Find the completion event for this fetch (as opposed
8420	 * to those for other fetches that have joined the same
8421	 * fctx) and send it with result = ISC_R_CANCELED.
8422	 */
8423	event = NULL;
8424	if (fctx->state != fetchstate_done) {
8425		for (event = ISC_LIST_HEAD(fctx->events);
8426		     event != NULL;
8427		     event = next_event) {
8428			next_event = ISC_LIST_NEXT(event, ev_link);
8429			if (event->fetch == fetch) {
8430				ISC_LIST_UNLINK(fctx->events, event, ev_link);
8431				break;
8432			}
8433		}
8434	}
8435	if (event != NULL) {
8436		etask = event->ev_sender;
8437		event->ev_sender = fctx;
8438		event->result = ISC_R_CANCELED;
8439		isc_task_sendanddetach(&etask, ISC_EVENT_PTR(&event));
8440	}
8441	/*
8442	 * The fctx continues running even if no fetches remain;
8443	 * the answer is still cached.
8444	 */
8445
8446	UNLOCK(&res->buckets[fctx->bucketnum].lock);
8447}
8448
8449void
8450dns_resolver_destroyfetch(dns_fetch_t **fetchp) {
8451	dns_fetch_t *fetch;
8452	dns_resolver_t *res;
8453	dns_fetchevent_t *event, *next_event;
8454	fetchctx_t *fctx;
8455	unsigned int bucketnum;
8456	isc_boolean_t bucket_empty;
8457
8458	REQUIRE(fetchp != NULL);
8459	fetch = *fetchp;
8460	REQUIRE(DNS_FETCH_VALID(fetch));
8461	fctx = fetch->private;
8462	REQUIRE(VALID_FCTX(fctx));
8463	res = fctx->res;
8464
8465	FTRACE("destroyfetch");
8466
8467	bucketnum = fctx->bucketnum;
8468	LOCK(&res->buckets[bucketnum].lock);
8469
8470	/*
8471	 * Sanity check: the caller should have gotten its event before
8472	 * trying to destroy the fetch.
8473	 */
8474	event = NULL;
8475	if (fctx->state != fetchstate_done) {
8476		for (event = ISC_LIST_HEAD(fctx->events);
8477		     event != NULL;
8478		     event = next_event) {
8479			next_event = ISC_LIST_NEXT(event, ev_link);
8480			RUNTIME_CHECK(event->fetch != fetch);
8481		}
8482	}
8483
8484	bucket_empty = fctx_decreference(fctx);
8485
8486	UNLOCK(&res->buckets[bucketnum].lock);
8487
8488	isc_mem_put(res->mctx, fetch, sizeof(*fetch));
8489	*fetchp = NULL;
8490
8491	if (bucket_empty)
8492		empty_bucket(res);
8493}
8494
8495void
8496dns_resolver_logfetch(dns_fetch_t *fetch, isc_log_t *lctx,
8497		      isc_logcategory_t *category, isc_logmodule_t *module,
8498		      int level, isc_boolean_t duplicateok)
8499{
8500	fetchctx_t *fctx;
8501	dns_resolver_t *res;
8502	char domainbuf[DNS_NAME_FORMATSIZE];
8503
8504	REQUIRE(DNS_FETCH_VALID(fetch));
8505	fctx = fetch->private;
8506	REQUIRE(VALID_FCTX(fctx));
8507	res = fctx->res;
8508
8509	LOCK(&res->buckets[fctx->bucketnum].lock);
8510
8511	INSIST(fctx->exitline >= 0);
8512	if (!fctx->logged || duplicateok) {
8513		dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
8514		isc_log_write(lctx, category, module, level,
8515			      "fetch completed at %s:%d for %s in "
8516			      "%" ISC_PRINT_QUADFORMAT "u."
8517			      "%06" ISC_PRINT_QUADFORMAT "u: %s/%s "
8518			      "[domain:%s,referral:%u,restart:%u,qrysent:%u,"
8519			      "timeout:%u,lame:%u,neterr:%u,badresp:%u,"
8520			      "adberr:%u,findfail:%u,valfail:%u]",
8521			      __FILE__, fctx->exitline, fctx->info,
8522			      fctx->duration / US_PER_SEC,
8523			      fctx->duration % US_PER_SEC,
8524			      isc_result_totext(fctx->result),
8525			      isc_result_totext(fctx->vresult), domainbuf,
8526			      fctx->referrals, fctx->restarts,
8527			      fctx->querysent, fctx->timeouts, fctx->lamecount,
8528			      fctx->neterr, fctx->badresp, fctx->adberr,
8529			      fctx->findfail, fctx->valfail);
8530		fctx->logged = ISC_TRUE;
8531	}
8532
8533	UNLOCK(&res->buckets[fctx->bucketnum].lock);
8534}
8535
8536dns_dispatchmgr_t *
8537dns_resolver_dispatchmgr(dns_resolver_t *resolver) {
8538	REQUIRE(VALID_RESOLVER(resolver));
8539	return (resolver->dispatchmgr);
8540}
8541
8542dns_dispatch_t *
8543dns_resolver_dispatchv4(dns_resolver_t *resolver) {
8544	REQUIRE(VALID_RESOLVER(resolver));
8545	return (dns_dispatchset_get(resolver->dispatches4));
8546}
8547
8548dns_dispatch_t *
8549dns_resolver_dispatchv6(dns_resolver_t *resolver) {
8550	REQUIRE(VALID_RESOLVER(resolver));
8551	return (dns_dispatchset_get(resolver->dispatches6));
8552}
8553
8554isc_socketmgr_t *
8555dns_resolver_socketmgr(dns_resolver_t *resolver) {
8556	REQUIRE(VALID_RESOLVER(resolver));
8557	return (resolver->socketmgr);
8558}
8559
8560isc_taskmgr_t *
8561dns_resolver_taskmgr(dns_resolver_t *resolver) {
8562	REQUIRE(VALID_RESOLVER(resolver));
8563	return (resolver->taskmgr);
8564}
8565
8566isc_uint32_t
8567dns_resolver_getlamettl(dns_resolver_t *resolver) {
8568	REQUIRE(VALID_RESOLVER(resolver));
8569	return (resolver->lame_ttl);
8570}
8571
8572void
8573dns_resolver_setlamettl(dns_resolver_t *resolver, isc_uint32_t lame_ttl) {
8574	REQUIRE(VALID_RESOLVER(resolver));
8575	resolver->lame_ttl = lame_ttl;
8576}
8577
8578unsigned int
8579dns_resolver_nrunning(dns_resolver_t *resolver) {
8580	unsigned int n;
8581	LOCK(&resolver->nlock);
8582	n = resolver->nfctx;
8583	UNLOCK(&resolver->nlock);
8584	return (n);
8585}
8586
8587isc_result_t
8588dns_resolver_addalternate(dns_resolver_t *resolver, isc_sockaddr_t *alt,
8589			  dns_name_t *name, in_port_t port) {
8590	alternate_t *a;
8591	isc_result_t result;
8592
8593	REQUIRE(VALID_RESOLVER(resolver));
8594	REQUIRE(!resolver->frozen);
8595	REQUIRE((alt == NULL) ^ (name == NULL));
8596
8597	a = isc_mem_get(resolver->mctx, sizeof(*a));
8598	if (a == NULL)
8599		return (ISC_R_NOMEMORY);
8600	if (alt != NULL) {
8601		a->isaddress = ISC_TRUE;
8602		a->_u.addr = *alt;
8603	} else {
8604		a->isaddress = ISC_FALSE;
8605		a->_u._n.port = port;
8606		dns_name_init(&a->_u._n.name, NULL);
8607		result = dns_name_dup(name, resolver->mctx, &a->_u._n.name);
8608		if (result != ISC_R_SUCCESS) {
8609			isc_mem_put(resolver->mctx, a, sizeof(*a));
8610			return (result);
8611		}
8612	}
8613	ISC_LINK_INIT(a, link);
8614	ISC_LIST_APPEND(resolver->alternates, a, link);
8615
8616	return (ISC_R_SUCCESS);
8617}
8618
8619void
8620dns_resolver_setudpsize(dns_resolver_t *resolver, isc_uint16_t udpsize) {
8621	REQUIRE(VALID_RESOLVER(resolver));
8622	resolver->udpsize = udpsize;
8623}
8624
8625isc_uint16_t
8626dns_resolver_getudpsize(dns_resolver_t *resolver) {
8627	REQUIRE(VALID_RESOLVER(resolver));
8628	return (resolver->udpsize);
8629}
8630
8631void
8632dns_resolver_flushbadcache(dns_resolver_t *resolver, dns_name_t *name) {
8633	unsigned int i;
8634	dns_badcache_t *bad, *prev, *next;
8635
8636	REQUIRE(VALID_RESOLVER(resolver));
8637
8638	LOCK(&resolver->lock);
8639	if (resolver->badcache == NULL)
8640		goto unlock;
8641
8642	if (name != NULL) {
8643		isc_time_t now;
8644		isc_result_t result;
8645		result = isc_time_now(&now);
8646		if (result != ISC_R_SUCCESS)
8647			isc_time_settoepoch(&now);
8648		i = dns_name_hash(name, ISC_FALSE) % resolver->badhash;
8649		prev = NULL;
8650		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8651			int n;
8652			next = bad->next;
8653			n = isc_time_compare(&bad->expire, &now);
8654			if (n < 0 || dns_name_equal(name, &bad->name)) {
8655				if (prev == NULL)
8656					resolver->badcache[i] = bad->next;
8657				else
8658					prev->next = bad->next;
8659				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8660					    bad->name.length);
8661				resolver->badcount--;
8662			} else
8663				prev = bad;
8664		}
8665	} else
8666		destroy_badcache(resolver);
8667
8668 unlock:
8669	UNLOCK(&resolver->lock);
8670
8671}
8672
8673static void
8674resizehash(dns_resolver_t *resolver, isc_time_t *now, isc_boolean_t grow) {
8675	unsigned int newsize;
8676	dns_badcache_t **new, *bad, *next;
8677	unsigned int i;
8678
8679	if (grow)
8680		newsize = resolver->badhash * 2 + 1;
8681	else
8682		newsize = (resolver->badhash - 1) / 2;
8683
8684	new = isc_mem_get(resolver->mctx,
8685			  sizeof(*resolver->badcache) * newsize);
8686	if (new == NULL)
8687		return;
8688	memset(new, 0, sizeof(*resolver->badcache) * newsize);
8689	for (i = 0; i < resolver->badhash; i++) {
8690		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8691			next = bad->next;
8692			if (isc_time_compare(&bad->expire, now) < 0) {
8693				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8694					    bad->name.length);
8695				resolver->badcount--;
8696			} else {
8697				bad->next = new[bad->hashval % newsize];
8698				new[bad->hashval % newsize] = bad;
8699			}
8700		}
8701	}
8702	isc_mem_put(resolver->mctx, resolver->badcache,
8703		    sizeof(*resolver->badcache) * resolver->badhash);
8704	resolver->badhash = newsize;
8705	resolver->badcache = new;
8706}
8707
8708void
8709dns_resolver_addbadcache(dns_resolver_t *resolver, dns_name_t *name,
8710			 dns_rdatatype_t type, isc_time_t *expire)
8711{
8712	isc_time_t now;
8713	isc_result_t result = ISC_R_SUCCESS;
8714	unsigned int i, hashval;
8715	dns_badcache_t *bad, *prev, *next;
8716
8717	REQUIRE(VALID_RESOLVER(resolver));
8718
8719	LOCK(&resolver->lock);
8720	if (resolver->badcache == NULL) {
8721		resolver->badcache = isc_mem_get(resolver->mctx,
8722						 sizeof(*resolver->badcache) *
8723						 DNS_BADCACHE_SIZE);
8724		if (resolver->badcache == NULL)
8725			goto cleanup;
8726		resolver->badhash = DNS_BADCACHE_SIZE;
8727		memset(resolver->badcache, 0, sizeof(*resolver->badcache) *
8728		       resolver->badhash);
8729	}
8730
8731	result = isc_time_now(&now);
8732	if (result != ISC_R_SUCCESS)
8733		isc_time_settoepoch(&now);
8734	hashval = dns_name_hash(name, ISC_FALSE);
8735	i = hashval % resolver->badhash;
8736	prev = NULL;
8737	for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8738		next = bad->next;
8739		if (bad->type == type && dns_name_equal(name, &bad->name))
8740			break;
8741		if (isc_time_compare(&bad->expire, &now) < 0) {
8742			if (prev == NULL)
8743				resolver->badcache[i] = bad->next;
8744			else
8745				prev->next = bad->next;
8746			isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8747				    bad->name.length);
8748			resolver->badcount--;
8749		} else
8750			prev = bad;
8751	}
8752	if (bad == NULL) {
8753		isc_buffer_t buffer;
8754		bad = isc_mem_get(resolver->mctx, sizeof(*bad) + name->length);
8755		if (bad == NULL)
8756			goto cleanup;
8757		bad->type = type;
8758		bad->hashval = hashval;
8759		bad->expire = *expire;
8760		isc_buffer_init(&buffer, bad + 1, name->length);
8761		dns_name_init(&bad->name, NULL);
8762		dns_name_copy(name, &bad->name, &buffer);
8763		bad->next = resolver->badcache[i];
8764		resolver->badcache[i] = bad;
8765		resolver->badcount++;
8766		if (resolver->badcount > resolver->badhash * 8)
8767			resizehash(resolver, &now, ISC_TRUE);
8768		if (resolver->badcount < resolver->badhash * 2 &&
8769		    resolver->badhash > DNS_BADCACHE_SIZE)
8770			resizehash(resolver, &now, ISC_FALSE);
8771	} else
8772		bad->expire = *expire;
8773 cleanup:
8774	UNLOCK(&resolver->lock);
8775}
8776
8777isc_boolean_t
8778dns_resolver_getbadcache(dns_resolver_t *resolver, dns_name_t *name,
8779			 dns_rdatatype_t type, isc_time_t *now)
8780{
8781	dns_badcache_t *bad, *prev, *next;
8782	isc_boolean_t answer = ISC_FALSE;
8783	unsigned int i;
8784
8785	REQUIRE(VALID_RESOLVER(resolver));
8786
8787	LOCK(&resolver->lock);
8788	if (resolver->badcache == NULL)
8789		goto unlock;
8790
8791	i = dns_name_hash(name, ISC_FALSE) % resolver->badhash;
8792	prev = NULL;
8793	for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8794		next = bad->next;
8795		/*
8796		 * Search the hash list. Clean out expired records as we go.
8797		 */
8798		if (isc_time_compare(&bad->expire, now) < 0) {
8799			if (prev != NULL)
8800				prev->next = bad->next;
8801			else
8802				resolver->badcache[i] = bad->next;
8803			isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8804				    bad->name.length);
8805			resolver->badcount--;
8806			continue;
8807		}
8808		if (bad->type == type && dns_name_equal(name, &bad->name)) {
8809			answer = ISC_TRUE;
8810			break;
8811		}
8812		prev = bad;
8813	}
8814
8815	/*
8816	 * Slow sweep to clean out stale records.
8817	 */
8818	i = resolver->badsweep++ % resolver->badhash;
8819	bad = resolver->badcache[i];
8820	if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
8821		resolver->badcache[i] = bad->next;
8822		isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8823			    bad->name.length);
8824		resolver->badcount--;
8825	}
8826
8827 unlock:
8828	UNLOCK(&resolver->lock);
8829	return (answer);
8830}
8831
8832void
8833dns_resolver_printbadcache(dns_resolver_t *resolver, FILE *fp) {
8834	char namebuf[DNS_NAME_FORMATSIZE];
8835	char typebuf[DNS_RDATATYPE_FORMATSIZE];
8836	dns_badcache_t *bad, *next, *prev;
8837	isc_time_t now;
8838	unsigned int i;
8839	isc_uint64_t t;
8840
8841	LOCK(&resolver->lock);
8842	fprintf(fp, ";\n; Bad cache\n;\n");
8843
8844	if (resolver->badcache == NULL)
8845		goto unlock;
8846
8847	TIME_NOW(&now);
8848	for (i = 0; i < resolver->badhash; i++) {
8849		prev = NULL;
8850		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8851			next = bad->next;
8852			if (isc_time_compare(&bad->expire, &now) < 0) {
8853				if (prev != NULL)
8854					prev->next = bad->next;
8855				else
8856					resolver->badcache[i] = bad->next;
8857				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8858					    bad->name.length);
8859				resolver->badcount--;
8860				continue;
8861			}
8862			prev = bad;
8863			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
8864			dns_rdatatype_format(bad->type, typebuf,
8865					     sizeof(typebuf));
8866			t = isc_time_microdiff(&bad->expire, &now);
8867			t /= 1000;
8868			fprintf(fp, "; %s/%s [ttl "
8869				"%" ISC_PLATFORM_QUADFORMAT "u]\n",
8870				namebuf, typebuf, t);
8871		}
8872	}
8873
8874 unlock:
8875	UNLOCK(&resolver->lock);
8876}
8877
8878static void
8879free_algorithm(void *node, void *arg) {
8880	unsigned char *algorithms = node;
8881	isc_mem_t *mctx = arg;
8882
8883	isc_mem_put(mctx, algorithms, *algorithms);
8884}
8885
8886void
8887dns_resolver_reset_algorithms(dns_resolver_t *resolver) {
8888
8889	REQUIRE(VALID_RESOLVER(resolver));
8890
8891#if USE_ALGLOCK
8892	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
8893#endif
8894	if (resolver->algorithms != NULL)
8895		dns_rbt_destroy(&resolver->algorithms);
8896#if USE_ALGLOCK
8897	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
8898#endif
8899}
8900
8901isc_result_t
8902dns_resolver_disable_algorithm(dns_resolver_t *resolver, dns_name_t *name,
8903			       unsigned int alg)
8904{
8905	unsigned int len, mask;
8906	unsigned char *new;
8907	unsigned char *algorithms;
8908	isc_result_t result;
8909	dns_rbtnode_t *node = NULL;
8910
8911	REQUIRE(VALID_RESOLVER(resolver));
8912	if (alg > 255)
8913		return (ISC_R_RANGE);
8914
8915#if USE_ALGLOCK
8916	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
8917#endif
8918	if (resolver->algorithms == NULL) {
8919		result = dns_rbt_create(resolver->mctx, free_algorithm,
8920					resolver->mctx, &resolver->algorithms);
8921		if (result != ISC_R_SUCCESS)
8922			goto cleanup;
8923	}
8924
8925	len = alg/8 + 2;
8926	mask = 1 << (alg%8);
8927
8928	result = dns_rbt_addnode(resolver->algorithms, name, &node);
8929
8930	if (result == ISC_R_SUCCESS || result == ISC_R_EXISTS) {
8931		algorithms = node->data;
8932		if (algorithms == NULL || len > *algorithms) {
8933			new = isc_mem_get(resolver->mctx, len);
8934			if (new == NULL) {
8935				result = ISC_R_NOMEMORY;
8936				goto cleanup;
8937			}
8938			memset(new, 0, len);
8939			if (algorithms != NULL)
8940				memmove(new, algorithms, *algorithms);
8941			new[len-1] |= mask;
8942			*new = len;
8943			node->data = new;
8944			if (algorithms != NULL)
8945				isc_mem_put(resolver->mctx, algorithms,
8946					    *algorithms);
8947		} else
8948			algorithms[len-1] |= mask;
8949	}
8950	result = ISC_R_SUCCESS;
8951 cleanup:
8952#if USE_ALGLOCK
8953	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
8954#endif
8955	return (result);
8956}
8957
8958isc_boolean_t
8959dns_resolver_algorithm_supported(dns_resolver_t *resolver, dns_name_t *name,
8960				 unsigned int alg)
8961{
8962	unsigned int len, mask;
8963	unsigned char *algorithms;
8964	void *data = NULL;
8965	isc_result_t result;
8966	isc_boolean_t found = ISC_FALSE;
8967
8968	REQUIRE(VALID_RESOLVER(resolver));
8969
8970	/*
8971	 * DH is unsupported for DNSKEYs, see RFC 4034 sec. A.1.
8972	 */
8973	if ((alg == DST_ALG_DH) || (alg == DST_ALG_INDIRECT))
8974		return (ISC_FALSE);
8975
8976#if USE_ALGLOCK
8977	RWLOCK(&resolver->alglock, isc_rwlocktype_read);
8978#endif
8979	if (resolver->algorithms == NULL)
8980		goto unlock;
8981	result = dns_rbt_findname(resolver->algorithms, name, 0, NULL, &data);
8982	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8983		len = alg/8 + 2;
8984		mask = 1 << (alg%8);
8985		algorithms = data;
8986		if (len <= *algorithms && (algorithms[len-1] & mask) != 0)
8987			found = ISC_TRUE;
8988	}
8989 unlock:
8990#if USE_ALGLOCK
8991	RWUNLOCK(&resolver->alglock, isc_rwlocktype_read);
8992#endif
8993	if (found)
8994		return (ISC_FALSE);
8995
8996	return (dst_algorithm_supported(alg));
8997}
8998
8999isc_boolean_t
9000dns_resolver_digest_supported(dns_resolver_t *resolver, unsigned int digest) {
9001
9002	UNUSED(resolver);
9003	return (dns_ds_digest_supported(digest));
9004}
9005
9006void
9007dns_resolver_resetmustbesecure(dns_resolver_t *resolver) {
9008
9009	REQUIRE(VALID_RESOLVER(resolver));
9010
9011#if USE_MBSLOCK
9012	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
9013#endif
9014	if (resolver->mustbesecure != NULL)
9015		dns_rbt_destroy(&resolver->mustbesecure);
9016#if USE_MBSLOCK
9017	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
9018#endif
9019}
9020
9021static isc_boolean_t yes = ISC_TRUE, no = ISC_FALSE;
9022
9023isc_result_t
9024dns_resolver_setmustbesecure(dns_resolver_t *resolver, dns_name_t *name,
9025			     isc_boolean_t value)
9026{
9027	isc_result_t result;
9028
9029	REQUIRE(VALID_RESOLVER(resolver));
9030
9031#if USE_MBSLOCK
9032	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
9033#endif
9034	if (resolver->mustbesecure == NULL) {
9035		result = dns_rbt_create(resolver->mctx, NULL, NULL,
9036					&resolver->mustbesecure);
9037		if (result != ISC_R_SUCCESS)
9038			goto cleanup;
9039	}
9040	result = dns_rbt_addname(resolver->mustbesecure, name,
9041				 value ? &yes : &no);
9042 cleanup:
9043#if USE_MBSLOCK
9044	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
9045#endif
9046	return (result);
9047}
9048
9049isc_boolean_t
9050dns_resolver_getmustbesecure(dns_resolver_t *resolver, dns_name_t *name) {
9051	void *data = NULL;
9052	isc_boolean_t value = ISC_FALSE;
9053	isc_result_t result;
9054
9055	REQUIRE(VALID_RESOLVER(resolver));
9056
9057#if USE_MBSLOCK
9058	RWLOCK(&resolver->mbslock, isc_rwlocktype_read);
9059#endif
9060	if (resolver->mustbesecure == NULL)
9061		goto unlock;
9062	result = dns_rbt_findname(resolver->mustbesecure, name, 0, NULL, &data);
9063	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
9064		value = *(isc_boolean_t*)data;
9065 unlock:
9066#if USE_MBSLOCK
9067	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_read);
9068#endif
9069	return (value);
9070}
9071
9072void
9073dns_resolver_getclientsperquery(dns_resolver_t *resolver, isc_uint32_t *cur,
9074				isc_uint32_t *min, isc_uint32_t *max)
9075{
9076	REQUIRE(VALID_RESOLVER(resolver));
9077
9078	LOCK(&resolver->lock);
9079	if (cur != NULL)
9080		*cur = resolver->spillat;
9081	if (min != NULL)
9082		*min = resolver->spillatmin;
9083	if (max != NULL)
9084		*max = resolver->spillatmax;
9085	UNLOCK(&resolver->lock);
9086}
9087
9088void
9089dns_resolver_setclientsperquery(dns_resolver_t *resolver, isc_uint32_t min,
9090				isc_uint32_t max)
9091{
9092	REQUIRE(VALID_RESOLVER(resolver));
9093
9094	LOCK(&resolver->lock);
9095	resolver->spillatmin = resolver->spillat = min;
9096	resolver->spillatmax = max;
9097	UNLOCK(&resolver->lock);
9098}
9099
9100isc_boolean_t
9101dns_resolver_getzeronosoattl(dns_resolver_t *resolver) {
9102	REQUIRE(VALID_RESOLVER(resolver));
9103
9104	return (resolver->zero_no_soa_ttl);
9105}
9106
9107void
9108dns_resolver_setzeronosoattl(dns_resolver_t *resolver, isc_boolean_t state) {
9109	REQUIRE(VALID_RESOLVER(resolver));
9110
9111	resolver->zero_no_soa_ttl = state;
9112}
9113
9114unsigned int
9115dns_resolver_getoptions(dns_resolver_t *resolver) {
9116	REQUIRE(VALID_RESOLVER(resolver));
9117
9118	return (resolver->options);
9119}
9120
9121unsigned int
9122dns_resolver_gettimeout(dns_resolver_t *resolver) {
9123	REQUIRE(VALID_RESOLVER(resolver));
9124
9125	return (resolver->query_timeout);
9126}
9127
9128void
9129dns_resolver_settimeout(dns_resolver_t *resolver, unsigned int seconds) {
9130	REQUIRE(VALID_RESOLVER(resolver));
9131
9132	if (seconds == 0)
9133		seconds = DEFAULT_QUERY_TIMEOUT;
9134	if (seconds > MAXIMUM_QUERY_TIMEOUT)
9135		seconds = MAXIMUM_QUERY_TIMEOUT;
9136	if (seconds < MINIMUM_QUERY_TIMEOUT)
9137		seconds =  MINIMUM_QUERY_TIMEOUT;
9138
9139	resolver->query_timeout = seconds;
9140}
9141
9142void
9143dns_resolver_setmaxdepth(dns_resolver_t *resolver, unsigned int maxdepth) {
9144	REQUIRE(VALID_RESOLVER(resolver));
9145	resolver->maxdepth = maxdepth;
9146}
9147
9148unsigned int
9149dns_resolver_getmaxdepth(dns_resolver_t *resolver) {
9150	REQUIRE(VALID_RESOLVER(resolver));
9151	return (resolver->maxdepth);
9152}
9153