1/*
2 * Copyright (C) 2004-2012  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id$ */
19
20/*! \file */
21
22#include <config.h>
23
24#include <isc/platform.h>
25#include <isc/print.h>
26#include <isc/string.h>
27#include <isc/random.h>
28#include <isc/task.h>
29#include <isc/stats.h>
30#include <isc/timer.h>
31#include <isc/util.h>
32
33#include <dns/acl.h>
34#include <dns/adb.h>
35#include <dns/cache.h>
36#include <dns/db.h>
37#include <dns/dispatch.h>
38#include <dns/ds.h>
39#include <dns/events.h>
40#include <dns/forward.h>
41#include <dns/keytable.h>
42#include <dns/log.h>
43#include <dns/message.h>
44#include <dns/ncache.h>
45#include <dns/opcode.h>
46#include <dns/peer.h>
47#include <dns/rbt.h>
48#include <dns/rcode.h>
49#include <dns/rdata.h>
50#include <dns/rdataclass.h>
51#include <dns/rdatalist.h>
52#include <dns/rdataset.h>
53#include <dns/rdatastruct.h>
54#include <dns/rdatatype.h>
55#include <dns/resolver.h>
56#include <dns/result.h>
57#include <dns/rootns.h>
58#include <dns/stats.h>
59#include <dns/tsig.h>
60#include <dns/validator.h>
61
/*
 * Resolver trace logging.
 *
 * Each macro writes one ISC_LOG_DEBUG(3) message in the resolver log
 * category/module through isc_log_write().  Apart from RRTRACE, which takes
 * the resolver as an explicit argument, the macros pick up a variable from
 * the scope in which they are expanded:
 *
 *	RTRACE(m)		uses 'res'
 *	FCTXTRACE(m)		uses 'fctx'
 *	FCTXTRACE2(m1, m2)	uses 'fctx'
 *	FTRACE(m)		uses 'fetch'
 *	QTRACE(m)		uses 'query'
 *
 * When DNS_RESOLVER_TRACE is not defined every macro (including
 * FCTXTRACE2) expands to nothing, so call sites compile either way.
 */
#define DNS_RESOLVER_TRACE
#ifdef DNS_RESOLVER_TRACE
#define RTRACE(m)       isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "res %p: %s", res, (m))
#define RRTRACE(r, m)   isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "res %p: %s", (r), (m))
/* Format matches FCTXTRACE2: the fctx info string is enclosed in "(...)". */
#define FCTXTRACE(m)    isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fctx %p(%s): %s", fctx, fctx->info, (m))
#define FCTXTRACE2(m1, m2) \
			isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fctx %p(%s): %s %s", \
				      fctx, fctx->info, (m1), (m2))
#define FTRACE(m)       isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fetch %p (fctx %p(%s)): %s", \
				      fetch, fetch->private, \
				      fetch->private->info, (m))
#define QTRACE(m)       isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "resquery %p (fctx %p(%s)): %s", \
				      query, query->fctx, \
				      query->fctx->info, (m))
#else
#define RTRACE(m)
#define RRTRACE(r, m)
#define FCTXTRACE(m)
#define FCTXTRACE2(m1, m2)
#define FTRACE(m)
#define QTRACE(m)
#endif
107
/*
 * Query lifetime limits.  The #ifndef guards only supply defaults, so both
 * values can be overridden by defining the macro before this point (for
 * example on the compiler command line).
 */
108#ifndef DEFAULT_QUERY_TIMEOUT
109#define DEFAULT_QUERY_TIMEOUT 30  /* The default time in seconds for the whole query to live. */
110#endif
111
112#ifndef MAXIMUM_QUERY_TIMEOUT
113#define MAXIMUM_QUERY_TIMEOUT 30 /* The maximum time in seconds for the whole query to live. */
114#endif
115
116/*%
117 * Maximum EDNS0 input packet size.
118 */
119#define RECV_BUFFER_SIZE                4096            /* XXXRTH  Constant. */
120
121/*%
122 * This defines the maximum number of timeouts we will permit before we
123 * disable EDNS0 on the query.
124 */
125#define MAX_EDNS0_TIMEOUTS      3
126
/*
 * Forward declaration: resquery_t points back at its owning fetch context,
 * whose full definition appears further down.
 */
127typedef struct fetchctx fetchctx_t;
128
/*
 * State for one outstanding query issued on behalf of a fetch context.
 * Queries are kept on the owning fetch context's 'queries' list through
 * 'link' (see fctx_cancelquery(), which unlinks them).
 */
129typedef struct query {
130	/* Locked by task event serialization. */
131	unsigned int			magic;
132	fetchctx_t *			fctx;
133	isc_mem_t *			mctx;
134	dns_dispatchmgr_t *		dispatchmgr;
135	dns_dispatch_t *		dispatch;
136	isc_boolean_t			exclusivesocket;
137	dns_adbaddrinfo_t *		addrinfo;
138	isc_socket_t *			tcpsocket;
	/* Compared with the finish time in fctx_cancelquery() to get the RTT. */
139	isc_time_t			start;
140	dns_messageid_t			id;
141	dns_dispentry_t *		dispentry;
142	ISC_LINK(struct query)		link;
143	isc_buffer_t			buffer;
144	isc_buffer_t			*tsig;
145	dns_tsigkey_t			*tsigkey;
146	unsigned int			options;
	/* RESQUERY_ATTR_* bits. */
147	unsigned int			attributes;
	/*
	 * Tested by RESQUERY_SENDING() / RESQUERY_CONNECTING(); while either
	 * is non-zero fctx_cancelquery() does not destroy the query itself.
	 */
148	unsigned int			sends;
149	unsigned int			connects;
150	unsigned char			data[512];
151} resquery_t;
152
/* Magic number stored in resquery_t.magic and the matching validity test. */
153#define QUERY_MAGIC			ISC_MAGIC('Q', '!', '!', '!')
154#define VALID_QUERY(query)		ISC_MAGIC_VALID(query, QUERY_MAGIC)
155
/* Bit in resquery_t.attributes: set by fctx_cancelquery(). */
156#define RESQUERY_ATTR_CANCELED          0x02
157
/*
 * Predicates on a resquery_t.  CONNECTING and SENDING are true while the
 * corresponding counter ('connects' / 'sends') is greater than zero.
 */
158#define RESQUERY_CONNECTING(q)          ((q)->connects > 0)
159#define RESQUERY_CANCELED(q)            (((q)->attributes & \
160					  RESQUERY_ATTR_CANCELED) != 0)
161#define RESQUERY_SENDING(q)             ((q)->sends > 0)
162
/*
 * Life-cycle state of a fetch context (fetchctx.state).
 * fctx_sendevents() requires the state to be fetchstate_done.
 */
163typedef enum {
164	fetchstate_init = 0,            /*%< Start event has not run yet. */
165	fetchstate_active,
166	fetchstate_done                 /*%< FETCHDONE events posted. */
167} fetchstate;
168
/*
 * Classification of why a name server is being recorded as bad; this is
 * the type of the 'badtype' argument of add_bad().
 */
169typedef enum {
170	badns_unreachable = 0,
171	badns_response,
172	badns_validation
173} badnstype_t;
174
/*
 * Fetch context: the state of one resolution in progress.
 *
 * The fields are grouped by the locking discipline named in the section
 * comments below (not locked / bucket lock / task event serialization).
 */
175struct fetchctx {
176	/*% Not locked. */
177	unsigned int			magic;
178	dns_resolver_t *		res;
179	dns_name_t			name;
180	dns_rdatatype_t			type;
181	unsigned int			options;
182	unsigned int			bucketnum;
	/* Text label printed by the FCTXTRACE/FTRACE/QTRACE log macros. */
183	char *				info;
184	isc_mem_t *			mctx;
185
186	/*% Locked by appropriate bucket lock. */
187	fetchstate			state;
188	isc_boolean_t			want_shutdown;
189	isc_boolean_t			cloned;
190	isc_boolean_t			spilled;
191	unsigned int			references;
192	isc_event_t			control_event;
193	ISC_LINK(struct fetchctx)       link;
194	ISC_LIST(dns_fetchevent_t)      events;
195	/*% Locked by task event serialization. */
196	dns_name_t			domain;
197	dns_rdataset_t			nameservers;
	/* FCTX_ATTR_* bits. */
198	unsigned int			attributes;
199	isc_timer_t *			timer;
200	isc_time_t			expires;
201	isc_interval_t			interval;
202	dns_message_t *			qmessage;
203	dns_message_t *			rmessage;
204	ISC_LIST(resquery_t)		queries;
205	dns_adbfindlist_t		finds;
206	dns_adbfind_t *			find;
207	dns_adbfindlist_t		altfinds;
208	dns_adbfind_t *			altfind;
209	dns_adbaddrinfolist_t		forwaddrs;
210	dns_adbaddrinfolist_t		altaddrs;
211	isc_sockaddrlist_t		forwarders;
212	dns_fwdpolicy_t			fwdpolicy;
213	isc_sockaddrlist_t		bad;
214	isc_sockaddrlist_t		edns;
215	isc_sockaddrlist_t		edns512;
216	isc_sockaddrlist_t		bad_edns;
	/*
	 * 'validator' is set by valcreate() for a validator created without
	 * DNS_VALIDATOR_DEFER; every validator is appended to 'validators'.
	 */
217	dns_validator_t			*validator;
218	ISC_LIST(dns_validator_t)       validators;
219	dns_db_t *			cache;
220	dns_adb_t *			adb;
221	isc_boolean_t			ns_ttl_ok;
222	isc_uint32_t			ns_ttl;
223
224	/*%
225	 * The number of events we're waiting for.
226	 */
227	unsigned int			pending;
228
229	/*%
230	 * The number of times we've "restarted" the current
231	 * nameserver set.  This acts as a failsafe to prevent
232	 * us from pounding constantly on a particular set of
233	 * servers that, for whatever reason, are not giving
234	 * us useful responses, but are responding in such a
235	 * way that they are not marked "bad".
236	 */
237	unsigned int			restarts;
238
239	/*%
240	 * The number of timeouts that have occurred since we
241	 * last successfully received a response packet.  This
242	 * is used for EDNS0 black hole detection.
243	 */
244	unsigned int			timeouts;
245
246	/*%
247	 * Look aside state for DS lookups.
248	 */
249	dns_name_t 			nsname;
250	dns_fetch_t *			nsfetch;
251	dns_rdataset_t			nsrrset;
252
253	/*%
254	 * Number of queries that reference this context.
255	 */
256	unsigned int			nqueries;
257
258	/*%
259	 * The reason to print when logging a successful
260	 * response to a query.
261	 */
262	const char *			reason;
263
264	/*%
265	 * Random numbers to use for mixing up server addresses.
266	 */
267	isc_uint32_t                    rand_buf;
268	isc_uint32_t                    rand_bits;
269
270	/*%
271	 * Fetch-local statistics for detailed logging.
272	 */
273	isc_result_t			result; /*%< fetch result  */
274	isc_result_t			vresult; /*%< validation result  */
275	int				exitline;
	/* fctx_sendevents() stores isc_time_microdiff(now, start) in 'duration'. */
276	isc_time_t			start;
277	isc_uint64_t			duration;
278	isc_boolean_t			logged;
279	unsigned int			querysent;
280	unsigned int			referrals;
281	unsigned int			lamecount;
282	unsigned int			neterr;
283	unsigned int			badresp;
284	unsigned int			adberr;
285	unsigned int			findfail;
286	unsigned int			valfail;
287	isc_boolean_t			timeout;
288	dns_adbaddrinfo_t 		*addrinfo;
289	isc_sockaddr_t			*client;
290};
291
/* Magic number stored in fetchctx.magic and the matching validity test. */
292#define FCTX_MAGIC			ISC_MAGIC('F', '!', '!', '!')
293#define VALID_FCTX(fctx)		ISC_MAGIC_VALID(fctx, FCTX_MAGIC)
294
/* Bits stored in fetchctx.attributes. */
295#define FCTX_ATTR_HAVEANSWER            0x0001
296#define FCTX_ATTR_GLUING                0x0002
297#define FCTX_ATTR_ADDRWAIT              0x0004
298#define FCTX_ATTR_SHUTTINGDOWN          0x0008
299#define FCTX_ATTR_WANTCACHE             0x0010
300#define FCTX_ATTR_WANTNCACHE            0x0020
301#define FCTX_ATTR_NEEDEDNS0             0x0040
302#define FCTX_ATTR_TRIEDFIND             0x0080
303#define FCTX_ATTR_TRIEDALT              0x0100
304
/*
 * Predicates: each tests one FCTX_ATTR_* bit of (f)->attributes and
 * evaluates to non-zero when the bit is set.
 */
305#define HAVE_ANSWER(f)          (((f)->attributes & FCTX_ATTR_HAVEANSWER) != \
306				 0)
307#define GLUING(f)               (((f)->attributes & FCTX_ATTR_GLUING) != \
308				 0)
309#define ADDRWAIT(f)             (((f)->attributes & FCTX_ATTR_ADDRWAIT) != \
310				 0)
311#define SHUTTINGDOWN(f)         (((f)->attributes & FCTX_ATTR_SHUTTINGDOWN) \
312				 != 0)
313#define WANTCACHE(f)            (((f)->attributes & FCTX_ATTR_WANTCACHE) != 0)
314#define WANTNCACHE(f)           (((f)->attributes & FCTX_ATTR_WANTNCACHE) != 0)
315#define NEEDEDNS0(f)            (((f)->attributes & FCTX_ATTR_NEEDEDNS0) != 0)
316#define TRIEDFIND(f)            (((f)->attributes & FCTX_ATTR_TRIEDFIND) != 0)
317#define TRIEDALT(f)             (((f)->attributes & FCTX_ATTR_TRIEDALT) != 0)
318
/*
 * Argument block allocated by valcreate() and passed to
 * dns_validator_create() together with the validated() callback; it
 * records the fetch context and the address the data came from.
 */
319typedef struct {
320	dns_adbaddrinfo_t *		addrinfo;
321	fetchctx_t *			fctx;
322} dns_valarg_t;
323
/*
 * Fetch handle.  'private' points at the fetch context serving this
 * fetch (the FTRACE macro logs fetch->private->info).
 */
324struct dns_fetch {
325	unsigned int			magic;
326	fetchctx_t *			private;
327};
328
/* Magic number stored in dns_fetch.magic and the matching validity test. */
329#define DNS_FETCH_MAGIC			ISC_MAGIC('F', 't', 'c', 'h')
330#define DNS_FETCH_VALID(fetch)		ISC_MAGIC_VALID(fetch, DNS_FETCH_MAGIC)
331
/*
 * One bucket of fetch contexts.  The resolver holds an array of these
 * (dns_resolver.buckets, dns_resolver.nbuckets entries); a fetch context
 * records its bucket in fetchctx.bucketnum.
 * NOTE(review): 'lock' is presumably the "bucket lock" referred to in
 * struct fetchctx -- confirm against the code that takes it.
 */
332typedef struct fctxbucket {
333	isc_task_t *			task;
334	isc_mutex_t			lock;
335	ISC_LIST(fetchctx_t)		fctxs;
336	isc_boolean_t			exiting;
337	isc_mem_t *			mctx;
338} fctxbucket_t;
339
/*
 * One entry of the resolver's 'alternates' list.  'isaddress' is the
 * discriminator for the union: an entry carries either a socket address
 * (_u.addr) or a name plus port (_u._n).
 */
340typedef struct alternate {
341	isc_boolean_t			isaddress;
342	union   {
343		isc_sockaddr_t		addr;
344		struct {
345			dns_name_t      name;
346			in_port_t       port;
347		} _n;
348	} _u;
349	ISC_LINK(struct alternate)      link;
350} alternate_t;
351
/*
 * Entry of the resolver's bad cache (dns_resolver.badcache): a name/type
 * pair with an expiry time, chained through 'next'.
 */
352typedef struct dns_badcache dns_badcache_t;
353struct dns_badcache {
354	dns_badcache_t *	next;
355	dns_rdatatype_t 	type;
356	isc_time_t		expire;
357	unsigned int		hashval;
358	dns_name_t		name;
359};
/* NOTE(review): presumably the initial bad-cache hash table size -- confirm. */
360#define DNS_BADCACHE_SIZE 1021
/* Evaluates to the resolver's lame_ttl, but never less than 30. */
361#define DNS_BADCACHE_TTL(fctx) \
362	(((fctx)->res->lame_ttl > 30 ) ? (fctx)->res->lame_ttl : 30)
363
/*
 * The resolver object.  Fields are grouped by the lock that protects them,
 * as named in the section comments below.
 */
364struct dns_resolver {
365	/* Unlocked. */
366	unsigned int			magic;
367	isc_mem_t *			mctx;
368	isc_mutex_t			lock;
369	isc_mutex_t			nlock;
370	isc_mutex_t			primelock;
371	dns_rdataclass_t		rdclass;
372	isc_socketmgr_t *		socketmgr;
373	isc_timermgr_t *		timermgr;
374	isc_taskmgr_t *			taskmgr;
	/* inc_stats() reads view->resstats to find the statistics set. */
375	dns_view_t *			view;
376	isc_boolean_t			frozen;
377	unsigned int			options;
378	dns_dispatchmgr_t *		dispatchmgr;
379	dns_dispatch_t *		dispatchv4;
380	isc_boolean_t			exclusivev4;
381	dns_dispatch_t *		dispatchv6;
382	isc_boolean_t			exclusivev6;
383	unsigned int			ndisps;
384	unsigned int			nbuckets;
385	fctxbucket_t *			buckets;
	/* Used by DNS_BADCACHE_TTL(), which applies a floor of 30. */
386	isc_uint32_t			lame_ttl;
387	ISC_LIST(alternate_t)		alternates;
388	isc_uint16_t			udpsize;
389#if USE_ALGLOCK
390	isc_rwlock_t			alglock;
391#endif
392	dns_rbt_t *			algorithms;
393#if USE_MBSLOCK
394	isc_rwlock_t			mbslock;
395#endif
396	dns_rbt_t *			mustbesecure;
397	unsigned int			spillatmax;
398	unsigned int			spillatmin;
399	isc_timer_t *			spillattimer;
400	isc_boolean_t			zero_no_soa_ttl;
401	unsigned int			query_timeout;
402
403	/* Locked by lock. */
404	unsigned int			references;
405	isc_boolean_t			exiting;
406	isc_eventlist_t			whenshutdown;
407	unsigned int			activebuckets;
408	isc_boolean_t			priming;
409	unsigned int			spillat;	/* clients-per-query */
410	unsigned int			nextdisp;
411
412	/* Bad cache. */
413	dns_badcache_t  ** 		badcache;
414	unsigned int 			badcount;
415	unsigned int 			badhash;
416	unsigned int 			badsweep;
417
418	/* Locked by primelock. */
419	dns_fetch_t *			primefetch;
420	/* Locked by nlock. */
421	unsigned int			nfctx;
422};
423
/* Magic number stored in dns_resolver.magic and the matching validity test. */
424#define RES_MAGIC			ISC_MAGIC('R', 'e', 's', '!')
425#define VALID_RESOLVER(res)		ISC_MAGIC_VALID(res, RES_MAGIC)
426
427/*%
428 * Private addrinfo flags.  These must not conflict with DNS_FETCHOPT_NOEDNS0,
429 * which we also use as an addrinfo flag.
430 */
431#define FCTX_ADDRINFO_MARK              0x0001
432#define FCTX_ADDRINFO_FORWARDER         0x1000
433#define FCTX_ADDRINFO_TRIED             0x2000
/*
 * Predicates on an addrinfo's 'flags' word.  Note that UNMARKED() is true
 * when FCTX_ADDRINFO_MARK is *clear*; the other two are true when their
 * bit is set.
 */
434#define UNMARKED(a)                     (((a)->flags & FCTX_ADDRINFO_MARK) \
435					 == 0)
436#define ISFORWARDER(a)                  (((a)->flags & \
437					 FCTX_ADDRINFO_FORWARDER) != 0)
438#define TRIED(a)                        (((a)->flags & \
439					 FCTX_ADDRINFO_TRIED) != 0)
440
/* Predicates on an rdataset's 'attributes' word. */
441#define NXDOMAIN(r) (((r)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
442#define NEGATIVE(r) (((r)->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
443
/*
 * Forward declarations of file-local functions that are referenced before
 * their definitions (for example maybe_destroy() and empty_bucket() are
 * called from resquery_destroy(), and validated() is passed as the
 * callback in valcreate()).
 */
444static void destroy(dns_resolver_t *res);
445static void empty_bucket(dns_resolver_t *res);
446static isc_result_t resquery_send(resquery_t *query);
447static void resquery_response(isc_task_t *task, isc_event_t *event);
448static void resquery_connected(isc_task_t *task, isc_event_t *event);
449static void fctx_try(fetchctx_t *fctx, isc_boolean_t retrying,
450		     isc_boolean_t badcache);
451static void fctx_destroy(fetchctx_t *fctx);
452static isc_boolean_t fctx_unlink(fetchctx_t *fctx);
453static isc_result_t ncache_adderesult(dns_message_t *message,
454				      dns_db_t *cache, dns_dbnode_t *node,
455				      dns_rdatatype_t covers,
456				      isc_stdtime_t now, dns_ttl_t maxttl,
457				      isc_boolean_t optout,
458				      dns_rdataset_t *ardataset,
459				      isc_result_t *eresultp);
460static void validated(isc_task_t *task, isc_event_t *event);
461static isc_boolean_t maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked);
462static void add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
463		    isc_result_t reason, badnstype_t badtype);
464
465/*%
466 * Increment resolver-related statistics counters.
467 */
468static inline void
469inc_stats(dns_resolver_t *res, isc_statscounter_t counter) {
470	if (res->view->resstats != NULL)
471		isc_stats_increment(res->view->resstats, counter);
472}
473
474static isc_result_t
475valcreate(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, dns_name_t *name,
476	  dns_rdatatype_t type, dns_rdataset_t *rdataset,
477	  dns_rdataset_t *sigrdataset, unsigned int valoptions,
478	  isc_task_t *task)
479{
480	dns_validator_t *validator = NULL;
481	dns_valarg_t *valarg;
482	isc_result_t result;
483
484	valarg = isc_mem_get(fctx->mctx, sizeof(*valarg));
485	if (valarg == NULL)
486		return (ISC_R_NOMEMORY);
487
488	valarg->fctx = fctx;
489	valarg->addrinfo = addrinfo;
490
491	if (!ISC_LIST_EMPTY(fctx->validators))
492		INSIST((valoptions & DNS_VALIDATOR_DEFER) != 0);
493
494	result = dns_validator_create(fctx->res->view, name, type, rdataset,
495				      sigrdataset, fctx->rmessage,
496				      valoptions, task, validated, valarg,
497				      &validator);
498	if (result == ISC_R_SUCCESS) {
499		inc_stats(fctx->res, dns_resstatscounter_val);
500		if ((valoptions & DNS_VALIDATOR_DEFER) == 0) {
501			INSIST(fctx->validator == NULL);
502			fctx->validator = validator;
503		}
504		ISC_LIST_APPEND(fctx->validators, validator, link);
505	} else
506		isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
507	return (result);
508}
509
510static isc_boolean_t
511rrsig_fromchildzone(fetchctx_t *fctx, dns_rdataset_t *rdataset) {
512	dns_namereln_t namereln;
513	dns_rdata_rrsig_t rrsig;
514	dns_rdata_t rdata = DNS_RDATA_INIT;
515	int order;
516	isc_result_t result;
517	unsigned int labels;
518
519	for (result = dns_rdataset_first(rdataset);
520	     result == ISC_R_SUCCESS;
521	     result = dns_rdataset_next(rdataset)) {
522		dns_rdataset_current(rdataset, &rdata);
523		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
524		RUNTIME_CHECK(result == ISC_R_SUCCESS);
525		namereln = dns_name_fullcompare(&rrsig.signer, &fctx->domain,
526						&order, &labels);
527		if (namereln == dns_namereln_subdomain)
528			return (ISC_TRUE);
529		dns_rdata_reset(&rdata);
530	}
531	return (ISC_FALSE);
532}
533
534static isc_boolean_t
535fix_mustbedelegationornxdomain(dns_message_t *message, fetchctx_t *fctx) {
536	dns_name_t *name;
537	dns_name_t *domain = &fctx->domain;
538	dns_rdataset_t *rdataset;
539	dns_rdatatype_t type;
540	isc_result_t result;
541	isc_boolean_t keep_auth = ISC_FALSE;
542
543	if (message->rcode == dns_rcode_nxdomain)
544		return (ISC_FALSE);
545
546	/*
547	 * A DS RRset can appear anywhere in a zone, even for a delegation-only
548	 * zone.  So a response to an explicit query for this type should be
549	 * excluded from delegation-only fixup.
550	 *
551	 * SOA, NS, and DNSKEY can only exist at a zone apex, so a postive
552	 * response to a query for these types can never violate the
553	 * delegation-only assumption: if the query name is below a
554	 * zone cut, the response should normally be a referral, which should
555	 * be accepted; if the query name is below a zone cut but the server
556	 * happens to have authority for the zone of the query name, the
557	 * response is a (non-referral) answer.  But this does not violate
558	 * delegation-only because the query name must be in a different zone
559	 * due to the "apex-only" nature of these types.  Note that if the
560	 * remote server happens to have authority for a child zone of a
561	 * delegation-only zone, we may still incorrectly "fix" the response
562	 * with NXDOMAIN for queries for other types.  Unfortunately it's
563	 * generally impossible to differentiate this case from violation of
564	 * the delegation-only assumption.  Once the resolver learns the
565	 * correct zone cut, possibly via a separate query for an "apex-only"
566	 * type, queries for other types will be resolved correctly.
567	 *
568	 * A query for type ANY will be accepted if it hits an exceptional
569	 * type above in the answer section as it should be from a child
570	 * zone.
571	 *
572	 * Also accept answers with RRSIG records from the child zone.
573	 * Direct queries for RRSIG records should not be answered from
574	 * the parent zone.
575	 */
576
577	if (message->counts[DNS_SECTION_ANSWER] != 0 &&
578	    (fctx->type == dns_rdatatype_ns ||
579	     fctx->type == dns_rdatatype_ds ||
580	     fctx->type == dns_rdatatype_soa ||
581	     fctx->type == dns_rdatatype_any ||
582	     fctx->type == dns_rdatatype_rrsig ||
583	     fctx->type == dns_rdatatype_dnskey)) {
584		result = dns_message_firstname(message, DNS_SECTION_ANSWER);
585		while (result == ISC_R_SUCCESS) {
586			name = NULL;
587			dns_message_currentname(message, DNS_SECTION_ANSWER,
588						&name);
589			for (rdataset = ISC_LIST_HEAD(name->list);
590			     rdataset != NULL;
591			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
592				if (!dns_name_equal(name, &fctx->name))
593					continue;
594				type = rdataset->type;
595				/*
596				 * RRsig from child?
597				 */
598				if (type == dns_rdatatype_rrsig &&
599				    rrsig_fromchildzone(fctx, rdataset))
600					return (ISC_FALSE);
601				/*
602				 * Direct query for apex records or DS.
603				 */
604				if (fctx->type == type &&
605				    (type == dns_rdatatype_ds ||
606				     type == dns_rdatatype_ns ||
607				     type == dns_rdatatype_soa ||
608				     type == dns_rdatatype_dnskey))
609					return (ISC_FALSE);
610				/*
611				 * Indirect query for apex records or DS.
612				 */
613				if (fctx->type == dns_rdatatype_any &&
614				    (type == dns_rdatatype_ns ||
615				     type == dns_rdatatype_ds ||
616				     type == dns_rdatatype_soa ||
617				     type == dns_rdatatype_dnskey))
618					return (ISC_FALSE);
619			}
620			result = dns_message_nextname(message,
621						      DNS_SECTION_ANSWER);
622		}
623	}
624
625	/*
626	 * A NODATA response to a DS query?
627	 */
628	if (fctx->type == dns_rdatatype_ds &&
629	    message->counts[DNS_SECTION_ANSWER] == 0)
630		return (ISC_FALSE);
631
632	/* Look for referral or indication of answer from child zone? */
633	if (message->counts[DNS_SECTION_AUTHORITY] == 0)
634		goto munge;
635
636	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
637	while (result == ISC_R_SUCCESS) {
638		name = NULL;
639		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
640		for (rdataset = ISC_LIST_HEAD(name->list);
641		     rdataset != NULL;
642		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
643			type = rdataset->type;
644			if (type == dns_rdatatype_soa &&
645			    dns_name_equal(name, domain))
646				keep_auth = ISC_TRUE;
647
648			if (type != dns_rdatatype_ns &&
649			    type != dns_rdatatype_soa &&
650			    type != dns_rdatatype_rrsig)
651				continue;
652
653			if (type == dns_rdatatype_rrsig) {
654				if (rrsig_fromchildzone(fctx, rdataset))
655					return (ISC_FALSE);
656				else
657					continue;
658			}
659
660			/* NS or SOA records. */
661			if (dns_name_equal(name, domain)) {
662				/*
663				 * If a query for ANY causes a negative
664				 * response, we can be sure that this is
665				 * an empty node.  For other type of queries
666				 * we cannot differentiate an empty node
667				 * from a node that just doesn't have that
668				 * type of record.  We only accept the former
669				 * case.
670				 */
671				if (message->counts[DNS_SECTION_ANSWER] == 0 &&
672				    fctx->type == dns_rdatatype_any)
673					return (ISC_FALSE);
674			} else if (dns_name_issubdomain(name, domain)) {
675				/* Referral or answer from child zone. */
676				return (ISC_FALSE);
677			}
678		}
679		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
680	}
681
682 munge:
683	message->rcode = dns_rcode_nxdomain;
684	message->counts[DNS_SECTION_ANSWER] = 0;
685	if (!keep_auth)
686		message->counts[DNS_SECTION_AUTHORITY] = 0;
687	message->counts[DNS_SECTION_ADDITIONAL] = 0;
688	return (ISC_TRUE);
689}
690
691static inline isc_result_t
692fctx_starttimer(fetchctx_t *fctx) {
693	/*
694	 * Start the lifetime timer for fctx.
695	 *
696	 * This is also used for stopping the idle timer; in that
697	 * case we must purge events already posted to ensure that
698	 * no further idle events are delivered.
699	 */
700	return (isc_timer_reset(fctx->timer, isc_timertype_once,
701				&fctx->expires, NULL, ISC_TRUE));
702}
703
704static inline void
705fctx_stoptimer(fetchctx_t *fctx) {
706	isc_result_t result;
707
708	/*
709	 * We don't return a result if resetting the timer to inactive fails
710	 * since there's nothing to be done about it.  Resetting to inactive
711	 * should never fail anyway, since the code as currently written
712	 * cannot fail in that case.
713	 */
714	result = isc_timer_reset(fctx->timer, isc_timertype_inactive,
715				  NULL, NULL, ISC_TRUE);
716	if (result != ISC_R_SUCCESS) {
717		UNEXPECTED_ERROR(__FILE__, __LINE__,
718				 "isc_timer_reset(): %s",
719				 isc_result_totext(result));
720	}
721}
722
723
724static inline isc_result_t
725fctx_startidletimer(fetchctx_t *fctx, isc_interval_t *interval) {
726	/*
727	 * Start the idle timer for fctx.  The lifetime timer continues
728	 * to be in effect.
729	 */
730	return (isc_timer_reset(fctx->timer, isc_timertype_once,
731				&fctx->expires, interval, ISC_FALSE));
732}
733
734/*
735 * Stopping the idle timer is equivalent to calling fctx_starttimer(), but
736 * we use fctx_stopidletimer for readability in the code below.
 *
 * Because this is a plain alias, fctx_stopidletimer(fctx) returns the
 * isc_result_t of fctx_starttimer() and, like it, purges timer events
 * that have already been posted.
737 */
738#define fctx_stopidletimer      fctx_starttimer
739
740
741static inline void
742resquery_destroy(resquery_t **queryp) {
743	resquery_t *query;
744
745	REQUIRE(queryp != NULL);
746	query = *queryp;
747	REQUIRE(!ISC_LINK_LINKED(query, link));
748
749	INSIST(query->tcpsocket == NULL);
750
751	query->fctx->nqueries--;
752	if (SHUTTINGDOWN(query->fctx)) {
753		dns_resolver_t *res = query->fctx->res;
754		if (maybe_destroy(query->fctx, ISC_FALSE))
755			empty_bucket(res);
756	}
757	query->magic = 0;
758	isc_mem_put(query->mctx, query, sizeof(*query));
759	*queryp = NULL;
760}
761
762static void
763fctx_cancelquery(resquery_t **queryp, dns_dispatchevent_t **deventp,
764		 isc_time_t *finish, isc_boolean_t no_response)
765{
766	fetchctx_t *fctx;
767	resquery_t *query;
768	unsigned int rtt, rttms;
769	unsigned int factor;
770	dns_adbfind_t *find;
771	dns_adbaddrinfo_t *addrinfo;
772	isc_socket_t *socket;
773
774	query = *queryp;
775	fctx = query->fctx;
776
777	FCTXTRACE("cancelquery");
778
779	REQUIRE(!RESQUERY_CANCELED(query));
780
781	query->attributes |= RESQUERY_ATTR_CANCELED;
782
783	/*
784	 * Should we update the RTT?
785	 */
786	if (finish != NULL || no_response) {
787		if (finish != NULL) {
788			/*
789			 * We have both the start and finish times for this
790			 * packet, so we can compute a real RTT.
791			 */
792			rtt = (unsigned int)isc_time_microdiff(finish,
793							       &query->start);
794			factor = DNS_ADB_RTTADJDEFAULT;
795
796			rttms = rtt / 1000;
797			if (rttms < DNS_RESOLVER_QRYRTTCLASS0) {
798				inc_stats(fctx->res,
799					  dns_resstatscounter_queryrtt0);
800			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS1) {
801				inc_stats(fctx->res,
802					  dns_resstatscounter_queryrtt1);
803			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS2) {
804				inc_stats(fctx->res,
805					  dns_resstatscounter_queryrtt2);
806			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS3) {
807				inc_stats(fctx->res,
808					  dns_resstatscounter_queryrtt3);
809			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS4) {
810				inc_stats(fctx->res,
811					  dns_resstatscounter_queryrtt4);
812			} else {
813				inc_stats(fctx->res,
814					  dns_resstatscounter_queryrtt5);
815			}
816		} else {
817			/*
818			 * We don't have an RTT for this query.  Maybe the
819			 * packet was lost, or maybe this server is very
820			 * slow.  We don't know.  Increase the RTT.
821			 */
822			INSIST(no_response);
823			rtt = query->addrinfo->srtt + 200000;
824			if (rtt > 10000000)
825				rtt = 10000000;
826			/*
827			 * Replace the current RTT with our value.
828			 */
829			factor = DNS_ADB_RTTADJREPLACE;
830		}
831		dns_adb_adjustsrtt(fctx->adb, query->addrinfo, rtt, factor);
832	}
833
834	/* Remember that the server has been tried. */
835	if (!TRIED(query->addrinfo)) {
836		dns_adb_changeflags(fctx->adb, query->addrinfo,
837				    FCTX_ADDRINFO_TRIED, FCTX_ADDRINFO_TRIED);
838	}
839
840	/*
841	 * Age RTTs of servers not tried.
842	 */
843	factor = DNS_ADB_RTTADJAGE;
844	if (finish != NULL)
845		for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
846		     addrinfo != NULL;
847		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
848			if (UNMARKED(addrinfo))
849				dns_adb_adjustsrtt(fctx->adb, addrinfo,
850						   0, factor);
851
852	if (finish != NULL && TRIEDFIND(fctx))
853		for (find = ISC_LIST_HEAD(fctx->finds);
854		     find != NULL;
855		     find = ISC_LIST_NEXT(find, publink))
856			for (addrinfo = ISC_LIST_HEAD(find->list);
857			     addrinfo != NULL;
858			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
859				if (UNMARKED(addrinfo))
860					dns_adb_adjustsrtt(fctx->adb, addrinfo,
861							   0, factor);
862
863	if (finish != NULL && TRIEDALT(fctx)) {
864		for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
865		     addrinfo != NULL;
866		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
867			if (UNMARKED(addrinfo))
868				dns_adb_adjustsrtt(fctx->adb, addrinfo,
869						   0, factor);
870		for (find = ISC_LIST_HEAD(fctx->altfinds);
871		     find != NULL;
872		     find = ISC_LIST_NEXT(find, publink))
873			for (addrinfo = ISC_LIST_HEAD(find->list);
874			     addrinfo != NULL;
875			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
876				if (UNMARKED(addrinfo))
877					dns_adb_adjustsrtt(fctx->adb, addrinfo,
878							   0, factor);
879	}
880
881	/*
882	 * Check for any outstanding socket events.  If they exist, cancel
883	 * them and let the event handlers finish the cleanup.  The resolver
884	 * only needs to worry about managing the connect and send events;
885	 * the dispatcher manages the recv events.
886	 */
887	if (RESQUERY_CONNECTING(query)) {
888		/*
889		 * Cancel the connect.
890		 */
891		if (query->tcpsocket != NULL) {
892			isc_socket_cancel(query->tcpsocket, NULL,
893					  ISC_SOCKCANCEL_CONNECT);
894		} else if (query->dispentry != NULL) {
895			INSIST(query->exclusivesocket);
896			socket = dns_dispatch_getentrysocket(query->dispentry);
897			if (socket != NULL)
898				isc_socket_cancel(socket, NULL,
899						  ISC_SOCKCANCEL_CONNECT);
900		}
901	} else if (RESQUERY_SENDING(query)) {
902		/*
903		 * Cancel the pending send.
904		 */
905		if (query->exclusivesocket && query->dispentry != NULL)
906			socket = dns_dispatch_getentrysocket(query->dispentry);
907		else
908			socket = dns_dispatch_getsocket(query->dispatch);
909		if (socket != NULL)
910			isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND);
911	}
912
913	if (query->dispentry != NULL)
914		dns_dispatch_removeresponse(&query->dispentry, deventp);
915
916	ISC_LIST_UNLINK(fctx->queries, query, link);
917
918	if (query->tsig != NULL)
919		isc_buffer_free(&query->tsig);
920
921	if (query->tsigkey != NULL)
922		dns_tsigkey_detach(&query->tsigkey);
923
924	if (query->dispatch != NULL)
925		dns_dispatch_detach(&query->dispatch);
926
927	if (! (RESQUERY_CONNECTING(query) || RESQUERY_SENDING(query)))
928		/*
929		 * It's safe to destroy the query now.
930		 */
931		resquery_destroy(&query);
932}
933
934static void
935fctx_cancelqueries(fetchctx_t *fctx, isc_boolean_t no_response) {
936	resquery_t *query, *next_query;
937
938	FCTXTRACE("cancelqueries");
939
940	for (query = ISC_LIST_HEAD(fctx->queries);
941	     query != NULL;
942	     query = next_query) {
943		next_query = ISC_LIST_NEXT(query, link);
944		fctx_cancelquery(&query, NULL, NULL, no_response);
945	}
946}
947
948static void
949fctx_cleanupfinds(fetchctx_t *fctx) {
950	dns_adbfind_t *find, *next_find;
951
952	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
953
954	for (find = ISC_LIST_HEAD(fctx->finds);
955	     find != NULL;
956	     find = next_find) {
957		next_find = ISC_LIST_NEXT(find, publink);
958		ISC_LIST_UNLINK(fctx->finds, find, publink);
959		dns_adb_destroyfind(&find);
960	}
961	fctx->find = NULL;
962}
963
964static void
965fctx_cleanupaltfinds(fetchctx_t *fctx) {
966	dns_adbfind_t *find, *next_find;
967
968	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
969
970	for (find = ISC_LIST_HEAD(fctx->altfinds);
971	     find != NULL;
972	     find = next_find) {
973		next_find = ISC_LIST_NEXT(find, publink);
974		ISC_LIST_UNLINK(fctx->altfinds, find, publink);
975		dns_adb_destroyfind(&find);
976	}
977	fctx->altfind = NULL;
978}
979
980static void
981fctx_cleanupforwaddrs(fetchctx_t *fctx) {
982	dns_adbaddrinfo_t *addr, *next_addr;
983
984	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
985
986	for (addr = ISC_LIST_HEAD(fctx->forwaddrs);
987	     addr != NULL;
988	     addr = next_addr) {
989		next_addr = ISC_LIST_NEXT(addr, publink);
990		ISC_LIST_UNLINK(fctx->forwaddrs, addr, publink);
991		dns_adb_freeaddrinfo(fctx->adb, &addr);
992	}
993}
994
995static void
996fctx_cleanupaltaddrs(fetchctx_t *fctx) {
997	dns_adbaddrinfo_t *addr, *next_addr;
998
999	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1000
1001	for (addr = ISC_LIST_HEAD(fctx->altaddrs);
1002	     addr != NULL;
1003	     addr = next_addr) {
1004		next_addr = ISC_LIST_NEXT(addr, publink);
1005		ISC_LIST_UNLINK(fctx->altaddrs, addr, publink);
1006		dns_adb_freeaddrinfo(fctx->adb, &addr);
1007	}
1008}
1009
/*
 * Halt all outstanding activity for 'fctx': cancel its queries, release
 * its find and address lists, and call fctx_stoptimer().
 *
 * 'no_response' is forwarded to fctx_cancelqueries().
 */
static inline void
fctx_stopeverything(fetchctx_t *fctx, isc_boolean_t no_response) {
	FCTXTRACE("stopeverything");
	/*
	 * Queries are cancelled first: each cleanup helper below REQUIREs
	 * that fctx->queries is empty when it is entered.
	 */
	fctx_cancelqueries(fctx, no_response);
	fctx_cleanupfinds(fctx);
	fctx_cleanupaltfinds(fctx);
	fctx_cleanupforwaddrs(fctx);
	fctx_cleanupaltaddrs(fctx);
	fctx_stoptimer(fctx);
}
1020
1021static inline void
1022fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) {
1023	dns_fetchevent_t *event, *next_event;
1024	isc_task_t *task;
1025	unsigned int count = 0;
1026	isc_interval_t i;
1027	isc_boolean_t logit = ISC_FALSE;
1028	isc_time_t now;
1029	unsigned int old_spillat;
1030	unsigned int new_spillat = 0;	/* initialized to silence
1031					   compiler warnings */
1032
1033	/*
1034	 * Caller must be holding the appropriate bucket lock.
1035	 */
1036	REQUIRE(fctx->state == fetchstate_done);
1037
1038	FCTXTRACE("sendevents");
1039
1040	/*
1041	 * Keep some record of fetch result for logging later (if required).
1042	 */
1043	fctx->result = result;
1044	fctx->exitline = line;
1045	TIME_NOW(&now);
1046	fctx->duration = isc_time_microdiff(&now, &fctx->start);
1047
1048	for (event = ISC_LIST_HEAD(fctx->events);
1049	     event != NULL;
1050	     event = next_event) {
1051		next_event = ISC_LIST_NEXT(event, ev_link);
1052		ISC_LIST_UNLINK(fctx->events, event, ev_link);
1053		task = event->ev_sender;
1054		event->ev_sender = fctx;
1055		event->vresult = fctx->vresult;
1056		if (!HAVE_ANSWER(fctx))
1057			event->result = result;
1058
1059		INSIST(result != ISC_R_SUCCESS ||
1060		       dns_rdataset_isassociated(event->rdataset) ||
1061		       fctx->type == dns_rdatatype_any ||
1062		       fctx->type == dns_rdatatype_rrsig ||
1063		       fctx->type == dns_rdatatype_sig);
1064
1065		/*
1066		 * Negative results must be indicated in event->result.
1067		 */
1068		if (dns_rdataset_isassociated(event->rdataset) &&
1069		    NEGATIVE(event->rdataset)) {
1070			INSIST(event->result == DNS_R_NCACHENXDOMAIN ||
1071			       event->result == DNS_R_NCACHENXRRSET);
1072		}
1073
1074		isc_task_sendanddetach(&task, ISC_EVENT_PTR(&event));
1075		count++;
1076	}
1077
1078	if ((fctx->attributes & FCTX_ATTR_HAVEANSWER) != 0 &&
1079	    fctx->spilled &&
1080	    (count < fctx->res->spillatmax || fctx->res->spillatmax == 0)) {
1081		LOCK(&fctx->res->lock);
1082		if (count == fctx->res->spillat && !fctx->res->exiting) {
1083			old_spillat = fctx->res->spillat;
1084			fctx->res->spillat += 5;
1085			if (fctx->res->spillat > fctx->res->spillatmax &&
1086			    fctx->res->spillatmax != 0)
1087				fctx->res->spillat = fctx->res->spillatmax;
1088			new_spillat = fctx->res->spillat;
1089			if (new_spillat != old_spillat) {
1090				logit = ISC_TRUE;
1091			}
1092			isc_interval_set(&i, 20 * 60, 0);
1093			result = isc_timer_reset(fctx->res->spillattimer,
1094						 isc_timertype_ticker, NULL,
1095						 &i, ISC_TRUE);
1096			RUNTIME_CHECK(result == ISC_R_SUCCESS);
1097		}
1098		UNLOCK(&fctx->res->lock);
1099		if (logit)
1100			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
1101				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
1102				      "clients-per-query increased to %u",
1103				      new_spillat);
1104	}
1105}
1106
1107static inline void
1108log_edns(fetchctx_t *fctx) {
1109	char domainbuf[DNS_NAME_FORMATSIZE];
1110
1111	if (fctx->reason == NULL)
1112		return;
1113
1114	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
1115	isc_log_write(dns_lctx, DNS_LOGCATEGORY_EDNS_DISABLED,
1116		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1117		      "success resolving '%s' (in '%s'?) after %s",
1118		      fctx->info, domainbuf, fctx->reason);
1119
1120	fctx->reason = NULL;
1121}
1122
1123static void
1124fctx_done(fetchctx_t *fctx, isc_result_t result, int line) {
1125	dns_resolver_t *res;
1126	isc_boolean_t no_response;
1127
1128	REQUIRE(line >= 0);
1129
1130	FCTXTRACE("done");
1131
1132	res = fctx->res;
1133
1134	if (result == ISC_R_SUCCESS) {
1135		/*%
1136		 * Log any deferred EDNS timeout messages.
1137		 */
1138		log_edns(fctx);
1139		no_response = ISC_TRUE;
1140	 } else
1141		no_response = ISC_FALSE;
1142
1143	fctx->reason = NULL;
1144	fctx_stopeverything(fctx, no_response);
1145
1146	LOCK(&res->buckets[fctx->bucketnum].lock);
1147
1148	fctx->state = fetchstate_done;
1149	fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1150	fctx_sendevents(fctx, result, line);
1151
1152	UNLOCK(&res->buckets[fctx->bucketnum].lock);
1153}
1154
1155static void
1156process_sendevent(resquery_t *query, isc_event_t *event) {
1157	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
1158	isc_boolean_t retry = ISC_FALSE;
1159	isc_result_t result;
1160	fetchctx_t *fctx;
1161
1162	fctx = query->fctx;
1163
1164	if (RESQUERY_CANCELED(query)) {
1165		if (query->sends == 0 && query->connects == 0) {
1166			/*
1167			 * This query was canceled while the
1168			 * isc_socket_sendto/connect() was in progress.
1169			 */
1170			if (query->tcpsocket != NULL)
1171				isc_socket_detach(&query->tcpsocket);
1172			resquery_destroy(&query);
1173		}
1174	} else {
1175		switch (sevent->result) {
1176		case ISC_R_SUCCESS:
1177			break;
1178
1179		case ISC_R_HOSTUNREACH:
1180		case ISC_R_NETUNREACH:
1181		case ISC_R_NOPERM:
1182		case ISC_R_ADDRNOTAVAIL:
1183		case ISC_R_CONNREFUSED:
1184
1185			/*
1186			 * No route to remote.
1187			 */
1188			add_bad(fctx, query->addrinfo, sevent->result,
1189				badns_unreachable);
1190			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
1191			retry = ISC_TRUE;
1192			break;
1193
1194		default:
1195			fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
1196			break;
1197		}
1198	}
1199
1200	isc_event_free(&event);
1201
1202	if (retry) {
1203		/*
1204		 * Behave as if the idle timer has expired.  For TCP
1205		 * this may not actually reflect the latest timer.
1206		 */
1207		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1208		result = fctx_stopidletimer(fctx);
1209		if (result != ISC_R_SUCCESS)
1210			fctx_done(fctx, result, __LINE__);
1211		else
1212			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
1213	}
1214}
1215
1216static void
1217resquery_udpconnected(isc_task_t *task, isc_event_t *event) {
1218	resquery_t *query = event->ev_arg;
1219
1220	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
1221
1222	QTRACE("udpconnected");
1223
1224	UNUSED(task);
1225
1226	INSIST(RESQUERY_CONNECTING(query));
1227
1228	query->connects--;
1229
1230	process_sendevent(query, event);
1231}
1232
1233static void
1234resquery_senddone(isc_task_t *task, isc_event_t *event) {
1235	resquery_t *query = event->ev_arg;
1236
1237	REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE);
1238
1239	QTRACE("senddone");
1240
1241	/*
1242	 * XXXRTH
1243	 *
1244	 * Currently we don't wait for the senddone event before retrying
1245	 * a query.  This means that if we get really behind, we may end
1246	 * up doing extra work!
1247	 */
1248
1249	UNUSED(task);
1250
1251	INSIST(RESQUERY_SENDING(query));
1252
1253	query->sends--;
1254
1255	process_sendevent(query, event);
1256}
1257
1258static inline isc_result_t
1259fctx_addopt(dns_message_t *message, unsigned int version,
1260	    isc_uint16_t udpsize, isc_boolean_t request_nsid)
1261{
1262	dns_rdataset_t *rdataset;
1263	dns_rdatalist_t *rdatalist;
1264	dns_rdata_t *rdata;
1265	isc_result_t result;
1266
1267	rdatalist = NULL;
1268	result = dns_message_gettemprdatalist(message, &rdatalist);
1269	if (result != ISC_R_SUCCESS)
1270		return (result);
1271	rdata = NULL;
1272	result = dns_message_gettemprdata(message, &rdata);
1273	if (result != ISC_R_SUCCESS)
1274		return (result);
1275	rdataset = NULL;
1276	result = dns_message_gettemprdataset(message, &rdataset);
1277	if (result != ISC_R_SUCCESS)
1278		return (result);
1279	dns_rdataset_init(rdataset);
1280
1281	rdatalist->type = dns_rdatatype_opt;
1282	rdatalist->covers = 0;
1283
1284	/*
1285	 * Set Maximum UDP buffer size.
1286	 */
1287	rdatalist->rdclass = udpsize;
1288
1289	/*
1290	 * Set EXTENDED-RCODE and Z to 0, DO to 1.
1291	 */
1292	rdatalist->ttl = (version << 16);
1293	rdatalist->ttl |= DNS_MESSAGEEXTFLAG_DO;
1294
1295	/*
1296	 * Set EDNS options if applicable
1297	 */
1298	if (request_nsid) {
1299		/* Send empty NSID option (RFC5001) */
1300		unsigned char data[4];
1301		isc_buffer_t buf;
1302
1303		isc_buffer_init(&buf, data, sizeof(data));
1304		isc_buffer_putuint16(&buf, DNS_OPT_NSID);
1305		isc_buffer_putuint16(&buf, 0);
1306		rdata->data = data;
1307		rdata->length = sizeof(data);
1308	} else {
1309		rdata->data = NULL;
1310		rdata->length = 0;
1311	}
1312
1313	rdata->rdclass = rdatalist->rdclass;
1314	rdata->type = rdatalist->type;
1315	rdata->flags = 0;
1316
1317	ISC_LIST_INIT(rdatalist->rdata);
1318	ISC_LIST_APPEND(rdatalist->rdata, rdata, link);
1319	RUNTIME_CHECK(dns_rdatalist_tordataset(rdatalist, rdataset) == ISC_R_SUCCESS);
1320
1321	return (dns_message_setopt(message, rdataset));
1322}
1323
1324static inline void
1325fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
1326	unsigned int seconds;
1327	unsigned int us;
1328
1329	/*
1330	 * We retry every .8 seconds the first two times through the address
1331	 * list, and then we do exponential back-off.
1332	 */
1333	if (fctx->restarts < 3)
1334		us = 800000;
1335	else
1336		us = (800000 << (fctx->restarts - 2));
1337
1338	/*
1339	 * Double the round-trip time.
1340	 */
1341	rtt *= 2;
1342
1343	/*
1344	 * Always wait for at least the doubled round-trip time.
1345	 */
1346	if (us < rtt)
1347		us = rtt;
1348
1349	/*
1350	 * But don't ever wait for more than 10 seconds.
1351	 */
1352	if (us > 10000000)
1353		us = 10000000;
1354
1355	seconds = us / 1000000;
1356	us -= seconds * 1000000;
1357	isc_interval_set(&fctx->interval, seconds, us * 1000);
1358}
1359
1360static isc_result_t
1361fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
1362	   unsigned int options)
1363{
1364	dns_resolver_t *res;
1365	isc_task_t *task;
1366	isc_result_t result;
1367	resquery_t *query;
1368	isc_sockaddr_t addr;
1369	isc_boolean_t have_addr = ISC_FALSE;
1370	unsigned int srtt;
1371
1372	FCTXTRACE("query");
1373
1374	res = fctx->res;
1375	task = res->buckets[fctx->bucketnum].task;
1376
1377	srtt = addrinfo->srtt;
1378	if (ISFORWARDER(addrinfo) && srtt < 1000000)
1379		srtt = 1000000;
1380
1381	fctx_setretryinterval(fctx, srtt);
1382	result = fctx_startidletimer(fctx, &fctx->interval);
1383	if (result != ISC_R_SUCCESS)
1384		return (result);
1385
1386	INSIST(ISC_LIST_EMPTY(fctx->validators));
1387
1388	dns_message_reset(fctx->rmessage, DNS_MESSAGE_INTENTPARSE);
1389
1390	query = isc_mem_get(fctx->mctx, sizeof(*query));
1391	if (query == NULL) {
1392		result = ISC_R_NOMEMORY;
1393		goto stop_idle_timer;
1394	}
1395	query->mctx = fctx->mctx;
1396	query->options = options;
1397	query->attributes = 0;
1398	query->sends = 0;
1399	query->connects = 0;
1400	/*
1401	 * Note that the caller MUST guarantee that 'addrinfo' will remain
1402	 * valid until this query is canceled.
1403	 */
1404	query->addrinfo = addrinfo;
1405	TIME_NOW(&query->start);
1406
1407	/*
1408	 * If this is a TCP query, then we need to make a socket and
1409	 * a dispatch for it here.  Otherwise we use the resolver's
1410	 * shared dispatch.
1411	 */
1412	query->dispatchmgr = res->dispatchmgr;
1413	query->dispatch = NULL;
1414	query->exclusivesocket = ISC_FALSE;
1415	query->tcpsocket = NULL;
1416	if (res->view->peers != NULL) {
1417		dns_peer_t *peer = NULL;
1418		isc_netaddr_t dstip;
1419		isc_netaddr_fromsockaddr(&dstip, &addrinfo->sockaddr);
1420		result = dns_peerlist_peerbyaddr(res->view->peers,
1421						 &dstip, &peer);
1422		if (result == ISC_R_SUCCESS) {
1423			result = dns_peer_getquerysource(peer, &addr);
1424			if (result == ISC_R_SUCCESS)
1425				have_addr = ISC_TRUE;
1426		}
1427	}
1428
1429	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1430		int pf;
1431
1432		pf = isc_sockaddr_pf(&addrinfo->sockaddr);
1433		if (!have_addr) {
1434			switch (pf) {
1435			case PF_INET:
1436				result =
1437				  dns_dispatch_getlocaladdress(res->dispatchv4,
1438							       &addr);
1439				break;
1440			case PF_INET6:
1441				result =
1442				  dns_dispatch_getlocaladdress(res->dispatchv6,
1443							       &addr);
1444				break;
1445			default:
1446				result = ISC_R_NOTIMPLEMENTED;
1447				break;
1448			}
1449			if (result != ISC_R_SUCCESS)
1450				goto cleanup_query;
1451		}
1452		isc_sockaddr_setport(&addr, 0);
1453
1454		result = isc_socket_create(res->socketmgr, pf,
1455					   isc_sockettype_tcp,
1456					   &query->tcpsocket);
1457		if (result != ISC_R_SUCCESS)
1458			goto cleanup_query;
1459
1460#ifndef BROKEN_TCP_BIND_BEFORE_CONNECT
1461		result = isc_socket_bind(query->tcpsocket, &addr, 0);
1462		if (result != ISC_R_SUCCESS)
1463			goto cleanup_socket;
1464#endif
1465
1466		/*
1467		 * A dispatch will be created once the connect succeeds.
1468		 */
1469	} else {
1470		if (have_addr) {
1471			unsigned int attrs, attrmask;
1472			attrs = DNS_DISPATCHATTR_UDP;
1473			switch (isc_sockaddr_pf(&addr)) {
1474			case AF_INET:
1475				attrs |= DNS_DISPATCHATTR_IPV4;
1476				break;
1477			case AF_INET6:
1478				attrs |= DNS_DISPATCHATTR_IPV6;
1479				break;
1480			default:
1481				result = ISC_R_NOTIMPLEMENTED;
1482				goto cleanup_query;
1483			}
1484			attrmask = DNS_DISPATCHATTR_UDP;
1485			attrmask |= DNS_DISPATCHATTR_TCP;
1486			attrmask |= DNS_DISPATCHATTR_IPV4;
1487			attrmask |= DNS_DISPATCHATTR_IPV6;
1488			result = dns_dispatch_getudp(res->dispatchmgr,
1489						     res->socketmgr,
1490						     res->taskmgr, &addr,
1491						     4096, 1000, 32768, 16411,
1492						     16433, attrs, attrmask,
1493						     &query->dispatch);
1494			if (result != ISC_R_SUCCESS)
1495				goto cleanup_query;
1496		} else {
1497			switch (isc_sockaddr_pf(&addrinfo->sockaddr)) {
1498			case PF_INET:
1499				dns_dispatch_attach(res->dispatchv4,
1500						    &query->dispatch);
1501				query->exclusivesocket = res->exclusivev4;
1502				break;
1503			case PF_INET6:
1504				dns_dispatch_attach(res->dispatchv6,
1505						    &query->dispatch);
1506				query->exclusivesocket = res->exclusivev6;
1507				break;
1508			default:
1509				result = ISC_R_NOTIMPLEMENTED;
1510				goto cleanup_query;
1511			}
1512		}
1513		/*
1514		 * We should always have a valid dispatcher here.  If we
1515		 * don't support a protocol family, then its dispatcher
1516		 * will be NULL, but we shouldn't be finding addresses for
1517		 * protocol types we don't support, so the dispatcher
1518		 * we found should never be NULL.
1519		 */
1520		INSIST(query->dispatch != NULL);
1521	}
1522
1523	query->dispentry = NULL;
1524	query->fctx = fctx;
1525	query->tsig = NULL;
1526	query->tsigkey = NULL;
1527	ISC_LINK_INIT(query, link);
1528	query->magic = QUERY_MAGIC;
1529
1530	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1531		/*
1532		 * Connect to the remote server.
1533		 *
1534		 * XXXRTH  Should we attach to the socket?
1535		 */
1536		result = isc_socket_connect(query->tcpsocket,
1537					    &addrinfo->sockaddr, task,
1538					    resquery_connected, query);
1539		if (result != ISC_R_SUCCESS)
1540			goto cleanup_socket;
1541		query->connects++;
1542		QTRACE("connecting via TCP");
1543	} else {
1544		result = resquery_send(query);
1545		if (result != ISC_R_SUCCESS)
1546			goto cleanup_dispatch;
1547	}
1548	fctx->querysent++;
1549
1550	ISC_LIST_APPEND(fctx->queries, query, link);
1551	query->fctx->nqueries++;
1552	if (isc_sockaddr_pf(&addrinfo->sockaddr) == PF_INET)
1553		inc_stats(res, dns_resstatscounter_queryv4);
1554	else
1555		inc_stats(res, dns_resstatscounter_queryv6);
1556	if (res->view->resquerystats != NULL)
1557		dns_rdatatypestats_increment(res->view->resquerystats,
1558					     fctx->type);
1559
1560	return (ISC_R_SUCCESS);
1561
1562 cleanup_socket:
1563	isc_socket_detach(&query->tcpsocket);
1564
1565 cleanup_dispatch:
1566	if (query->dispatch != NULL)
1567		dns_dispatch_detach(&query->dispatch);
1568
1569 cleanup_query:
1570	if (query->connects == 0) {
1571		query->magic = 0;
1572		isc_mem_put(fctx->mctx, query, sizeof(*query));
1573	}
1574
1575 stop_idle_timer:
1576	RUNTIME_CHECK(fctx_stopidletimer(fctx) == ISC_R_SUCCESS);
1577
1578	return (result);
1579}
1580
1581static isc_boolean_t
1582bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1583	isc_sockaddr_t *sa;
1584
1585	for (sa = ISC_LIST_HEAD(fctx->bad_edns);
1586	     sa != NULL;
1587	     sa = ISC_LIST_NEXT(sa, link)) {
1588		if (isc_sockaddr_equal(sa, address))
1589			return (ISC_TRUE);
1590	}
1591
1592	return (ISC_FALSE);
1593}
1594
1595static void
1596add_bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1597	isc_sockaddr_t *sa;
1598
1599	if (bad_edns(fctx, address))
1600		return;
1601
1602	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1603	if (sa == NULL)
1604		return;
1605
1606	*sa = *address;
1607	ISC_LIST_INITANDAPPEND(fctx->bad_edns, sa, link);
1608}
1609
1610static isc_boolean_t
1611triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1612	isc_sockaddr_t *sa;
1613
1614	for (sa = ISC_LIST_HEAD(fctx->edns);
1615	     sa != NULL;
1616	     sa = ISC_LIST_NEXT(sa, link)) {
1617		if (isc_sockaddr_equal(sa, address))
1618			return (ISC_TRUE);
1619	}
1620
1621	return (ISC_FALSE);
1622}
1623
1624static void
1625add_triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1626	isc_sockaddr_t *sa;
1627
1628	if (triededns(fctx, address))
1629		return;
1630
1631	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1632	if (sa == NULL)
1633		return;
1634
1635	*sa = *address;
1636	ISC_LIST_INITANDAPPEND(fctx->edns, sa, link);
1637}
1638
1639static isc_boolean_t
1640triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1641	isc_sockaddr_t *sa;
1642
1643	for (sa = ISC_LIST_HEAD(fctx->edns512);
1644	     sa != NULL;
1645	     sa = ISC_LIST_NEXT(sa, link)) {
1646		if (isc_sockaddr_equal(sa, address))
1647			return (ISC_TRUE);
1648	}
1649
1650	return (ISC_FALSE);
1651}
1652
1653static void
1654add_triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1655	isc_sockaddr_t *sa;
1656
1657	if (triededns512(fctx, address))
1658		return;
1659
1660	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1661	if (sa == NULL)
1662		return;
1663
1664	*sa = *address;
1665	ISC_LIST_INITANDAPPEND(fctx->edns512, sa, link);
1666}
1667
1668static isc_result_t
1669resquery_send(resquery_t *query) {
1670	fetchctx_t *fctx;
1671	isc_result_t result;
1672	dns_name_t *qname = NULL;
1673	dns_rdataset_t *qrdataset = NULL;
1674	isc_region_t r;
1675	dns_resolver_t *res;
1676	isc_task_t *task;
1677	isc_socket_t *socket;
1678	isc_buffer_t tcpbuffer;
1679	isc_sockaddr_t *address;
1680	isc_buffer_t *buffer;
1681	isc_netaddr_t ipaddr;
1682	dns_tsigkey_t *tsigkey = NULL;
1683	dns_peer_t *peer = NULL;
1684	isc_boolean_t useedns;
1685	dns_compress_t cctx;
1686	isc_boolean_t cleanup_cctx = ISC_FALSE;
1687	isc_boolean_t secure_domain;
1688	isc_boolean_t connecting = ISC_FALSE;
1689
1690	fctx = query->fctx;
1691	QTRACE("send");
1692
1693	res = fctx->res;
1694	task = res->buckets[fctx->bucketnum].task;
1695	address = NULL;
1696
1697	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1698		/*
1699		 * Reserve space for the TCP message length.
1700		 */
1701		isc_buffer_init(&tcpbuffer, query->data, sizeof(query->data));
1702		isc_buffer_init(&query->buffer, query->data + 2,
1703				sizeof(query->data) - 2);
1704		buffer = &tcpbuffer;
1705	} else {
1706		isc_buffer_init(&query->buffer, query->data,
1707				sizeof(query->data));
1708		buffer = &query->buffer;
1709	}
1710
1711	result = dns_message_gettempname(fctx->qmessage, &qname);
1712	if (result != ISC_R_SUCCESS)
1713		goto cleanup_temps;
1714	result = dns_message_gettemprdataset(fctx->qmessage, &qrdataset);
1715	if (result != ISC_R_SUCCESS)
1716		goto cleanup_temps;
1717
1718	/*
1719	 * Get a query id from the dispatch.
1720	 */
1721	result = dns_dispatch_addresponse2(query->dispatch,
1722					   &query->addrinfo->sockaddr,
1723					   task,
1724					   resquery_response,
1725					   query,
1726					   &query->id,
1727					   &query->dispentry,
1728					   res->socketmgr);
1729	if (result != ISC_R_SUCCESS)
1730		goto cleanup_temps;
1731
1732	fctx->qmessage->opcode = dns_opcode_query;
1733
1734	/*
1735	 * Set up question.
1736	 */
1737	dns_name_init(qname, NULL);
1738	dns_name_clone(&fctx->name, qname);
1739	dns_rdataset_init(qrdataset);
1740	dns_rdataset_makequestion(qrdataset, res->rdclass, fctx->type);
1741	ISC_LIST_APPEND(qname->list, qrdataset, link);
1742	dns_message_addname(fctx->qmessage, qname, DNS_SECTION_QUESTION);
1743	qname = NULL;
1744	qrdataset = NULL;
1745
1746	/*
1747	 * Set RD if the client has requested that we do a recursive query,
1748	 * or if we're sending to a forwarder.
1749	 */
1750	if ((query->options & DNS_FETCHOPT_RECURSIVE) != 0 ||
1751	    ISFORWARDER(query->addrinfo))
1752		fctx->qmessage->flags |= DNS_MESSAGEFLAG_RD;
1753
1754	/*
1755	 * Set CD if the client says don't validate or the question is
1756	 * under a secure entry point.
1757	 */
1758	if ((query->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
1759		fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1760	} else if (res->view->enablevalidation) {
1761		result = dns_view_issecuredomain(res->view, &fctx->name,
1762						 &secure_domain);
1763		if (result != ISC_R_SUCCESS)
1764			secure_domain = ISC_FALSE;
1765		if (res->view->dlv != NULL)
1766			secure_domain = ISC_TRUE;
1767		if (secure_domain)
1768			fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1769	}
1770
1771	/*
1772	 * We don't have to set opcode because it defaults to query.
1773	 */
1774	fctx->qmessage->id = query->id;
1775
1776	/*
1777	 * Convert the question to wire format.
1778	 */
1779	result = dns_compress_init(&cctx, -1, fctx->res->mctx);
1780	if (result != ISC_R_SUCCESS)
1781		goto cleanup_message;
1782	cleanup_cctx = ISC_TRUE;
1783
1784	result = dns_message_renderbegin(fctx->qmessage, &cctx,
1785					 &query->buffer);
1786	if (result != ISC_R_SUCCESS)
1787		goto cleanup_message;
1788
1789	result = dns_message_rendersection(fctx->qmessage,
1790					   DNS_SECTION_QUESTION, 0);
1791	if (result != ISC_R_SUCCESS)
1792		goto cleanup_message;
1793
1794	peer = NULL;
1795	isc_netaddr_fromsockaddr(&ipaddr, &query->addrinfo->sockaddr);
1796	(void) dns_peerlist_peerbyaddr(fctx->res->view->peers, &ipaddr, &peer);
1797
1798	/*
1799	 * The ADB does not know about servers with "edns no".  Check this,
1800	 * and then inform the ADB for future use.
1801	 */
1802	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0 &&
1803	    peer != NULL &&
1804	    dns_peer_getsupportedns(peer, &useedns) == ISC_R_SUCCESS &&
1805	    !useedns)
1806	{
1807		query->options |= DNS_FETCHOPT_NOEDNS0;
1808		dns_adb_changeflags(fctx->adb, query->addrinfo,
1809				    DNS_FETCHOPT_NOEDNS0,
1810				    DNS_FETCHOPT_NOEDNS0);
1811	}
1812
1813	/* Sync NOEDNS0 flag in addrinfo->flags and options now. */
1814	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) != 0)
1815		query->options |= DNS_FETCHOPT_NOEDNS0;
1816
1817	/*
1818	 * Handle timeouts by reducing the UDP response size to 512 bytes
1819	 * then if that doesn't work disabling EDNS (includes DO) and CD.
1820	 *
1821	 * These timeout can be due to:
1822	 *	* broken nameservers that don't respond to EDNS queries.
1823	 *	* broken/misconfigured firewalls and NAT implementations
1824	 *	  that don't handle IP fragmentation.
1825	 *	* broken/misconfigured firewalls that don't handle responses
1826	 *	  greater than 512 bytes.
1827	 *	* broken/misconfigured firewalls that don't handle EDNS, DO
1828	 *	  or CD.
1829	 *	* packet loss / link outage.
1830	 */
1831	if (fctx->timeout) {
1832		if ((triededns512(fctx, &query->addrinfo->sockaddr) ||
1833		     fctx->timeouts >= (MAX_EDNS0_TIMEOUTS * 2)) &&
1834		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1835			query->options |= DNS_FETCHOPT_NOEDNS0;
1836			fctx->reason = "disabling EDNS";
1837		} else if ((triededns(fctx, &query->addrinfo->sockaddr) ||
1838			    fctx->timeouts >= MAX_EDNS0_TIMEOUTS) &&
1839			   (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1840			query->options |= DNS_FETCHOPT_EDNS512;
1841			fctx->reason = "reducing the advertised EDNS UDP "
1842				       "packet size to 512 octets";
1843		}
1844		fctx->timeout = ISC_FALSE;
1845	}
1846
1847	/*
1848	 * Use EDNS0, unless the caller doesn't want it, or we know that
1849	 * the remote server doesn't like it.
1850	 */
1851	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1852		if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0) {
1853			unsigned int version = 0;       /* Default version. */
1854			unsigned int flags;
1855			isc_uint16_t udpsize = res->udpsize;
1856			isc_boolean_t reqnsid = res->view->requestnsid;
1857
1858			flags = query->addrinfo->flags;
1859			if ((flags & DNS_FETCHOPT_EDNSVERSIONSET) != 0) {
1860				version = flags & DNS_FETCHOPT_EDNSVERSIONMASK;
1861				version >>= DNS_FETCHOPT_EDNSVERSIONSHIFT;
1862			}
1863			if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
1864				udpsize = 512;
1865			else if (peer != NULL)
1866				(void)dns_peer_getudpsize(peer, &udpsize);
1867
1868			/* request NSID for current view or peer? */
1869			if (peer != NULL)
1870				(void) dns_peer_getrequestnsid(peer, &reqnsid);
1871			result = fctx_addopt(fctx->qmessage, version,
1872					     udpsize, reqnsid);
1873			if (reqnsid && result == ISC_R_SUCCESS) {
1874				query->options |= DNS_FETCHOPT_WANTNSID;
1875			} else if (result != ISC_R_SUCCESS) {
1876				/*
1877				 * We couldn't add the OPT, but we'll press on.
1878				 * We're not using EDNS0, so set the NOEDNS0
1879				 * bit.
1880				 */
1881				query->options |= DNS_FETCHOPT_NOEDNS0;
1882			}
1883		} else {
1884			/*
1885			 * We know this server doesn't like EDNS0, so we
1886			 * won't use it.  Set the NOEDNS0 bit since we're
1887			 * not using EDNS0.
1888			 */
1889			query->options |= DNS_FETCHOPT_NOEDNS0;
1890		}
1891	}
1892
1893	/*
1894	 * If we need EDNS0 to do this query and aren't using it, we lose.
1895	 */
1896	if (NEEDEDNS0(fctx) && (query->options & DNS_FETCHOPT_NOEDNS0) != 0) {
1897		result = DNS_R_SERVFAIL;
1898		goto cleanup_message;
1899	}
1900
1901	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0)
1902		add_triededns(fctx, &query->addrinfo->sockaddr);
1903
1904	if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
1905		add_triededns512(fctx, &query->addrinfo->sockaddr);
1906
1907	/*
1908	 * Clear CD if EDNS is not in use.
1909	 */
1910	if ((query->options & DNS_FETCHOPT_NOEDNS0) != 0)
1911		fctx->qmessage->flags &= ~DNS_MESSAGEFLAG_CD;
1912
1913	/*
1914	 * Add TSIG record tailored to the current recipient.
1915	 */
1916	result = dns_view_getpeertsig(fctx->res->view, &ipaddr, &tsigkey);
1917	if (result != ISC_R_SUCCESS && result != ISC_R_NOTFOUND)
1918		goto cleanup_message;
1919
1920	if (tsigkey != NULL) {
1921		result = dns_message_settsigkey(fctx->qmessage, tsigkey);
1922		dns_tsigkey_detach(&tsigkey);
1923		if (result != ISC_R_SUCCESS)
1924			goto cleanup_message;
1925	}
1926
1927	result = dns_message_rendersection(fctx->qmessage,
1928					   DNS_SECTION_ADDITIONAL, 0);
1929	if (result != ISC_R_SUCCESS)
1930		goto cleanup_message;
1931
1932	result = dns_message_renderend(fctx->qmessage);
1933	if (result != ISC_R_SUCCESS)
1934		goto cleanup_message;
1935
1936	dns_compress_invalidate(&cctx);
1937	cleanup_cctx = ISC_FALSE;
1938
1939	if (dns_message_gettsigkey(fctx->qmessage) != NULL) {
1940		dns_tsigkey_attach(dns_message_gettsigkey(fctx->qmessage),
1941				   &query->tsigkey);
1942		result = dns_message_getquerytsig(fctx->qmessage,
1943						  fctx->res->mctx,
1944						  &query->tsig);
1945		if (result != ISC_R_SUCCESS)
1946			goto cleanup_message;
1947	}
1948
1949	/*
1950	 * If using TCP, write the length of the message at the beginning
1951	 * of the buffer.
1952	 */
1953	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1954		isc_buffer_usedregion(&query->buffer, &r);
1955		isc_buffer_putuint16(&tcpbuffer, (isc_uint16_t)r.length);
1956		isc_buffer_add(&tcpbuffer, r.length);
1957	}
1958
1959	/*
1960	 * We're now done with the query message.
1961	 */
1962	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
1963
1964	if (query->exclusivesocket)
1965		socket = dns_dispatch_getentrysocket(query->dispentry);
1966	else
1967		socket = dns_dispatch_getsocket(query->dispatch);
1968	/*
1969	 * Send the query!
1970	 */
1971	if ((query->options & DNS_FETCHOPT_TCP) == 0) {
1972		address = &query->addrinfo->sockaddr;
1973		if (query->exclusivesocket) {
1974			result = isc_socket_connect(socket, address, task,
1975						    resquery_udpconnected,
1976						    query);
1977			if (result != ISC_R_SUCCESS)
1978				goto cleanup_message;
1979			connecting = ISC_TRUE;
1980			query->connects++;
1981		}
1982	}
1983	isc_buffer_usedregion(buffer, &r);
1984
1985	/*
1986	 * XXXRTH  Make sure we don't send to ourselves!  We should probably
1987	 *		prune out these addresses when we get them from the ADB.
1988	 */
1989	result = isc_socket_sendto(socket, &r, task, resquery_senddone,
1990				   query, address, NULL);
1991	if (result != ISC_R_SUCCESS) {
1992		if (connecting) {
1993			/*
1994			 * This query is still connecting.
1995			 * Mark it as canceled so that it will just be
1996			 * cleaned up when the connected event is received.
1997			 * Keep fctx around until the event is processed.
1998			 */
1999			query->fctx->nqueries++;
2000			query->attributes |= RESQUERY_ATTR_CANCELED;
2001		}
2002		goto cleanup_message;
2003	}
2004
2005	query->sends++;
2006
2007	QTRACE("sent");
2008
2009	return (ISC_R_SUCCESS);
2010
2011 cleanup_message:
2012	if (cleanup_cctx)
2013		dns_compress_invalidate(&cctx);
2014
2015	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
2016
2017	/*
2018	 * Stop the dispatcher from listening.
2019	 */
2020	dns_dispatch_removeresponse(&query->dispentry, NULL);
2021
2022 cleanup_temps:
2023	if (qname != NULL)
2024		dns_message_puttempname(fctx->qmessage, &qname);
2025	if (qrdataset != NULL)
2026		dns_message_puttemprdataset(fctx->qmessage, &qrdataset);
2027
2028	return (result);
2029}
2030
/*
 * Socket event handler for the ISC_SOCKEVENT_CONNECT event of a query's
 * TCP socket; 'event->ev_arg' is the resquery_t the connect belongs to.
 *
 * - If the query was canceled while connecting, the socket reference is
 *   dropped and the query destroyed.
 * - On ISC_R_SUCCESS the idle timer is restarted with a 20 second
 *   interval, a TCP dispatch is created on the connected socket and the
 *   query is sent; any failure along the way cancels the query and
 *   finishes the fetch with that result.
 * - On the "unreachable"-style results listed below the query is
 *   canceled and, after the event has been freed, fctx_try() is called
 *   again.
 * - On any other result the query is simply canceled.
 *
 * The event is always freed before returning.
 */
static void
resquery_connected(isc_task_t *task, isc_event_t *event) {
	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
	resquery_t *query = event->ev_arg;
	isc_boolean_t retry = ISC_FALSE;
	isc_interval_t interval;
	isc_result_t result;
	unsigned int attrs;
	fetchctx_t *fctx;

	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
	REQUIRE(VALID_QUERY(query));

	QTRACE("connected");

	UNUSED(task);

	/*
	 * XXXRTH
	 *
	 * Currently we don't wait for the connect event before retrying
	 * a query.  This means that if we get really behind, we may end
	 * up doing extra work!
	 */

	query->connects--;
	/*
	 * Keep our own pointer to the fetch context: 'query' is handed by
	 * address to resquery_destroy()/fctx_cancelquery() below, and fctx
	 * is still needed after those calls.
	 */
	fctx = query->fctx;

	if (RESQUERY_CANCELED(query)) {
		/*
		 * This query was canceled while the connect() was in
		 * progress.
		 */
		isc_socket_detach(&query->tcpsocket);
		resquery_destroy(&query);
	} else {
		switch (sevent->result) {
		case ISC_R_SUCCESS:

			/*
			 * Extend the idle timer for TCP.  20 seconds
			 * should be long enough for a TCP connection to be
			 * established, a single DNS request to be sent,
			 * and the response received.
			 */
			isc_interval_set(&interval, 20, 0);
			result = fctx_startidletimer(query->fctx, &interval);
			if (result != ISC_R_SUCCESS) {
				/*
				 * NOTE(review): unlike every other exit
				 * path in this function, query->tcpsocket
				 * is not detached before the query is
				 * canceled here -- confirm that
				 * fctx_cancelquery() copes with that.
				 */
				fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
				fctx_done(fctx, result, __LINE__);
				break;
			}
			/*
			 * We are connected.  Create a dispatcher and
			 * send the query.
			 */
			attrs = 0;
			attrs |= DNS_DISPATCHATTR_TCP;
			attrs |= DNS_DISPATCHATTR_PRIVATE;
			attrs |= DNS_DISPATCHATTR_CONNECTED;
			if (isc_sockaddr_pf(&query->addrinfo->sockaddr) ==
			    AF_INET)
				attrs |= DNS_DISPATCHATTR_IPV4;
			else
				attrs |= DNS_DISPATCHATTR_IPV6;
			attrs |= DNS_DISPATCHATTR_MAKEQUERY;

			result = dns_dispatch_createtcp(query->dispatchmgr,
						     query->tcpsocket,
						     query->fctx->res->taskmgr,
						     4096, 2, 1, 1, 3, attrs,
						     &query->dispatch);

			/*
			 * Regardless of whether dns_dispatch_createtcp()
			 * succeeded or not, we don't need our reference
			 * to the socket anymore.
			 */
			isc_socket_detach(&query->tcpsocket);

			if (result == ISC_R_SUCCESS)
				result = resquery_send(query);

			/*
			 * Either the dispatch could not be created or the
			 * send failed: give up on the whole fetch.
			 */
			if (result != ISC_R_SUCCESS) {
				fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
				fctx_done(fctx, result, __LINE__);
			}
			break;

		case ISC_R_NETUNREACH:
		case ISC_R_HOSTUNREACH:
		case ISC_R_CONNREFUSED:
		case ISC_R_NOPERM:
		case ISC_R_ADDRNOTAVAIL:
		case ISC_R_CONNECTIONRESET:
			/*
			 * No route to remote.
			 */
			isc_socket_detach(&query->tcpsocket);
			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
			retry = ISC_TRUE;
			break;

		default:
			/*
			 * Any other connect failure: drop this query
			 * without asking for a retry.
			 */
			isc_socket_detach(&query->tcpsocket);
			fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
			break;
		}
	}

	isc_event_free(&event);

	if (retry) {
		/*
		 * Behave as if the idle timer has expired.  For TCP
		 * connections this may not actually reflect the latest timer.
		 */
		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
		result = fctx_stopidletimer(fctx);
		if (result != ISC_R_SUCCESS)
			fctx_done(fctx, result, __LINE__);
		else
			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
	}
}
2156
/*
 * Event handler for an ADB find started by findname(): 'event->ev_sender'
 * is the dns_adbfind_t and 'event->ev_arg' the fetch context.
 *
 * One pending find is accounted for.  If the fetch was waiting for
 * addresses (ADDRWAIT), it is either retried (more addresses arrived) or,
 * when nothing else is pending, failed.  Otherwise, if the fetch is
 * shutting down and this was the last thing keeping it alive, it is
 * unlinked and destroyed.
 *
 * The decision is made while holding the bucket lock; fctx_try(),
 * fctx_done() and fctx_destroy() are called only after it is released.
 */
static void
fctx_finddone(isc_task_t *task, isc_event_t *event) {
	fetchctx_t *fctx;
	dns_adbfind_t *find;
	dns_resolver_t *res;
	isc_boolean_t want_try = ISC_FALSE;
	isc_boolean_t want_done = ISC_FALSE;
	isc_boolean_t bucket_empty = ISC_FALSE;
	unsigned int bucketnum;
	isc_boolean_t destroy = ISC_FALSE;

	find = event->ev_sender;
	fctx = event->ev_arg;
	REQUIRE(VALID_FCTX(fctx));
	res = fctx->res;

	UNUSED(task);

	FCTXTRACE("finddone");

	/*
	 * Remember the bucket number locally: fctx may be destroyed before
	 * the end of this function.
	 */
	bucketnum = fctx->bucketnum;
	LOCK(&res->buckets[bucketnum].lock);

	INSIST(fctx->pending > 0);
	fctx->pending--;

	if (ADDRWAIT(fctx)) {
		/*
		 * The fetch is waiting for a name to be found.
		 */
		INSIST(!SHUTTINGDOWN(fctx));
		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
		if (event->ev_type == DNS_EVENT_ADBMOREADDRESSES)
			want_try = ISC_TRUE;
		else {
			fctx->findfail++;
			if (fctx->pending == 0) {
				/*
				 * We've got nothing else to wait for and don't
				 * know the answer.  There's nothing to do but
				 * fail the fctx.
				 */
				want_done = ISC_TRUE;
			}
		}
	} else if (SHUTTINGDOWN(fctx) && fctx->pending == 0 &&
		   fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) {

		/*
		 * Nothing is outstanding on a fetch that is shutting down;
		 * if nobody references it either, it can go away.
		 */
		if (fctx->references == 0) {
			bucket_empty = fctx_unlink(fctx);
			destroy = ISC_TRUE;
		}
	}
	UNLOCK(&res->buckets[bucketnum].lock);

	isc_event_free(&event);
	dns_adb_destroyfind(&find);

	/* Act on the decision taken under the lock. */
	if (want_try)
		fctx_try(fctx, ISC_TRUE, ISC_FALSE);
	else if (want_done)
		fctx_done(fctx, ISC_R_FAILURE, __LINE__);
	else if (destroy) {
			fctx_destroy(fctx);
		if (bucket_empty)
			empty_bucket(res);
	}
}
2225
2226
2227static inline isc_boolean_t
2228bad_server(fetchctx_t *fctx, isc_sockaddr_t *address) {
2229	isc_sockaddr_t *sa;
2230
2231	for (sa = ISC_LIST_HEAD(fctx->bad);
2232	     sa != NULL;
2233	     sa = ISC_LIST_NEXT(sa, link)) {
2234		if (isc_sockaddr_equal(sa, address))
2235			return (ISC_TRUE);
2236	}
2237
2238	return (ISC_FALSE);
2239}
2240
2241static inline isc_boolean_t
2242mark_bad(fetchctx_t *fctx) {
2243	dns_adbfind_t *curr;
2244	dns_adbaddrinfo_t *addrinfo;
2245	isc_boolean_t all_bad = ISC_TRUE;
2246
2247	/*
2248	 * Mark all known bad servers, so we don't try to talk to them
2249	 * again.
2250	 */
2251
2252	/*
2253	 * Mark any bad nameservers.
2254	 */
2255	for (curr = ISC_LIST_HEAD(fctx->finds);
2256	     curr != NULL;
2257	     curr = ISC_LIST_NEXT(curr, publink)) {
2258		for (addrinfo = ISC_LIST_HEAD(curr->list);
2259		     addrinfo != NULL;
2260		     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2261			if (bad_server(fctx, &addrinfo->sockaddr))
2262				addrinfo->flags |= FCTX_ADDRINFO_MARK;
2263			else
2264				all_bad = ISC_FALSE;
2265		}
2266	}
2267
2268	/*
2269	 * Mark any bad forwarders.
2270	 */
2271	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
2272	     addrinfo != NULL;
2273	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2274		if (bad_server(fctx, &addrinfo->sockaddr))
2275			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2276		else
2277			all_bad = ISC_FALSE;
2278	}
2279
2280	/*
2281	 * Mark any bad alternates.
2282	 */
2283	for (curr = ISC_LIST_HEAD(fctx->altfinds);
2284	     curr != NULL;
2285	     curr = ISC_LIST_NEXT(curr, publink)) {
2286		for (addrinfo = ISC_LIST_HEAD(curr->list);
2287		     addrinfo != NULL;
2288		     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2289			if (bad_server(fctx, &addrinfo->sockaddr))
2290				addrinfo->flags |= FCTX_ADDRINFO_MARK;
2291			else
2292				all_bad = ISC_FALSE;
2293		}
2294	}
2295
2296	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
2297	     addrinfo != NULL;
2298	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2299		if (bad_server(fctx, &addrinfo->sockaddr))
2300			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2301		else
2302			all_bad = ISC_FALSE;
2303	}
2304
2305	return (all_bad);
2306}
2307
2308static void
2309add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_result_t reason,
2310	badnstype_t badtype)
2311{
2312	char namebuf[DNS_NAME_FORMATSIZE];
2313	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
2314	char classbuf[64];
2315	char typebuf[64];
2316	char code[64];
2317	isc_buffer_t b;
2318	isc_sockaddr_t *sa;
2319	const char *spc = "";
2320	isc_sockaddr_t *address = &addrinfo->sockaddr;
2321
2322	if (reason == DNS_R_LAME)
2323		fctx->lamecount++;
2324	else {
2325		switch (badtype) {
2326		case badns_unreachable:
2327			fctx->neterr++;
2328			break;
2329		case badns_response:
2330			fctx->badresp++;
2331			break;
2332		case badns_validation:
2333			break;	/* counted as 'valfail' */
2334		}
2335	}
2336
2337	if (bad_server(fctx, address)) {
2338		/*
2339		 * We already know this server is bad.
2340		 */
2341		return;
2342	}
2343
2344	FCTXTRACE("add_bad");
2345
2346	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
2347	if (sa == NULL)
2348		return;
2349	*sa = *address;
2350	ISC_LIST_INITANDAPPEND(fctx->bad, sa, link);
2351
2352	if (reason == DNS_R_LAME)       /* already logged */
2353		return;
2354
2355	if (reason == DNS_R_UNEXPECTEDRCODE &&
2356	    fctx->rmessage->rcode == dns_rcode_servfail &&
2357	    ISFORWARDER(addrinfo))
2358		return;
2359
2360	if (reason == DNS_R_UNEXPECTEDRCODE) {
2361		isc_buffer_init(&b, code, sizeof(code) - 1);
2362		dns_rcode_totext(fctx->rmessage->rcode, &b);
2363		code[isc_buffer_usedlength(&b)] = '\0';
2364		spc = " ";
2365	} else if (reason == DNS_R_UNEXPECTEDOPCODE) {
2366		isc_buffer_init(&b, code, sizeof(code) - 1);
2367		dns_opcode_totext((dns_opcode_t)fctx->rmessage->opcode, &b);
2368		code[isc_buffer_usedlength(&b)] = '\0';
2369		spc = " ";
2370	} else {
2371		code[0] = '\0';
2372	}
2373	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
2374	dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
2375	dns_rdataclass_format(fctx->res->rdclass, classbuf, sizeof(classbuf));
2376	isc_sockaddr_format(address, addrbuf, sizeof(addrbuf));
2377	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
2378		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
2379		      "error (%s%s%s) resolving '%s/%s/%s': %s",
2380		      dns_result_totext(reason), spc, code,
2381		      namebuf, typebuf, classbuf, addrbuf);
2382}
2383
2384/*
2385 * Sort addrinfo list by RTT.
2386 */
2387static void
2388sort_adbfind(dns_adbfind_t *find) {
2389	dns_adbaddrinfo_t *best, *curr;
2390	dns_adbaddrinfolist_t sorted;
2391
2392	/* Lame N^2 bubble sort. */
2393	ISC_LIST_INIT(sorted);
2394	while (!ISC_LIST_EMPTY(find->list)) {
2395		best = ISC_LIST_HEAD(find->list);
2396		curr = ISC_LIST_NEXT(best, publink);
2397		while (curr != NULL) {
2398			if (curr->srtt < best->srtt)
2399				best = curr;
2400			curr = ISC_LIST_NEXT(curr, publink);
2401		}
2402		ISC_LIST_UNLINK(find->list, best, publink);
2403		ISC_LIST_APPEND(sorted, best, publink);
2404	}
2405	find->list = sorted;
2406}
2407
2408/*
2409 * Sort a list of finds by server RTT.
2410 */
2411static void
2412sort_finds(dns_adbfindlist_t *findlist) {
2413	dns_adbfind_t *best, *curr;
2414	dns_adbfindlist_t sorted;
2415	dns_adbaddrinfo_t *addrinfo, *bestaddrinfo;
2416
2417	/* Sort each find's addrinfo list by SRTT. */
2418	for (curr = ISC_LIST_HEAD(*findlist);
2419	     curr != NULL;
2420	     curr = ISC_LIST_NEXT(curr, publink))
2421		sort_adbfind(curr);
2422
2423	/* Lame N^2 bubble sort. */
2424	ISC_LIST_INIT(sorted);
2425	while (!ISC_LIST_EMPTY(*findlist)) {
2426		best = ISC_LIST_HEAD(*findlist);
2427		bestaddrinfo = ISC_LIST_HEAD(best->list);
2428		INSIST(bestaddrinfo != NULL);
2429		curr = ISC_LIST_NEXT(best, publink);
2430		while (curr != NULL) {
2431			addrinfo = ISC_LIST_HEAD(curr->list);
2432			INSIST(addrinfo != NULL);
2433			if (addrinfo->srtt < bestaddrinfo->srtt) {
2434				best = curr;
2435				bestaddrinfo = addrinfo;
2436			}
2437			curr = ISC_LIST_NEXT(curr, publink);
2438		}
2439		ISC_LIST_UNLINK(*findlist, best, publink);
2440		ISC_LIST_APPEND(sorted, best, publink);
2441	}
2442	*findlist = sorted;
2443}
2444
2445static void
2446findname(fetchctx_t *fctx, dns_name_t *name, in_port_t port,
2447	 unsigned int options, unsigned int flags, isc_stdtime_t now,
2448	 isc_boolean_t *need_alternate)
2449{
2450	dns_adbaddrinfo_t *ai;
2451	dns_adbfind_t *find;
2452	dns_resolver_t *res;
2453	isc_boolean_t unshared;
2454	isc_result_t result;
2455
2456	res = fctx->res;
2457	unshared = ISC_TF((fctx->options | DNS_FETCHOPT_UNSHARED) != 0);
2458	/*
2459	 * If this name is a subdomain of the query domain, tell
2460	 * the ADB to start looking using zone/hint data. This keeps us
2461	 * from getting stuck if the nameserver is beneath the zone cut
2462	 * and we don't know its address (e.g. because the A record has
2463	 * expired).
2464	 */
2465	if (dns_name_issubdomain(name, &fctx->domain))
2466		options |= DNS_ADBFIND_STARTATZONE;
2467	options |= DNS_ADBFIND_GLUEOK;
2468	options |= DNS_ADBFIND_HINTOK;
2469
2470	/*
2471	 * See what we know about this address.
2472	 */
2473	find = NULL;
2474	result = dns_adb_createfind(fctx->adb,
2475				    res->buckets[fctx->bucketnum].task,
2476				    fctx_finddone, fctx, name,
2477				    &fctx->name, fctx->type,
2478				    options, now, NULL,
2479				    res->view->dstport, &find);
2480	if (result != ISC_R_SUCCESS) {
2481		if (result == DNS_R_ALIAS) {
2482			/*
2483			 * XXXRTH  Follow the CNAME/DNAME chain?
2484			 */
2485			dns_adb_destroyfind(&find);
2486			fctx->adberr++;
2487		}
2488	} else if (!ISC_LIST_EMPTY(find->list)) {
2489		/*
2490		 * We have at least some of the addresses for the
2491		 * name.
2492		 */
2493		INSIST((find->options & DNS_ADBFIND_WANTEVENT) == 0);
2494		if (flags != 0 || port != 0) {
2495			for (ai = ISC_LIST_HEAD(find->list);
2496			     ai != NULL;
2497			     ai = ISC_LIST_NEXT(ai, publink)) {
2498				ai->flags |= flags;
2499				if (port != 0)
2500					isc_sockaddr_setport(&ai->sockaddr,
2501							     port);
2502			}
2503		}
2504		if ((flags & FCTX_ADDRINFO_FORWARDER) != 0)
2505			ISC_LIST_APPEND(fctx->altfinds, find, publink);
2506		else
2507			ISC_LIST_APPEND(fctx->finds, find, publink);
2508	} else {
2509		/*
2510		 * We don't know any of the addresses for this
2511		 * name.
2512		 */
2513		if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
2514			/*
2515			 * We're looking for them and will get an
2516			 * event about it later.
2517			 */
2518			fctx->pending++;
2519			/*
2520			 * Bootstrap.
2521			 */
2522			if (need_alternate != NULL &&
2523			    !*need_alternate && unshared &&
2524			    ((res->dispatchv4 == NULL &&
2525			      find->result_v6 != DNS_R_NXDOMAIN) ||
2526			     (res->dispatchv6 == NULL &&
2527			      find->result_v4 != DNS_R_NXDOMAIN)))
2528				*need_alternate = ISC_TRUE;
2529		} else {
2530			if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0)
2531				fctx->lamecount++; /* cached lame server */
2532			else
2533				fctx->adberr++; /* unreachable server, etc. */
2534
2535			/*
2536			 * If we know there are no addresses for
2537			 * the family we are using then try to add
2538			 * an alternative server.
2539			 */
2540			if (need_alternate != NULL && !*need_alternate &&
2541			    ((res->dispatchv4 == NULL &&
2542			      find->result_v6 == DNS_R_NXRRSET) ||
2543			     (res->dispatchv6 == NULL &&
2544			      find->result_v4 == DNS_R_NXRRSET)))
2545				*need_alternate = ISC_TRUE;
2546			dns_adb_destroyfind(&find);
2547		}
2548	}
2549}
2550
2551static isc_boolean_t
2552isstrictsubdomain(dns_name_t *name1, dns_name_t *name2) {
2553	int order;
2554	unsigned int nlabels;
2555	dns_namereln_t namereln;
2556
2557	namereln = dns_name_fullcompare(name1, name2, &order, &nlabels);
2558	return (ISC_TF(namereln == dns_namereln_subdomain));
2559}
2560
/*
 * (Re)build the set of server addresses this fetch may query.
 *
 * The forwarder list (fctx->forwaddrs) is filled first, kept ordered by
 * srtt.  Unless the forwarding policy is "only", an ADB find is then
 * started for every NS record in fctx->nameservers, and, if findname()
 * asked for it, the resolver's alternates are added as well.  Finally
 * all addresses already known to be bad are marked.
 *
 * 'badcache' permits adding the name to the resolver's bad cache when no
 * address could be found for a DNSKEY, DLV or DS fetch.
 *
 * The caller must have emptied fctx->forwaddrs, fctx->altaddrs,
 * fctx->finds and fctx->altfinds beforehand (see the INSISTs).
 *
 * Returns:
 *	ISC_R_SUCCESS	at least one address that is not known bad exists;
 *	DNS_R_WAIT	none yet, but ADB lookups are still pending;
 *	ISC_R_FAILURE	none, and nothing is pending;
 *	DNS_R_SERVFAIL	this fetch has been restarted more than 10 times;
 *	or the error from dns_name_dup() / the NS rdataset iteration.
 */
static isc_result_t
fctx_getaddresses(fetchctx_t *fctx, isc_boolean_t badcache) {
	dns_rdata_t rdata = DNS_RDATA_INIT;
	isc_result_t result;
	dns_resolver_t *res;
	isc_stdtime_t now;
	unsigned int stdoptions = 0;
	isc_sockaddr_t *sa;
	dns_adbaddrinfo_t *ai;
	isc_boolean_t all_bad;
	dns_rdata_ns_t ns;
	isc_boolean_t need_alternate = ISC_FALSE;

	FCTXTRACE("getaddresses");

	/*
	 * Don't pound on remote servers.  (Failsafe!)
	 */
	fctx->restarts++;
	if (fctx->restarts > 10) {
		FCTXTRACE("too many restarts");
		return (DNS_R_SERVFAIL);
	}

	res = fctx->res;

	/*
	 * Forwarders.
	 */

	INSIST(ISC_LIST_EMPTY(fctx->forwaddrs));
	INSIST(ISC_LIST_EMPTY(fctx->altaddrs));

	/*
	 * If this fctx has forwarders, use them; otherwise use any
	 * selective forwarders specified in the view; otherwise use the
	 * resolver's forwarders (if any).
	 */
	sa = ISC_LIST_HEAD(fctx->forwarders);
	if (sa == NULL) {
		dns_forwarders_t *forwarders = NULL;
		dns_name_t *name = &fctx->name;
		dns_name_t suffix;
		unsigned int labels;
		dns_fixedname_t fixed;
		dns_name_t *domain;

		/*
		 * DS records are found in the parent server.
		 * Strip label to get the correct forwarder (if any).
		 */
		if (dns_rdatatype_atparent(fctx->type) &&
		    dns_name_countlabels(name) > 1) {
			dns_name_init(&suffix, NULL);
			labels = dns_name_countlabels(name);
			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
			name = &suffix;
		}

		dns_fixedname_init(&fixed);
		domain = dns_fixedname_name(&fixed);
		result = dns_fwdtable_find2(fctx->res->view->fwdtable, name,
					    domain, &forwarders);
		if (result == ISC_R_SUCCESS) {
			sa = ISC_LIST_HEAD(forwarders->addrs);
			fctx->fwdpolicy = forwarders->fwdpolicy;
			/*
			 * With "forward only", when the matched forward
			 * zone lies strictly below the fetch's current
			 * domain, the fetch's domain is replaced by it.
			 */
			if (fctx->fwdpolicy == dns_fwdpolicy_only &&
			    isstrictsubdomain(domain, &fctx->domain)) {
				dns_name_free(&fctx->domain, fctx->mctx);
				dns_name_init(&fctx->domain, NULL);
				result = dns_name_dup(domain, fctx->mctx,
						      &fctx->domain);
				if (result != ISC_R_SUCCESS)
					return (result);
			}
		}
	}

	/*
	 * Turn each forwarder socket address into an ADB addrinfo and
	 * insert it into fctx->forwaddrs in ascending srtt order.
	 * Forwarders in an address family we have no dispatch for are
	 * skipped.
	 */
	while (sa != NULL) {
		if ((isc_sockaddr_pf(sa) == AF_INET &&
			 fctx->res->dispatchv4 == NULL) ||
		    (isc_sockaddr_pf(sa) == AF_INET6 &&
			fctx->res->dispatchv6 == NULL)) {
				sa = ISC_LIST_NEXT(sa, link);
				continue;
		}
		ai = NULL;
		result = dns_adb_findaddrinfo(fctx->adb,
					      sa, &ai, 0);  /* XXXMLG */
		if (result == ISC_R_SUCCESS) {
			dns_adbaddrinfo_t *cur;
			ai->flags |= FCTX_ADDRINFO_FORWARDER;
			cur = ISC_LIST_HEAD(fctx->forwaddrs);
			while (cur != NULL && cur->srtt < ai->srtt)
				cur = ISC_LIST_NEXT(cur, publink);
			if (cur != NULL)
				ISC_LIST_INSERTBEFORE(fctx->forwaddrs, cur,
						      ai, publink);
			else
				ISC_LIST_APPEND(fctx->forwaddrs, ai, publink);
		}
		sa = ISC_LIST_NEXT(sa, link);
	}

	/*
	 * If the forwarding policy is "only", we don't need the addresses
	 * of the nameservers.
	 */
	if (fctx->fwdpolicy == dns_fwdpolicy_only)
		goto out;

	/*
	 * Normal nameservers.
	 */

	stdoptions = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_EMPTYEVENT;
	if (fctx->restarts == 1) {
		/*
		 * To avoid sending out a flood of queries likely to
		 * result in NXRRSET, we suppress fetches for address
		 * families we don't have the first time through,
		 * provided that we have addresses in some family we
		 * can use.
		 *
		 * We don't want to set this option all the time, since
		 * if fctx->restarts > 1, we've clearly been having trouble
		 * with the addresses we had, so getting more could help.
		 */
		stdoptions |= DNS_ADBFIND_AVOIDFETCHES;
	}
	/* Only ask for address families we have a dispatch for. */
	if (res->dispatchv4 != NULL)
		stdoptions |= DNS_ADBFIND_INET;
	if (res->dispatchv6 != NULL)
		stdoptions |= DNS_ADBFIND_INET6;
	isc_stdtime_get(&now);

	INSIST(ISC_LIST_EMPTY(fctx->finds));
	INSIST(ISC_LIST_EMPTY(fctx->altfinds));

	for (result = dns_rdataset_first(&fctx->nameservers);
	     result == ISC_R_SUCCESS;
	     result = dns_rdataset_next(&fctx->nameservers))
	{
		dns_rdataset_current(&fctx->nameservers, &rdata);
		/*
		 * Extract the name from the NS record.
		 */
		result = dns_rdata_tostruct(&rdata, &ns, NULL);
		/*
		 * NOTE(review): this 'continue' skips the
		 * dns_rdata_reset() below, so 'rdata' is handed to
		 * dns_rdataset_current() again without being reset --
		 * confirm that is acceptable if tostruct can fail here.
		 */
		if (result != ISC_R_SUCCESS)
			continue;

		findname(fctx, &ns.name, 0, stdoptions, 0, now,
			 &need_alternate);
		dns_rdata_reset(&rdata);
		dns_rdata_freestruct(&ns);
	}
	/* Anything but a clean end of the rdataset is an error. */
	if (result != ISC_R_NOMORE)
		return (result);

	/*
	 * Do we need to use 6 to 4?
	 */
	if (need_alternate) {
		int family;
		alternate_t *a;
		family = (res->dispatchv6 != NULL) ? AF_INET6 : AF_INET;
		for (a = ISC_LIST_HEAD(fctx->res->alternates);
		     a != NULL;
		     a = ISC_LIST_NEXT(a, link)) {
			/*
			 * Alternates given by name go through the ADB
			 * like nameservers do; findname() queues them
			 * on fctx->altfinds because of the FORWARDER
			 * flag.
			 */
			if (!a->isaddress) {
				findname(fctx, &a->_u._n.name, a->_u._n.port,
					 stdoptions, FCTX_ADDRINFO_FORWARDER,
					 now, NULL);
				continue;
			}
			if (isc_sockaddr_pf(&a->_u.addr) != family)
				continue;
			/*
			 * Alternates given by address are inserted into
			 * fctx->altaddrs in ascending srtt order.
			 */
			ai = NULL;
			result = dns_adb_findaddrinfo(fctx->adb, &a->_u.addr,
						      &ai, 0);
			if (result == ISC_R_SUCCESS) {
				dns_adbaddrinfo_t *cur;
				ai->flags |= FCTX_ADDRINFO_FORWARDER;
				cur = ISC_LIST_HEAD(fctx->altaddrs);
				while (cur != NULL && cur->srtt < ai->srtt)
					cur = ISC_LIST_NEXT(cur, publink);
				if (cur != NULL)
					ISC_LIST_INSERTBEFORE(fctx->altaddrs,
							      cur, ai, publink);
				else
					ISC_LIST_APPEND(fctx->altaddrs, ai,
							publink);
			}
		}
	}

 out:
	/*
	 * Mark all known bad servers.
	 */
	all_bad = mark_bad(fctx);

	/*
	 * How are we doing?
	 */
	if (all_bad) {
		/*
		 * We've got no addresses.
		 */
		if (fctx->pending > 0) {
			/*
			 * We're fetching the addresses, but don't have any
			 * yet.   Tell the caller to wait for an answer.
			 */
			result = DNS_R_WAIT;
		} else {
			isc_time_t expire;
			isc_interval_t i;
			/*
			 * We've lost completely.  We don't know any
			 * addresses, and the ADB has told us it can't get
			 * them.
			 */
			FCTXTRACE("no addresses");
			isc_interval_set(&i, DNS_BADCACHE_TTL(fctx), 0);
			result = isc_time_nowplusinterval(&expire, &i);
			/*
			 * The bad cache entry is only added when the
			 * caller allowed it, the type is DNSKEY, DLV or
			 * DS, and the expiry time could be computed.
			 */
			if (badcache &&
			    (fctx->type == dns_rdatatype_dnskey ||
			     fctx->type == dns_rdatatype_dlv ||
			     fctx->type == dns_rdatatype_ds) &&
			     result == ISC_R_SUCCESS)
				dns_resolver_addbadcache(fctx->res,
							 &fctx->name,
							 fctx->type, &expire);
			result = ISC_R_FAILURE;
		}
	} else {
		/*
		 * We've found some addresses.  We might still be looking
		 * for more addresses.
		 */
		sort_finds(&fctx->finds);
		sort_finds(&fctx->altfinds);
		result = ISC_R_SUCCESS;
	}

	return (result);
}
2809
2810static inline void
2811possibly_mark(fetchctx_t *fctx, dns_adbaddrinfo_t *addr)
2812{
2813	isc_netaddr_t na;
2814	char buf[ISC_NETADDR_FORMATSIZE];
2815	isc_sockaddr_t *sa;
2816	isc_boolean_t aborted = ISC_FALSE;
2817	isc_boolean_t bogus;
2818	dns_acl_t *blackhole;
2819	isc_netaddr_t ipaddr;
2820	dns_peer_t *peer = NULL;
2821	dns_resolver_t *res;
2822	const char *msg = NULL;
2823
2824	sa = &addr->sockaddr;
2825
2826	res = fctx->res;
2827	isc_netaddr_fromsockaddr(&ipaddr, sa);
2828	blackhole = dns_dispatchmgr_getblackhole(res->dispatchmgr);
2829	(void) dns_peerlist_peerbyaddr(res->view->peers, &ipaddr, &peer);
2830
2831	if (blackhole != NULL) {
2832		int match;
2833
2834		if (dns_acl_match(&ipaddr, NULL, blackhole,
2835				  &res->view->aclenv,
2836				  &match, NULL) == ISC_R_SUCCESS &&
2837		    match > 0)
2838			aborted = ISC_TRUE;
2839	}
2840
2841	if (peer != NULL &&
2842	    dns_peer_getbogus(peer, &bogus) == ISC_R_SUCCESS &&
2843	    bogus)
2844		aborted = ISC_TRUE;
2845
2846	if (aborted) {
2847		addr->flags |= FCTX_ADDRINFO_MARK;
2848		msg = "ignoring blackholed / bogus server: ";
2849	} else if (isc_sockaddr_ismulticast(sa)) {
2850		addr->flags |= FCTX_ADDRINFO_MARK;
2851		msg = "ignoring multicast address: ";
2852	} else if (isc_sockaddr_isexperimental(sa)) {
2853		addr->flags |= FCTX_ADDRINFO_MARK;
2854		msg = "ignoring experimental address: ";
2855	} else if (sa->type.sa.sa_family != AF_INET6) {
2856		return;
2857	} else if (IN6_IS_ADDR_V4MAPPED(&sa->type.sin6.sin6_addr)) {
2858		addr->flags |= FCTX_ADDRINFO_MARK;
2859		msg = "ignoring IPv6 mapped IPV4 address: ";
2860	} else if (IN6_IS_ADDR_V4COMPAT(&sa->type.sin6.sin6_addr)) {
2861		addr->flags |= FCTX_ADDRINFO_MARK;
2862		msg = "ignoring IPv6 compatibility IPV4 address: ";
2863	} else
2864		return;
2865
2866	if (!isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(3)))
2867		return;
2868
2869	isc_netaddr_fromsockaddr(&na, sa);
2870	isc_netaddr_format(&na, buf, sizeof(buf));
2871	FCTXTRACE2(msg, buf);
2872}
2873
2874static inline dns_adbaddrinfo_t *
2875fctx_nextaddress(fetchctx_t *fctx) {
2876	dns_adbfind_t *find, *start;
2877	dns_adbaddrinfo_t *addrinfo;
2878	dns_adbaddrinfo_t *faddrinfo;
2879
2880	/*
2881	 * Return the next untried address, if any.
2882	 */
2883
2884	/*
2885	 * Find the first unmarked forwarder (if any).
2886	 */
2887	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
2888	     addrinfo != NULL;
2889	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2890		if (!UNMARKED(addrinfo))
2891			continue;
2892		possibly_mark(fctx, addrinfo);
2893		if (UNMARKED(addrinfo)) {
2894			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2895			fctx->find = NULL;
2896			return (addrinfo);
2897		}
2898	}
2899
2900	/*
2901	 * No forwarders.  Move to the next find.
2902	 */
2903
2904	fctx->attributes |= FCTX_ATTR_TRIEDFIND;
2905
2906	find = fctx->find;
2907	if (find == NULL)
2908		find = ISC_LIST_HEAD(fctx->finds);
2909	else {
2910		find = ISC_LIST_NEXT(find, publink);
2911		if (find == NULL)
2912			find = ISC_LIST_HEAD(fctx->finds);
2913	}
2914
2915	/*
2916	 * Find the first unmarked addrinfo.
2917	 */
2918	addrinfo = NULL;
2919	if (find != NULL) {
2920		start = find;
2921		do {
2922			for (addrinfo = ISC_LIST_HEAD(find->list);
2923			     addrinfo != NULL;
2924			     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2925				if (!UNMARKED(addrinfo))
2926					continue;
2927				possibly_mark(fctx, addrinfo);
2928				if (UNMARKED(addrinfo)) {
2929					addrinfo->flags |= FCTX_ADDRINFO_MARK;
2930					break;
2931				}
2932			}
2933			if (addrinfo != NULL)
2934				break;
2935			find = ISC_LIST_NEXT(find, publink);
2936			if (find == NULL)
2937				find = ISC_LIST_HEAD(fctx->finds);
2938		} while (find != start);
2939	}
2940
2941	fctx->find = find;
2942	if (addrinfo != NULL)
2943		return (addrinfo);
2944
2945	/*
2946	 * No nameservers left.  Try alternates.
2947	 */
2948
2949	fctx->attributes |= FCTX_ATTR_TRIEDALT;
2950
2951	find = fctx->altfind;
2952	if (find == NULL)
2953		find = ISC_LIST_HEAD(fctx->altfinds);
2954	else {
2955		find = ISC_LIST_NEXT(find, publink);
2956		if (find == NULL)
2957			find = ISC_LIST_HEAD(fctx->altfinds);
2958	}
2959
2960	/*
2961	 * Find the first unmarked addrinfo.
2962	 */
2963	addrinfo = NULL;
2964	if (find != NULL) {
2965		start = find;
2966		do {
2967			for (addrinfo = ISC_LIST_HEAD(find->list);
2968			     addrinfo != NULL;
2969			     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2970				if (!UNMARKED(addrinfo))
2971					continue;
2972				possibly_mark(fctx, addrinfo);
2973				if (UNMARKED(addrinfo)) {
2974					addrinfo->flags |= FCTX_ADDRINFO_MARK;
2975					break;
2976				}
2977			}
2978			if (addrinfo != NULL)
2979				break;
2980			find = ISC_LIST_NEXT(find, publink);
2981			if (find == NULL)
2982				find = ISC_LIST_HEAD(fctx->altfinds);
2983		} while (find != start);
2984	}
2985
2986	faddrinfo = addrinfo;
2987
2988	/*
2989	 * See if we have a better alternate server by address.
2990	 */
2991
2992	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
2993	     addrinfo != NULL;
2994	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2995		if (!UNMARKED(addrinfo))
2996			continue;
2997		possibly_mark(fctx, addrinfo);
2998		if (UNMARKED(addrinfo) &&
2999		    (faddrinfo == NULL ||
3000		     addrinfo->srtt < faddrinfo->srtt)) {
3001			if (faddrinfo != NULL)
3002				faddrinfo->flags &= ~FCTX_ADDRINFO_MARK;
3003			addrinfo->flags |= FCTX_ADDRINFO_MARK;
3004			break;
3005		}
3006	}
3007
3008	if (addrinfo == NULL) {
3009		addrinfo = faddrinfo;
3010		fctx->altfind = find;
3011	}
3012
3013	return (addrinfo);
3014}
3015
3016static void
3017fctx_try(fetchctx_t *fctx, isc_boolean_t retrying, isc_boolean_t badcache) {
3018	isc_result_t result;
3019	dns_adbaddrinfo_t *addrinfo;
3020
3021	FCTXTRACE("try");
3022
3023	REQUIRE(!ADDRWAIT(fctx));
3024
3025	addrinfo = fctx_nextaddress(fctx);
3026	if (addrinfo == NULL) {
3027		/*
3028		 * We have no more addresses.  Start over.
3029		 */
3030		fctx_cancelqueries(fctx, ISC_TRUE);
3031		fctx_cleanupfinds(fctx);
3032		fctx_cleanupaltfinds(fctx);
3033		fctx_cleanupforwaddrs(fctx);
3034		fctx_cleanupaltaddrs(fctx);
3035		result = fctx_getaddresses(fctx, badcache);
3036		if (result == DNS_R_WAIT) {
3037			/*
3038			 * Sleep waiting for addresses.
3039			 */
3040			FCTXTRACE("addrwait");
3041			fctx->attributes |= FCTX_ATTR_ADDRWAIT;
3042			return;
3043		} else if (result != ISC_R_SUCCESS) {
3044			/*
3045			 * Something bad happened.
3046			 */
3047			fctx_done(fctx, result, __LINE__);
3048			return;
3049		}
3050
3051		addrinfo = fctx_nextaddress(fctx);
3052		/*
3053		 * While we may have addresses from the ADB, they
3054		 * might be bad ones.  In this case, return SERVFAIL.
3055		 */
3056		if (addrinfo == NULL) {
3057			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
3058			return;
3059		}
3060	}
3061
3062	result = fctx_query(fctx, addrinfo, fctx->options);
3063	if (result != ISC_R_SUCCESS)
3064		fctx_done(fctx, result, __LINE__);
3065	else if (retrying)
3066		inc_stats(fctx->res, dns_resstatscounter_retry);
3067}
3068
3069static isc_boolean_t
3070fctx_unlink(fetchctx_t *fctx) {
3071	dns_resolver_t *res;
3072	unsigned int bucketnum;
3073
3074	/*
3075	 * Caller must be holding the bucket lock.
3076	 */
3077
3078	REQUIRE(VALID_FCTX(fctx));
3079	REQUIRE(fctx->state == fetchstate_done ||
3080		fctx->state == fetchstate_init);
3081	REQUIRE(ISC_LIST_EMPTY(fctx->events));
3082	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
3083	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
3084	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
3085	REQUIRE(fctx->pending == 0);
3086	REQUIRE(fctx->references == 0);
3087	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
3088
3089	FCTXTRACE("unlink");
3090
3091	res = fctx->res;
3092	bucketnum = fctx->bucketnum;
3093
3094	ISC_LIST_UNLINK(res->buckets[bucketnum].fctxs, fctx, link);
3095
3096	LOCK(&res->nlock);
3097	res->nfctx--;
3098	UNLOCK(&res->nlock);
3099
3100	if (res->buckets[bucketnum].exiting &&
3101	    ISC_LIST_EMPTY(res->buckets[bucketnum].fctxs))
3102		return (ISC_TRUE);
3103
3104	return (ISC_FALSE);
3105}
3106
3107static void
3108fctx_destroy(fetchctx_t *fctx) {
3109	isc_sockaddr_t *sa, *next_sa;
3110
3111	REQUIRE(VALID_FCTX(fctx));
3112	REQUIRE(fctx->state == fetchstate_done ||
3113		fctx->state == fetchstate_init);
3114	REQUIRE(ISC_LIST_EMPTY(fctx->events));
3115	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
3116	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
3117	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
3118	REQUIRE(fctx->pending == 0);
3119	REQUIRE(fctx->references == 0);
3120	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
3121	REQUIRE(!ISC_LINK_LINKED(fctx, link));
3122
3123	FCTXTRACE("destroy");
3124
3125	/*
3126	 * Free bad.
3127	 */
3128	for (sa = ISC_LIST_HEAD(fctx->bad);
3129	     sa != NULL;
3130	     sa = next_sa) {
3131		next_sa = ISC_LIST_NEXT(sa, link);
3132		ISC_LIST_UNLINK(fctx->bad, sa, link);
3133		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3134	}
3135
3136	for (sa = ISC_LIST_HEAD(fctx->edns);
3137	     sa != NULL;
3138	     sa = next_sa) {
3139		next_sa = ISC_LIST_NEXT(sa, link);
3140		ISC_LIST_UNLINK(fctx->edns, sa, link);
3141		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3142	}
3143
3144	for (sa = ISC_LIST_HEAD(fctx->edns512);
3145	     sa != NULL;
3146	     sa = next_sa) {
3147		next_sa = ISC_LIST_NEXT(sa, link);
3148		ISC_LIST_UNLINK(fctx->edns512, sa, link);
3149		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3150	}
3151
3152	for (sa = ISC_LIST_HEAD(fctx->bad_edns);
3153	     sa != NULL;
3154	     sa = next_sa) {
3155		next_sa = ISC_LIST_NEXT(sa, link);
3156		ISC_LIST_UNLINK(fctx->bad_edns, sa, link);
3157		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3158	}
3159
3160	isc_timer_detach(&fctx->timer);
3161	dns_message_destroy(&fctx->rmessage);
3162	dns_message_destroy(&fctx->qmessage);
3163	if (dns_name_countlabels(&fctx->domain) > 0)
3164		dns_name_free(&fctx->domain, fctx->mctx);
3165	if (dns_rdataset_isassociated(&fctx->nameservers))
3166		dns_rdataset_disassociate(&fctx->nameservers);
3167	dns_name_free(&fctx->name, fctx->mctx);
3168	dns_db_detach(&fctx->cache);
3169	dns_adb_detach(&fctx->adb);
3170	isc_mem_free(fctx->mctx, fctx->info);
3171	isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx));
3172}
3173
3174/*
3175 * Fetch event handlers.
3176 */
3177
3178static void
3179fctx_timeout(isc_task_t *task, isc_event_t *event) {
3180	fetchctx_t *fctx = event->ev_arg;
3181	isc_timerevent_t *tevent = (isc_timerevent_t *)event;
3182	resquery_t *query;
3183
3184	REQUIRE(VALID_FCTX(fctx));
3185
3186	UNUSED(task);
3187
3188	FCTXTRACE("timeout");
3189
3190	inc_stats(fctx->res, dns_resstatscounter_querytimeout);
3191
3192	if (event->ev_type == ISC_TIMEREVENT_LIFE) {
3193		fctx->reason = NULL;
3194		fctx_done(fctx, ISC_R_TIMEDOUT, __LINE__);
3195	} else {
3196		isc_result_t result;
3197
3198		fctx->timeouts++;
3199		fctx->timeout = ISC_TRUE;
3200		/*
3201		 * We could cancel the running queries here, or we could let
3202		 * them keep going.  Since we normally use separate sockets for
3203		 * different queries, we adopt the former approach to reduce
3204		 * the number of open sockets: cancel the oldest query if it
3205		 * expired after the query had started (this is usually the
3206		 * case but is not always so, depending on the task schedule
3207		 * timing).
3208		 */
3209		query = ISC_LIST_HEAD(fctx->queries);
3210		if (query != NULL &&
3211		    isc_time_compare(&tevent->due, &query->start) >= 0) {
3212			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
3213		}
3214		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
3215		/*
3216		 * Our timer has triggered.  Reestablish the fctx lifetime
3217		 * timer.
3218		 */
3219		result = fctx_starttimer(fctx);
3220		if (result != ISC_R_SUCCESS)
3221			fctx_done(fctx, result, __LINE__);
3222		else
3223			/*
3224			 * Keep trying.
3225			 */
3226			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
3227	}
3228
3229	isc_event_free(&event);
3230}
3231
3232static void
3233fctx_shutdown(fetchctx_t *fctx) {
3234	isc_event_t *cevent;
3235
3236	/*
3237	 * Start the shutdown process for fctx, if it isn't already underway.
3238	 */
3239
3240	FCTXTRACE("shutdown");
3241
3242	/*
3243	 * The caller must be holding the appropriate bucket lock.
3244	 */
3245
3246	if (fctx->want_shutdown)
3247		return;
3248
3249	fctx->want_shutdown = ISC_TRUE;
3250
3251	/*
3252	 * Unless we're still initializing (in which case the
3253	 * control event is still outstanding), we need to post
3254	 * the control event to tell the fetch we want it to
3255	 * exit.
3256	 */
3257	if (fctx->state != fetchstate_init) {
3258		cevent = &fctx->control_event;
3259		isc_task_send(fctx->res->buckets[fctx->bucketnum].task,
3260			      &cevent);
3261	}
3262}
3263
3264static void
3265fctx_doshutdown(isc_task_t *task, isc_event_t *event) {
3266	fetchctx_t *fctx = event->ev_arg;
3267	isc_boolean_t bucket_empty = ISC_FALSE;
3268	dns_resolver_t *res;
3269	unsigned int bucketnum;
3270	dns_validator_t *validator;
3271	isc_boolean_t destroy = ISC_FALSE;
3272
3273	REQUIRE(VALID_FCTX(fctx));
3274
3275	UNUSED(task);
3276
3277	res = fctx->res;
3278	bucketnum = fctx->bucketnum;
3279
3280	FCTXTRACE("doshutdown");
3281
3282	/*
3283	 * An fctx that is shutting down is no longer in ADDRWAIT mode.
3284	 */
3285	fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
3286
3287	/*
3288	 * Cancel all pending validators.  Note that this must be done
3289	 * without the bucket lock held, since that could cause deadlock.
3290	 */
3291	validator = ISC_LIST_HEAD(fctx->validators);
3292	while (validator != NULL) {
3293		dns_validator_cancel(validator);
3294		validator = ISC_LIST_NEXT(validator, link);
3295	}
3296
3297	if (fctx->nsfetch != NULL)
3298		dns_resolver_cancelfetch(fctx->nsfetch);
3299
3300	/*
3301	 * Shut down anything that is still running on behalf of this
3302	 * fetch.  To avoid deadlock with the ADB, we must do this
3303	 * before we lock the bucket lock.
3304	 */
3305	fctx_stopeverything(fctx, ISC_FALSE);
3306
3307	LOCK(&res->buckets[bucketnum].lock);
3308
3309	fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
3310
3311	INSIST(fctx->state == fetchstate_active ||
3312	       fctx->state == fetchstate_done);
3313	INSIST(fctx->want_shutdown);
3314
3315	if (fctx->state != fetchstate_done) {
3316		fctx->state = fetchstate_done;
3317		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
3318	}
3319
3320	if (fctx->references == 0 && fctx->pending == 0 &&
3321	    fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) {
3322		bucket_empty = fctx_unlink(fctx);
3323		destroy = ISC_TRUE;
3324	}
3325
3326	UNLOCK(&res->buckets[bucketnum].lock);
3327
3328	if (destroy) {
3329		fctx_destroy(fctx);
3330		if (bucket_empty)
3331			empty_bucket(res);
3332	}
3333}
3334
3335static void
3336fctx_start(isc_task_t *task, isc_event_t *event) {
3337	fetchctx_t *fctx = event->ev_arg;
3338	isc_boolean_t done = ISC_FALSE, bucket_empty = ISC_FALSE;
3339	dns_resolver_t *res;
3340	unsigned int bucketnum;
3341	isc_boolean_t destroy = ISC_FALSE;
3342
3343	REQUIRE(VALID_FCTX(fctx));
3344
3345	UNUSED(task);
3346
3347	res = fctx->res;
3348	bucketnum = fctx->bucketnum;
3349
3350	FCTXTRACE("start");
3351
3352	LOCK(&res->buckets[bucketnum].lock);
3353
3354	INSIST(fctx->state == fetchstate_init);
3355	if (fctx->want_shutdown) {
3356		/*
3357		 * We haven't started this fctx yet, and we've been requested
3358		 * to shut it down.
3359		 */
3360		fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
3361		fctx->state = fetchstate_done;
3362		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
3363		/*
3364		 * Since we haven't started, we INSIST that we have no
3365		 * pending ADB finds and no pending validations.
3366		 */
3367		INSIST(fctx->pending == 0);
3368		INSIST(fctx->nqueries == 0);
3369		INSIST(ISC_LIST_EMPTY(fctx->validators));
3370		if (fctx->references == 0) {
3371			/*
3372			 * It's now safe to destroy this fctx.
3373			 */
3374			bucket_empty = fctx_unlink(fctx);
3375			destroy = ISC_TRUE;
3376		}
3377		done = ISC_TRUE;
3378	} else {
3379		/*
3380		 * Normal fctx startup.
3381		 */
3382		fctx->state = fetchstate_active;
3383		/*
3384		 * Reset the control event for later use in shutting down
3385		 * the fctx.
3386		 */
3387		ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
3388			       DNS_EVENT_FETCHCONTROL, fctx_doshutdown, fctx,
3389			       NULL, NULL, NULL);
3390	}
3391
3392	UNLOCK(&res->buckets[bucketnum].lock);
3393
3394	if (!done) {
3395		isc_result_t result;
3396
3397		INSIST(!destroy);
3398
3399		/*
3400		 * All is well.  Start working on the fetch.
3401		 */
3402		result = fctx_starttimer(fctx);
3403		if (result != ISC_R_SUCCESS)
3404			fctx_done(fctx, result, __LINE__);
3405		else
3406			fctx_try(fctx, ISC_FALSE, ISC_FALSE);
3407	} else if (destroy) {
3408			fctx_destroy(fctx);
3409		if (bucket_empty)
3410			empty_bucket(res);
3411	}
3412}
3413
3414/*
3415 * Fetch Creation, Joining, and Cancelation.
3416 */
3417
3418static inline isc_result_t
3419fctx_join(fetchctx_t *fctx, isc_task_t *task, isc_sockaddr_t *client,
3420	  dns_messageid_t id, isc_taskaction_t action, void *arg,
3421	  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
3422	  dns_fetch_t *fetch)
3423{
3424	isc_task_t *clone;
3425	dns_fetchevent_t *event;
3426
3427	FCTXTRACE("join");
3428
3429	/*
3430	 * We store the task we're going to send this event to in the
3431	 * sender field.  We'll make the fetch the sender when we actually
3432	 * send the event.
3433	 */
3434	clone = NULL;
3435	isc_task_attach(task, &clone);
3436	event = (dns_fetchevent_t *)
3437		isc_event_allocate(fctx->res->mctx, clone, DNS_EVENT_FETCHDONE,
3438				   action, arg, sizeof(*event));
3439	if (event == NULL) {
3440		isc_task_detach(&clone);
3441		return (ISC_R_NOMEMORY);
3442	}
3443	event->result = DNS_R_SERVFAIL;
3444	event->qtype = fctx->type;
3445	event->db = NULL;
3446	event->node = NULL;
3447	event->rdataset = rdataset;
3448	event->sigrdataset = sigrdataset;
3449	event->fetch = fetch;
3450	event->client = client;
3451	event->id = id;
3452	dns_fixedname_init(&event->foundname);
3453
3454	/*
3455	 * Make sure that we can store the sigrdataset in the
3456	 * first event if it is needed by any of the events.
3457	 */
3458	if (event->sigrdataset != NULL)
3459		ISC_LIST_PREPEND(fctx->events, event, ev_link);
3460	else
3461		ISC_LIST_APPEND(fctx->events, event, ev_link);
3462	fctx->references++;
3463	fctx->client = client;
3464
3465	fetch->magic = DNS_FETCH_MAGIC;
3466	fetch->private = fctx;
3467
3468	return (ISC_R_SUCCESS);
3469}
3470
3471static inline void
3472log_ns_ttl(fetchctx_t *fctx, const char *where) {
3473	char namebuf[DNS_NAME_FORMATSIZE];
3474	char domainbuf[DNS_NAME_FORMATSIZE];
3475
3476	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3477	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
3478	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3479		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
3480		      "log_ns_ttl: fctx %p: %s: %s (in '%s'?): %u %u",
3481		      fctx, where, namebuf, domainbuf,
3482		      fctx->ns_ttl_ok, fctx->ns_ttl);
3483}
3484
/*
 * Allocate and initialize a new fetch context for 'name'/'type' in
 * bucket 'bucketnum' of resolver 'res', append it to that bucket's
 * fctx list, increment res->nfctx, and return it through '*fctxp'.
 *
 * If 'domain' is NULL the query domain is worked out here: the view's
 * forwarder table is consulted and, unless the resulting policy is
 * forward-only, dns_view_findzonecut() supplies the domain and the
 * nameservers rdataset.  If 'domain' is non-NULL it is duplicated and
 * 'nameservers' is cloned into the fctx.
 *
 * The new fctx starts in state fetchstate_init with zero references
 * and an inactive timer.
 *
 * Requires:
 *	'fctxp' != NULL && '*fctxp' == NULL.
 *	The caller holds the lock for bucket 'bucketnum'.
 *
 * Returns:
 *	ISC_R_SUCCESS
 *	ISC_R_NOMEMORY
 *	ISC_R_UNEXPECTED
 *	or the failure result of dns_name_dup(), dns_view_findzonecut()
 *	or dns_message_create().
 *
 *	On failure everything acquired here is released and '*fctxp' is
 *	left untouched.
 */
static isc_result_t
fctx_create(dns_resolver_t *res, dns_name_t *name, dns_rdatatype_t type,
	    dns_name_t *domain, dns_rdataset_t *nameservers,
	    unsigned int options, unsigned int bucketnum, fetchctx_t **fctxp)
{
	fetchctx_t *fctx;
	isc_result_t result;
	isc_result_t iresult;
	isc_interval_t interval;
	dns_fixedname_t fixed;
	unsigned int findoptions = 0;
	char buf[DNS_NAME_FORMATSIZE + DNS_RDATATYPE_FORMATSIZE];
	char typebuf[DNS_RDATATYPE_FORMATSIZE];
	dns_name_t suffix;
	isc_mem_t *mctx;

	/*
	 * Caller must be holding the lock for bucket number 'bucketnum'.
	 */
	REQUIRE(fctxp != NULL && *fctxp == NULL);

	mctx = res->buckets[bucketnum].mctx;
	fctx = isc_mem_get(mctx, sizeof(*fctx));
	if (fctx == NULL)
		return (ISC_R_NOMEMORY);
	/*
	 * Build the "name/type" label that is kept in fctx->info.
	 */
	dns_name_format(name, buf, sizeof(buf));
	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
	strcat(buf, "/");       /* checked */
	strcat(buf, typebuf);   /* checked */
	fctx->info = isc_mem_strdup(mctx, buf);
	if (fctx->info == NULL) {
		result = ISC_R_NOMEMORY;
		goto cleanup_fetch;
	}
	FCTXTRACE("create");
	dns_name_init(&fctx->name, NULL);
	result = dns_name_dup(name, mctx, &fctx->name);
	if (result != ISC_R_SUCCESS)
		goto cleanup_info;
	dns_name_init(&fctx->domain, NULL);
	dns_rdataset_init(&fctx->nameservers);

	fctx->type = type;
	fctx->options = options;
	/*
	 * Note!  We do not attach to the task.  We are relying on the
	 * resolver to ensure that this task doesn't go away while we are
	 * using it.
	 */
	fctx->res = res;
	fctx->references = 0;
	fctx->bucketnum = bucketnum;
	fctx->state = fetchstate_init;
	fctx->want_shutdown = ISC_FALSE;
	fctx->cloned = ISC_FALSE;
	ISC_LIST_INIT(fctx->queries);
	ISC_LIST_INIT(fctx->finds);
	ISC_LIST_INIT(fctx->altfinds);
	ISC_LIST_INIT(fctx->forwaddrs);
	ISC_LIST_INIT(fctx->altaddrs);
	ISC_LIST_INIT(fctx->forwarders);
	fctx->fwdpolicy = dns_fwdpolicy_none;
	ISC_LIST_INIT(fctx->bad);
	ISC_LIST_INIT(fctx->edns);
	ISC_LIST_INIT(fctx->edns512);
	ISC_LIST_INIT(fctx->bad_edns);
	ISC_LIST_INIT(fctx->validators);
	fctx->validator = NULL;
	fctx->find = NULL;
	fctx->altfind = NULL;
	fctx->pending = 0;
	fctx->restarts = 0;
	fctx->querysent = 0;
	fctx->referrals = 0;
	TIME_NOW(&fctx->start);
	fctx->timeouts = 0;
	fctx->lamecount = 0;
	fctx->adberr = 0;
	fctx->neterr = 0;
	fctx->badresp = 0;
	fctx->findfail = 0;
	fctx->valfail = 0;
	fctx->result = ISC_R_FAILURE;
	fctx->vresult = ISC_R_SUCCESS;
	fctx->exitline = -1;	/* sentinel */
	fctx->logged = ISC_FALSE;
	fctx->attributes = 0;
	fctx->spilled = ISC_FALSE;
	fctx->nqueries = 0;
	fctx->reason = NULL;
	fctx->rand_buf = 0;
	fctx->rand_bits = 0;
	fctx->timeout = ISC_FALSE;
	fctx->addrinfo = NULL;
	fctx->client = NULL;
	fctx->ns_ttl = 0;
	fctx->ns_ttl_ok = ISC_FALSE;

	dns_name_init(&fctx->nsname, NULL);
	fctx->nsfetch = NULL;
	dns_rdataset_init(&fctx->nsrrset);

	if (domain == NULL) {
		dns_forwarders_t *forwarders = NULL;
		unsigned int labels;
		dns_name_t *fwdname = name;

		/*
		 * DS records are found in the parent server.
		 * Strip label to get the correct forwarder (if any).
		 */
		if (dns_rdatatype_atparent(fctx->type) &&
		    dns_name_countlabels(name) > 1) {
			dns_name_init(&suffix, NULL);
			labels = dns_name_countlabels(name);
			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
			fwdname = &suffix;
		}
		/*
		 * From here on 'domain' points at local fixed-name storage
		 * rather than at a caller-supplied name.
		 */
		dns_fixedname_init(&fixed);
		domain = dns_fixedname_name(&fixed);
		result = dns_fwdtable_find2(fctx->res->view->fwdtable, fwdname,
					    domain, &forwarders);
		/*
		 * Any other result leaves fwdpolicy at dns_fwdpolicy_none.
		 */
		if (result == ISC_R_SUCCESS)
			fctx->fwdpolicy = forwarders->fwdpolicy;

		if (fctx->fwdpolicy != dns_fwdpolicy_only) {
			/*
			 * The caller didn't supply a query domain and
			 * nameservers, and we're not in forward-only mode,
			 * so find the best nameservers to use.
			 */
			if (dns_rdatatype_atparent(fctx->type))
				findoptions |= DNS_DBFIND_NOEXACT;
			result = dns_view_findzonecut(res->view, name, domain,
						      0, findoptions, ISC_TRUE,
						      &fctx->nameservers,
						      NULL);
			if (result != ISC_R_SUCCESS)
				goto cleanup_name;
			result = dns_name_dup(domain, mctx, &fctx->domain);
			if (result != ISC_R_SUCCESS) {
				/*
				 * cleanup_name does not release the
				 * nameservers rdataset, so do it here.
				 */
				dns_rdataset_disassociate(&fctx->nameservers);
				goto cleanup_name;
			}
			fctx->ns_ttl = fctx->nameservers.ttl;
			fctx->ns_ttl_ok = ISC_TRUE;
		} else {
			/*
			 * We're in forward-only mode.  Set the query domain.
			 */
			result = dns_name_dup(domain, mctx, &fctx->domain);
			if (result != ISC_R_SUCCESS)
				goto cleanup_name;
		}
	} else {
		/*
		 * The caller supplied the query domain and nameservers.
		 */
		result = dns_name_dup(domain, mctx, &fctx->domain);
		if (result != ISC_R_SUCCESS)
			goto cleanup_name;
		dns_rdataset_clone(nameservers, &fctx->nameservers);
		fctx->ns_ttl = fctx->nameservers.ttl;
		fctx->ns_ttl_ok = ISC_TRUE;
	}

	log_ns_ttl(fctx, "fctx_create");

	INSIST(dns_name_issubdomain(&fctx->name, &fctx->domain));

	/*
	 * One message for rendering queries, one for parsing responses.
	 */
	fctx->qmessage = NULL;
	result = dns_message_create(mctx, DNS_MESSAGE_INTENTRENDER,
				    &fctx->qmessage);

	if (result != ISC_R_SUCCESS)
		goto cleanup_domain;

	fctx->rmessage = NULL;
	result = dns_message_create(mctx, DNS_MESSAGE_INTENTPARSE,
				    &fctx->rmessage);

	if (result != ISC_R_SUCCESS)
		goto cleanup_qmessage;

	/*
	 * Compute an expiration time for the entire fetch.
	 */
	isc_interval_set(&interval, res->query_timeout, 0);
	iresult = isc_time_nowplusinterval(&fctx->expires, &interval);
	if (iresult != ISC_R_SUCCESS) {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "isc_time_nowplusinterval: %s",
				 isc_result_totext(iresult));
		result = ISC_R_UNEXPECTED;
		goto cleanup_rmessage;
	}

	/*
	 * Default retry interval initialization.  We set the interval now
	 * mostly so it won't be uninitialized.  It will be set to the
	 * correct value before a query is issued.
	 */
	isc_interval_set(&fctx->interval, 2, 0);

	/*
	 * Create an inactive timer.  It will be made active when the fetch
	 * is actually started.
	 */
	fctx->timer = NULL;
	iresult = isc_timer_create(res->timermgr, isc_timertype_inactive,
				   NULL, NULL,
				   res->buckets[bucketnum].task, fctx_timeout,
				   fctx, &fctx->timer);
	if (iresult != ISC_R_SUCCESS) {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "isc_timer_create: %s",
				 isc_result_totext(iresult));
		result = ISC_R_UNEXPECTED;
		goto cleanup_rmessage;
	}

	/*
	 * Attach to the view's cache and adb.
	 */
	fctx->cache = NULL;
	dns_db_attach(res->view->cachedb, &fctx->cache);
	fctx->adb = NULL;
	dns_adb_attach(res->view->adb, &fctx->adb);
	fctx->mctx = NULL;
	isc_mem_attach(mctx, &fctx->mctx);

	/*
	 * Nothing below can fail.  The magic number is set only now,
	 * once the fctx is fully initialized.
	 */
	ISC_LIST_INIT(fctx->events);
	ISC_LINK_INIT(fctx, link);
	fctx->magic = FCTX_MAGIC;

	ISC_LIST_APPEND(res->buckets[bucketnum].fctxs, fctx, link);

	LOCK(&res->nlock);
	res->nfctx++;
	UNLOCK(&res->nlock);

	*fctxp = fctx;

	return (ISC_R_SUCCESS);

	/*
	 * Error unwinding.  Each label falls through to the ones below
	 * it, releasing resources in the reverse of the order in which
	 * they were acquired.
	 */
 cleanup_rmessage:
	dns_message_destroy(&fctx->rmessage);

 cleanup_qmessage:
	dns_message_destroy(&fctx->qmessage);

 cleanup_domain:
	if (dns_name_countlabels(&fctx->domain) > 0)
		dns_name_free(&fctx->domain, mctx);
	if (dns_rdataset_isassociated(&fctx->nameservers))
		dns_rdataset_disassociate(&fctx->nameservers);

 cleanup_name:
	dns_name_free(&fctx->name, mctx);

 cleanup_info:
	isc_mem_free(mctx, fctx->info);

 cleanup_fetch:
	isc_mem_put(mctx, fctx, sizeof(*fctx));

	return (result);
}
3750
3751/*
3752 * Handle Responses
3753 */
3754static inline isc_boolean_t
3755is_lame(fetchctx_t *fctx) {
3756	dns_message_t *message = fctx->rmessage;
3757	dns_name_t *name;
3758	dns_rdataset_t *rdataset;
3759	isc_result_t result;
3760
3761	if (message->rcode != dns_rcode_noerror &&
3762	    message->rcode != dns_rcode_nxdomain)
3763		return (ISC_FALSE);
3764
3765	if (message->counts[DNS_SECTION_ANSWER] != 0)
3766		return (ISC_FALSE);
3767
3768	if (message->counts[DNS_SECTION_AUTHORITY] == 0)
3769		return (ISC_FALSE);
3770
3771	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
3772	while (result == ISC_R_SUCCESS) {
3773		name = NULL;
3774		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
3775		for (rdataset = ISC_LIST_HEAD(name->list);
3776		     rdataset != NULL;
3777		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
3778			dns_namereln_t namereln;
3779			int order;
3780			unsigned int labels;
3781			if (rdataset->type != dns_rdatatype_ns)
3782				continue;
3783			namereln = dns_name_fullcompare(name, &fctx->domain,
3784							&order, &labels);
3785			if (namereln == dns_namereln_equal &&
3786			    (message->flags & DNS_MESSAGEFLAG_AA) != 0)
3787				return (ISC_FALSE);
3788			if (namereln == dns_namereln_subdomain)
3789				return (ISC_FALSE);
3790			return (ISC_TRUE);
3791		}
3792		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
3793	}
3794
3795	return (ISC_FALSE);
3796}
3797
3798static inline void
3799log_lame(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo) {
3800	char namebuf[DNS_NAME_FORMATSIZE];
3801	char domainbuf[DNS_NAME_FORMATSIZE];
3802	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3803
3804	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3805	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
3806	isc_sockaddr_format(&addrinfo->sockaddr, addrbuf, sizeof(addrbuf));
3807	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
3808		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
3809		      "lame server resolving '%s' (in '%s'?): %s",
3810		      namebuf, domainbuf, addrbuf);
3811}
3812
3813static inline void
3814log_formerr(fetchctx_t *fctx, const char *format, ...) {
3815	char nsbuf[ISC_SOCKADDR_FORMATSIZE];
3816	char clbuf[ISC_SOCKADDR_FORMATSIZE];
3817	const char *clmsg = "";
3818	char msgbuf[2048];
3819	va_list args;
3820
3821	va_start(args, format);
3822	vsnprintf(msgbuf, sizeof(msgbuf), format, args);
3823	va_end(args);
3824
3825	isc_sockaddr_format(&fctx->addrinfo->sockaddr, nsbuf, sizeof(nsbuf));
3826
3827	if (fctx->client != NULL) {
3828		clmsg = " for client ";
3829		isc_sockaddr_format(fctx->client, clbuf, sizeof(clbuf));
3830	} else {
3831		clbuf[0] = '\0';
3832	}
3833
3834	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3835		      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
3836		      "DNS format error from %s resolving %s%s%s: %s",
3837		      nsbuf, fctx->info, clmsg, clbuf, msgbuf);
3838}
3839
3840static inline isc_result_t
3841same_question(fetchctx_t *fctx) {
3842	isc_result_t result;
3843	dns_message_t *message = fctx->rmessage;
3844	dns_name_t *name;
3845	dns_rdataset_t *rdataset;
3846
3847	/*
3848	 * Caller must be holding the fctx lock.
3849	 */
3850
3851	/*
3852	 * XXXRTH  Currently we support only one question.
3853	 */
3854	if (message->counts[DNS_SECTION_QUESTION] != 1) {
3855		log_formerr(fctx, "too many questions");
3856		return (DNS_R_FORMERR);
3857	}
3858
3859	result = dns_message_firstname(message, DNS_SECTION_QUESTION);
3860	if (result != ISC_R_SUCCESS)
3861		return (result);
3862	name = NULL;
3863	dns_message_currentname(message, DNS_SECTION_QUESTION, &name);
3864	rdataset = ISC_LIST_HEAD(name->list);
3865	INSIST(rdataset != NULL);
3866	INSIST(ISC_LIST_NEXT(rdataset, link) == NULL);
3867
3868	if (fctx->type != rdataset->type ||
3869	    fctx->res->rdclass != rdataset->rdclass ||
3870	    !dns_name_equal(&fctx->name, name)) {
3871		char namebuf[DNS_NAME_FORMATSIZE];
3872		char class[DNS_RDATACLASS_FORMATSIZE];
3873		char type[DNS_RDATATYPE_FORMATSIZE];
3874
3875		dns_name_format(name, namebuf, sizeof(namebuf));
3876		dns_rdataclass_format(rdataset->rdclass, class, sizeof(class));
3877		dns_rdatatype_format(rdataset->type, type, sizeof(type));
3878		log_formerr(fctx, "question section mismatch: got %s/%s/%s",
3879			    namebuf, class, type);
3880		return (DNS_R_FORMERR);
3881	}
3882
3883	return (ISC_R_SUCCESS);
3884}
3885
3886static void
3887clone_results(fetchctx_t *fctx) {
3888	dns_fetchevent_t *event, *hevent;
3889	isc_result_t result;
3890	dns_name_t *name, *hname;
3891
3892	FCTXTRACE("clone_results");
3893
3894	/*
3895	 * Set up any other events to have the same data as the first
3896	 * event.
3897	 *
3898	 * Caller must be holding the appropriate lock.
3899	 */
3900
3901	fctx->cloned = ISC_TRUE;
3902	hevent = ISC_LIST_HEAD(fctx->events);
3903	if (hevent == NULL)
3904		return;
3905	hname = dns_fixedname_name(&hevent->foundname);
3906	for (event = ISC_LIST_NEXT(hevent, ev_link);
3907	     event != NULL;
3908	     event = ISC_LIST_NEXT(event, ev_link)) {
3909		name = dns_fixedname_name(&event->foundname);
3910		result = dns_name_copy(hname, name, NULL);
3911		if (result != ISC_R_SUCCESS)
3912			event->result = result;
3913		else
3914			event->result = hevent->result;
3915		dns_db_attach(hevent->db, &event->db);
3916		dns_db_attachnode(hevent->db, hevent->node, &event->node);
3917		INSIST(hevent->rdataset != NULL);
3918		INSIST(event->rdataset != NULL);
3919		if (dns_rdataset_isassociated(hevent->rdataset))
3920			dns_rdataset_clone(hevent->rdataset, event->rdataset);
3921		INSIST(! (hevent->sigrdataset == NULL &&
3922			  event->sigrdataset != NULL));
3923		if (hevent->sigrdataset != NULL &&
3924		    dns_rdataset_isassociated(hevent->sigrdataset) &&
3925		    event->sigrdataset != NULL)
3926			dns_rdataset_clone(hevent->sigrdataset,
3927					   event->sigrdataset);
3928	}
3929}
3930
/*
 * Tests for individual DNS_RDATASETATTR_* bits in the 'attributes'
 * field of the rdataset pointed to by 'r'.  Each one evaluates to true
 * when the corresponding bit is set.
 */
#define CACHE(r)        (((r)->attributes & DNS_RDATASETATTR_CACHE) != 0)
#define ANSWER(r)       (((r)->attributes & DNS_RDATASETATTR_ANSWER) != 0)
#define ANSWERSIG(r)    (((r)->attributes & DNS_RDATASETATTR_ANSWERSIG) != 0)
#define EXTERNAL(r)     (((r)->attributes & DNS_RDATASETATTR_EXTERNAL) != 0)
#define CHAINING(r)     (((r)->attributes & DNS_RDATASETATTR_CHAINING) != 0)
#define CHASE(r)        (((r)->attributes & DNS_RDATASETATTR_CHASE) != 0)
#define CHECKNAMES(r)   (((r)->attributes & DNS_RDATASETATTR_CHECKNAMES) != 0)
3938
3939
3940/*
3941 * Destroy '*fctx' if it is ready to be destroyed (i.e., if it has
3942 * no references and is no longer waiting for any events).
3943 *
3944 * Requires:
3945 *      '*fctx' is shutting down.
3946 *
3947 * Returns:
3948 *	true if the resolver is exiting and this is the last fctx in the bucket.
3949 */
3950static isc_boolean_t
3951maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked) {
3952	unsigned int bucketnum;
3953	isc_boolean_t bucket_empty = ISC_FALSE;
3954	dns_resolver_t *res = fctx->res;
3955	dns_validator_t *validator, *next_validator;
3956	isc_boolean_t destroy = ISC_FALSE;
3957
3958	REQUIRE(SHUTTINGDOWN(fctx));
3959
3960	bucketnum = fctx->bucketnum;
3961	if (!locked)
3962		LOCK(&res->buckets[bucketnum].lock);
3963	if (fctx->pending != 0 || fctx->nqueries != 0)
3964		goto unlock;
3965
3966	for (validator = ISC_LIST_HEAD(fctx->validators);
3967	     validator != NULL; validator = next_validator) {
3968		next_validator = ISC_LIST_NEXT(validator, link);
3969		dns_validator_cancel(validator);
3970	}
3971
3972	if (fctx->references == 0 && ISC_LIST_EMPTY(fctx->validators)) {
3973		bucket_empty = fctx_unlink(fctx);
3974		destroy = ISC_TRUE;
3975	}
3976 unlock:
3977	if (!locked)
3978		UNLOCK(&res->buckets[bucketnum].lock);
3979	if (destroy)
3980		fctx_destroy(fctx);
3981	return (bucket_empty);
3982}
3983
3984/*
3985 * The validator has finished.
3986 */
3987static void
3988validated(isc_task_t *task, isc_event_t *event) {
3989	dns_adbaddrinfo_t *addrinfo;
3990	dns_dbnode_t *node = NULL;
3991	dns_dbnode_t *nsnode = NULL;
3992	dns_fetchevent_t *hevent;
3993	dns_name_t *name;
3994	dns_rdataset_t *ardataset = NULL;
3995	dns_rdataset_t *asigrdataset = NULL;
3996	dns_rdataset_t *rdataset;
3997	dns_rdataset_t *sigrdataset;
3998	dns_resolver_t *res;
3999	dns_valarg_t *valarg;
4000	dns_validatorevent_t *vevent;
4001	fetchctx_t *fctx;
4002	isc_boolean_t chaining;
4003	isc_boolean_t negative;
4004	isc_boolean_t sentresponse;
4005	isc_result_t eresult = ISC_R_SUCCESS;
4006	isc_result_t result = ISC_R_SUCCESS;
4007	isc_stdtime_t now;
4008	isc_uint32_t ttl;
4009
4010	UNUSED(task); /* for now */
4011
4012	REQUIRE(event->ev_type == DNS_EVENT_VALIDATORDONE);
4013	valarg = event->ev_arg;
4014	fctx = valarg->fctx;
4015	res = fctx->res;
4016	addrinfo = valarg->addrinfo;
4017	REQUIRE(VALID_FCTX(fctx));
4018	REQUIRE(!ISC_LIST_EMPTY(fctx->validators));
4019
4020	vevent = (dns_validatorevent_t *)event;
4021	fctx->vresult = vevent->result;
4022
4023	FCTXTRACE("received validation completion event");
4024
4025	LOCK(&res->buckets[fctx->bucketnum].lock);
4026
4027	ISC_LIST_UNLINK(fctx->validators, vevent->validator, link);
4028	fctx->validator = NULL;
4029
4030	/*
4031	 * Destroy the validator early so that we can
4032	 * destroy the fctx if necessary.
4033	 */
4034	dns_validator_destroy(&vevent->validator);
4035	isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
4036
4037	negative = ISC_TF(vevent->rdataset == NULL);
4038
4039	sentresponse = ISC_TF((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0);
4040
4041	/*
4042	 * If shutting down, ignore the results.  Check to see if we're
4043	 * done waiting for validator completions and ADB pending events; if
4044	 * so, destroy the fctx.
4045	 */
4046	if (SHUTTINGDOWN(fctx) && !sentresponse) {
4047		isc_uint32_t bucketnum = fctx->bucketnum;
4048		isc_boolean_t bucket_empty;
4049		bucket_empty = maybe_destroy(fctx, ISC_TRUE);
4050		UNLOCK(&res->buckets[bucketnum].lock);
4051		if (bucket_empty)
4052			empty_bucket(res);
4053		goto cleanup_event;
4054	}
4055
4056	isc_stdtime_get(&now);
4057
4058	/*
4059	 * If chaining, we need to make sure that the right result code is
4060	 * returned, and that the rdatasets are bound.
4061	 */
4062	if (vevent->result == ISC_R_SUCCESS &&
4063	    !negative &&
4064	    vevent->rdataset != NULL &&
4065	    CHAINING(vevent->rdataset))
4066	{
4067		if (vevent->rdataset->type == dns_rdatatype_cname)
4068			eresult = DNS_R_CNAME;
4069		else {
4070			INSIST(vevent->rdataset->type == dns_rdatatype_dname);
4071			eresult = DNS_R_DNAME;
4072		}
4073		chaining = ISC_TRUE;
4074	} else
4075		chaining = ISC_FALSE;
4076
4077	/*
4078	 * Either we're not shutting down, or we are shutting down but want
4079	 * to cache the result anyway (if this was a validation started by
4080	 * a query with cd set)
4081	 */
4082
4083	hevent = ISC_LIST_HEAD(fctx->events);
4084	if (hevent != NULL) {
4085		if (!negative && !chaining &&
4086		    (fctx->type == dns_rdatatype_any ||
4087		     fctx->type == dns_rdatatype_rrsig ||
4088		     fctx->type == dns_rdatatype_sig)) {
4089			/*
4090			 * Don't bind rdatasets; the caller
4091			 * will iterate the node.
4092			 */
4093		} else {
4094			ardataset = hevent->rdataset;
4095			asigrdataset = hevent->sigrdataset;
4096		}
4097	}
4098
4099	if (vevent->result != ISC_R_SUCCESS) {
4100		FCTXTRACE("validation failed");
4101		inc_stats(res, dns_resstatscounter_valfail);
4102		fctx->valfail++;
4103		fctx->vresult = vevent->result;
4104		if (fctx->vresult != DNS_R_BROKENCHAIN) {
4105			result = ISC_R_NOTFOUND;
4106			if (vevent->rdataset != NULL)
4107				result = dns_db_findnode(fctx->cache,
4108							 vevent->name,
4109							 ISC_TRUE, &node);
4110			if (result == ISC_R_SUCCESS)
4111				(void)dns_db_deleterdataset(fctx->cache, node,
4112							     NULL,
4113							    vevent->type, 0);
4114			if (result == ISC_R_SUCCESS &&
4115			     vevent->sigrdataset != NULL)
4116				(void)dns_db_deleterdataset(fctx->cache, node,
4117							    NULL,
4118							    dns_rdatatype_rrsig,
4119							    vevent->type);
4120			if (result == ISC_R_SUCCESS)
4121				dns_db_detachnode(fctx->cache, &node);
4122		}
4123		if (fctx->vresult == DNS_R_BROKENCHAIN && !negative) {
4124			/*
4125			 * Cache the data as pending for later validation.
4126			 */
4127			result = ISC_R_NOTFOUND;
4128			if (vevent->rdataset != NULL)
4129				result = dns_db_findnode(fctx->cache,
4130							 vevent->name,
4131							 ISC_TRUE, &node);
4132			if (result == ISC_R_SUCCESS) {
4133				(void)dns_db_addrdataset(fctx->cache, node,
4134							 NULL, now,
4135							 vevent->rdataset, 0,
4136							 NULL);
4137			}
4138			if (result == ISC_R_SUCCESS &&
4139			    vevent->sigrdataset != NULL)
4140				(void)dns_db_addrdataset(fctx->cache, node,
4141							 NULL, now,
4142							 vevent->sigrdataset,
4143							 0, NULL);
4144			if (result == ISC_R_SUCCESS)
4145				dns_db_detachnode(fctx->cache, &node);
4146		}
4147		result = fctx->vresult;
4148		add_bad(fctx, addrinfo, result, badns_validation);
4149		isc_event_free(&event);
4150		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4151		INSIST(fctx->validator == NULL);
4152		fctx->validator = ISC_LIST_HEAD(fctx->validators);
4153		if (fctx->validator != NULL)
4154			dns_validator_send(fctx->validator);
4155		else if (sentresponse)
4156			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4157		else if (result == DNS_R_BROKENCHAIN) {
4158			isc_result_t tresult;
4159			isc_time_t expire;
4160			isc_interval_t i;
4161
4162			isc_interval_set(&i, DNS_BADCACHE_TTL(fctx), 0);
4163			tresult = isc_time_nowplusinterval(&expire, &i);
4164			if (negative &&
4165			    (fctx->type == dns_rdatatype_dnskey ||
4166			     fctx->type == dns_rdatatype_dlv ||
4167			     fctx->type == dns_rdatatype_ds) &&
4168			     tresult == ISC_R_SUCCESS)
4169				dns_resolver_addbadcache(res, &fctx->name,
4170							 fctx->type, &expire);
4171			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4172		} else
4173			fctx_try(fctx, ISC_TRUE, ISC_TRUE); /* Locks bucket. */
4174		return;
4175	}
4176
4177
4178	if (negative) {
4179		dns_rdatatype_t covers;
4180		FCTXTRACE("nonexistence validation OK");
4181
4182		inc_stats(res, dns_resstatscounter_valnegsuccess);
4183
4184		if (fctx->rmessage->rcode == dns_rcode_nxdomain)
4185			covers = dns_rdatatype_any;
4186		else
4187			covers = fctx->type;
4188
4189		result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE,
4190					 &node);
4191		if (result != ISC_R_SUCCESS)
4192			goto noanswer_response;
4193
4194		/*
4195		 * If we are asking for a SOA record set the cache time
4196		 * to zero to facilitate locating the containing zone of
4197		 * a arbitrary zone.
4198		 */
4199		ttl = res->view->maxncachettl;
4200		if (fctx->type == dns_rdatatype_soa &&
4201		    covers == dns_rdatatype_any && res->zero_no_soa_ttl)
4202			ttl = 0;
4203
4204		result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
4205					   covers, now, ttl, vevent->optout,
4206					   ardataset, &eresult);
4207		if (result != ISC_R_SUCCESS)
4208			goto noanswer_response;
4209		goto answer_response;
4210	} else
4211		inc_stats(res, dns_resstatscounter_valsuccess);
4212
4213	FCTXTRACE("validation OK");
4214
4215	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL) {
4216
4217		result = dns_rdataset_addnoqname(vevent->rdataset,
4218				   vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF]);
4219		RUNTIME_CHECK(result == ISC_R_SUCCESS);
4220		INSIST(vevent->sigrdataset != NULL);
4221		vevent->sigrdataset->ttl = vevent->rdataset->ttl;
4222		if (vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER] != NULL) {
4223			result = dns_rdataset_addclosest(vevent->rdataset,
4224				 vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER]);
4225			RUNTIME_CHECK(result == ISC_R_SUCCESS);
4226		}
4227	}
4228
4229	/*
4230	 * The data was already cached as pending data.
4231	 * Re-cache it as secure and bind the cached
4232	 * rdatasets to the first event on the fetch
4233	 * event list.
4234	 */
4235	result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE, &node);
4236	if (result != ISC_R_SUCCESS)
4237		goto noanswer_response;
4238
4239	result = dns_db_addrdataset(fctx->cache, node, NULL, now,
4240				    vevent->rdataset, 0, ardataset);
4241	if (result != ISC_R_SUCCESS &&
4242	    result != DNS_R_UNCHANGED)
4243		goto noanswer_response;
4244	if (ardataset != NULL && NEGATIVE(ardataset)) {
4245		if (NXDOMAIN(ardataset))
4246			eresult = DNS_R_NCACHENXDOMAIN;
4247		else
4248			eresult = DNS_R_NCACHENXRRSET;
4249	} else if (vevent->sigrdataset != NULL) {
4250		result = dns_db_addrdataset(fctx->cache, node, NULL, now,
4251					    vevent->sigrdataset, 0,
4252					    asigrdataset);
4253		if (result != ISC_R_SUCCESS &&
4254		    result != DNS_R_UNCHANGED)
4255			goto noanswer_response;
4256	}
4257
4258	if (sentresponse) {
4259		isc_boolean_t bucket_empty = ISC_FALSE;
4260		/*
4261		 * If we only deferred the destroy because we wanted to cache
4262		 * the data, destroy now.
4263		 */
4264		dns_db_detachnode(fctx->cache, &node);
4265		if (SHUTTINGDOWN(fctx))
4266			bucket_empty = maybe_destroy(fctx, ISC_TRUE);
4267		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4268		if (bucket_empty)
4269			empty_bucket(res);
4270		goto cleanup_event;
4271	}
4272
4273	if (!ISC_LIST_EMPTY(fctx->validators)) {
4274		INSIST(!negative);
4275		INSIST(fctx->type == dns_rdatatype_any ||
4276		       fctx->type == dns_rdatatype_rrsig ||
4277		       fctx->type == dns_rdatatype_sig);
4278		/*
4279		 * Don't send a response yet - we have
4280		 * more rdatasets that still need to
4281		 * be validated.
4282		 */
4283		dns_db_detachnode(fctx->cache, &node);
4284		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4285		dns_validator_send(ISC_LIST_HEAD(fctx->validators));
4286		goto cleanup_event;
4287	}
4288
4289 answer_response:
4290	/*
4291	 * Cache any NS/NSEC records that happened to be validated.
4292	 */
4293	result = dns_message_firstname(fctx->rmessage, DNS_SECTION_AUTHORITY);
4294	while (result == ISC_R_SUCCESS) {
4295		name = NULL;
4296		dns_message_currentname(fctx->rmessage, DNS_SECTION_AUTHORITY,
4297					&name);
4298		for (rdataset = ISC_LIST_HEAD(name->list);
4299		     rdataset != NULL;
4300		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
4301			if ((rdataset->type != dns_rdatatype_ns &&
4302			     rdataset->type != dns_rdatatype_nsec) ||
4303			    rdataset->trust != dns_trust_secure)
4304				continue;
4305			for (sigrdataset = ISC_LIST_HEAD(name->list);
4306			     sigrdataset != NULL;
4307			     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4308				if (sigrdataset->type != dns_rdatatype_rrsig ||
4309				    sigrdataset->covers != rdataset->type)
4310					continue;
4311				break;
4312			}
4313			if (sigrdataset == NULL ||
4314			    sigrdataset->trust != dns_trust_secure)
4315				continue;
4316			result = dns_db_findnode(fctx->cache, name, ISC_TRUE,
4317						 &nsnode);
4318			if (result != ISC_R_SUCCESS)
4319				continue;
4320
4321			result = dns_db_addrdataset(fctx->cache, nsnode, NULL,
4322						    now, rdataset, 0, NULL);
4323			if (result == ISC_R_SUCCESS)
4324				result = dns_db_addrdataset(fctx->cache, nsnode,
4325							    NULL, now,
4326							    sigrdataset, 0,
4327							    NULL);
4328			dns_db_detachnode(fctx->cache, &nsnode);
4329			if (result != ISC_R_SUCCESS)
4330				continue;
4331		}
4332		result = dns_message_nextname(fctx->rmessage,
4333					      DNS_SECTION_AUTHORITY);
4334	}
4335
4336	result = ISC_R_SUCCESS;
4337
4338	/*
4339	 * Respond with an answer, positive or negative,
4340	 * as opposed to an error.  'node' must be non-NULL.
4341	 */
4342
4343	fctx->attributes |= FCTX_ATTR_HAVEANSWER;
4344
4345	if (hevent != NULL) {
4346		hevent->result = eresult;
4347		RUNTIME_CHECK(dns_name_copy(vevent->name,
4348			      dns_fixedname_name(&hevent->foundname), NULL)
4349			      == ISC_R_SUCCESS);
4350		dns_db_attach(fctx->cache, &hevent->db);
4351		dns_db_transfernode(fctx->cache, &node, &hevent->node);
4352		clone_results(fctx);
4353	}
4354
4355 noanswer_response:
4356	if (node != NULL)
4357		dns_db_detachnode(fctx->cache, &node);
4358
4359	UNLOCK(&res->buckets[fctx->bucketnum].lock);
4360	fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4361
4362 cleanup_event:
4363	INSIST(node == NULL);
4364	isc_event_free(&event);
4365}
4366
4367static inline isc_result_t
4368cache_name(fetchctx_t *fctx, dns_name_t *name, dns_adbaddrinfo_t *addrinfo,
4369	   isc_stdtime_t now)
4370{
4371	dns_rdataset_t *rdataset, *sigrdataset;
4372	dns_rdataset_t *addedrdataset, *ardataset, *asigrdataset;
4373	dns_rdataset_t *valrdataset = NULL, *valsigrdataset = NULL;
4374	dns_dbnode_t *node, **anodep;
4375	dns_db_t **adbp;
4376	dns_name_t *aname;
4377	dns_resolver_t *res;
4378	isc_boolean_t need_validation, secure_domain, have_answer;
4379	isc_result_t result, eresult;
4380	dns_fetchevent_t *event;
4381	unsigned int options;
4382	isc_task_t *task;
4383	isc_boolean_t fail;
4384	unsigned int valoptions = 0;
4385
4386	/*
4387	 * The appropriate bucket lock must be held.
4388	 */
4389
4390	res = fctx->res;
4391	need_validation = ISC_FALSE;
4392	POST(need_validation);
4393	secure_domain = ISC_FALSE;
4394	have_answer = ISC_FALSE;
4395	eresult = ISC_R_SUCCESS;
4396	task = res->buckets[fctx->bucketnum].task;
4397
4398	/*
4399	 * Is DNSSEC validation required for this name?
4400	 */
4401	if (res->view->enablevalidation) {
4402		result = dns_view_issecuredomain(res->view, name,
4403						 &secure_domain);
4404		if (result != ISC_R_SUCCESS)
4405			return (result);
4406
4407		if (!secure_domain && res->view->dlv != NULL) {
4408			valoptions = DNS_VALIDATOR_DLV;
4409			secure_domain = ISC_TRUE;
4410		}
4411	}
4412
4413	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
4414		need_validation = ISC_FALSE;
4415	else
4416		need_validation = secure_domain;
4417
4418	adbp = NULL;
4419	aname = NULL;
4420	anodep = NULL;
4421	ardataset = NULL;
4422	asigrdataset = NULL;
4423	event = NULL;
4424	if ((name->attributes & DNS_NAMEATTR_ANSWER) != 0 &&
4425	    !need_validation) {
4426		have_answer = ISC_TRUE;
4427		event = ISC_LIST_HEAD(fctx->events);
4428		if (event != NULL) {
4429			adbp = &event->db;
4430			aname = dns_fixedname_name(&event->foundname);
4431			result = dns_name_copy(name, aname, NULL);
4432			if (result != ISC_R_SUCCESS)
4433				return (result);
4434			anodep = &event->node;
4435			/*
4436			 * If this is an ANY, SIG or RRSIG query, we're not
4437			 * going to return any rdatasets, unless we encountered
4438			 * a CNAME or DNAME as "the answer".  In this case,
4439			 * we're going to return DNS_R_CNAME or DNS_R_DNAME
4440			 * and we must set up the rdatasets.
4441			 */
4442			if ((fctx->type != dns_rdatatype_any &&
4443			     fctx->type != dns_rdatatype_rrsig &&
4444			     fctx->type != dns_rdatatype_sig) ||
4445			    (name->attributes & DNS_NAMEATTR_CHAINING) != 0) {
4446				ardataset = event->rdataset;
4447				asigrdataset = event->sigrdataset;
4448			}
4449		}
4450	}
4451
4452	/*
4453	 * Find or create the cache node.
4454	 */
4455	node = NULL;
4456	result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
4457	if (result != ISC_R_SUCCESS)
4458		return (result);
4459
4460	/*
4461	 * Cache or validate each cacheable rdataset.
4462	 */
4463	fail = ISC_TF((fctx->res->options & DNS_RESOLVER_CHECKNAMESFAIL) != 0);
4464	for (rdataset = ISC_LIST_HEAD(name->list);
4465	     rdataset != NULL;
4466	     rdataset = ISC_LIST_NEXT(rdataset, link)) {
4467		if (!CACHE(rdataset))
4468			continue;
4469		if (CHECKNAMES(rdataset)) {
4470			char namebuf[DNS_NAME_FORMATSIZE];
4471			char typebuf[DNS_RDATATYPE_FORMATSIZE];
4472			char classbuf[DNS_RDATATYPE_FORMATSIZE];
4473
4474			dns_name_format(name, namebuf, sizeof(namebuf));
4475			dns_rdatatype_format(rdataset->type, typebuf,
4476					     sizeof(typebuf));
4477			dns_rdataclass_format(rdataset->rdclass, classbuf,
4478					      sizeof(classbuf));
4479			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4480				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
4481				      "check-names %s %s/%s/%s",
4482				      fail ? "failure" : "warning",
4483				      namebuf, typebuf, classbuf);
4484			if (fail) {
4485				if (ANSWER(rdataset)) {
4486					dns_db_detachnode(fctx->cache, &node);
4487					return (DNS_R_BADNAME);
4488				}
4489				continue;
4490			}
4491		}
4492
4493		/*
4494		 * Enforce the configure maximum cache TTL.
4495		 */
4496		if (rdataset->ttl > res->view->maxcachettl)
4497			rdataset->ttl = res->view->maxcachettl;
4498
4499		/*
4500		 * If this RRset is in a secure domain, is in bailiwick,
4501		 * and is not glue, attempt DNSSEC validation.	(We do not
4502		 * attempt to validate glue or out-of-bailiwick data--even
4503		 * though there might be some performance benefit to doing
4504		 * so--because it makes it simpler and safer to ensure that
4505		 * records from a secure domain are only cached if validated
4506		 * within the context of a query to the domain that owns
4507		 * them.)
4508		 */
4509		if (secure_domain && rdataset->trust != dns_trust_glue &&
4510		    !EXTERNAL(rdataset)) {
4511			dns_trust_t trust;
4512
4513			/*
4514			 * RRSIGs are validated as part of validating the
4515			 * type they cover.
4516			 */
4517			if (rdataset->type == dns_rdatatype_rrsig)
4518				continue;
4519			/*
4520			 * Find the SIG for this rdataset, if we have it.
4521			 */
4522			for (sigrdataset = ISC_LIST_HEAD(name->list);
4523			     sigrdataset != NULL;
4524			     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4525				if (sigrdataset->type == dns_rdatatype_rrsig &&
4526				    sigrdataset->covers == rdataset->type)
4527					break;
4528			}
4529			if (sigrdataset == NULL) {
4530				if (!ANSWER(rdataset) && need_validation) {
4531					/*
4532					 * Ignore non-answer rdatasets that
4533					 * are missing signatures.
4534					 */
4535					continue;
4536				}
4537			}
4538
4539			/*
4540			 * Normalize the rdataset and sigrdataset TTLs.
4541			 */
4542			if (sigrdataset != NULL) {
4543				rdataset->ttl = ISC_MIN(rdataset->ttl,
4544							sigrdataset->ttl);
4545				sigrdataset->ttl = rdataset->ttl;
4546			}
4547
4548			/*
4549			 * Cache this rdataset/sigrdataset pair as
4550			 * pending data.  Track whether it was additional
4551			 * or not.
4552			 */
4553			if (rdataset->trust == dns_trust_additional)
4554				trust = dns_trust_pending_additional;
4555			else
4556				trust = dns_trust_pending_answer;
4557
4558			rdataset->trust = trust;
4559			if (sigrdataset != NULL)
4560				sigrdataset->trust = trust;
4561			if (!need_validation || !ANSWER(rdataset)) {
4562				addedrdataset = ardataset;
4563				result = dns_db_addrdataset(fctx->cache, node,
4564							    NULL, now, rdataset,
4565							    0, addedrdataset);
4566				if (result == DNS_R_UNCHANGED) {
4567					result = ISC_R_SUCCESS;
4568					if (!need_validation &&
4569					    ardataset != NULL &&
4570					    NEGATIVE(ardataset)) {
4571						/*
4572						 * The answer in the cache is
4573						 * better than the answer we
4574						 * found, and is a negative
4575						 * cache entry, so we must set
4576						 * eresult appropriately.
4577						 */
4578						if (NXDOMAIN(ardataset))
4579							eresult =
4580							   DNS_R_NCACHENXDOMAIN;
4581						else
4582							eresult =
4583							   DNS_R_NCACHENXRRSET;
4584						/*
4585						 * We have a negative response
4586						 * from the cache so don't
4587						 * attempt to add the RRSIG
4588						 * rrset.
4589						 */
4590						continue;
4591					}
4592				}
4593				if (result != ISC_R_SUCCESS)
4594					break;
4595				if (sigrdataset != NULL) {
4596					addedrdataset = asigrdataset;
4597					result = dns_db_addrdataset(fctx->cache,
4598								node, NULL, now,
4599								sigrdataset, 0,
4600								addedrdataset);
4601					if (result == DNS_R_UNCHANGED)
4602						result = ISC_R_SUCCESS;
4603					if (result != ISC_R_SUCCESS)
4604						break;
4605				} else if (!ANSWER(rdataset))
4606					continue;
4607			}
4608
4609			if (ANSWER(rdataset) && need_validation) {
4610				if (fctx->type != dns_rdatatype_any &&
4611				    fctx->type != dns_rdatatype_rrsig &&
4612				    fctx->type != dns_rdatatype_sig) {
4613					/*
4614					 * This is The Answer.  We will
4615					 * validate it, but first we cache
4616					 * the rest of the response - it may
4617					 * contain useful keys.
4618					 */
4619					INSIST(valrdataset == NULL &&
4620					       valsigrdataset == NULL);
4621					valrdataset = rdataset;
4622					valsigrdataset = sigrdataset;
4623				} else {
4624					/*
4625					 * This is one of (potentially)
4626					 * multiple answers to an ANY
4627					 * or SIG query.  To keep things
4628					 * simple, we just start the
4629					 * validator right away rather
4630					 * than caching first and
4631					 * having to remember which
4632					 * rdatasets needed validation.
4633					 */
4634					result = valcreate(fctx, addrinfo,
4635							   name, rdataset->type,
4636							   rdataset,
4637							   sigrdataset,
4638							   valoptions, task);
4639					/*
4640					 * Defer any further validations.
4641					 * This prevents multiple validators
4642					 * from manipulating fctx->rmessage
4643					 * simultaneously.
4644					 */
4645					valoptions |= DNS_VALIDATOR_DEFER;
4646				}
4647			} else if (CHAINING(rdataset)) {
4648				if (rdataset->type == dns_rdatatype_cname)
4649					eresult = DNS_R_CNAME;
4650				else {
4651					INSIST(rdataset->type ==
4652					       dns_rdatatype_dname);
4653					eresult = DNS_R_DNAME;
4654				}
4655			}
4656		} else if (!EXTERNAL(rdataset)) {
4657			/*
4658			 * It's OK to cache this rdataset now.
4659			 */
4660			if (ANSWER(rdataset))
4661				addedrdataset = ardataset;
4662			else if (ANSWERSIG(rdataset))
4663				addedrdataset = asigrdataset;
4664			else
4665				addedrdataset = NULL;
4666			if (CHAINING(rdataset)) {
4667				if (rdataset->type == dns_rdatatype_cname)
4668					eresult = DNS_R_CNAME;
4669				else {
4670					INSIST(rdataset->type ==
4671					       dns_rdatatype_dname);
4672					eresult = DNS_R_DNAME;
4673				}
4674			}
4675			if (rdataset->trust == dns_trust_glue &&
4676			    (rdataset->type == dns_rdatatype_ns ||
4677			     (rdataset->type == dns_rdatatype_rrsig &&
4678			      rdataset->covers == dns_rdatatype_ns))) {
4679				/*
4680				 * If the trust level is 'dns_trust_glue'
4681				 * then we are adding data from a referral
4682				 * we got while executing the search algorithm.
4683				 * New referral data always takes precedence
4684				 * over the existing cache contents.
4685				 */
4686				options = DNS_DBADD_FORCE;
4687			} else
4688				options = 0;
4689			/*
4690			 * Now we can add the rdataset.
4691			 */
4692			result = dns_db_addrdataset(fctx->cache,
4693						    node, NULL, now,
4694						    rdataset,
4695						    options,
4696						    addedrdataset);
4697			if (result == DNS_R_UNCHANGED) {
4698				if (ANSWER(rdataset) &&
4699				    ardataset != NULL &&
4700				    NEGATIVE(ardataset)) {
4701					/*
4702					 * The answer in the cache is better
4703					 * than the answer we found, and is
4704					 * a negative cache entry, so we
4705					 * must set eresult appropriately.
4706					 */
4707					if (NXDOMAIN(ardataset))
4708						eresult = DNS_R_NCACHENXDOMAIN;
4709					else
4710						eresult = DNS_R_NCACHENXRRSET;
4711				}
4712				result = ISC_R_SUCCESS;
4713			} else if (result != ISC_R_SUCCESS)
4714				break;
4715		}
4716	}
4717
4718	if (valrdataset != NULL)
4719		result = valcreate(fctx, addrinfo, name, fctx->type,
4720				   valrdataset, valsigrdataset, valoptions,
4721				   task);
4722
4723	if (result == ISC_R_SUCCESS && have_answer) {
4724		fctx->attributes |= FCTX_ATTR_HAVEANSWER;
4725		if (event != NULL) {
4726			/*
4727			 * Negative results must be indicated in event->result.
4728			 */
4729			if (dns_rdataset_isassociated(event->rdataset) &&
4730			    NEGATIVE(event->rdataset)) {
4731				INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
4732				       eresult == DNS_R_NCACHENXRRSET);
4733			}
4734			event->result = eresult;
4735			dns_db_attach(fctx->cache, adbp);
4736			dns_db_transfernode(fctx->cache, &node, anodep);
4737			clone_results(fctx);
4738		}
4739	}
4740
4741	if (node != NULL)
4742		dns_db_detachnode(fctx->cache, &node);
4743
4744	return (result);
4745}
4746
4747static inline isc_result_t
4748cache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_stdtime_t now)
4749{
4750	isc_result_t result;
4751	dns_section_t section;
4752	dns_name_t *name;
4753
4754	FCTXTRACE("cache_message");
4755
4756	fctx->attributes &= ~FCTX_ATTR_WANTCACHE;
4757
4758	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4759
4760	for (section = DNS_SECTION_ANSWER;
4761	     section <= DNS_SECTION_ADDITIONAL;
4762	     section++) {
4763		result = dns_message_firstname(fctx->rmessage, section);
4764		while (result == ISC_R_SUCCESS) {
4765			name = NULL;
4766			dns_message_currentname(fctx->rmessage, section,
4767						&name);
4768			if ((name->attributes & DNS_NAMEATTR_CACHE) != 0) {
4769				result = cache_name(fctx, name, addrinfo, now);
4770				if (result != ISC_R_SUCCESS)
4771					break;
4772			}
4773			result = dns_message_nextname(fctx->rmessage, section);
4774		}
4775		if (result != ISC_R_NOMORE)
4776			break;
4777	}
4778	if (result == ISC_R_NOMORE)
4779		result = ISC_R_SUCCESS;
4780
4781	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4782
4783	return (result);
4784}
4785
4786/*
4787 * Do what dns_ncache_addoptout() does, and then compute an appropriate eresult.
4788 */
4789static isc_result_t
4790ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node,
4791		  dns_rdatatype_t covers, isc_stdtime_t now, dns_ttl_t maxttl,
4792		  isc_boolean_t optout, dns_rdataset_t *ardataset,
4793		  isc_result_t *eresultp)
4794{
4795	isc_result_t result;
4796	dns_rdataset_t rdataset;
4797
4798	if (ardataset == NULL) {
4799		dns_rdataset_init(&rdataset);
4800		ardataset = &rdataset;
4801	}
4802	result = dns_ncache_addoptout(message, cache, node, covers, now,
4803				     maxttl, optout, ardataset);
4804	if (result == DNS_R_UNCHANGED || result == ISC_R_SUCCESS) {
4805		/*
4806		 * If the cache now contains a negative entry and we
4807		 * care about whether it is DNS_R_NCACHENXDOMAIN or
4808		 * DNS_R_NCACHENXRRSET then extract it.
4809		 */
4810		if (NEGATIVE(ardataset)) {
4811			/*
4812			 * The cache data is a negative cache entry.
4813			 */
4814			if (NXDOMAIN(ardataset))
4815				*eresultp = DNS_R_NCACHENXDOMAIN;
4816			else
4817				*eresultp = DNS_R_NCACHENXRRSET;
4818		} else {
4819			/*
4820			 * Either we don't care about the nature of the
4821			 * cache rdataset (because no fetch is interested
4822			 * in the outcome), or the cache rdataset is not
4823			 * a negative cache entry.  Whichever case it is,
4824			 * we can return success.
4825			 *
4826			 * XXXRTH  There's a CNAME/DNAME problem here.
4827			 */
4828			*eresultp = ISC_R_SUCCESS;
4829		}
4830		result = ISC_R_SUCCESS;
4831	}
4832	if (ardataset == &rdataset && dns_rdataset_isassociated(ardataset))
4833		dns_rdataset_disassociate(ardataset);
4834
4835	return (result);
4836}
4837
4838static inline isc_result_t
4839ncache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
4840	       dns_rdatatype_t covers, isc_stdtime_t now)
4841{
4842	isc_result_t result, eresult;
4843	dns_name_t *name;
4844	dns_resolver_t *res;
4845	dns_db_t **adbp;
4846	dns_dbnode_t *node, **anodep;
4847	dns_rdataset_t *ardataset;
4848	isc_boolean_t need_validation, secure_domain;
4849	dns_name_t *aname;
4850	dns_fetchevent_t *event;
4851	isc_uint32_t ttl;
4852	unsigned int valoptions = 0;
4853
4854	FCTXTRACE("ncache_message");
4855
4856	fctx->attributes &= ~FCTX_ATTR_WANTNCACHE;
4857
4858	res = fctx->res;
4859	need_validation = ISC_FALSE;
4860	POST(need_validation);
4861	secure_domain = ISC_FALSE;
4862	eresult = ISC_R_SUCCESS;
4863	name = &fctx->name;
4864	node = NULL;
4865
4866	/*
4867	 * XXXMPA remove when we follow cnames and adjust the setting
4868	 * of FCTX_ATTR_WANTNCACHE in noanswer_response().
4869	 */
4870	INSIST(fctx->rmessage->counts[DNS_SECTION_ANSWER] == 0);
4871
4872	/*
4873	 * Is DNSSEC validation required for this name?
4874	 */
4875	if (fctx->res->view->enablevalidation) {
4876		result = dns_view_issecuredomain(res->view, name,
4877						 &secure_domain);
4878		if (result != ISC_R_SUCCESS)
4879			return (result);
4880
4881		if (!secure_domain && res->view->dlv != NULL) {
4882			valoptions = DNS_VALIDATOR_DLV;
4883			secure_domain = ISC_TRUE;
4884		}
4885	}
4886
4887	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
4888		need_validation = ISC_FALSE;
4889	else
4890		need_validation = secure_domain;
4891
4892	if (secure_domain) {
4893		/*
4894		 * Mark all rdatasets as pending.
4895		 */
4896		dns_rdataset_t *trdataset;
4897		dns_name_t *tname;
4898
4899		result = dns_message_firstname(fctx->rmessage,
4900					       DNS_SECTION_AUTHORITY);
4901		while (result == ISC_R_SUCCESS) {
4902			tname = NULL;
4903			dns_message_currentname(fctx->rmessage,
4904						DNS_SECTION_AUTHORITY,
4905						&tname);
4906			for (trdataset = ISC_LIST_HEAD(tname->list);
4907			     trdataset != NULL;
4908			     trdataset = ISC_LIST_NEXT(trdataset, link))
4909				trdataset->trust = dns_trust_pending_answer;
4910			result = dns_message_nextname(fctx->rmessage,
4911						      DNS_SECTION_AUTHORITY);
4912		}
4913		if (result != ISC_R_NOMORE)
4914			return (result);
4915
4916	}
4917
4918	if (need_validation) {
4919		/*
4920		 * Do negative response validation.
4921		 */
4922		result = valcreate(fctx, addrinfo, name, fctx->type,
4923				   NULL, NULL, valoptions,
4924				   res->buckets[fctx->bucketnum].task);
4925		/*
4926		 * If validation is necessary, return now.  Otherwise continue
4927		 * to process the message, letting the validation complete
4928		 * in its own good time.
4929		 */
4930		return (result);
4931	}
4932
4933	LOCK(&res->buckets[fctx->bucketnum].lock);
4934
4935	adbp = NULL;
4936	aname = NULL;
4937	anodep = NULL;
4938	ardataset = NULL;
4939	if (!HAVE_ANSWER(fctx)) {
4940		event = ISC_LIST_HEAD(fctx->events);
4941		if (event != NULL) {
4942			adbp = &event->db;
4943			aname = dns_fixedname_name(&event->foundname);
4944			result = dns_name_copy(name, aname, NULL);
4945			if (result != ISC_R_SUCCESS)
4946				goto unlock;
4947			anodep = &event->node;
4948			ardataset = event->rdataset;
4949		}
4950	} else
4951		event = NULL;
4952
4953	result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
4954	if (result != ISC_R_SUCCESS)
4955		goto unlock;
4956
4957	/*
4958	 * If we are asking for a SOA record set the cache time
4959	 * to zero to facilitate locating the containing zone of
4960	 * a arbitrary zone.
4961	 */
4962	ttl = fctx->res->view->maxncachettl;
4963	if (fctx->type == dns_rdatatype_soa &&
4964	    covers == dns_rdatatype_any &&
4965	    fctx->res->zero_no_soa_ttl)
4966		ttl = 0;
4967
4968	result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
4969				   covers, now, ttl, ISC_FALSE,
4970				   ardataset, &eresult);
4971	if (result != ISC_R_SUCCESS)
4972		goto unlock;
4973
4974	if (!HAVE_ANSWER(fctx)) {
4975		fctx->attributes |= FCTX_ATTR_HAVEANSWER;
4976		if (event != NULL) {
4977			event->result = eresult;
4978			dns_db_attach(fctx->cache, adbp);
4979			dns_db_transfernode(fctx->cache, &node, anodep);
4980			clone_results(fctx);
4981		}
4982	}
4983
4984 unlock:
4985	UNLOCK(&res->buckets[fctx->bucketnum].lock);
4986
4987	if (node != NULL)
4988		dns_db_detachnode(fctx->cache, &node);
4989
4990	return (result);
4991}
4992
4993static inline void
4994mark_related(dns_name_t *name, dns_rdataset_t *rdataset,
4995	     isc_boolean_t external, isc_boolean_t gluing)
4996{
4997	name->attributes |= DNS_NAMEATTR_CACHE;
4998	if (gluing) {
4999		rdataset->trust = dns_trust_glue;
5000		/*
5001		 * Glue with 0 TTL causes problems.  We force the TTL to
5002		 * 1 second to prevent this.
5003		 */
5004		if (rdataset->ttl == 0)
5005			rdataset->ttl = 1;
5006	} else
5007		rdataset->trust = dns_trust_additional;
5008	/*
5009	 * Avoid infinite loops by only marking new rdatasets.
5010	 */
5011	if (!CACHE(rdataset)) {
5012		name->attributes |= DNS_NAMEATTR_CHASE;
5013		rdataset->attributes |= DNS_RDATASETATTR_CHASE;
5014	}
5015	rdataset->attributes |= DNS_RDATASETATTR_CACHE;
5016	if (external)
5017		rdataset->attributes |= DNS_RDATASETATTR_EXTERNAL;
5018}
5019
5020static isc_result_t
5021check_section(void *arg, dns_name_t *addname, dns_rdatatype_t type,
5022	      dns_section_t section)
5023{
5024	fetchctx_t *fctx = arg;
5025	isc_result_t result;
5026	dns_name_t *name;
5027	dns_rdataset_t *rdataset;
5028	isc_boolean_t external;
5029	dns_rdatatype_t rtype;
5030	isc_boolean_t gluing;
5031
5032	REQUIRE(VALID_FCTX(fctx));
5033
5034#if CHECK_FOR_GLUE_IN_ANSWER
5035	if (section == DNS_SECTION_ANSWER && type != dns_rdatatype_a)
5036		return (ISC_R_SUCCESS);
5037#endif
5038
5039	if (GLUING(fctx))
5040		gluing = ISC_TRUE;
5041	else
5042		gluing = ISC_FALSE;
5043	name = NULL;
5044	rdataset = NULL;
5045	result = dns_message_findname(fctx->rmessage, section, addname,
5046				      dns_rdatatype_any, 0, &name, NULL);
5047	if (result == ISC_R_SUCCESS) {
5048		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
5049		if (type == dns_rdatatype_a) {
5050			for (rdataset = ISC_LIST_HEAD(name->list);
5051			     rdataset != NULL;
5052			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5053				if (rdataset->type == dns_rdatatype_rrsig)
5054					rtype = rdataset->covers;
5055				else
5056					rtype = rdataset->type;
5057				if (rtype == dns_rdatatype_a ||
5058				    rtype == dns_rdatatype_aaaa)
5059					mark_related(name, rdataset, external,
5060						     gluing);
5061			}
5062		} else {
5063			result = dns_message_findtype(name, type, 0,
5064						      &rdataset);
5065			if (result == ISC_R_SUCCESS) {
5066				mark_related(name, rdataset, external, gluing);
5067				/*
5068				 * Do we have its SIG too?
5069				 */
5070				rdataset = NULL;
5071				result = dns_message_findtype(name,
5072						      dns_rdatatype_rrsig,
5073						      type, &rdataset);
5074				if (result == ISC_R_SUCCESS)
5075					mark_related(name, rdataset, external,
5076						     gluing);
5077			}
5078		}
5079	}
5080
5081	return (ISC_R_SUCCESS);
5082}
5083
5084static isc_result_t
5085check_related(void *arg, dns_name_t *addname, dns_rdatatype_t type) {
5086	return (check_section(arg, addname, type, DNS_SECTION_ADDITIONAL));
5087}
5088
/*
 * Looking for glue in the answer section is off unless the build
 * defines CHECK_FOR_GLUE_IN_ANSWER to a non-zero value.
 */
#if !defined(CHECK_FOR_GLUE_IN_ANSWER)
#define CHECK_FOR_GLUE_IN_ANSWER 0
#endif
#if CHECK_FOR_GLUE_IN_ANSWER
/*
 * Apply check_section() to the answer section.
 */
static isc_result_t
check_answer(void *arg, dns_name_t *addname, dns_rdatatype_t type) {
	isc_result_t result;

	result = check_section(arg, addname, type, DNS_SECTION_ANSWER);
	return (result);
}
#endif
5098
5099static void
5100chase_additional(fetchctx_t *fctx) {
5101	isc_boolean_t rescan;
5102	dns_section_t section = DNS_SECTION_ADDITIONAL;
5103	isc_result_t result;
5104
5105 again:
5106	rescan = ISC_FALSE;
5107
5108	for (result = dns_message_firstname(fctx->rmessage, section);
5109	     result == ISC_R_SUCCESS;
5110	     result = dns_message_nextname(fctx->rmessage, section)) {
5111		dns_name_t *name = NULL;
5112		dns_rdataset_t *rdataset;
5113		dns_message_currentname(fctx->rmessage, DNS_SECTION_ADDITIONAL,
5114					&name);
5115		if ((name->attributes & DNS_NAMEATTR_CHASE) == 0)
5116			continue;
5117		name->attributes &= ~DNS_NAMEATTR_CHASE;
5118		for (rdataset = ISC_LIST_HEAD(name->list);
5119		     rdataset != NULL;
5120		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5121			if (CHASE(rdataset)) {
5122				rdataset->attributes &= ~DNS_RDATASETATTR_CHASE;
5123				(void)dns_rdataset_additionaldata(rdataset,
5124								  check_related,
5125								  fctx);
5126				rescan = ISC_TRUE;
5127			}
5128		}
5129	}
5130	if (rescan)
5131		goto again;
5132}
5133
5134static inline isc_result_t
5135cname_target(dns_rdataset_t *rdataset, dns_name_t *tname) {
5136	isc_result_t result;
5137	dns_rdata_t rdata = DNS_RDATA_INIT;
5138	dns_rdata_cname_t cname;
5139
5140	result = dns_rdataset_first(rdataset);
5141	if (result != ISC_R_SUCCESS)
5142		return (result);
5143	dns_rdataset_current(rdataset, &rdata);
5144	result = dns_rdata_tostruct(&rdata, &cname, NULL);
5145	if (result != ISC_R_SUCCESS)
5146		return (result);
5147	dns_name_init(tname, NULL);
5148	dns_name_clone(&cname.cname, tname);
5149	dns_rdata_freestruct(&cname);
5150
5151	return (ISC_R_SUCCESS);
5152}
5153
5154static inline isc_result_t
5155dname_target(fetchctx_t *fctx, dns_rdataset_t *rdataset, dns_name_t *qname,
5156	     dns_name_t *oname, dns_fixedname_t *fixeddname)
5157{
5158	isc_result_t result;
5159	dns_rdata_t rdata = DNS_RDATA_INIT;
5160	unsigned int nlabels;
5161	int order;
5162	dns_namereln_t namereln;
5163	dns_rdata_dname_t dname;
5164	dns_fixedname_t prefix;
5165
5166	/*
5167	 * Get the target name of the DNAME.
5168	 */
5169	result = dns_rdataset_first(rdataset);
5170	if (result != ISC_R_SUCCESS)
5171		return (result);
5172	dns_rdataset_current(rdataset, &rdata);
5173	result = dns_rdata_tostruct(&rdata, &dname, NULL);
5174	if (result != ISC_R_SUCCESS)
5175		return (result);
5176
5177	/*
5178	 * Get the prefix of qname.
5179	 */
5180	namereln = dns_name_fullcompare(qname, oname, &order, &nlabels);
5181	if (namereln != dns_namereln_subdomain) {
5182		char qbuf[DNS_NAME_FORMATSIZE];
5183		char obuf[DNS_NAME_FORMATSIZE];
5184
5185		dns_rdata_freestruct(&dname);
5186		dns_name_format(qname, qbuf, sizeof(qbuf));
5187		dns_name_format(oname, obuf, sizeof(obuf));
5188		log_formerr(fctx, "unrelated DNAME in answer: "
5189				   "%s is not in %s", qbuf, obuf);
5190		return (DNS_R_FORMERR);
5191	}
5192	dns_fixedname_init(&prefix);
5193	dns_name_split(qname, nlabels, dns_fixedname_name(&prefix), NULL);
5194	dns_fixedname_init(fixeddname);
5195	result = dns_name_concatenate(dns_fixedname_name(&prefix),
5196				      &dname.dname,
5197				      dns_fixedname_name(fixeddname), NULL);
5198	dns_rdata_freestruct(&dname);
5199	return (result);
5200}
5201
5202static isc_boolean_t
5203is_answeraddress_allowed(dns_view_t *view, dns_name_t *name,
5204			 dns_rdataset_t *rdataset)
5205{
5206	isc_result_t result;
5207	dns_rdata_t rdata = DNS_RDATA_INIT;
5208	struct in_addr ina;
5209	struct in6_addr in6a;
5210	isc_netaddr_t netaddr;
5211	char addrbuf[ISC_NETADDR_FORMATSIZE];
5212	char namebuf[DNS_NAME_FORMATSIZE];
5213	char classbuf[64];
5214	char typebuf[64];
5215	int match;
5216
5217	/* By default, we allow any addresses. */
5218	if (view->denyansweracl == NULL)
5219		return (ISC_TRUE);
5220
5221	/*
5222	 * If the owner name matches one in the exclusion list, either exactly
5223	 * or partially, allow it.
5224	 */
5225	if (view->answeracl_exclude != NULL) {
5226		dns_rbtnode_t *node = NULL;
5227
5228		result = dns_rbt_findnode(view->answeracl_exclude, name, NULL,
5229					  &node, NULL, 0, NULL, NULL);
5230
5231		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
5232			return (ISC_TRUE);
5233	}
5234
5235	/*
5236	 * Otherwise, search the filter list for a match for each address
5237	 * record.  If a match is found, the address should be filtered,
5238	 * so should the entire answer.
5239	 */
5240	for (result = dns_rdataset_first(rdataset);
5241	     result == ISC_R_SUCCESS;
5242	     result = dns_rdataset_next(rdataset)) {
5243		dns_rdata_reset(&rdata);
5244		dns_rdataset_current(rdataset, &rdata);
5245		if (rdataset->type == dns_rdatatype_a) {
5246			INSIST(rdata.length == sizeof(ina.s_addr));
5247			memcpy(&ina.s_addr, rdata.data, sizeof(ina.s_addr));
5248			isc_netaddr_fromin(&netaddr, &ina);
5249		} else {
5250			INSIST(rdata.length == sizeof(in6a.s6_addr));
5251			memcpy(in6a.s6_addr, rdata.data, sizeof(in6a.s6_addr));
5252			isc_netaddr_fromin6(&netaddr, &in6a);
5253		}
5254
5255		result = dns_acl_match(&netaddr, NULL, view->denyansweracl,
5256				       &view->aclenv, &match, NULL);
5257
5258		if (result == ISC_R_SUCCESS && match > 0) {
5259			isc_netaddr_format(&netaddr, addrbuf, sizeof(addrbuf));
5260			dns_name_format(name, namebuf, sizeof(namebuf));
5261			dns_rdatatype_format(rdataset->type, typebuf,
5262					     sizeof(typebuf));
5263			dns_rdataclass_format(rdataset->rdclass, classbuf,
5264					      sizeof(classbuf));
5265			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5266				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5267				      "answer address %s denied for %s/%s/%s",
5268				      addrbuf, namebuf, typebuf, classbuf);
5269			return (ISC_FALSE);
5270		}
5271	}
5272
5273	return (ISC_TRUE);
5274}
5275
5276static isc_boolean_t
5277is_answertarget_allowed(dns_view_t *view, dns_name_t *name,
5278			dns_rdatatype_t type, dns_name_t *tname,
5279			dns_name_t *domain)
5280{
5281	isc_result_t result;
5282	dns_rbtnode_t *node = NULL;
5283	char qnamebuf[DNS_NAME_FORMATSIZE];
5284	char tnamebuf[DNS_NAME_FORMATSIZE];
5285	char classbuf[64];
5286	char typebuf[64];
5287
5288	/* By default, we allow any target name. */
5289	if (view->denyanswernames == NULL)
5290		return (ISC_TRUE);
5291
5292	/*
5293	 * If the owner name matches one in the exclusion list, either exactly
5294	 * or partially, allow it.
5295	 */
5296	if (view->answernames_exclude != NULL) {
5297		result = dns_rbt_findnode(view->answernames_exclude, name, NULL,
5298					  &node, NULL, 0, NULL, NULL);
5299		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
5300			return (ISC_TRUE);
5301	}
5302
5303	/*
5304	 * If the target name is a subdomain of the search domain, allow it.
5305	 */
5306	if (dns_name_issubdomain(tname, domain))
5307		return (ISC_TRUE);
5308
5309	/*
5310	 * Otherwise, apply filters.
5311	 */
5312	result = dns_rbt_findnode(view->denyanswernames, tname, NULL, &node,
5313				  NULL, 0, NULL, NULL);
5314	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
5315		dns_name_format(name, qnamebuf, sizeof(qnamebuf));
5316		dns_name_format(tname, tnamebuf, sizeof(tnamebuf));
5317		dns_rdatatype_format(type, typebuf, sizeof(typebuf));
5318		dns_rdataclass_format(view->rdclass, classbuf,
5319				      sizeof(classbuf));
5320		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5321			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5322			      "%s target %s denied for %s/%s",
5323			      typebuf, tnamebuf, qnamebuf, classbuf);
5324		return (ISC_FALSE);
5325	}
5326
5327	return (ISC_TRUE);
5328}
5329
5330static void
5331trim_ns_ttl(fetchctx_t *fctx, dns_name_t *name, dns_rdataset_t *rdataset) {
5332	char ns_namebuf[DNS_NAME_FORMATSIZE];
5333	char namebuf[DNS_NAME_FORMATSIZE];
5334	char tbuf[DNS_RDATATYPE_FORMATSIZE];
5335
5336	if (fctx->ns_ttl_ok && rdataset->ttl > fctx->ns_ttl) {
5337		dns_name_format(name, ns_namebuf, sizeof(ns_namebuf));
5338		dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
5339		dns_rdatatype_format(fctx->type, tbuf, sizeof(tbuf));
5340
5341		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5342			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
5343			      "fctx %p: trimming ttl of %s/NS for %s/%s: "
5344			      "%u -> %u", fctx, ns_namebuf, namebuf, tbuf,
5345			      rdataset->ttl, fctx->ns_ttl);
5346		rdataset->ttl = fctx->ns_ttl;
5347	}
5348}
5349
5350/*
5351 * Handle a no-answer response (NXDOMAIN, NXRRSET, or referral).
5352 * If look_in_options has LOOK_FOR_NS_IN_ANSWER then we look in the answer
5353 * section for the NS RRset if the query type is NS; if it has
5354 * LOOK_FOR_GLUE_IN_ANSWER we look for glue incorrectly returned in the answer
5355 * section for A and AAAA queries.
5356 */
5357#define LOOK_FOR_NS_IN_ANSWER 0x1
5358#define LOOK_FOR_GLUE_IN_ANSWER 0x2
5359
5360static isc_result_t
5361noanswer_response(fetchctx_t *fctx, dns_name_t *oqname,
5362		  unsigned int look_in_options)
5363{
5364	isc_result_t result;
5365	dns_message_t *message;
5366	dns_name_t *name, *qname, *ns_name, *soa_name, *ds_name;
5367	dns_rdataset_t *rdataset, *ns_rdataset;
5368	isc_boolean_t aa, negative_response;
5369	dns_rdatatype_t type;
5370	dns_section_t section;
5371
5372	FCTXTRACE("noanswer_response");
5373
5374	if ((look_in_options & LOOK_FOR_NS_IN_ANSWER) != 0) {
5375		INSIST(fctx->type == dns_rdatatype_ns);
5376		section = DNS_SECTION_ANSWER;
5377	} else
5378		section = DNS_SECTION_AUTHORITY;
5379
5380	message = fctx->rmessage;
5381
5382	/*
5383	 * Setup qname.
5384	 */
5385	if (oqname == NULL) {
5386		/*
5387		 * We have a normal, non-chained negative response or
5388		 * referral.
5389		 */
5390		if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
5391			aa = ISC_TRUE;
5392		else
5393			aa = ISC_FALSE;
5394		qname = &fctx->name;
5395	} else {
5396		/*
5397		 * We're being invoked by answer_response() after it has
5398		 * followed a CNAME/DNAME chain.
5399		 */
5400		qname = oqname;
5401		aa = ISC_FALSE;
5402		/*
5403		 * If the current qname is not a subdomain of the query
5404		 * domain, there's no point in looking at the authority
5405		 * section without doing DNSSEC validation.
5406		 *
5407		 * Until we do that validation, we'll just return success
5408		 * in this case.
5409		 */
5410		if (!dns_name_issubdomain(qname, &fctx->domain))
5411			return (ISC_R_SUCCESS);
5412	}
5413
5414	/*
5415	 * We have to figure out if this is a negative response, or a
5416	 * referral.
5417	 */
5418
5419	/*
5420	 * Sometimes we can tell if its a negative response by looking at
5421	 * the message header.
5422	 */
5423	negative_response = ISC_FALSE;
5424	if (message->rcode == dns_rcode_nxdomain ||
5425	    (message->counts[DNS_SECTION_ANSWER] == 0 &&
5426	     message->counts[DNS_SECTION_AUTHORITY] == 0))
5427		negative_response = ISC_TRUE;
5428
5429	/*
5430	 * Process the authority section.
5431	 */
5432	ns_name = NULL;
5433	ns_rdataset = NULL;
5434	soa_name = NULL;
5435	ds_name = NULL;
5436	result = dns_message_firstname(message, section);
5437	while (result == ISC_R_SUCCESS) {
5438		name = NULL;
5439		dns_message_currentname(message, section, &name);
5440		if (dns_name_issubdomain(name, &fctx->domain)) {
5441			/*
5442			 * Look for NS/SOA RRsets first.
5443			 */
5444			for (rdataset = ISC_LIST_HEAD(name->list);
5445			     rdataset != NULL;
5446			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5447				type = rdataset->type;
5448				if (type == dns_rdatatype_rrsig)
5449					type = rdataset->covers;
5450				if (((type == dns_rdatatype_ns ||
5451				      type == dns_rdatatype_soa) &&
5452				     !dns_name_issubdomain(qname, name))) {
5453					char qbuf[DNS_NAME_FORMATSIZE];
5454					char nbuf[DNS_NAME_FORMATSIZE];
5455					char tbuf[DNS_RDATATYPE_FORMATSIZE];
5456					dns_rdatatype_format(fctx->type, tbuf,
5457							     sizeof(tbuf));
5458					dns_name_format(name, nbuf,
5459							     sizeof(nbuf));
5460					dns_name_format(qname, qbuf,
5461							     sizeof(qbuf));
5462					log_formerr(fctx,
5463						    "unrelated %s %s in "
5464						    "%s authority section",
5465						    tbuf, qbuf, nbuf);
5466					return (DNS_R_FORMERR);
5467				}
5468				if (type == dns_rdatatype_ns) {
5469					/*
5470					 * NS or RRSIG NS.
5471					 *
5472					 * Only one set of NS RRs is allowed.
5473					 */
5474					if (rdataset->type ==
5475					    dns_rdatatype_ns) {
5476						if (ns_name != NULL &&
5477						    name != ns_name) {
5478							log_formerr(fctx,
5479								"multiple NS "
5480								"RRsets in "
5481								"authority "
5482								"section");
5483							return (DNS_R_FORMERR);
5484						}
5485						ns_name = name;
5486						ns_rdataset = rdataset;
5487					}
5488					name->attributes |=
5489						DNS_NAMEATTR_CACHE;
5490					rdataset->attributes |=
5491						DNS_RDATASETATTR_CACHE;
5492					rdataset->trust = dns_trust_glue;
5493				}
5494				if (type == dns_rdatatype_soa) {
5495					/*
5496					 * SOA, or RRSIG SOA.
5497					 *
5498					 * Only one SOA is allowed.
5499					 */
5500					if (rdataset->type ==
5501					    dns_rdatatype_soa) {
5502						if (soa_name != NULL &&
5503						    name != soa_name) {
5504							log_formerr(fctx,
5505								"multiple SOA "
5506								"RRs in "
5507								"authority "
5508								"section");
5509							return (DNS_R_FORMERR);
5510						}
5511						soa_name = name;
5512					}
5513					name->attributes |=
5514						DNS_NAMEATTR_NCACHE;
5515					rdataset->attributes |=
5516						DNS_RDATASETATTR_NCACHE;
5517					if (aa)
5518						rdataset->trust =
5519						    dns_trust_authauthority;
5520					else if (ISFORWARDER(fctx->addrinfo))
5521						rdataset->trust =
5522							dns_trust_answer;
5523					else
5524						rdataset->trust =
5525							dns_trust_additional;
5526				}
5527			}
5528		}
5529		result = dns_message_nextname(message, section);
5530		if (result == ISC_R_NOMORE)
5531			break;
5532		else if (result != ISC_R_SUCCESS)
5533			return (result);
5534	}
5535
5536	log_ns_ttl(fctx, "noanswer_response");
5537
5538	if (ns_rdataset != NULL && dns_name_equal(&fctx->domain, ns_name) &&
5539	    !dns_name_equal(ns_name, dns_rootname))
5540		trim_ns_ttl(fctx, ns_name, ns_rdataset);
5541
5542	/*
5543	 * A negative response has a SOA record (Type 2)
5544	 * and a optional NS RRset (Type 1) or it has neither
5545	 * a SOA or a NS RRset (Type 3, handled above) or
5546	 * rcode is NXDOMAIN (handled above) in which case
5547	 * the NS RRset is allowed (Type 4).
5548	 */
5549	if (soa_name != NULL)
5550		negative_response = ISC_TRUE;
5551
5552	result = dns_message_firstname(message, section);
5553	while (result == ISC_R_SUCCESS) {
5554		name = NULL;
5555		dns_message_currentname(message, section, &name);
5556		if (dns_name_issubdomain(name, &fctx->domain)) {
5557			for (rdataset = ISC_LIST_HEAD(name->list);
5558			     rdataset != NULL;
5559			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5560				type = rdataset->type;
5561				if (type == dns_rdatatype_rrsig)
5562					type = rdataset->covers;
5563				if (type == dns_rdatatype_nsec ||
5564				    type == dns_rdatatype_nsec3) {
5565					/*
5566					 * NSEC or RRSIG NSEC.
5567					 */
5568					if (negative_response) {
5569						name->attributes |=
5570							DNS_NAMEATTR_NCACHE;
5571						rdataset->attributes |=
5572							DNS_RDATASETATTR_NCACHE;
5573					} else if (type == dns_rdatatype_nsec) {
5574						name->attributes |=
5575							DNS_NAMEATTR_CACHE;
5576						rdataset->attributes |=
5577							DNS_RDATASETATTR_CACHE;
5578					}
5579					if (aa)
5580						rdataset->trust =
5581						    dns_trust_authauthority;
5582					else if (ISFORWARDER(fctx->addrinfo))
5583						rdataset->trust =
5584							dns_trust_answer;
5585					else
5586						rdataset->trust =
5587							dns_trust_additional;
5588					/*
5589					 * No additional data needs to be
5590					 * marked.
5591					 */
5592				} else if (type == dns_rdatatype_ds) {
5593					/*
5594					 * DS or SIG DS.
5595					 *
5596					 * These should only be here if
5597					 * this is a referral, and there
5598					 * should only be one DS RRset.
5599					 */
5600					if (ns_name == NULL) {
5601						log_formerr(fctx,
5602							    "DS with no "
5603							    "referral");
5604						return (DNS_R_FORMERR);
5605					}
5606					if (rdataset->type ==
5607					    dns_rdatatype_ds) {
5608						if (ds_name != NULL &&
5609						    name != ds_name) {
5610							log_formerr(fctx,
5611								"DS doesn't "
5612								"match "
5613								"referral "
5614								"(NS)");
5615							return (DNS_R_FORMERR);
5616						}
5617						ds_name = name;
5618					}
5619					name->attributes |=
5620						DNS_NAMEATTR_CACHE;
5621					rdataset->attributes |=
5622						DNS_RDATASETATTR_CACHE;
5623					if (aa)
5624						rdataset->trust =
5625						    dns_trust_authauthority;
5626					else if (ISFORWARDER(fctx->addrinfo))
5627						rdataset->trust =
5628							dns_trust_answer;
5629					else
5630						rdataset->trust =
5631							dns_trust_additional;
5632				}
5633			}
5634		}
5635		result = dns_message_nextname(message, section);
5636		if (result == ISC_R_NOMORE)
5637			break;
5638		else if (result != ISC_R_SUCCESS)
5639			return (result);
5640	}
5641
5642	/*
5643	 * Trigger lookups for DNS nameservers.
5644	 */
5645	if (negative_response && message->rcode == dns_rcode_noerror &&
5646	    fctx->type == dns_rdatatype_ds && soa_name != NULL &&
5647	    dns_name_equal(soa_name, qname) &&
5648	    !dns_name_equal(qname, dns_rootname))
5649		return (DNS_R_CHASEDSSERVERS);
5650
5651	/*
5652	 * Did we find anything?
5653	 */
5654	if (!negative_response && ns_name == NULL) {
5655		/*
5656		 * Nope.
5657		 */
5658		if (oqname != NULL) {
5659			/*
5660			 * We've already got a partial CNAME/DNAME chain,
5661			 * and haven't found else anything useful here, but
5662			 * no error has occurred since we have an answer.
5663			 */
5664			return (ISC_R_SUCCESS);
5665		} else {
5666			/*
5667			 * The responder is insane.
5668			 */
5669			log_formerr(fctx, "invalid response");
5670			return (DNS_R_FORMERR);
5671		}
5672	}
5673
5674	/*
5675	 * If we found both NS and SOA, they should be the same name.
5676	 */
5677	if (ns_name != NULL && soa_name != NULL && ns_name != soa_name) {
5678		log_formerr(fctx, "NS/SOA mismatch");
5679		return (DNS_R_FORMERR);
5680	}
5681
5682	/*
5683	 * Do we have a referral?  (We only want to follow a referral if
5684	 * we're not following a chain.)
5685	 */
5686	if (!negative_response && ns_name != NULL && oqname == NULL) {
5687		/*
5688		 * We already know ns_name is a subdomain of fctx->domain.
5689		 * If ns_name is equal to fctx->domain, we're not making
5690		 * progress.  We return DNS_R_FORMERR so that we'll keep
5691		 * trying other servers.
5692		 */
5693		if (dns_name_equal(ns_name, &fctx->domain)) {
5694			log_formerr(fctx, "non-improving referral");
5695			return (DNS_R_FORMERR);
5696		}
5697
5698		/*
5699		 * If the referral name is not a parent of the query
5700		 * name, consider the responder insane.
5701		 */
5702		if (! dns_name_issubdomain(&fctx->name, ns_name)) {
5703			/* Logged twice */
5704			log_formerr(fctx, "referral to non-parent");
5705			FCTXTRACE("referral to non-parent");
5706			return (DNS_R_FORMERR);
5707		}
5708
5709		/*
5710		 * Mark any additional data related to this rdataset.
5711		 * It's important that we do this before we change the
5712		 * query domain.
5713		 */
5714		INSIST(ns_rdataset != NULL);
5715		fctx->attributes |= FCTX_ATTR_GLUING;
5716		(void)dns_rdataset_additionaldata(ns_rdataset, check_related,
5717						  fctx);
5718#if CHECK_FOR_GLUE_IN_ANSWER
5719		/*
5720		 * Look in the answer section for "glue" that is incorrectly
5721		 * returned as a answer.  This is needed if the server also
5722		 * minimizes the response size by not adding records to the
5723		 * additional section that are in the answer section or if
5724		 * the record gets dropped due to message size constraints.
5725		 */
5726		if ((look_in_options & LOOK_FOR_GLUE_IN_ANSWER) != 0 &&
5727		    (fctx->type == dns_rdatatype_aaaa ||
5728		     fctx->type == dns_rdatatype_a))
5729			(void)dns_rdataset_additionaldata(ns_rdataset,
5730							  check_answer, fctx);
5731#endif
5732		fctx->attributes &= ~FCTX_ATTR_GLUING;
5733		/*
5734		 * NS rdatasets with 0 TTL cause problems.
5735		 * dns_view_findzonecut() will not find them when we
5736		 * try to follow the referral, and we'll SERVFAIL
5737		 * because the best nameservers are now above QDOMAIN.
5738		 * We force the TTL to 1 second to prevent this.
5739		 */
5740		if (ns_rdataset->ttl == 0)
5741			ns_rdataset->ttl = 1;
5742		/*
5743		 * Set the current query domain to the referral name.
5744		 *
5745		 * XXXRTH  We should check if we're in forward-only mode, and
5746		 *		if so we should bail out.
5747		 */
5748		INSIST(dns_name_countlabels(&fctx->domain) > 0);
5749		dns_name_free(&fctx->domain, fctx->mctx);
5750		if (dns_rdataset_isassociated(&fctx->nameservers))
5751			dns_rdataset_disassociate(&fctx->nameservers);
5752		dns_name_init(&fctx->domain, NULL);
5753		result = dns_name_dup(ns_name, fctx->mctx, &fctx->domain);
5754		if (result != ISC_R_SUCCESS)
5755			return (result);
5756		fctx->attributes |= FCTX_ATTR_WANTCACHE;
5757		fctx->ns_ttl_ok = ISC_FALSE;
5758		log_ns_ttl(fctx, "DELEGATION");
5759		return (DNS_R_DELEGATION);
5760	}
5761
5762	/*
5763	 * Since we're not doing a referral, we don't want to cache any
5764	 * NS RRs we may have found.
5765	 */
5766	if (ns_name != NULL)
5767		ns_name->attributes &= ~DNS_NAMEATTR_CACHE;
5768
5769	if (negative_response && oqname == NULL)
5770		fctx->attributes |= FCTX_ATTR_WANTNCACHE;
5771
5772	return (ISC_R_SUCCESS);
5773}
5774
5775static isc_result_t
5776answer_response(fetchctx_t *fctx) {
5777	isc_result_t result;
5778	dns_message_t *message;
5779	dns_name_t *name, *qname, tname, *ns_name;
5780	dns_rdataset_t *rdataset, *ns_rdataset;
5781	isc_boolean_t done, external, chaining, aa, found, want_chaining;
5782	isc_boolean_t have_answer, found_cname, found_type, wanted_chaining;
5783	unsigned int aflag;
5784	dns_rdatatype_t type;
5785	dns_fixedname_t dname, fqname;
5786	dns_view_t *view;
5787
5788	FCTXTRACE("answer_response");
5789
5790	message = fctx->rmessage;
5791
5792	/*
5793	 * Examine the answer section, marking those rdatasets which are
5794	 * part of the answer and should be cached.
5795	 */
5796
5797	done = ISC_FALSE;
5798	found_cname = ISC_FALSE;
5799	found_type = ISC_FALSE;
5800	chaining = ISC_FALSE;
5801	have_answer = ISC_FALSE;
5802	want_chaining = ISC_FALSE;
5803	POST(want_chaining);
5804	if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
5805		aa = ISC_TRUE;
5806	else
5807		aa = ISC_FALSE;
5808	qname = &fctx->name;
5809	type = fctx->type;
5810	view = fctx->res->view;
5811	result = dns_message_firstname(message, DNS_SECTION_ANSWER);
5812	while (!done && result == ISC_R_SUCCESS) {
5813		name = NULL;
5814		dns_message_currentname(message, DNS_SECTION_ANSWER, &name);
5815		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
5816		if (dns_name_equal(name, qname)) {
5817			wanted_chaining = ISC_FALSE;
5818			for (rdataset = ISC_LIST_HEAD(name->list);
5819			     rdataset != NULL;
5820			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5821				found = ISC_FALSE;
5822				want_chaining = ISC_FALSE;
5823				aflag = 0;
5824				if (rdataset->type == dns_rdatatype_nsec3) {
5825					/*
5826					 * NSEC3 records are not allowed to
5827					 * appear in the answer section.
5828					 */
5829					log_formerr(fctx, "NSEC3 in answer");
5830					return (DNS_R_FORMERR);
5831				}
5832
5833				/*
5834				 * Apply filters, if given, on answers to reject
5835				 * a malicious attempt of rebinding.
5836				 */
5837				if ((rdataset->type == dns_rdatatype_a ||
5838				     rdataset->type == dns_rdatatype_aaaa) &&
5839				    !is_answeraddress_allowed(view, name,
5840							      rdataset)) {
5841					return (DNS_R_SERVFAIL);
5842				}
5843
5844				if (rdataset->type == type && !found_cname) {
5845					/*
5846					 * We've found an ordinary answer.
5847					 */
5848					found = ISC_TRUE;
5849					found_type = ISC_TRUE;
5850					done = ISC_TRUE;
5851					aflag = DNS_RDATASETATTR_ANSWER;
5852				} else if (type == dns_rdatatype_any) {
5853					/*
5854					 * We've found an answer matching
5855					 * an ANY query.  There may be
5856					 * more.
5857					 */
5858					found = ISC_TRUE;
5859					aflag = DNS_RDATASETATTR_ANSWER;
5860				} else if (rdataset->type == dns_rdatatype_rrsig
5861					   && rdataset->covers == type
5862					   && !found_cname) {
5863					/*
5864					 * We've found a signature that
5865					 * covers the type we're looking for.
5866					 */
5867					found = ISC_TRUE;
5868					found_type = ISC_TRUE;
5869					aflag = DNS_RDATASETATTR_ANSWERSIG;
5870				} else if (rdataset->type ==
5871					   dns_rdatatype_cname
5872					   && !found_type) {
5873					/*
5874					 * We're looking for something else,
5875					 * but we found a CNAME.
5876					 *
5877					 * Getting a CNAME response for some
5878					 * query types is an error.
5879					 */
5880					if (type == dns_rdatatype_rrsig ||
5881					    type == dns_rdatatype_dnskey ||
5882					    type == dns_rdatatype_nsec ||
5883					    type == dns_rdatatype_nsec3) {
5884						char buf[DNS_RDATATYPE_FORMATSIZE];
5885						dns_rdatatype_format(fctx->type,
5886							      buf, sizeof(buf));
5887						log_formerr(fctx,
5888							    "CNAME response "
5889							    "for %s RR", buf);
5890						return (DNS_R_FORMERR);
5891					}
5892					found = ISC_TRUE;
5893					found_cname = ISC_TRUE;
5894					want_chaining = ISC_TRUE;
5895					aflag = DNS_RDATASETATTR_ANSWER;
5896					result = cname_target(rdataset,
5897							      &tname);
5898					if (result != ISC_R_SUCCESS)
5899						return (result);
5900					/* Apply filters on the target name. */
5901					if (!is_answertarget_allowed(view,
5902							name,
5903							rdataset->type,
5904							&tname,
5905							&fctx->domain)) {
5906						return (DNS_R_SERVFAIL);
5907					}
5908				} else if (rdataset->type == dns_rdatatype_rrsig
5909					   && rdataset->covers ==
5910					   dns_rdatatype_cname
5911					   && !found_type) {
5912					/*
5913					 * We're looking for something else,
5914					 * but we found a SIG CNAME.
5915					 */
5916					found = ISC_TRUE;
5917					found_cname = ISC_TRUE;
5918					aflag = DNS_RDATASETATTR_ANSWERSIG;
5919				}
5920
5921				if (found) {
5922					/*
5923					 * We've found an answer to our
5924					 * question.
5925					 */
5926					name->attributes |=
5927						DNS_NAMEATTR_CACHE;
5928					rdataset->attributes |=
5929						DNS_RDATASETATTR_CACHE;
5930					rdataset->trust = dns_trust_answer;
5931					if (!chaining) {
5932						/*
5933						 * This data is "the" answer
5934						 * to our question only if
5935						 * we're not chaining (i.e.
5936						 * if we haven't followed
5937						 * a CNAME or DNAME).
5938						 */
5939						INSIST(!external);
5940						if (aflag ==
5941						    DNS_RDATASETATTR_ANSWER)
5942							have_answer = ISC_TRUE;
5943						name->attributes |=
5944							DNS_NAMEATTR_ANSWER;
5945						rdataset->attributes |= aflag;
5946						if (aa)
5947							rdataset->trust =
5948							  dns_trust_authanswer;
5949					} else if (external) {
5950						/*
5951						 * This data is outside of
5952						 * our query domain, and
5953						 * may not be cached.
5954						 */
5955						rdataset->attributes |=
5956						    DNS_RDATASETATTR_EXTERNAL;
5957					}
5958
5959					/*
5960					 * Mark any additional data related
5961					 * to this rdataset.
5962					 */
5963					(void)dns_rdataset_additionaldata(
5964							rdataset,
5965							check_related,
5966							fctx);
5967
5968					/*
5969					 * CNAME chaining.
5970					 */
5971					if (want_chaining) {
5972						wanted_chaining = ISC_TRUE;
5973						name->attributes |=
5974							DNS_NAMEATTR_CHAINING;
5975						rdataset->attributes |=
5976						    DNS_RDATASETATTR_CHAINING;
5977						qname = &tname;
5978					}
5979				}
5980				/*
5981				 * We could add an "else" clause here and
5982				 * log that we're ignoring this rdataset.
5983				 */
5984			}
5985			/*
5986			 * If wanted_chaining is true, we've done
5987			 * some chaining as the result of processing
5988			 * this node, and thus we need to set
5989			 * chaining to true.
5990			 *
5991			 * We don't set chaining inside of the
5992			 * rdataset loop because doing that would
5993			 * cause us to ignore the signatures of
5994			 * CNAMEs.
5995			 */
5996			if (wanted_chaining)
5997				chaining = ISC_TRUE;
5998		} else {
5999			/*
6000			 * Look for a DNAME (or its SIG).  Anything else is
6001			 * ignored.
6002			 */
6003			wanted_chaining = ISC_FALSE;
6004			for (rdataset = ISC_LIST_HEAD(name->list);
6005			     rdataset != NULL;
6006			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6007				isc_boolean_t found_dname = ISC_FALSE;
6008				dns_name_t *dname_name;
6009
6010				found = ISC_FALSE;
6011				aflag = 0;
6012				if (rdataset->type == dns_rdatatype_dname) {
6013					/*
6014					 * We're looking for something else,
6015					 * but we found a DNAME.
6016					 *
6017					 * If we're not chaining, then the
6018					 * DNAME should not be external.
6019					 */
6020					if (!chaining && external) {
6021						log_formerr(fctx,
6022							    "external DNAME");
6023						return (DNS_R_FORMERR);
6024					}
6025					found = ISC_TRUE;
6026					want_chaining = ISC_TRUE;
6027					POST(want_chaining);
6028					aflag = DNS_RDATASETATTR_ANSWER;
6029					result = dname_target(fctx, rdataset,
6030							      qname, name,
6031							      &dname);
6032					if (result == ISC_R_NOSPACE) {
6033						/*
6034						 * We can't construct the
6035						 * DNAME target.  Do not
6036						 * try to continue.
6037						 */
6038						want_chaining = ISC_FALSE;
6039						POST(want_chaining);
6040					} else if (result != ISC_R_SUCCESS)
6041						return (result);
6042					else
6043						found_dname = ISC_TRUE;
6044
6045					dname_name = dns_fixedname_name(&dname);
6046					if (!is_answertarget_allowed(view,
6047							qname,
6048							rdataset->type,
6049							dname_name,
6050							&fctx->domain)) {
6051						return (DNS_R_SERVFAIL);
6052					}
6053				} else if (rdataset->type == dns_rdatatype_rrsig
6054					   && rdataset->covers ==
6055					   dns_rdatatype_dname) {
6056					/*
6057					 * We've found a signature that
6058					 * covers the DNAME.
6059					 */
6060					found = ISC_TRUE;
6061					aflag = DNS_RDATASETATTR_ANSWERSIG;
6062				}
6063
6064				if (found) {
6065					/*
6066					 * We've found an answer to our
6067					 * question.
6068					 */
6069					name->attributes |=
6070						DNS_NAMEATTR_CACHE;
6071					rdataset->attributes |=
6072						DNS_RDATASETATTR_CACHE;
6073					rdataset->trust = dns_trust_answer;
6074					if (!chaining) {
6075						/*
6076						 * This data is "the" answer
6077						 * to our question only if
6078						 * we're not chaining.
6079						 */
6080						INSIST(!external);
6081						if (aflag ==
6082						    DNS_RDATASETATTR_ANSWER)
6083							have_answer = ISC_TRUE;
6084						name->attributes |=
6085							DNS_NAMEATTR_ANSWER;
6086						rdataset->attributes |= aflag;
6087						if (aa)
6088							rdataset->trust =
6089							  dns_trust_authanswer;
6090					} else if (external) {
6091						rdataset->attributes |=
6092						    DNS_RDATASETATTR_EXTERNAL;
6093					}
6094
6095					/*
6096					 * DNAME chaining.
6097					 */
6098					if (found_dname) {
6099						/*
6100						 * Copy the dname into the
6101						 * qname fixed name.
6102						 *
6103						 * Although we check for
6104						 * failure of the copy
6105						 * operation, in practice it
6106						 * should never fail since
6107						 * we already know that the
6108						 * result fits in a fixedname.
6109						 */
6110						dns_fixedname_init(&fqname);
6111						result = dns_name_copy(
6112						  dns_fixedname_name(&dname),
6113						  dns_fixedname_name(&fqname),
6114						  NULL);
6115						if (result != ISC_R_SUCCESS)
6116							return (result);
6117						wanted_chaining = ISC_TRUE;
6118						name->attributes |=
6119							DNS_NAMEATTR_CHAINING;
6120						rdataset->attributes |=
6121						    DNS_RDATASETATTR_CHAINING;
6122						qname = dns_fixedname_name(
6123								   &fqname);
6124					}
6125				}
6126			}
6127			if (wanted_chaining)
6128				chaining = ISC_TRUE;
6129		}
6130		result = dns_message_nextname(message, DNS_SECTION_ANSWER);
6131	}
6132	if (result == ISC_R_NOMORE)
6133		result = ISC_R_SUCCESS;
6134	if (result != ISC_R_SUCCESS)
6135		return (result);
6136
6137	/*
6138	 * We should have found an answer.
6139	 */
6140	if (!have_answer) {
6141		log_formerr(fctx, "reply has no answer");
6142		return (DNS_R_FORMERR);
6143	}
6144
6145	/*
6146	 * This response is now potentially cacheable.
6147	 */
6148	fctx->attributes |= FCTX_ATTR_WANTCACHE;
6149
6150	/*
6151	 * Did chaining end before we got the final answer?
6152	 */
6153	if (chaining) {
6154		/*
6155		 * Yes.  This may be a negative reply, so hand off
6156		 * authority section processing to the noanswer code.
6157		 * If it isn't a noanswer response, no harm will be
6158		 * done.
6159		 */
6160		return (noanswer_response(fctx, qname, 0));
6161	}
6162
6163	/*
6164	 * We didn't end with an incomplete chain, so the rcode should be
6165	 * "no error".
6166	 */
6167	if (message->rcode != dns_rcode_noerror) {
6168		log_formerr(fctx, "CNAME/DNAME chain complete, but RCODE "
6169				  "indicates error");
6170		return (DNS_R_FORMERR);
6171	}
6172
6173	/*
6174	 * Examine the authority section (if there is one).
6175	 *
6176	 * We expect there to be only one owner name for all the rdatasets
6177	 * in this section, and we expect that it is not external.
6178	 */
6179	done = ISC_FALSE;
6180	ns_name = NULL;
6181	ns_rdataset = NULL;
6182	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6183	while (!done && result == ISC_R_SUCCESS) {
6184		name = NULL;
6185		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
6186		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
6187		if (!external) {
6188			/*
6189			 * We expect to find NS or SIG NS rdatasets, and
6190			 * nothing else.
6191			 */
6192			for (rdataset = ISC_LIST_HEAD(name->list);
6193			     rdataset != NULL;
6194			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6195				if (rdataset->type == dns_rdatatype_ns ||
6196				    (rdataset->type == dns_rdatatype_rrsig &&
6197				     rdataset->covers == dns_rdatatype_ns)) {
6198					name->attributes |=
6199						DNS_NAMEATTR_CACHE;
6200					rdataset->attributes |=
6201						DNS_RDATASETATTR_CACHE;
6202					if (aa && !chaining)
6203						rdataset->trust =
6204						    dns_trust_authauthority;
6205					else
6206						rdataset->trust =
6207						    dns_trust_additional;
6208
6209					if (rdataset->type == dns_rdatatype_ns) {
6210						ns_name = name;
6211						ns_rdataset = rdataset;
6212					}
6213					/*
6214					 * Mark any additional data related
6215					 * to this rdataset.
6216					 */
6217					(void)dns_rdataset_additionaldata(
6218							rdataset,
6219							check_related,
6220							fctx);
6221					done = ISC_TRUE;
6222				}
6223			}
6224		}
6225		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
6226	}
6227	if (result == ISC_R_NOMORE)
6228		result = ISC_R_SUCCESS;
6229
6230	log_ns_ttl(fctx, "answer_response");
6231
6232	if (ns_rdataset != NULL && dns_name_equal(&fctx->domain, ns_name) &&
6233	    !dns_name_equal(ns_name, dns_rootname))
6234		trim_ns_ttl(fctx, ns_name, ns_rdataset);
6235
6236	return (result);
6237}
6238
6239static isc_boolean_t
6240fctx_decreference(fetchctx_t *fctx) {
6241	isc_boolean_t bucket_empty = ISC_FALSE;
6242
6243	INSIST(fctx->references > 0);
6244	fctx->references--;
6245	if (fctx->references == 0) {
6246		/*
6247		 * No one cares about the result of this fetch anymore.
6248		 */
6249		if (fctx->pending == 0 && fctx->nqueries == 0 &&
6250		    ISC_LIST_EMPTY(fctx->validators) && SHUTTINGDOWN(fctx)) {
6251			/*
6252			 * This fctx is already shutdown; we were just
6253			 * waiting for the last reference to go away.
6254			 */
6255			bucket_empty = fctx_unlink(fctx);
6256			fctx_destroy(fctx);
6257		} else {
6258			/*
6259			 * Initiate shutdown.
6260			 */
6261			fctx_shutdown(fctx);
6262		}
6263	}
6264	return (bucket_empty);
6265}
6266
6267static void
6268resume_dslookup(isc_task_t *task, isc_event_t *event) {
6269	dns_fetchevent_t *fevent;
6270	dns_resolver_t *res;
6271	fetchctx_t *fctx;
6272	isc_result_t result;
6273	isc_boolean_t bucket_empty;
6274	isc_boolean_t locked = ISC_FALSE;
6275	unsigned int bucketnum;
6276	dns_rdataset_t nameservers;
6277	dns_fixedname_t fixed;
6278	dns_name_t *domain;
6279
6280	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
6281	fevent = (dns_fetchevent_t *)event;
6282	fctx = event->ev_arg;
6283	REQUIRE(VALID_FCTX(fctx));
6284	res = fctx->res;
6285
6286	UNUSED(task);
6287	FCTXTRACE("resume_dslookup");
6288
6289	if (fevent->node != NULL)
6290		dns_db_detachnode(fevent->db, &fevent->node);
6291	if (fevent->db != NULL)
6292		dns_db_detach(&fevent->db);
6293
6294	dns_rdataset_init(&nameservers);
6295
6296	bucketnum = fctx->bucketnum;
6297	if (fevent->result == ISC_R_CANCELED) {
6298		dns_resolver_destroyfetch(&fctx->nsfetch);
6299		fctx_done(fctx, ISC_R_CANCELED, __LINE__);
6300	} else if (fevent->result == ISC_R_SUCCESS) {
6301
6302		FCTXTRACE("resuming DS lookup");
6303
6304		dns_resolver_destroyfetch(&fctx->nsfetch);
6305		if (dns_rdataset_isassociated(&fctx->nameservers))
6306			dns_rdataset_disassociate(&fctx->nameservers);
6307		dns_rdataset_clone(fevent->rdataset, &fctx->nameservers);
6308		fctx->ns_ttl = fctx->nameservers.ttl;
6309		fctx->ns_ttl_ok = ISC_TRUE;
6310		log_ns_ttl(fctx, "resume_dslookup");
6311		dns_name_free(&fctx->domain, fctx->mctx);
6312		dns_name_init(&fctx->domain, NULL);
6313		result = dns_name_dup(&fctx->nsname, fctx->mctx, &fctx->domain);
6314		if (result != ISC_R_SUCCESS) {
6315			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
6316			goto cleanup;
6317		}
6318		/*
6319		 * Try again.
6320		 */
6321		fctx_try(fctx, ISC_TRUE, ISC_FALSE);
6322	} else {
6323		unsigned int n;
6324		dns_rdataset_t *nsrdataset = NULL;
6325
6326		/*
6327		 * Retrieve state from fctx->nsfetch before we destroy it.
6328		 */
6329		dns_fixedname_init(&fixed);
6330		domain = dns_fixedname_name(&fixed);
6331		dns_name_copy(&fctx->nsfetch->private->domain, domain, NULL);
6332		if (dns_name_equal(&fctx->nsname, domain)) {
6333			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
6334			dns_resolver_destroyfetch(&fctx->nsfetch);
6335			goto cleanup;
6336		}
6337		if (dns_rdataset_isassociated(
6338		    &fctx->nsfetch->private->nameservers)) {
6339			dns_rdataset_clone(
6340			    &fctx->nsfetch->private->nameservers,
6341			    &nameservers);
6342			nsrdataset = &nameservers;
6343		} else
6344			domain = NULL;
6345		dns_resolver_destroyfetch(&fctx->nsfetch);
6346		n = dns_name_countlabels(&fctx->nsname);
6347		dns_name_getlabelsequence(&fctx->nsname, 1, n - 1,
6348					  &fctx->nsname);
6349
6350		if (dns_rdataset_isassociated(fevent->rdataset))
6351			dns_rdataset_disassociate(fevent->rdataset);
6352		FCTXTRACE("continuing to look for parent's NS records");
6353		result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
6354						  dns_rdatatype_ns, domain,
6355						  nsrdataset, NULL, 0, task,
6356						  resume_dslookup, fctx,
6357						  &fctx->nsrrset, NULL,
6358						  &fctx->nsfetch);
6359		if (result != ISC_R_SUCCESS)
6360			fctx_done(fctx, result, __LINE__);
6361		else {
6362			LOCK(&res->buckets[bucketnum].lock);
6363			locked = ISC_TRUE;
6364			fctx->references++;
6365		}
6366	}
6367
6368 cleanup:
6369	if (dns_rdataset_isassociated(&nameservers))
6370		dns_rdataset_disassociate(&nameservers);
6371	if (dns_rdataset_isassociated(fevent->rdataset))
6372		dns_rdataset_disassociate(fevent->rdataset);
6373	INSIST(fevent->sigrdataset == NULL);
6374	isc_event_free(&event);
6375	if (!locked)
6376		LOCK(&res->buckets[bucketnum].lock);
6377	bucket_empty = fctx_decreference(fctx);
6378	UNLOCK(&res->buckets[bucketnum].lock);
6379	if (bucket_empty)
6380		empty_bucket(res);
6381}
6382
6383static inline void
6384checknamessection(dns_message_t *message, dns_section_t section) {
6385	isc_result_t result;
6386	dns_name_t *name;
6387	dns_rdata_t rdata = DNS_RDATA_INIT;
6388	dns_rdataset_t *rdataset;
6389
6390	for (result = dns_message_firstname(message, section);
6391	     result == ISC_R_SUCCESS;
6392	     result = dns_message_nextname(message, section))
6393	{
6394		name = NULL;
6395		dns_message_currentname(message, section, &name);
6396		for (rdataset = ISC_LIST_HEAD(name->list);
6397		     rdataset != NULL;
6398		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6399			for (result = dns_rdataset_first(rdataset);
6400			     result == ISC_R_SUCCESS;
6401			     result = dns_rdataset_next(rdataset)) {
6402				dns_rdataset_current(rdataset, &rdata);
6403				if (!dns_rdata_checkowner(name, rdata.rdclass,
6404							  rdata.type,
6405							  ISC_FALSE) ||
6406				    !dns_rdata_checknames(&rdata, name, NULL))
6407				{
6408					rdataset->attributes |=
6409						DNS_RDATASETATTR_CHECKNAMES;
6410				}
6411				dns_rdata_reset(&rdata);
6412			}
6413		}
6414	}
6415}
6416
6417static void
6418checknames(dns_message_t *message) {
6419
6420	checknamessection(message, DNS_SECTION_ANSWER);
6421	checknamessection(message, DNS_SECTION_AUTHORITY);
6422	checknamessection(message, DNS_SECTION_ADDITIONAL);
6423}
6424
6425/*
6426 * Log server NSID at log level 'level'
6427 */
6428static isc_result_t
6429log_nsid(dns_rdataset_t *opt, resquery_t *query, int level, isc_mem_t *mctx)
6430{
6431	static const char hex[17] = "0123456789abcdef";
6432	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
6433	isc_uint16_t optcode, nsid_len, buflen, i;
6434	isc_result_t result;
6435	isc_buffer_t nsidbuf;
6436	dns_rdata_t rdata;
6437	unsigned char *p, *buf, *nsid;
6438
6439	/* Extract rdata from OPT rdataset */
6440	result = dns_rdataset_first(opt);
6441	if (result != ISC_R_SUCCESS)
6442		return (ISC_R_FAILURE);
6443
6444	dns_rdata_init(&rdata);
6445	dns_rdataset_current(opt, &rdata);
6446	if (rdata.length < 4)
6447		return (ISC_R_FAILURE);
6448
6449	/* Check for NSID */
6450	isc_buffer_init(&nsidbuf, rdata.data, rdata.length);
6451	isc_buffer_add(&nsidbuf, rdata.length);
6452	optcode = isc_buffer_getuint16(&nsidbuf);
6453	nsid_len = isc_buffer_getuint16(&nsidbuf);
6454	if (optcode != DNS_OPT_NSID || nsid_len == 0)
6455		return (ISC_R_FAILURE);
6456
6457	/* Allocate buffer for storing hex version of the NSID */
6458	buflen = nsid_len * 2 + 1;
6459	buf = isc_mem_get(mctx, buflen);
6460	if (buf == NULL)
6461		return (ISC_R_NOSPACE);
6462
6463	/* Convert to hex */
6464	p = buf;
6465	nsid = rdata.data + 4;
6466	for (i = 0; i < nsid_len; i++) {
6467		*p++ = hex[(nsid[0] >> 4) & 0xf];
6468		*p++ = hex[nsid[0] & 0xf];
6469		nsid++;
6470	}
6471	*p = '\0';
6472
6473	isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
6474			    sizeof(addrbuf));
6475	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6476		      DNS_LOGMODULE_RESOLVER, level,
6477		      "received NSID '%s' from %s", buf, addrbuf);
6478
6479	/* Clean up */
6480	isc_mem_put(mctx, buf, buflen);
6481	return (ISC_R_SUCCESS);
6482}
6483
6484static void
6485log_packet(dns_message_t *message, int level, isc_mem_t *mctx) {
6486	isc_buffer_t buffer;
6487	char *buf = NULL;
6488	int len = 1024;
6489	isc_result_t result;
6490
6491	if (! isc_log_wouldlog(dns_lctx, level))
6492		return;
6493
6494	/*
6495	 * Note that these are multiline debug messages.  We want a newline
6496	 * to appear in the log after each message.
6497	 */
6498
6499	do {
6500		buf = isc_mem_get(mctx, len);
6501		if (buf == NULL)
6502			break;
6503		isc_buffer_init(&buffer, buf, len);
6504		result = dns_message_totext(message, &dns_master_style_debug,
6505					    0, &buffer);
6506		if (result == ISC_R_NOSPACE) {
6507			isc_mem_put(mctx, buf, len);
6508			len += 1024;
6509		} else if (result == ISC_R_SUCCESS)
6510			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6511				      DNS_LOGMODULE_RESOLVER, level,
6512				      "received packet:\n%.*s",
6513				      (int)isc_buffer_usedlength(&buffer),
6514				      buf);
6515	} while (result == ISC_R_NOSPACE);
6516
6517	if (buf != NULL)
6518		isc_mem_put(mctx, buf, len);
6519}
6520
6521static isc_boolean_t
6522iscname(fetchctx_t *fctx) {
6523	isc_result_t result;
6524
6525	result = dns_message_findname(fctx->rmessage, DNS_SECTION_ANSWER,
6526				      &fctx->name, dns_rdatatype_cname, 0,
6527				      NULL, NULL);
6528	return (result == ISC_R_SUCCESS ? ISC_TRUE : ISC_FALSE);
6529}
6530
6531static isc_boolean_t
6532betterreferral(fetchctx_t *fctx) {
6533	isc_result_t result;
6534	dns_name_t *name;
6535	dns_rdataset_t *rdataset;
6536	dns_message_t *message = fctx->rmessage;
6537
6538	for (result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6539	     result == ISC_R_SUCCESS;
6540	     result = dns_message_nextname(message, DNS_SECTION_AUTHORITY)) {
6541		name = NULL;
6542		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
6543		if (!isstrictsubdomain(name, &fctx->domain))
6544			continue;
6545		for (rdataset = ISC_LIST_HEAD(name->list);
6546		     rdataset != NULL;
6547		     rdataset = ISC_LIST_NEXT(rdataset, link))
6548			if (rdataset->type == dns_rdatatype_ns)
6549				return (ISC_TRUE);
6550	}
6551	return (ISC_FALSE);
6552}
6553
6554static void
6555resquery_response(isc_task_t *task, isc_event_t *event) {
6556	isc_result_t result = ISC_R_SUCCESS;
6557	resquery_t *query = event->ev_arg;
6558	dns_dispatchevent_t *devent = (dns_dispatchevent_t *)event;
6559	isc_boolean_t keep_trying, get_nameservers, resend;
6560	isc_boolean_t truncated;
6561	dns_message_t *message;
6562	dns_rdataset_t *opt;
6563	fetchctx_t *fctx;
6564	dns_name_t *fname;
6565	dns_fixedname_t foundname;
6566	isc_stdtime_t now;
6567	isc_time_t tnow, *finish;
6568	dns_adbaddrinfo_t *addrinfo;
6569	unsigned int options;
6570	unsigned int findoptions;
6571	isc_result_t broken_server;
6572	badnstype_t broken_type = badns_response;
6573	isc_boolean_t no_response;
6574
6575	REQUIRE(VALID_QUERY(query));
6576	fctx = query->fctx;
6577	options = query->options;
6578	REQUIRE(VALID_FCTX(fctx));
6579	REQUIRE(event->ev_type == DNS_EVENT_DISPATCH);
6580
6581	QTRACE("response");
6582
6583	if (isc_sockaddr_pf(&query->addrinfo->sockaddr) == PF_INET)
6584		inc_stats(fctx->res, dns_resstatscounter_responsev4);
6585	else
6586		inc_stats(fctx->res, dns_resstatscounter_responsev6);
6587
6588	(void)isc_timer_touch(fctx->timer);
6589
6590	keep_trying = ISC_FALSE;
6591	broken_server = ISC_R_SUCCESS;
6592	get_nameservers = ISC_FALSE;
6593	resend = ISC_FALSE;
6594	truncated = ISC_FALSE;
6595	finish = NULL;
6596	no_response = ISC_FALSE;
6597
6598	if (fctx->res->exiting) {
6599		result = ISC_R_SHUTTINGDOWN;
6600		goto done;
6601	}
6602
6603	fctx->timeouts = 0;
6604	fctx->timeout = ISC_FALSE;
6605	fctx->addrinfo = query->addrinfo;
6606
6607	/*
6608	 * XXXRTH  We should really get the current time just once.  We
6609	 *		need a routine to convert from an isc_time_t to an
6610	 *		isc_stdtime_t.
6611	 */
6612	TIME_NOW(&tnow);
6613	finish = &tnow;
6614	isc_stdtime_get(&now);
6615
6616	/*
6617	 * Did the dispatcher have a problem?
6618	 */
6619	if (devent->result != ISC_R_SUCCESS) {
6620		if (devent->result == ISC_R_EOF &&
6621		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
6622			/*
6623			 * The problem might be that they
6624			 * don't understand EDNS0.  Turn it
6625			 * off and try again.
6626			 */
6627			options |= DNS_FETCHOPT_NOEDNS0;
6628			resend = ISC_TRUE;
6629			/*
6630			 * Remember that they don't like EDNS0.
6631			 */
6632			dns_adb_changeflags(fctx->adb,
6633					    query->addrinfo,
6634					    DNS_FETCHOPT_NOEDNS0,
6635					    DNS_FETCHOPT_NOEDNS0);
6636		} else {
6637			/*
6638			 * There's no hope for this query.
6639			 */
6640			keep_trying = ISC_TRUE;
6641
6642			/*
6643			 * If this is a network error on an exclusive query
6644			 * socket, mark the server as bad so that we won't try
6645			 * it for this fetch again.  Also adjust finish and
6646			 * no_response so that we penalize this address in SRTT
6647			 * adjustment later.
6648			 */
6649			if (query->exclusivesocket &&
6650			    (devent->result == ISC_R_HOSTUNREACH ||
6651			     devent->result == ISC_R_NETUNREACH ||
6652			     devent->result == ISC_R_CONNREFUSED ||
6653			     devent->result == ISC_R_CANCELED)) {
6654				    broken_server = devent->result;
6655				    broken_type = badns_unreachable;
6656				    finish = NULL;
6657				    no_response = ISC_TRUE;
6658			}
6659		}
6660		goto done;
6661	}
6662
6663	message = fctx->rmessage;
6664
6665	if (query->tsig != NULL) {
6666		result = dns_message_setquerytsig(message, query->tsig);
6667		if (result != ISC_R_SUCCESS)
6668			goto done;
6669	}
6670
6671	if (query->tsigkey) {
6672		result = dns_message_settsigkey(message, query->tsigkey);
6673		if (result != ISC_R_SUCCESS)
6674			goto done;
6675	}
6676
6677	result = dns_message_parse(message, &devent->buffer, 0);
6678	if (result != ISC_R_SUCCESS) {
6679		switch (result) {
6680		case ISC_R_UNEXPECTEDEND:
6681			if (!message->question_ok ||
6682			    (message->flags & DNS_MESSAGEFLAG_TC) == 0 ||
6683			    (options & DNS_FETCHOPT_TCP) != 0) {
6684				/*
6685				 * Either the message ended prematurely,
6686				 * and/or wasn't marked as being truncated,
6687				 * and/or this is a response to a query we
6688				 * sent over TCP.  In all of these cases,
6689				 * something is wrong with the remote
6690				 * server and we don't want to retry using
6691				 * TCP.
6692				 */
6693				if ((query->options & DNS_FETCHOPT_NOEDNS0)
6694				    == 0) {
6695					/*
6696					 * The problem might be that they
6697					 * don't understand EDNS0.  Turn it
6698					 * off and try again.
6699					 */
6700					options |= DNS_FETCHOPT_NOEDNS0;
6701					resend = ISC_TRUE;
6702					/*
6703					 * Remember that they don't like EDNS0.
6704					 */
6705					dns_adb_changeflags(
6706							fctx->adb,
6707							query->addrinfo,
6708							DNS_FETCHOPT_NOEDNS0,
6709							DNS_FETCHOPT_NOEDNS0);
6710					inc_stats(fctx->res,
6711						 dns_resstatscounter_edns0fail);
6712				} else {
6713					broken_server = result;
6714					keep_trying = ISC_TRUE;
6715				}
6716				goto done;
6717			}
6718			/*
6719			 * We defer retrying via TCP for a bit so we can
6720			 * check out this message further.
6721			 */
6722			truncated = ISC_TRUE;
6723			break;
6724		case DNS_R_FORMERR:
6725			if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
6726				/*
6727				 * The problem might be that they
6728				 * don't understand EDNS0.  Turn it
6729				 * off and try again.
6730				 */
6731				options |= DNS_FETCHOPT_NOEDNS0;
6732				resend = ISC_TRUE;
6733				/*
6734				 * Remember that they don't like EDNS0.
6735				 */
6736				dns_adb_changeflags(fctx->adb,
6737						    query->addrinfo,
6738						    DNS_FETCHOPT_NOEDNS0,
6739						    DNS_FETCHOPT_NOEDNS0);
6740				inc_stats(fctx->res,
6741						 dns_resstatscounter_edns0fail);
6742			} else {
6743				broken_server = DNS_R_UNEXPECTEDRCODE;
6744				keep_trying = ISC_TRUE;
6745			}
6746			goto done;
6747		default:
6748			/*
6749			 * Something bad has happened.
6750			 */
6751			goto done;
6752		}
6753	}
6754
6755
6756	/*
6757	 * Log the incoming packet.
6758	 */
6759	log_packet(message, ISC_LOG_DEBUG(10), fctx->res->mctx);
6760
6761	/*
6762	 * Did we request NSID?  If so, and if the response contains
6763	 * NSID data, log it at INFO level.
6764	 */
6765	opt = dns_message_getopt(message);
6766	if (opt != NULL && (query->options & DNS_FETCHOPT_WANTNSID) != 0)
6767		log_nsid(opt, query, ISC_LOG_INFO, fctx->res->mctx);
6768
6769	/*
6770	 * If the message is signed, check the signature.  If not, this
6771	 * returns success anyway.
6772	 */
6773	result = dns_message_checksig(message, fctx->res->view);
6774	if (result != ISC_R_SUCCESS)
6775		goto done;
6776
6777	/*
6778	 * The dispatcher should ensure we only get responses with QR set.
6779	 */
6780	INSIST((message->flags & DNS_MESSAGEFLAG_QR) != 0);
6781	/*
6782	 * INSIST() that the message comes from the place we sent it to,
6783	 * since the dispatch code should ensure this.
6784	 *
6785	 * INSIST() that the message id is correct (this should also be
6786	 * ensured by the dispatch code).
6787	 */
6788
6789	/*
6790	 * We have an affirmative response to the query and we have
6791	 * previously got a response from this server which indicated
6792	 * EDNS may not be supported so we can now cache the lack of
6793	 * EDNS support.
6794	 */
6795	if (opt == NULL &&
6796	    (message->rcode == dns_rcode_noerror ||
6797	     message->rcode == dns_rcode_nxdomain ||
6798	     message->rcode == dns_rcode_refused ||
6799	     message->rcode == dns_rcode_yxdomain) &&
6800	     bad_edns(fctx, &query->addrinfo->sockaddr)) {
6801		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
6802		isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
6803				    sizeof(addrbuf));
6804		dns_adb_changeflags(fctx->adb, query->addrinfo,
6805				    DNS_FETCHOPT_NOEDNS0,
6806				    DNS_FETCHOPT_NOEDNS0);
6807	}
6808
6809	/*
6810	 * Deal with truncated responses by retrying using TCP.
6811	 */
6812	if ((message->flags & DNS_MESSAGEFLAG_TC) != 0)
6813		truncated = ISC_TRUE;
6814
6815	if (truncated) {
6816		inc_stats(fctx->res, dns_resstatscounter_truncated);
6817		if ((options & DNS_FETCHOPT_TCP) != 0) {
6818			broken_server = DNS_R_TRUNCATEDTCP;
6819			keep_trying = ISC_TRUE;
6820		} else {
6821			options |= DNS_FETCHOPT_TCP;
6822			resend = ISC_TRUE;
6823		}
6824		goto done;
6825	}
6826
6827	/*
6828	 * Is it a query response?
6829	 */
6830	if (message->opcode != dns_opcode_query) {
6831		/* XXXRTH Log */
6832		broken_server = DNS_R_UNEXPECTEDOPCODE;
6833		keep_trying = ISC_TRUE;
6834		goto done;
6835	}
6836
6837	/*
6838	 * Update statistics about erroneous responses.
6839	 */
6840	if (message->rcode != dns_rcode_noerror) {
6841		switch (message->rcode) {
6842		case dns_rcode_nxdomain:
6843			inc_stats(fctx->res, dns_resstatscounter_nxdomain);
6844			break;
6845		case dns_rcode_servfail:
6846			inc_stats(fctx->res, dns_resstatscounter_servfail);
6847			break;
6848		case dns_rcode_formerr:
6849			inc_stats(fctx->res, dns_resstatscounter_formerr);
6850			break;
6851		default:
6852			inc_stats(fctx->res, dns_resstatscounter_othererror);
6853			break;
6854		}
6855	}
6856
6857	/*
6858	 * Is the remote server broken, or does it dislike us?
6859	 */
6860	if (message->rcode != dns_rcode_noerror &&
6861	    message->rcode != dns_rcode_nxdomain) {
6862		if (((message->rcode == dns_rcode_formerr ||
6863		      message->rcode == dns_rcode_notimp) ||
6864		     (message->rcode == dns_rcode_servfail &&
6865		      dns_message_getopt(message) == NULL)) &&
6866		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
6867			/*
6868			 * It's very likely they don't like EDNS0.
6869			 * If the response code is SERVFAIL, also check if the
6870			 * response contains an OPT RR and don't cache the
6871			 * failure since it can be returned for various other
6872			 * reasons.
6873			 *
6874			 * XXXRTH  We should check if the question
6875			 *		we're asking requires EDNS0, and
6876			 *		if so, we should bail out.
6877			 */
6878			options |= DNS_FETCHOPT_NOEDNS0;
6879			resend = ISC_TRUE;
6880			/*
6881			 * Remember that they may not like EDNS0.
6882			 */
6883			add_bad_edns(fctx, &query->addrinfo->sockaddr);
6884			inc_stats(fctx->res, dns_resstatscounter_edns0fail);
6885		} else if (message->rcode == dns_rcode_formerr) {
6886			if (ISFORWARDER(query->addrinfo)) {
6887				/*
6888				 * This forwarder doesn't understand us,
6889				 * but other forwarders might.  Keep trying.
6890				 */
6891				broken_server = DNS_R_REMOTEFORMERR;
6892				keep_trying = ISC_TRUE;
6893			} else {
6894				/*
6895				 * The server doesn't understand us.  Since
6896				 * all servers for a zone need similar
6897				 * capabilities, we assume that we will get
6898				 * FORMERR from all servers, and thus we
6899				 * cannot make any more progress with this
6900				 * fetch.
6901				 */
6902				log_formerr(fctx, "server sent FORMERR");
6903				result = DNS_R_FORMERR;
6904			}
6905		} else if (message->rcode == dns_rcode_yxdomain) {
6906			/*
6907			 * DNAME mapping failed because the new name
6908			 * was too long.  There's no chance of success
6909			 * for this fetch.
6910			 */
6911			result = DNS_R_YXDOMAIN;
6912		} else if (message->rcode == dns_rcode_badvers) {
6913			unsigned int flags, mask;
6914			unsigned int version;
6915
6916			resend = ISC_TRUE;
6917			INSIST(opt != NULL);
6918			version = (opt->ttl >> 16) & 0xff;
6919			flags = (version << DNS_FETCHOPT_EDNSVERSIONSHIFT) |
6920				DNS_FETCHOPT_EDNSVERSIONSET;
6921			mask = DNS_FETCHOPT_EDNSVERSIONMASK |
6922			       DNS_FETCHOPT_EDNSVERSIONSET;
6923			switch (version) {
6924			case 0:
6925				dns_adb_changeflags(fctx->adb, query->addrinfo,
6926						    flags, mask);
6927				break;
6928			default:
6929				broken_server = DNS_R_BADVERS;
6930				keep_trying = ISC_TRUE;
6931				break;
6932			}
6933		} else {
6934			/*
6935			 * XXXRTH log.
6936			 */
6937			broken_server = DNS_R_UNEXPECTEDRCODE;
6938			INSIST(broken_server != ISC_R_SUCCESS);
6939			keep_trying = ISC_TRUE;
6940		}
6941		goto done;
6942	}
6943
6944	/*
6945	 * Is the question the same as the one we asked?
6946	 */
6947	result = same_question(fctx);
6948	if (result != ISC_R_SUCCESS) {
6949		/* XXXRTH Log */
6950		if (result == DNS_R_FORMERR)
6951			keep_trying = ISC_TRUE;
6952		goto done;
6953	}
6954
6955	/*
6956	 * Is the server lame?
6957	 */
6958	if (fctx->res->lame_ttl != 0 && !ISFORWARDER(query->addrinfo) &&
6959	    is_lame(fctx)) {
6960		inc_stats(fctx->res, dns_resstatscounter_lame);
6961		log_lame(fctx, query->addrinfo);
6962		result = dns_adb_marklame(fctx->adb, query->addrinfo,
6963					  &fctx->name, fctx->type,
6964					  now + fctx->res->lame_ttl);
6965		if (result != ISC_R_SUCCESS)
6966			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6967				      DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR,
6968				      "could not mark server as lame: %s",
6969				      isc_result_totext(result));
6970		broken_server = DNS_R_LAME;
6971		keep_trying = ISC_TRUE;
6972		goto done;
6973	}
6974
6975	/*
6976	 * Enforce delegations only zones like NET and COM.
6977	 */
6978	if (!ISFORWARDER(query->addrinfo) &&
6979	    dns_view_isdelegationonly(fctx->res->view, &fctx->domain) &&
6980	    !dns_name_equal(&fctx->domain, &fctx->name) &&
6981	    fix_mustbedelegationornxdomain(message, fctx)) {
6982		char namebuf[DNS_NAME_FORMATSIZE];
6983		char domainbuf[DNS_NAME_FORMATSIZE];
6984		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
6985		char classbuf[64];
6986		char typebuf[64];
6987
6988		dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
6989		dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
6990		dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
6991		dns_rdataclass_format(fctx->res->rdclass, classbuf,
6992				      sizeof(classbuf));
6993		isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
6994				    sizeof(addrbuf));
6995
6996		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DELEGATION_ONLY,
6997			     DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
6998			     "enforced delegation-only for '%s' (%s/%s/%s) "
6999			     "from %s",
7000			     domainbuf, namebuf, typebuf, classbuf, addrbuf);
7001	}
7002
7003	if ((fctx->res->options & DNS_RESOLVER_CHECKNAMES) != 0)
7004		checknames(message);
7005
7006	/*
7007	 * Clear cache bits.
7008	 */
7009	fctx->attributes &= ~(FCTX_ATTR_WANTNCACHE | FCTX_ATTR_WANTCACHE);
7010
7011	/*
7012	 * Did we get any answers?
7013	 */
7014	if (message->counts[DNS_SECTION_ANSWER] > 0 &&
7015	    (message->rcode == dns_rcode_noerror ||
7016	     message->rcode == dns_rcode_nxdomain)) {
7017		/*
7018		 * [normal case]
7019		 * We've got answers.  If it has an authoritative answer or an
7020		 * answer from a forwarder, we're done.
7021		 */
7022		if ((message->flags & DNS_MESSAGEFLAG_AA) != 0 ||
7023		    ISFORWARDER(query->addrinfo))
7024			result = answer_response(fctx);
7025		else if (iscname(fctx) &&
7026			 fctx->type != dns_rdatatype_any &&
7027			 fctx->type != dns_rdatatype_cname) {
7028			/*
7029			 * A BIND8 server could return a non-authoritative
7030			 * answer when a CNAME is followed.  We should treat
7031			 * it as a valid answer.
7032			 */
7033			result = answer_response(fctx);
7034		} else if (fctx->type != dns_rdatatype_ns &&
7035			   !betterreferral(fctx)) {
7036			/*
7037			 * Lame response !!!.
7038			 */
7039			result = answer_response(fctx);
7040		} else {
7041			if (fctx->type == dns_rdatatype_ns) {
7042				/*
7043				 * A BIND 8 server could incorrectly return a
7044				 * non-authoritative answer to an NS query
7045				 * instead of a referral. Since this answer
7046				 * lacks the SIGs necessary to do DNSSEC
7047				 * validation, we must invoke the following
7048				 * special kludge to treat it as a referral.
7049				 */
7050				result = noanswer_response(fctx, NULL,
7051						   LOOK_FOR_NS_IN_ANSWER);
7052			} else {
7053				/*
7054				 * Some other servers may still somehow include
7055				 * an answer when it should return a referral
7056				 * with an empty answer.  Check to see if we can
7057				 * treat this as a referral by ignoring the
7058				 * answer.  Further more, there may be an
7059				 * implementation that moves A/AAAA glue records
7060				 * to the answer section for that type of
7061				 * delegation when the query is for that glue
7062				 * record.  LOOK_FOR_GLUE_IN_ANSWER will handle
7063				 * such a corner case.
7064				 */
7065				result = noanswer_response(fctx, NULL,
7066						   LOOK_FOR_GLUE_IN_ANSWER);
7067			}
7068			if (result != DNS_R_DELEGATION) {
7069				/*
7070				 * At this point, AA is not set, the response
7071				 * is not a referral, and the server is not a
7072				 * forwarder.  It is technically lame and it's
7073				 * easier to treat it as such than to figure out
7074				 * some more elaborate course of action.
7075				 */
7076				broken_server = DNS_R_LAME;
7077				keep_trying = ISC_TRUE;
7078				goto done;
7079			}
7080			goto force_referral;
7081		}
7082		if (result != ISC_R_SUCCESS) {
7083			if (result == DNS_R_FORMERR)
7084				keep_trying = ISC_TRUE;
7085			goto done;
7086		}
7087	} else if (message->counts[DNS_SECTION_AUTHORITY] > 0 ||
7088		   message->rcode == dns_rcode_noerror ||
7089		   message->rcode == dns_rcode_nxdomain) {
7090		/*
7091		 * NXDOMAIN, NXRDATASET, or referral.
7092		 */
7093		result = noanswer_response(fctx, NULL, 0);
7094		if (result == DNS_R_CHASEDSSERVERS) {
7095		} else if (result == DNS_R_DELEGATION) {
7096		force_referral:
7097			/*
7098			 * We don't have the answer, but we know a better
7099			 * place to look.
7100			 */
7101			get_nameservers = ISC_TRUE;
7102			keep_trying = ISC_TRUE;
7103			/*
7104			 * We have a new set of name servers, and it
7105			 * has not experienced any restarts yet.
7106			 */
7107			fctx->restarts = 0;
7108
7109			/*
7110			 * Update local statistics counters collected for each
7111			 * new zone.
7112			 */
7113			fctx->referrals++;
7114			fctx->querysent = 0;
7115			fctx->lamecount = 0;
7116			fctx->neterr = 0;
7117			fctx->badresp = 0;
7118			fctx->adberr = 0;
7119
7120			result = ISC_R_SUCCESS;
7121		} else if (result != ISC_R_SUCCESS) {
7122			/*
7123			 * Something has gone wrong.
7124			 */
7125			if (result == DNS_R_FORMERR)
7126				keep_trying = ISC_TRUE;
7127			goto done;
7128		}
7129	} else {
7130		/*
7131		 * The server is insane.
7132		 */
7133		/* XXXRTH Log */
7134		broken_server = DNS_R_UNEXPECTEDRCODE;
7135		keep_trying = ISC_TRUE;
7136		goto done;
7137	}
7138
7139	/*
7140	 * Follow additional section data chains.
7141	 */
7142	chase_additional(fctx);
7143
7144	/*
7145	 * Cache the cacheable parts of the message.  This may also cause
7146	 * work to be queued to the DNSSEC validator.
7147	 */
7148	if (WANTCACHE(fctx)) {
7149		result = cache_message(fctx, query->addrinfo, now);
7150		if (result != ISC_R_SUCCESS)
7151			goto done;
7152	}
7153
7154	/*
7155	 * Ncache the negatively cacheable parts of the message.  This may
7156	 * also cause work to be queued to the DNSSEC validator.
7157	 */
7158	if (WANTNCACHE(fctx)) {
7159		dns_rdatatype_t covers;
7160		if (message->rcode == dns_rcode_nxdomain)
7161			covers = dns_rdatatype_any;
7162		else
7163			covers = fctx->type;
7164
7165		/*
7166		 * Cache any negative cache entries in the message.
7167		 */
7168		result = ncache_message(fctx, query->addrinfo, covers, now);
7169	}
7170
7171 done:
7172	/*
7173	 * Remember the query's addrinfo, in case we need to mark the
7174	 * server as broken.
7175	 */
7176	addrinfo = query->addrinfo;
7177
7178	/*
7179	 * Cancel the query.
7180	 *
7181	 * XXXRTH  Don't cancel the query if waiting for validation?
7182	 */
7183	fctx_cancelquery(&query, &devent, finish, no_response);
7184
7185	if (keep_trying) {
7186		if (result == DNS_R_FORMERR)
7187			broken_server = DNS_R_FORMERR;
7188		if (broken_server != ISC_R_SUCCESS) {
7189			/*
7190			 * Add this server to the list of bad servers for
7191			 * this fctx.
7192			 */
7193			add_bad(fctx, addrinfo, broken_server, broken_type);
7194		}
7195
7196		if (get_nameservers) {
7197			dns_name_t *name;
7198			dns_fixedname_init(&foundname);
7199			fname = dns_fixedname_name(&foundname);
7200			if (result != ISC_R_SUCCESS) {
7201				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7202				return;
7203			}
7204			findoptions = 0;
7205			if (dns_rdatatype_atparent(fctx->type))
7206				findoptions |= DNS_DBFIND_NOEXACT;
7207			if ((options & DNS_FETCHOPT_UNSHARED) == 0)
7208				name = &fctx->name;
7209			else
7210				name = &fctx->domain;
7211			result = dns_view_findzonecut(fctx->res->view,
7212						      name, fname,
7213						      now, findoptions,
7214						      ISC_TRUE,
7215						      &fctx->nameservers,
7216						      NULL);
7217			if (result != ISC_R_SUCCESS) {
7218				FCTXTRACE("couldn't find a zonecut");
7219				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7220				return;
7221			}
7222			if (!dns_name_issubdomain(fname, &fctx->domain)) {
7223				/*
7224				 * The best nameservers are now above our
7225				 * QDOMAIN.
7226				 */
7227				FCTXTRACE("nameservers now above QDOMAIN");
7228				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7229				return;
7230			}
7231			dns_name_free(&fctx->domain, fctx->mctx);
7232			dns_name_init(&fctx->domain, NULL);
7233			result = dns_name_dup(fname, fctx->mctx, &fctx->domain);
7234			if (result != ISC_R_SUCCESS) {
7235				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7236				return;
7237			}
7238			fctx->ns_ttl = fctx->nameservers.ttl;
7239			fctx->ns_ttl_ok = ISC_TRUE;
7240			fctx_cancelqueries(fctx, ISC_TRUE);
7241			fctx_cleanupfinds(fctx);
7242			fctx_cleanupaltfinds(fctx);
7243			fctx_cleanupforwaddrs(fctx);
7244			fctx_cleanupaltaddrs(fctx);
7245		}
7246		/*
7247		 * Try again.
7248		 */
7249		fctx_try(fctx, !get_nameservers, ISC_FALSE);
7250	} else if (resend) {
7251		/*
7252		 * Resend (probably with changed options).
7253		 */
7254		FCTXTRACE("resend");
7255		inc_stats(fctx->res, dns_resstatscounter_retry);
7256		result = fctx_query(fctx, addrinfo, options);
7257		if (result != ISC_R_SUCCESS)
7258			fctx_done(fctx, result, __LINE__);
7259	} else if (result == ISC_R_SUCCESS && !HAVE_ANSWER(fctx)) {
7260		/*
7261		 * All has gone well so far, but we are waiting for the
7262		 * DNSSEC validator to validate the answer.
7263		 */
7264		FCTXTRACE("wait for validator");
7265		fctx_cancelqueries(fctx, ISC_TRUE);
7266		/*
7267		 * We must not retransmit while the validator is working;
7268		 * it has references to the current rmessage.
7269		 */
7270		result = fctx_stopidletimer(fctx);
7271		if (result != ISC_R_SUCCESS)
7272			fctx_done(fctx, result, __LINE__);
7273	} else if (result == DNS_R_CHASEDSSERVERS) {
7274		unsigned int n;
7275		add_bad(fctx, addrinfo, result, broken_type);
7276		fctx_cancelqueries(fctx, ISC_TRUE);
7277		fctx_cleanupfinds(fctx);
7278		fctx_cleanupforwaddrs(fctx);
7279
7280		n = dns_name_countlabels(&fctx->name);
7281		dns_name_getlabelsequence(&fctx->name, 1, n - 1, &fctx->nsname);
7282
7283		FCTXTRACE("suspending DS lookup to find parent's NS records");
7284
7285		result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
7286						  dns_rdatatype_ns,
7287						  NULL, NULL, NULL, 0, task,
7288						  resume_dslookup, fctx,
7289						  &fctx->nsrrset, NULL,
7290						  &fctx->nsfetch);
7291		if (result != ISC_R_SUCCESS)
7292			fctx_done(fctx, result, __LINE__);
7293		else {
7294			LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
7295			fctx->references++;
7296			UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
7297			result = fctx_stopidletimer(fctx);
7298			if (result != ISC_R_SUCCESS)
7299				fctx_done(fctx, result, __LINE__);
7300		}
7301	} else {
7302		/*
7303		 * We're done.
7304		 */
7305		fctx_done(fctx, result, __LINE__);
7306	}
7307}
7308
7309
7310/***
7311 *** Resolver Methods
7312 ***/
7313static void
7314destroy_badcache(dns_resolver_t *res) {
7315	dns_badcache_t *bad, *next;
7316	unsigned int i;
7317
7318	if (res->badcache != NULL) {
7319		for (i = 0; i < res->badhash; i++)
7320			for (bad = res->badcache[i]; bad != NULL;
7321			     bad = next) {
7322				next = bad->next;
7323				isc_mem_put(res->mctx, bad, sizeof(*bad) +
7324					    bad->name.length);
7325				res->badcount--;
7326			}
7327		isc_mem_put(res->mctx, res->badcache,
7328			    sizeof(*res->badcache) * res->badhash);
7329		res->badcache = NULL;
7330		res->badhash = 0;
7331		INSIST(res->badcount == 0);
7332	}
7333}
7334
7335static void
7336destroy(dns_resolver_t *res) {
7337	unsigned int i;
7338	alternate_t *a;
7339
7340	REQUIRE(res->references == 0);
7341	REQUIRE(!res->priming);
7342	REQUIRE(res->primefetch == NULL);
7343
7344	RTRACE("destroy");
7345
7346	INSIST(res->nfctx == 0);
7347
7348	DESTROYLOCK(&res->primelock);
7349	DESTROYLOCK(&res->nlock);
7350	DESTROYLOCK(&res->lock);
7351	for (i = 0; i < res->nbuckets; i++) {
7352		INSIST(ISC_LIST_EMPTY(res->buckets[i].fctxs));
7353		isc_task_shutdown(res->buckets[i].task);
7354		isc_task_detach(&res->buckets[i].task);
7355		DESTROYLOCK(&res->buckets[i].lock);
7356		isc_mem_detach(&res->buckets[i].mctx);
7357	}
7358	isc_mem_put(res->mctx, res->buckets,
7359		    res->nbuckets * sizeof(fctxbucket_t));
7360	if (res->dispatchv4 != NULL)
7361		dns_dispatch_detach(&res->dispatchv4);
7362	if (res->dispatchv6 != NULL)
7363		dns_dispatch_detach(&res->dispatchv6);
7364	while ((a = ISC_LIST_HEAD(res->alternates)) != NULL) {
7365		ISC_LIST_UNLINK(res->alternates, a, link);
7366		if (!a->isaddress)
7367			dns_name_free(&a->_u._n.name, res->mctx);
7368		isc_mem_put(res->mctx, a, sizeof(*a));
7369	}
7370	dns_resolver_reset_algorithms(res);
7371	destroy_badcache(res);
7372	dns_resolver_resetmustbesecure(res);
7373#if USE_ALGLOCK
7374	isc_rwlock_destroy(&res->alglock);
7375#endif
7376#if USE_MBSLOCK
7377	isc_rwlock_destroy(&res->mbslock);
7378#endif
7379	isc_timer_detach(&res->spillattimer);
7380	res->magic = 0;
7381	isc_mem_put(res->mctx, res, sizeof(*res));
7382}
7383
7384static void
7385send_shutdown_events(dns_resolver_t *res) {
7386	isc_event_t *event, *next_event;
7387	isc_task_t *etask;
7388
7389	/*
7390	 * Caller must be holding the resolver lock.
7391	 */
7392
7393	for (event = ISC_LIST_HEAD(res->whenshutdown);
7394	     event != NULL;
7395	     event = next_event) {
7396		next_event = ISC_LIST_NEXT(event, ev_link);
7397		ISC_LIST_UNLINK(res->whenshutdown, event, ev_link);
7398		etask = event->ev_sender;
7399		event->ev_sender = res;
7400		isc_task_sendanddetach(&etask, &event);
7401	}
7402}
7403
7404static void
7405empty_bucket(dns_resolver_t *res) {
7406	RTRACE("empty_bucket");
7407
7408	LOCK(&res->lock);
7409
7410	INSIST(res->activebuckets > 0);
7411	res->activebuckets--;
7412	if (res->activebuckets == 0)
7413		send_shutdown_events(res);
7414
7415	UNLOCK(&res->lock);
7416}
7417
7418static void
7419spillattimer_countdown(isc_task_t *task, isc_event_t *event) {
7420	dns_resolver_t *res = event->ev_arg;
7421	isc_result_t result;
7422	unsigned int count;
7423	isc_boolean_t logit = ISC_FALSE;
7424
7425	REQUIRE(VALID_RESOLVER(res));
7426
7427	UNUSED(task);
7428
7429	LOCK(&res->lock);
7430	INSIST(!res->exiting);
7431	if (res->spillat > res->spillatmin) {
7432		res->spillat--;
7433		logit = ISC_TRUE;
7434	}
7435	if (res->spillat <= res->spillatmin) {
7436		result = isc_timer_reset(res->spillattimer,
7437					 isc_timertype_inactive, NULL,
7438					 NULL, ISC_TRUE);
7439		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7440	}
7441	count = res->spillat;
7442	UNLOCK(&res->lock);
7443	if (logit)
7444		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7445			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7446			      "clients-per-query decreased to %u", count);
7447
7448	isc_event_free(&event);
7449}
7450
/*
 * Create a resolver for 'view' and return it through '*resp'.
 *
 * The resolver structure and its 'ntasks' fetch-context buckets are
 * allocated from view->mctx.  Each bucket gets its own lock, task and
 * memory context.  The supplied dispatches (at least one of which must be
 * non-NULL) are attached, the resolver locks are initialized and an
 * inactive spill timer is created.
 *
 * Returns ISC_R_SUCCESS on success.  On any failure everything acquired so
 * far is released through the cleanup labels at the end of the function,
 * '*resp' is left untouched and the failing result is returned
 * (ISC_R_NOMEMORY for the allocations made directly here).
 */
isc_result_t
dns_resolver_create(dns_view_t *view,
		    isc_taskmgr_t *taskmgr, unsigned int ntasks,
		    isc_socketmgr_t *socketmgr,
		    isc_timermgr_t *timermgr,
		    unsigned int options,
		    dns_dispatchmgr_t *dispatchmgr,
		    dns_dispatch_t *dispatchv4,
		    dns_dispatch_t *dispatchv6,
		    dns_resolver_t **resp)
{
	dns_resolver_t *res;
	isc_result_t result = ISC_R_SUCCESS;
	unsigned int i, buckets_created = 0;
	isc_task_t *task = NULL;
	char name[16];
	unsigned dispattr;

	/*
	 * Create a resolver.
	 */

	REQUIRE(DNS_VIEW_VALID(view));
	REQUIRE(ntasks > 0);
	REQUIRE(resp != NULL && *resp == NULL);
	REQUIRE(dispatchmgr != NULL);
	REQUIRE(dispatchv4 != NULL || dispatchv6 != NULL);

	res = isc_mem_get(view->mctx, sizeof(*res));
	if (res == NULL)
		return (ISC_R_NOMEMORY);
	RTRACE("create");

	/*
	 * Record the caller-supplied managers and set the defaults for the
	 * tunable fields.
	 */
	res->mctx = view->mctx;
	res->rdclass = view->rdclass;
	res->socketmgr = socketmgr;
	res->timermgr = timermgr;
	res->taskmgr = taskmgr;
	res->dispatchmgr = dispatchmgr;
	res->view = view;
	res->options = options;
	res->lame_ttl = 0;
	ISC_LIST_INIT(res->alternates);
	res->udpsize = RECV_BUFFER_SIZE;
	res->algorithms = NULL;
	res->badcache = NULL;
	res->badcount = 0;
	res->badhash = 0;
	res->badsweep = 0;
	res->mustbesecure = NULL;
	res->spillatmin = res->spillat = 10;
	res->spillatmax = 100;
	res->spillattimer = NULL;
	res->zero_no_soa_ttl = ISC_FALSE;
	res->query_timeout = DEFAULT_QUERY_TIMEOUT;
	res->ndisps = 0;
	res->nextdisp = 0; /* meaningless at this point, but init it */
	res->nbuckets = ntasks;
	res->activebuckets = ntasks;
	res->buckets = isc_mem_get(view->mctx,
				   ntasks * sizeof(fctxbucket_t));
	if (res->buckets == NULL) {
		result = ISC_R_NOMEMORY;
		goto cleanup_res;
	}

	/*
	 * Build the buckets one at a time.  'buckets_created' counts only
	 * fully constructed buckets; a bucket that fails part-way undoes its
	 * own partial state before jumping to cleanup_buckets.
	 */
	for (i = 0; i < ntasks; i++) {
		result = isc_mutex_init(&res->buckets[i].lock);
		if (result != ISC_R_SUCCESS)
			goto cleanup_buckets;
		res->buckets[i].task = NULL;
		result = isc_task_create(taskmgr, 0, &res->buckets[i].task);
		if (result != ISC_R_SUCCESS) {
			DESTROYLOCK(&res->buckets[i].lock);
			goto cleanup_buckets;
		}
		res->buckets[i].mctx = NULL;
		/* The same "res<N>" label names the bucket's mctx and task. */
		snprintf(name, sizeof(name), "res%u", i);
#ifdef ISC_PLATFORM_USETHREADS
		/*
		 * Use a separate memory context for each bucket to reduce
		 * contention among multiple threads.  Do this only when
		 * enabling threads because it will require more memory.
		 */
		result = isc_mem_create(0, 0, &res->buckets[i].mctx);
		if (result != ISC_R_SUCCESS) {
			isc_task_detach(&res->buckets[i].task);
			DESTROYLOCK(&res->buckets[i].lock);
			goto cleanup_buckets;
		}
		isc_mem_setname(res->buckets[i].mctx, name, NULL);
#else
		isc_mem_attach(view->mctx, &res->buckets[i].mctx);
#endif
		isc_task_setname(res->buckets[i].task, name, res);
		ISC_LIST_INIT(res->buckets[i].fctxs);
		res->buckets[i].exiting = ISC_FALSE;
		buckets_created++;
	}

	/*
	 * Attach the dispatches that were supplied and note whether each is
	 * flagged DNS_DISPATCHATTR_EXCLUSIVE.  The exclusive flags are only
	 * assigned when the matching dispatch is non-NULL.
	 */
	res->dispatchv4 = NULL;
	if (dispatchv4 != NULL) {
		dns_dispatch_attach(dispatchv4, &res->dispatchv4);
		dispattr = dns_dispatch_getattributes(dispatchv4);
		res->exclusivev4 =
			ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
	}

	res->dispatchv6 = NULL;
	if (dispatchv6 != NULL) {
		dns_dispatch_attach(dispatchv6, &res->dispatchv6);
		dispattr = dns_dispatch_getattributes(dispatchv6);
		res->exclusivev6 =
			ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
	}

	/* The caller receives the initial (and so far only) reference. */
	res->references = 1;
	res->exiting = ISC_FALSE;
	res->frozen = ISC_FALSE;
	ISC_LIST_INIT(res->whenshutdown);
	res->priming = ISC_FALSE;
	res->primefetch = NULL;
	res->nfctx = 0;

	result = isc_mutex_init(&res->lock);
	if (result != ISC_R_SUCCESS)
		goto cleanup_dispatches;

	result = isc_mutex_init(&res->nlock);
	if (result != ISC_R_SUCCESS)
		goto cleanup_lock;

	result = isc_mutex_init(&res->primelock);
	if (result != ISC_R_SUCCESS)
		goto cleanup_nlock;

	/*
	 * The spill timer runs spillattimer_countdown() on a task created
	 * just for it.  The local task reference is dropped immediately
	 * after the timer creation attempt, whether or not it succeeded.
	 */
	task = NULL;
	result = isc_task_create(taskmgr, 0, &task);
	if (result != ISC_R_SUCCESS)
		goto cleanup_primelock;

	result = isc_timer_create(timermgr, isc_timertype_inactive, NULL, NULL,
				  task, spillattimer_countdown, res,
				  &res->spillattimer);
	isc_task_detach(&task);
	if (result != ISC_R_SUCCESS)
		goto cleanup_primelock;

#if USE_ALGLOCK
	result = isc_rwlock_init(&res->alglock, 0, 0);
	if (result != ISC_R_SUCCESS)
		goto cleanup_spillattimer;
#endif
#if USE_MBSLOCK
	result = isc_rwlock_init(&res->mbslock, 0, 0);
	if (result != ISC_R_SUCCESS)
		goto cleanup_alglock;
#endif

	/* Only a fully constructed resolver gets a valid magic number. */
	res->magic = RES_MAGIC;

	*resp = res;

	return (ISC_R_SUCCESS);

	/*
	 * Failure unwinding: each label releases one resource and falls
	 * through to the labels below it, in reverse order of acquisition.
	 * The first two labels exist only when the corresponding rwlocks are
	 * compiled in.
	 */
#if USE_MBSLOCK
 cleanup_alglock:
#if USE_ALGLOCK
	isc_rwlock_destroy(&res->alglock);
#endif
#endif
#if USE_ALGLOCK || USE_MBSLOCK
 cleanup_spillattimer:
	isc_timer_detach(&res->spillattimer);
#endif

 cleanup_primelock:
	DESTROYLOCK(&res->primelock);

 cleanup_nlock:
	DESTROYLOCK(&res->nlock);

 cleanup_lock:
	DESTROYLOCK(&res->lock);

 cleanup_dispatches:
	if (res->dispatchv6 != NULL)
		dns_dispatch_detach(&res->dispatchv6);
	if (res->dispatchv4 != NULL)
		dns_dispatch_detach(&res->dispatchv4);

 cleanup_buckets:
	/* Only the buckets that were completely set up are torn down here. */
	for (i = 0; i < buckets_created; i++) {
		isc_mem_detach(&res->buckets[i].mctx);
		DESTROYLOCK(&res->buckets[i].lock);
		isc_task_shutdown(res->buckets[i].task);
		isc_task_detach(&res->buckets[i].task);
	}
	isc_mem_put(view->mctx, res->buckets,
		    res->nbuckets * sizeof(fctxbucket_t));

 cleanup_res:
	isc_mem_put(view->mctx, res, sizeof(*res));

	return (result);
}
7655
7656#ifdef BIND9
7657static void
7658prime_done(isc_task_t *task, isc_event_t *event) {
7659	dns_resolver_t *res;
7660	dns_fetchevent_t *fevent;
7661	dns_fetch_t *fetch;
7662	dns_db_t *db = NULL;
7663
7664	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
7665	fevent = (dns_fetchevent_t *)event;
7666	res = event->ev_arg;
7667	REQUIRE(VALID_RESOLVER(res));
7668
7669	UNUSED(task);
7670
7671	LOCK(&res->lock);
7672
7673	INSIST(res->priming);
7674	res->priming = ISC_FALSE;
7675	LOCK(&res->primelock);
7676	fetch = res->primefetch;
7677	res->primefetch = NULL;
7678	UNLOCK(&res->primelock);
7679
7680	UNLOCK(&res->lock);
7681
7682	if (fevent->result == ISC_R_SUCCESS &&
7683	    res->view->cache != NULL && res->view->hints != NULL) {
7684		dns_cache_attachdb(res->view->cache, &db);
7685		dns_root_checkhints(res->view, res->view->hints, db);
7686		dns_db_detach(&db);
7687	}
7688
7689	if (fevent->node != NULL)
7690		dns_db_detachnode(fevent->db, &fevent->node);
7691	if (fevent->db != NULL)
7692		dns_db_detach(&fevent->db);
7693	if (dns_rdataset_isassociated(fevent->rdataset))
7694		dns_rdataset_disassociate(fevent->rdataset);
7695	INSIST(fevent->sigrdataset == NULL);
7696
7697	isc_mem_put(res->mctx, fevent->rdataset, sizeof(*fevent->rdataset));
7698
7699	isc_event_free(&event);
7700	dns_resolver_destroyfetch(&fetch);
7701}
7702
7703void
7704dns_resolver_prime(dns_resolver_t *res) {
7705	isc_boolean_t want_priming = ISC_FALSE;
7706	dns_rdataset_t *rdataset;
7707	isc_result_t result;
7708
7709	REQUIRE(VALID_RESOLVER(res));
7710	REQUIRE(res->frozen);
7711
7712	RTRACE("dns_resolver_prime");
7713
7714	LOCK(&res->lock);
7715
7716	if (!res->exiting && !res->priming) {
7717		INSIST(res->primefetch == NULL);
7718		res->priming = ISC_TRUE;
7719		want_priming = ISC_TRUE;
7720	}
7721
7722	UNLOCK(&res->lock);
7723
7724	if (want_priming) {
7725		/*
7726		 * To avoid any possible recursive locking problems, we
7727		 * start the priming fetch like any other fetch, and holding
7728		 * no resolver locks.  No one else will try to start it
7729		 * because we're the ones who set res->priming to true.
7730		 * Any other callers of dns_resolver_prime() while we're
7731		 * running will see that res->priming is already true and
7732		 * do nothing.
7733		 */
7734		RTRACE("priming");
7735		rdataset = isc_mem_get(res->mctx, sizeof(*rdataset));
7736		if (rdataset == NULL) {
7737			LOCK(&res->lock);
7738			INSIST(res->priming);
7739			INSIST(res->primefetch == NULL);
7740			res->priming = ISC_FALSE;
7741			UNLOCK(&res->lock);
7742			return;
7743		}
7744		dns_rdataset_init(rdataset);
7745		LOCK(&res->primelock);
7746		result = dns_resolver_createfetch(res, dns_rootname,
7747						  dns_rdatatype_ns,
7748						  NULL, NULL, NULL, 0,
7749						  res->buckets[0].task,
7750						  prime_done,
7751						  res, rdataset, NULL,
7752						  &res->primefetch);
7753		UNLOCK(&res->primelock);
7754		if (result != ISC_R_SUCCESS) {
7755			LOCK(&res->lock);
7756			INSIST(res->priming);
7757			res->priming = ISC_FALSE;
7758			UNLOCK(&res->lock);
7759		}
7760	}
7761}
7762#endif /* BIND9 */
7763
7764void
7765dns_resolver_freeze(dns_resolver_t *res) {
7766	/*
7767	 * Freeze resolver.
7768	 */
7769
7770	REQUIRE(VALID_RESOLVER(res));
7771
7772	res->frozen = ISC_TRUE;
7773}
7774
7775void
7776dns_resolver_attach(dns_resolver_t *source, dns_resolver_t **targetp) {
7777	REQUIRE(VALID_RESOLVER(source));
7778	REQUIRE(targetp != NULL && *targetp == NULL);
7779
7780	RRTRACE(source, "attach");
7781	LOCK(&source->lock);
7782	REQUIRE(!source->exiting);
7783
7784	INSIST(source->references > 0);
7785	source->references++;
7786	INSIST(source->references != 0);
7787	UNLOCK(&source->lock);
7788
7789	*targetp = source;
7790}
7791
7792void
7793dns_resolver_whenshutdown(dns_resolver_t *res, isc_task_t *task,
7794			  isc_event_t **eventp)
7795{
7796	isc_task_t *clone;
7797	isc_event_t *event;
7798
7799	REQUIRE(VALID_RESOLVER(res));
7800	REQUIRE(eventp != NULL);
7801
7802	event = *eventp;
7803	*eventp = NULL;
7804
7805	LOCK(&res->lock);
7806
7807	if (res->exiting && res->activebuckets == 0) {
7808		/*
7809		 * We're already shutdown.  Send the event.
7810		 */
7811		event->ev_sender = res;
7812		isc_task_send(task, &event);
7813	} else {
7814		clone = NULL;
7815		isc_task_attach(task, &clone);
7816		event->ev_sender = clone;
7817		ISC_LIST_APPEND(res->whenshutdown, event, ev_link);
7818	}
7819
7820	UNLOCK(&res->lock);
7821}
7822
7823void
7824dns_resolver_shutdown(dns_resolver_t *res) {
7825	unsigned int i;
7826	fetchctx_t *fctx;
7827	isc_socket_t *sock;
7828	isc_result_t result;
7829
7830	REQUIRE(VALID_RESOLVER(res));
7831
7832	RTRACE("shutdown");
7833
7834	LOCK(&res->lock);
7835
7836	if (!res->exiting) {
7837		RTRACE("exiting");
7838		res->exiting = ISC_TRUE;
7839
7840		for (i = 0; i < res->nbuckets; i++) {
7841			LOCK(&res->buckets[i].lock);
7842			for (fctx = ISC_LIST_HEAD(res->buckets[i].fctxs);
7843			     fctx != NULL;
7844			     fctx = ISC_LIST_NEXT(fctx, link))
7845				fctx_shutdown(fctx);
7846			if (res->dispatchv4 != NULL && !res->exclusivev4) {
7847				sock = dns_dispatch_getsocket(res->dispatchv4);
7848				isc_socket_cancel(sock, res->buckets[i].task,
7849						  ISC_SOCKCANCEL_ALL);
7850			}
7851			if (res->dispatchv6 != NULL && !res->exclusivev6) {
7852				sock = dns_dispatch_getsocket(res->dispatchv6);
7853				isc_socket_cancel(sock, res->buckets[i].task,
7854						  ISC_SOCKCANCEL_ALL);
7855			}
7856			res->buckets[i].exiting = ISC_TRUE;
7857			if (ISC_LIST_EMPTY(res->buckets[i].fctxs)) {
7858				INSIST(res->activebuckets > 0);
7859				res->activebuckets--;
7860			}
7861			UNLOCK(&res->buckets[i].lock);
7862		}
7863		if (res->activebuckets == 0)
7864			send_shutdown_events(res);
7865		result = isc_timer_reset(res->spillattimer,
7866					 isc_timertype_inactive, NULL,
7867					 NULL, ISC_TRUE);
7868		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7869	}
7870
7871	UNLOCK(&res->lock);
7872}
7873
7874void
7875dns_resolver_detach(dns_resolver_t **resp) {
7876	dns_resolver_t *res;
7877	isc_boolean_t need_destroy = ISC_FALSE;
7878
7879	REQUIRE(resp != NULL);
7880	res = *resp;
7881	REQUIRE(VALID_RESOLVER(res));
7882
7883	RTRACE("detach");
7884
7885	LOCK(&res->lock);
7886
7887	INSIST(res->references > 0);
7888	res->references--;
7889	if (res->references == 0) {
7890		INSIST(res->exiting && res->activebuckets == 0);
7891		need_destroy = ISC_TRUE;
7892	}
7893
7894	UNLOCK(&res->lock);
7895
7896	if (need_destroy)
7897		destroy(res);
7898
7899	*resp = NULL;
7900}
7901
7902static inline isc_boolean_t
7903fctx_match(fetchctx_t *fctx, dns_name_t *name, dns_rdatatype_t type,
7904	   unsigned int options)
7905{
7906	/*
7907	 * Don't match fetch contexts that are shutting down.
7908	 */
7909	if (fctx->cloned || fctx->state == fetchstate_done ||
7910	    ISC_LIST_EMPTY(fctx->events))
7911		return (ISC_FALSE);
7912
7913	if (fctx->type != type || fctx->options != options)
7914		return (ISC_FALSE);
7915	return (dns_name_equal(&fctx->name, name));
7916}
7917
7918static inline void
7919log_fetch(dns_name_t *name, dns_rdatatype_t type) {
7920	char namebuf[DNS_NAME_FORMATSIZE];
7921	char typebuf[DNS_RDATATYPE_FORMATSIZE];
7922	int level = ISC_LOG_DEBUG(1);
7923
7924	if (! isc_log_wouldlog(dns_lctx, level))
7925		return;
7926
7927	dns_name_format(name, namebuf, sizeof(namebuf));
7928	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
7929
7930	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7931		      DNS_LOGMODULE_RESOLVER, level,
7932		      "createfetch: %s %s", namebuf, typebuf);
7933}
7934
7935isc_result_t
7936dns_resolver_createfetch(dns_resolver_t *res, dns_name_t *name,
7937			 dns_rdatatype_t type,
7938			 dns_name_t *domain, dns_rdataset_t *nameservers,
7939			 dns_forwarders_t *forwarders,
7940			 unsigned int options, isc_task_t *task,
7941			 isc_taskaction_t action, void *arg,
7942			 dns_rdataset_t *rdataset,
7943			 dns_rdataset_t *sigrdataset,
7944			 dns_fetch_t **fetchp)
7945{
7946	return (dns_resolver_createfetch2(res, name, type, domain,
7947					  nameservers, forwarders, NULL, 0,
7948					  options, task, action, arg,
7949					  rdataset, sigrdataset, fetchp));
7950}
7951
/*
 * Create a fetch for 'name'/'type' and return its handle in '*fetchp'.
 *
 * Unless DNS_FETCHOPT_UNSHARED is set in 'options', the fetch joins an
 * existing matching fetch context in the name's bucket when there is one;
 * otherwise a new fetch context is created and started on the bucket's
 * task.  'client' and 'id', when 'client' is non-NULL, are used to detect
 * a duplicate request already waiting on the shared fetch context and to
 * count its waiters for the spill limit.
 *
 * Returns ISC_R_SUCCESS on success.  Failure results produced directly by
 * this function are ISC_R_NOMEMORY (fetch handle allocation),
 * ISC_R_SHUTTINGDOWN (bucket is exiting), DNS_R_DUPLICATE (same client and
 * id already waiting) and DNS_R_DROP (spill limit reached); failures from
 * fctx_create() and fctx_join() are passed through.  On any failure the
 * fetch handle is freed and '*fetchp' is left untouched.
 */
isc_result_t
dns_resolver_createfetch2(dns_resolver_t *res, dns_name_t *name,
			  dns_rdatatype_t type,
			  dns_name_t *domain, dns_rdataset_t *nameservers,
			  dns_forwarders_t *forwarders,
			  isc_sockaddr_t *client, dns_messageid_t id,
			  unsigned int options, isc_task_t *task,
			  isc_taskaction_t action, void *arg,
			  dns_rdataset_t *rdataset,
			  dns_rdataset_t *sigrdataset,
			  dns_fetch_t **fetchp)
{
	dns_fetch_t *fetch;
	fetchctx_t *fctx = NULL;
	isc_result_t result = ISC_R_SUCCESS;
	unsigned int bucketnum;
	isc_boolean_t new_fctx = ISC_FALSE;
	isc_event_t *event;
	unsigned int count = 0;
	unsigned int spillat;
	unsigned int spillatmin;
	isc_boolean_t destroy = ISC_FALSE;

	UNUSED(forwarders);

	REQUIRE(VALID_RESOLVER(res));
	REQUIRE(res->frozen);
	/* XXXRTH  Check for meta type */
	if (domain != NULL) {
		REQUIRE(DNS_RDATASET_VALID(nameservers));
		REQUIRE(nameservers->type == dns_rdatatype_ns);
	} else
		REQUIRE(nameservers == NULL);
	REQUIRE(forwarders == NULL);
	REQUIRE(!dns_rdataset_isassociated(rdataset));
	REQUIRE(sigrdataset == NULL ||
		!dns_rdataset_isassociated(sigrdataset));
	REQUIRE(fetchp != NULL && *fetchp == NULL);

	log_fetch(name, type);

	/*
	 * XXXRTH  use a mempool?
	 */
	fetch = isc_mem_get(res->mctx, sizeof(*fetch));
	if (fetch == NULL)
		return (ISC_R_NOMEMORY);

	/* The bucket is selected by hashing the query name. */
	bucketnum = dns_name_fullhash(name, ISC_FALSE) % res->nbuckets;

	/*
	 * Snapshot the spill limits under the resolver lock, then release
	 * it before taking the bucket lock so the two are never held
	 * together here.
	 */
	LOCK(&res->lock);
	spillat = res->spillat;
	spillatmin = res->spillatmin;
	UNLOCK(&res->lock);
	LOCK(&res->buckets[bucketnum].lock);

	if (res->buckets[bucketnum].exiting) {
		result = ISC_R_SHUTTINGDOWN;
		goto unlock;
	}

	/* Look for an existing fetch context that this fetch can share. */
	if ((options & DNS_FETCHOPT_UNSHARED) == 0) {
		for (fctx = ISC_LIST_HEAD(res->buckets[bucketnum].fctxs);
		     fctx != NULL;
		     fctx = ISC_LIST_NEXT(fctx, link)) {
			if (fctx_match(fctx, name, type, options))
				break;
		}
	}

	/*
	 * Is this a duplicate?
	 */
	if (fctx != NULL && client != NULL) {
		dns_fetchevent_t *fevent;
		for (fevent = ISC_LIST_HEAD(fctx->events);
		     fevent != NULL;
		     fevent = ISC_LIST_NEXT(fevent, ev_link)) {
			if (fevent->client != NULL && fevent->id == id &&
			    isc_sockaddr_equal(fevent->client, client)) {
				result = DNS_R_DUPLICATE;
				goto unlock;
			}
			count++;
		}
	}
	/*
	 * 'count' is only non-zero when an existing fctx was walked above,
	 * which is what the INSIST relies on.  Once the waiter count reaches
	 * 'spillat' the fctx is marked spilled, and a spilled fctx drops
	 * further fetches that arrive with at least 'spillatmin' waiters.
	 */
	if (count >= spillatmin && spillatmin != 0) {
		INSIST(fctx != NULL);
		if (count >= spillat)
			fctx->spilled = ISC_TRUE;
		if (fctx->spilled) {
			result = DNS_R_DROP;
			goto unlock;
		}
	}

	if (fctx == NULL) {
		result = fctx_create(res, name, type, domain, nameservers,
				     options, bucketnum, &fctx);
		if (result != ISC_R_SUCCESS)
			goto unlock;
		new_fctx = ISC_TRUE;
	}

	result = fctx_join(fctx, task, client, id, action, arg,
			   rdataset, sigrdataset, fetch);
	if (new_fctx) {
		if (result == ISC_R_SUCCESS) {
			/*
			 * Launch this fctx.
			 */
			event = &fctx->control_event;
			ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
				       DNS_EVENT_FETCHCONTROL,
				       fctx_start, fctx, NULL,
				       NULL, NULL);
			isc_task_send(res->buckets[bucketnum].task, &event);
		} else {
			/*
			 * We don't care about the result of fctx_unlink()
			 * since we know we're not exiting.
			 */
			(void)fctx_unlink(fctx);
			destroy = ISC_TRUE;
		}
	}

 unlock:
	UNLOCK(&res->buckets[bucketnum].lock);

	/* A new fctx that could not be joined is destroyed outside the lock. */
	if (destroy)
		fctx_destroy(fctx);

	if (result == ISC_R_SUCCESS) {
		FTRACE("created");
		*fetchp = fetch;
	} else
		isc_mem_put(res->mctx, fetch, sizeof(*fetch));

	return (result);
}
8093
8094void
8095dns_resolver_cancelfetch(dns_fetch_t *fetch) {
8096	fetchctx_t *fctx;
8097	dns_resolver_t *res;
8098	dns_fetchevent_t *event, *next_event;
8099	isc_task_t *etask;
8100
8101	REQUIRE(DNS_FETCH_VALID(fetch));
8102	fctx = fetch->private;
8103	REQUIRE(VALID_FCTX(fctx));
8104	res = fctx->res;
8105
8106	FTRACE("cancelfetch");
8107
8108	LOCK(&res->buckets[fctx->bucketnum].lock);
8109
8110	/*
8111	 * Find the completion event for this fetch (as opposed
8112	 * to those for other fetches that have joined the same
8113	 * fctx) and send it with result = ISC_R_CANCELED.
8114	 */
8115	event = NULL;
8116	if (fctx->state != fetchstate_done) {
8117		for (event = ISC_LIST_HEAD(fctx->events);
8118		     event != NULL;
8119		     event = next_event) {
8120			next_event = ISC_LIST_NEXT(event, ev_link);
8121			if (event->fetch == fetch) {
8122				ISC_LIST_UNLINK(fctx->events, event, ev_link);
8123				break;
8124			}
8125		}
8126	}
8127	if (event != NULL) {
8128		etask = event->ev_sender;
8129		event->ev_sender = fctx;
8130		event->result = ISC_R_CANCELED;
8131		isc_task_sendanddetach(&etask, ISC_EVENT_PTR(&event));
8132	}
8133	/*
8134	 * The fctx continues running even if no fetches remain;
8135	 * the answer is still cached.
8136	 */
8137
8138	UNLOCK(&res->buckets[fctx->bucketnum].lock);
8139}
8140
8141void
8142dns_resolver_destroyfetch(dns_fetch_t **fetchp) {
8143	dns_fetch_t *fetch;
8144	dns_resolver_t *res;
8145	dns_fetchevent_t *event, *next_event;
8146	fetchctx_t *fctx;
8147	unsigned int bucketnum;
8148	isc_boolean_t bucket_empty;
8149
8150	REQUIRE(fetchp != NULL);
8151	fetch = *fetchp;
8152	REQUIRE(DNS_FETCH_VALID(fetch));
8153	fctx = fetch->private;
8154	REQUIRE(VALID_FCTX(fctx));
8155	res = fctx->res;
8156
8157	FTRACE("destroyfetch");
8158
8159	bucketnum = fctx->bucketnum;
8160	LOCK(&res->buckets[bucketnum].lock);
8161
8162	/*
8163	 * Sanity check: the caller should have gotten its event before
8164	 * trying to destroy the fetch.
8165	 */
8166	event = NULL;
8167	if (fctx->state != fetchstate_done) {
8168		for (event = ISC_LIST_HEAD(fctx->events);
8169		     event != NULL;
8170		     event = next_event) {
8171			next_event = ISC_LIST_NEXT(event, ev_link);
8172			RUNTIME_CHECK(event->fetch != fetch);
8173		}
8174	}
8175
8176	bucket_empty = fctx_decreference(fctx);
8177
8178	UNLOCK(&res->buckets[bucketnum].lock);
8179
8180	isc_mem_put(res->mctx, fetch, sizeof(*fetch));
8181	*fetchp = NULL;
8182
8183	if (bucket_empty)
8184		empty_bucket(res);
8185}
8186
8187void
8188dns_resolver_logfetch(dns_fetch_t *fetch, isc_log_t *lctx,
8189		      isc_logcategory_t *category, isc_logmodule_t *module,
8190		      int level, isc_boolean_t duplicateok)
8191{
8192	fetchctx_t *fctx;
8193	dns_resolver_t *res;
8194	char domainbuf[DNS_NAME_FORMATSIZE];
8195
8196	REQUIRE(DNS_FETCH_VALID(fetch));
8197	fctx = fetch->private;
8198	REQUIRE(VALID_FCTX(fctx));
8199	res = fctx->res;
8200
8201	LOCK(&res->buckets[fctx->bucketnum].lock);
8202
8203	INSIST(fctx->exitline >= 0);
8204	if (!fctx->logged || duplicateok) {
8205		dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
8206		isc_log_write(lctx, category, module, level,
8207			      "fetch completed at %s:%d for %s in "
8208			      "%" ISC_PRINT_QUADFORMAT "u."
8209			      "%06" ISC_PRINT_QUADFORMAT "u: %s/%s "
8210			      "[domain:%s,referral:%u,restart:%u,qrysent:%u,"
8211			      "timeout:%u,lame:%u,neterr:%u,badresp:%u,"
8212			      "adberr:%u,findfail:%u,valfail:%u]",
8213			      __FILE__, fctx->exitline, fctx->info,
8214			      fctx->duration / 1000000,
8215			      fctx->duration % 1000000,
8216			      isc_result_totext(fctx->result),
8217			      isc_result_totext(fctx->vresult), domainbuf,
8218			      fctx->referrals, fctx->restarts,
8219			      fctx->querysent, fctx->timeouts, fctx->lamecount,
8220			      fctx->neterr, fctx->badresp, fctx->adberr,
8221			      fctx->findfail, fctx->valfail);
8222		fctx->logged = ISC_TRUE;
8223	}
8224
8225	UNLOCK(&res->buckets[fctx->bucketnum].lock);
8226}
8227
8228dns_dispatchmgr_t *
8229dns_resolver_dispatchmgr(dns_resolver_t *resolver) {
8230	REQUIRE(VALID_RESOLVER(resolver));
8231	return (resolver->dispatchmgr);
8232}
8233
8234dns_dispatch_t *
8235dns_resolver_dispatchv4(dns_resolver_t *resolver) {
8236	REQUIRE(VALID_RESOLVER(resolver));
8237	return (resolver->dispatchv4);
8238}
8239
8240dns_dispatch_t *
8241dns_resolver_dispatchv6(dns_resolver_t *resolver) {
8242	REQUIRE(VALID_RESOLVER(resolver));
8243	return (resolver->dispatchv6);
8244}
8245
8246isc_socketmgr_t *
8247dns_resolver_socketmgr(dns_resolver_t *resolver) {
8248	REQUIRE(VALID_RESOLVER(resolver));
8249	return (resolver->socketmgr);
8250}
8251
8252isc_taskmgr_t *
8253dns_resolver_taskmgr(dns_resolver_t *resolver) {
8254	REQUIRE(VALID_RESOLVER(resolver));
8255	return (resolver->taskmgr);
8256}
8257
8258isc_uint32_t
8259dns_resolver_getlamettl(dns_resolver_t *resolver) {
8260	REQUIRE(VALID_RESOLVER(resolver));
8261	return (resolver->lame_ttl);
8262}
8263
8264void
8265dns_resolver_setlamettl(dns_resolver_t *resolver, isc_uint32_t lame_ttl) {
8266	REQUIRE(VALID_RESOLVER(resolver));
8267	resolver->lame_ttl = lame_ttl;
8268}
8269
8270unsigned int
8271dns_resolver_nrunning(dns_resolver_t *resolver) {
8272	unsigned int n;
8273	LOCK(&resolver->nlock);
8274	n = resolver->nfctx;
8275	UNLOCK(&resolver->nlock);
8276	return (n);
8277}
8278
8279isc_result_t
8280dns_resolver_addalternate(dns_resolver_t *resolver, isc_sockaddr_t *alt,
8281			  dns_name_t *name, in_port_t port) {
8282	alternate_t *a;
8283	isc_result_t result;
8284
8285	REQUIRE(VALID_RESOLVER(resolver));
8286	REQUIRE(!resolver->frozen);
8287	REQUIRE((alt == NULL) ^ (name == NULL));
8288
8289	a = isc_mem_get(resolver->mctx, sizeof(*a));
8290	if (a == NULL)
8291		return (ISC_R_NOMEMORY);
8292	if (alt != NULL) {
8293		a->isaddress = ISC_TRUE;
8294		a->_u.addr = *alt;
8295	} else {
8296		a->isaddress = ISC_FALSE;
8297		a->_u._n.port = port;
8298		dns_name_init(&a->_u._n.name, NULL);
8299		result = dns_name_dup(name, resolver->mctx, &a->_u._n.name);
8300		if (result != ISC_R_SUCCESS) {
8301			isc_mem_put(resolver->mctx, a, sizeof(*a));
8302			return (result);
8303		}
8304	}
8305	ISC_LINK_INIT(a, link);
8306	ISC_LIST_APPEND(resolver->alternates, a, link);
8307
8308	return (ISC_R_SUCCESS);
8309}
8310
8311void
8312dns_resolver_setudpsize(dns_resolver_t *resolver, isc_uint16_t udpsize) {
8313	REQUIRE(VALID_RESOLVER(resolver));
8314	resolver->udpsize = udpsize;
8315}
8316
8317isc_uint16_t
8318dns_resolver_getudpsize(dns_resolver_t *resolver) {
8319	REQUIRE(VALID_RESOLVER(resolver));
8320	return (resolver->udpsize);
8321}
8322
8323void
8324dns_resolver_flushbadcache(dns_resolver_t *resolver, dns_name_t *name) {
8325	unsigned int i;
8326	dns_badcache_t *bad, *prev, *next;
8327
8328	REQUIRE(VALID_RESOLVER(resolver));
8329
8330	LOCK(&resolver->lock);
8331	if (resolver->badcache == NULL)
8332		goto unlock;
8333
8334	if (name != NULL) {
8335		isc_time_t now;
8336		isc_result_t result;
8337		result = isc_time_now(&now);
8338		if (result != ISC_R_SUCCESS)
8339			isc_time_settoepoch(&now);
8340		i = dns_name_hash(name, ISC_FALSE) % resolver->badhash;
8341		prev = NULL;
8342		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8343			int n;
8344			next = bad->next;
8345			n = isc_time_compare(&bad->expire, &now);
8346			if (n < 0 || dns_name_equal(name, &bad->name)) {
8347				if (prev == NULL)
8348					resolver->badcache[i] = bad->next;
8349				else
8350					prev->next = bad->next;
8351				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8352					    bad->name.length);
8353				resolver->badcount--;
8354			} else
8355				prev = bad;
8356		}
8357	} else
8358		destroy_badcache(resolver);
8359
8360 unlock:
8361	UNLOCK(&resolver->lock);
8362
8363}
8364
8365static void
8366resizehash(dns_resolver_t *resolver, isc_time_t *now, isc_boolean_t grow) {
8367	unsigned int newsize;
8368	dns_badcache_t **new, *bad, *next;
8369	unsigned int i;
8370
8371	if (grow)
8372		newsize = resolver->badhash * 2 + 1;
8373	else
8374		newsize = (resolver->badhash - 1) / 2;
8375
8376	new = isc_mem_get(resolver->mctx,
8377			  sizeof(*resolver->badcache) * newsize);
8378	if (new == NULL)
8379		return;
8380	memset(new, 0, sizeof(*resolver->badcache) * newsize);
8381	for (i = 0; i < resolver->badhash; i++) {
8382		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8383			next = bad->next;
8384			if (isc_time_compare(&bad->expire, now) < 0) {
8385				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8386					    bad->name.length);
8387				resolver->badcount--;
8388			} else {
8389				bad->next = new[bad->hashval % newsize];
8390				new[bad->hashval % newsize] = bad;
8391			}
8392		}
8393	}
8394	isc_mem_put(resolver->mctx, resolver->badcache,
8395		    sizeof(*resolver->badcache) * resolver->badhash);
8396	resolver->badhash = newsize;
8397	resolver->badcache = new;
8398}
8399
8400void
8401dns_resolver_addbadcache(dns_resolver_t *resolver, dns_name_t *name,
8402			 dns_rdatatype_t type, isc_time_t *expire)
8403{
8404	isc_time_t now;
8405	isc_result_t result = ISC_R_SUCCESS;
8406	unsigned int i, hashval;
8407	dns_badcache_t *bad, *prev, *next;
8408
8409	REQUIRE(VALID_RESOLVER(resolver));
8410
8411	LOCK(&resolver->lock);
8412	if (resolver->badcache == NULL) {
8413		resolver->badcache = isc_mem_get(resolver->mctx,
8414						 sizeof(*resolver->badcache) *
8415						 DNS_BADCACHE_SIZE);
8416		if (resolver->badcache == NULL)
8417			goto cleanup;
8418		resolver->badhash = DNS_BADCACHE_SIZE;
8419		memset(resolver->badcache, 0, sizeof(*resolver->badcache) *
8420		       resolver->badhash);
8421	}
8422
8423	result = isc_time_now(&now);
8424	if (result != ISC_R_SUCCESS)
8425		isc_time_settoepoch(&now);
8426	hashval = dns_name_hash(name, ISC_FALSE);
8427	i = hashval % resolver->badhash;
8428	prev = NULL;
8429	for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8430		next = bad->next;
8431		if (bad->type == type && dns_name_equal(name, &bad->name))
8432			break;
8433		if (isc_time_compare(&bad->expire, &now) < 0) {
8434			if (prev == NULL)
8435				resolver->badcache[i] = bad->next;
8436			else
8437				prev->next = bad->next;
8438			isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8439				    bad->name.length);
8440			resolver->badcount--;
8441		} else
8442			prev = bad;
8443	}
8444	if (bad == NULL) {
8445		isc_buffer_t buffer;
8446		bad = isc_mem_get(resolver->mctx, sizeof(*bad) + name->length);
8447		if (bad == NULL)
8448			goto cleanup;
8449		bad->type = type;
8450		bad->hashval = hashval;
8451		isc_buffer_init(&buffer, bad + 1, name->length);
8452		dns_name_init(&bad->name, NULL);
8453		dns_name_copy(name, &bad->name, &buffer);
8454		bad->next = resolver->badcache[i];
8455		resolver->badcache[i] = bad;
8456		resolver->badcount++;
8457		if (resolver->badcount > resolver->badhash * 8)
8458			resizehash(resolver, &now, ISC_TRUE);
8459		if (resolver->badcount < resolver->badhash * 2 &&
8460		    resolver->badhash > DNS_BADCACHE_SIZE)
8461			resizehash(resolver, &now, ISC_FALSE);
8462	}
8463	bad->expire = *expire;
8464 cleanup:
8465	UNLOCK(&resolver->lock);
8466}
8467
8468isc_boolean_t
8469dns_resolver_getbadcache(dns_resolver_t *resolver, dns_name_t *name,
8470			 dns_rdatatype_t type, isc_time_t *now)
8471{
8472	dns_badcache_t *bad, *prev, *next;
8473	isc_boolean_t answer = ISC_FALSE;
8474	unsigned int i;
8475
8476	REQUIRE(VALID_RESOLVER(resolver));
8477
8478	LOCK(&resolver->lock);
8479	if (resolver->badcache == NULL)
8480		goto unlock;
8481
8482	i = dns_name_hash(name, ISC_FALSE) % resolver->badhash;
8483	prev = NULL;
8484	for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8485		next = bad->next;
8486		/*
8487		 * Search the hash list. Clean out expired records as we go.
8488		 */
8489		if (isc_time_compare(&bad->expire, now) < 0) {
8490			if (prev != NULL)
8491				prev->next = bad->next;
8492			else
8493				resolver->badcache[i] = bad->next;
8494			isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8495				    bad->name.length);
8496			resolver->badcount--;
8497			continue;
8498		}
8499		if (bad->type == type && dns_name_equal(name, &bad->name)) {
8500			answer = ISC_TRUE;
8501			break;
8502		}
8503		prev = bad;
8504	}
8505
8506	/*
8507	 * Slow sweep to clean out stale records.
8508	 */
8509	i = resolver->badsweep++ % resolver->badhash;
8510	bad = resolver->badcache[i];
8511	if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
8512		resolver->badcache[i] = bad->next;
8513		isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8514			    bad->name.length);
8515		resolver->badcount--;
8516	}
8517
8518 unlock:
8519	UNLOCK(&resolver->lock);
8520	return (answer);
8521}
8522
8523void
8524dns_resolver_printbadcache(dns_resolver_t *resolver, FILE *fp) {
8525	char namebuf[DNS_NAME_FORMATSIZE];
8526	char typebuf[DNS_RDATATYPE_FORMATSIZE];
8527	dns_badcache_t *bad, *next, *prev;
8528	isc_time_t now;
8529	unsigned int i;
8530	isc_uint64_t t;
8531
8532	LOCK(&resolver->lock);
8533	fprintf(fp, ";\n; Bad cache\n;\n");
8534
8535	if (resolver->badcache == NULL)
8536		goto unlock;
8537
8538	TIME_NOW(&now);
8539	for (i = 0; i < resolver->badhash; i++) {
8540		prev = NULL;
8541		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8542			next = bad->next;
8543			if (isc_time_compare(&bad->expire, &now) < 0) {
8544				if (prev != NULL)
8545					prev->next = bad->next;
8546				else
8547					resolver->badcache[i] = bad->next;
8548				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8549					    bad->name.length);
8550				resolver->badcount--;
8551				continue;
8552			}
8553			prev = bad;
8554			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
8555			dns_rdatatype_format(bad->type, typebuf,
8556					     sizeof(typebuf));
8557			t = isc_time_microdiff(&bad->expire, &now);
8558			t /= 1000;
8559			fprintf(fp, "; %s/%s [ttl "
8560				"%" ISC_PLATFORM_QUADFORMAT "u]\n",
8561				namebuf, typebuf, t);
8562		}
8563	}
8564
8565 unlock:
8566	UNLOCK(&resolver->lock);
8567}
8568
8569static void
8570free_algorithm(void *node, void *arg) {
8571	unsigned char *algorithms = node;
8572	isc_mem_t *mctx = arg;
8573
8574	isc_mem_put(mctx, algorithms, *algorithms);
8575}
8576
8577void
8578dns_resolver_reset_algorithms(dns_resolver_t *resolver) {
8579
8580	REQUIRE(VALID_RESOLVER(resolver));
8581
8582#if USE_ALGLOCK
8583	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
8584#endif
8585	if (resolver->algorithms != NULL)
8586		dns_rbt_destroy(&resolver->algorithms);
8587#if USE_ALGLOCK
8588	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
8589#endif
8590}
8591
8592isc_result_t
8593dns_resolver_disable_algorithm(dns_resolver_t *resolver, dns_name_t *name,
8594			       unsigned int alg)
8595{
8596	unsigned int len, mask;
8597	unsigned char *new;
8598	unsigned char *algorithms;
8599	isc_result_t result;
8600	dns_rbtnode_t *node = NULL;
8601
8602	REQUIRE(VALID_RESOLVER(resolver));
8603	if (alg > 255)
8604		return (ISC_R_RANGE);
8605
8606#if USE_ALGLOCK
8607	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
8608#endif
8609	if (resolver->algorithms == NULL) {
8610		result = dns_rbt_create(resolver->mctx, free_algorithm,
8611					resolver->mctx, &resolver->algorithms);
8612		if (result != ISC_R_SUCCESS)
8613			goto cleanup;
8614	}
8615
8616	len = alg/8 + 2;
8617	mask = 1 << (alg%8);
8618
8619	result = dns_rbt_addnode(resolver->algorithms, name, &node);
8620
8621	if (result == ISC_R_SUCCESS || result == ISC_R_EXISTS) {
8622		algorithms = node->data;
8623		if (algorithms == NULL || len > *algorithms) {
8624			new = isc_mem_get(resolver->mctx, len);
8625			if (new == NULL) {
8626				result = ISC_R_NOMEMORY;
8627				goto cleanup;
8628			}
8629			memset(new, 0, len);
8630			if (algorithms != NULL)
8631				memcpy(new, algorithms, *algorithms);
8632			new[len-1] |= mask;
8633			*new = len;
8634			node->data = new;
8635			if (algorithms != NULL)
8636				isc_mem_put(resolver->mctx, algorithms,
8637					    *algorithms);
8638		} else
8639			algorithms[len-1] |= mask;
8640	}
8641	result = ISC_R_SUCCESS;
8642 cleanup:
8643#if USE_ALGLOCK
8644	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
8645#endif
8646	return (result);
8647}
8648
8649isc_boolean_t
8650dns_resolver_algorithm_supported(dns_resolver_t *resolver, dns_name_t *name,
8651				 unsigned int alg)
8652{
8653	unsigned int len, mask;
8654	unsigned char *algorithms;
8655	void *data = NULL;
8656	isc_result_t result;
8657	isc_boolean_t found = ISC_FALSE;
8658
8659	REQUIRE(VALID_RESOLVER(resolver));
8660
8661#if USE_ALGLOCK
8662	RWLOCK(&resolver->alglock, isc_rwlocktype_read);
8663#endif
8664	if (resolver->algorithms == NULL)
8665		goto unlock;
8666	result = dns_rbt_findname(resolver->algorithms, name, 0, NULL, &data);
8667	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8668		len = alg/8 + 2;
8669		mask = 1 << (alg%8);
8670		algorithms = data;
8671		if (len <= *algorithms && (algorithms[len-1] & mask) != 0)
8672			found = ISC_TRUE;
8673	}
8674 unlock:
8675#if USE_ALGLOCK
8676	RWUNLOCK(&resolver->alglock, isc_rwlocktype_read);
8677#endif
8678	if (found)
8679		return (ISC_FALSE);
8680	return (dst_algorithm_supported(alg));
8681}
8682
8683isc_boolean_t
8684dns_resolver_digest_supported(dns_resolver_t *resolver, unsigned int digest) {
8685
8686	UNUSED(resolver);
8687	return (dns_ds_digest_supported(digest));
8688}
8689
8690void
8691dns_resolver_resetmustbesecure(dns_resolver_t *resolver) {
8692
8693	REQUIRE(VALID_RESOLVER(resolver));
8694
8695#if USE_MBSLOCK
8696	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
8697#endif
8698	if (resolver->mustbesecure != NULL)
8699		dns_rbt_destroy(&resolver->mustbesecure);
8700#if USE_MBSLOCK
8701	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
8702#endif
8703}
8704
8705static isc_boolean_t yes = ISC_TRUE, no = ISC_FALSE;
8706
8707isc_result_t
8708dns_resolver_setmustbesecure(dns_resolver_t *resolver, dns_name_t *name,
8709			     isc_boolean_t value)
8710{
8711	isc_result_t result;
8712
8713	REQUIRE(VALID_RESOLVER(resolver));
8714
8715#if USE_MBSLOCK
8716	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
8717#endif
8718	if (resolver->mustbesecure == NULL) {
8719		result = dns_rbt_create(resolver->mctx, NULL, NULL,
8720					&resolver->mustbesecure);
8721		if (result != ISC_R_SUCCESS)
8722			goto cleanup;
8723	}
8724	result = dns_rbt_addname(resolver->mustbesecure, name,
8725				 value ? &yes : &no);
8726 cleanup:
8727#if USE_MBSLOCK
8728	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
8729#endif
8730	return (result);
8731}
8732
8733isc_boolean_t
8734dns_resolver_getmustbesecure(dns_resolver_t *resolver, dns_name_t *name) {
8735	void *data = NULL;
8736	isc_boolean_t value = ISC_FALSE;
8737	isc_result_t result;
8738
8739	REQUIRE(VALID_RESOLVER(resolver));
8740
8741#if USE_MBSLOCK
8742	RWLOCK(&resolver->mbslock, isc_rwlocktype_read);
8743#endif
8744	if (resolver->mustbesecure == NULL)
8745		goto unlock;
8746	result = dns_rbt_findname(resolver->mustbesecure, name, 0, NULL, &data);
8747	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
8748		value = *(isc_boolean_t*)data;
8749 unlock:
8750#if USE_MBSLOCK
8751	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_read);
8752#endif
8753	return (value);
8754}
8755
8756void
8757dns_resolver_getclientsperquery(dns_resolver_t *resolver, isc_uint32_t *cur,
8758				isc_uint32_t *min, isc_uint32_t *max)
8759{
8760	REQUIRE(VALID_RESOLVER(resolver));
8761
8762	LOCK(&resolver->lock);
8763	if (cur != NULL)
8764		*cur = resolver->spillat;
8765	if (min != NULL)
8766		*min = resolver->spillatmin;
8767	if (max != NULL)
8768		*max = resolver->spillatmax;
8769	UNLOCK(&resolver->lock);
8770}
8771
8772void
8773dns_resolver_setclientsperquery(dns_resolver_t *resolver, isc_uint32_t min,
8774				isc_uint32_t max)
8775{
8776	REQUIRE(VALID_RESOLVER(resolver));
8777
8778	LOCK(&resolver->lock);
8779	resolver->spillatmin = resolver->spillat = min;
8780	resolver->spillatmax = max;
8781	UNLOCK(&resolver->lock);
8782}
8783
8784isc_boolean_t
8785dns_resolver_getzeronosoattl(dns_resolver_t *resolver) {
8786	REQUIRE(VALID_RESOLVER(resolver));
8787
8788	return (resolver->zero_no_soa_ttl);
8789}
8790
8791void
8792dns_resolver_setzeronosoattl(dns_resolver_t *resolver, isc_boolean_t state) {
8793	REQUIRE(VALID_RESOLVER(resolver));
8794
8795	resolver->zero_no_soa_ttl = state;
8796}
8797
8798unsigned int
8799dns_resolver_getoptions(dns_resolver_t *resolver) {
8800	REQUIRE(VALID_RESOLVER(resolver));
8801
8802	return (resolver->options);
8803}
8804
8805unsigned int
8806dns_resolver_gettimeout(dns_resolver_t *resolver) {
8807	REQUIRE(VALID_RESOLVER(resolver));
8808
8809	return (resolver->query_timeout);
8810}
8811
8812void
8813dns_resolver_settimeout(dns_resolver_t *resolver, unsigned int seconds) {
8814	REQUIRE(VALID_RESOLVER(resolver));
8815
8816	if (seconds == 0)
8817		seconds = DEFAULT_QUERY_TIMEOUT;
8818	if (seconds > MAXIMUM_QUERY_TIMEOUT)
8819		seconds = MAXIMUM_QUERY_TIMEOUT;
8820
8821	resolver->query_timeout = seconds;
8822}
8823