iterator.c revision 356345
1/*
2 * iterator/iterator.c - iterative resolver DNS query response module
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains a module that performs recursive iterative DNS query
40 * processing.
41 */
42
43#include "config.h"
44#include "iterator/iterator.h"
45#include "iterator/iter_utils.h"
46#include "iterator/iter_hints.h"
47#include "iterator/iter_fwd.h"
48#include "iterator/iter_donotq.h"
49#include "iterator/iter_delegpt.h"
50#include "iterator/iter_resptype.h"
51#include "iterator/iter_scrub.h"
52#include "iterator/iter_priv.h"
53#include "validator/val_neg.h"
54#include "services/cache/dns.h"
55#include "services/cache/infra.h"
56#include "services/authzone.h"
57#include "util/module.h"
58#include "util/netevent.h"
59#include "util/net_help.h"
60#include "util/regional.h"
61#include "util/data/dname.h"
62#include "util/data/msgencode.h"
63#include "util/fptr_wlist.h"
64#include "util/config_file.h"
65#include "util/random.h"
66#include "sldns/rrdef.h"
67#include "sldns/wire2str.h"
68#include "sldns/str2wire.h"
69#include "sldns/parseutil.h"
70#include "sldns/sbuffer.h"
71
/* Niceness value, in milliseconds.  Going by the name it applies to
 * servers nothing is known about yet -- TODO confirm against the users
 * of this variable. */
int UNKNOWN_SERVER_NICENESS = 376;
74
75int
76iter_init(struct module_env* env, int id)
77{
78	struct iter_env* iter_env = (struct iter_env*)calloc(1,
79		sizeof(struct iter_env));
80	if(!iter_env) {
81		log_err("malloc failure");
82		return 0;
83	}
84	env->modinfo[id] = (void*)iter_env;
85
86	lock_basic_init(&iter_env->queries_ratelimit_lock);
87	lock_protect(&iter_env->queries_ratelimit_lock,
88			&iter_env->num_queries_ratelimited,
89		sizeof(iter_env->num_queries_ratelimited));
90
91	if(!iter_apply_cfg(iter_env, env->cfg)) {
92		log_err("iterator: could not apply configuration settings.");
93		return 0;
94	}
95
96	return 1;
97}
98
99/** delete caps_whitelist element */
100static void
101caps_free(struct rbnode_type* n, void* ATTR_UNUSED(d))
102{
103	if(n) {
104		free(((struct name_tree_node*)n)->name);
105		free(n);
106	}
107}
108
109void
110iter_deinit(struct module_env* env, int id)
111{
112	struct iter_env* iter_env;
113	if(!env || !env->modinfo[id])
114		return;
115	iter_env = (struct iter_env*)env->modinfo[id];
116	lock_basic_destroy(&iter_env->queries_ratelimit_lock);
117	free(iter_env->target_fetch_policy);
118	priv_delete(iter_env->priv);
119	donotq_delete(iter_env->donotq);
120	if(iter_env->caps_white) {
121		traverse_postorder(iter_env->caps_white, caps_free, NULL);
122		free(iter_env->caps_white);
123	}
124	free(iter_env);
125	env->modinfo[id] = NULL;
126}
127
128/** new query for iterator */
129static int
130iter_new(struct module_qstate* qstate, int id)
131{
132	struct iter_qstate* iq = (struct iter_qstate*)regional_alloc(
133		qstate->region, sizeof(struct iter_qstate));
134	qstate->minfo[id] = iq;
135	if(!iq)
136		return 0;
137	memset(iq, 0, sizeof(*iq));
138	iq->state = INIT_REQUEST_STATE;
139	iq->final_state = FINISHED_STATE;
140	iq->an_prepend_list = NULL;
141	iq->an_prepend_last = NULL;
142	iq->ns_prepend_list = NULL;
143	iq->ns_prepend_last = NULL;
144	iq->dp = NULL;
145	iq->depth = 0;
146	iq->num_target_queries = 0;
147	iq->num_current_queries = 0;
148	iq->query_restart_count = 0;
149	iq->referral_count = 0;
150	iq->sent_count = 0;
151	iq->ratelimit_ok = 0;
152	iq->target_count = NULL;
153	iq->wait_priming_stub = 0;
154	iq->refetch_glue = 0;
155	iq->dnssec_expected = 0;
156	iq->dnssec_lame_query = 0;
157	iq->chase_flags = qstate->query_flags;
158	/* Start with the (current) qname. */
159	iq->qchase = qstate->qinfo;
160	outbound_list_init(&iq->outlist);
161	iq->minimise_count = 0;
162	iq->minimise_timeout_count = 0;
163	if (qstate->env->cfg->qname_minimisation)
164		iq->minimisation_state = INIT_MINIMISE_STATE;
165	else
166		iq->minimisation_state = DONOT_MINIMISE_STATE;
167
168	memset(&iq->qinfo_out, 0, sizeof(struct query_info));
169	return 1;
170}
171
172/**
173 * Transition to the next state. This can be used to advance a currently
174 * processing event. It cannot be used to reactivate a forEvent.
175 *
176 * @param iq: iterator query state
177 * @param nextstate The state to transition to.
178 * @return true. This is so this can be called as the return value for the
179 *         actual process*State() methods. (Transitioning to the next state
180 *         implies further processing).
181 */
182static int
183next_state(struct iter_qstate* iq, enum iter_state nextstate)
184{
185	/* If transitioning to a "response" state, make sure that there is a
186	 * response */
187	if(iter_state_is_responsestate(nextstate)) {
188		if(iq->response == NULL) {
189			log_err("transitioning to response state sans "
190				"response.");
191		}
192	}
193	iq->state = nextstate;
194	return 1;
195}
196
197/**
198 * Transition an event to its final state. Final states always either return
199 * a result up the module chain, or reactivate a dependent event. Which
200 * final state to transition to is set in the module state for the event when
201 * it was created, and depends on the original purpose of the event.
202 *
203 * The response is stored in the qstate->buf buffer.
204 *
205 * @param iq: iterator query state
206 * @return false. This is so this method can be used as the return value for
207 *         the processState methods. (Transitioning to the final state
208 */
209static int
210final_state(struct iter_qstate* iq)
211{
212	return next_state(iq, iq->final_state);
213}
214
215/**
216 * Callback routine to handle errors in parent query states
217 * @param qstate: query state that failed.
218 * @param id: module id.
219 * @param super: super state.
220 */
221static void
222error_supers(struct module_qstate* qstate, int id, struct module_qstate* super)
223{
224	struct iter_qstate* super_iq = (struct iter_qstate*)super->minfo[id];
225
226	if(qstate->qinfo.qtype == LDNS_RR_TYPE_A ||
227		qstate->qinfo.qtype == LDNS_RR_TYPE_AAAA) {
228		/* mark address as failed. */
229		struct delegpt_ns* dpns = NULL;
230		super_iq->num_target_queries--;
231		if(super_iq->dp)
232			dpns = delegpt_find_ns(super_iq->dp,
233				qstate->qinfo.qname, qstate->qinfo.qname_len);
234		if(!dpns) {
235			/* not interested */
236			/* this can happen, for eg. qname minimisation asked
237			 * for an NXDOMAIN to be validated, and used qtype
238			 * A for that, and the error of that, the name, is
239			 * not listed in super_iq->dp */
240			verbose(VERB_ALGO, "subq error, but not interested");
241			log_query_info(VERB_ALGO, "superq", &super->qinfo);
242			return;
243		} else {
244			/* see if the failure did get (parent-lame) info */
245			if(!cache_fill_missing(super->env, super_iq->qchase.qclass,
246				super->region, super_iq->dp))
247				log_err("out of memory adding missing");
248		}
249		dpns->resolved = 1; /* mark as failed */
250	}
251	if(qstate->qinfo.qtype == LDNS_RR_TYPE_NS) {
252		/* prime failed to get delegation */
253		super_iq->dp = NULL;
254	}
255	/* evaluate targets again */
256	super_iq->state = QUERYTARGETS_STATE;
257	/* super becomes runnable, and will process this change */
258}
259
260/**
261 * Return an error to the client
262 * @param qstate: our query state
263 * @param id: module id
264 * @param rcode: error code (DNS errcode).
265 * @return: 0 for use by caller, to make notation easy, like:
266 * 	return error_response(..).
267 */
268static int
269error_response(struct module_qstate* qstate, int id, int rcode)
270{
271	verbose(VERB_QUERY, "return error response %s",
272		sldns_lookup_by_id(sldns_rcodes, rcode)?
273		sldns_lookup_by_id(sldns_rcodes, rcode)->name:"??");
274	qstate->return_rcode = rcode;
275	qstate->return_msg = NULL;
276	qstate->ext_state[id] = module_finished;
277	return 0;
278}
279
280/**
281 * Return an error to the client and cache the error code in the
282 * message cache (so per qname, qtype, qclass).
283 * @param qstate: our query state
284 * @param id: module id
285 * @param rcode: error code (DNS errcode).
286 * @return: 0 for use by caller, to make notation easy, like:
287 * 	return error_response(..).
288 */
289static int
290error_response_cache(struct module_qstate* qstate, int id, int rcode)
291{
292	if(!qstate->no_cache_store) {
293		/* store in cache */
294		struct reply_info err;
295		if(qstate->prefetch_leeway > NORR_TTL) {
296			verbose(VERB_ALGO, "error response for prefetch in cache");
297			/* attempt to adjust the cache entry prefetch */
298			if(dns_cache_prefetch_adjust(qstate->env, &qstate->qinfo,
299				NORR_TTL, qstate->query_flags))
300				return error_response(qstate, id, rcode);
301			/* if that fails (not in cache), fall through to store err */
302		}
303		if(qstate->env->cfg->serve_expired) {
304			/* if serving expired contents, and such content is
305			 * already available, don't overwrite this servfail */
306			struct msgreply_entry* msg;
307			if((msg=msg_cache_lookup(qstate->env,
308				qstate->qinfo.qname, qstate->qinfo.qname_len,
309				qstate->qinfo.qtype, qstate->qinfo.qclass,
310				qstate->query_flags, 0,
311				qstate->env->cfg->serve_expired_ttl_reset))
312				!= NULL) {
313				if(qstate->env->cfg->serve_expired_ttl_reset) {
314					struct reply_info* rep =
315						(struct reply_info*)msg->entry.data;
316					if(rep && *qstate->env->now +
317						qstate->env->cfg->serve_expired_ttl  >
318						rep->serve_expired_ttl) {
319						rep->serve_expired_ttl =
320							*qstate->env->now +
321							qstate->env->cfg->serve_expired_ttl;
322					}
323				}
324				lock_rw_unlock(&msg->entry.lock);
325				return error_response(qstate, id, rcode);
326			}
327			/* serving expired contents, but nothing is cached
328			 * at all, so the servfail cache entry is useful
329			 * (stops waste of time on this servfail NORR_TTL) */
330		} else {
331			/* don't overwrite existing (non-expired) data in
332			 * cache with a servfail */
333			struct msgreply_entry* msg;
334			if((msg=msg_cache_lookup(qstate->env,
335				qstate->qinfo.qname, qstate->qinfo.qname_len,
336				qstate->qinfo.qtype, qstate->qinfo.qclass,
337				qstate->query_flags, *qstate->env->now, 0))
338				!= NULL) {
339				struct reply_info* rep = (struct reply_info*)
340					msg->entry.data;
341				if(FLAGS_GET_RCODE(rep->flags) ==
342					LDNS_RCODE_NOERROR ||
343					FLAGS_GET_RCODE(rep->flags) ==
344					LDNS_RCODE_NXDOMAIN) {
345					/* we have a good entry,
346					 * don't overwrite */
347					lock_rw_unlock(&msg->entry.lock);
348					return error_response(qstate, id, rcode);
349				}
350				lock_rw_unlock(&msg->entry.lock);
351			}
352
353		}
354		memset(&err, 0, sizeof(err));
355		err.flags = (uint16_t)(BIT_QR | BIT_RA);
356		FLAGS_SET_RCODE(err.flags, rcode);
357		err.qdcount = 1;
358		err.ttl = NORR_TTL;
359		err.prefetch_ttl = PREFETCH_TTL_CALC(err.ttl);
360		err.serve_expired_ttl = NORR_TTL;
361		/* do not waste time trying to validate this servfail */
362		err.security = sec_status_indeterminate;
363		verbose(VERB_ALGO, "store error response in message cache");
364		iter_dns_store(qstate->env, &qstate->qinfo, &err, 0, 0, 0, NULL,
365			qstate->query_flags);
366	}
367	return error_response(qstate, id, rcode);
368}
369
370/** check if prepend item is duplicate item */
371static int
372prepend_is_duplicate(struct ub_packed_rrset_key** sets, size_t to,
373	struct ub_packed_rrset_key* dup)
374{
375	size_t i;
376	for(i=0; i<to; i++) {
377		if(sets[i]->rk.type == dup->rk.type &&
378			sets[i]->rk.rrset_class == dup->rk.rrset_class &&
379			sets[i]->rk.dname_len == dup->rk.dname_len &&
380			query_dname_compare(sets[i]->rk.dname, dup->rk.dname)
381			== 0)
382			return 1;
383	}
384	return 0;
385}
386
387/** prepend the prepend list in the answer and authority section of dns_msg */
388static int
389iter_prepend(struct iter_qstate* iq, struct dns_msg* msg,
390	struct regional* region)
391{
392	struct iter_prep_list* p;
393	struct ub_packed_rrset_key** sets;
394	size_t num_an = 0, num_ns = 0;;
395	for(p = iq->an_prepend_list; p; p = p->next)
396		num_an++;
397	for(p = iq->ns_prepend_list; p; p = p->next)
398		num_ns++;
399	if(num_an + num_ns == 0)
400		return 1;
401	verbose(VERB_ALGO, "prepending %d rrsets", (int)num_an + (int)num_ns);
402	if(num_an > RR_COUNT_MAX || num_ns > RR_COUNT_MAX ||
403		msg->rep->rrset_count > RR_COUNT_MAX) return 0; /* overflow */
404	sets = regional_alloc(region, (num_an+num_ns+msg->rep->rrset_count) *
405		sizeof(struct ub_packed_rrset_key*));
406	if(!sets)
407		return 0;
408	/* ANSWER section */
409	num_an = 0;
410	for(p = iq->an_prepend_list; p; p = p->next) {
411		sets[num_an++] = p->rrset;
412		if(ub_packed_rrset_ttl(p->rrset) < msg->rep->ttl)
413			msg->rep->ttl = ub_packed_rrset_ttl(p->rrset);
414	}
415	memcpy(sets+num_an, msg->rep->rrsets, msg->rep->an_numrrsets *
416		sizeof(struct ub_packed_rrset_key*));
417	/* AUTH section */
418	num_ns = 0;
419	for(p = iq->ns_prepend_list; p; p = p->next) {
420		if(prepend_is_duplicate(sets+msg->rep->an_numrrsets+num_an,
421			num_ns, p->rrset) || prepend_is_duplicate(
422			msg->rep->rrsets+msg->rep->an_numrrsets,
423			msg->rep->ns_numrrsets, p->rrset))
424			continue;
425		sets[msg->rep->an_numrrsets + num_an + num_ns++] = p->rrset;
426		if(ub_packed_rrset_ttl(p->rrset) < msg->rep->ttl)
427			msg->rep->ttl = ub_packed_rrset_ttl(p->rrset);
428	}
429	memcpy(sets + num_an + msg->rep->an_numrrsets + num_ns,
430		msg->rep->rrsets + msg->rep->an_numrrsets,
431		(msg->rep->ns_numrrsets + msg->rep->ar_numrrsets) *
432		sizeof(struct ub_packed_rrset_key*));
433
434	/* NXDOMAIN rcode can stay if we prepended DNAME/CNAMEs, because
435	 * this is what recursors should give. */
436	msg->rep->rrset_count += num_an + num_ns;
437	msg->rep->an_numrrsets += num_an;
438	msg->rep->ns_numrrsets += num_ns;
439	msg->rep->rrsets = sets;
440	return 1;
441}
442
443/**
444 * Find rrset in ANSWER prepend list.
445 * to avoid duplicate DNAMEs when a DNAME is traversed twice.
446 * @param iq: iterator query state.
447 * @param rrset: rrset to add.
448 * @return false if not found
449 */
450static int
451iter_find_rrset_in_prepend_answer(struct iter_qstate* iq,
452	struct ub_packed_rrset_key* rrset)
453{
454	struct iter_prep_list* p = iq->an_prepend_list;
455	while(p) {
456		if(ub_rrset_compare(p->rrset, rrset) == 0 &&
457			rrsetdata_equal((struct packed_rrset_data*)p->rrset
458			->entry.data, (struct packed_rrset_data*)rrset
459			->entry.data))
460			return 1;
461		p = p->next;
462	}
463	return 0;
464}
465
466/**
467 * Add rrset to ANSWER prepend list
468 * @param qstate: query state.
469 * @param iq: iterator query state.
470 * @param rrset: rrset to add.
471 * @return false on failure (malloc).
472 */
473static int
474iter_add_prepend_answer(struct module_qstate* qstate, struct iter_qstate* iq,
475	struct ub_packed_rrset_key* rrset)
476{
477	struct iter_prep_list* p = (struct iter_prep_list*)regional_alloc(
478		qstate->region, sizeof(struct iter_prep_list));
479	if(!p)
480		return 0;
481	p->rrset = rrset;
482	p->next = NULL;
483	/* add at end */
484	if(iq->an_prepend_last)
485		iq->an_prepend_last->next = p;
486	else	iq->an_prepend_list = p;
487	iq->an_prepend_last = p;
488	return 1;
489}
490
491/**
492 * Add rrset to AUTHORITY prepend list
493 * @param qstate: query state.
494 * @param iq: iterator query state.
495 * @param rrset: rrset to add.
496 * @return false on failure (malloc).
497 */
498static int
499iter_add_prepend_auth(struct module_qstate* qstate, struct iter_qstate* iq,
500	struct ub_packed_rrset_key* rrset)
501{
502	struct iter_prep_list* p = (struct iter_prep_list*)regional_alloc(
503		qstate->region, sizeof(struct iter_prep_list));
504	if(!p)
505		return 0;
506	p->rrset = rrset;
507	p->next = NULL;
508	/* add at end */
509	if(iq->ns_prepend_last)
510		iq->ns_prepend_last->next = p;
511	else	iq->ns_prepend_list = p;
512	iq->ns_prepend_last = p;
513	return 1;
514}
515
516/**
517 * Given a CNAME response (defined as a response containing a CNAME or DNAME
518 * that does not answer the request), process the response, modifying the
519 * state as necessary. This follows the CNAME/DNAME chain and returns the
520 * final query name.
521 *
522 * sets the new query name, after following the CNAME/DNAME chain.
523 * @param qstate: query state.
524 * @param iq: iterator query state.
525 * @param msg: the response.
526 * @param mname: returned target new query name.
527 * @param mname_len: length of mname.
528 * @return false on (malloc) error.
529 */
530static int
531handle_cname_response(struct module_qstate* qstate, struct iter_qstate* iq,
532        struct dns_msg* msg, uint8_t** mname, size_t* mname_len)
533{
534	size_t i;
535	/* Start with the (current) qname. */
536	*mname = iq->qchase.qname;
537	*mname_len = iq->qchase.qname_len;
538
539	/* Iterate over the ANSWER rrsets in order, looking for CNAMEs and
540	 * DNAMES. */
541	for(i=0; i<msg->rep->an_numrrsets; i++) {
542		struct ub_packed_rrset_key* r = msg->rep->rrsets[i];
543		/* If there is a (relevant) DNAME, add it to the list.
544		 * We always expect there to be CNAME that was generated
545		 * by this DNAME following, so we don't process the DNAME
546		 * directly.  */
547		if(ntohs(r->rk.type) == LDNS_RR_TYPE_DNAME &&
548			dname_strict_subdomain_c(*mname, r->rk.dname) &&
549			!iter_find_rrset_in_prepend_answer(iq, r)) {
550			if(!iter_add_prepend_answer(qstate, iq, r))
551				return 0;
552			continue;
553		}
554
555		if(ntohs(r->rk.type) == LDNS_RR_TYPE_CNAME &&
556			query_dname_compare(*mname, r->rk.dname) == 0 &&
557			!iter_find_rrset_in_prepend_answer(iq, r)) {
558			/* Add this relevant CNAME rrset to the prepend list.*/
559			if(!iter_add_prepend_answer(qstate, iq, r))
560				return 0;
561			get_cname_target(r, mname, mname_len);
562		}
563
564		/* Other rrsets in the section are ignored. */
565	}
566	/* add authority rrsets to authority prepend, for wildcarded CNAMEs */
567	for(i=msg->rep->an_numrrsets; i<msg->rep->an_numrrsets +
568		msg->rep->ns_numrrsets; i++) {
569		struct ub_packed_rrset_key* r = msg->rep->rrsets[i];
570		/* only add NSEC/NSEC3, as they may be needed for validation */
571		if(ntohs(r->rk.type) == LDNS_RR_TYPE_NSEC ||
572			ntohs(r->rk.type) == LDNS_RR_TYPE_NSEC3) {
573			if(!iter_add_prepend_auth(qstate, iq, r))
574				return 0;
575		}
576	}
577	return 1;
578}
579
580/** see if last resort is possible - does config allow queries to parent */
581static int
582can_have_last_resort(struct module_env* env, uint8_t* nm, size_t nmlen,
583	uint16_t qclass, struct delegpt** retdp)
584{
585	struct delegpt* fwddp;
586	struct iter_hints_stub* stub;
587	int labs = dname_count_labels(nm);
588	/* do not process a last resort (the parent side) if a stub
589	 * or forward is configured, because we do not want to go 'above'
590	 * the configured servers */
591	if(!dname_is_root(nm) && (stub = (struct iter_hints_stub*)
592		name_tree_find(&env->hints->tree, nm, nmlen, labs, qclass)) &&
593		/* has_parent side is turned off for stub_first, where we
594		 * are allowed to go to the parent */
595		stub->dp->has_parent_side_NS) {
596		if(retdp) *retdp = stub->dp;
597		return 0;
598	}
599	if((fwddp = forwards_find(env->fwds, nm, qclass)) &&
600		/* has_parent_side is turned off for forward_first, where
601		 * we are allowed to go to the parent */
602		fwddp->has_parent_side_NS) {
603		if(retdp) *retdp = fwddp;
604		return 0;
605	}
606	return 1;
607}
608
609/** see if target name is caps-for-id whitelisted */
610static int
611is_caps_whitelisted(struct iter_env* ie, struct iter_qstate* iq)
612{
613	if(!ie->caps_white) return 0; /* no whitelist, or no capsforid */
614	return name_tree_lookup(ie->caps_white, iq->qchase.qname,
615		iq->qchase.qname_len, dname_count_labels(iq->qchase.qname),
616		iq->qchase.qclass) != NULL;
617}
618
619/** create target count structure for this query */
620static void
621target_count_create(struct iter_qstate* iq)
622{
623	if(!iq->target_count) {
624		iq->target_count = (int*)calloc(2, sizeof(int));
625		/* if calloc fails we simply do not track this number */
626		if(iq->target_count)
627			iq->target_count[0] = 1;
628	}
629}
630
631static void
632target_count_increase(struct iter_qstate* iq, int num)
633{
634	target_count_create(iq);
635	if(iq->target_count)
636		iq->target_count[1] += num;
637}
638
639/**
640 * Generate a subrequest.
641 * Generate a local request event. Local events are tied to this module, and
642 * have a corresponding (first tier) event that is waiting for this event to
643 * resolve to continue.
644 *
645 * @param qname The query name for this request.
646 * @param qnamelen length of qname
647 * @param qtype The query type for this request.
648 * @param qclass The query class for this request.
649 * @param qstate The event that is generating this event.
650 * @param id: module id.
651 * @param iq: The iterator state that is generating this event.
652 * @param initial_state The initial response state (normally this
653 *          is QUERY_RESP_STATE, unless it is known that the request won't
654 *          need iterative processing
655 * @param finalstate The final state for the response to this request.
656 * @param subq_ret: if newly allocated, the subquerystate, or NULL if it does
657 * 	not need initialisation.
658 * @param v: if true, validation is done on the subquery.
659 * @return false on error (malloc).
660 */
661static int
662generate_sub_request(uint8_t* qname, size_t qnamelen, uint16_t qtype,
663	uint16_t qclass, struct module_qstate* qstate, int id,
664	struct iter_qstate* iq, enum iter_state initial_state,
665	enum iter_state finalstate, struct module_qstate** subq_ret, int v)
666{
667	struct module_qstate* subq = NULL;
668	struct iter_qstate* subiq = NULL;
669	uint16_t qflags = 0; /* OPCODE QUERY, no flags */
670	struct query_info qinf;
671	int prime = (finalstate == PRIME_RESP_STATE)?1:0;
672	int valrec = 0;
673	qinf.qname = qname;
674	qinf.qname_len = qnamelen;
675	qinf.qtype = qtype;
676	qinf.qclass = qclass;
677	qinf.local_alias = NULL;
678
679	/* RD should be set only when sending the query back through the INIT
680	 * state. */
681	if(initial_state == INIT_REQUEST_STATE)
682		qflags |= BIT_RD;
683	/* We set the CD flag so we can send this through the "head" of
684	 * the resolution chain, which might have a validator. We are
685	 * uninterested in validating things not on the direct resolution
686	 * path.  */
687	if(!v) {
688		qflags |= BIT_CD;
689		valrec = 1;
690	}
691
692	/* attach subquery, lookup existing or make a new one */
693	fptr_ok(fptr_whitelist_modenv_attach_sub(qstate->env->attach_sub));
694	if(!(*qstate->env->attach_sub)(qstate, &qinf, qflags, prime, valrec,
695		&subq)) {
696		return 0;
697	}
698	*subq_ret = subq;
699	if(subq) {
700		/* initialise the new subquery */
701		subq->curmod = id;
702		subq->ext_state[id] = module_state_initial;
703		subq->minfo[id] = regional_alloc(subq->region,
704			sizeof(struct iter_qstate));
705		if(!subq->minfo[id]) {
706			log_err("init subq: out of memory");
707			fptr_ok(fptr_whitelist_modenv_kill_sub(
708				qstate->env->kill_sub));
709			(*qstate->env->kill_sub)(subq);
710			return 0;
711		}
712		subiq = (struct iter_qstate*)subq->minfo[id];
713		memset(subiq, 0, sizeof(*subiq));
714		subiq->num_target_queries = 0;
715		target_count_create(iq);
716		subiq->target_count = iq->target_count;
717		if(iq->target_count)
718			iq->target_count[0] ++; /* extra reference */
719		subiq->num_current_queries = 0;
720		subiq->depth = iq->depth+1;
721		outbound_list_init(&subiq->outlist);
722		subiq->state = initial_state;
723		subiq->final_state = finalstate;
724		subiq->qchase = subq->qinfo;
725		subiq->chase_flags = subq->query_flags;
726		subiq->refetch_glue = 0;
727		if(qstate->env->cfg->qname_minimisation)
728			subiq->minimisation_state = INIT_MINIMISE_STATE;
729		else
730			subiq->minimisation_state = DONOT_MINIMISE_STATE;
731		memset(&subiq->qinfo_out, 0, sizeof(struct query_info));
732	}
733	return 1;
734}
735
736/**
737 * Generate and send a root priming request.
738 * @param qstate: the qtstate that triggered the need to prime.
739 * @param iq: iterator query state.
740 * @param id: module id.
741 * @param qclass: the class to prime.
742 * @return 0 on failure
743 */
744static int
745prime_root(struct module_qstate* qstate, struct iter_qstate* iq, int id,
746	uint16_t qclass)
747{
748	struct delegpt* dp;
749	struct module_qstate* subq;
750	verbose(VERB_DETAIL, "priming . %s NS",
751		sldns_lookup_by_id(sldns_rr_classes, (int)qclass)?
752		sldns_lookup_by_id(sldns_rr_classes, (int)qclass)->name:"??");
753	dp = hints_lookup_root(qstate->env->hints, qclass);
754	if(!dp) {
755		verbose(VERB_ALGO, "Cannot prime due to lack of hints");
756		return 0;
757	}
758	/* Priming requests start at the QUERYTARGETS state, skipping
759	 * the normal INIT state logic (which would cause an infloop). */
760	if(!generate_sub_request((uint8_t*)"\000", 1, LDNS_RR_TYPE_NS,
761		qclass, qstate, id, iq, QUERYTARGETS_STATE, PRIME_RESP_STATE,
762		&subq, 0)) {
763		verbose(VERB_ALGO, "could not prime root");
764		return 0;
765	}
766	if(subq) {
767		struct iter_qstate* subiq =
768			(struct iter_qstate*)subq->minfo[id];
769		/* Set the initial delegation point to the hint.
770		 * copy dp, it is now part of the root prime query.
771		 * dp was part of in the fixed hints structure. */
772		subiq->dp = delegpt_copy(dp, subq->region);
773		if(!subiq->dp) {
774			log_err("out of memory priming root, copydp");
775			fptr_ok(fptr_whitelist_modenv_kill_sub(
776				qstate->env->kill_sub));
777			(*qstate->env->kill_sub)(subq);
778			return 0;
779		}
780		/* there should not be any target queries. */
781		subiq->num_target_queries = 0;
782		subiq->dnssec_expected = iter_indicates_dnssec(
783			qstate->env, subiq->dp, NULL, subq->qinfo.qclass);
784	}
785
786	/* this module stops, our submodule starts, and does the query. */
787	qstate->ext_state[id] = module_wait_subquery;
788	return 1;
789}
790
791/**
792 * Generate and process a stub priming request. This method tests for the
793 * need to prime a stub zone, so it is safe to call for every request.
794 *
795 * @param qstate: the qtstate that triggered the need to prime.
796 * @param iq: iterator query state.
797 * @param id: module id.
798 * @param qname: request name.
799 * @param qclass: request class.
800 * @return true if a priming subrequest was made, false if not. The will only
801 *         issue a priming request if it detects an unprimed stub.
802 *         Uses value of 2 to signal during stub-prime in root-prime situation
803 *         that a noprime-stub is available and resolution can continue.
804 */
805static int
806prime_stub(struct module_qstate* qstate, struct iter_qstate* iq, int id,
807	uint8_t* qname, uint16_t qclass)
808{
809	/* Lookup the stub hint. This will return null if the stub doesn't
810	 * need to be re-primed. */
811	struct iter_hints_stub* stub;
812	struct delegpt* stub_dp;
813	struct module_qstate* subq;
814
815	if(!qname) return 0;
816	stub = hints_lookup_stub(qstate->env->hints, qname, qclass, iq->dp);
817	/* The stub (if there is one) does not need priming. */
818	if(!stub)
819		return 0;
820	stub_dp = stub->dp;
821	/* if we have an auth_zone dp, and stub is equal, don't prime stub
822	 * yet, unless we want to fallback and avoid the auth_zone */
823	if(!iq->auth_zone_avoid && iq->dp && iq->dp->auth_dp &&
824		query_dname_compare(iq->dp->name, stub_dp->name) == 0)
825		return 0;
826
827	/* is it a noprime stub (always use) */
828	if(stub->noprime) {
829		int r = 0;
830		if(iq->dp == NULL) r = 2;
831		/* copy the dp out of the fixed hints structure, so that
832		 * it can be changed when servicing this query */
833		iq->dp = delegpt_copy(stub_dp, qstate->region);
834		if(!iq->dp) {
835			log_err("out of memory priming stub");
836			errinf(qstate, "malloc failure, priming stub");
837			(void)error_response(qstate, id, LDNS_RCODE_SERVFAIL);
838			return 1; /* return 1 to make module stop, with error */
839		}
840		log_nametypeclass(VERB_DETAIL, "use stub", stub_dp->name,
841			LDNS_RR_TYPE_NS, qclass);
842		return r;
843	}
844
845	/* Otherwise, we need to (re)prime the stub. */
846	log_nametypeclass(VERB_DETAIL, "priming stub", stub_dp->name,
847		LDNS_RR_TYPE_NS, qclass);
848
849	/* Stub priming events start at the QUERYTARGETS state to avoid the
850	 * redundant INIT state processing. */
851	if(!generate_sub_request(stub_dp->name, stub_dp->namelen,
852		LDNS_RR_TYPE_NS, qclass, qstate, id, iq,
853		QUERYTARGETS_STATE, PRIME_RESP_STATE, &subq, 0)) {
854		verbose(VERB_ALGO, "could not prime stub");
855		errinf(qstate, "could not generate lookup for stub prime");
856		(void)error_response(qstate, id, LDNS_RCODE_SERVFAIL);
857		return 1; /* return 1 to make module stop, with error */
858	}
859	if(subq) {
860		struct iter_qstate* subiq =
861			(struct iter_qstate*)subq->minfo[id];
862
863		/* Set the initial delegation point to the hint. */
864		/* make copy to avoid use of stub dp by different qs/threads */
865		subiq->dp = delegpt_copy(stub_dp, subq->region);
866		if(!subiq->dp) {
867			log_err("out of memory priming stub, copydp");
868			fptr_ok(fptr_whitelist_modenv_kill_sub(
869				qstate->env->kill_sub));
870			(*qstate->env->kill_sub)(subq);
871			errinf(qstate, "malloc failure, in stub prime");
872			(void)error_response(qstate, id, LDNS_RCODE_SERVFAIL);
873			return 1; /* return 1 to make module stop, with error */
874		}
875		/* there should not be any target queries -- although there
876		 * wouldn't be anyway, since stub hints never have
877		 * missing targets. */
878		subiq->num_target_queries = 0;
879		subiq->wait_priming_stub = 1;
880		subiq->dnssec_expected = iter_indicates_dnssec(
881			qstate->env, subiq->dp, NULL, subq->qinfo.qclass);
882	}
883
884	/* this module stops, our submodule starts, and does the query. */
885	qstate->ext_state[id] = module_wait_subquery;
886	return 1;
887}
888
889/**
890 * Generate a delegation point for an auth zone (unless cached dp is better)
891 * false on alloc failure.
892 */
893static int
894auth_zone_delegpt(struct module_qstate* qstate, struct iter_qstate* iq,
895	uint8_t* delname, size_t delnamelen)
896{
897	struct auth_zone* z;
898	if(iq->auth_zone_avoid)
899		return 1;
900	if(!delname) {
901		delname = iq->qchase.qname;
902		delnamelen = iq->qchase.qname_len;
903	}
904	lock_rw_rdlock(&qstate->env->auth_zones->lock);
905	z = auth_zones_find_zone(qstate->env->auth_zones, delname, delnamelen,
906		qstate->qinfo.qclass);
907	if(!z) {
908		lock_rw_unlock(&qstate->env->auth_zones->lock);
909		return 1;
910	}
911	lock_rw_rdlock(&z->lock);
912	lock_rw_unlock(&qstate->env->auth_zones->lock);
913	if(z->for_upstream) {
914		if(iq->dp && query_dname_compare(z->name, iq->dp->name) == 0
915			&& iq->dp->auth_dp && qstate->blacklist &&
916			z->fallback_enabled) {
917			/* cache is blacklisted and fallback, and we
918			 * already have an auth_zone dp */
919			if(verbosity>=VERB_ALGO) {
920				char buf[255+1];
921				dname_str(z->name, buf);
922				verbose(VERB_ALGO, "auth_zone %s "
923				  "fallback because cache blacklisted",
924				  buf);
925			}
926			lock_rw_unlock(&z->lock);
927			iq->dp = NULL;
928			return 1;
929		}
930		if(iq->dp==NULL || dname_subdomain_c(z->name, iq->dp->name)) {
931			struct delegpt* dp;
932			if(qstate->blacklist && z->fallback_enabled) {
933				/* cache is blacklisted because of a DNSSEC
934				 * validation failure, and the zone allows
935				 * fallback to the internet, query there. */
936				if(verbosity>=VERB_ALGO) {
937					char buf[255+1];
938					dname_str(z->name, buf);
939					verbose(VERB_ALGO, "auth_zone %s "
940					  "fallback because cache blacklisted",
941					  buf);
942				}
943				lock_rw_unlock(&z->lock);
944				return 1;
945			}
946			dp = (struct delegpt*)regional_alloc_zero(
947				qstate->region, sizeof(*dp));
948			if(!dp) {
949				log_err("alloc failure");
950				if(z->fallback_enabled) {
951					lock_rw_unlock(&z->lock);
952					return 1; /* just fallback */
953				}
954				lock_rw_unlock(&z->lock);
955				errinf(qstate, "malloc failure");
956				return 0;
957			}
958			dp->name = regional_alloc_init(qstate->region,
959				z->name, z->namelen);
960			if(!dp->name) {
961				log_err("alloc failure");
962				if(z->fallback_enabled) {
963					lock_rw_unlock(&z->lock);
964					return 1; /* just fallback */
965				}
966				lock_rw_unlock(&z->lock);
967				errinf(qstate, "malloc failure");
968				return 0;
969			}
970			dp->namelen = z->namelen;
971			dp->namelabs = z->namelabs;
972			dp->auth_dp = 1;
973			iq->dp = dp;
974		}
975	}
976
977	lock_rw_unlock(&z->lock);
978	return 1;
979}
980
981/**
982 * Generate A and AAAA checks for glue that is in-zone for the referral
983 * we just got to obtain authoritative information on the addresses.
984 *
985 * @param qstate: the qtstate that triggered the need to prime.
986 * @param iq: iterator query state.
987 * @param id: module id.
988 */
989static void
990generate_a_aaaa_check(struct module_qstate* qstate, struct iter_qstate* iq,
991	int id)
992{
993	struct iter_env* ie = (struct iter_env*)qstate->env->modinfo[id];
994	struct module_qstate* subq;
995	size_t i;
996	struct reply_info* rep = iq->response->rep;
997	struct ub_packed_rrset_key* s;
998	log_assert(iq->dp);
999
1000	if(iq->depth == ie->max_dependency_depth)
1001		return;
1002	/* walk through additional, and check if in-zone,
1003	 * only relevant A, AAAA are left after scrub anyway */
1004	for(i=rep->an_numrrsets+rep->ns_numrrsets; i<rep->rrset_count; i++) {
1005		s = rep->rrsets[i];
1006		/* check *ALL* addresses that are transmitted in additional*/
1007		/* is it an address ? */
1008		if( !(ntohs(s->rk.type)==LDNS_RR_TYPE_A ||
1009			ntohs(s->rk.type)==LDNS_RR_TYPE_AAAA)) {
1010			continue;
1011		}
1012		/* is this query the same as the A/AAAA check for it */
1013		if(qstate->qinfo.qtype == ntohs(s->rk.type) &&
1014			qstate->qinfo.qclass == ntohs(s->rk.rrset_class) &&
1015			query_dname_compare(qstate->qinfo.qname,
1016				s->rk.dname)==0 &&
1017			(qstate->query_flags&BIT_RD) &&
1018			!(qstate->query_flags&BIT_CD))
1019			continue;
1020
1021		/* generate subrequest for it */
1022		log_nametypeclass(VERB_ALGO, "schedule addr fetch",
1023			s->rk.dname, ntohs(s->rk.type),
1024			ntohs(s->rk.rrset_class));
1025		if(!generate_sub_request(s->rk.dname, s->rk.dname_len,
1026			ntohs(s->rk.type), ntohs(s->rk.rrset_class),
1027			qstate, id, iq,
1028			INIT_REQUEST_STATE, FINISHED_STATE, &subq, 1)) {
1029			verbose(VERB_ALGO, "could not generate addr check");
1030			return;
1031		}
1032		/* ignore subq - not need for more init */
1033	}
1034}
1035
1036/**
1037 * Generate a NS check request to obtain authoritative information
1038 * on an NS rrset.
1039 *
1040 * @param qstate: the qtstate that triggered the need to prime.
1041 * @param iq: iterator query state.
1042 * @param id: module id.
1043 */
1044static void
1045generate_ns_check(struct module_qstate* qstate, struct iter_qstate* iq, int id)
1046{
1047	struct iter_env* ie = (struct iter_env*)qstate->env->modinfo[id];
1048	struct module_qstate* subq;
1049	log_assert(iq->dp);
1050
1051	if(iq->depth == ie->max_dependency_depth)
1052		return;
1053	if(!can_have_last_resort(qstate->env, iq->dp->name, iq->dp->namelen,
1054		iq->qchase.qclass, NULL))
1055		return;
1056	/* is this query the same as the nscheck? */
1057	if(qstate->qinfo.qtype == LDNS_RR_TYPE_NS &&
1058		query_dname_compare(iq->dp->name, qstate->qinfo.qname)==0 &&
1059		(qstate->query_flags&BIT_RD) && !(qstate->query_flags&BIT_CD)){
1060		/* spawn off A, AAAA queries for in-zone glue to check */
1061		generate_a_aaaa_check(qstate, iq, id);
1062		return;
1063	}
1064	/* no need to get the NS record for DS, it is above the zonecut */
1065	if(qstate->qinfo.qtype == LDNS_RR_TYPE_DS)
1066		return;
1067
1068	log_nametypeclass(VERB_ALGO, "schedule ns fetch",
1069		iq->dp->name, LDNS_RR_TYPE_NS, iq->qchase.qclass);
1070	if(!generate_sub_request(iq->dp->name, iq->dp->namelen,
1071		LDNS_RR_TYPE_NS, iq->qchase.qclass, qstate, id, iq,
1072		INIT_REQUEST_STATE, FINISHED_STATE, &subq, 1)) {
1073		verbose(VERB_ALGO, "could not generate ns check");
1074		return;
1075	}
1076	if(subq) {
1077		struct iter_qstate* subiq =
1078			(struct iter_qstate*)subq->minfo[id];
1079
1080		/* make copy to avoid use of stub dp by different qs/threads */
1081		/* refetch glue to start higher up the tree */
1082		subiq->refetch_glue = 1;
1083		subiq->dp = delegpt_copy(iq->dp, subq->region);
1084		if(!subiq->dp) {
1085			log_err("out of memory generating ns check, copydp");
1086			fptr_ok(fptr_whitelist_modenv_kill_sub(
1087				qstate->env->kill_sub));
1088			(*qstate->env->kill_sub)(subq);
1089			return;
1090		}
1091	}
1092}
1093
1094/**
1095 * Generate a DNSKEY prefetch query to get the DNSKEY for the DS record we
1096 * just got in a referral (where we have dnssec_expected, thus have trust
1097 * anchors above it).  Note that right after calling this routine the
1098 * iterator detached subqueries (because of following the referral), and thus
1099 * the DNSKEY query becomes detached, its return stored in the cache for
1100 * later lookup by the validator.  This cache lookup by the validator avoids
1101 * the roundtrip incurred by the DNSKEY query.  The DNSKEY query is now
1102 * performed at about the same time the original query is sent to the domain,
1103 * thus the two answers are likely to be returned at about the same time,
1104 * saving a roundtrip from the validated lookup.
1105 *
1106 * @param qstate: the qtstate that triggered the need to prime.
1107 * @param iq: iterator query state.
1108 * @param id: module id.
1109 */
1110static void
1111generate_dnskey_prefetch(struct module_qstate* qstate,
1112	struct iter_qstate* iq, int id)
1113{
1114	struct module_qstate* subq;
1115	log_assert(iq->dp);
1116
1117	/* is this query the same as the prefetch? */
1118	if(qstate->qinfo.qtype == LDNS_RR_TYPE_DNSKEY &&
1119		query_dname_compare(iq->dp->name, qstate->qinfo.qname)==0 &&
1120		(qstate->query_flags&BIT_RD) && !(qstate->query_flags&BIT_CD)){
1121		return;
1122	}
1123
1124	/* if the DNSKEY is in the cache this lookup will stop quickly */
1125	log_nametypeclass(VERB_ALGO, "schedule dnskey prefetch",
1126		iq->dp->name, LDNS_RR_TYPE_DNSKEY, iq->qchase.qclass);
1127	if(!generate_sub_request(iq->dp->name, iq->dp->namelen,
1128		LDNS_RR_TYPE_DNSKEY, iq->qchase.qclass, qstate, id, iq,
1129		INIT_REQUEST_STATE, FINISHED_STATE, &subq, 0)) {
1130		/* we'll be slower, but it'll work */
1131		verbose(VERB_ALGO, "could not generate dnskey prefetch");
1132		return;
1133	}
1134	if(subq) {
1135		struct iter_qstate* subiq =
1136			(struct iter_qstate*)subq->minfo[id];
1137		/* this qstate has the right delegation for the dnskey lookup*/
1138		/* make copy to avoid use of stub dp by different qs/threads */
1139		subiq->dp = delegpt_copy(iq->dp, subq->region);
1140		/* if !subiq->dp, it'll start from the cache, no problem */
1141	}
1142}
1143
1144/**
1145 * See if the query needs forwarding.
1146 *
1147 * @param qstate: query state.
1148 * @param iq: iterator query state.
1149 * @return true if the request is forwarded, false if not.
1150 * 	If returns true but, iq->dp is NULL then a malloc failure occurred.
1151 */
1152static int
1153forward_request(struct module_qstate* qstate, struct iter_qstate* iq)
1154{
1155	struct delegpt* dp;
1156	uint8_t* delname = iq->qchase.qname;
1157	size_t delnamelen = iq->qchase.qname_len;
1158	if(iq->refetch_glue && iq->dp) {
1159		delname = iq->dp->name;
1160		delnamelen = iq->dp->namelen;
1161	}
1162	/* strip one label off of DS query to lookup higher for it */
1163	if( (iq->qchase.qtype == LDNS_RR_TYPE_DS || iq->refetch_glue)
1164		&& !dname_is_root(iq->qchase.qname))
1165		dname_remove_label(&delname, &delnamelen);
1166	dp = forwards_lookup(qstate->env->fwds, delname, iq->qchase.qclass);
1167	if(!dp)
1168		return 0;
1169	/* send recursion desired to forward addr */
1170	iq->chase_flags |= BIT_RD;
1171	iq->dp = delegpt_copy(dp, qstate->region);
1172	/* iq->dp checked by caller */
1173	verbose(VERB_ALGO, "forwarding request");
1174	return 1;
1175}
1176
1177/**
1178 * Process the initial part of the request handling. This state roughly
1179 * corresponds to resolver algorithms steps 1 (find answer in cache) and 2
1180 * (find the best servers to ask).
1181 *
1182 * Note that all requests start here, and query restarts revisit this state.
1183 *
1184 * This state either generates: 1) a response, from cache or error, 2) a
1185 * priming event, or 3) forwards the request to the next state (init2,
1186 * generally).
 *
 * The checks are made in this order: restart count limit, dependency depth
 * limit, qclass ANY handling, cache lookup, forward zone lookup, and then
 * the loop that looks for a usable delegation point.
1187 *
1188 * @param qstate: query state.
1189 * @param iq: iterator query state.
1190 * @param ie: iterator shared global environment.
1191 * @param id: module id.
1192 * @return true if the event needs more request processing immediately,
1193 *         false if not.
1194 */
1195static int
1196processInitRequest(struct module_qstate* qstate, struct iter_qstate* iq,
1197	struct iter_env* ie, int id)
1198{
1199	uint8_t* delname;
1200	size_t delnamelen;
1201	struct dns_msg* msg = NULL;
1202
1203	log_query_info(VERB_DETAIL, "resolving", &qstate->qinfo);
1204	/* check effort */
1205
1206	/* We enforce a maximum number of query restarts. This is primarily a
1207	 * cheap way to prevent CNAME loops. */
1208	if(iq->query_restart_count > MAX_RESTART_COUNT) {
1209		verbose(VERB_QUERY, "request has exceeded the maximum number"
1210			" of query restarts with %d", iq->query_restart_count);
1211		errinf(qstate, "request has exceeded the maximum number "
1212			"restarts (eg. indirections)");
1213		if(iq->qchase.qname)
1214			errinf_dname(qstate, "stop at", iq->qchase.qname);
1215		return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
1216	}
1217
1218	/* We enforce a maximum recursion/dependency depth -- in general,
1219	 * this is unnecessary for dependency loops (although it will
1220	 * catch those), but it provides a sensible limit to the amount
1221	 * of work required to answer a given query. */
1222	verbose(VERB_ALGO, "request has dependency depth of %d", iq->depth);
1223	if(iq->depth > ie->max_dependency_depth) {
1224		verbose(VERB_QUERY, "request has exceeded the maximum "
1225			"dependency depth with depth of %d", iq->depth);
1226		errinf(qstate, "request has exceeded the maximum dependency "
1227			"depth (eg. nameserver lookup recursion)");
1228		return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
1229	}
1230
1231	/* If the request is qclass=ANY, setup to generate each class */
1232	if(qstate->qinfo.qclass == LDNS_RR_CLASS_ANY) {
		/* the chased class starts at 0 and the class collection
		 * state takes over from here */
1233		iq->qchase.qclass = 0;
1234		return next_state(iq, COLLECT_CLASS_STATE);
1235	}
1236
1237	/*
1238	 * If we are restricted by a forward-zone or a stub-zone, we
1239	 * can't re-fetch glue for this delegation point.
1240	 * we won't try to re-fetch glue if the iq->dp is null.
1241	 */
1242	if (iq->refetch_glue &&
1243	        iq->dp &&
1244	        !can_have_last_resort(qstate->env, iq->dp->name,
1245	             iq->dp->namelen, iq->qchase.qclass, NULL)) {
1246	    iq->refetch_glue = 0;
1247	}
1248
1249	/* Resolver Algorithm Step 1 -- Look for the answer in local data. */
1250
1251	/* This either results in a query restart (CNAME cache response), a
1252	 * terminating response (ANSWER), or a cache miss (null). */
1253
1254	if (iter_stub_fwd_no_cache(qstate, &iq->qchase)) {
1255		/* Asked to not query cache. */
1256		verbose(VERB_ALGO, "no-cache set, going to the network");
1257		qstate->no_cache_lookup = 1;
1258		qstate->no_cache_store = 1;
1259		msg = NULL;
1260	} else if(qstate->blacklist) {
1261		/* if cache, or anything else, was blacklisted then
1262		 * getting older results from cache is a bad idea, no cache */
1263		verbose(VERB_ALGO, "cache blacklisted, going to the network");
1264		msg = NULL;
1265	} else if(!qstate->no_cache_lookup) {
1266		msg = dns_cache_lookup(qstate->env, iq->qchase.qname,
1267			iq->qchase.qname_len, iq->qchase.qtype,
1268			iq->qchase.qclass, qstate->query_flags,
1269			qstate->region, qstate->env->scratch, 0);
1270		if(!msg && qstate->env->neg_cache &&
1271			iter_qname_indicates_dnssec(qstate->env, &iq->qchase)) {
1272			/* lookup in negative cache; may result in
1273			 * NOERROR/NODATA or NXDOMAIN answers that need validation */
1274			msg = val_neg_getmsg(qstate->env->neg_cache, &iq->qchase,
1275				qstate->region, qstate->env->rrset_cache,
1276				qstate->env->scratch_buffer,
1277				*qstate->env->now, 1/*add SOA*/, NULL,
1278				qstate->env->cfg);
1279		}
1280		/* item taken from cache does not match our query name, thus
1281		 * security needs to be re-examined later */
1282		if(msg && query_dname_compare(qstate->qinfo.qname,
1283			iq->qchase.qname) != 0)
1284			msg->rep->security = sec_status_unchecked;
1285	}
1286	if(msg) {
1287		/* handle positive cache response */
1288		enum response_type type = response_type_from_cache(msg,
1289			&iq->qchase);
1290		if(verbosity >= VERB_ALGO) {
1291			log_dns_msg("msg from cache lookup", &msg->qinfo,
1292				msg->rep);
1293			verbose(VERB_ALGO, "msg ttl is %d, prefetch ttl %d",
1294				(int)msg->rep->ttl,
1295				(int)msg->rep->prefetch_ttl);
1296		}
1297
1298		if(type == RESPONSE_TYPE_CNAME) {
1299			uint8_t* sname = 0;
1300			size_t slen = 0;
1301			verbose(VERB_ALGO, "returning CNAME response from "
1302				"cache");
1303			if(!handle_cname_response(qstate, iq, msg,
1304				&sname, &slen)) {
1305				errinf(qstate, "failed to prepend CNAME "
1306					"components, malloc failure");
1307				return error_response(qstate, id,
1308					LDNS_RCODE_SERVFAIL);
1309			}
			/* continue the chase with the name that
			 * handle_cname_response stored in sname */
1310			iq->qchase.qname = sname;
1311			iq->qchase.qname_len = slen;
1312			/* This *is* a query restart, even if it is a cheap
1313			 * one. */
1314			iq->dp = NULL;
1315			iq->refetch_glue = 0;
1316			iq->query_restart_count++;
1317			iq->sent_count = 0;
1318			sock_list_insert(&qstate->reply_origin, NULL, 0, qstate->region);
1319			if(qstate->env->cfg->qname_minimisation)
1320				iq->minimisation_state = INIT_MINIMISE_STATE;
1321			return next_state(iq, INIT_REQUEST_STATE);
1322		}
1323
1324		/* if from cache, NULL, else insert 'cache IP' len=0 */
1325		if(qstate->reply_origin)
1326			sock_list_insert(&qstate->reply_origin, NULL, 0, qstate->region);
1327		if(FLAGS_GET_RCODE(msg->rep->flags) == LDNS_RCODE_SERVFAIL)
1328			errinf(qstate, "SERVFAIL in cache");
1329		/* it is an answer, response, to final state */
1330		verbose(VERB_ALGO, "returning answer from cache.");
1331		iq->response = msg;
1332		return final_state(iq);
1333	}
1334
1335	/* attempt to forward the request */
1336	if(forward_request(qstate, iq))
1337	{
		/* forward_request returns true with a NULL iq->dp when the
		 * copy of the forward delegation point failed */
1338		if(!iq->dp) {
1339			log_err("alloc failure for forward dp");
1340			errinf(qstate, "malloc failure for forward zone");
1341			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
1342		}
		/* a forwarded query does not refetch glue and is not
		 * qname minimised */
1343		iq->refetch_glue = 0;
1344		iq->minimisation_state = DONOT_MINIMISE_STATE;
1345		/* the request has been forwarded.
1346		 * forwarded requests need to be immediately sent to the
1347		 * next state, QUERYTARGETS. */
1348		return next_state(iq, QUERYTARGETS_STATE);
1349	}
1350
1351	/* Resolver Algorithm Step 2 -- find the "best" servers. */
1352
1353	/* first, adjust for DS queries. To avoid the grandparent problem,
1354	 * we just look for the closest set of server to the parent of qname.
1355	 * When re-fetching glue we also need to ask the parent.
1356	 */
1357	if(iq->refetch_glue) {
1358		if(!iq->dp) {
1359			log_err("internal or malloc fail: no dp for refetch");
1360			errinf(qstate, "malloc failure, for delegation info");
1361			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
1362		}
1363		delname = iq->dp->name;
1364		delnamelen = iq->dp->namelen;
1365	} else {
1366		delname = iq->qchase.qname;
1367		delnamelen = iq->qchase.qname_len;
1368	}
1369	if(iq->qchase.qtype == LDNS_RR_TYPE_DS || iq->refetch_glue ||
1370	   (iq->qchase.qtype == LDNS_RR_TYPE_NS && qstate->prefetch_leeway
1371	   && can_have_last_resort(qstate->env, delname, delnamelen, iq->qchase.qclass, NULL))) {
1372		/* remove first label from delname, root goes to hints,
1373		 * but only to fetch glue, not for qtype=DS. */
1374		/* also when prefetching an NS record, fetch it again from
1375		 * its parent, just as if it expired, so that you do not
1376		 * get stuck on an older nameserver that gives old NSrecords */
1377		if(dname_is_root(delname) && (iq->refetch_glue ||
1378			(iq->qchase.qtype == LDNS_RR_TYPE_NS &&
1379			qstate->prefetch_leeway)))
1380			delname = NULL; /* go to root priming */
1381		else 	dname_remove_label(&delname, &delnamelen);
1382	}
1383	/* delname is the name to lookup a delegation for. If NULL rootprime */
	/* The loop ends with a break once iq->dp is settled; every other
	 * way out is a return.  A useless delegation point from the cache
	 * makes the loop run again for the parent name. */
1384	while(1) {
1385
1386		/* Lookup the delegation in the cache. If null, then the
1387		 * cache needs to be primed for the qclass. */
1388		if(delname)
1389		     iq->dp = dns_cache_find_delegation(qstate->env, delname,
1390			delnamelen, iq->qchase.qtype, iq->qchase.qclass,
1391			qstate->region, &iq->deleg_msg,
1392			*qstate->env->now+qstate->prefetch_leeway);
1393		else iq->dp = NULL;
1394
1395		/* If the cache has returned nothing, then we have a
1396		 * root priming situation. */
1397		if(iq->dp == NULL) {
1398			int r;
1399			/* if under auth zone, no prime needed */
1400			if(!auth_zone_delegpt(qstate, iq, delname, delnamelen))
1401				return error_response(qstate, id,
1402					LDNS_RCODE_SERVFAIL);
1403			if(iq->dp) /* use auth zone dp */
1404				return next_state(iq, INIT_REQUEST_2_STATE);
1405			/* if there is a stub, then no root prime needed */
1406			r = prime_stub(qstate, iq, id, delname,
1407				iq->qchase.qclass);
1408			if(r == 2)
1409				break; /* got noprime-stub-zone, continue */
1410			else if(r)
1411				return 0; /* stub prime request made */
1412			if(forwards_lookup_root(qstate->env->fwds,
1413				iq->qchase.qclass)) {
1414				/* forward zone root, no root prime needed */
1415				/* fill in some dp - safety belt */
1416				iq->dp = hints_lookup_root(qstate->env->hints,
1417					iq->qchase.qclass);
1418				if(!iq->dp) {
1419					log_err("internal error: no hints dp");
1420					errinf(qstate, "no hints for this class");
1421					return error_response(qstate, id,
1422						LDNS_RCODE_SERVFAIL);
1423				}
				/* continue with a copy in the query region,
				 * not with the dp returned by the lookup */
1424				iq->dp = delegpt_copy(iq->dp, qstate->region);
1425				if(!iq->dp) {
1426					log_err("out of memory in safety belt");
1427					errinf(qstate, "malloc failure, in safety belt");
1428					return error_response(qstate, id,
1429						LDNS_RCODE_SERVFAIL);
1430				}
1431				return next_state(iq, INIT_REQUEST_2_STATE);
1432			}
1433			/* Note that the result of this will set a new
1434			 * DelegationPoint based on the result of priming. */
			/* NOTE(review): this path answers REFUSED and adds no
			 * errinf text here, the failures above use SERVFAIL
			 * with errinf -- confirm that this is intended */
1435			if(!prime_root(qstate, iq, id, iq->qchase.qclass))
1436				return error_response(qstate, id,
1437					LDNS_RCODE_REFUSED);
1438
1439			/* priming creates and sends a subordinate query, with
1440			 * this query as the parent. So further processing for
1441			 * this event will stop until reactivated by the
1442			 * results of priming. */
1443			return 0;
1444		}
1445		if(!iq->ratelimit_ok && qstate->prefetch_leeway)
1446			iq->ratelimit_ok = 1; /* allow prefetches, this keeps
1447			otherwise valid data in the cache */
1448		if(!iq->ratelimit_ok && infra_ratelimit_exceeded(
1449			qstate->env->infra_cache, iq->dp->name,
1450			iq->dp->namelen, *qstate->env->now)) {
1451			/* and increment the rate, so that the rate for time
1452			 * now will also exceed the rate, keeping cache fresh */
1453			(void)infra_ratelimit_inc(qstate->env->infra_cache,
1454				iq->dp->name, iq->dp->namelen,
1455				*qstate->env->now, &qstate->qinfo,
1456				qstate->reply);
1457			/* see if we are passed through with slip factor */
			/* NOTE(review): both log lines below print class IN,
			 * not iq->qchase.qclass */
1458			if(qstate->env->cfg->ratelimit_factor != 0 &&
1459				ub_random_max(qstate->env->rnd,
1460				    qstate->env->cfg->ratelimit_factor) == 1) {
1461				iq->ratelimit_ok = 1;
1462				log_nametypeclass(VERB_ALGO, "ratelimit allowed through for "
1463					"delegation point", iq->dp->name,
1464					LDNS_RR_TYPE_NS, LDNS_RR_CLASS_IN);
1465			} else {
				/* the counter is in the shared iterator
				 * environment, it is updated under its lock */
1466				lock_basic_lock(&ie->queries_ratelimit_lock);
1467				ie->num_queries_ratelimited++;
1468				lock_basic_unlock(&ie->queries_ratelimit_lock);
1469				log_nametypeclass(VERB_ALGO, "ratelimit exceeded with "
1470					"delegation point", iq->dp->name,
1471					LDNS_RR_TYPE_NS, LDNS_RR_CLASS_IN);
1472				qstate->was_ratelimited = 1;
1473				errinf(qstate, "query was ratelimited");
1474				errinf_dname(qstate, "for zone", iq->dp->name);
1475				return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
1476			}
1477		}
1478
1479		/* see if this dp not useless.
1480		 * It is useless if:
1481		 *	o all NS items are required glue.
1482		 *	  or the query is for NS item that is required glue.
1483		 *	o no addresses are provided.
1484		 *	o RD qflag is on.
1485		 * Instead, go up one level, and try to get even further
1486		 * If the root was useless, use safety belt information.
1487		 * Only check cache returns, because replies for servers
1488		 * could be useless but lead to loops (bumping into the
1489		 * same server reply) if useless-checked.
1490		 */
1491		if(iter_dp_is_useless(&qstate->qinfo, qstate->query_flags,
1492			iq->dp)) {
1493			struct delegpt* retdp = NULL;
1494			if(!can_have_last_resort(qstate->env, iq->dp->name, iq->dp->namelen, iq->qchase.qclass, &retdp)) {
				/* if can_have_last_resort stored a dp in
				 * retdp, a copy of that dp is used */
1495				if(retdp) {
1496					verbose(VERB_QUERY, "cache has stub "
1497						"or fwd but no addresses, "
1498						"fallback to config");
1499					iq->dp = delegpt_copy(retdp,
1500						qstate->region);
1501					if(!iq->dp) {
1502						log_err("out of memory in "
1503							"stub/fwd fallback");
1504						errinf(qstate, "malloc failure, for fallback to config");
1505						return error_response(qstate,
1506						    id, LDNS_RCODE_SERVFAIL);
1507					}
1508					break;
1509				}
1510				verbose(VERB_ALGO, "useless dp "
1511					"but cannot go up, servfail");
1512				delegpt_log(VERB_ALGO, iq->dp);
1513				errinf(qstate, "no useful nameservers, "
1514					"and cannot go up");
1515				errinf_dname(qstate, "for zone", iq->dp->name);
1516				return error_response(qstate, id,
1517					LDNS_RCODE_SERVFAIL);
1518			}
1519			if(dname_is_root(iq->dp->name)) {
1520				/* use safety belt */
1521				verbose(VERB_QUERY, "Cache has root NS but "
1522				"no addresses. Fallback to the safety belt.");
1523				iq->dp = hints_lookup_root(qstate->env->hints,
1524					iq->qchase.qclass);
1525				/* note deleg_msg is from previous lookup,
1526				 * but RD is on, so it is not used */
				/* NOTE(review): REFUSED and no errinf here,
				 * the same missing-hints case in the forward
				 * root branch above answers SERVFAIL with
				 * errinf -- confirm that this is intended */
1527				if(!iq->dp) {
1528					log_err("internal error: no hints dp");
1529					return error_response(qstate, id,
1530						LDNS_RCODE_REFUSED);
1531				}
1532				iq->dp = delegpt_copy(iq->dp, qstate->region);
1533				if(!iq->dp) {
1534					log_err("out of memory in safety belt");
1535					errinf(qstate, "malloc failure, in safety belt, for root");
1536					return error_response(qstate, id,
1537						LDNS_RCODE_SERVFAIL);
1538				}
1539				break;
1540			} else {
1541				verbose(VERB_ALGO,
1542					"cache delegation was useless:");
1543				delegpt_log(VERB_ALGO, iq->dp);
1544				/* go up */
				/* the next loop iteration looks up the name
				 * of this dp with its first label removed */
1545				delname = iq->dp->name;
1546				delnamelen = iq->dp->namelen;
1547				dname_remove_label(&delname, &delnamelen);
1548			}
1549		} else break;
1550	}
1551
1552	verbose(VERB_ALGO, "cache delegation returns delegpt");
1553	delegpt_log(VERB_ALGO, iq->dp);
1554
1555	/* Otherwise, set the current delegation point and move on to the
1556	 * next state. */
1557	return next_state(iq, INIT_REQUEST_2_STATE);
1558}
1559
1560/**
1561 * Process the second part of the initial request handling. This state
1562 * basically exists so that queries that generate root priming events have
1563 * the same init processing as ones that do not. Request events that reach
1564 * this state must have a valid currentDelegationPoint set.
1565 *
1566 * This part is primarily handling stub zone priming. Events that reach this
1567 * state must have a current delegation point.
1568 *
1569 * @param qstate: query state.
1570 * @param iq: iterator query state.
1571 * @param id: module id.
1572 * @return true if the event needs more request processing immediately,
1573 *         false if not.
1574 */
1575static int
1576processInitRequest2(struct module_qstate* qstate, struct iter_qstate* iq,
1577	int id)
1578{
1579	uint8_t* delname;
1580	size_t delnamelen;
1581	log_query_info(VERB_QUERY, "resolving (init part 2): ",
1582		&qstate->qinfo);
1583
1584	delname = iq->qchase.qname;
1585	delnamelen = iq->qchase.qname_len;
1586	if(iq->refetch_glue) {
1587		struct iter_hints_stub* stub;
1588		if(!iq->dp) {
1589			log_err("internal or malloc fail: no dp for refetch");
1590			errinf(qstate, "malloc failure, no delegation info");
1591			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
1592		}
1593		/* Do not send queries above stub, do not set delname to dp if
1594		 * this is above stub without stub-first. */
1595		stub = hints_lookup_stub(
1596			qstate->env->hints, iq->qchase.qname, iq->qchase.qclass,
1597			iq->dp);
1598		if(!stub || !stub->dp->has_parent_side_NS ||
1599			dname_subdomain_c(iq->dp->name, stub->dp->name)) {
1600			delname = iq->dp->name;
1601			delnamelen = iq->dp->namelen;
1602		}
1603	}
1604	if(iq->qchase.qtype == LDNS_RR_TYPE_DS || iq->refetch_glue) {
1605		if(!dname_is_root(delname))
1606			dname_remove_label(&delname, &delnamelen);
1607		iq->refetch_glue = 0; /* if CNAME causes restart, no refetch */
1608	}
1609
1610	/* see if we have an auth zone to answer from, improves dp from cache
1611	 * (if any dp from cache) with auth zone dp, if that is lower */
1612	if(!auth_zone_delegpt(qstate, iq, delname, delnamelen))
1613		return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
1614
1615	/* Check to see if we need to prime a stub zone. */
1616	if(prime_stub(qstate, iq, id, delname, iq->qchase.qclass)) {
1617		/* A priming sub request was made */
1618		return 0;
1619	}
1620
1621	/* most events just get forwarded to the next state. */
1622	return next_state(iq, INIT_REQUEST_3_STATE);
1623}
1624
1625/**
1626 * Process the third part of the initial request handling. This state exists
1627 * as a separate state so that queries that generate stub priming events
1628 * will get the tail end of the init process but not repeat the stub priming
1629 * check.
1630 *
1631 * @param qstate: query state.
1632 * @param iq: iterator query state.
1633 * @param id: module id.
1634 * @return true, advancing the event to the QUERYTARGETS_STATE.
1635 */
1636static int
1637processInitRequest3(struct module_qstate* qstate, struct iter_qstate* iq,
1638	int id)
1639{
1640	log_query_info(VERB_QUERY, "resolving (init part 3): ",
1641		&qstate->qinfo);
1642	/* if the cache reply dp equals a validation anchor or msg has DS,
1643	 * then DNSSEC RRSIGs are expected in the reply */
1644	iq->dnssec_expected = iter_indicates_dnssec(qstate->env, iq->dp,
1645		iq->deleg_msg, iq->qchase.qclass);
1646
1647	/* If the RD flag wasn't set, then we just finish with the
1648	 * cached referral as the response. */
1649	if(!(qstate->query_flags & BIT_RD) && iq->deleg_msg) {
1650		iq->response = iq->deleg_msg;
1651		if(verbosity >= VERB_ALGO && iq->response)
1652			log_dns_msg("no RD requested, using delegation msg",
1653				&iq->response->qinfo, iq->response->rep);
1654		if(qstate->reply_origin)
1655			sock_list_insert(&qstate->reply_origin, NULL, 0, qstate->region);
1656		return final_state(iq);
1657	}
1658	/* After this point, unset the RD flag -- this query is going to
1659	 * be sent to an auth. server. */
1660	iq->chase_flags &= ~BIT_RD;
1661
1662	/* if dnssec expected, fetch key for the trust-anchor or cached-DS */
1663	if(iq->dnssec_expected && qstate->env->cfg->prefetch_key &&
1664		!(qstate->query_flags&BIT_CD)) {
1665		generate_dnskey_prefetch(qstate, iq, id);
1666		fptr_ok(fptr_whitelist_modenv_detach_subs(
1667			qstate->env->detach_subs));
1668		(*qstate->env->detach_subs)(qstate);
1669	}
1670
1671	/* Jump to the next state. */
1672	return next_state(iq, QUERYTARGETS_STATE);
1673}
1674
1675/**
1676 * Given a basic query, generate a parent-side "target" query.
1677 * These are subordinate queries for missing delegation point target addresses,
1678 * for which only the parent of the delegation provides correct IP addresses.
1679 *
1680 * @param qstate: query state.
1681 * @param iq: iterator query state.
1682 * @param id: module id.
1683 * @param name: target qname.
1684 * @param namelen: target qname length.
1685 * @param qtype: target qtype (either A or AAAA).
1686 * @param qclass: target qclass.
1687 * @return true on success, false on failure.
1688 */
1689static int
1690generate_parentside_target_query(struct module_qstate* qstate,
1691	struct iter_qstate* iq, int id, uint8_t* name, size_t namelen,
1692	uint16_t qtype, uint16_t qclass)
1693{
1694	struct module_qstate* subq;
1695	if(!generate_sub_request(name, namelen, qtype, qclass, qstate,
1696		id, iq, INIT_REQUEST_STATE, FINISHED_STATE, &subq, 0))
1697		return 0;
1698	if(subq) {
1699		struct iter_qstate* subiq =
1700			(struct iter_qstate*)subq->minfo[id];
1701		/* blacklist the cache - we want to fetch parent stuff */
1702		sock_list_insert(&subq->blacklist, NULL, 0, subq->region);
1703		subiq->query_for_pside_glue = 1;
1704		if(dname_subdomain_c(name, iq->dp->name)) {
1705			subiq->dp = delegpt_copy(iq->dp, subq->region);
1706			subiq->dnssec_expected = iter_indicates_dnssec(
1707				qstate->env, subiq->dp, NULL,
1708				subq->qinfo.qclass);
1709			subiq->refetch_glue = 1;
1710		} else {
1711			subiq->dp = dns_cache_find_delegation(qstate->env,
1712				name, namelen, qtype, qclass, subq->region,
1713				&subiq->deleg_msg,
1714				*qstate->env->now+subq->prefetch_leeway);
1715			/* if no dp, then it's from root, refetch unneeded */
1716			if(subiq->dp) {
1717				subiq->dnssec_expected = iter_indicates_dnssec(
1718					qstate->env, subiq->dp, NULL,
1719					subq->qinfo.qclass);
1720				subiq->refetch_glue = 1;
1721			}
1722		}
1723	}
1724	log_nametypeclass(VERB_QUERY, "new pside target", name, qtype, qclass);
1725	return 1;
1726}
1727
1728/**
1729 * Given a basic query, generate a "target" query. These are subordinate
1730 * queries for missing delegation point target addresses.
1731 *
1732 * @param qstate: query state.
1733 * @param iq: iterator query state.
1734 * @param id: module id.
1735 * @param name: target qname.
1736 * @param namelen: target qname length.
1737 * @param qtype: target qtype (either A or AAAA).
1738 * @param qclass: target qclass.
1739 * @return true on success, false on failure.
1740 */
1741static int
1742generate_target_query(struct module_qstate* qstate, struct iter_qstate* iq,
1743        int id, uint8_t* name, size_t namelen, uint16_t qtype, uint16_t qclass)
1744{
1745	struct module_qstate* subq;
1746	if(!generate_sub_request(name, namelen, qtype, qclass, qstate,
1747		id, iq, INIT_REQUEST_STATE, FINISHED_STATE, &subq, 0))
1748		return 0;
1749	log_nametypeclass(VERB_QUERY, "new target", name, qtype, qclass);
1750	return 1;
1751}
1752
1753/**
1754 * Given an event at a certain state, generate zero or more target queries
1755 * for it's current delegation point.
1756 *
1757 * @param qstate: query state.
1758 * @param iq: iterator query state.
1759 * @param ie: iterator shared global environment.
1760 * @param id: module id.
1761 * @param maxtargets: The maximum number of targets to query for.
1762 *	if it is negative, there is no maximum number of targets.
1763 * @param num: returns the number of queries generated and processed,
1764 *	which may be zero if there were no missing targets.
1765 * @return false on error.
1766 */
1767static int
1768query_for_targets(struct module_qstate* qstate, struct iter_qstate* iq,
1769        struct iter_env* ie, int id, int maxtargets, int* num)
1770{
1771	int query_count = 0;
1772	struct delegpt_ns* ns;
1773	int missing;
1774	int toget = 0;
1775
1776	if(iq->depth == ie->max_dependency_depth)
1777		return 0;
1778	if(iq->depth > 0 && iq->target_count &&
1779		iq->target_count[1] > MAX_TARGET_COUNT) {
1780		char s[LDNS_MAX_DOMAINLEN+1];
1781		dname_str(qstate->qinfo.qname, s);
1782		verbose(VERB_QUERY, "request %s has exceeded the maximum "
1783			"number of glue fetches %d", s, iq->target_count[1]);
1784		return 0;
1785	}
1786
1787	iter_mark_cycle_targets(qstate, iq->dp);
1788	missing = (int)delegpt_count_missing_targets(iq->dp);
1789	log_assert(maxtargets != 0); /* that would not be useful */
1790
1791	/* Generate target requests. Basically, any missing targets
1792	 * are queried for here, regardless if it is necessary to do
1793	 * so to continue processing. */
1794	if(maxtargets < 0 || maxtargets > missing)
1795		toget = missing;
1796	else	toget = maxtargets;
1797	if(toget == 0) {
1798		*num = 0;
1799		return 1;
1800	}
1801	/* select 'toget' items from the total of 'missing' items */
1802	log_assert(toget <= missing);
1803
1804	/* loop over missing targets */
1805	for(ns = iq->dp->nslist; ns; ns = ns->next) {
1806		if(ns->resolved)
1807			continue;
1808
1809		/* randomly select this item with probability toget/missing */
1810		if(!iter_ns_probability(qstate->env->rnd, toget, missing)) {
1811			/* do not select this one, next; select toget number
1812			 * of items from a list one less in size */
1813			missing --;
1814			continue;
1815		}
1816
1817		if(ie->supports_ipv6 && !ns->got6) {
1818			/* Send the AAAA request. */
1819			if(!generate_target_query(qstate, iq, id,
1820				ns->name, ns->namelen,
1821				LDNS_RR_TYPE_AAAA, iq->qchase.qclass)) {
1822				*num = query_count;
1823				if(query_count > 0)
1824					qstate->ext_state[id] = module_wait_subquery;
1825				return 0;
1826			}
1827			query_count++;
1828		}
1829		/* Send the A request. */
1830		if(ie->supports_ipv4 && !ns->got4) {
1831			if(!generate_target_query(qstate, iq, id,
1832				ns->name, ns->namelen,
1833				LDNS_RR_TYPE_A, iq->qchase.qclass)) {
1834				*num = query_count;
1835				if(query_count > 0)
1836					qstate->ext_state[id] = module_wait_subquery;
1837				return 0;
1838			}
1839			query_count++;
1840		}
1841
1842		/* mark this target as in progress. */
1843		ns->resolved = 1;
1844		missing--;
1845		toget--;
1846		if(toget == 0)
1847			break;
1848	}
1849	*num = query_count;
1850	if(query_count > 0)
1851		qstate->ext_state[id] = module_wait_subquery;
1852
1853	return 1;
1854}
1855
1856/**
1857 * Called by processQueryTargets when it would like extra targets to query
1858 * but it seems to be out of options.  At last resort some less appealing
1859 * options are explored.  If there are no more options, the result is SERVFAIL
1860 *
1861 * @param qstate: query state.
1862 * @param iq: iterator query state.
1863 * @param ie: iterator shared global environment.
1864 * @param id: module id.
1865 * @return true if the event requires more request processing immediately,
1866 *         false if not.
1867 */
1868static int
1869processLastResort(struct module_qstate* qstate, struct iter_qstate* iq,
1870	struct iter_env* ie, int id)
1871{
1872	struct delegpt_ns* ns;
1873	int query_count = 0;
1874	verbose(VERB_ALGO, "No more query targets, attempting last resort");
1875	log_assert(iq->dp);
1876
1877	if(!can_have_last_resort(qstate->env, iq->dp->name, iq->dp->namelen,
1878		iq->qchase.qclass, NULL)) {
1879		/* fail -- no more targets, no more hope of targets, no hope
1880		 * of a response. */
1881		errinf(qstate, "all the configured stub or forward servers failed,");
1882		errinf_dname(qstate, "at zone", iq->dp->name);
1883		verbose(VERB_QUERY, "configured stub or forward servers failed -- returning SERVFAIL");
1884		return error_response_cache(qstate, id, LDNS_RCODE_SERVFAIL);
1885	}
1886	if(!iq->dp->has_parent_side_NS && dname_is_root(iq->dp->name)) {
1887		struct delegpt* p = hints_lookup_root(qstate->env->hints,
1888			iq->qchase.qclass);
1889		if(p) {
1890			struct delegpt_addr* a;
1891			iq->chase_flags &= ~BIT_RD; /* go to authorities */
1892			for(ns = p->nslist; ns; ns=ns->next) {
1893				(void)delegpt_add_ns(iq->dp, qstate->region,
1894					ns->name, ns->lame);
1895			}
1896			for(a = p->target_list; a; a=a->next_target) {
1897				(void)delegpt_add_addr(iq->dp, qstate->region,
1898					&a->addr, a->addrlen, a->bogus,
1899					a->lame, a->tls_auth_name);
1900			}
1901		}
1902		iq->dp->has_parent_side_NS = 1;
1903	} else if(!iq->dp->has_parent_side_NS) {
1904		if(!iter_lookup_parent_NS_from_cache(qstate->env, iq->dp,
1905			qstate->region, &qstate->qinfo)
1906			|| !iq->dp->has_parent_side_NS) {
1907			/* if: malloc failure in lookup go up to try */
1908			/* if: no parent NS in cache - go up one level */
1909			verbose(VERB_ALGO, "try to grab parent NS");
1910			iq->store_parent_NS = iq->dp;
1911			iq->chase_flags &= ~BIT_RD; /* go to authorities */
1912			iq->deleg_msg = NULL;
1913			iq->refetch_glue = 1;
1914			iq->query_restart_count++;
1915			iq->sent_count = 0;
1916			if(qstate->env->cfg->qname_minimisation)
1917				iq->minimisation_state = INIT_MINIMISE_STATE;
1918			return next_state(iq, INIT_REQUEST_STATE);
1919		}
1920	}
1921	/* see if that makes new names available */
1922	if(!cache_fill_missing(qstate->env, iq->qchase.qclass,
1923		qstate->region, iq->dp))
1924		log_err("out of memory in cache_fill_missing");
1925	if(iq->dp->usable_list) {
1926		verbose(VERB_ALGO, "try parent-side-name, w. glue from cache");
1927		return next_state(iq, QUERYTARGETS_STATE);
1928	}
1929	/* try to fill out parent glue from cache */
1930	if(iter_lookup_parent_glue_from_cache(qstate->env, iq->dp,
1931		qstate->region, &qstate->qinfo)) {
1932		/* got parent stuff from cache, see if we can continue */
1933		verbose(VERB_ALGO, "try parent-side glue from cache");
1934		return next_state(iq, QUERYTARGETS_STATE);
1935	}
1936	/* query for an extra name added by the parent-NS record */
1937	if(delegpt_count_missing_targets(iq->dp) > 0) {
1938		int qs = 0;
1939		verbose(VERB_ALGO, "try parent-side target name");
1940		if(!query_for_targets(qstate, iq, ie, id, 1, &qs)) {
1941			errinf(qstate, "could not fetch nameserver");
1942			errinf_dname(qstate, "at zone", iq->dp->name);
1943			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
1944		}
1945		iq->num_target_queries += qs;
1946		target_count_increase(iq, qs);
1947		if(qs != 0) {
1948			qstate->ext_state[id] = module_wait_subquery;
1949			return 0; /* and wait for them */
1950		}
1951	}
1952	if(iq->depth == ie->max_dependency_depth) {
1953		verbose(VERB_QUERY, "maxdepth and need more nameservers, fail");
1954		errinf(qstate, "cannot fetch more nameservers because at max dependency depth");
1955		return error_response_cache(qstate, id, LDNS_RCODE_SERVFAIL);
1956	}
1957	if(iq->depth > 0 && iq->target_count &&
1958		iq->target_count[1] > MAX_TARGET_COUNT) {
1959		char s[LDNS_MAX_DOMAINLEN+1];
1960		dname_str(qstate->qinfo.qname, s);
1961		verbose(VERB_QUERY, "request %s has exceeded the maximum "
1962			"number of glue fetches %d", s, iq->target_count[1]);
1963		errinf(qstate, "exceeded the maximum number of glue fetches");
1964		return error_response_cache(qstate, id, LDNS_RCODE_SERVFAIL);
1965	}
1966	/* mark cycle targets for parent-side lookups */
1967	iter_mark_pside_cycle_targets(qstate, iq->dp);
1968	/* see if we can issue queries to get nameserver addresses */
1969	/* this lookup is not randomized, but sequential. */
1970	for(ns = iq->dp->nslist; ns; ns = ns->next) {
1971		/* if this nameserver is at a delegation point, but that
1972		 * delegation point is a stub and we cannot go higher, skip*/
1973		if( ((ie->supports_ipv6 && !ns->done_pside6) ||
1974		    (ie->supports_ipv4 && !ns->done_pside4)) &&
1975		    !can_have_last_resort(qstate->env, ns->name, ns->namelen,
1976			iq->qchase.qclass, NULL)) {
1977			log_nametypeclass(VERB_ALGO, "cannot pside lookup ns "
1978				"because it is also a stub/forward,",
1979				ns->name, LDNS_RR_TYPE_NS, iq->qchase.qclass);
1980			if(ie->supports_ipv6) ns->done_pside6 = 1;
1981			if(ie->supports_ipv4) ns->done_pside4 = 1;
1982			continue;
1983		}
1984		/* query for parent-side A and AAAA for nameservers */
1985		if(ie->supports_ipv6 && !ns->done_pside6) {
1986			/* Send the AAAA request. */
1987			if(!generate_parentside_target_query(qstate, iq, id,
1988				ns->name, ns->namelen,
1989				LDNS_RR_TYPE_AAAA, iq->qchase.qclass)) {
1990				errinf_dname(qstate, "could not generate nameserver AAAA lookup for", ns->name);
1991				return error_response(qstate, id,
1992					LDNS_RCODE_SERVFAIL);
1993			}
1994			ns->done_pside6 = 1;
1995			query_count++;
1996		}
1997		if(ie->supports_ipv4 && !ns->done_pside4) {
1998			/* Send the A request. */
1999			if(!generate_parentside_target_query(qstate, iq, id,
2000				ns->name, ns->namelen,
2001				LDNS_RR_TYPE_A, iq->qchase.qclass)) {
2002				errinf_dname(qstate, "could not generate nameserver A lookup for", ns->name);
2003				return error_response(qstate, id,
2004					LDNS_RCODE_SERVFAIL);
2005			}
2006			ns->done_pside4 = 1;
2007			query_count++;
2008		}
2009		if(query_count != 0) { /* suspend to await results */
2010			verbose(VERB_ALGO, "try parent-side glue lookup");
2011			iq->num_target_queries += query_count;
2012			target_count_increase(iq, query_count);
2013			qstate->ext_state[id] = module_wait_subquery;
2014			return 0;
2015		}
2016	}
2017
2018	/* if this was a parent-side glue query itself, then store that
2019	 * failure in cache. */
2020	if(!qstate->no_cache_store && iq->query_for_pside_glue
2021		&& !iq->pside_glue)
2022			iter_store_parentside_neg(qstate->env, &qstate->qinfo,
2023				iq->deleg_msg?iq->deleg_msg->rep:
2024				(iq->response?iq->response->rep:NULL));
2025
2026	errinf(qstate, "all servers for this domain failed,");
2027	errinf_dname(qstate, "at zone", iq->dp->name);
2028	verbose(VERB_QUERY, "out of query targets -- returning SERVFAIL");
2029	/* fail -- no more targets, no more hope of targets, no hope
2030	 * of a response. */
2031	return error_response_cache(qstate, id, LDNS_RCODE_SERVFAIL);
2032}
2033
2034/**
2035 * Try to find the NS record set that will resolve a qtype DS query. Due
2036 * to grandparent/grandchild reasons we did not get a proper lookup right
2037 * away.  We need to create type NS queries until we get the right parent
2038 * for this lookup.  We remove labels from the query to find the right point.
2039 * If we end up at the old dp name, then there is no solution.
2040 *
2041 * @param qstate: query state.
2042 * @param iq: iterator query state.
2043 * @param id: module id.
2044 * @return true if the event requires more immediate processing, false if
2045 *         not. This is generally only true when forwarding the request to
2046 *         the final state (i.e., on answer).
2047 */
2048static int
2049processDSNSFind(struct module_qstate* qstate, struct iter_qstate* iq, int id)
2050{
2051	struct module_qstate* subq = NULL;
2052	verbose(VERB_ALGO, "processDSNSFind");
2053
2054	if(!iq->dsns_point) {
2055		/* initialize */
2056		iq->dsns_point = iq->qchase.qname;
2057		iq->dsns_point_len = iq->qchase.qname_len;
2058	}
2059	/* robustcheck for internal error: we are not underneath the dp */
2060	if(!dname_subdomain_c(iq->dsns_point, iq->dp->name)) {
2061		errinf_dname(qstate, "for DS query parent-child nameserver search the query is not under the zone", iq->dp->name);
2062		return error_response_cache(qstate, id, LDNS_RCODE_SERVFAIL);
2063	}
2064
2065	/* go up one (more) step, until we hit the dp, if so, end */
2066	dname_remove_label(&iq->dsns_point, &iq->dsns_point_len);
2067	if(query_dname_compare(iq->dsns_point, iq->dp->name) == 0) {
2068		/* there was no inbetween nameserver, use the old delegation
2069		 * point again.  And this time, because dsns_point is nonNULL
2070		 * we are going to accept the (bad) result */
2071		iq->state = QUERYTARGETS_STATE;
2072		return 1;
2073	}
2074	iq->state = DSNS_FIND_STATE;
2075
2076	/* spawn NS lookup (validation not needed, this is for DS lookup) */
2077	log_nametypeclass(VERB_ALGO, "fetch nameservers",
2078		iq->dsns_point, LDNS_RR_TYPE_NS, iq->qchase.qclass);
2079	if(!generate_sub_request(iq->dsns_point, iq->dsns_point_len,
2080		LDNS_RR_TYPE_NS, iq->qchase.qclass, qstate, id, iq,
2081		INIT_REQUEST_STATE, FINISHED_STATE, &subq, 0)) {
2082		errinf_dname(qstate, "for DS query parent-child nameserver search, could not generate NS lookup for", iq->dsns_point);
2083		return error_response_cache(qstate, id, LDNS_RCODE_SERVFAIL);
2084	}
2085
2086	return 0;
2087}
2088
2089/**
2090 * This is the request event state where the request will be sent to one of
2091 * its current query targets. This state also handles issuing target lookup
2092 * queries for missing target IP addresses. Queries typically iterate on
2093 * this state, both when they are just trying different targets for a given
2094 * delegation point, and when they change delegation points. This state
2095 * roughly corresponds to RFC 1034 algorithm steps 3 and 4.
 *
 * The body works through these steps in order: the referral and send
 * count limits, the check that a delegation point exists, the QNAME
 * minimisation step on qinfo_out, the auth zone lookup, the optional
 * target (nameserver address) subqueries, the selection of a target
 * address, the per-zone ratelimit and finally the send of the query.
2096 *
2097 * @param qstate: query state.
2098 * @param iq: iterator query state.
2099 * @param ie: iterator shared global environment.
2100 * @param id: module id.
2101 * @return true if the event requires more request processing immediately,
2102 *         false if not. The values of error_response(), next_state() and
2103 *         processLastResort() are passed through unchanged.  True is also
 *         returned directly when the minimised query is already cached as
 *         NOERROR/NODATA, when a 0x20 fallback finishes and the state is
 *         set to QUERY_RESP_STATE, and when the missing targets turned out
 *         not to exist.  False is returned when the state has to wait for
 *         a reply or for subqueries.
2104 */
2105static int
2106processQueryTargets(struct module_qstate* qstate, struct iter_qstate* iq,
2107	struct iter_env* ie, int id)
2108{
2109	int tf_policy;
2110	struct delegpt_addr* target;
2111	struct outbound_entry* outq;
2112	int auth_fallback = 0;
	/* qinfo_out name and length from before the minimisation step of
	 * this call; they stay NULL/0 if no step is done.  Used to undo
	 * the step on the paths below that return to wait. */
2113	uint8_t* qout_orig = NULL;
2114	size_t qout_orig_len = 0;
2115
2116	/* NOTE: a request will encounter this state for each target it
2117	 * needs to send a query to. That is, at least one per referral,
2118	 * more if some targets timeout or return throwaway answers. */
2119
2120	log_query_info(VERB_QUERY, "processQueryTargets:", &qstate->qinfo);
2121	verbose(VERB_ALGO, "processQueryTargets: targetqueries %d, "
2122		"currentqueries %d sentcount %d", iq->num_target_queries,
2123		iq->num_current_queries, iq->sent_count);
2124
2125	/* Make sure that we haven't run away */
2126	/* FIXME: is this check even necessary? */
	/* NOTE(review): the log text below spells "referrrals"; it is
	 * runtime output and therefore left as it is here. */
2127	if(iq->referral_count > MAX_REFERRAL_COUNT) {
2128		verbose(VERB_QUERY, "request has exceeded the maximum "
2129			"number of referrrals with %d", iq->referral_count);
2130		errinf(qstate, "exceeded the maximum of referrals");
2131		return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
2132	}
2133	if(iq->sent_count > MAX_SENT_COUNT) {
2134		verbose(VERB_QUERY, "request has exceeded the maximum "
2135			"number of sends with %d", iq->sent_count);
2136		errinf(qstate, "exceeded the maximum number of sends");
2137		return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
2138	}
2139
2140	/* Make sure we have a delegation point, otherwise priming failed
2141	 * or another failure occurred */
2142	if(!iq->dp) {
2143		verbose(VERB_QUERY, "Failed to get a delegation, giving up");
2144		errinf(qstate, "failed to get a delegation (eg. prime failure)");
2145		return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
2146	}
	/* tell the delegation point about address families that are not
	 * supported by this iterator configuration */
2147	if(!ie->supports_ipv6)
2148		delegpt_no_ipv6(iq->dp);
2149	if(!ie->supports_ipv4)
2150		delegpt_no_ipv4(iq->dp);
2151	delegpt_log(VERB_ALGO, iq->dp);
2152
2153	if(iq->num_current_queries>0) {
2154		/* already busy answering a query, this restart is because
2155		 * more delegpt addrs became available, wait for existing
2156		 * query. */
2157		verbose(VERB_ALGO, "woke up, but wait for outstanding query");
2158		qstate->ext_state[id] = module_wait_reply;
2159		return 0;
2160	}
2161
	/* QNAME minimisation, part 1: (re)initialise qinfo_out.  Only done
	 * when the RD bit is not set in the chase flags. */
2162	if(iq->minimisation_state == INIT_MINIMISE_STATE
2163		&& !(iq->chase_flags & BIT_RD)) {
2164		/* (Re)set qinfo_out to (new) delegation point, except when
2165		 * qinfo_out is already a subdomain of dp. This happens when
2166		 * increasing by more than one label at once (QNAMEs with more
2167		 * than MAX_MINIMISE_COUNT labels). */
2168		if(!(iq->qinfo_out.qname_len
2169			&& dname_subdomain_c(iq->qchase.qname,
2170				iq->qinfo_out.qname)
2171			&& dname_subdomain_c(iq->qinfo_out.qname,
2172				iq->dp->name))) {
2173			iq->qinfo_out.qname = iq->dp->name;
2174			iq->qinfo_out.qname_len = iq->dp->namelen;
2175			iq->qinfo_out.qtype = LDNS_RR_TYPE_A;
2176			iq->qinfo_out.qclass = iq->qchase.qclass;
2177			iq->qinfo_out.local_alias = NULL;
2178			iq->minimise_count = 0;
2179		}
2180
2181		iq->minimisation_state = MINIMISE_STATE;
2182	}
	/* QNAME minimisation, part 2: pick the name for this iteration.
	 * labdiff is the number of labels that qchase has more than the
	 * current qinfo_out name. */
2183	if(iq->minimisation_state == MINIMISE_STATE) {
2184		int qchaselabs = dname_count_labels(iq->qchase.qname);
2185		int labdiff = qchaselabs -
2186			dname_count_labels(iq->qinfo_out.qname);
2187
		/* remember the old name for the undo, then start from the
		 * full qchase name; labels are removed from it below */
2188		qout_orig = iq->qinfo_out.qname;
2189		qout_orig_len = iq->qinfo_out.qname_len;
2190		iq->qinfo_out.qname = iq->qchase.qname;
2191		iq->qinfo_out.qname_len = iq->qchase.qname_len;
2192		iq->minimise_count++;
2193		iq->minimise_timeout_count = 0;
2194
2195		iter_dec_attempts(iq->dp, 1);
2196
2197		/* Limit number of iterations for QNAMEs with more
2198		 * than MAX_MINIMISE_COUNT labels. Send first MINIMISE_ONE_LAB
2199		 * labels of QNAME always individually.
2200		 */
2201		if(qchaselabs > MAX_MINIMISE_COUNT && labdiff > 1 &&
2202			iq->minimise_count > MINIMISE_ONE_LAB) {
2203			if(iq->minimise_count < MAX_MINIMISE_COUNT) {
2204				int multilabs = qchaselabs - 1 -
2205					MINIMISE_ONE_LAB;
2206				int extralabs = multilabs /
2207					MINIMISE_MULTIPLE_LABS;
2208
2209				if (MAX_MINIMISE_COUNT - iq->minimise_count >=
2210					multilabs % MINIMISE_MULTIPLE_LABS)
2211					/* Default behaviour is to add 1 label
2212					 * every iteration. Therefore, decrement
2213					 * the extralabs by 1 */
2214					extralabs--;
2215				if (extralabs < labdiff)
2216					labdiff -= extralabs;
2217				else
2218					labdiff = 1;
2219			}
2220			/* Last minimised iteration, send all labels with
2221			 * QTYPE=NS */
2222			else
2223				labdiff = 1;
2224		}
2225
		/* labdiff-1 labels are taken off the full name, so that the
		 * outgoing name is labdiff-1 labels short of qchase */
2226		if(labdiff > 1) {
2227			verbose(VERB_QUERY, "removing %d labels", labdiff-1);
2228			dname_remove_labels(&iq->qinfo_out.qname,
2229				&iq->qinfo_out.qname_len,
2230				labdiff-1);
2231		}
2232		if(labdiff < 1 || (labdiff < 2
2233			&& (iq->qchase.qtype == LDNS_RR_TYPE_DS
2234			|| iq->qchase.qtype == LDNS_RR_TYPE_A)))
2235			/* Stop minimising this query, resolve "as usual" */
2236			iq->minimisation_state = DONOT_MINIMISE_STATE;
2237		else if(!qstate->no_cache_lookup) {
2238			struct dns_msg* msg = dns_cache_lookup(qstate->env,
2239				iq->qinfo_out.qname, iq->qinfo_out.qname_len,
2240				iq->qinfo_out.qtype, iq->qinfo_out.qclass,
2241				qstate->query_flags, qstate->region,
2242				qstate->env->scratch, 0);
2243			if(msg && msg->rep->an_numrrsets == 0
2244				&& FLAGS_GET_RCODE(msg->rep->flags) ==
2245				LDNS_RCODE_NOERROR)
2246				/* no need to send query if it is already
2247				 * cached as NOERROR/NODATA */
2248				return 1;
2249		}
2250	}
2251	if(iq->minimisation_state == SKIP_MINIMISE_STATE) {
2252		if(iq->minimise_timeout_count < MAX_MINIMISE_TIMEOUT_COUNT)
2253			/* Do not increment qname, continue incrementing next
2254			 * iteration */
2255			iq->minimisation_state = MINIMISE_STATE;
2256		else if(!qstate->env->cfg->qname_minimisation_strict)
2257			/* Too many time-outs detected for this QNAME and QTYPE.
2258			 * We give up, disable QNAME minimisation. */
2259			iq->minimisation_state = DONOT_MINIMISE_STATE;
2260	}
	/* without minimisation the outgoing query is the chased query */
2261	if(iq->minimisation_state == DONOT_MINIMISE_STATE)
2262		iq->qinfo_out = iq->qchase;
2263
2264	/* now find an answer to this query */
2265	/* see if authority zones have an answer */
2266	/* now we know the dp, we can check the auth zone for locally hosted
2267	 * contents */
2268	if(!iq->auth_zone_avoid && qstate->blacklist) {
2269		if(auth_zones_can_fallback(qstate->env->auth_zones,
2270			iq->dp->name, iq->dp->namelen, iq->qinfo_out.qclass)) {
2271			/* if cache is blacklisted and this zone allows us
2272			 * to fallback to the internet, then do so, and
2273			 * fetch results from the internet servers */
2274			iq->auth_zone_avoid = 1;
2275		}
2276	}
	/* auth_zone_avoid is a one-shot flag: it is cleared here and the
	 * auth zone lookup is skipped for this pass only */
2277	if(iq->auth_zone_avoid) {
2278		iq->auth_zone_avoid = 0;
2279		auth_fallback = 1;
2280	} else if(auth_zones_lookup(qstate->env->auth_zones, &iq->qinfo_out,
2281		qstate->region, &iq->response, &auth_fallback, iq->dp->name,
2282		iq->dp->namelen)) {
2283		/* use this as a response to be processed by the iterator */
2284		if(verbosity >= VERB_ALGO) {
2285			log_dns_msg("msg from auth zone",
2286				&iq->response->qinfo, iq->response->rep);
2287		}
2288		if((iq->chase_flags&BIT_RD) && !(iq->response->rep->flags&BIT_AA)) {
2289			verbose(VERB_ALGO, "forwarder, ignoring referral from auth zone");
2290		} else {
			/* count the upstream auth zone query, under the
			 * auth_zones lock, and hand the response to the
			 * response state */
2291			lock_rw_wrlock(&qstate->env->auth_zones->lock);
2292			qstate->env->auth_zones->num_query_up++;
2293			lock_rw_unlock(&qstate->env->auth_zones->lock);
2294			iq->num_current_queries++;
2295			iq->chase_to_rd = 0;
2296			iq->dnssec_lame_query = 0;
2297			iq->auth_zone_response = 1;
2298			return next_state(iq, QUERY_RESP_STATE);
2299		}
2300	}
2301	iq->auth_zone_response = 0;
2302	if(auth_fallback == 0) {
2303		/* like we got servfail from the auth zone lookup, and
2304		 * no internet fallback */
2305		verbose(VERB_ALGO, "auth zone lookup failed, no fallback,"
2306			" servfail");
2307		errinf(qstate, "auth zone lookup failed, fallback is off");
2308		return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
2309	}
2310	if(iq->dp->auth_dp) {
2311		/* we wanted to fallback, but had no delegpt, only the
2312		 * auth zone generated delegpt, create an actual one */
2313		iq->auth_zone_avoid = 1;
2314		return next_state(iq, INIT_REQUEST_STATE);
2315	}
2316	/* but mostly, fallback==1 (like, when no such auth zone exists)
2317	 * and we continue with lookups */
2318
	/* tf_policy is the number of extra targets to fetch
	 * opportunistically; it is passed as maxtargets further down and
	 * stays 0 (no fetch) when the depth or send count is too high */
2319	tf_policy = 0;
2320	/* < not <=, because although the array is large enough for <=, the
2321	 * generated query will immediately be discarded due to depth and
2322	 * that servfail is cached, which is not good as opportunism goes. */
2323	if(iq->depth < ie->max_dependency_depth
2324		&& iq->sent_count < TARGET_FETCH_STOP) {
2325		tf_policy = ie->target_fetch_policy[iq->depth];
2326	}
2327
2328	/* if in 0x20 fallback get as many targets as possible */
2329	if(iq->caps_fallback) {
2330		int extra = 0;
2331		size_t naddr, nres, navail;
2332		if(!query_for_targets(qstate, iq, ie, id, -1, &extra)) {
2333			errinf(qstate, "could not fetch nameservers for 0x20 fallback");
2334			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
2335		}
2336		iq->num_target_queries += extra;
2337		target_count_increase(iq, extra);
2338		if(iq->num_target_queries > 0) {
2339			/* wait to get all targets, we want to try em */
2340			verbose(VERB_ALGO, "wait for all targets for fallback");
2341			qstate->ext_state[id] = module_wait_reply;
2342			/* undo qname minimise step because we'll get back here
2343			 * to do it again */
2344			if(qout_orig && iq->minimise_count > 0) {
2345				iq->minimise_count--;
2346				iq->qinfo_out.qname = qout_orig;
2347				iq->qinfo_out.qname_len = qout_orig_len;
2348			}
2349			return 0;
2350		}
2351		/* did we do enough fallback queries already? */
2352		delegpt_count_addr(iq->dp, &naddr, &nres, &navail);
2353		/* the current caps_server is the number of fallbacks sent.
2354		 * the original query is one that matched too, so we have
2355		 * caps_server+1 number of matching queries now */
2356		if(iq->caps_server+1 >= naddr*3 ||
2357			iq->caps_server*2+2 >= MAX_SENT_COUNT) {
2358			/* *2 on sentcount check because ipv6 may fail */
2359			/* we're done, process the response */
2360			verbose(VERB_ALGO, "0x20 fallback had %d responses "
2361				"match for %d wanted, done.",
2362				(int)iq->caps_server+1, (int)naddr*3);
2363			iq->response = iq->caps_response;
2364			iq->caps_fallback = 0;
2365			iter_dec_attempts(iq->dp, 3); /* space for fallback */
2366			iq->num_current_queries++; /* RespState decrements it*/
2367			iq->referral_count++; /* make sure we don't loop */
2368			iq->sent_count = 0;
2369			iq->state = QUERY_RESP_STATE;
2370			return 1;
2371		}
2372		verbose(VERB_ALGO, "0x20 fallback number %d",
2373			(int)iq->caps_server);
2374
2375	/* if there is a policy to fetch missing targets
2376	 * opportunistically, do it. we rely on the fact that once a
2377	 * query (or queries) for a missing name have been issued,
2378	 * they will not show up again. */
2379	} else if(tf_policy != 0) {
2380		int extra = 0;
2381		verbose(VERB_ALGO, "attempt to get extra %d targets",
2382			tf_policy);
2383		(void)query_for_targets(qstate, iq, ie, id, tf_policy, &extra);
2384		/* errors ignored, these targets are not strictly necessary for
2385		 * this result, we do not have to reply with SERVFAIL */
2386		iq->num_target_queries += extra;
2387		target_count_increase(iq, extra);
2388	}
2389
2390	/* Add the current set of unused targets to our queue. */
2391	delegpt_add_unused_targets(iq->dp);
2392
2393	/* Select the next usable target, filtering out unsuitable targets. */
2394	target = iter_server_selection(ie, qstate->env, iq->dp,
2395		iq->dp->name, iq->dp->namelen, iq->qchase.qtype,
2396		&iq->dnssec_lame_query, &iq->chase_to_rd,
2397		iq->num_target_queries, qstate->blacklist,
2398		qstate->prefetch_leeway);
2399
2400	/* If no usable target was selected... */
2401	if(!target) {
2402		/* Here we distinguish between three states: generate a new
2403		 * target query, just wait, or quit (with a SERVFAIL).
2404		 * We have the following information: number of active
2405		 * target queries, number of active current queries,
2406		 * the presence of missing targets at this delegation
2407		 * point, and the given query target policy. */
2408
2409		/* Check for the wait condition. If this is true, then
2410		 * an action must be taken. */
2411		if(iq->num_target_queries==0 && iq->num_current_queries==0) {
2412			/* If there is nothing to wait for, then we need
2413			 * to distinguish between generating (a) new target
2414			 * query, or failing. */
2415			if(delegpt_count_missing_targets(iq->dp) > 0) {
2416				int qs = 0;
2417				verbose(VERB_ALGO, "querying for next "
2418					"missing target");
2419				if(!query_for_targets(qstate, iq, ie, id,
2420					1, &qs)) {
2421					errinf(qstate, "could not fetch nameserver");
2422					errinf_dname(qstate, "at zone", iq->dp->name);
2423					return error_response(qstate, id,
2424						LDNS_RCODE_SERVFAIL);
2425				}
2426				if(qs == 0 &&
2427				   delegpt_count_missing_targets(iq->dp) == 0){
2428					/* it looked like there were missing
2429					 * targets, but they did not turn up.
2430					 * Try the bad choices again (if any),
2431					 * when we get back here missing==0,
2432					 * so this is not a loop. */
2433					return 1;
2434				}
2435				iq->num_target_queries += qs;
2436				target_count_increase(iq, qs);
2437			}
2438			/* Since a target query might have been made, we
2439			 * need to check again. */
2440			if(iq->num_target_queries == 0) {
2441				/* if in capsforid fallback, instead of last
2442				 * resort, we agree with the current reply
2443				 * we have (if any) (our count of addrs bad)*/
2444				if(iq->caps_fallback && iq->caps_reply) {
2445					/* we're done, process the response */
2446					verbose(VERB_ALGO, "0x20 fallback had %d responses, "
2447						"but no more servers except "
2448						"last resort, done.",
2449						(int)iq->caps_server+1);
2450					iq->response = iq->caps_response;
2451					iq->caps_fallback = 0;
2452					iter_dec_attempts(iq->dp, 3); /* space for fallback */
2453					iq->num_current_queries++; /* RespState decrements it*/
2454					iq->referral_count++; /* make sure we don't loop */
2455					iq->sent_count = 0;
2456					iq->state = QUERY_RESP_STATE;
2457					return 1;
2458				}
2459				return processLastResort(qstate, iq, ie, id);
2460			}
2461		}
2462
2463		/* otherwise, we have no current targets, so submerge
2464		 * until one of the target or direct queries return. */
2465		if(iq->num_target_queries>0 && iq->num_current_queries>0) {
2466			verbose(VERB_ALGO, "no current targets -- waiting "
2467				"for %d targets to resolve or %d outstanding"
2468				" queries to respond", iq->num_target_queries,
2469				iq->num_current_queries);
2470			qstate->ext_state[id] = module_wait_reply;
2471		} else if(iq->num_target_queries>0) {
2472			verbose(VERB_ALGO, "no current targets -- waiting "
2473				"for %d targets to resolve.",
2474				iq->num_target_queries);
2475			qstate->ext_state[id] = module_wait_subquery;
2476		} else {
2477			verbose(VERB_ALGO, "no current targets -- waiting "
2478				"for %d outstanding queries to respond.",
2479				iq->num_current_queries);
2480			qstate->ext_state[id] = module_wait_reply;
2481		}
2482		/* undo qname minimise step because we'll get back here
2483		 * to do it again */
2484		if(qout_orig && iq->minimise_count > 0) {
2485			iq->minimise_count--;
2486			iq->qinfo_out.qname = qout_orig;
2487			iq->qinfo_out.qname_len = qout_orig_len;
2488		}
2489		return 0;
2490	}
2491
2492	/* if not forwarding, check ratelimits per delegationpoint name */
2493	if(!(iq->chase_flags & BIT_RD) && !iq->ratelimit_ok) {
2494		if(!infra_ratelimit_inc(qstate->env->infra_cache, iq->dp->name,
2495			iq->dp->namelen, *qstate->env->now, &qstate->qinfo,
2496			qstate->reply)) {
			/* the shared counter is updated under its own lock */
2497			lock_basic_lock(&ie->queries_ratelimit_lock);
2498			ie->num_queries_ratelimited++;
2499			lock_basic_unlock(&ie->queries_ratelimit_lock);
2500			verbose(VERB_ALGO, "query exceeded ratelimits");
2501			qstate->was_ratelimited = 1;
2502			errinf_dname(qstate, "exceeded ratelimit for zone",
2503				iq->dp->name);
2504			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
2505		}
2506	}
2507
2508	/* We have a valid target. */
2509	if(verbosity >= VERB_QUERY) {
2510		log_query_info(VERB_QUERY, "sending query:", &iq->qinfo_out);
2511		log_name_addr(VERB_QUERY, "sending to target:", iq->dp->name,
2512			&target->addr, target->addrlen);
2513		verbose(VERB_ALGO, "dnssec status: %s%s",
2514			iq->dnssec_expected?"expected": "not expected",
2515			iq->dnssec_lame_query?" but lame_query anyway": "");
2516	}
2517	fptr_ok(fptr_whitelist_modenv_send_query(qstate->env->send_query));
2518	outq = (*qstate->env->send_query)(&iq->qinfo_out,
2519		iq->chase_flags | (iq->chase_to_rd?BIT_RD:0),
2520		/* unset CD if to forwarder(RD set) and not dnssec retry
2521		 * (blacklist nonempty) and no trust-anchors are configured
2522		 * above the qname or on the first attempt when dnssec is on */
2523		EDNS_DO| ((iq->chase_to_rd||(iq->chase_flags&BIT_RD)!=0)&&
2524		!qstate->blacklist&&(!iter_qname_indicates_dnssec(qstate->env,
2525		&iq->qinfo_out)||target->attempts==1)?0:BIT_CD),
2526		iq->dnssec_expected, iq->caps_fallback || is_caps_whitelisted(
2527		ie, iq), &target->addr, target->addrlen,
2528		iq->dp->name, iq->dp->namelen,
2529		(iq->dp->ssl_upstream || qstate->env->cfg->ssl_upstream),
2530		target->tls_auth_name, qstate);
2531	if(!outq) {
		/* the send failed.  infra_ratelimit_dec is called under the
		 * same condition as the infra_ratelimit_inc above, the
		 * minimisation state is set to skip when minimisation is
		 * configured, and this state is entered again. */
2532		log_addr(VERB_DETAIL, "error sending query to auth server",
2533			&target->addr, target->addrlen);
2534		if(!(iq->chase_flags & BIT_RD) && !iq->ratelimit_ok)
2535		    infra_ratelimit_dec(qstate->env->infra_cache, iq->dp->name,
2536			iq->dp->namelen, *qstate->env->now);
2537		if(qstate->env->cfg->qname_minimisation)
2538			iq->minimisation_state = SKIP_MINIMISE_STATE;
2539		return next_state(iq, QUERYTARGETS_STATE);
2540	}
	/* the query is out: keep the entry on the outbound list, count it
	 * as current query and as send, and wait for the reply */
2541	outbound_list_insert(&iq->outlist, outq);
2542	iq->num_current_queries++;
2543	iq->sent_count++;
2544	qstate->ext_state[id] = module_wait_reply;
2545
2546	return 0;
2547}
2548
2549/** find NS rrset in given list */
2550static struct ub_packed_rrset_key*
2551find_NS(struct reply_info* rep, size_t from, size_t to)
2552{
2553	size_t i;
2554	for(i=from; i<to; i++) {
2555		if(ntohs(rep->rrsets[i]->rk.type) == LDNS_RR_TYPE_NS)
2556			return rep->rrsets[i];
2557	}
2558	return NULL;
2559}
2560
2561
2562/**
2563 * Process the query response. All queries end up at this state first. This
2564 * process generally consists of analyzing the response and routing the
2565 * event to the next state (either bouncing it back to a request state, or
2566 * terminating the processing for this event).
2567 *
2568 * @param qstate: query state.
2569 * @param iq: iterator query state.
2570 * @param id: module id.
2571 * @return true if the event requires more immediate processing, false if
2572 *         not. This is generally only true when forwarding the request to
2573 *         the final state (i.e., on answer).
2574 */
2575static int
2576processQueryResponse(struct module_qstate* qstate, struct iter_qstate* iq,
2577	int id)
2578{
2579	int dnsseclame = 0;
2580	enum response_type type;
2581	iq->num_current_queries--;
2582
2583	if(!inplace_cb_query_response_call(qstate->env, qstate, iq->response))
2584		log_err("unable to call query_response callback");
2585
2586	if(iq->response == NULL) {
2587		/* Don't increment qname when QNAME minimisation is enabled */
2588		if(qstate->env->cfg->qname_minimisation) {
2589			iq->minimise_timeout_count++;
2590			iq->minimisation_state = SKIP_MINIMISE_STATE;
2591		}
2592		iq->chase_to_rd = 0;
2593		iq->dnssec_lame_query = 0;
2594		verbose(VERB_ALGO, "query response was timeout");
2595		return next_state(iq, QUERYTARGETS_STATE);
2596	}
2597	type = response_type_from_server(
2598		(int)((iq->chase_flags&BIT_RD) || iq->chase_to_rd),
2599		iq->response, &iq->qinfo_out, iq->dp);
2600	iq->chase_to_rd = 0;
2601	if(type == RESPONSE_TYPE_REFERRAL && (iq->chase_flags&BIT_RD) &&
2602		!iq->auth_zone_response) {
2603		/* When forwarding (RD bit is set), we handle referrals
2604		 * differently. No queries should be sent elsewhere */
2605		type = RESPONSE_TYPE_ANSWER;
2606	}
2607	if(!qstate->env->cfg->disable_dnssec_lame_check && iq->dnssec_expected
2608                && !iq->dnssec_lame_query &&
2609		!(iq->chase_flags&BIT_RD)
2610		&& iq->sent_count < DNSSEC_LAME_DETECT_COUNT
2611		&& type != RESPONSE_TYPE_LAME
2612		&& type != RESPONSE_TYPE_REC_LAME
2613		&& type != RESPONSE_TYPE_THROWAWAY
2614		&& type != RESPONSE_TYPE_UNTYPED) {
2615		/* a possible answer, see if it is missing DNSSEC */
2616		/* but not when forwarding, so we dont mark fwder lame */
2617		if(!iter_msg_has_dnssec(iq->response)) {
2618			/* Mark this address as dnsseclame in this dp,
2619			 * because that will make serverselection disprefer
2620			 * it, but also, once it is the only final option,
2621			 * use dnssec-lame-bypass if it needs to query there.*/
2622			if(qstate->reply) {
2623				struct delegpt_addr* a = delegpt_find_addr(
2624					iq->dp, &qstate->reply->addr,
2625					qstate->reply->addrlen);
2626				if(a) a->dnsseclame = 1;
2627			}
2628			/* test the answer is from the zone we expected,
2629		 	 * otherwise, (due to parent,child on same server), we
2630		 	 * might mark the server,zone lame inappropriately */
2631			if(!iter_msg_from_zone(iq->response, iq->dp, type,
2632				iq->qchase.qclass))
2633				qstate->reply = NULL;
2634			type = RESPONSE_TYPE_LAME;
2635			dnsseclame = 1;
2636		}
2637	} else iq->dnssec_lame_query = 0;
2638	/* see if referral brings us close to the target */
2639	if(type == RESPONSE_TYPE_REFERRAL) {
2640		struct ub_packed_rrset_key* ns = find_NS(
2641			iq->response->rep, iq->response->rep->an_numrrsets,
2642			iq->response->rep->an_numrrsets
2643			+ iq->response->rep->ns_numrrsets);
2644		if(!ns) ns = find_NS(iq->response->rep, 0,
2645				iq->response->rep->an_numrrsets);
2646		if(!ns || !dname_strict_subdomain_c(ns->rk.dname, iq->dp->name)
2647			|| !dname_subdomain_c(iq->qchase.qname, ns->rk.dname)){
2648			verbose(VERB_ALGO, "bad referral, throwaway");
2649			type = RESPONSE_TYPE_THROWAWAY;
2650		} else
2651			iter_scrub_ds(iq->response, ns, iq->dp->name);
2652	} else iter_scrub_ds(iq->response, NULL, NULL);
2653	if(type == RESPONSE_TYPE_THROWAWAY &&
2654		FLAGS_GET_RCODE(iq->response->rep->flags) == LDNS_RCODE_YXDOMAIN) {
2655		/* YXDOMAIN is a permanent error, no need to retry */
2656		type = RESPONSE_TYPE_ANSWER;
2657	}
2658	if(type == RESPONSE_TYPE_CNAME && iq->response->rep->an_numrrsets >= 1
2659		&& ntohs(iq->response->rep->rrsets[0]->rk.type) == LDNS_RR_TYPE_DNAME) {
2660		uint8_t* sname = NULL;
2661		size_t snamelen = 0;
2662		get_cname_target(iq->response->rep->rrsets[0], &sname,
2663			&snamelen);
2664		if(snamelen && dname_subdomain_c(sname, iq->response->rep->rrsets[0]->rk.dname)) {
2665			/* DNAME to a subdomain loop; do not recurse */
2666			type = RESPONSE_TYPE_ANSWER;
2667		}
2668	} else if(type == RESPONSE_TYPE_CNAME &&
2669		iq->qchase.qtype == LDNS_RR_TYPE_CNAME &&
2670		iq->minimisation_state == MINIMISE_STATE &&
2671		query_dname_compare(iq->qchase.qname, iq->qinfo_out.qname) == 0) {
2672		/* The minimised query for full QTYPE and hidden QTYPE can be
2673		 * classified as CNAME response type, even when the original
2674		 * QTYPE=CNAME. This should be treated as answer response type.
2675		 */
2676		type = RESPONSE_TYPE_ANSWER;
2677	}
2678
2679	/* handle each of the type cases */
2680	if(type == RESPONSE_TYPE_ANSWER) {
2681		/* ANSWER type responses terminate the query algorithm,
2682		 * so they sent on their */
2683		if(verbosity >= VERB_DETAIL) {
2684			verbose(VERB_DETAIL, "query response was %s",
2685				FLAGS_GET_RCODE(iq->response->rep->flags)
2686				==LDNS_RCODE_NXDOMAIN?"NXDOMAIN ANSWER":
2687				(iq->response->rep->an_numrrsets?"ANSWER":
2688				"nodata ANSWER"));
2689		}
2690		/* if qtype is DS, check we have the right level of answer,
2691		 * like grandchild answer but we need the middle, reject it */
2692		if(iq->qchase.qtype == LDNS_RR_TYPE_DS && !iq->dsns_point
2693			&& !(iq->chase_flags&BIT_RD)
2694			&& iter_ds_toolow(iq->response, iq->dp)
2695			&& iter_dp_cangodown(&iq->qchase, iq->dp)) {
2696			/* close down outstanding requests to be discarded */
2697			outbound_list_clear(&iq->outlist);
2698			iq->num_current_queries = 0;
2699			fptr_ok(fptr_whitelist_modenv_detach_subs(
2700				qstate->env->detach_subs));
2701			(*qstate->env->detach_subs)(qstate);
2702			iq->num_target_queries = 0;
2703			return processDSNSFind(qstate, iq, id);
2704		}
2705		if(!qstate->no_cache_store)
2706			iter_dns_store(qstate->env, &iq->response->qinfo,
2707				iq->response->rep, 0, qstate->prefetch_leeway,
2708				iq->dp&&iq->dp->has_parent_side_NS,
2709				qstate->region, qstate->query_flags);
2710		/* close down outstanding requests to be discarded */
2711		outbound_list_clear(&iq->outlist);
2712		iq->num_current_queries = 0;
2713		fptr_ok(fptr_whitelist_modenv_detach_subs(
2714			qstate->env->detach_subs));
2715		(*qstate->env->detach_subs)(qstate);
2716		iq->num_target_queries = 0;
2717		if(qstate->reply)
2718			sock_list_insert(&qstate->reply_origin,
2719				&qstate->reply->addr, qstate->reply->addrlen,
2720				qstate->region);
2721		if(iq->minimisation_state != DONOT_MINIMISE_STATE
2722			&& !(iq->chase_flags & BIT_RD)) {
2723			if(FLAGS_GET_RCODE(iq->response->rep->flags) !=
2724				LDNS_RCODE_NOERROR) {
2725				if(qstate->env->cfg->qname_minimisation_strict) {
2726					if(FLAGS_GET_RCODE(iq->response->rep->flags) ==
2727						LDNS_RCODE_NXDOMAIN) {
2728						iter_scrub_nxdomain(iq->response);
2729						return final_state(iq);
2730					}
2731					return error_response(qstate, id,
2732						LDNS_RCODE_SERVFAIL);
2733				}
2734				/* Best effort qname-minimisation.
2735				 * Stop minimising and send full query when
2736				 * RCODE is not NOERROR. */
2737				iq->minimisation_state = DONOT_MINIMISE_STATE;
2738			}
2739			if(FLAGS_GET_RCODE(iq->response->rep->flags) ==
2740				LDNS_RCODE_NXDOMAIN) {
2741				/* Stop resolving when NXDOMAIN is DNSSEC
2742				 * signed. Based on assumption that nameservers
2743				 * serving signed zones do not return NXDOMAIN
2744				 * for empty-non-terminals. */
2745				if(iq->dnssec_expected)
2746					return final_state(iq);
2747				/* Make subrequest to validate intermediate
2748				 * NXDOMAIN if harden-below-nxdomain is
2749				 * enabled. */
2750				if(qstate->env->cfg->harden_below_nxdomain) {
2751					struct module_qstate* subq = NULL;
2752					log_query_info(VERB_QUERY,
2753						"schedule NXDOMAIN validation:",
2754						&iq->response->qinfo);
2755					if(!generate_sub_request(
2756						iq->response->qinfo.qname,
2757						iq->response->qinfo.qname_len,
2758						iq->response->qinfo.qtype,
2759						iq->response->qinfo.qclass,
2760						qstate, id, iq,
2761						INIT_REQUEST_STATE,
2762						FINISHED_STATE, &subq, 1))
2763						verbose(VERB_ALGO,
2764						"could not validate NXDOMAIN "
2765						"response");
2766					outbound_list_clear(&iq->outlist);
2767					iq->num_current_queries = 0;
2768					fptr_ok(fptr_whitelist_modenv_detach_subs(
2769						qstate->env->detach_subs));
2770					(*qstate->env->detach_subs)(qstate);
2771					iq->num_target_queries = 0;
2772				}
2773			}
2774			return next_state(iq, QUERYTARGETS_STATE);
2775		}
2776		return final_state(iq);
2777	} else if(type == RESPONSE_TYPE_REFERRAL) {
2778		/* REFERRAL type responses get a reset of the
2779		 * delegation point, and back to the QUERYTARGETS_STATE. */
2780		verbose(VERB_DETAIL, "query response was REFERRAL");
2781
2782		if(!(iq->chase_flags & BIT_RD) && !iq->ratelimit_ok) {
2783			/* we have a referral, no ratelimit, we can send
2784			 * our queries to the given name */
2785			infra_ratelimit_dec(qstate->env->infra_cache,
2786				iq->dp->name, iq->dp->namelen,
2787				*qstate->env->now);
2788		}
2789
2790		/* if hardened, only store referral if we asked for it */
2791		if(!qstate->no_cache_store &&
2792		(!qstate->env->cfg->harden_referral_path ||
2793		    (  qstate->qinfo.qtype == LDNS_RR_TYPE_NS
2794			&& (qstate->query_flags&BIT_RD)
2795			&& !(qstate->query_flags&BIT_CD)
2796			   /* we know that all other NS rrsets are scrubbed
2797			    * away, thus on referral only one is left.
2798			    * see if that equals the query name... */
2799			&& ( /* auth section, but sometimes in answer section*/
2800			  reply_find_rrset_section_ns(iq->response->rep,
2801				iq->qchase.qname, iq->qchase.qname_len,
2802				LDNS_RR_TYPE_NS, iq->qchase.qclass)
2803			  || reply_find_rrset_section_an(iq->response->rep,
2804				iq->qchase.qname, iq->qchase.qname_len,
2805				LDNS_RR_TYPE_NS, iq->qchase.qclass)
2806			  )
2807		    ))) {
2808			/* Store the referral under the current query */
2809			/* no prefetch-leeway, since its not the answer */
2810			iter_dns_store(qstate->env, &iq->response->qinfo,
2811				iq->response->rep, 1, 0, 0, NULL, 0);
2812			if(iq->store_parent_NS)
2813				iter_store_parentside_NS(qstate->env,
2814					iq->response->rep);
2815			if(qstate->env->neg_cache)
2816				val_neg_addreferral(qstate->env->neg_cache,
2817					iq->response->rep, iq->dp->name);
2818		}
2819		/* store parent-side-in-zone-glue, if directly queried for */
2820		if(!qstate->no_cache_store && iq->query_for_pside_glue
2821			&& !iq->pside_glue) {
2822				iq->pside_glue = reply_find_rrset(iq->response->rep,
2823					iq->qchase.qname, iq->qchase.qname_len,
2824					iq->qchase.qtype, iq->qchase.qclass);
2825				if(iq->pside_glue) {
2826					log_rrset_key(VERB_ALGO, "found parent-side "
2827						"glue", iq->pside_glue);
2828					iter_store_parentside_rrset(qstate->env,
2829						iq->pside_glue);
2830				}
2831		}
2832
2833		/* Reset the event state, setting the current delegation
2834		 * point to the referral. */
2835		iq->deleg_msg = iq->response;
2836		iq->dp = delegpt_from_message(iq->response, qstate->region);
2837		if (qstate->env->cfg->qname_minimisation)
2838			iq->minimisation_state = INIT_MINIMISE_STATE;
2839		if(!iq->dp) {
2840			errinf(qstate, "malloc failure, for delegation point");
2841			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
2842		}
2843		if(!cache_fill_missing(qstate->env, iq->qchase.qclass,
2844			qstate->region, iq->dp)) {
2845			errinf(qstate, "malloc failure, copy extra info into delegation point");
2846			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
2847		}
2848		if(iq->store_parent_NS && query_dname_compare(iq->dp->name,
2849			iq->store_parent_NS->name) == 0)
2850			iter_merge_retry_counts(iq->dp, iq->store_parent_NS);
2851		delegpt_log(VERB_ALGO, iq->dp);
2852		/* Count this as a referral. */
2853		iq->referral_count++;
2854		iq->sent_count = 0;
2855		/* see if the next dp is a trust anchor, or a DS was sent
2856		 * along, indicating dnssec is expected for next zone */
2857		iq->dnssec_expected = iter_indicates_dnssec(qstate->env,
2858			iq->dp, iq->response, iq->qchase.qclass);
2859		/* if dnssec, validating then also fetch the key for the DS */
2860		if(iq->dnssec_expected && qstate->env->cfg->prefetch_key &&
2861			!(qstate->query_flags&BIT_CD))
2862			generate_dnskey_prefetch(qstate, iq, id);
2863
2864		/* spawn off NS and addr to auth servers for the NS we just
2865		 * got in the referral. This gets authoritative answer
2866		 * (answer section trust level) rrset.
2867		 * right after, we detach the subs, answer goes to cache. */
2868		if(qstate->env->cfg->harden_referral_path)
2869			generate_ns_check(qstate, iq, id);
2870
2871		/* stop current outstanding queries.
2872		 * FIXME: should the outstanding queries be waited for and
2873		 * handled? Say by a subquery that inherits the outbound_entry.
2874		 */
2875		outbound_list_clear(&iq->outlist);
2876		iq->num_current_queries = 0;
2877		fptr_ok(fptr_whitelist_modenv_detach_subs(
2878			qstate->env->detach_subs));
2879		(*qstate->env->detach_subs)(qstate);
2880		iq->num_target_queries = 0;
2881		verbose(VERB_ALGO, "cleared outbound list for next round");
2882		return next_state(iq, QUERYTARGETS_STATE);
2883	} else if(type == RESPONSE_TYPE_CNAME) {
2884		uint8_t* sname = NULL;
2885		size_t snamelen = 0;
2886		/* CNAME type responses get a query restart (i.e., get a
2887		 * reset of the query state and go back to INIT_REQUEST_STATE).
2888		 */
2889		verbose(VERB_DETAIL, "query response was CNAME");
2890		if(verbosity >= VERB_ALGO)
2891			log_dns_msg("cname msg", &iq->response->qinfo,
2892				iq->response->rep);
2893		/* if qtype is DS, check we have the right level of answer,
2894		 * like grandchild answer but we need the middle, reject it */
2895		if(iq->qchase.qtype == LDNS_RR_TYPE_DS && !iq->dsns_point
2896			&& !(iq->chase_flags&BIT_RD)
2897			&& iter_ds_toolow(iq->response, iq->dp)
2898			&& iter_dp_cangodown(&iq->qchase, iq->dp)) {
2899			outbound_list_clear(&iq->outlist);
2900			iq->num_current_queries = 0;
2901			fptr_ok(fptr_whitelist_modenv_detach_subs(
2902				qstate->env->detach_subs));
2903			(*qstate->env->detach_subs)(qstate);
2904			iq->num_target_queries = 0;
2905			return processDSNSFind(qstate, iq, id);
2906		}
2907		/* Process the CNAME response. */
2908		if(!handle_cname_response(qstate, iq, iq->response,
2909			&sname, &snamelen)) {
2910			errinf(qstate, "malloc failure, CNAME info");
2911			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
2912		}
2913		/* cache the CNAME response under the current query */
2914		/* NOTE : set referral=1, so that rrsets get stored but not
2915		 * the partial query answer (CNAME only). */
2916		/* prefetchleeway applied because this updates answer parts */
2917		if(!qstate->no_cache_store)
2918			iter_dns_store(qstate->env, &iq->response->qinfo,
2919				iq->response->rep, 1, qstate->prefetch_leeway,
2920				iq->dp&&iq->dp->has_parent_side_NS, NULL,
2921				qstate->query_flags);
2922		/* set the current request's qname to the new value. */
2923		iq->qchase.qname = sname;
2924		iq->qchase.qname_len = snamelen;
2925		/* Clear the query state, since this is a query restart. */
2926		iq->deleg_msg = NULL;
2927		iq->dp = NULL;
2928		iq->dsns_point = NULL;
2929		iq->auth_zone_response = 0;
2930		iq->sent_count = 0;
2931		if(iq->minimisation_state != MINIMISE_STATE)
2932			/* Only count as query restart when it is not an extra
2933			 * query as result of qname minimisation. */
2934			iq->query_restart_count++;
2935		if(qstate->env->cfg->qname_minimisation)
2936			iq->minimisation_state = INIT_MINIMISE_STATE;
2937
2938		/* stop current outstanding queries.
2939		 * FIXME: should the outstanding queries be waited for and
2940		 * handled? Say by a subquery that inherits the outbound_entry.
2941		 */
2942		outbound_list_clear(&iq->outlist);
2943		iq->num_current_queries = 0;
2944		fptr_ok(fptr_whitelist_modenv_detach_subs(
2945			qstate->env->detach_subs));
2946		(*qstate->env->detach_subs)(qstate);
2947		iq->num_target_queries = 0;
2948		if(qstate->reply)
2949			sock_list_insert(&qstate->reply_origin,
2950				&qstate->reply->addr, qstate->reply->addrlen,
2951				qstate->region);
2952		verbose(VERB_ALGO, "cleared outbound list for query restart");
2953		/* go to INIT_REQUEST_STATE for new qname. */
2954		return next_state(iq, INIT_REQUEST_STATE);
2955	} else if(type == RESPONSE_TYPE_LAME) {
2956		/* Cache the LAMEness. */
2957		verbose(VERB_DETAIL, "query response was %sLAME",
2958			dnsseclame?"DNSSEC ":"");
2959		if(!dname_subdomain_c(iq->qchase.qname, iq->dp->name)) {
2960			log_err("mark lame: mismatch in qname and dpname");
2961			/* throwaway this reply below */
2962		} else if(qstate->reply) {
2963			/* need addr for lameness cache, but we may have
2964			 * gotten this from cache, so test to be sure */
2965			if(!infra_set_lame(qstate->env->infra_cache,
2966				&qstate->reply->addr, qstate->reply->addrlen,
2967				iq->dp->name, iq->dp->namelen,
2968				*qstate->env->now, dnsseclame, 0,
2969				iq->qchase.qtype))
2970				log_err("mark host lame: out of memory");
2971		}
2972	} else if(type == RESPONSE_TYPE_REC_LAME) {
2973		/* Cache the LAMEness. */
2974		verbose(VERB_DETAIL, "query response REC_LAME: "
2975			"recursive but not authoritative server");
2976		if(!dname_subdomain_c(iq->qchase.qname, iq->dp->name)) {
2977			log_err("mark rec_lame: mismatch in qname and dpname");
2978			/* throwaway this reply below */
2979		} else if(qstate->reply) {
2980			/* need addr for lameness cache, but we may have
2981			 * gotten this from cache, so test to be sure */
2982			verbose(VERB_DETAIL, "mark as REC_LAME");
2983			if(!infra_set_lame(qstate->env->infra_cache,
2984				&qstate->reply->addr, qstate->reply->addrlen,
2985				iq->dp->name, iq->dp->namelen,
2986				*qstate->env->now, 0, 1, iq->qchase.qtype))
2987				log_err("mark host lame: out of memory");
2988		}
2989	} else if(type == RESPONSE_TYPE_THROWAWAY) {
2990		/* LAME and THROWAWAY responses are handled the same way.
2991		 * In this case, the event is just sent directly back to
2992		 * the QUERYTARGETS_STATE without resetting anything,
2993		 * because, clearly, the next target must be tried. */
2994		verbose(VERB_DETAIL, "query response was THROWAWAY");
2995	} else {
2996		log_warn("A query response came back with an unknown type: %d",
2997			(int)type);
2998	}
2999
3000	/* LAME, THROWAWAY and "unknown" all end up here.
3001	 * Recycle to the QUERYTARGETS state to hopefully try a
3002	 * different target. */
3003	if (qstate->env->cfg->qname_minimisation &&
3004		!qstate->env->cfg->qname_minimisation_strict)
3005		iq->minimisation_state = DONOT_MINIMISE_STATE;
3006	if(iq->auth_zone_response) {
3007		/* can we fallback? */
3008		iq->auth_zone_response = 0;
3009		if(!auth_zones_can_fallback(qstate->env->auth_zones,
3010			iq->dp->name, iq->dp->namelen, qstate->qinfo.qclass)) {
3011			verbose(VERB_ALGO, "auth zone response bad, and no"
3012				" fallback possible, servfail");
3013			errinf_dname(qstate, "response is bad, no fallback, "
3014				"for auth zone", iq->dp->name);
3015			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
3016		}
3017		verbose(VERB_ALGO, "auth zone response was bad, "
3018			"fallback enabled");
3019		iq->auth_zone_avoid = 1;
3020		if(iq->dp->auth_dp) {
3021			/* we are using a dp for the auth zone, with no
3022			 * nameservers, get one first */
3023			iq->dp = NULL;
3024			return next_state(iq, INIT_REQUEST_STATE);
3025		}
3026	}
3027	return next_state(iq, QUERYTARGETS_STATE);
3028}
3029
3030/**
3031 * Return priming query results to interested super querystates.
3032 *
3033 * Sets the delegation point and delegation message (not nonRD queries).
3034 * This is a callback from walk_supers.
3035 *
3036 * @param qstate: priming query state that finished.
3037 * @param id: module id.
3038 * @param forq: the qstate for which priming has been done.
3039 */
3040static void
3041prime_supers(struct module_qstate* qstate, int id, struct module_qstate* forq)
3042{
3043	struct iter_qstate* foriq = (struct iter_qstate*)forq->minfo[id];
3044	struct delegpt* dp = NULL;
3045
3046	log_assert(qstate->is_priming || foriq->wait_priming_stub);
3047	log_assert(qstate->return_rcode == LDNS_RCODE_NOERROR);
3048	/* Convert our response to a delegation point */
3049	dp = delegpt_from_message(qstate->return_msg, forq->region);
3050	if(!dp) {
3051		/* if there is no convertable delegation point, then
3052		 * the ANSWER type was (presumably) a negative answer. */
3053		verbose(VERB_ALGO, "prime response was not a positive "
3054			"ANSWER; failing");
3055		foriq->dp = NULL;
3056		foriq->state = QUERYTARGETS_STATE;
3057		return;
3058	}
3059
3060	log_query_info(VERB_DETAIL, "priming successful for", &qstate->qinfo);
3061	delegpt_log(VERB_ALGO, dp);
3062	foriq->dp = dp;
3063	foriq->deleg_msg = dns_copy_msg(qstate->return_msg, forq->region);
3064	if(!foriq->deleg_msg) {
3065		log_err("copy prime response: out of memory");
3066		foriq->dp = NULL;
3067		foriq->state = QUERYTARGETS_STATE;
3068		return;
3069	}
3070
3071	/* root priming responses go to init stage 2, priming stub
3072	 * responses to to stage 3. */
3073	if(foriq->wait_priming_stub) {
3074		foriq->state = INIT_REQUEST_3_STATE;
3075		foriq->wait_priming_stub = 0;
3076	} else	foriq->state = INIT_REQUEST_2_STATE;
3077	/* because we are finished, the parent will be reactivated */
3078}
3079
3080/**
3081 * This handles the response to a priming query. This is used to handle both
3082 * root and stub priming responses. This is basically the equivalent of the
3083 * QUERY_RESP_STATE, but will not handle CNAME responses and will treat
3084 * REFERRALs as ANSWERS. It will also update and reactivate the originating
3085 * event.
3086 *
3087 * @param qstate: query state.
3088 * @param id: module id.
3089 * @return true if the event needs more immediate processing, false if not.
3090 *         This state always returns false.
3091 */
3092static int
3093processPrimeResponse(struct module_qstate* qstate, int id)
3094{
3095	struct iter_qstate* iq = (struct iter_qstate*)qstate->minfo[id];
3096	enum response_type type;
3097	iq->response->rep->flags &= ~(BIT_RD|BIT_RA); /* ignore rec-lame */
3098	type = response_type_from_server(
3099		(int)((iq->chase_flags&BIT_RD) || iq->chase_to_rd),
3100		iq->response, &iq->qchase, iq->dp);
3101	if(type == RESPONSE_TYPE_ANSWER) {
3102		qstate->return_rcode = LDNS_RCODE_NOERROR;
3103		qstate->return_msg = iq->response;
3104	} else {
3105		errinf(qstate, "prime response did not get an answer");
3106		errinf_dname(qstate, "for", qstate->qinfo.qname);
3107		qstate->return_rcode = LDNS_RCODE_SERVFAIL;
3108		qstate->return_msg = NULL;
3109	}
3110
3111	/* validate the root or stub after priming (if enabled).
3112	 * This is the same query as the prime query, but with validation.
3113	 * Now that we are primed, the additional queries that validation
3114	 * may need can be resolved, such as DLV. */
3115	if(qstate->env->cfg->harden_referral_path) {
3116		struct module_qstate* subq = NULL;
3117		log_nametypeclass(VERB_ALGO, "schedule prime validation",
3118			qstate->qinfo.qname, qstate->qinfo.qtype,
3119			qstate->qinfo.qclass);
3120		if(!generate_sub_request(qstate->qinfo.qname,
3121			qstate->qinfo.qname_len, qstate->qinfo.qtype,
3122			qstate->qinfo.qclass, qstate, id, iq,
3123			INIT_REQUEST_STATE, FINISHED_STATE, &subq, 1)) {
3124			verbose(VERB_ALGO, "could not generate prime check");
3125		}
3126		generate_a_aaaa_check(qstate, iq, id);
3127	}
3128
3129	/* This event is finished. */
3130	qstate->ext_state[id] = module_finished;
3131	return 0;
3132}
3133
3134/**
3135 * Do final processing on responses to target queries. Events reach this
3136 * state after the iterative resolution algorithm terminates. This state is
3137 * responsible for reactivating the original event, and housekeeping related
3138 * to received target responses (caching, updating the current delegation
3139 * point, etc).
3140 * Callback from walk_supers for every super state that is interested in
3141 * the results from this query.
3142 *
3143 * @param qstate: query state.
3144 * @param id: module id.
3145 * @param forq: super query state.
3146 */
3147static void
3148processTargetResponse(struct module_qstate* qstate, int id,
3149	struct module_qstate* forq)
3150{
3151	struct iter_qstate* iq = (struct iter_qstate*)qstate->minfo[id];
3152	struct iter_qstate* foriq = (struct iter_qstate*)forq->minfo[id];
3153	struct ub_packed_rrset_key* rrset;
3154	struct delegpt_ns* dpns;
3155	log_assert(qstate->return_rcode == LDNS_RCODE_NOERROR);
3156
3157	foriq->state = QUERYTARGETS_STATE;
3158	log_query_info(VERB_ALGO, "processTargetResponse", &qstate->qinfo);
3159	log_query_info(VERB_ALGO, "processTargetResponse super", &forq->qinfo);
3160
3161	/* Tell the originating event that this target query has finished
3162	 * (regardless if it succeeded or not). */
3163	foriq->num_target_queries--;
3164
3165	/* check to see if parent event is still interested (in orig name).  */
3166	if(!foriq->dp) {
3167		verbose(VERB_ALGO, "subq: parent not interested, was reset");
3168		return; /* not interested anymore */
3169	}
3170	dpns = delegpt_find_ns(foriq->dp, qstate->qinfo.qname,
3171			qstate->qinfo.qname_len);
3172	if(!dpns) {
3173		/* If not interested, just stop processing this event */
3174		verbose(VERB_ALGO, "subq: parent not interested anymore");
3175		/* could be because parent was jostled out of the cache,
3176		   and a new identical query arrived, that does not want it*/
3177		return;
3178	}
3179
3180	/* if iq->query_for_pside_glue then add the pside_glue (marked lame) */
3181	if(iq->pside_glue) {
3182		/* if the pside_glue is NULL, then it could not be found,
3183		 * the done_pside is already set when created and a cache
3184		 * entry created in processFinished so nothing to do here */
3185		log_rrset_key(VERB_ALGO, "add parentside glue to dp",
3186			iq->pside_glue);
3187		if(!delegpt_add_rrset(foriq->dp, forq->region,
3188			iq->pside_glue, 1))
3189			log_err("out of memory adding pside glue");
3190	}
3191
3192	/* This response is relevant to the current query, so we
3193	 * add (attempt to add, anyway) this target(s) and reactivate
3194	 * the original event.
3195	 * NOTE: we could only look for the AnswerRRset if the
3196	 * response type was ANSWER. */
3197	rrset = reply_find_answer_rrset(&iq->qchase, qstate->return_msg->rep);
3198	if(rrset) {
3199		/* if CNAMEs have been followed - add new NS to delegpt. */
3200		/* BTW. RFC 1918 says NS should not have got CNAMEs. Robust. */
3201		if(!delegpt_find_ns(foriq->dp, rrset->rk.dname,
3202			rrset->rk.dname_len)) {
3203			/* if dpns->lame then set newcname ns lame too */
3204			if(!delegpt_add_ns(foriq->dp, forq->region,
3205				rrset->rk.dname, dpns->lame))
3206				log_err("out of memory adding cnamed-ns");
3207		}
3208		/* if dpns->lame then set the address(es) lame too */
3209		if(!delegpt_add_rrset(foriq->dp, forq->region, rrset,
3210			dpns->lame))
3211			log_err("out of memory adding targets");
3212		verbose(VERB_ALGO, "added target response");
3213		delegpt_log(VERB_ALGO, foriq->dp);
3214	} else {
3215		verbose(VERB_ALGO, "iterator TargetResponse failed");
3216		dpns->resolved = 1; /* fail the target */
3217	}
3218}
3219
3220/**
3221 * Process response for DS NS Find queries, that attempt to find the delegation
3222 * point where we ask the DS query from.
3223 *
3224 * @param qstate: query state.
3225 * @param id: module id.
3226 * @param forq: super query state.
3227 */
3228static void
3229processDSNSResponse(struct module_qstate* qstate, int id,
3230	struct module_qstate* forq)
3231{
3232	struct iter_qstate* foriq = (struct iter_qstate*)forq->minfo[id];
3233
3234	/* if the finished (iq->response) query has no NS set: continue
3235	 * up to look for the right dp; nothing to change, do DPNSstate */
3236	if(qstate->return_rcode != LDNS_RCODE_NOERROR)
3237		return; /* seek further */
3238	/* find the NS RRset (without allowing CNAMEs) */
3239	if(!reply_find_rrset(qstate->return_msg->rep, qstate->qinfo.qname,
3240		qstate->qinfo.qname_len, LDNS_RR_TYPE_NS,
3241		qstate->qinfo.qclass)){
3242		return; /* seek further */
3243	}
3244
3245	/* else, store as DP and continue at querytargets */
3246	foriq->state = QUERYTARGETS_STATE;
3247	foriq->dp = delegpt_from_message(qstate->return_msg, forq->region);
3248	if(!foriq->dp) {
3249		log_err("out of memory in dsns dp alloc");
3250		errinf(qstate, "malloc failure, in DS search");
3251		return; /* dp==NULL in QUERYTARGETS makes SERVFAIL */
3252	}
3253	/* success, go query the querytargets in the new dp (and go down) */
3254}
3255
3256/**
3257 * Process response for qclass=ANY queries for a particular class.
3258 * Append to result or error-exit.
3259 *
3260 * @param qstate: query state.
3261 * @param id: module id.
3262 * @param forq: super query state.
3263 */
3264static void
3265processClassResponse(struct module_qstate* qstate, int id,
3266	struct module_qstate* forq)
3267{
3268	struct iter_qstate* foriq = (struct iter_qstate*)forq->minfo[id];
3269	struct dns_msg* from = qstate->return_msg;
3270	log_query_info(VERB_ALGO, "processClassResponse", &qstate->qinfo);
3271	log_query_info(VERB_ALGO, "processClassResponse super", &forq->qinfo);
3272	if(qstate->return_rcode != LDNS_RCODE_NOERROR) {
3273		/* cause servfail for qclass ANY query */
3274		foriq->response = NULL;
3275		foriq->state = FINISHED_STATE;
3276		return;
3277	}
3278	/* append result */
3279	if(!foriq->response) {
3280		/* allocate the response: copy RCODE, sec_state */
3281		foriq->response = dns_copy_msg(from, forq->region);
3282		if(!foriq->response) {
3283			log_err("malloc failed for qclass ANY response");
3284			foriq->state = FINISHED_STATE;
3285			return;
3286		}
3287		foriq->response->qinfo.qclass = forq->qinfo.qclass;
3288		/* qclass ANY does not receive the AA flag on replies */
3289		foriq->response->rep->authoritative = 0;
3290	} else {
3291		struct dns_msg* to = foriq->response;
3292		/* add _from_ this response _to_ existing collection */
3293		/* if there are records, copy RCODE */
3294		/* lower sec_state if this message is lower */
3295		if(from->rep->rrset_count != 0) {
3296			size_t n = from->rep->rrset_count+to->rep->rrset_count;
3297			struct ub_packed_rrset_key** dest, **d;
3298			/* copy appropriate rcode */
3299			to->rep->flags = from->rep->flags;
3300			/* copy rrsets */
3301			if(from->rep->rrset_count > RR_COUNT_MAX ||
3302				to->rep->rrset_count > RR_COUNT_MAX) {
3303				log_err("malloc failed (too many rrsets) in collect ANY");
3304				foriq->state = FINISHED_STATE;
3305				return; /* integer overflow protection */
3306			}
3307			dest = regional_alloc(forq->region, sizeof(dest[0])*n);
3308			if(!dest) {
3309				log_err("malloc failed in collect ANY");
3310				foriq->state = FINISHED_STATE;
3311				return;
3312			}
3313			d = dest;
3314			/* copy AN */
3315			memcpy(dest, to->rep->rrsets, to->rep->an_numrrsets
3316				* sizeof(dest[0]));
3317			dest += to->rep->an_numrrsets;
3318			memcpy(dest, from->rep->rrsets, from->rep->an_numrrsets
3319				* sizeof(dest[0]));
3320			dest += from->rep->an_numrrsets;
3321			/* copy NS */
3322			memcpy(dest, to->rep->rrsets+to->rep->an_numrrsets,
3323				to->rep->ns_numrrsets * sizeof(dest[0]));
3324			dest += to->rep->ns_numrrsets;
3325			memcpy(dest, from->rep->rrsets+from->rep->an_numrrsets,
3326				from->rep->ns_numrrsets * sizeof(dest[0]));
3327			dest += from->rep->ns_numrrsets;
3328			/* copy AR */
3329			memcpy(dest, to->rep->rrsets+to->rep->an_numrrsets+
3330				to->rep->ns_numrrsets,
3331				to->rep->ar_numrrsets * sizeof(dest[0]));
3332			dest += to->rep->ar_numrrsets;
3333			memcpy(dest, from->rep->rrsets+from->rep->an_numrrsets+
3334				from->rep->ns_numrrsets,
3335				from->rep->ar_numrrsets * sizeof(dest[0]));
3336			/* update counts */
3337			to->rep->rrsets = d;
3338			to->rep->an_numrrsets += from->rep->an_numrrsets;
3339			to->rep->ns_numrrsets += from->rep->ns_numrrsets;
3340			to->rep->ar_numrrsets += from->rep->ar_numrrsets;
3341			to->rep->rrset_count = n;
3342		}
3343		if(from->rep->security < to->rep->security) /* lowest sec */
3344			to->rep->security = from->rep->security;
3345		if(from->rep->qdcount != 0) /* insert qd if appropriate */
3346			to->rep->qdcount = from->rep->qdcount;
3347		if(from->rep->ttl < to->rep->ttl) /* use smallest TTL */
3348			to->rep->ttl = from->rep->ttl;
3349		if(from->rep->prefetch_ttl < to->rep->prefetch_ttl)
3350			to->rep->prefetch_ttl = from->rep->prefetch_ttl;
3351		if(from->rep->serve_expired_ttl < to->rep->serve_expired_ttl)
3352			to->rep->serve_expired_ttl = from->rep->serve_expired_ttl;
3353	}
3354	/* are we done? */
3355	foriq->num_current_queries --;
3356	if(foriq->num_current_queries == 0)
3357		foriq->state = FINISHED_STATE;
3358}
3359
3360/**
3361 * Collect class ANY responses and make them into one response.  This
3362 * state is started and it creates queries for all classes (that have
3363 * root hints).  The answers are then collected.
3364 *
3365 * @param qstate: query state.
3366 * @param id: module id.
3367 * @return true if the event needs more immediate processing, false if not.
3368 */
3369static int
3370processCollectClass(struct module_qstate* qstate, int id)
3371{
3372	struct iter_qstate* iq = (struct iter_qstate*)qstate->minfo[id];
3373	struct module_qstate* subq;
3374	/* If qchase.qclass == 0 then send out queries for all classes.
3375	 * Otherwise, do nothing (wait for all answers to arrive and the
3376	 * processClassResponse to put them together, and that moves us
3377	 * towards the Finished state when done. */
3378	if(iq->qchase.qclass == 0) {
3379		uint16_t c = 0;
3380		iq->qchase.qclass = LDNS_RR_CLASS_ANY;
3381		while(iter_get_next_root(qstate->env->hints,
3382			qstate->env->fwds, &c)) {
3383			/* generate query for this class */
3384			log_nametypeclass(VERB_ALGO, "spawn collect query",
3385				qstate->qinfo.qname, qstate->qinfo.qtype, c);
3386			if(!generate_sub_request(qstate->qinfo.qname,
3387				qstate->qinfo.qname_len, qstate->qinfo.qtype,
3388				c, qstate, id, iq, INIT_REQUEST_STATE,
3389				FINISHED_STATE, &subq,
3390				(int)!(qstate->query_flags&BIT_CD))) {
3391				errinf(qstate, "could not generate class ANY"
3392					" lookup query");
3393				return error_response(qstate, id,
3394					LDNS_RCODE_SERVFAIL);
3395			}
3396			/* ignore subq, no special init required */
3397			iq->num_current_queries ++;
3398			if(c == 0xffff)
3399				break;
3400			else c++;
3401		}
3402		/* if no roots are configured at all, return */
3403		if(iq->num_current_queries == 0) {
3404			verbose(VERB_ALGO, "No root hints or fwds, giving up "
3405				"on qclass ANY");
3406			return error_response(qstate, id, LDNS_RCODE_REFUSED);
3407		}
3408		/* return false, wait for queries to return */
3409	}
3410	/* if woke up here because of an answer, wait for more answers */
3411	return 0;
3412}
3413
3414/**
3415 * This handles the final state for first-tier responses (i.e., responses to
3416 * externally generated queries).
3417 *
3418 * @param qstate: query state.
3419 * @param iq: iterator query state.
3420 * @param id: module id.
3421 * @return true if the event needs more processing, false if not. Since this
3422 *         is the final state for an event, it always returns false.
3423 */
3424static int
3425processFinished(struct module_qstate* qstate, struct iter_qstate* iq,
3426	int id)
3427{
3428	log_query_info(VERB_QUERY, "finishing processing for",
3429		&qstate->qinfo);
3430
3431	/* store negative cache element for parent side glue. */
3432	if(!qstate->no_cache_store && iq->query_for_pside_glue
3433		&& !iq->pside_glue)
3434			iter_store_parentside_neg(qstate->env, &qstate->qinfo,
3435				iq->deleg_msg?iq->deleg_msg->rep:
3436				(iq->response?iq->response->rep:NULL));
3437	if(!iq->response) {
3438		verbose(VERB_ALGO, "No response is set, servfail");
3439		errinf(qstate, "(no response found at query finish)");
3440		return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
3441	}
3442
3443	/* Make sure that the RA flag is set (since the presence of
3444	 * this module means that recursion is available) */
3445	iq->response->rep->flags |= BIT_RA;
3446
3447	/* Clear the AA flag */
3448	/* FIXME: does this action go here or in some other module? */
3449	iq->response->rep->flags &= ~BIT_AA;
3450
3451	/* make sure QR flag is on */
3452	iq->response->rep->flags |= BIT_QR;
3453
3454	/* we have finished processing this query */
3455	qstate->ext_state[id] = module_finished;
3456
3457	/* TODO:  we are using a private TTL, trim the response. */
3458	/* if (mPrivateTTL > 0){IterUtils.setPrivateTTL(resp, mPrivateTTL); } */
3459
3460	/* prepend any items we have accumulated */
3461	if(iq->an_prepend_list || iq->ns_prepend_list) {
3462		if(!iter_prepend(iq, iq->response, qstate->region)) {
3463			log_err("prepend rrsets: out of memory");
3464			return error_response(qstate, id, LDNS_RCODE_SERVFAIL);
3465		}
3466		/* reset the query name back */
3467		iq->response->qinfo = qstate->qinfo;
3468		/* the security state depends on the combination */
3469		iq->response->rep->security = sec_status_unchecked;
3470		/* store message with the finished prepended items,
3471		 * but only if we did recursion. The nonrecursion referral
3472		 * from cache does not need to be stored in the msg cache. */
3473		if(!qstate->no_cache_store && qstate->query_flags&BIT_RD) {
3474			iter_dns_store(qstate->env, &qstate->qinfo,
3475				iq->response->rep, 0, qstate->prefetch_leeway,
3476				iq->dp&&iq->dp->has_parent_side_NS,
3477				qstate->region, qstate->query_flags);
3478		}
3479	}
3480	qstate->return_rcode = LDNS_RCODE_NOERROR;
3481	qstate->return_msg = iq->response;
3482	return 0;
3483}
3484
3485/*
3486 * Return priming query results to interested super querystates.
3487 *
3488 * Sets the delegation point and delegation message (not nonRD queries).
3489 * This is a callback from walk_supers.
3490 *
3491 * @param qstate: query state that finished.
3492 * @param id: module id.
3493 * @param super: the qstate to inform.
3494 */
3495void
3496iter_inform_super(struct module_qstate* qstate, int id,
3497	struct module_qstate* super)
3498{
3499	if(!qstate->is_priming && super->qinfo.qclass == LDNS_RR_CLASS_ANY)
3500		processClassResponse(qstate, id, super);
3501	else if(super->qinfo.qtype == LDNS_RR_TYPE_DS && ((struct iter_qstate*)
3502		super->minfo[id])->state == DSNS_FIND_STATE)
3503		processDSNSResponse(qstate, id, super);
3504	else if(qstate->return_rcode != LDNS_RCODE_NOERROR)
3505		error_supers(qstate, id, super);
3506	else if(qstate->is_priming)
3507		prime_supers(qstate, id, super);
3508	else	processTargetResponse(qstate, id, super);
3509}
3510
3511/**
3512 * Handle iterator state.
3513 * Handle events. This is the real processing loop for events, responsible
3514 * for moving events through the various states. If a processing method
3515 * returns true, then it will be advanced to the next state. If false, then
3516 * processing will stop.
3517 *
3518 * @param qstate: query state.
3519 * @param ie: iterator shared global environment.
3520 * @param iq: iterator query state.
3521 * @param id: module id.
3522 */
3523static void
3524iter_handle(struct module_qstate* qstate, struct iter_qstate* iq,
3525	struct iter_env* ie, int id)
3526{
3527	int cont = 1;
3528	while(cont) {
3529		verbose(VERB_ALGO, "iter_handle processing q with state %s",
3530			iter_state_to_string(iq->state));
3531		switch(iq->state) {
3532			case INIT_REQUEST_STATE:
3533				cont = processInitRequest(qstate, iq, ie, id);
3534				break;
3535			case INIT_REQUEST_2_STATE:
3536				cont = processInitRequest2(qstate, iq, id);
3537				break;
3538			case INIT_REQUEST_3_STATE:
3539				cont = processInitRequest3(qstate, iq, id);
3540				break;
3541			case QUERYTARGETS_STATE:
3542				cont = processQueryTargets(qstate, iq, ie, id);
3543				break;
3544			case QUERY_RESP_STATE:
3545				cont = processQueryResponse(qstate, iq, id);
3546				break;
3547			case PRIME_RESP_STATE:
3548				cont = processPrimeResponse(qstate, id);
3549				break;
3550			case COLLECT_CLASS_STATE:
3551				cont = processCollectClass(qstate, id);
3552				break;
3553			case DSNS_FIND_STATE:
3554				cont = processDSNSFind(qstate, iq, id);
3555				break;
3556			case FINISHED_STATE:
3557				cont = processFinished(qstate, iq, id);
3558				break;
3559			default:
3560				log_warn("iterator: invalid state: %d",
3561					iq->state);
3562				cont = 0;
3563				break;
3564		}
3565	}
3566}
3567
3568/**
3569 * This is the primary entry point for processing request events. Note that
3570 * this method should only be used by external modules.
3571 * @param qstate: query state.
3572 * @param ie: iterator shared global environment.
3573 * @param iq: iterator query state.
3574 * @param id: module id.
3575 */
3576static void
3577process_request(struct module_qstate* qstate, struct iter_qstate* iq,
3578	struct iter_env* ie, int id)
3579{
3580	/* external requests start in the INIT state, and finish using the
3581	 * FINISHED state. */
3582	iq->state = INIT_REQUEST_STATE;
3583	iq->final_state = FINISHED_STATE;
3584	verbose(VERB_ALGO, "process_request: new external request event");
3585	iter_handle(qstate, iq, ie, id);
3586}
3587
3588/** process authoritative server reply */
3589static void
3590process_response(struct module_qstate* qstate, struct iter_qstate* iq,
3591	struct iter_env* ie, int id, struct outbound_entry* outbound,
3592	enum module_ev event)
3593{
3594	struct msg_parse* prs;
3595	struct edns_data edns;
3596	sldns_buffer* pkt;
3597
3598	verbose(VERB_ALGO, "process_response: new external response event");
3599	iq->response = NULL;
3600	iq->state = QUERY_RESP_STATE;
3601	if(event == module_event_noreply || event == module_event_error) {
3602		if(event == module_event_noreply && iq->sent_count >= 3 &&
3603			qstate->env->cfg->use_caps_bits_for_id &&
3604			!iq->caps_fallback && !is_caps_whitelisted(ie, iq)) {
3605			/* start fallback */
3606			iq->caps_fallback = 1;
3607			iq->caps_server = 0;
3608			iq->caps_reply = NULL;
3609			iq->caps_response = NULL;
3610			iq->caps_minimisation_state = DONOT_MINIMISE_STATE;
3611			iq->state = QUERYTARGETS_STATE;
3612			iq->num_current_queries--;
3613			/* need fresh attempts for the 0x20 fallback, if
3614			 * that was the cause for the failure */
3615			iter_dec_attempts(iq->dp, 3);
3616			verbose(VERB_DETAIL, "Capsforid: timeouts, starting fallback");
3617			goto handle_it;
3618		}
3619		goto handle_it;
3620	}
3621	if( (event != module_event_reply && event != module_event_capsfail)
3622		|| !qstate->reply) {
3623		log_err("Bad event combined with response");
3624		outbound_list_remove(&iq->outlist, outbound);
3625		errinf(qstate, "module iterator received wrong internal event with a response message");
3626		(void)error_response(qstate, id, LDNS_RCODE_SERVFAIL);
3627		return;
3628	}
3629
3630	/* parse message */
3631	prs = (struct msg_parse*)regional_alloc(qstate->env->scratch,
3632		sizeof(struct msg_parse));
3633	if(!prs) {
3634		log_err("out of memory on incoming message");
3635		/* like packet got dropped */
3636		goto handle_it;
3637	}
3638	memset(prs, 0, sizeof(*prs));
3639	memset(&edns, 0, sizeof(edns));
3640	pkt = qstate->reply->c->buffer;
3641	sldns_buffer_set_position(pkt, 0);
3642	if(parse_packet(pkt, prs, qstate->env->scratch) != LDNS_RCODE_NOERROR) {
3643		verbose(VERB_ALGO, "parse error on reply packet");
3644		goto handle_it;
3645	}
3646	/* edns is not examined, but removed from message to help cache */
3647	if(parse_extract_edns(prs, &edns, qstate->env->scratch) !=
3648		LDNS_RCODE_NOERROR)
3649		goto handle_it;
3650
3651	/* Copy the edns options we may got from the back end */
3652	if(edns.opt_list) {
3653		qstate->edns_opts_back_in = edns_opt_copy_region(edns.opt_list,
3654			qstate->region);
3655		if(!qstate->edns_opts_back_in) {
3656			log_err("out of memory on incoming message");
3657			/* like packet got dropped */
3658			goto handle_it;
3659		}
3660		if(!inplace_cb_edns_back_parsed_call(qstate->env, qstate)) {
3661			log_err("unable to call edns_back_parsed callback");
3662			goto handle_it;
3663		}
3664	}
3665
3666	/* remove CD-bit, we asked for in case we handle validation ourself */
3667	prs->flags &= ~BIT_CD;
3668
3669	/* normalize and sanitize: easy to delete items from linked lists */
3670	if(!scrub_message(pkt, prs, &iq->qinfo_out, iq->dp->name,
3671		qstate->env->scratch, qstate->env, ie)) {
3672		/* if 0x20 enabled, start fallback, but we have no message */
3673		if(event == module_event_capsfail && !iq->caps_fallback) {
3674			iq->caps_fallback = 1;
3675			iq->caps_server = 0;
3676			iq->caps_reply = NULL;
3677			iq->caps_response = NULL;
3678			iq->caps_minimisation_state = DONOT_MINIMISE_STATE;
3679			iq->state = QUERYTARGETS_STATE;
3680			iq->num_current_queries--;
3681			verbose(VERB_DETAIL, "Capsforid: scrub failed, starting fallback with no response");
3682		}
3683		goto handle_it;
3684	}
3685
3686	/* allocate response dns_msg in region */
3687	iq->response = dns_alloc_msg(pkt, prs, qstate->region);
3688	if(!iq->response)
3689		goto handle_it;
3690	log_query_info(VERB_DETAIL, "response for", &qstate->qinfo);
3691	log_name_addr(VERB_DETAIL, "reply from", iq->dp->name,
3692		&qstate->reply->addr, qstate->reply->addrlen);
3693	if(verbosity >= VERB_ALGO)
3694		log_dns_msg("incoming scrubbed packet:", &iq->response->qinfo,
3695			iq->response->rep);
3696
3697	if(event == module_event_capsfail || iq->caps_fallback) {
3698		if(qstate->env->cfg->qname_minimisation &&
3699			iq->minimisation_state != DONOT_MINIMISE_STATE) {
3700			/* Skip QNAME minimisation for next query, since that
3701			 * one has to match the current query. */
3702			iq->minimisation_state = SKIP_MINIMISE_STATE;
3703		}
3704		/* for fallback we care about main answer, not additionals */
3705		/* removing that makes comparison more likely to succeed */
3706		caps_strip_reply(iq->response->rep);
3707
3708		if(iq->caps_fallback &&
3709			iq->caps_minimisation_state != iq->minimisation_state) {
3710			/* QNAME minimisation state has changed, restart caps
3711			 * fallback. */
3712			iq->caps_fallback = 0;
3713		}
3714
3715		if(!iq->caps_fallback) {
3716			/* start fallback */
3717			iq->caps_fallback = 1;
3718			iq->caps_server = 0;
3719			iq->caps_reply = iq->response->rep;
3720			iq->caps_response = iq->response;
3721			iq->caps_minimisation_state = iq->minimisation_state;
3722			iq->state = QUERYTARGETS_STATE;
3723			iq->num_current_queries--;
3724			verbose(VERB_DETAIL, "Capsforid: starting fallback");
3725			goto handle_it;
3726		} else {
3727			/* check if reply is the same, otherwise, fail */
3728			if(!iq->caps_reply) {
3729				iq->caps_reply = iq->response->rep;
3730				iq->caps_response = iq->response;
3731				iq->caps_server = -1; /*become zero at ++,
3732				so that we start the full set of trials */
3733			} else if(caps_failed_rcode(iq->caps_reply) &&
3734				!caps_failed_rcode(iq->response->rep)) {
3735				/* prefer to upgrade to non-SERVFAIL */
3736				iq->caps_reply = iq->response->rep;
3737				iq->caps_response = iq->response;
3738			} else if(!caps_failed_rcode(iq->caps_reply) &&
3739				caps_failed_rcode(iq->response->rep)) {
3740				/* if we have non-SERVFAIL as answer then
3741				 * we can ignore SERVFAILs for the equality
3742				 * comparison */
3743				/* no instructions here, skip other else */
3744			} else if(caps_failed_rcode(iq->caps_reply) &&
3745				caps_failed_rcode(iq->response->rep)) {
3746				/* failure is same as other failure in fallbk*/
3747				/* no instructions here, skip other else */
3748			} else if(!reply_equal(iq->response->rep, iq->caps_reply,
3749				qstate->env->scratch)) {
3750				verbose(VERB_DETAIL, "Capsforid fallback: "
3751					"getting different replies, failed");
3752				outbound_list_remove(&iq->outlist, outbound);
3753				errinf(qstate, "0x20 failed, then got different replies in fallback");
3754				(void)error_response(qstate, id,
3755					LDNS_RCODE_SERVFAIL);
3756				return;
3757			}
3758			/* continue the fallback procedure at next server */
3759			iq->caps_server++;
3760			iq->state = QUERYTARGETS_STATE;
3761			iq->num_current_queries--;
3762			verbose(VERB_DETAIL, "Capsforid: reply is equal. "
3763				"go to next fallback");
3764			goto handle_it;
3765		}
3766	}
3767	iq->caps_fallback = 0; /* if we were in fallback, 0x20 is OK now */
3768
3769handle_it:
3770	outbound_list_remove(&iq->outlist, outbound);
3771	iter_handle(qstate, iq, ie, id);
3772}
3773
3774void
3775iter_operate(struct module_qstate* qstate, enum module_ev event, int id,
3776	struct outbound_entry* outbound)
3777{
3778	struct iter_env* ie = (struct iter_env*)qstate->env->modinfo[id];
3779	struct iter_qstate* iq = (struct iter_qstate*)qstate->minfo[id];
3780	verbose(VERB_QUERY, "iterator[module %d] operate: extstate:%s event:%s",
3781		id, strextstate(qstate->ext_state[id]), strmodulevent(event));
3782	if(iq) log_query_info(VERB_QUERY, "iterator operate: query",
3783		&qstate->qinfo);
3784	if(iq && qstate->qinfo.qname != iq->qchase.qname)
3785		log_query_info(VERB_QUERY, "iterator operate: chased to",
3786			&iq->qchase);
3787
3788	/* perform iterator state machine */
3789	if((event == module_event_new || event == module_event_pass) &&
3790		iq == NULL) {
3791		if(!iter_new(qstate, id)) {
3792			errinf(qstate, "malloc failure, new iterator module allocation");
3793			(void)error_response(qstate, id, LDNS_RCODE_SERVFAIL);
3794			return;
3795		}
3796		iq = (struct iter_qstate*)qstate->minfo[id];
3797		process_request(qstate, iq, ie, id);
3798		return;
3799	}
3800	if(iq && event == module_event_pass) {
3801		iter_handle(qstate, iq, ie, id);
3802		return;
3803	}
3804	if(iq && outbound) {
3805		process_response(qstate, iq, ie, id, outbound, event);
3806		return;
3807	}
3808	if(event == module_event_error) {
3809		verbose(VERB_ALGO, "got called with event error, giving up");
3810		errinf(qstate, "iterator module got the error event");
3811		(void)error_response(qstate, id, LDNS_RCODE_SERVFAIL);
3812		return;
3813	}
3814
3815	log_err("bad event for iterator");
3816	errinf(qstate, "iterator module received wrong event");
3817	(void)error_response(qstate, id, LDNS_RCODE_SERVFAIL);
3818}
3819
3820void
3821iter_clear(struct module_qstate* qstate, int id)
3822{
3823	struct iter_qstate* iq;
3824	if(!qstate)
3825		return;
3826	iq = (struct iter_qstate*)qstate->minfo[id];
3827	if(iq) {
3828		outbound_list_clear(&iq->outlist);
3829		if(iq->target_count && --iq->target_count[0] == 0)
3830			free(iq->target_count);
3831		iq->num_current_queries = 0;
3832	}
3833	qstate->minfo[id] = NULL;
3834}
3835
3836size_t
3837iter_get_mem(struct module_env* env, int id)
3838{
3839	struct iter_env* ie = (struct iter_env*)env->modinfo[id];
3840	if(!ie)
3841		return 0;
3842	return sizeof(*ie) + sizeof(int)*((size_t)ie->max_dependency_depth+1)
3843		+ donotq_get_mem(ie->donotq) + priv_get_mem(ie->priv);
3844}
3845
3846/**
3847 * The iterator function block
3848 */
3849static struct module_func_block iter_block = {
3850	"iterator",
3851	&iter_init, &iter_deinit, &iter_operate, &iter_inform_super,
3852	&iter_clear, &iter_get_mem
3853};
3854
3855struct module_func_block*
3856iter_get_funcblock(void)
3857{
3858	return &iter_block;
3859}
3860
3861const char*
3862iter_state_to_string(enum iter_state state)
3863{
3864	switch (state)
3865	{
3866	case INIT_REQUEST_STATE :
3867		return "INIT REQUEST STATE";
3868	case INIT_REQUEST_2_STATE :
3869		return "INIT REQUEST STATE (stage 2)";
3870	case INIT_REQUEST_3_STATE:
3871		return "INIT REQUEST STATE (stage 3)";
3872	case QUERYTARGETS_STATE :
3873		return "QUERY TARGETS STATE";
3874	case PRIME_RESP_STATE :
3875		return "PRIME RESPONSE STATE";
3876	case COLLECT_CLASS_STATE :
3877		return "COLLECT CLASS STATE";
3878	case DSNS_FIND_STATE :
3879		return "DSNS FIND STATE";
3880	case QUERY_RESP_STATE :
3881		return "QUERY RESPONSE STATE";
3882	case FINISHED_STATE :
3883		return "FINISHED RESPONSE STATE";
3884	default :
3885		return "UNKNOWN ITER STATE";
3886	}
3887}
3888
3889int
3890iter_state_is_responsestate(enum iter_state s)
3891{
3892	switch(s) {
3893		case INIT_REQUEST_STATE :
3894		case INIT_REQUEST_2_STATE :
3895		case INIT_REQUEST_3_STATE :
3896		case QUERYTARGETS_STATE :
3897		case COLLECT_CLASS_STATE :
3898			return 0;
3899		default:
3900			break;
3901	}
3902	return 1;
3903}
3904