dns.c revision 1.20
1/*
2 * services/cache/dns.c - Cache services for DNS using msg and rrset caches.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains the DNS cache.
40 */
41#include "config.h"
42#include "iterator/iter_delegpt.h"
43#include "iterator/iter_utils.h"
44#include "validator/val_nsec.h"
45#include "validator/val_utils.h"
46#include "services/cache/dns.h"
47#include "services/cache/rrset.h"
48#include "util/data/msgparse.h"
49#include "util/data/msgreply.h"
50#include "util/data/packed_rrset.h"
51#include "util/data/dname.h"
52#include "util/module.h"
53#include "util/net_help.h"
54#include "util/regional.h"
55#include "util/config_file.h"
56#include "sldns/sbuffer.h"
57
58/** store rrsets in the rrset cache.
59 * @param env: module environment with caches.
60 * @param rep: contains list of rrsets to store.
61 * @param now: current time.
62 * @param leeway: during prefetch how much leeway to update TTLs.
63 * 	This makes rrsets (other than type NS) timeout sooner so they get
64 * 	updated with a new full TTL.
65 * 	Type NS does not get this, because it must not be refreshed from the
66 * 	child domain, but keep counting down properly.
67 * @param pside: if from parentside discovered NS, so that its NS is okay
68 * 	in a prefetch situation to be updated (without becoming sticky).
69 * @param qrep: update rrsets here if cache is better
70 * @param region: for qrep allocs.
71 * @param qstarttime: time when delegations were looked up, this is perhaps
72 *	earlier than the time in now. The time is used to determine if RRsets
73 *	of type NS have expired, so that they can only be updated using
74 *	lookups of delegation points that did not use them, since they had
75 *	expired then.
76 */
77static void
78store_rrsets(struct module_env* env, struct reply_info* rep, time_t now,
79	time_t leeway, int pside, struct reply_info* qrep,
80	struct regional* region, time_t qstarttime)
81{
82	size_t i;
83	/* see if rrset already exists in cache, if not insert it. */
84	for(i=0; i<rep->rrset_count; i++) {
85		rep->ref[i].key = rep->rrsets[i];
86		rep->ref[i].id = rep->rrsets[i]->id;
87		/* update ref if it was in the cache */
88		switch(rrset_cache_update(env->rrset_cache, &rep->ref[i],
89				env->alloc, ((ntohs(rep->ref[i].key->rk.type)==
90				LDNS_RR_TYPE_NS && !pside)?qstarttime:now + leeway))) {
91		case 0: /* ref unchanged, item inserted */
92			break;
93		case 2: /* ref updated, cache is superior */
94			if(region) {
95				struct ub_packed_rrset_key* ck;
96				lock_rw_rdlock(&rep->ref[i].key->entry.lock);
97				/* if deleted rrset, do not copy it */
98				if(rep->ref[i].key->id == 0)
99					ck = NULL;
100				else 	ck = packed_rrset_copy_region(
101					rep->ref[i].key, region, now);
102				lock_rw_unlock(&rep->ref[i].key->entry.lock);
103				if(ck) {
104					/* use cached copy if memory allows */
105					qrep->rrsets[i] = ck;
106				}
107			}
108			/* no break: also copy key item */
109			/* the line below is matched by gcc regex and silences
110			 * the fallthrough warning */
111			/* fallthrough */
112		case 1: /* ref updated, item inserted */
113			rep->rrsets[i] = rep->ref[i].key;
114		}
115	}
116}
117
118/** delete message from message cache */
119void
120msg_cache_remove(struct module_env* env, uint8_t* qname, size_t qnamelen,
121	uint16_t qtype, uint16_t qclass, uint16_t flags)
122{
123	struct query_info k;
124	hashvalue_type h;
125
126	k.qname = qname;
127	k.qname_len = qnamelen;
128	k.qtype = qtype;
129	k.qclass = qclass;
130	k.local_alias = NULL;
131	h = query_info_hash(&k, flags);
132	slabhash_remove(env->msg_cache, h, &k);
133}
134
135/** remove servfail msg cache entry */
136static void
137msg_del_servfail(struct module_env* env, struct query_info* qinfo,
138	uint32_t flags)
139{
140	struct msgreply_entry* e;
141	/* see if the entry is servfail, and then remove it, so that
142	 * lookups move from the cacheresponse stage to the recursionresponse
143	 * stage */
144	e = msg_cache_lookup(env, qinfo->qname, qinfo->qname_len,
145		qinfo->qtype, qinfo->qclass, flags, 0, 0);
146	if(!e) return;
147	/* we don't check for the ttl here, also expired servfail entries
148	 * are removed.  If the user uses serve-expired, they would still be
149	 * used to answer from cache */
150	if(FLAGS_GET_RCODE(((struct reply_info*)e->entry.data)->flags)
151		!= LDNS_RCODE_SERVFAIL) {
152		lock_rw_unlock(&e->entry.lock);
153		return;
154	}
155	lock_rw_unlock(&e->entry.lock);
156	msg_cache_remove(env, qinfo->qname, qinfo->qname_len, qinfo->qtype,
157		qinfo->qclass, flags);
158}
159
160void
161dns_cache_store_msg(struct module_env* env, struct query_info* qinfo,
162	hashvalue_type hash, struct reply_info* rep, time_t leeway, int pside,
163	struct reply_info* qrep, uint32_t flags, struct regional* region,
164	time_t qstarttime)
165{
166	struct msgreply_entry* e;
167	time_t ttl = rep->ttl;
168	size_t i;
169
170	/* store RRsets */
171        for(i=0; i<rep->rrset_count; i++) {
172		rep->ref[i].key = rep->rrsets[i];
173		rep->ref[i].id = rep->rrsets[i]->id;
174	}
175
176	/* there was a reply_info_sortref(rep) here but it seems to be
177	 * unnecessary, because the cache gets locked per rrset. */
178	reply_info_set_ttls(rep, *env->now);
179	store_rrsets(env, rep, *env->now, leeway, pside, qrep, region,
180		qstarttime);
181	if(ttl == 0 && !(flags & DNSCACHE_STORE_ZEROTTL)) {
182		/* we do not store the message, but we did store the RRs,
183		 * which could be useful for delegation information */
184		verbose(VERB_ALGO, "TTL 0: dropped msg from cache");
185		free(rep);
186		/* if the message is SERVFAIL in cache, remove that SERVFAIL,
187		 * so that the TTL 0 response can be returned for future
188		 * responses (i.e. don't get answered by the servfail from
189		 * cache, but instead go to recursion to get this TTL0
190		 * response). */
191		msg_del_servfail(env, qinfo, flags);
192		return;
193	}
194
195	/* store msg in the cache */
196	reply_info_sortref(rep);
197	if(!(e = query_info_entrysetup(qinfo, rep, hash))) {
198		log_err("store_msg: malloc failed");
199		return;
200	}
201	slabhash_insert(env->msg_cache, hash, &e->entry, rep, env->alloc);
202}
203
204/** see if an rrset is expired above the qname, return upper qname. */
205static int
206rrset_expired_above(struct module_env* env, uint8_t** qname, size_t* qnamelen,
207	uint16_t searchtype, uint16_t qclass, time_t now, uint8_t* expiretop,
208	size_t expiretoplen)
209{
210	struct ub_packed_rrset_key *rrset;
211	uint8_t lablen;
212
213	while(*qnamelen > 0) {
214		/* look one label higher */
215		lablen = **qname;
216		*qname += lablen + 1;
217		*qnamelen -= lablen + 1;
218		if(*qnamelen <= 0)
219			break;
220
221		/* looks up with a time of 0, to see expired entries */
222		if((rrset = rrset_cache_lookup(env->rrset_cache, *qname,
223			*qnamelen, searchtype, qclass, 0, 0, 0))) {
224			struct packed_rrset_data* data =
225				(struct packed_rrset_data*)rrset->entry.data;
226			if(now > data->ttl) {
227				/* it is expired, this is not wanted */
228				lock_rw_unlock(&rrset->entry.lock);
229				log_nametypeclass(VERB_ALGO, "this rrset is expired", *qname, searchtype, qclass);
230				return 1;
231			}
232			/* it is not expired, continue looking */
233			lock_rw_unlock(&rrset->entry.lock);
234		}
235
236		/* do not look above the expiretop. */
237		if(expiretop && *qnamelen == expiretoplen &&
238			query_dname_compare(*qname, expiretop)==0)
239			break;
240	}
241	return 0;
242}
243
244/** find closest NS or DNAME and returns the rrset (locked) */
245static struct ub_packed_rrset_key*
246find_closest_of_type(struct module_env* env, uint8_t* qname, size_t qnamelen,
247	uint16_t qclass, time_t now, uint16_t searchtype, int stripfront,
248	int noexpiredabove, uint8_t* expiretop, size_t expiretoplen)
249{
250	struct ub_packed_rrset_key *rrset;
251	uint8_t lablen;
252
253	if(stripfront) {
254		/* strip off so that DNAMEs have strict subdomain match */
255		lablen = *qname;
256		qname += lablen + 1;
257		qnamelen -= lablen + 1;
258	}
259
260	/* snip off front part of qname until the type is found */
261	while(qnamelen > 0) {
262		if((rrset = rrset_cache_lookup(env->rrset_cache, qname,
263			qnamelen, searchtype, qclass, 0, now, 0))) {
264			uint8_t* origqname = qname;
265			size_t origqnamelen = qnamelen;
266			if(!noexpiredabove)
267				return rrset;
268			/* if expiretop set, do not look above it, but
269			 * qname is equal, so the just found result is also
270			 * the nonexpired above part. */
271			if(expiretop && qnamelen == expiretoplen &&
272				query_dname_compare(qname, expiretop)==0)
273				return rrset;
274			/* check for expiry, but we have to let go of the rrset
275			 * for the lock ordering */
276			lock_rw_unlock(&rrset->entry.lock);
277			/* the expired_above function always takes off one
278			 * label (if qnamelen>0) and returns the final qname
279			 * where it searched, so we can continue from there
280			 * turning the O N*N search into O N. */
281			if(!rrset_expired_above(env, &qname, &qnamelen,
282				searchtype, qclass, now, expiretop,
283				expiretoplen)) {
284				/* we want to return rrset, but it may be
285				 * gone from cache, if so, just loop like
286				 * it was not in the cache in the first place.
287				 */
288				if((rrset = rrset_cache_lookup(env->
289					rrset_cache, origqname, origqnamelen,
290					searchtype, qclass, 0, now, 0))) {
291					return rrset;
292				}
293			}
294			log_nametypeclass(VERB_ALGO, "ignoring rrset because expired rrsets exist above it", origqname, searchtype, qclass);
295			continue;
296		}
297
298		/* snip off front label */
299		lablen = *qname;
300		qname += lablen + 1;
301		qnamelen -= lablen + 1;
302	}
303	return NULL;
304}
305
306/** add addr to additional section */
307static void
308addr_to_additional(struct ub_packed_rrset_key* rrset, struct regional* region,
309	struct dns_msg* msg, time_t now)
310{
311	if((msg->rep->rrsets[msg->rep->rrset_count] =
312		packed_rrset_copy_region(rrset, region, now))) {
313		msg->rep->ar_numrrsets++;
314		msg->rep->rrset_count++;
315	}
316}
317
318/** lookup message in message cache */
319struct msgreply_entry*
320msg_cache_lookup(struct module_env* env, uint8_t* qname, size_t qnamelen,
321	uint16_t qtype, uint16_t qclass, uint16_t flags, time_t now, int wr)
322{
323	struct lruhash_entry* e;
324	struct query_info k;
325	hashvalue_type h;
326
327	k.qname = qname;
328	k.qname_len = qnamelen;
329	k.qtype = qtype;
330	k.qclass = qclass;
331	k.local_alias = NULL;
332	h = query_info_hash(&k, flags);
333	e = slabhash_lookup(env->msg_cache, h, &k, wr);
334
335	if(!e) return NULL;
336	if( now > ((struct reply_info*)e->data)->ttl ) {
337		lock_rw_unlock(&e->lock);
338		return NULL;
339	}
340	return (struct msgreply_entry*)e->key;
341}
342
343/** find and add A and AAAA records for nameservers in delegpt */
344static int
345find_add_addrs(struct module_env* env, uint16_t qclass,
346	struct regional* region, struct delegpt* dp, time_t now,
347	struct dns_msg** msg)
348{
349	struct delegpt_ns* ns;
350	struct msgreply_entry* neg;
351	struct ub_packed_rrset_key* akey;
352	for(ns = dp->nslist; ns; ns = ns->next) {
353		akey = rrset_cache_lookup(env->rrset_cache, ns->name,
354			ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0);
355		if(akey) {
356			if(!delegpt_add_rrset_A(dp, region, akey, 0, NULL)) {
357				lock_rw_unlock(&akey->entry.lock);
358				return 0;
359			}
360			if(msg)
361				addr_to_additional(akey, region, *msg, now);
362			lock_rw_unlock(&akey->entry.lock);
363		} else {
364			/* BIT_CD on false because delegpt lookup does
365			 * not use dns64 translation */
366			neg = msg_cache_lookup(env, ns->name, ns->namelen,
367				LDNS_RR_TYPE_A, qclass, 0, now, 0);
368			if(neg) {
369				delegpt_add_neg_msg(dp, neg);
370				lock_rw_unlock(&neg->entry.lock);
371			}
372		}
373		akey = rrset_cache_lookup(env->rrset_cache, ns->name,
374			ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
375		if(akey) {
376			if(!delegpt_add_rrset_AAAA(dp, region, akey, 0, NULL)) {
377				lock_rw_unlock(&akey->entry.lock);
378				return 0;
379			}
380			if(msg)
381				addr_to_additional(akey, region, *msg, now);
382			lock_rw_unlock(&akey->entry.lock);
383		} else {
384			/* BIT_CD on false because delegpt lookup does
385			 * not use dns64 translation */
386			neg = msg_cache_lookup(env, ns->name, ns->namelen,
387				LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
388			if(neg) {
389				delegpt_add_neg_msg(dp, neg);
390				lock_rw_unlock(&neg->entry.lock);
391			}
392		}
393	}
394	return 1;
395}
396
397/** find and add A and AAAA records for missing nameservers in delegpt */
398int
399cache_fill_missing(struct module_env* env, uint16_t qclass,
400	struct regional* region, struct delegpt* dp)
401{
402	struct delegpt_ns* ns;
403	struct msgreply_entry* neg;
404	struct ub_packed_rrset_key* akey;
405	time_t now = *env->now;
406	for(ns = dp->nslist; ns; ns = ns->next) {
407		if(ns->cache_lookup_count > ITERATOR_NAME_CACHELOOKUP_MAX)
408			continue;
409		ns->cache_lookup_count++;
410		akey = rrset_cache_lookup(env->rrset_cache, ns->name,
411			ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0);
412		if(akey) {
413			if(!delegpt_add_rrset_A(dp, region, akey, ns->lame,
414				NULL)) {
415				lock_rw_unlock(&akey->entry.lock);
416				return 0;
417			}
418			log_nametypeclass(VERB_ALGO, "found in cache",
419				ns->name, LDNS_RR_TYPE_A, qclass);
420			lock_rw_unlock(&akey->entry.lock);
421		} else {
422			/* BIT_CD on false because delegpt lookup does
423			 * not use dns64 translation */
424			neg = msg_cache_lookup(env, ns->name, ns->namelen,
425				LDNS_RR_TYPE_A, qclass, 0, now, 0);
426			if(neg) {
427				delegpt_add_neg_msg(dp, neg);
428				lock_rw_unlock(&neg->entry.lock);
429			}
430		}
431		akey = rrset_cache_lookup(env->rrset_cache, ns->name,
432			ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
433		if(akey) {
434			if(!delegpt_add_rrset_AAAA(dp, region, akey, ns->lame,
435				NULL)) {
436				lock_rw_unlock(&akey->entry.lock);
437				return 0;
438			}
439			log_nametypeclass(VERB_ALGO, "found in cache",
440				ns->name, LDNS_RR_TYPE_AAAA, qclass);
441			lock_rw_unlock(&akey->entry.lock);
442		} else {
443			/* BIT_CD on false because delegpt lookup does
444			 * not use dns64 translation */
445			neg = msg_cache_lookup(env, ns->name, ns->namelen,
446				LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
447			if(neg) {
448				delegpt_add_neg_msg(dp, neg);
449				lock_rw_unlock(&neg->entry.lock);
450			}
451		}
452	}
453	return 1;
454}
455
456/** find and add DS or NSEC to delegation msg */
457static void
458find_add_ds(struct module_env* env, struct regional* region,
459	struct dns_msg* msg, struct delegpt* dp, time_t now)
460{
461	/* Lookup the DS or NSEC at the delegation point. */
462	struct ub_packed_rrset_key* rrset = rrset_cache_lookup(
463		env->rrset_cache, dp->name, dp->namelen, LDNS_RR_TYPE_DS,
464		msg->qinfo.qclass, 0, now, 0);
465	if(!rrset) {
466		/* NOTE: this won't work for alternate NSEC schemes
467		 *	(opt-in, NSEC3) */
468		rrset = rrset_cache_lookup(env->rrset_cache, dp->name,
469			dp->namelen, LDNS_RR_TYPE_NSEC, msg->qinfo.qclass,
470			0, now, 0);
471		/* Note: the PACKED_RRSET_NSEC_AT_APEX flag is not used.
472		 * since this is a referral, we need the NSEC at the parent
473		 * side of the zone cut, not the NSEC at apex side. */
474		if(rrset && nsec_has_type(rrset, LDNS_RR_TYPE_DS)) {
475			lock_rw_unlock(&rrset->entry.lock);
476			rrset = NULL; /* discard wrong NSEC */
477		}
478	}
479	if(rrset) {
480		/* add it to auth section. This is the second rrset. */
481		if((msg->rep->rrsets[msg->rep->rrset_count] =
482			packed_rrset_copy_region(rrset, region, now))) {
483			msg->rep->ns_numrrsets++;
484			msg->rep->rrset_count++;
485		}
486		lock_rw_unlock(&rrset->entry.lock);
487	}
488}
489
490struct dns_msg*
491dns_msg_create(uint8_t* qname, size_t qnamelen, uint16_t qtype,
492	uint16_t qclass, struct regional* region, size_t capacity)
493{
494	struct dns_msg* msg = (struct dns_msg*)regional_alloc(region,
495		sizeof(struct dns_msg));
496	if(!msg)
497		return NULL;
498	msg->qinfo.qname = regional_alloc_init(region, qname, qnamelen);
499	if(!msg->qinfo.qname)
500		return NULL;
501	msg->qinfo.qname_len = qnamelen;
502	msg->qinfo.qtype = qtype;
503	msg->qinfo.qclass = qclass;
504	msg->qinfo.local_alias = NULL;
505	/* non-packed reply_info, because it needs to grow the array */
506	msg->rep = (struct reply_info*)regional_alloc_zero(region,
507		sizeof(struct reply_info)-sizeof(struct rrset_ref));
508	if(!msg->rep)
509		return NULL;
510	if(capacity > RR_COUNT_MAX)
511		return NULL; /* integer overflow protection */
512	msg->rep->flags = BIT_QR; /* with QR, no AA */
513	msg->rep->qdcount = 1;
514	msg->rep->reason_bogus = LDNS_EDE_NONE;
515	msg->rep->rrsets = (struct ub_packed_rrset_key**)
516		regional_alloc(region,
517		capacity*sizeof(struct ub_packed_rrset_key*));
518	if(!msg->rep->rrsets)
519		return NULL;
520	return msg;
521}
522
523int
524dns_msg_authadd(struct dns_msg* msg, struct regional* region,
525	struct ub_packed_rrset_key* rrset, time_t now)
526{
527	if(!(msg->rep->rrsets[msg->rep->rrset_count++] =
528		packed_rrset_copy_region(rrset, region, now)))
529		return 0;
530	msg->rep->ns_numrrsets++;
531	return 1;
532}
533
534int
535dns_msg_ansadd(struct dns_msg* msg, struct regional* region,
536	struct ub_packed_rrset_key* rrset, time_t now)
537{
538	if(!(msg->rep->rrsets[msg->rep->rrset_count++] =
539		packed_rrset_copy_region(rrset, region, now)))
540		return 0;
541	msg->rep->an_numrrsets++;
542	return 1;
543}
544
545struct delegpt*
546dns_cache_find_delegation(struct module_env* env, uint8_t* qname,
547	size_t qnamelen, uint16_t qtype, uint16_t qclass,
548	struct regional* region, struct dns_msg** msg, time_t now,
549	int noexpiredabove, uint8_t* expiretop, size_t expiretoplen)
550{
551	/* try to find closest NS rrset */
552	struct ub_packed_rrset_key* nskey;
553	struct packed_rrset_data* nsdata;
554	struct delegpt* dp;
555
556	nskey = find_closest_of_type(env, qname, qnamelen, qclass, now,
557		LDNS_RR_TYPE_NS, 0, noexpiredabove, expiretop, expiretoplen);
558	if(!nskey) /* hope the caller has hints to prime or something */
559		return NULL;
560	nsdata = (struct packed_rrset_data*)nskey->entry.data;
561	/* got the NS key, create delegation point */
562	dp = delegpt_create(region);
563	if(!dp || !delegpt_set_name(dp, region, nskey->rk.dname)) {
564		lock_rw_unlock(&nskey->entry.lock);
565		log_err("find_delegation: out of memory");
566		return NULL;
567	}
568	/* create referral message */
569	if(msg) {
570		/* allocate the array to as much as we could need:
571		 *	NS rrset + DS/NSEC rrset +
572		 *	A rrset for every NS RR
573		 *	AAAA rrset for every NS RR
574		 */
575		*msg = dns_msg_create(qname, qnamelen, qtype, qclass, region,
576			2 + nsdata->count*2);
577		if(!*msg || !dns_msg_authadd(*msg, region, nskey, now)) {
578			lock_rw_unlock(&nskey->entry.lock);
579			log_err("find_delegation: out of memory");
580			return NULL;
581		}
582	}
583	if(!delegpt_rrset_add_ns(dp, region, nskey, 0))
584		log_err("find_delegation: addns out of memory");
585	lock_rw_unlock(&nskey->entry.lock); /* first unlock before next lookup*/
586	/* find and add DS/NSEC (if any) */
587	if(msg)
588		find_add_ds(env, region, *msg, dp, now);
589	/* find and add A entries */
590	if(!find_add_addrs(env, qclass, region, dp, now, msg))
591		log_err("find_delegation: addrs out of memory");
592	return dp;
593}
594
595/** allocate dns_msg from query_info and reply_info */
596static struct dns_msg*
597gen_dns_msg(struct regional* region, struct query_info* q, size_t num)
598{
599	struct dns_msg* msg = (struct dns_msg*)regional_alloc(region,
600		sizeof(struct dns_msg));
601	if(!msg)
602		return NULL;
603	memcpy(&msg->qinfo, q, sizeof(struct query_info));
604	msg->qinfo.qname = regional_alloc_init(region, q->qname, q->qname_len);
605	if(!msg->qinfo.qname)
606		return NULL;
607	/* allocate replyinfo struct and rrset key array separately */
608	msg->rep = (struct reply_info*)regional_alloc(region,
609		sizeof(struct reply_info) - sizeof(struct rrset_ref));
610	if(!msg->rep)
611		return NULL;
612	msg->rep->reason_bogus = LDNS_EDE_NONE;
613	if(num > RR_COUNT_MAX)
614		return NULL; /* integer overflow protection */
615	msg->rep->rrsets = (struct ub_packed_rrset_key**)
616		regional_alloc(region,
617		num * sizeof(struct ub_packed_rrset_key*));
618	if(!msg->rep->rrsets)
619		return NULL;
620	return msg;
621}
622
623struct dns_msg*
624tomsg(struct module_env* env, struct query_info* q, struct reply_info* r,
625	struct regional* region, time_t now, int allow_expired,
626	struct regional* scratch)
627{
628	struct dns_msg* msg;
629	size_t i;
630	int is_expired = 0;
631	time_t now_control = now;
632	if(now > r->ttl) {
633		/* Check if we are allowed to serve expired */
634		if(allow_expired) {
635			if(env->cfg->serve_expired_ttl &&
636				r->serve_expired_ttl < now) {
637				return NULL;
638			}
639		} else {
640			return NULL;
641		}
642		/* Change the current time so we can pass the below TTL checks when
643		 * serving expired data. */
644		now_control = r->ttl - env->cfg->serve_expired_reply_ttl;
645		is_expired = 1;
646	}
647
648	msg = gen_dns_msg(region, q, r->rrset_count);
649	if(!msg) return NULL;
650	msg->rep->flags = r->flags;
651	msg->rep->qdcount = r->qdcount;
652	msg->rep->ttl = is_expired
653		?SERVE_EXPIRED_REPLY_TTL
654		:r->ttl - now;
655	if(r->prefetch_ttl > now)
656		msg->rep->prefetch_ttl = r->prefetch_ttl - now;
657	else
658		msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
659	msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
660	msg->rep->security = r->security;
661	msg->rep->an_numrrsets = r->an_numrrsets;
662	msg->rep->ns_numrrsets = r->ns_numrrsets;
663	msg->rep->ar_numrrsets = r->ar_numrrsets;
664	msg->rep->rrset_count = r->rrset_count;
665	msg->rep->authoritative = r->authoritative;
666	msg->rep->reason_bogus = r->reason_bogus;
667	if(!rrset_array_lock(r->ref, r->rrset_count, now_control)) {
668		return NULL;
669	}
670	if(r->an_numrrsets > 0 && (r->rrsets[0]->rk.type == htons(
671		LDNS_RR_TYPE_CNAME) || r->rrsets[0]->rk.type == htons(
672		LDNS_RR_TYPE_DNAME)) && !reply_check_cname_chain(q, r)) {
673		/* cname chain is now invalid, reconstruct msg */
674		rrset_array_unlock(r->ref, r->rrset_count);
675		return NULL;
676	}
677	if(r->security == sec_status_secure && !reply_all_rrsets_secure(r)) {
678		/* message rrsets have changed status, revalidate */
679		rrset_array_unlock(r->ref, r->rrset_count);
680		return NULL;
681	}
682	for(i=0; i<msg->rep->rrset_count; i++) {
683		msg->rep->rrsets[i] = packed_rrset_copy_region(r->rrsets[i],
684			region, now);
685		if(!msg->rep->rrsets[i]) {
686			rrset_array_unlock(r->ref, r->rrset_count);
687			return NULL;
688		}
689	}
690	if(env)
691		rrset_array_unlock_touch(env->rrset_cache, scratch, r->ref,
692		r->rrset_count);
693	else
694		rrset_array_unlock(r->ref, r->rrset_count);
695	return msg;
696}
697
698/** synthesize RRset-only response from cached RRset item */
699static struct dns_msg*
700rrset_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
701	time_t now, struct query_info* q)
702{
703	struct dns_msg* msg;
704	struct packed_rrset_data* d = (struct packed_rrset_data*)
705		rrset->entry.data;
706	if(now > d->ttl)
707		return NULL;
708	msg = gen_dns_msg(region, q, 1); /* only the CNAME (or other) RRset */
709	if(!msg)
710		return NULL;
711	msg->rep->flags = BIT_QR; /* reply, no AA, no error */
712        msg->rep->authoritative = 0; /* reply stored in cache can't be authoritative */
713	msg->rep->qdcount = 1;
714	msg->rep->ttl = d->ttl - now;
715	msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
716	msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
717	msg->rep->security = sec_status_unchecked;
718	msg->rep->an_numrrsets = 1;
719	msg->rep->ns_numrrsets = 0;
720	msg->rep->ar_numrrsets = 0;
721	msg->rep->rrset_count = 1;
722	msg->rep->reason_bogus = LDNS_EDE_NONE;
723	msg->rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now);
724	if(!msg->rep->rrsets[0]) /* copy CNAME */
725		return NULL;
726	return msg;
727}
728
729/** synthesize DNAME+CNAME response from cached DNAME item */
730static struct dns_msg*
731synth_dname_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
732	time_t now, struct query_info* q, enum sec_status* sec_status)
733{
734	struct dns_msg* msg;
735	struct ub_packed_rrset_key* ck;
736	struct packed_rrset_data* newd, *d = (struct packed_rrset_data*)
737		rrset->entry.data;
738	uint8_t* newname, *dtarg = NULL;
739	size_t newlen, dtarglen;
740	if(now > d->ttl)
741		return NULL;
742	/* only allow validated (with DNSSEC) DNAMEs used from cache
743	 * for insecure DNAMEs, query again. */
744	*sec_status = d->security;
745	/* return sec status, so the status of the CNAME can be checked
746	 * by the calling routine. */
747	msg = gen_dns_msg(region, q, 2); /* DNAME + CNAME RRset */
748	if(!msg)
749		return NULL;
750	msg->rep->flags = BIT_QR; /* reply, no AA, no error */
751        msg->rep->authoritative = 0; /* reply stored in cache can't be authoritative */
752	msg->rep->qdcount = 1;
753	msg->rep->ttl = d->ttl - now;
754	msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
755	msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
756	msg->rep->security = sec_status_unchecked;
757	msg->rep->an_numrrsets = 1;
758	msg->rep->ns_numrrsets = 0;
759	msg->rep->ar_numrrsets = 0;
760	msg->rep->rrset_count = 1;
761	msg->rep->reason_bogus = LDNS_EDE_NONE;
762	msg->rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now);
763	if(!msg->rep->rrsets[0]) /* copy DNAME */
764		return NULL;
765	/* synth CNAME rrset */
766	get_cname_target(rrset, &dtarg, &dtarglen);
767	if(!dtarg)
768		return NULL;
769	newlen = q->qname_len + dtarglen - rrset->rk.dname_len;
770	if(newlen > LDNS_MAX_DOMAINLEN) {
771		msg->rep->flags |= LDNS_RCODE_YXDOMAIN;
772		return msg;
773	}
774	newname = (uint8_t*)regional_alloc(region, newlen);
775	if(!newname)
776		return NULL;
777	/* new name is concatenation of qname front (without DNAME owner)
778	 * and DNAME target name */
779	memcpy(newname, q->qname, q->qname_len-rrset->rk.dname_len);
780	memmove(newname+(q->qname_len-rrset->rk.dname_len), dtarg, dtarglen);
781	/* create rest of CNAME rrset */
782	ck = (struct ub_packed_rrset_key*)regional_alloc(region,
783		sizeof(struct ub_packed_rrset_key));
784	if(!ck)
785		return NULL;
786	memset(&ck->entry, 0, sizeof(ck->entry));
787	msg->rep->rrsets[1] = ck;
788	ck->entry.key = ck;
789	ck->rk.type = htons(LDNS_RR_TYPE_CNAME);
790	ck->rk.rrset_class = rrset->rk.rrset_class;
791	ck->rk.flags = 0;
792	ck->rk.dname = regional_alloc_init(region, q->qname, q->qname_len);
793	if(!ck->rk.dname)
794		return NULL;
795	ck->rk.dname_len = q->qname_len;
796	ck->entry.hash = rrset_key_hash(&ck->rk);
797	newd = (struct packed_rrset_data*)regional_alloc_zero(region,
798		sizeof(struct packed_rrset_data) + sizeof(size_t) +
799		sizeof(uint8_t*) + sizeof(time_t) + sizeof(uint16_t)
800		+ newlen);
801	if(!newd)
802		return NULL;
803	ck->entry.data = newd;
804	newd->ttl = 0; /* 0 for synthesized CNAME TTL */
805	newd->count = 1;
806	newd->rrsig_count = 0;
807	newd->trust = rrset_trust_ans_noAA;
808	newd->rr_len = (size_t*)((uint8_t*)newd +
809		sizeof(struct packed_rrset_data));
810	newd->rr_len[0] = newlen + sizeof(uint16_t);
811	packed_rrset_ptr_fixup(newd);
812	newd->rr_ttl[0] = newd->ttl;
813	msg->rep->ttl = newd->ttl;
814	msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(newd->ttl);
815	msg->rep->serve_expired_ttl = newd->ttl + SERVE_EXPIRED_TTL;
816	sldns_write_uint16(newd->rr_data[0], newlen);
817	memmove(newd->rr_data[0] + sizeof(uint16_t), newname, newlen);
818	msg->rep->an_numrrsets ++;
819	msg->rep->rrset_count ++;
820	return msg;
821}
822
823/** Fill TYPE_ANY response with some data from cache */
824static struct dns_msg*
825fill_any(struct module_env* env,
826	uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
827	struct regional* region)
828{
829	time_t now = *env->now;
830	struct dns_msg* msg = NULL;
831	uint16_t lookup[] = {LDNS_RR_TYPE_A, LDNS_RR_TYPE_AAAA,
832		LDNS_RR_TYPE_MX, LDNS_RR_TYPE_SOA, LDNS_RR_TYPE_NS,
833		LDNS_RR_TYPE_DNAME, 0};
834	int i, num=6; /* number of RR types to look up */
835	log_assert(lookup[num] == 0);
836
837	if(env->cfg->deny_any) {
838		/* return empty message */
839		msg = dns_msg_create(qname, qnamelen, qtype, qclass,
840			region, 0);
841		if(!msg) {
842			return NULL;
843		}
844		/* set NOTIMPL for RFC 8482 */
845		msg->rep->flags |= LDNS_RCODE_NOTIMPL;
846		msg->rep->security = sec_status_indeterminate;
847		return msg;
848	}
849
850	for(i=0; i<num; i++) {
851		/* look up this RR for inclusion in type ANY response */
852		struct ub_packed_rrset_key* rrset = rrset_cache_lookup(
853			env->rrset_cache, qname, qnamelen, lookup[i],
854			qclass, 0, now, 0);
855		struct packed_rrset_data *d;
856		if(!rrset)
857			continue;
858
859		/* only if rrset from answer section */
860		d = (struct packed_rrset_data*)rrset->entry.data;
861		if(d->trust == rrset_trust_add_noAA ||
862			d->trust == rrset_trust_auth_noAA ||
863			d->trust == rrset_trust_add_AA ||
864			d->trust == rrset_trust_auth_AA) {
865			lock_rw_unlock(&rrset->entry.lock);
866			continue;
867		}
868
869		/* create msg if none */
870		if(!msg) {
871			msg = dns_msg_create(qname, qnamelen, qtype, qclass,
872				region, (size_t)(num-i));
873			if(!msg) {
874				lock_rw_unlock(&rrset->entry.lock);
875				return NULL;
876			}
877		}
878
879		/* add RRset to response */
880		if(!dns_msg_ansadd(msg, region, rrset, now)) {
881			lock_rw_unlock(&rrset->entry.lock);
882			return NULL;
883		}
884		lock_rw_unlock(&rrset->entry.lock);
885	}
886	return msg;
887}
888
889struct dns_msg*
890dns_cache_lookup(struct module_env* env,
891	uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
892	uint16_t flags, struct regional* region, struct regional* scratch,
893	int no_partial, uint8_t* dpname, size_t dpnamelen)
894{
895	struct lruhash_entry* e;
896	struct query_info k;
897	hashvalue_type h;
898	time_t now = *env->now;
899	struct ub_packed_rrset_key* rrset;
900
901	/* lookup first, this has both NXdomains and ANSWER responses */
902	k.qname = qname;
903	k.qname_len = qnamelen;
904	k.qtype = qtype;
905	k.qclass = qclass;
906	k.local_alias = NULL;
907	h = query_info_hash(&k, flags);
908	e = slabhash_lookup(env->msg_cache, h, &k, 0);
909	if(e) {
910		struct msgreply_entry* key = (struct msgreply_entry*)e->key;
911		struct reply_info* data = (struct reply_info*)e->data;
912		struct dns_msg* msg = tomsg(env, &key->key, data, region, now, 0,
913			scratch);
914		if(msg) {
915			lock_rw_unlock(&e->lock);
916			return msg;
917		}
918		/* could be msg==NULL; due to TTL or not all rrsets available */
919		lock_rw_unlock(&e->lock);
920	}
921
922	/* see if a DNAME exists. Checked for first, to enforce that DNAMEs
923	 * are more important, the CNAME is resynthesized and thus
924	 * consistent with the DNAME */
925	if(!no_partial &&
926		(rrset=find_closest_of_type(env, qname, qnamelen, qclass, now,
927		LDNS_RR_TYPE_DNAME, 1, 0, NULL, 0))) {
928		/* synthesize a DNAME+CNAME message based on this */
929		enum sec_status sec_status = sec_status_unchecked;
930		struct dns_msg* msg = synth_dname_msg(rrset, region, now, &k,
931			&sec_status);
932		if(msg) {
933			struct ub_packed_rrset_key* cname_rrset;
934			lock_rw_unlock(&rrset->entry.lock);
935			/* now, after unlocking the DNAME rrset lock,
936			 * check the sec_status, and see if we need to look
937			 * up the CNAME record associated before it can
938			 * be used */
939			/* normally, only secure DNAMEs allowed from cache*/
940			if(sec_status == sec_status_secure)
941				return msg;
942			/* but if we have a CNAME cached with this name, then we
943			 * have previously already allowed this name to pass.
944			 * the next cache lookup is going to fetch that CNAME itself,
945			 * but it is better to have the (unsigned)DNAME + CNAME in
946			 * that case */
947			cname_rrset = rrset_cache_lookup(
948				env->rrset_cache, qname, qnamelen,
949				LDNS_RR_TYPE_CNAME, qclass, 0, now, 0);
950			if(cname_rrset) {
951				/* CNAME already synthesized by
952				 * synth_dname_msg routine, so we can
953				 * straight up return the msg */
954				lock_rw_unlock(&cname_rrset->entry.lock);
955				return msg;
956			}
957		} else {
958			lock_rw_unlock(&rrset->entry.lock);
959		}
960	}
961
962	/* see if we have CNAME for this domain,
963	 * but not for DS records (which are part of the parent) */
964	if(!no_partial && qtype != LDNS_RR_TYPE_DS &&
965	   (rrset=rrset_cache_lookup(env->rrset_cache, qname, qnamelen,
966		LDNS_RR_TYPE_CNAME, qclass, 0, now, 0))) {
967		uint8_t* wc = NULL;
968		size_t wl;
969		/* if the rrset is not a wildcard expansion, with wcname */
970		/* because, if we return that CNAME rrset on its own, it is
971		 * missing the NSEC or NSEC3 proof */
972		if(!(val_rrset_wildcard(rrset, &wc, &wl) && wc != NULL)) {
973			struct dns_msg* msg = rrset_msg(rrset, region, now, &k);
974			if(msg) {
975				lock_rw_unlock(&rrset->entry.lock);
976				return msg;
977			}
978		}
979		lock_rw_unlock(&rrset->entry.lock);
980	}
981
982	/* construct DS, DNSKEY messages from rrset cache. */
983	if((qtype == LDNS_RR_TYPE_DS || qtype == LDNS_RR_TYPE_DNSKEY) &&
984		(rrset=rrset_cache_lookup(env->rrset_cache, qname, qnamelen,
985		qtype, qclass, 0, now, 0))) {
986		/* if the rrset is from the additional section, and the
987		 * signatures have fallen off, then do not synthesize a msg
988		 * instead, allow a full query for signed results to happen.
989		 * Forego all rrset data from additional section, because
990		 * some signatures may not be present and cause validation
991		 * failure.
992		 */
993		struct packed_rrset_data *d = (struct packed_rrset_data*)
994			rrset->entry.data;
995		if(d->trust != rrset_trust_add_noAA &&
996			d->trust != rrset_trust_add_AA &&
997			(qtype == LDNS_RR_TYPE_DS ||
998				(d->trust != rrset_trust_auth_noAA
999				&& d->trust != rrset_trust_auth_AA) )) {
1000			struct dns_msg* msg = rrset_msg(rrset, region, now, &k);
1001			if(msg) {
1002				lock_rw_unlock(&rrset->entry.lock);
1003				return msg;
1004			}
1005		}
1006		lock_rw_unlock(&rrset->entry.lock);
1007	}
1008
1009	/* stop downwards cache search on NXDOMAIN.
1010	 * Empty nonterminals are NOERROR, so an NXDOMAIN for foo
1011	 * means bla.foo also does not exist.  The DNSSEC proofs are
1012	 * the same.  We search upwards for NXDOMAINs. */
1013	if(env->cfg->harden_below_nxdomain) {
1014		while(!dname_is_root(k.qname)) {
1015			if(dpname && dpnamelen
1016				&& !dname_subdomain_c(k.qname, dpname))
1017				break; /* no synth nxdomain above the stub */
1018			dname_remove_label(&k.qname, &k.qname_len);
1019			h = query_info_hash(&k, flags);
1020			e = slabhash_lookup(env->msg_cache, h, &k, 0);
1021			if(!e && k.qtype != LDNS_RR_TYPE_A &&
1022				env->cfg->qname_minimisation) {
1023				k.qtype = LDNS_RR_TYPE_A;
1024				h = query_info_hash(&k, flags);
1025				e = slabhash_lookup(env->msg_cache, h, &k, 0);
1026			}
1027			if(e) {
1028				struct reply_info* data = (struct reply_info*)e->data;
1029				struct dns_msg* msg;
1030				if(FLAGS_GET_RCODE(data->flags) == LDNS_RCODE_NXDOMAIN
1031					&& data->security == sec_status_secure
1032					&& (data->an_numrrsets == 0 ||
1033						ntohs(data->rrsets[0]->rk.type) != LDNS_RR_TYPE_CNAME)
1034					&& (msg=tomsg(env, &k, data, region, now, 0, scratch))) {
1035					lock_rw_unlock(&e->lock);
1036					msg->qinfo.qname=qname;
1037					msg->qinfo.qname_len=qnamelen;
1038					/* check that DNSSEC really works out */
1039					msg->rep->security = sec_status_unchecked;
1040					iter_scrub_nxdomain(msg);
1041					return msg;
1042				}
1043				lock_rw_unlock(&e->lock);
1044			}
1045			k.qtype = qtype;
1046		}
1047	}
1048
1049	/* fill common RR types for ANY response to avoid requery */
1050	if(qtype == LDNS_RR_TYPE_ANY) {
1051		return fill_any(env, qname, qnamelen, qtype, qclass, region);
1052	}
1053
1054	return NULL;
1055}
1056
1057int
1058dns_cache_store(struct module_env* env, struct query_info* msgqinf,
1059        struct reply_info* msgrep, int is_referral, time_t leeway, int pside,
1060	struct regional* region, uint32_t flags, time_t qstarttime)
1061{
1062	struct reply_info* rep = NULL;
1063	/* alloc, malloc properly (not in region, like msg is) */
1064	rep = reply_info_copy(msgrep, env->alloc, NULL);
1065	if(!rep)
1066		return 0;
1067	/* ttl must be relative ;i.e. 0..86400 not  time(0)+86400.
1068	 * the env->now is added to message and RRsets in this routine. */
1069	/* the leeway is used to invalidate other rrsets earlier */
1070
1071	if(is_referral) {
1072		/* store rrsets */
1073		struct rrset_ref ref;
1074		size_t i;
1075		for(i=0; i<rep->rrset_count; i++) {
1076			packed_rrset_ttl_add((struct packed_rrset_data*)
1077				rep->rrsets[i]->entry.data, *env->now);
1078			ref.key = rep->rrsets[i];
1079			ref.id = rep->rrsets[i]->id;
1080			/*ignore ret: it was in the cache, ref updated */
1081			/* no leeway for typeNS */
1082			(void)rrset_cache_update(env->rrset_cache, &ref,
1083				env->alloc,
1084				((ntohs(ref.key->rk.type)==LDNS_RR_TYPE_NS
1085				 && !pside) ? qstarttime:*env->now + leeway));
1086		}
1087		free(rep);
1088		return 1;
1089	} else {
1090		/* store msg, and rrsets */
1091		struct query_info qinf;
1092		hashvalue_type h;
1093
1094		qinf = *msgqinf;
1095		qinf.qname = memdup(msgqinf->qname, msgqinf->qname_len);
1096		if(!qinf.qname) {
1097			reply_info_parsedelete(rep, env->alloc);
1098			return 0;
1099		}
1100		/* fixup flags to be sensible for a reply based on the cache */
1101		/* this module means that RA is available. It is an answer QR.
1102		 * Not AA from cache. Not CD in cache (depends on client bit). */
1103		rep->flags |= (BIT_RA | BIT_QR);
1104		rep->flags &= ~(BIT_AA | BIT_CD);
1105		h = query_info_hash(&qinf, (uint16_t)flags);
1106		dns_cache_store_msg(env, &qinf, h, rep, leeway, pside, msgrep,
1107			flags, region, qstarttime);
1108		/* qname is used inside query_info_entrysetup, and set to
1109		 * NULL. If it has not been used, free it. free(0) is safe. */
1110		free(qinf.qname);
1111	}
1112	return 1;
1113}
1114
1115int
1116dns_cache_prefetch_adjust(struct module_env* env, struct query_info* qinfo,
1117        time_t adjust, uint16_t flags)
1118{
1119	struct msgreply_entry* msg;
1120	msg = msg_cache_lookup(env, qinfo->qname, qinfo->qname_len,
1121		qinfo->qtype, qinfo->qclass, flags, *env->now, 1);
1122	if(msg) {
1123		struct reply_info* rep = (struct reply_info*)msg->entry.data;
1124		if(rep) {
1125			rep->prefetch_ttl += adjust;
1126			lock_rw_unlock(&msg->entry.lock);
1127			return 1;
1128		}
1129		lock_rw_unlock(&msg->entry.lock);
1130	}
1131	return 0;
1132}
1133