/* rrset.c revision 356345 */
1/*
2 * services/cache/rrset.c - Resource record set cache.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains the rrset cache.
40 */
41#include "config.h"
42#include "services/cache/rrset.h"
43#include "sldns/rrdef.h"
44#include "util/storage/slabhash.h"
45#include "util/config_file.h"
46#include "util/data/packed_rrset.h"
47#include "util/data/msgreply.h"
48#include "util/regional.h"
49#include "util/alloc.h"
50#include "util/net_help.h"
51
52void
53rrset_markdel(void* key)
54{
55	struct ub_packed_rrset_key* r = (struct ub_packed_rrset_key*)key;
56	r->id = 0;
57}
58
59struct rrset_cache* rrset_cache_create(struct config_file* cfg,
60	struct alloc_cache* alloc)
61{
62	size_t slabs = (cfg?cfg->rrset_cache_slabs:HASH_DEFAULT_SLABS);
63	size_t startarray = HASH_DEFAULT_STARTARRAY;
64	size_t maxmem = (cfg?cfg->rrset_cache_size:HASH_DEFAULT_MAXMEM);
65
66	struct rrset_cache *r = (struct rrset_cache*)slabhash_create(slabs,
67		startarray, maxmem, ub_rrset_sizefunc, ub_rrset_compare,
68		ub_rrset_key_delete, rrset_data_delete, alloc);
69	slabhash_setmarkdel(&r->table, &rrset_markdel);
70	return r;
71}
72
73void rrset_cache_delete(struct rrset_cache* r)
74{
75	if(!r)
76		return;
77	slabhash_delete(&r->table);
78	/* slabhash delete also does free(r), since table is first in struct*/
79}
80
81struct rrset_cache* rrset_cache_adjust(struct rrset_cache *r,
82	struct config_file* cfg, struct alloc_cache* alloc)
83{
84	if(!r || !cfg || !slabhash_is_size(&r->table, cfg->rrset_cache_size,
85		cfg->rrset_cache_slabs))
86	{
87		rrset_cache_delete(r);
88		r = rrset_cache_create(cfg, alloc);
89	}
90	return r;
91}
92
/**
 * Move an rrset to the front of its slab's LRU list, if it is still the
 * same rrset (id and hash unchanged). The caller must NOT hold the
 * entry lock of any rrset, see the deadlock note below.
 * @param r: the rrset cache.
 * @param key: the rrset key to touch (may have been lazily deleted;
 *	this is detected via the id check under the entry lock).
 * @param hash: hash value the caller saw; selects the slab.
 * @param id: id the caller saw; touch happens only if still equal.
 */
void
rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key,
        hashvalue_type hash, rrset_id_type id)
{
	struct lruhash* table = slabhash_gettable(&r->table, hash);
	/*
	 * This leads to locking problems, deadlocks, if the caller is
	 * holding any other rrset lock.
	 * Because a lookup through the hashtable does:
	 *	tablelock -> entrylock  (for that entry caller holds)
	 * And this would do
	 *	entrylock(already held) -> tablelock
	 * And if two threads do this, it results in deadlock.
	 * So, the caller must not hold entrylock.
	 */
	lock_quick_lock(&table->lock);
	/* we have locked the hash table, the item can still be deleted.
	 * because it could already have been reclaimed, but not yet set id=0.
	 * This is because some lruhash routines have lazy deletion.
	 * so, we must acquire a lock on the item to verify the id != 0.
	 * also, with hash not changed, we are using the right slab.
	 */
	lock_rw_rdlock(&key->entry.lock);
	if(key->id == id && key->entry.hash == hash) {
		lru_touch(table, &key->entry);
	}
	lock_rw_unlock(&key->entry.lock);
	lock_quick_unlock(&table->lock);
}
122
123/** see if rrset needs to be updated in the cache */
124static int
125need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
126{
127	struct packed_rrset_data* newd = (struct packed_rrset_data*)nd;
128	struct packed_rrset_data* cached = (struct packed_rrset_data*)cd;
129	/* 	o store if rrset has been validated
130	 *  		everything better than bogus data
131	 *  		secure is preferred */
132	if( newd->security == sec_status_secure &&
133		cached->security != sec_status_secure)
134		return 1;
135	if( cached->security == sec_status_bogus &&
136		newd->security != sec_status_bogus && !equal)
137		return 1;
138        /*      o if current RRset is more trustworthy - insert it */
139        if( newd->trust > cached->trust ) {
140		/* if the cached rrset is bogus, and this one equal,
141		 * do not update the TTL - let it expire. */
142		if(equal && cached->ttl >= timenow &&
143			cached->security == sec_status_bogus)
144			return 0;
145                return 1;
146	}
147	/*	o item in cache has expired */
148	if( cached->ttl < timenow )
149		return 1;
150	/*  o same trust, but different in data - insert it */
151	if( newd->trust == cached->trust && !equal ) {
152		/* if this is type NS, do not 'stick' to owner that changes
153		 * the NS RRset, but use the old TTL for the new data, and
154		 * update to fetch the latest data. ttl is not expired, because
155		 * that check was before this one. */
156		if(ns) {
157			size_t i;
158			newd->ttl = cached->ttl;
159			for(i=0; i<(newd->count+newd->rrsig_count); i++)
160				if(newd->rr_ttl[i] > newd->ttl)
161					newd->rr_ttl[i] = newd->ttl;
162		}
163		return 1;
164	}
165	return 0;
166}
167
168/** Update RRSet special key ID */
169static void
170rrset_update_id(struct rrset_ref* ref, struct alloc_cache* alloc)
171{
172	/* this may clear the cache and invalidate lock below */
173	uint64_t newid = alloc_get_id(alloc);
174	/* obtain writelock */
175	lock_rw_wrlock(&ref->key->entry.lock);
176	/* check if it was deleted in the meantime, if so, skip update */
177	if(ref->key->id == ref->id) {
178		ref->key->id = newid;
179		ref->id = newid;
180	}
181	lock_rw_unlock(&ref->key->entry.lock);
182}
183
/**
 * Update the rrset cache with the rrset referenced by ref.
 * Looks up the entry with a readlock to decide (via need_to_update_rrset)
 * whether the cached or the new data wins, then inserts the winner.
 * On return, ref->key and ref->id refer to whatever is in the cache;
 * they may turn out stale later (small unlocked gap), which only causes
 * a cache miss.
 * @param r: the rrset cache.
 * @param ref: reference with the rrset to store; updated on return.
 * @param alloc: alloc cache, used to dispose of a superseded key.
 * @param timenow: current time for TTL comparisons.
 * @return 0 if the rrset was not in the cache before (fresh insert),
 *	1 if it was present (updated, or cached copy was preferred),
 *	2 if the cached copy was preferred and its rdata equals the new data.
 */
int
rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref,
	struct alloc_cache* alloc, time_t timenow)
{
	struct lruhash_entry* e;
	struct ub_packed_rrset_key* k = ref->key;
	hashvalue_type h = k->entry.hash;
	uint16_t rrset_type = ntohs(k->rk.type);
	int equal = 0;
	log_assert(ref->id != 0 && k->id != 0);
	log_assert(k->rk.dname != NULL);
	/* looks up item with a readlock - no editing! */
	if((e=slabhash_lookup(&r->table, h, k, 0)) != 0) {
		/* return id and key as they will be used in the cache
		 * since the lruhash_insert, if item already exists, deallocs
		 * the passed key in favor of the already stored key.
		 * because of the small gap (see below) this key ptr and id
		 * may prove later to be already deleted, which is no problem
		 * as it only makes a cache miss.
		 */
		ref->key = (struct ub_packed_rrset_key*)e->key;
		ref->id = ref->key->id;
		equal = rrsetdata_equal((struct packed_rrset_data*)k->entry.
			data, (struct packed_rrset_data*)e->data);
		if(!need_to_update_rrset(k->entry.data, e->data, timenow,
			equal, (rrset_type==LDNS_RR_TYPE_NS))) {
			/* cache is superior, return that value */
			lock_rw_unlock(&e->lock);
			ub_packed_rrset_parsedelete(k, alloc);
			if(equal) return 2;
			return 1;
		}
		lock_rw_unlock(&e->lock);
		/* Go on and insert the passed item.
		 * small gap here, where entry is not locked.
		 * possibly entry is updated with something else.
		 * we then overwrite that with our data.
		 * this is just too bad, its cache anyway. */
		/* use insert to update entry to manage lruhash
		 * cache size values nicely. */
	}
	log_assert(ref->key->id != 0);
	slabhash_insert(&r->table, h, &k->entry, k->entry.data, alloc);
	if(e) {
		/* For NSEC, NSEC3, DNAME, when rdata is updated, update
		 * the ID number so that proofs in message cache are
		 * invalidated */
		if((rrset_type == LDNS_RR_TYPE_NSEC
			|| rrset_type == LDNS_RR_TYPE_NSEC3
			|| rrset_type == LDNS_RR_TYPE_DNAME) && !equal) {
			rrset_update_id(ref, alloc);
		}
		return 1;
	}
	return 0;
}
240
241void rrset_cache_update_wildcard(struct rrset_cache* rrset_cache,
242	struct ub_packed_rrset_key* rrset, uint8_t* ce, size_t ce_len,
243	struct alloc_cache* alloc, time_t timenow)
244{
245	struct rrset_ref ref;
246	uint8_t wc_dname[LDNS_MAX_DOMAINLEN+3];
247	rrset = packed_rrset_copy_alloc(rrset, alloc, timenow);
248	if(!rrset) {
249		log_err("malloc failure in rrset_cache_update_wildcard");
250		return;
251	}
252	/* ce has at least one label less then qname, we can therefore safely
253	 * add the wildcard label. */
254	wc_dname[0] = 1;
255	wc_dname[1] = (uint8_t)'*';
256	memmove(wc_dname+2, ce, ce_len);
257
258	free(rrset->rk.dname);
259	rrset->rk.dname_len = ce_len + 2;
260	rrset->rk.dname = (uint8_t*)memdup(wc_dname, rrset->rk.dname_len);
261	if(!rrset->rk.dname) {
262		alloc_special_release(alloc, rrset);
263		log_err("memdup failure in rrset_cache_update_wildcard");
264		return;
265	}
266
267	rrset->entry.hash = rrset_key_hash(&rrset->rk);
268	ref.key = rrset;
269	ref.id = rrset->id;
270	/* ignore ret: if it was in the cache, ref updated */
271	(void)rrset_cache_update(rrset_cache, &ref, alloc, timenow);
272}
273
274struct ub_packed_rrset_key*
275rrset_cache_lookup(struct rrset_cache* r, uint8_t* qname, size_t qnamelen,
276	uint16_t qtype, uint16_t qclass, uint32_t flags, time_t timenow,
277	int wr)
278{
279	struct lruhash_entry* e;
280	struct ub_packed_rrset_key key;
281
282	key.entry.key = &key;
283	key.entry.data = NULL;
284	key.rk.dname = qname;
285	key.rk.dname_len = qnamelen;
286	key.rk.type = htons(qtype);
287	key.rk.rrset_class = htons(qclass);
288	key.rk.flags = flags;
289
290	key.entry.hash = rrset_key_hash(&key.rk);
291
292	if((e = slabhash_lookup(&r->table, key.entry.hash, &key, wr))) {
293		/* check TTL */
294		struct packed_rrset_data* data =
295			(struct packed_rrset_data*)e->data;
296		if(timenow > data->ttl) {
297			lock_rw_unlock(&e->lock);
298			return NULL;
299		}
300		/* we're done */
301		return (struct ub_packed_rrset_key*)e->key;
302	}
303	return NULL;
304}
305
/**
 * Acquire readlocks on an array of rrset references, checking that each
 * rrset is still alive (id unchanged) and not expired. Consecutive
 * duplicate keys are locked only once; NOTE(review): only adjacent
 * duplicates are detected, so duplicates are presumably sorted together
 * by the caller.
 * @param ref: array of rrset references.
 * @param count: number of entries in ref.
 * @param timenow: current time for the TTL check.
 * @return 1 if all locks are held on return; 0 on failure, in which
 *	case all locks taken so far have been released again.
 */
int
rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow)
{
	size_t i;
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only lock items once */
		lock_rw_rdlock(&ref[i].key->entry.lock);
		if(ref[i].id != ref[i].key->id || timenow >
			((struct packed_rrset_data*)(ref[i].key->entry.data))
			->ttl) {
			/* failure! rollback our readlocks; i+1 because the
			 * lock on entry i itself is held at this point */
			rrset_array_unlock(ref, i+1);
			return 0;
		}
	}
	return 1;
}
324
325void
326rrset_array_unlock(struct rrset_ref* ref, size_t count)
327{
328	size_t i;
329	for(i=0; i<count; i++) {
330		if(i>0 && ref[i].key == ref[i-1].key)
331			continue; /* only unlock items once */
332		lock_rw_unlock(&ref[i].key->entry.lock);
333	}
334}
335
/**
 * Release the locks of an array of rrset references and LRU-touch each
 * rrset. The hash values are copied out first, because the touch must be
 * done with no rrset entry lock held (see the deadlock note in
 * rrset_cache_touch). If the scratch allocation fails (or count exceeds
 * RR_COUNT_MAX, guarding the size multiplication), entries are unlocked
 * but not touched.
 * @param r: the rrset cache.
 * @param scratch: regional for the temporary hash-value array.
 * @param ref: array of rrset references; consecutive duplicates are
 *	handled once.
 * @param count: number of entries in ref.
 */
void
rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch,
	struct rrset_ref* ref, size_t count)
{
	hashvalue_type* h;
	size_t i;
	if(count > RR_COUNT_MAX || !(h = (hashvalue_type*)regional_alloc(
		scratch, sizeof(hashvalue_type)*count))) {
		log_warn("rrset LRU: memory allocation failed");
		h = NULL;
	} else 	/* store hash values */
		for(i=0; i<count; i++)
			h[i] = ref[i].key->entry.hash;
	/* unlock */
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only unlock items once */
		lock_rw_unlock(&ref[i].key->entry.lock);
	}
	if(h) {
		/* LRU touch, with no rrset locks held */
		for(i=0; i<count; i++) {
			if(i>0 && ref[i].key == ref[i-1].key)
				continue; /* only touch items once */
			rrset_cache_touch(r, ref[i].key, h[i], ref[i].id);
		}
	}
}
364
/**
 * Store a (higher) security status from a validated rrset into the
 * cached copy. Only applies if the cached rdata is still identical to
 * the given rrset, and only upgrades the security status (and trust).
 * TTLs: non-NS types get the new absolute TTL; NS rrsets only get it
 * when it is shorter, the cached entry is expired, or the update is
 * bogus.
 * @param r: the rrset cache.
 * @param rrset: rrset with the (validated) status and relative TTLs.
 * @param now: current time, to make the relative TTLs absolute.
 */
void
rrset_update_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, time_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	/* writelock the cached entry for the in-place update */
	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 1);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(!rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* rrset has changed in the meantime */
	}
	/* update the cached rrset */
	if(updata->security > cachedata->security) {
		size_t i;
		if(updata->trust > cachedata->trust)
			cachedata->trust = updata->trust;
		cachedata->security = updata->security;
		/* for NS records only shorter TTLs, other types: update it */
		if(ntohs(rrset->rk.type) != LDNS_RR_TYPE_NS ||
			updata->ttl+now < cachedata->ttl ||
			cachedata->ttl < now ||
			updata->security == sec_status_bogus) {
			cachedata->ttl = updata->ttl + now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				cachedata->rr_ttl[i] = updata->rr_ttl[i]+now;
		}
	}
	lock_rw_unlock(&e->lock);
}
403
/**
 * Copy a better security status from the cached copy into the given
 * rrset. Applies only when the cached entry has identical rdata, is not
 * expired, and carries a higher security status; then status and (if
 * higher) trust are copied. If the cached entry is bogus, its remaining
 * TTLs are copied over as relative values as well.
 * @param r: the rrset cache.
 * @param rrset: rrset to upgrade in place (its relative TTLs may change).
 * @param now: current time, for the TTL check and relative conversion.
 */
void
rrset_check_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, time_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	/* readlock suffices: only the passed rrset is modified */
	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 0);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(now > cachedata->ttl || !rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* expired, or rrset has changed in the meantime */
	}
	if(cachedata->security > updata->security) {
		updata->security = cachedata->security;
		if(cachedata->security == sec_status_bogus) {
			size_t i;
			/* convert cached absolute TTLs back to relative */
			updata->ttl = cachedata->ttl - now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				if(cachedata->rr_ttl[i] < now)
					updata->rr_ttl[i] = 0;
				else updata->rr_ttl[i] =
					cachedata->rr_ttl[i]-now;
		}
		if(cachedata->trust > updata->trust)
			updata->trust = cachedata->trust;
	}
	lock_rw_unlock(&e->lock);
}
440
441void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen,
442	uint16_t type, uint16_t dclass, uint32_t flags)
443{
444	struct ub_packed_rrset_key key;
445	key.entry.key = &key;
446	key.rk.dname = nm;
447	key.rk.dname_len = nmlen;
448	key.rk.rrset_class = htons(dclass);
449	key.rk.type = htons(type);
450	key.rk.flags = flags;
451	key.entry.hash = rrset_key_hash(&key.rk);
452	slabhash_remove(&r->table, key.entry.hash, &key);
453}
454