/*
 * services/cache/rrset.c - Resource record set cache.
 *
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
 *
 * This software is open source.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the NLNET LABS nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 *
 * This file contains the rrset cache.
 */
#include "config.h"
#include "services/cache/rrset.h"
#include "util/storage/slabhash.h"
#include "util/config_file.h"
#include "util/data/packed_rrset.h"
#include "util/data/msgreply.h"
#include "util/regional.h"
#include "util/alloc.h"

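/** Mark an rrset for deletion: set its id to 0, so that users that still
 * hold a reference can see that the entry has been reclaimed. */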
void
rrset_markdel(void* key)
{
	struct ub_packed_rrset_key* r = (struct ub_packed_rrset_key*)key;
	r->id = 0;
}

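/** Create the rrset cache: a slabhash with slab count and memory size from
 * the config, or the hash defaults when no config is given. */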
struct rrset_cache* rrset_cache_create(struct config_file* cfg,
	struct alloc_cache* alloc)
{
	size_t slabs = (cfg?cfg->rrset_cache_slabs:HASH_DEFAULT_SLABS);
	size_t startarray = HASH_DEFAULT_STARTARRAY;
	size_t maxmem = (cfg?cfg->rrset_cache_size:HASH_DEFAULT_MAXMEM);

	struct rrset_cache *r = (struct rrset_cache*)slabhash_create(slabs,
		startarray, maxmem, ub_rrset_sizefunc, ub_rrset_compare,
		ub_rrset_key_delete, rrset_data_delete, alloc);
	if(!r)
		return NULL; /* out of memory creating the slabhash */
	slabhash_setmarkdel(&r->table, &rrset_markdel);
	return r;
}

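/** Delete the rrset cache and all entries in it. */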
void rrset_cache_delete(struct rrset_cache* r)
{
	if(!r)
		return;
	slabhash_delete(&r->table);
	/* slabhash delete also does free(r), since table is first in struct */
}

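/** Keep the existing cache if the configured slab count and memory size are
 * unchanged, otherwise delete it and create a new, empty one. */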
struct rrset_cache* rrset_cache_adjust(struct rrset_cache *r,
	struct config_file* cfg, struct alloc_cache* alloc)
{
	if(!r || !cfg || cfg->rrset_cache_slabs != r->table.size ||
		cfg->rrset_cache_size != slabhash_get_size(&r->table))
	{
		rrset_cache_delete(r);
		r = rrset_cache_create(cfg, alloc);
	}
	return r;
}

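/** Move an rrset to the front of the LRU list in its slab, but only if the
 * entry still has the given id and hash. */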
void
rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key,
	hashvalue_t hash, rrset_id_t id)
{
	struct lruhash* table = slabhash_gettable(&r->table, hash);
	/*
	 * Calling this while holding any other rrset (entry) lock can
	 * deadlock, because a lookup through the hashtable locks in the
	 * order:
	 *	tablelock -> entrylock  (the entry lock the caller holds)
	 * while this routine would lock:
	 *	entrylock (already held) -> tablelock
	 * If two threads do this, the result is deadlock.
	 * So, the caller must not hold an entry lock.
	 */
	lock_quick_lock(&table->lock);
	/* Even though the hash table is locked, the item can still have
	 * been deleted: it may already have been reclaimed, but not yet
	 * have had its id set to 0, because some lruhash routines delete
	 * lazily. So, acquire a lock on the item and verify that id != 0.
	 * Also, since the hash is unchanged, we are using the right slab.
	 */
	lock_rw_rdlock(&key->entry.lock);
	if(key->id == id && key->entry.hash == hash) {
		lru_touch(table, &key->entry);
	}
	lock_rw_unlock(&key->entry.lock);
	lock_quick_unlock(&table->lock);
}

/** see if rrset needs to be updated in the cache */
static int
need_to_update_rrset(void* nd, void* cd, uint32_t timenow, int equal, int ns)
{
	struct packed_rrset_data* newd = (struct packed_rrset_data*)nd;
	struct packed_rrset_data* cached = (struct packed_rrset_data*)cd;
	/*	o store if rrset has been validated
	 *		everything is better than bogus data
	 *		secure is preferred */
	if( newd->security == sec_status_secure &&
		cached->security != sec_status_secure)
		return 1;
	if( cached->security == sec_status_bogus &&
		newd->security != sec_status_bogus && !equal)
		return 1;
	/*	o if the new RRset is more trustworthy - insert it */
	if( newd->trust > cached->trust ) {
		/* if the cached rrset is bogus, and this one is equal,
		 * do not update the TTL - let it expire. */
		if(equal && cached->ttl >= timenow &&
			cached->security == sec_status_bogus)
			return 0;
		return 1;
	}
	/*	o item in cache has expired */
	if( cached->ttl < timenow )
		return 1;
	/*	o same trust, but different in data - insert it */
	if( newd->trust == cached->trust && !equal ) {
		/* if this is type NS, do not 'stick' to the owner that
		 * changed the NS RRset; keep the old TTL for the new data,
		 * so that an update fetches the latest data. The TTL has
		 * not expired, because that was checked above. */
		if(ns) {
			size_t i;
			newd->ttl = cached->ttl;
			for(i=0; i<(newd->count+newd->rrsig_count); i++)
				if(newd->rr_ttl[i] > newd->ttl)
					newd->rr_ttl[i] = newd->ttl;
		}
		return 1;
	}
	return 0;
}

/** Update RRSet special key ID */
static void
rrset_update_id(struct rrset_ref* ref, struct alloc_cache* alloc)
{
	/* this may clear the cache and invalidate lock below */
	uint64_t newid = alloc_get_id(alloc);
	/* obtain writelock */
	lock_rw_wrlock(&ref->key->entry.lock);
	/* check if it was deleted in the meantime, if so, skip update */
	if(ref->key->id == ref->id) {
		ref->key->id = newid;
		ref->id = newid;
	}
	lock_rw_unlock(&ref->key->entry.lock);
}

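/** Update the cache with an rrset. Returns 0 if it was not in the cache
 * before (plain insert, ref unchanged), 1 if an existing entry was found
 * (ref now points at the cached key), or 2 if the cached data was kept and
 * is equal to the new data. */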
int
rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref,
	struct alloc_cache* alloc, uint32_t timenow)
{
	struct lruhash_entry* e;
	struct ub_packed_rrset_key* k = ref->key;
	hashvalue_t h = k->entry.hash;
	uint16_t rrset_type = ntohs(k->rk.type);
	int equal = 0;
	log_assert(ref->id != 0 && k->id != 0);
	/* looks up item with a readlock - no editing! */
	if((e=slabhash_lookup(&r->table, h, k, 0)) != 0) {
		/* Point the reference at the key and id stored in the cache,
		 * since lruhash_insert, if the item already exists,
		 * deallocates the passed key in favor of the stored key.
		 * Because of the small unlocked gap (see below), this key
		 * pointer and id may later turn out to be deleted already,
		 * which is no problem as it only causes a cache miss.
		 */
		ref->key = (struct ub_packed_rrset_key*)e->key;
		ref->id = ref->key->id;
		equal = rrsetdata_equal((struct packed_rrset_data*)k->entry.
			data, (struct packed_rrset_data*)e->data);
		if(!need_to_update_rrset(k->entry.data, e->data, timenow,
			equal, (rrset_type==LDNS_RR_TYPE_NS))) {
			/* cache is superior, return that value */
			lock_rw_unlock(&e->lock);
			ub_packed_rrset_parsedelete(k, alloc);
			if(equal) return 2;
			return 1;
		}
		lock_rw_unlock(&e->lock);
		/* Go on and insert the passed item.
		 * There is a small gap here where the entry is not locked;
		 * possibly the entry is updated with something else and we
		 * then overwrite that with our data. That is acceptable,
		 * it's a cache anyway. */
		/* use insert to update the entry, so that the lruhash
		 * cache size accounting stays correct. */
	}
	log_assert(ref->key->id != 0);
	slabhash_insert(&r->table, h, &k->entry, k->entry.data, alloc);
	if(e) {
		/* For NSEC, NSEC3 and DNAME, when the rdata is updated,
		 * update the ID number so that proofs in the message cache
		 * are invalidated */
		if((rrset_type == LDNS_RR_TYPE_NSEC
			|| rrset_type == LDNS_RR_TYPE_NSEC3
			|| rrset_type == LDNS_RR_TYPE_DNAME) && !equal) {
			rrset_update_id(ref, alloc);
		}
		return 1;
	}
	return 0;
}

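/** Look up an rrset by name, type, class and flags. Returns the rrset key,
 * read- or write-locked as requested, or NULL if it is absent or expired;
 * the caller must unlock a returned entry. */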
struct ub_packed_rrset_key*
rrset_cache_lookup(struct rrset_cache* r, uint8_t* qname, size_t qnamelen,
	uint16_t qtype, uint16_t qclass, uint32_t flags, uint32_t timenow,
	int wr)
{
	struct lruhash_entry* e;
	struct ub_packed_rrset_key key;

	key.entry.key = &key;
	key.entry.data = NULL;
	key.rk.dname = qname;
	key.rk.dname_len = qnamelen;
	key.rk.type = htons(qtype);
	key.rk.rrset_class = htons(qclass);
	key.rk.flags = flags;

	key.entry.hash = rrset_key_hash(&key.rk);

	if((e = slabhash_lookup(&r->table, key.entry.hash, &key, wr))) {
		/* check TTL */
		struct packed_rrset_data* data =
			(struct packed_rrset_data*)e->data;
		if(timenow > data->ttl) {
			lock_rw_unlock(&e->lock);
			return NULL;
		}
		/* we're done */
		return (struct ub_packed_rrset_key*)e->key;
	}
	return NULL;
}

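/** Acquire read locks on an array of rrset references (duplicates are
 * locked once). Returns 0, with all locks released again, if any referenced
 * rrset has been deleted, replaced or has expired; 1 on success. */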
int
rrset_array_lock(struct rrset_ref* ref, size_t count, uint32_t timenow)
{
	size_t i;
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only lock items once */
		lock_rw_rdlock(&ref[i].key->entry.lock);
		if(ref[i].id != ref[i].key->id || timenow >
			((struct packed_rrset_data*)(ref[i].key->entry.data))
			->ttl) {
			/* failure! rollback our readlocks */
			rrset_array_unlock(ref, i+1);
			return 0;
		}
	}
	return 1;
}

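/** Unlock an array of rrset references (duplicates are unlocked once). */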
void
rrset_array_unlock(struct rrset_ref* ref, size_t count)
{
	size_t i;
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only unlock items once */
		lock_rw_unlock(&ref[i].key->entry.lock);
	}
}

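/** Unlock an array of rrset references and then LRU-touch the entries.
 * The hash values are saved in the scratch region first, so that the
 * touches happen without any rrset lock held. */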
void
rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch,
	struct rrset_ref* ref, size_t count)
{
	hashvalue_t* h;
	size_t i;
	if(!(h = (hashvalue_t*)regional_alloc(scratch,
		sizeof(hashvalue_t)*count)))
		log_warn("rrset LRU: memory allocation failed");
	else	/* store hash values */
		for(i=0; i<count; i++)
			h[i] = ref[i].key->entry.hash;
	/* unlock */
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only unlock items once */
		lock_rw_unlock(&ref[i].key->entry.lock);
	}
	if(h) {
		/* LRU touch, with no rrset locks held */
		for(i=0; i<count; i++) {
			if(i>0 && ref[i].key == ref[i-1].key)
				continue; /* only touch items once */
			rrset_cache_touch(r, ref[i].key, h[i], ref[i].id);
		}
	}
}

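/** Store a better security status (and trust) in the cached copy of this
 * rrset, possibly refreshing the cached TTL, if the rdata is unchanged. */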
void
rrset_update_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, uint32_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 1);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(!rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* rrset has changed in the meantime */
	}
	/* update the cached rrset */
	if(updata->security > cachedata->security) {
		size_t i;
		if(updata->trust > cachedata->trust)
			cachedata->trust = updata->trust;
		cachedata->security = updata->security;
		/* update the TTL; for NS records only if the new TTL is
		 * shorter, the cached TTL has expired, or the new status
		 * is bogus. Other types always get the new TTL. */
		if(ntohs(rrset->rk.type) != LDNS_RR_TYPE_NS ||
			updata->ttl+now < cachedata->ttl ||
			cachedata->ttl < now ||
			updata->security == sec_status_bogus) {
			cachedata->ttl = updata->ttl + now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				cachedata->rr_ttl[i] = updata->rr_ttl[i]+now;
		}
	}
	lock_rw_unlock(&e->lock);
}

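/** Copy a better security status (and trust) from the cached copy into
 * this rrset, if the cached rdata is unchanged and not expired. */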
void
rrset_check_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, uint32_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 0);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(now > cachedata->ttl || !rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* expired, or rrset has changed in the meantime */
	}
	if(cachedata->security > updata->security) {
		updata->security = cachedata->security;
		if(cachedata->security == sec_status_bogus) {
			size_t i;
			updata->ttl = cachedata->ttl - now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				if(cachedata->rr_ttl[i] < now)
					updata->rr_ttl[i] = 0;
				else updata->rr_ttl[i] =
					cachedata->rr_ttl[i]-now;
		}
		if(cachedata->trust > updata->trust)
			updata->trust = cachedata->trust;
	}
	lock_rw_unlock(&e->lock);
}

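/** Remove the rrset with the given name, type, class and flags from the
 * cache, if it is present. */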
void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen,
	uint16_t type, uint16_t dclass, uint32_t flags)
{
	struct ub_packed_rrset_key key;
	key.entry.key = &key;
	key.rk.dname = nm;
	key.rk.dname_len = nmlen;
	key.rk.rrset_class = htons(dclass);
	key.rk.type = htons(type);
	key.rk.flags = flags;
	key.entry.hash = rrset_key_hash(&key.rk);
	slabhash_remove(&r->table, key.entry.hash, &key);
}