infra.c revision 356345
1/*
2 * services/cache/infra.c - infrastructure cache, server rtt and capabilities
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains the infrastructure cache.
40 */
41#include "config.h"
42#include "sldns/rrdef.h"
43#include "sldns/str2wire.h"
44#include "sldns/sbuffer.h"
45#include "sldns/wire2str.h"
46#include "services/cache/infra.h"
47#include "util/storage/slabhash.h"
48#include "util/storage/lookup3.h"
49#include "util/data/dname.h"
50#include "util/log.h"
51#include "util/net_help.h"
52#include "util/config_file.h"
53#include "iterator/iterator.h"
54
/** Timeout when only a single probe query per IP is allowed.
 * Hosts whose timeout reaches this value are handled with a single
 * probe query at a time, using the probedelay of the host element. */
#define PROBE_MAXRTO 12000 /* in msec */

/** number of timeouts for a type when the domain can be blocked ;
 * even if another type has completely rtt maxed it, the different type
 * can do this number of packets (until those all timeout too).
 * The per-type timeout counters of a host stop counting at this value. */
#define TIMEOUT_COUNT_MAX 3

/** ratelimit value for delegation point.
 * Taken from the config (cfg->ratelimit); 0 means the ratelimit
 * is not enabled. */
int infra_dp_ratelimit = 0;

/** ratelimit value for client ip addresses,
 *  in queries per second.
 *  Taken from the config (cfg->ip_ratelimit); 0 means it is not enabled. */
int infra_ip_ratelimit = 0;
69
70size_t
71infra_sizefunc(void* k, void* ATTR_UNUSED(d))
72{
73	struct infra_key* key = (struct infra_key*)k;
74	return sizeof(*key) + sizeof(struct infra_data) + key->namelen
75		+ lock_get_mem(&key->entry.lock);
76}
77
78int
79infra_compfunc(void* key1, void* key2)
80{
81	struct infra_key* k1 = (struct infra_key*)key1;
82	struct infra_key* k2 = (struct infra_key*)key2;
83	int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen);
84	if(r != 0)
85		return r;
86	if(k1->namelen != k2->namelen) {
87		if(k1->namelen < k2->namelen)
88			return -1;
89		return 1;
90	}
91	return query_dname_compare(k1->zonename, k2->zonename);
92}
93
94void
95infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
96{
97	struct infra_key* key = (struct infra_key*)k;
98	if(!key)
99		return;
100	lock_rw_destroy(&key->entry.lock);
101	free(key->zonename);
102	free(key);
103}
104
/** Delete callback for a host cache data element: frees it. */
void
infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct infra_data* hostdata = (struct infra_data*)d;
	free(hostdata);
}
111
112size_t
113rate_sizefunc(void* k, void* ATTR_UNUSED(d))
114{
115	struct rate_key* key = (struct rate_key*)k;
116	return sizeof(*key) + sizeof(struct rate_data) + key->namelen
117		+ lock_get_mem(&key->entry.lock);
118}
119
120int
121rate_compfunc(void* key1, void* key2)
122{
123	struct rate_key* k1 = (struct rate_key*)key1;
124	struct rate_key* k2 = (struct rate_key*)key2;
125	if(k1->namelen != k2->namelen) {
126		if(k1->namelen < k2->namelen)
127			return -1;
128		return 1;
129	}
130	return query_dname_compare(k1->name, k2->name);
131}
132
133void
134rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
135{
136	struct rate_key* key = (struct rate_key*)k;
137	if(!key)
138		return;
139	lock_rw_destroy(&key->entry.lock);
140	free(key->name);
141	free(key);
142}
143
/** Delete callback for a domain rate data element: frees it. */
void
rate_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct rate_data* ratedata = (struct rate_data*)d;
	free(ratedata);
}
150
151/** find or create element in domainlimit tree */
152static struct domain_limit_data* domain_limit_findcreate(
153	struct infra_cache* infra, char* name)
154{
155	uint8_t* nm;
156	int labs;
157	size_t nmlen;
158	struct domain_limit_data* d;
159
160	/* parse name */
161	nm = sldns_str2wire_dname(name, &nmlen);
162	if(!nm) {
163		log_err("could not parse %s", name);
164		return NULL;
165	}
166	labs = dname_count_labels(nm);
167
168	/* can we find it? */
169	d = (struct domain_limit_data*)name_tree_find(&infra->domain_limits,
170		nm, nmlen, labs, LDNS_RR_CLASS_IN);
171	if(d) {
172		free(nm);
173		return d;
174	}
175
176	/* create it */
177	d = (struct domain_limit_data*)calloc(1, sizeof(*d));
178	if(!d) {
179		free(nm);
180		return NULL;
181	}
182	d->node.node.key = &d->node;
183	d->node.name = nm;
184	d->node.len = nmlen;
185	d->node.labs = labs;
186	d->node.dclass = LDNS_RR_CLASS_IN;
187	d->lim = -1;
188	d->below = -1;
189	if(!name_tree_insert(&infra->domain_limits, &d->node, nm, nmlen,
190		labs, LDNS_RR_CLASS_IN)) {
191		log_err("duplicate element in domainlimit tree");
192		free(nm);
193		free(d);
194		return NULL;
195	}
196	return d;
197}
198
199/** insert rate limit configuration into lookup tree */
200static int infra_ratelimit_cfg_insert(struct infra_cache* infra,
201	struct config_file* cfg)
202{
203	struct config_str2list* p;
204	struct domain_limit_data* d;
205	for(p = cfg->ratelimit_for_domain; p; p = p->next) {
206		d = domain_limit_findcreate(infra, p->str);
207		if(!d)
208			return 0;
209		d->lim = atoi(p->str2);
210	}
211	for(p = cfg->ratelimit_below_domain; p; p = p->next) {
212		d = domain_limit_findcreate(infra, p->str);
213		if(!d)
214			return 0;
215		d->below = atoi(p->str2);
216	}
217	return 1;
218}
219
220/** setup domain limits tree (0 on failure) */
221static int
222setup_domain_limits(struct infra_cache* infra, struct config_file* cfg)
223{
224	name_tree_init(&infra->domain_limits);
225	if(!infra_ratelimit_cfg_insert(infra, cfg)) {
226		return 0;
227	}
228	name_tree_init_parents(&infra->domain_limits);
229	return 1;
230}
231
232struct infra_cache*
233infra_create(struct config_file* cfg)
234{
235	struct infra_cache* infra = (struct infra_cache*)calloc(1,
236		sizeof(struct infra_cache));
237	size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
238		sizeof(struct infra_data)+INFRA_BYTES_NAME);
239	infra->hosts = slabhash_create(cfg->infra_cache_slabs,
240		INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc,
241		&infra_delkeyfunc, &infra_deldatafunc, NULL);
242	if(!infra->hosts) {
243		free(infra);
244		return NULL;
245	}
246	infra->host_ttl = cfg->host_ttl;
247	infra_dp_ratelimit = cfg->ratelimit;
248	infra->domain_rates = slabhash_create(cfg->ratelimit_slabs,
249		INFRA_HOST_STARTSIZE, cfg->ratelimit_size,
250		&rate_sizefunc, &rate_compfunc, &rate_delkeyfunc,
251		&rate_deldatafunc, NULL);
252	if(!infra->domain_rates) {
253		infra_delete(infra);
254		return NULL;
255	}
256	/* insert config data into ratelimits */
257	if(!setup_domain_limits(infra, cfg)) {
258		infra_delete(infra);
259		return NULL;
260	}
261	infra_ip_ratelimit = cfg->ip_ratelimit;
262	infra->client_ip_rates = slabhash_create(cfg->ip_ratelimit_slabs,
263	    INFRA_HOST_STARTSIZE, cfg->ip_ratelimit_size, &ip_rate_sizefunc,
264	    &ip_rate_compfunc, &ip_rate_delkeyfunc, &ip_rate_deldatafunc, NULL);
265	if(!infra->client_ip_rates) {
266		infra_delete(infra);
267		return NULL;
268	}
269	return infra;
270}
271
272/** delete domain_limit entries */
273static void domain_limit_free(rbnode_type* n, void* ATTR_UNUSED(arg))
274{
275	if(n) {
276		free(((struct domain_limit_data*)n)->node.name);
277		free(n);
278	}
279}
280
281void
282infra_delete(struct infra_cache* infra)
283{
284	if(!infra)
285		return;
286	slabhash_delete(infra->hosts);
287	slabhash_delete(infra->domain_rates);
288	traverse_postorder(&infra->domain_limits, domain_limit_free, NULL);
289	slabhash_delete(infra->client_ip_rates);
290	free(infra);
291}
292
293struct infra_cache*
294infra_adjust(struct infra_cache* infra, struct config_file* cfg)
295{
296	size_t maxmem;
297	if(!infra)
298		return infra_create(cfg);
299	infra->host_ttl = cfg->host_ttl;
300	infra_dp_ratelimit = cfg->ratelimit;
301	infra_ip_ratelimit = cfg->ip_ratelimit;
302	maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
303		sizeof(struct infra_data)+INFRA_BYTES_NAME);
304	/* divide cachesize by slabs and multiply by slabs, because if the
305	 * cachesize is not an even multiple of slabs, that is the resulting
306	 * size of the slabhash */
307	if(!slabhash_is_size(infra->hosts, maxmem, cfg->infra_cache_slabs) ||
308	   !slabhash_is_size(infra->domain_rates, cfg->ratelimit_size,
309	   	cfg->ratelimit_slabs) ||
310	   !slabhash_is_size(infra->client_ip_rates, cfg->ip_ratelimit_size,
311	   	cfg->ip_ratelimit_slabs)) {
312		infra_delete(infra);
313		infra = infra_create(cfg);
314	} else {
315		/* reapply domain limits */
316		traverse_postorder(&infra->domain_limits, domain_limit_free,
317			NULL);
318		if(!setup_domain_limits(infra, cfg)) {
319			infra_delete(infra);
320			return NULL;
321		}
322	}
323	return infra;
324}
325
326/** calculate the hash value for a host key
327 *  set use_port to a non-0 number to use the port in
328 *  the hash calculation; 0 to ignore the port.*/
329static hashvalue_type
330hash_addr(struct sockaddr_storage* addr, socklen_t addrlen,
331  int use_port)
332{
333	hashvalue_type h = 0xab;
334	/* select the pieces to hash, some OS have changing data inside */
335	if(addr_is_ip6(addr, addrlen)) {
336		struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr;
337		h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h);
338		if(use_port){
339			h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h);
340		}
341		h = hashlittle(&in6->sin6_addr, INET6_SIZE, h);
342	} else {
343		struct sockaddr_in* in = (struct sockaddr_in*)addr;
344		h = hashlittle(&in->sin_family, sizeof(in->sin_family), h);
345		if(use_port){
346			h = hashlittle(&in->sin_port, sizeof(in->sin_port), h);
347		}
348		h = hashlittle(&in->sin_addr, INET_SIZE, h);
349	}
350	return h;
351}
352
353/** calculate infra hash for a key */
354static hashvalue_type
355hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name)
356{
357	return dname_query_hash(name, hash_addr(addr, addrlen, 1));
358}
359
360/** lookup version that does not check host ttl (you check it) */
361struct lruhash_entry*
362infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr,
363	socklen_t addrlen, uint8_t* name, size_t namelen, int wr)
364{
365	struct infra_key k;
366	k.addrlen = addrlen;
367	memcpy(&k.addr, addr, addrlen);
368	k.namelen = namelen;
369	k.zonename = name;
370	k.entry.hash = hash_infra(addr, addrlen, name);
371	k.entry.key = (void*)&k;
372	k.entry.data = NULL;
373	return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr);
374}
375
376/** init the data elements */
377static void
378data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
379	time_t timenow)
380{
381	struct infra_data* data = (struct infra_data*)e->data;
382	data->ttl = timenow + infra->host_ttl;
383	rtt_init(&data->rtt);
384	data->edns_version = 0;
385	data->edns_lame_known = 0;
386	data->probedelay = 0;
387	data->isdnsseclame = 0;
388	data->rec_lame = 0;
389	data->lame_type_A = 0;
390	data->lame_other = 0;
391	data->timeout_A = 0;
392	data->timeout_AAAA = 0;
393	data->timeout_other = 0;
394}
395
396/**
397 * Create and init a new entry for a host
398 * @param infra: infra structure with config parameters.
399 * @param addr: host address.
400 * @param addrlen: length of addr.
401 * @param name: name of zone
402 * @param namelen: length of name.
403 * @param tm: time now.
404 * @return: the new entry or NULL on malloc failure.
405 */
406static struct lruhash_entry*
407new_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
408	socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm)
409{
410	struct infra_data* data;
411	struct infra_key* key = (struct infra_key*)malloc(sizeof(*key));
412	if(!key)
413		return NULL;
414	data = (struct infra_data*)malloc(sizeof(struct infra_data));
415	if(!data) {
416		free(key);
417		return NULL;
418	}
419	key->zonename = memdup(name, namelen);
420	if(!key->zonename) {
421		free(key);
422		free(data);
423		return NULL;
424	}
425	key->namelen = namelen;
426	lock_rw_init(&key->entry.lock);
427	key->entry.hash = hash_infra(addr, addrlen, name);
428	key->entry.key = (void*)key;
429	key->entry.data = (void*)data;
430	key->addrlen = addrlen;
431	memcpy(&key->addr, addr, addrlen);
432	data_entry_init(infra, &key->entry, tm);
433	return &key->entry;
434}
435
436int
437infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
438        socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
439	int* edns_vs, uint8_t* edns_lame_known, int* to)
440{
441	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
442		nm, nmlen, 0);
443	struct infra_data* data;
444	int wr = 0;
445	if(e && ((struct infra_data*)e->data)->ttl < timenow) {
446		/* it expired, try to reuse existing entry */
447		int old = ((struct infra_data*)e->data)->rtt.rto;
448		uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
449		uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
450		uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
451		lock_rw_unlock(&e->lock);
452		e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
453		if(e) {
454			/* if its still there we have a writelock, init */
455			/* re-initialise */
456			/* do not touch lameness, it may be valid still */
457			data_entry_init(infra, e, timenow);
458			wr = 1;
459			/* TOP_TIMEOUT remains on reuse */
460			if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
461				((struct infra_data*)e->data)->rtt.rto
462					= USEFUL_SERVER_TOP_TIMEOUT;
463				((struct infra_data*)e->data)->timeout_A = tA;
464				((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
465				((struct infra_data*)e->data)->timeout_other = tother;
466			}
467		}
468	}
469	if(!e) {
470		/* insert new entry */
471		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
472			return 0;
473		data = (struct infra_data*)e->data;
474		*edns_vs = data->edns_version;
475		*edns_lame_known = data->edns_lame_known;
476		*to = rtt_timeout(&data->rtt);
477		slabhash_insert(infra->hosts, e->hash, e, data, NULL);
478		return 1;
479	}
480	/* use existing entry */
481	data = (struct infra_data*)e->data;
482	*edns_vs = data->edns_version;
483	*edns_lame_known = data->edns_lame_known;
484	*to = rtt_timeout(&data->rtt);
485	if(*to >= PROBE_MAXRTO && rtt_notimeout(&data->rtt)*4 <= *to) {
486		/* delay other queries, this is the probe query */
487		if(!wr) {
488			lock_rw_unlock(&e->lock);
489			e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1);
490			if(!e) { /* flushed from cache real fast, no use to
491				allocate just for the probedelay */
492				return 1;
493			}
494			data = (struct infra_data*)e->data;
495		}
496		/* add 999 to round up the timeout value from msec to sec,
497		 * then add a whole second so it is certain that this probe
498		 * has timed out before the next is allowed */
499		data->probedelay = timenow + ((*to)+1999)/1000;
500	}
501	lock_rw_unlock(&e->lock);
502	return 1;
503}
504
505int
506infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr,
507	socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
508	int dnsseclame, int reclame, uint16_t qtype)
509{
510	struct infra_data* data;
511	struct lruhash_entry* e;
512	int needtoinsert = 0;
513	e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
514	if(!e) {
515		/* insert it */
516		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) {
517			log_err("set_lame: malloc failure");
518			return 0;
519		}
520		needtoinsert = 1;
521	} else if( ((struct infra_data*)e->data)->ttl < timenow) {
522		/* expired, reuse existing entry */
523		data_entry_init(infra, e, timenow);
524	}
525	/* got an entry, now set the zone lame */
526	data = (struct infra_data*)e->data;
527	/* merge data (if any) */
528	if(dnsseclame)
529		data->isdnsseclame = 1;
530	if(reclame)
531		data->rec_lame = 1;
532	if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A)
533		data->lame_type_A = 1;
534	if(!dnsseclame  && !reclame && qtype != LDNS_RR_TYPE_A)
535		data->lame_other = 1;
536	/* done */
537	if(needtoinsert)
538		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
539	else 	{ lock_rw_unlock(&e->lock); }
540	return 1;
541}
542
543void
544infra_update_tcp_works(struct infra_cache* infra,
545        struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
546	size_t nmlen)
547{
548	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
549		nm, nmlen, 1);
550	struct infra_data* data;
551	if(!e)
552		return; /* doesn't exist */
553	data = (struct infra_data*)e->data;
554	if(data->rtt.rto >= RTT_MAX_TIMEOUT)
555		/* do not disqualify this server altogether, it is better
556		 * than nothing */
557		data->rtt.rto = RTT_MAX_TIMEOUT-1000;
558	lock_rw_unlock(&e->lock);
559}
560
561int
562infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
563	socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
564	int roundtrip, int orig_rtt, time_t timenow)
565{
566	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
567		nm, nmlen, 1);
568	struct infra_data* data;
569	int needtoinsert = 0;
570	int rto = 1;
571	if(!e) {
572		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
573			return 0;
574		needtoinsert = 1;
575	} else if(((struct infra_data*)e->data)->ttl < timenow) {
576		data_entry_init(infra, e, timenow);
577	}
578	/* have an entry, update the rtt */
579	data = (struct infra_data*)e->data;
580	if(roundtrip == -1) {
581		rtt_lost(&data->rtt, orig_rtt);
582		if(qtype == LDNS_RR_TYPE_A) {
583			if(data->timeout_A < TIMEOUT_COUNT_MAX)
584				data->timeout_A++;
585		} else if(qtype == LDNS_RR_TYPE_AAAA) {
586			if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
587				data->timeout_AAAA++;
588		} else {
589			if(data->timeout_other < TIMEOUT_COUNT_MAX)
590				data->timeout_other++;
591		}
592	} else {
593		/* if we got a reply, but the old timeout was above server
594		 * selection height, delete the timeout so the server is
595		 * fully available again */
596		if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT)
597			rtt_init(&data->rtt);
598		rtt_update(&data->rtt, roundtrip);
599		data->probedelay = 0;
600		if(qtype == LDNS_RR_TYPE_A)
601			data->timeout_A = 0;
602		else if(qtype == LDNS_RR_TYPE_AAAA)
603			data->timeout_AAAA = 0;
604		else	data->timeout_other = 0;
605	}
606	if(data->rtt.rto > 0)
607		rto = data->rtt.rto;
608
609	if(needtoinsert)
610		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
611	else 	{ lock_rw_unlock(&e->lock); }
612	return rto;
613}
614
615long long infra_get_host_rto(struct infra_cache* infra,
616        struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
617	size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow,
618	int* tA, int* tAAAA, int* tother)
619{
620	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
621		nm, nmlen, 0);
622	struct infra_data* data;
623	long long ttl = -2;
624	if(!e) return -1;
625	data = (struct infra_data*)e->data;
626	if(data->ttl >= timenow) {
627		ttl = (long long)(data->ttl - timenow);
628		memmove(rtt, &data->rtt, sizeof(*rtt));
629		if(timenow < data->probedelay)
630			*delay = (int)(data->probedelay - timenow);
631		else	*delay = 0;
632	}
633	*tA = (int)data->timeout_A;
634	*tAAAA = (int)data->timeout_AAAA;
635	*tother = (int)data->timeout_other;
636	lock_rw_unlock(&e->lock);
637	return ttl;
638}
639
640int
641infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr,
642	socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version,
643	time_t timenow)
644{
645	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
646		nm, nmlen, 1);
647	struct infra_data* data;
648	int needtoinsert = 0;
649	if(!e) {
650		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
651			return 0;
652		needtoinsert = 1;
653	} else if(((struct infra_data*)e->data)->ttl < timenow) {
654		data_entry_init(infra, e, timenow);
655	}
656	/* have an entry, update the rtt, and the ttl */
657	data = (struct infra_data*)e->data;
658	/* do not update if noEDNS and stored is yesEDNS */
659	if(!(edns_version == -1 && (data->edns_version != -1 &&
660		data->edns_lame_known))) {
661		data->edns_version = edns_version;
662		data->edns_lame_known = 1;
663	}
664
665	if(needtoinsert)
666		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
667	else 	{ lock_rw_unlock(&e->lock); }
668	return 1;
669}
670
671int
672infra_get_lame_rtt(struct infra_cache* infra,
673        struct sockaddr_storage* addr, socklen_t addrlen,
674        uint8_t* name, size_t namelen, uint16_t qtype,
675	int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow)
676{
677	struct infra_data* host;
678	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
679		name, namelen, 0);
680	if(!e)
681		return 0;
682	host = (struct infra_data*)e->data;
683	*rtt = rtt_unclamped(&host->rtt);
684	if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
685		&& rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
686		/* single probe for this domain, and we are not probing */
687		/* unless the query type allows a probe to happen */
688		if(qtype == LDNS_RR_TYPE_A) {
689			if(host->timeout_A >= TIMEOUT_COUNT_MAX)
690				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
691			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
692		} else if(qtype == LDNS_RR_TYPE_AAAA) {
693			if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
694				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
695			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
696		} else {
697			if(host->timeout_other >= TIMEOUT_COUNT_MAX)
698				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
699			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
700		}
701	}
702	if(timenow > host->ttl) {
703		/* expired entry */
704		/* see if this can be a re-probe of an unresponsive server */
705		/* minus 1000 because that is outside of the RTTBAND, so
706		 * blacklisted servers stay blacklisted if this is chosen */
707		if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) {
708			lock_rw_unlock(&e->lock);
709			*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
710			*lame = 0;
711			*dnsseclame = 0;
712			*reclame = 0;
713			return 1;
714		}
715		lock_rw_unlock(&e->lock);
716		return 0;
717	}
718	/* check lameness first */
719	if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) {
720		lock_rw_unlock(&e->lock);
721		*lame = 1;
722		*dnsseclame = 0;
723		*reclame = 0;
724		return 1;
725	} else if(host->lame_other && qtype != LDNS_RR_TYPE_A) {
726		lock_rw_unlock(&e->lock);
727		*lame = 1;
728		*dnsseclame = 0;
729		*reclame = 0;
730		return 1;
731	} else if(host->isdnsseclame) {
732		lock_rw_unlock(&e->lock);
733		*lame = 0;
734		*dnsseclame = 1;
735		*reclame = 0;
736		return 1;
737	} else if(host->rec_lame) {
738		lock_rw_unlock(&e->lock);
739		*lame = 0;
740		*dnsseclame = 0;
741		*reclame = 1;
742		return 1;
743	}
744	/* no lameness for this type of query */
745	lock_rw_unlock(&e->lock);
746	*lame = 0;
747	*dnsseclame = 0;
748	*reclame = 0;
749	return 1;
750}
751
752int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name,
753	size_t namelen)
754{
755	int labs = dname_count_labels(name);
756	struct domain_limit_data* d = (struct domain_limit_data*)
757		name_tree_lookup(&infra->domain_limits, name, namelen, labs,
758		LDNS_RR_CLASS_IN);
759	if(!d) return infra_dp_ratelimit;
760
761	if(d->node.labs == labs && d->lim != -1)
762		return d->lim; /* exact match */
763
764	/* find 'below match' */
765	if(d->node.labs == labs)
766		d = (struct domain_limit_data*)d->node.parent;
767	while(d) {
768		if(d->below != -1)
769			return d->below;
770		d = (struct domain_limit_data*)d->node.parent;
771	}
772	return infra_dp_ratelimit;
773}
774
775size_t ip_rate_sizefunc(void* k, void* ATTR_UNUSED(d))
776{
777	struct ip_rate_key* key = (struct ip_rate_key*)k;
778	return sizeof(*key) + sizeof(struct ip_rate_data)
779		+ lock_get_mem(&key->entry.lock);
780}
781
782int ip_rate_compfunc(void* key1, void* key2)
783{
784	struct ip_rate_key* k1 = (struct ip_rate_key*)key1;
785	struct ip_rate_key* k2 = (struct ip_rate_key*)key2;
786	return sockaddr_cmp_addr(&k1->addr, k1->addrlen,
787		&k2->addr, k2->addrlen);
788}
789
790void ip_rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
791{
792	struct ip_rate_key* key = (struct ip_rate_key*)k;
793	if(!key)
794		return;
795	lock_rw_destroy(&key->entry.lock);
796	free(key);
797}
798
799/** find data item in array, for write access, caller unlocks */
800static struct lruhash_entry* infra_find_ratedata(struct infra_cache* infra,
801	uint8_t* name, size_t namelen, int wr)
802{
803	struct rate_key key;
804	hashvalue_type h = dname_query_hash(name, 0xab);
805	memset(&key, 0, sizeof(key));
806	key.name = name;
807	key.namelen = namelen;
808	key.entry.hash = h;
809	return slabhash_lookup(infra->domain_rates, h, &key, wr);
810}
811
812/** find data item in array for ip addresses */
813static struct lruhash_entry* infra_find_ip_ratedata(struct infra_cache* infra,
814	struct comm_reply* repinfo, int wr)
815{
816	struct ip_rate_key key;
817	hashvalue_type h = hash_addr(&(repinfo->addr),
818		repinfo->addrlen, 0);
819	memset(&key, 0, sizeof(key));
820	key.addr = repinfo->addr;
821	key.addrlen = repinfo->addrlen;
822	key.entry.hash = h;
823	return slabhash_lookup(infra->client_ip_rates, h, &key, wr);
824}
825
826/** create rate data item for name, number 1 in now */
827static void infra_create_ratedata(struct infra_cache* infra,
828	uint8_t* name, size_t namelen, time_t timenow)
829{
830	hashvalue_type h = dname_query_hash(name, 0xab);
831	struct rate_key* k = (struct rate_key*)calloc(1, sizeof(*k));
832	struct rate_data* d = (struct rate_data*)calloc(1, sizeof(*d));
833	if(!k || !d) {
834		free(k);
835		free(d);
836		return; /* alloc failure */
837	}
838	k->namelen = namelen;
839	k->name = memdup(name, namelen);
840	if(!k->name) {
841		free(k);
842		free(d);
843		return; /* alloc failure */
844	}
845	lock_rw_init(&k->entry.lock);
846	k->entry.hash = h;
847	k->entry.key = k;
848	k->entry.data = d;
849	d->qps[0] = 1;
850	d->timestamp[0] = timenow;
851	slabhash_insert(infra->domain_rates, h, &k->entry, d, NULL);
852}
853
854/** create rate data item for ip address */
855static void infra_ip_create_ratedata(struct infra_cache* infra,
856	struct comm_reply* repinfo, time_t timenow)
857{
858	hashvalue_type h = hash_addr(&(repinfo->addr),
859	repinfo->addrlen, 0);
860	struct ip_rate_key* k = (struct ip_rate_key*)calloc(1, sizeof(*k));
861	struct ip_rate_data* d = (struct ip_rate_data*)calloc(1, sizeof(*d));
862	if(!k || !d) {
863		free(k);
864		free(d);
865		return; /* alloc failure */
866	}
867	k->addr = repinfo->addr;
868	k->addrlen = repinfo->addrlen;
869	lock_rw_init(&k->entry.lock);
870	k->entry.hash = h;
871	k->entry.key = k;
872	k->entry.data = d;
873	d->qps[0] = 1;
874	d->timestamp[0] = timenow;
875	slabhash_insert(infra->client_ip_rates, h, &k->entry, d, NULL);
876}
877
878/** find the second and return its rate counter, if none, remove oldest */
879static int* infra_rate_find_second(void* data, time_t t)
880{
881	struct rate_data* d = (struct rate_data*)data;
882	int i, oldest;
883	for(i=0; i<RATE_WINDOW; i++) {
884		if(d->timestamp[i] == t)
885			return &(d->qps[i]);
886	}
887	/* remove oldest timestamp, and insert it at t with 0 qps */
888	oldest = 0;
889	for(i=0; i<RATE_WINDOW; i++) {
890		if(d->timestamp[i] < d->timestamp[oldest])
891			oldest = i;
892	}
893	d->timestamp[oldest] = t;
894	d->qps[oldest] = 0;
895	return &(d->qps[oldest]);
896}
897
898int infra_rate_max(void* data, time_t now)
899{
900	struct rate_data* d = (struct rate_data*)data;
901	int i, max = 0;
902	for(i=0; i<RATE_WINDOW; i++) {
903		if(now-d->timestamp[i] <= RATE_WINDOW) {
904			if(d->qps[i] > max)
905				max = d->qps[i];
906		}
907	}
908	return max;
909}
910
911int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name,
912	size_t namelen, time_t timenow, struct query_info* qinfo,
913	struct comm_reply* replylist)
914{
915	int lim, max;
916	struct lruhash_entry* entry;
917
918	if(!infra_dp_ratelimit)
919		return 1; /* not enabled */
920
921	/* find ratelimit */
922	lim = infra_find_ratelimit(infra, name, namelen);
923	if(!lim)
924		return 1; /* disabled for this domain */
925
926	/* find or insert ratedata */
927	entry = infra_find_ratedata(infra, name, namelen, 1);
928	if(entry) {
929		int premax = infra_rate_max(entry->data, timenow);
930		int* cur = infra_rate_find_second(entry->data, timenow);
931		(*cur)++;
932		max = infra_rate_max(entry->data, timenow);
933		lock_rw_unlock(&entry->lock);
934
935		if(premax < lim && max >= lim) {
936			char buf[257], qnm[257], ts[12], cs[12], ip[128];
937			dname_str(name, buf);
938			dname_str(qinfo->qname, qnm);
939			sldns_wire2str_type_buf(qinfo->qtype, ts, sizeof(ts));
940			sldns_wire2str_class_buf(qinfo->qclass, cs, sizeof(cs));
941			ip[0]=0;
942			if(replylist) {
943				addr_to_str((struct sockaddr_storage *)&replylist->addr,
944					replylist->addrlen, ip, sizeof(ip));
945				verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s from %s", buf, lim, qnm, cs, ts, ip);
946			} else {
947				verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s", buf, lim, qnm, cs, ts);
948			}
949		}
950		return (max < lim);
951	}
952
953	/* create */
954	infra_create_ratedata(infra, name, namelen, timenow);
955	return (1 < lim);
956}
957
958void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name,
959	size_t namelen, time_t timenow)
960{
961	struct lruhash_entry* entry;
962	int* cur;
963	if(!infra_dp_ratelimit)
964		return; /* not enabled */
965	entry = infra_find_ratedata(infra, name, namelen, 1);
966	if(!entry) return; /* not cached */
967	cur = infra_rate_find_second(entry->data, timenow);
968	if((*cur) > 0)
969		(*cur)--;
970	lock_rw_unlock(&entry->lock);
971}
972
973int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name,
974	size_t namelen, time_t timenow)
975{
976	struct lruhash_entry* entry;
977	int lim, max;
978	if(!infra_dp_ratelimit)
979		return 0; /* not enabled */
980
981	/* find ratelimit */
982	lim = infra_find_ratelimit(infra, name, namelen);
983	if(!lim)
984		return 0; /* disabled for this domain */
985
986	/* find current rate */
987	entry = infra_find_ratedata(infra, name, namelen, 0);
988	if(!entry)
989		return 0; /* not cached */
990	max = infra_rate_max(entry->data, timenow);
991	lock_rw_unlock(&entry->lock);
992
993	return (max >= lim);
994}
995
996size_t
997infra_get_mem(struct infra_cache* infra)
998{
999	size_t s = sizeof(*infra) + slabhash_get_mem(infra->hosts);
1000	if(infra->domain_rates) s += slabhash_get_mem(infra->domain_rates);
1001	if(infra->client_ip_rates) s += slabhash_get_mem(infra->client_ip_rates);
1002	/* ignore domain_limits because walk through tree is big */
1003	return s;
1004}
1005
1006int infra_ip_ratelimit_inc(struct infra_cache* infra,
1007  struct comm_reply* repinfo, time_t timenow, struct sldns_buffer* buffer)
1008{
1009	int max;
1010	struct lruhash_entry* entry;
1011
1012	/* not enabled */
1013	if(!infra_ip_ratelimit) {
1014		return 1;
1015	}
1016	/* find or insert ratedata */
1017	entry = infra_find_ip_ratedata(infra, repinfo, 1);
1018	if(entry) {
1019		int premax = infra_rate_max(entry->data, timenow);
1020		int* cur = infra_rate_find_second(entry->data, timenow);
1021		(*cur)++;
1022		max = infra_rate_max(entry->data, timenow);
1023		lock_rw_unlock(&entry->lock);
1024
1025		if(premax < infra_ip_ratelimit && max >= infra_ip_ratelimit) {
1026			char client_ip[128], qnm[LDNS_MAX_DOMAINLEN+1+12+12];
1027			addr_to_str((struct sockaddr_storage *)&repinfo->addr,
1028				repinfo->addrlen, client_ip, sizeof(client_ip));
1029			qnm[0]=0;
1030			if(sldns_buffer_limit(buffer)>LDNS_HEADER_SIZE &&
1031				LDNS_QDCOUNT(sldns_buffer_begin(buffer))!=0) {
1032				(void)sldns_wire2str_rrquestion_buf(
1033					sldns_buffer_at(buffer, LDNS_HEADER_SIZE),
1034					sldns_buffer_limit(buffer)-LDNS_HEADER_SIZE,
1035					qnm, sizeof(qnm));
1036				if(strlen(qnm)>0 && qnm[strlen(qnm)-1]=='\n')
1037					qnm[strlen(qnm)-1] = 0; /*remove newline*/
1038				if(strchr(qnm, '\t'))
1039					*strchr(qnm, '\t') = ' ';
1040				if(strchr(qnm, '\t'))
1041					*strchr(qnm, '\t') = ' ';
1042				verbose(VERB_OPS, "ip_ratelimit exceeded %s %d %s",
1043					client_ip, infra_ip_ratelimit, qnm);
1044			} else {
1045				verbose(VERB_OPS, "ip_ratelimit exceeded %s %d (no query name)",
1046					client_ip, infra_ip_ratelimit);
1047			}
1048		}
1049		return (max <= infra_ip_ratelimit);
1050	}
1051
1052	/* create */
1053	infra_ip_create_ratedata(infra, repinfo, timenow);
1054	return 1;
1055}
1056