1/*
2 * services/cache/infra.c - infrastructure cache, server rtt and capabilities
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains the infrastructure cache.
40 */
41#include "config.h"
42#include "sldns/rrdef.h"
43#include "sldns/str2wire.h"
44#include "sldns/sbuffer.h"
45#include "sldns/wire2str.h"
46#include "services/cache/infra.h"
47#include "util/storage/slabhash.h"
48#include "util/storage/lookup3.h"
49#include "util/data/dname.h"
50#include "util/log.h"
51#include "util/net_help.h"
52#include "util/config_file.h"
53#include "iterator/iterator.h"
54
/** Timeout when only a single probe query per IP is allowed.
 * Compared against the rtt timeout value of a host, in msec. */
#define PROBE_MAXRTO 12000 /* in msec */

/** Maximum value of the per-query-type timeout counters (A, AAAA, other).
 * Even if another type has completely maxed out the rtt, a different
 * type can still send this number of packets (until those all time out
 * too) before the domain can be blocked for that type. */
#define TIMEOUT_COUNT_MAX 3

/** ratelimit value for delegation point */
int infra_dp_ratelimit = 0;

/** ratelimit value for client ip addresses,
 *  in queries per second. */
int infra_ip_ratelimit = 0;
69
70size_t
71infra_sizefunc(void* k, void* ATTR_UNUSED(d))
72{
73	struct infra_key* key = (struct infra_key*)k;
74	return sizeof(*key) + sizeof(struct infra_data) + key->namelen
75		+ lock_get_mem(&key->entry.lock);
76}
77
78int
79infra_compfunc(void* key1, void* key2)
80{
81	struct infra_key* k1 = (struct infra_key*)key1;
82	struct infra_key* k2 = (struct infra_key*)key2;
83	int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen);
84	if(r != 0)
85		return r;
86	if(k1->namelen != k2->namelen) {
87		if(k1->namelen < k2->namelen)
88			return -1;
89		return 1;
90	}
91	return query_dname_compare(k1->zonename, k2->zonename);
92}
93
94void
95infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
96{
97	struct infra_key* key = (struct infra_key*)k;
98	if(!key)
99		return;
100	lock_rw_destroy(&key->entry.lock);
101	free(key->zonename);
102	free(key);
103}
104
/** Data delete callback for the hosts hash table.
 * The data block is released with a single free(). */
void
infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct infra_data* idata = (struct infra_data*)d;
	free(idata);
}
111
112size_t
113rate_sizefunc(void* k, void* ATTR_UNUSED(d))
114{
115	struct rate_key* key = (struct rate_key*)k;
116	return sizeof(*key) + sizeof(struct rate_data) + key->namelen
117		+ lock_get_mem(&key->entry.lock);
118}
119
120int
121rate_compfunc(void* key1, void* key2)
122{
123	struct rate_key* k1 = (struct rate_key*)key1;
124	struct rate_key* k2 = (struct rate_key*)key2;
125	if(k1->namelen != k2->namelen) {
126		if(k1->namelen < k2->namelen)
127			return -1;
128		return 1;
129	}
130	return query_dname_compare(k1->name, k2->name);
131}
132
133void
134rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
135{
136	struct rate_key* key = (struct rate_key*)k;
137	if(!key)
138		return;
139	lock_rw_destroy(&key->entry.lock);
140	free(key->name);
141	free(key);
142}
143
/** Data delete callback for the domain rates hash table.
 * The data block is released with a single free(). */
void
rate_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct rate_data* rdata = (struct rate_data*)d;
	free(rdata);
}
150
151/** find or create element in domainlimit tree */
152static struct domain_limit_data* domain_limit_findcreate(
153	struct infra_cache* infra, char* name)
154{
155	uint8_t* nm;
156	int labs;
157	size_t nmlen;
158	struct domain_limit_data* d;
159
160	/* parse name */
161	nm = sldns_str2wire_dname(name, &nmlen);
162	if(!nm) {
163		log_err("could not parse %s", name);
164		return NULL;
165	}
166	labs = dname_count_labels(nm);
167
168	/* can we find it? */
169	d = (struct domain_limit_data*)name_tree_find(&infra->domain_limits,
170		nm, nmlen, labs, LDNS_RR_CLASS_IN);
171	if(d) {
172		free(nm);
173		return d;
174	}
175
176	/* create it */
177	d = (struct domain_limit_data*)calloc(1, sizeof(*d));
178	if(!d) {
179		free(nm);
180		return NULL;
181	}
182	d->node.node.key = &d->node;
183	d->node.name = nm;
184	d->node.len = nmlen;
185	d->node.labs = labs;
186	d->node.dclass = LDNS_RR_CLASS_IN;
187	d->lim = -1;
188	d->below = -1;
189	if(!name_tree_insert(&infra->domain_limits, &d->node, nm, nmlen,
190		labs, LDNS_RR_CLASS_IN)) {
191		log_err("duplicate element in domainlimit tree");
192		free(nm);
193		free(d);
194		return NULL;
195	}
196	return d;
197}
198
199/** insert rate limit configuration into lookup tree */
200static int infra_ratelimit_cfg_insert(struct infra_cache* infra,
201	struct config_file* cfg)
202{
203	struct config_str2list* p;
204	struct domain_limit_data* d;
205	for(p = cfg->ratelimit_for_domain; p; p = p->next) {
206		d = domain_limit_findcreate(infra, p->str);
207		if(!d)
208			return 0;
209		d->lim = atoi(p->str2);
210	}
211	for(p = cfg->ratelimit_below_domain; p; p = p->next) {
212		d = domain_limit_findcreate(infra, p->str);
213		if(!d)
214			return 0;
215		d->below = atoi(p->str2);
216	}
217	return 1;
218}
219
220/** setup domain limits tree (0 on failure) */
221static int
222setup_domain_limits(struct infra_cache* infra, struct config_file* cfg)
223{
224	name_tree_init(&infra->domain_limits);
225	if(!infra_ratelimit_cfg_insert(infra, cfg)) {
226		return 0;
227	}
228	name_tree_init_parents(&infra->domain_limits);
229	return 1;
230}
231
232struct infra_cache*
233infra_create(struct config_file* cfg)
234{
235	struct infra_cache* infra = (struct infra_cache*)calloc(1,
236		sizeof(struct infra_cache));
237	size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
238		sizeof(struct infra_data)+INFRA_BYTES_NAME);
239	infra->hosts = slabhash_create(cfg->infra_cache_slabs,
240		INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc,
241		&infra_delkeyfunc, &infra_deldatafunc, NULL);
242	if(!infra->hosts) {
243		free(infra);
244		return NULL;
245	}
246	infra->host_ttl = cfg->host_ttl;
247	infra->infra_keep_probing = cfg->infra_keep_probing;
248	infra_dp_ratelimit = cfg->ratelimit;
249	infra->domain_rates = slabhash_create(cfg->ratelimit_slabs,
250		INFRA_HOST_STARTSIZE, cfg->ratelimit_size,
251		&rate_sizefunc, &rate_compfunc, &rate_delkeyfunc,
252		&rate_deldatafunc, NULL);
253	if(!infra->domain_rates) {
254		infra_delete(infra);
255		return NULL;
256	}
257	/* insert config data into ratelimits */
258	if(!setup_domain_limits(infra, cfg)) {
259		infra_delete(infra);
260		return NULL;
261	}
262	infra_ip_ratelimit = cfg->ip_ratelimit;
263	infra->client_ip_rates = slabhash_create(cfg->ip_ratelimit_slabs,
264	    INFRA_HOST_STARTSIZE, cfg->ip_ratelimit_size, &ip_rate_sizefunc,
265	    &ip_rate_compfunc, &ip_rate_delkeyfunc, &ip_rate_deldatafunc, NULL);
266	if(!infra->client_ip_rates) {
267		infra_delete(infra);
268		return NULL;
269	}
270	return infra;
271}
272
273/** delete domain_limit entries */
274static void domain_limit_free(rbnode_type* n, void* ATTR_UNUSED(arg))
275{
276	if(n) {
277		free(((struct domain_limit_data*)n)->node.name);
278		free(n);
279	}
280}
281
282void
283infra_delete(struct infra_cache* infra)
284{
285	if(!infra)
286		return;
287	slabhash_delete(infra->hosts);
288	slabhash_delete(infra->domain_rates);
289	traverse_postorder(&infra->domain_limits, domain_limit_free, NULL);
290	slabhash_delete(infra->client_ip_rates);
291	free(infra);
292}
293
294struct infra_cache*
295infra_adjust(struct infra_cache* infra, struct config_file* cfg)
296{
297	size_t maxmem;
298	if(!infra)
299		return infra_create(cfg);
300	infra->host_ttl = cfg->host_ttl;
301	infra->infra_keep_probing = cfg->infra_keep_probing;
302	infra_dp_ratelimit = cfg->ratelimit;
303	infra_ip_ratelimit = cfg->ip_ratelimit;
304	maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
305		sizeof(struct infra_data)+INFRA_BYTES_NAME);
306	/* divide cachesize by slabs and multiply by slabs, because if the
307	 * cachesize is not an even multiple of slabs, that is the resulting
308	 * size of the slabhash */
309	if(!slabhash_is_size(infra->hosts, maxmem, cfg->infra_cache_slabs) ||
310	   !slabhash_is_size(infra->domain_rates, cfg->ratelimit_size,
311	   	cfg->ratelimit_slabs) ||
312	   !slabhash_is_size(infra->client_ip_rates, cfg->ip_ratelimit_size,
313	   	cfg->ip_ratelimit_slabs)) {
314		infra_delete(infra);
315		infra = infra_create(cfg);
316	} else {
317		/* reapply domain limits */
318		traverse_postorder(&infra->domain_limits, domain_limit_free,
319			NULL);
320		if(!setup_domain_limits(infra, cfg)) {
321			infra_delete(infra);
322			return NULL;
323		}
324	}
325	return infra;
326}
327
328/** calculate the hash value for a host key
329 *  set use_port to a non-0 number to use the port in
330 *  the hash calculation; 0 to ignore the port.*/
331static hashvalue_type
332hash_addr(struct sockaddr_storage* addr, socklen_t addrlen,
333  int use_port)
334{
335	hashvalue_type h = 0xab;
336	/* select the pieces to hash, some OS have changing data inside */
337	if(addr_is_ip6(addr, addrlen)) {
338		struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr;
339		h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h);
340		if(use_port){
341			h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h);
342		}
343		h = hashlittle(&in6->sin6_addr, INET6_SIZE, h);
344	} else {
345		struct sockaddr_in* in = (struct sockaddr_in*)addr;
346		h = hashlittle(&in->sin_family, sizeof(in->sin_family), h);
347		if(use_port){
348			h = hashlittle(&in->sin_port, sizeof(in->sin_port), h);
349		}
350		h = hashlittle(&in->sin_addr, INET_SIZE, h);
351	}
352	return h;
353}
354
355/** calculate infra hash for a key */
356static hashvalue_type
357hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name)
358{
359	return dname_query_hash(name, hash_addr(addr, addrlen, 1));
360}
361
362/** lookup version that does not check host ttl (you check it) */
363struct lruhash_entry*
364infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr,
365	socklen_t addrlen, uint8_t* name, size_t namelen, int wr)
366{
367	struct infra_key k;
368	k.addrlen = addrlen;
369	memcpy(&k.addr, addr, addrlen);
370	k.namelen = namelen;
371	k.zonename = name;
372	k.entry.hash = hash_infra(addr, addrlen, name);
373	k.entry.key = (void*)&k;
374	k.entry.data = NULL;
375	return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr);
376}
377
378/** init the data elements */
379static void
380data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
381	time_t timenow)
382{
383	struct infra_data* data = (struct infra_data*)e->data;
384	data->ttl = timenow + infra->host_ttl;
385	rtt_init(&data->rtt);
386	data->edns_version = 0;
387	data->edns_lame_known = 0;
388	data->probedelay = 0;
389	data->isdnsseclame = 0;
390	data->rec_lame = 0;
391	data->lame_type_A = 0;
392	data->lame_other = 0;
393	data->timeout_A = 0;
394	data->timeout_AAAA = 0;
395	data->timeout_other = 0;
396}
397
398/**
399 * Create and init a new entry for a host
400 * @param infra: infra structure with config parameters.
401 * @param addr: host address.
402 * @param addrlen: length of addr.
403 * @param name: name of zone
404 * @param namelen: length of name.
405 * @param tm: time now.
406 * @return: the new entry or NULL on malloc failure.
407 */
408static struct lruhash_entry*
409new_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
410	socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm)
411{
412	struct infra_data* data;
413	struct infra_key* key = (struct infra_key*)malloc(sizeof(*key));
414	if(!key)
415		return NULL;
416	data = (struct infra_data*)malloc(sizeof(struct infra_data));
417	if(!data) {
418		free(key);
419		return NULL;
420	}
421	key->zonename = memdup(name, namelen);
422	if(!key->zonename) {
423		free(key);
424		free(data);
425		return NULL;
426	}
427	key->namelen = namelen;
428	lock_rw_init(&key->entry.lock);
429	key->entry.hash = hash_infra(addr, addrlen, name);
430	key->entry.key = (void*)key;
431	key->entry.data = (void*)data;
432	key->addrlen = addrlen;
433	memcpy(&key->addr, addr, addrlen);
434	data_entry_init(infra, &key->entry, tm);
435	return &key->entry;
436}
437
438int
439infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
440        socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
441	int* edns_vs, uint8_t* edns_lame_known, int* to)
442{
443	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
444		nm, nmlen, 0);
445	struct infra_data* data;
446	int wr = 0;
447	if(e && ((struct infra_data*)e->data)->ttl < timenow) {
448		/* it expired, try to reuse existing entry */
449		int old = ((struct infra_data*)e->data)->rtt.rto;
450		time_t tprobe = ((struct infra_data*)e->data)->probedelay;
451		uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
452		uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
453		uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
454		lock_rw_unlock(&e->lock);
455		e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
456		if(e) {
457			/* if its still there we have a writelock, init */
458			/* re-initialise */
459			/* do not touch lameness, it may be valid still */
460			data_entry_init(infra, e, timenow);
461			wr = 1;
462			/* TOP_TIMEOUT remains on reuse */
463			if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
464				((struct infra_data*)e->data)->rtt.rto
465					= USEFUL_SERVER_TOP_TIMEOUT;
466				((struct infra_data*)e->data)->probedelay = tprobe;
467				((struct infra_data*)e->data)->timeout_A = tA;
468				((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
469				((struct infra_data*)e->data)->timeout_other = tother;
470			}
471		}
472	}
473	if(!e) {
474		/* insert new entry */
475		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
476			return 0;
477		data = (struct infra_data*)e->data;
478		*edns_vs = data->edns_version;
479		*edns_lame_known = data->edns_lame_known;
480		*to = rtt_timeout(&data->rtt);
481		slabhash_insert(infra->hosts, e->hash, e, data, NULL);
482		return 1;
483	}
484	/* use existing entry */
485	data = (struct infra_data*)e->data;
486	*edns_vs = data->edns_version;
487	*edns_lame_known = data->edns_lame_known;
488	*to = rtt_timeout(&data->rtt);
489	if(*to >= PROBE_MAXRTO && (infra->infra_keep_probing ||
490		rtt_notimeout(&data->rtt)*4 <= *to)) {
491		/* delay other queries, this is the probe query */
492		if(!wr) {
493			lock_rw_unlock(&e->lock);
494			e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1);
495			if(!e) { /* flushed from cache real fast, no use to
496				allocate just for the probedelay */
497				return 1;
498			}
499			data = (struct infra_data*)e->data;
500		}
501		/* add 999 to round up the timeout value from msec to sec,
502		 * then add a whole second so it is certain that this probe
503		 * has timed out before the next is allowed */
504		data->probedelay = timenow + ((*to)+1999)/1000;
505	}
506	lock_rw_unlock(&e->lock);
507	return 1;
508}
509
510int
511infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr,
512	socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
513	int dnsseclame, int reclame, uint16_t qtype)
514{
515	struct infra_data* data;
516	struct lruhash_entry* e;
517	int needtoinsert = 0;
518	e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
519	if(!e) {
520		/* insert it */
521		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) {
522			log_err("set_lame: malloc failure");
523			return 0;
524		}
525		needtoinsert = 1;
526	} else if( ((struct infra_data*)e->data)->ttl < timenow) {
527		/* expired, reuse existing entry */
528		data_entry_init(infra, e, timenow);
529	}
530	/* got an entry, now set the zone lame */
531	data = (struct infra_data*)e->data;
532	/* merge data (if any) */
533	if(dnsseclame)
534		data->isdnsseclame = 1;
535	if(reclame)
536		data->rec_lame = 1;
537	if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A)
538		data->lame_type_A = 1;
539	if(!dnsseclame  && !reclame && qtype != LDNS_RR_TYPE_A)
540		data->lame_other = 1;
541	/* done */
542	if(needtoinsert)
543		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
544	else 	{ lock_rw_unlock(&e->lock); }
545	return 1;
546}
547
548void
549infra_update_tcp_works(struct infra_cache* infra,
550        struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
551	size_t nmlen)
552{
553	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
554		nm, nmlen, 1);
555	struct infra_data* data;
556	if(!e)
557		return; /* doesn't exist */
558	data = (struct infra_data*)e->data;
559	if(data->rtt.rto >= RTT_MAX_TIMEOUT)
560		/* do not disqualify this server altogether, it is better
561		 * than nothing */
562		data->rtt.rto = RTT_MAX_TIMEOUT-1000;
563	lock_rw_unlock(&e->lock);
564}
565
566int
567infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
568	socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
569	int roundtrip, int orig_rtt, time_t timenow)
570{
571	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
572		nm, nmlen, 1);
573	struct infra_data* data;
574	int needtoinsert = 0, expired = 0;
575	int rto = 1;
576	time_t oldprobedelay = 0;
577	if(!e) {
578		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
579			return 0;
580		needtoinsert = 1;
581	} else if(((struct infra_data*)e->data)->ttl < timenow) {
582		oldprobedelay = ((struct infra_data*)e->data)->probedelay;
583		data_entry_init(infra, e, timenow);
584		expired = 1;
585	}
586	/* have an entry, update the rtt */
587	data = (struct infra_data*)e->data;
588	if(roundtrip == -1) {
589		if(needtoinsert || expired) {
590			/* timeout on entry that has expired before the timer
591			 * keep old timeout from the function caller */
592			data->rtt.rto = orig_rtt;
593			data->probedelay = oldprobedelay;
594		}
595		rtt_lost(&data->rtt, orig_rtt);
596		if(qtype == LDNS_RR_TYPE_A) {
597			if(data->timeout_A < TIMEOUT_COUNT_MAX)
598				data->timeout_A++;
599		} else if(qtype == LDNS_RR_TYPE_AAAA) {
600			if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
601				data->timeout_AAAA++;
602		} else {
603			if(data->timeout_other < TIMEOUT_COUNT_MAX)
604				data->timeout_other++;
605		}
606	} else {
607		/* if we got a reply, but the old timeout was above server
608		 * selection height, delete the timeout so the server is
609		 * fully available again */
610		if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT)
611			rtt_init(&data->rtt);
612		rtt_update(&data->rtt, roundtrip);
613		data->probedelay = 0;
614		if(qtype == LDNS_RR_TYPE_A)
615			data->timeout_A = 0;
616		else if(qtype == LDNS_RR_TYPE_AAAA)
617			data->timeout_AAAA = 0;
618		else	data->timeout_other = 0;
619	}
620	if(data->rtt.rto > 0)
621		rto = data->rtt.rto;
622
623	if(needtoinsert)
624		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
625	else 	{ lock_rw_unlock(&e->lock); }
626	return rto;
627}
628
629long long infra_get_host_rto(struct infra_cache* infra,
630        struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
631	size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow,
632	int* tA, int* tAAAA, int* tother)
633{
634	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
635		nm, nmlen, 0);
636	struct infra_data* data;
637	long long ttl = -2;
638	if(!e) return -1;
639	data = (struct infra_data*)e->data;
640	if(data->ttl >= timenow) {
641		ttl = (long long)(data->ttl - timenow);
642		memmove(rtt, &data->rtt, sizeof(*rtt));
643		if(timenow < data->probedelay)
644			*delay = (int)(data->probedelay - timenow);
645		else	*delay = 0;
646	}
647	*tA = (int)data->timeout_A;
648	*tAAAA = (int)data->timeout_AAAA;
649	*tother = (int)data->timeout_other;
650	lock_rw_unlock(&e->lock);
651	return ttl;
652}
653
654int
655infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr,
656	socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version,
657	time_t timenow)
658{
659	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
660		nm, nmlen, 1);
661	struct infra_data* data;
662	int needtoinsert = 0;
663	if(!e) {
664		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
665			return 0;
666		needtoinsert = 1;
667	} else if(((struct infra_data*)e->data)->ttl < timenow) {
668		data_entry_init(infra, e, timenow);
669	}
670	/* have an entry, update the rtt, and the ttl */
671	data = (struct infra_data*)e->data;
672	/* do not update if noEDNS and stored is yesEDNS */
673	if(!(edns_version == -1 && (data->edns_version != -1 &&
674		data->edns_lame_known))) {
675		data->edns_version = edns_version;
676		data->edns_lame_known = 1;
677	}
678
679	if(needtoinsert)
680		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
681	else 	{ lock_rw_unlock(&e->lock); }
682	return 1;
683}
684
685int
686infra_get_lame_rtt(struct infra_cache* infra,
687        struct sockaddr_storage* addr, socklen_t addrlen,
688        uint8_t* name, size_t namelen, uint16_t qtype,
689	int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow)
690{
691	struct infra_data* host;
692	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
693		name, namelen, 0);
694	if(!e)
695		return 0;
696	host = (struct infra_data*)e->data;
697	*rtt = rtt_unclamped(&host->rtt);
698	if(host->rtt.rto >= PROBE_MAXRTO && timenow >= host->probedelay
699		&& infra->infra_keep_probing) {
700		/* single probe, keep probing */
701		if(*rtt >= USEFUL_SERVER_TOP_TIMEOUT)
702			*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
703	} else if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
704		&& rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
705		/* single probe for this domain, and we are not probing */
706		/* unless the query type allows a probe to happen */
707		if(qtype == LDNS_RR_TYPE_A) {
708			if(host->timeout_A >= TIMEOUT_COUNT_MAX)
709				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
710			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
711		} else if(qtype == LDNS_RR_TYPE_AAAA) {
712			if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
713				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
714			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
715		} else {
716			if(host->timeout_other >= TIMEOUT_COUNT_MAX)
717				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
718			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
719		}
720	}
721	if(timenow > host->ttl) {
722		/* expired entry */
723		/* see if this can be a re-probe of an unresponsive server */
724		/* minus 1000 because that is outside of the RTTBAND, so
725		 * blacklisted servers stay blacklisted if this is chosen */
726		if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT ||
727			infra->infra_keep_probing) {
728			lock_rw_unlock(&e->lock);
729			*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
730			*lame = 0;
731			*dnsseclame = 0;
732			*reclame = 0;
733			return 1;
734		}
735		lock_rw_unlock(&e->lock);
736		return 0;
737	}
738	/* check lameness first */
739	if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) {
740		lock_rw_unlock(&e->lock);
741		*lame = 1;
742		*dnsseclame = 0;
743		*reclame = 0;
744		return 1;
745	} else if(host->lame_other && qtype != LDNS_RR_TYPE_A) {
746		lock_rw_unlock(&e->lock);
747		*lame = 1;
748		*dnsseclame = 0;
749		*reclame = 0;
750		return 1;
751	} else if(host->isdnsseclame) {
752		lock_rw_unlock(&e->lock);
753		*lame = 0;
754		*dnsseclame = 1;
755		*reclame = 0;
756		return 1;
757	} else if(host->rec_lame) {
758		lock_rw_unlock(&e->lock);
759		*lame = 0;
760		*dnsseclame = 0;
761		*reclame = 1;
762		return 1;
763	}
764	/* no lameness for this type of query */
765	lock_rw_unlock(&e->lock);
766	*lame = 0;
767	*dnsseclame = 0;
768	*reclame = 0;
769	return 1;
770}
771
772int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name,
773	size_t namelen)
774{
775	int labs = dname_count_labels(name);
776	struct domain_limit_data* d = (struct domain_limit_data*)
777		name_tree_lookup(&infra->domain_limits, name, namelen, labs,
778		LDNS_RR_CLASS_IN);
779	if(!d) return infra_dp_ratelimit;
780
781	if(d->node.labs == labs && d->lim != -1)
782		return d->lim; /* exact match */
783
784	/* find 'below match' */
785	if(d->node.labs == labs)
786		d = (struct domain_limit_data*)d->node.parent;
787	while(d) {
788		if(d->below != -1)
789			return d->below;
790		d = (struct domain_limit_data*)d->node.parent;
791	}
792	return infra_dp_ratelimit;
793}
794
795size_t ip_rate_sizefunc(void* k, void* ATTR_UNUSED(d))
796{
797	struct ip_rate_key* key = (struct ip_rate_key*)k;
798	return sizeof(*key) + sizeof(struct ip_rate_data)
799		+ lock_get_mem(&key->entry.lock);
800}
801
802int ip_rate_compfunc(void* key1, void* key2)
803{
804	struct ip_rate_key* k1 = (struct ip_rate_key*)key1;
805	struct ip_rate_key* k2 = (struct ip_rate_key*)key2;
806	return sockaddr_cmp_addr(&k1->addr, k1->addrlen,
807		&k2->addr, k2->addrlen);
808}
809
810void ip_rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
811{
812	struct ip_rate_key* key = (struct ip_rate_key*)k;
813	if(!key)
814		return;
815	lock_rw_destroy(&key->entry.lock);
816	free(key);
817}
818
819/** find data item in array, for write access, caller unlocks */
820static struct lruhash_entry* infra_find_ratedata(struct infra_cache* infra,
821	uint8_t* name, size_t namelen, int wr)
822{
823	struct rate_key key;
824	hashvalue_type h = dname_query_hash(name, 0xab);
825	memset(&key, 0, sizeof(key));
826	key.name = name;
827	key.namelen = namelen;
828	key.entry.hash = h;
829	return slabhash_lookup(infra->domain_rates, h, &key, wr);
830}
831
832/** find data item in array for ip addresses */
833static struct lruhash_entry* infra_find_ip_ratedata(struct infra_cache* infra,
834	struct comm_reply* repinfo, int wr)
835{
836	struct ip_rate_key key;
837	hashvalue_type h = hash_addr(&(repinfo->addr),
838		repinfo->addrlen, 0);
839	memset(&key, 0, sizeof(key));
840	key.addr = repinfo->addr;
841	key.addrlen = repinfo->addrlen;
842	key.entry.hash = h;
843	return slabhash_lookup(infra->client_ip_rates, h, &key, wr);
844}
845
846/** create rate data item for name, number 1 in now */
847static void infra_create_ratedata(struct infra_cache* infra,
848	uint8_t* name, size_t namelen, time_t timenow)
849{
850	hashvalue_type h = dname_query_hash(name, 0xab);
851	struct rate_key* k = (struct rate_key*)calloc(1, sizeof(*k));
852	struct rate_data* d = (struct rate_data*)calloc(1, sizeof(*d));
853	if(!k || !d) {
854		free(k);
855		free(d);
856		return; /* alloc failure */
857	}
858	k->namelen = namelen;
859	k->name = memdup(name, namelen);
860	if(!k->name) {
861		free(k);
862		free(d);
863		return; /* alloc failure */
864	}
865	lock_rw_init(&k->entry.lock);
866	k->entry.hash = h;
867	k->entry.key = k;
868	k->entry.data = d;
869	d->qps[0] = 1;
870	d->timestamp[0] = timenow;
871	slabhash_insert(infra->domain_rates, h, &k->entry, d, NULL);
872}
873
874/** create rate data item for ip address */
875static void infra_ip_create_ratedata(struct infra_cache* infra,
876	struct comm_reply* repinfo, time_t timenow)
877{
878	hashvalue_type h = hash_addr(&(repinfo->addr),
879	repinfo->addrlen, 0);
880	struct ip_rate_key* k = (struct ip_rate_key*)calloc(1, sizeof(*k));
881	struct ip_rate_data* d = (struct ip_rate_data*)calloc(1, sizeof(*d));
882	if(!k || !d) {
883		free(k);
884		free(d);
885		return; /* alloc failure */
886	}
887	k->addr = repinfo->addr;
888	k->addrlen = repinfo->addrlen;
889	lock_rw_init(&k->entry.lock);
890	k->entry.hash = h;
891	k->entry.key = k;
892	k->entry.data = d;
893	d->qps[0] = 1;
894	d->timestamp[0] = timenow;
895	slabhash_insert(infra->client_ip_rates, h, &k->entry, d, NULL);
896}
897
898/** find the second and return its rate counter, if none, remove oldest */
899static int* infra_rate_find_second(void* data, time_t t)
900{
901	struct rate_data* d = (struct rate_data*)data;
902	int i, oldest;
903	for(i=0; i<RATE_WINDOW; i++) {
904		if(d->timestamp[i] == t)
905			return &(d->qps[i]);
906	}
907	/* remove oldest timestamp, and insert it at t with 0 qps */
908	oldest = 0;
909	for(i=0; i<RATE_WINDOW; i++) {
910		if(d->timestamp[i] < d->timestamp[oldest])
911			oldest = i;
912	}
913	d->timestamp[oldest] = t;
914	d->qps[oldest] = 0;
915	return &(d->qps[oldest]);
916}
917
918int infra_rate_max(void* data, time_t now)
919{
920	struct rate_data* d = (struct rate_data*)data;
921	int i, max = 0;
922	for(i=0; i<RATE_WINDOW; i++) {
923		if(now-d->timestamp[i] <= RATE_WINDOW) {
924			if(d->qps[i] > max)
925				max = d->qps[i];
926		}
927	}
928	return max;
929}
930
931int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name,
932	size_t namelen, time_t timenow, struct query_info* qinfo,
933	struct comm_reply* replylist)
934{
935	int lim, max;
936	struct lruhash_entry* entry;
937
938	if(!infra_dp_ratelimit)
939		return 1; /* not enabled */
940
941	/* find ratelimit */
942	lim = infra_find_ratelimit(infra, name, namelen);
943	if(!lim)
944		return 1; /* disabled for this domain */
945
946	/* find or insert ratedata */
947	entry = infra_find_ratedata(infra, name, namelen, 1);
948	if(entry) {
949		int premax = infra_rate_max(entry->data, timenow);
950		int* cur = infra_rate_find_second(entry->data, timenow);
951		(*cur)++;
952		max = infra_rate_max(entry->data, timenow);
953		lock_rw_unlock(&entry->lock);
954
955		if(premax < lim && max >= lim) {
956			char buf[257], qnm[257], ts[12], cs[12], ip[128];
957			dname_str(name, buf);
958			dname_str(qinfo->qname, qnm);
959			sldns_wire2str_type_buf(qinfo->qtype, ts, sizeof(ts));
960			sldns_wire2str_class_buf(qinfo->qclass, cs, sizeof(cs));
961			ip[0]=0;
962			if(replylist) {
963				addr_to_str((struct sockaddr_storage *)&replylist->addr,
964					replylist->addrlen, ip, sizeof(ip));
965				verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s from %s", buf, lim, qnm, cs, ts, ip);
966			} else {
967				verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s", buf, lim, qnm, cs, ts);
968			}
969		}
970		return (max < lim);
971	}
972
973	/* create */
974	infra_create_ratedata(infra, name, namelen, timenow);
975	return (1 < lim);
976}
977
978void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name,
979	size_t namelen, time_t timenow)
980{
981	struct lruhash_entry* entry;
982	int* cur;
983	if(!infra_dp_ratelimit)
984		return; /* not enabled */
985	entry = infra_find_ratedata(infra, name, namelen, 1);
986	if(!entry) return; /* not cached */
987	cur = infra_rate_find_second(entry->data, timenow);
988	if((*cur) > 0)
989		(*cur)--;
990	lock_rw_unlock(&entry->lock);
991}
992
993int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name,
994	size_t namelen, time_t timenow)
995{
996	struct lruhash_entry* entry;
997	int lim, max;
998	if(!infra_dp_ratelimit)
999		return 0; /* not enabled */
1000
1001	/* find ratelimit */
1002	lim = infra_find_ratelimit(infra, name, namelen);
1003	if(!lim)
1004		return 0; /* disabled for this domain */
1005
1006	/* find current rate */
1007	entry = infra_find_ratedata(infra, name, namelen, 0);
1008	if(!entry)
1009		return 0; /* not cached */
1010	max = infra_rate_max(entry->data, timenow);
1011	lock_rw_unlock(&entry->lock);
1012
1013	return (max >= lim);
1014}
1015
1016size_t
1017infra_get_mem(struct infra_cache* infra)
1018{
1019	size_t s = sizeof(*infra) + slabhash_get_mem(infra->hosts);
1020	if(infra->domain_rates) s += slabhash_get_mem(infra->domain_rates);
1021	if(infra->client_ip_rates) s += slabhash_get_mem(infra->client_ip_rates);
1022	/* ignore domain_limits because walk through tree is big */
1023	return s;
1024}
1025
1026int infra_ip_ratelimit_inc(struct infra_cache* infra,
1027  struct comm_reply* repinfo, time_t timenow, struct sldns_buffer* buffer)
1028{
1029	int max;
1030	struct lruhash_entry* entry;
1031
1032	/* not enabled */
1033	if(!infra_ip_ratelimit) {
1034		return 1;
1035	}
1036	/* find or insert ratedata */
1037	entry = infra_find_ip_ratedata(infra, repinfo, 1);
1038	if(entry) {
1039		int premax = infra_rate_max(entry->data, timenow);
1040		int* cur = infra_rate_find_second(entry->data, timenow);
1041		(*cur)++;
1042		max = infra_rate_max(entry->data, timenow);
1043		lock_rw_unlock(&entry->lock);
1044
1045		if(premax < infra_ip_ratelimit && max >= infra_ip_ratelimit) {
1046			char client_ip[128], qnm[LDNS_MAX_DOMAINLEN+1+12+12];
1047			addr_to_str((struct sockaddr_storage *)&repinfo->addr,
1048				repinfo->addrlen, client_ip, sizeof(client_ip));
1049			qnm[0]=0;
1050			if(sldns_buffer_limit(buffer)>LDNS_HEADER_SIZE &&
1051				LDNS_QDCOUNT(sldns_buffer_begin(buffer))!=0) {
1052				(void)sldns_wire2str_rrquestion_buf(
1053					sldns_buffer_at(buffer, LDNS_HEADER_SIZE),
1054					sldns_buffer_limit(buffer)-LDNS_HEADER_SIZE,
1055					qnm, sizeof(qnm));
1056				if(strlen(qnm)>0 && qnm[strlen(qnm)-1]=='\n')
1057					qnm[strlen(qnm)-1] = 0; /*remove newline*/
1058				if(strchr(qnm, '\t'))
1059					*strchr(qnm, '\t') = ' ';
1060				if(strchr(qnm, '\t'))
1061					*strchr(qnm, '\t') = ' ';
1062				verbose(VERB_OPS, "ip_ratelimit exceeded %s %d %s",
1063					client_ip, infra_ip_ratelimit, qnm);
1064			} else {
1065				verbose(VERB_OPS, "ip_ratelimit exceeded %s %d (no query name)",
1066					client_ip, infra_ip_ratelimit);
1067			}
1068		}
1069		return (max <= infra_ip_ratelimit);
1070	}
1071
1072	/* create */
1073	infra_ip_create_ratedata(infra, repinfo, timenow);
1074	return 1;
1075}
1076