1262445Serwin/*
2262445Serwin * Copyright (C) 2013, 2014  Internet Systems Consortium, Inc. ("ISC")
3262445Serwin *
4262445Serwin * Permission to use, copy, modify, and/or distribute this software for any
5262445Serwin * purpose with or without fee is hereby granted, provided that the above
6262445Serwin * copyright notice and this permission notice appear in all copies.
7262445Serwin *
8262445Serwin * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9262445Serwin * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10262445Serwin * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11262445Serwin * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12262445Serwin * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13262445Serwin * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14262445Serwin * PERFORMANCE OF THIS SOFTWARE.
15262445Serwin */
16262445Serwin
17262445Serwin/*! \file */
18262445Serwin
19262445Serwin/*
20262445Serwin * Rate limit DNS responses.
21262445Serwin */
22262445Serwin
23262445Serwin/* #define ISC_LIST_CHECKINIT */
24262445Serwin
25262445Serwin#include <config.h>
26262445Serwin#include <isc/mem.h>
27262445Serwin#include <isc/net.h>
28262445Serwin#include <isc/netaddr.h>
29262445Serwin#include <isc/print.h>
30262445Serwin
31262445Serwin#include <dns/result.h>
32262445Serwin#include <dns/rcode.h>
33262445Serwin#include <dns/rdatatype.h>
34262445Serwin#include <dns/rdataclass.h>
35262445Serwin#include <dns/log.h>
36262445Serwin#include <dns/rrl.h>
37262445Serwin#include <dns/view.h>
38262445Serwin
39262445Serwinstatic void
40262445Serwinlog_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_boolean_t early,
41262445Serwin	char *log_buf, unsigned int log_buf_len);
42262445Serwin
43262445Serwin/*
44262445Serwin * Get a modulus for a hash function that is tolerably likely to be
45262445Serwin * relatively prime to most inputs.  Of course, we get a prime for for initial
46262445Serwin * values not larger than the square of the last prime.  We often get a prime
47262445Serwin * after that.
48262445Serwin * This works well in practice for hash tables up to at least 100
49262445Serwin * times the square of the last prime and better than a multiplicative hash.
50262445Serwin */
51262445Serwinstatic int
52262445Serwinhash_divisor(unsigned int initial) {
53262445Serwin	static isc_uint16_t primes[] = {
54262445Serwin		  3,   5,   7,  11,  13,  17,  19,  23,  29,  31,  37,  41,
55262445Serwin		 43,  47,  53,  59,  61,  67,  71,  73,  79,  83,  89,  97,
56262445Serwin#if 0
57262445Serwin		101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
58262445Serwin		163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
59262445Serwin		229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
60262445Serwin		293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
61262445Serwin		373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
62262445Serwin		443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
63262445Serwin		521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
64262445Serwin		601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
65262445Serwin		673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
66262445Serwin		757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
67262445Serwin		839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
68262445Serwin		929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009,
69262445Serwin#endif
70262445Serwin	};
71262445Serwin	int divisions, tries;
72262445Serwin	unsigned int result;
73262445Serwin	isc_uint16_t *pp, p;
74262445Serwin
75262445Serwin	result = initial;
76262445Serwin
77262445Serwin	if (primes[sizeof(primes)/sizeof(primes[0])-1] >= result) {
78262445Serwin		pp = primes;
79262445Serwin		while (*pp < result)
80262445Serwin			++pp;
81262445Serwin		return (*pp);
82262445Serwin	}
83262445Serwin
84262445Serwin	if ((result & 1) == 0)
85262445Serwin		++result;
86262445Serwin
87262445Serwin	divisions = 0;
88262445Serwin	tries = 1;
89262445Serwin	pp = primes;
90262445Serwin	do {
91262445Serwin		p = *pp++;
92262445Serwin		++divisions;
93262445Serwin		if ((result % p) == 0) {
94262445Serwin			++tries;
95262445Serwin			result += 2;
96262445Serwin			pp = primes;
97262445Serwin		}
98262445Serwin	} while (pp < &primes[sizeof(primes) / sizeof(primes[0])]);
99262445Serwin
100262445Serwin	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
101262445Serwin		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
102262445Serwin			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
103262445Serwin			      "%d hash_divisor() divisions in %d tries"
104262445Serwin			      " to get %d from %d",
105262445Serwin			      divisions, tries, result, initial);
106262445Serwin
107262445Serwin	return (result);
108262445Serwin}
109262445Serwin
110262445Serwin/*
111262445Serwin * Convert a timestamp to a number of seconds in the past.
112262445Serwin */
113262445Serwinstatic inline int
114262445Serwindelta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) {
115262445Serwin	int delta;
116262445Serwin
117262445Serwin	delta = now - ts;
118262445Serwin	if (delta >= 0)
119262445Serwin		return (delta);
120262445Serwin
121262445Serwin	/*
122262445Serwin	 * The timestamp is in the future.  That future might result from
123262445Serwin	 * re-ordered requests, because we use timestamps on requests
124262445Serwin	 * instead of consulting a clock.  Timestamps in the distant future are
125262445Serwin	 * assumed to result from clock changes.  When the clock changes to
126262445Serwin	 * the past, make existing timestamps appear to be in the past.
127262445Serwin	 */
128262445Serwin	if (delta < -DNS_RRL_MAX_TIME_TRAVEL)
129262445Serwin		return (DNS_RRL_FOREVER);
130262445Serwin	return (0);
131262445Serwin}
132262445Serwin
133262445Serwinstatic inline int
134262445Serwinget_age(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, isc_stdtime_t now) {
135262445Serwin	if (!e->ts_valid)
136262445Serwin		return (DNS_RRL_FOREVER);
137262445Serwin	return (delta_rrl_time(e->ts + rrl->ts_bases[e->ts_gen], now));
138262445Serwin}
139262445Serwin
140262445Serwinstatic inline void
141262445Serwinset_age(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_stdtime_t now) {
142262445Serwin	dns_rrl_entry_t *e_old;
143262445Serwin	unsigned int ts_gen;
144262445Serwin	int i, ts;
145262445Serwin
146262445Serwin	ts_gen = rrl->ts_gen;
147262445Serwin	ts = now - rrl->ts_bases[ts_gen];
148262445Serwin	if (ts < 0) {
149262445Serwin		if (ts < -DNS_RRL_MAX_TIME_TRAVEL)
150262445Serwin			ts = DNS_RRL_FOREVER;
151262445Serwin		else
152262445Serwin			ts = 0;
153262445Serwin	}
154262445Serwin
155262445Serwin	/*
156262445Serwin	 * Make a new timestamp base if the current base is too old.
157262445Serwin	 * All entries older than DNS_RRL_MAX_WINDOW seconds are ancient,
158262445Serwin	 * useless history.  Their timestamps can be treated as if they are
159262445Serwin	 * all the same.
160262445Serwin	 * We only do arithmetic on more recent timestamps, so bases for
161262445Serwin	 * older timestamps can be recycled provided the old timestamps are
162262445Serwin	 * marked as ancient history.
163262445Serwin	 * This loop is almost always very short because most entries are
164262445Serwin	 * recycled after one second and any entries that need to be marked
165262445Serwin	 * are older than (DNS_RRL_TS_BASES)*DNS_RRL_MAX_TS seconds.
166262445Serwin	 */
167262445Serwin	if (ts >= DNS_RRL_MAX_TS) {
168262445Serwin		ts_gen = (ts_gen + 1) % DNS_RRL_TS_BASES;
169262445Serwin		for (e_old = ISC_LIST_TAIL(rrl->lru), i = 0;
170262445Serwin		     e_old != NULL && (e_old->ts_gen == ts_gen ||
171262445Serwin				       !ISC_LINK_LINKED(e_old, hlink));
172262445Serwin		     e_old = ISC_LIST_PREV(e_old, lru), ++i)
173262445Serwin		{
174262445Serwin			e_old->ts_valid = ISC_FALSE;
175262445Serwin		}
176262445Serwin		if (i != 0)
177262445Serwin			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
178262445Serwin				      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
179262445Serwin				      "rrl new time base scanned %d entries"
180262445Serwin				      " at %d for %d %d %d %d",
181262445Serwin				      i, now, rrl->ts_bases[ts_gen],
182262445Serwin				      rrl->ts_bases[(ts_gen + 1) %
183262445Serwin					DNS_RRL_TS_BASES],
184262445Serwin				      rrl->ts_bases[(ts_gen + 2) %
185262445Serwin					DNS_RRL_TS_BASES],
186262445Serwin				      rrl->ts_bases[(ts_gen + 3) %
187262445Serwin					DNS_RRL_TS_BASES]);
188262445Serwin		rrl->ts_gen = ts_gen;
189262445Serwin		rrl->ts_bases[ts_gen] = now;
190262445Serwin		ts = 0;
191262445Serwin	}
192262445Serwin
193262445Serwin	e->ts_gen = ts_gen;
194262445Serwin	e->ts = ts;
195262445Serwin	e->ts_valid = ISC_TRUE;
196262445Serwin}
197262445Serwin
198262445Serwinstatic isc_result_t
199262445Serwinexpand_entries(dns_rrl_t *rrl, int new) {
200262445Serwin	unsigned int bsize;
201262445Serwin	dns_rrl_block_t *b;
202262445Serwin	dns_rrl_entry_t *e;
203262445Serwin	double rate;
204262445Serwin	int i;
205262445Serwin
206262445Serwin	if (rrl->num_entries + new >= rrl->max_entries &&
207262445Serwin	    rrl->max_entries != 0)
208262445Serwin	{
209262445Serwin		new = rrl->max_entries - rrl->num_entries;
210262445Serwin		if (new <= 0)
211262445Serwin			return (ISC_R_SUCCESS);
212262445Serwin	}
213262445Serwin
214262445Serwin	/*
215262445Serwin	 * Log expansions so that the user can tune max-table-size
216262445Serwin	 * and min-table-size.
217262445Serwin	 */
218262445Serwin	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) &&
219262445Serwin	    rrl->hash != NULL) {
220262445Serwin		rate = rrl->probes;
221262445Serwin		if (rrl->searches != 0)
222262445Serwin			rate /= rrl->searches;
223262445Serwin		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
224262445Serwin			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
225262445Serwin			      "increase from %d to %d RRL entries with"
226262445Serwin			      " %d bins; average search length %.1f",
227262445Serwin			      rrl->num_entries, rrl->num_entries+new,
228262445Serwin			      rrl->hash->length, rate);
229262445Serwin	}
230262445Serwin
231262445Serwin	bsize = sizeof(dns_rrl_block_t) + (new-1)*sizeof(dns_rrl_entry_t);
232262445Serwin	b = isc_mem_get(rrl->mctx, bsize);
233262445Serwin	if (b == NULL) {
234262445Serwin		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
235262445Serwin			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
236262445Serwin			      "isc_mem_get(%d) failed for RRL entries",
237262445Serwin			      bsize);
238262445Serwin		return (ISC_R_NOMEMORY);
239262445Serwin	}
240262445Serwin	memset(b, 0, bsize);
241262445Serwin	b->size = bsize;
242262445Serwin
243262445Serwin	e = b->entries;
244262445Serwin	for (i = 0; i < new; ++i, ++e) {
245262445Serwin		ISC_LINK_INIT(e, hlink);
246262445Serwin		ISC_LIST_INITANDAPPEND(rrl->lru, e, lru);
247262445Serwin	}
248262445Serwin	rrl->num_entries += new;
249262445Serwin	ISC_LIST_INITANDAPPEND(rrl->blocks, b, link);
250262445Serwin
251262445Serwin	return (ISC_R_SUCCESS);
252262445Serwin}
253262445Serwin
254262445Serwinstatic inline dns_rrl_bin_t *
255262445Serwinget_bin(dns_rrl_hash_t *hash, unsigned int hval) {
256262445Serwin	return (&hash->bins[hval % hash->length]);
257262445Serwin}
258262445Serwin
259262445Serwinstatic void
260262445Serwinfree_old_hash(dns_rrl_t *rrl) {
261262445Serwin	dns_rrl_hash_t *old_hash;
262262445Serwin	dns_rrl_bin_t *old_bin;
263262445Serwin	dns_rrl_entry_t *e, *e_next;
264262445Serwin
265262445Serwin	old_hash = rrl->old_hash;
266262445Serwin	for (old_bin = &old_hash->bins[0];
267262445Serwin	     old_bin < &old_hash->bins[old_hash->length];
268262445Serwin	     ++old_bin)
269262445Serwin	{
270262445Serwin		for (e = ISC_LIST_HEAD(*old_bin); e != NULL; e = e_next) {
271262445Serwin			e_next = ISC_LIST_NEXT(e, hlink);
272262445Serwin			ISC_LINK_INIT(e, hlink);
273262445Serwin		}
274262445Serwin	}
275262445Serwin
276262445Serwin	isc_mem_put(rrl->mctx, old_hash,
277262445Serwin		    sizeof(*old_hash)
278262445Serwin		      + (old_hash->length - 1) * sizeof(old_hash->bins[0]));
279262445Serwin	rrl->old_hash = NULL;
280262445Serwin}
281262445Serwin
282262445Serwinstatic isc_result_t
283262445Serwinexpand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) {
284262445Serwin	dns_rrl_hash_t *hash;
285262445Serwin	int old_bins, new_bins, hsize;
286262445Serwin	double rate;
287262445Serwin
288262445Serwin	if (rrl->old_hash != NULL)
289262445Serwin		free_old_hash(rrl);
290262445Serwin
291262445Serwin	/*
292262445Serwin	 * Most searches fail and so go to the end of the chain.
293262445Serwin	 * Use a small hash table load factor.
294262445Serwin	 */
295262445Serwin	old_bins = (rrl->hash == NULL) ? 0 : rrl->hash->length;
296262445Serwin	new_bins = old_bins/8 + old_bins;
297262445Serwin	if (new_bins < rrl->num_entries)
298262445Serwin		new_bins = rrl->num_entries;
299262445Serwin	new_bins = hash_divisor(new_bins);
300262445Serwin
301262445Serwin	hsize = sizeof(dns_rrl_hash_t) + (new_bins-1)*sizeof(hash->bins[0]);
302262445Serwin	hash = isc_mem_get(rrl->mctx, hsize);
303262445Serwin	if (hash == NULL) {
304262445Serwin		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
305262445Serwin			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
306262445Serwin			      "isc_mem_get(%d) failed for"
307262445Serwin			      " RRL hash table",
308262445Serwin			      hsize);
309262445Serwin		return (ISC_R_NOMEMORY);
310262445Serwin	}
311262445Serwin	memset(hash, 0, hsize);
312262445Serwin	hash->length = new_bins;
313262445Serwin	rrl->hash_gen ^= 1;
314262445Serwin	hash->gen = rrl->hash_gen;
315262445Serwin
316262445Serwin	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && old_bins != 0) {
317262445Serwin		rate = rrl->probes;
318262445Serwin		if (rrl->searches != 0)
319262445Serwin			rate /= rrl->searches;
320262445Serwin		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
321262445Serwin			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
322262445Serwin			      "increase from %d to %d RRL bins for"
323262445Serwin			      " %d entries; average search length %.1f",
324262445Serwin			      old_bins, new_bins, rrl->num_entries, rate);
325262445Serwin	}
326262445Serwin
327262445Serwin	rrl->old_hash = rrl->hash;
328262445Serwin	if (rrl->old_hash != NULL)
329262445Serwin		rrl->old_hash->check_time = now;
330262445Serwin	rrl->hash = hash;
331262445Serwin
332262445Serwin	return (ISC_R_SUCCESS);
333262445Serwin}
334262445Serwin
335262445Serwinstatic void
336262445Serwinref_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, int probes, isc_stdtime_t now) {
337262445Serwin	/*
338262445Serwin	 * Make the entry most recently used.
339262445Serwin	 */
340262445Serwin	if (ISC_LIST_HEAD(rrl->lru) != e) {
341262445Serwin		if (e == rrl->last_logged)
342262445Serwin			rrl->last_logged = ISC_LIST_PREV(e, lru);
343262445Serwin		ISC_LIST_UNLINK(rrl->lru, e, lru);
344262445Serwin		ISC_LIST_PREPEND(rrl->lru, e, lru);
345262445Serwin	}
346262445Serwin
347262445Serwin	/*
348262445Serwin	 * Expand the hash table if it is time and necessary.
349262445Serwin	 * This will leave the newly referenced entry in a chain in the
350262445Serwin	 * old hash table.  It will migrate to the new hash table the next
351262445Serwin	 * time it is used or be cut loose when the old hash table is destroyed.
352262445Serwin	 */
353262445Serwin	rrl->probes += probes;
354262445Serwin	++rrl->searches;
355262445Serwin	if (rrl->searches > 100 &&
356262445Serwin	    delta_rrl_time(rrl->hash->check_time, now) > 1) {
357262445Serwin		if (rrl->probes/rrl->searches > 2)
358262445Serwin			expand_rrl_hash(rrl, now);
359262445Serwin		rrl->hash->check_time = now;
360262445Serwin		rrl->probes = 0;
361262445Serwin		rrl->searches = 0;
362262445Serwin	}
363262445Serwin}
364262445Serwin
365262445Serwinstatic inline isc_boolean_t
366262445Serwinkey_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) {
367262445Serwin	if (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0)
368262445Serwin		return (ISC_TRUE);
369262445Serwin	return (ISC_FALSE);
370262445Serwin}
371262445Serwin
372262445Serwinstatic inline isc_uint32_t
373262445Serwinhash_key(const dns_rrl_key_t *key) {
374262445Serwin	isc_uint32_t hval;
375262445Serwin	int i;
376262445Serwin
377262445Serwin	hval = key->w[0];
378262445Serwin	for (i = sizeof(*key) / sizeof(key->w[0]) - 1; i >= 0; --i) {
379262445Serwin		hval = key->w[i] + (hval<<1);
380262445Serwin	}
381262445Serwin	return (hval);
382262445Serwin}
383262445Serwin
384262445Serwin/*
385262445Serwin * Construct the hash table key.
386262445Serwin * Use a hash of the DNS query name to save space in the database.
387262445Serwin * Collisions result in legitimate rate limiting responses for one
388262445Serwin * query name also limiting responses for other names to the
389262445Serwin * same client.  This is rare and benign enough given the large
390262445Serwin * space costs compared to keeping the entire name in the database
391262445Serwin * entry or the time costs of dynamic allocation.
392262445Serwin */
393262445Serwinstatic void
394262445Serwinmake_key(const dns_rrl_t *rrl, dns_rrl_key_t *key,
395262445Serwin	 const isc_sockaddr_t *client_addr,
396262445Serwin	 dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass,
397262445Serwin	 dns_rrl_rtype_t rtype)
398262445Serwin{
399262445Serwin	dns_name_t base;
400262445Serwin	dns_offsets_t base_offsets;
401262445Serwin	int labels, i;
402262445Serwin
403262445Serwin	memset(key, 0, sizeof(*key));
404262445Serwin
405262445Serwin	key->s.rtype = rtype;
406262445Serwin	if (rtype == DNS_RRL_RTYPE_QUERY) {
407262445Serwin		key->s.qtype = qtype;
408262445Serwin		key->s.qclass = qclass & 0xff;
409262445Serwin	} else if (rtype == DNS_RRL_RTYPE_REFERRAL ||
410262445Serwin		   rtype == DNS_RRL_RTYPE_NODATA) {
411262445Serwin		/*
412262445Serwin		 * Because there is no qtype in the empty answer sections of
413262445Serwin		 * referral and NODATA responses, count them as the same.
414262445Serwin		 */
415262445Serwin		key->s.qclass = qclass & 0xff;
416262445Serwin	}
417262445Serwin
418262445Serwin	if (qname != NULL && qname->labels != 0) {
419262445Serwin		/*
420262445Serwin		 * Ignore the first label of wildcards.
421262445Serwin		 */
422262445Serwin		if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 &&
423262445Serwin		    (labels = dns_name_countlabels(qname)) > 1)
424262445Serwin		{
425262445Serwin			dns_name_init(&base, base_offsets);
426262445Serwin			dns_name_getlabelsequence(qname, 1, labels-1, &base);
427262445Serwin			key->s.qname_hash = dns_name_hashbylabel(&base,
428262445Serwin							ISC_FALSE);
429262445Serwin		} else {
430262445Serwin			key->s.qname_hash = dns_name_hashbylabel(qname,
431262445Serwin							ISC_FALSE);
432262445Serwin		}
433262445Serwin	}
434262445Serwin
435262445Serwin	switch (client_addr->type.sa.sa_family) {
436262445Serwin	case AF_INET:
437262445Serwin		key->s.ip[0] = (client_addr->type.sin.sin_addr.s_addr &
438262445Serwin			      rrl->ipv4_mask);
439262445Serwin		break;
440262445Serwin	case AF_INET6:
441262445Serwin		key->s.ipv6 = ISC_TRUE;
442262445Serwin		memmove(key->s.ip, &client_addr->type.sin6.sin6_addr,
443262445Serwin			sizeof(key->s.ip));
444262445Serwin		for (i = 0; i < DNS_RRL_MAX_PREFIX/32; ++i)
445262445Serwin			key->s.ip[i] &= rrl->ipv6_mask[i];
446262445Serwin		break;
447262445Serwin	}
448262445Serwin}
449262445Serwin
450262445Serwinstatic inline dns_rrl_rate_t *
451262445Serwinget_rate(dns_rrl_t *rrl, dns_rrl_rtype_t rtype) {
452262445Serwin	switch (rtype) {
453262445Serwin	case DNS_RRL_RTYPE_QUERY:
454262445Serwin		return (&rrl->responses_per_second);
455262445Serwin	case DNS_RRL_RTYPE_REFERRAL:
456262445Serwin		return (&rrl->referrals_per_second);
457262445Serwin	case DNS_RRL_RTYPE_NODATA:
458262445Serwin		return (&rrl->nodata_per_second);
459262445Serwin	case DNS_RRL_RTYPE_NXDOMAIN:
460262445Serwin		return (&rrl->nxdomains_per_second);
461262445Serwin	case DNS_RRL_RTYPE_ERROR:
462262445Serwin		return (&rrl->errors_per_second);
463262445Serwin	case DNS_RRL_RTYPE_ALL:
464262445Serwin		return (&rrl->all_per_second);
465262445Serwin	default:
466262445Serwin		INSIST(0);
467262445Serwin	}
468262445Serwin	return (NULL);
469262445Serwin}
470262445Serwin
471262445Serwinstatic int
472262445Serwinresponse_balance(dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) {
473262445Serwin	dns_rrl_rate_t *ratep;
474262445Serwin	int balance, rate;
475262445Serwin
476262445Serwin	if (e->key.s.rtype == DNS_RRL_RTYPE_TCP) {
477262445Serwin		rate = 1;
478262445Serwin	} else {
479262445Serwin		ratep = get_rate(rrl, e->key.s.rtype);
480262445Serwin		rate = ratep->scaled;
481262445Serwin	}
482262445Serwin
483262445Serwin	balance = e->responses + age * rate;
484262445Serwin	if (balance > rate)
485262445Serwin		balance = rate;
486262445Serwin	return (balance);
487262445Serwin}
488262445Serwin
489262445Serwin/*
490262445Serwin * Search for an entry for a response and optionally create it.
491262445Serwin */
492262445Serwinstatic dns_rrl_entry_t *
493262445Serwinget_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr,
494262445Serwin	  dns_rdataclass_t qclass, dns_rdatatype_t qtype, dns_name_t *qname,
495262445Serwin	  dns_rrl_rtype_t rtype, isc_stdtime_t now, isc_boolean_t create,
496262445Serwin	  char *log_buf, unsigned int log_buf_len)
497262445Serwin{
498262445Serwin	dns_rrl_key_t key;
499262445Serwin	isc_uint32_t hval;
500262445Serwin	dns_rrl_entry_t *e;
501262445Serwin	dns_rrl_hash_t *hash;
502262445Serwin	dns_rrl_bin_t *new_bin, *old_bin;
503262445Serwin	int probes, age;
504262445Serwin
505262445Serwin	make_key(rrl, &key, client_addr, qtype, qname, qclass, rtype);
506262445Serwin	hval = hash_key(&key);
507262445Serwin
508262445Serwin	/*
509262445Serwin	 * Look for the entry in the current hash table.
510262445Serwin	 */
511262445Serwin	new_bin = get_bin(rrl->hash, hval);
512262445Serwin	probes = 1;
513262445Serwin	e = ISC_LIST_HEAD(*new_bin);
514262445Serwin	while (e != NULL) {
515262445Serwin		if (key_cmp(&e->key, &key)) {
516262445Serwin			ref_entry(rrl, e, probes, now);
517262445Serwin			return (e);
518262445Serwin		}
519262445Serwin		++probes;
520262445Serwin		e = ISC_LIST_NEXT(e, hlink);
521262445Serwin	}
522262445Serwin
523262445Serwin	/*
524262445Serwin	 * Look in the old hash table.
525262445Serwin	 */
526262445Serwin	if (rrl->old_hash != NULL) {
527262445Serwin		old_bin = get_bin(rrl->old_hash, hval);
528262445Serwin		e = ISC_LIST_HEAD(*old_bin);
529262445Serwin		while (e != NULL) {
530262445Serwin			if (key_cmp(&e->key, &key)) {
531262445Serwin				ISC_LIST_UNLINK(*old_bin, e, hlink);
532262445Serwin				ISC_LIST_PREPEND(*new_bin, e, hlink);
533262445Serwin				e->hash_gen = rrl->hash_gen;
534262445Serwin				ref_entry(rrl, e, probes, now);
535262445Serwin				return (e);
536262445Serwin			}
537262445Serwin			e = ISC_LIST_NEXT(e, hlink);
538262445Serwin		}
539262445Serwin
540262445Serwin		/*
541262445Serwin		 * Discard prevous hash table when all of its entries are old.
542262445Serwin		 */
543262445Serwin		age = delta_rrl_time(rrl->old_hash->check_time, now);
544262445Serwin		if (age > rrl->window)
545262445Serwin			free_old_hash(rrl);
546262445Serwin	}
547262445Serwin
548262445Serwin	if (!create)
549262445Serwin		return (NULL);
550262445Serwin
551262445Serwin	/*
552262445Serwin	 * The entry does not exist, so create it by finding a free entry.
553262445Serwin	 * Keep currently penalized and logged entries.
554262445Serwin	 * Try to make more entries if none are idle.
555262445Serwin	 * Steal the oldest entry if we cannot create more.
556262445Serwin	 */
557262445Serwin	for (e = ISC_LIST_TAIL(rrl->lru);
558262445Serwin	     e != NULL;
559262445Serwin	     e = ISC_LIST_PREV(e, lru))
560262445Serwin	{
561262445Serwin		if (!ISC_LINK_LINKED(e, hlink))
562262445Serwin			break;
563262445Serwin		age = get_age(rrl, e, now);
564262445Serwin		if (age <= 1) {
565262445Serwin			e = NULL;
566262445Serwin			break;
567262445Serwin		}
568262445Serwin		if (!e->logged && response_balance(rrl, e, age) > 0)
569262445Serwin			break;
570262445Serwin	}
571262445Serwin	if (e == NULL) {
572262445Serwin		expand_entries(rrl, ISC_MIN((rrl->num_entries+1)/2, 1000));
573262445Serwin		e = ISC_LIST_TAIL(rrl->lru);
574262445Serwin	}
575262445Serwin	if (e->logged)
576262445Serwin		log_end(rrl, e, ISC_TRUE, log_buf, log_buf_len);
577262445Serwin	if (ISC_LINK_LINKED(e, hlink)) {
578262445Serwin		if (e->hash_gen == rrl->hash_gen)
579262445Serwin			hash = rrl->hash;
580262445Serwin		else
581262445Serwin			hash = rrl->old_hash;
582262445Serwin		old_bin = get_bin(hash, hash_key(&e->key));
583262445Serwin		ISC_LIST_UNLINK(*old_bin, e, hlink);
584262445Serwin	}
585262445Serwin	ISC_LIST_PREPEND(*new_bin, e, hlink);
586262445Serwin	e->hash_gen = rrl->hash_gen;
587262445Serwin	e->key = key;
588262445Serwin	e->ts_valid = ISC_FALSE;
589262445Serwin	ref_entry(rrl, e, probes, now);
590262445Serwin	return (e);
591262445Serwin}
592262445Serwin
593262445Serwinstatic void
594262445Serwindebit_log(const dns_rrl_entry_t *e, int age, const char *action) {
595262445Serwin	char buf[sizeof("age=12345678")];
596262445Serwin	const char *age_str;
597262445Serwin
598262445Serwin	if (age == DNS_RRL_FOREVER) {
599262445Serwin		age_str = "";
600262445Serwin	} else {
601262445Serwin		snprintf(buf, sizeof(buf), "age=%d", age);
602262445Serwin		age_str = buf;
603262445Serwin	}
604262445Serwin	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
605262445Serwin		      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
606262445Serwin		      "rrl %08x %6s  responses=%-3d %s",
607262445Serwin		      hash_key(&e->key), age_str, e->responses, action);
608262445Serwin}
609262445Serwin
610262445Serwinstatic inline dns_rrl_result_t
611262445Serwindebit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale,
612262445Serwin		const isc_sockaddr_t *client_addr, isc_stdtime_t now,
613262445Serwin		char *log_buf, unsigned int log_buf_len)
614262445Serwin{
615262445Serwin	int rate, new_rate, slip, new_slip, age, log_secs, min;
616262445Serwin	dns_rrl_rate_t *ratep;
617262445Serwin	dns_rrl_entry_t const *credit_e;
618262445Serwin
619262445Serwin	/*
620262445Serwin	 * Pick the rate counter.
621262445Serwin	 * Optionally adjust the rate by the estimated query/second rate.
622262445Serwin	 */
623262445Serwin	ratep = get_rate(rrl, e->key.s.rtype);
624262445Serwin	rate = ratep->r;
625262445Serwin	if (rate == 0)
626262445Serwin		return (DNS_RRL_RESULT_OK);
627262445Serwin
628262445Serwin	if (scale < 1.0) {
629262445Serwin		/*
630262445Serwin		 * The limit for clients that have used TCP is not scaled.
631262445Serwin		 */
632262445Serwin		credit_e = get_entry(rrl, client_addr,
633262445Serwin				     0, dns_rdatatype_none, NULL,
634262445Serwin				     DNS_RRL_RTYPE_TCP, now, ISC_FALSE,
635262445Serwin				     log_buf, log_buf_len);
636262445Serwin		if (credit_e != NULL) {
637262445Serwin			age = get_age(rrl, e, now);
638262445Serwin			if (age < rrl->window)
639262445Serwin				scale = 1.0;
640262445Serwin		}
641262445Serwin	}
642262445Serwin	if (scale < 1.0) {
643262445Serwin		new_rate = (int) (rate * scale);
644262445Serwin		if (new_rate < 1)
645262445Serwin			new_rate = 1;
646262445Serwin		if (ratep->scaled != new_rate) {
647262445Serwin			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
648262445Serwin				      DNS_LOGMODULE_REQUEST,
649262445Serwin				      DNS_RRL_LOG_DEBUG1,
650262445Serwin				      "%d qps scaled %s by %.2f"
651262445Serwin				      " from %d to %d",
652262445Serwin				      (int)qps, ratep->str, scale,
653262445Serwin				      rate, new_rate);
654262445Serwin			rate = new_rate;
655262445Serwin			ratep->scaled = rate;
656262445Serwin		}
657262445Serwin	}
658262445Serwin
659262445Serwin	min = -rrl->window * rate;
660262445Serwin
661262445Serwin	/*
662262445Serwin	 * Treat time jumps into the recent past as no time.
663262445Serwin	 * Treat entries older than the window as if they were just created
664262445Serwin	 * Credit other entries.
665262445Serwin	 */
666262445Serwin	age = get_age(rrl, e, now);
667262445Serwin	if (age > 0) {
668262445Serwin		/*
669262445Serwin		 * Credit tokens earned during elapsed time.
670262445Serwin		 */
671262445Serwin		if (age > rrl->window) {
672262445Serwin			e->responses = rate;
673262445Serwin			e->slip_cnt = 0;
674262445Serwin		} else {
675262445Serwin			e->responses += rate*age;
676262445Serwin			if (e->responses > rate) {
677262445Serwin				e->responses = rate;
678262445Serwin				e->slip_cnt = 0;
679262445Serwin			}
680262445Serwin		}
681262445Serwin		/*
682262445Serwin		 * Find the seconds since last log message without overflowing
683262445Serwin		 * small counter.  This counter is reset when an entry is
684262445Serwin		 * created.  It is not necessarily reset when some requests
685262445Serwin		 * are answered provided other requests continue to be dropped
686262445Serwin		 * or slipped.  This can happen when the request rate is just
687262445Serwin		 * at the limit.
688262445Serwin		 */
689262445Serwin		if (e->logged) {
690262445Serwin			log_secs = e->log_secs;
691262445Serwin			log_secs += age;
692262445Serwin			if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0)
693262445Serwin				log_secs = DNS_RRL_MAX_LOG_SECS;
694262445Serwin			e->log_secs = log_secs;
695262445Serwin		}
696262445Serwin	}
697262445Serwin	set_age(rrl, e, now);
698262445Serwin
699262445Serwin	/*
700262445Serwin	 * Debit the entry for this response.
701262445Serwin	 */
702262445Serwin	if (--e->responses >= 0) {
703262445Serwin		if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
704262445Serwin			debit_log(e, age, "");
705262445Serwin		return (DNS_RRL_RESULT_OK);
706262445Serwin	}
707262445Serwin
708262445Serwin	if (e->responses < min)
709262445Serwin		e->responses = min;
710262445Serwin
711262445Serwin	/*
712262445Serwin	 * Drop this response unless it should slip or leak.
713262445Serwin	 */
714262445Serwin	slip = rrl->slip.r;
715262445Serwin	if (slip > 2 && scale < 1.0) {
716262445Serwin		new_slip = (int) (slip * scale);
717262445Serwin		if (new_slip < 2)
718262445Serwin			new_slip = 2;
719262445Serwin		if (rrl->slip.scaled != new_slip) {
720262445Serwin			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
721262445Serwin				      DNS_LOGMODULE_REQUEST,
722262445Serwin				      DNS_RRL_LOG_DEBUG1,
723262445Serwin				      "%d qps scaled slip"
724262445Serwin				      " by %.2f from %d to %d",
725262445Serwin				      (int)qps, scale,
726262445Serwin				      slip, new_slip);
727262445Serwin			slip = new_slip;
728262445Serwin			rrl->slip.scaled = slip;
729262445Serwin		}
730262445Serwin	}
731262445Serwin	if (slip != 0 && e->key.s.rtype != DNS_RRL_RTYPE_ALL) {
732262445Serwin		if (e->slip_cnt++ == 0) {
733262445Serwin			if ((int) e->slip_cnt >= slip)
734262445Serwin				e->slip_cnt = 0;
735262445Serwin			if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
736262445Serwin				debit_log(e, age, "slip");
737262445Serwin			return (DNS_RRL_RESULT_SLIP);
738262445Serwin		} else if ((int) e->slip_cnt >= slip) {
739262445Serwin			e->slip_cnt = 0;
740262445Serwin		}
741262445Serwin	}
742262445Serwin
743262445Serwin	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
744262445Serwin		debit_log(e, age, "drop");
745262445Serwin	return (DNS_RRL_RESULT_DROP);
746262445Serwin}
747262445Serwin
748262445Serwinstatic inline dns_rrl_qname_buf_t *
749262445Serwinget_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) {
750262445Serwin	dns_rrl_qname_buf_t *qbuf;
751262445Serwin
752262445Serwin	qbuf = rrl->qnames[e->log_qname];
753262445Serwin	if (qbuf == NULL || qbuf->e != e)
754262445Serwin		return (NULL);
755262445Serwin	return (qbuf);
756262445Serwin}
757262445Serwin
758262445Serwinstatic inline void
759262445Serwinfree_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) {
760262445Serwin	dns_rrl_qname_buf_t *qbuf;
761262445Serwin
762262445Serwin	qbuf = get_qname(rrl, e);
763262445Serwin	if (qbuf != NULL) {
764262445Serwin		qbuf->e = NULL;
765262445Serwin		ISC_LIST_APPEND(rrl->qname_free, qbuf, link);
766262445Serwin	}
767262445Serwin}
768262445Serwin
769262445Serwinstatic void
770262445Serwinadd_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len) {
771262445Serwin	isc_region_t region;
772262445Serwin
773262445Serwin	isc_buffer_availableregion(lb, &region);
774262445Serwin	if (str_len >= region.length) {
775262445Serwin		if (region.length <= 0)
776262445Serwin			return;
777262445Serwin		str_len = region.length;
778262445Serwin	}
779262445Serwin	memmove(region.base, str, str_len);
780262445Serwin	isc_buffer_add(lb, str_len);
781262445Serwin}
782262445Serwin
783262445Serwin#define ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s)-1)
784262445Serwin
785262445Serwin/*
786262445Serwin * Build strings for the logs
787262445Serwin */
788262445Serwinstatic void
789262445Serwinmake_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e,
790262445Serwin	     const char *str1, const char *str2, isc_boolean_t plural,
791262445Serwin	     dns_name_t *qname, isc_boolean_t save_qname,
792262445Serwin	     dns_rrl_result_t rrl_result, isc_result_t resp_result,
793262445Serwin	     char *log_buf, unsigned int log_buf_len)
794262445Serwin{
795262445Serwin	isc_buffer_t lb;
796262445Serwin	dns_rrl_qname_buf_t *qbuf;
797262445Serwin	isc_netaddr_t cidr;
798262445Serwin	char strbuf[ISC_MAX(sizeof("/123"), sizeof("  (12345678)"))];
799262445Serwin	const char *rstr;
800262445Serwin	isc_result_t msg_result;
801262445Serwin
802262445Serwin	if (log_buf_len <= 1) {
803262445Serwin		if (log_buf_len == 1)
804262445Serwin			log_buf[0] = '\0';
805262445Serwin		return;
806262445Serwin	}
807262445Serwin	isc_buffer_init(&lb, log_buf, log_buf_len-1);
808262445Serwin
809262445Serwin	if (str1 != NULL)
810262445Serwin		add_log_str(&lb, str1, strlen(str1));
811262445Serwin	if (str2 != NULL)
812262445Serwin		add_log_str(&lb, str2, strlen(str2));
813262445Serwin
814262445Serwin	switch (rrl_result) {
815262445Serwin	case DNS_RRL_RESULT_OK:
816262445Serwin		break;
817262445Serwin	case DNS_RRL_RESULT_DROP:
818262445Serwin		ADD_LOG_CSTR(&lb, "drop ");
819262445Serwin		break;
820262445Serwin	case DNS_RRL_RESULT_SLIP:
821262445Serwin		ADD_LOG_CSTR(&lb, "slip ");
822262445Serwin		break;
823262445Serwin	default:
824262445Serwin		INSIST(0);
825262445Serwin		break;
826262445Serwin	}
827262445Serwin
828262445Serwin	switch (e->key.s.rtype) {
829262445Serwin	case DNS_RRL_RTYPE_QUERY:
830262445Serwin		break;
831262445Serwin	case DNS_RRL_RTYPE_REFERRAL:
832262445Serwin		ADD_LOG_CSTR(&lb, "referral ");
833262445Serwin		break;
834262445Serwin	case DNS_RRL_RTYPE_NODATA:
835262445Serwin		ADD_LOG_CSTR(&lb, "NODATA ");
836262445Serwin		break;
837262445Serwin	case DNS_RRL_RTYPE_NXDOMAIN:
838262445Serwin		ADD_LOG_CSTR(&lb, "NXDOMAIN ");
839262445Serwin		break;
840262445Serwin	case DNS_RRL_RTYPE_ERROR:
841262445Serwin		if (resp_result == ISC_R_SUCCESS) {
842262445Serwin			ADD_LOG_CSTR(&lb, "error ");
843262445Serwin		} else {
844262445Serwin			rstr = isc_result_totext(resp_result);
845262445Serwin			add_log_str(&lb, rstr, strlen(rstr));
846262445Serwin			ADD_LOG_CSTR(&lb, " error ");
847262445Serwin		}
848262445Serwin		break;
849262445Serwin	case DNS_RRL_RTYPE_ALL:
850262445Serwin		ADD_LOG_CSTR(&lb, "all ");
851262445Serwin		break;
852262445Serwin	default:
853262445Serwin		INSIST(0);
854262445Serwin	}
855262445Serwin
856262445Serwin	if (plural)
857262445Serwin		ADD_LOG_CSTR(&lb, "responses to ");
858262445Serwin	else
859262445Serwin		ADD_LOG_CSTR(&lb, "response to ");
860262445Serwin
861262445Serwin	memset(&cidr, 0, sizeof(cidr));
862262445Serwin	if (e->key.s.ipv6) {
863262445Serwin		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen);
864262445Serwin		cidr.family = AF_INET6;
865262445Serwin		memset(&cidr.type.in6, 0,  sizeof(cidr.type.in6));
866262445Serwin		memmove(&cidr.type.in6, e->key.s.ip, sizeof(e->key.s.ip));
867262445Serwin	} else {
868262445Serwin		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen);
869262445Serwin		cidr.family = AF_INET;
870262445Serwin		cidr.type.in.s_addr = e->key.s.ip[0];
871262445Serwin	}
872262445Serwin	msg_result = isc_netaddr_totext(&cidr, &lb);
873262445Serwin	if (msg_result != ISC_R_SUCCESS)
874262445Serwin		ADD_LOG_CSTR(&lb, "?");
875262445Serwin	add_log_str(&lb, strbuf, strlen(strbuf));
876262445Serwin
877262445Serwin	if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY ||
878262445Serwin	    e->key.s.rtype == DNS_RRL_RTYPE_REFERRAL ||
879262445Serwin	    e->key.s.rtype == DNS_RRL_RTYPE_NODATA ||
880262445Serwin	    e->key.s.rtype == DNS_RRL_RTYPE_NXDOMAIN) {
881262445Serwin		qbuf = get_qname(rrl, e);
882262445Serwin		if (save_qname && qbuf == NULL &&
883262445Serwin		    qname != NULL && dns_name_isabsolute(qname)) {
884262445Serwin			/*
885262445Serwin			 * Capture the qname for the "stop limiting" message.
886262445Serwin			 */
887262445Serwin			qbuf = ISC_LIST_TAIL(rrl->qname_free);
888262445Serwin			if (qbuf != NULL) {
889262445Serwin				ISC_LIST_UNLINK(rrl->qname_free, qbuf, link);
890262445Serwin			} else if (rrl->num_qnames < DNS_RRL_QNAMES) {
891262445Serwin				qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf));
892262445Serwin				if (qbuf != NULL) {
893262445Serwin					memset(qbuf, 0, sizeof(*qbuf));
894262445Serwin					ISC_LINK_INIT(qbuf, link);
895262445Serwin					qbuf->index = rrl->num_qnames;
896262445Serwin					rrl->qnames[rrl->num_qnames++] = qbuf;
897262445Serwin				} else {
898262445Serwin					isc_log_write(dns_lctx,
899262445Serwin						      DNS_LOGCATEGORY_RRL,
900262445Serwin						      DNS_LOGMODULE_REQUEST,
901262445Serwin						      DNS_RRL_LOG_FAIL,
902262445Serwin						      "isc_mem_get(%d)"
903262445Serwin						      " failed for RRL qname",
904262445Serwin						      (int)sizeof(*qbuf));
905262445Serwin				}
906262445Serwin			}
907262445Serwin			if (qbuf != NULL) {
908262445Serwin				e->log_qname = qbuf->index;
909262445Serwin				qbuf->e = e;
910262445Serwin				dns_fixedname_init(&qbuf->qname);
911262445Serwin				dns_name_copy(qname,
912262445Serwin					      dns_fixedname_name(&qbuf->qname),
913262445Serwin					      NULL);
914262445Serwin			}
915262445Serwin		}
916262445Serwin		if (qbuf != NULL)
917262445Serwin			qname = dns_fixedname_name(&qbuf->qname);
918262445Serwin		if (qname != NULL) {
919262445Serwin			ADD_LOG_CSTR(&lb, " for ");
920262445Serwin			(void)dns_name_totext(qname, ISC_TRUE, &lb);
921262445Serwin		} else {
922262445Serwin			ADD_LOG_CSTR(&lb, " for (?)");
923262445Serwin		}
924262445Serwin		if (e->key.s.rtype != DNS_RRL_RTYPE_NXDOMAIN) {
925262445Serwin			ADD_LOG_CSTR(&lb, " ");
926262445Serwin			(void)dns_rdataclass_totext(e->key.s.qclass, &lb);
927262445Serwin			if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY) {
928262445Serwin				ADD_LOG_CSTR(&lb, " ");
929262445Serwin				(void)dns_rdatatype_totext(e->key.s.qtype, &lb);
930262445Serwin			}
931262445Serwin		}
932262445Serwin		snprintf(strbuf, sizeof(strbuf), "  (%08x)",
933262445Serwin			 e->key.s.qname_hash);
934262445Serwin		add_log_str(&lb, strbuf, strlen(strbuf));
935262445Serwin	}
936262445Serwin
937262445Serwin	/*
938262445Serwin	 * We saved room for '\0'.
939262445Serwin	 */
940262445Serwin	log_buf[isc_buffer_usedlength(&lb)] = '\0';
941262445Serwin}
942262445Serwin
943262445Serwinstatic void
944262445Serwinlog_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_boolean_t early,
945262445Serwin	char *log_buf, unsigned int log_buf_len)
946262445Serwin{
947262445Serwin	if (e->logged) {
948262445Serwin		make_log_buf(rrl, e,
949262445Serwin			     early ? "*" : NULL,
950262445Serwin			     rrl->log_only ? "would stop limiting "
951262445Serwin					   : "stop limiting ",
952262445Serwin			     ISC_TRUE, NULL, ISC_FALSE,
953262445Serwin			     DNS_RRL_RESULT_OK, ISC_R_SUCCESS,
954262445Serwin			     log_buf, log_buf_len);
955262445Serwin		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
956262445Serwin			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
957262445Serwin			      "%s", log_buf);
958262445Serwin		free_qname(rrl, e);
959262445Serwin		e->logged = ISC_FALSE;
960262445Serwin		--rrl->num_logged;
961262445Serwin	}
962262445Serwin}
963262445Serwin
964262445Serwin/*
965262445Serwin * Log messages for streams that have stopped being rate limited.
966262445Serwin */
967262445Serwinstatic void
968262445Serwinlog_stops(dns_rrl_t *rrl, isc_stdtime_t now, int limit,
969262445Serwin	  char *log_buf, unsigned int log_buf_len)
970262445Serwin{
971262445Serwin	dns_rrl_entry_t *e;
972262445Serwin	int age;
973262445Serwin
974262445Serwin	for (e = rrl->last_logged; e != NULL; e = ISC_LIST_PREV(e, lru)) {
975262445Serwin		if (!e->logged)
976262445Serwin			continue;
977262445Serwin		if (now != 0) {
978262445Serwin			age = get_age(rrl, e, now);
979262445Serwin			if (age < DNS_RRL_STOP_LOG_SECS ||
980262445Serwin			    response_balance(rrl, e, age) < 0)
981262445Serwin				break;
982262445Serwin		}
983262445Serwin
984262445Serwin		log_end(rrl, e, now == 0, log_buf, log_buf_len);
985262445Serwin		if (rrl->num_logged <= 0)
986262445Serwin			break;
987262445Serwin
988262445Serwin		/*
989262445Serwin		 * Too many messages could stall real work.
990262445Serwin		 */
991262445Serwin		if (--limit < 0) {
992262445Serwin			rrl->last_logged = ISC_LIST_PREV(e, lru);
993262445Serwin			return;
994262445Serwin		}
995262445Serwin	}
996262445Serwin	if (e == NULL) {
997262445Serwin		INSIST(rrl->num_logged == 0);
998262445Serwin		rrl->log_stops_time = now;
999262445Serwin	}
1000262445Serwin	rrl->last_logged = e;
1001262445Serwin}
1002262445Serwin
1003262445Serwin/*
1004262445Serwin * Main rate limit interface.
1005262445Serwin */
1006262445Serwindns_rrl_result_t
1007262445Serwindns_rrl(dns_view_t *view,
1008262445Serwin	const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp,
1009262445Serwin	dns_rdataclass_t qclass, dns_rdatatype_t qtype,
1010262445Serwin	dns_name_t *qname, isc_result_t resp_result, isc_stdtime_t now,
1011262445Serwin	isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len)
1012262445Serwin{
1013262445Serwin	dns_rrl_t *rrl;
1014262445Serwin	dns_rrl_rtype_t rtype;
1015262445Serwin	dns_rrl_entry_t *e;
1016262445Serwin	isc_netaddr_t netclient;
1017262445Serwin	int secs;
1018262445Serwin	double qps, scale;
1019262445Serwin	int exempt_match;
1020262445Serwin	isc_result_t result;
1021262445Serwin	dns_rrl_result_t rrl_result;
1022262445Serwin
1023262445Serwin	INSIST(log_buf != NULL && log_buf_len > 0);
1024262445Serwin
1025262445Serwin	rrl = view->rrl;
1026262445Serwin	if (rrl->exempt != NULL) {
1027262445Serwin		isc_netaddr_fromsockaddr(&netclient, client_addr);
1028262445Serwin		result = dns_acl_match(&netclient, NULL, rrl->exempt,
1029262445Serwin				       &view->aclenv, &exempt_match, NULL);
1030262445Serwin		if (result == ISC_R_SUCCESS && exempt_match > 0)
1031262445Serwin			return (DNS_RRL_RESULT_OK);
1032262445Serwin	}
1033262445Serwin
1034262445Serwin	LOCK(&rrl->lock);
1035262445Serwin
1036262445Serwin	/*
1037262445Serwin	 * Estimate total query per second rate when scaling by qps.
1038262445Serwin	 */
1039262445Serwin	if (rrl->qps_scale == 0) {
1040262445Serwin		qps = 0.0;
1041262445Serwin		scale = 1.0;
1042262445Serwin	} else {
1043262445Serwin		++rrl->qps_responses;
1044262445Serwin		secs = delta_rrl_time(rrl->qps_time, now);
1045262445Serwin		if (secs <= 0) {
1046262445Serwin			qps = rrl->qps;
1047262445Serwin		} else {
1048262445Serwin			qps = (1.0*rrl->qps_responses) / secs;
1049262445Serwin			if (secs >= rrl->window) {
1050262445Serwin				if (isc_log_wouldlog(dns_lctx,
1051262445Serwin						     DNS_RRL_LOG_DEBUG3))
1052262445Serwin					isc_log_write(dns_lctx,
1053262445Serwin						      DNS_LOGCATEGORY_RRL,
1054262445Serwin						      DNS_LOGMODULE_REQUEST,
1055262445Serwin						      DNS_RRL_LOG_DEBUG3,
1056262445Serwin						      "%d responses/%d seconds"
1057262445Serwin						      " = %d qps",
1058262445Serwin						      rrl->qps_responses, secs,
1059262445Serwin						      (int)qps);
1060262445Serwin				rrl->qps = qps;
1061262445Serwin				rrl->qps_responses = 0;
1062262445Serwin				rrl->qps_time = now;
1063262445Serwin			} else if (qps < rrl->qps) {
1064262445Serwin				qps = rrl->qps;
1065262445Serwin			}
1066262445Serwin		}
1067262445Serwin		scale = rrl->qps_scale / qps;
1068262445Serwin	}
1069262445Serwin
1070262445Serwin	/*
1071262445Serwin	 * Do maintenance once per second.
1072262445Serwin	 */
1073262445Serwin	if (rrl->num_logged > 0 && rrl->log_stops_time != now)
1074262445Serwin		log_stops(rrl, now, 8, log_buf, log_buf_len);
1075262445Serwin
1076262445Serwin	/*
1077262445Serwin	 * Notice TCP responses when scaling limits by qps.
1078262445Serwin	 * Do not try to rate limit TCP responses.
1079262445Serwin	 */
1080262445Serwin	if (is_tcp) {
1081262445Serwin		if (scale < 1.0) {
1082262445Serwin			e = get_entry(rrl, client_addr,
1083262445Serwin				      0, dns_rdatatype_none, NULL,
1084262445Serwin				      DNS_RRL_RTYPE_TCP, now, ISC_TRUE,
1085262445Serwin				      log_buf, log_buf_len);
1086262445Serwin			if (e != NULL) {
1087262445Serwin				e->responses = -(rrl->window+1);
1088262445Serwin				set_age(rrl, e, now);
1089262445Serwin			}
1090262445Serwin		}
1091262445Serwin		UNLOCK(&rrl->lock);
1092262445Serwin		return (ISC_R_SUCCESS);
1093262445Serwin	}
1094262445Serwin
1095262445Serwin	/*
1096262445Serwin	 * Find the right kind of entry, creating it if necessary.
1097262445Serwin	 * If that is impossible, then nothing more can be done
1098262445Serwin	 */
1099262445Serwin	switch (resp_result) {
1100262445Serwin	case ISC_R_SUCCESS:
1101262445Serwin		rtype = DNS_RRL_RTYPE_QUERY;
1102262445Serwin		break;
1103262445Serwin	case DNS_R_DELEGATION:
1104262445Serwin		rtype = DNS_RRL_RTYPE_REFERRAL;
1105262445Serwin		break;
1106262445Serwin	case DNS_R_NXRRSET:
1107262445Serwin		rtype = DNS_RRL_RTYPE_NODATA;
1108262445Serwin		break;
1109262445Serwin	case DNS_R_NXDOMAIN:
1110262445Serwin		rtype = DNS_RRL_RTYPE_NXDOMAIN;
1111262445Serwin		break;
1112262445Serwin	default:
1113262445Serwin		rtype = DNS_RRL_RTYPE_ERROR;
1114262445Serwin		break;
1115262445Serwin	}
1116262445Serwin	e = get_entry(rrl, client_addr, qclass, qtype, qname, rtype,
1117262445Serwin		      now, ISC_TRUE, log_buf, log_buf_len);
1118262445Serwin	if (e == NULL) {
1119262445Serwin		UNLOCK(&rrl->lock);
1120262445Serwin		return (DNS_RRL_RESULT_OK);
1121262445Serwin	}
1122262445Serwin
1123262445Serwin	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
1124262445Serwin		/*
1125262445Serwin		 * Do not worry about speed or releasing the lock.
1126262445Serwin		 * This message appears before messages from debit_rrl_entry().
1127262445Serwin		 */
1128262445Serwin		make_log_buf(rrl, e, "consider limiting ", NULL, ISC_FALSE,
1129262445Serwin			     qname, ISC_FALSE, DNS_RRL_RESULT_OK, resp_result,
1130262445Serwin			     log_buf, log_buf_len);
1131262445Serwin		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1132262445Serwin			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
1133262445Serwin			      "%s", log_buf);
1134262445Serwin	}
1135262445Serwin
1136262445Serwin	rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now,
1137262445Serwin				     log_buf, log_buf_len);
1138262445Serwin
1139262445Serwin	if (rrl->all_per_second.r != 0) {
1140262445Serwin		/*
1141262445Serwin		 * We must debit the all-per-second token bucket if we have
1142262445Serwin		 * an all-per-second limit for the IP address.
1143262445Serwin		 * The all-per-second limit determines the log message
1144262445Serwin		 * when both limits are hit.
1145262445Serwin		 * The response limiting must continue if the
1146262445Serwin		 * all-per-second limiting lapses.
1147262445Serwin		 */
1148262445Serwin		dns_rrl_entry_t *e_all;
1149262445Serwin		dns_rrl_result_t rrl_all_result;
1150262445Serwin
1151262445Serwin		e_all = get_entry(rrl, client_addr,
1152262445Serwin				  0, dns_rdatatype_none, NULL,
1153262445Serwin				  DNS_RRL_RTYPE_ALL, now, ISC_TRUE,
1154262445Serwin				  log_buf, log_buf_len);
1155262445Serwin		if (e_all == NULL) {
1156262445Serwin			UNLOCK(&rrl->lock);
1157262445Serwin			return (DNS_RRL_RESULT_OK);
1158262445Serwin		}
1159262445Serwin		rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale,
1160262445Serwin						 client_addr, now,
1161262445Serwin						 log_buf, log_buf_len);
1162262445Serwin		if (rrl_all_result != DNS_RRL_RESULT_OK) {
1163262445Serwin			int level;
1164262445Serwin
1165262445Serwin			e = e_all;
1166262445Serwin			rrl_result = rrl_all_result;
1167262445Serwin			if (rrl_result == DNS_RRL_RESULT_OK)
1168262445Serwin				level = DNS_RRL_LOG_DEBUG2;
1169262445Serwin			else
1170262445Serwin				level = DNS_RRL_LOG_DEBUG1;
1171262445Serwin			if (isc_log_wouldlog(dns_lctx, level)) {
1172262445Serwin				make_log_buf(rrl, e,
1173262445Serwin					     "prefer all-per-second limiting ",
1174262445Serwin					     NULL, ISC_TRUE, qname, ISC_FALSE,
1175262445Serwin					     DNS_RRL_RESULT_OK, resp_result,
1176262445Serwin					     log_buf, log_buf_len);
1177262445Serwin				isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1178262445Serwin					      DNS_LOGMODULE_REQUEST, level,
1179262445Serwin					      "%s", log_buf);
1180262445Serwin			}
1181262445Serwin		}
1182262445Serwin	}
1183262445Serwin
1184262445Serwin	if (rrl_result == DNS_RRL_RESULT_OK) {
1185262445Serwin		UNLOCK(&rrl->lock);
1186262445Serwin		return (DNS_RRL_RESULT_OK);
1187262445Serwin	}
1188262445Serwin
1189262445Serwin	/*
1190262445Serwin	 * Log occassionally in the rate-limit category.
1191262445Serwin	 */
1192262445Serwin	if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) &&
1193262445Serwin	    isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP)) {
1194262445Serwin		make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
1195262445Serwin			     e->logged ? "continue limiting " : "limit ",
1196262445Serwin			     ISC_TRUE, qname, ISC_TRUE,
1197262445Serwin			     DNS_RRL_RESULT_OK, resp_result,
1198262445Serwin			     log_buf, log_buf_len);
1199262445Serwin		if (!e->logged) {
1200262445Serwin			e->logged = ISC_TRUE;
1201262445Serwin			if (++rrl->num_logged <= 1)
1202262445Serwin				rrl->last_logged = e;
1203262445Serwin		}
1204262445Serwin		e->log_secs = 0;
1205262445Serwin
1206262445Serwin		/*
1207262445Serwin		 * Avoid holding the lock.
1208262445Serwin		 */
1209262445Serwin		if (!wouldlog) {
1210262445Serwin			UNLOCK(&rrl->lock);
1211262445Serwin			e = NULL;
1212262445Serwin		}
1213262445Serwin		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1214262445Serwin			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
1215262445Serwin			      "%s", log_buf);
1216262445Serwin	}
1217262445Serwin
1218262445Serwin	/*
1219262445Serwin	 * Make a log message for the caller.
1220262445Serwin	 */
1221262445Serwin	if (wouldlog)
1222262445Serwin		make_log_buf(rrl, e,
1223262445Serwin			     rrl->log_only ? "would rate limit " : "rate limit ",
1224262445Serwin			     NULL, ISC_FALSE, qname, ISC_FALSE,
1225262445Serwin			     rrl_result, resp_result, log_buf, log_buf_len);
1226262445Serwin
1227262445Serwin	if (e != NULL) {
1228262445Serwin		/*
1229262445Serwin		 * Do not save the qname unless we might need it for
1230262445Serwin		 * the ending log message.
1231262445Serwin		 */
1232262445Serwin		if (!e->logged)
1233262445Serwin			free_qname(rrl, e);
1234262445Serwin		UNLOCK(&rrl->lock);
1235262445Serwin	}
1236262445Serwin
1237262445Serwin	return (rrl_result);
1238262445Serwin}
1239262445Serwin
1240262445Serwinvoid
1241262445Serwindns_rrl_view_destroy(dns_view_t *view) {
1242262445Serwin	dns_rrl_t *rrl;
1243262445Serwin	dns_rrl_block_t *b;
1244262445Serwin	dns_rrl_hash_t *h;
1245262445Serwin	char log_buf[DNS_RRL_LOG_BUF_LEN];
1246262445Serwin	int i;
1247262445Serwin
1248262445Serwin	rrl = view->rrl;
1249262445Serwin	if (rrl == NULL)
1250262445Serwin		return;
1251262445Serwin	view->rrl = NULL;
1252262445Serwin
1253262445Serwin	/*
1254262445Serwin	 * Assume the caller takes care of locking the view and anything else.
1255262445Serwin	 */
1256262445Serwin
1257262445Serwin	if (rrl->num_logged > 0)
1258262445Serwin		log_stops(rrl, 0, ISC_INT32_MAX, log_buf, sizeof(log_buf));
1259262445Serwin
1260262445Serwin	for (i = 0; i < DNS_RRL_QNAMES; ++i) {
1261262445Serwin		if (rrl->qnames[i] == NULL)
1262262445Serwin			break;
1263262445Serwin		isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i]));
1264262445Serwin	}
1265262445Serwin
1266262445Serwin	if (rrl->exempt != NULL)
1267262445Serwin		dns_acl_detach(&rrl->exempt);
1268262445Serwin
1269262445Serwin	DESTROYLOCK(&rrl->lock);
1270262445Serwin
1271262445Serwin	while (!ISC_LIST_EMPTY(rrl->blocks)) {
1272262445Serwin		b = ISC_LIST_HEAD(rrl->blocks);
1273262445Serwin		ISC_LIST_UNLINK(rrl->blocks, b, link);
1274262445Serwin		isc_mem_put(rrl->mctx, b, b->size);
1275262445Serwin	}
1276262445Serwin
1277262445Serwin	h = rrl->hash;
1278262445Serwin	if (h != NULL)
1279262445Serwin		isc_mem_put(rrl->mctx, h,
1280262445Serwin			    sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1281262445Serwin
1282262445Serwin	h = rrl->old_hash;
1283262445Serwin	if (h != NULL)
1284262445Serwin		isc_mem_put(rrl->mctx, h,
1285262445Serwin			    sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1286262445Serwin
1287262445Serwin	isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
1288262445Serwin}
1289262445Serwin
1290262445Serwinisc_result_t
1291262445Serwindns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) {
1292262445Serwin	dns_rrl_t *rrl;
1293262445Serwin	isc_result_t result;
1294262445Serwin
1295262445Serwin	*rrlp = NULL;
1296262445Serwin
1297262445Serwin	rrl = isc_mem_get(view->mctx, sizeof(*rrl));
1298262445Serwin	if (rrl == NULL)
1299262445Serwin		return (ISC_R_NOMEMORY);
1300262445Serwin	memset(rrl, 0, sizeof(*rrl));
1301262445Serwin	isc_mem_attach(view->mctx, &rrl->mctx);
1302262445Serwin	result = isc_mutex_init(&rrl->lock);
1303262445Serwin	if (result != ISC_R_SUCCESS) {
1304262445Serwin		isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
1305262445Serwin		return (result);
1306262445Serwin	}
1307262445Serwin	isc_stdtime_get(&rrl->ts_bases[0]);
1308262445Serwin
1309262445Serwin	view->rrl = rrl;
1310262445Serwin
1311262445Serwin	result = expand_entries(rrl, min_entries);
1312262445Serwin	if (result != ISC_R_SUCCESS) {
1313262445Serwin		dns_rrl_view_destroy(view);
1314262445Serwin		return (result);
1315262445Serwin	}
1316262445Serwin	result = expand_rrl_hash(rrl, 0);
1317262445Serwin	if (result != ISC_R_SUCCESS) {
1318262445Serwin		dns_rrl_view_destroy(view);
1319262445Serwin		return (result);
1320262445Serwin	}
1321262445Serwin
1322262445Serwin	*rrlp = rrl;
1323262445Serwin	return (ISC_R_SUCCESS);
1324262445Serwin}
1325