/*
 * services/cache/infra.h - infrastructure cache, server rtt and capabilities
 *
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
 *
 * This software is open source.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the NLNET LABS nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
35
/**
 * \file
 *
 * This file contains the infrastructure cache, as well as rate limiting.
 * Note that there are two sorts of rate-limiting here:
 *  - Pre-cache, per-query rate limiting (query ratelimits)
 *  - Post-cache, per-domain name rate limiting (infra-ratelimits)
 */
44
45#ifndef SERVICES_CACHE_INFRA_H
46#define SERVICES_CACHE_INFRA_H
47#include "util/storage/lruhash.h"
48#include "util/storage/dnstree.h"
49#include "util/rtt.h"
50#include "util/netevent.h"
51#include "util/data/msgreply.h"
/* Forward declarations; this header only uses pointers to these types. */
struct slabhash;
struct config_file;
54
55/**
56 * Host information kept for every server, per zone.
57 */
58struct infra_key {
59	/** the host address. */
60	struct sockaddr_storage addr;
61	/** length of addr. */
62	socklen_t addrlen;
63	/** zone name in wireformat */
64	uint8_t* zonename;
65	/** length of zonename */
66	size_t namelen;
67	/** hash table entry, data of type infra_data. */
68	struct lruhash_entry entry;
69};
70
71/**
72 * Host information encompasses host capabilities and retransmission timeouts.
73 * And lameness information (notAuthoritative, noEDNS, Recursive)
74 */
75struct infra_data {
76	/** TTL value for this entry. absolute time. */
77	time_t ttl;
78
79	/** time in seconds (absolute) when probing re-commences, 0 disabled */
80	time_t probedelay;
81	/** round trip times for timeout calculation */
82	struct rtt_info rtt;
83
84	/** edns version that the host supports, -1 means no EDNS */
85	int edns_version;
86	/** if the EDNS lameness is already known or not.
87	 * EDNS lame is when EDNS queries or replies are dropped,
88	 * and cause a timeout */
89	uint8_t edns_lame_known;
90
91	/** is the host lame (does not serve the zone authoritatively),
92	 * or is the host dnssec lame (does not serve DNSSEC data) */
93	uint8_t isdnsseclame;
94	/** is the host recursion lame (not AA, but RA) */
95	uint8_t rec_lame;
96	/** the host is lame (not authoritative) for A records */
97	uint8_t lame_type_A;
98	/** the host is lame (not authoritative) for other query types */
99	uint8_t lame_other;
100
101	/** timeouts counter for type A */
102	uint8_t timeout_A;
103	/** timeouts counter for type AAAA */
104	uint8_t timeout_AAAA;
105	/** timeouts counter for others */
106	uint8_t timeout_other;
107};
108
109/**
110 * Infra cache
111 */
112struct infra_cache {
113	/** The hash table with hosts */
114	struct slabhash* hosts;
115	/** TTL value for host information, in seconds */
116	int host_ttl;
117	/** the hosts that are down are kept probed for recovery */
118	int infra_keep_probing;
119	/** hash table with query rates per name: rate_key, rate_data */
120	struct slabhash* domain_rates;
121	/** ratelimit settings for domains, struct domain_limit_data */
122	rbtree_type domain_limits;
123	/** hash table with query rates per client ip: ip_rate_key, ip_rate_data */
124	struct slabhash* client_ip_rates;
125	/** tree of addr_tree_node, with wait_limit_netblock_info information */
126	rbtree_type wait_limits_netblock;
127	/** tree of addr_tree_node, with wait_limit_netblock_info information */
128	rbtree_type wait_limits_cookie_netblock;
129};
130
/** ratelimit for domains, unless overridden by domain_limits, 0 is off */
extern int infra_dp_ratelimit;
133
134/**
135 * ratelimit settings for domains
136 */
137struct domain_limit_data {
138	/** key for rbtree, must be first in struct, name of domain */
139	struct name_tree_node node;
140	/** ratelimit for exact match with this name, -1 if not set */
141	int lim;
142	/** ratelimit for names below this name, -1 if not set */
143	int below;
144};
145
146/**
147 * key for ratelimit lookups, a domain name
148 */
149struct rate_key {
150	/** lruhash key entry */
151	struct lruhash_entry entry;
152	/** domain name in uncompressed wireformat */
153	uint8_t* name;
154	/** length of name */
155	size_t namelen;
156};
157
/** ip ratelimit, 0 is off */
extern int infra_ip_ratelimit;
/** ip ratelimit for DNS Cookie clients, 0 is off */
extern int infra_ip_ratelimit_cookie;
162
163/**
164 * key for ip_ratelimit lookups, a source IP.
165 */
166struct ip_rate_key {
167	/** lruhash key entry */
168	struct lruhash_entry entry;
169	/** client ip information */
170	struct sockaddr_storage addr;
171	/** length of address */
172	socklen_t addrlen;
173};
174
/** number of seconds to track qps rate */
#define RATE_WINDOW 2

/**
 * Data for ratelimits per domain name
 * It is incremented when a non-cache-lookup happens for that domain name.
 * The name is the delegation point we have for the name.
 * If a new delegation point is found (a referral reply), the previous
 * delegation point is decremented, and the new one is charged with the query.
 */
struct rate_data {
	/** queries counted, for that second. 0 if not in use. */
	int qps[RATE_WINDOW];
	/** what the timestamp is of the qps array members, counter is
	 * valid for that timestamp.  Usually now and now-1. */
	time_t timestamp[RATE_WINDOW];
	/** the number of queries waiting in the mesh */
	int mesh_wait;
};

/** the per client ip rate data is the same structure as struct rate_data */
#define ip_rate_data rate_data
196
197/**
198 * Data to store the configuration per netblock for the wait limit
199 */
200struct wait_limit_netblock_info {
201	/** The addr tree node, this must be first. */
202	struct addr_tree_node node;
203	/** the limit on the amount */
204	int limit;
205};
206
/** infra host cache default hash lookup size */
#define INFRA_HOST_STARTSIZE 32
/** bytes per zonename reserved in the hostcache, dnamelen(zonename.com.);
 * in wireformat that is 1+8 + 1+3 + 1 (root label) = 14 bytes */
#define INFRA_BYTES_NAME 14
211
/**
 * Create infra cache.
 * @param cfg: config parameters or NULL for defaults.
 * @return: new infra cache, or NULL.
 */
struct infra_cache* infra_create(struct config_file* cfg);

/**
 * Delete infra cache.
 * @param infra: infrastructure cache to delete.
 */
void infra_delete(struct infra_cache* infra);

/**
 * Adjust infra cache to use updated configuration settings.
 * This may clean the cache. Operates a bit like realloc.
 * There may be no threading or use by other threads.
 * @param infra: existing cache. If NULL a new infra cache is returned.
 * @param cfg: config options.
 * @return the new infra cache pointer or NULL on error.
 */
struct infra_cache* infra_adjust(struct infra_cache* infra,
	struct config_file* cfg);
235
/**
 * Plain find infra data function (used by the other functions)
 * @param infra: infrastructure cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param name: domain name of zone.
 * @param namelen: length of domain name.
 * @param wr: if true, writelock, else readlock.
 * @return the entry, could be expired (this is not checked) or NULL.
 */
struct lruhash_entry* infra_lookup_nottl(struct infra_cache* infra,
	struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name,
	size_t namelen, int wr);
249
/**
 * Find host information to send a packet. Creates new entry if not found.
 * Lameness is empty. EDNS is 0 (try with first), and rtt is returned for
 * the first message to it.
 * Use this to send a packet only, because it also locks out others when
 * probing is restricted.
 * @param infra: infrastructure cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param name: domain name of zone.
 * @param namelen: length of domain name.
 * @param timenow: what time it is now.
 * @param edns_vs: edns version it supports, is returned.
 * @param edns_lame_known: if EDNS lame (EDNS is dropped in transit) has
 * 	already been probed, is returned.
 * @param to: timeout to use, is returned.
 * @return: 0 on error.
 */
int infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
	socklen_t addrlen, uint8_t* name, size_t namelen,
	time_t timenow, int* edns_vs, uint8_t* edns_lame_known, int* to);
271
/**
 * Set a host to be lame for the given zone.
 * @param infra: infrastructure cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param name: domain name of zone apex.
 * @param namelen: length of domain name.
 * @param timenow: what time it is now.
 * @param dnsseclame: if true the host is set dnssec lame.
 *	if false, the host is marked lame (not serving the zone).
 * @param reclame: if true host is a recursor not AA server.
 *	if false, dnsseclame or marked lame.
 * @param qtype: the query type for which it is lame.
 * @return: 0 on error.
 */
int infra_set_lame(struct infra_cache* infra,
	struct sockaddr_storage* addr, socklen_t addrlen,
	uint8_t* name, size_t namelen, time_t timenow, int dnsseclame,
	int reclame, uint16_t qtype);
291
/**
 * Update rtt information for the host.
 * @param infra: infrastructure cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param name: zone name
 * @param namelen: zone name length
 * @param qtype: query type.
 * @param roundtrip: estimate of roundtrip time in milliseconds or -1 for
 * 	timeout.
 * @param orig_rtt: original rtt for the query that timed out (roundtrip==-1).
 * 	ignored if roundtrip != -1.
 * @param timenow: what time it is now.
 * @return: 0 on error. new rto otherwise.
 */
int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
	socklen_t addrlen, uint8_t* name, size_t namelen, int qtype,
	int roundtrip, int orig_rtt, time_t timenow);
310
/**
 * Update information for the host, store that a TCP transaction works.
 * @param infra: infrastructure cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param name: name of zone
 * @param namelen: length of name
 */
void infra_update_tcp_works(struct infra_cache* infra,
	struct sockaddr_storage* addr, socklen_t addrlen,
	uint8_t* name, size_t namelen);
322
/**
 * Update edns information for the host.
 * @param infra: infrastructure cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param name: name of zone
 * @param namelen: length of name
 * @param edns_version: the version that it publishes.
 * 	If it is known to support EDNS then no-EDNS is not stored over it.
 * @param timenow: what time it is now.
 * @return: 0 on error.
 */
int infra_edns_update(struct infra_cache* infra,
	struct sockaddr_storage* addr, socklen_t addrlen,
	uint8_t* name, size_t namelen, int edns_version, time_t timenow);
338
/**
 * Get Lameness information and average RTT if host is in the cache.
 * This information is to be used for server selection.
 * @param infra: infrastructure cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param name: zone name.
 * @param namelen: zone name length.
 * @param qtype: the query to be made.
 * @param lame: if function returns true, this returns lameness of the zone.
 * @param dnsseclame: if function returns true, this returns if the zone
 *	is dnssec-lame.
 * @param reclame: if function returns true, this is if it is recursion lame.
 * @param rtt: if function returns true, this returns avg rtt of the server.
 * 	The rtt value is unclamped and reflects recent timeouts.
 * @param timenow: what time it is now.
 * @return if found in cache, or false if not (or TTL bad).
 */
int infra_get_lame_rtt(struct infra_cache* infra,
	struct sockaddr_storage* addr, socklen_t addrlen,
	uint8_t* name, size_t namelen, uint16_t qtype,
	int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow);
361
/**
 * Get additional (debug) info on timing.
 * @param infra: infra cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param name: zone name
 * @param namelen: zone name length
 * @param rtt: the rtt_info is copied into here (caller alloced return struct).
 * @param delay: probe delay (if any).
 * @param timenow: what time it is now.
 * @param tA: timeout counter on type A.
 * @param tAAAA: timeout counter on type AAAA.
 * @param tother: timeout counter on type other.
 * @return TTL the infra host element is valid for. If -1: not found in cache.
 *	TTL -2: found but expired.
 */
long long infra_get_host_rto(struct infra_cache* infra,
	struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name,
	size_t namelen, struct rtt_info* rtt, int* delay, time_t timenow,
	int* tA, int* tAAAA, int* tother);
382
/**
 * Increment the query rate counter for a delegation point.
 * @param infra: infra cache.
 * @param name: zone name
 * @param namelen: zone name length
 * @param timenow: what time it is now.
 * @param backoff: if backoff is enabled.
 * @param qinfo: for logging, query name.
 * @param replylist: for logging, querier's address (if any).
 * @return 1 if it could be incremented. 0 if the increment overshot the
 * ratelimit or if in the previous second the ratelimit was exceeded.
 * Failures like alloc failures are not returned (probably as 1).
 */
int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name,
	size_t namelen, time_t timenow, int backoff, struct query_info* qinfo,
	struct comm_reply* replylist);
399
/**
 * Decrement the query rate counter for a delegation point.
 * Because the reply received for the delegation point was pleasant,
 * we do not charge this delegation point with it (i.e. it was a referral).
 * Should call it with same second as when inc() was called.
 * @param infra: infra cache.
 * @param name: zone name
 * @param namelen: zone name length
 * @param timenow: what time it is now.
 */
void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name,
	size_t namelen, time_t timenow);
412
/**
 * See if the query rate counter for a delegation point is exceeded.
 * So, no queries are going to be allowed.
 * @param infra: infra cache.
 * @param name: zone name
 * @param namelen: zone name length
 * @param timenow: what time it is now.
 * @param backoff: if backoff is enabled.
 * @return true if exceeded.
 */
int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name,
	size_t namelen, time_t timenow, int backoff);
425
/** find the maximum rate stored. 0 if no information.
 *  When backoff is enabled look for the maximum in the whole RATE_WINDOW. */
int infra_rate_max(void* data, time_t now, int backoff);

/** find the ratelimit in qps for a domain. 0 if no limit for domain. */
int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name,
	size_t namelen);
433
/**
 * Update query ratelimit hash and decide
 * whether or not a query should be dropped.
 * @param infra: infra cache
 * @param addr: client address
 * @param addrlen: client address length
 * @param timenow: what time it is now.
 * @param has_cookie: if the request came with a DNS Cookie.
 * @param backoff: if backoff is enabled.
 * @param buffer: with query for logging.
 * @return 1 if it could be incremented. 0 if the increment overshot the
 * ratelimit and the query should be dropped.
 */
int infra_ip_ratelimit_inc(struct infra_cache* infra,
	struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow,
	int has_cookie, int backoff, struct sldns_buffer* buffer);
448
/**
 * Get memory used by the infra cache.
 * @param infra: infrastructure cache.
 * @return memory in use in bytes.
 */
size_t infra_get_mem(struct infra_cache* infra);
455
/** calculate size for the hashtable, does not count size of lameness,
 * so the hashtable is a fixed number of items */
size_t infra_sizefunc(void* k, void* d);

/** compare two addresses, returns -1, 0, or +1 */
int infra_compfunc(void* key1, void* key2);

/** delete key, and destroy the lock */
void infra_delkeyfunc(void* k, void* arg);

/** delete data and destroy the lameness hashtable */
void infra_deldatafunc(void* d, void* arg);
468
/** calculate size for the hashtable */
size_t rate_sizefunc(void* k, void* d);

/** compare two names, returns -1, 0, or +1 */
int rate_compfunc(void* key1, void* key2);

/** delete key, and destroy the lock */
void rate_delkeyfunc(void* k, void* arg);

/** delete data */
void rate_deldatafunc(void* d, void* arg);
480
/** calculate size for the client ip hashtable */
size_t ip_rate_sizefunc(void* k, void* d);

/** compare two addresses */
int ip_rate_compfunc(void* key1, void* key2);

/** delete key, and destroy the lock */
void ip_rate_delkeyfunc(void* k, void* arg);

/** delete data, this is the same function as for the domain rate data */
#define ip_rate_deldatafunc rate_deldatafunc
492
/**
 * See if the IP address can have another reply in the wait limit.
 * @param infra: infra cache.
 * @param rep: reply information. NOTE(review): presumably the IP address
 *	is taken from here; confirm in infra.c.
 * @param cookie_valid: NOTE(review): presumably if the query had a valid
 *	DNS Cookie; confirm in infra.c.
 * @param cfg: config options.
 * @return NOTE(review): presumably true if another reply is allowed.
 */
int infra_wait_limit_allowed(struct infra_cache* infra, struct comm_reply* rep,
	int cookie_valid, struct config_file* cfg);

/**
 * Increment number of waiting replies for IP.
 * @param infra: infra cache.
 * @param rep: reply information. NOTE(review): presumably the IP address
 *	is taken from here; confirm in infra.c.
 * @param timenow: what time it is now.
 * @param cfg: config options.
 */
void infra_wait_limit_inc(struct infra_cache* infra, struct comm_reply* rep,
	time_t timenow, struct config_file* cfg);

/**
 * Decrement number of waiting replies for IP.
 * @param infra: infra cache.
 * @param rep: reply information. NOTE(review): presumably the IP address
 *	is taken from here; confirm in infra.c.
 * @param cfg: config options.
 */
void infra_wait_limit_dec(struct infra_cache* infra, struct comm_reply* rep,
	struct config_file* cfg);
504
505#endif /* SERVICES_CACHE_INFRA_H */
506