1/*
2 * edns-subnet/subnetmod.c - edns subnet module. Must be called before validator
3 * and iterator.
4 *
5 * Copyright (c) 2013, NLnet Labs. All rights reserved.
6 *
7 * This software is open source.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 *
16 * Redistributions in binary form must reproduce the above copyright notice,
17 * this list of conditions and the following disclaimer in the documentation
18 * and/or other materials provided with the distribution.
19 *
20 * Neither the name of the NLNET LABS nor the names of its contributors may
21 * be used to endorse or promote products derived from this software without
22 * specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
30 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */
36 /**
37 * \file
38 * subnet module for unbound.
39 */
40
41#include "config.h"
42
43#ifdef CLIENT_SUBNET /* keeps splint happy */
44
45#include "edns-subnet/subnetmod.h"
46#include "edns-subnet/edns-subnet.h"
47#include "edns-subnet/addrtree.h"
48#include "edns-subnet/subnet-whitelist.h"
49
50#include "services/mesh.h"
51#include "services/cache/dns.h"
52#include "util/module.h"
53#include "util/regional.h"
54#include "util/storage/slabhash.h"
55#include "util/config_file.h"
56#include "util/data/msgreply.h"
57#include "sldns/sbuffer.h"
58#include "iterator/iter_utils.h"
59
60/** externally called */
61void
62subnet_data_delete(void *d, void *ATTR_UNUSED(arg))
63{
64	struct subnet_msg_cache_data *r;
65	r = (struct subnet_msg_cache_data*)d;
66	addrtree_delete(r->tree4);
67	addrtree_delete(r->tree6);
68	free(r);
69}
70
71/** externally called */
72size_t
73msg_cache_sizefunc(void *k, void *d)
74{
75	struct msgreply_entry *q = (struct msgreply_entry*)k;
76	struct subnet_msg_cache_data *r = (struct subnet_msg_cache_data*)d;
77	size_t s = sizeof(struct msgreply_entry)
78		+ sizeof(struct subnet_msg_cache_data)
79		+ q->key.qname_len + lock_get_mem(&q->entry.lock);
80	s += addrtree_size(r->tree4);
81	s += addrtree_size(r->tree6);
82	return s;
83}
84
85/** new query for ecs module */
86static int
87subnet_new_qstate(struct module_qstate *qstate, int id)
88{
89	struct subnet_qstate *sq = (struct subnet_qstate*)regional_alloc(
90		qstate->region, sizeof(struct subnet_qstate));
91	if(!sq)
92		return 0;
93	qstate->minfo[id] = sq;
94	memset(sq, 0, sizeof(*sq));
95	sq->started_no_cache_store = qstate->no_cache_store;
96	return 1;
97}
98
99/** Add ecs struct to edns list, after parsing it to wire format. */
100static void
101ecs_opt_list_append(struct ecs_data* ecs, struct edns_option** list,
102	struct module_qstate *qstate)
103{
104	size_t sn_octs, sn_octs_remainder;
105	sldns_buffer* buf = qstate->env->scratch_buffer;
106
107	if(ecs->subnet_validdata) {
108		log_assert(ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 ||
109			ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6);
110		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP4 ||
111			ecs->subnet_source_mask <=  INET_SIZE*8);
112		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP6 ||
113			ecs->subnet_source_mask <= INET6_SIZE*8);
114
115		sn_octs = ecs->subnet_source_mask / 8;
116		sn_octs_remainder =
117			(size_t)((ecs->subnet_source_mask % 8)>0?1:0);
118
119		log_assert(sn_octs + sn_octs_remainder <= INET6_SIZE);
120
121		sldns_buffer_clear(buf);
122		sldns_buffer_write_u16(buf, ecs->subnet_addr_fam);
123		sldns_buffer_write_u8(buf, ecs->subnet_source_mask);
124		sldns_buffer_write_u8(buf, ecs->subnet_scope_mask);
125		sldns_buffer_write(buf, ecs->subnet_addr, sn_octs);
126		if(sn_octs_remainder)
127			sldns_buffer_write_u8(buf, ecs->subnet_addr[sn_octs] &
128				~(0xFF >> (ecs->subnet_source_mask % 8)));
129		sldns_buffer_flip(buf);
130
131		edns_opt_list_append(list,
132				qstate->env->cfg->client_subnet_opcode,
133				sn_octs + sn_octs_remainder + 4,
134				sldns_buffer_begin(buf), qstate->region);
135	}
136}
137
138int ecs_whitelist_check(struct query_info* qinfo,
139	uint16_t ATTR_UNUSED(flags), struct module_qstate* qstate,
140	struct sockaddr_storage* addr, socklen_t addrlen,
141	uint8_t* ATTR_UNUSED(zone), size_t ATTR_UNUSED(zonelen),
142	struct regional* ATTR_UNUSED(region), int id, void* ATTR_UNUSED(cbargs))
143{
144	struct subnet_qstate *sq;
145	struct subnet_env *sn_env;
146
147	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
148		return 1;
149	sn_env = (struct subnet_env*)qstate->env->modinfo[id];
150
151	/* Cache by default, might be disabled after parsing EDNS option
152	 * received from nameserver. */
153	if(!iter_stub_fwd_no_cache(qstate, &qstate->qinfo)) {
154		qstate->no_cache_store = 0;
155	}
156
157	if(sq->ecs_server_out.subnet_validdata && ((sq->subnet_downstream &&
158		qstate->env->cfg->client_subnet_always_forward) ||
159		ecs_is_whitelisted(sn_env->whitelist,
160		addr, addrlen, qinfo->qname, qinfo->qname_len,
161		qinfo->qclass))) {
162		/* Address on whitelist or client query contains ECS option, we
163		 * want to sent out ECS. Only add option if it is not already
164		 * set. */
165		if(!(sq->subnet_sent)) {
166			ecs_opt_list_append(&sq->ecs_server_out,
167				&qstate->edns_opts_back_out, qstate);
168			sq->subnet_sent = 1;
169		}
170	}
171	else if(sq->subnet_sent) {
172		/* Outgoing ECS option is set, but we don't want to sent it to
173		 * this address, remove option. */
174		edns_opt_list_remove(&qstate->edns_opts_back_out,
175			qstate->env->cfg->client_subnet_opcode);
176		sq->subnet_sent = 0;
177	}
178	return 1;
179}
180
181
182void
183subnet_markdel(void* key)
184{
185	struct msgreply_entry *e = (struct msgreply_entry*)key;
186	e->key.qtype = 0;
187	e->key.qclass = 0;
188}
189
190int
191subnetmod_init(struct module_env *env, int id)
192{
193	struct subnet_env *sn_env = (struct subnet_env*)calloc(1,
194		sizeof(struct subnet_env));
195	if(!sn_env) {
196		log_err("malloc failure");
197		return 0;
198	}
199	alloc_init(&sn_env->alloc, NULL, 0);
200	env->modinfo[id] = (void*)sn_env;
201	/* Copy msg_cache settings */
202	sn_env->subnet_msg_cache = slabhash_create(env->cfg->msg_cache_slabs,
203		HASH_DEFAULT_STARTARRAY, env->cfg->msg_cache_size,
204		msg_cache_sizefunc, query_info_compare, query_entry_delete,
205		subnet_data_delete, NULL);
206	slabhash_setmarkdel(sn_env->subnet_msg_cache, &subnet_markdel);
207	if(!sn_env->subnet_msg_cache) {
208		log_err("subnet: could not create cache");
209		free(sn_env);
210		env->modinfo[id] = NULL;
211		return 0;
212	}
213	/* whitelist for edns subnet capable servers */
214	sn_env->whitelist = ecs_whitelist_create();
215	if(!sn_env->whitelist ||
216		!ecs_whitelist_apply_cfg(sn_env->whitelist, env->cfg)) {
217		log_err("subnet: could not create ECS whitelist");
218		slabhash_delete(sn_env->subnet_msg_cache);
219		free(sn_env);
220		env->modinfo[id] = NULL;
221		return 0;
222	}
223
224	verbose(VERB_QUERY, "subnet: option registered (%d)",
225		env->cfg->client_subnet_opcode);
226	/* Create new mesh state for all queries. */
227	env->unique_mesh = 1;
228	if(!edns_register_option(env->cfg->client_subnet_opcode,
229		env->cfg->client_subnet_always_forward /* bypass cache */,
230		0 /* no aggregation */, env)) {
231		log_err("subnet: could not register opcode");
232		ecs_whitelist_delete(sn_env->whitelist);
233		slabhash_delete(sn_env->subnet_msg_cache);
234		free(sn_env);
235		env->modinfo[id] = NULL;
236		return 0;
237	}
238	inplace_cb_register((void*)ecs_whitelist_check, inplace_cb_query, NULL,
239		env, id);
240	inplace_cb_register((void*)ecs_edns_back_parsed,
241		inplace_cb_edns_back_parsed, NULL, env, id);
242	inplace_cb_register((void*)ecs_query_response,
243		inplace_cb_query_response, NULL, env, id);
244	lock_rw_init(&sn_env->biglock);
245	return 1;
246}
247
248void
249subnetmod_deinit(struct module_env *env, int id)
250{
251	struct subnet_env *sn_env;
252	if(!env || !env->modinfo[id])
253		return;
254	sn_env = (struct subnet_env*)env->modinfo[id];
255	lock_rw_destroy(&sn_env->biglock);
256	inplace_cb_delete(env, inplace_cb_edns_back_parsed, id);
257	inplace_cb_delete(env, inplace_cb_query, id);
258	inplace_cb_delete(env, inplace_cb_query_response, id);
259	ecs_whitelist_delete(sn_env->whitelist);
260	slabhash_delete(sn_env->subnet_msg_cache);
261	alloc_clear(&sn_env->alloc);
262	free(sn_env);
263	env->modinfo[id] = NULL;
264}
265
266/** Tells client that upstream has no/improper support */
267static void
268cp_edns_bad_response(struct ecs_data *target, struct ecs_data *source)
269{
270	target->subnet_scope_mask  = 0;
271	target->subnet_source_mask = source->subnet_source_mask;
272	target->subnet_addr_fam    = source->subnet_addr_fam;
273	memcpy(target->subnet_addr, source->subnet_addr, INET6_SIZE);
274	target->subnet_validdata = 1;
275}
276
277static void
278delfunc(void *envptr, void *elemptr) {
279	struct reply_info *elem = (struct reply_info *)elemptr;
280	struct subnet_env *env = (struct subnet_env *)envptr;
281	reply_info_parsedelete(elem, &env->alloc);
282}
283
284static size_t
285sizefunc(void *elemptr) {
286	struct reply_info *elem  = (struct reply_info *)elemptr;
287	return sizeof (struct reply_info) - sizeof (struct rrset_ref)
288		+ elem->rrset_count * sizeof (struct rrset_ref)
289		+ elem->rrset_count * sizeof (struct ub_packed_rrset_key *);
290}
291
292/**
293 * Select tree from cache entry based on edns data.
294 * If for address family not present it will create a new one.
295 * NULL on failure to create. */
296static struct addrtree*
297get_tree(struct subnet_msg_cache_data *data, struct ecs_data *edns,
298	struct subnet_env *env, struct config_file* cfg)
299{
300	struct addrtree *tree;
301	if (edns->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
302		if (!data->tree4)
303			data->tree4 = addrtree_create(
304				cfg->max_client_subnet_ipv4, &delfunc,
305				&sizefunc, env, cfg->max_ecs_tree_size_ipv4);
306		tree = data->tree4;
307	} else {
308		if (!data->tree6)
309			data->tree6 = addrtree_create(
310				cfg->max_client_subnet_ipv6, &delfunc,
311				&sizefunc, env, cfg->max_ecs_tree_size_ipv6);
312		tree = data->tree6;
313	}
314	return tree;
315}
316
317static void
318update_cache(struct module_qstate *qstate, int id)
319{
320	struct msgreply_entry *mrep_entry;
321	struct addrtree *tree;
322	struct reply_info *rep;
323	struct query_info qinf;
324	struct subnet_env *sne = qstate->env->modinfo[id];
325	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];
326	struct slabhash *subnet_msg_cache = sne->subnet_msg_cache;
327	struct ecs_data *edns = &sq->ecs_client_in;
328	size_t i;
329
330	/* We already calculated hash upon lookup */
331	hashvalue_type h = qstate->minfo[id] ?
332		((struct subnet_qstate*)qstate->minfo[id])->qinfo_hash :
333		query_info_hash(&qstate->qinfo, qstate->query_flags);
334	/* Step 1, general qinfo lookup */
335	struct lruhash_entry *lru_entry = slabhash_lookup(subnet_msg_cache, h,
336		&qstate->qinfo, 1);
337	int need_to_insert = (lru_entry == NULL);
338	if (!lru_entry) {
339		void* data = calloc(1,
340			sizeof(struct subnet_msg_cache_data));
341		if(!data) {
342			log_err("malloc failed");
343			return;
344		}
345		qinf = qstate->qinfo;
346		qinf.qname = memdup(qstate->qinfo.qname,
347			qstate->qinfo.qname_len);
348		if(!qinf.qname) {
349			free(data);
350			log_err("memdup failed");
351			return;
352		}
353		mrep_entry = query_info_entrysetup(&qinf, data, h);
354		free(qinf.qname); /* if qname 'consumed', it is set to NULL */
355		if (!mrep_entry) {
356			free(data);
357			log_err("query_info_entrysetup failed");
358			return;
359		}
360		lru_entry = &mrep_entry->entry;
361		lock_rw_wrlock(&lru_entry->lock);
362	}
363	/* lru_entry->lock is locked regardless of how we got here,
364	 * either from the slabhash_lookup, or above in the new allocated */
365	/* Step 2, find the correct tree */
366	if (!(tree = get_tree(lru_entry->data, edns, sne, qstate->env->cfg))) {
367		lock_rw_unlock(&lru_entry->lock);
368		log_err("Subnet cache insertion failed");
369		return;
370	}
371	lock_quick_lock(&sne->alloc.lock);
372	rep = reply_info_copy(qstate->return_msg->rep, &sne->alloc, NULL);
373	lock_quick_unlock(&sne->alloc.lock);
374	if (!rep) {
375		lock_rw_unlock(&lru_entry->lock);
376		log_err("Subnet cache insertion failed");
377		return;
378	}
379
380	/* store RRsets */
381	for(i=0; i<rep->rrset_count; i++) {
382		rep->ref[i].key = rep->rrsets[i];
383		rep->ref[i].id = rep->rrsets[i]->id;
384	}
385	reply_info_set_ttls(rep, *qstate->env->now);
386	rep->flags |= (BIT_RA | BIT_QR); /* fix flags to be sensible for */
387	rep->flags &= ~(BIT_AA | BIT_CD);/* a reply based on the cache   */
388	addrtree_insert(tree, (addrkey_t*)edns->subnet_addr,
389		edns->subnet_source_mask, sq->max_scope, rep,
390		rep->ttl, *qstate->env->now);
391
392	lock_rw_unlock(&lru_entry->lock);
393	if (need_to_insert) {
394		slabhash_insert(subnet_msg_cache, h, lru_entry, lru_entry->data,
395			NULL);
396	}
397}
398
399/** Lookup in cache and reply true iff reply is sent. */
400static int
401lookup_and_reply(struct module_qstate *qstate, int id, struct subnet_qstate *sq)
402{
403	struct lruhash_entry *e;
404	struct module_env *env = qstate->env;
405	struct subnet_env *sne = (struct subnet_env*)env->modinfo[id];
406	hashvalue_type h = query_info_hash(&qstate->qinfo, qstate->query_flags);
407	struct subnet_msg_cache_data *data;
408	struct ecs_data *ecs = &sq->ecs_client_in;
409	struct addrtree *tree;
410	struct addrnode *node;
411	uint8_t scope;
412
413	memset(&sq->ecs_client_out, 0, sizeof(sq->ecs_client_out));
414
415	if (sq) sq->qinfo_hash = h; /* Might be useful on cache miss */
416	e = slabhash_lookup(sne->subnet_msg_cache, h, &qstate->qinfo, 1);
417	if (!e) return 0; /* qinfo not in cache */
418	data = e->data;
419	tree = (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4)?
420		data->tree4 : data->tree6;
421	if (!tree) { /* qinfo in cache but not for this family */
422		lock_rw_unlock(&e->lock);
423		return 0;
424	}
425	node = addrtree_find(tree, (addrkey_t*)ecs->subnet_addr,
426		ecs->subnet_source_mask, *env->now);
427	if (!node) { /* plain old cache miss */
428		lock_rw_unlock(&e->lock);
429		return 0;
430	}
431
432	qstate->return_msg = tomsg(NULL, &qstate->qinfo,
433		(struct reply_info *)node->elem, qstate->region, *env->now, 0,
434		env->scratch);
435	scope = (uint8_t)node->scope;
436	lock_rw_unlock(&e->lock);
437
438	if (!qstate->return_msg) { /* Failed allocation or expired TTL */
439		return 0;
440	}
441
442	if (sq->subnet_downstream) { /* relay to interested client */
443		sq->ecs_client_out.subnet_scope_mask = scope;
444		sq->ecs_client_out.subnet_addr_fam = ecs->subnet_addr_fam;
445		sq->ecs_client_out.subnet_source_mask = ecs->subnet_source_mask;
446		memcpy(&sq->ecs_client_out.subnet_addr, &ecs->subnet_addr,
447			INET6_SIZE);
448		sq->ecs_client_out.subnet_validdata = 1;
449	}
450	return 1;
451}
452
/**
 * Test first bits of addresses for equality. Only the leading net bits
 * take part in the comparison; bits after the prefix are ignored, also
 * inside a trailing partial octet.
 * Caller is responsible for making sure that both a and b are at least
 * (net+7)/8 octets long.
 * @param a: first address.
 * @param b: second address.
 * @param net: Number of bits to test.
 * @return: 1 if equal, 0 otherwise.
 */
static int
common_prefix(uint8_t *a, uint8_t *b, uint8_t net)
{
	size_t n = (size_t)net / 8;
	unsigned int rem = (unsigned int)net % 8;
	uint8_t mask;

	if(memcmp(a, b, n) != 0)
		return 0;
	if(rem == 0)
		return 1;
	/* select the top rem bits of the partially covered octet */
	mask = (uint8_t)(0xFF << (8 - rem));
	return ((a[n] ^ b[n]) & mask) == 0;
}
467
468static enum module_ext_state
469eval_response(struct module_qstate *qstate, int id, struct subnet_qstate *sq)
470{
471	struct subnet_env *sne = qstate->env->modinfo[id];
472
473	struct ecs_data *c_in  = &sq->ecs_client_in; /* rcvd from client */
474	struct ecs_data *c_out = &sq->ecs_client_out;/* will send to client */
475	struct ecs_data *s_in  = &sq->ecs_server_in; /* rcvd from auth */
476	struct ecs_data *s_out = &sq->ecs_server_out;/* sent to auth */
477
478	memset(c_out, 0, sizeof(*c_out));
479
480	if (!qstate->return_msg) {
481		/* already an answer and its not a message, but retain
482		 * the actual rcode, instead of module_error, so send
483		 * module_finished */
484		return module_finished;
485	}
486
487	/* We have not asked for subnet data */
488	if (!sq->subnet_sent) {
489		if (s_in->subnet_validdata)
490			verbose(VERB_QUERY, "subnet: received spurious data");
491		if (sq->subnet_downstream) /* Copy back to client */
492			cp_edns_bad_response(c_out, c_in);
493		return module_finished;
494	}
495
496	/* subnet sent but nothing came back */
497	if (!s_in->subnet_validdata) {
498		/* The authority indicated no support for edns subnet. As a
499		 * consequence the answer ended up in the regular cache. It
500		 * is still usefull to put it in the edns subnet cache for
501		 * when a client explicitly asks for subnet specific answer. */
502		verbose(VERB_QUERY, "subnet: Authority indicates no support");
503		if(!sq->started_no_cache_store) {
504			lock_rw_wrlock(&sne->biglock);
505			update_cache(qstate, id);
506			lock_rw_unlock(&sne->biglock);
507		}
508		if (sq->subnet_downstream)
509			cp_edns_bad_response(c_out, c_in);
510		return module_finished;
511	}
512
513	/* Being here means we have asked for and got a subnet specific
514	 * answer. Also, the answer from the authority is not yet cached
515	 * anywhere. */
516
517	/* can we accept response? */
518	if(s_out->subnet_addr_fam != s_in->subnet_addr_fam ||
519		s_out->subnet_source_mask != s_in->subnet_source_mask ||
520		!common_prefix(s_out->subnet_addr, s_in->subnet_addr,
521			s_out->subnet_source_mask))
522	{
523		/* we can not accept, restart query without option */
524		verbose(VERB_QUERY, "subnet: forged data");
525		s_out->subnet_validdata = 0;
526		(void)edns_opt_list_remove(&qstate->edns_opts_back_out,
527			qstate->env->cfg->client_subnet_opcode);
528		sq->subnet_sent = 0;
529		return module_restart_next;
530	}
531
532	lock_rw_wrlock(&sne->biglock);
533	if(!sq->started_no_cache_store) {
534		update_cache(qstate, id);
535	}
536	sne->num_msg_nocache++;
537	lock_rw_unlock(&sne->biglock);
538
539	if (sq->subnet_downstream) {
540		/* Client wants to see the answer, echo option back
541		 * and adjust the scope. */
542		c_out->subnet_addr_fam = c_in->subnet_addr_fam;
543		c_out->subnet_source_mask = c_in->subnet_source_mask;
544		memcpy(&c_out->subnet_addr, &c_in->subnet_addr, INET6_SIZE);
545		c_out->subnet_scope_mask = sq->max_scope;
546		/* Limit scope returned to client to scope used for caching. */
547		if(c_out->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
548			if(c_out->subnet_scope_mask >
549				qstate->env->cfg->max_client_subnet_ipv4) {
550				c_out->subnet_scope_mask =
551					qstate->env->cfg->max_client_subnet_ipv4;
552			}
553		}
554		else if(c_out->subnet_scope_mask >
555				qstate->env->cfg->max_client_subnet_ipv6) {
556				c_out->subnet_scope_mask =
557					qstate->env->cfg->max_client_subnet_ipv6;
558		}
559		c_out->subnet_validdata = 1;
560	}
561	return module_finished;
562}
563
564/** Parse EDNS opt data containing ECS */
565static int
566parse_subnet_option(struct edns_option* ecs_option, struct ecs_data* ecs)
567{
568	memset(ecs, 0, sizeof(*ecs));
569	if (ecs_option->opt_len < 4)
570		return 0;
571
572	ecs->subnet_addr_fam = sldns_read_uint16(ecs_option->opt_data);
573	ecs->subnet_source_mask = ecs_option->opt_data[2];
574	ecs->subnet_scope_mask = ecs_option->opt_data[3];
575	/* remaining bytes indicate address */
576
577	/* validate input*/
578	/* option length matches calculated length? */
579	if (ecs_option->opt_len != (size_t)((ecs->subnet_source_mask+7)/8 + 4))
580		return 0;
581	if (ecs_option->opt_len - 4 > INET6_SIZE || ecs_option->opt_len == 0)
582		return 0;
583	if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
584		if (ecs->subnet_source_mask > 32 || ecs->subnet_scope_mask > 32)
585			return 0;
586	} else if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6) {
587		if (ecs->subnet_source_mask > 128 ||
588			ecs->subnet_scope_mask > 128)
589			return 0;
590	} else
591		return 0;
592
593	/* valid ECS data, write to ecs_data */
594	if (copy_clear(ecs->subnet_addr, INET6_SIZE, ecs_option->opt_data + 4,
595		ecs_option->opt_len - 4, ecs->subnet_source_mask))
596		return 0;
597	ecs->subnet_validdata = 1;
598	return 1;
599}
600
601static void
602subnet_option_from_ss(struct sockaddr_storage *ss, struct ecs_data* ecs,
603	struct config_file* cfg)
604{
605	void* sinaddr;
606
607	/* Construct subnet option from original query */
608	if(((struct sockaddr_in*)ss)->sin_family == AF_INET) {
609		ecs->subnet_source_mask = cfg->max_client_subnet_ipv4;
610		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP4;
611		sinaddr = &((struct sockaddr_in*)ss)->sin_addr;
612		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
613			(uint8_t *)sinaddr, INET_SIZE,
614			ecs->subnet_source_mask)) {
615			ecs->subnet_validdata = 1;
616		}
617	}
618#ifdef INET6
619	else {
620		ecs->subnet_source_mask = cfg->max_client_subnet_ipv6;
621		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP6;
622		sinaddr = &((struct sockaddr_in6*)ss)->sin6_addr;
623		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
624			(uint8_t *)sinaddr, INET6_SIZE,
625			ecs->subnet_source_mask)) {
626			ecs->subnet_validdata = 1;
627		}
628	}
629#else
630			/* We don't know how to handle ip6, just pass */
631#endif /* INET6 */
632}
633
634int
635ecs_query_response(struct module_qstate* qstate, struct dns_msg* response,
636	int id, void* ATTR_UNUSED(cbargs))
637{
638	struct subnet_qstate *sq;
639
640	if(!response || !(sq=(struct subnet_qstate*)qstate->minfo[id]))
641		return 1;
642
643	if(sq->subnet_sent &&
644		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_REFUSED) {
645		/* REFUSED response to ECS query, remove ECS option. */
646		edns_opt_list_remove(&qstate->edns_opts_back_out,
647			qstate->env->cfg->client_subnet_opcode);
648		sq->subnet_sent = 0;
649		memset(&sq->ecs_server_out, 0, sizeof(sq->ecs_server_out));
650	} else if (!sq->track_max_scope &&
651		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_NOERROR &&
652		response->rep->an_numrrsets > 0
653		) {
654		struct ub_packed_rrset_key* s = response->rep->rrsets[0];
655		if(ntohs(s->rk.type) == LDNS_RR_TYPE_CNAME &&
656			query_dname_compare(qstate->qinfo.qname,
657			s->rk.dname) == 0) {
658			/* CNAME response for QNAME. From now on keep track of
659			 * longest received ECS prefix for all queries on this
660			 * qstate. */
661			sq->track_max_scope = 1;
662		}
663	}
664	return 1;
665}
666
667int
668ecs_edns_back_parsed(struct module_qstate* qstate, int id,
669	void* ATTR_UNUSED(cbargs))
670{
671	struct subnet_qstate *sq;
672	struct edns_option* ecs_opt;
673
674	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
675		return 1;
676	if((ecs_opt = edns_opt_list_find(
677		qstate->edns_opts_back_in,
678		qstate->env->cfg->client_subnet_opcode)) &&
679		parse_subnet_option(ecs_opt, &sq->ecs_server_in) &&
680		sq->subnet_sent && sq->ecs_server_in.subnet_validdata) {
681			/* Only skip global cache store if we sent an ECS option
682			 * and received one back. Answers from non-whitelisted
683			 * servers will end up in global cache. Answers for
684			 * queries with 0 source will not (unless nameserver
685			 * does not support ECS). */
686			qstate->no_cache_store = 1;
687			if(!sq->track_max_scope || (sq->track_max_scope &&
688				sq->ecs_server_in.subnet_scope_mask >
689				sq->max_scope))
690				sq->max_scope = sq->ecs_server_in.subnet_scope_mask;
691	}
692
693	return 1;
694}
695
696void
697subnetmod_operate(struct module_qstate *qstate, enum module_ev event,
698	int id, struct outbound_entry* outbound)
699{
700	struct subnet_env *sne = qstate->env->modinfo[id];
701	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];
702
703	verbose(VERB_QUERY, "subnet[module %d] operate: extstate:%s "
704		"event:%s", id, strextstate(qstate->ext_state[id]),
705		strmodulevent(event));
706	log_query_info(VERB_QUERY, "subnet operate: query", &qstate->qinfo);
707
708	if((event == module_event_new || event == module_event_pass) &&
709		sq == NULL) {
710		struct edns_option* ecs_opt;
711		if(!subnet_new_qstate(qstate, id)) {
712			qstate->return_msg = NULL;
713			qstate->ext_state[id] = module_finished;
714			return;
715		}
716
717		sq = (struct subnet_qstate*)qstate->minfo[id];
718
719		if((ecs_opt = edns_opt_list_find(
720			qstate->edns_opts_front_in,
721			qstate->env->cfg->client_subnet_opcode))) {
722			if(!parse_subnet_option(ecs_opt, &sq->ecs_client_in)) {
723				/* Wrongly formatted ECS option. RFC mandates to
724				 * return FORMERROR. */
725				qstate->return_rcode = LDNS_RCODE_FORMERR;
726				qstate->ext_state[id] = module_finished;
727				return;
728			}
729			sq->subnet_downstream = 1;
730		}
731		else if(qstate->mesh_info->reply_list) {
732			subnet_option_from_ss(
733				&qstate->mesh_info->reply_list->query_reply.addr,
734				&sq->ecs_client_in, qstate->env->cfg);
735		}
736
737		if(sq->ecs_client_in.subnet_validdata == 0) {
738			/* No clients are interested in result or we could not
739			 * parse it, we don't do client subnet */
740			sq->ecs_server_out.subnet_validdata = 0;
741			verbose(VERB_ALGO, "subnet: pass to next module");
742			qstate->ext_state[id] = module_wait_module;
743			return;
744		}
745
746		/* Limit to minimum allowed source mask */
747		if(sq->ecs_client_in.subnet_source_mask != 0 && (
748			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 &&
749			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv4) ||
750			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6 &&
751			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv6))) {
752				qstate->return_rcode = LDNS_RCODE_REFUSED;
753				qstate->ext_state[id] = module_finished;
754				return;
755		}
756
757		lock_rw_wrlock(&sne->biglock);
758		if (lookup_and_reply(qstate, id, sq)) {
759			sne->num_msg_cache++;
760			lock_rw_unlock(&sne->biglock);
761			verbose(VERB_QUERY, "subnet: answered from cache");
762			qstate->ext_state[id] = module_finished;
763
764			ecs_opt_list_append(&sq->ecs_client_out,
765				&qstate->edns_opts_front_out, qstate);
766			return;
767		}
768		lock_rw_unlock(&sne->biglock);
769
770		sq->ecs_server_out.subnet_addr_fam =
771			sq->ecs_client_in.subnet_addr_fam;
772		sq->ecs_server_out.subnet_source_mask =
773			sq->ecs_client_in.subnet_source_mask;
774		/* Limit source prefix to configured maximum */
775		if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4
776			&& sq->ecs_server_out.subnet_source_mask >
777			qstate->env->cfg->max_client_subnet_ipv4)
778			sq->ecs_server_out.subnet_source_mask =
779				qstate->env->cfg->max_client_subnet_ipv4;
780		else if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6
781			&& sq->ecs_server_out.subnet_source_mask >
782			qstate->env->cfg->max_client_subnet_ipv6)
783			sq->ecs_server_out.subnet_source_mask =
784				qstate->env->cfg->max_client_subnet_ipv6;
785		/* Safe to copy completely, even if the source is limited by the
786		 * configuration. ecs_opt_list_append() will limit the address.
787		 * */
788		memcpy(&sq->ecs_server_out.subnet_addr,
789			sq->ecs_client_in.subnet_addr, INET6_SIZE);
790		sq->ecs_server_out.subnet_scope_mask = 0;
791		sq->ecs_server_out.subnet_validdata = 1;
792		if(sq->ecs_server_out.subnet_source_mask != 0 &&
793			qstate->env->cfg->client_subnet_always_forward &&
794			sq->subnet_downstream)
795			/* ECS specific data required, do not look at the global
796			 * cache in other modules. */
797			qstate->no_cache_lookup = 1;
798
799		/* pass request to next module */
800		verbose(VERB_ALGO,
801			"subnet: not found in cache. pass to next module");
802		qstate->ext_state[id] = module_wait_module;
803		return;
804	}
805	/* Query handed back by next module, we have a 'final' answer */
806	if(sq && event == module_event_moddone) {
807		qstate->ext_state[id] = eval_response(qstate, id, sq);
808		if(qstate->ext_state[id] == module_finished &&
809			qstate->return_msg) {
810			ecs_opt_list_append(&sq->ecs_client_out,
811				&qstate->edns_opts_front_out, qstate);
812		}
813		qstate->no_cache_store = sq->started_no_cache_store;
814		return;
815	}
816	if(sq && outbound) {
817		return;
818	}
819	/* We are being revisited */
820	if(event == module_event_pass || event == module_event_new) {
821		/* Just pass it on, we already did the work */
822		verbose(VERB_ALGO, "subnet: pass to next module");
823		qstate->ext_state[id] = module_wait_module;
824		return;
825	}
826	if(!sq && (event == module_event_moddone)) {
827		/* during priming, module done but we never started */
828		qstate->ext_state[id] = module_finished;
829		return;
830	}
831	log_err("subnet: bad event %s", strmodulevent(event));
832	qstate->ext_state[id] = module_error;
833	return;
834}
835
836void
837subnetmod_clear(struct module_qstate *ATTR_UNUSED(qstate),
838	int ATTR_UNUSED(id))
839{
840	/* qstate has no data outside region */
841}
842
843void
844subnetmod_inform_super(struct module_qstate *ATTR_UNUSED(qstate),
845	int ATTR_UNUSED(id), struct module_qstate *ATTR_UNUSED(super))
846{
847	/* Not used */
848}
849
850size_t
851subnetmod_get_mem(struct module_env *env, int id)
852{
853	struct subnet_env *sn_env = env->modinfo[id];
854	if (!sn_env) return 0;
855	return sizeof(*sn_env) +
856		slabhash_get_mem(sn_env->subnet_msg_cache) +
857		ecs_whitelist_get_mem(sn_env->whitelist);
858}
859
860/**
861 * The module function block
862 */
863static struct module_func_block subnetmod_block = {
864	"subnet", &subnetmod_init, &subnetmod_deinit, &subnetmod_operate,
865	&subnetmod_inform_super, &subnetmod_clear, &subnetmod_get_mem
866};
867
868struct module_func_block*
869subnetmod_get_funcblock(void)
870{
871	return &subnetmod_block;
872}
873
/**
 * Wrapper that exposes the static sizefunc() to the unit tests.
 * @param elemptr: passed through to sizefunc().
 * @return the value returned by sizefunc().
 */
size_t
unittest_wrapper_subnetmod_sizefunc(void *elemptr)
{
	size_t result = sizefunc(elemptr);

	return result;
}
880
881#endif  /* CLIENT_SUBNET */
882