/* mesh.c revision 269257 */
1/*
2 * services/mesh.c - deal with mesh of query states and handle events for that.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains functions to assist in dealing with a mesh of
40 * query states. This mesh is supposed to be thread-specific.
41 * It consists of query states (per qname, qtype, qclass) and connections
42 * between query states and the super and subquery states, and replies to
43 * send back to clients.
44 */
45#include "config.h"
46#include "services/mesh.h"
47#include "services/outbound_list.h"
48#include "services/cache/dns.h"
49#include "util/log.h"
50#include "util/net_help.h"
51#include "util/module.h"
52#include "util/regional.h"
53#include "util/data/msgencode.h"
54#include "util/timehist.h"
55#include "util/fptr_wlist.h"
56#include "util/alloc.h"
57#include "util/config_file.h"
58#include "ldns/sbuffer.h"
59
/** subtract timers; results do not overflow or become negative */
static void
timeval_subtract(struct timeval* d, const struct timeval* end, const struct timeval* start)
{
#ifndef S_SPLINT_S
	time_t usec = end->tv_usec;
	d->tv_sec = end->tv_sec - start->tv_sec;
	if(usec < start->tv_usec) {
		/* borrow one second into the microseconds part */
		usec += 1000000;
		d->tv_sec -= 1;
	}
	d->tv_usec = usec - start->tv_usec;
#endif
}
74
/** add timers; results do not overflow or become negative.
 * Fix: normalize when tv_usec reaches exactly 1000000 as well; the old
 * '>' test left the invalid value 1000000 in tv_usec (valid range is
 * 0..999999). */
static void
timeval_add(struct timeval* d, const struct timeval* add)
{
#ifndef S_SPLINT_S
	d->tv_sec += add->tv_sec;
	d->tv_usec += add->tv_usec;
	if(d->tv_usec >= 1000000) {
		/* carry the overflowed microseconds into the seconds */
		d->tv_usec -= 1000000;
		d->tv_sec++;
	}
#endif
}
88
/** divide a sum of timers by a count, to compute the average */
static void
timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d)
{
#ifndef S_SPLINT_S
	size_t carry_sec;
	/* guard against division by zero; report zero average */
	if(d == 0) {
		avg->tv_sec = 0;
		avg->tv_usec = 0;
		return;
	}
	avg->tv_sec = sum->tv_sec / d;
	avg->tv_usec = sum->tv_usec / d;
	/* the seconds that did not divide evenly are converted to
	 * microseconds and folded into the usec average */
	carry_sec = sum->tv_sec - avg->tv_sec*d;
	avg->tv_usec += (carry_sec*1000000)/d;
#endif
}
107
/** compare time values for the histogram; returns true if x <= y */
static int
timeval_smaller(const struct timeval* x, const struct timeval* y)
{
#ifndef S_SPLINT_S
	/* different seconds decide the order outright */
	if(x->tv_sec != y->tv_sec)
		return x->tv_sec < y->tv_sec;
	/* equal seconds: microseconds break the tie (equal counts too) */
	return x->tv_usec <= y->tv_usec;
#endif
}
123
124int
125mesh_state_compare(const void* ap, const void* bp)
126{
127	struct mesh_state* a = (struct mesh_state*)ap;
128	struct mesh_state* b = (struct mesh_state*)bp;
129
130	if(a->s.is_priming && !b->s.is_priming)
131		return -1;
132	if(!a->s.is_priming && b->s.is_priming)
133		return 1;
134
135	if((a->s.query_flags&BIT_RD) && !(b->s.query_flags&BIT_RD))
136		return -1;
137	if(!(a->s.query_flags&BIT_RD) && (b->s.query_flags&BIT_RD))
138		return 1;
139
140	if((a->s.query_flags&BIT_CD) && !(b->s.query_flags&BIT_CD))
141		return -1;
142	if(!(a->s.query_flags&BIT_CD) && (b->s.query_flags&BIT_CD))
143		return 1;
144
145	return query_info_compare(&a->s.qinfo, &b->s.qinfo);
146}
147
148int
149mesh_state_ref_compare(const void* ap, const void* bp)
150{
151	struct mesh_state_ref* a = (struct mesh_state_ref*)ap;
152	struct mesh_state_ref* b = (struct mesh_state_ref*)bp;
153	return mesh_state_compare(a->s, b->s);
154}
155
156struct mesh_area*
157mesh_create(struct module_stack* stack, struct module_env* env)
158{
159	struct mesh_area* mesh = calloc(1, sizeof(struct mesh_area));
160	if(!mesh) {
161		log_err("mesh area alloc: out of memory");
162		return NULL;
163	}
164	mesh->histogram = timehist_setup();
165	mesh->qbuf_bak = sldns_buffer_new(env->cfg->msg_buffer_size);
166	if(!mesh->histogram || !mesh->qbuf_bak) {
167		free(mesh);
168		log_err("mesh area alloc: out of memory");
169		return NULL;
170	}
171	mesh->mods = *stack;
172	mesh->env = env;
173	rbtree_init(&mesh->run, &mesh_state_compare);
174	rbtree_init(&mesh->all, &mesh_state_compare);
175	mesh->num_reply_addrs = 0;
176	mesh->num_reply_states = 0;
177	mesh->num_detached_states = 0;
178	mesh->num_forever_states = 0;
179	mesh->stats_jostled = 0;
180	mesh->stats_dropped = 0;
181	mesh->max_reply_states = env->cfg->num_queries_per_thread;
182	mesh->max_forever_states = (mesh->max_reply_states+1)/2;
183#ifndef S_SPLINT_S
184	mesh->jostle_max.tv_sec = (time_t)(env->cfg->jostle_time / 1000);
185	mesh->jostle_max.tv_usec = (time_t)((env->cfg->jostle_time % 1000)
186		*1000);
187#endif
188	return mesh;
189}
190
/** helper for mesh_delete: fully delete the mesh state at tree node n */
static void
mesh_delete_helper(rbnode_t* n)
{
	struct mesh_state* mstate = (struct mesh_state*)n->key;
	/* perform a full delete, not only 'cleanup' routine,
	 * because other callbacks expect a clean state in the mesh.
	 * For 're-entrant' calls */
	mesh_state_delete(&mstate->s);
	/* but because these delete the items from the tree, postorder
	 * traversal and rbtree rebalancing do not work together */
}
203
204void
205mesh_delete(struct mesh_area* mesh)
206{
207	if(!mesh)
208		return;
209	/* free all query states */
210	while(mesh->all.count)
211		mesh_delete_helper(mesh->all.root);
212	timehist_delete(mesh->histogram);
213	sldns_buffer_free(mesh->qbuf_bak);
214	free(mesh);
215}
216
/**
 * Delete all mesh states and reset counters and lists, while keeping
 * the mesh area itself allocated (e.g. for reuse after a reload).
 * Pending reply addresses are accounted as dropped queries.
 */
void
mesh_delete_all(struct mesh_area* mesh)
{
	/* free all query states */
	while(mesh->all.count)
		mesh_delete_helper(mesh->all.root);
	/* count the replies that will now never be sent as dropped;
	 * must happen before num_reply_addrs is reset below */
	mesh->stats_dropped += mesh->num_reply_addrs;
	/* clear mesh area references */
	rbtree_init(&mesh->run, &mesh_state_compare);
	rbtree_init(&mesh->all, &mesh_state_compare);
	mesh->num_reply_addrs = 0;
	mesh->num_reply_states = 0;
	mesh->num_detached_states = 0;
	mesh->num_forever_states = 0;
	mesh->forever_first = NULL;
	mesh->forever_last = NULL;
	mesh->jostle_first = NULL;
	mesh->jostle_last = NULL;
}
236
/**
 * See if there is space for a new reply state; if the mesh is full, try
 * to jostle out the oldest entry on the jostle list when it is older
 * than the configured jostle time.
 * @param mesh: the mesh area.
 * @param qbuf: buffer with the incoming query, saved and restored around
 *	the state delete (or NULL if there is nothing to preserve).
 * @return true if space is available or was made, false if full.
 */
int mesh_make_new_space(struct mesh_area* mesh, sldns_buffer* qbuf)
{
	struct mesh_state* m = mesh->jostle_first;
	/* free space is available */
	if(mesh->num_reply_states < mesh->max_reply_states)
		return 1;
	/* try to kick out a jostle-list item */
	if(m && m->reply_list && m->list_select == mesh_jostle_list) {
		/* how old is it? */
		struct timeval age;
		timeval_subtract(&age, mesh->env->now_tv,
			&m->reply_list->start_time);
		if(timeval_smaller(&mesh->jostle_max, &age)) {
			/* its a goner */
			log_nametypeclass(VERB_ALGO, "query jostled out to "
				"make space for a new one",
				m->s.qinfo.qname, m->s.qinfo.qtype,
				m->s.qinfo.qclass);
			/* backup the query, because deleting the state may
			 * invalidate pointers into the query buffer */
			if(qbuf) sldns_buffer_copy(mesh->qbuf_bak, qbuf);
			/* notify supers */
			if(m->super_set.count > 0) {
				verbose(VERB_ALGO, "notify supers of failure");
				m->s.return_msg = NULL;
				m->s.return_rcode = LDNS_RCODE_SERVFAIL;
				mesh_walk_supers(mesh, m);
			}
			mesh->stats_jostled ++;
			mesh_state_delete(&m->s);
			/* restore the query - note that the qinfo ptr to
			 * the querybuffer is then correct again. */
			if(qbuf) sldns_buffer_copy(qbuf, mesh->qbuf_bak);
			return 1;
		}
	}
	/* no space for new item */
	return 0;
}
275
/**
 * Handle a new query from a client: find or create the mesh state for
 * this question, attach the client's reply address to it and run it.
 * The query is dropped (with stats accounting) when the mesh is full.
 * @param mesh: the mesh area.
 * @param qinfo: query name, type and class.
 * @param qflags: query flags from the client packet.
 * @param edns: edns data from the client packet.
 * @param rep: where to send the answer back.
 * @param qid: query id from the client packet.
 */
void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
        uint16_t qflags, struct edns_data* edns, struct comm_reply* rep,
        uint16_t qid)
{
	/* do not use CD flag from user for mesh state, we want the CD-query
	 * to receive validation anyway, to protect our cache contents and
	 * avoid bad-data in this cache that a downstream validator cannot
	 * remove from this cache */
	struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0);
	int was_detached = 0;
	int was_noreply = 0;
	int added = 0;
	/* does this create a new reply state? */
	if(!s || s->list_select == mesh_no_list) {
		if(!mesh_make_new_space(mesh, rep->c->buffer)) {
			verbose(VERB_ALGO, "Too many queries. dropping "
				"incoming query.");
			comm_point_drop_reply(rep);
			mesh->stats_dropped ++;
			return;
		}
		/* for this new reply state, the reply address is free,
		 * so the limit of reply addresses does not stop reply states*/
	} else {
		/* protect our memory usage from storing reply addresses */
		if(mesh->num_reply_addrs > mesh->max_reply_states*16) {
			verbose(VERB_ALGO, "Too many requests queued. "
				"dropping incoming query.");
			mesh->stats_dropped++;
			comm_point_drop_reply(rep);
			return;
		}
	}
	/* see if it already exists, if not, create one */
	if(!s) {
#ifdef UNBOUND_DEBUG
		struct rbnode_t* n;
#endif
		s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0);
		if(!s) {
			log_err("mesh_state_create: out of memory; SERVFAIL");
			error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL,
				qinfo, qid, qflags, edns);
			comm_point_send_reply(rep);
			return;
		}
		/* log_assert is a no-op without UNBOUND_DEBUG, so n is
		 * only needed (and declared) in debug builds */
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_insert(&mesh->all, &s->node);
		log_assert(n != NULL);
		/* set detached (it is now) */
		mesh->num_detached_states++;
		added = 1;
	}
	/* remember the state's situation before the reply is attached,
	 * so the counters can be updated correctly afterwards */
	if(!s->reply_list && !s->cb_list && s->super_set.count == 0)
		was_detached = 1;
	if(!s->reply_list && !s->cb_list)
		was_noreply = 1;
	/* add reply to s */
	if(!mesh_state_add_reply(s, edns, rep, qid, qflags, qinfo->qname)) {
			log_err("mesh_new_client: out of memory; SERVFAIL");
			error_encode(rep->c->buffer, LDNS_RCODE_SERVFAIL,
				qinfo, qid, qflags, edns);
			comm_point_send_reply(rep);
			/* a state we just created is removed again */
			if(added)
				mesh_state_delete(&s->s);
			return;
	}
	/* update statistics */
	if(was_detached) {
		log_assert(mesh->num_detached_states > 0);
		mesh->num_detached_states--;
	}
	if(was_noreply) {
		mesh->num_reply_states ++;
	}
	mesh->num_reply_addrs++;
	if(s->list_select == mesh_no_list) {
		/* move to either the forever or the jostle_list */
		if(mesh->num_forever_states < mesh->max_forever_states) {
			mesh->num_forever_states ++;
			mesh_list_insert(s, &mesh->forever_first,
				&mesh->forever_last);
			s->list_select = mesh_forever_list;
		} else {
			mesh_list_insert(s, &mesh->jostle_first,
				&mesh->jostle_last);
			s->list_select = mesh_jostle_list;
		}
	}
	/* only a newly created state needs the initial run */
	if(added)
		mesh_run(mesh, s, module_event_new, NULL);
}
372
/**
 * Add an internal callback request to the mesh: like mesh_new_client,
 * but the result is delivered through a callback function instead of a
 * network reply, and no capacity limits apply.
 * @param mesh: the mesh area.
 * @param qinfo: query name, type and class.
 * @param qflags: query flags.
 * @param edns: edns data.
 * @param buf: buffer for the result message.
 * @param qid: query id to use in the result.
 * @param cb: callback function (must be whitelisted).
 * @param cb_arg: user argument passed to the callback.
 * @return false on allocation failure.
 */
int
mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo,
	uint16_t qflags, struct edns_data* edns, sldns_buffer* buf,
	uint16_t qid, mesh_cb_func_t cb, void* cb_arg)
{
	struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0);
	int was_detached = 0;
	int was_noreply = 0;
	int added = 0;
	/* there are no limits on the number of callbacks */

	/* see if it already exists, if not, create one */
	if(!s) {
#ifdef UNBOUND_DEBUG
		struct rbnode_t* n;
#endif
		s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0);
		if(!s) {
			return 0;
		}
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_insert(&mesh->all, &s->node);
		log_assert(n != NULL);
		/* set detached (it is now) */
		mesh->num_detached_states++;
		added = 1;
	}
	/* record the state's prior situation for the counter updates */
	if(!s->reply_list && !s->cb_list && s->super_set.count == 0)
		was_detached = 1;
	if(!s->reply_list && !s->cb_list)
		was_noreply = 1;
	/* add reply to s */
	if(!mesh_state_add_cb(s, edns, buf, cb, cb_arg, qid, qflags)) {
			/* roll back the state we just created */
			if(added)
				mesh_state_delete(&s->s);
			return 0;
	}
	/* update statistics */
	if(was_detached) {
		log_assert(mesh->num_detached_states > 0);
		mesh->num_detached_states--;
	}
	if(was_noreply) {
		mesh->num_reply_states ++;
	}
	mesh->num_reply_addrs++;
	if(added)
		mesh_run(mesh, s, module_event_new, NULL);
	return 1;
}
427
/**
 * Start a prefetch query: resolve the question without any client reply
 * attached, bypassing the cache so fresh data is fetched.
 * @param mesh: the mesh area.
 * @param qinfo: query name, type and class.
 * @param qflags: query flags.
 * @param leeway: TTL leeway; cached data within this margin of expiry
 *	counts as usable for the modules.
 */
void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo,
        uint16_t qflags, time_t leeway)
{
	struct mesh_state* s = mesh_area_find(mesh, qinfo, qflags&BIT_RD, 0);
#ifdef UNBOUND_DEBUG
	struct rbnode_t* n;
#endif
	/* already exists, and for a different purpose perhaps.
	 * if mesh_no_list, keep it that way. */
	if(s) {
		/* make it ignore the cache from now on */
		if(!s->s.blacklist)
			sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
		if(s->s.prefetch_leeway < leeway)
			s->s.prefetch_leeway = leeway;
		return;
	}
	if(!mesh_make_new_space(mesh, NULL)) {
		verbose(VERB_ALGO, "Too many queries. dropped prefetch.");
		mesh->stats_dropped ++;
		return;
	}
	s = mesh_state_create(mesh->env, qinfo, qflags&BIT_RD, 0);
	if(!s) {
		log_err("prefetch mesh_state_create: out of memory");
		return;
	}
#ifdef UNBOUND_DEBUG
	n =
#else
	(void)
#endif
	rbtree_insert(&mesh->all, &s->node);
	log_assert(n != NULL);
	/* set detached (it is now); no reply address is attached, so the
	 * state stays detached until a client or super shows interest */
	mesh->num_detached_states++;
	/* make it ignore the cache */
	sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region);
	s->s.prefetch_leeway = leeway;

	if(s->list_select == mesh_no_list) {
		/* move to either the forever or the jostle_list */
		if(mesh->num_forever_states < mesh->max_forever_states) {
			mesh->num_forever_states ++;
			mesh_list_insert(s, &mesh->forever_first,
				&mesh->forever_last);
			s->list_select = mesh_forever_list;
		} else {
			mesh_list_insert(s, &mesh->jostle_first,
				&mesh->jostle_last);
			s->list_select = mesh_jostle_list;
		}
	}
	mesh_run(mesh, s, module_event_new, NULL);
}
483
484void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e,
485        struct comm_reply* reply, int what)
486{
487	enum module_ev event = module_event_reply;
488	e->qstate->reply = reply;
489	if(what != NETEVENT_NOERROR) {
490		event = module_event_noreply;
491		if(what == NETEVENT_CAPSFAIL)
492			event = module_event_capsfail;
493	}
494	mesh_run(mesh, e->qstate->mesh_info, event, e);
495}
496
/**
 * Create a new mesh state and its embedded module qstate. The state and
 * all its data live in a region obtained from the alloc cache, so one
 * region release frees everything.
 * @param env: module environment with alloc cache and module stack.
 * @param qinfo: query info; the qname is copied into the region.
 * @param qflags: query flags; only RD and CD are retained.
 * @param prime: true if this is a priming query.
 * @return new state or NULL on failure (region released on failure).
 */
struct mesh_state*
mesh_state_create(struct module_env* env, struct query_info* qinfo,
	uint16_t qflags, int prime)
{
	struct regional* region = alloc_reg_obtain(env->alloc);
	struct mesh_state* mstate;
	int i;
	if(!region)
		return NULL;
	mstate = (struct mesh_state*)regional_alloc(region,
		sizeof(struct mesh_state));
	if(!mstate) {
		alloc_reg_release(env->alloc, region);
		return NULL;
	}
	memset(mstate, 0, sizeof(*mstate));
	/* initialize the tree nodes; each node's key points back at the
	 * state so tree compares can reach the query info */
	mstate->node = *RBTREE_NULL;
	mstate->run_node = *RBTREE_NULL;
	mstate->node.key = mstate;
	mstate->run_node.key = mstate;
	mstate->reply_list = NULL;
	mstate->list_select = mesh_no_list;
	mstate->replies_sent = 0;
	rbtree_init(&mstate->super_set, &mesh_state_ref_compare);
	rbtree_init(&mstate->sub_set, &mesh_state_ref_compare);
	mstate->num_activated = 0;
	/* init module qstate */
	mstate->s.qinfo.qtype = qinfo->qtype;
	mstate->s.qinfo.qclass = qinfo->qclass;
	mstate->s.qinfo.qname_len = qinfo->qname_len;
	/* copy the qname, because the caller's buffer may be reused */
	mstate->s.qinfo.qname = regional_alloc_init(region, qinfo->qname,
		qinfo->qname_len);
	if(!mstate->s.qinfo.qname) {
		alloc_reg_release(env->alloc, region);
		return NULL;
	}
	/* remove all weird bits from qflags */
	mstate->s.query_flags = (qflags & (BIT_RD|BIT_CD));
	mstate->s.is_priming = prime;
	mstate->s.reply = NULL;
	mstate->s.region = region;
	mstate->s.curmod = 0;
	mstate->s.return_msg = 0;
	mstate->s.return_rcode = LDNS_RCODE_NOERROR;
	mstate->s.env = env;
	mstate->s.mesh_info = mstate;
	mstate->s.prefetch_leeway = 0;
	/* init modules */
	for(i=0; i<env->mesh->mods.num; i++) {
		mstate->s.minfo[i] = NULL;
		mstate->s.ext_state[i] = module_state_initial;
	}
	return mstate;
}
551
/**
 * Cleanup a mesh state: drop unsent replies, SERVFAIL unsent callbacks,
 * de-init the modules and release the state's region.
 * Does not unlink the state from the mesh trees; mesh_state_delete does
 * that and then calls this.
 */
void
mesh_state_cleanup(struct mesh_state* mstate)
{
	struct mesh_area* mesh;
	int i;
	if(!mstate)
		return;
	mesh = mstate->s.env->mesh;
	/* drop unsent replies */
	if(!mstate->replies_sent) {
		struct mesh_reply* rep;
		struct mesh_cb* cb;
		for(rep=mstate->reply_list; rep; rep=rep->next) {
			comm_point_drop_reply(&rep->query_reply);
			mesh->num_reply_addrs--;
		}
		/* callbacks cannot be dropped silently; they get SERVFAIL */
		for(cb=mstate->cb_list; cb; cb=cb->next) {
			fptr_ok(fptr_whitelist_mesh_cb(cb->cb));
			(*cb->cb)(cb->cb_arg, LDNS_RCODE_SERVFAIL, NULL,
				sec_status_unchecked, NULL);
			mesh->num_reply_addrs--;
		}
	}

	/* de-init modules */
	for(i=0; i<mesh->mods.num; i++) {
		fptr_ok(fptr_whitelist_mod_clear(mesh->mods.mod[i]->clear));
		(*mesh->mods.mod[i]->clear)(&mstate->s, i);
		mstate->s.minfo[i] = NULL;
		mstate->s.ext_state[i] = module_finished;
	}
	/* releasing the region frees the state and all its allocations */
	alloc_reg_release(mstate->s.env->alloc, mstate->s.region);
}
585
/**
 * Fully delete a mesh state: detach subqueries, take it off its run
 * list, update the mesh counters, unlink it from supers and from the
 * mesh trees, then run the cleanup routine.
 * @param qstate: the module qstate embedded in the state to delete.
 */
void
mesh_state_delete(struct module_qstate* qstate)
{
	struct mesh_area* mesh;
	struct mesh_state_ref* super, ref;
	struct mesh_state* mstate;
	if(!qstate)
		return;
	mstate = qstate->mesh_info;
	mesh = mstate->s.env->mesh;
	mesh_detach_subs(&mstate->s);
	/* remove from the forever or jostle list it is on */
	if(mstate->list_select == mesh_forever_list) {
		mesh->num_forever_states --;
		mesh_list_remove(mstate, &mesh->forever_first,
			&mesh->forever_last);
	} else if(mstate->list_select == mesh_jostle_list) {
		mesh_list_remove(mstate, &mesh->jostle_first,
			&mesh->jostle_last);
	}
	if(!mstate->reply_list && !mstate->cb_list
		&& mstate->super_set.count == 0) {
		log_assert(mesh->num_detached_states > 0);
		mesh->num_detached_states--;
	}
	if(mstate->reply_list || mstate->cb_list) {
		log_assert(mesh->num_reply_states > 0);
		mesh->num_reply_states--;
	}
	/* use a stack-local ref as the lookup key to remove this state
	 * from every super's sub_set */
	ref.node.key = &ref;
	ref.s = mstate;
	RBTREE_FOR(super, struct mesh_state_ref*, &mstate->super_set) {
		(void)rbtree_delete(&super->s->sub_set, &ref);
	}
	(void)rbtree_delete(&mesh->run, mstate);
	(void)rbtree_delete(&mesh->all, mstate);
	mesh_state_cleanup(mstate);
}
623
/**
 * Helper: recursively search for tofind in the subquery tree below m.
 * @param m: root of the subtree to search.
 * @param tofind: the state to look for.
 * @param c: visit counter shared across the recursion; once it exceeds
 *	MESH_MAX_SUBSUB the search stops and reports found, so overly
 *	deep dependency trees are refused.
 * @return true if tofind was found (or the depth limit was hit).
 */
static int
find_in_subsub(struct mesh_state* m, struct mesh_state* tofind, size_t *c)
{
	struct mesh_state_ref* r;
	if((*c)++ > MESH_MAX_SUBSUB)
		return 1;
	RBTREE_FOR(r, struct mesh_state_ref*, &m->sub_set) {
		if(r->s == tofind || find_in_subsub(r->s, tofind, c))
			return 1;
	}
	return 0;
}
637
638/** find cycle for already looked up mesh_state */
639static int
640mesh_detect_cycle_found(struct module_qstate* qstate, struct mesh_state* dep_m)
641{
642	struct mesh_state* cyc_m = qstate->mesh_info;
643	size_t counter = 0;
644	if(!dep_m)
645		return 0;
646	if(dep_m == cyc_m || find_in_subsub(dep_m, cyc_m, &counter)) {
647		if(counter > MESH_MAX_SUBSUB)
648			return 2;
649		return 1;
650	}
651	return 0;
652}
653
/**
 * Detach all subqueries of this state: remove this state from every
 * sub's super_set and clear this state's sub_set. Subs that become
 * detached (no supers, no replies) are counted as such.
 */
void mesh_detach_subs(struct module_qstate* qstate)
{
	struct mesh_area* mesh = qstate->env->mesh;
	struct mesh_state_ref* ref, lookup;
#ifdef UNBOUND_DEBUG
	struct rbnode_t* n;
#endif
	/* stack-local lookup key that refers to this state */
	lookup.node.key = &lookup;
	lookup.s = qstate->mesh_info;
	RBTREE_FOR(ref, struct mesh_state_ref*, &qstate->mesh_info->sub_set) {
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_delete(&ref->s->super_set, &lookup);
		log_assert(n != NULL); /* must have been present */
		/* a sub without supers and without anyone to reply to is
		 * now a detached state */
		if(!ref->s->reply_list && !ref->s->cb_list
			&& ref->s->super_set.count == 0) {
			mesh->num_detached_states++;
			log_assert(mesh->num_detached_states +
				mesh->num_reply_states <= mesh->all.count);
		}
	}
	rbtree_init(&qstate->mesh_info->sub_set, &mesh_state_ref_compare);
}
680
/**
 * Attach a subquery below the given qstate; find an existing state for
 * the question or create one, refuse attachments that would create a
 * dependency cycle, and keep the detached-state count correct.
 * @param qstate: the super query state.
 * @param qinfo: question for the subquery.
 * @param qflags: flags for the subquery.
 * @param prime: true for a priming subquery.
 * @param newq: set to the new subquery state if one was created, NULL
 *	if an existing state was attached to.
 * @return false on cycle detection or allocation failure.
 */
int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo,
        uint16_t qflags, int prime, struct module_qstate** newq)
{
	/* find it, if not, create it */
	struct mesh_area* mesh = qstate->env->mesh;
	struct mesh_state* sub = mesh_area_find(mesh, qinfo, qflags, prime);
	int was_detached;
	if(mesh_detect_cycle_found(qstate, sub)) {
		verbose(VERB_ALGO, "attach failed, cycle detected");
		return 0;
	}
	if(!sub) {
#ifdef UNBOUND_DEBUG
		struct rbnode_t* n;
#endif
		/* create a new one */
		sub = mesh_state_create(qstate->env, qinfo, qflags, prime);
		if(!sub) {
			log_err("mesh_attach_sub: out of memory");
			return 0;
		}
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_insert(&mesh->all, &sub->node);
		log_assert(n != NULL);
		/* set detached (it is now) */
		mesh->num_detached_states++;
		/* set new query state to run */
#ifdef UNBOUND_DEBUG
		n =
#else
		(void)
#endif
		rbtree_insert(&mesh->run, &sub->run_node);
		log_assert(n != NULL);
		*newq = &sub->s;
	} else
		*newq = NULL;
	/* record before the attachment changes the super_set count */
	was_detached = (sub->super_set.count == 0);
	if(!mesh_state_attachment(qstate->mesh_info, sub))
		return 0;
	/* if it was a duplicate  attachment, the count was not zero before */
	if(!sub->reply_list && !sub->cb_list && was_detached &&
		sub->super_set.count == 1) {
		/* it used to be detached, before this one got added */
		log_assert(mesh->num_detached_states > 0);
		mesh->num_detached_states--;
	}
	/* *newq will be run when inited after the current module stops */
	return 1;
}
735
/**
 * Link two mesh states as super and sub: insert a ref to sub into
 * super's sub_set and a ref to super into sub's super_set. The refs are
 * allocated in the respective regions, so they are freed with the state
 * that owns the set they are inserted into.
 * @return false on allocation failure; true also for duplicates.
 */
int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub)
{
#ifdef UNBOUND_DEBUG
	struct rbnode_t* n;
#endif
	struct mesh_state_ref* subref; /* points to sub, inserted in super */
	struct mesh_state_ref* superref; /* points to super, inserted in sub */
	if( !(subref = regional_alloc(super->s.region,
		sizeof(struct mesh_state_ref))) ||
		!(superref = regional_alloc(sub->s.region,
		sizeof(struct mesh_state_ref))) ) {
		log_err("mesh_state_attachment: out of memory");
		return 0;
	}
	superref->node.key = superref;
	superref->s = super;
	subref->node.key = subref;
	subref->s = sub;
	if(!rbtree_insert(&sub->super_set, &superref->node)) {
		/* this should not happen, iterator and validator do not
		 * attach subqueries that are identical. */
		/* already attached, we are done, nothing todo.
		 * since superref and subref already allocated in region,
		 * we cannot free them */
		return 1;
	}
#ifdef UNBOUND_DEBUG
	n =
#else
	(void)
#endif
	rbtree_insert(&super->sub_set, &subref->node);
	log_assert(n != NULL); /* we checked above if statement, the reverse
	  administration should not fail now, unless they are out of sync */
	return 1;
}
772
773/**
774 * callback results to mesh cb entry
775 * @param m: mesh state to send it for.
776 * @param rcode: if not 0, error code.
777 * @param rep: reply to send (or NULL if rcode is set).
778 * @param r: callback entry
779 */
780static void
781mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep,
782	struct mesh_cb* r)
783{
784	int secure;
785	char* reason = NULL;
786	/* bogus messages are not made into servfail, sec_status passed
787	 * to the callback function */
788	if(rep && rep->security == sec_status_secure)
789		secure = 1;
790	else	secure = 0;
791	if(!rep && rcode == LDNS_RCODE_NOERROR)
792		rcode = LDNS_RCODE_SERVFAIL;
793	if(!rcode && rep->security == sec_status_bogus) {
794		if(!(reason = errinf_to_str(&m->s)))
795			rcode = LDNS_RCODE_SERVFAIL;
796	}
797	/* send the reply */
798	if(rcode) {
799		fptr_ok(fptr_whitelist_mesh_cb(r->cb));
800		(*r->cb)(r->cb_arg, rcode, r->buf, sec_status_unchecked, NULL);
801	} else {
802		size_t udp_size = r->edns.udp_size;
803		sldns_buffer_clear(r->buf);
804		r->edns.edns_version = EDNS_ADVERTISED_VERSION;
805		r->edns.udp_size = EDNS_ADVERTISED_SIZE;
806		r->edns.ext_rcode = 0;
807		r->edns.bits &= EDNS_DO;
808		if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid,
809			r->qflags, r->buf, 0, 1,
810			m->s.env->scratch, udp_size, &r->edns,
811			(int)(r->edns.bits & EDNS_DO), secure))
812		{
813			fptr_ok(fptr_whitelist_mesh_cb(r->cb));
814			(*r->cb)(r->cb_arg, LDNS_RCODE_SERVFAIL, r->buf,
815				sec_status_unchecked, NULL);
816		} else {
817			fptr_ok(fptr_whitelist_mesh_cb(r->cb));
818			(*r->cb)(r->cb_arg, LDNS_RCODE_NOERROR, r->buf,
819				rep->security, reason);
820		}
821	}
822	free(reason);
823	m->s.env->mesh->num_reply_addrs--;
824}
825
826/**
827 * Send reply to mesh reply entry
828 * @param m: mesh state to send it for.
829 * @param rcode: if not 0, error code.
830 * @param rep: reply to send (or NULL if rcode is set).
831 * @param r: reply entry
832 * @param prev: previous reply, already has its answer encoded in buffer.
833 */
834static void
835mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep,
836	struct mesh_reply* r, struct mesh_reply* prev)
837{
838	struct timeval end_time;
839	struct timeval duration;
840	int secure;
841	/* examine security status */
842	if(m->s.env->need_to_validate && (!(r->qflags&BIT_CD) ||
843		m->s.env->cfg->ignore_cd) && rep &&
844		rep->security <= sec_status_bogus) {
845		rcode = LDNS_RCODE_SERVFAIL;
846		if(m->s.env->cfg->stat_extended)
847			m->s.env->mesh->ans_bogus++;
848	}
849	if(rep && rep->security == sec_status_secure)
850		secure = 1;
851	else	secure = 0;
852	if(!rep && rcode == LDNS_RCODE_NOERROR)
853		rcode = LDNS_RCODE_SERVFAIL;
854	/* send the reply */
855	if(prev && prev->qflags == r->qflags &&
856		prev->edns.edns_present == r->edns.edns_present &&
857		prev->edns.bits == r->edns.bits &&
858		prev->edns.udp_size == r->edns.udp_size) {
859		/* if the previous reply is identical to this one, fix ID */
860		if(prev->query_reply.c->buffer != r->query_reply.c->buffer)
861			sldns_buffer_copy(r->query_reply.c->buffer,
862				prev->query_reply.c->buffer);
863		sldns_buffer_write_at(r->query_reply.c->buffer, 0,
864			&r->qid, sizeof(uint16_t));
865		sldns_buffer_write_at(r->query_reply.c->buffer, 12,
866			r->qname, m->s.qinfo.qname_len);
867		comm_point_send_reply(&r->query_reply);
868	} else if(rcode) {
869		m->s.qinfo.qname = r->qname;
870		error_encode(r->query_reply.c->buffer, rcode, &m->s.qinfo,
871			r->qid, r->qflags, &r->edns);
872		comm_point_send_reply(&r->query_reply);
873	} else {
874		size_t udp_size = r->edns.udp_size;
875		r->edns.edns_version = EDNS_ADVERTISED_VERSION;
876		r->edns.udp_size = EDNS_ADVERTISED_SIZE;
877		r->edns.ext_rcode = 0;
878		r->edns.bits &= EDNS_DO;
879		m->s.qinfo.qname = r->qname;
880		if(!reply_info_answer_encode(&m->s.qinfo, rep, r->qid,
881			r->qflags, r->query_reply.c->buffer, 0, 1,
882			m->s.env->scratch, udp_size, &r->edns,
883			(int)(r->edns.bits & EDNS_DO), secure))
884		{
885			error_encode(r->query_reply.c->buffer,
886				LDNS_RCODE_SERVFAIL, &m->s.qinfo, r->qid,
887				r->qflags, &r->edns);
888		}
889		comm_point_send_reply(&r->query_reply);
890	}
891	/* account */
892	m->s.env->mesh->num_reply_addrs--;
893	end_time = *m->s.env->now_tv;
894	timeval_subtract(&duration, &end_time, &r->start_time);
895	verbose(VERB_ALGO, "query took " ARG_LL "d.%6.6d sec",
896		(long long)duration.tv_sec, (int)duration.tv_usec);
897	m->s.env->mesh->replies_sent++;
898	timeval_add(&m->s.env->mesh->replies_sum_wait, &duration);
899	timehist_insert(m->s.env->mesh->histogram, &duration);
900	if(m->s.env->cfg->stat_extended) {
901		uint16_t rc = FLAGS_GET_RCODE(sldns_buffer_read_u16_at(r->
902			query_reply.c->buffer, 2));
903		if(secure) m->s.env->mesh->ans_secure++;
904		m->s.env->mesh->ans_rcode[ rc ] ++;
905		if(rc == 0 && LDNS_ANCOUNT(sldns_buffer_begin(r->
906			query_reply.c->buffer)) == 0)
907			m->s.env->mesh->ans_nodata++;
908	}
909}
910
911void mesh_query_done(struct mesh_state* mstate)
912{
913	struct mesh_reply* r;
914	struct mesh_reply* prev = NULL;
915	struct mesh_cb* c;
916	struct reply_info* rep = (mstate->s.return_msg?
917		mstate->s.return_msg->rep:NULL);
918	for(r = mstate->reply_list; r; r = r->next) {
919		mesh_send_reply(mstate, mstate->s.return_rcode, rep, r, prev);
920		prev = r;
921	}
922	mstate->replies_sent = 1;
923	for(c = mstate->cb_list; c; c = c->next) {
924		mesh_do_callback(mstate, mstate->s.return_rcode, rep, c);
925	}
926}
927
/**
 * Inform all super states of this state's result: mark each super
 * runnable and call its current module's inform_super function.
 */
void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate)
{
	struct mesh_state_ref* ref;
	RBTREE_FOR(ref, struct mesh_state_ref*, &mstate->super_set)
	{
		/* make super runnable */
		(void)rbtree_insert(&mesh->run, &ref->s->run_node);
		/* callback the function to inform super of result */
		fptr_ok(fptr_whitelist_mod_inform_super(
			mesh->mods.mod[ref->s->s.curmod]->inform_super));
		(*mesh->mods.mod[ref->s->s.curmod]->inform_super)(&mstate->s,
			ref->s->s.curmod, &ref->s->s);
	}
}
942
943struct mesh_state* mesh_area_find(struct mesh_area* mesh,
944	struct query_info* qinfo, uint16_t qflags, int prime)
945{
946	struct mesh_state key;
947	struct mesh_state* result;
948
949	key.node.key = &key;
950	key.s.is_priming = prime;
951	key.s.qinfo = *qinfo;
952	key.s.query_flags = qflags;
953
954	result = (struct mesh_state*)rbtree_search(&mesh->all, &key);
955	return result;
956}
957
958int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns,
959        sldns_buffer* buf, mesh_cb_func_t cb, void* cb_arg,
960	uint16_t qid, uint16_t qflags)
961{
962	struct mesh_cb* r = regional_alloc(s->s.region,
963		sizeof(struct mesh_cb));
964	if(!r)
965		return 0;
966	r->buf = buf;
967	log_assert(fptr_whitelist_mesh_cb(cb)); /* early failure ifmissing*/
968	r->cb = cb;
969	r->cb_arg = cb_arg;
970	r->edns = *edns;
971	r->qid = qid;
972	r->qflags = qflags;
973	r->next = s->cb_list;
974	s->cb_list = r;
975	return 1;
976
977}
978
979int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns,
980        struct comm_reply* rep, uint16_t qid, uint16_t qflags, uint8_t* qname)
981{
982	struct mesh_reply* r = regional_alloc(s->s.region,
983		sizeof(struct mesh_reply));
984	if(!r)
985		return 0;
986	r->query_reply = *rep;
987	r->edns = *edns;
988	r->qid = qid;
989	r->qflags = qflags;
990	r->start_time = *s->s.env->now_tv;
991	r->next = s->reply_list;
992	r->qname = regional_alloc_init(s->s.region, qname,
993		s->s.qinfo.qname_len);
994	if(!r->qname)
995		return 0;
996	s->reply_list = r;
997	return 1;
998
999}
1000
1001/**
1002 * Continue processing the mesh state at another module.
 * Handles module-to-module transfer of control.
1004 * Handles module finished.
1005 * @param mesh: the mesh area.
1006 * @param mstate: currently active mesh state.
1007 * 	Deleted if finished, calls _done and _supers to
1008 * 	send replies to clients and inform other mesh states.
1009 * 	This in turn may create additional runnable mesh states.
1010 * @param s: state at which the current module exited.
1011 * @param ev: the event sent to the module.
1012 * 	returned is the event to send to the next module.
1013 * @return true if continue processing at the new module.
 * 	false if no further processing is needed.
1015 */
static int
mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate,
	enum module_ext_state s, enum module_ev* ev)
{
	/* guard against a module that keeps re-activating itself */
	mstate->num_activated++;
	if(mstate->num_activated > MESH_MAX_ACTIVATION) {
		/* module is looping. Stop it. */
		log_err("internal error: looping module stopped");
		log_query_info(VERB_QUERY, "pass error for qstate",
			&mstate->s.qinfo);
		s = module_error;
	}
	if(s == module_wait_module || s == module_restart_next) {
		/* start next module */
		mstate->s.curmod++;
		if(mesh->mods.num == mstate->s.curmod) {
			/* no next module exists; convert to an error and
			 * recurse so the error path below handles it */
			log_err("Cannot pass to next module; at last module");
			log_query_info(VERB_QUERY, "pass error for qstate",
				&mstate->s.qinfo);
			mstate->s.curmod--;
			return mesh_continue(mesh, mstate, module_error, ev);
		}
		if(s == module_restart_next) {
			/* wipe the next module's per-query state so that
			 * it starts afresh */
			fptr_ok(fptr_whitelist_mod_clear(
				mesh->mods.mod[mstate->s.curmod]->clear));
			(*mesh->mods.mod[mstate->s.curmod]->clear)
				(&mstate->s, mstate->s.curmod);
			mstate->s.minfo[mstate->s.curmod] = NULL;
		}
		*ev = module_event_pass;
		return 1;
	}
	if(s == module_error && mstate->s.return_rcode == LDNS_RCODE_NOERROR) {
		/* error is bad, handle pass back up below */
		mstate->s.return_rcode = LDNS_RCODE_SERVFAIL;
	}
	if(s == module_error || s == module_finished) {
		if(mstate->s.curmod == 0) {
			/* first module is done: reply to clients, inform
			 * super-queries, and delete this mesh state */
			mesh_query_done(mstate);
			mesh_walk_supers(mesh, mstate);
			mesh_state_delete(&mstate->s);
			return 0;
		}
		/* pass along the locus of control */
		mstate->s.curmod --;
		*ev = module_event_moddone;
		return 1;
	}
	/* module wants to wait (for a subquery or network reply);
	 * nothing more to do for this state right now */
	return 0;
}
1066
void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate,
	enum module_ev ev, struct outbound_entry* e)
{
	enum module_ext_state s;
	verbose(VERB_ALGO, "mesh_run: start");
	/* keep running states until this one finishes or waits, and the
	 * runnable tree is empty */
	while(mstate) {
		/* run the module */
		fptr_ok(fptr_whitelist_mod_operate(
			mesh->mods.mod[mstate->s.curmod]->operate));
		(*mesh->mods.mod[mstate->s.curmod]->operate)
			(&mstate->s, ev, mstate->s.curmod, e);

		/* examine results */
		mstate->s.reply = NULL;
		/* scratch region holds only per-operate temporaries */
		regional_free_all(mstate->s.env->scratch);
		s = mstate->s.ext_state[mstate->s.curmod];
		verbose(VERB_ALGO, "mesh_run: %s module exit state is %s",
			mesh->mods.mod[mstate->s.curmod]->name, strextstate(s));
		/* the outbound entry applies only to the first operate call */
		e = NULL;
		if(mesh_continue(mesh, mstate, s, &ev))
			continue;

		/* run more modules */
		ev = module_event_pass;
		if(mesh->run.count > 0) {
			/* pop random element off the runnable tree */
			mstate = (struct mesh_state*)mesh->run.root->key;
			(void)rbtree_delete(&mesh->run, mstate);
		} else mstate = NULL;
	}
	if(verbosity >= VERB_ALGO) {
		mesh_stats(mesh, "mesh_run: end");
		mesh_log_list(mesh);
	}
}
1102
1103void
1104mesh_log_list(struct mesh_area* mesh)
1105{
1106	char buf[30];
1107	struct mesh_state* m;
1108	int num = 0;
1109	RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
1110		snprintf(buf, sizeof(buf), "%d%s%s%s%s%s mod%d %s%s",
1111			num++, (m->s.is_priming)?"p":"",  /* prime */
1112			(m->s.query_flags&BIT_RD)?"RD":"",
1113			(m->s.query_flags&BIT_CD)?"CD":"",
1114			(m->super_set.count==0)?"d":"", /* detached */
1115			(m->sub_set.count!=0)?"c":"",  /* children */
1116			m->s.curmod, (m->reply_list)?"rep":"", /*hasreply*/
1117			(m->cb_list)?"cb":"" /* callbacks */
1118			);
1119		log_query_info(VERB_ALGO, buf, &m->s.qinfo);
1120	}
1121}
1122
1123void
1124mesh_stats(struct mesh_area* mesh, const char* str)
1125{
1126	verbose(VERB_DETAIL, "%s %u recursion states (%u with reply, "
1127		"%u detached), %u waiting replies, %u recursion replies "
1128		"sent, %d replies dropped, %d states jostled out",
1129		str, (unsigned)mesh->all.count,
1130		(unsigned)mesh->num_reply_states,
1131		(unsigned)mesh->num_detached_states,
1132		(unsigned)mesh->num_reply_addrs,
1133		(unsigned)mesh->replies_sent,
1134		(unsigned)mesh->stats_dropped,
1135		(unsigned)mesh->stats_jostled);
1136	if(mesh->replies_sent > 0) {
1137		struct timeval avg;
1138		timeval_divide(&avg, &mesh->replies_sum_wait,
1139			mesh->replies_sent);
1140		log_info("average recursion processing time "
1141			ARG_LL "d.%6.6d sec",
1142			(long long)avg.tv_sec, (int)avg.tv_usec);
1143		log_info("histogram of recursion processing times");
1144		timehist_log(mesh->histogram, "recursions");
1145	}
1146}
1147
1148void
1149mesh_stats_clear(struct mesh_area* mesh)
1150{
1151	if(!mesh)
1152		return;
1153	mesh->replies_sent = 0;
1154	mesh->replies_sum_wait.tv_sec = 0;
1155	mesh->replies_sum_wait.tv_usec = 0;
1156	mesh->stats_jostled = 0;
1157	mesh->stats_dropped = 0;
1158	timehist_clear(mesh->histogram);
1159	mesh->ans_secure = 0;
1160	mesh->ans_bogus = 0;
1161	memset(&mesh->ans_rcode[0], 0, sizeof(size_t)*16);
1162	mesh->ans_nodata = 0;
1163}
1164
1165size_t
1166mesh_get_mem(struct mesh_area* mesh)
1167{
1168	struct mesh_state* m;
1169	size_t s = sizeof(*mesh) + sizeof(struct timehist) +
1170		sizeof(struct th_buck)*mesh->histogram->num +
1171		sizeof(sldns_buffer) + sldns_buffer_capacity(mesh->qbuf_bak);
1172	RBTREE_FOR(m, struct mesh_state*, &mesh->all) {
1173		/* all, including m itself allocated in qstate region */
1174		s += regional_get_mem(m->s.region);
1175	}
1176	return s;
1177}
1178
1179int
1180mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo,
1181	uint16_t flags, int prime)
1182{
1183	struct mesh_area* mesh = qstate->env->mesh;
1184	struct mesh_state* dep_m = mesh_area_find(mesh, qinfo, flags, prime);
1185	return mesh_detect_cycle_found(qstate, dep_m);
1186}
1187
1188void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp,
1189        struct mesh_state** lp)
1190{
1191	/* insert as last element */
1192	m->prev = *lp;
1193	m->next = NULL;
1194	if(*lp)
1195		(*lp)->next = m;
1196	else	*fp = m;
1197	*lp = m;
1198}
1199
1200void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp,
1201        struct mesh_state** lp)
1202{
1203	if(m->next)
1204		m->next->prev = m->prev;
1205	else	*lp = m->prev;
1206	if(m->prev)
1207		m->prev->next = m->next;
1208	else	*fp = m->next;
1209}
1210