1/*
2 * util/netevent.c - event notification
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains event notification functions.
40 */
41#include "config.h"
42#include <ldns/wire2host.h>
43#include "util/netevent.h"
44#include "util/log.h"
45#include "util/net_help.h"
46#include "util/fptr_wlist.h"
47#ifdef HAVE_OPENSSL_SSL_H
48#include <openssl/ssl.h>
49#endif
50#ifdef HAVE_OPENSSL_ERR_H
51#include <openssl/err.h>
52#endif
53
54/* -------- Start of local definitions -------- */
/**
 * Fallback for platforms without CMSG_ALIGN: use the system's private
 * data-alignment macro when it exists, otherwise round the length up to
 * the next multiple of sizeof(long).
 */
#if !defined(CMSG_ALIGN)
#  if defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(len) (((len)+(sizeof(long)-1)) & ~(sizeof(long)-1))
#  endif
#endif

/**
 * Fallback for platforms without CMSG_LEN: the aligned size of the
 * control message header plus the (unpadded) payload length.
 */
#if !defined(CMSG_LEN)
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/**
 * Fallback for platforms without CMSG_SPACE: the aligned payload length
 * plus the aligned header size; the system's private header-alignment
 * macro is used for the header when it exists.
 */
#if !defined(CMSG_SPACE)
#  if defined(_CMSG_HDR_ALIGN)
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
77
/** Timeout, in seconds, applied while reading or writing a TCP query */
#define TCP_QUERY_TIMEOUT 120
80
/**
 * Number of UDP reads to perform per read indication from select.
 * Only a single read is done when nonblocking sockets are broken.
 */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
87
88/* We define libevent structures here to hide the libevent stuff. */
89
90#ifdef USE_MINI_EVENT
91#  ifdef USE_WINSOCK
92#    include "util/winsock_event.h"
93#  else
94#    include "util/mini_event.h"
95#  endif /* USE_WINSOCK */
96#else /* USE_MINI_EVENT */
97   /* we use libevent */
98#  ifdef HAVE_EVENT_H
99#    include <event.h>
100#  else
101#    include "event2/event.h"
102#    include "event2/event_struct.h"
103#    include "event2/event_compat.h"
104#  endif
105#endif /* USE_MINI_EVENT */
106
107/**
108 * The internal event structure for keeping libevent info for the event.
109 * Possibly other structures (list, tree) this is part of.
110 */
111struct internal_event {
112	/** the comm base */
113	struct comm_base* base;
114	/** libevent event type, alloced here */
115	struct event ev;
116};
117
118/**
119 * Internal base structure, so that every thread has its own events.
120 */
121struct internal_base {
122	/** libevent event_base type. */
123	struct event_base* base;
124	/** seconds time pointer points here */
125	uint32_t secs;
126	/** timeval with current time */
127	struct timeval now;
128	/** the event used for slow_accept timeouts */
129	struct event slow_accept;
130	/** true if slow_accept is enabled */
131	int slow_accept_enabled;
132};
133
134/**
135 * Internal timer structure, to store timer event in.
136 */
137struct internal_timer {
138	/** the comm base */
139	struct comm_base* base;
140	/** libevent event type, alloced here */
141	struct event ev;
142	/** is timer enabled */
143	uint8_t enabled;
144};
145
146/**
147 * Internal signal structure, to store signal event in.
148 */
149struct internal_signal {
150	/** libevent event type, alloced here */
151	struct event ev;
152	/** next in signal list */
153	struct internal_signal* next;
154};
155
156/** create a tcp handler with a parent */
157static struct comm_point* comm_point_create_tcp_handler(
158	struct comm_base *base, struct comm_point* parent, size_t bufsize,
159        comm_point_callback_t* callback, void* callback_arg);
160
161/* -------- End of local definitions -------- */
162
163#ifdef USE_MINI_EVENT
164/** minievent updates the time when it blocks. */
165#define comm_base_now(x) /* nothing to do */
166#else /* !USE_MINI_EVENT */
167/** fillup the time values in the event base */
168static void
169comm_base_now(struct comm_base* b)
170{
171	if(gettimeofday(&b->eb->now, NULL) < 0) {
172		log_err("gettimeofday: %s", strerror(errno));
173	}
174	b->eb->secs = (uint32_t)b->eb->now.tv_sec;
175}
176#endif /* USE_MINI_EVENT */
177
178struct comm_base*
179comm_base_create(int sigs)
180{
181	struct comm_base* b = (struct comm_base*)calloc(1,
182		sizeof(struct comm_base));
183	if(!b)
184		return NULL;
185	b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
186	if(!b->eb) {
187		free(b);
188		return NULL;
189	}
190#ifdef USE_MINI_EVENT
191	(void)sigs;
192	/* use mini event time-sharing feature */
193	b->eb->base = event_init(&b->eb->secs, &b->eb->now);
194#else
195#  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
196	/* libev */
197	if(sigs)
198		b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
199	else
200		b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
201#  else
202	(void)sigs;
203#    ifdef HAVE_EVENT_BASE_NEW
204	b->eb->base = event_base_new();
205#    else
206	b->eb->base = event_init();
207#    endif
208#  endif
209#endif
210	if(!b->eb->base) {
211		free(b->eb);
212		free(b);
213		return NULL;
214	}
215	comm_base_now(b);
216	/* avoid event_get_method call which causes crashes even when
217	 * not printing, because its result is passed */
218	verbose(VERB_ALGO,
219#if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
220		"libev"
221#elif defined(USE_MINI_EVENT)
222		"event "
223#else
224		"libevent "
225#endif
226		"%s uses %s method.",
227		event_get_version(),
228#ifdef HAVE_EVENT_BASE_GET_METHOD
229		event_base_get_method(b->eb->base)
230#else
231		"not_obtainable"
232#endif
233	);
234	return b;
235}
236
237void
238comm_base_delete(struct comm_base* b)
239{
240	if(!b)
241		return;
242	if(b->eb->slow_accept_enabled) {
243		if(event_del(&b->eb->slow_accept) != 0) {
244			log_err("could not event_del slow_accept");
245		}
246	}
247#ifdef USE_MINI_EVENT
248	event_base_free(b->eb->base);
249#elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
250	/* only libevent 1.2+ has it, but in 1.2 it is broken -
251	   assertion fails on signal handling ev that is not deleted
252 	   in libevent 1.3c (event_base_once appears) this is fixed. */
253	event_base_free(b->eb->base);
254#endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
255	b->eb->base = NULL;
256	free(b->eb);
257	free(b);
258}
259
260void
261comm_base_timept(struct comm_base* b, uint32_t** tt, struct timeval** tv)
262{
263	*tt = &b->eb->secs;
264	*tv = &b->eb->now;
265}
266
267void
268comm_base_dispatch(struct comm_base* b)
269{
270	int retval;
271	retval = event_base_dispatch(b->eb->base);
272	if(retval != 0) {
273		fatal_exit("event_dispatch returned error %d, "
274			"errno is %s", retval, strerror(errno));
275	}
276}
277
278void comm_base_exit(struct comm_base* b)
279{
280	if(event_base_loopexit(b->eb->base, NULL) != 0) {
281		log_err("Could not loopexit");
282	}
283}
284
285void comm_base_set_slow_accept_handlers(struct comm_base* b,
286	void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
287{
288	b->stop_accept = stop_acc;
289	b->start_accept = start_acc;
290	b->cb_arg = arg;
291}
292
293struct event_base* comm_base_internal(struct comm_base* b)
294{
295	return b->eb->base;
296}
297
298/** see if errno for udp has to be logged or not uses globals */
299static int
300udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
301{
302	/* do not log transient errors (unless high verbosity) */
303#if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
304	switch(errno) {
305#  ifdef ENETUNREACH
306		case ENETUNREACH:
307#  endif
308#  ifdef EHOSTDOWN
309		case EHOSTDOWN:
310#  endif
311#  ifdef EHOSTUNREACH
312		case EHOSTUNREACH:
313#  endif
314#  ifdef ENETDOWN
315		case ENETDOWN:
316#  endif
317			if(verbosity < VERB_ALGO)
318				return 0;
319		default:
320			break;
321	}
322#endif
323	/* squelch errors where people deploy AAAA ::ffff:bla for
324	 * authority servers, which we try for intranets. */
325	if(errno == EINVAL && addr_is_ip4mapped(
326		(struct sockaddr_storage*)addr, addrlen) &&
327		verbosity < VERB_DETAIL)
328		return 0;
329	/* SO_BROADCAST sockopt can give access to 255.255.255.255,
330	 * but a dns cache does not need it. */
331	if(errno == EACCES && addr_is_broadcast(
332		(struct sockaddr_storage*)addr, addrlen) &&
333		verbosity < VERB_DETAIL)
334		return 0;
335	return 1;
336}
337
/** failed tcp connects follow the same logging policy as failed udp sends */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);
	return needs_log;
}
342
343/* send a UDP reply */
344int
345comm_point_send_udp_msg(struct comm_point *c, ldns_buffer* packet,
346	struct sockaddr* addr, socklen_t addrlen)
347{
348	ssize_t sent;
349	log_assert(c->fd != -1);
350#ifdef UNBOUND_DEBUG
351	if(ldns_buffer_remaining(packet) == 0)
352		log_err("error: send empty UDP packet");
353#endif
354	log_assert(addr && addrlen > 0);
355	sent = sendto(c->fd, (void*)ldns_buffer_begin(packet),
356		ldns_buffer_remaining(packet), 0,
357		addr, addrlen);
358	if(sent == -1) {
359		if(!udp_send_errno_needs_log(addr, addrlen))
360			return 0;
361#ifndef USE_WINSOCK
362		verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
363#else
364		verbose(VERB_OPS, "sendto failed: %s",
365			wsa_strerror(WSAGetLastError()));
366#endif
367		log_addr(VERB_OPS, "remote address is",
368			(struct sockaddr_storage*)addr, addrlen);
369		return 0;
370	} else if((size_t)sent != ldns_buffer_remaining(packet)) {
371		log_err("sent %d in place of %d bytes",
372			(int)sent, (int)ldns_buffer_remaining(packet));
373		return 0;
374	}
375	return 1;
376}
377
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Log the ancillary (interface) information of a reply for debugging.
 * @param str: prefix for the log line.
 * @param r: reply; srctype 6 and 4 select which pktinfo member is printed,
 *	any other srctype is reported as unknown.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	switch(r->srctype) {
	case 6: {
		char addr6[1024];
		if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			addr6, (socklen_t)sizeof(addr6)) == 0) {
			strncpy(addr6, "(inet_ntop error)", sizeof(addr6));
		}
		addr6[sizeof(addr6)-1]=0;
		log_info("%s: %s %d", str, addr6,
			r->pktinfo.v6info.ipi6_ifindex);
		break;
	}
	case 4: {
#ifdef IP_PKTINFO
		char dst[1024], spec[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
			dst, (socklen_t)sizeof(dst)) == 0) {
			strncpy(dst, "(inet_ntop error)", sizeof(dst));
		}
		dst[sizeof(dst)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			spec, (socklen_t)sizeof(spec)) == 0) {
			strncpy(spec, "(inet_ntop error)", sizeof(spec));
		}
		spec[sizeof(spec)-1]=0;
#else
		spec[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			dst, spec);
#elif defined(IP_RECVDSTADDR)
		char dst[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
			dst, (socklen_t)sizeof(dst)) == 0) {
			strncpy(dst, "(inet_ntop error)", sizeof(dst));
		}
		dst[sizeof(dst)-1]=0;
		log_info("%s: %s", str, dst);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		break;
	}
	default:
		log_info("%s: unknown srctype %d", str, r->srctype);
		break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
425
426/** send a UDP reply over specified interface*/
427static int
428comm_point_send_udp_msg_if(struct comm_point *c, ldns_buffer* packet,
429	struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
430{
431#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
432	ssize_t sent;
433	struct msghdr msg;
434	struct iovec iov[1];
435	char control[256];
436#ifndef S_SPLINT_S
437	struct cmsghdr *cmsg;
438#endif /* S_SPLINT_S */
439
440	log_assert(c->fd != -1);
441#ifdef UNBOUND_DEBUG
442	if(ldns_buffer_remaining(packet) == 0)
443		log_err("error: send empty UDP packet");
444#endif
445	log_assert(addr && addrlen > 0);
446
447	msg.msg_name = addr;
448	msg.msg_namelen = addrlen;
449	iov[0].iov_base = ldns_buffer_begin(packet);
450	iov[0].iov_len = ldns_buffer_remaining(packet);
451	msg.msg_iov = iov;
452	msg.msg_iovlen = 1;
453	msg.msg_control = control;
454#ifndef S_SPLINT_S
455	msg.msg_controllen = sizeof(control);
456#endif /* S_SPLINT_S */
457	msg.msg_flags = 0;
458
459#ifndef S_SPLINT_S
460	cmsg = CMSG_FIRSTHDR(&msg);
461	if(r->srctype == 4) {
462#ifdef IP_PKTINFO
463		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
464		log_assert(msg.msg_controllen <= sizeof(control));
465		cmsg->cmsg_level = IPPROTO_IP;
466		cmsg->cmsg_type = IP_PKTINFO;
467		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
468			sizeof(struct in_pktinfo));
469		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
470#elif defined(IP_SENDSRCADDR)
471		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
472		log_assert(msg.msg_controllen <= sizeof(control));
473		cmsg->cmsg_level = IPPROTO_IP;
474		cmsg->cmsg_type = IP_SENDSRCADDR;
475		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
476			sizeof(struct in_addr));
477		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
478#else
479		verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
480		msg.msg_control = NULL;
481#endif /* IP_PKTINFO or IP_SENDSRCADDR */
482	} else if(r->srctype == 6) {
483		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
484		log_assert(msg.msg_controllen <= sizeof(control));
485		cmsg->cmsg_level = IPPROTO_IPV6;
486		cmsg->cmsg_type = IPV6_PKTINFO;
487		memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
488			sizeof(struct in6_pktinfo));
489		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
490	} else {
491		/* try to pass all 0 to use default route */
492		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
493		log_assert(msg.msg_controllen <= sizeof(control));
494		cmsg->cmsg_level = IPPROTO_IPV6;
495		cmsg->cmsg_type = IPV6_PKTINFO;
496		memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
497		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
498	}
499#endif /* S_SPLINT_S */
500	if(verbosity >= VERB_ALGO)
501		p_ancil("send_udp over interface", r);
502	sent = sendmsg(c->fd, &msg, 0);
503	if(sent == -1) {
504		if(!udp_send_errno_needs_log(addr, addrlen))
505			return 0;
506		verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
507		log_addr(VERB_OPS, "remote address is",
508			(struct sockaddr_storage*)addr, addrlen);
509		return 0;
510	} else if((size_t)sent != ldns_buffer_remaining(packet)) {
511		log_err("sent %d in place of %d bytes",
512			(int)sent, (int)ldns_buffer_remaining(packet));
513		return 0;
514	}
515	return 1;
516#else
517	(void)c;
518	(void)packet;
519	(void)addr;
520	(void)addrlen;
521	(void)r;
522	log_err("sendmsg: IPV6_PKTINFO not supported");
523	return 0;
524#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
525}
526
/**
 * Event callback for a UDP comm point that needs ancillary data: reads up
 * to NUM_UDP_PER_SELECT datagrams with recvmsg, records on which interface
 * address each one arrived, and hands it to the comm point callback.
 * If that callback returns nonzero, the buffer is sent back at once over
 * the same interface.
 * Without IPV6_PKTINFO and recvmsg support this function exits fatally.
 * @param fd: the socket that became readable.
 * @param event: event flags; only EV_READ is acted upon.
 * @param arg: the comm point (struct comm_point*).
 */
void
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* the union makes the byte buffer suitably aligned for the
	 * struct cmsghdr that the CMSG_ macros overlay on it */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		ldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(ldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = ldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = ldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			/* EAGAIN and EINTR are not worth a log line */
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		ldns_buffer_skip(rep.c->buffer, rcv);
		ldns_buffer_flip(rep.c->buffer);
		rep.srctype = 0;
#ifndef S_SPLINT_S
		/* the first pktinfo control message decides the source type */
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
		another UDP port; fd must not be read again in either case. */
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
621
622void
623comm_point_udp_callback(int fd, short event, void* arg)
624{
625	struct comm_reply rep;
626	ssize_t rcv;
627	int i;
628
629	rep.c = (struct comm_point*)arg;
630	log_assert(rep.c->type == comm_udp);
631
632	if(!(event&EV_READ))
633		return;
634	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
635	comm_base_now(rep.c->ev->base);
636	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
637		ldns_buffer_clear(rep.c->buffer);
638		rep.addrlen = (socklen_t)sizeof(rep.addr);
639		log_assert(fd != -1);
640		log_assert(ldns_buffer_remaining(rep.c->buffer) > 0);
641		rcv = recvfrom(fd, (void*)ldns_buffer_begin(rep.c->buffer),
642			ldns_buffer_remaining(rep.c->buffer), 0,
643			(struct sockaddr*)&rep.addr, &rep.addrlen);
644		if(rcv == -1) {
645#ifndef USE_WINSOCK
646			if(errno != EAGAIN && errno != EINTR)
647				log_err("recvfrom %d failed: %s",
648					fd, strerror(errno));
649#else
650			if(WSAGetLastError() != WSAEINPROGRESS &&
651				WSAGetLastError() != WSAECONNRESET &&
652				WSAGetLastError()!= WSAEWOULDBLOCK)
653				log_err("recvfrom failed: %s",
654					wsa_strerror(WSAGetLastError()));
655#endif
656			return;
657		}
658		ldns_buffer_skip(rep.c->buffer, rcv);
659		ldns_buffer_flip(rep.c->buffer);
660		rep.srctype = 0;
661		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
662		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
663			/* send back immediate reply */
664			(void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
665				(struct sockaddr*)&rep.addr, rep.addrlen);
666		}
667		if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
668		another UDP port. Note rep.c cannot be reused with TCP fd. */
669			break;
670	}
671}
672
673/** Use a new tcp handler for new query fd, set to read query */
674static void
675setup_tcp_handler(struct comm_point* c, int fd)
676{
677	log_assert(c->type == comm_tcp);
678	log_assert(c->fd == -1);
679	ldns_buffer_clear(c->buffer);
680	c->tcp_is_reading = 1;
681	c->tcp_byte_count = 0;
682	comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
683}
684
685void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
686	short ATTR_UNUSED(event), void* arg)
687{
688	struct comm_base* b = (struct comm_base*)arg;
689	/* timeout for the slow accept, re-enable accepts again */
690	if(b->start_accept) {
691		verbose(VERB_ALGO, "wait is over, slow accept disabled");
692		fptr_ok(fptr_whitelist_start_accept(b->start_accept));
693		(*b->start_accept)(b->cb_arg);
694		b->eb->slow_accept_enabled = 0;
695	}
696}
697
698int comm_point_perform_accept(struct comm_point* c,
699	struct sockaddr_storage* addr, socklen_t* addrlen)
700{
701	int new_fd;
702	*addrlen = (socklen_t)sizeof(*addr);
703	new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
704	if(new_fd == -1) {
705#ifndef USE_WINSOCK
706		/* EINTR is signal interrupt. others are closed connection. */
707		if(	errno == EINTR || errno == EAGAIN
708#ifdef EWOULDBLOCK
709			|| errno == EWOULDBLOCK
710#endif
711#ifdef ECONNABORTED
712			|| errno == ECONNABORTED
713#endif
714#ifdef EPROTO
715			|| errno == EPROTO
716#endif /* EPROTO */
717			)
718			return -1;
719#if defined(ENFILE) && defined(EMFILE)
720		if(errno == ENFILE || errno == EMFILE) {
721			/* out of file descriptors, likely outside of our
722			 * control. stop accept() calls for some time */
723			if(c->ev->base->stop_accept) {
724				struct comm_base* b = c->ev->base;
725				struct timeval tv;
726				verbose(VERB_ALGO, "out of file descriptors: "
727					"slow accept");
728				b->eb->slow_accept_enabled = 1;
729				fptr_ok(fptr_whitelist_stop_accept(
730					b->stop_accept));
731				(*b->stop_accept)(b->cb_arg);
732				/* set timeout, no mallocs */
733				tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
734				tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
735				event_set(&b->eb->slow_accept, -1, EV_TIMEOUT,
736					comm_base_handle_slow_accept, b);
737				if(event_base_set(b->eb->base,
738					&b->eb->slow_accept) != 0) {
739					/* we do not want to log here, because
740					 * that would spam the logfiles.
741					 * error: "event_base_set failed." */
742				}
743				if(event_add(&b->eb->slow_accept, &tv) != 0) {
744					/* we do not want to log here,
745					 * error: "event_add failed." */
746				}
747			}
748			return -1;
749		}
750#endif
751		log_err("accept failed: %s", strerror(errno));
752#else /* USE_WINSOCK */
753		if(WSAGetLastError() == WSAEINPROGRESS ||
754			WSAGetLastError() == WSAECONNRESET)
755			return -1;
756		if(WSAGetLastError() == WSAEWOULDBLOCK) {
757			winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
758			return -1;
759		}
760		log_err("accept failed: %s", wsa_strerror(WSAGetLastError()));
761#endif
762		log_addr(0, "remote address is", addr, *addrlen);
763		return -1;
764	}
765	fd_set_nonblock(new_fd);
766	return new_fd;
767}
768
769#ifdef USE_WINSOCK
770static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
771        int ATTR_UNUSED(argi), long argl, long retvalue)
772{
773	verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
774		(oper&BIO_CB_RETURN)?"return":"before",
775		(oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
776		WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
777	/* on windows, check if previous operation caused EWOULDBLOCK */
778	if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
779		(oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
780		if(WSAGetLastError() == WSAEWOULDBLOCK)
781			winsock_tcp_wouldblock((struct event*)
782				BIO_get_callback_arg(b), EV_READ);
783	}
784	if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
785		(oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
786		if(WSAGetLastError() == WSAEWOULDBLOCK)
787			winsock_tcp_wouldblock((struct event*)
788				BIO_get_callback_arg(b), EV_WRITE);
789	}
790	/* return original return value */
791	return retvalue;
792}
793
794/** set win bio callbacks for nonblocking operations */
795void
796comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
797{
798	SSL* ssl = (SSL*)thessl;
799	/* set them both just in case, but usually they are the same BIO */
800	BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
801	BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)&c->ev->ev);
802	BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
803	BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)&c->ev->ev);
804}
805#endif
806
807void
808comm_point_tcp_accept_callback(int fd, short event, void* arg)
809{
810	struct comm_point* c = (struct comm_point*)arg, *c_hdl;
811	int new_fd;
812	log_assert(c->type == comm_tcp_accept);
813	if(!(event & EV_READ)) {
814		log_info("ignoring tcp accept event %d", (int)event);
815		return;
816	}
817	comm_base_now(c->ev->base);
818	/* find free tcp handler. */
819	if(!c->tcp_free) {
820		log_warn("accepted too many tcp, connections full");
821		return;
822	}
823	/* accept incoming connection. */
824	c_hdl = c->tcp_free;
825	log_assert(fd != -1);
826	new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
827		&c_hdl->repinfo.addrlen);
828	if(new_fd == -1)
829		return;
830	if(c->ssl) {
831		c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
832		if(!c_hdl->ssl) {
833			c_hdl->fd = new_fd;
834			comm_point_close(c_hdl);
835			return;
836		}
837		c_hdl->ssl_shake_state = comm_ssl_shake_read;
838#ifdef USE_WINSOCK
839		comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
840#endif
841	}
842
843	/* grab the tcp handler buffers */
844	c->tcp_free = c_hdl->tcp_free;
845	if(!c->tcp_free) {
846		/* stop accepting incoming queries for now. */
847		comm_point_stop_listening(c);
848	}
849	/* addr is dropped. Not needed for tcp reply. */
850	setup_tcp_handler(c_hdl, new_fd);
851}
852
853/** Make tcp handler free for next assignment */
854static void
855reclaim_tcp_handler(struct comm_point* c)
856{
857	log_assert(c->type == comm_tcp);
858	if(c->ssl) {
859#ifdef HAVE_SSL
860		SSL_shutdown(c->ssl);
861		SSL_free(c->ssl);
862		c->ssl = NULL;
863#endif
864	}
865	comm_point_close(c);
866	if(c->tcp_parent) {
867		c->tcp_free = c->tcp_parent->tcp_free;
868		c->tcp_parent->tcp_free = c;
869		if(!c->tcp_free) {
870			/* re-enable listening on accept socket */
871			comm_point_start_listening(c->tcp_parent, -1, -1);
872		}
873	}
874}
875
876/** do the callback when writing is done */
877static void
878tcp_callback_writer(struct comm_point* c)
879{
880	log_assert(c->type == comm_tcp);
881	ldns_buffer_clear(c->buffer);
882	if(c->tcp_do_toggle_rw)
883		c->tcp_is_reading = 1;
884	c->tcp_byte_count = 0;
885	/* switch from listening(write) to listening(read) */
886	comm_point_stop_listening(c);
887	comm_point_start_listening(c, -1, -1);
888}
889
890/** do the callback when reading is done */
891static void
892tcp_callback_reader(struct comm_point* c)
893{
894	log_assert(c->type == comm_tcp || c->type == comm_local);
895	ldns_buffer_flip(c->buffer);
896	if(c->tcp_do_toggle_rw)
897		c->tcp_is_reading = 0;
898	c->tcp_byte_count = 0;
899	if(c->type == comm_tcp)
900		comm_point_stop_listening(c);
901	fptr_ok(fptr_whitelist_comm_point(c->callback));
902	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
903		comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
904	}
905}
906
/**
 * Continue the ssl handshake of a comm point.
 * The hs_read and hs_write states only restore the event registration and
 * reset the state; otherwise SSL_do_handshake is called and the comm point
 * is registered for the direction that ssl asks for.
 * @return 1 to continue (the handshake is finished once ssl_shake_state is
 *	comm_ssl_shake_none), 0 if the connection is closed or has failed.
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int r;
	switch(c->ssl_shake_state) {
	case comm_ssl_shake_hs_read:
		/* read condition satisfied back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	case comm_ssl_shake_hs_write:
		/* write condition satisfied, back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	default:
		break;
	}

	ERR_clear_error();
	r = SSL_do_handshake(c->ssl);
	if(r != 1) {
		int want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_WANT_READ) {
			/* only re-register if the direction changes */
			if(c->ssl_shake_state != comm_ssl_shake_read) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
			}
			return 1;
		}
		if(want == SSL_ERROR_WANT_WRITE) {
			if(c->ssl_shake_state != comm_ssl_shake_write) {
				c->ssl_shake_state = comm_ssl_shake_write;
				comm_point_listen_for_rw(c, 0, 1);
			}
			return 1;
		}
		if(r == 0)
			return 0; /* closed */
		if(want == SSL_ERROR_SYSCALL) {
			/* SYSCALL and errno==0 means closed uncleanly */
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err("ssl handshake failed");
		log_addr(1, "ssl handshake failed", &c->repinfo.addr,
			c->repinfo.addrlen);
		return 0;
	}
	/* this is where peer verification could take place */
	log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
		c->repinfo.addrlen);

	/* setup listen rw correctly */
	if(!c->tcp_is_reading) {
		comm_point_listen_for_rw(c, 1, 1);
	} else if(c->ssl_shake_state != comm_ssl_shake_read) {
		comm_point_listen_for_rw(c, 1, 0);
	}
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
972
#ifdef HAVE_SSL
/**
 * Decide how to continue after SSL_read returned r <= 0.
 * @param c: comm point that was read from.
 * @param r: the return value of SSL_read.
 * @return 1 to keep the connection and try again later, 0 to close it.
 */
static int
ssl_read_error_action(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP: continues a pending handshake, then reads the
 * two byte length prefix and after that the message itself. Once the
 * message is complete the reader callback is invoked.
 * @return 1 to keep the connection, 0 if it must be closed (always 0 when
 *	compiled without ssl).
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		size_t msglen;
		/* read length bytes */
		ERR_clear_error();
		r = SSL_read(c->ssl,
			(void*)ldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(r <= 0)
			return ssl_read_error_action(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		msglen = ldns_buffer_read_u16_at(c->buffer, 0);
		if(msglen > ldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		ldns_buffer_set_limit(c->buffer, msglen);
		if(ldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)ldns_buffer_limit(c->buffer));
	}
	log_assert(ldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)ldns_buffer_current(c->buffer),
		(int)ldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_read_error_action(c, r);
	ldns_buffer_skip(c->buffer, (ssize_t)r);
	if(ldns_buffer_remaining(c->buffer) <= 0) {
		/* message is complete */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1059
/**
 * ssl write callback on TCP.
 * Finishes a pending handshake first, then writes the two byte length
 * prefix followed by the contents of c->buffer.
 * @param c: comm point with an ssl connection.
 * @return: 0 on error or when the connection closed, 1 to continue.
 *	Always 0 if compiled without SSL.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block.
	 * The length prefix below is written from a local variable, and
	 * its address need not be the same when the write is retried
	 * after WANT_READ/WANT_WRITE.  OpenSSL refuses a retry from a
	 * different address unless the moving write buffer mode is set;
	 * the bytes that are passed on a retry are the same. */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE |
		SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* length prefix in network byte order */
		uint16_t len = htons(ldns_buffer_limit(c->buffer));
		ERR_clear_error();
		r = SSL_write(c->ssl,
			(void*)(((uint8_t*)&len)+c->tcp_byte_count),
			(int)(sizeof(uint16_t)-c->tcp_byte_count));
		if(r <= 0) {
			int want = SSL_get_error(c->ssl, r);
			if(want == SSL_ERROR_ZERO_RETURN) {
				return 0; /* closed */
			} else if(want == SSL_ERROR_WANT_READ) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
				return 1; /* wait for read condition */
			} else if(want == SSL_ERROR_WANT_WRITE) {
				return 1; /* write more later */
			} else if(want == SSL_ERROR_SYSCALL) {
				if(errno != 0)
					log_err("SSL_write syscall: %s",
						strerror(errno));
				return 0;
			}
			log_crypto_err("could not SSL_write");
			return 0;
		}
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* prefix done; position the buffer at the unsent data */
		ldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(ldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	/* write (part of) the message contents */
	log_assert(ldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)ldns_buffer_current(c->buffer),
		(int)ldns_buffer_remaining(c->buffer));
	if(r <= 0) {
		int want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_ZERO_RETURN) {
			return 0; /* closed */
		} else if(want == SSL_ERROR_WANT_READ) {
			c->ssl_shake_state = comm_ssl_shake_read;
			comm_point_listen_for_rw(c, 1, 0);
			return 1; /* wait for read condition */
		} else if(want == SSL_ERROR_WANT_WRITE) {
			return 1; /* write more later */
		} else if(want == SSL_ERROR_SYSCALL) {
			if(errno != 0)
				log_err("SSL_write syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err("could not SSL_write");
		return 0;
	}
	ldns_buffer_skip(c->buffer, (ssize_t)r);

	/* everything has been sent, tell the writer callback */
	if(ldns_buffer_remaining(c->buffer) == 0) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1143
1144/** handle ssl tcp connection with dns contents */
1145static int
1146ssl_handle_it(struct comm_point* c)
1147{
1148	if(c->tcp_is_reading)
1149		return ssl_handle_read(c);
1150	return ssl_handle_write(c);
1151}
1152
1153/** Handle tcp reading callback.
1154 * @param fd: file descriptor of socket.
1155 * @param c: comm point to read from into buffer.
1156 * @param short_ok: if true, very short packets are OK (for comm_local).
1157 * @return: 0 on error
1158 */
1159static int
1160comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1161{
1162	ssize_t r;
1163	log_assert(c->type == comm_tcp || c->type == comm_local);
1164	if(c->ssl)
1165		return ssl_handle_it(c);
1166	if(!c->tcp_is_reading)
1167		return 0;
1168
1169	log_assert(fd != -1);
1170	if(c->tcp_byte_count < sizeof(uint16_t)) {
1171		/* read length bytes */
1172		r = recv(fd,(void*)ldns_buffer_at(c->buffer,c->tcp_byte_count),
1173			sizeof(uint16_t)-c->tcp_byte_count, 0);
1174		if(r == 0)
1175			return 0;
1176		else if(r == -1) {
1177#ifndef USE_WINSOCK
1178			if(errno == EINTR || errno == EAGAIN)
1179				return 1;
1180#ifdef ECONNRESET
1181			if(errno == ECONNRESET && verbosity < 2)
1182				return 0; /* silence reset by peer */
1183#endif
1184			log_err("read (in tcp s): %s", strerror(errno));
1185#else /* USE_WINSOCK */
1186			if(WSAGetLastError() == WSAECONNRESET)
1187				return 0;
1188			if(WSAGetLastError() == WSAEINPROGRESS)
1189				return 1;
1190			if(WSAGetLastError() == WSAEWOULDBLOCK) {
1191				winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1192				return 1;
1193			}
1194			log_err("read (in tcp s): %s",
1195				wsa_strerror(WSAGetLastError()));
1196#endif
1197			log_addr(0, "remote address is", &c->repinfo.addr,
1198				c->repinfo.addrlen);
1199			return 0;
1200		}
1201		c->tcp_byte_count += r;
1202		if(c->tcp_byte_count != sizeof(uint16_t))
1203			return 1;
1204		if(ldns_buffer_read_u16_at(c->buffer, 0) >
1205			ldns_buffer_capacity(c->buffer)) {
1206			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1207			return 0;
1208		}
1209		ldns_buffer_set_limit(c->buffer,
1210			ldns_buffer_read_u16_at(c->buffer, 0));
1211		if(!short_ok &&
1212			ldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1213			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1214			return 0;
1215		}
1216		verbose(VERB_ALGO, "Reading tcp query of length %d",
1217			(int)ldns_buffer_limit(c->buffer));
1218	}
1219
1220	log_assert(ldns_buffer_remaining(c->buffer) > 0);
1221	r = recv(fd, (void*)ldns_buffer_current(c->buffer),
1222		ldns_buffer_remaining(c->buffer), 0);
1223	if(r == 0) {
1224		return 0;
1225	} else if(r == -1) {
1226#ifndef USE_WINSOCK
1227		if(errno == EINTR || errno == EAGAIN)
1228			return 1;
1229		log_err("read (in tcp r): %s", strerror(errno));
1230#else /* USE_WINSOCK */
1231		if(WSAGetLastError() == WSAECONNRESET)
1232			return 0;
1233		if(WSAGetLastError() == WSAEINPROGRESS)
1234			return 1;
1235		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1236			winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1237			return 1;
1238		}
1239		log_err("read (in tcp r): %s",
1240			wsa_strerror(WSAGetLastError()));
1241#endif
1242		log_addr(0, "remote address is", &c->repinfo.addr,
1243			c->repinfo.addrlen);
1244		return 0;
1245	}
1246	ldns_buffer_skip(c->buffer, r);
1247	if(ldns_buffer_remaining(c->buffer) <= 0) {
1248		tcp_callback_reader(c);
1249	}
1250	return 1;
1251}
1252
1253/**
1254 * Handle tcp writing callback.
1255 * @param fd: file descriptor of socket.
1256 * @param c: comm point to write buffer out of.
1257 * @return: 0 on error
1258 */
1259static int
1260comm_point_tcp_handle_write(int fd, struct comm_point* c)
1261{
1262	ssize_t r;
1263	log_assert(c->type == comm_tcp);
1264	if(c->tcp_is_reading && !c->ssl)
1265		return 0;
1266	log_assert(fd != -1);
1267	if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1268		/* check for pending error from nonblocking connect */
1269		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1270		int error = 0;
1271		socklen_t len = (socklen_t)sizeof(error);
1272		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
1273			&len) < 0){
1274#ifndef USE_WINSOCK
1275			error = errno; /* on solaris errno is error */
1276#else /* USE_WINSOCK */
1277			error = WSAGetLastError();
1278#endif
1279		}
1280#ifndef USE_WINSOCK
1281#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1282		if(error == EINPROGRESS || error == EWOULDBLOCK)
1283			return 1; /* try again later */
1284		else
1285#endif
1286		if(error != 0 && verbosity < 2)
1287			return 0; /* silence lots of chatter in the logs */
1288                else if(error != 0) {
1289			log_err("tcp connect: %s", strerror(error));
1290#else /* USE_WINSOCK */
1291		/* examine error */
1292		if(error == WSAEINPROGRESS)
1293			return 1;
1294		else if(error == WSAEWOULDBLOCK) {
1295			winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1296			return 1;
1297		} else if(error != 0 && verbosity < 2)
1298			return 0;
1299		else if(error != 0) {
1300			log_err("tcp connect: %s", wsa_strerror(error));
1301#endif /* USE_WINSOCK */
1302			log_addr(0, "remote address is", &c->repinfo.addr,
1303				c->repinfo.addrlen);
1304			return 0;
1305		}
1306	}
1307	if(c->ssl)
1308		return ssl_handle_it(c);
1309
1310	if(c->tcp_byte_count < sizeof(uint16_t)) {
1311		uint16_t len = htons(ldns_buffer_limit(c->buffer));
1312#ifdef HAVE_WRITEV
1313		struct iovec iov[2];
1314		iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1315		iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1316		iov[1].iov_base = ldns_buffer_begin(c->buffer);
1317		iov[1].iov_len = ldns_buffer_limit(c->buffer);
1318		log_assert(iov[0].iov_len > 0);
1319		log_assert(iov[1].iov_len > 0);
1320		r = writev(fd, iov, 2);
1321#else /* HAVE_WRITEV */
1322		r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1323			sizeof(uint16_t)-c->tcp_byte_count, 0);
1324#endif /* HAVE_WRITEV */
1325		if(r == -1) {
1326#ifndef USE_WINSOCK
1327#ifdef EPIPE
1328                	if(errno == EPIPE && verbosity < 2)
1329                        	return 0; /* silence 'broken pipe' */
1330#endif
1331			if(errno == EINTR || errno == EAGAIN)
1332				return 1;
1333			log_err("tcp writev: %s", strerror(errno));
1334#else
1335			if(WSAGetLastError() == WSAENOTCONN)
1336				return 1;
1337			if(WSAGetLastError() == WSAEINPROGRESS)
1338				return 1;
1339			if(WSAGetLastError() == WSAEWOULDBLOCK) {
1340				winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1341				return 1;
1342			}
1343			log_err("tcp send s: %s",
1344				wsa_strerror(WSAGetLastError()));
1345#endif
1346			log_addr(0, "remote address is", &c->repinfo.addr,
1347				c->repinfo.addrlen);
1348			return 0;
1349		}
1350		c->tcp_byte_count += r;
1351		if(c->tcp_byte_count < sizeof(uint16_t))
1352			return 1;
1353		ldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1354			sizeof(uint16_t));
1355		if(ldns_buffer_remaining(c->buffer) == 0) {
1356			tcp_callback_writer(c);
1357			return 1;
1358		}
1359	}
1360	log_assert(ldns_buffer_remaining(c->buffer) > 0);
1361	r = send(fd, (void*)ldns_buffer_current(c->buffer),
1362		ldns_buffer_remaining(c->buffer), 0);
1363	if(r == -1) {
1364#ifndef USE_WINSOCK
1365		if(errno == EINTR || errno == EAGAIN)
1366			return 1;
1367		log_err("tcp send r: %s", strerror(errno));
1368#else
1369		if(WSAGetLastError() == WSAEINPROGRESS)
1370			return 1;
1371		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1372			winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1373			return 1;
1374		}
1375		log_err("tcp send r: %s",
1376			wsa_strerror(WSAGetLastError()));
1377#endif
1378		log_addr(0, "remote address is", &c->repinfo.addr,
1379			c->repinfo.addrlen);
1380		return 0;
1381	}
1382	ldns_buffer_skip(c->buffer, r);
1383
1384	if(ldns_buffer_remaining(c->buffer) == 0) {
1385		tcp_callback_writer(c);
1386	}
1387
1388	return 1;
1389}
1390
1391void
1392comm_point_tcp_handle_callback(int fd, short event, void* arg)
1393{
1394	struct comm_point* c = (struct comm_point*)arg;
1395	log_assert(c->type == comm_tcp);
1396	comm_base_now(c->ev->base);
1397
1398	if(event&EV_READ) {
1399		if(!comm_point_tcp_handle_read(fd, c, 0)) {
1400			reclaim_tcp_handler(c);
1401			if(!c->tcp_do_close) {
1402				fptr_ok(fptr_whitelist_comm_point(
1403					c->callback));
1404				(void)(*c->callback)(c, c->cb_arg,
1405					NETEVENT_CLOSED, NULL);
1406			}
1407		}
1408		return;
1409	}
1410	if(event&EV_WRITE) {
1411		if(!comm_point_tcp_handle_write(fd, c)) {
1412			reclaim_tcp_handler(c);
1413			if(!c->tcp_do_close) {
1414				fptr_ok(fptr_whitelist_comm_point(
1415					c->callback));
1416				(void)(*c->callback)(c, c->cb_arg,
1417					NETEVENT_CLOSED, NULL);
1418			}
1419		}
1420		return;
1421	}
1422	if(event&EV_TIMEOUT) {
1423		verbose(VERB_QUERY, "tcp took too long, dropped");
1424		reclaim_tcp_handler(c);
1425		if(!c->tcp_do_close) {
1426			fptr_ok(fptr_whitelist_comm_point(c->callback));
1427			(void)(*c->callback)(c, c->cb_arg,
1428				NETEVENT_TIMEOUT, NULL);
1429		}
1430		return;
1431	}
1432	log_err("Ignored event %d for tcphdl.", event);
1433}
1434
1435void comm_point_local_handle_callback(int fd, short event, void* arg)
1436{
1437	struct comm_point* c = (struct comm_point*)arg;
1438	log_assert(c->type == comm_local);
1439	comm_base_now(c->ev->base);
1440
1441	if(event&EV_READ) {
1442		if(!comm_point_tcp_handle_read(fd, c, 1)) {
1443			fptr_ok(fptr_whitelist_comm_point(c->callback));
1444			(void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
1445				NULL);
1446		}
1447		return;
1448	}
1449	log_err("Ignored event %d for localhdl.", event);
1450}
1451
1452void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
1453	short event, void* arg)
1454{
1455	struct comm_point* c = (struct comm_point*)arg;
1456	int err = NETEVENT_NOERROR;
1457	log_assert(c->type == comm_raw);
1458	comm_base_now(c->ev->base);
1459
1460	if(event&EV_TIMEOUT)
1461		err = NETEVENT_TIMEOUT;
1462	fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1463	(void)(*c->callback)(c, c->cb_arg, err, NULL);
1464}
1465
1466struct comm_point*
1467comm_point_create_udp(struct comm_base *base, int fd, ldns_buffer* buffer,
1468	comm_point_callback_t* callback, void* callback_arg)
1469{
1470	struct comm_point* c = (struct comm_point*)calloc(1,
1471		sizeof(struct comm_point));
1472	short evbits;
1473	if(!c)
1474		return NULL;
1475	c->ev = (struct internal_event*)calloc(1,
1476		sizeof(struct internal_event));
1477	if(!c->ev) {
1478		free(c);
1479		return NULL;
1480	}
1481	c->ev->base = base;
1482	c->fd = fd;
1483	c->buffer = buffer;
1484	c->timeout = NULL;
1485	c->tcp_is_reading = 0;
1486	c->tcp_byte_count = 0;
1487	c->tcp_parent = NULL;
1488	c->max_tcp_count = 0;
1489	c->tcp_handlers = NULL;
1490	c->tcp_free = NULL;
1491	c->type = comm_udp;
1492	c->tcp_do_close = 0;
1493	c->do_not_close = 0;
1494	c->tcp_do_toggle_rw = 0;
1495	c->tcp_check_nb_connect = 0;
1496	c->inuse = 0;
1497	c->callback = callback;
1498	c->cb_arg = callback_arg;
1499	evbits = EV_READ | EV_PERSIST;
1500	/* libevent stuff */
1501	event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1502	if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1503		log_err("could not baseset udp event");
1504		comm_point_delete(c);
1505		return NULL;
1506	}
1507	if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1508		log_err("could not add udp event");
1509		comm_point_delete(c);
1510		return NULL;
1511	}
1512	return c;
1513}
1514
1515struct comm_point*
1516comm_point_create_udp_ancil(struct comm_base *base, int fd,
1517	ldns_buffer* buffer,
1518	comm_point_callback_t* callback, void* callback_arg)
1519{
1520	struct comm_point* c = (struct comm_point*)calloc(1,
1521		sizeof(struct comm_point));
1522	short evbits;
1523	if(!c)
1524		return NULL;
1525	c->ev = (struct internal_event*)calloc(1,
1526		sizeof(struct internal_event));
1527	if(!c->ev) {
1528		free(c);
1529		return NULL;
1530	}
1531	c->ev->base = base;
1532	c->fd = fd;
1533	c->buffer = buffer;
1534	c->timeout = NULL;
1535	c->tcp_is_reading = 0;
1536	c->tcp_byte_count = 0;
1537	c->tcp_parent = NULL;
1538	c->max_tcp_count = 0;
1539	c->tcp_handlers = NULL;
1540	c->tcp_free = NULL;
1541	c->type = comm_udp;
1542	c->tcp_do_close = 0;
1543	c->do_not_close = 0;
1544	c->inuse = 0;
1545	c->tcp_do_toggle_rw = 0;
1546	c->tcp_check_nb_connect = 0;
1547	c->callback = callback;
1548	c->cb_arg = callback_arg;
1549	evbits = EV_READ | EV_PERSIST;
1550	/* libevent stuff */
1551	event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1552	if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1553		log_err("could not baseset udp event");
1554		comm_point_delete(c);
1555		return NULL;
1556	}
1557	if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1558		log_err("could not add udp event");
1559		comm_point_delete(c);
1560		return NULL;
1561	}
1562	return c;
1563}
1564
1565static struct comm_point*
1566comm_point_create_tcp_handler(struct comm_base *base,
1567	struct comm_point* parent, size_t bufsize,
1568        comm_point_callback_t* callback, void* callback_arg)
1569{
1570	struct comm_point* c = (struct comm_point*)calloc(1,
1571		sizeof(struct comm_point));
1572	short evbits;
1573	if(!c)
1574		return NULL;
1575	c->ev = (struct internal_event*)calloc(1,
1576		sizeof(struct internal_event));
1577	if(!c->ev) {
1578		free(c);
1579		return NULL;
1580	}
1581	c->ev->base = base;
1582	c->fd = -1;
1583	c->buffer = ldns_buffer_new(bufsize);
1584	if(!c->buffer) {
1585		free(c->ev);
1586		free(c);
1587		return NULL;
1588	}
1589	c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1590	if(!c->timeout) {
1591		ldns_buffer_free(c->buffer);
1592		free(c->ev);
1593		free(c);
1594		return NULL;
1595	}
1596	c->tcp_is_reading = 0;
1597	c->tcp_byte_count = 0;
1598	c->tcp_parent = parent;
1599	c->max_tcp_count = 0;
1600	c->tcp_handlers = NULL;
1601	c->tcp_free = NULL;
1602	c->type = comm_tcp;
1603	c->tcp_do_close = 0;
1604	c->do_not_close = 0;
1605	c->tcp_do_toggle_rw = 1;
1606	c->tcp_check_nb_connect = 0;
1607	c->repinfo.c = c;
1608	c->callback = callback;
1609	c->cb_arg = callback_arg;
1610	/* add to parent free list */
1611	c->tcp_free = parent->tcp_free;
1612	parent->tcp_free = c;
1613	/* libevent stuff */
1614	evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1615	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1616	if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1617	{
1618		log_err("could not basetset tcphdl event");
1619		parent->tcp_free = c->tcp_free;
1620		free(c->ev);
1621		free(c);
1622		return NULL;
1623	}
1624	return c;
1625}
1626
1627struct comm_point*
1628comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1629        comm_point_callback_t* callback, void* callback_arg)
1630{
1631	struct comm_point* c = (struct comm_point*)calloc(1,
1632		sizeof(struct comm_point));
1633	short evbits;
1634	int i;
1635	/* first allocate the TCP accept listener */
1636	if(!c)
1637		return NULL;
1638	c->ev = (struct internal_event*)calloc(1,
1639		sizeof(struct internal_event));
1640	if(!c->ev) {
1641		free(c);
1642		return NULL;
1643	}
1644	c->ev->base = base;
1645	c->fd = fd;
1646	c->buffer = NULL;
1647	c->timeout = NULL;
1648	c->tcp_is_reading = 0;
1649	c->tcp_byte_count = 0;
1650	c->tcp_parent = NULL;
1651	c->max_tcp_count = num;
1652	c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1653		sizeof(struct comm_point*));
1654	if(!c->tcp_handlers) {
1655		free(c->ev);
1656		free(c);
1657		return NULL;
1658	}
1659	c->tcp_free = NULL;
1660	c->type = comm_tcp_accept;
1661	c->tcp_do_close = 0;
1662	c->do_not_close = 0;
1663	c->tcp_do_toggle_rw = 0;
1664	c->tcp_check_nb_connect = 0;
1665	c->callback = NULL;
1666	c->cb_arg = NULL;
1667	evbits = EV_READ | EV_PERSIST;
1668	/* libevent stuff */
1669	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1670	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1671		event_add(&c->ev->ev, c->timeout) != 0 )
1672	{
1673		log_err("could not add tcpacc event");
1674		comm_point_delete(c);
1675		return NULL;
1676	}
1677
1678	/* now prealloc the tcp handlers */
1679	for(i=0; i<num; i++) {
1680		c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1681			c, bufsize, callback, callback_arg);
1682		if(!c->tcp_handlers[i]) {
1683			comm_point_delete(c);
1684			return NULL;
1685		}
1686	}
1687
1688	return c;
1689}
1690
1691struct comm_point*
1692comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1693        comm_point_callback_t* callback, void* callback_arg)
1694{
1695	struct comm_point* c = (struct comm_point*)calloc(1,
1696		sizeof(struct comm_point));
1697	short evbits;
1698	if(!c)
1699		return NULL;
1700	c->ev = (struct internal_event*)calloc(1,
1701		sizeof(struct internal_event));
1702	if(!c->ev) {
1703		free(c);
1704		return NULL;
1705	}
1706	c->ev->base = base;
1707	c->fd = -1;
1708	c->buffer = ldns_buffer_new(bufsize);
1709	if(!c->buffer) {
1710		free(c->ev);
1711		free(c);
1712		return NULL;
1713	}
1714	c->timeout = NULL;
1715	c->tcp_is_reading = 0;
1716	c->tcp_byte_count = 0;
1717	c->tcp_parent = NULL;
1718	c->max_tcp_count = 0;
1719	c->tcp_handlers = NULL;
1720	c->tcp_free = NULL;
1721	c->type = comm_tcp;
1722	c->tcp_do_close = 0;
1723	c->do_not_close = 0;
1724	c->tcp_do_toggle_rw = 1;
1725	c->tcp_check_nb_connect = 1;
1726	c->repinfo.c = c;
1727	c->callback = callback;
1728	c->cb_arg = callback_arg;
1729	evbits = EV_PERSIST | EV_WRITE;
1730	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1731	if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1732	{
1733		log_err("could not basetset tcpout event");
1734		ldns_buffer_free(c->buffer);
1735		free(c->ev);
1736		free(c);
1737		return NULL;
1738	}
1739
1740	return c;
1741}
1742
1743struct comm_point*
1744comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1745        comm_point_callback_t* callback, void* callback_arg)
1746{
1747	struct comm_point* c = (struct comm_point*)calloc(1,
1748		sizeof(struct comm_point));
1749	short evbits;
1750	if(!c)
1751		return NULL;
1752	c->ev = (struct internal_event*)calloc(1,
1753		sizeof(struct internal_event));
1754	if(!c->ev) {
1755		free(c);
1756		return NULL;
1757	}
1758	c->ev->base = base;
1759	c->fd = fd;
1760	c->buffer = ldns_buffer_new(bufsize);
1761	if(!c->buffer) {
1762		free(c->ev);
1763		free(c);
1764		return NULL;
1765	}
1766	c->timeout = NULL;
1767	c->tcp_is_reading = 1;
1768	c->tcp_byte_count = 0;
1769	c->tcp_parent = NULL;
1770	c->max_tcp_count = 0;
1771	c->tcp_handlers = NULL;
1772	c->tcp_free = NULL;
1773	c->type = comm_local;
1774	c->tcp_do_close = 0;
1775	c->do_not_close = 1;
1776	c->tcp_do_toggle_rw = 0;
1777	c->tcp_check_nb_connect = 0;
1778	c->callback = callback;
1779	c->cb_arg = callback_arg;
1780	/* libevent stuff */
1781	evbits = EV_PERSIST | EV_READ;
1782	event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback,
1783		c);
1784	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1785		event_add(&c->ev->ev, c->timeout) != 0 )
1786	{
1787		log_err("could not add localhdl event");
1788		free(c->ev);
1789		free(c);
1790		return NULL;
1791	}
1792	return c;
1793}
1794
1795struct comm_point*
1796comm_point_create_raw(struct comm_base* base, int fd, int writing,
1797	comm_point_callback_t* callback, void* callback_arg)
1798{
1799	struct comm_point* c = (struct comm_point*)calloc(1,
1800		sizeof(struct comm_point));
1801	short evbits;
1802	if(!c)
1803		return NULL;
1804	c->ev = (struct internal_event*)calloc(1,
1805		sizeof(struct internal_event));
1806	if(!c->ev) {
1807		free(c);
1808		return NULL;
1809	}
1810	c->ev->base = base;
1811	c->fd = fd;
1812	c->buffer = NULL;
1813	c->timeout = NULL;
1814	c->tcp_is_reading = 0;
1815	c->tcp_byte_count = 0;
1816	c->tcp_parent = NULL;
1817	c->max_tcp_count = 0;
1818	c->tcp_handlers = NULL;
1819	c->tcp_free = NULL;
1820	c->type = comm_raw;
1821	c->tcp_do_close = 0;
1822	c->do_not_close = 1;
1823	c->tcp_do_toggle_rw = 0;
1824	c->tcp_check_nb_connect = 0;
1825	c->callback = callback;
1826	c->cb_arg = callback_arg;
1827	/* libevent stuff */
1828	if(writing)
1829		evbits = EV_PERSIST | EV_WRITE;
1830	else 	evbits = EV_PERSIST | EV_READ;
1831	event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback,
1832		c);
1833	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1834		event_add(&c->ev->ev, c->timeout) != 0 )
1835	{
1836		log_err("could not add rawhdl event");
1837		free(c->ev);
1838		free(c);
1839		return NULL;
1840	}
1841	return c;
1842}
1843
1844void
1845comm_point_close(struct comm_point* c)
1846{
1847	if(!c)
1848		return;
1849	if(c->fd != -1)
1850		if(event_del(&c->ev->ev) != 0) {
1851			log_err("could not event_del on close");
1852		}
1853	/* close fd after removing from event lists, or epoll.. is messed up */
1854	if(c->fd != -1 && !c->do_not_close) {
1855		verbose(VERB_ALGO, "close fd %d", c->fd);
1856#ifndef USE_WINSOCK
1857		close(c->fd);
1858#else
1859		closesocket(c->fd);
1860#endif
1861	}
1862	c->fd = -1;
1863}
1864
1865void
1866comm_point_delete(struct comm_point* c)
1867{
1868	if(!c)
1869		return;
1870	if(c->type == comm_tcp && c->ssl) {
1871#ifdef HAVE_SSL
1872		SSL_shutdown(c->ssl);
1873		SSL_free(c->ssl);
1874#endif
1875	}
1876	comm_point_close(c);
1877	if(c->tcp_handlers) {
1878		int i;
1879		for(i=0; i<c->max_tcp_count; i++)
1880			comm_point_delete(c->tcp_handlers[i]);
1881		free(c->tcp_handlers);
1882	}
1883	free(c->timeout);
1884	if(c->type == comm_tcp || c->type == comm_local)
1885		ldns_buffer_free(c->buffer);
1886	free(c->ev);
1887	free(c);
1888}
1889
1890void
1891comm_point_send_reply(struct comm_reply *repinfo)
1892{
1893	log_assert(repinfo && repinfo->c);
1894	if(repinfo->c->type == comm_udp) {
1895		if(repinfo->srctype)
1896			comm_point_send_udp_msg_if(repinfo->c,
1897			repinfo->c->buffer, (struct sockaddr*)&repinfo->addr,
1898			repinfo->addrlen, repinfo);
1899		else
1900			comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
1901			(struct sockaddr*)&repinfo->addr, repinfo->addrlen);
1902	} else {
1903		comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
1904	}
1905}
1906
1907void
1908comm_point_drop_reply(struct comm_reply* repinfo)
1909{
1910	if(!repinfo)
1911		return;
1912	log_assert(repinfo && repinfo->c);
1913	log_assert(repinfo->c->type != comm_tcp_accept);
1914	if(repinfo->c->type == comm_udp)
1915		return;
1916	reclaim_tcp_handler(repinfo->c);
1917}
1918
1919void
1920comm_point_stop_listening(struct comm_point* c)
1921{
1922	verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
1923	if(event_del(&c->ev->ev) != 0) {
1924		log_err("event_del error to stoplisten");
1925	}
1926}
1927
1928void
1929comm_point_start_listening(struct comm_point* c, int newfd, int sec)
1930{
1931	verbose(VERB_ALGO, "comm point start listening %d",
1932		c->fd==-1?newfd:c->fd);
1933	if(c->type == comm_tcp_accept && !c->tcp_free) {
1934		/* no use to start listening no free slots. */
1935		return;
1936	}
1937	if(sec != -1 && sec != 0) {
1938		if(!c->timeout) {
1939			c->timeout = (struct timeval*)malloc(sizeof(
1940				struct timeval));
1941			if(!c->timeout) {
1942				log_err("cpsl: malloc failed. No net read.");
1943				return;
1944			}
1945		}
1946		c->ev->ev.ev_events |= EV_TIMEOUT;
1947#ifndef S_SPLINT_S /* splint fails on struct timeval. */
1948		c->timeout->tv_sec = sec;
1949		c->timeout->tv_usec = 0;
1950#endif /* S_SPLINT_S */
1951	}
1952	if(c->type == comm_tcp) {
1953		c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1954		if(c->tcp_is_reading)
1955			c->ev->ev.ev_events |= EV_READ;
1956		else	c->ev->ev.ev_events |= EV_WRITE;
1957	}
1958	if(newfd != -1) {
1959		if(c->fd != -1) {
1960#ifndef USE_WINSOCK
1961			close(c->fd);
1962#else
1963			closesocket(c->fd);
1964#endif
1965		}
1966		c->fd = newfd;
1967		c->ev->ev.ev_fd = c->fd;
1968	}
1969	if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
1970		log_err("event_add failed. in cpsl.");
1971	}
1972}
1973
1974void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
1975{
1976	verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
1977	if(event_del(&c->ev->ev) != 0) {
1978		log_err("event_del error to cplf");
1979	}
1980	c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1981	if(rd) c->ev->ev.ev_events |= EV_READ;
1982	if(wr) c->ev->ev.ev_events |= EV_WRITE;
1983	if(event_add(&c->ev->ev, c->timeout) != 0) {
1984		log_err("event_add failed. in cplf.");
1985	}
1986}
1987
1988size_t comm_point_get_mem(struct comm_point* c)
1989{
1990	size_t s;
1991	if(!c)
1992		return 0;
1993	s = sizeof(*c) + sizeof(*c->ev);
1994	if(c->timeout)
1995		s += sizeof(*c->timeout);
1996	if(c->type == comm_tcp || c->type == comm_local)
1997		s += sizeof(*c->buffer) + ldns_buffer_capacity(c->buffer);
1998	if(c->type == comm_tcp_accept) {
1999		int i;
2000		for(i=0; i<c->max_tcp_count; i++)
2001			s += comm_point_get_mem(c->tcp_handlers[i]);
2002	}
2003	return s;
2004}
2005
2006struct comm_timer*
2007comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2008{
2009	struct comm_timer *tm = (struct comm_timer*)calloc(1,
2010		sizeof(struct comm_timer));
2011	if(!tm)
2012		return NULL;
2013	tm->ev_timer = (struct internal_timer*)calloc(1,
2014		sizeof(struct internal_timer));
2015	if(!tm->ev_timer) {
2016		log_err("malloc failed");
2017		free(tm);
2018		return NULL;
2019	}
2020	tm->ev_timer->base = base;
2021	tm->callback = cb;
2022	tm->cb_arg = cb_arg;
2023	event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT,
2024		comm_timer_callback, tm);
2025	if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2026		log_err("timer_create: event_base_set failed.");
2027		free(tm->ev_timer);
2028		free(tm);
2029		return NULL;
2030	}
2031	return tm;
2032}
2033
2034void
2035comm_timer_disable(struct comm_timer* timer)
2036{
2037	if(!timer)
2038		return;
2039	evtimer_del(&timer->ev_timer->ev);
2040	timer->ev_timer->enabled = 0;
2041}
2042
2043void
2044comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2045{
2046	log_assert(tv);
2047	if(timer->ev_timer->enabled)
2048		comm_timer_disable(timer);
2049	event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2050		comm_timer_callback, timer);
2051	if(event_base_set(timer->ev_timer->base->eb->base,
2052		&timer->ev_timer->ev) != 0)
2053		log_err("comm_timer_set: set_base failed.");
2054	if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2055		log_err("comm_timer_set: evtimer_add failed.");
2056	timer->ev_timer->enabled = 1;
2057}
2058
2059void
2060comm_timer_delete(struct comm_timer* timer)
2061{
2062	if(!timer)
2063		return;
2064	comm_timer_disable(timer);
2065	free(timer->ev_timer);
2066	free(timer);
2067}
2068
2069void
2070comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2071{
2072	struct comm_timer* tm = (struct comm_timer*)arg;
2073	if(!(event&EV_TIMEOUT))
2074		return;
2075	comm_base_now(tm->ev_timer->base);
2076	tm->ev_timer->enabled = 0;
2077	fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2078	(*tm->callback)(tm->cb_arg);
2079}
2080
2081int
2082comm_timer_is_set(struct comm_timer* timer)
2083{
2084	return (int)timer->ev_timer->enabled;
2085}
2086
2087size_t
2088comm_timer_get_mem(struct comm_timer* timer)
2089{
2090	return sizeof(*timer) + sizeof(struct internal_timer);
2091}
2092
2093struct comm_signal*
2094comm_signal_create(struct comm_base* base,
2095        void (*callback)(int, void*), void* cb_arg)
2096{
2097	struct comm_signal* com = (struct comm_signal*)malloc(
2098		sizeof(struct comm_signal));
2099	if(!com) {
2100		log_err("malloc failed");
2101		return NULL;
2102	}
2103	com->base = base;
2104	com->callback = callback;
2105	com->cb_arg = cb_arg;
2106	com->ev_signal = NULL;
2107	return com;
2108}
2109
2110void
2111comm_signal_callback(int sig, short event, void* arg)
2112{
2113	struct comm_signal* comsig = (struct comm_signal*)arg;
2114	if(!(event & EV_SIGNAL))
2115		return;
2116	comm_base_now(comsig->base);
2117	fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2118	(*comsig->callback)(sig, comsig->cb_arg);
2119}
2120
2121int
2122comm_signal_bind(struct comm_signal* comsig, int sig)
2123{
2124	struct internal_signal* entry = (struct internal_signal*)calloc(1,
2125		sizeof(struct internal_signal));
2126	if(!entry) {
2127		log_err("malloc failed");
2128		return 0;
2129	}
2130	log_assert(comsig);
2131	/* add signal event */
2132	signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2133	if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2134		log_err("Could not set signal base");
2135		free(entry);
2136		return 0;
2137	}
2138	if(signal_add(&entry->ev, NULL) != 0) {
2139		log_err("Could not add signal handler");
2140		free(entry);
2141		return 0;
2142	}
2143	/* link into list */
2144	entry->next = comsig->ev_signal;
2145	comsig->ev_signal = entry;
2146	return 1;
2147}
2148
2149void
2150comm_signal_delete(struct comm_signal* comsig)
2151{
2152	struct internal_signal* p, *np;
2153	if(!comsig)
2154		return;
2155	p=comsig->ev_signal;
2156	while(p) {
2157		np = p->next;
2158		signal_del(&p->ev);
2159		free(p);
2160		p = np;
2161	}
2162	free(comsig);
2163}
2164