1238106Sdes/*
2238106Sdes * util/netevent.c - event notification
3238106Sdes *
4238106Sdes * Copyright (c) 2007, NLnet Labs. All rights reserved.
5238106Sdes *
6238106Sdes * This software is open source.
7238106Sdes *
8238106Sdes * Redistribution and use in source and binary forms, with or without
9238106Sdes * modification, are permitted provided that the following conditions
10238106Sdes * are met:
11238106Sdes *
12238106Sdes * Redistributions of source code must retain the above copyright notice,
13238106Sdes * this list of conditions and the following disclaimer.
14238106Sdes *
15238106Sdes * Redistributions in binary form must reproduce the above copyright notice,
16238106Sdes * this list of conditions and the following disclaimer in the documentation
17238106Sdes * and/or other materials provided with the distribution.
18238106Sdes *
19238106Sdes * Neither the name of the NLNET LABS nor the names of its contributors may
20238106Sdes * be used to endorse or promote products derived from this software without
21238106Sdes * specific prior written permission.
22238106Sdes *
23238106Sdes * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24238106Sdes * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25238106Sdes * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26238106Sdes * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
27238106Sdes * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28238106Sdes * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29238106Sdes * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30238106Sdes * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31238106Sdes * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32238106Sdes * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33238106Sdes * POSSIBILITY OF SUCH DAMAGE.
34238106Sdes */
35238106Sdes
36238106Sdes/**
37238106Sdes * \file
38238106Sdes *
39238106Sdes * This file contains event notification functions.
40238106Sdes */
41238106Sdes#include "config.h"
42238106Sdes#include <ldns/wire2host.h>
43238106Sdes#include "util/netevent.h"
44238106Sdes#include "util/log.h"
45238106Sdes#include "util/net_help.h"
46238106Sdes#include "util/fptr_wlist.h"
47249141Sdes#ifdef HAVE_OPENSSL_SSL_H
48238106Sdes#include <openssl/ssl.h>
49249141Sdes#endif
50249141Sdes#ifdef HAVE_OPENSSL_ERR_H
51238106Sdes#include <openssl/err.h>
52249141Sdes#endif
53238106Sdes
54238106Sdes/* -------- Start of local definitions -------- */
/*
 * Fallback definitions for the ancillary data (cmsg) size macros.
 * Each one is only defined when the platform headers did not provide it.
 */

/** CMSG_ALIGN workaround: use the platform variant if there is one,
 * otherwise round the length up to a multiple of sizeof(long) */
#if !defined(CMSG_ALIGN)
#  if defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(n) (((n)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/** CMSG_LEN workaround: aligned header size plus the data length */
#if !defined(CMSG_LEN)
#  define CMSG_LEN(n) (CMSG_ALIGN(sizeof(struct cmsghdr))+(n))
#endif

/** CMSG_SPACE workaround: aligned data length plus aligned header size */
#if !defined(CMSG_SPACE)
#  if defined(_CMSG_HDR_ALIGN)
#    define CMSG_SPACE(n) (CMSG_ALIGN(n)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(n) (CMSG_ALIGN(n)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
77238106Sdes
/** Timeout in seconds for reading or writing a query over TCP */
#define TCP_QUERY_TIMEOUT 120

/** Number of UDP reads to perform per read indication from select;
 * only a single read is done when NONBLOCKING_IS_BROKEN is defined */
#ifdef NONBLOCKING_IS_BROKEN
#  define NUM_UDP_PER_SELECT 1
#else
#  define NUM_UDP_PER_SELECT 100
#endif
87238106Sdes
88238106Sdes/* We define libevent structures here to hide the libevent stuff. */
89238106Sdes
90238106Sdes#ifdef USE_MINI_EVENT
91238106Sdes#  ifdef USE_WINSOCK
92238106Sdes#    include "util/winsock_event.h"
93238106Sdes#  else
94238106Sdes#    include "util/mini_event.h"
95238106Sdes#  endif /* USE_WINSOCK */
96238106Sdes#else /* USE_MINI_EVENT */
97238106Sdes   /* we use libevent */
98249141Sdes#  ifdef HAVE_EVENT_H
99249141Sdes#    include <event.h>
100249141Sdes#  else
101249141Sdes#    include "event2/event.h"
102249141Sdes#    include "event2/event_struct.h"
103249141Sdes#    include "event2/event_compat.h"
104249141Sdes#  endif
105238106Sdes#endif /* USE_MINI_EVENT */
106238106Sdes
107238106Sdes/**
108238106Sdes * The internal event structure for keeping libevent info for the event.
109238106Sdes * Possibly other structures (list, tree) this is part of.
110238106Sdes */
111238106Sdesstruct internal_event {
112238106Sdes	/** the comm base */
113238106Sdes	struct comm_base* base;
114238106Sdes	/** libevent event type, alloced here */
115238106Sdes	struct event ev;
116238106Sdes};
117238106Sdes
118238106Sdes/**
119238106Sdes * Internal base structure, so that every thread has its own events.
120238106Sdes */
121238106Sdesstruct internal_base {
122238106Sdes	/** libevent event_base type. */
123238106Sdes	struct event_base* base;
124238106Sdes	/** seconds time pointer points here */
125238106Sdes	uint32_t secs;
126238106Sdes	/** timeval with current time */
127238106Sdes	struct timeval now;
128238106Sdes	/** the event used for slow_accept timeouts */
129238106Sdes	struct event slow_accept;
130238106Sdes	/** true if slow_accept is enabled */
131238106Sdes	int slow_accept_enabled;
132238106Sdes};
133238106Sdes
134238106Sdes/**
135238106Sdes * Internal timer structure, to store timer event in.
136238106Sdes */
137238106Sdesstruct internal_timer {
138238106Sdes	/** the comm base */
139238106Sdes	struct comm_base* base;
140238106Sdes	/** libevent event type, alloced here */
141238106Sdes	struct event ev;
142238106Sdes	/** is timer enabled */
143238106Sdes	uint8_t enabled;
144238106Sdes};
145238106Sdes
146238106Sdes/**
147238106Sdes * Internal signal structure, to store signal event in.
148238106Sdes */
149238106Sdesstruct internal_signal {
150238106Sdes	/** libevent event type, alloced here */
151238106Sdes	struct event ev;
152238106Sdes	/** next in signal list */
153238106Sdes	struct internal_signal* next;
154238106Sdes};
155238106Sdes
156238106Sdes/** create a tcp handler with a parent */
157238106Sdesstatic struct comm_point* comm_point_create_tcp_handler(
158238106Sdes	struct comm_base *base, struct comm_point* parent, size_t bufsize,
159238106Sdes        comm_point_callback_t* callback, void* callback_arg);
160238106Sdes
161238106Sdes/* -------- End of local definitions -------- */
162238106Sdes
163238106Sdes#ifdef USE_MINI_EVENT
164238106Sdes/** minievent updates the time when it blocks. */
165238106Sdes#define comm_base_now(x) /* nothing to do */
166238106Sdes#else /* !USE_MINI_EVENT */
167238106Sdes/** fillup the time values in the event base */
168238106Sdesstatic void
169238106Sdescomm_base_now(struct comm_base* b)
170238106Sdes{
171238106Sdes	if(gettimeofday(&b->eb->now, NULL) < 0) {
172238106Sdes		log_err("gettimeofday: %s", strerror(errno));
173238106Sdes	}
174238106Sdes	b->eb->secs = (uint32_t)b->eb->now.tv_sec;
175238106Sdes}
176238106Sdes#endif /* USE_MINI_EVENT */
177238106Sdes
178238106Sdesstruct comm_base*
179238106Sdescomm_base_create(int sigs)
180238106Sdes{
181238106Sdes	struct comm_base* b = (struct comm_base*)calloc(1,
182238106Sdes		sizeof(struct comm_base));
183238106Sdes	if(!b)
184238106Sdes		return NULL;
185238106Sdes	b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
186238106Sdes	if(!b->eb) {
187238106Sdes		free(b);
188238106Sdes		return NULL;
189238106Sdes	}
190238106Sdes#ifdef USE_MINI_EVENT
191238106Sdes	(void)sigs;
192238106Sdes	/* use mini event time-sharing feature */
193238106Sdes	b->eb->base = event_init(&b->eb->secs, &b->eb->now);
194238106Sdes#else
195238106Sdes#  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
196238106Sdes	/* libev */
197238106Sdes	if(sigs)
198238106Sdes		b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
199238106Sdes	else
200238106Sdes		b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
201238106Sdes#  else
202238106Sdes	(void)sigs;
203238106Sdes#    ifdef HAVE_EVENT_BASE_NEW
204238106Sdes	b->eb->base = event_base_new();
205238106Sdes#    else
206238106Sdes	b->eb->base = event_init();
207238106Sdes#    endif
208238106Sdes#  endif
209238106Sdes#endif
210238106Sdes	if(!b->eb->base) {
211238106Sdes		free(b->eb);
212238106Sdes		free(b);
213238106Sdes		return NULL;
214238106Sdes	}
215238106Sdes	comm_base_now(b);
216238106Sdes	/* avoid event_get_method call which causes crashes even when
217238106Sdes	 * not printing, because its result is passed */
218238106Sdes	verbose(VERB_ALGO,
219238106Sdes#if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
220238106Sdes		"libev"
221238106Sdes#elif defined(USE_MINI_EVENT)
222238106Sdes		"event "
223238106Sdes#else
224238106Sdes		"libevent "
225238106Sdes#endif
226238106Sdes		"%s uses %s method.",
227238106Sdes		event_get_version(),
228238106Sdes#ifdef HAVE_EVENT_BASE_GET_METHOD
229238106Sdes		event_base_get_method(b->eb->base)
230238106Sdes#else
231238106Sdes		"not_obtainable"
232238106Sdes#endif
233238106Sdes	);
234238106Sdes	return b;
235238106Sdes}
236238106Sdes
237238106Sdesvoid
238238106Sdescomm_base_delete(struct comm_base* b)
239238106Sdes{
240238106Sdes	if(!b)
241238106Sdes		return;
242238106Sdes	if(b->eb->slow_accept_enabled) {
243238106Sdes		if(event_del(&b->eb->slow_accept) != 0) {
244238106Sdes			log_err("could not event_del slow_accept");
245238106Sdes		}
246238106Sdes	}
247238106Sdes#ifdef USE_MINI_EVENT
248238106Sdes	event_base_free(b->eb->base);
249238106Sdes#elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
250238106Sdes	/* only libevent 1.2+ has it, but in 1.2 it is broken -
251238106Sdes	   assertion fails on signal handling ev that is not deleted
252238106Sdes 	   in libevent 1.3c (event_base_once appears) this is fixed. */
253238106Sdes	event_base_free(b->eb->base);
254238106Sdes#endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
255238106Sdes	b->eb->base = NULL;
256238106Sdes	free(b->eb);
257238106Sdes	free(b);
258238106Sdes}
259238106Sdes
260238106Sdesvoid
261238106Sdescomm_base_timept(struct comm_base* b, uint32_t** tt, struct timeval** tv)
262238106Sdes{
263238106Sdes	*tt = &b->eb->secs;
264238106Sdes	*tv = &b->eb->now;
265238106Sdes}
266238106Sdes
267238106Sdesvoid
268238106Sdescomm_base_dispatch(struct comm_base* b)
269238106Sdes{
270238106Sdes	int retval;
271238106Sdes	retval = event_base_dispatch(b->eb->base);
272238106Sdes	if(retval != 0) {
273238106Sdes		fatal_exit("event_dispatch returned error %d, "
274238106Sdes			"errno is %s", retval, strerror(errno));
275238106Sdes	}
276238106Sdes}
277238106Sdes
278238106Sdesvoid comm_base_exit(struct comm_base* b)
279238106Sdes{
280238106Sdes	if(event_base_loopexit(b->eb->base, NULL) != 0) {
281238106Sdes		log_err("Could not loopexit");
282238106Sdes	}
283238106Sdes}
284238106Sdes
285238106Sdesvoid comm_base_set_slow_accept_handlers(struct comm_base* b,
286238106Sdes	void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
287238106Sdes{
288238106Sdes	b->stop_accept = stop_acc;
289238106Sdes	b->start_accept = start_acc;
290238106Sdes	b->cb_arg = arg;
291238106Sdes}
292238106Sdes
293238106Sdesstruct event_base* comm_base_internal(struct comm_base* b)
294238106Sdes{
295238106Sdes	return b->eb->base;
296238106Sdes}
297238106Sdes
298238106Sdes/** see if errno for udp has to be logged or not uses globals */
299238106Sdesstatic int
300238106Sdesudp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
301238106Sdes{
302238106Sdes	/* do not log transient errors (unless high verbosity) */
303238106Sdes#if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
304238106Sdes	switch(errno) {
305238106Sdes#  ifdef ENETUNREACH
306238106Sdes		case ENETUNREACH:
307238106Sdes#  endif
308238106Sdes#  ifdef EHOSTDOWN
309238106Sdes		case EHOSTDOWN:
310238106Sdes#  endif
311238106Sdes#  ifdef EHOSTUNREACH
312238106Sdes		case EHOSTUNREACH:
313238106Sdes#  endif
314238106Sdes#  ifdef ENETDOWN
315238106Sdes		case ENETDOWN:
316238106Sdes#  endif
317238106Sdes			if(verbosity < VERB_ALGO)
318238106Sdes				return 0;
319238106Sdes		default:
320238106Sdes			break;
321238106Sdes	}
322238106Sdes#endif
323238106Sdes	/* squelch errors where people deploy AAAA ::ffff:bla for
324238106Sdes	 * authority servers, which we try for intranets. */
325238106Sdes	if(errno == EINVAL && addr_is_ip4mapped(
326238106Sdes		(struct sockaddr_storage*)addr, addrlen) &&
327238106Sdes		verbosity < VERB_DETAIL)
328238106Sdes		return 0;
329238106Sdes	/* SO_BROADCAST sockopt can give access to 255.255.255.255,
330238106Sdes	 * but a dns cache does not need it. */
331238106Sdes	if(errno == EACCES && addr_is_broadcast(
332238106Sdes		(struct sockaddr_storage*)addr, addrlen) &&
333238106Sdes		verbosity < VERB_DETAIL)
334238106Sdes		return 0;
335238106Sdes	return 1;
336238106Sdes}
337238106Sdes
/**
 * Decide whether the errno of a failed tcp connect has to be logged.
 * Applies the same rules as for a failed udp send.
 * @param addr: destination address.
 * @param addrlen: length of addr.
 * @return 0 if the error should stay unlogged, 1 if it has to be logged.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);
	return needs_log;
}
342238106Sdes
343238106Sdes/* send a UDP reply */
344238106Sdesint
345238106Sdescomm_point_send_udp_msg(struct comm_point *c, ldns_buffer* packet,
346238106Sdes	struct sockaddr* addr, socklen_t addrlen)
347238106Sdes{
348238106Sdes	ssize_t sent;
349238106Sdes	log_assert(c->fd != -1);
350238106Sdes#ifdef UNBOUND_DEBUG
351238106Sdes	if(ldns_buffer_remaining(packet) == 0)
352238106Sdes		log_err("error: send empty UDP packet");
353238106Sdes#endif
354238106Sdes	log_assert(addr && addrlen > 0);
355238106Sdes	sent = sendto(c->fd, (void*)ldns_buffer_begin(packet),
356238106Sdes		ldns_buffer_remaining(packet), 0,
357238106Sdes		addr, addrlen);
358238106Sdes	if(sent == -1) {
359238106Sdes		if(!udp_send_errno_needs_log(addr, addrlen))
360238106Sdes			return 0;
361238106Sdes#ifndef USE_WINSOCK
362238106Sdes		verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
363238106Sdes#else
364238106Sdes		verbose(VERB_OPS, "sendto failed: %s",
365238106Sdes			wsa_strerror(WSAGetLastError()));
366238106Sdes#endif
367238106Sdes		log_addr(VERB_OPS, "remote address is",
368238106Sdes			(struct sockaddr_storage*)addr, addrlen);
369238106Sdes		return 0;
370238106Sdes	} else if((size_t)sent != ldns_buffer_remaining(packet)) {
371238106Sdes		log_err("sent %d in place of %d bytes",
372238106Sdes			(int)sent, (int)ldns_buffer_remaining(packet));
373238106Sdes		return 0;
374238106Sdes	}
375238106Sdes	return 1;
376238106Sdes}
377238106Sdes
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/** put the address in text form into dst, or an error text if the
 * conversion fails; dst is always zero terminated */
static void
p_ancil_ntop(int af, const void* src, char* dst, size_t dstlen)
{
	if(inet_ntop(af, src, dst, (socklen_t)dstlen) == 0) {
		strncpy(dst, "(inet_ntop error)", dstlen);
	}
	dst[dstlen-1]=0;
}

/** print debug ancillary info */
static void p_ancil(const char* str, struct comm_reply* r)
{
	switch(r->srctype) {
	case 6: {
		char addr6[1024];
		p_ancil_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			addr6, sizeof(addr6));
		log_info("%s: %s %d", str, addr6,
			r->pktinfo.v6info.ipi6_ifindex);
		break;
	}
	case 4: {
#ifdef IP_PKTINFO
		char addr4[1024], spec4[1024];
		p_ancil_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
			addr4, sizeof(addr4));
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		p_ancil_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			spec4, sizeof(spec4));
#else
		spec4[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			addr4, spec4);
#elif defined(IP_RECVDSTADDR)
		char addr4[1024];
		p_ancil_ntop(AF_INET, &r->pktinfo.v4addr,
			addr4, sizeof(addr4));
		log_info("%s: %s", str, addr4);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		break;
	}
	default:
		log_info("%s: unknown srctype %d", str, r->srctype);
		break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
425238106Sdes
426238106Sdes/** send a UDP reply over specified interface*/
427238106Sdesstatic int
428238106Sdescomm_point_send_udp_msg_if(struct comm_point *c, ldns_buffer* packet,
429238106Sdes	struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
430238106Sdes{
431238106Sdes#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
432238106Sdes	ssize_t sent;
433238106Sdes	struct msghdr msg;
434238106Sdes	struct iovec iov[1];
435238106Sdes	char control[256];
436238106Sdes#ifndef S_SPLINT_S
437238106Sdes	struct cmsghdr *cmsg;
438238106Sdes#endif /* S_SPLINT_S */
439238106Sdes
440238106Sdes	log_assert(c->fd != -1);
441238106Sdes#ifdef UNBOUND_DEBUG
442238106Sdes	if(ldns_buffer_remaining(packet) == 0)
443238106Sdes		log_err("error: send empty UDP packet");
444238106Sdes#endif
445238106Sdes	log_assert(addr && addrlen > 0);
446238106Sdes
447238106Sdes	msg.msg_name = addr;
448238106Sdes	msg.msg_namelen = addrlen;
449238106Sdes	iov[0].iov_base = ldns_buffer_begin(packet);
450238106Sdes	iov[0].iov_len = ldns_buffer_remaining(packet);
451238106Sdes	msg.msg_iov = iov;
452238106Sdes	msg.msg_iovlen = 1;
453238106Sdes	msg.msg_control = control;
454238106Sdes#ifndef S_SPLINT_S
455238106Sdes	msg.msg_controllen = sizeof(control);
456238106Sdes#endif /* S_SPLINT_S */
457238106Sdes	msg.msg_flags = 0;
458238106Sdes
459238106Sdes#ifndef S_SPLINT_S
460238106Sdes	cmsg = CMSG_FIRSTHDR(&msg);
461238106Sdes	if(r->srctype == 4) {
462238106Sdes#ifdef IP_PKTINFO
463238106Sdes		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
464238106Sdes		log_assert(msg.msg_controllen <= sizeof(control));
465238106Sdes		cmsg->cmsg_level = IPPROTO_IP;
466238106Sdes		cmsg->cmsg_type = IP_PKTINFO;
467238106Sdes		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
468238106Sdes			sizeof(struct in_pktinfo));
469238106Sdes		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
470238106Sdes#elif defined(IP_SENDSRCADDR)
471238106Sdes		msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
472238106Sdes		log_assert(msg.msg_controllen <= sizeof(control));
473238106Sdes		cmsg->cmsg_level = IPPROTO_IP;
474238106Sdes		cmsg->cmsg_type = IP_SENDSRCADDR;
475238106Sdes		memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
476238106Sdes			sizeof(struct in_addr));
477238106Sdes		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
478238106Sdes#else
479238106Sdes		verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
480238106Sdes		msg.msg_control = NULL;
481238106Sdes#endif /* IP_PKTINFO or IP_SENDSRCADDR */
482238106Sdes	} else if(r->srctype == 6) {
483238106Sdes		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
484238106Sdes		log_assert(msg.msg_controllen <= sizeof(control));
485238106Sdes		cmsg->cmsg_level = IPPROTO_IPV6;
486238106Sdes		cmsg->cmsg_type = IPV6_PKTINFO;
487238106Sdes		memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
488238106Sdes			sizeof(struct in6_pktinfo));
489238106Sdes		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
490238106Sdes	} else {
491238106Sdes		/* try to pass all 0 to use default route */
492238106Sdes		msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
493238106Sdes		log_assert(msg.msg_controllen <= sizeof(control));
494238106Sdes		cmsg->cmsg_level = IPPROTO_IPV6;
495238106Sdes		cmsg->cmsg_type = IPV6_PKTINFO;
496238106Sdes		memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
497238106Sdes		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
498238106Sdes	}
499238106Sdes#endif /* S_SPLINT_S */
500238106Sdes	if(verbosity >= VERB_ALGO)
501238106Sdes		p_ancil("send_udp over interface", r);
502238106Sdes	sent = sendmsg(c->fd, &msg, 0);
503238106Sdes	if(sent == -1) {
504238106Sdes		if(!udp_send_errno_needs_log(addr, addrlen))
505238106Sdes			return 0;
506238106Sdes		verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
507238106Sdes		log_addr(VERB_OPS, "remote address is",
508238106Sdes			(struct sockaddr_storage*)addr, addrlen);
509238106Sdes		return 0;
510238106Sdes	} else if((size_t)sent != ldns_buffer_remaining(packet)) {
511238106Sdes		log_err("sent %d in place of %d bytes",
512238106Sdes			(int)sent, (int)ldns_buffer_remaining(packet));
513238106Sdes		return 0;
514238106Sdes	}
515238106Sdes	return 1;
516238106Sdes#else
517238106Sdes	(void)c;
518238106Sdes	(void)packet;
519238106Sdes	(void)addr;
520238106Sdes	(void)addrlen;
521238106Sdes	(void)r;
522238106Sdes	log_err("sendmsg: IPV6_PKTINFO not supported");
523238106Sdes	return 0;
524238106Sdes#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
525238106Sdes}
526238106Sdes
/**
 * Event callback for a UDP comm point that reads with ancillary data.
 * Up to NUM_UDP_PER_SELECT packets are read with recvmsg. For every packet
 * the pktinfo from the ancillary data is stored in the comm reply, and the
 * callback of the comm point is called. If that returns nonzero, the
 * buffer is sent back at once over the interface from the pktinfo.
 * Without support for IPV6_PKTINFO and recvmsg this is a fatal error.
 * @param fd: the file descriptor that has the event.
 * @param event: event bits; nothing is done unless EV_READ is set.
 * @param arg: the comm point (type comm_udp).
 */
void
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* a union, so that the buffer is aligned for a struct cmsghdr */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		ldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(ldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = ldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = ldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			/* EAGAIN and EINTR are not worth a log entry */
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		ldns_buffer_skip(rep.c->buffer, rcv);
		ldns_buffer_flip(rep.c->buffer);
		rep.srctype = 0;
#ifndef S_SPLINT_S
		/* pick up the first pktinfo from the ancillary data */
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		if(rep.c->fd != fd) /* commpoint closed to -1 or now has
		another fd: do not continue to read from the old fd */
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
621238106Sdes
622238106Sdesvoid
623238106Sdescomm_point_udp_callback(int fd, short event, void* arg)
624238106Sdes{
625238106Sdes	struct comm_reply rep;
626238106Sdes	ssize_t rcv;
627238106Sdes	int i;
628238106Sdes
629238106Sdes	rep.c = (struct comm_point*)arg;
630238106Sdes	log_assert(rep.c->type == comm_udp);
631238106Sdes
632238106Sdes	if(!(event&EV_READ))
633238106Sdes		return;
634238106Sdes	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
635238106Sdes	comm_base_now(rep.c->ev->base);
636238106Sdes	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
637238106Sdes		ldns_buffer_clear(rep.c->buffer);
638238106Sdes		rep.addrlen = (socklen_t)sizeof(rep.addr);
639238106Sdes		log_assert(fd != -1);
640238106Sdes		log_assert(ldns_buffer_remaining(rep.c->buffer) > 0);
641238106Sdes		rcv = recvfrom(fd, (void*)ldns_buffer_begin(rep.c->buffer),
642238106Sdes			ldns_buffer_remaining(rep.c->buffer), 0,
643238106Sdes			(struct sockaddr*)&rep.addr, &rep.addrlen);
644238106Sdes		if(rcv == -1) {
645238106Sdes#ifndef USE_WINSOCK
646238106Sdes			if(errno != EAGAIN && errno != EINTR)
647238106Sdes				log_err("recvfrom %d failed: %s",
648238106Sdes					fd, strerror(errno));
649238106Sdes#else
650238106Sdes			if(WSAGetLastError() != WSAEINPROGRESS &&
651238106Sdes				WSAGetLastError() != WSAECONNRESET &&
652238106Sdes				WSAGetLastError()!= WSAEWOULDBLOCK)
653238106Sdes				log_err("recvfrom failed: %s",
654238106Sdes					wsa_strerror(WSAGetLastError()));
655238106Sdes#endif
656238106Sdes			return;
657238106Sdes		}
658238106Sdes		ldns_buffer_skip(rep.c->buffer, rcv);
659238106Sdes		ldns_buffer_flip(rep.c->buffer);
660238106Sdes		rep.srctype = 0;
661238106Sdes		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
662238106Sdes		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
663238106Sdes			/* send back immediate reply */
664238106Sdes			(void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
665238106Sdes				(struct sockaddr*)&rep.addr, rep.addrlen);
666238106Sdes		}
667238106Sdes		if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
668238106Sdes		another UDP port. Note rep.c cannot be reused with TCP fd. */
669238106Sdes			break;
670238106Sdes	}
671238106Sdes}
672238106Sdes
673238106Sdes/** Use a new tcp handler for new query fd, set to read query */
674238106Sdesstatic void
675238106Sdessetup_tcp_handler(struct comm_point* c, int fd)
676238106Sdes{
677238106Sdes	log_assert(c->type == comm_tcp);
678238106Sdes	log_assert(c->fd == -1);
679238106Sdes	ldns_buffer_clear(c->buffer);
680238106Sdes	c->tcp_is_reading = 1;
681238106Sdes	c->tcp_byte_count = 0;
682238106Sdes	comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
683238106Sdes}
684238106Sdes
685238106Sdesvoid comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
686238106Sdes	short ATTR_UNUSED(event), void* arg)
687238106Sdes{
688238106Sdes	struct comm_base* b = (struct comm_base*)arg;
689238106Sdes	/* timeout for the slow accept, re-enable accepts again */
690238106Sdes	if(b->start_accept) {
691238106Sdes		verbose(VERB_ALGO, "wait is over, slow accept disabled");
692238106Sdes		fptr_ok(fptr_whitelist_start_accept(b->start_accept));
693238106Sdes		(*b->start_accept)(b->cb_arg);
694238106Sdes		b->eb->slow_accept_enabled = 0;
695238106Sdes	}
696238106Sdes}
697238106Sdes
698238106Sdesint comm_point_perform_accept(struct comm_point* c,
699238106Sdes	struct sockaddr_storage* addr, socklen_t* addrlen)
700238106Sdes{
701238106Sdes	int new_fd;
702238106Sdes	*addrlen = (socklen_t)sizeof(*addr);
703238106Sdes	new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
704238106Sdes	if(new_fd == -1) {
705238106Sdes#ifndef USE_WINSOCK
706238106Sdes		/* EINTR is signal interrupt. others are closed connection. */
707238106Sdes		if(	errno == EINTR || errno == EAGAIN
708238106Sdes#ifdef EWOULDBLOCK
709238106Sdes			|| errno == EWOULDBLOCK
710238106Sdes#endif
711238106Sdes#ifdef ECONNABORTED
712238106Sdes			|| errno == ECONNABORTED
713238106Sdes#endif
714238106Sdes#ifdef EPROTO
715238106Sdes			|| errno == EPROTO
716238106Sdes#endif /* EPROTO */
717238106Sdes			)
718238106Sdes			return -1;
719238106Sdes#if defined(ENFILE) && defined(EMFILE)
720238106Sdes		if(errno == ENFILE || errno == EMFILE) {
721238106Sdes			/* out of file descriptors, likely outside of our
722238106Sdes			 * control. stop accept() calls for some time */
723238106Sdes			if(c->ev->base->stop_accept) {
724238106Sdes				struct comm_base* b = c->ev->base;
725238106Sdes				struct timeval tv;
726238106Sdes				verbose(VERB_ALGO, "out of file descriptors: "
727238106Sdes					"slow accept");
728238106Sdes				b->eb->slow_accept_enabled = 1;
729238106Sdes				fptr_ok(fptr_whitelist_stop_accept(
730238106Sdes					b->stop_accept));
731238106Sdes				(*b->stop_accept)(b->cb_arg);
732238106Sdes				/* set timeout, no mallocs */
733238106Sdes				tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
734238106Sdes				tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
735238106Sdes				event_set(&b->eb->slow_accept, -1, EV_TIMEOUT,
736238106Sdes					comm_base_handle_slow_accept, b);
737238106Sdes				if(event_base_set(b->eb->base,
738238106Sdes					&b->eb->slow_accept) != 0) {
739238106Sdes					/* we do not want to log here, because
740238106Sdes					 * that would spam the logfiles.
741238106Sdes					 * error: "event_base_set failed." */
742238106Sdes				}
743238106Sdes				if(event_add(&b->eb->slow_accept, &tv) != 0) {
744238106Sdes					/* we do not want to log here,
745238106Sdes					 * error: "event_add failed." */
746238106Sdes				}
747238106Sdes			}
748238106Sdes			return -1;
749238106Sdes		}
750238106Sdes#endif
751238106Sdes		log_err("accept failed: %s", strerror(errno));
752238106Sdes#else /* USE_WINSOCK */
753238106Sdes		if(WSAGetLastError() == WSAEINPROGRESS ||
754238106Sdes			WSAGetLastError() == WSAECONNRESET)
755238106Sdes			return -1;
756238106Sdes		if(WSAGetLastError() == WSAEWOULDBLOCK) {
757238106Sdes			winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
758238106Sdes			return -1;
759238106Sdes		}
760238106Sdes		log_err("accept failed: %s", wsa_strerror(WSAGetLastError()));
761238106Sdes#endif
762238106Sdes		log_addr(0, "remote address is", addr, *addrlen);
763238106Sdes		return -1;
764238106Sdes	}
765238106Sdes	fd_set_nonblock(new_fd);
766238106Sdes	return new_fd;
767238106Sdes}
768238106Sdes
769238106Sdes#ifdef USE_WINSOCK
770238106Sdesstatic long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
771238106Sdes        int ATTR_UNUSED(argi), long argl, long retvalue)
772238106Sdes{
773238106Sdes	verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
774238106Sdes		(oper&BIO_CB_RETURN)?"return":"before",
775238106Sdes		(oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
776238106Sdes		WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
777238106Sdes	/* on windows, check if previous operation caused EWOULDBLOCK */
778238106Sdes	if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
779238106Sdes		(oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
780238106Sdes		if(WSAGetLastError() == WSAEWOULDBLOCK)
781238106Sdes			winsock_tcp_wouldblock((struct event*)
782238106Sdes				BIO_get_callback_arg(b), EV_READ);
783238106Sdes	}
784238106Sdes	if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
785238106Sdes		(oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
786238106Sdes		if(WSAGetLastError() == WSAEWOULDBLOCK)
787238106Sdes			winsock_tcp_wouldblock((struct event*)
788238106Sdes				BIO_get_callback_arg(b), EV_WRITE);
789238106Sdes	}
790238106Sdes	/* return original return value */
791238106Sdes	return retvalue;
792238106Sdes}
793238106Sdes
794238106Sdes/** set win bio callbacks for nonblocking operations */
795238106Sdesvoid
796238106Sdescomm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
797238106Sdes{
798238106Sdes	SSL* ssl = (SSL*)thessl;
799238106Sdes	/* set them both just in case, but usually they are the same BIO */
800238106Sdes	BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
801238106Sdes	BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)&c->ev->ev);
802238106Sdes	BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
803238106Sdes	BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)&c->ev->ev);
804238106Sdes}
805238106Sdes#endif
806238106Sdes
807238106Sdesvoid
808238106Sdescomm_point_tcp_accept_callback(int fd, short event, void* arg)
809238106Sdes{
810238106Sdes	struct comm_point* c = (struct comm_point*)arg, *c_hdl;
811238106Sdes	int new_fd;
812238106Sdes	log_assert(c->type == comm_tcp_accept);
813238106Sdes	if(!(event & EV_READ)) {
814238106Sdes		log_info("ignoring tcp accept event %d", (int)event);
815238106Sdes		return;
816238106Sdes	}
817238106Sdes	comm_base_now(c->ev->base);
818238106Sdes	/* find free tcp handler. */
819238106Sdes	if(!c->tcp_free) {
820238106Sdes		log_warn("accepted too many tcp, connections full");
821238106Sdes		return;
822238106Sdes	}
823238106Sdes	/* accept incoming connection. */
824238106Sdes	c_hdl = c->tcp_free;
825238106Sdes	log_assert(fd != -1);
826238106Sdes	new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
827238106Sdes		&c_hdl->repinfo.addrlen);
828238106Sdes	if(new_fd == -1)
829238106Sdes		return;
830238106Sdes	if(c->ssl) {
831238106Sdes		c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
832238106Sdes		if(!c_hdl->ssl) {
833238106Sdes			c_hdl->fd = new_fd;
834238106Sdes			comm_point_close(c_hdl);
835238106Sdes			return;
836238106Sdes		}
837238106Sdes		c_hdl->ssl_shake_state = comm_ssl_shake_read;
838238106Sdes#ifdef USE_WINSOCK
839238106Sdes		comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
840238106Sdes#endif
841238106Sdes	}
842238106Sdes
843238106Sdes	/* grab the tcp handler buffers */
844238106Sdes	c->tcp_free = c_hdl->tcp_free;
845238106Sdes	if(!c->tcp_free) {
846238106Sdes		/* stop accepting incoming queries for now. */
847238106Sdes		comm_point_stop_listening(c);
848238106Sdes	}
849238106Sdes	/* addr is dropped. Not needed for tcp reply. */
850238106Sdes	setup_tcp_handler(c_hdl, new_fd);
851238106Sdes}
852238106Sdes
853238106Sdes/** Make tcp handler free for next assignment */
854238106Sdesstatic void
855238106Sdesreclaim_tcp_handler(struct comm_point* c)
856238106Sdes{
857238106Sdes	log_assert(c->type == comm_tcp);
858238106Sdes	if(c->ssl) {
859249141Sdes#ifdef HAVE_SSL
860238106Sdes		SSL_shutdown(c->ssl);
861238106Sdes		SSL_free(c->ssl);
862238106Sdes		c->ssl = NULL;
863249141Sdes#endif
864238106Sdes	}
865238106Sdes	comm_point_close(c);
866238106Sdes	if(c->tcp_parent) {
867238106Sdes		c->tcp_free = c->tcp_parent->tcp_free;
868238106Sdes		c->tcp_parent->tcp_free = c;
869238106Sdes		if(!c->tcp_free) {
870238106Sdes			/* re-enable listening on accept socket */
871238106Sdes			comm_point_start_listening(c->tcp_parent, -1, -1);
872238106Sdes		}
873238106Sdes	}
874238106Sdes}
875238106Sdes
876238106Sdes/** do the callback when writing is done */
877238106Sdesstatic void
878238106Sdestcp_callback_writer(struct comm_point* c)
879238106Sdes{
880238106Sdes	log_assert(c->type == comm_tcp);
881238106Sdes	ldns_buffer_clear(c->buffer);
882238106Sdes	if(c->tcp_do_toggle_rw)
883238106Sdes		c->tcp_is_reading = 1;
884238106Sdes	c->tcp_byte_count = 0;
885238106Sdes	/* switch from listening(write) to listening(read) */
886238106Sdes	comm_point_stop_listening(c);
887238106Sdes	comm_point_start_listening(c, -1, -1);
888238106Sdes}
889238106Sdes
890238106Sdes/** do the callback when reading is done */
891238106Sdesstatic void
892238106Sdestcp_callback_reader(struct comm_point* c)
893238106Sdes{
894238106Sdes	log_assert(c->type == comm_tcp || c->type == comm_local);
895238106Sdes	ldns_buffer_flip(c->buffer);
896238106Sdes	if(c->tcp_do_toggle_rw)
897238106Sdes		c->tcp_is_reading = 0;
898238106Sdes	c->tcp_byte_count = 0;
899238106Sdes	if(c->type == comm_tcp)
900238106Sdes		comm_point_stop_listening(c);
901238106Sdes	fptr_ok(fptr_whitelist_comm_point(c->callback));
902238106Sdes	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
903238106Sdes		comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
904238106Sdes	}
905238106Sdes}
906238106Sdes
/**
 * Continue the ssl handshake, or resume normal I/O after a read or write
 * condition that an earlier SSL_read/SSL_write was waiting for.
 * @param c: comm point with an ssl connection.
 * @return 0 when the connection is closed or the handshake failed;
 * 	1 otherwise. With return value 1 the handshake is only finished if
 * 	c->ssl_shake_state is comm_ssl_shake_none afterwards.
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int rc;

	switch(c->ssl_shake_state) {
	case comm_ssl_shake_hs_read:
		/* read condition satisfied back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	case comm_ssl_shake_hs_write:
		/* write condition satisfied, back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	default:
		break;
	}

	ERR_clear_error();
	rc = SSL_do_handshake(c->ssl);
	if(rc != 1) {
		int sslerr = SSL_get_error(c->ssl, rc);
		switch(sslerr) {
		case SSL_ERROR_WANT_READ:
			/* only re-register when not already waiting to read */
			if(c->ssl_shake_state != comm_ssl_shake_read) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
			}
			return 1;
		case SSL_ERROR_WANT_WRITE:
			/* only re-register when not already waiting to write */
			if(c->ssl_shake_state != comm_ssl_shake_write) {
				c->ssl_shake_state = comm_ssl_shake_write;
				comm_point_listen_for_rw(c, 0, 1);
			}
			return 1;
		default:
			break;
		}
		if(rc == 0)
			return 0; /* closed */
		if(sslerr == SSL_ERROR_SYSCALL) {
			/* SYSCALL and errno==0 means closed uncleanly */
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err("ssl handshake failed");
		log_addr(1, "ssl handshake failed", &c->repinfo.addr,
			c->repinfo.addrlen);
		return 0;
	}
	/* this is where peer verification could take place */
	log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
		c->repinfo.addrlen);

	/* handshake done: listen for what the comm point wants to do next */
	if(!c->tcp_is_reading) {
		comm_point_listen_for_rw(c, 1, 1);
	} else if(c->ssl_shake_state != comm_ssl_shake_read) {
		comm_point_listen_for_rw(c, 1, 0);
	}
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
972238106Sdes
/**
 * ssl read callback on TCP.
 * Finishes a pending handshake first, then reads the two-byte length
 * prefix and after that the message itself into c->buffer. Calls
 * tcp_callback_reader once the message is complete.
 * @param c: comm point with an ssl connection.
 * @return 0 when the connection was closed or failed (always 0 when built
 * 	without HAVE_SSL); 1 otherwise, also when more data has to be
 * 	awaited.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int got;
	uint16_t msglen;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read (the rest of) the length bytes */
		ERR_clear_error();
		got = SSL_read(c->ssl,
			(void*)ldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(got <= 0) {
			switch(SSL_get_error(c->ssl, got)) {
			case SSL_ERROR_ZERO_RETURN:
				return 0; /* shutdown, closed */
			case SSL_ERROR_WANT_READ:
				return 1; /* read more later */
			case SSL_ERROR_WANT_WRITE:
				c->ssl_shake_state = comm_ssl_shake_hs_write;
				comm_point_listen_for_rw(c, 0, 1);
				return 1;
			case SSL_ERROR_SYSCALL:
				if(errno != 0)
					log_err("SSL_read syscall: %s",
						strerror(errno));
				return 0;
			default:
				log_crypto_err("could not SSL_read");
				return 0;
			}
		}
		c->tcp_byte_count += got;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		/* length prefix complete; validate it against the buffer */
		msglen = ldns_buffer_read_u16_at(c->buffer, 0);
		if(msglen > ldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		ldns_buffer_set_limit(c->buffer, msglen);
		if(ldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)ldns_buffer_limit(c->buffer));
	}
	log_assert(ldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	got = SSL_read(c->ssl, (void*)ldns_buffer_current(c->buffer),
		(int)ldns_buffer_remaining(c->buffer));
	if(got <= 0) {
		switch(SSL_get_error(c->ssl, got)) {
		case SSL_ERROR_ZERO_RETURN:
			return 0; /* shutdown, closed */
		case SSL_ERROR_WANT_READ:
			return 1; /* read more later */
		case SSL_ERROR_WANT_WRITE:
			c->ssl_shake_state = comm_ssl_shake_hs_write;
			comm_point_listen_for_rw(c, 0, 1);
			return 1;
		case SSL_ERROR_SYSCALL:
			if(errno != 0)
				log_err("SSL_read syscall: %s",
					strerror(errno));
			return 0;
		default:
			log_crypto_err("could not SSL_read");
			return 0;
		}
	}
	ldns_buffer_skip(c->buffer, (ssize_t)got);
	if(ldns_buffer_remaining(c->buffer) <= 0) {
		/* whole message is in the buffer */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1059238106Sdes
/**
 * ssl write callback on TCP.
 * Finishes a pending handshake first, then writes the two-byte length
 * prefix (network byte order) followed by the contents of c->buffer.
 * Calls tcp_callback_writer once the buffer has been written completely.
 * @param c: comm point with an ssl connection and a buffer to send.
 * @return 0 when the connection was closed or failed (always 0 when built
 * 	without HAVE_SSL); 1 otherwise, also when the write has to be
 * 	continued on a later event.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* write (the rest of) the length prefix */
		uint16_t len = htons(ldns_buffer_limit(c->buffer));
		ERR_clear_error();
		r = SSL_write(c->ssl,
			(void*)(((uint8_t*)&len)+c->tcp_byte_count),
			(int)(sizeof(uint16_t)-c->tcp_byte_count));
		if(r <= 0) {
			int want = SSL_get_error(c->ssl, r);
			if(want == SSL_ERROR_ZERO_RETURN) {
				return 0; /* closed */
			} else if(want == SSL_ERROR_WANT_READ) {
				/* hs_read (not shake_read): on the read
				 * event ssl_handshake() switches back to
				 * writing without running SSL_do_handshake
				 * on the established session again. */
				c->ssl_shake_state = comm_ssl_shake_hs_read;
				comm_point_listen_for_rw(c, 1, 0);
				return 1; /* wait for read condition */
			} else if(want == SSL_ERROR_WANT_WRITE) {
				return 1; /* write more later */
			} else if(want == SSL_ERROR_SYSCALL) {
				/* errno==0 is an unclean close; not logged */
				if(errno != 0)
					log_err("SSL_write syscall: %s",
						strerror(errno));
				return 0;
			}
			log_crypto_err("could not SSL_write");
			return 0;
		}
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		ldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(ldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(ldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)ldns_buffer_current(c->buffer),
		(int)ldns_buffer_remaining(c->buffer));
	if(r <= 0) {
		int want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_ZERO_RETURN) {
			return 0; /* closed */
		} else if(want == SSL_ERROR_WANT_READ) {
			/* same as for the length prefix: resume writing
			 * once the read condition is satisfied */
			c->ssl_shake_state = comm_ssl_shake_hs_read;
			comm_point_listen_for_rw(c, 1, 0);
			return 1; /* wait for read condition */
		} else if(want == SSL_ERROR_WANT_WRITE) {
			return 1; /* write more later */
		} else if(want == SSL_ERROR_SYSCALL) {
			if(errno != 0)
				log_err("SSL_write syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err("could not SSL_write");
		return 0;
	}
	ldns_buffer_skip(c->buffer, (ssize_t)r);

	if(ldns_buffer_remaining(c->buffer) == 0) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1143238106Sdes
1144238106Sdes/** handle ssl tcp connection with dns contents */
1145238106Sdesstatic int
1146238106Sdesssl_handle_it(struct comm_point* c)
1147238106Sdes{
1148238106Sdes	if(c->tcp_is_reading)
1149238106Sdes		return ssl_handle_read(c);
1150238106Sdes	return ssl_handle_write(c);
1151238106Sdes}
1152238106Sdes
1153238106Sdes/** Handle tcp reading callback.
1154238106Sdes * @param fd: file descriptor of socket.
1155238106Sdes * @param c: comm point to read from into buffer.
1156238106Sdes * @param short_ok: if true, very short packets are OK (for comm_local).
1157238106Sdes * @return: 0 on error
1158238106Sdes */
1159238106Sdesstatic int
1160238106Sdescomm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1161238106Sdes{
1162238106Sdes	ssize_t r;
1163238106Sdes	log_assert(c->type == comm_tcp || c->type == comm_local);
1164238106Sdes	if(c->ssl)
1165238106Sdes		return ssl_handle_it(c);
1166238106Sdes	if(!c->tcp_is_reading)
1167238106Sdes		return 0;
1168238106Sdes
1169238106Sdes	log_assert(fd != -1);
1170238106Sdes	if(c->tcp_byte_count < sizeof(uint16_t)) {
1171238106Sdes		/* read length bytes */
1172238106Sdes		r = recv(fd,(void*)ldns_buffer_at(c->buffer,c->tcp_byte_count),
1173238106Sdes			sizeof(uint16_t)-c->tcp_byte_count, 0);
1174238106Sdes		if(r == 0)
1175238106Sdes			return 0;
1176238106Sdes		else if(r == -1) {
1177238106Sdes#ifndef USE_WINSOCK
1178238106Sdes			if(errno == EINTR || errno == EAGAIN)
1179238106Sdes				return 1;
1180238106Sdes#ifdef ECONNRESET
1181238106Sdes			if(errno == ECONNRESET && verbosity < 2)
1182238106Sdes				return 0; /* silence reset by peer */
1183238106Sdes#endif
1184238106Sdes			log_err("read (in tcp s): %s", strerror(errno));
1185238106Sdes#else /* USE_WINSOCK */
1186238106Sdes			if(WSAGetLastError() == WSAECONNRESET)
1187238106Sdes				return 0;
1188238106Sdes			if(WSAGetLastError() == WSAEINPROGRESS)
1189238106Sdes				return 1;
1190238106Sdes			if(WSAGetLastError() == WSAEWOULDBLOCK) {
1191238106Sdes				winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1192238106Sdes				return 1;
1193238106Sdes			}
1194238106Sdes			log_err("read (in tcp s): %s",
1195238106Sdes				wsa_strerror(WSAGetLastError()));
1196238106Sdes#endif
1197238106Sdes			log_addr(0, "remote address is", &c->repinfo.addr,
1198238106Sdes				c->repinfo.addrlen);
1199238106Sdes			return 0;
1200238106Sdes		}
1201238106Sdes		c->tcp_byte_count += r;
1202238106Sdes		if(c->tcp_byte_count != sizeof(uint16_t))
1203238106Sdes			return 1;
1204238106Sdes		if(ldns_buffer_read_u16_at(c->buffer, 0) >
1205238106Sdes			ldns_buffer_capacity(c->buffer)) {
1206238106Sdes			verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1207238106Sdes			return 0;
1208238106Sdes		}
1209238106Sdes		ldns_buffer_set_limit(c->buffer,
1210238106Sdes			ldns_buffer_read_u16_at(c->buffer, 0));
1211238106Sdes		if(!short_ok &&
1212238106Sdes			ldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1213238106Sdes			verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1214238106Sdes			return 0;
1215238106Sdes		}
1216238106Sdes		verbose(VERB_ALGO, "Reading tcp query of length %d",
1217238106Sdes			(int)ldns_buffer_limit(c->buffer));
1218238106Sdes	}
1219238106Sdes
1220238106Sdes	log_assert(ldns_buffer_remaining(c->buffer) > 0);
1221238106Sdes	r = recv(fd, (void*)ldns_buffer_current(c->buffer),
1222238106Sdes		ldns_buffer_remaining(c->buffer), 0);
1223238106Sdes	if(r == 0) {
1224238106Sdes		return 0;
1225238106Sdes	} else if(r == -1) {
1226238106Sdes#ifndef USE_WINSOCK
1227238106Sdes		if(errno == EINTR || errno == EAGAIN)
1228238106Sdes			return 1;
1229238106Sdes		log_err("read (in tcp r): %s", strerror(errno));
1230238106Sdes#else /* USE_WINSOCK */
1231238106Sdes		if(WSAGetLastError() == WSAECONNRESET)
1232238106Sdes			return 0;
1233238106Sdes		if(WSAGetLastError() == WSAEINPROGRESS)
1234238106Sdes			return 1;
1235238106Sdes		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1236238106Sdes			winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1237238106Sdes			return 1;
1238238106Sdes		}
1239238106Sdes		log_err("read (in tcp r): %s",
1240238106Sdes			wsa_strerror(WSAGetLastError()));
1241238106Sdes#endif
1242238106Sdes		log_addr(0, "remote address is", &c->repinfo.addr,
1243238106Sdes			c->repinfo.addrlen);
1244238106Sdes		return 0;
1245238106Sdes	}
1246238106Sdes	ldns_buffer_skip(c->buffer, r);
1247238106Sdes	if(ldns_buffer_remaining(c->buffer) <= 0) {
1248238106Sdes		tcp_callback_reader(c);
1249238106Sdes	}
1250238106Sdes	return 1;
1251238106Sdes}
1252238106Sdes
1253238106Sdes/**
1254238106Sdes * Handle tcp writing callback.
1255238106Sdes * @param fd: file descriptor of socket.
1256238106Sdes * @param c: comm point to write buffer out of.
1257238106Sdes * @return: 0 on error
1258238106Sdes */
1259238106Sdesstatic int
1260238106Sdescomm_point_tcp_handle_write(int fd, struct comm_point* c)
1261238106Sdes{
1262238106Sdes	ssize_t r;
1263238106Sdes	log_assert(c->type == comm_tcp);
1264238106Sdes	if(c->tcp_is_reading && !c->ssl)
1265238106Sdes		return 0;
1266238106Sdes	log_assert(fd != -1);
1267238106Sdes	if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1268238106Sdes		/* check for pending error from nonblocking connect */
1269238106Sdes		/* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1270238106Sdes		int error = 0;
1271238106Sdes		socklen_t len = (socklen_t)sizeof(error);
1272238106Sdes		if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
1273238106Sdes			&len) < 0){
1274238106Sdes#ifndef USE_WINSOCK
1275238106Sdes			error = errno; /* on solaris errno is error */
1276238106Sdes#else /* USE_WINSOCK */
1277238106Sdes			error = WSAGetLastError();
1278238106Sdes#endif
1279238106Sdes		}
1280238106Sdes#ifndef USE_WINSOCK
1281238106Sdes#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1282238106Sdes		if(error == EINPROGRESS || error == EWOULDBLOCK)
1283238106Sdes			return 1; /* try again later */
1284238106Sdes		else
1285238106Sdes#endif
1286238106Sdes		if(error != 0 && verbosity < 2)
1287238106Sdes			return 0; /* silence lots of chatter in the logs */
1288238106Sdes                else if(error != 0) {
1289238106Sdes			log_err("tcp connect: %s", strerror(error));
1290238106Sdes#else /* USE_WINSOCK */
1291238106Sdes		/* examine error */
1292238106Sdes		if(error == WSAEINPROGRESS)
1293238106Sdes			return 1;
1294238106Sdes		else if(error == WSAEWOULDBLOCK) {
1295238106Sdes			winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1296238106Sdes			return 1;
1297238106Sdes		} else if(error != 0 && verbosity < 2)
1298238106Sdes			return 0;
1299238106Sdes		else if(error != 0) {
1300238106Sdes			log_err("tcp connect: %s", wsa_strerror(error));
1301238106Sdes#endif /* USE_WINSOCK */
1302238106Sdes			log_addr(0, "remote address is", &c->repinfo.addr,
1303238106Sdes				c->repinfo.addrlen);
1304238106Sdes			return 0;
1305238106Sdes		}
1306238106Sdes	}
1307238106Sdes	if(c->ssl)
1308238106Sdes		return ssl_handle_it(c);
1309238106Sdes
1310238106Sdes	if(c->tcp_byte_count < sizeof(uint16_t)) {
1311238106Sdes		uint16_t len = htons(ldns_buffer_limit(c->buffer));
1312238106Sdes#ifdef HAVE_WRITEV
1313238106Sdes		struct iovec iov[2];
1314238106Sdes		iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1315238106Sdes		iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1316238106Sdes		iov[1].iov_base = ldns_buffer_begin(c->buffer);
1317238106Sdes		iov[1].iov_len = ldns_buffer_limit(c->buffer);
1318238106Sdes		log_assert(iov[0].iov_len > 0);
1319238106Sdes		log_assert(iov[1].iov_len > 0);
1320238106Sdes		r = writev(fd, iov, 2);
1321238106Sdes#else /* HAVE_WRITEV */
1322238106Sdes		r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1323238106Sdes			sizeof(uint16_t)-c->tcp_byte_count, 0);
1324238106Sdes#endif /* HAVE_WRITEV */
1325238106Sdes		if(r == -1) {
1326238106Sdes#ifndef USE_WINSOCK
1327238106Sdes#ifdef EPIPE
1328238106Sdes                	if(errno == EPIPE && verbosity < 2)
1329238106Sdes                        	return 0; /* silence 'broken pipe' */
1330238106Sdes#endif
1331238106Sdes			if(errno == EINTR || errno == EAGAIN)
1332238106Sdes				return 1;
1333238106Sdes			log_err("tcp writev: %s", strerror(errno));
1334238106Sdes#else
1335238106Sdes			if(WSAGetLastError() == WSAENOTCONN)
1336238106Sdes				return 1;
1337238106Sdes			if(WSAGetLastError() == WSAEINPROGRESS)
1338238106Sdes				return 1;
1339238106Sdes			if(WSAGetLastError() == WSAEWOULDBLOCK) {
1340238106Sdes				winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1341238106Sdes				return 1;
1342238106Sdes			}
1343238106Sdes			log_err("tcp send s: %s",
1344238106Sdes				wsa_strerror(WSAGetLastError()));
1345238106Sdes#endif
1346238106Sdes			log_addr(0, "remote address is", &c->repinfo.addr,
1347238106Sdes				c->repinfo.addrlen);
1348238106Sdes			return 0;
1349238106Sdes		}
1350238106Sdes		c->tcp_byte_count += r;
1351238106Sdes		if(c->tcp_byte_count < sizeof(uint16_t))
1352238106Sdes			return 1;
1353238106Sdes		ldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1354238106Sdes			sizeof(uint16_t));
1355238106Sdes		if(ldns_buffer_remaining(c->buffer) == 0) {
1356238106Sdes			tcp_callback_writer(c);
1357238106Sdes			return 1;
1358238106Sdes		}
1359238106Sdes	}
1360238106Sdes	log_assert(ldns_buffer_remaining(c->buffer) > 0);
1361238106Sdes	r = send(fd, (void*)ldns_buffer_current(c->buffer),
1362238106Sdes		ldns_buffer_remaining(c->buffer), 0);
1363238106Sdes	if(r == -1) {
1364238106Sdes#ifndef USE_WINSOCK
1365238106Sdes		if(errno == EINTR || errno == EAGAIN)
1366238106Sdes			return 1;
1367238106Sdes		log_err("tcp send r: %s", strerror(errno));
1368238106Sdes#else
1369238106Sdes		if(WSAGetLastError() == WSAEINPROGRESS)
1370238106Sdes			return 1;
1371238106Sdes		if(WSAGetLastError() == WSAEWOULDBLOCK) {
1372238106Sdes			winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1373238106Sdes			return 1;
1374238106Sdes		}
1375238106Sdes		log_err("tcp send r: %s",
1376238106Sdes			wsa_strerror(WSAGetLastError()));
1377238106Sdes#endif
1378238106Sdes		log_addr(0, "remote address is", &c->repinfo.addr,
1379238106Sdes			c->repinfo.addrlen);
1380238106Sdes		return 0;
1381238106Sdes	}
1382238106Sdes	ldns_buffer_skip(c->buffer, r);
1383238106Sdes
1384238106Sdes	if(ldns_buffer_remaining(c->buffer) == 0) {
1385238106Sdes		tcp_callback_writer(c);
1386238106Sdes	}
1387238106Sdes
1388238106Sdes	return 1;
1389238106Sdes}
1390238106Sdes
1391238106Sdesvoid
1392238106Sdescomm_point_tcp_handle_callback(int fd, short event, void* arg)
1393238106Sdes{
1394238106Sdes	struct comm_point* c = (struct comm_point*)arg;
1395238106Sdes	log_assert(c->type == comm_tcp);
1396238106Sdes	comm_base_now(c->ev->base);
1397238106Sdes
1398238106Sdes	if(event&EV_READ) {
1399238106Sdes		if(!comm_point_tcp_handle_read(fd, c, 0)) {
1400238106Sdes			reclaim_tcp_handler(c);
1401238106Sdes			if(!c->tcp_do_close) {
1402238106Sdes				fptr_ok(fptr_whitelist_comm_point(
1403238106Sdes					c->callback));
1404238106Sdes				(void)(*c->callback)(c, c->cb_arg,
1405238106Sdes					NETEVENT_CLOSED, NULL);
1406238106Sdes			}
1407238106Sdes		}
1408238106Sdes		return;
1409238106Sdes	}
1410238106Sdes	if(event&EV_WRITE) {
1411238106Sdes		if(!comm_point_tcp_handle_write(fd, c)) {
1412238106Sdes			reclaim_tcp_handler(c);
1413238106Sdes			if(!c->tcp_do_close) {
1414238106Sdes				fptr_ok(fptr_whitelist_comm_point(
1415238106Sdes					c->callback));
1416238106Sdes				(void)(*c->callback)(c, c->cb_arg,
1417238106Sdes					NETEVENT_CLOSED, NULL);
1418238106Sdes			}
1419238106Sdes		}
1420238106Sdes		return;
1421238106Sdes	}
1422238106Sdes	if(event&EV_TIMEOUT) {
1423238106Sdes		verbose(VERB_QUERY, "tcp took too long, dropped");
1424238106Sdes		reclaim_tcp_handler(c);
1425238106Sdes		if(!c->tcp_do_close) {
1426238106Sdes			fptr_ok(fptr_whitelist_comm_point(c->callback));
1427238106Sdes			(void)(*c->callback)(c, c->cb_arg,
1428238106Sdes				NETEVENT_TIMEOUT, NULL);
1429238106Sdes		}
1430238106Sdes		return;
1431238106Sdes	}
1432238106Sdes	log_err("Ignored event %d for tcphdl.", event);
1433238106Sdes}
1434238106Sdes
1435238106Sdesvoid comm_point_local_handle_callback(int fd, short event, void* arg)
1436238106Sdes{
1437238106Sdes	struct comm_point* c = (struct comm_point*)arg;
1438238106Sdes	log_assert(c->type == comm_local);
1439238106Sdes	comm_base_now(c->ev->base);
1440238106Sdes
1441238106Sdes	if(event&EV_READ) {
1442238106Sdes		if(!comm_point_tcp_handle_read(fd, c, 1)) {
1443238106Sdes			fptr_ok(fptr_whitelist_comm_point(c->callback));
1444238106Sdes			(void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
1445238106Sdes				NULL);
1446238106Sdes		}
1447238106Sdes		return;
1448238106Sdes	}
1449238106Sdes	log_err("Ignored event %d for localhdl.", event);
1450238106Sdes}
1451238106Sdes
1452238106Sdesvoid comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
1453238106Sdes	short event, void* arg)
1454238106Sdes{
1455238106Sdes	struct comm_point* c = (struct comm_point*)arg;
1456238106Sdes	int err = NETEVENT_NOERROR;
1457238106Sdes	log_assert(c->type == comm_raw);
1458238106Sdes	comm_base_now(c->ev->base);
1459238106Sdes
1460238106Sdes	if(event&EV_TIMEOUT)
1461238106Sdes		err = NETEVENT_TIMEOUT;
1462238106Sdes	fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1463238106Sdes	(void)(*c->callback)(c, c->cb_arg, err, NULL);
1464238106Sdes}
1465238106Sdes
1466238106Sdesstruct comm_point*
1467238106Sdescomm_point_create_udp(struct comm_base *base, int fd, ldns_buffer* buffer,
1468238106Sdes	comm_point_callback_t* callback, void* callback_arg)
1469238106Sdes{
1470238106Sdes	struct comm_point* c = (struct comm_point*)calloc(1,
1471238106Sdes		sizeof(struct comm_point));
1472238106Sdes	short evbits;
1473238106Sdes	if(!c)
1474238106Sdes		return NULL;
1475238106Sdes	c->ev = (struct internal_event*)calloc(1,
1476238106Sdes		sizeof(struct internal_event));
1477238106Sdes	if(!c->ev) {
1478238106Sdes		free(c);
1479238106Sdes		return NULL;
1480238106Sdes	}
1481238106Sdes	c->ev->base = base;
1482238106Sdes	c->fd = fd;
1483238106Sdes	c->buffer = buffer;
1484238106Sdes	c->timeout = NULL;
1485238106Sdes	c->tcp_is_reading = 0;
1486238106Sdes	c->tcp_byte_count = 0;
1487238106Sdes	c->tcp_parent = NULL;
1488238106Sdes	c->max_tcp_count = 0;
1489238106Sdes	c->tcp_handlers = NULL;
1490238106Sdes	c->tcp_free = NULL;
1491238106Sdes	c->type = comm_udp;
1492238106Sdes	c->tcp_do_close = 0;
1493238106Sdes	c->do_not_close = 0;
1494238106Sdes	c->tcp_do_toggle_rw = 0;
1495238106Sdes	c->tcp_check_nb_connect = 0;
1496238106Sdes	c->inuse = 0;
1497238106Sdes	c->callback = callback;
1498238106Sdes	c->cb_arg = callback_arg;
1499238106Sdes	evbits = EV_READ | EV_PERSIST;
1500238106Sdes	/* libevent stuff */
1501238106Sdes	event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1502238106Sdes	if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1503238106Sdes		log_err("could not baseset udp event");
1504238106Sdes		comm_point_delete(c);
1505238106Sdes		return NULL;
1506238106Sdes	}
1507238106Sdes	if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1508238106Sdes		log_err("could not add udp event");
1509238106Sdes		comm_point_delete(c);
1510238106Sdes		return NULL;
1511238106Sdes	}
1512238106Sdes	return c;
1513238106Sdes}
1514238106Sdes
1515238106Sdesstruct comm_point*
1516238106Sdescomm_point_create_udp_ancil(struct comm_base *base, int fd,
1517238106Sdes	ldns_buffer* buffer,
1518238106Sdes	comm_point_callback_t* callback, void* callback_arg)
1519238106Sdes{
1520238106Sdes	struct comm_point* c = (struct comm_point*)calloc(1,
1521238106Sdes		sizeof(struct comm_point));
1522238106Sdes	short evbits;
1523238106Sdes	if(!c)
1524238106Sdes		return NULL;
1525238106Sdes	c->ev = (struct internal_event*)calloc(1,
1526238106Sdes		sizeof(struct internal_event));
1527238106Sdes	if(!c->ev) {
1528238106Sdes		free(c);
1529238106Sdes		return NULL;
1530238106Sdes	}
1531238106Sdes	c->ev->base = base;
1532238106Sdes	c->fd = fd;
1533238106Sdes	c->buffer = buffer;
1534238106Sdes	c->timeout = NULL;
1535238106Sdes	c->tcp_is_reading = 0;
1536238106Sdes	c->tcp_byte_count = 0;
1537238106Sdes	c->tcp_parent = NULL;
1538238106Sdes	c->max_tcp_count = 0;
1539238106Sdes	c->tcp_handlers = NULL;
1540238106Sdes	c->tcp_free = NULL;
1541238106Sdes	c->type = comm_udp;
1542238106Sdes	c->tcp_do_close = 0;
1543238106Sdes	c->do_not_close = 0;
1544238106Sdes	c->inuse = 0;
1545238106Sdes	c->tcp_do_toggle_rw = 0;
1546238106Sdes	c->tcp_check_nb_connect = 0;
1547238106Sdes	c->callback = callback;
1548238106Sdes	c->cb_arg = callback_arg;
1549238106Sdes	evbits = EV_READ | EV_PERSIST;
1550238106Sdes	/* libevent stuff */
1551238106Sdes	event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1552238106Sdes	if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1553238106Sdes		log_err("could not baseset udp event");
1554238106Sdes		comm_point_delete(c);
1555238106Sdes		return NULL;
1556238106Sdes	}
1557238106Sdes	if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1558238106Sdes		log_err("could not add udp event");
1559238106Sdes		comm_point_delete(c);
1560238106Sdes		return NULL;
1561238106Sdes	}
1562238106Sdes	return c;
1563238106Sdes}
1564238106Sdes
1565238106Sdesstatic struct comm_point*
1566238106Sdescomm_point_create_tcp_handler(struct comm_base *base,
1567238106Sdes	struct comm_point* parent, size_t bufsize,
1568238106Sdes        comm_point_callback_t* callback, void* callback_arg)
1569238106Sdes{
1570238106Sdes	struct comm_point* c = (struct comm_point*)calloc(1,
1571238106Sdes		sizeof(struct comm_point));
1572238106Sdes	short evbits;
1573238106Sdes	if(!c)
1574238106Sdes		return NULL;
1575238106Sdes	c->ev = (struct internal_event*)calloc(1,
1576238106Sdes		sizeof(struct internal_event));
1577238106Sdes	if(!c->ev) {
1578238106Sdes		free(c);
1579238106Sdes		return NULL;
1580238106Sdes	}
1581238106Sdes	c->ev->base = base;
1582238106Sdes	c->fd = -1;
1583238106Sdes	c->buffer = ldns_buffer_new(bufsize);
1584238106Sdes	if(!c->buffer) {
1585238106Sdes		free(c->ev);
1586238106Sdes		free(c);
1587238106Sdes		return NULL;
1588238106Sdes	}
1589238106Sdes	c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1590238106Sdes	if(!c->timeout) {
1591238106Sdes		ldns_buffer_free(c->buffer);
1592238106Sdes		free(c->ev);
1593238106Sdes		free(c);
1594238106Sdes		return NULL;
1595238106Sdes	}
1596238106Sdes	c->tcp_is_reading = 0;
1597238106Sdes	c->tcp_byte_count = 0;
1598238106Sdes	c->tcp_parent = parent;
1599238106Sdes	c->max_tcp_count = 0;
1600238106Sdes	c->tcp_handlers = NULL;
1601238106Sdes	c->tcp_free = NULL;
1602238106Sdes	c->type = comm_tcp;
1603238106Sdes	c->tcp_do_close = 0;
1604238106Sdes	c->do_not_close = 0;
1605238106Sdes	c->tcp_do_toggle_rw = 1;
1606238106Sdes	c->tcp_check_nb_connect = 0;
1607238106Sdes	c->repinfo.c = c;
1608238106Sdes	c->callback = callback;
1609238106Sdes	c->cb_arg = callback_arg;
1610238106Sdes	/* add to parent free list */
1611238106Sdes	c->tcp_free = parent->tcp_free;
1612238106Sdes	parent->tcp_free = c;
1613238106Sdes	/* libevent stuff */
1614238106Sdes	evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1615238106Sdes	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1616238106Sdes	if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1617238106Sdes	{
1618238106Sdes		log_err("could not basetset tcphdl event");
1619238106Sdes		parent->tcp_free = c->tcp_free;
1620238106Sdes		free(c->ev);
1621238106Sdes		free(c);
1622238106Sdes		return NULL;
1623238106Sdes	}
1624238106Sdes	return c;
1625238106Sdes}
1626238106Sdes
1627238106Sdesstruct comm_point*
1628238106Sdescomm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1629238106Sdes        comm_point_callback_t* callback, void* callback_arg)
1630238106Sdes{
1631238106Sdes	struct comm_point* c = (struct comm_point*)calloc(1,
1632238106Sdes		sizeof(struct comm_point));
1633238106Sdes	short evbits;
1634238106Sdes	int i;
1635238106Sdes	/* first allocate the TCP accept listener */
1636238106Sdes	if(!c)
1637238106Sdes		return NULL;
1638238106Sdes	c->ev = (struct internal_event*)calloc(1,
1639238106Sdes		sizeof(struct internal_event));
1640238106Sdes	if(!c->ev) {
1641238106Sdes		free(c);
1642238106Sdes		return NULL;
1643238106Sdes	}
1644238106Sdes	c->ev->base = base;
1645238106Sdes	c->fd = fd;
1646238106Sdes	c->buffer = NULL;
1647238106Sdes	c->timeout = NULL;
1648238106Sdes	c->tcp_is_reading = 0;
1649238106Sdes	c->tcp_byte_count = 0;
1650238106Sdes	c->tcp_parent = NULL;
1651238106Sdes	c->max_tcp_count = num;
1652238106Sdes	c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1653238106Sdes		sizeof(struct comm_point*));
1654238106Sdes	if(!c->tcp_handlers) {
1655238106Sdes		free(c->ev);
1656238106Sdes		free(c);
1657238106Sdes		return NULL;
1658238106Sdes	}
1659238106Sdes	c->tcp_free = NULL;
1660238106Sdes	c->type = comm_tcp_accept;
1661238106Sdes	c->tcp_do_close = 0;
1662238106Sdes	c->do_not_close = 0;
1663238106Sdes	c->tcp_do_toggle_rw = 0;
1664238106Sdes	c->tcp_check_nb_connect = 0;
1665238106Sdes	c->callback = NULL;
1666238106Sdes	c->cb_arg = NULL;
1667238106Sdes	evbits = EV_READ | EV_PERSIST;
1668238106Sdes	/* libevent stuff */
1669238106Sdes	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1670238106Sdes	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1671238106Sdes		event_add(&c->ev->ev, c->timeout) != 0 )
1672238106Sdes	{
1673238106Sdes		log_err("could not add tcpacc event");
1674238106Sdes		comm_point_delete(c);
1675238106Sdes		return NULL;
1676238106Sdes	}
1677238106Sdes
1678238106Sdes	/* now prealloc the tcp handlers */
1679238106Sdes	for(i=0; i<num; i++) {
1680238106Sdes		c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1681238106Sdes			c, bufsize, callback, callback_arg);
1682238106Sdes		if(!c->tcp_handlers[i]) {
1683238106Sdes			comm_point_delete(c);
1684238106Sdes			return NULL;
1685238106Sdes		}
1686238106Sdes	}
1687238106Sdes
1688238106Sdes	return c;
1689238106Sdes}
1690238106Sdes
1691238106Sdesstruct comm_point*
1692238106Sdescomm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1693238106Sdes        comm_point_callback_t* callback, void* callback_arg)
1694238106Sdes{
1695238106Sdes	struct comm_point* c = (struct comm_point*)calloc(1,
1696238106Sdes		sizeof(struct comm_point));
1697238106Sdes	short evbits;
1698238106Sdes	if(!c)
1699238106Sdes		return NULL;
1700238106Sdes	c->ev = (struct internal_event*)calloc(1,
1701238106Sdes		sizeof(struct internal_event));
1702238106Sdes	if(!c->ev) {
1703238106Sdes		free(c);
1704238106Sdes		return NULL;
1705238106Sdes	}
1706238106Sdes	c->ev->base = base;
1707238106Sdes	c->fd = -1;
1708238106Sdes	c->buffer = ldns_buffer_new(bufsize);
1709238106Sdes	if(!c->buffer) {
1710238106Sdes		free(c->ev);
1711238106Sdes		free(c);
1712238106Sdes		return NULL;
1713238106Sdes	}
1714238106Sdes	c->timeout = NULL;
1715238106Sdes	c->tcp_is_reading = 0;
1716238106Sdes	c->tcp_byte_count = 0;
1717238106Sdes	c->tcp_parent = NULL;
1718238106Sdes	c->max_tcp_count = 0;
1719238106Sdes	c->tcp_handlers = NULL;
1720238106Sdes	c->tcp_free = NULL;
1721238106Sdes	c->type = comm_tcp;
1722238106Sdes	c->tcp_do_close = 0;
1723238106Sdes	c->do_not_close = 0;
1724238106Sdes	c->tcp_do_toggle_rw = 1;
1725238106Sdes	c->tcp_check_nb_connect = 1;
1726238106Sdes	c->repinfo.c = c;
1727238106Sdes	c->callback = callback;
1728238106Sdes	c->cb_arg = callback_arg;
1729238106Sdes	evbits = EV_PERSIST | EV_WRITE;
1730238106Sdes	event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1731238106Sdes	if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1732238106Sdes	{
1733238106Sdes		log_err("could not basetset tcpout event");
1734238106Sdes		ldns_buffer_free(c->buffer);
1735238106Sdes		free(c->ev);
1736238106Sdes		free(c);
1737238106Sdes		return NULL;
1738238106Sdes	}
1739238106Sdes
1740238106Sdes	return c;
1741238106Sdes}
1742238106Sdes
1743238106Sdesstruct comm_point*
1744238106Sdescomm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1745238106Sdes        comm_point_callback_t* callback, void* callback_arg)
1746238106Sdes{
1747238106Sdes	struct comm_point* c = (struct comm_point*)calloc(1,
1748238106Sdes		sizeof(struct comm_point));
1749238106Sdes	short evbits;
1750238106Sdes	if(!c)
1751238106Sdes		return NULL;
1752238106Sdes	c->ev = (struct internal_event*)calloc(1,
1753238106Sdes		sizeof(struct internal_event));
1754238106Sdes	if(!c->ev) {
1755238106Sdes		free(c);
1756238106Sdes		return NULL;
1757238106Sdes	}
1758238106Sdes	c->ev->base = base;
1759238106Sdes	c->fd = fd;
1760238106Sdes	c->buffer = ldns_buffer_new(bufsize);
1761238106Sdes	if(!c->buffer) {
1762238106Sdes		free(c->ev);
1763238106Sdes		free(c);
1764238106Sdes		return NULL;
1765238106Sdes	}
1766238106Sdes	c->timeout = NULL;
1767238106Sdes	c->tcp_is_reading = 1;
1768238106Sdes	c->tcp_byte_count = 0;
1769238106Sdes	c->tcp_parent = NULL;
1770238106Sdes	c->max_tcp_count = 0;
1771238106Sdes	c->tcp_handlers = NULL;
1772238106Sdes	c->tcp_free = NULL;
1773238106Sdes	c->type = comm_local;
1774238106Sdes	c->tcp_do_close = 0;
1775238106Sdes	c->do_not_close = 1;
1776238106Sdes	c->tcp_do_toggle_rw = 0;
1777238106Sdes	c->tcp_check_nb_connect = 0;
1778238106Sdes	c->callback = callback;
1779238106Sdes	c->cb_arg = callback_arg;
1780238106Sdes	/* libevent stuff */
1781238106Sdes	evbits = EV_PERSIST | EV_READ;
1782238106Sdes	event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback,
1783238106Sdes		c);
1784238106Sdes	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1785238106Sdes		event_add(&c->ev->ev, c->timeout) != 0 )
1786238106Sdes	{
1787238106Sdes		log_err("could not add localhdl event");
1788238106Sdes		free(c->ev);
1789238106Sdes		free(c);
1790238106Sdes		return NULL;
1791238106Sdes	}
1792238106Sdes	return c;
1793238106Sdes}
1794238106Sdes
1795238106Sdesstruct comm_point*
1796238106Sdescomm_point_create_raw(struct comm_base* base, int fd, int writing,
1797238106Sdes	comm_point_callback_t* callback, void* callback_arg)
1798238106Sdes{
1799238106Sdes	struct comm_point* c = (struct comm_point*)calloc(1,
1800238106Sdes		sizeof(struct comm_point));
1801238106Sdes	short evbits;
1802238106Sdes	if(!c)
1803238106Sdes		return NULL;
1804238106Sdes	c->ev = (struct internal_event*)calloc(1,
1805238106Sdes		sizeof(struct internal_event));
1806238106Sdes	if(!c->ev) {
1807238106Sdes		free(c);
1808238106Sdes		return NULL;
1809238106Sdes	}
1810238106Sdes	c->ev->base = base;
1811238106Sdes	c->fd = fd;
1812238106Sdes	c->buffer = NULL;
1813238106Sdes	c->timeout = NULL;
1814238106Sdes	c->tcp_is_reading = 0;
1815238106Sdes	c->tcp_byte_count = 0;
1816238106Sdes	c->tcp_parent = NULL;
1817238106Sdes	c->max_tcp_count = 0;
1818238106Sdes	c->tcp_handlers = NULL;
1819238106Sdes	c->tcp_free = NULL;
1820238106Sdes	c->type = comm_raw;
1821238106Sdes	c->tcp_do_close = 0;
1822238106Sdes	c->do_not_close = 1;
1823238106Sdes	c->tcp_do_toggle_rw = 0;
1824238106Sdes	c->tcp_check_nb_connect = 0;
1825238106Sdes	c->callback = callback;
1826238106Sdes	c->cb_arg = callback_arg;
1827238106Sdes	/* libevent stuff */
1828238106Sdes	if(writing)
1829238106Sdes		evbits = EV_PERSIST | EV_WRITE;
1830238106Sdes	else 	evbits = EV_PERSIST | EV_READ;
1831238106Sdes	event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback,
1832238106Sdes		c);
1833238106Sdes	if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1834238106Sdes		event_add(&c->ev->ev, c->timeout) != 0 )
1835238106Sdes	{
1836238106Sdes		log_err("could not add rawhdl event");
1837238106Sdes		free(c->ev);
1838238106Sdes		free(c);
1839238106Sdes		return NULL;
1840238106Sdes	}
1841238106Sdes	return c;
1842238106Sdes}
1843238106Sdes
1844238106Sdesvoid
1845238106Sdescomm_point_close(struct comm_point* c)
1846238106Sdes{
1847238106Sdes	if(!c)
1848238106Sdes		return;
1849238106Sdes	if(c->fd != -1)
1850238106Sdes		if(event_del(&c->ev->ev) != 0) {
1851238106Sdes			log_err("could not event_del on close");
1852238106Sdes		}
1853238106Sdes	/* close fd after removing from event lists, or epoll.. is messed up */
1854238106Sdes	if(c->fd != -1 && !c->do_not_close) {
1855238106Sdes		verbose(VERB_ALGO, "close fd %d", c->fd);
1856238106Sdes#ifndef USE_WINSOCK
1857238106Sdes		close(c->fd);
1858238106Sdes#else
1859238106Sdes		closesocket(c->fd);
1860238106Sdes#endif
1861238106Sdes	}
1862238106Sdes	c->fd = -1;
1863238106Sdes}
1864238106Sdes
1865238106Sdesvoid
1866238106Sdescomm_point_delete(struct comm_point* c)
1867238106Sdes{
1868238106Sdes	if(!c)
1869238106Sdes		return;
1870238106Sdes	if(c->type == comm_tcp && c->ssl) {
1871249141Sdes#ifdef HAVE_SSL
1872238106Sdes		SSL_shutdown(c->ssl);
1873238106Sdes		SSL_free(c->ssl);
1874249141Sdes#endif
1875238106Sdes	}
1876238106Sdes	comm_point_close(c);
1877238106Sdes	if(c->tcp_handlers) {
1878238106Sdes		int i;
1879238106Sdes		for(i=0; i<c->max_tcp_count; i++)
1880238106Sdes			comm_point_delete(c->tcp_handlers[i]);
1881238106Sdes		free(c->tcp_handlers);
1882238106Sdes	}
1883238106Sdes	free(c->timeout);
1884238106Sdes	if(c->type == comm_tcp || c->type == comm_local)
1885238106Sdes		ldns_buffer_free(c->buffer);
1886238106Sdes	free(c->ev);
1887238106Sdes	free(c);
1888238106Sdes}
1889238106Sdes
1890238106Sdesvoid
1891238106Sdescomm_point_send_reply(struct comm_reply *repinfo)
1892238106Sdes{
1893238106Sdes	log_assert(repinfo && repinfo->c);
1894238106Sdes	if(repinfo->c->type == comm_udp) {
1895238106Sdes		if(repinfo->srctype)
1896238106Sdes			comm_point_send_udp_msg_if(repinfo->c,
1897238106Sdes			repinfo->c->buffer, (struct sockaddr*)&repinfo->addr,
1898238106Sdes			repinfo->addrlen, repinfo);
1899238106Sdes		else
1900238106Sdes			comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
1901238106Sdes			(struct sockaddr*)&repinfo->addr, repinfo->addrlen);
1902238106Sdes	} else {
1903238106Sdes		comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
1904238106Sdes	}
1905238106Sdes}
1906238106Sdes
1907238106Sdesvoid
1908238106Sdescomm_point_drop_reply(struct comm_reply* repinfo)
1909238106Sdes{
1910238106Sdes	if(!repinfo)
1911238106Sdes		return;
1912238106Sdes	log_assert(repinfo && repinfo->c);
1913238106Sdes	log_assert(repinfo->c->type != comm_tcp_accept);
1914238106Sdes	if(repinfo->c->type == comm_udp)
1915238106Sdes		return;
1916238106Sdes	reclaim_tcp_handler(repinfo->c);
1917238106Sdes}
1918238106Sdes
1919238106Sdesvoid
1920238106Sdescomm_point_stop_listening(struct comm_point* c)
1921238106Sdes{
1922238106Sdes	verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
1923238106Sdes	if(event_del(&c->ev->ev) != 0) {
1924238106Sdes		log_err("event_del error to stoplisten");
1925238106Sdes	}
1926238106Sdes}
1927238106Sdes
1928238106Sdesvoid
1929238106Sdescomm_point_start_listening(struct comm_point* c, int newfd, int sec)
1930238106Sdes{
1931238106Sdes	verbose(VERB_ALGO, "comm point start listening %d",
1932238106Sdes		c->fd==-1?newfd:c->fd);
1933238106Sdes	if(c->type == comm_tcp_accept && !c->tcp_free) {
1934238106Sdes		/* no use to start listening no free slots. */
1935238106Sdes		return;
1936238106Sdes	}
1937238106Sdes	if(sec != -1 && sec != 0) {
1938238106Sdes		if(!c->timeout) {
1939238106Sdes			c->timeout = (struct timeval*)malloc(sizeof(
1940238106Sdes				struct timeval));
1941238106Sdes			if(!c->timeout) {
1942238106Sdes				log_err("cpsl: malloc failed. No net read.");
1943238106Sdes				return;
1944238106Sdes			}
1945238106Sdes		}
1946238106Sdes		c->ev->ev.ev_events |= EV_TIMEOUT;
1947238106Sdes#ifndef S_SPLINT_S /* splint fails on struct timeval. */
1948238106Sdes		c->timeout->tv_sec = sec;
1949238106Sdes		c->timeout->tv_usec = 0;
1950238106Sdes#endif /* S_SPLINT_S */
1951238106Sdes	}
1952238106Sdes	if(c->type == comm_tcp) {
1953238106Sdes		c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1954238106Sdes		if(c->tcp_is_reading)
1955238106Sdes			c->ev->ev.ev_events |= EV_READ;
1956238106Sdes		else	c->ev->ev.ev_events |= EV_WRITE;
1957238106Sdes	}
1958238106Sdes	if(newfd != -1) {
1959238106Sdes		if(c->fd != -1) {
1960238106Sdes#ifndef USE_WINSOCK
1961238106Sdes			close(c->fd);
1962238106Sdes#else
1963238106Sdes			closesocket(c->fd);
1964238106Sdes#endif
1965238106Sdes		}
1966238106Sdes		c->fd = newfd;
1967238106Sdes		c->ev->ev.ev_fd = c->fd;
1968238106Sdes	}
1969238106Sdes	if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
1970238106Sdes		log_err("event_add failed. in cpsl.");
1971238106Sdes	}
1972238106Sdes}
1973238106Sdes
1974238106Sdesvoid comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
1975238106Sdes{
1976238106Sdes	verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
1977238106Sdes	if(event_del(&c->ev->ev) != 0) {
1978238106Sdes		log_err("event_del error to cplf");
1979238106Sdes	}
1980238106Sdes	c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1981238106Sdes	if(rd) c->ev->ev.ev_events |= EV_READ;
1982238106Sdes	if(wr) c->ev->ev.ev_events |= EV_WRITE;
1983238106Sdes	if(event_add(&c->ev->ev, c->timeout) != 0) {
1984238106Sdes		log_err("event_add failed. in cplf.");
1985238106Sdes	}
1986238106Sdes}
1987238106Sdes
1988238106Sdessize_t comm_point_get_mem(struct comm_point* c)
1989238106Sdes{
1990238106Sdes	size_t s;
1991238106Sdes	if(!c)
1992238106Sdes		return 0;
1993238106Sdes	s = sizeof(*c) + sizeof(*c->ev);
1994238106Sdes	if(c->timeout)
1995238106Sdes		s += sizeof(*c->timeout);
1996238106Sdes	if(c->type == comm_tcp || c->type == comm_local)
1997238106Sdes		s += sizeof(*c->buffer) + ldns_buffer_capacity(c->buffer);
1998238106Sdes	if(c->type == comm_tcp_accept) {
1999238106Sdes		int i;
2000238106Sdes		for(i=0; i<c->max_tcp_count; i++)
2001238106Sdes			s += comm_point_get_mem(c->tcp_handlers[i]);
2002238106Sdes	}
2003238106Sdes	return s;
2004238106Sdes}
2005238106Sdes
2006238106Sdesstruct comm_timer*
2007238106Sdescomm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2008238106Sdes{
2009238106Sdes	struct comm_timer *tm = (struct comm_timer*)calloc(1,
2010238106Sdes		sizeof(struct comm_timer));
2011238106Sdes	if(!tm)
2012238106Sdes		return NULL;
2013238106Sdes	tm->ev_timer = (struct internal_timer*)calloc(1,
2014238106Sdes		sizeof(struct internal_timer));
2015238106Sdes	if(!tm->ev_timer) {
2016238106Sdes		log_err("malloc failed");
2017238106Sdes		free(tm);
2018238106Sdes		return NULL;
2019238106Sdes	}
2020238106Sdes	tm->ev_timer->base = base;
2021238106Sdes	tm->callback = cb;
2022238106Sdes	tm->cb_arg = cb_arg;
2023238106Sdes	event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT,
2024238106Sdes		comm_timer_callback, tm);
2025238106Sdes	if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2026238106Sdes		log_err("timer_create: event_base_set failed.");
2027238106Sdes		free(tm->ev_timer);
2028238106Sdes		free(tm);
2029238106Sdes		return NULL;
2030238106Sdes	}
2031238106Sdes	return tm;
2032238106Sdes}
2033238106Sdes
2034238106Sdesvoid
2035238106Sdescomm_timer_disable(struct comm_timer* timer)
2036238106Sdes{
2037238106Sdes	if(!timer)
2038238106Sdes		return;
2039238106Sdes	evtimer_del(&timer->ev_timer->ev);
2040238106Sdes	timer->ev_timer->enabled = 0;
2041238106Sdes}
2042238106Sdes
2043238106Sdesvoid
2044238106Sdescomm_timer_set(struct comm_timer* timer, struct timeval* tv)
2045238106Sdes{
2046238106Sdes	log_assert(tv);
2047238106Sdes	if(timer->ev_timer->enabled)
2048238106Sdes		comm_timer_disable(timer);
2049238106Sdes	event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2050238106Sdes		comm_timer_callback, timer);
2051238106Sdes	if(event_base_set(timer->ev_timer->base->eb->base,
2052238106Sdes		&timer->ev_timer->ev) != 0)
2053238106Sdes		log_err("comm_timer_set: set_base failed.");
2054238106Sdes	if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2055238106Sdes		log_err("comm_timer_set: evtimer_add failed.");
2056238106Sdes	timer->ev_timer->enabled = 1;
2057238106Sdes}
2058238106Sdes
2059238106Sdesvoid
2060238106Sdescomm_timer_delete(struct comm_timer* timer)
2061238106Sdes{
2062238106Sdes	if(!timer)
2063238106Sdes		return;
2064238106Sdes	comm_timer_disable(timer);
2065238106Sdes	free(timer->ev_timer);
2066238106Sdes	free(timer);
2067238106Sdes}
2068238106Sdes
2069238106Sdesvoid
2070238106Sdescomm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2071238106Sdes{
2072238106Sdes	struct comm_timer* tm = (struct comm_timer*)arg;
2073238106Sdes	if(!(event&EV_TIMEOUT))
2074238106Sdes		return;
2075238106Sdes	comm_base_now(tm->ev_timer->base);
2076238106Sdes	tm->ev_timer->enabled = 0;
2077238106Sdes	fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2078238106Sdes	(*tm->callback)(tm->cb_arg);
2079238106Sdes}
2080238106Sdes
2081238106Sdesint
2082238106Sdescomm_timer_is_set(struct comm_timer* timer)
2083238106Sdes{
2084238106Sdes	return (int)timer->ev_timer->enabled;
2085238106Sdes}
2086238106Sdes
2087238106Sdessize_t
2088238106Sdescomm_timer_get_mem(struct comm_timer* timer)
2089238106Sdes{
2090238106Sdes	return sizeof(*timer) + sizeof(struct internal_timer);
2091238106Sdes}
2092238106Sdes
2093238106Sdesstruct comm_signal*
2094238106Sdescomm_signal_create(struct comm_base* base,
2095238106Sdes        void (*callback)(int, void*), void* cb_arg)
2096238106Sdes{
2097238106Sdes	struct comm_signal* com = (struct comm_signal*)malloc(
2098238106Sdes		sizeof(struct comm_signal));
2099238106Sdes	if(!com) {
2100238106Sdes		log_err("malloc failed");
2101238106Sdes		return NULL;
2102238106Sdes	}
2103238106Sdes	com->base = base;
2104238106Sdes	com->callback = callback;
2105238106Sdes	com->cb_arg = cb_arg;
2106238106Sdes	com->ev_signal = NULL;
2107238106Sdes	return com;
2108238106Sdes}
2109238106Sdes
2110238106Sdesvoid
2111238106Sdescomm_signal_callback(int sig, short event, void* arg)
2112238106Sdes{
2113238106Sdes	struct comm_signal* comsig = (struct comm_signal*)arg;
2114238106Sdes	if(!(event & EV_SIGNAL))
2115238106Sdes		return;
2116238106Sdes	comm_base_now(comsig->base);
2117238106Sdes	fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2118238106Sdes	(*comsig->callback)(sig, comsig->cb_arg);
2119238106Sdes}
2120238106Sdes
2121238106Sdesint
2122238106Sdescomm_signal_bind(struct comm_signal* comsig, int sig)
2123238106Sdes{
2124238106Sdes	struct internal_signal* entry = (struct internal_signal*)calloc(1,
2125238106Sdes		sizeof(struct internal_signal));
2126238106Sdes	if(!entry) {
2127238106Sdes		log_err("malloc failed");
2128238106Sdes		return 0;
2129238106Sdes	}
2130238106Sdes	log_assert(comsig);
2131238106Sdes	/* add signal event */
2132238106Sdes	signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2133238106Sdes	if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2134238106Sdes		log_err("Could not set signal base");
2135238106Sdes		free(entry);
2136238106Sdes		return 0;
2137238106Sdes	}
2138238106Sdes	if(signal_add(&entry->ev, NULL) != 0) {
2139238106Sdes		log_err("Could not add signal handler");
2140238106Sdes		free(entry);
2141238106Sdes		return 0;
2142238106Sdes	}
2143238106Sdes	/* link into list */
2144238106Sdes	entry->next = comsig->ev_signal;
2145238106Sdes	comsig->ev_signal = entry;
2146238106Sdes	return 1;
2147238106Sdes}
2148238106Sdes
2149238106Sdesvoid
2150238106Sdescomm_signal_delete(struct comm_signal* comsig)
2151238106Sdes{
2152238106Sdes	struct internal_signal* p, *np;
2153238106Sdes	if(!comsig)
2154238106Sdes		return;
2155238106Sdes	p=comsig->ev_signal;
2156238106Sdes	while(p) {
2157238106Sdes		np = p->next;
2158238106Sdes		signal_del(&p->ev);
2159238106Sdes		free(p);
2160238106Sdes		p = np;
2161238106Sdes	}
2162238106Sdes	free(comsig);
2163238106Sdes}
2164