1/* Copyright 2006-2007 Niels Provos
2 * Copyright 2007-2012 Nick Mathewson and Niels Provos
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 *    derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/* Based on software by Adam Langly. Adam's original message:
28 *
29 * Async DNS Library
30 * Adam Langley <agl@imperialviolet.org>
31 * http://www.imperialviolet.org/eventdns.html
32 * Public Domain code
33 *
34 * This software is Public Domain. To view a copy of the public domain dedication,
35 * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
36 * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
37 *
38 * I ask and expect, but do not require, that all derivative works contain an
39 * attribution similar to:
40 *	Parts developed by Adam Langley <agl@imperialviolet.org>
41 *
42 * You may wish to replace the word "Parts" with something else depending on
43 * the amount of original code.
44 *
45 * (Derivative works does not include programs which link against, run or include
46 * the source verbatim in their source distributions)
47 *
48 * Version: 0.1b
49 */
50
51#include <sys/types.h>
52#include "event2/event-config.h"
53
54#ifndef _FORTIFY_SOURCE
55#define _FORTIFY_SOURCE 3
56#endif
57
58#include <string.h>
59#include <fcntl.h>
60#ifdef _EVENT_HAVE_SYS_TIME_H
61#include <sys/time.h>
62#endif
63#ifdef _EVENT_HAVE_STDINT_H
64#include <stdint.h>
65#endif
66#include <stdlib.h>
67#include <string.h>
68#include <errno.h>
69#ifdef _EVENT_HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#include <limits.h>
73#include <sys/stat.h>
74#include <stdio.h>
75#include <stdarg.h>
76#ifdef WIN32
77#include <winsock2.h>
78#include <ws2tcpip.h>
79#ifndef _WIN32_IE
80#define _WIN32_IE 0x400
81#endif
82#include <shlobj.h>
83#endif
84
85#include "event2/dns.h"
86#include "event2/dns_struct.h"
87#include "event2/dns_compat.h"
88#include "event2/util.h"
89#include "event2/event.h"
90#include "event2/event_struct.h"
91#include "event2/thread.h"
92
93#include "event2/bufferevent.h"
94#include "event2/bufferevent_struct.h"
95#include "bufferevent-internal.h"
96
97#include "defer-internal.h"
98#include "log-internal.h"
99#include "mm-internal.h"
100#include "strlcpy-internal.h"
101#include "ipv6-internal.h"
102#include "util-internal.h"
103#include "evthread-internal.h"
104#ifdef WIN32
105#include <ctype.h>
106#include <winsock2.h>
107#include <windows.h>
108#include <iphlpapi.h>
109#include <io.h>
110#else
111#include <sys/socket.h>
112#include <netinet/in.h>
113#include <arpa/inet.h>
114#endif
115
116#ifdef _EVENT_HAVE_NETINET_IN6_H
117#include <netinet/in6.h>
118#endif
119
/* Severity levels handed to the logging callback. */
#define EVDNS_LOG_DEBUG 0
#define EVDNS_LOG_WARN 1
#define EVDNS_LOG_MSG 2

/* Fallback used when no earlier header has defined HOST_NAME_MAX. */
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 255
#endif

#include <stdio.h>

/* Drop any MIN() inherited from another header and use our own.
 * Note that each argument may be evaluated twice. */
#undef MIN
#define MIN(a,b) (((a) < (b)) ? (a) : (b))

/* Sanity check: a request must have a handle, and that handle must point
 * back at the request. */
#define ASSERT_VALID_REQUEST(req) \
	EVUTIL_ASSERT((req)->handle && (req)->handle->current_req == (req))

/* Short spellings for the fixed-width unsigned integer types. */
#define u64 ev_uint64_t
#define u32 ev_uint32_t
#define u16 ev_uint16_t
#define u8  ev_uint8_t

/* Upper bound on the number of addresses from a single packet that we
 * bother recording. */
#define MAX_V4_ADDRS 32
#define MAX_V6_ADDRS 32


/* Record types and classes, under the short names used in this file. */
#define TYPE_A	       EVDNS_TYPE_A
#define TYPE_CNAME     5
#define TYPE_PTR       EVDNS_TYPE_PTR
#define TYPE_SOA       EVDNS_TYPE_SOA
#define TYPE_AAAA      EVDNS_TYPE_AAAA

#define CLASS_INET     EVDNS_CLASS_INET
154
/* Long-lived handle for a resolve operation.  It is kept apart from
 * 'struct request' because something has to exist for as long as an
 * evdns_request is outstanding so that it can be canceled, while a single
 * search may create several 'struct request' instances one after another. */
struct evdns_request {
	/* The request currently attached to this handle, if any. */
	struct request *current_req;
	struct evdns_base *base;

	/* Nonzero while the handle is waiting for its callback to be
	 * invoked; it is not owned by the event base any more. */
	int pending_cb;

	/* State used by the searching code. */
	int search_index;
	struct search_state *search_state;
	char *search_origname;	/* needs to be free()ed */
	int search_flags;
};
172
173struct request {
174	u8 *request;  /* the dns packet data */
175	u8 request_type; /* TYPE_PTR or TYPE_A or TYPE_AAAA */
176	unsigned int request_len;
177	int reissue_count;
178	int tx_count;  /* the number of times that this packet has been sent */
179	void *user_pointer;  /* the pointer given to us for this request */
180	evdns_callback_type user_callback;
181	struct nameserver *ns;	/* the server which we last sent it */
182
183	/* these objects are kept in a circular list */
184	/* XXX We could turn this into a CIRCLEQ. */
185	struct request *next, *prev;
186
187	struct event timeout_event;
188
189	u16 trans_id;  /* the transaction id */
190	unsigned request_appended :1;	/* true if the request pointer is data which follows this struct */
191	unsigned transmit_me :1;  /* needs to be transmitted */
192
193	/* XXXX This is a horrible hack. */
194	char **put_cname_in_ptr; /* store the cname here if we get one. */
195
196	struct evdns_base *base;
197
198	struct evdns_request *handle;
199};
200
201struct reply {
202	unsigned int type;
203	unsigned int have_answer : 1;
204	union {
205		struct {
206			u32 addrcount;
207			u32 addresses[MAX_V4_ADDRS];
208		} a;
209		struct {
210			u32 addrcount;
211			struct in6_addr addresses[MAX_V6_ADDRS];
212		} aaaa;
213		struct {
214			char name[HOST_NAME_MAX];
215		} ptr;
216	} data;
217};
218
219struct nameserver {
220	evutil_socket_t socket;	 /* a connected UDP socket */
221	struct sockaddr_storage address;
222	ev_socklen_t addrlen;
223	int failed_times;  /* number of times which we have given this server a chance */
224	int timedout;  /* number of times in a row a request has timed out */
225	struct event event;
226	/* these objects are kept in a circular list */
227	struct nameserver *next, *prev;
228	struct event timeout_event;  /* used to keep the timeout for */
229				     /* when we next probe this server. */
230				     /* Valid if state == 0 */
231	/* Outstanding probe request for this nameserver, if any */
232	struct evdns_request *probe_request;
233	char state;  /* zero if we think that this server is down */
234	char choked;  /* true if we have an EAGAIN from this server's socket */
235	char write_waiting;  /* true if we are waiting for EV_WRITE events */
236	struct evdns_base *base;
237};
238
239
240/* Represents a local port where we're listening for DNS requests. Right now, */
241/* only UDP is supported. */
242struct evdns_server_port {
243	evutil_socket_t socket; /* socket we use to read queries and write replies. */
244	int refcnt; /* reference count. */
245	char choked; /* Are we currently blocked from writing? */
246	char closing; /* Are we trying to close this port, pending writes? */
247	evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
248	void *user_data; /* Opaque pointer passed to user_callback */
249	struct event event; /* Read/write event */
250	/* circular list of replies that we want to write. */
251	struct server_request *pending_replies;
252	struct event_base *event_base;
253
254#ifndef _EVENT_DISABLE_THREAD_SUPPORT
255	void *lock;
256#endif
257};
258
259/* Represents part of a reply being built.	(That is, a single RR.) */
260struct server_reply_item {
261	struct server_reply_item *next; /* next item in sequence. */
262	char *name; /* name part of the RR */
263	u16 type; /* The RR type */
264	u16 class; /* The RR class (usually CLASS_INET) */
265	u32 ttl; /* The RR TTL */
266	char is_name; /* True iff data is a label */
267	u16 datalen; /* Length of data; -1 if data is a label */
268	void *data; /* The contents of the RR */
269};
270
271/* Represents a request that we've received as a DNS server, and holds */
272/* the components of the reply as we're constructing it. */
273struct server_request {
274	/* Pointers to the next and previous entries on the list of replies */
275	/* that we're waiting to write.	 Only set if we have tried to respond */
276	/* and gotten EAGAIN. */
277	struct server_request *next_pending;
278	struct server_request *prev_pending;
279
280	u16 trans_id; /* Transaction id. */
281	struct evdns_server_port *port; /* Which port received this request on? */
282	struct sockaddr_storage addr; /* Where to send the response */
283	ev_socklen_t addrlen; /* length of addr */
284
285	int n_answer; /* how many answer RRs have been set? */
286	int n_authority; /* how many authority RRs have been set? */
287	int n_additional; /* how many additional RRs have been set? */
288
289	struct server_reply_item *answer; /* linked list of answer RRs */
290	struct server_reply_item *authority; /* linked list of authority RRs */
291	struct server_reply_item *additional; /* linked list of additional RRs */
292
293	/* Constructed response.  Only set once we're ready to send a reply. */
294	/* Once this is set, the RR fields are cleared, and no more should be set. */
295	char *response;
296	size_t response_len;
297
298	/* Caller-visible fields: flags, questions. */
299	struct evdns_server_request base;
300};
301
302struct evdns_base {
303	/* An array of n_req_heads circular lists for inflight requests.
304	 * Each inflight request req is in req_heads[req->trans_id % n_req_heads].
305	 */
306	struct request **req_heads;
307	/* A circular list of requests that we're waiting to send, but haven't
308	 * sent yet because there are too many requests inflight */
309	struct request *req_waiting_head;
310	/* A circular list of nameservers. */
311	struct nameserver *server_head;
312	int n_req_heads;
313
314	struct event_base *event_base;
315
316	/* The number of good nameservers that we have */
317	int global_good_nameservers;
318
319	/* inflight requests are contained in the req_head list */
320	/* and are actually going out across the network */
321	int global_requests_inflight;
322	/* requests which aren't inflight are in the waiting list */
323	/* and are counted here */
324	int global_requests_waiting;
325
326	int global_max_requests_inflight;
327
328	struct timeval global_timeout;	/* 5 seconds by default */
329	int global_max_reissues;  /* a reissue occurs when we get some errors from the server */
330	int global_max_retransmits;  /* number of times we'll retransmit a request which timed out */
331	/* number of timeouts in a row before we consider this server to be down */
332	int global_max_nameserver_timeout;
333	/* true iff we will use the 0x20 hack to prevent poisoning attacks. */
334	int global_randomize_case;
335
336	/* The first time that a nameserver fails, how long do we wait before
337	 * probing to see if it has returned?  */
338	struct timeval global_nameserver_probe_initial_timeout;
339
340	/** Port to bind to for outgoing DNS packets. */
341	struct sockaddr_storage global_outgoing_address;
342	/** ev_socklen_t for global_outgoing_address. 0 if it isn't set. */
343	ev_socklen_t global_outgoing_addrlen;
344
345	struct timeval global_getaddrinfo_allow_skew;
346
347	int getaddrinfo_ipv4_timeouts;
348	int getaddrinfo_ipv6_timeouts;
349	int getaddrinfo_ipv4_answered;
350	int getaddrinfo_ipv6_answered;
351
352	struct search_state *global_search_state;
353
354	TAILQ_HEAD(hosts_list, hosts_entry) hostsdb;
355
356#ifndef _EVENT_DISABLE_THREAD_SUPPORT
357	void *lock;
358#endif
359};
360
361struct hosts_entry {
362	TAILQ_ENTRY(hosts_entry) next;
363	union {
364		struct sockaddr sa;
365		struct sockaddr_in sin;
366		struct sockaddr_in6 sin6;
367	} addr;
368	int addrlen;
369	char hostname[1];
370};
371
/* The process-wide default base; initially NULL. */
static struct evdns_base *current_base = NULL;

/* Return the process-wide default evdns base, which may be NULL. */
struct evdns_base *
evdns_get_global_base(void)
{
	struct evdns_base *const global_base = current_base;

	return global_base;
}
379
/* Given a pointer to the 'base' member embedded in a server_request,
 * recover a pointer to the enclosing server_request. */
#define TO_SERVER_REQUEST(base_ptr)					\
	((struct server_request*)					\
	  (((char*)(base_ptr) - evutil_offsetof(struct server_request, base))))

/* The head of the inflight list that holds transaction id 'id'.  Expands
 * to an lvalue.  'id' is parenthesized so that an expression argument is
 * reduced modulo n_req_heads as a whole. */
#define REQ_HEAD(base, id) ((base)->req_heads[(id) % (base)->n_req_heads])
387
388static struct nameserver *nameserver_pick(struct evdns_base *base);
389static void evdns_request_insert(struct request *req, struct request **head);
390static void evdns_request_remove(struct request *req, struct request **head);
391static void nameserver_ready_callback(evutil_socket_t fd, short events, void *arg);
392static int evdns_transmit(struct evdns_base *base);
393static int evdns_request_transmit(struct request *req);
394static void nameserver_send_probe(struct nameserver *const ns);
395static void search_request_finished(struct evdns_request *const);
396static int search_try_next(struct evdns_request *const req);
397static struct request *search_request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg);
398static void evdns_requests_pump_waiting_queue(struct evdns_base *base);
399static u16 transaction_id_pick(struct evdns_base *base);
400static struct request *request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
401static void request_submit(struct request *const req);
402
403static int server_request_free(struct server_request *req);
404static void server_request_free_answers(struct server_request *req);
405static void server_port_free(struct evdns_server_port *port);
406static void server_port_ready_callback(evutil_socket_t fd, short events, void *arg);
407static int evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename);
408static int evdns_base_set_option_impl(struct evdns_base *base,
409    const char *option, const char *val, int flags);
410static void evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests);
411
412static int strtoint(const char *const str);
413
/* Locking helpers for an evdns_base.  They operate on (base)->lock when
 * thread support is compiled in, and expand to empty statements when it
 * is not. */
#ifndef _EVENT_DISABLE_THREAD_SUPPORT
#define EVDNS_LOCK(base) EVLOCK_LOCK((base)->lock, 0)
#define EVDNS_UNLOCK(base) EVLOCK_UNLOCK((base)->lock, 0)
#define ASSERT_LOCKED(base) EVLOCK_ASSERT_LOCKED((base)->lock)
#else
#define EVDNS_LOCK(base)  _EVUTIL_NIL_STMT
#define EVDNS_UNLOCK(base) _EVUTIL_NIL_STMT
#define ASSERT_LOCKED(base) _EVUTIL_NIL_STMT
#endif
426
427static void
428default_evdns_log_fn(int warning, const char *buf)
429{
430	if (warning == EVDNS_LOG_WARN)
431		event_warnx("[evdns] %s", buf);
432	else if (warning == EVDNS_LOG_MSG)
433		event_msgx("[evdns] %s", buf);
434	else
435		event_debug(("[evdns] %s", buf));
436}
437
438static evdns_debug_log_fn_type evdns_log_fn = NULL;
439
440void
441evdns_set_log_fn(evdns_debug_log_fn_type fn)
442{
443	evdns_log_fn = fn;
444}
445
446#ifdef __GNUC__
447#define EVDNS_LOG_CHECK	 __attribute__ ((format(printf, 2, 3)))
448#else
449#define EVDNS_LOG_CHECK
450#endif
451
452static void _evdns_log(int warn, const char *fmt, ...) EVDNS_LOG_CHECK;
453static void
454_evdns_log(int warn, const char *fmt, ...)
455{
456	va_list args;
457	char buf[512];
458	if (!evdns_log_fn)
459		return;
460	va_start(args,fmt);
461	evutil_vsnprintf(buf, sizeof(buf), fmt, args);
462	va_end(args);
463	if (evdns_log_fn) {
464		if (warn == EVDNS_LOG_MSG)
465			warn = EVDNS_LOG_WARN;
466		evdns_log_fn(warn, buf);
467	} else {
468		default_evdns_log_fn(warn, buf);
469	}
470
471}
472
473#define log _evdns_log
474
475/* This walks the list of inflight requests to find the */
476/* one with a matching transaction id. Returns NULL on */
477/* failure */
478static struct request *
479request_find_from_trans_id(struct evdns_base *base, u16 trans_id) {
480	struct request *req = REQ_HEAD(base, trans_id);
481	struct request *const started_at = req;
482
483	ASSERT_LOCKED(base);
484
485	if (req) {
486		do {
487			if (req->trans_id == trans_id) return req;
488			req = req->next;
489		} while (req != started_at);
490	}
491
492	return NULL;
493}
494
495/* a libevent callback function which is called when a nameserver */
496/* has gone down and we want to test if it has came back to life yet */
497static void
498nameserver_prod_callback(evutil_socket_t fd, short events, void *arg) {
499	struct nameserver *const ns = (struct nameserver *) arg;
500	(void)fd;
501	(void)events;
502
503	EVDNS_LOCK(ns->base);
504	nameserver_send_probe(ns);
505	EVDNS_UNLOCK(ns->base);
506}
507
508/* a libevent callback which is called when a nameserver probe (to see if */
509/* it has come back to life) times out. We increment the count of failed_times */
510/* and wait longer to send the next probe packet. */
511static void
512nameserver_probe_failed(struct nameserver *const ns) {
513	struct timeval timeout;
514	int i;
515
516	ASSERT_LOCKED(ns->base);
517	(void) evtimer_del(&ns->timeout_event);
518	if (ns->state == 1) {
519		/* This can happen if the nameserver acts in a way which makes us mark */
520		/* it as bad and then starts sending good replies. */
521		return;
522	}
523
524#define MAX_PROBE_TIMEOUT 3600
525#define TIMEOUT_BACKOFF_FACTOR 3
526
527	memcpy(&timeout, &ns->base->global_nameserver_probe_initial_timeout,
528	    sizeof(struct timeval));
529	for (i=ns->failed_times; i > 0 && timeout.tv_sec < MAX_PROBE_TIMEOUT; --i) {
530		timeout.tv_sec *= TIMEOUT_BACKOFF_FACTOR;
531		timeout.tv_usec *= TIMEOUT_BACKOFF_FACTOR;
532		if (timeout.tv_usec > 1000000) {
533			timeout.tv_sec += timeout.tv_usec / 1000000;
534			timeout.tv_usec %= 1000000;
535		}
536	}
537	if (timeout.tv_sec > MAX_PROBE_TIMEOUT) {
538		timeout.tv_sec = MAX_PROBE_TIMEOUT;
539		timeout.tv_usec = 0;
540	}
541
542	ns->failed_times++;
543
544	if (evtimer_add(&ns->timeout_event, &timeout) < 0) {
545		char addrbuf[128];
546		log(EVDNS_LOG_WARN,
547		    "Error from libevent when adding timer event for %s",
548		    evutil_format_sockaddr_port(
549			    (struct sockaddr *)&ns->address,
550			    addrbuf, sizeof(addrbuf)));
551	}
552}
553
554/* called when a nameserver has been deemed to have failed. For example, too */
555/* many packets have timed out etc */
556static void
557nameserver_failed(struct nameserver *const ns, const char *msg) {
558	struct request *req, *started_at;
559	struct evdns_base *base = ns->base;
560	int i;
561	char addrbuf[128];
562
563	ASSERT_LOCKED(base);
564	/* if this nameserver has already been marked as failed */
565	/* then don't do anything */
566	if (!ns->state) return;
567
568	log(EVDNS_LOG_MSG, "Nameserver %s has failed: %s",
569	    evutil_format_sockaddr_port(
570		    (struct sockaddr *)&ns->address,
571		    addrbuf, sizeof(addrbuf)),
572	    msg);
573
574	base->global_good_nameservers--;
575	EVUTIL_ASSERT(base->global_good_nameservers >= 0);
576	if (base->global_good_nameservers == 0) {
577		log(EVDNS_LOG_MSG, "All nameservers have failed");
578	}
579
580	ns->state = 0;
581	ns->failed_times = 1;
582
583	if (evtimer_add(&ns->timeout_event,
584		&base->global_nameserver_probe_initial_timeout) < 0) {
585		log(EVDNS_LOG_WARN,
586		    "Error from libevent when adding timer event for %s",
587		    evutil_format_sockaddr_port(
588			    (struct sockaddr *)&ns->address,
589			    addrbuf, sizeof(addrbuf)));
590		/* ???? Do more? */
591	}
592
593	/* walk the list of inflight requests to see if any can be reassigned to */
594	/* a different server. Requests in the waiting queue don't have a */
595	/* nameserver assigned yet */
596
597	/* if we don't have *any* good nameservers then there's no point */
598	/* trying to reassign requests to one */
599	if (!base->global_good_nameservers) return;
600
601	for (i = 0; i < base->n_req_heads; ++i) {
602		req = started_at = base->req_heads[i];
603		if (req) {
604			do {
605				if (req->tx_count == 0 && req->ns == ns) {
606					/* still waiting to go out, can be moved */
607					/* to another server */
608					req->ns = nameserver_pick(base);
609				}
610				req = req->next;
611			} while (req != started_at);
612		}
613	}
614}
615
616static void
617nameserver_up(struct nameserver *const ns)
618{
619	char addrbuf[128];
620	ASSERT_LOCKED(ns->base);
621	if (ns->state) return;
622	log(EVDNS_LOG_MSG, "Nameserver %s is back up",
623	    evutil_format_sockaddr_port(
624		    (struct sockaddr *)&ns->address,
625		    addrbuf, sizeof(addrbuf)));
626	evtimer_del(&ns->timeout_event);
627	if (ns->probe_request) {
628		evdns_cancel_request(ns->base, ns->probe_request);
629		ns->probe_request = NULL;
630	}
631	ns->state = 1;
632	ns->failed_times = 0;
633	ns->timedout = 0;
634	ns->base->global_good_nameservers++;
635}
636
637static void
638request_trans_id_set(struct request *const req, const u16 trans_id) {
639	req->trans_id = trans_id;
640	*((u16 *) req->request) = htons(trans_id);
641}
642
643/* Called to remove a request from a list and dealloc it. */
644/* head is a pointer to the head of the list it should be */
645/* removed from or NULL if the request isn't in a list. */
646/* when free_handle is one, free the handle as well. */
647static void
648request_finished(struct request *const req, struct request **head, int free_handle) {
649	struct evdns_base *base = req->base;
650	int was_inflight = (head != &base->req_waiting_head);
651	EVDNS_LOCK(base);
652	ASSERT_VALID_REQUEST(req);
653
654	if (head)
655		evdns_request_remove(req, head);
656
657	log(EVDNS_LOG_DEBUG, "Removing timeout for request %p", req);
658	if (was_inflight) {
659		evtimer_del(&req->timeout_event);
660		base->global_requests_inflight--;
661	} else {
662		base->global_requests_waiting--;
663	}
664	/* it was initialized during request_new / evtimer_assign */
665	event_debug_unassign(&req->timeout_event);
666
667	if (!req->request_appended) {
668		/* need to free the request data on it's own */
669		mm_free(req->request);
670	} else {
671		/* the request data is appended onto the header */
672		/* so everything gets free()ed when we: */
673	}
674
675	if (req->handle) {
676		EVUTIL_ASSERT(req->handle->current_req == req);
677
678		if (free_handle) {
679			search_request_finished(req->handle);
680			req->handle->current_req = NULL;
681			if (! req->handle->pending_cb) {
682				/* If we're planning to run the callback,
683				 * don't free the handle until later. */
684				mm_free(req->handle);
685			}
686			req->handle = NULL; /* If we have a bug, let's crash
687					     * early */
688		} else {
689			req->handle->current_req = NULL;
690		}
691	}
692
693	mm_free(req);
694
695	evdns_requests_pump_waiting_queue(base);
696	EVDNS_UNLOCK(base);
697}
698
699/* This is called when a server returns a funny error code. */
700/* We try the request again with another server. */
701/* */
702/* return: */
703/*   0 ok */
704/*   1 failed/reissue is pointless */
705static int
706request_reissue(struct request *req) {
707	const struct nameserver *const last_ns = req->ns;
708	ASSERT_LOCKED(req->base);
709	ASSERT_VALID_REQUEST(req);
710	/* the last nameserver should have been marked as failing */
711	/* by the caller of this function, therefore pick will try */
712	/* not to return it */
713	req->ns = nameserver_pick(req->base);
714	if (req->ns == last_ns) {
715		/* ... but pick did return it */
716		/* not a lot of point in trying again with the */
717		/* same server */
718		return 1;
719	}
720
721	req->reissue_count++;
722	req->tx_count = 0;
723	req->transmit_me = 1;
724
725	return 0;
726}
727
728/* this function looks for space on the inflight queue and promotes */
729/* requests from the waiting queue if it can. */
730static void
731evdns_requests_pump_waiting_queue(struct evdns_base *base) {
732	ASSERT_LOCKED(base);
733	while (base->global_requests_inflight < base->global_max_requests_inflight &&
734		   base->global_requests_waiting) {
735		struct request *req;
736		/* move a request from the waiting queue to the inflight queue */
737		EVUTIL_ASSERT(base->req_waiting_head);
738		req = base->req_waiting_head;
739		evdns_request_remove(req, &base->req_waiting_head);
740
741		base->global_requests_waiting--;
742		base->global_requests_inflight++;
743
744		req->ns = nameserver_pick(base);
745		request_trans_id_set(req, transaction_id_pick(base));
746
747		evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
748		evdns_request_transmit(req);
749		evdns_transmit(base);
750	}
751}
752
753/* TODO(nickm) document */
754struct deferred_reply_callback {
755	struct deferred_cb deferred;
756	struct evdns_request *handle;
757	u8 request_type;
758	u8 have_reply;
759	u32 ttl;
760	u32 err;
761	evdns_callback_type user_callback;
762	struct reply reply;
763};
764
765static void
766reply_run_callback(struct deferred_cb *d, void *user_pointer)
767{
768	struct deferred_reply_callback *cb =
769	    EVUTIL_UPCAST(d, struct deferred_reply_callback, deferred);
770
771	switch (cb->request_type) {
772	case TYPE_A:
773		if (cb->have_reply)
774			cb->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
775			    cb->reply.data.a.addrcount, cb->ttl,
776			    cb->reply.data.a.addresses,
777			    user_pointer);
778		else
779			cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
780		break;
781	case TYPE_PTR:
782		if (cb->have_reply) {
783			char *name = cb->reply.data.ptr.name;
784			cb->user_callback(DNS_ERR_NONE, DNS_PTR, 1, cb->ttl,
785			    &name, user_pointer);
786		} else {
787			cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
788		}
789		break;
790	case TYPE_AAAA:
791		if (cb->have_reply)
792			cb->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
793			    cb->reply.data.aaaa.addrcount, cb->ttl,
794			    cb->reply.data.aaaa.addresses,
795			    user_pointer);
796		else
797			cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
798		break;
799	default:
800		EVUTIL_ASSERT(0);
801	}
802
803	if (cb->handle && cb->handle->pending_cb) {
804		mm_free(cb->handle);
805	}
806
807	mm_free(cb);
808}
809
810static void
811reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply)
812{
813	struct deferred_reply_callback *d = mm_calloc(1, sizeof(*d));
814
815	if (!d) {
816		event_warn("%s: Couldn't allocate space for deferred callback.",
817		    __func__);
818		return;
819	}
820
821	ASSERT_LOCKED(req->base);
822
823	d->request_type = req->request_type;
824	d->user_callback = req->user_callback;
825	d->ttl = ttl;
826	d->err = err;
827	if (reply) {
828		d->have_reply = 1;
829		memcpy(&d->reply, reply, sizeof(struct reply));
830	}
831
832	if (req->handle) {
833		req->handle->pending_cb = 1;
834		d->handle = req->handle;
835	}
836
837	event_deferred_cb_init(&d->deferred, reply_run_callback,
838	    req->user_pointer);
839	event_deferred_cb_schedule(
840		event_base_get_deferred_cb_queue(req->base->event_base),
841		&d->deferred);
842}
843
844/* this processes a parsed reply packet */
845static void
846reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
847	int error;
848	char addrbuf[128];
849	static const int error_codes[] = {
850		DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
851		DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
852	};
853
854	ASSERT_LOCKED(req->base);
855	ASSERT_VALID_REQUEST(req);
856
857	if (flags & 0x020f || !reply || !reply->have_answer) {
858		/* there was an error */
859		if (flags & 0x0200) {
860			error = DNS_ERR_TRUNCATED;
861		} else if (flags & 0x000f) {
862			u16 error_code = (flags & 0x000f) - 1;
863			if (error_code > 4) {
864				error = DNS_ERR_UNKNOWN;
865			} else {
866				error = error_codes[error_code];
867			}
868		} else if (reply && !reply->have_answer) {
869			error = DNS_ERR_NODATA;
870		} else {
871			error = DNS_ERR_UNKNOWN;
872		}
873
874		switch (error) {
875		case DNS_ERR_NOTIMPL:
876		case DNS_ERR_REFUSED:
877			/* we regard these errors as marking a bad nameserver */
878			if (req->reissue_count < req->base->global_max_reissues) {
879				char msg[64];
880				evutil_snprintf(msg, sizeof(msg), "Bad response %d (%s)",
881					 error, evdns_err_to_string(error));
882				nameserver_failed(req->ns, msg);
883				if (!request_reissue(req)) return;
884			}
885			break;
886		case DNS_ERR_SERVERFAILED:
887			/* rcode 2 (servfailed) sometimes means "we
888			 * are broken" and sometimes (with some binds)
889			 * means "that request was very confusing."
890			 * Treat this as a timeout, not a failure.
891			 */
892			log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver"
893				"at %s; will allow the request to time out.",
894			    evutil_format_sockaddr_port(
895				    (struct sockaddr *)&req->ns->address,
896				    addrbuf, sizeof(addrbuf)));
897			break;
898		default:
899			/* we got a good reply from the nameserver: it is up. */
900			if (req->handle == req->ns->probe_request) {
901				/* Avoid double-free */
902				req->ns->probe_request = NULL;
903			}
904
905			nameserver_up(req->ns);
906		}
907
908		if (req->handle->search_state &&
909		    req->request_type != TYPE_PTR) {
910			/* if we have a list of domains to search in,
911			 * try the next one */
912			if (!search_try_next(req->handle)) {
913				/* a new request was issued so this
914				 * request is finished and */
915				/* the user callback will be made when
916				 * that request (or a */
917				/* child of it) finishes. */
918				return;
919			}
920		}
921
922		/* all else failed. Pass the failure up */
923		reply_schedule_callback(req, ttl, error, NULL);
924		request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
925	} else {
926		/* all ok, tell the user */
927		reply_schedule_callback(req, ttl, 0, reply);
928		if (req->handle == req->ns->probe_request)
929			req->ns->probe_request = NULL; /* Avoid double-free */
930		nameserver_up(req->ns);
931		request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
932	}
933}
934
/* Decode a possibly compressed DNS name from a packet.
 *
 * packet, length: the whole packet and its size in bytes.
 * idx: in: offset at which the name starts; out (on success only): offset
 *      of the first byte after the name as it appears at the original
 *      position, i.e. just past the first compression pointer if one was
 *      followed.
 * name_out, name_out_len: buffer that receives the dotted, NUL-terminated
 *      name, and its size.
 *
 * Returns 0 on success, or -1 if the name is malformed, runs off the end
 * of the packet, loops, or does not fit into name_out.
 *
 * The GET8/GET16/GET32 macros defined here read big-endian values from
 * 'packet' at index 'j', advance 'j', and jump to the label 'err' when
 * the packet is too short. */
static int
name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
	int name_end = -1;
	int j = *idx;
	int ptr_count = 0;
#define GET32(x) do { if (j + 4 > length) goto err; memcpy(&_t32, packet + j, 4); j += 4; x = ntohl(_t32); } while (0)
#define GET16(x) do { if (j + 2 > length) goto err; memcpy(&_t, packet + j, 2); j += 2; x = ntohs(_t); } while (0)
#define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while (0)

	char *cp = name_out;
	const char *const end = name_out + name_out_len;

	/* Normally, names are a series of length prefixed strings terminated */
	/* with a length of 0 (the lengths are u8's < 63). */
	/* However, the length can start with a pair of 1 bits and that */
	/* means that the next 14 bits are a pointer within the current */
	/* packet. */

	for (;;) {
		u8 label_len;
		if (j >= length) return -1;
		GET8(label_len);
		if (!label_len) break;
		if (label_len & 0xc0) {
			u8 ptr_low;
			GET8(ptr_low);
			/* Only the first pointer decides where the caller
			 * resumes reading. */
			if (name_end < 0) name_end = j;
			j = (((int)label_len & 0x3f) << 8) + ptr_low;
			/* Make sure that the target offset is in-bounds. */
			if (j < 0 || j >= length) return -1;
			/* If we've jumped more times than there are characters in the
			 * message, we must have a loop. */
			if (++ptr_count > length) return -1;
			continue;
		}
		if (label_len > 63) return -1;
		if (cp != name_out) {
			if (cp + 1 >= end) return -1;
			*cp++ = '.';
		}
		if (cp + label_len >= end) return -1;
		/* The whole label must lie inside the packet; otherwise the
		 * copy below would read past its end. */
		if (j + label_len > length) return -1;
		memcpy(cp, packet + j, label_len);
		cp += label_len;
		j += label_len;
	}
	if (cp >= end) return -1;
	*cp = '\0';
	if (name_end < 0)
		*idx = j;
	else
		*idx = name_end;
	return 0;
 err:
	return -1;
}
990
991/* parses a raw request from a nameserver */
992static int
993reply_parse(struct evdns_base *base, u8 *packet, int length) {
994	int j = 0, k = 0;  /* index into packet */
995	u16 _t;	 /* used by the macros */
996	u32 _t32;  /* used by the macros */
997	char tmp_name[256], cmp_name[256]; /* used by the macros */
998	int name_matches = 0;
999
1000	u16 trans_id, questions, answers, authority, additional, datalength;
1001	u16 flags = 0;
1002	u32 ttl, ttl_r = 0xffffffff;
1003	struct reply reply;
1004	struct request *req = NULL;
1005	unsigned int i;
1006
1007	ASSERT_LOCKED(base);
1008
1009	GET16(trans_id);
1010	GET16(flags);
1011	GET16(questions);
1012	GET16(answers);
1013	GET16(authority);
1014	GET16(additional);
1015	(void) authority; /* suppress "unused variable" warnings. */
1016	(void) additional; /* suppress "unused variable" warnings. */
1017
1018	req = request_find_from_trans_id(base, trans_id);
1019	if (!req) return -1;
1020	EVUTIL_ASSERT(req->base == base);
1021
1022	memset(&reply, 0, sizeof(reply));
1023
1024	/* If it's not an answer, it doesn't correspond to any request. */
1025	if (!(flags & 0x8000)) return -1;  /* must be an answer */
1026	if ((flags & 0x020f) && (flags & 0x020f) != DNS_ERR_NOTEXIST) {
1027		/* there was an error and it's not NXDOMAIN */
1028		goto err;
1029	}
1030	/* if (!answers) return; */  /* must have an answer of some form */
1031
1032	/* This macro skips a name in the DNS reply. */
1033#define SKIP_NAME						\
1034	do { tmp_name[0] = '\0';				\
1035		if (name_parse(packet, length, &j, tmp_name,	\
1036			sizeof(tmp_name))<0)			\
1037			goto err;				\
1038	} while (0)
1039#define TEST_NAME							\
1040	do { tmp_name[0] = '\0';					\
1041		cmp_name[0] = '\0';					\
1042		k = j;							\
1043		if (name_parse(packet, length, &j, tmp_name,		\
1044			sizeof(tmp_name))<0)				\
1045			goto err;					\
1046		if (name_parse(req->request, req->request_len, &k,	\
1047			cmp_name, sizeof(cmp_name))<0)			\
1048			goto err;					\
1049		if (base->global_randomize_case) {			\
1050			if (strcmp(tmp_name, cmp_name) == 0)		\
1051				name_matches = 1;			\
1052		} else {						\
1053			if (evutil_ascii_strcasecmp(tmp_name, cmp_name) == 0) \
1054				name_matches = 1;			\
1055		}							\
1056	} while (0)
1057
1058	reply.type = req->request_type;
1059
1060	/* skip over each question in the reply */
1061	for (i = 0; i < questions; ++i) {
1062		/* the question looks like
1063		 *   <label:name><u16:type><u16:class>
1064		 */
1065		TEST_NAME;
1066		j += 4;
1067		if (j > length) goto err;
1068	}
1069
1070	if (!name_matches)
1071		goto err;
1072
1073	/* now we have the answer section which looks like
1074	 * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
1075	 */
1076
1077	for (i = 0; i < answers; ++i) {
1078		u16 type, class;
1079
1080		SKIP_NAME;
1081		GET16(type);
1082		GET16(class);
1083		GET32(ttl);
1084		GET16(datalength);
1085
1086		if (type == TYPE_A && class == CLASS_INET) {
1087			int addrcount, addrtocopy;
1088			if (req->request_type != TYPE_A) {
1089				j += datalength; continue;
1090			}
1091			if ((datalength & 3) != 0) /* not an even number of As. */
1092			    goto err;
1093			addrcount = datalength >> 2;
1094			addrtocopy = MIN(MAX_V4_ADDRS - reply.data.a.addrcount, (unsigned)addrcount);
1095
1096			ttl_r = MIN(ttl_r, ttl);
1097			/* we only bother with the first four addresses. */
1098			if (j + 4*addrtocopy > length) goto err;
1099			memcpy(&reply.data.a.addresses[reply.data.a.addrcount],
1100				   packet + j, 4*addrtocopy);
1101			j += 4*addrtocopy;
1102			reply.data.a.addrcount += addrtocopy;
1103			reply.have_answer = 1;
1104			if (reply.data.a.addrcount == MAX_V4_ADDRS) break;
1105		} else if (type == TYPE_PTR && class == CLASS_INET) {
1106			if (req->request_type != TYPE_PTR) {
1107				j += datalength; continue;
1108			}
1109			if (name_parse(packet, length, &j, reply.data.ptr.name,
1110						   sizeof(reply.data.ptr.name))<0)
1111				goto err;
1112			ttl_r = MIN(ttl_r, ttl);
1113			reply.have_answer = 1;
1114			break;
1115		} else if (type == TYPE_CNAME) {
1116			char cname[HOST_NAME_MAX];
1117			if (!req->put_cname_in_ptr || *req->put_cname_in_ptr) {
1118				j += datalength; continue;
1119			}
1120			if (name_parse(packet, length, &j, cname,
1121				sizeof(cname))<0)
1122				goto err;
1123			*req->put_cname_in_ptr = mm_strdup(cname);
1124		} else if (type == TYPE_AAAA && class == CLASS_INET) {
1125			int addrcount, addrtocopy;
1126			if (req->request_type != TYPE_AAAA) {
1127				j += datalength; continue;
1128			}
1129			if ((datalength & 15) != 0) /* not an even number of AAAAs. */
1130				goto err;
1131			addrcount = datalength >> 4;  /* each address is 16 bytes long */
1132			addrtocopy = MIN(MAX_V6_ADDRS - reply.data.aaaa.addrcount, (unsigned)addrcount);
1133			ttl_r = MIN(ttl_r, ttl);
1134
1135			/* we only bother with the first four addresses. */
1136			if (j + 16*addrtocopy > length) goto err;
1137			memcpy(&reply.data.aaaa.addresses[reply.data.aaaa.addrcount],
1138				   packet + j, 16*addrtocopy);
1139			reply.data.aaaa.addrcount += addrtocopy;
1140			j += 16*addrtocopy;
1141			reply.have_answer = 1;
1142			if (reply.data.aaaa.addrcount == MAX_V6_ADDRS) break;
1143		} else {
1144			/* skip over any other type of resource */
1145			j += datalength;
1146		}
1147	}
1148
1149	if (!reply.have_answer) {
1150		for (i = 0; i < authority; ++i) {
1151			u16 type, class;
1152			SKIP_NAME;
1153			GET16(type);
1154			GET16(class);
1155			GET32(ttl);
1156			GET16(datalength);
1157			if (type == TYPE_SOA && class == CLASS_INET) {
1158				u32 serial, refresh, retry, expire, minimum;
1159				SKIP_NAME;
1160				SKIP_NAME;
1161				GET32(serial);
1162				GET32(refresh);
1163				GET32(retry);
1164				GET32(expire);
1165				GET32(minimum);
1166				(void)expire;
1167				(void)retry;
1168				(void)refresh;
1169				(void)serial;
1170				ttl_r = MIN(ttl_r, ttl);
1171				ttl_r = MIN(ttl_r, minimum);
1172			} else {
1173				/* skip over any other type of resource */
1174				j += datalength;
1175			}
1176		}
1177	}
1178
1179	if (ttl_r == 0xffffffff)
1180		ttl_r = 0;
1181
1182	reply_handle(req, flags, ttl_r, &reply);
1183	return 0;
1184 err:
1185	if (req)
1186		reply_handle(req, flags, 0, NULL);
1187	return -1;
1188}
1189
1190/* Parse a raw request (packet,length) sent to a nameserver port (port) from */
1191/* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
1192/* callback. */
1193static int
1194request_parse(u8 *packet, int length, struct evdns_server_port *port, struct sockaddr *addr, ev_socklen_t addrlen)
1195{
1196	int j = 0;	/* index into packet */
1197	u16 _t;	 /* used by the macros */
1198	char tmp_name[256]; /* used by the macros */
1199
1200	int i;
1201	u16 trans_id, flags, questions, answers, authority, additional;
1202	struct server_request *server_req = NULL;
1203
1204	ASSERT_LOCKED(port);
1205
1206	/* Get the header fields */
1207	GET16(trans_id);
1208	GET16(flags);
1209	GET16(questions);
1210	GET16(answers);
1211	GET16(authority);
1212	GET16(additional);
1213	(void)answers;
1214	(void)additional;
1215	(void)authority;
1216
1217	if (flags & 0x8000) return -1; /* Must not be an answer. */
1218	flags &= 0x0110; /* Only RD and CD get preserved. */
1219
1220	server_req = mm_malloc(sizeof(struct server_request));
1221	if (server_req == NULL) return -1;
1222	memset(server_req, 0, sizeof(struct server_request));
1223
1224	server_req->trans_id = trans_id;
1225	memcpy(&server_req->addr, addr, addrlen);
1226	server_req->addrlen = addrlen;
1227
1228	server_req->base.flags = flags;
1229	server_req->base.nquestions = 0;
1230	server_req->base.questions = mm_calloc(sizeof(struct evdns_server_question *), questions);
1231	if (server_req->base.questions == NULL)
1232		goto err;
1233
1234	for (i = 0; i < questions; ++i) {
1235		u16 type, class;
1236		struct evdns_server_question *q;
1237		int namelen;
1238		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
1239			goto err;
1240		GET16(type);
1241		GET16(class);
1242		namelen = (int)strlen(tmp_name);
1243		q = mm_malloc(sizeof(struct evdns_server_question) + namelen);
1244		if (!q)
1245			goto err;
1246		q->type = type;
1247		q->dns_question_class = class;
1248		memcpy(q->name, tmp_name, namelen+1);
1249		server_req->base.questions[server_req->base.nquestions++] = q;
1250	}
1251
1252	/* Ignore answers, authority, and additional. */
1253
1254	server_req->port = port;
1255	port->refcnt++;
1256
1257	/* Only standard queries are supported. */
1258	if (flags & 0x7800) {
1259		evdns_server_request_respond(&(server_req->base), DNS_ERR_NOTIMPL);
1260		return -1;
1261	}
1262
1263	port->user_callback(&(server_req->base), port->user_data);
1264
1265	return 0;
1266err:
1267	if (server_req) {
1268		if (server_req->base.questions) {
1269			for (i = 0; i < server_req->base.nquestions; ++i)
1270				mm_free(server_req->base.questions[i]);
1271			mm_free(server_req->base.questions);
1272		}
1273		mm_free(server_req);
1274	}
1275	return -1;
1276
1277#undef SKIP_NAME
1278#undef GET32
1279#undef GET16
1280#undef GET8
1281}
1282
1283
1284void
1285evdns_set_transaction_id_fn(ev_uint16_t (*fn)(void))
1286{
1287}
1288
/* Accepts a random-bytes callback and ignores it; this function has no
 * effect. */
void
evdns_set_random_bytes_fn(void (*fn)(char *, size_t))
{
	(void)fn;
}
1293
1294/* Try to choose a strong transaction id which isn't already in flight */
1295static u16
1296transaction_id_pick(struct evdns_base *base) {
1297	ASSERT_LOCKED(base);
1298	for (;;) {
1299		u16 trans_id;
1300		evutil_secure_rng_get_bytes(&trans_id, sizeof(trans_id));
1301
1302		if (trans_id == 0xffff) continue;
1303		/* now check to see if that id is already inflight */
1304		if (request_find_from_trans_id(base, trans_id) == NULL)
1305			return trans_id;
1306	}
1307}
1308
1309/* choose a namesever to use. This function will try to ignore */
1310/* nameservers which we think are down and load balance across the rest */
1311/* by updating the server_head global each time. */
1312static struct nameserver *
1313nameserver_pick(struct evdns_base *base) {
1314	struct nameserver *started_at = base->server_head, *picked;
1315	ASSERT_LOCKED(base);
1316	if (!base->server_head) return NULL;
1317
1318	/* if we don't have any good nameservers then there's no */
1319	/* point in trying to find one. */
1320	if (!base->global_good_nameservers) {
1321		base->server_head = base->server_head->next;
1322		return base->server_head;
1323	}
1324
1325	/* remember that nameservers are in a circular list */
1326	for (;;) {
1327		if (base->server_head->state) {
1328			/* we think this server is currently good */
1329			picked = base->server_head;
1330			base->server_head = base->server_head->next;
1331			return picked;
1332		}
1333
1334		base->server_head = base->server_head->next;
1335		if (base->server_head == started_at) {
1336			/* all the nameservers seem to be down */
1337			/* so we just return this one and hope for the */
1338			/* best */
1339			EVUTIL_ASSERT(base->global_good_nameservers == 0);
1340			picked = base->server_head;
1341			base->server_head = base->server_head->next;
1342			return picked;
1343		}
1344	}
1345}
1346
1347/* this is called when a namesever socket is ready for reading */
1348static void
1349nameserver_read(struct nameserver *ns) {
1350	struct sockaddr_storage ss;
1351	ev_socklen_t addrlen = sizeof(ss);
1352	u8 packet[1500];
1353	char addrbuf[128];
1354	ASSERT_LOCKED(ns->base);
1355
1356	for (;;) {
1357		const int r = recvfrom(ns->socket, (void*)packet,
1358		    sizeof(packet), 0,
1359		    (struct sockaddr*)&ss, &addrlen);
1360		if (r < 0) {
1361			int err = evutil_socket_geterror(ns->socket);
1362			if (EVUTIL_ERR_RW_RETRIABLE(err))
1363				return;
1364			nameserver_failed(ns,
1365			    evutil_socket_error_to_string(err));
1366			return;
1367		}
1368		if (evutil_sockaddr_cmp((struct sockaddr*)&ss,
1369			(struct sockaddr*)&ns->address, 0)) {
1370			log(EVDNS_LOG_WARN, "Address mismatch on received "
1371			    "DNS packet.  Apparent source was %s",
1372			    evutil_format_sockaddr_port(
1373				    (struct sockaddr *)&ss,
1374				    addrbuf, sizeof(addrbuf)));
1375			return;
1376		}
1377
1378		ns->timedout = 0;
1379		reply_parse(ns->base, packet, r);
1380	}
1381}
1382
1383/* Read a packet from a DNS client on a server port s, parse it, and */
1384/* act accordingly. */
1385static void
1386server_port_read(struct evdns_server_port *s) {
1387	u8 packet[1500];
1388	struct sockaddr_storage addr;
1389	ev_socklen_t addrlen;
1390	int r;
1391	ASSERT_LOCKED(s);
1392
1393	for (;;) {
1394		addrlen = sizeof(struct sockaddr_storage);
1395		r = recvfrom(s->socket, (void*)packet, sizeof(packet), 0,
1396					 (struct sockaddr*) &addr, &addrlen);
1397		if (r < 0) {
1398			int err = evutil_socket_geterror(s->socket);
1399			if (EVUTIL_ERR_RW_RETRIABLE(err))
1400				return;
1401			log(EVDNS_LOG_WARN,
1402			    "Error %s (%d) while reading request.",
1403			    evutil_socket_error_to_string(err), err);
1404			return;
1405		}
1406		request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen);
1407	}
1408}
1409
1410/* Try to write all pending replies on a given DNS server port. */
1411static void
1412server_port_flush(struct evdns_server_port *port)
1413{
1414	struct server_request *req = port->pending_replies;
1415	ASSERT_LOCKED(port);
1416	while (req) {
1417		int r = sendto(port->socket, req->response, (int)req->response_len, 0,
1418			   (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1419		if (r < 0) {
1420			int err = evutil_socket_geterror(port->socket);
1421			if (EVUTIL_ERR_RW_RETRIABLE(err))
1422				return;
1423			log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", evutil_socket_error_to_string(err), err);
1424		}
1425		if (server_request_free(req)) {
1426			/* we released the last reference to req->port. */
1427			return;
1428		} else {
1429			EVUTIL_ASSERT(req != port->pending_replies);
1430			req = port->pending_replies;
1431		}
1432	}
1433
1434	/* We have no more pending requests; stop listening for 'writeable' events. */
1435	(void) event_del(&port->event);
1436	event_assign(&port->event, port->event_base,
1437				 port->socket, EV_READ | EV_PERSIST,
1438				 server_port_ready_callback, port);
1439
1440	if (event_add(&port->event, NULL) < 0) {
1441		log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server.");
1442		/* ???? Do more? */
1443	}
1444}
1445
1446/* set if we are waiting for the ability to write to this server. */
1447/* if waiting is true then we ask libevent for EV_WRITE events, otherwise */
1448/* we stop these events. */
1449static void
1450nameserver_write_waiting(struct nameserver *ns, char waiting) {
1451	ASSERT_LOCKED(ns->base);
1452	if (ns->write_waiting == waiting) return;
1453
1454	ns->write_waiting = waiting;
1455	(void) event_del(&ns->event);
1456	event_assign(&ns->event, ns->base->event_base,
1457	    ns->socket, EV_READ | (waiting ? EV_WRITE : 0) | EV_PERSIST,
1458	    nameserver_ready_callback, ns);
1459	if (event_add(&ns->event, NULL) < 0) {
1460		char addrbuf[128];
1461		log(EVDNS_LOG_WARN, "Error from libevent when adding event for %s",
1462		    evutil_format_sockaddr_port(
1463			    (struct sockaddr *)&ns->address,
1464			    addrbuf, sizeof(addrbuf)));
1465		/* ???? Do more? */
1466	}
1467}
1468
1469/* a callback function. Called by libevent when the kernel says that */
1470/* a nameserver socket is ready for writing or reading */
1471static void
1472nameserver_ready_callback(evutil_socket_t fd, short events, void *arg) {
1473	struct nameserver *ns = (struct nameserver *) arg;
1474	(void)fd;
1475
1476	EVDNS_LOCK(ns->base);
1477	if (events & EV_WRITE) {
1478		ns->choked = 0;
1479		if (!evdns_transmit(ns->base)) {
1480			nameserver_write_waiting(ns, 0);
1481		}
1482	}
1483	if (events & EV_READ) {
1484		nameserver_read(ns);
1485	}
1486	EVDNS_UNLOCK(ns->base);
1487}
1488
1489/* a callback function. Called by libevent when the kernel says that */
1490/* a server socket is ready for writing or reading. */
1491static void
1492server_port_ready_callback(evutil_socket_t fd, short events, void *arg) {
1493	struct evdns_server_port *port = (struct evdns_server_port *) arg;
1494	(void) fd;
1495
1496	EVDNS_LOCK(port);
1497	if (events & EV_WRITE) {
1498		port->choked = 0;
1499		server_port_flush(port);
1500	}
1501	if (events & EV_READ) {
1502		server_port_read(port);
1503	}
1504	EVDNS_UNLOCK(port);
1505}
1506
/* Structures used to implement name compression.
 *
 * This is an inefficient representation; only use it via the
 * dnslabel_table_* functions, so that it can be safely replaced with
 * something smarter later. */
#define MAX_LABELS 128

/* One remembered name suffix and where it was written. */
struct dnslabel_entry {
	char *v;	/* heap copy of the name */
	off_t pos;	/* position of that name in the message */
};

/* Map from name to position in message. */
struct dnslabel_table {
	int n_labels;	/* number of current entries */
	struct dnslabel_entry labels[MAX_LABELS];
};
1517
1518/* Initialize dnslabel_table. */
1519static void
1520dnslabel_table_init(struct dnslabel_table *table)
1521{
1522	table->n_labels = 0;
1523}
1524
1525/* Free all storage held by table, but not the table itself. */
1526static void
1527dnslabel_clear(struct dnslabel_table *table)
1528{
1529	int i;
1530	for (i = 0; i < table->n_labels; ++i)
1531		mm_free(table->labels[i].v);
1532	table->n_labels = 0;
1533}
1534
1535/* return the position of the label in the current message, or -1 if the label */
1536/* hasn't been used yet. */
1537static int
1538dnslabel_table_get_pos(const struct dnslabel_table *table, const char *label)
1539{
1540	int i;
1541	for (i = 0; i < table->n_labels; ++i) {
1542		if (!strcmp(label, table->labels[i].v))
1543			return table->labels[i].pos;
1544	}
1545	return -1;
1546}
1547
1548/* remember that we've used the label at position pos */
1549static int
1550dnslabel_table_add(struct dnslabel_table *table, const char *label, off_t pos)
1551{
1552	char *v;
1553	int p;
1554	if (table->n_labels == MAX_LABELS)
1555		return (-1);
1556	v = mm_strdup(label);
1557	if (v == NULL)
1558		return (-1);
1559	p = table->n_labels++;
1560	table->labels[p].v = v;
1561	table->labels[p].pos = pos;
1562
1563	return (0);
1564}
1565
1566/* Converts a string to a length-prefixed set of DNS labels, starting */
1567/* at buf[j]. name and buf must not overlap. name_len should be the length */
1568/* of name.	 table is optional, and is used for compression. */
1569/* */
1570/* Input: abc.def */
1571/* Output: <3>abc<3>def<0> */
1572/* */
1573/* Returns the first index after the encoded name, or negative on error. */
1574/*	 -1	 label was > 63 bytes */
1575/*	 -2	 name too long to fit in buffer. */
1576/* */
1577static off_t
1578dnsname_to_labels(u8 *const buf, size_t buf_len, off_t j,
1579				  const char *name, const size_t name_len,
1580				  struct dnslabel_table *table) {
1581	const char *end = name + name_len;
1582	int ref = 0;
1583	u16 _t;
1584
1585#define APPEND16(x) do {						\
1586		if (j + 2 > (off_t)buf_len)				\
1587			goto overflow;					\
1588		_t = htons(x);						\
1589		memcpy(buf + j, &_t, 2);				\
1590		j += 2;							\
1591	} while (0)
1592#define APPEND32(x) do {						\
1593		if (j + 4 > (off_t)buf_len)				\
1594			goto overflow;					\
1595		_t32 = htonl(x);					\
1596		memcpy(buf + j, &_t32, 4);				\
1597		j += 4;							\
1598	} while (0)
1599
1600	if (name_len > 255) return -2;
1601
1602	for (;;) {
1603		const char *const start = name;
1604		if (table && (ref = dnslabel_table_get_pos(table, name)) >= 0) {
1605			APPEND16(ref | 0xc000);
1606			return j;
1607		}
1608		name = strchr(name, '.');
1609		if (!name) {
1610			const size_t label_len = end - start;
1611			if (label_len > 63) return -1;
1612			if ((size_t)(j+label_len+1) > buf_len) return -2;
1613			if (table) dnslabel_table_add(table, start, j);
1614			buf[j++] = (ev_uint8_t)label_len;
1615
1616			memcpy(buf + j, start, label_len);
1617			j += (int) label_len;
1618			break;
1619		} else {
1620			/* append length of the label. */
1621			const size_t label_len = name - start;
1622			if (label_len > 63) return -1;
1623			if ((size_t)(j+label_len+1) > buf_len) return -2;
1624			if (table) dnslabel_table_add(table, start, j);
1625			buf[j++] = (ev_uint8_t)label_len;
1626
1627			memcpy(buf + j, start, label_len);
1628			j += (int) label_len;
1629			/* hop over the '.' */
1630			name++;
1631		}
1632	}
1633
1634	/* the labels must be terminated by a 0. */
1635	/* It's possible that the name ended in a . */
1636	/* in which case the zero is already there */
1637	if (!j || buf[j-1]) buf[j++] = 0;
1638	return j;
1639 overflow:
1640	return (-2);
1641}
1642
/* Returns a buffer size that is large enough for a DNS request asking about
 * a name of name_len bytes.  The request actually built may be smaller:
 * the header reserve of 96 is more than the six 16-bit header fields that
 * evdns_request_data_build() writes. */
static size_t
evdns_request_len(const size_t name_len) {
	const size_t header_reserve = 96;	/* room set aside for the header */
	const size_t name_overhead = 2;		/* added on top of the name itself */
	const size_t type_and_class = 4;	/* space for the resource type */

	return header_reserve + name_len + name_overhead + type_and_class;
}
1652
1653/* build a dns request packet into buf. buf should be at least as long */
1654/* as evdns_request_len told you it should be. */
1655/* */
1656/* Returns the amount of space used. Negative on error. */
1657static int
1658evdns_request_data_build(const char *const name, const size_t name_len,
1659    const u16 trans_id, const u16 type, const u16 class,
1660    u8 *const buf, size_t buf_len) {
1661	off_t j = 0;  /* current offset into buf */
1662	u16 _t;	 /* used by the macros */
1663
1664	APPEND16(trans_id);
1665	APPEND16(0x0100);  /* standard query, recusion needed */
1666	APPEND16(1);  /* one question */
1667	APPEND16(0);  /* no answers */
1668	APPEND16(0);  /* no authority */
1669	APPEND16(0);  /* no additional */
1670
1671	j = dnsname_to_labels(buf, buf_len, j, name, name_len, NULL);
1672	if (j < 0) {
1673		return (int)j;
1674	}
1675
1676	APPEND16(type);
1677	APPEND16(class);
1678
1679	return (int)j;
1680 overflow:
1681	return (-1);
1682}
1683
1684/* exported function */
1685struct evdns_server_port *
1686evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1687{
1688	struct evdns_server_port *port;
1689	if (flags)
1690		return NULL; /* flags not yet implemented */
1691	if (!(port = mm_malloc(sizeof(struct evdns_server_port))))
1692		return NULL;
1693	memset(port, 0, sizeof(struct evdns_server_port));
1694
1695
1696	port->socket = socket;
1697	port->refcnt = 1;
1698	port->choked = 0;
1699	port->closing = 0;
1700	port->user_callback = cb;
1701	port->user_data = user_data;
1702	port->pending_replies = NULL;
1703	port->event_base = base;
1704
1705	event_assign(&port->event, port->event_base,
1706				 port->socket, EV_READ | EV_PERSIST,
1707				 server_port_ready_callback, port);
1708	if (event_add(&port->event, NULL) < 0) {
1709		mm_free(port);
1710		return NULL;
1711	}
1712	EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
1713	return port;
1714}
1715
1716struct evdns_server_port *
1717evdns_add_server_port(evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1718{
1719	return evdns_add_server_port_with_base(NULL, socket, flags, cb, user_data);
1720}
1721
1722/* exported function */
1723void
1724evdns_close_server_port(struct evdns_server_port *port)
1725{
1726	EVDNS_LOCK(port);
1727	if (--port->refcnt == 0) {
1728		EVDNS_UNLOCK(port);
1729		server_port_free(port);
1730	} else {
1731		port->closing = 1;
1732	}
1733}
1734
1735/* exported function */
1736int
1737evdns_server_request_add_reply(struct evdns_server_request *_req, int section, const char *name, int type, int class, int ttl, int datalen, int is_name, const char *data)
1738{
1739	struct server_request *req = TO_SERVER_REQUEST(_req);
1740	struct server_reply_item **itemp, *item;
1741	int *countp;
1742	int result = -1;
1743
1744	EVDNS_LOCK(req->port);
1745	if (req->response) /* have we already answered? */
1746		goto done;
1747
1748	switch (section) {
1749	case EVDNS_ANSWER_SECTION:
1750		itemp = &req->answer;
1751		countp = &req->n_answer;
1752		break;
1753	case EVDNS_AUTHORITY_SECTION:
1754		itemp = &req->authority;
1755		countp = &req->n_authority;
1756		break;
1757	case EVDNS_ADDITIONAL_SECTION:
1758		itemp = &req->additional;
1759		countp = &req->n_additional;
1760		break;
1761	default:
1762		goto done;
1763	}
1764	while (*itemp) {
1765		itemp = &((*itemp)->next);
1766	}
1767	item = mm_malloc(sizeof(struct server_reply_item));
1768	if (!item)
1769		goto done;
1770	item->next = NULL;
1771	if (!(item->name = mm_strdup(name))) {
1772		mm_free(item);
1773		goto done;
1774	}
1775	item->type = type;
1776	item->dns_question_class = class;
1777	item->ttl = ttl;
1778	item->is_name = is_name != 0;
1779	item->datalen = 0;
1780	item->data = NULL;
1781	if (data) {
1782		if (item->is_name) {
1783			if (!(item->data = mm_strdup(data))) {
1784				mm_free(item->name);
1785				mm_free(item);
1786				goto done;
1787			}
1788			item->datalen = (u16)-1;
1789		} else {
1790			if (!(item->data = mm_malloc(datalen))) {
1791				mm_free(item->name);
1792				mm_free(item);
1793				goto done;
1794			}
1795			item->datalen = datalen;
1796			memcpy(item->data, data, datalen);
1797		}
1798	}
1799
1800	*itemp = item;
1801	++(*countp);
1802	result = 0;
1803done:
1804	EVDNS_UNLOCK(req->port);
1805	return result;
1806}
1807
1808/* exported function */
1809int
1810evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1811{
1812	return evdns_server_request_add_reply(
1813		  req, EVDNS_ANSWER_SECTION, name, TYPE_A, CLASS_INET,
1814		  ttl, n*4, 0, addrs);
1815}
1816
1817/* exported function */
1818int
1819evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1820{
1821	return evdns_server_request_add_reply(
1822		  req, EVDNS_ANSWER_SECTION, name, TYPE_AAAA, CLASS_INET,
1823		  ttl, n*16, 0, addrs);
1824}
1825
1826/* exported function */
1827int
1828evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl)
1829{
1830	u32 a;
1831	char buf[32];
1832	if (in && inaddr_name)
1833		return -1;
1834	else if (!in && !inaddr_name)
1835		return -1;
1836	if (in) {
1837		a = ntohl(in->s_addr);
1838		evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
1839				(int)(u8)((a	)&0xff),
1840				(int)(u8)((a>>8 )&0xff),
1841				(int)(u8)((a>>16)&0xff),
1842				(int)(u8)((a>>24)&0xff));
1843		inaddr_name = buf;
1844	}
1845	return evdns_server_request_add_reply(
1846		  req, EVDNS_ANSWER_SECTION, inaddr_name, TYPE_PTR, CLASS_INET,
1847		  ttl, -1, 1, hostname);
1848}
1849
1850/* exported function */
1851int
1852evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl)
1853{
1854	return evdns_server_request_add_reply(
1855		  req, EVDNS_ANSWER_SECTION, name, TYPE_CNAME, CLASS_INET,
1856		  ttl, -1, 1, cname);
1857}
1858
1859/* exported function */
1860void
1861evdns_server_request_set_flags(struct evdns_server_request *exreq, int flags)
1862{
1863	struct server_request *req = TO_SERVER_REQUEST(exreq);
1864	req->base.flags &= ~(EVDNS_FLAGS_AA|EVDNS_FLAGS_RD);
1865	req->base.flags |= flags;
1866}
1867
1868static int
1869evdns_server_request_format_response(struct server_request *req, int err)
1870{
1871	unsigned char buf[1500];
1872	size_t buf_len = sizeof(buf);
1873	off_t j = 0, r;
1874	u16 _t;
1875	u32 _t32;
1876	int i;
1877	u16 flags;
1878	struct dnslabel_table table;
1879
1880	if (err < 0 || err > 15) return -1;
1881
1882	/* Set response bit and error code; copy OPCODE and RD fields from
1883	 * question; copy RA and AA if set by caller. */
1884	flags = req->base.flags;
1885	flags |= (0x8000 | err);
1886
1887	dnslabel_table_init(&table);
1888	APPEND16(req->trans_id);
1889	APPEND16(flags);
1890	APPEND16(req->base.nquestions);
1891	APPEND16(req->n_answer);
1892	APPEND16(req->n_authority);
1893	APPEND16(req->n_additional);
1894
1895	/* Add questions. */
1896	for (i=0; i < req->base.nquestions; ++i) {
1897		const char *s = req->base.questions[i]->name;
1898		j = dnsname_to_labels(buf, buf_len, j, s, strlen(s), &table);
1899		if (j < 0) {
1900			dnslabel_clear(&table);
1901			return (int) j;
1902		}
1903		APPEND16(req->base.questions[i]->type);
1904		APPEND16(req->base.questions[i]->dns_question_class);
1905	}
1906
1907	/* Add answer, authority, and additional sections. */
1908	for (i=0; i<3; ++i) {
1909		struct server_reply_item *item;
1910		if (i==0)
1911			item = req->answer;
1912		else if (i==1)
1913			item = req->authority;
1914		else
1915			item = req->additional;
1916		while (item) {
1917			r = dnsname_to_labels(buf, buf_len, j, item->name, strlen(item->name), &table);
1918			if (r < 0)
1919				goto overflow;
1920			j = r;
1921
1922			APPEND16(item->type);
1923			APPEND16(item->dns_question_class);
1924			APPEND32(item->ttl);
1925			if (item->is_name) {
1926				off_t len_idx = j, name_start;
1927				j += 2;
1928				name_start = j;
1929				r = dnsname_to_labels(buf, buf_len, j, item->data, strlen(item->data), &table);
1930				if (r < 0)
1931					goto overflow;
1932				j = r;
1933				_t = htons( (short) (j-name_start) );
1934				memcpy(buf+len_idx, &_t, 2);
1935			} else {
1936				APPEND16(item->datalen);
1937				if (j+item->datalen > (off_t)buf_len)
1938					goto overflow;
1939				memcpy(buf+j, item->data, item->datalen);
1940				j += item->datalen;
1941			}
1942			item = item->next;
1943		}
1944	}
1945
1946	if (j > 512) {
1947overflow:
1948		j = 512;
1949		buf[2] |= 0x02; /* set the truncated bit. */
1950	}
1951
1952	req->response_len = j;
1953
1954	if (!(req->response = mm_malloc(req->response_len))) {
1955		server_request_free_answers(req);
1956		dnslabel_clear(&table);
1957		return (-1);
1958	}
1959	memcpy(req->response, buf, req->response_len);
1960	server_request_free_answers(req);
1961	dnslabel_clear(&table);
1962	return (0);
1963}
1964
1965/* exported function */
1966int
1967evdns_server_request_respond(struct evdns_server_request *_req, int err)
1968{
1969	struct server_request *req = TO_SERVER_REQUEST(_req);
1970	struct evdns_server_port *port = req->port;
1971	int r = -1;
1972
1973	EVDNS_LOCK(port);
1974	if (!req->response) {
1975		if ((r = evdns_server_request_format_response(req, err))<0)
1976			goto done;
1977	}
1978
1979	r = sendto(port->socket, req->response, (int)req->response_len, 0,
1980			   (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1981	if (r<0) {
1982		int sock_err = evutil_socket_geterror(port->socket);
1983		if (EVUTIL_ERR_RW_RETRIABLE(sock_err))
1984			goto done;
1985
1986		if (port->pending_replies) {
1987			req->prev_pending = port->pending_replies->prev_pending;
1988			req->next_pending = port->pending_replies;
1989			req->prev_pending->next_pending =
1990				req->next_pending->prev_pending = req;
1991		} else {
1992			req->prev_pending = req->next_pending = req;
1993			port->pending_replies = req;
1994			port->choked = 1;
1995
1996			(void) event_del(&port->event);
1997			event_assign(&port->event, port->event_base, port->socket, (port->closing?0:EV_READ) | EV_WRITE | EV_PERSIST, server_port_ready_callback, port);
1998
1999			if (event_add(&port->event, NULL) < 0) {
2000				log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server");
2001			}
2002
2003		}
2004
2005		r = 1;
2006		goto done;
2007	}
2008	if (server_request_free(req)) {
2009		r = 0;
2010		goto done;
2011	}
2012
2013	if (port->pending_replies)
2014		server_port_flush(port);
2015
2016	r = 0;
2017done:
2018	EVDNS_UNLOCK(port);
2019	return r;
2020}
2021
2022/* Free all storage held by RRs in req. */
2023static void
2024server_request_free_answers(struct server_request *req)
2025{
2026	struct server_reply_item *victim, *next, **list;
2027	int i;
2028	for (i = 0; i < 3; ++i) {
2029		if (i==0)
2030			list = &req->answer;
2031		else if (i==1)
2032			list = &req->authority;
2033		else
2034			list = &req->additional;
2035
2036		victim = *list;
2037		while (victim) {
2038			next = victim->next;
2039			mm_free(victim->name);
2040			if (victim->data)
2041				mm_free(victim->data);
2042			mm_free(victim);
2043			victim = next;
2044		}
2045		*list = NULL;
2046	}
2047}
2048
2049/* Free all storage held by req, and remove links to it. */
2050/* return true iff we just wound up freeing the server_port. */
2051static int
2052server_request_free(struct server_request *req)
2053{
2054	int i, rc=1, lock=0;
2055	if (req->base.questions) {
2056		for (i = 0; i < req->base.nquestions; ++i)
2057			mm_free(req->base.questions[i]);
2058		mm_free(req->base.questions);
2059	}
2060
2061	if (req->port) {
2062		EVDNS_LOCK(req->port);
2063		lock=1;
2064		if (req->port->pending_replies == req) {
2065			if (req->next_pending && req->next_pending != req)
2066				req->port->pending_replies = req->next_pending;
2067			else
2068				req->port->pending_replies = NULL;
2069		}
2070		rc = --req->port->refcnt;
2071	}
2072
2073	if (req->response) {
2074		mm_free(req->response);
2075	}
2076
2077	server_request_free_answers(req);
2078
2079	if (req->next_pending && req->next_pending != req) {
2080		req->next_pending->prev_pending = req->prev_pending;
2081		req->prev_pending->next_pending = req->next_pending;
2082	}
2083
2084	if (rc == 0) {
2085		EVDNS_UNLOCK(req->port); /* ????? nickm */
2086		server_port_free(req->port);
2087		mm_free(req);
2088		return (1);
2089	}
2090	if (lock)
2091		EVDNS_UNLOCK(req->port);
2092	mm_free(req);
2093	return (0);
2094}
2095
2096/* Free all storage held by an evdns_server_port.  Only called when  */
2097static void
2098server_port_free(struct evdns_server_port *port)
2099{
2100	EVUTIL_ASSERT(port);
2101	EVUTIL_ASSERT(!port->refcnt);
2102	EVUTIL_ASSERT(!port->pending_replies);
2103	if (port->socket > 0) {
2104		evutil_closesocket(port->socket);
2105		port->socket = -1;
2106	}
2107	(void) event_del(&port->event);
2108	event_debug_unassign(&port->event);
2109	EVTHREAD_FREE_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2110	mm_free(port);
2111}
2112
/* exported function */
/* Discard a server request without replying to it.  Always returns 0. */
int
evdns_server_request_drop(struct evdns_server_request *_req)
{
	struct server_request *const victim = TO_SERVER_REQUEST(_req);

	(void) server_request_free(victim);
	return 0;
}
2121
2122/* exported function */
2123int
2124evdns_server_request_get_requesting_addr(struct evdns_server_request *_req, struct sockaddr *sa, int addr_len)
2125{
2126	struct server_request *req = TO_SERVER_REQUEST(_req);
2127	if (addr_len < (int)req->addrlen)
2128		return -1;
2129	memcpy(sa, &(req->addr), req->addrlen);
2130	return req->addrlen;
2131}
2132
2133#undef APPEND16
2134#undef APPEND32
2135
2136/* this is a libevent callback function which is called when a request */
2137/* has timed out. */
2138static void
2139evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg) {
2140	struct request *const req = (struct request *) arg;
2141#ifndef _EVENT_DISABLE_THREAD_SUPPORT
2142	struct evdns_base *base = req->base;
2143#endif
2144	(void) fd;
2145	(void) events;
2146
2147	log(EVDNS_LOG_DEBUG, "Request %p timed out", arg);
2148	EVDNS_LOCK(base);
2149
2150	req->ns->timedout++;
2151	if (req->ns->timedout > req->base->global_max_nameserver_timeout) {
2152		req->ns->timedout = 0;
2153		nameserver_failed(req->ns, "request timed out.");
2154	}
2155
2156	if (req->tx_count >= req->base->global_max_retransmits) {
2157		/* this request has failed */
2158		reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
2159		request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
2160	} else {
2161		/* retransmit it */
2162		(void) evtimer_del(&req->timeout_event);
2163		evdns_request_transmit(req);
2164	}
2165	EVDNS_UNLOCK(base);
2166}
2167
2168/* try to send a request to a given server. */
2169/* */
2170/* return: */
2171/*   0 ok */
2172/*   1 temporary failure */
2173/*   2 other failure */
2174static int
2175evdns_request_transmit_to(struct request *req, struct nameserver *server) {
2176	int r;
2177	ASSERT_LOCKED(req->base);
2178	ASSERT_VALID_REQUEST(req);
2179	r = sendto(server->socket, (void*)req->request, req->request_len, 0,
2180	    (struct sockaddr *)&server->address, server->addrlen);
2181	if (r < 0) {
2182		int err = evutil_socket_geterror(server->socket);
2183		if (EVUTIL_ERR_RW_RETRIABLE(err))
2184			return 1;
2185		nameserver_failed(req->ns, evutil_socket_error_to_string(err));
2186		return 2;
2187	} else if (r != (int)req->request_len) {
2188		return 1;  /* short write */
2189	} else {
2190		return 0;
2191	}
2192}
2193
2194/* try to send a request, updating the fields of the request */
2195/* as needed */
2196/* */
2197/* return: */
2198/*   0 ok */
2199/*   1 failed */
2200static int
2201evdns_request_transmit(struct request *req) {
2202	int retcode = 0, r;
2203
2204	ASSERT_LOCKED(req->base);
2205	ASSERT_VALID_REQUEST(req);
2206	/* if we fail to send this packet then this flag marks it */
2207	/* for evdns_transmit */
2208	req->transmit_me = 1;
2209	EVUTIL_ASSERT(req->trans_id != 0xffff);
2210
2211	if (req->ns->choked) {
2212		/* don't bother trying to write to a socket */
2213		/* which we have had EAGAIN from */
2214		return 1;
2215	}
2216
2217	r = evdns_request_transmit_to(req, req->ns);
2218	switch (r) {
2219	case 1:
2220		/* temp failure */
2221		req->ns->choked = 1;
2222		nameserver_write_waiting(req->ns, 1);
2223		return 1;
2224	case 2:
2225		/* failed to transmit the request entirely. */
2226		retcode = 1;
2227		/* fall through: we'll set a timeout, which will time out,
2228		 * and make us retransmit the request anyway. */
2229	default:
2230		/* all ok */
2231		log(EVDNS_LOG_DEBUG,
2232		    "Setting timeout for request %p", req);
2233		if (evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0) {
2234			log(EVDNS_LOG_WARN,
2235		      "Error from libevent when adding timer for request %p",
2236			    req);
2237			/* ???? Do more? */
2238		}
2239		req->tx_count++;
2240		req->transmit_me = 0;
2241		return retcode;
2242	}
2243}
2244
2245static void
2246nameserver_probe_callback(int result, char type, int count, int ttl, void *addresses, void *arg) {
2247	struct nameserver *const ns = (struct nameserver *) arg;
2248	(void) type;
2249	(void) count;
2250	(void) ttl;
2251	(void) addresses;
2252
2253	EVDNS_LOCK(ns->base);
2254	ns->probe_request = NULL;
2255	if (result == DNS_ERR_CANCEL) {
2256		/* We canceled this request because the nameserver came up
2257		 * for some other reason.  Do not change our opinion about
2258		 * the nameserver. */
2259	} else if (result == DNS_ERR_NONE || result == DNS_ERR_NOTEXIST) {
2260		/* this is a good reply */
2261		nameserver_up(ns);
2262	} else {
2263		nameserver_probe_failed(ns);
2264	}
2265	EVDNS_UNLOCK(ns->base);
2266}
2267
2268static void
2269nameserver_send_probe(struct nameserver *const ns) {
2270	struct evdns_request *handle;
2271	struct request *req;
2272	char addrbuf[128];
2273	/* here we need to send a probe to a given nameserver */
2274	/* in the hope that it is up now. */
2275
2276	ASSERT_LOCKED(ns->base);
2277	log(EVDNS_LOG_DEBUG, "Sending probe to %s",
2278	    evutil_format_sockaddr_port(
2279		    (struct sockaddr *)&ns->address,
2280		    addrbuf, sizeof(addrbuf)));
2281	handle = mm_calloc(1, sizeof(*handle));
2282	if (!handle) return;
2283	req = request_new(ns->base, handle, TYPE_A, "google.com", DNS_QUERY_NO_SEARCH, nameserver_probe_callback, ns);
2284	if (!req) return;
2285	ns->probe_request = handle;
2286	/* we force this into the inflight queue no matter what */
2287	request_trans_id_set(req, transaction_id_pick(ns->base));
2288	req->ns = ns;
2289	request_submit(req);
2290}
2291
2292/* returns: */
2293/*   0 didn't try to transmit anything */
2294/*   1 tried to transmit something */
2295static int
2296evdns_transmit(struct evdns_base *base) {
2297	char did_try_to_transmit = 0;
2298	int i;
2299
2300	ASSERT_LOCKED(base);
2301	for (i = 0; i < base->n_req_heads; ++i) {
2302		if (base->req_heads[i]) {
2303			struct request *const started_at = base->req_heads[i], *req = started_at;
2304			/* first transmit all the requests which are currently waiting */
2305			do {
2306				if (req->transmit_me) {
2307					did_try_to_transmit = 1;
2308					evdns_request_transmit(req);
2309				}
2310
2311				req = req->next;
2312			} while (req != started_at);
2313		}
2314	}
2315
2316	return did_try_to_transmit;
2317}
2318
2319/* exported function */
2320int
2321evdns_base_count_nameservers(struct evdns_base *base)
2322{
2323	const struct nameserver *server;
2324	int n = 0;
2325
2326	EVDNS_LOCK(base);
2327	server = base->server_head;
2328	if (!server)
2329		goto done;
2330	do {
2331		++n;
2332		server = server->next;
2333	} while (server != base->server_head);
2334done:
2335	EVDNS_UNLOCK(base);
2336	return n;
2337}
2338
2339int
2340evdns_count_nameservers(void)
2341{
2342	return evdns_base_count_nameservers(current_base);
2343}
2344
2345/* exported function */
2346int
2347evdns_base_clear_nameservers_and_suspend(struct evdns_base *base)
2348{
2349	struct nameserver *server, *started_at;
2350	int i;
2351
2352	EVDNS_LOCK(base);
2353	server = base->server_head;
2354	started_at = base->server_head;
2355	if (!server) {
2356		EVDNS_UNLOCK(base);
2357		return 0;
2358	}
2359	while (1) {
2360		struct nameserver *next = server->next;
2361		(void) event_del(&server->event);
2362		if (evtimer_initialized(&server->timeout_event))
2363			(void) evtimer_del(&server->timeout_event);
2364		if (server->socket >= 0)
2365			evutil_closesocket(server->socket);
2366		mm_free(server);
2367		if (next == started_at)
2368			break;
2369		server = next;
2370	}
2371	base->server_head = NULL;
2372	base->global_good_nameservers = 0;
2373
2374	for (i = 0; i < base->n_req_heads; ++i) {
2375		struct request *req, *req_started_at;
2376		req = req_started_at = base->req_heads[i];
2377		while (req) {
2378			struct request *next = req->next;
2379			req->tx_count = req->reissue_count = 0;
2380			req->ns = NULL;
2381			/* ???? What to do about searches? */
2382			(void) evtimer_del(&req->timeout_event);
2383			req->trans_id = 0;
2384			req->transmit_me = 0;
2385
2386			base->global_requests_waiting++;
2387			evdns_request_insert(req, &base->req_waiting_head);
2388			/* We want to insert these suspended elements at the front of
2389			 * the waiting queue, since they were pending before any of
2390			 * the waiting entries were added.  This is a circular list,
2391			 * so we can just shift the start back by one.*/
2392			base->req_waiting_head = base->req_waiting_head->prev;
2393
2394			if (next == req_started_at)
2395				break;
2396			req = next;
2397		}
2398		base->req_heads[i] = NULL;
2399	}
2400
2401	base->global_requests_inflight = 0;
2402
2403	EVDNS_UNLOCK(base);
2404	return 0;
2405}
2406
2407int
2408evdns_clear_nameservers_and_suspend(void)
2409{
2410	return evdns_base_clear_nameservers_and_suspend(current_base);
2411}
2412
2413
/* exported function */
/* Pump base's waiting queue under the base lock.  Always returns 0. */
int
evdns_base_resume(struct evdns_base *base)
{
	EVDNS_LOCK(base);
	evdns_requests_pump_waiting_queue(base);
	EVDNS_UNLOCK(base);

	return (0);
}
2423
2424int
2425evdns_resume(void)
2426{
2427	return evdns_base_resume(current_base);
2428}
2429
2430static int
2431_evdns_nameserver_add_impl(struct evdns_base *base, const struct sockaddr *address, int addrlen) {
2432	/* first check to see if we already have this nameserver */
2433
2434	const struct nameserver *server = base->server_head, *const started_at = base->server_head;
2435	struct nameserver *ns;
2436	int err = 0;
2437	char addrbuf[128];
2438
2439	ASSERT_LOCKED(base);
2440	if (server) {
2441		do {
2442			if (!evutil_sockaddr_cmp((struct sockaddr*)&server->address, address, 1)) return 3;
2443			server = server->next;
2444		} while (server != started_at);
2445	}
2446	if (addrlen > (int)sizeof(ns->address)) {
2447		log(EVDNS_LOG_DEBUG, "Addrlen %d too long.", (int)addrlen);
2448		return 2;
2449	}
2450
2451	ns = (struct nameserver *) mm_malloc(sizeof(struct nameserver));
2452	if (!ns) return -1;
2453
2454	memset(ns, 0, sizeof(struct nameserver));
2455	ns->base = base;
2456
2457	evtimer_assign(&ns->timeout_event, ns->base->event_base, nameserver_prod_callback, ns);
2458
2459	ns->socket = socket(address->sa_family, SOCK_DGRAM, 0);
2460	if (ns->socket < 0) { err = 1; goto out1; }
2461	evutil_make_socket_closeonexec(ns->socket);
2462	evutil_make_socket_nonblocking(ns->socket);
2463
2464	if (base->global_outgoing_addrlen &&
2465	    !evutil_sockaddr_is_loopback(address)) {
2466		if (bind(ns->socket,
2467			(struct sockaddr*)&base->global_outgoing_address,
2468			base->global_outgoing_addrlen) < 0) {
2469			log(EVDNS_LOG_WARN,"Couldn't bind to outgoing address");
2470			err = 2;
2471			goto out2;
2472		}
2473	}
2474
2475	memcpy(&ns->address, address, addrlen);
2476	ns->addrlen = addrlen;
2477	ns->state = 1;
2478	event_assign(&ns->event, ns->base->event_base, ns->socket, EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
2479	if (event_add(&ns->event, NULL) < 0) {
2480		err = 2;
2481		goto out2;
2482	}
2483
2484	log(EVDNS_LOG_DEBUG, "Added nameserver %s",
2485	    evutil_format_sockaddr_port(address, addrbuf, sizeof(addrbuf)));
2486
2487	/* insert this nameserver into the list of them */
2488	if (!base->server_head) {
2489		ns->next = ns->prev = ns;
2490		base->server_head = ns;
2491	} else {
2492		ns->next = base->server_head->next;
2493		ns->prev = base->server_head;
2494		base->server_head->next = ns;
2495		ns->next->prev = ns;
2496	}
2497
2498	base->global_good_nameservers++;
2499
2500	return 0;
2501
2502out2:
2503	evutil_closesocket(ns->socket);
2504out1:
2505	event_debug_unassign(&ns->event);
2506	mm_free(ns);
2507	log(EVDNS_LOG_WARN, "Unable to add nameserver %s: error %d",
2508	    evutil_format_sockaddr_port(address, addrbuf, sizeof(addrbuf)), err);
2509	return err;
2510}
2511
/* exported function */
/* Add a nameserver given as an IPv4 address (stored into s_addr as-is) on
 * port 53.  Returns whatever _evdns_nameserver_add_impl() returns. */
int
evdns_base_nameserver_add(struct evdns_base *base, unsigned long int address)
{
	struct sockaddr_in sin;
	int res;
	/* Zero the whole structure first so that padding and any fields we
	 * do not set explicitly are not copied around uninitialized. */
	memset(&sin, 0, sizeof(sin));
	sin.sin_addr.s_addr = address;
	sin.sin_port = htons(53);
	sin.sin_family = AF_INET;
	EVDNS_LOCK(base);
	res = _evdns_nameserver_add_impl(base, (struct sockaddr*)&sin, sizeof(sin));
	EVDNS_UNLOCK(base);
	return res;
}
2526
2527int
2528evdns_nameserver_add(unsigned long int address) {
2529	if (!current_base)
2530		current_base = evdns_base_new(NULL, 0);
2531	return evdns_base_nameserver_add(current_base, address);
2532}
2533
2534static void
2535sockaddr_setport(struct sockaddr *sa, ev_uint16_t port)
2536{
2537	if (sa->sa_family == AF_INET) {
2538		((struct sockaddr_in *)sa)->sin_port = htons(port);
2539	} else if (sa->sa_family == AF_INET6) {
2540		((struct sockaddr_in6 *)sa)->sin6_port = htons(port);
2541	}
2542}
2543
2544static ev_uint16_t
2545sockaddr_getport(struct sockaddr *sa)
2546{
2547	if (sa->sa_family == AF_INET) {
2548		return ntohs(((struct sockaddr_in *)sa)->sin_port);
2549	} else if (sa->sa_family == AF_INET6) {
2550		return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
2551	} else {
2552		return 0;
2553	}
2554}
2555
2556/* exported function */
2557int
2558evdns_base_nameserver_ip_add(struct evdns_base *base, const char *ip_as_string) {
2559	struct sockaddr_storage ss;
2560	struct sockaddr *sa;
2561	int len = sizeof(ss);
2562	int res;
2563	if (evutil_parse_sockaddr_port(ip_as_string, (struct sockaddr *)&ss,
2564		&len)) {
2565		log(EVDNS_LOG_WARN, "Unable to parse nameserver address %s",
2566			ip_as_string);
2567		return 4;
2568	}
2569	sa = (struct sockaddr *) &ss;
2570	if (sockaddr_getport(sa) == 0)
2571		sockaddr_setport(sa, 53);
2572
2573	EVDNS_LOCK(base);
2574	res = _evdns_nameserver_add_impl(base, sa, len);
2575	EVDNS_UNLOCK(base);
2576	return res;
2577}
2578
2579int
2580evdns_nameserver_ip_add(const char *ip_as_string) {
2581	if (!current_base)
2582		current_base = evdns_base_new(NULL, 0);
2583	return evdns_base_nameserver_ip_add(current_base, ip_as_string);
2584}
2585
2586int
2587evdns_base_nameserver_sockaddr_add(struct evdns_base *base,
2588    const struct sockaddr *sa, ev_socklen_t len, unsigned flags)
2589{
2590	int res;
2591	EVUTIL_ASSERT(base);
2592	EVDNS_LOCK(base);
2593	res = _evdns_nameserver_add_impl(base, sa, len);
2594	EVDNS_UNLOCK(base);
2595	return res;
2596}
2597
2598/* remove from the queue */
2599static void
2600evdns_request_remove(struct request *req, struct request **head)
2601{
2602	ASSERT_LOCKED(req->base);
2603	ASSERT_VALID_REQUEST(req);
2604
2605#if 0
2606	{
2607		struct request *ptr;
2608		int found = 0;
2609		EVUTIL_ASSERT(*head != NULL);
2610
2611		ptr = *head;
2612		do {
2613			if (ptr == req) {
2614				found = 1;
2615				break;
2616			}
2617			ptr = ptr->next;
2618		} while (ptr != *head);
2619		EVUTIL_ASSERT(found);
2620
2621		EVUTIL_ASSERT(req->next);
2622	}
2623#endif
2624
2625	if (req->next == req) {
2626		/* only item in the list */
2627		*head = NULL;
2628	} else {
2629		req->next->prev = req->prev;
2630		req->prev->next = req->next;
2631		if (*head == req) *head = req->next;
2632	}
2633	req->next = req->prev = NULL;
2634}
2635
2636/* insert into the tail of the queue */
2637static void
2638evdns_request_insert(struct request *req, struct request **head) {
2639	ASSERT_LOCKED(req->base);
2640	ASSERT_VALID_REQUEST(req);
2641	if (!*head) {
2642		*head = req;
2643		req->next = req->prev = req;
2644		return;
2645	}
2646
2647	req->prev = (*head)->prev;
2648	req->prev->next = req;
2649	req->next = *head;
2650	(*head)->prev = req;
2651}
2652
/* Count the '.' characters in the NUL-terminated string s. */
static int
string_num_dots(const char *s) {
	int dots = 0;
	const char *p;

	for (p = s; *p != '\0'; ++p) {
		if (*p == '.')
			++dots;
	}
	return dots;
}
2662
2663static struct request *
2664request_new(struct evdns_base *base, struct evdns_request *handle, int type,
2665	    const char *name, int flags, evdns_callback_type callback,
2666	    void *user_ptr) {
2667
2668	const char issuing_now =
2669	    (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;
2670
2671	const size_t name_len = strlen(name);
2672	const size_t request_max_len = evdns_request_len(name_len);
2673	const u16 trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
2674	/* the request data is alloced in a single block with the header */
2675	struct request *const req =
2676	    mm_malloc(sizeof(struct request) + request_max_len);
2677	int rlen;
2678	char namebuf[256];
2679	(void) flags;
2680
2681	ASSERT_LOCKED(base);
2682
2683	if (!req) return NULL;
2684
2685	if (name_len >= sizeof(namebuf)) {
2686		mm_free(req);
2687		return NULL;
2688	}
2689
2690	memset(req, 0, sizeof(struct request));
2691	req->base = base;
2692
2693	evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);
2694
2695	if (base->global_randomize_case) {
2696		unsigned i;
2697		char randbits[(sizeof(namebuf)+7)/8];
2698		strlcpy(namebuf, name, sizeof(namebuf));
2699		evutil_secure_rng_get_bytes(randbits, (name_len+7)/8);
2700		for (i = 0; i < name_len; ++i) {
2701			if (EVUTIL_ISALPHA(namebuf[i])) {
2702				if ((randbits[i >> 3] & (1<<(i & 7))))
2703					namebuf[i] |= 0x20;
2704				else
2705					namebuf[i] &= ~0x20;
2706			}
2707		}
2708		name = namebuf;
2709	}
2710
2711	/* request data lives just after the header */
2712	req->request = ((u8 *) req) + sizeof(struct request);
2713	/* denotes that the request data shouldn't be free()ed */
2714	req->request_appended = 1;
2715	rlen = evdns_request_data_build(name, name_len, trans_id,
2716	    type, CLASS_INET, req->request, request_max_len);
2717	if (rlen < 0)
2718		goto err1;
2719
2720	req->request_len = rlen;
2721	req->trans_id = trans_id;
2722	req->tx_count = 0;
2723	req->request_type = type;
2724	req->user_pointer = user_ptr;
2725	req->user_callback = callback;
2726	req->ns = issuing_now ? nameserver_pick(base) : NULL;
2727	req->next = req->prev = NULL;
2728	req->handle = handle;
2729	if (handle) {
2730		handle->current_req = req;
2731		handle->base = base;
2732	}
2733
2734	return req;
2735err1:
2736	mm_free(req);
2737	return NULL;
2738}
2739
2740static void
2741request_submit(struct request *const req) {
2742	struct evdns_base *base = req->base;
2743	ASSERT_LOCKED(base);
2744	ASSERT_VALID_REQUEST(req);
2745	if (req->ns) {
2746		/* if it has a nameserver assigned then this is going */
2747		/* straight into the inflight queue */
2748		evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
2749		base->global_requests_inflight++;
2750		evdns_request_transmit(req);
2751	} else {
2752		evdns_request_insert(req, &base->req_waiting_head);
2753		base->global_requests_waiting++;
2754	}
2755}
2756
2757/* exported function */
2758void
2759evdns_cancel_request(struct evdns_base *base, struct evdns_request *handle)
2760{
2761	struct request *req;
2762
2763	if (!handle->current_req)
2764		return;
2765
2766	if (!base) {
2767		/* This redundancy is silly; can we fix it? (Not for 2.0) XXXX */
2768		base = handle->base;
2769		if (!base)
2770			base = handle->current_req->base;
2771	}
2772
2773	EVDNS_LOCK(base);
2774	if (handle->pending_cb) {
2775		EVDNS_UNLOCK(base);
2776		return;
2777	}
2778
2779	req = handle->current_req;
2780	ASSERT_VALID_REQUEST(req);
2781
2782	reply_schedule_callback(req, 0, DNS_ERR_CANCEL, NULL);
2783	if (req->ns) {
2784		/* remove from inflight queue */
2785		request_finished(req, &REQ_HEAD(base, req->trans_id), 1);
2786	} else {
2787		/* remove from global_waiting head */
2788		request_finished(req, &base->req_waiting_head, 1);
2789	}
2790	EVDNS_UNLOCK(base);
2791}
2792
2793/* exported function */
2794struct evdns_request *
2795evdns_base_resolve_ipv4(struct evdns_base *base, const char *name, int flags,
2796    evdns_callback_type callback, void *ptr) {
2797	struct evdns_request *handle;
2798	struct request *req;
2799	log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2800	handle = mm_calloc(1, sizeof(*handle));
2801	if (handle == NULL)
2802		return NULL;
2803	EVDNS_LOCK(base);
2804	if (flags & DNS_QUERY_NO_SEARCH) {
2805		req =
2806			request_new(base, handle, TYPE_A, name, flags,
2807				    callback, ptr);
2808		if (req)
2809			request_submit(req);
2810	} else {
2811		search_request_new(base, handle, TYPE_A, name, flags,
2812		    callback, ptr);
2813	}
2814	if (handle->current_req == NULL) {
2815		mm_free(handle);
2816		handle = NULL;
2817	}
2818	EVDNS_UNLOCK(base);
2819	return handle;
2820}
2821
2822int evdns_resolve_ipv4(const char *name, int flags,
2823					   evdns_callback_type callback, void *ptr)
2824{
2825	return evdns_base_resolve_ipv4(current_base, name, flags, callback, ptr)
2826		? 0 : -1;
2827}
2828
2829
2830/* exported function */
2831struct evdns_request *
2832evdns_base_resolve_ipv6(struct evdns_base *base,
2833    const char *name, int flags,
2834    evdns_callback_type callback, void *ptr)
2835{
2836	struct evdns_request *handle;
2837	struct request *req;
2838	log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2839	handle = mm_calloc(1, sizeof(*handle));
2840	if (handle == NULL)
2841		return NULL;
2842	EVDNS_LOCK(base);
2843	if (flags & DNS_QUERY_NO_SEARCH) {
2844		req = request_new(base, handle, TYPE_AAAA, name, flags,
2845				  callback, ptr);
2846		if (req)
2847			request_submit(req);
2848	} else {
2849		search_request_new(base, handle, TYPE_AAAA, name, flags,
2850		    callback, ptr);
2851	}
2852	if (handle->current_req == NULL) {
2853		mm_free(handle);
2854		handle = NULL;
2855	}
2856	EVDNS_UNLOCK(base);
2857	return handle;
2858}
2859
2860int evdns_resolve_ipv6(const char *name, int flags,
2861    evdns_callback_type callback, void *ptr) {
2862	return evdns_base_resolve_ipv6(current_base, name, flags, callback, ptr)
2863		? 0 : -1;
2864}
2865
2866struct evdns_request *
2867evdns_base_resolve_reverse(struct evdns_base *base, const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2868	char buf[32];
2869	struct evdns_request *handle;
2870	struct request *req;
2871	u32 a;
2872	EVUTIL_ASSERT(in);
2873	a = ntohl(in->s_addr);
2874	evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
2875			(int)(u8)((a	)&0xff),
2876			(int)(u8)((a>>8 )&0xff),
2877			(int)(u8)((a>>16)&0xff),
2878			(int)(u8)((a>>24)&0xff));
2879	handle = mm_calloc(1, sizeof(*handle));
2880	if (handle == NULL)
2881		return NULL;
2882	log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
2883	EVDNS_LOCK(base);
2884	req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
2885	if (req)
2886		request_submit(req);
2887	if (handle->current_req == NULL) {
2888		mm_free(handle);
2889		handle = NULL;
2890	}
2891	EVDNS_UNLOCK(base);
2892	return (handle);
2893}
2894
2895int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2896	return evdns_base_resolve_reverse(current_base, in, flags, callback, ptr)
2897		? 0 : -1;
2898}
2899
2900struct evdns_request *
2901evdns_base_resolve_reverse_ipv6(struct evdns_base *base, const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2902	/* 32 nybbles, 32 periods, "ip6.arpa", NUL. */
2903	char buf[73];
2904	char *cp;
2905	struct evdns_request *handle;
2906	struct request *req;
2907	int i;
2908	EVUTIL_ASSERT(in);
2909	cp = buf;
2910	for (i=15; i >= 0; --i) {
2911		u8 byte = in->s6_addr[i];
2912		*cp++ = "0123456789abcdef"[byte & 0x0f];
2913		*cp++ = '.';
2914		*cp++ = "0123456789abcdef"[byte >> 4];
2915		*cp++ = '.';
2916	}
2917	EVUTIL_ASSERT(cp + strlen("ip6.arpa") < buf+sizeof(buf));
2918	memcpy(cp, "ip6.arpa", strlen("ip6.arpa")+1);
2919	handle = mm_calloc(1, sizeof(*handle));
2920	if (handle == NULL)
2921		return NULL;
2922	log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
2923	EVDNS_LOCK(base);
2924	req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
2925	if (req)
2926		request_submit(req);
2927	if (handle->current_req == NULL) {
2928		mm_free(handle);
2929		handle = NULL;
2930	}
2931	EVDNS_UNLOCK(base);
2932	return (handle);
2933}
2934
2935int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2936	return evdns_base_resolve_reverse_ipv6(current_base, in, flags, callback, ptr)
2937		? 0 : -1;
2938}
2939
2940/* ================================================================= */
2941/* Search support */
2942/* */
2943/* the libc resolver has support for searching a number of domains */
2944/* to find a name. If nothing else then it takes the single domain */
2945/* from the gethostname() call. */
2946/* */
2947/* It can also be configured via the domain and search options in a */
2948/* resolv.conf. */
2949/* */
2950/* The ndots option controls how many dots it takes for the resolver */
2951/* to decide that a name is non-local and so try a raw lookup first. */
2952
/* One entry in a search_state's singly linked list of domain suffixes. */
struct search_domain {
	/* number of bytes of suffix text stored after this structure */
	int len;
	/* next entry in the list, or NULL at the end */
	struct search_domain *next;
	/* the text string is appended to this structure */
};
2958
/* Reference-counted search configuration: a list of domain suffixes plus
 * the ndots threshold. */
struct search_state {
	/* search_state_decref() frees the state when this drops to zero */
	int refcount;
	/* names with at least this many dots are looked up as given first;
	 * search_state_new() starts it at 1 */
	int ndots;
	/* incremented by search_postfix_add() for each suffix it adds */
	int num_domains;
	/* first entry of the suffix list */
	struct search_domain *head;
};
2965
2966static void
2967search_state_decref(struct search_state *const state) {
2968	if (!state) return;
2969	state->refcount--;
2970	if (!state->refcount) {
2971		struct search_domain *next, *dom;
2972		for (dom = state->head; dom; dom = next) {
2973			next = dom->next;
2974			mm_free(dom);
2975		}
2976		mm_free(state);
2977	}
2978}
2979
2980static struct search_state *
2981search_state_new(void) {
2982	struct search_state *state = (struct search_state *) mm_malloc(sizeof(struct search_state));
2983	if (!state) return NULL;
2984	memset(state, 0, sizeof(struct search_state));
2985	state->refcount = 1;
2986	state->ndots = 1;
2987
2988	return state;
2989}
2990
2991static void
2992search_postfix_clear(struct evdns_base *base) {
2993	search_state_decref(base->global_search_state);
2994
2995	base->global_search_state = search_state_new();
2996}
2997
/* exported function */
/* Reset base's search-domain list, holding the base lock meanwhile. */
void
evdns_base_search_clear(struct evdns_base *base)
{
	EVDNS_LOCK(base);
	search_postfix_clear(base);
	EVDNS_UNLOCK(base);
	return;
}
3006
3007void
3008evdns_search_clear(void) {
3009	evdns_base_search_clear(current_base);
3010}
3011
3012static void
3013search_postfix_add(struct evdns_base *base, const char *domain) {
3014	size_t domain_len;
3015	struct search_domain *sdomain;
3016	while (domain[0] == '.') domain++;
3017	domain_len = strlen(domain);
3018
3019	ASSERT_LOCKED(base);
3020	if (!base->global_search_state) base->global_search_state = search_state_new();
3021	if (!base->global_search_state) return;
3022	base->global_search_state->num_domains++;
3023
3024	sdomain = (struct search_domain *) mm_malloc(sizeof(struct search_domain) + domain_len);
3025	if (!sdomain) return;
3026	memcpy( ((u8 *) sdomain) + sizeof(struct search_domain), domain, domain_len);
3027	sdomain->next = base->global_search_state->head;
3028	sdomain->len = (int) domain_len;
3029
3030	base->global_search_state->head = sdomain;
3031}
3032
3033/* reverse the order of members in the postfix list. This is needed because, */
3034/* when parsing resolv.conf we push elements in the wrong order */
3035static void
3036search_reverse(struct evdns_base *base) {
3037	struct search_domain *cur, *prev = NULL, *next;
3038	ASSERT_LOCKED(base);
3039	cur = base->global_search_state->head;
3040	while (cur) {
3041		next = cur->next;
3042		cur->next = prev;
3043		prev = cur;
3044		cur = next;
3045	}
3046
3047	base->global_search_state->head = prev;
3048}
3049
/* exported function */
/* Add `domain` to base's search-domain list, holding the base lock
 * meanwhile. */
void
evdns_base_search_add(struct evdns_base *base, const char *domain)
{
	EVDNS_LOCK(base);
	search_postfix_add(base, domain);
	EVDNS_UNLOCK(base);
	return;
}
3057void
3058evdns_search_add(const char *domain) {
3059	evdns_base_search_add(current_base, domain);
3060}
3061
3062/* exported function */
3063void
3064evdns_base_search_ndots_set(struct evdns_base *base, const int ndots) {
3065	EVDNS_LOCK(base);
3066	if (!base->global_search_state) base->global_search_state = search_state_new();
3067	if (base->global_search_state)
3068		base->global_search_state->ndots = ndots;
3069	EVDNS_UNLOCK(base);
3070}
3071void
3072evdns_search_ndots_set(const int ndots) {
3073	evdns_base_search_ndots_set(current_base, ndots);
3074}
3075
3076static void
3077search_set_from_hostname(struct evdns_base *base) {
3078	char hostname[HOST_NAME_MAX + 1], *domainname;
3079
3080	ASSERT_LOCKED(base);
3081	search_postfix_clear(base);
3082	if (gethostname(hostname, sizeof(hostname))) return;
3083	domainname = strchr(hostname, '.');
3084	if (!domainname) return;
3085	search_postfix_add(base, domainname);
3086}
3087
3088/* warning: returns malloced string */
3089static char *
3090search_make_new(const struct search_state *const state, int n, const char *const base_name) {
3091	const size_t base_len = strlen(base_name);
3092	const char need_to_append_dot = base_name[base_len - 1] == '.' ? 0 : 1;
3093	struct search_domain *dom;
3094
3095	for (dom = state->head; dom; dom = dom->next) {
3096		if (!n--) {
3097			/* this is the postfix we want */
3098			/* the actual postfix string is kept at the end of the structure */
3099			const u8 *const postfix = ((u8 *) dom) + sizeof(struct search_domain);
3100			const int postfix_len = dom->len;
3101			char *const newname = (char *) mm_malloc(base_len + need_to_append_dot + postfix_len + 1);
3102			if (!newname) return NULL;
3103			memcpy(newname, base_name, base_len);
3104			if (need_to_append_dot) newname[base_len] = '.';
3105			memcpy(newname + base_len + need_to_append_dot, postfix, postfix_len);
3106			newname[base_len + need_to_append_dot + postfix_len] = 0;
3107			return newname;
3108		}
3109	}
3110
3111	/* we ran off the end of the list and still didn't find the requested string */
3112	EVUTIL_ASSERT(0);
3113	return NULL; /* unreachable; stops warnings in some compilers. */
3114}
3115
3116static struct request *
3117search_request_new(struct evdns_base *base, struct evdns_request *handle,
3118		   int type, const char *const name, int flags,
3119		   evdns_callback_type user_callback, void *user_arg) {
3120	ASSERT_LOCKED(base);
3121	EVUTIL_ASSERT(type == TYPE_A || type == TYPE_AAAA);
3122	EVUTIL_ASSERT(handle->current_req == NULL);
3123	if ( ((flags & DNS_QUERY_NO_SEARCH) == 0) &&
3124	     base->global_search_state &&
3125		 base->global_search_state->num_domains) {
3126		/* we have some domains to search */
3127		struct request *req;
3128		if (string_num_dots(name) >= base->global_search_state->ndots) {
3129			req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3130			if (!req) return NULL;
3131			handle->search_index = -1;
3132		} else {
3133			char *const new_name = search_make_new(base->global_search_state, 0, name);
3134			if (!new_name) return NULL;
3135			req = request_new(base, handle, type, new_name, flags, user_callback, user_arg);
3136			mm_free(new_name);
3137			if (!req) return NULL;
3138			handle->search_index = 0;
3139		}
3140		EVUTIL_ASSERT(handle->search_origname == NULL);
3141		handle->search_origname = mm_strdup(name);
3142		if (handle->search_origname == NULL) {
3143			/* XXX Should we dealloc req? If yes, how? */
3144			return NULL;
3145		}
3146		handle->search_state = base->global_search_state;
3147		handle->search_flags = flags;
3148		base->global_search_state->refcount++;
3149		request_submit(req);
3150		return req;
3151	} else {
3152		struct request *const req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3153		if (!req) return NULL;
3154		request_submit(req);
3155		return req;
3156	}
3157}
3158
3159/* this is called when a request has failed to find a name. We need to check */
3160/* if it is part of a search and, if so, try the next name in the list */
3161/* returns: */
3162/*   0 another request has been submitted */
3163/*   1 no more requests needed */
3164static int
3165search_try_next(struct evdns_request *const handle) {
3166	struct request *req = handle->current_req;
3167	struct evdns_base *base = req->base;
3168	struct request *newreq;
3169	ASSERT_LOCKED(base);
3170	if (handle->search_state) {
3171		/* it is part of a search */
3172		char *new_name;
3173		handle->search_index++;
3174		if (handle->search_index >= handle->search_state->num_domains) {
3175			/* no more postfixes to try, however we may need to try */
3176			/* this name without a postfix */
3177			if (string_num_dots(handle->search_origname) < handle->search_state->ndots) {
3178				/* yep, we need to try it raw */
3179				newreq = request_new(base, NULL, req->request_type, handle->search_origname, handle->search_flags, req->user_callback, req->user_pointer);
3180				log(EVDNS_LOG_DEBUG, "Search: trying raw query %s", handle->search_origname);
3181				if (newreq) {
3182					search_request_finished(handle);
3183					goto submit_next;
3184				}
3185			}
3186			return 1;
3187		}
3188
3189		new_name = search_make_new(handle->search_state, handle->search_index, handle->search_origname);
3190		if (!new_name) return 1;
3191		log(EVDNS_LOG_DEBUG, "Search: now trying %s (%d)", new_name, handle->search_index);
3192		newreq = request_new(base, NULL, req->request_type, new_name, handle->search_flags, req->user_callback, req->user_pointer);
3193		mm_free(new_name);
3194		if (!newreq) return 1;
3195		goto submit_next;
3196	}
3197	return 1;
3198
3199submit_next:
3200	request_finished(req, &REQ_HEAD(req->base, req->trans_id), 0);
3201	handle->current_req = newreq;
3202	newreq->handle = handle;
3203	request_submit(newreq);
3204	return 0;
3205}
3206
3207static void
3208search_request_finished(struct evdns_request *const handle) {
3209	ASSERT_LOCKED(handle->current_req->base);
3210	if (handle->search_state) {
3211		search_state_decref(handle->search_state);
3212		handle->search_state = NULL;
3213	}
3214	if (handle->search_origname) {
3215		mm_free(handle->search_origname);
3216		handle->search_origname = NULL;
3217	}
3218}
3219
3220/* ================================================================= */
3221/* Parsing resolv.conf files */
3222
3223static void
3224evdns_resolv_set_defaults(struct evdns_base *base, int flags) {
3225	/* if the file isn't found then we assume a local resolver */
3226	ASSERT_LOCKED(base);
3227	if (flags & DNS_OPTION_SEARCH) search_set_from_hostname(base);
3228	if (flags & DNS_OPTION_NAMESERVERS) evdns_base_nameserver_ip_add(base,"127.0.0.1");
3229}
3230
3231#ifndef _EVENT_HAVE_STRTOK_R
/* Fallback strtok_r() for platforms that lack one.
 *
 * Splits a string into tokens separated by any of the characters in
 * `delim`.  Pass the string in `s` on the first call and NULL afterwards;
 * `*state` carries the position between calls.  Runs of delimiters,
 * including leading and trailing ones, are skipped, so an empty token is
 * never returned.  The input is modified in place: the delimiter ending
 * each token is overwritten with NUL.  Returns the next token, or NULL
 * when none are left. */
static char *
strtok_r(char *s, const char *delim, char **state) {
	char *start, *end;

	start = s ? s : *state;
	if (!start)
		return NULL;

	/* Skip delimiters in front of the token.  The '*start' test is
	 * needed because strchr() also matches the terminating NUL. */
	while (*start && strchr(delim, *start))
		++start;
	if (!*start) {
		*state = NULL;
		return NULL;
	}

	/* Find the end of the token. */
	end = start;
	while (*end && !strchr(delim, *end))
		++end;
	if (*end) {
		*end++ = '\0';
		*state = end;
	} else {
		*state = NULL;
	}
	return start;
}
3251#endif
3252
/* helper version of atoi which returns -1 on error */
/* An error is: no digits at all (e.g. the empty string), trailing
 * characters after the number, or a value that does not fit in an int.
 * Note that the input "-1" cannot be told apart from an error. */
static int
strtoint(const char *const str)
{
	char *endptr;
	const long r = strtol(str, &endptr, 10);
	/* endptr == str means nothing was converted. */
	if (endptr == str || *endptr) return -1;
	/* Refuse values that would be silently truncated by the cast. */
	if (r < INT_MIN || r > INT_MAX) return -1;
	return (int) r;
}
3262
/* Parse a number of seconds into a timeval; return -1 on error. */
/* Errors are: trailing characters after the number, a negative value, a
 * value that is not a number or too large for the int conversions below,
 * and a value of less than one millisecond.  Note that *out may already
 * have been written when the "less than one millisecond" error is
 * returned. */
static int
strtotimeval(const char *const str, struct timeval *out)
{
	double d;
	char *endptr;
	d = strtod(str, &endptr);
	if (*endptr) return -1;
	/* Written as !(d >= 0) so that NaN is rejected as well; the upper
	 * bound (which also catches infinity) keeps the casts to int below
	 * in range. */
	if (!(d >= 0) || d > (double) INT_MAX) return -1;
	out->tv_sec = (int) d;
	out->tv_usec = (int) ((d - (int) d)*1000000);
	if (out->tv_sec == 0 && out->tv_usec < 1000) /* less than 1 msec */
		return -1;
	return 0;
}
3278
/* Like strtoint(), but a successfully parsed value is forced into the
 * range [min, max].  The error value -1 is passed through unchanged. */
static int
strtoint_clipped(const char *const str, int min, int max)
{
	const int parsed = strtoint(str);

	if (parsed == -1)
		return parsed;
	if (parsed < min)
		return min;
	return (parsed > max) ? max : parsed;
}
3293
3294static int
3295evdns_base_set_max_requests_inflight(struct evdns_base *base, int maxinflight)
3296{
3297	int old_n_heads = base->n_req_heads, n_heads;
3298	struct request **old_heads = base->req_heads, **new_heads, *req;
3299	int i;
3300
3301	ASSERT_LOCKED(base);
3302	if (maxinflight < 1)
3303		maxinflight = 1;
3304	n_heads = (maxinflight+4) / 5;
3305	EVUTIL_ASSERT(n_heads > 0);
3306	new_heads = mm_calloc(n_heads, sizeof(struct request*));
3307	if (!new_heads)
3308		return (-1);
3309	if (old_heads) {
3310		for (i = 0; i < old_n_heads; ++i) {
3311			while (old_heads[i]) {
3312				req = old_heads[i];
3313				evdns_request_remove(req, &old_heads[i]);
3314				evdns_request_insert(req, &new_heads[req->trans_id % n_heads]);
3315			}
3316		}
3317		mm_free(old_heads);
3318	}
3319	base->req_heads = new_heads;
3320	base->n_req_heads = n_heads;
3321	base->global_max_requests_inflight = maxinflight;
3322	return (0);
3323}
3324
3325/* exported function */
3326int
3327evdns_base_set_option(struct evdns_base *base,
3328    const char *option, const char *val)
3329{
3330	int res;
3331	EVDNS_LOCK(base);
3332	res = evdns_base_set_option_impl(base, option, val, DNS_OPTIONS_ALL);
3333	EVDNS_UNLOCK(base);
3334	return res;
3335}
3336
/* Return 1 if `s1` names the option `optionname`, else 0.
 *
 * Option names are written with a trailing colon ("option:").  `s1`
 * matches when it is the name without the colon ("option"), the name with
 * the colon, or the name with the colon followed by anything at all
 * ("option:value") -- the last form is what the resolv.conf parser
 * passes in. */
static inline int
str_matches_option(const char *s1, const char *optionname)
{
	const size_t optlen = strlen(optionname);
	const size_t slen = strlen(s1);
	size_t cmplen;

	/* Anything more than one character short of the full name can't
	 * match. */
	if (slen + 1 < optlen)
		return 0;

	/* Compare the part the two strings have in common, at most the
	 * whole option name. */
	cmplen = (slen < optlen) ? slen : optlen;
	return strncmp(s1, optionname, cmplen) == 0;
}
3352
/* Apply one configuration option to `base`.
 *
 * `option` is matched with str_matches_option(), so both "name" and
 * "name:anything" select the option "name:"; `val` is the value text.
 * `flags` is a mask of DNS_OPTION_* categories: an option whose category
 * is not enabled is accepted but not applied.  For most options the value
 * is validated before the category check, so a malformed value is an
 * error even when the option would not have been applied.  Unknown option
 * names are ignored.
 *
 * Returns 0 on success (including "ignored"), -1 on a bad value or when a
 * needed allocation fails.  Caller must hold the base lock. */
static int
evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags)
{
	ASSERT_LOCKED(base);
	if (str_matches_option(option, "ndots:")) {
		const int ndots = strtoint(val);
		if (ndots == -1) return -1;
		if (!(flags & DNS_OPTION_SEARCH)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting ndots to %d", ndots);
		if (!base->global_search_state) base->global_search_state = search_state_new();
		if (!base->global_search_state) return -1;
		base->global_search_state->ndots = ndots;
	} else if (str_matches_option(option, "timeout:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting timeout to %s", val);
		memcpy(&base->global_timeout, &tv, sizeof(struct timeval));
	} else if (str_matches_option(option, "getaddrinfo-allow-skew:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting getaddrinfo-allow-skew to %s",
		    val);
		memcpy(&base->global_getaddrinfo_allow_skew, &tv,
		    sizeof(struct timeval));
	} else if (str_matches_option(option, "max-timeouts:")) {
		const int maxtimeout = strtoint_clipped(val, 1, 255);
		if (maxtimeout == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum allowed timeouts to %d",
			maxtimeout);
		base->global_max_nameserver_timeout = maxtimeout;
	} else if (str_matches_option(option, "max-inflight:")) {
		const int maxinflight = strtoint_clipped(val, 1, 65000);
		if (maxinflight == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum inflight requests to %d",
			maxinflight);
		/* Resizing allocates a new table; report the failure instead
		 * of claiming the option was applied. */
		if (evdns_base_set_max_requests_inflight(base, maxinflight) < 0)
			return -1;
	} else if (str_matches_option(option, "attempts:")) {
		int retries = strtoint(val);
		if (retries == -1) return -1;
		if (retries > 255) retries = 255;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting retries to %d", retries);
		base->global_max_retransmits = retries;
	} else if (str_matches_option(option, "randomize-case:")) {
		/* Note: the parsed value is stored as-is, so an unparsable
		 * value (-1) is stored too rather than rejected. */
		int randcase = strtoint(val);
		if (!(flags & DNS_OPTION_MISC)) return 0;
		base->global_randomize_case = randcase;
	} else if (str_matches_option(option, "bind-to:")) {
		/* XXX This only applies to successive nameservers, not
		 * to already-configured ones.	We might want to fix that. */
		int len = sizeof(base->global_outgoing_address);
		if (!(flags & DNS_OPTION_NAMESERVERS)) return 0;
		if (evutil_parse_sockaddr_port(val,
			(struct sockaddr*)&base->global_outgoing_address, &len))
			return -1;
		base->global_outgoing_addrlen = len;
	} else if (str_matches_option(option, "initial-probe-timeout:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		/* cap the probe timeout at one hour */
		if (tv.tv_sec > 3600)
			tv.tv_sec = 3600;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting initial probe timeout to %s",
		    val);
		memcpy(&base->global_nameserver_probe_initial_timeout, &tv,
		    sizeof(tv));
	}
	return 0;
}
3427
3428int
3429evdns_set_option(const char *option, const char *val, int flags)
3430{
3431	if (!current_base)
3432		current_base = evdns_base_new(NULL, 0);
3433	return evdns_base_set_option(current_base, option, val);
3434}
3435
3436static void
3437resolv_conf_parse_line(struct evdns_base *base, char *const start, int flags) {
3438	char *strtok_state;
3439	static const char *const delims = " \t";
3440#define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
3441
3442
3443	char *const first_token = strtok_r(start, delims, &strtok_state);
3444	ASSERT_LOCKED(base);
3445	if (!first_token) return;
3446
3447	if (!strcmp(first_token, "nameserver") && (flags & DNS_OPTION_NAMESERVERS)) {
3448		const char *const nameserver = NEXT_TOKEN;
3449
3450		if (nameserver)
3451			evdns_base_nameserver_ip_add(base, nameserver);
3452	} else if (!strcmp(first_token, "domain") && (flags & DNS_OPTION_SEARCH)) {
3453		const char *const domain = NEXT_TOKEN;
3454		if (domain) {
3455			search_postfix_clear(base);
3456			search_postfix_add(base, domain);
3457		}
3458	} else if (!strcmp(first_token, "search") && (flags & DNS_OPTION_SEARCH)) {
3459		const char *domain;
3460		search_postfix_clear(base);
3461
3462		while ((domain = NEXT_TOKEN)) {
3463			search_postfix_add(base, domain);
3464		}
3465		search_reverse(base);
3466	} else if (!strcmp(first_token, "options")) {
3467		const char *option;
3468		while ((option = NEXT_TOKEN)) {
3469			const char *val = strchr(option, ':');
3470			evdns_base_set_option_impl(base, option, val ? val+1 : "", flags);
3471		}
3472	}
3473#undef NEXT_TOKEN
3474}
3475
/* exported function */
/* Parse the resolv.conf-style file `filename` into `base`, holding the
 * base lock for the duration.  `flags` selects which kinds of settings
 * are applied.  The result comes from
 * evdns_base_resolv_conf_parse_impl().
 * returns:
 *   0 no errors
 *   1 failed to open file
 *   2 failed to stat file
 *   3 file too large
 *   4 out of memory
 *   5 short read from file
 *   6 no nameservers were configured */
int
evdns_base_resolv_conf_parse(struct evdns_base *base, int flags, const char *const filename) {
	int rv;

	EVDNS_LOCK(base);
	rv = evdns_base_resolv_conf_parse_impl(base, flags, filename);
	EVDNS_UNLOCK(base);

	return rv;
}
3492
/* Return the path of the system hosts file as a newly allocated string,
 * or NULL if it cannot be determined or memory runs out.  The caller
 * frees the result with mm_free(). */
static char *
evdns_get_default_hosts_filename(void)
{
#ifdef WIN32
	/* Windows is a little coy about where it puts its configuration
	 * files.  Sure, they're _usually_ in C:\windows\system32, but
	 * there's no reason in principle they couldn't be in
	 * W:\hoboken chicken emergency\
	 */
	char path[MAX_PATH+1];
	static const char hostfile[] = "\\drivers\\etc\\hosts";
	char *path_out;
	size_t len_out;

	if (! SHGetSpecialFolderPathA(NULL, path, CSIDL_SYSTEM, 0))
		return NULL;
	/* +1 for the terminating NUL: the size passed to snprintf must
	 * cover it, or the last character of the path gets cut off. */
	len_out = strlen(path)+strlen(hostfile)+1;
	path_out = mm_malloc(len_out);
	if (!path_out)
		return NULL;
	evutil_snprintf(path_out, len_out, "%s%s", path, hostfile);
	return path_out;
#else
	return mm_strdup("/etc/hosts");
#endif
}
3517
3518static int
3519evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename) {
3520	size_t n;
3521	char *resolv;
3522	char *start;
3523	int err = 0;
3524
3525	log(EVDNS_LOG_DEBUG, "Parsing resolv.conf file %s", filename);
3526
3527	if (flags & DNS_OPTION_HOSTSFILE) {
3528		char *fname = evdns_get_default_hosts_filename();
3529		evdns_base_load_hosts(base, fname);
3530		if (fname)
3531			mm_free(fname);
3532	}
3533
3534	if ((err = evutil_read_file(filename, &resolv, &n, 0)) < 0) {
3535		if (err == -1) {
3536			/* No file. */
3537			evdns_resolv_set_defaults(base, flags);
3538			return 1;
3539		} else {
3540			return 2;
3541		}
3542	}
3543
3544	start = resolv;
3545	for (;;) {
3546		char *const newline = strchr(start, '\n');
3547		if (!newline) {
3548			resolv_conf_parse_line(base, start, flags);
3549			break;
3550		} else {
3551			*newline = 0;
3552			resolv_conf_parse_line(base, start, flags);
3553			start = newline + 1;
3554		}
3555	}
3556
3557	if (!base->server_head && (flags & DNS_OPTION_NAMESERVERS)) {
3558		/* no nameservers were configured. */
3559		evdns_base_nameserver_ip_add(base, "127.0.0.1");
3560		err = 6;
3561	}
3562	if (flags & DNS_OPTION_SEARCH && (!base->global_search_state || base->global_search_state->num_domains == 0)) {
3563		search_set_from_hostname(base);
3564	}
3565
3566	mm_free(resolv);
3567	return err;
3568}
3569
3570int
3571evdns_resolv_conf_parse(int flags, const char *const filename) {
3572	if (!current_base)
3573		current_base = evdns_base_new(NULL, 0);
3574	return evdns_base_resolv_conf_parse(current_base, flags, filename);
3575}
3576
3577
3578#ifdef WIN32
/* Add multiple nameservers from a space-or-comma-separated list. */
/* Each address is a run of hex digits, '.', ':', '[' and ']', so both
 * IPv4 and IPv6 literals are picked up.  Separators at the start or end
 * of the list are ignored.  Returns 0 when every address was added, 4 on
 * allocation failure, or the first non-zero result of
 * evdns_base_nameserver_ip_add().  Caller must hold the base lock. */
static int
evdns_nameserver_ip_add_line(struct evdns_base *base, const char *ips) {
	const char *addr;
	char *buf;
	int r;
	ASSERT_LOCKED(base);
	while (*ips) {
		/* <ctype.h> functions need an unsigned char value. */
		while (isspace((unsigned char)*ips) || *ips == ',' || *ips == '\t')
			++ips;
		/* Only separators were left: that is the end of the list, not
		 * an empty address. */
		if (!*ips)
			break;
		addr = ips;
		/* isxdigit rather than isdigit: IPv6 addresses contain a-f. */
		while (isxdigit((unsigned char)*ips) || *ips == '.' || *ips == ':' ||
		    *ips=='[' || *ips==']')
			++ips;
		buf = mm_malloc(ips-addr+1);
		if (!buf) return 4;
		memcpy(buf, addr, ips-addr);
		buf[ips-addr] = '\0';
		r = evdns_base_nameserver_ip_add(base, buf);
		mm_free(buf);
		if (r) return r;
	}
	return 0;
}
3603
3604typedef DWORD(WINAPI *GetNetworkParams_fn_t)(FIXED_INFO *, DWORD*);
3605
3606/* Use the windows GetNetworkParams interface in iphlpapi.dll to */
3607/* figure out what our nameservers are. */
3608static int
3609load_nameservers_with_getnetworkparams(struct evdns_base *base)
3610{
3611	/* Based on MSDN examples and inspection of  c-ares code. */
3612	FIXED_INFO *fixed;
3613	HMODULE handle = 0;
3614	ULONG size = sizeof(FIXED_INFO);
3615	void *buf = NULL;
3616	int status = 0, r, added_any;
3617	IP_ADDR_STRING *ns;
3618	GetNetworkParams_fn_t fn;
3619
3620	ASSERT_LOCKED(base);
3621	if (!(handle = evutil_load_windows_system_library(
3622			TEXT("iphlpapi.dll")))) {
3623		log(EVDNS_LOG_WARN, "Could not open iphlpapi.dll");
3624		status = -1;
3625		goto done;
3626	}
3627	if (!(fn = (GetNetworkParams_fn_t) GetProcAddress(handle, "GetNetworkParams"))) {
3628		log(EVDNS_LOG_WARN, "Could not get address of function.");
3629		status = -1;
3630		goto done;
3631	}
3632
3633	buf = mm_malloc(size);
3634	if (!buf) { status = 4; goto done; }
3635	fixed = buf;
3636	r = fn(fixed, &size);
3637	if (r != ERROR_SUCCESS && r != ERROR_BUFFER_OVERFLOW) {
3638		status = -1;
3639		goto done;
3640	}
3641	if (r != ERROR_SUCCESS) {
3642		mm_free(buf);
3643		buf = mm_malloc(size);
3644		if (!buf) { status = 4; goto done; }
3645		fixed = buf;
3646		r = fn(fixed, &size);
3647		if (r != ERROR_SUCCESS) {
3648			log(EVDNS_LOG_DEBUG, "fn() failed.");
3649			status = -1;
3650			goto done;
3651		}
3652	}
3653
3654	EVUTIL_ASSERT(fixed);
3655	added_any = 0;
3656	ns = &(fixed->DnsServerList);
3657	while (ns) {
3658		r = evdns_nameserver_ip_add_line(base, ns->IpAddress.String);
3659		if (r) {
3660			log(EVDNS_LOG_DEBUG,"Could not add nameserver %s to list,error: %d",
3661				(ns->IpAddress.String),(int)GetLastError());
3662			status = r;
3663		} else {
3664			++added_any;
3665			log(EVDNS_LOG_DEBUG,"Successfully added %s as nameserver",ns->IpAddress.String);
3666		}
3667
3668		ns = ns->Next;
3669	}
3670
3671	if (!added_any) {
3672		log(EVDNS_LOG_DEBUG, "No nameservers added.");
3673		if (status == 0)
3674			status = -1;
3675	} else {
3676		status = 0;
3677	}
3678
3679 done:
3680	if (buf)
3681		mm_free(buf);
3682	if (handle)
3683		FreeLibrary(handle);
3684	return status;
3685}
3686
3687static int
3688config_nameserver_from_reg_key(struct evdns_base *base, HKEY key, const TCHAR *subkey)
3689{
3690	char *buf;
3691	DWORD bufsz = 0, type = 0;
3692	int status = 0;
3693
3694	ASSERT_LOCKED(base);
3695	if (RegQueryValueEx(key, subkey, 0, &type, NULL, &bufsz)
3696	    != ERROR_MORE_DATA)
3697		return -1;
3698	if (!(buf = mm_malloc(bufsz)))
3699		return -1;
3700
3701	if (RegQueryValueEx(key, subkey, 0, &type, (LPBYTE)buf, &bufsz)
3702	    == ERROR_SUCCESS && bufsz > 1) {
3703		status = evdns_nameserver_ip_add_line(base,buf);
3704	}
3705
3706	mm_free(buf);
3707	return status;
3708}
3709
3710#define SERVICES_KEY TEXT("System\\CurrentControlSet\\Services\\")
3711#define WIN_NS_9X_KEY  SERVICES_KEY TEXT("VxD\\MSTCP")
3712#define WIN_NS_NT_KEY  SERVICES_KEY TEXT("Tcpip\\Parameters")
3713
3714static int
3715load_nameservers_from_registry(struct evdns_base *base)
3716{
3717	int found = 0;
3718	int r;
3719#define TRY(k, name) \
3720	if (!found && config_nameserver_from_reg_key(base,k,TEXT(name)) == 0) { \
3721		log(EVDNS_LOG_DEBUG,"Found nameservers in %s/%s",#k,name); \
3722		found = 1;						\
3723	} else if (!found) {						\
3724		log(EVDNS_LOG_DEBUG,"Didn't find nameservers in %s/%s", \
3725		    #k,#name);						\
3726	}
3727
3728	ASSERT_LOCKED(base);
3729
3730	if (((int)GetVersion()) > 0) { /* NT */
3731		HKEY nt_key = 0, interfaces_key = 0;
3732
3733		if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_NT_KEY, 0,
3734				 KEY_READ, &nt_key) != ERROR_SUCCESS) {
3735			log(EVDNS_LOG_DEBUG,"Couldn't open nt key, %d",(int)GetLastError());
3736			return -1;
3737		}
3738		r = RegOpenKeyEx(nt_key, TEXT("Interfaces"), 0,
3739			     KEY_QUERY_VALUE|KEY_ENUMERATE_SUB_KEYS,
3740			     &interfaces_key);
3741		if (r != ERROR_SUCCESS) {
3742			log(EVDNS_LOG_DEBUG,"Couldn't open interfaces key, %d",(int)GetLastError());
3743			return -1;
3744		}
3745		TRY(nt_key, "NameServer");
3746		TRY(nt_key, "DhcpNameServer");
3747		TRY(interfaces_key, "NameServer");
3748		TRY(interfaces_key, "DhcpNameServer");
3749		RegCloseKey(interfaces_key);
3750		RegCloseKey(nt_key);
3751	} else {
3752		HKEY win_key = 0;
3753		if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_9X_KEY, 0,
3754				 KEY_READ, &win_key) != ERROR_SUCCESS) {
3755			log(EVDNS_LOG_DEBUG, "Couldn't open registry key, %d", (int)GetLastError());
3756			return -1;
3757		}
3758		TRY(win_key, "NameServer");
3759		RegCloseKey(win_key);
3760	}
3761
3762	if (found == 0) {
3763		log(EVDNS_LOG_WARN,"Didn't find any nameservers.");
3764	}
3765
3766	return found ? 0 : -1;
3767#undef TRY
3768}
3769
3770int
3771evdns_base_config_windows_nameservers(struct evdns_base *base)
3772{
3773	int r;
3774	char *fname;
3775	if (base == NULL)
3776		base = current_base;
3777	if (base == NULL)
3778		return -1;
3779	EVDNS_LOCK(base);
3780	if (load_nameservers_with_getnetworkparams(base) == 0) {
3781		EVDNS_UNLOCK(base);
3782		return 0;
3783	}
3784	r = load_nameservers_from_registry(base);
3785
3786	fname = evdns_get_default_hosts_filename();
3787	evdns_base_load_hosts(base, fname);
3788	if (fname)
3789		mm_free(fname);
3790
3791	EVDNS_UNLOCK(base);
3792	return r;
3793}
3794
3795int
3796evdns_config_windows_nameservers(void)
3797{
3798	if (!current_base) {
3799		current_base = evdns_base_new(NULL, 1);
3800		return current_base == NULL ? -1 : 0;
3801	} else {
3802		return evdns_base_config_windows_nameservers(current_base);
3803	}
3804}
3805#endif
3806
/* Allocate and initialize a new evdns_base that uses `event_base`.
 *
 * When `initialize_nameservers` is non-zero the base is configured from
 * the system: the Windows settings on WIN32, "/etc/resolv.conf"
 * elsewhere.
 *
 * Returns the new base, or NULL if the secure RNG cannot be seeded, memory
 * runs out, or the system configuration step reports -1. */
struct evdns_base *
evdns_base_new(struct event_base *event_base, int initialize_nameservers)
{
	struct evdns_base *base;

	if (evutil_secure_rng_init() < 0) {
		log(EVDNS_LOG_WARN, "Unable to seed random number generator; "
		    "DNS can't run.");
		return NULL;
	}

	/* Give the evutil library a hook into its evdns-enabled
	 * functionality.  We can't just call evdns_getaddrinfo directly or
	 * else libevent-core will depend on libevent-extras. */
	evutil_set_evdns_getaddrinfo_fn(evdns_getaddrinfo);

	base = mm_malloc(sizeof(struct evdns_base));
	if (base == NULL)
		return (NULL);
	memset(base, 0, sizeof(struct evdns_base));
	base->req_waiting_head = NULL;

	EVTHREAD_ALLOC_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
	EVDNS_LOCK(base);

	/* Set max requests inflight and allocate req_heads. */
	base->req_heads = NULL;

	if (evdns_base_set_max_requests_inflight(base, 64) < 0) {
		/* Without the table of request heads the base is unusable
		 * (it would have zero buckets), so give up now.  Nothing
		 * but the lock and the base itself exists yet. */
		EVDNS_UNLOCK(base);
		EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
		mm_free(base);
		return NULL;
	}

	base->server_head = NULL;
	base->event_base = event_base;
	base->global_good_nameservers = base->global_requests_inflight =
		base->global_requests_waiting = 0;

	base->global_timeout.tv_sec = 5;
	base->global_timeout.tv_usec = 0;
	base->global_max_reissues = 1;
	base->global_max_retransmits = 3;
	base->global_max_nameserver_timeout = 3;
	base->global_search_state = NULL;
	base->global_randomize_case = 1;
	base->global_getaddrinfo_allow_skew.tv_sec = 3;
	base->global_getaddrinfo_allow_skew.tv_usec = 0;
	base->global_nameserver_probe_initial_timeout.tv_sec = 10;
	base->global_nameserver_probe_initial_timeout.tv_usec = 0;

	TAILQ_INIT(&base->hostsdb);

	if (initialize_nameservers) {
		int r;
#ifdef WIN32
		r = evdns_base_config_windows_nameservers(base);
#else
		r = evdns_base_resolv_conf_parse(base, DNS_OPTIONS_ALL, "/etc/resolv.conf");
#endif
		if (r == -1) {
			evdns_base_free_and_unlock(base, 0);
			return NULL;
		}
	}
	EVDNS_UNLOCK(base);
	return base;
}
3871
3872int
3873evdns_init(void)
3874{
3875	struct evdns_base *base = evdns_base_new(NULL, 1);
3876	if (base) {
3877		current_base = base;
3878		return 0;
3879	} else {
3880		return -1;
3881	}
3882}
3883
3884const char *
3885evdns_err_to_string(int err)
3886{
3887    switch (err) {
3888	case DNS_ERR_NONE: return "no error";
3889	case DNS_ERR_FORMAT: return "misformatted query";
3890	case DNS_ERR_SERVERFAILED: return "server failed";
3891	case DNS_ERR_NOTEXIST: return "name does not exist";
3892	case DNS_ERR_NOTIMPL: return "query not implemented";
3893	case DNS_ERR_REFUSED: return "refused";
3894
3895	case DNS_ERR_TRUNCATED: return "reply truncated or ill-formed";
3896	case DNS_ERR_UNKNOWN: return "unknown";
3897	case DNS_ERR_TIMEOUT: return "request timed out";
3898	case DNS_ERR_SHUTDOWN: return "dns subsystem shut down";
3899	case DNS_ERR_CANCEL: return "dns request canceled";
3900	case DNS_ERR_NODATA: return "no records in the reply";
3901	default: return "[Unknown error code]";
3902    }
3903}
3904
3905static void
3906evdns_nameserver_free(struct nameserver *server)
3907{
3908	if (server->socket >= 0)
3909	evutil_closesocket(server->socket);
3910	(void) event_del(&server->event);
3911	event_debug_unassign(&server->event);
3912	if (server->state == 0)
3913		(void) event_del(&server->timeout_event);
3914	event_debug_unassign(&server->timeout_event);
3915	mm_free(server);
3916}
3917
3918static void
3919evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests)
3920{
3921	struct nameserver *server, *server_next;
3922	struct search_domain *dom, *dom_next;
3923	int i;
3924
3925	/* Requires that we hold the lock. */
3926
3927	/* TODO(nickm) we might need to refcount here. */
3928
3929	for (i = 0; i < base->n_req_heads; ++i) {
3930		while (base->req_heads[i]) {
3931			if (fail_requests)
3932				reply_schedule_callback(base->req_heads[i], 0, DNS_ERR_SHUTDOWN, NULL);
3933			request_finished(base->req_heads[i], &REQ_HEAD(base, base->req_heads[i]->trans_id), 1);
3934		}
3935	}
3936	while (base->req_waiting_head) {
3937		if (fail_requests)
3938			reply_schedule_callback(base->req_waiting_head, 0, DNS_ERR_SHUTDOWN, NULL);
3939		request_finished(base->req_waiting_head, &base->req_waiting_head, 1);
3940	}
3941	base->global_requests_inflight = base->global_requests_waiting = 0;
3942
3943	for (server = base->server_head; server; server = server_next) {
3944		server_next = server->next;
3945		evdns_nameserver_free(server);
3946		if (server_next == base->server_head)
3947			break;
3948	}
3949	base->server_head = NULL;
3950	base->global_good_nameservers = 0;
3951
3952	if (base->global_search_state) {
3953		for (dom = base->global_search_state->head; dom; dom = dom_next) {
3954			dom_next = dom->next;
3955			mm_free(dom);
3956		}
3957		mm_free(base->global_search_state);
3958		base->global_search_state = NULL;
3959	}
3960
3961	{
3962		struct hosts_entry *victim;
3963		while ((victim = TAILQ_FIRST(&base->hostsdb))) {
3964			TAILQ_REMOVE(&base->hostsdb, victim, next);
3965			mm_free(victim);
3966		}
3967	}
3968
3969	mm_free(base->req_heads);
3970
3971	EVDNS_UNLOCK(base);
3972	EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
3973
3974	mm_free(base);
3975}
3976
/* Destroy `base`.  Takes the base lock and hands over to
 * evdns_base_free_and_unlock(), which releases it again; see there for
 * the meaning of `fail_requests`. */
void
evdns_base_free(struct evdns_base *base, int fail_requests)
{
	EVDNS_LOCK(base);
	evdns_base_free_and_unlock(base, fail_requests);
}
3983
3984void
3985evdns_shutdown(int fail_requests)
3986{
3987	if (current_base) {
3988		struct evdns_base *b = current_base;
3989		current_base = NULL;
3990		evdns_base_free(b, fail_requests);
3991	}
3992	evdns_log_fn = NULL;
3993}
3994
3995static int
3996evdns_base_parse_hosts_line(struct evdns_base *base, char *line)
3997{
3998	char *strtok_state;
3999	static const char *const delims = " \t";
4000	char *const addr = strtok_r(line, delims, &strtok_state);
4001	char *hostname, *hash;
4002	struct sockaddr_storage ss;
4003	int socklen = sizeof(ss);
4004	ASSERT_LOCKED(base);
4005
4006#define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
4007
4008	if (!addr || *addr == '#')
4009		return 0;
4010
4011	memset(&ss, 0, sizeof(ss));
4012	if (evutil_parse_sockaddr_port(addr, (struct sockaddr*)&ss, &socklen)<0)
4013		return -1;
4014	if (socklen > (int)sizeof(struct sockaddr_in6))
4015		return -1;
4016
4017	if (sockaddr_getport((struct sockaddr*)&ss))
4018		return -1;
4019
4020	while ((hostname = NEXT_TOKEN)) {
4021		struct hosts_entry *he;
4022		size_t namelen;
4023		if ((hash = strchr(hostname, '#'))) {
4024			if (hash == hostname)
4025				return 0;
4026			*hash = '\0';
4027		}
4028
4029		namelen = strlen(hostname);
4030
4031		he = mm_calloc(1, sizeof(struct hosts_entry)+namelen);
4032		if (!he)
4033			return -1;
4034		EVUTIL_ASSERT(socklen <= (int)sizeof(he->addr));
4035		memcpy(&he->addr, &ss, socklen);
4036		memcpy(he->hostname, hostname, namelen+1);
4037		he->addrlen = socklen;
4038
4039		TAILQ_INSERT_TAIL(&base->hostsdb, he, next);
4040
4041		if (hash)
4042			return 0;
4043	}
4044
4045	return 0;
4046#undef NEXT_TOKEN
4047}
4048
/* Load the hosts file `hosts_fname` into `base`, one line at a time.
 *
 * If `hosts_fname` is NULL or the file cannot be read, entries mapping
 * "localhost" to 127.0.0.1 and ::1 are installed instead.  Returns 0 on
 * success (and for a NULL filename), -1 if the file could not be read.
 * Caller must hold the base lock. */
static int
evdns_base_load_hosts_impl(struct evdns_base *base, const char *hosts_fname)
{
	char *str=NULL, *cp, *eol;
	size_t len;
	int err=0;
	int loaded = 0;

	ASSERT_LOCKED(base);

	if (hosts_fname != NULL) {
		err = evutil_read_file(hosts_fname, &str, &len, 0);
		loaded = (err >= 0);
	}
	if (!loaded) {
		/* no usable file: fall back to the two localhost entries */
		char tmp[64];
		strlcpy(tmp, "127.0.0.1   localhost", sizeof(tmp));
		evdns_base_parse_hosts_line(base, tmp);
		strlcpy(tmp, "::1   localhost", sizeof(tmp));
		evdns_base_parse_hosts_line(base, tmp);
		return err ? -1 : 0;
	}

	/* This will break early if there is a NUL in the hosts file.
	 * Probably not a problem.*/
	cp = str;
	while ((eol = strchr(cp, '\n')) != NULL) {
		*eol = '\0';
		evdns_base_parse_hosts_line(base, cp);
		cp = eol+1;
	}
	/* whatever follows the last newline is the final line */
	evdns_base_parse_hosts_line(base, cp);

	mm_free(str);
	return 0;
}
4087
4088int
4089evdns_base_load_hosts(struct evdns_base *base, const char *hosts_fname)
4090{
4091	int res;
4092	if (!base)
4093		base = current_base;
4094	EVDNS_LOCK(base);
4095	res = evdns_base_load_hosts_impl(base, hosts_fname);
4096	EVDNS_UNLOCK(base);
4097	return res;
4098}
4099
/* A single request for a getaddrinfo, either v4 or v6. */
struct getaddrinfo_subrequest {
	/* The underlying DNS request.  NOTE(review): presumably NULL when no
	 * request is outstanding -- confirm against the users. */
	struct evdns_request *r;
	/* The kind of record this sub-request asks for.  NOTE(review): the
	 * exact constants stored here are not visible in this part of the
	 * file -- confirm against the users. */
	ev_uint32_t type;
};
4105
/* State data used to implement an in-progress getaddrinfo. */
/* One lookup is made of up to two sub-requests (A and AAAA); the fields
 * below keep the answer of whichever finishes first until the other one
 * is done as well. */
struct evdns_getaddrinfo_request {
	/* The evdns_base associated with this lookup. */
	struct evdns_base *evdns_base;
	/* Copy of the modified 'hints' data that we'll use to build
	 * answers. */
	struct evutil_addrinfo hints;
	/* The callback to invoke when we're done */
	evdns_getaddrinfo_cb user_cb;
	/* User-supplied data to give to the callback. */
	void *user_data;
	/* The port to use when building sockaddrs. */
	ev_uint16_t port;
	/* The sub_request for an A record (if any) */
	struct getaddrinfo_subrequest ipv4_request;
	/* The sub_request for an AAAA record (if any) */
	struct getaddrinfo_subrequest ipv6_request;

	/* The cname result that we were told (if any) */
	/* Released with mm_free() by free_getaddrinfo_request(). */
	char *cname_result;

	/* If we have one request answered and one request still inflight,
	 * then this field holds the answer from the first request... */
	/* Released with evutil_freeaddrinfo() by
	 * free_getaddrinfo_request() if still set. */
	struct evutil_addrinfo *pending_result;
	/* And this event is a timeout that will tell us to cancel the second
	 * request if it's taking a long time. */
	struct event timeout;

	/* And this field holds the error code from the first request... */
	int pending_error;
	/* If this is set, the user canceled this request. */
	unsigned user_canceled : 1;
	/* If this is set, the user can no longer cancel this request; we're
	 * just waiting for the free. */
	unsigned request_done : 1;
};
4141
/* Translate an evdns result code into the equivalent getaddrinfo error:
 * DNS_ERR_NONE maps to 0, DNS_ERR_NOTEXIST maps to EVUTIL_EAI_NONAME, and
 * every other code collapses to EVUTIL_EAI_FAIL. */
static int
evdns_err_to_getaddrinfo_err(int e1)
{
	/* XXX Do this better! */
	int eai = EVUTIL_EAI_FAIL;

	if (e1 == DNS_ERR_NONE)
		eai = 0;
	else if (e1 == DNS_ERR_NOTEXIST)
		eai = EVUTIL_EAI_NONAME;

	return eai;
}
4154
/* Choose which of two getaddrinfo errors to report: e1 whenever it is
 * nonzero, otherwise e2. */
static int
getaddrinfo_merge_err(int e1, int e2)
{
	/* XXXX be cleverer here. */
	return (e1 != 0) ? e1 : e2;
}
4165
/* Release a getaddrinfo request along with everything it still holds: a
 * stashed answer, a saved CNAME string, and its timeout event.
 *
 * DO NOT CALL this while either sub-request is pending.  It is only safe
 * once both callbacks have been invoked. */
static void
free_getaddrinfo_request(struct evdns_getaddrinfo_request *data)
{
	struct evutil_addrinfo *stashed = data->pending_result;
	char *cname = data->cname_result;

	if (stashed != NULL) {
		evutil_freeaddrinfo(stashed);
	}
	if (cname != NULL) {
		mm_free(cname);
	}
	event_del(&data->timeout);
	mm_free(data);
}
4179
/* Move the saved CNAME (if any) into 'ai' as its canonical name.  The request
 * clears its own pointer afterwards, so free_getaddrinfo_request() will not
 * free the string a second time.  Does nothing when no CNAME was recorded or
 * when 'ai' is NULL. */
static void
add_cname_to_reply(struct evdns_getaddrinfo_request *data,
    struct evutil_addrinfo *ai)
{
	if (!data->cname_result || !ai)
		return;

	ai->ai_canonname = data->cname_result;
	data->cname_result = NULL;
}
4189
/* Timer callback for a mixed A/AAAA getaddrinfo: one lookup has already
 * finished, and the other has taken too long.  Cancel whatever is still
 * outstanding, bump the matching timeout counter, and hand the user the
 * outcome we already have.
 */
static void
evdns_getaddrinfo_timeout_cb(evutil_socket_t fd, short what, void *ptr)
{
	struct evdns_getaddrinfo_request *data = ptr;
	int v4_timedout = 0, v6_timedout = 0;

	/* Give up on the A lookup if it is the one still outstanding. */
	if (data->ipv4_request.r != NULL) {
		evdns_cancel_request(NULL, data->ipv4_request.r);
		v4_timedout = 1;
		EVDNS_LOCK(data->evdns_base);
		data->evdns_base->getaddrinfo_ipv4_timeouts++;
		EVDNS_UNLOCK(data->evdns_base);
	}
	/* Likewise for the AAAA lookup. */
	if (data->ipv6_request.r != NULL) {
		evdns_cancel_request(NULL, data->ipv6_request.r);
		v6_timedout = 1;
		EVDNS_LOCK(data->evdns_base);
		data->evdns_base->getaddrinfo_ipv6_timeouts++;
		EVDNS_UNLOCK(data->evdns_base);
	}

	/* This timer is only armed after one of the two lookups has
	 * completed, so at most one of them can still be outstanding. */
	EVUTIL_ASSERT(!v4_timedout || !v6_timedout);

	/* Deliver the outcome of the lookup that did complete. */
	if (data->pending_result == NULL) {
		/* No stashed answer: report the saved error, falling back
		 * to "try again" when no error was recorded either. */
		int e = data->pending_error ? data->pending_error
		    : EVUTIL_EAI_AGAIN;
		data->user_cb(e, NULL, data->user_data);
	} else {
		add_cname_to_reply(data, data->pending_result);
		data->user_cb(0, data->pending_result, data->user_data);
		data->pending_result = NULL;
	}

	if (!(v4_timedout || v6_timedout)) {
		/* should be impossible? XXXX */
		free_getaddrinfo_request(data);
	}
}
4237
/* Schedule the request's timeout event using the base's
 * global_getaddrinfo_allow_skew interval.  Returns the result of
 * event_add(). */
static int
evdns_getaddrinfo_set_timeout(struct evdns_base *evdns_base,
    struct evdns_getaddrinfo_request *data)
{
	int rv;

	rv = event_add(&data->timeout,
	    &evdns_base->global_getaddrinfo_allow_skew);
	return rv;
}
4244
4245static inline int
4246evdns_result_is_answer(int result)
4247{
4248	return (result != DNS_ERR_NOTIMPL && result != DNS_ERR_REFUSED &&
4249	    result != DNS_ERR_SERVERFAILED && result != DNS_ERR_CANCEL);
4250}
4251
/* Callback: invoked when one of the two sub-requests (A or AAAA) of a
 * getaddrinfo has finished with an answer, an error, or a cancellation.
 *
 * 'arg' points at the getaddrinfo_subrequest embedded in the owning
 * evdns_getaddrinfo_request.  On success 'addresses' holds 'count' packed
 * addresses of 4 bytes each (A) or 16 bytes each (AAAA).  'ttl' is not used
 * here.
 *
 * Depending on the state of the other sub-request, this either stashes the
 * outcome in the request and arms the skew timeout, or combines both
 * outcomes, reports them through user_cb, and frees the request.
 */
static void
evdns_getaddrinfo_gotresolve(int result, char type, int count,
    int ttl, void *addresses, void *arg)
{
	int i;
	struct getaddrinfo_subrequest *req = arg;
	struct getaddrinfo_subrequest *other_req;
	struct evdns_getaddrinfo_request *data;

	struct evutil_addrinfo *res;

	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	struct sockaddr *sa;
	int socklen, addrlen;
	void *addrp;
	int err;
	int user_canceled;

	/* Recover the enclosing request from the embedded sub-request, and
	 * note which slot belongs to the other address family. */
	EVUTIL_ASSERT(req->type == DNS_IPv4_A || req->type == DNS_IPv6_AAAA);
	if (req->type == DNS_IPv4_A) {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv4_request);
		other_req = &data->ipv6_request;
	} else {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv6_request);
		other_req = &data->ipv4_request;
	}

	/* Under the base lock: update the per-family statistics, take a
	 * snapshot of the cancel flag, and mark the request as done if this
	 * was the last outstanding lookup (after which
	 * evdns_getaddrinfo_cancel() becomes a no-op). */
	EVDNS_LOCK(data->evdns_base);
	if (evdns_result_is_answer(result)) {
		if (req->type == DNS_IPv4_A)
			++data->evdns_base->getaddrinfo_ipv4_answered;
		else
			++data->evdns_base->getaddrinfo_ipv6_answered;
	}
	user_canceled = data->user_canceled;
	if (other_req->r == NULL)
		data->request_done = 1;
	EVDNS_UNLOCK(data->evdns_base);

	/* This lookup is finished; a NULL 'r' is how the rest of this code
	 * recognizes a sub-request that is no longer outstanding. */
	req->r = NULL;

	if (result == DNS_ERR_CANCEL && ! user_canceled) {
		/* Internal cancel request from timeout or internal error.
		 * we already answered the user. */
		if (other_req->r == NULL)
			free_getaddrinfo_request(data);
		return;
	}

	/* Map the DNS outcome to a getaddrinfo error.  A successful reply
	 * that carries no records is reported as EVUTIL_EAI_NODATA. */
	if (result == DNS_ERR_NONE) {
		if (count == 0)
			err = EVUTIL_EAI_NODATA;
		else
			err = 0;
	} else {
		err = evdns_err_to_getaddrinfo_err(result);
	}

	if (err) {
		/* Looks like we got an error. */
		if (other_req->r) {
			/* The other request is still working; maybe it will
			 * succeed. */
			/* XXXX handle failure from set_timeout */
			evdns_getaddrinfo_set_timeout(data->evdns_base, data);
			data->pending_error = err;
			return;
		}

		/* Both lookups are finished, so this is the final report. */
		if (user_canceled) {
			data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		} else if (data->pending_result) {
			/* If we have an answer waiting, and we weren't
			 * canceled, ignore this error. */
			add_cname_to_reply(data, data->pending_result);
			data->user_cb(0, data->pending_result, data->user_data);
			data->pending_result = NULL;
		} else {
			if (data->pending_error)
				err = getaddrinfo_merge_err(err,
				    data->pending_error);
			data->user_cb(err, NULL, data->user_data);
		}
		free_getaddrinfo_request(data);
		return;
	} else if (user_canceled) {
		if (other_req->r) {
			/* The other request is still working; let it hit this
			 * callback with EVUTIL_EAI_CANCEL callback and report
			 * the failure. */
			return;
		}
		data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		free_getaddrinfo_request(data);
		return;
	}

	/* Looks like we got some answers. We should turn them into addrinfos
	 * and then either queue those or return them all. */
	EVUTIL_ASSERT(type == DNS_IPv4_A || type == DNS_IPv6_AAAA);

	/* Prepare one sockaddr template for the family of this reply.
	 * 'addrp' points at the address field inside the template and
	 * 'addrlen' is the size of one packed address in 'addresses'. */
	if (type == DNS_IPv4_A) {
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_port = htons(data->port);

		sa = (struct sockaddr *)&sin;
		socklen = sizeof(sin);
		addrlen = 4;
		addrp = &sin.sin_addr.s_addr;
	} else {
		memset(&sin6, 0, sizeof(sin6));
		sin6.sin6_family = AF_INET6;
		sin6.sin6_port = htons(data->port);

		sa = (struct sockaddr *)&sin6;
		socklen = sizeof(sin6);
		addrlen = 16;
		addrp = &sin6.sin6_addr.s6_addr;
	}

	/* Copy each raw address into the template in turn and build one
	 * addrinfo from it, chaining the results together in 'res'. */
	res = NULL;
	for (i=0; i < count; ++i) {
		struct evutil_addrinfo *ai;
		memcpy(addrp, ((char*)addresses)+i*addrlen, addrlen);
		ai = evutil_new_addrinfo(sa, socklen, &data->hints);
		if (!ai) {
			/* Out of memory: cancel the other lookup if it is
			 * still outstanding, report the failure, and drop
			 * what was built so far.  The request itself is
			 * only freed here when no other lookup remains. */
			if (other_req->r) {
				evdns_cancel_request(NULL, other_req->r);
			}
			data->user_cb(EVUTIL_EAI_MEMORY, NULL, data->user_data);
			if (res)
				evutil_freeaddrinfo(res);

			if (other_req->r == NULL)
				free_getaddrinfo_request(data);
			return;
		}
		res = evutil_addrinfo_append(res, ai);
	}

	if (other_req->r) {
		/* The other request is still in progress; wait for it */
		/* XXXX handle failure from set_timeout */
		evdns_getaddrinfo_set_timeout(data->evdns_base, data);
		data->pending_result = res;
		return;
	} else {
		/* The other request is done or never started; append its
		 * results (if any) and return them. */
		if (data->pending_result) {
			/* NOTE(review): presumably
			 * evutil_addrinfo_append(first, second) links
			 * 'second' after 'first', which would put the A
			 * results ahead of the AAAA ones in both branches;
			 * confirm against that helper. */
			if (req->type == DNS_IPv4_A)
				res = evutil_addrinfo_append(res,
				    data->pending_result);
			else
				res = evutil_addrinfo_append(
				    data->pending_result, res);
			data->pending_result = NULL;
		}

		/* Call the user callback. */
		add_cname_to_reply(data, res);
		data->user_cb(0, res, data->user_data);

		/* Free data. */
		free_getaddrinfo_request(data);
	}
}
4421
4422static struct hosts_entry *
4423find_hosts_entry(struct evdns_base *base, const char *hostname,
4424    struct hosts_entry *find_after)
4425{
4426	struct hosts_entry *e;
4427
4428	if (find_after)
4429		e = TAILQ_NEXT(find_after, next);
4430	else
4431		e = TAILQ_FIRST(&base->hostsdb);
4432
4433	for (; e; e = TAILQ_NEXT(e, next)) {
4434		if (!evutil_ascii_strcasecmp(e->hostname, hostname))
4435			return e;
4436	}
4437	return NULL;
4438}
4439
4440static int
4441evdns_getaddrinfo_fromhosts(struct evdns_base *base,
4442    const char *nodename, struct evutil_addrinfo *hints, ev_uint16_t port,
4443    struct evutil_addrinfo **res)
4444{
4445	int n_found = 0;
4446	struct hosts_entry *e;
4447	struct evutil_addrinfo *ai=NULL;
4448	int f = hints->ai_family;
4449
4450	EVDNS_LOCK(base);
4451	for (e = find_hosts_entry(base, nodename, NULL); e;
4452	    e = find_hosts_entry(base, nodename, e)) {
4453		struct evutil_addrinfo *ai_new;
4454		++n_found;
4455		if ((e->addr.sa.sa_family == AF_INET && f == PF_INET6) ||
4456		    (e->addr.sa.sa_family == AF_INET6 && f == PF_INET))
4457			continue;
4458		ai_new = evutil_new_addrinfo(&e->addr.sa, e->addrlen, hints);
4459		if (!ai_new) {
4460			n_found = 0;
4461			goto out;
4462		}
4463		sockaddr_setport(ai_new->ai_addr, port);
4464		ai = evutil_addrinfo_append(ai, ai_new);
4465	}
4466	EVDNS_UNLOCK(base);
4467out:
4468	if (n_found) {
4469		/* Note that we return an empty answer if we found entries for
4470		 * this hostname but none were of the right address type. */
4471		*res = ai;
4472		return 0;
4473	} else {
4474		if (ai)
4475			evutil_freeaddrinfo(ai);
4476		return -1;
4477	}
4478}
4479
/* Start an asynchronous getaddrinfo-style lookup of 'nodename'/'servname'.
 *
 * 'dns_base' may be NULL, in which case current_base is used.  'hints_in'
 * may be NULL, which is treated as zeroed hints with ai_family PF_UNSPEC.
 *
 * When the outcome is known right away, 'cb' is invoked with 'arg' before
 * this function returns, and NULL is returned.  That covers: no base
 * available, EVUTIL_AI_NUMERICHOST requested, a name that needs no
 * resolving, a match in the hosts entries, an allocation failure, or a
 * failure to launch any DNS request.
 *
 * Otherwise one or two DNS requests (A and/or AAAA, according to the hints'
 * address family) are launched, and the returned handle may be passed to
 * evdns_getaddrinfo_cancel().
 */
struct evdns_getaddrinfo_request *
evdns_getaddrinfo(struct evdns_base *dns_base,
    const char *nodename, const char *servname,
    const struct evutil_addrinfo *hints_in,
    evdns_getaddrinfo_cb cb, void *arg)
{
	struct evdns_getaddrinfo_request *data;
	struct evutil_addrinfo hints;
	struct evutil_addrinfo *res = NULL;
	int err;
	int port = 0;
	int want_cname = 0;

	if (!dns_base) {
		dns_base = current_base;
		if (!dns_base) {
			log(EVDNS_LOG_WARN,
			    "Call to getaddrinfo_async with no "
			    "evdns_base configured.");
			cb(EVUTIL_EAI_FAIL, NULL, arg); /* ??? better error? */
			return NULL;
		}
	}

	/* If we _must_ answer this immediately, do so. */
	if ((hints_in && (hints_in->ai_flags & EVUTIL_AI_NUMERICHOST))) {
		res = NULL;
		err = evutil_getaddrinfo(nodename, servname, hints_in, &res);
		cb(err, res, arg);
		return NULL;
	}

	if (hints_in) {
		memcpy(&hints, hints_in, sizeof(hints));
	} else {
		memset(&hints, 0, sizeof(hints));
		hints.ai_family = PF_UNSPEC;
	}

	evutil_adjust_hints_for_addrconfig(&hints);

	/* Now try to see if we _can_ answer immediately. */
	/* (It would be nice to do this by calling getaddrinfo directly, with
	 * AI_NUMERICHOST, on platforms that have it, but we can't: there isn't
	 * a reliable way to distinguish the "that wasn't a numeric host!" case
	 * from any other EAI_NONAME cases.) */
	err = evutil_getaddrinfo_common(nodename, servname, &hints, &res, &port);
	if (err != EVUTIL_EAI_NEED_RESOLVE) {
		cb(err, res, arg);
		return NULL;
	}

	/* If there is an entry in the hosts file, we should give it now. */
	if (!evdns_getaddrinfo_fromhosts(dns_base, nodename, &hints, port, &res)) {
		cb(0, res, arg);
		return NULL;
	}

	/* Okay, things are serious now. We're going to need to actually
	 * launch a request.
	 */
	data = mm_calloc(1,sizeof(struct evdns_getaddrinfo_request));
	if (!data) {
		cb(EVUTIL_EAI_MEMORY, NULL, arg);
		return NULL;
	}

	memcpy(&data->hints, &hints, sizeof(data->hints));
	data->port = (ev_uint16_t)port;
	data->ipv4_request.type = DNS_IPv4_A;
	data->ipv6_request.type = DNS_IPv6_AAAA;
	data->user_cb = cb;
	data->user_data = arg;
	data->evdns_base = dns_base;

	want_cname = (hints.ai_flags & EVUTIL_AI_CANONNAME);

	/* If we are asked for a PF_UNSPEC address, we launch two requests in
	 * parallel: one for an A address and one for an AAAA address.  We
	 * can't send just one request, since many servers only answer one
	 * question per DNS request.
	 *
	 * Once we have the answer to one request, we allow for a short
	 * timeout before we report it, to see if the other one arrives.  If
	 * they both show up in time, then we report both the answers.
	 *
	 * If too many addresses of one type time out or fail, we should stop
	 * launching those requests. (XXX we don't do that yet.)
	 */

	if (hints.ai_family != PF_INET6) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv4 as %p",
		    nodename, &data->ipv4_request);

		data->ipv4_request.r = evdns_base_resolve_ipv4(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv4_request);
		/* The resolve call can fail and return NULL (handled below),
		 * so only hook up the CNAME slot when a request exists. */
		if (want_cname && data->ipv4_request.r)
			data->ipv4_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}
	if (hints.ai_family != PF_INET) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv6 as %p",
		    nodename, &data->ipv6_request);

		data->ipv6_request.r = evdns_base_resolve_ipv6(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv6_request);
		/* Same guard as for the A request above. */
		if (want_cname && data->ipv6_request.r)
			data->ipv6_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}

	evtimer_assign(&data->timeout, dns_base->event_base,
	    evdns_getaddrinfo_timeout_cb, data);

	if (data->ipv4_request.r || data->ipv6_request.r) {
		return data;
	} else {
		/* Neither request could be launched: nothing will ever call
		 * back for 'data', so release it and fail right away. */
		mm_free(data);
		cb(EVUTIL_EAI_FAIL, NULL, arg);
		return NULL;
	}
}
4604
/* Cancel an in-progress getaddrinfo request.  If the request is already
 * marked done this does nothing.  Otherwise the timeout is removed, the
 * request is flagged as canceled by the user, and every sub-request that is
 * still outstanding is canceled.  The base lock is held throughout. */
void
evdns_getaddrinfo_cancel(struct evdns_getaddrinfo_request *data)
{
	EVDNS_LOCK(data->evdns_base);
	if (!data->request_done) {
		event_del(&data->timeout);
		data->user_canceled = 1;
		if (data->ipv4_request.r != NULL)
			evdns_cancel_request(data->evdns_base,
			    data->ipv4_request.r);
		if (data->ipv6_request.r != NULL)
			evdns_cancel_request(data->evdns_base,
			    data->ipv6_request.r);
	}
	EVDNS_UNLOCK(data->evdns_base);
}
4621