1/* Copyright 2006-2007 Niels Provos
2 * Copyright 2007-2012 Nick Mathewson and Niels Provos
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 *    derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
/* Based on software by Adam Langley. Adam's original message:
28 *
29 * Async DNS Library
30 * Adam Langley <agl@imperialviolet.org>
31 * http://www.imperialviolet.org/eventdns.html
32 * Public Domain code
33 *
34 * This software is Public Domain. To view a copy of the public domain dedication,
35 * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
36 * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
37 *
38 * I ask and expect, but do not require, that all derivative works contain an
39 * attribution similar to:
40 *	Parts developed by Adam Langley <agl@imperialviolet.org>
41 *
42 * You may wish to replace the word "Parts" with something else depending on
43 * the amount of original code.
44 *
45 * (Derivative works does not include programs which link against, run or include
46 * the source verbatim in their source distributions)
47 *
48 * Version: 0.1b
49 */
50
51#include <sys/types.h>
52#include "event2/event-config.h"
53
54#ifndef _FORTIFY_SOURCE
55#define _FORTIFY_SOURCE 3
56#endif
57
58#include <string.h>
59#include <fcntl.h>
60#ifdef _EVENT_HAVE_SYS_TIME_H
61#include <sys/time.h>
62#endif
63#ifdef _EVENT_HAVE_STDINT_H
64#include <stdint.h>
65#endif
66#include <stdlib.h>
67#include <string.h>
68#include <errno.h>
69#ifdef _EVENT_HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#include <limits.h>
73#include <sys/stat.h>
74#include <stdio.h>
75#include <stdarg.h>
76#ifdef WIN32
77#include <winsock2.h>
78#include <ws2tcpip.h>
79#ifndef _WIN32_IE
80#define _WIN32_IE 0x400
81#endif
82#include <shlobj.h>
83#endif
84
85#include "event2/dns.h"
86#include "event2/dns_struct.h"
87#include "event2/dns_compat.h"
88#include "event2/util.h"
89#include "event2/event.h"
90#include "event2/event_struct.h"
91#include "event2/thread.h"
92
93#include "event2/bufferevent.h"
94#include "event2/bufferevent_struct.h"
95#include "bufferevent-internal.h"
96
97#include "defer-internal.h"
98#include "log-internal.h"
99#include "mm-internal.h"
100#include "strlcpy-internal.h"
101#include "ipv6-internal.h"
102#include "util-internal.h"
103#include "evthread-internal.h"
104#ifdef WIN32
105#include <ctype.h>
106#include <winsock2.h>
107#include <windows.h>
108#include <iphlpapi.h>
109#include <io.h>
110#else
111#include <sys/socket.h>
112#include <netinet/in.h>
113#include <arpa/inet.h>
114#endif
115
116#ifdef _EVENT_HAVE_NETINET_IN6_H
117#include <netinet/in6.h>
118#endif
119
/* Severity levels passed as the first argument of this file's logging
 * helper (_evdns_log / log). */
#define EVDNS_LOG_DEBUG 0
#define EVDNS_LOG_WARN 1
#define EVDNS_LOG_MSG 2

/* Fallback for platforms whose headers do not define HOST_NAME_MAX. */
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 255
#endif
127
128#include <stdio.h>
129
/* Drop any MIN a system header may have supplied and use our own.  Note
 * that each argument may be evaluated twice. */
#undef MIN
#define MIN(a,b) ((a)<(b)?(a):(b))

/* Sanity check: a request must have a handle, and that handle must point
 * back at the request. */
#define ASSERT_VALID_REQUEST(req) \
	EVUTIL_ASSERT((req)->handle && (req)->handle->current_req == (req))

/* Short aliases for the fixed-width unsigned integer types. */
#define u64 ev_uint64_t
#define u32 ev_uint32_t
#define u16 ev_uint16_t
#define u8  ev_uint8_t

/* maximum number of addresses from a single packet */
/* that we bother recording */
#define MAX_V4_ADDRS 32
#define MAX_V6_ADDRS 32


/* Record type codes used in this file.  TYPE_CNAME is spelled as a literal */
/* number; the others alias the public EVDNS_TYPE_* constants. */
#define TYPE_A	       EVDNS_TYPE_A
#define TYPE_CNAME     5
#define TYPE_PTR       EVDNS_TYPE_PTR
#define TYPE_SOA       EVDNS_TYPE_SOA
#define TYPE_AAAA      EVDNS_TYPE_AAAA

/* Record class code; aliases the public EVDNS_CLASS_INET constant. */
#define CLASS_INET     EVDNS_CLASS_INET
154
/* Persistent handle.  We keep this separate from 'struct request' since we
 * need some object to last for as long as an evdns_request is outstanding so
 * that it can be canceled, whereas a search request can lead to multiple
 * 'struct request' instances being created over its lifetime. */
struct evdns_request {
	/* The 'struct request' currently acting for this handle.  It is
	 * reset to NULL by request_finished() when that request goes away. */
	struct request *current_req;
	struct evdns_base *base;

	/* Set to 1 by reply_schedule_callback() once a deferred user
	 * callback has been queued.  While it is set, request_finished()
	 * leaves the handle alone and reply_run_callback() frees it after
	 * the callback has run. */
	int pending_cb; /* Waiting for its callback to be invoked; not
			 * owned by event base any more. */

	/* elements used by the searching code */
	int search_index;
	struct search_state *search_state;
	char *search_origname;	/* needs to be free()ed */
	int search_flags;
};
172
/* A single DNS query packet and the bookkeeping needed to send it, retry */
/* it, and deliver its result to the user callback. */
struct request {
	u8 *request;  /* the dns packet data */
	u8 request_type; /* TYPE_PTR or TYPE_A or TYPE_AAAA */
	unsigned int request_len;
	/* Number of times this request has been moved to another nameserver */
	/* by request_reissue(); compared against global_max_reissues. */
	int reissue_count;
	int tx_count;  /* the number of times that this packet has been sent */
	void *user_pointer;  /* the pointer given to us for this request */
	evdns_callback_type user_callback;
	struct nameserver *ns;	/* the server which we last sent it */

	/* these objects are kept in a circular list */
	/* XXX We could turn this into a CIRCLEQ. */
	struct request *next, *prev;

	/* Timer for this request; request_finished() deletes it only for */
	/* requests that were on an inflight list. */
	struct event timeout_event;

	u16 trans_id;  /* the transaction id */
	unsigned request_appended :1;	/* true if the request pointer is data which follows this struct */
	unsigned transmit_me :1;  /* needs to be transmitted */

	/* XXXX This is a horrible hack. */
	char **put_cname_in_ptr; /* store the cname here if we get one. */

	struct evdns_base *base;

	/* Owning handle; ASSERT_VALID_REQUEST checks that */
	/* handle->current_req points back at this request. */
	struct evdns_request *handle;
};
200
/* Answer data extracted from one response packet.  reply_run_callback() */
/* passes the union member matching the request type to the user callback. */
struct reply {
	unsigned int type;
	/* Zero when no usable answer was found; reply_handle() then reports */
	/* an error instead of handing data to the user. */
	unsigned int have_answer : 1;
	union {
		struct {
			u32 addrcount; /* number of entries used in addresses[] */
			u32 addresses[MAX_V4_ADDRS];
		} a;
		struct {
			u32 addrcount; /* number of entries used in addresses[] */
			struct in6_addr addresses[MAX_V6_ADDRS];
		} aaaa;
		struct {
			char name[HOST_NAME_MAX];
		} ptr;
	} data;
};
218
/* One upstream nameserver, together with its socket and the health state */
/* used to decide when to probe it again. */
struct nameserver {
	evutil_socket_t socket;	 /* a connected UDP socket */
	struct sockaddr_storage address;
	ev_socklen_t addrlen;
	/* Set to 1 by nameserver_failed(), incremented by */
	/* nameserver_probe_failed() (where it also controls how far the */
	/* probe delay is backed off), and reset to 0 by nameserver_up(). */
	int failed_times;  /* number of times which we have given this server a chance */
	int timedout;  /* number of times in a row a request has timed out */
	struct event event;
	/* these objects are kept in a circular list */
	struct nameserver *next, *prev;
	struct event timeout_event;  /* used to keep the timeout for */
				     /* when we next probe this server. */
				     /* Valid if state == 0 */
	/* Outstanding probe request for this nameserver, if any */
	struct evdns_request *probe_request;
	char state;  /* zero if we think that this server is down */
	char choked;  /* true if we have an EAGAIN from this server's socket */
	char write_waiting;  /* true if we are waiting for EV_WRITE events */
	struct evdns_base *base;
};
238
239
/* Represents a local port where we're listening for DNS requests. Right now, */
/* only UDP is supported. */
struct evdns_server_port {
	evutil_socket_t socket; /* socket we use to read queries and write replies. */
	int refcnt; /* reference count. */
	char choked; /* Are we currently blocked from writing? */
	char closing; /* Are we trying to close this port, pending writes? */
	evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
	void *user_data; /* Opaque pointer passed to user_callback */
	struct event event; /* Read/write event */
	/* circular list of replies that we want to write. */
	struct server_request *pending_replies;
	struct event_base *event_base;

	/* Only present when thread support is compiled in. */
#ifndef _EVENT_DISABLE_THREAD_SUPPORT
	void *lock;
#endif
};
258
/* Represents part of a reply being built.  (That is, a single RR.) */
struct server_reply_item {
	struct server_reply_item *next; /* next item in sequence. */
	char *name; /* name part of the RR */
	u16 type; /* The RR type */
	u16 class; /* The RR class (usually CLASS_INET) */
	u32 ttl; /* The RR TTL */
	char is_name; /* True iff data is a label */
	/* NOTE(review): this field is an unsigned 16-bit value, so the "-1" */
	/* mentioned below would be stored as 0xffff -- confirm against the */
	/* code that fills this field in. */
	u16 datalen; /* Length of data; -1 if data is a label */
	void *data; /* The contents of the RR */
};
270
/* Represents a request that we've received as a DNS server, and holds */
/* the components of the reply as we're constructing it. */
struct server_request {
	/* Pointers to the next and previous entries on the list of replies */
	/* that we're waiting to write.	 Only set if we have tried to respond */
	/* and gotten EAGAIN. */
	struct server_request *next_pending;
	struct server_request *prev_pending;

	u16 trans_id; /* Transaction id. */
	struct evdns_server_port *port; /* Which port received this request on? */
	struct sockaddr_storage addr; /* Where to send the response */
	ev_socklen_t addrlen; /* length of addr */

	int n_answer; /* how many answer RRs have been set? */
	int n_authority; /* how many authority RRs have been set? */
	int n_additional; /* how many additional RRs have been set? */

	struct server_reply_item *answer; /* linked list of answer RRs */
	struct server_reply_item *authority; /* linked list of authority RRs */
	struct server_reply_item *additional; /* linked list of additional RRs */

	/* Constructed response.  Only set once we're ready to send a reply. */
	/* Once this is set, the RR fields are cleared, and no more should be set. */
	char *response;
	size_t response_len;

	/* Caller-visible fields: flags, questions. */
	/* The TO_SERVER_REQUEST() macro recovers the enclosing server_request */
	/* from a pointer to this member. */
	struct evdns_server_request base;
};
301
/* All the state belonging to one resolver instance: its request queues, */
/* its nameservers and its tunable limits and timeouts. */
struct evdns_base {
	/* An array of n_req_heads circular lists for inflight requests.
	 * Each inflight request req is in req_heads[req->trans_id % n_req_heads].
	 */
	struct request **req_heads;
	/* A circular list of requests that we're waiting to send, but haven't
	 * sent yet because there are too many requests inflight */
	struct request *req_waiting_head;
	/* A circular list of nameservers. */
	struct nameserver *server_head;
	int n_req_heads;

	/* The event_base whose deferred-callback queue is used to run user
	 * callbacks (see reply_schedule_callback()). */
	struct event_base *event_base;

	/* The number of good nameservers that we have */
	int global_good_nameservers;

	/* inflight requests are contained in the req_head list */
	/* and are actually going out across the network */
	int global_requests_inflight;
	/* requests which aren't inflight are in the waiting list */
	/* and are counted here */
	int global_requests_waiting;

	/* Upper bound on global_requests_inflight; waiting requests are only
	 * promoted while the inflight count is below this. */
	int global_max_requests_inflight;

	struct timeval global_timeout;	/* 5 seconds by default */
	int global_max_reissues;  /* a reissue occurs when we get some errors from the server */
	int global_max_retransmits;  /* number of times we'll retransmit a request which timed out */
	/* number of timeouts in a row before we consider this server to be down */
	int global_max_nameserver_timeout;
	/* true iff we will use the 0x20 hack to prevent poisoning attacks. */
	int global_randomize_case;

	/* The first time that a nameserver fails, how long do we wait before
	 * probing to see if it has returned?  */
	struct timeval global_nameserver_probe_initial_timeout;

	/** Port to bind to for outgoing DNS packets. */
	struct sockaddr_storage global_outgoing_address;
	/** ev_socklen_t for global_outgoing_address. 0 if it isn't set. */
	ev_socklen_t global_outgoing_addrlen;

	struct timeval global_getaddrinfo_allow_skew;

	/* NOTE(review): presumably counters kept by the getaddrinfo
	 * emulation; their users are not in this part of the file. */
	int getaddrinfo_ipv4_timeouts;
	int getaddrinfo_ipv6_timeouts;
	int getaddrinfo_ipv4_answered;
	int getaddrinfo_ipv6_answered;

	struct search_state *global_search_state;

	/* List of 'struct hosts_entry' records. */
	TAILQ_HEAD(hosts_list, hosts_entry) hostsdb;

	/* Lock taken by EVDNS_LOCK()/EVDNS_UNLOCK(); only present when
	 * thread support is compiled in. */
#ifndef _EVENT_DISABLE_THREAD_SUPPORT
	void *lock;
#endif
};
360
/* One record on an evdns_base's hostsdb list: an address together with */
/* the hostname it belongs to. */
struct hosts_entry {
	TAILQ_ENTRY(hosts_entry) next;
	union {
		struct sockaddr sa;
		struct sockaddr_in sin;
		struct sockaddr_in6 sin6;
	} addr;
	int addrlen;
	/* NOTE(review): declared with one element; presumably the entry is */
	/* over-allocated so the whole name fits -- confirm at the */
	/* allocation site. */
	char hostname[1];
};
371
/* The process-wide default resolver; NULL until something assigns it. */
static struct evdns_base *current_base = NULL;

/* Return the current global evdns_base (the value of current_base), which */
/* may be NULL. */
struct evdns_base *
evdns_get_global_base(void)
{
	return current_base;
}
379
/* Given a pointer to an evdns_server_request, get the corresponding */
/* server_request. */
#define TO_SERVER_REQUEST(base_ptr)					\
	((struct server_request*)					\
	  (((char*)(base_ptr) - evutil_offsetof(struct server_request, base))))

/* Yield (as an lvalue) the head of the inflight list that holds requests */
/* with transaction id 'id' in 'base'.  'id' is parenthesized so that an */
/* expression argument such as "a + b" is reduced modulo n_req_heads as a */
/* whole, instead of having only its last operand reduced. */
#define REQ_HEAD(base, id) ((base)->req_heads[(id) % (base)->n_req_heads])
387
/* Forward declarations of file-local (static) functions, so that they can */
/* be called before their definitions appear. */
static struct nameserver *nameserver_pick(struct evdns_base *base);
static void evdns_request_insert(struct request *req, struct request **head);
static void evdns_request_remove(struct request *req, struct request **head);
static void nameserver_ready_callback(evutil_socket_t fd, short events, void *arg);
static int evdns_transmit(struct evdns_base *base);
static int evdns_request_transmit(struct request *req);
static void nameserver_send_probe(struct nameserver *const ns);
static void search_request_finished(struct evdns_request *const);
static int search_try_next(struct evdns_request *const req);
static struct request *search_request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg);
static void evdns_requests_pump_waiting_queue(struct evdns_base *base);
static u16 transaction_id_pick(struct evdns_base *base);
static struct request *request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
static void request_submit(struct request *const req);

static int server_request_free(struct server_request *req);
static void server_request_free_answers(struct server_request *req);
static void server_port_free(struct evdns_server_port *port);
static void server_port_ready_callback(evutil_socket_t fd, short events, void *arg);
static int evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename);
static int evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags);
static void evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests);

static int strtoint(const char *const str);
413
/* Locking helpers.  With thread support compiled out, all three expand to */
/* an empty statement; otherwise they act on (base)->lock. */
#ifdef _EVENT_DISABLE_THREAD_SUPPORT
#define EVDNS_LOCK(base)  _EVUTIL_NIL_STMT
#define EVDNS_UNLOCK(base) _EVUTIL_NIL_STMT
#define ASSERT_LOCKED(base) _EVUTIL_NIL_STMT
#else
#define EVDNS_LOCK(base)			\
	EVLOCK_LOCK((base)->lock, 0)
#define EVDNS_UNLOCK(base)			\
	EVLOCK_UNLOCK((base)->lock, 0)
#define ASSERT_LOCKED(base)			\
	EVLOCK_ASSERT_LOCKED((base)->lock)
#endif
426
427static void
428default_evdns_log_fn(int warning, const char *buf)
429{
430	if (warning == EVDNS_LOG_WARN)
431		event_warnx("[evdns] %s", buf);
432	else if (warning == EVDNS_LOG_MSG)
433		event_msgx("[evdns] %s", buf);
434	else
435		event_debug(("[evdns] %s", buf));
436}
437
/* User-installed log callback; NULL until evdns_set_log_fn() is given one. */
static evdns_debug_log_fn_type evdns_log_fn = NULL;

/* Install 'fn' as the callback that receives evdns log messages.  Passing */
/* NULL removes any previously installed callback. */
void
evdns_set_log_fn(evdns_debug_log_fn_type fn)
{
	evdns_log_fn = fn;
}
445
446#ifdef __GNUC__
447#define EVDNS_LOG_CHECK	 __attribute__ ((format(printf, 2, 3)))
448#else
449#define EVDNS_LOG_CHECK
450#endif
451
452static void _evdns_log(int warn, const char *fmt, ...) EVDNS_LOG_CHECK;
453static void
454_evdns_log(int warn, const char *fmt, ...)
455{
456	va_list args;
457	char buf[512];
458	if (!evdns_log_fn)
459		return;
460	va_start(args,fmt);
461	evutil_vsnprintf(buf, sizeof(buf), fmt, args);
462	va_end(args);
463	if (evdns_log_fn) {
464		if (warn == EVDNS_LOG_MSG)
465			warn = EVDNS_LOG_WARN;
466		evdns_log_fn(warn, buf);
467	} else {
468		default_evdns_log_fn(warn, buf);
469	}
470
471}
472
473#define log _evdns_log
474
475/* This walks the list of inflight requests to find the */
476/* one with a matching transaction id. Returns NULL on */
477/* failure */
478static struct request *
479request_find_from_trans_id(struct evdns_base *base, u16 trans_id) {
480	struct request *req = REQ_HEAD(base, trans_id);
481	struct request *const started_at = req;
482
483	ASSERT_LOCKED(base);
484
485	if (req) {
486		do {
487			if (req->trans_id == trans_id) return req;
488			req = req->next;
489		} while (req != started_at);
490	}
491
492	return NULL;
493}
494
495/* a libevent callback function which is called when a nameserver */
496/* has gone down and we want to test if it has came back to life yet */
497static void
498nameserver_prod_callback(evutil_socket_t fd, short events, void *arg) {
499	struct nameserver *const ns = (struct nameserver *) arg;
500	(void)fd;
501	(void)events;
502
503	EVDNS_LOCK(ns->base);
504	nameserver_send_probe(ns);
505	EVDNS_UNLOCK(ns->base);
506}
507
508/* a libevent callback which is called when a nameserver probe (to see if */
509/* it has come back to life) times out. We increment the count of failed_times */
510/* and wait longer to send the next probe packet. */
511static void
512nameserver_probe_failed(struct nameserver *const ns) {
513	struct timeval timeout;
514	int i;
515
516	ASSERT_LOCKED(ns->base);
517	(void) evtimer_del(&ns->timeout_event);
518	if (ns->state == 1) {
519		/* This can happen if the nameserver acts in a way which makes us mark */
520		/* it as bad and then starts sending good replies. */
521		return;
522	}
523
524#define MAX_PROBE_TIMEOUT 3600
525#define TIMEOUT_BACKOFF_FACTOR 3
526
527	memcpy(&timeout, &ns->base->global_nameserver_probe_initial_timeout,
528	    sizeof(struct timeval));
529	for (i=ns->failed_times; i > 0 && timeout.tv_sec < MAX_PROBE_TIMEOUT; --i) {
530		timeout.tv_sec *= TIMEOUT_BACKOFF_FACTOR;
531		timeout.tv_usec *= TIMEOUT_BACKOFF_FACTOR;
532		if (timeout.tv_usec > 1000000) {
533			timeout.tv_sec += timeout.tv_usec / 1000000;
534			timeout.tv_usec %= 1000000;
535		}
536	}
537	if (timeout.tv_sec > MAX_PROBE_TIMEOUT) {
538		timeout.tv_sec = MAX_PROBE_TIMEOUT;
539		timeout.tv_usec = 0;
540	}
541
542	ns->failed_times++;
543
544	if (evtimer_add(&ns->timeout_event, &timeout) < 0) {
545		char addrbuf[128];
546		log(EVDNS_LOG_WARN,
547		    "Error from libevent when adding timer event for %s",
548		    evutil_format_sockaddr_port(
549			    (struct sockaddr *)&ns->address,
550			    addrbuf, sizeof(addrbuf)));
551	}
552}
553
554/* called when a nameserver has been deemed to have failed. For example, too */
555/* many packets have timed out etc */
556static void
557nameserver_failed(struct nameserver *const ns, const char *msg) {
558	struct request *req, *started_at;
559	struct evdns_base *base = ns->base;
560	int i;
561	char addrbuf[128];
562
563	ASSERT_LOCKED(base);
564	/* if this nameserver has already been marked as failed */
565	/* then don't do anything */
566	if (!ns->state) return;
567
568	log(EVDNS_LOG_MSG, "Nameserver %s has failed: %s",
569	    evutil_format_sockaddr_port(
570		    (struct sockaddr *)&ns->address,
571		    addrbuf, sizeof(addrbuf)),
572	    msg);
573
574	base->global_good_nameservers--;
575	EVUTIL_ASSERT(base->global_good_nameservers >= 0);
576	if (base->global_good_nameservers == 0) {
577		log(EVDNS_LOG_MSG, "All nameservers have failed");
578	}
579
580	ns->state = 0;
581	ns->failed_times = 1;
582
583	if (evtimer_add(&ns->timeout_event,
584		&base->global_nameserver_probe_initial_timeout) < 0) {
585		log(EVDNS_LOG_WARN,
586		    "Error from libevent when adding timer event for %s",
587		    evutil_format_sockaddr_port(
588			    (struct sockaddr *)&ns->address,
589			    addrbuf, sizeof(addrbuf)));
590		/* ???? Do more? */
591	}
592
593	/* walk the list of inflight requests to see if any can be reassigned to */
594	/* a different server. Requests in the waiting queue don't have a */
595	/* nameserver assigned yet */
596
597	/* if we don't have *any* good nameservers then there's no point */
598	/* trying to reassign requests to one */
599	if (!base->global_good_nameservers) return;
600
601	for (i = 0; i < base->n_req_heads; ++i) {
602		req = started_at = base->req_heads[i];
603		if (req) {
604			do {
605				if (req->tx_count == 0 && req->ns == ns) {
606					/* still waiting to go out, can be moved */
607					/* to another server */
608					req->ns = nameserver_pick(base);
609				}
610				req = req->next;
611			} while (req != started_at);
612		}
613	}
614}
615
616static void
617nameserver_up(struct nameserver *const ns)
618{
619	char addrbuf[128];
620	ASSERT_LOCKED(ns->base);
621	if (ns->state) return;
622	log(EVDNS_LOG_MSG, "Nameserver %s is back up",
623	    evutil_format_sockaddr_port(
624		    (struct sockaddr *)&ns->address,
625		    addrbuf, sizeof(addrbuf)));
626	evtimer_del(&ns->timeout_event);
627	if (ns->probe_request) {
628		evdns_cancel_request(ns->base, ns->probe_request);
629		ns->probe_request = NULL;
630	}
631	ns->state = 1;
632	ns->failed_times = 0;
633	ns->timedout = 0;
634	ns->base->global_good_nameservers++;
635}
636
637static void
638request_trans_id_set(struct request *const req, const u16 trans_id) {
639	req->trans_id = trans_id;
640	*((u16 *) req->request) = htons(trans_id);
641}
642
643/* Called to remove a request from a list and dealloc it. */
644/* head is a pointer to the head of the list it should be */
645/* removed from or NULL if the request isn't in a list. */
646/* when free_handle is one, free the handle as well. */
647static void
648request_finished(struct request *const req, struct request **head, int free_handle) {
649	struct evdns_base *base = req->base;
650	int was_inflight = (head != &base->req_waiting_head);
651	EVDNS_LOCK(base);
652	ASSERT_VALID_REQUEST(req);
653
654	if (head)
655		evdns_request_remove(req, head);
656
657	log(EVDNS_LOG_DEBUG, "Removing timeout for request %p", req);
658	if (was_inflight) {
659		evtimer_del(&req->timeout_event);
660		base->global_requests_inflight--;
661	} else {
662		base->global_requests_waiting--;
663	}
664	/* it was initialized during request_new / evtimer_assign */
665	event_debug_unassign(&req->timeout_event);
666
667	if (!req->request_appended) {
668		/* need to free the request data on it's own */
669		mm_free(req->request);
670	} else {
671		/* the request data is appended onto the header */
672		/* so everything gets free()ed when we: */
673	}
674
675	if (req->handle) {
676		EVUTIL_ASSERT(req->handle->current_req == req);
677
678		if (free_handle) {
679			search_request_finished(req->handle);
680			req->handle->current_req = NULL;
681			if (! req->handle->pending_cb) {
682				/* If we're planning to run the callback,
683				 * don't free the handle until later. */
684				mm_free(req->handle);
685			}
686			req->handle = NULL; /* If we have a bug, let's crash
687					     * early */
688		} else {
689			req->handle->current_req = NULL;
690		}
691	}
692
693	mm_free(req);
694
695	evdns_requests_pump_waiting_queue(base);
696	EVDNS_UNLOCK(base);
697}
698
699/* This is called when a server returns a funny error code. */
700/* We try the request again with another server. */
701/* */
702/* return: */
703/*   0 ok */
704/*   1 failed/reissue is pointless */
705static int
706request_reissue(struct request *req) {
707	const struct nameserver *const last_ns = req->ns;
708	ASSERT_LOCKED(req->base);
709	ASSERT_VALID_REQUEST(req);
710	/* the last nameserver should have been marked as failing */
711	/* by the caller of this function, therefore pick will try */
712	/* not to return it */
713	req->ns = nameserver_pick(req->base);
714	if (req->ns == last_ns) {
715		/* ... but pick did return it */
716		/* not a lot of point in trying again with the */
717		/* same server */
718		return 1;
719	}
720
721	req->reissue_count++;
722	req->tx_count = 0;
723	req->transmit_me = 1;
724
725	return 0;
726}
727
728/* this function looks for space on the inflight queue and promotes */
729/* requests from the waiting queue if it can. */
730static void
731evdns_requests_pump_waiting_queue(struct evdns_base *base) {
732	ASSERT_LOCKED(base);
733	while (base->global_requests_inflight < base->global_max_requests_inflight &&
734		   base->global_requests_waiting) {
735		struct request *req;
736		/* move a request from the waiting queue to the inflight queue */
737		EVUTIL_ASSERT(base->req_waiting_head);
738		req = base->req_waiting_head;
739		evdns_request_remove(req, &base->req_waiting_head);
740
741		base->global_requests_waiting--;
742		base->global_requests_inflight++;
743
744		req->ns = nameserver_pick(base);
745		request_trans_id_set(req, transaction_id_pick(base));
746
747		evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
748		evdns_request_transmit(req);
749		evdns_transmit(base);
750	}
751}
752
/* Everything needed to invoke a user's DNS callback later, from the */
/* event base's deferred-callback queue.  Filled in by */
/* reply_schedule_callback() and consumed (and freed) by */
/* reply_run_callback(). */
struct deferred_reply_callback {
	/* Queue entry; reply_run_callback() recovers the enclosing struct */
	/* from a pointer to this member. */
	struct deferred_cb deferred;
	/* Handle of the originating request, if it had one.  Freed after */
	/* the callback has run if its pending_cb flag is set. */
	struct evdns_request *handle;
	u8 request_type; /* TYPE_A, TYPE_PTR or TYPE_AAAA, copied from the request */
	u8 have_reply; /* nonzero if 'reply' below holds a copied answer */
	u32 ttl; /* TTL value passed to the user callback */
	u32 err; /* result code passed to the user callback when have_reply is 0 */
	evdns_callback_type user_callback; /* the user's callback */
	struct reply reply; /* copy of the parsed answer; valid iff have_reply */
};
764
765static void
766reply_run_callback(struct deferred_cb *d, void *user_pointer)
767{
768	struct deferred_reply_callback *cb =
769	    EVUTIL_UPCAST(d, struct deferred_reply_callback, deferred);
770
771	switch (cb->request_type) {
772	case TYPE_A:
773		if (cb->have_reply)
774			cb->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
775			    cb->reply.data.a.addrcount, cb->ttl,
776			    cb->reply.data.a.addresses,
777			    user_pointer);
778		else
779			cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
780		break;
781	case TYPE_PTR:
782		if (cb->have_reply) {
783			char *name = cb->reply.data.ptr.name;
784			cb->user_callback(DNS_ERR_NONE, DNS_PTR, 1, cb->ttl,
785			    &name, user_pointer);
786		} else {
787			cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
788		}
789		break;
790	case TYPE_AAAA:
791		if (cb->have_reply)
792			cb->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
793			    cb->reply.data.aaaa.addrcount, cb->ttl,
794			    cb->reply.data.aaaa.addresses,
795			    user_pointer);
796		else
797			cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
798		break;
799	default:
800		EVUTIL_ASSERT(0);
801	}
802
803	if (cb->handle && cb->handle->pending_cb) {
804		mm_free(cb->handle);
805	}
806
807	mm_free(cb);
808}
809
810static void
811reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply)
812{
813	struct deferred_reply_callback *d = mm_calloc(1, sizeof(*d));
814
815	if (!d) {
816		event_warn("%s: Couldn't allocate space for deferred callback.",
817		    __func__);
818		return;
819	}
820
821	ASSERT_LOCKED(req->base);
822
823	d->request_type = req->request_type;
824	d->user_callback = req->user_callback;
825	d->ttl = ttl;
826	d->err = err;
827	if (reply) {
828		d->have_reply = 1;
829		memcpy(&d->reply, reply, sizeof(struct reply));
830	}
831
832	if (req->handle) {
833		req->handle->pending_cb = 1;
834		d->handle = req->handle;
835	}
836
837	event_deferred_cb_init(&d->deferred, reply_run_callback,
838	    req->user_pointer);
839	event_deferred_cb_schedule(
840		event_base_get_deferred_cb_queue(req->base->event_base),
841		&d->deferred);
842}
843
844/* this processes a parsed reply packet */
845static void
846reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
847	int error;
848	char addrbuf[128];
849	static const int error_codes[] = {
850		DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
851		DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
852	};
853
854	ASSERT_LOCKED(req->base);
855	ASSERT_VALID_REQUEST(req);
856
857	if (flags & 0x020f || !reply || !reply->have_answer) {
858		/* there was an error */
859		if (flags & 0x0200) {
860			error = DNS_ERR_TRUNCATED;
861		} else if (flags & 0x000f) {
862			u16 error_code = (flags & 0x000f) - 1;
863			if (error_code > 4) {
864				error = DNS_ERR_UNKNOWN;
865			} else {
866				error = error_codes[error_code];
867			}
868		} else if (reply && !reply->have_answer) {
869			error = DNS_ERR_NODATA;
870		} else {
871			error = DNS_ERR_UNKNOWN;
872		}
873
874		switch (error) {
875		case DNS_ERR_NOTIMPL:
876		case DNS_ERR_REFUSED:
877			/* we regard these errors as marking a bad nameserver */
878			if (req->reissue_count < req->base->global_max_reissues) {
879				char msg[64];
880				evutil_snprintf(msg, sizeof(msg), "Bad response %d (%s)",
881					 error, evdns_err_to_string(error));
882				nameserver_failed(req->ns, msg);
883				if (!request_reissue(req)) return;
884			}
885			break;
886		case DNS_ERR_SERVERFAILED:
887			/* rcode 2 (servfailed) sometimes means "we
888			 * are broken" and sometimes (with some binds)
889			 * means "that request was very confusing."
890			 * Treat this as a timeout, not a failure.
891			 */
892			log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver"
893				"at %s; will allow the request to time out.",
894			    evutil_format_sockaddr_port(
895				    (struct sockaddr *)&req->ns->address,
896				    addrbuf, sizeof(addrbuf)));
897			break;
898		default:
899			/* we got a good reply from the nameserver: it is up. */
900			if (req->handle == req->ns->probe_request) {
901				/* Avoid double-free */
902				req->ns->probe_request = NULL;
903			}
904
905			nameserver_up(req->ns);
906		}
907
908		if (req->handle->search_state &&
909		    req->request_type != TYPE_PTR) {
910			/* if we have a list of domains to search in,
911			 * try the next one */
912			if (!search_try_next(req->handle)) {
913				/* a new request was issued so this
914				 * request is finished and */
915				/* the user callback will be made when
916				 * that request (or a */
917				/* child of it) finishes. */
918				return;
919			}
920		}
921
922		/* all else failed. Pass the failure up */
923		reply_schedule_callback(req, ttl, error, NULL);
924		request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
925	} else {
926		/* all ok, tell the user */
927		reply_schedule_callback(req, ttl, 0, reply);
928		if (req->handle == req->ns->probe_request)
929			req->ns->probe_request = NULL; /* Avoid double-free */
930		nameserver_up(req->ns);
931		request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
932	}
933}
934
/* Decode the (possibly compressed) DNS name that starts at offset *idx of */
/* 'packet', which is 'length' bytes long, into 'name_out' as a */
/* NUL-terminated string with labels separated by dots.  'name_out_len' */
/* is the size of 'name_out'. */
/* */
/* On success returns 0 and sets *idx to the offset just past the name as */
/* it appears at its original position (that is, just past the first */
/* compression pointer if one was followed).  Returns -1, leaving *idx */
/* unchanged, if the name runs off the end of the packet, does not fit in */
/* 'name_out', or contains a pointer loop. */
/* */
/* The GET8/GET16/GET32 macros defined here read big-endian values at */
/* offset 'j' and jump to an 'err' label on a short packet; they stay */
/* defined for the code that follows this function. */
static int
name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
	int name_end = -1;
	int j = *idx;
	int ptr_count = 0;
#define GET32(x) do { if (j + 4 > length) goto err; memcpy(&_t32, packet + j, 4); j += 4; x = ntohl(_t32); } while (0)
#define GET16(x) do { if (j + 2 > length) goto err; memcpy(&_t, packet + j, 2); j += 2; x = ntohs(_t); } while (0)
#define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while (0)

	char *cp = name_out;
	const char *const end = name_out + name_out_len;

	/* Normally, names are a series of length prefixed strings terminated */
	/* with a length of 0 (the lengths are u8's < 63). */
	/* However, the length can start with a pair of 1 bits and that */
	/* means that the next 14 bits are a pointer within the current */
	/* packet. */

	for (;;) {
		u8 label_len;
		/* Reject offsets outside the packet, including negative ones. */
		if (j < 0 || j >= length) return -1;
		GET8(label_len);
		if (!label_len) break;
		if (label_len & 0xc0) {
			u8 ptr_low;
			GET8(ptr_low);
			/* Only the first pointer decides where the caller resumes. */
			if (name_end < 0) name_end = j;
			j = (((int)label_len & 0x3f) << 8) + ptr_low;
			/* Make sure that the target offset is in-bounds. */
			if (j < 0 || j >= length) return -1;
			/* If we've jumped more times than there are characters in the
			 * message, we must have a loop. */
			if (++ptr_count > length) return -1;
			continue;
		}
		if (label_len > 63) return -1;
		if (cp != name_out) {
			if (cp + 1 >= end) return -1;
			*cp++ = '.';
		}
		if (cp + label_len >= end) return -1;
		/* The whole label must lie inside the packet; without this */
		/* check a truncated label would make memcpy read past the */
		/* end of the packet buffer. */
		if (j + label_len > length) return -1;
		memcpy(cp, packet + j, label_len);
		cp += label_len;
		j += label_len;
	}
	if (cp >= end) return -1;
	*cp = '\0';
	if (name_end < 0)
		*idx = j;
	else
		*idx = name_end;
	return 0;
 err:
	return -1;
}
990
/* Parse a raw reply packet received from a nameserver and pass the outcome
 * to reply_handle().
 *
 * The transaction id from the header selects the matching in-flight request
 * on `base`; at least one name in the question section must equal the name in
 * that request's own packet.  Answer records whose type matches the request
 * (A, PTR or AAAA) are gathered into a struct reply, and the smallest TTL
 * seen is reported.  When no usable answer was found, SOA records in the
 * authority section contribute to the TTL instead.
 *
 * Returns 0 after reply_handle() has been given the parsed reply.  Returns -1
 * when the header is truncated, no request matches the id, or the packet is
 * not a response (reply_handle() is not called in those cases), and also
 * when any later check fails, in which case reply_handle() is called with a
 * NULL reply.  The caller must hold the base lock (see ASSERT_LOCKED). */
static int
reply_parse(struct evdns_base *base, u8 *packet, int length) {
	int j = 0, k = 0;  /* j indexes packet; k indexes the stored request */
	u16 _t;	 /* used by the macros */
	u32 _t32;  /* used by the macros */
	char tmp_name[256], cmp_name[256]; /* used by the macros */
	int name_matches = 0;

	u16 trans_id, questions, answers, authority, additional, datalength;
	u16 flags = 0;
	u32 ttl, ttl_r = 0xffffffff; /* 0xffffffff means "no TTL seen yet" */
	struct reply reply;
	struct request *req = NULL;
	unsigned int i;

	ASSERT_LOCKED(base);

	/* Fixed header: id, flags and the four section counts.  A short
	 * packet jumps to err with req still NULL. */
	GET16(trans_id);
	GET16(flags);
	GET16(questions);
	GET16(answers);
	GET16(authority);
	GET16(additional);
	(void) authority; /* suppress "unused variable" warnings. */
	(void) additional; /* suppress "unused variable" warnings. */

	req = request_find_from_trans_id(base, trans_id);
	if (!req) return -1;
	EVUTIL_ASSERT(req->base == base);

	memset(&reply, 0, sizeof(reply));

	/* If it's not an answer, it doesn't correspond to any request. */
	if (!(flags & 0x8000)) return -1;  /* must be an answer */
	if ((flags & 0x020f) && (flags & 0x020f) != DNS_ERR_NOTEXIST) {
		/* there was an error and it's not NXDOMAIN */
		goto err;
	}
	/* if (!answers) return; */  /* must have an answer of some form */

	/* This macro skips a name in the DNS reply. */
#define SKIP_NAME						\
	do { tmp_name[0] = '\0';				\
		if (name_parse(packet, length, &j, tmp_name,	\
			sizeof(tmp_name))<0)			\
			goto err;				\
	} while (0)
	/* This macro parses the name at packet[j] and the name at the same
	 * offset of the request we sent, and sets name_matches if they are
	 * equal.  The comparison is case-sensitive when
	 * base->global_randomize_case is set, case-insensitive otherwise. */
#define TEST_NAME							\
	do { tmp_name[0] = '\0';					\
		cmp_name[0] = '\0';					\
		k = j;							\
		if (name_parse(packet, length, &j, tmp_name,		\
			sizeof(tmp_name))<0)				\
			goto err;					\
		if (name_parse(req->request, req->request_len, &k,	\
			cmp_name, sizeof(cmp_name))<0)			\
			goto err;					\
		if (base->global_randomize_case) {			\
			if (strcmp(tmp_name, cmp_name) == 0)		\
				name_matches = 1;			\
		} else {						\
			if (evutil_ascii_strcasecmp(tmp_name, cmp_name) == 0) \
				name_matches = 1;			\
		}							\
	} while (0)

	reply.type = req->request_type;

	/* skip over each question in the reply */
	for (i = 0; i < questions; ++i) {
		/* the question looks like
		 *   <label:name><u16:type><u16:class>
		 */
		TEST_NAME;
		j += 4; /* type and class are not inspected */
		if (j > length) goto err;
	}

	/* No question matched what we asked: reject the packet. */
	if (!name_matches)
		goto err;

	/* now we have the answer section which looks like
	 * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
	 */

	for (i = 0; i < answers; ++i) {
		u16 type, class;

		SKIP_NAME;
		GET16(type);
		GET16(class);
		GET32(ttl);
		GET16(datalength);

		if (type == TYPE_A && class == CLASS_INET) {
			int addrcount, addrtocopy;
			if (req->request_type != TYPE_A) {
				/* not what we asked for: step over the data */
				j += datalength; continue;
			}
			if ((datalength & 3) != 0) /* not an even number of As. */
			    goto err;
			addrcount = datalength >> 2;
			addrtocopy = MIN(MAX_V4_ADDRS - reply.data.a.addrcount, (unsigned)addrcount);

			ttl_r = MIN(ttl_r, ttl);
			/* we keep at most MAX_V4_ADDRS addresses in total. */
			if (j + 4*addrtocopy > length) goto err;
			memcpy(&reply.data.a.addresses[reply.data.a.addrcount],
				   packet + j, 4*addrtocopy);
			j += 4*addrtocopy;
			reply.data.a.addrcount += addrtocopy;
			reply.have_answer = 1;
			if (reply.data.a.addrcount == MAX_V4_ADDRS) break;
		} else if (type == TYPE_PTR && class == CLASS_INET) {
			if (req->request_type != TYPE_PTR) {
				j += datalength; continue;
			}
			if (name_parse(packet, length, &j, reply.data.ptr.name,
						   sizeof(reply.data.ptr.name))<0)
				goto err;
			ttl_r = MIN(ttl_r, ttl);
			reply.have_answer = 1;
			/* only the first PTR name is used */
			break;
		} else if (type == TYPE_CNAME) {
			char cname[HOST_NAME_MAX];
			/* Record a CNAME only if the request asked for one
			 * and none has been stored yet. */
			if (!req->put_cname_in_ptr || *req->put_cname_in_ptr) {
				j += datalength; continue;
			}
			if (name_parse(packet, length, &j, cname,
				sizeof(cname))<0)
				goto err;
			*req->put_cname_in_ptr = mm_strdup(cname);
		} else if (type == TYPE_AAAA && class == CLASS_INET) {
			int addrcount, addrtocopy;
			if (req->request_type != TYPE_AAAA) {
				j += datalength; continue;
			}
			if ((datalength & 15) != 0) /* not an even number of AAAAs. */
				goto err;
			addrcount = datalength >> 4;  /* each address is 16 bytes long */
			addrtocopy = MIN(MAX_V6_ADDRS - reply.data.aaaa.addrcount, (unsigned)addrcount);
			ttl_r = MIN(ttl_r, ttl);

			/* we keep at most MAX_V6_ADDRS addresses in total. */
			if (j + 16*addrtocopy > length) goto err;
			memcpy(&reply.data.aaaa.addresses[reply.data.aaaa.addrcount],
				   packet + j, 16*addrtocopy);
			reply.data.aaaa.addrcount += addrtocopy;
			j += 16*addrtocopy;
			reply.have_answer = 1;
			if (reply.data.aaaa.addrcount == MAX_V6_ADDRS) break;
		} else {
			/* skip over any other type of resource */
			j += datalength;
		}
	}

	/* Without an answer, take the TTL from SOA records in the authority
	 * section: the smaller of the record TTL and the SOA minimum. */
	if (!reply.have_answer) {
		for (i = 0; i < authority; ++i) {
			u16 type, class;
			SKIP_NAME;
			GET16(type);
			GET16(class);
			GET32(ttl);
			GET16(datalength);
			if (type == TYPE_SOA && class == CLASS_INET) {
				u32 serial, refresh, retry, expire, minimum;
				/* SOA data: two names, then five u32 fields;
				 * only `minimum` is used. */
				SKIP_NAME;
				SKIP_NAME;
				GET32(serial);
				GET32(refresh);
				GET32(retry);
				GET32(expire);
				GET32(minimum);
				(void)expire;
				(void)retry;
				(void)refresh;
				(void)serial;
				ttl_r = MIN(ttl_r, ttl);
				ttl_r = MIN(ttl_r, minimum);
			} else {
				/* skip over any other type of resource */
				j += datalength;
			}
		}
	}

	/* No record supplied a TTL: report 0. */
	if (ttl_r == 0xffffffff)
		ttl_r = 0;

	reply_handle(req, flags, ttl_r, &reply);
	return 0;
 err:
	/* req is NULL only if the header itself was truncated. */
	if (req)
		reply_handle(req, flags, 0, NULL);
	return -1;
}
1189
1190/* Parse a raw request (packet,length) sent to a nameserver port (port) from */
1191/* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
1192/* callback. */
1193static int
1194request_parse(u8 *packet, int length, struct evdns_server_port *port, struct sockaddr *addr, ev_socklen_t addrlen)
1195{
1196	int j = 0;	/* index into packet */
1197	u16 _t;	 /* used by the macros */
1198	char tmp_name[256]; /* used by the macros */
1199
1200	int i;
1201	u16 trans_id, flags, questions, answers, authority, additional;
1202	struct server_request *server_req = NULL;
1203
1204	ASSERT_LOCKED(port);
1205
1206	/* Get the header fields */
1207	GET16(trans_id);
1208	GET16(flags);
1209	GET16(questions);
1210	GET16(answers);
1211	GET16(authority);
1212	GET16(additional);
1213	(void)answers;
1214	(void)additional;
1215	(void)authority;
1216
1217	if (flags & 0x8000) return -1; /* Must not be an answer. */
1218	flags &= 0x0110; /* Only RD and CD get preserved. */
1219
1220	server_req = mm_malloc(sizeof(struct server_request));
1221	if (server_req == NULL) return -1;
1222	memset(server_req, 0, sizeof(struct server_request));
1223
1224	server_req->trans_id = trans_id;
1225	memcpy(&server_req->addr, addr, addrlen);
1226	server_req->addrlen = addrlen;
1227
1228	server_req->base.flags = flags;
1229	server_req->base.nquestions = 0;
1230	server_req->base.questions = mm_calloc(sizeof(struct evdns_server_question *), questions);
1231	if (server_req->base.questions == NULL)
1232		goto err;
1233
1234	for (i = 0; i < questions; ++i) {
1235		u16 type, class;
1236		struct evdns_server_question *q;
1237		int namelen;
1238		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
1239			goto err;
1240		GET16(type);
1241		GET16(class);
1242		namelen = (int)strlen(tmp_name);
1243		q = mm_malloc(sizeof(struct evdns_server_question) + namelen);
1244		if (!q)
1245			goto err;
1246		q->type = type;
1247		q->dns_question_class = class;
1248		memcpy(q->name, tmp_name, namelen+1);
1249		server_req->base.questions[server_req->base.nquestions++] = q;
1250	}
1251
1252	/* Ignore answers, authority, and additional. */
1253
1254	server_req->port = port;
1255	port->refcnt++;
1256
1257	/* Only standard queries are supported. */
1258	if (flags & 0x7800) {
1259		evdns_server_request_respond(&(server_req->base), DNS_ERR_NOTIMPL);
1260		return -1;
1261	}
1262
1263	port->user_callback(&(server_req->base), port->user_data);
1264
1265	return 0;
1266err:
1267	if (server_req) {
1268		if (server_req->base.questions) {
1269			for (i = 0; i < server_req->base.nquestions; ++i)
1270				mm_free(server_req->base.questions[i]);
1271			mm_free(server_req->base.questions);
1272		}
1273		mm_free(server_req);
1274	}
1275	return -1;
1276
1277#undef SKIP_NAME
1278#undef GET32
1279#undef GET16
1280#undef GET8
1281}
1282
1283
1284void
1285evdns_set_transaction_id_fn(ev_uint16_t (*fn)(void))
1286{
1287}
1288
/* Accepts a random-bytes generator and ignores it: this function has no
 * effect. */
void
evdns_set_random_bytes_fn(void (*fn)(char *, size_t))
{
	(void)fn;
}
1293
1294/* Try to choose a strong transaction id which isn't already in flight */
1295static u16
1296transaction_id_pick(struct evdns_base *base) {
1297	ASSERT_LOCKED(base);
1298	for (;;) {
1299		u16 trans_id;
1300		evutil_secure_rng_get_bytes(&trans_id, sizeof(trans_id));
1301
1302		if (trans_id == 0xffff) continue;
1303		/* now check to see if that id is already inflight */
1304		if (request_find_from_trans_id(base, trans_id) == NULL)
1305			return trans_id;
1306	}
1307}
1308
1309/* choose a namesever to use. This function will try to ignore */
1310/* nameservers which we think are down and load balance across the rest */
1311/* by updating the server_head global each time. */
1312static struct nameserver *
1313nameserver_pick(struct evdns_base *base) {
1314	struct nameserver *started_at = base->server_head, *picked;
1315	ASSERT_LOCKED(base);
1316	if (!base->server_head) return NULL;
1317
1318	/* if we don't have any good nameservers then there's no */
1319	/* point in trying to find one. */
1320	if (!base->global_good_nameservers) {
1321		base->server_head = base->server_head->next;
1322		return base->server_head;
1323	}
1324
1325	/* remember that nameservers are in a circular list */
1326	for (;;) {
1327		if (base->server_head->state) {
1328			/* we think this server is currently good */
1329			picked = base->server_head;
1330			base->server_head = base->server_head->next;
1331			return picked;
1332		}
1333
1334		base->server_head = base->server_head->next;
1335		if (base->server_head == started_at) {
1336			/* all the nameservers seem to be down */
1337			/* so we just return this one and hope for the */
1338			/* best */
1339			EVUTIL_ASSERT(base->global_good_nameservers == 0);
1340			picked = base->server_head;
1341			base->server_head = base->server_head->next;
1342			return picked;
1343		}
1344	}
1345}
1346
1347/* this is called when a namesever socket is ready for reading */
1348static void
1349nameserver_read(struct nameserver *ns) {
1350	struct sockaddr_storage ss;
1351	ev_socklen_t addrlen = sizeof(ss);
1352	u8 packet[1500];
1353	char addrbuf[128];
1354	ASSERT_LOCKED(ns->base);
1355
1356	for (;;) {
1357		const int r = recvfrom(ns->socket, (void*)packet,
1358		    sizeof(packet), 0,
1359		    (struct sockaddr*)&ss, &addrlen);
1360		if (r < 0) {
1361			int err = evutil_socket_geterror(ns->socket);
1362			if (EVUTIL_ERR_RW_RETRIABLE(err))
1363				return;
1364			nameserver_failed(ns,
1365			    evutil_socket_error_to_string(err));
1366			return;
1367		}
1368		if (evutil_sockaddr_cmp((struct sockaddr*)&ss,
1369			(struct sockaddr*)&ns->address, 0)) {
1370			log(EVDNS_LOG_WARN, "Address mismatch on received "
1371			    "DNS packet.  Apparent source was %s",
1372			    evutil_format_sockaddr_port(
1373				    (struct sockaddr *)&ss,
1374				    addrbuf, sizeof(addrbuf)));
1375			return;
1376		}
1377
1378		ns->timedout = 0;
1379		reply_parse(ns->base, packet, r);
1380	}
1381}
1382
1383/* Read a packet from a DNS client on a server port s, parse it, and */
1384/* act accordingly. */
1385static void
1386server_port_read(struct evdns_server_port *s) {
1387	u8 packet[1500];
1388	struct sockaddr_storage addr;
1389	ev_socklen_t addrlen;
1390	int r;
1391	ASSERT_LOCKED(s);
1392
1393	for (;;) {
1394		addrlen = sizeof(struct sockaddr_storage);
1395		r = recvfrom(s->socket, (void*)packet, sizeof(packet), 0,
1396					 (struct sockaddr*) &addr, &addrlen);
1397		if (r < 0) {
1398			int err = evutil_socket_geterror(s->socket);
1399			if (EVUTIL_ERR_RW_RETRIABLE(err))
1400				return;
1401			log(EVDNS_LOG_WARN,
1402			    "Error %s (%d) while reading request.",
1403			    evutil_socket_error_to_string(err), err);
1404			return;
1405		}
1406		request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen);
1407	}
1408}
1409
1410/* Try to write all pending replies on a given DNS server port. */
1411static void
1412server_port_flush(struct evdns_server_port *port)
1413{
1414	struct server_request *req = port->pending_replies;
1415	ASSERT_LOCKED(port);
1416	while (req) {
1417		int r = sendto(port->socket, req->response, (int)req->response_len, 0,
1418			   (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1419		if (r < 0) {
1420			int err = evutil_socket_geterror(port->socket);
1421			if (EVUTIL_ERR_RW_RETRIABLE(err))
1422				return;
1423			log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", evutil_socket_error_to_string(err), err);
1424		}
1425		if (server_request_free(req)) {
1426			/* we released the last reference to req->port. */
1427			return;
1428		} else {
1429			EVUTIL_ASSERT(req != port->pending_replies);
1430			req = port->pending_replies;
1431		}
1432	}
1433
1434	/* We have no more pending requests; stop listening for 'writeable' events. */
1435	(void) event_del(&port->event);
1436	event_assign(&port->event, port->event_base,
1437				 port->socket, EV_READ | EV_PERSIST,
1438				 server_port_ready_callback, port);
1439
1440	if (event_add(&port->event, NULL) < 0) {
1441		log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server.");
1442		/* ???? Do more? */
1443	}
1444}
1445
1446/* set if we are waiting for the ability to write to this server. */
1447/* if waiting is true then we ask libevent for EV_WRITE events, otherwise */
1448/* we stop these events. */
1449static void
1450nameserver_write_waiting(struct nameserver *ns, char waiting) {
1451	ASSERT_LOCKED(ns->base);
1452	if (ns->write_waiting == waiting) return;
1453
1454	ns->write_waiting = waiting;
1455	(void) event_del(&ns->event);
1456	event_assign(&ns->event, ns->base->event_base,
1457	    ns->socket, EV_READ | (waiting ? EV_WRITE : 0) | EV_PERSIST,
1458	    nameserver_ready_callback, ns);
1459	if (event_add(&ns->event, NULL) < 0) {
1460		char addrbuf[128];
1461		log(EVDNS_LOG_WARN, "Error from libevent when adding event for %s",
1462		    evutil_format_sockaddr_port(
1463			    (struct sockaddr *)&ns->address,
1464			    addrbuf, sizeof(addrbuf)));
1465		/* ???? Do more? */
1466	}
1467}
1468
1469/* a callback function. Called by libevent when the kernel says that */
1470/* a nameserver socket is ready for writing or reading */
1471static void
1472nameserver_ready_callback(evutil_socket_t fd, short events, void *arg) {
1473	struct nameserver *ns = (struct nameserver *) arg;
1474	(void)fd;
1475
1476	EVDNS_LOCK(ns->base);
1477	if (events & EV_WRITE) {
1478		ns->choked = 0;
1479		if (!evdns_transmit(ns->base)) {
1480			nameserver_write_waiting(ns, 0);
1481		}
1482	}
1483	if (events & EV_READ) {
1484		nameserver_read(ns);
1485	}
1486	EVDNS_UNLOCK(ns->base);
1487}
1488
1489/* a callback function. Called by libevent when the kernel says that */
1490/* a server socket is ready for writing or reading. */
1491static void
1492server_port_ready_callback(evutil_socket_t fd, short events, void *arg) {
1493	struct evdns_server_port *port = (struct evdns_server_port *) arg;
1494	(void) fd;
1495
1496	EVDNS_LOCK(port);
1497	if (events & EV_WRITE) {
1498		port->choked = 0;
1499		server_port_flush(port);
1500	}
1501	if (events & EV_READ) {
1502		server_port_read(port);
1503	}
1504	EVDNS_UNLOCK(port);
1505}
1506
/* This is an inefficient representation; only use it via the dnslabel_table_*
 * functions, so that it can be safely replaced with something smarter later. */
/* Maximum number of entries a dnslabel_table can hold. */
#define MAX_LABELS 128
/* Structures used to implement name compression */
/* One remembered name: v is a heap copy of the name text (owned by the
 * table), pos is the offset in the message where that name was written. */
struct dnslabel_entry { char *v; off_t pos; };
struct dnslabel_table {
	int n_labels; /* number of current entries */
	/* map from name to position in message */
	struct dnslabel_entry labels[MAX_LABELS];
};
1517
1518/* Initialize dnslabel_table. */
1519static void
1520dnslabel_table_init(struct dnslabel_table *table)
1521{
1522	table->n_labels = 0;
1523}
1524
1525/* Free all storage held by table, but not the table itself. */
1526static void
1527dnslabel_clear(struct dnslabel_table *table)
1528{
1529	int i;
1530	for (i = 0; i < table->n_labels; ++i)
1531		mm_free(table->labels[i].v);
1532	table->n_labels = 0;
1533}
1534
1535/* return the position of the label in the current message, or -1 if the label */
1536/* hasn't been used yet. */
1537static int
1538dnslabel_table_get_pos(const struct dnslabel_table *table, const char *label)
1539{
1540	int i;
1541	for (i = 0; i < table->n_labels; ++i) {
1542		if (!strcmp(label, table->labels[i].v))
1543			return table->labels[i].pos;
1544	}
1545	return -1;
1546}
1547
1548/* remember that we've used the label at position pos */
1549static int
1550dnslabel_table_add(struct dnslabel_table *table, const char *label, off_t pos)
1551{
1552	char *v;
1553	int p;
1554	if (table->n_labels == MAX_LABELS)
1555		return (-1);
1556	v = mm_strdup(label);
1557	if (v == NULL)
1558		return (-1);
1559	p = table->n_labels++;
1560	table->labels[p].v = v;
1561	table->labels[p].pos = pos;
1562
1563	return (0);
1564}
1565
1566/* Converts a string to a length-prefixed set of DNS labels, starting */
1567/* at buf[j]. name and buf must not overlap. name_len should be the length */
1568/* of name.	 table is optional, and is used for compression. */
1569/* */
1570/* Input: abc.def */
1571/* Output: <3>abc<3>def<0> */
1572/* */
1573/* Returns the first index after the encoded name, or negative on error. */
1574/*	 -1	 label was > 63 bytes */
1575/*	 -2	 name too long to fit in buffer. */
1576/* */
1577static off_t
1578dnsname_to_labels(u8 *const buf, size_t buf_len, off_t j,
1579				  const char *name, const size_t name_len,
1580				  struct dnslabel_table *table) {
1581	const char *end = name + name_len;
1582	int ref = 0;
1583	u16 _t;
1584
1585#define APPEND16(x) do {						\
1586		if (j + 2 > (off_t)buf_len)				\
1587			goto overflow;					\
1588		_t = htons(x);						\
1589		memcpy(buf + j, &_t, 2);				\
1590		j += 2;							\
1591	} while (0)
1592#define APPEND32(x) do {						\
1593		if (j + 4 > (off_t)buf_len)				\
1594			goto overflow;					\
1595		_t32 = htonl(x);					\
1596		memcpy(buf + j, &_t32, 4);				\
1597		j += 4;							\
1598	} while (0)
1599
1600	if (name_len > 255) return -2;
1601
1602	for (;;) {
1603		const char *const start = name;
1604		if (table && (ref = dnslabel_table_get_pos(table, name)) >= 0) {
1605			APPEND16(ref | 0xc000);
1606			return j;
1607		}
1608		name = strchr(name, '.');
1609		if (!name) {
1610			const size_t label_len = end - start;
1611			if (label_len > 63) return -1;
1612			if ((size_t)(j+label_len+1) > buf_len) return -2;
1613			if (table) dnslabel_table_add(table, start, j);
1614			buf[j++] = (ev_uint8_t)label_len;
1615
1616			memcpy(buf + j, start, label_len);
1617			j += (int) label_len;
1618			break;
1619		} else {
1620			/* append length of the label. */
1621			const size_t label_len = name - start;
1622			if (label_len > 63) return -1;
1623			if ((size_t)(j+label_len+1) > buf_len) return -2;
1624			if (table) dnslabel_table_add(table, start, j);
1625			buf[j++] = (ev_uint8_t)label_len;
1626
1627			memcpy(buf + j, start, label_len);
1628			j += (int) label_len;
1629			/* hop over the '.' */
1630			name++;
1631		}
1632	}
1633
1634	/* the labels must be terminated by a 0. */
1635	/* It's possible that the name ended in a . */
1636	/* in which case the zero is already there */
1637	if (!j || buf[j-1]) buf[j++] = 0;
1638	return j;
1639 overflow:
1640	return (-2);
1641}
1642
/* Upper bound on the size of a DNS request packet for a name of name_len
 * bytes.  The packet actually built may be smaller than this. */
static size_t
evdns_request_len(const size_t name_len) {
	const size_t header_budget = 96;     /* reserved for the DNS standard header */
	const size_t name_overhead = 2;      /* added on top of the name bytes */
	const size_t type_class_budget = 4;  /* space for the resource type */

	return header_budget + name_len + name_overhead + type_class_budget;
}
1652
1653/* build a dns request packet into buf. buf should be at least as long */
1654/* as evdns_request_len told you it should be. */
1655/* */
1656/* Returns the amount of space used. Negative on error. */
1657static int
1658evdns_request_data_build(const char *const name, const size_t name_len,
1659    const u16 trans_id, const u16 type, const u16 class,
1660    u8 *const buf, size_t buf_len) {
1661	off_t j = 0;  /* current offset into buf */
1662	u16 _t;	 /* used by the macros */
1663
1664	APPEND16(trans_id);
1665	APPEND16(0x0100);  /* standard query, recusion needed */
1666	APPEND16(1);  /* one question */
1667	APPEND16(0);  /* no answers */
1668	APPEND16(0);  /* no authority */
1669	APPEND16(0);  /* no additional */
1670
1671	j = dnsname_to_labels(buf, buf_len, j, name, name_len, NULL);
1672	if (j < 0) {
1673		return (int)j;
1674	}
1675
1676	APPEND16(type);
1677	APPEND16(class);
1678
1679	return (int)j;
1680 overflow:
1681	return (-1);
1682}
1683
1684/* exported function */
1685struct evdns_server_port *
1686evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1687{
1688	struct evdns_server_port *port;
1689	if (flags)
1690		return NULL; /* flags not yet implemented */
1691	if (!(port = mm_malloc(sizeof(struct evdns_server_port))))
1692		return NULL;
1693	memset(port, 0, sizeof(struct evdns_server_port));
1694
1695
1696	port->socket = socket;
1697	port->refcnt = 1;
1698	port->choked = 0;
1699	port->closing = 0;
1700	port->user_callback = cb;
1701	port->user_data = user_data;
1702	port->pending_replies = NULL;
1703	port->event_base = base;
1704
1705	event_assign(&port->event, port->event_base,
1706				 port->socket, EV_READ | EV_PERSIST,
1707				 server_port_ready_callback, port);
1708	if (event_add(&port->event, NULL) < 0) {
1709		mm_free(port);
1710		return NULL;
1711	}
1712	EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
1713	return port;
1714}
1715
1716struct evdns_server_port *
1717evdns_add_server_port(evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1718{
1719	return evdns_add_server_port_with_base(NULL, socket, flags, cb, user_data);
1720}
1721
1722/* exported function */
1723void
1724evdns_close_server_port(struct evdns_server_port *port)
1725{
1726	EVDNS_LOCK(port);
1727	if (--port->refcnt == 0) {
1728		EVDNS_UNLOCK(port);
1729		server_port_free(port);
1730	} else {
1731		port->closing = 1;
1732	}
1733}
1734
1735/* exported function */
1736int
1737evdns_server_request_add_reply(struct evdns_server_request *_req, int section, const char *name, int type, int class, int ttl, int datalen, int is_name, const char *data)
1738{
1739	struct server_request *req = TO_SERVER_REQUEST(_req);
1740	struct server_reply_item **itemp, *item;
1741	int *countp;
1742	int result = -1;
1743
1744	EVDNS_LOCK(req->port);
1745	if (req->response) /* have we already answered? */
1746		goto done;
1747
1748	switch (section) {
1749	case EVDNS_ANSWER_SECTION:
1750		itemp = &req->answer;
1751		countp = &req->n_answer;
1752		break;
1753	case EVDNS_AUTHORITY_SECTION:
1754		itemp = &req->authority;
1755		countp = &req->n_authority;
1756		break;
1757	case EVDNS_ADDITIONAL_SECTION:
1758		itemp = &req->additional;
1759		countp = &req->n_additional;
1760		break;
1761	default:
1762		goto done;
1763	}
1764	while (*itemp) {
1765		itemp = &((*itemp)->next);
1766	}
1767	item = mm_malloc(sizeof(struct server_reply_item));
1768	if (!item)
1769		goto done;
1770	item->next = NULL;
1771	if (!(item->name = mm_strdup(name))) {
1772		mm_free(item);
1773		goto done;
1774	}
1775	item->type = type;
1776	item->dns_question_class = class;
1777	item->ttl = ttl;
1778	item->is_name = is_name != 0;
1779	item->datalen = 0;
1780	item->data = NULL;
1781	if (data) {
1782		if (item->is_name) {
1783			if (!(item->data = mm_strdup(data))) {
1784				mm_free(item->name);
1785				mm_free(item);
1786				goto done;
1787			}
1788			item->datalen = (u16)-1;
1789		} else {
1790			if (!(item->data = mm_malloc(datalen))) {
1791				mm_free(item->name);
1792				mm_free(item);
1793				goto done;
1794			}
1795			item->datalen = datalen;
1796			memcpy(item->data, data, datalen);
1797		}
1798	}
1799
1800	*itemp = item;
1801	++(*countp);
1802	result = 0;
1803done:
1804	EVDNS_UNLOCK(req->port);
1805	return result;
1806}
1807
1808/* exported function */
1809int
1810evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1811{
1812	return evdns_server_request_add_reply(
1813		  req, EVDNS_ANSWER_SECTION, name, TYPE_A, CLASS_INET,
1814		  ttl, n*4, 0, addrs);
1815}
1816
1817/* exported function */
1818int
1819evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1820{
1821	return evdns_server_request_add_reply(
1822		  req, EVDNS_ANSWER_SECTION, name, TYPE_AAAA, CLASS_INET,
1823		  ttl, n*16, 0, addrs);
1824}
1825
1826/* exported function */
1827int
1828evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl)
1829{
1830	u32 a;
1831	char buf[32];
1832	if (in && inaddr_name)
1833		return -1;
1834	else if (!in && !inaddr_name)
1835		return -1;
1836	if (in) {
1837		a = ntohl(in->s_addr);
1838		evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
1839				(int)(u8)((a	)&0xff),
1840				(int)(u8)((a>>8 )&0xff),
1841				(int)(u8)((a>>16)&0xff),
1842				(int)(u8)((a>>24)&0xff));
1843		inaddr_name = buf;
1844	}
1845	return evdns_server_request_add_reply(
1846		  req, EVDNS_ANSWER_SECTION, inaddr_name, TYPE_PTR, CLASS_INET,
1847		  ttl, -1, 1, hostname);
1848}
1849
1850/* exported function */
1851int
1852evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl)
1853{
1854	return evdns_server_request_add_reply(
1855		  req, EVDNS_ANSWER_SECTION, name, TYPE_CNAME, CLASS_INET,
1856		  ttl, -1, 1, cname);
1857}
1858
1859/* exported function */
1860void
1861evdns_server_request_set_flags(struct evdns_server_request *exreq, int flags)
1862{
1863	struct server_request *req = TO_SERVER_REQUEST(exreq);
1864	req->base.flags &= ~(EVDNS_FLAGS_AA|EVDNS_FLAGS_RD);
1865	req->base.flags |= flags;
1866}
1867
1868static int
1869evdns_server_request_format_response(struct server_request *req, int err)
1870{
1871	unsigned char buf[1500];
1872	size_t buf_len = sizeof(buf);
1873	off_t j = 0, r;
1874	u16 _t;
1875	u32 _t32;
1876	int i;
1877	u16 flags;
1878	struct dnslabel_table table;
1879
1880	if (err < 0 || err > 15) return -1;
1881
1882	/* Set response bit and error code; copy OPCODE and RD fields from
1883	 * question; copy RA and AA if set by caller. */
1884	flags = req->base.flags;
1885	flags |= (0x8000 | err);
1886
1887	dnslabel_table_init(&table);
1888	APPEND16(req->trans_id);
1889	APPEND16(flags);
1890	APPEND16(req->base.nquestions);
1891	APPEND16(req->n_answer);
1892	APPEND16(req->n_authority);
1893	APPEND16(req->n_additional);
1894
1895	/* Add questions. */
1896	for (i=0; i < req->base.nquestions; ++i) {
1897		const char *s = req->base.questions[i]->name;
1898		j = dnsname_to_labels(buf, buf_len, j, s, strlen(s), &table);
1899		if (j < 0) {
1900			dnslabel_clear(&table);
1901			return (int) j;
1902		}
1903		APPEND16(req->base.questions[i]->type);
1904		APPEND16(req->base.questions[i]->dns_question_class);
1905	}
1906
1907	/* Add answer, authority, and additional sections. */
1908	for (i=0; i<3; ++i) {
1909		struct server_reply_item *item;
1910		if (i==0)
1911			item = req->answer;
1912		else if (i==1)
1913			item = req->authority;
1914		else
1915			item = req->additional;
1916		while (item) {
1917			r = dnsname_to_labels(buf, buf_len, j, item->name, strlen(item->name), &table);
1918			if (r < 0)
1919				goto overflow;
1920			j = r;
1921
1922			APPEND16(item->type);
1923			APPEND16(item->dns_question_class);
1924			APPEND32(item->ttl);
1925			if (item->is_name) {
1926				off_t len_idx = j, name_start;
1927				j += 2;
1928				name_start = j;
1929				r = dnsname_to_labels(buf, buf_len, j, item->data, strlen(item->data), &table);
1930				if (r < 0)
1931					goto overflow;
1932				j = r;
1933				_t = htons( (short) (j-name_start) );
1934				memcpy(buf+len_idx, &_t, 2);
1935			} else {
1936				APPEND16(item->datalen);
1937				if (j+item->datalen > (off_t)buf_len)
1938					goto overflow;
1939				memcpy(buf+j, item->data, item->datalen);
1940				j += item->datalen;
1941			}
1942			item = item->next;
1943		}
1944	}
1945
1946	if (j > 512) {
1947overflow:
1948		j = 512;
1949		buf[2] |= 0x02; /* set the truncated bit. */
1950	}
1951
1952	req->response_len = j;
1953
1954	if (!(req->response = mm_malloc(req->response_len))) {
1955		server_request_free_answers(req);
1956		dnslabel_clear(&table);
1957		return (-1);
1958	}
1959	memcpy(req->response, buf, req->response_len);
1960	server_request_free_answers(req);
1961	dnslabel_clear(&table);
1962	return (0);
1963}
1964
1965/* exported function */
1966int
1967evdns_server_request_respond(struct evdns_server_request *_req, int err)
1968{
1969	struct server_request *req = TO_SERVER_REQUEST(_req);
1970	struct evdns_server_port *port = req->port;
1971	int r = -1;
1972
1973	EVDNS_LOCK(port);
1974	if (!req->response) {
1975		if ((r = evdns_server_request_format_response(req, err))<0)
1976			goto done;
1977	}
1978
1979	r = sendto(port->socket, req->response, (int)req->response_len, 0,
1980			   (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1981	if (r<0) {
1982		int sock_err = evutil_socket_geterror(port->socket);
1983		if (EVUTIL_ERR_RW_RETRIABLE(sock_err))
1984			goto done;
1985
1986		if (port->pending_replies) {
1987			req->prev_pending = port->pending_replies->prev_pending;
1988			req->next_pending = port->pending_replies;
1989			req->prev_pending->next_pending =
1990				req->next_pending->prev_pending = req;
1991		} else {
1992			req->prev_pending = req->next_pending = req;
1993			port->pending_replies = req;
1994			port->choked = 1;
1995
1996			(void) event_del(&port->event);
1997			event_assign(&port->event, port->event_base, port->socket, (port->closing?0:EV_READ) | EV_WRITE | EV_PERSIST, server_port_ready_callback, port);
1998
1999			if (event_add(&port->event, NULL) < 0) {
2000				log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server");
2001			}
2002
2003		}
2004
2005		r = 1;
2006		goto done;
2007	}
2008	if (server_request_free(req)) {
2009		r = 0;
2010		goto done;
2011	}
2012
2013	if (port->pending_replies)
2014		server_port_flush(port);
2015
2016	r = 0;
2017done:
2018	EVDNS_UNLOCK(port);
2019	return r;
2020}
2021
2022/* Free all storage held by RRs in req. */
2023static void
2024server_request_free_answers(struct server_request *req)
2025{
2026	struct server_reply_item *victim, *next, **list;
2027	int i;
2028	for (i = 0; i < 3; ++i) {
2029		if (i==0)
2030			list = &req->answer;
2031		else if (i==1)
2032			list = &req->authority;
2033		else
2034			list = &req->additional;
2035
2036		victim = *list;
2037		while (victim) {
2038			next = victim->next;
2039			mm_free(victim->name);
2040			if (victim->data)
2041				mm_free(victim->data);
2042			mm_free(victim);
2043			victim = next;
2044		}
2045		*list = NULL;
2046	}
2047}
2048
/* Free all storage held by req, and remove links to it. */
/* return true iff we just wound up freeing the server_port. */
/*
 * In order: the question array is released, req is detached from the head
 * of the port's pending-reply list and one port reference is dropped, the
 * response buffer and queued RRs are released, req is unlinked from the
 * circular pending list, and finally req itself is freed (together with
 * the port when its reference count reached zero).
 */
static int
server_request_free(struct server_request *req)
{
	/* rc starts at 1 so that a request without a port never takes the
	 * "free the port" path; lock records whether the port lock is held. */
	int i, rc=1, lock=0;
	if (req->base.questions) {
		for (i = 0; i < req->base.nquestions; ++i)
			mm_free(req->base.questions[i]);
		mm_free(req->base.questions);
	}

	if (req->port) {
		EVDNS_LOCK(req->port);
		lock=1;
		/* If req is the list head, advance the head (or empty the list
		 * when req is its only member). */
		if (req->port->pending_replies == req) {
			if (req->next_pending && req->next_pending != req)
				req->port->pending_replies = req->next_pending;
			else
				req->port->pending_replies = NULL;
		}
		/* Drop this request's reference on the port. */
		rc = --req->port->refcnt;
	}

	if (req->response) {
		mm_free(req->response);
	}

	server_request_free_answers(req);

	/* Splice req out of the circular pending list, if it is on one with
	 * other members. */
	if (req->next_pending && req->next_pending != req) {
		req->next_pending->prev_pending = req->prev_pending;
		req->prev_pending->next_pending = req->next_pending;
	}

	if (rc == 0) {
		/* That was the last reference: the port goes away too. */
		EVDNS_UNLOCK(req->port); /* ????? nickm */
		server_port_free(req->port);
		mm_free(req);
		return (1);
	}
	if (lock)
		EVDNS_UNLOCK(req->port);
	mm_free(req);
	return (0);
}
2095
2096/* Free all storage held by an evdns_server_port.  Only called when  */
2097static void
2098server_port_free(struct evdns_server_port *port)
2099{
2100	EVUTIL_ASSERT(port);
2101	EVUTIL_ASSERT(!port->refcnt);
2102	EVUTIL_ASSERT(!port->pending_replies);
2103	if (port->socket > 0) {
2104		evutil_closesocket(port->socket);
2105		port->socket = -1;
2106	}
2107	(void) event_del(&port->event);
2108	event_debug_unassign(&port->event);
2109	EVTHREAD_FREE_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2110	mm_free(port);
2111}
2112
/* exported function */
/* Discard a server request without replying to it.  Always returns 0. */
int
evdns_server_request_drop(struct evdns_server_request *_req)
{
	/* The result of server_request_free() (whether the port was freed
	 * as well) is deliberately ignored here. */
	(void) server_request_free(TO_SERVER_REQUEST(_req));
	return 0;
}
2121
2122/* exported function */
2123int
2124evdns_server_request_get_requesting_addr(struct evdns_server_request *_req, struct sockaddr *sa, int addr_len)
2125{
2126	struct server_request *req = TO_SERVER_REQUEST(_req);
2127	if (addr_len < (int)req->addrlen)
2128		return -1;
2129	memcpy(sa, &(req->addr), req->addrlen);
2130	return req->addrlen;
2131}
2132
2133#undef APPEND16
2134#undef APPEND32
2135
2136/* this is a libevent callback function which is called when a request */
2137/* has timed out. */
2138static void
2139evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg) {
2140	struct request *const req = (struct request *) arg;
2141	struct evdns_base *base = req->base;
2142
2143	(void) fd;
2144	(void) events;
2145
2146	log(EVDNS_LOG_DEBUG, "Request %p timed out", arg);
2147	EVDNS_LOCK(base);
2148
2149	req->ns->timedout++;
2150	if (req->ns->timedout > req->base->global_max_nameserver_timeout) {
2151		req->ns->timedout = 0;
2152		nameserver_failed(req->ns, "request timed out.");
2153	}
2154
2155	if (req->tx_count >= req->base->global_max_retransmits) {
2156		/* this request has failed */
2157		log(EVDNS_LOG_DEBUG, "Giving up on request %p; tx_count==%d",
2158		    arg, req->tx_count);
2159		reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
2160		request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
2161	} else {
2162		/* retransmit it */
2163		struct nameserver *new_ns;
2164		log(EVDNS_LOG_DEBUG, "Retransmitting request %p; tx_count==%d",
2165		    arg, req->tx_count);
2166		(void) evtimer_del(&req->timeout_event);
2167		new_ns = nameserver_pick(base);
2168		if (new_ns)
2169			req->ns = new_ns;
2170		evdns_request_transmit(req);
2171	}
2172	EVDNS_UNLOCK(base);
2173}
2174
2175/* try to send a request to a given server. */
2176/* */
2177/* return: */
2178/*   0 ok */
2179/*   1 temporary failure */
2180/*   2 other failure */
2181static int
2182evdns_request_transmit_to(struct request *req, struct nameserver *server) {
2183	int r;
2184	ASSERT_LOCKED(req->base);
2185	ASSERT_VALID_REQUEST(req);
2186	r = sendto(server->socket, (void*)req->request, req->request_len, 0,
2187	    (struct sockaddr *)&server->address, server->addrlen);
2188	if (r < 0) {
2189		int err = evutil_socket_geterror(server->socket);
2190		if (EVUTIL_ERR_RW_RETRIABLE(err))
2191			return 1;
2192		nameserver_failed(req->ns, evutil_socket_error_to_string(err));
2193		return 2;
2194	} else if (r != (int)req->request_len) {
2195		return 1;  /* short write */
2196	} else {
2197		return 0;
2198	}
2199}
2200
2201/* try to send a request, updating the fields of the request */
2202/* as needed */
2203/* */
2204/* return: */
2205/*   0 ok */
2206/*   1 failed */
2207static int
2208evdns_request_transmit(struct request *req) {
2209	int retcode = 0, r;
2210
2211	ASSERT_LOCKED(req->base);
2212	ASSERT_VALID_REQUEST(req);
2213	/* if we fail to send this packet then this flag marks it */
2214	/* for evdns_transmit */
2215	req->transmit_me = 1;
2216	EVUTIL_ASSERT(req->trans_id != 0xffff);
2217
2218	if (req->ns->choked) {
2219		/* don't bother trying to write to a socket */
2220		/* which we have had EAGAIN from */
2221		return 1;
2222	}
2223
2224	r = evdns_request_transmit_to(req, req->ns);
2225	switch (r) {
2226	case 1:
2227		/* temp failure */
2228		req->ns->choked = 1;
2229		nameserver_write_waiting(req->ns, 1);
2230		return 1;
2231	case 2:
2232		/* failed to transmit the request entirely. */
2233		retcode = 1;
2234		/* fall through: we'll set a timeout, which will time out,
2235		 * and make us retransmit the request anyway. */
2236	default:
2237		/* all ok */
2238		log(EVDNS_LOG_DEBUG,
2239		    "Setting timeout for request %p, sent to nameserver %p", req, req->ns);
2240		if (evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0) {
2241			log(EVDNS_LOG_WARN,
2242		      "Error from libevent when adding timer for request %p",
2243			    req);
2244			/* ???? Do more? */
2245		}
2246		req->tx_count++;
2247		req->transmit_me = 0;
2248		return retcode;
2249	}
2250}
2251
2252static void
2253nameserver_probe_callback(int result, char type, int count, int ttl, void *addresses, void *arg) {
2254	struct nameserver *const ns = (struct nameserver *) arg;
2255	(void) type;
2256	(void) count;
2257	(void) ttl;
2258	(void) addresses;
2259
2260	if (result == DNS_ERR_CANCEL) {
2261		/* We canceled this request because the nameserver came up
2262		 * for some other reason.  Do not change our opinion about
2263		 * the nameserver. */
2264		return;
2265	}
2266
2267	EVDNS_LOCK(ns->base);
2268	ns->probe_request = NULL;
2269	if (result == DNS_ERR_NONE || result == DNS_ERR_NOTEXIST) {
2270		/* this is a good reply */
2271		nameserver_up(ns);
2272	} else {
2273		nameserver_probe_failed(ns);
2274	}
2275	EVDNS_UNLOCK(ns->base);
2276}
2277
2278static void
2279nameserver_send_probe(struct nameserver *const ns) {
2280	struct evdns_request *handle;
2281	struct request *req;
2282	char addrbuf[128];
2283	/* here we need to send a probe to a given nameserver */
2284	/* in the hope that it is up now. */
2285
2286	ASSERT_LOCKED(ns->base);
2287	log(EVDNS_LOG_DEBUG, "Sending probe to %s",
2288	    evutil_format_sockaddr_port(
2289		    (struct sockaddr *)&ns->address,
2290		    addrbuf, sizeof(addrbuf)));
2291	handle = mm_calloc(1, sizeof(*handle));
2292	if (!handle) return;
2293	req = request_new(ns->base, handle, TYPE_A, "google.com", DNS_QUERY_NO_SEARCH, nameserver_probe_callback, ns);
2294	if (!req) return;
2295	ns->probe_request = handle;
2296	/* we force this into the inflight queue no matter what */
2297	request_trans_id_set(req, transaction_id_pick(ns->base));
2298	req->ns = ns;
2299	request_submit(req);
2300}
2301
2302/* returns: */
2303/*   0 didn't try to transmit anything */
2304/*   1 tried to transmit something */
2305static int
2306evdns_transmit(struct evdns_base *base) {
2307	char did_try_to_transmit = 0;
2308	int i;
2309
2310	ASSERT_LOCKED(base);
2311	for (i = 0; i < base->n_req_heads; ++i) {
2312		if (base->req_heads[i]) {
2313			struct request *const started_at = base->req_heads[i], *req = started_at;
2314			/* first transmit all the requests which are currently waiting */
2315			do {
2316				if (req->transmit_me) {
2317					did_try_to_transmit = 1;
2318					evdns_request_transmit(req);
2319				}
2320
2321				req = req->next;
2322			} while (req != started_at);
2323		}
2324	}
2325
2326	return did_try_to_transmit;
2327}
2328
2329/* exported function */
2330int
2331evdns_base_count_nameservers(struct evdns_base *base)
2332{
2333	const struct nameserver *server;
2334	int n = 0;
2335
2336	EVDNS_LOCK(base);
2337	server = base->server_head;
2338	if (!server)
2339		goto done;
2340	do {
2341		++n;
2342		server = server->next;
2343	} while (server != base->server_head);
2344done:
2345	EVDNS_UNLOCK(base);
2346	return n;
2347}
2348
2349int
2350evdns_count_nameservers(void)
2351{
2352	return evdns_base_count_nameservers(current_base);
2353}
2354
/* exported function */
/*
 * Remove every nameserver from base and move all inflight requests back
 * onto the waiting queue.  If base has no nameservers, nothing is touched.
 * Always returns 0.
 */
int
evdns_base_clear_nameservers_and_suspend(struct evdns_base *base)
{
	struct nameserver *server, *started_at;
	int i;

	EVDNS_LOCK(base);
	server = base->server_head;
	started_at = base->server_head;
	if (!server) {
		EVDNS_UNLOCK(base);
		return 0;
	}
	/* Tear down each nameserver on the circular list: remove its events,
	 * cancel any outstanding probe, close its socket and free it. */
	while (1) {
		/* Fetch the successor before this node is freed. */
		struct nameserver *next = server->next;
		(void) event_del(&server->event);
		if (evtimer_initialized(&server->timeout_event))
			(void) evtimer_del(&server->timeout_event);
		if (server->probe_request) {
			evdns_cancel_request(server->base, server->probe_request);
			server->probe_request = NULL;
		}
		if (server->socket >= 0)
			evutil_closesocket(server->socket);
		mm_free(server);
		if (next == started_at)
			break;
		server = next;
	}
	base->server_head = NULL;
	base->global_good_nameservers = 0;

	/* Reset every inflight request and requeue it as waiting. */
	for (i = 0; i < base->n_req_heads; ++i) {
		struct request *req, *req_started_at;
		req = req_started_at = base->req_heads[i];
		while (req) {
			/* Fetch the successor first: inserting req into the
			 * waiting queue rewrites its next/prev links. */
			struct request *next = req->next;
			req->tx_count = req->reissue_count = 0;
			req->ns = NULL;
			/* ???? What to do about searches? */
			(void) evtimer_del(&req->timeout_event);
			req->trans_id = 0;
			req->transmit_me = 0;

			base->global_requests_waiting++;
			evdns_request_insert(req, &base->req_waiting_head);
			/* We want to insert these suspended elements at the front of
			 * the waiting queue, since they were pending before any of
			 * the waiting entries were added.  This is a circular list,
			 * so we can just shift the start back by one.*/
			base->req_waiting_head = base->req_waiting_head->prev;

			if (next == req_started_at)
				break;
			req = next;
		}
		base->req_heads[i] = NULL;
	}

	base->global_requests_inflight = 0;

	EVDNS_UNLOCK(base);
	return 0;
}
2420
2421int
2422evdns_clear_nameservers_and_suspend(void)
2423{
2424	return evdns_base_clear_nameservers_and_suspend(current_base);
2425}
2426
2427
/* exported function */
/* Pump base's waiting queue under the base lock.  Always returns 0. */
int
evdns_base_resume(struct evdns_base *base)
{
	EVDNS_LOCK(base);
	evdns_requests_pump_waiting_queue(base);
	EVDNS_UNLOCK(base);

	return (0);
}
2437
2438int
2439evdns_resume(void)
2440{
2441	return evdns_base_resume(current_base);
2442}
2443
/*
 * Add a nameserver with the given socket address to base.  The caller must
 * hold the base lock.
 *
 * Returns:
 *    0 on success
 *    3 if a nameserver with this address is already configured
 *    2 if addrlen is too large, binding to the outgoing address fails, or
 *      the read event cannot be added
 *    1 if the socket cannot be created
 *   -1 if memory cannot be allocated
 */
static int
_evdns_nameserver_add_impl(struct evdns_base *base, const struct sockaddr *address, int addrlen) {
	/* first check to see if we already have this nameserver */

	const struct nameserver *server = base->server_head, *const started_at = base->server_head;
	struct nameserver *ns;
	int err = 0;
	char addrbuf[128];

	ASSERT_LOCKED(base);
	if (server) {
		do {
			if (!evutil_sockaddr_cmp((struct sockaddr*)&server->address, address, 1)) return 3;
			server = server->next;
		} while (server != started_at);
	}
	/* The address is copied into ns->address below, so it must fit. */
	if (addrlen > (int)sizeof(ns->address)) {
		log(EVDNS_LOG_DEBUG, "Addrlen %d too long.", (int)addrlen);
		return 2;
	}

	ns = (struct nameserver *) mm_malloc(sizeof(struct nameserver));
	if (!ns) return -1;

	memset(ns, 0, sizeof(struct nameserver));
	ns->base = base;

	evtimer_assign(&ns->timeout_event, ns->base->event_base, nameserver_prod_callback, ns);

	ns->socket = socket(address->sa_family, SOCK_DGRAM, 0);
	if (ns->socket < 0) { err = 1; goto out1; }
	evutil_make_socket_closeonexec(ns->socket);
	evutil_make_socket_nonblocking(ns->socket);

	/* Bind to the configured outgoing address, except when the
	 * nameserver itself is on a loopback address. */
	if (base->global_outgoing_addrlen &&
	    !evutil_sockaddr_is_loopback(address)) {
		if (bind(ns->socket,
			(struct sockaddr*)&base->global_outgoing_address,
			base->global_outgoing_addrlen) < 0) {
			log(EVDNS_LOG_WARN,"Couldn't bind to outgoing address");
			err = 2;
			goto out2;
		}
	}

	memcpy(&ns->address, address, addrlen);
	ns->addrlen = addrlen;
	ns->state = 1;
	event_assign(&ns->event, ns->base->event_base, ns->socket, EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
	if (event_add(&ns->event, NULL) < 0) {
		err = 2;
		goto out2;
	}

	log(EVDNS_LOG_DEBUG, "Added nameserver %s as %p",
	    evutil_format_sockaddr_port(address, addrbuf, sizeof(addrbuf)), ns);

	/* insert this nameserver into the list of them */
	if (!base->server_head) {
		ns->next = ns->prev = ns;
		base->server_head = ns;
	} else {
		/* Link the new server in right after the current head. */
		ns->next = base->server_head->next;
		ns->prev = base->server_head;
		base->server_head->next = ns;
		ns->next->prev = ns;
	}

	base->global_good_nameservers++;

	return 0;

	/* Error exits: out2 also closes the socket, out1 only frees ns. */
out2:
	evutil_closesocket(ns->socket);
out1:
	event_debug_unassign(&ns->event);
	mm_free(ns);
	log(EVDNS_LOG_WARN, "Unable to add nameserver %s: error %d",
	    evutil_format_sockaddr_port(address, addrbuf, sizeof(addrbuf)), err);
	return err;
}
2525
/* exported function */
/* Add an IPv4 nameserver on port 53.  address is stored directly into */
/* sin_addr.s_addr.  Returns the result of _evdns_nameserver_add_impl(). */
int
evdns_base_nameserver_add(struct evdns_base *base, unsigned long int address)
{
	struct sockaddr_in sin;
	int rv;

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(53);
	sin.sin_addr.s_addr = address;

	EVDNS_LOCK(base);
	rv = _evdns_nameserver_add_impl(base, (struct sockaddr*)&sin, sizeof(sin));
	EVDNS_UNLOCK(base);

	return rv;
}
2541
2542int
2543evdns_nameserver_add(unsigned long int address) {
2544	if (!current_base)
2545		current_base = evdns_base_new(NULL, 0);
2546	return evdns_base_nameserver_add(current_base, address);
2547}
2548
2549static void
2550sockaddr_setport(struct sockaddr *sa, ev_uint16_t port)
2551{
2552	if (sa->sa_family == AF_INET) {
2553		((struct sockaddr_in *)sa)->sin_port = htons(port);
2554	} else if (sa->sa_family == AF_INET6) {
2555		((struct sockaddr_in6 *)sa)->sin6_port = htons(port);
2556	}
2557}
2558
2559static ev_uint16_t
2560sockaddr_getport(struct sockaddr *sa)
2561{
2562	if (sa->sa_family == AF_INET) {
2563		return ntohs(((struct sockaddr_in *)sa)->sin_port);
2564	} else if (sa->sa_family == AF_INET6) {
2565		return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
2566	} else {
2567		return 0;
2568	}
2569}
2570
2571/* exported function */
2572int
2573evdns_base_nameserver_ip_add(struct evdns_base *base, const char *ip_as_string) {
2574	struct sockaddr_storage ss;
2575	struct sockaddr *sa;
2576	int len = sizeof(ss);
2577	int res;
2578	if (evutil_parse_sockaddr_port(ip_as_string, (struct sockaddr *)&ss,
2579		&len)) {
2580		log(EVDNS_LOG_WARN, "Unable to parse nameserver address %s",
2581			ip_as_string);
2582		return 4;
2583	}
2584	sa = (struct sockaddr *) &ss;
2585	if (sockaddr_getport(sa) == 0)
2586		sockaddr_setport(sa, 53);
2587
2588	EVDNS_LOCK(base);
2589	res = _evdns_nameserver_add_impl(base, sa, len);
2590	EVDNS_UNLOCK(base);
2591	return res;
2592}
2593
2594int
2595evdns_nameserver_ip_add(const char *ip_as_string) {
2596	if (!current_base)
2597		current_base = evdns_base_new(NULL, 0);
2598	return evdns_base_nameserver_ip_add(current_base, ip_as_string);
2599}
2600
2601int
2602evdns_base_nameserver_sockaddr_add(struct evdns_base *base,
2603    const struct sockaddr *sa, ev_socklen_t len, unsigned flags)
2604{
2605	int res;
2606	EVUTIL_ASSERT(base);
2607	EVDNS_LOCK(base);
2608	res = _evdns_nameserver_add_impl(base, sa, len);
2609	EVDNS_UNLOCK(base);
2610	return res;
2611}
2612
2613/* remove from the queue */
2614static void
2615evdns_request_remove(struct request *req, struct request **head)
2616{
2617	ASSERT_LOCKED(req->base);
2618	ASSERT_VALID_REQUEST(req);
2619
2620#if 0
2621	{
2622		struct request *ptr;
2623		int found = 0;
2624		EVUTIL_ASSERT(*head != NULL);
2625
2626		ptr = *head;
2627		do {
2628			if (ptr == req) {
2629				found = 1;
2630				break;
2631			}
2632			ptr = ptr->next;
2633		} while (ptr != *head);
2634		EVUTIL_ASSERT(found);
2635
2636		EVUTIL_ASSERT(req->next);
2637	}
2638#endif
2639
2640	if (req->next == req) {
2641		/* only item in the list */
2642		*head = NULL;
2643	} else {
2644		req->next->prev = req->prev;
2645		req->prev->next = req->next;
2646		if (*head == req) *head = req->next;
2647	}
2648	req->next = req->prev = NULL;
2649}
2650
2651/* insert into the tail of the queue */
2652static void
2653evdns_request_insert(struct request *req, struct request **head) {
2654	ASSERT_LOCKED(req->base);
2655	ASSERT_VALID_REQUEST(req);
2656	if (!*head) {
2657		*head = req;
2658		req->next = req->prev = req;
2659		return;
2660	}
2661
2662	req->prev = (*head)->prev;
2663	req->prev->next = req;
2664	req->next = *head;
2665	(*head)->prev = req;
2666}
2667
/* Return the number of '.' characters in the NUL-terminated string s. */
static int
string_num_dots(const char *s) {
	const char *p;
	int dots = 0;

	for (p = s; *p != '\0'; ++p) {
		if (*p == '.')
			++dots;
	}
	return dots;
}
2677
/*
 * Allocate and fill in a new request for the given name and record type.
 * The caller must hold the base lock.  The request is not queued here.
 *
 * handle, if non-NULL, is pointed at the new request on success.  flags is
 * not used by this function.  callback and user_ptr are stored for the
 * eventual reply.
 *
 * Returns the new request, or NULL when allocation fails, when name does
 * not fit in the 256-byte name buffer, or when the query packet cannot be
 * built.
 */
static struct request *
request_new(struct evdns_base *base, struct evdns_request *handle, int type,
	    const char *name, int flags, evdns_callback_type callback,
	    void *user_ptr) {

	/* True when there is still room under the inflight limit. */
	const char issuing_now =
	    (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;

	const size_t name_len = strlen(name);
	const size_t request_max_len = evdns_request_len(name_len);
	/* 0xffff is used as the transaction id when not issuing now. */
	const u16 trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
	/* the request data is alloced in a single block with the header */
	struct request *const req =
	    mm_malloc(sizeof(struct request) + request_max_len);
	int rlen;
	char namebuf[256];
	(void) flags;

	ASSERT_LOCKED(base);

	if (!req) return NULL;

	/* Reject names that would not fit in namebuf with a terminator. */
	if (name_len >= sizeof(namebuf)) {
		mm_free(req);
		return NULL;
	}

	memset(req, 0, sizeof(struct request));
	req->base = base;

	evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);

	if (base->global_randomize_case) {
		/* Work on a copy of the name: one random bit per character
		 * decides whether a letter has its 0x20 bit set or cleared. */
		unsigned i;
		char randbits[(sizeof(namebuf)+7)/8];
		strlcpy(namebuf, name, sizeof(namebuf));
		evutil_secure_rng_get_bytes(randbits, (name_len+7)/8);
		for (i = 0; i < name_len; ++i) {
			if (EVUTIL_ISALPHA(namebuf[i])) {
				if ((randbits[i >> 3] & (1<<(i & 7))))
					namebuf[i] |= 0x20;
				else
					namebuf[i] &= ~0x20;
			}
		}
		name = namebuf;
	}

	/* request data lives just after the header */
	req->request = ((u8 *) req) + sizeof(struct request);
	/* denotes that the request data shouldn't be free()ed */
	req->request_appended = 1;
	rlen = evdns_request_data_build(name, name_len, trans_id,
	    type, CLASS_INET, req->request, request_max_len);
	if (rlen < 0)
		goto err1;

	req->request_len = rlen;
	req->trans_id = trans_id;
	req->tx_count = 0;
	req->request_type = type;
	req->user_pointer = user_ptr;
	req->user_callback = callback;
	/* A nameserver is only assigned when the request is issued now. */
	req->ns = issuing_now ? nameserver_pick(base) : NULL;
	req->next = req->prev = NULL;
	req->handle = handle;
	if (handle) {
		handle->current_req = req;
		handle->base = base;
	}

	return req;
err1:
	mm_free(req);
	return NULL;
}
2754
2755static void
2756request_submit(struct request *const req) {
2757	struct evdns_base *base = req->base;
2758	ASSERT_LOCKED(base);
2759	ASSERT_VALID_REQUEST(req);
2760	if (req->ns) {
2761		/* if it has a nameserver assigned then this is going */
2762		/* straight into the inflight queue */
2763		evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
2764		base->global_requests_inflight++;
2765		evdns_request_transmit(req);
2766	} else {
2767		evdns_request_insert(req, &base->req_waiting_head);
2768		base->global_requests_waiting++;
2769	}
2770}
2771
2772/* exported function */
2773void
2774evdns_cancel_request(struct evdns_base *base, struct evdns_request *handle)
2775{
2776	struct request *req;
2777
2778	if (!handle->current_req)
2779		return;
2780
2781	if (!base) {
2782		/* This redundancy is silly; can we fix it? (Not for 2.0) XXXX */
2783		base = handle->base;
2784		if (!base)
2785			base = handle->current_req->base;
2786	}
2787
2788	EVDNS_LOCK(base);
2789	if (handle->pending_cb) {
2790		EVDNS_UNLOCK(base);
2791		return;
2792	}
2793
2794	req = handle->current_req;
2795	ASSERT_VALID_REQUEST(req);
2796
2797	reply_schedule_callback(req, 0, DNS_ERR_CANCEL, NULL);
2798	if (req->ns) {
2799		/* remove from inflight queue */
2800		request_finished(req, &REQ_HEAD(base, req->trans_id), 1);
2801	} else {
2802		/* remove from global_waiting head */
2803		request_finished(req, &base->req_waiting_head, 1);
2804	}
2805	EVDNS_UNLOCK(base);
2806}
2807
2808/* exported function */
2809struct evdns_request *
2810evdns_base_resolve_ipv4(struct evdns_base *base, const char *name, int flags,
2811    evdns_callback_type callback, void *ptr) {
2812	struct evdns_request *handle;
2813	struct request *req;
2814	log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2815	handle = mm_calloc(1, sizeof(*handle));
2816	if (handle == NULL)
2817		return NULL;
2818	EVDNS_LOCK(base);
2819	if (flags & DNS_QUERY_NO_SEARCH) {
2820		req =
2821			request_new(base, handle, TYPE_A, name, flags,
2822				    callback, ptr);
2823		if (req)
2824			request_submit(req);
2825	} else {
2826		search_request_new(base, handle, TYPE_A, name, flags,
2827		    callback, ptr);
2828	}
2829	if (handle->current_req == NULL) {
2830		mm_free(handle);
2831		handle = NULL;
2832	}
2833	EVDNS_UNLOCK(base);
2834	return handle;
2835}
2836
2837int evdns_resolve_ipv4(const char *name, int flags,
2838					   evdns_callback_type callback, void *ptr)
2839{
2840	return evdns_base_resolve_ipv4(current_base, name, flags, callback, ptr)
2841		? 0 : -1;
2842}
2843
2844
2845/* exported function */
2846struct evdns_request *
2847evdns_base_resolve_ipv6(struct evdns_base *base,
2848    const char *name, int flags,
2849    evdns_callback_type callback, void *ptr)
2850{
2851	struct evdns_request *handle;
2852	struct request *req;
2853	log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2854	handle = mm_calloc(1, sizeof(*handle));
2855	if (handle == NULL)
2856		return NULL;
2857	EVDNS_LOCK(base);
2858	if (flags & DNS_QUERY_NO_SEARCH) {
2859		req = request_new(base, handle, TYPE_AAAA, name, flags,
2860				  callback, ptr);
2861		if (req)
2862			request_submit(req);
2863	} else {
2864		search_request_new(base, handle, TYPE_AAAA, name, flags,
2865		    callback, ptr);
2866	}
2867	if (handle->current_req == NULL) {
2868		mm_free(handle);
2869		handle = NULL;
2870	}
2871	EVDNS_UNLOCK(base);
2872	return handle;
2873}
2874
2875int evdns_resolve_ipv6(const char *name, int flags,
2876    evdns_callback_type callback, void *ptr) {
2877	return evdns_base_resolve_ipv6(current_base, name, flags, callback, ptr)
2878		? 0 : -1;
2879}
2880
2881struct evdns_request *
2882evdns_base_resolve_reverse(struct evdns_base *base, const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2883	char buf[32];
2884	struct evdns_request *handle;
2885	struct request *req;
2886	u32 a;
2887	EVUTIL_ASSERT(in);
2888	a = ntohl(in->s_addr);
2889	evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
2890			(int)(u8)((a	)&0xff),
2891			(int)(u8)((a>>8 )&0xff),
2892			(int)(u8)((a>>16)&0xff),
2893			(int)(u8)((a>>24)&0xff));
2894	handle = mm_calloc(1, sizeof(*handle));
2895	if (handle == NULL)
2896		return NULL;
2897	log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
2898	EVDNS_LOCK(base);
2899	req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
2900	if (req)
2901		request_submit(req);
2902	if (handle->current_req == NULL) {
2903		mm_free(handle);
2904		handle = NULL;
2905	}
2906	EVDNS_UNLOCK(base);
2907	return (handle);
2908}
2909
2910int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2911	return evdns_base_resolve_reverse(current_base, in, flags, callback, ptr)
2912		? 0 : -1;
2913}
2914
2915struct evdns_request *
2916evdns_base_resolve_reverse_ipv6(struct evdns_base *base, const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2917	/* 32 nybbles, 32 periods, "ip6.arpa", NUL. */
2918	char buf[73];
2919	char *cp;
2920	struct evdns_request *handle;
2921	struct request *req;
2922	int i;
2923	EVUTIL_ASSERT(in);
2924	cp = buf;
2925	for (i=15; i >= 0; --i) {
2926		u8 byte = in->s6_addr[i];
2927		*cp++ = "0123456789abcdef"[byte & 0x0f];
2928		*cp++ = '.';
2929		*cp++ = "0123456789abcdef"[byte >> 4];
2930		*cp++ = '.';
2931	}
2932	EVUTIL_ASSERT(cp + strlen("ip6.arpa") < buf+sizeof(buf));
2933	memcpy(cp, "ip6.arpa", strlen("ip6.arpa")+1);
2934	handle = mm_calloc(1, sizeof(*handle));
2935	if (handle == NULL)
2936		return NULL;
2937	log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
2938	EVDNS_LOCK(base);
2939	req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
2940	if (req)
2941		request_submit(req);
2942	if (handle->current_req == NULL) {
2943		mm_free(handle);
2944		handle = NULL;
2945	}
2946	EVDNS_UNLOCK(base);
2947	return (handle);
2948}
2949
2950int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2951	return evdns_base_resolve_reverse_ipv6(current_base, in, flags, callback, ptr)
2952		? 0 : -1;
2953}
2954
2955/* ================================================================= */
2956/* Search support */
2957/* */
2958/* the libc resolver has support for searching a number of domains */
2959/* to find a name. If nothing else then it takes the single domain */
2960/* from the gethostname() call. */
2961/* */
2962/* It can also be configured via the domain and search options in a */
2963/* resolv.conf. */
2964/* */
2965/* The ndots option controls how many dots it takes for the resolver */
2966/* to decide that a name is non-local and so try a raw lookup first. */
2967
/* One entry in the singly linked list of search domains. */
struct search_domain {
	/* length in bytes of the domain text stored after this structure;
	 * the text is stored without a terminating NUL */
	int len;
	/* next entry in the list, or NULL at the end */
	struct search_domain *next;
	/* the text string is appended to this structure */
};
2973
/* Reference-counted search configuration for an evdns_base. */
struct search_state {
	/* number of references; search_state_decref() frees the state and
	 * its domain list when this reaches zero */
	int refcount;
	/* dot-count threshold for trying a name as given before applying
	 * search domains; search_state_new() sets it to 1 */
	int ndots;
	/* counter incremented as search domains are added */
	int num_domains;
	/* first entry of the search domain list, or NULL when empty */
	struct search_domain *head;
};
2980
2981static void
2982search_state_decref(struct search_state *const state) {
2983	if (!state) return;
2984	state->refcount--;
2985	if (!state->refcount) {
2986		struct search_domain *next, *dom;
2987		for (dom = state->head; dom; dom = next) {
2988			next = dom->next;
2989			mm_free(dom);
2990		}
2991		mm_free(state);
2992	}
2993}
2994
2995static struct search_state *
2996search_state_new(void) {
2997	struct search_state *state = (struct search_state *) mm_malloc(sizeof(struct search_state));
2998	if (!state) return NULL;
2999	memset(state, 0, sizeof(struct search_state));
3000	state->refcount = 1;
3001	state->ndots = 1;
3002
3003	return state;
3004}
3005
3006static void
3007search_postfix_clear(struct evdns_base *base) {
3008	search_state_decref(base->global_search_state);
3009
3010	base->global_search_state = search_state_new();
3011}
3012
/* exported function */
/* Reset base's search state to an empty one, under the base lock. */
void
evdns_base_search_clear(struct evdns_base *base)
{
	EVDNS_LOCK(base);

	search_postfix_clear(base);

	EVDNS_UNLOCK(base);
}
3021
3022void
3023evdns_search_clear(void) {
3024	evdns_base_search_clear(current_base);
3025}
3026
3027static void
3028search_postfix_add(struct evdns_base *base, const char *domain) {
3029	size_t domain_len;
3030	struct search_domain *sdomain;
3031	while (domain[0] == '.') domain++;
3032	domain_len = strlen(domain);
3033
3034	ASSERT_LOCKED(base);
3035	if (!base->global_search_state) base->global_search_state = search_state_new();
3036	if (!base->global_search_state) return;
3037	base->global_search_state->num_domains++;
3038
3039	sdomain = (struct search_domain *) mm_malloc(sizeof(struct search_domain) + domain_len);
3040	if (!sdomain) return;
3041	memcpy( ((u8 *) sdomain) + sizeof(struct search_domain), domain, domain_len);
3042	sdomain->next = base->global_search_state->head;
3043	sdomain->len = (int) domain_len;
3044
3045	base->global_search_state->head = sdomain;
3046}
3047
3048/* reverse the order of members in the postfix list. This is needed because, */
3049/* when parsing resolv.conf we push elements in the wrong order */
3050static void
3051search_reverse(struct evdns_base *base) {
3052	struct search_domain *cur, *prev = NULL, *next;
3053	ASSERT_LOCKED(base);
3054	cur = base->global_search_state->head;
3055	while (cur) {
3056		next = cur->next;
3057		cur->next = prev;
3058		prev = cur;
3059		cur = next;
3060	}
3061
3062	base->global_search_state->head = prev;
3063}
3064
/* exported function */
/* Add domain to base's search domain list, under the base lock. */
void
evdns_base_search_add(struct evdns_base *base, const char *domain)
{
	EVDNS_LOCK(base);

	search_postfix_add(base, domain);

	EVDNS_UNLOCK(base);
}
3072void
3073evdns_search_add(const char *domain) {
3074	evdns_base_search_add(current_base, domain);
3075}
3076
3077/* exported function */
3078void
3079evdns_base_search_ndots_set(struct evdns_base *base, const int ndots) {
3080	EVDNS_LOCK(base);
3081	if (!base->global_search_state) base->global_search_state = search_state_new();
3082	if (base->global_search_state)
3083		base->global_search_state->ndots = ndots;
3084	EVDNS_UNLOCK(base);
3085}
3086void
3087evdns_search_ndots_set(const int ndots) {
3088	evdns_base_search_ndots_set(current_base, ndots);
3089}
3090
3091static void
3092search_set_from_hostname(struct evdns_base *base) {
3093	char hostname[HOST_NAME_MAX + 1], *domainname;
3094
3095	ASSERT_LOCKED(base);
3096	search_postfix_clear(base);
3097	if (gethostname(hostname, sizeof(hostname))) return;
3098	domainname = strchr(hostname, '.');
3099	if (!domainname) return;
3100	search_postfix_add(base, domainname);
3101}
3102
3103/* warning: returns malloced string */
3104static char *
3105search_make_new(const struct search_state *const state, int n, const char *const base_name) {
3106	const size_t base_len = strlen(base_name);
3107	const char need_to_append_dot = base_name[base_len - 1] == '.' ? 0 : 1;
3108	struct search_domain *dom;
3109
3110	for (dom = state->head; dom; dom = dom->next) {
3111		if (!n--) {
3112			/* this is the postfix we want */
3113			/* the actual postfix string is kept at the end of the structure */
3114			const u8 *const postfix = ((u8 *) dom) + sizeof(struct search_domain);
3115			const int postfix_len = dom->len;
3116			char *const newname = (char *) mm_malloc(base_len + need_to_append_dot + postfix_len + 1);
3117			if (!newname) return NULL;
3118			memcpy(newname, base_name, base_len);
3119			if (need_to_append_dot) newname[base_len] = '.';
3120			memcpy(newname + base_len + need_to_append_dot, postfix, postfix_len);
3121			newname[base_len + need_to_append_dot + postfix_len] = 0;
3122			return newname;
3123		}
3124	}
3125
3126	/* we ran off the end of the list and still didn't find the requested string */
3127	EVUTIL_ASSERT(0);
3128	return NULL; /* unreachable; stops warnings in some compilers. */
3129}
3130
3131static struct request *
3132search_request_new(struct evdns_base *base, struct evdns_request *handle,
3133		   int type, const char *const name, int flags,
3134		   evdns_callback_type user_callback, void *user_arg) {
3135	ASSERT_LOCKED(base);
3136	EVUTIL_ASSERT(type == TYPE_A || type == TYPE_AAAA);
3137	EVUTIL_ASSERT(handle->current_req == NULL);
3138	if ( ((flags & DNS_QUERY_NO_SEARCH) == 0) &&
3139	     base->global_search_state &&
3140		 base->global_search_state->num_domains) {
3141		/* we have some domains to search */
3142		struct request *req;
3143		if (string_num_dots(name) >= base->global_search_state->ndots) {
3144			req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3145			if (!req) return NULL;
3146			handle->search_index = -1;
3147		} else {
3148			char *const new_name = search_make_new(base->global_search_state, 0, name);
3149			if (!new_name) return NULL;
3150			req = request_new(base, handle, type, new_name, flags, user_callback, user_arg);
3151			mm_free(new_name);
3152			if (!req) return NULL;
3153			handle->search_index = 0;
3154		}
3155		EVUTIL_ASSERT(handle->search_origname == NULL);
3156		handle->search_origname = mm_strdup(name);
3157		if (handle->search_origname == NULL) {
3158			/* XXX Should we dealloc req? If yes, how? */
3159			return NULL;
3160		}
3161		handle->search_state = base->global_search_state;
3162		handle->search_flags = flags;
3163		base->global_search_state->refcount++;
3164		request_submit(req);
3165		return req;
3166	} else {
3167		struct request *const req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3168		if (!req) return NULL;
3169		request_submit(req);
3170		return req;
3171	}
3172}
3173
3174/* this is called when a request has failed to find a name. We need to check */
3175/* if it is part of a search and, if so, try the next name in the list */
3176/* returns: */
3177/*   0 another request has been submitted */
3178/*   1 no more requests needed */
3179static int
3180search_try_next(struct evdns_request *const handle) {
3181	struct request *req = handle->current_req;
3182	struct evdns_base *base = req->base;
3183	struct request *newreq;
3184	ASSERT_LOCKED(base);
3185	if (handle->search_state) {
3186		/* it is part of a search */
3187		char *new_name;
3188		handle->search_index++;
3189		if (handle->search_index >= handle->search_state->num_domains) {
3190			/* no more postfixes to try, however we may need to try */
3191			/* this name without a postfix */
3192			if (string_num_dots(handle->search_origname) < handle->search_state->ndots) {
3193				/* yep, we need to try it raw */
3194				newreq = request_new(base, NULL, req->request_type, handle->search_origname, handle->search_flags, req->user_callback, req->user_pointer);
3195				log(EVDNS_LOG_DEBUG, "Search: trying raw query %s", handle->search_origname);
3196				if (newreq) {
3197					search_request_finished(handle);
3198					goto submit_next;
3199				}
3200			}
3201			return 1;
3202		}
3203
3204		new_name = search_make_new(handle->search_state, handle->search_index, handle->search_origname);
3205		if (!new_name) return 1;
3206		log(EVDNS_LOG_DEBUG, "Search: now trying %s (%d)", new_name, handle->search_index);
3207		newreq = request_new(base, NULL, req->request_type, new_name, handle->search_flags, req->user_callback, req->user_pointer);
3208		mm_free(new_name);
3209		if (!newreq) return 1;
3210		goto submit_next;
3211	}
3212	return 1;
3213
3214submit_next:
3215	request_finished(req, &REQ_HEAD(req->base, req->trans_id), 0);
3216	handle->current_req = newreq;
3217	newreq->handle = handle;
3218	request_submit(newreq);
3219	return 0;
3220}
3221
3222static void
3223search_request_finished(struct evdns_request *const handle) {
3224	ASSERT_LOCKED(handle->current_req->base);
3225	if (handle->search_state) {
3226		search_state_decref(handle->search_state);
3227		handle->search_state = NULL;
3228	}
3229	if (handle->search_origname) {
3230		mm_free(handle->search_origname);
3231		handle->search_origname = NULL;
3232	}
3233}
3234
3235/* ================================================================= */
3236/* Parsing resolv.conf files */
3237
3238static void
3239evdns_resolv_set_defaults(struct evdns_base *base, int flags) {
3240	/* if the file isn't found then we assume a local resolver */
3241	ASSERT_LOCKED(base);
3242	if (flags & DNS_OPTION_SEARCH) search_set_from_hostname(base);
3243	if (flags & DNS_OPTION_NAMESERVERS) evdns_base_nameserver_ip_add(base,"127.0.0.1");
3244}
3245
#ifndef _EVENT_HAVE_STRTOK_R
/* Replacement for strtok_r() on platforms that lack it. */
/* Returns the next token of 's' (or, when 's' is NULL, of the string */
/* remembered in '*state'), where tokens are maximal runs of characters not */
/* in 'delim'.  Leading and repeated delimiters are skipped, so an empty */
/* token is never returned.  The input is modified: the delimiter ending a */
/* token is overwritten with NUL.  Returns NULL when no token is left. */
static char *
strtok_r(char *s, const char *delim, char **state) {
	char *cp, *start;
	cp = s ? s : *state;
	if (!cp)
		return NULL;
	/* Skip delimiters in front of the token.  The '*cp &&' test is */
	/* needed because strchr() also matches the terminating NUL. */
	while (*cp && strchr(delim, *cp))
		++cp;
	if (!*cp) {
		/* only delimiters (or nothing) left */
		*state = NULL;
		return NULL;
	}
	start = cp;
	while (*cp && !strchr(delim, *cp))
		++cp;
	if (*cp) {
		*cp++ = '\0';
		*state = cp;
	} else {
		/* the token ran to the end of the string */
		*state = NULL;
	}
	return start;
}
#endif
3267
/* helper version of atoi which returns -1 on error */
/* An error is trailing non-numeric text, or a value that does not fit in */
/* an int.  Note that -1 is also what a literal "-1" parses to, and that an */
/* empty string parses as 0. */
static int
strtoint(const char *const str)
{
	char *endptr;
	const long r = strtol(str, &endptr, 10);
	if (*endptr) return -1;
	/* reject values that would change when narrowed to int */
	if ((long)(int)r != r) return -1;
	return (int)r;
}
3277
/* Parse a number of seconds into a timeval; return -1 on error. */
/* Errors are: trailing garbage, a negative value, NaN, a value too large */
/* to convert, or a value of less than one millisecond.  '*out' may have */
/* been written even when -1 is returned for the last reason. */
static int
strtotimeval(const char *const str, struct timeval *out)
{
	double d;
	char *endptr;
	d = strtod(str, &endptr);
	if (*endptr) return -1;
	/* written as a negated '>=' so that NaN is rejected as well */
	if (!(d >= 0)) return -1;
	/* the (int) conversions below are undefined for values that do not */
	/* fit; this bound also rejects infinity */
	if (d >= 2147483648.0) return -1;
	out->tv_sec = (int) d;
	out->tv_usec = (int) ((d - (int) d)*1000000);
	if (out->tv_sec == 0 && out->tv_usec < 1000) /* less than 1 msec */
		return -1;
	return 0;
}
3293
/* Like strtoint(), but a successfully parsed value is forced into the */
/* range [min, max].  The error value -1 is passed through unclipped. */
static int
strtoint_clipped(const char *const str, int min, int max)
{
	const int parsed = strtoint(str);

	if (parsed == -1)
		return -1;
	if (parsed < min)
		return min;
	if (parsed > max)
		return max;
	return parsed;
}
3308
3309static int
3310evdns_base_set_max_requests_inflight(struct evdns_base *base, int maxinflight)
3311{
3312	int old_n_heads = base->n_req_heads, n_heads;
3313	struct request **old_heads = base->req_heads, **new_heads, *req;
3314	int i;
3315
3316	ASSERT_LOCKED(base);
3317	if (maxinflight < 1)
3318		maxinflight = 1;
3319	n_heads = (maxinflight+4) / 5;
3320	EVUTIL_ASSERT(n_heads > 0);
3321	new_heads = mm_calloc(n_heads, sizeof(struct request*));
3322	if (!new_heads)
3323		return (-1);
3324	if (old_heads) {
3325		for (i = 0; i < old_n_heads; ++i) {
3326			while (old_heads[i]) {
3327				req = old_heads[i];
3328				evdns_request_remove(req, &old_heads[i]);
3329				evdns_request_insert(req, &new_heads[req->trans_id % n_heads]);
3330			}
3331		}
3332		mm_free(old_heads);
3333	}
3334	base->req_heads = new_heads;
3335	base->n_req_heads = n_heads;
3336	base->global_max_requests_inflight = maxinflight;
3337	return (0);
3338}
3339
3340/* exported function */
3341int
3342evdns_base_set_option(struct evdns_base *base,
3343    const char *option, const char *val)
3344{
3345	int res;
3346	EVDNS_LOCK(base);
3347	res = evdns_base_set_option_impl(base, option, val, DNS_OPTIONS_ALL);
3348	EVDNS_UNLOCK(base);
3349	return res;
3350}
3351
3352static inline int
3353str_matches_option(const char *s1, const char *optionname)
3354{
3355	/* Option names are given as "option:" We accept either 'option' in
3356	 * s1, or 'option:randomjunk'.  The latter form is to implement the
3357	 * resolv.conf parser. */
3358	size_t optlen = strlen(optionname);
3359	size_t slen = strlen(s1);
3360	if (slen == optlen || slen == optlen - 1)
3361		return !strncmp(s1, optionname, slen);
3362	else if (slen > optlen)
3363		return !strncmp(s1, optionname, optlen);
3364	else
3365		return 0;
3366}
3367
/* Set one configuration option on 'base'.  Caller holds the lock.
 *
 * 'option' is matched with str_matches_option(), so both "name" and
 * "name:..." are accepted; 'val' is the textual value.  'flags' selects
 * which classes of options may actually be applied (DNS_OPTION_SEARCH,
 * DNS_OPTION_MISC, DNS_OPTION_NAMESERVERS): an option whose class is not
 * enabled is skipped and 0 is returned.  Except for "bind-to:", the value
 * is validated even when the option is skipped.
 *
 * Returns 0 on success, for a skipped option, or for an unrecognized
 * option name; returns -1 if the value is invalid or the option could not
 * be applied. */
static int
evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags)
{
	ASSERT_LOCKED(base);
	if (str_matches_option(option, "ndots:")) {
		const int ndots = strtoint(val);
		if (ndots == -1) return -1;
		if (!(flags & DNS_OPTION_SEARCH)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting ndots to %d", ndots);
		if (!base->global_search_state) base->global_search_state = search_state_new();
		if (!base->global_search_state) return -1;
		base->global_search_state->ndots = ndots;
	} else if (str_matches_option(option, "timeout:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting timeout to %s", val);
		memcpy(&base->global_timeout, &tv, sizeof(struct timeval));
	} else if (str_matches_option(option, "getaddrinfo-allow-skew:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting getaddrinfo-allow-skew to %s",
		    val);
		memcpy(&base->global_getaddrinfo_allow_skew, &tv,
		    sizeof(struct timeval));
	} else if (str_matches_option(option, "max-timeouts:")) {
		const int maxtimeout = strtoint_clipped(val, 1, 255);
		if (maxtimeout == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum allowed timeouts to %d",
			maxtimeout);
		base->global_max_nameserver_timeout = maxtimeout;
	} else if (str_matches_option(option, "max-inflight:")) {
		const int maxinflight = strtoint_clipped(val, 1, 65000);
		if (maxinflight == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum inflight requests to %d",
			maxinflight);
		/* resizing the request table can fail; report it instead of
		 * claiming the option was applied */
		if (evdns_base_set_max_requests_inflight(base, maxinflight) < 0)
			return -1;
	} else if (str_matches_option(option, "attempts:")) {
		int retries = strtoint(val);
		if (retries == -1) return -1;
		if (retries > 255) retries = 255;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting retries to %d", retries);
		base->global_max_retransmits = retries;
	} else if (str_matches_option(option, "randomize-case:")) {
		int randcase = strtoint(val);
		/* an unparsable value must not be stored as the setting */
		if (randcase == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		base->global_randomize_case = randcase;
	} else if (str_matches_option(option, "bind-to:")) {
		/* XXX This only applies to successive nameservers, not
		 * to already-configured ones.	We might want to fix that. */
		int len = sizeof(base->global_outgoing_address);
		if (!(flags & DNS_OPTION_NAMESERVERS)) return 0;
		if (evutil_parse_sockaddr_port(val,
			(struct sockaddr*)&base->global_outgoing_address, &len))
			return -1;
		base->global_outgoing_addrlen = len;
	} else if (str_matches_option(option, "initial-probe-timeout:")) {
		struct timeval tv;
		if (strtotimeval(val, &tv) == -1) return -1;
		/* cap the probe timeout at one hour */
		if (tv.tv_sec > 3600)
			tv.tv_sec = 3600;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting initial probe timeout to %s",
		    val);
		memcpy(&base->global_nameserver_probe_initial_timeout, &tv,
		    sizeof(tv));
	}
	return 0;
}
3442
3443int
3444evdns_set_option(const char *option, const char *val, int flags)
3445{
3446	if (!current_base)
3447		current_base = evdns_base_new(NULL, 0);
3448	return evdns_base_set_option(current_base, option, val);
3449}
3450
3451static void
3452resolv_conf_parse_line(struct evdns_base *base, char *const start, int flags) {
3453	char *strtok_state;
3454	static const char *const delims = " \t";
3455#define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
3456
3457
3458	char *const first_token = strtok_r(start, delims, &strtok_state);
3459	ASSERT_LOCKED(base);
3460	if (!first_token) return;
3461
3462	if (!strcmp(first_token, "nameserver") && (flags & DNS_OPTION_NAMESERVERS)) {
3463		const char *const nameserver = NEXT_TOKEN;
3464
3465		if (nameserver)
3466			evdns_base_nameserver_ip_add(base, nameserver);
3467	} else if (!strcmp(first_token, "domain") && (flags & DNS_OPTION_SEARCH)) {
3468		const char *const domain = NEXT_TOKEN;
3469		if (domain) {
3470			search_postfix_clear(base);
3471			search_postfix_add(base, domain);
3472		}
3473	} else if (!strcmp(first_token, "search") && (flags & DNS_OPTION_SEARCH)) {
3474		const char *domain;
3475		search_postfix_clear(base);
3476
3477		while ((domain = NEXT_TOKEN)) {
3478			search_postfix_add(base, domain);
3479		}
3480		search_reverse(base);
3481	} else if (!strcmp(first_token, "options")) {
3482		const char *option;
3483		while ((option = NEXT_TOKEN)) {
3484			const char *val = strchr(option, ':');
3485			evdns_base_set_option_impl(base, option, val ? val+1 : "", flags);
3486		}
3487	}
3488#undef NEXT_TOKEN
3489}
3490
/* exported function */
/* Parse the resolv.conf-style file 'filename' into 'base', applying the */
/* parts selected by 'flags'.  The base lock is held while parsing. */
/* returns: */
/*   0 no errors */
/*   1 failed to open file */
/*   2 failed to stat file */
/*   3 file too large */
/*   4 out of memory */
/*   5 short read from file */
/*   6 no nameservers could be found in the file */
/* NOTE(review): the parsing code in this file only appears to produce */
/* 0, 1, 2 and 6; confirm whether 3-5 are still reachable. */
int
evdns_base_resolv_conf_parse(struct evdns_base *base, int flags, const char *const filename) {
	int outcome;

	EVDNS_LOCK(base);
	outcome = evdns_base_resolv_conf_parse_impl(base, flags, filename);
	EVDNS_UNLOCK(base);

	return outcome;
}
3507
/* Return a newly allocated string holding the path of the system hosts
 * file, or NULL if it cannot be determined or allocated.  The caller owns
 * the result and releases it with mm_free(). */
static char *
evdns_get_default_hosts_filename(void)
{
#ifdef WIN32
	/* Windows is a little coy about where it puts its configuration
	 * files.  Sure, they're _usually_ in C:\windows\system32, but
	 * there's no reason in principle they couldn't be in
	 * W:\hoboken chicken emergency\
	 */
	char path[MAX_PATH+1];
	static const char hostfile[] = "\\drivers\\etc\\hosts";
	char *path_out;
	size_t len_out;

	if (! SHGetSpecialFolderPathA(NULL, path, CSIDL_SYSTEM, 0))
		return NULL;
	/* +1 for the terminating NUL.  The same size must be given to
	 * evutil_snprintf(), otherwise the last character of the path is
	 * cut off. */
	len_out = strlen(path)+strlen(hostfile)+1;
	path_out = mm_malloc(len_out);
	if (!path_out)
		return NULL;
	evutil_snprintf(path_out, len_out, "%s%s", path, hostfile);
	return path_out;
#else
	return mm_strdup("/etc/hosts");
#endif
}
3532
3533static int
3534evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename) {
3535	size_t n;
3536	char *resolv;
3537	char *start;
3538	int err = 0;
3539
3540	log(EVDNS_LOG_DEBUG, "Parsing resolv.conf file %s", filename);
3541
3542	if (flags & DNS_OPTION_HOSTSFILE) {
3543		char *fname = evdns_get_default_hosts_filename();
3544		evdns_base_load_hosts(base, fname);
3545		if (fname)
3546			mm_free(fname);
3547	}
3548
3549	if ((err = evutil_read_file(filename, &resolv, &n, 0)) < 0) {
3550		if (err == -1) {
3551			/* No file. */
3552			evdns_resolv_set_defaults(base, flags);
3553			return 1;
3554		} else {
3555			return 2;
3556		}
3557	}
3558
3559	start = resolv;
3560	for (;;) {
3561		char *const newline = strchr(start, '\n');
3562		if (!newline) {
3563			resolv_conf_parse_line(base, start, flags);
3564			break;
3565		} else {
3566			*newline = 0;
3567			resolv_conf_parse_line(base, start, flags);
3568			start = newline + 1;
3569		}
3570	}
3571
3572	if (!base->server_head && (flags & DNS_OPTION_NAMESERVERS)) {
3573		/* no nameservers were configured. */
3574		evdns_base_nameserver_ip_add(base, "127.0.0.1");
3575		err = 6;
3576	}
3577	if (flags & DNS_OPTION_SEARCH && (!base->global_search_state || base->global_search_state->num_domains == 0)) {
3578		search_set_from_hostname(base);
3579	}
3580
3581	mm_free(resolv);
3582	return err;
3583}
3584
3585int
3586evdns_resolv_conf_parse(int flags, const char *const filename) {
3587	if (!current_base)
3588		current_base = evdns_base_new(NULL, 0);
3589	return evdns_base_resolv_conf_parse(current_base, flags, filename);
3590}
3591
3592
3593#ifdef WIN32
/* Add multiple nameservers from a space-or-comma-separated list. */
/* Each run of digits, '.', ':', '[' and ']' is passed to */
/* evdns_base_nameserver_ip_add().  Returns 0 if every address was added, */
/* 4 if memory ran out, or the first nonzero result of the add call. */
static int
evdns_nameserver_ip_add_line(struct evdns_base *base, const char *ips) {
	const char *addr;
	char *buf;
	int r;
	ASSERT_LOCKED(base);
	while (*ips) {
		/* the <ctype.h> functions need an unsigned char value */
		while (isspace((unsigned char)*ips) || *ips == ',' || *ips == '\t')
			++ips;
		/* Separators at the end of the list are not an address; */
		/* without this check they were submitted as an empty one. */
		if (!*ips)
			break;
		addr = ips;
		while (isdigit((unsigned char)*ips) || *ips == '.' || *ips == ':' ||
		    *ips=='[' || *ips==']')
			++ips;
		buf = mm_malloc(ips-addr+1);
		if (!buf) return 4;
		memcpy(buf, addr, ips-addr);
		buf[ips-addr] = '\0';
		r = evdns_base_nameserver_ip_add(base, buf);
		mm_free(buf);
		if (r) return r;
	}
	return 0;
}
3618
/* Signature of GetNetworkParams(), which is looked up at run time below. */
typedef DWORD(WINAPI *GetNetworkParams_fn_t)(FIXED_INFO *, DWORD*);

/* Use the windows GetNetworkParams interface in iphlpapi.dll to */
/* figure out what our nameservers are. */
/* Returns 0 if at least one entry of the DNS server list was added. */
/* Otherwise returns -1 if the library, the function or the query failed or */
/* the list yielded nothing, 4 if memory ran out, or the last error */
/* reported by evdns_nameserver_ip_add_line(). */
static int
load_nameservers_with_getnetworkparams(struct evdns_base *base)
{
	/* Based on MSDN examples and inspection of  c-ares code. */
	FIXED_INFO *fixed;
	HMODULE handle = 0;
	ULONG size = sizeof(FIXED_INFO);
	void *buf = NULL;
	int status = 0, r, added_any;
	IP_ADDR_STRING *ns;
	GetNetworkParams_fn_t fn;

	ASSERT_LOCKED(base);
	if (!(handle = evutil_load_windows_system_library(
			TEXT("iphlpapi.dll")))) {
		log(EVDNS_LOG_WARN, "Could not open iphlpapi.dll");
		status = -1;
		goto done;
	}
	if (!(fn = (GetNetworkParams_fn_t) GetProcAddress(handle, "GetNetworkParams"))) {
		log(EVDNS_LOG_WARN, "Could not get address of function.");
		status = -1;
		goto done;
	}

	/* First attempt: a buffer just large enough for one FIXED_INFO. */
	buf = mm_malloc(size);
	if (!buf) { status = 4; goto done; }
	fixed = buf;
	r = fn(fixed, &size);
	if (r != ERROR_SUCCESS && r != ERROR_BUFFER_OVERFLOW) {
		status = -1;
		goto done;
	}
	if (r != ERROR_SUCCESS) {
		/* ERROR_BUFFER_OVERFLOW: retry with a buffer of 'size' bytes; */
		/* presumably the first call stored the required size there. */
		mm_free(buf);
		buf = mm_malloc(size);
		if (!buf) { status = 4; goto done; }
		fixed = buf;
		r = fn(fixed, &size);
		if (r != ERROR_SUCCESS) {
			log(EVDNS_LOG_DEBUG, "fn() failed.");
			status = -1;
			goto done;
		}
	}

	EVUTIL_ASSERT(fixed);
	added_any = 0;
	/* Walk the linked list of DNS server entries. */
	ns = &(fixed->DnsServerList);
	while (ns) {
		r = evdns_nameserver_ip_add_line(base, ns->IpAddress.String);
		if (r) {
			log(EVDNS_LOG_DEBUG,"Could not add nameserver %s to list,error: %d",
				(ns->IpAddress.String),(int)GetLastError());
			status = r;
		} else {
			++added_any;
			log(EVDNS_LOG_DEBUG,"Successfully added %s as nameserver",ns->IpAddress.String);
		}

		ns = ns->Next;
	}

	/* One usable entry is enough for success, even if others failed. */
	if (!added_any) {
		log(EVDNS_LOG_DEBUG, "No nameservers added.");
		if (status == 0)
			status = -1;
	} else {
		status = 0;
	}

 done:
	/* common exit: release the buffer and the library handle, if any */
	if (buf)
		mm_free(buf);
	if (handle)
		FreeLibrary(handle);
	return status;
}
3701
3702static int
3703config_nameserver_from_reg_key(struct evdns_base *base, HKEY key, const TCHAR *subkey)
3704{
3705	char *buf;
3706	DWORD bufsz = 0, type = 0;
3707	int status = 0;
3708
3709	ASSERT_LOCKED(base);
3710	if (RegQueryValueEx(key, subkey, 0, &type, NULL, &bufsz)
3711	    != ERROR_MORE_DATA)
3712		return -1;
3713	if (!(buf = mm_malloc(bufsz)))
3714		return -1;
3715
3716	if (RegQueryValueEx(key, subkey, 0, &type, (LPBYTE)buf, &bufsz)
3717	    == ERROR_SUCCESS && bufsz > 1) {
3718		status = evdns_nameserver_ip_add_line(base,buf);
3719	}
3720
3721	mm_free(buf);
3722	return status;
3723}
3724
3725#define SERVICES_KEY TEXT("System\\CurrentControlSet\\Services\\")
3726#define WIN_NS_9X_KEY  SERVICES_KEY TEXT("VxD\\MSTCP")
3727#define WIN_NS_NT_KEY  SERVICES_KEY TEXT("Tcpip\\Parameters")
3728
3729static int
3730load_nameservers_from_registry(struct evdns_base *base)
3731{
3732	int found = 0;
3733	int r;
3734#define TRY(k, name) \
3735	if (!found && config_nameserver_from_reg_key(base,k,TEXT(name)) == 0) { \
3736		log(EVDNS_LOG_DEBUG,"Found nameservers in %s/%s",#k,name); \
3737		found = 1;						\
3738	} else if (!found) {						\
3739		log(EVDNS_LOG_DEBUG,"Didn't find nameservers in %s/%s", \
3740		    #k,#name);						\
3741	}
3742
3743	ASSERT_LOCKED(base);
3744
3745	if (((int)GetVersion()) > 0) { /* NT */
3746		HKEY nt_key = 0, interfaces_key = 0;
3747
3748		if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_NT_KEY, 0,
3749				 KEY_READ, &nt_key) != ERROR_SUCCESS) {
3750			log(EVDNS_LOG_DEBUG,"Couldn't open nt key, %d",(int)GetLastError());
3751			return -1;
3752		}
3753		r = RegOpenKeyEx(nt_key, TEXT("Interfaces"), 0,
3754			     KEY_QUERY_VALUE|KEY_ENUMERATE_SUB_KEYS,
3755			     &interfaces_key);
3756		if (r != ERROR_SUCCESS) {
3757			log(EVDNS_LOG_DEBUG,"Couldn't open interfaces key, %d",(int)GetLastError());
3758			return -1;
3759		}
3760		TRY(nt_key, "NameServer");
3761		TRY(nt_key, "DhcpNameServer");
3762		TRY(interfaces_key, "NameServer");
3763		TRY(interfaces_key, "DhcpNameServer");
3764		RegCloseKey(interfaces_key);
3765		RegCloseKey(nt_key);
3766	} else {
3767		HKEY win_key = 0;
3768		if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_9X_KEY, 0,
3769				 KEY_READ, &win_key) != ERROR_SUCCESS) {
3770			log(EVDNS_LOG_DEBUG, "Couldn't open registry key, %d", (int)GetLastError());
3771			return -1;
3772		}
3773		TRY(win_key, "NameServer");
3774		RegCloseKey(win_key);
3775	}
3776
3777	if (found == 0) {
3778		log(EVDNS_LOG_WARN,"Didn't find any nameservers.");
3779	}
3780
3781	return found ? 0 : -1;
3782#undef TRY
3783}
3784
3785int
3786evdns_base_config_windows_nameservers(struct evdns_base *base)
3787{
3788	int r;
3789	char *fname;
3790	if (base == NULL)
3791		base = current_base;
3792	if (base == NULL)
3793		return -1;
3794	EVDNS_LOCK(base);
3795	if (load_nameservers_with_getnetworkparams(base) == 0) {
3796		EVDNS_UNLOCK(base);
3797		return 0;
3798	}
3799	r = load_nameservers_from_registry(base);
3800
3801	fname = evdns_get_default_hosts_filename();
3802	evdns_base_load_hosts(base, fname);
3803	if (fname)
3804		mm_free(fname);
3805
3806	EVDNS_UNLOCK(base);
3807	return r;
3808}
3809
3810int
3811evdns_config_windows_nameservers(void)
3812{
3813	if (!current_base) {
3814		current_base = evdns_base_new(NULL, 1);
3815		return current_base == NULL ? -1 : 0;
3816	} else {
3817		return evdns_base_config_windows_nameservers(current_base);
3818	}
3819}
3820#endif
3821
/* Allocate and initialize a new evdns_base that uses 'event_base'.  If
 * 'initialize_nameservers' is nonzero, the base is configured from the
 * system settings (the Windows configuration on WIN32, /etc/resolv.conf
 * elsewhere).  Returns the new base, or NULL if the random number
 * generator cannot be seeded, memory runs out, or the system configuration
 * step reports -1. */
struct evdns_base *
evdns_base_new(struct event_base *event_base, int initialize_nameservers)
{
	struct evdns_base *base;

	if (evutil_secure_rng_init() < 0) {
		log(EVDNS_LOG_WARN, "Unable to seed random number generator; "
		    "DNS can't run.");
		return NULL;
	}

	/* Give the evutil library a hook into its evdns-enabled
	 * functionality.  We can't just call evdns_getaddrinfo directly or
	 * else libevent-core will depend on libevent-extras. */
	evutil_set_evdns_getaddrinfo_fn(evdns_getaddrinfo);

	base = mm_malloc(sizeof(struct evdns_base));
	if (base == NULL)
		return (NULL);
	memset(base, 0, sizeof(struct evdns_base));
	base->req_waiting_head = NULL;

	EVTHREAD_ALLOC_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
	EVDNS_LOCK(base);

	/* Set max requests inflight and allocate req_heads. */
	base->req_heads = NULL;

	if (evdns_base_set_max_requests_inflight(base, 64) < 0) {
		/* Without the request table the base is unusable.  Nothing
		 * else has been set up yet, so only the lock and the
		 * structure itself need to be released. */
		EVDNS_UNLOCK(base);
		EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
		mm_free(base);
		return NULL;
	}

	base->server_head = NULL;
	base->event_base = event_base;
	base->global_good_nameservers = base->global_requests_inflight =
		base->global_requests_waiting = 0;

	/* default settings */
	base->global_timeout.tv_sec = 5;
	base->global_timeout.tv_usec = 0;
	base->global_max_reissues = 1;
	base->global_max_retransmits = 3;
	base->global_max_nameserver_timeout = 3;
	base->global_search_state = NULL;
	base->global_randomize_case = 1;
	base->global_getaddrinfo_allow_skew.tv_sec = 3;
	base->global_getaddrinfo_allow_skew.tv_usec = 0;
	base->global_nameserver_probe_initial_timeout.tv_sec = 10;
	base->global_nameserver_probe_initial_timeout.tv_usec = 0;

	TAILQ_INIT(&base->hostsdb);

	if (initialize_nameservers) {
		int r;
#ifdef WIN32
		r = evdns_base_config_windows_nameservers(base);
#else
		r = evdns_base_resolv_conf_parse(base, DNS_OPTIONS_ALL, "/etc/resolv.conf");
#endif
		if (r == -1) {
			evdns_base_free_and_unlock(base, 0);
			return NULL;
		}
	}
	EVDNS_UNLOCK(base);
	return base;
}
3886
3887int
3888evdns_init(void)
3889{
3890	struct evdns_base *base = evdns_base_new(NULL, 1);
3891	if (base) {
3892		current_base = base;
3893		return 0;
3894	} else {
3895		return -1;
3896	}
3897}
3898
3899const char *
3900evdns_err_to_string(int err)
3901{
3902    switch (err) {
3903	case DNS_ERR_NONE: return "no error";
3904	case DNS_ERR_FORMAT: return "misformatted query";
3905	case DNS_ERR_SERVERFAILED: return "server failed";
3906	case DNS_ERR_NOTEXIST: return "name does not exist";
3907	case DNS_ERR_NOTIMPL: return "query not implemented";
3908	case DNS_ERR_REFUSED: return "refused";
3909
3910	case DNS_ERR_TRUNCATED: return "reply truncated or ill-formed";
3911	case DNS_ERR_UNKNOWN: return "unknown";
3912	case DNS_ERR_TIMEOUT: return "request timed out";
3913	case DNS_ERR_SHUTDOWN: return "dns subsystem shut down";
3914	case DNS_ERR_CANCEL: return "dns request canceled";
3915	case DNS_ERR_NODATA: return "no records in the reply";
3916	default: return "[Unknown error code]";
3917    }
3918}
3919
3920static void
3921evdns_nameserver_free(struct nameserver *server)
3922{
3923	if (server->socket >= 0)
3924	evutil_closesocket(server->socket);
3925	(void) event_del(&server->event);
3926	event_debug_unassign(&server->event);
3927	if (server->state == 0)
3928		(void) event_del(&server->timeout_event);
3929	event_debug_unassign(&server->timeout_event);
3930	mm_free(server);
3931}
3932
/* Tear down 'base' and free it: finish every inflight and waiting request,
 * free the nameservers, the search domains and the hosts entries, then
 * release the lock and the base itself.  If 'fail_requests' is nonzero, a
 * DNS_ERR_SHUTDOWN callback is scheduled for each request before it is
 * finished.  The caller must hold the base lock; it is released and
 * destroyed here, and 'base' must not be used afterwards. */
static void
evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests)
{
	struct nameserver *server, *server_next;
	struct search_domain *dom, *dom_next;
	int i;

	/* Requires that we hold the lock. */

	/* TODO(nickm) we might need to refcount here. */

	/* Each loop below re-reads the list head on every pass, so it relies
	 * on request_finished() unlinking the request it is given. */
	for (i = 0; i < base->n_req_heads; ++i) {
		while (base->req_heads[i]) {
			if (fail_requests)
				reply_schedule_callback(base->req_heads[i], 0, DNS_ERR_SHUTDOWN, NULL);
			request_finished(base->req_heads[i], &REQ_HEAD(base, base->req_heads[i]->trans_id), 1);
		}
	}
	while (base->req_waiting_head) {
		if (fail_requests)
			reply_schedule_callback(base->req_waiting_head, 0, DNS_ERR_SHUTDOWN, NULL);
		request_finished(base->req_waiting_head, &base->req_waiting_head, 1);
	}
	base->global_requests_inflight = base->global_requests_waiting = 0;

	/* Free the nameservers.  The walk stops when 'next' is NULL or leads
	 * back to the head; presumably the list is circular - TODO confirm. */
	for (server = base->server_head; server; server = server_next) {
		server_next = server->next;
		evdns_nameserver_free(server);
		if (server_next == base->server_head)
			break;
	}
	base->server_head = NULL;
	base->global_good_nameservers = 0;

	/* NOTE(review): the search state is freed directly here, not through
	 * search_state_decref(); confirm nothing still holds a reference. */
	if (base->global_search_state) {
		for (dom = base->global_search_state->head; dom; dom = dom_next) {
			dom_next = dom->next;
			mm_free(dom);
		}
		mm_free(base->global_search_state);
		base->global_search_state = NULL;
	}

	/* Free the entries loaded from hosts files. */
	{
		struct hosts_entry *victim;
		while ((victim = TAILQ_FIRST(&base->hostsdb))) {
			TAILQ_REMOVE(&base->hostsdb, victim, next);
			mm_free(victim);
		}
	}

	mm_free(base->req_heads);

	EVDNS_UNLOCK(base);
	EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);

	mm_free(base);
}
3991
/* Free 'base' and everything it owns.  The lock is taken here and released
 * by evdns_base_free_and_unlock() as part of the teardown.  See that
 * function for the meaning of 'fail_requests'. */
void
evdns_base_free(struct evdns_base *base, int fail_requests)
{
	EVDNS_LOCK(base);
	evdns_base_free_and_unlock(base, fail_requests);
}
3998
3999void
4000evdns_shutdown(int fail_requests)
4001{
4002	if (current_base) {
4003		struct evdns_base *b = current_base;
4004		current_base = NULL;
4005		evdns_base_free(b, fail_requests);
4006	}
4007	evdns_log_fn = NULL;
4008}
4009
4010static int
4011evdns_base_parse_hosts_line(struct evdns_base *base, char *line)
4012{
4013	char *strtok_state;
4014	static const char *const delims = " \t";
4015	char *const addr = strtok_r(line, delims, &strtok_state);
4016	char *hostname, *hash;
4017	struct sockaddr_storage ss;
4018	int socklen = sizeof(ss);
4019	ASSERT_LOCKED(base);
4020
4021#define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
4022
4023	if (!addr || *addr == '#')
4024		return 0;
4025
4026	memset(&ss, 0, sizeof(ss));
4027	if (evutil_parse_sockaddr_port(addr, (struct sockaddr*)&ss, &socklen)<0)
4028		return -1;
4029	if (socklen > (int)sizeof(struct sockaddr_in6))
4030		return -1;
4031
4032	if (sockaddr_getport((struct sockaddr*)&ss))
4033		return -1;
4034
4035	while ((hostname = NEXT_TOKEN)) {
4036		struct hosts_entry *he;
4037		size_t namelen;
4038		if ((hash = strchr(hostname, '#'))) {
4039			if (hash == hostname)
4040				return 0;
4041			*hash = '\0';
4042		}
4043
4044		namelen = strlen(hostname);
4045
4046		he = mm_calloc(1, sizeof(struct hosts_entry)+namelen);
4047		if (!he)
4048			return -1;
4049		EVUTIL_ASSERT(socklen <= (int)sizeof(he->addr));
4050		memcpy(&he->addr, &ss, socklen);
4051		memcpy(he->hostname, hostname, namelen+1);
4052		he->addrlen = socklen;
4053
4054		TAILQ_INSERT_TAIL(&base->hostsdb, he, next);
4055
4056		if (hash)
4057			return 0;
4058	}
4059
4060	return 0;
4061#undef NEXT_TOKEN
4062}
4063
/* Load the hosts file 'hosts_fname' into the hosts database of 'base'.
 * When no file name is given or the file cannot be read, entries mapping
 * "localhost" to 127.0.0.1 and ::1 are added instead.  Returns 0 on
 * success or when no file name was given, -1 if reading the file failed.
 * Lines that fail to parse are skipped silently. */
static int
evdns_base_load_hosts_impl(struct evdns_base *base, const char *hosts_fname)
{
	char *contents = NULL, *line, *nl;
	size_t len;
	int err = 0;

	ASSERT_LOCKED(base);

	if (hosts_fname != NULL)
		err = evutil_read_file(hosts_fname, &contents, &len, 0);
	if (hosts_fname == NULL || err < 0) {
		/* the parser modifies its input, so hand it a scratch copy */
		char tmp[64];
		strlcpy(tmp, "127.0.0.1   localhost", sizeof(tmp));
		evdns_base_parse_hosts_line(base, tmp);
		strlcpy(tmp, "::1   localhost", sizeof(tmp));
		evdns_base_parse_hosts_line(base, tmp);
		return err ? -1 : 0;
	}

	/* This will break early if there is a NUL in the hosts file.
	 * Probably not a problem.*/
	line = contents;
	while ((nl = strchr(line, '\n')) != NULL) {
		*nl = '\0';
		evdns_base_parse_hosts_line(base, line);
		line = nl+1;
	}
	/* whatever follows the last newline is parsed as a final line */
	evdns_base_parse_hosts_line(base, line);

	mm_free(contents);
	return 0;
}
4102
4103int
4104evdns_base_load_hosts(struct evdns_base *base, const char *hosts_fname)
4105{
4106	int res;
4107	if (!base)
4108		base = current_base;
4109	EVDNS_LOCK(base);
4110	res = evdns_base_load_hosts_impl(base, hosts_fname);
4111	EVDNS_UNLOCK(base);
4112	return res;
4113}
4114
/* A single request for a getaddrinfo, either v4 or v6. */
struct getaddrinfo_subrequest {
	/* The underlying evdns request. */
	struct evdns_request *r;
	/* Presumably the DNS record type being asked for - TODO confirm. */
	ev_uint32_t type;
};
4120
/* State data used to implement an in-progress getaddrinfo. */
struct evdns_getaddrinfo_request {
	/* The evdns_base that this request belongs to. */
	struct evdns_base *evdns_base;
	/* Copy of the modified 'hints' data that we'll use to build
	 * answers. */
	struct evutil_addrinfo hints;
	/* The callback to invoke when we're done */
	evdns_getaddrinfo_cb user_cb;
	/* User-supplied data to give to the callback. */
	void *user_data;
	/* The port to use when building sockaddrs. */
	ev_uint16_t port;
	/* The sub_request for an A record (if any) */
	struct getaddrinfo_subrequest ipv4_request;
	/* The sub_request for an AAAA record (if any) */
	struct getaddrinfo_subrequest ipv6_request;

	/* The cname result that we were told (if any) */
	char *cname_result;

	/* If we have one request answered and one request still inflight,
	 * then this field holds the answer from the first request... */
	struct evutil_addrinfo *pending_result;
	/* And this event is a timeout that will tell us to cancel the second
	 * request if it's taking a long time. */
	struct event timeout;

	/* And this field holds the error code from the first request... */
	int pending_error;
	/* If this is set, the user canceled this request. */
	unsigned user_canceled : 1;
	/* If this is set, the user can no longer cancel this request; we're
	 * just waiting for the free. */
	unsigned request_done : 1;
};
4156
/* Translate an evdns DNS_ERR_* code into the equivalent getaddrinfo
 * (EVUTIL_EAI_*) error.  DNS_ERR_NONE maps to 0, DNS_ERR_NOTEXIST to
 * EVUTIL_EAI_NONAME, and everything else to EVUTIL_EAI_FAIL. */
static int
evdns_err_to_getaddrinfo_err(int e1)
{
	/* XXX Do this better! */
	switch (e1) {
	case DNS_ERR_NONE:
		return 0;
	case DNS_ERR_NOTEXIST:
		return EVUTIL_EAI_NONAME;
	default:
		return EVUTIL_EAI_FAIL;
	}
}
4169
/* Pick the more informative of two getaddrinfo errors: 'e1' wins whenever
 * it is nonzero, otherwise 'e2' is returned. */
static int
getaddrinfo_merge_err(int e1, int e2)
{
	/* XXXX be cleverer here. */
	return (e1 != 0) ? e1 : e2;
}
4180
/* Release a getaddrinfo request and everything it still owns: the timeout
 * event, any saved CNAME string, and any answer that was never handed to
 * the user.
 *
 * DO NOT CALL this while either subrequest is still pending.  It is only
 * safe once both resolve callbacks have been invoked. */
static void
free_getaddrinfo_request(struct evdns_getaddrinfo_request *data)
{
	event_del(&data->timeout);

	if (data->cname_result != NULL)
		mm_free(data->cname_result);
	if (data->pending_result != NULL)
		evutil_freeaddrinfo(data->pending_result);

	mm_free(data);
}
4194
/* If a CNAME was recorded for this request and there is a reply 'ai' to
 * put it on, move the string into ai->ai_canonname.  The request gives up
 * its pointer, so the string will not be freed along with the request. */
static void
add_cname_to_reply(struct evdns_getaddrinfo_request *data,
    struct evutil_addrinfo *ai)
{
	if (ai == NULL || data->cname_result == NULL)
		return;

	ai->ai_canonname = data->cname_result;
	data->cname_result = NULL;
}
4204
/* Callback: invoked when one request in a mixed-format A/AAAA getaddrinfo
 * request has finished, but the other one took too long to answer. Pass
 * along the answer we got, and cancel the other request.
 *
 * 'ptr' is the struct evdns_getaddrinfo_request that owns the timer; 'fd'
 * and 'what' are not used.
 */
static void
evdns_getaddrinfo_timeout_cb(evutil_socket_t fd, short what, void *ptr)
{
	int v4_timedout = 0, v6_timedout = 0;
	struct evdns_getaddrinfo_request *data = ptr;

	/* Cancel any pending requests, and note which one */
	if (data->ipv4_request.r) {
		/* XXXX This does nothing if the request's callback is already
		 * running (pending_cb is set). */
		evdns_cancel_request(NULL, data->ipv4_request.r);
		v4_timedout = 1;
		/* Count the timeout on the base; the counter is updated under
		 * the base lock. */
		EVDNS_LOCK(data->evdns_base);
		++data->evdns_base->getaddrinfo_ipv4_timeouts;
		EVDNS_UNLOCK(data->evdns_base);
	}
	if (data->ipv6_request.r) {
		/* XXXX This does nothing if the request's callback is already
		 * running (pending_cb is set). */
		evdns_cancel_request(NULL, data->ipv6_request.r);
		v6_timedout = 1;
		/* Same bookkeeping as above, for the AAAA side. */
		EVDNS_LOCK(data->evdns_base);
		++data->evdns_base->getaddrinfo_ipv6_timeouts;
		EVDNS_UNLOCK(data->evdns_base);
	}

	/* We only use this timeout callback when we have an answer for
	 * one address. */
	EVUTIL_ASSERT(!v4_timedout || !v6_timedout);

	/* Report the outcome of the other request that didn't time out. */
	if (data->pending_result) {
		add_cname_to_reply(data, data->pending_result);
		data->user_cb(0, data->pending_result, data->user_data);
		/* The list was handed to the user; clear our pointer so that
		 * free_getaddrinfo_request() does not free it. */
		data->pending_result = NULL;
	} else {
		/* The finished request produced no addresses: report its
		 * saved error, or EVUTIL_EAI_AGAIN if none was recorded. */
		int e = data->pending_error;
		if (!e)
			e = EVUTIL_EAI_AGAIN;
		data->user_cb(e, NULL, data->user_data);
	}

	data->user_cb = NULL; /* prevent double-call if evdns callbacks are
			       * in-progress. XXXX It would be better if this
			       * weren't necessary. */

	/* When a subrequest was cancelled above, 'data' is deliberately not
	 * freed here; presumably the cancelled subrequest's resolve callback
	 * releases it later (NOTE(review): confirm against
	 * evdns_cancel_request). */
	if (!v4_timedout && !v6_timedout) {
		/* should be impossible? XXXX */
		free_getaddrinfo_request(data);
	}
}
4260
/* Arm the request's timeout event using the allowed-skew interval stored
 * on 'evdns_base'.  Returns the result of event_add(). */
static int
evdns_getaddrinfo_set_timeout(struct evdns_base *evdns_base,
    struct evdns_getaddrinfo_request *data)
{
	struct event *timer = &data->timeout;

	return event_add(timer, &evdns_base->global_getaddrinfo_allow_skew);
}
4267
4268static inline int
4269evdns_result_is_answer(int result)
4270{
4271	return (result != DNS_ERR_NOTIMPL && result != DNS_ERR_REFUSED &&
4272	    result != DNS_ERR_SERVERFAILED && result != DNS_ERR_CANCEL);
4273}
4274
/* Callback: invoked when one of the A/AAAA subrequests belonging to a
 * getaddrinfo request completes or is cancelled.
 *
 * 'arg' points at the getaddrinfo_subrequest embedded in the owning
 * evdns_getaddrinfo_request.  'result' is a DNS_ERR_* code.  On success,
 * 'type' is DNS_IPv4_A or DNS_IPv6_AAAA and 'addresses' holds 'count'
 * packed addresses (4 bytes each for A, 16 bytes each for AAAA).  'ttl'
 * is not used.
 *
 * Depending on the state of the sibling subrequest, this either stashes
 * the outcome and starts the skew timeout, or reports the final outcome
 * to the user callback and frees the request.
 */
static void
evdns_getaddrinfo_gotresolve(int result, char type, int count,
    int ttl, void *addresses, void *arg)
{
	int i;
	struct getaddrinfo_subrequest *req = arg;
	struct getaddrinfo_subrequest *other_req;
	struct evdns_getaddrinfo_request *data;

	struct evutil_addrinfo *res;

	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	struct sockaddr *sa;
	int socklen, addrlen;
	void *addrp;
	int err;
	int user_canceled;

	/* Recover the enclosing request from the embedded subrequest, and
	 * find the sibling subrequest for the other address family. */
	EVUTIL_ASSERT(req->type == DNS_IPv4_A || req->type == DNS_IPv6_AAAA);
	if (req->type == DNS_IPv4_A) {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv4_request);
		other_req = &data->ipv6_request;
	} else {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv6_request);
		other_req = &data->ipv4_request;
	}

	/* Under the base lock: update the answered counters, snapshot the
	 * cancel flag, and mark the request done when no sibling remains. */
	EVDNS_LOCK(data->evdns_base);
	if (evdns_result_is_answer(result)) {
		if (req->type == DNS_IPv4_A)
			++data->evdns_base->getaddrinfo_ipv4_answered;
		else
			++data->evdns_base->getaddrinfo_ipv6_answered;
	}
	user_canceled = data->user_canceled;
	if (other_req->r == NULL)
		data->request_done = 1;
	EVDNS_UNLOCK(data->evdns_base);

	/* This subrequest is finished; clear its handle so later code sees
	 * it as no longer pending. */
	req->r = NULL;

	if (result == DNS_ERR_CANCEL && ! user_canceled) {
		/* Internal cancel request from timeout or internal error.
		 * we already answered the user. */
		if (other_req->r == NULL)
			free_getaddrinfo_request(data);
		return;
	}

	if (data->user_cb == NULL) {
		/* We already answered.  XXXX This shouldn't be needed; see
		 * comments in evdns_getaddrinfo_timeout_cb */
		free_getaddrinfo_request(data);
		return;
	}

	/* Translate the DNS result into a getaddrinfo error.  A successful
	 * reply carrying zero addresses is reported as EVUTIL_EAI_NODATA. */
	if (result == DNS_ERR_NONE) {
		if (count == 0)
			err = EVUTIL_EAI_NODATA;
		else
			err = 0;
	} else {
		err = evdns_err_to_getaddrinfo_err(result);
	}

	if (err) {
		/* Looks like we got an error. */
		if (other_req->r) {
			/* The other request is still working; maybe it will
			 * succeed. */
			/* XXXX handle failure from set_timeout */
			evdns_getaddrinfo_set_timeout(data->evdns_base, data);
			data->pending_error = err;
			return;
		}

		/* No sibling is pending, so this is the final outcome. */
		if (user_canceled) {
			data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		} else if (data->pending_result) {
			/* If we have an answer waiting, and we weren't
			 * canceled, ignore this error. */
			add_cname_to_reply(data, data->pending_result);
			data->user_cb(0, data->pending_result, data->user_data);
			/* The list now belongs to the user; don't free it. */
			data->pending_result = NULL;
		} else {
			if (data->pending_error)
				err = getaddrinfo_merge_err(err,
				    data->pending_error);
			data->user_cb(err, NULL, data->user_data);
		}
		free_getaddrinfo_request(data);
		return;
	} else if (user_canceled) {
		if (other_req->r) {
			/* The other request is still working; let it hit this
			 * callback with EVUTIL_EAI_CANCEL callback and report
			 * the failure. */
			return;
		}
		data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		free_getaddrinfo_request(data);
		return;
	}

	/* Looks like we got some answers. We should turn them into addrinfos
	 * and then either queue those or return them all. */
	EVUTIL_ASSERT(type == DNS_IPv4_A || type == DNS_IPv6_AAAA);

	/* Prepare a template sockaddr for this family.  'addrp' points at
	 * its address field so each answer can be copied in below. */
	if (type == DNS_IPv4_A) {
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_port = htons(data->port);

		sa = (struct sockaddr *)&sin;
		socklen = sizeof(sin);
		addrlen = 4;
		addrp = &sin.sin_addr.s_addr;
	} else {
		memset(&sin6, 0, sizeof(sin6));
		sin6.sin6_family = AF_INET6;
		sin6.sin6_port = htons(data->port);

		sa = (struct sockaddr *)&sin6;
		socklen = sizeof(sin6);
		addrlen = 16;
		addrp = &sin6.sin6_addr.s6_addr;
	}

	/* Build one addrinfo per returned address and chain them in order. */
	res = NULL;
	for (i=0; i < count; ++i) {
		struct evutil_addrinfo *ai;
		memcpy(addrp, ((char*)addresses)+i*addrlen, addrlen);
		ai = evutil_new_addrinfo(sa, socklen, &data->hints);
		if (!ai) {
			/* Allocation failed: cancel the sibling if it is
			 * still running, report EVUTIL_EAI_MEMORY, and drop
			 * the partial list. */
			if (other_req->r) {
				evdns_cancel_request(NULL, other_req->r);
			}
			data->user_cb(EVUTIL_EAI_MEMORY, NULL, data->user_data);
			if (res)
				evutil_freeaddrinfo(res);

			if (other_req->r == NULL)
				free_getaddrinfo_request(data);
			return;
		}
		res = evutil_addrinfo_append(res, ai);
	}

	if (other_req->r) {
		/* The other request is still in progress; wait for it */
		/* XXXX handle failure from set_timeout */
		evdns_getaddrinfo_set_timeout(data->evdns_base, data);
		data->pending_result = res;
		return;
	} else {
		/* The other request is done or never started; append its
		 * results (if any) and return them. */
		if (data->pending_result) {
			/* When this reply is the A answer, the stashed list
			 * goes after it; when it is the AAAA answer, the
			 * stashed list goes first. */
			if (req->type == DNS_IPv4_A)
				res = evutil_addrinfo_append(res,
				    data->pending_result);
			else
				res = evutil_addrinfo_append(
				    data->pending_result, res);
			data->pending_result = NULL;
		}

		/* Call the user callback. */
		add_cname_to_reply(data, res);
		data->user_cb(0, res, data->user_data);

		/* Free data. */
		free_getaddrinfo_request(data);
	}
}
4451
4452static struct hosts_entry *
4453find_hosts_entry(struct evdns_base *base, const char *hostname,
4454    struct hosts_entry *find_after)
4455{
4456	struct hosts_entry *e;
4457
4458	if (find_after)
4459		e = TAILQ_NEXT(find_after, next);
4460	else
4461		e = TAILQ_FIRST(&base->hostsdb);
4462
4463	for (; e; e = TAILQ_NEXT(e, next)) {
4464		if (!evutil_ascii_strcasecmp(e->hostname, hostname))
4465			return e;
4466	}
4467	return NULL;
4468}
4469
4470static int
4471evdns_getaddrinfo_fromhosts(struct evdns_base *base,
4472    const char *nodename, struct evutil_addrinfo *hints, ev_uint16_t port,
4473    struct evutil_addrinfo **res)
4474{
4475	int n_found = 0;
4476	struct hosts_entry *e;
4477	struct evutil_addrinfo *ai=NULL;
4478	int f = hints->ai_family;
4479
4480	EVDNS_LOCK(base);
4481	for (e = find_hosts_entry(base, nodename, NULL); e;
4482	    e = find_hosts_entry(base, nodename, e)) {
4483		struct evutil_addrinfo *ai_new;
4484		++n_found;
4485		if ((e->addr.sa.sa_family == AF_INET && f == PF_INET6) ||
4486		    (e->addr.sa.sa_family == AF_INET6 && f == PF_INET))
4487			continue;
4488		ai_new = evutil_new_addrinfo(&e->addr.sa, e->addrlen, hints);
4489		if (!ai_new) {
4490			n_found = 0;
4491			goto out;
4492		}
4493		sockaddr_setport(ai_new->ai_addr, port);
4494		ai = evutil_addrinfo_append(ai, ai_new);
4495	}
4496	EVDNS_UNLOCK(base);
4497out:
4498	if (n_found) {
4499		/* Note that we return an empty answer if we found entries for
4500		 * this hostname but none were of the right address type. */
4501		*res = ai;
4502		return 0;
4503	} else {
4504		if (ai)
4505			evutil_freeaddrinfo(ai);
4506		return -1;
4507	}
4508}
4509
/* Asynchronous getaddrinfo() built on evdns.
 *
 * Resolves 'nodename' / 'servname' according to 'hints_in' (which may be
 * NULL) and reports the outcome through 'cb', which is passed 'arg'.  A
 * NULL 'dns_base' selects current_base.
 *
 * If the request can be answered without a DNS lookup (numeric host,
 * hosts-file entry, or an immediate error), 'cb' is invoked before this
 * function returns and NULL is returned.  Otherwise A and/or AAAA
 * requests are launched and a handle is returned that can be passed to
 * evdns_getaddrinfo_cancel().  NULL is also returned, after calling 'cb'
 * with an error, when no request could be launched.
 */
struct evdns_getaddrinfo_request *
evdns_getaddrinfo(struct evdns_base *dns_base,
    const char *nodename, const char *servname,
    const struct evutil_addrinfo *hints_in,
    evdns_getaddrinfo_cb cb, void *arg)
{
	struct evdns_getaddrinfo_request *data;
	struct evutil_addrinfo hints;
	struct evutil_addrinfo *res = NULL;
	int err;
	int port = 0;
	int want_cname = 0;

	if (!dns_base) {
		dns_base = current_base;
		if (!dns_base) {
			log(EVDNS_LOG_WARN,
			    "Call to getaddrinfo_async with no "
			    "evdns_base configured.");
			cb(EVUTIL_EAI_FAIL, NULL, arg); /* ??? better error? */
			return NULL;
		}
	}

	/* If we _must_ answer this immediately, do so. */
	if ((hints_in && (hints_in->ai_flags & EVUTIL_AI_NUMERICHOST))) {
		res = NULL;
		err = evutil_getaddrinfo(nodename, servname, hints_in, &res);
		cb(err, res, arg);
		return NULL;
	}

	/* Work on a private copy of the hints so we can adjust them. */
	if (hints_in) {
		memcpy(&hints, hints_in, sizeof(hints));
	} else {
		memset(&hints, 0, sizeof(hints));
		hints.ai_family = PF_UNSPEC;
	}

	evutil_adjust_hints_for_addrconfig(&hints);

	/* Now try to see if we _can_ answer immediately. */
	/* (It would be nice to do this by calling getaddrinfo directly, with
	 * AI_NUMERICHOST, on platforms that have it, but we can't: there isn't
	 * a reliable way to distinguish the "that wasn't a numeric host!" case
	 * from any other EAI_NONAME cases.) */
	err = evutil_getaddrinfo_common(nodename, servname, &hints, &res, &port);
	if (err != EVUTIL_EAI_NEED_RESOLVE) {
		cb(err, res, arg);
		return NULL;
	}

	/* If there is an entry in the hosts file, we should give it now. */
	if (!evdns_getaddrinfo_fromhosts(dns_base, nodename, &hints, port, &res)) {
		cb(0, res, arg);
		return NULL;
	}

	/* Okay, things are serious now. We're going to need to actually
	 * launch a request.
	 */
	data = mm_calloc(1,sizeof(struct evdns_getaddrinfo_request));
	if (!data) {
		cb(EVUTIL_EAI_MEMORY, NULL, arg);
		return NULL;
	}

	memcpy(&data->hints, &hints, sizeof(data->hints));
	data->port = (ev_uint16_t)port;
	data->ipv4_request.type = DNS_IPv4_A;
	data->ipv6_request.type = DNS_IPv6_AAAA;
	data->user_cb = cb;
	data->user_data = arg;
	data->evdns_base = dns_base;

	want_cname = (hints.ai_flags & EVUTIL_AI_CANONNAME);

	/* If we are asked for a PF_UNSPEC address, we launch two requests in
	 * parallel: one for an A address and one for an AAAA address.  We
	 * can't send just one request, since many servers only answer one
	 * question per DNS request.
	 *
	 * Once we have the answer to one request, we allow for a short
	 * timeout before we report it, to see if the other one arrives.  If
	 * they both show up in time, then we report both the answers.
	 *
	 * If too many addresses of one type time out or fail, we should stop
	 * launching those requests. (XXX we don't do that yet.)
	 */

	if (hints.ai_family != PF_INET6) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv4 as %p",
		    nodename, &data->ipv4_request);

		data->ipv4_request.r = evdns_base_resolve_ipv4(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv4_request);
		/* The launch may have failed and returned NULL; only hook up
		 * the CNAME slot when there is a request to hook it to. */
		if (want_cname && data->ipv4_request.r)
			data->ipv4_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}
	if (hints.ai_family != PF_INET) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv6 as %p",
		    nodename, &data->ipv6_request);

		data->ipv6_request.r = evdns_base_resolve_ipv6(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv6_request);
		/* Same NULL guard as for the A request above. */
		if (want_cname && data->ipv6_request.r)
			data->ipv6_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}

	/* Prepare (but do not arm) the skew timeout; it is armed when the
	 * first of two subrequests completes. */
	evtimer_assign(&data->timeout, dns_base->event_base,
	    evdns_getaddrinfo_timeout_cb, data);

	if (data->ipv4_request.r || data->ipv6_request.r) {
		return data;
	} else {
		/* Neither request could be launched. */
		mm_free(data);
		cb(EVUTIL_EAI_FAIL, NULL, arg);
		return NULL;
	}
}
4634
/* Cancel an in-progress getaddrinfo request on behalf of the user.
 *
 * Does nothing if the request is already marked done.  Otherwise the skew
 * timeout is removed, the request is flagged as user-canceled, and any
 * subrequest that is still outstanding is cancelled.  All of this happens
 * while holding the lock of the request's evdns_base. */
void
evdns_getaddrinfo_cancel(struct evdns_getaddrinfo_request *data)
{
	EVDNS_LOCK(data->evdns_base);
	if (!data->request_done) {
		event_del(&data->timeout);
		data->user_canceled = 1;
		if (data->ipv4_request.r != NULL)
			evdns_cancel_request(data->evdns_base,
			    data->ipv4_request.r);
		if (data->ipv6_request.r != NULL)
			evdns_cancel_request(data->evdns_base,
			    data->ipv6_request.r);
	}
	EVDNS_UNLOCK(data->evdns_base);
}
4651