1/* Copyright 2006-2007 Niels Provos
2 * Copyright 2007-2012 Nick Mathewson and Niels Provos
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 *    derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
/* Based on software by Adam Langley. Adam's original message:
28 *
29 * Async DNS Library
30 * Adam Langley <agl@imperialviolet.org>
31 * http://www.imperialviolet.org/eventdns.html
32 * Public Domain code
33 *
34 * This software is Public Domain. To view a copy of the public domain dedication,
35 * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
36 * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
37 *
38 * I ask and expect, but do not require, that all derivative works contain an
39 * attribution similar to:
40 *	Parts developed by Adam Langley <agl@imperialviolet.org>
41 *
42 * You may wish to replace the word "Parts" with something else depending on
43 * the amount of original code.
44 *
45 * (Derivative works does not include programs which link against, run or include
46 * the source verbatim in their source distributions)
47 *
48 * Version: 0.1b
49 */
50
51#include "event2/event-config.h"
52#include "evconfig-private.h"
53
54#include <sys/types.h>
55
56#ifndef _FORTIFY_SOURCE
57#define _FORTIFY_SOURCE 3
58#endif
59
60#include <string.h>
61#include <fcntl.h>
62#ifdef EVENT__HAVE_SYS_TIME_H
63#include <sys/time.h>
64#endif
65#ifdef EVENT__HAVE_STDINT_H
66#include <stdint.h>
67#endif
68#include <stdlib.h>
69#include <string.h>
70#include <errno.h>
71#ifdef EVENT__HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#include <limits.h>
75#include <sys/stat.h>
76#include <stdio.h>
77#include <stdarg.h>
78#ifdef _WIN32
79#include <winsock2.h>
80#include <ws2tcpip.h>
81#ifndef _WIN32_IE
82#define _WIN32_IE 0x400
83#endif
84#include <shlobj.h>
85#endif
86
87#include "event2/dns.h"
88#include "event2/dns_struct.h"
89#include "event2/dns_compat.h"
90#include "event2/util.h"
91#include "event2/event.h"
92#include "event2/event_struct.h"
93#include "event2/thread.h"
94
95#include "defer-internal.h"
96#include "log-internal.h"
97#include "mm-internal.h"
98#include "strlcpy-internal.h"
99#include "ipv6-internal.h"
100#include "util-internal.h"
101#include "evthread-internal.h"
102#ifdef _WIN32
103#include <ctype.h>
104#include <winsock2.h>
105#include <windows.h>
106#include <iphlpapi.h>
107#include <io.h>
108#else
109#include <sys/socket.h>
110#include <netinet/in.h>
111#include <arpa/inet.h>
112#endif
113
114#ifdef EVENT__HAVE_NETINET_IN6_H
115#include <netinet/in6.h>
116#endif
117
/* Local aliases for the libevent log severities used throughout this file. */
#define EVDNS_LOG_DEBUG EVENT_LOG_DEBUG
#define EVDNS_LOG_WARN EVENT_LOG_WARN
#define EVDNS_LOG_MSG EVENT_LOG_MSG

/* Fallback for platforms whose headers do not define HOST_NAME_MAX. */
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 255
#endif
125
126#include <stdio.h>
127
/* Drop any earlier definition of MIN and use our own.  Each argument may be
 * evaluated twice, so do not pass expressions with side effects. */
#undef MIN
#define MIN(a,b) ((a)<(b)?(a):(b))

/* Assert that req has a handle and that the handle points back at req. */
#define ASSERT_VALID_REQUEST(req) \
	EVUTIL_ASSERT((req)->handle && (req)->handle->current_req == (req))

/* Short spellings of the fixed-width unsigned integer types. */
#define u64 ev_uint64_t
#define u32 ev_uint32_t
#define u16 ev_uint16_t
#define u8  ev_uint8_t
138
/* Maximum number of addresses from a single packet that we bother */
/* recording; these size the address arrays in struct reply. */
#define MAX_V4_ADDRS 32
#define MAX_V6_ADDRS 32


/* Record types referred to in this file.  TYPE_CNAME is spelled as the */
/* literal 5; the others alias the public EVDNS_TYPE_* constants. */
#define TYPE_A	       EVDNS_TYPE_A
#define TYPE_CNAME     5
#define TYPE_PTR       EVDNS_TYPE_PTR
#define TYPE_SOA       EVDNS_TYPE_SOA
#define TYPE_AAAA      EVDNS_TYPE_AAAA

/* Record class alias for the public EVDNS_CLASS_INET constant. */
#define CLASS_INET     EVDNS_CLASS_INET
152
/* Persistent handle.  We keep this separate from 'struct request' since we
 * need some object to last for as long as an evdns_request is outstanding so
 * that it can be canceled, whereas a search request can lead to multiple
 * 'struct request' instances being created over its lifetime. */
struct evdns_request {
	/* The 'struct request' currently backing this handle; set to NULL by
	 * request_finished() when that request is torn down. */
	struct request *current_req;
	struct evdns_base *base;

	int pending_cb; /* Waiting for its callback to be invoked; not
			 * owned by event base any more. */

	/* elements used by the searching code */
	int search_index;
	struct search_state *search_state;
	char *search_origname;	/* needs to be free()ed */
	int search_flags;
};
170
/* A single DNS query packet together with its bookkeeping: where it was
 * sent, how often, its timeout, and the user callback to report to. */
struct request {
	u8 *request;  /* the dns packet data */
	u8 request_type; /* TYPE_PTR or TYPE_A or TYPE_AAAA */
	unsigned int request_len;
	int reissue_count; /* incremented by request_reissue() */
	int tx_count;  /* the number of times that this packet has been sent */
	void *user_pointer;  /* the pointer given to us for this request */
	evdns_callback_type user_callback;
	struct nameserver *ns;	/* the server which we last sent it */

	/* these objects are kept in a circular list */
	/* XXX We could turn this into a CIRCLEQ. */
	struct request *next, *prev;

	struct event timeout_event;

	u16 trans_id;  /* the transaction id */
	unsigned request_appended :1;	/* true if the request pointer is data which follows this struct */
	unsigned transmit_me :1;  /* needs to be transmitted */

	/* XXXX This is a horrible hack. */
	char **put_cname_in_ptr; /* store the cname here if we get one. */

	struct evdns_base *base; /* the resolver this request belongs to */

	struct evdns_request *handle; /* handle whose current_req is this request */
};
198
/* Parsed answer data for one reply.  Which member of 'data' is meaningful
 * depends on the request type (see reply_run_callback()). */
struct reply {
	unsigned int type;
	unsigned int have_answer : 1; /* zero is reported as DNS_ERR_NODATA by reply_handle() */
	union {
		struct {
			u32 addrcount; /* number of entries used in addresses[] */
			u32 addresses[MAX_V4_ADDRS];
		} a;
		struct {
			u32 addrcount; /* number of entries used in addresses[] */
			struct in6_addr addresses[MAX_V6_ADDRS];
		} aaaa;
		struct {
			/* NOTE(review): the buffer is HOST_NAME_MAX bytes; that
			 * value comes from the system headers when they define
			 * it, otherwise the 255 fallback above is used. */
			char name[HOST_NAME_MAX];
		} ptr;
	} data;
};
216
/* One upstream nameserver and its health-tracking state. */
struct nameserver {
	evutil_socket_t socket;	 /* a connected UDP socket */
	struct sockaddr_storage address;
	ev_socklen_t addrlen;
	int failed_times;  /* number of times which we have given this server a chance */
	int timedout;  /* number of times in a row a request has timed out */
	struct event event;
	/* these objects are kept in a circular list */
	struct nameserver *next, *prev;
	struct event timeout_event;  /* used to keep the timeout for */
				     /* when we next probe this server. */
				     /* Valid if state == 0 */
	/* Outstanding probe request for this nameserver, if any */
	struct evdns_request *probe_request;
	char state;  /* zero if we think that this server is down */
	char choked;  /* true if we have an EAGAIN from this server's socket */
	char write_waiting;  /* true if we are waiting for EV_WRITE events */
	struct evdns_base *base; /* the resolver this nameserver belongs to */

	/* Number of currently inflight requests: used
	 * to track when we should add/del the event. */
	int requests_inflight;
};
240
241
/* Represents a local port where we're listening for DNS requests. Right now, */
/* only UDP is supported. */
struct evdns_server_port {
	evutil_socket_t socket; /* socket we use to read queries and write replies. */
	int refcnt; /* reference count. */
	char choked; /* Are we currently blocked from writing? */
	char closing; /* Are we trying to close this port, pending writes? */
	evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
	void *user_data; /* Opaque pointer passed to user_callback */
	struct event event; /* Read/write event */
	/* circular list of replies that we want to write. */
	struct server_request *pending_replies;
	struct event_base *event_base;

	/* Only present when libevent is built with thread support. */
#ifndef EVENT__DISABLE_THREAD_SUPPORT
	void *lock;
#endif
};
260
/* Represents part of a reply being built.  (That is, a single RR.) */
struct server_reply_item {
	struct server_reply_item *next; /* next item in sequence. */
	char *name; /* name part of the RR */
	u16 type; /* The RR type */
	u16 class; /* The RR class (usually CLASS_INET) */
	u32 ttl; /* The RR TTL */
	char is_name; /* True iff data is a label */
	u16 datalen; /* Length of data; (u16)-1, i.e. 0xffff, if data is a label */
	void *data; /* The contents of the RR */
};
272
/* Represents a request that we've received as a DNS server, and holds */
/* the components of the reply as we're constructing it. */
struct server_request {
	/* Pointers to the next and previous entries on the list of replies */
	/* that we're waiting to write.	 Only set if we have tried to respond */
	/* and gotten EAGAIN. */
	struct server_request *next_pending;
	struct server_request *prev_pending;

	u16 trans_id; /* Transaction id. */
	struct evdns_server_port *port; /* Which port received this request on? */
	struct sockaddr_storage addr; /* Where to send the response */
	ev_socklen_t addrlen; /* length of addr */

	int n_answer; /* how many answer RRs have been set? */
	int n_authority; /* how many authority RRs have been set? */
	int n_additional; /* how many additional RRs have been set? */

	struct server_reply_item *answer; /* linked list of answer RRs */
	struct server_reply_item *authority; /* linked list of authority RRs */
	struct server_reply_item *additional; /* linked list of additional RRs */

	/* Constructed response.  Only set once we're ready to send a reply. */
	/* Once this is set, the RR fields are cleared, and no more should be set. */
	char *response;
	size_t response_len;

	/* Caller-visible fields: flags, questions.  TO_SERVER_REQUEST() */
	/* recovers the enclosing server_request from a pointer to this member. */
	struct evdns_server_request base;
};
303
/* State for one resolver instance: its request queues, nameserver list,
 * tunables, search state and hosts database. */
struct evdns_base {
	/* An array of n_req_heads circular lists for inflight requests.
	 * Each inflight request req is in req_heads[req->trans_id % n_req_heads].
	 */
	struct request **req_heads;
	/* A circular list of requests that we're waiting to send, but haven't
	 * sent yet because there are too many requests inflight */
	struct request *req_waiting_head;
	/* A circular list of nameservers. */
	struct nameserver *server_head;
	int n_req_heads;

	struct event_base *event_base;

	/* The number of good nameservers that we have */
	int global_good_nameservers;

	/* inflight requests are contained in the req_head list */
	/* and are actually going out across the network */
	int global_requests_inflight;
	/* requests which aren't inflight are in the waiting list */
	/* and are counted here */
	int global_requests_waiting;

	/* Upper bound on global_requests_inflight; requests beyond it stay
	 * on the waiting list (see evdns_requests_pump_waiting_queue()). */
	int global_max_requests_inflight;

	struct timeval global_timeout;	/* 5 seconds by default */
	int global_max_reissues;  /* a reissue occurs when we get some errors from the server */
	int global_max_retransmits;  /* number of times we'll retransmit a request which timed out */
	/* number of timeouts in a row before we consider this server to be down */
	int global_max_nameserver_timeout;
	/* true iff we will use the 0x20 hack to prevent poisoning attacks. */
	int global_randomize_case;

	/* The first time that a nameserver fails, how long do we wait before
	 * probing to see if it has returned?  */
	struct timeval global_nameserver_probe_initial_timeout;

	/** Port to bind to for outgoing DNS packets. */
	struct sockaddr_storage global_outgoing_address;
	/** ev_socklen_t for global_outgoing_address. 0 if it isn't set. */
	ev_socklen_t global_outgoing_addrlen;

	struct timeval global_getaddrinfo_allow_skew;

	int getaddrinfo_ipv4_timeouts;
	int getaddrinfo_ipv6_timeouts;
	int getaddrinfo_ipv4_answered;
	int getaddrinfo_ipv6_answered;

	struct search_state *global_search_state;

	/* List of hosts_entry records. */
	TAILQ_HEAD(hosts_list, hosts_entry) hostsdb;

	/* Only present when libevent is built with thread support; taken by
	 * EVDNS_LOCK() and released by EVDNS_UNLOCK(). */
#ifndef EVENT__DISABLE_THREAD_SUPPORT
	void *lock;
#endif

	/* When nonzero, request_finished() deletes a nameserver's events once
	 * that nameserver has no inflight requests left. */
	int disable_when_inactive;
};
364
/* One entry of the hosts database (evdns_base.hostsdb): an address plus the
 * hostname it belongs to. */
struct hosts_entry {
	TAILQ_ENTRY(hosts_entry) next;
	union {
		struct sockaddr sa;
		struct sockaddr_in sin;
		struct sockaddr_in6 sin6;
	} addr;
	int addrlen;
	/* NOTE(review): declared with one byte; presumably the entry is
	 * over-allocated so the full name fits here -- confirm at the
	 * allocation site. */
	char hostname[1];
};
375
/* The global evdns_base used by this file; starts out as NULL. */
static struct evdns_base *current_base = NULL;

/* Return the current global evdns_base pointer, which may be NULL. */
struct evdns_base *
evdns_get_global_base(void)
{
	struct evdns_base *const global_base = current_base;

	return global_base;
}
383
/* Given a pointer to an evdns_server_request, get the corresponding */
/* server_request.  This subtracts the offset of the 'base' member, so */
/* base_ptr must point at the 'base' field of a struct server_request. */
#define TO_SERVER_REQUEST(base_ptr)					\
	((struct server_request*)					\
	  (((char*)(base_ptr) - evutil_offsetof(struct server_request, base))))
389
/* The inflight-list head for transaction id 'id': the bucket at index */
/* id % n_req_heads.  Expands to an lvalue, so its address may be taken. */
/* 'id' is parenthesized so that expression arguments such as 'x + 1' */
/* are reduced as a whole rather than only their last operand. */
#define REQ_HEAD(base, id) ((base)->req_heads[(id) % (base)->n_req_heads])
391
/* Forward declarations for file-local functions that are used before */
/* their definitions. */

/* Client side: nameserver selection, request queues and transmission. */
static struct nameserver *nameserver_pick(struct evdns_base *base);
static void evdns_request_insert(struct request *req, struct request **head);
static void evdns_request_remove(struct request *req, struct request **head);
static void nameserver_ready_callback(evutil_socket_t fd, short events, void *arg);
static int evdns_transmit(struct evdns_base *base);
static int evdns_request_transmit(struct request *req);
static void nameserver_send_probe(struct nameserver *const ns);
static void search_request_finished(struct evdns_request *const);
static int search_try_next(struct evdns_request *const req);
static struct request *search_request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg);
static void evdns_requests_pump_waiting_queue(struct evdns_base *base);
static u16 transaction_id_pick(struct evdns_base *base);
static struct request *request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
static void request_submit(struct request *const req);

/* Server side, configuration parsing and teardown. */
static int server_request_free(struct server_request *req);
static void server_request_free_answers(struct server_request *req);
static void server_port_free(struct evdns_server_port *port);
static void server_port_ready_callback(evutil_socket_t fd, short events, void *arg);
static int evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename);
static int evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags);
static void evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests);
static void evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg);

static int strtoint(const char *const str);
418
/* Locking helpers for the 'lock' member of an object (used in this file */
/* with an evdns_base).  Without thread support they expand to empty */
/* statements and do not evaluate their argument. */
#ifdef EVENT__DISABLE_THREAD_SUPPORT
#define EVDNS_LOCK(base)  EVUTIL_NIL_STMT_
#define EVDNS_UNLOCK(base) EVUTIL_NIL_STMT_
#define ASSERT_LOCKED(base) EVUTIL_NIL_STMT_
#else
#define EVDNS_LOCK(base)			\
	EVLOCK_LOCK((base)->lock, 0)
#define EVDNS_UNLOCK(base)			\
	EVLOCK_UNLOCK((base)->lock, 0)
#define ASSERT_LOCKED(base)			\
	EVLOCK_ASSERT_LOCKED((base)->lock)
#endif
431
432static evdns_debug_log_fn_type evdns_log_fn = NULL;
433
434void
435evdns_set_log_fn(evdns_debug_log_fn_type fn)
436{
437	evdns_log_fn = fn;
438}
439
440#ifdef __GNUC__
441#define EVDNS_LOG_CHECK	 __attribute__ ((format(printf, 2, 3)))
442#else
443#define EVDNS_LOG_CHECK
444#endif
445
446static void evdns_log_(int severity, const char *fmt, ...) EVDNS_LOG_CHECK;
447static void
448evdns_log_(int severity, const char *fmt, ...)
449{
450	va_list args;
451	va_start(args,fmt);
452	if (evdns_log_fn) {
453		char buf[512];
454		int is_warn = (severity == EVDNS_LOG_WARN);
455		evutil_vsnprintf(buf, sizeof(buf), fmt, args);
456		evdns_log_fn(is_warn, buf);
457	} else {
458		event_logv_(severity, NULL, fmt, args);
459	}
460	va_end(args);
461}
462
463#define log evdns_log_
464
465/* This walks the list of inflight requests to find the */
466/* one with a matching transaction id. Returns NULL on */
467/* failure */
468static struct request *
469request_find_from_trans_id(struct evdns_base *base, u16 trans_id) {
470	struct request *req = REQ_HEAD(base, trans_id);
471	struct request *const started_at = req;
472
473	ASSERT_LOCKED(base);
474
475	if (req) {
476		do {
477			if (req->trans_id == trans_id) return req;
478			req = req->next;
479		} while (req != started_at);
480	}
481
482	return NULL;
483}
484
485/* a libevent callback function which is called when a nameserver */
486/* has gone down and we want to test if it has came back to life yet */
487static void
488nameserver_prod_callback(evutil_socket_t fd, short events, void *arg) {
489	struct nameserver *const ns = (struct nameserver *) arg;
490	(void)fd;
491	(void)events;
492
493	EVDNS_LOCK(ns->base);
494	nameserver_send_probe(ns);
495	EVDNS_UNLOCK(ns->base);
496}
497
498/* a libevent callback which is called when a nameserver probe (to see if */
499/* it has come back to life) times out. We increment the count of failed_times */
500/* and wait longer to send the next probe packet. */
501static void
502nameserver_probe_failed(struct nameserver *const ns) {
503	struct timeval timeout;
504	int i;
505
506	ASSERT_LOCKED(ns->base);
507	(void) evtimer_del(&ns->timeout_event);
508	if (ns->state == 1) {
509		/* This can happen if the nameserver acts in a way which makes us mark */
510		/* it as bad and then starts sending good replies. */
511		return;
512	}
513
514#define MAX_PROBE_TIMEOUT 3600
515#define TIMEOUT_BACKOFF_FACTOR 3
516
517	memcpy(&timeout, &ns->base->global_nameserver_probe_initial_timeout,
518	    sizeof(struct timeval));
519	for (i=ns->failed_times; i > 0 && timeout.tv_sec < MAX_PROBE_TIMEOUT; --i) {
520		timeout.tv_sec *= TIMEOUT_BACKOFF_FACTOR;
521		timeout.tv_usec *= TIMEOUT_BACKOFF_FACTOR;
522		if (timeout.tv_usec > 1000000) {
523			timeout.tv_sec += timeout.tv_usec / 1000000;
524			timeout.tv_usec %= 1000000;
525		}
526	}
527	if (timeout.tv_sec > MAX_PROBE_TIMEOUT) {
528		timeout.tv_sec = MAX_PROBE_TIMEOUT;
529		timeout.tv_usec = 0;
530	}
531
532	ns->failed_times++;
533
534	if (evtimer_add(&ns->timeout_event, &timeout) < 0) {
535		char addrbuf[128];
536		log(EVDNS_LOG_WARN,
537		    "Error from libevent when adding timer event for %s",
538		    evutil_format_sockaddr_port_(
539			    (struct sockaddr *)&ns->address,
540			    addrbuf, sizeof(addrbuf)));
541	}
542}
543
544static void
545request_swap_ns(struct request *req, struct nameserver *ns) {
546	if (ns && req->ns != ns) {
547		EVUTIL_ASSERT(req->ns->requests_inflight > 0);
548		req->ns->requests_inflight--;
549		ns->requests_inflight++;
550
551		req->ns = ns;
552	}
553}
554
555/* called when a nameserver has been deemed to have failed. For example, too */
556/* many packets have timed out etc */
557static void
558nameserver_failed(struct nameserver *const ns, const char *msg) {
559	struct request *req, *started_at;
560	struct evdns_base *base = ns->base;
561	int i;
562	char addrbuf[128];
563
564	ASSERT_LOCKED(base);
565	/* if this nameserver has already been marked as failed */
566	/* then don't do anything */
567	if (!ns->state) return;
568
569	log(EVDNS_LOG_MSG, "Nameserver %s has failed: %s",
570	    evutil_format_sockaddr_port_(
571		    (struct sockaddr *)&ns->address,
572		    addrbuf, sizeof(addrbuf)),
573	    msg);
574
575	base->global_good_nameservers--;
576	EVUTIL_ASSERT(base->global_good_nameservers >= 0);
577	if (base->global_good_nameservers == 0) {
578		log(EVDNS_LOG_MSG, "All nameservers have failed");
579	}
580
581	ns->state = 0;
582	ns->failed_times = 1;
583
584	if (evtimer_add(&ns->timeout_event,
585		&base->global_nameserver_probe_initial_timeout) < 0) {
586		log(EVDNS_LOG_WARN,
587		    "Error from libevent when adding timer event for %s",
588		    evutil_format_sockaddr_port_(
589			    (struct sockaddr *)&ns->address,
590			    addrbuf, sizeof(addrbuf)));
591		/* ???? Do more? */
592	}
593
594	/* walk the list of inflight requests to see if any can be reassigned to */
595	/* a different server. Requests in the waiting queue don't have a */
596	/* nameserver assigned yet */
597
598	/* if we don't have *any* good nameservers then there's no point */
599	/* trying to reassign requests to one */
600	if (!base->global_good_nameservers) return;
601
602	for (i = 0; i < base->n_req_heads; ++i) {
603		req = started_at = base->req_heads[i];
604		if (req) {
605			do {
606				if (req->tx_count == 0 && req->ns == ns) {
607					/* still waiting to go out, can be moved */
608					/* to another server */
609					request_swap_ns(req, nameserver_pick(base));
610				}
611				req = req->next;
612			} while (req != started_at);
613		}
614	}
615}
616
617static void
618nameserver_up(struct nameserver *const ns)
619{
620	char addrbuf[128];
621	ASSERT_LOCKED(ns->base);
622	if (ns->state) return;
623	log(EVDNS_LOG_MSG, "Nameserver %s is back up",
624	    evutil_format_sockaddr_port_(
625		    (struct sockaddr *)&ns->address,
626		    addrbuf, sizeof(addrbuf)));
627	evtimer_del(&ns->timeout_event);
628	if (ns->probe_request) {
629		evdns_cancel_request(ns->base, ns->probe_request);
630		ns->probe_request = NULL;
631	}
632	ns->state = 1;
633	ns->failed_times = 0;
634	ns->timedout = 0;
635	ns->base->global_good_nameservers++;
636}
637
638static void
639request_trans_id_set(struct request *const req, const u16 trans_id) {
640	req->trans_id = trans_id;
641	*((u16 *) req->request) = htons(trans_id);
642}
643
644/* Called to remove a request from a list and dealloc it. */
645/* head is a pointer to the head of the list it should be */
646/* removed from or NULL if the request isn't in a list. */
647/* when free_handle is one, free the handle as well. */
648static void
649request_finished(struct request *const req, struct request **head, int free_handle) {
650	struct evdns_base *base = req->base;
651	int was_inflight = (head != &base->req_waiting_head);
652	EVDNS_LOCK(base);
653	ASSERT_VALID_REQUEST(req);
654
655	if (head)
656		evdns_request_remove(req, head);
657
658	log(EVDNS_LOG_DEBUG, "Removing timeout for request %p", req);
659	if (was_inflight) {
660		evtimer_del(&req->timeout_event);
661		base->global_requests_inflight--;
662		req->ns->requests_inflight--;
663	} else {
664		base->global_requests_waiting--;
665	}
666	/* it was initialized during request_new / evtimer_assign */
667	event_debug_unassign(&req->timeout_event);
668
669	if (req->ns &&
670	    req->ns->requests_inflight == 0 &&
671	    req->base->disable_when_inactive) {
672		event_del(&req->ns->event);
673		evtimer_del(&req->ns->timeout_event);
674	}
675
676	if (!req->request_appended) {
677		/* need to free the request data on it's own */
678		mm_free(req->request);
679	} else {
680		/* the request data is appended onto the header */
681		/* so everything gets free()ed when we: */
682	}
683
684	if (req->handle) {
685		EVUTIL_ASSERT(req->handle->current_req == req);
686
687		if (free_handle) {
688			search_request_finished(req->handle);
689			req->handle->current_req = NULL;
690			if (! req->handle->pending_cb) {
691				/* If we're planning to run the callback,
692				 * don't free the handle until later. */
693				mm_free(req->handle);
694			}
695			req->handle = NULL; /* If we have a bug, let's crash
696					     * early */
697		} else {
698			req->handle->current_req = NULL;
699		}
700	}
701
702	mm_free(req);
703
704	evdns_requests_pump_waiting_queue(base);
705	EVDNS_UNLOCK(base);
706}
707
708/* This is called when a server returns a funny error code. */
709/* We try the request again with another server. */
710/* */
711/* return: */
712/*   0 ok */
713/*   1 failed/reissue is pointless */
714static int
715request_reissue(struct request *req) {
716	const struct nameserver *const last_ns = req->ns;
717	ASSERT_LOCKED(req->base);
718	ASSERT_VALID_REQUEST(req);
719	/* the last nameserver should have been marked as failing */
720	/* by the caller of this function, therefore pick will try */
721	/* not to return it */
722	request_swap_ns(req, nameserver_pick(req->base));
723	if (req->ns == last_ns) {
724		/* ... but pick did return it */
725		/* not a lot of point in trying again with the */
726		/* same server */
727		return 1;
728	}
729
730	req->reissue_count++;
731	req->tx_count = 0;
732	req->transmit_me = 1;
733
734	return 0;
735}
736
737/* this function looks for space on the inflight queue and promotes */
738/* requests from the waiting queue if it can. */
739/* */
740/* TODO: */
741/* add return code, see at nameserver_pick() and other functions. */
742static void
743evdns_requests_pump_waiting_queue(struct evdns_base *base) {
744	ASSERT_LOCKED(base);
745	while (base->global_requests_inflight < base->global_max_requests_inflight &&
746		   base->global_requests_waiting) {
747		struct request *req;
748
749		EVUTIL_ASSERT(base->req_waiting_head);
750		req = base->req_waiting_head;
751
752		req->ns = nameserver_pick(base);
753		if (!req->ns)
754			return;
755
756		/* move a request from the waiting queue to the inflight queue */
757		req->ns->requests_inflight++;
758
759		evdns_request_remove(req, &base->req_waiting_head);
760
761		base->global_requests_waiting--;
762		base->global_requests_inflight++;
763
764		request_trans_id_set(req, transaction_id_pick(base));
765
766		evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
767		evdns_request_transmit(req);
768		evdns_transmit(base);
769	}
770}
771
/* Everything needed to invoke a user callback later from a deferred */
/* event callback.  Filled in by reply_schedule_callback() and consumed, */
/* then freed, by reply_run_callback(). */
struct deferred_reply_callback {
	struct event_callback deferred; /* the deferred callback itself */
	struct evdns_request *handle; /* request handle, if the request had one */
	u8 request_type; /* copied from the request; selects the callback form */
	u8 have_reply; /* nonzero iff 'reply' below was filled in */
	u32 ttl; /* passed through to the user callback */
	u32 err; /* error code reported when have_reply is zero */
	evdns_callback_type user_callback;
	struct reply reply; /* copy of the parsed reply, when there is one */
};
783
784static void
785reply_run_callback(struct event_callback *d, void *user_pointer)
786{
787	struct deferred_reply_callback *cb =
788	    EVUTIL_UPCAST(d, struct deferred_reply_callback, deferred);
789
790	switch (cb->request_type) {
791	case TYPE_A:
792		if (cb->have_reply)
793			cb->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
794			    cb->reply.data.a.addrcount, cb->ttl,
795			    cb->reply.data.a.addresses,
796			    user_pointer);
797		else
798			cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
799		break;
800	case TYPE_PTR:
801		if (cb->have_reply) {
802			char *name = cb->reply.data.ptr.name;
803			cb->user_callback(DNS_ERR_NONE, DNS_PTR, 1, cb->ttl,
804			    &name, user_pointer);
805		} else {
806			cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
807		}
808		break;
809	case TYPE_AAAA:
810		if (cb->have_reply)
811			cb->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
812			    cb->reply.data.aaaa.addrcount, cb->ttl,
813			    cb->reply.data.aaaa.addresses,
814			    user_pointer);
815		else
816			cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
817		break;
818	default:
819		EVUTIL_ASSERT(0);
820	}
821
822	if (cb->handle && cb->handle->pending_cb) {
823		mm_free(cb->handle);
824	}
825
826	mm_free(cb);
827}
828
829static void
830reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply)
831{
832	struct deferred_reply_callback *d = mm_calloc(1, sizeof(*d));
833
834	if (!d) {
835		event_warn("%s: Couldn't allocate space for deferred callback.",
836		    __func__);
837		return;
838	}
839
840	ASSERT_LOCKED(req->base);
841
842	d->request_type = req->request_type;
843	d->user_callback = req->user_callback;
844	d->ttl = ttl;
845	d->err = err;
846	if (reply) {
847		d->have_reply = 1;
848		memcpy(&d->reply, reply, sizeof(struct reply));
849	}
850
851	if (req->handle) {
852		req->handle->pending_cb = 1;
853		d->handle = req->handle;
854	}
855
856	event_deferred_cb_init_(
857	    &d->deferred,
858	    event_get_priority(&req->timeout_event),
859	    reply_run_callback,
860	    req->user_pointer);
861	event_deferred_cb_schedule_(
862		req->base->event_base,
863		&d->deferred);
864}
865
866/* this processes a parsed reply packet */
867static void
868reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
869	int error;
870	char addrbuf[128];
871	static const int error_codes[] = {
872		DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
873		DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
874	};
875
876	ASSERT_LOCKED(req->base);
877	ASSERT_VALID_REQUEST(req);
878
879	if (flags & 0x020f || !reply || !reply->have_answer) {
880		/* there was an error */
881		if (flags & 0x0200) {
882			error = DNS_ERR_TRUNCATED;
883		} else if (flags & 0x000f) {
884			u16 error_code = (flags & 0x000f) - 1;
885			if (error_code > 4) {
886				error = DNS_ERR_UNKNOWN;
887			} else {
888				error = error_codes[error_code];
889			}
890		} else if (reply && !reply->have_answer) {
891			error = DNS_ERR_NODATA;
892		} else {
893			error = DNS_ERR_UNKNOWN;
894		}
895
896		switch (error) {
897		case DNS_ERR_NOTIMPL:
898		case DNS_ERR_REFUSED:
899			/* we regard these errors as marking a bad nameserver */
900			if (req->reissue_count < req->base->global_max_reissues) {
901				char msg[64];
902				evutil_snprintf(msg, sizeof(msg), "Bad response %d (%s)",
903					 error, evdns_err_to_string(error));
904				nameserver_failed(req->ns, msg);
905				if (!request_reissue(req)) return;
906			}
907			break;
908		case DNS_ERR_SERVERFAILED:
909			/* rcode 2 (servfailed) sometimes means "we
910			 * are broken" and sometimes (with some binds)
911			 * means "that request was very confusing."
912			 * Treat this as a timeout, not a failure.
913			 */
914			log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver"
915				"at %s; will allow the request to time out.",
916			    evutil_format_sockaddr_port_(
917				    (struct sockaddr *)&req->ns->address,
918				    addrbuf, sizeof(addrbuf)));
919			/* Call the timeout function */
920			evdns_request_timeout_callback(0, 0, req);
921			return;
922		default:
923			/* we got a good reply from the nameserver: it is up. */
924			if (req->handle == req->ns->probe_request) {
925				/* Avoid double-free */
926				req->ns->probe_request = NULL;
927			}
928
929			nameserver_up(req->ns);
930		}
931
932		if (req->handle->search_state &&
933		    req->request_type != TYPE_PTR) {
934			/* if we have a list of domains to search in,
935			 * try the next one */
936			if (!search_try_next(req->handle)) {
937				/* a new request was issued so this
938				 * request is finished and */
939				/* the user callback will be made when
940				 * that request (or a */
941				/* child of it) finishes. */
942				return;
943			}
944		}
945
946		/* all else failed. Pass the failure up */
947		reply_schedule_callback(req, ttl, error, NULL);
948		request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
949	} else {
950		/* all ok, tell the user */
951		reply_schedule_callback(req, ttl, 0, reply);
952		if (req->handle == req->ns->probe_request)
953			req->ns->probe_request = NULL; /* Avoid double-free */
954		nameserver_up(req->ns);
955		request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
956	}
957}
958
959static int
960name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
961	int name_end = -1;
962	int j = *idx;
963	int ptr_count = 0;
964#define GET32(x) do { if (j + 4 > length) goto err; memcpy(&t32_, packet + j, 4); j += 4; x = ntohl(t32_); } while (0)
965#define GET16(x) do { if (j + 2 > length) goto err; memcpy(&t_, packet + j, 2); j += 2; x = ntohs(t_); } while (0)
966#define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while (0)
967
968	char *cp = name_out;
969	const char *const end = name_out + name_out_len;
970
971	/* Normally, names are a series of length prefixed strings terminated */
972	/* with a length of 0 (the lengths are u8's < 63). */
973	/* However, the length can start with a pair of 1 bits and that */
974	/* means that the next 14 bits are a pointer within the current */
975	/* packet. */
976
977	for (;;) {
978		u8 label_len;
979		GET8(label_len);
980		if (!label_len) break;
981		if (label_len & 0xc0) {
982			u8 ptr_low;
983			GET8(ptr_low);
984			if (name_end < 0) name_end = j;
985			j = (((int)label_len & 0x3f) << 8) + ptr_low;
986			/* Make sure that the target offset is in-bounds. */
987			if (j < 0 || j >= length) return -1;
988			/* If we've jumped more times than there are characters in the
989			 * message, we must have a loop. */
990			if (++ptr_count > length) return -1;
991			continue;
992		}
993		if (label_len > 63) return -1;
994		if (cp != name_out) {
995			if (cp + 1 >= end) return -1;
996			*cp++ = '.';
997		}
998		if (cp + label_len >= end) return -1;
999		if (j + label_len > length) return -1;
1000		memcpy(cp, packet + j, label_len);
1001		cp += label_len;
1002		j += label_len;
1003	}
1004	if (cp >= end) return -1;
1005	*cp = '\0';
1006	if (name_end < 0)
1007		*idx = j;
1008	else
1009		*idx = name_end;
1010	return 0;
1011 err:
1012	return -1;
1013}
1014
1015/* parses a raw request from a nameserver */
1016static int
1017reply_parse(struct evdns_base *base, u8 *packet, int length) {
1018	int j = 0, k = 0;  /* index into packet */
1019	u16 t_;	 /* used by the macros */
1020	u32 t32_;  /* used by the macros */
1021	char tmp_name[256], cmp_name[256]; /* used by the macros */
1022	int name_matches = 0;
1023
1024	u16 trans_id, questions, answers, authority, additional, datalength;
1025	u16 flags = 0;
1026	u32 ttl, ttl_r = 0xffffffff;
1027	struct reply reply;
1028	struct request *req = NULL;
1029	unsigned int i;
1030
1031	ASSERT_LOCKED(base);
1032
1033	GET16(trans_id);
1034	GET16(flags);
1035	GET16(questions);
1036	GET16(answers);
1037	GET16(authority);
1038	GET16(additional);
1039	(void) authority; /* suppress "unused variable" warnings. */
1040	(void) additional; /* suppress "unused variable" warnings. */
1041
1042	req = request_find_from_trans_id(base, trans_id);
1043	if (!req) return -1;
1044	EVUTIL_ASSERT(req->base == base);
1045
1046	memset(&reply, 0, sizeof(reply));
1047
1048	/* If it's not an answer, it doesn't correspond to any request. */
1049	if (!(flags & 0x8000)) return -1;  /* must be an answer */
1050	if ((flags & 0x020f) && (flags & 0x020f) != DNS_ERR_NOTEXIST) {
1051		/* there was an error and it's not NXDOMAIN */
1052		goto err;
1053	}
1054	/* if (!answers) return; */  /* must have an answer of some form */
1055
1056	/* This macro skips a name in the DNS reply. */
1057#define SKIP_NAME						\
1058	do { tmp_name[0] = '\0';				\
1059		if (name_parse(packet, length, &j, tmp_name,	\
1060			sizeof(tmp_name))<0)			\
1061			goto err;				\
1062	} while (0)
1063
1064	reply.type = req->request_type;
1065
1066	/* skip over each question in the reply */
1067	for (i = 0; i < questions; ++i) {
1068		/* the question looks like
1069		 *   <label:name><u16:type><u16:class>
1070		 */
1071		tmp_name[0] = '\0';
1072		cmp_name[0] = '\0';
1073		k = j;
1074		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name)) < 0)
1075			goto err;
1076		if (name_parse(req->request, req->request_len, &k,
1077			cmp_name, sizeof(cmp_name))<0)
1078			goto err;
1079		if (!base->global_randomize_case) {
1080			if (strcmp(tmp_name, cmp_name) == 0)
1081				name_matches = 1;
1082		} else {
1083			if (evutil_ascii_strcasecmp(tmp_name, cmp_name) == 0)
1084				name_matches = 1;
1085		}
1086
1087		j += 4;
1088		if (j > length)
1089			goto err;
1090	}
1091
1092	if (!name_matches)
1093		goto err;
1094
1095	/* now we have the answer section which looks like
1096	 * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
1097	 */
1098
1099	for (i = 0; i < answers; ++i) {
1100		u16 type, class;
1101
1102		SKIP_NAME;
1103		GET16(type);
1104		GET16(class);
1105		GET32(ttl);
1106		GET16(datalength);
1107
1108		if (type == TYPE_A && class == CLASS_INET) {
1109			int addrcount, addrtocopy;
1110			if (req->request_type != TYPE_A) {
1111				j += datalength; continue;
1112			}
1113			if ((datalength & 3) != 0) /* not an even number of As. */
1114			    goto err;
1115			addrcount = datalength >> 2;
1116			addrtocopy = MIN(MAX_V4_ADDRS - reply.data.a.addrcount, (unsigned)addrcount);
1117
1118			ttl_r = MIN(ttl_r, ttl);
1119			/* we only bother with the first four addresses. */
1120			if (j + 4*addrtocopy > length) goto err;
1121			memcpy(&reply.data.a.addresses[reply.data.a.addrcount],
1122				   packet + j, 4*addrtocopy);
1123			j += 4*addrtocopy;
1124			reply.data.a.addrcount += addrtocopy;
1125			reply.have_answer = 1;
1126			if (reply.data.a.addrcount == MAX_V4_ADDRS) break;
1127		} else if (type == TYPE_PTR && class == CLASS_INET) {
1128			if (req->request_type != TYPE_PTR) {
1129				j += datalength; continue;
1130			}
1131			if (name_parse(packet, length, &j, reply.data.ptr.name,
1132						   sizeof(reply.data.ptr.name))<0)
1133				goto err;
1134			ttl_r = MIN(ttl_r, ttl);
1135			reply.have_answer = 1;
1136			break;
1137		} else if (type == TYPE_CNAME) {
1138			char cname[HOST_NAME_MAX];
1139			if (!req->put_cname_in_ptr || *req->put_cname_in_ptr) {
1140				j += datalength; continue;
1141			}
1142			if (name_parse(packet, length, &j, cname,
1143				sizeof(cname))<0)
1144				goto err;
1145			*req->put_cname_in_ptr = mm_strdup(cname);
1146		} else if (type == TYPE_AAAA && class == CLASS_INET) {
1147			int addrcount, addrtocopy;
1148			if (req->request_type != TYPE_AAAA) {
1149				j += datalength; continue;
1150			}
1151			if ((datalength & 15) != 0) /* not an even number of AAAAs. */
1152				goto err;
1153			addrcount = datalength >> 4;  /* each address is 16 bytes long */
1154			addrtocopy = MIN(MAX_V6_ADDRS - reply.data.aaaa.addrcount, (unsigned)addrcount);
1155			ttl_r = MIN(ttl_r, ttl);
1156
1157			/* we only bother with the first four addresses. */
1158			if (j + 16*addrtocopy > length) goto err;
1159			memcpy(&reply.data.aaaa.addresses[reply.data.aaaa.addrcount],
1160				   packet + j, 16*addrtocopy);
1161			reply.data.aaaa.addrcount += addrtocopy;
1162			j += 16*addrtocopy;
1163			reply.have_answer = 1;
1164			if (reply.data.aaaa.addrcount == MAX_V6_ADDRS) break;
1165		} else {
1166			/* skip over any other type of resource */
1167			j += datalength;
1168		}
1169	}
1170
1171	if (!reply.have_answer) {
1172		for (i = 0; i < authority; ++i) {
1173			u16 type, class;
1174			SKIP_NAME;
1175			GET16(type);
1176			GET16(class);
1177			GET32(ttl);
1178			GET16(datalength);
1179			if (type == TYPE_SOA && class == CLASS_INET) {
1180				u32 serial, refresh, retry, expire, minimum;
1181				SKIP_NAME;
1182				SKIP_NAME;
1183				GET32(serial);
1184				GET32(refresh);
1185				GET32(retry);
1186				GET32(expire);
1187				GET32(minimum);
1188				(void)expire;
1189				(void)retry;
1190				(void)refresh;
1191				(void)serial;
1192				ttl_r = MIN(ttl_r, ttl);
1193				ttl_r = MIN(ttl_r, minimum);
1194			} else {
1195				/* skip over any other type of resource */
1196				j += datalength;
1197			}
1198		}
1199	}
1200
1201	if (ttl_r == 0xffffffff)
1202		ttl_r = 0;
1203
1204	reply_handle(req, flags, ttl_r, &reply);
1205	return 0;
1206 err:
1207	if (req)
1208		reply_handle(req, flags, 0, NULL);
1209	return -1;
1210}
1211
1212/* Parse a raw request (packet,length) sent to a nameserver port (port) from */
1213/* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
1214/* callback. */
1215static int
1216request_parse(u8 *packet, int length, struct evdns_server_port *port, struct sockaddr *addr, ev_socklen_t addrlen)
1217{
1218	int j = 0;	/* index into packet */
1219	u16 t_;	 /* used by the macros */
1220	char tmp_name[256]; /* used by the macros */
1221
1222	int i;
1223	u16 trans_id, flags, questions, answers, authority, additional;
1224	struct server_request *server_req = NULL;
1225
1226	ASSERT_LOCKED(port);
1227
1228	/* Get the header fields */
1229	GET16(trans_id);
1230	GET16(flags);
1231	GET16(questions);
1232	GET16(answers);
1233	GET16(authority);
1234	GET16(additional);
1235	(void)answers;
1236	(void)additional;
1237	(void)authority;
1238
1239	if (flags & 0x8000) return -1; /* Must not be an answer. */
1240	flags &= 0x0110; /* Only RD and CD get preserved. */
1241
1242	server_req = mm_malloc(sizeof(struct server_request));
1243	if (server_req == NULL) return -1;
1244	memset(server_req, 0, sizeof(struct server_request));
1245
1246	server_req->trans_id = trans_id;
1247	memcpy(&server_req->addr, addr, addrlen);
1248	server_req->addrlen = addrlen;
1249
1250	server_req->base.flags = flags;
1251	server_req->base.nquestions = 0;
1252	server_req->base.questions = mm_calloc(sizeof(struct evdns_server_question *), questions);
1253	if (server_req->base.questions == NULL)
1254		goto err;
1255
1256	for (i = 0; i < questions; ++i) {
1257		u16 type, class;
1258		struct evdns_server_question *q;
1259		int namelen;
1260		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
1261			goto err;
1262		GET16(type);
1263		GET16(class);
1264		namelen = (int)strlen(tmp_name);
1265		q = mm_malloc(sizeof(struct evdns_server_question) + namelen);
1266		if (!q)
1267			goto err;
1268		q->type = type;
1269		q->dns_question_class = class;
1270		memcpy(q->name, tmp_name, namelen+1);
1271		server_req->base.questions[server_req->base.nquestions++] = q;
1272	}
1273
1274	/* Ignore answers, authority, and additional. */
1275
1276	server_req->port = port;
1277	port->refcnt++;
1278
1279	/* Only standard queries are supported. */
1280	if (flags & 0x7800) {
1281		evdns_server_request_respond(&(server_req->base), DNS_ERR_NOTIMPL);
1282		return -1;
1283	}
1284
1285	port->user_callback(&(server_req->base), port->user_data);
1286
1287	return 0;
1288err:
1289	if (server_req) {
1290		if (server_req->base.questions) {
1291			for (i = 0; i < server_req->base.nquestions; ++i)
1292				mm_free(server_req->base.questions[i]);
1293			mm_free(server_req->base.questions);
1294		}
1295		mm_free(server_req);
1296	}
1297	return -1;
1298
1299#undef SKIP_NAME
1300#undef GET32
1301#undef GET16
1302#undef GET8
1303}
1304
1305
1306void
1307evdns_set_transaction_id_fn(ev_uint16_t (*fn)(void))
1308{
1309}
1310
/* Accepts a random-bytes generator and ignores it: this function has no
 * effect and never calls fn. */
void
evdns_set_random_bytes_fn(void (*fn)(char *, size_t))
{
	(void)fn;
}
1315
1316/* Try to choose a strong transaction id which isn't already in flight */
1317static u16
1318transaction_id_pick(struct evdns_base *base) {
1319	ASSERT_LOCKED(base);
1320	for (;;) {
1321		u16 trans_id;
1322		evutil_secure_rng_get_bytes(&trans_id, sizeof(trans_id));
1323
1324		if (trans_id == 0xffff) continue;
1325		/* now check to see if that id is already inflight */
1326		if (request_find_from_trans_id(base, trans_id) == NULL)
1327			return trans_id;
1328	}
1329}
1330
1331/* choose a namesever to use. This function will try to ignore */
1332/* nameservers which we think are down and load balance across the rest */
1333/* by updating the server_head global each time. */
1334static struct nameserver *
1335nameserver_pick(struct evdns_base *base) {
1336	struct nameserver *started_at = base->server_head, *picked;
1337	ASSERT_LOCKED(base);
1338	if (!base->server_head) return NULL;
1339
1340	/* if we don't have any good nameservers then there's no */
1341	/* point in trying to find one. */
1342	if (!base->global_good_nameservers) {
1343		base->server_head = base->server_head->next;
1344		return base->server_head;
1345	}
1346
1347	/* remember that nameservers are in a circular list */
1348	for (;;) {
1349		if (base->server_head->state) {
1350			/* we think this server is currently good */
1351			picked = base->server_head;
1352			base->server_head = base->server_head->next;
1353			return picked;
1354		}
1355
1356		base->server_head = base->server_head->next;
1357		if (base->server_head == started_at) {
1358			/* all the nameservers seem to be down */
1359			/* so we just return this one and hope for the */
1360			/* best */
1361			EVUTIL_ASSERT(base->global_good_nameservers == 0);
1362			picked = base->server_head;
1363			base->server_head = base->server_head->next;
1364			return picked;
1365		}
1366	}
1367}
1368
1369/* this is called when a namesever socket is ready for reading */
1370static void
1371nameserver_read(struct nameserver *ns) {
1372	struct sockaddr_storage ss;
1373	ev_socklen_t addrlen = sizeof(ss);
1374	u8 packet[1500];
1375	char addrbuf[128];
1376	ASSERT_LOCKED(ns->base);
1377
1378	for (;;) {
1379		const int r = recvfrom(ns->socket, (void*)packet,
1380		    sizeof(packet), 0,
1381		    (struct sockaddr*)&ss, &addrlen);
1382		if (r < 0) {
1383			int err = evutil_socket_geterror(ns->socket);
1384			if (EVUTIL_ERR_RW_RETRIABLE(err))
1385				return;
1386			nameserver_failed(ns,
1387			    evutil_socket_error_to_string(err));
1388			return;
1389		}
1390		if (evutil_sockaddr_cmp((struct sockaddr*)&ss,
1391			(struct sockaddr*)&ns->address, 0)) {
1392			log(EVDNS_LOG_WARN, "Address mismatch on received "
1393			    "DNS packet.  Apparent source was %s",
1394			    evutil_format_sockaddr_port_(
1395				    (struct sockaddr *)&ss,
1396				    addrbuf, sizeof(addrbuf)));
1397			return;
1398		}
1399
1400		ns->timedout = 0;
1401		reply_parse(ns->base, packet, r);
1402	}
1403}
1404
1405/* Read a packet from a DNS client on a server port s, parse it, and */
1406/* act accordingly. */
1407static void
1408server_port_read(struct evdns_server_port *s) {
1409	u8 packet[1500];
1410	struct sockaddr_storage addr;
1411	ev_socklen_t addrlen;
1412	int r;
1413	ASSERT_LOCKED(s);
1414
1415	for (;;) {
1416		addrlen = sizeof(struct sockaddr_storage);
1417		r = recvfrom(s->socket, (void*)packet, sizeof(packet), 0,
1418					 (struct sockaddr*) &addr, &addrlen);
1419		if (r < 0) {
1420			int err = evutil_socket_geterror(s->socket);
1421			if (EVUTIL_ERR_RW_RETRIABLE(err))
1422				return;
1423			log(EVDNS_LOG_WARN,
1424			    "Error %s (%d) while reading request.",
1425			    evutil_socket_error_to_string(err), err);
1426			return;
1427		}
1428		request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen);
1429	}
1430}
1431
1432/* Try to write all pending replies on a given DNS server port. */
1433static void
1434server_port_flush(struct evdns_server_port *port)
1435{
1436	struct server_request *req = port->pending_replies;
1437	ASSERT_LOCKED(port);
1438	while (req) {
1439		int r = sendto(port->socket, req->response, (int)req->response_len, 0,
1440			   (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1441		if (r < 0) {
1442			int err = evutil_socket_geterror(port->socket);
1443			if (EVUTIL_ERR_RW_RETRIABLE(err))
1444				return;
1445			log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", evutil_socket_error_to_string(err), err);
1446		}
1447		if (server_request_free(req)) {
1448			/* we released the last reference to req->port. */
1449			return;
1450		} else {
1451			EVUTIL_ASSERT(req != port->pending_replies);
1452			req = port->pending_replies;
1453		}
1454	}
1455
1456	/* We have no more pending requests; stop listening for 'writeable' events. */
1457	(void) event_del(&port->event);
1458	event_assign(&port->event, port->event_base,
1459				 port->socket, EV_READ | EV_PERSIST,
1460				 server_port_ready_callback, port);
1461
1462	if (event_add(&port->event, NULL) < 0) {
1463		log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server.");
1464		/* ???? Do more? */
1465	}
1466}
1467
1468/* set if we are waiting for the ability to write to this server. */
1469/* if waiting is true then we ask libevent for EV_WRITE events, otherwise */
1470/* we stop these events. */
1471static void
1472nameserver_write_waiting(struct nameserver *ns, char waiting) {
1473	ASSERT_LOCKED(ns->base);
1474	if (ns->write_waiting == waiting) return;
1475
1476	ns->write_waiting = waiting;
1477	(void) event_del(&ns->event);
1478	event_assign(&ns->event, ns->base->event_base,
1479	    ns->socket, EV_READ | (waiting ? EV_WRITE : 0) | EV_PERSIST,
1480	    nameserver_ready_callback, ns);
1481	if (event_add(&ns->event, NULL) < 0) {
1482		char addrbuf[128];
1483		log(EVDNS_LOG_WARN, "Error from libevent when adding event for %s",
1484		    evutil_format_sockaddr_port_(
1485			    (struct sockaddr *)&ns->address,
1486			    addrbuf, sizeof(addrbuf)));
1487		/* ???? Do more? */
1488	}
1489}
1490
1491/* a callback function. Called by libevent when the kernel says that */
1492/* a nameserver socket is ready for writing or reading */
1493static void
1494nameserver_ready_callback(evutil_socket_t fd, short events, void *arg) {
1495	struct nameserver *ns = (struct nameserver *) arg;
1496	(void)fd;
1497
1498	EVDNS_LOCK(ns->base);
1499	if (events & EV_WRITE) {
1500		ns->choked = 0;
1501		if (!evdns_transmit(ns->base)) {
1502			nameserver_write_waiting(ns, 0);
1503		}
1504	}
1505	if (events & EV_READ) {
1506		nameserver_read(ns);
1507	}
1508	EVDNS_UNLOCK(ns->base);
1509}
1510
1511/* a callback function. Called by libevent when the kernel says that */
1512/* a server socket is ready for writing or reading. */
1513static void
1514server_port_ready_callback(evutil_socket_t fd, short events, void *arg) {
1515	struct evdns_server_port *port = (struct evdns_server_port *) arg;
1516	(void) fd;
1517
1518	EVDNS_LOCK(port);
1519	if (events & EV_WRITE) {
1520		port->choked = 0;
1521		server_port_flush(port);
1522	}
1523	if (events & EV_READ) {
1524		server_port_read(port);
1525	}
1526	EVDNS_UNLOCK(port);
1527}
1528
/* Structures used to implement name compression.
 *
 * This is an inefficient representation; only use it via the dnslabel_table_*
 * functions, so that it can be safely replaced with something smarter later. */

/* Maximum number of names a dnslabel_table can remember. */
#define MAX_LABELS 128

/* One remembered name and where it was written. */
struct dnslabel_entry {
	char *v;	/* heap copy of the name */
	off_t pos;	/* position of that name in the message */
};

/* map from name to position in message */
struct dnslabel_table {
	int n_labels; /* number of current entries */
	struct dnslabel_entry labels[MAX_LABELS];
};
1539
1540/* Initialize dnslabel_table. */
1541static void
1542dnslabel_table_init(struct dnslabel_table *table)
1543{
1544	table->n_labels = 0;
1545}
1546
1547/* Free all storage held by table, but not the table itself. */
1548static void
1549dnslabel_clear(struct dnslabel_table *table)
1550{
1551	int i;
1552	for (i = 0; i < table->n_labels; ++i)
1553		mm_free(table->labels[i].v);
1554	table->n_labels = 0;
1555}
1556
1557/* return the position of the label in the current message, or -1 if the label */
1558/* hasn't been used yet. */
1559static int
1560dnslabel_table_get_pos(const struct dnslabel_table *table, const char *label)
1561{
1562	int i;
1563	for (i = 0; i < table->n_labels; ++i) {
1564		if (!strcmp(label, table->labels[i].v))
1565			return table->labels[i].pos;
1566	}
1567	return -1;
1568}
1569
1570/* remember that we've used the label at position pos */
1571static int
1572dnslabel_table_add(struct dnslabel_table *table, const char *label, off_t pos)
1573{
1574	char *v;
1575	int p;
1576	if (table->n_labels == MAX_LABELS)
1577		return (-1);
1578	v = mm_strdup(label);
1579	if (v == NULL)
1580		return (-1);
1581	p = table->n_labels++;
1582	table->labels[p].v = v;
1583	table->labels[p].pos = pos;
1584
1585	return (0);
1586}
1587
1588/* Converts a string to a length-prefixed set of DNS labels, starting */
1589/* at buf[j]. name and buf must not overlap. name_len should be the length */
1590/* of name.	 table is optional, and is used for compression. */
1591/* */
1592/* Input: abc.def */
1593/* Output: <3>abc<3>def<0> */
1594/* */
1595/* Returns the first index after the encoded name, or negative on error. */
1596/*	 -1	 label was > 63 bytes */
1597/*	 -2	 name too long to fit in buffer. */
1598/* */
1599static off_t
1600dnsname_to_labels(u8 *const buf, size_t buf_len, off_t j,
1601				  const char *name, const size_t name_len,
1602				  struct dnslabel_table *table) {
1603	const char *end = name + name_len;
1604	int ref = 0;
1605	u16 t_;
1606
1607#define APPEND16(x) do {						\
1608		if (j + 2 > (off_t)buf_len)				\
1609			goto overflow;					\
1610		t_ = htons(x);						\
1611		memcpy(buf + j, &t_, 2);				\
1612		j += 2;							\
1613	} while (0)
1614#define APPEND32(x) do {						\
1615		if (j + 4 > (off_t)buf_len)				\
1616			goto overflow;					\
1617		t32_ = htonl(x);					\
1618		memcpy(buf + j, &t32_, 4);				\
1619		j += 4;							\
1620	} while (0)
1621
1622	if (name_len > 255) return -2;
1623
1624	for (;;) {
1625		const char *const start = name;
1626		if (table && (ref = dnslabel_table_get_pos(table, name)) >= 0) {
1627			APPEND16(ref | 0xc000);
1628			return j;
1629		}
1630		name = strchr(name, '.');
1631		if (!name) {
1632			const size_t label_len = end - start;
1633			if (label_len > 63) return -1;
1634			if ((size_t)(j+label_len+1) > buf_len) return -2;
1635			if (table) dnslabel_table_add(table, start, j);
1636			buf[j++] = (ev_uint8_t)label_len;
1637
1638			memcpy(buf + j, start, label_len);
1639			j += (int) label_len;
1640			break;
1641		} else {
1642			/* append length of the label. */
1643			const size_t label_len = name - start;
1644			if (label_len > 63) return -1;
1645			if ((size_t)(j+label_len+1) > buf_len) return -2;
1646			if (table) dnslabel_table_add(table, start, j);
1647			buf[j++] = (ev_uint8_t)label_len;
1648
1649			memcpy(buf + j, start, label_len);
1650			j += (int) label_len;
1651			/* hop over the '.' */
1652			name++;
1653		}
1654	}
1655
1656	/* the labels must be terminated by a 0. */
1657	/* It's possible that the name ended in a . */
1658	/* in which case the zero is already there */
1659	if (!j || buf[j-1]) buf[j++] = 0;
1660	return j;
1661 overflow:
1662	return (-2);
1663}
1664
/* Returns a buffer size that is large enough to build a DNS request for a
 * name of name_len bytes.  This is an upper bound: the request that is
 * actually built may be smaller. */
static size_t
evdns_request_len(const size_t name_len) {
	/* Allowance for the DNS standard header.  (The six 16-bit header
	 * fields written by evdns_request_data_build() occupy 12 bytes, so
	 * this is generous.) */
	const size_t header_allowance = 96;
	/* Two bytes on top of the name itself. */
	const size_t name_overhead = 2;
	/* space for the resource type */
	const size_t question_tail = 4;

	return header_allowance + name_len + name_overhead + question_tail;
}
1674
1675/* build a dns request packet into buf. buf should be at least as long */
1676/* as evdns_request_len told you it should be. */
1677/* */
1678/* Returns the amount of space used. Negative on error. */
1679static int
1680evdns_request_data_build(const char *const name, const size_t name_len,
1681    const u16 trans_id, const u16 type, const u16 class,
1682    u8 *const buf, size_t buf_len) {
1683	off_t j = 0;  /* current offset into buf */
1684	u16 t_;	 /* used by the macros */
1685
1686	APPEND16(trans_id);
1687	APPEND16(0x0100);  /* standard query, recusion needed */
1688	APPEND16(1);  /* one question */
1689	APPEND16(0);  /* no answers */
1690	APPEND16(0);  /* no authority */
1691	APPEND16(0);  /* no additional */
1692
1693	j = dnsname_to_labels(buf, buf_len, j, name, name_len, NULL);
1694	if (j < 0) {
1695		return (int)j;
1696	}
1697
1698	APPEND16(type);
1699	APPEND16(class);
1700
1701	return (int)j;
1702 overflow:
1703	return (-1);
1704}
1705
1706/* exported function */
1707struct evdns_server_port *
1708evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1709{
1710	struct evdns_server_port *port;
1711	if (flags)
1712		return NULL; /* flags not yet implemented */
1713	if (!(port = mm_malloc(sizeof(struct evdns_server_port))))
1714		return NULL;
1715	memset(port, 0, sizeof(struct evdns_server_port));
1716
1717
1718	port->socket = socket;
1719	port->refcnt = 1;
1720	port->choked = 0;
1721	port->closing = 0;
1722	port->user_callback = cb;
1723	port->user_data = user_data;
1724	port->pending_replies = NULL;
1725	port->event_base = base;
1726
1727	event_assign(&port->event, port->event_base,
1728				 port->socket, EV_READ | EV_PERSIST,
1729				 server_port_ready_callback, port);
1730	if (event_add(&port->event, NULL) < 0) {
1731		mm_free(port);
1732		return NULL;
1733	}
1734	EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
1735	return port;
1736}
1737
1738struct evdns_server_port *
1739evdns_add_server_port(evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
1740{
1741	return evdns_add_server_port_with_base(NULL, socket, flags, cb, user_data);
1742}
1743
1744/* exported function */
1745void
1746evdns_close_server_port(struct evdns_server_port *port)
1747{
1748	EVDNS_LOCK(port);
1749	if (--port->refcnt == 0) {
1750		EVDNS_UNLOCK(port);
1751		server_port_free(port);
1752	} else {
1753		port->closing = 1;
1754	}
1755}
1756
1757/* exported function */
1758int
1759evdns_server_request_add_reply(struct evdns_server_request *req_, int section, const char *name, int type, int class, int ttl, int datalen, int is_name, const char *data)
1760{
1761	struct server_request *req = TO_SERVER_REQUEST(req_);
1762	struct server_reply_item **itemp, *item;
1763	int *countp;
1764	int result = -1;
1765
1766	EVDNS_LOCK(req->port);
1767	if (req->response) /* have we already answered? */
1768		goto done;
1769
1770	switch (section) {
1771	case EVDNS_ANSWER_SECTION:
1772		itemp = &req->answer;
1773		countp = &req->n_answer;
1774		break;
1775	case EVDNS_AUTHORITY_SECTION:
1776		itemp = &req->authority;
1777		countp = &req->n_authority;
1778		break;
1779	case EVDNS_ADDITIONAL_SECTION:
1780		itemp = &req->additional;
1781		countp = &req->n_additional;
1782		break;
1783	default:
1784		goto done;
1785	}
1786	while (*itemp) {
1787		itemp = &((*itemp)->next);
1788	}
1789	item = mm_malloc(sizeof(struct server_reply_item));
1790	if (!item)
1791		goto done;
1792	item->next = NULL;
1793	if (!(item->name = mm_strdup(name))) {
1794		mm_free(item);
1795		goto done;
1796	}
1797	item->type = type;
1798	item->dns_question_class = class;
1799	item->ttl = ttl;
1800	item->is_name = is_name != 0;
1801	item->datalen = 0;
1802	item->data = NULL;
1803	if (data) {
1804		if (item->is_name) {
1805			if (!(item->data = mm_strdup(data))) {
1806				mm_free(item->name);
1807				mm_free(item);
1808				goto done;
1809			}
1810			item->datalen = (u16)-1;
1811		} else {
1812			if (!(item->data = mm_malloc(datalen))) {
1813				mm_free(item->name);
1814				mm_free(item);
1815				goto done;
1816			}
1817			item->datalen = datalen;
1818			memcpy(item->data, data, datalen);
1819		}
1820	}
1821
1822	*itemp = item;
1823	++(*countp);
1824	result = 0;
1825done:
1826	EVDNS_UNLOCK(req->port);
1827	return result;
1828}
1829
1830/* exported function */
1831int
1832evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1833{
1834	return evdns_server_request_add_reply(
1835		  req, EVDNS_ANSWER_SECTION, name, TYPE_A, CLASS_INET,
1836		  ttl, n*4, 0, addrs);
1837}
1838
1839/* exported function */
1840int
1841evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
1842{
1843	return evdns_server_request_add_reply(
1844		  req, EVDNS_ANSWER_SECTION, name, TYPE_AAAA, CLASS_INET,
1845		  ttl, n*16, 0, addrs);
1846}
1847
1848/* exported function */
1849int
1850evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl)
1851{
1852	u32 a;
1853	char buf[32];
1854	if (in && inaddr_name)
1855		return -1;
1856	else if (!in && !inaddr_name)
1857		return -1;
1858	if (in) {
1859		a = ntohl(in->s_addr);
1860		evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
1861				(int)(u8)((a	)&0xff),
1862				(int)(u8)((a>>8 )&0xff),
1863				(int)(u8)((a>>16)&0xff),
1864				(int)(u8)((a>>24)&0xff));
1865		inaddr_name = buf;
1866	}
1867	return evdns_server_request_add_reply(
1868		  req, EVDNS_ANSWER_SECTION, inaddr_name, TYPE_PTR, CLASS_INET,
1869		  ttl, -1, 1, hostname);
1870}
1871
1872/* exported function */
1873int
1874evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl)
1875{
1876	return evdns_server_request_add_reply(
1877		  req, EVDNS_ANSWER_SECTION, name, TYPE_CNAME, CLASS_INET,
1878		  ttl, -1, 1, cname);
1879}
1880
1881/* exported function */
1882void
1883evdns_server_request_set_flags(struct evdns_server_request *exreq, int flags)
1884{
1885	struct server_request *req = TO_SERVER_REQUEST(exreq);
1886	req->base.flags &= ~(EVDNS_FLAGS_AA|EVDNS_FLAGS_RD);
1887	req->base.flags |= flags;
1888}
1889
1890static int
1891evdns_server_request_format_response(struct server_request *req, int err)
1892{
1893	unsigned char buf[1500];
1894	size_t buf_len = sizeof(buf);
1895	off_t j = 0, r;
1896	u16 t_;
1897	u32 t32_;
1898	int i;
1899	u16 flags;
1900	struct dnslabel_table table;
1901
1902	if (err < 0 || err > 15) return -1;
1903
1904	/* Set response bit and error code; copy OPCODE and RD fields from
1905	 * question; copy RA and AA if set by caller. */
1906	flags = req->base.flags;
1907	flags |= (0x8000 | err);
1908
1909	dnslabel_table_init(&table);
1910	APPEND16(req->trans_id);
1911	APPEND16(flags);
1912	APPEND16(req->base.nquestions);
1913	APPEND16(req->n_answer);
1914	APPEND16(req->n_authority);
1915	APPEND16(req->n_additional);
1916
1917	/* Add questions. */
1918	for (i=0; i < req->base.nquestions; ++i) {
1919		const char *s = req->base.questions[i]->name;
1920		j = dnsname_to_labels(buf, buf_len, j, s, strlen(s), &table);
1921		if (j < 0) {
1922			dnslabel_clear(&table);
1923			return (int) j;
1924		}
1925		APPEND16(req->base.questions[i]->type);
1926		APPEND16(req->base.questions[i]->dns_question_class);
1927	}
1928
1929	/* Add answer, authority, and additional sections. */
1930	for (i=0; i<3; ++i) {
1931		struct server_reply_item *item;
1932		if (i==0)
1933			item = req->answer;
1934		else if (i==1)
1935			item = req->authority;
1936		else
1937			item = req->additional;
1938		while (item) {
1939			r = dnsname_to_labels(buf, buf_len, j, item->name, strlen(item->name), &table);
1940			if (r < 0)
1941				goto overflow;
1942			j = r;
1943
1944			APPEND16(item->type);
1945			APPEND16(item->dns_question_class);
1946			APPEND32(item->ttl);
1947			if (item->is_name) {
1948				off_t len_idx = j, name_start;
1949				j += 2;
1950				name_start = j;
1951				r = dnsname_to_labels(buf, buf_len, j, item->data, strlen(item->data), &table);
1952				if (r < 0)
1953					goto overflow;
1954				j = r;
1955				t_ = htons( (short) (j-name_start) );
1956				memcpy(buf+len_idx, &t_, 2);
1957			} else {
1958				APPEND16(item->datalen);
1959				if (j+item->datalen > (off_t)buf_len)
1960					goto overflow;
1961				memcpy(buf+j, item->data, item->datalen);
1962				j += item->datalen;
1963			}
1964			item = item->next;
1965		}
1966	}
1967
1968	if (j > 512) {
1969overflow:
1970		j = 512;
1971		buf[2] |= 0x02; /* set the truncated bit. */
1972	}
1973
1974	req->response_len = j;
1975
1976	if (!(req->response = mm_malloc(req->response_len))) {
1977		server_request_free_answers(req);
1978		dnslabel_clear(&table);
1979		return (-1);
1980	}
1981	memcpy(req->response, buf, req->response_len);
1982	server_request_free_answers(req);
1983	dnslabel_clear(&table);
1984	return (0);
1985}
1986
1987/* exported function */
1988int
1989evdns_server_request_respond(struct evdns_server_request *req_, int err)
1990{
1991	struct server_request *req = TO_SERVER_REQUEST(req_);
1992	struct evdns_server_port *port = req->port;
1993	int r = -1;
1994
1995	EVDNS_LOCK(port);
1996	if (!req->response) {
1997		if ((r = evdns_server_request_format_response(req, err))<0)
1998			goto done;
1999	}
2000
2001	r = sendto(port->socket, req->response, (int)req->response_len, 0,
2002			   (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
2003	if (r<0) {
2004		int sock_err = evutil_socket_geterror(port->socket);
2005		if (EVUTIL_ERR_RW_RETRIABLE(sock_err))
2006			goto done;
2007
2008		if (port->pending_replies) {
2009			req->prev_pending = port->pending_replies->prev_pending;
2010			req->next_pending = port->pending_replies;
2011			req->prev_pending->next_pending =
2012				req->next_pending->prev_pending = req;
2013		} else {
2014			req->prev_pending = req->next_pending = req;
2015			port->pending_replies = req;
2016			port->choked = 1;
2017
2018			(void) event_del(&port->event);
2019			event_assign(&port->event, port->event_base, port->socket, (port->closing?0:EV_READ) | EV_WRITE | EV_PERSIST, server_port_ready_callback, port);
2020
2021			if (event_add(&port->event, NULL) < 0) {
2022				log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server");
2023			}
2024
2025		}
2026
2027		r = 1;
2028		goto done;
2029	}
2030	if (server_request_free(req)) {
2031		r = 0;
2032		goto done;
2033	}
2034
2035	if (port->pending_replies)
2036		server_port_flush(port);
2037
2038	r = 0;
2039done:
2040	EVDNS_UNLOCK(port);
2041	return r;
2042}
2043
2044/* Free all storage held by RRs in req. */
2045static void
2046server_request_free_answers(struct server_request *req)
2047{
2048	struct server_reply_item *victim, *next, **list;
2049	int i;
2050	for (i = 0; i < 3; ++i) {
2051		if (i==0)
2052			list = &req->answer;
2053		else if (i==1)
2054			list = &req->authority;
2055		else
2056			list = &req->additional;
2057
2058		victim = *list;
2059		while (victim) {
2060			next = victim->next;
2061			mm_free(victim->name);
2062			if (victim->data)
2063				mm_free(victim->data);
2064			mm_free(victim);
2065			victim = next;
2066		}
2067		*list = NULL;
2068	}
2069}
2070
/* Free all storage held by req, and remove links to it. */
/* return true iff we just wound up freeing the server_port. */
/*
 * Steps, in order: free the parsed questions; under the port lock, move the
 * port's pending_replies head off req and drop one port reference; free the
 * response buffer and any queued RRs; unlink req from the circular pending
 * list; finally free req itself, plus the port when its refcount hit zero.
 */
static int
server_request_free(struct server_request *req)
{
	/* rc starts at 1 so that a request without a port never takes the
	 * "port refcount reached zero" path below; lock records whether we
	 * took the port lock and therefore have to release it. */
	int i, rc=1, lock=0;
	if (req->base.questions) {
		for (i = 0; i < req->base.nquestions; ++i)
			mm_free(req->base.questions[i]);
		mm_free(req->base.questions);
	}

	if (req->port) {
		EVDNS_LOCK(req->port);
		lock=1;
		if (req->port->pending_replies == req) {
			/* req is the list head: advance the head to the next
			 * entry, or clear it if req was the only entry. */
			if (req->next_pending && req->next_pending != req)
				req->port->pending_replies = req->next_pending;
			else
				req->port->pending_replies = NULL;
		}
		rc = --req->port->refcnt;
	}

	if (req->response) {
		mm_free(req->response);
	}

	server_request_free_answers(req);

	/* Unlink req from the circular pending list when it has neighbours
	 * other than itself. */
	if (req->next_pending && req->next_pending != req) {
		req->next_pending->prev_pending = req->prev_pending;
		req->prev_pending->next_pending = req->next_pending;
	}

	if (rc == 0) {
		/* Last reference to the port: release the lock taken above,
		 * then destroy the port together with the request. */
		EVDNS_UNLOCK(req->port); /* ????? nickm */
		server_port_free(req->port);
		mm_free(req);
		return (1);
	}
	if (lock)
		EVDNS_UNLOCK(req->port);
	mm_free(req);
	return (0);
}
2117
2118/* Free all storage held by an evdns_server_port.  Only called when  */
2119static void
2120server_port_free(struct evdns_server_port *port)
2121{
2122	EVUTIL_ASSERT(port);
2123	EVUTIL_ASSERT(!port->refcnt);
2124	EVUTIL_ASSERT(!port->pending_replies);
2125	if (port->socket > 0) {
2126		evutil_closesocket(port->socket);
2127		port->socket = -1;
2128	}
2129	(void) event_del(&port->event);
2130	event_debug_unassign(&port->event);
2131	EVTHREAD_FREE_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2132	mm_free(port);
2133}
2134
/* exported function */
/* Discard req_ without sending any reply, releasing everything it holds.
 * Always returns 0. */
int
evdns_server_request_drop(struct evdns_server_request *req_)
{
	struct server_request *const doomed = TO_SERVER_REQUEST(req_);

	(void) server_request_free(doomed);
	return 0;
}
2143
2144/* exported function */
2145int
2146evdns_server_request_get_requesting_addr(struct evdns_server_request *req_, struct sockaddr *sa, int addr_len)
2147{
2148	struct server_request *req = TO_SERVER_REQUEST(req_);
2149	if (addr_len < (int)req->addrlen)
2150		return -1;
2151	memcpy(sa, &(req->addr), req->addrlen);
2152	return req->addrlen;
2153}
2154
2155#undef APPEND16
2156#undef APPEND32
2157
/* this is a libevent callback function which is called when a request */
/* has timed out. */
/* arg is the struct request that timed out; fd and events are unused. */
/* Once tx_count has reached the base's global_max_retransmits the request */
/* is failed with DNS_ERR_TIMEOUT; otherwise it is moved to a freshly */
/* picked nameserver and transmitted again. */
static void
evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg) {
	struct request *const req = (struct request *) arg;
	struct evdns_base *base = req->base;

	(void) fd;
	(void) events;

	log(EVDNS_LOG_DEBUG, "Request %p timed out", arg);
	EVDNS_LOCK(base);

	if (req->tx_count >= req->base->global_max_retransmits) {
		/* Remember the nameserver before finishing the request;
		 * presumably req may no longer be usable after
		 * request_finished() -- TODO confirm. */
		struct nameserver *ns = req->ns;
		/* this request has failed */
		log(EVDNS_LOG_DEBUG, "Giving up on request %p; tx_count==%d",
		    arg, req->tx_count);
		reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);

		request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
		nameserver_failed(ns, "request timed out.");
	} else {
		/* retransmit it */
		log(EVDNS_LOG_DEBUG, "Retransmitting request %p; tx_count==%d",
		    arg, req->tx_count);
		(void) evtimer_del(&req->timeout_event);
		request_swap_ns(req, nameserver_pick(base));
		evdns_request_transmit(req);

		/* Count the timeout against the nameserver the request is
		 * assigned to at this point; once the count exceeds
		 * global_max_nameserver_timeout, reset it and mark that
		 * nameserver as failed. */
		req->ns->timedout++;
		if (req->ns->timedout > req->base->global_max_nameserver_timeout) {
			req->ns->timedout = 0;
			nameserver_failed(req->ns, "request timed out.");
		}
	}

	EVDNS_UNLOCK(base);
}
2197
2198/* try to send a request to a given server. */
2199/* */
2200/* return: */
2201/*   0 ok */
2202/*   1 temporary failure */
2203/*   2 other failure */
2204static int
2205evdns_request_transmit_to(struct request *req, struct nameserver *server) {
2206	int r;
2207	ASSERT_LOCKED(req->base);
2208	ASSERT_VALID_REQUEST(req);
2209
2210	if (server->requests_inflight == 1 &&
2211		req->base->disable_when_inactive &&
2212		event_add(&server->event, NULL) < 0) {
2213		return 1;
2214	}
2215
2216	r = sendto(server->socket, (void*)req->request, req->request_len, 0,
2217	    (struct sockaddr *)&server->address, server->addrlen);
2218	if (r < 0) {
2219		int err = evutil_socket_geterror(server->socket);
2220		if (EVUTIL_ERR_RW_RETRIABLE(err))
2221			return 1;
2222		nameserver_failed(req->ns, evutil_socket_error_to_string(err));
2223		return 2;
2224	} else if (r != (int)req->request_len) {
2225		return 1;  /* short write */
2226	} else {
2227		return 0;
2228	}
2229}
2230
2231/* try to send a request, updating the fields of the request */
2232/* as needed */
2233/* */
2234/* return: */
2235/*   0 ok */
2236/*   1 failed */
2237static int
2238evdns_request_transmit(struct request *req) {
2239	int retcode = 0, r;
2240
2241	ASSERT_LOCKED(req->base);
2242	ASSERT_VALID_REQUEST(req);
2243	/* if we fail to send this packet then this flag marks it */
2244	/* for evdns_transmit */
2245	req->transmit_me = 1;
2246	EVUTIL_ASSERT(req->trans_id != 0xffff);
2247
2248	if (!req->ns)
2249	{
2250		/* unable to transmit request if no nameservers */
2251		return 1;
2252	}
2253
2254	if (req->ns->choked) {
2255		/* don't bother trying to write to a socket */
2256		/* which we have had EAGAIN from */
2257		return 1;
2258	}
2259
2260	r = evdns_request_transmit_to(req, req->ns);
2261	switch (r) {
2262	case 1:
2263		/* temp failure */
2264		req->ns->choked = 1;
2265		nameserver_write_waiting(req->ns, 1);
2266		return 1;
2267	case 2:
2268		/* failed to transmit the request entirely. */
2269		retcode = 1;
2270		/* fall through: we'll set a timeout, which will time out,
2271		 * and make us retransmit the request anyway. */
2272	default:
2273		/* all ok */
2274		log(EVDNS_LOG_DEBUG,
2275		    "Setting timeout for request %p, sent to nameserver %p", req, req->ns);
2276		if (evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0) {
2277			log(EVDNS_LOG_WARN,
2278		      "Error from libevent when adding timer for request %p",
2279			    req);
2280			/* ???? Do more? */
2281		}
2282		req->tx_count++;
2283		req->transmit_me = 0;
2284		return retcode;
2285	}
2286}
2287
2288static void
2289nameserver_probe_callback(int result, char type, int count, int ttl, void *addresses, void *arg) {
2290	struct nameserver *const ns = (struct nameserver *) arg;
2291	(void) type;
2292	(void) count;
2293	(void) ttl;
2294	(void) addresses;
2295
2296	if (result == DNS_ERR_CANCEL) {
2297		/* We canceled this request because the nameserver came up
2298		 * for some other reason.  Do not change our opinion about
2299		 * the nameserver. */
2300		return;
2301	}
2302
2303	EVDNS_LOCK(ns->base);
2304	ns->probe_request = NULL;
2305	if (result == DNS_ERR_NONE || result == DNS_ERR_NOTEXIST) {
2306		/* this is a good reply */
2307		nameserver_up(ns);
2308	} else {
2309		nameserver_probe_failed(ns);
2310	}
2311	EVDNS_UNLOCK(ns->base);
2312}
2313
2314static void
2315nameserver_send_probe(struct nameserver *const ns) {
2316	struct evdns_request *handle;
2317	struct request *req;
2318	char addrbuf[128];
2319	/* here we need to send a probe to a given nameserver */
2320	/* in the hope that it is up now. */
2321
2322	ASSERT_LOCKED(ns->base);
2323	log(EVDNS_LOG_DEBUG, "Sending probe to %s",
2324	    evutil_format_sockaddr_port_(
2325		    (struct sockaddr *)&ns->address,
2326		    addrbuf, sizeof(addrbuf)));
2327	handle = mm_calloc(1, sizeof(*handle));
2328	if (!handle) return;
2329	req = request_new(ns->base, handle, TYPE_A, "google.com", DNS_QUERY_NO_SEARCH, nameserver_probe_callback, ns);
2330	if (!req) {
2331		mm_free(handle);
2332		return;
2333	}
2334	ns->probe_request = handle;
2335	/* we force this into the inflight queue no matter what */
2336	request_trans_id_set(req, transaction_id_pick(ns->base));
2337	req->ns = ns;
2338	request_submit(req);
2339}
2340
2341/* returns: */
2342/*   0 didn't try to transmit anything */
2343/*   1 tried to transmit something */
2344static int
2345evdns_transmit(struct evdns_base *base) {
2346	char did_try_to_transmit = 0;
2347	int i;
2348
2349	ASSERT_LOCKED(base);
2350	for (i = 0; i < base->n_req_heads; ++i) {
2351		if (base->req_heads[i]) {
2352			struct request *const started_at = base->req_heads[i], *req = started_at;
2353			/* first transmit all the requests which are currently waiting */
2354			do {
2355				if (req->transmit_me) {
2356					did_try_to_transmit = 1;
2357					evdns_request_transmit(req);
2358				}
2359
2360				req = req->next;
2361			} while (req != started_at);
2362		}
2363	}
2364
2365	return did_try_to_transmit;
2366}
2367
2368/* exported function */
2369int
2370evdns_base_count_nameservers(struct evdns_base *base)
2371{
2372	const struct nameserver *server;
2373	int n = 0;
2374
2375	EVDNS_LOCK(base);
2376	server = base->server_head;
2377	if (!server)
2378		goto done;
2379	do {
2380		++n;
2381		server = server->next;
2382	} while (server != base->server_head);
2383done:
2384	EVDNS_UNLOCK(base);
2385	return n;
2386}
2387
2388int
2389evdns_count_nameservers(void)
2390{
2391	return evdns_base_count_nameservers(current_base);
2392}
2393
/* exported function */
/*
 * Tear down every nameserver of base and move all inflight requests back to
 * the waiting queue with their transmission state reset.
 *
 * If base has no nameservers the function returns immediately and leaves
 * the request queues untouched.  Always returns 0.
 */
int
evdns_base_clear_nameservers_and_suspend(struct evdns_base *base)
{
	struct nameserver *server, *started_at;
	int i;

	EVDNS_LOCK(base);
	server = base->server_head;
	started_at = base->server_head;
	if (!server) {
		EVDNS_UNLOCK(base);
		return 0;
	}
	/* Walk the circular nameserver list once, destroying each entry.
	 * `next` is read before the entry is freed. */
	while (1) {
		struct nameserver *next = server->next;
		(void) event_del(&server->event);
		if (evtimer_initialized(&server->timeout_event))
			(void) evtimer_del(&server->timeout_event);
		if (server->probe_request) {
			evdns_cancel_request(server->base, server->probe_request);
			server->probe_request = NULL;
		}
		if (server->socket >= 0)
			evutil_closesocket(server->socket);
		mm_free(server);
		if (next == started_at)
			break;
		server = next;
	}
	base->server_head = NULL;
	base->global_good_nameservers = 0;

	/* Drain every inflight bucket into the waiting queue. */
	for (i = 0; i < base->n_req_heads; ++i) {
		struct request *req, *req_started_at;
		req = req_started_at = base->req_heads[i];
		while (req) {
			/* Saved up front: evdns_request_insert() below
			 * rewrites req->next. */
			struct request *next = req->next;
			req->tx_count = req->reissue_count = 0;
			req->ns = NULL;
			/* ???? What to do about searches? */
			(void) evtimer_del(&req->timeout_event);
			req->trans_id = 0;
			req->transmit_me = 0;

			base->global_requests_waiting++;
			evdns_request_insert(req, &base->req_waiting_head);
			/* We want to insert these suspended elements at the front of
			 * the waiting queue, since they were pending before any of
			 * the waiting entries were added.  This is a circular list,
			 * so we can just shift the start back by one.*/
			base->req_waiting_head = base->req_waiting_head->prev;

			if (next == req_started_at)
				break;
			req = next;
		}
		base->req_heads[i] = NULL;
	}

	base->global_requests_inflight = 0;

	EVDNS_UNLOCK(base);
	return 0;
}
2459
2460int
2461evdns_clear_nameservers_and_suspend(void)
2462{
2463	return evdns_base_clear_nameservers_and_suspend(current_base);
2464}
2465
2466
/* exported function */
/* Run evdns_requests_pump_waiting_queue() on base while holding the base
 * lock.  Always returns 0. */
int
evdns_base_resume(struct evdns_base *base)
{
	EVDNS_LOCK(base);
	evdns_requests_pump_waiting_queue(base);
	EVDNS_UNLOCK(base);

	return 0;
}
2477
2478int
2479evdns_resume(void)
2480{
2481	return evdns_base_resume(current_base);
2482}
2483
/*
 * Add the nameserver at `address` (addrlen bytes long) to base.  The caller
 * must hold the base lock.
 *
 * Returns:
 *    0  the nameserver was added;
 *    3  a nameserver with the same address is already present;
 *    2  addrlen is larger than the storage in struct nameserver, or
 *       bind()/event_add() failed;
 *    1  the socket could not be created;
 *   -1  the nameserver structure could not be allocated.
 */
static int
evdns_nameserver_add_impl_(struct evdns_base *base, const struct sockaddr *address, int addrlen) {
	/* first check to see if we already have this nameserver */

	const struct nameserver *server = base->server_head, *const started_at = base->server_head;
	struct nameserver *ns;
	int err = 0;
	char addrbuf[128];

	ASSERT_LOCKED(base);
	if (server) {
		do {
			if (!evutil_sockaddr_cmp((struct sockaddr*)&server->address, address, 1)) return 3;
			server = server->next;
		} while (server != started_at);
	}
	/* Reject addresses that would not fit in ns->address (copied into
	 * it by the memcpy further down). */
	if (addrlen > (int)sizeof(ns->address)) {
		log(EVDNS_LOG_DEBUG, "Addrlen %d too long.", (int)addrlen);
		return 2;
	}

	ns = (struct nameserver *) mm_malloc(sizeof(struct nameserver));
	if (!ns) return -1;

	memset(ns, 0, sizeof(struct nameserver));
	ns->base = base;

	evtimer_assign(&ns->timeout_event, ns->base->event_base, nameserver_prod_callback, ns);

	ns->socket = evutil_socket_(address->sa_family,
	    SOCK_DGRAM|EVUTIL_SOCK_NONBLOCK|EVUTIL_SOCK_CLOEXEC, 0);
	if (ns->socket < 0) { err = 1; goto out1; }

	/* When an outgoing address is configured, bind to it -- except for
	 * nameservers reached over loopback. */
	if (base->global_outgoing_addrlen &&
	    !evutil_sockaddr_is_loopback_(address)) {
		if (bind(ns->socket,
			(struct sockaddr*)&base->global_outgoing_address,
			base->global_outgoing_addrlen) < 0) {
			log(EVDNS_LOG_WARN,"Couldn't bind to outgoing address");
			err = 2;
			goto out2;
		}
	}

	memcpy(&ns->address, address, addrlen);
	ns->addrlen = addrlen;
	ns->state = 1;
	event_assign(&ns->event, ns->base->event_base, ns->socket,
				 EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
	/* With disable_when_inactive set, the read event is not added here;
	 * evdns_request_transmit_to() adds it when a request is sent. */
	if (!base->disable_when_inactive && event_add(&ns->event, NULL) < 0) {
		err = 2;
		goto out2;
	}

	log(EVDNS_LOG_DEBUG, "Added nameserver %s as %p",
	    evutil_format_sockaddr_port_(address, addrbuf, sizeof(addrbuf)), ns);

	/* insert this nameserver into the list of them */
	if (!base->server_head) {
		ns->next = ns->prev = ns;
		base->server_head = ns;
	} else {
		/* Link ns in directly after the current head. */
		ns->next = base->server_head->next;
		ns->prev = base->server_head;
		base->server_head->next = ns;
		ns->next->prev = ns;
	}

	base->global_good_nameservers++;

	return 0;

	/* Error unwinding: out2 additionally closes the socket; out1 frees
	 * the structure and logs the failure. */
	/* NOTE(review): ns->timeout_event was set up with evtimer_assign()
	 * but only ns->event is passed to event_debug_unassign() here;
	 * confirm whether that matters in debug mode. */
out2:
	evutil_closesocket(ns->socket);
out1:
	event_debug_unassign(&ns->event);
	mm_free(ns);
	log(EVDNS_LOG_WARN, "Unable to add nameserver %s: error %d",
	    evutil_format_sockaddr_port_(address, addrbuf, sizeof(addrbuf)), err);
	return err;
}
2565
/* exported function */
/* Add an IPv4 nameserver on port 53 to base.  `address` is stored into
 * sin_addr.s_addr unchanged.  Returns whatever
 * evdns_nameserver_add_impl_() returns. */
int
evdns_base_nameserver_add(struct evdns_base *base, unsigned long int address)
{
	struct sockaddr_in ns_addr;
	int status;

	memset(&ns_addr, 0, sizeof(ns_addr));
	ns_addr.sin_family = AF_INET;
	ns_addr.sin_port = htons(53);
	ns_addr.sin_addr.s_addr = address;

	EVDNS_LOCK(base);
	status = evdns_nameserver_add_impl_(base, (struct sockaddr*)&ns_addr, sizeof(ns_addr));
	EVDNS_UNLOCK(base);

	return status;
}
2581
2582int
2583evdns_nameserver_add(unsigned long int address) {
2584	if (!current_base)
2585		current_base = evdns_base_new(NULL, 0);
2586	return evdns_base_nameserver_add(current_base, address);
2587}
2588
2589static void
2590sockaddr_setport(struct sockaddr *sa, ev_uint16_t port)
2591{
2592	if (sa->sa_family == AF_INET) {
2593		((struct sockaddr_in *)sa)->sin_port = htons(port);
2594	} else if (sa->sa_family == AF_INET6) {
2595		((struct sockaddr_in6 *)sa)->sin6_port = htons(port);
2596	}
2597}
2598
2599static ev_uint16_t
2600sockaddr_getport(struct sockaddr *sa)
2601{
2602	if (sa->sa_family == AF_INET) {
2603		return ntohs(((struct sockaddr_in *)sa)->sin_port);
2604	} else if (sa->sa_family == AF_INET6) {
2605		return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
2606	} else {
2607		return 0;
2608	}
2609}
2610
2611/* exported function */
2612int
2613evdns_base_nameserver_ip_add(struct evdns_base *base, const char *ip_as_string) {
2614	struct sockaddr_storage ss;
2615	struct sockaddr *sa;
2616	int len = sizeof(ss);
2617	int res;
2618	if (evutil_parse_sockaddr_port(ip_as_string, (struct sockaddr *)&ss,
2619		&len)) {
2620		log(EVDNS_LOG_WARN, "Unable to parse nameserver address %s",
2621			ip_as_string);
2622		return 4;
2623	}
2624	sa = (struct sockaddr *) &ss;
2625	if (sockaddr_getport(sa) == 0)
2626		sockaddr_setport(sa, 53);
2627
2628	EVDNS_LOCK(base);
2629	res = evdns_nameserver_add_impl_(base, sa, len);
2630	EVDNS_UNLOCK(base);
2631	return res;
2632}
2633
2634int
2635evdns_nameserver_ip_add(const char *ip_as_string) {
2636	if (!current_base)
2637		current_base = evdns_base_new(NULL, 0);
2638	return evdns_base_nameserver_ip_add(current_base, ip_as_string);
2639}
2640
2641int
2642evdns_base_nameserver_sockaddr_add(struct evdns_base *base,
2643    const struct sockaddr *sa, ev_socklen_t len, unsigned flags)
2644{
2645	int res;
2646	EVUTIL_ASSERT(base);
2647	EVDNS_LOCK(base);
2648	res = evdns_nameserver_add_impl_(base, sa, len);
2649	EVDNS_UNLOCK(base);
2650	return res;
2651}
2652
2653int
2654evdns_base_get_nameserver_addr(struct evdns_base *base, int idx,
2655    struct sockaddr *sa, ev_socklen_t len)
2656{
2657	int result = -1;
2658	int i;
2659	struct nameserver *server;
2660	EVDNS_LOCK(base);
2661	server = base->server_head;
2662	for (i = 0; i < idx && server; ++i, server = server->next) {
2663		if (server->next == base->server_head)
2664			goto done;
2665	}
2666	if (! server)
2667		goto done;
2668
2669	if (server->addrlen > len) {
2670		result = (int) server->addrlen;
2671		goto done;
2672	}
2673
2674	memcpy(sa, &server->address, server->addrlen);
2675	result = (int) server->addrlen;
2676done:
2677	EVDNS_UNLOCK(base);
2678	return result;
2679}
2680
/* remove from the queue */
/* Unlinks req from the circular doubly-linked list whose head pointer is */
/* *head, updating *head if req was the head, and clears req's own links. */
static void
evdns_request_remove(struct request *req, struct request **head)
{
	ASSERT_LOCKED(req->base);
	ASSERT_VALID_REQUEST(req);

	/* Disabled consistency check: verifies that req really is a member
	 * of the list starting at *head. */
#if 0
	{
		struct request *ptr;
		int found = 0;
		EVUTIL_ASSERT(*head != NULL);

		ptr = *head;
		do {
			if (ptr == req) {
				found = 1;
				break;
			}
			ptr = ptr->next;
		} while (ptr != *head);
		EVUTIL_ASSERT(found);

		EVUTIL_ASSERT(req->next);
	}
#endif

	if (req->next == req) {
		/* only item in the list */
		*head = NULL;
	} else {
		/* Bridge the neighbours over req; if req was the head, the
		 * following entry becomes the new head. */
		req->next->prev = req->prev;
		req->prev->next = req->next;
		if (*head == req) *head = req->next;
	}
	req->next = req->prev = NULL;
}
2718
2719/* insert into the tail of the queue */
2720static void
2721evdns_request_insert(struct request *req, struct request **head) {
2722	ASSERT_LOCKED(req->base);
2723	ASSERT_VALID_REQUEST(req);
2724	if (!*head) {
2725		*head = req;
2726		req->next = req->prev = req;
2727		return;
2728	}
2729
2730	req->prev = (*head)->prev;
2731	req->prev->next = req;
2732	req->next = *head;
2733	(*head)->prev = req;
2734}
2735
/* Return the number of '.' characters in the NUL-terminated string s. */
static int
string_num_dots(const char *s) {
	int dots = 0;
	const char *p;

	for (p = s; *p != '\0'; ++p) {
		if (*p == '.')
			++dots;
	}
	return dots;
}
2745
/*
 * Allocate and initialise a request for `name` of query type `type`.
 *
 * The request structure and its wire-format packet live in one allocation.
 * When the base has room for another inflight request, a transaction id and
 * a nameserver are picked right away; otherwise the id is 0xffff and no
 * nameserver is assigned.  If handle is non-NULL it is pointed at the new
 * request and at base.  `flags` is unused here.
 *
 * Returns NULL if allocation fails, if name is 256 bytes or longer, or if
 * the packet cannot be built.  The caller must hold the base lock.
 */
static struct request *
request_new(struct evdns_base *base, struct evdns_request *handle, int type,
	    const char *name, int flags, evdns_callback_type callback,
	    void *user_ptr) {

	const char issuing_now =
	    (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;

	const size_t name_len = strlen(name);
	const size_t request_max_len = evdns_request_len(name_len);
	const u16 trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
	/* the request data is alloced in a single block with the header */
	struct request *const req =
	    mm_malloc(sizeof(struct request) + request_max_len);
	int rlen;
	char namebuf[256];
	(void) flags;

	ASSERT_LOCKED(base);

	if (!req) return NULL;

	/* The name must fit in namebuf, terminator included. */
	if (name_len >= sizeof(namebuf)) {
		mm_free(req);
		return NULL;
	}

	memset(req, 0, sizeof(struct request));
	req->base = base;

	evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);

	if (base->global_randomize_case) {
		/* Copy the name and choose the case of every alphabetic
		 * character from one random bit per position: a set bit
		 * sets 0x20, a clear bit clears it. */
		unsigned i;
		char randbits[(sizeof(namebuf)+7)/8];
		strlcpy(namebuf, name, sizeof(namebuf));
		evutil_secure_rng_get_bytes(randbits, (name_len+7)/8);
		for (i = 0; i < name_len; ++i) {
			if (EVUTIL_ISALPHA_(namebuf[i])) {
				if ((randbits[i >> 3] & (1<<(i & 7))))
					namebuf[i] |= 0x20;
				else
					namebuf[i] &= ~0x20;
			}
		}
		name = namebuf;
	}

	/* request data lives just after the header */
	req->request = ((u8 *) req) + sizeof(struct request);
	/* denotes that the request data shouldn't be free()ed */
	req->request_appended = 1;
	rlen = evdns_request_data_build(name, name_len, trans_id,
	    type, CLASS_INET, req->request, request_max_len);
	if (rlen < 0)
		goto err1;

	req->request_len = rlen;
	req->trans_id = trans_id;
	req->tx_count = 0;
	req->request_type = type;
	req->user_pointer = user_ptr;
	req->user_callback = callback;
	req->ns = issuing_now ? nameserver_pick(base) : NULL;
	req->next = req->prev = NULL;
	req->handle = handle;
	if (handle) {
		handle->current_req = req;
		handle->base = base;
	}

	return req;
err1:
	mm_free(req);
	return NULL;
}
2822
2823static void
2824request_submit(struct request *const req) {
2825	struct evdns_base *base = req->base;
2826	ASSERT_LOCKED(base);
2827	ASSERT_VALID_REQUEST(req);
2828	if (req->ns) {
2829		/* if it has a nameserver assigned then this is going */
2830		/* straight into the inflight queue */
2831		evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
2832
2833		base->global_requests_inflight++;
2834		req->ns->requests_inflight++;
2835
2836		evdns_request_transmit(req);
2837	} else {
2838		evdns_request_insert(req, &base->req_waiting_head);
2839		base->global_requests_waiting++;
2840	}
2841}
2842
2843/* exported function */
2844void
2845evdns_cancel_request(struct evdns_base *base, struct evdns_request *handle)
2846{
2847	struct request *req;
2848
2849	if (!handle->current_req)
2850		return;
2851
2852	if (!base) {
2853		/* This redundancy is silly; can we fix it? (Not for 2.0) XXXX */
2854		base = handle->base;
2855		if (!base)
2856			base = handle->current_req->base;
2857	}
2858
2859	EVDNS_LOCK(base);
2860	if (handle->pending_cb) {
2861		EVDNS_UNLOCK(base);
2862		return;
2863	}
2864
2865	req = handle->current_req;
2866	ASSERT_VALID_REQUEST(req);
2867
2868	reply_schedule_callback(req, 0, DNS_ERR_CANCEL, NULL);
2869	if (req->ns) {
2870		/* remove from inflight queue */
2871		request_finished(req, &REQ_HEAD(base, req->trans_id), 1);
2872	} else {
2873		/* remove from global_waiting head */
2874		request_finished(req, &base->req_waiting_head, 1);
2875	}
2876	EVDNS_UNLOCK(base);
2877}
2878
2879/* exported function */
2880struct evdns_request *
2881evdns_base_resolve_ipv4(struct evdns_base *base, const char *name, int flags,
2882    evdns_callback_type callback, void *ptr) {
2883	struct evdns_request *handle;
2884	struct request *req;
2885	log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2886	handle = mm_calloc(1, sizeof(*handle));
2887	if (handle == NULL)
2888		return NULL;
2889	EVDNS_LOCK(base);
2890	if (flags & DNS_QUERY_NO_SEARCH) {
2891		req =
2892			request_new(base, handle, TYPE_A, name, flags,
2893				    callback, ptr);
2894		if (req)
2895			request_submit(req);
2896	} else {
2897		search_request_new(base, handle, TYPE_A, name, flags,
2898		    callback, ptr);
2899	}
2900	if (handle->current_req == NULL) {
2901		mm_free(handle);
2902		handle = NULL;
2903	}
2904	EVDNS_UNLOCK(base);
2905	return handle;
2906}
2907
2908int evdns_resolve_ipv4(const char *name, int flags,
2909					   evdns_callback_type callback, void *ptr)
2910{
2911	return evdns_base_resolve_ipv4(current_base, name, flags, callback, ptr)
2912		? 0 : -1;
2913}
2914
2915
2916/* exported function */
2917struct evdns_request *
2918evdns_base_resolve_ipv6(struct evdns_base *base,
2919    const char *name, int flags,
2920    evdns_callback_type callback, void *ptr)
2921{
2922	struct evdns_request *handle;
2923	struct request *req;
2924	log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
2925	handle = mm_calloc(1, sizeof(*handle));
2926	if (handle == NULL)
2927		return NULL;
2928	EVDNS_LOCK(base);
2929	if (flags & DNS_QUERY_NO_SEARCH) {
2930		req = request_new(base, handle, TYPE_AAAA, name, flags,
2931				  callback, ptr);
2932		if (req)
2933			request_submit(req);
2934	} else {
2935		search_request_new(base, handle, TYPE_AAAA, name, flags,
2936		    callback, ptr);
2937	}
2938	if (handle->current_req == NULL) {
2939		mm_free(handle);
2940		handle = NULL;
2941	}
2942	EVDNS_UNLOCK(base);
2943	return handle;
2944}
2945
2946int evdns_resolve_ipv6(const char *name, int flags,
2947    evdns_callback_type callback, void *ptr) {
2948	return evdns_base_resolve_ipv6(current_base, name, flags, callback, ptr)
2949		? 0 : -1;
2950}
2951
2952struct evdns_request *
2953evdns_base_resolve_reverse(struct evdns_base *base, const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2954	char buf[32];
2955	struct evdns_request *handle;
2956	struct request *req;
2957	u32 a;
2958	EVUTIL_ASSERT(in);
2959	a = ntohl(in->s_addr);
2960	evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
2961			(int)(u8)((a	)&0xff),
2962			(int)(u8)((a>>8 )&0xff),
2963			(int)(u8)((a>>16)&0xff),
2964			(int)(u8)((a>>24)&0xff));
2965	handle = mm_calloc(1, sizeof(*handle));
2966	if (handle == NULL)
2967		return NULL;
2968	log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
2969	EVDNS_LOCK(base);
2970	req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
2971	if (req)
2972		request_submit(req);
2973	if (handle->current_req == NULL) {
2974		mm_free(handle);
2975		handle = NULL;
2976	}
2977	EVDNS_UNLOCK(base);
2978	return (handle);
2979}
2980
2981int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2982	return evdns_base_resolve_reverse(current_base, in, flags, callback, ptr)
2983		? 0 : -1;
2984}
2985
2986struct evdns_request *
2987evdns_base_resolve_reverse_ipv6(struct evdns_base *base, const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
2988	/* 32 nybbles, 32 periods, "ip6.arpa", NUL. */
2989	char buf[73];
2990	char *cp;
2991	struct evdns_request *handle;
2992	struct request *req;
2993	int i;
2994	EVUTIL_ASSERT(in);
2995	cp = buf;
2996	for (i=15; i >= 0; --i) {
2997		u8 byte = in->s6_addr[i];
2998		*cp++ = "0123456789abcdef"[byte & 0x0f];
2999		*cp++ = '.';
3000		*cp++ = "0123456789abcdef"[byte >> 4];
3001		*cp++ = '.';
3002	}
3003	EVUTIL_ASSERT(cp + strlen("ip6.arpa") < buf+sizeof(buf));
3004	memcpy(cp, "ip6.arpa", strlen("ip6.arpa")+1);
3005	handle = mm_calloc(1, sizeof(*handle));
3006	if (handle == NULL)
3007		return NULL;
3008	log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
3009	EVDNS_LOCK(base);
3010	req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
3011	if (req)
3012		request_submit(req);
3013	if (handle->current_req == NULL) {
3014		mm_free(handle);
3015		handle = NULL;
3016	}
3017	EVDNS_UNLOCK(base);
3018	return (handle);
3019}
3020
3021int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
3022	return evdns_base_resolve_reverse_ipv6(current_base, in, flags, callback, ptr)
3023		? 0 : -1;
3024}
3025
3026/* ================================================================= */
3027/* Search support */
3028/* */
3029/* the libc resolver has support for searching a number of domains */
3030/* to find a name. If nothing else then it takes the single domain */
3031/* from the gethostname() call. */
3032/* */
3033/* It can also be configured via the domain and search options in a */
3034/* resolv.conf. */
3035/* */
3036/* The ndots option controls how many dots it takes for the resolver */
3037/* to decide that a name is non-local and so try a raw lookup first. */
3038
/* One entry in the singly-linked list of search domains. */
struct search_domain {
	/* length in bytes of the domain text stored after this structure;
	 * the text is copied without a terminating NUL */
	int len;
	/* next entry in the list, or NULL */
	struct search_domain *next;
	/* the text string is appended to this structure */
};
3044
/* Reference-counted search configuration: the ndots setting plus the list
 * of search domains. */
struct search_state {
	/* number of references; search_state_decref() frees the state and
	 * its domain list when this reaches zero */
	int refcount;
	/* ndots setting; search_state_new() initialises it to 1 */
	int ndots;
	/* counter bumped by search_postfix_add() for each added domain */
	int num_domains;
	/* first entry of the search domain list, or NULL */
	struct search_domain *head;
};
3051
3052static void
3053search_state_decref(struct search_state *const state) {
3054	if (!state) return;
3055	state->refcount--;
3056	if (!state->refcount) {
3057		struct search_domain *next, *dom;
3058		for (dom = state->head; dom; dom = next) {
3059			next = dom->next;
3060			mm_free(dom);
3061		}
3062		mm_free(state);
3063	}
3064}
3065
3066static struct search_state *
3067search_state_new(void) {
3068	struct search_state *state = (struct search_state *) mm_malloc(sizeof(struct search_state));
3069	if (!state) return NULL;
3070	memset(state, 0, sizeof(struct search_state));
3071	state->refcount = 1;
3072	state->ndots = 1;
3073
3074	return state;
3075}
3076
3077static void
3078search_postfix_clear(struct evdns_base *base) {
3079	search_state_decref(base->global_search_state);
3080
3081	base->global_search_state = search_state_new();
3082}
3083
/* exported function */
/* Reset base's search state via search_postfix_clear() while holding the
 * base lock. */
void
evdns_base_search_clear(struct evdns_base *base)
{
	EVDNS_LOCK(base);
	search_postfix_clear(base);
	EVDNS_UNLOCK(base);
}
3092
3093void
3094evdns_search_clear(void) {
3095	evdns_base_search_clear(current_base);
3096}
3097
3098static void
3099search_postfix_add(struct evdns_base *base, const char *domain) {
3100	size_t domain_len;
3101	struct search_domain *sdomain;
3102	while (domain[0] == '.') domain++;
3103	domain_len = strlen(domain);
3104
3105	ASSERT_LOCKED(base);
3106	if (!base->global_search_state) base->global_search_state = search_state_new();
3107	if (!base->global_search_state) return;
3108	base->global_search_state->num_domains++;
3109
3110	sdomain = (struct search_domain *) mm_malloc(sizeof(struct search_domain) + domain_len);
3111	if (!sdomain) return;
3112	memcpy( ((u8 *) sdomain) + sizeof(struct search_domain), domain, domain_len);
3113	sdomain->next = base->global_search_state->head;
3114	sdomain->len = (int) domain_len;
3115
3116	base->global_search_state->head = sdomain;
3117}
3118
3119/* reverse the order of members in the postfix list. This is needed because, */
3120/* when parsing resolv.conf we push elements in the wrong order */
3121static void
3122search_reverse(struct evdns_base *base) {
3123	struct search_domain *cur, *prev = NULL, *next;
3124	ASSERT_LOCKED(base);
3125	cur = base->global_search_state->head;
3126	while (cur) {
3127		next = cur->next;
3128		cur->next = prev;
3129		prev = cur;
3130		cur = next;
3131	}
3132
3133	base->global_search_state->head = prev;
3134}
3135
/* exported function */
/* Add 'domain' to the search list of 'base' while holding the base lock. */
void
evdns_base_search_add(struct evdns_base *base, const char *domain)
{
	EVDNS_LOCK(base);
	{
		search_postfix_add(base, domain);
	}
	EVDNS_UNLOCK(base);
}
3143void
3144evdns_search_add(const char *domain) {
3145	evdns_base_search_add(current_base, domain);
3146}
3147
3148/* exported function */
3149void
3150evdns_base_search_ndots_set(struct evdns_base *base, const int ndots) {
3151	EVDNS_LOCK(base);
3152	if (!base->global_search_state) base->global_search_state = search_state_new();
3153	if (base->global_search_state)
3154		base->global_search_state->ndots = ndots;
3155	EVDNS_UNLOCK(base);
3156}
3157void
3158evdns_search_ndots_set(const int ndots) {
3159	evdns_base_search_ndots_set(current_base, ndots);
3160}
3161
3162static void
3163search_set_from_hostname(struct evdns_base *base) {
3164	char hostname[HOST_NAME_MAX + 1], *domainname;
3165
3166	ASSERT_LOCKED(base);
3167	search_postfix_clear(base);
3168	if (gethostname(hostname, sizeof(hostname))) return;
3169	domainname = strchr(hostname, '.');
3170	if (!domainname) return;
3171	search_postfix_add(base, domainname);
3172}
3173
3174/* warning: returns malloced string */
3175static char *
3176search_make_new(const struct search_state *const state, int n, const char *const base_name) {
3177	const size_t base_len = strlen(base_name);
3178	char need_to_append_dot;
3179	struct search_domain *dom;
3180
3181	if (!base_len) return NULL;
3182	need_to_append_dot = base_name[base_len - 1] == '.' ? 0 : 1;
3183
3184	for (dom = state->head; dom; dom = dom->next) {
3185		if (!n--) {
3186			/* this is the postfix we want */
3187			/* the actual postfix string is kept at the end of the structure */
3188			const u8 *const postfix = ((u8 *) dom) + sizeof(struct search_domain);
3189			const int postfix_len = dom->len;
3190			char *const newname = (char *) mm_malloc(base_len + need_to_append_dot + postfix_len + 1);
3191			if (!newname) return NULL;
3192			memcpy(newname, base_name, base_len);
3193			if (need_to_append_dot) newname[base_len] = '.';
3194			memcpy(newname + base_len + need_to_append_dot, postfix, postfix_len);
3195			newname[base_len + need_to_append_dot + postfix_len] = 0;
3196			return newname;
3197		}
3198	}
3199
3200	/* we ran off the end of the list and still didn't find the requested string */
3201	EVUTIL_ASSERT(0);
3202	return NULL; /* unreachable; stops warnings in some compilers. */
3203}
3204
3205static struct request *
3206search_request_new(struct evdns_base *base, struct evdns_request *handle,
3207		   int type, const char *const name, int flags,
3208		   evdns_callback_type user_callback, void *user_arg) {
3209	ASSERT_LOCKED(base);
3210	EVUTIL_ASSERT(type == TYPE_A || type == TYPE_AAAA);
3211	EVUTIL_ASSERT(handle->current_req == NULL);
3212	if ( ((flags & DNS_QUERY_NO_SEARCH) == 0) &&
3213	     base->global_search_state &&
3214		 base->global_search_state->num_domains) {
3215		/* we have some domains to search */
3216		struct request *req;
3217		if (string_num_dots(name) >= base->global_search_state->ndots) {
3218			req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3219			if (!req) return NULL;
3220			handle->search_index = -1;
3221		} else {
3222			char *const new_name = search_make_new(base->global_search_state, 0, name);
3223			if (!new_name) return NULL;
3224			req = request_new(base, handle, type, new_name, flags, user_callback, user_arg);
3225			mm_free(new_name);
3226			if (!req) return NULL;
3227			handle->search_index = 0;
3228		}
3229		EVUTIL_ASSERT(handle->search_origname == NULL);
3230		handle->search_origname = mm_strdup(name);
3231		if (handle->search_origname == NULL) {
3232			/* XXX Should we dealloc req? If yes, how? */
3233			if (req)
3234				mm_free(req);
3235			return NULL;
3236		}
3237		handle->search_state = base->global_search_state;
3238		handle->search_flags = flags;
3239		base->global_search_state->refcount++;
3240		request_submit(req);
3241		return req;
3242	} else {
3243		struct request *const req = request_new(base, handle, type, name, flags, user_callback, user_arg);
3244		if (!req) return NULL;
3245		request_submit(req);
3246		return req;
3247	}
3248}
3249
3250/* this is called when a request has failed to find a name. We need to check */
3251/* if it is part of a search and, if so, try the next name in the list */
3252/* returns: */
3253/*   0 another request has been submitted */
3254/*   1 no more requests needed */
3255static int
3256search_try_next(struct evdns_request *const handle) {
3257	struct request *req = handle->current_req;
3258	struct evdns_base *base = req->base;
3259	struct request *newreq;
3260	ASSERT_LOCKED(base);
3261	if (handle->search_state) {
3262		/* it is part of a search */
3263		char *new_name;
3264		handle->search_index++;
3265		if (handle->search_index >= handle->search_state->num_domains) {
3266			/* no more postfixes to try, however we may need to try */
3267			/* this name without a postfix */
3268			if (string_num_dots(handle->search_origname) < handle->search_state->ndots) {
3269				/* yep, we need to try it raw */
3270				newreq = request_new(base, NULL, req->request_type, handle->search_origname, handle->search_flags, req->user_callback, req->user_pointer);
3271				log(EVDNS_LOG_DEBUG, "Search: trying raw query %s", handle->search_origname);
3272				if (newreq) {
3273					search_request_finished(handle);
3274					goto submit_next;
3275				}
3276			}
3277			return 1;
3278		}
3279
3280		new_name = search_make_new(handle->search_state, handle->search_index, handle->search_origname);
3281		if (!new_name) return 1;
3282		log(EVDNS_LOG_DEBUG, "Search: now trying %s (%d)", new_name, handle->search_index);
3283		newreq = request_new(base, NULL, req->request_type, new_name, handle->search_flags, req->user_callback, req->user_pointer);
3284		mm_free(new_name);
3285		if (!newreq) return 1;
3286		goto submit_next;
3287	}
3288	return 1;
3289
3290submit_next:
3291	request_finished(req, &REQ_HEAD(req->base, req->trans_id), 0);
3292	handle->current_req = newreq;
3293	newreq->handle = handle;
3294	request_submit(newreq);
3295	return 0;
3296}
3297
3298static void
3299search_request_finished(struct evdns_request *const handle) {
3300	ASSERT_LOCKED(handle->current_req->base);
3301	if (handle->search_state) {
3302		search_state_decref(handle->search_state);
3303		handle->search_state = NULL;
3304	}
3305	if (handle->search_origname) {
3306		mm_free(handle->search_origname);
3307		handle->search_origname = NULL;
3308	}
3309}
3310
3311/* ================================================================= */
3312/* Parsing resolv.conf files */
3313
3314static void
3315evdns_resolv_set_defaults(struct evdns_base *base, int flags) {
3316	/* if the file isn't found then we assume a local resolver */
3317	ASSERT_LOCKED(base);
3318	if (flags & DNS_OPTION_SEARCH) search_set_from_hostname(base);
3319	if (flags & DNS_OPTION_NAMESERVERS) evdns_base_nameserver_ip_add(base,"127.0.0.1");
3320}
3321
#ifndef EVENT__HAVE_STRTOK_R
/* Replacement for platforms without strtok_r().
 *
 * Pass the string to split as 's' on the first call and NULL afterwards;
 * '*state' carries the position between calls.  Each call skips any run of
 * delimiter characters, then returns the next non-empty token (terminated
 * in place by overwriting the delimiter that follows it), or NULL when no
 * token is left. */
static char *
strtok_r(char *s, const char *delim, char **state) {
	char *cp, *start;
	cp = s ? s : *state;
	if (!cp)
		return NULL;
	/* Skip leading delimiters so that repeated separators ("a  b") do
	 * not produce empty tokens.  The '*cp &&' guard matters because
	 * strchr() also matches the terminating NUL. */
	while (*cp && strchr(delim, *cp))
		++cp;
	if (!*cp) {
		*state = NULL;
		return NULL;
	}
	start = cp;
	while (*cp && !strchr(delim, *cp))
		++cp;
	if (!*cp) {
		/* Token runs to the end of the string: nothing left. */
		*state = NULL;
	} else {
		*cp++ = '\0';
		*state = cp;
	}
	return start;
}
#endif
3343
/* helper version of atoi which returns -1 on error */
/* An error is any trailing non-numeric text or a value that does not fit in
 * an int.  As with atoi, an empty string yields 0.  Note that a literal
 * "-1" is indistinguishable from the error return. */
static int
strtoint(const char *const str)
{
	/* INT_MAX computed without <limits.h>: all value bits of unsigned,
	 * shifted down by the sign bit. */
	const long int_max = (long)(~0u >> 1);
	char *endptr;
	const long r = strtol(str, &endptr, 10);
	if (*endptr) return -1;
	/* Refuse values that would be mangled by narrowing to int. */
	if (r > int_max || r < -int_max - 1) return -1;
	return (int) r;
}
3353
/* Parse a number of seconds into a timeval; return -1 on error. */
/* Accepts a decimal number of seconds, optionally fractional.  Rejected:
 * trailing garbage, negative values, NaN, values too large for the int
 * conversions below (including infinity), and anything shorter than one
 * millisecond.  On success '*out' is filled in and 0 is returned. */
static int
evdns_strtotimeval(const char *const str, struct timeval *out)
{
	/* INT_MAX as a double, computed without <limits.h>. */
	const double max_seconds = (double)(~0u >> 1);
	double d;
	char *endptr;
	d = strtod(str, &endptr);
	if (*endptr) return -1;
	/* Written as !(d >= 0) rather than (d < 0) so that NaN fails too. */
	if (!(d >= 0)) return -1;
	/* Converting an out-of-range double to int is undefined. */
	if (d > max_seconds) return -1;
	out->tv_sec = (int) d;
	out->tv_usec = (int) ((d - (int) d)*1000000);
	if (out->tv_sec == 0 && out->tv_usec < 1000) /* less than 1 msec */
		return -1;
	return 0;
}
3369
/* helper version of atoi that returns -1 on error and clips to bounds. */
/* The -1 error result of strtoint() is passed through unchanged; any other
 * value is forced into the range [min, max]. */
static int
strtoint_clipped(const char *const str, int min, int max)
{
	const int parsed = strtoint(str);

	if (parsed == -1)
		return -1;
	if (parsed < min)
		return min;
	return (parsed > max) ? max : parsed;
}
3384
3385static int
3386evdns_base_set_max_requests_inflight(struct evdns_base *base, int maxinflight)
3387{
3388	int old_n_heads = base->n_req_heads, n_heads;
3389	struct request **old_heads = base->req_heads, **new_heads, *req;
3390	int i;
3391
3392	ASSERT_LOCKED(base);
3393	if (maxinflight < 1)
3394		maxinflight = 1;
3395	n_heads = (maxinflight+4) / 5;
3396	EVUTIL_ASSERT(n_heads > 0);
3397	new_heads = mm_calloc(n_heads, sizeof(struct request*));
3398	if (!new_heads)
3399		return (-1);
3400	if (old_heads) {
3401		for (i = 0; i < old_n_heads; ++i) {
3402			while (old_heads[i]) {
3403				req = old_heads[i];
3404				evdns_request_remove(req, &old_heads[i]);
3405				evdns_request_insert(req, &new_heads[req->trans_id % n_heads]);
3406			}
3407		}
3408		mm_free(old_heads);
3409	}
3410	base->req_heads = new_heads;
3411	base->n_req_heads = n_heads;
3412	base->global_max_requests_inflight = maxinflight;
3413	return (0);
3414}
3415
3416/* exported function */
3417int
3418evdns_base_set_option(struct evdns_base *base,
3419    const char *option, const char *val)
3420{
3421	int res;
3422	EVDNS_LOCK(base);
3423	res = evdns_base_set_option_impl(base, option, val, DNS_OPTIONS_ALL);
3424	EVDNS_UNLOCK(base);
3425	return res;
3426}
3427
/* Return 1 if 's1' names the option 'optionname', otherwise 0.
 *
 * Option names are written with a trailing colon ("option:").  's1' matches
 * when it is the bare name ("option"), the name with its colon, or the
 * name-with-colon followed by anything at all ("option:value"); the last
 * form exists for the resolv.conf parser. */
static inline int
str_matches_option(const char *s1, const char *optionname)
{
	const size_t name_len = strlen(optionname);
	const size_t given_len = strlen(s1);
	size_t cmp_len;

	/* Shorter than the name minus its colon: cannot match. */
	if (given_len + 1 < name_len)
		return 0;

	/* Compare whichever of the two strings is shorter in full. */
	cmp_len = (given_len < name_len) ? given_len : name_len;
	return strncmp(s1, optionname, cmp_len) == 0;
}
3443
/* Apply one configuration option to 'base'.
 *
 * 'option' is matched with str_matches_option() against the known option
 * names; 'val' is the textual value.  'flags' selects which categories may
 * actually be changed (DNS_OPTION_SEARCH, DNS_OPTION_MISC,
 * DNS_OPTION_NAMESERVERS); an option whose category is not enabled is
 * ignored.  In every branch except "bind-to:" the value is validated before
 * that check, so a bad value is reported even for a disabled category.
 *
 * Returns 0 on success, for an ignored option and for an unrecognized
 * option name; returns -1 when the value cannot be parsed or a required
 * allocation fails.  The caller must hold the base lock. */
static int
evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags)
{
	ASSERT_LOCKED(base);
	if (str_matches_option(option, "ndots:")) {
		const int ndots = strtoint(val);
		if (ndots == -1) return -1;
		if (!(flags & DNS_OPTION_SEARCH)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting ndots to %d", ndots);
		if (!base->global_search_state) base->global_search_state = search_state_new();
		if (!base->global_search_state) return -1;
		base->global_search_state->ndots = ndots;
	} else if (str_matches_option(option, "timeout:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting timeout to %s", val);
		memcpy(&base->global_timeout, &tv, sizeof(struct timeval));
	} else if (str_matches_option(option, "getaddrinfo-allow-skew:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting getaddrinfo-allow-skew to %s",
		    val);
		memcpy(&base->global_getaddrinfo_allow_skew, &tv,
		    sizeof(struct timeval));
	} else if (str_matches_option(option, "max-timeouts:")) {
		const int maxtimeout = strtoint_clipped(val, 1, 255);
		if (maxtimeout == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum allowed timeouts to %d",
			maxtimeout);
		base->global_max_nameserver_timeout = maxtimeout;
	} else if (str_matches_option(option, "max-inflight:")) {
		const int maxinflight = strtoint_clipped(val, 1, 65000);
		if (maxinflight == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum inflight requests to %d",
			maxinflight);
		/* Resizing allocates a new table; report failure instead of
		 * pretending the option was applied. */
		if (evdns_base_set_max_requests_inflight(base, maxinflight) < 0)
			return -1;
	} else if (str_matches_option(option, "attempts:")) {
		int retries = strtoint(val);
		if (retries == -1) return -1;
		if (retries > 255) retries = 255;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting retries to %d", retries);
		base->global_max_retransmits = retries;
	} else if (str_matches_option(option, "randomize-case:")) {
		int randcase = strtoint(val);
		/* Reject unparsable values as the other branches do, rather
		 * than storing the -1 error marker as the setting. */
		if (randcase == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		base->global_randomize_case = randcase;
	} else if (str_matches_option(option, "bind-to:")) {
		/* XXX This only applies to successive nameservers, not
		 * to already-configured ones.	We might want to fix that. */
		int len = sizeof(base->global_outgoing_address);
		if (!(flags & DNS_OPTION_NAMESERVERS)) return 0;
		if (evutil_parse_sockaddr_port(val,
			(struct sockaddr*)&base->global_outgoing_address, &len))
			return -1;
		base->global_outgoing_addrlen = len;
	} else if (str_matches_option(option, "initial-probe-timeout:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		/* Cap the probe timeout at one hour. */
		if (tv.tv_sec > 3600)
			tv.tv_sec = 3600;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting initial probe timeout to %s",
		    val);
		memcpy(&base->global_nameserver_probe_initial_timeout, &tv,
		    sizeof(tv));
	}
	return 0;
}
3518
3519int
3520evdns_set_option(const char *option, const char *val, int flags)
3521{
3522	if (!current_base)
3523		current_base = evdns_base_new(NULL, 0);
3524	return evdns_base_set_option(current_base, option, val);
3525}
3526
3527static void
3528resolv_conf_parse_line(struct evdns_base *base, char *const start, int flags) {
3529	char *strtok_state;
3530	static const char *const delims = " \t";
3531#define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
3532
3533
3534	char *const first_token = strtok_r(start, delims, &strtok_state);
3535	ASSERT_LOCKED(base);
3536	if (!first_token) return;
3537
3538	if (!strcmp(first_token, "nameserver") && (flags & DNS_OPTION_NAMESERVERS)) {
3539		const char *const nameserver = NEXT_TOKEN;
3540
3541		if (nameserver)
3542			evdns_base_nameserver_ip_add(base, nameserver);
3543	} else if (!strcmp(first_token, "domain") && (flags & DNS_OPTION_SEARCH)) {
3544		const char *const domain = NEXT_TOKEN;
3545		if (domain) {
3546			search_postfix_clear(base);
3547			search_postfix_add(base, domain);
3548		}
3549	} else if (!strcmp(first_token, "search") && (flags & DNS_OPTION_SEARCH)) {
3550		const char *domain;
3551		search_postfix_clear(base);
3552
3553		while ((domain = NEXT_TOKEN)) {
3554			search_postfix_add(base, domain);
3555		}
3556		search_reverse(base);
3557	} else if (!strcmp(first_token, "options")) {
3558		const char *option;
3559		while ((option = NEXT_TOKEN)) {
3560			const char *val = strchr(option, ':');
3561			evdns_base_set_option_impl(base, option, val ? val+1 : "", flags);
3562		}
3563	}
3564#undef NEXT_TOKEN
3565}
3566
/* exported function */
/* Parse the resolv.conf-style file 'filename' into 'base' while holding the
 * base lock; 'flags' selects which kinds of settings are applied. */
/* returns: */
/*   0 no errors */
/*   1 failed to open file */
/*   2 failed to stat file */
/*   3 file too large */
/*   4 out of memory */
/*   5 short read from file */
/*   6 no nameservers were configured (127.0.0.1 was added instead) */
int
evdns_base_resolv_conf_parse(struct evdns_base *base, int flags, const char *const filename)
{
	int outcome;

	EVDNS_LOCK(base);
	outcome = evdns_base_resolv_conf_parse_impl(base, flags, filename);
	EVDNS_UNLOCK(base);

	return outcome;
}
3583
/* Return a newly allocated string holding the path of the system hosts
 * file: "<system folder>\drivers\etc\hosts" on Windows, "/etc/hosts"
 * elsewhere.  Returns NULL if the system folder cannot be determined or
 * memory cannot be allocated.  The caller frees the result with mm_free(). */
static char *
evdns_get_default_hosts_filename(void)
{
#ifdef _WIN32
	/* Windows is a little coy about where it puts its configuration
	 * files.  Sure, they're _usually_ in C:\windows\system32, but
	 * there's no reason in principle they couldn't be in
	 * W:\hoboken chicken emergency\
	 */
	char path[MAX_PATH+1];
	static const char hostfile[] = "\\drivers\\etc\\hosts";
	char *path_out;
	size_t len_out;

	if (! SHGetSpecialFolderPathA(NULL, path, CSIDL_SYSTEM, 0))
		return NULL;
	len_out = strlen(path)+strlen(hostfile)+1;
	path_out = mm_malloc(len_out);
	/* Do not format into a buffer we failed to allocate. */
	if (!path_out)
		return NULL;
	evutil_snprintf(path_out, len_out, "%s%s", path, hostfile);
	return path_out;
#else
	return mm_strdup("/etc/hosts");
#endif
}
3608
3609static int
3610evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename) {
3611	size_t n;
3612	char *resolv;
3613	char *start;
3614	int err = 0;
3615
3616	log(EVDNS_LOG_DEBUG, "Parsing resolv.conf file %s", filename);
3617
3618	if (flags & DNS_OPTION_HOSTSFILE) {
3619		char *fname = evdns_get_default_hosts_filename();
3620		evdns_base_load_hosts(base, fname);
3621		if (fname)
3622			mm_free(fname);
3623	}
3624
3625	if ((err = evutil_read_file_(filename, &resolv, &n, 0)) < 0) {
3626		if (err == -1) {
3627			/* No file. */
3628			evdns_resolv_set_defaults(base, flags);
3629			return 1;
3630		} else {
3631			return 2;
3632		}
3633	}
3634
3635	start = resolv;
3636	for (;;) {
3637		char *const newline = strchr(start, '\n');
3638		if (!newline) {
3639			resolv_conf_parse_line(base, start, flags);
3640			break;
3641		} else {
3642			*newline = 0;
3643			resolv_conf_parse_line(base, start, flags);
3644			start = newline + 1;
3645		}
3646	}
3647
3648	if (!base->server_head && (flags & DNS_OPTION_NAMESERVERS)) {
3649		/* no nameservers were configured. */
3650		evdns_base_nameserver_ip_add(base, "127.0.0.1");
3651		err = 6;
3652	}
3653	if (flags & DNS_OPTION_SEARCH && (!base->global_search_state || base->global_search_state->num_domains == 0)) {
3654		search_set_from_hostname(base);
3655	}
3656
3657	mm_free(resolv);
3658	return err;
3659}
3660
3661int
3662evdns_resolv_conf_parse(int flags, const char *const filename) {
3663	if (!current_base)
3664		current_base = evdns_base_new(NULL, 0);
3665	return evdns_base_resolv_conf_parse(current_base, flags, filename);
3666}
3667
3668
3669#ifdef _WIN32
/* Add multiple nameservers from a space-or-comma-separated list. */
/* Each run of digits, '.', ':', '[' and ']' is copied out and passed to
 * evdns_base_nameserver_ip_add().  Returns 0 once the whole list has been
 * consumed, 4 if a temporary buffer cannot be allocated, or the first
 * non-zero result from evdns_base_nameserver_ip_add().
 * The caller must hold the base lock. */
static int
evdns_nameserver_ip_add_line(struct evdns_base *base, const char *ips) {
	const char *addr;
	char *buf;
	int r;
	ASSERT_LOCKED(base);
	while (*ips) {
		/* The <ctype.h> classifiers need an unsigned char value;
		 * a plain char may be negative. */
		while (isspace((unsigned char)*ips) || *ips == ',' || *ips == '\t')
			++ips;
		/* Only separators were left: done, rather than trying to
		 * add an empty address. */
		if (!*ips)
			break;
		addr = ips;
		while (isdigit((unsigned char)*ips) || *ips == '.' || *ips == ':' ||
		    *ips=='[' || *ips==']')
			++ips;
		buf = mm_malloc(ips-addr+1);
		if (!buf) return 4;
		memcpy(buf, addr, ips-addr);
		buf[ips-addr] = '\0';
		r = evdns_base_nameserver_ip_add(base, buf);
		mm_free(buf);
		if (r) return r;
	}
	return 0;
}
3694
/* Signature of GetNetworkParams() as looked up dynamically in iphlpapi.dll. */
typedef DWORD(WINAPI *GetNetworkParams_fn_t)(FIXED_INFO *, DWORD*);

/* Use the windows GetNetworkParams interface in iphlpapi.dll to */
/* figure out what our nameservers are. */
/* Returns 0 if at least one nameserver was added.  Otherwise returns 4 on */
/* allocation failure, the error from the last failed add if any add */
/* failed, or -1.  The caller must hold the base lock. */
static int
load_nameservers_with_getnetworkparams(struct evdns_base *base)
{
	/* Based on MSDN examples and inspection of  c-ares code. */
	FIXED_INFO *fixed;
	HMODULE handle = 0;
	ULONG size = sizeof(FIXED_INFO);
	void *buf = NULL;
	int status = 0, r, added_any;
	IP_ADDR_STRING *ns;
	GetNetworkParams_fn_t fn;

	ASSERT_LOCKED(base);
	if (!(handle = evutil_load_windows_system_library_(
			TEXT("iphlpapi.dll")))) {
		log(EVDNS_LOG_WARN, "Could not open iphlpapi.dll");
		status = -1;
		goto done;
	}
	if (!(fn = (GetNetworkParams_fn_t) GetProcAddress(handle, "GetNetworkParams"))) {
		log(EVDNS_LOG_WARN, "Could not get address of function.");
		status = -1;
		goto done;
	}

	/* First attempt with a buffer sized for a bare FIXED_INFO. */
	buf = mm_malloc(size);
	if (!buf) { status = 4; goto done; }
	fixed = buf;
	r = fn(fixed, &size);
	if (r != ERROR_SUCCESS && r != ERROR_BUFFER_OVERFLOW) {
		status = -1;
		goto done;
	}
	if (r != ERROR_SUCCESS) {
		/* Buffer was too small: retry once with the size the call
		 * wrote back into 'size'. */
		mm_free(buf);
		buf = mm_malloc(size);
		if (!buf) { status = 4; goto done; }
		fixed = buf;
		r = fn(fixed, &size);
		if (r != ERROR_SUCCESS) {
			log(EVDNS_LOG_DEBUG, "fn() failed.");
			status = -1;
			goto done;
		}
	}

	EVUTIL_ASSERT(fixed);
	added_any = 0;
	/* Walk the linked list of DNS server entries. */
	ns = &(fixed->DnsServerList);
	while (ns) {
		r = evdns_nameserver_ip_add_line(base, ns->IpAddress.String);
		if (r) {
			log(EVDNS_LOG_DEBUG,"Could not add nameserver %s to list,error: %d",
				(ns->IpAddress.String),(int)GetLastError());
			status = r;
		} else {
			++added_any;
			log(EVDNS_LOG_DEBUG,"Successfully added %s as nameserver",ns->IpAddress.String);
		}

		ns = ns->Next;
	}

	if (!added_any) {
		log(EVDNS_LOG_DEBUG, "No nameservers added.");
		if (status == 0)
			status = -1;
	} else {
		/* One success is enough; earlier per-entry errors are dropped. */
		status = 0;
	}

 done:
	if (buf)
		mm_free(buf);
	if (handle)
		FreeLibrary(handle);
	return status;
}
3777
3778static int
3779config_nameserver_from_reg_key(struct evdns_base *base, HKEY key, const TCHAR *subkey)
3780{
3781	char *buf;
3782	DWORD bufsz = 0, type = 0;
3783	int status = 0;
3784
3785	ASSERT_LOCKED(base);
3786	if (RegQueryValueEx(key, subkey, 0, &type, NULL, &bufsz)
3787	    != ERROR_MORE_DATA)
3788		return -1;
3789	if (!(buf = mm_malloc(bufsz)))
3790		return -1;
3791
3792	if (RegQueryValueEx(key, subkey, 0, &type, (LPBYTE)buf, &bufsz)
3793	    == ERROR_SUCCESS && bufsz > 1) {
3794		status = evdns_nameserver_ip_add_line(base,buf);
3795	}
3796
3797	mm_free(buf);
3798	return status;
3799}
3800
3801#define SERVICES_KEY TEXT("System\\CurrentControlSet\\Services\\")
3802#define WIN_NS_9X_KEY  SERVICES_KEY TEXT("VxD\\MSTCP")
3803#define WIN_NS_NT_KEY  SERVICES_KEY TEXT("Tcpip\\Parameters")
3804
3805static int
3806load_nameservers_from_registry(struct evdns_base *base)
3807{
3808	int found = 0;
3809	int r;
3810#define TRY(k, name) \
3811	if (!found && config_nameserver_from_reg_key(base,k,TEXT(name)) == 0) { \
3812		log(EVDNS_LOG_DEBUG,"Found nameservers in %s/%s",#k,name); \
3813		found = 1;						\
3814	} else if (!found) {						\
3815		log(EVDNS_LOG_DEBUG,"Didn't find nameservers in %s/%s", \
3816		    #k,#name);						\
3817	}
3818
3819	ASSERT_LOCKED(base);
3820
3821	if (((int)GetVersion()) > 0) { /* NT */
3822		HKEY nt_key = 0, interfaces_key = 0;
3823
3824		if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_NT_KEY, 0,
3825				 KEY_READ, &nt_key) != ERROR_SUCCESS) {
3826			log(EVDNS_LOG_DEBUG,"Couldn't open nt key, %d",(int)GetLastError());
3827			return -1;
3828		}
3829		r = RegOpenKeyEx(nt_key, TEXT("Interfaces"), 0,
3830			     KEY_QUERY_VALUE|KEY_ENUMERATE_SUB_KEYS,
3831			     &interfaces_key);
3832		if (r != ERROR_SUCCESS) {
3833			log(EVDNS_LOG_DEBUG,"Couldn't open interfaces key, %d",(int)GetLastError());
3834			return -1;
3835		}
3836		TRY(nt_key, "NameServer");
3837		TRY(nt_key, "DhcpNameServer");
3838		TRY(interfaces_key, "NameServer");
3839		TRY(interfaces_key, "DhcpNameServer");
3840		RegCloseKey(interfaces_key);
3841		RegCloseKey(nt_key);
3842	} else {
3843		HKEY win_key = 0;
3844		if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_9X_KEY, 0,
3845				 KEY_READ, &win_key) != ERROR_SUCCESS) {
3846			log(EVDNS_LOG_DEBUG, "Couldn't open registry key, %d", (int)GetLastError());
3847			return -1;
3848		}
3849		TRY(win_key, "NameServer");
3850		RegCloseKey(win_key);
3851	}
3852
3853	if (found == 0) {
3854		log(EVDNS_LOG_WARN,"Didn't find any nameservers.");
3855	}
3856
3857	return found ? 0 : -1;
3858#undef TRY
3859}
3860
3861int
3862evdns_base_config_windows_nameservers(struct evdns_base *base)
3863{
3864	int r;
3865	char *fname;
3866	if (base == NULL)
3867		base = current_base;
3868	if (base == NULL)
3869		return -1;
3870	EVDNS_LOCK(base);
3871	fname = evdns_get_default_hosts_filename();
3872	log(EVDNS_LOG_DEBUG, "Loading hosts entries from %s", fname);
3873	evdns_base_load_hosts(base, fname);
3874	if (fname)
3875		mm_free(fname);
3876
3877	if (load_nameservers_with_getnetworkparams(base) == 0) {
3878		EVDNS_UNLOCK(base);
3879		return 0;
3880	}
3881	r = load_nameservers_from_registry(base);
3882
3883	EVDNS_UNLOCK(base);
3884	return r;
3885}
3886
3887int
3888evdns_config_windows_nameservers(void)
3889{
3890	if (!current_base) {
3891		current_base = evdns_base_new(NULL, 1);
3892		return current_base == NULL ? -1 : 0;
3893	} else {
3894		return evdns_base_config_windows_nameservers(current_base);
3895	}
3896}
3897#endif
3898
/* Create a new evdns_base attached to 'event_base'.
 *
 * 'flags' may combine EVDNS_BASE_INITIALIZE_NAMESERVERS (load the system
 * configuration: Windows settings on _WIN32, /etc/resolv.conf elsewhere)
 * and EVDNS_BASE_DISABLE_WHEN_INACTIVE.  Any unrecognized bit makes the
 * whole value be treated as EVDNS_BASE_INITIALIZE_NAMESERVERS, with a
 * warning.
 *
 * Returns the new base, or NULL if the secure RNG cannot be seeded, memory
 * cannot be allocated, or loading the system configuration reports -1. */
struct evdns_base *
evdns_base_new(struct event_base *event_base, int flags)
{
	struct evdns_base *base;

	if (evutil_secure_rng_init() < 0) {
		log(EVDNS_LOG_WARN, "Unable to seed random number generator; "
		    "DNS can't run.");
		return NULL;
	}

	/* Give the evutil library a hook into its evdns-enabled
	 * functionality.  We can't just call evdns_getaddrinfo directly or
	 * else libevent-core will depend on libevent-extras. */
	evutil_set_evdns_getaddrinfo_fn_(evdns_getaddrinfo);
	evutil_set_evdns_getaddrinfo_cancel_fn_(evdns_getaddrinfo_cancel);

	base = mm_malloc(sizeof(struct evdns_base));
	if (base == NULL)
		return (NULL);
	memset(base, 0, sizeof(struct evdns_base));
	base->req_waiting_head = NULL;

	EVTHREAD_ALLOC_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
	EVDNS_LOCK(base);

	/* Set max requests inflight and allocate req_heads. */
	base->req_heads = NULL;

	if (evdns_base_set_max_requests_inflight(base, 64) < 0) {
		/* Without a request table the base is unusable.  Nothing
		 * else has been set up yet, so undo just the lock and the
		 * allocation. */
		EVDNS_UNLOCK(base);
		EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
		mm_free(base);
		return NULL;
	}

	base->server_head = NULL;
	base->event_base = event_base;
	base->global_good_nameservers = base->global_requests_inflight =
		base->global_requests_waiting = 0;

	base->global_timeout.tv_sec = 5;
	base->global_timeout.tv_usec = 0;
	base->global_max_reissues = 1;
	base->global_max_retransmits = 3;
	base->global_max_nameserver_timeout = 3;
	base->global_search_state = NULL;
	base->global_randomize_case = 1;
	base->global_getaddrinfo_allow_skew.tv_sec = 3;
	base->global_getaddrinfo_allow_skew.tv_usec = 0;
	base->global_nameserver_probe_initial_timeout.tv_sec = 10;
	base->global_nameserver_probe_initial_timeout.tv_usec = 0;

	TAILQ_INIT(&base->hostsdb);

#define EVDNS_BASE_ALL_FLAGS (0x8001)
	if (flags & ~EVDNS_BASE_ALL_FLAGS) {
		flags = EVDNS_BASE_INITIALIZE_NAMESERVERS;
		log(EVDNS_LOG_WARN,
		    "Unrecognized flag passed to evdns_base_new(). Assuming "
		    "you meant EVDNS_BASE_INITIALIZE_NAMESERVERS.");
	}
#undef EVDNS_BASE_ALL_FLAGS

	if (flags & EVDNS_BASE_INITIALIZE_NAMESERVERS) {
		int r;
#ifdef _WIN32
		r = evdns_base_config_windows_nameservers(base);
#else
		r = evdns_base_resolv_conf_parse(base, DNS_OPTIONS_ALL, "/etc/resolv.conf");
#endif
		if (r == -1) {
			evdns_base_free_and_unlock(base, 0);
			return NULL;
		}
	}
	if (flags & EVDNS_BASE_DISABLE_WHEN_INACTIVE) {
		base->disable_when_inactive = 1;
	}

	EVDNS_UNLOCK(base);
	return base;
}
3977
3978int
3979evdns_init(void)
3980{
3981	struct evdns_base *base = evdns_base_new(NULL, 1);
3982	if (base) {
3983		current_base = base;
3984		return 0;
3985	} else {
3986		return -1;
3987	}
3988}
3989
3990const char *
3991evdns_err_to_string(int err)
3992{
3993    switch (err) {
3994	case DNS_ERR_NONE: return "no error";
3995	case DNS_ERR_FORMAT: return "misformatted query";
3996	case DNS_ERR_SERVERFAILED: return "server failed";
3997	case DNS_ERR_NOTEXIST: return "name does not exist";
3998	case DNS_ERR_NOTIMPL: return "query not implemented";
3999	case DNS_ERR_REFUSED: return "refused";
4000
4001	case DNS_ERR_TRUNCATED: return "reply truncated or ill-formed";
4002	case DNS_ERR_UNKNOWN: return "unknown";
4003	case DNS_ERR_TIMEOUT: return "request timed out";
4004	case DNS_ERR_SHUTDOWN: return "dns subsystem shut down";
4005	case DNS_ERR_CANCEL: return "dns request canceled";
4006	case DNS_ERR_NODATA: return "no records in the reply";
4007	default: return "[Unknown error code]";
4008    }
4009}
4010
/* Tear down and free one nameserver: close its socket if it has one,
 * remove its I/O event, remove its timeout event when the server's state
 * field is 0, cancel any outstanding probe request, and release the
 * structure itself. */
static void
evdns_nameserver_free(struct nameserver *server)
{
	if (server->socket >= 0)
		evutil_closesocket(server->socket);
	(void) event_del(&server->event);
	event_debug_unassign(&server->event);
	/* NOTE(review): state == 0 presumably means the server is marked
	 * down and has a pending timeout event — confirm against the struct. */
	if (server->state == 0)
		(void) event_del(&server->timeout_event);
	if (server->probe_request) {
		evdns_cancel_request(server->base, server->probe_request);
		server->probe_request = NULL;
	}
	event_debug_unassign(&server->timeout_event);
	mm_free(server);
}
4027
/* Destroy 'base' and everything it owns, then release and free its lock.
 *
 * Every in-flight and waiting request is finished; when 'fail_requests' is
 * non-zero a DNS_ERR_SHUTDOWN callback is scheduled for each one first.
 * Afterwards the nameservers, the search state with its domains, the hosts
 * database and the request head table are freed.
 * The caller must hold the base lock; 'base' is invalid on return. */
static void
evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests)
{
	struct nameserver *server, *server_next;
	struct search_domain *dom, *dom_next;
	int i;

	/* Requires that we hold the lock. */

	/* TODO(nickm) we might need to refcount here. */

	/* Finish every in-flight request; request_finished() is what
	 * removes the request from its list, so each loop terminates. */
	for (i = 0; i < base->n_req_heads; ++i) {
		while (base->req_heads[i]) {
			if (fail_requests)
				reply_schedule_callback(base->req_heads[i], 0, DNS_ERR_SHUTDOWN, NULL);
			request_finished(base->req_heads[i], &REQ_HEAD(base, base->req_heads[i]->trans_id), 1);
		}
	}
	/* Same for requests still waiting to be sent. */
	while (base->req_waiting_head) {
		if (fail_requests)
			reply_schedule_callback(base->req_waiting_head, 0, DNS_ERR_SHUTDOWN, NULL);
		request_finished(base->req_waiting_head, &base->req_waiting_head, 1);
	}
	base->global_requests_inflight = base->global_requests_waiting = 0;

	/* Free the nameservers; the walk stops when it comes back around to
	 * the head of the list. */
	for (server = base->server_head; server; server = server_next) {
		server_next = server->next;
		/** already done something before */
		server->probe_request = NULL;
		evdns_nameserver_free(server);
		if (server_next == base->server_head)
			break;
	}
	base->server_head = NULL;
	base->global_good_nameservers = 0;

	/* The search state is freed outright here, without consulting its
	 * reference count. */
	if (base->global_search_state) {
		for (dom = base->global_search_state->head; dom; dom = dom_next) {
			dom_next = dom->next;
			mm_free(dom);
		}
		mm_free(base->global_search_state);
		base->global_search_state = NULL;
	}

	{
		/* Empty the hosts database. */
		struct hosts_entry *victim;
		while ((victim = TAILQ_FIRST(&base->hostsdb))) {
			TAILQ_REMOVE(&base->hostsdb, victim, next);
			mm_free(victim);
		}
	}

	mm_free(base->req_heads);

	/* Drop the lock before destroying it, then free the base itself. */
	EVDNS_UNLOCK(base);
	EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);

	mm_free(base);
}
4088
/* Destroy 'base'.  The base lock is taken here and released as part of the
 * teardown done by evdns_base_free_and_unlock(); when 'fail_requests' is
 * non-zero, outstanding requests get a DNS_ERR_SHUTDOWN callback. */
void
evdns_base_free(struct evdns_base *base, int fail_requests)
{
	EVDNS_LOCK(base);
	/* the callee unlocks and frees the base */
	evdns_base_free_and_unlock(base, fail_requests);
}
4095
4096void
4097evdns_base_clear_host_addresses(struct evdns_base *base)
4098{
4099	struct hosts_entry *victim;
4100	EVDNS_LOCK(base);
4101	while ((victim = TAILQ_FIRST(&base->hostsdb))) {
4102		TAILQ_REMOVE(&base->hostsdb, victim, next);
4103		mm_free(victim);
4104	}
4105	EVDNS_UNLOCK(base);
4106}
4107
4108void
4109evdns_shutdown(int fail_requests)
4110{
4111	if (current_base) {
4112		struct evdns_base *b = current_base;
4113		current_base = NULL;
4114		evdns_base_free(b, fail_requests);
4115	}
4116	evdns_log_fn = NULL;
4117}
4118
/* Parse one line of a hosts file ("<address> <name> [<name>...]") and
 * append one hosts_entry per name to base->hostsdb.  The line is tokenized
 * in place on spaces and tabs; a '#' starts a comment that runs to the end
 * of the line.
 *
 * Returns 0 for an empty or comment-only line and on success; returns -1 if
 * the address cannot be parsed, is larger than a sockaddr_in6, carries a
 * port, or if allocating an entry fails (entries added before the failure
 * stay in the database).  The caller must hold the base lock. */
static int
evdns_base_parse_hosts_line(struct evdns_base *base, char *line)
{
	char *strtok_state;
	static const char *const delims = " \t";
	char *const addr = strtok_r(line, delims, &strtok_state);
	char *hostname, *hash;
	struct sockaddr_storage ss;
	int socklen = sizeof(ss);
	ASSERT_LOCKED(base);

#define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)

	/* Nothing on the line, or the line is a comment. */
	if (!addr || *addr == '#')
		return 0;

	memset(&ss, 0, sizeof(ss));
	if (evutil_parse_sockaddr_port(addr, (struct sockaddr*)&ss, &socklen)<0)
		return -1;
	if (socklen > (int)sizeof(struct sockaddr_in6))
		return -1;

	/* A hosts-file address must not include a port. */
	if (sockaddr_getport((struct sockaddr*)&ss))
		return -1;

	while ((hostname = NEXT_TOKEN)) {
		struct hosts_entry *he;
		size_t namelen;
		if ((hash = strchr(hostname, '#'))) {
			/* Token is entirely a comment: stop here. */
			if (hash == hostname)
				return 0;
			/* Otherwise keep only the part before the '#'. */
			*hash = '\0';
		}

		namelen = strlen(hostname);

		/* NOTE(review): allocating sizeof(struct hosts_entry)+namelen
		 * while copying namelen+1 bytes assumes the struct already
		 * reserves at least one byte for hostname — confirm. */
		he = mm_calloc(1, sizeof(struct hosts_entry)+namelen);
		if (!he)
			return -1;
		EVUTIL_ASSERT(socklen <= (int)sizeof(he->addr));
		memcpy(&he->addr, &ss, socklen);
		memcpy(he->hostname, hostname, namelen+1);
		he->addrlen = socklen;

		TAILQ_INSERT_TAIL(&base->hostsdb, he, next);

		/* A '#' inside this token means the rest of the line is a
		 * comment. */
		if (hash)
			return 0;
	}

	return 0;
#undef NEXT_TOKEN
}
4172
/* Load hosts-file entries from 'hosts_fname' into 'base'.  The caller must
 * hold the base lock.
 *
 * If 'hosts_fname' is NULL, or the file cannot be read, default entries
 * mapping "localhost" to 127.0.0.1 and ::1 are added instead.
 *
 * Returns 0 on success (including the NULL-filename case) and -1 if the
 * file could not be read.  Errors from individual lines are ignored. */
static int
evdns_base_load_hosts_impl(struct evdns_base *base, const char *hosts_fname)
{
	char *contents = NULL, *line, *newline;
	size_t contents_len;
	int read_err = 0;

	ASSERT_LOCKED(base);

	if (hosts_fname != NULL)
		read_err = evutil_read_file_(hosts_fname, &contents,
		    &contents_len, 0);

	if (hosts_fname == NULL || read_err < 0) {
		/* The parser modifies its input, so each literal is copied
		 * into a scratch buffer first. */
		char scratch[64];
		strlcpy(scratch, "127.0.0.1   localhost", sizeof(scratch));
		evdns_base_parse_hosts_line(base, scratch);
		strlcpy(scratch, "::1   localhost", sizeof(scratch));
		evdns_base_parse_hosts_line(base, scratch);
		return read_err ? -1 : 0;
	}

	/* This will break early if there is a NUL in the hosts file.
	 * Probably not a problem.*/
	for (line = contents; (newline = strchr(line, '\n')) != NULL;
	    line = newline + 1) {
		*newline = '\0';
		evdns_base_parse_hosts_line(base, line);
	}
	/* Whatever follows the last newline (possibly an empty string). */
	evdns_base_parse_hosts_line(base, line);

	mm_free(contents);
	return 0;
}
4211
4212int
4213evdns_base_load_hosts(struct evdns_base *base, const char *hosts_fname)
4214{
4215	int res;
4216	if (!base)
4217		base = current_base;
4218	EVDNS_LOCK(base);
4219	res = evdns_base_load_hosts_impl(base, hosts_fname);
4220	EVDNS_UNLOCK(base);
4221	return res;
4222}
4223
/* A single request for a getaddrinfo, either v4 or v6.  Instances live
 * inside struct evdns_getaddrinfo_request, and the resolve callback
 * recovers the enclosing request from a pointer to one of them. */
struct getaddrinfo_subrequest {
	/* The outstanding evdns request, or NULL if none was launched or its
	 * callback has already run. */
	struct evdns_request *r;
	/* Record type of this sub-request: DNS_IPv4_A or DNS_IPv6_AAAA. */
	ev_uint32_t type;
};
4229
/* State data used to implement an in-progress getaddrinfo. */
struct evdns_getaddrinfo_request {
	/* The base the sub-requests were launched on.  The resolve callback
	 * sets this to NULL when it is told the base is shutting down. */
	struct evdns_base *evdns_base;
	/* Copy of the modified 'hints' data that we'll use to build
	 * answers. */
	struct evutil_addrinfo hints;
	/* The callback to invoke when we're done.  Set to NULL by the timeout
	 * callback once the user has been answered. */
	evdns_getaddrinfo_cb user_cb;
	/* User-supplied data to give to the callback. */
	void *user_data;
	/* The port to use when building sockaddrs. */
	ev_uint16_t port;
	/* The sub_request for an A record (if any) */
	struct getaddrinfo_subrequest ipv4_request;
	/* The sub_request for an AAAA record (if any) */
	struct getaddrinfo_subrequest ipv6_request;

	/* The cname result that we were told (if any).  Ownership moves to
	 * the reply's ai_canonname when it is attached to an answer. */
	char *cname_result;

	/* If we have one request answered and one request still inflight,
	 * then this field holds the answer from the first request... */
	struct evutil_addrinfo *pending_result;
	/* And this event is a timeout that will tell us to cancel the second
	 * request if it's taking a long time. */
	struct event timeout;

	/* And this field holds the error code from the first request... */
	int pending_error;
	/* If this is set, the user canceled this request. */
	unsigned user_canceled : 1;
	/* If this is set, the user can no longer cancel this request; we're
	 * just waiting for the free. */
	unsigned request_done : 1;
};
4265
/* Convert an evdns error code to the equivalent getaddrinfo error:
 * DNS_ERR_NONE maps to 0, DNS_ERR_NOTEXIST to EVUTIL_EAI_NONAME, and
 * everything else to EVUTIL_EAI_FAIL. */
static int
evdns_err_to_getaddrinfo_err(int e1)
{
	/* XXX Do this better! */
	switch (e1) {
	case DNS_ERR_NONE:
		return 0;
	case DNS_ERR_NOTEXIST:
		return EVUTIL_EAI_NONAME;
	default:
		return EVUTIL_EAI_FAIL;
	}
}
4278
/* Return the more informative of two getaddrinfo errors.  For now that
 * simply means: 'e1' if it is nonzero, otherwise 'e2'. */
static int
getaddrinfo_merge_err(int e1, int e2)
{
	/* XXXX be cleverer here. */
	return (e1 != 0) ? e1 : e2;
}
4289
/* Release a getaddrinfo request and everything it still owns: any stashed
 * partial answer, any unclaimed CNAME string, and the skew timer.
 *
 * DO NOT CALL this if either of the requests is pending.  Only once
 * both callbacks have been invoked is it safe to free the request. */
static void
free_getaddrinfo_request(struct evdns_getaddrinfo_request *data)
{
	struct evutil_addrinfo *leftover_answer = data->pending_result;
	char *unclaimed_cname = data->cname_result;

	if (leftover_answer != NULL)
		evutil_freeaddrinfo(leftover_answer);
	if (unclaimed_cname != NULL)
		mm_free(unclaimed_cname);

	/* The timer must not outlive the structure it is embedded in. */
	event_del(&data->timeout);
	mm_free(data);
}
4303
/* If a CNAME was recorded for this request and 'ai' is a non-empty answer,
 * hand the CNAME string over to 'ai' as its canonical name.  The request
 * gives up its pointer, so the string is not freed with the request. */
static void
add_cname_to_reply(struct evdns_getaddrinfo_request *data,
    struct evutil_addrinfo *ai)
{
	if (data->cname_result == NULL || ai == NULL)
		return;

	ai->ai_canonname = data->cname_result;
	data->cname_result = NULL;
}
4313
/* Callback: invoked when one request in a mixed-format A/AAAA getaddrinfo
 * request has finished, but the other one took too long to answer. Pass
 * along the answer we got, and cancel the other request.
 *
 * 'fd' and 'what' are unused; 'ptr' is the struct
 * evdns_getaddrinfo_request that owns the timer.
 *
 * In the expected case the request is NOT freed here: the sub-request we
 * cancel still has its callback to run, and that callback does the free.
 */
static void
evdns_getaddrinfo_timeout_cb(evutil_socket_t fd, short what, void *ptr)
{
	int v4_timedout = 0, v6_timedout = 0;
	struct evdns_getaddrinfo_request *data = ptr;

	/* Cancel any pending requests, and note which one */
	if (data->ipv4_request.r) {
		/* XXXX This does nothing if the request's callback is already
		 * running (pending_cb is set). */
		evdns_cancel_request(NULL, data->ipv4_request.r);
		v4_timedout = 1;
		/* The statistics counter belongs to the base, so update it
		 * under the base lock. */
		EVDNS_LOCK(data->evdns_base);
		++data->evdns_base->getaddrinfo_ipv4_timeouts;
		EVDNS_UNLOCK(data->evdns_base);
	}
	if (data->ipv6_request.r) {
		/* XXXX This does nothing if the request's callback is already
		 * running (pending_cb is set). */
		evdns_cancel_request(NULL, data->ipv6_request.r);
		v6_timedout = 1;
		/* As above: counter update under the base lock. */
		EVDNS_LOCK(data->evdns_base);
		++data->evdns_base->getaddrinfo_ipv6_timeouts;
		EVDNS_UNLOCK(data->evdns_base);
	}

	/* We only use this timeout callback when we have an answer for
	 * one address. */
	EVUTIL_ASSERT(!v4_timedout || !v6_timedout);

	/* Report the outcome of the other request that didn't time out. */
	if (data->pending_result) {
		/* Hand the stashed answer (and any CNAME) to the user, then
		 * drop our pointer to it so it is not freed with the
		 * request. */
		add_cname_to_reply(data, data->pending_result);
		data->user_cb(0, data->pending_result, data->user_data);
		data->pending_result = NULL;
	} else {
		/* No answer was stashed: report the stashed error, or
		 * EVUTIL_EAI_AGAIN if there is none. */
		int e = data->pending_error;
		if (!e)
			e = EVUTIL_EAI_AGAIN;
		data->user_cb(e, NULL, data->user_data);
	}

	data->user_cb = NULL; /* prevent double-call if evdns callbacks are
			       * in-progress. XXXX It would be better if this
			       * weren't necessary. */

	if (!v4_timedout && !v6_timedout) {
		/* should be impossible? XXXX */
		/* Nothing was cancelled, so no sub-request callback remains
		 * to free the request; do it here. */
		free_getaddrinfo_request(data);
	}
}
4369
/* Arm the timer of 'data' so that it fires after the interval stored in
 * evdns_base->global_getaddrinfo_allow_skew.  Returns whatever event_add()
 * returns. */
static int
evdns_getaddrinfo_set_timeout(struct evdns_base *evdns_base,
    struct evdns_getaddrinfo_request *data)
{
	const struct timeval *skew = &evdns_base->global_getaddrinfo_allow_skew;

	return event_add(&data->timeout, skew);
}
4376
4377static inline int
4378evdns_result_is_answer(int result)
4379{
4380	return (result != DNS_ERR_NOTIMPL && result != DNS_ERR_REFUSED &&
4381	    result != DNS_ERR_SERVERFAILED && result != DNS_ERR_CANCEL);
4382}
4383
/* Callback for each A/AAAA sub-request of a getaddrinfo.
 *
 * 'result' is a DNS_ERR_* code.  'type' is the record type of the answers
 * and is only examined on success.  'addresses' holds 'count' packed
 * addresses (4 bytes each for A, 16 bytes each for AAAA).  'ttl' is unused.
 * 'arg' points at the getaddrinfo_subrequest embedded in the owning
 * evdns_getaddrinfo_request.
 *
 * If the other sub-request is still outstanding, this stashes the answer
 * or error in the request and arms the skew timer (the timer is not armed
 * for DNS_ERR_SHUTDOWN).  Otherwise it reports the combined outcome through
 * the user callback and frees the request. */
static void
evdns_getaddrinfo_gotresolve(int result, char type, int count,
    int ttl, void *addresses, void *arg)
{
	int i;
	struct getaddrinfo_subrequest *req = arg;
	struct getaddrinfo_subrequest *other_req;
	struct evdns_getaddrinfo_request *data;

	struct evutil_addrinfo *res;

	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	struct sockaddr *sa;
	int socklen, addrlen;
	void *addrp;
	int err;
	int user_canceled;

	/* Recover the enclosing request from the embedded sub-request, and
	 * find the sub-request for the other address family. */
	EVUTIL_ASSERT(req->type == DNS_IPv4_A || req->type == DNS_IPv6_AAAA);
	if (req->type == DNS_IPv4_A) {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv4_request);
		other_req = &data->ipv6_request;
	} else {
		data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv6_request);
		other_req = &data->ipv4_request;
	}

	/** DNS_ERR_SHUTDOWN means we were called from evdns_base_free()
	 * with @fail_requests == 1.  In that case the base is not locked or
	 * touched; we just forget about it. */
	if (result != DNS_ERR_SHUTDOWN) {
		EVDNS_LOCK(data->evdns_base);
		if (evdns_result_is_answer(result)) {
			if (req->type == DNS_IPv4_A)
				++data->evdns_base->getaddrinfo_ipv4_answered;
			else
				++data->evdns_base->getaddrinfo_ipv6_answered;
		}
		/* Snapshot the flag while holding the lock. */
		user_canceled = data->user_canceled;
		/* Nothing else is outstanding: from here on the user can no
		 * longer cancel. */
		if (other_req->r == NULL)
			data->request_done = 1;
		EVDNS_UNLOCK(data->evdns_base);
	} else {
		data->evdns_base = NULL;
		user_canceled = data->user_canceled;
	}

	/* This sub-request is finished, whatever the outcome. */
	req->r = NULL;

	if (result == DNS_ERR_CANCEL && ! user_canceled) {
		/* Internal cancel request from timeout or internal error.
		 * we already answered the user. */
		if (other_req->r == NULL)
			free_getaddrinfo_request(data);
		return;
	}

	if (data->user_cb == NULL) {
		/* We already answered.  XXXX This shouldn't be needed; see
		 * comments in evdns_getaddrinfo_timeout_cb */
		free_getaddrinfo_request(data);
		return;
	}

	/* Translate the DNS outcome into a getaddrinfo error code.  A
	 * successful reply with no addresses is reported as "no data". */
	if (result == DNS_ERR_NONE) {
		if (count == 0)
			err = EVUTIL_EAI_NODATA;
		else
			err = 0;
	} else {
		err = evdns_err_to_getaddrinfo_err(result);
	}

	if (err) {
		/* Looks like we got an error. */
		if (other_req->r) {
			/* The other request is still working; maybe it will
			 * succeed. */
			/* XXXX handle failure from set_timeout */
			if (result != DNS_ERR_SHUTDOWN) {
				evdns_getaddrinfo_set_timeout(data->evdns_base, data);
			}
			/* Remember this error in case the other request
			 * fails too. */
			data->pending_error = err;
			return;
		}

		/* Both sub-requests are finished: report and clean up. */
		if (user_canceled) {
			data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		} else if (data->pending_result) {
			/* If we have an answer waiting, and we weren't
			 * canceled, ignore this error. */
			add_cname_to_reply(data, data->pending_result);
			data->user_cb(0, data->pending_result, data->user_data);
			data->pending_result = NULL;
		} else {
			if (data->pending_error)
				err = getaddrinfo_merge_err(err,
				    data->pending_error);
			data->user_cb(err, NULL, data->user_data);
		}
		free_getaddrinfo_request(data);
		return;
	} else if (user_canceled) {
		if (other_req->r) {
			/* The other request is still working; let it hit this
			 * callback with EVUTIL_EAI_CANCEL callback and report
			 * the failure. */
			return;
		}
		data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
		free_getaddrinfo_request(data);
		return;
	}

	/* Looks like we got some answers. We should turn them into addrinfos
	 * and then either queue those or return them all. */
	EVUTIL_ASSERT(type == DNS_IPv4_A || type == DNS_IPv6_AAAA);

	/* Prepare a template sockaddr for this family.  'addrp' points at
	 * the address field inside it, which is overwritten once per
	 * answer in the loop below. */
	if (type == DNS_IPv4_A) {
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_port = htons(data->port);

		sa = (struct sockaddr *)&sin;
		socklen = sizeof(sin);
		addrlen = 4;
		addrp = &sin.sin_addr.s_addr;
	} else {
		memset(&sin6, 0, sizeof(sin6));
		sin6.sin6_family = AF_INET6;
		sin6.sin6_port = htons(data->port);

		sa = (struct sockaddr *)&sin6;
		socklen = sizeof(sin6);
		addrlen = 16;
		addrp = &sin6.sin6_addr.s6_addr;
	}

	res = NULL;
	for (i=0; i < count; ++i) {
		struct evutil_addrinfo *ai;
		/* Copy the i'th packed address into the template. */
		memcpy(addrp, ((char*)addresses)+i*addrlen, addrlen);
		ai = evutil_new_addrinfo_(sa, socklen, &data->hints);
		if (!ai) {
			/* Out of memory: give up on the whole lookup. */
			if (other_req->r) {
				evdns_cancel_request(NULL, other_req->r);
			}
			data->user_cb(EVUTIL_EAI_MEMORY, NULL, data->user_data);
			if (res)
				evutil_freeaddrinfo(res);

			/* If the other request is still outstanding, its
			 * callback is left to free 'data'. */
			if (other_req->r == NULL)
				free_getaddrinfo_request(data);
			return;
		}
		res = evutil_addrinfo_append_(res, ai);
	}

	if (other_req->r) {
		/* The other request is still in progress; wait for it */
		/* XXXX handle failure from set_timeout */
		evdns_getaddrinfo_set_timeout(data->evdns_base, data);
		data->pending_result = res;
		return;
	} else {
		/* The other request is done or never started; append its
		 * results (if any) and return them. */
		if (data->pending_result) {
			/* Either way the IPv4 results end up ahead of the
			 * IPv6 results in the combined list. */
			if (req->type == DNS_IPv4_A)
				res = evutil_addrinfo_append_(res,
				    data->pending_result);
			else
				res = evutil_addrinfo_append_(
				    data->pending_result, res);
			data->pending_result = NULL;
		}

		/* Call the user callback. */
		add_cname_to_reply(data, res);
		data->user_cb(0, res, data->user_data);

		/* Free data. */
		free_getaddrinfo_request(data);
	}
}
4568
4569static struct hosts_entry *
4570find_hosts_entry(struct evdns_base *base, const char *hostname,
4571    struct hosts_entry *find_after)
4572{
4573	struct hosts_entry *e;
4574
4575	if (find_after)
4576		e = TAILQ_NEXT(find_after, next);
4577	else
4578		e = TAILQ_FIRST(&base->hostsdb);
4579
4580	for (; e; e = TAILQ_NEXT(e, next)) {
4581		if (!evutil_ascii_strcasecmp(e->hostname, hostname))
4582			return e;
4583	}
4584	return NULL;
4585}
4586
4587static int
4588evdns_getaddrinfo_fromhosts(struct evdns_base *base,
4589    const char *nodename, struct evutil_addrinfo *hints, ev_uint16_t port,
4590    struct evutil_addrinfo **res)
4591{
4592	int n_found = 0;
4593	struct hosts_entry *e;
4594	struct evutil_addrinfo *ai=NULL;
4595	int f = hints->ai_family;
4596
4597	EVDNS_LOCK(base);
4598	for (e = find_hosts_entry(base, nodename, NULL); e;
4599	    e = find_hosts_entry(base, nodename, e)) {
4600		struct evutil_addrinfo *ai_new;
4601		++n_found;
4602		if ((e->addr.sa.sa_family == AF_INET && f == PF_INET6) ||
4603		    (e->addr.sa.sa_family == AF_INET6 && f == PF_INET))
4604			continue;
4605		ai_new = evutil_new_addrinfo_(&e->addr.sa, e->addrlen, hints);
4606		if (!ai_new) {
4607			n_found = 0;
4608			goto out;
4609		}
4610		sockaddr_setport(ai_new->ai_addr, port);
4611		ai = evutil_addrinfo_append_(ai, ai_new);
4612	}
4613	EVDNS_UNLOCK(base);
4614out:
4615	if (n_found) {
4616		/* Note that we return an empty answer if we found entries for
4617		 * this hostname but none were of the right address type. */
4618		*res = ai;
4619		return 0;
4620	} else {
4621		if (ai)
4622			evutil_freeaddrinfo(ai);
4623		return -1;
4624	}
4625}
4626
/* Asynchronous getaddrinfo() on top of evdns.
 *
 * Resolves 'nodename'/'servname' subject to 'hints_in' (which may be NULL)
 * and reports the outcome through cb(err, result, arg).  If 'dns_base' is
 * NULL, the global current_base is used.
 *
 * Whenever the answer can be produced without a DNS query (no base
 * available, AI_NUMERICHOST, a result from evutil_getaddrinfo_common_(),
 * a hosts-database hit, or a failure to start), 'cb' is invoked before
 * this function returns and NULL is returned.  Otherwise one or two
 * sub-requests (A and/or AAAA) are launched and a request handle is
 * returned, which may be passed to evdns_getaddrinfo_cancel(). */
struct evdns_getaddrinfo_request *
evdns_getaddrinfo(struct evdns_base *dns_base,
    const char *nodename, const char *servname,
    const struct evutil_addrinfo *hints_in,
    evdns_getaddrinfo_cb cb, void *arg)
{
	struct evdns_getaddrinfo_request *data;
	struct evutil_addrinfo hints;
	struct evutil_addrinfo *res = NULL;
	int err;
	int port = 0;
	int want_cname = 0;
	int started = 0;

	if (!dns_base) {
		dns_base = current_base;
		if (!dns_base) {
			log(EVDNS_LOG_WARN,
			    "Call to getaddrinfo_async with no "
			    "evdns_base configured.");
			cb(EVUTIL_EAI_FAIL, NULL, arg); /* ??? better error? */
			return NULL;
		}
	}

	/* If we _must_ answer this immediately, do so. */
	if ((hints_in && (hints_in->ai_flags & EVUTIL_AI_NUMERICHOST))) {
		res = NULL;
		err = evutil_getaddrinfo(nodename, servname, hints_in, &res);
		cb(err, res, arg);
		return NULL;
	}

	if (hints_in) {
		memcpy(&hints, hints_in, sizeof(hints));
	} else {
		memset(&hints, 0, sizeof(hints));
		hints.ai_family = PF_UNSPEC;
	}

	evutil_adjust_hints_for_addrconfig_(&hints);

	/* Now try to see if we _can_ answer immediately. */
	/* (It would be nice to do this by calling getaddrinfo directly, with
	 * AI_NUMERICHOST, on platforms that have it, but we can't: there isn't
	 * a reliable way to distinguish the "that wasn't a numeric host!" case
	 * from any other EAI_NONAME cases.) */
	err = evutil_getaddrinfo_common_(nodename, servname, &hints, &res, &port);
	if (err != EVUTIL_EAI_NEED_RESOLVE) {
		cb(err, res, arg);
		return NULL;
	}

	/* If there is an entry in the hosts file, we should give it now. */
	if (!evdns_getaddrinfo_fromhosts(dns_base, nodename, &hints, port, &res)) {
		cb(0, res, arg);
		return NULL;
	}

	/* Okay, things are serious now. We're going to need to actually
	 * launch a request.
	 */
	data = mm_calloc(1,sizeof(struct evdns_getaddrinfo_request));
	if (!data) {
		cb(EVUTIL_EAI_MEMORY, NULL, arg);
		return NULL;
	}

	memcpy(&data->hints, &hints, sizeof(data->hints));
	data->port = (ev_uint16_t)port;
	data->ipv4_request.type = DNS_IPv4_A;
	data->ipv6_request.type = DNS_IPv6_AAAA;
	data->user_cb = cb;
	data->user_data = arg;
	data->evdns_base = dns_base;

	want_cname = (hints.ai_flags & EVUTIL_AI_CANONNAME);

	/* Set up the skew timer before any sub-request exists, so that a
	 * completion can never see an uninitialized event. */
	evtimer_assign(&data->timeout, dns_base->event_base,
	    evdns_getaddrinfo_timeout_cb, data);

	/* If we are asked for a PF_UNSPEC address, we launch two requests in
	 * parallel: one for an A address and one for an AAAA address.  We
	 * can't send just one request, since many servers only answer one
	 * question per DNS request.
	 *
	 * Once we have the answer to one request, we allow for a short
	 * timeout before we report it, to see if the other one arrives.  If
	 * they both show up in time, then we report both the answers.
	 *
	 * If too many addresses of one type time out or fail, we should stop
	 * launching those requests. (XXX we don't do that yet.)
	 */

	/* Hold the base lock while the sub-requests are launched and wired
	 * up.  evdns_getaddrinfo_gotresolve() takes the same lock before it
	 * reads or updates 'data', so a completion handled on another thread
	 * has to wait until both request pointers and put_cname_in_ptr are
	 * in place. */
	EVDNS_LOCK(dns_base);

	if (hints.ai_family != PF_INET6) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv4 as %p",
		    nodename, &data->ipv4_request);

		data->ipv4_request.r = evdns_base_resolve_ipv4(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv4_request);
		if (want_cname && data->ipv4_request.r)
			data->ipv4_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}
	if (hints.ai_family != PF_INET) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv6 as %p",
		    nodename, &data->ipv6_request);

		data->ipv6_request.r = evdns_base_resolve_ipv6(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &data->ipv6_request);
		if (want_cname && data->ipv6_request.r)
			data->ipv6_request.r->current_req->put_cname_in_ptr =
			    &data->cname_result;
	}

	/* Decide while still locked; once the lock is dropped a callback may
	 * clear the request pointers (or free 'data') at any time. */
	started = (data->ipv4_request.r || data->ipv6_request.r);

	EVDNS_UNLOCK(dns_base);

	if (started) {
		return data;
	} else {
		/* Neither sub-request could be started, so nothing else
		 * refers to 'data' and the timer was never added. */
		mm_free(data);
		cb(EVUTIL_EAI_FAIL, NULL, arg);
		return NULL;
	}
}
4751
/* Cancel an in-progress getaddrinfo request returned by
 * evdns_getaddrinfo().  Does nothing if the request is already marked done.
 * Otherwise the skew timer is removed, the request is flagged as
 * user-canceled, and every outstanding sub-request is canceled.  All of
 * this happens under the base lock. */
void
evdns_getaddrinfo_cancel(struct evdns_getaddrinfo_request *data)
{
	EVDNS_LOCK(data->evdns_base);
	if (!data->request_done) {
		event_del(&data->timeout);
		data->user_canceled = 1;
		if (data->ipv4_request.r != NULL)
			evdns_cancel_request(data->evdns_base,
			    data->ipv4_request.r);
		if (data->ipv6_request.r != NULL)
			evdns_cancel_request(data->evdns_base,
			    data->ipv6_request.r);
	}
	EVDNS_UNLOCK(data->evdns_base);
}
4768