1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/param.h>
32#ifndef WITHOUT_CAPSICUM
33#include <sys/capsicum.h>
34#endif
35#include <sys/endian.h>
36#include <sys/ioctl.h>
37#include <sys/mman.h>
38#include <sys/queue.h>
39#include <sys/socket.h>
40#include <machine/atomic.h>
41#include <machine/specialreg.h>
42#include <machine/vmm.h>
43#include <netinet/in.h>
44#include <assert.h>
45#ifndef WITHOUT_CAPSICUM
46#include <capsicum_helpers.h>
47#endif
48#include <err.h>
49#include <errno.h>
50#include <fcntl.h>
51#include <pthread.h>
52#include <pthread_np.h>
53#include <stdbool.h>
54#include <stdio.h>
55#include <stdlib.h>
56#include <string.h>
57#include <sysexits.h>
58#include <unistd.h>
59#include <vmmapi.h>
60
61#include "bhyverun.h"
62#include "gdb.h"
63#include "mem.h"
64#include "mevent.h"
65
66/*
67 * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
68 * use SIGTRAP.
69 */
70#define	GDB_SIGNAL_TRAP		5
71
72static void gdb_resume_vcpus(void);
73static void check_command(int fd);
74
75static struct mevent *read_event, *write_event;
76
77static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
78static pthread_mutex_t gdb_lock;
79static pthread_cond_t idle_vcpus;
80static bool first_stop, report_next_stop, swbreak_enabled;
81
82/*
83 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
84 * read buffer, 'start' is unused and 'len' contains the number of
85 * valid bytes in the buffer.  For a write buffer, 'start' is set to
86 * the index of the next byte in 'data' to send, and 'len' contains
87 * the remaining number of valid bytes to send.
88 */
89struct io_buffer {
90	uint8_t *data;
91	size_t capacity;
92	size_t start;
93	size_t len;
94};
95
/* A software breakpoint: a guest physical address and the byte it replaced. */
struct breakpoint {
	uint64_t gpa;		/* guest physical address of the patched byte */
	uint8_t shadow_inst;	/* original instruction byte at 'gpa' */
	TAILQ_ENTRY(breakpoint) link;
};
101
102/*
103 * When a vCPU stops to due to an event that should be reported to the
104 * debugger, information about the event is stored in this structure.
105 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
106 * and stops other vCPUs so the event can be reported.  The
107 * report_stop() function reports the event for the 'stopped_vcpu'
108 * vCPU.  When the debugger resumes execution via continue or step,
109 * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
110 * event handlers until the associated event is reported or disabled.
111 *
112 * An idle vCPU will have all of the boolean fields set to false.
113 *
114 * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
115 * released to execute the stepped instruction.  When the vCPU reports
116 * the stepping trap, 'stepped' is set.
117 *
118 * When a vCPU hits a breakpoint set by the debug server,
119 * 'hit_swbreak' is set to true.
120 */
121struct vcpu_state {
122	bool stepping;
123	bool stepped;
124	bool hit_swbreak;
125};
126
/* Receive (cur_comm) and transmit (cur_resp) buffers for the client. */
static struct io_buffer cur_comm, cur_resp;
/* Running checksum of the packet currently being built. */
static uint8_t cur_csum;
static struct vmctx *ctx;
/* Socket fd of the attached debugger, or -1 when none is connected. */
static int cur_fd = -1;
static TAILQ_HEAD(, breakpoint) breakpoints;
/* Per-vCPU event state; guest_ncpus entries indexed by vCPU id. */
static struct vcpu_state *vcpu_state;
/* Current thread for register commands; vCPU owing a stop reply (-1: none). */
static int cur_vcpu, stopped_vcpu;
134
/*
 * Registers in the order GDB's amd64 target expects them in a 'g'
 * packet, paired index-for-index with gdb_regsize below.
 */
const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
161
/*
 * Width in bytes of each register in gdb_regset, in the same order:
 * general-purpose registers and RIP are 8 bytes; RFLAGS and segment
 * registers are reported as 4 bytes.
 */
const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
188
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * Debug logging helper compiled in only with GDB_LOG.  Appends
 * printf-style messages to /tmp/bhyve_gdb.log, opening (and, with
 * Capsicum, rights-limiting) the log file on first use.  Failures to
 * open or limit the file silently disable logging.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		logfile = fopen("/tmp/bhyve_gdb.log", "w");
		if (logfile == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
			fclose(logfile);
			logfile = NULL;
			return;
		}
#endif
		/* Line-buffer so messages appear in the log promptly. */
		setlinebuf(logfile);
	}
	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
/* Without GDB_LOG, debug() calls compile away entirely. */
#define debug(...)
#endif
219
220static void	remove_all_sw_breakpoints(void);
221
/*
 * Fetch CR0/CR3/CR4/EFER for 'vcpu' and fill in a vm_guest_paging
 * structure describing its current address-translation mode.
 * Returns 0 on success and -1 if the registers could not be read.
 */
static int
guest_paging_info(int vcpu, struct vm_guest_paging *paging)
{
	uint64_t regs[4];
	const int regset[4] = {
		VM_REG_GUEST_CR0,
		VM_REG_GUEST_CR3,
		VM_REG_GUEST_CR4,
		VM_REG_GUEST_EFER
	};

	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
		return (-1);

	/*
	 * For the debugger, always pretend to be the kernel (CPL 0),
	 * and if long-mode is enabled, always parse addresses as if
	 * in 64-bit mode.
	 */
	paging->cr3 = regs[1];
	paging->cpl = 0;
	if (regs[3] & EFER_LMA)
		paging->cpu_mode = CPU_MODE_64BIT;
	else if (regs[0] & CR0_PE)
		paging->cpu_mode = CPU_MODE_PROTECTED;
	else
		paging->cpu_mode = CPU_MODE_REAL;
	/* Paging mode follows from the CR0.PG, CR4.PAE and EFER.LME bits. */
	if (!(regs[0] & CR0_PG))
		paging->paging_mode = PAGING_MODE_FLAT;
	else if (!(regs[2] & CR4_PAE))
		paging->paging_mode = PAGING_MODE_32;
	else if (regs[3] & EFER_LME)
		paging->paging_mode = (regs[2] & CR4_LA57) ?
		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
	else
		paging->paging_mode = PAGING_MODE_PAE;
	return (0);
}
260
261/*
262 * Map a guest virtual address to a physical address (for a given vcpu).
263 * If a guest virtual address is valid, return 1.  If the address is
264 * not valid, return 0.  If an error occurs obtaining the mapping,
265 * return -1.
266 */
267static int
268guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
269{
270	struct vm_guest_paging paging;
271	int fault;
272
273	if (guest_paging_info(vcpu, &paging) == -1)
274		return (-1);
275
276	/*
277	 * Always use PROT_READ.  We really care if the VA is
278	 * accessible, not if the current vCPU can write.
279	 */
280	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
281	    &fault) == -1)
282		return (-1);
283	if (fault)
284		return (0);
285	return (1);
286}
287
288static void
289io_buffer_reset(struct io_buffer *io)
290{
291
292	io->start = 0;
293	io->len = 0;
294}
295
296/* Available room for adding data. */
297static size_t
298io_buffer_avail(struct io_buffer *io)
299{
300
301	return (io->capacity - (io->start + io->len));
302}
303
304static uint8_t *
305io_buffer_head(struct io_buffer *io)
306{
307
308	return (io->data + io->start);
309}
310
311static uint8_t *
312io_buffer_tail(struct io_buffer *io)
313{
314
315	return (io->data + io->start + io->len);
316}
317
318static void
319io_buffer_advance(struct io_buffer *io, size_t amount)
320{
321
322	assert(amount <= io->len);
323	io->start += amount;
324	io->len -= amount;
325}
326
327static void
328io_buffer_consume(struct io_buffer *io, size_t amount)
329{
330
331	io_buffer_advance(io, amount);
332	if (io->len == 0) {
333		io->start = 0;
334		return;
335	}
336
337	/*
338	 * XXX: Consider making this move optional and compacting on a
339	 * future read() before realloc().
340	 */
341	memmove(io->data, io_buffer_head(io), io->len);
342	io->start = 0;
343}
344
345static void
346io_buffer_grow(struct io_buffer *io, size_t newsize)
347{
348	uint8_t *new_data;
349	size_t avail, new_cap;
350
351	avail = io_buffer_avail(io);
352	if (newsize <= avail)
353		return;
354
355	new_cap = io->capacity + (newsize - avail);
356	new_data = realloc(io->data, new_cap);
357	if (new_data == NULL)
358		err(1, "Failed to grow GDB I/O buffer");
359	io->data = new_data;
360	io->capacity = new_cap;
361}
362
363static bool
364response_pending(void)
365{
366
367	if (cur_resp.start == 0 && cur_resp.len == 0)
368		return (false);
369	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
370		return (false);
371	return (true);
372}
373
/*
 * Tear down the current client connection: delete the mevent handlers,
 * drop buffered protocol data, remove all software breakpoints, clear
 * pending per-vCPU events, and let all stopped vCPUs run again.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
400
/* Convert a value in [0, 15] to its lowercase ASCII hex digit. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
410
/* Convert one ASCII hex digit to its value; returns 0xF for non-hex input. */
static uint8_t
parse_digit(uint8_t v)
{

	if ('0' <= v && v <= '9')
		return (v - '0');
	if ('a' <= v && v <= 'f')
		return (v - 'a' + 10);
	if ('A' <= v && v <= 'F')
		return (v - 'A' + 10);
	return (0xF);
}
423
/* Parse 'len' big-endian hexadecimal characters into an integer. */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t value;
	size_t i;

	value = 0;
	for (i = 0; i < len; i++)
		value = (value << 4) | parse_digit(p[i]);
	return (value);
}
439
/* Parse two hex characters into one byte, high nibble first. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
446
/*
 * Try to write buffered response bytes to the GDB socket.  Keeps the
 * mevent write handler enabled only while unsent data remains; drops
 * the connection on a write error.
 */
static void
send_pending_data(int fd)
{
	ssize_t nwritten;

	if (cur_resp.len == 0) {
		mevent_disable(write_event);
		return;
	}
	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
	if (nwritten == -1) {
		warn("Write to GDB socket failed");
		close_connection();
	} else {
		io_buffer_advance(&cur_resp, nwritten);
		if (cur_resp.len == 0)
			mevent_disable(write_event);
		else
			mevent_enable(write_event);
	}
}
468
/* Append a single character to the output buffer. */
static void
send_char(uint8_t data)
{

	send_data(&data, 1);
}
477
478/* Append an array of bytes to the output buffer. */
479static void
480send_data(const uint8_t *data, size_t len)
481{
482
483	io_buffer_grow(&cur_resp, len);
484	memcpy(io_buffer_tail(&cur_resp), data, len);
485	cur_resp.len += len;
486}
487
/* Write 'v' as two lowercase hex characters into buf[0] and buf[1]. */
static void
format_byte(uint8_t v, uint8_t *buf)
{

	buf[1] = hex_digit(v & 0xf);
	buf[0] = hex_digit(v >> 4);
}
495
496/*
497 * Append a single byte (formatted as two hex characters) to the
498 * output buffer.
499 */
500static void
501send_byte(uint8_t v)
502{
503	uint8_t buf[2];
504
505	format_byte(v, buf);
506	send_data(buf, sizeof(buf));
507}
508
509static void
510start_packet(void)
511{
512
513	send_char('$');
514	cur_csum = 0;
515}
516
517static void
518finish_packet(void)
519{
520
521	send_char('#');
522	send_byte(cur_csum);
523	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
524}
525
526/*
527 * Append a single character (for the packet payload) and update the
528 * checksum.
529 */
530static void
531append_char(uint8_t v)
532{
533
534	send_char(v);
535	cur_csum += v;
536}
537
538/*
539 * Append an array of bytes (for the packet payload) and update the
540 * checksum.
541 */
542static void
543append_packet_data(const uint8_t *data, size_t len)
544{
545
546	send_data(data, len);
547	while (len > 0) {
548		cur_csum += *data;
549		data++;
550		len--;
551	}
552}
553
/* Append a NUL-terminated string to the packet payload. */
static void
append_string(const char *str)
{

	/*
	 * Cast avoids a pointer-sign mismatch: append_packet_data()
	 * takes 'const uint8_t *' while C strings are 'const char *'.
	 */
	append_packet_data((const uint8_t *)str, strlen(str));
}
560
/* Append one payload byte as two hex characters, updating the checksum. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
569
/*
 * Append the low 'len' bytes of 'value' least-significant byte first
 * (i.e. in x86 memory order).
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t remaining;

	for (remaining = len; remaining > 0; remaining--) {
		append_byte(value);
		value >>= 8;
	}
}
580
/*
 * Append the low 'len' bytes of 'value' most-significant byte first
 * (big-endian).
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	/*
	 * uint8_t (not char) matches the format_byte() and
	 * append_packet_data() prototypes, avoiding pointer-sign
	 * mismatches.
	 */
	uint8_t buf[len * 2];
	size_t i;

	for (i = 0; i < len; i++) {
		format_byte(value, buf + (len - i - 1) * 2);
		value >>= 8;
	}
	append_packet_data(buf, sizeof(buf));
}
593
/* Append an unsigned integer with no leading zero bytes ("0" for zero). */
static void
append_integer(unsigned int value)
{

	if (value != 0)
		append_unsigned_be(value, (fls(value) + 7) / 8);
	else
		append_char('0');
}
603
/* Append a string encoded as pairs of hex digits (GDB ascii-hex form). */
static void
append_asciihex(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
		append_byte(*p);
}
613
/* An empty packet ("$#00") tells the debugger a command is unsupported. */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
621
/* Send an "Exx" error reply carrying the given errno-style code. */
static void
send_error(int error)
{

	start_packet();
	append_char('E');
	append_byte(error);
	finish_packet();
}
631
/* Send the "OK" success reply. */
static void
send_ok(void)
{

	start_packet();
	append_string("OK");
	finish_packet();
}
640
/*
 * Parse a GDB thread-id: 0 for "0" (any thread), -1 for "-1" (all
 * threads), -2 for an empty id, otherwise the numeric id.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	if (len == 0)
		return (-2);
	if (len == 1 && data[0] == '0')
		return (0);
	if (len == 2 && memcmp(data, "-1", 2) == 0)
		return (-1);
	return (parse_integer(data, len));
}
653
654/*
655 * Report the current stop event to the debugger.  If the stop is due
656 * to an event triggered on a specific vCPU such as a breakpoint or
657 * stepping trap, stopped_vcpu will be set to the vCPU triggering the
658 * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
659 * the reporting vCPU for vCPU events.
660 */
661static void
662report_stop(bool set_cur_vcpu)
663{
664	struct vcpu_state *vs;
665
666	start_packet();
667	if (stopped_vcpu == -1) {
668		append_char('S');
669		append_byte(GDB_SIGNAL_TRAP);
670	} else {
671		vs = &vcpu_state[stopped_vcpu];
672		if (set_cur_vcpu)
673			cur_vcpu = stopped_vcpu;
674		append_char('T');
675		append_byte(GDB_SIGNAL_TRAP);
676		append_string("thread:");
677		append_integer(stopped_vcpu + 1);
678		append_char(';');
679		if (vs->hit_swbreak) {
680			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
681			if (swbreak_enabled)
682				append_string("swbreak:;");
683		} else if (vs->stepped)
684			debug("$vCPU %d reporting step\n", stopped_vcpu);
685		else
686			debug("$vCPU %d reporting ???\n", stopped_vcpu);
687	}
688	finish_packet();
689	report_next_stop = false;
690}
691
692/*
693 * If this stop is due to a vCPU event, clear that event to mark it as
694 * acknowledged.
695 */
696static void
697discard_stop(void)
698{
699	struct vcpu_state *vs;
700
701	if (stopped_vcpu != -1) {
702		vs = &vcpu_state[stopped_vcpu];
703		vs->hit_swbreak = false;
704		vs->stepped = false;
705		stopped_vcpu = -1;
706	}
707	report_next_stop = true;
708}
709
/*
 * Invoked once the last suspended vCPU has checked in.  The first
 * stop after attach is consumed silently (stopped_vcpu is simply
 * cleared); otherwise, if a stop reply is still owed, report it and
 * flush it to the client.
 */
static void
gdb_finish_suspend_vcpus(void)
{

	if (first_stop) {
		first_stop = false;
		stopped_vcpu = -1;
	} else if (report_next_stop) {
		assert(!response_pending());
		report_stop(true);
		send_pending_data(cur_fd);
	}
}
723
724/*
725 * vCPU threads invoke this function whenever the vCPU enters the
726 * debug server to pause or report an event.  vCPU threads wait here
727 * as long as the debug server keeps them suspended.
728 */
729static void
730_gdb_cpu_suspend(int vcpu, bool report_stop)
731{
732
733	debug("$vCPU %d suspending\n", vcpu);
734	CPU_SET(vcpu, &vcpus_waiting);
735	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
736		gdb_finish_suspend_vcpus();
737	while (CPU_ISSET(vcpu, &vcpus_suspended))
738		pthread_cond_wait(&idle_vcpus, &gdb_lock);
739	CPU_CLR(vcpu, &vcpus_waiting);
740	debug("$vCPU %d resuming\n", vcpu);
741}
742
743/*
744 * Invoked at the start of a vCPU thread's execution to inform the
745 * debug server about the new thread.
746 */
747void
748gdb_cpu_add(int vcpu)
749{
750
751	debug("$vCPU %d starting\n", vcpu);
752	pthread_mutex_lock(&gdb_lock);
753	assert(vcpu < guest_ncpus);
754	CPU_SET(vcpu, &vcpus_active);
755	if (!TAILQ_EMPTY(&breakpoints)) {
756		vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
757		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
758	}
759
760	/*
761	 * If a vcpu is added while vcpus are stopped, suspend the new
762	 * vcpu so that it will pop back out with a debug exit before
763	 * executing the first instruction.
764	 */
765	if (!CPU_EMPTY(&vcpus_suspended)) {
766		CPU_SET(vcpu, &vcpus_suspended);
767		_gdb_cpu_suspend(vcpu, false);
768	}
769	pthread_mutex_unlock(&gdb_lock);
770}
771
772/*
773 * Invoked by vCPU before resuming execution.  This enables stepping
774 * if the vCPU is marked as stepping.
775 */
776static void
777gdb_cpu_resume(int vcpu)
778{
779	struct vcpu_state *vs;
780	int error;
781
782	vs = &vcpu_state[vcpu];
783
784	/*
785	 * Any pending event should already be reported before
786	 * resuming.
787	 */
788	assert(vs->hit_swbreak == false);
789	assert(vs->stepped == false);
790	if (vs->stepping) {
791		error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
792		assert(error == 0);
793	}
794}
795
796/*
797 * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
798 * has been suspended due to an event on different vCPU or in response
799 * to a guest-wide suspend such as Ctrl-C or the stop on attach.
800 */
801void
802gdb_cpu_suspend(int vcpu)
803{
804
805	pthread_mutex_lock(&gdb_lock);
806	_gdb_cpu_suspend(vcpu, true);
807	gdb_cpu_resume(vcpu);
808	pthread_mutex_unlock(&gdb_lock);
809}
810
/*
 * Mark every active vCPU suspended and force them out of the guest.
 * If all of them are already waiting in the debug server, complete
 * the suspend immediately.  Called with gdb_lock held.
 */
static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	vcpus_suspended = vcpus_active;
	vm_suspend_cpu(ctx, -1);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}
822
823/*
824 * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
825 * the VT-x-specific MTRAP exit.
826 */
827void
828gdb_cpu_mtrap(int vcpu)
829{
830	struct vcpu_state *vs;
831
832	debug("$vCPU %d MTRAP\n", vcpu);
833	pthread_mutex_lock(&gdb_lock);
834	vs = &vcpu_state[vcpu];
835	if (vs->stepping) {
836		vs->stepping = false;
837		vs->stepped = true;
838		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
839		while (vs->stepped) {
840			if (stopped_vcpu == -1) {
841				debug("$vCPU %d reporting step\n", vcpu);
842				stopped_vcpu = vcpu;
843				gdb_suspend_vcpus();
844			}
845			_gdb_cpu_suspend(vcpu, true);
846		}
847		gdb_cpu_resume(vcpu);
848	}
849	pthread_mutex_unlock(&gdb_lock);
850}
851
852static struct breakpoint *
853find_breakpoint(uint64_t gpa)
854{
855	struct breakpoint *bp;
856
857	TAILQ_FOREACH(bp, &breakpoints, link) {
858		if (bp->gpa == gpa)
859			return (bp);
860	}
861	return (NULL);
862}
863
/*
 * Handler for VM_EXITCODE_BPT.  If the #BP hit one of the debug
 * server's software breakpoints, report it and park the vCPU until
 * the debugger acknowledges the stop (or removes the breakpoint).
 * Otherwise the breakpoint belongs to the guest, so re-inject #BP.
 */
void
gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error;

	pthread_mutex_lock(&gdb_lock);
	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpu];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		/* Rewind RIP back to the breakpoint instruction. */
		vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
		for (;;) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
				    vmexit->rip);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
		    vmexit->rip);
		/* Preserve the instruction length for the injected #BP. */
		error = vm_set_register(ctx, vcpu,
		    VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
914
/*
 * Release a single vCPU to execute one instruction.  Returns false if
 * the MTRAP single-step capability cannot be queried for this vCPU.
 */
static bool
gdb_step_vcpu(int vcpu)
{
	int error, val;

	debug("$vCPU %d step\n", vcpu);
	/* Probe for MTRAP support before committing to a step. */
	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
	if (error < 0)
		return (false);

	/* Acknowledge the pending stop and wake only this vCPU. */
	discard_stop();
	vcpu_state[vcpu].stepping = true;
	vm_resume_cpu(ctx, vcpu);
	CPU_CLR(vcpu, &vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
	return (true);
}
932
/* Let every suspended vCPU run again.  Called with gdb_lock held. */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_cpu(ctx, -1);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
943
944static void
945gdb_read_regs(void)
946{
947	uint64_t regvals[nitems(gdb_regset)];
948	int i;
949
950	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
951	    gdb_regset, regvals) == -1) {
952		send_error(errno);
953		return;
954	}
955	start_packet();
956	for (i = 0; i < nitems(regvals); i++)
957		append_unsigned_native(regvals[i], gdb_regsize[i]);
958	finish_packet();
959}
960
/*
 * Handle an 'm' packet ("m addr,length"): read guest memory at a
 * guest virtual address and reply with the bytes hex-encoded.  Guest
 * RAM pages are copied directly; other pages go through the MMIO read
 * path.  A fault mid-transfer truncates the reply at the bytes read
 * so far (or sends an error if nothing was read yet).
 */
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			/* Translation error: truncate or report. */
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			/* Unmapped address: truncate or report a fault. */
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(ctx, cur_vcpu, gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					/* Emit in memory order: LSB first. */
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	if (!started)
		start_packet();
	finish_packet();
}
1072
/*
 * Handle an 'M' packet ("M addr,length:XX..."): write hex-encoded
 * bytes to guest memory at a guest virtual address.  Guest RAM pages
 * are written directly; other pages go through the MMIO write path.
 * Replies OK on success or an error code on any failure.
 */
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Verify the available bytes match the length. */
	if (len != resid * 2) {
		send_error(EINVAL);
		return;
	}

	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			send_error(errno);
			return;
		}
		if (error == 0) {
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1) {
					bytes = 1;
					val = parse_byte(data);
				} else if (gpa & 2 || todo == 2) {
					bytes = 2;
					/* Hex pairs are memory order; swap to host. */
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(ctx, cur_vcpu, gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	assert(len == 0);
	send_ok();
}
1181
/*
 * Toggle VM_CAP_BPT_EXIT on every active vCPU so that guest #BP
 * exceptions exit to userspace (or stop doing so).  Returns false if
 * any vCPU rejects the capability change.
 */
static bool
set_breakpoint_caps(bool enable)
{
	cpuset_t mask;
	int vcpu;

	mask = vcpus_active;
	/* Walk the set of active vCPUs one bit at a time. */
	while (!CPU_EMPTY(&mask)) {
		vcpu = CPU_FFS(&mask) - 1;
		CPU_CLR(vcpu, &mask);
		if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
		    enable ? 1 : 0) < 0)
			return (false);
		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
		    enable ? "en" : "dis");
	}
	return (true);
}
1200
/*
 * Remove every software breakpoint: restore each saved instruction
 * byte, free the list entries, and turn breakpoint exits back off.
 */
static void
remove_all_sw_breakpoints(void)
{
	struct breakpoint *bp, *nbp;
	uint8_t *cp;

	if (TAILQ_EMPTY(&breakpoints))
		return;

	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
		debug("remove breakpoint at %#lx\n", bp->gpa);
		cp = paddr_guest2host(ctx, bp->gpa, 1);
		/* Put the original instruction byte back. */
		*cp = bp->shadow_inst;
		TAILQ_REMOVE(&breakpoints, bp, link);
		free(bp);
	}
	TAILQ_INIT(&breakpoints);
	set_breakpoint_caps(false);
}
1220
1221static void
1222update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1223{
1224	struct breakpoint *bp;
1225	uint64_t gpa;
1226	uint8_t *cp;
1227	int error;
1228
1229	if (kind != 1) {
1230		send_error(EINVAL);
1231		return;
1232	}
1233
1234	error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1235	if (error == -1) {
1236		send_error(errno);
1237		return;
1238	}
1239	if (error == 0) {
1240		send_error(EFAULT);
1241		return;
1242	}
1243
1244	cp = paddr_guest2host(ctx, gpa, 1);
1245
1246	/* Only permit breakpoints in guest RAM. */
1247	if (cp == NULL) {
1248		send_error(EFAULT);
1249		return;
1250	}
1251
1252	/* Find any existing breakpoint. */
1253	bp = find_breakpoint(gpa);
1254
1255	/*
1256	 * Silently ignore duplicate commands since the protocol
1257	 * requires these packets to be idempotent.
1258	 */
1259	if (insert) {
1260		if (bp == NULL) {
1261			if (TAILQ_EMPTY(&breakpoints) &&
1262			    !set_breakpoint_caps(true)) {
1263				send_empty_response();
1264				return;
1265			}
1266			bp = malloc(sizeof(*bp));
1267			bp->gpa = gpa;
1268			bp->shadow_inst = *cp;
1269			*cp = 0xcc;	/* INT 3 */
1270			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1271			debug("new breakpoint at %#lx\n", gpa);
1272		}
1273	} else {
1274		if (bp != NULL) {
1275			debug("remove breakpoint at %#lx\n", gpa);
1276			*cp = bp->shadow_inst;
1277			TAILQ_REMOVE(&breakpoints, bp, link);
1278			free(bp);
1279			if (TAILQ_EMPTY(&breakpoints))
1280				set_breakpoint_caps(false);
1281		}
1282	}
1283	send_ok();
1284}
1285
/*
 * Handle 'Z'/'z' packets ("Z/z type,addr,kind"): insert ('Z') or
 * remove ('z') a breakpoint.  Only type 0 (software breakpoint) is
 * supported; other types get an empty (unsupported) response.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	uint64_t gva;
	uint8_t *cp;
	bool insert;
	int kind, type;

	insert = data[0] == 'Z';

	/* Skip 'Z/z' */
	data += 1;
	len -= 1;

	/* Parse and consume type. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	type = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume kind. */
	cp = memchr(data, ';', len);
	if (cp == data) {
		send_error(EINVAL);
		return;
	}
	if (cp != NULL) {
		/*
		 * We do not advertise support for either the
		 * ConditionalBreakpoints or BreakpointCommands
		 * features, so we should not be getting conditions or
		 * commands from the remote end.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);
	data += len;
	len = 0;

	switch (type) {
	case 0:
		update_sw_breakpoint(gva, kind, insert);
		break;
	default:
		/* Hardware breakpoints/watchpoints are not supported. */
		send_empty_response();
		break;
	}
}
1349
/* True if the packet in 'data' begins with the command string 'cmd'. */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	if (cmdlen > len)
		return (false);
	return (memcmp(data, cmd, cmdlen) == 0);
}
1358
1359static void
1360check_features(const uint8_t *data, size_t len)
1361{
1362	char *feature, *next_feature, *str, *value;
1363	bool supported;
1364
1365	str = malloc(len + 1);
1366	memcpy(str, data, len);
1367	str[len] = '\0';
1368	next_feature = str;
1369
1370	while ((feature = strsep(&next_feature, ";")) != NULL) {
1371		/*
1372		 * Null features shouldn't exist, but skip if they
1373		 * do.
1374		 */
1375		if (strcmp(feature, "") == 0)
1376			continue;
1377
1378		/*
1379		 * Look for the value or supported / not supported
1380		 * flag.
1381		 */
1382		value = strchr(feature, '=');
1383		if (value != NULL) {
1384			*value = '\0';
1385			value++;
1386			supported = true;
1387		} else {
1388			value = feature + strlen(feature) - 1;
1389			switch (*value) {
1390			case '+':
1391				supported = true;
1392				break;
1393			case '-':
1394				supported = false;
1395				break;
1396			default:
1397				/*
1398				 * This is really a protocol error,
1399				 * but we just ignore malformed
1400				 * features for ease of
1401				 * implementation.
1402				 */
1403				continue;
1404			}
1405			value = NULL;
1406		}
1407
1408		if (strcmp(feature, "swbreak") == 0)
1409			swbreak_enabled = supported;
1410	}
1411	free(str);
1412
1413	start_packet();
1414
1415	/* This is an arbitrary limit. */
1416	append_string("PacketSize=4096");
1417	append_string(";swbreak+");
1418	finish_packet();
1419}
1420
/*
 * Dispatch a 'q' (general query) packet.
 *
 * Note that command_equals() is a prefix match, so the order of the
 * comparisons below matters: e.g. "qC" must not be tested before a
 * longer command that starts with "qC".  Unrecognized queries get an
 * empty response, which tells GDB the query is unsupported.
 */
static void
gdb_query(const uint8_t *data, size_t len)
{

	/*
	 * TODO:
	 * - qSearch
	 */
	if (command_equals(data, len, "qAttached")) {
		/* We are always "attached" to an existing VM. */
		start_packet();
		append_char('1');
		finish_packet();
	} else if (command_equals(data, len, "qC")) {
		/* Report the current thread; GDB tids are vcpu + 1. */
		start_packet();
		append_string("QC");
		append_integer(cur_vcpu + 1);
		finish_packet();
	} else if (command_equals(data, len, "qfThreadInfo")) {
		cpuset_t mask;
		bool first;
		int vcpu;

		if (CPU_EMPTY(&vcpus_active)) {
			send_error(EINVAL);
			return;
		}
		/* Reply with the full list of active vcpus in one packet. */
		mask = vcpus_active;
		start_packet();
		append_char('m');
		first = true;
		while (!CPU_EMPTY(&mask)) {
			vcpu = CPU_FFS(&mask) - 1;
			CPU_CLR(vcpu, &mask);
			if (first)
				first = false;
			else
				append_char(',');
			append_integer(vcpu + 1);
		}
		finish_packet();
	} else if (command_equals(data, len, "qsThreadInfo")) {
		/* 'l' (end of list): qfThreadInfo already sent everything. */
		start_packet();
		append_char('l');
		finish_packet();
	} else if (command_equals(data, len, "qSupported")) {
		data += strlen("qSupported");
		len -= strlen("qSupported");
		check_features(data, len);
	} else if (command_equals(data, len, "qThreadExtraInfo")) {
		char buf[16];
		int tid;

		data += strlen("qThreadExtraInfo");
		len -= strlen("qThreadExtraInfo");
		if (*data != ',') {
			send_error(EINVAL);
			return;
		}
		tid = parse_threadid(data + 1, len - 1);
		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
			send_error(EINVAL);
			return;
		}

		/* Human-readable thread description, hex-encoded. */
		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
		start_packet();
		append_asciihex(buf);
		finish_packet();
	} else
		send_empty_response();
}
1492
1493static void
1494handle_command(const uint8_t *data, size_t len)
1495{
1496
1497	/* Reject packets with a sequence-id. */
1498	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1499	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1500		send_empty_response();
1501		return;
1502	}
1503
1504	switch (*data) {
1505	case 'c':
1506		if (len != 1) {
1507			send_error(EINVAL);
1508			break;
1509		}
1510
1511		discard_stop();
1512		gdb_resume_vcpus();
1513		break;
1514	case 'D':
1515		send_ok();
1516
1517		/* TODO: Resume any stopped CPUs. */
1518		break;
1519	case 'g': {
1520		gdb_read_regs();
1521		break;
1522	}
1523	case 'H': {
1524		int tid;
1525
1526		if (data[1] != 'g' && data[1] != 'c') {
1527			send_error(EINVAL);
1528			break;
1529		}
1530		tid = parse_threadid(data + 2, len - 2);
1531		if (tid == -2) {
1532			send_error(EINVAL);
1533			break;
1534		}
1535
1536		if (CPU_EMPTY(&vcpus_active)) {
1537			send_error(EINVAL);
1538			break;
1539		}
1540		if (tid == -1 || tid == 0)
1541			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1542		else if (CPU_ISSET(tid - 1, &vcpus_active))
1543			cur_vcpu = tid - 1;
1544		else {
1545			send_error(EINVAL);
1546			break;
1547		}
1548		send_ok();
1549		break;
1550	}
1551	case 'm':
1552		gdb_read_mem(data, len);
1553		break;
1554	case 'M':
1555		gdb_write_mem(data, len);
1556		break;
1557	case 'T': {
1558		int tid;
1559
1560		tid = parse_threadid(data + 1, len - 1);
1561		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1562			send_error(EINVAL);
1563			return;
1564		}
1565		send_ok();
1566		break;
1567	}
1568	case 'q':
1569		gdb_query(data, len);
1570		break;
1571	case 's':
1572		if (len != 1) {
1573			send_error(EINVAL);
1574			break;
1575		}
1576
1577		/* Don't send a reply until a stop occurs. */
1578		if (!gdb_step_vcpu(cur_vcpu)) {
1579			send_error(EOPNOTSUPP);
1580			break;
1581		}
1582		break;
1583	case 'z':
1584	case 'Z':
1585		parse_breakpoint(data, len);
1586		break;
1587	case '?':
1588		report_stop(false);
1589		break;
1590	case 'G': /* TODO */
1591	case 'v':
1592		/* Handle 'vCont' */
1593		/* 'vCtrlC' */
1594	case 'p': /* TODO */
1595	case 'P': /* TODO */
1596	case 'Q': /* TODO */
1597	case 't': /* TODO */
1598	case 'X': /* TODO */
1599	default:
1600		send_empty_response();
1601	}
1602}
1603
/*
 * Check for a valid packet in the command buffer.
 *
 * Consumes as many complete items (interrupts, ACK/NACK bytes, and
 * '$'-delimited packets) from cur_comm as are available, stopping when
 * the buffer is empty or holds only a partial packet.  Called with
 * gdb_lock held.
 */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			/* Ctrl-C: out-of-band interrupt request from GDB. */
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			/* Deliver a deferred stop reply now that GDB ACKed. */
			if (stopped_vcpu != -1 && report_next_stop) {
				report_stop(true);
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				/*
				 * Rewind the response buffer to retransmit
				 * the packet, skipping any leading '+'.
				 */
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			/* "$...#" plus the two checksum digits. */
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum. */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				/* Bad checksum: discard and NACK. */
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			send_char('+');

			/* Payload is the text between '$' and '#'. */
			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending())
				debug("-> +\n");
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}
1692
/*
 * mevent read callback for the GDB socket: pull whatever data is
 * available into the command buffer and parse it.  EOF or a fatal
 * read error tears down the connection.
 */
static void
gdb_readable(int fd, enum ev_type event, void *arg)
{
	ssize_t nread;
	int pending;

	if (ioctl(fd, FIONREAD, &pending) == -1) {
		warn("FIONREAD on GDB socket");
		return;
	}

	/*
	 * 'pending' might be zero due to EOF.  We need to call read
	 * with a non-zero length to detect EOF.
	 */
	if (pending == 0)
		pending = 1;

	/* Ensure there is room in the command buffer. */
	io_buffer_grow(&cur_comm, pending);
	assert(io_buffer_avail(&cur_comm) >= pending);

	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
	if (nread == 0) {
		/* EOF: the debugger went away. */
		close_connection();
	} else if (nread == -1) {
		if (errno == EAGAIN)
			return;

		warn("Read from GDB socket");
		close_connection();
	} else {
		/* Parse under gdb_lock; commands touch shared vcpu state. */
		cur_comm.len += nread;
		pthread_mutex_lock(&gdb_lock);
		check_command(fd);
		pthread_mutex_unlock(&gdb_lock);
	}
}
1731
/*
 * mevent write callback for the GDB socket: flush any queued response
 * data once the socket becomes writable again.
 */
static void
gdb_writable(int fd, enum ev_type event, void *arg)
{

	send_pending_data(fd);
}
1738
1739static void
1740new_connection(int fd, enum ev_type event, void *arg)
1741{
1742	int optval, s;
1743
1744	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1745	if (s == -1) {
1746		if (arg != NULL)
1747			err(1, "Failed accepting initial GDB connection");
1748
1749		/* Silently ignore errors post-startup. */
1750		return;
1751	}
1752
1753	optval = 1;
1754	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1755	    -1) {
1756		warn("Failed to disable SIGPIPE for GDB connection");
1757		close(s);
1758		return;
1759	}
1760
1761	pthread_mutex_lock(&gdb_lock);
1762	if (cur_fd != -1) {
1763		close(s);
1764		warnx("Ignoring additional GDB connection.");
1765	}
1766
1767	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1768	if (read_event == NULL) {
1769		if (arg != NULL)
1770			err(1, "Failed to setup initial GDB connection");
1771		pthread_mutex_unlock(&gdb_lock);
1772		return;
1773	}
1774	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1775	if (write_event == NULL) {
1776		if (arg != NULL)
1777			err(1, "Failed to setup initial GDB connection");
1778		mevent_delete_close(read_event);
1779		read_event = NULL;
1780	}
1781
1782	cur_fd = s;
1783	cur_vcpu = 0;
1784	stopped_vcpu = -1;
1785
1786	/* Break on attach. */
1787	first_stop = true;
1788	report_next_stop = false;
1789	gdb_suspend_vcpus();
1790	pthread_mutex_unlock(&gdb_lock);
1791}
1792
#ifndef WITHOUT_CAPSICUM
/*
 * Restrict the Capsicum rights on the GDB socket to the minimum set
 * needed by this file: accept, event polling, read/write, setsockopt
 * (SO_NOSIGPIPE), and the FIONREAD ioctl used by gdb_readable().
 */
void
limit_gdb_socket(int s)
{
	cap_rights_t rights;
	unsigned long ioctls[] = { FIONREAD };

	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
	    CAP_SETSOCKOPT, CAP_IOCTL);
	if (caph_rights_limit(s, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
}
#endif
1808
1809void
1810init_gdb(struct vmctx *_ctx, int sport, bool wait)
1811{
1812	struct sockaddr_in sin;
1813	int error, flags, optval, s;
1814
1815	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1816
1817	error = pthread_mutex_init(&gdb_lock, NULL);
1818	if (error != 0)
1819		errc(1, error, "gdb mutex init");
1820	error = pthread_cond_init(&idle_vcpus, NULL);
1821	if (error != 0)
1822		errc(1, error, "gdb cv init");
1823
1824	ctx = _ctx;
1825	s = socket(PF_INET, SOCK_STREAM, 0);
1826	if (s < 0)
1827		err(1, "gdb socket create");
1828
1829	optval = 1;
1830	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
1831
1832	sin.sin_len = sizeof(sin);
1833	sin.sin_family = AF_INET;
1834	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1835	sin.sin_port = htons(sport);
1836
1837	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1838		err(1, "gdb socket bind");
1839
1840	if (listen(s, 1) < 0)
1841		err(1, "gdb socket listen");
1842
1843	stopped_vcpu = -1;
1844	TAILQ_INIT(&breakpoints);
1845	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
1846	if (wait) {
1847		/*
1848		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1849		 * logic in gdb_cpu_add() to suspend the first vcpu before
1850		 * it starts execution.  The vcpu will remain suspended
1851		 * until a debugger connects.
1852		 */
1853		CPU_SET(0, &vcpus_suspended);
1854		stopped_vcpu = 0;
1855	}
1856
1857	flags = fcntl(s, F_GETFL);
1858	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1859		err(1, "Failed to mark gdb socket non-blocking");
1860
1861#ifndef WITHOUT_CAPSICUM
1862	limit_gdb_socket(s);
1863#endif
1864	mevent_add(s, EVF_READ, new_connection, NULL);
1865}
1866