1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/param.h>
32#ifndef WITHOUT_CAPSICUM
33#include <sys/capsicum.h>
34#endif
35#include <sys/endian.h>
36#include <sys/ioctl.h>
37#include <sys/mman.h>
38#include <sys/queue.h>
39#include <sys/socket.h>
40#include <machine/atomic.h>
41#include <machine/specialreg.h>
42#include <machine/vmm.h>
43#include <netinet/in.h>
44#include <assert.h>
45#ifndef WITHOUT_CAPSICUM
46#include <capsicum_helpers.h>
47#endif
48#include <err.h>
49#include <errno.h>
50#include <fcntl.h>
51#include <pthread.h>
52#include <pthread_np.h>
53#include <stdbool.h>
54#include <stdio.h>
55#include <stdlib.h>
56#include <string.h>
57#include <sysexits.h>
58#include <unistd.h>
59#include <vmmapi.h>
60
61#include "bhyverun.h"
62#include "gdb.h"
63#include "mem.h"
64#include "mevent.h"
65
66/*
67 * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
68 * use SIGTRAP.
69 */
70#define	GDB_SIGNAL_TRAP		5
71
72static void gdb_resume_vcpus(void);
73static void check_command(int fd);
74
75static struct mevent *read_event, *write_event;
76
77static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
78static pthread_mutex_t gdb_lock;
79static pthread_cond_t idle_vcpus;
80static bool first_stop, report_next_stop, swbreak_enabled;
81
82/*
83 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
84 * read buffer, 'start' is unused and 'len' contains the number of
85 * valid bytes in the buffer.  For a write buffer, 'start' is set to
86 * the index of the next byte in 'data' to send, and 'len' contains
87 * the remaining number of valid bytes to send.
88 */
89struct io_buffer {
90	uint8_t *data;
91	size_t capacity;
92	size_t start;
93	size_t len;
94};
95
96struct breakpoint {
97	uint64_t gpa;
98	uint8_t shadow_inst;
99	TAILQ_ENTRY(breakpoint) link;
100};
101
102/*
103 * When a vCPU stops to due to an event that should be reported to the
104 * debugger, information about the event is stored in this structure.
105 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
106 * and stops other vCPUs so the event can be reported.  The
107 * report_stop() function reports the event for the 'stopped_vcpu'
108 * vCPU.  When the debugger resumes execution via continue or step,
109 * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
110 * event handlers until the associated event is reported or disabled.
111 *
112 * An idle vCPU will have all of the boolean fields set to false.
113 *
114 * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
115 * released to execute the stepped instruction.  When the vCPU reports
116 * the stepping trap, 'stepped' is set.
117 *
118 * When a vCPU hits a breakpoint set by the debug server,
119 * 'hit_swbreak' is set to true.
120 */
121struct vcpu_state {
122	bool stepping;
123	bool stepped;
124	bool hit_swbreak;
125};
126
127static struct io_buffer cur_comm, cur_resp;
128static uint8_t cur_csum;
129static struct vmctx *ctx;
130static int cur_fd = -1;
131static TAILQ_HEAD(, breakpoint) breakpoints;
132static struct vcpu_state *vcpu_state;
133static int cur_vcpu, stopped_vcpu;
134
/*
 * Registers sent for the 'g' packet, in wire order.  Each entry is
 * paired by index with gdb_regsize[] below, which gives the number of
 * bytes transmitted for that register.
 */
const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
161
/*
 * Transmitted size in bytes for the register at the same index in
 * gdb_regset[] above: 8 for the 64-bit GPRs and RIP, 4 for RFLAGS and
 * the segment registers.
 */
const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
188
189#ifdef GDB_LOG
190#include <stdarg.h>
191#include <stdio.h>
192
/*
 * Logging helper compiled in only with GDB_LOG: appends printf-style
 * messages to /tmp/bhyve_gdb.log.  The log file is opened lazily on
 * first use and, when capsicum is enabled, limited to write-only
 * capabilities.  Logging is silently disabled if the file cannot be
 * opened or limited.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		logfile = fopen("/tmp/bhyve_gdb.log", "w");
		if (logfile == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
			fclose(logfile);
			logfile = NULL;
			return;
		}
#endif
		/* Line buffering keeps the log readable during a live session. */
		setlinebuf(logfile);
	}
	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
216#else
217#define debug(...)
218#endif
219
220static void	remove_all_sw_breakpoints(void);
221
/*
 * Derive the vCPU's current paging configuration (CR3, CPL, CPU mode
 * and paging mode) from its control registers for use in address
 * translation.  Returns 0 on success, -1 if the registers could not
 * be read.
 */
static int
guest_paging_info(int vcpu, struct vm_guest_paging *paging)
{
	uint64_t regs[4];
	const int regset[4] = {
		VM_REG_GUEST_CR0,
		VM_REG_GUEST_CR3,
		VM_REG_GUEST_CR4,
		VM_REG_GUEST_EFER
	};

	/* Fetch CR0, CR3, CR4 and EFER in a single call. */
	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
		return (-1);

	/*
	 * For the debugger, always pretend to be the kernel (CPL 0),
	 * and if long-mode is enabled, always parse addresses as if
	 * in 64-bit mode.
	 */
	paging->cr3 = regs[1];
	paging->cpl = 0;
	if (regs[3] & EFER_LMA)
		paging->cpu_mode = CPU_MODE_64BIT;
	else if (regs[0] & CR0_PE)
		paging->cpu_mode = CPU_MODE_PROTECTED;
	else
		paging->cpu_mode = CPU_MODE_REAL;
	if (!(regs[0] & CR0_PG))
		paging->paging_mode = PAGING_MODE_FLAT;
	else if (!(regs[2] & CR4_PAE))
		paging->paging_mode = PAGING_MODE_32;
	else if (regs[3] & EFER_LME)
		paging->paging_mode = PAGING_MODE_64;
	else
		paging->paging_mode = PAGING_MODE_PAE;
	return (0);
}
259
260/*
261 * Map a guest virtual address to a physical address (for a given vcpu).
262 * If a guest virtual address is valid, return 1.  If the address is
263 * not valid, return 0.  If an error occurs obtaining the mapping,
264 * return -1.
265 */
266static int
267guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
268{
269	struct vm_guest_paging paging;
270	int fault;
271
272	if (guest_paging_info(vcpu, &paging) == -1)
273		return (-1);
274
275	/*
276	 * Always use PROT_READ.  We really care if the VA is
277	 * accessible, not if the current vCPU can write.
278	 */
279	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
280	    &fault) == -1)
281		return (-1);
282	if (fault)
283		return (0);
284	return (1);
285}
286
287static void
288io_buffer_reset(struct io_buffer *io)
289{
290
291	io->start = 0;
292	io->len = 0;
293}
294
295/* Available room for adding data. */
296static size_t
297io_buffer_avail(struct io_buffer *io)
298{
299
300	return (io->capacity - (io->start + io->len));
301}
302
303static uint8_t *
304io_buffer_head(struct io_buffer *io)
305{
306
307	return (io->data + io->start);
308}
309
310static uint8_t *
311io_buffer_tail(struct io_buffer *io)
312{
313
314	return (io->data + io->start + io->len);
315}
316
317static void
318io_buffer_advance(struct io_buffer *io, size_t amount)
319{
320
321	assert(amount <= io->len);
322	io->start += amount;
323	io->len -= amount;
324}
325
326static void
327io_buffer_consume(struct io_buffer *io, size_t amount)
328{
329
330	io_buffer_advance(io, amount);
331	if (io->len == 0) {
332		io->start = 0;
333		return;
334	}
335
336	/*
337	 * XXX: Consider making this move optional and compacting on a
338	 * future read() before realloc().
339	 */
340	memmove(io->data, io_buffer_head(io), io->len);
341	io->start = 0;
342}
343
344static void
345io_buffer_grow(struct io_buffer *io, size_t newsize)
346{
347	uint8_t *new_data;
348	size_t avail, new_cap;
349
350	avail = io_buffer_avail(io);
351	if (newsize <= avail)
352		return;
353
354	new_cap = io->capacity + (newsize - avail);
355	new_data = realloc(io->data, new_cap);
356	if (new_data == NULL)
357		err(1, "Failed to grow GDB I/O buffer");
358	io->data = new_data;
359	io->capacity = new_cap;
360}
361
362static bool
363response_pending(void)
364{
365
366	if (cur_resp.start == 0 && cur_resp.len == 0)
367		return (false);
368	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
369		return (false);
370	return (true);
371}
372
/*
 * Tear down the current debugger connection: remove the mevent
 * handlers, reset the I/O buffers, remove all software breakpoints,
 * clear any pending vCPU stop events and resume the guest.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
399
400static uint8_t
401hex_digit(uint8_t nibble)
402{
403
404	if (nibble <= 9)
405		return (nibble + '0');
406	else
407		return (nibble + 'a' - 10);
408}
409
410static uint8_t
411parse_digit(uint8_t v)
412{
413
414	if (v >= '0' && v <= '9')
415		return (v - '0');
416	if (v >= 'a' && v <= 'f')
417		return (v - 'a' + 10);
418	if (v >= 'A' && v <= 'F')
419		return (v - 'A' + 10);
420	return (0xF);
421}
422
423/* Parses big-endian hexadecimal. */
424static uintmax_t
425parse_integer(const uint8_t *p, size_t len)
426{
427	uintmax_t v;
428
429	v = 0;
430	while (len > 0) {
431		v <<= 4;
432		v |= parse_digit(*p);
433		p++;
434		len--;
435	}
436	return (v);
437}
438
439static uint8_t
440parse_byte(const uint8_t *p)
441{
442
443	return (parse_digit(p[0]) << 4 | parse_digit(p[1]));
444}
445
/*
 * Flush as much of the response buffer to the socket as it will
 * accept.  The write event is disabled once the buffer drains and
 * enabled while data remains, so mevent only polls for writability
 * when there is something to send.  A failed write drops the
 * connection.
 */
static void
send_pending_data(int fd)
{
	ssize_t nwritten;

	if (cur_resp.len == 0) {
		mevent_disable(write_event);
		return;
	}
	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
	if (nwritten == -1) {
		warn("Write to GDB socket failed");
		close_connection();
	} else {
		io_buffer_advance(&cur_resp, nwritten);
		if (cur_resp.len == 0)
			mevent_disable(write_event);
		else
			mevent_enable(write_event);
	}
}
467
468/* Append a single character to the output buffer. */
469static void
470send_char(uint8_t data)
471{
472	io_buffer_grow(&cur_resp, 1);
473	*io_buffer_tail(&cur_resp) = data;
474	cur_resp.len++;
475}
476
477/* Append an array of bytes to the output buffer. */
478static void
479send_data(const uint8_t *data, size_t len)
480{
481
482	io_buffer_grow(&cur_resp, len);
483	memcpy(io_buffer_tail(&cur_resp), data, len);
484	cur_resp.len += len;
485}
486
487static void
488format_byte(uint8_t v, uint8_t *buf)
489{
490
491	buf[0] = hex_digit(v >> 4);
492	buf[1] = hex_digit(v & 0xf);
493}
494
495/*
496 * Append a single byte (formatted as two hex characters) to the
497 * output buffer.
498 */
499static void
500send_byte(uint8_t v)
501{
502	uint8_t buf[2];
503
504	format_byte(v, buf);
505	send_data(buf, sizeof(buf));
506}
507
508static void
509start_packet(void)
510{
511
512	send_char('$');
513	cur_csum = 0;
514}
515
516static void
517finish_packet(void)
518{
519
520	send_char('#');
521	send_byte(cur_csum);
522	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
523}
524
525/*
526 * Append a single character (for the packet payload) and update the
527 * checksum.
528 */
529static void
530append_char(uint8_t v)
531{
532
533	send_char(v);
534	cur_csum += v;
535}
536
537/*
538 * Append an array of bytes (for the packet payload) and update the
539 * checksum.
540 */
541static void
542append_packet_data(const uint8_t *data, size_t len)
543{
544
545	send_data(data, len);
546	while (len > 0) {
547		cur_csum += *data;
548		data++;
549		len--;
550	}
551}
552
553static void
554append_string(const char *str)
555{
556
557	append_packet_data(str, strlen(str));
558}
559
560static void
561append_byte(uint8_t v)
562{
563	uint8_t buf[2];
564
565	format_byte(v, buf);
566	append_packet_data(buf, sizeof(buf));
567}
568
569static void
570append_unsigned_native(uintmax_t value, size_t len)
571{
572	size_t i;
573
574	for (i = 0; i < len; i++) {
575		append_byte(value);
576		value >>= 8;
577	}
578}
579
580static void
581append_unsigned_be(uintmax_t value, size_t len)
582{
583	char buf[len * 2];
584	size_t i;
585
586	for (i = 0; i < len; i++) {
587		format_byte(value, buf + (len - i - 1) * 2);
588		value >>= 8;
589	}
590	append_packet_data(buf, sizeof(buf));
591}
592
/* Append an unsigned integer with the minimum number of hex bytes. */
static void
append_integer(unsigned int value)
{

	if (value != 0)
		append_unsigned_be(value, (fls(value) + 7) / 8);
	else
		append_char('0');
}
602
/* Append a string encoded as hex character pairs. */
static void
append_asciihex(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
		append_byte(*p);
}
612
/* Send a packet with an empty payload ("$#00"). */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
620
/*
 * Send an error reply: 'E' followed by the low byte of 'error' as two
 * hex digits.
 */
static void
send_error(int error)
{

	start_packet();
	append_char('E');
	append_byte(error);
	finish_packet();
}
630
/* Send the standard "OK" success reply. */
static void
send_ok(void)
{

	start_packet();
	append_string("OK");
	finish_packet();
}
639
640static int
641parse_threadid(const uint8_t *data, size_t len)
642{
643
644	if (len == 1 && *data == '0')
645		return (0);
646	if (len == 2 && memcmp(data, "-1", 2) == 0)
647		return (-1);
648	if (len == 0)
649		return (-2);
650	return (parse_integer(data, len));
651}
652
653/*
654 * Report the current stop event to the debugger.  If the stop is due
655 * to an event triggered on a specific vCPU such as a breakpoint or
656 * stepping trap, stopped_vcpu will be set to the vCPU triggering the
657 * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
658 * the reporting vCPU for vCPU events.
659 */
660static void
661report_stop(bool set_cur_vcpu)
662{
663	struct vcpu_state *vs;
664
665	start_packet();
666	if (stopped_vcpu == -1) {
667		append_char('S');
668		append_byte(GDB_SIGNAL_TRAP);
669	} else {
670		vs = &vcpu_state[stopped_vcpu];
671		if (set_cur_vcpu)
672			cur_vcpu = stopped_vcpu;
673		append_char('T');
674		append_byte(GDB_SIGNAL_TRAP);
675		append_string("thread:");
676		append_integer(stopped_vcpu + 1);
677		append_char(';');
678		if (vs->hit_swbreak) {
679			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
680			if (swbreak_enabled)
681				append_string("swbreak:;");
682		} else if (vs->stepped)
683			debug("$vCPU %d reporting step\n", stopped_vcpu);
684		else
685			debug("$vCPU %d reporting ???\n", stopped_vcpu);
686	}
687	finish_packet();
688	report_next_stop = false;
689}
690
691/*
692 * If this stop is due to a vCPU event, clear that event to mark it as
693 * acknowledged.
694 */
695static void
696discard_stop(void)
697{
698	struct vcpu_state *vs;
699
700	if (stopped_vcpu != -1) {
701		vs = &vcpu_state[stopped_vcpu];
702		vs->hit_swbreak = false;
703		vs->stepped = false;
704		stopped_vcpu = -1;
705	}
706	report_next_stop = true;
707}
708
/*
 * Invoked once every suspended vCPU has checked in.  The very first
 * stop (on attach) is swallowed rather than reported; later stops are
 * reported unless report_next_stop is cleared.
 */
static void
gdb_finish_suspend_vcpus(void)
{

	if (first_stop) {
		first_stop = false;
		stopped_vcpu = -1;
	} else if (report_next_stop) {
		/* A previous response must have fully drained by now. */
		assert(!response_pending());
		report_stop(true);
		send_pending_data(cur_fd);
	}
}
722
723/*
724 * vCPU threads invoke this function whenever the vCPU enters the
725 * debug server to pause or report an event.  vCPU threads wait here
726 * as long as the debug server keeps them suspended.
727 */
static void
_gdb_cpu_suspend(int vcpu, bool report_stop)
{

	debug("$vCPU %d suspending\n", vcpu);
	CPU_SET(vcpu, &vcpus_waiting);
	/* If this is the last vCPU to check in, report the stop. */
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	/* Block (dropping gdb_lock) until the debug server releases us. */
	while (CPU_ISSET(vcpu, &vcpus_suspended))
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpu, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpu);
}
741
742/*
743 * Invoked at the start of a vCPU thread's execution to inform the
744 * debug server about the new thread.
745 */
void
gdb_cpu_add(int vcpu)
{

	debug("$vCPU %d starting\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	assert(vcpu < guest_ncpus);
	CPU_SET(vcpu, &vcpus_active);
	if (!TAILQ_EMPTY(&breakpoints)) {
		/* Existing breakpoints require BPT exits on the new vCPU. */
		vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
	}

	/*
	 * If a vcpu is added while vcpus are stopped, suspend the new
	 * vcpu so that it will pop back out with a debug exit before
	 * executing the first instruction.
	 */
	if (!CPU_EMPTY(&vcpus_suspended)) {
		CPU_SET(vcpu, &vcpus_suspended);
		_gdb_cpu_suspend(vcpu, false);
	}
	pthread_mutex_unlock(&gdb_lock);
}
770
771/*
772 * Invoked by vCPU before resuming execution.  This enables stepping
773 * if the vCPU is marked as stepping.
774 */
static void
gdb_cpu_resume(int vcpu)
{
	struct vcpu_state *vs;
	int error;

	vs = &vcpu_state[vcpu];

	/*
	 * Any pending event should already be reported before
	 * resuming.
	 */
	assert(vs->hit_swbreak == false);
	assert(vs->stepped == false);
	if (vs->stepping) {
		/* Arm VM_CAP_MTRAP_EXIT while a step is requested. */
		error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
		assert(error == 0);
	}
}
794
795/*
796 * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
797 * has been suspended due to an event on different vCPU or in response
798 * to a guest-wide suspend such as Ctrl-C or the stop on attach.
799 */
void
gdb_cpu_suspend(int vcpu)
{

	pthread_mutex_lock(&gdb_lock);
	/* Park until released, then re-arm stepping state if requested. */
	_gdb_cpu_suspend(vcpu, true);
	gdb_cpu_resume(vcpu);
	pthread_mutex_unlock(&gdb_lock);
}
809
/*
 * Ask the kernel to suspend every active vCPU.  Must be called with
 * gdb_lock held.  If all vCPUs are already waiting, complete the
 * suspend immediately.
 */
static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	vcpus_suspended = vcpus_active;
	vm_suspend_cpu(ctx, -1);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}
821
822/*
823 * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
824 * the VT-x-specific MTRAP exit.
825 */
void
gdb_cpu_mtrap(int vcpu)
{
	struct vcpu_state *vs;

	debug("$vCPU %d MTRAP\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	vs = &vcpu_state[vcpu];
	if (vs->stepping) {
		vs->stepping = false;
		vs->stepped = true;
		/* Disarm single-step exits; the step has completed. */
		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
		/* Stay suspended until the debugger acknowledges the step. */
		while (vs->stepped) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting step\n", vcpu);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
		}
		gdb_cpu_resume(vcpu);
	}
	pthread_mutex_unlock(&gdb_lock);
}
850
851static struct breakpoint *
852find_breakpoint(uint64_t gpa)
853{
854	struct breakpoint *bp;
855
856	TAILQ_FOREACH(bp, &breakpoints, link) {
857		if (bp->gpa == gpa)
858			return (bp);
859	}
860	return (NULL);
861}
862
/*
 * Handler for VM_EXITCODE_BPT: a vCPU hit an INT3.  If the address
 * carries a breakpoint planted by the debug server, report it and
 * keep the vCPU suspended until the stop is acknowledged or the
 * breakpoint is removed.  Otherwise the #BP belongs to the guest and
 * is re-injected.
 */
void
gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error;

	pthread_mutex_lock(&gdb_lock);
	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpu];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		/* Leave RIP at the address reported by the exit. */
		vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
		for (;;) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
				    vmexit->rip);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
		    vmexit->rip);
		error = vm_set_register(ctx, vcpu,
		    VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
913
/*
 * Resume a single vCPU with single-stepping requested.  Returns false
 * if querying VM_CAP_MTRAP_EXIT fails (stepping unsupported).
 */
static bool
gdb_step_vcpu(int vcpu)
{
	int error, val;

	debug("$vCPU %d step\n", vcpu);
	/* Probe for MTRAP support before committing to the step. */
	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
	if (error < 0)
		return (false);

	/* Acknowledge the current stop and release only this vCPU. */
	discard_stop();
	vcpu_state[vcpu].stepping = true;
	vm_resume_cpu(ctx, vcpu);
	CPU_CLR(vcpu, &vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
	return (true);
}
931
/*
 * Let all suspended vCPUs run again.  Must be called with gdb_lock
 * held; wakes every thread blocked in _gdb_cpu_suspend().
 */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_cpu(ctx, -1);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
942
943static void
944gdb_read_regs(void)
945{
946	uint64_t regvals[nitems(gdb_regset)];
947	int i;
948
949	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
950	    gdb_regset, regvals) == -1) {
951		send_error(errno);
952		return;
953	}
954	start_packet();
955	for (i = 0; i < nitems(regvals); i++)
956		append_unsigned_native(regvals[i], gdb_regsize[i]);
957	finish_packet();
958}
959
/*
 * Handler for the 'm' packet ("m<addr>,<length>", hex fields): read
 * guest memory and reply with the bytes hex-encoded.  RAM-backed
 * pages are read directly a byte at a time; other addresses are
 * attempted via MMIO using aligned word-sized reads where possible.
 * If a translation or MMIO read fails after some data has already
 * been appended, the partial packet is finished rather than replaced
 * with an error.
 */
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(ctx, cur_vcpu, gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					/* Emit the word low byte first. */
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	if (!started)
		start_packet();
	finish_packet();
}
1071
/*
 * Handler for the 'M' packet ("M<addr>,<length>:<hex bytes>"): write
 * guest memory.  RAM-backed pages are written directly a byte at a
 * time; other addresses are attempted via MMIO using aligned
 * word-sized writes where possible.  Replies OK on success or an
 * error code on any parse, translation, or write failure.
 */
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Verify the available bytes match the length. */
	if (len != resid * 2) {
		send_error(EINVAL);
		return;
	}

	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			send_error(errno);
			return;
		}
		if (error == 0) {
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1) {
					bytes = 1;
					val = parse_byte(data);
				} else if (gpa & 2 || todo == 2) {
					bytes = 2;
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(ctx, cur_vcpu, gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	assert(len == 0);
	send_ok();
}
1180
1181static bool
1182set_breakpoint_caps(bool enable)
1183{
1184	cpuset_t mask;
1185	int vcpu;
1186
1187	mask = vcpus_active;
1188	while (!CPU_EMPTY(&mask)) {
1189		vcpu = CPU_FFS(&mask) - 1;
1190		CPU_CLR(vcpu, &mask);
1191		if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
1192		    enable ? 1 : 0) < 0)
1193			return (false);
1194		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1195		    enable ? "en" : "dis");
1196	}
1197	return (true);
1198}
1199
1200static void
1201remove_all_sw_breakpoints(void)
1202{
1203	struct breakpoint *bp, *nbp;
1204	uint8_t *cp;
1205
1206	if (TAILQ_EMPTY(&breakpoints))
1207		return;
1208
1209	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1210		debug("remove breakpoint at %#lx\n", bp->gpa);
1211		cp = paddr_guest2host(ctx, bp->gpa, 1);
1212		*cp = bp->shadow_inst;
1213		TAILQ_REMOVE(&breakpoints, bp, link);
1214		free(bp);
1215	}
1216	TAILQ_INIT(&breakpoints);
1217	set_breakpoint_caps(false);
1218}
1219
1220static void
1221update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1222{
1223	struct breakpoint *bp;
1224	uint64_t gpa;
1225	uint8_t *cp;
1226	int error;
1227
1228	if (kind != 1) {
1229		send_error(EINVAL);
1230		return;
1231	}
1232
1233	error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1234	if (error == -1) {
1235		send_error(errno);
1236		return;
1237	}
1238	if (error == 0) {
1239		send_error(EFAULT);
1240		return;
1241	}
1242
1243	cp = paddr_guest2host(ctx, gpa, 1);
1244
1245	/* Only permit breakpoints in guest RAM. */
1246	if (cp == NULL) {
1247		send_error(EFAULT);
1248		return;
1249	}
1250
1251	/* Find any existing breakpoint. */
1252	bp = find_breakpoint(gpa);
1253
1254	/*
1255	 * Silently ignore duplicate commands since the protocol
1256	 * requires these packets to be idempotent.
1257	 */
1258	if (insert) {
1259		if (bp == NULL) {
1260			if (TAILQ_EMPTY(&breakpoints) &&
1261			    !set_breakpoint_caps(true)) {
1262				send_empty_response();
1263				return;
1264			}
1265			bp = malloc(sizeof(*bp));
1266			bp->gpa = gpa;
1267			bp->shadow_inst = *cp;
1268			*cp = 0xcc;	/* INT 3 */
1269			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1270			debug("new breakpoint at %#lx\n", gpa);
1271		}
1272	} else {
1273		if (bp != NULL) {
1274			debug("remove breakpoint at %#lx\n", gpa);
1275			*cp = bp->shadow_inst;
1276			TAILQ_REMOVE(&breakpoints, bp, link);
1277			free(bp);
1278			if (TAILQ_EMPTY(&breakpoints))
1279				set_breakpoint_caps(false);
1280		}
1281	}
1282	send_ok();
1283}
1284
1285static void
1286parse_breakpoint(const uint8_t *data, size_t len)
1287{
1288	uint64_t gva;
1289	uint8_t *cp;
1290	bool insert;
1291	int kind, type;
1292
1293	insert = data[0] == 'Z';
1294
1295	/* Skip 'Z/z' */
1296	data += 1;
1297	len -= 1;
1298
1299	/* Parse and consume type. */
1300	cp = memchr(data, ',', len);
1301	if (cp == NULL || cp == data) {
1302		send_error(EINVAL);
1303		return;
1304	}
1305	type = parse_integer(data, cp - data);
1306	len -= (cp - data) + 1;
1307	data += (cp - data) + 1;
1308
1309	/* Parse and consume address. */
1310	cp = memchr(data, ',', len);
1311	if (cp == NULL || cp == data) {
1312		send_error(EINVAL);
1313		return;
1314	}
1315	gva = parse_integer(data, cp - data);
1316	len -= (cp - data) + 1;
1317	data += (cp - data) + 1;
1318
1319	/* Parse and consume kind. */
1320	cp = memchr(data, ';', len);
1321	if (cp == data) {
1322		send_error(EINVAL);
1323		return;
1324	}
1325	if (cp != NULL) {
1326		/*
1327		 * We do not advertise support for either the
1328		 * ConditionalBreakpoints or BreakpointCommands
1329		 * features, so we should not be getting conditions or
1330		 * commands from the remote end.
1331		 */
1332		send_empty_response();
1333		return;
1334	}
1335	kind = parse_integer(data, len);
1336	data += len;
1337	len = 0;
1338
1339	switch (type) {
1340	case 0:
1341		update_sw_breakpoint(gva, kind, insert);
1342		break;
1343	default:
1344		send_empty_response();
1345		break;
1346	}
1347}
1348
1349static bool
1350command_equals(const uint8_t *data, size_t len, const char *cmd)
1351{
1352
1353	if (strlen(cmd) > len)
1354		return (false);
1355	return (memcmp(data, cmd, strlen(cmd)) == 0);
1356}
1357
1358static void
1359check_features(const uint8_t *data, size_t len)
1360{
1361	char *feature, *next_feature, *str, *value;
1362	bool supported;
1363
1364	str = malloc(len + 1);
1365	memcpy(str, data, len);
1366	str[len] = '\0';
1367	next_feature = str;
1368
1369	while ((feature = strsep(&next_feature, ";")) != NULL) {
1370		/*
1371		 * Null features shouldn't exist, but skip if they
1372		 * do.
1373		 */
1374		if (strcmp(feature, "") == 0)
1375			continue;
1376
1377		/*
1378		 * Look for the value or supported / not supported
1379		 * flag.
1380		 */
1381		value = strchr(feature, '=');
1382		if (value != NULL) {
1383			*value = '\0';
1384			value++;
1385			supported = true;
1386		} else {
1387			value = feature + strlen(feature) - 1;
1388			switch (*value) {
1389			case '+':
1390				supported = true;
1391				break;
1392			case '-':
1393				supported = false;
1394				break;
1395			default:
1396				/*
1397				 * This is really a protocol error,
1398				 * but we just ignore malformed
1399				 * features for ease of
1400				 * implementation.
1401				 */
1402				continue;
1403			}
1404			value = NULL;
1405		}
1406
1407		if (strcmp(feature, "swbreak") == 0)
1408			swbreak_enabled = supported;
1409	}
1410	free(str);
1411
1412	start_packet();
1413
1414	/* This is an arbitrary limit. */
1415	append_string("PacketSize=4096");
1416	append_string(";swbreak+");
1417	finish_packet();
1418}
1419
1420static void
1421gdb_query(const uint8_t *data, size_t len)
1422{
1423
1424	/*
1425	 * TODO:
1426	 * - qSearch
1427	 */
1428	if (command_equals(data, len, "qAttached")) {
1429		start_packet();
1430		append_char('1');
1431		finish_packet();
1432	} else if (command_equals(data, len, "qC")) {
1433		start_packet();
1434		append_string("QC");
1435		append_integer(cur_vcpu + 1);
1436		finish_packet();
1437	} else if (command_equals(data, len, "qfThreadInfo")) {
1438		cpuset_t mask;
1439		bool first;
1440		int vcpu;
1441
1442		if (CPU_EMPTY(&vcpus_active)) {
1443			send_error(EINVAL);
1444			return;
1445		}
1446		mask = vcpus_active;
1447		start_packet();
1448		append_char('m');
1449		first = true;
1450		while (!CPU_EMPTY(&mask)) {
1451			vcpu = CPU_FFS(&mask) - 1;
1452			CPU_CLR(vcpu, &mask);
1453			if (first)
1454				first = false;
1455			else
1456				append_char(',');
1457			append_integer(vcpu + 1);
1458		}
1459		finish_packet();
1460	} else if (command_equals(data, len, "qsThreadInfo")) {
1461		start_packet();
1462		append_char('l');
1463		finish_packet();
1464	} else if (command_equals(data, len, "qSupported")) {
1465		data += strlen("qSupported");
1466		len -= strlen("qSupported");
1467		check_features(data, len);
1468	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1469		char buf[16];
1470		int tid;
1471
1472		data += strlen("qThreadExtraInfo");
1473		len -= strlen("qThreadExtraInfo");
1474		if (*data != ',') {
1475			send_error(EINVAL);
1476			return;
1477		}
1478		tid = parse_threadid(data + 1, len - 1);
1479		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1480			send_error(EINVAL);
1481			return;
1482		}
1483
1484		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1485		start_packet();
1486		append_asciihex(buf);
1487		finish_packet();
1488	} else
1489		send_empty_response();
1490}
1491
1492static void
1493handle_command(const uint8_t *data, size_t len)
1494{
1495
1496	/* Reject packets with a sequence-id. */
1497	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1498	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1499		send_empty_response();
1500		return;
1501	}
1502
1503	switch (*data) {
1504	case 'c':
1505		if (len != 1) {
1506			send_error(EINVAL);
1507			break;
1508		}
1509
1510		discard_stop();
1511		gdb_resume_vcpus();
1512		break;
1513	case 'D':
1514		send_ok();
1515
1516		/* TODO: Resume any stopped CPUs. */
1517		break;
1518	case 'g': {
1519		gdb_read_regs();
1520		break;
1521	}
1522	case 'H': {
1523		int tid;
1524
1525		if (data[1] != 'g' && data[1] != 'c') {
1526			send_error(EINVAL);
1527			break;
1528		}
1529		tid = parse_threadid(data + 2, len - 2);
1530		if (tid == -2) {
1531			send_error(EINVAL);
1532			break;
1533		}
1534
1535		if (CPU_EMPTY(&vcpus_active)) {
1536			send_error(EINVAL);
1537			break;
1538		}
1539		if (tid == -1 || tid == 0)
1540			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1541		else if (CPU_ISSET(tid - 1, &vcpus_active))
1542			cur_vcpu = tid - 1;
1543		else {
1544			send_error(EINVAL);
1545			break;
1546		}
1547		send_ok();
1548		break;
1549	}
1550	case 'm':
1551		gdb_read_mem(data, len);
1552		break;
1553	case 'M':
1554		gdb_write_mem(data, len);
1555		break;
1556	case 'T': {
1557		int tid;
1558
1559		tid = parse_threadid(data + 1, len - 1);
1560		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1561			send_error(EINVAL);
1562			return;
1563		}
1564		send_ok();
1565		break;
1566	}
1567	case 'q':
1568		gdb_query(data, len);
1569		break;
1570	case 's':
1571		if (len != 1) {
1572			send_error(EINVAL);
1573			break;
1574		}
1575
1576		/* Don't send a reply until a stop occurs. */
1577		if (!gdb_step_vcpu(cur_vcpu)) {
1578			send_error(EOPNOTSUPP);
1579			break;
1580		}
1581		break;
1582	case 'z':
1583	case 'Z':
1584		parse_breakpoint(data, len);
1585		break;
1586	case '?':
1587		report_stop(false);
1588		break;
1589	case 'G': /* TODO */
1590	case 'v':
1591		/* Handle 'vCont' */
1592		/* 'vCtrlC' */
1593	case 'p': /* TODO */
1594	case 'P': /* TODO */
1595	case 'Q': /* TODO */
1596	case 't': /* TODO */
1597	case 'X': /* TODO */
1598	default:
1599		send_empty_response();
1600	}
1601}
1602
/*
 * Check for a valid packet in the command buffer.
 *
 * Consumes as many complete items (interrupt bytes, ACK/NACK
 * characters, and '$'-framed packets) from the receive buffer as
 * possible, leaving any trailing partial packet buffered for the
 * next read.  Must be called with gdb_lock held.
 */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			/* A raw 0x03 byte is GDB's interrupt request. */
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			if (stopped_vcpu != -1 && report_next_stop) {
				/* A deferred stop reply can now be sent. */
				report_stop(true);
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				/*
				 * Rewind the response buffer so the
				 * previous reply is retransmitted,
				 * skipping any leading '+' ACK byte.
				 */
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			/* '$'...'#' plus two checksum digits. */
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum. */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				/* Bad checksum: NACK and drop the packet. */
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			send_char('+');

			/* Body runs from just after '$' to before '#'. */
			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending())
				debug("-> +\n");
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}
1691
/*
 * Event callback: data (or EOF) is available on the GDB socket.
 * Reads it into the command buffer and processes any complete
 * packets.
 */
static void
gdb_readable(int fd, enum ev_type event, void *arg)
{
	ssize_t nread;
	int pending;

	if (ioctl(fd, FIONREAD, &pending) == -1) {
		warn("FIONREAD on GDB socket");
		return;
	}

	/*
	 * 'pending' might be zero due to EOF.  We need to call read
	 * with a non-zero length to detect EOF.
	 */
	if (pending == 0)
		pending = 1;

	/* Ensure there is room in the command buffer. */
	io_buffer_grow(&cur_comm, pending);
	assert(io_buffer_avail(&cur_comm) >= pending);

	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
	if (nread == 0) {
		/* EOF: the debugger disconnected. */
		close_connection();
	} else if (nread == -1) {
		if (errno == EAGAIN)
			return;

		warn("Read from GDB socket");
		close_connection();
	} else {
		cur_comm.len += nread;
		/* Packet processing is serialized with the vcpu threads. */
		pthread_mutex_lock(&gdb_lock);
		check_command(fd);
		pthread_mutex_unlock(&gdb_lock);
	}
}
1730
/*
 * Event callback: the GDB socket became writable again; flush any
 * queued response data.
 */
static void
gdb_writable(int fd, enum ev_type event, void *arg)
{

	send_pending_data(fd);
}
1737
1738static void
1739new_connection(int fd, enum ev_type event, void *arg)
1740{
1741	int optval, s;
1742
1743	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1744	if (s == -1) {
1745		if (arg != NULL)
1746			err(1, "Failed accepting initial GDB connection");
1747
1748		/* Silently ignore errors post-startup. */
1749		return;
1750	}
1751
1752	optval = 1;
1753	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1754	    -1) {
1755		warn("Failed to disable SIGPIPE for GDB connection");
1756		close(s);
1757		return;
1758	}
1759
1760	pthread_mutex_lock(&gdb_lock);
1761	if (cur_fd != -1) {
1762		close(s);
1763		warnx("Ignoring additional GDB connection.");
1764	}
1765
1766	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1767	if (read_event == NULL) {
1768		if (arg != NULL)
1769			err(1, "Failed to setup initial GDB connection");
1770		pthread_mutex_unlock(&gdb_lock);
1771		return;
1772	}
1773	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1774	if (write_event == NULL) {
1775		if (arg != NULL)
1776			err(1, "Failed to setup initial GDB connection");
1777		mevent_delete_close(read_event);
1778		read_event = NULL;
1779	}
1780
1781	cur_fd = s;
1782	cur_vcpu = 0;
1783	stopped_vcpu = -1;
1784
1785	/* Break on attach. */
1786	first_stop = true;
1787	report_next_stop = false;
1788	gdb_suspend_vcpus();
1789	pthread_mutex_unlock(&gdb_lock);
1790}
1791
1792#ifndef WITHOUT_CAPSICUM
1793void
1794limit_gdb_socket(int s)
1795{
1796	cap_rights_t rights;
1797	unsigned long ioctls[] = { FIONREAD };
1798
1799	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1800	    CAP_SETSOCKOPT, CAP_IOCTL);
1801	if (caph_rights_limit(s, &rights) == -1)
1802		errx(EX_OSERR, "Unable to apply rights for sandbox");
1803	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1804		errx(EX_OSERR, "Unable to apply rights for sandbox");
1805}
1806#endif
1807
1808void
1809init_gdb(struct vmctx *_ctx, int sport, bool wait)
1810{
1811	struct sockaddr_in sin;
1812	int error, flags, optval, s;
1813
1814	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1815
1816	error = pthread_mutex_init(&gdb_lock, NULL);
1817	if (error != 0)
1818		errc(1, error, "gdb mutex init");
1819	error = pthread_cond_init(&idle_vcpus, NULL);
1820	if (error != 0)
1821		errc(1, error, "gdb cv init");
1822
1823	ctx = _ctx;
1824	s = socket(PF_INET, SOCK_STREAM, 0);
1825	if (s < 0)
1826		err(1, "gdb socket create");
1827
1828	optval = 1;
1829	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
1830
1831	sin.sin_len = sizeof(sin);
1832	sin.sin_family = AF_INET;
1833	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1834	sin.sin_port = htons(sport);
1835
1836	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1837		err(1, "gdb socket bind");
1838
1839	if (listen(s, 1) < 0)
1840		err(1, "gdb socket listen");
1841
1842	stopped_vcpu = -1;
1843	TAILQ_INIT(&breakpoints);
1844	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
1845	if (wait) {
1846		/*
1847		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1848		 * logic in gdb_cpu_add() to suspend the first vcpu before
1849		 * it starts execution.  The vcpu will remain suspended
1850		 * until a debugger connects.
1851		 */
1852		CPU_SET(0, &vcpus_suspended);
1853		stopped_vcpu = 0;
1854	}
1855
1856	flags = fcntl(s, F_GETFL);
1857	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1858		err(1, "Failed to mark gdb socket non-blocking");
1859
1860#ifndef WITHOUT_CAPSICUM
1861	limit_gdb_socket(s);
1862#endif
1863	mevent_add(s, EVF_READ, new_connection, NULL);
1864}
1865