/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SYS_MACHPRIVREGS_H
#define	_SYS_MACHPRIVREGS_H

#include <sys/hypervisor.h>

/*
 * Platform-dependent instruction sequences for manipulating
 * privileged state
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * CLI and STI are quite complex to virtualize!
 */
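
/*
 * A paravirtualized guest cannot simply execute cli and sti; it does
 * not run at a privilege level that controls the real interrupt flag.
 * Instead, "interrupts" arrive as event channel upcalls, and masking
 * them means setting the per-vcpu evtchn_upcall_mask byte in the
 * shared vcpu_info structure, which is what the macros below do.
 */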

#if defined(__amd64)

#define	CURVCPU(r)					\
	movq	%gs:CPU_VCPU_INFO, r

#define	CURTHREAD(r)					\
	movq	%gs:CPU_THREAD, r

#elif defined(__i386)

#define	CURVCPU(r)					\
	movl	%gs:CPU_VCPU_INFO, r

#define	CURTHREAD(r)					\
	movl	%gs:CPU_THREAD, r

#endif	/* __i386 */

#define	XEN_TEST_EVENT_PENDING(r)			\
	testb	$0xff, VCPU_INFO_EVTCHN_UPCALL_PENDING(r)

#define	XEN_SET_UPCALL_MASK(r)				\
	movb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define	XEN_GET_UPCALL_MASK(r, mask)			\
	movb	VCPU_INFO_EVTCHN_UPCALL_MASK(r), mask

#define	XEN_TEST_UPCALL_MASK(r)				\
	testb	$1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define	XEN_CLEAR_UPCALL_MASK(r)			\
	ASSERT_UPCALL_MASK_IS_SET;			\
	movb	$0, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#ifdef DEBUG

/*
 * Much logic depends on the upcall mask being set at
 * various points in the code; use this macro to validate.
 *
 * Need to use CURVCPU(r) to establish the vcpu pointer.
 */
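
/*
 * For post-mortem debugging, the DEBUG variants below also record the
 * code address of the most recent virtual STI and CLI on each CPU:
 * laststi[CPU_ID] and lastcli[CPU_ID] (arrays defined elsewhere in the
 * i86xpv code) are updated with the address of the expansion site,
 * taken with lea of ".+0".
 */
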
#if defined(__amd64)

#define	ASSERT_UPCALL_MASK_IS_SET			\
	pushq	%r11;					\
	CURVCPU(%r11);					\
	XEN_TEST_UPCALL_MASK(%r11);			\
	jne	6f;					\
	cmpl	$0, stistipanic(%rip);			\
	jle	6f;					\
	movl	$-1, stistipanic(%rip);			\
	movq	stistimsg(%rip), %rdi;			\
	xorl	%eax, %eax;				\
	call	panic;					\
6:	pushq	%rax;					\
	pushq	%rbx;					\
	movl	%gs:CPU_ID, %eax;			\
	leaq	.+0(%rip), %r11;			\
	leaq	laststi(%rip), %rbx;			\
	movq	%r11, (%rbx, %rax, 8);			\
	popq	%rbx;					\
	popq	%rax;					\
	popq	%r11

#define	SAVE_CLI_LOCATION				\
	pushq	%rax;					\
	pushq	%rbx;					\
	pushq	%rcx;					\
	movl	%gs:CPU_ID, %eax;			\
	leaq	.+0(%rip), %rcx;			\
	leaq	lastcli, %rbx;				\
	movq	%rcx, (%rbx, %rax, 8);			\
	popq	%rcx;					\
	popq	%rbx;					\
	popq	%rax;					\

#elif defined(__i386)

#define	ASSERT_UPCALL_MASK_IS_SET			\
	pushl	%ecx;					\
	CURVCPU(%ecx);					\
	XEN_TEST_UPCALL_MASK(%ecx);			\
	jne	6f;					\
	cmpl	$0, stistipanic;			\
	jle	6f;					\
	movl	$-1, stistipanic;			\
	movl	stistimsg, %ecx;			\
	pushl	%ecx;					\
	call	panic;					\
6:	pushl	%eax;					\
	pushl	%ebx;					\
	movl	%gs:CPU_ID, %eax;			\
	leal	.+0, %ecx;				\
	leal	laststi, %ebx;				\
	movl	%ecx, (%ebx, %eax, 4);			\
	popl	%ebx;					\
	popl	%eax;					\
	popl	%ecx

#define	SAVE_CLI_LOCATION				\
	pushl	%eax;					\
	pushl	%ebx;					\
	pushl	%ecx;					\
	movl	%gs:CPU_ID, %eax;			\
	leal	.+0, %ecx;				\
	leal	lastcli, %ebx;				\
	movl	%ecx, (%ebx, %eax, 4);			\
	popl	%ecx;					\
	popl	%ebx;					\
	popl	%eax;					\

#endif	/* __i386 */

#else	/* DEBUG */

#define	ASSERT_UPCALL_MASK_IS_SET	/* empty */
#define	SAVE_CLI_LOCATION		/* empty */

#endif	/* DEBUG */

#define	KPREEMPT_DISABLE(t)				\
	addb	$1, T_PREEMPT(t)

#define	KPREEMPT_ENABLE_NOKP(t)				\
	subb	$1, T_PREEMPT(t)

#define	CLI(r)						\
	CURTHREAD(r);					\
	KPREEMPT_DISABLE(r);				\
	CURVCPU(r);					\
	XEN_SET_UPCALL_MASK(r);				\
	SAVE_CLI_LOCATION;				\
	CURTHREAD(r);					\
	KPREEMPT_ENABLE_NOKP(r)

#define	CLIRET(r, ret)					\
	CURTHREAD(r);					\
	KPREEMPT_DISABLE(r);				\
	CURVCPU(r);					\
	XEN_GET_UPCALL_MASK(r, ret);			\
	XEN_SET_UPCALL_MASK(r);				\
	SAVE_CLI_LOCATION;				\
	CURTHREAD(r);					\
	KPREEMPT_ENABLE_NOKP(r)
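
/*
 * Illustrative use only (the register choice and the surrounding code
 * are hypothetical, not taken from a real caller): an assembly routine
 * protecting a critical section against event delivery brackets it as
 *
 *	CLI(%r11)		mask events, the virtual "cli"
 *	... critical section ...
 *	STI			unmask events, take any pending upcall
 *
 * CLIRET additionally hands back the previous mask value so the caller
 * can restore the old state instead of unconditionally enabling.
 */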

/*
 * We rely on the fact that HYPERVISOR_block clears the upcall mask for
 * us and then delivers an upcall if there is a pending event.  This
 * gets us a callback on this cpu without the danger of being preempted
 * and migrating to another cpu between the upcall enable and the
 * callback delivery.
 */
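
/*
 * In rough C pseudocode (a sketch only; compare_and_swap_16 stands for
 * the 16-bit lock cmpxchg below and "vcpu" for this CPU's vcpu_info):
 *
 *	expect.pending = 0; expect.mask = 1;	assume nothing pending
 *	want.pending = 0; want.mask = 0;	clear both
 *	if (compare_and_swap_16(&vcpu->evtchn_upcall_pending,
 *	    expect, want) == FAILED)
 *		(void) HYPERVISOR_sched_op(SCHEDOP_block, NULL);
 *
 * i.e. if an event was already pending, let the block hypercall clear
 * the mask and deliver the upcall for us.
 */
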
#if defined(__amd64)

#define	STI_CLOBBER		/* clobbers %rax, %rdi, %r11 */		\
	CURVCPU(%r11);							\
	ASSERT_UPCALL_MASK_IS_SET;					\
	movw	$0x100, %ax;	/* assume mask set, pending clear */	\
	movw	$0, %di;	/* clear mask and pending */		\
	lock;								\
	cmpxchgw %di, VCPU_INFO_EVTCHN_UPCALL_PENDING(%r11);		\
	jz	7f;		/* xchg worked, we're done */		\
	movl	$__HYPERVISOR_sched_op, %eax; /* have pending upcall */	\
	movl	$SCHEDOP_block, %edi;					\
	pushq	%rsi;	/* hypercall clobbers C param regs plus r10 */	\
	pushq	%rcx;							\
	pushq	%rdx;							\
	pushq	%r8;							\
	pushq	%r9;							\
	pushq	%r10;							\
	TRAP_INSTR;	/* clear upcall mask, force upcall */		\
	popq	%r10;							\
	popq	%r9;							\
	popq	%r8;							\
	popq	%rdx;							\
	popq	%rcx;							\
	popq	%rsi;							\
7:

#define	STI								\
	pushq	%r11;							\
	pushq	%rdi;							\
	pushq	%rax;							\
	STI_CLOBBER;	/* clobbers %r11, %rax, %rdi */			\
	popq	%rax;							\
	popq	%rdi;							\
	popq	%r11

#elif defined(__i386)

#define	STI_CLOBBER		/* clobbers %eax, %ebx, %ecx */		\
	CURVCPU(%ecx);							\
	ASSERT_UPCALL_MASK_IS_SET;					\
	movw	$0x100, %ax;	/* assume mask set, pending clear */	\
	movw	$0, %bx;	/* clear mask and pending */		\
	lock;								\
	cmpxchgw %bx, VCPU_INFO_EVTCHN_UPCALL_PENDING(%ecx);		\
	jz	7f;		/* xchg worked, we're done */		\
	movl	$__HYPERVISOR_sched_op, %eax; /* have pending upcall */	\
	movl	$SCHEDOP_block, %ebx;					\
	TRAP_INSTR;		/* clear upcall mask, force upcall */	\
7:

#define	STI						\
	pushl	%eax;					\
	pushl	%ebx;					\
	pushl	%ecx;					\
	STI_CLOBBER;	/* clobbers %eax, %ebx, %ecx */	\
	popl	%ecx;					\
	popl	%ebx;					\
	popl	%eax

#endif	/* __i386 */

/*
 * Map the PS_IE bit to the hypervisor's event mask bit:
 * to -set- the event mask, we have to do a CLI;
 * to -clear- the event mask, we have to do a STI
 * (with all the accompanying pre-emption and callbacks, ick).
 *
 * And vice versa for mapping the event mask back to PS_IE.
 */
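
/*
 * Illustrative use only (register choices here are arbitrary): code
 * restoring a saved flags image does something like
 *
 *	IE_TO_EVENT_MASK(%r11, %r13)	CLI or STI according to PS_IE
 *
 * while code constructing a flags image for a saved context does
 *
 *	EVENT_MASK_TO_IE(%r11, %r13)	fold the current mask into PS_IE
 */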

#if defined(__amd64)

#define	IE_TO_EVENT_MASK(rtmp, rfl)		\
	testq	$PS_IE, rfl;			\
	jnz	4f;				\
	CLI(rtmp);				\
	jmp	5f;				\
4:	STI;					\
5:

#define	EVENT_MASK_TO_IE(rtmp, rfl)		\
	andq	$_BITNOT(PS_IE), rfl;		\
	CURVCPU(rtmp);				\
	XEN_TEST_UPCALL_MASK(rtmp);		\
	jnz	1f;				\
	orq	$PS_IE, rfl;			\
1:

#elif defined(__i386)

#define	IE_TO_EVENT_MASK(rtmp, rfl)		\
	testl	$PS_IE, rfl;			\
	jnz	4f;				\
	CLI(rtmp);				\
	jmp	5f;				\
4:	STI;					\
5:

#define	EVENT_MASK_TO_IE(rtmp, rfl)		\
	andl	$_BITNOT(PS_IE), rfl;		\
	CURVCPU(rtmp);				\
	XEN_TEST_UPCALL_MASK(rtmp);		\
	jnz	1f;				\
	orl	$PS_IE, rfl;			\
1:

#endif	/* __i386 */

/*
 * Used to re-enable interrupts in the body of exception handlers
 */

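/*
 * Note that restoring F_ON with popf alone is not sufficient here:
 * under the hypervisor the PS_IE bit in the real flags does not
 * control event delivery, so the virtualized STI is still required
 * to clear the upcall mask and pick up anything already pending.
 */
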
#if defined(__amd64)

#define	ENABLE_INTR_FLAGS		\
	pushq	$F_ON;			\
	popfq;				\
	STI

#elif defined(__i386)

#define	ENABLE_INTR_FLAGS		\
	pushl	$F_ON;			\
	popfl;				\
	STI

#endif	/* __i386 */

/*
 * Virtualize IRET and SYSRET
 */

#if defined(__amd64)

#if defined(DEBUG)

/*
 * Die nastily with a #ud trap if we are about to switch to user
 * mode in HYPERVISOR_IRET and RUPDATE_PENDING is set.
 */
#define	__ASSERT_NO_RUPDATE_PENDING			\
	pushq	%r15;					\
	cmpw	$KCS_SEL, 0x10(%rsp);			\
	je	1f;					\
	movq	%gs:CPU_THREAD, %r15;			\
	movq	T_LWP(%r15), %r15;			\
	testb	$0x1, PCB_RUPDATE(%r15);		\
	je	1f;					\
	ud2;						\
1:	popq	%r15

#else	/* DEBUG */

#define	__ASSERT_NO_RUPDATE_PENDING

#endif	/* DEBUG */

/*
 * Switching from guest kernel to user mode.
 * flag == VGCF_IN_SYSCALL => return via sysret
 * flag == 0 => return via iretq
 *
 * See definition in public/arch-x86_64.h. Stack going in must be:
 * rax, r11, rcx, flags, rip, cs, rflags, rsp, ss.
 */
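/*
 * The pushes below build exactly that layout: flag, %rcx, %r11 and %rax
 * go on top of the hardware iretq frame (rip, cs, rflags, rsp, ss)
 * already on the stack, matching the iret_context layout the hypercall
 * consumes (see arch-x86_64.h).
 */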
#define	HYPERVISOR_IRET(flag)			\
	__ASSERT_NO_RUPDATE_PENDING;		\
	pushq	$flag;				\
	pushq	%rcx;				\
	pushq	%r11;				\
	pushq	%rax;				\
	movl	$__HYPERVISOR_iret, %eax;	\
	syscall;				\
	ud2	/* die nastily if we return! */

#define	IRET	HYPERVISOR_IRET(0)

/*
 * XXPV: Normally we would expect to use sysret to return from kernel to
 *       user mode when using the syscall instruction. The iret hypercall
 *       does support both iret and sysret semantics. For us to use sysret
 *       style would require that we use the hypervisor's private descriptors
 *       that obey the syscall instruction's imposed segment selector ordering.
 *       With iret we can use whatever %cs value we choose. We should fix
 *       this to use sysret one day.
 */
#define	SYSRETQ	HYPERVISOR_IRET(0)
#define	SYSRETL	ud2		/* 32-bit syscall/sysret not supported */
#define	SWAPGS	/* empty - handled in hypervisor */

#elif defined(__i386)

/*
 * Switching from guest kernel to user mode.
 * See definition in public/arch-x86_32.h. Stack going in must be:
 * eax, flags, eip, cs, eflags, esp, ss.
 */
#define	HYPERVISOR_IRET				\
	pushl	%eax;				\
	movl	$__HYPERVISOR_iret, %eax;	\
	int	$0x82;				\
	ud2	/* die nastily if we return! */

#define	IRET	HYPERVISOR_IRET
#define	SYSRET	ud2		/* 32-bit syscall/sysret not supported */

#endif	/* __i386 */


/*
 * Xen 3.x wedges the current value of upcall_mask into an unused byte of
 * the saved %cs on the stack at the time of passing through a trap or
 * interrupt gate.  Since Xen also updates PS_IE in %[e,r]flags, we always
 * mask off the saved upcall mask so the kernel and/or tools like debuggers
 * will not be confused by bits set in reserved portions of the %cs slot.
 *
 * See xen/include/public/arch-x86_[32,64].h:cpu_user_regs_t for details.
 */
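/*
 * In that cpu_user_regs layout the 16-bit selector occupies the low
 * bytes of the %cs slot and saved_upcall_mask lives in the padding
 * above it, which is where the +4 (amd64) and +2 (i386) byte offsets
 * from REGOFF_CS below come from.
 */
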
#if defined(__amd64)

#define	CLEAN_CS	movb	$0, REGOFF_CS+4(%rsp)

#elif defined(__i386)

#define	CLEAN_CS	movb	$0, REGOFF_CS+2(%esp)

#endif	/* __i386 */

/*
 * All exceptions for amd64 have %r11 and %rcx on the stack.
 * Just pop them back into their appropriate registers and let
 * them get saved as if running native.
 */
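/*
 * (Xen pushes %rcx and %r11 as part of the 64-bit exception frame it
 * hands the guest; they are not preserved across the hypervisor's
 * syscall/sysret based entry and exit, so they have to travel on the
 * stack and be recovered here.)
 */
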
#if defined(__amd64)

#define	XPV_TRAP_POP	\
	popq	%rcx;	\
	popq	%r11

#define	XPV_TRAP_PUSH	\
	pushq	%r11;	\
	pushq	%rcx

#endif	/* __amd64 */


/*
 * Macros for saving the original segment registers and restoring them
 * for fast traps.
 */
#if defined(__amd64)

/*
 * Smaller versions of INTR_PUSH and INTR_POP for fast traps.
 * The following registers have been pushed onto the stack by
 * hardware at this point:
 *
 *	greg_t	r_rip;
 *	greg_t	r_cs;
 *	greg_t	r_rfl;
 *	greg_t	r_rsp;
 *	greg_t	r_ss;
 *
 * This handler is executed both by 32-bit and 64-bit applications.
 * 64-bit applications allow us to treat the set (%rdi, %rsi, %rdx,
 * %rcx, %r8, %r9, %r10, %r11, %rax) as volatile across function calls.
 * However, 32-bit applications only expect (%eax, %edx, %ecx) to be volatile
 * across a function call -- in particular, %esi and %edi MUST be saved!
 *
 * We could do this differently by making a FAST_INTR_PUSH32 for 32-bit
 * programs, and FAST_INTR_PUSH for 64-bit programs, but it doesn't seem
 * particularly worth it.
 *
 */
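/*
 * Note that FAST_INTR_PUSH below does not build a full struct regs: it
 * recovers the %rcx/%r11 pair that Xen pushed, drops %rsp by REGOFF_RIP
 * so the hardware frame sits at its usual struct regs offsets, and then
 * spills only %rsi and %rdi into their slots before cleaning %cs.
 */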
#define	FAST_INTR_PUSH			\
	INTGATE_INIT_KERNEL_FLAGS;	\
	popq	%rcx;			\
	popq	%r11;			\
	subq    $REGOFF_RIP, %rsp;	\
	movq    %rsi, REGOFF_RSI(%rsp);	\
	movq    %rdi, REGOFF_RDI(%rsp);	\
	CLEAN_CS

#define	FAST_INTR_POP			\
	movq    REGOFF_RSI(%rsp), %rsi;	\
	movq    REGOFF_RDI(%rsp), %rdi;	\
	addq    $REGOFF_RIP, %rsp

#define	FAST_INTR_RETURN		\
	ASSERT_UPCALL_MASK_IS_SET;	\
	HYPERVISOR_IRET(0)

#elif defined(__i386)

#define	FAST_INTR_PUSH			\
	cld;				\
	__SEGREGS_PUSH			\
	__SEGREGS_LOAD_KERNEL		\

#define	FAST_INTR_POP			\
	__SEGREGS_POP

#define	FAST_INTR_RETURN		\
	IRET

#endif	/* __i386 */

/*
 * Handling the CR0.TS bit for floating point state.
 *
 * When the TS bit is *set*, attempts to touch the floating
 * point hardware will result in a #nm trap.
 */
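/*
 * On bare metal these would be a read-modify-write of %cr0 (STTS) and
 * the clts instruction (CLTS).  A paravirtualized guest cannot write
 * %cr0 directly, so both are routed through the fpu_taskswitch
 * hypercall, whose argument is the desired TS value: 1 to set, 0 to
 * clear.
 */
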
#if defined(__amd64)

#define	STTS(rtmp)				\
	pushq	%rdi;				\
	movl	$1, %edi;			\
	call	HYPERVISOR_fpu_taskswitch;	\
	popq	%rdi

#define	CLTS					\
	pushq	%rdi;				\
	xorl	%edi, %edi;			\
	call	HYPERVISOR_fpu_taskswitch;	\
	popq	%rdi

#elif defined(__i386)

#define	STTS(r)					\
	pushl	$1;				\
	call	HYPERVISOR_fpu_taskswitch;	\
	addl	$4, %esp

#define	CLTS					\
	pushl	$0;				\
	call	HYPERVISOR_fpu_taskswitch;	\
	addl	$4, %esp

#endif	/* __i386 */

#ifdef __cplusplus
}
#endif

#endif	/* _SYS_MACHPRIVREGS_H */