1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28/*	  All Rights Reserved  	*/
29
30#include <sys/types.h>
31#include <sys/t_lock.h>
32#include <sys/param.h>
33#include <sys/cred.h>
34#include <sys/debug.h>
35#include <sys/inline.h>
36#include <sys/kmem.h>
37#include <sys/proc.h>
38#include <sys/regset.h>
39#include <sys/privregs.h>
40#include <sys/sysmacros.h>
41#include <sys/systm.h>
42#include <sys/vfs.h>
43#include <sys/vnode.h>
44#include <sys/psw.h>
45#include <sys/pcb.h>
46#include <sys/buf.h>
47#include <sys/signal.h>
48#include <sys/user.h>
49#include <sys/cpuvar.h>
50
51#include <sys/fault.h>
52#include <sys/syscall.h>
53#include <sys/procfs.h>
54#include <sys/cmn_err.h>
55#include <sys/stack.h>
56#include <sys/debugreg.h>
57#include <sys/copyops.h>
58
59#include <sys/vmem.h>
60#include <sys/mman.h>
61#include <sys/vmparam.h>
62#include <sys/fp.h>
63#include <sys/archsystm.h>
64#include <sys/vmsystm.h>
65#include <vm/hat.h>
66#include <vm/as.h>
67#include <vm/seg.h>
68#include <vm/seg_kmem.h>
69#include <vm/seg_kp.h>
70#include <vm/page.h>
71
72#include <sys/sysi86.h>
73
74#include <fs/proc/prdata.h>
75
/* Upper bound on the number of watched areas. */
int	prnwatch = 10000;
77
78/*
79 * Force a thread into the kernel if it is not already there.
80 * This is a no-op on uniprocessors.
81 */
82/* ARGSUSED */
83void
84prpokethread(kthread_t *t)
85{
86	if (t->t_state == TS_ONPROC && t->t_cpu != CPU)
87		poke_cpu(t->t_cpu->cpu_id);
88}
89
90/*
91 * Return general registers.
92 */
93void
94prgetprregs(klwp_t *lwp, prgregset_t prp)
95{
96	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
97
98	getgregs(lwp, prp);
99}
100
101/*
102 * Set general registers.
103 * (Note: This can be an alias to setgregs().)
104 */
105void
106prsetprregs(klwp_t *lwp, prgregset_t prp, int initial)
107{
108	if (initial)		/* set initial values */
109		lwptoregs(lwp)->r_ps = PSL_USER;
110	(void) setgregs(lwp, prp);
111}
112
113#ifdef _SYSCALL32_IMPL
114
115/*
116 * Convert prgregset32 to native prgregset
117 */
118void
119prgregset_32ton(klwp_t *lwp, prgregset32_t src, prgregset_t dst)
120{
121	struct regs *rp = lwptoregs(lwp);
122
123	dst[REG_GSBASE] = lwp->lwp_pcb.pcb_gsbase;
124	dst[REG_FSBASE] = lwp->lwp_pcb.pcb_fsbase;
125
126	dst[REG_DS] = (uint16_t)src[DS];
127	dst[REG_ES] = (uint16_t)src[ES];
128
129	dst[REG_GS] = (uint16_t)src[GS];
130	dst[REG_FS] = (uint16_t)src[FS];
131	dst[REG_SS] = (uint16_t)src[SS];
132	dst[REG_RSP] = (uint32_t)src[UESP];
133	dst[REG_RFL] =
134	    (rp->r_ps & ~PSL_USERMASK) | (src[EFL] & PSL_USERMASK);
135	dst[REG_CS] = (uint16_t)src[CS];
136	dst[REG_RIP] = (uint32_t)src[EIP];
137	dst[REG_ERR] = (uint32_t)src[ERR];
138	dst[REG_TRAPNO] = (uint32_t)src[TRAPNO];
139	dst[REG_RAX] = (uint32_t)src[EAX];
140	dst[REG_RCX] = (uint32_t)src[ECX];
141	dst[REG_RDX] = (uint32_t)src[EDX];
142	dst[REG_RBX] = (uint32_t)src[EBX];
143	dst[REG_RBP] = (uint32_t)src[EBP];
144	dst[REG_RSI] = (uint32_t)src[ESI];
145	dst[REG_RDI] = (uint32_t)src[EDI];
146	dst[REG_R8] = dst[REG_R9] = dst[REG_R10] = dst[REG_R11] =
147	    dst[REG_R12] = dst[REG_R13] = dst[REG_R14] = dst[REG_R15] = 0;
148}
149
150/*
151 * Return 32-bit general registers
152 */
153void
154prgetprregs32(klwp_t *lwp, prgregset32_t prp)
155{
156	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
157	getgregs32(lwp, prp);
158}
159
160#endif	/* _SYSCALL32_IMPL */
161
162/*
163 * Get the syscall return values for the lwp.
164 */
165int
166prgetrvals(klwp_t *lwp, long *rval1, long *rval2)
167{
168	struct regs *r = lwptoregs(lwp);
169
170	if (r->r_ps & PS_C)
171		return (r->r_r0);
172	if (lwp->lwp_eosys == JUSTRETURN) {
173		*rval1 = 0;
174		*rval2 = 0;
175	} else if (lwp_getdatamodel(lwp) != DATAMODEL_NATIVE) {
176		/*
177		 * XX64	Not sure we -really- need to do this, because the
178		 *	syscall return already masks off the bottom values ..?
179		 */
180		*rval1 = r->r_r0 & (uint32_t)0xffffffffu;
181		*rval2 = r->r_r1 & (uint32_t)0xffffffffu;
182	} else {
183		*rval1 = r->r_r0;
184		*rval2 = r->r_r1;
185	}
186	return (0);
187}
188
189/*
190 * Does the system support floating-point, either through hardware
191 * or by trapping and emulating floating-point machine instructions?
192 */
193int
194prhasfp(void)
195{
196	extern int fp_kind;
197
198	return (fp_kind != FP_NO);
199}
200
201/*
202 * Get floating-point registers.
203 */
204void
205prgetprfpregs(klwp_t *lwp, prfpregset_t *pfp)
206{
207	bzero(pfp, sizeof (prfpregset_t));
208	getfpregs(lwp, pfp);
209}
210
211#if defined(_SYSCALL32_IMPL)
212void
213prgetprfpregs32(klwp_t *lwp, prfpregset32_t *pfp)
214{
215	bzero(pfp, sizeof (*pfp));
216	getfpregs32(lwp, pfp);
217}
218#endif	/* _SYSCALL32_IMPL */
219
220/*
221 * Set floating-point registers.
222 * (Note: This can be an alias to setfpregs().)
223 */
224void
225prsetprfpregs(klwp_t *lwp, prfpregset_t *pfp)
226{
227	setfpregs(lwp, pfp);
228}
229
230#if defined(_SYSCALL32_IMPL)
231void
232prsetprfpregs32(klwp_t *lwp, prfpregset32_t *pfp)
233{
234	setfpregs32(lwp, pfp);
235}
236#endif	/* _SYSCALL32_IMPL */
237
238/*
239 * Does the system support extra register state?
240 */
241/* ARGSUSED */
242int
243prhasx(proc_t *p)
244{
245	return (0);
246}
247
248/*
249 * Get the size of the extra registers.
250 */
251/* ARGSUSED */
252int
253prgetprxregsize(proc_t *p)
254{
255	return (0);
256}
257
258/*
259 * Get extra registers.
260 */
261/*ARGSUSED*/
262void
263prgetprxregs(klwp_t *lwp, caddr_t prx)
264{
265	/* no extra registers */
266}
267
268/*
269 * Set extra registers.
270 */
271/*ARGSUSED*/
272void
273prsetprxregs(klwp_t *lwp, caddr_t prx)
274{
275	/* no extra registers */
276}
277
278/*
279 * Return the base (lower limit) of the process stack.
280 */
281caddr_t
282prgetstackbase(proc_t *p)
283{
284	return (p->p_usrstack - p->p_stksize);
285}
286
287/*
288 * Return the "addr" field for pr_addr in prpsinfo_t.
289 * This is a vestige of the past, so whatever we return is OK.
290 */
291caddr_t
292prgetpsaddr(proc_t *p)
293{
294	return ((caddr_t)p);
295}
296
297/*
298 * Arrange to single-step the lwp.
299 */
300void
301prstep(klwp_t *lwp, int watchstep)
302{
303	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
304
305	/*
306	 * flag LWP so that its r_efl trace bit (PS_T) will be set on
307	 * next return to usermode.
308	 */
309	lwp->lwp_pcb.pcb_flags |= REQUEST_STEP;
310	lwp->lwp_pcb.pcb_flags &= ~REQUEST_NOSTEP;
311
312	if (watchstep)
313		lwp->lwp_pcb.pcb_flags |= WATCH_STEP;
314	else
315		lwp->lwp_pcb.pcb_flags |= NORMAL_STEP;
316
317	aston(lwptot(lwp));	/* let trap() set PS_T in rp->r_efl */
318}
319
320/*
321 * Undo prstep().
322 */
323void
324prnostep(klwp_t *lwp)
325{
326	ASSERT(ttolwp(curthread) == lwp ||
327	    MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
328
329	/*
330	 * flag LWP so that its r_efl trace bit (PS_T) will be cleared on
331	 * next return to usermode.
332	 */
333	lwp->lwp_pcb.pcb_flags |= REQUEST_NOSTEP;
334
335	lwp->lwp_pcb.pcb_flags &=
336	    ~(REQUEST_STEP|NORMAL_STEP|WATCH_STEP|DEBUG_PENDING);
337
338	aston(lwptot(lwp));	/* let trap() clear PS_T in rp->r_efl */
339}
340
341/*
342 * Return non-zero if a single-step is in effect.
343 */
344int
345prisstep(klwp_t *lwp)
346{
347	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
348
349	return ((lwp->lwp_pcb.pcb_flags &
350	    (NORMAL_STEP|WATCH_STEP|DEBUG_PENDING)) != 0);
351}
352
353/*
354 * Set the PC to the specified virtual address.
355 */
356void
357prsvaddr(klwp_t *lwp, caddr_t vaddr)
358{
359	struct regs *r = lwptoregs(lwp);
360
361	ASSERT(MUTEX_NOT_HELD(&lwptoproc(lwp)->p_lock));
362
363	r->r_pc = (uintptr_t)vaddr;
364}
365
366/*
367 * Map address "addr" in address space "as" into a kernel virtual address.
368 * The memory is guaranteed to be resident and locked down.
369 */
370caddr_t
371prmapin(struct as *as, caddr_t addr, int writing)
372{
373	page_t *pp;
374	caddr_t kaddr;
375	pfn_t pfnum;
376
377	/*
378	 * XXX - Because of past mistakes, we have bits being returned
379	 * by getpfnum that are actually the page type bits of the pte.
380	 * When the object we are trying to map is a memory page with
381	 * a page structure everything is ok and we can use the optimal
382	 * method, ppmapin.  Otherwise, we have to do something special.
383	 */
384	pfnum = hat_getpfnum(as->a_hat, addr);
385	if (pf_is_memory(pfnum)) {
386		pp = page_numtopp_nolock(pfnum);
387		if (pp != NULL) {
388			ASSERT(PAGE_LOCKED(pp));
389			kaddr = ppmapin(pp, writing ?
390			    (PROT_READ | PROT_WRITE) : PROT_READ, (caddr_t)-1);
391			return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
392		}
393	}
394
395	/*
396	 * Oh well, we didn't have a page struct for the object we were
397	 * trying to map in; ppmapin doesn't handle devices, but allocating a
398	 * heap address allows ppmapout to free virtual space when done.
399	 */
400	kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
401
402	hat_devload(kas.a_hat, kaddr, MMU_PAGESIZE,  pfnum,
403	    writing ? (PROT_READ | PROT_WRITE) : PROT_READ, 0);
404
405	return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
406}
407
408/*
409 * Unmap address "addr" in address space "as"; inverse of prmapin().
410 */
411/* ARGSUSED */
412void
413prmapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
414{
415	extern void ppmapout(caddr_t);
416
417	vaddr = (caddr_t)((uintptr_t)vaddr & PAGEMASK);
418	ppmapout(vaddr);
419}
420
421/*
422 * Make sure the lwp is in an orderly state
423 * for inspection by a debugger through /proc.
424 *
425 * This needs to be called only once while the current thread remains in the
426 * kernel and needs to be called while holding no resources (mutex locks, etc).
427 *
428 * As a hedge against these conditions, if prstop() is called repeatedly
429 * before prunstop() is called, it does nothing and just returns.
430 *
431 * prunstop() must be called before the thread returns to user level.
432 */
433/* ARGSUSED */
434void
435prstop(int why, int what)
436{
437	klwp_t *lwp = ttolwp(curthread);
438	struct regs *r = lwptoregs(lwp);
439
440	if (lwp->lwp_pcb.pcb_flags & PRSTOP_CALLED)
441		return;
442
443	/*
444	 * Make sure we don't deadlock on a recursive call
445	 * to prstop().  stop() tests the lwp_nostop flag.
446	 */
447	ASSERT(lwp->lwp_nostop == 0);
448	lwp->lwp_nostop = 1;
449
450	if (copyin_nowatch((caddr_t)r->r_pc, &lwp->lwp_pcb.pcb_instr,
451	    sizeof (lwp->lwp_pcb.pcb_instr)) == 0)
452		lwp->lwp_pcb.pcb_flags |= INSTR_VALID;
453	else {
454		lwp->lwp_pcb.pcb_flags &= ~INSTR_VALID;
455		lwp->lwp_pcb.pcb_instr = 0;
456	}
457
458	(void) save_syscall_args();
459	ASSERT(lwp->lwp_nostop == 1);
460	lwp->lwp_nostop = 0;
461
462	lwp->lwp_pcb.pcb_flags |= PRSTOP_CALLED;
463	aston(curthread);	/* so prunstop() will be called */
464}
465
466/*
467 * Inform prstop() that it should do its work again
468 * the next time it is called.
469 */
470void
471prunstop(void)
472{
473	ttolwp(curthread)->lwp_pcb.pcb_flags &= ~PRSTOP_CALLED;
474}
475
476/*
477 * Fetch the user-level instruction on which the lwp is stopped.
478 * It was saved by the lwp itself, in prstop().
479 * Return non-zero if the instruction is valid.
480 */
481int
482prfetchinstr(klwp_t *lwp, ulong_t *ip)
483{
484	*ip = (ulong_t)(instr_t)lwp->lwp_pcb.pcb_instr;
485	return (lwp->lwp_pcb.pcb_flags & INSTR_VALID);
486}
487
488/*
489 * Called from trap() when a load or store instruction
490 * falls in a watched page but is not a watchpoint.
491 * We emulate the instruction in the kernel.
492 */
493/* ARGSUSED */
494int
495pr_watch_emul(struct regs *rp, caddr_t addr, enum seg_rw rw)
496{
497#ifdef SOMEDAY
498	int res;
499	proc_t *p = curproc;
500	char *badaddr = (caddr_t)(-1);
501	int mapped;
502
503	/* prevent recursive calls to pr_watch_emul() */
504	ASSERT(!(curthread->t_flag & T_WATCHPT));
505	curthread->t_flag |= T_WATCHPT;
506
507	watch_disable_addr(addr, 8, rw);
508	res = do_unaligned(rp, &badaddr);
509	watch_enable_addr(addr, 8, rw);
510
511	curthread->t_flag &= ~T_WATCHPT;
512	if (res == SIMU_SUCCESS) {
513		/* adjust the pc */
514		return (1);
515	}
516#endif
517	return (0);
518}
519
520/*
521 * Return the number of active entries in the local descriptor table.
522 */
523int
524prnldt(proc_t *p)
525{
526	int limit, i, n;
527	user_desc_t *udp;
528
529	ASSERT(MUTEX_HELD(&p->p_ldtlock));
530
531	/*
532	 * Currently 64 bit processes cannot have private LDTs.
533	 */
534	ASSERT(p->p_model != DATAMODEL_LP64 || p->p_ldt == NULL);
535
536	if (p->p_ldt == NULL)
537		return (0);
538	n = 0;
539	limit = p->p_ldtlimit;
540	ASSERT(limit >= 0 && limit < MAXNLDT);
541
542	/*
543	 * Count all present user descriptors.
544	 */
545	for (i = LDT_UDBASE, udp = &p->p_ldt[i]; i <= limit; i++, udp++)
546		if (udp->usd_type != 0 || udp->usd_dpl != 0 || udp->usd_p != 0)
547			n++;
548	return (n);
549}
550
551/*
552 * Fetch the active entries from the local descriptor table.
553 */
554void
555prgetldt(proc_t *p, struct ssd *ssd)
556{
557	int i, limit;
558	user_desc_t *udp;
559
560	ASSERT(MUTEX_HELD(&p->p_ldtlock));
561
562	if (p->p_ldt == NULL)
563		return;
564
565	limit = p->p_ldtlimit;
566	ASSERT(limit >= 0 && limit < MAXNLDT);
567
568	/*
569	 * All present user descriptors.
570	 */
571	for (i = LDT_UDBASE, udp = &p->p_ldt[i]; i <= limit; i++, udp++)
572		if (udp->usd_type != 0 || udp->usd_dpl != 0 ||
573		    udp->usd_p != 0)
574			usd_to_ssd(udp, ssd++, SEL_LDT(i));
575}
576