cpr_impl.c revision 6336:4eaf084434c9
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28/*
29 * Platform specific implementation code
30 * Currently only suspend to RAM is supported (ACPI S3)
31 */
32
33#define	SUNDDI_IMPL
34
35#include <sys/types.h>
36#include <sys/promif.h>
37#include <sys/prom_isa.h>
38#include <sys/prom_plat.h>
39#include <sys/cpuvar.h>
40#include <sys/pte.h>
41#include <vm/hat.h>
42#include <vm/page.h>
43#include <vm/as.h>
44#include <sys/cpr.h>
45#include <sys/kmem.h>
46#include <sys/clock.h>
47#include <sys/kmem.h>
48#include <sys/panic.h>
49#include <vm/seg_kmem.h>
50#include <sys/cpu_module.h>
51#include <sys/callb.h>
52#include <sys/machsystm.h>
53#include <sys/vmsystm.h>
54#include <sys/systm.h>
55#include <sys/archsystm.h>
56#include <sys/stack.h>
57#include <sys/fs/ufs_fs.h>
58#include <sys/memlist.h>
59#include <sys/bootconf.h>
60#include <sys/thread.h>
61#include <sys/x_call.h>
62#include <sys/smp_impldefs.h>
63#include <vm/vm_dep.h>
64#include <sys/psm.h>
65#include <sys/epm.h>
66#include <sys/cpr_wakecode.h>
67#include <sys/x86_archext.h>
68#include <sys/reboot.h>
69#include <sys/acpi/acpi.h>
70#include <sys/acpica.h>
71
/* printf-style conversion used for the kgsbase field in prt_other_cpus() */
#define	AFMT	"%lx"

/* Symbols defined outside this file */
extern int	flushes_require_xcalls;
extern cpuset_t	cpu_ready_set;

#if defined(__amd64)
/* Used below only for its address, to compute an offset from wc_rm_start */
extern void	*wc_long_mode_64(void);
#endif	/* __amd64 */
extern int	tsc_gethrtime_enable;
extern	void	i_cpr_start_cpu(void);

/* Machine type exported by this platform implementation */
ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
/* Exported hook, initialized to i_cpr_start_cpu() defined in this file */
void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

/*
 * Table of saved per-cpu contexts, indexed by cpu number; allocated by
 * i_cpr_alloc_cpus() and released by i_cpr_free_cpus().
 */
static wc_cpu_t	*wc_other_cpus = NULL;
/*
 * Set of cpus that have reached i_cpr_start_cpu() during resume; polled by
 * i_cpr_pre_resume_cpus() via wait_for_set().
 */
static cpuset_t procset;

/* Forward declarations for file-local helpers */
static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);

#if defined(__amd64)
static void restore_stack(wc_cpu_t *cpup);
static void save_stack(wc_cpu_t *cpup);
/* Exported hook, initialized to save_stack() defined in this file */
void (*save_stack_func)(wc_cpu_t *) = save_stack;
#endif	/* __amd64 */
103
104/*
105 * restart paused slave cpus
106 */
107void
108i_cpr_machdep_setup(void)
109{
110	if (ncpus > 1) {
111		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
112		mutex_enter(&cpu_lock);
113		start_cpus();
114		mutex_exit(&cpu_lock);
115	}
116}
117
118
/*
 * Stop all interrupt activities in the system.
 *
 * Done by calling spl7(); its return value (presumably the previous
 * priority level -- confirm) is deliberately discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
127
/*
 * Set machine up to take interrupts.
 *
 * Done by calling spl0(); its return value is deliberately discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
136
/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 *
 * This implementation saves nothing and is not expected to be called:
 * the ASSERT below always fails when assertions are enabled.
 */
void
i_cpr_save_machdep_info(void)
{
	/* asserted through a variable rather than a literal 0 */
	int notcalled = 0;
	ASSERT(notcalled);
}
148
149
/*
 * Intentionally empty: this implementation has nothing to do here.
 */
void
i_cpr_set_tbr(void)
{
}
154
155
/*
 * Return the processor id of the boot cpu, which is always 0 here.
 */
processorid_t
i_cpr_bootcpuid(void)
{
	return (0);
}
161
162/*
163 * cpu0 should contain bootcpu info
164 */
165cpu_t *
166i_cpr_bootcpu(void)
167{
168	ASSERT(MUTEX_HELD(&cpu_lock));
169
170	return (cpu_get(i_cpr_bootcpuid()));
171}
172
/*
 *	Save context for the specified CPU
 *
 *	arg carries the index of this CPU's slot in wc_other_cpus, passed as
 *	a pointer-sized integer.  The slot's APIC state is allocated and
 *	saved, then wc_save_context() records the CPU context in the slot.
 *	On the resume return of wc_save_context() the APIC state is restored
 *	and freed and the CPU adds itself to cpu_ready_set.
 *
 *	Always returns NULL.
 */
void *
i_cpr_save_context(void *arg)
{
	long	index = (long)arg;
	psm_state_request_t *papic_state;
	int resuming;
	int	ret;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	/*
	 * NOTE(review): the bound checked is NCPU, while i_cpr_alloc_cpus()
	 * sizes wc_other_cpus by ncpus -- confirm index can never reach ncpus.
	 */
	ASSERT(index < NCPU);

	papic_state = &(wc_other_cpus + index)->wc_apic_state;

	/* failures here are only caught when assertions are enabled */
	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	/*
	 * wc_save_context returns twice, once when suspending and
	 * once when resuming.  As used here and in i_cpr_power_down(),
	 * a zero return is treated as the resume path and a non-zero
	 * return as the suspend path.
	 */
	resuming = (wc_save_context(wc_other_cpus + index) == 0);

	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 *
	 * NOTE(review): the resume branch below does call functions;
	 * presumably the restriction applies to the suspend path only --
	 * confirm.
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	}
	return (NULL);
}
231
/*
 * Mapping of the warm reset vector; set by i_cpr_pre_resume_cpus() and
 * unmapped in i_cpr_post_resume_cpus().
 */
static ushort_t *warm_reset_vector = NULL;

/*
 * Map the warm reset vector at physical address WARM_RESET_VECTOR and
 * point it at the real mode code in the rm_platter page.
 *
 * Two consecutive 16-bit words are written: first the offset of rm_code
 * within the platter (plus the low four bits of rm_platter_va), then
 * rm_platter_pa >> 4.
 *
 * Note that the file-scope warm_reset_vector is used directly as the
 * working pointer; it is stepped forward for the second store and stepped
 * back before returning.
 *
 * Returns the start of the mapping, or NULL if psm_map_phys() fails.
 */
static ushort_t *
map_warm_reset_vector()
{
	/*LINTED*/
	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
		return (NULL);

	/*
	 * setup secondary cpu bios boot up vector
	 */
	*warm_reset_vector = (ushort_t)((caddr_t)
	    /*LINTED*/
	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
	    + ((ulong_t)rm_platter_va & 0xf));
	warm_reset_vector++;
	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);

	/* step back so the caller gets the start of the mapping */
	--warm_reset_vector;
	return (warm_reset_vector);
}
255
256void
257i_cpr_pre_resume_cpus()
258{
259	/*
260	 * this is a cut down version of start_other_cpus()
261	 * just do the initialization to wake the other cpus
262	 */
263	unsigned who;
264	int boot_cpuid = i_cpr_bootcpuid();
265	uint32_t		code_length = 0;
266	caddr_t			wakevirt = rm_platter_va;
267	/*LINTED*/
268	wakecode_t		*wp = (wakecode_t *)wakevirt;
269	char *str = "i_cpr_pre_resume_cpus";
270	extern int get_tsc_ready();
271	int err;
272
273	/*LINTED*/
274	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
275
276	/*
277	 * Copy the real mode code at "real_mode_start" to the
278	 * page at rm_platter_va.
279	 */
280	warm_reset_vector = map_warm_reset_vector();
281	if (warm_reset_vector == NULL) {
282		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
283		return;
284	}
285
286	flushes_require_xcalls = 1;
287
288	/*
289	 * We lock our affinity to the master CPU to ensure that all slave CPUs
290	 * do their TSC syncs with the same CPU.
291	 */
292
293	affinity_set(CPU_CURRENT);
294
295	/*
296	 * Mark the boot cpu as being ready and in the procset, since we are
297	 * running on that cpu.
298	 */
299	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
300	CPUSET_ONLY(procset, boot_cpuid);
301
302	for (who = 0; who < ncpus; who++) {
303
304		wc_cpu_t	*cpup = wc_other_cpus + who;
305		wc_desctbr_t	gdt;
306
307		if (who == boot_cpuid)
308			continue;
309
310		if (!CPU_IN_SET(mp_cpus, who))
311			continue;
312
313		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))
314
315		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));
316
317		gdt.base = cpup->wc_gdt_base;
318		gdt.limit = cpup->wc_gdt_limit;
319
320#if defined(__amd64)
321		code_length = (uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start;
322#else
323		code_length = 0;
324#endif
325
326		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
327
328		if ((err = mach_cpuid_start(who, rm_platter_va)) != 0) {
329			cmn_err(CE_WARN, "cpu%d: failed to start during "
330			    "suspend/resume error %d", who, err);
331			continue;
332		}
333
334		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))
335
336		if (!wait_for_set(&procset, who))
337			continue;
338
339		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))
340
341		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))
342
343		if (tsc_gethrtime_enable) {
344			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
345			tsc_sync_master(who);
346		}
347
348		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
349		    who))
350		/*
351		 * Wait for cpu to declare that it is ready, we want the
352		 * cpus to start serially instead of in parallel, so that
353		 * they do not contend with each other in wc_rm_start()
354		 */
355		if (!wait_for_set(&cpu_ready_set, who))
356			continue;
357
358		/*
359		 * do not need to re-initialize dtrace using dtrace_cpu_init
360		 * function
361		 */
362		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
363	}
364
365	affinity_clear();
366
367	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
368
369}
370
/*
 * Release a mapping obtained from map_warm_reset_vector().  The length
 * passed to psm_unmap_phys() matches the one used when mapping.
 *
 * Note: the parameter shadows the file-scope variable of the same name;
 * only the argument is used here.
 */
static void
unmap_warm_reset_vector(ushort_t *warm_reset_vector)
{
	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
}
376
377/*
378 * We need to setup a 1:1 (virtual to physical) mapping for the
379 * page containing the wakeup code.
380 */
381static struct as *save_as;	/* when switching to kas */
382
383static void
384unmap_wakeaddr_1to1(uint64_t wakephys)
385{
386	uintptr_t	wp = (uintptr_t)wakephys;
387	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
388	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
389}
390
391void
392i_cpr_post_resume_cpus()
393{
394	uint64_t	wakephys = rm_platter_pa;
395
396	if (warm_reset_vector != NULL)
397		unmap_warm_reset_vector(warm_reset_vector);
398
399	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
400	    HAT_UNLOAD);
401
402	/*
403	 * cmi_post_mpstartup() is only required upon boot not upon
404	 * resume from RAM
405	 */
406
407	PT(PT_UNDO1to1);
408	/* Tear down 1:1 mapping for wakeup code */
409	unmap_wakeaddr_1to1(wakephys);
410}
411
/*
 * Intentionally empty in this implementation; flag is ignored.
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}
417
/*
 * Always returns 0: this implementation reports no reusable support.
 */
int
i_cpr_reusable_supported(void)
{
	return (0);
}
423static void
424map_wakeaddr_1to1(uint64_t wakephys)
425{
426	uintptr_t	wp = (uintptr_t)wakephys;
427	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
428	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
429	    HAT_LOAD);
430	save_as = curthread->t_procp->p_as;
431	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
432}
433
434
435void
436prt_other_cpus()
437{
438	int	who;
439
440	if (ncpus == 1) {
441		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
442		    "uniprocessor machine\n"))
443		return;
444	}
445
446	for (who = 0; who < ncpus; who++) {
447
448		wc_cpu_t	*cpup = wc_other_cpus + who;
449
450		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
451		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
452		    AFMT ", sp=%lx\n", who,
453		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
454		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
455		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
456		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
457	}
458}
459
460/*
461 * Power down the system.
462 */
463int
464i_cpr_power_down(int sleeptype)
465{
466	caddr_t		wakevirt = rm_platter_va;
467	uint64_t	wakephys = rm_platter_pa;
468	ulong_t		saved_intr;
469	uint32_t	code_length = 0;
470	wc_desctbr_t	gdt;
471	/*LINTED*/
472	wakecode_t	*wp = (wakecode_t *)wakevirt;
473	/*LINTED*/
474	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
475	wc_cpu_t	*cpup = &(wp->wc_cpu);
476	dev_info_t	*ppm;
477	int		ret = 0;
478	power_req_t	power_req;
479	char *str =	"i_cpr_power_down";
480#if defined(__amd64)
481	/*LINTED*/
482	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
483#endif
484	extern int	cpr_suspend_succeeded;
485	extern void	kernel_wc_code();
486
487	ASSERT(sleeptype == CPR_TORAM);
488	ASSERT(CPU->cpu_id == 0);
489
490	if ((ppm = PPM(ddi_root_node())) == NULL) {
491		PMD(PMD_SX, ("%s: root node not claimed\n", str))
492		return (ENOTTY);
493	}
494
495	PMD(PMD_SX, ("Entering %s()\n", str))
496
497	PT(PT_IC);
498	saved_intr = intr_clear();
499
500	PT(PT_1to1);
501	/* Setup 1:1 mapping for wakeup code */
502	map_wakeaddr_1to1(wakephys);
503
504	PMD(PMD_SX, ("ncpus=%d\n", ncpus))
505
506	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
507	    ((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)), WC_CODESIZE))
508
509	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
510	    (void *)wakevirt, (uint_t)wakephys))
511
512	ASSERT(((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)) <
513	    WC_CODESIZE);
514
515	bzero(wakevirt, PAGESIZE);
516
517	/* Copy code to rm_platter */
518	bcopy((caddr_t)wc_rm_start, wakevirt,
519	    (size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start));
520
521	prt_other_cpus();
522
523#if defined(__amd64)
524
525	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
526	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
527	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
528	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
529
530	real_mode_platter->rm_cr4 = getcr4();
531	real_mode_platter->rm_pdbr = getcr3();
532
533	rmp_gdt_init(real_mode_platter);
534
535	/*
536	 * Since the CPU needs to jump to protected mode using an identity
537	 * mapped address, we need to calculate it here.
538	 */
539	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
540	    ((uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start);
541
542	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
543	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
544
545	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
546	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
547
548	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
549	    (ulong_t)real_mode_platter->rm_longmode64_addr))
550
551#endif
552
553	PT(PT_SC);
554	if (wc_save_context(cpup)) {
555
556		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
557		if (ret != 0)
558			return (ret);
559
560		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
561		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
562		if (ret != 0)
563			return (ret);
564
565		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
566		    (uint_t)wakephys, (void *)&kernel_wc_code))
567		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
568		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
569		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
570		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
571		    cpup->wc_esp))
572		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
573		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
574		    (long)cpup->wc_cr4))
575		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
576		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
577		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
578		    (long)cpup->wc_eflags))
579
580		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
581		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
582		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
583		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
584		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
585
586		gdt.base = cpup->wc_gdt_base;
587		gdt.limit = cpup->wc_gdt_limit;
588
589#if defined(__amd64)
590		code_length = (uint32_t)wc_long_mode_64 -
591		    (uint32_t)wc_rm_start;
592#else
593		code_length = 0;
594#endif
595
596		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
597
598#if defined(__amd64)
599		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
600		    (ulong_t)wcpp->rm_cr4, getcr4()))
601
602		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
603		    (ulong_t)wcpp->rm_pdbr, getcr3()))
604
605		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
606		    (ulong_t)wcpp->rm_longmode64_addr))
607
608		PMD(PMD_SX,
609		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
610		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
611#endif
612
613		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
614		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
615		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
616		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
617		    (long)cpup->wc_kgsbase))
618
619		power_req.request_type = PMR_PPM_ENTER_SX;
620		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
621		power_req.req.ppm_power_enter_sx_req.test_point =
622		    cpr_test_point;
623		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
624
625		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
626		PT(PT_PPMCTLOP);
627		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
628		    &power_req, &ret);
629		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
630
631		/*
632		 * If it works, we get control back to the else branch below
633		 * If we get control back here, it didn't work.
634		 * XXX return EINVAL here?
635		 */
636
637		unmap_wakeaddr_1to1(wakephys);
638		intr_restore(saved_intr);
639
640		return (ret);
641	} else {
642		cpr_suspend_succeeded = 1;
643
644		power_req.request_type = PMR_PPM_EXIT_SX;
645		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
646
647		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
648		PT(PT_PPMCTLOP);
649		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
650		    &power_req, &ret);
651		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
652
653		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
654		/*
655		 * the restore should never fail, if the saved suceeded
656		 */
657		ASSERT(ret == 0);
658
659		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
660
661		PT(PT_INTRRESTORE);
662		intr_restore(saved_intr);
663		PT(PT_CPU);
664
665		return (ret);
666	}
667}
668
669/*
670 * Stop all other cpu's before halting or rebooting. We pause the cpu's
671 * instead of sending a cross call.
672 * Stolen from sun4/os/mp_states.c
673 */
674
675static int cpu_are_paused;	/* sic */
676
677void
678i_cpr_stop_other_cpus(void)
679{
680	mutex_enter(&cpu_lock);
681	if (cpu_are_paused) {
682		mutex_exit(&cpu_lock);
683		return;
684	}
685	pause_cpus(NULL);
686	cpu_are_paused = 1;
687
688	mutex_exit(&cpu_lock);
689}
690
691int
692i_cpr_is_supported(int sleeptype)
693{
694	extern int cpr_supported_override;
695	extern int cpr_platform_enable;
696	extern int pm_S3_enabled;
697
698	if (sleeptype != CPR_TORAM)
699		return (0);
700
701	/*
702	 * The next statement tests if a specific platform has turned off
703	 * cpr support.
704	 */
705	if (cpr_supported_override)
706		return (0);
707
708	/*
709	 * If a platform has specifically turned on cpr support ...
710	 */
711	if (cpr_platform_enable)
712		return (1);
713
714	return (pm_S3_enabled);
715}
716
/*
 * Intentionally empty: this implementation has nothing to clean up here.
 */
void
i_cpr_bitmap_cleanup(void)
{
}
721
/*
 * Intentionally empty: this implementation has nothing to free here.
 */
void
i_cpr_free_memory_resources(void)
{
}
726
727/*
728 * Needed only for S3 so far
729 */
730static int
731i_cpr_platform_alloc(psm_state_request_t *req)
732{
733	char	*str = "i_cpr_platform_alloc";
734
735	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
736
737	if (ncpus == 1) {
738		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
739		return (0);
740	}
741
742	req->psr_cmd = PSM_STATE_ALLOC;
743	return ((*psm_state)(req));
744}
745
746/*
747 * Needed only for S3 so far
748 */
749static void
750i_cpr_platform_free(psm_state_request_t *req)
751{
752	char	*str = "i_cpr_platform_free";
753
754	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
755
756	if (ncpus == 1) {
757		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
758	}
759
760	req->psr_cmd = PSM_STATE_FREE;
761	(void) (*psm_state)(req);
762}
763
764static int
765i_cpr_save_apic(psm_state_request_t *req)
766{
767	char	*str = "i_cpr_save_apic";
768
769	if (ncpus == 1) {
770		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
771		return (0);
772	}
773
774	req->psr_cmd = PSM_STATE_SAVE;
775	return ((*psm_state)(req));
776}
777
778static int
779i_cpr_restore_apic(psm_state_request_t *req)
780{
781	char	*str = "i_cpr_restore_apic";
782
783	if (ncpus == 1) {
784		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
785		return (0);
786	}
787
788	req->psr_cmd = PSM_STATE_RESTORE;
789	return ((*psm_state)(req));
790}
791
792
/*
 * Fill in the real mode platter at rm_platter_va for the cpu about to be
 * started.
 *
 *	cpun	cpu number stored in rm_cpu
 *	offset	added to rm_platter_pa to form rm_longmode64_addr (amd64
 *		only; unused otherwise)
 *	cr4	value stored in rm_cr4
 *	gdt	base and limit stored in rm_gdt_base / rm_gdt_lim
 *
 * rm_pdbr is taken from the current %cr3.  On amd64 this panics if %cr3
 * is above 4G.
 */
/* stop lint complaining about offset not being used in 32bit mode */
#if !defined(__amd64)
/*ARGSUSED*/
#endif
static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
{
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * Fill up the real mode platter to make it easy for real mode code to
	 * kick it off. This area should really be one passed by boot to kernel
	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
	 * have identical physical and virtual address in paged mode.
	 */

	real_mode_platter->rm_pdbr = getcr3();
	real_mode_platter->rm_cpu = cpun;
	real_mode_platter->rm_cr4 = cr4;

	real_mode_platter->rm_gdt_base = gdt.base;
	real_mode_platter->rm_gdt_lim = gdt.limit;

#if defined(__amd64)
	real_mode_platter->rm_x86feature = x86_feature;

	/* the page table root must be addressable with 32 bits */
	if (getcr3() > 0xffffffffUL)
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%llx).",
		    (unsigned long long)getcr3());

	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
	real_mode_platter->rm_temp_gdt[0] = 0ULL;
	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
	/* physical address of rm_temp_gdt: platter base + member offset */
	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);

	real_mode_platter->rm_temp_idt_lim = 0;
	real_mode_platter->rm_temp_idt_base = 0;

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
#endif	/* __amd64 */

	/* return; */
}
857
/*
 * Per-cpu startup work for a cpu that has been woken during resume; this
 * is the function installed in cpr_start_cpu_func.
 *
 * Nothing is done when called on the boot cpu.  Otherwise the PAT is
 * synced (if supported), the syscall handlers are initialized, the saved
 * stack is restored (amd64 only), the cpu is added to procset (which
 * i_cpr_pre_resume_cpus() waits on), the TSC is synced if enabled and
 * interrupts are enabled.
 */
void
i_cpr_start_cpu(void)
{

	struct cpu *cp = CPU;

	char *str = "i_cpr_start_cpu";
	extern void init_cpu_syscall(struct cpu *cp);

#if defined(__amd64)
	/* this cpu's slot in the saved context table */
	wc_cpu_t	*cpup = wc_other_cpus + cp->cpu_id;
#endif	/*	__amd64	*/

	PMD(PMD_SX, ("%s() called\n", str))

	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/* i_cpr_bootcpu() requires cpu_lock to be held */
	mutex_enter(&cpu_lock);
	if (cp == i_cpr_bootcpu()) {
		mutex_exit(&cpu_lock);
		PMD(PMD_SX,
		    ("%s() called on bootcpu nothing to do!\n", str))
		return;
	}
	mutex_exit(&cpu_lock);

	/*
	 * We need to Sync PAT with cpu0's PAT. We have to do
	 * this with interrupts disabled.
	 */
	if (x86_feature & X86_PAT)
		pat_sync();

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))

	/*
	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
	 * init_cpu_info(), since the work that they do is only needed to
	 * be done once at boot time
	 */


	mutex_enter(&cpu_lock);

#if defined(__amd64)
	restore_stack(cpup);
#endif	/*	__amd64	*/

	/* announce this cpu to the waiter in i_cpr_pre_resume_cpus() */
	CPUSET_ADD(procset, cp->cpu_id);
	mutex_exit(&cpu_lock);

	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/* counterpart of tsc_sync_master() in i_cpr_pre_resume_cpus() */
	if (tsc_gethrtime_enable) {
		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
		tsc_sync_slave();
	}

	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
	    cp->cpu_id, cp->cpu_intr_actv))
	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	(void) spl0();		/* enable interrupts */

	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/*
	 * Set up the CPU module for this CPU.  This can't be done before
	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
	 * need to go load another CPU module.  The act of attempting to load
	 * a module may trigger a cross-call, which will ASSERT unless this
	 * cpu is CPU_READY.
	 */

	/*
	 * cmi already been init'd (during boot), so do not need to do it again
	 */
#ifdef PM_REINITMCAONRESUME
	if (x86_feature & X86_MCA)
		cmi_mca_init();
#endif

	PMD(PMD_SX, ("%s() returning\n", str))

	/* return; */
}
953
954#if defined(__amd64)
955/*
956 * we only need to do this for amd64!
957 */
958
/*
 * save the stack
 *
 * Copy the portion of the current thread's stack between the saved stack
 * pointer (cpup->wc_rsp) and the stack base (curthread->t_stk) into
 * cpup->wc_stack.  Installed in save_stack_func.
 *
 * NOTE(review): the copy length is not checked against the capacity of
 * wc_stack -- confirm it is always large enough.
 */
void
save_stack(wc_cpu_t *cpup)
{
	char *str = "save_stack";
	caddr_t base = curthread->t_stk;
	caddr_t sp = (caddr_t)cpup->wc_rsp;


	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
	PMD(PMD_SX, ("save_stack() curthread->t_stk = %p, sp = %p\n",
	    (void *)base, (void *)sp))

	/* the in-use region is [sp, base): base must lie above sp */
	ASSERT(base > sp);
	/*LINTED*/
	bcopy(sp, cpup->wc_stack, base - sp);

}
979
/*
 * restore the stack
 *
 * Inverse of save_stack(): copy the contents of cpup->wc_stack back into
 * the current thread's stack between cpup->wc_rsp and curthread->t_stk.
 */
static	void
restore_stack(wc_cpu_t *cpup)
{
	/*
	 * we only need to do this for amd64!
	 */

	char *str = "restore_stack";
	caddr_t base = curthread->t_stk;
	caddr_t sp = (caddr_t)cpup->wc_rsp;

	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
	PMD(PMD_SX, ("%s() curthread->t_stk = %p, sp = %p\n", str,
	    (void *)base, (void *)sp))

	/* the region being rewritten is [sp, base): base must lie above sp */
	ASSERT(base > sp);
	/*LINTED*/
	bcopy(cpup->wc_stack, sp, base - sp);

}
1003
1004#endif	/*	__amd64	*/
1005
1006
1007void
1008i_cpr_alloc_cpus(void)
1009{
1010	char *str = "i_cpr_alloc_cpus";
1011
1012	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
1013	/*
1014	 * we allocate this only when we actually need it to save on
1015	 * kernel memory
1016	 */
1017
1018	if (wc_other_cpus == NULL) {
1019		wc_other_cpus = kmem_zalloc(ncpus * sizeof (wc_cpu_t),
1020		    KM_SLEEP);
1021	}
1022
1023}
1024
1025void
1026i_cpr_free_cpus(void)
1027{
1028	if (wc_other_cpus != NULL) {
1029		kmem_free((void *) wc_other_cpus, ncpus * sizeof (wc_cpu_t));
1030		wc_other_cpus = NULL;
1031	}
1032}
1033
/*
 * wrapper for acpica_ddi_save_resources()
 *
 * dip is passed through unchanged.
 */
void
i_cpr_save_configuration(dev_info_t *dip)
{
	acpica_ddi_save_resources(dip);
}
1042
/*
 * wrapper for acpica_ddi_restore_resources()
 *
 * dip is passed through unchanged.
 */
void
i_cpr_restore_configuration(dev_info_t *dip)
{
	acpica_ddi_restore_resources(dip);
}
1051
1052static int
1053wait_for_set(cpuset_t *set, int who)
1054{
1055	int delays;
1056	char *str = "wait_for_set";
1057
1058	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
1059		if (delays == 500) {
1060			/*
1061			 * After five seconds, things are probably
1062			 * looking a bit bleak - explain the hang.
1063			 */
1064			cmn_err(CE_NOTE, "cpu%d: started, "
1065			    "but not running in the kernel yet", who);
1066			PMD(PMD_SX, ("%s() %d cpu started "
1067			    "but not running in the kernel yet\n",
1068			    str, who))
1069		} else if (delays > 2000) {
1070			/*
1071			 * We waited at least 20 seconds, bail ..
1072			 */
1073			cmn_err(CE_WARN, "cpu%d: timed out", who);
1074			PMD(PMD_SX, ("%s() %d cpu timed out\n",
1075			    str, who))
1076			return (0);
1077		}
1078
1079		/*
1080		 * wait at least 10ms, then check again..
1081		 */
1082		drv_usecwait(10000);
1083	}
1084
1085	return (1);
1086}
1087