mach_startup.c revision 6441:e428d5c501f5
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#include <sys/machsystm.h>
30#include <sys/archsystm.h>
31#include <sys/prom_plat.h>
32#include <sys/promif.h>
33#include <sys/vm.h>
34#include <sys/cpu.h>
35#include <sys/atomic.h>
36#include <sys/cpupart.h>
37#include <sys/disp.h>
38#include <sys/hypervisor_api.h>
39#include <sys/traptrace.h>
40#include <sys/modctl.h>
41#include <sys/ldoms.h>
42#include <sys/cpu_module.h>
43#include <sys/mutex_impl.h>
44#include <sys/rwlock.h>
45#include <vm/vm_dep.h>
46#include <sys/sdt.h>
47
48#ifdef TRAPTRACE
49int mach_htraptrace_enable = 1;
50#else
51int mach_htraptrace_enable = 0;
52#endif
53int htrap_tr0_inuse = 0;
54extern char htrap_tr0[];	/* prealloc buf for boot cpu */
55
56caddr_t	mmu_fault_status_area;
57
58extern void sfmmu_set_tsbs(void);
59/*
60 * CPU IDLE optimization variables/routines
61 */
62static int enable_halt_idle_cpus = 1;
63
64/*
65 * Defines for the idle_state_transition DTrace probe
66 *
67 * The probe fires when the CPU undergoes an idle state change (e.g. hv yield)
 * The argument passed is the state to which the CPU is transitioning.
69 *
70 * The states are defined here.
71 */
72#define	IDLE_STATE_NORMAL 0
73#define	IDLE_STATE_YIELDED 1
74
75#define	SUN4V_CLOCK_TICK_THRESHOLD	64
76#define	SUN4V_CLOCK_TICK_NCPUS		64
77
78extern int	clock_tick_threshold;
79extern int	clock_tick_ncpus;
80
81void
82setup_trap_table(void)
83{
84	caddr_t mmfsa_va;
85	extern	 caddr_t mmu_fault_status_area;
86	mmfsa_va =
87	    mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id);
88
89	intr_init(CPU);		/* init interrupt request free list */
90	setwstate(WSTATE_KERN);
91	set_mmfsa_scratchpad(mmfsa_va);
92	prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
93	sfmmu_set_tsbs();
94}
95
/*
 * Hook invoked when the physical memory install list changes.
 * Nothing to do on sun4v; provided to satisfy the machine-dependent
 * interface.
 */
void
phys_install_has_changed(void)
{
}
101
/*
 * Halt the present CPU until awoken via an interrupt.
 *
 * This is the idle routine installed by mach_cpu_halt_idle().  The CPU
 * publishes itself in its partition's halted set (mc_haltset) so that
 * cpu_wakeup() can find it, then yields to the hypervisor until either
 * local work appears or its halt-set bit is cleared and it is poked.
 */
static void
cpu_halt(void)
{
	cpu_t *cpup = CPU;
	processorid_t cpun = cpup->cpu_id;
	cpupart_t *cp = cpup->cpu_part;
	int hset_update = 1;
	/* re-read each loop iteration below; local runnable-thread count */
	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
	uint_t s;

	/*
	 * If this CPU is online, and there's multiple CPUs
	 * in the system, then we should notate our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitmap. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (CPU->cpu_flags & CPU_OFFLINE || ncpus == 1)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitmask
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpuset is checked to determine who
	 * (if anyone) should be awoken. We therefore need to first
	 * add ourselves to the halted cpuset, and then check if there
	 * is any work available.  The membar_producer() orders the
	 * CPU_DISP_HALTED store before the halt-set update.
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		membar_producer();
		CPUSET_ATOMIC_ADD(cp->cp_mach->mc_haltset, cpun);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitmask, and a poke.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
		}
		return;
	}

	/*
	 * We're on our way to being halted.  Wait until something becomes
	 * runnable locally or we are awakened (i.e. removed from the halt
	 * set).  Note that the call to hv_cpu_yield() can return even if
	 * we have nothing to do.
	 *
	 * Disable interrupts now, so that we'll awaken immediately
	 * after halting if someone tries to poke us between now and
	 * the time we actually halt.
	 *
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the poke will pop us out of the halted state.
	 * Also, if the offlined CPU has been brought back on-line, then
	 * we return as well.
	 *
	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
	 * is important.
	 * cpu_wakeup() must clear, then poke.
	 * cpu_halt() must disable interrupts, then check for the bit.
	 *
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 */
	s = disable_vec_intr();
	while (*p == 0 &&
	    ((hset_update && CPU_IN_SET(cp->cp_mach->mc_haltset, cpun)) ||
	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {

		/* DTrace probes bracket the hypervisor yield. */
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_YIELDED);
		(void) hv_cpu_yield();
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_NORMAL);

		/*
		 * Briefly re-enable interrupts so a pending poke can be
		 * taken before the exit conditions are re-checked.
		 */
		enable_vec_intr(s);
		s = disable_vec_intr();
	}

	/*
	 * We're no longer halted
	 */
	enable_vec_intr(s);
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
	}
}
206
/*
 * If "cpu" is halted, then wake it up clearing its halted bit in advance.
 * Otherwise, see if other CPUs in the cpu partition are halted and need to
 * be woken up so that they can steal the thread we placed on this CPU.
 * This function is only used on MP systems.
 *
 * "bound" is non-zero when the thread just enqueued is bound to "cpu";
 * in that case no other CPU can run it, so none is awakened.
 */
static void
cpu_wakeup(cpu_t *cpu, int bound)
{
	uint_t		cpu_found;
	int		result;
	cpupart_t	*cp;

	cp = cpu->cpu_part;
	if (CPU_IN_SET(cp->cp_mach->mc_haltset, cpu->cpu_id)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.  (cpu_halt() relies on this
		 * clear-then-poke ordering; see the comment there.)
		 */
		CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpu->cpu_id);
		/*
		 * We may find the current CPU present in the halted cpuset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_halt().
		 * Poking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cpu != CPU)
			poke_cpu(cpu->cpu_id);
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if the thread we just enqueued
	 * is bound.
	 */
	if (bound)
		return;

	/*
	 * See if there's any other halted CPUs. If there are, then
	 * select one, and awaken it.
	 * It's possible that after we find a CPU, somebody else
	 * will awaken it before we get the chance.
	 * In that case, look again (a negative "result" from
	 * CPUSET_ATOMIC_XDEL means we lost that race).
	 */
	do {
		CPUSET_FIND(cp->cp_mach->mc_haltset, cpu_found);
		if (cpu_found == CPUSET_NOTINSET)
			return;

		ASSERT(cpu_found >= 0 && cpu_found < NCPU);
		CPUSET_ATOMIC_XDEL(cp->cp_mach->mc_haltset, cpu_found, result);
	} while (result < 0);

	if (cpu_found != CPU->cpu_id)
		poke_cpu(cpu_found);
}
273
274void
275mach_cpu_halt_idle()
276{
277	if (enable_halt_idle_cpus) {
278		idle_cpu = cpu_halt;
279		disp_enq_thread = cpu_wakeup;
280	}
281}
282
283int
284ndata_alloc_mmfsa(struct memlist *ndata)
285{
286	size_t	size;
287
288	size = MMFSA_SIZE * max_ncpus;
289	mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize);
290	if (mmu_fault_status_area == NULL)
291		return (-1);
292	return (0);
293}
294
/*
 * Memory scrubber startup hook.  sun4v currently has no memory
 * scrubber support, so this is a no-op.
 */
void
mach_memscrub(void)
{
}
300
/*
 * FP-RAS setup hook.  sun4v currently has no fpras support, so this
 * is a no-op.
 *
 * Fixed the K&R-style empty parameter list to a proper (void)
 * prototype, consistent with the rest of this file.
 */
void
mach_fpras(void)
{
	/* no fpras support for sun4v for now */
}
306
/*
 * Hardware copy-limit tuning hook.  On sun4v the limits are set by the
 * individual CPU module, so nothing is done here.
 */
void
mach_hw_copy_limit(void)
{
}
312
313/*
314 * We need to enable soft ring functionality on Niagara platform since
315 * one strand can't handle interrupts for a 1Gb NIC. Set the tunable
316 * ip_squeue_soft_ring by default on this platform. We can also set
317 * ip_threads_per_cpu to track number of threads per core. The variables
318 * themselves are defined in space.c and used by IP module
319 */
320extern uint_t ip_threads_per_cpu;
321extern boolean_t ip_squeue_soft_ring;
322void
323startup_platform(void)
324{
325	ip_squeue_soft_ring = B_TRUE;
326	if (clock_tick_threshold == 0)
327		clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD;
328	if (clock_tick_ncpus == 0)
329		clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS;
330	/* set per-platform constants for mutex_backoff */
331	mutex_backoff_base = 1;
332	mutex_cap_factor = 4;
333	if (l2_cache_node_count() > 1) {
334		/* VF for example */
335		mutex_backoff_base = 2;
336		mutex_cap_factor = 64;
337	}
338	rw_lock_backoff = default_lock_backoff;
339	rw_lock_delay = default_lock_delay;
340}
341
342/*
343 * This function sets up hypervisor traptrace buffer
344 * This routine is called by the boot cpu only
345 */
346void
347mach_htraptrace_setup(int cpuid)
348{
349	TRAP_TRACE_CTL	*ctlp;
350	int bootcpuid = getprocessorid(); /* invoked on boot cpu only */
351
352	if (mach_htraptrace_enable && ((cpuid != bootcpuid) ||
353	    !htrap_tr0_inuse)) {
354		ctlp = &trap_trace_ctl[cpuid];
355		ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 :
356		    contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE);
357		if (ctlp->d.hvaddr_base == NULL) {
358			ctlp->d.hlimit = 0;
359			ctlp->d.hpaddr_base = NULL;
360			cmn_err(CE_WARN, "!cpu%d: failed to allocate HV "
361			    "traptrace buffer", cpuid);
362		} else {
363			ctlp->d.hlimit = HTRAP_TSIZE;
364			ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base);
365		}
366	}
367}
368
/*
 * This function enables or disables the hypervisor traptracing for the
 * given CPU, according to the global mach_htraptrace_enable flag:
 *  - enable:  hand the buffer recorded by mach_htraptrace_setup() to
 *             the hypervisor and turn tracing on; on any failure the
 *             buffer fields are reset so the buffer is not reused.
 *  - disable: if the hypervisor has a buffer configured, turn tracing
 *             off and forget the buffer (releasing htrap_tr0 for
 *             reuse when it was the one configured).
 */
void
mach_htraptrace_configure(int cpuid)
{
	uint64_t ret;
	uint64_t prev_buf, prev_bufsize;
	uint64_t prev_enable;
	uint64_t size;
	TRAP_TRACE_CTL	*ctlp;

	ctlp = &trap_trace_ctl[cpuid];
	if (mach_htraptrace_enable) {
		/*
		 * Only proceed if a buffer was allocated and it is not
		 * the static boot buffer while that is already in use.
		 */
		if ((ctlp->d.hvaddr_base != NULL) &&
		    ((ctlp->d.hvaddr_base != htrap_tr0) ||
		    (!htrap_tr0_inuse))) {
			/* Report any buffer the HV already had configured. */
			ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
			if ((ret == H_EOK) && (prev_bufsize != 0)) {
				cmn_err(CE_CONT,
				    "!cpu%d: previous HV traptrace buffer of "
				    "size 0x%lx at address 0x%lx", cpuid,
				    prev_bufsize, prev_buf);
			}

			/*
			 * Configure the buffer with the HV; the size is
			 * passed as a record count, not bytes.
			 */
			ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base,
			    ctlp->d.hlimit /
			    (sizeof (struct htrap_trace_record)), &size);
			if (ret == H_EOK) {
				ret = hv_ttrace_enable(\
				    (uint64_t)TRAP_TENABLE_ALL, &prev_enable);
				if (ret != H_EOK) {
					cmn_err(CE_WARN,
					    "!cpu%d: HV traptracing not "
					    "enabled, ta: 0x%x returned error: "
					    "%ld", cpuid, TTRACE_ENABLE, ret);
				} else {
					/* Mark the static boot buffer busy. */
					if (ctlp->d.hvaddr_base == htrap_tr0)
						htrap_tr0_inuse = 1;
				}
			} else {
				cmn_err(CE_WARN,
				    "!cpu%d: HV traptrace buffer not "
				    "configured, ta: 0x%x returned error: %ld",
				    cpuid, TTRACE_BUF_CONF, ret);
			}
			/*
			 * set hvaddr_base to NULL when traptrace buffer
			 * registration fails
			 */
			if (ret != H_EOK) {
				ctlp->d.hvaddr_base = NULL;
				ctlp->d.hlimit = 0;
				ctlp->d.hpaddr_base = NULL;
			}
		}
	} else {
		/* Disabling: only act if the HV has a buffer configured. */
		ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
		if ((ret == H_EOK) && (prev_bufsize != 0)) {
			ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL,
			    &prev_enable);
			if (ret == H_EOK) {
				if (ctlp->d.hvaddr_base == htrap_tr0)
					htrap_tr0_inuse = 0;
				ctlp->d.hvaddr_base = NULL;
				ctlp->d.hlimit = 0;
				ctlp->d.hpaddr_base = NULL;
			} else
				cmn_err(CE_WARN,
				    "!cpu%d: HV traptracing is not disabled, "
				    "ta: 0x%x returned error: %ld",
				    cpuid, TTRACE_ENABLE, ret);
		}
	}
}
444
445/*
446 * This function cleans up the hypervisor traptrace buffer
447 */
448void
449mach_htraptrace_cleanup(int cpuid)
450{
451	if (mach_htraptrace_enable) {
452		TRAP_TRACE_CTL *ctlp;
453		caddr_t httrace_buf_va;
454
455		ASSERT(cpuid < max_ncpus);
456		ctlp = &trap_trace_ctl[cpuid];
457		httrace_buf_va = ctlp->d.hvaddr_base;
458		if (httrace_buf_va == htrap_tr0) {
459			bzero(httrace_buf_va, HTRAP_TSIZE);
460		} else if (httrace_buf_va != NULL) {
461			contig_mem_free(httrace_buf_va, HTRAP_TSIZE);
462		}
463		ctlp->d.hvaddr_base = NULL;
464		ctlp->d.hlimit = 0;
465		ctlp->d.hpaddr_base = NULL;
466	}
467}
468
469/*
470 * Load any required machine class (sun4v) specific drivers.
471 */
472void
473load_mach_drivers(void)
474{
475	/*
476	 * We don't want to load these LDOMs-specific
477	 * modules if domaining is not supported.  Also,
478	 * we must be able to run on non-LDOMs firmware.
479	 */
480	if (!domaining_supported())
481		return;
482
483	/*
484	 * Load the core domain services module
485	 */
486	if (modload("misc", "ds") == -1)
487		cmn_err(CE_NOTE, "!'ds' module failed to load");
488
489	/*
490	 * Load the rest of the domain services
491	 */
492	if (modload("misc", "fault_iso") == -1)
493		cmn_err(CE_NOTE, "!'fault_iso' module failed to load");
494
495	if (modload("misc", "platsvc") == -1)
496		cmn_err(CE_NOTE, "!'platsvc' module failed to load");
497
498	if (domaining_enabled() && modload("misc", "dr_cpu") == -1)
499		cmn_err(CE_NOTE, "!'dr_cpu' module failed to load");
500
501	if (modload("misc", "dr_io") == -1)
502		cmn_err(CE_NOTE, "!'dr_io' module failed to load");
503
504	/*
505	 * Attempt to attach any virtual device servers. These
506	 * drivers must be loaded at start of day so that they
507	 * can respond to any updates to the machine description.
508	 *
509	 * Since it is quite likely that a domain will not support
510	 * one or more of these servers, failures are ignored.
511	 */
512
513	/* virtual disk server */
514	(void) i_ddi_attach_hw_nodes("vds");
515
516	/* virtual network switch */
517	(void) i_ddi_attach_hw_nodes("vsw");
518
519	/* virtual console concentrator */
520	(void) i_ddi_attach_hw_nodes("vcc");
521}
522