pg.c revision 3434:5142e1d7d0bc
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/systm.h>
29#include <sys/types.h>
30#include <sys/param.h>
31#include <sys/thread.h>
32#include <sys/cpuvar.h>
33#include <sys/cpupart.h>
34#include <sys/kmem.h>
35#include <sys/cmn_err.h>
36#include <sys/kstat.h>
37#include <sys/processor.h>
38#include <sys/disp.h>
39#include <sys/group.h>
40#include <sys/pg.h>
41
42/*
43 * Processor groups
44 *
45 * With the introduction of Chip Multi-Threaded (CMT) processor architectures,
46 * it is no longer necessarily true that a given physical processor module
47 * will present itself as a single schedulable entity (cpu_t). Rather, each
48 * chip and/or processor core may present itself as one or more "logical" CPUs.
49 *
50 * The logical CPUs presented may share physical components such as caches,
51 * data pipes, execution pipelines, FPUs, etc. It is advantageous to have the
52 * kernel be aware of the relationships existing between logical CPUs so that
53 * the appropriate optimizations may be employed.
54 *
55 * The processor group abstraction represents a set of logical CPUs that
56 * generally share some sort of physical or characteristic relationship.
57 *
58 * In the case of a physical sharing relationship, the CPUs in the group may
59 * share a pipeline, cache or floating point unit. In the case of a logical
60 * relationship, a PG may represent the set of CPUs in a processor set, or the
61 * set of CPUs running at a particular clock speed.
62 *
63 * The generic processor group structure, pg_t, contains the elements generic
64 * to a group of CPUs. Depending on the nature of the CPU relationship
65 * (LOGICAL or PHYSICAL), a pointer to a pg may be recast to a "view" of that
66 * PG where more specific data is represented.
67 *
68 * As an example, a PG representing a PHYSICAL relationship, may be recast to
69 * a pghw_t, where data further describing the hardware sharing relationship
70 * is maintained. See pghw.c and pghw.h for details on physical PGs.
71 *
72 * At this time a more specialized casting of a PG representing a LOGICAL
73 * relationship has not been implemented, but the architecture allows for this
74 * in the future.
75 *
76 * Processor Group Classes
77 *
78 * Processor group consumers may wish to maintain and associate specific
79 * data with the PGs they create. For this reason, a mechanism for creating
80 * class specific PGs exists. Classes may overload the default functions for
81 * creating, destroying, and associating CPUs with PGs, and may also register
82 * class specific callbacks to be invoked when the CPU related system
83 * configuration changes. Class specific data is stored/associated with
84 * PGs by incorporating the pg_t (or pghw_t, as appropriate), as the first
85 * element of a class specific PG object. In memory, such a structure may look
86 * like:
87 *
88 * ----------------------- - - -
89 * | common              | | | |  <--(pg_t *)
90 * ----------------------- | | -
91 * | HW specific         | | | <-----(pghw_t *)
92 * ----------------------- | -
93 * | class specific      | | <-------(pg_cmt_t *)
94 * ----------------------- -
95 *
96 * Access to the PG class specific data can be had by casting a pointer to
97 * its class specific view.
98 */
99
100static pg_t		*pg_alloc_default(pg_class_t);
101static void		pg_free_default(pg_t *);
102
103/*
104 * Bootstrap CPU specific PG data
105 * See pg_cpu_bootstrap()
106 */
107static cpu_pg_t		bootstrap_pg_data;
108
109/*
110 * Bitset of allocated PG ids (they are sequential)
111 * and the next free id in the set.
112 */
113static bitset_t		pg_id_set;
114static pgid_t		pg_id_next = 0;
115
116/*
117 * Default and externed PG ops vectors
118 */
119static struct pg_ops pg_ops_default = {
120	pg_alloc_default,	/* alloc */
121	pg_free_default,	/* free */
122	NULL,			/* cpu_init */
123	NULL,			/* cpu_fini */
124	NULL,			/* cpu_active */
125	NULL,			/* cpu_inactive */
126	NULL,			/* cpupart_in */
127	NULL,			/* cpupart_out */
128	NULL,			/* cpupart_move */
129	NULL,			/* cpu_belongs */
130};
131
/*
 * Allocate a PG through the alloc op of class "class", falling back to the
 * alloc op of the default class when the class does not supply one.
 * Note that "class" is evaluated more than once.
 *
 * NOTE(review): the alloc op is invoked without arguments here, whereas
 * pg_alloc_default() is declared as taking a pg_class_t -- confirm against
 * the definition of struct pg_ops.
 */
#define	PG_ALLOC(class)							\
	((pg_classes[class].pgc_ops->alloc != NULL) ?			\
	    pg_classes[class].pgc_ops->alloc() :			\
	    pg_classes[pg_default_cid].pgc_ops->alloc())
139
/*
 * Release "pg" through the free op of its class, falling back to the free
 * op of the default class when the class does not supply one.
 * Note that "pg" is evaluated more than once.
 */
#define	PG_FREE(pg)							\
	((pg)->pg_class->pgc_ops->free ?				\
	    (pg)->pg_class->pgc_ops->free(pg) :				\
	    pg_classes[pg_default_cid].pgc_ops->free(pg))
144
145
/*
 * Class specific membership test callback
 *
 * Asks the class of "pg" whether the CPU "cp" belongs in "pg", using the
 * class's cpu_belongs op. Evaluates to 0 if the class has no such op.
 * Note that "pg" is evaluated more than once.
 */
#define	PG_CPU_BELONGS(pg, cp)						\
	((pg)->pg_class->pgc_ops->cpu_belongs ?				\
	    (pg)->pg_class->pgc_ops->cpu_belongs(pg, cp) : 0)
152
/*
 * CPU configuration callbacks
 *
 * Each macro invokes the corresponding op of class "class" on "cp" if the
 * class supplies that op, and does nothing otherwise. The bodies are
 * wrapped in do { } while (0) so that each use expands to exactly one
 * statement, which keeps them safe in unbraced if/else and loop bodies.
 * Note that "class" is evaluated more than once.
 */
#define	PG_CPU_INIT(class, cp)						\
do {									\
	if (pg_classes[class].pgc_ops->cpu_init)			\
		pg_classes[class].pgc_ops->cpu_init(cp);		\
} while (0)

#define	PG_CPU_FINI(class, cp)						\
do {									\
	if (pg_classes[class].pgc_ops->cpu_fini)			\
		pg_classes[class].pgc_ops->cpu_fini(cp);		\
} while (0)

#define	PG_CPU_ACTIVE(class, cp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpu_active)			\
		pg_classes[class].pgc_ops->cpu_active(cp);		\
} while (0)

#define	PG_CPU_INACTIVE(class, cp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpu_inactive)			\
		pg_classes[class].pgc_ops->cpu_inactive(cp);		\
} while (0)
179
/*
 * CPU / cpupart configuration callbacks
 *
 * Each macro invokes the corresponding op of class "class" with the given
 * CPU and partition argument(s) if the class supplies that op, and does
 * nothing otherwise. The bodies are wrapped in do { } while (0) so that
 * each use expands to exactly one statement, which keeps them safe in
 * unbraced if/else and loop bodies.
 * Note that "class" is evaluated more than once.
 */
#define	PG_CPUPART_IN(class, cp, pp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpupart_in)			\
		pg_classes[class].pgc_ops->cpupart_in(cp, pp);		\
} while (0)

#define	PG_CPUPART_OUT(class, cp, pp)					\
do {									\
	if (pg_classes[class].pgc_ops->cpupart_out)			\
		pg_classes[class].pgc_ops->cpupart_out(cp, pp);		\
} while (0)

#define	PG_CPUPART_MOVE(class, cp, old, new)				\
do {									\
	if (pg_classes[class].pgc_ops->cpupart_move)			\
		pg_classes[class].pgc_ops->cpupart_move(cp, old, new);	\
} while (0)
200
201
202
203static pg_class_t	*pg_classes;
204static int		pg_nclasses;
205
206static pg_cid_t		pg_default_cid;
207
208/*
209 * Initialze common PG subsystem. Perform CPU 0 initialization
210 */
211void
212pg_init(void)
213{
214	pg_default_cid =
215	    pg_class_register("default", &pg_ops_default, PGR_LOGICAL);
216}
217
218/*
219 * Perform CPU 0 initialization
220 */
221void
222pg_cpu0_init(void)
223{
224	extern void pghw_physid_create();
225
226	/*
227	 * Create the physical ID cache for the boot CPU
228	 */
229	pghw_physid_create(CPU);
230
231	/*
232	 * pg_cpu_* require that cpu_lock be held
233	 */
234	mutex_enter(&cpu_lock);
235
236	pg_cpu_init(CPU);
237	pg_cpupart_in(CPU, &cp_default);
238	pg_cpu_active(CPU);
239
240	mutex_exit(&cpu_lock);
241}
242
243/*
244 * Register a new PG class
245 */
246pg_cid_t
247pg_class_register(char *name, struct pg_ops *ops, pg_relation_t relation)
248{
249	pg_class_t	*newclass;
250	pg_class_t	*classes_old;
251	id_t		cid;
252
253	mutex_enter(&cpu_lock);
254
255	/*
256	 * Allocate a new pg_class_t in the pg_classes array
257	 */
258	if (pg_nclasses == 0) {
259		pg_classes = kmem_zalloc(sizeof (pg_class_t), KM_SLEEP);
260	} else {
261		classes_old = pg_classes;
262		pg_classes =
263		    kmem_zalloc(sizeof (pg_class_t) * (pg_nclasses + 1),
264			KM_SLEEP);
265		(void) kcopy(classes_old, pg_classes,
266		    sizeof (pg_class_t) * pg_nclasses);
267		kmem_free(classes_old, sizeof (pg_class_t) * pg_nclasses);
268	}
269
270	cid = pg_nclasses++;
271	newclass = &pg_classes[cid];
272
273	(void) strncpy(newclass->pgc_name, name, PG_CLASS_NAME_MAX);
274	newclass->pgc_id = cid;
275	newclass->pgc_ops = ops;
276	newclass->pgc_relation = relation;
277
278	mutex_exit(&cpu_lock);
279
280	return (cid);
281}
282
283/*
284 * Try to find an existing pg in set in which to place cp.
285 * Returns the pg if found, and NULL otherwise.
286 * In the event that the CPU could belong to multiple
287 * PGs in the set, the first matching PG will be returned.
288 */
289pg_t *
290pg_cpu_find_pg(cpu_t *cp, group_t *set)
291{
292	pg_t		*pg;
293	group_iter_t	i;
294
295	group_iter_init(&i);
296	while ((pg = group_iterate(set, &i)) != NULL) {
297		/*
298		 * Ask the class if the CPU belongs here
299		 */
300		if (PG_CPU_BELONGS(pg, cp))
301			return (pg);
302	}
303	return (NULL);
304}
305
306/*
307 * Iterate over the CPUs in a PG after initializing
308 * the iterator with PG_CPU_ITR_INIT()
309 */
310cpu_t *
311pg_cpu_next(pg_cpu_itr_t *itr)
312{
313	cpu_t		*cpu;
314	pg_t		*pg = itr->pg;
315
316	cpu = group_iterate(&pg->pg_cpus, &itr->position);
317	return (cpu);
318}
319
320/*
321 * Create a PG of a given class.
322 * This routine may block.
323 */
324pg_t *
325pg_create(pg_cid_t cid)
326{
327	pg_t	*pg;
328	pgid_t	id;
329
330	ASSERT(MUTEX_HELD(&cpu_lock));
331
332	/*
333	 * Call the class specific PG allocation routine
334	 */
335	pg = PG_ALLOC(cid);
336	pg->pg_class = &pg_classes[cid];
337	pg->pg_relation = pg->pg_class->pgc_relation;
338
339	/*
340	 * Find the next free sequential pg id
341	 */
342	do {
343		if (pg_id_next >= bitset_capacity(&pg_id_set))
344			bitset_resize(&pg_id_set, pg_id_next + 1);
345		id = pg_id_next++;
346	} while (bitset_in_set(&pg_id_set, id));
347
348	pg->pg_id = id;
349	bitset_add(&pg_id_set, pg->pg_id);
350
351	/*
352	 * Create the PG's CPU group
353	 */
354	group_create(&pg->pg_cpus);
355
356	return (pg);
357}
358
359/*
360 * Destroy a PG.
361 * This routine may block.
362 */
363void
364pg_destroy(pg_t *pg)
365{
366	ASSERT(MUTEX_HELD(&cpu_lock));
367
368	group_destroy(&pg->pg_cpus);
369
370	/*
371	 * Unassign the pg_id
372	 */
373	if (pg_id_next > pg->pg_id)
374		pg_id_next = pg->pg_id;
375	bitset_del(&pg_id_set, pg->pg_id);
376
377	/*
378	 * Invoke the class specific de-allocation routine
379	 */
380	PG_FREE(pg);
381}
382
383/*
384 * Add the CPU "cp" to processor group "pg"
385 * This routine may block.
386 */
387void
388pg_cpu_add(pg_t *pg, cpu_t *cp)
389{
390	int	err;
391
392	ASSERT(MUTEX_HELD(&cpu_lock));
393
394	/* This adds the CPU to the PG's CPU group */
395	err = group_add(&pg->pg_cpus, cp, GRP_RESIZE);
396	ASSERT(err == 0);
397
398	/* This adds the PG to the CPUs PG group */
399	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
400	err = group_add(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
401	ASSERT(err == 0);
402}
403
404/*
405 * Remove "cp" from "pg".
406 * This routine may block.
407 */
408void
409pg_cpu_delete(pg_t *pg, cpu_t *cp)
410{
411	int	err;
412
413	ASSERT(MUTEX_HELD(&cpu_lock));
414
415	/* Remove the CPU from the PG */
416	err = group_remove(&pg->pg_cpus, cp, GRP_RESIZE);
417	ASSERT(err == 0);
418
419	/* Remove the PG from the CPU's PG group */
420	ASSERT(cp->cpu_pg != &bootstrap_pg_data);
421	err = group_remove(&cp->cpu_pg->pgs, pg, GRP_RESIZE);
422	ASSERT(err == 0);
423}
424
425/*
426 * Allocate a CPU's PG data. This hangs off struct cpu at cpu_pg
427 */
428static cpu_pg_t *
429pg_cpu_data_alloc(void)
430{
431	cpu_pg_t	*pgd;
432
433	pgd = kmem_zalloc(sizeof (cpu_pg_t), KM_SLEEP);
434	group_create(&pgd->pgs);
435	group_create(&pgd->cmt_pgs);
436
437	return (pgd);
438}
439
440/*
441 * Free the CPU's PG data.
442 */
443static void
444pg_cpu_data_free(cpu_pg_t *pgd)
445{
446	group_destroy(&pgd->pgs);
447	group_destroy(&pgd->cmt_pgs);
448	kmem_free(pgd, sizeof (cpu_pg_t));
449}
450
451/*
452 * A new CPU is coming into the system, either via booting or DR.
453 * Allocate it's PG data, and notify all registered classes about
454 * the new CPU.
455 *
456 * This routine may block.
457 */
458void
459pg_cpu_init(cpu_t *cp)
460{
461	pg_cid_t	i;
462
463	ASSERT(MUTEX_HELD(&cpu_lock));
464
465	/*
466	 * Allocate and size the per CPU pg data
467	 */
468	cp->cpu_pg = pg_cpu_data_alloc();
469
470	/*
471	 * Notify all registered classes about the new CPU
472	 */
473	for (i = 0; i < pg_nclasses; i++)
474		PG_CPU_INIT(i, cp);
475}
476
477/*
478 * This CPU is being deleted from the system. Notify the classes
479 * and free up the CPU's PG data.
480 */
481void
482pg_cpu_fini(cpu_t *cp)
483{
484	pg_cid_t	i;
485
486	ASSERT(MUTEX_HELD(&cpu_lock));
487
488	/*
489	 * This can happen if the CPU coming into the system
490	 * failed to power on.
491	 */
492	if (cp->cpu_pg == NULL ||
493	    cp->cpu_pg == &bootstrap_pg_data)
494		return;
495
496	for (i = 0; i < pg_nclasses; i++)
497		PG_CPU_FINI(i, cp);
498
499	pg_cpu_data_free(cp->cpu_pg);
500	cp->cpu_pg = NULL;
501}
502
503/*
504 * This CPU is becoming active (online)
505 * This routine may not block as it is called from paused CPUs
506 * context.
507 */
508void
509pg_cpu_active(cpu_t *cp)
510{
511	pg_cid_t	i;
512
513	ASSERT(MUTEX_HELD(&cpu_lock));
514
515	/*
516	 * Notify all registered classes about the new CPU
517	 */
518	for (i = 0; i < pg_nclasses; i++)
519		PG_CPU_ACTIVE(i, cp);
520}
521
522/*
523 * This CPU is going inactive (offline)
524 * This routine may not block, as it is called from paused
525 * CPUs context.
526 */
527void
528pg_cpu_inactive(cpu_t *cp)
529{
530	pg_cid_t	i;
531
532	ASSERT(MUTEX_HELD(&cpu_lock));
533
534	/*
535	 * Notify all registered classes about the new CPU
536	 */
537	for (i = 0; i < pg_nclasses; i++)
538		PG_CPU_INACTIVE(i, cp);
539}
540
541/*
542 * Invoked when the CPU is about to move into the partition
543 * This routine may block.
544 */
545void
546pg_cpupart_in(cpu_t *cp, cpupart_t *pp)
547{
548	int	i;
549
550	ASSERT(MUTEX_HELD(&cpu_lock));
551
552	/*
553	 * Notify all registered classes that the
554	 * CPU is about to enter the CPU partition
555	 */
556	for (i = 0; i < pg_nclasses; i++)
557		PG_CPUPART_IN(i, cp, pp);
558}
559
560/*
561 * Invoked when the CPU is about to move out of the partition
562 * This routine may block.
563 */
564/*ARGSUSED*/
565void
566pg_cpupart_out(cpu_t *cp, cpupart_t *pp)
567{
568	int	i;
569
570	ASSERT(MUTEX_HELD(&cpu_lock));
571
572	/*
573	 * Notify all registered classes that the
574	 * CPU is about to leave the CPU partition
575	 */
576	for (i = 0; i < pg_nclasses; i++)
577		PG_CPUPART_OUT(i, cp, pp);
578}
579
580/*
581 * Invoked when the CPU is *moving* partitions.
582 *
583 * This routine may not block, as it is called from paused CPUs
584 * context.
585 */
586void
587pg_cpupart_move(cpu_t *cp, cpupart_t *oldpp, cpupart_t *newpp)
588{
589	int	i;
590
591	ASSERT(MUTEX_HELD(&cpu_lock));
592
593	/*
594	 * Notify all registered classes that the
595	 * CPU is about to leave the CPU partition
596	 */
597	for (i = 0; i < pg_nclasses; i++)
598		PG_CPUPART_MOVE(i, cp, oldpp, newpp);
599}
600
601/*
602 * Provide the specified CPU a bootstrap pg
603 * This is needed to allow sane behaviour if any PG consuming
604 * code needs to deal with a partially initialized CPU
605 */
606void
607pg_cpu_bootstrap(cpu_t *cp)
608{
609	cp->cpu_pg = &bootstrap_pg_data;
610}
611
612/*ARGSUSED*/
613static pg_t *
614pg_alloc_default(pg_class_t class)
615{
616	return (kmem_zalloc(sizeof (pg_t), KM_SLEEP));
617}
618
619/*ARGSUSED*/
620static void
621pg_free_default(struct pg *pg)
622{
623	kmem_free(pg, sizeof (pg_t));
624}
625