/*-
 * Copyright (c) 2001 Wind River Systems, Inc.
 * All rights reserved.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 *
 * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module provides MI support for per-cpu data.
 *
 * Each architecture determines the mapping of logical CPU IDs to physical
 * CPUs.  The requirements of this mapping are as follows:
 *  - Logical CPU IDs must reside in the range 0 ... MAXCPU - 1.
 *  - The mapping is not required to be dense.  That is, there may be
 *    gaps in the mappings.
 *  - The platform sets the value of MAXCPU in <machine/param.h>.
 *  - It is suggested, but not required, that in the non-SMP case, the
 *    platform define MAXCPU to be 1 and define the logical ID of the
 *    sole CPU as 0.
4676440Sjhb */ 4776440Sjhb 48116182Sobrien#include <sys/cdefs.h> 49116182Sobrien__FBSDID("$FreeBSD$"); 50116182Sobrien 5187702Sjhb#include "opt_ddb.h" 5287702Sjhb 5376440Sjhb#include <sys/param.h> 5476440Sjhb#include <sys/systm.h> 55194784Sjeff#include <sys/sysctl.h> 5687702Sjhb#include <sys/lock.h> 57194784Sjeff#include <sys/malloc.h> 5876440Sjhb#include <sys/pcpu.h> 5987702Sjhb#include <sys/proc.h> 60150576Srwatson#include <sys/smp.h> 61194784Sjeff#include <sys/sx.h> 62262739Sglebius#include <vm/uma.h> 6387702Sjhb#include <ddb/ddb.h> 6476440Sjhb 65227293Sedstatic MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting."); 66194784Sjeff 67194784Sjeffstruct dpcpu_free { 68194784Sjeff uintptr_t df_start; 69194784Sjeff int df_len; 70194784Sjeff TAILQ_ENTRY(dpcpu_free) df_link; 71194784Sjeff}; 72194784Sjeff 73215701Sdimstatic DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]); 74194784Sjeffstatic TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head); 75194784Sjeffstatic struct sx dpcpu_lock; 76194784Sjeffuintptr_t dpcpu_off[MAXCPU]; 77173444Supsstruct pcpu *cpuid_to_pcpu[MAXCPU]; 78222531Snwhitehornstruct cpuhead cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead); 7976440Sjhb 8076440Sjhb/* 8187702Sjhb * Initialize the MI portions of a struct pcpu. 
8276440Sjhb */ 8376440Sjhbvoid 8487702Sjhbpcpu_init(struct pcpu *pcpu, int cpuid, size_t size) 8576440Sjhb{ 8676440Sjhb 8787702Sjhb bzero(pcpu, size); 8887702Sjhb KASSERT(cpuid >= 0 && cpuid < MAXCPU, 8987702Sjhb ("pcpu_init: invalid cpuid %d", cpuid)); 9087702Sjhb pcpu->pc_cpuid = cpuid; 9187702Sjhb cpuid_to_pcpu[cpuid] = pcpu; 92222531Snwhitehorn STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu); 9387702Sjhb cpu_pcpu_init(pcpu, cpuid, size); 94173444Sups pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue; 95173444Sups pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue; 96194784Sjeff} 97173444Sups 98194784Sjeffvoid 99194784Sjeffdpcpu_init(void *dpcpu, int cpuid) 100194784Sjeff{ 101194784Sjeff struct pcpu *pcpu; 102194784Sjeff 103194784Sjeff pcpu = pcpu_find(cpuid); 104194784Sjeff pcpu->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START; 105194784Sjeff 106194784Sjeff /* 107194784Sjeff * Initialize defaults from our linker section. 108194784Sjeff */ 109194784Sjeff memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES); 110194784Sjeff 111194784Sjeff /* 112194784Sjeff * Place it in the global pcpu offset array. 113194784Sjeff */ 114194784Sjeff dpcpu_off[cpuid] = pcpu->pc_dynamic; 11576440Sjhb} 11676440Sjhb 117194784Sjeffstatic void 118194784Sjeffdpcpu_startup(void *dummy __unused) 119194784Sjeff{ 120194784Sjeff struct dpcpu_free *df; 121194784Sjeff 122194784Sjeff df = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO); 123194784Sjeff df->df_start = (uintptr_t)&DPCPU_NAME(modspace); 124208100Sbz df->df_len = DPCPU_MODMIN; 125194784Sjeff TAILQ_INSERT_HEAD(&dpcpu_head, df, df_link); 126194784Sjeff sx_init(&dpcpu_lock, "dpcpu alloc lock"); 127194784Sjeff} 128194784SjeffSYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, 0); 129194784Sjeff 13076440Sjhb/* 131262739Sglebius * UMA_PCPU_ZONE zones, that are available for all kernel 132262739Sglebius * consumers. Right now 64 bit zone is used for counter(9) 133262739Sglebius * and pointer zone is used by flowtable. 
134262739Sglebius */ 135262739Sglebius 136262739Sglebiusuma_zone_t pcpu_zone_64; 137262739Sglebiusuma_zone_t pcpu_zone_ptr; 138262739Sglebius 139262739Sglebiusstatic void 140262739Sglebiuspcpu_zones_startup(void) 141262739Sglebius{ 142262739Sglebius 143262739Sglebius pcpu_zone_64 = uma_zcreate("64 pcpu", sizeof(uint64_t), 144262739Sglebius NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU); 145262739Sglebius 146262739Sglebius if (sizeof(uint64_t) == sizeof(void *)) 147262739Sglebius pcpu_zone_ptr = pcpu_zone_64; 148262739Sglebius else 149262739Sglebius pcpu_zone_ptr = uma_zcreate("ptr pcpu", sizeof(void *), 150262739Sglebius NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU); 151262739Sglebius} 152262739SglebiusSYSINIT(pcpu_zones, SI_SUB_KMEM, SI_ORDER_ANY, pcpu_zones_startup, NULL); 153262739Sglebius 154262739Sglebius/* 155194784Sjeff * First-fit extent based allocator for allocating space in the per-cpu 156194784Sjeff * region reserved for modules. This is only intended for use by the 157194784Sjeff * kernel linkers to place module linker sets. 
158194784Sjeff */ 159194784Sjeffvoid * 160194784Sjeffdpcpu_alloc(int size) 161194784Sjeff{ 162194784Sjeff struct dpcpu_free *df; 163194784Sjeff void *s; 164194784Sjeff 165194784Sjeff s = NULL; 166194784Sjeff size = roundup2(size, sizeof(void *)); 167194784Sjeff sx_xlock(&dpcpu_lock); 168194784Sjeff TAILQ_FOREACH(df, &dpcpu_head, df_link) { 169194784Sjeff if (df->df_len < size) 170194784Sjeff continue; 171194784Sjeff if (df->df_len == size) { 172194784Sjeff s = (void *)df->df_start; 173194784Sjeff TAILQ_REMOVE(&dpcpu_head, df, df_link); 174194784Sjeff free(df, M_PCPU); 175194784Sjeff break; 176194784Sjeff } 177194784Sjeff s = (void *)df->df_start; 178194784Sjeff df->df_len -= size; 179194784Sjeff df->df_start = df->df_start + size; 180194784Sjeff break; 181194784Sjeff } 182194784Sjeff sx_xunlock(&dpcpu_lock); 183194784Sjeff 184194784Sjeff return (s); 185194784Sjeff} 186194784Sjeff 187194784Sjeff/* 188194784Sjeff * Free dynamic per-cpu space at module unload time. 189194784Sjeff */ 190194784Sjeffvoid 191194784Sjeffdpcpu_free(void *s, int size) 192194784Sjeff{ 193194784Sjeff struct dpcpu_free *df; 194194784Sjeff struct dpcpu_free *dn; 195194784Sjeff uintptr_t start; 196194784Sjeff uintptr_t end; 197194784Sjeff 198194784Sjeff size = roundup2(size, sizeof(void *)); 199194784Sjeff start = (uintptr_t)s; 200194784Sjeff end = start + size; 201194784Sjeff /* 202194784Sjeff * Free a region of space and merge it with as many neighbors as 203194784Sjeff * possible. Keeping the list sorted simplifies this operation. 204194784Sjeff */ 205194784Sjeff sx_xlock(&dpcpu_lock); 206194784Sjeff TAILQ_FOREACH(df, &dpcpu_head, df_link) { 207194784Sjeff if (df->df_start > end) 208194784Sjeff break; 209194784Sjeff /* 210194784Sjeff * If we expand at the end of an entry we may have to 211194784Sjeff * merge it with the one following it as well. 
212194784Sjeff */ 213194784Sjeff if (df->df_start + df->df_len == start) { 214194784Sjeff df->df_len += size; 215194784Sjeff dn = TAILQ_NEXT(df, df_link); 216194784Sjeff if (df->df_start + df->df_len == dn->df_start) { 217194784Sjeff df->df_len += dn->df_len; 218194784Sjeff TAILQ_REMOVE(&dpcpu_head, dn, df_link); 219194784Sjeff free(dn, M_PCPU); 220194784Sjeff } 221194784Sjeff sx_xunlock(&dpcpu_lock); 222194784Sjeff return; 223194784Sjeff } 224194784Sjeff if (df->df_start == end) { 225194784Sjeff df->df_start = start; 226194784Sjeff df->df_len += size; 227194784Sjeff sx_xunlock(&dpcpu_lock); 228194784Sjeff return; 229194784Sjeff } 230194784Sjeff } 231194784Sjeff dn = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO); 232194784Sjeff dn->df_start = start; 233194784Sjeff dn->df_len = size; 234194784Sjeff if (df) 235194784Sjeff TAILQ_INSERT_BEFORE(df, dn, df_link); 236194784Sjeff else 237194784Sjeff TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link); 238194784Sjeff sx_xunlock(&dpcpu_lock); 239194784Sjeff} 240194784Sjeff 241194784Sjeff/* 242194784Sjeff * Initialize the per-cpu storage from an updated linker-set region. 243194784Sjeff */ 244194784Sjeffvoid 245194784Sjeffdpcpu_copy(void *s, int size) 246194784Sjeff{ 247194784Sjeff#ifdef SMP 248194784Sjeff uintptr_t dpcpu; 249194784Sjeff int i; 250194784Sjeff 251194784Sjeff for (i = 0; i < mp_ncpus; ++i) { 252194784Sjeff dpcpu = dpcpu_off[i]; 253194784Sjeff if (dpcpu == 0) 254194784Sjeff continue; 255194784Sjeff memcpy((void *)(dpcpu + (uintptr_t)s), s, size); 256194784Sjeff } 257194784Sjeff#else 258194784Sjeff memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size); 259194784Sjeff#endif 260194784Sjeff} 261194784Sjeff 262194784Sjeff/* 26387702Sjhb * Destroy a struct pcpu. 
26476440Sjhb */ 26587702Sjhbvoid 26687702Sjhbpcpu_destroy(struct pcpu *pcpu) 26776440Sjhb{ 26876440Sjhb 269222531Snwhitehorn STAILQ_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu); 27087702Sjhb cpuid_to_pcpu[pcpu->pc_cpuid] = NULL; 271194784Sjeff dpcpu_off[pcpu->pc_cpuid] = 0; 27276440Sjhb} 27387702Sjhb 27487702Sjhb/* 27587702Sjhb * Locate a struct pcpu by cpu id. 27687702Sjhb */ 27787702Sjhbstruct pcpu * 27887702Sjhbpcpu_find(u_int cpuid) 27987702Sjhb{ 28087702Sjhb 28187702Sjhb return (cpuid_to_pcpu[cpuid]); 28287702Sjhb} 28387702Sjhb 284194784Sjeffint 285194784Sjeffsysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS) 286194784Sjeff{ 287194935Sjeff uintptr_t dpcpu; 288194784Sjeff int64_t count; 289194784Sjeff int i; 290194784Sjeff 291194784Sjeff count = 0; 292194784Sjeff for (i = 0; i < mp_ncpus; ++i) { 293194784Sjeff dpcpu = dpcpu_off[i]; 294194784Sjeff if (dpcpu == 0) 295194784Sjeff continue; 296194784Sjeff count += *(int64_t *)(dpcpu + (uintptr_t)arg1); 297194784Sjeff } 298194784Sjeff return (SYSCTL_OUT(req, &count, sizeof(count))); 299194784Sjeff} 300194784Sjeff 301194784Sjeffint 302194935Sjeffsysctl_dpcpu_long(SYSCTL_HANDLER_ARGS) 303194935Sjeff{ 304194935Sjeff uintptr_t dpcpu; 305194935Sjeff long count; 306194935Sjeff int i; 307194935Sjeff 308194935Sjeff count = 0; 309194935Sjeff for (i = 0; i < mp_ncpus; ++i) { 310194935Sjeff dpcpu = dpcpu_off[i]; 311194935Sjeff if (dpcpu == 0) 312194935Sjeff continue; 313194935Sjeff count += *(long *)(dpcpu + (uintptr_t)arg1); 314194935Sjeff } 315194935Sjeff return (SYSCTL_OUT(req, &count, sizeof(count))); 316194935Sjeff} 317194935Sjeff 318194935Sjeffint 319194784Sjeffsysctl_dpcpu_int(SYSCTL_HANDLER_ARGS) 320194784Sjeff{ 321194935Sjeff uintptr_t dpcpu; 322194784Sjeff int count; 323194784Sjeff int i; 324194784Sjeff 325194784Sjeff count = 0; 326194784Sjeff for (i = 0; i < mp_ncpus; ++i) { 327194784Sjeff dpcpu = dpcpu_off[i]; 328194784Sjeff if (dpcpu == 0) 329194784Sjeff continue; 330194784Sjeff count += *(int *)(dpcpu + (uintptr_t)arg1); 
331194784Sjeff } 332194784Sjeff return (SYSCTL_OUT(req, &count, sizeof(count))); 333194784Sjeff} 334194784Sjeff 33587702Sjhb#ifdef DDB 336196132SbzDB_SHOW_COMMAND(dpcpu_off, db_show_dpcpu_off) 337196132Sbz{ 338196132Sbz int id; 339150576Srwatson 340209059Sjhb CPU_FOREACH(id) { 341196132Sbz db_printf("dpcpu_off[%2d] = 0x%jx (+ DPCPU_START = %p)\n", 342196132Sbz id, (uintmax_t)dpcpu_off[id], 343196132Sbz (void *)(uintptr_t)(dpcpu_off[id] + DPCPU_START)); 344196132Sbz } 345196132Sbz} 346196132Sbz 347150576Srwatsonstatic void 348150576Srwatsonshow_pcpu(struct pcpu *pc) 34987702Sjhb{ 35087702Sjhb struct thread *td; 35187702Sjhb 35287702Sjhb db_printf("cpuid = %d\n", pc->pc_cpuid); 353208392Sjhb db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic); 35487702Sjhb db_printf("curthread = "); 35587702Sjhb td = pc->pc_curthread; 35687702Sjhb if (td != NULL) 35787702Sjhb db_printf("%p: pid %d \"%s\"\n", td, td->td_proc->p_pid, 358173600Sjulian td->td_name); 35987702Sjhb else 36087702Sjhb db_printf("none\n"); 36187702Sjhb db_printf("curpcb = %p\n", pc->pc_curpcb); 36287702Sjhb db_printf("fpcurthread = "); 36387702Sjhb td = pc->pc_fpcurthread; 36487702Sjhb if (td != NULL) 36587702Sjhb db_printf("%p: pid %d \"%s\"\n", td, td->td_proc->p_pid, 366173600Sjulian td->td_name); 36787702Sjhb else 36887702Sjhb db_printf("none\n"); 36987702Sjhb db_printf("idlethread = "); 37087702Sjhb td = pc->pc_idlethread; 37187702Sjhb if (td != NULL) 372208392Sjhb db_printf("%p: tid %d \"%s\"\n", td, td->td_tid, td->td_name); 37387702Sjhb else 37487702Sjhb db_printf("none\n"); 37587702Sjhb db_show_mdpcpu(pc); 376208392Sjhb 377191816Szec#ifdef VIMAGE 378191816Szec db_printf("curvnet = %p\n", pc->pc_curthread->td_vnet); 379191816Szec#endif 380191816Szec 38187702Sjhb#ifdef WITNESS 38287702Sjhb db_printf("spin locks held:\n"); 383207929Sattilio witness_list_locks(&pc->pc_spinlocks, db_printf); 38487702Sjhb#endif 38587702Sjhb} 386150576Srwatson 387150576SrwatsonDB_SHOW_COMMAND(pcpu, db_show_pcpu) 
388150576Srwatson{ 389150576Srwatson struct pcpu *pc; 390150576Srwatson int id; 391150576Srwatson 392150576Srwatson if (have_addr) 393150576Srwatson id = ((addr >> 4) % 16) * 10 + (addr % 16); 394150576Srwatson else 395150576Srwatson id = PCPU_GET(cpuid); 396150576Srwatson pc = pcpu_find(id); 397150576Srwatson if (pc == NULL) { 398150576Srwatson db_printf("CPU %d not found\n", id); 399150576Srwatson return; 400150576Srwatson } 401150576Srwatson show_pcpu(pc); 402150576Srwatson} 403150576Srwatson 404183054SsamDB_SHOW_ALL_COMMAND(pcpu, db_show_cpu_all) 405150576Srwatson{ 406150576Srwatson struct pcpu *pc; 407150576Srwatson int id; 408150576Srwatson 409150576Srwatson db_printf("Current CPU: %d\n\n", PCPU_GET(cpuid)); 410152021Sjhb for (id = 0; id <= mp_maxid; id++) { 411150576Srwatson pc = pcpu_find(id); 412150576Srwatson if (pc != NULL) { 413150576Srwatson show_pcpu(pc); 414150576Srwatson db_printf("\n"); 415150576Srwatson } 416150576Srwatson } 417150576Srwatson} 418183054SsamDB_SHOW_ALIAS(allpcpu, db_show_cpu_all); 41987702Sjhb#endif 420