1/* 2 * drivers/base/memory.c - basic Memory class support 3 * 4 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 5 * Dave Hansen <haveblue@us.ibm.com> 6 * 7 * This file provides the necessary infrastructure to represent 8 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 9 * All arch-independent code that assumes MEMORY_HOTPLUG requires 10 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 11 */ 12 13#include <linux/sysdev.h> 14#include <linux/module.h> 15#include <linux/init.h> 16#include <linux/topology.h> 17#include <linux/capability.h> 18#include <linux/device.h> 19#include <linux/memory.h> 20#include <linux/kobject.h> 21#include <linux/memory_hotplug.h> 22#include <linux/mm.h> 23#include <asm/atomic.h> 24#include <asm/uaccess.h> 25 26#define MEMORY_CLASS_NAME "memory" 27 28static struct sysdev_class memory_sysdev_class = { 29 set_kset_name(MEMORY_CLASS_NAME), 30}; 31 32static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj) 33{ 34 return MEMORY_CLASS_NAME; 35} 36 37static int memory_uevent(struct kset *kset, struct kobject *kobj, char **envp, 38 int num_envp, char *buffer, int buffer_size) 39{ 40 int retval = 0; 41 42 return retval; 43} 44 45static struct kset_uevent_ops memory_uevent_ops = { 46 .name = memory_uevent_name, 47 .uevent = memory_uevent, 48}; 49 50static BLOCKING_NOTIFIER_HEAD(memory_chain); 51 52int register_memory_notifier(struct notifier_block *nb) 53{ 54 return blocking_notifier_chain_register(&memory_chain, nb); 55} 56 57void unregister_memory_notifier(struct notifier_block *nb) 58{ 59 blocking_notifier_chain_unregister(&memory_chain, nb); 60} 61 62/* 63 * register_memory - Setup a sysfs device for a memory block 64 */ 65int register_memory(struct memory_block *memory, struct mem_section *section, 66 struct node *root) 67{ 68 int error; 69 70 memory->sysdev.cls = &memory_sysdev_class; 71 memory->sysdev.id = __section_nr(section); 72 73 error = sysdev_register(&memory->sysdev); 74 75 if (root && !error) 76 error = sysfs_create_link(&root->sysdev.kobj, 77 &memory->sysdev.kobj, 78 kobject_name(&memory->sysdev.kobj)); 79 80 return error; 81} 82 83static void 84unregister_memory(struct memory_block *memory, struct mem_section *section, 85 struct node *root) 86{ 87 BUG_ON(memory->sysdev.cls != &memory_sysdev_class); 88 BUG_ON(memory->sysdev.id != __section_nr(section)); 89 90 sysdev_unregister(&memory->sysdev); 91 if (root) 92 sysfs_remove_link(&root->sysdev.kobj, 93 kobject_name(&memory->sysdev.kobj)); 94} 95 96/* 97 * use this as the physical section index that this memsection 98 * uses. 99 */ 100 101static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf) 102{ 103 struct memory_block *mem = 104 container_of(dev, struct memory_block, sysdev); 105 return sprintf(buf, "%08lx\n", mem->phys_index); 106} 107 108/* 109 * online, offline, going offline, etc. 110 */ 111static ssize_t show_mem_state(struct sys_device *dev, char *buf) 112{ 113 struct memory_block *mem = 114 container_of(dev, struct memory_block, sysdev); 115 ssize_t len = 0; 116 117 /* 118 * We can probably put these states in a nice little array 119 * so that they're not open-coded 120 */ 121 switch (mem->state) { 122 case MEM_ONLINE: 123 len = sprintf(buf, "online\n"); 124 break; 125 case MEM_OFFLINE: 126 len = sprintf(buf, "offline\n"); 127 break; 128 case MEM_GOING_OFFLINE: 129 len = sprintf(buf, "going-offline\n"); 130 break; 131 default: 132 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 133 mem->state); 134 WARN_ON(1); 135 break; 136 } 137 138 return len; 139} 140 141static inline int memory_notify(unsigned long val, void *v) 142{ 143 return blocking_notifier_call_chain(&memory_chain, val, v); 144} 145 146/* 147 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 148 * OK to have direct references to sparsemem variables in here. 149 */ 150static int 151memory_block_action(struct memory_block *mem, unsigned long action) 152{ 153 int i; 154 unsigned long psection; 155 unsigned long start_pfn, start_paddr; 156 struct page *first_page; 157 int ret; 158 int old_state = mem->state; 159 160 psection = mem->phys_index; 161 first_page = pfn_to_page(psection << PFN_SECTION_SHIFT); 162 163 /* 164 * The probe routines leave the pages reserved, just 165 * as the bootmem code does. Make sure they're still 166 * that way. 167 */ 168 if (action == MEM_ONLINE) { 169 for (i = 0; i < PAGES_PER_SECTION; i++) { 170 if (PageReserved(first_page+i)) 171 continue; 172 173 printk(KERN_WARNING "section number %ld page number %d " 174 "not reserved, was it already online? \n", 175 psection, i); 176 return -EBUSY; 177 } 178 } 179 180 switch (action) { 181 case MEM_ONLINE: 182 start_pfn = page_to_pfn(first_page); 183 ret = online_pages(start_pfn, PAGES_PER_SECTION); 184 break; 185 case MEM_OFFLINE: 186 mem->state = MEM_GOING_OFFLINE; 187 memory_notify(MEM_GOING_OFFLINE, NULL); 188 start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; 189 ret = remove_memory(start_paddr, 190 PAGES_PER_SECTION << PAGE_SHIFT); 191 if (ret) { 192 mem->state = old_state; 193 break; 194 } 195 memory_notify(MEM_MAPPING_INVALID, NULL); 196 break; 197 default: 198 printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", 199 __FUNCTION__, mem, action, action); 200 WARN_ON(1); 201 ret = -EINVAL; 202 } 203 /* 204 * For now, only notify on successful memory operations 205 */ 206 if (!ret) 207 memory_notify(action, NULL); 208 209 return ret; 210} 211 212static int memory_block_change_state(struct memory_block *mem, 213 unsigned long to_state, unsigned long from_state_req) 214{ 215 int ret = 0; 216 down(&mem->state_sem); 217 218 if (mem->state != from_state_req) { 219 ret = -EINVAL; 220 goto out; 221 } 222 223 ret = memory_block_action(mem, to_state); 224 if (!ret) 225 mem->state = to_state; 226 227out: 228 up(&mem->state_sem); 229 return ret; 230} 231 232static ssize_t 233store_mem_state(struct sys_device *dev, const char *buf, size_t count) 234{ 235 struct memory_block *mem; 236 unsigned int phys_section_nr; 237 int ret = -EINVAL; 238 239 mem = container_of(dev, struct memory_block, sysdev); 240 phys_section_nr = mem->phys_index; 241 242 if (!valid_section_nr(phys_section_nr)) 243 goto out; 244 245 if (!strncmp(buf, "online", min((int)count, 6))) 246 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 247 else if(!strncmp(buf, "offline", min((int)count, 7))) 248 ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 249out: 250 if (ret) 251 return ret; 252 return count; 253} 254 255/* 256 * phys_device is a bad name for this. What I really want 257 * is a way to differentiate between memory ranges that 258 * are part of physical devices that constitute 259 * a complete removable unit or fru. 260 * i.e. do these ranges belong to the same physical device, 261 * s.t. if I offline all of these sections I can then 262 * remove the physical device? 263 */ 264static ssize_t show_phys_device(struct sys_device *dev, char *buf) 265{ 266 struct memory_block *mem = 267 container_of(dev, struct memory_block, sysdev); 268 return sprintf(buf, "%d\n", mem->phys_device); 269} 270 271static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); 272static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); 273static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); 274 275#define mem_create_simple_file(mem, attr_name) \ 276 sysdev_create_file(&mem->sysdev, &attr_##attr_name) 277#define mem_remove_simple_file(mem, attr_name) \ 278 sysdev_remove_file(&mem->sysdev, &attr_##attr_name) 279 280/* 281 * Block size attribute stuff 282 */ 283static ssize_t 284print_block_size(struct class *class, char *buf) 285{ 286 return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); 287} 288 289static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); 290 291static int block_size_init(void) 292{ 293 return sysfs_create_file(&memory_sysdev_class.kset.kobj, 294 &class_attr_block_size_bytes.attr); 295} 296 297/* 298 * Some architectures will have custom drivers to do this, and 299 * will not need to do it from userspace. The fake hot-add code 300 * as well as ppc64 will do all of their discovery in userspace 301 * and will require this interface. 302 */ 303#ifdef CONFIG_ARCH_MEMORY_PROBE 304static ssize_t 305memory_probe_store(struct class *class, const char *buf, size_t count) 306{ 307 u64 phys_addr; 308 int nid; 309 int ret; 310 311 phys_addr = simple_strtoull(buf, NULL, 0); 312 313 nid = memory_add_physaddr_to_nid(phys_addr); 314 ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); 315 316 if (ret) 317 count = ret; 318 319 return count; 320} 321static CLASS_ATTR(probe, 0700, NULL, memory_probe_store); 322 323static int memory_probe_init(void) 324{ 325 return sysfs_create_file(&memory_sysdev_class.kset.kobj, 326 &class_attr_probe.attr); 327} 328#else 329static inline int memory_probe_init(void) 330{ 331 return 0; 332} 333#endif 334 335/* 336 * Note that phys_device is optional. It is here to allow for 337 * differentiation between which *physical* devices each 338 * section belongs to... 339 */ 340 341static int add_memory_block(unsigned long node_id, struct mem_section *section, 342 unsigned long state, int phys_device) 343{ 344 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 345 int ret = 0; 346 347 if (!mem) 348 return -ENOMEM; 349 350 mem->phys_index = __section_nr(section); 351 mem->state = state; 352 init_MUTEX(&mem->state_sem); 353 mem->phys_device = phys_device; 354 355 ret = register_memory(mem, section, NULL); 356 if (!ret) 357 ret = mem_create_simple_file(mem, phys_index); 358 if (!ret) 359 ret = mem_create_simple_file(mem, state); 360 if (!ret) 361 ret = mem_create_simple_file(mem, phys_device); 362 363 return ret; 364} 365 366/* 367 * For now, we have a linear search to go find the appropriate 368 * memory_block corresponding to a particular phys_index. If 369 * this gets to be a real problem, we can always use a radix 370 * tree or something here. 371 * 372 * This could be made generic for all sysdev classes. 373 */ 374static struct memory_block *find_memory_block(struct mem_section *section) 375{ 376 struct kobject *kobj; 377 struct sys_device *sysdev; 378 struct memory_block *mem; 379 char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1]; 380 381 /* 382 * This only works because we know that section == sysdev->id 383 * slightly redundant with sysdev_register() 384 */ 385 sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section)); 386 387 kobj = kset_find_obj(&memory_sysdev_class.kset, name); 388 if (!kobj) 389 return NULL; 390 391 sysdev = container_of(kobj, struct sys_device, kobj); 392 mem = container_of(sysdev, struct memory_block, sysdev); 393 394 return mem; 395} 396 397int remove_memory_block(unsigned long node_id, struct mem_section *section, 398 int phys_device) 399{ 400 struct memory_block *mem; 401 402 mem = find_memory_block(section); 403 mem_remove_simple_file(mem, phys_index); 404 mem_remove_simple_file(mem, state); 405 mem_remove_simple_file(mem, phys_device); 406 unregister_memory(mem, section, NULL); 407 408 return 0; 409} 410 411/* 412 * need an interface for the VM to add new memory regions, 413 * but without onlining it. 414 */ 415int register_new_memory(struct mem_section *section) 416{ 417 return add_memory_block(0, section, MEM_OFFLINE, 0); 418} 419 420int unregister_memory_section(struct mem_section *section) 421{ 422 if (!valid_section(section)) 423 return -EINVAL; 424 425 return remove_memory_block(0, section, 0); 426} 427 428/* 429 * Initialize the sysfs support for memory devices... 430 */ 431int __init memory_dev_init(void) 432{ 433 unsigned int i; 434 int ret; 435 int err; 436 437 memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; 438 ret = sysdev_class_register(&memory_sysdev_class); 439 if (ret) 440 goto out; 441 442 /* 443 * Create entries for memory sections that were found 444 * during boot and have been initialized 445 */ 446 for (i = 0; i < NR_MEM_SECTIONS; i++) { 447 if (!valid_section_nr(i)) 448 continue; 449 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); 450 if (!ret) 451 ret = err; 452 } 453 454 err = memory_probe_init(); 455 if (!ret) 456 ret = err; 457 err = block_size_init(); 458 if (!ret) 459 ret = err; 460out: 461 if (ret) 462 printk(KERN_ERR "%s() failed: %d\n", __FUNCTION__, ret); 463 return ret; 464} 465