/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MM_SLAB_H
#define MM_SLAB_H

#include <linux/reciprocal_div.h>
#include <linux/list_lru.h>
#include <linux/local_lock.h>
#include <linux/random.h>
#include <linux/kobject.h>
#include <linux/sched/mm.h>
#include <linux/memcontrol.h>
#include <linux/kfence.h>
#include <linux/kasan.h>

/*
 * Internal slab definitions
 */

#ifdef CONFIG_64BIT
# ifdef system_has_cmpxchg128
# define system_has_freelist_aba()	system_has_cmpxchg128()
# define try_cmpxchg_freelist		try_cmpxchg128
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg128
typedef u128 freelist_full_t;
#else /* CONFIG_64BIT */
# ifdef system_has_cmpxchg64
# define system_has_freelist_aba()	system_has_cmpxchg64()
# define try_cmpxchg_freelist		try_cmpxchg64
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg64
typedef u64 freelist_full_t;
#endif /* CONFIG_64BIT */

#if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
#undef system_has_freelist_aba
#endif

/*
 * Freelist pointer and counter to cmpxchg together, avoids the typical ABA
 * problems with cmpxchg of just a pointer.
 */
typedef union {
	struct {
		void *freelist;
		unsigned long counter;
	};
	freelist_full_t full;
} freelist_aba_t;

/* Reuses the bits in struct page */
struct slab {
	unsigned long __page_flags;

	struct kmem_cache *slab_cache;
	union {
		struct {
			union {
				struct list_head slab_list;
#ifdef CONFIG_SLUB_CPU_PARTIAL
				struct {
					struct slab *next;
					int slabs;	/* Nr of slabs left */
				};
#endif
			};
			/* Double-word boundary */
			union {
				struct {
					void *freelist;		/* first free object */
					union {
						unsigned long counters;
						struct {
							unsigned inuse:16;
							unsigned objects:15;
							unsigned frozen:1;
						};
					};
				};
#ifdef system_has_freelist_aba
				freelist_aba_t freelist_counter;
#endif
			};
		};
		struct rcu_head rcu_head;
	};
	unsigned int __unused;

	atomic_t __page_refcount;
#ifdef CONFIG_MEMCG
	unsigned long memcg_data;
#endif
};

#define SLAB_MATCH(pg, sl)						\
	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, memcg_data);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(system_has_freelist_aba)
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
#endif
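
/*
 * Illustrative sketch, not part of the kernel API: how the freelist pointer
 * and counter above are meant to be exchanged as one unit.  The helper name
 * slab_update_freelist_example() is hypothetical; the real update helpers
 * live in mm/slub.c.  Because both words are compared and swapped together,
 * a freelist pointer that was freed and reallocated to the same address (the
 * ABA case) is still rejected, since the counter will have moved on.
 */
#ifdef system_has_freelist_aba
static inline bool slab_update_freelist_example(struct slab *slab,
						void *freelist_old, unsigned long counters_old,
						void *freelist_new, unsigned long counters_new)
{
	freelist_aba_t old = { .freelist = freelist_old, .counter = counters_old };
	freelist_aba_t new = { .freelist = freelist_new, .counter = counters_new };

	/* Succeeds only if both the pointer and the counter still match. */
	return try_cmpxchg_freelist(&slab->freelist_counter.full,
				    &old.full, new.full);
}
#endif /* system_has_freelist_aba */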

/**
 * folio_slab - Converts from folio to slab.
 * @folio: The folio.
 *
 * Currently struct slab is a different representation of a folio where
 * folio_test_slab() is true.
 *
 * Return: The slab which contains this folio.
 */
#define folio_slab(folio) (_Generic((folio),				\
	const struct folio *:	(const struct slab *)(folio),		\
	struct folio *:		(struct slab *)(folio)))

/**
 * slab_folio - The folio allocated for a slab
 * @slab: The slab.
 *
 * Slabs are allocated as folios that contain the individual objects and are
 * using some fields in the first struct page of the folio - those fields are
 * now accessed by struct slab. It is occasionally necessary to convert back to
 * a folio in order to communicate with the rest of the mm. Please use this
 * helper function instead of casting yourself, as the implementation may change
 * in the future.
 */
#define slab_folio(s) (_Generic((s),					\
	const struct slab *:	(const struct folio *)s,		\
	struct slab *:		(struct folio *)s))

/**
 * page_slab - Converts from first struct page to slab.
 * @p: The first (either head of compound or single) page of slab.
 *
 * A temporary wrapper to convert struct page to struct slab in situations where
 * we know the page is the compound head, or single order-0 page.
 *
 * Long-term ideally everything would work with struct slab directly or go
 * through folio to struct slab.
 *
 * Return: The slab which contains this page
 */
#define page_slab(p) (_Generic((p),					\
	const struct page *:	(const struct slab *)(p),		\
	struct page *:		(struct slab *)(p)))

/**
 * slab_page - The first struct page allocated for a slab
 * @slab: The slab.
 *
 * A convenience wrapper for converting slab to the first struct page of the
 * underlying folio, to communicate with code not yet converted to folio or
 * struct slab.
 */
#define slab_page(s) folio_page(slab_folio(s), 0)

/*
 * If network-based swap is enabled, sl*b must keep track of whether pages
 * were allocated from pfmemalloc reserves.
 */
static inline bool slab_test_pfmemalloc(const struct slab *slab)
{
	return folio_test_active((struct folio *)slab_folio(slab));
}

static inline void slab_set_pfmemalloc(struct slab *slab)
{
	folio_set_active(slab_folio(slab));
}

static inline void slab_clear_pfmemalloc(struct slab *slab)
{
	folio_clear_active(slab_folio(slab));
}

static inline void __slab_clear_pfmemalloc(struct slab *slab)
{
	__folio_clear_active(slab_folio(slab));
}

static inline void *slab_address(const struct slab *slab)
{
	return folio_address(slab_folio(slab));
}

static inline int slab_nid(const struct slab *slab)
{
	return folio_nid(slab_folio(slab));
}

static inline pg_data_t *slab_pgdat(const struct slab *slab)
{
	return folio_pgdat(slab_folio(slab));
}

static inline struct slab *virt_to_slab(const void *addr)
{
	struct folio *folio = virt_to_folio(addr);

	if (!folio_test_slab(folio))
		return NULL;

	return folio_slab(folio);
}

static inline int slab_order(const struct slab *slab)
{
	return folio_order((struct folio *)slab_folio(slab));
}

static inline size_t slab_size(const struct slab *slab)
{
	return PAGE_SIZE << slab_order(slab);
}

#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c)			((c)->partial)

#define slub_set_percpu_partial(c, p)		\
({						\
	slub_percpu_partial(c) = (p)->next;	\
})

#define slub_percpu_partial_read_once(c)	READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c)			NULL

#define slub_set_percpu_partial(c, p)

#define slub_percpu_partial_read_once(c)	NULL
#endif // CONFIG_SLUB_CPU_PARTIAL

/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 */
struct kmem_cache_order_objects {
	unsigned int x;
};
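
/*
 * Illustrative sketch, not part of the kernel API: the hypothetical helpers
 * below show the idea behind the single-word encoding.  The order sits in the
 * high bits and the object count in the low bits, so one plain load or store
 * of the word always yields a consistent pair.  The real accessors
 * (oo_order()/oo_objects()) live in mm/slub.c; the 16-bit split is assumed
 * here only for the example.
 */
static inline unsigned int oo_order_example(struct kmem_cache_order_objects x)
{
	return x.x >> 16;			/* slab page order */
}

static inline unsigned int oo_objects_example(struct kmem_cache_order_objects x)
{
	return x.x & ((1U << 16) - 1);		/* objects in a slab of that order */
}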

/*
 * Slab cache management.
 */
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
	struct kmem_cache_cpu __percpu *cpu_slab;
#endif
	/* Used for retrieving partial slabs, etc. */
	slab_flags_t flags;
	unsigned long min_partial;
	unsigned int size;		/* Object size including metadata */
	unsigned int object_size;	/* Object size without metadata */
	struct reciprocal_value reciprocal_size;
	unsigned int offset;		/* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
	/* Number of per cpu partial objects to keep around */
	unsigned int cpu_partial;
	/* Number of per cpu partial slabs to keep around */
	unsigned int cpu_partial_slabs;
#endif
	struct kmem_cache_order_objects oo;

	/* Allocation and freeing of slabs */
	struct kmem_cache_order_objects min;
	gfp_t allocflags;		/* gfp flags to use on each alloc */
	int refcount;			/* Refcount for slab cache destroy */
	void (*ctor)(void *object);	/* Object constructor */
	unsigned int inuse;		/* Offset to metadata */
	unsigned int align;		/* Alignment */
	unsigned int red_left_pad;	/* Left redzone padding size */
	const char *name;		/* Name (only for display!) */
	struct list_head list;		/* List of slab caches */
#ifdef CONFIG_SYSFS
	struct kobject kobj;		/* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	unsigned long random;
#endif

#ifdef CONFIG_NUMA
	/*
	 * Defragmentation by allocating from a remote node.
	 */
	unsigned int remote_node_defrag_ratio;
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
	unsigned int *random_seq;
#endif

#ifdef CONFIG_KASAN_GENERIC
	struct kasan_cache kasan_info;
#endif

#ifdef CONFIG_HARDENED_USERCOPY
	unsigned int useroffset;	/* Usercopy region offset */
	unsigned int usersize;		/* Usercopy region size */
#endif

	struct kmem_cache_node *node[MAX_NUMNODES];
};

#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *s);
void sysfs_slab_release(struct kmem_cache *s);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s) { }
static inline void sysfs_slab_release(struct kmem_cache *s) { }
#endif

void *fixup_red_left(struct kmem_cache *s, void *p);

static inline void *nearest_obj(struct kmem_cache *cache,
				const struct slab *slab, void *x)
{
	void *object = x - (x - slab_address(slab)) % cache->size;
	void *last_object = slab_address(slab) +
		(slab->objects - 1) * cache->size;
	void *result = (unlikely(object > last_object)) ? last_object : object;

	result = fixup_red_left(cache, result);
	return result;
}

/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
					  void *addr, void *obj)
{
	return reciprocal_divide(kasan_reset_tag(obj) - addr,
				 cache->reciprocal_size);
}

static inline unsigned int obj_to_index(const struct kmem_cache *cache,
					const struct slab *slab, void *obj)
{
	if (is_kfence_address(obj))
		return 0;
	return __obj_to_index(cache, slab_address(slab), obj);
}

static inline int objs_per_slab(const struct kmem_cache *cache,
				const struct slab *slab)
{
	return slab->objects;
}
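
/*
 * Illustrative sketch, not part of the kernel API: a hypothetical helper that
 * combines the lookups above to map an arbitrary pointer into a slab back to
 * the start of its object.  For example, with cache->size == 256 and no left
 * red zone, a pointer at offset 700 from slab_address() has object index 2
 * and maps back to offset 512.
 */
static inline void *obj_start_example(struct kmem_cache *cache,
				      const struct slab *slab, void *ptr)
{
	unsigned int index = obj_to_index(cache, slab, ptr);

	/* Scale the index back to an address; skip any left red zone. */
	return fixup_red_left(cache, slab_address(slab) + index * cache->size);
}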

/*
 * State of the slab allocator.
 *
 * This is used to describe the states of the allocator during bootup.
 * Allocators use this to gradually bootstrap themselves. Most allocators
 * have the problem that the structures used for managing slab caches are
 * allocated from slab caches themselves.
 */
enum slab_state {
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node available */
	UP,			/* Slab caches usable but not all extras yet */
	FULL			/* Everything is working */
};

extern enum slab_state slab_state;

/* The slab cache mutex protects the management structures during changes */
extern struct mutex slab_mutex;

/* The list of all slab caches on the system */
extern struct list_head slab_caches;

/* The slab cache that manages slab cache information */
extern struct kmem_cache *kmem_cache;

/* A table of kmalloc cache names and sizes */
extern const struct kmalloc_info_struct {
	const char *name[NR_KMALLOC_TYPES];
	unsigned int size;
} kmalloc_info[];

/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(void);

extern u8 kmalloc_size_index[24];

static inline unsigned int size_index_elem(unsigned int bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 *
 * This assumes size is larger than zero and not larger than
 * KMALLOC_MAX_CACHE_SIZE and the caller must check that.
 */
static inline struct kmem_cache *
kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
{
	unsigned int index;

	if (size <= 192)
		index = kmalloc_size_index[size_index_elem(size)];
	else
		index = fls(size - 1);

	return kmalloc_caches[kmalloc_type(flags, caller)][index];
}
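
/*
 * Illustrative sketch, not part of the kernel API: a worked example of the
 * lookup above, using a hypothetical helper name.  kmalloc(100, ...) takes
 * the table path: size_index_elem(100) == 12 and, on a typical configuration,
 * kmalloc_size_index[12] selects the 128-byte cache.  kmalloc(1000, ...)
 * takes the fls() path: fls(999) == 10, so the kmalloc-1024 cache at index 10
 * is used.
 */
static inline struct kmem_cache *kmalloc_slab_example(gfp_t flags,
						      unsigned long caller)
{
	/* The cache that a 1000-byte allocation would be served from. */
	return kmalloc_slab(1000, flags, caller);
}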

gfp_t kmalloc_fix_flags(gfp_t flags);

/* Functions provided by the slab allocators */
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);

void __init kmem_cache_init(void);
extern void create_boot_cache(struct kmem_cache *, const char *name,
			unsigned int size, slab_flags_t flags,
			unsigned int useroffset, unsigned int usersize);

int slab_unmergeable(struct kmem_cache *s);
struct kmem_cache *find_mergeable(unsigned size, unsigned align,
		slab_flags_t flags, const char *name, void (*ctor)(void *));
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *));

slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name);

static inline bool is_kmalloc_cache(struct kmem_cache *s)
{
	return (s->flags & SLAB_KMALLOC);
}

/* Legal flag mask for kmem_cache_create(), for various configurations */
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )

#ifdef CONFIG_SLUB_DEBUG
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
#else
#define SLAB_DEBUG_FLAGS (0)
#endif

#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
			  SLAB_TEMPORARY | SLAB_ACCOUNT | \
			  SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)

/* Common flags available with current configuration */
#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)

/* Common flags permitted for kmem_cache_create */
#define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | \
			      SLAB_RED_ZONE | \
			      SLAB_POISON | \
			      SLAB_STORE_USER | \
			      SLAB_TRACE | \
			      SLAB_CONSISTENCY_CHECKS | \
			      SLAB_NOLEAKTRACE | \
			      SLAB_RECLAIM_ACCOUNT | \
			      SLAB_TEMPORARY | \
			      SLAB_ACCOUNT | \
			      SLAB_KMALLOC | \
			      SLAB_NO_MERGE | \
			      SLAB_NO_USER_FLAGS)

bool __kmem_cache_empty(struct kmem_cache *);
int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
void slab_kmem_cache_release(struct kmem_cache *);

struct seq_file;
struct file;

struct slabinfo {
	unsigned long active_objs;
	unsigned long num_objs;
	unsigned long active_slabs;
	unsigned long num_slabs;
	unsigned long shared_avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int shared;
	unsigned int objects_per_slab;
	unsigned int cache_order;
};

void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos);

#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
#else
DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
extern void print_tracking(struct kmem_cache *s, void *object);
long validate_slab_cache(struct kmem_cache *s);
static inline bool __slub_debug_enabled(void)
{
	return static_branch_unlikely(&slub_debug_enabled);
}
#else
static inline void print_tracking(struct kmem_cache *s, void *object)
{
}
static inline bool __slub_debug_enabled(void)
{
	return false;
}
#endif

/*
 * Returns true if any of the specified slab_debug flags is enabled for the
 * cache. Use only for flags parsed by setup_slub_debug() as it also enables
 * the static key.
 */
static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
	if (IS_ENABLED(CONFIG_SLUB_DEBUG))
		VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
	if (__slub_debug_enabled())
		return s->flags & flags;
	return false;
}
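
/*
 * Illustrative sketch, not part of the kernel API: the intended calling
 * pattern for kmem_cache_debug_flags(), using a hypothetical helper name.
 * The static key keeps the common (non-debug) case down to a patched-out
 * branch; the per-cache flag test only runs once slub_debug has been enabled
 * on the command line or via CONFIG_SLUB_DEBUG_ON.
 */
static inline void print_tracking_example(struct kmem_cache *s, void *object)
{
	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
		print_tracking(s, object);
}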

#ifdef CONFIG_MEMCG_KMEM
/*
 * slab_objcgs - get the object cgroups vector associated with a slab
 * @slab: a pointer to the slab struct
 *
 * Returns a pointer to the object cgroups vector associated with the slab,
 * or NULL if no such vector has been associated yet.
 */
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
{
	unsigned long memcg_data = READ_ONCE(slab->memcg_data);

	VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS),
			slab_page(slab));
	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab));

	return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}

int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
			     gfp_t gfp, bool new_slab);
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
		     enum node_stat_item idx, int nr);
#else /* CONFIG_MEMCG_KMEM */
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
{
	return NULL;
}

static inline int memcg_alloc_slab_cgroups(struct slab *slab,
					   struct kmem_cache *s, gfp_t gfp,
					   bool new_slab)
{
	return 0;
}
#endif /* CONFIG_MEMCG_KMEM */

size_t __ksize(const void *objp);

static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->object_size;
#endif
	if (s->flags & SLAB_KASAN)
		return s->object_size;
	/*
	 * If we have the need to store the freelist pointer
	 * back there or track user information then we can
	 * only use the space before that information.
	 */
	if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
		return s->inuse;
	/*
	 * Else we can use all the padding etc for the allocation
	 */
	return s->size;
}

#ifdef CONFIG_SLUB_DEBUG
void dump_unreclaimable_slab(void);
#else
static inline void dump_unreclaimable_slab(void)
{
}
#endif

void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);

#ifdef CONFIG_SLAB_FREELIST_RANDOM
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
			    gfp_t gfp);
void cache_random_seq_destroy(struct kmem_cache *cachep);
#else
static inline int cache_random_seq_create(struct kmem_cache *cachep,
					  unsigned int count, gfp_t gfp)
{
	return 0;
}
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
				&init_on_alloc)) {
		if (c->ctor)
			return false;
		if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
			return flags & __GFP_ZERO;
		return true;
	}
	return flags & __GFP_ZERO;
}

static inline bool slab_want_init_on_free(struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
				&init_on_free))
		return !(c->ctor ||
			 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
	return false;
}
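
/*
 * Illustrative sketch, not part of the kernel API: how an allocation path is
 * expected to apply the policy above, using a hypothetical helper name.  With
 * init_on_alloc enabled, objects from an ordinary cache are zeroed on every
 * allocation; caches with a constructor are skipped, and SLAB_TYPESAFE_BY_RCU
 * or poisoned caches are zeroed only when the caller itself asked for
 * __GFP_ZERO.
 */
static inline void maybe_init_object_example(struct kmem_cache *s, gfp_t flags,
					     void *object)
{
	if (slab_want_init_on_alloc(flags, s))
		memset(object, 0, s->object_size);
}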

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
void debugfs_slab_release(struct kmem_cache *);
#else
static inline void debugfs_slab_release(struct kmem_cache *s) { }
#endif

#ifdef CONFIG_PRINTK
#define KS_ADDRS_COUNT 16
struct kmem_obj_info {
	void *kp_ptr;
	struct slab *kp_slab;
	void *kp_objp;
	unsigned long kp_data_offset;
	struct kmem_cache *kp_slab_cache;
	void *kp_ret;
	void *kp_stack[KS_ADDRS_COUNT];
	void *kp_free_stack[KS_ADDRS_COUNT];
};
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
#endif

void __check_heap_object(const void *ptr, unsigned long n,
			 const struct slab *slab, bool to_user);

#ifdef CONFIG_SLUB_DEBUG
void skip_orig_size_check(struct kmem_cache *s, const void *object);
#endif

#endif /* MM_SLAB_H */