/*
 * xvmalloc memory allocator
 *
 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>

#include "xvmalloc.h"
#include "xvmalloc_int.h"

static void stat_inc(u64 *value)
{
	*value = *value + 1;
}

static void stat_dec(u64 *value)
{
	*value = *value - 1;
}

static int test_flag(struct block_header *block, enum blockflags flag)
{
	return block->prev & BIT(flag);
}

static void set_flag(struct block_header *block, enum blockflags flag)
{
	block->prev |= BIT(flag);
}

static void clear_flag(struct block_header *block, enum blockflags flag)
{
	block->prev &= ~BIT(flag);
}

/*
 * Given a <page, offset> pair, provide a dereferenceable pointer.
 * This is called from the xv_malloc/xv_free path, so it
 * needs to be fast.
 */
static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
{
	unsigned char *base;

	base = kmap_atomic(page, type);
	return base + offset;
}

static void put_ptr_atomic(void *ptr, enum km_type type)
{
	kunmap_atomic(ptr, type);
}

static u32 get_blockprev(struct block_header *block)
{
	return block->prev & PREV_MASK;
}

static void set_blockprev(struct block_header *block, u16 new_offset)
{
	block->prev = new_offset | (block->prev & FLAGS_MASK);
}

static struct block_header *BLOCK_NEXT(struct block_header *block)
{
	return (struct block_header *)
		((char *)block + block->size + XV_ALIGN);
}

/*
 * Get index of free list containing blocks of maximum size
 * which is less than or equal to given size.
 */
static u32 get_index_for_insert(u32 size)
{
	if (unlikely(size > XV_MAX_ALLOC_SIZE))
		size = XV_MAX_ALLOC_SIZE;
	size &= ~FL_DELTA_MASK;
	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}

/*
 * Get index of free list having blocks of size greater than
 * or equal to requested size.
 */
static u32 get_index(u32 size)
{
	if (unlikely(size < XV_MIN_ALLOC_SIZE))
		size = XV_MIN_ALLOC_SIZE;
	size = ALIGN(size, FL_DELTA);
	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}
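
/*
 * Illustrative example of the two index functions above (a sketch,
 * assuming for illustration XV_MIN_ALLOC_SIZE = 32, FL_DELTA = 8 and
 * FL_DELTA_SHIFT = 3 in xvmalloc_int.h):
 *
 *	get_index(60)            = ((ALIGN(60, 8) - 32) >> 3) = 4
 *	get_index_for_insert(60) = (((60 & ~7) - 32) >> 3)    = 3
 *
 * A free block of 60 bytes is thus inserted into list 3 (blocks of at
 * most 63 bytes), while an allocation request for 60 bytes starts its
 * search at list 4 (blocks of at least 64 bytes), so any block taken
 * from that list is guaranteed to be large enough.
 */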

/**
 * find_block - find block of at least given size
 * @pool: memory pool to search from
 * @size: size of block required
 * @page: page containing required block
 * @offset: offset within the page where block is located.
 *
 * Searches the two-level bitmap to locate a block of at least
 * the given size. If such a block is found, it provides
 * <page, offset> to identify this block and returns the index
 * in the freelist where we found this block.
 * Otherwise, returns 0 and <page, offset> params are not touched.
 */
static u32 find_block(struct xv_pool *pool, u32 size,
			struct page **page, u32 *offset)
{
	ulong flbitmap, slbitmap;
	u32 flindex, slindex, slbitstart;

	/* There are no free blocks in this pool */
	if (!pool->flbitmap)
		return 0;

	/* Get freelist index corresponding to this size */
	slindex = get_index(size);
	slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
	slbitstart = slindex % BITS_PER_LONG;

	/*
	 * If freelist is not empty at this index, we found the
	 * block - head of this list. This is approximate best-fit match.
	 */
	if (test_bit(slbitstart, &slbitmap)) {
		*page = pool->freelist[slindex].page;
		*offset = pool->freelist[slindex].offset;
		return slindex;
	}

	/*
	 * No best-fit found. Search a bit further in bitmap for a free block.
	 * Second level bitmap consists of a series of word-sized
	 * (BITS_PER_LONG bits) chunks. Search further in the chunk where we
	 * expected a best-fit, starting from the index location found above.
	 */
	slbitstart++;
	slbitmap >>= slbitstart;

	/* Skip this search if we were already at end of this bitmap chunk */
	if ((slbitstart != BITS_PER_LONG) && slbitmap) {
		slindex += __ffs(slbitmap) + 1;
		*page = pool->freelist[slindex].page;
		*offset = pool->freelist[slindex].offset;
		return slindex;
	}

	/* Now do a full two-level bitmap search to find next nearest fit */
	flindex = slindex / BITS_PER_LONG;

	flbitmap = (pool->flbitmap) >> (flindex + 1);
	if (!flbitmap)
		return 0;

	flindex += __ffs(flbitmap) + 1;
	slbitmap = pool->slbitmap[flindex];
	slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
	*page = pool->freelist[slindex].page;
	*offset = pool->freelist[slindex].offset;

	return slindex;
}
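
/*
 * Note on the two-level bitmap searched above and maintained by
 * insert_block()/remove_block_head() below (a sketch, assuming
 * BITS_PER_LONG = 64): freelist index 'slindex' maps to bit
 * (slindex % 64) of pool->slbitmap[slindex / 64], and bit
 * (slindex / 64) of pool->flbitmap is set whenever that second-level
 * word is non-zero. For example, slindex = 70 corresponds to bit 6 of
 * slbitmap[1] and bit 1 of flbitmap, which is how find_block() can
 * skip over groups of 64 empty freelists at a time.
 */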

/*
 * Insert block at <page, offset> in freelist of given pool.
 * freelist used depends on block size.
 */
static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
			struct block_header *block)
{
	u32 flindex, slindex;
	struct block_header *nextblock;

	slindex = get_index_for_insert(block->size);
	flindex = slindex / BITS_PER_LONG;

	block->link.prev_page = 0;
	block->link.prev_offset = 0;
	block->link.next_page = pool->freelist[slindex].page;
	block->link.next_offset = pool->freelist[slindex].offset;
	pool->freelist[slindex].page = page;
	pool->freelist[slindex].offset = offset;

	if (block->link.next_page) {
		nextblock = get_ptr_atomic(block->link.next_page,
					block->link.next_offset, KM_USER1);
		nextblock->link.prev_page = page;
		nextblock->link.prev_offset = offset;
		put_ptr_atomic(nextblock, KM_USER1);
	}

	__set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
	__set_bit(flindex, &pool->flbitmap);
}

/*
 * Remove block from head of freelist. Index 'slindex' identifies the freelist.
 */
static void remove_block_head(struct xv_pool *pool,
			struct block_header *block, u32 slindex)
{
	struct block_header *tmpblock;
	u32 flindex = slindex / BITS_PER_LONG;

	pool->freelist[slindex].page = block->link.next_page;
	pool->freelist[slindex].offset = block->link.next_offset;
	block->link.prev_page = 0;
	block->link.prev_offset = 0;

	if (!pool->freelist[slindex].page) {
		__clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
		if (!pool->slbitmap[flindex])
			__clear_bit(flindex, &pool->flbitmap);
	} else {
		/*
		 * DEBUG ONLY: We need not reinitialize freelist head previous
		 * pointer to 0 - we never depend on its value. But just for
		 * sanity, let's do it.
		 */
		tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
				pool->freelist[slindex].offset, KM_USER1);
		tmpblock->link.prev_page = 0;
		tmpblock->link.prev_offset = 0;
		put_ptr_atomic(tmpblock, KM_USER1);
	}
}

/*
 * Remove block from freelist. Index 'slindex' identifies the freelist.
 */
static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
			struct block_header *block, u32 slindex)
{
	u32 flindex;
	struct block_header *tmpblock;

	if (pool->freelist[slindex].page == page
	   && pool->freelist[slindex].offset == offset) {
		remove_block_head(pool, block, slindex);
		return;
	}

	flindex = slindex / BITS_PER_LONG;

	if (block->link.prev_page) {
		tmpblock = get_ptr_atomic(block->link.prev_page,
				block->link.prev_offset, KM_USER1);
		tmpblock->link.next_page = block->link.next_page;
		tmpblock->link.next_offset = block->link.next_offset;
		put_ptr_atomic(tmpblock, KM_USER1);
	}

	if (block->link.next_page) {
		tmpblock = get_ptr_atomic(block->link.next_page,
				block->link.next_offset, KM_USER1);
		tmpblock->link.prev_page = block->link.prev_page;
		tmpblock->link.prev_offset = block->link.prev_offset;
		put_ptr_atomic(tmpblock, KM_USER1);
	}
}

/*
 * Allocate a page and add it to freelist of given pool.
 */
static int grow_pool(struct xv_pool *pool, gfp_t flags)
{
	struct page *page;
	struct block_header *block;

	page = alloc_page(flags);
	if (unlikely(!page))
		return -ENOMEM;

	stat_inc(&pool->total_pages);

	spin_lock(&pool->lock);
	block = get_ptr_atomic(page, 0, KM_USER0);

	block->size = PAGE_SIZE - XV_ALIGN;
	set_flag(block, BLOCK_FREE);
	clear_flag(block, PREV_FREE);
	set_blockprev(block, 0);

	insert_block(pool, page, 0, block);

	put_ptr_atomic(block, KM_USER0);
	spin_unlock(&pool->lock);

	return 0;
}

/*
 * Create a memory pool. Allocates freelist, bitmaps and other
 * per-pool metadata.
 */
struct xv_pool *xv_create_pool(void)
{
	u32 ovhd_size;
	struct xv_pool *pool;

	ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
	pool = kzalloc(ovhd_size, GFP_KERNEL);
	if (!pool)
		return NULL;

	spin_lock_init(&pool->lock);

	return pool;
}

void xv_destroy_pool(struct xv_pool *pool)
{
	kfree(pool);
}

/**
 * xv_malloc - Allocate block of given size from pool.
 * @pool: pool to allocate from
 * @size: size of block to allocate
 * @page: page that holds the object
 * @offset: location of object within page
 *
 * On success, <page, offset> identifies the block allocated
 * and 0 is returned. On failure, <page, offset> is set to
 * 0 and -ENOMEM is returned.
 *
 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
 */
int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
		u32 *offset, gfp_t flags)
{
	int error;
	u32 index, tmpsize, origsize, tmpoffset;
	struct block_header *block, *tmpblock;

	*page = NULL;
	*offset = 0;
	origsize = size;

	if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
		return -ENOMEM;

	size = ALIGN(size, XV_ALIGN);

	spin_lock(&pool->lock);

	index = find_block(pool, size, page, offset);

	if (!*page) {
		spin_unlock(&pool->lock);
		if (flags & GFP_NOWAIT)
			return -ENOMEM;
		error = grow_pool(pool, flags);
		if (unlikely(error))
			return error;

		spin_lock(&pool->lock);
		index = find_block(pool, size, page, offset);
	}

	if (!*page) {
		spin_unlock(&pool->lock);
		return -ENOMEM;
	}

	block = get_ptr_atomic(*page, *offset, KM_USER0);

	remove_block_head(pool, block, index);

	/* Split the block if required */
	tmpoffset = *offset + size + XV_ALIGN;
	tmpsize = block->size - size;
	tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
	if (tmpsize) {
		tmpblock->size = tmpsize - XV_ALIGN;
		set_flag(tmpblock, BLOCK_FREE);
		clear_flag(tmpblock, PREV_FREE);

		set_blockprev(tmpblock, *offset);
		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
			insert_block(pool, *page, tmpoffset, tmpblock);

		if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
			tmpblock = BLOCK_NEXT(tmpblock);
			set_blockprev(tmpblock, tmpoffset);
		}
	} else {
		/* This block is exact fit */
		if (tmpoffset != PAGE_SIZE)
			clear_flag(tmpblock, PREV_FREE);
	}

	block->size = origsize;
	clear_flag(block, BLOCK_FREE);

	put_ptr_atomic(block, KM_USER0);
	spin_unlock(&pool->lock);

	*offset += XV_ALIGN;

	return 0;
}
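
/*
 * Worked example of the split logic in xv_malloc() above (a sketch,
 * assuming PAGE_SIZE = 4096 and XV_ALIGN = 4): the first allocation of
 * 100 bytes from a fresh pool finds the page's single free block at
 * offset 0 with size 4092. Then tmpoffset = 0 + 100 + 4 = 104 and
 * tmpsize = 4092 - 100 = 3992, so the remainder block at offset 104
 * gets size 3992 - 4 = 3988 and goes back on a freelist, the allocated
 * block's size is set to the original request (100), and the caller
 * receives <page, offset = 4>, i.e. the address just past the block
 * header.
 */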

/*
 * Free block identified with <page, offset>
 */
void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
{
	void *page_start;
	struct block_header *block, *tmpblock;

	offset -= XV_ALIGN;

	spin_lock(&pool->lock);

	page_start = get_ptr_atomic(page, 0, KM_USER0);
	block = (struct block_header *)((char *)page_start + offset);

	/* Catch double free bugs */
	BUG_ON(test_flag(block, BLOCK_FREE));

	block->size = ALIGN(block->size, XV_ALIGN);

	tmpblock = BLOCK_NEXT(block);
	if (offset + block->size + XV_ALIGN == PAGE_SIZE)
		tmpblock = NULL;

	/* Merge next block if it's free */
	if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
		/*
		 * Blocks smaller than XV_MIN_ALLOC_SIZE
		 * are not inserted in any free list.
		 */
		if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
			remove_block(pool, page,
				    offset + block->size + XV_ALIGN, tmpblock,
				    get_index_for_insert(tmpblock->size));
		}
		block->size += tmpblock->size + XV_ALIGN;
	}

	/* Merge previous block if it's free */
	if (test_flag(block, PREV_FREE)) {
		tmpblock = (struct block_header *)((char *)(page_start) +
						get_blockprev(block));
		offset = offset - tmpblock->size - XV_ALIGN;

		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
			remove_block(pool, page, offset, tmpblock,
				    get_index_for_insert(tmpblock->size));

		tmpblock->size += block->size + XV_ALIGN;
		block = tmpblock;
	}

	/* No used objects in this page. Free it. */
	if (block->size == PAGE_SIZE - XV_ALIGN) {
		put_ptr_atomic(page_start, KM_USER0);
		spin_unlock(&pool->lock);

		__free_page(page);
		stat_dec(&pool->total_pages);
		return;
	}

	set_flag(block, BLOCK_FREE);
	if (block->size >= XV_MIN_ALLOC_SIZE)
		insert_block(pool, page, offset, block);

	if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
		tmpblock = BLOCK_NEXT(block);
		set_flag(tmpblock, PREV_FREE);
		set_blockprev(tmpblock, offset);
	}

	put_ptr_atomic(page_start, KM_USER0);
	spin_unlock(&pool->lock);
}

u32 xv_get_object_size(void *obj)
{
	struct block_header *blk;

	blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
	return blk->size;
}

/*
 * Returns total memory used by allocator (userdata + metadata)
 */
u64 xv_get_total_size_bytes(struct xv_pool *pool)
{
	return pool->total_pages << PAGE_SHIFT;
}
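
/*
 * Illustrative usage sketch (not part of this file; 'len' and 'src'
 * are hypothetical caller variables, and GFP_NOIO is just an example
 * flag). A caller allocates an object, maps its page to fill it, and
 * later frees it:
 *
 *	struct xv_pool *pool = xv_create_pool();
 *	struct page *page;
 *	u32 offset;
 *	void *obj;
 *
 *	if (!pool)
 *		return -ENOMEM;
 *	if (xv_malloc(pool, len, &page, &offset, GFP_NOIO))
 *		return -ENOMEM;
 *
 *	obj = (char *)kmap_atomic(page, KM_USER0) + offset;
 *	memcpy(obj, src, len);
 *	kunmap_atomic(obj, KM_USER0);
 *
 *	xv_free(pool, page, offset);
 *	xv_destroy_pool(pool);
 */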