1/* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_mount.h" 28#include "xfs_bmap_btree.h" 29#include "xfs_alloc_btree.h" 30#include "xfs_ialloc_btree.h" 31#include "xfs_dinode.h" 32#include "xfs_inode.h" 33#include "xfs_btree.h" 34#include "xfs_alloc.h" 35#include "xfs_error.h" 36#include "xfs_trace.h" 37 38 39#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) 40 41#define XFSA_FIXUP_BNO_OK 1 42#define XFSA_FIXUP_CNT_OK 2 43 44static int 45xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, 46 xfs_agblock_t bno, xfs_extlen_t len); 47 48/* 49 * Prototypes for per-ag allocation routines 50 */ 51 52STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 53STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 54STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 55STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 56 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 57 58/* 59 * Internal functions. 60 */ 61 62/* 63 * Lookup the record equal to [bno, len] in the btree given by cur. 
64 */ 65STATIC int /* error */ 66xfs_alloc_lookup_eq( 67 struct xfs_btree_cur *cur, /* btree cursor */ 68 xfs_agblock_t bno, /* starting block of extent */ 69 xfs_extlen_t len, /* length of extent */ 70 int *stat) /* success/failure */ 71{ 72 cur->bc_rec.a.ar_startblock = bno; 73 cur->bc_rec.a.ar_blockcount = len; 74 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 75} 76 77/* 78 * Lookup the first record greater than or equal to [bno, len] 79 * in the btree given by cur. 80 */ 81STATIC int /* error */ 82xfs_alloc_lookup_ge( 83 struct xfs_btree_cur *cur, /* btree cursor */ 84 xfs_agblock_t bno, /* starting block of extent */ 85 xfs_extlen_t len, /* length of extent */ 86 int *stat) /* success/failure */ 87{ 88 cur->bc_rec.a.ar_startblock = bno; 89 cur->bc_rec.a.ar_blockcount = len; 90 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 91} 92 93/* 94 * Lookup the first record less than or equal to [bno, len] 95 * in the btree given by cur. 96 */ 97STATIC int /* error */ 98xfs_alloc_lookup_le( 99 struct xfs_btree_cur *cur, /* btree cursor */ 100 xfs_agblock_t bno, /* starting block of extent */ 101 xfs_extlen_t len, /* length of extent */ 102 int *stat) /* success/failure */ 103{ 104 cur->bc_rec.a.ar_startblock = bno; 105 cur->bc_rec.a.ar_blockcount = len; 106 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 107} 108 109/* 110 * Update the record referred to by cur to the value given 111 * by [bno, len]. 112 * This either works (return 0) or gets an EFSCORRUPTED error. 113 */ 114STATIC int /* error */ 115xfs_alloc_update( 116 struct xfs_btree_cur *cur, /* btree cursor */ 117 xfs_agblock_t bno, /* starting block of extent */ 118 xfs_extlen_t len) /* length of extent */ 119{ 120 union xfs_btree_rec rec; 121 122 rec.alloc.ar_startblock = cpu_to_be32(bno); 123 rec.alloc.ar_blockcount = cpu_to_be32(len); 124 return xfs_btree_update(cur, &rec); 125} 126 127/* 128 * Get the data from the pointed-to record. 
129 */ 130STATIC int /* error */ 131xfs_alloc_get_rec( 132 struct xfs_btree_cur *cur, /* btree cursor */ 133 xfs_agblock_t *bno, /* output: starting block of extent */ 134 xfs_extlen_t *len, /* output: length of extent */ 135 int *stat) /* output: success/failure */ 136{ 137 union xfs_btree_rec *rec; 138 int error; 139 140 error = xfs_btree_get_rec(cur, &rec, stat); 141 if (!error && *stat == 1) { 142 *bno = be32_to_cpu(rec->alloc.ar_startblock); 143 *len = be32_to_cpu(rec->alloc.ar_blockcount); 144 } 145 return error; 146} 147 148/* 149 * Compute aligned version of the found extent. 150 * Takes alignment and min length into account. 151 */ 152STATIC void 153xfs_alloc_compute_aligned( 154 xfs_agblock_t foundbno, /* starting block in found extent */ 155 xfs_extlen_t foundlen, /* length in found extent */ 156 xfs_extlen_t alignment, /* alignment for allocation */ 157 xfs_extlen_t minlen, /* minimum length for allocation */ 158 xfs_agblock_t *resbno, /* result block number */ 159 xfs_extlen_t *reslen) /* result length */ 160{ 161 xfs_agblock_t bno; 162 xfs_extlen_t diff; 163 xfs_extlen_t len; 164 165 if (alignment > 1 && foundlen >= minlen) { 166 bno = roundup(foundbno, alignment); 167 diff = bno - foundbno; 168 len = diff >= foundlen ? 0 : foundlen - diff; 169 } else { 170 bno = foundbno; 171 len = foundlen; 172 } 173 *resbno = bno; 174 *reslen = len; 175} 176 177/* 178 * Compute best start block and diff for "near" allocations. 179 * freelen >= wantlen already checked by caller. 
180 */ 181STATIC xfs_extlen_t /* difference value (absolute) */ 182xfs_alloc_compute_diff( 183 xfs_agblock_t wantbno, /* target starting block */ 184 xfs_extlen_t wantlen, /* target length */ 185 xfs_extlen_t alignment, /* target alignment */ 186 xfs_agblock_t freebno, /* freespace's starting block */ 187 xfs_extlen_t freelen, /* freespace's length */ 188 xfs_agblock_t *newbnop) /* result: best start block from free */ 189{ 190 xfs_agblock_t freeend; /* end of freespace extent */ 191 xfs_agblock_t newbno1; /* return block number */ 192 xfs_agblock_t newbno2; /* other new block number */ 193 xfs_extlen_t newlen1=0; /* length with newbno1 */ 194 xfs_extlen_t newlen2=0; /* length with newbno2 */ 195 xfs_agblock_t wantend; /* end of target extent */ 196 197 ASSERT(freelen >= wantlen); 198 freeend = freebno + freelen; 199 wantend = wantbno + wantlen; 200 if (freebno >= wantbno) { 201 if ((newbno1 = roundup(freebno, alignment)) >= freeend) 202 newbno1 = NULLAGBLOCK; 203 } else if (freeend >= wantend && alignment > 1) { 204 newbno1 = roundup(wantbno, alignment); 205 newbno2 = newbno1 - alignment; 206 if (newbno1 >= freeend) 207 newbno1 = NULLAGBLOCK; 208 else 209 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); 210 if (newbno2 < freebno) 211 newbno2 = NULLAGBLOCK; 212 else 213 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); 214 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { 215 if (newlen1 < newlen2 || 216 (newlen1 == newlen2 && 217 XFS_ABSDIFF(newbno1, wantbno) > 218 XFS_ABSDIFF(newbno2, wantbno))) 219 newbno1 = newbno2; 220 } else if (newbno2 != NULLAGBLOCK) 221 newbno1 = newbno2; 222 } else if (freeend >= wantend) { 223 newbno1 = wantbno; 224 } else if (alignment > 1) { 225 newbno1 = roundup(freeend - wantlen, alignment); 226 if (newbno1 > freeend - wantlen && 227 newbno1 - alignment >= freebno) 228 newbno1 -= alignment; 229 else if (newbno1 >= freeend) 230 newbno1 = NULLAGBLOCK; 231 } else 232 newbno1 = freeend - wantlen; 233 *newbnop = newbno1; 
234 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); 235} 236 237/* 238 * Fix up the length, based on mod and prod. 239 * len should be k * prod + mod for some k. 240 * If len is too small it is returned unchanged. 241 * If len hits maxlen it is left alone. 242 */ 243STATIC void 244xfs_alloc_fix_len( 245 xfs_alloc_arg_t *args) /* allocation argument structure */ 246{ 247 xfs_extlen_t k; 248 xfs_extlen_t rlen; 249 250 ASSERT(args->mod < args->prod); 251 rlen = args->len; 252 ASSERT(rlen >= args->minlen); 253 ASSERT(rlen <= args->maxlen); 254 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || 255 (args->mod == 0 && rlen < args->prod)) 256 return; 257 k = rlen % args->prod; 258 if (k == args->mod) 259 return; 260 if (k > args->mod) { 261 if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen) 262 return; 263 } else { 264 if ((int)(rlen = rlen - args->prod - (args->mod - k)) < 265 (int)args->minlen) 266 return; 267 } 268 ASSERT(rlen >= args->minlen); 269 ASSERT(rlen <= args->maxlen); 270 args->len = rlen; 271} 272 273/* 274 * Fix up length if there is too little space left in the a.g. 275 * Return 1 if ok, 0 if too little, should give up. 276 */ 277STATIC int 278xfs_alloc_fix_minleft( 279 xfs_alloc_arg_t *args) /* allocation argument structure */ 280{ 281 xfs_agf_t *agf; /* a.g. freelist header */ 282 int diff; /* free space difference */ 283 284 if (args->minleft == 0) 285 return 1; 286 agf = XFS_BUF_TO_AGF(args->agbp); 287 diff = be32_to_cpu(agf->agf_freeblks) 288 + be32_to_cpu(agf->agf_flcount) 289 - args->len - args->minleft; 290 if (diff >= 0) 291 return 1; 292 args->len += diff; /* shrink the allocated space */ 293 if (args->len >= args->minlen) 294 return 1; 295 args->agbno = NULLAGBLOCK; 296 return 0; 297} 298 299/* 300 * Update the two btrees, logically removing from freespace the extent 301 * starting at rbno, rlen blocks. The extent is contained within the 302 * actual (current) free extent fbno for flen blocks. 
303 * Flags are passed in indicating whether the cursors are set to the 304 * relevant records. 305 */ 306STATIC int /* error code */ 307xfs_alloc_fixup_trees( 308 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ 309 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ 310 xfs_agblock_t fbno, /* starting block of free extent */ 311 xfs_extlen_t flen, /* length of free extent */ 312 xfs_agblock_t rbno, /* starting block of returned extent */ 313 xfs_extlen_t rlen, /* length of returned extent */ 314 int flags) /* flags, XFSA_FIXUP_... */ 315{ 316 int error; /* error code */ 317 int i; /* operation results */ 318 xfs_agblock_t nfbno1; /* first new free startblock */ 319 xfs_agblock_t nfbno2; /* second new free startblock */ 320 xfs_extlen_t nflen1=0; /* first new free length */ 321 xfs_extlen_t nflen2=0; /* second new free length */ 322 323 /* 324 * Look up the record in the by-size tree if necessary. 325 */ 326 if (flags & XFSA_FIXUP_CNT_OK) { 327#ifdef DEBUG 328 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) 329 return error; 330 XFS_WANT_CORRUPTED_RETURN( 331 i == 1 && nfbno1 == fbno && nflen1 == flen); 332#endif 333 } else { 334 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) 335 return error; 336 XFS_WANT_CORRUPTED_RETURN(i == 1); 337 } 338 /* 339 * Look up the record in the by-block tree if necessary. 
340 */ 341 if (flags & XFSA_FIXUP_BNO_OK) { 342#ifdef DEBUG 343 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) 344 return error; 345 XFS_WANT_CORRUPTED_RETURN( 346 i == 1 && nfbno1 == fbno && nflen1 == flen); 347#endif 348 } else { 349 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) 350 return error; 351 XFS_WANT_CORRUPTED_RETURN(i == 1); 352 } 353 354#ifdef DEBUG 355 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { 356 struct xfs_btree_block *bnoblock; 357 struct xfs_btree_block *cntblock; 358 359 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); 360 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); 361 362 XFS_WANT_CORRUPTED_RETURN( 363 bnoblock->bb_numrecs == cntblock->bb_numrecs); 364 } 365#endif 366 367 /* 368 * Deal with all four cases: the allocated record is contained 369 * within the freespace record, so we can have new freespace 370 * at either (or both) end, or no freespace remaining. 371 */ 372 if (rbno == fbno && rlen == flen) 373 nfbno1 = nfbno2 = NULLAGBLOCK; 374 else if (rbno == fbno) { 375 nfbno1 = rbno + rlen; 376 nflen1 = flen - rlen; 377 nfbno2 = NULLAGBLOCK; 378 } else if (rbno + rlen == fbno + flen) { 379 nfbno1 = fbno; 380 nflen1 = flen - rlen; 381 nfbno2 = NULLAGBLOCK; 382 } else { 383 nfbno1 = fbno; 384 nflen1 = rbno - fbno; 385 nfbno2 = rbno + rlen; 386 nflen2 = (fbno + flen) - nfbno2; 387 } 388 /* 389 * Delete the entry from the by-size btree. 390 */ 391 if ((error = xfs_btree_delete(cnt_cur, &i))) 392 return error; 393 XFS_WANT_CORRUPTED_RETURN(i == 1); 394 /* 395 * Add new by-size btree entry(s). 
396 */ 397 if (nfbno1 != NULLAGBLOCK) { 398 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) 399 return error; 400 XFS_WANT_CORRUPTED_RETURN(i == 0); 401 if ((error = xfs_btree_insert(cnt_cur, &i))) 402 return error; 403 XFS_WANT_CORRUPTED_RETURN(i == 1); 404 } 405 if (nfbno2 != NULLAGBLOCK) { 406 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) 407 return error; 408 XFS_WANT_CORRUPTED_RETURN(i == 0); 409 if ((error = xfs_btree_insert(cnt_cur, &i))) 410 return error; 411 XFS_WANT_CORRUPTED_RETURN(i == 1); 412 } 413 /* 414 * Fix up the by-block btree entry(s). 415 */ 416 if (nfbno1 == NULLAGBLOCK) { 417 /* 418 * No remaining freespace, just delete the by-block tree entry. 419 */ 420 if ((error = xfs_btree_delete(bno_cur, &i))) 421 return error; 422 XFS_WANT_CORRUPTED_RETURN(i == 1); 423 } else { 424 /* 425 * Update the by-block entry to start later|be shorter. 426 */ 427 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) 428 return error; 429 } 430 if (nfbno2 != NULLAGBLOCK) { 431 /* 432 * 2 resulting free entries, need to add one. 433 */ 434 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 435 return error; 436 XFS_WANT_CORRUPTED_RETURN(i == 0); 437 if ((error = xfs_btree_insert(bno_cur, &i))) 438 return error; 439 XFS_WANT_CORRUPTED_RETURN(i == 1); 440 } 441 return 0; 442} 443 444/* 445 * Read in the allocation group free block array. 
446 */ 447STATIC int /* error */ 448xfs_alloc_read_agfl( 449 xfs_mount_t *mp, /* mount point structure */ 450 xfs_trans_t *tp, /* transaction pointer */ 451 xfs_agnumber_t agno, /* allocation group number */ 452 xfs_buf_t **bpp) /* buffer for the ag free block array */ 453{ 454 xfs_buf_t *bp; /* return value */ 455 int error; 456 457 ASSERT(agno != NULLAGNUMBER); 458 error = xfs_trans_read_buf( 459 mp, tp, mp->m_ddev_targp, 460 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), 461 XFS_FSS_TO_BB(mp, 1), 0, &bp); 462 if (error) 463 return error; 464 ASSERT(bp); 465 ASSERT(!XFS_BUF_GETERROR(bp)); 466 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF); 467 *bpp = bp; 468 return 0; 469} 470 471/* 472 * Allocation group level functions. 473 */ 474 475/* 476 * Allocate a variable extent in the allocation group agno. 477 * Type and bno are used to determine where in the allocation group the 478 * extent will start. 479 * Extent's length (returned in *len) will be between minlen and maxlen, 480 * and of the form k * prod + mod unless there's nothing that large. 481 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 482 */ 483STATIC int /* error */ 484xfs_alloc_ag_vextent( 485 xfs_alloc_arg_t *args) /* argument structure for allocation */ 486{ 487 int error=0; 488 489 ASSERT(args->minlen > 0); 490 ASSERT(args->maxlen > 0); 491 ASSERT(args->minlen <= args->maxlen); 492 ASSERT(args->mod < args->prod); 493 ASSERT(args->alignment > 0); 494 /* 495 * Branch to correct routine based on the type. 
496 */ 497 args->wasfromfl = 0; 498 switch (args->type) { 499 case XFS_ALLOCTYPE_THIS_AG: 500 error = xfs_alloc_ag_vextent_size(args); 501 break; 502 case XFS_ALLOCTYPE_NEAR_BNO: 503 error = xfs_alloc_ag_vextent_near(args); 504 break; 505 case XFS_ALLOCTYPE_THIS_BNO: 506 error = xfs_alloc_ag_vextent_exact(args); 507 break; 508 default: 509 ASSERT(0); 510 /* NOTREACHED */ 511 } 512 if (error) 513 return error; 514 /* 515 * If the allocation worked, need to change the agf structure 516 * (and log it), and the superblock. 517 */ 518 if (args->agbno != NULLAGBLOCK) { 519 xfs_agf_t *agf; /* allocation group freelist header */ 520 long slen = (long)args->len; 521 522 ASSERT(args->len >= args->minlen && args->len <= args->maxlen); 523 ASSERT(!(args->wasfromfl) || !args->isfl); 524 ASSERT(args->agbno % args->alignment == 0); 525 if (!(args->wasfromfl)) { 526 527 agf = XFS_BUF_TO_AGF(args->agbp); 528 be32_add_cpu(&agf->agf_freeblks, -(args->len)); 529 xfs_trans_agblocks_delta(args->tp, 530 -((long)(args->len))); 531 args->pag->pagf_freeblks -= args->len; 532 ASSERT(be32_to_cpu(agf->agf_freeblks) <= 533 be32_to_cpu(agf->agf_length)); 534 xfs_alloc_log_agf(args->tp, args->agbp, 535 XFS_AGF_FREEBLKS); 536 /* 537 * Search the busylist for these blocks and mark the 538 * transaction as synchronous if blocks are found. This 539 * avoids the need to block due to a synchronous log 540 * force to ensure correct ordering as the synchronous 541 * transaction will guarantee that for us. 542 */ 543 if (xfs_alloc_busy_search(args->mp, args->agno, 544 args->agbno, args->len)) 545 xfs_trans_set_sync(args->tp); 546 } 547 if (!args->isfl) 548 xfs_trans_mod_sb(args->tp, 549 args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : 550 XFS_TRANS_SB_FDBLOCKS, -slen); 551 XFS_STATS_INC(xs_allocx); 552 XFS_STATS_ADD(xs_allocb, args->len); 553 } 554 return 0; 555} 556 557/* 558 * Allocate a variable extent at exactly agno/bno. 
559 * Extent's length (returned in *len) will be between minlen and maxlen, 560 * and of the form k * prod + mod unless there's nothing that large. 561 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. 562 */ 563STATIC int /* error */ 564xfs_alloc_ag_vextent_exact( 565 xfs_alloc_arg_t *args) /* allocation argument structure */ 566{ 567 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 568 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 569 xfs_agblock_t end; /* end of allocated extent */ 570 int error; 571 xfs_agblock_t fbno; /* start block of found extent */ 572 xfs_agblock_t fend; /* end block of found extent */ 573 xfs_extlen_t flen; /* length of found extent */ 574 int i; /* success/failure of operation */ 575 xfs_agblock_t maxend; /* end of maximal extent */ 576 xfs_agblock_t minend; /* end of minimal extent */ 577 xfs_extlen_t rlen; /* length of returned extent */ 578 579 ASSERT(args->alignment == 1); 580 /* 581 * Allocate/initialize a cursor for the by-number freespace btree. 582 */ 583 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 584 args->agno, XFS_BTNUM_BNO); 585 /* 586 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 587 * Look for the closest free block <= bno, it must contain bno 588 * if any free block does. 589 */ 590 if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) 591 goto error0; 592 if (!i) { 593 /* 594 * Didn't find it, return null. 595 */ 596 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 597 args->agbno = NULLAGBLOCK; 598 return 0; 599 } 600 /* 601 * Grab the freespace record. 602 */ 603 if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) 604 goto error0; 605 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 606 ASSERT(fbno <= args->agbno); 607 minend = args->agbno + args->minlen; 608 maxend = args->agbno + args->maxlen; 609 fend = fbno + flen; 610 /* 611 * Give up if the freespace isn't long enough for the minimum request. 
612 */ 613 if (fend < minend) { 614 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 615 args->agbno = NULLAGBLOCK; 616 return 0; 617 } 618 /* 619 * End of extent will be smaller of the freespace end and the 620 * maximal requested end. 621 */ 622 end = XFS_AGBLOCK_MIN(fend, maxend); 623 /* 624 * Fix the length according to mod and prod if given. 625 */ 626 args->len = end - args->agbno; 627 xfs_alloc_fix_len(args); 628 if (!xfs_alloc_fix_minleft(args)) { 629 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 630 return 0; 631 } 632 rlen = args->len; 633 ASSERT(args->agbno + rlen <= fend); 634 end = args->agbno + rlen; 635 /* 636 * We are allocating agbno for rlen [agbno .. end] 637 * Allocate/initialize a cursor for the by-size btree. 638 */ 639 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 640 args->agno, XFS_BTNUM_CNT); 641 ASSERT(args->agbno + args->len <= 642 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 643 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 644 args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { 645 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 646 goto error0; 647 } 648 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 649 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 650 651 trace_xfs_alloc_exact_done(args); 652 args->wasfromfl = 0; 653 return 0; 654 655error0: 656 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 657 trace_xfs_alloc_exact_error(args); 658 return error; 659} 660 661/* 662 * Allocate a variable extent near bno in the allocation group agno. 663 * Extent's length (returned in len) will be between minlen and maxlen, 664 * and of the form k * prod + mod unless there's nothing that large. 665 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 
666 */ 667STATIC int /* error */ 668xfs_alloc_ag_vextent_near( 669 xfs_alloc_arg_t *args) /* allocation argument structure */ 670{ 671 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ 672 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ 673 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ 674 xfs_agblock_t gtbno; /* start bno of right side entry */ 675 xfs_agblock_t gtbnoa; /* aligned ... */ 676 xfs_extlen_t gtdiff; /* difference to right side entry */ 677 xfs_extlen_t gtlen; /* length of right side entry */ 678 xfs_extlen_t gtlena; /* aligned ... */ 679 xfs_agblock_t gtnew; /* useful start bno of right side */ 680 int error; /* error code */ 681 int i; /* result code, temporary */ 682 int j; /* result code, temporary */ 683 xfs_agblock_t ltbno; /* start bno of left side entry */ 684 xfs_agblock_t ltbnoa; /* aligned ... */ 685 xfs_extlen_t ltdiff; /* difference to left side entry */ 686 xfs_extlen_t ltlen; /* length of left side entry */ 687 xfs_extlen_t ltlena; /* aligned ... */ 688 xfs_agblock_t ltnew; /* useful start bno of left side */ 689 xfs_extlen_t rlen; /* length of returned extent */ 690#if defined(DEBUG) && defined(__KERNEL__) 691 /* 692 * Randomly don't execute the first algorithm. 693 */ 694 int dofirst; /* set to do first algorithm */ 695 696 dofirst = random32() & 1; 697#endif 698 /* 699 * Get a cursor for the by-size btree. 700 */ 701 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 702 args->agno, XFS_BTNUM_CNT); 703 ltlen = 0; 704 bno_cur_lt = bno_cur_gt = NULL; 705 /* 706 * See if there are any free extents as big as maxlen. 707 */ 708 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) 709 goto error0; 710 /* 711 * If none, then pick up the last entry in the tree unless the 712 * tree is empty. 
713 */ 714 if (!i) { 715 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, 716 <len, &i))) 717 goto error0; 718 if (i == 0 || ltlen == 0) { 719 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 720 return 0; 721 } 722 ASSERT(i == 1); 723 } 724 args->wasfromfl = 0; 725 /* 726 * First algorithm. 727 * If the requested extent is large wrt the freespaces available 728 * in this a.g., then the cursor will be pointing to a btree entry 729 * near the right edge of the tree. If it's in the last btree leaf 730 * block, then we just examine all the entries in that block 731 * that are big enough, and pick the best one. 732 * This is written as a while loop so we can break out of it, 733 * but we never loop back to the top. 734 */ 735 while (xfs_btree_islastblock(cnt_cur, 0)) { 736 xfs_extlen_t bdiff; 737 int besti=0; 738 xfs_extlen_t blen=0; 739 xfs_agblock_t bnew=0; 740 741#if defined(DEBUG) && defined(__KERNEL__) 742 if (!dofirst) 743 break; 744#endif 745 /* 746 * Start from the entry that lookup found, sequence through 747 * all larger free blocks. If we're actually pointing at a 748 * record smaller than maxlen, go to the start of this block, 749 * and skip all those smaller than minlen. 750 */ 751 if (ltlen || args->alignment > 1) { 752 cnt_cur->bc_ptrs[0] = 1; 753 do { 754 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, 755 <len, &i))) 756 goto error0; 757 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 758 if (ltlen >= args->minlen) 759 break; 760 if ((error = xfs_btree_increment(cnt_cur, 0, &i))) 761 goto error0; 762 } while (i); 763 ASSERT(ltlen >= args->minlen); 764 if (!i) 765 break; 766 } 767 i = cnt_cur->bc_ptrs[0]; 768 for (j = 1, blen = 0, bdiff = 0; 769 !error && j && (blen < args->maxlen || bdiff > 0); 770 error = xfs_btree_increment(cnt_cur, 0, &j)) { 771 /* 772 * For each entry, decide if it's better than 773 * the previous best entry. 
774 */ 775 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 776 goto error0; 777 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 778 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 779 args->minlen, <bnoa, <lena); 780 if (ltlena < args->minlen) 781 continue; 782 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 783 xfs_alloc_fix_len(args); 784 ASSERT(args->len >= args->minlen); 785 if (args->len < blen) 786 continue; 787 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 788 args->alignment, ltbno, ltlen, <new); 789 if (ltnew != NULLAGBLOCK && 790 (args->len > blen || ltdiff < bdiff)) { 791 bdiff = ltdiff; 792 bnew = ltnew; 793 blen = args->len; 794 besti = cnt_cur->bc_ptrs[0]; 795 } 796 } 797 /* 798 * It didn't work. We COULD be in a case where 799 * there's a good record somewhere, so try again. 800 */ 801 if (blen == 0) 802 break; 803 /* 804 * Point at the best entry, and retrieve it again. 805 */ 806 cnt_cur->bc_ptrs[0] = besti; 807 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 808 goto error0; 809 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 810 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 811 args->len = blen; 812 if (!xfs_alloc_fix_minleft(args)) { 813 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 814 trace_xfs_alloc_near_nominleft(args); 815 return 0; 816 } 817 blen = args->len; 818 /* 819 * We are allocating starting at bnew for blen blocks. 820 */ 821 args->agbno = bnew; 822 ASSERT(bnew >= ltbno); 823 ASSERT(bnew + blen <= ltbno + ltlen); 824 /* 825 * Set up a cursor for the by-bno tree. 826 */ 827 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, 828 args->agbp, args->agno, XFS_BTNUM_BNO); 829 /* 830 * Fix up the btree entries. 
831 */ 832 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, 833 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) 834 goto error0; 835 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 836 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 837 838 trace_xfs_alloc_near_first(args); 839 return 0; 840 } 841 /* 842 * Second algorithm. 843 * Search in the by-bno tree to the left and to the right 844 * simultaneously, until in each case we find a space big enough, 845 * or run into the edge of the tree. When we run into the edge, 846 * we deallocate that cursor. 847 * If both searches succeed, we compare the two spaces and pick 848 * the better one. 849 * With alignment, it's possible for both to fail; the upper 850 * level algorithm that picks allocation groups for allocations 851 * is not supposed to do this. 852 */ 853 /* 854 * Allocate and initialize the cursor for the leftward search. 855 */ 856 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 857 args->agno, XFS_BTNUM_BNO); 858 /* 859 * Lookup <= bno to find the leftward search's starting point. 860 */ 861 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) 862 goto error0; 863 if (!i) { 864 /* 865 * Didn't find anything; use this cursor for the rightward 866 * search. 867 */ 868 bno_cur_gt = bno_cur_lt; 869 bno_cur_lt = NULL; 870 } 871 /* 872 * Found something. Duplicate the cursor for the rightward search. 873 */ 874 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) 875 goto error0; 876 /* 877 * Increment the cursor, so we will point at the entry just right 878 * of the leftward entry if any, or to the leftmost entry. 879 */ 880 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 881 goto error0; 882 if (!i) { 883 /* 884 * It failed, there are no rightward entries. 
885 */ 886 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); 887 bno_cur_gt = NULL; 888 } 889 /* 890 * Loop going left with the leftward cursor, right with the 891 * rightward cursor, until either both directions give up or 892 * we find an entry at least as big as minlen. 893 */ 894 do { 895 if (bno_cur_lt) { 896 if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) 897 goto error0; 898 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 899 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 900 args->minlen, <bnoa, <lena); 901 if (ltlena >= args->minlen) 902 break; 903 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 904 goto error0; 905 if (!i) { 906 xfs_btree_del_cursor(bno_cur_lt, 907 XFS_BTREE_NOERROR); 908 bno_cur_lt = NULL; 909 } 910 } 911 if (bno_cur_gt) { 912 if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) 913 goto error0; 914 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 915 xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, 916 args->minlen, >bnoa, >lena); 917 if (gtlena >= args->minlen) 918 break; 919 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 920 goto error0; 921 if (!i) { 922 xfs_btree_del_cursor(bno_cur_gt, 923 XFS_BTREE_NOERROR); 924 bno_cur_gt = NULL; 925 } 926 } 927 } while (bno_cur_lt || bno_cur_gt); 928 /* 929 * Got both cursors still active, need to find better entry. 930 */ 931 if (bno_cur_lt && bno_cur_gt) { 932 /* 933 * Left side is long enough, look for a right side entry. 934 */ 935 if (ltlena >= args->minlen) { 936 /* 937 * Fix up the length. 938 */ 939 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 940 xfs_alloc_fix_len(args); 941 rlen = args->len; 942 ltdiff = xfs_alloc_compute_diff(args->agbno, rlen, 943 args->alignment, ltbno, ltlen, <new); 944 /* 945 * Not perfect. 946 */ 947 if (ltdiff) { 948 /* 949 * Look until we find a better one, run out of 950 * space, or run off the end. 
951 */ 952 while (bno_cur_lt && bno_cur_gt) { 953 if ((error = xfs_alloc_get_rec( 954 bno_cur_gt, >bno, 955 >len, &i))) 956 goto error0; 957 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 958 xfs_alloc_compute_aligned(gtbno, gtlen, 959 args->alignment, args->minlen, 960 >bnoa, >lena); 961 /* 962 * The left one is clearly better. 963 */ 964 if (gtbnoa >= args->agbno + ltdiff) { 965 xfs_btree_del_cursor( 966 bno_cur_gt, 967 XFS_BTREE_NOERROR); 968 bno_cur_gt = NULL; 969 break; 970 } 971 /* 972 * If we reach a big enough entry, 973 * compare the two and pick the best. 974 */ 975 if (gtlena >= args->minlen) { 976 args->len = 977 XFS_EXTLEN_MIN(gtlena, 978 args->maxlen); 979 xfs_alloc_fix_len(args); 980 rlen = args->len; 981 gtdiff = xfs_alloc_compute_diff( 982 args->agbno, rlen, 983 args->alignment, 984 gtbno, gtlen, >new); 985 /* 986 * Right side is better. 987 */ 988 if (gtdiff < ltdiff) { 989 xfs_btree_del_cursor( 990 bno_cur_lt, 991 XFS_BTREE_NOERROR); 992 bno_cur_lt = NULL; 993 } 994 /* 995 * Left side is better. 996 */ 997 else { 998 xfs_btree_del_cursor( 999 bno_cur_gt, 1000 XFS_BTREE_NOERROR); 1001 bno_cur_gt = NULL; 1002 } 1003 break; 1004 } 1005 /* 1006 * Fell off the right end. 1007 */ 1008 if ((error = xfs_btree_increment( 1009 bno_cur_gt, 0, &i))) 1010 goto error0; 1011 if (!i) { 1012 xfs_btree_del_cursor( 1013 bno_cur_gt, 1014 XFS_BTREE_NOERROR); 1015 bno_cur_gt = NULL; 1016 break; 1017 } 1018 } 1019 } 1020 /* 1021 * The left side is perfect, trash the right side. 1022 */ 1023 else { 1024 xfs_btree_del_cursor(bno_cur_gt, 1025 XFS_BTREE_NOERROR); 1026 bno_cur_gt = NULL; 1027 } 1028 } 1029 /* 1030 * It's the right side that was found first, look left. 1031 */ 1032 else { 1033 /* 1034 * Fix up the length. 1035 */ 1036 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1037 xfs_alloc_fix_len(args); 1038 rlen = args->len; 1039 gtdiff = xfs_alloc_compute_diff(args->agbno, rlen, 1040 args->alignment, gtbno, gtlen, >new); 1041 /* 1042 * Right side entry isn't perfect. 
1043 */ 1044 if (gtdiff) { 1045 /* 1046 * Look until we find a better one, run out of 1047 * space, or run off the end. 1048 */ 1049 while (bno_cur_lt && bno_cur_gt) { 1050 if ((error = xfs_alloc_get_rec( 1051 bno_cur_lt, <bno, 1052 <len, &i))) 1053 goto error0; 1054 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1055 xfs_alloc_compute_aligned(ltbno, ltlen, 1056 args->alignment, args->minlen, 1057 <bnoa, <lena); 1058 /* 1059 * The right one is clearly better. 1060 */ 1061 if (ltbnoa <= args->agbno - gtdiff) { 1062 xfs_btree_del_cursor( 1063 bno_cur_lt, 1064 XFS_BTREE_NOERROR); 1065 bno_cur_lt = NULL; 1066 break; 1067 } 1068 /* 1069 * If we reach a big enough entry, 1070 * compare the two and pick the best. 1071 */ 1072 if (ltlena >= args->minlen) { 1073 args->len = XFS_EXTLEN_MIN( 1074 ltlena, args->maxlen); 1075 xfs_alloc_fix_len(args); 1076 rlen = args->len; 1077 ltdiff = xfs_alloc_compute_diff( 1078 args->agbno, rlen, 1079 args->alignment, 1080 ltbno, ltlen, <new); 1081 /* 1082 * Left side is better. 1083 */ 1084 if (ltdiff < gtdiff) { 1085 xfs_btree_del_cursor( 1086 bno_cur_gt, 1087 XFS_BTREE_NOERROR); 1088 bno_cur_gt = NULL; 1089 } 1090 /* 1091 * Right side is better. 1092 */ 1093 else { 1094 xfs_btree_del_cursor( 1095 bno_cur_lt, 1096 XFS_BTREE_NOERROR); 1097 bno_cur_lt = NULL; 1098 } 1099 break; 1100 } 1101 /* 1102 * Fell off the left end. 1103 */ 1104 if ((error = xfs_btree_decrement( 1105 bno_cur_lt, 0, &i))) 1106 goto error0; 1107 if (!i) { 1108 xfs_btree_del_cursor(bno_cur_lt, 1109 XFS_BTREE_NOERROR); 1110 bno_cur_lt = NULL; 1111 break; 1112 } 1113 } 1114 } 1115 /* 1116 * The right side is perfect, trash the left side. 1117 */ 1118 else { 1119 xfs_btree_del_cursor(bno_cur_lt, 1120 XFS_BTREE_NOERROR); 1121 bno_cur_lt = NULL; 1122 } 1123 } 1124 } 1125 /* 1126 * If we couldn't get anything, give up. 
1127 */ 1128 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1129 trace_xfs_alloc_size_neither(args); 1130 args->agbno = NULLAGBLOCK; 1131 return 0; 1132 } 1133 /* 1134 * At this point we have selected a freespace entry, either to the 1135 * left or to the right. If it's on the right, copy all the 1136 * useful variables to the "left" set so we only have one 1137 * copy of this code. 1138 */ 1139 if (bno_cur_gt) { 1140 bno_cur_lt = bno_cur_gt; 1141 bno_cur_gt = NULL; 1142 ltbno = gtbno; 1143 ltbnoa = gtbnoa; 1144 ltlen = gtlen; 1145 ltlena = gtlena; 1146 j = 1; 1147 } else 1148 j = 0; 1149 /* 1150 * Fix up the length and compute the useful address. 1151 */ 1152 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1153 xfs_alloc_fix_len(args); 1154 if (!xfs_alloc_fix_minleft(args)) { 1155 trace_xfs_alloc_near_nominleft(args); 1156 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1157 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1158 return 0; 1159 } 1160 rlen = args->len; 1161 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, 1162 ltlen, <new); 1163 ASSERT(ltnew >= ltbno); 1164 ASSERT(ltnew + rlen <= ltbno + ltlen); 1165 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1166 args->agbno = ltnew; 1167 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1168 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1169 goto error0; 1170 1171 if (j) 1172 trace_xfs_alloc_near_greater(args); 1173 else 1174 trace_xfs_alloc_near_lesser(args); 1175 1176 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1177 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1178 return 0; 1179 1180 error0: 1181 trace_xfs_alloc_near_error(args); 1182 if (cnt_cur != NULL) 1183 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1184 if (bno_cur_lt != NULL) 1185 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); 1186 if (bno_cur_gt != NULL) 1187 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); 1188 return error; 1189} 1190 1191/* 1192 * 
Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * Strategy: search the by-size (cnt) btree for an entry big enough to
 * satisfy maxlen even after alignment trimming; if none exists, take the
 * largest available record and scan leftward for the best aligned fit.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */

	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	bno_cur = NULL;
	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 * The extra alignment-1 blocks guarantee maxlen survives trimming.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;
	/*
	 * If none, then pick up the last entry in the tree unless the
	 * tree is empty.
	 */
	if (!i) {
		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
				&flen, &i)))
			goto error0;
		if (i == 0 || flen == 0) {
			/* Nothing usable at all: report no entry. */
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
	}
	/*
	 * There's a freespace as big as maxlen+alignment-1, get it.
	 */
	else {
		if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	}
	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree.  Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
		&rbno, &rlen);
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
	if (rlen < args->maxlen) {
		/* Best candidate seen so far during the leftward scan. */
		xfs_agblock_t	bestfbno;
		xfs_extlen_t	bestflen;
		xfs_agblock_t	bestrbno;
		xfs_extlen_t	bestrlen;

		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			/*
			 * Records are ordered by size; once the raw length
			 * drops below the best aligned length, nothing to
			 * the left can win.
			 */
			if (flen < bestrlen)
				break;
			xfs_alloc_compute_aligned(fbno, flen, args->alignment,
				args->minlen, &rbno, &rlen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
				(rlen <= flen && rbno + rlen <= fbno + flen),
				error0);
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		/* Reposition the cursor on the winning record. */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	xfs_alloc_fix_len(args);
	if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
		trace_xfs_alloc_size_nominleft(args);
		args->agbno = NULLAGBLOCK;
		return 0;
	}
	rlen = args->len;
	XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	XFS_WANT_CORRUPTED_GOTO(
		args->agbno + args->len <=
		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
		error0);
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Deal with the case where only small freespaces remain.
 * Either return the contents of the last freespace record,
 * or allocate space from the freelist if there is nothing in the tree.
 *
 * On return, *stat == 1 means a normal by-size record (possibly none
 * usable) was returned in *fbnop/*flenp; *stat == 0 means a single
 * block was taken from the AGFL and args->agbno/args->len are set.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	xfs_alloc_arg_t	*args,	/* allocation argument structure */
	xfs_btree_cur_t	*ccur,	/* by-size cursor */
	xfs_agblock_t	*fbnop,	/* result block number */
	xfs_extlen_t	*flenp,	/* result length */
	int		*stat)	/* status: 0-freelist, 1-normal/none */
{
	int		error;
	xfs_agblock_t	fbno;
	xfs_extlen_t	flen;
	int		i;

	/* Step back to the last (largest) record in the by-size tree. */
	if ((error = xfs_btree_decrement(ccur, 0, &i)))
		goto error0;
	if (i) {
		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	}
	/*
	 * Nothing in the btree, try the freelist.  Make sure
	 * to respect minleft even when pulling from the
	 * freelist.
	 */
	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
		  > args->minleft)) {
		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
		if (error)
			goto error0;
		if (fbno != NULLAGBLOCK) {
			if (args->userdata) {
				/*
				 * The block may have been used for btree
				 * metadata before; invalidate any stale
				 * buffer before handing it to user data.
				 */
				xfs_buf_t	*bp;

				bp = xfs_btree_get_bufs(args->mp, args->tp,
					args->agno, fbno, 0);
				xfs_trans_binval(args->tp, bp);
			}
			args->len = 1;
			args->agbno = fbno;
			XFS_WANT_CORRUPTED_GOTO(
				args->agbno + args->len <=
				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
				error0);
			args->wasfromfl = 1;
			trace_xfs_alloc_small_freelist(args);
			*stat = 0;
			return 0;
		}
		/*
		 * Nothing in the freelist.
		 */
		else
			flen = 0;
	}
	/*
	 * Can't allocate from the freelist for some reason.
	 */
	else {
		fbno = NULLAGBLOCK;
		flen = 0;
	}
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error0:
	trace_xfs_alloc_small_error(args);
	return error;
}

/*
 * Free the extent starting at agno/bno for length.
 */
STATIC int			/* error */
xfs_free_ag_extent(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
	xfs_agnumber_t	agno,	/* allocation group number */
	xfs_agblock_t	bno,	/* starting block number */
	xfs_extlen_t	len,	/* length of extent */
	int		isfl)	/* set if is freelist blocks - no sb acctg */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
	int		error;		/* error return value */
	xfs_agblock_t	gtbno;		/* start of right neighbor block */
	xfs_extlen_t	gtlen;		/* length of right neighbor block */
	int		haveleft;	/* have a left neighbor block */
	int		haveright;	/* have a right neighbor block */
	int		i;		/* temp, result code */
	xfs_agblock_t	ltbno;		/* start of left neighbor block */
	xfs_extlen_t	ltlen;		/* length of left neighbor block */
	xfs_mount_t	*mp;		/* mount point struct for filesystem */
	xfs_agblock_t	nbno;		/* new starting block of freespace */
	xfs_extlen_t	nlen;		/* new length of freespace */

	mp = tp->t_mountp;
	/*
	 * Allocate and initialize a cursor for the by-block btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
	cnt_cur = NULL;
	/*
	 * Look for a neighboring block on the left (lower block numbers)
	 * that is contiguous with this space.
1466 */ 1467 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) 1468 goto error0; 1469 if (haveleft) { 1470 /* 1471 * There is a block to our left. 1472 */ 1473 if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) 1474 goto error0; 1475 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1476 /* 1477 * It's not contiguous, though. 1478 */ 1479 if (ltbno + ltlen < bno) 1480 haveleft = 0; 1481 else { 1482 /* 1483 * If this failure happens the request to free this 1484 * space was invalid, it's (partly) already free. 1485 * Very bad. 1486 */ 1487 XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); 1488 } 1489 } 1490 /* 1491 * Look for a neighboring block on the right (higher block numbers) 1492 * that is contiguous with this space. 1493 */ 1494 if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) 1495 goto error0; 1496 if (haveright) { 1497 /* 1498 * There is a block to our right. 1499 */ 1500 if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) 1501 goto error0; 1502 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1503 /* 1504 * It's not contiguous, though. 1505 */ 1506 if (bno + len < gtbno) 1507 haveright = 0; 1508 else { 1509 /* 1510 * If this failure happens the request to free this 1511 * space was invalid, it's (partly) already free. 1512 * Very bad. 1513 */ 1514 XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); 1515 } 1516 } 1517 /* 1518 * Now allocate and initialize a cursor for the by-size tree. 1519 */ 1520 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); 1521 /* 1522 * Have both left and right contiguous neighbors. 1523 * Merge all three into a single free block. 1524 */ 1525 if (haveleft && haveright) { 1526 /* 1527 * Delete the old by-size entry on the left. 
1528 */ 1529 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1530 goto error0; 1531 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1532 if ((error = xfs_btree_delete(cnt_cur, &i))) 1533 goto error0; 1534 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1535 /* 1536 * Delete the old by-size entry on the right. 1537 */ 1538 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1539 goto error0; 1540 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1541 if ((error = xfs_btree_delete(cnt_cur, &i))) 1542 goto error0; 1543 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1544 /* 1545 * Delete the old by-block entry for the right block. 1546 */ 1547 if ((error = xfs_btree_delete(bno_cur, &i))) 1548 goto error0; 1549 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1550 /* 1551 * Move the by-block cursor back to the left neighbor. 1552 */ 1553 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1554 goto error0; 1555 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1556#ifdef DEBUG 1557 /* 1558 * Check that this is the right record: delete didn't 1559 * mangle the cursor. 1560 */ 1561 { 1562 xfs_agblock_t xxbno; 1563 xfs_extlen_t xxlen; 1564 1565 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, 1566 &i))) 1567 goto error0; 1568 XFS_WANT_CORRUPTED_GOTO( 1569 i == 1 && xxbno == ltbno && xxlen == ltlen, 1570 error0); 1571 } 1572#endif 1573 /* 1574 * Update remaining by-block entry to the new, joined block. 1575 */ 1576 nbno = ltbno; 1577 nlen = len + ltlen + gtlen; 1578 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1579 goto error0; 1580 } 1581 /* 1582 * Have only a left contiguous neighbor. 1583 * Merge it together with the new freespace. 1584 */ 1585 else if (haveleft) { 1586 /* 1587 * Delete the old by-size entry on the left. 
1588 */ 1589 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1590 goto error0; 1591 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1592 if ((error = xfs_btree_delete(cnt_cur, &i))) 1593 goto error0; 1594 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1595 /* 1596 * Back up the by-block cursor to the left neighbor, and 1597 * update its length. 1598 */ 1599 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1600 goto error0; 1601 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1602 nbno = ltbno; 1603 nlen = len + ltlen; 1604 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1605 goto error0; 1606 } 1607 /* 1608 * Have only a right contiguous neighbor. 1609 * Merge it together with the new freespace. 1610 */ 1611 else if (haveright) { 1612 /* 1613 * Delete the old by-size entry on the right. 1614 */ 1615 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1616 goto error0; 1617 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1618 if ((error = xfs_btree_delete(cnt_cur, &i))) 1619 goto error0; 1620 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1621 /* 1622 * Update the starting block and length of the right 1623 * neighbor in the by-block tree. 1624 */ 1625 nbno = bno; 1626 nlen = len + gtlen; 1627 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1628 goto error0; 1629 } 1630 /* 1631 * No contiguous neighbors. 1632 * Insert the new freespace into the by-block tree. 1633 */ 1634 else { 1635 nbno = bno; 1636 nlen = len; 1637 if ((error = xfs_btree_insert(bno_cur, &i))) 1638 goto error0; 1639 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1640 } 1641 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1642 bno_cur = NULL; 1643 /* 1644 * In all cases we need to insert the new freespace in the by-size tree. 
1645 */ 1646 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 1647 goto error0; 1648 XFS_WANT_CORRUPTED_GOTO(i == 0, error0); 1649 if ((error = xfs_btree_insert(cnt_cur, &i))) 1650 goto error0; 1651 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1652 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1653 cnt_cur = NULL; 1654 /* 1655 * Update the freespace totals in the ag and superblock. 1656 */ 1657 { 1658 xfs_agf_t *agf; 1659 xfs_perag_t *pag; /* per allocation group data */ 1660 1661 pag = xfs_perag_get(mp, agno); 1662 pag->pagf_freeblks += len; 1663 xfs_perag_put(pag); 1664 1665 agf = XFS_BUF_TO_AGF(agbp); 1666 be32_add_cpu(&agf->agf_freeblks, len); 1667 xfs_trans_agblocks_delta(tp, len); 1668 XFS_WANT_CORRUPTED_GOTO( 1669 be32_to_cpu(agf->agf_freeblks) <= 1670 be32_to_cpu(agf->agf_length), 1671 error0); 1672 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 1673 if (!isfl) 1674 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); 1675 XFS_STATS_INC(xs_freex); 1676 XFS_STATS_ADD(xs_freeb, len); 1677 } 1678 1679 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1680 1681 /* 1682 * Since blocks move to the free list without the coordination 1683 * used in xfs_bmap_finish, we can't allow block to be available 1684 * for reallocation and non-transaction writing (user data) 1685 * until we know that the transaction that moved it to the free 1686 * list is permanently on disk. We track the blocks by declaring 1687 * these blocks as "busy"; the busy list is maintained on a per-ag 1688 * basis and each transaction records which entries should be removed 1689 * when the iclog commits to disk. If a busy block is allocated, 1690 * the iclog is pushed up to the LSN that freed the block. 
1691 */ 1692 xfs_alloc_busy_insert(tp, agno, bno, len); 1693 return 0; 1694 1695 error0: 1696 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); 1697 if (bno_cur) 1698 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1699 if (cnt_cur) 1700 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1701 return error; 1702} 1703 1704/* 1705 * Visible (exported) allocation/free functions. 1706 * Some of these are used just by xfs_alloc_btree.c and this file. 1707 */ 1708 1709/* 1710 * Compute and fill in value of m_ag_maxlevels. 1711 */ 1712void 1713xfs_alloc_compute_maxlevels( 1714 xfs_mount_t *mp) /* file system mount structure */ 1715{ 1716 int level; 1717 uint maxblocks; 1718 uint maxleafents; 1719 int minleafrecs; 1720 int minnoderecs; 1721 1722 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2; 1723 minleafrecs = mp->m_alloc_mnr[0]; 1724 minnoderecs = mp->m_alloc_mnr[1]; 1725 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 1726 for (level = 1; maxblocks > 1; level++) 1727 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 1728 mp->m_ag_maxlevels = level; 1729} 1730 1731/* 1732 * Find the length of the longest extent in an AG. 1733 */ 1734xfs_extlen_t 1735xfs_alloc_longest_free_extent( 1736 struct xfs_mount *mp, 1737 struct xfs_perag *pag) 1738{ 1739 xfs_extlen_t need, delta = 0; 1740 1741 need = XFS_MIN_FREELIST_PAG(pag, mp); 1742 if (need > pag->pagf_flcount) 1743 delta = need - pag->pagf_flcount; 1744 1745 if (pag->pagf_longest > delta) 1746 return pag->pagf_longest - delta; 1747 return pag->pagf_flcount > 0 || pag->pagf_longest > 0; 1748} 1749 1750/* 1751 * Decide whether to use this allocation group for this allocation. 1752 * If so, fix up the btree freelist's size. 1753 */ 1754STATIC int /* error */ 1755xfs_alloc_fix_freelist( 1756 xfs_alloc_arg_t *args, /* allocation argument structure */ 1757 int flags) /* XFS_ALLOC_FLAG_... */ 1758{ 1759 xfs_buf_t *agbp; /* agf buffer pointer */ 1760 xfs_agf_t *agf; /* a.g. 
freespace structure pointer */
	xfs_buf_t	*agflbp;/* agfl buffer pointer */
	xfs_agblock_t	bno;	/* freelist block */
	xfs_extlen_t	delta;	/* new blocks needed in freelist */
	int		error;	/* error result code */
	xfs_extlen_t	longest;/* longest extent in allocation group */
	xfs_mount_t	*mp;	/* file system mount point structure */
	xfs_extlen_t	need;	/* total blocks needed in freelist */
	xfs_perag_t	*pag;	/* per-ag information structure */
	xfs_alloc_arg_t	targs;	/* local allocation arguments */
	xfs_trans_t	*tp;	/* transaction pointer */

	mp = args->mp;

	pag = args->pag;
	tp = args->tp;
	if (!pag->pagf_init) {
		/* First use of this AG: read the AGF to initialize pag. */
		if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
				&agbp)))
			return error;
		if (!pag->pagf_init) {
			/* Trylock failed to get the AGF; caller moves on. */
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			args->agbp = NULL;
			return 0;
		}
	} else
		agbp = NULL;

	/*
	 * If this is a metadata preferred pag and we are user data
	 * then try somewhere else if we are not being asked to
	 * try harder at this point
	 */
	if (pag->pagf_metadata && args->userdata &&
	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
		args->agbp = NULL;
		return 0;
	}

	if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
		/*
		 * If it looks like there isn't a long enough extent, or enough
		 * total blocks, reject it.  This first check uses the cached
		 * pag values and may avoid reading the AGF at all.
		 */
		need = XFS_MIN_FREELIST_PAG(pag, mp);
		longest = xfs_alloc_longest_free_extent(mp, pag);
		if ((args->minlen + args->alignment + args->minalignslop - 1) >
				longest ||
		    ((int)(pag->pagf_freeblks + pag->pagf_flcount -
			   need - args->total) < (int)args->minleft)) {
			if (agbp)
				xfs_trans_brelse(tp, agbp);
			args->agbp = NULL;
			return 0;
		}
	}

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (agbp == NULL) {
		if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
				&agbp)))
			return error;
		if (agbp == NULL) {
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			args->agbp = NULL;
			return 0;
		}
	}
	/*
	 * Figure out how many blocks we should have in the freelist.
	 */
	agf = XFS_BUF_TO_AGF(agbp);
	need = XFS_MIN_FREELIST(agf, mp);
	/*
	 * If there isn't enough total or single-extent, reject it.
	 * This repeats the earlier check against the now locked-in AGF
	 * values (the cached pag values may have been stale).
	 */
	if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
		delta = need > be32_to_cpu(agf->agf_flcount) ?
			(need - be32_to_cpu(agf->agf_flcount)) : 0;
		longest = be32_to_cpu(agf->agf_longest);
		longest = (longest > delta) ? (longest - delta) :
			(be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
		if ((args->minlen + args->alignment + args->minalignslop - 1) >
				longest ||
		    ((int)(be32_to_cpu(agf->agf_freeblks) +
		     be32_to_cpu(agf->agf_flcount) - need - args->total) <
		     (int)args->minleft)) {
			xfs_trans_brelse(tp, agbp);
			args->agbp = NULL;
			return 0;
		}
	}
	/*
	 * Make the freelist shorter if it's too long.
	 * Blocks are returned to the free space btrees one at a time.
	 */
	while (be32_to_cpu(agf->agf_flcount) > need) {
		xfs_buf_t	*bp;

		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			return error;
		if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
			return error;
		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Initialize the args structure.
	 */
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.mod = targs.minleft = targs.wasdel = targs.userdata =
		targs.minalignslop = 0;
	targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
		return error;
	/*
	 * Make the freelist longer if it's too short.
	 */
	while (be32_to_cpu(agf->agf_flcount) < need) {
		targs.agbno = 0;
		targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
		/*
		 * Allocate as many blocks as possible at once.
		 */
		if ((error = xfs_alloc_ag_vextent(&targs))) {
			xfs_trans_brelse(tp, agflbp);
			return error;
		}
		/*
		 * Stop if we run out.  Won't happen if callers are obeying
		 * the restrictions correctly.  Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			xfs_trans_brelse(tp, agflbp);
			args->agbp = NULL;
			return 0;
		}
		/*
		 * Put each allocated block on the list.
		 * NOTE(review): this error return does not brelse agflbp
		 * like the paths above do — presumably relying on
		 * transaction cancellation to release it; verify.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
							agflbp, bno, 0);
			if (error)
				return error;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;
}

/*
 * Get a block from the freelist.
 * Returns with the buffer for the block gotten.
 */
int				/* error */
xfs_alloc_get_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
	int		btreeblk) /* destination is a AGF btree */
{
	xfs_agf_t	*agf;	/* a.g. freespace structure */
	xfs_agfl_t	*agfl;	/* a.g.
freelist structure */ 1939 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 1940 xfs_agblock_t bno; /* block number returned */ 1941 int error; 1942 int logflags; 1943 xfs_mount_t *mp; /* mount structure */ 1944 xfs_perag_t *pag; /* per allocation group data */ 1945 1946 agf = XFS_BUF_TO_AGF(agbp); 1947 /* 1948 * Freelist is empty, give up. 1949 */ 1950 if (!agf->agf_flcount) { 1951 *bnop = NULLAGBLOCK; 1952 return 0; 1953 } 1954 /* 1955 * Read the array of free blocks. 1956 */ 1957 mp = tp->t_mountp; 1958 if ((error = xfs_alloc_read_agfl(mp, tp, 1959 be32_to_cpu(agf->agf_seqno), &agflbp))) 1960 return error; 1961 agfl = XFS_BUF_TO_AGFL(agflbp); 1962 /* 1963 * Get the block number and update the data structures. 1964 */ 1965 bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]); 1966 be32_add_cpu(&agf->agf_flfirst, 1); 1967 xfs_trans_brelse(tp, agflbp); 1968 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) 1969 agf->agf_flfirst = 0; 1970 1971 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 1972 be32_add_cpu(&agf->agf_flcount, -1); 1973 xfs_trans_agflist_delta(tp, -1); 1974 pag->pagf_flcount--; 1975 xfs_perag_put(pag); 1976 1977 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; 1978 if (btreeblk) { 1979 be32_add_cpu(&agf->agf_btreeblks, 1); 1980 pag->pagf_btreeblks++; 1981 logflags |= XFS_AGF_BTREEBLKS; 1982 } 1983 1984 xfs_alloc_log_agf(tp, agbp, logflags); 1985 *bnop = bno; 1986 1987 /* 1988 * As blocks are freed, they are added to the per-ag busy list and 1989 * remain there until the freeing transaction is committed to disk. 1990 * Now that we have allocated blocks, this list must be searched to see 1991 * if a block is being reused. If one is, then the freeing transaction 1992 * must be pushed to disk before this transaction. 1993 * 1994 * We do this by setting the current transaction to a sync transaction 1995 * which guarantees that the freeing transaction is on disk before this 1996 * transaction. 
This is done instead of a synchronous log force here so
	 * that we don't sit and wait with the AGF locked in the transaction
	 * during the log force.
	 */
	if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
		xfs_trans_set_sync(tp);
	return 0;
}

/*
 * Log the given fields from the agf structure.
 */
void
xfs_alloc_log_agf(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
{
	int	first;		/* first byte offset */
	int	last;		/* last byte offset */
	/*
	 * Byte offsets of the loggable AGF fields, indexed by XFS_AGF_*
	 * flag bit position; the trailing sizeof() entry bounds the last
	 * field for xfs_btree_offsets().
	 */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);

	/* Convert the field bitmask to a [first, last] byte range and log it. */
	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}

/*
 * Interface for inode allocation to force the pag data to be initialized.
 */
int					/* error */
xfs_alloc_pagf_init(
	xfs_mount_t		*mp,	/* file system mount structure */
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags)	/* XFS_ALLOC_FLAGS_... */
{
	xfs_buf_t		*bp;	/* AGF buffer; released before return */
	int			error;

	/*
	 * Reading the AGF initializes the in-core per-ag data as a side
	 * effect (see xfs_alloc_read_agf); the buffer itself is not needed.
	 */
	if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
		return error;
	if (bp)
		xfs_trans_brelse(tp, bp);
	return 0;
}

/*
 * Put the block on the freelist for the allocation group.
 */
int					/* error */
xfs_alloc_put_freelist(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t		bno,	/* block being freed */
	int			btreeblk) /* block came from a AGF btree */
{
	xfs_agf_t		*agf;	/* a.g. freespace structure */
	xfs_agfl_t		*agfl;	/* a.g. free block array */
	__be32			*blockp;/* pointer to array entry */
	int			error;
	int			logflags;
	xfs_mount_t		*mp;	/* mount structure */
	xfs_perag_t		*pag;	/* per allocation group data */

	agf = XFS_BUF_TO_AGF(agbp);
	mp = tp->t_mountp;

	/* Read the AGFL buffer ourselves if the caller didn't supply one. */
	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	agfl = XFS_BUF_TO_AGFL(agflbp);
	/* Advance the last-slot index, wrapping around the circular AGFL. */
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
		agf->agf_fllast = 0;

	/* Account the new freelist entry in the transaction and per-ag data. */
	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	be32_add_cpu(&agf->agf_flcount, 1);
	xfs_trans_agflist_delta(tp, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Block moved from a btree to the freelist: adjust the count. */
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
	/* Store the freed block number in the slot agf_fllast now points at. */
	blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	/*
	 * NOTE(review): the AGF was already logged with the same flags just
	 * above and none of its fields changed in between; this second call
	 * looks redundant (but harmless) -- confirm against upstream history.
	 */
	xfs_alloc_log_agf(tp, agbp, logflags);
	/* Log only the bytes of the single AGFL slot we modified. */
	xfs_trans_log_buf(tp, agflbp,
		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
			sizeof(xfs_agblock_t) - 1));
	return 0;
}

/*
 * Read in the allocation group header (free/alloc section).
 */
int					/* error */
xfs_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_BUF_ */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	struct xfs_agf	*agf;		/* ag freelist header */
	int		agf_ok;		/* set if agf is consistent */
	int		error;

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), flags, bpp);
	if (error)
		return error;
	/*
	 * No error but no buffer: callers treat this as "not available"
	 * (presumably a trylock miss -- see xfs_alloc_read_agf).
	 */
	if (!*bpp)
		return 0;

	ASSERT(!XFS_BUF_GETERROR(*bpp));
	agf = XFS_BUF_TO_AGF(*bpp);

	/*
	 * Validate the magic number of the agf block.
	 */
	agf_ok =
		be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
		XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
		be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
		be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
		be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
		be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
		be32_to_cpu(agf->agf_seqno) == agno;
	/* The btreeblks bound is only checked when lazy sb counters are on. */
	if (xfs_sb_version_haslazysbcount(&mp->m_sb))
		agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
						be32_to_cpu(agf->agf_length);
	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
			XFS_RANDOM_ALLOC_READ_AGF))) {
		XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
				     XFS_ERRLEVEL_LOW, mp, agf);
		xfs_trans_brelse(tp, *bpp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
	return 0;
}

/*
 * Read in the allocation group header (free/alloc section).
 */
int					/* error */
xfs_alloc_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_ALLOC_FLAG_... */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	struct xfs_agf		*agf;	/* ag freelist header */
	struct xfs_perag	*pag;	/* per allocation group data */
	int			error;

	ASSERT(agno != NULLAGNUMBER);

	/* Translate the allocator's trylock flag into a buffer-layer flag. */
	error = xfs_read_agf(mp, tp, agno,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			bpp);
	if (error)
		return error;
	if (!*bpp)
		return 0;
	ASSERT(!XFS_BUF_GETERROR(*bpp));

	agf = XFS_BUF_TO_AGF(*bpp);
	pag = xfs_perag_get(mp, agno);
	if (!pag->pagf_init) {
		/* First read of this AGF: seed the in-core per-ag copies. */
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		spin_lock_init(&pag->pagb_lock);
		pag->pagb_count = 0;
		pag->pagb_tree = RB_ROOT;
		pag->pagf_init = 1;
	}
#ifdef DEBUG
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		/* In-core per-ag data must stay in sync with the on-disk AGF. */
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	xfs_perag_put(pag);
	return 0;
}

/*
 * Allocate an extent (variable-size).
 * Depending on the allocation type, we either look in a single allocation
 * group or loop over the allocation groups to find the result.
 */
int				/* error */
xfs_alloc_vextent(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_agblock_t	agsize;	/* allocation group size */
	int		error;
	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
	xfs_extlen_t	minleft;/* minimum left value, temp copy */
	xfs_mount_t	*mp;	/* mount structure pointer */
	xfs_agnumber_t	sagno;	/* starting allocation group number */
	xfs_alloctype_t	type;	/* input allocation type */
	int		bump_rotor = 0;
	int		no_min = 0;
	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	/* On non-DEBUG builds, reject bad arguments with a trace, not a crash. */
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		trace_xfs_alloc_vextent_badargs(args);
		return 0;
	}
	/* Save minleft; it is temporarily zeroed around freelist fixups below. */
	minleft = args->minleft;

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 */
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		args->pag = xfs_perag_get(mp, args->agno);
		args->minleft = 0;
		error = xfs_alloc_fix_freelist(args, 0);
		args->minleft = minleft;
		if (error) {
			trace_xfs_alloc_vextent_nofix(args);
			goto error0;
		}
		if (!args->agbp) {
			trace_xfs_alloc_vextent_noagbp(args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 */
		if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
			/* Initial user data goes at the rotor AG, not near bno. */
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		/* FALLTHROUGH */
	case XFS_ALLOCTYPE_ANY_AG:
	case XFS_ALLOCTYPE_START_AG:
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_ANY_AG) {
			/*
			 * Start with the last place we left off.
			 */
			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount;
			args->type = XFS_ALLOCTYPE_THIS_AG;
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			if (type == XFS_ALLOCTYPE_START_AG)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 */
		for (;;) {
			args->pag = xfs_perag_get(mp, args->agno);
			if (no_min)
				args->minleft = 0;
			error = xfs_alloc_fix_freelist(args, flags);
			args->minleft = minleft;
			if (error) {
				trace_xfs_alloc_vextent_nofix(args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}

			trace_xfs_alloc_vextent_loopfailed(args);

			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space.  However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno. Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
			if (args->agno == sagno) {
				if (no_min == 1) {
					/* Already tried with minleft relaxed: give up. */
					args->agbno = NULLAGBLOCK;
					trace_xfs_alloc_vextent_allfailed(args);
					break;
				}
				if (flags == 0) {
					/* Third pass: retry without the minleft reservation. */
					no_min = 1;
				} else {
					/* Second pass: drop trylock, block on AGF locks. */
					flags = 0;
					if (type == XFS_ALLOCTYPE_START_BNO) {
						args->agbno = XFS_FSB_TO_AGBNO(mp,
							args->fsbno);
						args->type = XFS_ALLOCTYPE_NEAR_BNO;
					}
				}
			}
			xfs_perag_put(args->pag);
		}
		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
			/* Remember where to start next time around. */
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif
	}
	xfs_perag_put(args->pag);
	return 0;
error0:
	xfs_perag_put(args->pag);
	return error;
}

/*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
 * after fixing up the freelist.
2445 */ 2446int /* error */ 2447xfs_free_extent( 2448 xfs_trans_t *tp, /* transaction pointer */ 2449 xfs_fsblock_t bno, /* starting block number of extent */ 2450 xfs_extlen_t len) /* length of extent */ 2451{ 2452 xfs_alloc_arg_t args; 2453 int error; 2454 2455 ASSERT(len != 0); 2456 memset(&args, 0, sizeof(xfs_alloc_arg_t)); 2457 args.tp = tp; 2458 args.mp = tp->t_mountp; 2459 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2460 ASSERT(args.agno < args.mp->m_sb.sb_agcount); 2461 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2462 args.pag = xfs_perag_get(args.mp, args.agno); 2463 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) 2464 goto error0; 2465#ifdef DEBUG 2466 ASSERT(args.agbp != NULL); 2467 ASSERT((args.agbno + len) <= 2468 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); 2469#endif 2470 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2471error0: 2472 xfs_perag_put(args.pag); 2473 return error; 2474} 2475 2476 2477/* 2478 * AG Busy list management 2479 * The busy list contains block ranges that have been freed but whose 2480 * transactions have not yet hit disk. If any block listed in a busy 2481 * list is reused, the transaction that freed it must be forced to disk 2482 * before continuing to use the block. 2483 * 2484 * xfs_alloc_busy_insert - add to the per-ag busy list 2485 * xfs_alloc_busy_clear - remove an item from the per-ag busy list 2486 * xfs_alloc_busy_search - search for a busy extent 2487 */ 2488 2489/* 2490 * Insert a new extent into the busy tree. 2491 * 2492 * The busy extent tree is indexed by the start block of the busy extent. 2493 * there can be multiple overlapping ranges in the busy extent tree but only 2494 * ever one entry at a given start block. The reason for this is that 2495 * multi-block extents can be freed, then smaller chunks of that extent 2496 * allocated and freed again before the first transaction commit is on disk. 
 * If the exact same start block is freed a second time, we have to wait for
 * that busy extent to pass out of the tree before the new extent is inserted.
 * There are two main cases we have to handle here.
 *
 * The first case is a transaction that triggers a "free - allocate - free"
 * cycle. This can occur during btree manipulations as a btree block is freed
 * to the freelist, then allocated from the free list, then freed again. In
 * this case, the second extent free is what triggers the duplicate and as
 * such the transaction IDs should match. Because the extent was allocated in
 * this transaction, the transaction must be marked as synchronous. This is
 * true for all cases where the free/alloc/free occurs in the one transaction,
 * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
 * This serves to catch violations of the second case quite effectively.
 *
 * The second case is where the free/alloc/free occur in different
 * transactions. In this case, the thread freeing the extent the second time
 * can't mark the extent busy immediately because it is already tracked in a
 * transaction that may be committing.  When the log commit for the existing
 * busy extent completes, the busy extent will be removed from the tree. If we
 * allow the second busy insert to continue using that busy extent structure,
 * it can be freed before this transaction is safely in the log.  Hence our
 * only option in this case is to force the log to remove the existing busy
 * extent from the list before we insert the new one with the current
 * transaction ID.
2521 * 2522 * The problem we are trying to avoid in the free-alloc-free in separate 2523 * transactions is most easily described with a timeline: 2524 * 2525 * Thread 1 Thread 2 Thread 3 xfslogd 2526 * xact alloc 2527 * free X 2528 * mark busy 2529 * commit xact 2530 * free xact 2531 * xact alloc 2532 * alloc X 2533 * busy search 2534 * mark xact sync 2535 * commit xact 2536 * free xact 2537 * force log 2538 * checkpoint starts 2539 * .... 2540 * xact alloc 2541 * free X 2542 * mark busy 2543 * finds match 2544 * *** KABOOM! *** 2545 * .... 2546 * log IO completes 2547 * unbusy X 2548 * checkpoint completes 2549 * 2550 * By issuing a log force in thread 3 @ "KABOOM", the thread will block until 2551 * the checkpoint completes, and the busy extent it matched will have been 2552 * removed from the tree when it is woken. Hence it can then continue safely. 2553 * 2554 * However, to ensure this matching process is robust, we need to use the 2555 * transaction ID for identifying transaction, as delayed logging results in 2556 * the busy extent and transaction lifecycles being different. i.e. the busy 2557 * extent is active for a lot longer than the transaction. Hence the 2558 * transaction structure can be freed and reallocated, then mark the same 2559 * extent busy again in the new transaction. In this case the new transaction 2560 * will have a different tid but can have the same address, and hence we need 2561 * to check against the tid. 2562 * 2563 * Future: for delayed logging, we could avoid the log force if the extent was 2564 * first freed in the current checkpoint sequence. This, however, requires the 2565 * ability to pin the current checkpoint in memory until this transaction 2566 * commits to ensure that both the original free and the current one combine 2567 * logically into the one checkpoint. If the checkpoint sequences are 2568 * different, however, we still need to wait on a log force. 
 */
/*
 * Mark [bno, bno + len) in AG "agno" busy until the current transaction
 * commits.  May sleep (log force + restart) if the range overlaps an extent
 * marked busy by a different transaction.
 */
void
xfs_alloc_busy_insert(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	xfs_extlen_t		len)
{
	struct xfs_busy_extent	*new;
	struct xfs_busy_extent	*busyp;
	struct xfs_perag	*pag;
	struct rb_node		**rbp;
	struct rb_node		*parent;
	/* 0: no overlap; 1: overlap in same xact; -1: overlap in other xact */
	int			match;


	new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
	if (!new) {
		/*
		 * No Memory!  Since it is now not possible to track the free
		 * block, make this a synchronous transaction to insure that
		 * the block is not reused before this transaction commits.
		 */
		trace_xfs_alloc_busy(tp, agno, bno, len, 1);
		xfs_trans_set_sync(tp);
		return;
	}

	new->agno = agno;
	new->bno = bno;
	new->length = len;
	new->tid = xfs_log_get_trans_ident(tp);

	INIT_LIST_HEAD(&new->list);

	/* trace before insert to be able to see failed inserts */
	trace_xfs_alloc_busy(tp, agno, bno, len, 0);

	pag = xfs_perag_get(tp->t_mountp, new->agno);
restart:
	spin_lock(&pag->pagb_lock);
	rbp = &pag->pagb_tree.rb_node;
	parent = NULL;
	busyp = NULL;
	match = 0;
	/* Walk to the insertion point, stopping early on any overlap. */
	while (*rbp && match >= 0) {
		parent = *rbp;
		busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);

		if (new->bno < busyp->bno) {
			/* may overlap, but exact start block is lower */
			rbp = &(*rbp)->rb_left;
			if (new->bno + new->length > busyp->bno)
				match = busyp->tid == new->tid ? 1 : -1;
		} else if (new->bno > busyp->bno) {
			/* may overlap, but exact start block is higher */
			rbp = &(*rbp)->rb_right;
			/* NOTE(review): uses "bno" directly; equals new->bno here */
			if (bno < busyp->bno + busyp->length)
				match = busyp->tid == new->tid ? 1 : -1;
		} else {
			/* same start block: always a conflict of some kind */
			match = busyp->tid == new->tid ? 1 : -1;
			break;
		}
	}
	if (match < 0) {
		/* overlap marked busy in different transaction */
		spin_unlock(&pag->pagb_lock);
		/* Wait for the conflicting busy extent to leave the tree. */
		xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
		goto restart;
	}
	if (match > 0) {
		/*
		 * overlap marked busy in same transaction. Update if exact
		 * start block match, otherwise combine the busy extents into
		 * a single range.
		 */
		if (busyp->bno == new->bno) {
			busyp->length = max(busyp->length, new->length);
			spin_unlock(&pag->pagb_lock);
			ASSERT(tp->t_flags & XFS_TRANS_SYNC);
			xfs_perag_put(pag);
			kmem_free(new);
			return;
		}
		/* Remove the old node; "new" takes over the merged range. */
		rb_erase(&busyp->rb_node, &pag->pagb_tree);
		new->length = max(busyp->bno + busyp->length,
					new->bno + new->length) -
				min(busyp->bno, new->bno);
		new->bno = min(busyp->bno, new->bno);
	} else
		busyp = NULL;

	rb_link_node(&new->rb_node, parent, rbp);
	rb_insert_color(&new->rb_node, &pag->pagb_tree);

	list_add(&new->list, &tp->t_busy);
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
	/* busyp is the merged-away node, or NULL when nothing was merged */
	kmem_free(busyp);
}

/*
 * Search for a busy extent within the range of the extent we are about to
 * allocate.  You need to be holding the busy extent tree lock when calling
 * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy
 * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
 * match. This is done so that a non-zero return indicates an overlap that
 * will require a synchronous transaction, but it can still be
 * used to distinguish between a partial or exact match.
2678 */ 2679static int 2680xfs_alloc_busy_search( 2681 struct xfs_mount *mp, 2682 xfs_agnumber_t agno, 2683 xfs_agblock_t bno, 2684 xfs_extlen_t len) 2685{ 2686 struct xfs_perag *pag; 2687 struct rb_node *rbp; 2688 struct xfs_busy_extent *busyp; 2689 int match = 0; 2690 2691 pag = xfs_perag_get(mp, agno); 2692 spin_lock(&pag->pagb_lock); 2693 2694 rbp = pag->pagb_tree.rb_node; 2695 2696 /* find closest start bno overlap */ 2697 while (rbp) { 2698 busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node); 2699 if (bno < busyp->bno) { 2700 /* may overlap, but exact start block is lower */ 2701 if (bno + len > busyp->bno) 2702 match = -1; 2703 rbp = rbp->rb_left; 2704 } else if (bno > busyp->bno) { 2705 /* may overlap, but exact start block is higher */ 2706 if (bno < busyp->bno + busyp->length) 2707 match = -1; 2708 rbp = rbp->rb_right; 2709 } else { 2710 /* bno matches busyp, length determines exact match */ 2711 match = (busyp->length == len) ? 1 : -1; 2712 break; 2713 } 2714 } 2715 spin_unlock(&pag->pagb_lock); 2716 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); 2717 xfs_perag_put(pag); 2718 return match; 2719} 2720 2721void 2722xfs_alloc_busy_clear( 2723 struct xfs_mount *mp, 2724 struct xfs_busy_extent *busyp) 2725{ 2726 struct xfs_perag *pag; 2727 2728 trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, 2729 busyp->length); 2730 2731 ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, 2732 busyp->length) == 1); 2733 2734 list_del_init(&busyp->list); 2735 2736 pag = xfs_perag_get(mp, busyp->agno); 2737 spin_lock(&pag->pagb_lock); 2738 rb_erase(&busyp->rb_node, &pag->pagb_tree); 2739 spin_unlock(&pag->pagb_lock); 2740 xfs_perag_put(pag); 2741 2742 kmem_free(busyp); 2743} 2744