1/* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_dir2.h" 28#include "xfs_dmapi.h" 29#include "xfs_mount.h" 30#include "xfs_bmap_btree.h" 31#include "xfs_alloc_btree.h" 32#include "xfs_ialloc_btree.h" 33#include "xfs_dir2_sf.h" 34#include "xfs_attr_sf.h" 35#include "xfs_dinode.h" 36#include "xfs_inode.h" 37#include "xfs_btree.h" 38#include "xfs_ialloc.h" 39#include "xfs_alloc.h" 40#include "xfs_error.h" 41 42 43#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? 
((b) - (a)) : ((a) - (b))) 44 45#define XFSA_FIXUP_BNO_OK 1 46#define XFSA_FIXUP_CNT_OK 2 47 48STATIC int 49xfs_alloc_search_busy(xfs_trans_t *tp, 50 xfs_agnumber_t agno, 51 xfs_agblock_t bno, 52 xfs_extlen_t len); 53 54#if defined(XFS_ALLOC_TRACE) 55ktrace_t *xfs_alloc_trace_buf; 56 57#define TRACE_ALLOC(s,a) \ 58 xfs_alloc_trace_alloc(fname, s, a, __LINE__) 59#define TRACE_FREE(s,a,b,x,f) \ 60 xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__) 61#define TRACE_MODAGF(s,a,f) \ 62 xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__) 63#define TRACE_BUSY(fname,s,ag,agb,l,sl,tp) \ 64 xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) 65#define TRACE_UNBUSY(fname,s,ag,sl,tp) \ 66 xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) 67#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) \ 68 xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) 69#else 70#define TRACE_ALLOC(s,a) 71#define TRACE_FREE(s,a,b,x,f) 72#define TRACE_MODAGF(s,a,f) 73#define TRACE_BUSY(s,a,ag,agb,l,sl,tp) 74#define TRACE_UNBUSY(fname,s,ag,sl,tp) 75#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) 76#endif /* XFS_ALLOC_TRACE */ 77 78/* 79 * Prototypes for per-ag allocation routines 80 */ 81 82STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 83STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 84STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 85STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 86 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 87 88/* 89 * Internal functions. 90 */ 91 92/* 93 * Compute aligned version of the found extent. 94 * Takes alignment and min length into account. 
95 */ 96STATIC int /* success (>= minlen) */ 97xfs_alloc_compute_aligned( 98 xfs_agblock_t foundbno, /* starting block in found extent */ 99 xfs_extlen_t foundlen, /* length in found extent */ 100 xfs_extlen_t alignment, /* alignment for allocation */ 101 xfs_extlen_t minlen, /* minimum length for allocation */ 102 xfs_agblock_t *resbno, /* result block number */ 103 xfs_extlen_t *reslen) /* result length */ 104{ 105 xfs_agblock_t bno; 106 xfs_extlen_t diff; 107 xfs_extlen_t len; 108 109 if (alignment > 1 && foundlen >= minlen) { 110 bno = roundup(foundbno, alignment); 111 diff = bno - foundbno; 112 len = diff >= foundlen ? 0 : foundlen - diff; 113 } else { 114 bno = foundbno; 115 len = foundlen; 116 } 117 *resbno = bno; 118 *reslen = len; 119 return len >= minlen; 120} 121 122/* 123 * Compute best start block and diff for "near" allocations. 124 * freelen >= wantlen already checked by caller. 125 */ 126STATIC xfs_extlen_t /* difference value (absolute) */ 127xfs_alloc_compute_diff( 128 xfs_agblock_t wantbno, /* target starting block */ 129 xfs_extlen_t wantlen, /* target length */ 130 xfs_extlen_t alignment, /* target alignment */ 131 xfs_agblock_t freebno, /* freespace's starting block */ 132 xfs_extlen_t freelen, /* freespace's length */ 133 xfs_agblock_t *newbnop) /* result: best start block from free */ 134{ 135 xfs_agblock_t freeend; /* end of freespace extent */ 136 xfs_agblock_t newbno1; /* return block number */ 137 xfs_agblock_t newbno2; /* other new block number */ 138 xfs_extlen_t newlen1=0; /* length with newbno1 */ 139 xfs_extlen_t newlen2=0; /* length with newbno2 */ 140 xfs_agblock_t wantend; /* end of target extent */ 141 142 ASSERT(freelen >= wantlen); 143 freeend = freebno + freelen; 144 wantend = wantbno + wantlen; 145 if (freebno >= wantbno) { 146 if ((newbno1 = roundup(freebno, alignment)) >= freeend) 147 newbno1 = NULLAGBLOCK; 148 } else if (freeend >= wantend && alignment > 1) { 149 newbno1 = roundup(wantbno, alignment); 150 newbno2 = 
newbno1 - alignment; 151 if (newbno1 >= freeend) 152 newbno1 = NULLAGBLOCK; 153 else 154 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); 155 if (newbno2 < freebno) 156 newbno2 = NULLAGBLOCK; 157 else 158 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); 159 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { 160 if (newlen1 < newlen2 || 161 (newlen1 == newlen2 && 162 XFS_ABSDIFF(newbno1, wantbno) > 163 XFS_ABSDIFF(newbno2, wantbno))) 164 newbno1 = newbno2; 165 } else if (newbno2 != NULLAGBLOCK) 166 newbno1 = newbno2; 167 } else if (freeend >= wantend) { 168 newbno1 = wantbno; 169 } else if (alignment > 1) { 170 newbno1 = roundup(freeend - wantlen, alignment); 171 if (newbno1 > freeend - wantlen && 172 newbno1 - alignment >= freebno) 173 newbno1 -= alignment; 174 else if (newbno1 >= freeend) 175 newbno1 = NULLAGBLOCK; 176 } else 177 newbno1 = freeend - wantlen; 178 *newbnop = newbno1; 179 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); 180} 181 182/* 183 * Fix up the length, based on mod and prod. 184 * len should be k * prod + mod for some k. 185 * If len is too small it is returned unchanged. 186 * If len hits maxlen it is left alone. 
187 */ 188STATIC void 189xfs_alloc_fix_len( 190 xfs_alloc_arg_t *args) /* allocation argument structure */ 191{ 192 xfs_extlen_t k; 193 xfs_extlen_t rlen; 194 195 ASSERT(args->mod < args->prod); 196 rlen = args->len; 197 ASSERT(rlen >= args->minlen); 198 ASSERT(rlen <= args->maxlen); 199 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || 200 (args->mod == 0 && rlen < args->prod)) 201 return; 202 k = rlen % args->prod; 203 if (k == args->mod) 204 return; 205 if (k > args->mod) { 206 if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen) 207 return; 208 } else { 209 if ((int)(rlen = rlen - args->prod - (args->mod - k)) < 210 (int)args->minlen) 211 return; 212 } 213 ASSERT(rlen >= args->minlen); 214 ASSERT(rlen <= args->maxlen); 215 args->len = rlen; 216} 217 218/* 219 * Fix up length if there is too little space left in the a.g. 220 * Return 1 if ok, 0 if too little, should give up. 221 */ 222STATIC int 223xfs_alloc_fix_minleft( 224 xfs_alloc_arg_t *args) /* allocation argument structure */ 225{ 226 xfs_agf_t *agf; /* a.g. freelist header */ 227 int diff; /* free space difference */ 228 229 if (args->minleft == 0) 230 return 1; 231 agf = XFS_BUF_TO_AGF(args->agbp); 232 diff = be32_to_cpu(agf->agf_freeblks) 233 + be32_to_cpu(agf->agf_flcount) 234 - args->len - args->minleft; 235 if (diff >= 0) 236 return 1; 237 args->len += diff; /* shrink the allocated space */ 238 if (args->len >= args->minlen) 239 return 1; 240 args->agbno = NULLAGBLOCK; 241 return 0; 242} 243 244/* 245 * Update the two btrees, logically removing from freespace the extent 246 * starting at rbno, rlen blocks. The extent is contained within the 247 * actual (current) free extent fbno for flen blocks. 248 * Flags are passed in indicating whether the cursors are set to the 249 * relevant records. 
250 */ 251STATIC int /* error code */ 252xfs_alloc_fixup_trees( 253 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ 254 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ 255 xfs_agblock_t fbno, /* starting block of free extent */ 256 xfs_extlen_t flen, /* length of free extent */ 257 xfs_agblock_t rbno, /* starting block of returned extent */ 258 xfs_extlen_t rlen, /* length of returned extent */ 259 int flags) /* flags, XFSA_FIXUP_... */ 260{ 261 int error; /* error code */ 262 int i; /* operation results */ 263 xfs_agblock_t nfbno1; /* first new free startblock */ 264 xfs_agblock_t nfbno2; /* second new free startblock */ 265 xfs_extlen_t nflen1=0; /* first new free length */ 266 xfs_extlen_t nflen2=0; /* second new free length */ 267 268 /* 269 * Look up the record in the by-size tree if necessary. 270 */ 271 if (flags & XFSA_FIXUP_CNT_OK) { 272#ifdef DEBUG 273 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) 274 return error; 275 XFS_WANT_CORRUPTED_RETURN( 276 i == 1 && nfbno1 == fbno && nflen1 == flen); 277#endif 278 } else { 279 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) 280 return error; 281 XFS_WANT_CORRUPTED_RETURN(i == 1); 282 } 283 /* 284 * Look up the record in the by-block tree if necessary. 
285 */ 286 if (flags & XFSA_FIXUP_BNO_OK) { 287#ifdef DEBUG 288 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) 289 return error; 290 XFS_WANT_CORRUPTED_RETURN( 291 i == 1 && nfbno1 == fbno && nflen1 == flen); 292#endif 293 } else { 294 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) 295 return error; 296 XFS_WANT_CORRUPTED_RETURN(i == 1); 297 } 298#ifdef DEBUG 299 { 300 xfs_alloc_block_t *bnoblock; 301 xfs_alloc_block_t *cntblock; 302 303 if (bno_cur->bc_nlevels == 1 && 304 cnt_cur->bc_nlevels == 1) { 305 bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]); 306 cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]); 307 XFS_WANT_CORRUPTED_RETURN( 308 be16_to_cpu(bnoblock->bb_numrecs) == 309 be16_to_cpu(cntblock->bb_numrecs)); 310 } 311 } 312#endif 313 /* 314 * Deal with all four cases: the allocated record is contained 315 * within the freespace record, so we can have new freespace 316 * at either (or both) end, or no freespace remaining. 317 */ 318 if (rbno == fbno && rlen == flen) 319 nfbno1 = nfbno2 = NULLAGBLOCK; 320 else if (rbno == fbno) { 321 nfbno1 = rbno + rlen; 322 nflen1 = flen - rlen; 323 nfbno2 = NULLAGBLOCK; 324 } else if (rbno + rlen == fbno + flen) { 325 nfbno1 = fbno; 326 nflen1 = flen - rlen; 327 nfbno2 = NULLAGBLOCK; 328 } else { 329 nfbno1 = fbno; 330 nflen1 = rbno - fbno; 331 nfbno2 = rbno + rlen; 332 nflen2 = (fbno + flen) - nfbno2; 333 } 334 /* 335 * Delete the entry from the by-size btree. 336 */ 337 if ((error = xfs_alloc_delete(cnt_cur, &i))) 338 return error; 339 XFS_WANT_CORRUPTED_RETURN(i == 1); 340 /* 341 * Add new by-size btree entry(s). 
342 */ 343 if (nfbno1 != NULLAGBLOCK) { 344 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) 345 return error; 346 XFS_WANT_CORRUPTED_RETURN(i == 0); 347 if ((error = xfs_alloc_insert(cnt_cur, &i))) 348 return error; 349 XFS_WANT_CORRUPTED_RETURN(i == 1); 350 } 351 if (nfbno2 != NULLAGBLOCK) { 352 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) 353 return error; 354 XFS_WANT_CORRUPTED_RETURN(i == 0); 355 if ((error = xfs_alloc_insert(cnt_cur, &i))) 356 return error; 357 XFS_WANT_CORRUPTED_RETURN(i == 1); 358 } 359 /* 360 * Fix up the by-block btree entry(s). 361 */ 362 if (nfbno1 == NULLAGBLOCK) { 363 /* 364 * No remaining freespace, just delete the by-block tree entry. 365 */ 366 if ((error = xfs_alloc_delete(bno_cur, &i))) 367 return error; 368 XFS_WANT_CORRUPTED_RETURN(i == 1); 369 } else { 370 /* 371 * Update the by-block entry to start later|be shorter. 372 */ 373 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) 374 return error; 375 } 376 if (nfbno2 != NULLAGBLOCK) { 377 /* 378 * 2 resulting free entries, need to add one. 379 */ 380 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 381 return error; 382 XFS_WANT_CORRUPTED_RETURN(i == 0); 383 if ((error = xfs_alloc_insert(bno_cur, &i))) 384 return error; 385 XFS_WANT_CORRUPTED_RETURN(i == 1); 386 } 387 return 0; 388} 389 390/* 391 * Read in the allocation group free block array. 
392 */ 393STATIC int /* error */ 394xfs_alloc_read_agfl( 395 xfs_mount_t *mp, /* mount point structure */ 396 xfs_trans_t *tp, /* transaction pointer */ 397 xfs_agnumber_t agno, /* allocation group number */ 398 xfs_buf_t **bpp) /* buffer for the ag free block array */ 399{ 400 xfs_buf_t *bp; /* return value */ 401 int error; 402 403 ASSERT(agno != NULLAGNUMBER); 404 error = xfs_trans_read_buf( 405 mp, tp, mp->m_ddev_targp, 406 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), 407 XFS_FSS_TO_BB(mp, 1), 0, &bp); 408 if (error) 409 return error; 410 ASSERT(bp); 411 ASSERT(!XFS_BUF_GETERROR(bp)); 412 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF); 413 *bpp = bp; 414 return 0; 415} 416 417#if defined(XFS_ALLOC_TRACE) 418/* 419 * Add an allocation trace entry for an alloc call. 420 */ 421STATIC void 422xfs_alloc_trace_alloc( 423 char *name, /* function tag string */ 424 char *str, /* additional string */ 425 xfs_alloc_arg_t *args, /* allocation argument structure */ 426 int line) /* source line number */ 427{ 428 ktrace_enter(xfs_alloc_trace_buf, 429 (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)), 430 (void *)name, 431 (void *)str, 432 (void *)args->mp, 433 (void *)(__psunsigned_t)args->agno, 434 (void *)(__psunsigned_t)args->agbno, 435 (void *)(__psunsigned_t)args->minlen, 436 (void *)(__psunsigned_t)args->maxlen, 437 (void *)(__psunsigned_t)args->mod, 438 (void *)(__psunsigned_t)args->prod, 439 (void *)(__psunsigned_t)args->minleft, 440 (void *)(__psunsigned_t)args->total, 441 (void *)(__psunsigned_t)args->alignment, 442 (void *)(__psunsigned_t)args->len, 443 (void *)((((__psint_t)args->type) << 16) | 444 (__psint_t)args->otype), 445 (void *)(__psint_t)((args->wasdel << 3) | 446 (args->wasfromfl << 2) | 447 (args->isfl << 1) | 448 (args->userdata << 0))); 449} 450 451/* 452 * Add an allocation trace entry for a free call. 
453 */ 454STATIC void 455xfs_alloc_trace_free( 456 char *name, /* function tag string */ 457 char *str, /* additional string */ 458 xfs_mount_t *mp, /* file system mount point */ 459 xfs_agnumber_t agno, /* allocation group number */ 460 xfs_agblock_t agbno, /* a.g. relative block number */ 461 xfs_extlen_t len, /* length of extent */ 462 int isfl, /* set if is freelist allocation/free */ 463 int line) /* source line number */ 464{ 465 ktrace_enter(xfs_alloc_trace_buf, 466 (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)), 467 (void *)name, 468 (void *)str, 469 (void *)mp, 470 (void *)(__psunsigned_t)agno, 471 (void *)(__psunsigned_t)agbno, 472 (void *)(__psunsigned_t)len, 473 (void *)(__psint_t)isfl, 474 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); 475} 476 477/* 478 * Add an allocation trace entry for modifying an agf. 479 */ 480STATIC void 481xfs_alloc_trace_modagf( 482 char *name, /* function tag string */ 483 char *str, /* additional string */ 484 xfs_mount_t *mp, /* file system mount point */ 485 xfs_agf_t *agf, /* new agf value */ 486 int flags, /* logging flags for agf */ 487 int line) /* source line number */ 488{ 489 ktrace_enter(xfs_alloc_trace_buf, 490 (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)), 491 (void *)name, 492 (void *)str, 493 (void *)mp, 494 (void *)(__psint_t)flags, 495 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno), 496 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_length), 497 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]), 498 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]), 499 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]), 500 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]), 501 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst), 502 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast), 503 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount), 504 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks), 505 (void 
*)(__psunsigned_t)be32_to_cpu(agf->agf_longest)); 506} 507 508STATIC void 509xfs_alloc_trace_busy( 510 char *name, /* function tag string */ 511 char *str, /* additional string */ 512 xfs_mount_t *mp, /* file system mount point */ 513 xfs_agnumber_t agno, /* allocation group number */ 514 xfs_agblock_t agbno, /* a.g. relative block number */ 515 xfs_extlen_t len, /* length of extent */ 516 int slot, /* perag Busy slot */ 517 xfs_trans_t *tp, 518 int trtype, /* type: add, delete, search */ 519 int line) /* source line number */ 520{ 521 ktrace_enter(xfs_alloc_trace_buf, 522 (void *)(__psint_t)(trtype | (line << 16)), 523 (void *)name, 524 (void *)str, 525 (void *)mp, 526 (void *)(__psunsigned_t)agno, 527 (void *)(__psunsigned_t)agbno, 528 (void *)(__psunsigned_t)len, 529 (void *)(__psint_t)slot, 530 (void *)tp, 531 NULL, NULL, NULL, NULL, NULL, NULL, NULL); 532} 533#endif /* XFS_ALLOC_TRACE */ 534 535/* 536 * Allocation group level functions. 537 */ 538 539/* 540 * Allocate a variable extent in the allocation group agno. 541 * Type and bno are used to determine where in the allocation group the 542 * extent will start. 543 * Extent's length (returned in *len) will be between minlen and maxlen, 544 * and of the form k * prod + mod unless there's nothing that large. 545 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 546 */ 547STATIC int /* error */ 548xfs_alloc_ag_vextent( 549 xfs_alloc_arg_t *args) /* argument structure for allocation */ 550{ 551 int error=0; 552#ifdef XFS_ALLOC_TRACE 553 static char fname[] = "xfs_alloc_ag_vextent"; 554#endif 555 556 ASSERT(args->minlen > 0); 557 ASSERT(args->maxlen > 0); 558 ASSERT(args->minlen <= args->maxlen); 559 ASSERT(args->mod < args->prod); 560 ASSERT(args->alignment > 0); 561 /* 562 * Branch to correct routine based on the type. 
563 */ 564 args->wasfromfl = 0; 565 switch (args->type) { 566 case XFS_ALLOCTYPE_THIS_AG: 567 error = xfs_alloc_ag_vextent_size(args); 568 break; 569 case XFS_ALLOCTYPE_NEAR_BNO: 570 error = xfs_alloc_ag_vextent_near(args); 571 break; 572 case XFS_ALLOCTYPE_THIS_BNO: 573 error = xfs_alloc_ag_vextent_exact(args); 574 break; 575 default: 576 ASSERT(0); 577 /* NOTREACHED */ 578 } 579 if (error) 580 return error; 581 /* 582 * If the allocation worked, need to change the agf structure 583 * (and log it), and the superblock. 584 */ 585 if (args->agbno != NULLAGBLOCK) { 586 xfs_agf_t *agf; /* allocation group freelist header */ 587#ifdef XFS_ALLOC_TRACE 588 xfs_mount_t *mp = args->mp; 589#endif 590 long slen = (long)args->len; 591 592 ASSERT(args->len >= args->minlen && args->len <= args->maxlen); 593 ASSERT(!(args->wasfromfl) || !args->isfl); 594 ASSERT(args->agbno % args->alignment == 0); 595 if (!(args->wasfromfl)) { 596 597 agf = XFS_BUF_TO_AGF(args->agbp); 598 be32_add(&agf->agf_freeblks, -(args->len)); 599 xfs_trans_agblocks_delta(args->tp, 600 -((long)(args->len))); 601 args->pag->pagf_freeblks -= args->len; 602 ASSERT(be32_to_cpu(agf->agf_freeblks) <= 603 be32_to_cpu(agf->agf_length)); 604 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); 605 xfs_alloc_log_agf(args->tp, args->agbp, 606 XFS_AGF_FREEBLKS); 607 /* search the busylist for these blocks */ 608 xfs_alloc_search_busy(args->tp, args->agno, 609 args->agbno, args->len); 610 } 611 if (!args->isfl) 612 xfs_trans_mod_sb(args->tp, 613 args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : 614 XFS_TRANS_SB_FDBLOCKS, -slen); 615 XFS_STATS_INC(xs_allocx); 616 XFS_STATS_ADD(xs_allocb, args->len); 617 } 618 return 0; 619} 620 621/* 622 * Allocate a variable extent at exactly agno/bno. 623 * Extent's length (returned in *len) will be between minlen and maxlen, 624 * and of the form k * prod + mod unless there's nothing that large. 625 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. 
626 */ 627STATIC int /* error */ 628xfs_alloc_ag_vextent_exact( 629 xfs_alloc_arg_t *args) /* allocation argument structure */ 630{ 631 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 632 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 633 xfs_agblock_t end; /* end of allocated extent */ 634 int error; 635 xfs_agblock_t fbno; /* start block of found extent */ 636 xfs_agblock_t fend; /* end block of found extent */ 637 xfs_extlen_t flen; /* length of found extent */ 638#ifdef XFS_ALLOC_TRACE 639 static char fname[] = "xfs_alloc_ag_vextent_exact"; 640#endif 641 int i; /* success/failure of operation */ 642 xfs_agblock_t maxend; /* end of maximal extent */ 643 xfs_agblock_t minend; /* end of minimal extent */ 644 xfs_extlen_t rlen; /* length of returned extent */ 645 646 ASSERT(args->alignment == 1); 647 /* 648 * Allocate/initialize a cursor for the by-number freespace btree. 649 */ 650 bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 651 args->agno, XFS_BTNUM_BNO, NULL, 0); 652 /* 653 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 654 * Look for the closest free block <= bno, it must contain bno 655 * if any free block does. 656 */ 657 if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) 658 goto error0; 659 if (!i) { 660 /* 661 * Didn't find it, return null. 662 */ 663 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 664 args->agbno = NULLAGBLOCK; 665 return 0; 666 } 667 /* 668 * Grab the freespace record. 669 */ 670 if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) 671 goto error0; 672 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 673 ASSERT(fbno <= args->agbno); 674 minend = args->agbno + args->minlen; 675 maxend = args->agbno + args->maxlen; 676 fend = fbno + flen; 677 /* 678 * Give up if the freespace isn't long enough for the minimum request. 
679 */ 680 if (fend < minend) { 681 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 682 args->agbno = NULLAGBLOCK; 683 return 0; 684 } 685 /* 686 * End of extent will be smaller of the freespace end and the 687 * maximal requested end. 688 */ 689 end = XFS_AGBLOCK_MIN(fend, maxend); 690 /* 691 * Fix the length according to mod and prod if given. 692 */ 693 args->len = end - args->agbno; 694 xfs_alloc_fix_len(args); 695 if (!xfs_alloc_fix_minleft(args)) { 696 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 697 return 0; 698 } 699 rlen = args->len; 700 ASSERT(args->agbno + rlen <= fend); 701 end = args->agbno + rlen; 702 /* 703 * We are allocating agbno for rlen [agbno .. end] 704 * Allocate/initialize a cursor for the by-size btree. 705 */ 706 cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 707 args->agno, XFS_BTNUM_CNT, NULL, 0); 708 ASSERT(args->agbno + args->len <= 709 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 710 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 711 args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { 712 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 713 goto error0; 714 } 715 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 716 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 717 TRACE_ALLOC("normal", args); 718 args->wasfromfl = 0; 719 return 0; 720 721error0: 722 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 723 TRACE_ALLOC("error", args); 724 return error; 725} 726 727/* 728 * Allocate a variable extent near bno in the allocation group agno. 729 * Extent's length (returned in len) will be between minlen and maxlen, 730 * and of the form k * prod + mod unless there's nothing that large. 731 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 
732 */ 733STATIC int /* error */ 734xfs_alloc_ag_vextent_near( 735 xfs_alloc_arg_t *args) /* allocation argument structure */ 736{ 737 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ 738 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ 739 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ 740#ifdef XFS_ALLOC_TRACE 741 static char fname[] = "xfs_alloc_ag_vextent_near"; 742#endif 743 xfs_agblock_t gtbno; /* start bno of right side entry */ 744 xfs_agblock_t gtbnoa; /* aligned ... */ 745 xfs_extlen_t gtdiff; /* difference to right side entry */ 746 xfs_extlen_t gtlen; /* length of right side entry */ 747 xfs_extlen_t gtlena; /* aligned ... */ 748 xfs_agblock_t gtnew; /* useful start bno of right side */ 749 int error; /* error code */ 750 int i; /* result code, temporary */ 751 int j; /* result code, temporary */ 752 xfs_agblock_t ltbno; /* start bno of left side entry */ 753 xfs_agblock_t ltbnoa; /* aligned ... */ 754 xfs_extlen_t ltdiff; /* difference to left side entry */ 755 /*REFERENCED*/ 756 xfs_agblock_t ltend; /* end bno of left side entry */ 757 xfs_extlen_t ltlen; /* length of left side entry */ 758 xfs_extlen_t ltlena; /* aligned ... */ 759 xfs_agblock_t ltnew; /* useful start bno of left side */ 760 xfs_extlen_t rlen; /* length of returned extent */ 761#if defined(DEBUG) && defined(__KERNEL__) 762 /* 763 * Randomly don't execute the first algorithm. 764 */ 765 int dofirst; /* set to do first algorithm */ 766 767 dofirst = random32() & 1; 768#endif 769 /* 770 * Get a cursor for the by-size btree. 771 */ 772 cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 773 args->agno, XFS_BTNUM_CNT, NULL, 0); 774 ltlen = 0; 775 bno_cur_lt = bno_cur_gt = NULL; 776 /* 777 * See if there are any free extents as big as maxlen. 778 */ 779 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) 780 goto error0; 781 /* 782 * If none, then pick up the last entry in the tree unless the 783 * tree is empty. 
784 */ 785 if (!i) { 786 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, 787 <len, &i))) 788 goto error0; 789 if (i == 0 || ltlen == 0) { 790 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 791 return 0; 792 } 793 ASSERT(i == 1); 794 } 795 args->wasfromfl = 0; 796 /* 797 * First algorithm. 798 * If the requested extent is large wrt the freespaces available 799 * in this a.g., then the cursor will be pointing to a btree entry 800 * near the right edge of the tree. If it's in the last btree leaf 801 * block, then we just examine all the entries in that block 802 * that are big enough, and pick the best one. 803 * This is written as a while loop so we can break out of it, 804 * but we never loop back to the top. 805 */ 806 while (xfs_btree_islastblock(cnt_cur, 0)) { 807 xfs_extlen_t bdiff; 808 int besti=0; 809 xfs_extlen_t blen=0; 810 xfs_agblock_t bnew=0; 811 812#if defined(DEBUG) && defined(__KERNEL__) 813 if (!dofirst) 814 break; 815#endif 816 /* 817 * Start from the entry that lookup found, sequence through 818 * all larger free blocks. If we're actually pointing at a 819 * record smaller than maxlen, go to the start of this block, 820 * and skip all those smaller than minlen. 821 */ 822 if (ltlen || args->alignment > 1) { 823 cnt_cur->bc_ptrs[0] = 1; 824 do { 825 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, 826 <len, &i))) 827 goto error0; 828 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 829 if (ltlen >= args->minlen) 830 break; 831 if ((error = xfs_alloc_increment(cnt_cur, 0, &i))) 832 goto error0; 833 } while (i); 834 ASSERT(ltlen >= args->minlen); 835 if (!i) 836 break; 837 } 838 i = cnt_cur->bc_ptrs[0]; 839 for (j = 1, blen = 0, bdiff = 0; 840 !error && j && (blen < args->maxlen || bdiff > 0); 841 error = xfs_alloc_increment(cnt_cur, 0, &j)) { 842 /* 843 * For each entry, decide if it's better than 844 * the previous best entry. 
845 */ 846 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 847 goto error0; 848 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 849 if (!xfs_alloc_compute_aligned(ltbno, ltlen, 850 args->alignment, args->minlen, 851 <bnoa, <lena)) 852 continue; 853 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 854 xfs_alloc_fix_len(args); 855 ASSERT(args->len >= args->minlen); 856 if (args->len < blen) 857 continue; 858 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 859 args->alignment, ltbno, ltlen, <new); 860 if (ltnew != NULLAGBLOCK && 861 (args->len > blen || ltdiff < bdiff)) { 862 bdiff = ltdiff; 863 bnew = ltnew; 864 blen = args->len; 865 besti = cnt_cur->bc_ptrs[0]; 866 } 867 } 868 /* 869 * It didn't work. We COULD be in a case where 870 * there's a good record somewhere, so try again. 871 */ 872 if (blen == 0) 873 break; 874 /* 875 * Point at the best entry, and retrieve it again. 876 */ 877 cnt_cur->bc_ptrs[0] = besti; 878 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 879 goto error0; 880 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 881 ltend = ltbno + ltlen; 882 ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 883 args->len = blen; 884 if (!xfs_alloc_fix_minleft(args)) { 885 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 886 TRACE_ALLOC("nominleft", args); 887 return 0; 888 } 889 blen = args->len; 890 /* 891 * We are allocating starting at bnew for blen blocks. 892 */ 893 args->agbno = bnew; 894 ASSERT(bnew >= ltbno); 895 ASSERT(bnew + blen <= ltend); 896 /* 897 * Set up a cursor for the by-bno tree. 898 */ 899 bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, 900 args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0); 901 /* 902 * Fix up the btree entries. 
903 */ 904 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, 905 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) 906 goto error0; 907 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 908 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 909 TRACE_ALLOC("first", args); 910 return 0; 911 } 912 /* 913 * Second algorithm. 914 * Search in the by-bno tree to the left and to the right 915 * simultaneously, until in each case we find a space big enough, 916 * or run into the edge of the tree. When we run into the edge, 917 * we deallocate that cursor. 918 * If both searches succeed, we compare the two spaces and pick 919 * the better one. 920 * With alignment, it's possible for both to fail; the upper 921 * level algorithm that picks allocation groups for allocations 922 * is not supposed to do this. 923 */ 924 /* 925 * Allocate and initialize the cursor for the leftward search. 926 */ 927 bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 928 args->agno, XFS_BTNUM_BNO, NULL, 0); 929 /* 930 * Lookup <= bno to find the leftward search's starting point. 931 */ 932 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) 933 goto error0; 934 if (!i) { 935 /* 936 * Didn't find anything; use this cursor for the rightward 937 * search. 938 */ 939 bno_cur_gt = bno_cur_lt; 940 bno_cur_lt = NULL; 941 } 942 /* 943 * Found something. Duplicate the cursor for the rightward search. 944 */ 945 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) 946 goto error0; 947 /* 948 * Increment the cursor, so we will point at the entry just right 949 * of the leftward entry if any, or to the leftmost entry. 950 */ 951 if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) 952 goto error0; 953 if (!i) { 954 /* 955 * It failed, there are no rightward entries. 
956 */ 957 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); 958 bno_cur_gt = NULL; 959 } 960 /* 961 * Loop going left with the leftward cursor, right with the 962 * rightward cursor, until either both directions give up or 963 * we find an entry at least as big as minlen. 964 */ 965 do { 966 if (bno_cur_lt) { 967 if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) 968 goto error0; 969 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 970 if (xfs_alloc_compute_aligned(ltbno, ltlen, 971 args->alignment, args->minlen, 972 <bnoa, <lena)) 973 break; 974 if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) 975 goto error0; 976 if (!i) { 977 xfs_btree_del_cursor(bno_cur_lt, 978 XFS_BTREE_NOERROR); 979 bno_cur_lt = NULL; 980 } 981 } 982 if (bno_cur_gt) { 983 if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) 984 goto error0; 985 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 986 if (xfs_alloc_compute_aligned(gtbno, gtlen, 987 args->alignment, args->minlen, 988 >bnoa, >lena)) 989 break; 990 if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) 991 goto error0; 992 if (!i) { 993 xfs_btree_del_cursor(bno_cur_gt, 994 XFS_BTREE_NOERROR); 995 bno_cur_gt = NULL; 996 } 997 } 998 } while (bno_cur_lt || bno_cur_gt); 999 /* 1000 * Got both cursors still active, need to find better entry. 1001 */ 1002 if (bno_cur_lt && bno_cur_gt) { 1003 /* 1004 * Left side is long enough, look for a right side entry. 1005 */ 1006 if (ltlena >= args->minlen) { 1007 /* 1008 * Fix up the length. 1009 */ 1010 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1011 xfs_alloc_fix_len(args); 1012 rlen = args->len; 1013 ltdiff = xfs_alloc_compute_diff(args->agbno, rlen, 1014 args->alignment, ltbno, ltlen, <new); 1015 /* 1016 * Not perfect. 1017 */ 1018 if (ltdiff) { 1019 /* 1020 * Look until we find a better one, run out of 1021 * space, or run off the end. 
1022 */ 1023 while (bno_cur_lt && bno_cur_gt) { 1024 if ((error = xfs_alloc_get_rec( 1025 bno_cur_gt, >bno, 1026 >len, &i))) 1027 goto error0; 1028 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1029 xfs_alloc_compute_aligned(gtbno, gtlen, 1030 args->alignment, args->minlen, 1031 >bnoa, >lena); 1032 /* 1033 * The left one is clearly better. 1034 */ 1035 if (gtbnoa >= args->agbno + ltdiff) { 1036 xfs_btree_del_cursor( 1037 bno_cur_gt, 1038 XFS_BTREE_NOERROR); 1039 bno_cur_gt = NULL; 1040 break; 1041 } 1042 /* 1043 * If we reach a big enough entry, 1044 * compare the two and pick the best. 1045 */ 1046 if (gtlena >= args->minlen) { 1047 args->len = 1048 XFS_EXTLEN_MIN(gtlena, 1049 args->maxlen); 1050 xfs_alloc_fix_len(args); 1051 rlen = args->len; 1052 gtdiff = xfs_alloc_compute_diff( 1053 args->agbno, rlen, 1054 args->alignment, 1055 gtbno, gtlen, >new); 1056 /* 1057 * Right side is better. 1058 */ 1059 if (gtdiff < ltdiff) { 1060 xfs_btree_del_cursor( 1061 bno_cur_lt, 1062 XFS_BTREE_NOERROR); 1063 bno_cur_lt = NULL; 1064 } 1065 /* 1066 * Left side is better. 1067 */ 1068 else { 1069 xfs_btree_del_cursor( 1070 bno_cur_gt, 1071 XFS_BTREE_NOERROR); 1072 bno_cur_gt = NULL; 1073 } 1074 break; 1075 } 1076 /* 1077 * Fell off the right end. 1078 */ 1079 if ((error = xfs_alloc_increment( 1080 bno_cur_gt, 0, &i))) 1081 goto error0; 1082 if (!i) { 1083 xfs_btree_del_cursor( 1084 bno_cur_gt, 1085 XFS_BTREE_NOERROR); 1086 bno_cur_gt = NULL; 1087 break; 1088 } 1089 } 1090 } 1091 /* 1092 * The left side is perfect, trash the right side. 1093 */ 1094 else { 1095 xfs_btree_del_cursor(bno_cur_gt, 1096 XFS_BTREE_NOERROR); 1097 bno_cur_gt = NULL; 1098 } 1099 } 1100 /* 1101 * It's the right side that was found first, look left. 1102 */ 1103 else { 1104 /* 1105 * Fix up the length. 
1106 */ 1107 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1108 xfs_alloc_fix_len(args); 1109 rlen = args->len; 1110 gtdiff = xfs_alloc_compute_diff(args->agbno, rlen, 1111 args->alignment, gtbno, gtlen, >new); 1112 /* 1113 * Right side entry isn't perfect. 1114 */ 1115 if (gtdiff) { 1116 /* 1117 * Look until we find a better one, run out of 1118 * space, or run off the end. 1119 */ 1120 while (bno_cur_lt && bno_cur_gt) { 1121 if ((error = xfs_alloc_get_rec( 1122 bno_cur_lt, <bno, 1123 <len, &i))) 1124 goto error0; 1125 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1126 xfs_alloc_compute_aligned(ltbno, ltlen, 1127 args->alignment, args->minlen, 1128 <bnoa, <lena); 1129 /* 1130 * The right one is clearly better. 1131 */ 1132 if (ltbnoa <= args->agbno - gtdiff) { 1133 xfs_btree_del_cursor( 1134 bno_cur_lt, 1135 XFS_BTREE_NOERROR); 1136 bno_cur_lt = NULL; 1137 break; 1138 } 1139 /* 1140 * If we reach a big enough entry, 1141 * compare the two and pick the best. 1142 */ 1143 if (ltlena >= args->minlen) { 1144 args->len = XFS_EXTLEN_MIN( 1145 ltlena, args->maxlen); 1146 xfs_alloc_fix_len(args); 1147 rlen = args->len; 1148 ltdiff = xfs_alloc_compute_diff( 1149 args->agbno, rlen, 1150 args->alignment, 1151 ltbno, ltlen, <new); 1152 /* 1153 * Left side is better. 1154 */ 1155 if (ltdiff < gtdiff) { 1156 xfs_btree_del_cursor( 1157 bno_cur_gt, 1158 XFS_BTREE_NOERROR); 1159 bno_cur_gt = NULL; 1160 } 1161 /* 1162 * Right side is better. 1163 */ 1164 else { 1165 xfs_btree_del_cursor( 1166 bno_cur_lt, 1167 XFS_BTREE_NOERROR); 1168 bno_cur_lt = NULL; 1169 } 1170 break; 1171 } 1172 /* 1173 * Fell off the left end. 1174 */ 1175 if ((error = xfs_alloc_decrement( 1176 bno_cur_lt, 0, &i))) 1177 goto error0; 1178 if (!i) { 1179 xfs_btree_del_cursor(bno_cur_lt, 1180 XFS_BTREE_NOERROR); 1181 bno_cur_lt = NULL; 1182 break; 1183 } 1184 } 1185 } 1186 /* 1187 * The right side is perfect, trash the left side. 
1188 */ 1189 else { 1190 xfs_btree_del_cursor(bno_cur_lt, 1191 XFS_BTREE_NOERROR); 1192 bno_cur_lt = NULL; 1193 } 1194 } 1195 } 1196 /* 1197 * If we couldn't get anything, give up. 1198 */ 1199 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1200 TRACE_ALLOC("neither", args); 1201 args->agbno = NULLAGBLOCK; 1202 return 0; 1203 } 1204 /* 1205 * At this point we have selected a freespace entry, either to the 1206 * left or to the right. If it's on the right, copy all the 1207 * useful variables to the "left" set so we only have one 1208 * copy of this code. 1209 */ 1210 if (bno_cur_gt) { 1211 bno_cur_lt = bno_cur_gt; 1212 bno_cur_gt = NULL; 1213 ltbno = gtbno; 1214 ltbnoa = gtbnoa; 1215 ltlen = gtlen; 1216 ltlena = gtlena; 1217 j = 1; 1218 } else 1219 j = 0; 1220 /* 1221 * Fix up the length and compute the useful address. 1222 */ 1223 ltend = ltbno + ltlen; 1224 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1225 xfs_alloc_fix_len(args); 1226 if (!xfs_alloc_fix_minleft(args)) { 1227 TRACE_ALLOC("nominleft", args); 1228 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1229 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1230 return 0; 1231 } 1232 rlen = args->len; 1233 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, 1234 ltlen, <new); 1235 ASSERT(ltnew >= ltbno); 1236 ASSERT(ltnew + rlen <= ltend); 1237 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1238 args->agbno = ltnew; 1239 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1240 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1241 goto error0; 1242 TRACE_ALLOC(j ? 
"gt" : "lt", args); 1243 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1244 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1245 return 0; 1246 1247 error0: 1248 TRACE_ALLOC("error", args); 1249 if (cnt_cur != NULL) 1250 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1251 if (bno_cur_lt != NULL) 1252 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); 1253 if (bno_cur_gt != NULL) 1254 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); 1255 return error; 1256} 1257 1258/* 1259 * Allocate a variable extent anywhere in the allocation group agno. 1260 * Extent's length (returned in len) will be between minlen and maxlen, 1261 * and of the form k * prod + mod unless there's nothing that large. 1262 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 1263 */ 1264STATIC int /* error */ 1265xfs_alloc_ag_vextent_size( 1266 xfs_alloc_arg_t *args) /* allocation argument structure */ 1267{ 1268 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ 1269 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ 1270 int error; /* error result */ 1271 xfs_agblock_t fbno; /* start of found freespace */ 1272 xfs_extlen_t flen; /* length of found freespace */ 1273#ifdef XFS_ALLOC_TRACE 1274 static char fname[] = "xfs_alloc_ag_vextent_size"; 1275#endif 1276 int i; /* temp status variable */ 1277 xfs_agblock_t rbno; /* returned block number */ 1278 xfs_extlen_t rlen; /* length of returned extent */ 1279 1280 /* 1281 * Allocate and initialize a cursor for the by-size btree. 1282 */ 1283 cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 1284 args->agno, XFS_BTNUM_CNT, NULL, 0); 1285 bno_cur = NULL; 1286 /* 1287 * Look for an entry >= maxlen+alignment-1 blocks. 1288 */ 1289 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1290 args->maxlen + args->alignment - 1, &i))) 1291 goto error0; 1292 /* 1293 * If none, then pick up the last entry in the tree unless the 1294 * tree is empty. 
1295 */ 1296 if (!i) { 1297 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, 1298 &flen, &i))) 1299 goto error0; 1300 if (i == 0 || flen == 0) { 1301 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1302 TRACE_ALLOC("noentry", args); 1303 return 0; 1304 } 1305 ASSERT(i == 1); 1306 } 1307 /* 1308 * There's a freespace as big as maxlen+alignment-1, get it. 1309 */ 1310 else { 1311 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) 1312 goto error0; 1313 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1314 } 1315 /* 1316 * In the first case above, we got the last entry in the 1317 * by-size btree. Now we check to see if the space hits maxlen 1318 * once aligned; if not, we search left for something better. 1319 * This can't happen in the second case above. 1320 */ 1321 xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, 1322 &rbno, &rlen); 1323 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1324 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1325 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1326 if (rlen < args->maxlen) { 1327 xfs_agblock_t bestfbno; 1328 xfs_extlen_t bestflen; 1329 xfs_agblock_t bestrbno; 1330 xfs_extlen_t bestrlen; 1331 1332 bestrlen = rlen; 1333 bestrbno = rbno; 1334 bestflen = flen; 1335 bestfbno = fbno; 1336 for (;;) { 1337 if ((error = xfs_alloc_decrement(cnt_cur, 0, &i))) 1338 goto error0; 1339 if (i == 0) 1340 break; 1341 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, 1342 &i))) 1343 goto error0; 1344 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1345 if (flen < bestrlen) 1346 break; 1347 xfs_alloc_compute_aligned(fbno, flen, args->alignment, 1348 args->minlen, &rbno, &rlen); 1349 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1350 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1351 (rlen <= flen && rbno + rlen <= fbno + flen), 1352 error0); 1353 if (rlen > bestrlen) { 1354 bestrlen = rlen; 1355 bestrbno = rbno; 1356 bestflen = flen; 1357 bestfbno = fbno; 1358 if (rlen == args->maxlen) 1359 break; 1360 } 1361 } 1362 if ((error 
= xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, 1363 &i))) 1364 goto error0; 1365 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1366 rlen = bestrlen; 1367 rbno = bestrbno; 1368 flen = bestflen; 1369 fbno = bestfbno; 1370 } 1371 args->wasfromfl = 0; 1372 /* 1373 * Fix up the length. 1374 */ 1375 args->len = rlen; 1376 xfs_alloc_fix_len(args); 1377 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { 1378 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1379 TRACE_ALLOC("nominleft", args); 1380 args->agbno = NULLAGBLOCK; 1381 return 0; 1382 } 1383 rlen = args->len; 1384 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); 1385 /* 1386 * Allocate and initialize a cursor for the by-block tree. 1387 */ 1388 bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, 1389 args->agno, XFS_BTNUM_BNO, NULL, 0); 1390 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 1391 rbno, rlen, XFSA_FIXUP_CNT_OK))) 1392 goto error0; 1393 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1394 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1395 cnt_cur = bno_cur = NULL; 1396 args->len = rlen; 1397 args->agbno = rbno; 1398 XFS_WANT_CORRUPTED_GOTO( 1399 args->agbno + args->len <= 1400 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1401 error0); 1402 TRACE_ALLOC("normal", args); 1403 return 0; 1404 1405error0: 1406 TRACE_ALLOC("error", args); 1407 if (cnt_cur) 1408 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1409 if (bno_cur) 1410 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1411 return error; 1412} 1413 1414/* 1415 * Deal with the case where only small freespaces remain. 1416 * Either return the contents of the last freespace record, 1417 * or allocate space from the freelist if there is nothing in the tree. 
1418 */ 1419STATIC int /* error */ 1420xfs_alloc_ag_vextent_small( 1421 xfs_alloc_arg_t *args, /* allocation argument structure */ 1422 xfs_btree_cur_t *ccur, /* by-size cursor */ 1423 xfs_agblock_t *fbnop, /* result block number */ 1424 xfs_extlen_t *flenp, /* result length */ 1425 int *stat) /* status: 0-freelist, 1-normal/none */ 1426{ 1427 int error; 1428 xfs_agblock_t fbno; 1429 xfs_extlen_t flen; 1430#ifdef XFS_ALLOC_TRACE 1431 static char fname[] = "xfs_alloc_ag_vextent_small"; 1432#endif 1433 int i; 1434 1435 if ((error = xfs_alloc_decrement(ccur, 0, &i))) 1436 goto error0; 1437 if (i) { 1438 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) 1439 goto error0; 1440 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1441 } 1442 /* 1443 * Nothing in the btree, try the freelist. Make sure 1444 * to respect minleft even when pulling from the 1445 * freelist. 1446 */ 1447 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && 1448 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1449 > args->minleft)) { 1450 if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno))) 1451 goto error0; 1452 if (fbno != NULLAGBLOCK) { 1453 if (args->userdata) { 1454 xfs_buf_t *bp; 1455 1456 bp = xfs_btree_get_bufs(args->mp, args->tp, 1457 args->agno, fbno, 0); 1458 xfs_trans_binval(args->tp, bp); 1459 } 1460 args->len = 1; 1461 args->agbno = fbno; 1462 XFS_WANT_CORRUPTED_GOTO( 1463 args->agbno + args->len <= 1464 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1465 error0); 1466 args->wasfromfl = 1; 1467 TRACE_ALLOC("freelist", args); 1468 *stat = 0; 1469 return 0; 1470 } 1471 /* 1472 * Nothing in the freelist. 1473 */ 1474 else 1475 flen = 0; 1476 } 1477 /* 1478 * Can't allocate from the freelist for some reason. 1479 */ 1480 else { 1481 fbno = NULLAGBLOCK; 1482 flen = 0; 1483 } 1484 /* 1485 * Can't do the allocation, give up. 
1486 */ 1487 if (flen < args->minlen) { 1488 args->agbno = NULLAGBLOCK; 1489 TRACE_ALLOC("notenough", args); 1490 flen = 0; 1491 } 1492 *fbnop = fbno; 1493 *flenp = flen; 1494 *stat = 1; 1495 TRACE_ALLOC("normal", args); 1496 return 0; 1497 1498error0: 1499 TRACE_ALLOC("error", args); 1500 return error; 1501} 1502 1503/* 1504 * Free the extent starting at agno/bno for length. 1505 */ 1506STATIC int /* error */ 1507xfs_free_ag_extent( 1508 xfs_trans_t *tp, /* transaction pointer */ 1509 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 1510 xfs_agnumber_t agno, /* allocation group number */ 1511 xfs_agblock_t bno, /* starting block number */ 1512 xfs_extlen_t len, /* length of extent */ 1513 int isfl) /* set if is freelist blocks - no sb acctg */ 1514{ 1515 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1516 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ 1517 int error; /* error return value */ 1518#ifdef XFS_ALLOC_TRACE 1519 static char fname[] = "xfs_free_ag_extent"; 1520#endif 1521 xfs_agblock_t gtbno; /* start of right neighbor block */ 1522 xfs_extlen_t gtlen; /* length of right neighbor block */ 1523 int haveleft; /* have a left neighbor block */ 1524 int haveright; /* have a right neighbor block */ 1525 int i; /* temp, result code */ 1526 xfs_agblock_t ltbno; /* start of left neighbor block */ 1527 xfs_extlen_t ltlen; /* length of left neighbor block */ 1528 xfs_mount_t *mp; /* mount point struct for filesystem */ 1529 xfs_agblock_t nbno; /* new starting block of freespace */ 1530 xfs_extlen_t nlen; /* new length of freespace */ 1531 1532 mp = tp->t_mountp; 1533 /* 1534 * Allocate and initialize a cursor for the by-block btree. 1535 */ 1536 bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL, 1537 0); 1538 cnt_cur = NULL; 1539 /* 1540 * Look for a neighboring block on the left (lower block numbers) 1541 * that is contiguous with this space. 
1542 */ 1543 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) 1544 goto error0; 1545 if (haveleft) { 1546 /* 1547 * There is a block to our left. 1548 */ 1549 if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) 1550 goto error0; 1551 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1552 /* 1553 * It's not contiguous, though. 1554 */ 1555 if (ltbno + ltlen < bno) 1556 haveleft = 0; 1557 else { 1558 /* 1559 * If this failure happens the request to free this 1560 * space was invalid, it's (partly) already free. 1561 * Very bad. 1562 */ 1563 XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); 1564 } 1565 } 1566 /* 1567 * Look for a neighboring block on the right (higher block numbers) 1568 * that is contiguous with this space. 1569 */ 1570 if ((error = xfs_alloc_increment(bno_cur, 0, &haveright))) 1571 goto error0; 1572 if (haveright) { 1573 /* 1574 * There is a block to our right. 1575 */ 1576 if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) 1577 goto error0; 1578 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1579 /* 1580 * It's not contiguous, though. 1581 */ 1582 if (bno + len < gtbno) 1583 haveright = 0; 1584 else { 1585 /* 1586 * If this failure happens the request to free this 1587 * space was invalid, it's (partly) already free. 1588 * Very bad. 1589 */ 1590 XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); 1591 } 1592 } 1593 /* 1594 * Now allocate and initialize a cursor for the by-size tree. 1595 */ 1596 cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL, 1597 0); 1598 /* 1599 * Have both left and right contiguous neighbors. 1600 * Merge all three into a single free block. 1601 */ 1602 if (haveleft && haveright) { 1603 /* 1604 * Delete the old by-size entry on the left. 
1605 */ 1606 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1607 goto error0; 1608 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1609 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1610 goto error0; 1611 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1612 /* 1613 * Delete the old by-size entry on the right. 1614 */ 1615 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1616 goto error0; 1617 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1618 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1619 goto error0; 1620 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1621 /* 1622 * Delete the old by-block entry for the right block. 1623 */ 1624 if ((error = xfs_alloc_delete(bno_cur, &i))) 1625 goto error0; 1626 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1627 /* 1628 * Move the by-block cursor back to the left neighbor. 1629 */ 1630 if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) 1631 goto error0; 1632 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1633#ifdef DEBUG 1634 /* 1635 * Check that this is the right record: delete didn't 1636 * mangle the cursor. 1637 */ 1638 { 1639 xfs_agblock_t xxbno; 1640 xfs_extlen_t xxlen; 1641 1642 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, 1643 &i))) 1644 goto error0; 1645 XFS_WANT_CORRUPTED_GOTO( 1646 i == 1 && xxbno == ltbno && xxlen == ltlen, 1647 error0); 1648 } 1649#endif 1650 /* 1651 * Update remaining by-block entry to the new, joined block. 1652 */ 1653 nbno = ltbno; 1654 nlen = len + ltlen + gtlen; 1655 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1656 goto error0; 1657 } 1658 /* 1659 * Have only a left contiguous neighbor. 1660 * Merge it together with the new freespace. 1661 */ 1662 else if (haveleft) { 1663 /* 1664 * Delete the old by-size entry on the left. 
1665 */ 1666 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1667 goto error0; 1668 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1669 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1670 goto error0; 1671 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1672 /* 1673 * Back up the by-block cursor to the left neighbor, and 1674 * update its length. 1675 */ 1676 if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) 1677 goto error0; 1678 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1679 nbno = ltbno; 1680 nlen = len + ltlen; 1681 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1682 goto error0; 1683 } 1684 /* 1685 * Have only a right contiguous neighbor. 1686 * Merge it together with the new freespace. 1687 */ 1688 else if (haveright) { 1689 /* 1690 * Delete the old by-size entry on the right. 1691 */ 1692 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1693 goto error0; 1694 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1695 if ((error = xfs_alloc_delete(cnt_cur, &i))) 1696 goto error0; 1697 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1698 /* 1699 * Update the starting block and length of the right 1700 * neighbor in the by-block tree. 1701 */ 1702 nbno = bno; 1703 nlen = len + gtlen; 1704 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1705 goto error0; 1706 } 1707 /* 1708 * No contiguous neighbors. 1709 * Insert the new freespace into the by-block tree. 1710 */ 1711 else { 1712 nbno = bno; 1713 nlen = len; 1714 if ((error = xfs_alloc_insert(bno_cur, &i))) 1715 goto error0; 1716 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1717 } 1718 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1719 bno_cur = NULL; 1720 /* 1721 * In all cases we need to insert the new freespace in the by-size tree. 
1722 */ 1723 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 1724 goto error0; 1725 XFS_WANT_CORRUPTED_GOTO(i == 0, error0); 1726 if ((error = xfs_alloc_insert(cnt_cur, &i))) 1727 goto error0; 1728 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1729 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1730 cnt_cur = NULL; 1731 /* 1732 * Update the freespace totals in the ag and superblock. 1733 */ 1734 { 1735 xfs_agf_t *agf; 1736 xfs_perag_t *pag; /* per allocation group data */ 1737 1738 agf = XFS_BUF_TO_AGF(agbp); 1739 pag = &mp->m_perag[agno]; 1740 be32_add(&agf->agf_freeblks, len); 1741 xfs_trans_agblocks_delta(tp, len); 1742 pag->pagf_freeblks += len; 1743 XFS_WANT_CORRUPTED_GOTO( 1744 be32_to_cpu(agf->agf_freeblks) <= 1745 be32_to_cpu(agf->agf_length), 1746 error0); 1747 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); 1748 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 1749 if (!isfl) 1750 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); 1751 XFS_STATS_INC(xs_freex); 1752 XFS_STATS_ADD(xs_freeb, len); 1753 } 1754 TRACE_FREE(haveleft ? 1755 (haveright ? "both" : "left") : 1756 (haveright ? "right" : "none"), 1757 agno, bno, len, isfl); 1758 1759 /* 1760 * Since blocks move to the free list without the coordination 1761 * used in xfs_bmap_finish, we can't allow block to be available 1762 * for reallocation and non-transaction writing (user data) 1763 * until we know that the transaction that moved it to the free 1764 * list is permanently on disk. We track the blocks by declaring 1765 * these blocks as "busy"; the busy list is maintained on a per-ag 1766 * basis and each transaction records which entries should be removed 1767 * when the iclog commits to disk. If a busy block is allocated, 1768 * the iclog is pushed up to the LSN that freed the block. 
1769 */ 1770 xfs_alloc_mark_busy(tp, agno, bno, len); 1771 return 0; 1772 1773 error0: 1774 TRACE_FREE("error", agno, bno, len, isfl); 1775 if (bno_cur) 1776 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1777 if (cnt_cur) 1778 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1779 return error; 1780} 1781 1782/* 1783 * Visible (exported) allocation/free functions. 1784 * Some of these are used just by xfs_alloc_btree.c and this file. 1785 */ 1786 1787/* 1788 * Compute and fill in value of m_ag_maxlevels. 1789 */ 1790void 1791xfs_alloc_compute_maxlevels( 1792 xfs_mount_t *mp) /* file system mount structure */ 1793{ 1794 int level; 1795 uint maxblocks; 1796 uint maxleafents; 1797 int minleafrecs; 1798 int minnoderecs; 1799 1800 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2; 1801 minleafrecs = mp->m_alloc_mnr[0]; 1802 minnoderecs = mp->m_alloc_mnr[1]; 1803 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 1804 for (level = 1; maxblocks > 1; level++) 1805 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 1806 mp->m_ag_maxlevels = level; 1807} 1808 1809/* 1810 * Decide whether to use this allocation group for this allocation. 1811 * If so, fix up the btree freelist's size. 1812 */ 1813STATIC int /* error */ 1814xfs_alloc_fix_freelist( 1815 xfs_alloc_arg_t *args, /* allocation argument structure */ 1816 int flags) /* XFS_ALLOC_FLAG_... */ 1817{ 1818 xfs_buf_t *agbp; /* agf buffer pointer */ 1819 xfs_agf_t *agf; /* a.g. 
freespace structure pointer */ 1820 xfs_buf_t *agflbp;/* agfl buffer pointer */ 1821 xfs_agblock_t bno; /* freelist block */ 1822 xfs_extlen_t delta; /* new blocks needed in freelist */ 1823 int error; /* error result code */ 1824 xfs_extlen_t longest;/* longest extent in allocation group */ 1825 xfs_mount_t *mp; /* file system mount point structure */ 1826 xfs_extlen_t need; /* total blocks needed in freelist */ 1827 xfs_perag_t *pag; /* per-ag information structure */ 1828 xfs_alloc_arg_t targs; /* local allocation arguments */ 1829 xfs_trans_t *tp; /* transaction pointer */ 1830 1831 mp = args->mp; 1832 1833 pag = args->pag; 1834 tp = args->tp; 1835 if (!pag->pagf_init) { 1836 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, 1837 &agbp))) 1838 return error; 1839 if (!pag->pagf_init) { 1840 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 1841 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 1842 args->agbp = NULL; 1843 return 0; 1844 } 1845 } else 1846 agbp = NULL; 1847 1848 /* 1849 * If this is a metadata preferred pag and we are user data 1850 * then try somewhere else if we are not being asked to 1851 * try harder at this point 1852 */ 1853 if (pag->pagf_metadata && args->userdata && 1854 (flags & XFS_ALLOC_FLAG_TRYLOCK)) { 1855 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 1856 args->agbp = NULL; 1857 return 0; 1858 } 1859 1860 if (!(flags & XFS_ALLOC_FLAG_FREEING)) { 1861 need = XFS_MIN_FREELIST_PAG(pag, mp); 1862 delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; 1863 /* 1864 * If it looks like there isn't a long enough extent, or enough 1865 * total blocks, reject it. 1866 */ 1867 longest = (pag->pagf_longest > delta) ? 
1868 (pag->pagf_longest - delta) : 1869 (pag->pagf_flcount > 0 || pag->pagf_longest > 0); 1870 if ((args->minlen + args->alignment + args->minalignslop - 1) > 1871 longest || 1872 ((int)(pag->pagf_freeblks + pag->pagf_flcount - 1873 need - args->total) < (int)args->minleft)) { 1874 if (agbp) 1875 xfs_trans_brelse(tp, agbp); 1876 args->agbp = NULL; 1877 return 0; 1878 } 1879 } 1880 1881 /* 1882 * Get the a.g. freespace buffer. 1883 * Can fail if we're not blocking on locks, and it's held. 1884 */ 1885 if (agbp == NULL) { 1886 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, 1887 &agbp))) 1888 return error; 1889 if (agbp == NULL) { 1890 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 1891 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 1892 args->agbp = NULL; 1893 return 0; 1894 } 1895 } 1896 /* 1897 * Figure out how many blocks we should have in the freelist. 1898 */ 1899 agf = XFS_BUF_TO_AGF(agbp); 1900 need = XFS_MIN_FREELIST(agf, mp); 1901 /* 1902 * If there isn't enough total or single-extent, reject it. 1903 */ 1904 if (!(flags & XFS_ALLOC_FLAG_FREEING)) { 1905 delta = need > be32_to_cpu(agf->agf_flcount) ? 1906 (need - be32_to_cpu(agf->agf_flcount)) : 0; 1907 longest = be32_to_cpu(agf->agf_longest); 1908 longest = (longest > delta) ? (longest - delta) : 1909 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); 1910 if ((args->minlen + args->alignment + args->minalignslop - 1) > 1911 longest || 1912 ((int)(be32_to_cpu(agf->agf_freeblks) + 1913 be32_to_cpu(agf->agf_flcount) - need - args->total) < 1914 (int)args->minleft)) { 1915 xfs_trans_brelse(tp, agbp); 1916 args->agbp = NULL; 1917 return 0; 1918 } 1919 } 1920 /* 1921 * Make the freelist shorter if it's too long. 
1922 */ 1923 while (be32_to_cpu(agf->agf_flcount) > need) { 1924 xfs_buf_t *bp; 1925 1926 if ((error = xfs_alloc_get_freelist(tp, agbp, &bno))) 1927 return error; 1928 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) 1929 return error; 1930 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); 1931 xfs_trans_binval(tp, bp); 1932 } 1933 /* 1934 * Initialize the args structure. 1935 */ 1936 targs.tp = tp; 1937 targs.mp = mp; 1938 targs.agbp = agbp; 1939 targs.agno = args->agno; 1940 targs.mod = targs.minleft = targs.wasdel = targs.userdata = 1941 targs.minalignslop = 0; 1942 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; 1943 targs.type = XFS_ALLOCTYPE_THIS_AG; 1944 targs.pag = pag; 1945 if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) 1946 return error; 1947 /* 1948 * Make the freelist longer if it's too short. 1949 */ 1950 while (be32_to_cpu(agf->agf_flcount) < need) { 1951 targs.agbno = 0; 1952 targs.maxlen = need - be32_to_cpu(agf->agf_flcount); 1953 /* 1954 * Allocate as many blocks as possible at once. 1955 */ 1956 if ((error = xfs_alloc_ag_vextent(&targs))) { 1957 xfs_trans_brelse(tp, agflbp); 1958 return error; 1959 } 1960 /* 1961 * Stop if we run out. Won't happen if callers are obeying 1962 * the restrictions correctly. Can happen for free calls 1963 * on a completely full ag. 1964 */ 1965 if (targs.agbno == NULLAGBLOCK) { 1966 if (flags & XFS_ALLOC_FLAG_FREEING) 1967 break; 1968 xfs_trans_brelse(tp, agflbp); 1969 args->agbp = NULL; 1970 return 0; 1971 } 1972 /* 1973 * Put each allocated block on the list. 1974 */ 1975 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { 1976 if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp, 1977 bno))) 1978 return error; 1979 } 1980 } 1981 xfs_trans_brelse(tp, agflbp); 1982 args->agbp = agbp; 1983 return 0; 1984} 1985 1986/* 1987 * Get a block from the freelist. 1988 * Returns with the buffer for the block gotten. 
 */
int				/* error */
xfs_alloc_get_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
	xfs_agblock_t	*bnop)	/* block address retrieved from freelist */
{
	xfs_agf_t	*agf;	/* a.g. freespace structure */
	xfs_agfl_t	*agfl;	/* a.g. freelist structure */
	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
	xfs_agblock_t	bno;	/* block number returned */
	int		error;
#ifdef XFS_ALLOC_TRACE
	static char	fname[] = "xfs_alloc_get_freelist";
#endif
	xfs_mount_t	*mp;	/* mount structure */
	xfs_perag_t	*pag;	/* per allocation group data */

	agf = XFS_BUF_TO_AGF(agbp);
	/*
	 * Freelist is empty, give up.
	 */
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	mp = tp->t_mountp;
	if ((error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	agfl = XFS_BUF_TO_AGFL(agflbp);
	/*
	 * Get the block number and update the data structures.
	 * agf_flfirst wraps around the circular AGFL array.
	 */
	bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	be32_add(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
		agf->agf_flfirst = 0;
	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
	be32_add(&agf->agf_flcount, -1);
	xfs_trans_agflist_delta(tp, -1);
	pag->pagf_flcount--;
	TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
	*bnop = bno;

	/*
	 * As blocks are freed, they are added to the per-ag busy list
	 * and remain there until the freeing transaction is committed to
	 * disk.  Now that we have allocated blocks, this list must be
	 * searched to see if a block is being reused.  If one is, then
	 * the freeing transaction must be pushed to disk NOW by forcing
	 * to disk all iclogs up that transaction's LSN.
	 */
	xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1);
	return 0;
}

/*
 * Log the given fields from the agf structure.
 */
void
xfs_alloc_log_agf(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
{
	int	first;		/* first byte offset */
	int	last;		/* last byte offset */
	/*
	 * Byte offsets of each loggable AGF field, in mask-bit order;
	 * the trailing sizeof() entry bounds the last field.
	 */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		sizeof(xfs_agf_t)
	};

	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}

/*
 * Interface for inode allocation to force the pag data to be initialized.
 */
int				/* error */
xfs_alloc_pagf_init(
	xfs_mount_t	*mp,	/* file system mount structure */
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_agnumber_t	agno,	/* allocation group number */
	int		flags)	/* XFS_ALLOC_FLAGS_... */
{
	xfs_buf_t	*bp;
	int		error;

	/* Reading the AGF initializes mp->m_perag[agno] as a side effect. */
	if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
		return error;
	if (bp)
		xfs_trans_brelse(tp, bp);
	return 0;
}

/*
 * Put the block on the freelist for the allocation group.
 */
int					/* error */
xfs_alloc_put_freelist(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t		bno)	/* block being freed */
{
	xfs_agf_t		*agf;	/* a.g. freespace structure */
	xfs_agfl_t		*agfl;	/* a.g. free block array */
	__be32			*blockp;/* pointer to array entry */
	int			error;
#ifdef XFS_ALLOC_TRACE
	static char		fname[] = "xfs_alloc_put_freelist";
#endif
	xfs_mount_t		*mp;	/* mount structure */
	xfs_perag_t		*pag;	/* per allocation group data */

	agf = XFS_BUF_TO_AGF(agbp);
	mp = tp->t_mountp;

	/* caller may pass the AGFL buffer in; read it here only if not */
	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	agfl = XFS_BUF_TO_AGFL(agflbp);
	/*
	 * Advance the tail index (wrapping at the end of the array, the
	 * AGFL is circular) and append bno there.  On-disk count, the
	 * transaction's freelist delta and the in-core per-ag count are
	 * all kept in step.
	 */
	be32_add(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
		agf->agf_fllast = 0;
	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
	be32_add(&agf->agf_flcount, 1);
	xfs_trans_agflist_delta(tp, 1);
	pag->pagf_flcount++;
	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
	blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
	/* log only the one array slot that was written */
	xfs_trans_log_buf(tp, agflbp,
		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
			sizeof(xfs_agblock_t) - 1));
	return 0;
}

/*
 * Read in the allocation group header (free/alloc section).
 * On first read of an AG this also initializes the in-core per-ag
 * (xfs_perag_t) freespace summary from the on-disk AGF.
 * Returns *bpp == NULL (without error) if TRYLOCK was requested and the
 * buffer lock could not be obtained.
 */
int					/* error */
xfs_alloc_read_agf(
	xfs_mount_t	*mp,	/* mount point structure */
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_agnumber_t	agno,	/* allocation group number */
	int		flags,	/* XFS_ALLOC_FLAG_... */
	xfs_buf_t	**bpp)	/* buffer for the ag freelist header */
{
	xfs_agf_t	*agf;		/* ag freelist header */
	int		agf_ok;		/* set if agf is consistent */
	xfs_buf_t	*bp;		/* return value */
	xfs_perag_t	*pag;		/* per allocation group data */
	int		error;

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1),
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
			&bp);
	if (error)
		return error;
	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
	if (!bp) {
		/* trylock failed: not an error, caller must cope with NULL */
		*bpp = NULL;
		return 0;
	}
	/*
	 * Validate the magic number of the agf block, plus basic sanity
	 * of the version and the freelist/freespace counters, before
	 * trusting any of it.
	 */
	agf = XFS_BUF_TO_AGF(bp);
	agf_ok =
		be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
		XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
		be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
		be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
		be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
		be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
			XFS_RANDOM_ALLOC_READ_AGF))) {
		XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
				     XFS_ERRLEVEL_LOW, mp, agf);
		xfs_trans_brelse(tp, bp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	pag = &mp->m_perag[agno];
	if (!pag->pagf_init) {
		/*
		 * First time this AG's header has been read: seed the
		 * in-core summary and set up the busy-block list.
		 */
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		spinlock_init(&pag->pagb_lock, "xfspagb");
		pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
					sizeof(xfs_perag_busy_t), KM_SLEEP);
		pag->pagf_init = 1;
	}
#ifdef DEBUG
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		/* in-core summary must agree with the on-disk AGF */
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
	*bpp = bp;
	return 0;
}

/*
 * Allocate an extent (variable-size).
 * Depending on the allocation type, we either look in a single allocation
 * group or loop over the allocation groups to find the result.
 * On success args->fsbno holds the allocation (NULLFSBLOCK on failure).
 */
int				/* error */
xfs_alloc_vextent(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_agblock_t	agsize;	/* allocation group size */
	int		error;
	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
#ifdef XFS_ALLOC_TRACE
	static char	fname[] = "xfs_alloc_vextent";
#endif
	xfs_extlen_t	minleft;/* minimum left value, temp copy */
	xfs_mount_t	*mp;	/* mount structure pointer */
	xfs_agnumber_t	sagno;	/* starting allocation group number */
	xfs_alloctype_t	type;	/* input allocation type */
	int		bump_rotor = 0;
	int		no_min = 0;
	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	/* argument sanity: trip the ASSERTs in DEBUG builds, but fail the
	 * allocation softly (fsbno = NULLFSBLOCK, no error) in production */
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		TRACE_ALLOC("badargs", args);
		return 0;
	}
	minleft = args->minleft;

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 * minleft is suppressed while fixing the freelist and then
		 * restored, so freelist fixup doesn't apply the caller's
		 * reservation.
		 */
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		down_read(&mp->m_peraglock);
		args->pag = &mp->m_perag[args->agno];
		args->minleft = 0;
		error = xfs_alloc_fix_freelist(args, 0);
		args->minleft = minleft;
		if (error) {
			TRACE_ALLOC("nofix", args);
			goto error0;
		}
		if (!args->agbp) {
			/* freelist could not be fixed up: give up quietly */
			up_read(&mp->m_peraglock);
			TRACE_ALLOC("noagbp", args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		up_read(&mp->m_peraglock);
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 * For initial user data on inode32 mounts, override the
		 * starting AG with the rotor so data spreads across AGs.
		 */
		if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		/* FALLTHROUGH */
	case XFS_ALLOCTYPE_ANY_AG:
	case XFS_ALLOCTYPE_START_AG:
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_ANY_AG) {
			/*
			 * Start with the last place we left off.
			 */
			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount;
			args->type = XFS_ALLOCTYPE_THIS_AG;
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			if (type == XFS_ALLOCTYPE_START_AG)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 * (A third sweep with no_min set drops the minleft
		 * reservation as a last resort.)
		 */
		down_read(&mp->m_peraglock);
		for (;;) {
			args->pag = &mp->m_perag[args->agno];
			if (no_min) args->minleft = 0;
			error = xfs_alloc_fix_freelist(args, flags);
			args->minleft = minleft;
			if (error) {
				TRACE_ALLOC("nofix", args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}
			TRACE_ALLOC("loopfailed", args);
			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space.  However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno.  Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
			if (args->agno == sagno) {
				if (no_min == 1) {
					/* all AGs tried without any
					 * reservation: truly out of space */
					args->agbno = NULLAGBLOCK;
					TRACE_ALLOC("allfailed", args);
					break;
				}
				if (flags == 0) {
					no_min = 1;
				} else {
					flags = 0;
					if (type == XFS_ALLOCTYPE_START_BNO) {
						args->agbno = XFS_FSB_TO_AGBNO(mp,
							args->fsbno);
						args->type = XFS_ALLOCTYPE_NEAR_BNO;
					}
				}
			}
		}
		up_read(&mp->m_peraglock);
		/* advance the rotor so the next ANY_AG/inode32 allocation
		 * starts in a different AG */
		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif
	}
	return 0;
error0:
	up_read(&mp->m_peraglock);
	return error;
}

/*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
 * after fixing up the freelist.
 */
int				/* error */
xfs_free_extent(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_fsblock_t	bno,	/* starting block number of extent */
	xfs_extlen_t	len)	/* length of extent */
{
	xfs_alloc_arg_t	args;
	int		error;

	ASSERT(len != 0);
	memset(&args, 0, sizeof(xfs_alloc_arg_t));
	args.tp = tp;
	args.mp = tp->t_mountp;
	/* split the fsblock number into (AG number, AG-relative block) */
	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
	down_read(&args.mp->m_peraglock);
	args.pag = &args.mp->m_perag[args.agno];
	/* FREEING: the freelist fixup must not fail for lack of space */
	if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
		goto error0;
#ifdef DEBUG
	ASSERT(args.agbp != NULL);
	ASSERT((args.agbno + len) <=
		be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
#endif
	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
error0:
	up_read(&args.mp->m_peraglock);
	return error;
}


/*
 * AG Busy list management
 * The busy list contains block ranges that have been freed but whose
 * transactions have not yet hit disk.  If any block listed in a busy
 * list is reused, the transaction that freed it must be forced to disk
 * before continuing to use the block.
 *
 * xfs_alloc_mark_busy - add to the per-ag busy list
 * xfs_alloc_clear_busy - remove an item from the per-ag busy list
 */
void
xfs_alloc_mark_busy(xfs_trans_t *tp,
		    xfs_agnumber_t agno,
		    xfs_agblock_t bno,
		    xfs_extlen_t len)
{
	xfs_mount_t		*mp;
	xfs_perag_busy_t	*bsy;
	int			n;
	SPLDECL(s);

	mp = tp->t_mountp;
	/* pagb_list and pagb_count are protected by the per-ag pagb_lock */
	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);

	/* search pagb_list for an open slot (busy_tp == NULL marks free) */
	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
	     n < XFS_PAGB_NUM_SLOTS;
	     bsy++, n++) {
		if (bsy->busy_tp == NULL) {
			break;
		}
	}

	if (n < XFS_PAGB_NUM_SLOTS) {
		/* record the extent and tie it to the freeing transaction,
		 * which will clear the slot at commit time */
		bsy = &mp->m_perag[agno].pagb_list[n];
		mp->m_perag[agno].pagb_count++;
		TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp);
		bsy->busy_start = bno;
		bsy->busy_length = len;
		bsy->busy_tp = tp;
		xfs_trans_add_busy(tp, agno, n);
	} else {
		TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp);
		/*
		 * The busy list is full!  Since it is now not possible to
		 * track the free block, make this a synchronous transaction
		 * to insure that the block is not reused before this
		 * transaction commits.
		 */
		xfs_trans_set_sync(tp);
	}

	mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
}

/*
 * Clear busy-list slot idx in AG agno, but only if it still belongs to
 * transaction tp (the slot may have been reused; the "missing" branch is
 * tolerated, not an error).
 */
void
xfs_alloc_clear_busy(xfs_trans_t *tp,
		     xfs_agnumber_t agno,
		     int idx)
{
	xfs_mount_t		*mp;
	xfs_perag_busy_t	*list;
	SPLDECL(s);

	mp = tp->t_mountp;

	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
	list = mp->m_perag[agno].pagb_list;

	ASSERT(idx < XFS_PAGB_NUM_SLOTS);
	if (list[idx].busy_tp == tp) {
		TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp);
		list[idx].busy_tp = NULL;
		mp->m_perag[agno].pagb_count--;
	} else {
		TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
	}

	mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
}


/*
 * returns non-zero if any of (agno,bno):len is in a busy list
 * (specifically: the slot index on a hit, -1 on a miss).  On a hit the
 * log is forced synchronously up to the freeing transaction's commit LSN
 * so the blocks are safe to reuse.
 */
STATIC int
xfs_alloc_search_busy(xfs_trans_t *tp,
		      xfs_agnumber_t agno,
		      xfs_agblock_t bno,
		      xfs_extlen_t len)
{
	xfs_mount_t		*mp;
	xfs_perag_busy_t	*bsy;
	int			n;
	xfs_agblock_t		uend, bend;	/* last blocks of each range */
	xfs_lsn_t		lsn;
	int			cnt;		/* busy entries left to check */
	SPLDECL(s);

	mp = tp->t_mountp;

	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
	cnt = mp->m_perag[agno].pagb_count;

	uend = bno + len - 1;

	/* search pagb_list for this slot, skipping open slots;
	 * cnt lets the scan stop once all occupied slots have been seen */
	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
	     cnt; bsy++, n++) {

		/*
		 * (start1,length1) within (start2, length2)
		 * — i.e. does [bno,uend] overlap this busy extent at all?
		 */
		if (bsy->busy_tp != NULL) {
			bend = bsy->busy_start + bsy->busy_length - 1;
			if ((bno > bend) ||
			    (uend < bsy->busy_start)) {
				cnt--;
			} else {
				TRACE_BUSYSEARCH("xfs_alloc_search_busy",
						 "found1", agno, bno, len, n,
						 tp);
				break;
			}
		}
	}

	/*
	 * If a block was found, force the log through the LSN of the
	 * transaction that freed the block.  The spinlock is dropped
	 * before the (sleeping) log force; lsn is sampled first while
	 * still under the lock.
	 */
	if (cnt) {
		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp);
		lsn = bsy->busy_tp->t_commit_lsn;
		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
	} else {
		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp);
		n = -1;
		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
	}

	return n;
}