1// SPDX-License-Identifier: GPL-2.0+ 2/* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6#include "xfs.h" 7#include "xfs_fs.h" 8#include "xfs_format.h" 9#include "xfs_log_format.h" 10#include "xfs_trans_resv.h" 11#include "xfs_bit.h" 12#include "xfs_shared.h" 13#include "xfs_mount.h" 14#include "xfs_defer.h" 15#include "xfs_trans.h" 16#include "xfs_trans_priv.h" 17#include "xfs_refcount_item.h" 18#include "xfs_log.h" 19#include "xfs_refcount.h" 20#include "xfs_error.h" 21#include "xfs_log_priv.h" 22#include "xfs_log_recover.h" 23#include "xfs_ag.h" 24 25struct kmem_cache *xfs_cui_cache; 26struct kmem_cache *xfs_cud_cache; 27 28static const struct xfs_item_ops xfs_cui_item_ops; 29 30static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip) 31{ 32 return container_of(lip, struct xfs_cui_log_item, cui_item); 33} 34 35STATIC void 36xfs_cui_item_free( 37 struct xfs_cui_log_item *cuip) 38{ 39 kvfree(cuip->cui_item.li_lv_shadow); 40 if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) 41 kfree(cuip); 42 else 43 kmem_cache_free(xfs_cui_cache, cuip); 44} 45 46/* 47 * Freeing the CUI requires that we remove it from the AIL if it has already 48 * been placed there. However, the CUI may not yet have been placed in the AIL 49 * when called by xfs_cui_release() from CUD processing due to the ordering of 50 * committed vs unpin operations in bulk insert operations. Hence the reference 51 * count to ensure only the last caller frees the CUI. 52 */ 53STATIC void 54xfs_cui_release( 55 struct xfs_cui_log_item *cuip) 56{ 57 ASSERT(atomic_read(&cuip->cui_refcount) > 0); 58 if (!atomic_dec_and_test(&cuip->cui_refcount)) 59 return; 60 61 xfs_trans_ail_delete(&cuip->cui_item, 0); 62 xfs_cui_item_free(cuip); 63} 64 65 66STATIC void 67xfs_cui_item_size( 68 struct xfs_log_item *lip, 69 int *nvecs, 70 int *nbytes) 71{ 72 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 73 74 *nvecs += 1; 75 *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents); 76} 77 78/* 79 * This is called to fill in the vector of log iovecs for the 80 * given cui log item. We use only 1 iovec, and we point that 81 * at the cui_log_format structure embedded in the cui item. 82 * It is at this point that we assert that all of the extent 83 * slots in the cui item have been filled. 84 */ 85STATIC void 86xfs_cui_item_format( 87 struct xfs_log_item *lip, 88 struct xfs_log_vec *lv) 89{ 90 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 91 struct xfs_log_iovec *vecp = NULL; 92 93 ASSERT(atomic_read(&cuip->cui_next_extent) == 94 cuip->cui_format.cui_nextents); 95 96 cuip->cui_format.cui_type = XFS_LI_CUI; 97 cuip->cui_format.cui_size = 1; 98 99 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, 100 xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); 101} 102 103/* 104 * The unpin operation is the last place an CUI is manipulated in the log. It is 105 * either inserted in the AIL or aborted in the event of a log I/O error. In 106 * either case, the CUI transaction has been successfully committed to make it 107 * this far. Therefore, we expect whoever committed the CUI to either construct 108 * and commit the CUD or drop the CUD's reference in the event of error. Simply 109 * drop the log's CUI reference now that the log is done with it. 110 */ 111STATIC void 112xfs_cui_item_unpin( 113 struct xfs_log_item *lip, 114 int remove) 115{ 116 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 117 118 xfs_cui_release(cuip); 119} 120 121/* 122 * The CUI has been either committed or aborted if the transaction has been 123 * cancelled. If the transaction was cancelled, an CUD isn't going to be 124 * constructed and thus we free the CUI here directly. 125 */ 126STATIC void 127xfs_cui_item_release( 128 struct xfs_log_item *lip) 129{ 130 xfs_cui_release(CUI_ITEM(lip)); 131} 132 133/* 134 * Allocate and initialize an cui item with the given number of extents. 135 */ 136STATIC struct xfs_cui_log_item * 137xfs_cui_init( 138 struct xfs_mount *mp, 139 uint nextents) 140 141{ 142 struct xfs_cui_log_item *cuip; 143 144 ASSERT(nextents > 0); 145 if (nextents > XFS_CUI_MAX_FAST_EXTENTS) 146 cuip = kzalloc(xfs_cui_log_item_sizeof(nextents), 147 GFP_KERNEL | __GFP_NOFAIL); 148 else 149 cuip = kmem_cache_zalloc(xfs_cui_cache, 150 GFP_KERNEL | __GFP_NOFAIL); 151 152 xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); 153 cuip->cui_format.cui_nextents = nextents; 154 cuip->cui_format.cui_id = (uintptr_t)(void *)cuip; 155 atomic_set(&cuip->cui_next_extent, 0); 156 atomic_set(&cuip->cui_refcount, 2); 157 158 return cuip; 159} 160 161static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip) 162{ 163 return container_of(lip, struct xfs_cud_log_item, cud_item); 164} 165 166STATIC void 167xfs_cud_item_size( 168 struct xfs_log_item *lip, 169 int *nvecs, 170 int *nbytes) 171{ 172 *nvecs += 1; 173 *nbytes += sizeof(struct xfs_cud_log_format); 174} 175 176/* 177 * This is called to fill in the vector of log iovecs for the 178 * given cud log item. We use only 1 iovec, and we point that 179 * at the cud_log_format structure embedded in the cud item. 180 * It is at this point that we assert that all of the extent 181 * slots in the cud item have been filled. 182 */ 183STATIC void 184xfs_cud_item_format( 185 struct xfs_log_item *lip, 186 struct xfs_log_vec *lv) 187{ 188 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 189 struct xfs_log_iovec *vecp = NULL; 190 191 cudp->cud_format.cud_type = XFS_LI_CUD; 192 cudp->cud_format.cud_size = 1; 193 194 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, 195 sizeof(struct xfs_cud_log_format)); 196} 197 198/* 199 * The CUD is either committed or aborted if the transaction is cancelled. If 200 * the transaction is cancelled, drop our reference to the CUI and free the 201 * CUD. 202 */ 203STATIC void 204xfs_cud_item_release( 205 struct xfs_log_item *lip) 206{ 207 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 208 209 xfs_cui_release(cudp->cud_cuip); 210 kvfree(cudp->cud_item.li_lv_shadow); 211 kmem_cache_free(xfs_cud_cache, cudp); 212} 213 214static struct xfs_log_item * 215xfs_cud_item_intent( 216 struct xfs_log_item *lip) 217{ 218 return &CUD_ITEM(lip)->cud_cuip->cui_item; 219} 220 221static const struct xfs_item_ops xfs_cud_item_ops = { 222 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | 223 XFS_ITEM_INTENT_DONE, 224 .iop_size = xfs_cud_item_size, 225 .iop_format = xfs_cud_item_format, 226 .iop_release = xfs_cud_item_release, 227 .iop_intent = xfs_cud_item_intent, 228}; 229 230/* Sort refcount intents by AG. */ 231static int 232xfs_refcount_update_diff_items( 233 void *priv, 234 const struct list_head *a, 235 const struct list_head *b) 236{ 237 struct xfs_refcount_intent *ra; 238 struct xfs_refcount_intent *rb; 239 240 ra = container_of(a, struct xfs_refcount_intent, ri_list); 241 rb = container_of(b, struct xfs_refcount_intent, ri_list); 242 243 return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno; 244} 245 246/* Set the phys extent flags for this reverse mapping. */ 247static void 248xfs_trans_set_refcount_flags( 249 struct xfs_phys_extent *pmap, 250 enum xfs_refcount_intent_type type) 251{ 252 pmap->pe_flags = 0; 253 switch (type) { 254 case XFS_REFCOUNT_INCREASE: 255 case XFS_REFCOUNT_DECREASE: 256 case XFS_REFCOUNT_ALLOC_COW: 257 case XFS_REFCOUNT_FREE_COW: 258 pmap->pe_flags |= type; 259 break; 260 default: 261 ASSERT(0); 262 } 263} 264 265/* Log refcount updates in the intent item. */ 266STATIC void 267xfs_refcount_update_log_item( 268 struct xfs_trans *tp, 269 struct xfs_cui_log_item *cuip, 270 struct xfs_refcount_intent *ri) 271{ 272 uint next_extent; 273 struct xfs_phys_extent *pmap; 274 275 /* 276 * atomic_inc_return gives us the value after the increment; 277 * we want to use it as an array index so we need to subtract 1 from 278 * it. 279 */ 280 next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 281 ASSERT(next_extent < cuip->cui_format.cui_nextents); 282 pmap = &cuip->cui_format.cui_extents[next_extent]; 283 pmap->pe_startblock = ri->ri_startblock; 284 pmap->pe_len = ri->ri_blockcount; 285 xfs_trans_set_refcount_flags(pmap, ri->ri_type); 286} 287 288static struct xfs_log_item * 289xfs_refcount_update_create_intent( 290 struct xfs_trans *tp, 291 struct list_head *items, 292 unsigned int count, 293 bool sort) 294{ 295 struct xfs_mount *mp = tp->t_mountp; 296 struct xfs_cui_log_item *cuip = xfs_cui_init(mp, count); 297 struct xfs_refcount_intent *ri; 298 299 ASSERT(count > 0); 300 301 if (sort) 302 list_sort(mp, items, xfs_refcount_update_diff_items); 303 list_for_each_entry(ri, items, ri_list) 304 xfs_refcount_update_log_item(tp, cuip, ri); 305 return &cuip->cui_item; 306} 307 308/* Get an CUD so we can process all the deferred refcount updates. */ 309static struct xfs_log_item * 310xfs_refcount_update_create_done( 311 struct xfs_trans *tp, 312 struct xfs_log_item *intent, 313 unsigned int count) 314{ 315 struct xfs_cui_log_item *cuip = CUI_ITEM(intent); 316 struct xfs_cud_log_item *cudp; 317 318 cudp = kmem_cache_zalloc(xfs_cud_cache, GFP_KERNEL | __GFP_NOFAIL); 319 xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, 320 &xfs_cud_item_ops); 321 cudp->cud_cuip = cuip; 322 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 323 324 return &cudp->cud_item; 325} 326 327/* Take a passive ref to the AG containing the space we're refcounting. */ 328void 329xfs_refcount_update_get_group( 330 struct xfs_mount *mp, 331 struct xfs_refcount_intent *ri) 332{ 333 xfs_agnumber_t agno; 334 335 agno = XFS_FSB_TO_AGNO(mp, ri->ri_startblock); 336 ri->ri_pag = xfs_perag_intent_get(mp, agno); 337} 338 339/* Release a passive AG ref after finishing refcounting work. */ 340static inline void 341xfs_refcount_update_put_group( 342 struct xfs_refcount_intent *ri) 343{ 344 xfs_perag_intent_put(ri->ri_pag); 345} 346 347/* Process a deferred refcount update. */ 348STATIC int 349xfs_refcount_update_finish_item( 350 struct xfs_trans *tp, 351 struct xfs_log_item *done, 352 struct list_head *item, 353 struct xfs_btree_cur **state) 354{ 355 struct xfs_refcount_intent *ri; 356 int error; 357 358 ri = container_of(item, struct xfs_refcount_intent, ri_list); 359 360 /* Did we run out of reservation? Requeue what we didn't finish. */ 361 error = xfs_refcount_finish_one(tp, ri, state); 362 if (!error && ri->ri_blockcount > 0) { 363 ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE || 364 ri->ri_type == XFS_REFCOUNT_DECREASE); 365 return -EAGAIN; 366 } 367 368 xfs_refcount_update_put_group(ri); 369 kmem_cache_free(xfs_refcount_intent_cache, ri); 370 return error; 371} 372 373/* Abort all pending CUIs. */ 374STATIC void 375xfs_refcount_update_abort_intent( 376 struct xfs_log_item *intent) 377{ 378 xfs_cui_release(CUI_ITEM(intent)); 379} 380 381/* Cancel a deferred refcount update. */ 382STATIC void 383xfs_refcount_update_cancel_item( 384 struct list_head *item) 385{ 386 struct xfs_refcount_intent *ri; 387 388 ri = container_of(item, struct xfs_refcount_intent, ri_list); 389 390 xfs_refcount_update_put_group(ri); 391 kmem_cache_free(xfs_refcount_intent_cache, ri); 392} 393 394/* Is this recovered CUI ok? */ 395static inline bool 396xfs_cui_validate_phys( 397 struct xfs_mount *mp, 398 struct xfs_phys_extent *pmap) 399{ 400 if (!xfs_has_reflink(mp)) 401 return false; 402 403 if (pmap->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) 404 return false; 405 406 switch (pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { 407 case XFS_REFCOUNT_INCREASE: 408 case XFS_REFCOUNT_DECREASE: 409 case XFS_REFCOUNT_ALLOC_COW: 410 case XFS_REFCOUNT_FREE_COW: 411 break; 412 default: 413 return false; 414 } 415 416 return xfs_verify_fsbext(mp, pmap->pe_startblock, pmap->pe_len); 417} 418 419static inline void 420xfs_cui_recover_work( 421 struct xfs_mount *mp, 422 struct xfs_defer_pending *dfp, 423 struct xfs_phys_extent *pmap) 424{ 425 struct xfs_refcount_intent *ri; 426 427 ri = kmem_cache_alloc(xfs_refcount_intent_cache, 428 GFP_KERNEL | __GFP_NOFAIL); 429 ri->ri_type = pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; 430 ri->ri_startblock = pmap->pe_startblock; 431 ri->ri_blockcount = pmap->pe_len; 432 xfs_refcount_update_get_group(mp, ri); 433 434 xfs_defer_add_item(dfp, &ri->ri_list); 435} 436 437/* 438 * Process a refcount update intent item that was recovered from the log. 439 * We need to update the refcountbt. 440 */ 441STATIC int 442xfs_refcount_recover_work( 443 struct xfs_defer_pending *dfp, 444 struct list_head *capture_list) 445{ 446 struct xfs_trans_res resv; 447 struct xfs_log_item *lip = dfp->dfp_intent; 448 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 449 struct xfs_trans *tp; 450 struct xfs_mount *mp = lip->li_log->l_mp; 451 int i; 452 int error = 0; 453 454 /* 455 * First check the validity of the extents described by the 456 * CUI. If any are bad, then assume that all are bad and 457 * just toss the CUI. 458 */ 459 for (i = 0; i < cuip->cui_format.cui_nextents; i++) { 460 if (!xfs_cui_validate_phys(mp, 461 &cuip->cui_format.cui_extents[i])) { 462 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 463 &cuip->cui_format, 464 sizeof(cuip->cui_format)); 465 return -EFSCORRUPTED; 466 } 467 468 xfs_cui_recover_work(mp, dfp, &cuip->cui_format.cui_extents[i]); 469 } 470 471 /* 472 * Under normal operation, refcount updates are deferred, so we 473 * wouldn't be adding them directly to a transaction. All 474 * refcount updates manage reservation usage internally and 475 * dynamically by deferring work that won't fit in the 476 * transaction. Normally, any work that needs to be deferred 477 * gets attached to the same defer_ops that scheduled the 478 * refcount update. However, we're in log recovery here, so we 479 * use the passed in defer_ops and to finish up any work that 480 * doesn't fit. We need to reserve enough blocks to handle a 481 * full btree split on either end of the refcount range. 482 */ 483 resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); 484 error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0, 485 XFS_TRANS_RESERVE, &tp); 486 if (error) 487 return error; 488 489 error = xlog_recover_finish_intent(tp, dfp); 490 if (error == -EFSCORRUPTED) 491 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 492 &cuip->cui_format, 493 sizeof(cuip->cui_format)); 494 if (error) 495 goto abort_error; 496 497 return xfs_defer_ops_capture_and_commit(tp, capture_list); 498 499abort_error: 500 xfs_trans_cancel(tp); 501 return error; 502} 503 504/* Relog an intent item to push the log tail forward. */ 505static struct xfs_log_item * 506xfs_refcount_relog_intent( 507 struct xfs_trans *tp, 508 struct xfs_log_item *intent, 509 struct xfs_log_item *done_item) 510{ 511 struct xfs_cui_log_item *cuip; 512 struct xfs_phys_extent *pmap; 513 unsigned int count; 514 515 count = CUI_ITEM(intent)->cui_format.cui_nextents; 516 pmap = CUI_ITEM(intent)->cui_format.cui_extents; 517 518 cuip = xfs_cui_init(tp->t_mountp, count); 519 memcpy(cuip->cui_format.cui_extents, pmap, count * sizeof(*pmap)); 520 atomic_set(&cuip->cui_next_extent, count); 521 522 return &cuip->cui_item; 523} 524 525const struct xfs_defer_op_type xfs_refcount_update_defer_type = { 526 .name = "refcount", 527 .max_items = XFS_CUI_MAX_FAST_EXTENTS, 528 .create_intent = xfs_refcount_update_create_intent, 529 .abort_intent = xfs_refcount_update_abort_intent, 530 .create_done = xfs_refcount_update_create_done, 531 .finish_item = xfs_refcount_update_finish_item, 532 .finish_cleanup = xfs_refcount_finish_one_cleanup, 533 .cancel_item = xfs_refcount_update_cancel_item, 534 .recover_work = xfs_refcount_recover_work, 535 .relog_intent = xfs_refcount_relog_intent, 536}; 537 538STATIC bool 539xfs_cui_item_match( 540 struct xfs_log_item *lip, 541 uint64_t intent_id) 542{ 543 return CUI_ITEM(lip)->cui_format.cui_id == intent_id; 544} 545 546static const struct xfs_item_ops xfs_cui_item_ops = { 547 .flags = XFS_ITEM_INTENT, 548 .iop_size = xfs_cui_item_size, 549 .iop_format = xfs_cui_item_format, 550 .iop_unpin = xfs_cui_item_unpin, 551 .iop_release = xfs_cui_item_release, 552 .iop_match = xfs_cui_item_match, 553}; 554 555static inline void 556xfs_cui_copy_format( 557 struct xfs_cui_log_format *dst, 558 const struct xfs_cui_log_format *src) 559{ 560 unsigned int i; 561 562 memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents)); 563 564 for (i = 0; i < src->cui_nextents; i++) 565 memcpy(&dst->cui_extents[i], &src->cui_extents[i], 566 sizeof(struct xfs_phys_extent)); 567} 568 569/* 570 * This routine is called to create an in-core extent refcount update 571 * item from the cui format structure which was logged on disk. 572 * It allocates an in-core cui, copies the extents from the format 573 * structure into it, and adds the cui to the AIL with the given 574 * LSN. 575 */ 576STATIC int 577xlog_recover_cui_commit_pass2( 578 struct xlog *log, 579 struct list_head *buffer_list, 580 struct xlog_recover_item *item, 581 xfs_lsn_t lsn) 582{ 583 struct xfs_mount *mp = log->l_mp; 584 struct xfs_cui_log_item *cuip; 585 struct xfs_cui_log_format *cui_formatp; 586 size_t len; 587 588 cui_formatp = item->ri_buf[0].i_addr; 589 590 if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { 591 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 592 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 593 return -EFSCORRUPTED; 594 } 595 596 len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); 597 if (item->ri_buf[0].i_len != len) { 598 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 599 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 600 return -EFSCORRUPTED; 601 } 602 603 cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); 604 xfs_cui_copy_format(&cuip->cui_format, cui_formatp); 605 atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); 606 607 xlog_recover_intent_item(log, &cuip->cui_item, lsn, 608 &xfs_refcount_update_defer_type); 609 return 0; 610} 611 612const struct xlog_recover_item_ops xlog_cui_item_ops = { 613 .item_type = XFS_LI_CUI, 614 .commit_pass2 = xlog_recover_cui_commit_pass2, 615}; 616 617/* 618 * This routine is called when an CUD format structure is found in a committed 619 * transaction in the log. Its purpose is to cancel the corresponding CUI if it 620 * was still in the log. To do this it searches the AIL for the CUI with an id 621 * equal to that in the CUD format structure. If we find it we drop the CUD 622 * reference, which removes the CUI from the AIL and frees it. 623 */ 624STATIC int 625xlog_recover_cud_commit_pass2( 626 struct xlog *log, 627 struct list_head *buffer_list, 628 struct xlog_recover_item *item, 629 xfs_lsn_t lsn) 630{ 631 struct xfs_cud_log_format *cud_formatp; 632 633 cud_formatp = item->ri_buf[0].i_addr; 634 if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { 635 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, 636 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 637 return -EFSCORRUPTED; 638 } 639 640 xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id); 641 return 0; 642} 643 644const struct xlog_recover_item_ops xlog_cud_item_ops = { 645 .item_type = XFS_LI_CUD, 646 .commit_pass2 = xlog_recover_cud_commit_pass2, 647}; 648