old revision 177698, new revision 185029
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 5 unchanged lines hidden ---

 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/space_map.h>
#include <sys/metaslab_impl.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
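
/*
 * Once a metaslab group has satisfied this many bytes of allocations,
 * the rotor advances to the next group in the class (see the aliquot
 * check in metaslab_alloc_dva() below).  Defaults to 512K.
 */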
uint64_t metaslab_aliquot = 512ULL << 10;

/*
 * ==========================================================================
 * Metaslab classes
 * ==========================================================================
 */
metaslab_class_t *
metaslab_class_create(void)

--- 290 unchanged lines hidden ---

void
metaslab_fini(metaslab_t *msp)
{
    metaslab_group_t *mg = msp->ms_group;
    int t;
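
    /*
     * Back out this metaslab's contribution to the vdev's space totals
     * before detaching it from its group and tearing down its maps.
     */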
    vdev_space_update(mg->mg_vd, -msp->ms_map.sm_size,
        -msp->ms_smo.smo_alloc);

    metaslab_group_remove(mg, msp);

    mutex_enter(&msp->ms_lock);

    space_map_unload(&msp->ms_map);
    space_map_destroy(&msp->ms_map);

--- 176 unchanged lines hidden ---

    space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
    space_map_sync(freemap, SM_FREE, smo, mos, tx);

    mutex_exit(&msp->ms_lock);
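
    /*
     * The space map object header (smo) lives in the bonus buffer of
     * its object in the MOS; copy the updated header back under this tx.
     */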
    VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
    dmu_buf_will_dirty(db, tx);
    ASSERT3U(db->db_size, ==, sizeof (*smo));
    bcopy(smo, db->db_data, db->db_size);
    dmu_buf_rele(db, FTAG);

    dmu_tx_commit(tx);
}

/*
 * Called after a transaction group has completely synced to mark
 * all of the metaslab's free space as usable.

--- 17 unchanged lines hidden ---

     */
    if (freed_map->sm_size == 0) {
        for (t = 0; t < TXG_SIZE; t++) {
            space_map_create(&msp->ms_allocmap[t], sm->sm_start,
                sm->sm_size, sm->sm_shift, sm->sm_lock);
            space_map_create(&msp->ms_freemap[t], sm->sm_start,
                sm->sm_size, sm->sm_shift, sm->sm_lock);
        }
        vdev_space_update(vd, sm->sm_size, 0);
    }

    vdev_space_update(vd, 0, smosync->smo_alloc - smo->smo_alloc);
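
    /*
     * txg & TXG_MASK selects this txg's slot among the per-txg maps;
     * both maps for this txg should have been emptied by the sync above.
     */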
    ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0);
    ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0);

    /*
     * If there's a space_map_load() in progress, wait for it to complete
     * so that we have a consistent view of the in-core space map.
     * Then, add everything we freed in this txg to the map.

--- 125 unchanged lines hidden ---

    return (offset);
}

/*
 * Allocate a block for the specified i/o.
 */
static int
metaslab_alloc_dva(spa_t *spa, uint64_t psize, dva_t *dva, int d,
    dva_t *hintdva, uint64_t txg, boolean_t hintdva_avoid)
{
    metaslab_group_t *mg, *rotor;
    metaslab_class_t *mc;
    vdev_t *vd;
    int dshift = 3;
    int all_zero;
    uint64_t offset = -1ULL;
    uint64_t asize;
    uint64_t distance;

    ASSERT(!DVA_IS_VALID(&dva[d]));

    mc = spa_metaslab_class_select(spa);

    /*
     * Start at the rotor and loop through all mgs until we find something.
     * Note that there's no locking on mc_rotor or mc_allocated because
     * nothing actually breaks if we miss a few updates -- we just won't
     * allocate quite as evenly.  It all balances out over time.
     *
     * If we are doing ditto or log blocks, try to spread them across

--- 9 unchanged lines hidden ---

     *
     * If we are doing gang blocks (hintdva is non-NULL), try to keep
     * ourselves on the same vdev as our gang block header.  That
     * way, we can hope for locality in vdev_cache, plus it makes our
     * fault domains something tractable.
     */
    if (hintdva) {
        vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d]));
        if (hintdva_avoid)
            mg = vd->vdev_mg->mg_next;
        else
            mg = vd->vdev_mg;
    } else if (d != 0) {
        vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
        mg = vd->vdev_mg->mg_next;
    } else {
        mg = mc->mc_rotor;
    }
    rotor = mg;

top:
    all_zero = B_TRUE;
    do {
        vd = mg->mg_vd;
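
        /*
         * Ask that this DVA land at least vdev_asize >> dshift away
         * from any previous DVA on the same vdev.  If no group can
         * honor a nonzero distance, all_zero stays set and dshift is
         * bumped below to gradually relax the requirement.
         */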
        distance = vd->vdev_asize >> dshift;
        if (distance <= (1ULL << vd->vdev_ms_shift))
            distance = 0;
        else
            all_zero = B_FALSE;

        asize = vdev_psize_to_asize(vd, psize);
        ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);

--- 31 unchanged lines hidden ---

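            /*
             * Once this group has consumed its aliquot (plus any bias),
             * advance the rotor so writes stay spread across the class.
             */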
            if (atomic_add_64_nv(&mc->mc_allocated, asize) >=
                mg->mg_aliquot + mg->mg_bias) {
                mc->mc_rotor = mg->mg_next;
                mc->mc_allocated = 0;
            }

            DVA_SET_VDEV(&dva[d], vd->vdev_id);
            DVA_SET_OFFSET(&dva[d], offset);
            DVA_SET_GANG(&dva[d], 0);
            DVA_SET_ASIZE(&dva[d], asize);

            return (0);
        }
        mc->mc_rotor = mg->mg_next;
        mc->mc_allocated = 0;
    } while ((mg = mg->mg_next) != rotor);

    if (!all_zero) {
        dshift++;
        ASSERT(dshift < 64);
        goto top;

--- 40 unchanged lines hidden ---

    if (now) {
        space_map_remove(&msp->ms_allocmap[txg & TXG_MASK],
            offset, size);
        space_map_free(&msp->ms_map, offset, size);
    } else {
        if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0)
            vdev_dirty(vd, VDD_METASLAB, msp, txg);
        space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size);

        /*
         * Verify that this region is actually allocated in
         * either a ms_allocmap or the ms_map.
         */
        if (msp->ms_map.sm_loaded) {
            boolean_t allocd = B_FALSE;
            int i;

            if (!space_map_contains(&msp->ms_map, offset, size)) {
                allocd = B_TRUE;
            } else {
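                /*
                 * The segment may have been allocated in any txg that
                 * is still in flight (open, quiescing, or syncing), so
                 * check each of those per-txg allocmaps as well.
                 */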
                for (i = 0; i < TXG_CONCURRENT_STATES; i++) {
                    space_map_t *sm = &msp->ms_allocmap
                        [(txg - i) & TXG_MASK];
                    if (space_map_contains(sm,
                        offset, size)) {
                        allocd = B_TRUE;
                        break;
                    }
                }
            }

            if (!allocd) {
                zfs_panic_recover("freeing free segment "
                    "(vdev=%llu offset=%llx size=%llx)",
                    (longlong_t)vdev, (longlong_t)offset,
                    (longlong_t)size);
            }
        }
    }

    mutex_exit(&msp->ms_lock);
}

/*
 * Intent log support: upon opening the pool after a crash, notify the SPA
 * of blocks that the intent log has allocated for immediate write, but

--- 19 unchanged lines hidden ---

    msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];

    if (DVA_GET_GANG(dva))
        size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);

    mutex_enter(&msp->ms_lock);
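
    /*
     * Activating the metaslab loads its space map into core (if it
     * isn't loaded already) so the segment can be claimed below.
     */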
    error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
    if (error) {
        mutex_exit(&msp->ms_lock);
        return (error);
    }

    if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
        vdev_dirty(vd, VDD_METASLAB, msp, txg);

    space_map_claim(&msp->ms_map, offset, size);
    space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);

    mutex_exit(&msp->ms_lock);

    return (0);
}

int
metaslab_alloc(spa_t *spa, uint64_t psize, blkptr_t *bp, int ndvas,
    uint64_t txg, blkptr_t *hintbp, boolean_t hintbp_avoid)
{
    dva_t *dva = bp->blk_dva;
    dva_t *hintdva = hintbp->blk_dva;
    int d;
    int error = 0;

    ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa));
    ASSERT(BP_GET_NDVAS(bp) == 0);
    ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp));
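
    /*
     * Allocate each DVA in turn.  On failure, unwind by immediately
     * freeing (now == B_TRUE) and zeroing every DVA allocated so far,
     * leaving bp untouched.
     */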
    for (d = 0; d < ndvas; d++) {
        error = metaslab_alloc_dva(spa, psize, dva, d, hintdva,
            txg, hintbp_avoid);
        if (error) {
            for (d--; d >= 0; d--) {
                metaslab_free_dva(spa, &dva[d], txg, B_TRUE);
                bzero(&dva[d], sizeof (dva_t));
            }
            return (error);
        }
    }
    ASSERT(error == 0);
    ASSERT(BP_GET_NDVAS(bp) == ndvas);

    return (0);
}

void
metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
{
    const dva_t *dva = bp->blk_dva;
    int ndvas = BP_GET_NDVAS(bp);
    int d;

    ASSERT(!BP_IS_HOLE(bp));

    for (d = 0; d < ndvas; d++)
        metaslab_free_dva(spa, &dva[d], txg, now);
}

int
metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
{
    const dva_t *dva = bp->blk_dva;
    int ndvas = BP_GET_NDVAS(bp);
    int d, error;
    int last_error = 0;

    ASSERT(!BP_IS_HOLE(bp));
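
    /*
     * Attempt to claim every DVA even if one fails; return the last
     * error so the caller knows the claim was incomplete.
     */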
    for (d = 0; d < ndvas; d++)
        if ((error = metaslab_claim_dva(spa, &dva[d], txg)) != 0)
            last_error = error;

    return (last_error);
}