1/* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_dir.h" 28#include "xfs_dir2.h" 29#include "xfs_dmapi.h" 30#include "xfs_mount.h" 31#include "xfs_bmap_btree.h" 32#include "xfs_alloc_btree.h" 33#include "xfs_ialloc_btree.h" 34#include "xfs_dir_sf.h" 35#include "xfs_dir2_sf.h" 36#include "xfs_attr_sf.h" 37#include "xfs_dinode.h" 38#include "xfs_inode.h" 39#include "xfs_inode_item.h" 40#include "xfs_bmap.h" 41#include "xfs_btree.h" 42#include "xfs_ialloc.h" 43#include "xfs_itable.h" 44#include "xfs_dfrag.h" 45#include "xfs_error.h" 46#include "xfs_mac.h" 47#include "xfs_rw.h" 48 49#include <sys/capability.h> 50#include <sys/file.h> 51 52/* 53 * Syssgi interface for swapext 54 */ 55int 56xfs_swapext( 57 xfs_swapext_t __user *sxu) 58{ 59 xfs_swapext_t *sxp; 60 xfs_inode_t *ip=NULL, *tip=NULL; 61 xfs_mount_t *mp; 62 xfs_vnode_t *vp = NULL, *tvp = NULL; 63 struct vnode *bvp, *btvp; 64 int error = 0; 65 66 sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL); 67 if (!sxp) { 68 error = XFS_ERROR(ENOMEM); 69 goto error0; 70 } 71 struct thread *td; 72 struct cred *cred; 73 74 td = curthread; 75 cred = td->td_ucred; 76 77 if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) { 78 error = XFS_ERROR(EFAULT); 79 goto error0; 80 } 81 82 /* Pull information for the target fd */ 83 if (fgetvp(td, (int)sxp->sx_fdtarget, CAP_READ | CAP_WRITE, &bvp) 84 != 0) { 85 error = XFS_ERROR(EINVAL); 86 goto error0; 87 } 88 89 vp = VPTOXFSVP(bvp); 90 ip = xfs_vtoi(vp); 91 if (ip == NULL) { 92 error = XFS_ERROR(EBADF); 93 goto error0; 94 } 95 96 if (fgetvp(td, (int)sxp->sx_fdtmp, CAP_READ | CAP_WRITE, &btvp) != 0) { 97 error = XFS_ERROR(EINVAL); 98 goto error0; 99 } 100 101 tvp = VPTOXFSVP(btvp); 102 tip = xfs_vtoi(tvp); 103 if (tip == NULL) { 104 error = XFS_ERROR(EBADF); 105 goto error0; 106 } 107 108 if (ip->i_mount != tip->i_mount) { 109 error = XFS_ERROR(EINVAL); 110 goto error0; 111 } 112 113 if (ip->i_ino == tip->i_ino) { 114 error = XFS_ERROR(EINVAL); 115 goto error0; 116 } 117 118 mp = ip->i_mount; 119 120 if (XFS_FORCED_SHUTDOWN(mp)) { 121 error = XFS_ERROR(EIO); 122 goto error0; 123 } 124 125 error = XFS_SWAP_EXTENTS(mp, &ip->i_iocore, &tip->i_iocore, sxp); 126 127 error0: 128#ifdef RMC 129 if (fp != NULL) 130 fput(fp); 131 if (tfp != NULL) 132 fput(tfp); 133#endif 134 135 if (sxp != NULL) 136 kmem_free(sxp, sizeof(xfs_swapext_t)); 137 138 return error; 139} 140 141int 142xfs_swap_extents( 143 xfs_inode_t *ip, 144 xfs_inode_t *tip, 145 xfs_swapext_t *sxp) 146{ 147 xfs_mount_t *mp; 148 xfs_inode_t *ips[2]; 149 xfs_trans_t *tp; 150 xfs_bstat_t *sbp = &sxp->sx_stat; 151 xfs_vnode_t *vp, *tvp; 152 xfs_ifork_t *tempifp, *ifp, *tifp; 153 int ilf_fields, tilf_fields; 154 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; 155 int error = 0; 156 int aforkblks = 0; 157 int taforkblks = 0; 158 __uint64_t tmp; 159 char locked = 0; 160 161 mp = ip->i_mount; 162 163 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 164 if (!tempifp) { 165 error = XFS_ERROR(ENOMEM); 166 goto error0; 167 } 168 169 sbp = &sxp->sx_stat; 170 vp = XFS_ITOV(ip); 171 tvp = XFS_ITOV(tip); 172 173 /* Lock in i_ino order */ 174 if (ip->i_ino < tip->i_ino) { 175 ips[0] = ip; 176 ips[1] = tip; 177 } else { 178 ips[0] = tip; 179 ips[1] = ip; 180 } 181 182 xfs_lock_inodes(ips, 2, 0, lock_flags); 183 locked = 1; 184 185 /* Check permissions */ 186 error = xfs_iaccess(ip, VWRITE, NULL); 187 if (error) 188 goto error0; 189 190 error = xfs_iaccess(tip, VWRITE, NULL); 191 if (error) 192 goto error0; 193 194 /* Verify that both files have the same format */ 195 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { 196 error = XFS_ERROR(EINVAL); 197 goto error0; 198 } 199 200 /* Verify both files are either real-time or non-realtime */ 201 if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 202 (tip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { 203 error = XFS_ERROR(EINVAL); 204 goto error0; 205 } 206 207 /* Should never get a local format */ 208 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || 209 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { 210 error = XFS_ERROR(EINVAL); 211 goto error0; 212 } 213 214 if (VN_CACHED(tvp) != 0) { 215 xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); 216 XVOP_FLUSHINVAL_PAGES(tvp, 0, -1, FI_REMAPF_LOCKED); 217 } 218 219 /* Verify O_DIRECT for ftmp */ 220 if (VN_CACHED(tvp) != 0) { 221 error = XFS_ERROR(EINVAL); 222 goto error0; 223 } 224 225 /* Verify all data are being swapped */ 226 if (sxp->sx_offset != 0 || 227 sxp->sx_length != ip->i_d.di_size || 228 sxp->sx_length != tip->i_d.di_size) { 229 error = XFS_ERROR(EFAULT); 230 goto error0; 231 } 232 233 /* 234 * If the target has extended attributes, the tmp file 235 * must also in order to ensure the correct data fork 236 * format. 237 */ 238 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 239 error = XFS_ERROR(EINVAL); 240 goto error0; 241 } 242 243 /* 244 * Compare the current change & modify times with that 245 * passed in. If they differ, we abort this swap. 246 * This is the mechanism used to ensure the calling 247 * process that the file was not changed out from 248 * under it. 249 */ 250 if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) || 251 (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) || 252 (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || 253 (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { 254 error = XFS_ERROR(EBUSY); 255 goto error0; 256 } 257 258 /* We need to fail if the file is memory mapped. Once we have tossed 259 * all existing pages, the page fault will have no option 260 * but to go to the filesystem for pages. By making the page fault call 261 * VOP_READ (or write in the case of autogrow) they block on the iolock 262 * until we have switched the extents. 263 */ 264 if (VN_MAPPED(vp)) { 265 error = XFS_ERROR(EBUSY); 266 goto error0; 267 } 268 269 xfs_iunlock(ip, XFS_ILOCK_EXCL); 270 xfs_iunlock(tip, XFS_ILOCK_EXCL); 271 272 /* 273 * There is a race condition here since we gave up the 274 * ilock. However, the data fork will not change since 275 * we have the iolock (locked for truncation too) so we 276 * are safe. We don't really care if non-io related 277 * fields change. 278 */ 279 280 XVOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF); 281 282 tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); 283 if ((error = xfs_trans_reserve(tp, 0, 284 XFS_ICHANGE_LOG_RES(mp), 0, 285 0, 0))) { 286 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 287 xfs_iunlock(tip, XFS_IOLOCK_EXCL); 288 xfs_trans_cancel(tp, 0); 289 locked = 0; 290 goto error0; 291 } 292 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 293 294 /* 295 * Count the number of extended attribute blocks 296 */ 297 if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && 298 (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 299 error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); 300 if (error) { 301 xfs_trans_cancel(tp, 0); 302 goto error0; 303 } 304 } 305 if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && 306 (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 307 error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, 308 &taforkblks); 309 if (error) { 310 xfs_trans_cancel(tp, 0); 311 goto error0; 312 } 313 } 314 315 /* 316 * Swap the data forks of the inodes 317 */ 318 ifp = &ip->i_df; 319 tifp = &tip->i_df; 320 *tempifp = *ifp; /* struct copy */ 321 *ifp = *tifp; /* struct copy */ 322 *tifp = *tempifp; /* struct copy */ 323 324 /* 325 * Fix the on-disk inode values 326 */ 327 tmp = (__uint64_t)ip->i_d.di_nblocks; 328 ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; 329 tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; 330 331 tmp = (__uint64_t) ip->i_d.di_nextents; 332 ip->i_d.di_nextents = tip->i_d.di_nextents; 333 tip->i_d.di_nextents = tmp; 334 335 tmp = (__uint64_t) ip->i_d.di_format; 336 ip->i_d.di_format = tip->i_d.di_format; 337 tip->i_d.di_format = tmp; 338 339 ilf_fields = XFS_ILOG_CORE; 340 341 switch(ip->i_d.di_format) { 342 case XFS_DINODE_FMT_EXTENTS: 343 /* If the extents fit in the inode, fix the 344 * pointer. Otherwise it's already NULL or 345 * pointing to the extent. 346 */ 347 if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { 348 ifp->if_u1.if_extents = 349 ifp->if_u2.if_inline_ext; 350 } 351 ilf_fields |= XFS_ILOG_DEXT; 352 break; 353 case XFS_DINODE_FMT_BTREE: 354 ilf_fields |= XFS_ILOG_DBROOT; 355 break; 356 } 357 358 tilf_fields = XFS_ILOG_CORE; 359 360 switch(tip->i_d.di_format) { 361 case XFS_DINODE_FMT_EXTENTS: 362 /* If the extents fit in the inode, fix the 363 * pointer. Otherwise it's already NULL or 364 * pointing to the extent. 365 */ 366 if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { 367 tifp->if_u1.if_extents = 368 tifp->if_u2.if_inline_ext; 369 } 370 tilf_fields |= XFS_ILOG_DEXT; 371 break; 372 case XFS_DINODE_FMT_BTREE: 373 tilf_fields |= XFS_ILOG_DBROOT; 374 break; 375 } 376 377#ifdef XXXKAN /* Not necessary, vnodes are vrefed already by fgetvp */ 378 /* 379 * Increment vnode ref counts since xfs_trans_commit & 380 * xfs_trans_cancel will both unlock the inodes and 381 * decrement the associated ref counts. 382 */ 383 VN_HOLD(vp); 384 VN_HOLD(tvp); 385#endif 386 387 xfs_trans_ijoin(tp, ip, lock_flags); 388 xfs_trans_ijoin(tp, tip, lock_flags); 389 390 xfs_trans_log_inode(tp, ip, ilf_fields); 391 xfs_trans_log_inode(tp, tip, tilf_fields); 392 393 /* 394 * If this is a synchronous mount, make sure that the 395 * transaction goes to disk before returning to the user. 396 */ 397 if (mp->m_flags & XFS_MOUNT_WSYNC) { 398 xfs_trans_set_sync(tp); 399 } 400 401 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT, NULL); 402 403 locked = 0; 404 405 error0: 406 if (locked) { 407 xfs_iunlock(ip, lock_flags); 408 xfs_iunlock(tip, lock_flags); 409 } 410 if (tempifp != NULL) 411 kmem_free(tempifp, sizeof(xfs_ifork_t)); 412 return error; 413} 414