1/* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_dir2.h" 28#include "xfs_dmapi.h" 29#include "xfs_mount.h" 30#include "xfs_bmap_btree.h" 31#include "xfs_alloc_btree.h" 32#include "xfs_ialloc_btree.h" 33#include "xfs_dir2_sf.h" 34#include "xfs_attr_sf.h" 35#include "xfs_dinode.h" 36#include "xfs_inode.h" 37#include "xfs_inode_item.h" 38#include "xfs_bmap.h" 39#include "xfs_btree.h" 40#include "xfs_ialloc.h" 41#include "xfs_itable.h" 42#include "xfs_dfrag.h" 43#include "xfs_error.h" 44#include "xfs_rw.h" 45 46/* 47 * Syssgi interface for swapext 48 */ 49int 50xfs_swapext( 51 xfs_swapext_t __user *sxu) 52{ 53 xfs_swapext_t *sxp; 54 xfs_inode_t *ip=NULL, *tip=NULL; 55 xfs_mount_t *mp; 56 struct file *fp = NULL, *tfp = NULL; 57 bhv_vnode_t *vp, *tvp; 58 int error = 0; 59 60 sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL); 61 if (!sxp) { 62 error = XFS_ERROR(ENOMEM); 63 goto error0; 64 } 65 66 if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) { 67 error = XFS_ERROR(EFAULT); 68 goto error0; 69 } 70 71 /* Pull information for the target fd */ 72 if (((fp = fget((int)sxp->sx_fdtarget)) == NULL) || 73 ((vp = vn_from_inode(fp->f_path.dentry->d_inode)) == NULL)) { 74 error = XFS_ERROR(EINVAL); 75 goto error0; 76 } 77 78 ip = xfs_vtoi(vp); 79 if (ip == NULL) { 80 error = XFS_ERROR(EBADF); 81 goto error0; 82 } 83 84 if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) || 85 ((tvp = vn_from_inode(tfp->f_path.dentry->d_inode)) == NULL)) { 86 error = XFS_ERROR(EINVAL); 87 goto error0; 88 } 89 90 tip = xfs_vtoi(tvp); 91 if (tip == NULL) { 92 error = XFS_ERROR(EBADF); 93 goto error0; 94 } 95 96 if (ip->i_mount != tip->i_mount) { 97 error = XFS_ERROR(EINVAL); 98 goto error0; 99 } 100 101 if (ip->i_ino == tip->i_ino) { 102 error = XFS_ERROR(EINVAL); 103 goto error0; 104 } 105 106 mp = ip->i_mount; 107 108 if (XFS_FORCED_SHUTDOWN(mp)) { 109 error = XFS_ERROR(EIO); 110 goto error0; 111 } 112 113 error = XFS_SWAP_EXTENTS(mp, &ip->i_iocore, &tip->i_iocore, sxp); 114 115 error0: 116 if (fp != NULL) 117 fput(fp); 118 if (tfp != NULL) 119 fput(tfp); 120 121 if (sxp != NULL) 122 kmem_free(sxp, sizeof(xfs_swapext_t)); 123 124 return error; 125} 126 127int 128xfs_swap_extents( 129 xfs_inode_t *ip, 130 xfs_inode_t *tip, 131 xfs_swapext_t *sxp) 132{ 133 xfs_mount_t *mp; 134 xfs_inode_t *ips[2]; 135 xfs_trans_t *tp; 136 xfs_bstat_t *sbp = &sxp->sx_stat; 137 bhv_vnode_t *vp, *tvp; 138 xfs_ifork_t *tempifp, *ifp, *tifp; 139 int ilf_fields, tilf_fields; 140 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; 141 int error = 0; 142 int aforkblks = 0; 143 int taforkblks = 0; 144 __uint64_t tmp; 145 char locked = 0; 146 147 mp = ip->i_mount; 148 149 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 150 if (!tempifp) { 151 error = XFS_ERROR(ENOMEM); 152 goto error0; 153 } 154 155 sbp = &sxp->sx_stat; 156 vp = XFS_ITOV(ip); 157 tvp = XFS_ITOV(tip); 158 159 /* Lock in i_ino order */ 160 if (ip->i_ino < tip->i_ino) { 161 ips[0] = ip; 162 ips[1] = tip; 163 } else { 164 ips[0] = tip; 165 ips[1] = ip; 166 } 167 168 xfs_lock_inodes(ips, 2, 0, lock_flags); 169 locked = 1; 170 171 /* Check permissions */ 172 error = xfs_iaccess(ip, S_IWUSR, NULL); 173 if (error) 174 goto error0; 175 176 error = xfs_iaccess(tip, S_IWUSR, NULL); 177 if (error) 178 goto error0; 179 180 /* Verify that both files have the same format */ 181 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { 182 error = XFS_ERROR(EINVAL); 183 goto error0; 184 } 185 186 /* Verify both files are either real-time or non-realtime */ 187 if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 188 (tip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { 189 error = XFS_ERROR(EINVAL); 190 goto error0; 191 } 192 193 /* Should never get a local format */ 194 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || 195 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { 196 error = XFS_ERROR(EINVAL); 197 goto error0; 198 } 199 200 if (VN_CACHED(tvp) != 0) { 201 xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); 202 error = bhv_vop_flushinval_pages(tvp, 0, -1, FI_REMAPF_LOCKED); 203 if (error) 204 goto error0; 205 } 206 207 /* Verify O_DIRECT for ftmp */ 208 if (VN_CACHED(tvp) != 0) { 209 error = XFS_ERROR(EINVAL); 210 goto error0; 211 } 212 213 /* Verify all data are being swapped */ 214 if (sxp->sx_offset != 0 || 215 sxp->sx_length != ip->i_d.di_size || 216 sxp->sx_length != tip->i_d.di_size) { 217 error = XFS_ERROR(EFAULT); 218 goto error0; 219 } 220 221 /* 222 * If the target has extended attributes, the tmp file 223 * must also in order to ensure the correct data fork 224 * format. 225 */ 226 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 227 error = XFS_ERROR(EINVAL); 228 goto error0; 229 } 230 231 /* 232 * Compare the current change & modify times with that 233 * passed in. If they differ, we abort this swap. 234 * This is the mechanism used to ensure the calling 235 * process that the file was not changed out from 236 * under it. 237 */ 238 if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) || 239 (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) || 240 (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || 241 (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { 242 error = XFS_ERROR(EBUSY); 243 goto error0; 244 } 245 246 /* We need to fail if the file is memory mapped. Once we have tossed 247 * all existing pages, the page fault will have no option 248 * but to go to the filesystem for pages. By making the page fault call 249 * vop_read (or write in the case of autogrow) they block on the iolock 250 * until we have switched the extents. 251 */ 252 if (VN_MAPPED(vp)) { 253 error = XFS_ERROR(EBUSY); 254 goto error0; 255 } 256 257 xfs_iunlock(ip, XFS_ILOCK_EXCL); 258 xfs_iunlock(tip, XFS_ILOCK_EXCL); 259 260 /* 261 * There is a race condition here since we gave up the 262 * ilock. However, the data fork will not change since 263 * we have the iolock (locked for truncation too) so we 264 * are safe. We don't really care if non-io related 265 * fields change. 266 */ 267 268 bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF); 269 270 tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); 271 if ((error = xfs_trans_reserve(tp, 0, 272 XFS_ICHANGE_LOG_RES(mp), 0, 273 0, 0))) { 274 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 275 xfs_iunlock(tip, XFS_IOLOCK_EXCL); 276 xfs_trans_cancel(tp, 0); 277 locked = 0; 278 goto error0; 279 } 280 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 281 282 /* 283 * Count the number of extended attribute blocks 284 */ 285 if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && 286 (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 287 error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); 288 if (error) { 289 xfs_trans_cancel(tp, 0); 290 goto error0; 291 } 292 } 293 if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && 294 (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 295 error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, 296 &taforkblks); 297 if (error) { 298 xfs_trans_cancel(tp, 0); 299 goto error0; 300 } 301 } 302 303 /* 304 * Swap the data forks of the inodes 305 */ 306 ifp = &ip->i_df; 307 tifp = &tip->i_df; 308 *tempifp = *ifp; /* struct copy */ 309 *ifp = *tifp; /* struct copy */ 310 *tifp = *tempifp; /* struct copy */ 311 312 /* 313 * Fix the on-disk inode values 314 */ 315 tmp = (__uint64_t)ip->i_d.di_nblocks; 316 ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; 317 tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; 318 319 tmp = (__uint64_t) ip->i_d.di_nextents; 320 ip->i_d.di_nextents = tip->i_d.di_nextents; 321 tip->i_d.di_nextents = tmp; 322 323 tmp = (__uint64_t) ip->i_d.di_format; 324 ip->i_d.di_format = tip->i_d.di_format; 325 tip->i_d.di_format = tmp; 326 327 ilf_fields = XFS_ILOG_CORE; 328 329 switch(ip->i_d.di_format) { 330 case XFS_DINODE_FMT_EXTENTS: 331 /* If the extents fit in the inode, fix the 332 * pointer. Otherwise it's already NULL or 333 * pointing to the extent. 334 */ 335 if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { 336 ifp->if_u1.if_extents = 337 ifp->if_u2.if_inline_ext; 338 } 339 ilf_fields |= XFS_ILOG_DEXT; 340 break; 341 case XFS_DINODE_FMT_BTREE: 342 ilf_fields |= XFS_ILOG_DBROOT; 343 break; 344 } 345 346 tilf_fields = XFS_ILOG_CORE; 347 348 switch(tip->i_d.di_format) { 349 case XFS_DINODE_FMT_EXTENTS: 350 /* If the extents fit in the inode, fix the 351 * pointer. Otherwise it's already NULL or 352 * pointing to the extent. 353 */ 354 if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { 355 tifp->if_u1.if_extents = 356 tifp->if_u2.if_inline_ext; 357 } 358 tilf_fields |= XFS_ILOG_DEXT; 359 break; 360 case XFS_DINODE_FMT_BTREE: 361 tilf_fields |= XFS_ILOG_DBROOT; 362 break; 363 } 364 365 /* 366 * Increment vnode ref counts since xfs_trans_commit & 367 * xfs_trans_cancel will both unlock the inodes and 368 * decrement the associated ref counts. 369 */ 370 VN_HOLD(vp); 371 VN_HOLD(tvp); 372 373 xfs_trans_ijoin(tp, ip, lock_flags); 374 xfs_trans_ijoin(tp, tip, lock_flags); 375 376 xfs_trans_log_inode(tp, ip, ilf_fields); 377 xfs_trans_log_inode(tp, tip, tilf_fields); 378 379 /* 380 * If this is a synchronous mount, make sure that the 381 * transaction goes to disk before returning to the user. 382 */ 383 if (mp->m_flags & XFS_MOUNT_WSYNC) { 384 xfs_trans_set_sync(tp); 385 } 386 387 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); 388 locked = 0; 389 390 error0: 391 if (locked) { 392 xfs_iunlock(ip, lock_flags); 393 xfs_iunlock(tip, lock_flags); 394 } 395 if (tempifp != NULL) 396 kmem_free(tempifp, sizeof(xfs_ifork_t)); 397 return error; 398} 399