1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2001,2008 Oracle. All rights reserved. 5 * 6 * $Id: fop_rec.c,v 12.27 2008/01/31 18:40:43 bostic Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/fop.h" 14#include "dbinc/db_am.h" 15#include "dbinc/mp.h" 16#include "dbinc/txn.h" 17 18static int __fop_rename_recover_int 19 __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int)); 20 21/* 22 * The transactional guarantees Berkeley DB provides for file 23 * system level operations (database physical file create, delete, 24 * rename) are based on our understanding of current file system 25 * semantics; a system that does not provide these semantics and 26 * guarantees could be in danger. 27 * 28 * First, as in standard database changes, fsync and fdatasync must 29 * work: when applied to the log file, the records written into the 30 * log must be transferred to stable storage. 31 * 32 * Second, it must not be possible for the log file to be removed 33 * without previous file system level operations being flushed to 34 * stable storage. Berkeley DB applications write log records 35 * describing file system operations into the log, then perform the 36 * file system operation, then commit the enclosing transaction 37 * (which flushes the log file to stable storage). Subsequently, 38 * a database environment checkpoint may make it possible for the 39 * application to remove the log file containing the record of the 40 * file system operation. DB's transactional guarantees for file 41 * system operations require the log file removal not succeed until 42 * all previous filesystem operations have been flushed to stable 43 * storage. In other words, the flush of the log file, or the 44 * removal of the log file, must block until all previous 45 * filesystem operations have been flushed to stable storage. This 46 * semantic is not, as far as we know, required by any existing 47 * standards document, but we have never seen a filesystem where 48 * it does not apply. 49 */ 50 51/* 52 * __fop_create_recover -- 53 * Recovery function for create. 54 * 55 * PUBLIC: int __fop_create_recover 56 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 57 */ 58int 59__fop_create_recover(env, dbtp, lsnp, op, info) 60 ENV *env; 61 DBT *dbtp; 62 DB_LSN *lsnp; 63 db_recops op; 64 void *info; 65{ 66 __fop_create_args *argp; 67 DB_FH *fhp; 68 DBMETA *meta; 69 u_int8_t mbuf[DBMETASIZE]; 70 int ret; 71 char *real_name; 72 73 COMPQUIET(info, NULL); 74 75 real_name = NULL; 76 REC_PRINT(__fop_create_print); 77 REC_NOOP_INTRO(__fop_create_read); 78 meta = (DBMETA *)mbuf; 79 80 if ((ret = __db_appname(env, (APPNAME)argp->appname, 81 (const char *)argp->name.data, 0, NULL, &real_name)) != 0) 82 goto out; 83 84 if (DB_UNDO(op)) { 85 /* 86 * If the file was opened in mpool, we must mark it as 87 * dead via nameop which will also unlink the file. 88 */ 89 if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) { 90 if (__fop_read_meta(env, 91 real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && 92 __db_chk_meta(env, NULL, meta, 1) == 0) { 93 if ((ret = __memp_nameop(env, 94 meta->uid, NULL, real_name, NULL, 0)) != 0) 95 goto out; 96 } else 97 goto do_unlink; 98 (void)__os_closehandle(env, fhp); 99 } else 100do_unlink: (void)__os_unlink(env, real_name, 0); 101 } else if (DB_REDO(op)) { 102 if ((ret = __os_open(env, real_name, 0, 103 DB_OSO_CREATE, (int)argp->mode, &fhp)) == 0) 104 (void)__os_closehandle(env, fhp); 105 else 106 goto out; 107 } 108 109 *lsnp = argp->prev_lsn; 110 111out: if (real_name != NULL) 112 __os_free(env, real_name); 113 114 REC_NOOP_CLOSE; 115} 116 117/* 118 * __fop_remove_recover -- 119 * Recovery function for remove. 120 * 121 * PUBLIC: int __fop_remove_recover 122 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 123 */ 124int 125__fop_remove_recover(env, dbtp, lsnp, op, info) 126 ENV *env; 127 DBT *dbtp; 128 DB_LSN *lsnp; 129 db_recops op; 130 void *info; 131{ 132 __fop_remove_args *argp; 133 int ret; 134 char *real_name; 135 136 COMPQUIET(info, NULL); 137 138 real_name = NULL; 139 REC_PRINT(__fop_remove_print); 140 REC_NOOP_INTRO(__fop_remove_read); 141 142 if ((ret = __db_appname(env, (APPNAME)argp->appname, 143 (const char *)argp->name.data, 0, NULL, &real_name)) != 0) 144 goto out; 145 146 /* Its ok if the file is not there. */ 147 if (DB_REDO(op)) 148 (void)__memp_nameop(env, 149 (u_int8_t *)argp->fid.data, NULL, real_name, NULL, 0); 150 151 *lsnp = argp->prev_lsn; 152out: if (real_name != NULL) 153 __os_free(env, real_name); 154 REC_NOOP_CLOSE; 155} 156 157/* 158 * __fop_write_recover -- 159 * Recovery function for writechunk. 160 * 161 * PUBLIC: int __fop_write_recover 162 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 163 */ 164int 165__fop_write_recover(env, dbtp, lsnp, op, info) 166 ENV *env; 167 DBT *dbtp; 168 DB_LSN *lsnp; 169 db_recops op; 170 void *info; 171{ 172 __fop_write_args *argp; 173 int ret; 174 175 COMPQUIET(info, NULL); 176 177 REC_PRINT(__fop_write_print); 178 REC_NOOP_INTRO(__fop_write_read); 179 180 ret = 0; 181 if (DB_UNDO(op)) 182 DB_ASSERT(env, argp->flag != 0); 183 else if (DB_REDO(op)) 184 ret = __fop_write(env, 185 argp->txnp, argp->name.data, (APPNAME)argp->appname, 186 NULL, argp->pgsize, argp->pageno, argp->offset, 187 argp->page.data, argp->page.size, argp->flag, 0); 188 189 if (ret == 0) 190 *lsnp = argp->prev_lsn; 191 REC_NOOP_CLOSE; 192} 193 194/* 195 * __fop_rename_recover -- 196 * Recovery functions for rename. There are two variants that 197 * both use the same utility function. Had we known about this on day 198 * one, we would have simply added a parameter. However, since we need 199 * to retain old records for backward compatibility (online-upgrade) 200 * wrapping the two seems like the right solution. 201 * 202 * PUBLIC: int __fop_rename_recover 203 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 204 * 205 * PUBLIC: int __fop_rename_noundo_recover 206 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 207 */ 208int 209__fop_rename_recover(env, dbtp, lsnp, op, info) 210 ENV *env; 211 DBT *dbtp; 212 DB_LSN *lsnp; 213 db_recops op; 214 void *info; 215{ 216 return (__fop_rename_recover_int(env, dbtp, lsnp, op, info, 1)); 217} 218 219int 220__fop_rename_noundo_recover(env, dbtp, lsnp, op, info) 221 ENV *env; 222 DBT *dbtp; 223 DB_LSN *lsnp; 224 db_recops op; 225 void *info; 226{ 227 return (__fop_rename_recover_int(env, dbtp, lsnp, op, info, 0)); 228} 229 230static int 231__fop_rename_recover_int(env, dbtp, lsnp, op, info, undo) 232 ENV *env; 233 DBT *dbtp; 234 DB_LSN *lsnp; 235 db_recops op; 236 void *info; 237 int undo; 238{ 239 __fop_rename_args *argp; 240 DB_FH *fhp; 241 DBMETA *meta; 242 u_int8_t *fileid, mbuf[DBMETASIZE]; 243 int ret; 244 char *real_new, *real_old, *src; 245 246 COMPQUIET(info, NULL); 247 248 fhp = NULL; 249 meta = (DBMETA *)&mbuf[0]; 250 ret = 0; 251 real_new = real_old = NULL; 252 253 REC_PRINT(__fop_rename_print); 254 REC_NOOP_INTRO(__fop_rename_read); 255 fileid = argp->fileid.data; 256 257 if ((ret = __db_appname(env, (APPNAME)argp->appname, 258 (const char *)argp->newname.data, 0, NULL, &real_new)) != 0) 259 goto out; 260 if ((ret = __db_appname(env, (APPNAME)argp->appname, 261 (const char *)argp->oldname.data, 0, NULL, &real_old)) != 0) 262 goto out; 263 264 /* 265 * Verify that we are manipulating the correct file. We should always 266 * be OK on an ABORT or an APPLY, but during recovery, we have to 267 * check. 268 */ 269 if (op != DB_TXN_ABORT && op != DB_TXN_APPLY) { 270 src = DB_UNDO(op) ? real_new : real_old; 271 /* 272 * Interpret any error as meaning that the file either doesn't 273 * exist, doesn't have a meta-data page, or is in some other 274 * way, shape or form, incorrect, so that we should not restore 275 * it. 276 */ 277 if (__os_open(env, src, 0, 0, 0, &fhp) != 0) 278 goto done; 279 if (__fop_read_meta(env, 280 src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0) 281 goto done; 282 if (__db_chk_meta(env, NULL, meta, 1) != 0) 283 goto done; 284 if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) 285 goto done; 286 (void)__os_closehandle(env, fhp); 287 fhp = NULL; 288 if (DB_REDO(op)) { 289 /* 290 * Check to see if the target file exists. If it 291 * does and it does not have the proper id then 292 * it is a later version. We just remove the source 293 * file since the state of the world is beyond this 294 * point. 295 */ 296 if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 && 297 __fop_read_meta(env, src, mbuf, 298 DBMETASIZE, fhp, 1, NULL) == 0 && 299 __db_chk_meta(env, NULL, meta, 1) == 0 && 300 memcmp(argp->fileid.data, 301 meta->uid, DB_FILE_ID_LEN) != 0) { 302 (void)__memp_nameop(env, 303 fileid, NULL, real_old, NULL, 0); 304 goto done; 305 } 306 } 307 } 308 309 if (undo && DB_UNDO(op)) 310 (void)__memp_nameop(env, fileid, 311 (const char *)argp->oldname.data, real_new, real_old, 0); 312 if (DB_REDO(op)) 313 (void)__memp_nameop(env, fileid, 314 (const char *)argp->newname.data, real_old, real_new, 0); 315 316done: *lsnp = argp->prev_lsn; 317out: if (real_new != NULL) 318 __os_free(env, real_new); 319 if (real_old != NULL) 320 __os_free(env, real_old); 321 if (fhp != NULL) 322 (void)__os_closehandle(env, fhp); 323 324 REC_NOOP_CLOSE; 325} 326 327/* 328 * __fop_file_remove_recover -- 329 * Recovery function for file_remove. On the REDO pass, we need to 330 * make sure no one recreated the file while we weren't looking. On an 331 * undo pass must check if the file we are interested in is the one that 332 * exists and then set the status of the child transaction depending on 333 * what we find out. 334 * 335 * PUBLIC: int __fop_file_remove_recover 336 * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); 337 */ 338int 339__fop_file_remove_recover(env, dbtp, lsnp, op, info) 340 ENV *env; 341 DBT *dbtp; 342 DB_LSN *lsnp; 343 db_recops op; 344 void *info; 345{ 346 __fop_file_remove_args *argp; 347 DBMETA *meta; 348 DB_FH *fhp; 349 size_t len; 350 u_int8_t mbuf[DBMETASIZE]; 351 u_int32_t cstat, ret_stat; 352 int is_real, is_tmp, ret; 353 char *real_name; 354 355 fhp = NULL; 356 meta = (DBMETA *)&mbuf[0]; 357 is_real = is_tmp = 0; 358 real_name = NULL; 359 REC_PRINT(__fop_file_remove_print); 360 REC_NOOP_INTRO(__fop_file_remove_read); 361 362 /* 363 * This record is only interesting on the backward, forward, and 364 * apply phases. 365 */ 366 if (op != DB_TXN_BACKWARD_ROLL && 367 op != DB_TXN_FORWARD_ROLL && op != DB_TXN_APPLY) 368 goto done; 369 370 if ((ret = __db_appname(env, 371 (APPNAME)argp->appname, argp->name.data, 0, NULL, &real_name)) != 0) 372 goto out; 373 374 /* Verify that we are manipulating the correct file. */ 375 len = 0; 376 if (__os_open(env, real_name, 0, 0, 0, &fhp) != 0 || 377 (ret = __fop_read_meta(env, real_name, 378 mbuf, DBMETASIZE, fhp, 1, &len)) != 0) { 379 /* 380 * If len is non-zero, then the file exists and has something 381 * in it, but that something isn't a full meta-data page, so 382 * this is very bad. Bail out! 383 */ 384 if (len != 0) 385 goto out; 386 387 /* File does not exist. */ 388 cstat = TXN_EXPECTED; 389 } else { 390 /* 391 * We can ignore errors here since we'll simply fail the 392 * checks below and assume this is the wrong file. 393 */ 394 (void)__db_chk_meta(env, NULL, meta, 1); 395 is_real = 396 memcmp(argp->real_fid.data, meta->uid, DB_FILE_ID_LEN) == 0; 397 is_tmp = 398 memcmp(argp->tmp_fid.data, meta->uid, DB_FILE_ID_LEN) == 0; 399 400 if (!is_real && !is_tmp) 401 /* File exists, but isn't what we were removing. */ 402 cstat = TXN_IGNORE; 403 else 404 /* File exists and is the one that we were removing. */ 405 cstat = TXN_COMMIT; 406 } 407 if (fhp != NULL) { 408 (void)__os_closehandle(env, fhp); 409 fhp = NULL; 410 } 411 412 if (DB_UNDO(op)) { 413 /* On the backward pass, we leave a note for the child txn. */ 414 if ((ret = __db_txnlist_update(env, 415 info, argp->child, cstat, NULL, &ret_stat, 1)) != 0) 416 goto out; 417 } else if (DB_REDO(op)) { 418 /* 419 * On the forward pass, check if someone recreated the 420 * file while we weren't looking. 421 */ 422 if (cstat == TXN_COMMIT) 423 (void)__memp_nameop(env, 424 is_real ? argp->real_fid.data : argp->tmp_fid.data, 425 NULL, real_name, NULL, 0); 426 } 427 428done: *lsnp = argp->prev_lsn; 429 ret = 0; 430 431out: if (real_name != NULL) 432 __os_free(env, real_name); 433 if (fhp != NULL) 434 (void)__os_closehandle(env, fhp); 435 REC_NOOP_CLOSE; 436} 437