1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2001, 2010 Oracle and/or its affiliates. All rights reserved. 5 * 6 * $Id$ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/lock.h" 14#include "dbinc/mp.h" 15#include "dbinc/txn.h" 16#include "dbinc/log.h" 17#include "dbinc/db_am.h" 18 19typedef struct __txn_event TXN_EVENT; 20struct __txn_event { 21 TXN_EVENT_T op; 22 TAILQ_ENTRY(__txn_event) links; 23 union { 24 struct { 25 /* Delayed close. */ 26 DB *dbp; 27 } c; 28 struct { 29 /* Delayed remove. */ 30 char *name; 31 u_int8_t *fileid; 32 int inmem; 33 } r; 34 struct { 35 /* Lock event. */ 36 DB_LOCK lock; 37 DB_LOCKER *locker; 38 DB *dbp; 39 } t; 40 } u; 41}; 42 43#define TXN_TOP_PARENT(txn) do { \ 44 while (txn->parent != NULL) \ 45 txn = txn->parent; \ 46} while (0) 47 48/* 49 * __txn_closeevent -- 50 * 51 * Creates a close event that can be added to the [so-called] commit list, so 52 * that we can redo a failed DB handle close once we've aborted the transaction. 53 * 54 * PUBLIC: int __txn_closeevent __P((ENV *, DB_TXN *, DB *)); 55 */ 56int 57__txn_closeevent(env, txn, dbp) 58 ENV *env; 59 DB_TXN *txn; 60 DB *dbp; 61{ 62 int ret; 63 TXN_EVENT *e; 64 65 e = NULL; 66 if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0) 67 return (ret); 68 69 e->u.c.dbp = dbp; 70 e->op = TXN_CLOSE; 71 TXN_TOP_PARENT(txn); 72 TAILQ_INSERT_TAIL(&txn->events, e, links); 73 74 return (0); 75} 76 77/* 78 * __txn_remevent -- 79 * 80 * Creates a remove event that can be added to the commit list. 81 * 82 * PUBLIC: int __txn_remevent __P((ENV *, 83 * PUBLIC: DB_TXN *, const char *, u_int8_t *, int)); 84 */ 85int 86__txn_remevent(env, txn, name, fileid, inmem) 87 ENV *env; 88 DB_TXN *txn; 89 const char *name; 90 u_int8_t *fileid; 91 int inmem; 92{ 93 int ret; 94 TXN_EVENT *e; 95 96 e = NULL; 97 if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0) 98 return (ret); 99 100 if ((ret = __os_strdup(env, name, &e->u.r.name)) != 0) 101 goto err; 102 103 if (fileid != NULL) { 104 if ((ret = __os_calloc(env, 105 1, DB_FILE_ID_LEN, &e->u.r.fileid)) != 0) 106 return (ret); 107 memcpy(e->u.r.fileid, fileid, DB_FILE_ID_LEN); 108 } 109 110 e->u.r.inmem = inmem; 111 e->op = TXN_REMOVE; 112 TXN_TOP_PARENT(txn); 113 TAILQ_INSERT_TAIL(&txn->events, e, links); 114 115 return (0); 116 117err: if (e != NULL) 118 __os_free(env, e); 119 120 return (ret); 121} 122 123/* 124 * __txn_remrem -- 125 * Remove a remove event because the remove has been superceeded, 126 * by a create of the same name, for example. 127 * 128 * PUBLIC: void __txn_remrem __P((ENV *, DB_TXN *, const char *)); 129 */ 130void 131__txn_remrem(env, txn, name) 132 ENV *env; 133 DB_TXN *txn; 134 const char *name; 135{ 136 TXN_EVENT *e, *next_e; 137 138 TXN_TOP_PARENT(txn); 139 for (e = TAILQ_FIRST(&txn->events); e != NULL; e = next_e) { 140 next_e = TAILQ_NEXT(e, links); 141 if (e->op != TXN_REMOVE || strcmp(name, e->u.r.name) != 0) 142 continue; 143 TAILQ_REMOVE(&txn->events, e, links); 144 __os_free(env, e->u.r.name); 145 if (e->u.r.fileid != NULL) 146 __os_free(env, e->u.r.fileid); 147 __os_free(env, e); 148 } 149 150 return; 151} 152 153/* 154 * __txn_lockevent -- 155 * 156 * Add a lockevent to the commit-queue. The lock event indicates a locker 157 * trade. 158 * 159 * PUBLIC: int __txn_lockevent __P((ENV *, 160 * PUBLIC: DB_TXN *, DB *, DB_LOCK *, DB_LOCKER *)); 161 */ 162int 163__txn_lockevent(env, txn, dbp, lock, locker) 164 ENV *env; 165 DB_TXN *txn; 166 DB *dbp; 167 DB_LOCK *lock; 168 DB_LOCKER *locker; 169{ 170 int ret; 171 TXN_EVENT *e; 172 173 if (!LOCKING_ON(env)) 174 return (0); 175 176 e = NULL; 177 if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0) 178 return (ret); 179 180 e->u.t.locker = locker; 181 e->u.t.lock = *lock; 182 e->u.t.dbp = dbp; 183 e->op = TXN_TRADE; 184 /* This event goes on the current transaction, not its parent. */ 185 TAILQ_INSERT_TAIL(&txn->events, e, links); 186 dbp->cur_txn = txn; 187 188 return (0); 189} 190 191/* 192 * __txn_remlock -- 193 * Remove a lock event because the locker is going away. We can remove 194 * by lock (using offset) or by locker_id (or by both). 195 * 196 * PUBLIC: void __txn_remlock __P((ENV *, DB_TXN *, DB_LOCK *, DB_LOCKER *)); 197 */ 198void 199__txn_remlock(env, txn, lock, locker) 200 ENV *env; 201 DB_TXN *txn; 202 DB_LOCK *lock; 203 DB_LOCKER *locker; 204{ 205 TXN_EVENT *e, *next_e; 206 207 for (e = TAILQ_FIRST(&txn->events); e != NULL; e = next_e) { 208 next_e = TAILQ_NEXT(e, links); 209 if ((e->op != TXN_TRADE && e->op != TXN_TRADED) || 210 (e->u.t.lock.off != lock->off && e->u.t.locker != locker)) 211 continue; 212 TAILQ_REMOVE(&txn->events, e, links); 213 __os_free(env, e); 214 } 215 216 return; 217} 218 219/* 220 * __txn_doevents -- 221 * Process the list of events associated with a transaction. On commit, 222 * apply the events; on abort, just toss the entries. 223 * 224 * PUBLIC: int __txn_doevents __P((ENV *, DB_TXN *, int, int)); 225 */ 226#define DO_TRADE do { \ 227 memset(&req, 0, sizeof(req)); \ 228 req.lock = e->u.t.lock; \ 229 req.op = DB_LOCK_TRADE; \ 230 t_ret = __lock_vec(env, txn->parent ? \ 231 txn->parent->locker : e->u.t.locker, 0, &req, 1, NULL); \ 232 if (t_ret == 0) { \ 233 if (txn->parent != NULL) { \ 234 e->u.t.dbp->cur_txn = txn->parent; \ 235 e->u.t.dbp->cur_locker = txn->parent->locker; \ 236 } else { \ 237 e->op = TXN_TRADED; \ 238 e->u.t.dbp->cur_locker = e->u.t.locker; \ 239 e->u.t.dbp->cur_txn = NULL; \ 240 } \ 241 } else if (t_ret == DB_NOTFOUND) \ 242 t_ret = 0; \ 243 if (t_ret != 0 && ret == 0) \ 244 ret = t_ret; \ 245} while (0) 246 247int 248__txn_doevents(env, txn, opcode, preprocess) 249 ENV *env; 250 DB_TXN *txn; 251 int opcode, preprocess; 252{ 253 DB_LOCKREQ req; 254 TXN_EVENT *e, *enext; 255 int ret, t_ret; 256 257 ret = 0; 258 259 /* 260 * This phase only gets called if we have a phase where we 261 * release read locks. Since not all paths will call this 262 * phase, we have to check for it below as well. So, when 263 * we do the trade, we update the opcode of the entry so that 264 * we don't try the trade again. 265 */ 266 if (preprocess) { 267 for (e = TAILQ_FIRST(&txn->events); 268 e != NULL; e = enext) { 269 enext = TAILQ_NEXT(e, links); 270 if (e->op != TXN_TRADE || 271 IS_WRITELOCK(e->u.t.lock.mode)) 272 continue; 273 DO_TRADE; 274 if (txn->parent != NULL) { 275 TAILQ_REMOVE(&txn->events, e, links); 276 TAILQ_INSERT_HEAD( 277 &txn->parent->events, e, links); 278 } 279 } 280 return (ret); 281 } 282 283 /* 284 * Prepare should only cause a preprocess, since the transaction 285 * isn't over. 286 */ 287 DB_ASSERT(env, opcode != TXN_PREPARE); 288 while ((e = TAILQ_FIRST(&txn->events)) != NULL) { 289 TAILQ_REMOVE(&txn->events, e, links); 290 /* 291 * Most deferred events should only happen on 292 * commits, not aborts or prepares. The one exception 293 * is a close which gets done on commit and abort, but 294 * not prepare. If we're not doing operations, then we 295 * can just go free resources. 296 */ 297 if (opcode == TXN_ABORT && e->op != TXN_CLOSE) 298 goto dofree; 299 switch (e->op) { 300 case TXN_CLOSE: 301 if ((t_ret = __db_close(e->u.c.dbp, 302 NULL, DB_NOSYNC)) != 0 && ret == 0) 303 ret = t_ret; 304 break; 305 case TXN_REMOVE: 306 if (e->u.r.fileid != NULL) { 307 if ((t_ret = __memp_nameop(env, 308 e->u.r.fileid, NULL, e->u.r.name, 309 NULL, e->u.r.inmem)) != 0 && ret == 0) 310 ret = t_ret; 311 } else if ((t_ret = 312 __os_unlink(env, e->u.r.name, 0)) != 0 && ret == 0) 313 ret = t_ret; 314 break; 315 case TXN_TRADE: 316 DO_TRADE; 317 if (txn->parent != NULL) { 318 TAILQ_INSERT_HEAD( 319 &txn->parent->events, e, links); 320 continue; 321 } 322 /* Fall through */ 323 case TXN_TRADED: 324 /* Downgrade the lock. */ 325 if ((t_ret = __lock_downgrade(env, 326 &e->u.t.lock, DB_LOCK_READ, 0)) != 0 && ret == 0) 327 ret = t_ret; 328 break; 329 default: 330 /* This had better never happen. */ 331 DB_ASSERT(env, 0); 332 } 333dofree: 334 /* Free resources here. */ 335 switch (e->op) { 336 case TXN_REMOVE: 337 if (e->u.r.fileid != NULL) 338 __os_free(env, e->u.r.fileid); 339 __os_free(env, e->u.r.name); 340 break; 341 case TXN_TRADE: 342 if (opcode == TXN_ABORT) 343 e->u.t.dbp->cur_txn = NULL; 344 break; 345 case TXN_CLOSE: 346 case TXN_TRADED: 347 default: 348 break; 349 } 350 __os_free(env, e); 351 } 352 353 return (ret); 354} 355 356/* 357 * PUBLIC: int __txn_record_fname __P((ENV *, DB_TXN *, FNAME *)); 358 */ 359int 360__txn_record_fname(env, txn, fname) 361 ENV *env; 362 DB_TXN *txn; 363 FNAME *fname; 364{ 365 DB_LOG *dblp; 366 DB_TXNMGR *mgr; 367 TXN_DETAIL *td; 368 roff_t fname_off; 369 roff_t *np, *ldbs; 370 u_int32_t i; 371 int ret; 372 373 if ((td = txn->td) == NULL) 374 return (0); 375 mgr = env->tx_handle; 376 dblp = env->lg_handle; 377 fname_off = R_OFFSET(&dblp->reginfo, fname); 378 379 /* See if we already have a ref to this DB handle. */ 380 ldbs = R_ADDR(&mgr->reginfo, td->log_dbs); 381 for (i = 0, np = ldbs; i < td->nlog_dbs; i++, np++) 382 if (*np == fname_off) 383 return (0); 384 385 if (td->nlog_slots <= td->nlog_dbs) { 386 TXN_SYSTEM_LOCK(env); 387 if ((ret = __env_alloc(&mgr->reginfo, 388 sizeof(roff_t) * (td->nlog_slots << 1), &np)) != 0) { 389 TXN_SYSTEM_UNLOCK(env); 390 return (ret); 391 } 392 393 memcpy(np, ldbs, td->nlog_dbs * sizeof(roff_t)); 394 if (td->nlog_slots > TXN_NSLOTS) 395 __env_alloc_free(&mgr->reginfo, ldbs); 396 397 TXN_SYSTEM_UNLOCK(env); 398 td->log_dbs = R_OFFSET(&mgr->reginfo, np); 399 ldbs = np; 400 td->nlog_slots = td->nlog_slots << 1; 401 } 402 403 ldbs[td->nlog_dbs] = fname_off; 404 td->nlog_dbs++; 405 fname->txn_ref++; 406 407 return (0); 408} 409 410/* 411 * __txn_dref_fnam -- 412 * Either pass the fname to our parent txn or decrement the refcount 413 * and close the fileid if it goes to zero. 414 * 415 * PUBLIC: int __txn_dref_fname __P((ENV *, DB_TXN *)); 416 */ 417int 418__txn_dref_fname(env, txn) 419 ENV *env; 420 DB_TXN *txn; 421{ 422 DB_LOG *dblp; 423 DB_TXNMGR *mgr; 424 FNAME *fname; 425 roff_t *np; 426 TXN_DETAIL *ptd, *td; 427 u_int32_t i; 428 int ret; 429 430 td = txn->td; 431 432 if (td->nlog_dbs == 0) 433 return (0); 434 435 mgr = env->tx_handle; 436 dblp = env->lg_handle; 437 ret = 0; 438 439 ptd = txn->parent != NULL ? txn->parent->td : NULL; 440 441 np = R_ADDR(&mgr->reginfo, td->log_dbs); 442 for (i = 0; i < td->nlog_dbs; i++, np++) { 443 fname = R_ADDR(&dblp->reginfo, *np); 444 MUTEX_LOCK(env, fname->mutex); 445 if (ptd != NULL) { 446 ret = __txn_record_fname(env, txn->parent, fname); 447 fname->txn_ref--; 448 MUTEX_UNLOCK(env, fname->mutex); 449 } else if (fname->txn_ref == 1) { 450 MUTEX_UNLOCK(env, fname->mutex); 451 DB_ASSERT(env, fname->txn_ref != 0); 452 ret = __dbreg_close_id_int( 453 env, fname, DBREG_CLOSE, 0); 454 } else { 455 fname->txn_ref--; 456 MUTEX_UNLOCK(env, fname->mutex); 457 } 458 if (ret != 0) 459 break; 460 } 461 462 return (ret); 463} 464