1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2001,2008 Oracle. All rights reserved. 5 * 6 * $Id: txn_util.c,v 12.25 2008/01/31 18:40:48 bostic Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/lock.h" 14#include "dbinc/mp.h" 15#include "dbinc/txn.h" 16#include "dbinc/log.h" 17#include "dbinc/db_am.h" 18 19typedef struct __txn_event TXN_EVENT; 20struct __txn_event { 21 TXN_EVENT_T op; 22 TAILQ_ENTRY(__txn_event) links; 23 union { 24 struct { 25 /* Delayed close. */ 26 DB *dbp; 27 } c; 28 struct { 29 /* Delayed remove. */ 30 char *name; 31 u_int8_t *fileid; 32 int inmem; 33 } r; 34 struct { 35 /* Lock event. */ 36 DB_LOCK lock; 37 DB_LOCKER *locker; 38 DB *dbp; 39 } t; 40 } u; 41}; 42 43/* 44 * __txn_closeevent -- 45 * 46 * Creates a close event that can be added to the [so-called] commit list, so 47 * that we can redo a failed DB handle close once we've aborted the transaction. 48 * 49 * PUBLIC: int __txn_closeevent __P((ENV *, DB_TXN *, DB *)); 50 */ 51int 52__txn_closeevent(env, txn, dbp) 53 ENV *env; 54 DB_TXN *txn; 55 DB *dbp; 56{ 57 int ret; 58 TXN_EVENT *e; 59 60 e = NULL; 61 if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0) 62 return (ret); 63 64 e->u.c.dbp = dbp; 65 e->op = TXN_CLOSE; 66 TAILQ_INSERT_TAIL(&txn->events, e, links); 67 68 return (0); 69} 70 71/* 72 * __txn_remevent -- 73 * 74 * Creates a remove event that can be added to the commit list. 75 * 76 * PUBLIC: int __txn_remevent __P((ENV *, 77 * PUBLIC: DB_TXN *, const char *, u_int8_t *, int)); 78 */ 79int 80__txn_remevent(env, txn, name, fileid, inmem) 81 ENV *env; 82 DB_TXN *txn; 83 const char *name; 84 u_int8_t *fileid; 85 int inmem; 86{ 87 int ret; 88 TXN_EVENT *e; 89 90 e = NULL; 91 if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0) 92 return (ret); 93 94 if ((ret = __os_strdup(env, name, &e->u.r.name)) != 0) 95 goto err; 96 97 if (fileid != NULL) { 98 if ((ret = __os_calloc(env, 99 1, DB_FILE_ID_LEN, &e->u.r.fileid)) != 0) 100 return (ret); 101 memcpy(e->u.r.fileid, fileid, DB_FILE_ID_LEN); 102 } 103 104 e->u.r.inmem = inmem; 105 e->op = TXN_REMOVE; 106 TAILQ_INSERT_TAIL(&txn->events, e, links); 107 108 return (0); 109 110err: if (e != NULL) 111 __os_free(env, e); 112 113 return (ret); 114} 115 116/* 117 * __txn_remrem -- 118 * Remove a remove event because the remove has been superceeded, 119 * by a create of the same name, for example. 120 * 121 * PUBLIC: void __txn_remrem __P((ENV *, DB_TXN *, const char *)); 122 */ 123void 124__txn_remrem(env, txn, name) 125 ENV *env; 126 DB_TXN *txn; 127 const char *name; 128{ 129 TXN_EVENT *e, *next_e; 130 131 for (e = TAILQ_FIRST(&txn->events); e != NULL; e = next_e) { 132 next_e = TAILQ_NEXT(e, links); 133 if (e->op != TXN_REMOVE || strcmp(name, e->u.r.name) != 0) 134 continue; 135 TAILQ_REMOVE(&txn->events, e, links); 136 __os_free(env, e->u.r.name); 137 if (e->u.r.fileid != NULL) 138 __os_free(env, e->u.r.fileid); 139 __os_free(env, e); 140 } 141 142 return; 143} 144 145/* 146 * __txn_lockevent -- 147 * 148 * Add a lockevent to the commit-queue. The lock event indicates a locker 149 * trade. 150 * 151 * PUBLIC: int __txn_lockevent __P((ENV *, 152 * PUBLIC: DB_TXN *, DB *, DB_LOCK *, DB_LOCKER *)); 153 */ 154int 155__txn_lockevent(env, txn, dbp, lock, locker) 156 ENV *env; 157 DB_TXN *txn; 158 DB *dbp; 159 DB_LOCK *lock; 160 DB_LOCKER *locker; 161{ 162 int ret; 163 TXN_EVENT *e; 164 165 if (!LOCKING_ON(env)) 166 return (0); 167 168 e = NULL; 169 if ((ret = __os_calloc(env, 1, sizeof(TXN_EVENT), &e)) != 0) 170 return (ret); 171 172 e->u.t.locker = locker; 173 e->u.t.lock = *lock; 174 e->u.t.dbp = dbp; 175 e->op = TXN_TRADE; 176 TAILQ_INSERT_TAIL(&txn->events, e, links); 177 dbp->cur_txn = txn; 178 179 return (0); 180} 181 182/* 183 * __txn_remlock -- 184 * Remove a lock event because the locker is going away. We can remove 185 * by lock (using offset) or by locker_id (or by both). 186 * 187 * PUBLIC: void __txn_remlock __P((ENV *, DB_TXN *, DB_LOCK *, DB_LOCKER *)); 188 */ 189void 190__txn_remlock(env, txn, lock, locker) 191 ENV *env; 192 DB_TXN *txn; 193 DB_LOCK *lock; 194 DB_LOCKER *locker; 195{ 196 TXN_EVENT *e, *next_e; 197 198 for (e = TAILQ_FIRST(&txn->events); e != NULL; e = next_e) { 199 next_e = TAILQ_NEXT(e, links); 200 if ((e->op != TXN_TRADE && e->op != TXN_TRADED) || 201 (e->u.t.lock.off != lock->off && e->u.t.locker != locker)) 202 continue; 203 TAILQ_REMOVE(&txn->events, e, links); 204 __os_free(env, e); 205 } 206 207 return; 208} 209 210/* 211 * __txn_doevents -- 212 * Process the list of events associated with a transaction. On commit, 213 * apply the events; on abort, just toss the entries. 214 * 215 * PUBLIC: int __txn_doevents __P((ENV *, DB_TXN *, int, int)); 216 */ 217#define DO_TRADE do { \ 218 memset(&req, 0, sizeof(req)); \ 219 req.lock = e->u.t.lock; \ 220 req.op = DB_LOCK_TRADE; \ 221 t_ret = __lock_vec(env, e->u.t.locker, 0, &req, 1, NULL); \ 222 if (t_ret == 0) { \ 223 e->u.t.dbp->cur_locker = e->u.t.locker; \ 224 e->u.t.dbp->cur_txn = NULL; \ 225 } else if (t_ret == DB_NOTFOUND) \ 226 t_ret = 0; \ 227 if (t_ret != 0 && ret == 0) \ 228 ret = t_ret; \ 229 e->op = TXN_TRADED; \ 230} while (0) 231 232int 233__txn_doevents(env, txn, opcode, preprocess) 234 ENV *env; 235 DB_TXN *txn; 236 int opcode, preprocess; 237{ 238 DB_LOCKREQ req; 239 TXN_EVENT *e; 240 int ret, t_ret; 241 242 ret = 0; 243 244 /* 245 * This phase only gets called if we have a phase where we 246 * release read locks. Since not all paths will call this 247 * phase, we have to check for it below as well. So, when 248 * we do the trade, we update the opcode of the entry so that 249 * we don't try the trade again. 250 */ 251 if (preprocess) { 252 for (e = TAILQ_FIRST(&txn->events); 253 e != NULL; e = TAILQ_NEXT(e, links)) { 254 if (e->op != TXN_TRADE || 255 IS_WRITELOCK(e->u.t.lock.mode)) 256 continue; 257 DO_TRADE; 258 } 259 return (ret); 260 } 261 262 /* 263 * Prepare should only cause a preprocess, since the transaction 264 * isn't over. 265 */ 266 DB_ASSERT(env, opcode != TXN_PREPARE); 267 while ((e = TAILQ_FIRST(&txn->events)) != NULL) { 268 TAILQ_REMOVE(&txn->events, e, links); 269 /* 270 * Most deferred events should only happen on 271 * commits, not aborts or prepares. The one exception 272 * is a close which gets done on commit and abort, but 273 * not prepare. If we're not doing operations, then we 274 * can just go free resources. 275 */ 276 if (opcode == TXN_ABORT && e->op != TXN_CLOSE) 277 goto dofree; 278 switch (e->op) { 279 case TXN_CLOSE: 280 if ((t_ret = __db_close(e->u.c.dbp, 281 NULL, DB_NOSYNC)) != 0 && ret == 0) 282 ret = t_ret; 283 break; 284 case TXN_REMOVE: 285 if (e->u.r.fileid != NULL) { 286 if ((t_ret = __memp_nameop(env, 287 e->u.r.fileid, NULL, e->u.r.name, 288 NULL, e->u.r.inmem)) != 0 && ret == 0) 289 ret = t_ret; 290 } else if ((t_ret = 291 __os_unlink(env, e->u.r.name, 0)) != 0 && ret == 0) 292 ret = t_ret; 293 break; 294 case TXN_TRADE: 295 DO_TRADE; 296 /* Fall through */ 297 case TXN_TRADED: 298 /* Downgrade the lock. */ 299 if ((t_ret = __lock_downgrade(env, 300 &e->u.t.lock, DB_LOCK_READ, 0)) != 0 && ret == 0) 301 ret = t_ret; 302 break; 303 default: 304 /* This had better never happen. */ 305 DB_ASSERT(env, 0); 306 } 307dofree: 308 /* Free resources here. */ 309 switch (e->op) { 310 case TXN_REMOVE: 311 if (e->u.r.fileid != NULL) 312 __os_free(env, e->u.r.fileid); 313 __os_free(env, e->u.r.name); 314 break; 315 case TXN_TRADE: 316 if (opcode == TXN_ABORT) 317 e->u.t.dbp->cur_txn = NULL; 318 break; 319 case TXN_CLOSE: 320 case TXN_TRADED: 321 default: 322 break; 323 } 324 __os_free(env, e); 325 } 326 327 return (ret); 328} 329 330/* 331 * PUBLIC: int __txn_record_fname __P((ENV *, DB_TXN *, FNAME *)); 332 */ 333int 334__txn_record_fname(env, txn, fname) 335 ENV *env; 336 DB_TXN *txn; 337 FNAME *fname; 338{ 339 DB_LOG *dblp; 340 DB_TXNMGR *mgr; 341 TXN_DETAIL *td; 342 roff_t fname_off; 343 roff_t *np, *ldbs; 344 u_int32_t i; 345 int ret; 346 347 if ((td = txn->td) == NULL) 348 return (0); 349 mgr = env->tx_handle; 350 dblp = env->lg_handle; 351 fname_off = R_OFFSET(&dblp->reginfo, fname); 352 353 /* See if we already have a ref to this DB handle. */ 354 ldbs = R_ADDR(&mgr->reginfo, td->log_dbs); 355 for (i = 0, np = ldbs; i < td->nlog_dbs; i++, np++) 356 if (*np == fname_off) 357 return (0); 358 359 if (td->nlog_slots <= td->nlog_dbs) { 360 TXN_SYSTEM_LOCK(env); 361 if ((ret = __env_alloc(&mgr->reginfo, 362 sizeof(roff_t) * (td->nlog_slots << 1), &np)) != 0) 363 return (ret); 364 memcpy(np, ldbs, td->nlog_dbs * sizeof(roff_t)); 365 if (td->nlog_slots > TXN_NSLOTS) 366 __env_alloc_free(&mgr->reginfo, ldbs); 367 368 TXN_SYSTEM_UNLOCK(env); 369 td->log_dbs = R_OFFSET(&mgr->reginfo, np); 370 ldbs = np; 371 td->nlog_slots = td->nlog_slots << 1; 372 } 373 374 ldbs[td->nlog_dbs] = fname_off; 375 td->nlog_dbs++; 376 fname->txn_ref++; 377 378 return (0); 379} 380 381/* 382 * __txn_dref_fnam -- 383 * Either pass the fname to our parent txn or decrement the refcount 384 * and close the fileid if it goes to zero. 385 * 386 * PUBLIC: int __txn_dref_fname __P((ENV *, DB_TXN *)); 387 */ 388int 389__txn_dref_fname(env, txn) 390 ENV *env; 391 DB_TXN *txn; 392{ 393 DB_LOG *dblp; 394 DB_TXNMGR *mgr; 395 FNAME *fname; 396 roff_t *np; 397 TXN_DETAIL *ptd, *td; 398 u_int32_t i; 399 int ret; 400 401 td = txn->td; 402 403 if (td->nlog_dbs == 0) 404 return (0); 405 406 mgr = env->tx_handle; 407 dblp = env->lg_handle; 408 ret = 0; 409 410 ptd = txn->parent != NULL ? txn->parent->td : NULL; 411 412 np = R_ADDR(&mgr->reginfo, td->log_dbs); 413 for (i = 0; i < td->nlog_dbs; i++, np++) { 414 fname = R_ADDR(&dblp->reginfo, *np); 415 MUTEX_LOCK(env, fname->mutex); 416 if (ptd != NULL) { 417 ret = __txn_record_fname(env, txn->parent, fname); 418 fname->txn_ref--; 419 MUTEX_UNLOCK(env, fname->mutex); 420 } else if (fname->txn_ref == 1) { 421 MUTEX_UNLOCK(env, fname->mutex); 422 DB_ASSERT(env, fname->txn_ref != 0); 423 ret = __dbreg_close_id_int( 424 env, fname, DBREG_CLOSE, 0); 425 } else { 426 fname->txn_ref--; 427 MUTEX_UNLOCK(env, fname->mutex); 428 } 429 if (ret != 0) 430 break; 431 } 432 433 return (ret); 434} 435