1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: txn_region.c,v 12.34 2008/01/08 20:59:00 bostic Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/log.h" 13#include "dbinc/txn.h" 14 15static int __txn_init __P((ENV *, DB_TXNMGR *)); 16static size_t __txn_region_size __P((ENV *)); 17 18/* 19 * __txn_open -- 20 * Open a transaction region. 21 * 22 * PUBLIC: int __txn_open __P((ENV *, int)); 23 */ 24int 25__txn_open(env, create_ok) 26 ENV *env; 27 int create_ok; 28{ 29 DB_TXNMGR *mgr; 30 int ret; 31 32 /* Create/initialize the transaction manager structure. */ 33 if ((ret = __os_calloc(env, 1, sizeof(DB_TXNMGR), &mgr)) != 0) 34 return (ret); 35 TAILQ_INIT(&mgr->txn_chain); 36 mgr->env = env; 37 38 /* Join/create the txn region. */ 39 mgr->reginfo.env = env; 40 mgr->reginfo.type = REGION_TYPE_TXN; 41 mgr->reginfo.id = INVALID_REGION_ID; 42 mgr->reginfo.flags = REGION_JOIN_OK; 43 if (create_ok) 44 F_SET(&mgr->reginfo, REGION_CREATE_OK); 45 if ((ret = __env_region_attach(env, 46 &mgr->reginfo, __txn_region_size(env))) != 0) 47 goto err; 48 49 /* If we created the region, initialize it. */ 50 if (F_ISSET(&mgr->reginfo, REGION_CREATE)) 51 if ((ret = __txn_init(env, mgr)) != 0) 52 goto err; 53 54 /* Set the local addresses. */ 55 mgr->reginfo.primary = 56 R_ADDR(&mgr->reginfo, mgr->reginfo.rp->primary); 57 58 /* If threaded, acquire a mutex to protect the active TXN list. */ 59 if ((ret = __mutex_alloc( 60 env, MTX_TXN_ACTIVE, DB_MUTEX_PROCESS_ONLY, &mgr->mutex)) != 0) 61 goto err; 62 63 env->tx_handle = mgr; 64 return (0); 65 66err: env->tx_handle = NULL; 67 if (mgr->reginfo.addr != NULL) 68 (void)__env_region_detach(env, &mgr->reginfo, 0); 69 70 (void)__mutex_free(env, &mgr->mutex); 71 __os_free(env, mgr); 72 return (ret); 73} 74 75/* 76 * __txn_init -- 77 * Initialize a transaction region in shared memory. 78 */ 79static int 80__txn_init(env, mgr) 81 ENV *env; 82 DB_TXNMGR *mgr; 83{ 84 DB_ENV *dbenv; 85 DB_LSN last_ckp; 86 DB_TXNREGION *region; 87 int ret; 88 89 dbenv = env->dbenv; 90 91 /* 92 * Find the last checkpoint in the log. 93 */ 94 ZERO_LSN(last_ckp); 95 if (LOGGING_ON(env)) { 96 /* 97 * The log system has already walked through the last 98 * file. Get the LSN of a checkpoint it may have found. 99 */ 100 if ((ret = __log_get_cached_ckp_lsn(env, &last_ckp)) != 0) 101 return (ret); 102 103 /* 104 * If that didn't work, look backwards from the beginning of 105 * the last log file until we find the last checkpoint. 106 */ 107 if (IS_ZERO_LSN(last_ckp) && 108 (ret = __txn_findlastckp(env, &last_ckp, NULL)) != 0) 109 return (ret); 110 } 111 112 if ((ret = __env_alloc(&mgr->reginfo, 113 sizeof(DB_TXNREGION), &mgr->reginfo.primary)) != 0) { 114 __db_errx(env, 115 "Unable to allocate memory for the transaction region"); 116 return (ret); 117 } 118 mgr->reginfo.rp->primary = 119 R_OFFSET(&mgr->reginfo, mgr->reginfo.primary); 120 region = mgr->reginfo.primary; 121 memset(region, 0, sizeof(*region)); 122 123 if ((ret = __mutex_alloc( 124 env, MTX_TXN_REGION, 0, ®ion->mtx_region)) != 0) 125 return (ret); 126 127 region->maxtxns = dbenv->tx_max; 128 region->last_txnid = TXN_MINIMUM; 129 region->cur_maxid = TXN_MAXIMUM; 130 131 if ((ret = __mutex_alloc( 132 env, MTX_TXN_CHKPT, 0, ®ion->mtx_ckp)) != 0) 133 return (ret); 134 region->last_ckp = last_ckp; 135 region->time_ckp = time(NULL); 136 137 memset(®ion->stat, 0, sizeof(region->stat)); 138#ifdef HAVE_STATISTICS 139 region->stat.st_maxtxns = region->maxtxns; 140#endif 141 142 SH_TAILQ_INIT(®ion->active_txn); 143 SH_TAILQ_INIT(®ion->mvcc_txn); 144 return (ret); 145} 146 147/* 148 * __txn_findlastckp -- 149 * Find the last checkpoint in the log, walking backwards from the 150 * max_lsn given or the beginning of the last log file. (The 151 * log system looked through the last log file when it started up.) 152 * 153 * PUBLIC: int __txn_findlastckp __P((ENV *, DB_LSN *, DB_LSN *)); 154 */ 155int 156__txn_findlastckp(env, lsnp, max_lsn) 157 ENV *env; 158 DB_LSN *lsnp; 159 DB_LSN *max_lsn; 160{ 161 DBT dbt; 162 DB_LOGC *logc; 163 DB_LSN lsn; 164 int ret, t_ret; 165 u_int32_t rectype; 166 167 ZERO_LSN(*lsnp); 168 169 if ((ret = __log_cursor(env, &logc)) != 0) 170 return (ret); 171 172 /* Get the last LSN. */ 173 memset(&dbt, 0, sizeof(dbt)); 174 if (max_lsn != NULL) { 175 lsn = *max_lsn; 176 if ((ret = __logc_get(logc, &lsn, &dbt, DB_SET)) != 0) 177 goto err; 178 } else { 179 if ((ret = __logc_get(logc, &lsn, &dbt, DB_LAST)) != 0) 180 goto err; 181 /* 182 * Twiddle the last LSN so it points to the beginning of the 183 * last file; we know there's no checkpoint after that, since 184 * the log system already looked there. 185 */ 186 lsn.offset = 0; 187 } 188 189 /* Read backwards, looking for checkpoints. */ 190 while ((ret = __logc_get(logc, &lsn, &dbt, DB_PREV)) == 0) { 191 if (dbt.size < sizeof(u_int32_t)) 192 continue; 193 memcpy(&rectype, dbt.data, sizeof(u_int32_t)); 194 if (rectype == DB___txn_ckp) { 195 *lsnp = lsn; 196 break; 197 } 198 } 199 200err: if ((t_ret = __logc_close(logc)) != 0 && ret == 0) 201 ret = t_ret; 202 203 /* 204 * Not finding a checkpoint is not an error; there may not exist 205 * one in the log. 206 */ 207 return ((ret == 0 || ret == DB_NOTFOUND) ? 0 : ret); 208} 209 210/* 211 * __txn_env_refresh -- 212 * Clean up after the transaction system on a close or failed open. 213 * 214 * PUBLIC: int __txn_env_refresh __P((ENV *)); 215 */ 216int 217__txn_env_refresh(env) 218 ENV *env; 219{ 220 DB_TXN *txn; 221 DB_TXNMGR *mgr; 222 REGINFO *reginfo; 223 u_int32_t txnid; 224 int aborted, ret, t_ret; 225 226 ret = 0; 227 mgr = env->tx_handle; 228 reginfo = &mgr->reginfo; 229 230 /* 231 * This function can only be called once per process (i.e., not 232 * once per thread), so no synchronization is required. 233 * 234 * The caller is probably doing something wrong if close is called with 235 * active transactions. Try and abort any active transactions that are 236 * not prepared, but it's quite likely the aborts will fail because 237 * recovery won't find open files. If we can't abort any of the 238 * unprepared transaction, panic, we have to run recovery to get back 239 * to a known state. 240 */ 241 aborted = 0; 242 if (TAILQ_FIRST(&mgr->txn_chain) != NULL) { 243 while ((txn = TAILQ_FIRST(&mgr->txn_chain)) != NULL) { 244 /* Prepared transactions are OK. */ 245 txnid = txn->txnid; 246 if (((TXN_DETAIL *)txn->td)->status == TXN_PREPARED) { 247 if ((ret = __txn_discard_int(txn, 0)) != 0) { 248 __db_err(env, ret, 249 "unable to discard txn %#lx", 250 (u_long)txnid); 251 break; 252 } 253 continue; 254 } 255 aborted = 1; 256 if ((t_ret = __txn_abort(txn)) != 0) { 257 __db_err(env, t_ret, 258 "unable to abort transaction %#lx", 259 (u_long)txnid); 260 ret = __env_panic(env, t_ret); 261 break; 262 } 263 } 264 if (aborted) { 265 __db_errx(env, 266 "Error: closing the transaction region with active transactions"); 267 if (ret == 0) 268 ret = EINVAL; 269 } 270 } 271 272 /* Discard the per-thread lock. */ 273 if ((t_ret = __mutex_free(env, &mgr->mutex)) != 0 && ret == 0) 274 ret = t_ret; 275 276 /* Detach from the region. */ 277 if ((t_ret = __env_region_detach(env, reginfo, 0)) != 0 && ret == 0) 278 ret = t_ret; 279 280 __os_free(env, mgr); 281 282 env->tx_handle = NULL; 283 return (ret); 284} 285 286/* 287 * __txn_region_mutex_count -- 288 * Return the number of mutexes the txn region will need. 289 * 290 * PUBLIC: u_int32_t __txn_region_mutex_count __P((ENV *)); 291 */ 292u_int32_t 293__txn_region_mutex_count(env) 294 ENV *env; 295{ 296 DB_ENV *dbenv; 297 298 dbenv = env->dbenv; 299 300 /* 301 * We need a MVCC mutex for each TXN_DETAIL structure, a mutex for 302 * DB_TXNMGR structure, two mutexes for the DB_TXNREGION structure. 303 */ 304 return (dbenv->tx_max + 1 + 2); 305} 306 307/* 308 * __txn_region_size -- 309 * Return the amount of space needed for the txn region. 310 */ 311static size_t 312__txn_region_size(env) 313 ENV *env; 314{ 315 DB_ENV *dbenv; 316 size_t s; 317 318 dbenv = env->dbenv; 319 320 /* 321 * Make the region large enough to hold the primary transaction region 322 * structure, txn_max transaction detail structures, txn_max chunks of 323 * overhead required by the underlying shared region allocator for each 324 * chunk of memory, txn_max transaction names, at an average of 20 325 * bytes each, and 10KB for safety. 326 */ 327 s = sizeof(DB_TXNREGION) + 328 dbenv->tx_max * (sizeof(TXN_DETAIL) + __env_alloc_overhead() + 20) + 329 10 * 1024; 330 return (s); 331} 332 333/* 334 * __txn_id_set -- 335 * Set the current transaction ID and current maximum unused ID (for 336 * testing purposes only). 337 * 338 * PUBLIC: int __txn_id_set __P((ENV *, u_int32_t, u_int32_t)); 339 */ 340int 341__txn_id_set(env, cur_txnid, max_txnid) 342 ENV *env; 343 u_int32_t cur_txnid, max_txnid; 344{ 345 DB_TXNMGR *mgr; 346 DB_TXNREGION *region; 347 int ret; 348 349 ENV_REQUIRES_CONFIG(env, env->tx_handle, "txn_id_set", DB_INIT_TXN); 350 351 mgr = env->tx_handle; 352 region = mgr->reginfo.primary; 353 region->last_txnid = cur_txnid; 354 region->cur_maxid = max_txnid; 355 356 ret = 0; 357 if (cur_txnid < TXN_MINIMUM) { 358 __db_errx(env, "Current ID value %lu below minimum", 359 (u_long)cur_txnid); 360 ret = EINVAL; 361 } 362 if (max_txnid < TXN_MINIMUM) { 363 __db_errx(env, "Maximum ID value %lu below minimum", 364 (u_long)max_txnid); 365 ret = EINVAL; 366 } 367 return (ret); 368} 369 370/* 371 * __txn_oldest_reader -- 372 * Find the oldest "read LSN" of any active transaction' 373 * MVCC changes older than this can safely be discarded from the cache. 374 * 375 * PUBLIC: int __txn_oldest_reader __P((ENV *, DB_LSN *)); 376 */ 377int 378__txn_oldest_reader(env, lsnp) 379 ENV *env; 380 DB_LSN *lsnp; 381{ 382 DB_LSN old_lsn; 383 DB_TXNMGR *mgr; 384 DB_TXNREGION *region; 385 TXN_DETAIL *td; 386 int ret; 387 388 if ((mgr = env->tx_handle) == NULL) 389 return (0); 390 region = mgr->reginfo.primary; 391 392 if ((ret = __log_current_lsn(env, &old_lsn, NULL, NULL)) != 0) 393 return (ret); 394 395 TXN_SYSTEM_LOCK(env); 396 SH_TAILQ_FOREACH(td, ®ion->active_txn, links, __txn_detail) 397 if (LOG_COMPARE(&td->read_lsn, &old_lsn) < 0) 398 old_lsn = td->read_lsn; 399 TXN_SYSTEM_UNLOCK(env); 400 401 DB_ASSERT(env, LOG_COMPARE(&old_lsn, lsnp) >= 0); 402 *lsnp = old_lsn; 403 404 return (0); 405} 406 407/* 408 * __txn_add_buffer -- 409 * Add to the count of buffers created by the given transaction. 410 * 411 * PUBLIC: int __txn_add_buffer __P((ENV *, TXN_DETAIL *)); 412 */ 413int 414__txn_add_buffer(env, td) 415 ENV *env; 416 TXN_DETAIL *td; 417{ 418 DB_ASSERT(env, td != NULL); 419 420 MUTEX_LOCK(env, td->mvcc_mtx); 421 DB_ASSERT(env, td->mvcc_ref < UINT32_MAX); 422 ++td->mvcc_ref; 423 MUTEX_UNLOCK(env, td->mvcc_mtx); 424 425 COMPQUIET(env, NULL); 426 return (0); 427} 428 429/* 430 * __txn_remove_buffer -- 431 * Remove a buffer from a transaction -- free the transaction if necessary. 432 * 433 * PUBLIC: int __txn_remove_buffer __P((ENV *, TXN_DETAIL *, db_mutex_t)); 434 */ 435int 436__txn_remove_buffer(env, td, hash_mtx) 437 ENV *env; 438 TXN_DETAIL *td; 439 db_mutex_t hash_mtx; 440{ 441 DB_TXNMGR *mgr; 442 DB_TXNREGION *region; 443 int need_free, ret; 444 445 DB_ASSERT(env, td != NULL); 446 ret = 0; 447 mgr = env->tx_handle; 448 region = mgr->reginfo.primary; 449 450 MUTEX_LOCK(env, td->mvcc_mtx); 451 DB_ASSERT(env, td->mvcc_ref > 0); 452 need_free = (--td->mvcc_ref == 0); 453 MUTEX_UNLOCK(env, td->mvcc_mtx); 454 455 if (need_free && 456 (td->status == TXN_COMMITTED || td->status == TXN_ABORTED)) { 457 MUTEX_UNLOCK(env, hash_mtx); 458 459 ret = __mutex_free(env, &td->mvcc_mtx); 460 td->mvcc_mtx = MUTEX_INVALID; 461 462 TXN_SYSTEM_LOCK(env); 463 SH_TAILQ_REMOVE(®ion->mvcc_txn, td, links, __txn_detail); 464#ifdef HAVE_STATISTICS 465 --region->stat.st_nsnapshot; 466#endif 467 __env_alloc_free(&mgr->reginfo, td); 468 TXN_SYSTEM_UNLOCK(env); 469 470 MUTEX_LOCK(env, hash_mtx); 471 } 472 473 return (ret); 474} 475