1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2001-2009 Oracle. All rights reserved. 5 * 6 * $Id$ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/txn.h" 13#include "dbinc/db_page.h" 14#include "dbinc/db_dispatch.h" 15#include "dbinc/log.h" 16#include "dbinc_auto/db_auto.h" 17#include "dbinc_auto/crdel_auto.h" 18#include "dbinc_auto/db_ext.h" 19 20/* 21 * __txn_map_gid 22 * Return the txn that corresponds to this global ID. 23 * 24 * PUBLIC: int __txn_map_gid __P((ENV *, 25 * PUBLIC: u_int8_t *, TXN_DETAIL **, roff_t *)); 26 */ 27int 28__txn_map_gid(env, gid, tdp, offp) 29 ENV *env; 30 u_int8_t *gid; 31 TXN_DETAIL **tdp; 32 roff_t *offp; 33{ 34 DB_TXNMGR *mgr; 35 DB_TXNREGION *region; 36 37 mgr = env->tx_handle; 38 region = mgr->reginfo.primary; 39 40 /* 41 * Search the internal active transaction table to find the 42 * matching xid. If this is a performance hit, then we 43 * can create a hash table, but I doubt it's worth it. 44 */ 45 TXN_SYSTEM_LOCK(env); 46 SH_TAILQ_FOREACH(*tdp, ®ion->active_txn, links, __txn_detail) 47 if (memcmp(gid, (*tdp)->gid, sizeof((*tdp)->gid)) == 0) 48 break; 49 TXN_SYSTEM_UNLOCK(env); 50 51 if (*tdp == NULL) 52 return (EINVAL); 53 54 *offp = R_OFFSET(&mgr->reginfo, *tdp); 55 return (0); 56} 57 58/* 59 * __txn_recover_pp -- 60 * ENV->txn_recover pre/post processing. 61 * 62 * PUBLIC: int __txn_recover_pp __P((DB_ENV *, 63 * PUBLIC: DB_PREPLIST *, u_int32_t, u_int32_t *, u_int32_t)); 64 */ 65int 66__txn_recover_pp(dbenv, preplist, count, retp, flags) 67 DB_ENV *dbenv; 68 DB_PREPLIST *preplist; 69 u_int32_t count, *retp; 70 u_int32_t flags; 71{ 72 DB_THREAD_INFO *ip; 73 ENV *env; 74 int ret; 75 76 env = dbenv->env; 77 78 ENV_REQUIRES_CONFIG( 79 env, env->tx_handle, "txn_recover", DB_INIT_TXN); 80 81 if (F_ISSET((DB_TXNREGION *)env->tx_handle->reginfo.primary, 82 TXN_IN_RECOVERY)) { 83 __db_errx(env, "operation not permitted while in recovery"); 84 return (EINVAL); 85 } 86 87 if (flags != DB_FIRST && flags != DB_NEXT) 88 return (__db_ferr(env, "DB_ENV->txn_recover", 0)); 89 90 ENV_ENTER(env, ip); 91 REPLICATION_WRAP(env, 92 (__txn_recover(env, preplist, count, retp, flags)), 0, ret); 93 ENV_LEAVE(env, ip); 94 return (ret); 95} 96 97/* 98 * __txn_recover -- 99 * ENV->txn_recover. 100 * 101 * PUBLIC: int __txn_recover __P((ENV *, 102 * PUBLIC: DB_PREPLIST *, u_int32_t, u_int32_t *, u_int32_t)); 103 */ 104int 105__txn_recover(env, txns, count, retp, flags) 106 ENV *env; 107 DB_PREPLIST *txns; 108 u_int32_t count, *retp; 109 u_int32_t flags; 110{ 111 DB_LSN min; 112 DB_PREPLIST *prepp; 113 DB_THREAD_INFO *ip; 114 DB_TXNMGR *mgr; 115 DB_TXNREGION *region; 116 TXN_DETAIL *td; 117 u_int32_t i; 118 int restored, ret; 119 120 *retp = 0; 121 122 MAX_LSN(min); 123 prepp = txns; 124 restored = ret = 0; 125 126 DB_ASSERT(env, txns != NULL); 127 /* 128 * If we are starting a scan, then we traverse the active transaction 129 * list once making sure that all transactions are marked as not having 130 * been collected. Then on each pass, we mark the ones we collected 131 * so that if we cannot collect them all at once, we can finish up 132 * next time with a continue. 133 */ 134 135 mgr = env->tx_handle; 136 region = mgr->reginfo.primary; 137 138 /* 139 * During this pass we need to figure out if we are going to need 140 * to open files. We need to open files if we've never collected 141 * before (in which case, none of the COLLECTED bits will be set) 142 * and the ones that we are collecting are restored (if they aren't 143 * restored, then we never crashed; just the main server did). 144 */ 145 TXN_SYSTEM_LOCK(env); 146 147 /* Now begin collecting active transactions. */ 148 for (td = SH_TAILQ_FIRST(®ion->active_txn, __txn_detail); 149 td != NULL && *retp < count; 150 td = SH_TAILQ_NEXT(td, links, __txn_detail)) { 151 if (td->status != TXN_PREPARED || 152 (flags != DB_FIRST && F_ISSET(td, TXN_DTL_COLLECTED))) 153 continue; 154 155 if (F_ISSET(td, TXN_DTL_RESTORED)) 156 restored = 1; 157 158 if ((ret = __os_calloc(env, 159 1, sizeof(DB_TXN), &prepp->txn)) != 0) { 160 TXN_SYSTEM_UNLOCK(env); 161 goto err; 162 } 163 if ((ret = __txn_continue(env, prepp->txn, td)) != 0) 164 goto err; 165 F_SET(prepp->txn, TXN_MALLOC); 166 if (F_ISSET(env->dbenv, DB_ENV_TXN_NOSYNC)) 167 F_SET(prepp->txn, TXN_NOSYNC); 168 else if (F_ISSET(env->dbenv, DB_ENV_TXN_WRITE_NOSYNC)) 169 F_SET(prepp->txn, TXN_WRITE_NOSYNC); 170 else 171 F_SET(prepp->txn, TXN_SYNC); 172 memcpy(prepp->gid, td->gid, sizeof(td->gid)); 173 prepp++; 174 175 if (!IS_ZERO_LSN(td->begin_lsn) && 176 LOG_COMPARE(&td->begin_lsn, &min) < 0) 177 min = td->begin_lsn; 178 179 (*retp)++; 180 F_SET(td, TXN_DTL_COLLECTED); 181 } 182 if (flags == DB_FIRST) 183 for (; td != NULL; td = SH_TAILQ_NEXT(td, links, __txn_detail)) 184 F_CLR(td, TXN_DTL_COLLECTED); 185 TXN_SYSTEM_UNLOCK(env); 186 187 /* 188 * Now link all the transactions into the transaction manager's list. 189 */ 190 if (*retp != 0) { 191 MUTEX_LOCK(env, mgr->mutex); 192 for (i = 0; i < *retp; i++) 193 TAILQ_INSERT_TAIL(&mgr->txn_chain, txns[i].txn, links); 194 MUTEX_UNLOCK(env, mgr->mutex); 195 196 /* 197 * If we are restoring, update our count of outstanding 198 * transactions. 199 */ 200 if (REP_ON(env)) { 201 REP_SYSTEM_LOCK(env); 202 env->rep_handle->region->op_cnt += (u_long)*retp; 203 REP_SYSTEM_UNLOCK(env); 204 } 205 206 } 207 /* 208 * If recovery already opened the files for us, don't 209 * do it here. 210 */ 211 if (restored != 0 && flags == DB_FIRST && 212 !F_ISSET(env->lg_handle, DBLOG_OPENFILES)) { 213 ENV_GET_THREAD_INFO(env, ip); 214 ret = __txn_openfiles(env, ip, &min, 0); 215 } 216 217 if (0) { 218err: TXN_SYSTEM_UNLOCK(env); 219 } 220 return (ret); 221} 222 223/* 224 * __txn_openfiles -- 225 * Call env_openfiles. 226 * 227 * PUBLIC: int __txn_openfiles __P((ENV *, DB_THREAD_INFO *, DB_LSN *, int)); 228 */ 229int 230__txn_openfiles(env, ip, min, force) 231 ENV *env; 232 DB_THREAD_INFO *ip; 233 DB_LSN *min; 234 int force; 235{ 236 DBT data; 237 DB_LOGC *logc; 238 DB_LSN open_lsn; 239 DB_TXNHEAD *txninfo; 240 __txn_ckp_args *ckp_args; 241 int ret, t_ret; 242 243 /* 244 * Figure out the last checkpoint before the smallest 245 * start_lsn in the region. 246 */ 247 logc = NULL; 248 if ((ret = __log_cursor(env, &logc)) != 0) 249 goto err; 250 251 memset(&data, 0, sizeof(data)); 252 if ((ret = __txn_getckp(env, &open_lsn)) == 0) 253 while (!IS_ZERO_LSN(open_lsn) && (ret = 254 __logc_get(logc, &open_lsn, &data, DB_SET)) == 0 && 255 (force || 256 (min != NULL && LOG_COMPARE(min, &open_lsn) < 0))) { 257 /* Format the log record. */ 258 if ((ret = __txn_ckp_read( 259 env, data.data, &ckp_args)) != 0) { 260 __db_errx(env, 261 "Invalid checkpoint record at [%lu][%lu]", 262 (u_long)open_lsn.file, 263 (u_long)open_lsn.offset); 264 goto err; 265 } 266 /* 267 * If force is set, then we're forcing ourselves 268 * to go back far enough to open files. 269 * Use ckp_lsn and then break out of the loop. 270 */ 271 open_lsn = force ? ckp_args->ckp_lsn : 272 ckp_args->last_ckp; 273 __os_free(env, ckp_args); 274 if (force) { 275 if ((ret = __logc_get(logc, &open_lsn, 276 &data, DB_SET)) != 0) 277 goto err; 278 break; 279 } 280 } 281 282 /* 283 * There are several ways by which we may have gotten here. 284 * - We got a DB_NOTFOUND -- we need to read the first 285 * log record. 286 * - We found a checkpoint before min. We're done. 287 * - We found a checkpoint after min who's last_ckp is 0. We 288 * need to start at the beginning of the log. 289 * - We are forcing an openfiles and we have our ckp_lsn. 290 */ 291 if ((ret == DB_NOTFOUND || IS_ZERO_LSN(open_lsn)) && (ret = 292 __logc_get(logc, &open_lsn, &data, DB_FIRST)) != 0) { 293 __db_errx(env, "No log records"); 294 goto err; 295 } 296 297 if ((ret = __db_txnlist_init(env, ip, 0, 0, NULL, &txninfo)) != 0) 298 goto err; 299 ret = __env_openfiles( 300 env, logc, txninfo, &data, &open_lsn, NULL, (double)0, 0); 301 if (txninfo != NULL) 302 __db_txnlist_end(env, txninfo); 303 304err: 305 if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0) 306 ret = t_ret; 307 return (ret); 308} 309