1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2001,2008 Oracle. All rights reserved. 5 * 6 * $Id: txn_recover.c,v 12.37 2008/04/19 15:47:42 mjc Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/txn.h" 13#include "dbinc/db_page.h" 14#include "dbinc/db_dispatch.h" 15#include "dbinc/log.h" 16#include "dbinc_auto/db_auto.h" 17#include "dbinc_auto/crdel_auto.h" 18#include "dbinc_auto/db_ext.h" 19 20/* 21 * __txn_map_gid 22 * Return the txn that corresponds to this global ID. 23 * 24 * PUBLIC: int __txn_map_gid __P((ENV *, 25 * PUBLIC: u_int8_t *, TXN_DETAIL **, roff_t *)); 26 */ 27int 28__txn_map_gid(env, gid, tdp, offp) 29 ENV *env; 30 u_int8_t *gid; 31 TXN_DETAIL **tdp; 32 roff_t *offp; 33{ 34 DB_TXNMGR *mgr; 35 DB_TXNREGION *region; 36 37 mgr = env->tx_handle; 38 region = mgr->reginfo.primary; 39 40 /* 41 * Search the internal active transaction table to find the 42 * matching xid. If this is a performance hit, then we 43 * can create a hash table, but I doubt it's worth it. 44 */ 45 TXN_SYSTEM_LOCK(env); 46 SH_TAILQ_FOREACH(*tdp, ®ion->active_txn, links, __txn_detail) 47 if (memcmp(gid, (*tdp)->xid, sizeof((*tdp)->xid)) == 0) 48 break; 49 TXN_SYSTEM_UNLOCK(env); 50 51 if (*tdp == NULL) 52 return (EINVAL); 53 54 *offp = R_OFFSET(&mgr->reginfo, *tdp); 55 return (0); 56} 57 58/* 59 * __txn_recover_pp -- 60 * ENV->txn_recover pre/post processing. 61 * 62 * PUBLIC: int __txn_recover_pp 63 * PUBLIC: __P((DB_ENV *, DB_PREPLIST *, long, long *, u_int32_t)); 64 */ 65int 66__txn_recover_pp(dbenv, preplist, count, retp, flags) 67 DB_ENV *dbenv; 68 DB_PREPLIST *preplist; 69 long count, *retp; 70 u_int32_t flags; 71{ 72 DB_THREAD_INFO *ip; 73 ENV *env; 74 int ret; 75 76 env = dbenv->env; 77 78 ENV_REQUIRES_CONFIG( 79 env, env->tx_handle, "txn_recover", DB_INIT_TXN); 80 81 if (F_ISSET((DB_TXNREGION *)env->tx_handle->reginfo.primary, 82 TXN_IN_RECOVERY)) { 83 __db_errx(env, "operation not permitted while in recovery"); 84 return (EINVAL); 85 } 86 87 if (flags != DB_FIRST && flags != DB_NEXT) 88 return (__db_ferr(env, "DB_ENV->txn_recover", 0)); 89 90 ENV_ENTER(env, ip); 91 REPLICATION_WRAP(env, 92 (__txn_recover(env, preplist, count, retp, flags)), 0, ret); 93 ENV_LEAVE(env, ip); 94 return (ret); 95} 96 97/* 98 * __txn_recover -- 99 * ENV->txn_recover. 100 * 101 * PUBLIC: int __txn_recover __P((ENV *, 102 * PUBLIC: DB_PREPLIST *, long, long *, u_int32_t)); 103 */ 104int 105__txn_recover(env, preplist, count, retp, flags) 106 ENV *env; 107 DB_PREPLIST *preplist; 108 long count, *retp; 109 u_int32_t flags; 110{ 111 /* 112 * Public API to retrieve the list of prepared, but not yet committed 113 * transactions. See __txn_get_prepared for details. This function 114 * and __db_xa_recover both wrap that one. 115 */ 116 return (__txn_get_prepared(env, NULL, preplist, count, retp, flags)); 117} 118 119/* 120 * __txn_get_prepared -- 121 * Returns a list of prepared (and for XA, heuristically completed) 122 * transactions (less than or equal to the count parameter). One of 123 * xids or txns must be set to point to an array of the appropriate type. 124 * The count parameter indicates the number of entries in the xids and/or 125 * txns array. The retp parameter will be set to indicate the number of 126 * entries returned in the xids/txns array. Flags indicates the operation, 127 * one of DB_FIRST or DB_NEXT. 128 * 129 * PUBLIC: int __txn_get_prepared __P((ENV *, 130 * PUBLIC: XID *, DB_PREPLIST *, long, long *, u_int32_t)); 131 */ 132int 133__txn_get_prepared(env, xids, txns, count, retp, flags) 134 ENV *env; 135 XID *xids; 136 DB_PREPLIST *txns; 137 long count; /* This is long for XA compatibility. */ 138 long *retp; 139 u_int32_t flags; 140{ 141 DB_LSN min; 142 DB_PREPLIST *prepp; 143 DB_THREAD_INFO *ip; 144 DB_TXNMGR *mgr; 145 DB_TXNREGION *region; 146 TXN_DETAIL *td; 147 XID *xidp; 148 long i; 149 int restored, ret; 150 151 *retp = 0; 152 153 MAX_LSN(min); 154 prepp = txns; 155 xidp = xids; 156 restored = ret = 0; 157 158 /* 159 * If we are starting a scan, then we traverse the active transaction 160 * list once making sure that all transactions are marked as not having 161 * been collected. Then on each pass, we mark the ones we collected 162 * so that if we cannot collect them all at once, we can finish up 163 * next time with a continue. 164 */ 165 166 mgr = env->tx_handle; 167 region = mgr->reginfo.primary; 168 169 /* 170 * During this pass we need to figure out if we are going to need 171 * to open files. We need to open files if we've never collected 172 * before (in which case, none of the COLLECTED bits will be set) 173 * and the ones that we are collecting are restored (if they aren't 174 * restored, then we never crashed; just the main server did). 175 */ 176 TXN_SYSTEM_LOCK(env); 177 178 /* Now begin collecting active transactions. */ 179 for (td = SH_TAILQ_FIRST(®ion->active_txn, __txn_detail); 180 td != NULL && *retp < count; 181 td = SH_TAILQ_NEXT(td, links, __txn_detail)) { 182 if (td->status != TXN_PREPARED || 183 (flags != DB_FIRST && F_ISSET(td, TXN_DTL_COLLECTED))) 184 continue; 185 186 if (F_ISSET(td, TXN_DTL_RESTORED)) 187 restored = 1; 188 189 if (xids != NULL) { 190 xidp->formatID = td->format; 191 /* 192 * XID structure uses longs; we use u_int32_t's as we 193 * log them to disk. Cast them to make the conversion 194 * explicit. 195 */ 196 xidp->gtrid_length = (long)td->gtrid; 197 xidp->bqual_length = (long)td->bqual; 198 memcpy(xidp->data, td->xid, sizeof(td->xid)); 199 xidp++; 200 } 201 202 if (txns != NULL) { 203 if ((ret = __os_calloc(env, 204 1, sizeof(DB_TXN), &prepp->txn)) != 0) { 205 TXN_SYSTEM_UNLOCK(env); 206 goto err; 207 } 208 if ((ret = __txn_continue(env, prepp->txn, td)) != 0) 209 goto err; 210 F_SET(prepp->txn, TXN_MALLOC); 211 if (F_ISSET(env->dbenv, DB_ENV_TXN_NOSYNC)) 212 F_SET(prepp->txn, TXN_NOSYNC); 213 else if (F_ISSET(env->dbenv, DB_ENV_TXN_WRITE_NOSYNC)) 214 F_SET(prepp->txn, TXN_WRITE_NOSYNC); 215 else 216 F_SET(prepp->txn, TXN_SYNC); 217 memcpy(prepp->gid, td->xid, sizeof(td->xid)); 218 prepp++; 219 } 220 221 if (!IS_ZERO_LSN(td->begin_lsn) && 222 LOG_COMPARE(&td->begin_lsn, &min) < 0) 223 min = td->begin_lsn; 224 225 (*retp)++; 226 F_SET(td, TXN_DTL_COLLECTED); 227 } 228 if (flags == DB_FIRST) 229 for (; td != NULL; td = SH_TAILQ_NEXT(td, links, __txn_detail)) 230 F_CLR(td, TXN_DTL_COLLECTED); 231 TXN_SYSTEM_UNLOCK(env); 232 233 /* 234 * Now link all the transactions into the transaction manager's list. 235 */ 236 if (txns != NULL && *retp != 0) { 237 MUTEX_LOCK(env, mgr->mutex); 238 for (i = 0; i < *retp; i++) 239 TAILQ_INSERT_TAIL(&mgr->txn_chain, txns[i].txn, links); 240 MUTEX_UNLOCK(env, mgr->mutex); 241 242 /* 243 * If we are restoring, update our count of outstanding 244 * transactions. 245 */ 246 if (REP_ON(env)) { 247 REP_SYSTEM_LOCK(env); 248 env->rep_handle->region->op_cnt += (u_long)*retp; 249 REP_SYSTEM_UNLOCK(env); 250 } 251 252 } 253 /* 254 * If recovery already opened the files for us, don't 255 * do it here. 256 */ 257 if (restored != 0 && flags == DB_FIRST && 258 !F_ISSET(env->lg_handle, DBLOG_OPENFILES)) { 259 ENV_GET_THREAD_INFO(env, ip); 260 ret = __txn_openfiles(env, ip, &min, 0); 261 } 262 263 if (0) { 264err: TXN_SYSTEM_UNLOCK(env); 265 } 266 return (ret); 267} 268 269/* 270 * __txn_openfiles -- 271 * Call env_openfiles. 272 * 273 * PUBLIC: int __txn_openfiles __P((ENV *, DB_THREAD_INFO *, DB_LSN *, int)); 274 */ 275int 276__txn_openfiles(env, ip, min, force) 277 ENV *env; 278 DB_THREAD_INFO *ip; 279 DB_LSN *min; 280 int force; 281{ 282 DBT data; 283 DB_LOGC *logc; 284 DB_LSN open_lsn; 285 DB_TXNHEAD *txninfo; 286 __txn_ckp_args *ckp_args; 287 int ret, t_ret; 288 289 /* 290 * Figure out the last checkpoint before the smallest 291 * start_lsn in the region. 292 */ 293 logc = NULL; 294 if ((ret = __log_cursor(env, &logc)) != 0) 295 goto err; 296 297 memset(&data, 0, sizeof(data)); 298 if ((ret = __txn_getckp(env, &open_lsn)) == 0) 299 while (!IS_ZERO_LSN(open_lsn) && (ret = 300 __logc_get(logc, &open_lsn, &data, DB_SET)) == 0 && 301 (force || 302 (min != NULL && LOG_COMPARE(min, &open_lsn) < 0))) { 303 /* Format the log record. */ 304 if ((ret = __txn_ckp_read( 305 env, data.data, &ckp_args)) != 0) { 306 __db_errx(env, 307 "Invalid checkpoint record at [%lu][%lu]", 308 (u_long)open_lsn.file, 309 (u_long)open_lsn.offset); 310 goto err; 311 } 312 /* 313 * If force is set, then we're forcing ourselves 314 * to go back far enough to open files. 315 * Use ckp_lsn and then break out of the loop. 316 */ 317 open_lsn = force ? ckp_args->ckp_lsn : 318 ckp_args->last_ckp; 319 __os_free(env, ckp_args); 320 if (force) { 321 if ((ret = __logc_get(logc, &open_lsn, 322 &data, DB_SET)) != 0) 323 goto err; 324 break; 325 } 326 } 327 328 /* 329 * There are several ways by which we may have gotten here. 330 * - We got a DB_NOTFOUND -- we need to read the first 331 * log record. 332 * - We found a checkpoint before min. We're done. 333 * - We found a checkpoint after min who's last_ckp is 0. We 334 * need to start at the beginning of the log. 335 * - We are forcing an openfiles and we have our ckp_lsn. 336 */ 337 if ((ret == DB_NOTFOUND || IS_ZERO_LSN(open_lsn)) && (ret = 338 __logc_get(logc, &open_lsn, &data, DB_FIRST)) != 0) { 339 __db_errx(env, "No log records"); 340 goto err; 341 } 342 343 if ((ret = __db_txnlist_init(env, ip, 0, 0, NULL, &txninfo)) != 0) 344 goto err; 345 ret = __env_openfiles( 346 env, logc, txninfo, &data, &open_lsn, NULL, (double)0, 0); 347 if (txninfo != NULL) 348 __db_txnlist_end(env, txninfo); 349 350err: 351 if (logc != NULL && (t_ret = __logc_close(logc)) != 0 && ret == 0) 352 ret = t_ret; 353 return (ret); 354} 355