1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 2001,2008 Oracle. All rights reserved. 5 * 6 * $Id: rep_region.c,v 12.55 2008/01/11 20:50:03 bostic Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/db_am.h" 14#include "dbinc/log.h" 15 16static int __rep_egen_init __P((ENV *, REP *)); 17static int __rep_gen_init __P((ENV *, REP *)); 18 19/* 20 * __rep_open -- 21 * Initialize the shared memory state for the replication system. 22 * 23 * PUBLIC: int __rep_open __P((ENV *)); 24 */ 25int 26__rep_open(env) 27 ENV *env; 28{ 29 DB_REP *db_rep; 30 REGENV *renv; 31 REGINFO *infop; 32 REP *rep; 33 int ret; 34 35 db_rep = env->rep_handle; 36 infop = env->reginfo; 37 renv = infop->primary; 38 ret = 0; 39 40 if (renv->rep_off == INVALID_ROFF) { 41 /* Must create the region. */ 42 if ((ret = __env_alloc(infop, sizeof(REP), &rep)) != 0) 43 return (ret); 44 memset(rep, 0, sizeof(*rep)); 45 46 /* 47 * We have the region; fill in the values. Some values may 48 * have been configured before we open the region, and those 49 * are taken from the DB_REP structure. 50 */ 51 if ((ret = __mutex_alloc( 52 env, MTX_REP_REGION, 0, &rep->mtx_region)) != 0) 53 return (ret); 54 /* 55 * Because we have no way to prevent deadlocks and cannot log 56 * changes made to it, we single-thread access to the client 57 * bookkeeping database. This is suboptimal, but it only gets 58 * accessed when messages arrive out-of-order, so it should 59 * stay small and not be used in a high-performance app. 60 */ 61 if ((ret = __mutex_alloc( 62 env, MTX_REP_DATABASE, 0, &rep->mtx_clientdb)) != 0) 63 return (ret); 64 65 if ((ret = __mutex_alloc( 66 env, MTX_REP_CHKPT, 0, &rep->mtx_ckp)) != 0) 67 return (ret); 68 69 if ((ret = __mutex_alloc( 70 env, MTX_REP_EVENT, 0, &rep->mtx_event)) != 0) 71 return (ret); 72 73 rep->newmaster_event_gen = 0; 74 rep->notified_egen = 0; 75 rep->lease_off = INVALID_ROFF; 76 rep->tally_off = INVALID_ROFF; 77 rep->v2tally_off = INVALID_ROFF; 78 rep->eid = db_rep->eid; 79 rep->master_id = DB_EID_INVALID; 80 rep->gen = 0; 81 rep->version = DB_REPVERSION; 82 if ((ret = __rep_gen_init(env, rep)) != 0) 83 return (ret); 84 if ((ret = __rep_egen_init(env, rep)) != 0) 85 return (ret); 86 rep->gbytes = db_rep->gbytes; 87 rep->bytes = db_rep->bytes; 88 rep->request_gap = db_rep->request_gap; 89 rep->max_gap = db_rep->max_gap; 90 rep->config_nsites = db_rep->config_nsites; 91 rep->config = db_rep->config; 92 rep->elect_timeout = db_rep->elect_timeout; 93 rep->full_elect_timeout = db_rep->full_elect_timeout; 94 rep->lease_timeout = db_rep->lease_timeout; 95 rep->clock_skew = db_rep->clock_skew; 96 rep->clock_base = db_rep->clock_base; 97 timespecclear(&rep->lease_duration); 98 timespecclear(&rep->grant_expire); 99 rep->chkpt_delay = db_rep->chkpt_delay; 100 rep->priority = db_rep->my_priority; 101 102 F_SET(rep, REP_F_NOARCHIVE); 103 104 /* Initialize encapsulating region. */ 105 renv->rep_off = R_OFFSET(infop, rep); 106 (void)time(&renv->rep_timestamp); 107 renv->op_timestamp = 0; 108 F_CLR(renv, DB_REGENV_REPLOCKED); 109 } else 110 rep = R_ADDR(infop, renv->rep_off); 111 112 db_rep->region = rep; 113 114 return (0); 115} 116 117/* 118 * __rep_env_refresh -- 119 * Replication-specific refresh of the ENV structure. 120 * 121 * PUBLIC: int __rep_env_refresh __P((ENV *)); 122 */ 123int 124__rep_env_refresh(env) 125 ENV *env; 126{ 127 DB_REP *db_rep; 128 REGENV *renv; 129 REGINFO *infop; 130 REP *rep; 131 int ret, t_ret; 132 133 db_rep = env->rep_handle; 134 rep = db_rep->region; 135 infop = env->reginfo; 136 renv = infop->primary; 137 ret = 0; 138 139 /* 140 * If we are the last reference closing the env, clear our knowledge of 141 * belonging to a group and that there is a valid handle where 142 * rep_start had already been called. 143 */ 144 if (renv->refcnt == 1) { 145 F_CLR(rep, REP_F_GROUP_ESTD); 146 F_CLR(rep, REP_F_START_CALLED); 147 } 148 149 /* 150 * If a private region, return the memory to the heap. Not needed for 151 * filesystem-backed or system shared memory regions, that memory isn't 152 * owned by any particular process. 153 */ 154 if (F_ISSET(env, ENV_PRIVATE)) { 155 db_rep = env->rep_handle; 156 if (db_rep->region != NULL) { 157 ret = __mutex_free(env, &db_rep->region->mtx_region); 158 if ((t_ret = __mutex_free(env, 159 &db_rep->region->mtx_clientdb)) != 0 && ret == 0) 160 ret = t_ret; 161 if ((t_ret = __mutex_free(env, 162 &db_rep->region->mtx_ckp)) != 0 && ret == 0) 163 ret = t_ret; 164 if ((t_ret = __mutex_free(env, 165 &db_rep->region->mtx_event)) != 0 && ret == 0) 166 ret = t_ret; 167 } 168 169 if (renv->rep_off != INVALID_ROFF) 170 __env_alloc_free(infop, R_ADDR(infop, renv->rep_off)); 171 } 172 173 env->rep_handle->region = NULL; 174 return (ret); 175} 176 177/* 178 * __rep_close -- 179 * Shut down all of replication. 180 * 181 * PUBLIC: int __rep_env_close __P((ENV *)); 182 */ 183int 184__rep_env_close(env) 185 ENV *env; 186{ 187 int ret, t_ret; 188 189 ret = __rep_preclose(env); 190 if ((t_ret = __rep_closefiles(env, 0)) != 0 && ret == 0) 191 ret = t_ret; 192 return (ret); 193} 194 195/* 196 * __rep_preclose -- 197 * If we are a client, shut down our client database and send 198 * any outstanding bulk buffers. 199 * 200 * PUBLIC: int __rep_preclose __P((ENV *)); 201 */ 202int 203__rep_preclose(env) 204 ENV *env; 205{ 206 DB_LOG *dblp; 207 DB_REP *db_rep; 208 LOG *lp; 209 REP_BULK bulk; 210 int ret; 211 212 ret = 0; 213 214 db_rep = env->rep_handle; 215 dblp = env->lg_handle; 216 217 /* 218 * If we have a rep region, we can preclose. Otherwise, return. 219 * If we're on an error path from env open, we may not have 220 * a region, even though we have a handle. 221 */ 222 if (db_rep == NULL || db_rep->region == NULL) 223 return (ret); 224 MUTEX_LOCK(env, db_rep->region->mtx_clientdb); 225 if (db_rep->rep_db != NULL) { 226 ret = __db_close(db_rep->rep_db, NULL, DB_NOSYNC); 227 db_rep->rep_db = NULL; 228 } 229 /* 230 * We could be called early in an env_open error path, so 231 * only do this if we have a log region set up. 232 */ 233 if (dblp == NULL) 234 goto out; 235 lp = dblp->reginfo.primary; 236 /* 237 * If we have something in the bulk buffer, send anything in it 238 * if we are able to. 239 */ 240 if (lp->bulk_off != 0 && db_rep->send != NULL) { 241 memset(&bulk, 0, sizeof(bulk)); 242 bulk.addr = R_ADDR(&dblp->reginfo, lp->bulk_buf); 243 bulk.offp = &lp->bulk_off; 244 bulk.len = lp->bulk_len; 245 bulk.type = REP_BULK_LOG; 246 bulk.eid = DB_EID_BROADCAST; 247 bulk.flagsp = &lp->bulk_flags; 248 /* 249 * Ignore send errors here. This can be called on the 250 * env->close path - make a best attempt to send. 251 */ 252 (void)__rep_send_bulk(env, &bulk, 0); 253 } 254out: MUTEX_UNLOCK(env, db_rep->region->mtx_clientdb); 255 return (ret); 256} 257 258/* 259 * __rep_closefiles -- 260 * If we were a client and are now a master, close all databases 261 * we've opened while applying messages as a client. This can 262 * be called from __env_close and we need to check if the env, 263 * handles and regions are set up, or not. 264 * 265 * PUBLIC: int __rep_closefiles __P((ENV *, int)); 266 */ 267int 268__rep_closefiles(env, do_restored) 269 ENV *env; 270 int do_restored; 271{ 272 DB_LOG *dblp; 273 DB_REP *db_rep; 274 int ret; 275 276 ret = 0; 277 278 db_rep = env->rep_handle; 279 dblp = env->lg_handle; 280 281 if (db_rep == NULL || db_rep->region == NULL) 282 return (ret); 283 if (dblp == NULL) 284 return (ret); 285 if ((ret = __dbreg_close_files(env, do_restored)) == 0) 286 F_CLR(db_rep, DBREP_OPENFILES); 287 288 return (ret); 289} 290 291/* 292 * __rep_egen_init -- 293 * Initialize the value of egen in the region. Called only from 294 * __rep_region_init, which is guaranteed to be single-threaded 295 * as we create the rep region. We set the rep->egen field which 296 * is normally protected by db_rep->region->mutex. 297 */ 298static int 299__rep_egen_init(env, rep) 300 ENV *env; 301 REP *rep; 302{ 303 DB_FH *fhp; 304 int ret; 305 size_t cnt; 306 char *p; 307 308 if ((ret = 309 __db_appname(env, DB_APP_NONE, REP_EGENNAME, 0, NULL, &p)) != 0) 310 return (ret); 311 /* 312 * If the file doesn't exist, create it now and initialize with 1. 313 */ 314 if (__os_exists(env, p, NULL) != 0) { 315 rep->egen = rep->gen + 1; 316 if ((ret = __rep_write_egen(env, rep->egen)) != 0) 317 goto err; 318 } else { 319 /* 320 * File exists, open it and read in our egen. 321 */ 322 if ((ret = __os_open(env, p, 0, 323 DB_OSO_RDONLY, DB_MODE_600, &fhp)) != 0) 324 goto err; 325 if ((ret = __os_read(env, fhp, &rep->egen, sizeof(u_int32_t), 326 &cnt)) != 0 || cnt != sizeof(u_int32_t)) 327 goto err1; 328 RPRINT(env, DB_VERB_REP_MISC, 329 (env, "Read in egen %lu", (u_long)rep->egen)); 330err1: (void)__os_closehandle(env, fhp); 331 } 332err: __os_free(env, p); 333 return (ret); 334} 335 336/* 337 * __rep_write_egen -- 338 * Write out the egen into the env file. 339 * 340 * PUBLIC: int __rep_write_egen __P((ENV *, u_int32_t)); 341 */ 342int 343__rep_write_egen(env, egen) 344 ENV *env; 345 u_int32_t egen; 346{ 347 DB_FH *fhp; 348 int ret; 349 size_t cnt; 350 char *p; 351 352 if ((ret = 353 __db_appname(env, DB_APP_NONE, REP_EGENNAME, 0, NULL, &p)) != 0) 354 return (ret); 355 if ((ret = __os_open( 356 env, p, 0, DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &fhp)) == 0) { 357 if ((ret = __os_write(env, fhp, &egen, sizeof(u_int32_t), 358 &cnt)) != 0 || ((ret = __os_fsync(env, fhp)) != 0)) 359 __db_err(env, ret, "%s", p); 360 (void)__os_closehandle(env, fhp); 361 } 362 __os_free(env, p); 363 return (ret); 364} 365 366/* 367 * __rep_gen_init -- 368 * Initialize the value of gen in the region. Called only from 369 * __rep_region_init, which is guaranteed to be single-threaded 370 * as we create the rep region. We set the rep->gen field which 371 * is normally protected by db_rep->region->mutex. 372 */ 373static int 374__rep_gen_init(env, rep) 375 ENV *env; 376 REP *rep; 377{ 378 DB_FH *fhp; 379 int ret; 380 size_t cnt; 381 char *p; 382 383 if ((ret = 384 __db_appname(env, DB_APP_NONE, REP_GENNAME, 0, NULL, &p)) != 0) 385 return (ret); 386 /* 387 * If the file doesn't exist, create it now and initialize with 0. 388 */ 389 if (__os_exists(env, p, NULL) != 0) { 390 rep->gen = 0; 391 if ((ret = __rep_write_gen(env, rep->gen)) != 0) 392 goto err; 393 } else { 394 /* 395 * File exists, open it and read in our gen. 396 */ 397 if ((ret = __os_open(env, p, 0, 398 DB_OSO_RDONLY, DB_MODE_600, &fhp)) != 0) 399 goto err; 400 if ((ret = __os_read(env, fhp, &rep->gen, sizeof(u_int32_t), 401 &cnt)) < 0 || cnt == 0) 402 goto err1; 403 RPRINT(env, DB_VERB_REP_MISC, (env, "Read in gen %lu", 404 (u_long)rep->gen)); 405err1: (void)__os_closehandle(env, fhp); 406 } 407err: __os_free(env, p); 408 return (ret); 409} 410 411/* 412 * __rep_write_gen -- 413 * Write out the gen into the env file. 414 * 415 * PUBLIC: int __rep_write_gen __P((ENV *, u_int32_t)); 416 */ 417int 418__rep_write_gen(env, gen) 419 ENV *env; 420 u_int32_t gen; 421{ 422 DB_FH *fhp; 423 int ret; 424 size_t cnt; 425 char *p; 426 427 if ((ret = 428 __db_appname(env, DB_APP_NONE, REP_GENNAME, 0, NULL, &p)) != 0) 429 return (ret); 430 if ((ret = __os_open( 431 env, p, 0, DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &fhp)) == 0) { 432 if ((ret = __os_write(env, fhp, &gen, sizeof(u_int32_t), 433 &cnt)) != 0 || ((ret = __os_fsync(env, fhp)) != 0)) 434 __db_err(env, ret, "%s", p); 435 (void)__os_closehandle(env, fhp); 436 } 437 __os_free(env, p); 438 return (ret); 439} 440