1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: mp_fput.c,v 12.46 2008/04/28 02:59:57 alexg Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/log.h" 13#include "dbinc/mp.h" 14 15static int __memp_reset_lru __P((ENV *, REGINFO *)); 16 17/* 18 * __memp_fput_pp -- 19 * DB_MPOOLFILE->put pre/post processing. 20 * 21 * PUBLIC: int __memp_fput_pp 22 * PUBLIC: __P((DB_MPOOLFILE *, void *, DB_CACHE_PRIORITY, u_int32_t)); 23 */ 24int 25__memp_fput_pp(dbmfp, pgaddr, priority, flags) 26 DB_MPOOLFILE *dbmfp; 27 void *pgaddr; 28 DB_CACHE_PRIORITY priority; 29 u_int32_t flags; 30{ 31 DB_THREAD_INFO *ip; 32 ENV *env; 33 int ret, t_ret; 34 35 env = dbmfp->env; 36 37 if (flags != 0) 38 return (__db_ferr(env, "DB_MPOOLFILE->put", 0)); 39 40 MPF_ILLEGAL_BEFORE_OPEN(dbmfp, "DB_MPOOLFILE->put"); 41 42 ENV_ENTER(env, ip); 43 44 ret = __memp_fput(dbmfp, ip, pgaddr, priority); 45 if (IS_ENV_REPLICATED(env) && 46 (t_ret = __op_rep_exit(env)) != 0 && ret == 0) 47 ret = t_ret; 48 49 ENV_LEAVE(env, ip); 50 return (ret); 51} 52 53/* 54 * __memp_fput -- 55 * DB_MPOOLFILE->put. 56 * 57 * PUBLIC: int __memp_fput __P((DB_MPOOLFILE *, 58 * PUBLIC: DB_THREAD_INFO *, void *, DB_CACHE_PRIORITY)); 59 */ 60int 61__memp_fput(dbmfp, ip, pgaddr, priority) 62 DB_MPOOLFILE *dbmfp; 63 DB_THREAD_INFO *ip; 64 void *pgaddr; 65 DB_CACHE_PRIORITY priority; 66{ 67 BH *bhp; 68 DB_ENV *dbenv; 69 DB_MPOOL *dbmp; 70 DB_MPOOL_HASH *hp; 71 ENV *env; 72 MPOOL *c_mp; 73 MPOOLFILE *mfp; 74 PIN_LIST *list, *lp; 75 REGINFO *infop, *reginfo; 76 roff_t b_ref; 77 int region; 78 int adjust, pfactor, ret, t_ret; 79 char buf[DB_THREADID_STRLEN]; 80 81 env = dbmfp->env; 82 dbenv = env->dbenv; 83 dbmp = env->mp_handle; 84 mfp = dbmfp->mfp; 85 bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf)); 86 ret = 0; 87 88 /* 89 * If this is marked dummy, we are using it to unpin a buffer for 90 * another thread. 
91 */ 92 if (F_ISSET(dbmfp, MP_DUMMY)) 93 goto unpin; 94 95 /* 96 * If we're mapping the file, there's nothing to do. Because we can 97 * stop mapping the file at any time, we have to check on each buffer 98 * to see if the address we gave the application was part of the map 99 * region. 100 */ 101 if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr && 102 (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len) 103 return (0); 104 105#ifdef DIAGNOSTIC 106 /* 107 * Decrement the per-file pinned buffer count (mapped pages aren't 108 * counted). 109 */ 110 MPOOL_SYSTEM_LOCK(env); 111 if (dbmfp->pinref == 0) { 112 MPOOL_SYSTEM_UNLOCK(env); 113 __db_errx(env, 114 "%s: more pages returned than retrieved", __memp_fn(dbmfp)); 115 return (__env_panic(env, EACCES)); 116 } 117 --dbmfp->pinref; 118 MPOOL_SYSTEM_UNLOCK(env); 119#endif 120 121unpin: 122 /* Convert a page address to a buffer header and hash bucket. */ 123 MP_GET_BUCKET(env, mfp, bhp->pgno, &infop, hp, ret); 124 if (ret != 0) 125 return (ret); 126 c_mp = infop->primary; 127 128 /* 129 * Check for a reference count going to zero. This can happen if the 130 * application returns a page twice. 131 */ 132 if (bhp->ref == 0) { 133 __db_errx(env, "%s: page %lu: unpinned page returned", 134 __memp_fn(dbmfp), (u_long)bhp->pgno); 135 DB_ASSERT(env, bhp->ref != 0); 136 MUTEX_UNLOCK(env, hp->mtx_hash); 137 return (__env_panic(env, EACCES)); 138 } 139 140 /* Note the activity so allocation won't decide to quit. 
*/ 141 ++c_mp->put_counter; 142 143 if (ip != NULL) { 144 reginfo = env->reginfo; 145 list = R_ADDR(reginfo, ip->dbth_pinlist); 146 region = (int)(infop - dbmp->reginfo); 147 b_ref = R_OFFSET(infop, bhp); 148 for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) 149 if (lp->b_ref == b_ref && lp->region == region) 150 break; 151 152 if (lp == &list[ip->dbth_pinmax]) { 153 __db_errx(env, 154 "__memp_fput: pinned buffer not found for thread %s", 155 dbenv->thread_id_string(dbenv, 156 ip->dbth_pid, ip->dbth_tid, buf)); 157 return (__env_panic(env, EINVAL)); 158 } 159 160 lp->b_ref = INVALID_ROFF; 161 ip->dbth_pincount--; 162 } 163 164 /* 165 * Mark the file dirty. Check for a dirty bit on the buffer as well 166 * as the dirty flag because the buffer might have been marked dirty 167 * in the DB_MPOOLFILE->set method. 168 */ 169 if (F_ISSET(bhp, BH_DIRTY)) { 170 mfp->file_written = 1; 171 172 DB_ASSERT(env, !SH_CHAIN_HASNEXT(bhp, vc)); 173 } 174 175 /* 176 * If more than one reference to the page or a reference other than a 177 * thread waiting to flush the buffer to disk, we're done. Ignore the 178 * discard flags (for now) and leave the buffer's priority alone. 179 */ 180 if (--bhp->ref > 1 || (bhp->ref == 1 && !F_ISSET(bhp, BH_LOCKED))) { 181 MUTEX_UNLOCK(env, hp->mtx_hash); 182 return (0); 183 } 184 185 /* The buffer should not be accessed again. */ 186 MVCC_MPROTECT(bhp->buf, mfp->stat.st_pagesize, 0); 187 188 /* Update priority values. */ 189 if (priority == DB_PRIORITY_VERY_LOW || 190 mfp->priority == MPOOL_PRI_VERY_LOW) 191 bhp->priority = 0; 192 else { 193 /* 194 * We don't lock the LRU counter or the stat.st_pages field, if 195 * we get garbage (which won't happen on a 32-bit machine), it 196 * only means a buffer has the wrong priority. 
197 */ 198 bhp->priority = c_mp->lru_count; 199 200 switch (priority) { 201 default: 202 case DB_PRIORITY_UNCHANGED: 203 pfactor = mfp->priority; 204 break; 205 case DB_PRIORITY_VERY_LOW: 206 pfactor = MPOOL_PRI_VERY_LOW; 207 break; 208 case DB_PRIORITY_LOW: 209 pfactor = MPOOL_PRI_LOW; 210 break; 211 case DB_PRIORITY_DEFAULT: 212 pfactor = MPOOL_PRI_DEFAULT; 213 break; 214 case DB_PRIORITY_HIGH: 215 pfactor = MPOOL_PRI_HIGH; 216 break; 217 case DB_PRIORITY_VERY_HIGH: 218 pfactor = MPOOL_PRI_VERY_HIGH; 219 break; 220 } 221 222 adjust = 0; 223 if (pfactor != 0) 224 adjust = (int)c_mp->stat.st_pages / pfactor; 225 226 if (F_ISSET(bhp, BH_DIRTY)) 227 adjust += (int)c_mp->stat.st_pages / MPOOL_PRI_DIRTY; 228 229 if (adjust > 0) { 230 if (UINT32_MAX - bhp->priority >= (u_int32_t)adjust) 231 bhp->priority += adjust; 232 } else if (adjust < 0) 233 if (bhp->priority > (u_int32_t)-adjust) 234 bhp->priority += adjust; 235 } 236 237 /* 238 * The sync code has a separate counter for buffers on which it waits. 239 * It reads that value without holding a lock so we update it as the 240 * last thing we do. Once that value goes to 0, we won't see another 241 * reference to that buffer being returned to the cache until the sync 242 * code has finished, so we're safe as long as we don't let the value 243 * go to 0 before we finish with the buffer. 244 */ 245 if (F_ISSET(bhp, BH_LOCKED) && bhp->ref_sync != 0) 246 --bhp->ref_sync; 247 248 MUTEX_UNLOCK(env, hp->mtx_hash); 249 250 /* 251 * On every buffer put we update the buffer generation number and check 252 * for wraparound. 253 */ 254 if (++c_mp->lru_count == UINT32_MAX) 255 if ((t_ret = 256 __memp_reset_lru(env, dbmp->reginfo)) != 0 && ret == 0) 257 ret = t_ret; 258 259 return (ret); 260} 261 262/* 263 * __memp_reset_lru -- 264 * Reset the cache LRU counter. 
 */
static int
__memp_reset_lru(env, infop)
	ENV *env;
	REGINFO *infop;
{
	BH *bhp, *tbhp;
	DB_MPOOL_HASH *hp;
	MPOOL *c_mp;
	u_int32_t bucket, priority;

	c_mp = infop->primary;
	/*
	 * Update the counter so all future allocations will start at the
	 * bottom.
	 */
	c_mp->lru_count -= MPOOL_BASE_DECREMENT;

	/* Adjust the priority of every buffer in the system. */
	for (hp = R_ADDR(infop, c_mp->htab),
	    bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
		/*
		 * Skip empty buckets.
		 *
		 * We can check for empty buckets before locking as we
		 * only care if the pointer is zero or non-zero.
		 */
		if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL) {
			/* lru_reset records how far the reset has advanced. */
			c_mp->lru_reset++;
			continue;
		}

		MUTEX_LOCK(env, hp->mtx_hash);
		c_mp->lru_reset++;
		/*
		 * We need to take a little care that the bucket does
		 * not become unsorted.  This is highly unlikely but
		 * possible.
		 *
		 * Walk each buffer and, for MVCC files, every older
		 * version chained behind it; "priority" carries the
		 * previous bucket entry's value so no adjusted buffer
		 * drops below its predecessor, keeping bucket order.
		 */
		priority = 0;
		SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) {
			for (tbhp = bhp; tbhp != NULL;
			    tbhp = SH_CHAIN_PREV(tbhp, vc, __bh)) {
				/*
				 * UINT32_MAX marks a buffer pinned at the
				 * highest priority; leave it there, and
				 * leave already-low priorities alone.
				 */
				if (tbhp->priority != UINT32_MAX &&
				    tbhp->priority > MPOOL_BASE_DECREMENT) {
					tbhp->priority -= MPOOL_BASE_DECREMENT;
					if (tbhp->priority < priority)
						tbhp->priority = priority;
				}
			}
			priority = bhp->priority;
		}
		MUTEX_UNLOCK(env, hp->mtx_hash);
	}
	/* Reset complete; clear the progress counter. */
	c_mp->lru_reset = 0;

	/* "env" is unused when mutexes are compiled out. */
	COMPQUIET(env, NULL);
	return (0);
}

/*
 * __memp_unpin_buffers --
 *	Unpin buffers pinned by a thread.
328 * 329 * PUBLIC: int __memp_unpin_buffers __P((ENV *, DB_THREAD_INFO *)); 330 */ 331int 332__memp_unpin_buffers(env, ip) 333 ENV *env; 334 DB_THREAD_INFO *ip; 335{ 336 BH *bhp; 337 DB_MPOOL *dbmp; 338 DB_MPOOLFILE dbmf; 339 PIN_LIST *list, *lp; 340 REGINFO *rinfop, *reginfo; 341 int ret; 342 343 memset(&dbmf, 0, sizeof(dbmf)); 344 dbmf.env = env; 345 dbmf.flags = MP_DUMMY; 346 dbmp = env->mp_handle; 347 reginfo = env->reginfo; 348 349 list = R_ADDR(reginfo, ip->dbth_pinlist); 350 for (lp = list; lp < &list[ip->dbth_pinmax]; lp++) { 351 if (lp->b_ref == INVALID_ROFF) 352 continue; 353 rinfop = &dbmp->reginfo[lp->region]; 354 bhp = R_ADDR(rinfop, lp->b_ref); 355 dbmf.mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); 356 if ((ret = __memp_fput(&dbmf, ip, 357 (u_int8_t *)bhp + SSZA(BH, buf), 358 DB_PRIORITY_UNCHANGED)) != 0) 359 return (ret); 360 } 361 return (0); 362} 363