/*
 * Floating proportions
 *
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised,
 * i.e.:
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x, so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be applied to mitigate the
 *   wraparound issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */

#include <linux/proportions.h>
#include <linux/rcupdate.h>

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	int err;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	err = percpu_counter_init(&pd->pg[0].events, 0);
	if (err)
		goto out;

	err = percpu_counter_init(&pd->pg[1].events, 0);
	if (err)
		percpu_counter_destroy(&pd->pg[0].events);

out:
	return err;
}
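
/*
 * A worked example of the scheme described above (the numbers are
 * illustrative only): take shift = 4, so period = 2^4 = 16 and
 * period/2 = 1UL << 3 = 8. A global event count of t = 27 (0b11011)
 * then decomposes into:
 *
 *   c = t / (period/2) = 27 >> 3 = 3   (elapsed half-period count)
 *   t % (period/2)     = 27 & 7  = 3   (position in the current period)
 *   denominator        = 8 + 3   = 11
 *
 * A local counter x_{j} = 80 that was last normalised at c_{j} = 0 has
 * missed c - c_{j} = 3 expirations and is lazily decayed to
 * 80 / 2^3 = 10 before use, giving p_{j} = 10/11.
 */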
/*
 * We have two copies, and flip between them to make the update seem atomic.
 * The update is not truly atomic wrt the events counter, but it is
 * internally consistent with the bit layout, which depends on the shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;
	if (!offset)
		goto out;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_set(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

	synchronize_rcu();

out:
	mutex_unlock(&pd->mutex);
}

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
__acquires(RCU)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from prop_change_shift()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
__releases(RCU)
{
	rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}

/*
 * PERCPU
 */

/*
 * Batch size for the percpu_counter; grows with the number of possible
 * CPUs to limit cross-CPU traffic at the cost of some counter drift.
 */
#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	return percpu_counter_init(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}
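
/*
 * A minimal usage sketch of the descriptor API above (the names
 * my_prop and my_local are hypothetical; error handling elided):
 *
 *	static struct prop_descriptor my_prop;
 *	static struct prop_local_percpu my_local;
 *
 *	prop_descriptor_init(&my_prop, 10);	(period = 2^10 events)
 *	prop_local_init_percpu(&my_local);
 *	...
 *	prop_change_shift(&my_prop, 12);	(slow the decay down)
 *
 * Because readers obtain the active prop_global via prop_get_global(),
 * they observe either the old or the new shift in full, never a mix of
 * the two.
 */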
/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

	/*
	 * For each missed period, we halve the local counter; basically:
	 *
	 *   pl->events >> (global_period - pl->period);
	 *
	 * Since we cannot shift a distributed percpu counter in place, add
	 * the difference between the decayed and the current value instead.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (period < BITS_PER_LONG) {
		s64 val = percpu_counter_read(&pl->events);

		if (val < (nr_cpu_ids * PROP_BATCH))
			val = percpu_counter_sum(&pl->events);

		__percpu_counter_add(&pl->events, -val + (val >> period),
					PROP_BATCH);
	} else
		percpu_counter_set(&pl->events, 0);

	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 * ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
			   struct prop_local_percpu *pl, long frac)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);

	if (unlikely(frac != PROP_FRAC_BASE)) {
		unsigned long period_2 = 1UL << (pg->shift - 1);
		unsigned long counter_mask = period_2 - 1;
		unsigned long global_count;
		long numerator, denominator;

		numerator = percpu_counter_read_positive(&pl->events);
		global_count = percpu_counter_read(&pg->events);
		denominator = period_2 + (global_count & counter_mask);

		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
			goto out_put;
	}

	percpu_counter_add(&pl->events, 1);
	percpu_counter_add(&pg->events, 1);

out_put:
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion:
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
			  struct prop_local_percpu *pl,
			  long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;
	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}
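
/*
 * The normalisation above boils down to the following model (a plain C
 * sketch, illustrative only; decay() is a hypothetical helper, not part
 * of this file):
 *
 *	unsigned long decay(unsigned long x, unsigned long missed)
 *	{
 *		if (missed >= BITS_PER_LONG)
 *			return 0;		(decayed to nothing)
 *		return x >> missed;		(x / 2^missed)
 *	}
 *
 * e.g. decay(80, 3) == 10, matching three successive halvings:
 * 80 -> 40 -> 20 -> 10. The single variant below can apply the shift
 * directly; the percpu variant must go through the add-the-delta dance.
 */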
355 */ 356 if (pl->period == global_period) 357 return; 358 359 spin_lock_irqsave(&pl->lock, flags); 360 prop_adjust_shift(&pl->shift, &pl->period, pg->shift); 361 /* 362 * For each missed period, we half the local counter. 363 */ 364 period = (global_period - pl->period) >> (pg->shift - 1); 365 if (likely(period < BITS_PER_LONG)) 366 pl->events >>= period; 367 else 368 pl->events = 0; 369 pl->period = global_period; 370 spin_unlock_irqrestore(&pl->lock, flags); 371} 372 373/* 374 * ++x_{j}, ++t 375 */ 376void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) 377{ 378 struct prop_global *pg = prop_get_global(pd); 379 380 prop_norm_single(pg, pl); 381 pl->events++; 382 percpu_counter_add(&pg->events, 1); 383 prop_put_global(pd, pg); 384} 385 386/* 387 * Obtain a fraction of this proportion 388 * 389 * p_{j} = x_{j} / (period/2 + t % period/2) 390 */ 391void prop_fraction_single(struct prop_descriptor *pd, 392 struct prop_local_single *pl, 393 long *numerator, long *denominator) 394{ 395 struct prop_global *pg = prop_get_global(pd); 396 unsigned long period_2 = 1UL << (pg->shift - 1); 397 unsigned long counter_mask = period_2 - 1; 398 unsigned long global_count; 399 400 prop_norm_single(pg, pl); 401 *numerator = pl->events; 402 403 global_count = percpu_counter_read(&pg->events); 404 *denominator = period_2 + (global_count & counter_mask); 405 406 prop_put_global(pd, pg); 407} 408