BlockFrequency.cpp (234353) | BlockFrequency.cpp (261991) |
---|---|
1//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// --- 4 unchanged lines hidden (view full) --- 13 14#include "llvm/Support/BranchProbability.h" 15#include "llvm/Support/BlockFrequency.h" 16#include "llvm/Support/raw_ostream.h" 17#include <cassert> 18 19using namespace llvm; 20 | 1//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// --- 4 unchanged lines hidden (view full) --- 13 14#include "llvm/Support/BranchProbability.h" 15#include "llvm/Support/BlockFrequency.h" 16#include "llvm/Support/raw_ostream.h" 17#include <cassert> 18 19using namespace llvm; 20 |
21namespace { 22 23/// mult96bit - Multiply FREQ by N and store result in W array. 24void mult96bit(uint64_t freq, uint32_t N, uint64_t W[2]) { | 21/// Multiply FREQ by N and store result in W array. 22static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) { |
25 uint64_t u0 = freq & UINT32_MAX; 26 uint64_t u1 = freq >> 32; 27 | 23 uint64_t u0 = freq & UINT32_MAX; 24 uint64_t u1 = freq >> 32; 25 |
28 // Represent 96-bit value as w[2]:w[1]:w[0]; 29 uint32_t w[3] = { 0, 0, 0 }; 30 | 26 // Represent 96-bit value as W[2]:W[1]:W[0]; |
31 uint64_t t = u0 * N; 32 uint64_t k = t >> 32; | 27 uint64_t t = u0 * N; 28 uint64_t k = t >> 32; |
33 w[0] = t; | 29 W[0] = t; |
34 t = u1 * N + k; | 30 t = u1 * N + k; |
35 w[1] = t; 36 w[2] = t >> 32; 37 38 // W[1] - higher bits. 39 // W[0] - lower bits. 40 W[0] = w[0] + ((uint64_t) w[1] << 32); 41 W[1] = w[2]; | 31 W[1] = t; 32 W[2] = t >> 32; |
42} 43 | 33} 34 |
35/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a 36/// 32 bit unsigned integer, we can use a short division algorithm. 37static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) { 38 // We assume that W[2] is non-zero since if W[2] is not then the user should 39 // just use hardware division. 40 assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is " 41 "zero, the caller should just use 64/32 hardware."); 42 uint32_t Q[3] = { 0, 0, 0 }; |
|
44 | 43 |
45/// div96bit - Divide 96-bit value stored in W array by D. Return 64-bit frequency. 46uint64_t div96bit(uint64_t W[2], uint32_t D) { 47 uint64_t y = W[0]; 48 uint64_t x = W[1]; 49 int i; 50 51 for (i = 1; i <= 64 && x; ++i) { 52 uint32_t t = (int)x >> 31; 53 x = (x << 1) | (y >> 63); 54 y = y << 1; 55 if ((x | t) >= D) { 56 x -= D; 57 ++y; | 44 // The generalized short division algorithm sets i to m + n - 1, where n is 45 // the number of words in the divisior and m is the number of words by which 46 // the divident exceeds the divisor (i.e. m + n == the length of the dividend 47 // in words). Due to our assumption that W[2] is non-zero, we know that the 48 // dividend is of length 3 implying since n is 1 that m = 2. Thus we set i to 49 // m + n - 1 = 2 + 1 - 1 = 2. 50 uint32_t R = 0; 51 for (int i = 2; i >= 0; --i) { 52 uint64_t PartialD = uint64_t(R) << 32 | W[i]; 53 if (PartialD == 0) { 54 Q[i] = 0; 55 R = 0; 56 } else if (PartialD < D) { 57 Q[i] = 0; 58 R = uint32_t(PartialD); 59 } else if (PartialD == D) { 60 Q[i] = 1; 61 R = 0; 62 } else { 63 Q[i] = uint32_t(PartialD / D); 64 R = uint32_t(PartialD - (Q[i] * D)); |
58 } 59 } 60 | 65 } 66 } 67 |
61 return y << (64 - i + 1); 62} | 68 // If Q[2] is non-zero, then we overflowed. 69 uint64_t Result; 70 if (Q[2]) { 71 Result = UINT64_MAX; 72 R = D; 73 } else { 74 // Form the final uint64_t result, avoiding endianness issues. 75 Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32); 76 } |
63 | 77 |
78 if (Rout) 79 *Rout = R; 80 81 return Result; |
|
64} 65 | 82} 83 |
84uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) { 85 assert(D != 0 && "Division by zero"); |
|
66 | 86 |
67BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { 68 uint32_t n = Prob.getNumerator(); 69 uint32_t d = Prob.getDenominator(); | 87 // Calculate Frequency * N. 88 uint64_t MulLo = (Frequency & UINT32_MAX) * N; 89 uint64_t MulHi = (Frequency >> 32) * N; 90 uint64_t MulRes = (MulHi << 32) + MulLo; |
70 | 91 |
71 assert(n <= d && "Probability must be less or equal to 1."); 72 73 // Calculate Frequency * n. 74 uint64_t mulLo = (Frequency & UINT32_MAX) * n; 75 uint64_t mulHi = (Frequency >> 32) * n; 76 uint64_t mulRes = (mulHi << 32) + mulLo; 77 78 // If there was overflow use 96-bit operations. 79 if (mulHi > UINT32_MAX || mulRes < mulLo) { 80 // 96-bit value represented as W[1]:W[0]. 81 uint64_t W[2]; 82 83 // Probability is less or equal to 1 which means that results must fit 84 // 64-bit. 85 mult96bit(Frequency, n, W); 86 Frequency = div96bit(W, d); 87 return *this; | 92 // If the product fits in 64 bits, just use built-in division. 93 if (MulHi <= UINT32_MAX && MulRes >= MulLo) { 94 Frequency = MulRes / D; 95 return MulRes % D; |
88 } 89 | 96 } 97 |
90 Frequency = mulRes / d; | 98 // Product overflowed, use 96-bit operations. 99 // 96-bit value represented as W[2]:W[1]:W[0]. 100 uint32_t W[3]; 101 uint32_t R; 102 mult96bit(Frequency, N, W); 103 Frequency = divrem96bit(W, D, &R); 104 return R; 105} 106 107BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { 108 scale(Prob.getNumerator(), Prob.getDenominator()); |
91 return *this; 92} 93 94const BlockFrequency 95BlockFrequency::operator*(const BranchProbability &Prob) const { 96 BlockFrequency Freq(Frequency); 97 Freq *= Prob; 98 return Freq; 99} 100 | 109 return *this; 110} 111 112const BlockFrequency 113BlockFrequency::operator*(const BranchProbability &Prob) const { 114 BlockFrequency Freq(Frequency); 115 Freq *= Prob; 116 return Freq; 117} 118 |
119BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) { 120 scale(Prob.getDenominator(), Prob.getNumerator()); 121 return *this; 122} 123 124BlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const { 125 BlockFrequency Freq(Frequency); 126 Freq /= Prob; 127 return Freq; 128} 129 |
|
101BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) { 102 uint64_t Before = Freq.Frequency; 103 Frequency += Freq.Frequency; 104 105 // If overflow, set frequency to the maximum value. 106 if (Frequency < Before) 107 Frequency = UINT64_MAX; 108 109 return *this; 110} 111 112const BlockFrequency 113BlockFrequency::operator+(const BlockFrequency &Prob) const { 114 BlockFrequency Freq(Frequency); 115 Freq += Prob; 116 return Freq; 117} 118 | 130BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) { 131 uint64_t Before = Freq.Frequency; 132 Frequency += Freq.Frequency; 133 134 // If overflow, set frequency to the maximum value. 135 if (Frequency < Before) 136 Frequency = UINT64_MAX; 137 138 return *this; 139} 140 141const BlockFrequency 142BlockFrequency::operator+(const BlockFrequency &Prob) const { 143 BlockFrequency Freq(Frequency); 144 Freq += Prob; 145 return Freq; 146} 147 |
148uint32_t BlockFrequency::scale(const BranchProbability &Prob) { 149 return scale(Prob.getNumerator(), Prob.getDenominator()); 150} 151 |
|
119void BlockFrequency::print(raw_ostream &OS) const { | 152void BlockFrequency::print(raw_ostream &OS) const { |
120 OS << Frequency; | 153 // Convert fixed-point number to decimal. 154 OS << Frequency / getEntryFrequency() << "."; 155 uint64_t Rem = Frequency % getEntryFrequency(); 156 uint64_t Eps = 1; 157 do { 158 Rem *= 10; 159 Eps *= 10; 160 OS << Rem / getEntryFrequency(); 161 Rem = Rem % getEntryFrequency(); 162 } while (Rem >= Eps/2); |
121} 122 123namespace llvm { 124 125raw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) { 126 Freq.print(OS); 127 return OS; 128} 129 130} | 163} 164 165namespace llvm { 166 167raw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) { 168 Freq.print(OS); 169 return OS; 170} 171 172} |