1226584Sdim//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====// 2226584Sdim// 3226584Sdim// The LLVM Compiler Infrastructure 4226584Sdim// 5226584Sdim// This file is distributed under the University of Illinois Open Source 6226584Sdim// License. See LICENSE.TXT for details. 7226584Sdim// 8226584Sdim//===----------------------------------------------------------------------===// 9226584Sdim// 10226584Sdim// This file implements Block Frequency class. 11226584Sdim// 12226584Sdim//===----------------------------------------------------------------------===// 13226584Sdim 14226584Sdim#include "llvm/Support/BranchProbability.h" 15226584Sdim#include "llvm/Support/BlockFrequency.h" 16226584Sdim#include "llvm/Support/raw_ostream.h" 17226584Sdim#include <cassert> 18226584Sdim 19226584Sdimusing namespace llvm; 20226584Sdim 21263508Sdim/// Multiply FREQ by N and store result in W array. 22263508Sdimstatic void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) { 23226584Sdim uint64_t u0 = freq & UINT32_MAX; 24226584Sdim uint64_t u1 = freq >> 32; 25226584Sdim 26263508Sdim // Represent 96-bit value as W[2]:W[1]:W[0]; 27226584Sdim uint64_t t = u0 * N; 28226584Sdim uint64_t k = t >> 32; 29263508Sdim W[0] = t; 30226584Sdim t = u1 * N + k; 31263508Sdim W[1] = t; 32263508Sdim W[2] = t >> 32; 33226584Sdim} 34226584Sdim 35263508Sdim/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a 36263508Sdim/// 32 bit unsigned integer, we can use a short division algorithm. 37263508Sdimstatic uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) { 38263508Sdim // We assume that W[2] is non-zero since if W[2] is not then the user should 39263508Sdim // just use hardware division. 40263508Sdim assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is " 41263508Sdim "zero, the caller should just use 64/32 hardware."); 42263508Sdim uint32_t Q[3] = { 0, 0, 0 }; 43226584Sdim 44263508Sdim // The generalized short division algorithm sets i to m + n - 1, where n is 45263508Sdim // the number of words in the divisior and m is the number of words by which 46263508Sdim // the divident exceeds the divisor (i.e. m + n == the length of the dividend 47263508Sdim // in words). Due to our assumption that W[2] is non-zero, we know that the 48263508Sdim // dividend is of length 3 implying since n is 1 that m = 2. Thus we set i to 49263508Sdim // m + n - 1 = 2 + 1 - 1 = 2. 50263508Sdim uint32_t R = 0; 51263508Sdim for (int i = 2; i >= 0; --i) { 52263508Sdim uint64_t PartialD = uint64_t(R) << 32 | W[i]; 53263508Sdim if (PartialD == 0) { 54263508Sdim Q[i] = 0; 55263508Sdim R = 0; 56263508Sdim } else if (PartialD < D) { 57263508Sdim Q[i] = 0; 58263508Sdim R = uint32_t(PartialD); 59263508Sdim } else if (PartialD == D) { 60263508Sdim Q[i] = 1; 61263508Sdim R = 0; 62263508Sdim } else { 63263508Sdim Q[i] = uint32_t(PartialD / D); 64263508Sdim R = uint32_t(PartialD - (Q[i] * D)); 65226584Sdim } 66226584Sdim } 67226584Sdim 68263508Sdim // If Q[2] is non-zero, then we overflowed. 69263508Sdim uint64_t Result; 70263508Sdim if (Q[2]) { 71263508Sdim Result = UINT64_MAX; 72263508Sdim R = D; 73263508Sdim } else { 74263508Sdim // Form the final uint64_t result, avoiding endianness issues. 75263508Sdim Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32); 76263508Sdim } 77226584Sdim 78263508Sdim if (Rout) 79263508Sdim *Rout = R; 80263508Sdim 81263508Sdim return Result; 82226584Sdim} 83226584Sdim 84263508Sdimuint32_t BlockFrequency::scale(uint32_t N, uint32_t D) { 85263508Sdim assert(D != 0 && "Division by zero"); 86226584Sdim 87263508Sdim // Calculate Frequency * N. 88263508Sdim uint64_t MulLo = (Frequency & UINT32_MAX) * N; 89263508Sdim uint64_t MulHi = (Frequency >> 32) * N; 90263508Sdim uint64_t MulRes = (MulHi << 32) + MulLo; 91226584Sdim 92263508Sdim // If the product fits in 64 bits, just use built-in division. 93263508Sdim if (MulHi <= UINT32_MAX && MulRes >= MulLo) { 94263508Sdim Frequency = MulRes / D; 95263508Sdim return MulRes % D; 96263508Sdim } 97226584Sdim 98263508Sdim // Product overflowed, use 96-bit operations. 99263508Sdim // 96-bit value represented as W[2]:W[1]:W[0]. 100263508Sdim uint32_t W[3]; 101263508Sdim uint32_t R; 102263508Sdim mult96bit(Frequency, N, W); 103263508Sdim Frequency = divrem96bit(W, D, &R); 104263508Sdim return R; 105263508Sdim} 106234353Sdim 107263508SdimBlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { 108263508Sdim scale(Prob.getNumerator(), Prob.getDenominator()); 109226584Sdim return *this; 110226584Sdim} 111226584Sdim 112226584Sdimconst BlockFrequency 113226584SdimBlockFrequency::operator*(const BranchProbability &Prob) const { 114226584Sdim BlockFrequency Freq(Frequency); 115226584Sdim Freq *= Prob; 116226584Sdim return Freq; 117226584Sdim} 118226584Sdim 119263508SdimBlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) { 120263508Sdim scale(Prob.getDenominator(), Prob.getNumerator()); 121263508Sdim return *this; 122263508Sdim} 123263508Sdim 124263508SdimBlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const { 125263508Sdim BlockFrequency Freq(Frequency); 126263508Sdim Freq /= Prob; 127263508Sdim return Freq; 128263508Sdim} 129263508Sdim 130226584SdimBlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) { 131226584Sdim uint64_t Before = Freq.Frequency; 132226584Sdim Frequency += Freq.Frequency; 133226584Sdim 134226584Sdim // If overflow, set frequency to the maximum value. 135226584Sdim if (Frequency < Before) 136226584Sdim Frequency = UINT64_MAX; 137226584Sdim 138226584Sdim return *this; 139226584Sdim} 140226584Sdim 141226584Sdimconst BlockFrequency 142226584SdimBlockFrequency::operator+(const BlockFrequency &Prob) const { 143226584Sdim BlockFrequency Freq(Frequency); 144226584Sdim Freq += Prob; 145226584Sdim return Freq; 146226584Sdim} 147226584Sdim 148263508Sdimuint32_t BlockFrequency::scale(const BranchProbability &Prob) { 149263508Sdim return scale(Prob.getNumerator(), Prob.getDenominator()); 150263508Sdim} 151263508Sdim 152226584Sdimvoid BlockFrequency::print(raw_ostream &OS) const { 153263508Sdim // Convert fixed-point number to decimal. 154263508Sdim OS << Frequency / getEntryFrequency() << "."; 155263508Sdim uint64_t Rem = Frequency % getEntryFrequency(); 156263508Sdim uint64_t Eps = 1; 157263508Sdim do { 158263508Sdim Rem *= 10; 159263508Sdim Eps *= 10; 160263508Sdim OS << Rem / getEntryFrequency(); 161263508Sdim Rem = Rem % getEntryFrequency(); 162263508Sdim } while (Rem >= Eps/2); 163226584Sdim} 164226584Sdim 165226584Sdimnamespace llvm { 166226584Sdim 167226584Sdimraw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) { 168226584Sdim Freq.print(OS); 169226584Sdim return OS; 170226584Sdim} 171226584Sdim 172226584Sdim} 173