1226584Sdim//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====//
2226584Sdim//
3226584Sdim//                     The LLVM Compiler Infrastructure
4226584Sdim//
5226584Sdim// This file is distributed under the University of Illinois Open Source
6226584Sdim// License. See LICENSE.TXT for details.
7226584Sdim//
8226584Sdim//===----------------------------------------------------------------------===//
9226584Sdim//
10226584Sdim// This file implements Block Frequency class.
11226584Sdim//
12226584Sdim//===----------------------------------------------------------------------===//
13226584Sdim
14226584Sdim#include "llvm/Support/BranchProbability.h"
15226584Sdim#include "llvm/Support/BlockFrequency.h"
16226584Sdim#include "llvm/Support/raw_ostream.h"
17226584Sdim#include <cassert>
18226584Sdim
19226584Sdimusing namespace llvm;
20226584Sdim
21263508Sdim/// Multiply FREQ by N and store result in W array.
22263508Sdimstatic void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) {
23226584Sdim  uint64_t u0 = freq & UINT32_MAX;
24226584Sdim  uint64_t u1 = freq >> 32;
25226584Sdim
26263508Sdim  // Represent 96-bit value as W[2]:W[1]:W[0];
27226584Sdim  uint64_t t = u0 * N;
28226584Sdim  uint64_t k = t >> 32;
29263508Sdim  W[0] = t;
30226584Sdim  t = u1 * N + k;
31263508Sdim  W[1] = t;
32263508Sdim  W[2] = t >> 32;
33226584Sdim}
34226584Sdim
35263508Sdim/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a
36263508Sdim/// 32 bit unsigned integer, we can use a short division algorithm.
37263508Sdimstatic uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) {
38263508Sdim  // We assume that W[2] is non-zero since if W[2] is not then the user should
39263508Sdim  // just use hardware division.
40263508Sdim  assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is "
41263508Sdim         "zero, the caller should just use 64/32 hardware.");
42263508Sdim  uint32_t Q[3] = { 0, 0, 0 };
43226584Sdim
44263508Sdim  // The generalized short division algorithm sets i to m + n - 1, where n is
45263508Sdim  // the number of words in the divisior and m is the number of words by which
46263508Sdim  // the divident exceeds the divisor (i.e. m + n == the length of the dividend
47263508Sdim  // in words). Due to our assumption that W[2] is non-zero, we know that the
48263508Sdim  // dividend is of length 3 implying since n is 1 that m = 2. Thus we set i to
49263508Sdim  // m + n - 1 = 2 + 1 - 1 = 2.
50263508Sdim  uint32_t R = 0;
51263508Sdim  for (int i = 2; i >= 0; --i) {
52263508Sdim    uint64_t PartialD = uint64_t(R) << 32 | W[i];
53263508Sdim    if (PartialD == 0) {
54263508Sdim      Q[i] = 0;
55263508Sdim      R = 0;
56263508Sdim    } else if (PartialD < D) {
57263508Sdim      Q[i] = 0;
58263508Sdim      R = uint32_t(PartialD);
59263508Sdim    } else if (PartialD == D) {
60263508Sdim      Q[i] = 1;
61263508Sdim      R = 0;
62263508Sdim    } else {
63263508Sdim      Q[i] = uint32_t(PartialD / D);
64263508Sdim      R = uint32_t(PartialD - (Q[i] * D));
65226584Sdim    }
66226584Sdim  }
67226584Sdim
68263508Sdim  // If Q[2] is non-zero, then we overflowed.
69263508Sdim  uint64_t Result;
70263508Sdim  if (Q[2]) {
71263508Sdim    Result = UINT64_MAX;
72263508Sdim    R = D;
73263508Sdim  } else {
74263508Sdim    // Form the final uint64_t result, avoiding endianness issues.
75263508Sdim    Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32);
76263508Sdim  }
77226584Sdim
78263508Sdim  if (Rout)
79263508Sdim    *Rout = R;
80263508Sdim
81263508Sdim  return Result;
82226584Sdim}
83226584Sdim
84263508Sdimuint32_t BlockFrequency::scale(uint32_t N, uint32_t D) {
85263508Sdim  assert(D != 0 && "Division by zero");
86226584Sdim
87263508Sdim  // Calculate Frequency * N.
88263508Sdim  uint64_t MulLo = (Frequency & UINT32_MAX) * N;
89263508Sdim  uint64_t MulHi = (Frequency >> 32) * N;
90263508Sdim  uint64_t MulRes = (MulHi << 32) + MulLo;
91226584Sdim
92263508Sdim  // If the product fits in 64 bits, just use built-in division.
93263508Sdim  if (MulHi <= UINT32_MAX && MulRes >= MulLo) {
94263508Sdim    Frequency = MulRes / D;
95263508Sdim    return MulRes % D;
96263508Sdim  }
97226584Sdim
98263508Sdim  // Product overflowed, use 96-bit operations.
99263508Sdim  // 96-bit value represented as W[2]:W[1]:W[0].
100263508Sdim  uint32_t W[3];
101263508Sdim  uint32_t R;
102263508Sdim  mult96bit(Frequency, N, W);
103263508Sdim  Frequency = divrem96bit(W, D, &R);
104263508Sdim  return R;
105263508Sdim}
106234353Sdim
107263508SdimBlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
108263508Sdim  scale(Prob.getNumerator(), Prob.getDenominator());
109226584Sdim  return *this;
110226584Sdim}
111226584Sdim
112226584Sdimconst BlockFrequency
113226584SdimBlockFrequency::operator*(const BranchProbability &Prob) const {
114226584Sdim  BlockFrequency Freq(Frequency);
115226584Sdim  Freq *= Prob;
116226584Sdim  return Freq;
117226584Sdim}
118226584Sdim
119263508SdimBlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) {
120263508Sdim  scale(Prob.getDenominator(), Prob.getNumerator());
121263508Sdim  return *this;
122263508Sdim}
123263508Sdim
124263508SdimBlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const {
125263508Sdim  BlockFrequency Freq(Frequency);
126263508Sdim  Freq /= Prob;
127263508Sdim  return Freq;
128263508Sdim}
129263508Sdim
130226584SdimBlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) {
131226584Sdim  uint64_t Before = Freq.Frequency;
132226584Sdim  Frequency += Freq.Frequency;
133226584Sdim
134226584Sdim  // If overflow, set frequency to the maximum value.
135226584Sdim  if (Frequency < Before)
136226584Sdim    Frequency = UINT64_MAX;
137226584Sdim
138226584Sdim  return *this;
139226584Sdim}
140226584Sdim
141226584Sdimconst BlockFrequency
142226584SdimBlockFrequency::operator+(const BlockFrequency &Prob) const {
143226584Sdim  BlockFrequency Freq(Frequency);
144226584Sdim  Freq += Prob;
145226584Sdim  return Freq;
146226584Sdim}
147226584Sdim
148263508Sdimuint32_t BlockFrequency::scale(const BranchProbability &Prob) {
149263508Sdim  return scale(Prob.getNumerator(), Prob.getDenominator());
150263508Sdim}
151263508Sdim
152226584Sdimvoid BlockFrequency::print(raw_ostream &OS) const {
153263508Sdim  // Convert fixed-point number to decimal.
154263508Sdim  OS << Frequency / getEntryFrequency() << ".";
155263508Sdim  uint64_t Rem = Frequency % getEntryFrequency();
156263508Sdim  uint64_t Eps = 1;
157263508Sdim  do {
158263508Sdim    Rem *= 10;
159263508Sdim    Eps *= 10;
160263508Sdim    OS << Rem / getEntryFrequency();
161263508Sdim    Rem = Rem % getEntryFrequency();
162263508Sdim  } while (Rem >= Eps/2);
163226584Sdim}
164226584Sdim
165226584Sdimnamespace llvm {
166226584Sdim
167226584Sdimraw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) {
168226584Sdim  Freq.print(OS);
169226584Sdim  return OS;
170226584Sdim}
171226584Sdim
172226584Sdim}
173