1/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
2
3   This file is free software; you can redistribute it and/or modify it under
4   the terms of the GNU General Public License as published by the Free
5   Software Foundation; either version 3 of the License, or (at your option)
6   any later version.
7
8   This file is distributed in the hope that it will be useful, but WITHOUT
9   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11   for more details.
12
13   Under Section 7 of GPL version 3, you are granted additional
14   permissions described in the GCC Runtime Library Exception, version
15   3.1, as published by the Free Software Foundation.
16
17   You should have received a copy of the GNU General Public License and
18   a copy of the GCC Runtime Library Exception along with this program;
19   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
20   <http://www.gnu.org/licenses/>.  */
21
22#include <spu_intrinsics.h>
23
/* 128-bit (TImode) integer types handled by this file.  */
typedef unsigned int UTItype __attribute__ ((mode (TI)));
typedef int TItype __attribute__ ((mode (TI)));

/* Signed and unsigned 128-bit division entry points.  Parameter names
   match the definitions below: N is the dividend, D the divisor, and RP
   (which may be a null pointer) receives the remainder.  */
TItype __divti3 (TItype n, TItype d);
TItype __modti3 (TItype n, TItype d);
UTItype __udivti3 (UTItype n, UTItype d);
UTItype __umodti3 (UTItype n, UTItype d);
UTItype __udivmodti4 (UTItype num, UTItype den, UTItype *rp);
31
32union qword_UTItype
33  {
34    qword q;
35    UTItype t;
36  };
37
38inline static qword
39si_from_UTItype (UTItype t)
40{
41  union qword_UTItype u;
42  u.t = t;
43  return u.q;
44}
45
46inline static UTItype
47si_to_UTItype (qword q)
48{
49  union qword_UTItype u;
50  u.q = q;
51  return u.t;
52}
53
54inline static unsigned int
55count_leading_zeros (UTItype x)
56{
57  qword c = si_clz (*(qword *) & x);
58  qword cmp0 = si_cgti (c, 31);
59  qword cmp1 = si_and (cmp0, si_shlqbyi (cmp0, 4));
60  qword cmp2 = si_and (cmp1, si_shlqbyi (cmp0, 8));
61  qword s = si_a (c, si_and (cmp0, si_shlqbyi (c, 4)));
62  s = si_a (s, si_and (cmp1, si_shlqbyi (c, 8)));
63  s = si_a (s, si_and (cmp2, si_shlqbyi (c, 12)));
64  return si_to_uint (s);
65}
66
67/* Based on implementation of udivmodsi4, which is essentially
68 * an optimized version of libgcc/udivmodsi4.c
69        clz      %7,%2
70        clz      %4,%1
71        il       %5,1
72        fsmbi    %0,0
73        sf       %7,%4,%7
74        ori      %3,%1,0
75        shl      %5,%5,%7
76        shl      %4,%2,%7
771:      or       %8,%0,%5
78        rotmi    %5,%5,-1
79        clgt     %6,%4,%3
80        sf       %7,%4,%3
81        rotmi    %4,%4,-1
82        selb     %0,%8,%0,%6
83        selb     %3,%7,%3,%6
843:      brnz     %5,1b
85 */
86
87UTItype
88__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
89{
90  qword shift =
91    si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
92  qword n0 = si_from_UTItype (num);
93  qword d0 = si_from_UTItype (den);
94  qword bit = si_andi (si_fsmbi (1), 1);
95  qword r0 = si_il (0);
96  qword m1 = si_fsmbi (0x000f);
97  qword mask, r1, n1;
98
99  d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
100  bit = si_shlqbybi (si_shlqbi (bit, shift), shift);
101
102  do
103    {
104      r1 = si_or (r0, bit);
105
106      // n1 = n0 - d0 in TImode
107      n1 = si_bg (d0, n0);
108      n1 = si_shlqbyi (n1, 4);
109      n1 = si_sf (m1, n1);
110      n1 = si_bgx (d0, n0, n1);
111      n1 = si_shlqbyi (n1, 4);
112      n1 = si_sf (m1, n1);
113      n1 = si_bgx (d0, n0, n1);
114      n1 = si_shlqbyi (n1, 4);
115      n1 = si_sf (m1, n1);
116      n1 = si_sfx (d0, n0, n1);
117
118      mask = si_fsm (si_cgti (n1, -1));
119      r0 = si_selb (r0, r1, mask);
120      n0 = si_selb (n0, n1, mask);
121      bit = si_rotqmbii (bit, -1);
122      d0 = si_rotqmbii (d0, -1);
123    }
124  while (si_to_uint (si_orx (bit)));
125  if (rp)
126    *rp = si_to_UTItype (n0);
127  return si_to_UTItype (r0);
128}
129
130UTItype
131__udivti3 (UTItype n, UTItype d)
132{
133  return __udivmodti4 (n, d, (UTItype *)0);
134}
135
136UTItype
137__umodti3 (UTItype n, UTItype d)
138{
139  UTItype w;
140  __udivmodti4 (n, d, &w);
141  return w;
142}
143
144TItype
145__divti3 (TItype n, TItype d)
146{
147  int c = 0;
148  TItype w;
149
150  if (n < 0)
151    {
152        c = ~c;
153        n = -n;
154    }
155  if (d < 0)
156    {
157        c = ~c;
158        d = -d;
159    }
160
161  w = __udivmodti4 (n, d, (UTItype *)0);
162  if (c)
163    w = -w;
164  return w;
165}
166
167TItype
168__modti3 (TItype n, TItype d)
169{
170  int c = 0;
171  TItype w;
172
173  if (n < 0)
174    {
175        c = ~c;
176        n = -n;
177    }
178  if (d < 0)
179    {
180        c = ~c;
181        d = -d;
182    }
183
184  __udivmodti4 (n, d, (UTItype *) &w);
185  if (c)
186    w = -w;
187  return w;
188}
189