1Analysis of cycle costs for SH4:
2
3-> udiv_le128:            5
4-> udiv_ge64k:            6
5-> udiv udiv_25:         10
6-> pos_divisor:           3
7-> pos_result linear:     5
8-> pos_result - -:        5
9-> div_le128:             7
10-> div_ge64k:             9
11sdivsi3 -> udiv_25:            13
12udiv_25 -> div_ge64k_end:      15
13div_ge64k_end -> rts:          13
14div_le128 -> div_le128_2:       2, r1 latency 3
15udiv_le128 -> div_le128_2:      2, r1 latency 3
16(u)div_le128 -> div_by_1:       9
17(u)div_le128 -> rts:           17
18div_by_1(_neg) -> rts:          4
19div_ge64k -> div_r8:            2
20div_ge64k -> div_ge64k_2:       3
21udiv_ge64k -> udiv_r8:          3
22udiv_ge64k -> div_ge64k_2:      3 + LS
23(u)div_ge64k -> div_ge64k_end: 13
24div_r8 -> div_r8_2:             2
25udiv_r8 -> div_r8_2:            2 + LS
26(u)div_r8 -> rts:              21
27
28-> - + neg_result:             5
29-> + - neg_result:             5
30-> div_le128_neg:              7
31-> div_ge64k_neg:              9
32-> div_r8_neg:                11
33-> <64k div_ge64k_neg_end:    28
34-> >=64k div_ge64k_neg_end:   22
35div_ge64k_neg_end ft -> rts:  14
36div_r8_neg_end -> rts:         4
37div_r8_neg -> div_r8_neg_end: 18
38div_le128_neg -> div_by_1_neg: 4
39div_le128_neg -> rts:         18
40
41         sh4-200    absolute divisor range:
42            1  [2..128]  [129..64K) [64K..|dividend|/256] >=64K,>|dividend|/256
43udiv:      18     22         38            32                   30
44sdiv pos:  20     24         41            35                   32
45sdiv neg:  15     25         42            36                   33
46
47         sh4-300    absolute divisor range:
48                 8 bit      16 bit       24 bit              > 24 bit
49udiv              15         35            28                   25
50sdiv              14         36            34                   31
51
52
53fp-based:
54
55unsigned: 42 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
56signed: 33 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
57
58call-div1:    divisor range:
59              [1..64K)  >= 64K
60unsigned:       63        58
61signed:         76        76
62
63SFUNC_STATIC call overhead:
64mov.l 0f,r1
65bsrf r1
66
67SFUNC_GOT call overhead - current:
68mov.l 0f,r1
69mova 0f,r0
70mov.l 1f,r2
71add r1,r0
72mov.l @(r0,r2),r0
73jmp @r0
74; 3 cycles worse than SFUNC_STATIC
75
76SFUNC_GOT call overhead - improved assembler:
77mov.l 0f,r1
78mova 0f,r0
79mov.l @(r0,r1),r0
80jmp @r0
81; 2 cycles worse than SFUNC_STATIC
82
83
84Copyright (C) 2006-2020 Free Software Foundation, Inc.
85
86Copying and distribution of this file, with or without modification,
87are permitted in any medium without royalty provided the copyright
88notice and this notice are preserved.
89