1/*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#ifndef AVCODEC_X86_CABAC_H
22#define AVCODEC_X86_CABAC_H
23
24#include "libavcodec/cabac.h"
25#include "libavutil/attributes.h"
26#include "libavutil/macros.h"
27#include "libavutil/x86/asm.h"
28#include "config.h"
29
/*
 * BROKEN_COMPILER evaluates to 1 for the compiler families listed below and
 * to 0 for everything else. The inline-asm functions further down in this
 * header are only compiled when it is 0.
 */
/* clang older than 2.10 when __i386 is defined */
#if defined(__i386) && defined(__clang__) && \
    (__clang_major__ < 2 || (__clang_major__ == 2 && __clang_minor__ < 10))
#   define BROKEN_COMPILER 1
/* non-clang compiler defining __llvm__ that reports itself as GCC 4.2.0/4.2.1 */
#elif !defined(__clang__) && defined(__llvm__) && \
      __GNUC__ == 4 && __GNUC_MINOR__ == 2 && __GNUC_PATCHLEVEL__ <= 1
#   define BROKEN_COMPILER 1
/* Intel compiler with _MSC_VER defined */
#elif defined(__INTEL_COMPILER) && defined(_MSC_VER)
#   define BROKEN_COMPILER 1
#else
#   define BROKEN_COMPILER 0
#endif
37
38#if HAVE_INLINE_ASM
39
/*
 * Unless the including file already picked a mode, derive it from the build
 * configuration: unchecked exactly when CONFIG_SAFE_BITSTREAM_READER is 0.
 */
#if !defined(UNCHECKED_BITSTREAM_READER)
#   define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
#endif

/*
 * END_CHECK(end): asm fragment placed by BRANCHLESS_GET_CABAC right after the
 * bytestream pointer has been copied into REG_c.
 *
 * Checked reader:   compares REG_c with the operand string `end` and jumps
 *                   forward to local label 1 when REG_c >= end (signed jge).
 * Unchecked reader: expands to an empty string.
 */
#if !(UNCHECKED_BITSTREAM_READER)
#define END_CHECK(end) \
        "cmp    "end"       , %%"REG_c"                                 \n\t"\
        "jge    1f                                                      \n\t"
#else
#define END_CHECK(end) ""
#endif
51
52#ifdef BROKEN_RELOCATIONS
53#define TABLES_ARG , "r"(tables)
54
/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp)
 * Variant used when BROKEN_RELOCATIONS is defined.
 *
 * State on entry, as prepared by BRANCHLESS_GET_CABAC:
 *   range = byte just loaded from the table at lps_off
 *   %ecx  = previous range - that byte
 *   tmp   = %ecx << 17
 *
 * Both variants derive an all-ones / all-zeros mask from comparing `tmp`
 * with `low` (signed difference in the non-cmov variant, unsigned borrow in
 * the cmov variant):
 *   mask set:   range keeps the table byte, low -= (%ecx << 17), ret is
 *               bitwise inverted
 *   mask clear: low and ret are unchanged and range becomes %ecx (the cmov
 *               variant only replaces range when tmp is strictly above low)
 * %ecx/%rcx is overwritten in both variants.
 */
#if !HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
/* avoids cmov, sbb and 64-bit shifts, which are slow on P4 Prescott */ \
        "sub    "low"       , "tmp"                        \n\t" /* tmp -= low                      */\
        "sar    $31         , "tmp"                        \n\t" /* tmp = sign mask of the result   */\
        "sub    %%ecx       , "range"                      \n\t" /* range -= ecx                    */\
        "and    "tmp"       , "range"                      \n\t" /* keep the difference if mask set */\
        "add    %%ecx       , "range"                      \n\t" /* range += ecx                    */\
        "shl    $17         , %%ecx                        \n\t" /* ecx <<= 17                      */\
        "and    "tmp"       , %%ecx                        \n\t" /* ecx &= mask                     */\
        "sub    %%ecx       , "low"                        \n\t" /* low -= ecx                      */\
        "xor    "tmp"       , "ret"                        \n\t" /* ret ^= mask                     */\
        "movslq "ret"       , "retq"                       \n\t" /* sign-extend ret into retq       */
#else /* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
        "cmp    "low"       , "tmp"                        \n\t" /* flags from tmp - low                 */\
        "cmova  %%ecx       , "range"                      \n\t" /* range = ecx if tmp > low (unsigned)  */\
        "sbb    %%rcx       , %%rcx                        \n\t" /* rcx = -1 if the cmp borrowed, else 0 */\
        "and    %%ecx       , "tmp"                        \n\t" /* tmp &= mask                          */\
        "xor    %%rcx       , "retq"                       \n\t" /* retq ^= mask (64-bit)                */\
        "sub    "tmp"       , "low"                        \n\t" /* low -= tmp                           */
#endif /* !HAVE_FAST_CMOV */
77
/*
 * Refill tail of BRANCHLESS_GET_CABAC (BROKEN_RELOCATIONS variant).
 * Runs only when the low 16 bits of `low` are zero. Reads two bytes at the
 * bytestream pointer, advances the stored pointer by 2 (skipped when
 * END_CHECK jumps to label 1) and merges the bytes into `low`.
 * Overwrites REG_c and `tmp`.
 */
#define BRANCHLESS_GET_CABAC_REFILL(low, tmp, byte, end, norm_off, tables) \
        "mov    "byte"      , %%"REG_c"                                 \n\t" /* REG_c = bytestream pointer          */\
        END_CHECK(end)                                                        /* optional bound check -> label 1     */\
        "add"OPSIZE" $2     , "byte"                                    \n\t" /* stored pointer += 2                 */\
        "1:                                                             \n\t"\
        "movzwl (%%"REG_c") , "tmp"                                     \n\t" /* tmp = 16 bits at the old pointer    */\
        "lea    -1("low")   , %%ecx                                     \n\t" /* ecx = low - 1                       */\
        "xor    "low"       , %%ecx                                     \n\t" /* ecx = low ^ (low - 1)               */\
        "shr    $15         , %%ecx                                     \n\t" /* ecx >>= 15                          */\
        "bswap  "tmp"                                                   \n\t" /* with the shift below: the two bytes */\
        "shr    $15         , "tmp"                                     \n\t" /* as a big-endian value, times 2      */\
        "movzbl "norm_off"("tables", %%rcx), %%ecx                      \n\t" /* ecx = tables[norm_off + rcx]        */\
        "sub    $0xFFFF     , "tmp"                                     \n\t" /* tmp -= 0xFFFF                       */\
        "neg    %%ecx                                                   \n\t" /* with the add below: ecx = 7 - ecx   */\
        "add    $7          , %%ecx                                     \n\t"\
        "shl    %%cl        , "tmp"                                     \n\t" /* tmp <<= cl                          */\
        "add    "tmp"       , "low"                                     \n\t" /* low += tmp                          */

/*
 * BRANCHLESS_GET_CABAC(...), BROKEN_RELOCATIONS variant: every table access
 * goes through the base register passed as `tables` instead of an absolute
 * symbol reference.
 *
 * All arguments are string literals naming asm operands:
 *   ret/retq       32/64-bit names of the result register
 *   statep         memory operand of the context state byte
 *   low/lowword    32/16-bit names of the register holding low
 *   range/rangeq   32/64-bit names of the register holding range
 *   tmp/tmpbyte    32/8-bit names of a scratch register
 *   byte, end      memory operands of the bytestream pointer and its end
 *   norm_off, lps_off, mlps_off   byte offsets into the table block
 *   tables         register holding the table base address
 * Overwrites REG_c; uses local labels 1 and 2.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
        "movzbl "statep"    , "ret"                                     \n\t" /* ret = state byte                    */\
        "mov    "range"     , "tmp"                                     \n\t" /* tmp = range                         */\
        "and    $0xC0       , "range"                                   \n\t" /* range &= 0xC0                       */\
        "lea    ("ret", "range", 2), %%ecx                              \n\t" /* ecx = ret + 2 * range               */\
        "movzbl "lps_off"("tables", %%rcx), "range"                     \n\t" /* range = tables[lps_off + rcx]       */\
        "sub    "range"     , "tmp"                                     \n\t" /* tmp = old range - table byte        */\
        "mov    "tmp"       , %%ecx                                     \n\t" /* ecx = tmp                           */\
        "shl    $17         , "tmp"                                     \n\t" /* tmp <<= 17                          */\
        BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp)               /* select range, adjust low and ret    */\
        "movzbl "norm_off"("tables", "rangeq"), %%ecx                   \n\t" /* ecx = tables[norm_off + range]      */\
        "shl    %%cl        , "range"                                   \n\t" /* range <<= cl                        */\
        "movzbl "mlps_off"+128("tables", "retq"), "tmp"                 \n\t" /* tmp = tables[mlps_off + 128 + ret]  */\
        "shl    %%cl        , "low"                                     \n\t" /* low <<= cl                          */\
        "mov    "tmpbyte"   , "statep"                                  \n\t" /* store the looked-up state byte      */\
        "test   "lowword"   , "lowword"                                 \n\t" /* low 16 bits of low == 0 ?           */\
        "jnz    2f                                                      \n\t" /* no: nothing to refill               */\
        BRANCHLESS_GET_CABAC_REFILL(low, tmp, byte, end, norm_off, tables)   \
        "2:                                                             \n\t"
112
113#else /* BROKEN_RELOCATIONS */
114#define TABLES_ARG NAMED_CONSTRAINTS_ARRAY_ADD(ff_h264_cabac_tables)
115#define RIP_ARG
116
/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)
 * Variant used when BROKEN_RELOCATIONS is not defined.
 *
 * State on entry, as prepared by BRANCHLESS_GET_CABAC:
 *   range = byte just loaded from the table at lps_off
 *   tmp   = previous range - that byte
 *
 * Both variants first copy tmp to %ecx and scale tmp by << 17, then derive an
 * all-ones / all-zeros mask from comparing `tmp` with `low` (signed
 * difference in the non-cmov variant, unsigned borrow in the cmov variant):
 *   mask set:   range keeps the table byte, low -= (%ecx << 17), ret is
 *               bitwise inverted
 *   mask clear: low and ret are unchanged and range becomes %ecx (the cmov
 *               variant only replaces range when tmp is strictly above low)
 * %ecx is overwritten in both variants.
 */
#if !HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
        "mov    "tmp"       , %%ecx     \n\t" /* ecx = tmp                                   */\
        "shl    $17         , "tmp"     \n\t" /* tmp <<= 17                                  */\
        "sub    "low"       , "tmp"     \n\t" /* tmp -= low                                  */\
        "sar    $31         , "tmp"     \n\t" /* tmp = sign mask of the result               */\
        "sub    %%ecx       , "range"   \n\t" /* range = table byte - ecx                    */\
        "and    "tmp"       , "range"   \n\t" /* keep that difference only if mask set       */\
        "add    %%ecx       , "range"   \n\t" /* range += ecx, giving the new range          */\
        "shl    $17         , %%ecx     \n\t" /* ecx <<= 17                                  */\
        "and    "tmp"       , %%ecx     \n\t" /* ecx &= mask                                 */\
        "sub    %%ecx       , "low"     \n\t" /* low -= ecx                                  */\
        "xor    "tmp"       , "ret"     \n\t" /* ret ^= mask                                 */
#else /* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
        "mov    "tmp"       , %%ecx     \n\t" /* ecx = tmp                                   */\
        "shl    $17         , "tmp"     \n\t" /* tmp <<= 17                                  */\
        "cmp    "low"       , "tmp"     \n\t" /* flags from tmp - low                        */\
        "cmova  %%ecx       , "range"   \n\t" /* range = ecx if tmp > low (unsigned)         */\
        "sbb    %%ecx       , %%ecx     \n\t" /* ecx = -1 if the cmp borrowed, else 0        */\
        "and    %%ecx       , "tmp"     \n\t" /* tmp &= mask                                 */\
        "xor    %%ecx       , "ret"     \n\t" /* ret ^= mask                                 */\
        "sub    "tmp"       , "low"     \n\t" /* low -= tmp                                  */
#endif /* !HAVE_FAST_CMOV */
141
/*
 * Refill tail of BRANCHLESS_GET_CABAC (direct-relocation variant).
 * Runs only when the low 16 bits of `low` are zero. Reads two bytes at the
 * bytestream pointer, advances the stored pointer by 2 (skipped when
 * END_CHECK jumps to label 1) and merges the bytes into `low`.
 * Overwrites REG_c and `tmp`.
 */
#define BRANCHLESS_GET_CABAC_REFILL(low, tmp, byte, end, norm_off) \
        "mov    "byte"      , %%"REG_c"                                 \n\t" /* REG_c = bytestream pointer          */\
        END_CHECK(end)                                                        /* optional bound check -> label 1     */\
        "add"OPSIZE" $2     , "byte"                                    \n\t" /* stored pointer += 2                 */\
        "1:                                                             \n\t"\
        "movzwl (%%"REG_c")     , "tmp"                                 \n\t" /* tmp = 16 bits at the old pointer    */\
        "lea    -1("low")   , %%ecx                                     \n\t" /* ecx = low - 1                       */\
        "xor    "low"       , %%ecx                                     \n\t" /* ecx = low ^ (low - 1)               */\
        "shr    $15         , %%ecx                                     \n\t" /* ecx >>= 15                          */\
        "bswap  "tmp"                                                   \n\t" /* with the shift below: the two bytes */\
        "shr    $15         , "tmp"                                     \n\t" /* as a big-endian value, times 2      */\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t" /* ecx = tables[norm_off + ecx]        */\
        "sub    $0xFFFF     , "tmp"                                     \n\t" /* tmp -= 0xFFFF                       */\
        "neg    %%ecx                                                   \n\t" /* with the add below: ecx = 7 - ecx   */\
        "add    $7          , %%ecx                                     \n\t"\
        "shl    %%cl        , "tmp"                                     \n\t" /* tmp <<= cl                          */\
        "add    "tmp"       , "low"                                     \n\t" /* low += tmp                          */

/*
 * BRANCHLESS_GET_CABAC(...), direct-relocation variant: tables are addressed
 * as MANGLE(ff_h264_cabac_tables) plus a constant offset.
 *
 * All arguments are string literals naming asm operands:
 *   ret            32-bit name of the result register
 *   statep         memory operand of the context state byte
 *   low/lowword    32/16-bit names of the register holding low
 *   range          32-bit name of the register holding range
 *   tmp/tmpbyte    32/8-bit names of a scratch register
 *   byte, end      memory operands of the bytestream pointer and its end
 *   norm_off, lps_off, mlps_off   byte offsets into ff_h264_cabac_tables
 * retq, rangeq and tables are accepted for signature parity with the
 * BROKEN_RELOCATIONS variant and are not used here.
 * Overwrites REG_c; uses local labels 1 and 2.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
        "movzbl "statep"    , "ret"                                     \n\t" /* ret = state byte                    */\
        "mov    "range"     , "tmp"                                     \n\t" /* tmp = range                         */\
        "and    $0xC0       , "range"                                   \n\t" /* range &= 0xC0                       */\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t" /* range = tables[lps_off + ret + 2*range] */\
        "sub    "range"     , "tmp"                                     \n\t" /* tmp = old range - table byte        */\
        BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)                     /* select range, adjust low and ret    */\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx    \n\t" /* ecx = tables[norm_off + range]  */\
        "shl    %%cl        , "range"                                   \n\t" /* range <<= cl                        */\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp"  \n\t" /* tmp = tables[mlps_off + 128 + ret] */\
        "shl    %%cl        , "low"                                     \n\t" /* low <<= cl                          */\
        "mov    "tmpbyte"   , "statep"                                  \n\t" /* store the looked-up state byte      */\
        "test   "lowword"   , "lowword"                                 \n\t" /* low 16 bits of low == 0 ?           */\
        " jnz   2f                                                      \n\t" /* no: nothing to refill               */\
        BRANCHLESS_GET_CABAC_REFILL(low, tmp, byte, end, norm_off)           \
        "2:                                                             \n\t"
173
174#endif /* BROKEN_RELOCATIONS */
175
176#if HAVE_7REGS && !BROKEN_COMPILER
177#define get_cabac_inline get_cabac_inline_x86
178static av_always_inline int get_cabac_inline_x86(CABACContext *c,
179                                                 uint8_t *const state)
180{
181    int bit, tmp;
182#ifdef BROKEN_RELOCATIONS
183    void *tables;
184
185    __asm__ volatile(
186        "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
187        : "=&r"(tables)
188        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
189    );
190#endif
191
192    __asm__ volatile(
193        BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
194                             "%2", "%q2", "%3", "%b3",
195                             "%c6(%5)", "%c7(%5)",
196                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
197                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
198                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
199                             "%8")
200        : "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp)
201        : "r"(state), "r"(c),
202          "i"(offsetof(CABACContext, bytestream)),
203          "i"(offsetof(CABACContext, bytestream_end))
204          TABLES_ARG
205          ,"1"(c->low), "2"(c->range)
206        : "%"REG_c, "memory"
207    );
208    return bit & 1;
209}
210#endif /* HAVE_7REGS && !BROKEN_COMPILER */
211
212#if !BROKEN_COMPILER
213#define get_cabac_bypass_sign get_cabac_bypass_sign_x86
214static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
215{
216    x86_reg tmp;
217    __asm__ volatile(
218        "movl        %c6(%2), %k1       \n\t"
219        "movl        %c3(%2), %%eax     \n\t"
220        "shl             $17, %k1       \n\t"
221        "add           %%eax, %%eax     \n\t"
222        "sub             %k1, %%eax     \n\t"
223        "cdq                            \n\t"
224        "and           %%edx, %k1       \n\t"
225        "add             %k1, %%eax     \n\t"
226        "xor           %%edx, %%ecx     \n\t"
227        "sub           %%edx, %%ecx     \n\t"
228        "test           %%ax, %%ax      \n\t"
229        "jnz              1f            \n\t"
230        "mov         %c4(%2), %1        \n\t"
231        "subl        $0xFFFF, %%eax     \n\t"
232        "movzwl         (%1), %%edx     \n\t"
233        "bswap         %%edx            \n\t"
234        "shrl            $15, %%edx     \n\t"
235#if UNCHECKED_BITSTREAM_READER
236        "add              $2, %1        \n\t"
237        "addl          %%edx, %%eax     \n\t"
238        "mov              %1, %c4(%2)   \n\t"
239#else
240        "addl          %%edx, %%eax     \n\t"
241        "cmp         %c5(%2), %1        \n\t"
242        "jge              1f            \n\t"
243        "add"OPSIZE"      $2, %c4(%2)   \n\t"
244#endif
245        "1:                             \n\t"
246        "movl          %%eax, %c3(%2)   \n\t"
247
248        : "+c"(val), "=&r"(tmp)
249        : "r"(c),
250          "i"(offsetof(CABACContext, low)),
251          "i"(offsetof(CABACContext, bytestream)),
252          "i"(offsetof(CABACContext, bytestream_end)),
253          "i"(offsetof(CABACContext, range))
254        : "%eax", "%edx", "memory"
255    );
256    return val;
257}
258
259#define get_cabac_bypass get_cabac_bypass_x86
260static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
261{
262    x86_reg tmp;
263    int res;
264    __asm__ volatile(
265        "movl        %c6(%2), %k1       \n\t"
266        "movl        %c3(%2), %%eax     \n\t"
267        "shl             $17, %k1       \n\t"
268        "add           %%eax, %%eax     \n\t"
269        "sub             %k1, %%eax     \n\t"
270        "cdq                            \n\t"
271        "and           %%edx, %k1       \n\t"
272        "add             %k1, %%eax     \n\t"
273        "inc           %%edx            \n\t"
274        "test           %%ax, %%ax      \n\t"
275        "jnz              1f            \n\t"
276        "mov         %c4(%2), %1        \n\t"
277        "subl        $0xFFFF, %%eax     \n\t"
278        "movzwl         (%1), %%ecx     \n\t"
279        "bswap         %%ecx            \n\t"
280        "shrl            $15, %%ecx     \n\t"
281        "addl          %%ecx, %%eax     \n\t"
282        "cmp         %c5(%2), %1        \n\t"
283        "jge              1f            \n\t"
284        "add"OPSIZE"      $2, %c4(%2)   \n\t"
285        "1:                             \n\t"
286        "movl          %%eax, %c3(%2)   \n\t"
287
288        : "=&d"(res), "=&r"(tmp)
289        : "r"(c),
290          "i"(offsetof(CABACContext, low)),
291          "i"(offsetof(CABACContext, bytestream)),
292          "i"(offsetof(CABACContext, bytestream_end)),
293          "i"(offsetof(CABACContext, range))
294        : "%eax", "%ecx", "memory"
295    );
296    return res;
297}
298#endif /* !BROKEN_COMPILER */
299
300#endif /* HAVE_INLINE_ASM */
301#endif /* AVCODEC_X86_CABAC_H */
302