/*
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#ifndef AVCODEC_X86_CABAC_H
22#define AVCODEC_X86_CABAC_H
23
24#include "libavcodec/cabac.h"
25#include "libavutil/attributes.h"
26#include "libavutil/x86_cpu.h"
27#include "config.h"
28
29#if HAVE_FAST_CMOV
30#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
31        "mov    "tmp"       , %%ecx     \n\t"\
32        "shl    $17         , "tmp"     \n\t"\
33        "cmp    "low"       , "tmp"     \n\t"\
34        "cmova  %%ecx       , "range"   \n\t"\
35        "sbb    %%ecx       , %%ecx     \n\t"\
36        "and    %%ecx       , "tmp"     \n\t"\
37        "xor    %%ecx       , "ret"     \n\t"\
38        "sub    "tmp"       , "low"     \n\t"
39#else /* HAVE_FAST_CMOV */
40#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
41        "mov    "tmp"       , %%ecx     \n\t"\
42        "shl    $17         , "tmp"     \n\t"\
43        "sub    "low"       , "tmp"     \n\t"\
44        "sar    $31         , "tmp"     \n\t" /*lps_mask*/\
45        "sub    %%ecx       , "range"   \n\t" /*RangeLPS - range*/\
46        "and    "tmp"       , "range"   \n\t" /*(RangeLPS - range)&lps_mask*/\
47        "add    %%ecx       , "range"   \n\t" /*new range*/\
48        "shl    $17         , %%ecx     \n\t"\
49        "and    "tmp"       , %%ecx     \n\t"\
50        "sub    %%ecx       , "low"     \n\t"\
51        "xor    "tmp"       , "ret"     \n\t"
52#endif /* HAVE_FAST_CMOV */
53
54#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte) \
55        "movzbl "statep"    , "ret"                                     \n\t"\
56        "mov    "range"     , "tmp"                                     \n\t"\
57        "and    $0xC0       , "range"                                   \n\t"\
58        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
59        "sub    "range"     , "tmp"                                     \n\t"\
60        BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)   \
61        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
62        "shl    %%cl        , "range"                                   \n\t"\
63        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
64        "shl    %%cl        , "low"                                     \n\t"\
65        "mov    "tmpbyte"   , "statep"                                  \n\t"\
66        "test   "lowword"   , "lowword"                                 \n\t"\
67        " jnz   1f                                                      \n\t"\
68        "mov    "byte"      , %%"REG_c"                                 \n\t"\
69        "add"OPSIZE" $2     , "byte"                                    \n\t"\
70        "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
71        "lea    -1("low")   , %%ecx                                     \n\t"\
72        "xor    "low"       , %%ecx                                     \n\t"\
73        "shr    $15         , %%ecx                                     \n\t"\
74        "bswap  "tmp"                                                   \n\t"\
75        "shr    $15         , "tmp"                                     \n\t"\
76        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
77        "sub    $0xFFFF     , "tmp"                                     \n\t"\
78        "neg    %%ecx                                                   \n\t"\
79        "add    $7          , %%ecx                                     \n\t"\
80        "shl    %%cl        , "tmp"                                     \n\t"\
81        "add    "tmp"       , "low"                                     \n\t"\
82        "1:                                                             \n\t"
83
84#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
85#define get_cabac_inline get_cabac_inline_x86
86static av_always_inline int get_cabac_inline_x86(CABACContext *c,
87                                                 uint8_t *const state)
88{
89    int bit, tmp;
90
91    __asm__ volatile(
92        BRANCHLESS_GET_CABAC("%0", "(%5)", "%1", "%w1", "%2",
93                             "%3", "%b3", "%4")
94        :"=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp),
95         "+m"(c->bytestream)
96        :"r"(state)
97        : "%"REG_c, "memory"
98    );
99    return bit & 1;
100}
101#endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
102
103#define get_cabac_bypass_sign get_cabac_bypass_sign_x86
104static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
105{
106    x86_reg tmp;
107    __asm__ volatile(
108        "movl %4, %k1                           \n\t"
109        "movl %2, %%eax                         \n\t"
110        "shl $17, %k1                           \n\t"
111        "add %%eax, %%eax                       \n\t"
112        "sub %k1, %%eax                         \n\t"
113        "cltd                                   \n\t"
114        "and %%edx, %k1                         \n\t"
115        "add %k1, %%eax                         \n\t"
116        "xor %%edx, %%ecx                       \n\t"
117        "sub %%edx, %%ecx                       \n\t"
118        "test %%ax, %%ax                        \n\t"
119        " jnz 1f                                \n\t"
120        "mov  %3, %1                            \n\t"
121        "subl $0xFFFF, %%eax                    \n\t"
122        "movzwl (%1), %%edx                     \n\t"
123        "bswap %%edx                            \n\t"
124        "shrl $15, %%edx                        \n\t"
125        "add  $2, %1                            \n\t"
126        "addl %%edx, %%eax                      \n\t"
127        "mov  %1, %3                            \n\t"
128        "1:                                     \n\t"
129        "movl %%eax, %2                         \n\t"
130
131        :"+c"(val), "=&r"(tmp), "+m"(c->low), "+m"(c->bytestream)
132        :"m"(c->range)
133        : "%eax", "%edx"
134    );
135    return val;
136}
137
138#endif /* AVCODEC_X86_CABAC_H */
139