1/* 2 * quarterpel DSP functions 3 * Copyright (c) 2000, 2001 Fabrice Bellard 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23/** 24 * @file 25 * quarterpel DSP functions 26 */ 27 28#include <stddef.h> 29#include <stdint.h> 30 31#include "config.h" 32#include "libavutil/attributes.h" 33#include "copy_block.h" 34#include "qpeldsp.h" 35#include "diracdsp.h" 36 37#define BIT_DEPTH 8 38#include "hpel_template.c" 39#include "pel_template.c" 40#include "qpel_template.c" 41 42#define QPEL_MC(r, OPNAME, RND, OP) \ 43static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \ 44 int dstStride, int srcStride, \ 45 int h) \ 46{ \ 47 const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ 48 int i; \ 49 \ 50 for (i = 0; i < h; i++) { \ 51 OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \ 52 OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \ 53 OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \ 54 OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \ 55 OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \ 56 OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \ 57 OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \ 58 OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \ 59 dst += dstStride; \ 60 src += srcStride; \ 61 } \ 62} \ 63 \ 64static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, \ 65 int dstStride, int srcStride) \ 66{ \ 67 const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ 68 const int w = 8; \ 69 int i; \ 70 \ 71 for (i = 0; i < w; i++) { \ 72 const int src0 = src[0 * srcStride]; \ 73 const int src1 = src[1 * srcStride]; \ 74 const int src2 = src[2 * srcStride]; \ 75 const int src3 = src[3 * srcStride]; \ 76 const int src4 = src[4 * srcStride]; \ 77 const int src5 = src[5 * srcStride]; \ 78 const int src6 = src[6 * srcStride]; \ 79 const int src7 = src[7 * srcStride]; \ 80 const int src8 = src[8 * srcStride]; \ 81 OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \ 82 OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \ 83 OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \ 84 OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \ 85 OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \ 86 OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \ 87 OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \ 88 OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \ 89 dst++; \ 90 src++; \ 91 } \ 92} \ 93 \ 94static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, \ 95 int dstStride, int srcStride, \ 96 int h) \ 97{ \ 98 const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ 99 int i; \ 100 \ 101 for (i = 0; i < h; i++) { \ 102 OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \ 103 OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \ 104 OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \ 105 OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \ 106 OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \ 107 OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[9])); \ 108 OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[9]) * 3 - (src[3] + src[10])); \ 109 OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[9]) * 6 + (src[5] + src[10]) * 3 - (src[4] + src[11])); \ 110 OP(dst[8], (src[8] + src[9]) * 20 - (src[7] + src[10]) * 6 + (src[6] + src[11]) * 3 - (src[5] + src[12])); \ 111 OP(dst[9], (src[9] + src[10]) * 20 - (src[8] + src[11]) * 6 + (src[7] + src[12]) * 3 - (src[6] + src[13])); \ 112 OP(dst[10], (src[10] + src[11]) * 20 - (src[9] + src[12]) * 6 + (src[8] + src[13]) * 3 - (src[7] + src[14])); \ 113 OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9] + src[14]) * 3 - (src[8] + src[15])); \ 114 OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9] + src[16])); \ 115 OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \ 116 OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \ 117 OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \ 118 dst += dstStride; \ 119 src += srcStride; \ 120 } \ 121} \ 122 \ 123static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, \ 124 int dstStride, int srcStride) \ 125{ \ 126 const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ 127 const int w = 16; \ 128 int i; \ 129 \ 130 for (i = 0; i < w; i++) { \ 131 const int src0 = src[0 * srcStride]; \ 132 const int src1 = src[1 * srcStride]; \ 133 const int src2 = src[2 * srcStride]; \ 134 const int src3 = src[3 * srcStride]; \ 135 const int src4 = src[4 * srcStride]; \ 136 const int src5 = src[5 * srcStride]; \ 137 const int src6 = src[6 * srcStride]; \ 138 const int src7 = src[7 * srcStride]; \ 139 const int src8 = src[8 * srcStride]; \ 140 const int src9 = src[9 * srcStride]; \ 141 const int src10 = src[10 * srcStride]; \ 142 const int src11 = src[11 * srcStride]; \ 143 const int src12 = src[12 * srcStride]; \ 144 const int src13 = src[13 * srcStride]; \ 145 const int src14 = src[14 * srcStride]; \ 146 const int src15 = src[15 * srcStride]; \ 147 const int src16 = src[16 * srcStride]; \ 148 OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \ 149 OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \ 150 OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \ 151 OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \ 152 OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \ 153 OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src9)); \ 154 OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src9) * 3 - (src3 + src10)); \ 155 OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src9) * 6 + (src5 + src10) * 3 - (src4 + src11)); \ 156 OP(dst[8 * dstStride], (src8 + src9) * 20 - (src7 + src10) * 6 + (src6 + src11) * 3 - (src5 + src12)); \ 157 OP(dst[9 * dstStride], (src9 + src10) * 20 - (src8 + src11) * 6 + (src7 + src12) * 3 - (src6 + src13)); \ 158 OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9 + src12) * 6 + (src8 + src13) * 3 - (src7 + src14)); \ 159 OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9 + src14) * 3 - (src8 + src15)); \ 160 OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9 + src16)); \ 161 OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \ 162 OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \ 163 OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \ 164 dst++; \ 165 src++; \ 166 } \ 167} \ 168 \ 169static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, \ 170 ptrdiff_t stride) \ 171{ \ 172 uint8_t half[64]; \ 173 \ 174 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \ 175 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8); \ 176} \ 177 \ 178static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, \ 179 ptrdiff_t stride) \ 180{ \ 181 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8); \ 182} \ 183 \ 184static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, \ 185 ptrdiff_t stride) \ 186{ \ 187 uint8_t half[64]; \ 188 \ 189 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \ 190 OPNAME ## pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8); \ 191} \ 192 \ 193static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, \ 194 ptrdiff_t stride) \ 195{ \ 196 uint8_t full[16 * 9]; \ 197 uint8_t half[64]; \ 198 \ 199 copy_block9(full, src, 16, stride, 9); \ 200 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \ 201 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8); \ 202} \ 203 \ 204static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, \ 205 ptrdiff_t stride) \ 206{ \ 207 uint8_t full[16 * 9]; \ 208 \ 209 copy_block9(full, src, 16, stride, 9); \ 210 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16); \ 211} \ 212 \ 213static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, \ 214 ptrdiff_t stride) \ 215{ \ 216 uint8_t full[16 * 9]; \ 217 uint8_t half[64]; \ 218 \ 219 copy_block9(full, src, 16, stride, 9); \ 220 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \ 221 OPNAME ## pixels8_l2_8(dst, full + 16, half, stride, 16, 8, 8); \ 222} \ 223 \ 224void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, \ 225 ptrdiff_t stride) \ 226{ \ 227 uint8_t full[16 * 9]; \ 228 uint8_t halfH[72]; \ 229 uint8_t halfV[64]; \ 230 uint8_t halfHV[64]; \ 231 \ 232 copy_block9(full, src, 16, stride, 9); \ 233 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 234 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \ 235 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 236 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, \ 237 stride, 16, 8, 8, 8, 8); \ 238} \ 239 \ 240static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, \ 241 ptrdiff_t stride) \ 242{ \ 243 uint8_t full[16 * 9]; \ 244 uint8_t halfH[72]; \ 245 uint8_t halfHV[64]; \ 246 \ 247 copy_block9(full, src, 16, stride, 9); \ 248 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 249 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \ 250 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 251 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \ 252} \ 253 \ 254void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, \ 255 ptrdiff_t stride) \ 256{ \ 257 uint8_t full[16 * 9]; \ 258 uint8_t halfH[72]; \ 259 uint8_t halfV[64]; \ 260 uint8_t halfHV[64]; \ 261 \ 262 copy_block9(full, src, 16, stride, 9); \ 263 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 264 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \ 265 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 266 OPNAME ## pixels8_l4_8(dst, full + 1, halfH, halfV, halfHV, \ 267 stride, 16, 8, 8, 8, 8); \ 268} \ 269 \ 270static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, \ 271 ptrdiff_t stride) \ 272{ \ 273 uint8_t full[16 * 9]; \ 274 uint8_t halfH[72]; \ 275 uint8_t halfHV[64]; \ 276 \ 277 copy_block9(full, src, 16, stride, 9); \ 278 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 279 put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \ 280 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 281 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \ 282} \ 283 \ 284void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, \ 285 ptrdiff_t stride) \ 286{ \ 287 uint8_t full[16 * 9]; \ 288 uint8_t halfH[72]; \ 289 uint8_t halfV[64]; \ 290 uint8_t halfHV[64]; \ 291 \ 292 copy_block9(full, src, 16, stride, 9); \ 293 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 294 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \ 295 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 296 OPNAME ## pixels8_l4_8(dst, full + 16, halfH + 8, halfV, halfHV, \ 297 stride, 16, 8, 8, 8, 8); \ 298} \ 299 \ 300static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, \ 301 ptrdiff_t stride) \ 302{ \ 303 uint8_t full[16 * 9]; \ 304 uint8_t halfH[72]; \ 305 uint8_t halfHV[64]; \ 306 \ 307 copy_block9(full, src, 16, stride, 9); \ 308 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 309 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \ 310 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 311 OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \ 312} \ 313 \ 314void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, \ 315 ptrdiff_t stride) \ 316{ \ 317 uint8_t full[16 * 9]; \ 318 uint8_t halfH[72]; \ 319 uint8_t halfV[64]; \ 320 uint8_t halfHV[64]; \ 321 \ 322 copy_block9(full, src, 16, stride, 9); \ 323 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 324 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \ 325 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 326 OPNAME ## pixels8_l4_8(dst, full + 17, halfH + 8, halfV, halfHV, \ 327 stride, 16, 8, 8, 8, 8); \ 328} \ 329 \ 330static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, \ 331 ptrdiff_t stride) \ 332{ \ 333 uint8_t full[16 * 9]; \ 334 uint8_t halfH[72]; \ 335 uint8_t halfHV[64]; \ 336 \ 337 copy_block9(full, src, 16, stride, 9); \ 338 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 339 put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \ 340 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 341 OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \ 342} \ 343 \ 344static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, \ 345 ptrdiff_t stride) \ 346{ \ 347 uint8_t halfH[72]; \ 348 uint8_t halfHV[64]; \ 349 \ 350 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \ 351 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 352 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \ 353} \ 354 \ 355static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, \ 356 ptrdiff_t stride) \ 357{ \ 358 uint8_t halfH[72]; \ 359 uint8_t halfHV[64]; \ 360 \ 361 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \ 362 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 363 OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \ 364} \ 365 \ 366void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, \ 367 ptrdiff_t stride) \ 368{ \ 369 uint8_t full[16 * 9]; \ 370 uint8_t halfH[72]; \ 371 uint8_t halfV[64]; \ 372 uint8_t halfHV[64]; \ 373 \ 374 copy_block9(full, src, 16, stride, 9); \ 375 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 376 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \ 377 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 378 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \ 379} \ 380 \ 381static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, \ 382 ptrdiff_t stride) \ 383{ \ 384 uint8_t full[16 * 9]; \ 385 uint8_t halfH[72]; \ 386 \ 387 copy_block9(full, src, 16, stride, 9); \ 388 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 389 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \ 390 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \ 391} \ 392 \ 393void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, \ 394 ptrdiff_t stride) \ 395{ \ 396 uint8_t full[16 * 9]; \ 397 uint8_t halfH[72]; \ 398 uint8_t halfV[64]; \ 399 uint8_t halfHV[64]; \ 400 \ 401 copy_block9(full, src, 16, stride, 9); \ 402 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 403 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \ 404 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \ 405 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \ 406} \ 407 \ 408static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, \ 409 ptrdiff_t stride) \ 410{ \ 411 uint8_t full[16 * 9]; \ 412 uint8_t halfH[72]; \ 413 \ 414 copy_block9(full, src, 16, stride, 9); \ 415 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \ 416 put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \ 417 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \ 418} \ 419 \ 420static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, \ 421 ptrdiff_t stride) \ 422{ \ 423 uint8_t halfH[72]; \ 424 \ 425 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \ 426 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \ 427} \ 428 \ 429static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, \ 430 ptrdiff_t stride) \ 431{ \ 432 uint8_t half[256]; \ 433 \ 434 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \ 435 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16); \ 436} \ 437 \ 438static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, \ 439 ptrdiff_t stride) \ 440{ \ 441 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16); \ 442} \ 443 \ 444static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, \ 445 ptrdiff_t stride) \ 446{ \ 447 uint8_t half[256]; \ 448 \ 449 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \ 450 OPNAME ## pixels16_l2_8(dst, src + 1, half, stride, stride, 16, 16); \ 451} \ 452 \ 453static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, \ 454 ptrdiff_t stride) \ 455{ \ 456 uint8_t full[24 * 17]; \ 457 uint8_t half[256]; \ 458 \ 459 copy_block17(full, src, 24, stride, 17); \ 460 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \ 461 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16); \ 462} \ 463 \ 464static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, \ 465 ptrdiff_t stride) \ 466{ \ 467 uint8_t full[24 * 17]; \ 468 \ 469 copy_block17(full, src, 24, stride, 17); \ 470 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24); \ 471} \ 472 \ 473static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, \ 474 ptrdiff_t stride) \ 475{ \ 476 uint8_t full[24 * 17]; \ 477 uint8_t half[256]; \ 478 \ 479 copy_block17(full, src, 24, stride, 17); \ 480 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \ 481 OPNAME ## pixels16_l2_8(dst, full + 24, half, stride, 24, 16, 16); \ 482} \ 483 \ 484void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, \ 485 ptrdiff_t stride) \ 486{ \ 487 uint8_t full[24 * 17]; \ 488 uint8_t halfH[272]; \ 489 uint8_t halfV[256]; \ 490 uint8_t halfHV[256]; \ 491 \ 492 copy_block17(full, src, 24, stride, 17); \ 493 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 494 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \ 495 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 496 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, \ 497 stride, 24, 16, 16, 16, 16); \ 498} \ 499 \ 500static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, \ 501 ptrdiff_t stride) \ 502{ \ 503 uint8_t full[24 * 17]; \ 504 uint8_t halfH[272]; \ 505 uint8_t halfHV[256]; \ 506 \ 507 copy_block17(full, src, 24, stride, 17); \ 508 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 509 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \ 510 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 511 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \ 512} \ 513 \ 514void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, \ 515 ptrdiff_t stride) \ 516{ \ 517 uint8_t full[24 * 17]; \ 518 uint8_t halfH[272]; \ 519 uint8_t halfV[256]; \ 520 uint8_t halfHV[256]; \ 521 \ 522 copy_block17(full, src, 24, stride, 17); \ 523 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 524 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \ 525 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 526 OPNAME ## pixels16_l4_8(dst, full + 1, halfH, halfV, halfHV, \ 527 stride, 24, 16, 16, 16, 16); \ 528} \ 529 \ 530static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, \ 531 ptrdiff_t stride) \ 532{ \ 533 uint8_t full[24 * 17]; \ 534 uint8_t halfH[272]; \ 535 uint8_t halfHV[256]; \ 536 \ 537 copy_block17(full, src, 24, stride, 17); \ 538 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 539 put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \ 540 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 541 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \ 542} \ 543 \ 544void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, \ 545 ptrdiff_t stride) \ 546{ \ 547 uint8_t full[24 * 17]; \ 548 uint8_t halfH[272]; \ 549 uint8_t halfV[256]; \ 550 uint8_t halfHV[256]; \ 551 \ 552 copy_block17(full, src, 24, stride, 17); \ 553 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 554 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \ 555 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 556 OPNAME ## pixels16_l4_8(dst, full + 24, halfH + 16, halfV, halfHV, \ 557 stride, 24, 16, 16, 16, 16); \ 558} \ 559 \ 560static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, \ 561 ptrdiff_t stride) \ 562{ \ 563 uint8_t full[24 * 17]; \ 564 uint8_t halfH[272]; \ 565 uint8_t halfHV[256]; \ 566 \ 567 copy_block17(full, src, 24, stride, 17); \ 568 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 569 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \ 570 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 571 OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \ 572} \ 573 \ 574void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, \ 575 ptrdiff_t stride) \ 576{ \ 577 uint8_t full[24 * 17]; \ 578 uint8_t halfH[272]; \ 579 uint8_t halfV[256]; \ 580 uint8_t halfHV[256]; \ 581 \ 582 copy_block17(full, src, 24, stride, 17); \ 583 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 584 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \ 585 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 586 OPNAME ## pixels16_l4_8(dst, full + 25, halfH + 16, halfV, halfHV, \ 587 stride, 24, 16, 16, 16, 16); \ 588} \ 589 \ 590static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, \ 591 ptrdiff_t stride) \ 592{ \ 593 uint8_t full[24 * 17]; \ 594 uint8_t halfH[272]; \ 595 uint8_t halfHV[256]; \ 596 \ 597 copy_block17(full, src, 24, stride, 17); \ 598 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 599 put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \ 600 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 601 OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \ 602} \ 603 \ 604static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, \ 605 ptrdiff_t stride) \ 606{ \ 607 uint8_t halfH[272]; \ 608 uint8_t halfHV[256]; \ 609 \ 610 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \ 611 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 612 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \ 613} \ 614 \ 615static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, \ 616 ptrdiff_t stride) \ 617{ \ 618 uint8_t halfH[272]; \ 619 uint8_t halfHV[256]; \ 620 \ 621 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \ 622 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 623 OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \ 624} \ 625 \ 626void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, \ 627 ptrdiff_t stride) \ 628{ \ 629 uint8_t full[24 * 17]; \ 630 uint8_t halfH[272]; \ 631 uint8_t halfV[256]; \ 632 uint8_t halfHV[256]; \ 633 \ 634 copy_block17(full, src, 24, stride, 17); \ 635 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 636 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \ 637 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 638 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \ 639} \ 640 \ 641static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, \ 642 ptrdiff_t stride) \ 643{ \ 644 uint8_t full[24 * 17]; \ 645 uint8_t halfH[272]; \ 646 \ 647 copy_block17(full, src, 24, stride, 17); \ 648 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 649 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \ 650 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \ 651} \ 652 \ 653void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, \ 654 ptrdiff_t stride) \ 655{ \ 656 uint8_t full[24 * 17]; \ 657 uint8_t halfH[272]; \ 658 uint8_t halfV[256]; \ 659 uint8_t halfHV[256]; \ 660 \ 661 copy_block17(full, src, 24, stride, 17); \ 662 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 663 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \ 664 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \ 665 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \ 666} \ 667 \ 668static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, \ 669 ptrdiff_t stride) \ 670{ \ 671 uint8_t full[24 * 17]; \ 672 uint8_t halfH[272]; \ 673 \ 674 copy_block17(full, src, 24, stride, 17); \ 675 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \ 676 put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \ 677 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \ 678} \ 679 \ 680static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, \ 681 ptrdiff_t stride) \ 682{ \ 683 uint8_t halfH[272]; \ 684 \ 685 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \ 686 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \ 687} 688 689#define op_avg(a, b) a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1) 690#define op_put(a, b) a = cm[((b) + 16) >> 5] 691#define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5] 692 693QPEL_MC(0, put_, _, op_put) 694QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) 695QPEL_MC(0, avg_, _, op_avg) 696 697#undef op_avg 698#undef op_put 699#undef op_put_no_rnd 700 701void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) 702{ 703 put_pixels8_8_c(dst, src, stride, 8); 704} 705 706void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) 707{ 708 avg_pixels8_8_c(dst, src, stride, 8); 709} 710 711void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) 712{ 713 put_pixels16_8_c(dst, src, stride, 16); 714} 715 716void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride) 717{ 718 avg_pixels16_8_c(dst, src, stride, 16); 719} 720 721#define put_qpel8_mc00_c ff_put_pixels8x8_c 722#define avg_qpel8_mc00_c ff_avg_pixels8x8_c 723#define put_qpel16_mc00_c ff_put_pixels16x16_c 724#define avg_qpel16_mc00_c ff_avg_pixels16x16_c 725#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c 726#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c 727 728void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, 729 int dst_stride, int src_stride1, int src_stride2, 730 int h) 731{ 732 put_pixels8_l2_8(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); 733 734} 735 736#if CONFIG_DIRAC_DECODER 737#define DIRAC_MC(OPNAME)\ 738void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 739{\ 740 OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\ 741}\ 742void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 743{\ 744 OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\ 745}\ 746void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 747{\ 748 OPNAME ## _pixels16_8_c(dst , src[0] , stride, h);\ 749 OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\ 750}\ 751void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 752{\ 753 OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\ 754}\ 755void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 756{\ 757 OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\ 758}\ 759void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 760{\ 761 OPNAME ## _pixels16_l2_8(dst , src[0] , src[1] , stride, stride, stride, h);\ 762 OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\ 763}\ 764void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 765{\ 766 OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\ 767}\ 768void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 769{\ 770 OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\ 771}\ 772void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 773{\ 774 OPNAME ## _pixels16_l4_8(dst , src[0] , src[1] , src[2] , src[3] , stride, stride, stride, stride, stride, h);\ 775 OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\ 776} 777DIRAC_MC(put) 778DIRAC_MC(avg) 779#endif 780 781av_cold void ff_qpeldsp_init(QpelDSPContext *c) 782{ 783#define dspfunc(PFX, IDX, NUM) \ 784 c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \ 785 c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \ 786 c->PFX ## _pixels_tab[IDX][2] = PFX ## NUM ## _mc20_c; \ 787 c->PFX ## _pixels_tab[IDX][3] = PFX ## NUM ## _mc30_c; \ 788 c->PFX ## _pixels_tab[IDX][4] = PFX ## NUM ## _mc01_c; \ 789 c->PFX ## _pixels_tab[IDX][5] = PFX ## NUM ## _mc11_c; \ 790 c->PFX ## _pixels_tab[IDX][6] = PFX ## NUM ## _mc21_c; \ 791 c->PFX ## _pixels_tab[IDX][7] = PFX ## NUM ## _mc31_c; \ 792 c->PFX ## _pixels_tab[IDX][8] = PFX ## NUM ## _mc02_c; \ 793 c->PFX ## _pixels_tab[IDX][9] = PFX ## NUM ## _mc12_c; \ 794 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ 795 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ 796 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ 797 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ 798 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ 799 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c 800 801 dspfunc(put_qpel, 0, 16); 802 dspfunc(put_qpel, 1, 8); 803 804 dspfunc(put_no_rnd_qpel, 0, 16); 805 dspfunc(put_no_rnd_qpel, 1, 8); 806 807 dspfunc(avg_qpel, 0, 16); 808 dspfunc(avg_qpel, 1, 8); 809 810 if (ARCH_X86) 811 ff_qpeldsp_init_x86(c); 812} 813