1/* 2 * ARM NEON optimised DSP functions 3 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22#include <stdint.h> 23 24#include "config.h" 25#include "libavutil/attributes.h" 26#include "libavutil/arm/cpu.h" 27#include "libavcodec/h264qpel.h" 28 29void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, ptrdiff_t); 30void ff_put_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, ptrdiff_t); 31void ff_put_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, ptrdiff_t); 32void ff_put_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, ptrdiff_t); 33void ff_put_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, ptrdiff_t); 34void ff_put_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, ptrdiff_t); 35void ff_put_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, ptrdiff_t); 36void ff_put_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, ptrdiff_t); 37void ff_put_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, ptrdiff_t); 38void ff_put_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, ptrdiff_t); 39void ff_put_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, ptrdiff_t); 40void ff_put_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, ptrdiff_t); 41void ff_put_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, ptrdiff_t); 42void ff_put_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, ptrdiff_t); 43void ff_put_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, ptrdiff_t); 44void ff_put_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, ptrdiff_t); 45 46void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, ptrdiff_t); 47void ff_put_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, ptrdiff_t); 48void ff_put_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, ptrdiff_t); 49void ff_put_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, ptrdiff_t); 50void ff_put_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, ptrdiff_t); 51void ff_put_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, ptrdiff_t); 52void ff_put_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, ptrdiff_t); 53void ff_put_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, ptrdiff_t); 54void ff_put_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, ptrdiff_t); 55void ff_put_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, ptrdiff_t); 56void ff_put_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, ptrdiff_t); 57void ff_put_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, ptrdiff_t); 58void ff_put_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, ptrdiff_t); 59void ff_put_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, ptrdiff_t); 60void ff_put_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, ptrdiff_t); 61void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, ptrdiff_t); 62 63void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, ptrdiff_t); 64void ff_avg_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, ptrdiff_t); 65void ff_avg_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, ptrdiff_t); 66void ff_avg_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, ptrdiff_t); 67void ff_avg_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, ptrdiff_t); 68void ff_avg_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, ptrdiff_t); 69void ff_avg_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, ptrdiff_t); 70void ff_avg_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, ptrdiff_t); 71void ff_avg_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, ptrdiff_t); 72void ff_avg_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, ptrdiff_t); 73void ff_avg_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, ptrdiff_t); 74void ff_avg_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, ptrdiff_t); 75void ff_avg_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, ptrdiff_t); 76void ff_avg_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, ptrdiff_t); 77void ff_avg_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, ptrdiff_t); 78void ff_avg_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, ptrdiff_t); 79 80void ff_avg_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, ptrdiff_t); 81void ff_avg_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, ptrdiff_t); 82void ff_avg_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, ptrdiff_t); 83void ff_avg_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, ptrdiff_t); 84void ff_avg_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, ptrdiff_t); 85void ff_avg_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, ptrdiff_t); 86void ff_avg_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, ptrdiff_t); 87void ff_avg_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, ptrdiff_t); 88void ff_avg_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, ptrdiff_t); 89void ff_avg_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, ptrdiff_t); 90void ff_avg_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, ptrdiff_t); 91void ff_avg_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, ptrdiff_t); 92void ff_avg_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, ptrdiff_t); 93void ff_avg_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, ptrdiff_t); 94void ff_avg_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, ptrdiff_t); 95void ff_avg_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, ptrdiff_t); 96 97av_cold void ff_h264qpel_init_arm(H264QpelContext *c, int bit_depth) 98{ 99 const int high_bit_depth = bit_depth > 8; 100 int cpu_flags = av_get_cpu_flags(); 101 102 if (have_neon(cpu_flags) && !high_bit_depth) { 103 c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon; 104 c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon; 105 c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon; 106 c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon; 107 c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon; 108 c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon; 109 c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon; 110 c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon; 111 c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon; 112 c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon; 113 c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon; 114 c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon; 115 c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon; 116 c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon; 117 c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon; 118 c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon; 119 120 c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon; 121 c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon; 122 c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon; 123 c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon; 124 c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon; 125 c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon; 126 c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon; 127 c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon; 128 c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon; 129 c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon; 130 c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon; 131 c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon; 132 c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon; 133 c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon; 134 c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon; 135 c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon; 136 137 c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon; 138 c->avg_h264_qpel_pixels_tab[0][ 1] = ff_avg_h264_qpel16_mc10_neon; 139 c->avg_h264_qpel_pixels_tab[0][ 2] = ff_avg_h264_qpel16_mc20_neon; 140 c->avg_h264_qpel_pixels_tab[0][ 3] = ff_avg_h264_qpel16_mc30_neon; 141 c->avg_h264_qpel_pixels_tab[0][ 4] = ff_avg_h264_qpel16_mc01_neon; 142 c->avg_h264_qpel_pixels_tab[0][ 5] = ff_avg_h264_qpel16_mc11_neon; 143 c->avg_h264_qpel_pixels_tab[0][ 6] = ff_avg_h264_qpel16_mc21_neon; 144 c->avg_h264_qpel_pixels_tab[0][ 7] = ff_avg_h264_qpel16_mc31_neon; 145 c->avg_h264_qpel_pixels_tab[0][ 8] = ff_avg_h264_qpel16_mc02_neon; 146 c->avg_h264_qpel_pixels_tab[0][ 9] = ff_avg_h264_qpel16_mc12_neon; 147 c->avg_h264_qpel_pixels_tab[0][10] = ff_avg_h264_qpel16_mc22_neon; 148 c->avg_h264_qpel_pixels_tab[0][11] = ff_avg_h264_qpel16_mc32_neon; 149 c->avg_h264_qpel_pixels_tab[0][12] = ff_avg_h264_qpel16_mc03_neon; 150 c->avg_h264_qpel_pixels_tab[0][13] = ff_avg_h264_qpel16_mc13_neon; 151 c->avg_h264_qpel_pixels_tab[0][14] = ff_avg_h264_qpel16_mc23_neon; 152 c->avg_h264_qpel_pixels_tab[0][15] = ff_avg_h264_qpel16_mc33_neon; 153 154 c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_neon; 155 c->avg_h264_qpel_pixels_tab[1][ 1] = ff_avg_h264_qpel8_mc10_neon; 156 c->avg_h264_qpel_pixels_tab[1][ 2] = ff_avg_h264_qpel8_mc20_neon; 157 c->avg_h264_qpel_pixels_tab[1][ 3] = ff_avg_h264_qpel8_mc30_neon; 158 c->avg_h264_qpel_pixels_tab[1][ 4] = ff_avg_h264_qpel8_mc01_neon; 159 c->avg_h264_qpel_pixels_tab[1][ 5] = ff_avg_h264_qpel8_mc11_neon; 160 c->avg_h264_qpel_pixels_tab[1][ 6] = ff_avg_h264_qpel8_mc21_neon; 161 c->avg_h264_qpel_pixels_tab[1][ 7] = ff_avg_h264_qpel8_mc31_neon; 162 c->avg_h264_qpel_pixels_tab[1][ 8] = ff_avg_h264_qpel8_mc02_neon; 163 c->avg_h264_qpel_pixels_tab[1][ 9] = ff_avg_h264_qpel8_mc12_neon; 164 c->avg_h264_qpel_pixels_tab[1][10] = ff_avg_h264_qpel8_mc22_neon; 165 c->avg_h264_qpel_pixels_tab[1][11] = ff_avg_h264_qpel8_mc32_neon; 166 c->avg_h264_qpel_pixels_tab[1][12] = ff_avg_h264_qpel8_mc03_neon; 167 c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon; 168 c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon; 169 c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; 170 } 171} 172