1/* 2 * AltiVec-enhanced yuv-to-yuv conversion routines. 3 * 4 * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> 5 * based on the equivalent C code in swscale.c 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24#include <inttypes.h> 25 26#include "config.h" 27#include "libavutil/attributes.h" 28#include "libavutil/cpu.h" 29#include "libswscale/swscale.h" 30#include "libswscale/swscale_internal.h" 31 32#if HAVE_ALTIVEC 33 34static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[], 35 int srcStride[], int srcSliceY, 36 int srcSliceH, uint8_t *dstParam[], 37 int dstStride_a[]) 38{ 39 uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY; 40 // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, 41 // srcStride[0], srcStride[1], dstStride[0]); 42 const uint8_t *ysrc = src[0]; 43 const uint8_t *usrc = src[1]; 44 const uint8_t *vsrc = src[2]; 45 const int width = c->srcW; 46 const int height = srcSliceH; 47 const int lumStride = srcStride[0]; 48 const int chromStride = srcStride[1]; 49 const int dstStride = dstStride_a[0]; 50 const vector unsigned char yperm = vec_lvsl(0, ysrc); 51 const int vertLumPerChroma = 2; 52 register unsigned int y; 53 54 /* This code assumes: 55 * 56 * 1) dst is 16 bytes-aligned 57 * 2) dstStride is a multiple of 16 58 * 3) width is a multiple of 16 59 * 4) lum & chrom stride are multiples of 8 60 */ 61 62 for (y = 0; y < height; y++) { 63 int i; 64 for (i = 0; i < width - 31; i += 32) { 65 const unsigned int j = i >> 1; 66 vector unsigned char v_yA = vec_ld(i, ysrc); 67 vector unsigned char v_yB = vec_ld(i + 16, ysrc); 68 vector unsigned char v_yC = vec_ld(i + 32, ysrc); 69 vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); 70 vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); 71 vector unsigned char v_uA = vec_ld(j, usrc); 72 vector unsigned char v_uB = vec_ld(j + 16, usrc); 73 vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); 74 vector unsigned char v_vA = vec_ld(j, vsrc); 75 vector unsigned char v_vB = vec_ld(j + 16, vsrc); 76 vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); 77 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); 78 vector unsigned char v_uv_b = vec_mergel(v_u, v_v); 79 vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); 80 vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); 81 vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b); 82 vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b); 83 vec_st(v_yuy2_0, (i << 1), dst); 84 vec_st(v_yuy2_1, (i << 1) + 16, dst); 85 vec_st(v_yuy2_2, (i << 1) + 32, dst); 86 vec_st(v_yuy2_3, (i << 1) + 48, dst); 87 } 88 if (i < width) { 89 const unsigned int j = i >> 1; 90 vector unsigned char v_y1 = vec_ld(i, ysrc); 91 vector unsigned char v_u = vec_ld(j, usrc); 92 vector unsigned char v_v = vec_ld(j, vsrc); 93 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); 94 vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); 95 vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); 96 vec_st(v_yuy2_0, (i << 1), dst); 97 vec_st(v_yuy2_1, (i << 1) + 16, dst); 98 } 99 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { 100 usrc += chromStride; 101 vsrc += chromStride; 102 } 103 ysrc += lumStride; 104 dst += dstStride; 105 } 106 107 return srcSliceH; 108} 109 110static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[], 111 int srcStride[], int srcSliceY, 112 int srcSliceH, uint8_t *dstParam[], 113 int dstStride_a[]) 114{ 115 uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY; 116 // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, 117 // srcStride[0], srcStride[1], dstStride[0]); 118 const uint8_t *ysrc = src[0]; 119 const uint8_t *usrc = src[1]; 120 const uint8_t *vsrc = src[2]; 121 const int width = c->srcW; 122 const int height = srcSliceH; 123 const int lumStride = srcStride[0]; 124 const int chromStride = srcStride[1]; 125 const int dstStride = dstStride_a[0]; 126 const int vertLumPerChroma = 2; 127 const vector unsigned char yperm = vec_lvsl(0, ysrc); 128 register unsigned int y; 129 130 /* This code assumes: 131 * 132 * 1) dst is 16 bytes-aligned 133 * 2) dstStride is a multiple of 16 134 * 3) width is a multiple of 16 135 * 4) lum & chrom stride are multiples of 8 136 */ 137 138 for (y = 0; y < height; y++) { 139 int i; 140 for (i = 0; i < width - 31; i += 32) { 141 const unsigned int j = i >> 1; 142 vector unsigned char v_yA = vec_ld(i, ysrc); 143 vector unsigned char v_yB = vec_ld(i + 16, ysrc); 144 vector unsigned char v_yC = vec_ld(i + 32, ysrc); 145 vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); 146 vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); 147 vector unsigned char v_uA = vec_ld(j, usrc); 148 vector unsigned char v_uB = vec_ld(j + 16, usrc); 149 vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); 150 vector unsigned char v_vA = vec_ld(j, vsrc); 151 vector unsigned char v_vB = vec_ld(j + 16, vsrc); 152 vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); 153 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); 154 vector unsigned char v_uv_b = vec_mergel(v_u, v_v); 155 vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); 156 vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); 157 vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2); 158 vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2); 159 vec_st(v_uyvy_0, (i << 1), dst); 160 vec_st(v_uyvy_1, (i << 1) + 16, dst); 161 vec_st(v_uyvy_2, (i << 1) + 32, dst); 162 vec_st(v_uyvy_3, (i << 1) + 48, dst); 163 } 164 if (i < width) { 165 const unsigned int j = i >> 1; 166 vector unsigned char v_y1 = vec_ld(i, ysrc); 167 vector unsigned char v_u = vec_ld(j, usrc); 168 vector unsigned char v_v = vec_ld(j, vsrc); 169 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); 170 vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); 171 vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); 172 vec_st(v_uyvy_0, (i << 1), dst); 173 vec_st(v_uyvy_1, (i << 1) + 16, dst); 174 } 175 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { 176 usrc += chromStride; 177 vsrc += chromStride; 178 } 179 ysrc += lumStride; 180 dst += dstStride; 181 } 182 return srcSliceH; 183} 184 185#endif /* HAVE_ALTIVEC */ 186 187av_cold void ff_get_unscaled_swscale_ppc(SwsContext *c) 188{ 189#if HAVE_ALTIVEC 190 if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) 191 return; 192 193 if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) && 194 c->srcFormat == AV_PIX_FMT_YUV420P) { 195 enum AVPixelFormat dstFormat = c->dstFormat; 196 197 // unscaled YV12 -> packed YUV, we want speed 198 if (dstFormat == AV_PIX_FMT_YUYV422) 199 c->swscale = yv12toyuy2_unscaled_altivec; 200 else if (dstFormat == AV_PIX_FMT_UYVY422) 201 c->swscale = yv12touyvy_unscaled_altivec; 202 } 203#endif /* HAVE_ALTIVEC */ 204} 205