1/* 2 * Copyright (c) 2002 Brian Foley 3 * Copyright (c) 2002 Dieter Shirley 4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23#include "config.h" 24#if HAVE_ALTIVEC_H 25#include <altivec.h> 26#endif 27 28#include "libavutil/attributes.h" 29#include "libavutil/cpu.h" 30#include "libavutil/ppc/cpu.h" 31#include "libavutil/ppc/types_altivec.h" 32#include "libavutil/ppc/util_altivec.h" 33#include "libavcodec/avcodec.h" 34#include "libavcodec/pixblockdsp.h" 35 36#if HAVE_ALTIVEC 37 38static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, 39 int line_size) 40{ 41 int i; 42 vector unsigned char perm = vec_lvsl(0, pixels); 43 const vector unsigned char zero = 44 (const vector unsigned char) vec_splat_u8(0); 45 46 for (i = 0; i < 8; i++) { 47 /* Read potentially unaligned pixels. 48 * We're reading 16 pixels, and actually only want 8, 49 * but we simply ignore the extras. */ 50 vector unsigned char pixl = vec_ld(0, pixels); 51 vector unsigned char pixr = vec_ld(7, pixels); 52 vector unsigned char bytes = vec_perm(pixl, pixr, perm); 53 54 // Convert the bytes into shorts. 55 vector signed short shorts = (vector signed short) vec_mergeh(zero, 56 bytes); 57 58 // Save the data to the block, we assume the block is 16-byte aligned. 59 vec_st(shorts, i * 16, (vector signed short *) block); 60 61 pixels += line_size; 62 } 63} 64 65static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, 66 const uint8_t *s2, int stride) 67{ 68 int i; 69 vector unsigned char perm1 = vec_lvsl(0, s1); 70 vector unsigned char perm2 = vec_lvsl(0, s2); 71 const vector unsigned char zero = 72 (const vector unsigned char) vec_splat_u8(0); 73 vector signed short shorts1, shorts2; 74 75 for (i = 0; i < 4; i++) { 76 /* Read potentially unaligned pixels. 77 * We're reading 16 pixels, and actually only want 8, 78 * but we simply ignore the extras. */ 79 vector unsigned char pixl = vec_ld(0, s1); 80 vector unsigned char pixr = vec_ld(15, s1); 81 vector unsigned char bytes = vec_perm(pixl, pixr, perm1); 82 83 // Convert the bytes into shorts. 84 shorts1 = (vector signed short) vec_mergeh(zero, bytes); 85 86 // Do the same for the second block of pixels. 87 pixl = vec_ld(0, s2); 88 pixr = vec_ld(15, s2); 89 bytes = vec_perm(pixl, pixr, perm2); 90 91 // Convert the bytes into shorts. 92 shorts2 = (vector signed short) vec_mergeh(zero, bytes); 93 94 // Do the subtraction. 95 shorts1 = vec_sub(shorts1, shorts2); 96 97 // Save the data to the block, we assume the block is 16-byte aligned. 98 vec_st(shorts1, 0, (vector signed short *) block); 99 100 s1 += stride; 101 s2 += stride; 102 block += 8; 103 104 /* The code below is a copy of the code above... 105 * This is a manual unroll. */ 106 107 /* Read potentially unaligned pixels. 108 * We're reading 16 pixels, and actually only want 8, 109 * but we simply ignore the extras. */ 110 pixl = vec_ld(0, s1); 111 pixr = vec_ld(15, s1); 112 bytes = vec_perm(pixl, pixr, perm1); 113 114 // Convert the bytes into shorts. 115 shorts1 = (vector signed short) vec_mergeh(zero, bytes); 116 117 // Do the same for the second block of pixels. 118 pixl = vec_ld(0, s2); 119 pixr = vec_ld(15, s2); 120 bytes = vec_perm(pixl, pixr, perm2); 121 122 // Convert the bytes into shorts. 123 shorts2 = (vector signed short) vec_mergeh(zero, bytes); 124 125 // Do the subtraction. 126 shorts1 = vec_sub(shorts1, shorts2); 127 128 // Save the data to the block, we assume the block is 16-byte aligned. 129 vec_st(shorts1, 0, (vector signed short *) block); 130 131 s1 += stride; 132 s2 += stride; 133 block += 8; 134 } 135} 136 137#endif /* HAVE_ALTIVEC */ 138 139av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, 140 AVCodecContext *avctx, 141 unsigned high_bit_depth) 142{ 143#if HAVE_ALTIVEC 144 if (!PPC_ALTIVEC(av_get_cpu_flags())) 145 return; 146 147 c->diff_pixels = diff_pixels_altivec; 148 149 if (!high_bit_depth) { 150 c->get_pixels = get_pixels_altivec; 151 } 152#endif /* HAVE_ALTIVEC */ 153} 154