1/*
2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23#include "config.h"
24#if HAVE_ALTIVEC_H
25#include <altivec.h>
26#endif
27
28#include "libavutil/attributes.h"
29#include "libavutil/cpu.h"
30#include "libavutil/ppc/cpu.h"
31#include "libavutil/ppc/types_altivec.h"
32#include "libavutil/ppc/util_altivec.h"
33#include "libavcodec/avcodec.h"
34#include "libavcodec/pixblockdsp.h"
35
36#if HAVE_ALTIVEC
37
38static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
39                               int line_size)
40{
41    int i;
42    vector unsigned char perm = vec_lvsl(0, pixels);
43    const vector unsigned char zero =
44        (const vector unsigned char) vec_splat_u8(0);
45
46    for (i = 0; i < 8; i++) {
47        /* Read potentially unaligned pixels.
48         * We're reading 16 pixels, and actually only want 8,
49         * but we simply ignore the extras. */
50        vector unsigned char pixl = vec_ld(0, pixels);
51        vector unsigned char pixr = vec_ld(7, pixels);
52        vector unsigned char bytes = vec_perm(pixl, pixr, perm);
53
54        // Convert the bytes into shorts.
55        vector signed short shorts = (vector signed short) vec_mergeh(zero,
56                                                                      bytes);
57
58        // Save the data to the block, we assume the block is 16-byte aligned.
59        vec_st(shorts, i * 16, (vector signed short *) block);
60
61        pixels += line_size;
62    }
63}
64
65static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
66                                const uint8_t *s2, int stride)
67{
68    int i;
69    vector unsigned char perm1 = vec_lvsl(0, s1);
70    vector unsigned char perm2 = vec_lvsl(0, s2);
71    const vector unsigned char zero =
72        (const vector unsigned char) vec_splat_u8(0);
73    vector signed short shorts1, shorts2;
74
75    for (i = 0; i < 4; i++) {
76        /* Read potentially unaligned pixels.
77         * We're reading 16 pixels, and actually only want 8,
78         * but we simply ignore the extras. */
79        vector unsigned char pixl  = vec_ld(0,  s1);
80        vector unsigned char pixr  = vec_ld(15, s1);
81        vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
82
83        // Convert the bytes into shorts.
84        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
85
86        // Do the same for the second block of pixels.
87        pixl  = vec_ld(0,  s2);
88        pixr  = vec_ld(15, s2);
89        bytes = vec_perm(pixl, pixr, perm2);
90
91        // Convert the bytes into shorts.
92        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
93
94        // Do the subtraction.
95        shorts1 = vec_sub(shorts1, shorts2);
96
97        // Save the data to the block, we assume the block is 16-byte aligned.
98        vec_st(shorts1, 0, (vector signed short *) block);
99
100        s1    += stride;
101        s2    += stride;
102        block += 8;
103
104        /* The code below is a copy of the code above...
105         * This is a manual unroll. */
106
107        /* Read potentially unaligned pixels.
108         * We're reading 16 pixels, and actually only want 8,
109         * but we simply ignore the extras. */
110        pixl  = vec_ld(0,  s1);
111        pixr  = vec_ld(15, s1);
112        bytes = vec_perm(pixl, pixr, perm1);
113
114        // Convert the bytes into shorts.
115        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
116
117        // Do the same for the second block of pixels.
118        pixl  = vec_ld(0,  s2);
119        pixr  = vec_ld(15, s2);
120        bytes = vec_perm(pixl, pixr, perm2);
121
122        // Convert the bytes into shorts.
123        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
124
125        // Do the subtraction.
126        shorts1 = vec_sub(shorts1, shorts2);
127
128        // Save the data to the block, we assume the block is 16-byte aligned.
129        vec_st(shorts1, 0, (vector signed short *) block);
130
131        s1    += stride;
132        s2    += stride;
133        block += 8;
134    }
135}
136
137#endif /* HAVE_ALTIVEC */
138
139av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
140                                     AVCodecContext *avctx,
141                                     unsigned high_bit_depth)
142{
143#if HAVE_ALTIVEC
144    if (!PPC_ALTIVEC(av_get_cpu_flags()))
145        return;
146
147    c->diff_pixels = diff_pixels_altivec;
148
149    if (!high_bit_depth) {
150        c->get_pixels = get_pixels_altivec;
151    }
152#endif /* HAVE_ALTIVEC */
153}
154