40 vector
unsigned char perm =
41 (vector
unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
42 0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
43 const vector
unsigned char zero =
44 (
const vector
unsigned char) vec_splat_u8(0);
46 for (i = 0; i < 8; i++) {
50 vector
unsigned char bytes = vec_vsx_ld(0, pixels);
54 vector
signed short shorts = (vector
signed short) vec_perm(bytes, zero, perm);
57 vec_vsx_st(shorts, i * 16, (vector
signed short *) block);
63 static void get_pixels_altivec(int16_t *restrict block,
const uint8_t *pixels,
69 for (i = 0; i < 8; i++) {
70 vec_u8 perm = vec_lvsl(0, pixels);
74 vec_u8 pixl = vec_ld(0, pixels);
75 vec_u8 pixr = vec_ld(7, pixels);
76 vec_u8 bytes = vec_perm(pixl, pixr, perm);
82 vec_st(shorts, i * 16, (
vec_s16 *)block);
91 static void diff_pixels_altivec(int16_t *restrict block,
const uint8_t *
s1,
95 const vector
unsigned char zero =
96 (
const vector
unsigned char) vec_splat_u8(0);
97 vector
signed short shorts1, shorts2;
99 for (i = 0; i < 4; i++) {
103 vector
unsigned char bytes = vec_vsx_ld(0, s1);
106 shorts1 = (vector
signed short) vec_mergeh(bytes, zero);
109 bytes =vec_vsx_ld(0, s2);
112 shorts2 = (vector
signed short) vec_mergeh(bytes, zero);
115 shorts1 = vec_sub(shorts1, shorts2);
118 vec_vsx_st(shorts1, 0, (vector
signed short *) block);
130 bytes = vec_vsx_ld(0, s1);
133 shorts1 = (vector
signed short) vec_mergeh(bytes, zero);
136 bytes = vec_vsx_ld(0, s2);
139 shorts2 = (vector
signed short) vec_mergeh(bytes, zero);
142 shorts1 = vec_sub(shorts1, shorts2);
145 vec_vsx_st(shorts1, 0, (vector
signed short *) block);
153 static void diff_pixels_altivec(int16_t *restrict block,
const uint8_t *s1,
154 const uint8_t *s2, ptrdiff_t stride)
161 for (i = 0; i < 4; i++) {
165 perm = vec_lvsl(0, s1);
166 vec_u8 pixl = vec_ld(0, s1);
167 vec_u8 pixr = vec_ld(15, s1);
168 vec_u8 bytes = vec_perm(pixl, pixr, perm);
171 shorts1 = (
vec_s16)vec_mergeh(zero, bytes);
174 perm = vec_lvsl(0, s2);
175 pixl = vec_ld(0, s2);
176 pixr = vec_ld(15, s2);
177 bytes = vec_perm(pixl, pixr, perm);
180 shorts2 = (
vec_s16)vec_mergeh(zero, bytes);
183 shorts1 = vec_sub(shorts1, shorts2);
186 vec_st(shorts1, 0, (
vec_s16 *)block);
198 perm = vec_lvsl(0, s1);
199 pixl = vec_ld(0, s1);
200 pixr = vec_ld(15, s1);
201 bytes = vec_perm(pixl, pixr, perm);
204 shorts1 = (
vec_s16)vec_mergeh(zero, bytes);
207 perm = vec_lvsl(0, s2);
208 pixl = vec_ld(0, s2);
209 pixr = vec_ld(15, s2);
210 bytes = vec_perm(pixl, pixr, perm);
213 shorts2 = (
vec_s16)vec_mergeh(zero, bytes);
216 shorts1 = vec_sub(shorts1, shorts2);
219 vec_st(shorts1, 0, (
vec_s16 *)block);
232 static void get_pixels_vsx(int16_t *restrict block,
const uint8_t *pixels,
236 for (i = 0; i < 8; i++) {
237 vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
239 vec_vsx_st(shorts, i * 16, block);
245 static void diff_pixels_vsx(int16_t *restrict block,
const uint8_t *s1,
246 const uint8_t *s2, ptrdiff_t stride)
250 for (i = 0; i < 8; i++) {
251 shorts1 = vsx_ld_u8_s16(0, s1);
252 shorts2 = vsx_ld_u8_s16(0, s2);
254 shorts1 = vec_sub(shorts1, shorts2);
256 vec_vsx_st(shorts1, 0, block);
267 unsigned high_bit_depth)
275 if (!high_bit_depth) {
Macro definitions for various function/variable attributes.
void(* diff_pixels)(int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride)
#define PPC_ALTIVEC(flags)
Libavcodec external API header.
Main external API structure.
av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Contains miscellaneous utility macros and inline functions.
void(* get_pixels)(int16_t *av_restrict block, const uint8_t *pixels, ptrdiff_t stride)