FFmpeg  4.0
mpegvideoencdsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "config.h"
20 
21 #include <stdint.h>
22 
23 #include "libavutil/attributes.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/ppc/cpu.h"
27 
29 
30 #if HAVE_ALTIVEC
31 
32 #if HAVE_VSX
33 static int pix_norm1_altivec(uint8_t *pix, int line_size)
34 {
35  int i, s = 0;
36  const vector unsigned int zero =
37  (const vector unsigned int) vec_splat_u32(0);
38  vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
39  vector signed int sum;
40 
41  for (i = 0; i < 16; i++) {
42  /* Read the potentially unaligned pixels. */
43  //vector unsigned char pixl = vec_ld(0, pix);
44  //vector unsigned char pixr = vec_ld(15, pix);
45  //vector unsigned char pixv = vec_perm(pixl, pixr, perm);
46  vector unsigned char pixv = vec_vsx_ld(0, pix);
47 
48  /* Square the values, and add them to our sum. */
49  sv = vec_msum(pixv, pixv, sv);
50 
51  pix += line_size;
52  }
53  /* Sum up the four partial sums, and put the result into s. */
54  sum = vec_sums((vector signed int) sv, (vector signed int) zero);
55  sum = vec_splat(sum, 3);
56  vec_ste(sum, 0, &s);
57  return s;
58 }
59 #else
60 static int pix_norm1_altivec(uint8_t *pix, int line_size)
61 {
62  int i, s = 0;
63  const vector unsigned int zero =
64  (const vector unsigned int) vec_splat_u32(0);
65  vector unsigned char perm = vec_lvsl(0, pix);
66  vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
67  vector signed int sum;
68 
69  for (i = 0; i < 16; i++) {
70  /* Read the potentially unaligned pixels. */
71  vector unsigned char pixl = vec_ld(0, pix);
72  vector unsigned char pixr = vec_ld(15, pix);
73  vector unsigned char pixv = vec_perm(pixl, pixr, perm);
74 
75  /* Square the values, and add them to our sum. */
76  sv = vec_msum(pixv, pixv, sv);
77 
78  pix += line_size;
79  }
80  /* Sum up the four partial sums, and put the result into s. */
81  sum = vec_sums((vector signed int) sv, (vector signed int) zero);
82  sum = vec_splat(sum, 3);
83  vec_ste(sum, 0, &s);
84 
85  return s;
86 }
87 #endif /* HAVE_VSX */
88 
89 #if HAVE_VSX
90 static int pix_sum_altivec(uint8_t *pix, int line_size)
91 {
92  int i, s;
93  const vector unsigned int zero =
94  (const vector unsigned int) vec_splat_u32(0);
95  vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
96  vector signed int sumdiffs;
97 
98  for (i = 0; i < 16; i++) {
99  /* Read the potentially unaligned 16 pixels into t1. */
100  //vector unsigned char pixl = vec_ld(0, pix);
101  //vector unsigned char pixr = vec_ld(15, pix);
102  //vector unsigned char t1 = vec_perm(pixl, pixr, perm);
103  vector unsigned char t1 = vec_vsx_ld(0, pix);
104 
105  /* Add each 4 pixel group together and put 4 results into sad. */
106  sad = vec_sum4s(t1, sad);
107 
108  pix += line_size;
109  }
110 
111  /* Sum up the four partial sums, and put the result into s. */
112  sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
113  sumdiffs = vec_splat(sumdiffs, 3);
114  vec_ste(sumdiffs, 0, &s);
115  return s;
116 }
117 #else
118 static int pix_sum_altivec(uint8_t *pix, int line_size)
119 {
120  int i, s;
121  const vector unsigned int zero =
122  (const vector unsigned int) vec_splat_u32(0);
123  vector unsigned char perm = vec_lvsl(0, pix);
124  vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
125  vector signed int sumdiffs;
126 
127  for (i = 0; i < 16; i++) {
128  /* Read the potentially unaligned 16 pixels into t1. */
129  vector unsigned char pixl = vec_ld(0, pix);
130  vector unsigned char pixr = vec_ld(15, pix);
131  vector unsigned char t1 = vec_perm(pixl, pixr, perm);
132 
133  /* Add each 4 pixel group together and put 4 results into sad. */
134  sad = vec_sum4s(t1, sad);
135 
136  pix += line_size;
137  }
138 
139  /* Sum up the four partial sums, and put the result into s. */
140  sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
141  sumdiffs = vec_splat(sumdiffs, 3);
142  vec_ste(sumdiffs, 0, &s);
143 
144  return s;
145 }
146 
147 #endif /* HAVE_VSX */
148 
149 #endif /* HAVE_ALTIVEC */
150 
152  AVCodecContext *avctx)
153 {
154 #if HAVE_ALTIVEC
156  return;
157 
158  c->pix_norm1 = pix_norm1_altivec;
159  c->pix_sum = pix_sum_altivec;
160 #endif /* HAVE_ALTIVEC */
161 }
const char * s
Definition: avisynth_c.h:768
av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, AVCodecContext *avctx)
Macro definitions for various function/variable attributes.
uint8_t
#define av_cold
Definition: attributes.h:82
#define PPC_ALTIVEC(flags)
Definition: cpu.h:25
#define t1
Definition: regdef.h:29
#define zero
Definition: regdef.h:64
int(* pix_norm1)(uint8_t *pix, int line_size)
int(* pix_sum)(uint8_t *pix, int line_size)
perm
Definition: f_perms.c:74
main external API structure.
Definition: avcodec.h:1518
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
Contains misc utility macros and inline functions.
int
static double c[64]