FFmpeg  4.0
h264pred_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264pred
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "h264pred_mips.h"
28 #include "constants.h"
29 
31 {
32  double ftmp[2];
33  uint64_t tmp[1];
34  DECLARE_VAR_ALL64;
35 
36  __asm__ volatile (
37  "dli %[tmp0], 0x08 \n\t"
38  MMI_LDC1(%[ftmp0], %[srcA], 0x00)
39  MMI_LDC1(%[ftmp1], %[srcA], 0x08)
40 
41  "1: \n\t"
42  MMI_SDC1(%[ftmp0], %[src], 0x00)
43  MMI_SDC1(%[ftmp1], %[src], 0x08)
44  PTR_ADDU "%[src], %[src], %[stride] \n\t"
45  MMI_SDC1(%[ftmp0], %[src], 0x00)
46  MMI_SDC1(%[ftmp1], %[src], 0x08)
47 
48  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
49  PTR_ADDU "%[src], %[src], %[stride] \n\t"
50  "bnez %[tmp0], 1b \n\t"
51  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
52  [tmp0]"=&r"(tmp[0]),
53  RESTRICT_ASM_ALL64
54  [src]"+&r"(src)
55  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
56  : "memory"
57  );
58 }
59 
61 {
62  uint64_t tmp[3];
63  mips_reg addr[2];
64 
65  __asm__ volatile (
66  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
67  PTR_ADDU "%[addr1], %[src], $0 \n\t"
68  "dli %[tmp2], 0x08 \n\t"
69  "1: \n\t"
70  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
71  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
72  "swl %[tmp1], 0x07(%[addr1]) \n\t"
73  "swr %[tmp1], 0x00(%[addr1]) \n\t"
74  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
75  "swr %[tmp1], 0x08(%[addr1]) \n\t"
76  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
77  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
78  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
79  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
80  "swl %[tmp1], 0x07(%[addr1]) \n\t"
81  "swr %[tmp1], 0x00(%[addr1]) \n\t"
82  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
83  "swr %[tmp1], 0x08(%[addr1]) \n\t"
84  "daddi %[tmp2], %[tmp2], -0x01 \n\t"
85  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
86  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
87  "bnez %[tmp2], 1b \n\t"
88  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
89  [tmp2]"=&r"(tmp[2]),
90  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
91  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
92  [ff_pb_1]"r"(ff_pb_1)
93  : "memory"
94  );
95 }
96 
98 {
99  uint64_t tmp[4];
100  mips_reg addr[2];
101 
102  __asm__ volatile (
103  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
104  "dli %[tmp0], 0x08 \n\t"
105  "xor %[tmp3], %[tmp3], %[tmp3] \n\t"
106  "1: \n\t"
107  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
108  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
109  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
110  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
111  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
112  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
113  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
114  "bnez %[tmp0], 1b \n\t"
115 
116  "dli %[tmp0], 0x08 \n\t"
117  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
118  "2: \n\t"
119  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
120  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
121  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
122  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
123  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
124  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
125  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
126  "bnez %[tmp0], 2b \n\t"
127 
128  "daddiu %[tmp3], %[tmp3], 0x10 \n\t"
129  "dsra %[tmp3], 0x05 \n\t"
130  "dmul %[tmp2], %[tmp3], %[ff_pb_1] \n\t"
131  PTR_ADDU "%[addr0], %[src], $0 \n\t"
132  "dli %[tmp0], 0x08 \n\t"
133  "3: \n\t"
134  "swl %[tmp2], 0x07(%[addr0]) \n\t"
135  "swr %[tmp2], 0x00(%[addr0]) \n\t"
136  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
137  "swr %[tmp2], 0x08(%[addr0]) \n\t"
138  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
139  "swl %[tmp2], 0x07(%[addr0]) \n\t"
140  "swr %[tmp2], 0x00(%[addr0]) \n\t"
141  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
142  "swr %[tmp2], 0x08(%[addr0]) \n\t"
143  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
144  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
145  "bnez %[tmp0], 3b \n\t"
146  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
147  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
148  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
149  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
150  [ff_pb_1]"r"(ff_pb_1)
151  : "memory"
152  );
153 }
154 
/**
 * Fill an 8x8 block with a single DC byte derived from the row above it.
 *
 * The first asm statement loads 8 bytes from each of src-stride-1,
 * src-stride and src-stride+1, combines them lane by lane, sums the packed
 * result with biadd and forms dc = ((sum + 4) >> 3) * ff_pb_1.
 * The second asm statement stores that value to 8 consecutive rows,
 * starting at src and advancing by stride.
 *
 * NOTE(review): the per-lane formula looks like
 * (left + 2*centre + right + 2) >> 2, assuming ff_pw_2 holds 2 in every
 * 16-bit lane (it is defined in constants.c, not shown here) -- confirm.
 *
 * @param src          first byte of the block to fill; only the local copy
 *                     is advanced while storing
 * @param has_topleft  when zero, pinsrh_0 patches ftmp7 from ftmp5 instead
 *                     of keeping the value loaded from src-stride-1
 * @param has_topright when zero, pinsrh_3 patches ftmp2 from ftmp4 instead
 *                     of keeping the value loaded from src-stride+1
 * @param stride       byte distance between two rows
 */
155 void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
156  int has_topright, ptrdiff_t stride)
157 {
158  uint32_t dc;
159  double ftmp[11];
160  mips_reg tmp[3];
161  DECLARE_VAR_ALL64;
162  DECLARE_VAR_ADDRT;
163 
164  __asm__ volatile (
165  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* Three overlapping 8-byte reads of the row above: shifted by -1, 0, +1. */
166  MMI_ULDC1(%[ftmp10], %[srcA], 0x00)
167  MMI_ULDC1(%[ftmp9], %[src0], 0x00)
168  MMI_ULDC1(%[ftmp8], %[src1], 0x00)
169 
 /* Interleave each load with the zeroed ftmp0 (low/high halves). */
170  "punpcklbh %[ftmp7], %[ftmp10], %[ftmp0] \n\t"
171  "punpckhbh %[ftmp6], %[ftmp10], %[ftmp0] \n\t"
172  "punpcklbh %[ftmp5], %[ftmp9], %[ftmp0] \n\t"
173  "punpckhbh %[ftmp4], %[ftmp9], %[ftmp0] \n\t"
174  "punpcklbh %[ftmp3], %[ftmp8], %[ftmp0] \n\t"
175  "punpckhbh %[ftmp2], %[ftmp8], %[ftmp0] \n\t"
 /* The edge patches below run only when the corresponding flag is zero. */
176  "bnez %[has_topleft], 1f \n\t"
177  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
178 
179  "1: \n\t"
180  "bnez %[has_topright], 2f \n\t"
181  "pinsrh_3 %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
182 
183  "2: \n\t"
 /* ftmp1 = 2 is the shift count used by psrah further down. */
184  "dli %[tmp0], 0x02 \n\t"
185  "mtc1 %[tmp0], %[ftmp1] \n\t"
186  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_2] \n\t"
187  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_2] \n\t"
188  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
189  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
190  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
191  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
192  "paddh %[ftmp7], %[ftmp7], %[ff_pw_2] \n\t"
193  "paddh %[ftmp6], %[ftmp6], %[ff_pw_2] \n\t"
194  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
195  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
196  "packushb %[ftmp9], %[ftmp7], %[ftmp6] \n\t"
 /* Sum the packed bytes, then dc = ((sum + 4) >> 3) * ff_pb_1. */
197  "biadd %[ftmp10], %[ftmp9] \n\t"
198  "mfc1 %[tmp1], %[ftmp10] \n\t"
199  "addiu %[tmp1], %[tmp1], 0x04 \n\t"
200  "srl %[tmp1], %[tmp1], 0x03 \n\t"
201  "mul %[dc], %[tmp1], %[ff_pb_1] \n\t"
202  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
203  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
204  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
205  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
206  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
207  [ftmp10]"=&f"(ftmp[10]),
208  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
209  RESTRICT_ASM_ALL64
210  [dc]"=r"(dc)
211  : [srcA]"r"((mips_reg)(src-stride-1)),
212  [src0]"r"((mips_reg)(src-stride)),
213  [src1]"r"((mips_reg)(src-stride+1)),
214  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright),
215  [ff_pb_1]"r"(ff_pb_1), [ff_pw_2]"f"(ff_pw_2)
216  : "memory"
217  );
218 
 /* Store loop: 2 iterations, 4 stores each. The MMI_SDXC1 stores are
  * presumably addressed at src + stride (macro defined elsewhere). */
219  __asm__ volatile (
220  "dli %[tmp0], 0x02 \n\t"
221  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
222 
223  "1: \n\t"
224  MMI_SDC1(%[ftmp0], %[src], 0x00)
225  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
226  PTR_ADDU "%[src], %[src], %[stride] \n\t"
227  PTR_ADDU "%[src], %[src], %[stride] \n\t"
228  MMI_SDC1(%[ftmp0], %[src], 0x00)
229  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
230 
231  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
232  PTR_ADDU "%[src], %[src], %[stride] \n\t"
233  PTR_ADDU "%[src], %[src], %[stride] \n\t"
234  "bnez %[tmp0], 1b \n\t"
235  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
236  RESTRICT_ASM_ALL64
237  RESTRICT_ASM_ADDRT
238  [src]"+&r"(src)
239  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
240  : "memory"
241  );
242 }
243 
/**
 * Fill an 8x8 block with a single DC byte derived from both the column to
 * its left and the row above it.
 *
 * The left-column terms l0..l7 are computed in C; their sum is dc1.
 * The first asm statement processes the row above (8-byte loads at
 * src-stride-1, src-stride and src-stride+1) and leaves the biadd sum of
 * the packed result in dc2. The final value is
 * ((dc1 + dc2 + 8) >> 4) * 0x01010101U, which the second asm statement
 * stores to 8 consecutive rows starting at src.
 *
 * @param src          first byte of the block to fill; only the local copy
 *                     is advanced while storing
 * @param has_topleft  selects src[-1 - stride] (non-zero) or src[-1] (zero)
 *                     as the first term of l0; in the asm, a zero value lets
 *                     pinsrh_0 patch ftmp7 from ftmp9
 * @param has_topright when zero, the asm re-patches ftmp8 from ftmp10
 * @param stride       byte distance between two rows
 */
244 void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
245  ptrdiff_t stride)
246 {
247  uint32_t dc, dc1, dc2;
248  double ftmp[14];
249  mips_reg tmp[1];
250 
 /* Left column, rows 0..7: (previous + 2*current + next + 2) >> 2.
  * Row 0 takes its "previous" from the top-left pixel only if has_topleft;
  * row 7 reuses itself as "next". */
251  const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
252  const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
253  const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
254  const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2;
255  const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2;
256  const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2;
257  const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2;
258  const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2;
259 
260  DECLARE_VAR_ALL64;
261  DECLARE_VAR_ADDRT;
262 
263  __asm__ volatile (
 /* Three overlapping 8-byte reads of the row above: shifted by -1, 0, +1. */
264  MMI_ULDC1(%[ftmp4], %[srcA], 0x00)
265  MMI_ULDC1(%[ftmp5], %[src0], 0x00)
266  MMI_ULDC1(%[ftmp6], %[src1], 0x00)
267  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp1 = 3 is the pshufh selector used for the edge handling below. */
268  "dli %[tmp0], 0x03 \n\t"
269  "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t"
270  "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
271  "mtc1 %[tmp0], %[ftmp1] \n\t"
272  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
273  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
274  "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t"
275  "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
276  "pshufh %[ftmp3], %[ftmp8], %[ftmp1] \n\t"
277  "pshufh %[ftmp13], %[ftmp12], %[ftmp1] \n\t"
278  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
279  "pinsrh_3 %[ftmp12], %[ftmp12], %[ftmp3] \n\t"
 /* The edge patches below run only when the corresponding flag is zero. */
280  "bnez %[has_topleft], 1f \n\t"
281  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
282 
283  "1: \n\t"
284  "bnez %[has_topright], 2f \n\t"
285  "pshufh %[ftmp13], %[ftmp10], %[ftmp1] \n\t"
286  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
287 
288  "2: \n\t"
 /* ftmp1 = 2 (shift count); ftmp2 = pshufh of ftmp1 with a zero selector,
  * presumably 2 replicated to every 16-bit lane -- confirm against the ISA. */
289  "dli %[tmp0], 0x02 \n\t"
290  "mtc1 %[tmp0], %[ftmp1] \n\t"
291  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
292  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
293  "pmullh %[ftmp10], %[ftmp10], %[ftmp2] \n\t"
294  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
295  "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
296  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
297  "paddh %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
298  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
299  "paddh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
300  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
301  "psrah %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
302  "packushb %[ftmp5], %[ftmp7], %[ftmp8] \n\t"
 /* dc2 = biadd sum of the packed row. */
303  "biadd %[ftmp4], %[ftmp5] \n\t"
304  "mfc1 %[dc2], %[ftmp4] \n\t"
305  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
306  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
307  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
308  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
309  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
310  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
311  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
312  [tmp0]"=&r"(tmp[0]),
313  RESTRICT_ASM_ALL64
314  [dc2]"=r"(dc2)
315  : [srcA]"r"((mips_reg)(src-stride-1)),
316  [src0]"r"((mips_reg)(src-stride)),
317  [src1]"r"((mips_reg)(src-stride+1)),
318  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
319  : "memory"
320  );
321 
 /* Rounded mean of the 16 terms, replicated into all four bytes. */
322  dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
323  dc = ((dc1+dc2+8)>>4)*0x01010101U;
324 
 /* Store loop: 2 iterations, 4 stores each. The MMI_SDXC1 stores are
  * presumably addressed at src + stride (macro defined elsewhere). */
325  __asm__ volatile (
326  "dli %[tmp0], 0x02 \n\t"
327  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
328 
329  "1: \n\t"
330  MMI_SDC1(%[ftmp0], %[src], 0x00)
331  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
332  PTR_ADDU "%[src], %[src], %[stride] \n\t"
333  PTR_ADDU "%[src], %[src], %[stride] \n\t"
334  MMI_SDC1(%[ftmp0], %[src], 0x00)
335  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
336 
337  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
338  PTR_ADDU "%[src], %[src], %[stride] \n\t"
339  PTR_ADDU "%[src], %[src], %[stride] \n\t"
340  "bnez %[tmp0], 1b \n\t"
341  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
342  RESTRICT_ASM_ALL64
343  RESTRICT_ASM_ADDRT
344  [src]"+&r"(src)
345  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
346  : "memory"
347  );
348 }
349 
350 void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
351  int has_topright, ptrdiff_t stride)
352 {
353  double ftmp[12];
354  mips_reg tmp[1];
355  DECLARE_VAR_ALL64;
356 
357  __asm__ volatile (
358  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
359  MMI_LDC1(%[ftmp3], %[srcA], 0x00)
360  MMI_LDC1(%[ftmp4], %[src0], 0x00)
361  MMI_LDC1(%[ftmp5], %[src1], 0x00)
362  "punpcklbh %[ftmp6], %[ftmp3], %[ftmp0] \n\t"
363  "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
364  "punpcklbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
365  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
366  "punpcklbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
367  "punpckhbh %[ftmp11], %[ftmp5], %[ftmp0] \n\t"
368  "bnez %[has_topleft], 1f \n\t"
369  "pinsrh_0 %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
370 
371  "1: \n\t"
372  "bnez %[has_topright], 2f \n\t"
373  "pinsrh_3 %[ftmp11], %[ftmp11], %[ftmp9] \n\t"
374 
375  "2: \n\t"
376  "dli %[tmp0], 0x02 \n\t"
377  "mtc1 %[tmp0], %[ftmp1] \n\t"
378  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
379  "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
380  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
381  "paddh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
382  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
383  "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
384  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
385  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
386  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
387  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
388  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
389  "packushb %[ftmp4], %[ftmp6], %[ftmp7] \n\t"
390  MMI_SDC1(%[ftmp4], %[src], 0x00)
391  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
392  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
393  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
394  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
395  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
396  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
397  [tmp0]"=&r"(tmp[0]),
398  RESTRICT_ASM_ALL64
399  [src]"=r"(src)
400  : [srcA]"r"((mips_reg)(src-stride-1)),
401  [src0]"r"((mips_reg)(src-stride)),
402  [src1]"r"((mips_reg)(src-stride+1)),
403  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
404  : "memory"
405  );
406 
407  __asm__ volatile (
408  "dli %[tmp0], 0x02 \n\t"
409 
410  "1: \n\t"
411  MMI_SDC1(%[ftmp0], %[src], 0x00)
412  PTR_ADDU "%[src], %[src], %[stride] \n\t"
413  MMI_SDC1(%[ftmp0], %[src], 0x00)
414  PTR_ADDU "%[src], %[src], %[stride] \n\t"
415  MMI_SDC1(%[ftmp0], %[src], 0x00)
416  PTR_ADDU "%[src], %[src], %[stride] \n\t"
417  MMI_SDC1(%[ftmp0], %[src], 0x00)
418 
419  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
420  PTR_ADDU "%[src], %[src], %[stride] \n\t"
421  "bnez %[tmp0], 1b \n\t"
422  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
423  RESTRICT_ASM_ALL64
424  [src]"+&r"(src)
425  : [stride]"r"((mips_reg)stride)
426  : "memory"
427  );
428 }
429 
/**
 * Fill a 4x4 block with the rounded mean of its eight neighbouring pixels.
 *
 * dc is computed in C as the sum of the four pixels directly above
 * (src[-stride] .. src[3-stride]) and the four pixels directly to the left
 * (src[-1] .. src[-1+3*stride]), plus 4, shifted right by 3. The asm
 * multiplies dc by ff_pb_1 and stores the result with MMI_SWX at row
 * offsets 0, stride, 2*stride and 3*stride from src.
 *
 * @param src      first byte of the block to fill
 * @param topright not referenced anywhere in this function
 * @param stride   byte distance between two rows
 */
430 void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
431  ptrdiff_t stride)
432 {
433  const int dc = (src[-stride] + src[1-stride] + src[2-stride]
434  + src[3-stride] + src[-1+0*stride] + src[-1+1*stride]
435  + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
436  uint64_t tmp[2];
437  mips_reg addr[1];
438  DECLARE_VAR_ADDRT;
439 
440  __asm__ volatile (
 /* tmp0 = dc (add with $0 is a register copy); tmp1 = dc * ff_pb_1,
  * presumably dc replicated into every byte -- ff_pb_1 is defined elsewhere. */
441  PTR_ADDU "%[tmp0], %[dc], $0 \n\t"
442  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
 /* addr0 is the running row offset: starts at 0, grows by stride per row. */
443  "xor %[addr0], %[addr0], %[addr0] \n\t"
444  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
445  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
446  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
447  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
448  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
449  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
450  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
451  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
452  RESTRICT_ASM_ADDRT
453  [addr0]"=&r"(addr[0])
454  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
455  [dc]"r"(dc), [ff_pb_1]"r"(ff_pb_1)
456  : "memory"
457  );
458 }
459 
461 {
462  uint64_t tmp[2];
463  mips_reg addr[2];
464 
465  __asm__ volatile (
466  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
467  PTR_ADDU "%[addr1], %[src], $0 \n\t"
468  "ldl %[tmp0], 0x07(%[addr0]) \n\t"
469  "ldr %[tmp0], 0x00(%[addr0]) \n\t"
470  "dli %[tmp1], 0x04 \n\t"
471  "1: \n\t"
472  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
473  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
474  PTR_ADDU "%[addr1], %[stride] \n\t"
475  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
476  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
477  "daddi %[tmp1], -0x01 \n\t"
478  PTR_ADDU "%[addr1], %[stride] \n\t"
479  "bnez %[tmp1], 1b \n\t"
480  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
481  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
482  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
483  : "memory"
484  );
485 }
486 
488 {
489  uint64_t tmp[3];
490  mips_reg addr[2];
491 
492  __asm__ volatile (
493  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
494  PTR_ADDU "%[addr1], %[src], $0 \n\t"
495  "dli %[tmp0], 0x04 \n\t"
496  "1: \n\t"
497  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
498  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
499  "swl %[tmp2], 0x07(%[addr1]) \n\t"
500  "swr %[tmp2], 0x00(%[addr1]) \n\t"
501  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
502  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
503  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
504  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
505  "swl %[tmp2], 0x07(%[addr1]) \n\t"
506  "swr %[tmp2], 0x00(%[addr1]) \n\t"
507  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
508  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
509  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
510  "bnez %[tmp0], 1b \n\t"
511  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
512  [tmp2]"=&r"(tmp[2]),
513  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
514  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
515  [ff_pb_1]"r"(ff_pb_1)
516  : "memory"
517  );
518 }
519 
521 {
522  double ftmp[4];
523  uint64_t tmp[1];
524  mips_reg addr[1];
525  DECLARE_VAR_ALL64;
526 
527  __asm__ volatile (
528  "dli %[tmp0], 0x02 \n\t"
529  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
530  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
531  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
532  "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
533  "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
534  "biadd %[ftmp2], %[ftmp2] \n\t"
535  "biadd %[ftmp3], %[ftmp3] \n\t"
536  "mtc1 %[tmp0], %[ftmp1] \n\t"
537  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
538  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
539  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
540  "paddush %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
541  "paddush %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
542  "mtc1 %[tmp0], %[ftmp1] \n\t"
543  "psrlh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
544  "psrlh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
545  "packushb %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
546  MMI_SDC1(%[ftmp1], %[src], 0x00)
547  PTR_ADDU "%[src], %[src], %[stride] \n\t"
548  MMI_SDC1(%[ftmp1], %[src], 0x00)
549  PTR_ADDU "%[src], %[src], %[stride] \n\t"
550  MMI_SDC1(%[ftmp1], %[src], 0x00)
551  PTR_ADDU "%[src], %[src], %[stride] \n\t"
552  MMI_SDC1(%[ftmp1], %[src], 0x00)
553  PTR_ADDU "%[src], %[src], %[stride] \n\t"
554  MMI_SDC1(%[ftmp1], %[src], 0x00)
555  PTR_ADDU "%[src], %[src], %[stride] \n\t"
556  MMI_SDC1(%[ftmp1], %[src], 0x00)
557  PTR_ADDU "%[src], %[src], %[stride] \n\t"
558  MMI_SDC1(%[ftmp1], %[src], 0x00)
559  PTR_ADDU "%[src], %[src], %[stride] \n\t"
560  MMI_SDC1(%[ftmp1], %[src], 0x00)
561  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
562  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
563  [tmp0]"=&r"(tmp[0]),
564  RESTRICT_ASM_ALL64
565  [addr0]"=&r"(addr[0]),
566  [src]"+&r"(src)
567  : [stride]"r"((mips_reg)stride)
568  : "memory"
569  );
570 }
571 
573 {
574  double ftmp[5];
575  mips_reg addr[7];
576 
577  __asm__ volatile (
578  "negu %[addr0], %[stride] \n\t"
579  PTR_ADDU "%[addr0], %[addr0], %[src] \n\t"
580  PTR_ADDIU "%[addr1], %[addr0], 0x04 \n\t"
581  "lbu %[addr2], 0x00(%[addr0]) \n\t"
582  PTR_ADDU "%[addr3], $0, %[addr2] \n\t"
583  PTR_ADDIU "%[addr0], 0x01 \n\t"
584  "lbu %[addr2], 0x00(%[addr1]) \n\t"
585  PTR_ADDU "%[addr4], $0, %[addr2] \n\t"
586  PTR_ADDIU "%[addr1], 0x01 \n\t"
587  "lbu %[addr2], 0x00(%[addr0]) \n\t"
588  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
589  PTR_ADDIU "%[addr0], 0x01 \n\t"
590  "lbu %[addr2], 0x00(%[addr1]) \n\t"
591  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
592  PTR_ADDIU "%[addr1], 0x01 \n\t"
593  "lbu %[addr2], 0x00(%[addr0]) \n\t"
594  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
595  PTR_ADDIU "%[addr0], 0x01 \n\t"
596  "lbu %[addr2], 0x00(%[addr1]) \n\t"
597  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
598  PTR_ADDIU "%[addr1], 0x01 \n\t"
599  "lbu %[addr2], 0x00(%[addr0]) \n\t"
600  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
601  PTR_ADDIU "%[addr0], 0x01 \n\t"
602  "lbu %[addr2], 0x00(%[addr1]) \n\t"
603  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
604  PTR_ADDIU "%[addr1], 0x01 \n\t"
605  "dli %[addr2], -0x01 \n\t"
606  PTR_ADDU "%[addr2], %[addr2], %[src] \n\t"
607  "lbu %[addr1], 0x00(%[addr2]) \n\t"
608  PTR_ADDU "%[addr5], $0, %[addr1] \n\t"
609  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
610  "lbu %[addr1], 0x00(%[addr2]) \n\t"
611  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
612  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
613  "lbu %[addr1], 0x00(%[addr2]) \n\t"
614  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
615  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
616  "lbu %[addr1], 0x00(%[addr2]) \n\t"
617  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
618  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
619  "lbu %[addr1], 0x00(%[addr2]) \n\t"
620  PTR_ADDU "%[addr6], $0, %[addr1] \n\t"
621  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
622  "lbu %[addr1], 0x00(%[addr2]) \n\t"
623  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
624  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
625  "lbu %[addr1], 0x00(%[addr2]) \n\t"
626  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
627  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
628  "lbu %[addr1], 0x00(%[addr2]) \n\t"
629  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
630  PTR_ADDU "%[addr3], %[addr3], %[addr5] \n\t"
631  PTR_ADDIU "%[addr3], %[addr3], 0x04 \n\t"
632  PTR_ADDIU "%[addr4], %[addr4], 0x02 \n\t"
633  PTR_ADDIU "%[addr1], %[addr6], 0x02 \n\t"
634  PTR_ADDU "%[addr2], %[addr4], %[addr1] \n\t"
635  PTR_SRL "%[addr3], 0x03 \n\t"
636  PTR_SRL "%[addr4], 0x02 \n\t"
637  PTR_SRL "%[addr1], 0x02 \n\t"
638  PTR_SRL "%[addr2], 0x03 \n\t"
639  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
640  "dmtc1 %[addr3], %[ftmp1] \n\t"
641  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
642  "dmtc1 %[addr4], %[ftmp2] \n\t"
643  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
644  "dmtc1 %[addr1], %[ftmp3] \n\t"
645  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
646  "dmtc1 %[addr2], %[ftmp4] \n\t"
647  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
648  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
649  "packushb %[ftmp2], %[ftmp3], %[ftmp4] \n\t"
650  PTR_ADDU "%[addr0], $0, %[src] \n\t"
651  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
652  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
653  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
654  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
655  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
656  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
657  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
658  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
659  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
660  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
661  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
662  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
663  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
664  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
665  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
666  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
667  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
668  [ftmp4]"=&f"(ftmp[4]),
669  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
670  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
671  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
672  [addr6]"=&r"(addr[6])
673  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
674  : "memory"
675  );
676 }
677 
679 {
680  double ftmp[1];
681  uint64_t tmp[1];
682  DECLARE_VAR_ALL64;
683 
684  __asm__ volatile (
685  MMI_LDC1(%[ftmp0], %[srcA], 0x00)
686  "dli %[tmp0], 0x04 \n\t"
687 
688  "1: \n\t"
689  MMI_SDC1(%[ftmp0], %[src], 0x00)
690  PTR_ADDU "%[src], %[src], %[stride] \n\t"
691  MMI_SDC1(%[ftmp0], %[src], 0x00)
692  PTR_ADDU "%[src], %[src], %[stride] \n\t"
693  MMI_SDC1(%[ftmp0], %[src], 0x00)
694  PTR_ADDU "%[src], %[src], %[stride] \n\t"
695  MMI_SDC1(%[ftmp0], %[src], 0x00)
696 
697  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
698  PTR_ADDU "%[src], %[src], %[stride] \n\t"
699  "bnez %[tmp0], 1b \n\t"
700  : [ftmp0]"=&f"(ftmp[0]),
701  [tmp0]"=&r"(tmp[0]),
702  RESTRICT_ASM_ALL64
703  [src]"+&r"(src)
704  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
705  : "memory"
706  );
707 }
708 
710 {
711  uint64_t tmp[3];
712  mips_reg addr[2];
713 
714  __asm__ volatile (
715  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
716  PTR_ADDU "%[addr1], %[src], $0 \n\t"
717  "dli %[tmp0], 0x08 \n\t"
718  "1: \n\t"
719  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
720  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
721  "swl %[tmp2], 0x07(%[addr1]) \n\t"
722  "swr %[tmp2], 0x00(%[addr1]) \n\t"
723  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
724  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
725  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
726  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
727  "swl %[tmp2], 0x07(%[addr1]) \n\t"
728  "swr %[tmp2], 0x00(%[addr1]) \n\t"
729  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
730  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
731  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
732  "bnez %[tmp0], 1b \n\t"
733  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
734  [tmp2]"=&r"(tmp[2]),
735  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
736  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
737  [ff_pb_1]"r"(ff_pb_1)
738  : "memory"
739  );
740 }
741 
/**
 * Shared 16x16 plane-prediction kernel; the svq3 and rv40 flags select
 * between three ways of scaling the two gradient sums.
 *
 * Outline of the single asm statement:
 *  1. Row gradient: 8 bytes at (src - stride) - 1 and at (src - stride) + 8
 *     are multiplied by the ff_pw_m8tom5 / ff_pw_m4tom1 / ff_pw_1to4 /
 *     ff_pw_5to8 tables and reduced to one value (ftmp5).
 *  2. Column gradient: bytes src[-1 + k*stride] for k = -1..6 and k = 8..15
 *     (k = 7 is stepped over) are gathered four at a time, multiplied by
 *     the same tables and reduced to one value (ftmp6).
 *     tmp5 = (src[15 - stride] + src[-1 + 15*stride] + 1) << 4.
 *  3. Both sums are sign-extended from 16 bits and scaled:
 *       svq3 != 0: x = x / 4 * 5 / 16, then the two sums are swapped;
 *       rv40 != 0: x = (x + (x >> 2)) >> 4;
 *       otherwise: x = (5 * x + 32) >> 6.
 *  4. tmp5 -= 7 * (tmp0 + tmp1); 16 rows of 16 bytes are then written, each
 *     as packushb((table * tmp0 + acc) >> 5), with acc advanced by tmp1
 *     after every row.
 *
 * NOTE(review): the asm body references %[ff_pw_m8tom5], %[ff_pw_m4tom1],
 * %[ff_pw_1to4], %[ff_pw_5to8], %[ff_pw_0to3], %[ff_pw_4to7], %[ff_pw_8tob]
 * and %[ff_pw_ctof], but no matching input operands appear in the operand
 * list below; this listing jumps from line 963 to 968, so those lines were
 * presumably lost. Restore them from the real file before building.
 * NOTE(review): stride is int here, while the cross-reference index shows
 * the public wrappers taking ptrdiff_t -- confirm the narrowing is intended.
 *
 * @param src    first byte of the 16x16 block to fill
 * @param stride byte distance between two rows
 * @param svq3   non-zero selects the svq3 scaling branch
 * @param rv40   non-zero (with svq3 == 0) selects the rv40 scaling branch
 */
742 static inline void pred16x16_plane_compat_mmi(uint8_t *src, int stride,
743  const int svq3, const int rv40)
744 {
745  double ftmp[11];
746  uint64_t tmp[6];
747  mips_reg addr[1];
748  DECLARE_VAR_ALL64;
749 
750  __asm__ volatile(
 /* --- 1. row gradient from the row above -------------------------------- */
751  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
752  "dli %[tmp0], 0x20 \n\t"
753  "dmtc1 %[tmp0], %[ftmp4] \n\t"
754  MMI_ULDC1(%[ftmp0], %[addr0], -0x01)
755  MMI_ULDC1(%[ftmp2], %[addr0], 0x08)
756  "dsrl %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
757  "dsrl %[ftmp3], %[ftmp2], %[ftmp4] \n\t"
758  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
759  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
760  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
761  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
762  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
763  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
764  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
765  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
766  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
767  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
768  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
769  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* Two shuffle+add steps (selectors 0x0e, 0x01) fold the lanes together. */
770  "dli %[tmp0], 0x0e \n\t"
771  "dmtc1 %[tmp0], %[ftmp4] \n\t"
772  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
773  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
774  "dli %[tmp0], 0x01 \n\t"
775  "dmtc1 %[tmp0], %[ftmp4] \n\t"
776  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
777  "paddsh %[ftmp5], %[ftmp0], %[ftmp1] \n\t"
778 
 /* --- 2. column gradient from the column to the left --------------------
  * addr0 starts at src - 1 - stride; tmp5 picks up the byte 16 to the right
  * of it, i.e. src[15 - stride]. Bytes are packed into 16-bit lanes with
  * shifts of 0x10 / 0x20 / 0x30. */
779  PTR_ADDIU "%[addr0], %[src], -0x01 \n\t"
780  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
781  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
782  "lbu %[tmp5], 0x10(%[addr0]) \n\t"
783  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
784  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
785  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
786  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
787  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
788  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
789  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
790  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
791  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
792  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
793  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
794  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
795  "dmtc1 %[tmp2], %[ftmp0] \n\t"
796 
797  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
798  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
799  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
800  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
801  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
802  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
803  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
804  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
805  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
806  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
807  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
808  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
809  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
810  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
811  "dmtc1 %[tmp2], %[ftmp1] \n\t"
812 
 /* Two consecutive stride steps: one row of the left column is skipped. */
813  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
814  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
815  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
816  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
817  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
818  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
819  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
820  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
821  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
822  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
823  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
824  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
825  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
826  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
827  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
828  "dmtc1 %[tmp2], %[ftmp2] \n\t"
829 
830  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
831  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
832  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
833  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
834  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
835  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
836  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
837  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
 /* tmp0 is the last left-column byte; tmp5 = (tmp5 + tmp0 + 1) << 4. */
838  "daddu %[tmp5], %[tmp5], %[tmp0] \n\t"
839  "daddiu %[tmp5], %[tmp5], 0x01 \n\t"
840  "dsll %[tmp5], %[tmp5], 0x04 \n\t"
841 
842  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
843  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
844  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
845  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
846  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
847  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
848  "dmtc1 %[tmp2], %[ftmp3] \n\t"
849 
850  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
851  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
852  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
853  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
854  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
855  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
856  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
857  "dli %[tmp0], 0x0e \n\t"
858  "dmtc1 %[tmp0], %[ftmp4] \n\t"
859  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
860  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
861 
862  "dli %[tmp0], 0x01 \n\t"
863  "dmtc1 %[tmp0], %[ftmp4] \n\t"
864  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
865  "paddsh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
866 
 /* --- 3. move both sums to GPRs, sign-extend from 16 bits, scale -------- */
867  "dmfc1 %[tmp0], %[ftmp5] \n\t"
868  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
869  "dsra %[tmp0], %[tmp0], 0x30 \n\t"
870  "dmfc1 %[tmp1], %[ftmp6] \n\t"
871  "dsll %[tmp1], %[tmp1], 0x30 \n\t"
872  "dsra %[tmp1], %[tmp1], 0x30 \n\t"
873 
 /* svq3 branch: x = x / 4 * 5 / 16, then swap tmp0 and tmp1 via tmp2. */
874  "beqz %[svq3], 1f \n\t"
875  "dli %[tmp2], 0x04 \n\t"
876  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
877  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
878  "dli %[tmp2], 0x05 \n\t"
879  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
880  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
881  "dli %[tmp2], 0x10 \n\t"
882  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
883  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
884  "daddu %[tmp2], %[tmp0], $0 \n\t"
885  "daddu %[tmp0], %[tmp1], $0 \n\t"
886  "daddu %[tmp1], %[tmp2], $0 \n\t"
887  "b 2f \n\t"
888 
 /* rv40 branch: x = (x + (x >> 2)) >> 4. */
889  "1: \n\t"
890  "beqz %[rv40], 1f \n\t"
891  "dsra %[tmp2], %[tmp0], 0x02 \n\t"
892  "daddu %[tmp0], %[tmp0], %[tmp2] \n\t"
893  "dsra %[tmp2], %[tmp1], 0x02 \n\t"
894  "daddu %[tmp1], %[tmp1], %[tmp2] \n\t"
895  "dsra %[tmp0], %[tmp0], 0x04 \n\t"
896  "dsra %[tmp1], %[tmp1], 0x04 \n\t"
897  "b 2f \n\t"
898 
 /* default branch: x = (5 * x + 32) >> 6. */
899  "1: \n\t"
900  "dli %[tmp2], 0x05 \n\t"
901  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
902  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
903  "daddiu %[tmp0], %[tmp0], 0x20 \n\t"
904  "daddiu %[tmp1], %[tmp1], 0x20 \n\t"
905  "dsra %[tmp0], %[tmp0], 0x06 \n\t"
906  "dsra %[tmp1], %[tmp1], 0x06 \n\t"
907 
 /* --- 4. starting value and per-row output ------------------------------
  * tmp5 -= 7 * (tmp0 + tmp1). */
908  "2: \n\t"
909  "daddu %[tmp3], %[tmp0], %[tmp1] \n\t"
910  "dli %[tmp2], 0x07 \n\t"
911  "dmul %[tmp3], %[tmp3], %[tmp2] \n\t"
912  "dsubu %[tmp5], %[tmp5], %[tmp3] \n\t"
913 
 /* pshufh with a zero selector: presumably replicates lane 0 to all lanes.
  * ftmp0 <- tmp0, ftmp5 <- tmp1, ftmp6 <- tmp5; ftmp7 = 5 (shift count). */
914  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
915  "dmtc1 %[tmp0], %[ftmp0] \n\t"
916  "pshufh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
917  "dmtc1 %[tmp1], %[ftmp5] \n\t"
918  "pshufh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
919  "dmtc1 %[tmp5], %[ftmp6] \n\t"
920  "pshufh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
921  "dli %[tmp0], 0x05 \n\t"
922  "dmtc1 %[tmp0], %[ftmp7] \n\t"
 /* ftmp1..ftmp4 = the four ff_pw_* column tables, each multiplied by ftmp0. */
923  "pmullh %[ftmp1], %[ff_pw_0to3], %[ftmp0] \n\t"
924  "dmtc1 %[ff_pw_4to7], %[ftmp2] \n\t"
925  "pmullh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
926  "dmtc1 %[ff_pw_8tob], %[ftmp3] \n\t"
927  "pmullh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
928  "dmtc1 %[ff_pw_ctof], %[ftmp4] \n\t"
929  "pmullh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
930 
 /* 16 rows; each iteration stores bytes 0..7 and 8..15 of one row, then
  * advances the accumulator ftmp6 by ftmp5 and addr0 by stride. */
931  "dli %[tmp0], 0x10 \n\t"
932  PTR_ADDU "%[addr0], %[src], $0 \n\t"
933  "1: \n\t"
934  "paddsh %[ftmp8], %[ftmp1], %[ftmp6] \n\t"
935  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
936  "paddsh %[ftmp9], %[ftmp2], %[ftmp6] \n\t"
937  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
938  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
939  MMI_SDC1(%[ftmp0], %[addr0], 0x00)
940 
941  "paddsh %[ftmp8], %[ftmp3], %[ftmp6] \n\t"
942  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
943  "paddsh %[ftmp9], %[ftmp4], %[ftmp6] \n\t"
944  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
945  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
946  MMI_SDC1(%[ftmp0], %[addr0], 0x08)
947 
948  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
949  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
950  "daddiu %[tmp0], %[tmp0], -0x01 \n\t"
951  "bnez %[tmp0], 1b \n\t"
952  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
953  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
954  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
955  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
956  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
957  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
958  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
959  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
960  RESTRICT_ASM_ALL64
961  [addr0]"=&r"(addr[0])
962  : [src]"r"(src), [stride]"r"((mips_reg)stride),
963  [svq3]"r"(svq3), [rv40]"r"(rv40),
 /* NOTE(review): listing lines 964-967 (presumably the ff_pw_* input
  * operands) are absent here. */
968  : "memory"
969  );
970 }
971 
973 {
974  pred16x16_plane_compat_mmi(src, stride, 0, 0);
975 }
976 
978 {
979  pred16x16_plane_compat_mmi(src, stride, 1, 0);
980 }
981 
983 {
984  pred16x16_plane_compat_mmi(src, stride, 0, 1);
985 }
#define mips_reg
Definition: asmdefs.h:44
void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:709
const uint64_t ff_pw_ctof
Definition: constants.c:55
void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:487
const uint64_t ff_pw_1to4
Definition: constants.c:50
const uint64_t ff_pw_2
Definition: constants.c:27
#define src
Definition: vp8dsp.c:254
int stride
Definition: mace.c:144
void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:678
void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:155
static void pred16x16_plane_compat_mmi(uint8_t *src, int stride, const int svq3, const int rv40)
Definition: h264pred_mmi.c:742
uint8_t
void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:97
#define PTR_ADDI
Definition: asmdefs.h:49
const uint64_t ff_pw_0to3
Definition: constants.c:52
void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:430
#define U(x)
Definition: vp56_arith.h:37
const uint64_t ff_pw_4to7
Definition: constants.c:53
void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:244
#define PTR_SUBU
Definition: asmdefs.h:50
void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:60
void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:972
void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:350
const uint64_t ff_pw_m8tom5
Definition: constants.c:48
#define src1
Definition: h264pred.c:139
void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:30
#define PTR_ADDIU
Definition: asmdefs.h:48
void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:982
#define src0
Definition: h264pred.c:138
const uint64_t ff_pw_5to8
Definition: constants.c:51
const uint64_t ff_pw_8tob
Definition: constants.c:54
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> dc
const uint64_t ff_pb_1
Definition: constants.c:57
void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:572
const uint64_t ff_pw_m4tom1
Definition: constants.c:49
void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:520
#define PTR_ADDU
Definition: asmdefs.h:47
#define PTR_SRL
Definition: asmdefs.h:54
void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:460
void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:977
static uint8_t tmp[11]
Definition: aes_ctr.c:26