FFmpeg  4.0
mpegvideo_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized mpegvideo
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "mpegvideo_mips.h"
27 
29  int n, int qscale)
30 {
31  int64_t level, qmul, qadd, nCoeffs;
32  double ftmp[6];
33  mips_reg addr[1];
34  DECLARE_VAR_ALL64;
35 
36  qmul = qscale << 1;
37  av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
38 
39  if (!s->h263_aic) {
40  if (n<4)
41  level = block[0] * s->y_dc_scale;
42  else
43  level = block[0] * s->c_dc_scale;
44  qadd = (qscale-1) | 1;
45  } else {
46  qadd = 0;
47  level = block[0];
48  }
49 
50  if(s->ac_pred)
51  nCoeffs = 63;
52  else
53  nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
54 
55  __asm__ volatile (
56  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
57  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
58  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
59  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
60  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
61  "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t"
62  "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
63  ".p2align 4 \n\t"
64 
65  "1: \n\t"
66  PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t"
67  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
68  MMI_LDC1(%[ftmp2], %[addr0], 0x08)
69  "mov.d %[ftmp3], %[ftmp1] \n\t"
70  "mov.d %[ftmp4], %[ftmp2] \n\t"
71  "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t"
72  "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t"
73  "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
74  "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
75  "xor %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
76  "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
77  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
78  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
79  "xor %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
80  "xor %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
81  "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
82  "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
83  "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
84  "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
85  PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t"
86  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
87  MMI_SDC1(%[ftmp2], %[addr0], 0x08)
88  "blez %[nCoeffs], 1b \n\t"
89  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
90  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
91  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
92  RESTRICT_ASM_ALL64
93  [addr0]"=&r"(addr[0])
94  : [block]"r"((mips_reg)(block+nCoeffs)),
95  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
96  [qmul]"f"(qmul), [qadd]"f"(qadd)
97  : "memory"
98  );
99 
100  block[0] = level;
101 }
102 
104  int n, int qscale)
105 {
106  int64_t qmul, qadd, nCoeffs;
107  double ftmp[6];
108  mips_reg addr[1];
109  DECLARE_VAR_ALL64;
110 
111  qmul = qscale << 1;
112  qadd = (qscale - 1) | 1;
113  av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
114  nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
115 
116  __asm__ volatile (
117  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
118  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
119  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
120  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
121  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
122  "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t"
123  "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
124  ".p2align 4 \n\t"
125  "1: \n\t"
126  PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t"
127  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
128  MMI_LDC1(%[ftmp2], %[addr0], 0x08)
129  "mov.d %[ftmp3], %[ftmp1] \n\t"
130  "mov.d %[ftmp4], %[ftmp2] \n\t"
131  "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t"
132  "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t"
133  "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
134  "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
135  "xor %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
136  "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
137  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
138  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
139  "xor %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
140  "xor %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
141  "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
142  "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
143  "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
144  "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
145  PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t"
146  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
147  MMI_SDC1(%[ftmp2], %[addr0], 0x08)
148  "blez %[nCoeffs], 1b \n\t"
149  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
150  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
151  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
152  RESTRICT_ASM_ALL64
153  [addr0]"=&r"(addr[0])
154  : [block]"r"((mips_reg)(block+nCoeffs)),
155  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
156  [qmul]"f"(qmul), [qadd]"f"(qadd)
157  : "memory"
158  );
159 }
160 
162  int n, int qscale)
163 {
164  int64_t nCoeffs;
165  const uint16_t *quant_matrix;
166  int block0;
167  double ftmp[10];
168  uint64_t tmp[1];
169  mips_reg addr[1];
170  DECLARE_VAR_ALL64;
171  DECLARE_VAR_ADDRT;
172 
173  av_assert2(s->block_last_index[n]>=0);
174  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
175 
176  if (n<4)
177  block0 = block[0] * s->y_dc_scale;
178  else
179  block0 = block[0] * s->c_dc_scale;
180 
181  /* XXX: only mpeg1 */
182  quant_matrix = s->intra_matrix;
183 
184  __asm__ volatile (
185  "dli %[tmp0], 0x0f \n\t"
186  "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
187  "dmtc1 %[tmp0], %[ftmp4] \n\t"
188  "dmtc1 %[qscale], %[ftmp1] \n\t"
189  "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
190  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
191  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
192  "or %[addr0], %[nCoeffs], $0 \n\t"
193  ".p2align 4 \n\t"
194 
195  "1: \n\t"
196  MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00)
197  MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08)
198  "mov.d %[ftmp4], %[ftmp2] \n\t"
199  "mov.d %[ftmp5], %[ftmp3] \n\t"
200  MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00)
201  MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08)
202  "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
203  "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
204  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
205  "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
206  "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
207  "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t"
208  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
209  "xor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
210  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
211  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
212  "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
213  "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
214  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
215  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
216  "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
217  "dli %[tmp0], 0x03 \n\t"
218  "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
219  "dmtc1 %[tmp0], %[ftmp4] \n\t"
220  "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
221  "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
222  "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
223  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
224  "or %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
225  "or %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
226  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
227  "xor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
228  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
229  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
230  "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
231  "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
232  MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00)
233  MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08)
234  PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
235  "bltz %[addr0], 1b \n\t"
236  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
237  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
238  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
239  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
240  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
241  [tmp0]"=&r"(tmp[0]),
242  RESTRICT_ASM_ALL64
243  RESTRICT_ASM_ADDRT
244  [addr0]"=&r"(addr[0])
245  : [block]"r"((mips_reg)(block+nCoeffs)),
246  [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
247  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
248  [qscale]"r"(qscale)
249  : "memory"
250  );
251 
252  block[0] = block0;
253 }
254 
256  int n, int qscale)
257 {
258  int64_t nCoeffs;
259  const uint16_t *quant_matrix;
260  double ftmp[10];
261  uint64_t tmp[1];
262  mips_reg addr[1];
263  DECLARE_VAR_ALL64;
264  DECLARE_VAR_ADDRT;
265 
266  av_assert2(s->block_last_index[n] >= 0);
267  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
268  quant_matrix = s->inter_matrix;
269 
270  __asm__ volatile (
271  "dli %[tmp0], 0x0f \n\t"
272  "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
273  "dmtc1 %[tmp0], %[ftmp4] \n\t"
274  "dmtc1 %[qscale], %[ftmp1] \n\t"
275  "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
276  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
277  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
278  "or %[addr0], %[nCoeffs], $0 \n\t"
279  ".p2align 4 \n\t"
280 
281  "1: \n\t"
282  MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00)
283  MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08)
284  "mov.d %[ftmp4], %[ftmp2] \n\t"
285  "mov.d %[ftmp5], %[ftmp3] \n\t"
286  MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00)
287  MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08)
288  "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
289  "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
290  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
291  "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
292  "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
293  "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t"
294  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
295  "xor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
296  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
297  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
298  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
299  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
300  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
301  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
302  "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
303  "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
304  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
305  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
306  "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
307  "dli %[tmp0], 0x04 \n\t"
308  "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
309  "dmtc1 %[tmp0], %[ftmp4] \n\t"
310  "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
311  "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
312  "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
313  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
314  "or %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
315  "or %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
316  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
317  "xor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
318  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
319  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
320  "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
321  "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
322  MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00)
323  MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08)
324  PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
325  "bltz %[addr0], 1b \n\t"
326  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
327  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
328  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
329  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
330  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
331  [tmp0]"=&r"(tmp[0]),
332  RESTRICT_ASM_ALL64
333  RESTRICT_ASM_ADDRT
334  [addr0]"=&r"(addr[0])
335  : [block]"r"((mips_reg)(block+nCoeffs)),
336  [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
337  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
338  [qscale]"r"(qscale)
339  : "memory"
340  );
341 }
342 
344  int n, int qscale)
345 {
346  uint64_t nCoeffs;
347  const uint16_t *quant_matrix;
348  int block0;
349  double ftmp[10];
350  uint64_t tmp[1];
351  mips_reg addr[1];
352  DECLARE_VAR_ALL64;
353  DECLARE_VAR_ADDRT;
354 
355  assert(s->block_last_index[n]>=0);
356 
357  if (s->alternate_scan)
358  nCoeffs = 63;
359  else
360  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]];
361 
362  if (n < 4)
363  block0 = block[0] * s->y_dc_scale;
364  else
365  block0 = block[0] * s->c_dc_scale;
366 
367  quant_matrix = s->intra_matrix;
368 
369  __asm__ volatile (
370  "dli %[tmp0], 0x0f \n\t"
371  "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
372  "mtc1 %[tmp0], %[ftmp3] \n\t"
373  "mtc1 %[qscale], %[ftmp9] \n\t"
374  "psrlh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
375  "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
376  "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
377  "or %[addr0], %[nCoeffs], $0 \n\t"
378  ".p2align 4 \n\t"
379 
380  "1: \n\t"
381  MMI_LDXC1(%[ftmp1], %[addr0], %[block], 0x00)
382  MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x08)
383  "mov.d %[ftmp3], %[ftmp1] \n\t"
384  "mov.d %[ftmp4], %[ftmp2] \n\t"
385  MMI_LDXC1(%[ftmp5], %[addr0], %[quant], 0x00)
386  MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x08)
387  "pmullh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
388  "pmullh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
389  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
390  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
391  "pcmpgth %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
392  "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
393  "xor %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
394  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
395  "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
396  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
397  "pmullh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
398  "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
399  "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
400  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
401  "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
402  "dli %[tmp0], 0x03 \n\t"
403  "pcmpeqh %[ftmp6] , %[ftmp6], %[ftmp4] \n\t"
404  "mtc1 %[tmp0], %[ftmp3] \n\t"
405  "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
406  "psrah %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
407  "xor %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
408  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
409  "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
410  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
411  "pandn %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
412  "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
413  PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
414  MMI_SDXC1(%[ftmp5], %[addr0], %[block], 0x00)
415  MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x08)
416  "blez %[addr0], 1b \n\t"
417  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
418  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
419  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
420  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
421  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
422  [tmp0]"=&r"(tmp[0]),
423  RESTRICT_ASM_ALL64
424  RESTRICT_ASM_ADDRT
425  [addr0]"=&r"(addr[0])
426  : [block]"r"((mips_reg)(block+nCoeffs)),
427  [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
428  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
429  [qscale]"r"(qscale)
430  : "memory"
431  );
432 
433  block[0]= block0;
434 }
435 
437 {
438  const int intra = s->mb_intra;
439  int *sum = s->dct_error_sum[intra];
440  uint16_t *offset = s->dct_offset[intra];
441  double ftmp[8];
442  mips_reg addr[1];
443  DECLARE_VAR_ALL64;
444 
445  s->dct_count[intra]++;
446 
447  __asm__ volatile(
448  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
449  "1: \n\t"
450  MMI_LDC1(%[ftmp1], %[block], 0x00)
451  "xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
452  MMI_LDC1(%[ftmp3], %[block], 0x08)
453  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
454  "pcmpgth %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
455  "pcmpgth %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
456  "xor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
457  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
458  "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
459  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
460  MMI_LDC1(%[ftmp6], %[offset], 0x00)
461  "mov.d %[ftmp5], %[ftmp1] \n\t"
462  "psubush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
463  MMI_LDC1(%[ftmp6], %[offset], 0x08)
464  "mov.d %[ftmp7], %[ftmp3] \n\t"
465  "psubush %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
466  "xor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
467  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
468  "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
469  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
470  MMI_SDC1(%[ftmp1], %[block], 0x00)
471  MMI_SDC1(%[ftmp3], %[block], 0x08)
472  "mov.d %[ftmp1], %[ftmp5] \n\t"
473  "mov.d %[ftmp3], %[ftmp7] \n\t"
474  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
475  "punpckhhw %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
476  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
477  "punpckhhw %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
478  MMI_LDC1(%[ftmp2], %[sum], 0x00)
479  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
480  MMI_LDC1(%[ftmp2], %[sum], 0x08)
481  "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
482  MMI_LDC1(%[ftmp2], %[sum], 0x10)
483  "paddw %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
484  MMI_LDC1(%[ftmp2], %[sum], 0x18)
485  "paddw %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
486  MMI_SDC1(%[ftmp5], %[sum], 0x00)
487  MMI_SDC1(%[ftmp1], %[sum], 0x08)
488  MMI_SDC1(%[ftmp7], %[sum], 0x10)
489  MMI_SDC1(%[ftmp3], %[sum], 0x18)
490  PTR_ADDIU "%[block], %[block], 0x10 \n\t"
491  PTR_ADDIU "%[sum], %[sum], 0x20 \n\t"
492  PTR_SUBU "%[addr0], %[block1], %[block] \n\t"
493  PTR_ADDIU "%[offset], %[offset], 0x10 \n\t"
494  "bgtz %[addr0], 1b \n\t"
495  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
496  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
497  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
498  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
499  RESTRICT_ASM_ALL64
500  [addr0]"=&r"(addr[0]),
501  [block]"+&r"(block), [sum]"+&r"(sum),
502  [offset]"+&r"(offset)
503  : [block1]"r"(block+64)
504  : "memory"
505  );
506 }
#define mips_reg
Definition: asmdefs.h:44
const char * s
Definition: avisynth_c.h:768
uint8_t raster_end[64]
Definition: idctdsp.h:34
void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
int h263_aic
Advanced INTRA Coding (AIC)
Definition: mpegvideo.h:87
static int16_t block[64]
Definition: dct.c:115
#define av_assert2(cond)
assert() equivalent for checks that lie in speed-critical code.
Definition: avassert.h:64
uint16_t(* dct_offset)[64]
Definition: mpegvideo.h:334
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo_mmi.c:28
void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
static const uint8_t offset[127][2]
Definition: vf_spp.c:92
#define PTR_SUBU
Definition: asmdefs.h:50
int alternate_scan
Definition: mpegvideo.h:470
int block_last_index[12]
last non zero coefficient in block
Definition: mpegvideo.h:86
int n
Definition: avisynth_c.h:684
void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
uint16_t inter_matrix[64]
Definition: mpegvideo.h:302
ScanTable intra_scantable
Definition: mpegvideo.h:91
#define PTR_ADDIU
Definition: asmdefs.h:48
void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
const uint8_t * quant
uint8_t level
Definition: svq3.c:207
MpegEncContext.
Definition: mpegvideo.h:81
int(* dct_error_sum)[64]
Definition: mpegvideo.h:332
void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block)
static int16_t block1[64]
Definition: dct.c:116
uint16_t intra_matrix[64]
matrix transmitted in the bitstream
Definition: mpegvideo.h:300
#define PTR_ADDU
Definition: asmdefs.h:47
ScanTable inter_scantable
if inter == intra then intra should be used to reduce the cache usage
Definition: mpegvideo.h:90
int dct_count[2]
Definition: mpegvideo.h:333
static uint8_t tmp[11]
Definition: aes_ctr.c:26