FFmpeg  4.0
idctdsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized idctdsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "idctdsp_mips.h"
25 #include "constants.h"
27 
/*
 * Write an 8x8 block of 16-bit coefficients to an 8-bit pixel buffer.
 *
 * block:     source coefficients; 128 bytes are read in total
 *            (offsets 0x00..0x38 in each of the two asm statements, with
 *            block advanced by 32 int16_t elements in between)
 * pixels:    destination of the first row
 * line_size: byte distance between consecutive destination rows
 *
 * The work is split into two identical asm statements, each covering four
 * destination rows; the pointers are advanced in C between them.
 *
 * NOTE(review): packushb is presumably the Loongson MMI "pack halfwords to
 * bytes with unsigned saturation" instruction, which is what would provide
 * the clamp to 0..255 -- confirm against the ISA manual. The MMI_* load and
 * store macros are defined outside this listing.
 */
28 void ff_put_pixels_clamped_mmi(const int16_t *block,
29  uint8_t *av_restrict pixels, ptrdiff_t line_size)
30 {
31  double ftmp[8];
32  mips_reg addr[1];
33  DECLARE_VAR_ALL64;
34  DECLARE_VAR_ADDRT;
35 
 /* First half: block bytes 0x00..0x3f -> four destination rows. */
36  __asm__ volatile (
 /* Fetch eight 8-byte groups, i.e. two groups per destination row. */
37  MMI_LDC1(%[ftmp0], %[block], 0x00)
38  MMI_LDC1(%[ftmp1], %[block], 0x08)
39  MMI_LDC1(%[ftmp2], %[block], 0x10)
40  MMI_LDC1(%[ftmp3], %[block], 0x18)
41  MMI_LDC1(%[ftmp4], %[block], 0x20)
42  MMI_LDC1(%[ftmp5], %[block], 0x28)
43  MMI_LDC1(%[ftmp6], %[block], 0x30)
44  MMI_LDC1(%[ftmp7], %[block], 0x38)
 /* addr0 = pixels + line_size, the address of the second row. */
45  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
 /* Combine each pair of groups into one packed result per row. */
46  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
47  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
48  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
49  "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
 /*
  * Store rows 0 and 1 directly; rows 2 and 3 go through the indexed store
  * macro (presumably base + index + offset, i.e. addr0 + line_size and
  * pixels + 3 * line_size).
  */
50  MMI_SDC1(%[ftmp0], %[pixels], 0x00)
51  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
52  MMI_SDXC1(%[ftmp4], %[addr0], %[line_size], 0x00)
53  MMI_SDXC1(%[ftmp6], %[pixels], %[line_sizex3], 0x00)
 /* Outputs: all temporaries are early-clobber scratch registers. */
54  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
55  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
56  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
57  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
58  RESTRICT_ASM_ALL64
59  RESTRICT_ASM_ADDRT
60  [addr0]"=&r"(addr[0]),
61  [pixels]"+&r"(pixels)
 /* Inputs: 3 * line_size is computed in C rather than inside the asm. */
62  : [line_size]"r"((mips_reg)line_size),
63  [line_sizex3]"r"((mips_reg)(line_size*3)),
64  [block]"r"(block)
 /* The stores go through pointers, so memory is declared clobbered. */
65  : "memory"
66  );
67 
 /* Step to the fifth destination row and the second half of the block. */
68  pixels += line_size*4;
69  block += 32;
70 
 /* Second half: same instruction sequence as above for the next four rows. */
71  __asm__ volatile (
72  MMI_LDC1(%[ftmp0], %[block], 0x00)
73  MMI_LDC1(%[ftmp1], %[block], 0x08)
74  MMI_LDC1(%[ftmp2], %[block], 0x10)
75  MMI_LDC1(%[ftmp3], %[block], 0x18)
76  MMI_LDC1(%[ftmp4], %[block], 0x20)
77  MMI_LDC1(%[ftmp5], %[block], 0x28)
78  MMI_LDC1(%[ftmp6], %[block], 0x30)
79  MMI_LDC1(%[ftmp7], %[block], 0x38)
80  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
81  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
82  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
83  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
84  "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
85  MMI_SDC1(%[ftmp0], %[pixels], 0x00)
86  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
87  MMI_SDXC1(%[ftmp4], %[addr0], %[line_size], 0x00)
88  MMI_SDXC1(%[ftmp6], %[pixels], %[line_sizex3], 0x00)
89  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
90  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
91  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
92  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
93  RESTRICT_ASM_ALL64
94  RESTRICT_ASM_ADDRT
95  [addr0]"=&r"(addr[0]),
96  [pixels]"+&r"(pixels)
97  : [line_size]"r"((mips_reg)line_size),
98  [line_sizex3]"r"((mips_reg)(line_size*3)),
99  [block]"r"(block)
100  : "memory"
101  );
102 }
103 
/*
 * Write an 8x8 block of signed 16-bit coefficients to an 8-bit pixel
 * buffer, adding the ff_pb_80 constant to every packed byte.
 *
 * block:     source coefficients; bytes 0x00..0x7f are read
 * pixels:    destination of the first row
 * line_size: byte distance between consecutive destination rows
 *
 * A single asm statement handles all eight rows: four rows are stored,
 * pixels is advanced by four rows inside the asm, then four more rows are
 * stored.
 *
 * NOTE(review): packsshb is presumably the Loongson MMI "pack halfwords to
 * bytes with signed saturation" instruction and ff_pb_80 presumably holds
 * 0x80 in every byte, so that packing followed by paddb would map
 * -128..127 onto 0..255 -- confirm against the ISA manual and constants.c.
 * NOTE(review): line_skip3 is an int64_t bound to a general register and
 * updated with PTR_ADDU, while line_skip is cast to mips_reg -- confirm
 * this is intended on targets where mips_reg is narrower than 64 bits.
 */
104 void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
105  uint8_t *av_restrict pixels, ptrdiff_t line_size)
106 {
107  int64_t line_skip = line_size;
108  int64_t line_skip3 = 0;
109  double ftmp[5];
110  mips_reg addr[1];
111  DECLARE_VAR_ALL64;
112  DECLARE_VAR_ADDRT;
113 
114  __asm__ volatile (
 /* line_skip3 temporarily holds 2 * line_skip (offset of row 2). */
115  PTR_ADDU "%[line_skip3], %[line_skip], %[line_skip] \n\t"
 /* Rows 0..3: load two groups per row (ftmp0 is the shared second
  * operand) and pack them into ftmp1..ftmp4. */
116  MMI_LDC1(%[ftmp1], %[block], 0x00)
117  MMI_LDC1(%[ftmp0], %[block], 0x08)
118  "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
119  MMI_LDC1(%[ftmp2], %[block], 0x10)
120  MMI_LDC1(%[ftmp0], %[block], 0x18)
121  "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
122  MMI_LDC1(%[ftmp3], %[block], 0x20)
123  MMI_LDC1(%[ftmp0], %[block], 0x28)
124  "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
125  MMI_LDC1(%[ftmp4], %[block], 0x30)
126  MMI_LDC1(%[ftmp0], %[block], 0x38)
127  "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
 /* Add the ff_pb_80 bias to every byte of the four packed rows. */
128  "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
129  "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
130  "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
131  "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
 /* Store rows 0, 1 and 2 (row 2 uses the 2 * line_skip value). */
132  MMI_SDC1(%[ftmp1], %[pixels], 0x00)
133  MMI_SDXC1(%[ftmp2], %[pixels], %[line_skip], 0x00)
134  MMI_SDXC1(%[ftmp3], %[pixels], %[line_skip3], 0x00)
 /* line_skip3 becomes 3 * line_skip and keeps that value from here on. */
135  PTR_ADDU "%[line_skip3], %[line_skip3], %[line_skip] \n\t"
136  MMI_SDXC1(%[ftmp4], %[pixels], %[line_skip3], 0x00)
 /* addr0 = 4 * line_skip; advance pixels to the fifth row. */
137  PTR_ADDU "%[addr0], %[line_skip3], %[line_skip] \n\t"
138  PTR_ADDU "%[pixels], %[pixels], %[addr0] \n\t"
 /* Rows 4..7: same load/pack/bias sequence on block bytes 0x40..0x7f. */
139  MMI_LDC1(%[ftmp1], %[block], 0x40)
140  MMI_LDC1(%[ftmp0], %[block], 0x48)
141  "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
142  MMI_LDC1(%[ftmp2], %[block], 0x50)
143  MMI_LDC1(%[ftmp0], %[block], 0x58)
144  "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
145  MMI_LDC1(%[ftmp3], %[block], 0x60)
146  MMI_LDC1(%[ftmp0], %[block], 0x68)
147  "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
148  MMI_LDC1(%[ftmp4], %[block], 0x70)
149  MMI_LDC1(%[ftmp0], %[block], 0x78)
150  "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
151  "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
152  "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
153  "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
154  "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
155  MMI_SDC1(%[ftmp1], %[pixels], 0x00)
156  MMI_SDXC1(%[ftmp2], %[pixels], %[line_skip], 0x00)
 /* addr0 is reused as 2 * line_skip; line_skip3 is still 3 * line_skip. */
157  PTR_ADDU "%[addr0], %[line_skip], %[line_skip] \n\t"
158  MMI_SDXC1(%[ftmp3], %[pixels], %[addr0], 0x00)
159  MMI_SDXC1(%[ftmp4], %[pixels], %[line_skip3], 0x00)
 /* Outputs: scratch registers, plus pixels and line_skip3 which the asm
  * both reads and rewrites. */
160  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
161  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
162  [ftmp4]"=&f"(ftmp[4]),
163  RESTRICT_ASM_ALL64
164  RESTRICT_ASM_ADDRT
165  [addr0]"=&r"(addr[0]),
166  [pixels]"+&r"(pixels), [line_skip3]"+&r"(line_skip3)
167  : [block]"r"(block),
168  [line_skip]"r"((mips_reg)line_skip),
169  [ff_pb_80]"f"(ff_pb_80)
 /* The stores go through pointers, so memory is declared clobbered. */
170  : "memory"
171  );
172 }
173 
/*
 * Add an 8x8 block of 16-bit coefficients to the pixels already present in
 * an 8-bit buffer and write the packed result back in place.
 *
 * block:     source coefficients; 0x20 bytes are consumed per iteration
 * pixels:    buffer that is both read and written
 * line_size: byte distance between consecutive rows of pixels
 *
 * The asm loop runs four times and processes two rows per iteration.
 *
 * NOTE(review): punpcklbh/punpckhbh with a zero register presumably widen
 * the low/high bytes to halfwords, and packushb presumably packs back to
 * bytes with unsigned saturation (the clamp) -- confirm against the
 * Loongson MMI ISA manual.
 */
174 void ff_add_pixels_clamped_mmi(const int16_t *block,
175  uint8_t *av_restrict pixels, ptrdiff_t line_size)
176 {
177  double ftmp[8];
178  uint64_t tmp[1];
179  mips_reg addr[1];
180  DECLARE_VAR_ALL64;
181  DECLARE_VAR_ADDRT;
182 
183  __asm__ volatile (
 /* tmp0 is the loop counter (4 iterations); ftmp0 is cleared to zero. */
184  "li %[tmp0], 0x04 \n\t"
185  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
186  "1: \n\t"
 /* Coefficients for two rows: ftmp1/ftmp2 and ftmp3/ftmp4. */
187  MMI_LDC1(%[ftmp1], %[block], 0x00)
188  MMI_LDC1(%[ftmp2], %[block], 0x08)
189  MMI_LDC1(%[ftmp3], %[block], 0x10)
190  MMI_LDC1(%[ftmp4], %[block], 0x18)
 /* Existing pixels of the current row and of the row line_size below. */
191  MMI_LDC1(%[ftmp5], %[pixels], 0x00)
192  MMI_LDXC1(%[ftmp6], %[pixels], %[line_size], 0x00)
 /* First row: copy the pixels so both halves can be unpacked against
  * zero, then add them to the coefficients. */
193  "mov.d %[ftmp7], %[ftmp5] \n\t"
194  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
195  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
196  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
197  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
 /* Second row: same steps, reusing ftmp7 as the copy. */
198  "mov.d %[ftmp7], %[ftmp6] \n\t"
199  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
200  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
201  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
202  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
 /* Pack each row's two sums and write both rows back in place. */
203  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
204  "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
205  MMI_SDC1(%[ftmp1], %[pixels], 0x00)
206  MMI_SDXC1(%[ftmp3], %[pixels], %[line_size], 0x00)
 /* Count down, advance block by 0x20 bytes and pixels by two rows. */
207  "addi %[tmp0], %[tmp0], -0x01 \n\t"
208  PTR_ADDIU "%[block], %[block], 0x20 \n\t"
209  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
210  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
211  "bnez %[tmp0], 1b"
 /* Outputs: scratch registers plus the two pointers the loop advances.
  * addr0 is bound here but is not referenced by name in the template
  * above. */
212  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
213  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
214  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
215  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
216  [tmp0]"=&r"(tmp[0]),
217  RESTRICT_ASM_ALL64
218  RESTRICT_ASM_ADDRT
219  [addr0]"=&r"(addr[0]),
220  [pixels]"+&r"(pixels), [block]"+&r"(block)
221  : [line_size]"r"((mips_reg)line_size)
 /* The stores go through pointers, so memory is declared clobbered. */
222  : "memory"
223  );
224 }
#define mips_reg
Definition: asmdefs.h:44
static int16_t block[64]
Definition: dct.c:115
const uint64_t ff_pb_80
Definition: constants.c:59
uint8_t
#define av_restrict
Definition: config.h:10
void ff_put_signed_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:104
#define PTR_ADDIU
Definition: asmdefs.h:48
void ff_put_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:28
void ff_add_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:174
int pixels
Definition: avisynth_c.h:429
#define PTR_ADDU
Definition: asmdefs.h:47
static uint8_t tmp[11]
Definition: aes_ctr.c:26