/*
 * Fixed-point constants C0..C7. Per the inline notes each value is
 * cos(i*M_PI/16)*sqrt(2)*(1<<14) rounded to an integer; C4 is the only one
 * annotated with "- 0.5" instead of "+ 0.5".
 * The trailing statement makes temp an int16_t pointer onto align_tmp.
 * NOTE(review): the leading numbers are listing line numbers fused into the
 * text, and the declaration at listing line 60 is split across two lines.
 */
28 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 29 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 30 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 31 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 32 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5 33 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 34 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 35 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 60 int16_t *
const temp= (int16_t*)align_tmp;
/*
 * First-pass macros: each expansion reads four 8-byte operands
 * (src0, src4, src1, src5) and writes four 8-byte results to dst, 8+dst,
 * 16+dst and 24+dst.
 *
 * DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift)
 *   - Loads the operands into $f0/$f2/$f4/$f6, combines them with and/or
 *     into $f8, packs that and copies it to GPR $10; shift goes to $f18
 *     through GPR $11.
 *   - Main sequence: pmaddhw of the operands against the 8-byte table
 *     entries at 16(%2)..104(%2), applies the rounder instruction with the
 *     value loaded from rarg, shifts right by $f18 (psraw), packs with
 *     packsswh and stores with sdc1.
 *   - Second sequence (listing lines 149-160): shifts $f0 left, adds the
 *     value at %4, shifts right, packs, and stores the same 8 bytes to all
 *     four dst slots.
 *   NOTE(review): listing lines 69, 78, 146-148, 153 and 161-163 are absent
 *   from this excerpt. $f8 is and-ed before any visible load, and the
 *   branch/labels that select between the two sequences are not visible.
 *   Confirm against the complete file.
 *
 * Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift, bt)
 *   - Same main sequence, but first ORs the four operands together and
 *     branches to label bt (beqz $10) when the packed OR is zero, skipping
 *     the computation and the stores. shift is loaded through GPR $10.
 *
 * Invocations: the group at 0(%0) goes through DC_COND_IDCT with the rounder
 * value at 8(%2); the groups at 32(%0), 64(%0) and 96(%0) go through
 * Z_COND_IDCT with the rounder value at (%2) and branch targets 4f, 2f, 1f.
 * All use paddw as rounder, shift 11, and write to the matching offset of %1.
 * The asm operand bindings for %0..%4 are not visible in this excerpt.
 */
64 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift) \ 65 "ldc1 $f0, " #src0 " \n\t" \ 66 "ldc1 $f2, " #src4 " \n\t" \ 67 "ldc1 $f4, " #src1 " \n\t" \ 68 "ldc1 $f6, " #src5 " \n\t" \ 70 "and $f8, $f8, $f0 \n\t" \ 71 "or $f8, $f8, $f2 \n\t" \ 72 "or $f8, $f8, $f4 \n\t" \ 73 "or $f8, $f8, $f6 \n\t" \ 74 "packsswh $f8, $f8, $f8 \n\t" \ 75 "li $11, " #shift " \n\t" \ 76 "mfc1 $10, $f8 \n\t" \ 77 "mtc1 $11, $f18 \n\t" \ 79 "ldc1 $f8, 16(%2) \n\t" \ 80 "pmaddhw $f8, $f8, $f0 \n\t" \ 81 "ldc1 $f10, 24(%2) \n\t" \ 82 "pmaddhw $f0, $f0, $f10 \n\t" \ 83 "ldc1 $f10, 32(%2) \n\t" \ 84 "pmaddhw $f10, $f10, $f2 \n\t" \ 85 "ldc1 $f12, 40(%2) \n\t" \ 86 "pmaddhw $f2, $f2, $f12 \n\t" \ 87 "ldc1 $f14, 48(%2) \n\t" \ 88 "ldc1 $f16, " #rarg " \n\t" \ 89 "pmaddhw $f14, $f14, $f4 \n\t" \ 90 #rounder " $f8, $f8, $f16 \n\t" \ 91 "mov.d $f12, $f8 \n\t" \ 92 "paddw $f8, $f8, $f10 \n\t" \ 93 "psubw $f12, $f12, $f10 \n\t" \ 94 "ldc1 $f10, 56(%2) \n\t" \ 95 "ldc1 $f16, " #rarg " \n\t" \ 96 "pmaddhw $f10, $f10, $f6 \n\t" \ 97 #rounder " $f0, $f0, $f16 \n\t" \ 98 "paddw $f2, $f2, $f0 \n\t" \ 99 "ldc1 $f16, 64(%2) \n\t" \ 100 "paddw $f0, $f0, $f0 \n\t" \ 101 "psubw $f0, $f0, $f2 \n\t" \ 102 "pmaddhw $f4, $f4, $f16 \n\t" \ 103 "paddw $f14, $f14, $f10 \n\t" \ 104 "ldc1 $f10, 72(%2) \n\t" \ 105 "pmaddhw $f10, $f10, $f6 \n\t" \ 106 "paddw $f14, $f14, $f8 \n\t" \ 107 "paddw $f8, $f8, $f8 \n\t" \ 108 "psubw $f8, $f8, $f14 \n\t" \ 109 "paddw $f10, $f10, $f4 \n\t" \ 110 "psraw $f14, $f14, $f18 \n\t" \ 111 "psraw $f8, $f8, $f18 \n\t" \ 112 "mov.d $f4, $f2 \n\t" \ 113 "paddw $f2, $f2, $f10 \n\t" \ 114 "psubw $f4, $f4, $f10 \n\t" \ 115 "psraw $f2, $f2, $f18 \n\t" \ 116 "psraw $f4, $f4, $f18 \n\t" \ 117 "packsswh $f14, $f14, $f2 \n\t" \ 118 "packsswh $f4, $f4, $f8 \n\t" \ 119 "sdc1 $f14, " #dst " \n\t" \ 120 "ldc1 $f2, " #src1 " \n\t" \ 121 "ldc1 $f8, 80(%2) \n\t" \ 122 "sdc1 $f4, 24+" #dst " \n\t" \ 123 "pmaddhw $f8, $f8, $f2 \n\t" \ 124 "ldc1 $f16, 96(%2) \n\t" \ 125 "ldc1 $f14, 
88(%2) \n\t" \ 126 "pmaddhw $f2, $f2, $f16 \n\t" \ 127 "pmaddhw $f14, $f14, $f6 \n\t" \ 128 "ldc1 $f16, 104(%2) \n\t" \ 129 "mov.d $f4, $f0 \n\t" \ 130 "pmaddhw $f6, $f6, $f16 \n\t" \ 131 "paddw $f8, $f8, $f14 \n\t" \ 132 "paddw $f4, $f4, $f8 \n\t" \ 133 "psubw $f0, $f0, $f8 \n\t" \ 134 "psraw $f4, $f4, $f18 \n\t" \ 135 "psraw $f0, $f0, $f18 \n\t" \ 136 "mov.d $f8, $f12 \n\t" \ 137 "paddw $f6, $f6, $f2 \n\t" \ 138 "paddw $f12, $f12, $f6 \n\t" \ 139 "psubw $f8, $f8, $f6 \n\t" \ 140 "psraw $f12, $f12, $f18 \n\t" \ 141 "packsswh $f4, $f4, $f12 \n\t" \ 142 "sdc1 $f4, 8+" #dst " \n\t" \ 143 "psraw $f8, $f8, $f18 \n\t" \ 144 "packsswh $f8, $f8, $f0 \n\t" \ 145 "sdc1 $f8, 16+" #dst " \n\t" \ 149 "mtc1 $10, $f16 \n\t" \ 150 "psllw $f0, $f0, $f16 \n\t" \ 151 "ldc1 $f16, %4 \n\t" \ 152 "paddw $f0, $f0, $f16 \n\t" \ 154 "mtc1 $10, $f16 \n\t" \ 155 "psraw $f0, $f0, $f16 \n\t" \ 156 "packsswh $f0, $f0, $f0 \n\t" \ 157 "sdc1 $f0, " #dst " \n\t" \ 158 "sdc1 $f0, 8+" #dst " \n\t" \ 159 "sdc1 $f0, 16+" #dst " \n\t" \ 160 "sdc1 $f0, 24+" #dst " \n\t" \ 164 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift, bt) \ 165 "ldc1 $f0, " #src0 " \n\t" \ 166 "ldc1 $f2, " #src4 " \n\t" \ 167 "ldc1 $f4, " #src1 " \n\t" \ 168 "ldc1 $f6, " #src5 " \n\t" \ 169 "mov.d $f8, $f0 \n\t" \ 170 "or $f8, $f8, $f2 \n\t" \ 171 "or $f8, $f8, $f4 \n\t" \ 172 "or $f8, $f8, $f6 \n\t" \ 173 "packsswh $f8, $f8, $f8 \n\t" \ 174 "mfc1 $10, $f8 \n\t" \ 175 "beqz $10, " #bt " \n\t" \ 176 "ldc1 $f8, 16(%2) \n\t" \ 177 "pmaddhw $f8, $f8, $f0 \n\t" \ 178 "ldc1 $f10, 24(%2) \n\t" \ 179 "pmaddhw $f0, $f0, $f10 \n\t" \ 180 "ldc1 $f10, 32(%2) \n\t" \ 181 "pmaddhw $f10, $f10, $f2 \n\t" \ 182 "ldc1 $f12, 40(%2) \n\t" \ 183 "pmaddhw $f2, $f2, $f12 \n\t" \ 184 "ldc1 $f14, 48(%2) \n\t" \ 185 "ldc1 $f16, " #rarg " \n\t" \ 186 "pmaddhw $f14, $f14, $f4 \n\t" \ 187 #rounder " $f8, $f8, $f16 \n\t" \ 188 "mov.d $f12, $f8 \n\t" \ 189 "paddw $f8, $f8, $f10 \n\t" \ 190 "psubw $f12, $f12, $f10 \n\t" \ 191 "ldc1 $f10, 
56(%2) \n\t" \ 192 "ldc1 $f16, " #rarg " \n\t" \ 193 "pmaddhw $f10, $f10, $f6 \n\t" \ 194 #rounder " $f0, $f0, $f16 \n\t" \ 195 "paddw $f2, $f2, $f0 \n\t" \ 196 "paddw $f0, $f0, $f0 \n\t" \ 197 "ldc1 $f16, 64(%2) \n\t" \ 198 "psubw $f0, $f0, $f2 \n\t" \ 199 "pmaddhw $f4, $f4, $f16 \n\t" \ 200 "paddw $f14, $f14, $f10 \n\t" \ 201 "ldc1 $f10, 72(%2) \n\t" \ 202 "pmaddhw $f10, $f10, $f6 \n\t" \ 203 "paddw $f14, $f14, $f8 \n\t" \ 204 "paddw $f8, $f8, $f8 \n\t" \ 205 "li $10, " #shift " \n\t" \ 206 "psubw $f8, $f8, $f14 \n\t" \ 207 "mtc1 $10, $f18 \n\t" \ 208 "paddw $f10, $f10, $f4 \n\t" \ 209 "psraw $f14, $f14, $f18 \n\t" \ 210 "psraw $f8, $f8, $f18 \n\t" \ 211 "mov.d $f4, $f2 \n\t" \ 212 "paddw $f2, $f2, $f10 \n\t" \ 213 "psubw $f4, $f4, $f10 \n\t" \ 214 "psraw $f2, $f2, $f18 \n\t" \ 215 "psraw $f4, $f4, $f18 \n\t" \ 216 "packsswh $f14, $f14, $f2 \n\t" \ 217 "packsswh $f4, $f4, $f8 \n\t" \ 218 "sdc1 $f14, " #dst " \n\t" \ 219 "ldc1 $f2, " #src1 " \n\t" \ 220 "ldc1 $f8, 80(%2) \n\t" \ 221 "sdc1 $f4, 24+" #dst " \n\t" \ 222 "pmaddhw $f8, $f8, $f2 \n\t" \ 223 "ldc1 $f16, 96(%2) \n\t" \ 224 "ldc1 $f14, 88(%2) \n\t" \ 225 "pmaddhw $f2, $f2, $f16 \n\t" \ 226 "pmaddhw $f14, $f14, $f6 \n\t" \ 227 "ldc1 $f16, 104(%2) \n\t" \ 228 "mov.d $f4, $f0 \n\t" \ 229 "pmaddhw $f6, $f6, $f16 \n\t" \ 230 "paddw $f8, $f8, $f14 \n\t" \ 231 "paddw $f4, $f4, $f8 \n\t" \ 232 "psubw $f0, $f0, $f8 \n\t" \ 233 "psraw $f4, $f4, $f18 \n\t" \ 234 "psraw $f0, $f0, $f18 \n\t" \ 235 "mov.d $f8, $f12 \n\t" \ 236 "paddw $f6, $f6, $f2 \n\t" \ 237 "paddw $f12, $f12, $f6 \n\t" \ 238 "psubw $f8, $f8, $f6 \n\t" \ 239 "psraw $f12, $f12, $f18 \n\t" \ 240 "packsswh $f4, $f4, $f12 \n\t" \ 241 "sdc1 $f4, 8+" #dst " \n\t" \ 242 "psraw $f8, $f8, $f18 \n\t" \ 243 "packsswh $f8, $f8, $f0 \n\t" \ 244 "sdc1 $f8, 16+" #dst " \n\t" \ 247 DC_COND_IDCT(0(%0), 8(%0), 16(%0), 24(%0), 0(%1), paddw,8(%2), 11)
248 Z_COND_IDCT(32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddw,(%2), 11, 4f)
249 Z_COND_IDCT(64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddw,(%2), 11, 2f)
250 Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1), paddw,(%2), 11, 1f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): second-pass variant that uses
 * all four inputs.
 *   - Loads src0/src4/src1/src5 into $f0/$f2/$f4/$f6 and multiplies them
 *     (pmaddhw) against the 8-byte table entries at 16(%2)..104(%2).
 *   - shift is moved into $f18 through GPR $10 and applied with psraw.
 *   - Each result is packed with packsswh and its low 32 bits are stored
 *     with swc1 at dst, 16+dst, 32+dst, 48+dst, 64+dst, 80+dst, 96+dst and
 *     112+dst.
 * Invoked four times: inputs advance by 8 bytes in %1, outputs by 4 bytes
 * in %0, shift is 20.
 * NOTE(review): the label that reaches this variant is not visible in this
 * excerpt.
 */
253 #define IDCT(src0, src4, src1, src5, dst, shift) \ 254 "ldc1 $f0, " #src0 " \n\t" \ 255 "ldc1 $f2, " #src4 " \n\t" \ 256 "ldc1 $f4, " #src1 " \n\t" \ 257 "ldc1 $f6, " #src5 " \n\t" \ 258 "ldc1 $f8, 16(%2) \n\t" \ 259 "pmaddhw $f8, $f8, $f0 \n\t" \ 260 "ldc1 $f10, 24(%2) \n\t" \ 261 "pmaddhw $f0, $f0, $f10 \n\t" \ 262 "ldc1 $f10, 32(%2) \n\t" \ 263 "pmaddhw $f10, $f10, $f2 \n\t" \ 264 "ldc1 $f12, 40(%2) \n\t" \ 265 "pmaddhw $f2, $f2, $f12 \n\t" \ 266 "mov.d $f12, $f8 \n\t" \ 267 "ldc1 $f14, 48(%2) \n\t" \ 268 "pmaddhw $f14, $f14, $f4 \n\t" \ 269 "paddw $f8, $f8, $f10 \n\t" \ 270 "psubw $f12, $f12, $f10 \n\t" \ 271 "mov.d $f10, $f0 \n\t" \ 272 "paddw $f0, $f0, $f2 \n\t" \ 273 "psubw $f10, $f10, $f2 \n\t" \ 274 "ldc1 $f2, 56(%2) \n\t" \ 275 "ldc1 $f16, 64(%2) \n\t" \ 276 "pmaddhw $f2, $f2, $f6 \n\t" \ 277 "pmaddhw $f4, $f4, $f16 \n\t" \ 278 "li $10, " #shift " \n\t" \ 279 "paddw $f14, $f14, $f2 \n\t" \ 280 "ldc1 $f2, 72(%2) \n\t" \ 281 "mtc1 $10, $f18 \n\t" \ 282 "pmaddhw $f2, $f2, $f6 \n\t" \ 283 "paddw $f14, $f14, $f8 \n\t" \ 284 "paddw $f8, $f8, $f8 \n\t" \ 285 "psubw $f8, $f8, $f14 \n\t" \ 286 "paddw $f2, $f2, $f4 \n\t" \ 287 "psraw $f14, $f14, $f18 \n\t" \ 288 "psraw $f8, $f8, $f18 \n\t" \ 289 "mov.d $f4, $f0 \n\t" \ 290 "paddw $f0, $f0, $f2 \n\t" \ 291 "psubw $f4, $f4, $f2 \n\t" \ 292 "psraw $f0, $f0, $f18 \n\t" \ 293 "psraw $f4, $f4, $f18 \n\t" \ 294 "packsswh $f14, $f14, $f14 \n\t" \ 295 "swc1 $f14, " #dst " \n\t" \ 296 "packsswh $f0, $f0, $f0 \n\t" \ 297 "swc1 $f0, 16+" #dst " \n\t" \ 298 "packsswh $f4, $f4, $f4 \n\t" \ 299 "swc1 $f4, 96+" #dst " \n\t" \ 300 "packsswh $f8, $f8, $f8 \n\t" \ 301 "swc1 $f8, 112+" #dst " \n\t" \ 302 "ldc1 $f0, " #src1 " \n\t" \ 303 "ldc1 $f8, 80(%2) \n\t" \ 304 "pmaddhw $f8, $f8, $f0 \n\t" \ 305 "ldc1 $f16, 96(%2) \n\t" \ 306 "ldc1 $f14, 88(%2) \n\t" \ 307 "pmaddhw $f0, $f0, $f16 \n\t" \ 308 "pmaddhw $f14, $f14, $f6 \n\t" \ 309 "ldc1 $f16, 104(%2) \n\t" \ 310 "mov.d $f4, $f10 \n\t" \ 311 "pmaddhw $f6, $f6, $f16 \n\t" \ 312 
"paddw $f8, $f8, $f14 \n\t" \ 313 "paddw $f4, $f4, $f8 \n\t" \ 314 "psubw $f10, $f10, $f8 \n\t" \ 315 "psraw $f4, $f4, $f18 \n\t" \ 316 "psraw $f10, $f10, $f18 \n\t" \ 317 "mov.d $f8, $f12 \n\t" \ 318 "paddw $f6, $f6, $f0 \n\t" \ 319 "paddw $f12, $f12, $f6 \n\t" \ 320 "psubw $f8, $f8, $f6 \n\t" \ 321 "psraw $f12, $f12, $f18 \n\t" \ 322 "psraw $f8, $f8, $f18 \n\t" \ 323 "packsswh $f4, $f4, $f4 \n\t" \ 324 "packsswh $f12, $f12, $f12 \n\t" \ 325 "swc1 $f4, 32+" #dst " \n\t" \ 326 "packsswh $f8, $f8, $f8 \n\t" \ 327 "packsswh $f10, $f10, $f10 \n\t" \ 328 "swc1 $f12, 48+" #dst " \n\t" \ 329 "swc1 $f8, 64+" #dst " \n\t" \ 330 "swc1 $f10, 80+" #dst " \n\t" 333 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
334 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
335 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
336 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * First pass for the groups at 64(%0) and 96(%0): Z_COND_IDCT branches to
 * 6f / 5f respectively when all four operands of that group are zero.
 *
 * IDCT variant that never loads src1: only src0, src4 and src5 are read, and
 * the table entries at 48, 64, 80 and 96(%2) (the ones multiplied with src1
 * in the full variant) are not used. Results are shifted by `shift` (via
 * GPR $10 into $f18), packed with packsswh, and the low 32 bits are stored
 * with swc1 at dst + {0,16,32,48,64,80,96,112}.
 * Invoked four times: inputs advance by 8 bytes in %1, outputs by 4 bytes
 * in %0, shift is 20.
 * NOTE(review): presumably this path is taken when the group feeding src1 is
 * zero; the labels that establish this are not visible here. Confirm.
 */
341 Z_COND_IDCT(64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddw,(%2), 11, 6f)
342 Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1),paddw,(%2), 11, 5f)
345 #define IDCT(src0, src4, src1, src5, dst, shift) \ 346 "ldc1 $f0, " #src0 " \n\t" \ 347 "ldc1 $f2, " #src4 " \n\t" \ 348 "ldc1 $f6, " #src5 " \n\t" \ 349 "ldc1 $f8, 16(%2) \n\t" \ 350 "pmaddhw $f8, $f8, $f0 \n\t" \ 351 "ldc1 $f10, 24(%2) \n\t" \ 352 "pmaddhw $f0, $f0, $f10 \n\t" \ 353 "ldc1 $f10, 32(%2) \n\t" \ 354 "pmaddhw $f10, $f10, $f2 \n\t" \ 355 "ldc1 $f12, 40(%2) \n\t" \ 356 "pmaddhw $f2, $f2, $f12 \n\t" \ 357 "mov.d $f12, $f8 \n\t" \ 358 "paddw $f8, $f8, $f10 \n\t" \ 359 "psubw $f12, $f12, $f10 \n\t" \ 360 "mov.d $f10, $f0 \n\t" \ 361 "paddw $f0, $f0, $f2 \n\t" \ 362 "psubw $f10, $f10, $f2 \n\t" \ 363 "ldc1 $f2, 56(%2) \n\t" \ 364 "li $10, " #shift " \n\t" \ 365 "pmaddhw $f2, $f2, $f6 \n\t" \ 366 "ldc1 $f14, 72(%2) \n\t" \ 367 "mtc1 $10, $f18 \n\t" \ 368 "pmaddhw $f14, $f14, $f6 \n\t" \ 369 "paddw $f2, $f2, $f8 \n\t" \ 370 "paddw $f8, $f8, $f8 \n\t" \ 371 "psubw $f8, $f8, $f2 \n\t" \ 372 "psraw $f2, $f2, $f18 \n\t" \ 373 "psraw $f8, $f8, $f18 \n\t" \ 374 "mov.d $f4, $f0 \n\t" \ 375 "paddw $f0, $f0, $f14 \n\t" \ 376 "psubw $f4, $f4, $f14 \n\t" \ 377 "psraw $f0, $f0, $f18 \n\t" \ 378 "psraw $f4, $f4, $f18 \n\t" \ 379 "packsswh $f2, $f2, $f2 \n\t" \ 380 "swc1 $f2, " #dst " \n\t" \ 381 "packsswh $f0, $f0, $f0 \n\t" \ 382 "swc1 $f0, 16+" #dst " \n\t" \ 383 "packsswh $f4, $f4, $f4 \n\t" \ 384 "swc1 $f4, 96+" #dst " \n\t" \ 385 "packsswh $f8, $f8, $f8 \n\t" \ 386 "swc1 $f8, 112+" #dst " \n\t" \ 387 "ldc1 $f2, 88(%2) \n\t" \ 388 "ldc1 $f16, 104(%2) \n\t" \ 389 "pmaddhw $f2, $f2, $f6 \n\t" \ 390 "mov.d $f4, $f10 \n\t" \ 391 "pmaddhw $f6, $f6, $f16 \n\t" \ 392 "paddw $f4, $f4, $f2 \n\t" \ 393 "psubw $f10, $f10, $f2 \n\t" \ 394 "psraw $f4, $f4, $f18 \n\t" \ 395 "psraw $f10, $f10, $f18 \n\t" \ 396 "mov.d $f2, $f12 \n\t" \ 397 "paddw $f12, $f12, $f6 \n\t" \ 398 "psubw $f2, $f2, $f6 \n\t" \ 399 "psraw $f12, $f12, $f18 \n\t" \ 400 "psraw $f2, $f2, $f18 \n\t" \ 401 "packsswh $f4, $f4, $f4 \n\t" \ 402 "packsswh $f12, $f12, $f12 \n\t" \ 403 "swc1 $f4, 32+" #dst " \n\t" \ 
404 "packsswh $f2, $f2, $f2 \n\t" \ 405 "packsswh $f10, $f10, $f10 \n\t" \ 406 "swc1 $f12, 48+" #dst " \n\t" \ 407 "swc1 $f2, 64+" #dst " \n\t" \ 408 "swc1 $f10, 80+" #dst " \n\t" 411 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
412 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
413 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
414 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * First pass for the group at 96(%0): Z_COND_IDCT branches to 7f when all
 * four operands are zero.
 *
 * IDCT variant that reads only src0 and src5 (src4 and src1 are never
 * loaded). It uses the table entries at 16, 24, 56, 72, 88 and 104(%2),
 * shifts by `shift` (via GPR $10 into $f18), packs with packsswh and stores
 * the low 32 bits with swc1 at dst + {0,16,32,48,64,80,96,112}.
 * Invoked four times: inputs advance by 8 bytes in %1, outputs by 4 bytes
 * in %0, shift is 20.
 * NOTE(review): presumably reached when the groups feeding src4 and src1 are
 * zero; the labels are not visible in this excerpt. Confirm.
 */
419 Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1),paddw,(%2), 11, 7f)
422 #define IDCT(src0, src4, src1, src5, dst, shift) \ 423 "ldc1 $f0, " #src0 " \n\t" \ 424 "ldc1 $f6, " #src5 " \n\t" \ 425 "ldc1 $f8, 16(%2) \n\t" \ 426 "pmaddhw $f8, $f8, $f0 \n\t" \ 427 "ldc1 $f10, 24(%2) \n\t" \ 428 "pmaddhw $f0, $f0, $f10 \n\t" \ 429 "mov.d $f12, $f8 \n\t" \ 430 "mov.d $f10, $f0 \n\t" \ 431 "ldc1 $f2, 56(%2) \n\t" \ 432 "pmaddhw $f2, $f2, $f6 \n\t" \ 433 "ldc1 $f14, 72(%2) \n\t" \ 434 "li $10, " #shift " \n\t" \ 435 "pmaddhw $f14, $f14, $f6 \n\t" \ 436 "paddw $f2, $f2, $f8 \n\t" \ 437 "mtc1 $10, $f18 \n\t" \ 438 "paddw $f8, $f8, $f8 \n\t" \ 439 "psubw $f8, $f8, $f2 \n\t" \ 440 "psraw $f2, $f2, $f18 \n\t" \ 441 "psraw $f8, $f8, $f18 \n\t" \ 442 "mov.d $f4, $f0 \n\t" \ 443 "paddw $f0, $f0, $f14 \n\t" \ 444 "psubw $f4, $f4, $f14 \n\t" \ 445 "psraw $f0, $f0, $f18 \n\t" \ 446 "psraw $f4, $f4, $f18 \n\t" \ 447 "packsswh $f2, $f2, $f2 \n\t" \ 448 "swc1 $f2, " #dst " \n\t" \ 449 "packsswh $f0, $f0, $f0 \n\t" \ 450 "swc1 $f0, 16+" #dst " \n\t" \ 451 "packsswh $f4, $f4, $f4 \n\t" \ 452 "swc1 $f4, 96+" #dst " \n\t" \ 453 "packsswh $f8, $f8, $f8 \n\t" \ 454 "swc1 $f8, 112+" #dst " \n\t" \ 455 "ldc1 $f2, 88(%2) \n\t" \ 456 "ldc1 $f16, 104(%2) \n\t" \ 457 "pmaddhw $f2, $f2, $f6 \n\t" \ 458 "mov.d $f4, $f10 \n\t" \ 459 "pmaddhw $f6, $f6, $f16 \n\t" \ 460 "paddw $f4, $f4, $f2 \n\t" \ 461 "psubw $f10, $f10, $f2 \n\t" \ 462 "psraw $f4, $f4, $f18 \n\t" \ 463 "psraw $f10, $f10, $f18 \n\t" \ 464 "mov.d $f2, $f12 \n\t" \ 465 "paddw $f12, $f12, $f6 \n\t" \ 466 "psubw $f2, $f2, $f6 \n\t" \ 467 "psraw $f12, $f12, $f18 \n\t" \ 468 "psraw $f2, $f2, $f18 \n\t" \ 469 "packsswh $f4, $f4, $f4 \n\t" \ 470 "packsswh $f12, $f12, $f12 \n\t" \ 471 "swc1 $f4, 32+" #dst " \n\t" \ 472 "packsswh $f2, $f2, $f2 \n\t" \ 473 "packsswh $f10, $f10, $f10 \n\t" \ 474 "swc1 $f12, 48+" #dst " \n\t" \ 475 "swc1 $f2, 64+" #dst " \n\t" \ 476 "swc1 $f10, 80+" #dst " \n\t" 479 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
480 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
481 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
482 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * First pass for the group at 96(%0): Z_COND_IDCT branches to 3f when all
 * four operands are zero.
 *
 * IDCT variant that never loads src4: only src0, src1 and src5 are read, and
 * the table entries at 32 and 40(%2) (the ones multiplied with src4 in the
 * full variant) are not used. Results are shifted by `shift` (via GPR $10
 * into $f18), packed with packsswh, and the low 32 bits are stored with swc1
 * at dst + {0,16,32,48,64,80,96,112}.
 * Invoked four times: inputs advance by 8 bytes in %1, outputs by 4 bytes
 * in %0, shift is 20.
 * NOTE(review): presumably reached when the group feeding src4 is zero; the
 * labels are not visible in this excerpt. Confirm.
 */
487 Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1),paddw,(%2), 11, 3f)
490 #define IDCT(src0, src4, src1, src5, dst, shift) \ 491 "ldc1 $f0, " #src0 " \n\t" \ 492 "ldc1 $f4, " #src1 " \n\t" \ 493 "ldc1 $f6, " #src5 " \n\t" \ 494 "ldc1 $f8, 16(%2) \n\t" \ 495 "pmaddhw $f8, $f8, $f0 \n\t" \ 496 "ldc1 $f10, 24(%2) \n\t" \ 497 "pmaddhw $f0, $f0, $f10 \n\t" \ 498 "mov.d $f12, $f8 \n\t" \ 499 "ldc1 $f14, 48(%2) \n\t" \ 500 "pmaddhw $f14, $f14, $f4 \n\t" \ 501 "mov.d $f10, $f0 \n\t" \ 502 "ldc1 $f2, 56(%2) \n\t" \ 503 "pmaddhw $f2, $f2, $f6 \n\t" \ 504 "ldc1 $f16, 64(%2) \n\t" \ 505 "pmaddhw $f4, $f4, $f16 \n\t" \ 506 "paddw $f14, $f14, $f2 \n\t" \ 507 "ldc1 $f2, 72(%2) \n\t" \ 508 "li $10, " #shift " \n\t" \ 509 "pmaddhw $f2, $f2, $f6 \n\t" \ 510 "paddw $f14, $f14, $f8 \n\t" \ 511 "mtc1 $10, $f18 \n\t" \ 512 "paddw $f8, $f8, $f8 \n\t" \ 513 "psubw $f8, $f8, $f14 \n\t" \ 514 "paddw $f2, $f2, $f4 \n\t" \ 515 "psraw $f14, $f14, $f18 \n\t" \ 516 "psraw $f8, $f8, $f18 \n\t" \ 517 "mov.d $f4, $f0 \n\t" \ 518 "paddw $f0, $f0, $f2 \n\t" \ 519 "psubw $f4, $f4, $f2 \n\t" \ 520 "psraw $f0, $f0, $f18 \n\t" \ 521 "psraw $f4, $f4, $f18 \n\t" \ 522 "packsswh $f14, $f14, $f14 \n\t" \ 523 "swc1 $f14, " #dst " \n\t" \ 524 "packsswh $f0, $f0, $f0 \n\t" \ 525 "swc1 $f0, 16+" #dst " \n\t" \ 526 "packsswh $f4, $f4, $f4 \n\t" \ 527 "swc1 $f4, 96+" #dst " \n\t" \ 528 "packsswh $f8, $f8, $f8 \n\t" \ 529 "swc1 $f8, 112+" #dst " \n\t" \ 530 "ldc1 $f0, " #src1 " \n\t" \ 531 "ldc1 $f8, 80(%2) \n\t" \ 532 "pmaddhw $f8, $f8, $f0 \n\t" \ 533 "ldc1 $f14, 88(%2) \n\t" \ 534 "ldc1 $f16, 96(%2) \n\t" \ 535 "pmaddhw $f0, $f0, $f16 \n\t" \ 536 "pmaddhw $f14, $f14, $f6 \n\t" \ 537 "mov.d $f4, $f10 \n\t" \ 538 "ldc1 $f16, 104(%2) \n\t" \ 539 "pmaddhw $f6, $f6, $f16 \n\t" \ 540 "paddw $f8, $f8, $f14 \n\t" \ 541 "paddw $f4, $f4, $f8 \n\t" \ 542 "psubw $f10, $f10, $f8 \n\t" \ 543 "psraw $f4, $f4, $f18 \n\t" \ 544 "psraw $f10, $f10, $f18 \n\t" \ 545 "mov.d $f8, $f12 \n\t" \ 546 "paddw $f6, $f6, $f0 \n\t" \ 547 "paddw $f12, $f12, $f6 \n\t" \ 548 "psubw $f8, $f8, $f6 \n\t" \ 549 "psraw 
$f12, $f12, $f18 \n\t" \ 550 "psraw $f8, $f8, $f18 \n\t" \ 551 "packsswh $f4, $f4, $f4 \n\t" \ 552 "packsswh $f12, $f12, $f12 \n\t" \ 553 "swc1 $f4, 32+" #dst " \n\t" \ 554 "packsswh $f8, $f8, $f8 \n\t" \ 555 "packsswh $f10, $f10, $f10 \n\t" \ 556 "swc1 $f12, 48+" #dst " \n\t" \ 557 "swc1 $f8, 64+" #dst " \n\t" \ 558 "swc1 $f10, 80+" #dst " \n\t" 561 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
562 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
563 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
564 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT variant that reads only src0 and src1 (src4 and src5 are never
 * loaded). It uses the table entries at 16, 24, 48, 64, 80 and 96(%2),
 * shifts by `shift` (via GPR $10 into $f18), packs with packsswh and stores
 * the low 32 bits with swc1 at dst + {0,16,32,48,64,80,96,112}.
 * Invoked four times: inputs advance by 8 bytes in %1, outputs by 4 bytes
 * in %0, shift is 20.
 * NOTE(review): presumably reached when the groups feeding src4 and src5 are
 * zero; the labels are not visible in this excerpt. Confirm.
 */
571 #define IDCT(src0, src4, src1, src5, dst, shift) \ 572 "ldc1 $f0, " #src0 " \n\t" \ 573 "ldc1 $f4, " #src1 " \n\t" \ 574 "ldc1 $f8, 16(%2) \n\t" \ 575 "pmaddhw $f8, $f8, $f0 \n\t" \ 576 "ldc1 $f10, 24(%2) \n\t" \ 577 "pmaddhw $f0, $f0, $f10 \n\t" \ 578 "mov.d $f12, $f8 \n\t" \ 579 "ldc1 $f14, 48(%2) \n\t" \ 580 "pmaddhw $f14, $f14, $f4 \n\t" \ 581 "mov.d $f10, $f0 \n\t" \ 582 "ldc1 $f6, 64(%2) \n\t" \ 583 "pmaddhw $f6, $f6, $f4 \n\t" \ 584 "li $10, " #shift " \n\t" \ 585 "paddw $f14, $f14, $f8 \n\t" \ 586 "mtc1 $10, $f18 \n\t" \ 587 "paddw $f8, $f8, $f8 \n\t" \ 588 "psubw $f8, $f8, $f14 \n\t" \ 589 "psraw $f14, $f14, $f18 \n\t" \ 590 "psraw $f8, $f8, $f18 \n\t" \ 591 "mov.d $f2, $f0 \n\t" \ 592 "paddw $f0, $f0, $f6 \n\t" \ 593 "psubw $f2, $f2, $f6 \n\t" \ 594 "psraw $f0, $f0, $f18 \n\t" \ 595 "psraw $f2, $f2, $f18 \n\t" \ 596 "packsswh $f14, $f14, $f14 \n\t" \ 597 "swc1 $f14, " #dst " \n\t" \ 598 "packsswh $f0, $f0, $f0 \n\t" \ 599 "swc1 $f0, 16+" #dst " \n\t" \ 600 "packsswh $f2, $f2, $f2 \n\t" \ 601 "swc1 $f2, 96+" #dst " \n\t" \ 602 "packsswh $f8, $f8, $f8 \n\t" \ 603 "swc1 $f8, 112+" #dst " \n\t" \ 604 "ldc1 $f8, 80(%2) \n\t" \ 605 "ldc1 $f16, 96(%2) \n\t" \ 606 "pmaddhw $f8, $f8, $f4 \n\t" \ 607 "pmaddhw $f4, $f4, $f16 \n\t" \ 608 "mov.d $f2, $f10 \n\t" \ 609 "paddw $f2, $f2, $f8 \n\t" \ 610 "psubw $f10, $f10, $f8 \n\t" \ 611 "psraw $f2, $f2, $f18 \n\t" \ 612 "psraw $f10, $f10, $f18 \n\t" \ 613 "mov.d $f8, $f12 \n\t" \ 614 "paddw $f12, $f12, $f4 \n\t" \ 615 "psubw $f8, $f8, $f4 \n\t" \ 616 "psraw $f12, $f12, $f18 \n\t" \ 617 "psraw $f8, $f8, $f18 \n\t" \ 618 "packsswh $f2, $f2, $f2 \n\t" \ 619 "packsswh $f12, $f12, $f12 \n\t" \ 620 "swc1 $f2, 32+" #dst " \n\t" \ 621 "packsswh $f8, $f8, $f8 \n\t" \ 622 "packsswh $f10, $f10, $f10 \n\t" \ 623 "swc1 $f12, 48+" #dst " \n\t" \ 624 "swc1 $f8, 64+" #dst " \n\t" \ 625 "swc1 $f10, 80+" #dst " \n\t" 628 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
629 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
630 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
631 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT variant that reads only src0 and src4, and handles two adjacent
 * 8-byte groups per expansion: it also loads 8+src0 and 8+src4. The src1 and
 * src5 arguments are not referenced in the body.
 *   - Both groups are multiplied against the table entries at 16, 24, 32
 *     and 40(%2); results are shifted by `shift` (via GPR $10 into $f18).
 *   - packsswh merges the result of the first group with that of the second
 *     into one 8-byte value, stored with sdc1.
 *   - Each packed value is stored twice: $f8 at dst and 112+dst, $f0 at
 *     16+dst and 96+dst, $f10 at 32+dst and 80+dst, $f12 at 48+dst and
 *     64+dst.
 * Invoked twice (inputs 0(%1) and 16(%1), outputs 0(%0) and 8(%0)), shift 20.
 * NOTE(review): presumably reached when the groups feeding src1 and src5 are
 * zero; the labels are not visible in this excerpt. Confirm.
 */
638 #define IDCT(src0, src4, src1, src5, dst, shift) \ 639 "ldc1 $f0, " #src0 " \n\t" \ 640 "ldc1 $f2, " #src4 " \n\t" \ 641 "ldc1 $f8, 16(%2) \n\t" \ 642 "pmaddhw $f8, $f8, $f0 \n\t" \ 643 "ldc1 $f10, 24(%2) \n\t" \ 644 "pmaddhw $f0, $f0, $f10 \n\t" \ 645 "ldc1 $f10, 32(%2) \n\t" \ 646 "pmaddhw $f10, $f10, $f2 \n\t" \ 647 "ldc1 $f12, 40(%2) \n\t" \ 648 "pmaddhw $f2, $f2, $f12 \n\t" \ 649 "mov.d $f12, $f8 \n\t" \ 650 "paddw $f8, $f8, $f10 \n\t" \ 651 "psubw $f12, $f12, $f10 \n\t" \ 652 "mov.d $f10, $f0 \n\t" \ 653 "paddw $f0, $f0, $f2 \n\t" \ 654 "psubw $f10, $f10, $f2 \n\t" \ 655 "ldc1 $f4, 8+" #src0 " \n\t" \ 656 "ldc1 $f6, 8+" #src4 " \n\t" \ 657 "ldc1 $f2, 16(%2) \n\t" \ 658 "pmaddhw $f2, $f2, $f4 \n\t" \ 659 "ldc1 $f14, 24(%2) \n\t" \ 660 "pmaddhw $f4, $f4, $f14 \n\t" \ 661 "ldc1 $f14, 32(%2) \n\t" \ 662 "ldc1 $f16, 40(%2) \n\t" \ 663 "pmaddhw $f14, $f14, $f6 \n\t" \ 664 "pmaddhw $f6, $f6, $f16 \n\t" \ 665 "paddw $f14, $f14, $f2 \n\t" \ 666 "paddw $f2, $f2, $f2 \n\t" \ 667 "psubw $f2, $f2, $f14 \n\t" \ 668 "li $10, " #shift " \n\t" \ 669 "paddw $f6, $f6, $f4 \n\t" \ 670 "mtc1 $10, $f18 \n\t" \ 671 "paddw $f4, $f4, $f4 \n\t" \ 672 "psubw $f4, $f4, $f6 \n\t" \ 673 "psraw $f8, $f8, $f18 \n\t" \ 674 "psraw $f14, $f14, $f18 \n\t" \ 675 "psraw $f6, $f6, $f18 \n\t" \ 676 "packsswh $f8, $f8, $f14 \n\t" \ 677 "sdc1 $f8, " #dst " \n\t" \ 678 "psraw $f0, $f0, $f18 \n\t" \ 679 "packsswh $f0, $f0, $f6 \n\t" \ 680 "sdc1 $f0, 16+" #dst " \n\t" \ 681 "sdc1 $f0, 96+" #dst " \n\t" \ 682 "sdc1 $f8, 112+" #dst " \n\t" \ 683 "psraw $f10, $f10, $f18 \n\t" \ 684 "psraw $f12, $f12, $f18 \n\t" \ 685 "psraw $f4, $f4, $f18 \n\t" \ 686 "packsswh $f10, $f10, $f4 \n\t" \ 687 "sdc1 $f10, 32+" #dst " \n\t" \ 688 "psraw $f2, $f2, $f18 \n\t" \ 689 "packsswh $f12, $f12, $f2 \n\t" \ 690 "sdc1 $f12, 48+" #dst " \n\t" \ 691 "sdc1 $f12, 64+" #dst " \n\t" \ 692 "sdc1 $f10, 80+" #dst " \n\t" 695 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
696 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
/*
 * IDCT variant that never loads src5: only src0, src4 and src1 are read, and
 * the table entries at 56, 72, 88 and 104(%2) (the ones multiplied with src5
 * in the full variant) are not used. Here shift is moved into $f18 near the
 * top of the sequence (via GPR $10). Results are shifted with psraw, packed
 * with packsswh, and the low 32 bits are stored with swc1 at
 * dst + {0,16,32,48,64,80,96,112}.
 * Invoked four times: inputs advance by 8 bytes in %1, outputs by 4 bytes
 * in %0, shift is 20.
 * NOTE(review): presumably reached when the group feeding src5 is zero; the
 * labels are not visible in this excerpt. Confirm.
 */
703 #define IDCT(src0, src4, src1, src5, dst, shift) \ 704 "ldc1 $f0, " #src0 " \n\t" \ 705 "ldc1 $f2, " #src4 " \n\t" \ 706 "ldc1 $f4, " #src1 " \n\t" \ 707 "ldc1 $f8, 16(%2) \n\t" \ 708 "li $10, " #shift " \n\t" \ 709 "pmaddhw $f8, $f8, $f0 \n\t" \ 710 "ldc1 $f10, 24(%2) \n\t" \ 711 "mtc1 $10, $f18 \n\t" \ 712 "pmaddhw $f0, $f0, $f10 \n\t" \ 713 "ldc1 $f10, 32(%2) \n\t" \ 714 "pmaddhw $f10, $f10, $f2 \n\t" \ 715 "ldc1 $f12, 40(%2) \n\t" \ 716 "pmaddhw $f2, $f2, $f12 \n\t" \ 717 "mov.d $f12, $f8 \n\t" \ 718 "ldc1 $f14, 48(%2) \n\t" \ 719 "pmaddhw $f14, $f14, $f4 \n\t" \ 720 "paddw $f8, $f8, $f10 \n\t" \ 721 "psubw $f12, $f12, $f10 \n\t" \ 722 "mov.d $f10, $f0 \n\t" \ 723 "paddw $f0, $f0, $f2 \n\t" \ 724 "psubw $f10, $f10, $f2 \n\t" \ 725 "ldc1 $f2, 64(%2) \n\t" \ 726 "pmaddhw $f2, $f2, $f4 \n\t" \ 727 "paddw $f14, $f14, $f8 \n\t" \ 728 "paddw $f8, $f8, $f8 \n\t" \ 729 "psubw $f8, $f8, $f14 \n\t" \ 730 "psraw $f14, $f14, $f18 \n\t" \ 731 "psraw $f8, $f8, $f18 \n\t" \ 732 "mov.d $f6, $f0 \n\t" \ 733 "paddw $f0, $f0, $f2 \n\t" \ 734 "psubw $f6, $f6, $f2 \n\t" \ 735 "psraw $f0, $f0, $f18 \n\t" \ 736 "psraw $f6, $f6, $f18 \n\t" \ 737 "packsswh $f14, $f14, $f14 \n\t" \ 738 "swc1 $f14, " #dst " \n\t" \ 739 "packsswh $f0, $f0, $f0 \n\t" \ 740 "swc1 $f0, 16+" #dst " \n\t" \ 741 "packsswh $f6, $f6, $f6 \n\t" \ 742 "swc1 $f6, 96+" #dst " \n\t" \ 743 "packsswh $f8, $f8, $f8 \n\t" \ 744 "swc1 $f8, 112+" #dst " \n\t" \ 745 "ldc1 $f8, 80(%2) \n\t" \ 746 "ldc1 $f16, 96(%2) \n\t" \ 747 "pmaddhw $f8, $f8, $f4 \n\t" \ 748 "pmaddhw $f4, $f4, $f16 \n\t" \ 749 "mov.d $f6, $f10 \n\t" \ 750 "paddw $f6, $f6, $f8 \n\t" \ 751 "psubw $f10, $f10, $f8 \n\t" \ 752 "psraw $f6, $f6, $f18 \n\t" \ 753 "psraw $f10, $f10, $f18 \n\t" \ 754 "mov.d $f8, $f12 \n\t" \ 755 "paddw $f12, $f12, $f4 \n\t" \ 756 "psubw $f8, $f8, $f4 \n\t" \ 757 "psraw $f12, $f12, $f18 \n\t" \ 758 "packsswh $f6, $f6, $f6 \n\t" \ 759 "swc1 $f6, 32+" #dst " \n\t" \ 760 "psraw $f8, $f8, $f18 \n\t" \ 761 "packsswh $f12, $f12, $f12 
\n\t" \ 762 "swc1 $f12, 48+" #dst " \n\t" \ 763 "packsswh $f8, $f8, $f8 \n\t" \ 764 "packsswh $f10, $f10, $f10 \n\t" \ 765 "swc1 $f8, 64+" #dst " \n\t" \ 766 "swc1 $f10, 80+" #dst " \n\t" 769 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
770 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
771 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
772 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT variant that reads only src0, handling two adjacent 8-byte groups
 * per expansion (src0 and 8+src0). The src4, src1 and src5 arguments are not
 * referenced in the body.
 *   - Each group is multiplied (pmaddhw) against the table entries at 16(%2)
 *     and 24(%2), then shifted right by `shift` (via GPR $10 into $f18).
 *   - packsswh merges the two groups' results: $f8 from the 16(%2) products,
 *     $f0 from the 24(%2) products.
 *   - $f8 is stored with sdc1 at dst, 48+dst, 64+dst and 112+dst; $f0 at
 *     16+dst, 32+dst, 80+dst and 96+dst.
 * Invoked twice (inputs 0(%1) and 16(%1), outputs 0(%0) and 8(%0)), shift 20.
 * NOTE(review): the load of 32(%2) into $f14 at listing line 794 is never
 * consumed within this macro; it looks like a leftover. Confirm before
 * removing.
 * NOTE(review): presumably reached when every other input group is zero; the
 * labels are not visible in this excerpt. Confirm.
 */
779 #define IDCT(src0, src4, src1, src5, dst, shift) \ 780 "ldc1 $f0, " #src0 " \n\t" \ 781 "ldc1 $f8, 16(%2) \n\t" \ 782 "li $10, " #shift " \n\t" \ 783 "pmaddhw $f8, $f8, $f0 \n\t" \ 784 "mtc1 $10, $f18 \n\t" \ 785 "ldc1 $f10, 24(%2) \n\t" \ 786 "pmaddhw $f0, $f0, $f10 \n\t" \ 787 "psraw $f8, $f8, $f18 \n\t" \ 788 "psraw $f0, $f0, $f18 \n\t" \ 789 "ldc1 $f4, 8+" #src0 " \n\t" \ 790 "ldc1 $f2, 16(%2) \n\t" \ 791 "pmaddhw $f2, $f2, $f4 \n\t" \ 792 "ldc1 $f14, 24(%2) \n\t" \ 793 "pmaddhw $f4, $f4, $f14 \n\t" \ 794 "ldc1 $f14, 32(%2) \n\t" \ 795 "psraw $f2, $f2, $f18 \n\t" \ 796 "packsswh $f8, $f8, $f2 \n\t" \ 797 "sdc1 $f8, " #dst " \n\t" \ 798 "psraw $f4, $f4, $f18 \n\t" \ 799 "packsswh $f0, $f0, $f4 \n\t" \ 800 "sdc1 $f0, 16+" #dst " \n\t" \ 801 "sdc1 $f0, 96+" #dst " \n\t" \ 802 "sdc1 $f8, 112+" #dst " \n\t" \ 803 "sdc1 $f0, 32+" #dst " \n\t" \ 804 "sdc1 $f8, 48+" #dst " \n\t" \ 805 "sdc1 $f8, 64+" #dst " \n\t" \ 806 "sdc1 $f0, 80+" #dst " \n\t" 809 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
810 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift, bt)
#define IDCT(src0, src4, src1, src5, dst, shift)
void ff_simple_idct_mmi(int16_t *block)
static const int16_t coeffs[]