34 # define RENAME(a) a ## _C 36 # define TEMPLATE_PP_C 0 39 #ifdef TEMPLATE_PP_ALTIVEC 40 # define RENAME(a) a ## _altivec 42 # define TEMPLATE_PP_ALTIVEC 0 45 #ifdef TEMPLATE_PP_MMX 46 # define RENAME(a) a ## _MMX 48 # define TEMPLATE_PP_MMX 0 51 #ifdef TEMPLATE_PP_MMXEXT 52 # undef TEMPLATE_PP_MMX 53 # define TEMPLATE_PP_MMX 1 54 # define RENAME(a) a ## _MMX2 56 # define TEMPLATE_PP_MMXEXT 0 59 #ifdef TEMPLATE_PP_3DNOW 60 # undef TEMPLATE_PP_MMX 61 # define TEMPLATE_PP_MMX 1 62 # define RENAME(a) a ## _3DNow 64 # define TEMPLATE_PP_3DNOW 0 67 #ifdef TEMPLATE_PP_SSE2 68 # undef TEMPLATE_PP_MMX 69 # define TEMPLATE_PP_MMX 1 70 # undef TEMPLATE_PP_MMXEXT 71 # define TEMPLATE_PP_MMXEXT 1 72 # define RENAME(a) a ## _SSE2 74 # define TEMPLATE_PP_SSE2 0 82 #if TEMPLATE_PP_MMXEXT 83 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" 84 #elif TEMPLATE_PP_3DNOW 85 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" 87 #define PAVGB(a,b) REAL_PAVGB(a,b) 89 #if TEMPLATE_PP_MMXEXT 90 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" 92 #define PMINUB(b,a,t) \ 93 "movq " #a ", " #t " \n\t"\ 94 "psubusb " #b ", " #t " \n\t"\ 95 "psubb " #t ", " #a " \n\t" 98 #if TEMPLATE_PP_MMXEXT 99 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" 100 #elif TEMPLATE_PP_MMX 101 #define PMAXUB(a,b) \ 102 "psubusb " #a ", " #b " \n\t"\ 103 "paddb " #a ", " #b " \n\t" 115 "movq %0, %%mm7 \n\t" 116 "movq %1, %%mm6 \n\t" 117 : :
"m" (
c->mmxDcOffset[
c->nonBQP]),
"m" (
c->mmxDcThreshold[
c->nonBQP])
121 "lea (%2, %3), %%"FF_REG_a
" \n\t" 125 "movq (%2), %%mm0 \n\t" 126 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 127 "movq %%mm0, %%mm3 \n\t" 128 "movq %%mm0, %%mm4 \n\t" 130 PMINUB(%%mm1, %%mm3, %%mm5)
131 "psubb %%mm1, %%mm0 \n\t" 132 "paddb %%mm7, %%mm0 \n\t" 133 "pcmpgtb %%mm6, %%mm0 \n\t" 135 "movq (%%"FF_REG_a
",%3), %%mm2 \n\t" 137 PMINUB(%%mm2, %%mm3, %%mm5)
138 "psubb %%mm2, %%mm1 \n\t" 139 "paddb %%mm7, %%mm1 \n\t" 140 "pcmpgtb %%mm6, %%mm1 \n\t" 141 "paddb %%mm1, %%mm0 \n\t" 143 "movq (%%"FF_REG_a
", %3, 2), %%mm1 \n\t" 145 PMINUB(%%mm1, %%mm3, %%mm5)
146 "psubb %%mm1, %%mm2 \n\t" 147 "paddb %%mm7, %%mm2 \n\t" 148 "pcmpgtb %%mm6, %%mm2 \n\t" 149 "paddb %%mm2, %%mm0 \n\t" 151 "lea (%%"FF_REG_a
", %3, 4), %%"FF_REG_a
"\n\t" 153 "movq (%2, %3, 4), %%mm2 \n\t" 155 PMINUB(%%mm2, %%mm3, %%mm5)
156 "psubb %%mm2, %%mm1 \n\t" 157 "paddb %%mm7, %%mm1 \n\t" 158 "pcmpgtb %%mm6, %%mm1 \n\t" 159 "paddb %%mm1, %%mm0 \n\t" 161 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 163 PMINUB(%%mm1, %%mm3, %%mm5)
164 "psubb %%mm1, %%mm2 \n\t" 165 "paddb %%mm7, %%mm2 \n\t" 166 "pcmpgtb %%mm6, %%mm2 \n\t" 167 "paddb %%mm2, %%mm0 \n\t" 169 "movq (%%"FF_REG_a
", %3), %%mm2 \n\t" 171 PMINUB(%%mm2, %%mm3, %%mm5)
172 "psubb %%mm2, %%mm1 \n\t" 173 "paddb %%mm7, %%mm1 \n\t" 174 "pcmpgtb %%mm6, %%mm1 \n\t" 175 "paddb %%mm1, %%mm0 \n\t" 177 "movq (%%"FF_REG_a
", %3, 2), %%mm1 \n\t" 179 PMINUB(%%mm1, %%mm3, %%mm5)
180 "psubb %%mm1, %%mm2 \n\t" 181 "paddb %%mm7, %%mm2 \n\t" 182 "pcmpgtb %%mm6, %%mm2 \n\t" 183 "paddb %%mm2, %%mm0 \n\t" 184 "psubusb %%mm3, %%mm4 \n\t" 187 #if TEMPLATE_PP_MMXEXT 188 "pxor %%mm7, %%mm7 \n\t" 189 "psadbw %%mm7, %%mm0 \n\t" 191 "movq %%mm0, %%mm1 \n\t" 192 "psrlw $8, %%mm0 \n\t" 193 "paddb %%mm1, %%mm0 \n\t" 194 "movq %%mm0, %%mm1 \n\t" 195 "psrlq $16, %%mm0 \n\t" 196 "paddb %%mm1, %%mm0 \n\t" 197 "movq %%mm0, %%mm1 \n\t" 198 "psrlq $32, %%mm0 \n\t" 199 "paddb %%mm1, %%mm0 \n\t" 201 "movq %4, %%mm7 \n\t" 202 "paddusb %%mm7, %%mm7 \n\t" 203 "psubusb %%mm7, %%mm4 \n\t" 204 "packssdw %%mm4, %%mm4 \n\t" 205 "movd %%mm0, %0 \n\t" 206 "movd %%mm4, %1 \n\t" 208 :
"=r" (numEq),
"=r" (dcOk)
213 numEq= (-numEq) &0xFF;
214 if(numEq >
c->ppMode.flatnessThreshold){
221 #endif //TEMPLATE_PP_MMX 227 #if !TEMPLATE_PP_ALTIVEC 230 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 233 "movq %2, %%mm0 \n\t" 234 "pxor %%mm4, %%mm4 \n\t" 236 "movq (%0), %%mm6 \n\t" 237 "movq (%0, %1), %%mm5 \n\t" 238 "movq %%mm5, %%mm1 \n\t" 239 "movq %%mm6, %%mm2 \n\t" 240 "psubusb %%mm6, %%mm5 \n\t" 241 "psubusb %%mm1, %%mm2 \n\t" 242 "por %%mm5, %%mm2 \n\t" 243 "psubusb %%mm0, %%mm2 \n\t" 244 "pcmpeqb %%mm4, %%mm2 \n\t" 246 "pand %%mm2, %%mm6 \n\t" 247 "pandn %%mm1, %%mm2 \n\t" 248 "por %%mm2, %%mm6 \n\t" 250 "movq (%0, %1, 8), %%mm5 \n\t" 251 "lea (%0, %1, 4), %%"FF_REG_a
" \n\t" 252 "lea (%0, %1, 8), %%"FF_REG_c
" \n\t" 253 "sub %1, %%"FF_REG_c
" \n\t" 255 "movq (%0, %1, 8), %%mm7 \n\t" 256 "movq %%mm5, %%mm1 \n\t" 257 "movq %%mm7, %%mm2 \n\t" 258 "psubusb %%mm7, %%mm5 \n\t" 259 "psubusb %%mm1, %%mm2 \n\t" 260 "por %%mm5, %%mm2 \n\t" 261 "psubusb %%mm0, %%mm2 \n\t" 262 "pcmpeqb %%mm4, %%mm2 \n\t" 264 "pand %%mm2, %%mm7 \n\t" 265 "pandn %%mm1, %%mm2 \n\t" 266 "por %%mm2, %%mm7 \n\t" 275 "movq (%0, %1), %%mm0 \n\t" 276 "movq %%mm0, %%mm1 \n\t" 280 "movq (%0, %1, 4), %%mm2 \n\t" 281 "movq %%mm2, %%mm5 \n\t" 282 PAVGB((%%FF_REGa), %%mm2)
283 PAVGB((%0, %1, 2), %%mm2)
284 "movq %%mm2, %%mm3 \n\t" 285 "movq (%0), %%mm4 \n\t" 288 "movq %%mm3, (%0) \n\t" 290 "movq %%mm1, %%mm0 \n\t" 292 "movq %%mm4, %%mm3 \n\t" 293 PAVGB((%0,%1,2), %%mm3)
294 PAVGB((%%FF_REGa,%1,2), %%mm5)
295 PAVGB((%%FF_REGa), %%mm5)
298 "movq %%mm3, (%0,%1) \n\t" 301 "movq (%%"FF_REG_c
"), %%mm0 \n\t" 302 PAVGB((%%FF_REGa, %1, 2), %%mm0)
303 "movq %%mm0, %%mm3 \n\t" 307 "movq (%0, %1, 2), %%mm2 \n\t" 308 "movq %%mm0, (%0, %1, 2) \n\t" 310 "movq (%%"FF_REG_a
", %1, 4), %%mm0 \n\t" 311 PAVGB((%%FF_REGc), %%mm0)
317 "movq (%%"FF_REG_a
"), %%mm5 \n\t" 318 "movq %%mm6, (%%"FF_REG_a
") \n\t" 320 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 325 "movq (%0, %1, 4), %%mm4 \n\t" 328 "movq %%mm6, (%0, %1, 4) \n\t" 333 "movq (%%"FF_REG_a
", %1, 2), %%mm6 \n\t" 336 "movq %%mm1, (%%"FF_REG_a
", %1, 2) \n\t" 338 PAVGB((%%FF_REGc), %%mm2)
339 "movq (%%"FF_REG_a
", %1, 4), %%mm0 \n\t" 343 "movq %%mm6, (%%"FF_REG_c
") \n\t" 350 "movq %%mm5, (%%"FF_REG_a
", %1, 4) \n\t" 355 :
"%"FF_REG_a,
"%"FF_REG_c
357 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 359 const int l2=
stride + l1;
360 const int l3=
stride + l2;
361 const int l4=
stride + l3;
362 const int l5=
stride + l4;
363 const int l6=
stride + l5;
364 const int l7=
stride + l6;
365 const int l8=
stride + l7;
366 const int l9=
stride + l8;
374 sums[0] = 4*first +
src[l1] +
src[l2] +
src[l3] + 4;
375 sums[1] = sums[0] - first + src[l4];
376 sums[2] = sums[1] - first + src[l5];
377 sums[3] = sums[2] - first + src[l6];
378 sums[4] = sums[3] - first + src[l7];
379 sums[5] = sums[4] - src[l1] + src[l8];
380 sums[6] = sums[5] - src[l2] + last;
381 sums[7] = sums[6] - src[l3] + last;
382 sums[8] = sums[7] - src[l4] + last;
383 sums[9] = sums[8] - src[l5] + last;
385 src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
386 src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
387 src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
388 src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
389 src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
390 src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
391 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
392 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
396 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 398 #endif //TEMPLATE_PP_ALTIVEC 409 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 413 "pxor %%mm7, %%mm7 \n\t" 414 "lea (%0, %1), %%"FF_REG_a
" \n\t" 415 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 418 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 419 "movq (%0, %1, 4), %%mm1 \n\t" 420 "movq %%mm1, %%mm2 \n\t" 421 "psubusb %%mm0, %%mm1 \n\t" 422 "psubusb %%mm2, %%mm0 \n\t" 423 "por %%mm1, %%mm0 \n\t" 424 "movq (%%"FF_REG_c
"), %%mm3 \n\t" 425 "movq (%%"FF_REG_c
", %1), %%mm4 \n\t" 426 "movq %%mm3, %%mm5 \n\t" 427 "psubusb %%mm4, %%mm3 \n\t" 428 "psubusb %%mm5, %%mm4 \n\t" 429 "por %%mm4, %%mm3 \n\t" 431 "movq %%mm2, %%mm1 \n\t" 432 "psubusb %%mm5, %%mm2 \n\t" 433 "movq %%mm2, %%mm4 \n\t" 434 "pcmpeqb %%mm7, %%mm2 \n\t" 435 "psubusb %%mm1, %%mm5 \n\t" 436 "por %%mm5, %%mm4 \n\t" 437 "psubusb %%mm0, %%mm4 \n\t" 438 "movq %%mm4, %%mm3 \n\t" 439 "movq %2, %%mm0 \n\t" 440 "paddusb %%mm0, %%mm0 \n\t" 441 "psubusb %%mm0, %%mm4 \n\t" 442 "pcmpeqb %%mm7, %%mm4 \n\t" 443 "psubusb "MANGLE(b01)
", %%mm3 \n\t" 444 "pand %%mm4, %%mm3 \n\t" 447 "movq %%mm3, %%mm1 \n\t" 451 "movq (%0, %1, 4), %%mm0 \n\t" 452 "pxor %%mm2, %%mm0 \n\t" 453 "psubusb %%mm3, %%mm0 \n\t" 454 "pxor %%mm2, %%mm0 \n\t" 455 "movq %%mm0, (%0, %1, 4) \n\t" 457 "movq (%%"FF_REG_c
"), %%mm0 \n\t" 458 "pxor %%mm2, %%mm0 \n\t" 459 "paddusb %%mm3, %%mm0 \n\t" 460 "pxor %%mm2, %%mm0 \n\t" 461 "movq %%mm0, (%%"FF_REG_c
") \n\t" 465 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 466 "pxor %%mm2, %%mm0 \n\t" 467 "psubusb %%mm1, %%mm0 \n\t" 468 "pxor %%mm2, %%mm0 \n\t" 469 "movq %%mm0, (%%"FF_REG_a
", %1, 2) \n\t" 471 "movq (%%"FF_REG_c
", %1), %%mm0 \n\t" 472 "pxor %%mm2, %%mm0 \n\t" 473 "paddusb %%mm1, %%mm0 \n\t" 474 "pxor %%mm2, %%mm0 \n\t" 475 "movq %%mm0, (%%"FF_REG_c
", %1) \n\t" 479 "movq (%%"FF_REG_a
", %1), %%mm0 \n\t" 480 "pxor %%mm2, %%mm0 \n\t" 481 "psubusb %%mm1, %%mm0 \n\t" 482 "pxor %%mm2, %%mm0 \n\t" 483 "movq %%mm0, (%%"FF_REG_a
", %1) \n\t" 485 "movq (%%"FF_REG_c
", %1, 2), %%mm0 \n\t" 486 "pxor %%mm2, %%mm0 \n\t" 487 "paddusb %%mm1, %%mm0 \n\t" 488 "pxor %%mm2, %%mm0 \n\t" 489 "movq %%mm0, (%%"FF_REG_c
", %1, 2) \n\t" 494 :
"%"FF_REG_a,
"%"FF_REG_c
496 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 499 const int l2=
stride + l1;
500 const int l3=
stride + l2;
501 const int l4=
stride + l3;
502 const int l5=
stride + l4;
503 const int l6=
stride + l5;
504 const int l7=
stride + l6;
512 int b= src[l4] - src[l5];
513 int c= src[l5] - src[l6];
530 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 533 #if !TEMPLATE_PP_ALTIVEC 536 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 554 #if 0 //slightly more accurate and slightly slower 555 "pxor %%mm7, %%mm7 \n\t" 556 "lea (%0, %1), %%"FF_REG_a
" \n\t" 557 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 563 "movq (%0, %1, 2), %%mm0 \n\t" 564 "movq (%0), %%mm1 \n\t" 565 "movq %%mm0, %%mm2 \n\t" 570 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 571 "movq (%%"FF_REG_a
", %1, 2), %%mm3 \n\t" 572 "movq %%mm1, %%mm4 \n\t" 577 "movq %%mm0, %%mm4 \n\t" 578 "psubusb %%mm1, %%mm0 \n\t" 579 "psubusb %%mm4, %%mm1 \n\t" 580 "por %%mm0, %%mm1 \n\t" 583 "movq (%0, %1, 4), %%mm0 \n\t" 584 "movq %%mm0, %%mm4 \n\t" 589 "movq (%%"FF_REG_c
"), %%mm2 \n\t" 590 "movq %%mm3, %%mm5 \n\t" 595 "movq %%mm0, %%mm6 \n\t" 596 "psubusb %%mm3, %%mm0 \n\t" 597 "psubusb %%mm6, %%mm3 \n\t" 598 "por %%mm0, %%mm3 \n\t" 599 "pcmpeqb %%mm7, %%mm0 \n\t" 602 "movq (%%"FF_REG_c
", %1), %%mm6 \n\t" 603 "movq %%mm6, %%mm5 \n\t" 608 "movq (%%"FF_REG_c
", %1, 2), %%mm5 \n\t" 609 "movq %%mm2, %%mm4 \n\t" 614 "movq %%mm6, %%mm4 \n\t" 615 "psubusb %%mm2, %%mm6 \n\t" 616 "psubusb %%mm4, %%mm2 \n\t" 617 "por %%mm6, %%mm2 \n\t" 621 PMINUB(%%mm2, %%mm1, %%mm4)
622 "movq %2, %%mm4 \n\t" 623 "paddusb "MANGLE(b01)
", %%mm4 \n\t" 624 "pcmpgtb %%mm3, %%mm4 \n\t" 625 "psubusb %%mm1, %%mm3 \n\t" 626 "pand %%mm4, %%mm3 \n\t" 628 "movq %%mm3, %%mm1 \n\t" 632 "paddusb %%mm1, %%mm3 \n\t" 635 "movq (%%"FF_REG_a
", %1, 2), %%mm6 \n\t" 636 "movq (%0, %1, 4), %%mm5 \n\t" 637 "movq (%0, %1, 4), %%mm4 \n\t" 638 "psubusb %%mm6, %%mm5 \n\t" 639 "psubusb %%mm4, %%mm6 \n\t" 640 "por %%mm6, %%mm5 \n\t" 641 "pcmpeqb %%mm7, %%mm6 \n\t" 642 "pxor %%mm6, %%mm0 \n\t" 643 "pand %%mm0, %%mm3 \n\t" 644 PMINUB(%%mm5, %%mm3, %%mm0)
646 "psubusb "MANGLE(b01)
", %%mm3 \n\t" 649 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 650 "movq (%0, %1, 4), %%mm2 \n\t" 651 "pxor %%mm6, %%mm0 \n\t" 652 "pxor %%mm6, %%mm2 \n\t" 653 "psubb %%mm3, %%mm0 \n\t" 654 "paddb %%mm3, %%mm2 \n\t" 655 "pxor %%mm6, %%mm0 \n\t" 656 "pxor %%mm6, %%mm2 \n\t" 657 "movq %%mm0, (%%"FF_REG_a
", %1, 2) \n\t" 658 "movq %%mm2, (%0, %1, 4) \n\t" 661 "lea (%0, %1), %%"FF_REG_a
" \n\t" 662 "pcmpeqb %%mm6, %%mm6 \n\t" 668 "movq (%%"FF_REG_a
", %1, 2), %%mm1 \n\t" 669 "movq (%0, %1, 4), %%mm0 \n\t" 670 "pxor %%mm6, %%mm1 \n\t" 674 "movq (%%"FF_REG_a
", %1, 4), %%mm2 \n\t" 675 "movq (%%"FF_REG_a
", %1), %%mm3 \n\t" 676 "pxor %%mm6, %%mm2 \n\t" 677 "movq %%mm2, %%mm5 \n\t" 678 "movq "MANGLE(b80)
", %%mm4 \n\t" 679 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 686 "movq (%%"FF_REG_a
"), %%mm2 \n\t" 687 "pxor %%mm6, %%mm2 \n\t" 690 "movq "MANGLE(b80)
", %%mm3 \n\t" 696 PAVGB((%%FF_REGc, %1), %%mm5)
697 "movq (%%"FF_REG_c
", %1, 2), %%mm1 \n\t" 698 "pxor %%mm6, %%mm1 \n\t" 699 PAVGB((%0, %1, 4), %%mm1)
700 "movq "MANGLE(b80)
", %%mm2 \n\t" 706 "movq "MANGLE(b00)
", %%mm1 \n\t" 707 "movq "MANGLE(b00)
", %%mm5 \n\t" 708 "psubb %%mm2, %%mm1 \n\t" 709 "psubb %%mm3, %%mm5 \n\t" 712 PMINUB(%%mm2, %%mm3, %%mm1)
716 "movq "MANGLE(b00)
", %%mm7 \n\t" 717 "movq %2, %%mm2 \n\t" 719 "psubb %%mm6, %%mm2 \n\t" 721 "movq %%mm4, %%mm1 \n\t" 722 "pcmpgtb %%mm7, %%mm1 \n\t" 723 "pxor %%mm1, %%mm4 \n\t" 724 "psubb %%mm1, %%mm4 \n\t" 725 "pcmpgtb %%mm4, %%mm2 \n\t" 726 "psubusb %%mm3, %%mm4 \n\t" 729 "movq %%mm4, %%mm3 \n\t" 730 "psubusb "MANGLE(b01)
", %%mm4 \n\t" 733 "paddb %%mm3, %%mm4 \n\t" 734 "pand %%mm2, %%mm4 \n\t" 736 "movq "MANGLE(b80)
", %%mm5 \n\t" 737 "psubb %%mm0, %%mm5 \n\t" 738 "paddsb %%mm6, %%mm5 \n\t" 739 "pcmpgtb %%mm5, %%mm7 \n\t" 740 "pxor %%mm7, %%mm5 \n\t" 742 PMINUB(%%mm5, %%mm4, %%mm3)
743 "pxor %%mm1, %%mm7 \n\t" 745 "pand %%mm7, %%mm4 \n\t" 746 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 747 "movq (%0, %1, 4), %%mm2 \n\t" 748 "pxor %%mm1, %%mm0 \n\t" 749 "pxor %%mm1, %%mm2 \n\t" 750 "paddb %%mm4, %%mm0 \n\t" 751 "psubb %%mm4, %%mm2 \n\t" 752 "pxor %%mm1, %%mm0 \n\t" 753 "pxor %%mm1, %%mm2 \n\t" 754 "movq %%mm0, (%%"FF_REG_a
", %1, 2) \n\t" 755 "movq %%mm2, (%0, %1, 4) \n\t" 760 :
"%"FF_REG_a,
"%"FF_REG_c
818 #elif TEMPLATE_PP_MMX 822 "pxor %%mm7, %%mm7 \n\t" 827 "movq (%0), %%mm0 \n\t" 828 "movq %%mm0, %%mm1 \n\t" 829 "punpcklbw %%mm7, %%mm0 \n\t" 830 "punpckhbw %%mm7, %%mm1 \n\t" 832 "movq (%0, %1), %%mm2 \n\t" 833 "lea (%0, %1, 2), %%"FF_REG_a
" \n\t" 834 "movq %%mm2, %%mm3 \n\t" 835 "punpcklbw %%mm7, %%mm2 \n\t" 836 "punpckhbw %%mm7, %%mm3 \n\t" 838 "movq (%%"FF_REG_a
"), %%mm4 \n\t" 839 "movq %%mm4, %%mm5 \n\t" 840 "punpcklbw %%mm7, %%mm4 \n\t" 841 "punpckhbw %%mm7, %%mm5 \n\t" 843 "paddw %%mm0, %%mm0 \n\t" 844 "paddw %%mm1, %%mm1 \n\t" 845 "psubw %%mm4, %%mm2 \n\t" 846 "psubw %%mm5, %%mm3 \n\t" 847 "psubw %%mm2, %%mm0 \n\t" 848 "psubw %%mm3, %%mm1 \n\t" 850 "psllw $2, %%mm2 \n\t" 851 "psllw $2, %%mm3 \n\t" 852 "psubw %%mm2, %%mm0 \n\t" 853 "psubw %%mm3, %%mm1 \n\t" 855 "movq (%%"FF_REG_a
", %1), %%mm2 \n\t" 856 "movq %%mm2, %%mm3 \n\t" 857 "punpcklbw %%mm7, %%mm2 \n\t" 858 "punpckhbw %%mm7, %%mm3 \n\t" 860 "psubw %%mm2, %%mm0 \n\t" 861 "psubw %%mm3, %%mm1 \n\t" 862 "psubw %%mm2, %%mm0 \n\t" 863 "psubw %%mm3, %%mm1 \n\t" 864 "movq %%mm0, (%3) \n\t" 865 "movq %%mm1, 8(%3) \n\t" 867 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 868 "movq %%mm0, %%mm1 \n\t" 869 "punpcklbw %%mm7, %%mm0 \n\t" 870 "punpckhbw %%mm7, %%mm1 \n\t" 872 "psubw %%mm0, %%mm2 \n\t" 873 "psubw %%mm1, %%mm3 \n\t" 874 "movq %%mm2, 16(%3) \n\t" 875 "movq %%mm3, 24(%3) \n\t" 876 "paddw %%mm4, %%mm4 \n\t" 877 "paddw %%mm5, %%mm5 \n\t" 878 "psubw %%mm2, %%mm4 \n\t" 879 "psubw %%mm3, %%mm5 \n\t" 881 "lea (%%"FF_REG_a
", %1), %0 \n\t" 882 "psllw $2, %%mm2 \n\t" 883 "psllw $2, %%mm3 \n\t" 884 "psubw %%mm2, %%mm4 \n\t" 885 "psubw %%mm3, %%mm5 \n\t" 887 "movq (%0, %1, 2), %%mm2 \n\t" 888 "movq %%mm2, %%mm3 \n\t" 889 "punpcklbw %%mm7, %%mm2 \n\t" 890 "punpckhbw %%mm7, %%mm3 \n\t" 891 "psubw %%mm2, %%mm4 \n\t" 892 "psubw %%mm3, %%mm5 \n\t" 893 "psubw %%mm2, %%mm4 \n\t" 894 "psubw %%mm3, %%mm5 \n\t" 896 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 897 "punpcklbw %%mm7, %%mm6 \n\t" 898 "psubw %%mm6, %%mm2 \n\t" 899 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 900 "punpckhbw %%mm7, %%mm6 \n\t" 901 "psubw %%mm6, %%mm3 \n\t" 903 "paddw %%mm0, %%mm0 \n\t" 904 "paddw %%mm1, %%mm1 \n\t" 905 "psubw %%mm2, %%mm0 \n\t" 906 "psubw %%mm3, %%mm1 \n\t" 908 "psllw $2, %%mm2 \n\t" 909 "psllw $2, %%mm3 \n\t" 910 "psubw %%mm2, %%mm0 \n\t" 911 "psubw %%mm3, %%mm1 \n\t" 913 "movq (%0, %1, 4), %%mm2 \n\t" 914 "movq %%mm2, %%mm3 \n\t" 915 "punpcklbw %%mm7, %%mm2 \n\t" 916 "punpckhbw %%mm7, %%mm3 \n\t" 918 "paddw %%mm2, %%mm2 \n\t" 919 "paddw %%mm3, %%mm3 \n\t" 920 "psubw %%mm2, %%mm0 \n\t" 921 "psubw %%mm3, %%mm1 \n\t" 923 "movq (%3), %%mm2 \n\t" 924 "movq 8(%3), %%mm3 \n\t" 926 #if TEMPLATE_PP_MMXEXT 927 "movq %%mm7, %%mm6 \n\t" 928 "psubw %%mm0, %%mm6 \n\t" 929 "pmaxsw %%mm6, %%mm0 \n\t" 930 "movq %%mm7, %%mm6 \n\t" 931 "psubw %%mm1, %%mm6 \n\t" 932 "pmaxsw %%mm6, %%mm1 \n\t" 933 "movq %%mm7, %%mm6 \n\t" 934 "psubw %%mm2, %%mm6 \n\t" 935 "pmaxsw %%mm6, %%mm2 \n\t" 936 "movq %%mm7, %%mm6 \n\t" 937 "psubw %%mm3, %%mm6 \n\t" 938 "pmaxsw %%mm6, %%mm3 \n\t" 940 "movq %%mm7, %%mm6 \n\t" 941 "pcmpgtw %%mm0, %%mm6 \n\t" 942 "pxor %%mm6, %%mm0 \n\t" 943 "psubw %%mm6, %%mm0 \n\t" 944 "movq %%mm7, %%mm6 \n\t" 945 "pcmpgtw %%mm1, %%mm6 \n\t" 946 "pxor %%mm6, %%mm1 \n\t" 947 "psubw %%mm6, %%mm1 \n\t" 948 "movq %%mm7, %%mm6 \n\t" 949 "pcmpgtw %%mm2, %%mm6 \n\t" 950 "pxor %%mm6, %%mm2 \n\t" 951 "psubw %%mm6, %%mm2 \n\t" 952 "movq %%mm7, %%mm6 \n\t" 953 "pcmpgtw %%mm3, %%mm6 \n\t" 954 "pxor %%mm6, %%mm3 \n\t" 955 "psubw %%mm6, %%mm3 \n\t" 958 #if TEMPLATE_PP_MMXEXT 959 "pminsw %%mm2, %%mm0 \n\t" 960 "pminsw %%mm3, %%mm1 \n\t" 962 "movq %%mm0, %%mm6 \n\t" 963 "psubusw %%mm2, %%mm6 \n\t" 964 "psubw %%mm6, %%mm0 \n\t" 965 "movq %%mm1, %%mm6 \n\t" 966 "psubusw %%mm3, %%mm6 \n\t" 967 "psubw %%mm6, %%mm1 \n\t" 970 "movd %2, %%mm2 \n\t" 971 "punpcklbw %%mm7, %%mm2 \n\t" 973 "movq %%mm7, %%mm6 \n\t" 974 "pcmpgtw %%mm4, %%mm6 \n\t" 975 "pxor %%mm6, %%mm4 \n\t" 976 "psubw %%mm6, %%mm4 \n\t" 977 "pcmpgtw %%mm5, %%mm7 \n\t" 978 "pxor %%mm7, 
%%mm5 \n\t" 979 "psubw %%mm7, %%mm5 \n\t" 981 "psllw $3, %%mm2 \n\t" 982 "movq %%mm2, %%mm3 \n\t" 983 "pcmpgtw %%mm4, %%mm2 \n\t" 984 "pcmpgtw %%mm5, %%mm3 \n\t" 985 "pand %%mm2, %%mm4 \n\t" 986 "pand %%mm3, %%mm5 \n\t" 989 "psubusw %%mm0, %%mm4 \n\t" 990 "psubusw %%mm1, %%mm5 \n\t" 993 "movq "MANGLE(w05)
", %%mm2 \n\t" 994 "pmullw %%mm2, %%mm4 \n\t" 995 "pmullw %%mm2, %%mm5 \n\t" 996 "movq "MANGLE(w20)
", %%mm2 \n\t" 997 "paddw %%mm2, %%mm4 \n\t" 998 "paddw %%mm2, %%mm5 \n\t" 999 "psrlw $6, %%mm4 \n\t" 1000 "psrlw $6, %%mm5 \n\t" 1002 "movq 16(%3), %%mm0 \n\t" 1003 "movq 24(%3), %%mm1 \n\t" 1005 "pxor %%mm2, %%mm2 \n\t" 1006 "pxor %%mm3, %%mm3 \n\t" 1008 "pcmpgtw %%mm0, %%mm2 \n\t" 1009 "pcmpgtw %%mm1, %%mm3 \n\t" 1010 "pxor %%mm2, %%mm0 \n\t" 1011 "pxor %%mm3, %%mm1 \n\t" 1012 "psubw %%mm2, %%mm0 \n\t" 1013 "psubw %%mm3, %%mm1 \n\t" 1014 "psrlw $1, %%mm0 \n\t" 1015 "psrlw $1, %%mm1 \n\t" 1017 "pxor %%mm6, %%mm2 \n\t" 1018 "pxor %%mm7, %%mm3 \n\t" 1019 "pand %%mm2, %%mm4 \n\t" 1020 "pand %%mm3, %%mm5 \n\t" 1022 #if TEMPLATE_PP_MMXEXT 1023 "pminsw %%mm0, %%mm4 \n\t" 1024 "pminsw %%mm1, %%mm5 \n\t" 1026 "movq %%mm4, %%mm2 \n\t" 1027 "psubusw %%mm0, %%mm2 \n\t" 1028 "psubw %%mm2, %%mm4 \n\t" 1029 "movq %%mm5, %%mm2 \n\t" 1030 "psubusw %%mm1, %%mm2 \n\t" 1031 "psubw %%mm2, %%mm5 \n\t" 1033 "pxor %%mm6, %%mm4 \n\t" 1034 "pxor %%mm7, %%mm5 \n\t" 1035 "psubw %%mm6, %%mm4 \n\t" 1036 "psubw %%mm7, %%mm5 \n\t" 1037 "packsswb %%mm5, %%mm4 \n\t" 1038 "movq (%0), %%mm0 \n\t" 1039 "paddb %%mm4, %%mm0 \n\t" 1040 "movq %%mm0, (%0) \n\t" 1041 "movq (%0, %1), %%mm0 \n\t" 1042 "psubb %%mm4, %%mm0 \n\t" 1043 "movq %%mm0, (%0, %1) \n\t" 1050 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1052 const int l2=
stride + l1;
1053 const int l3=
stride + l2;
1054 const int l4=
stride + l3;
1055 const int l5=
stride + l4;
1056 const int l6=
stride + l5;
1057 const int l7=
stride + l6;
1058 const int l8=
stride + l7;
1063 const int middleEnergy= 5*(
src[l5] -
src[l4]) + 2*(
src[l3] -
src[l6]);
1064 if(
FFABS(middleEnergy) < 8*
c->QP){
1065 const int q=(
src[l4] -
src[l5])/2;
1066 const int leftEnergy= 5*(
src[l3] -
src[l2]) + 2*(
src[l1] -
src[l4]);
1067 const int rightEnergy= 5*(
src[l7] -
src[l6]) + 2*(
src[l5] -
src[l8]);
1073 d*=
FFSIGN(-middleEnergy);
1088 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1090 #endif //TEMPLATE_PP_ALTIVEC 1092 #if !TEMPLATE_PP_ALTIVEC 1095 #if HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) 1098 "pxor %%mm6, %%mm6 \n\t" 1099 "pcmpeqb %%mm7, %%mm7 \n\t" 1100 "movq %2, %%mm0 \n\t" 1101 "punpcklbw %%mm6, %%mm0 \n\t" 1102 "psrlw $1, %%mm0 \n\t" 1103 "psubw %%mm7, %%mm0 \n\t" 1104 "packuswb %%mm0, %%mm0 \n\t" 1105 "movq %%mm0, %3 \n\t" 1107 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1108 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1113 #undef REAL_FIND_MIN_MAX 1115 #if TEMPLATE_PP_MMXEXT 1116 #define REAL_FIND_MIN_MAX(addr)\ 1117 "movq " #addr ", %%mm0 \n\t"\ 1118 "pminub %%mm0, %%mm7 \n\t"\ 1119 "pmaxub %%mm0, %%mm6 \n\t" 1121 #define REAL_FIND_MIN_MAX(addr)\ 1122 "movq " #addr ", %%mm0 \n\t"\ 1123 "movq %%mm7, %%mm1 \n\t"\ 1124 "psubusb %%mm0, %%mm6 \n\t"\ 1125 "paddb %%mm0, %%mm6 \n\t"\ 1126 "psubusb %%mm0, %%mm1 \n\t"\ 1127 "psubb %%mm1, %%mm7 \n\t" 1129 #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr) 1131 FIND_MIN_MAX((%%FF_REGa))
1132 FIND_MIN_MAX((%%FF_REGa, %1))
1133 FIND_MIN_MAX((%%FF_REGa, %1, 2))
1134 FIND_MIN_MAX((%0, %1, 4))
1135 FIND_MIN_MAX((%%FF_REGd))
1136 FIND_MIN_MAX((%%FF_REGd, %1))
1137 FIND_MIN_MAX((%%FF_REGd, %1, 2))
1138 FIND_MIN_MAX((%0, %1, 8))
1140 "movq %%mm7, %%mm4 \n\t" 1141 "psrlq $8, %%mm7 \n\t" 1142 #if TEMPLATE_PP_MMXEXT 1143 "pminub %%mm4, %%mm7 \n\t" 1144 "pshufw $0xF9, %%mm7, %%mm4 \n\t" 1145 "pminub %%mm4, %%mm7 \n\t" 1146 "pshufw $0xFE, %%mm7, %%mm4 \n\t" 1147 "pminub %%mm4, %%mm7 \n\t" 1149 "movq %%mm7, %%mm1 \n\t" 1150 "psubusb %%mm4, %%mm1 \n\t" 1151 "psubb %%mm1, %%mm7 \n\t" 1152 "movq %%mm7, %%mm4 \n\t" 1153 "psrlq $16, %%mm7 \n\t" 1154 "movq %%mm7, %%mm1 \n\t" 1155 "psubusb %%mm4, %%mm1 \n\t" 1156 "psubb %%mm1, %%mm7 \n\t" 1157 "movq %%mm7, %%mm4 \n\t" 1158 "psrlq $32, %%mm7 \n\t" 1159 "movq %%mm7, %%mm1 \n\t" 1160 "psubusb %%mm4, %%mm1 \n\t" 1161 "psubb %%mm1, %%mm7 \n\t" 1165 "movq %%mm6, %%mm4 \n\t" 1166 "psrlq $8, %%mm6 \n\t" 1167 #if TEMPLATE_PP_MMXEXT 1168 "pmaxub %%mm4, %%mm6 \n\t" 1169 "pshufw $0xF9, %%mm6, %%mm4 \n\t" 1170 "pmaxub %%mm4, %%mm6 \n\t" 1171 "pshufw $0xFE, %%mm6, %%mm4 \n\t" 1172 "pmaxub %%mm4, %%mm6 \n\t" 1174 "psubusb %%mm4, %%mm6 \n\t" 1175 "paddb %%mm4, %%mm6 \n\t" 1176 "movq %%mm6, %%mm4 \n\t" 1177 "psrlq $16, %%mm6 \n\t" 1178 "psubusb %%mm4, %%mm6 \n\t" 1179 "paddb %%mm4, %%mm6 \n\t" 1180 "movq %%mm6, %%mm4 \n\t" 1181 "psrlq $32, %%mm6 \n\t" 1182 "psubusb %%mm4, %%mm6 \n\t" 1183 "paddb %%mm4, %%mm6 \n\t" 1185 "movq %%mm6, %%mm0 \n\t" 1186 "psubb %%mm7, %%mm6 \n\t" 1188 "movd %%mm6, %k4 \n\t" 1189 "cmpb "MANGLE(deringThreshold)
", %b4 \n\t" 1193 "punpcklbw %%mm7, %%mm7 \n\t" 1194 "punpcklbw %%mm7, %%mm7 \n\t" 1195 "punpcklbw %%mm7, %%mm7 \n\t" 1196 "movq %%mm7, (%4) \n\t" 1198 "movq (%0), %%mm0 \n\t" 1199 "movq %%mm0, %%mm1 \n\t" 1200 "movq %%mm0, %%mm2 \n\t" 1201 "psllq $8, %%mm1 \n\t" 1202 "psrlq $8, %%mm2 \n\t" 1203 "movd -4(%0), %%mm3 \n\t" 1204 "movd 8(%0), %%mm4 \n\t" 1205 "psrlq $24, %%mm3 \n\t" 1206 "psllq $56, %%mm4 \n\t" 1207 "por %%mm3, %%mm1 \n\t" 1208 "por %%mm4, %%mm2 \n\t" 1209 "movq %%mm1, %%mm3 \n\t" 1212 "psubusb %%mm7, %%mm0 \n\t" 1213 "psubusb %%mm7, %%mm2 \n\t" 1214 "psubusb %%mm7, %%mm3 \n\t" 1215 "pcmpeqb "MANGLE(b00)
", %%mm0 \n\t" 1216 "pcmpeqb "MANGLE(b00)
", %%mm2 \n\t" 1217 "pcmpeqb "MANGLE(b00)
", %%mm3 \n\t" 1218 "paddb %%mm2, %%mm0 \n\t" 1219 "paddb %%mm3, %%mm0 \n\t" 1221 "movq (%%"FF_REG_a
"), %%mm2 \n\t" 1222 "movq %%mm2, %%mm3 \n\t" 1223 "movq %%mm2, %%mm4 \n\t" 1224 "psllq $8, %%mm3 \n\t" 1225 "psrlq $8, %%mm4 \n\t" 1226 "movd -4(%%"FF_REG_a
"), %%mm5 \n\t" 1227 "movd 8(%%"FF_REG_a
"), %%mm6 \n\t" 1228 "psrlq $24, %%mm5 \n\t" 1229 "psllq $56, %%mm6 \n\t" 1230 "por %%mm5, %%mm3 \n\t" 1231 "por %%mm6, %%mm4 \n\t" 1232 "movq %%mm3, %%mm5 \n\t" 1235 "psubusb %%mm7, %%mm2 \n\t" 1236 "psubusb %%mm7, %%mm4 \n\t" 1237 "psubusb %%mm7, %%mm5 \n\t" 1238 "pcmpeqb "MANGLE(b00)
", %%mm2 \n\t" 1239 "pcmpeqb "MANGLE(b00)
", %%mm4 \n\t" 1240 "pcmpeqb "MANGLE(b00)
", %%mm5 \n\t" 1241 "paddb %%mm4, %%mm2 \n\t" 1242 "paddb %%mm5, %%mm2 \n\t" 1244 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ 1245 "movq " #src ", " #sx " \n\t" \ 1246 "movq " #sx ", " #lx " \n\t" \ 1247 "movq " #sx ", " #t0 " \n\t" \ 1248 "psllq $8, " #lx " \n\t"\ 1249 "psrlq $8, " #t0 " \n\t"\ 1250 "movd -4" #src ", " #t1 " \n\t"\ 1251 "psrlq $24, " #t1 " \n\t"\ 1252 "por " #t1 ", " #lx " \n\t" \ 1253 "movd 8" #src ", " #t1 " \n\t"\ 1254 "psllq $56, " #t1 " \n\t"\ 1255 "por " #t1 ", " #t0 " \n\t" \ 1256 "movq " #lx ", " #t1 " \n\t" \ 1260 "movq " #lx ", 8(%4) \n\t"\ 1261 "movq (%4), " #lx " \n\t"\ 1262 "psubusb " #lx ", " #t1 " \n\t"\ 1263 "psubusb " #lx ", " #t0 " \n\t"\ 1264 "psubusb " #lx ", " #sx " \n\t"\ 1265 "movq "MANGLE(b00)", " #lx " \n\t"\ 1266 "pcmpeqb " #lx ", " #t1 " \n\t" \ 1267 "pcmpeqb " #lx ", " #t0 " \n\t" \ 1268 "pcmpeqb " #lx ", " #sx " \n\t" \ 1269 "paddb " #t1 ", " #t0 " \n\t"\ 1270 "paddb " #t0 ", " #sx " \n\t"\ 1273 "movq " #dst ", " #t0 " \n\t" \ 1274 "movq " #t0 ", " #t1 " \n\t" \ 1275 "psubusb %3, " #t0 " \n\t"\ 1276 "paddusb %3, " #t1 " \n\t"\ 1278 PMINUB(t1, pplx, t0)\ 1279 "paddb " #sx ", " #ppsx " \n\t"\ 1280 "paddb " #psx ", " #ppsx " \n\t"\ 1281 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\ 1282 "pand "MANGLE(b08)", " #ppsx " \n\t"\ 1283 "pcmpeqb " #lx ", " #ppsx " \n\t"\ 1284 "pand " #ppsx ", " #pplx " \n\t"\ 1285 "pandn " #dst ", " #ppsx " \n\t"\ 1286 "por " #pplx ", " #ppsx " \n\t"\ 1287 "movq " #ppsx ", " #dst " \n\t"\ 1288 "movq 8(%4), " #lx " \n\t" 1290 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ 1291 REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) 1308 DERING_CORE((%%FF_REGa) ,(%%FF_REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1309 DERING_CORE((%%FF_REGa, %1) ,(%%FF_REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1310 DERING_CORE((%%FF_REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
1311 DERING_CORE((%0, %1, 4) ,(%%FF_REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1312 DERING_CORE((%%FF_REGd) ,(%%FF_REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1313 DERING_CORE((%%FF_REGd, %1) ,(%%FF_REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
1314 DERING_CORE((%%FF_REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1315 DERING_CORE((%0, %1, 8) ,(%%FF_REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1320 :
"%"FF_REG_a,
"%"FF_REG_d,
"%"FF_REG_sp
1322 #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) 1329 const int QP2=
c->QP/2 + 1;
1337 if(*p > max) max= *p;
1338 if(*p < min) min= *p;
1341 avg= (min + max + 1)>>1;
1343 if(max - min <deringThreshold)
return;
1345 for(y=0; y<10; y++){
1360 t &= (t<<1) & (t>>1);
1365 int t = s[y-1] & s[y] & s[y+1];
1379 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
1383 #ifdef DEBUG_DERING_THRESHOLD 1384 __asm__
volatile(
"emms\n\t":);
1386 static uint64_t numPixels=0;
1387 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
1392 static int numSkipped=0;
1393 static int errorSum=0;
1394 static int worstQP=0;
1395 static int worstRange=0;
1396 static int worstDiff=0;
1398 int absDiff=
FFABS(diff);
1401 if(x==1 || x==8 || y==1 || y==8)
continue;
1404 if(absDiff > worstDiff){
1407 worstRange= max-
min;
1411 if(1024LL*1024LL*1024LL % numSkipped == 0){
1413 "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
1414 (
float)errorSum/numSkipped, numSkipped, worstQP, worstRange,
1415 worstDiff, (
float)numSkipped/numPixels);
1420 if (*p + QP2 < f) *p= *p + QP2;
1421 else if(*p - QP2 > f) *p= *p - QP2;
1426 #ifdef DEBUG_DERING_THRESHOLD 1434 *p =
FFMIN(*p + 20, 255);
1440 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1442 #endif //TEMPLATE_PP_ALTIVEC 1452 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1455 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1456 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 1460 "movq (%0), %%mm0 \n\t" 1461 "movq (%%"FF_REG_a
", %1), %%mm1 \n\t" 1463 "movq %%mm0, (%%"FF_REG_a
") \n\t" 1464 "movq (%0, %1, 4), %%mm0 \n\t" 1466 "movq %%mm1, (%%"FF_REG_a
", %1, 2) \n\t" 1467 "movq (%%"FF_REG_c
", %1), %%mm1 \n\t" 1469 "movq %%mm0, (%%"FF_REG_c
") \n\t" 1470 "movq (%0, %1, 8), %%mm0 \n\t" 1472 "movq %%mm1, (%%"FF_REG_c
", %1, 2) \n\t" 1475 :
"%"FF_REG_a,
"%"FF_REG_c
1484 *(uint32_t*)&
src[
stride*1]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1486 *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1488 *(uint32_t*)&
src[
stride*5]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1490 *(uint32_t*)&
src[
stride*7]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1505 #if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1508 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1509 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1510 "lea (%%"FF_REG_d
", %1, 4), %%"FF_REG_c
"\n\t" 1511 "add %1, %%"FF_REG_c
" \n\t" 1512 #if TEMPLATE_PP_SSE2 1513 "pxor %%xmm7, %%xmm7 \n\t" 1514 #define REAL_DEINT_CUBIC(a,b,c,d,e)\ 1515 "movq " #a ", %%xmm0 \n\t"\ 1516 "movq " #b ", %%xmm1 \n\t"\ 1517 "movq " #d ", %%xmm2 \n\t"\ 1518 "movq " #e ", %%xmm3 \n\t"\ 1519 "pavgb %%xmm2, %%xmm1 \n\t"\ 1520 "pavgb %%xmm3, %%xmm0 \n\t"\ 1521 "punpcklbw %%xmm7, %%xmm0 \n\t"\ 1522 "punpcklbw %%xmm7, %%xmm1 \n\t"\ 1523 "psubw %%xmm1, %%xmm0 \n\t"\ 1524 "psraw $3, %%xmm0 \n\t"\ 1525 "psubw %%xmm0, %%xmm1 \n\t"\ 1526 "packuswb %%xmm1, %%xmm1 \n\t"\ 1527 "movlps %%xmm1, " #c " \n\t" 1528 #else //TEMPLATE_PP_SSE2 1529 "pxor %%mm7, %%mm7 \n\t" 1533 #define REAL_DEINT_CUBIC(a,b,c,d,e)\ 1534 "movq " #a ", %%mm0 \n\t"\ 1535 "movq " #b ", %%mm1 \n\t"\ 1536 "movq " #d ", %%mm2 \n\t"\ 1537 "movq " #e ", %%mm3 \n\t"\ 1538 PAVGB(%%mm2, %%mm1) \ 1539 PAVGB(%%mm3, %%mm0) \ 1540 "movq %%mm0, %%mm2 \n\t"\ 1541 "punpcklbw %%mm7, %%mm0 \n\t"\ 1542 "punpckhbw %%mm7, %%mm2 \n\t"\ 1543 "movq %%mm1, %%mm3 \n\t"\ 1544 "punpcklbw %%mm7, %%mm1 \n\t"\ 1545 "punpckhbw %%mm7, %%mm3 \n\t"\ 1546 "psubw %%mm1, %%mm0 \n\t" \ 1547 "psubw %%mm3, %%mm2 \n\t" \ 1548 "psraw $3, %%mm0 \n\t" \ 1549 "psraw $3, %%mm2 \n\t" \ 1550 "psubw %%mm0, %%mm1 \n\t" \ 1551 "psubw %%mm2, %%mm3 \n\t" \ 1552 "packuswb %%mm3, %%mm1 \n\t"\ 1553 "movq %%mm1, " #c " \n\t" 1554 #endif //TEMPLATE_PP_SSE2 1555 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e) 1557 DEINT_CUBIC((%0) , (%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd, %1))
1558 DEINT_CUBIC((%%FF_REGa, %1), (%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1), (%0, %1, 8))
1559 DEINT_CUBIC((%0, %1, 4) , (%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGc))
1560 DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8) , (%%FF_REGd, %1, 4), (%%FF_REGc) , (%%FF_REGc, %1, 2))
1565 XMM_CLOBBERS(
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm7",)
1567 "%"FF_REG_a,
"%"FF_REG_d,
"%"FF_REG_c
1569 #undef REAL_DEINT_CUBIC 1570 #else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1575 src[stride*5] = av_clip_uint8((-
src[stride*2] + 9*
src[stride*4] + 9*
src[stride*6] -
src[stride*8])>>4);
1576 src[stride*7] = av_clip_uint8((-
src[stride*4] + 9*
src[stride*6] + 9*
src[stride*8] -
src[stride*10])>>4);
1577 src[stride*9] = av_clip_uint8((-
src[stride*6] + 9*
src[stride*8] + 9*
src[stride*10] -
src[stride*12])>>4);
1580 #endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1592 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1595 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1596 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1597 "pxor %%mm7, %%mm7 \n\t" 1598 "movq (%2), %%mm0 \n\t" 1602 #define REAL_DEINT_FF(a,b,c,d)\ 1603 "movq " #a ", %%mm1 \n\t"\ 1604 "movq " #b ", %%mm2 \n\t"\ 1605 "movq " #c ", %%mm3 \n\t"\ 1606 "movq " #d ", %%mm4 \n\t"\ 1607 PAVGB(%%mm3, %%mm1) \ 1608 PAVGB(%%mm4, %%mm0) \ 1609 "movq %%mm0, %%mm3 \n\t"\ 1610 "punpcklbw %%mm7, %%mm0 \n\t"\ 1611 "punpckhbw %%mm7, %%mm3 \n\t"\ 1612 "movq %%mm1, %%mm4 \n\t"\ 1613 "punpcklbw %%mm7, %%mm1 \n\t"\ 1614 "punpckhbw %%mm7, %%mm4 \n\t"\ 1615 "psllw $2, %%mm1 \n\t"\ 1616 "psllw $2, %%mm4 \n\t"\ 1617 "psubw %%mm0, %%mm1 \n\t"\ 1618 "psubw %%mm3, %%mm4 \n\t"\ 1619 "movq %%mm2, %%mm5 \n\t"\ 1620 "movq %%mm2, %%mm0 \n\t"\ 1621 "punpcklbw %%mm7, %%mm2 \n\t"\ 1622 "punpckhbw %%mm7, %%mm5 \n\t"\ 1623 "paddw %%mm2, %%mm1 \n\t"\ 1624 "paddw %%mm5, %%mm4 \n\t"\ 1625 "psraw $2, %%mm1 \n\t"\ 1626 "psraw $2, %%mm4 \n\t"\ 1627 "packuswb %%mm4, %%mm1 \n\t"\ 1628 "movq %%mm1, " #b " \n\t"\ 1630 #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d) 1632 DEINT_FF((%0) , (%%FF_REGa) , (%%FF_REGa, %1), (%%FF_REGa, %1, 2))
1633 DEINT_FF((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd) )
1634 DEINT_FF((%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1), (%%FF_REGd, %1, 2))
1635 DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4))
1637 "movq %%mm0, (%2) \n\t" 1639 :
"%"FF_REG_a,
"%"FF_REG_d
1641 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1659 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1671 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 1674 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1675 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1676 "pxor %%mm7, %%mm7 \n\t" 1677 "movq (%2), %%mm0 \n\t" 1678 "movq (%3), %%mm1 \n\t" 1682 #define REAL_DEINT_L5(t1,t2,a,b,c)\ 1683 "movq " #a ", %%mm2 \n\t"\ 1684 "movq " #b ", %%mm3 \n\t"\ 1685 "movq " #c ", %%mm4 \n\t"\ 1688 "movq %%mm2, %%mm5 \n\t"\ 1689 "movq %%mm2, " #t1 " \n\t"\ 1690 "punpcklbw %%mm7, %%mm2 \n\t"\ 1691 "punpckhbw %%mm7, %%mm5 \n\t"\ 1692 "movq %%mm2, %%mm6 \n\t"\ 1693 "paddw %%mm2, %%mm2 \n\t"\ 1694 "paddw %%mm6, %%mm2 \n\t"\ 1695 "movq %%mm5, %%mm6 \n\t"\ 1696 "paddw %%mm5, %%mm5 \n\t"\ 1697 "paddw %%mm6, %%mm5 \n\t"\ 1698 "movq %%mm3, %%mm6 \n\t"\ 1699 "punpcklbw %%mm7, %%mm3 \n\t"\ 1700 "punpckhbw %%mm7, %%mm6 \n\t"\ 1701 "paddw %%mm3, %%mm3 \n\t"\ 1702 "paddw %%mm6, %%mm6 \n\t"\ 1703 "paddw %%mm3, %%mm2 \n\t"\ 1704 "paddw %%mm6, %%mm5 \n\t"\ 1705 "movq %%mm4, %%mm6 \n\t"\ 1706 "punpcklbw %%mm7, %%mm4 \n\t"\ 1707 "punpckhbw %%mm7, %%mm6 \n\t"\ 1708 "psubw %%mm4, %%mm2 \n\t"\ 1709 "psubw %%mm6, %%mm5 \n\t"\ 1710 "psraw $2, %%mm2 \n\t"\ 1711 "psraw $2, %%mm5 \n\t"\ 1712 "packuswb %%mm5, %%mm2 \n\t"\ 1713 "movq %%mm2, " #a " \n\t"\ 1715 #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c) 1717 DEINT_L5(%%mm0, %%mm1, (%0) , (%%FF_REGa) , (%%FF_REGa, %1) )
1718 DEINT_L5(%%mm1, %%mm0, (%%FF_REGa) , (%%FF_REGa, %1) , (%%FF_REGa, %1, 2))
1719 DEINT_L5(%%mm0, %%mm1, (%%FF_REGa, %1) , (%%FF_REGa, %1, 2), (%0, %1, 4) )
1720 DEINT_L5(%%mm1, %%mm0, (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd) )
1721 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1) )
1722 DEINT_L5(%%mm1, %%mm0, (%%FF_REGd) , (%%FF_REGd, %1) , (%%FF_REGd, %1, 2))
1723 DEINT_L5(%%mm0, %%mm1, (%%FF_REGd, %1) , (%%FF_REGd, %1, 2), (%0, %1, 8) )
1724 DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4))
1726 "movq %%mm0, (%2) \n\t" 1727 "movq %%mm1, (%3) \n\t" 1729 :
"%"FF_REG_a,
"%"FF_REG_d
1731 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 1741 src[stride*1]= av_clip_uint8((-(t2 +
src[stride*3]) + 2*(t3 +
src[stride*2]) + 6*t1 + 4)>>3);
1743 src[stride*2]= av_clip_uint8((-(t3 +
src[stride*4]) + 2*(t1 +
src[stride*3]) + 6*t2 + 4)>>3);
1745 src[stride*3]= av_clip_uint8((-(t1 +
src[stride*5]) + 2*(t2 +
src[stride*4]) + 6*t3 + 4)>>3);
1747 src[stride*4]= av_clip_uint8((-(t2 +
src[stride*6]) + 2*(t3 +
src[stride*5]) + 6*t1 + 4)>>3);
1749 src[stride*5]= av_clip_uint8((-(t3 +
src[stride*7]) + 2*(t1 +
src[stride*6]) + 6*t2 + 4)>>3);
1751 src[stride*6]= av_clip_uint8((-(t1 +
src[stride*8]) + 2*(t2 +
src[stride*7]) + 6*t3 + 4)>>3);
1753 src[stride*7]= av_clip_uint8((-(t2 +
src[stride*9]) + 2*(t3 +
src[stride*8]) + 6*t1 + 4)>>3);
1760 #endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 1772 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1775 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1776 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1780 "movq (%2), %%mm0 \n\t" 1781 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 1783 "movq (%0), %%mm2 \n\t" 1785 "movq %%mm0, (%0) \n\t" 1786 "movq (%%"FF_REG_a
", %1), %%mm0 \n\t" 1789 "movq %%mm2, (%%"FF_REG_a
") \n\t" 1790 "movq (%%"FF_REG_a
", %1, 2), %%mm2 \n\t" 1793 "movq %%mm1, (%%"FF_REG_a
", %1) \n\t" 1794 "movq (%0, %1, 4), %%mm1 \n\t" 1797 "movq %%mm0, (%%"FF_REG_a
", %1, 2) \n\t" 1798 "movq (%%"FF_REG_d
"), %%mm0 \n\t" 1801 "movq %%mm2, (%0, %1, 4) \n\t" 1802 "movq (%%"FF_REG_d
", %1), %%mm2 \n\t" 1805 "movq %%mm1, (%%"FF_REG_d
") \n\t" 1806 "movq (%%"FF_REG_d
", %1, 2), %%mm1 \n\t" 1809 "movq %%mm0, (%%"FF_REG_d
", %1) \n\t" 1810 "movq (%0, %1, 8), %%mm0 \n\t" 1813 "movq %%mm2, (%%"FF_REG_d
", %1, 2) \n\t" 1814 "movq %%mm1, (%2) \n\t" 1817 :
"%"FF_REG_a,
"%"FF_REG_d
1819 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1827 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1828 *(uint32_t*)&
src[
stride*0]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1831 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1832 *(uint32_t*)&
src[
stride*1]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1835 c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
1836 *(uint32_t*)&
src[
stride*2]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
1839 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1840 *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1843 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1844 *(uint32_t*)&
src[
stride*4]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1847 c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
1848 *(uint32_t*)&
src[
stride*5]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
1851 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1852 *(uint32_t*)&
src[
stride*6]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1855 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1856 *(uint32_t*)&
src[
stride*7]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1862 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1875 #if TEMPLATE_PP_MMXEXT 1877 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1878 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1882 "movq (%0), %%mm0 \n\t" 1883 "movq (%%"FF_REG_a
", %1), %%mm2 \n\t" 1884 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 1885 "movq %%mm0, %%mm3 \n\t" 1886 "pmaxub %%mm1, %%mm0 \n\t" 1887 "pminub %%mm3, %%mm1 \n\t" 1888 "pmaxub %%mm2, %%mm1 \n\t" 1889 "pminub %%mm1, %%mm0 \n\t" 1890 "movq %%mm0, (%%"FF_REG_a
") \n\t" 1892 "movq (%0, %1, 4), %%mm0 \n\t" 1893 "movq (%%"FF_REG_a
", %1, 2), %%mm1 \n\t" 1894 "movq %%mm2, %%mm3 \n\t" 1895 "pmaxub %%mm1, %%mm2 \n\t" 1896 "pminub %%mm3, %%mm1 \n\t" 1897 "pmaxub %%mm0, %%mm1 \n\t" 1898 "pminub %%mm1, %%mm2 \n\t" 1899 "movq %%mm2, (%%"FF_REG_a
", %1, 2) \n\t" 1901 "movq (%%"FF_REG_d
"), %%mm2 \n\t" 1902 "movq (%%"FF_REG_d
", %1), %%mm1 \n\t" 1903 "movq %%mm2, %%mm3 \n\t" 1904 "pmaxub %%mm0, %%mm2 \n\t" 1905 "pminub %%mm3, %%mm0 \n\t" 1906 "pmaxub %%mm1, %%mm0 \n\t" 1907 "pminub %%mm0, %%mm2 \n\t" 1908 "movq %%mm2, (%%"FF_REG_d
") \n\t" 1910 "movq (%%"FF_REG_d
", %1, 2), %%mm2 \n\t" 1911 "movq (%0, %1, 8), %%mm0 \n\t" 1912 "movq %%mm2, %%mm3 \n\t" 1913 "pmaxub %%mm0, %%mm2 \n\t" 1914 "pminub %%mm3, %%mm0 \n\t" 1915 "pmaxub %%mm1, %%mm0 \n\t" 1916 "pminub %%mm0, %%mm2 \n\t" 1917 "movq %%mm2, (%%"FF_REG_d
", %1, 2) \n\t" 1921 :
"%"FF_REG_a,
"%"FF_REG_d
1924 #else // MMX without MMX2 1926 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1927 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1930 "pxor %%mm7, %%mm7 \n\t" 1932 #define REAL_MEDIAN(a,b,c)\ 1933 "movq " #a ", %%mm0 \n\t"\ 1934 "movq " #b ", %%mm2 \n\t"\ 1935 "movq " #c ", %%mm1 \n\t"\ 1936 "movq %%mm0, %%mm3 \n\t"\ 1937 "movq %%mm1, %%mm4 \n\t"\ 1938 "movq %%mm2, %%mm5 \n\t"\ 1939 "psubusb %%mm1, %%mm3 \n\t"\ 1940 "psubusb %%mm2, %%mm4 \n\t"\ 1941 "psubusb %%mm0, %%mm5 \n\t"\ 1942 "pcmpeqb %%mm7, %%mm3 \n\t"\ 1943 "pcmpeqb %%mm7, %%mm4 \n\t"\ 1944 "pcmpeqb %%mm7, %%mm5 \n\t"\ 1945 "movq %%mm3, %%mm6 \n\t"\ 1946 "pxor %%mm4, %%mm3 \n\t"\ 1947 "pxor %%mm5, %%mm4 \n\t"\ 1948 "pxor %%mm6, %%mm5 \n\t"\ 1949 "por %%mm3, %%mm1 \n\t"\ 1950 "por %%mm4, %%mm2 \n\t"\ 1951 "por %%mm5, %%mm0 \n\t"\ 1952 "pand %%mm2, %%mm0 \n\t"\ 1953 "pand %%mm1, %%mm0 \n\t"\ 1954 "movq %%mm0, " #b " \n\t" 1955 #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c) 1957 MEDIAN((%0) , (%%FF_REGa) , (%%FF_REGa, %1))
1958 MEDIAN((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4))
1959 MEDIAN((%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1))
1960 MEDIAN((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8))
1963 :
"%"FF_REG_a,
"%"FF_REG_d
1965 #endif //TEMPLATE_PP_MMXEXT 1966 #else //TEMPLATE_PP_MMX 1972 for (y=0; y<4; y++){
1973 int a,
b,
c, d, e, f;
1980 colsrc[
stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
1985 #endif //TEMPLATE_PP_MMX 1995 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1998 "movq (%0), %%mm0 \n\t" 1999 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 2000 "movq %%mm0, %%mm2 \n\t" 2001 "punpcklbw %%mm1, %%mm0 \n\t" 2002 "punpckhbw %%mm1, %%mm2 \n\t" 2004 "movq (%%"FF_REG_a
", %1), %%mm1 \n\t" 2005 "movq (%%"FF_REG_a
", %1, 2), %%mm3 \n\t" 2006 "movq %%mm1, %%mm4 \n\t" 2007 "punpcklbw %%mm3, %%mm1 \n\t" 2008 "punpckhbw %%mm3, %%mm4 \n\t" 2010 "movq %%mm0, %%mm3 \n\t" 2011 "punpcklwd %%mm1, %%mm0 \n\t" 2012 "punpckhwd %%mm1, %%mm3 \n\t" 2013 "movq %%mm2, %%mm1 \n\t" 2014 "punpcklwd %%mm4, %%mm2 \n\t" 2015 "punpckhwd %%mm4, %%mm1 \n\t" 2017 "movd %%mm0, 128(%2) \n\t" 2018 "psrlq $32, %%mm0 \n\t" 2019 "movd %%mm0, 144(%2) \n\t" 2020 "movd %%mm3, 160(%2) \n\t" 2021 "psrlq $32, %%mm3 \n\t" 2022 "movd %%mm3, 176(%2) \n\t" 2023 "movd %%mm3, 48(%3) \n\t" 2024 "movd %%mm2, 192(%2) \n\t" 2025 "movd %%mm2, 64(%3) \n\t" 2026 "psrlq $32, %%mm2 \n\t" 2027 "movd %%mm2, 80(%3) \n\t" 2028 "movd %%mm1, 96(%3) \n\t" 2029 "psrlq $32, %%mm1 \n\t" 2030 "movd %%mm1, 112(%3) \n\t" 2032 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_a
"\n\t" 2034 "movq (%0, %1, 4), %%mm0 \n\t" 2035 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 2036 "movq %%mm0, %%mm2 \n\t" 2037 "punpcklbw %%mm1, %%mm0 \n\t" 2038 "punpckhbw %%mm1, %%mm2 \n\t" 2040 "movq (%%"FF_REG_a
", %1), %%mm1 \n\t" 2041 "movq (%%"FF_REG_a
", %1, 2), %%mm3 \n\t" 2042 "movq %%mm1, %%mm4 \n\t" 2043 "punpcklbw %%mm3, %%mm1 \n\t" 2044 "punpckhbw %%mm3, %%mm4 \n\t" 2046 "movq %%mm0, %%mm3 \n\t" 2047 "punpcklwd %%mm1, %%mm0 \n\t" 2048 "punpckhwd %%mm1, %%mm3 \n\t" 2049 "movq %%mm2, %%mm1 \n\t" 2050 "punpcklwd %%mm4, %%mm2 \n\t" 2051 "punpckhwd %%mm4, %%mm1 \n\t" 2053 "movd %%mm0, 132(%2) \n\t" 2054 "psrlq $32, %%mm0 \n\t" 2055 "movd %%mm0, 148(%2) \n\t" 2056 "movd %%mm3, 164(%2) \n\t" 2057 "psrlq $32, %%mm3 \n\t" 2058 "movd %%mm3, 180(%2) \n\t" 2059 "movd %%mm3, 52(%3) \n\t" 2060 "movd %%mm2, 196(%2) \n\t" 2061 "movd %%mm2, 68(%3) \n\t" 2062 "psrlq $32, %%mm2 \n\t" 2063 "movd %%mm2, 84(%3) \n\t" 2064 "movd %%mm1, 100(%3) \n\t" 2065 "psrlq $32, %%mm1 \n\t" 2066 "movd %%mm1, 116(%3) \n\t" 2069 ::
"r" (
src),
"r" ((
x86_reg)srcStride),
"r" (dst1),
"r" (dst2)
2080 "lea (%0, %1), %%"FF_REG_a
" \n\t" 2081 "lea (%%"FF_REG_a
",%1,4), %%"FF_REG_d
" \n\t" 2084 "movq (%2), %%mm0 \n\t" 2085 "movq 16(%2), %%mm1 \n\t" 2086 "movq %%mm0, %%mm2 \n\t" 2087 "punpcklbw %%mm1, %%mm0 \n\t" 2088 "punpckhbw %%mm1, %%mm2 \n\t" 2090 "movq 32(%2), %%mm1 \n\t" 2091 "movq 48(%2), %%mm3 \n\t" 2092 "movq %%mm1, %%mm4 \n\t" 2093 "punpcklbw %%mm3, %%mm1 \n\t" 2094 "punpckhbw %%mm3, %%mm4 \n\t" 2096 "movq %%mm0, %%mm3 \n\t" 2097 "punpcklwd %%mm1, %%mm0 \n\t" 2098 "punpckhwd %%mm1, %%mm3 \n\t" 2099 "movq %%mm2, %%mm1 \n\t" 2100 "punpcklwd %%mm4, %%mm2 \n\t" 2101 "punpckhwd %%mm4, %%mm1 \n\t" 2103 "movd %%mm0, (%0) \n\t" 2104 "psrlq $32, %%mm0 \n\t" 2105 "movd %%mm0, (%%"FF_REG_a
") \n\t" 2106 "movd %%mm3, (%%"FF_REG_a
", %1) \n\t" 2107 "psrlq $32, %%mm3 \n\t" 2108 "movd %%mm3, (%%"FF_REG_a
", %1, 2) \n\t" 2109 "movd %%mm2, (%0, %1, 4) \n\t" 2110 "psrlq $32, %%mm2 \n\t" 2111 "movd %%mm2, (%%"FF_REG_d
") \n\t" 2112 "movd %%mm1, (%%"FF_REG_d
", %1) \n\t" 2113 "psrlq $32, %%mm1 \n\t" 2114 "movd %%mm1, (%%"FF_REG_d
", %1, 2) \n\t" 2117 "movq 64(%2), %%mm0 \n\t" 2118 "movq 80(%2), %%mm1 \n\t" 2119 "movq %%mm0, %%mm2 \n\t" 2120 "punpcklbw %%mm1, %%mm0 \n\t" 2121 "punpckhbw %%mm1, %%mm2 \n\t" 2123 "movq 96(%2), %%mm1 \n\t" 2124 "movq 112(%2), %%mm3 \n\t" 2125 "movq %%mm1, %%mm4 \n\t" 2126 "punpcklbw %%mm3, %%mm1 \n\t" 2127 "punpckhbw %%mm3, %%mm4 \n\t" 2129 "movq %%mm0, %%mm3 \n\t" 2130 "punpcklwd %%mm1, %%mm0 \n\t" 2131 "punpckhwd %%mm1, %%mm3 \n\t" 2132 "movq %%mm2, %%mm1 \n\t" 2133 "punpcklwd %%mm4, %%mm2 \n\t" 2134 "punpckhwd %%mm4, %%mm1 \n\t" 2136 "movd %%mm0, 4(%0) \n\t" 2137 "psrlq $32, %%mm0 \n\t" 2138 "movd %%mm0, 4(%%"FF_REG_a
") \n\t" 2139 "movd %%mm3, 4(%%"FF_REG_a
", %1) \n\t" 2140 "psrlq $32, %%mm3 \n\t" 2141 "movd %%mm3, 4(%%"FF_REG_a
", %1, 2) \n\t" 2142 "movd %%mm2, 4(%0, %1, 4) \n\t" 2143 "psrlq $32, %%mm2 \n\t" 2144 "movd %%mm2, 4(%%"FF_REG_d
") \n\t" 2145 "movd %%mm1, 4(%%"FF_REG_d
", %1) \n\t" 2146 "psrlq $32, %%mm1 \n\t" 2147 "movd %%mm1, 4(%%"FF_REG_d
", %1, 2) \n\t" 2149 ::
"r" (dst),
"r" ((
x86_reg)dstStride),
"r" (
src)
2150 :
"%"FF_REG_a,
"%"FF_REG_d
2153 #endif //TEMPLATE_PP_MMX 2156 #if !TEMPLATE_PP_ALTIVEC 2158 uint8_t *tempBlurred, uint32_t *tempBlurredPast,
const int *maxNoise)
2161 tempBlurredPast[127]= maxNoise[0];
2162 tempBlurredPast[128]= maxNoise[1];
2163 tempBlurredPast[129]= maxNoise[2];
2165 #define FAST_L2_DIFF 2167 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 2169 "lea (%2, %2, 2), %%"FF_REG_a
" \n\t" 2170 "lea (%2, %2, 4), %%"FF_REG_d
" \n\t" 2171 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2175 #ifdef L1_DIFF //needs mmx2 2176 "movq (%0), %%mm0 \n\t" 2177 "psadbw (%1), %%mm0 \n\t" 2178 "movq (%0, %2), %%mm1 \n\t" 2179 "psadbw (%1, %2), %%mm1 \n\t" 2180 "movq (%0, %2, 2), %%mm2 \n\t" 2181 "psadbw (%1, %2, 2), %%mm2 \n\t" 2182 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2183 "psadbw (%1, %%"FF_REG_a
"), %%mm3 \n\t" 2185 "movq (%0, %2, 4), %%mm4 \n\t" 2186 "paddw %%mm1, %%mm0 \n\t" 2187 "psadbw (%1, %2, 4), %%mm4 \n\t" 2188 "movq (%0, %%"FF_REG_d
"), %%mm5 \n\t" 2189 "paddw %%mm2, %%mm0 \n\t" 2190 "psadbw (%1, %%"FF_REG_d
"), %%mm5 \n\t" 2191 "movq (%0, %%"FF_REG_a
", 2), %%mm6 \n\t" 2192 "paddw %%mm3, %%mm0 \n\t" 2193 "psadbw (%1, %%"FF_REG_a
", 2), %%mm6 \n\t" 2194 "movq (%0, %%"FF_REG_c
"), %%mm7 \n\t" 2195 "paddw %%mm4, %%mm0 \n\t" 2196 "psadbw (%1, %%"FF_REG_c
"), %%mm7 \n\t" 2197 "paddw %%mm5, %%mm6 \n\t" 2198 "paddw %%mm7, %%mm6 \n\t" 2199 "paddw %%mm6, %%mm0 \n\t" 2201 #if defined (FAST_L2_DIFF) 2202 "pcmpeqb %%mm7, %%mm7 \n\t" 2203 "movq "MANGLE(b80)
", %%mm6 \n\t" 2204 "pxor %%mm0, %%mm0 \n\t" 2205 #define REAL_L2_DIFF_CORE(a, b)\ 2206 "movq " #a ", %%mm5 \n\t"\ 2207 "movq " #b ", %%mm2 \n\t"\ 2208 "pxor %%mm7, %%mm2 \n\t"\ 2209 PAVGB(%%mm2, %%mm5)\ 2210 "paddb %%mm6, %%mm5 \n\t"\ 2211 "movq %%mm5, %%mm2 \n\t"\ 2212 "psllw $8, %%mm5 \n\t"\ 2213 "pmaddwd %%mm5, %%mm5 \n\t"\ 2214 "pmaddwd %%mm2, %%mm2 \n\t"\ 2215 "paddd %%mm2, %%mm5 \n\t"\ 2216 "psrld $14, %%mm5 \n\t"\ 2217 "paddd %%mm5, %%mm0 \n\t" 2219 #else //defined (FAST_L2_DIFF) 2220 "pxor %%mm7, %%mm7 \n\t" 2221 "pxor %%mm0, %%mm0 \n\t" 2222 #define REAL_L2_DIFF_CORE(a, b)\ 2223 "movq " #a ", %%mm5 \n\t"\ 2224 "movq " #b ", %%mm2 \n\t"\ 2225 "movq %%mm5, %%mm1 \n\t"\ 2226 "movq %%mm2, %%mm3 \n\t"\ 2227 "punpcklbw %%mm7, %%mm5 \n\t"\ 2228 "punpckhbw %%mm7, %%mm1 \n\t"\ 2229 "punpcklbw %%mm7, %%mm2 \n\t"\ 2230 "punpckhbw %%mm7, %%mm3 \n\t"\ 2231 "psubw %%mm2, %%mm5 \n\t"\ 2232 "psubw %%mm3, %%mm1 \n\t"\ 2233 "pmaddwd %%mm5, %%mm5 \n\t"\ 2234 "pmaddwd %%mm1, %%mm1 \n\t"\ 2235 "paddd %%mm1, %%mm5 \n\t"\ 2236 "paddd %%mm5, %%mm0 \n\t" 2238 #endif //defined (FAST_L2_DIFF) 2240 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b) 2242 L2_DIFF_CORE((%0) , (%1))
2243 L2_DIFF_CORE((%0, %2) , (%1, %2))
2244 L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2))
2245 L2_DIFF_CORE((%0, %%FF_REGa) , (%1, %%FF_REGa))
2246 L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4))
2247 L2_DIFF_CORE((%0, %%FF_REGd) , (%1, %%FF_REGd))
2248 L2_DIFF_CORE((%0, %%FF_REGa,2), (%1, %%FF_REGa,2))
2249 L2_DIFF_CORE((%0, %%FF_REGc) , (%1, %%FF_REGc))
2253 "movq %%mm0, %%mm4 \n\t" 2254 "psrlq $32, %%mm0 \n\t" 2255 "paddd %%mm0, %%mm4 \n\t" 2256 "movd %%mm4, %%ecx \n\t" 2257 "shll $2, %%ecx \n\t" 2258 "mov %3, %%"FF_REG_d
" \n\t" 2259 "addl -4(%%"FF_REG_d
"), %%ecx \n\t" 2260 "addl 4(%%"FF_REG_d
"), %%ecx \n\t" 2261 "addl -1024(%%"FF_REG_d
"), %%ecx \n\t" 2262 "addl $4, %%ecx \n\t" 2263 "addl 1024(%%"FF_REG_d
"), %%ecx \n\t" 2264 "shrl $3, %%ecx \n\t" 2265 "movl %%ecx, (%%"FF_REG_d
") \n\t" 2270 "cmpl 512(%%"FF_REG_d
"), %%ecx \n\t" 2272 "cmpl 516(%%"FF_REG_d
"), %%ecx \n\t" 2275 "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t" 2276 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2277 "movq (%0), %%mm0 \n\t" 2278 "movq (%0, %2), %%mm1 \n\t" 2279 "movq (%0, %2, 2), %%mm2 \n\t" 2280 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2281 "movq (%0, %2, 4), %%mm4 \n\t" 2282 "movq (%0, %%"FF_REG_d
"), %%mm5 \n\t" 2283 "movq (%0, %%"FF_REG_a
", 2), %%mm6 \n\t" 2284 "movq (%0, %%"FF_REG_c
"), %%mm7 \n\t" 2285 "movq %%mm0, (%1) \n\t" 2286 "movq %%mm1, (%1, %2) \n\t" 2287 "movq %%mm2, (%1, %2, 2) \n\t" 2288 "movq %%mm3, (%1, %%"FF_REG_a
") \n\t" 2289 "movq %%mm4, (%1, %2, 4) \n\t" 2290 "movq %%mm5, (%1, %%"FF_REG_d
") \n\t" 2291 "movq %%mm6, (%1, %%"FF_REG_a
", 2) \n\t" 2292 "movq %%mm7, (%1, %%"FF_REG_c
") \n\t" 2296 "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t" 2297 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2298 "movq (%0), %%mm0 \n\t" 2300 "movq (%0, %2), %%mm1 \n\t" 2301 PAVGB((%1, %2), %%mm1)
2302 "movq (%0, %2, 2), %%mm2 \n\t" 2303 PAVGB((%1, %2, 2), %%mm2)
2304 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2305 PAVGB((%1, %%FF_REGa), %%mm3)
2306 "movq (%0, %2, 4), %%mm4 \n\t" 2307 PAVGB((%1, %2, 4), %%mm4)
2308 "movq (%0, %%"FF_REG_d
"), %%mm5 \n\t" 2309 PAVGB((%1, %%FF_REGd), %%mm5)
2310 "movq (%0, %%"FF_REG_a
", 2), %%mm6 \n\t" 2311 PAVGB((%1, %%FF_REGa, 2), %%mm6)
2312 "movq (%0, %%"FF_REG_c
"), %%mm7 \n\t" 2313 PAVGB((%1, %%FF_REGc), %%mm7)
2314 "movq %%mm0, (%1) \n\t" 2315 "movq %%mm1, (%1, %2) \n\t" 2316 "movq %%mm2, (%1, %2, 2) \n\t" 2317 "movq %%mm3, (%1, %%"FF_REG_a
") \n\t" 2318 "movq %%mm4, (%1, %2, 4) \n\t" 2319 "movq %%mm5, (%1, %%"FF_REG_d
") \n\t" 2320 "movq %%mm6, (%1, %%"FF_REG_a
", 2) \n\t" 2321 "movq %%mm7, (%1, %%"FF_REG_c
") \n\t" 2322 "movq %%mm0, (%0) \n\t" 2323 "movq %%mm1, (%0, %2) \n\t" 2324 "movq %%mm2, (%0, %2, 2) \n\t" 2325 "movq %%mm3, (%0, %%"FF_REG_a
") \n\t" 2326 "movq %%mm4, (%0, %2, 4) \n\t" 2327 "movq %%mm5, (%0, %%"FF_REG_d
") \n\t" 2328 "movq %%mm6, (%0, %%"FF_REG_a
", 2) \n\t" 2329 "movq %%mm7, (%0, %%"FF_REG_c
") \n\t" 2333 "cmpl 508(%%"FF_REG_d
"), %%ecx \n\t" 2336 "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t" 2337 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2338 "movq (%0), %%mm0 \n\t" 2339 "movq (%0, %2), %%mm1 \n\t" 2340 "movq (%0, %2, 2), %%mm2 \n\t" 2341 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2342 "movq (%1), %%mm4 \n\t" 2343 "movq (%1, %2), %%mm5 \n\t" 2344 "movq (%1, %2, 2), %%mm6 \n\t" 2345 "movq (%1, %%"FF_REG_a
"), %%mm7 \n\t" 2354 "movq %%mm0, (%1) \n\t" 2355 "movq %%mm1, (%1, %2) \n\t" 2356 "movq %%mm2, (%1, %2, 2) \n\t" 2357 "movq %%mm3, (%1, %%"FF_REG_a
") \n\t" 2358 "movq %%mm0, (%0) \n\t" 2359 "movq %%mm1, (%0, %2) \n\t" 2360 "movq %%mm2, (%0, %2, 2) \n\t" 2361 "movq %%mm3, (%0, %%"FF_REG_a
") \n\t" 2363 "movq (%0, %2, 4), %%mm0 \n\t" 2364 "movq (%0, %%"FF_REG_d
"), %%mm1 \n\t" 2365 "movq (%0, %%"FF_REG_a
", 2), %%mm2 \n\t" 2366 "movq (%0, %%"FF_REG_c
"), %%mm3 \n\t" 2367 "movq (%1, %2, 4), %%mm4 \n\t" 2368 "movq (%1, %%"FF_REG_d
"), %%mm5 \n\t" 2369 "movq (%1, %%"FF_REG_a
", 2), %%mm6 \n\t" 2370 "movq (%1, %%"FF_REG_c
"), %%mm7 \n\t" 2379 "movq %%mm0, (%1, %2, 4) \n\t" 2380 "movq %%mm1, (%1, %%"FF_REG_d
") \n\t" 2381 "movq %%mm2, (%1, %%"FF_REG_a
", 2) \n\t" 2382 "movq %%mm3, (%1, %%"FF_REG_c
") \n\t" 2383 "movq %%mm0, (%0, %2, 4) \n\t" 2384 "movq %%mm1, (%0, %%"FF_REG_d
") \n\t" 2385 "movq %%mm2, (%0, %%"FF_REG_a
", 2) \n\t" 2386 "movq %%mm3, (%0, %%"FF_REG_c
") \n\t" 2390 "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t" 2391 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2392 "movq (%0), %%mm0 \n\t" 2393 "movq (%0, %2), %%mm1 \n\t" 2394 "movq (%0, %2, 2), %%mm2 \n\t" 2395 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2396 "movq (%1), %%mm4 \n\t" 2397 "movq (%1, %2), %%mm5 \n\t" 2398 "movq (%1, %2, 2), %%mm6 \n\t" 2399 "movq (%1, %%"FF_REG_a
"), %%mm7 \n\t" 2412 "movq %%mm0, (%1) \n\t" 2413 "movq %%mm1, (%1, %2) \n\t" 2414 "movq %%mm2, (%1, %2, 2) \n\t" 2415 "movq %%mm3, (%1, %%"FF_REG_a
") \n\t" 2416 "movq %%mm0, (%0) \n\t" 2417 "movq %%mm1, (%0, %2) \n\t" 2418 "movq %%mm2, (%0, %2, 2) \n\t" 2419 "movq %%mm3, (%0, %%"FF_REG_a
") \n\t" 2421 "movq (%0, %2, 4), %%mm0 \n\t" 2422 "movq (%0, %%"FF_REG_d
"), %%mm1 \n\t" 2423 "movq (%0, %%"FF_REG_a
", 2), %%mm2 \n\t" 2424 "movq (%0, %%"FF_REG_c
"), %%mm3 \n\t" 2425 "movq (%1, %2, 4), %%mm4 \n\t" 2426 "movq (%1, %%"FF_REG_d
"), %%mm5 \n\t" 2427 "movq (%1, %%"FF_REG_a
", 2), %%mm6 \n\t" 2428 "movq (%1, %%"FF_REG_c
"), %%mm7 \n\t" 2441 "movq %%mm0, (%1, %2, 4) \n\t" 2442 "movq %%mm1, (%1, %%"FF_REG_d
") \n\t" 2443 "movq %%mm2, (%1, %%"FF_REG_a
", 2) \n\t" 2444 "movq %%mm3, (%1, %%"FF_REG_c
") \n\t" 2445 "movq %%mm0, (%0, %2, 4) \n\t" 2446 "movq %%mm1, (%0, %%"FF_REG_d
") \n\t" 2447 "movq %%mm2, (%0, %%"FF_REG_a
", 2) \n\t" 2448 "movq %%mm3, (%0, %%"FF_REG_c
") \n\t" 2452 ::
"r" (
src),
"r" (tempBlurred),
"r"((
x86_reg)
stride),
"m" (tempBlurredPast)
2454 :
"%"FF_REG_a,
"%"FF_REG_d,
"%"FF_REG_c,
"memory" 2456 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 2479 +(*(tempBlurredPast-256))
2480 +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
2481 +(*(tempBlurredPast+256))
2493 if(d > maxNoise[1]){
2494 if(d < maxNoise[2]){
2500 tempBlurred[ x + y*
stride ]=
2514 if(d < maxNoise[0]){
2520 tempBlurred[ x + y*
stride ]=
2522 (ref*7 + cur + 4)>>3;
2531 tempBlurred[ x + y*
stride ]=
2533 (ref*3 + cur + 2)>>2;
2539 #endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 2541 #endif //TEMPLATE_PP_ALTIVEC 2548 int64_t dc_mask, eq_mask, both_masks;
2549 int64_t sums[10*8*2];
2553 "movq %0, %%mm7 \n\t" 2554 "movq %1, %%mm6 \n\t" 2555 : :
"m" (
c->mmxDcOffset[
c->nonBQP]),
"m" (
c->mmxDcThreshold[
c->nonBQP])
2559 "lea (%2, %3), %%"FF_REG_a
" \n\t" 2563 "movq (%2), %%mm0 \n\t" 2564 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 2565 "movq %%mm1, %%mm3 \n\t" 2566 "movq %%mm1, %%mm4 \n\t" 2567 "psubb %%mm1, %%mm0 \n\t" 2568 "paddb %%mm7, %%mm0 \n\t" 2569 "pcmpgtb %%mm6, %%mm0 \n\t" 2571 "movq (%%"FF_REG_a
",%3), %%mm2 \n\t" 2572 PMAXUB(%%mm2, %%mm4)
2573 PMINUB(%%mm2, %%mm3, %%mm5)
2574 "psubb %%mm2, %%mm1 \n\t" 2575 "paddb %%mm7, %%mm1 \n\t" 2576 "pcmpgtb %%mm6, %%mm1 \n\t" 2577 "paddb %%mm1, %%mm0 \n\t" 2579 "movq (%%"FF_REG_a
", %3, 2), %%mm1 \n\t" 2580 PMAXUB(%%mm1, %%mm4)
2581 PMINUB(%%mm1, %%mm3, %%mm5)
2582 "psubb %%mm1, %%mm2 \n\t" 2583 "paddb %%mm7, %%mm2 \n\t" 2584 "pcmpgtb %%mm6, %%mm2 \n\t" 2585 "paddb %%mm2, %%mm0 \n\t" 2587 "lea (%%"FF_REG_a
", %3, 4), %%"FF_REG_a
"\n\t" 2589 "movq (%2, %3, 4), %%mm2 \n\t" 2590 PMAXUB(%%mm2, %%mm4)
2591 PMINUB(%%mm2, %%mm3, %%mm5)
2592 "psubb %%mm2, %%mm1 \n\t" 2593 "paddb %%mm7, %%mm1 \n\t" 2594 "pcmpgtb %%mm6, %%mm1 \n\t" 2595 "paddb %%mm1, %%mm0 \n\t" 2597 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 2598 PMAXUB(%%mm1, %%mm4)
2599 PMINUB(%%mm1, %%mm3, %%mm5)
2600 "psubb %%mm1, %%mm2 \n\t" 2601 "paddb %%mm7, %%mm2 \n\t" 2602 "pcmpgtb %%mm6, %%mm2 \n\t" 2603 "paddb %%mm2, %%mm0 \n\t" 2605 "movq (%%"FF_REG_a
", %3), %%mm2 \n\t" 2606 PMAXUB(%%mm2, %%mm4)
2607 PMINUB(%%mm2, %%mm3, %%mm5)
2608 "psubb %%mm2, %%mm1 \n\t" 2609 "paddb %%mm7, %%mm1 \n\t" 2610 "pcmpgtb %%mm6, %%mm1 \n\t" 2611 "paddb %%mm1, %%mm0 \n\t" 2613 "movq (%%"FF_REG_a
", %3, 2), %%mm1 \n\t" 2614 PMAXUB(%%mm1, %%mm4)
2615 PMINUB(%%mm1, %%mm3, %%mm5)
2616 "psubb %%mm1, %%mm2 \n\t" 2617 "paddb %%mm7, %%mm2 \n\t" 2618 "pcmpgtb %%mm6, %%mm2 \n\t" 2619 "paddb %%mm2, %%mm0 \n\t" 2621 "movq (%2, %3, 8), %%mm2 \n\t" 2622 PMAXUB(%%mm2, %%mm4)
2623 PMINUB(%%mm2, %%mm3, %%mm5)
2624 "psubb %%mm2, %%mm1 \n\t" 2625 "paddb %%mm7, %%mm1 \n\t" 2626 "pcmpgtb %%mm6, %%mm1 \n\t" 2627 "paddb %%mm1, %%mm0 \n\t" 2629 "movq (%%"FF_REG_a
", %3, 4), %%mm1 \n\t" 2630 "psubb %%mm1, %%mm2 \n\t" 2631 "paddb %%mm7, %%mm2 \n\t" 2632 "pcmpgtb %%mm6, %%mm2 \n\t" 2633 "paddb %%mm2, %%mm0 \n\t" 2634 "psubusb %%mm3, %%mm4 \n\t" 2636 "pxor %%mm6, %%mm6 \n\t" 2637 "movq %4, %%mm7 \n\t" 2638 "paddusb %%mm7, %%mm7 \n\t" 2639 "psubusb %%mm4, %%mm7 \n\t" 2640 "pcmpeqb %%mm6, %%mm7 \n\t" 2641 "pcmpeqb %%mm6, %%mm7 \n\t" 2642 "movq %%mm7, %1 \n\t" 2644 "movq %5, %%mm7 \n\t" 2645 "punpcklbw %%mm7, %%mm7 \n\t" 2646 "punpcklbw %%mm7, %%mm7 \n\t" 2647 "punpcklbw %%mm7, %%mm7 \n\t" 2648 "psubb %%mm0, %%mm6 \n\t" 2649 "pcmpgtb %%mm7, %%mm6 \n\t" 2650 "movq %%mm6, %0 \n\t" 2652 :
"=m" (eq_mask),
"=m" (dc_mask)
2653 :
"r" (
src),
"r" ((
x86_reg)step),
"m" (
c->pQPb),
"m"(
c->ppMode.flatnessThreshold)
2657 both_masks = dc_mask & eq_mask;
2661 int64_t *temp_sums= sums;
2664 "movq %2, %%mm0 \n\t" 2665 "pxor %%mm4, %%mm4 \n\t" 2667 "movq (%0), %%mm6 \n\t" 2668 "movq (%0, %1), %%mm5 \n\t" 2669 "movq %%mm5, %%mm1 \n\t" 2670 "movq %%mm6, %%mm2 \n\t" 2671 "psubusb %%mm6, %%mm5 \n\t" 2672 "psubusb %%mm1, %%mm2 \n\t" 2673 "por %%mm5, %%mm2 \n\t" 2674 "psubusb %%mm2, %%mm0 \n\t" 2675 "pcmpeqb %%mm4, %%mm0 \n\t" 2677 "pxor %%mm6, %%mm1 \n\t" 2678 "pand %%mm0, %%mm1 \n\t" 2679 "pxor %%mm1, %%mm6 \n\t" 2682 "movq (%0, %1, 8), %%mm5 \n\t" 2684 "movq (%0, %1, 8), %%mm7 \n\t" 2685 "movq %%mm5, %%mm1 \n\t" 2686 "movq %%mm7, %%mm2 \n\t" 2687 "psubusb %%mm7, %%mm5 \n\t" 2688 "psubusb %%mm1, %%mm2 \n\t" 2689 "por %%mm5, %%mm2 \n\t" 2690 "movq %2, %%mm0 \n\t" 2691 "psubusb %%mm2, %%mm0 \n\t" 2692 "pcmpeqb %%mm4, %%mm0 \n\t" 2694 "pxor %%mm7, %%mm1 \n\t" 2695 "pand %%mm0, %%mm1 \n\t" 2696 "pxor %%mm1, %%mm7 \n\t" 2698 "movq %%mm6, %%mm5 \n\t" 2699 "punpckhbw %%mm4, %%mm6 \n\t" 2700 "punpcklbw %%mm4, %%mm5 \n\t" 2703 "movq %%mm5, %%mm0 \n\t" 2704 "movq %%mm6, %%mm1 \n\t" 2705 "psllw $2, %%mm0 \n\t" 2706 "psllw $2, %%mm1 \n\t" 2707 "paddw "MANGLE(w04)
", %%mm0 \n\t" 2708 "paddw "MANGLE(w04)
", %%mm1 \n\t" 2711 "movq (%0), %%mm2 \n\t"\ 2712 "movq (%0), %%mm3 \n\t"\ 2714 "punpcklbw %%mm4, %%mm2 \n\t"\ 2715 "punpckhbw %%mm4, %%mm3 \n\t"\ 2716 "paddw %%mm2, %%mm0 \n\t"\ 2717 "paddw %%mm3, %%mm1 \n\t" 2720 "movq (%0), %%mm2 \n\t"\ 2721 "movq (%0), %%mm3 \n\t"\ 2723 "punpcklbw %%mm4, %%mm2 \n\t"\ 2724 "punpckhbw %%mm4, %%mm3 \n\t"\ 2725 "psubw %%mm2, %%mm0 \n\t"\ 2726 "psubw %%mm3, %%mm1 \n\t" 2732 "movq %%mm0, (%3) \n\t" 2733 "movq %%mm1, 8(%3) \n\t" 2736 "psubw %%mm5, %%mm0 \n\t" 2737 "psubw %%mm6, %%mm1 \n\t" 2738 "movq %%mm0, 16(%3) \n\t" 2739 "movq %%mm1, 24(%3) \n\t" 2742 "psubw %%mm5, %%mm0 \n\t" 2743 "psubw %%mm6, %%mm1 \n\t" 2744 "movq %%mm0, 32(%3) \n\t" 2745 "movq %%mm1, 40(%3) \n\t" 2748 "psubw %%mm5, %%mm0 \n\t" 2749 "psubw %%mm6, %%mm1 \n\t" 2750 "movq %%mm0, 48(%3) \n\t" 2751 "movq %%mm1, 56(%3) \n\t" 2754 "psubw %%mm5, %%mm0 \n\t" 2755 "psubw %%mm6, %%mm1 \n\t" 2756 "movq %%mm0, 64(%3) \n\t" 2757 "movq %%mm1, 72(%3) \n\t" 2759 "movq %%mm7, %%mm6 \n\t" 2760 "punpckhbw %%mm4, %%mm7 \n\t" 2761 "punpcklbw %%mm4, %%mm6 \n\t" 2767 "movq %%mm0, 80(%3) \n\t" 2768 "movq %%mm1, 88(%3) \n\t" 2771 "paddw %%mm6, %%mm0 \n\t" 2772 "paddw %%mm7, %%mm1 \n\t" 2773 "movq %%mm0, 96(%3) \n\t" 2774 "movq %%mm1, 104(%3) \n\t" 2777 "paddw %%mm6, %%mm0 \n\t" 2778 "paddw %%mm7, %%mm1 \n\t" 2779 "movq %%mm0, 112(%3) \n\t" 2780 "movq %%mm1, 120(%3) \n\t" 2783 "paddw %%mm6, %%mm0 \n\t" 2784 "paddw %%mm7, %%mm1 \n\t" 2785 "movq %%mm0, 128(%3) \n\t" 2786 "movq %%mm1, 136(%3) \n\t" 2789 "paddw %%mm6, %%mm0 \n\t" 2790 "paddw %%mm7, %%mm1 \n\t" 2791 "movq %%mm0, 144(%3) \n\t" 2792 "movq %%mm1, 152(%3) \n\t" 2797 :
"r" ((
x86_reg)step),
"m" (
c->pQPb),
"r"(sums),
"g"(
src)
2804 "movq %4, %%mm6 \n\t" 2805 "pcmpeqb %%mm5, %%mm5 \n\t" 2806 "pxor %%mm6, %%mm5 \n\t" 2807 "pxor %%mm7, %%mm7 \n\t" 2810 "movq (%1), %%mm0 \n\t" 2811 "movq 8(%1), %%mm1 \n\t" 2812 "paddw 32(%1), %%mm0 \n\t" 2813 "paddw 40(%1), %%mm1 \n\t" 2814 "movq (%0, %3), %%mm2 \n\t" 2815 "movq %%mm2, %%mm3 \n\t" 2816 "movq %%mm2, %%mm4 \n\t" 2817 "punpcklbw %%mm7, %%mm2 \n\t" 2818 "punpckhbw %%mm7, %%mm3 \n\t" 2819 "paddw %%mm2, %%mm0 \n\t" 2820 "paddw %%mm3, %%mm1 \n\t" 2821 "paddw %%mm2, %%mm0 \n\t" 2822 "paddw %%mm3, %%mm1 \n\t" 2823 "psrlw $4, %%mm0 \n\t" 2824 "psrlw $4, %%mm1 \n\t" 2825 "packuswb %%mm1, %%mm0 \n\t" 2826 "pand %%mm6, %%mm0 \n\t" 2827 "pand %%mm5, %%mm4 \n\t" 2828 "por %%mm4, %%mm0 \n\t" 2829 "movq %%mm0, (%0, %3) \n\t" 2834 :
"+r"(
offset),
"+r"(temp_sums)
2840 if(eq_mask != -1LL){
2844 "pxor %%mm7, %%mm7 \n\t" 2848 "movq (%0), %%mm0 \n\t" 2849 "movq %%mm0, %%mm1 \n\t" 2850 "punpcklbw %%mm7, %%mm0 \n\t" 2851 "punpckhbw %%mm7, %%mm1 \n\t" 2853 "movq (%0, %1), %%mm2 \n\t" 2854 "lea (%0, %1, 2), %%"FF_REG_a
" \n\t" 2855 "movq %%mm2, %%mm3 \n\t" 2856 "punpcklbw %%mm7, %%mm2 \n\t" 2857 "punpckhbw %%mm7, %%mm3 \n\t" 2859 "movq (%%"FF_REG_a
"), %%mm4 \n\t" 2860 "movq %%mm4, %%mm5 \n\t" 2861 "punpcklbw %%mm7, %%mm4 \n\t" 2862 "punpckhbw %%mm7, %%mm5 \n\t" 2864 "paddw %%mm0, %%mm0 \n\t" 2865 "paddw %%mm1, %%mm1 \n\t" 2866 "psubw %%mm4, %%mm2 \n\t" 2867 "psubw %%mm5, %%mm3 \n\t" 2868 "psubw %%mm2, %%mm0 \n\t" 2869 "psubw %%mm3, %%mm1 \n\t" 2871 "psllw $2, %%mm2 \n\t" 2872 "psllw $2, %%mm3 \n\t" 2873 "psubw %%mm2, %%mm0 \n\t" 2874 "psubw %%mm3, %%mm1 \n\t" 2876 "movq (%%"FF_REG_a
", %1), %%mm2 \n\t" 2877 "movq %%mm2, %%mm3 \n\t" 2878 "punpcklbw %%mm7, %%mm2 \n\t" 2879 "punpckhbw %%mm7, %%mm3 \n\t" 2881 "psubw %%mm2, %%mm0 \n\t" 2882 "psubw %%mm3, %%mm1 \n\t" 2883 "psubw %%mm2, %%mm0 \n\t" 2884 "psubw %%mm3, %%mm1 \n\t" 2885 "movq %%mm0, (%4) \n\t" 2886 "movq %%mm1, 8(%4) \n\t" 2888 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 2889 "movq %%mm0, %%mm1 \n\t" 2890 "punpcklbw %%mm7, %%mm0 \n\t" 2891 "punpckhbw %%mm7, %%mm1 \n\t" 2893 "psubw %%mm0, %%mm2 \n\t" 2894 "psubw %%mm1, %%mm3 \n\t" 2895 "movq %%mm2, 16(%4) \n\t" 2896 "movq %%mm3, 24(%4) \n\t" 2897 "paddw %%mm4, %%mm4 \n\t" 2898 "paddw %%mm5, %%mm5 \n\t" 2899 "psubw %%mm2, %%mm4 \n\t" 2900 "psubw %%mm3, %%mm5 \n\t" 2902 "lea (%%"FF_REG_a
", %1), %0 \n\t" 2903 "psllw $2, %%mm2 \n\t" 2904 "psllw $2, %%mm3 \n\t" 2905 "psubw %%mm2, %%mm4 \n\t" 2906 "psubw %%mm3, %%mm5 \n\t" 2908 "movq (%0, %1, 2), %%mm2 \n\t" 2909 "movq %%mm2, %%mm3 \n\t" 2910 "punpcklbw %%mm7, %%mm2 \n\t" 2911 "punpckhbw %%mm7, %%mm3 \n\t" 2912 "psubw %%mm2, %%mm4 \n\t" 2913 "psubw %%mm3, %%mm5 \n\t" 2914 "psubw %%mm2, %%mm4 \n\t" 2915 "psubw %%mm3, %%mm5 \n\t" 2917 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 2918 "punpcklbw %%mm7, %%mm6 \n\t" 2919 "psubw %%mm6, %%mm2 \n\t" 2920 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 2921 "punpckhbw %%mm7, %%mm6 \n\t" 2922 "psubw %%mm6, %%mm3 \n\t" 2924 "paddw %%mm0, %%mm0 \n\t" 2925 "paddw %%mm1, %%mm1 \n\t" 2926 "psubw %%mm2, %%mm0 \n\t" 2927 "psubw %%mm3, %%mm1 \n\t" 2929 "psllw $2, %%mm2 \n\t" 2930 "psllw $2, %%mm3 \n\t" 2931 "psubw %%mm2, %%mm0 \n\t" 2932 "psubw %%mm3, %%mm1 \n\t" 2934 "movq (%0, %1, 4), %%mm2 \n\t" 2935 "movq %%mm2, %%mm3 \n\t" 2936 "punpcklbw %%mm7, %%mm2 \n\t" 2937 "punpckhbw %%mm7, %%mm3 \n\t" 2939 "paddw %%mm2, %%mm2 \n\t" 2940 "paddw %%mm3, %%mm3 \n\t" 2941 "psubw %%mm2, %%mm0 \n\t" 2942 "psubw %%mm3, %%mm1 \n\t" 2944 "movq (%4), %%mm2 \n\t" 2945 "movq 8(%4), %%mm3 \n\t" 2947 #if TEMPLATE_PP_MMXEXT 2948 "movq %%mm7, %%mm6 \n\t" 2949 "psubw %%mm0, %%mm6 \n\t" 2950 "pmaxsw %%mm6, %%mm0 \n\t" 2951 "movq %%mm7, %%mm6 \n\t" 2952 "psubw %%mm1, %%mm6 \n\t" 2953 "pmaxsw %%mm6, %%mm1 \n\t" 2954 "movq %%mm7, %%mm6 \n\t" 2955 "psubw %%mm2, %%mm6 \n\t" 2956 "pmaxsw %%mm6, %%mm2 \n\t" 2957 "movq %%mm7, %%mm6 \n\t" 2958 "psubw %%mm3, %%mm6 \n\t" 2959 "pmaxsw %%mm6, %%mm3 \n\t" 2961 "movq %%mm7, %%mm6 \n\t" 2962 "pcmpgtw %%mm0, %%mm6 \n\t" 2963 "pxor %%mm6, %%mm0 \n\t" 2964 "psubw %%mm6, %%mm0 \n\t" 2965 "movq %%mm7, %%mm6 \n\t" 2966 "pcmpgtw %%mm1, %%mm6 \n\t" 2967 "pxor %%mm6, %%mm1 \n\t" 2968 "psubw %%mm6, %%mm1 \n\t" 2969 "movq %%mm7, %%mm6 \n\t" 2970 "pcmpgtw %%mm2, %%mm6 \n\t" 2971 "pxor %%mm6, %%mm2 \n\t" 2972 "psubw %%mm6, %%mm2 \n\t" 2973 "movq %%mm7, %%mm6 \n\t" 2974 "pcmpgtw %%mm3, %%mm6 \n\t" 2975 "pxor %%mm6, %%mm3 \n\t" 2976 "psubw %%mm6, %%mm3 \n\t" 2979 #if TEMPLATE_PP_MMXEXT 2980 "pminsw %%mm2, %%mm0 \n\t" 2981 "pminsw %%mm3, %%mm1 \n\t" 2983 "movq %%mm0, %%mm6 \n\t" 2984 "psubusw %%mm2, %%mm6 \n\t" 2985 "psubw %%mm6, %%mm0 \n\t" 2986 "movq %%mm1, %%mm6 \n\t" 2987 "psubusw %%mm3, %%mm6 \n\t" 2988 "psubw %%mm6, %%mm1 \n\t" 2991 "movd %2, %%mm2 \n\t" 2992 "punpcklbw %%mm7, %%mm2 \n\t" 2994 "movq %%mm7, %%mm6 \n\t" 2995 "pcmpgtw %%mm4, %%mm6 \n\t" 2996 "pxor %%mm6, %%mm4 \n\t" 2997 "psubw %%mm6, 
%%mm4 \n\t" 2998 "pcmpgtw %%mm5, %%mm7 \n\t" 2999 "pxor %%mm7, %%mm5 \n\t" 3000 "psubw %%mm7, %%mm5 \n\t" 3002 "psllw $3, %%mm2 \n\t" 3003 "movq %%mm2, %%mm3 \n\t" 3004 "pcmpgtw %%mm4, %%mm2 \n\t" 3005 "pcmpgtw %%mm5, %%mm3 \n\t" 3006 "pand %%mm2, %%mm4 \n\t" 3007 "pand %%mm3, %%mm5 \n\t" 3010 "psubusw %%mm0, %%mm4 \n\t" 3011 "psubusw %%mm1, %%mm5 \n\t" 3014 "movq "MANGLE(w05)
", %%mm2 \n\t" 3015 "pmullw %%mm2, %%mm4 \n\t" 3016 "pmullw %%mm2, %%mm5 \n\t" 3017 "movq "MANGLE(w20)
", %%mm2 \n\t" 3018 "paddw %%mm2, %%mm4 \n\t" 3019 "paddw %%mm2, %%mm5 \n\t" 3020 "psrlw $6, %%mm4 \n\t" 3021 "psrlw $6, %%mm5 \n\t" 3023 "movq 16(%4), %%mm0 \n\t" 3024 "movq 24(%4), %%mm1 \n\t" 3026 "pxor %%mm2, %%mm2 \n\t" 3027 "pxor %%mm3, %%mm3 \n\t" 3029 "pcmpgtw %%mm0, %%mm2 \n\t" 3030 "pcmpgtw %%mm1, %%mm3 \n\t" 3031 "pxor %%mm2, %%mm0 \n\t" 3032 "pxor %%mm3, %%mm1 \n\t" 3033 "psubw %%mm2, %%mm0 \n\t" 3034 "psubw %%mm3, %%mm1 \n\t" 3035 "psrlw $1, %%mm0 \n\t" 3036 "psrlw $1, %%mm1 \n\t" 3038 "pxor %%mm6, %%mm2 \n\t" 3039 "pxor %%mm7, %%mm3 \n\t" 3040 "pand %%mm2, %%mm4 \n\t" 3041 "pand %%mm3, %%mm5 \n\t" 3043 #if TEMPLATE_PP_MMXEXT 3044 "pminsw %%mm0, %%mm4 \n\t" 3045 "pminsw %%mm1, %%mm5 \n\t" 3047 "movq %%mm4, %%mm2 \n\t" 3048 "psubusw %%mm0, %%mm2 \n\t" 3049 "psubw %%mm2, %%mm4 \n\t" 3050 "movq %%mm5, %%mm2 \n\t" 3051 "psubusw %%mm1, %%mm2 \n\t" 3052 "psubw %%mm2, %%mm5 \n\t" 3054 "pxor %%mm6, %%mm4 \n\t" 3055 "pxor %%mm7, %%mm5 \n\t" 3056 "psubw %%mm6, %%mm4 \n\t" 3057 "psubw %%mm7, %%mm5 \n\t" 3058 "packsswb %%mm5, %%mm4 \n\t" 3059 "movq %3, %%mm1 \n\t" 3060 "pandn %%mm4, %%mm1 \n\t" 3061 "movq (%0), %%mm0 \n\t" 3062 "paddb %%mm1, %%mm0 \n\t" 3063 "movq %%mm0, (%0) \n\t" 3064 "movq (%0, %1), %%mm0 \n\t" 3065 "psubb %%mm1, %%mm0 \n\t" 3066 "movq %%mm0, (%0, %1) \n\t" 3069 :
"r" ((
x86_reg)step),
"m" (
c->pQPb),
"m"(eq_mask),
"r"(
tmp)
3081 #endif //TEMPLATE_PP_MMX 3084 const int8_t QPs[],
int QPStride,
int isColor,
PPContext *
c);
3090 #undef REAL_SCALED_CPY 3094 int levelFix, int64_t *packedOffsetAndScale)
3096 #if !TEMPLATE_PP_MMX || !HAVE_6REGS 3100 #if TEMPLATE_PP_MMX && HAVE_6REGS 3102 "movq (%%"FF_REG_a
"), %%mm2 \n\t" 3103 "movq 8(%%"FF_REG_a
"), %%mm3 \n\t" 3104 "lea (%2,%4), %%"FF_REG_a
" \n\t" 3105 "lea (%3,%5), %%"FF_REG_d
" \n\t" 3106 "pxor %%mm4, %%mm4 \n\t" 3107 #if TEMPLATE_PP_MMXEXT 3108 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ 3109 "movq " #src1 ", %%mm0 \n\t"\ 3110 "movq " #src1 ", %%mm5 \n\t"\ 3111 "movq " #src2 ", %%mm1 \n\t"\ 3112 "movq " #src2 ", %%mm6 \n\t"\ 3113 "punpcklbw %%mm0, %%mm0 \n\t"\ 3114 "punpckhbw %%mm5, %%mm5 \n\t"\ 3115 "punpcklbw %%mm1, %%mm1 \n\t"\ 3116 "punpckhbw %%mm6, %%mm6 \n\t"\ 3117 "pmulhuw %%mm3, %%mm0 \n\t"\ 3118 "pmulhuw %%mm3, %%mm5 \n\t"\ 3119 "pmulhuw %%mm3, %%mm1 \n\t"\ 3120 "pmulhuw %%mm3, %%mm6 \n\t"\ 3121 "psubw %%mm2, %%mm0 \n\t"\ 3122 "psubw %%mm2, %%mm5 \n\t"\ 3123 "psubw %%mm2, %%mm1 \n\t"\ 3124 "psubw %%mm2, %%mm6 \n\t"\ 3125 "packuswb %%mm5, %%mm0 \n\t"\ 3126 "packuswb %%mm6, %%mm1 \n\t"\ 3127 "movq %%mm0, " #dst1 " \n\t"\ 3128 "movq %%mm1, " #dst2 " \n\t"\ 3130 #else //TEMPLATE_PP_MMXEXT 3131 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ 3132 "movq " #src1 ", %%mm0 \n\t"\ 3133 "movq " #src1 ", %%mm5 \n\t"\ 3134 "punpcklbw %%mm4, %%mm0 \n\t"\ 3135 "punpckhbw %%mm4, %%mm5 \n\t"\ 3136 "psubw %%mm2, %%mm0 \n\t"\ 3137 "psubw %%mm2, %%mm5 \n\t"\ 3138 "movq " #src2 ", %%mm1 \n\t"\ 3139 "psllw $6, %%mm0 \n\t"\ 3140 "psllw $6, %%mm5 \n\t"\ 3141 "pmulhw %%mm3, %%mm0 \n\t"\ 3142 "movq " #src2 ", %%mm6 \n\t"\ 3143 "pmulhw %%mm3, %%mm5 \n\t"\ 3144 "punpcklbw %%mm4, %%mm1 \n\t"\ 3145 "punpckhbw %%mm4, %%mm6 \n\t"\ 3146 "psubw %%mm2, %%mm1 \n\t"\ 3147 "psubw %%mm2, %%mm6 \n\t"\ 3148 "psllw $6, %%mm1 \n\t"\ 3149 "psllw $6, %%mm6 \n\t"\ 3150 "pmulhw %%mm3, %%mm1 \n\t"\ 3151 "pmulhw %%mm3, %%mm6 \n\t"\ 3152 "packuswb %%mm5, %%mm0 \n\t"\ 3153 "packuswb %%mm6, %%mm1 \n\t"\ 3154 "movq %%mm0, " #dst1 " \n\t"\ 3155 "movq %%mm1, " #dst2 " \n\t"\ 3157 #endif //TEMPLATE_PP_MMXEXT 3158 #define SCALED_CPY(src1, src2, dst1, dst2)\ 3159 REAL_SCALED_CPY(src1, src2, dst1, dst2) 3161 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
3162 SCALED_CPY((%2, %4, 2), (%%FF_REGa, %4, 2), (%3, %5, 2), (%%FF_REGd, %5, 2))
3163 SCALED_CPY((%2, %4, 4), (%%FF_REGa, %4, 4), (%3, %5, 4), (%%FF_REGd, %5, 4))
3164 "lea (%%"FF_REG_a
",%4,4), %%"FF_REG_a
" \n\t" 3165 "lea (%%"FF_REG_d
",%5,4), %%"FF_REG_d
" \n\t" 3166 SCALED_CPY((%%FF_REGa, %4), (%%FF_REGa, %4, 2), (%%FF_REGd, %5), (%%FF_REGd, %5, 2))
3169 :
"=&a" (packedOffsetAndScale)
3170 :
"0" (packedOffsetAndScale),
3177 #else //TEMPLATE_PP_MMX && HAVE_6REGS 3179 memcpy( &(dst[dstStride*i]),
3181 #endif //TEMPLATE_PP_MMX && HAVE_6REGS 3183 #if TEMPLATE_PP_MMX && HAVE_6REGS 3185 "lea (%0,%2), %%"FF_REG_a
" \n\t" 3186 "lea (%1,%3), %%"FF_REG_d
" \n\t" 3188 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ 3189 "movq " #src1 ", %%mm0 \n\t"\ 3190 "movq " #src2 ", %%mm1 \n\t"\ 3191 "movq %%mm0, " #dst1 " \n\t"\ 3192 "movq %%mm1, " #dst2 " \n\t"\ 3194 #define SIMPLE_CPY(src1, src2, dst1, dst2)\ 3195 REAL_SIMPLE_CPY(src1, src2, dst1, dst2) 3197 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
3198 SIMPLE_CPY((%0, %2, 2), (%%FF_REGa, %2, 2), (%1, %3, 2), (%%FF_REGd, %3, 2))
3199 SIMPLE_CPY((%0, %2, 4), (%%FF_REGa, %2, 4), (%1, %3, 4), (%%FF_REGd, %3, 4))
3200 "lea (%%"FF_REG_a
",%2,4), %%"FF_REG_a
" \n\t" 3201 "lea (%%"FF_REG_d
",%3,4), %%"FF_REG_d
" \n\t" 3202 SIMPLE_CPY((%%FF_REGa, %2), (%%FF_REGa, %2, 2), (%%FF_REGd, %3), (%%FF_REGd, %3, 2))
3208 :
"%"FF_REG_a,
"%"FF_REG_d
3210 #else //TEMPLATE_PP_MMX && HAVE_6REGS 3212 memcpy( &(dst[dstStride*i]),
3214 #endif //TEMPLATE_PP_MMX && HAVE_6REGS 3225 "movq (%0), %%mm0 \n\t" 3226 "movq %%mm0, (%0, %1, 4) \n\t" 3228 "movq %%mm0, (%0) \n\t" 3229 "movq %%mm0, (%0, %1) \n\t" 3230 "movq %%mm0, (%0, %1, 2) \n\t" 3231 "movq %%mm0, (%0, %1, 4) \n\t" 3245 #if ARCH_X86 && TEMPLATE_PP_MMXEXT 3248 __asm__
volatile(
"prefetchnta (%0)\n\t" 3255 __asm__
volatile(
"prefetcht0 (%0)\n\t" 3262 __asm__
volatile(
"prefetcht1 (%0)\n\t" 3269 __asm__
volatile(
"prefetcht2 (%0)\n\t" 3273 #elif !ARCH_X86 && AV_GCC_VERSION_AT_LEAST(3,2) 3276 __builtin_prefetch(p,0,0);
3280 __builtin_prefetch(p,0,1);
3284 __builtin_prefetch(p,0,2);
3288 __builtin_prefetch(p,0,3);
3312 const int8_t QPs[],
int QPStride,
int isColor,
PPContext *
c2)
3316 #ifdef TEMPLATE_PP_TIME_MODE 3317 const int mode= TEMPLATE_PP_TIME_MODE;
3319 const int mode= isColor ?
c.ppMode.chromMode :
c.ppMode.lumMode;
3321 int black=0, white=255;
3322 int QPCorrecture= 256*256;
3329 const int qpHShift= isColor ? 4-
c.hChromaSubSample : 4;
3330 const int qpVShift= isColor ? 4-
c.vChromaSubSample : 4;
3333 uint64_t *
const yHistogram=
c.yHistogram;
3334 uint8_t *
const tempSrc= srcStride > 0 ?
c.tempSrc :
c.tempSrc - 23*srcStride;
3335 uint8_t *
const tempDst= (dstStride > 0 ?
c.tempDst :
c.tempDst - 23*dstStride) + 32;
3340 av_log(
c2,
AV_LOG_WARNING,
"Visualization is currently only supported with the accurate deblock filter without SIMD\n");
3345 for(i=0; i<57; i++){
3346 int offset= ((i*
c.ppMode.baseDcDiff)>>8) + 1;
3347 int threshold= offset*2 + 1;
3348 c.mmxDcOffset[i]= 0x7F -
offset;
3349 c.mmxDcThreshold[i]= 0x7F - threshold;
3350 c.mmxDcOffset[i]*= 0x0101010101010101LL;
3351 c.mmxDcThreshold[i]*= 0x0101010101010101LL;
3365 else if(mode &
DERING) copyAhead=9;
3373 uint64_t maxClipped;
3379 if(
c.frameNum == 1) yHistogram[0]=
width*(uint64_t)
height/64*15/256;
3381 for(i=0; i<256; i++){
3382 sum+= yHistogram[i];
3386 maxClipped=
av_rescale(sum,
c.ppMode.maxClippedThreshold.num,
c.ppMode.maxClippedThreshold.den);
3389 for(black=255; black>0; black--){
3390 if(clipped < maxClipped)
break;
3391 clipped-= yHistogram[black];
3395 for(white=0; white<256; white++){
3396 if(clipped < maxClipped)
break;
3397 clipped-= yHistogram[white];
3400 scale = (
AVRational){
c.ppMode.maxAllowedY -
c.ppMode.minAllowedY, white - black};
3402 #if TEMPLATE_PP_MMXEXT 3404 c.packedYOffset= (((black*
c.packedYScale)>>8) -
c.ppMode.minAllowedY) & 0xFFFF;
3407 c.packedYOffset= (black -
c.ppMode.minAllowedY) & 0xFFFF;
3410 c.packedYOffset|=
c.packedYOffset<<32;
3411 c.packedYOffset|=
c.packedYOffset<<16;
3413 c.packedYScale|=
c.packedYScale<<32;
3414 c.packedYScale|=
c.packedYScale<<16;
3417 else QPCorrecture= 256*256;
3419 c.packedYScale= 0x0100010001000100LL;
3421 QPCorrecture= 256*256;
3427 const uint8_t *srcBlock= &(
src[y*srcStride]);
3428 uint8_t *dstBlock= tempDst + dstStride;
3440 srcBlock + srcStride*8, srcStride, mode &
LEVEL_FIX, &
c.packedYOffset);
3446 else if(mode & LINEAR_BLEND_DEINT_FILTER)
3448 else if(mode & MEDIAN_DEINT_FILTER)
3450 else if(mode & CUBIC_IPOL_DEINT_FILTER)
3454 else if(mode & LOWPASS5_DEINT_FILTER)
3462 if(width==
FFABS(dstStride))
3463 linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
3466 for(i=0; i<copyAhead; i++){
3467 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
3474 const uint8_t *srcBlock= &(
src[y*srcStride]);
3475 uint8_t *dstBlock= &(dst[y*dstStride]);
3478 uint8_t *tempBlock2=
c.tempBlocks + 8;
3480 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
3481 int8_t *nonBQPptr= &
c.nonBQPTable[(y>>qpVShift)*
FFABS(QPStride)];
3489 linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
3490 FFMAX(height-y-copyAhead, 0), srcStride);
3493 for(i=
FFMAX(height-y, 8); i<copyAhead+8; i++)
3494 memcpy(tempSrc + srcStride*i,
src + srcStride*(height-1),
FFABS(srcStride));
3497 linecpy(tempDst, dstBlock - dstStride,
FFMIN(height-y+1, copyAhead+1), dstStride);
3500 for(i=height-y+1; i<=copyAhead; i++)
3501 memcpy(tempDst + dstStride*i, dst + dstStride*(height-1),
FFABS(dstStride));
3503 dstBlock= tempDst + dstStride;
3510 for(x=0; x<
width; ){
3512 int endx =
FFMIN(width, x+32);
3513 uint8_t *dstBlockStart = dstBlock;
3514 const uint8_t *srcBlockStart = srcBlock;
3516 for(qp_index=0; qp_index < (endx-startx)/
BLOCK_SIZE; qp_index++){
3517 QP = QPptr[(x+qp_index*
BLOCK_SIZE)>>qpHShift];
3518 nonBQP = nonBQPptr[(x+qp_index*
BLOCK_SIZE)>>qpHShift];
3520 QP= (QP* QPCorrecture + 256*128)>>16;
3521 nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
3522 yHistogram[(srcBlock+qp_index*8)[srcStride*12 + 4]]++;
3524 c.QP_block[qp_index] =
QP;
3525 c.nonBQP_block[qp_index] = nonBQP;
3528 "movd %1, %%mm7 \n\t" 3529 "packuswb %%mm7, %%mm7 \n\t" 3530 "packuswb %%mm7, %%mm7 \n\t" 3531 "packuswb %%mm7, %%mm7 \n\t" 3532 "movq %%mm7, %0 \n\t" 3533 :
"=m" (
c.pQPb_block[qp_index])
3545 srcBlock + srcStride*copyAhead, srcStride, mode &
LEVEL_FIX, &
c.packedYOffset);
3549 else if(mode & LINEAR_BLEND_DEINT_FILTER)
3551 else if(mode & MEDIAN_DEINT_FILTER)
3553 else if(mode & CUBIC_IPOL_DEINT_FILTER)
3557 else if(mode & LOWPASS5_DEINT_FILTER)
3566 dstBlock = dstBlockStart;
3567 srcBlock = srcBlockStart;
3569 for(x = startx, qp_index = 0; x < endx; x+=
BLOCK_SIZE, qp_index++){
3570 const int stride= dstStride;
3573 c.QP =
c.QP_block[qp_index];
3574 c.nonBQP =
c.nonBQP_block[qp_index];
3575 c.pQPb =
c.pQPb_block[qp_index];
3576 c.pQPb2 =
c.pQPb2_block[qp_index];
3580 if(mode & V_X1_FILTER)
3582 else if(mode & V_DEBLOCK){
3598 dstBlock = dstBlockStart;
3599 srcBlock = srcBlockStart;
3601 for(x = startx, qp_index=0; x < endx; x+=
BLOCK_SIZE, qp_index++){
3602 const int stride= dstStride;
3604 c.QP =
c.QP_block[qp_index];
3605 c.nonBQP =
c.nonBQP_block[qp_index];
3606 c.pQPb =
c.pQPb_block[qp_index];
3607 c.pQPb2 =
c.pQPb2_block[qp_index];
3609 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
3617 const int t=
RENAME(vertClassify)(tempBlock1, 16, &
c);
3623 RENAME(do_a_deblock)(tempBlock1, 16, 1, &
c,
mode);
3626 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
3629 if(mode & H_X1_FILTER)
3631 else if(mode & H_DEBLOCK){
3632 #if TEMPLATE_PP_ALTIVEC 3654 }
else if(mode & H_A_DEBLOCK){
3657 #endif //TEMPLATE_PP_MMX 3666 c.tempBlurred[isColor] + y*dstStride + x,
3667 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
3668 c.ppMode.maxTmpNoise);
3676 tmpXchg= tempBlock1;
3677 tempBlock1= tempBlock2;
3678 tempBlock2 = tmpXchg;
3684 if(y > 0)
RENAME(
dering)(dstBlock - dstStride - 8, dstStride, &
c);
3689 c.tempBlurred[isColor] + y*dstStride + x,
3690 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
3691 c.ppMode.maxTmpNoise);
3696 uint8_t *dstBlock= &(dst[y*dstStride]);
3697 if(width==
FFABS(dstStride))
3698 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
3701 for(i=0; i<height-y; i++){
3702 memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
3707 #if TEMPLATE_PP_3DNOW 3708 __asm__
volatile(
"femms");
3709 #elif TEMPLATE_PP_MMX 3710 __asm__
volatile(
"emms");
3713 #ifdef DEBUG_BRIGHTNESS 3717 for(i=0; i<256; i++)
3718 if(yHistogram[i] > max) max=yHistogram[i];
3720 for(i=1; i<256; i++){
3722 int start=yHistogram[i-1]/(max/256+1);
3723 int end=yHistogram[i]/(max/256+1);
3724 int inc= end > start ? 1 : -1;
3725 for(x=start; x!=end+inc; x+=inc)
3726 dst[ i*dstStride + x]+=128;
3729 for(i=0; i<100; i+=2){
3730 dst[ (white)*dstStride + i]+=128;
3731 dst[ (black)*dstStride + i]+=128;
3741 #undef TEMPLATE_PP_C 3742 #undef TEMPLATE_PP_ALTIVEC 3743 #undef TEMPLATE_PP_MMX 3744 #undef TEMPLATE_PP_MMXEXT 3745 #undef TEMPLATE_PP_3DNOW 3746 #undef TEMPLATE_PP_SSE2 static void RENAME() deInterlaceL5(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
Deinterlace the given block by filtering every line with a (-1 2 6 2 -1) filter.
static int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
#define AV_LOG_WARNING
Something somehow does not look correct.
static void RENAME() duplicate(uint8_t src[], int stride)
Duplicate the given 8 src pixels 5 times upward.
static void RENAME() doVertLowPass(uint8_t *src, int stride, PPContext *c)
Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) using the...
static void RENAME() deInterlaceFF(uint8_t src[], int stride, uint8_t *tmp)
Deinterlace the given block by filtering every second line with a (-1 4 2 4 -1) filter.
static void transpose_16x8_char_toPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
#define LINEAR_BLEND_DEINT_FILTER
static av_cold int end(AVCodecContext *avctx)
static void transpose_8x16_char_fromPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
static void RENAME() deInterlaceBlendLinear(uint8_t src[], int stride, uint8_t *tmp)
Deinterlace the given block by filtering all lines with a (1 2 1) filter.
#define LOWPASS5_DEINT_FILTER
static void horizX1Filter(uint8_t *src, int stride, int QP)
Experimental Filter 1 (Horizontal). Will not damage linear gradients. Flat blocks should look like they...
static void RENAME() prefetchnta(const void *p)
static void RENAME() postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, const int8_t QPs[], int QPStride, int isColor, PPContext *c)
Filter an array of bytes (Y, U or V values).
static void RENAME() deInterlaceMedian(uint8_t src[], int stride)
Deinterlace the given block by applying a median filter to every second line.
static void linecpy(void *dest, const void *src, int lines, int stride)
#define MEDIAN_DEINT_FILTER
#define CUBIC_IPOL_DEINT_FILTER
static void RENAME() tempNoiseReducer(uint8_t *src, int stride, uint8_t *tempBlurred, uint32_t *tempBlurredPast, const int *maxNoise)
static void RENAME() dering(uint8_t src[], int stride, PPContext *c)
static void RENAME() prefetcht2(const void *p)
static const uint8_t offset[127][2]
static void RENAME() deInterlaceInterpolateLinear(uint8_t src[], int stride)
Deinterlace the given block by linearly interpolating every second line.
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
Rescale a 64-bit integer with rounding to nearest.
#define LINEAR_IPOL_DEINT_FILTER
static void RENAME() prefetcht0(const void *p)
static void RENAME() blockCopy(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, int levelFix, int64_t *packedOffsetAndScale)
Copy a block from src to dst and fixes the blacklevel.
#define XMM_CLOBBERS(...)
#define FFMPEG_DEINT_FILTER
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c)
static void error(const char *err)
static void RENAME() deInterlaceInterpolateCubic(uint8_t src[], int stride)
Deinterlace the given block by cubic interpolating every second line.
#define AV_LOG_INFO
Standard information.
static void RENAME() vertX1Filter(uint8_t *src, int stride, PPContext *co)
Experimental Filter 1. Will not damage linear gradients. Flat blocks should look like they were passed ...
Rational number (pair of numerator and denominator).
static void RENAME() doVertDefFilter(uint8_t src[], int stride, PPContext *c)
static int ref[MAX_W *MAX_W]
static void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
static void RENAME() prefetcht1(const void *p)
static av_always_inline int diff(const uint32_t a, const uint32_t b)
#define TEMP_NOISE_FILTER
#define LEVEL_FIX
Brightness & Contrast.
mode
Use these values in ebur128_init (or'ed).
#define NAMED_CONSTRAINTS_ADD(...)