39 #if COMPILE_TEMPLATE_AMD3DNOW 40 #define PREFETCH "prefetch" 41 #define PAVGB "pavgusb" 42 #elif COMPILE_TEMPLATE_MMXEXT 43 #define PREFETCH "prefetchnta" 46 #define PREFETCH " # nop" 49 #if COMPILE_TEMPLATE_AMD3DNOW 56 #if COMPILE_TEMPLATE_MMXEXT 57 #define MOVNTQ "movntq" 58 #define SFENCE "sfence" 61 #define SFENCE " # nop" 64 #if !COMPILE_TEMPLATE_SSE2 66 #if !COMPILE_TEMPLATE_AMD3DNOW 75 __asm__
volatile(
PREFETCH" %0"::
"m"(*s):
"memory");
77 __asm__
volatile(
"movq %0, %%mm7"::
"m"(mask32a):
"memory");
81 "movd (%1), %%mm0 \n\t" 82 "punpckldq 3(%1), %%mm0 \n\t" 83 "movd 6(%1), %%mm1 \n\t" 84 "punpckldq 9(%1), %%mm1 \n\t" 85 "movd 12(%1), %%mm2 \n\t" 86 "punpckldq 15(%1), %%mm2 \n\t" 87 "movd 18(%1), %%mm3 \n\t" 88 "punpckldq 21(%1), %%mm3 \n\t" 89 "por %%mm7, %%mm0 \n\t" 90 "por %%mm7, %%mm1 \n\t" 91 "por %%mm7, %%mm2 \n\t" 92 "por %%mm7, %%mm3 \n\t" 95 MOVNTQ" %%mm2, 16(%0) \n\t" 102 __asm__
volatile(
SFENCE:::
"memory");
103 __asm__
volatile(
EMMS:::
"memory");
112 #define STORE_BGR24_MMX \ 113 "psrlq $8, %%mm2 \n\t" \ 114 "psrlq $8, %%mm3 \n\t" \ 115 "psrlq $8, %%mm6 \n\t" \ 116 "psrlq $8, %%mm7 \n\t" \ 117 "pand "MANGLE(mask24l)", %%mm0\n\t" \ 118 "pand "MANGLE(mask24l)", %%mm1\n\t" \ 119 "pand "MANGLE(mask24l)", %%mm4\n\t" \ 120 "pand "MANGLE(mask24l)", %%mm5\n\t" \ 121 "pand "MANGLE(mask24h)", %%mm2\n\t" \ 122 "pand "MANGLE(mask24h)", %%mm3\n\t" \ 123 "pand "MANGLE(mask24h)", %%mm6\n\t" \ 124 "pand "MANGLE(mask24h)", %%mm7\n\t" \ 125 "por %%mm2, %%mm0 \n\t" \ 126 "por %%mm3, %%mm1 \n\t" \ 127 "por %%mm6, %%mm4 \n\t" \ 128 "por %%mm7, %%mm5 \n\t" \ 130 "movq %%mm1, %%mm2 \n\t" \ 131 "movq %%mm4, %%mm3 \n\t" \ 132 "psllq $48, %%mm2 \n\t" \ 133 "psllq $32, %%mm3 \n\t" \ 134 "por %%mm2, %%mm0 \n\t" \ 135 "psrlq $16, %%mm1 \n\t" \ 136 "psrlq $32, %%mm4 \n\t" \ 137 "psllq $16, %%mm5 \n\t" \ 138 "por %%mm3, %%mm1 \n\t" \ 139 "por %%mm5, %%mm4 \n\t" \ 141 MOVNTQ" %%mm0, (%0) \n\t" \ 142 MOVNTQ" %%mm1, 8(%0) \n\t" \ 143 MOVNTQ" %%mm4, 16(%0)" 153 __asm__
volatile(
PREFETCH" %0"::
"m"(*s):
"memory");
158 "movq (%1), %%mm0 \n\t" 159 "movq 8(%1), %%mm1 \n\t" 160 "movq 16(%1), %%mm4 \n\t" 161 "movq 24(%1), %%mm5 \n\t" 162 "movq %%mm0, %%mm2 \n\t" 163 "movq %%mm1, %%mm3 \n\t" 164 "movq %%mm4, %%mm6 \n\t" 165 "movq %%mm5, %%mm7 \n\t" 173 __asm__
volatile(
SFENCE:::
"memory");
174 __asm__
volatile(
EMMS:::
"memory");
196 __asm__
volatile(
PREFETCH" %0"::
"m"(*s));
197 __asm__
volatile(
"movq %0, %%mm4"::
"m"(mask15s));
202 "movq (%1), %%mm0 \n\t" 203 "movq 8(%1), %%mm2 \n\t" 204 "movq %%mm0, %%mm1 \n\t" 205 "movq %%mm2, %%mm3 \n\t" 206 "pand %%mm4, %%mm0 \n\t" 207 "pand %%mm4, %%mm2 \n\t" 208 "paddw %%mm1, %%mm0 \n\t" 209 "paddw %%mm3, %%mm2 \n\t" 217 __asm__
volatile(
SFENCE:::
"memory");
218 __asm__
volatile(
EMMS:::
"memory");
221 register unsigned x= *((
const uint32_t *)s);
222 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
227 register unsigned short x= *((
const uint16_t *)s);
228 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
239 __asm__
volatile(
PREFETCH" %0"::
"m"(*s));
240 __asm__
volatile(
"movq %0, %%mm7"::
"m"(mask15rg));
241 __asm__
volatile(
"movq %0, %%mm6"::
"m"(mask15b));
246 "movq (%1), %%mm0 \n\t" 247 "movq 8(%1), %%mm2 \n\t" 248 "movq %%mm0, %%mm1 \n\t" 249 "movq %%mm2, %%mm3 \n\t" 250 "psrlq $1, %%mm0 \n\t" 251 "psrlq $1, %%mm2 \n\t" 252 "pand %%mm7, %%mm0 \n\t" 253 "pand %%mm7, %%mm2 \n\t" 254 "pand %%mm6, %%mm1 \n\t" 255 "pand %%mm6, %%mm3 \n\t" 256 "por %%mm1, %%mm0 \n\t" 257 "por %%mm3, %%mm2 \n\t" 265 __asm__
volatile(
SFENCE:::
"memory");
266 __asm__
volatile(
EMMS:::
"memory");
269 register uint32_t x= *((
const uint32_t*)s);
270 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
275 register uint16_t x= *((
const uint16_t*)s);
276 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
285 uint16_t *d = (uint16_t *)dst;
289 "movq %3, %%mm5 \n\t" 290 "movq %4, %%mm6 \n\t" 291 "movq %5, %%mm7 \n\t" 296 "movd (%1), %%mm0 \n\t" 297 "movd 4(%1), %%mm3 \n\t" 298 "punpckldq 8(%1), %%mm0 \n\t" 299 "punpckldq 12(%1), %%mm3 \n\t" 300 "movq %%mm0, %%mm1 \n\t" 301 "movq %%mm3, %%mm4 \n\t" 302 "pand %%mm6, %%mm0 \n\t" 303 "pand %%mm6, %%mm3 \n\t" 304 "pmaddwd %%mm7, %%mm0 \n\t" 305 "pmaddwd %%mm7, %%mm3 \n\t" 306 "pand %%mm5, %%mm1 \n\t" 307 "pand %%mm5, %%mm4 \n\t" 308 "por %%mm1, %%mm0 \n\t" 309 "por %%mm4, %%mm3 \n\t" 310 "psrld $5, %%mm0 \n\t" 311 "pslld $11, %%mm3 \n\t" 312 "por %%mm3, %%mm0 \n\t" 320 :
"r" (mm_end),
"m" (mask3216g),
"m" (mask3216br),
"m" (mul3216)
322 __asm__
volatile(
SFENCE:::
"memory");
323 __asm__
volatile(
EMMS:::
"memory");
325 register int rgb = *(
const uint32_t*)s; s += 4;
326 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
335 uint16_t *d = (uint16_t *)dst;
337 __asm__
volatile(
PREFETCH" %0"::
"m"(*src):
"memory");
339 "movq %0, %%mm7 \n\t" 340 "movq %1, %%mm6 \n\t" 341 ::
"m"(red_16mask),
"m"(green_16mask));
346 "movd (%1), %%mm0 \n\t" 347 "movd 4(%1), %%mm3 \n\t" 348 "punpckldq 8(%1), %%mm0 \n\t" 349 "punpckldq 12(%1), %%mm3 \n\t" 350 "movq %%mm0, %%mm1 \n\t" 351 "movq %%mm0, %%mm2 \n\t" 352 "movq %%mm3, %%mm4 \n\t" 353 "movq %%mm3, %%mm5 \n\t" 354 "psllq $8, %%mm0 \n\t" 355 "psllq $8, %%mm3 \n\t" 356 "pand %%mm7, %%mm0 \n\t" 357 "pand %%mm7, %%mm3 \n\t" 358 "psrlq $5, %%mm1 \n\t" 359 "psrlq $5, %%mm4 \n\t" 360 "pand %%mm6, %%mm1 \n\t" 361 "pand %%mm6, %%mm4 \n\t" 362 "psrlq $19, %%mm2 \n\t" 363 "psrlq $19, %%mm5 \n\t" 364 "pand %2, %%mm2 \n\t" 365 "pand %2, %%mm5 \n\t" 366 "por %%mm1, %%mm0 \n\t" 367 "por %%mm4, %%mm3 \n\t" 368 "por %%mm2, %%mm0 \n\t" 369 "por %%mm5, %%mm3 \n\t" 370 "psllq $16, %%mm3 \n\t" 371 "por %%mm3, %%mm0 \n\t" 373 ::
"r"(d),
"r"(s),
"m"(blue_16mask):
"memory");
377 __asm__
volatile(
SFENCE:::
"memory");
378 __asm__
volatile(
EMMS:::
"memory");
380 register int rgb = *(
const uint32_t*)s; s += 4;
381 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
390 uint16_t *d = (uint16_t *)dst;
394 "movq %3, %%mm5 \n\t" 395 "movq %4, %%mm6 \n\t" 396 "movq %5, %%mm7 \n\t" 401 "movd (%1), %%mm0 \n\t" 402 "movd 4(%1), %%mm3 \n\t" 403 "punpckldq 8(%1), %%mm0 \n\t" 404 "punpckldq 12(%1), %%mm3 \n\t" 405 "movq %%mm0, %%mm1 \n\t" 406 "movq %%mm3, %%mm4 \n\t" 407 "pand %%mm6, %%mm0 \n\t" 408 "pand %%mm6, %%mm3 \n\t" 409 "pmaddwd %%mm7, %%mm0 \n\t" 410 "pmaddwd %%mm7, %%mm3 \n\t" 411 "pand %%mm5, %%mm1 \n\t" 412 "pand %%mm5, %%mm4 \n\t" 413 "por %%mm1, %%mm0 \n\t" 414 "por %%mm4, %%mm3 \n\t" 415 "psrld $6, %%mm0 \n\t" 416 "pslld $10, %%mm3 \n\t" 417 "por %%mm3, %%mm0 \n\t" 425 :
"r" (mm_end),
"m" (mask3215g),
"m" (mask3216br),
"m" (mul3215)
427 __asm__
volatile(
SFENCE:::
"memory");
428 __asm__
volatile(
EMMS:::
"memory");
430 register int rgb = *(
const uint32_t*)s; s += 4;
431 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
440 uint16_t *d = (uint16_t *)dst;
442 __asm__
volatile(
PREFETCH" %0"::
"m"(*src):
"memory");
444 "movq %0, %%mm7 \n\t" 445 "movq %1, %%mm6 \n\t" 446 ::
"m"(red_15mask),
"m"(green_15mask));
451 "movd (%1), %%mm0 \n\t" 452 "movd 4(%1), %%mm3 \n\t" 453 "punpckldq 8(%1), %%mm0 \n\t" 454 "punpckldq 12(%1), %%mm3 \n\t" 455 "movq %%mm0, %%mm1 \n\t" 456 "movq %%mm0, %%mm2 \n\t" 457 "movq %%mm3, %%mm4 \n\t" 458 "movq %%mm3, %%mm5 \n\t" 459 "psllq $7, %%mm0 \n\t" 460 "psllq $7, %%mm3 \n\t" 461 "pand %%mm7, %%mm0 \n\t" 462 "pand %%mm7, %%mm3 \n\t" 463 "psrlq $6, %%mm1 \n\t" 464 "psrlq $6, %%mm4 \n\t" 465 "pand %%mm6, %%mm1 \n\t" 466 "pand %%mm6, %%mm4 \n\t" 467 "psrlq $19, %%mm2 \n\t" 468 "psrlq $19, %%mm5 \n\t" 469 "pand %2, %%mm2 \n\t" 470 "pand %2, %%mm5 \n\t" 471 "por %%mm1, %%mm0 \n\t" 472 "por %%mm4, %%mm3 \n\t" 473 "por %%mm2, %%mm0 \n\t" 474 "por %%mm5, %%mm3 \n\t" 475 "psllq $16, %%mm3 \n\t" 476 "por %%mm3, %%mm0 \n\t" 478 ::
"r"(d),
"r"(s),
"m"(blue_15mask):
"memory");
482 __asm__
volatile(
SFENCE:::
"memory");
483 __asm__
volatile(
EMMS:::
"memory");
485 register int rgb = *(
const uint32_t*)s; s += 4;
486 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
495 uint16_t *d = (uint16_t *)dst;
497 __asm__
volatile(
PREFETCH" %0"::
"m"(*src):
"memory");
499 "movq %0, %%mm7 \n\t" 500 "movq %1, %%mm6 \n\t" 501 ::
"m"(red_16mask),
"m"(green_16mask));
506 "movd (%1), %%mm0 \n\t" 507 "movd 3(%1), %%mm3 \n\t" 508 "punpckldq 6(%1), %%mm0 \n\t" 509 "punpckldq 9(%1), %%mm3 \n\t" 510 "movq %%mm0, %%mm1 \n\t" 511 "movq %%mm0, %%mm2 \n\t" 512 "movq %%mm3, %%mm4 \n\t" 513 "movq %%mm3, %%mm5 \n\t" 514 "psrlq $3, %%mm0 \n\t" 515 "psrlq $3, %%mm3 \n\t" 516 "pand %2, %%mm0 \n\t" 517 "pand %2, %%mm3 \n\t" 518 "psrlq $5, %%mm1 \n\t" 519 "psrlq $5, %%mm4 \n\t" 520 "pand %%mm6, %%mm1 \n\t" 521 "pand %%mm6, %%mm4 \n\t" 522 "psrlq $8, %%mm2 \n\t" 523 "psrlq $8, %%mm5 \n\t" 524 "pand %%mm7, %%mm2 \n\t" 525 "pand %%mm7, %%mm5 \n\t" 526 "por %%mm1, %%mm0 \n\t" 527 "por %%mm4, %%mm3 \n\t" 528 "por %%mm2, %%mm0 \n\t" 529 "por %%mm5, %%mm3 \n\t" 530 "psllq $16, %%mm3 \n\t" 531 "por %%mm3, %%mm0 \n\t" 533 ::
"r"(d),
"r"(s),
"m"(blue_16mask):
"memory");
537 __asm__
volatile(
SFENCE:::
"memory");
538 __asm__
volatile(
EMMS:::
"memory");
543 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
552 uint16_t *d = (uint16_t *)dst;
554 __asm__
volatile(
PREFETCH" %0"::
"m"(*src):
"memory");
556 "movq %0, %%mm7 \n\t" 557 "movq %1, %%mm6 \n\t" 558 ::
"m"(red_16mask),
"m"(green_16mask));
563 "movd (%1), %%mm0 \n\t" 564 "movd 3(%1), %%mm3 \n\t" 565 "punpckldq 6(%1), %%mm0 \n\t" 566 "punpckldq 9(%1), %%mm3 \n\t" 567 "movq %%mm0, %%mm1 \n\t" 568 "movq %%mm0, %%mm2 \n\t" 569 "movq %%mm3, %%mm4 \n\t" 570 "movq %%mm3, %%mm5 \n\t" 571 "psllq $8, %%mm0 \n\t" 572 "psllq $8, %%mm3 \n\t" 573 "pand %%mm7, %%mm0 \n\t" 574 "pand %%mm7, %%mm3 \n\t" 575 "psrlq $5, %%mm1 \n\t" 576 "psrlq $5, %%mm4 \n\t" 577 "pand %%mm6, %%mm1 \n\t" 578 "pand %%mm6, %%mm4 \n\t" 579 "psrlq $19, %%mm2 \n\t" 580 "psrlq $19, %%mm5 \n\t" 581 "pand %2, %%mm2 \n\t" 582 "pand %2, %%mm5 \n\t" 583 "por %%mm1, %%mm0 \n\t" 584 "por %%mm4, %%mm3 \n\t" 585 "por %%mm2, %%mm0 \n\t" 586 "por %%mm5, %%mm3 \n\t" 587 "psllq $16, %%mm3 \n\t" 588 "por %%mm3, %%mm0 \n\t" 590 ::
"r"(d),
"r"(s),
"m"(blue_16mask):
"memory");
594 __asm__
volatile(
SFENCE:::
"memory");
595 __asm__
volatile(
EMMS:::
"memory");
600 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
609 uint16_t *d = (uint16_t *)dst;
611 __asm__
volatile(
PREFETCH" %0"::
"m"(*src):
"memory");
613 "movq %0, %%mm7 \n\t" 614 "movq %1, %%mm6 \n\t" 615 ::
"m"(red_15mask),
"m"(green_15mask));
620 "movd (%1), %%mm0 \n\t" 621 "movd 3(%1), %%mm3 \n\t" 622 "punpckldq 6(%1), %%mm0 \n\t" 623 "punpckldq 9(%1), %%mm3 \n\t" 624 "movq %%mm0, %%mm1 \n\t" 625 "movq %%mm0, %%mm2 \n\t" 626 "movq %%mm3, %%mm4 \n\t" 627 "movq %%mm3, %%mm5 \n\t" 628 "psrlq $3, %%mm0 \n\t" 629 "psrlq $3, %%mm3 \n\t" 630 "pand %2, %%mm0 \n\t" 631 "pand %2, %%mm3 \n\t" 632 "psrlq $6, %%mm1 \n\t" 633 "psrlq $6, %%mm4 \n\t" 634 "pand %%mm6, %%mm1 \n\t" 635 "pand %%mm6, %%mm4 \n\t" 636 "psrlq $9, %%mm2 \n\t" 637 "psrlq $9, %%mm5 \n\t" 638 "pand %%mm7, %%mm2 \n\t" 639 "pand %%mm7, %%mm5 \n\t" 640 "por %%mm1, %%mm0 \n\t" 641 "por %%mm4, %%mm3 \n\t" 642 "por %%mm2, %%mm0 \n\t" 643 "por %%mm5, %%mm3 \n\t" 644 "psllq $16, %%mm3 \n\t" 645 "por %%mm3, %%mm0 \n\t" 647 ::
"r"(d),
"r"(s),
"m"(blue_15mask):
"memory");
651 __asm__
volatile(
SFENCE:::
"memory");
652 __asm__
volatile(
EMMS:::
"memory");
657 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
666 uint16_t *d = (uint16_t *)dst;
668 __asm__
volatile(
PREFETCH" %0"::
"m"(*src):
"memory");
670 "movq %0, %%mm7 \n\t" 671 "movq %1, %%mm6 \n\t" 672 ::
"m"(red_15mask),
"m"(green_15mask));
677 "movd (%1), %%mm0 \n\t" 678 "movd 3(%1), %%mm3 \n\t" 679 "punpckldq 6(%1), %%mm0 \n\t" 680 "punpckldq 9(%1), %%mm3 \n\t" 681 "movq %%mm0, %%mm1 \n\t" 682 "movq %%mm0, %%mm2 \n\t" 683 "movq %%mm3, %%mm4 \n\t" 684 "movq %%mm3, %%mm5 \n\t" 685 "psllq $7, %%mm0 \n\t" 686 "psllq $7, %%mm3 \n\t" 687 "pand %%mm7, %%mm0 \n\t" 688 "pand %%mm7, %%mm3 \n\t" 689 "psrlq $6, %%mm1 \n\t" 690 "psrlq $6, %%mm4 \n\t" 691 "pand %%mm6, %%mm1 \n\t" 692 "pand %%mm6, %%mm4 \n\t" 693 "psrlq $19, %%mm2 \n\t" 694 "psrlq $19, %%mm5 \n\t" 695 "pand %2, %%mm2 \n\t" 696 "pand %2, %%mm5 \n\t" 697 "por %%mm1, %%mm0 \n\t" 698 "por %%mm4, %%mm3 \n\t" 699 "por %%mm2, %%mm0 \n\t" 700 "por %%mm5, %%mm3 \n\t" 701 "psllq $16, %%mm3 \n\t" 702 "por %%mm3, %%mm0 \n\t" 704 ::
"r"(d),
"r"(s),
"m"(blue_15mask):
"memory");
708 __asm__
volatile(
SFENCE:::
"memory");
709 __asm__
volatile(
EMMS:::
"memory");
714 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
721 const uint16_t *mm_end;
723 const uint16_t *
s = (
const uint16_t*)
src;
724 end = s + src_size/2;
725 __asm__
volatile(
PREFETCH" %0"::
"m"(*s):
"memory");
730 "movq (%1), %%mm0 \n\t" 731 "movq (%1), %%mm1 \n\t" 732 "movq (%1), %%mm2 \n\t" 733 "pand %2, %%mm0 \n\t" 734 "pand %3, %%mm1 \n\t" 735 "pand %4, %%mm2 \n\t" 736 "psllq $5, %%mm0 \n\t" 737 "pmulhw "MANGLE(mul15_mid)
", %%mm0 \n\t" 738 "pmulhw "MANGLE(mul15_mid)
", %%mm1 \n\t" 739 "pmulhw "MANGLE(mul15_hi)
", %%mm2 \n\t" 740 "movq %%mm0, %%mm3 \n\t" 741 "movq %%mm1, %%mm4 \n\t" 742 "movq %%mm2, %%mm5 \n\t" 743 "punpcklwd %5, %%mm0 \n\t" 744 "punpcklwd %5, %%mm1 \n\t" 745 "punpcklwd %5, %%mm2 \n\t" 746 "punpckhwd %5, %%mm3 \n\t" 747 "punpckhwd %5, %%mm4 \n\t" 748 "punpckhwd %5, %%mm5 \n\t" 749 "psllq $8, %%mm1 \n\t" 750 "psllq $16, %%mm2 \n\t" 751 "por %%mm1, %%mm0 \n\t" 752 "por %%mm2, %%mm0 \n\t" 753 "psllq $8, %%mm4 \n\t" 754 "psllq $16, %%mm5 \n\t" 755 "por %%mm4, %%mm3 \n\t" 756 "por %%mm5, %%mm3 \n\t" 758 "movq %%mm0, %%mm6 \n\t" 759 "movq %%mm3, %%mm7 \n\t" 761 "movq 8(%1), %%mm0 \n\t" 762 "movq 8(%1), %%mm1 \n\t" 763 "movq 8(%1), %%mm2 \n\t" 764 "pand %2, %%mm0 \n\t" 765 "pand %3, %%mm1 \n\t" 766 "pand %4, %%mm2 \n\t" 767 "psllq $5, %%mm0 \n\t" 768 "pmulhw "MANGLE(mul15_mid)
", %%mm0 \n\t" 769 "pmulhw "MANGLE(mul15_mid)
", %%mm1 \n\t" 770 "pmulhw "MANGLE(mul15_hi)
", %%mm2 \n\t" 771 "movq %%mm0, %%mm3 \n\t" 772 "movq %%mm1, %%mm4 \n\t" 773 "movq %%mm2, %%mm5 \n\t" 774 "punpcklwd %5, %%mm0 \n\t" 775 "punpcklwd %5, %%mm1 \n\t" 776 "punpcklwd %5, %%mm2 \n\t" 777 "punpckhwd %5, %%mm3 \n\t" 778 "punpckhwd %5, %%mm4 \n\t" 779 "punpckhwd %5, %%mm5 \n\t" 780 "psllq $8, %%mm1 \n\t" 781 "psllq $16, %%mm2 \n\t" 782 "por %%mm1, %%mm0 \n\t" 783 "por %%mm2, %%mm0 \n\t" 784 "psllq $8, %%mm4 \n\t" 785 "psllq $16, %%mm5 \n\t" 786 "por %%mm4, %%mm3 \n\t" 787 "por %%mm5, %%mm3 \n\t" 790 :
"r"(
s),
"m"(mask15b),
"m"(mask15g),
"m"(mask15r),
"m"(mmx_null)
795 "movq %%mm0, %%mm4 \n\t" 796 "movq %%mm3, %%mm5 \n\t" 797 "movq %%mm6, %%mm0 \n\t" 798 "movq %%mm7, %%mm1 \n\t" 800 "movq %%mm4, %%mm6 \n\t" 801 "movq %%mm5, %%mm7 \n\t" 802 "movq %%mm0, %%mm2 \n\t" 803 "movq %%mm1, %%mm3 \n\t" 813 __asm__
volatile(
SFENCE:::
"memory");
814 __asm__
volatile(
EMMS:::
"memory");
816 register uint16_t bgr;
818 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
819 *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
820 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
827 const uint16_t *mm_end;
829 const uint16_t *
s = (
const uint16_t *)
src;
830 end = s + src_size/2;
831 __asm__
volatile(
PREFETCH" %0"::
"m"(*s):
"memory");
836 "movq (%1), %%mm0 \n\t" 837 "movq (%1), %%mm1 \n\t" 838 "movq (%1), %%mm2 \n\t" 839 "pand %2, %%mm0 \n\t" 840 "pand %3, %%mm1 \n\t" 841 "pand %4, %%mm2 \n\t" 842 "psllq $5, %%mm0 \n\t" 843 "psrlq $1, %%mm2 \n\t" 844 "pmulhw "MANGLE(mul15_mid)
", %%mm0 \n\t" 845 "pmulhw "MANGLE(mul16_mid)
", %%mm1 \n\t" 846 "pmulhw "MANGLE(mul15_hi)
", %%mm2 \n\t" 847 "movq %%mm0, %%mm3 \n\t" 848 "movq %%mm1, %%mm4 \n\t" 849 "movq %%mm2, %%mm5 \n\t" 850 "punpcklwd %5, %%mm0 \n\t" 851 "punpcklwd %5, %%mm1 \n\t" 852 "punpcklwd %5, %%mm2 \n\t" 853 "punpckhwd %5, %%mm3 \n\t" 854 "punpckhwd %5, %%mm4 \n\t" 855 "punpckhwd %5, %%mm5 \n\t" 856 "psllq $8, %%mm1 \n\t" 857 "psllq $16, %%mm2 \n\t" 858 "por %%mm1, %%mm0 \n\t" 859 "por %%mm2, %%mm0 \n\t" 860 "psllq $8, %%mm4 \n\t" 861 "psllq $16, %%mm5 \n\t" 862 "por %%mm4, %%mm3 \n\t" 863 "por %%mm5, %%mm3 \n\t" 865 "movq %%mm0, %%mm6 \n\t" 866 "movq %%mm3, %%mm7 \n\t" 868 "movq 8(%1), %%mm0 \n\t" 869 "movq 8(%1), %%mm1 \n\t" 870 "movq 8(%1), %%mm2 \n\t" 871 "pand %2, %%mm0 \n\t" 872 "pand %3, %%mm1 \n\t" 873 "pand %4, %%mm2 \n\t" 874 "psllq $5, %%mm0 \n\t" 875 "psrlq $1, %%mm2 \n\t" 876 "pmulhw "MANGLE(mul15_mid)
", %%mm0 \n\t" 877 "pmulhw "MANGLE(mul16_mid)
", %%mm1 \n\t" 878 "pmulhw "MANGLE(mul15_hi)
", %%mm2 \n\t" 879 "movq %%mm0, %%mm3 \n\t" 880 "movq %%mm1, %%mm4 \n\t" 881 "movq %%mm2, %%mm5 \n\t" 882 "punpcklwd %5, %%mm0 \n\t" 883 "punpcklwd %5, %%mm1 \n\t" 884 "punpcklwd %5, %%mm2 \n\t" 885 "punpckhwd %5, %%mm3 \n\t" 886 "punpckhwd %5, %%mm4 \n\t" 887 "punpckhwd %5, %%mm5 \n\t" 888 "psllq $8, %%mm1 \n\t" 889 "psllq $16, %%mm2 \n\t" 890 "por %%mm1, %%mm0 \n\t" 891 "por %%mm2, %%mm0 \n\t" 892 "psllq $8, %%mm4 \n\t" 893 "psllq $16, %%mm5 \n\t" 894 "por %%mm4, %%mm3 \n\t" 895 "por %%mm5, %%mm3 \n\t" 897 :
"r"(
s),
"m"(mask16b),
"m"(mask16g),
"m"(mask16r),
"m"(mmx_null)
902 "movq %%mm0, %%mm4 \n\t" 903 "movq %%mm3, %%mm5 \n\t" 904 "movq %%mm6, %%mm0 \n\t" 905 "movq %%mm7, %%mm1 \n\t" 907 "movq %%mm4, %%mm6 \n\t" 908 "movq %%mm5, %%mm7 \n\t" 909 "movq %%mm0, %%mm2 \n\t" 910 "movq %%mm1, %%mm3 \n\t" 920 __asm__
volatile(
SFENCE:::
"memory");
921 __asm__
volatile(
EMMS:::
"memory");
923 register uint16_t bgr;
925 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
926 *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
927 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
939 "packuswb %%mm7, %%mm0 \n\t" \ 940 "packuswb %%mm7, %%mm1 \n\t" \ 941 "packuswb %%mm7, %%mm2 \n\t" \ 942 "punpcklbw %%mm1, %%mm0 \n\t" \ 943 "punpcklbw %%mm6, %%mm2 \n\t" \ 944 "movq %%mm0, %%mm3 \n\t" \ 945 "punpcklwd %%mm2, %%mm0 \n\t" \ 946 "punpckhwd %%mm2, %%mm3 \n\t" \ 947 MOVNTQ" %%mm0, (%0) \n\t" \ 948 MOVNTQ" %%mm3, 8(%0) \n\t" \ 953 const uint16_t *mm_end;
955 const uint16_t *
s = (
const uint16_t *)
src;
956 end = s + src_size/2;
957 __asm__
volatile(
PREFETCH" %0"::
"m"(*s):
"memory");
958 __asm__
volatile(
"pxor %%mm7,%%mm7 \n\t":::
"memory");
959 __asm__
volatile(
"pcmpeqd %%mm6,%%mm6 \n\t":::
"memory");
964 "movq (%1), %%mm0 \n\t" 965 "movq (%1), %%mm1 \n\t" 966 "movq (%1), %%mm2 \n\t" 967 "pand %2, %%mm0 \n\t" 968 "pand %3, %%mm1 \n\t" 969 "pand %4, %%mm2 \n\t" 970 "psllq $5, %%mm0 \n\t" 971 "pmulhw %5, %%mm0 \n\t" 972 "pmulhw %5, %%mm1 \n\t" 973 "pmulhw "MANGLE(mul15_hi)
", %%mm2 \n\t" 975 ::
"r"(d),
"r"(s),
"m"(mask15b),
"m"(mask15g),
"m"(mask15r) ,
"m"(mul15_mid)
981 __asm__
volatile(
SFENCE:::
"memory");
982 __asm__
volatile(
EMMS:::
"memory");
984 register uint16_t bgr;
986 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
987 *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
988 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
996 const uint16_t *mm_end;
998 const uint16_t *
s = (
const uint16_t*)
src;
999 end = s + src_size/2;
1000 __asm__
volatile(
PREFETCH" %0"::
"m"(*s):
"memory");
1001 __asm__
volatile(
"pxor %%mm7,%%mm7 \n\t":::
"memory");
1002 __asm__
volatile(
"pcmpeqd %%mm6,%%mm6 \n\t":::
"memory");
1004 while (s < mm_end) {
1007 "movq (%1), %%mm0 \n\t" 1008 "movq (%1), %%mm1 \n\t" 1009 "movq (%1), %%mm2 \n\t" 1010 "pand %2, %%mm0 \n\t" 1011 "pand %3, %%mm1 \n\t" 1012 "pand %4, %%mm2 \n\t" 1013 "psllq $5, %%mm0 \n\t" 1014 "psrlq $1, %%mm2 \n\t" 1015 "pmulhw %5, %%mm0 \n\t" 1016 "pmulhw "MANGLE(mul16_mid)
", %%mm1 \n\t" 1017 "pmulhw "MANGLE(mul15_hi)
", %%mm2 \n\t" 1019 ::
"r"(d),
"r"(s),
"m"(mask16b),
"m"(mask16g),
"m"(mask16r),
"m"(mul15_mid)
1025 __asm__
volatile(
SFENCE:::
"memory");
1026 __asm__
volatile(
EMMS:::
"memory");
1028 register uint16_t bgr;
1030 *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
1031 *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
1032 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
1046 "movq %3, %%mm7 \n\t" 1047 "pxor %4, %%mm7 \n\t" 1048 "movq %%mm7, %%mm6 \n\t" 1049 "pxor %5, %%mm7 \n\t" 1053 "movq (%1, %0), %%mm0 \n\t" 1054 "movq 8(%1, %0), %%mm1 \n\t" 1055 # if COMPILE_TEMPLATE_MMXEXT 1056 "pshufw $177, %%mm0, %%mm3 \n\t" 1057 "pshufw $177, %%mm1, %%mm5 \n\t" 1058 "pand %%mm7, %%mm0 \n\t" 1059 "pand %%mm6, %%mm3 \n\t" 1060 "pand %%mm7, %%mm1 \n\t" 1061 "pand %%mm6, %%mm5 \n\t" 1062 "por %%mm3, %%mm0 \n\t" 1063 "por %%mm5, %%mm1 \n\t" 1065 "movq %%mm0, %%mm2 \n\t" 1066 "movq %%mm1, %%mm4 \n\t" 1067 "pand %%mm7, %%mm0 \n\t" 1068 "pand %%mm6, %%mm2 \n\t" 1069 "pand %%mm7, %%mm1 \n\t" 1070 "pand %%mm6, %%mm4 \n\t" 1071 "movq %%mm2, %%mm3 \n\t" 1072 "movq %%mm4, %%mm5 \n\t" 1073 "pslld $16, %%mm2 \n\t" 1074 "psrld $16, %%mm3 \n\t" 1075 "pslld $16, %%mm4 \n\t" 1076 "psrld $16, %%mm5 \n\t" 1077 "por %%mm2, %%mm0 \n\t" 1078 "por %%mm4, %%mm1 \n\t" 1079 "por %%mm3, %%mm0 \n\t" 1080 "por %%mm5, %%mm1 \n\t" 1082 MOVNTQ" %%mm0, (%2, %0) \n\t" 1083 MOVNTQ" %%mm1, 8(%2, %0) \n\t" 1090 :
"r" (s),
"r" (d),
"m" (mask32b),
"m" (mask32r),
"m" (mmx_one)
1092 for (; idx<15; idx+=4) {
1093 register unsigned v = *(
const uint32_t *)&s[idx],
g = v & 0xff00ff00;
1095 *(uint32_t *)&d[idx] = (v>>16) +
g + (v<<16);
1102 x86_reg mmx_size= 23 - src_size;
1104 "test %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 1106 "movq "MANGLE(mask24r)
", %%mm5 \n\t" 1107 "movq "MANGLE(mask24g)
", %%mm6 \n\t" 1108 "movq "MANGLE(mask24b)
", %%mm7 \n\t" 1111 PREFETCH" 32(%1, %%"FF_REG_a
") \n\t" 1112 "movq (%1, %%"FF_REG_a
"), %%mm0 \n\t" 1113 "movq (%1, %%"FF_REG_a
"), %%mm1 \n\t" 1114 "movq 2(%1, %%"FF_REG_a
"), %%mm2 \n\t" 1115 "psllq $16, %%mm0 \n\t" 1116 "pand %%mm5, %%mm0 \n\t" 1117 "pand %%mm6, %%mm1 \n\t" 1118 "pand %%mm7, %%mm2 \n\t" 1119 "por %%mm0, %%mm1 \n\t" 1120 "por %%mm2, %%mm1 \n\t" 1121 "movq 6(%1, %%"FF_REG_a
"), %%mm0 \n\t" 1122 MOVNTQ" %%mm1,(%2, %%"FF_REG_a
") \n\t" 1123 "movq 8(%1, %%"FF_REG_a
"), %%mm1 \n\t" 1124 "movq 10(%1, %%"FF_REG_a
"), %%mm2 \n\t" 1125 "pand %%mm7, %%mm0 \n\t" 1126 "pand %%mm5, %%mm1 \n\t" 1127 "pand %%mm6, %%mm2 \n\t" 1128 "por %%mm0, %%mm1 \n\t" 1129 "por %%mm2, %%mm1 \n\t" 1130 "movq 14(%1, %%"FF_REG_a
"), %%mm0 \n\t" 1131 MOVNTQ" %%mm1, 8(%2, %%"FF_REG_a
")\n\t" 1132 "movq 16(%1, %%"FF_REG_a
"), %%mm1 \n\t" 1133 "movq 18(%1, %%"FF_REG_a
"), %%mm2 \n\t" 1134 "pand %%mm6, %%mm0 \n\t" 1135 "pand %%mm7, %%mm1 \n\t" 1136 "pand %%mm5, %%mm2 \n\t" 1137 "por %%mm0, %%mm1 \n\t" 1138 "por %%mm2, %%mm1 \n\t" 1139 MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a
") \n\t" 1140 "add $24, %%"FF_REG_a
" \n\t" 1144 :
"r" (
src-mmx_size),
"r"(dst-mmx_size)
1148 __asm__
volatile(
SFENCE:::
"memory");
1149 __asm__
volatile(
EMMS:::
"memory");
1151 if (mmx_size==23)
return;
1155 src_size= 23-mmx_size;
1158 for (i=0; i<src_size; i+=3) {
1161 dst[i + 1] =
src[i + 1];
1162 dst[i + 2] =
src[i + 0];
1169 int lumStride,
int chromStride,
int dstStride,
int vertLumPerChroma)
1173 for (y=0; y<
height; y++) {
1176 "xor %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 1179 PREFETCH" 32(%1, %%"FF_REG_a
", 2) \n\t" 1180 PREFETCH" 32(%2, %%"FF_REG_a
") \n\t" 1181 PREFETCH" 32(%3, %%"FF_REG_a
") \n\t" 1182 "movq (%2, %%"FF_REG_a
"), %%mm0 \n\t" 1183 "movq %%mm0, %%mm2 \n\t" 1184 "movq (%3, %%"FF_REG_a
"), %%mm1 \n\t" 1185 "punpcklbw %%mm1, %%mm0 \n\t" 1186 "punpckhbw %%mm1, %%mm2 \n\t" 1188 "movq (%1, %%"FF_REG_a
",2), %%mm3 \n\t" 1189 "movq 8(%1, %%"FF_REG_a
",2), %%mm5 \n\t" 1190 "movq %%mm3, %%mm4 \n\t" 1191 "movq %%mm5, %%mm6 \n\t" 1192 "punpcklbw %%mm0, %%mm3 \n\t" 1193 "punpckhbw %%mm0, %%mm4 \n\t" 1194 "punpcklbw %%mm2, %%mm5 \n\t" 1195 "punpckhbw %%mm2, %%mm6 \n\t" 1197 MOVNTQ" %%mm3, (%0, %%"FF_REG_a
", 4) \n\t" 1198 MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a
", 4) \n\t" 1199 MOVNTQ" %%mm5, 16(%0, %%"FF_REG_a
", 4) \n\t" 1200 MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a
", 4) \n\t" 1202 "add $8, %%"FF_REG_a
" \n\t" 1203 "cmp %4, %%"FF_REG_a
" \n\t" 1205 ::
"r"(dst),
"r"(ysrc),
"r"(usrc),
"r"(vsrc),
"g" (chromWidth)
1208 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
1209 usrc += chromStride;
1210 vsrc += chromStride;
1226 int lumStride,
int chromStride,
int dstStride)
1234 int lumStride,
int chromStride,
int dstStride,
int vertLumPerChroma)
1238 for (y=0; y<
height; y++) {
1241 "xor %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 1244 PREFETCH" 32(%1, %%"FF_REG_a
", 2) \n\t" 1245 PREFETCH" 32(%2, %%"FF_REG_a
") \n\t" 1246 PREFETCH" 32(%3, %%"FF_REG_a
") \n\t" 1247 "movq (%2, %%"FF_REG_a
"), %%mm0 \n\t" 1248 "movq %%mm0, %%mm2 \n\t" 1249 "movq (%3, %%"FF_REG_a
"), %%mm1 \n\t" 1250 "punpcklbw %%mm1, %%mm0 \n\t" 1251 "punpckhbw %%mm1, %%mm2 \n\t" 1253 "movq (%1, %%"FF_REG_a
",2), %%mm3 \n\t" 1254 "movq 8(%1, %%"FF_REG_a
",2), %%mm5 \n\t" 1255 "movq %%mm0, %%mm4 \n\t" 1256 "movq %%mm2, %%mm6 \n\t" 1257 "punpcklbw %%mm3, %%mm0 \n\t" 1258 "punpckhbw %%mm3, %%mm4 \n\t" 1259 "punpcklbw %%mm5, %%mm2 \n\t" 1260 "punpckhbw %%mm5, %%mm6 \n\t" 1262 MOVNTQ" %%mm0, (%0, %%"FF_REG_a
", 4) \n\t" 1263 MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a
", 4) \n\t" 1264 MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a
", 4) \n\t" 1265 MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a
", 4) \n\t" 1267 "add $8, %%"FF_REG_a
" \n\t" 1268 "cmp %4, %%"FF_REG_a
" \n\t" 1270 ::
"r"(dst),
"r"(ysrc),
"r"(usrc),
"r"(vsrc),
"g" (chromWidth)
1273 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
1274 usrc += chromStride;
1275 vsrc += chromStride;
1291 int lumStride,
int chromStride,
int dstStride)
1302 int lumStride,
int chromStride,
int dstStride)
1312 int lumStride,
int chromStride,
int dstStride)
1323 int lumStride,
int chromStride,
int srcStride)
1327 for (y=0; y<
height; y+=2) {
1329 "xor %%"FF_REG_a
", %%"FF_REG_a
"\n\t" 1330 "pcmpeqw %%mm7, %%mm7 \n\t" 1331 "psrlw $8, %%mm7 \n\t" 1334 PREFETCH" 64(%0, %%"FF_REG_a
", 4) \n\t" 1335 "movq (%0, %%"FF_REG_a
", 4), %%mm0 \n\t" 1336 "movq 8(%0, %%"FF_REG_a
", 4), %%mm1 \n\t" 1337 "movq %%mm0, %%mm2 \n\t" 1338 "movq %%mm1, %%mm3 \n\t" 1339 "psrlw $8, %%mm0 \n\t" 1340 "psrlw $8, %%mm1 \n\t" 1341 "pand %%mm7, %%mm2 \n\t" 1342 "pand %%mm7, %%mm3 \n\t" 1343 "packuswb %%mm1, %%mm0 \n\t" 1344 "packuswb %%mm3, %%mm2 \n\t" 1346 MOVNTQ" %%mm2, (%1, %%"FF_REG_a
", 2) \n\t" 1348 "movq 16(%0, %%"FF_REG_a
", 4), %%mm1 \n\t" 1349 "movq 24(%0, %%"FF_REG_a
", 4), %%mm2 \n\t" 1350 "movq %%mm1, %%mm3 \n\t" 1351 "movq %%mm2, %%mm4 \n\t" 1352 "psrlw $8, %%mm1 \n\t" 1353 "psrlw $8, %%mm2 \n\t" 1354 "pand %%mm7, %%mm3 \n\t" 1355 "pand %%mm7, %%mm4 \n\t" 1356 "packuswb %%mm2, %%mm1 \n\t" 1357 "packuswb %%mm4, %%mm3 \n\t" 1359 MOVNTQ" %%mm3, 8(%1, %%"FF_REG_a
", 2) \n\t" 1361 "movq %%mm0, %%mm2 \n\t" 1362 "movq %%mm1, %%mm3 \n\t" 1363 "psrlw $8, %%mm0 \n\t" 1364 "psrlw $8, %%mm1 \n\t" 1365 "pand %%mm7, %%mm2 \n\t" 1366 "pand %%mm7, %%mm3 \n\t" 1367 "packuswb %%mm1, %%mm0 \n\t" 1368 "packuswb %%mm3, %%mm2 \n\t" 1370 MOVNTQ" %%mm0, (%3, %%"FF_REG_a
") \n\t" 1371 MOVNTQ" %%mm2, (%2, %%"FF_REG_a
") \n\t" 1373 "add $8, %%"FF_REG_a
" \n\t" 1374 "cmp %4, %%"FF_REG_a
" \n\t" 1376 ::
"r"(
src),
"r"(ydst),
"r"(udst),
"r"(vdst),
"g" (chromWidth)
1377 :
"memory",
"%"FF_REG_a
1384 "xor %%"FF_REG_a
", %%"FF_REG_a
"\n\t" 1387 PREFETCH" 64(%0, %%"FF_REG_a
", 4) \n\t" 1388 "movq (%0, %%"FF_REG_a
", 4), %%mm0 \n\t" 1389 "movq 8(%0, %%"FF_REG_a
", 4), %%mm1 \n\t" 1390 "movq 16(%0, %%"FF_REG_a
", 4), %%mm2 \n\t" 1391 "movq 24(%0, %%"FF_REG_a
", 4), %%mm3 \n\t" 1392 "pand %%mm7, %%mm0 \n\t" 1393 "pand %%mm7, %%mm1 \n\t" 1394 "pand %%mm7, %%mm2 \n\t" 1395 "pand %%mm7, %%mm3 \n\t" 1396 "packuswb %%mm1, %%mm0 \n\t" 1397 "packuswb %%mm3, %%mm2 \n\t" 1399 MOVNTQ" %%mm0, (%1, %%"FF_REG_a
", 2) \n\t" 1400 MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a
", 2) \n\t" 1402 "add $8, %%"FF_REG_a
"\n\t" 1403 "cmp %4, %%"FF_REG_a
"\n\t" 1406 ::
"r"(
src),
"r"(ydst),
"r"(udst),
"r"(vdst),
"g" (chromWidth)
1407 :
"memory",
"%"FF_REG_a
1409 udst += chromStride;
1410 vdst += chromStride;
1414 __asm__
volatile(
EMMS" \n\t" 1420 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW 1428 for (x=0; x<srcWidth-1; x++) {
1429 dst[2*x+1]= (3*
src[x] +
src[x+1])>>2;
1430 dst[2*x+2]= (
src[x] + 3*
src[x+1])>>2;
1432 dst[2*srcWidth-1]=
src[srcWidth-1];
1436 for (y=1; y<srcHeight; y++) {
1437 x86_reg mmxSize= srcWidth&~15;
1441 "mov %4, %%"FF_REG_a
" \n\t" 1442 "movq "MANGLE(mmx_ff)
", %%mm0 \n\t" 1443 "movq (%0, %%"FF_REG_a
"), %%mm4 \n\t" 1444 "movq %%mm4, %%mm2 \n\t" 1445 "psllq $8, %%mm4 \n\t" 1446 "pand %%mm0, %%mm2 \n\t" 1447 "por %%mm2, %%mm4 \n\t" 1448 "movq (%1, %%"FF_REG_a
"), %%mm5 \n\t" 1449 "movq %%mm5, %%mm3 \n\t" 1450 "psllq $8, %%mm5 \n\t" 1451 "pand %%mm0, %%mm3 \n\t" 1452 "por %%mm3, %%mm5 \n\t" 1454 "movq (%0, %%"FF_REG_a
"), %%mm0 \n\t" 1455 "movq (%1, %%"FF_REG_a
"), %%mm1 \n\t" 1456 "movq 1(%0, %%"FF_REG_a
"), %%mm2 \n\t" 1457 "movq 1(%1, %%"FF_REG_a
"), %%mm3 \n\t" 1458 PAVGB" %%mm0, %%mm5 \n\t" 1459 PAVGB" %%mm0, %%mm3 \n\t" 1460 PAVGB" %%mm0, %%mm5 \n\t" 1461 PAVGB" %%mm0, %%mm3 \n\t" 1462 PAVGB" %%mm1, %%mm4 \n\t" 1463 PAVGB" %%mm1, %%mm2 \n\t" 1464 PAVGB" %%mm1, %%mm4 \n\t" 1465 PAVGB" %%mm1, %%mm2 \n\t" 1466 "movq %%mm5, %%mm7 \n\t" 1467 "movq %%mm4, %%mm6 \n\t" 1468 "punpcklbw %%mm3, %%mm5 \n\t" 1469 "punpckhbw %%mm3, %%mm7 \n\t" 1470 "punpcklbw %%mm2, %%mm4 \n\t" 1471 "punpckhbw %%mm2, %%mm6 \n\t" 1472 MOVNTQ" %%mm5, (%2, %%"FF_REG_a
", 2) \n\t" 1473 MOVNTQ" %%mm7, 8(%2, %%"FF_REG_a
", 2) \n\t" 1474 MOVNTQ" %%mm4, (%3, %%"FF_REG_a
", 2) \n\t" 1475 MOVNTQ" %%mm6, 8(%3, %%"FF_REG_a
", 2) \n\t" 1476 "add $8, %%"FF_REG_a
" \n\t" 1477 "movq -1(%0, %%"FF_REG_a
"), %%mm4 \n\t" 1478 "movq -1(%1, %%"FF_REG_a
"), %%mm5 \n\t" 1480 ::
"r" (
src + mmxSize ),
"r" (
src + srcStride + mmxSize ),
1481 "r" (dst + mmxSize*2),
"r" (dst + dstStride + mmxSize*2),
1488 dst[0] = (
src[0] * 3 +
src[srcStride]) >> 2;
1489 dst[dstStride] = (
src[0] + 3 *
src[srcStride]) >> 2;
1492 for (x=mmxSize-1; x<srcWidth-1; x++) {
1493 dst[2*x +1]= (3*
src[x+0] +
src[x+srcStride+1])>>2;
1494 dst[2*x+dstStride+2]= (
src[x+0] + 3*
src[x+srcStride+1])>>2;
1495 dst[2*x+dstStride+1]= (
src[x+1] + 3*
src[x+srcStride ])>>2;
1496 dst[2*x +2]= (3*
src[x+1] +
src[x+srcStride ])>>2;
1498 dst[srcWidth*2 -1 ]= (3*
src[srcWidth-1] +
src[srcWidth-1 + srcStride])>>2;
1499 dst[srcWidth*2 -1 + dstStride]= (
src[srcWidth-1] + 3*
src[srcWidth-1 + srcStride])>>2;
1508 for (x=0; x<srcWidth-1; x++) {
1509 dst[2*x+1]= (3*
src[x] +
src[x+1])>>2;
1510 dst[2*x+2]= (
src[x] + 3*
src[x+1])>>2;
1512 dst[2*srcWidth-1]=
src[srcWidth-1];
1514 __asm__
volatile(
EMMS" \n\t" 1520 #if !COMPILE_TEMPLATE_AMD3DNOW 1529 int lumStride,
int chromStride,
int srcStride)
1533 for (y=0; y<
height; y+=2) {
1535 "xor %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 1536 "pcmpeqw %%mm7, %%mm7 \n\t" 1537 "psrlw $8, %%mm7 \n\t" 1540 PREFETCH" 64(%0, %%"FF_REG_a
", 4) \n\t" 1541 "movq (%0, %%"FF_REG_a
", 4), %%mm0 \n\t" 1542 "movq 8(%0, %%"FF_REG_a
", 4), %%mm1 \n\t" 1543 "movq %%mm0, %%mm2 \n\t" 1544 "movq %%mm1, %%mm3 \n\t" 1545 "pand %%mm7, %%mm0 \n\t" 1546 "pand %%mm7, %%mm1 \n\t" 1547 "psrlw $8, %%mm2 \n\t" 1548 "psrlw $8, %%mm3 \n\t" 1549 "packuswb %%mm1, %%mm0 \n\t" 1550 "packuswb %%mm3, %%mm2 \n\t" 1552 MOVNTQ" %%mm2, (%1, %%"FF_REG_a
", 2) \n\t" 1554 "movq 16(%0, %%"FF_REG_a
", 4), %%mm1 \n\t" 1555 "movq 24(%0, %%"FF_REG_a
", 4), %%mm2 \n\t" 1556 "movq %%mm1, %%mm3 \n\t" 1557 "movq %%mm2, %%mm4 \n\t" 1558 "pand %%mm7, %%mm1 \n\t" 1559 "pand %%mm7, %%mm2 \n\t" 1560 "psrlw $8, %%mm3 \n\t" 1561 "psrlw $8, %%mm4 \n\t" 1562 "packuswb %%mm2, %%mm1 \n\t" 1563 "packuswb %%mm4, %%mm3 \n\t" 1565 MOVNTQ" %%mm3, 8(%1, %%"FF_REG_a
", 2) \n\t" 1567 "movq %%mm0, %%mm2 \n\t" 1568 "movq %%mm1, %%mm3 \n\t" 1569 "psrlw $8, %%mm0 \n\t" 1570 "psrlw $8, %%mm1 \n\t" 1571 "pand %%mm7, %%mm2 \n\t" 1572 "pand %%mm7, %%mm3 \n\t" 1573 "packuswb %%mm1, %%mm0 \n\t" 1574 "packuswb %%mm3, %%mm2 \n\t" 1576 MOVNTQ" %%mm0, (%3, %%"FF_REG_a
") \n\t" 1577 MOVNTQ" %%mm2, (%2, %%"FF_REG_a
") \n\t" 1579 "add $8, %%"FF_REG_a
" \n\t" 1580 "cmp %4, %%"FF_REG_a
" \n\t" 1582 ::
"r"(
src),
"r"(ydst),
"r"(udst),
"r"(vdst),
"g" (chromWidth)
1583 :
"memory",
"%"FF_REG_a
1590 "xor %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 1593 PREFETCH" 64(%0, %%"FF_REG_a
", 4) \n\t" 1594 "movq (%0, %%"FF_REG_a
", 4), %%mm0 \n\t" 1595 "movq 8(%0, %%"FF_REG_a
", 4), %%mm1 \n\t" 1596 "movq 16(%0, %%"FF_REG_a
", 4), %%mm2 \n\t" 1597 "movq 24(%0, %%"FF_REG_a
", 4), %%mm3 \n\t" 1598 "psrlw $8, %%mm0 \n\t" 1599 "psrlw $8, %%mm1 \n\t" 1600 "psrlw $8, %%mm2 \n\t" 1601 "psrlw $8, %%mm3 \n\t" 1602 "packuswb %%mm1, %%mm0 \n\t" 1603 "packuswb %%mm3, %%mm2 \n\t" 1605 MOVNTQ" %%mm0, (%1, %%"FF_REG_a
", 2) \n\t" 1606 MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a
", 2) \n\t" 1608 "add $8, %%"FF_REG_a
" \n\t" 1609 "cmp %4, %%"FF_REG_a
" \n\t" 1612 ::
"r"(
src),
"r"(ydst),
"r"(udst),
"r"(vdst),
"g" (chromWidth)
1613 :
"memory",
"%"FF_REG_a
1615 udst += chromStride;
1616 vdst += chromStride;
1620 __asm__
volatile(
EMMS" \n\t" 1636 int lumStride,
int chromStride,
int srcStride,
1639 #define BGR2Y_IDX "16*4+16*32" 1640 #define BGR2U_IDX "16*4+16*33" 1641 #define BGR2V_IDX "16*4+16*34" 1648 ydst += 2*lumStride;
1649 udst += chromStride;
1650 vdst += chromStride;
1654 for (y=0; y<
height-2; y+=2) {
1656 for (i=0; i<2; i++) {
1658 "mov %2, %%"FF_REG_a
"\n\t" 1659 "movq "BGR2Y_IDX
"(%3), %%mm6 \n\t" 1660 "movq "MANGLE(ff_w1111)
", %%mm5 \n\t" 1661 "pxor %%mm7, %%mm7 \n\t" 1662 "lea (%%"FF_REG_a
", %%"FF_REG_a
", 2), %%"FF_REG_d
" \n\t" 1665 PREFETCH" 64(%0, %%"FF_REG_d
") \n\t" 1666 "movd (%0, %%"FF_REG_d
"), %%mm0 \n\t" 1667 "movd 3(%0, %%"FF_REG_d
"), %%mm1 \n\t" 1668 "punpcklbw %%mm7, %%mm0 \n\t" 1669 "punpcklbw %%mm7, %%mm1 \n\t" 1670 "movd 6(%0, %%"FF_REG_d
"), %%mm2 \n\t" 1671 "movd 9(%0, %%"FF_REG_d
"), %%mm3 \n\t" 1672 "punpcklbw %%mm7, %%mm2 \n\t" 1673 "punpcklbw %%mm7, %%mm3 \n\t" 1674 "pmaddwd %%mm6, %%mm0 \n\t" 1675 "pmaddwd %%mm6, %%mm1 \n\t" 1676 "pmaddwd %%mm6, %%mm2 \n\t" 1677 "pmaddwd %%mm6, %%mm3 \n\t" 1678 "psrad $8, %%mm0 \n\t" 1679 "psrad $8, %%mm1 \n\t" 1680 "psrad $8, %%mm2 \n\t" 1681 "psrad $8, %%mm3 \n\t" 1682 "packssdw %%mm1, %%mm0 \n\t" 1683 "packssdw %%mm3, %%mm2 \n\t" 1684 "pmaddwd %%mm5, %%mm0 \n\t" 1685 "pmaddwd %%mm5, %%mm2 \n\t" 1686 "packssdw %%mm2, %%mm0 \n\t" 1687 "psraw $7, %%mm0 \n\t" 1689 "movd 12(%0, %%"FF_REG_d
"), %%mm4 \n\t" 1690 "movd 15(%0, %%"FF_REG_d
"), %%mm1 \n\t" 1691 "punpcklbw %%mm7, %%mm4 \n\t" 1692 "punpcklbw %%mm7, %%mm1 \n\t" 1693 "movd 18(%0, %%"FF_REG_d
"), %%mm2 \n\t" 1694 "movd 21(%0, %%"FF_REG_d
"), %%mm3 \n\t" 1695 "punpcklbw %%mm7, %%mm2 \n\t" 1696 "punpcklbw %%mm7, %%mm3 \n\t" 1697 "pmaddwd %%mm6, %%mm4 \n\t" 1698 "pmaddwd %%mm6, %%mm1 \n\t" 1699 "pmaddwd %%mm6, %%mm2 \n\t" 1700 "pmaddwd %%mm6, %%mm3 \n\t" 1701 "psrad $8, %%mm4 \n\t" 1702 "psrad $8, %%mm1 \n\t" 1703 "psrad $8, %%mm2 \n\t" 1704 "psrad $8, %%mm3 \n\t" 1705 "packssdw %%mm1, %%mm4 \n\t" 1706 "packssdw %%mm3, %%mm2 \n\t" 1707 "pmaddwd %%mm5, %%mm4 \n\t" 1708 "pmaddwd %%mm5, %%mm2 \n\t" 1709 "add $24, %%"FF_REG_d
"\n\t" 1710 "packssdw %%mm2, %%mm4 \n\t" 1711 "psraw $7, %%mm4 \n\t" 1713 "packuswb %%mm4, %%mm0 \n\t" 1714 "paddusb "MANGLE(ff_bgr2YOffset)
", %%mm0 \n\t" 1716 MOVNTQ" %%mm0, (%1, %%"FF_REG_a
") \n\t" 1717 "add $8, %%"FF_REG_a
" \n\t" 1721 :
"%"FF_REG_a,
"%"FF_REG_d
1728 "mov %4, %%"FF_REG_a
"\n\t" 1729 "movq "MANGLE(ff_w1111)
", %%mm5 \n\t" 1730 "movq "BGR2U_IDX
"(%5), %%mm6 \n\t" 1731 "pxor %%mm7, %%mm7 \n\t" 1732 "lea (%%"FF_REG_a
", %%"FF_REG_a
", 2), %%"FF_REG_d
" \n\t" 1733 "add %%"FF_REG_d
", %%"FF_REG_d
"\n\t" 1736 PREFETCH" 64(%0, %%"FF_REG_d
") \n\t" 1737 PREFETCH" 64(%1, %%"FF_REG_d
") \n\t" 1738 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW 1739 "movq (%0, %%"FF_REG_d
"), %%mm0 \n\t" 1740 "movq (%1, %%"FF_REG_d
"), %%mm1 \n\t" 1741 "movq 6(%0, %%"FF_REG_d
"), %%mm2 \n\t" 1742 "movq 6(%1, %%"FF_REG_d
"), %%mm3 \n\t" 1743 PAVGB" %%mm1, %%mm0 \n\t" 1744 PAVGB" %%mm3, %%mm2 \n\t" 1745 "movq %%mm0, %%mm1 \n\t" 1746 "movq %%mm2, %%mm3 \n\t" 1747 "psrlq $24, %%mm0 \n\t" 1748 "psrlq $24, %%mm2 \n\t" 1749 PAVGB" %%mm1, %%mm0 \n\t" 1750 PAVGB" %%mm3, %%mm2 \n\t" 1751 "punpcklbw %%mm7, %%mm0 \n\t" 1752 "punpcklbw %%mm7, %%mm2 \n\t" 1754 "movd (%0, %%"FF_REG_d
"), %%mm0 \n\t" 1755 "movd (%1, %%"FF_REG_d
"), %%mm1 \n\t" 1756 "movd 3(%0, %%"FF_REG_d
"), %%mm2 \n\t" 1757 "movd 3(%1, %%"FF_REG_d
"), %%mm3 \n\t" 1758 "punpcklbw %%mm7, %%mm0 \n\t" 1759 "punpcklbw %%mm7, %%mm1 \n\t" 1760 "punpcklbw %%mm7, %%mm2 \n\t" 1761 "punpcklbw %%mm7, %%mm3 \n\t" 1762 "paddw %%mm1, %%mm0 \n\t" 1763 "paddw %%mm3, %%mm2 \n\t" 1764 "paddw %%mm2, %%mm0 \n\t" 1765 "movd 6(%0, %%"FF_REG_d
"), %%mm4 \n\t" 1766 "movd 6(%1, %%"FF_REG_d
"), %%mm1 \n\t" 1767 "movd 9(%0, %%"FF_REG_d
"), %%mm2 \n\t" 1768 "movd 9(%1, %%"FF_REG_d
"), %%mm3 \n\t" 1769 "punpcklbw %%mm7, %%mm4 \n\t" 1770 "punpcklbw %%mm7, %%mm1 \n\t" 1771 "punpcklbw %%mm7, %%mm2 \n\t" 1772 "punpcklbw %%mm7, %%mm3 \n\t" 1773 "paddw %%mm1, %%mm4 \n\t" 1774 "paddw %%mm3, %%mm2 \n\t" 1775 "paddw %%mm4, %%mm2 \n\t" 1776 "psrlw $2, %%mm0 \n\t" 1777 "psrlw $2, %%mm2 \n\t" 1779 "movq "BGR2V_IDX
"(%5), %%mm1 \n\t" 1780 "movq "BGR2V_IDX
"(%5), %%mm3 \n\t" 1782 "pmaddwd %%mm0, %%mm1 \n\t" 1783 "pmaddwd %%mm2, %%mm3 \n\t" 1784 "pmaddwd %%mm6, %%mm0 \n\t" 1785 "pmaddwd %%mm6, %%mm2 \n\t" 1786 "psrad $8, %%mm0 \n\t" 1787 "psrad $8, %%mm1 \n\t" 1788 "psrad $8, %%mm2 \n\t" 1789 "psrad $8, %%mm3 \n\t" 1790 "packssdw %%mm2, %%mm0 \n\t" 1791 "packssdw %%mm3, %%mm1 \n\t" 1792 "pmaddwd %%mm5, %%mm0 \n\t" 1793 "pmaddwd %%mm5, %%mm1 \n\t" 1794 "packssdw %%mm1, %%mm0 \n\t" 1795 "psraw $7, %%mm0 \n\t" 1797 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW 1798 "movq 12(%0, %%"FF_REG_d
"), %%mm4 \n\t" 1799 "movq 12(%1, %%"FF_REG_d
"), %%mm1 \n\t" 1800 "movq 18(%0, %%"FF_REG_d
"), %%mm2 \n\t" 1801 "movq 18(%1, %%"FF_REG_d
"), %%mm3 \n\t" 1802 PAVGB" %%mm1, %%mm4 \n\t" 1803 PAVGB" %%mm3, %%mm2 \n\t" 1804 "movq %%mm4, %%mm1 \n\t" 1805 "movq %%mm2, %%mm3 \n\t" 1806 "psrlq $24, %%mm4 \n\t" 1807 "psrlq $24, %%mm2 \n\t" 1808 PAVGB" %%mm1, %%mm4 \n\t" 1809 PAVGB" %%mm3, %%mm2 \n\t" 1810 "punpcklbw %%mm7, %%mm4 \n\t" 1811 "punpcklbw %%mm7, %%mm2 \n\t" 1813 "movd 12(%0, %%"FF_REG_d
"), %%mm4 \n\t" 1814 "movd 12(%1, %%"FF_REG_d
"), %%mm1 \n\t" 1815 "movd 15(%0, %%"FF_REG_d
"), %%mm2 \n\t" 1816 "movd 15(%1, %%"FF_REG_d
"), %%mm3 \n\t" 1817 "punpcklbw %%mm7, %%mm4 \n\t" 1818 "punpcklbw %%mm7, %%mm1 \n\t" 1819 "punpcklbw %%mm7, %%mm2 \n\t" 1820 "punpcklbw %%mm7, %%mm3 \n\t" 1821 "paddw %%mm1, %%mm4 \n\t" 1822 "paddw %%mm3, %%mm2 \n\t" 1823 "paddw %%mm2, %%mm4 \n\t" 1824 "movd 18(%0, %%"FF_REG_d
"), %%mm5 \n\t" 1825 "movd 18(%1, %%"FF_REG_d
"), %%mm1 \n\t" 1826 "movd 21(%0, %%"FF_REG_d
"), %%mm2 \n\t" 1827 "movd 21(%1, %%"FF_REG_d
"), %%mm3 \n\t" 1828 "punpcklbw %%mm7, %%mm5 \n\t" 1829 "punpcklbw %%mm7, %%mm1 \n\t" 1830 "punpcklbw %%mm7, %%mm2 \n\t" 1831 "punpcklbw %%mm7, %%mm3 \n\t" 1832 "paddw %%mm1, %%mm5 \n\t" 1833 "paddw %%mm3, %%mm2 \n\t" 1834 "paddw %%mm5, %%mm2 \n\t" 1835 "movq "MANGLE(ff_w1111)
", %%mm5 \n\t" 1836 "psrlw $2, %%mm4 \n\t" 1837 "psrlw $2, %%mm2 \n\t" 1839 "movq "BGR2V_IDX
"(%5), %%mm1 \n\t" 1840 "movq "BGR2V_IDX
"(%5), %%mm3 \n\t" 1842 "pmaddwd %%mm4, %%mm1 \n\t" 1843 "pmaddwd %%mm2, %%mm3 \n\t" 1844 "pmaddwd %%mm6, %%mm4 \n\t" 1845 "pmaddwd %%mm6, %%mm2 \n\t" 1846 "psrad $8, %%mm4 \n\t" 1847 "psrad $8, %%mm1 \n\t" 1848 "psrad $8, %%mm2 \n\t" 1849 "psrad $8, %%mm3 \n\t" 1850 "packssdw %%mm2, %%mm4 \n\t" 1851 "packssdw %%mm3, %%mm1 \n\t" 1852 "pmaddwd %%mm5, %%mm4 \n\t" 1853 "pmaddwd %%mm5, %%mm1 \n\t" 1854 "add $24, %%"FF_REG_d
"\n\t" 1855 "packssdw %%mm1, %%mm4 \n\t" 1856 "psraw $7, %%mm4 \n\t" 1858 "movq %%mm0, %%mm1 \n\t" 1859 "punpckldq %%mm4, %%mm0 \n\t" 1860 "punpckhdq %%mm4, %%mm1 \n\t" 1861 "packsswb %%mm1, %%mm0 \n\t" 1862 "paddb "MANGLE(ff_bgr2UVOffset)
", %%mm0 \n\t" 1863 "movd %%mm0, (%2, %%"FF_REG_a
") \n\t" 1864 "punpckhdq %%mm0, %%mm0 \n\t" 1865 "movd %%mm0, (%3, %%"FF_REG_a
") \n\t" 1866 "add $4, %%"FF_REG_a
" \n\t" 1868 : :
"r" (
src+chromWidth*6),
"r" (
src+srcStride+chromWidth*6),
"r" (udst+chromWidth),
"r" (vdst+chromWidth),
"g" (-chromWidth),
"r"(
rgb2yuv)
1870 :
"%"FF_REG_a,
"%"FF_REG_d
1873 udst += chromStride;
1874 vdst += chromStride;
1878 __asm__
volatile(
EMMS" \n\t" 1887 #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX 1890 int src2Stride,
int dstStride)
1894 for (h=0; h <
height; h++) {
1898 #if COMPILE_TEMPLATE_SSE2 1899 if (!((((intptr_t)
src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) {
1901 "xor %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 1903 PREFETCH" 64(%1, %%"FF_REG_a
") \n\t" 1904 PREFETCH" 64(%2, %%"FF_REG_a
") \n\t" 1905 "movdqa (%1, %%"FF_REG_a
"), %%xmm0 \n\t" 1906 "movdqa (%1, %%"FF_REG_a
"), %%xmm1 \n\t" 1907 "movdqa (%2, %%"FF_REG_a
"), %%xmm2 \n\t" 1908 "punpcklbw %%xmm2, %%xmm0 \n\t" 1909 "punpckhbw %%xmm2, %%xmm1 \n\t" 1910 "movntdq %%xmm0, (%0, %%"FF_REG_a
", 2) \n\t" 1911 "movntdq %%xmm1, 16(%0, %%"FF_REG_a
", 2) \n\t" 1912 "add $16, %%"FF_REG_a
" \n\t" 1913 "cmp %3, %%"FF_REG_a
" \n\t" 1915 ::
"r"(dest),
"r"(src1),
"r"(src2),
"r" ((
x86_reg)
width-15)
1916 :
"memory",
XMM_CLOBBERS(
"xmm0",
"xmm1",
"xmm2",)
"%"FF_REG_a
1921 "xor %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 1923 PREFETCH" 64(%1, %%"FF_REG_a
") \n\t" 1924 PREFETCH" 64(%2, %%"FF_REG_a
") \n\t" 1925 "movq (%1, %%"FF_REG_a
"), %%mm0 \n\t" 1926 "movq 8(%1, %%"FF_REG_a
"), %%mm2 \n\t" 1927 "movq %%mm0, %%mm1 \n\t" 1928 "movq %%mm2, %%mm3 \n\t" 1929 "movq (%2, %%"FF_REG_a
"), %%mm4 \n\t" 1930 "movq 8(%2, %%"FF_REG_a
"), %%mm5 \n\t" 1931 "punpcklbw %%mm4, %%mm0 \n\t" 1932 "punpckhbw %%mm4, %%mm1 \n\t" 1933 "punpcklbw %%mm5, %%mm2 \n\t" 1934 "punpckhbw %%mm5, %%mm3 \n\t" 1935 MOVNTQ" %%mm0, (%0, %%"FF_REG_a
", 2) \n\t" 1936 MOVNTQ" %%mm1, 8(%0, %%"FF_REG_a
", 2) \n\t" 1937 MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a
", 2) \n\t" 1938 MOVNTQ" %%mm3, 24(%0, %%"FF_REG_a
", 2) \n\t" 1939 "add $16, %%"FF_REG_a
" \n\t" 1940 "cmp %3, %%"FF_REG_a
" \n\t" 1942 ::
"r"(dest),
"r"(src1),
"r"(src2),
"r" ((
x86_reg)
width-15)
1943 :
"memory",
"%"FF_REG_a
1948 dest[2*w+0] =
src1[
w];
1949 dest[2*w+1] = src2[
w];
1963 #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL 1964 #if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM 1973 int dst1Stride,
int dst2Stride)
1977 for (h = 0; h <
height; h++) {
1984 #
if !COMPILE_TEMPLATE_SSE2
1994 #if !COMPILE_TEMPLATE_SSE2 1995 #if !COMPILE_TEMPLATE_AMD3DNOW 1999 int srcStride1,
int srcStride2,
2000 int dstStride1,
int dstStride2)
2008 ::
"m"(*(
src1+srcStride1)),
"m"(*(src2+srcStride2)):
"memory");
2013 for (;x<w-31;x+=32) {
2016 "movq (%1,%2), %%mm0 \n\t" 2017 "movq 8(%1,%2), %%mm2 \n\t" 2018 "movq 16(%1,%2), %%mm4 \n\t" 2019 "movq 24(%1,%2), %%mm6 \n\t" 2020 "movq %%mm0, %%mm1 \n\t" 2021 "movq %%mm2, %%mm3 \n\t" 2022 "movq %%mm4, %%mm5 \n\t" 2023 "movq %%mm6, %%mm7 \n\t" 2024 "punpcklbw %%mm0, %%mm0 \n\t" 2025 "punpckhbw %%mm1, %%mm1 \n\t" 2026 "punpcklbw %%mm2, %%mm2 \n\t" 2027 "punpckhbw %%mm3, %%mm3 \n\t" 2028 "punpcklbw %%mm4, %%mm4 \n\t" 2029 "punpckhbw %%mm5, %%mm5 \n\t" 2030 "punpcklbw %%mm6, %%mm6 \n\t" 2031 "punpckhbw %%mm7, %%mm7 \n\t" 2032 MOVNTQ" %%mm0, (%0,%2,2) \n\t" 2033 MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" 2034 MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" 2035 MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" 2036 MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" 2037 MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" 2038 MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" 2039 MOVNTQ" %%mm7, 56(%0,%2,2)" 2040 ::
"r"(d),
"r"(s1),
"r"(x)
2043 for (;x<
w;x++) d[2*x]=d[2*x+1]=s1[x];
2046 const uint8_t*
s2=src2+srcStride2*(y>>1);
2049 for (;x<w-31;x+=32) {
2052 "movq (%1,%2), %%mm0 \n\t" 2053 "movq 8(%1,%2), %%mm2 \n\t" 2054 "movq 16(%1,%2), %%mm4 \n\t" 2055 "movq 24(%1,%2), %%mm6 \n\t" 2056 "movq %%mm0, %%mm1 \n\t" 2057 "movq %%mm2, %%mm3 \n\t" 2058 "movq %%mm4, %%mm5 \n\t" 2059 "movq %%mm6, %%mm7 \n\t" 2060 "punpcklbw %%mm0, %%mm0 \n\t" 2061 "punpckhbw %%mm1, %%mm1 \n\t" 2062 "punpcklbw %%mm2, %%mm2 \n\t" 2063 "punpckhbw %%mm3, %%mm3 \n\t" 2064 "punpcklbw %%mm4, %%mm4 \n\t" 2065 "punpckhbw %%mm5, %%mm5 \n\t" 2066 "punpcklbw %%mm6, %%mm6 \n\t" 2067 "punpckhbw %%mm7, %%mm7 \n\t" 2068 MOVNTQ" %%mm0, (%0,%2,2) \n\t" 2069 MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" 2070 MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" 2071 MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" 2072 MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" 2073 MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" 2074 MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" 2075 MOVNTQ" %%mm7, 56(%0,%2,2)" 2076 ::
"r"(d),
"r"(s2),
"r"(x)
2079 for (;x<
w;x++) d[2*x]=d[2*x+1]=s2[x];
2091 int srcStride1,
int srcStride2,
2092 int srcStride3,
int dstStride)
2099 const uint8_t* up=src2+srcStride2*(y>>2);
2100 const uint8_t* vp=src3+srcStride3*(y>>2);
2108 "movq (%1, %0, 4), %%mm0 \n\t" 2109 "movq (%2, %0), %%mm1 \n\t" 2110 "movq (%3, %0), %%mm2 \n\t" 2111 "movq %%mm0, %%mm3 \n\t" 2112 "movq %%mm1, %%mm4 \n\t" 2113 "movq %%mm2, %%mm5 \n\t" 2114 "punpcklbw %%mm1, %%mm1 \n\t" 2115 "punpcklbw %%mm2, %%mm2 \n\t" 2116 "punpckhbw %%mm4, %%mm4 \n\t" 2117 "punpckhbw %%mm5, %%mm5 \n\t" 2119 "movq %%mm1, %%mm6 \n\t" 2120 "punpcklbw %%mm2, %%mm1 \n\t" 2121 "punpcklbw %%mm1, %%mm0 \n\t" 2122 "punpckhbw %%mm1, %%mm3 \n\t" 2123 MOVNTQ" %%mm0, (%4, %0, 8) \n\t" 2124 MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t" 2126 "punpckhbw %%mm2, %%mm6 \n\t" 2127 "movq 8(%1, %0, 4), %%mm0 \n\t" 2128 "movq %%mm0, %%mm3 \n\t" 2129 "punpcklbw %%mm6, %%mm0 \n\t" 2130 "punpckhbw %%mm6, %%mm3 \n\t" 2131 MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t" 2132 MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t" 2134 "movq %%mm4, %%mm6 \n\t" 2135 "movq 16(%1, %0, 4), %%mm0 \n\t" 2136 "movq %%mm0, %%mm3 \n\t" 2137 "punpcklbw %%mm5, %%mm4 \n\t" 2138 "punpcklbw %%mm4, %%mm0 \n\t" 2139 "punpckhbw %%mm4, %%mm3 \n\t" 2140 MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t" 2141 MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t" 2143 "punpckhbw %%mm5, %%mm6 \n\t" 2144 "movq 24(%1, %0, 4), %%mm0 \n\t" 2145 "movq %%mm0, %%mm3 \n\t" 2146 "punpcklbw %%mm6, %%mm0 \n\t" 2147 "punpckhbw %%mm6, %%mm3 \n\t" 2148 MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t" 2149 MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t" 2152 :
"r"(yp),
"r" (up),
"r"(vp),
"r"(d)
2156 const int x2 = x<<2;
2159 d[8*x+2] = yp[x2+1];
2161 d[8*x+4] = yp[x2+2];
2163 d[8*x+6] = yp[x2+3];
2184 "pcmpeqw %%mm7, %%mm7 \n\t" 2185 "psrlw $8, %%mm7 \n\t" 2187 "movq -30(%1, %0, 2), %%mm0 \n\t" 2188 "movq -22(%1, %0, 2), %%mm1 \n\t" 2189 "movq -14(%1, %0, 2), %%mm2 \n\t" 2190 "movq -6(%1, %0, 2), %%mm3 \n\t" 2191 "pand %%mm7, %%mm0 \n\t" 2192 "pand %%mm7, %%mm1 \n\t" 2193 "pand %%mm7, %%mm2 \n\t" 2194 "pand %%mm7, %%mm3 \n\t" 2195 "packuswb %%mm1, %%mm0 \n\t" 2196 "packuswb %%mm3, %%mm2 \n\t" 2197 MOVNTQ" %%mm0,-15(%2, %0) \n\t" 2198 MOVNTQ" %%mm2,- 7(%2, %0) \n\t" 2202 :
"r"(
src),
"r"(dst)
2222 "pcmpeqw %%mm7, %%mm7 \n\t" 2223 "psrlw $8, %%mm7 \n\t" 2225 "movq -32(%1, %0, 2), %%mm0 \n\t" 2226 "movq -24(%1, %0, 2), %%mm1 \n\t" 2227 "movq -16(%1, %0, 2), %%mm2 \n\t" 2228 "movq -8(%1, %0, 2), %%mm3 \n\t" 2229 "pand %%mm7, %%mm0 \n\t" 2230 "pand %%mm7, %%mm1 \n\t" 2231 "pand %%mm7, %%mm2 \n\t" 2232 "pand %%mm7, %%mm3 \n\t" 2233 "packuswb %%mm1, %%mm0 \n\t" 2234 "packuswb %%mm3, %%mm2 \n\t" 2235 MOVNTQ" %%mm0,-16(%2, %0) \n\t" 2236 MOVNTQ" %%mm2,- 8(%2, %0) \n\t" 2240 :
"r"(
src),
"r"(dst)
2250 #if !COMPILE_TEMPLATE_AMD3DNOW 2260 "pcmpeqw %%mm7, %%mm7 \n\t" 2261 "psrlw $8, %%mm7 \n\t" 2263 "movq -28(%1, %0, 4), %%mm0 \n\t" 2264 "movq -20(%1, %0, 4), %%mm1 \n\t" 2265 "movq -12(%1, %0, 4), %%mm2 \n\t" 2266 "movq -4(%1, %0, 4), %%mm3 \n\t" 2267 "pand %%mm7, %%mm0 \n\t" 2268 "pand %%mm7, %%mm1 \n\t" 2269 "pand %%mm7, %%mm2 \n\t" 2270 "pand %%mm7, %%mm3 \n\t" 2271 "packuswb %%mm1, %%mm0 \n\t" 2272 "packuswb %%mm3, %%mm2 \n\t" 2273 "movq %%mm0, %%mm1 \n\t" 2274 "movq %%mm2, %%mm3 \n\t" 2275 "psrlw $8, %%mm0 \n\t" 2276 "psrlw $8, %%mm2 \n\t" 2277 "pand %%mm7, %%mm1 \n\t" 2278 "pand %%mm7, %%mm3 \n\t" 2279 "packuswb %%mm2, %%mm0 \n\t" 2280 "packuswb %%mm3, %%mm1 \n\t" 2281 MOVNTQ" %%mm0,- 7(%3, %0) \n\t" 2282 MOVNTQ" %%mm1,- 7(%2, %0) \n\t" 2286 :
"r"(
src),
"r"(dst0),
"r"(dst1)
2309 "pcmpeqw %%mm7, %%mm7 \n\t" 2310 "psrlw $8, %%mm7 \n\t" 2312 "movq -28(%1, %0, 4), %%mm0 \n\t" 2313 "movq -20(%1, %0, 4), %%mm1 \n\t" 2314 "movq -12(%1, %0, 4), %%mm2 \n\t" 2315 "movq -4(%1, %0, 4), %%mm3 \n\t" 2316 PAVGB" -28(%2, %0, 4), %%mm0 \n\t" 2317 PAVGB" -20(%2, %0, 4), %%mm1 \n\t" 2318 PAVGB" -12(%2, %0, 4), %%mm2 \n\t" 2319 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" 2320 "pand %%mm7, %%mm0 \n\t" 2321 "pand %%mm7, %%mm1 \n\t" 2322 "pand %%mm7, %%mm2 \n\t" 2323 "pand %%mm7, %%mm3 \n\t" 2324 "packuswb %%mm1, %%mm0 \n\t" 2325 "packuswb %%mm3, %%mm2 \n\t" 2326 "movq %%mm0, %%mm1 \n\t" 2327 "movq %%mm2, %%mm3 \n\t" 2328 "psrlw $8, %%mm0 \n\t" 2329 "psrlw $8, %%mm2 \n\t" 2330 "pand %%mm7, %%mm1 \n\t" 2331 "pand %%mm7, %%mm3 \n\t" 2332 "packuswb %%mm2, %%mm0 \n\t" 2333 "packuswb %%mm3, %%mm1 \n\t" 2334 MOVNTQ" %%mm0,- 7(%4, %0) \n\t" 2335 MOVNTQ" %%mm1,- 7(%3, %0) \n\t" 2339 :
"r"(
src0),
"r"(
src1),
"r"(dst0),
"r"(dst1)
2351 #if !COMPILE_TEMPLATE_AMD3DNOW 2361 "pcmpeqw %%mm7, %%mm7 \n\t" 2362 "psrlw $8, %%mm7 \n\t" 2364 "movq -28(%1, %0, 4), %%mm0 \n\t" 2365 "movq -20(%1, %0, 4), %%mm1 \n\t" 2366 "movq -12(%1, %0, 4), %%mm2 \n\t" 2367 "movq -4(%1, %0, 4), %%mm3 \n\t" 2368 "psrlw $8, %%mm0 \n\t" 2369 "psrlw $8, %%mm1 \n\t" 2370 "psrlw $8, %%mm2 \n\t" 2371 "psrlw $8, %%mm3 \n\t" 2372 "packuswb %%mm1, %%mm0 \n\t" 2373 "packuswb %%mm3, %%mm2 \n\t" 2374 "movq %%mm0, %%mm1 \n\t" 2375 "movq %%mm2, %%mm3 \n\t" 2376 "psrlw $8, %%mm0 \n\t" 2377 "psrlw $8, %%mm2 \n\t" 2378 "pand %%mm7, %%mm1 \n\t" 2379 "pand %%mm7, %%mm3 \n\t" 2380 "packuswb %%mm2, %%mm0 \n\t" 2381 "packuswb %%mm3, %%mm1 \n\t" 2382 MOVNTQ" %%mm0,- 7(%3, %0) \n\t" 2383 MOVNTQ" %%mm1,- 7(%2, %0) \n\t" 2387 :
"r"(
src),
"r"(dst0),
"r"(dst1)
2411 "pcmpeqw %%mm7, %%mm7 \n\t" 2412 "psrlw $8, %%mm7 \n\t" 2414 "movq -28(%1, %0, 4), %%mm0 \n\t" 2415 "movq -20(%1, %0, 4), %%mm1 \n\t" 2416 "movq -12(%1, %0, 4), %%mm2 \n\t" 2417 "movq -4(%1, %0, 4), %%mm3 \n\t" 2418 PAVGB" -28(%2, %0, 4), %%mm0 \n\t" 2419 PAVGB" -20(%2, %0, 4), %%mm1 \n\t" 2420 PAVGB" -12(%2, %0, 4), %%mm2 \n\t" 2421 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" 2422 "psrlw $8, %%mm0 \n\t" 2423 "psrlw $8, %%mm1 \n\t" 2424 "psrlw $8, %%mm2 \n\t" 2425 "psrlw $8, %%mm3 \n\t" 2426 "packuswb %%mm1, %%mm0 \n\t" 2427 "packuswb %%mm3, %%mm2 \n\t" 2428 "movq %%mm0, %%mm1 \n\t" 2429 "movq %%mm2, %%mm3 \n\t" 2430 "psrlw $8, %%mm0 \n\t" 2431 "psrlw $8, %%mm2 \n\t" 2432 "pand %%mm7, %%mm1 \n\t" 2433 "pand %%mm7, %%mm3 \n\t" 2434 "packuswb %%mm2, %%mm0 \n\t" 2435 "packuswb %%mm3, %%mm1 \n\t" 2436 MOVNTQ" %%mm0,- 7(%4, %0) \n\t" 2437 MOVNTQ" %%mm1,- 7(%3, %0) \n\t" 2441 :
"r"(
src0),
"r"(
src1),
"r"(dst0),
"r"(dst1)
2457 int lumStride,
int chromStride,
int srcStride)
2462 for (y=0; y<
height; y++) {
2480 #if !COMPILE_TEMPLATE_AMD3DNOW 2483 int lumStride,
int chromStride,
int srcStride)
2488 for (y=0; y<
height; y++) {
2507 int lumStride,
int chromStride,
int srcStride)
2512 for (y=0; y<
height; y++) {
2530 #if !COMPILE_TEMPLATE_AMD3DNOW 2533 int lumStride,
int chromStride,
int srcStride)
2538 for (y=0; y<
height; y++) {
2558 #if !COMPILE_TEMPLATE_SSE2 2559 #if !COMPILE_TEMPLATE_AMD3DNOW 2589 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW 2600 #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX 2603 #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL 2604 #if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM static void RENAME() rgb32tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() yuyvtoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
static void RENAME() vu9_to_vu12(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, int width, int height, int srcStride1, int srcStride2, int dstStride1, int dstStride2)
static void RENAME() uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
static void RENAME() rgb16tobgr24(const uint8_t *src, uint8_t *dst, int src_size)
static void fn() rgb2yuv(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], int16_t *rgb[3], ptrdiff_t s, int w, int h, const int16_t rgb2yuv_coeffs[3][3][8], const int16_t yuv_offset[8])
static void RENAME() rgb32tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() rgb24tobgr24(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() yuv422ptoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Width should be a multiple of 16.
static void RENAME() extract_even2(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
static void RENAME() rgb15to32(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride)
Height should be a multiple of 2 and width should be a multiple of 16.
Macro definitions for various function/variable attributes.
static void RENAME() yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride)
Height should be a multiple of 2 and width should be a multiple of 16.
static av_cold int end(AVCodecContext *avctx)
void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride, int32_t *rgb2yuv)
Height should be a multiple of 2 and width should be a multiple of 2.
static void RENAME() rgb24tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() extract_odd2avg(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
static void RENAME() rgb24tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() uyvytoyuv420(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
static void RENAME() shuffle_bytes_2103(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() yv12touyvy(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Height should be a multiple of 2 and width should be a multiple of 16 (if this is a problem for anyone then tell me, and I will fix it).
static av_cold void RENAME() rgb2rgb_init(void)
static void RENAME() yuvPlanartouyvy(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
void(* ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride, int32_t *rgb2yuv)
Height should be a multiple of 2 and width should be a multiple of 2.
static void RENAME() rgb24to15(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() rgb15to16(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() rgb16to15(const uint8_t *src, uint8_t *dst, int src_size)
#define XMM_CLOBBERS(...)
static void RENAME() extract_odd(const uint8_t *src, uint8_t *dst, x86_reg count)
static void RENAME() rgb32to16(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() extract_even2avg(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
static void RENAME() yuv422ptouyvy(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Width should be a multiple of 16.
void(* deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, int width, int height, int srcStride, int dst1Stride, int dst2Stride)
typedef void(RENAME(mix_any_func_type))
static void RENAME() yuvPlanartoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
static void RENAME() rgb16to32(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() rgb15tobgr24(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() extract_even(const uint8_t *src, uint8_t *dst, x86_reg count)
static void RENAME() rgb32tobgr24(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() yuyvtoyuv420(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
static void RENAME() yvu9_to_yuy2(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, uint8_t *dst, int width, int height, int srcStride1, int srcStride2, int srcStride3, int dstStride)
static void RENAME() extract_odd2(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
static void RENAME() rgb24tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Height should be a multiple of 2 and width should be a multiple of 16.
void(* planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride)
static void RENAME() rgb24to16(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() rgb32to15(const uint8_t *src, uint8_t *dst, int src_size)
static void RENAME() interleaveBytes(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, int width, int height, int src1Stride, int src2Stride, int dstStride)
Height should be a multiple of 2 and width should be a multiple of 2.
#define NAMED_CONSTRAINTS_ADD(...)
#define AV_CEIL_RSHIFT(a, b)