33 #if COMPILE_TEMPLATE_MMXEXT 34 #define MOVNTQ "movntq" 35 #define SFENCE "sfence" 38 #define SFENCE " # nop" 46 #define YUV2RGB_LOOP(depth) \ 47 h_size = (c->dstW + 7) & ~7; \ 48 if (h_size * depth > FFABS(dstStride[0])) \ 51 vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \ 53 __asm__ volatile ("pxor %mm4, %mm4\n\t"); \ 54 for (y = 0; y < srcSliceH; y++) { \ 55 uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ 56 const uint8_t *py = src[0] + y * srcStride[0]; \ 57 const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ 58 const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ 59 x86_reg index = -h_size / 2; \ 61 #define YUV2RGB_INITIAL_LOAD \ 63 "movq (%5, %0, 2), %%mm6\n\t" \ 64 "movd (%2, %0), %%mm0\n\t" \ 65 "movd (%3, %0), %%mm1\n\t" \ 86 "movq %%mm6, %%mm7\n\t" \ 87 "punpcklbw %%mm4, %%mm0\n\t" \ 88 "punpcklbw %%mm4, %%mm1\n\t" \ 89 "pand "MANGLE(mmx_00ffw)", %%mm6\n\t" \ 90 "psrlw $8, %%mm7\n\t" \ 91 "psllw $3, %%mm0\n\t" \ 92 "psllw $3, %%mm1\n\t" \ 93 "psllw $3, %%mm6\n\t" \ 94 "psllw $3, %%mm7\n\t" \ 95 "psubsw "U_OFFSET"(%4), %%mm0\n\t" \ 96 "psubsw "V_OFFSET"(%4), %%mm1\n\t" \ 97 "psubw "Y_OFFSET"(%4), %%mm6\n\t" \ 98 "psubw "Y_OFFSET"(%4), %%mm7\n\t" \ 101 "movq %%mm0, %%mm2\n\t" \ 102 "movq %%mm1, %%mm3\n\t" \ 103 "pmulhw "UG_COEFF"(%4), %%mm2\n\t" \ 104 "pmulhw "VG_COEFF"(%4), %%mm3\n\t" \ 105 "pmulhw "Y_COEFF" (%4), %%mm6\n\t" \ 106 "pmulhw "Y_COEFF" (%4), %%mm7\n\t" \ 107 "pmulhw "UB_COEFF"(%4), %%mm0\n\t" \ 108 "pmulhw "VR_COEFF"(%4), %%mm1\n\t" \ 109 "paddsw %%mm3, %%mm2\n\t" \ 114 "movq %%mm7, %%mm3\n\t" \ 115 "movq %%mm7, %%mm5\n\t" \ 116 "paddsw %%mm0, %%mm3\n\t" \ 117 "paddsw %%mm1, %%mm5\n\t" \ 118 "paddsw %%mm2, %%mm7\n\t" \ 119 "paddsw %%mm6, %%mm0\n\t" \ 120 "paddsw %%mm6, %%mm1\n\t" \ 121 "paddsw %%mm6, %%mm2\n\t" \ 123 #define RGB_PACK_INTERLEAVE \ 125 "packuswb %%mm1, %%mm0\n\t" \ 126 "packuswb %%mm5, %%mm3\n\t" \ 127 "packuswb %%mm2, %%mm2\n\t" \ 128 "movq %%mm0, %%mm1\n\n" \ 129 "packuswb %%mm7, %%mm7\n\t" \ 130 "punpcklbw %%mm3, %%mm0\n\t" \ 131 "punpckhbw %%mm3, %%mm1\n\t" \ 132 "punpcklbw %%mm7, %%mm2\n\t" \ 134 #define YUV2RGB_ENDLOOP(depth) \ 135 "movq 8 (%5, %0, 2), %%mm6\n\t" \ 136 "movd 4 (%3, %0), %%mm1\n\t" \ 137 "movd 4 (%2, %0), %%mm0\n\t" \ 138 "add $"AV_STRINGIFY(depth * 8)", %1\n\t" \ 142 #if COMPILE_TEMPLATE_MMXEXT 143 #undef RGB_PACK24_B_OPERANDS 144 #define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ARRAY_ADD(mask1101,mask0110,mask0100,mask0010,mask1001) 146 #undef RGB_PACK24_B_OPERANDS 147 #define RGB_PACK24_B_OPERANDS 150 #define YUV2RGB_OPERANDS \ 151 : "+r" (index), "+r" (image) \ 152 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ 154 NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \ 155 RGB_PACK24_B_OPERANDS \ 160 #define YUV2RGB_OPERANDS_ALPHA \ 161 : "+r" (index), "+r" (image) \ 162 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ 163 "r" (py - 2*index), "r" (pa - 2*index) \ 164 NAMED_CONSTRAINTS_ADD(mmx_00ffw) \ 169 #define YUV2RGB_ENDFUNC \ 170 __asm__ volatile (SFENCE"\n\t" \ 177 #define RGB_PACK16(gmask, is15) \ 178 "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \ 179 "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \ 180 "movq %%mm2, %%mm3\n\t" \ 181 "psllw $"AV_STRINGIFY(3-is15)", %%mm2\n\t" \ 182 "psrlw $"AV_STRINGIFY(5+is15)", %%mm3\n\t" \ 183 "psrlw $3, %%mm0\n\t" \ 184 IF##is15("psrlw $1, %%mm1\n\t") \ 185 "pand "MANGLE(pb_e0)", %%mm2\n\t" \ 186 "pand "MANGLE(gmask)", %%mm3\n\t" \ 187 "por %%mm2, %%mm0\n\t" \ 188 "por %%mm3, %%mm1\n\t" \ 189 "movq %%mm0, %%mm2\n\t" \ 190 "punpcklbw %%mm1, %%mm0\n\t" \ 191 "punpckhbw %%mm1, %%mm2\n\t" \ 192 MOVNTQ " %%mm0, (%1)\n\t" \ 193 MOVNTQ " %%mm2, 8(%1)\n\t" \ 196 "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \ 197 "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ 198 "paddusb "RED_DITHER"(%4), %%mm1\n\t" \ 200 #if !COMPILE_TEMPLATE_MMXEXT 203 int srcSliceY,
int srcSliceH,
204 uint8_t *dst[],
int dstStride[])
206 int y, h_size, vshift;
231 int srcSliceY,
int srcSliceH,
232 uint8_t *dst[],
int dstStride[])
234 int y, h_size, vshift;
258 #define RGB_PACK24(blue, red)\ 259 "packuswb %%mm3, %%mm0 \n" \ 260 "packuswb %%mm5, %%mm1 \n" \ 261 "packuswb %%mm7, %%mm2 \n" \ 262 "movq %%mm"red", %%mm3 \n"\ 263 "movq %%mm"blue", %%mm6 \n"\ 264 "psrlq $32, %%mm"red" \n" \ 265 "punpcklbw %%mm2, %%mm3 \n" \ 266 "punpcklbw %%mm"red", %%mm6 \n" \ 267 "movq %%mm3, %%mm5 \n"\ 268 "punpckhbw %%mm"blue", %%mm2 \n" \ 269 "punpcklwd %%mm6, %%mm3 \n" \ 270 "punpckhwd %%mm6, %%mm5 \n" \ 273 #if COMPILE_TEMPLATE_MMXEXT 280 #define RGB_PACK24_B\ 281 "pshufw $0xc6, %%mm2, %%mm1 \n"\ 282 "pshufw $0x84, %%mm3, %%mm6 \n"\ 283 "pshufw $0x38, %%mm5, %%mm7 \n"\ 284 "pand "MANGLE(mask1101)", %%mm6 \n" \ 285 "movq %%mm1, %%mm0 \n"\ 286 "pand "MANGLE(mask0110)", %%mm7 \n" \ 287 "movq %%mm1, %%mm2 \n"\ 288 "pand "MANGLE(mask0100)", %%mm1 \n" \ 289 "psrlq $48, %%mm3 \n" \ 290 "pand "MANGLE(mask0010)", %%mm0 \n" \ 291 "psllq $32, %%mm5 \n" \ 292 "pand "MANGLE(mask1001)", %%mm2 \n" \ 293 "por %%mm3, %%mm1 \n"\ 294 "por %%mm6, %%mm0 \n"\ 295 "por %%mm5, %%mm1 \n"\ 296 "por %%mm7, %%mm2 \n"\ 297 MOVNTQ" %%mm0, (%1) \n"\ 298 MOVNTQ" %%mm1, 8(%1) \n"\ 299 MOVNTQ" %%mm2, 16(%1) \n"\ 303 #define RGB_PACK24_B\ 304 "movd %%mm3, (%1) \n" \ 305 "movd %%mm2, 4(%1) \n" \ 306 "psrlq $32, %%mm3 \n"\ 307 "psrlq $16, %%mm2 \n"\ 308 "movd %%mm3, 6(%1) \n" \ 309 "movd %%mm2, 10(%1) \n" \ 310 "psrlq $16, %%mm2 \n"\ 311 "movd %%mm5, 12(%1) \n" \ 312 "movd %%mm2, 16(%1) \n" \ 313 "psrlq $32, %%mm5 \n"\ 314 "movd %%mm2, 20(%1) \n" \ 315 "movd %%mm5, 18(%1) \n" \ 321 int srcSliceY,
int srcSliceH,
322 uint8_t *dst[],
int dstStride[])
324 int y, h_size, vshift;
339 int srcSliceY,
int srcSliceH,
340 uint8_t *dst[],
int dstStride[])
342 int y, h_size, vshift;
356 #define SET_EMPTY_ALPHA \ 357 "pcmpeqd %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t" \ 360 "movq (%6, %0, 2), %%mm"REG_ALPHA"\n\t" \ 362 #define RGB_PACK32(red, green, blue, alpha) \ 363 "movq %%mm"blue", %%mm5\n\t" \ 364 "movq %%mm"red", %%mm6\n\t" \ 365 "punpckhbw %%mm"green", %%mm5\n\t" \ 366 "punpcklbw %%mm"green", %%mm"blue"\n\t" \ 367 "punpckhbw %%mm"alpha", %%mm6\n\t" \ 368 "punpcklbw %%mm"alpha", %%mm"red"\n\t" \ 369 "movq %%mm"blue", %%mm"green"\n\t" \ 370 "movq %%mm5, %%mm"alpha"\n\t" \ 371 "punpcklwd %%mm"red", %%mm"blue"\n\t" \ 372 "punpckhwd %%mm"red", %%mm"green"\n\t" \ 373 "punpcklwd %%mm6, %%mm5\n\t" \ 374 "punpckhwd %%mm6, %%mm"alpha"\n\t" \ 375 MOVNTQ " %%mm"blue", 0(%1)\n\t" \ 376 MOVNTQ " %%mm"green", 8(%1)\n\t" \ 377 MOVNTQ " %%mm5, 16(%1)\n\t" \ 378 MOVNTQ " %%mm"alpha", 24(%1)\n\t" \ 380 #if !COMPILE_TEMPLATE_MMXEXT 383 int srcSliceY,
int srcSliceH,
384 uint8_t *dst[],
int dstStride[])
386 int y, h_size, vshift;
401 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA 404 int srcSliceY,
int srcSliceH,
405 uint8_t *dst[],
int dstStride[])
407 int y, h_size, vshift;
411 const uint8_t *pa =
src[3] + y * srcStride[3];
426 int srcSliceY,
int srcSliceH,
427 uint8_t *dst[],
int dstStride[])
429 int y, h_size, vshift;
444 #if HAVE_7REGS && CONFIG_SWSCALE_ALPHA 447 int srcSliceY,
int srcSliceH,
448 uint8_t *dst[],
int dstStride[])
450 int y, h_size, vshift;
454 const uint8_t *pa =
src[3] + y * srcStride[3];
const uint64_t ff_dither8[2]
#define YUV2RGB_INITIAL_LOAD
static int RENAME() yuv420_rgb24(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
const uint64_t ff_dither4[2]
static int RENAME() yuv420_bgr24(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
static int RENAME() yuv420_rgb15(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
#define RGB_PACK32(red, green, blue, alpha)
#define DECLARE_ASM_CONST(n, t, v)
Declare a static constant aligned variable appropriate for use in inline assembly code...
static int RENAME() yuv420_rgb16(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
#define YUV2RGB_ENDLOOP(depth)
static int RENAME() yuv420_bgr32(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
#define YUV2RGB_LOOP(depth)
#define RGB_PACK16(gmask, is15)
static int RENAME() yuv420_rgb32(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
#define YUV2RGB_OPERANDS_ALPHA
#define RGB_PACK_INTERLEAVE
#define RGB_PACK24(blue, red)