36 #if HAVE_6REGS && HAVE_INLINE_ASM && HAVE_MMX_EXTERNAL 38 void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst,
42 const int16_t *
src,
int rnd);
44 const int16_t *
src,
int rnd);
47 #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t" 50 #define NORMALIZE_MMX(SHIFT) \ 51 "paddw %%mm7, %%mm3 \n\t" \ 52 "paddw %%mm7, %%mm4 \n\t" \ 53 "psraw "SHIFT", %%mm3 \n\t" \ 54 "psraw "SHIFT", %%mm4 \n\t" 56 #define TRANSFER_DO_PACK(OP) \ 57 "packuswb %%mm4, %%mm3 \n\t" \ 59 "movq %%mm3, (%2) \n\t" 61 #define TRANSFER_DONT_PACK(OP) \ 64 "movq %%mm3, 0(%2) \n\t" \ 65 "movq %%mm4, 8(%2) \n\t" 68 #define DO_UNPACK(reg) "punpcklbw %%mm0, " reg "\n\t" 69 #define DONT_UNPACK(reg) 72 #define LOAD_ROUNDER_MMX(ROUND) \ 73 "movd "ROUND", %%mm7 \n\t" \ 74 "punpcklwd %%mm7, %%mm7 \n\t" \ 75 "punpckldq %%mm7, %%mm7 \n\t" 81 #define VC1_SHIFT2(OP, OPNAME)\ 82 static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\ 83 x86_reg stride, int rnd, x86_reg offset)\ 87 "mov $8, %%"FF_REG_c" \n\t"\ 88 LOAD_ROUNDER_MMX("%5")\ 89 "movq "MANGLE(ff_pw_9)", %%mm6\n\t"\ 91 "movd 0(%0 ), %%mm3 \n\t"\ 92 "movd 4(%0 ), %%mm4 \n\t"\ 93 "movd 0(%0,%2), %%mm1 \n\t"\ 94 "movd 4(%0,%2), %%mm2 \n\t"\ 96 "punpcklbw %%mm0, %%mm3 \n\t"\ 97 "punpcklbw %%mm0, %%mm4 \n\t"\ 98 "punpcklbw %%mm0, %%mm1 \n\t"\ 99 "punpcklbw %%mm0, %%mm2 \n\t"\ 100 "paddw %%mm1, %%mm3 \n\t"\ 101 "paddw %%mm2, %%mm4 \n\t"\ 102 "movd 0(%0,%3), %%mm1 \n\t"\ 103 "movd 4(%0,%3), %%mm2 \n\t"\ 104 "pmullw %%mm6, %%mm3 \n\t" \ 105 "pmullw %%mm6, %%mm4 \n\t" \ 106 "punpcklbw %%mm0, %%mm1 \n\t"\ 107 "punpcklbw %%mm0, %%mm2 \n\t"\ 108 "psubw %%mm1, %%mm3 \n\t" \ 109 "psubw %%mm2, %%mm4 \n\t" \ 110 "movd 0(%0,%2), %%mm1 \n\t"\ 111 "movd 4(%0,%2), %%mm2 \n\t"\ 112 "punpcklbw %%mm0, %%mm1 \n\t"\ 113 "punpcklbw %%mm0, %%mm2 \n\t"\ 114 "psubw %%mm1, %%mm3 \n\t" \ 115 "psubw %%mm2, %%mm4 \n\t" \ 117 "packuswb %%mm4, %%mm3 \n\t"\ 119 "movq %%mm3, (%1) \n\t"\ 122 "dec %%"FF_REG_c" \n\t"\ 124 : "+r"(src), "+r"(dst)\ 125 : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\ 127 NAMED_CONSTRAINTS_ADD(ff_pw_9)\ 128 : "%"FF_REG_c, "memory"\ 145 #define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4) \ 146 MOVQ "*0+"A1", %%mm1 
\n\t" \ 147 MOVQ "*4+"A1", %%mm2 \n\t" \ 150 "pmullw "MANGLE(ff_pw_3)", %%mm1\n\t" \ 151 "pmullw "MANGLE(ff_pw_3)", %%mm2\n\t" \ 152 MOVQ "*0+"A2", %%mm3 \n\t" \ 153 MOVQ "*4+"A2", %%mm4 \n\t" \ 156 "pmullw %%mm6, %%mm3 \n\t" \ 157 "pmullw %%mm6, %%mm4 \n\t" \ 158 "psubw %%mm1, %%mm3 \n\t" \ 159 "psubw %%mm2, %%mm4 \n\t" \ 160 MOVQ "*0+"A4", %%mm1 \n\t" \ 161 MOVQ "*4+"A4", %%mm2 \n\t" \ 164 "psllw $2, %%mm1 \n\t" \ 165 "psllw $2, %%mm2 \n\t" \ 166 "psubw %%mm1, %%mm3 \n\t" \ 167 "psubw %%mm2, %%mm4 \n\t" \ 168 MOVQ "*0+"A3", %%mm1 \n\t" \ 169 MOVQ "*4+"A3", %%mm2 \n\t" \ 172 "pmullw %%mm5, %%mm1 \n\t" \ 173 "pmullw %%mm5, %%mm2 \n\t" \ 174 "paddw %%mm1, %%mm3 \n\t" \ 175 "paddw %%mm2, %%mm4 \n\t" 185 #define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4) \ 187 vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \ 188 x86_reg src_stride, \ 189 int rnd, int64_t shift) \ 194 LOAD_ROUNDER_MMX("%5") \ 195 "movq "MANGLE(ff_pw_53)", %%mm5\n\t" \ 196 "movq "MANGLE(ff_pw_18)", %%mm6\n\t" \ 199 MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \ 200 NORMALIZE_MMX("%6") \ 201 TRANSFER_DONT_PACK(OP_PUT) \ 203 "movd 8+"A1", %%mm1 \n\t" \ 205 "movq %%mm1, %%mm3 \n\t" \ 206 "paddw %%mm1, %%mm1 \n\t" \ 207 "paddw %%mm3, %%mm1 \n\t" \ 208 "movd 8+"A2", %%mm3 \n\t" \ 210 "pmullw %%mm6, %%mm3 \n\t" \ 211 "psubw %%mm1, %%mm3 \n\t" \ 212 "movd 8+"A3", %%mm1 \n\t" \ 214 "pmullw %%mm5, %%mm1 \n\t" \ 215 "paddw %%mm1, %%mm3 \n\t" \ 216 "movd 8+"A4", %%mm1 \n\t" \ 218 "psllw $2, %%mm1 \n\t" \ 219 "psubw %%mm1, %%mm3 \n\t" \ 220 "paddw %%mm7, %%mm3 \n\t" \ 221 "psraw %6, %%mm3 \n\t" \ 222 "movq %%mm3, 16(%2) \n\t" \ 227 : "+r"(h), "+r" (src), "+r" (dst) \ 228 : "r"(src_stride), "r"(3*src_stride), \ 229 "m"(rnd), "m"(shift) \ 230 NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_53,ff_pw_18) \ 242 #define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME) \ 244 OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \ 245 const int16_t *src, int rnd) \ 249 rnd -= 
(-4+58+13-3)*256; \ 251 LOAD_ROUNDER_MMX("%4") \ 252 "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ 253 "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ 256 MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4) \ 257 NORMALIZE_MMX("$7") \ 259 "paddw "MANGLE(ff_pw_128)", %%mm3 \n\t" \ 260 "paddw "MANGLE(ff_pw_128)", %%mm4 \n\t" \ 261 TRANSFER_DO_PACK(OP) \ 266 : "+r"(h), "+r" (src), "+r" (dst) \ 267 : "r"(stride), "m"(rnd) \ 268 NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_18,ff_pw_53,ff_pw_128) \ 281 #define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME) \ 283 OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \ 284 x86_reg stride, int rnd, x86_reg offset) \ 290 LOAD_ROUNDER_MMX("%6") \ 291 "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ 292 "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ 295 MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \ 296 NORMALIZE_MMX("$6") \ 297 TRANSFER_DO_PACK(OP) \ 302 : "+r"(h), "+r" (src), "+r" (dst) \ 303 : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \ 304 NAMED_CONSTRAINTS_ADD(ff_pw_53,ff_pw_18,ff_pw_3) \ 310 MSPEL_FILTER13_8B (
shift1,
"0(%1,%4 )",
"0(%1,%3,2)",
"0(%1,%3 )",
"0(%1 )",
OP_PUT, put_)
311 MSPEL_FILTER13_8B (
shift1,
"0(%1,%4 )",
"0(%1,%3,2)",
"0(%1,%3 )",
"0(%1 )",
OP_AVG, avg_)
317 MSPEL_FILTER13_8B (shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )", OP_PUT, put_)
318 MSPEL_FILTER13_8B (shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )", OP_AVG, avg_)
338 #define VC1_MSPEL_MC(OP, INSTR)\ 339 static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\ 340 int hmode, int vmode, int rnd)\ 342 static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\ 343 { NULL, vc1_put_ver_16b_shift1_mmx, ff_vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\ 344 static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\ 345 { NULL, OP ## vc1_hor_16b_shift1_mmx, ff_vc1_ ## OP ## hor_16b_shift2_ ## INSTR, OP ## vc1_hor_16b_shift3_mmx };\ 346 static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =\ 347 { NULL, OP ## vc1_shift1_mmx, OP ## vc1_shift2_mmx, OP ## vc1_shift3_mmx };\ 350 "pxor %%mm0, %%mm0 \n\t"\ 356 static const int shift_value[] = { 0, 5, 1, 5 };\ 357 int shift = (shift_value[hmode]+shift_value[vmode])>>1;\ 359 LOCAL_ALIGNED(16, int16_t, tmp, [12*8]);\ 361 r = (1<<(shift-1)) + rnd-1;\ 362 vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\ 364 vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);\ 368 vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);\ 374 vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);\ 376 static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \ 377 int stride, int hmode, int vmode, int rnd)\ 379 OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \ 380 OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ 381 dst += 8*stride; src += 8*stride; \ 382 OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \ 383 OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ 390 #define DECLARE_FUNCTION(a, b) \ 391 static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, \ 392 const uint8_t *src, \ 396 put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ 398 static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst, \ 399 const uint8_t *src, \ 403 avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ 405 static void put_vc1_mspel_mc ## a ## b ## 
_16_mmx(uint8_t *dst, \ 406 const uint8_t *src, \ 410 put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ 412 static void avg_vc1_mspel_mc ## a ## b ## _16_mmxext(uint8_t *dst, \ 417 avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ 439 #define FN_ASSIGN(OP, X, Y, INSN) \ 440 dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \ 441 dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN
static int shift(int a, int b)
void(* vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, mips_reg dst_stride, const int16_t *src, int rnd)
Memory handling functions.
#define DECLARE_FUNCTION(a, b)
Macro to ease the declaration of the bicubic filter interpolation functions.
void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
void(* vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, mips_reg src_stride, int rnd, int64_t shift)
1/4 shift bicubic interpolation
#define VC1_SHIFT2(OP, OPNAME)
Purely vertical or horizontal 1/2 shift interpolation.
static const int shift1[6]
static const uint8_t offset[127][2]
#define VC1_MSPEL_MC(OP)
Interpolate fractional-pel values by applying the proper vertical filter first, then the proper horizontal filter.
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME)
Macro to build the 8-bit, any-direction version of vc1_put_shift[13] (i.e. the shift1 and shift3 variants).
void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
void(* vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, mips_reg stride, int rnd, mips_reg offset)
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)
Macro to build the vertical 16-bit version of vc1_put_shift[13] (i.e. the shift1 and shift3 variants).
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME)
Macro to build the horizontal 16-bit version of vc1_put_shift[13] (i.e. the shift1 and shift3 variants).