29 int max_allocated_lines,
int line_width,
47 for (i = 0; i < max_allocated_lines; i++) {
69 return buf->
line[line];
85 buffer = buf->
line[line];
116 int dst_step,
int src_step,
int ref_step,
121 const int mirror_right = (width & 1) ^ highpass;
122 const int w = (width >> 1) - 1 + (highpass & width);
125 #define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref))) 127 dst[0] =
LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse);
132 for (i = 0; i <
w; i++)
133 dst[i * dst_step] =
LIFT(src[i * src_step],
134 ((mul * (ref[i * ref_step] +
135 ref[(i + 1) * ref_step]) +
140 dst[w * dst_step] =
LIFT(src[w * src_step],
141 ((mul * 2 * ref[w * ref_step] + add) >> shift),
146 int dst_step,
int src_step,
int ref_step,
151 const int mirror_right = (width & 1) ^ highpass;
152 const int w = (width >> 1) - 1 + (highpass & width);
156 #define LIFTS(src, ref, inv) \ 157 ((inv) ? (src) + (((ref) + 4 * (src)) >> shift) \ 158 : -((-16 * (src) + (ref) + add / \ 159 4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23))) 161 dst[0] =
LIFTS(src[0], mul * 2 * ref[0] + add, inverse);
166 for (i = 0; i <
w; i++)
167 dst[i * dst_step] =
LIFTS(src[i * src_step],
168 mul * (ref[i * ref_step] +
169 ref[(i + 1) * ref_step]) + add,
173 dst[w * dst_step] =
LIFTS(src[w * src_step],
174 mul * 2 * ref[w * ref_step] + add,
180 const int width2 = width >> 1;
182 const int w2 = (width + 1) >> 1;
184 for (x = 0; x < width2; x++) {
186 temp[x + w2] = b[2 * x + 1];
190 lift(b + w2, temp + w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
191 lift(b, temp, b + w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
199 for (i = 0; i <
width; i++)
200 b1[i] -= (b0[i] + b2[i]) >> 1;
208 for (i = 0; i <
width; i++)
209 b1[i] += (b0[i] + b2[i] + 2) >> 2;
219 for (y = -2; y <
height; y += 2) {
223 if (y + 1 < (
unsigned)height)
225 if (y + 2 < (
unsigned)height)
228 if (y + 1 < (
unsigned)height)
230 if (y + 0 < (
unsigned)height)
240 const int w2 = (width + 1) >> 1;
242 lift(temp + w2, b + 1, b, 1, 2, 2, width,
W_AM,
W_AO,
W_AS, 1, 1);
243 liftS(temp, b, temp + w2, 1, 2, 1, width,
W_BM,
W_BO,
W_BS, 0, 0);
244 lift(b + w2, temp + w2, temp, 1, 1, 1, width,
W_CM,
W_CO,
W_CS, 1, 0);
245 lift(b, temp, b + w2, 1, 1, 1, width,
W_DM,
W_DO,
W_DS, 0, 0);
253 for (i = 0; i <
width; i++)
262 for (i = 0; i <
width; i++)
271 for (i = 0; i <
width; i++)
272 b1[i] = (16 * 4 * b1[i] - 4 * (b0[i] + b2[i]) +
W_BO * 5 + (5 << 27)) /
273 (5 * 16) - (1 << 23);
281 for (i = 0; i <
width; i++)
294 for (y = -4; y <
height; y += 2) {
298 if (y + 3 < (
unsigned)height)
300 if (y + 4 < (
unsigned)height)
303 if (y + 3 < (
unsigned)height)
305 if (y + 2 < (
unsigned)height)
307 if (y + 1 < (
unsigned)height)
309 if (y + 0 < (
unsigned)height)
320 int stride,
int type,
int decomposition_count)
324 for (level = 0; level < decomposition_count; level++) {
328 width >> level, height >> level,
333 width >> level, height >> level,
342 const int width2 = width >> 1;
343 const int w2 = (width + 1) >> 1;
346 for (x = 0; x < width2; x++) {
348 temp[2 * x + 1] = b[x + w2];
353 b[0] = temp[0] - ((temp[1] + 1) >> 1);
354 for (x = 2; x < width - 1; x += 2) {
355 b[x] = temp[x] - ((temp[x - 1] + temp[x + 1] + 2) >> 2);
356 b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1);
359 b[x] = temp[x] - ((temp[x - 1] + 1) >> 1);
360 b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1);
362 b[x - 1] = temp[x - 1] + b[x - 2];
370 for (i = 0; i <
width; i++)
371 b1[i] += (b0[i] + b2[i]) >> 1;
379 for (i = 0; i <
width; i++)
380 b1[i] -= (b0[i] + b2[i] + 2) >> 2;
384 int height,
int stride_line)
416 if (y + 1 < (
unsigned)height && y < (
unsigned)height) {
419 for (x = 0; x <
width; x++) {
420 b2[x] -= (b1[x] + b3[x] + 2) >> 2;
421 b1[x] += (b0[x] + b2[x]) >> 1;
424 if (y + 1 < (
unsigned)
height)
426 if (y + 0 < (
unsigned)
height)
430 if (y - 1 < (
unsigned)
height)
432 if (y + 0 < (
unsigned)
height)
450 if (y + 1 < (
unsigned)height)
452 if (y + 0 < (
unsigned)height)
455 if (y - 1 < (
unsigned)height)
457 if (y + 0 < (
unsigned)height)
467 const int w2 = (width + 1) >> 1;
470 temp[0] = b[0] - ((3 * b[w2] + 2) >> 2);
471 for (x = 1; x < (width >> 1); x++) {
472 temp[2 * x] = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3);
473 temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
476 temp[2 * x] = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2);
477 temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
479 temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2];
481 b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3);
482 for (x = 2; x < width - 1; x += 2) {
483 b[x] = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4);
484 b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
487 b[x] = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3);
488 b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
490 b[x - 1] = temp[x - 1] + 3 * b[x - 2];
498 for (i = 0; i <
width; i++)
507 for (i = 0; i <
width; i++)
516 for (i = 0; i <
width; i++)
517 b1[i] += (
W_BM * (b0[i] + b2[i]) + 4 * b1[i] +
W_BO) >>
W_BS;
525 for (i = 0; i <
width; i++)
535 for (i = 0; i <
width; i++) {
538 b2[i] += (
W_BM * (b1[i] + b3[i]) + 4 * b2[i] +
W_BO) >>
W_BS;
544 int height,
int stride_line)
581 if (y > 0 && y + 4 < height) {
584 if (y + 3 < (
unsigned)height)
586 if (y + 2 < (
unsigned)height)
588 if (y + 1 < (
unsigned)height)
590 if (y + 0 < (
unsigned)height)
594 if (y - 1 < (
unsigned)height)
596 if (y + 0 < (
unsigned)height)
618 if (y + 3 < (
unsigned)height)
620 if (y + 2 < (
unsigned)height)
622 if (y + 1 < (
unsigned)height)
624 if (y + 0 < (
unsigned)height)
627 if (y - 1 < (
unsigned)height)
629 if (y + 0 < (
unsigned)height)
641 int decomposition_count)
644 for (level = decomposition_count - 1; level >= 0; level--) {
648 stride_line << level);
652 stride_line << level);
661 int type,
int decomposition_count,
int y)
663 const int support = type == 1 ? 3 : 5;
668 for (level = decomposition_count - 1; level >= 0; level--)
669 while (cs[level].y <=
FFMIN((y >> level) + support, height >> level)) {
675 stride_line << level);
681 stride_line << level);
689 int decomposition_count)
692 for (level = decomposition_count - 1; level >= 0; level--) {
709 int decomposition_count,
int y)
711 const int support = type == 1 ? 3 : 5;
716 for (level = decomposition_count - 1; level >= 0; level--)
717 while (cs[level].y <=
FFMIN((y >> level) + support, height >> level)) {
721 height >> level, stride << level);
725 height >> level, stride << level);
732 int stride,
int type,
int decomposition_count)
737 decomposition_count);
738 for (y = 0; y <
height; y += 4)
740 decomposition_count, y);
747 const int dec_count = w == 8 ? 3 : 4;
748 int tmp[32 * 32], tmp2[32];
750 static const int scale[2][2][4][4] = {
753 { 268, 239, 239, 213 },
754 { 0, 224, 224, 152 },
755 { 0, 135, 135, 110 },
758 { 344, 310, 310, 280 },
759 { 0, 320, 320, 228 },
760 { 0, 175, 175, 136 },
761 { 0, 129, 129, 102 },
766 { 275, 245, 245, 218 },
767 { 0, 230, 230, 156 },
768 { 0, 138, 138, 113 },
771 { 352, 317, 317, 286 },
772 { 0, 328, 328, 233 },
773 { 0, 180, 180, 140 },
774 { 0, 132, 132, 105 },
779 for (i = 0; i <
h; i++) {
780 for (j = 0; j <
w; j += 4) {
781 tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) << 4;
782 tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) << 4;
783 tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) << 4;
784 tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) << 4;
794 for (level = 0; level < dec_count; level++)
795 for (ori = level ? 1 : 0; ori < 4; ori++) {
797 int sx = (ori & 1) ? size : 0;
799 int sy = (ori & 2) ? stride >> 1 : 0;
801 for (i = 0; i <
size; i++)
802 for (j = 0; j <
size; j++) {
803 int v = tmp[sx + sy + i * stride + j] *
814 return w_c(v, pix1, pix2, line_size, 8, h, 1);
819 return w_c(v, pix1, pix2, line_size, 8, h, 0);
824 return w_c(v, pix1, pix2, line_size, 16, h, 1);
829 return w_c(v, pix1, pix2, line_size, 16, h, 0);
834 return w_c(v, pix1, pix2, line_size, 32, h, 1);
839 return w_c(v, pix1, pix2, line_size, 32, h, 0);
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width)
static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs, slice_buffer *sb, IDWTELEM *temp, int width, int height, int stride_line)
#define LIFTS(src, ref, inv)
static int shift(int a, int b)
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8)
static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp, int width, int height, int stride)
void ff_slice_buffer_destroy(slice_buffer *buf)
static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width)
static int w97_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs, slice_buffer *slice_buf, IDWTELEM *temp, int width, int height, int stride_line, int type, int decomposition_count, int y)
#define LIFT(src, ref, inv)
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
Macro definitions for various function/variable attributes.
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
#define av_assert0(cond)
assert() equivalent that is always enabled.
av_cold void ff_dwt_init(SnowDWTContext *c)
int ff_w97_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
static int w53_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width)
static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse)
void(* horizontal_compose97i)(IDWTELEM *b, IDWTELEM *temp, int width)
static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride)
void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width, int height, int stride_line, int type, int decomposition_count)
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width)
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width)
simple assert() macros that are a bit more flexible than ISO C assert().
static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width)
void ff_slice_buffer_flush(slice_buffer *buf)
static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse)
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
av_cold void ff_dsputil_init_dwt(MECmpContext *c)
#define av_assert1(cond)
assert() equivalent for checks that do not lie in speed-critical code.
static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb, int height, int stride_line)
static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, IDWTELEM *temp, int width, int height, int stride)
IDWTELEM * base_buffer
Buffer that this structure is caching.
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width)
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior, as they are not representable as absolute values of their type.
static int w_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int w, int h, int type)
static av_always_inline av_const int avpriv_mirror(int x, int w)
static int w97_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb, IDWTELEM *temp, int width, int height, int stride_line)
static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, IDWTELEM *temp, int width, int height, int stride)
static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp, int width, int height, int stride)
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width)
int ff_slice_buffer_init(slice_buffer *buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM *base_buffer)
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
IDWTELEM * ff_slice_buffer_load_line(slice_buffer *buf, int line)
void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height, int stride, int type, int decomposition_count)
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width)
int ff_w53_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
static void spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count)
static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride)
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
#define MAX_DECOMPOSITIONS
common internal and external API header
static int ref[MAX_W *MAX_W]
IDWTELEM ** line
For use by idwt and predict_slices.
void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height, int stride, int type, int decomposition_count)
#define slice_buffer_get_line(slice_buf, line_num)
void(* inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8)
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width)
static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width)
void(* vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width)
static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb, int height, int stride_line)
IDWTELEM ** data_stack
Used for internal purposes.
static void spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, IDWTELEM *temp, int width, int height, int stride, int type, int decomposition_count, int y)
void ff_slice_buffer_release(slice_buffer *buf, int line)
static uint32_t inverse(uint32_t v)
Find the multiplicative inverse modulo 2^32.
#define av_malloc_array(a, b)
void ff_dwt_init_x86(SnowDWTContext *c)
Used to minimize the amount of memory used in order to optimize cache performance.
static int w53_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
void * av_mallocz_array(size_t nmemb, size_t size)
Allocate a memory block for an array with av_mallocz().