FFmpeg  4.0
vp9block.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 
26 #include "avcodec.h"
27 #include "internal.h"
28 #include "videodsp.h"
29 #include "vp56.h"
30 #include "vp9.h"
31 #include "vp9data.h"
32 #include "vp9dec.h"
33 
34 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
35  ptrdiff_t stride, int v)
36 {
37  switch (w) {
38  case 1:
39  do {
40  *ptr = v;
41  ptr += stride;
42  } while (--h);
43  break;
44  case 2: {
45  int v16 = v * 0x0101;
46  do {
47  AV_WN16A(ptr, v16);
48  ptr += stride;
49  } while (--h);
50  break;
51  }
52  case 4: {
53  uint32_t v32 = v * 0x01010101;
54  do {
55  AV_WN32A(ptr, v32);
56  ptr += stride;
57  } while (--h);
58  break;
59  }
60  case 8: {
61 #if HAVE_FAST_64BIT
62  uint64_t v64 = v * 0x0101010101010101ULL;
63  do {
64  AV_WN64A(ptr, v64);
65  ptr += stride;
66  } while (--h);
67 #else
68  uint32_t v32 = v * 0x01010101;
69  do {
70  AV_WN32A(ptr, v32);
71  AV_WN32A(ptr + 4, v32);
72  ptr += stride;
73  } while (--h);
74 #endif
75  break;
76  }
77  }
78 }
79 
/*
 * Parse the mode information of the current block (td->b) from the tile's
 * range coder (td->c).
 *
 * In order, this fills in: the segment id, the skip flag, the intra/inter
 * flag, the transform size, the prediction mode(s), and for inter blocks the
 * compound flag, reference frame(s), interpolation filter and motion vectors.
 * For each symbol read with an adaptive probability the matching counter in
 * td->counts is incremented. Afterwards the above (s->above_*) and left
 * (td->left_*) context arrays and the current frame's per-position mv/ref
 * storage are updated for use by later blocks.
 *
 * NOTE(review): the embedded listing numbers skip in several places below
 * (e.g. 88 -> 91, 104 -> 106), so some statements of the function are not
 * visible in this copy; comments only describe what is visible.
 */
80 static void decode_mode(VP9TileData *td)
81 {
// values splatted into the left/above partition contexts for each block
// size (used via dir##_ctx[b->bs] in SET_CTXS below)
82  static const uint8_t left_ctx[N_BS_SIZES] = {
83  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
84  };
85  static const uint8_t above_ctx[N_BS_SIZES] = {
86  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
87  };
// indexed by b->bs to obtain max_tx; initializer not visible in this listing
88  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
91  };
92  VP9Context *s = td->s;
93  VP9Block *b = td->b;
94  int row = td->row, col = td->col, row7 = td->row7;
95  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// bw4/bh4: block width/height from ff_vp9_bwh_tab[1]; w4/h4: the same
// clipped to what remains of the frame (s->cols - col, s->rows - row)
96  int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
97  int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
// above neighbour exists below the first row; left neighbour exists past
// the first column of the current tile
98  int have_a = row > 0, have_l = col > td->tile_col_start;
// vref and filter_id are only assigned in the inter branch; SET_CTXS only
// reads them when !b->intra (filter_id additionally needs FILTER_SWITCHABLE)
99  int vref, filter_id;
100 
// --- segment id ---
101  if (!s->s.h.segmentation.enabled) {
102  b->seg_id = 0;
103  } else if (s->s.h.keyframe || s->s.h.intraonly) {
104  b->seg_id = !s->s.h.segmentation.update_map ? 0 :
106  } else if (!s->s.h.segmentation.update_map ||
107  (s->s.h.segmentation.temporal &&
110  td->left_segpred_ctx[row7]]))) {
112  int pred = 8, x;
114 
// predicted id: minimum of refsegmap over the visible h4 x w4 area
117  for (y = 0; y < h4; y++) {
118  int idx_base = (y + row) * 8 * s->sb_cols + col;
119  for (x = 0; x < w4; x++)
120  pred = FFMIN(pred, refsegmap[idx_base + x]);
121  }
122  av_assert1(pred < 8);
123  b->seg_id = pred;
124  } else {
125  b->seg_id = 0;
126  }
127 
// this branch marks the above/left segpred contexts with 1
128  memset(&s->above_segpred_ctx[col], 1, w4);
129  memset(&td->left_segpred_ctx[row7], 1, h4);
130  } else {
132  s->s.h.segmentation.prob);
133 
// this branch marks the above/left segpred contexts with 0
134  memset(&s->above_segpred_ctx[col], 0, w4);
135  memset(&td->left_segpred_ctx[row7], 0, h4);
136  }
// store seg_id into the current frame's segmentation map over the full
// (unclipped) bw4 x bh4 block
137  if (s->s.h.segmentation.enabled &&
138  (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
139  setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
140  bw4, bh4, 8 * s->sb_cols, b->seg_id);
141  }
142 
// --- skip flag: forced by the segment's skip feature, otherwise coded with
// a context equal to the sum of the left and above skip flags ---
143  b->skip = s->s.h.segmentation.enabled &&
144  s->s.h.segmentation.feat[b->seg_id].skip_enabled;
145  if (!b->skip) {
146  int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col];
147  b->skip = vp56_rac_get_prob(td->c, s->prob.p.skip[c]);
148  td->counts.skip[c][b->skip]++;
149  }
150 
// --- intra/inter flag: always intra on keyframe/intra-only frames; implied
// by the segment's reference feature when enabled; otherwise coded ---
151  if (s->s.h.keyframe || s->s.h.intraonly) {
152  b->intra = 1;
153  } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
154  b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
155  } else {
156  int c, bit;
157 
158  if (have_a && have_l) {
159  c = s->above_intra_ctx[col] + td->left_intra_ctx[row7];
160  c += (c == 2);
161  } else {
162  c = have_a ? 2 * s->above_intra_ctx[col] :
163  have_l ? 2 * td->left_intra_ctx[row7] : 0;
164  }
// the coded bit is the inverse of the intra flag
165  bit = vp56_rac_get_prob(td->c, s->prob.p.intra[c]);
166  td->counts.intra[c][bit]++;
167  b->intra = !bit;
168  }
169 
// --- transform size: coded only when the frame uses TX_SWITCHABLE and the
// block is intra or not skipped; otherwise min(max_tx, frame txfmmode) ---
170  if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
171  int c;
// context: a skipped neighbour counts as max_tx in place of its own tx size
172  if (have_a) {
173  if (have_l) {
174  c = (s->above_skip_ctx[col] ? max_tx :
175  s->above_txfm_ctx[col]) +
176  (td->left_skip_ctx[row7] ? max_tx :
177  td->left_txfm_ctx[row7]) > max_tx;
178  } else {
179  c = s->above_skip_ctx[col] ? 1 :
180  (s->above_txfm_ctx[col] * 2 > max_tx);
181  }
182  } else if (have_l) {
183  c = td->left_skip_ctx[row7] ? 1 :
184  (td->left_txfm_ctx[row7] * 2 > max_tx);
185  } else {
186  c = 1;
187  }
// the size is coded as a run of "go one size larger" bits, capped by max_tx
188  switch (max_tx) {
189  case TX_32X32:
190  b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][0]);
191  if (b->tx) {
192  b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][1]);
193  if (b->tx == 2)
194  b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][2]);
195  }
196  td->counts.tx32p[c][b->tx]++;
197  break;
198  case TX_16X16:
199  b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][0]);
200  if (b->tx)
201  b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][1]);
202  td->counts.tx16p[c][b->tx]++;
203  break;
204  case TX_8X8:
205  b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx8p[c]);
206  td->counts.tx8p[c][b->tx]++;
207  break;
208  case TX_4X4:
209  b->tx = TX_4X4;
210  break;
211  }
212  } else {
213  b->tx = FFMIN(max_tx, s->s.h.txfmmode);
214  }
215 
// --- prediction modes ---
// keyframe / intra-only: probabilities come from
// ff_vp9_default_kf_ymode_probs indexed by the above (a) and left (l) modes
216  if (s->s.h.keyframe || s->s.h.intraonly) {
217  uint8_t *a = &s->above_mode_ctx[col * 2];
218  uint8_t *l = &td->left_mode_ctx[(row7) << 1];
219 
220  b->comp = 0;
// block sizes after BS_8x8 in the enum: up to four modes are set one by
// one, and a[]/l[] are updated in between so later sub-blocks see them
221  if (b->bs > BS_8x8) {
222  // FIXME the memory storage intermediates here aren't really
223  // necessary, they're just there to make the code slightly
224  // simpler for now
225  b->mode[0] =
227  ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
228  if (b->bs != BS_8x4) {
230  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
231  l[0] =
232  a[1] = b->mode[1];
233  } else {
234  l[0] =
235  a[1] =
236  b->mode[1] = b->mode[0];
237  }
238  if (b->bs != BS_4x8) {
239  b->mode[2] =
241  ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
242  if (b->bs != BS_8x4) {
244  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
245  l[1] =
246  a[1] = b->mode[3];
247  } else {
248  l[1] =
249  a[1] =
250  b->mode[3] = b->mode[2];
251  }
252  } else {
253  b->mode[2] = b->mode[0];
254  l[1] =
255  a[1] =
256  b->mode[3] = b->mode[1];
257  }
258  } else {
// single mode for the whole block, replicated to all four slots and into
// the above/left mode contexts
261  b->mode[3] =
262  b->mode[2] =
263  b->mode[1] = b->mode[0];
264  // FIXME this can probably be optimized
265  memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
266  memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
267  }
270  } else if (b->intra) {
// intra block in an inter frame: modes use the adaptive s->prob.p.y_mode /
// uv_mode probabilities and are counted in td->counts
271  b->comp = 0;
272  if (b->bs > BS_8x8) {
274  s->prob.p.y_mode[0]);
275  td->counts.y_mode[0][b->mode[0]]++;
276  if (b->bs != BS_8x4) {
278  s->prob.p.y_mode[0]);
279  td->counts.y_mode[0][b->mode[1]]++;
280  } else {
281  b->mode[1] = b->mode[0];
282  }
283  if (b->bs != BS_4x8) {
285  s->prob.p.y_mode[0]);
286  td->counts.y_mode[0][b->mode[2]]++;
287  if (b->bs != BS_8x4) {
289  s->prob.p.y_mode[0]);
290  td->counts.y_mode[0][b->mode[3]]++;
291  } else {
292  b->mode[3] = b->mode[2];
293  }
294  } else {
295  b->mode[2] = b->mode[0];
296  b->mode[3] = b->mode[1];
297  }
298  } else {
// maps b->bs to the y_mode probability set used for that block size
299  static const uint8_t size_group[10] = {
300  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
301  };
302  int sz = size_group[b->bs];
303 
305  s->prob.p.y_mode[sz]);
306  b->mode[1] =
307  b->mode[2] =
308  b->mode[3] = b->mode[0];
309  td->counts.y_mode[sz][b->mode[3]]++;
310  }
312  s->prob.p.uv_mode[b->mode[3]]);
313  td->counts.uv_mode[b->mode[3]][b->uvmode]++;
314  } else {
// inter block: context for the inter mode, looked up by the above and left
// mode-context values
315  static const uint8_t inter_mode_ctx_lut[14][14] = {
316  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
317  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
318  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
319  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
320  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
321  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
322  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
323  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
324  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
325  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
326  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
327  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
328  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
329  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
330  };
331 
// reference fixed by the segment feature: single reference ref_val - 1
332  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
333  av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
334  b->comp = 0;
335  b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
336  } else {
337  // read comp_pred flag
338  if (s->s.h.comppredmode != PRED_SWITCHABLE) {
339  b->comp = s->s.h.comppredmode == PRED_COMPREF;
340  } else {
341  int c;
342 
343  // FIXME add intra as ref=0xff (or -1) to make these easier?
344  if (have_a) {
345  if (have_l) {
346  if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) {
347  c = 4;
348  } else if (s->above_comp_ctx[col]) {
349  c = 2 + (td->left_intra_ctx[row7] ||
350  td->left_ref_ctx[row7] == s->s.h.fixcompref);
351  } else if (td->left_comp_ctx[row7]) {
352  c = 2 + (s->above_intra_ctx[col] ||
353  s->above_ref_ctx[col] == s->s.h.fixcompref);
354  } else {
355  c = (!s->above_intra_ctx[col] &&
356  s->above_ref_ctx[col] == s->s.h.fixcompref) ^
357  (!td->left_intra_ctx[row7] &&
// NOTE(review): indexes with row & 7 where every other access uses row7
358  td->left_ref_ctx[row & 7] == s->s.h.fixcompref);
359  }
360  } else {
361  c = s->above_comp_ctx[col] ? 3 :
362  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
363  }
364  } else if (have_l) {
365  c = td->left_comp_ctx[row7] ? 3 :
366  (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref);
367  } else {
368  c = 1;
369  }
370  b->comp = vp56_rac_get_prob(td->c, s->prob.p.comp[c]);
371  td->counts.comp[c][b->comp]++;
372  }
373 
374  // read actual references
375  // FIXME probably cache a few variables here to prevent repetitive
376  // memory accesses below
377  if (b->comp) { /* two references */
// the fixed reference goes into slot signbias[fixcompref]; the other slot
// receives varcompref[bit] once the bit has been read
378  int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
379 
380  b->ref[fix_idx] = s->s.h.fixcompref;
381  // FIXME can this codeblob be replaced by some sort of LUT?
382  if (have_a) {
383  if (have_l) {
384  if (s->above_intra_ctx[col]) {
385  if (td->left_intra_ctx[row7]) {
386  c = 2;
387  } else {
388  c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
389  }
390  } else if (td->left_intra_ctx[row7]) {
391  c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
392  } else {
393  int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
394 
395  if (refl == refa && refa == s->s.h.varcompref[1]) {
396  c = 0;
397  } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
398  if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
399  (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
400  c = 4;
401  } else {
402  c = (refa == refl) ? 3 : 1;
403  }
404  } else if (!td->left_comp_ctx[row7]) {
405  if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
406  c = 1;
407  } else {
408  c = (refl == s->s.h.varcompref[1] &&
409  refa != s->s.h.varcompref[1]) ? 2 : 4;
410  }
411  } else if (!s->above_comp_ctx[col]) {
412  if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
413  c = 1;
414  } else {
415  c = (refa == s->s.h.varcompref[1] &&
416  refl != s->s.h.varcompref[1]) ? 2 : 4;
417  }
418  } else {
419  c = (refl == refa) ? 4 : 2;
420  }
421  }
422  } else {
423  if (s->above_intra_ctx[col]) {
424  c = 2;
425  } else if (s->above_comp_ctx[col]) {
426  c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
427  } else {
428  c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
429  }
430  }
431  } else if (have_l) {
432  if (td->left_intra_ctx[row7]) {
433  c = 2;
434  } else if (td->left_comp_ctx[row7]) {
435  c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
436  } else {
437  c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
438  }
439  } else {
440  c = 2;
441  }
442  bit = vp56_rac_get_prob(td->c, s->prob.p.comp_ref[c]);
443  b->ref[var_idx] = s->s.h.varcompref[bit];
444  td->counts.comp_ref[c][bit]++;
445  } else /* single reference */ {
// up to two bits: first bit 0 selects ref 0; otherwise a second bit selects
// ref 1 or 2
446  int bit, c;
447 
448  if (have_a && !s->above_intra_ctx[col]) {
449  if (have_l && !td->left_intra_ctx[row7]) {
450  if (td->left_comp_ctx[row7]) {
451  if (s->above_comp_ctx[col]) {
452  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] ||
453  !s->above_ref_ctx[col]);
454  } else {
455  c = (3 * !s->above_ref_ctx[col]) +
456  (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
457  }
458  } else if (s->above_comp_ctx[col]) {
459  c = (3 * !td->left_ref_ctx[row7]) +
460  (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
461  } else {
462  c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
463  }
// NOTE(review): cannot be taken; the enclosing if already requires
// !s->above_intra_ctx[col]
464  } else if (s->above_intra_ctx[col]) {
465  c = 2;
466  } else if (s->above_comp_ctx[col]) {
467  c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
468  } else {
469  c = 4 * (!s->above_ref_ctx[col]);
470  }
471  } else if (have_l && !td->left_intra_ctx[row7]) {
// NOTE(review): cannot be taken; the enclosing else-if already requires
// !td->left_intra_ctx[row7]
472  if (td->left_intra_ctx[row7]) {
473  c = 2;
474  } else if (td->left_comp_ctx[row7]) {
475  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
476  } else {
477  c = 4 * (!td->left_ref_ctx[row7]);
478  }
479  } else {
480  c = 2;
481  }
482  bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][0]);
483  td->counts.single_ref[c][0][bit]++;
484  if (!bit) {
485  b->ref[0] = 0;
486  } else {
487  // FIXME can this codeblob be replaced by some sort of LUT?
488  if (have_a) {
489  if (have_l) {
490  if (td->left_intra_ctx[row7]) {
491  if (s->above_intra_ctx[col]) {
492  c = 2;
493  } else if (s->above_comp_ctx[col]) {
494  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
495  s->above_ref_ctx[col] == 1);
496  } else if (!s->above_ref_ctx[col]) {
497  c = 3;
498  } else {
499  c = 4 * (s->above_ref_ctx[col] == 1);
500  }
501  } else if (s->above_intra_ctx[col]) {
502  if (td->left_intra_ctx[row7]) {
503  c = 2;
504  } else if (td->left_comp_ctx[row7]) {
505  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
506  td->left_ref_ctx[row7] == 1);
507  } else if (!td->left_ref_ctx[row7]) {
508  c = 3;
509  } else {
510  c = 4 * (td->left_ref_ctx[row7] == 1);
511  }
512  } else if (s->above_comp_ctx[col]) {
513  if (td->left_comp_ctx[row7]) {
514  if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
515  c = 3 * (s->s.h.fixcompref == 1 ||
516  td->left_ref_ctx[row7] == 1);
517  } else {
518  c = 2;
519  }
520  } else if (!td->left_ref_ctx[row7]) {
521  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
522  s->above_ref_ctx[col] == 1);
523  } else {
524  c = 3 * (td->left_ref_ctx[row7] == 1) +
525  (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
526  }
527  } else if (td->left_comp_ctx[row7]) {
528  if (!s->above_ref_ctx[col]) {
529  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
530  td->left_ref_ctx[row7] == 1);
531  } else {
532  c = 3 * (s->above_ref_ctx[col] == 1) +
533  (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
534  }
535  } else if (!s->above_ref_ctx[col]) {
536  if (!td->left_ref_ctx[row7]) {
537  c = 3;
538  } else {
539  c = 4 * (td->left_ref_ctx[row7] == 1);
540  }
541  } else if (!td->left_ref_ctx[row7]) {
542  c = 4 * (s->above_ref_ctx[col] == 1);
543  } else {
544  c = 2 * (td->left_ref_ctx[row7] == 1) +
545  2 * (s->above_ref_ctx[col] == 1);
546  }
547  } else {
548  if (s->above_intra_ctx[col] ||
549  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
550  c = 2;
551  } else if (s->above_comp_ctx[col]) {
552  c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
553  } else {
554  c = 4 * (s->above_ref_ctx[col] == 1);
555  }
556  }
557  } else if (have_l) {
558  if (td->left_intra_ctx[row7] ||
559  (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) {
560  c = 2;
561  } else if (td->left_comp_ctx[row7]) {
562  c = 3 * (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
563  } else {
564  c = 4 * (td->left_ref_ctx[row7] == 1);
565  }
566  } else {
567  c = 2;
568  }
569  bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][1]);
570  td->counts.single_ref[c][1][bit]++;
571  b->ref[0] = 1 + bit;
572  }
573  }
574  }
575 
// block sizes up to BS_8x8 carry one inter mode for the whole block; it is
// ZEROMV when the segment's skip feature is enabled
576  if (b->bs <= BS_8x8) {
577  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
578  b->mode[0] =
579  b->mode[1] =
580  b->mode[2] =
581  b->mode[3] = ZEROMV;
582  } else {
// per-block-size offset added to col / row7 when indexing the mode contexts
583  static const uint8_t off[10] = {
584  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
585  };
586 
587  // FIXME this needs to use the LUT tables from find_ref_mvs
588  // because not all are -1,0/0,-1
589  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
590  [td->left_mode_ctx[row7 + off[b->bs]]];
591 
593  s->prob.p.mv_mode[c]);
594  b->mode[1] =
595  b->mode[2] =
596  b->mode[3] = b->mode[0];
// inter mode values are offset by 10 when indexing the mv_mode counters
597  td->counts.mv_mode[c][b->mode[0] - 10]++;
598  }
599  }
600 
// interpolation filter: coded when the frame uses FILTER_SWITCHABLE, with a
// context taken from neighbours whose mode context is >= NEARESTMV
601  if (s->s.h.filtermode == FILTER_SWITCHABLE) {
602  int c;
603 
604  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
605  if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
606  c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ?
607  td->left_filter_ctx[row7] : 3;
608  } else {
609  c = s->above_filter_ctx[col];
610  }
611  } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
612  c = td->left_filter_ctx[row7];
613  } else {
614  c = 3;
615  }
616 
617  filter_id = vp8_rac_get_tree(td->c, ff_vp9_filter_tree,
618  s->prob.p.filter[c]);
619  td->counts.filter[c][filter_id]++;
620  b->filter = ff_vp9_filter_lut[filter_id];
621  } else {
622  b->filter = s->s.h.filtermode;
623  }
624 
// block sizes after BS_8x8: a mode and motion vector pair per sub-block;
// sub-blocks that BS_8x4 / BS_4x8 do not code separately copy a neighbour
625  if (b->bs > BS_8x8) {
626  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]];
627 
629  s->prob.p.mv_mode[c]);
630  td->counts.mv_mode[c][b->mode[0] - 10]++;
631  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0);
632 
633  if (b->bs != BS_8x4) {
635  s->prob.p.mv_mode[c]);
636  td->counts.mv_mode[c][b->mode[1] - 10]++;
637  ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1);
638  } else {
639  b->mode[1] = b->mode[0];
640  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
641  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
642  }
643 
644  if (b->bs != BS_4x8) {
646  s->prob.p.mv_mode[c]);
647  td->counts.mv_mode[c][b->mode[2] - 10]++;
648  ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2);
649 
650  if (b->bs != BS_8x4) {
652  s->prob.p.mv_mode[c]);
653  td->counts.mv_mode[c][b->mode[3] - 10]++;
654  ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3);
655  } else {
656  b->mode[3] = b->mode[2];
657  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
658  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
659  }
660  } else {
661  b->mode[2] = b->mode[0];
662  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
663  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
664  b->mode[3] = b->mode[1];
665  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
666  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
667  }
668  } else {
// one motion vector pair for the whole block, copied to all four slots
669  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1);
670  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
671  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
672  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
673  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
674  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
675  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
676  }
677 
// reference stored in the ref contexts: for compound blocks the slot
// selected by signbias[varcompref[0]], otherwise ref[0]
678  vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
679  }
680 
// SPLAT_CTX(var, val, n): store n copies of the byte val starting at var,
// using the widest stores available (64-bit only with HAVE_FAST_64BIT)
681 #if HAVE_FAST_64BIT
682 #define SPLAT_CTX(var, val, n) \
683  switch (n) { \
684  case 1: var = val; break; \
685  case 2: AV_WN16A(&var, val * 0x0101); break; \
686  case 4: AV_WN32A(&var, val * 0x01010101); break; \
687  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
688  case 16: { \
689  uint64_t v64 = val * 0x0101010101010101ULL; \
690  AV_WN64A( &var, v64); \
691  AV_WN64A(&((uint8_t *) &var)[8], v64); \
692  break; \
693  } \
694  }
695 #else
696 #define SPLAT_CTX(var, val, n) \
697  switch (n) { \
698  case 1: var = val; break; \
699  case 2: AV_WN16A(&var, val * 0x0101); break; \
700  case 4: AV_WN32A(&var, val * 0x01010101); break; \
701  case 8: { \
702  uint32_t v32 = val * 0x01010101; \
703  AV_WN32A( &var, v32); \
704  AV_WN32A(&((uint8_t *) &var)[4], v32); \
705  break; \
706  } \
707  case 16: { \
708  uint32_t v32 = val * 0x01010101; \
709  AV_WN32A( &var, v32); \
710  AV_WN32A(&((uint8_t *) &var)[4], v32); \
711  AV_WN32A(&((uint8_t *) &var)[8], v32); \
712  AV_WN32A(&((uint8_t *) &var)[12], v32); \
713  break; \
714  } \
715  }
716 #endif
717 
// --- context update: SET_CTXS(perf, dir, off, n) writes this block's skip,
// tx and partition values (plus intra/comp/mode/ref/filter on inter frames)
// into n entries of the dir ("above" in s, "left" in td) contexts at off.
// The first switch covers the block width, the second the block height. ---
718  switch (ff_vp9_bwh_tab[1][b->bs][0]) {
719 #define SET_CTXS(perf, dir, off, n) \
720  do { \
721  SPLAT_CTX(perf->dir##_skip_ctx[off], b->skip, n); \
722  SPLAT_CTX(perf->dir##_txfm_ctx[off], b->tx, n); \
723  SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
724  if (!s->s.h.keyframe && !s->s.h.intraonly) { \
725  SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra, n); \
726  SPLAT_CTX(perf->dir##_comp_ctx[off], b->comp, n); \
727  SPLAT_CTX(perf->dir##_mode_ctx[off], b->mode[3], n); \
728  if (!b->intra) { \
729  SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \
730  if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
731  SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \
732  } \
733  } \
734  } \
735  } while (0)
736  case 1: SET_CTXS(s, above, col, 1); break;
737  case 2: SET_CTXS(s, above, col, 2); break;
738  case 4: SET_CTXS(s, above, col, 4); break;
739  case 8: SET_CTXS(s, above, col, 8); break;
740  }
741  switch (ff_vp9_bwh_tab[1][b->bs][1]) {
742  case 1: SET_CTXS(td, left, row7, 1); break;
743  case 2: SET_CTXS(td, left, row7, 2); break;
744  case 4: SET_CTXS(td, left, row7, 4); break;
745  case 8: SET_CTXS(td, left, row7, 8); break;
746  }
747 #undef SPLAT_CTX
748 #undef SET_CTXS
749 
// --- motion vector contexts (inter frames only); two entries per unit of
// col / row7 ---
750  if (!s->s.h.keyframe && !s->s.h.intraonly) {
751  if (b->bs > BS_8x8) {
// left gets mv[1] and mv[3], above gets mv[2] and mv[3]
752  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
753 
754  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
755  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
756  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0);
757  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1);
758  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
759  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
760  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
761  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
762  } else {
// b->mv[3] is replicated across the clipped width / height
763  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
764 
765  for (n = 0; n < w4 * 2; n++) {
766  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
767  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
768  }
769  for (n = 0; n < h4 * 2; n++) {
770  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0);
771  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1);
772  }
773  }
774  }
775 
// --- per-position ref/mv storage of the current frame, for the clipped
// h4 x w4 area: intra stores ref -1 twice, compound stores both refs and
// both mvs of b->mv[3], single reference stores ref[0], -1 and b->mv[3][0]
776  // FIXME kinda ugly
777  for (y = 0; y < h4; y++) {
778  int x, o = (row + y) * s->sb_cols * 8 + col;
779  VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
780 
781  if (b->intra) {
782  for (x = 0; x < w4; x++) {
783  mv[x].ref[0] =
784  mv[x].ref[1] = -1;
785  }
786  } else if (b->comp) {
787  for (x = 0; x < w4; x++) {
788  mv[x].ref[0] = b->ref[0];
789  mv[x].ref[1] = b->ref[1];
790  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
791  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
792  }
793  } else {
794  for (x = 0; x < w4; x++) {
795  mv[x].ref[0] = b->ref[0];
796  mv[x].ref[1] = -1;
797  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
798  }
799  }
800  }
801 }
802 
803 // FIXME merge cnt/eob arguments?
/*
 * Decode the coefficient tokens of one transform block.
 *
 * Tokens are read from the range coder c in scan order. Each non-zero
 * coefficient is given a sign, multiplied by qmul[0] (first scan position)
 * or qmul[1] (all others) and stored into coef at its scan position.
 *
 * c               range coder to read from
 * coef            output coefficients; written as array elements when
 *                 is8bitsperpixel is set, otherwise as 32-bit values at
 *                 element index 2 * position (see STORE_COEF)
 * n_coeffs        number of scan positions in the block
 * is_tx32x32      when set, the scaled coefficient is divided by 2
 * is8bitsperpixel selects the storage format and skips the extra
 *                 high-bit-depth magnitude bits
 * bpp             only tested against 12, which adds two more magnitude bits
 * cnt, eob        counters incremented per [band][nnz] for token class and
 *                 end-of-block decisions
 * p               token probabilities, indexed [band][nnz]
 * nnz             initial context for the first token
 * scan            maps scan index to coefficient position
 * nb              for each scan index, the two positions whose cached
 *                 magnitude classes form the next context
 * band_counts     number of scan positions in each successive band
 * qmul            the two multipliers described above
 *
 * Returns the number of scan positions consumed before the end-of-block
 * token was read, or n_coeffs if the block was filled.
 */
804 static av_always_inline int
805 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
806  int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
807  unsigned (*eob)[6][2], uint8_t (*p)[6][11],
808  int nnz, const int16_t *scan, const int16_t (*nb)[2],
809  const int16_t *band_counts, int16_t *qmul)
810 {
811  int i = 0, band = 0, band_left = band_counts[band];
812  const uint8_t *tp = p[0][nnz];
// magnitude class (0..5) per coefficient position; entries are written
// before the nb[] lookups read them, so no initialization is done here
813  uint8_t cache[1024];
814 
815  do {
816  int val, rc;
817 
818  val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
819  eob[band][nnz][val]++;
820  if (!val)
821  break;
822 
// re-entered after a zero token: no end-of-block check is made there
823 skip_eob:
824  if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
825  cnt[band][nnz][0]++;
826  if (!--band_left)
827  band_left = band_counts[++band];
828  cache[scan[i]] = 0;
829  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
830  tp = p[band][nnz];
831  if (++i == n_coeffs)
832  break; //invalid input; blocks should end with EOB
833  goto skip_eob;
834  }
835 
836  rc = scan[i];
837  if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
838  cnt[band][nnz][1]++;
839  val = 1;
840  cache[rc] = 1;
841  } else {
842  cnt[band][nnz][2]++;
843  if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
844  if (!vp56_rac_get_prob_branchy(c, tp[4])) {
845  cache[rc] = val = 2;
846  } else {
847  val = 3 + vp56_rac_get_prob(c, tp[5]);
848  cache[rc] = 3;
849  }
850  } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
// extra magnitude bits below are read with fixed probabilities
851  cache[rc] = 4;
852  if (!vp56_rac_get_prob_branchy(c, tp[7])) {
853  val = vp56_rac_get_prob(c, 159) + 5;
854  } else {
855  val = (vp56_rac_get_prob(c, 165) << 1) + 7;
856  val += vp56_rac_get_prob(c, 145);
857  }
858  } else { // cat 3-6
859  cache[rc] = 5;
860  if (!vp56_rac_get_prob_branchy(c, tp[8])) {
861  if (!vp56_rac_get_prob_branchy(c, tp[9])) {
862  val = 11 + (vp56_rac_get_prob(c, 173) << 2);
863  val += (vp56_rac_get_prob(c, 148) << 1);
864  val += vp56_rac_get_prob(c, 140);
865  } else {
866  val = 19 + (vp56_rac_get_prob(c, 176) << 3);
867  val += (vp56_rac_get_prob(c, 155) << 2);
868  val += (vp56_rac_get_prob(c, 140) << 1);
869  val += vp56_rac_get_prob(c, 135);
870  }
871  } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
872  val = (vp56_rac_get_prob(c, 180) << 4) + 35;
873  val += (vp56_rac_get_prob(c, 157) << 3);
874  val += (vp56_rac_get_prob(c, 141) << 2);
875  val += (vp56_rac_get_prob(c, 134) << 1);
876  val += vp56_rac_get_prob(c, 130);
877  } else {
// largest category: 14 magnitude bits, plus 2 more when not 8 bits per
// pixel, plus another 2 when bpp == 12
878  val = 67;
879  if (!is8bitsperpixel) {
880  if (bpp == 12) {
881  val += vp56_rac_get_prob(c, 255) << 17;
882  val += vp56_rac_get_prob(c, 255) << 16;
883  }
884  val += (vp56_rac_get_prob(c, 255) << 15);
885  val += (vp56_rac_get_prob(c, 255) << 14);
886  }
887  val += (vp56_rac_get_prob(c, 254) << 13);
888  val += (vp56_rac_get_prob(c, 254) << 12);
889  val += (vp56_rac_get_prob(c, 254) << 11);
890  val += (vp56_rac_get_prob(c, 252) << 10);
891  val += (vp56_rac_get_prob(c, 249) << 9);
892  val += (vp56_rac_get_prob(c, 243) << 8);
893  val += (vp56_rac_get_prob(c, 230) << 7);
894  val += (vp56_rac_get_prob(c, 196) << 6);
895  val += (vp56_rac_get_prob(c, 177) << 5);
896  val += (vp56_rac_get_prob(c, 153) << 4);
897  val += (vp56_rac_get_prob(c, 140) << 3);
898  val += (vp56_rac_get_prob(c, 133) << 2);
899  val += (vp56_rac_get_prob(c, 130) << 1);
900  val += vp56_rac_get_prob(c, 129);
901  }
902  }
903  }
// STORE_COEF: plain element store when is8bitsperpixel, otherwise a 32-bit
// store at element index i * 2
904 #define STORE_COEF(c, i, v) do { \
905  if (is8bitsperpixel) { \
906  c[i] = v; \
907  } else { \
908  AV_WN32A(&c[i * 2], v); \
909  } \
910 } while (0)
911  if (!--band_left)
912  band_left = band_counts[++band];
// sign bit is read here; the multiply is done in unsigned arithmetic
913  if (is_tx32x32)
914  STORE_COEF(coef, rc, (int)((vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2);
915  else
916  STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]);
// context for the next token: rounded-up average of the two cached
// neighbour classes
917  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
918  tp = p[band][nnz];
919  } while (++i < n_coeffs);
920 
921  return i;
922 }
923 
924 static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
925  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
926  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
927  const int16_t (*nb)[2], const int16_t *band_counts,
928  int16_t *qmul)
929 {
930  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
931  nnz, scan, nb, band_counts, qmul);
932 }
933 
934 static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
935  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
936  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
937  const int16_t (*nb)[2], const int16_t *band_counts,
938  int16_t *qmul)
939 {
940  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
941  nnz, scan, nb, band_counts, qmul);
942 }
943 
944 static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
945  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
946  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
947  const int16_t (*nb)[2], const int16_t *band_counts,
948  int16_t *qmul)
949 {
950  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 0, td->s->s.h.bpp, cnt, eob, p,
951  nnz, scan, nb, band_counts, qmul);
952 }
953 
954 static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
955  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
956  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
957  const int16_t (*nb)[2], const int16_t *band_counts,
958  int16_t *qmul)
959 {
960  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 0, td->s->s.h.bpp, cnt, eob, p,
961  nnz, scan, nb, band_counts, qmul);
962 }
963 
964 static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
965 {
966  VP9Context *s = td->s;
967  VP9Block *b = td->b;
968  int row = td->row, col = td->col;
969  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
970  unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra];
971  unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra];
972  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
973  int end_x = FFMIN(2 * (s->cols - col), w4);
974  int end_y = FFMIN(2 * (s->rows - row), h4);
975  int n, pl, x, y, ret;
976  int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
977  int tx = 4 * s->s.h.lossless + b->tx;
978  const int16_t * const *yscans = ff_vp9_scans[tx];
979  const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx];
980  const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
981  const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
982  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
983  uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1];
984  static const int16_t band_counts[4][8] = {
985  { 1, 2, 3, 4, 3, 16 - 13 },
986  { 1, 2, 3, 4, 11, 64 - 21 },
987  { 1, 2, 3, 4, 11, 256 - 21 },
988  { 1, 2, 3, 4, 11, 1024 - 21 },
989  };
990  const int16_t *y_band_counts = band_counts[b->tx];
991  const int16_t *uv_band_counts = band_counts[b->uvtx];
992  int bytesperpixel = is8bitsperpixel ? 1 : 2;
993  int total_coeff = 0;
994 
995 #define MERGE(la, end, step, rd) \
996  for (n = 0; n < end; n += step) \
997  la[n] = !!rd(&la[n])
998 #define MERGE_CTX(step, rd) \
999  do { \
1000  MERGE(l, end_y, step, rd); \
1001  MERGE(a, end_x, step, rd); \
1002  } while (0)
1003 
1004 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
1005  for (n = 0, y = 0; y < end_y; y += step) { \
1006  for (x = 0; x < end_x; x += step, n += step * step) { \
1007  enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \
1008  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1009  (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \
1010  c, e, p, a[x] + l[y], yscans[txtp], \
1011  ynbs[txtp], y_band_counts, qmul[0]); \
1012  a[x] = l[y] = !!ret; \
1013  total_coeff |= !!ret; \
1014  if (step >= 4) { \
1015  AV_WN16A(&td->eob[n], ret); \
1016  } else { \
1017  td->eob[n] = ret; \
1018  } \
1019  } \
1020  }
1021 
1022 #define SPLAT(la, end, step, cond) \
1023  if (step == 2) { \
1024  for (n = 1; n < end; n += step) \
1025  la[n] = la[n - 1]; \
1026  } else if (step == 4) { \
1027  if (cond) { \
1028  for (n = 0; n < end; n += step) \
1029  AV_WN32A(&la[n], la[n] * 0x01010101); \
1030  } else { \
1031  for (n = 0; n < end; n += step) \
1032  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
1033  } \
1034  } else /* step == 8 */ { \
1035  if (cond) { \
1036  if (HAVE_FAST_64BIT) { \
1037  for (n = 0; n < end; n += step) \
1038  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
1039  } else { \
1040  for (n = 0; n < end; n += step) { \
1041  uint32_t v32 = la[n] * 0x01010101; \
1042  AV_WN32A(&la[n], v32); \
1043  AV_WN32A(&la[n + 4], v32); \
1044  } \
1045  } \
1046  } else { \
1047  for (n = 0; n < end; n += step) \
1048  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
1049  } \
1050  }
1051 #define SPLAT_CTX(step) \
1052  do { \
1053  SPLAT(a, end_x, step, end_x == w4); \
1054  SPLAT(l, end_y, step, end_y == h4); \
1055  } while (0)
1056 
1057  /* y tokens */
1058  switch (b->tx) {
1059  case TX_4X4:
1060  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
1061  break;
1062  case TX_8X8:
1063  MERGE_CTX(2, AV_RN16A);
1064  DECODE_Y_COEF_LOOP(2, 0,);
1065  SPLAT_CTX(2);
1066  break;
1067  case TX_16X16:
1068  MERGE_CTX(4, AV_RN32A);
1069  DECODE_Y_COEF_LOOP(4, 0,);
1070  SPLAT_CTX(4);
1071  break;
1072  case TX_32X32:
1073  MERGE_CTX(8, AV_RN64A);
1074  DECODE_Y_COEF_LOOP(8, 0, 32);
1075  SPLAT_CTX(8);
1076  break;
1077  }
1078 
1079 #define DECODE_UV_COEF_LOOP(step, v) \
1080  for (n = 0, y = 0; y < end_y; y += step) { \
1081  for (x = 0; x < end_x; x += step, n += step * step) { \
1082  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1083  (td, td->uvblock[pl] + 16 * n * bytesperpixel, \
1084  16 * step * step, c, e, p, a[x] + l[y], \
1085  uvscan, uvnb, uv_band_counts, qmul[1]); \
1086  a[x] = l[y] = !!ret; \
1087  total_coeff |= !!ret; \
1088  if (step >= 4) { \
1089  AV_WN16A(&td->uveob[pl][n], ret); \
1090  } else { \
1091  td->uveob[pl][n] = ret; \
1092  } \
1093  } \
1094  }
1095 
1096  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1097  c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1098  e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra];
1099  w4 >>= s->ss_h;
1100  end_x >>= s->ss_h;
1101  h4 >>= s->ss_v;
1102  end_y >>= s->ss_v;
1103  for (pl = 0; pl < 2; pl++) {
1104  a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
1105  l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
1106  switch (b->uvtx) {
1107  case TX_4X4:
1108  DECODE_UV_COEF_LOOP(1,);
1109  break;
1110  case TX_8X8:
1111  MERGE_CTX(2, AV_RN16A);
1112  DECODE_UV_COEF_LOOP(2,);
1113  SPLAT_CTX(2);
1114  break;
1115  case TX_16X16:
1116  MERGE_CTX(4, AV_RN32A);
1117  DECODE_UV_COEF_LOOP(4,);
1118  SPLAT_CTX(4);
1119  break;
1120  case TX_32X32:
1121  MERGE_CTX(8, AV_RN64A);
1122  DECODE_UV_COEF_LOOP(8, 32);
1123  SPLAT_CTX(8);
1124  break;
1125  }
1126  }
1127 
1128  return total_coeff;
1129 }
1130 
1132 {
1133  return decode_coeffs(td, 1);
1134 }
1135 
1137 {
1138  return decode_coeffs(td, 0);
1139 }
1140 
1141 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
1142  int row_and_7, int col_and_7,
1143  int w, int h, int col_end, int row_end,
1144  enum TxfmMode tx, int skip_inter)
1145 {
1146  static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
1147  static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
1148 
1149  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
1150  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
1151  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
1152  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
1153 
1154  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
1155  // edges. This means that for UV, we work on two subsampled blocks at
1156  // a time, and we only use the topleft block's mode information to set
1157  // things like block strength. Thus, for any block size smaller than
1158  // 16x16, ignore the odd portion of the block.
1159  if (tx == TX_4X4 && (ss_v | ss_h)) {
1160  if (h == ss_v) {
1161  if (row_and_7 & 1)
1162  return;
1163  if (!row_end)
1164  h += 1;
1165  }
1166  if (w == ss_h) {
1167  if (col_and_7 & 1)
1168  return;
1169  if (!col_end)
1170  w += 1;
1171  }
1172  }
1173 
1174  if (tx == TX_4X4 && !skip_inter) {
1175  int t = 1 << col_and_7, m_col = (t << w) - t, y;
1176  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
1177  int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
1178 
1179  for (y = row_and_7; y < h + row_and_7; y++) {
1180  int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
1181 
1182  mask[0][y][1] |= m_row_8;
1183  mask[0][y][2] |= m_row_4;
1184  // for odd lines, if the odd col is not being filtered,
1185  // skip odd row also:
1186  // .---. <-- a
1187  // | |
1188  // |___| <-- b
1189  // ^ ^
1190  // c d
1191  //
1192  // if a/c are even row/col and b/d are odd, and d is skipped,
1193  // e.g. right edge of size-66x66.webm, then skip b also (bug)
1194  if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
1195  mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
1196  } else {
1197  mask[1][y][col_mask_id] |= m_col;
1198  }
1199  if (!ss_h)
1200  mask[0][y][3] |= m_col;
1201  if (!ss_v) {
1202  if (ss_h && (col_end & 1))
1203  mask[1][y][3] |= (t << (w - 1)) - t;
1204  else
1205  mask[1][y][3] |= m_col;
1206  }
1207  }
1208  } else {
1209  int y, t = 1 << col_and_7, m_col = (t << w) - t;
1210 
1211  if (!skip_inter) {
1212  int mask_id = (tx == TX_8X8);
1213  int l2 = tx + ss_h - 1, step1d;
1214  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
1215  int m_row = m_col & masks[l2];
1216 
1217  // at odd UV col/row edges tx16/tx32 loopfilter edges, force
1218  // 8wd loopfilter to prevent going off the visible edge.
1219  if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
1220  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
1221  int m_row_8 = m_row - m_row_16;
1222 
1223  for (y = row_and_7; y < h + row_and_7; y++) {
1224  mask[0][y][0] |= m_row_16;
1225  mask[0][y][1] |= m_row_8;
1226  }
1227  } else {
1228  for (y = row_and_7; y < h + row_and_7; y++)
1229  mask[0][y][mask_id] |= m_row;
1230  }
1231 
1232  l2 = tx + ss_v - 1;
1233  step1d = 1 << l2;
1234  if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
1235  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
1236  mask[1][y][0] |= m_col;
1237  if (y - row_and_7 == h - 1)
1238  mask[1][y][1] |= m_col;
1239  } else {
1240  for (y = row_and_7; y < h + row_and_7; y += step1d)
1241  mask[1][y][mask_id] |= m_col;
1242  }
1243  } else if (tx != TX_4X4) {
1244  int mask_id;
1245 
1246  mask_id = (tx == TX_8X8) || (h == ss_v);
1247  mask[1][row_and_7][mask_id] |= m_col;
1248  mask_id = (tx == TX_8X8) || (w == ss_h);
1249  for (y = row_and_7; y < h + row_and_7; y++)
1250  mask[0][y][mask_id] |= t;
1251  } else {
1252  int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
1253 
1254  for (y = row_and_7; y < h + row_and_7; y++) {
1255  mask[0][y][2] |= t4;
1256  mask[0][y][1] |= t8;
1257  }
1258  mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
1259  }
1260  }
1261 }
1262 
1263 void ff_vp9_decode_block(VP9TileData *td, int row, int col,
1264  VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
1265  enum BlockLevel bl, enum BlockPartition bp)
1266 {
1267  VP9Context *s = td->s;
1268  VP9Block *b = td->b;
1269  enum BlockSize bs = bl * 3 + bp;
1270  int bytesperpixel = s->bytesperpixel;
1271  int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
1272  int emu[2];
1273  AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1274 
1275  td->row = row;
1276  td->row7 = row & 7;
1277  td->col = col;
1278  td->col7 = col & 7;
1279 
1280  td->min_mv.x = -(128 + col * 64);
1281  td->min_mv.y = -(128 + row * 64);
1282  td->max_mv.x = 128 + (s->cols - col - w4) * 64;
1283  td->max_mv.y = 128 + (s->rows - row - h4) * 64;
1284 
1285  if (s->pass < 2) {
1286  b->bs = bs;
1287  b->bl = bl;
1288  b->bp = bp;
1289  decode_mode(td);
1290  b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
1291  (s->ss_v && h4 * 2 == (1 << b->tx)));
1292 
1293  if (!b->skip) {
1294  int has_coeffs;
1295 
1296  if (bytesperpixel == 1) {
1297  has_coeffs = decode_coeffs_8bpp(td);
1298  } else {
1299  has_coeffs = decode_coeffs_16bpp(td);
1300  }
1301  if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
1302  b->skip = 1;
1303  memset(&s->above_skip_ctx[col], 1, w4);
1304  memset(&td->left_skip_ctx[td->row7], 1, h4);
1305  }
1306  } else {
1307  int row7 = td->row7;
1308 
1309 #define SPLAT_ZERO_CTX(v, n) \
1310  switch (n) { \
1311  case 1: v = 0; break; \
1312  case 2: AV_ZERO16(&v); break; \
1313  case 4: AV_ZERO32(&v); break; \
1314  case 8: AV_ZERO64(&v); break; \
1315  case 16: AV_ZERO128(&v); break; \
1316  }
1317 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
1318  do { \
1319  SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \
1320  if (s->ss_##dir2) { \
1321  SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \
1322  SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \
1323  } else { \
1324  SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \
1325  SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \
1326  } \
1327  } while (0)
1328 
1329  switch (w4) {
1330  case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break;
1331  case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break;
1332  case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break;
1333  case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break;
1334  }
1335  switch (h4) {
1336  case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break;
1337  case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break;
1338  case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break;
1339  case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break;
1340  }
1341  }
1342 
1343  if (s->pass == 1) {
1344  s->td[0].b++;
1345  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1346  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1347  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1348  s->td[0].eob += 4 * w4 * h4;
1349  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1350  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1351 
1352  return;
1353  }
1354  }
1355 
1356  // emulated overhangs if the stride of the target buffer can't hold. This
1357  // makes it possible to support emu-edge and so on even if we have large block
1358  // overhangs
1359  emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
1360  (row + h4) > s->rows;
1361  emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
1362  (row + h4) > s->rows;
1363  if (emu[0]) {
1364  td->dst[0] = td->tmp_y;
1365  td->y_stride = 128;
1366  } else {
1367  td->dst[0] = f->data[0] + yoff;
1368  td->y_stride = f->linesize[0];
1369  }
1370  if (emu[1]) {
1371  td->dst[1] = td->tmp_uv[0];
1372  td->dst[2] = td->tmp_uv[1];
1373  td->uv_stride = 128;
1374  } else {
1375  td->dst[1] = f->data[1] + uvoff;
1376  td->dst[2] = f->data[2] + uvoff;
1377  td->uv_stride = f->linesize[1];
1378  }
1379  if (b->intra) {
1380  if (s->s.h.bpp > 8) {
1381  ff_vp9_intra_recon_16bpp(td, yoff, uvoff);
1382  } else {
1383  ff_vp9_intra_recon_8bpp(td, yoff, uvoff);
1384  }
1385  } else {
1386  if (s->s.h.bpp > 8) {
1388  } else {
1390  }
1391  }
1392  if (emu[0]) {
1393  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
1394 
1395  for (n = 0; o < w; n++) {
1396  int bw = 64 >> n;
1397 
1398  av_assert2(n <= 4);
1399  if (w & bw) {
1400  s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
1401  td->tmp_y + o * bytesperpixel, 128, h, 0, 0);
1402  o += bw;
1403  }
1404  }
1405  }
1406  if (emu[1]) {
1407  int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
1408  int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
1409 
1410  for (n = s->ss_h; o < w; n++) {
1411  int bw = 64 >> n;
1412 
1413  av_assert2(n <= 4);
1414  if (w & bw) {
1415  s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
1416  td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
1417  s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
1418  td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
1419  o += bw;
1420  }
1421  }
1422  }
1423 
1424  // pick filter level and find edges to apply filter to
1425  if (s->s.h.filter.level &&
1426  (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
1427  [b->mode[3] != ZEROMV]) > 0) {
1428  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
1429  int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7;
1430 
1431  setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
1432  mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
1433  if (s->ss_h || s->ss_v)
1434  mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
1435  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
1436  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
1437  b->uvtx, skip_inter);
1438  }
1439 
1440  if (s->pass == 2) {
1441  s->td[0].b++;
1442  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1443  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1444  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1445  s->td[0].eob += 4 * w4 * h4;
1446  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1447  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1448  }
1449 }
ThreadFrame tf
Definition: vp9shared.h:60
unsigned intra[4][2]
Definition: vp9dec.h:174
vp9_mc_func mc[5][N_FILTERS][2][2][2]
Definition: vp9dsp.h:114
uint8_t left_uv_nnz_ctx[2][16]
Definition: vp9dec.h:205
const char const char void * val
Definition: avisynth_c.h:771
const char * s
Definition: avisynth_c.h:768
uint8_t * segmentation_map
Definition: vp9shared.h:62
unsigned single_ref[5][2][2]
Definition: vp9dec.h:176
static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:954
This structure describes decoded (raw) audio or video data.
Definition: frame.h:218
VP5 and VP6 compatible video decoder (common features)
uint8_t update_map
Definition: vp9shared.h:133
static int decode_coeffs_16bpp(VP9TileData *td)
Definition: vp9block.c:1136
uint8_t * above_skip_ctx
Definition: vp9dec.h:138
VP9Context * s
Definition: vp9dec.h:160
AVFrame * f
Definition: thread.h:35
static av_always_inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t(*tree)[2], const uint8_t *probs)
Definition: vp56.h:380
VP9BitstreamHeader h
Definition: vp9shared.h:160
ProbContext p
Definition: vp9dec.h:124
static void decode_mode(VP9TileData *td)
Definition: vp9block.c:80
uint8_t ss_v
Definition: vp9dec.h:107
const char * b
Definition: vf_curves.c:113
uint8_t prob[7]
Definition: vp9shared.h:134
uint8_t tx32p[2][3]
Definition: vp9dec.h:55
static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:934
unsigned coef[4][2][2][6][6][3]
Definition: vp9dec.h:194
#define t8
Definition: regdef.h:53
unsigned tx16p[2][3]
Definition: vp9dec.h:179
int col7
Definition: vp9dec.h:163
uint8_t left_segpred_ctx[8]
Definition: vp9dec.h:209
void ff_thread_await_progress(ThreadFrame *f, int n, int field)
Wait for earlier decoding threads to finish reference pictures.
unsigned cols
Definition: vp9dec.h:116
uint8_t ref[2]
Definition: vp9dec.h:80
int stride
Definition: mace.c:144
uint8_t comp_ref[5]
Definition: vp9dec.h:54
const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9]
Definition: vp9data.c:87
#define AV_WN32A(p, v)
Definition: intreadwrite.h:538
#define AV_COPY32(d, s)
Definition: intreadwrite.h:586
uint8_t left_mode_ctx[16]
Definition: vp9dec.h:203
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h, ptrdiff_t stride, int v)
Definition: vp9block.c:34
void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:288
Definition: vp9.h:29
unsigned tx8p[2][2]
Definition: vp9dec.h:180
uint8_t left_intra_ctx[8]
Definition: vp9dec.h:210
const uint8_t ff_vp9_default_kf_uvmode_probs[10][9]
Definition: vp9data.c:201
unsigned y_mode[4][10]
Definition: vp9dec.h:170
#define AV_RN32A(p)
Definition: intreadwrite.h:526
enum FilterMode filtermode
Definition: vp9shared.h:105
unsigned filter[4][3]
Definition: vp9dec.h:172
uint8_t coef[4][2][2][6][6][3]
Definition: vp9dec.h:125
uint8_t
struct VP9Context::@155 prob
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:64
uint8_t varcompref[2]
Definition: vp9shared.h:114
VP9Frame frames[3]
Definition: vp9shared.h:166
uint8_t left_txfm_ctx[8]
Definition: vp9dec.h:208
void ff_vp9_inter_recon_8bpp(VP9TileData *td)
Definition: vp9recon.c:636
void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:293
#define SET_CTXS(perf, dir, off, n)
static av_always_inline void mask_edges(uint8_t(*mask)[8][4], int ss_h, int ss_v, int row_and_7, int col_and_7, int w, int h, int col_end, int row_end, enum TxfmMode tx, int skip_inter)
Definition: vp9block.c:1141
uint8_t * uveob[2]
Definition: vp9dec.h:219
int col
Definition: vp9dec.h:163
static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
Definition: vp9block.c:964
const int16_t *const ff_vp9_scans[5][4]
Definition: vp9data.c:600
uint8_t skip[3]
Definition: vp9dec.h:58
uint8_t * above_uv_nnz_ctx[2]
Definition: vp9dec.h:137
VP9DSPContext dsp
Definition: vp9dec.h:95
int row7
Definition: vp9dec.h:163
enum CompPredMode comppredmode
Definition: vp9shared.h:149
#define DECODE_Y_COEF_LOOP(step, mode_index, v)
Definition: vp9.h:38
uint8_t mode[4]
Definition: vp9dec.h:80
uint8_t * above_txfm_ctx
Definition: vp9dec.h:139
struct VP9BitstreamHeader::@161::@163 feat[MAX_SEGMENT]
static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:944
Definition: vp9.h:30
uint8_t uv_mode[10][9]
Definition: vp9dec.h:48
uint8_t bytesperpixel
Definition: vp9dec.h:108
uint8_t mask[2][2][8][4]
Definition: vp9dec.h:76
uint8_t left_filter_ctx[8]
Definition: vp9dec.h:213
Definition: vp9.h:28
unsigned tile_col_start
Definition: vp9dec.h:167
#define td
Definition: regdef.h:70
static const uint16_t mask[17]
Definition: lzw.c:38
const int8_t ff_vp9_intramode_tree[9][2]
Definition: vp9data.c:75
uint8_t signbias[3]
Definition: vp9shared.h:112
#define STORE_COEF(c, i, v)
uint8_t intra
Definition: vp9dec.h:80
TxfmMode
Definition: vp9.h:27
simple assert() macros that are a bit more flexible than ISO C assert().
uint8_t intra[4]
Definition: vp9dec.h:51
VP56RangeCoder * c
Definition: vp9dec.h:162
struct VP9BitstreamHeader::@159 filter
void ff_vp9_inter_recon_16bpp(VP9TileData *td)
Definition: vp9recon.c:641
uint8_t * above_filter_ctx
Definition: vp9dec.h:144
uint8_t comp[5]
Definition: vp9dec.h:52
uint8_t left_y_nnz_ctx[16]
Definition: vp9dec.h:202
int uses_2pass
Definition: vp9shared.h:64
VP9TileData * td
Definition: vp9dec.h:93
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:53
#define FFMIN(a, b)
Definition: common.h:96
enum TxfmMode txfmmode
Definition: vp9shared.h:148
BlockSize
Definition: vp9shared.h:77
const int8_t ff_vp9_inter_mode_tree[3][2]
Definition: vp9data.c:214
uint8_t level[8 *8]
Definition: vp9dec.h:74
static av_always_inline int decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs, int is_tx32x32, int is8bitsperpixel, int bpp, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:805
uint8_t keyframe
Definition: vp9shared.h:98
unsigned mv_mode[7][4]
Definition: vp9dec.h:173
uint8_t w
Definition: llviddspenc.c:38
uint8_t tx16p[2][2]
Definition: vp9dec.h:56
const int8_t ff_vp9_segmentation_tree[7][2]
Definition: vp9data.c:65
VP9SharedContext s
Definition: vp9dec.h:92
uint8_t uvmode
Definition: vp9dec.h:80
int n
Definition: avisynth_c.h:684
enum FilterMode ff_vp9_filter_lut[3]
Definition: vp9data.c:225
#define AV_WN64A(p, v)
Definition: intreadwrite.h:542
#define AV_WN16A(p, v)
Definition: intreadwrite.h:534
uint8_t mv_mode[7][3]
Definition: vp9dec.h:50
uint8_t fixcompref
Definition: vp9shared.h:113
#define vp56_rac_get_prob
Definition: vp56.h:254
struct VP9TileData::@156 counts
if(ret< 0)
Definition: vf_mcdeint.c:279
int16_t * block
Definition: vp9dec.h:218
unsigned comp_ref[5][2]
Definition: vp9dec.h:177
uint8_t * above_segpred_ctx
Definition: vp9dec.h:140
unsigned comp[5][2]
Definition: vp9dec.h:175
static const float pred[4]
Definition: siprdata.h:259
unsigned rows
Definition: vp9dec.h:116
unsigned sb_cols
Definition: vp9dec.h:116
static const int8_t mv[256][2]
Definition: 4xm.c:77
static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
Definition: vp56.h:271
struct VP9BitstreamHeader::@161 segmentation
struct VP9TileData::@157 max_mv
VP56mv(* above_mv_ctx)[2]
Definition: vp9dec.h:145
Libavcodec external API header.
BlockLevel
Definition: vp9shared.h:70
uint8_t filter[4][2]
Definition: vp9dec.h:49
static int decode_coeffs_8bpp(VP9TileData *td)
Definition: vp9block.c:1131
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:249
int pass
Definition: vp9dec.h:99
#define REF_FRAME_SEGMAP
Definition: vp9shared.h:165
ptrdiff_t uv_stride
Definition: vp9dec.h:165
#define CUR_FRAME
Definition: vp9shared.h:163
int row
Definition: vp9dec.h:163
enum TxfmMode tx uvtx
Definition: vp9dec.h:84
unsigned uv_mode[10][10]
Definition: vp9dec.h:171
uint8_t * above_y_nnz_ctx
Definition: vp9dec.h:136
static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:924
uint8_t tx8p[2]
Definition: vp9dec.h:57
uint8_t seg_id
Definition: vp9dec.h:80
#define SPLAT_CTX(var, val, n)
#define MERGE_CTX(step, rd)
uint8_t ss_h
Definition: vp9dec.h:107
uint8_t y_mode[4][9]
Definition: vp9dec.h:47
void ff_vp9_fill_mv(VP9TileData *td, VP56mv *mv, int mode, int sb)
Definition: vp9mvs.c:291
uint8_t tmp_uv[2][64 *64 *2]
Definition: vp9dec.h:216
int16_t * uvblock[2]
Definition: vp9dec.h:218
uint8_t * above_intra_ctx
Definition: vp9dec.h:141
enum BlockSize bs
Definition: vp9dec.h:83
VP56mv mv[4][2]
Definition: vp9dec.h:82
const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2]
Definition: vp9data.c:25
enum BlockPartition bp
Definition: vp9dec.h:86
uint8_t * above_mode_ctx
Definition: vp9dec.h:134
uint8_t single_ref[5][2]
Definition: vp9dec.h:53
uint8_t comp
Definition: vp9dec.h:80
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:232
#define DECODE_UV_COEF_LOOP(step, v)
void ff_vp9_decode_block(VP9TileData *td, int row, int col, VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl, enum BlockPartition bp)
Definition: vp9block.c:1263
uint8_t tmp_y[64 *64 *2]
Definition: vp9dec.h:215
unsigned eob[4][2][2][6][6][2]
Definition: vp9dec.h:195
uint8_t * dst[3]
Definition: vp9dec.h:164
unsigned tx32p[2][4]
Definition: vp9dec.h:178
common internal api header.
static double c[64]
static av_always_inline int vp8_rac_get(VP56RangeCoder *c)
Definition: vp56.h:308
Core video DSP helper functions.
enum BlockLevel bl
Definition: vp9dec.h:85
#define t4
Definition: regdef.h:32
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2)
enum FilterMode filter
Definition: vp9dec.h:81
VP9mvrefPair * mv
Definition: vp9shared.h:63
Definition: vp9.h:31
const int8_t ff_vp9_filter_tree[2][2]
Definition: vp9data.c:220
#define AV_RN16A(p)
Definition: intreadwrite.h:522
uint8_t left_ref_ctx[8]
Definition: vp9dec.h:212
uint8_t pred_prob[3]
Definition: vp9shared.h:135
VP9Block * b
Definition: vp9dec.h:166
#define av_always_inline
Definition: attributes.h:39
ptrdiff_t y_stride
Definition: vp9dec.h:165
const int16_t(*const [5][4] ff_vp9_scans_nb)[2]
Definition: vp9data.c:1157
unsigned skip[3][2]
Definition: vp9dec.h:181
uint8_t skip
Definition: vp9dec.h:80
uint8_t * above_ref_ctx
Definition: vp9dec.h:143
BlockPartition
Definition: vp9shared.h:34
#define AV_RN64A(p)
Definition: intreadwrite.h:530
VP56mv left_mv_ctx[16][2]
Definition: vp9dec.h:204
uint8_t left_comp_ctx[8]
Definition: vp9dec.h:211
uint8_t * above_comp_ctx
Definition: vp9dec.h:142
for(j=16;j >0;--j)
struct VP9TileData::@157 min_mv
uint8_t left_skip_ctx[8]
Definition: vp9dec.h:207