FFmpeg  4.0
vf_nnedi.c
1 /*
2  * Copyright (C) 2010-2011 Kevin Stone
3  * Copyright (C) 2016 Paul B Mahol
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <float.h>
23 
24 #include "libavutil/common.h"
25 #include "libavutil/float_dsp.h"
26 #include "libavutil/imgutils.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 #include "avfilter.h"
30 #include "formats.h"
31 #include "internal.h"
32 #include "video.h"
33 
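/* Per-plane working state for one output field: mirror-padded copies of the
   source plane (paddedp), destination pointers and strides, the field parity
   being synthesized, per-line counters of pixels left for the predictor, and
   scratch buffers shared by the two evaluation passes. */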
34 typedef struct FrameData {
35  uint8_t *paddedp[3];
36  int padded_stride[3];
37  int padded_width[3];
38  int padded_height[3];
39 
40  uint8_t *dstp[3];
41  int dst_stride[3];
42 
43  int field[3];
44 
45  int32_t *lcount[3];
46  float *input;
47  float *temp;
48 } FrameData;
49 
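/* Filter context: user options, the weights loaded from the nnedi3 weights
   file (weights0 for the prescreener, weights1[] for the two predictor
   passes), frame bookkeeping for field-rate doubling, and function pointers
   selected in select_functions() according to pscrn/fapprox. */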
50 typedef struct NNEDIContext {
51  const AVClass *class;
52 
53  char *weights_file;
54 
55  AVFrame *src;
56  AVFrame *second;
57  AVFrame *dst;
58  int eof;
59  int64_t cur_pts;
60 
61  AVFloatDSPContext *fdsp;
62  int nb_planes;
63  int linesize[4];
64  int planeheight[4];
65 
66  float *weights0;
67  float *weights1[2];
68  int asize;
69  int nns;
70  int xdia;
71  int ydia;
72 
73  // Parameters
74  int deint;
75  int field;
76  int process_plane;
77  int nsize;
78  int nnsparam;
79  int qual;
80  int etype;
81  int pscrn;
82  int fapprox;
83 
84  int max_value;
85 
86  void (*copy_pad)(const AVFrame *, FrameData *, struct NNEDIContext *, int);
87  void (*evalfunc_0)(struct NNEDIContext *, FrameData *);
88  void (*evalfunc_1)(struct NNEDIContext *, FrameData *);
89 
90  // Functions used in evalfunc_0
91  void (*readpixels)(const uint8_t *, const int, float *);
92  void (*compute_network0)(struct NNEDIContext *s, const float *, const float *, uint8_t *);
93  int32_t (*process_line0)(const uint8_t *, int, uint8_t *, const uint8_t *, const int, const int, const int);
94 
95  // Functions used in evalfunc_1
96  void (*extract)(const uint8_t *, const int, const int, const int, float *, float *);
97  void (*dot_prod)(struct NNEDIContext *, const float *, const float *, float *, const int, const int, const float *);
98  void (*expfunc)(float *, const int);
99  void (*wae5)(const float *, const int, float *);
100 
101  FrameData frame_data;
102 } NNEDIContext;
103 
104 #define OFFSET(x) offsetof(NNEDIContext, x)
105 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
106 
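/* Illustrative invocation of the options below (file names are hypothetical):
 *   ffmpeg -i in.mkv -vf nnedi=weights=nnedi3_weights.bin:field=af:nsize=s32x4:nns=n32:qual=slow out.mkv
 */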
107 static const AVOption nnedi_options[] = {
108  {"weights", "set weights file", OFFSET(weights_file), AV_OPT_TYPE_STRING, {.str="nnedi3_weights.bin"}, 0, 0, FLAGS },
109  {"deint", "set which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "deint" },
110  {"all", "deinterlace all frames", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "deint" },
111  {"interlaced", "only deinterlace frames marked as interlaced", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "deint" },
112  {"field", "set mode of operation", OFFSET(field), AV_OPT_TYPE_INT, {.i64=-1}, -2, 3, FLAGS, "field" },
113  {"af", "use frame flags, both fields", 0, AV_OPT_TYPE_CONST, {.i64=-2}, 0, 0, FLAGS, "field" },
114  {"a", "use frame flags, single field", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, FLAGS, "field" },
115  {"t", "use top field only", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "field" },
116  {"b", "use bottom field only", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "field" },
117  {"tf", "use both fields, top first", 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "field" },
118  {"bf", "use both fields, bottom first", 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "field" },
119  {"planes", "set which planes to process", OFFSET(process_plane), AV_OPT_TYPE_INT, {.i64=7}, 0, 7, FLAGS },
120  {"nsize", "set size of local neighborhood around each pixel, used by the predictor neural network", OFFSET(nsize), AV_OPT_TYPE_INT, {.i64=6}, 0, 6, FLAGS, "nsize" },
121  {"s8x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "nsize" },
122  {"s16x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "nsize" },
123  {"s32x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "nsize" },
124  {"s48x6", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "nsize" },
125  {"s8x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, FLAGS, "nsize" },
126  {"s16x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, FLAGS, "nsize" },
127  {"s32x4", NULL, 0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, FLAGS, "nsize" },
128  {"nns", "set number of neurons in predictor neural network", OFFSET(nnsparam), AV_OPT_TYPE_INT, {.i64=1}, 0, 4, FLAGS, "nns" },
129  {"n16", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "nns" },
130  {"n32", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "nns" },
131  {"n64", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "nns" },
132  {"n128", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "nns" },
133  {"n256", NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, FLAGS, "nns" },
134  {"qual", "set quality", OFFSET(qual), AV_OPT_TYPE_INT, {.i64=1}, 1, 2, FLAGS, "qual" },
135  {"fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "qual" },
136  {"slow", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "qual" },
137  {"etype", "set which set of weights to use in the predictor", OFFSET(etype), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "etype" },
138  {"a", "weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "etype" },
139  {"s", "weights trained to minimize squared error", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "etype" },
140  {"pscrn", "set prescreening", OFFSET(pscrn), AV_OPT_TYPE_INT, {.i64=2}, 0, 2, FLAGS, "pscrn" },
141  {"none", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "pscrn" },
142  {"original", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "pscrn" },
143  {"new", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "pscrn" },
144  {"fapprox", NULL, OFFSET(fapprox), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS },
145  { NULL }
146 };
147 
148 AVFILTER_DEFINE_CLASS(nnedi);
149 
150 static int config_input(AVFilterLink *inlink)
151 {
152  AVFilterContext *ctx = inlink->dst;
153  NNEDIContext *s = ctx->priv;
154  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
155  int ret;
156 
157  s->nb_planes = av_pix_fmt_count_planes(inlink->format);
158  if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
159  return ret;
160 
161  s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
162  s->planeheight[0] = s->planeheight[3] = inlink->h;
163 
164  return 0;
165 }
166 
167 static int config_output(AVFilterLink *outlink)
168 {
169  AVFilterContext *ctx = outlink->src;
170  NNEDIContext *s = ctx->priv;
171 
172  outlink->time_base.num = ctx->inputs[0]->time_base.num;
173  outlink->time_base.den = ctx->inputs[0]->time_base.den * 2;
174  outlink->w = ctx->inputs[0]->w;
175  outlink->h = ctx->inputs[0]->h;
176 
177  if (s->field > 1 || s->field == -2)
178  outlink->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
179  (AVRational){2, 1});
180 
181  return 0;
182 }
183 
184 static int query_formats(AVFilterContext *ctx)
185 {
186  static const enum AVPixelFormat pix_fmts[] = {
187  AV_PIX_FMT_GRAY8,
188  AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
189  AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
190  AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
191  AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
192  AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
193  AV_PIX_FMT_YUVJ411P,
194  AV_PIX_FMT_GBRP,
195  AV_PIX_FMT_NONE
196  };
197 
198  AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
199  if (!fmts_list)
200  return AVERROR(ENOMEM);
201  return ff_set_common_formats(ctx, fmts_list);
202 }
203 
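/* Copy the selected field of each processed plane into a padded buffer:
   rows go in at a 6-line top margin and a 32-pixel left margin, then the
   32 columns on each side and the 6 rows at top and bottom are filled by
   mirroring, so the neighborhood reads below never leave the buffer. */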
204 static void copy_pad(const AVFrame *src, FrameData *frame_data, NNEDIContext *s, int fn)
205 {
206  const int off = 1 - fn;
207  int plane, y, x;
208 
209  for (plane = 0; plane < s->nb_planes; plane++) {
210  const uint8_t *srcp = (const uint8_t *)src->data[plane];
211  uint8_t *dstp = (uint8_t *)frame_data->paddedp[plane];
212 
213  const int src_stride = src->linesize[plane];
214  const int dst_stride = frame_data->padded_stride[plane];
215 
216  const int src_height = s->planeheight[plane];
217  const int dst_height = frame_data->padded_height[plane];
218 
219  const int src_width = s->linesize[plane];
220  const int dst_width = frame_data->padded_width[plane];
221 
222  int c = 4;
223 
224  if (!(s->process_plane & (1 << plane)))
225  continue;
226 
227  // Copy.
228  for (y = off; y < src_height; y += 2)
229  memcpy(dstp + 32 + (6 + y) * dst_stride,
230  srcp + y * src_stride,
231  src_width * sizeof(uint8_t));
232 
233  // And pad.
234  dstp += (6 + off) * dst_stride;
235  for (y = 6 + off; y < dst_height - 6; y += 2) {
236  int c = 2;
237 
238  for (x = 0; x < 32; x++)
239  dstp[x] = dstp[64 - x];
240 
241  for (x = dst_width - 32; x < dst_width; x++, c += 2)
242  dstp[x] = dstp[x - c];
243 
244  dstp += dst_stride * 2;
245  }
246 
247  dstp = (uint8_t *)frame_data->paddedp[plane];
248  for (y = off; y < 6; y += 2)
249  memcpy(dstp + y * dst_stride,
250  dstp + (12 + 2 * off - y) * dst_stride,
251  dst_width * sizeof(uint8_t));
252 
253  for (y = dst_height - 6 + off; y < dst_height; y += 2, c += 4)
254  memcpy(dstp + y * dst_stride,
255  dstp + (y - c) * dst_stride,
256  dst_width * sizeof(uint8_t));
257  }
258 }
259 
260 static void elliott(float *data, const int n)
261 {
262  int i;
263 
264  for (i = 0; i < n; i++)
265  data[i] = data[i] / (1.0f + FFABS(data[i]));
266 }
267 
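/* Float and int16 dot-product kernels: each computes n dot products of the
   input vector against rows of the weight matrix, applies the scale factor
   and adds the per-neuron bias stored after the matrix. dot_prods() is the
   quantized variant used when the weights were packed to int16 in init(). */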
268 static void dot_prod(NNEDIContext *s, const float *data, const float *weights, float *vals, const int n, const int len, const float *scale)
269 {
270  int i;
271 
272  for (i = 0; i < n; i++) {
273  float sum;
274 
275  sum = s->fdsp->scalarproduct_float(data, &weights[i * len], len);
276 
277  vals[i] = sum * scale[0] + weights[n * len + i];
278  }
279 }
280 
281 static void dot_prods(NNEDIContext *s, const float *dataf, const float *weightsf, float *vals, const int n, const int len, const float *scale)
282 {
283  const int16_t *data = (int16_t *)dataf;
284  const int16_t *weights = (int16_t *)weightsf;
285  const float *wf = (float *)&weights[n * len];
286  int i, j;
287 
288  for (i = 0; i < n; i++) {
289  int sum = 0, off = ((i >> 2) << 3) + (i & 3);
290  for (j = 0; j < len; j++)
291  sum += data[j] * weights[i * len + j];
292 
293  vals[i] = sum * wf[off] * scale[0] + wf[off + 4];
294  }
295 }
296 
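/* Original prescreener network (float variant here, int16 variant below):
   three small layers over a 12x4 neighborhood decide whether the cheap cubic
   interpolation in process_line0() is good enough (d[0] = 1) or the pixel
   must go through the full predictor (d[0] = 0). */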
297 static void compute_network0(NNEDIContext *s, const float *input, const float *weights, uint8_t *d)
298 {
299  float t, temp[12], scale = 1.0f;
300 
301  dot_prod(s, input, weights, temp, 4, 48, &scale);
302  t = temp[0];
303  elliott(temp, 4);
304  temp[0] = t;
305  dot_prod(s, temp, weights + 4 * 49, temp + 4, 4, 4, &scale);
306  elliott(temp + 4, 4);
307  dot_prod(s, temp, weights + 4 * 49 + 4 * 5, temp + 8, 4, 8, &scale);
308  if (FFMAX(temp[10], temp[11]) <= FFMAX(temp[8], temp[9]))
309  d[0] = 1;
310  else
311  d[0] = 0;
312 }
313 
314 static void compute_network0_i16(NNEDIContext *s, const float *inputf, const float *weightsf, uint8_t *d)
315 {
316  const float *wf = weightsf + 2 * 48;
317  float t, temp[12], scale = 1.0f;
318 
319  dot_prods(s, inputf, weightsf, temp, 4, 48, &scale);
320  t = temp[0];
321  elliott(temp, 4);
322  temp[0] = t;
323  dot_prod(s, temp, wf + 8, temp + 4, 4, 4, &scale);
324  elliott(temp + 4, 4);
325  dot_prod(s, temp, wf + 8 + 4 * 5, temp + 8, 4, 8, &scale);
326  if (FFMAX(temp[10], temp[11]) <= FFMAX(temp[8], temp[9]))
327  d[0] = 1;
328  else
329  d[0] = 0;
330 }
331 
332 static void pixel2float48(const uint8_t *t8, const int pitch, float *p)
333 {
334  const uint8_t *t = (const uint8_t *)t8;
335  int y, x;
336 
337  for (y = 0; y < 4; y++)
338  for (x = 0; x < 12; x++)
339  p[y * 12 + x] = t[y * pitch * 2 + x];
340 }
341 
342 static void byte2word48(const uint8_t *t, const int pitch, float *pf)
343 {
344  int16_t *p = (int16_t *)pf;
345  int y, x;
346 
347  for (y = 0; y < 4; y++)
348  for (x = 0; x < 12; x++)
349  p[y * 12 + x] = t[y * pitch * 2 + x];
350 }
351 
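/* For pixels the prescreener accepted, write a 4-tap cubic interpolation of
   the two field lines above and below, clamped to the valid range; mark the
   remaining pixels with 255 so evalfunc_1() will predict them, and return
   how many such pixels the line contains. */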
352 static int32_t process_line0(const uint8_t *tempu, int width, uint8_t *dstp8, const uint8_t *src3p8, const int src_pitch, const int max_value, const int chroma)
353 {
354  uint8_t *dstp = (uint8_t *)dstp8;
355  const uint8_t *src3p = (const uint8_t *)src3p8;
356  int minimum = 0;
357  int maximum = max_value - 1; // Technically the -1 is only needed for 8 and 16 bit input.
358  int count = 0, x;
359  for (x = 0; x < width; x++) {
360  if (tempu[x]) {
361  int tmp = 19 * (src3p[x + src_pitch * 2] + src3p[x + src_pitch * 4]) - 3 * (src3p[x] + src3p[x + src_pitch * 6]);
362  tmp /= 32;
363  dstp[x] = FFMAX(FFMIN(tmp, maximum), minimum);
364  } else {
365  dstp[x] = 255;
366  count++;
367  }
368  }
369  return count;
370 }
371 
372 // new prescreener functions
373 static void byte2word64(const uint8_t *t, const int pitch, float *p)
374 {
375  int16_t *ps = (int16_t *)p;
376  int y, x;
377 
378  for (y = 0; y < 4; y++)
379  for (x = 0; x < 16; x++)
380  ps[y * 16 + x] = t[y * pitch * 2 + x];
381 }
382 
383 static void compute_network0new(NNEDIContext *s, const float *datai, const float *weights, uint8_t *d)
384 {
385  int16_t *data = (int16_t *)datai;
386  int16_t *ws = (int16_t *)weights;
387  float *wf = (float *)&ws[4 * 64];
388  float vals[8];
389  int mask, i, j;
390 
391  for (i = 0; i < 4; i++) {
392  int sum = 0;
393  float t;
394 
395  for (j = 0; j < 64; j++)
396  sum += data[j] * ws[(i << 3) + ((j >> 3) << 5) + (j & 7)];
397  t = sum * wf[i] + wf[4 + i];
398  vals[i] = t / (1.0f + FFABS(t));
399  }
400 
401  for (i = 0; i < 4; i++) {
402  float sum = 0.0f;
403 
404  for (j = 0; j < 4; j++)
405  sum += vals[j] * wf[8 + i + (j << 2)];
406  vals[4 + i] = sum + wf[8 + 16 + i];
407  }
408 
409  mask = 0;
410  for (i = 0; i < 4; i++) {
411  if (vals[4 + i] > 0.0f)
412  mask |= (0x1 << (i << 3));
413  }
414 
415  ((int *)d)[0] = mask;
416 }
417 
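/* First pass: copy the existing field lines straight to the destination,
   then run the selected prescreener over the missing lines, cubically
   interpolating the easy pixels and leaving the rest marked with 255 for
   the predictor pass. With pscrn=none every pixel is left to the predictor. */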
418 static void evalfunc_0(NNEDIContext *s, FrameData *frame_data)
419 {
420  float *input = frame_data->input;
421  const float *weights0 = s->weights0;
422  float *temp = frame_data->temp;
423  uint8_t *tempu = (uint8_t *)temp;
424  int plane, x, y;
425 
426  // And now the actual work.
427  for (plane = 0; plane < s->nb_planes; plane++) {
428  const uint8_t *srcp = (const uint8_t *)frame_data->paddedp[plane];
429  const int src_stride = frame_data->padded_stride[plane] / sizeof(uint8_t);
430 
431  const int width = frame_data->padded_width[plane];
432  const int height = frame_data->padded_height[plane];
433 
434  uint8_t *dstp = (uint8_t *)frame_data->dstp[plane];
435  const int dst_stride = frame_data->dst_stride[plane] / sizeof(uint8_t);
436  const uint8_t *src3p;
437  int ystart, ystop;
438  int32_t *lcount;
439 
440  if (!(s->process_plane & (1 << plane)))
441  continue;
442 
443  for (y = 1 - frame_data->field[plane]; y < height - 12; y += 2) {
444  memcpy(dstp + y * dst_stride,
445  srcp + 32 + (6 + y) * src_stride,
446  (width - 64) * sizeof(uint8_t));
447 
448  }
449 
450  ystart = 6 + frame_data->field[plane];
451  ystop = height - 6;
452  srcp += ystart * src_stride;
453  dstp += (ystart - 6) * dst_stride - 32;
454  src3p = srcp - src_stride * 3;
455  lcount = frame_data->lcount[plane] - 6;
456 
457  if (s->pscrn == 1) { // original
458  for (y = ystart; y < ystop; y += 2) {
459  for (x = 32; x < width - 32; x++) {
460  s->readpixels((const uint8_t *)(src3p + x - 5), src_stride, input);
461  s->compute_network0(s, input, weights0, tempu+x);
462  }
463  lcount[y] += s->process_line0(tempu + 32, width - 64, (uint8_t *)(dstp + 32), (const uint8_t *)(src3p + 32), src_stride, s->max_value, plane);
464  src3p += src_stride * 2;
465  dstp += dst_stride * 2;
466  }
467  } else if (s->pscrn > 1) { // new
468  for (y = ystart; y < ystop; y += 2) {
469  for (x = 32; x < width - 32; x += 4) {
470  s->readpixels((const uint8_t *)(src3p + x - 6), src_stride, input);
471  s->compute_network0(s, input, weights0, tempu + x);
472  }
473  lcount[y] += s->process_line0(tempu + 32, width - 64, (uint8_t *)(dstp + 32), (const uint8_t *)(src3p + 32), src_stride, s->max_value, plane);
474  src3p += src_stride * 2;
475  dstp += dst_stride * 2;
476  }
477  } else { // no prescreening
478  for (y = ystart; y < ystop; y += 2) {
479  memset(dstp + 32, 255, (width - 64) * sizeof(uint8_t));
480  lcount[y] += width - 64;
481  dstp += dst_stride * 2;
482  }
483  }
484  }
485 }
486 
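/* Gather the xdia x ydia neighborhood (rows two apart, i.e. from the existing
   field) into the network input buffer and compute its mean, standard
   deviation and reciprocal in mstd[0..2]; mstd[3] is zeroed as the
   accumulator for the predictor output. */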
487 static void extract_m8(const uint8_t *srcp8, const int stride, const int xdia, const int ydia, float *mstd, float *input)
488 {
489  // uint8_t or uint16_t or float
490  const uint8_t *srcp = (const uint8_t *)srcp8;
491  float scale;
492  double tmp;
493 
494  // int32_t or int64_t or double
495  int64_t sum = 0, sumsq = 0;
496  int y, x;
497 
498  for (y = 0; y < ydia; y++) {
499  const uint8_t *srcpT = srcp + y * stride * 2;
500 
501  for (x = 0; x < xdia; x++) {
502  sum += srcpT[x];
503  sumsq += (uint32_t)srcpT[x] * (uint32_t)srcpT[x];
504  input[x] = srcpT[x];
505  }
506  input += xdia;
507  }
508  scale = 1.0f / (xdia * ydia);
509  mstd[0] = sum * scale;
510  tmp = (double)sumsq * scale - (double)mstd[0] * mstd[0];
511  mstd[3] = 0.0f;
512  if (tmp <= FLT_EPSILON)
513  mstd[1] = mstd[2] = 0.0f;
514  else {
515  mstd[1] = sqrt(tmp);
516  mstd[2] = 1.0f / mstd[1];
517  }
518 }
519 
520 static void extract_m8_i16(const uint8_t *srcp, const int stride, const int xdia, const int ydia, float *mstd, float *inputf)
521 {
522  int16_t *input = (int16_t *)inputf;
523  float scale;
524  int sum = 0, sumsq = 0;
525  int y, x;
526 
527  for (y = 0; y < ydia; y++) {
528  const uint8_t *srcpT = srcp + y * stride * 2;
529  for (x = 0; x < xdia; x++) {
530  sum += srcpT[x];
531  sumsq += srcpT[x] * srcpT[x];
532  input[x] = srcpT[x];
533  }
534  input += xdia;
535  }
536  scale = 1.0f / (float)(xdia * ydia);
537  mstd[0] = sum * scale;
538  mstd[1] = sumsq * scale - mstd[0] * mstd[0];
539  mstd[3] = 0.0f;
540  if (mstd[1] <= FLT_EPSILON)
541  mstd[1] = mstd[2] = 0.0f;
542  else {
543  mstd[1] = sqrt(mstd[1]);
544  mstd[2] = 1.0f / mstd[1];
545  }
546 }
547 
548 
549 static const float exp_lo = -80.0f;
550 static const float exp_hi = +80.0f;
551 
552 static void e2_m16(float *s, const int n)
553 {
554  int i;
555 
556  for (i = 0; i < n; i++)
557  s[i] = exp(av_clipf(s[i], exp_lo, exp_hi));
558 }
559 
560 const float min_weight_sum = 1e-10f;
561 
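/* Combine the predictor outputs: the first n values act as softmax weights
   over the elliott-activated second n values; the weighted average is scaled
   by 5, de-normalized with the local standard deviation and mean, and
   accumulated into mstd[3]. Falls back to the plain mean when the weight sum
   is negligible. */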
562 static void weighted_avg_elliott_mul5_m16(const float *w, const int n, float *mstd)
563 {
564  float vsum = 0.0f, wsum = 0.0f;
565  int i;
566 
567  for (i = 0; i < n; i++) {
568  vsum += w[i] * (w[n + i] / (1.0f + FFABS(w[n + i])));
569  wsum += w[i];
570  }
571  if (wsum > min_weight_sum)
572  mstd[3] += ((5.0f * vsum) / wsum) * mstd[1] + mstd[0];
573  else
574  mstd[3] += mstd[0];
575 }
576 
577 
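/* Second pass: for every destination pixel still marked 255, extract its
   neighborhood, run the predictor network once (qual=fast) or twice with the
   second weight set (qual=slow), and write the clamped, averaged estimate. */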
578 static void evalfunc_1(NNEDIContext *s, FrameData *frame_data)
579 {
580  float *input = frame_data->input;
581  float *temp = frame_data->temp;
582  float **weights1 = s->weights1;
583  const int qual = s->qual;
584  const int asize = s->asize;
585  const int nns = s->nns;
586  const int xdia = s->xdia;
587  const int xdiad2m1 = (xdia / 2) - 1;
588  const int ydia = s->ydia;
589  const float scale = 1.0f / (float)qual;
590  int plane, y, x, i;
591 
592  for (plane = 0; plane < s->nb_planes; plane++) {
593  const uint8_t *srcp = (const uint8_t *)frame_data->paddedp[plane];
594  const int src_stride = frame_data->padded_stride[plane] / sizeof(uint8_t);
595 
596  const int width = frame_data->padded_width[plane];
597  const int height = frame_data->padded_height[plane];
598 
599  uint8_t *dstp = (uint8_t *)frame_data->dstp[plane];
600  const int dst_stride = frame_data->dst_stride[plane] / sizeof(uint8_t);
601 
602  const int ystart = frame_data->field[plane];
603  const int ystop = height - 12;
604  const uint8_t *srcpp;
605 
606  if (!(s->process_plane & (1 << plane)))
607  continue;
608 
609  srcp += (ystart + 6) * src_stride;
610  dstp += ystart * dst_stride - 32;
611  srcpp = srcp - (ydia - 1) * src_stride - xdiad2m1;
612 
613  for (y = ystart; y < ystop; y += 2) {
614  for (x = 32; x < width - 32; x++) {
615  float mstd[4];
616 
617  if (dstp[x] != 255)
618  continue;
619 
620  s->extract((const uint8_t *)(srcpp + x), src_stride, xdia, ydia, mstd, input);
621  for (i = 0; i < qual; i++) {
622  s->dot_prod(s, input, weights1[i], temp, nns * 2, asize, mstd + 2);
623  s->expfunc(temp, nns);
624  s->wae5(temp, nns, mstd);
625  }
626 
627  dstp[x] = FFMIN(FFMAX((int)(mstd[3] * scale + 0.5f), 0), s->max_value);
628  }
629  srcpp += src_stride * 2;
630  dstp += dst_stride * 2;
631  }
632  }
633 }
634 
635 #define NUM_NSIZE 7
636 #define NUM_NNS 5
637 
638 static int roundds(const double f)
639 {
640  if (f - floor(f) >= 0.5)
641  return FFMIN((int)ceil(f), 32767);
642  return FFMAX((int)floor(f), -32768);
643 }
644 
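/* Wire up the function pointers: the prescreener type (pscrn) picks between
   the original and the new prescreener kernels, and fapprox selects int16
   versus float dot products for each stage. */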
645 static void select_functions(NNEDIContext *s)
646 {
647  s->copy_pad = copy_pad;
648  s->evalfunc_0 = evalfunc_0;
649  s->evalfunc_1 = evalfunc_1;
650 
651  // evalfunc_0
652  s->process_line0 = process_line0;
653 
654  if (s->pscrn < 2) { // original prescreener
655  if (s->fapprox & 1) { // int16 dot products
656  s->readpixels = byte2word48;
657  s->compute_network0 = compute_network0_i16;
658  } else {
659  s->readpixels = pixel2float48;
660  s->compute_network0 = compute_network0;
661  }
662  } else { // new prescreener
663  // only int16 dot products
664  s->readpixels = byte2word64;
665  s->compute_network0 = compute_network0new;
666  }
667 
668  // evalfunc_1
669  s->wae5 = weighted_avg_elliott_mul5_m16;
670 
671  if (s->fapprox & 2) { // use int16 dot products
672  s->extract = extract_m8_i16;
673  s->dot_prod = dot_prods;
674  } else { // use float dot products
675  s->extract = extract_m8;
676  s->dot_prod = dot_prod;
677  }
678 
679  s->expfunc = e2_m16;
680 }
681 
682 static int modnpf(const int m, const int n)
683 {
684  if ((m % n) == 0)
685  return m;
686  return m + n - (m % n);
687 }
688 
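/* Build one output frame for the requested field: work out the field parity
   from the field option and the frame flags, allocate the destination and the
   per-plane padded buffers and line counters on first use, then run the
   padding copy, the prescreener pass and the predictor pass. */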
689 static int get_frame(AVFilterContext *ctx, int is_second)
690 {
691  NNEDIContext *s = ctx->priv;
692  AVFilterLink *outlink = ctx->outputs[0];
693  AVFrame *src = s->src;
694  FrameData *frame_data;
695  int effective_field = s->field;
696  size_t temp_size;
697  int field_n;
698  int plane;
699 
700  if (effective_field > 1)
701  effective_field -= 2;
702  else if (effective_field < 0)
703  effective_field += 2;
704 
705  if (s->field < 0 && src->interlaced_frame && src->top_field_first == 0)
706  effective_field = 0;
707  else if (s->field < 0 && src->interlaced_frame && src->top_field_first == 1)
708  effective_field = 1;
709  else
710  effective_field = !effective_field;
711 
712  if (s->field > 1 || s->field == -2) {
713  if (is_second) {
714  field_n = (effective_field == 0);
715  } else {
716  field_n = (effective_field == 1);
717  }
718  } else {
719  field_n = effective_field;
720  }
721 
722  s->dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
723  if (!s->dst)
724  return AVERROR(ENOMEM);
725  av_frame_copy_props(s->dst, src);
726  s->dst->interlaced_frame = 0;
727 
728  frame_data = &s->frame_data;
729 
730  for (plane = 0; plane < s->nb_planes; plane++) {
731  int dst_height = s->planeheight[plane];
732  int dst_width = s->linesize[plane];
733 
734  const int min_alignment = 16;
735  const int min_pad = 10;
736 
737  if (!(s->process_plane & (1 << plane))) {
738  av_image_copy_plane(s->dst->data[plane], s->dst->linesize[plane],
739  src->data[plane], src->linesize[plane],
740  s->linesize[plane],
741  s->planeheight[plane]);
742  continue;
743  }
744 
745  frame_data->padded_width[plane] = dst_width + 64;
746  frame_data->padded_height[plane] = dst_height + 12;
747  frame_data->padded_stride[plane] = modnpf(frame_data->padded_width[plane] + min_pad, min_alignment); // TODO: maybe min_pad is in pixels too?
748  if (!frame_data->paddedp[plane]) {
749  frame_data->paddedp[plane] = av_malloc_array(frame_data->padded_stride[plane], frame_data->padded_height[plane]);
750  if (!frame_data->paddedp[plane])
751  return AVERROR(ENOMEM);
752  }
753 
754  frame_data->dstp[plane] = s->dst->data[plane];
755  frame_data->dst_stride[plane] = s->dst->linesize[plane];
756 
757  if (!frame_data->lcount[plane]) {
758  frame_data->lcount[plane] = av_calloc(dst_height, sizeof(int32_t) * 16);
759  if (!frame_data->lcount[plane])
760  return AVERROR(ENOMEM);
761  } else {
762  memset(frame_data->lcount[plane], 0, dst_height * sizeof(int32_t) * 16);
763  }
764 
765  frame_data->field[plane] = field_n;
766  }
767 
768  if (!frame_data->input) {
769  frame_data->input = av_malloc(512 * sizeof(float));
770  if (!frame_data->input)
771  return AVERROR(ENOMEM);
772  }
773  // evalfunc_0 requires at least padded_width[0] bytes.
774  // evalfunc_1 requires at least 512 floats.
775  if (!frame_data->temp) {
776  temp_size = FFMAX(frame_data->padded_width[0], 512 * sizeof(float));
777  frame_data->temp = av_malloc(temp_size);
778  if (!frame_data->temp)
779  return AVERROR(ENOMEM);
780  }
781 
782  // Copy src to a padded "frame" in frame_data and mirror the edges.
783  s->copy_pad(src, frame_data, s, field_n);
784 
785  // Handles prescreening and the cubic interpolation.
786  s->evalfunc_0(s, frame_data);
787 
788  // The rest.
789  s->evalfunc_1(s, frame_data);
790 
791  return 0;
792 }
793 
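/* Frame entry point. In the double-rate modes (field=af/tf/bf) each input is
   held in s->second so two output frames can be produced; frames that need no
   deinterlacing (deint=interlaced on progressive input, or a disabled filter)
   are cloned and passed through with their timestamps rescaled to the doubled
   time base. */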
794 static int filter_frame(AVFilterLink *inlink, AVFrame *src)
795 {
796  AVFilterContext *ctx = inlink->dst;
797  AVFilterLink *outlink = ctx->outputs[0];
798  NNEDIContext *s = ctx->priv;
799  int ret;
800 
801  if ((s->field > 1 ||
802  s->field == -2) && !s->second) {
803  goto second;
804  } else if (s->field > 1 ||
805  s->field == -2) {
806  AVFrame *dst;
807 
808  s->src = s->second;
809  ret = get_frame(ctx, 1);
810  if (ret < 0) {
811  av_frame_free(&s->dst);
812  av_frame_free(&s->src);
813  av_frame_free(&s->second);
814  return ret;
815  }
816  dst = s->dst;
817 
818  if (src->pts != AV_NOPTS_VALUE &&
819  dst->pts != AV_NOPTS_VALUE)
820  dst->pts += src->pts;
821  else
822  dst->pts = AV_NOPTS_VALUE;
823 
824  ret = ff_filter_frame(outlink, dst);
825  if (ret < 0)
826  return ret;
827  if (s->eof)
828  return 0;
829  s->cur_pts = s->second->pts;
830  av_frame_free(&s->second);
831 second:
832  if ((s->deint && src->interlaced_frame &&
833  !ctx->is_disabled) ||
834  (!s->deint && !ctx->is_disabled)) {
835  s->second = src;
836  }
837  }
838 
839  if ((s->deint && !src->interlaced_frame) || ctx->is_disabled) {
840  AVFrame *dst = av_frame_clone(src);
841  if (!dst) {
842  av_frame_free(&src);
843  av_frame_free(&s->second);
844  return AVERROR(ENOMEM);
845  }
846 
847  if (s->field > 1 || s->field == -2) {
848  av_frame_free(&s->second);
849  if ((s->deint && src->interlaced_frame) ||
850  (!s->deint))
851  s->second = src;
852  } else {
853  av_frame_free(&src);
854  }
855  if (dst->pts != AV_NOPTS_VALUE)
856  dst->pts *= 2;
857  return ff_filter_frame(outlink, dst);
858  }
859 
860  s->src = src;
861  ret = get_frame(ctx, 0);
862  if (ret < 0) {
863  av_frame_free(&s->dst);
864  av_frame_free(&s->src);
865  av_frame_free(&s->second);
866  return ret;
867  }
868 
869  if (src->pts != AV_NOPTS_VALUE)
870  s->dst->pts = src->pts * 2;
871  if (s->field <= 1 && s->field > -2) {
872  av_frame_free(&src);
873  s->src = NULL;
874  }
875 
876  return ff_filter_frame(outlink, s->dst);
877 }
878 
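/* Forward the request upstream; at EOF, if a frame is still held for the
   double-rate modes, synthesize its remaining output from the stored frame
   so the last field is not dropped. */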
879 static int request_frame(AVFilterLink *link)
880 {
881  AVFilterContext *ctx = link->src;
882  NNEDIContext *s = ctx->priv;
883  int ret;
884 
885  if (s->eof)
886  return AVERROR_EOF;
887 
888  ret = ff_request_frame(ctx->inputs[0]);
889 
890  if (ret == AVERROR_EOF && s->second) {
891  AVFrame *next = av_frame_clone(s->second);
892 
893  if (!next)
894  return AVERROR(ENOMEM);
895 
896  next->pts = s->second->pts * 2 - s->cur_pts;
897  s->eof = 1;
898 
899  filter_frame(ctx->inputs[0], next);
900  } else if (ret < 0) {
901  return ret;
902  }
903 
904  return 0;
905 }
906 
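/* Load and prepare the weights: the nnedi3 weights file must have the exact
   expected size; the prescreener and predictor coefficients are then
   mean-adjusted, rescaled and, depending on fapprox, quantized to int16
   before the processing functions are selected. */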
907 static av_cold int init(AVFilterContext *ctx)
908 {
909  NNEDIContext *s = ctx->priv;
910  FILE *weights_file = NULL;
911  int64_t expected_size = 13574928;
912  int64_t weights_size;
913  float *bdata;
914  size_t bytes_read;
915  const int xdia_table[NUM_NSIZE] = { 8, 16, 32, 48, 8, 16, 32 };
916  const int ydia_table[NUM_NSIZE] = { 6, 6, 6, 6, 4, 4, 4 };
917  const int nns_table[NUM_NNS] = { 16, 32, 64, 128, 256 };
918  const int dims0 = 49 * 4 + 5 * 4 + 9 * 4;
919  const int dims0new = 4 * 65 + 4 * 5;
920  const int dims1 = nns_table[s->nnsparam] * 2 * (xdia_table[s->nsize] * ydia_table[s->nsize] + 1);
921  int dims1tsize = 0;
922  int dims1offset = 0;
923  int ret = 0, i, j, k;
924 
925  weights_file = fopen(s->weights_file, "rb");
926  if (!weights_file) {
927  av_log(ctx, AV_LOG_ERROR, "No weights file provided, aborting!\n");
928  return AVERROR(EINVAL);
929  }
930 
931  if (fseek(weights_file, 0, SEEK_END)) {
932  av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the end of weights file.\n");
933  fclose(weights_file);
934  return AVERROR(EINVAL);
935  }
936 
937  weights_size = ftell(weights_file);
938 
939  if (weights_size == -1) {
940  fclose(weights_file);
941  av_log(ctx, AV_LOG_ERROR, "Couldn't get size of weights file.\n");
942  return AVERROR(EINVAL);
943  } else if (weights_size != expected_size) {
944  fclose(weights_file);
945  av_log(ctx, AV_LOG_ERROR, "Unexpected weights file size.\n");
946  return AVERROR(EINVAL);
947  }
948 
949  if (fseek(weights_file, 0, SEEK_SET)) {
950  fclose(weights_file);
951  av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the start of weights file.\n");
952  return AVERROR(EINVAL);
953  }
954 
955  bdata = (float *)av_malloc(expected_size);
956  if (!bdata) {
957  fclose(weights_file);
958  return AVERROR(ENOMEM);
959  }
960 
961  bytes_read = fread(bdata, 1, expected_size, weights_file);
962 
963  if (bytes_read != (size_t)expected_size) {
964  fclose(weights_file);
965  ret = AVERROR_INVALIDDATA;
966  av_log(ctx, AV_LOG_ERROR, "Couldn't read weights file.\n");
967  goto fail;
968  }
969 
970  fclose(weights_file);
971 
972  for (j = 0; j < NUM_NNS; j++) {
973  for (i = 0; i < NUM_NSIZE; i++) {
974  if (i == s->nsize && j == s->nnsparam)
975  dims1offset = dims1tsize;
976  dims1tsize += nns_table[j] * 2 * (xdia_table[i] * ydia_table[i] + 1) * 2;
977  }
978  }
979 
980  s->weights0 = av_malloc_array(FFMAX(dims0, dims0new), sizeof(float));
981  if (!s->weights0) {
982  ret = AVERROR(ENOMEM);
983  goto fail;
984  }
985 
986  for (i = 0; i < 2; i++) {
987  s->weights1[i] = av_malloc_array(dims1, sizeof(float));
988  if (!s->weights1[i]) {
989  ret = AVERROR(ENOMEM);
990  goto fail;
991  }
992  }
993 
994  // Adjust prescreener weights
995  if (s->pscrn >= 2) {// using new prescreener
996  const float *bdw;
997  int16_t *ws;
998  float *wf;
999  double mean[4] = { 0.0, 0.0, 0.0, 0.0 };
1000  int *offt = av_calloc(4 * 64, sizeof(int));
1001 
1002  if (!offt) {
1003  ret = AVERROR(ENOMEM);
1004  goto fail;
1005  }
1006 
1007  for (j = 0; j < 4; j++)
1008  for (k = 0; k < 64; k++)
1009  offt[j * 64 + k] = ((k >> 3) << 5) + ((j & 3) << 3) + (k & 7);
1010 
1011  bdw = bdata + dims0 + dims0new * (s->pscrn - 2);
1012  ws = (int16_t *)s->weights0;
1013  wf = (float *)&ws[4 * 64];
1014  // Calculate mean weight of each first layer neuron
1015  for (j = 0; j < 4; j++) {
1016  double cmean = 0.0;
1017  for (k = 0; k < 64; k++)
1018  cmean += bdw[offt[j * 64 + k]];
1019  mean[j] = cmean / 64.0;
1020  }
1021  // Factor mean removal and 1.0/127.5 scaling
1022  // into first layer weights. scale to int16 range
1023  for (j = 0; j < 4; j++) {
1024  double scale, mval = 0.0;
1025 
1026  for (k = 0; k < 64; k++)
1027  mval = FFMAX(mval, FFABS((bdw[offt[j * 64 + k]] - mean[j]) / 127.5));
1028  scale = 32767.0 / mval;
1029  for (k = 0; k < 64; k++)
1030  ws[offt[j * 64 + k]] = roundds(((bdw[offt[j * 64 + k]] - mean[j]) / 127.5) * scale);
1031  wf[j] = (float)(mval / 32767.0);
1032  }
1033  memcpy(wf + 4, bdw + 4 * 64, (dims0new - 4 * 64) * sizeof(float));
1034  av_free(offt);
1035  } else { // using old prescreener
1036  double mean[4] = { 0.0, 0.0, 0.0, 0.0 };
1037  // Calculate mean weight of each first layer neuron
1038  for (j = 0; j < 4; j++) {
1039  double cmean = 0.0;
1040  for (k = 0; k < 48; k++)
1041  cmean += bdata[j * 48 + k];
1042  mean[j] = cmean / 48.0;
1043  }
1044  if (s->fapprox & 1) {// use int16 dot products in first layer
1045  int16_t *ws = (int16_t *)s->weights0;
1046  float *wf = (float *)&ws[4 * 48];
1047  // Factor mean removal and 1.0/127.5 scaling
1048  // into first layer weights. scale to int16 range
1049  for (j = 0; j < 4; j++) {
1050  double scale, mval = 0.0;
1051  for (k = 0; k < 48; k++)
1052  mval = FFMAX(mval, FFABS((bdata[j * 48 + k] - mean[j]) / 127.5));
1053  scale = 32767.0 / mval;
1054  for (k = 0; k < 48; k++)
1055  ws[j * 48 + k] = roundds(((bdata[j * 48 + k] - mean[j]) / 127.5) * scale);
1056  wf[j] = (float)(mval / 32767.0);
1057  }
1058  memcpy(wf + 4, bdata + 4 * 48, (dims0 - 4 * 48) * sizeof(float));
1059  } else {// use float dot products in first layer
1060  double half = (1 << 8) - 1;
1061 
1062  half /= 2;
1063 
1064  // Factor mean removal and 1.0/half scaling
1065  // into first layer weights.
1066  for (j = 0; j < 4; j++)
1067  for (k = 0; k < 48; k++)
1068  s->weights0[j * 48 + k] = (float)((bdata[j * 48 + k] - mean[j]) / half);
1069  memcpy(s->weights0 + 4 * 48, bdata + 4 * 48, (dims0 - 4 * 48) * sizeof(float));
1070  }
1071  }
1072 
1073  // Adjust prediction weights
1074  for (i = 0; i < 2; i++) {
1075  const float *bdataT = bdata + dims0 + dims0new * 3 + dims1tsize * s->etype + dims1offset + i * dims1;
1076  const int nnst = nns_table[s->nnsparam];
1077  const int asize = xdia_table[s->nsize] * ydia_table[s->nsize];
1078  const int boff = nnst * 2 * asize;
1079  double *mean = (double *)av_calloc(asize + 1 + nnst * 2, sizeof(double));
1080 
1081  if (!mean) {
1082  ret = AVERROR(ENOMEM);
1083  goto fail;
1084  }
1085 
1086  // Calculate mean weight of each neuron (ignore bias)
1087  for (j = 0; j < nnst * 2; j++) {
1088  double cmean = 0.0;
1089  for (k = 0; k < asize; k++)
1090  cmean += bdataT[j * asize + k];
1091  mean[asize + 1 + j] = cmean / (double)asize;
1092  }
1093  // Calculate mean softmax neuron
1094  for (j = 0; j < nnst; j++) {
1095  for (k = 0; k < asize; k++)
1096  mean[k] += bdataT[j * asize + k] - mean[asize + 1 + j];
1097  mean[asize] += bdataT[boff + j];
1098  }
1099  for (j = 0; j < asize + 1; j++)
1100  mean[j] /= (double)(nnst);
1101 
1102  if (s->fapprox & 2) { // use int16 dot products
1103  int16_t *ws = (int16_t *)s->weights1[i];
1104  float *wf = (float *)&ws[nnst * 2 * asize];
1105  // Factor mean removal into weights, remove global offset from
1106  // softmax neurons, and scale weights to int16 range.
1107  for (j = 0; j < nnst; j++) { // softmax neurons
1108  double scale, mval = 0.0;
1109  for (k = 0; k < asize; k++)
1110  mval = FFMAX(mval, FFABS(bdataT[j * asize + k] - mean[asize + 1 + j] - mean[k]));
1111  scale = 32767.0 / mval;
1112  for (k = 0; k < asize; k++)
1113  ws[j * asize + k] = roundds((bdataT[j * asize + k] - mean[asize + 1 + j] - mean[k]) * scale);
1114  wf[(j >> 2) * 8 + (j & 3)] = (float)(mval / 32767.0);
1115  wf[(j >> 2) * 8 + (j & 3) + 4] = (float)(bdataT[boff + j] - mean[asize]);
1116  }
1117  for (j = nnst; j < nnst * 2; j++) { // elliott neurons
1118  double scale, mval = 0.0;
1119  for (k = 0; k < asize; k++)
1120  mval = FFMAX(mval, FFABS(bdataT[j * asize + k] - mean[asize + 1 + j]));
1121  scale = 32767.0 / mval;
1122  for (k = 0; k < asize; k++)
1123  ws[j * asize + k] = roundds((bdataT[j * asize + k] - mean[asize + 1 + j]) * scale);
1124  wf[(j >> 2) * 8 + (j & 3)] = (float)(mval / 32767.0);
1125  wf[(j >> 2) * 8 + (j & 3) + 4] = bdataT[boff + j];
1126  }
1127  } else { // use float dot products
1128  // Factor mean removal into weights, and remove global
1129  // offset from softmax neurons.
1130  for (j = 0; j < nnst * 2; j++) {
1131  for (k = 0; k < asize; k++) {
1132  const double q = j < nnst ? mean[k] : 0.0;
1133  s->weights1[i][j * asize + k] = (float)(bdataT[j * asize + k] - mean[asize + 1 + j] - q);
1134  }
1135  s->weights1[i][boff + j] = (float)(bdataT[boff + j] - (j < nnst ? mean[asize] : 0.0));
1136  }
1137  }
1138  av_free(mean);
1139  }
1140 
1141  s->nns = nns_table[s->nnsparam];
1142  s->xdia = xdia_table[s->nsize];
1143  s->ydia = ydia_table[s->nsize];
1144  s->asize = xdia_table[s->nsize] * ydia_table[s->nsize];
1145 
1146  s->max_value = 65535 >> 8;
1147 
1148  select_functions(s);
1149 
1150  s->fdsp = avpriv_float_dsp_alloc(0);
1151  if (!s->fdsp)
1152  ret = AVERROR(ENOMEM);
1153 
1154 fail:
1155  av_free(bdata);
1156  return ret;
1157 }
1158 
1159 static av_cold void uninit(AVFilterContext *ctx)
1160 {
1161  NNEDIContext *s = ctx->priv;
1162  int i;
1163 
1164  av_freep(&s->weights0);
1165 
1166  for (i = 0; i < 2; i++)
1167  av_freep(&s->weights1[i]);
1168 
1169  for (i = 0; i < s->nb_planes; i++) {
1170  av_freep(&s->frame_data.paddedp[i]);
1171  av_freep(&s->frame_data.lcount[i]);
1172  }
1173 
1174  av_freep(&s->frame_data.input);
1175  av_freep(&s->frame_data.temp);
1176  av_freep(&s->fdsp);
1177  av_frame_free(&s->second);
1178 }
1179 
1180 static const AVFilterPad inputs[] = {
1181  {
1182  .name = "default",
1183  .type = AVMEDIA_TYPE_VIDEO,
1184  .filter_frame = filter_frame,
1185  .config_props = config_input,
1186  },
1187  { NULL }
1188 };
1189 
1190 static const AVFilterPad outputs[] = {
1191  {
1192  .name = "default",
1193  .type = AVMEDIA_TYPE_VIDEO,
1194  .config_props = config_output,
1195  .request_frame = request_frame,
1196  },
1197  { NULL }
1198 };
1199 
1200 AVFilter ff_vf_nnedi = {
1201  .name = "nnedi",
1202  .description = NULL_IF_CONFIG_SMALL("Apply neural network edge directed interpolation intra-only deinterlacer."),
1203  .priv_size = sizeof(NNEDIContext),
1204  .priv_class = &nnedi_class,
1205  .init = init,
1206  .uninit = uninit,
1207  .query_formats = query_formats,
1208  .inputs = inputs,
1209  .outputs = outputs,
1210  .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
1211 };