FFmpeg  4.0
af_loudnorm.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /* http://k.ylo.ph/2016/04/04/loudnorm.html */
22 
23 #include "libavutil/opt.h"
24 #include "avfilter.h"
25 #include "internal.h"
26 #include "audio.h"
27 #include "ebur128.h"
28 
29 enum FrameType {
35 };
36 
38  OUT,
43 };
44 
50 };
51 
52 typedef struct LoudNormContext {
53  const AVClass *class;
54  double target_i;
55  double target_lra;
56  double target_tp;
57  double measured_i;
58  double measured_lra;
59  double measured_tp;
61  double offset;
62  int linear;
63  int dual_mono;
65 
66  double *buf;
67  int buf_size;
68  int buf_index;
70 
71  double delta[30];
72  double weights[21];
73  double prev_delta;
74  int index;
75 
76  double gain_reduction[2];
77  double *limiter_buf;
78  double *prev_smp;
83  int env_index;
84  int env_cnt;
87 
88  int64_t pts;
92  int channels;
93 
97 
98 #define OFFSET(x) offsetof(LoudNormContext, x)
99 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
100 
/*
 * User-settable options of the loudnorm filter.
 *
 * Every target/measured option is registered twice, once with an upper-case
 * and once with a lower-case suffix; both spellings store into the same
 * LoudNormContext field.  The "print_format" entry is followed by the named
 * constants it accepts, and the table ends with a { NULL } sentinel.
 *
 * The defaults of measured_I (0), measured_LRA (0), measured_TP (99) and
 * measured_thresh (-70) are the exact values the init callback tests against
 * before it selects LINEAR_MODE: if any of the four is left at its default,
 * linear mode is not selected there.
 */
101 static const AVOption loudnorm_options[] = {
102  { "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
103  { "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
104  { "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
105  { "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
106  { "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
107  { "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
108  { "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
109  { "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
110  { "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
111  { "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
112  { "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
113  { "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
114  { "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
115  { "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
116  { "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
117  { "dual_mono", "treat mono input as dual-mono", OFFSET(dual_mono), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
 /* Selects what the uninit callback logs: nothing, a JSON object, or a text summary. */
118  { "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, "print_format" },
119  { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, "print_format" },
120  { "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, "print_format" },
121  { "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, "print_format" },
122  { NULL }
123 };
124 
125 AVFILTER_DEFINE_CLASS(loudnorm);
126 
127 static inline int frame_size(int sample_rate, int frame_len_msec)
128 {
129  const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
130  return frame_size + (frame_size % 2);
131 }
132 
134 {
135  double total_weight = 0.0;
136  const double sigma = 3.5;
137  double adjust;
138  int i;
139 
140  const int offset = 21 / 2;
141  const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
142  const double c2 = 2.0 * pow(sigma, 2.0);
143 
144  for (i = 0; i < 21; i++) {
145  const int x = i - offset;
146  s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
147  total_weight += s->weights[i];
148  }
149 
150  adjust = 1.0 / total_weight;
151  for (i = 0; i < 21; i++)
152  s->weights[i] *= adjust;
153 }
154 
156 {
157  double result = 0.;
158  int i;
159 
160  index = index - 10 > 0 ? index - 10 : index + 20;
161  for (i = 0; i < 21; i++)
162  result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];
163 
164  return result;
165 }
166 
/*
 * Look ahead in the limiter ring buffer for the next sample that is a local
 * maximum above the ceiling (s->target_tp).
 *
 * s           filter state; reads limiter_buf, limiter_buf_index,
 *             limiter_buf_size, target_tp and frame_type, and updates
 *             prev_smp[] and (when a peak is found) peak_index
 * offset      number of sample frames to skip from limiter_buf_index before
 *             the look-ahead distance is added
 * nb_samples  number of sample frames to scan
 * channels    number of interleaved channels in the buffer
 * peak_delta  out: -1 when no peak was found, otherwise the scan position n
 *             (in sample frames from the scan start) of the peak
 * peak_value  out: written only when a peak is found; the largest absolute
 *             value across all channels at the peak position
 */
167 static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
168 {
169  int n, c, i, index;
170  double ceiling;
171  double *buf;
172 
173  *peak_delta = -1;
174  buf = s->limiter_buf;
175  ceiling = s->target_tp;
176 
 /* Scanning starts 1920 sample frames ahead of the current limiter position
  * (plus the caller's offset); the start index is wrapped once. */
177  index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
178  if (index >= s->limiter_buf_size)
179  index -= s->limiter_buf_size;
180 
 /* On the first frame there is no history yet, so prev_smp is seeded from the
  * sample frame immediately preceding the scan start.
  * NOTE(review): this read is not wrapped; it relies on index >= channels. */
181  if (s->frame_type == FIRST_FRAME) {
182  for (c = 0; c < channels; c++)
183  s->prev_smp[c] = fabs(buf[index + c - channels]);
184  }
185 
186  for (n = 0; n < nb_samples; n++) {
187  for (c = 0; c < channels; c++) {
188  double this, next, max_peak;
189 
 /* Absolute value of the current and the following sample of this channel,
  * both fetched with wrap-around. */
190  this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
191  next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);
192 
 /* Candidate: not smaller than both neighbours, above the ceiling, and not
  * the very first scanned position (n > 0). */
193  if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
194  int detected;
195 
 /* Confirm the candidate: none of the samples 2..11 frames ahead may exceed it. */
196  detected = 1;
197  for (i = 2; i < 12; i++) {
198  next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
199  if (next > this) {
200  detected = 0;
201  break;
202  }
203  }
204 
 /* A rejected candidate moves on to the next channel; note that this skips
  * the prev_smp[c] update at the bottom of the channel loop. */
205  if (!detected)
206  continue;
207 
 /* Peak confirmed: take the largest magnitude over all channels at this
  * position and refresh prev_smp.  The outer loop variable c is reused here,
  * which is harmless only because the function returns right after. */
208  for (c = 0; c < channels; c++) {
209  if (c == 0 || fabs(buf[index + c]) > max_peak)
210  max_peak = fabs(buf[index + c]);
211 
212  s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
213  }
214 
215  *peak_delta = n;
216  s->peak_index = index;
217  *peak_value = max_peak;
218  return;
219  }
220 
221  s->prev_smp[c] = this;
222  }
223 
 /* Advance one sample frame, wrapping at the end of the ring buffer. */
224  index += channels;
225  if (index >= s->limiter_buf_size)
226  index -= s->limiter_buf_size;
227  }
228 }
229 
/*
 * Apply the limiter envelope to the limiter ring buffer and emit nb_samples
 * sample frames.
 *
 * The function runs a four-state machine (OUT, ATTACK, SUSTAIN, RELEASE) kept
 * in s->limiter_state until smp_cnt reaches nb_samples, scaling samples in
 * s->limiter_buf in place.  Afterwards it copies nb_samples frames, starting
 * at the value s->limiter_buf_index had on entry, into out and hard-clips
 * anything whose magnitude still exceeds the ceiling (s->target_tp).
 *
 * s           filter state (limiter buffer, envelope position and gains)
 * out         destination for nb_samples * channels interleaved samples
 * nb_samples  number of sample frames to process and output
 * channels    number of interleaved channels
 */
230 static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
231 {
232  int n, c, index, peak_delta, smp_cnt;
233  double ceiling, peak_value;
234  double *buf;
235 
236  buf = s->limiter_buf;
237  ceiling = s->target_tp;
238  index = s->limiter_buf_index;
239  smp_cnt = 0;
240 
 /* First frame only: find the largest magnitude in the first 1920 sample
  * frames of the buffer.  If it is over the ceiling, scale those frames by
  * ceiling / max and start the state machine in SUSTAIN with that gain. */
241  if (s->frame_type == FIRST_FRAME) {
242  double max;
243 
244  max = 0.;
245  for (n = 0; n < 1920; n++) {
246  for (c = 0; c < channels; c++) {
247  max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
248  }
249  buf += channels;
250  }
251 
252  if (max > ceiling) {
253  s->gain_reduction[1] = ceiling / max;
254  s->limiter_state = SUSTAIN;
255  buf = s->limiter_buf;
256 
257  for (n = 0; n < 1920; n++) {
258  for (c = 0; c < channels; c++) {
259  double env;
260  env = s->gain_reduction[1];
261  buf[c] *= env;
262  }
263  buf += channels;
264  }
265  }
266 
 /* buf was advanced while scanning; reset it to the start of the ring buffer. */
267  buf = s->limiter_buf;
268  }
269 
270  do {
271 
272  switch(s->limiter_state) {
 /* OUT: no gain reduction active.  Search the rest of this call's samples
  * for a peak; without one the whole call is done. */
273  case OUT:
274  detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
275  if (peak_delta != -1) {
276  s->env_cnt = 0;
 /* NOTE(review): this decreases smp_cnt whenever peak_delta is smaller than
  * attack_length - confirm that is intended. */
277  smp_cnt += (peak_delta - s->attack_length);
278  s->gain_reduction[0] = 1.;
279  s->gain_reduction[1] = ceiling / peak_value;
280  s->limiter_state = ATTACK;
281 
 /* The envelope starts attack_length sample frames before the peak. */
282  s->env_index = s->peak_index - (s->attack_length * channels);
283  if (s->env_index < 0)
284  s->env_index += s->limiter_buf_size;
285 
 /* env_cnt was just set to 0, so this addition is a no-op here.
  * NOTE(review): the wrap test below uses '>' while every other wrap test in
  * this function uses '>='. */
286  s->env_index += (s->env_cnt * channels);
287  if (s->env_index > s->limiter_buf_size)
288  s->env_index -= s->limiter_buf_size;
289 
290  } else {
291  smp_cnt = nb_samples;
292  }
293  break;
294 
 /* ATTACK: ramp the gain linearly from gain_reduction[0] to gain_reduction[1]
  * over attack_length sample frames, resuming at env_cnt if a previous call
  * ran out of samples mid-ramp. */
295  case ATTACK:
296  for (; s->env_cnt < s->attack_length; s->env_cnt++) {
297  for (c = 0; c < channels; c++) {
298  double env;
299  env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
300  buf[s->env_index + c] *= env;
301  }
302 
303  s->env_index += channels;
304  if (s->env_index >= s->limiter_buf_size)
305  s->env_index -= s->limiter_buf_size;
306 
307  smp_cnt++;
308  if (smp_cnt >= nb_samples) {
 /* break skips the loop increment, so count this step by hand. */
309  s->env_cnt++;
310  break;
311  }
312  }
313 
 /* Ramp finished with samples to spare: hold the gain and reset
  * attack_length to 1920 for the next attack. */
314  if (smp_cnt < nb_samples) {
315  s->env_cnt = 0;
316  s->attack_length = 1920;
317  s->limiter_state = SUSTAIN;
318  }
319  break;
320 
 /* SUSTAIN: hold gain_reduction[1] until the next peak.  No further peak
  * starts the release; a peak needing more reduction starts a new attack.
  * NOTE(review): nb_samples is passed here, whereas the OUT state passes
  * nb_samples - smp_cnt. */
321  case SUSTAIN:
322  detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
323  if (peak_delta == -1) {
324  s->limiter_state = RELEASE;
325  s->gain_reduction[0] = s->gain_reduction[1];
326  s->gain_reduction[1] = 1.;
327  s->env_cnt = 0;
328  break;
329  } else {
330  double gain_reduction;
331  gain_reduction = ceiling / peak_value;
332 
333  if (gain_reduction < s->gain_reduction[1]) {
334  s->limiter_state = ATTACK;
335 
 /* Ramp over the distance to the new peak; at least 2 so the ATTACK divisor
  * (attack_length - 1) cannot be zero. */
336  s->attack_length = peak_delta;
337  if (s->attack_length <= 1)
338  s->attack_length = 2;
339 
340  s->gain_reduction[0] = s->gain_reduction[1];
 /* NOTE(review): the listing jumps from line 340 to 342.  In the lines shown
  * the local gain_reduction is computed but never stored, so a statement that
  * sets the new attack target appears to be missing from this listing -
  * confirm against the original file. */
342  s->env_cnt = 0;
343  break;
344  }
345 
 /* The new peak is already covered by the current gain: apply the held gain
  * up to that peak. */
346  for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
347  for (c = 0; c < channels; c++) {
348  double env;
349  env = s->gain_reduction[1];
350  buf[s->env_index + c] *= env;
351  }
352 
353  s->env_index += channels;
354  if (s->env_index >= s->limiter_buf_size)
355  s->env_index -= s->limiter_buf_size;
356 
357  smp_cnt++;
358  if (smp_cnt >= nb_samples) {
359  s->env_cnt++;
360  break;
361  }
362  }
363  }
364  break;
365 
 /* RELEASE: ramp the gain linearly from gain_reduction[0] back to
  * gain_reduction[1] (set to 1 on entry) over release_length sample frames,
  * then return to OUT. */
366  case RELEASE:
367  for (; s->env_cnt < s->release_length; s->env_cnt++) {
368  for (c = 0; c < channels; c++) {
369  double env;
370  env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
371  buf[s->env_index + c] *= env;
372  }
373 
374  s->env_index += channels;
375  if (s->env_index >= s->limiter_buf_size)
376  s->env_index -= s->limiter_buf_size;
377 
378  smp_cnt++;
379  if (smp_cnt >= nb_samples) {
380  s->env_cnt++;
381  break;
382  }
383  }
384 
385  if (smp_cnt < nb_samples) {
386  s->env_cnt = 0;
387  s->limiter_state = OUT;
388  }
389 
390  break;
391  }
392 
393  } while (smp_cnt < nb_samples);
394 
 /* Emit the processed samples from the position the buffer index had on
  * entry; anything still over the ceiling is hard-clipped, keeping its sign. */
395  for (n = 0; n < nb_samples; n++) {
396  for (c = 0; c < channels; c++) {
397  out[c] = buf[index + c];
398  if (fabs(out[c]) > ceiling) {
399  out[c] = ceiling * (out[c] < 0 ? -1 : 1);
400  }
401  }
402  out += channels;
403  index += channels;
404  if (index >= s->limiter_buf_size)
405  index -= s->limiter_buf_size;
406  }
407 }
408 
/*
 * Input-pad frame callback: process one input frame according to
 * s->frame_type and pass the result to the output link.
 *
 *   FIRST_FRAME  buffer the input, seed the gain history, prime the limiter
 *                buffer and output the first 100 ms
 *   INNER_FRAME  steady state: delay, apply smoothed gain, limit, update the
 *                gain history from the loudness measurements
 *   FINAL_FRAME  drain the frame assembled by request_frame() through the
 *                limiter
 *   LINEAR_MODE  apply the constant gain s->offset
 *
 * Takes ownership of `in`; returns the result of ff_filter_frame() or
 * AVERROR(ENOMEM) if an output buffer cannot be allocated.
 *
 * NOTE(review): listing lines 443, 496, 527, 542, 544, 589, 610, 617 and 630
 * are absent from this listing; each gap is marked below.  Read the function
 * against the original file before changing it.
 */
409 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
410 {
411  AVFilterContext *ctx = inlink->dst;
412  LoudNormContext *s = ctx->priv;
413  AVFilterLink *outlink = ctx->outputs[0];
414  AVFrame *out;
415  const double *src;
416  double *dst;
417  double *buf;
418  double *limiter_buf;
419  int i, n, c, subframe_length, src_index;
420  double gain, gain_next, env_global, env_shortterm,
421  global, shortterm, lra, relative_threshold;
422 
 /* Work in place when the input frame is writable, otherwise allocate an
  * output frame of the same length and copy the frame properties. */
423  if (av_frame_is_writable(in)) {
424  out = in;
425  } else {
426  out = ff_get_audio_buffer(outlink, in->nb_samples);
427  if (!out) {
428  av_frame_free(&in);
429  return AVERROR(ENOMEM);
430  }
431  av_frame_copy_props(out, in);
432  }
433 
 /* Output timestamps are generated from a running counter that starts at the
  * pts of the first input frame. */
434  if (s->pts == AV_NOPTS_VALUE)
435  s->pts = in->pts;
436 
437  out->pts = s->pts;
438  src = (const double *)in->data[0];
439  dst = (double *)out->data[0];
440  buf = s->buf;
441  limiter_buf = s->limiter_buf;
442 
 /* NOTE(review): listing line 443 is missing here; presumably it feeds the
  * input samples to the s->r128_in loudness state that is read below -
  * confirm against the original file. */
444 
 /* Input shorter than 3000 ms: dynamic processing is not possible, so derive
  * one constant gain from the measured integrated loudness and sample peak
  * and switch to LINEAR_MODE.
  * NOTE(review): offset is a dB difference, while true_peak comes from
  * ff_ebur128_sample_peak() (uninit applies 20*log10 to the same value) and
  * s->target_tp was converted with pow(10, x/20) in config_input.  The sum
  * and comparison below therefore look like they mix dB and linear
  * quantities - confirm. */
445  if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
446  double offset, offset_tp, true_peak;
447 
448  ff_ebur128_loudness_global(s->r128_in, &global);
449  for (c = 0; c < inlink->channels; c++) {
450  double tmp;
451  ff_ebur128_sample_peak(s->r128_in, c, &tmp);
452  if (c == 0 || tmp > true_peak)
453  true_peak = tmp;
454  }
455 
456  offset = s->target_i - global;
457  offset_tp = true_peak + offset;
458  s->offset = offset_tp < s->target_tp ? offset : s->target_tp - true_peak;
459  s->offset = pow(10., s->offset / 20.);
460  s->frame_type = LINEAR_MODE;
461  }
462 
463  switch (s->frame_type) {
464  case FIRST_FRAME:
 /* Store the whole first frame in the delay buffer. */
465  for (n = 0; n < in->nb_samples; n++) {
466  for (c = 0; c < inlink->channels; c++) {
467  buf[s->buf_index + c] = src[c];
468  }
469  src += inlink->channels;
470  s->buf_index += inlink->channels;
471  }
472 
473  ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
474 
 /* Initial gain in dB: below the measured threshold use the measured
  * integrated loudness, otherwise the current short-term loudness; a
  * short-term value of -70 or less yields 0 dB. */
475  if (shortterm < s->measured_thresh) {
476  s->above_threshold = 0;
477  env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
478  } else {
479  s->above_threshold = 1;
480  env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
481  }
482 
 /* Fill the entire 30-entry gain history with that initial (linear) gain. */
483  for (n = 0; n < 30; n++)
484  s->delta[n] = pow(10., env_shortterm / 20.);
485  s->prev_delta = s->delta[s->index];
486 
487  s->buf_index =
488  s->limiter_buf_index = 0;
489 
 /* Prime the limiter buffer with the start of the delay buffer, scaled by
  * the initial gain and the user offset. */
490  for (n = 0; n < (s->limiter_buf_size / inlink->channels); n++) {
491  for (c = 0; c < inlink->channels; c++) {
492  limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
493  }
494  s->limiter_buf_index += inlink->channels;
495  if (s->limiter_buf_index >= s->limiter_buf_size)
 /* NOTE(review): listing line 496 (the body of the `if` above) is missing; as
  * shown, the `if` would bind to the next statement.  Presumably the dropped
  * line wraps limiter_buf_index - confirm. */
497 
498  s->buf_index += inlink->channels;
499  }
500 
 /* Only the first 100 ms are output for this frame, and the output is also
  * fed to the output loudness state. */
501  subframe_length = frame_size(inlink->sample_rate, 100);
502  true_peak_limiter(s, dst, subframe_length, inlink->channels);
503  ff_ebur128_add_frames_double(s->r128_out, dst, subframe_length);
504 
 /* The chained assignment advances pts and sets the output frame length and
  * the input link's sample-count fields to one 100 ms subframe. */
505  s->pts +=
506  out->nb_samples =
507  inlink->min_samples =
508  inlink->max_samples =
509  inlink->partial_buf_size = subframe_length;
510 
511  s->frame_type = INNER_FRAME;
512  break;
513 
514  case INNER_FRAME:
 /* Smoothed gain for this frame and the next; the per-sample gain below is
  * interpolated linearly between the two. */
515  gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
516  gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);
517 
 /* New input is written at prev_buf_index while the delayed sample at
  * buf_index is scaled into the limiter buffer. */
518  for (n = 0; n < in->nb_samples; n++) {
519  for (c = 0; c < inlink->channels; c++) {
520  buf[s->prev_buf_index + c] = src[c];
521  limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
522  }
523  src += inlink->channels;
524 
525  s->limiter_buf_index += inlink->channels;
526  if (s->limiter_buf_index >= s->limiter_buf_size)
 /* NOTE(review): listing line 527 (the body of the `if` above) is missing;
  * presumably it wraps limiter_buf_index - confirm. */
528 
529  s->prev_buf_index += inlink->channels;
530  if (s->prev_buf_index >= s->buf_size)
531  s->prev_buf_index -= s->buf_size;
532 
533  s->buf_index += inlink->channels;
534  if (s->buf_index >= s->buf_size)
535  s->buf_index -= s->buf_size;
536  }
537 
 /* For a frame shorter than 100 ms, advance the limiter index by the missing
  * samples (with wrap); for a full 100 ms frame this adds zero. */
538  subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->channels;
539  s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;
540 
541  true_peak_limiter(s, dst, in->nb_samples, inlink->channels);
 /* NOTE(review): listing lines 542 and 544 are missing around this point. */
543 
545  ff_ebur128_loudness_global(s->r128_in, &global);
546  ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
547  ff_ebur128_relative_threshold(s->r128_in, &relative_threshold);
548 
 /* While still below threshold: raise the held gain by a factor of 1.0058
  * per frame once the input short-term loudness exceeds the measured
  * threshold, and leave this mode once the output short-term loudness
  * reaches the target. */
549  if (s->above_threshold == 0) {
550  double shortterm_out;
551 
552  if (shortterm > s->measured_thresh)
553  s->prev_delta *= 1.0058;
554 
555  ff_ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
556  if (shortterm_out >= s->target_i)
557  s->above_threshold = 1;
558  }
559 
 /* Quiet or gated input keeps the previous gain; otherwise the new gain is
  * target_i - shortterm plus the deviation of short-term from global
  * loudness, clamped to +/- target_lra / 2. */
560  if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
561  s->delta[s->index] = s->prev_delta;
562  } else {
563  env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
564  env_shortterm = s->target_i - shortterm;
565  s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
566  }
567 
 /* Advance the circular gain-history index and the output clock. */
568  s->prev_delta = s->delta[s->index];
569  s->index++;
570  if (s->index >= 30)
571  s->index -= 30;
572  s->prev_nb_samples = in->nb_samples;
573  s->pts += in->nb_samples;
574  break;
575 
576  case FINAL_FRAME:
 /* One fixed gain is used for the whole drain. */
577  gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
578  s->limiter_buf_index = 0;
579  src_index = 0;
580 
 /* Refill the complete limiter buffer from the start of the final frame. */
581  for (n = 0; n < s->limiter_buf_size / inlink->channels; n++) {
582  for (c = 0; c < inlink->channels; c++) {
583  s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
584  }
585  src_index += inlink->channels;
586 
587  s->limiter_buf_index += inlink->channels;
588  if (s->limiter_buf_index >= s->limiter_buf_size)
 /* NOTE(review): listing line 589 (the body of the `if` above) is missing;
  * presumably it wraps limiter_buf_index - confirm. */
590  }
591 
 /* Limit and output one 100 ms subframe at a time; after each one the
  * limiter buffer is topped up with the next input samples, or with zeros
  * once the input is exhausted. */
592  subframe_length = frame_size(inlink->sample_rate, 100);
593  for (i = 0; i < in->nb_samples / subframe_length; i++) {
594  true_peak_limiter(s, dst, subframe_length, inlink->channels);
595 
596  for (n = 0; n < subframe_length; n++) {
597  for (c = 0; c < inlink->channels; c++) {
598  if (src_index < (in->nb_samples * inlink->channels)) {
599  limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
600  } else {
601  limiter_buf[s->limiter_buf_index + c] = 0.;
602  }
603  }
604 
605  if (src_index < (in->nb_samples * inlink->channels))
606  src_index += inlink->channels;
607 
608  s->limiter_buf_index += inlink->channels;
609  if (s->limiter_buf_index >= s->limiter_buf_size)
 /* NOTE(review): listing line 610 (the body of the `if` above) is missing;
  * presumably it wraps limiter_buf_index - confirm. */
611  }
612 
613  dst += (subframe_length * inlink->channels);
614  }
615 
616  dst = (double *)out->data[0];
 /* NOTE(review): listing line 617 is missing here. */
618  break;
619 
620  case LINEAR_MODE:
 /* Constant gain: every sample is multiplied by s->offset. */
621  for (n = 0; n < in->nb_samples; n++) {
622  for (c = 0; c < inlink->channels; c++) {
623  dst[c] = src[c] * s->offset;
624  }
625  src += inlink->channels;
626  dst += inlink->channels;
627  }
628 
629  dst = (double *)out->data[0];
 /* NOTE(review): listing line 630 is missing here. */
631  s->pts += in->nb_samples;
632  break;
633  }
634 
 /* The input frame is released only when a separate output frame was used. */
635  if (in != out)
636  av_frame_free(&in);
637 
638  return ff_filter_frame(outlink, out);
639 }
640 
/*
 * Output-pad request callback.
 *
 * Forwards the request upstream.  When upstream reports EOF while the filter
 * is in INNER_FRAME state, the samples still held in the delay buffer s->buf
 * are copied into a newly allocated frame, the state is switched to
 * FINAL_FRAME and that frame is run through filter_frame() to flush it.
 *
 * Returns the upstream result, AVERROR(ENOMEM) if the flush frame cannot be
 * allocated, or the result of filter_frame() for the flush frame.
 */
641 static int request_frame(AVFilterLink *outlink)
642 {
643  int ret;
644  AVFilterContext *ctx = outlink->src;
645  AVFilterLink *inlink = ctx->inputs[0];
646  LoudNormContext *s = ctx->priv;
647 
648  ret = ff_request_frame(inlink);
649  if (ret == AVERROR_EOF && s->frame_type == INNER_FRAME) {
650  double *src;
651  double *buf;
652  int nb_samples, n, c, offset;
653  AVFrame *frame;
654 
 /* prev_nb_samples cancels out of the two statements below: the flush frame
  * holds buf_size / channels minus one 100 ms frame of samples. */
655  nb_samples = (s->buf_size / inlink->channels) - s->prev_nb_samples;
656  nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);
657 
658  frame = ff_get_audio_buffer(outlink, nb_samples);
659  if (!frame)
660  return AVERROR(ENOMEM);
661  frame->nb_samples = nb_samples;
662 
 /* Despite its name, src is the write pointer into the new frame; it later
  * becomes the source that filter_frame() reads. */
663  buf = s->buf;
664  src = (double *)frame->data[0];
665 
 /* Step the read index back by the limiter buffer length minus 100 ms
  * (prev_nb_samples cancels here as well), wrapping below zero. */
666  offset = ((s->limiter_buf_size / inlink->channels) - s->prev_nb_samples) * inlink->channels;
667  offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->channels;
668  s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset;
669 
 /* Copy nb_samples frames out of the circular delay buffer. */
670  for (n = 0; n < nb_samples; n++) {
671  for (c = 0; c < inlink->channels; c++) {
672  src[c] = buf[s->buf_index + c];
673  }
674  src += inlink->channels;
675  s->buf_index += inlink->channels;
676  if (s->buf_index >= s->buf_size)
677  s->buf_index -= s->buf_size;
678  }
679 
 /* The state must change before the call so filter_frame() takes its
  * FINAL_FRAME branch. */
680  s->frame_type = FINAL_FRAME;
681  ret = filter_frame(inlink, frame);
682  }
683  return ret;
684 }
685 
687 {
688  LoudNormContext *s = ctx->priv;
691  AVFilterLink *inlink = ctx->inputs[0];
692  AVFilterLink *outlink = ctx->outputs[0];
693  static const int input_srate[] = {192000, -1};
694  static const enum AVSampleFormat sample_fmts[] = {
697  };
698  int ret;
699 
700  layouts = ff_all_channel_counts();
701  if (!layouts)
702  return AVERROR(ENOMEM);
703  ret = ff_set_common_channel_layouts(ctx, layouts);
704  if (ret < 0)
705  return ret;
706 
707  formats = ff_make_format_list(sample_fmts);
708  if (!formats)
709  return AVERROR(ENOMEM);
710  ret = ff_set_common_formats(ctx, formats);
711  if (ret < 0)
712  return ret;
713 
714  if (s->frame_type != LINEAR_MODE) {
715  formats = ff_make_format_list(input_srate);
716  if (!formats)
717  return AVERROR(ENOMEM);
718  ret = ff_formats_ref(formats, &inlink->out_samplerates);
719  if (ret < 0)
720  return ret;
721  ret = ff_formats_ref(formats, &outlink->in_samplerates);
722  if (ret < 0)
723  return ret;
724  }
725 
726  return 0;
727 }
728 
/*
 * Input-link configuration callback: allocates the delay buffer, the limiter
 * buffer and the per-channel previous-sample array, and initialises the
 * processing state from the link's sample rate and channel count.
 *
 * Returns 0 on success or AVERROR(ENOMEM) when an allocation (or one of the
 * two loudness states) is unavailable.
 *
 * NOTE(review): listing lines 734, 738, 743, 744 and 761 are absent from this
 * listing; each gap is marked below.
 */
729 static int config_input(AVFilterLink *inlink)
730 {
731  AVFilterContext *ctx = inlink->dst;
732  LoudNormContext *s = ctx->priv;
733 
 /* NOTE(review): listing line 734 is missing; presumably it creates
  * s->r128_in, which is checked next - confirm. */
735  if (!s->r128_in)
736  return AVERROR(ENOMEM);
737 
 /* NOTE(review): listing line 738 is missing; presumably it creates
  * s->r128_out - confirm. */
739  if (!s->r128_out)
740  return AVERROR(ENOMEM);
741 
 /* NOTE(review): the body of this branch (listing lines 743-744) is missing. */
742  if (inlink->channels == 1 && s->dual_mono) {
745  }
746 
 /* Delay buffer: 3000 ms of interleaved samples. */
747  s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->channels;
748  s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
749  if (!s->buf)
750  return AVERROR(ENOMEM);
751 
 /* Limiter buffer: logically 210 ms of interleaved samples.
  * NOTE(review): the allocation below is sized with s->buf_size rather than
  * the s->limiter_buf_size computed on the line before it, so it is larger
  * than the logical size - confirm whether that is intended. */
752  s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->channels;
753  s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
754  if (!s->limiter_buf)
755  return AVERROR(ENOMEM);
756 
757  s->prev_smp = av_malloc_array(inlink->channels, sizeof(*s->prev_smp));
758  if (!s->prev_smp)
759  return AVERROR(ENOMEM);
760 
 /* NOTE(review): listing line 761 is missing here. */
762 
 /* In the non-linear modes the first frame is forced to exactly 3000 ms. */
763  if (s->frame_type != LINEAR_MODE) {
764  inlink->min_samples =
765  inlink->max_samples =
766  inlink->partial_buf_size = frame_size(inlink->sample_rate, 3000);
767  }
768 
769  s->pts = AV_NOPTS_VALUE;
770  s->buf_index =
771  s->prev_buf_index =
772  s->limiter_buf_index = 0;
773  s->channels = inlink->channels;
774  s->index = 1;
775  s->limiter_state = OUT;
 /* offset and target_tp are converted in place from dB to linear factors, so
  * from here on both fields hold linear values.
  * NOTE(review): running this function a second time on the same context
  * would convert them again - confirm it is only called once. */
776  s->offset = pow(10., s->offset / 20.);
777  s->target_tp = pow(10., s->target_tp / 20.);
 /* Limiter envelope lengths: 10 ms attack, 100 ms release. */
778  s->attack_length = frame_size(inlink->sample_rate, 10);
779  s->release_length = frame_size(inlink->sample_rate, 100);
780 
781  return 0;
782 }
783 
785 {
786  LoudNormContext *s = ctx->priv;
787  s->frame_type = FIRST_FRAME;
788 
789  if (s->linear) {
790  double offset, offset_tp;
791  offset = s->target_i - s->measured_i;
792  offset_tp = s->measured_tp + offset;
793 
794  if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
795  if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
796  s->frame_type = LINEAR_MODE;
797  s->offset = offset;
798  }
799  }
800  }
801 
802  return 0;
803 }
804 
806 {
807  LoudNormContext *s = ctx->priv;
808  double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out, tp_in, tp_out;
809  int c;
810 
811  if (!s->r128_in || !s->r128_out)
812  goto end;
813 
814  ff_ebur128_loudness_range(s->r128_in, &lra_in);
816  ff_ebur128_relative_threshold(s->r128_in, &thresh_in);
817  for (c = 0; c < s->channels; c++) {
818  double tmp;
819  ff_ebur128_sample_peak(s->r128_in, c, &tmp);
820  if ((c == 0) || (tmp > tp_in))
821  tp_in = tmp;
822  }
823 
824  ff_ebur128_loudness_range(s->r128_out, &lra_out);
826  ff_ebur128_relative_threshold(s->r128_out, &thresh_out);
827  for (c = 0; c < s->channels; c++) {
828  double tmp;
829  ff_ebur128_sample_peak(s->r128_out, c, &tmp);
830  if ((c == 0) || (tmp > tp_out))
831  tp_out = tmp;
832  }
833 
834  switch(s->print_format) {
835  case NONE:
836  break;
837 
838  case JSON:
839  av_log(ctx, AV_LOG_INFO,
840  "\n{\n"
841  "\t\"input_i\" : \"%.2f\",\n"
842  "\t\"input_tp\" : \"%.2f\",\n"
843  "\t\"input_lra\" : \"%.2f\",\n"
844  "\t\"input_thresh\" : \"%.2f\",\n"
845  "\t\"output_i\" : \"%.2f\",\n"
846  "\t\"output_tp\" : \"%+.2f\",\n"
847  "\t\"output_lra\" : \"%.2f\",\n"
848  "\t\"output_thresh\" : \"%.2f\",\n"
849  "\t\"normalization_type\" : \"%s\",\n"
850  "\t\"target_offset\" : \"%.2f\"\n"
851  "}\n",
852  i_in,
853  20. * log10(tp_in),
854  lra_in,
855  thresh_in,
856  i_out,
857  20. * log10(tp_out),
858  lra_out,
859  thresh_out,
860  s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
861  s->target_i - i_out
862  );
863  break;
864 
865  case SUMMARY:
866  av_log(ctx, AV_LOG_INFO,
867  "\n"
868  "Input Integrated: %+6.1f LUFS\n"
869  "Input True Peak: %+6.1f dBTP\n"
870  "Input LRA: %6.1f LU\n"
871  "Input Threshold: %+6.1f LUFS\n"
872  "\n"
873  "Output Integrated: %+6.1f LUFS\n"
874  "Output True Peak: %+6.1f dBTP\n"
875  "Output LRA: %6.1f LU\n"
876  "Output Threshold: %+6.1f LUFS\n"
877  "\n"
878  "Normalization Type: %s\n"
879  "Target Offset: %+6.1f LU\n",
880  i_in,
881  20. * log10(tp_in),
882  lra_in,
883  thresh_in,
884  i_out,
885  20. * log10(tp_out),
886  lra_out,
887  thresh_out,
888  s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
889  s->target_i - i_out
890  );
891  break;
892  }
893 
894 end:
895  if (s->r128_in)
897  if (s->r128_out)
899  av_freep(&s->limiter_buf);
900  av_freep(&s->prev_smp);
901  av_freep(&s->buf);
902 }
903 
905  {
906  .name = "default",
907  .type = AVMEDIA_TYPE_AUDIO,
908  .config_props = config_input,
909  .filter_frame = filter_frame,
910  },
911  { NULL }
912 };
913 
915  {
916  .name = "default",
917  .request_frame = request_frame,
918  .type = AVMEDIA_TYPE_AUDIO,
919  },
920  { NULL }
921 };
922 
924  .name = "loudnorm",
925  .description = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
926  .priv_size = sizeof(LoudNormContext),
927  .priv_class = &loudnorm_class,
929  .init = init,
930  .uninit = uninit,
931  .inputs = avfilter_af_loudnorm_inputs,
932  .outputs = avfilter_af_loudnorm_outputs,
933 };
#define NULL
Definition: coverity.c:32
int ff_set_common_channel_layouts(AVFilterContext *ctx, AVFilterChannelLayouts *layouts)
A helper for query_formats() which sets all links to the same list of channel layouts/sample rates...
Definition: formats.c:549
const char * s
Definition: avisynth_c.h:768
This structure describes decoded (raw) audio or video data.
Definition: frame.h:218
AVOption.
Definition: opt.h:246
double weights[21]
Definition: af_loudnorm.c:72
Main libavfilter public API header.
int ff_ebur128_loudness_global(FFEBUR128State *st, double *out)
Get global integrated loudness in LUFS.
Definition: ebur128.c:603
channels
Definition: aptx.c:30
void ff_ebur128_destroy(FFEBUR128State **st)
Destroy library state.
Definition: ebur128.c:302
double * buf
Definition: af_loudnorm.c:66
double target_tp
Definition: af_loudnorm.c:56
can call ff_ebur128_loudness_global_* and ff_ebur128_relative_threshold
Definition: ebur128.h:89
AVFilter ff_af_loudnorm
Definition: af_loudnorm.c:923
a channel that is counted twice
Definition: ebur128.h:51
can call ff_ebur128_sample_peak
Definition: ebur128.h:93
#define src
Definition: vp8dsp.c:254
static int config_input(AVFilterLink *inlink)
Definition: af_loudnorm.c:729
FFEBUR128State * r128_in
Definition: af_loudnorm.c:94
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:283
void ff_ebur128_add_frames_double(FFEBUR128State *st, const double *src, size_t frames)
See ebur128_add_frames_short.
AVFILTER_DEFINE_CLASS(loudnorm)
const char * name
Pad name.
Definition: internal.h:60
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
static const AVOption loudnorm_options[]
Definition: af_loudnorm.c:101
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
#define av_cold
Definition: attributes.h:82
double measured_tp
Definition: af_loudnorm.c:59
AVOptions.
int ff_ebur128_loudness_range(FFEBUR128State *st, double *out)
Get loudness range (LRA) of programme in LU.
Definition: ebur128.c:753
double measured_lra
Definition: af_loudnorm.c:58
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
Definition: af_loudnorm.c:167
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:311
static AVFrame * frame
static const uint64_t c1
Definition: murmur3.c:49
#define AVERROR_EOF
End of file.
Definition: error.h:55
#define av_log(a,...)
enum PrintFormat print_format
Definition: af_loudnorm.c:64
A filter pad used for either input or output.
Definition: internal.h:54
FrameType
G723.1 frame types.
Definition: g723_1.h:63
double target_lra
Definition: af_loudnorm.c:55
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:568
can call ff_ebur128_loudness_shortterm
Definition: ebur128.h:87
static int request_frame(AVFilterLink *outlink)
Definition: af_loudnorm.c:641
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:86
#define AVERROR(e)
Definition: error.h:43
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
int ff_ebur128_sample_peak(FFEBUR128State *st, unsigned int channel_number, double *out)
Get maximum sample peak of selected channel in float format.
Definition: ebur128.c:758
int ff_ebur128_loudness_shortterm(FFEBUR128State *st, double *out)
Get short-term loudness (last 3s) in LUFS.
Definition: ebur128.c:645
static av_always_inline av_const double round(double x)
Definition: libm.h:444
double * prev_smp
Definition: af_loudnorm.c:78
can call ff_ebur128_loudness_range
Definition: ebur128.h:91
int8_t exp
Definition: eval.c:72
static const AVFilterPad avfilter_af_loudnorm_inputs[]
Definition: af_loudnorm.c:904
double measured_i
Definition: af_loudnorm.c:57
enum LimiterState limiter_state
Definition: af_loudnorm.c:81
double measured_thresh
Definition: af_loudnorm.c:60
int ff_formats_ref(AVFilterFormats *f, AVFilterFormats **ref)
Add *ref as a new reference to formats.
Definition: formats.c:440
double * limiter_buf
Definition: af_loudnorm.c:77
AVFormatContext * ctx
Definition: movenc.c:48
Contains information about the state of a loudness measurement.
Definition: ebur128.h:103
int n
Definition: avisynth_c.h:684
static double gaussian_filter(LoudNormContext *s, int index)
Definition: af_loudnorm.c:155
FFEBUR128State * ff_ebur128_init(unsigned int channels, unsigned long samplerate, unsigned long window, int mode)
Initialize library state.
Definition: ebur128.c:217
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
A list of supported channel layouts.
Definition: formats.h:85
sample_rate
#define AV_LOG_INFO
Standard information.
Definition: log.h:187
#define OFFSET(x)
Definition: af_loudnorm.c:98
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
double prev_delta
Definition: af_loudnorm.c:73
int ff_ebur128_set_channel(FFEBUR128State *st, unsigned int channel_number, int value)
Set channel type.
Definition: ebur128.c:446
int av_frame_is_writable(AVFrame *frame)
Check if the frame data is writable.
Definition: frame.c:592
static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
Definition: af_loudnorm.c:230
enum FrameType frame_type
Definition: af_loudnorm.c:89
static av_cold int init(AVFilterContext *ctx)
Definition: af_loudnorm.c:784
double delta[30]
Definition: af_loudnorm.c:71
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_loudnorm.c:805
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:67
AVFilter
Filter definition.
Definition: avfilter.h:144
const char * name
Filter name.
Definition: avfilter.h:148
static int frame_size(int sample_rate, int frame_len_msec)
Definition: af_loudnorm.c:127
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
enum MovChannelLayoutTag * layouts
Definition: mov_chan.c:434
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:232
double gain_reduction[2]
Definition: af_loudnorm.c:76
static int query_formats(AVFilterContext *ctx)
Definition: af_loudnorm.c:686
static double c[64]
static const uint64_t c2
Definition: murmur3.c:50
int ff_ebur128_relative_threshold(FFEBUR128State *st, double *out)
Get relative threshold in LUFS.
Definition: ebur128.c:587
static void init_gaussian_filter(LoudNormContext *s)
Definition: af_loudnorm.c:133
AVFilterFormats
A list of supported formats for one end of a filter link.
Definition: formats.h:64
AVFilterContext
An instance of a filter.
Definition: avfilter.h:338
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:701
FILE * out
Definition: movenc.c:54
static const AVFilterPad avfilter_af_loudnorm_outputs[]
Definition: af_loudnorm.c:914
#define av_freep(p)
#define M_PI
Definition: mathematics.h:52
LimiterState
Definition: af_loudnorm.c:37
#define av_malloc_array(a, b)
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:407
formats
Definition: signature.h:48
FFEBUR128State * r128_out
Definition: af_loudnorm.c:95
internal API functions
AVFilterChannelLayouts * ff_all_channel_counts(void)
Construct an AVFilterChannelLayouts coding for any channel layout, with known or unknown disposition...
Definition: formats.c:410
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: af_loudnorm.c:409
#define FLAGS
Definition: af_loudnorm.c:99
PrintFormat
Definition: af_loudnorm.c:45
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:284
for(j=16;j >0;--j)
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:652
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248
libebur128 - a library for loudness measurement according to the EBU R128 standard.
static uint8_t tmp[11]
Definition: aes_ctr.c:26