FFmpeg  4.0
af_rubberband.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include <rubberband/rubberband-c.h>
20 
22 #include "libavutil/common.h"
23 #include "libavutil/opt.h"
24 
25 #include "audio.h"
26 #include "avfilter.h"
27 #include "formats.h"
28 #include "internal.h"
29 
30 typedef struct RubberBandContext {
31  const AVClass *class;
32  RubberBandState rbs;
33 
34  double tempo, pitch;
37  int64_t nb_samples_out;
38  int64_t nb_samples_in;
39  int flushed;
41 
/* Byte offset of field x inside RubberBandContext, as used by the option table. */
#define OFFSET(x) offsetof(RubberBandContext, x)
/* Flag set shared by every option of this filter. Parenthesized so that the
 * macro always expands as a single operand, whatever expression surrounds it. */
#define A (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
44 
45 static const AVOption rubberband_options[] = {
46  { "tempo", "set tempo scale factor", OFFSET(tempo), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.01, 100, A },
47  { "pitch", "set pitch scale factor", OFFSET(pitch), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.01, 100, A },
48  { "transients", "set transients", OFFSET(transients), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "transients" },
49  { "crisp", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsCrisp}, 0, 0, A, "transients" },
50  { "mixed", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsMixed}, 0, 0, A, "transients" },
51  { "smooth", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsSmooth}, 0, 0, A, "transients" },
52  { "detector", "set detector", OFFSET(detector), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "detector" },
53  { "compound", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorCompound}, 0, 0, A, "detector" },
54  { "percussive", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorPercussive}, 0, 0, A, "detector" },
55  { "soft", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorSoft}, 0, 0, A, "detector" },
56  { "phase", "set phase", OFFSET(phase), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "phase" },
57  { "laminar", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPhaseLaminar}, 0, 0, A, "phase" },
58  { "independent", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPhaseIndependent}, 0, 0, A, "phase" },
59  { "window", "set window", OFFSET(window), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "window" },
60  { "standard", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowStandard}, 0, 0, A, "window" },
61  { "short", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowShort}, 0, 0, A, "window" },
62  { "long", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowLong}, 0, 0, A, "window" },
63  { "smoothing", "set smoothing", OFFSET(smoothing), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "smoothing" },
64  { "off", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionSmoothingOff}, 0, 0, A, "smoothing" },
65  { "on", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionSmoothingOn}, 0, 0, A, "smoothing" },
66  { "formant", "set formant", OFFSET(formant), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "formant" },
67  { "shifted", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionFormantShifted}, 0, 0, A, "formant" },
68  { "preserved", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionFormantPreserved}, 0, 0, A, "formant" },
69  { "pitchq", "set pitch quality", OFFSET(opitch), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "pitch" },
70  { "quality", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighQuality}, 0, 0, A, "pitch" },
71  { "speed", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighSpeed}, 0, 0, A, "pitch" },
72  { "consistency", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighConsistency}, 0, 0, A, "pitch" },
73  { "channels", "set channels", OFFSET(channels), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "channels" },
74  { "apart", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionChannelsApart}, 0, 0, A, "channels" },
75  { "together", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionChannelsTogether}, 0, 0, A, "channels" },
76  { NULL },
77 };
78 
79 AVFILTER_DEFINE_CLASS(rubberband);
80 
82 {
83  RubberBandContext *s = ctx->priv;
84 
85  if (s->rbs)
86  rubberband_delete(s->rbs);
87 }
88 
90 {
93  static const enum AVSampleFormat sample_fmts[] = {
96  };
97  int ret;
98 
99  layouts = ff_all_channel_counts();
100  if (!layouts)
101  return AVERROR(ENOMEM);
102  ret = ff_set_common_channel_layouts(ctx, layouts);
103  if (ret < 0)
104  return ret;
105 
106  formats = ff_make_format_list(sample_fmts);
107  if (!formats)
108  return AVERROR(ENOMEM);
109  ret = ff_set_common_formats(ctx, formats);
110  if (ret < 0)
111  return ret;
112 
113  formats = ff_all_samplerates();
114  if (!formats)
115  return AVERROR(ENOMEM);
116  return ff_set_common_samplerates(ctx, formats);
117 }
118 
119 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
120 {
121  RubberBandContext *s = inlink->dst->priv;
122  AVFilterLink *outlink = inlink->dst->outputs[0];
123  AVFrame *out;
124  int ret = 0, nb_samples;
125 
126  rubberband_process(s->rbs, (const float *const *)in->data, in->nb_samples, 0);
127  s->nb_samples_in += in->nb_samples;
128 
129  nb_samples = rubberband_available(s->rbs);
130  if (nb_samples > 0) {
131  out = ff_get_audio_buffer(outlink, nb_samples);
132  if (!out) {
133  av_frame_free(&in);
134  return AVERROR(ENOMEM);
135  }
136  out->pts = av_rescale_q(s->nb_samples_out,
137  (AVRational){ 1, outlink->sample_rate },
138  outlink->time_base);
139  nb_samples = rubberband_retrieve(s->rbs, (float *const *)out->data, nb_samples);
140  out->nb_samples = nb_samples;
141  ret = ff_filter_frame(outlink, out);
142  s->nb_samples_out += nb_samples;
143  }
144 
145  av_frame_free(&in);
146  return ret;
147 }
148 
149 static int config_input(AVFilterLink *inlink)
150 {
151  AVFilterContext *ctx = inlink->dst;
152  RubberBandContext *s = ctx->priv;
153  int opts = s->transients|s->detector|s->phase|s->window|
154  s->smoothing|s->formant|s->opitch|s->channels|
155  RubberBandOptionProcessRealTime;
156 
157  if (s->rbs)
158  rubberband_delete(s->rbs);
159  s->rbs = rubberband_new(inlink->sample_rate, inlink->channels, opts, 1. / s->tempo, s->pitch);
160 
161  inlink->partial_buf_size =
162  inlink->min_samples =
163  inlink->max_samples = rubberband_get_samples_required(s->rbs);
164 
165  return 0;
166 }
167 
168 static int request_frame(AVFilterLink *outlink)
169 {
170  AVFilterContext *ctx = outlink->src;
171  RubberBandContext *s = ctx->priv;
172  AVFilterLink *inlink = ctx->inputs[0];
173  int ret = 0;
174 
175  ret = ff_request_frame(ctx->inputs[0]);
176 
177  if (ret == AVERROR_EOF && !s->flushed) {
178  if (rubberband_available(s->rbs) > 0) {
179  AVFrame *out = ff_get_audio_buffer(inlink, 1);
180  int nb_samples;
181 
182  if (!out)
183  return AVERROR(ENOMEM);
184 
185  rubberband_process(s->rbs, (const float *const *)out->data, 1, 1);
186  av_frame_free(&out);
187  nb_samples = rubberband_available(s->rbs);
188 
189  if (nb_samples > 0) {
190  out = ff_get_audio_buffer(outlink, nb_samples);
191  if (!out)
192  return AVERROR(ENOMEM);
193  out->pts = av_rescale_q(s->nb_samples_out,
194  (AVRational){ 1, outlink->sample_rate },
195  outlink->time_base);
196  nb_samples = rubberband_retrieve(s->rbs, (float *const *)out->data, nb_samples);
197  out->nb_samples = nb_samples;
198  ret = ff_filter_frame(outlink, out);
199  s->nb_samples_out += nb_samples;
200  }
201  }
202  s->flushed = 1;
203  av_log(ctx, AV_LOG_DEBUG, "nb_samples_in %"PRId64" nb_samples_out %"PRId64"\n",
205  }
206 
207  return ret;
208 }
209 
210 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
211  char *res, int res_len, int flags)
212 {
213  RubberBandContext *s = ctx->priv;
214 
215  if (!strcmp(cmd, "tempo")) {
216  double arg;
217 
218  sscanf(args, "%lf", &arg);
219  if (arg < 0.01 || arg > 100) {
220  av_log(ctx, AV_LOG_ERROR,
221  "Tempo scale factor '%f' out of range\n", arg);
222  return AVERROR(EINVAL);
223  }
224  rubberband_set_time_ratio(s->rbs, 1. / arg);
225  }
226 
227  if (!strcmp(cmd, "pitch")) {
228  double arg;
229 
230  sscanf(args, "%lf", &arg);
231  if (arg < 0.01 || arg > 100) {
232  av_log(ctx, AV_LOG_ERROR,
233  "Pitch scale factor '%f' out of range\n", arg);
234  return AVERROR(EINVAL);
235  }
236  rubberband_set_pitch_scale(s->rbs, arg);
237  }
238 
239  return 0;
240 }
241 
242 static const AVFilterPad rubberband_inputs[] = {
243  {
244  .name = "default",
245  .type = AVMEDIA_TYPE_AUDIO,
246  .config_props = config_input,
247  .filter_frame = filter_frame,
248  },
249  { NULL }
250 };
251 
252 static const AVFilterPad rubberband_outputs[] = {
253  {
254  .name = "default",
255  .type = AVMEDIA_TYPE_AUDIO,
256  .request_frame = request_frame,
257  },
258  { NULL }
259 };
260 
262  .name = "rubberband",
263  .description = NULL_IF_CONFIG_SMALL("Apply time-stretching and pitch-shifting."),
264  .query_formats = query_formats,
265  .priv_size = sizeof(RubberBandContext),
266  .priv_class = &rubberband_class,
267  .uninit = uninit,
268  .inputs = rubberband_inputs,
269  .outputs = rubberband_outputs,
271 };
static const AVFilterPad rubberband_outputs[]
float, planar
Definition: samplefmt.h:69
int64_t nb_samples_out
Definition: af_rubberband.c:37
#define NULL
Definition: coverity.c:32
int ff_set_common_channel_layouts(AVFilterContext *ctx, AVFilterChannelLayouts *layouts)
A helper for query_formats() which sets all links to the same list of channel layouts/sample rates...
Definition: formats.c:549
const char * s
Definition: avisynth_c.h:768
This structure describes decoded (raw) audio or video data.
Definition: frame.h:218
AVOption.
Definition: opt.h:246
static int config_input(AVFilterLink *inlink)
#define A
Definition: af_rubberband.c:43
Main libavfilter public API header.
channels
Definition: aptx.c:30
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:283
const char * name
Pad name.
Definition: internal.h:60
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
#define av_cold
Definition: attributes.h:82
AVOptions.
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:311
static int flags
Definition: log.c:55
#define AVERROR_EOF
End of file.
Definition: error.h:55
#define OFFSET(x)
Definition: af_rubberband.c:42
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:568
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:86
#define AVERROR(e)
Definition: error.h:43
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:197
const char * arg
Definition: jacosubdec.c:66
static const AVOption rubberband_options[]
Definition: af_rubberband.c:45
static const AVFilterPad rubberband_inputs[]
AVDictionary * opts
Definition: movenc.c:50
audio channel layout utility functions
const char AVS_Value args
Definition: avisynth_c.h:780
AVFormatContext * ctx
Definition: movenc.c:48
static int query_formats(AVFilterContext *ctx)
Definition: af_rubberband.c:89
RubberBandState rbs
Definition: af_rubberband.c:32
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
A list of supported channel layouts.
Definition: formats.h:85
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:144
Rational number (pair of numerator and denominator).
Definition: rational.h:58
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_rubberband.c:81
const char * name
Filter name.
Definition: avfilter.h:148
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
enum MovChannelLayoutTag * layouts
Definition: mov_chan.c:434
AVFilterFormats * ff_all_samplerates(void)
Definition: formats.c:395
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:232
AVFilter ff_af_rubberband
common internal and external API header
AVFILTER_DEFINE_CLASS(rubberband)
A list of supported formats for one end of a filter link.
Definition: formats.h:64
An instance of a filter.
Definition: avfilter.h:338
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:701
static int request_frame(AVFilterLink *outlink)
FILE * out
Definition: movenc.c:54
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:407
formats
Definition: signature.h:48
internal API functions
AVFilterChannelLayouts * ff_all_channel_counts(void)
Construct an AVFilterChannelLayouts coding for any channel layout, with known or unknown disposition...
Definition: formats.c:410
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:284
int ff_set_common_samplerates(AVFilterContext *ctx, AVFilterFormats *samplerates)
Definition: formats.c:556