FFmpeg  4.0
psymodel.c
Go to the documentation of this file.
1 /*
2  * audio encoder psychoacoustic model
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <string.h>
23 
24 #include "avcodec.h"
25 #include "psymodel.h"
26 #include "iirfilter.h"
27 #include "libavutil/mem.h"
28 
29 extern const FFPsyModel ff_aac_psy_model;
30 
31 av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
32  const uint8_t **bands, const int* num_bands,
33  int num_groups, const uint8_t *group_map)
34 {
35  int i, j, k = 0;
36 
37  ctx->avctx = avctx;
38  ctx->ch = av_mallocz_array(sizeof(ctx->ch[0]), avctx->channels * 2);
39  ctx->group = av_mallocz_array(sizeof(ctx->group[0]), num_groups);
40  ctx->bands = av_malloc_array (sizeof(ctx->bands[0]), num_lens);
41  ctx->num_bands = av_malloc_array (sizeof(ctx->num_bands[0]), num_lens);
42  ctx->cutoff = avctx->cutoff;
43 
44  if (!ctx->ch || !ctx->group || !ctx->bands || !ctx->num_bands) {
45  ff_psy_end(ctx);
46  return AVERROR(ENOMEM);
47  }
48 
49  memcpy(ctx->bands, bands, sizeof(ctx->bands[0]) * num_lens);
50  memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) * num_lens);
51 
52  /* assign channels to groups (with virtual channels for coupling) */
53  for (i = 0; i < num_groups; i++) {
54  /* NOTE: Add 1 to handle the AAC chan_config without modification.
55  * This has the side effect of allowing an array of 0s to map
56  * to one channel per group.
57  */
58  ctx->group[i].num_ch = group_map[i] + 1;
59  for (j = 0; j < ctx->group[i].num_ch * 2; j++)
60  ctx->group[i].ch[j] = &ctx->ch[k++];
61  }
62 
63  switch (ctx->avctx->codec_id) {
64  case AV_CODEC_ID_AAC:
65  ctx->model = &ff_aac_psy_model;
66  break;
67  }
68  if (ctx->model->init)
69  return ctx->model->init(ctx);
70  return 0;
71 }
72 
74 {
75  int i = 0, ch = 0;
76 
77  while (ch <= channel)
78  ch += ctx->group[i++].num_ch;
79 
80  return &ctx->group[i-1];
81 }
82 
84 {
85  if (ctx->model && ctx->model->end)
86  ctx->model->end(ctx);
87  av_freep(&ctx->bands);
88  av_freep(&ctx->num_bands);
89  av_freep(&ctx->group);
90  av_freep(&ctx->ch);
91 }
92 
93 typedef struct FFPsyPreprocessContext{
95  float stereo_att;
100 
101 #define FILT_ORDER 4
102 
104 {
106  int i;
107  float cutoff_coeff = 0;
108  ctx = av_mallocz(sizeof(FFPsyPreprocessContext));
109  if (!ctx)
110  return NULL;
111  ctx->avctx = avctx;
112 
113  /* AAC has its own LP method */
114  if (avctx->codec_id != AV_CODEC_ID_AAC) {
115  if (avctx->cutoff > 0)
116  cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate;
117 
118  if (cutoff_coeff && cutoff_coeff < 0.98)
121  cutoff_coeff, 0.0, 0.0);
122  if (ctx->fcoeffs) {
123  ctx->fstate = av_mallocz_array(sizeof(ctx->fstate[0]), avctx->channels);
124  if (!ctx->fstate) {
125  av_free(ctx->fcoeffs);
126  av_free(ctx);
127  return NULL;
128  }
129  for (i = 0; i < avctx->channels; i++)
131  }
132  }
133 
134  ff_iir_filter_init(&ctx->fiir);
135 
136  return ctx;
137 }
138 
139 void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels)
140 {
141  int ch;
142  int frame_size = ctx->avctx->frame_size;
143  FFIIRFilterContext *iir = &ctx->fiir;
144 
145  if (ctx->fstate) {
146  for (ch = 0; ch < channels; ch++)
147  iir->filter_flt(ctx->fcoeffs, ctx->fstate[ch], frame_size,
148  &audio[ch][frame_size], 1, &audio[ch][frame_size], 1);
149  }
150 }
151 
153 {
154  int i;
156  if (ctx->fstate)
157  for (i = 0; i < ctx->avctx->channels; i++)
159  av_freep(&ctx->fstate);
160  av_free(ctx);
161 }
#define NULL
Definition: coverity.c:32
void(* end)(FFPsyContext *apc)
Definition: psymodel.h:141
uint8_t ** bands
scalefactor band sizes for possible frame sizes
Definition: psymodel.h:98
FFPsyChannelGroup * group
channel group information
Definition: psymodel.h:94
Memory handling functions.
channels
Definition: aptx.c:30
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)
Cleanup audio preprocessing module.
Definition: psymodel.c:152
psychoacoustic information for an arbitrary group of channels
Definition: psymodel.h:68
uint8_t pi<< 24) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8,(uint64_t)((*(const uint8_t *) pi - 0x80U))<< 56) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16,(*(const int16_t *) pi >>8)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,(uint64_t)(*(const int16_t *) pi)<< 48) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32,(*(const int32_t *) pi >>24)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,(uint64_t)(*(const int32_t *) pi)<< 32) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S64,(*(const int64_t *) pi >>56)+0x80) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S64, *(const int64_t *) pi *(1.0f/(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S64, *(const int64_t *) pi *(1.0/(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(const float *) pi *(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, 
av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const double *) pi *(INT64_C(1)<< 63))) #define FMT_PAIR_FUNC(out, in) static conv_func_type *const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB *AV_SAMPLE_FMT_NB]={ FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_DBL), 
FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64), };static void cpy1(uint8_t **dst, const uint8_t **src, int len){ memcpy(*dst, *src, len);} static void cpy2(uint8_t **dst, const uint8_t **src, int len){ memcpy(*dst, *src, 2 *len);} static void cpy4(uint8_t **dst, const uint8_t **src, int len){ memcpy(*dst, *src, 4 *len);} static void cpy8(uint8_t **dst, const uint8_t **src, int len){ memcpy(*dst, *src, 8 *len);} AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, const int *ch_map, int flags) { AudioConvert *ctx;conv_func_type *f=fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt)+AV_SAMPLE_FMT_NB *av_get_packed_sample_fmt(in_fmt)];if(!f) return NULL;ctx=av_mallocz(sizeof(*ctx));if(!ctx) return NULL;if(channels==1){ in_fmt=av_get_planar_sample_fmt(in_fmt);out_fmt=av_get_planar_sample_fmt(out_fmt);} ctx->channels=channels;ctx->conv_f=f;ctx->ch_map=ch_map;if(in_fmt==AV_SAMPLE_FMT_U8||in_fmt==AV_SAMPLE_FMT_U8P) memset(ctx->silence, 0x80, sizeof(ctx->silence));if(out_fmt==in_fmt &&!ch_map) { switch(av_get_bytes_per_sample(in_fmt)){ case 1:ctx->simd_f=cpy1;break;case 2:ctx->simd_f=cpy2;break;case 4:ctx->simd_f=cpy4;break;case 8:ctx->simd_f=cpy8;break;} } if(HAVE_X86ASM &&HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);return ctx;} void swri_audio_convert_free(AudioConvert **ctx) { av_freep(ctx);} int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len) { int ch;int off=0;const int os=(out->planar ? 
1 :out->ch_count) *out->bps;unsigned misaligned=0;av_assert0(ctx->channels==out->ch_count);if(ctx->in_simd_align_mask) { int planes=in->planar ? in->ch_count :1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) in->ch[ch];misaligned|=m &ctx->in_simd_align_mask;} if(ctx->out_simd_align_mask) { int planes=out->planar ? out->ch_count :1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) out->ch[ch];misaligned|=m &ctx->out_simd_align_mask;} if(ctx->simd_f &&!ctx->ch_map &&!misaligned){ off=len &~15;av_assert1(off >=0);av_assert1(off<=len);av_assert2(ctx->channels==SWR_CH_MAX||!in->ch[ctx->channels]);if(off >0){ if(out->planar==in->planar){ int planes=out->planar ? out->ch_count :1;for(ch=0;ch< planes;ch++){ ctx->simd_f(out-> ch ch
Definition: audioconvert.c:56
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, const uint8_t **bands, const int *num_bands, int num_groups, const uint8_t *group_map)
Initialize psychoacoustic model.
Definition: psymodel.c:31
int * num_bands
number of scalefactor bands for possible frame sizes
Definition: psymodel.h:99
av_cold struct FFIIRFilterState * ff_iir_filter_init_state(int order)
Create new filter state.
Definition: iirfilter.c:204
av_cold struct FFIIRFilterCoeffs * ff_iir_filter_init_coeffs(void *avc, enum IIRFilterType filt_type, enum IIRFilterMode filt_mode, int order, float cutoff_ratio, float stopband, float ripple)
Initialize filter coefficients.
Definition: iirfilter.c:162
uint8_t
#define av_cold
Definition: attributes.h:82
void(* filter_flt)(const struct FFIIRFilterCoeffs *coeffs, struct FFIIRFilterState *state, int size, const float *src, ptrdiff_t sstep, float *dst, ptrdiff_t dstep)
Perform IIR filtering on floating-point input samples.
Definition: iirfilter.h:63
struct FFIIRFilterCoeffs * fcoeffs
Definition: psymodel.c:96
context used by psychoacoustic model
Definition: psymodel.h:89
const FFPsyModel ff_aac_psy_model
Definition: aacpsy.c:1018
AVCodecContext * avctx
Definition: psymodel.c:94
FFPsyChannel * ch[PSY_MAX_CHANS]
pointers to the individual channels in the group
Definition: psymodel.h:69
#define AVERROR(e)
Definition: error.h:43
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if available on the CPU) and zero all the bytes of the block.
Definition: mem.c:236
av_cold void ff_iir_filter_free_statep(struct FFIIRFilterState **state)
Free and zero filter state.
Definition: iirfilter.c:307
codec-specific psychoacoustic model implementation
Definition: psymodel.h:114
IIR filter state.
Definition: iirfilter.c:47
int(* init)(FFPsyContext *apc)
Definition: psymodel.h:116
uint8_t num_ch
number of channels in this group
Definition: psymodel.h:70
AVFormatContext * ctx
Definition: movenc.c:48
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:2193
int frame_size
Definition: mxfenc.c:1947
Libavcodec external API header.
enum AVCodecID codec_id
Definition: avcodec.h:1528
int sample_rate
samples per second
Definition: avcodec.h:2173
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
Definition: psymodel.c:73
main external API structure.
Definition: avcodec.h:1518
static const float bands[]
int cutoff
lowpass frequency cutoff for analysis
Definition: psymodel.h:96
const struct FFPsyModel * model
encoder-specific model functions
Definition: psymodel.h:91
IIR filter global parameters.
Definition: iirfilter.c:37
void ff_iir_filter_init(FFIIRFilterContext *f)
Initialize FFIIRFilterContext.
Definition: iirfilter.c:322
struct FFIIRFilterState ** fstate
Definition: psymodel.c:97
av_cold struct FFPsyPreprocessContext * ff_psy_preprocess_init(AVCodecContext *avctx)
psychoacoustic model audio preprocessing initialization
Definition: psymodel.c:103
channel
Use these values when setting the channel map with ebur128_set_channel().
Definition: ebur128.h:39
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels)
Preprocess several channels of an audio frame in order to compress it better.
Definition: psymodel.c:139
#define FILT_ORDER
Definition: psymodel.c:101
struct FFIIRFilterContext fiir
Definition: psymodel.c:98
int cutoff
Audio cutoff bandwidth (0 means "automatic")
Definition: avcodec.h:2217
#define av_free(p)
IIR filter interface.
int channels
number of audio channels
Definition: avcodec.h:2174
FFPsyChannel * ch
single channel information
Definition: psymodel.h:93
#define av_freep(p)
av_cold void ff_iir_filter_free_coeffsp(struct FFIIRFilterCoeffs **coeffsp)
Free filter coefficients.
Definition: iirfilter.c:312
#define av_malloc_array(a, b)
AVCodecContext * avctx
encoder context
Definition: psymodel.h:90
av_cold void ff_psy_end(FFPsyContext *ctx)
Cleanup model context at the end.
Definition: psymodel.c:83
void * av_mallocz_array(size_t nmemb, size_t size)
Allocate a memory block for an array with av_mallocz().
Definition: mem.c:191