FFmpeg  4.0
g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * G.722 ADPCM audio encoder
28  */
29 
30 #include "libavutil/avassert.h"
31 #include "avcodec.h"
32 #include "internal.h"
33 #include "g722.h"
34 #include "libavutil/common.h"
35 
/* Number of sample pairs the trellis search processes before the best path
 * found so far is written to the output and the path counters are reset.
 * The per-band path buffers are sized as (1 << trellis) * FREEZE_INTERVAL. */
36 #define FREEZE_INTERVAL 128
37 
38 /* This is an arbitrary value. Allowing insanely large values leads to strange
39  problems, so we limit it to a reasonable value */
40 #define MAX_FRAME_SIZE 32768
41 
42 /* We clip the value of avctx->trellis to prevent data type overflows and
43  undefined behavior. Using larger values is insanely slow anyway. */
44 #define MIN_TRELLIS 0
45 #define MAX_TRELLIS 16
46 
48 {
49  G722Context *c = avctx->priv_data;
50  int i;
51  for (i = 0; i < 2; i++) {
52  av_freep(&c->paths[i]);
53  av_freep(&c->node_buf[i]);
54  av_freep(&c->nodep_buf[i]);
55  }
56  return 0;
57 }
58 
60 {
61  G722Context *c = avctx->priv_data;
62  int ret;
63 
64  c->band[0].scale_factor = 8;
65  c->band[1].scale_factor = 2;
66  c->prev_samples_pos = 22;
67 
68  if (avctx->trellis) {
69  int frontier = 1 << avctx->trellis;
70  int max_paths = frontier * FREEZE_INTERVAL;
71  int i;
72  for (i = 0; i < 2; i++) {
73  c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
74  c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
75  c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
76  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
77  ret = AVERROR(ENOMEM);
78  goto error;
79  }
80  }
81  }
82 
83  if (avctx->frame_size) {
84  /* validate frame size */
85  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
86  int new_frame_size;
87 
88  if (avctx->frame_size == 1)
89  new_frame_size = 2;
90  else if (avctx->frame_size > MAX_FRAME_SIZE)
91  new_frame_size = MAX_FRAME_SIZE;
92  else
93  new_frame_size = avctx->frame_size - 1;
94 
95  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
96  "allowed. Using %d instead of %d\n", new_frame_size,
97  avctx->frame_size);
98  avctx->frame_size = new_frame_size;
99  }
100  } else {
101  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
102  a common packet size for VoIP applications */
103  avctx->frame_size = 320;
104  }
105  avctx->initial_padding = 22;
106 
107  if (avctx->trellis) {
108  /* validate trellis */
109  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
110  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
111  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
112  "allowed. Using %d instead of %d\n", new_trellis,
113  avctx->trellis);
114  avctx->trellis = new_trellis;
115  }
116  }
117 
118  ff_g722dsp_init(&c->dsp);
119 
120  return 0;
121 error:
122  g722_encode_close(avctx);
123  return ret;
124 }
125 
/* Decision thresholds used by encode_low() to quantize the low-band
 * difference signal; each entry is multiplied by the band's scale factor
 * before comparison. Only the first 29 entries are initialised and
 * encode_low() never indexes beyond entry 28; the remaining elements of the
 * 33-element array are implicitly zero. */
126 static const int16_t low_quant[33] = {
127  35, 72, 110, 150, 190, 233, 276, 323,
128  370, 422, 473, 530, 587, 650, 714, 786,
129  858, 940, 1023, 1121, 1219, 1339, 1458, 1612,
130  1765, 1980, 2195, 2557, 2919
131 };
132 
133 static inline void filter_samples(G722Context *c, const int16_t *samples,
134  int *xlow, int *xhigh)
135 {
136  int xout[2];
137  c->prev_samples[c->prev_samples_pos++] = samples[0];
138  c->prev_samples[c->prev_samples_pos++] = samples[1];
139  c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
140  *xlow = xout[0] + xout[1] >> 14;
141  *xhigh = xout[0] - xout[1] >> 14;
143  memmove(c->prev_samples,
144  c->prev_samples + c->prev_samples_pos - 22,
145  22 * sizeof(c->prev_samples[0]));
146  c->prev_samples_pos = 22;
147  }
148 }
149 
150 static inline int encode_high(const struct G722Band *state, int xhigh)
151 {
152  int diff = av_clip_int16(xhigh - state->s_predictor);
153  int pred = 141 * state->scale_factor >> 8;
154  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
155  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
156 }
157 
158 static inline int encode_low(const struct G722Band* state, int xlow)
159 {
160  int diff = av_clip_int16(xlow - state->s_predictor);
161  /* = diff >= 0 ? diff : -(diff + 1) */
162  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
163  int i = 0;
164  limit = limit + 1 << 10;
165  if (limit > low_quant[8] * state->scale_factor)
166  i = 9;
167  while (i < 29 && limit > low_quant[i] * state->scale_factor)
168  i++;
169  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
170 }
171 
172 static void g722_encode_trellis(G722Context *c, int trellis,
173  uint8_t *dst, int nb_samples,
174  const int16_t *samples)
175 {
176  int i, j, k;
177  int frontier = 1 << trellis;
178  struct TrellisNode **nodes[2];
179  struct TrellisNode **nodes_next[2];
180  int pathn[2] = {0, 0}, froze = -1;
181  struct TrellisPath *p[2];
182 
183  for (i = 0; i < 2; i++) {
184  nodes[i] = c->nodep_buf[i];
185  nodes_next[i] = c->nodep_buf[i] + frontier;
186  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
187  nodes[i][0] = c->node_buf[i] + frontier;
188  nodes[i][0]->ssd = 0;
189  nodes[i][0]->path = 0;
190  nodes[i][0]->state = c->band[i];
191  }
192 
193  for (i = 0; i < nb_samples >> 1; i++) {
194  int xlow, xhigh;
195  struct TrellisNode *next[2];
196  int heap_pos[2] = {0, 0};
197 
198  for (j = 0; j < 2; j++) {
199  next[j] = c->node_buf[j] + frontier*(i & 1);
200  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
201  }
202 
203  filter_samples(c, &samples[2*i], &xlow, &xhigh);
204 
205  for (j = 0; j < frontier && nodes[0][j]; j++) {
206  /* Only k >> 2 affects the future adaptive state, therefore testing
207  * small steps that don't change k >> 2 is useless, the original
208  * value from encode_low is better than them. Since we step k
209  * in steps of 4, make sure range is a multiple of 4, so that
210  * we don't miss the original value from encode_low. */
211  int range = j < frontier/2 ? 4 : 0;
212  struct TrellisNode *cur_node = nodes[0][j];
213 
214  int ilow = encode_low(&cur_node->state, xlow);
215 
216  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
217  int decoded, dec_diff, pos;
218  uint32_t ssd;
219  struct TrellisNode* node;
220 
221  if (k < 0)
222  continue;
223 
224  decoded = av_clip_intp2((cur_node->state.scale_factor *
225  ff_g722_low_inv_quant6[k] >> 10)
226  + cur_node->state.s_predictor, 14);
227  dec_diff = xlow - decoded;
228 
229 #define STORE_NODE(index, UPDATE, VALUE)\
230  ssd = cur_node->ssd + dec_diff*dec_diff;\
231  /* Check for wraparound. Using 64 bit ssd counters would \
232  * be simpler, but is slower on x86 32 bit. */\
233  if (ssd < cur_node->ssd)\
234  continue;\
235  if (heap_pos[index] < frontier) {\
236  pos = heap_pos[index]++;\
237  av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
238  node = nodes_next[index][pos] = next[index]++;\
239  node->path = pathn[index]++;\
240  } else {\
241  /* Try to replace one of the leaf nodes with the new \
242  * one, but not always testing the same leaf position */\
243  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
244  if (ssd >= nodes_next[index][pos]->ssd)\
245  continue;\
246  heap_pos[index]++;\
247  node = nodes_next[index][pos];\
248  }\
249  node->ssd = ssd;\
250  node->state = cur_node->state;\
251  UPDATE;\
252  c->paths[index][node->path].value = VALUE;\
253  c->paths[index][node->path].prev = cur_node->path;\
254  /* Sift the newly inserted node up in the heap to restore \
255  * the heap property */\
256  while (pos > 0) {\
257  int parent = (pos - 1) >> 1;\
258  if (nodes_next[index][parent]->ssd <= ssd)\
259  break;\
260  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
261  nodes_next[index][pos]);\
262  pos = parent;\
263  }
264  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
265  }
266  }
267 
268  for (j = 0; j < frontier && nodes[1][j]; j++) {
269  int ihigh;
270  struct TrellisNode *cur_node = nodes[1][j];
271 
272  /* We don't try to get any initial guess for ihigh via
273  * encode_high - since there's only 4 possible values, test
274  * them all. Testing all of these gives a much, much larger
275  * gain than testing a larger range around ilow. */
276  for (ihigh = 0; ihigh < 4; ihigh++) {
277  int dhigh, decoded, dec_diff, pos;
278  uint32_t ssd;
279  struct TrellisNode* node;
280 
281  dhigh = cur_node->state.scale_factor *
282  ff_g722_high_inv_quant[ihigh] >> 10;
283  decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
284  dec_diff = xhigh - decoded;
285 
286  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
287  }
288  }
289 
290  for (j = 0; j < 2; j++) {
291  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
292 
293  if (nodes[j][0]->ssd > (1 << 16)) {
294  for (k = 1; k < frontier && nodes[j][k]; k++)
295  nodes[j][k]->ssd -= nodes[j][0]->ssd;
296  nodes[j][0]->ssd = 0;
297  }
298  }
299 
300  if (i == froze + FREEZE_INTERVAL) {
301  p[0] = &c->paths[0][nodes[0][0]->path];
302  p[1] = &c->paths[1][nodes[1][0]->path];
303  for (j = i; j > froze; j--) {
304  dst[j] = p[1]->value << 6 | p[0]->value;
305  p[0] = &c->paths[0][p[0]->prev];
306  p[1] = &c->paths[1][p[1]->prev];
307  }
308  froze = i;
309  pathn[0] = pathn[1] = 0;
310  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
311  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
312  }
313  }
314 
315  p[0] = &c->paths[0][nodes[0][0]->path];
316  p[1] = &c->paths[1][nodes[1][0]->path];
317  for (j = i; j > froze; j--) {
318  dst[j] = p[1]->value << 6 | p[0]->value;
319  p[0] = &c->paths[0][p[0]->prev];
320  p[1] = &c->paths[1][p[1]->prev];
321  }
322  c->band[0] = nodes[0][0]->state;
323  c->band[1] = nodes[1][0]->state;
324 }
325 
327  const int16_t *samples)
328 {
329  int xlow, xhigh, ilow, ihigh;
330  filter_samples(c, samples, &xlow, &xhigh);
331  ihigh = encode_high(&c->band[1], xhigh);
332  ilow = encode_low (&c->band[0], xlow);
334  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
335  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
336  *dst = ihigh << 6 | ilow;
337 }
338 
340  uint8_t *dst, int nb_samples,
341  const int16_t *samples)
342 {
343  int i;
344  for (i = 0; i < nb_samples; i += 2)
345  encode_byte(c, dst++, &samples[i]);
346 }
347 
348 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
349  const AVFrame *frame, int *got_packet_ptr)
350 {
351  G722Context *c = avctx->priv_data;
352  const int16_t *samples = (const int16_t *)frame->data[0];
353  int nb_samples, out_size, ret;
354 
355  out_size = (frame->nb_samples + 1) / 2;
356  if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
357  return ret;
358 
359  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
360 
361  if (avctx->trellis)
362  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
363  else
364  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
365 
366  /* handle last frame with odd frame_size */
367  if (nb_samples < frame->nb_samples) {
368  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
369  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
370  }
371 
372  if (frame->pts != AV_NOPTS_VALUE)
373  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
374  *got_packet_ptr = 1;
375  return 0;
376 }
377 
/* Encoder registration table for the "g722" codec: private context size,
 * close and encode callbacks, support for a smaller final frame, and a
 * mono-only channel layout list.
 * NOTE(review): the declarator line and some initializer fields (source
 * lines 378, 382, 384 and 388) are not present in this listing; the symbol
 * index names the object as `AVCodec ff_adpcm_g722_encoder` - confirm the
 * missing fields against the original file. */
379  .name = "g722",
380  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
381  .type = AVMEDIA_TYPE_AUDIO,
383  .priv_data_size = sizeof(G722Context),
385  .close = g722_encode_close,
386  .encode2 = g722_encode_frame,
387  .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
389  .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
390 };
struct G722Context::TrellisNode ** nodep_buf[2]
int path
Definition: adpcmenc.c:45
This structure describes decoded (raw) audio or video data.
Definition: frame.h:218
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:182
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
struct G722Context::TrellisPath * paths[2]
int out_size
Definition: movenc.c:55
static void filter_samples(G722Context *c, const int16_t *samples, int *xlow, int *xhigh)
Definition: g722enc.c:133
#define MIN_TRELLIS
Definition: g722enc.c:44
AVCodec.
Definition: avcodec.h:3408
static void g722_encode_no_trellis(G722Context *c, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:339
void(* apply_qmf)(const int16_t *prev_samples, int xout[2])
Definition: g722dsp.h:27
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:32
static int encode_high(const struct G722Band *state, int xhigh)
Definition: g722enc.c:150
uint8_t
#define av_cold
Definition: attributes.h:82
#define PREV_SAMPLES_BUF_SIZE
Definition: g722.h:32
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:311
static av_cold int g722_encode_init(AVCodecContext *avctx)
Definition: g722enc.c:59
static AVFrame * frame
uint8_t * data
Definition: avcodec.h:1430
#define av_log(a,...)
uint32_t ssd
Definition: adpcmenc.c:44
struct G722Context::TrellisNode * node_buf[2]
#define AVERROR(e)
Definition: error.h:43
const int16_t ff_g722_low_inv_quant6[64]
Definition: g722.c:63
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
int initial_padding
Audio only.
Definition: avcodec.h:3031
int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]
memory of past decoded samples
Definition: g722.h:37
simple assert() macros that are a bit more flexible than ISO C assert().
AVCodec ff_adpcm_g722_encoder
Definition: g722enc.c:378
const char * name
Name of the codec implementation.
Definition: avcodec.h:3415
#define FREEZE_INTERVAL
Definition: g722enc.c:36
struct G722Context::G722Band band[2]
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: avcodec.h:989
static void g722_encode_trellis(G722Context *c, int trellis, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:172
#define MAX_FRAME_SIZE
Definition: g722enc.c:40
static av_cold int g722_encode_close(AVCodecContext *avctx)
Definition: g722enc.c:47
static int encode_low(const struct G722Band *state, int xlow)
Definition: g722enc.c:158
static struct @271 state
void ff_g722_update_low_predictor(struct G722Band *band, const int ilow)
Definition: g722.c:143
static void error(const char *err)
static const float pred[4]
Definition: siprdata.h:259
G722DSPContext dsp
Definition: g722.h:66
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:2193
static const int16_t low_quant[33]
Definition: g722enc.c:126
Libavcodec external API header.
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
main external API structure.
Definition: avcodec.h:1518
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:232
common internal api header.
common internal and external API header
signed 16 bits
Definition: samplefmt.h:61
static double c[64]
int prev_samples_pos
the number of values in prev_samples
Definition: g722.h:38
int trellis
trellis RD quantization
Definition: avcodec.h:2462
void * priv_data
Definition: avcodec.h:1545
#define STORE_NODE(index, UPDATE, VALUE)
static av_always_inline int diff(const uint32_t a, const uint32_t b)
const int16_t ff_g722_high_inv_quant[4]
Definition: g722.c:51
av_cold void ff_g722dsp_init(G722DSPContext *c)
Definition: g722dsp.c:68
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, const int16_t *samples)
Definition: g722enc.c:326
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:701
#define av_freep(p)
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: g722enc.c:348
#define av_always_inline
Definition: attributes.h:39
static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx, int64_t samples)
Rescale from sample rate to AVCodecContext.time_base.
Definition: internal.h:280
#define MAX_TRELLIS
Definition: g722enc.c:45
#define FFSWAP(type, a, b)
Definition: common.h:99
void ff_g722_update_high_predictor(struct G722Band *band, const int dhigh, const int ihigh)
Definition: g722.c:154
#define AV_CH_LAYOUT_MONO
This structure stores compressed data.
Definition: avcodec.h:1407
int16_t scale_factor
delayed quantizer scale factor
Definition: g722.h:52
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:284
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:1423
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248
void * av_mallocz_array(size_t nmemb, size_t size)
Allocate a memory block for an array with av_mallocz().
Definition: mem.c:191