FFmpeg  4.0
samidec.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Clément Bœsch
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * SAMI subtitle decoder
24  * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
25  */
26 
27 #include "ass.h"
28 #include "libavutil/avstring.h"
29 #include "libavutil/bprint.h"
30 #include "htmlsubtitles.h"
31 
32 typedef struct {
33  AVBPrint source;
34  AVBPrint content;
35  AVBPrint encoded_source;
36  AVBPrint encoded_content;
37  AVBPrint full;
38  int readorder;
39 } SAMIContext;
40 
41 static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
42 {
43  SAMIContext *sami = avctx->priv_data;
44  int ret = 0;
45  char *tag = NULL;
46  char *dupsrc = av_strdup(src);
47  char *p = dupsrc;
48  AVBPrint *dst_content = &sami->encoded_content;
49  AVBPrint *dst_source = &sami->encoded_source;
50 
51  if (!dupsrc)
52  return AVERROR(ENOMEM);
53 
55  av_bprint_clear(&sami->content);
57  for (;;) {
58  char *saveptr = NULL;
59  int prev_chr_is_space = 0;
60  AVBPrint *dst = &sami->content;
61 
62  /* parse & extract paragraph tag */
63  p = av_stristr(p, "<P");
64  if (!p)
65  break;
66  if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
67  p++;
68  continue;
69  }
70  if (dst->len) // add a separator with the previous paragraph if there was one
71  av_bprintf(dst, "\\N");
72  tag = av_strtok(p, ">", &saveptr);
73  if (!tag || !saveptr)
74  break;
75  p = saveptr;
76 
77  /* check if the current paragraph is the "source" (speaker name) */
78  if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
79  dst = &sami->source;
80  av_bprint_clear(dst);
81  }
82 
83  /* if empty event -> skip subtitle */
84  while (av_isspace(*p))
85  p++;
86  if (!strncmp(p, "&nbsp;", 6)) {
87  ret = -1;
88  goto end;
89  }
90 
91  /* extract the text, stripping most of the tags */
92  while (*p) {
93  if (*p == '<') {
94  if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
95  break;
96  }
97  if (!av_strncasecmp(p, "<BR", 3)) {
98  av_bprintf(dst, "\\N");
99  p++;
100  while (*p && *p != '>')
101  p++;
102  if (!*p)
103  break;
104  if (*p == '>')
105  p++;
106  continue;
107  }
108  if (!av_isspace(*p))
109  av_bprint_chars(dst, *p, 1);
110  else if (!prev_chr_is_space)
111  av_bprint_chars(dst, ' ', 1);
112  prev_chr_is_space = av_isspace(*p);
113  p++;
114  }
115  }
116 
117  av_bprint_clear(&sami->full);
118  if (sami->source.len) {
119  ret = ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
120  if (ret < 0)
121  goto end;
122  av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
123  }
124  ret = ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
125  if (ret < 0)
126  goto end;
127  av_bprintf(&sami->full, "%s", sami->encoded_content.str);
128 
129 end:
130  av_free(dupsrc);
131  return ret;
132 }
133 
135  void *data, int *got_sub_ptr, AVPacket *avpkt)
136 {
137  AVSubtitle *sub = data;
138  const char *ptr = avpkt->data;
139  SAMIContext *sami = avctx->priv_data;
140 
141  if (ptr && avpkt->size > 0) {
142  int ret = sami_paragraph_to_ass(avctx, ptr);
143  if (ret < 0)
144  return ret;
145  // TODO: pass escaped sami->encoded_source.str as source
146  ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
147  if (ret < 0)
148  return ret;
149  }
150  *got_sub_ptr = sub->num_rects > 0;
151  return avpkt->size;
152 }
153 
154 static av_cold int sami_init(AVCodecContext *avctx)
155 {
156  SAMIContext *sami = avctx->priv_data;
157  av_bprint_init(&sami->source, 0, 2048);
158  av_bprint_init(&sami->content, 0, 2048);
159  av_bprint_init(&sami->encoded_source, 0, 2048);
160  av_bprint_init(&sami->encoded_content, 0, 2048);
161  av_bprint_init(&sami->full, 0, 2048);
162  return ff_ass_subtitle_header_default(avctx);
163 }
164 
166 {
167  SAMIContext *sami = avctx->priv_data;
168  av_bprint_finalize(&sami->source, NULL);
172  av_bprint_finalize(&sami->full, NULL);
173  return 0;
174 }
175 
176 static void sami_flush(AVCodecContext *avctx)
177 {
178  SAMIContext *sami = avctx->priv_data;
179  if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
180  sami->readorder = 0;
181 }
182 
184  .name = "sami",
185  .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
186  .type = AVMEDIA_TYPE_SUBTITLE,
187  .id = AV_CODEC_ID_SAMI,
188  .priv_data_size = sizeof(SAMIContext),
189  .init = sami_init,
190  .close = sami_close,
192  .flush = sami_flush,
193 };
#define NULL
Definition: coverity.c:32
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:94
static int sami_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr, AVPacket *avpkt)
Definition: samidec.c:134
static void flush(AVCodecContext *avctx)
char * av_stristr(const char *s1, const char *s2)
Locate the first case-independent occurrence in the string haystack of the string needle...
Definition: avstring.c:56
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
int size
Definition: avcodec.h:1431
static av_const int av_isspace(int c)
Locale-independent conversion of ASCII isspace.
Definition: avstring.h:222
int av_strncasecmp(const char *a, const char *b, size_t n)
Locale-independent case-insensitive compare.
Definition: avstring.c:223
unsigned num_rects
Definition: avcodec.h:3864
int ff_ass_add_rect(AVSubtitle *sub, const char *dialog, int readorder, int layer, const char *style, const char *speaker)
Add an ASS dialog to a subtitle.
Definition: ass.c:101
AVBPrint full
Definition: samidec.c:37
#define src
Definition: vp8dsp.c:254
AVCodec.
Definition: avcodec.h:3408
static void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame, FILE *outfile)
Definition: decode_audio.c:42
int av_bprint_finalize(AVBPrint *buf, char **ret_str)
Finalize a print buffer.
Definition: bprint.c:235
#define av_cold
Definition: attributes.h:82
static av_cold int sami_init(AVCodecContext *avctx)
Definition: samidec.c:154
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
AVBPrint encoded_content
Definition: samidec.c:36
int ff_ass_subtitle_header_default(AVCodecContext *avctx)
Generate a suitable AVCodecContext.subtitle_header for SUBTITLE_ASS with default style.
Definition: ass.c:80
const char data[16]
Definition: mxf.c:90
static av_cold int sami_close(AVCodecContext *avctx)
Definition: samidec.c:165
uint8_t * data
Definition: avcodec.h:1430
static void sami_flush(AVCodecContext *avctx)
Definition: samidec.c:176
uint32_t tag
Definition: movenc.c:1455
#define AV_CODEC_FLAG2_RO_FLUSH_NOOP
Do not reset ASS ReadOrder field on flush (subtitles decoding)
Definition: avcodec.h:941
#define AVERROR(e)
Definition: error.h:43
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
const char * name
Name of the codec implementation.
Definition: avcodec.h:3415
int ff_htmlmarkup_to_ass(void *log_ctx, AVBPrint *dst, const char *in)
Definition: htmlsubtitles.c:82
char * av_strdup(const char *s)
Duplicate a string.
Definition: mem.c:251
main external API structure.
Definition: avcodec.h:1518
int readorder
Definition: samidec.c:38
AVCodec ff_sami_decoder
Definition: samidec.c:183
AVBPrint source
Definition: samidec.c:33
void av_bprint_clear(AVBPrint *buf)
Reset the string to "" but keep internal allocated data.
Definition: bprint.c:227
char * av_strtok(char *s, const char *delim, char **saveptr)
Split the string into several tokens which can be accessed by successive calls to av_strtok()...
Definition: avstring.c:184
void * priv_data
Definition: avcodec.h:1545
#define av_free(p)
int flags2
AV_CODEC_FLAG2_*.
Definition: avcodec.h:1605
AVBPrint encoded_source
Definition: samidec.c:35
static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
Definition: samidec.c:41
AVBPrint content
Definition: samidec.c:34
This structure stores compressed data.
Definition: avcodec.h:1407
void av_bprint_chars(AVBPrint *buf, char c, unsigned n)
Append char c n times to a print buffer.
Definition: bprint.c:140