2 This file is part of the HandBrake source code.
3 Homepage: <http://handbrake.fr/>.
4 It may be used under the terms of the GNU General Public License. */
7 * Converts TX3G subtitles to UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>).
9 * TX3G == MPEG 4, Part 17 (ISO/IEC 14496-17) == 3GPP Timed Text (26.245)
10 * A full reference to the format can be found here:
11 * http://www.3gpp.org/ftp/Specs/html-info/26245.htm
13 * @author David Foster (davidfstr)
// Sizing constants used by tx3g_decode_to_utf8() to compute the worst-case
// size of the output buffer (text plus every markup tag that could be added).
// Number of face-style flags that can produce a tag (BOLD, ITALIC, UNDERLINE).
26 #define NUM_FACE_STYLE_FLAGS 3
27 #define MAX_OPEN_TAG_SIZE 3 // "<b>": byte length of an opening tag
28 #define MAX_CLOSE_TAG_SIZE 4 // "</b>": byte length of a closing tag
31 uint16_t startChar; // NOTE: indices are *character* positions, not byte positions
34 uint8_t faceStyleFlags; // bitwise OR of FaceStyleFlag values (tested against BOLD, ITALIC, UNDERLINE)
36 uint32_t textColorRGBA; // read from the packet as a big-endian 32-bit value
/*
 * Big-endian cursor readers. Every macro reads from, and then advances, the
 * local `uint8_t *pos` cursor of the enclosing function.
 *
 * NOTE: None of these macros check for buffer overflow.
 *
 * Each macro expands to ONE expression (the cursor is advanced first and the
 * bytes just passed are addressed with negative indices), so it stays a single
 * unit inside an unbraced `if`/`else` or a larger expression.
 * Bytes are widened to an unsigned type before shifting so that a byte with
 * its top bit set is never shifted into the sign bit of an `int`.
 *
 * Because every macro modifies `pos`, use at most one of them per full
 * expression. READ_ARRAY evaluates `n` twice: pass an argument without side
 * effects.
 */
#define READ_U8()     (pos += 1, pos[-1])
#define READ_U16()    (pos += 2, (uint16_t) (((uint16_t) pos[-2] << 8) | pos[-1]))
#define READ_U32()    (pos += 4, (((uint32_t) pos[-4]) << 24) | \
                                 (((uint32_t) pos[-3]) << 16) | \
                                 (((uint32_t) pos[-2]) <<  8) | \
                                 (((uint32_t) pos[-1]) <<  0))
#define READ_ARRAY(n) (pos += (n), pos - (n))   // yields a pointer to the first of the n bytes
#define SKIP_ARRAY(n) ((void) (pos += (n)))
/*
 * Output writers. Each macro stores bytes at the local `uint8_t *dst` cursor
 * of the enclosing function and advances it past what was written.
 * `c` is the single character to emit (the tag letter for the tag macros).
 *
 * NOTE: None of these macros check for buffer overflow.
 *
 * Wrapped in do { ... } while (0) so that `WRITE_x(...);` is exactly one
 * statement and can be followed by `else` after an unbraced `if`.
 */
#define WRITE_CHAR(c)      do { dst[0] = (c); dst += 1; } while (0)
#define WRITE_START_TAG(c) do { dst[0] = '<'; dst[1] = (c); dst[2] = '>'; dst += 3; } while (0)
#define WRITE_END_TAG(c)   do { dst[0] = '<'; dst[1] = '/'; dst[2] = (c); dst[3] = '>'; dst += 4; } while (0)
/*
 * Packs the first four bytes of `str` into a big-endian 32-bit code, with
 * str[0] in the most significant byte. `str` must point to at least 4 bytes.
 * Each byte goes through uint8_t first so that a `char` with its high bit set
 * is not sign-extended into the neighbouring bytes. The argument is
 * parenthesized so pointer expressions (e.g. `p + 1`) index correctly.
 */
#define FOURCC(str) ((((uint32_t) (uint8_t) (str)[0]) << 24) | \
                     (((uint32_t) (uint8_t) (str)[1]) << 16) | \
                     (((uint32_t) (uint8_t) (str)[2]) <<  8) | \
                     (((uint32_t) (uint8_t) (str)[3]) <<  0))
// True when byte `c` has the bit pattern 10xxxxxx, i.e. it is a non-first
// (continuation) byte of a multi-byte UTF-8 character. The argument is
// parenthesized so that expressions such as `a | b` are masked as a whole.
#define IS_10xxxxxx(c) (((c) & 0xC0) == 0x80)
// Converts one TX3G TextSample packet into a buffer holding the sample's text
// with <b>/<i>/<u> markup derived from the StyleRecords of its StyleBox ('styl').
// In the visible code 'in' is only read; the returned buffer receives
// in->start and in->stop.
// NOTE(review): the packet is parsed without bounds checks (see the notes
// below), so a malformed packet may cause out-of-range reads or writes.
56 static hb_buffer_t *tx3g_decode_to_utf8( hb_buffer_t *in )
58 uint8_t *pos = in->data;
59 uint8_t *end = in->data + in->size;
61 uint16_t numStyleRecords = 0;
67 * Parse the packet as a TX3G TextSample.
69 * Look for a single StyleBox ('styl') and read all contained StyleRecords.
70 * Ignore all other box types.
72 * NOTE: Buffer overflows on read are not checked.
// The sample starts with a big-endian 16-bit byte length followed by the text.
74 uint16_t textLength = READ_U16();
75 uint8_t *text = READ_ARRAY(textLength);
// One flag byte per text byte; they are indexed by *character* position, and
// hold the face-style flags that start / end at that character.
// NOTE(review): the calloc results are not checked in the visible code, and
// the arrays have exactly textLength entries, so an index equal to textLength
// (or larger) would fall outside them - confirm how such records are handled.
76 startStyle = calloc( textLength, 1 );
77 endStyle = calloc( textLength, 1 );
80 * Read TextSampleModifierBox
82 uint32_t size = READ_U32();
// NOTE(review): 'pos' starts at in->data and 'end' is in->data + in->size, so
// 'pos - end' is presumably negative here and wraps to a very large value when
// stored in the uint32_t 'size'; 'end - pos' looks like the intent - confirm.
84 size = pos - end; // extends to end of packet
87 hb_log( "dectx3gsub: TextSampleModifierBox has unsupported large size" );
// The box type is a four-character code read as a big-endian 32-bit value.
90 uint32_t type = READ_U32();
91 if ( type == FOURCC("uuid") ) {
92 hb_log( "dectx3gsub: TextSampleModifierBox has unsupported extended type" );
96 if ( type == FOURCC("styl") ) {
97 // Found a StyleBox. Parse the contained StyleRecords
99 if ( numStyleRecords != 0 ) {
100 hb_log( "dectx3gsub: found additional StyleBoxes on subtitle; skipping" );
// Count of StyleRecords that follow in this StyleBox.
105 numStyleRecords = READ_U16();
108 for (i=0; i<numStyleRecords; i++) {
109 StyleRecord curRecord;
110 curRecord.startChar = READ_U16();
111 curRecord.endChar = READ_U16();
112 curRecord.fontID = READ_U16();
113 curRecord.faceStyleFlags = READ_U8();
114 curRecord.fontSize = READ_U8();
115 curRecord.textColorRGBA = READ_U32();
// Only the face-style flags are used: mark where the styled run begins and
// ends so that the markup pass below can emit the tags.
// NOTE(review): startChar/endChar come straight from the packet and are not
// range-checked against textLength here.
117 startStyle[curRecord.startChar] |= curRecord.faceStyleFlags;
118 endStyle[curRecord.endChar] |= curRecord.faceStyleFlags;
121 // Found some other kind of TextSampleModifierBox. Skip it.
127 * Copy text to output buffer, and add HTML markup for the style records
// Upper bound: the text itself plus, for every style record, one opening and
// one closing tag for each of the three face styles.
129 int maxOutputSize = textLength + (numStyleRecords * NUM_FACE_STYLE_FLAGS * (MAX_OPEN_TAG_SIZE + MAX_CLOSE_TAG_SIZE));
130 hb_buffer_t *out = hb_buffer_init( maxOutputSize );
133 uint8_t *dst = out->data;
// Walk the text byte by byte; 'pos' and 'end' are reused as the text cursor
// and text limit. Style flags are looked up per character via 'charIndex'.
135 for ( pos = text, end = text + textLength; pos < end; pos++ ) {
136 if (IS_10xxxxxx(*pos)) {
137 // Is a non-first byte of a multi-byte UTF-8 character
139 continue; // ...without incrementing 'charIndex'
142 uint8_t plusStyles = startStyle[charIndex];
143 uint8_t minusStyles = endStyle[charIndex];
// Ending styles are tested in the order u, i, b - the reverse of the order
// b, i, u used for starting styles below.
145 if (minusStyles & UNDERLINE)
147 if (minusStyles & ITALIC)
149 if (minusStyles & BOLD)
152 if (plusStyles & BOLD)
153 WRITE_START_TAG('b');
154 if (plusStyles & ITALIC)
155 WRITE_START_TAG('i');
156 if (plusStyles & UNDERLINE)
157 WRITE_START_TAG('u');
163 // Trim output buffer to the actual amount of data written
164 out->size = dst - out->data;
166 // Copy metadata from the input packet to the output packet
167 out->start = in->start;
168 out->stop = in->stop;
184 #undef WRITE_START_TAG
187 static int dectx3gInit( hb_work_object_t * w, hb_job_t * job )
// Work function of the TX3G subtitle decoder. Takes the packet at *buf_in,
// decodes it with tx3g_decode_to_utf8() when it is non-empty, makes sure
// non-empty output is followed by a '\0' byte, and closes the input packet.
// NOTE(review): how the result is handed back through 'buf_out' and which
// value is returned are not visible in this excerpt.
192 static int dectx3gWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
193 hb_buffer_t ** buf_out )
195 hb_buffer_t * in = *buf_in;
196 hb_buffer_t * out = NULL;
198 // Warn if the subtitle's duration has not been passed through by the demuxer,
199 // which will prevent the subtitle from displaying at all
200 if ( in->stop == 0 ) {
201 hb_log( "dectx3gsub: subtitle packet lacks duration" );
204 if ( in->size > 0 ) {
205 out = tx3g_decode_to_utf8(in);
207 out = hb_buffer_init( 0 );
211 // We shouldn't be storing the extra NULL character,
212 // but the MP4 muxer expects this, unfortunately.
213 if ( out->size > 0 && out->data[out->size - 1] != '\0' ) {
214 // NOTE: out->size remains unchanged
// Grow the allocation by one byte and store the terminator just past the
// reported size, so the '\0' is present but not counted in out->size.
215 hb_buffer_realloc( out, out->size + 1 );
216 out->data[out->size] = '\0';
219 // If the input packet was non-empty, do not pass through
220 // an empty output packet (even if the subtitle was empty),
221 // as this would be interpreted as an end-of-stream
222 if ( in->size > 0 && out->size == 0 ) {
223 hb_buffer_close(&out);
227 // Dispose the input packet, as it is no longer needed
228 hb_buffer_close(&in);
235 static void dectx3gClose( hb_work_object_t * w )
240 hb_work_object_t hb_dectx3gsub =
243 "TX3G Subtitle Decoder",