OSDN Git Service

Remove the set cpu count option as it doesn't do anything now
[handbrake-jp/handbrake-jp-git.git] / libhb / decsrtsub.c
1 /* 
2    This file is part of the HandBrake source code.
3    Homepage: <http://handbrake.fr/>.
4    It may be used under the terms of the GNU General Public License. */
5
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <iconv.h>
10 #include <errno.h>
11 #include "hb.h"
12
/*
 * Display interval of one subtitle as parsed from an SRT timing line.
 * read_time_from_string() fills both members with millisecond values.
 */
struct start_and_end
{
    unsigned long start;   /* time the subtitle appears */
    unsigned long end;     /* time the subtitle disappears */
};
16
/*
 * States of the line-oriented SRT parser in srt_read().
 * The numeric values are the implicit ones (0..3) spelled out.
 */
enum
{
    k_state_inEntry             = 0,  /* collecting the text lines of an entry */
    k_state_inEntry_or_new      = 1,  /* after repeated blank lines: text or a new entry number */
    k_state_potential_new_entry = 2,  /* expecting an entry number (also the initial state) */
    k_state_timecode            = 3   /* expecting the "start --> end" timing line */
};
24
/*
 * One SRT entry while it is being assembled by srt_read().
 */
typedef struct srt_entry_s
{
    long offset;       /* start of this entry minus the end of the previous timecode */
    long duration;     /* end minus start of this entry */
    long start;        /* start time taken from the timing line */
    long stop;         /* end time taken from the timing line */
    char text[1024];   /* NUL-terminated text collected so far */
    int  pos;          /* index in text where the next line is appended */
} srt_entry_t;
31
32 /*
33  * Store all context in the work private struct,
34  */
35 struct hb_work_private_s
36 {
37     hb_job_t * job;
38     FILE     * file;
39     char       buf[1024];
40     int        pos;
41     int        end;
42     char       utf8_buf[2048];
43     int        utf8_pos;
44     int        utf8_end;
45     int        utf8_bom_skipped;
46     unsigned long current_time;
47     unsigned long number_of_entries;
48     unsigned long last_entry_number;
49     unsigned long current_state;
50     srt_entry_t current_entry;
51     iconv_t *iconv_context;
52     hb_subtitle_t *subtitle;
53     uint64_t start_time;              // In HB time
54     uint64_t stop_time;               // In HB time
55 };
56
57 static int 
58 read_time_from_string( const char* timeString, struct start_and_end *result )
59 {
60     // for ex. 00:00:15,248 --> 00:00:16,545
61     
62     long houres1, minutes1, seconds1, milliseconds1,
63          houres2, minutes2, seconds2, milliseconds2;
64     int scanned;
65     
66     scanned = sscanf(timeString, "%ld:%ld:%ld,%ld --> %ld:%ld:%ld,%ld\n",
67                     &houres1, &minutes1, &seconds1, &milliseconds1,
68                     &houres2, &minutes2, &seconds2, &milliseconds2);
69     if (scanned != 8)
70     {
71         return 0;
72     }
73     result->start =
74         milliseconds1 + seconds1*1000 + minutes1*60*1000 + houres1*60*60*1000;
75     result->end =
76         milliseconds2 + seconds2*1000 + minutes2*60*1000 + houres2*60*60*1000;
77     return 1;
78 }
79
80 static int utf8_fill( hb_work_private_t * pv )
81 {
82     int bytes, conversion = 0;
83     size_t out_size;
84
85     /* Align utf8 data to beginning of the buffer so that we can
86      * fill the buffer to its maximum */
87     memmove( pv->utf8_buf, pv->utf8_buf + pv->utf8_pos, pv->utf8_end - pv->utf8_pos );
88     pv->utf8_end -= pv->utf8_pos;
89     pv->utf8_pos = 0;
90     out_size = 2048 - pv->utf8_end;
91     while( out_size )
92     {
93         char *p, *q;
94         size_t in_size, retval;
95
96         if( pv->end == pv->pos )
97         {
98             bytes = fread( pv->buf, 1, 1024, pv->file );
99             pv->pos = 0;
100             pv->end = bytes;
101             if( bytes == 0 )
102             {
103                 if( conversion )
104                     return 1;
105                 else
106                     return 0;
107             }
108         }
109
110         p = pv->buf + pv->pos;
111         q = pv->utf8_buf + pv->utf8_end;
112         in_size = pv->end - pv->pos;
113
114         retval = iconv( pv->iconv_context, &p, &in_size, &q, &out_size);
115         if( q != pv->utf8_buf + pv->utf8_pos )
116             conversion = 1;
117
118         pv->utf8_end = q - pv->utf8_buf;
119         pv->pos = p - pv->buf;
120
121         if ( !pv->utf8_bom_skipped )
122         {
123             uint8_t *buf = (uint8_t*)pv->utf8_buf;
124             if (buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
125             {
126                 pv->utf8_pos = 3;
127             }
128             pv->utf8_bom_skipped = 1;
129         }
130
131         if( ( retval == -1 ) && ( errno == EINVAL ) )
132         {
133             /* Incomplete multibyte sequence, read more data */
134             memmove( pv->buf, p, pv->end - pv->pos );
135             pv->end -= pv->pos;
136             pv->pos = 0;
137             bytes = fread( pv->buf + pv->end, 1, 1024 - pv->end, pv->file );
138             if( bytes == 0 )
139             {
140                 if( !conversion )
141                     return 0;
142                 else
143                     return 1;
144             }
145             pv->end += bytes;
146         } else if ( ( retval == -1 ) && ( errno == EILSEQ ) )
147         {
148             hb_error( "Invalid byte for codeset in input, discard byte" );
149             /* Try the next byte of the input */
150             pv->pos++;
151         } else if ( ( retval == -1 ) && ( errno == E2BIG ) )
152         {
153             /* buffer full */
154             return conversion;
155         }
156     }
157     return 1;
158 }
159
160 static int get_line( hb_work_private_t * pv, char *buf, int size )
161 {
162     int i;
163     char c;
164
165     /* Find newline in converted UTF-8 buffer */
166     for( i = 0; i < size - 1; i++ )
167     {
168         if( pv->utf8_pos >= pv->utf8_end )
169         {
170             if( !utf8_fill( pv ) )
171             {
172                 if( i )
173                     return 1;
174                 else
175                     return 0;
176             }
177         }
178         c = pv->utf8_buf[pv->utf8_pos++];
179         if( c == '\n' )
180         {
181             buf[i] = '\n';
182             buf[i+1] = '\0';
183             return 1;
184         }
185         buf[i] = c;
186     }
187     buf[0] = '\0';
188     return 1;
189 }
190
191 /*
192  * Read the SRT file and put the entries into the subtitle fifo for all to read
193  */
194 static hb_buffer_t *srt_read( hb_work_private_t *pv )
195 {
196     char line_buffer[1024];
197     int reprocess = 0, resync = 0;
198
199     if( !pv->file )
200     {
201         return NULL;
202     }
203     
204     while( reprocess || get_line( pv, line_buffer, sizeof( line_buffer ) ) ) 
205     {
206         reprocess = 0;
207         switch (pv->current_state)
208         {
209         case k_state_timecode:
210         {
211             struct start_and_end timing;
212             int result;
213
214             result = read_time_from_string( line_buffer, &timing );
215             if (!result)
216             {
217                 resync = 1;
218                 pv->current_state = k_state_potential_new_entry;
219                 continue;
220             }
221             pv->current_entry.duration = timing.end - timing.start;
222             pv->current_entry.offset = timing.start - pv->current_time;
223             
224             pv->current_time = timing.end;
225
226             pv->current_entry.start = timing.start;
227             pv->current_entry.stop = timing.end;
228
229             pv->current_state = k_state_inEntry;
230             continue;
231         }
232
233         case k_state_inEntry_or_new:
234         {
235             char *endpoint;
236             long entry_number;
237             /*
238              * Is this really new next entry begin?
239              */
240             entry_number = strtol(line_buffer, &endpoint, 10);
241             if (endpoint == line_buffer ||
242                 (endpoint && *endpoint != '\n' && *endpoint != '\r'))
243             {
244                 /*
245                  * Doesn't resemble an entry number
246                  * must still be in an entry
247                  */
248                 if (!resync)
249                 {
250                     reprocess = 1;
251                     pv->current_state = k_state_inEntry;
252                 }
253                 continue;
254             }
255             reprocess = 1;
256             pv->current_state = k_state_potential_new_entry;
257             break;
258         }
259
260         case k_state_inEntry:
261         {
262             char *q;
263             int  size, len;
264
265             // If the current line is empty, we assume this is the
266             //  seperation betwene two entries. In case we are wrong,
267             //  the mistake is corrected in the next state.
268             if (strcmp(line_buffer, "\n") == 0 || strcmp(line_buffer, "\r\n") == 0) {
269                 pv->current_state = k_state_potential_new_entry;
270                 continue;
271             }
272             
273             q = pv->current_entry.text + pv->current_entry.pos;
274             len = strlen( line_buffer );
275             size = MIN(1024 - pv->current_entry.pos - 1, len );
276             memcpy(q, line_buffer, size);
277             pv->current_entry.pos += size;
278             pv->current_entry.text[pv->current_entry.pos] = '\0';
279             break;
280         }
281
282         case k_state_potential_new_entry:
283         {
284             char *endpoint;
285             long entry_number;
286             hb_buffer_t *buffer = NULL;
287             /*
288              * Is this really new next entry begin?
289              */
290             entry_number = strtol(line_buffer, &endpoint, 10);
291             if (!resync && (*line_buffer == '\n' || *line_buffer == '\r'))
292             {
293                 /*
294                  * Well.. looks like we are in the wrong mode.. lets add the
295                  * newline we misinterpreted...
296                  */
297                 strncat(pv->current_entry.text, " ", 1024);
298                 pv->current_state = k_state_inEntry_or_new;
299                 continue;
300             }
301             if (endpoint == line_buffer ||
302                 (endpoint && *endpoint != '\n' && *endpoint != '\r'))
303             {
304                 /*
305                  * Well.. looks like we are in the wrong mode.. lets add the
306                  * line we misinterpreted...
307                  */
308                 if (!resync)
309                 {
310                     reprocess = 1;
311                     pv->current_state = k_state_inEntry;
312                 }
313                 continue;
314             }
315             /*
316              * We found the next entry - or a really rare error condition
317              */
318             pv->last_entry_number = entry_number;
319             resync = 0;
320             if( *pv->current_entry.text )
321             {
322                 long length;
323                 char *p, *q;
324                 int  line = 1;
325                 uint64_t start_time = ( pv->current_entry.start + 
326                                         pv->subtitle->config.offset ) * 90;
327                 uint64_t stop_time = ( pv->current_entry.stop + 
328                                        pv->subtitle->config.offset ) * 90;
329
330                 if( !( start_time > pv->start_time && stop_time < pv->stop_time ) )
331                 {
332                     hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time);
333                     memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
334                     ++(pv->number_of_entries);
335                     pv->current_state = k_state_timecode;
336                     continue;
337                 }
338
339                 length = strlen( pv->current_entry.text );
340
341                 for( q = p = pv->current_entry.text; *p; p++)
342                 {
343                     if( *p == '\n' )
344                     {
345                         if ( line == 1 )
346                         {
347                             *q = *p;
348                             line = 2;
349                         }
350                         else
351                         {
352                             *q = ' ';
353                         }
354                         q++;
355                     }
356                     else if( *p != '\r' )
357                     {
358                         *q = *p;
359                         q++;
360                     }
361                     else
362                     {
363                         length--;
364                     }
365                 }
366                 *q = '\0';
367
368                 buffer = hb_buffer_init( length + 1 );
369
370                 if( buffer )
371                 {
372                     buffer->start = start_time - pv->start_time;
373                     buffer->stop = stop_time - pv->start_time;
374
375                     memcpy( buffer->data, pv->current_entry.text, length + 1 );
376                 }
377             }
378             memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
379             ++(pv->number_of_entries);
380             pv->current_state = k_state_timecode;
381             if( buffer )
382             {
383                 return buffer;
384             }
385             continue;
386         } 
387         }
388     }
389
390     hb_buffer_t *buffer = NULL;
391     if( *pv->current_entry.text )
392     {
393         long length;
394         char *p, *q;
395         int  line = 1;
396         uint64_t start_time = ( pv->current_entry.start + 
397                                 pv->subtitle->config.offset ) * 90;
398         uint64_t stop_time = ( pv->current_entry.stop + 
399                                pv->subtitle->config.offset ) * 90;
400
401         if( !( start_time > pv->start_time && stop_time < pv->stop_time ) )
402         {
403             hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time);
404             memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
405             return NULL;
406         }
407
408         length = strlen( pv->current_entry.text );
409
410         for( q = p = pv->current_entry.text; *p; p++)
411         {
412             if( *p == '\n' )
413             {
414                 if ( line == 1 )
415                 {
416                     *q = *p;
417                     line = 2;
418                 }
419                 else
420                 {
421                     *q = ' ';
422                 }
423                 q++;
424             }
425             else if( *p != '\r' )
426             {
427                 *q = *p;
428                 q++;
429             }
430             else
431             {
432                 length--;
433             }
434         }
435         *q = '\0';
436
437         buffer = hb_buffer_init( length + 1 );
438
439         if( buffer )
440         {
441             buffer->start = start_time - pv->start_time;
442             buffer->stop = stop_time - pv->start_time;
443
444             memcpy( buffer->data, pv->current_entry.text, length + 1 );
445         }
446     }
447     memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
448     if( buffer )
449     {
450         return buffer;
451     }
452     
453     return NULL;
454 }
455
456 static int decsrtInit( hb_work_object_t * w, hb_job_t * job )
457 {
458     int retval = 1;
459     hb_work_private_t * pv;
460     hb_buffer_t *buffer;
461     int i;
462     hb_chapter_t * chapter;
463     hb_title_t *title = job->title;
464
465     pv = calloc( 1, sizeof( hb_work_private_t ) );
466     if( pv )
467     {
468         w->private_data = pv;
469
470         pv->job = job;
471
472         buffer = hb_buffer_init( 0 );
473         hb_fifo_push( w->fifo_in, buffer);
474         
475         pv->current_state = k_state_potential_new_entry;
476         pv->number_of_entries = 0;
477         pv->last_entry_number = 0;
478         pv->current_time = 0;
479         pv->subtitle = w->subtitle;
480
481         /*
482          * Figure out the start and stop times from teh chapters being
483          * encoded - drop subtitle not in this range.
484          */
485         pv->start_time = 0;
486         for( i = 1; i < job->chapter_start; ++i )
487         {
488             chapter = hb_list_item( title->list_chapter, i - 1 );
489             if( chapter )
490             {
491                 pv->start_time += chapter->duration;
492             } else {
493                 hb_error( "Could not locate chapter %d for SRT start time", i );
494                 retval = 0;
495             }
496         }
497         pv->stop_time = pv->start_time;
498         for( i = job->chapter_start; i <= job->chapter_end; ++i )
499         {
500             chapter = hb_list_item( title->list_chapter, i - 1 );
501             if( chapter )
502             {
503                 pv->stop_time += chapter->duration;
504             } else {
505                 hb_error( "Could not locate chapter %d for SRT start time", i );
506                 retval = 0;
507             }
508         }
509
510         hb_deep_log( 3, "SRT Start time %"PRId64", stop time %"PRId64, pv->start_time, pv->stop_time);
511
512         pv->iconv_context = iconv_open( "utf-8", pv->subtitle->config.src_codeset );
513
514
515         if( pv->iconv_context == (iconv_t) -1 )
516         {
517             hb_error("Could not open the iconv library with those file formats\n");
518
519         } else {
520             memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
521             
522             pv->file = fopen( w->subtitle->config.src_filename, "r" );
523             
524             if( !pv->file )
525             {
526                 hb_error("Could not open the SRT subtitle file '%s'\n", 
527                          w->subtitle->config.src_filename);
528             } else {
529                 retval = 0;
530             }
531         }
532     } 
533
534     return retval;
535 }
536
537 static int decsrtWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
538                        hb_buffer_t ** buf_out )
539 {
540     hb_work_private_t * pv = w->private_data;
541     hb_buffer_t * in = *buf_in;
542     hb_buffer_t * out = NULL;
543
544     out = srt_read( pv );
545
546     if( out )
547     {
548         /*
549          * Keep a buffer in our input fifo so that we get run.
550          */
551         hb_fifo_push( w->fifo_in, in);
552         *buf_in = NULL;
553         *buf_out = out;
554     } else {
555         *buf_out = NULL;
556         return HB_WORK_OK;
557     }
558
559     return HB_WORK_OK;  
560 }
561
562 static void decsrtClose( hb_work_object_t * w )
563 {
564     hb_work_private_t * pv = w->private_data;
565     fclose( pv->file );
566     iconv_close(pv->iconv_context);
567     free( w->private_data );
568 }
569
570 hb_work_object_t hb_decsrtsub =
571 {
572     WORK_DECSRTSUB,
573     "SRT Subtitle Decoder",
574     decsrtInit,
575     decsrtWork,
576     decsrtClose
577 };