OSDN Git Service

2013.10.24
[uclinux-h8/uClinux-dist.git] / freeswan / doc / utils / html2four.c
1 /*
2         extract headers from HTML files
3         in format suitable for turning into permuted index
4 */
5
6 #include <ctype.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <string.h>
10
/*
        size limits used for the input line buffer and for the
        name saved from an <a name=...> tag
*/
#define MAX_LINE  512
#define MAX_NAME   64

/*
        functions
        the int-returning ones are meant to give 0 for OK, 1 for errors
*/
int do_file( char *file, FILE *fp ) ;
int parse_line( char *data ) ;
int print_line( char *tag, char *text ) ;
int print_header_problem( char *file ) ;
int sanity() ;

/* print a message on stderr and exit with status 1 */
void die( char *message ) ;

/*
        program-wide state
*/
char    *prog_name ;            /* set from argv[0]; prefixes diagnostics */
int     max_level ;             /* set from the -N option; main() defaults it to 9 */
char    *current_file ;         /* name of the file being scanned; set by do_file() */
32
33 int main(int argc, char* argv[])
34 {
35         char *p ;
36         int temp, done, status ;
37         FILE *fp ;
38
39         prog_name = *argv ;
40         argc--,argv++ ;
41
42         max_level = 9 ; 
43         if(argc && *argv )      {
44                 p = *argv ;
45                 if( p[0] == '-' )       {
46                         if( isdigit(p[1]) && p[2] == '\0' )     {
47                                 max_level = p[1] - 0 ;
48                                 argc-- ;
49                                 argv++ ;
50                         }
51                         else die("unknown option") ;
52         }       }
53
54         status = done = 0 ;
55         if( argc == 0)  {
56                 if( (status = do_file("STDIN", stdin)) == 0 )
57                         done++ ;
58         }
59         else    {
60 /*
61                 printf("ARGC = %d\n", argc ) ;
62 */
63                 while( argc-- ) {
64                         p = *argv++ ;
65 /*
66                         printf("ARGV P %s %s\n", *argv, p) ;
67 */
68                         if( p == NULL ) {
69                                 fprintf(stderr, "%s: null filename pointer\n", prog_name) ;
70                                 status++ ;
71                         } 
72                         else if( (fp = fopen(p,"r")) == NULL )  {
73                                 fprintf(stderr, "%s: cannot open file %s\n", prog_name, p) ;
74                                 status++ ;
75                         }
76                         else    {
77                                 if( (temp = do_file(p, fp)) != 0 )
78                                         status++ ;
79                                 done++ ;
80                                 fclose(fp) ;
81                         }
82                         fflush(stderr) ;
83                         fflush(stdout) ;
84                 }
85         }
86 /*
87         printf("%s: %d files processed, %d with errors\n", prog_name, done, status) ;
88 */
89         return( status ? 1 : 0 ) ;
90 }
91
92 void die( char *message )
93 {
94         fflush(stdout) ;
95         fprintf(stderr, "%s: %s\n", prog_name, message) ;
96         exit(1) ;
97 }
98
99 int header_flags[10] ;
100 int in_header ;
101
102 char buffer[MAX_LINE+1] ;
103 char label[MAX_NAME+1] ;
104
105 int do_file( char *file, FILE *fp )
106 {
107         int i, status, x, y ;
108         char *base, *p ;
109
110         status = 0 ;
111         in_header = 0 ;
112         label[0] = '\0' ;
113         for( i = 0 ; i < 10 ; i++ )
114                 header_flags[i] = 0 ;
115         current_file = file ;
116
117         while( base = fgets(buffer, MAX_LINE, fp) )     {
118                 // count < and > characters in line
119                 for( x = y = 0, p = base ; *p ; p++ )
120                         switch( *p )    {
121                                 case '<':
122                                         x++ ;
123                                         break ;
124                                 case '>':
125                                         y++ ;
126                                         break ;
127                                 default:
128                                         break ;
129                         }
130                 // skip line if no < or >
131                 if( x == 0 && y == 0 )
132                         continue ;
133                 // report error for unequal count
134                 else if( x != y )       {
135                         if( strncmp( base, "<!--", 4) && strncmp(base, "-->", 3) )      {
136                                 fflush(stdout) ;
137                                 fprintf(stderr, "%s in file %s: unequal < > counts %d %d\n",
138                                         prog_name, file, x, y ) ;
139                                 fprintf(stderr, "%s: %s\n", prog_name, base) ;
140                                 fflush(stderr) ;
141                                 status = 1 ;
142                         }
143                         continue ;
144                 }
145                 // parse lines containing tags
146                 else
147                         if( parse_line(base) )
148                                 status = 1 ;
149                 // check that header labelling is sane
150                 for( i = x = y = 0 ; i < 10 ; i++ )     {
151                         // count non-zero entries
152                         if( x = header_flags[i] )
153                                 y++ ;
154                         // should be in 0 or 1 headers at a time
155                         if( x > 1 || x < 0 )
156                                 status = 1 ;
157                 }
158                 if( y > 1 )
159                         status = 1 ;
160         }
161         return status ;
162 }
163
164 int parse_line( char *data )
165 {
166         char *p, *q, *end ;
167         int x ;
168
169         // set end pointer
170         for( end = data ; *end ; end++ )
171                 ;
172         // trim off trailing returns or newlines
173         for( p = end - 1, q = end ; q > data ; p--,q-- )        {
174                 switch( *p )    {
175                         case '\012':
176                         case '\015':
177                                 *p = '\0' ;
178                                 continue ;
179                         default:
180                                 break ; // out of switch()
181                 }
182                 break ; // out of for()
183         }
184         end = q ;
185         p = data ;
186         while( p < end )        {
187                 // find tag delimiters
188                 if( *p == '<')  {
189                         for( q = p + 1 ; *q ; q++ )
190                                 if( *q == '<' || *q == '>' )
191                                         break ;
192                         // if we find another '<'
193                         // restart tag search from it
194                         if( *q == '<' ) {
195                                 p = q ;
196                                 continue ;
197                         }
198                         // "<>" is not interesting
199                         if( q == p + 1 )        {
200                                 fflush(stdout) ;
201                                 fprintf(stderr, "%s: null tag\n", prog_name) ;
202                                 fprintf(stderr, "%s: line\n", prog_name, data) ;
203                                 fflush(stderr) ;
204                                 p = q + 1 ;
205                                 continue ;
206                         }
207                         // ignore delimiters once found
208                         *q = '\0' ;
209                         p++ ;
210                         // p points to tag contents, null terminated
211                         switch( *p )    {
212                         // save contents of <a name= > tags
213                         case 'a' :
214                         case 'A' :
215                                 if(      p[1] == ' ' &&
216                                         (p[2] == 'n' || p[2] == 'N') &&
217                                         (p[3] == 'a' || p[3] == 'A') &&
218                                         (p[4] == 'm' || p[4] == 'M') &&
219                                         (p[5] == 'e' || p[5] == 'E') &&
220                                          p[6] == '=' )
221                                 strncpy(label, p + 7, MAX_NAME) ;
222                                 break ;
223                         case 'b' :
224                         case 'B' :
225                                 if(     in_header && strlen(p) == 2 &&
226                                         (p[1] == 'r' || p[1] == 'R') )
227                                         putchar(' ') ;
228                                 break ;
229                         // header tags
230                         case 'h' :
231                         case 'H' :
232                                 if( strlen(p) == 2 && isdigit(p[1]) )   {
233                                         if( in_header )
234                                                 fprintf(stderr, "%s: bad header nesting in %s\n",
235                                                         prog_name, current_file) ; 
236                                         x = p[1] - '0' ;
237                                         in_header = 1 ;
238                                         header_flags[x]++ ;
239                                         printf("%s\t%s\tH%d\t", current_file, label, x) ;
240                                 }
241                                 break ;
242                         // only care about end-of-header
243                         case '/':
244                                 p++ ;
245                                 switch( *p )    {
246                                 case 'h' :
247                                 case 'H' :
248                                         if( strlen(p) == 2 && isdigit(p[1]) )   {
249                                                 if( ! in_header )
250                                                         fprintf(stderr, "%s: bad header nesting in %s\n",
251                                                                 prog_name, current_file) ; 
252                                                 x = p[1] - '0' ;
253                                                 in_header = 0 ;
254                                                 header_flags[x]-- ;
255                                                 printf("\n") ;
256                                         }
257                                         break ;
258                                 }
259                                 break ;
260                         // uninteresting tag, look for next
261                         default :
262                                 break ;
263                         }
264                 // tag done, point p beyond it
265                 p = q + 1 ;
266                 }
267                 else if( in_header )    {
268                         if( isprint(*p) && *p != '\n' )
269                                 putchar(*p) ;
270                         else
271                                 putchar(' ');
272                         p++ ;
273                 }
274                 else
275                         p++ ;
276         }
277         return(0) ;
278 }
279
280 int print_line( char *tag, char *text)
281 {
282         printf("%%s\ts\t%s\t%s\t\n", current_file, label, tag, text) ;
283         return 0 ;
284 }
285
286 int print_header_problem( char *file )
287 {
288         int i ;
289         fflush(stdout) ;
290         fprintf(stderr, "%s: HEADER TAG PROBLEM in file %s\n", prog_name, file) ;
291         fprintf(stderr, "%s: counts", prog_name) ;
292         for ( i = 0 ; i < 10 ; i++ )
293                 fprintf(stderr, "\t%d", i) ;
294         fprintf(stderr,"\n") ;
295         fflush(stderr) ;
296         return(0) ;
297 }
298