11 char buffer[MAX_LINE+1] ;
14 void die( char *message )
17 fprintf(stderr, "%s: %s\n", prog_name, message) ;
21 int main(int argc, char* argv[])
26 die("pure filter, takes no arguments") ;
28 while( fgets(buffer, MAX_LINE, stdin))
29 errors += do_line(buffer) ;
30 exit(errors ? 1 : 0 ) ;
33 int do_line(char *data)
35 char *p, *q, *r, *end, *before, *after ;
36 // expecting two tab-separated fields per line
37 // point r to the 2nd field, NUL-terminate the 1st
38 for( r = data ; *r && *r != '\t' ; r++ )
44 for( q = r ; *q ; q++ )
49 // within the 1st field, parse as space-separated words
50 // p will point to the current word, q just past its end
51 // before & after point to the rest of the text on either side
52 // spaces are converted to NULs & back again as required
54 for( p = data ; p < end ; p = q + 1 ) {
60 for( q = p ; *q && *q != ' ' ; q++ )
69 print_line(before, p, after, r) ;
78 // print formatted line for permuted index
79 // two tab-separated fields
81 // 2nd is printable line
82 // pipe the output through something like
83 // sort -f | awk -F '\t' '{print $2}'
84 // to get the final output
86 print_line( char *before, char *word, char *after, char *tag)
90 printf("%s\t%s\t%s\t%s\n", before, word, after, tag) ;
97 // put in sortable field
98 // strip out with awk after sorting
99 printf("%s %s\t", word, after) ;
100 // shorten before string to fit field
101 for( ; x > 30 ; x-- )
103 printf("%30s", before) ;
104 // print keyword, html tagged
105 printf(" %s%s</a> ", tag, word) ;
106 // padding, outside tag
107 for( ; y < 18 ; y++ )
110 printf("%s", after) ;
114 // avoid indexing on common English words
117 "the", "of", "a", "an", "to", "and", "or", "if", "for", "at",
118 "am", "is", "are", "was", "were", "have", "has", "had", "be", "been",
119 "on", "some", "with", "any", "into", "as", "by", "in", "out",
120 "that", "then", "this", "that", "than", "these", "those",
121 "he", "his", "him", "she", "her", "hers", "it", "its",
122 "&", "", "+", "-", "=", "--", "<", ">", "<=", ">=",
123 "!", "?", "#", "$", "%", "/", "\\", "\"", "\'",
126 // interrogative words like "how" and "where" deliberately left out of
127 // above list because users might want to search for "how to..." etc.
129 // return 1 if word in list, else 0
130 // case-insensitive comparison
135 for( z = list ; *z != NULL ; z++ )
136 if( ! strcasecmp( p, *z ) )