OSDN Git Service

2013.10.24
[uclinux-h8/uClinux-dist.git] / lib / flex / dfa.c
1 /* dfa - DFA construction routines */
2
3 /*-
4  * Copyright (c) 1990 The Regents of the University of California.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Vern Paxson.
9  * 
10  * The United States Government has rights in this work pursuant
11  * to contract no. DE-AC03-76SF00098 between the United States
12  * Department of Energy and the University of California.
13  *
14  * Redistribution and use in source and binary forms with or without
15  * modification are permitted provided that: (1) source distributions retain
16  * this entire copyright notice and comment, and (2) distributions including
17  * binaries display the following acknowledgement:  ``This product includes
18  * software developed by the University of California, Berkeley and its
19  * contributors'' in the documentation or other materials provided with the
20  * distribution and in all advertising materials mentioning features or use
21  * of this software.  Neither the name of the University nor the names of
22  * its contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
25  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
26  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27  */
28
29 /* $Header: /home/daffy/u0/vern/flex/RCS/dfa.c,v 2.26 95/04/20 13:53:14 vern Exp $ */
30
31 #include "flexdef.h"
32
33
34 /* declare functions that have forward references */
35
36 void dump_associated_rules PROTO((FILE*, int));
37 void dump_transitions PROTO((FILE*, int[]));
38 void sympartition PROTO((int[], int, int[], int[]));
39 int symfollowset PROTO((int[], int, int, int[]));
40
41
42 /* check_for_backing_up - check a DFA state for backing up
43  *
44  * synopsis
45  *     void check_for_backing_up( int ds, int state[numecs] );
46  *
47  * ds is the number of the state to check and state[] is its out-transitions,
48  * indexed by equivalence class.
49  */
50
51 void check_for_backing_up( ds, state )
52 int ds;
53 int state[];
54         {
55         if ( (reject && ! dfaacc[ds].dfaacc_set) ||
56              (! reject && ! dfaacc[ds].dfaacc_state) )
57                 { /* state is non-accepting */
58                 ++num_backing_up;
59
60                 if ( backing_up_report )
61                         {
62                         fprintf( backing_up_file,
63                                 _( "State #%d is non-accepting -\n" ), ds );
64
65                         /* identify the state */
66                         dump_associated_rules( backing_up_file, ds );
67
68                         /* Now identify it further using the out- and
69                          * jam-transitions.
70                          */
71                         dump_transitions( backing_up_file, state );
72
73                         putc( '\n', backing_up_file );
74                         }
75                 }
76         }
77
78
79 /* check_trailing_context - check to see if NFA state set constitutes
80  *                          "dangerous" trailing context
81  *
82  * synopsis
83  *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
84  *                              int accset[nacc+1], int nacc );
85  *
86  * NOTES
87  *  Trailing context is "dangerous" if both the head and the trailing
88  *  part are of variable size \and/ there's a DFA state which contains
89  *  both an accepting state for the head part of the rule and NFA states
90  *  which occur after the beginning of the trailing context.
91  *
92  *  When such a rule is matched, it's impossible to tell if having been
93  *  in the DFA state indicates the beginning of the trailing context or
94  *  further-along scanning of the pattern.  In these cases, a warning
95  *  message is issued.
96  *
97  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
98  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
99  */
100
101 void check_trailing_context( nfa_states, num_states, accset, nacc )
102 int *nfa_states, num_states;
103 int *accset;
104 int nacc;
105         {
106         register int i, j;
107
108         for ( i = 1; i <= num_states; ++i )
109                 {
110                 int ns = nfa_states[i];
111                 register int type = state_type[ns];
112                 register int ar = assoc_rule[ns];
113
114                 if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
115                         { /* do nothing */
116                         }
117
118                 else if ( type == STATE_TRAILING_CONTEXT )
119                         {
120                         /* Potential trouble.  Scan set of accepting numbers
121                          * for the one marking the end of the "head".  We
122                          * assume that this looping will be fairly cheap
123                          * since it's rare that an accepting number set
124                          * is large.
125                          */
126                         for ( j = 1; j <= nacc; ++j )
127                                 if ( accset[j] & YY_TRAILING_HEAD_MASK )
128                                         {
129                                         line_warning(
130                                         _( "dangerous trailing context" ),
131                                                 rule_linenum[ar] );
132                                         return;
133                                         }
134                         }
135                 }
136         }
137
138
139 /* dump_associated_rules - list the rules associated with a DFA state
140  *
141  * Goes through the set of NFA states associated with the DFA and
142  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
143  * and writes a report to the given file.
144  */
145
146 void dump_associated_rules( file, ds )
147 FILE *file;
148 int ds;
149         {
150         register int i, j;
151         register int num_associated_rules = 0;
152         int rule_set[MAX_ASSOC_RULES + 1];
153         int *dset = dss[ds];
154         int size = dfasiz[ds];
155
156         for ( i = 1; i <= size; ++i )
157                 {
158                 register int rule_num = rule_linenum[assoc_rule[dset[i]]];
159
160                 for ( j = 1; j <= num_associated_rules; ++j )
161                         if ( rule_num == rule_set[j] )
162                                 break;
163
164                 if ( j > num_associated_rules )
165                         { /* new rule */
166                         if ( num_associated_rules < MAX_ASSOC_RULES )
167                                 rule_set[++num_associated_rules] = rule_num;
168                         }
169                 }
170
171         bubble( rule_set, num_associated_rules );
172
173         fprintf( file, _( " associated rule line numbers:" ) );
174
175         for ( i = 1; i <= num_associated_rules; ++i )
176                 {
177                 if ( i % 8 == 1 )
178                         putc( '\n', file );
179
180                 fprintf( file, "\t%d", rule_set[i] );
181                 }
182
183         putc( '\n', file );
184         }
185
186
187 /* dump_transitions - list the transitions associated with a DFA state
188  *
189  * synopsis
190  *     dump_transitions( FILE *file, int state[numecs] );
191  *
192  * Goes through the set of out-transitions and lists them in human-readable
193  * form (i.e., not as equivalence classes); also lists jam transitions
194  * (i.e., all those which are not out-transitions, plus EOF).  The dump
195  * is done to the given file.
196  */
197
198 void dump_transitions( file, state )
199 FILE *file;
200 int state[];
201         {
202         register int i, ec;
203         int out_char_set[CSIZE];
204
205         for ( i = 0; i < csize; ++i )
206                 {
207                 ec = ABS( ecgroup[i] );
208                 out_char_set[i] = state[ec];
209                 }
210
211         fprintf( file, _( " out-transitions: " ) );
212
213         list_character_set( file, out_char_set );
214
215         /* now invert the members of the set to get the jam transitions */
216         for ( i = 0; i < csize; ++i )
217                 out_char_set[i] = ! out_char_set[i];
218
219         fprintf( file, _( "\n jam-transitions: EOF " ) );
220
221         list_character_set( file, out_char_set );
222
223         putc( '\n', file );
224         }
225
226
227 /* epsclosure - construct the epsilon closure of a set of ndfa states
228  *
229  * synopsis
230  *    int *epsclosure( int t[num_states], int *numstates_addr,
231  *                      int accset[num_rules+1], int *nacc_addr,
232  *                      int *hashval_addr );
233  *
234  * NOTES
235  *  The epsilon closure is the set of all states reachable by an arbitrary
236  *  number of epsilon transitions, which themselves do not have epsilon
237  *  transitions going out, unioned with the set of states which have non-null
238  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
239  *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
240  *  accset holds a list of the accepting numbers, and the size of accset is
241  *  given by *nacc_addr.  t may be subjected to reallocation if it is not
242  *  large enough to hold the epsilon closure.
243  *
244  *  hashval is the hash value for the dfa corresponding to the state set.
245  */
246
247 int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
248 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
249         {
250         register int stkpos, ns, tsp;
251         int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
252         int stkend, nstate;
253         static int did_stk_init = false, *stk; 
254
255 #define MARK_STATE(state) \
256 trans1[state] = trans1[state] - MARKER_DIFFERENCE;
257
258 #define IS_MARKED(state) (trans1[state] < 0)
259
260 #define UNMARK_STATE(state) \
261 trans1[state] = trans1[state] + MARKER_DIFFERENCE;
262
263 #define CHECK_ACCEPT(state) \
264 { \
265 nfaccnum = accptnum[state]; \
266 if ( nfaccnum != NIL ) \
267 accset[++nacc] = nfaccnum; \
268 }
269
270 #define DO_REALLOCATION \
271 { \
272 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
273 ++num_reallocs; \
274 t = reallocate_integer_array( t, current_max_dfa_size ); \
275 stk = reallocate_integer_array( stk, current_max_dfa_size ); \
276 } \
277
278 #define PUT_ON_STACK(state) \
279 { \
280 if ( ++stkend >= current_max_dfa_size ) \
281 DO_REALLOCATION \
282 stk[stkend] = state; \
283 MARK_STATE(state) \
284 }
285
286 #define ADD_STATE(state) \
287 { \
288 if ( ++numstates >= current_max_dfa_size ) \
289 DO_REALLOCATION \
290 t[numstates] = state; \
291 hashval += state; \
292 }
293
294 #define STACK_STATE(state) \
295 { \
296 PUT_ON_STACK(state) \
297 CHECK_ACCEPT(state) \
298 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
299 ADD_STATE(state) \
300 }
301
302
303         if ( ! did_stk_init )
304                 {
305                 stk = allocate_integer_array( current_max_dfa_size );
306                 did_stk_init = true;
307                 }
308
309         nacc = stkend = hashval = 0;
310
311         for ( nstate = 1; nstate <= numstates; ++nstate )
312                 {
313                 ns = t[nstate];
314
315                 /* The state could be marked if we've already pushed it onto
316                  * the stack.
317                  */
318                 if ( ! IS_MARKED(ns) )
319                         {
320                         PUT_ON_STACK(ns)
321                         CHECK_ACCEPT(ns)
322                         hashval += ns;
323                         }
324                 }
325
326         for ( stkpos = 1; stkpos <= stkend; ++stkpos )
327                 {
328                 ns = stk[stkpos];
329                 transsym = transchar[ns];
330
331                 if ( transsym == SYM_EPSILON )
332                         {
333                         tsp = trans1[ns] + MARKER_DIFFERENCE;
334
335                         if ( tsp != NO_TRANSITION )
336                                 {
337                                 if ( ! IS_MARKED(tsp) )
338                                         STACK_STATE(tsp)
339
340                                 tsp = trans2[ns];
341
342                                 if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) )
343                                         STACK_STATE(tsp)
344                                 }
345                         }
346                 }
347
348         /* Clear out "visit" markers. */
349
350         for ( stkpos = 1; stkpos <= stkend; ++stkpos )
351                 {
352                 if ( IS_MARKED(stk[stkpos]) )
353                         UNMARK_STATE(stk[stkpos])
354                 else
355                         flexfatal(
356                         _( "consistency check failed in epsclosure()" ) );
357                 }
358
359         *ns_addr = numstates;
360         *hv_addr = hashval;
361         *nacc_addr = nacc;
362
363         return t;
364         }
365
366
367 /* increase_max_dfas - increase the maximum number of DFAs */
368
369 void increase_max_dfas()
370         {
371         current_max_dfas += MAX_DFAS_INCREMENT;
372
373         ++num_reallocs;
374
375         base = reallocate_integer_array( base, current_max_dfas );
376         def = reallocate_integer_array( def, current_max_dfas );
377         dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
378         accsiz = reallocate_integer_array( accsiz, current_max_dfas );
379         dhash = reallocate_integer_array( dhash, current_max_dfas );
380         dss = reallocate_int_ptr_array( dss, current_max_dfas );
381         dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
382
383         if ( nultrans )
384                 nultrans =
385                         reallocate_integer_array( nultrans, current_max_dfas );
386         }
387
388
389 /* ntod - convert an ndfa to a dfa
390  *
391  * Creates the dfa corresponding to the ndfa we've constructed.  The
392  * dfa starts out in state #1.
393  */
394
395 void ntod()
396         {
397         int *accset, ds, nacc, newds;
398         int sym, hashval, numstates, dsize;
399         int num_full_table_rows;        /* used only for -f */
400         int *nset, *dset;
401         int targptr, totaltrans, i, comstate, comfreq, targ;
402         int symlist[CSIZE + 1];
403         int num_start_states;
404         int todo_head, todo_next;
405
406         /* Note that the following are indexed by *equivalence classes*
407          * and not by characters.  Since equivalence classes are indexed
408          * beginning with 1, even if the scanner accepts NUL's, this
409          * means that (since every character is potentially in its own
410          * equivalence class) these arrays must have room for indices
411          * from 1 to CSIZE, so their size must be CSIZE + 1.
412          */
413         int duplist[CSIZE + 1], state[CSIZE + 1];
414         int targfreq[CSIZE + 1], targstate[CSIZE + 1];
415
416         accset = allocate_integer_array( num_rules + 1 );
417         nset = allocate_integer_array( current_max_dfa_size );
418
419         /* The "todo" queue is represented by the head, which is the DFA
420          * state currently being processed, and the "next", which is the
421          * next DFA state number available (not in use).  We depend on the
422          * fact that snstods() returns DFA's \in increasing order/, and thus
423          * need only know the bounds of the dfas to be processed.
424          */
425         todo_head = todo_next = 0;
426
427         for ( i = 0; i <= csize; ++i )
428                 {
429                 duplist[i] = NIL;
430                 symlist[i] = false;
431                 }
432
433         for ( i = 0; i <= num_rules; ++i )
434                 accset[i] = NIL;
435
436         if ( trace )
437                 {
438                 dumpnfa( scset[1] );
439                 fputs( _( "\n\nDFA Dump:\n\n" ), stderr );
440                 }
441
442         inittbl();
443
444         /* Check to see whether we should build a separate table for
445          * transitions on NUL characters.  We don't do this for full-speed
446          * (-F) scanners, since for them we don't have a simple state
447          * number lying around with which to index the table.  We also
448          * don't bother doing it for scanners unless (1) NUL is in its own
449          * equivalence class (indicated by a positive value of
450          * ecgroup[NUL]), (2) NUL's equivalence class is the last
451          * equivalence class, and (3) the number of equivalence classes is
452          * the same as the number of characters.  This latter case comes
453          * about when useecs is false or when it's true but every character
454          * still manages to land in its own class (unlikely, but it's
455          * cheap to check for).  If all these things are true then the
456          * character code needed to represent NUL's equivalence class for
457          * indexing the tables is going to take one more bit than the
458          * number of characters, and therefore we won't be assured of
459          * being able to fit it into a YY_CHAR variable.  This rules out
460          * storing the transitions in a compressed table, since the code
461          * for interpreting them uses a YY_CHAR variable (perhaps it
462          * should just use an integer, though; this is worth pondering ...
463          * ###).
464          *
465          * Finally, for full tables, we want the number of entries in the
466          * table to be a power of two so the array references go fast (it
467          * will just take a shift to compute the major index).  If
468          * encoding NUL's transitions in the table will spoil this, we
469          * give it its own table (note that this will be the case if we're
470          * not using equivalence classes).
471          */
472
473         /* Note that the test for ecgroup[0] == numecs below accomplishes
474          * both (1) and (2) above
475          */
476         if ( ! fullspd && ecgroup[0] == numecs )
477                 {
478                 /* NUL is alone in its equivalence class, which is the
479                  * last one.
480                  */
481                 int use_NUL_table = (numecs == csize);
482
483                 if ( fulltbl && ! use_NUL_table )
484                         {
485                         /* We still may want to use the table if numecs
486                          * is a power of 2.
487                          */
488                         int power_of_two;
489
490                         for ( power_of_two = 1; power_of_two <= csize;
491                               power_of_two *= 2 )
492                                 if ( numecs == power_of_two )
493                                         {
494                                         use_NUL_table = true;
495                                         break;
496                                         }
497                         }
498
499                 if ( use_NUL_table )
500                         nultrans = allocate_integer_array( current_max_dfas );
501
502                 /* From now on, nultrans != nil indicates that we're
503                  * saving null transitions for later, separate encoding.
504                  */
505                 }
506
507
508         if ( fullspd )
509                 {
510                 for ( i = 0; i <= numecs; ++i )
511                         state[i] = 0;
512
513                 place_state( state, 0, 0 );
514                 dfaacc[0].dfaacc_state = 0;
515                 }
516
517         else if ( fulltbl )
518                 {
519                 if ( nultrans )
520                         /* We won't be including NUL's transitions in the
521                          * table, so build it for entries from 0 .. numecs - 1.
522                          */
523                         num_full_table_rows = numecs;
524
525                 else
526                         /* Take into account the fact that we'll be including
527                          * the NUL entries in the transition table.  Build it
528                          * from 0 .. numecs.
529                          */
530                         num_full_table_rows = numecs + 1;
531
532                 /* Unless -Ca, declare it "short" because it's a real
533                  * long-shot that that won't be large enough.
534                  */
535                 out_str_dec( "static yyconst %s yy_nxt[][%d] =\n    {\n",
536                         /* '}' so vi doesn't get too confused */
537                         long_align ? "long" : "short", num_full_table_rows );
538
539                 outn( "    {" );
540
541                 /* Generate 0 entries for state #0. */
542                 for ( i = 0; i < num_full_table_rows; ++i )
543                         mk2data( 0 );
544
545                 dataflush();
546                 outn( "    },\n" );
547                 }
548
549         /* Create the first states. */
550
551         num_start_states = lastsc * 2;
552
553         for ( i = 1; i <= num_start_states; ++i )
554                 {
555                 numstates = 1;
556
557                 /* For each start condition, make one state for the case when
558                  * we're at the beginning of the line (the '^' operator) and
559                  * one for the case when we're not.
560                  */
561                 if ( i % 2 == 1 )
562                         nset[numstates] = scset[(i / 2) + 1];
563                 else
564                         nset[numstates] =
565                                 mkbranch( scbol[i / 2], scset[i / 2] );
566
567                 nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
568
569                 if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
570                         {
571                         numas += nacc;
572                         totnst += numstates;
573                         ++todo_next;
574
575                         if ( variable_trailing_context_rules && nacc > 0 )
576                                 check_trailing_context( nset, numstates,
577                                                         accset, nacc );
578                         }
579                 }
580
581         if ( ! fullspd )
582                 {
583                 if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
584                         flexfatal(
585                         _( "could not create unique end-of-buffer state" ) );
586
587                 ++numas;
588                 ++num_start_states;
589                 ++todo_next;
590                 }
591
592         while ( todo_head < todo_next )
593                 {
594                 targptr = 0;
595                 totaltrans = 0;
596
597                 for ( i = 1; i <= numecs; ++i )
598                         state[i] = 0;
599
600                 ds = ++todo_head;
601
602                 dset = dss[ds];
603                 dsize = dfasiz[ds];
604
605                 if ( trace )
606                         fprintf( stderr, _( "state # %d:\n" ), ds );
607
608                 sympartition( dset, dsize, symlist, duplist );
609
610                 for ( sym = 1; sym <= numecs; ++sym )
611                         {
612                         if ( symlist[sym] )
613                                 {
614                                 symlist[sym] = 0;
615
616                                 if ( duplist[sym] == NIL )
617                                         {
618                                         /* Symbol has unique out-transitions. */
619                                         numstates = symfollowset( dset, dsize,
620                                                                 sym, nset );
621                                         nset = epsclosure( nset, &numstates,
622                                                 accset, &nacc, &hashval );
623
624                                         if ( snstods( nset, numstates, accset,
625                                                 nacc, hashval, &newds ) )
626                                                 {
627                                                 totnst = totnst + numstates;
628                                                 ++todo_next;
629                                                 numas += nacc;
630
631                                                 if (
632                                         variable_trailing_context_rules &&
633                                                         nacc > 0 )
634                                                         check_trailing_context(
635                                                                 nset, numstates,
636                                                                 accset, nacc );
637                                                 }
638
639                                         state[sym] = newds;
640
641                                         if ( trace )
642                                                 fprintf( stderr, "\t%d\t%d\n",
643                                                         sym, newds );
644
645                                         targfreq[++targptr] = 1;
646                                         targstate[targptr] = newds;
647                                         ++numuniq;
648                                         }
649
650                                 else
651                                         {
652                                         /* sym's equivalence class has the same
653                                          * transitions as duplist(sym)'s
654                                          * equivalence class.
655                                          */
656                                         targ = state[duplist[sym]];
657                                         state[sym] = targ;
658
659                                         if ( trace )
660                                                 fprintf( stderr, "\t%d\t%d\n",
661                                                         sym, targ );
662
663                                         /* Update frequency count for
664                                          * destination state.
665                                          */
666
667                                         i = 0;
668                                         while ( targstate[++i] != targ )
669                                                 ;
670
671                                         ++targfreq[i];
672                                         ++numdup;
673                                         }
674
675                                 ++totaltrans;
676                                 duplist[sym] = NIL;
677                                 }
678                         }
679
680                 if ( caseins && ! useecs )
681                         {
682                         register int j;
683
684                         for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
685                                 {
686                                 if ( state[i] == 0 && state[j] != 0 )
687                                         /* We're adding a transition. */
688                                         ++totaltrans;
689
690                                 else if ( state[i] != 0 && state[j] == 0 )
691                                         /* We're taking away a transition. */
692                                         --totaltrans;
693
694                                 state[i] = state[j];
695                                 }
696                         }
697
698                 numsnpairs += totaltrans;
699
700                 if ( ds > num_start_states )
701                         check_for_backing_up( ds, state );
702
703                 if ( nultrans )
704                         {
705                         nultrans[ds] = state[NUL_ec];
706                         state[NUL_ec] = 0;      /* remove transition */
707                         }
708
709                 if ( fulltbl )
710                         {
711                         outn( "    {" );
712
713                         /* Supply array's 0-element. */
714                         if ( ds == end_of_buffer_state )
715                                 mk2data( -end_of_buffer_state );
716                         else
717                                 mk2data( end_of_buffer_state );
718
719                         for ( i = 1; i < num_full_table_rows; ++i )
720                                 /* Jams are marked by negative of state
721                                  * number.
722                                  */
723                                 mk2data( state[i] ? state[i] : -ds );
724
725                         dataflush();
726                         outn( "    },\n" );
727                         }
728
729                 else if ( fullspd )
730                         place_state( state, ds, totaltrans );
731
732                 else if ( ds == end_of_buffer_state )
733                         /* Special case this state to make sure it does what
734                          * it's supposed to, i.e., jam on end-of-buffer.
735                          */
736                         stack1( ds, 0, 0, JAMSTATE );
737
738                 else /* normal, compressed state */
739                         {
740                         /* Determine which destination state is the most
741                          * common, and how many transitions to it there are.
742                          */
743
744                         comfreq = 0;
745                         comstate = 0;
746
747                         for ( i = 1; i <= targptr; ++i )
748                                 if ( targfreq[i] > comfreq )
749                                         {
750                                         comfreq = targfreq[i];
751                                         comstate = targstate[i];
752                                         }
753
754                         bldtbl( state, ds, totaltrans, comstate, comfreq );
755                         }
756                 }
757
758         if ( fulltbl )
759                 dataend();
760
761         else if ( ! fullspd )
762                 {
763                 cmptmps();  /* create compressed template entries */
764
765                 /* Create tables for all the states with only one
766                  * out-transition.
767                  */
768                 while ( onesp > 0 )
769                         {
770                         mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
771                         onedef[onesp] );
772                         --onesp;
773                         }
774
775                 mkdeftbl();
776                 }
777
778         flex_free( (void *) accset );
779         flex_free( (void *) nset );
780         }
781
782
783 /* snstods - converts a set of ndfa states into a dfa state
784  *
785  * synopsis
786  *    is_new_state = snstods( int sns[numstates], int numstates,
787  *                              int accset[num_rules+1], int nacc,
788  *                              int hashval, int *newds_addr );
789  *
790  * On return, the dfa state number is in newds.
791  */
792
793 int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
794 int sns[], numstates, accset[], nacc, hashval, *newds_addr;
795         {
796         int didsort = 0;
797         register int i, j;
798         int newds, *oldsns;
799
800         for ( i = 1; i <= lastdfa; ++i )
801                 if ( hashval == dhash[i] )
802                         {
803                         if ( numstates == dfasiz[i] )
804                                 {
805                                 oldsns = dss[i];
806
807                                 if ( ! didsort )
808                                         {
809                                         /* We sort the states in sns so we
810                                          * can compare it to oldsns quickly.
811                                          * We use bubble because there probably
812                                          * aren't very many states.
813                                          */
814                                         bubble( sns, numstates );
815                                         didsort = 1;
816                                         }
817
818                                 for ( j = 1; j <= numstates; ++j )
819                                         if ( sns[j] != oldsns[j] )
820                                                 break;
821
822                                 if ( j > numstates )
823                                         {
824                                         ++dfaeql;
825                                         *newds_addr = i;
826                                         return 0;
827                                         }
828
829                                 ++hshcol;
830                                 }
831
832                         else
833                                 ++hshsave;
834                         }
835
836         /* Make a new dfa. */
837
838         if ( ++lastdfa >= current_max_dfas )
839                 increase_max_dfas();
840
841         newds = lastdfa;
842
843         dss[newds] = allocate_integer_array( numstates + 1 );
844
845         /* If we haven't already sorted the states in sns, we do so now,
846          * so that future comparisons with it can be made quickly.
847          */
848
849         if ( ! didsort )
850                 bubble( sns, numstates );
851
852         for ( i = 1; i <= numstates; ++i )
853                 dss[newds][i] = sns[i];
854
855         dfasiz[newds] = numstates;
856         dhash[newds] = hashval;
857
858         if ( nacc == 0 )
859                 {
860                 if ( reject )
861                         dfaacc[newds].dfaacc_set = (int *) 0;
862                 else
863                         dfaacc[newds].dfaacc_state = 0;
864
865                 accsiz[newds] = 0;
866                 }
867
868         else if ( reject )
869                 {
870                 /* We sort the accepting set in increasing order so the
871                  * disambiguating rule that the first rule listed is considered
872                  * match in the event of ties will work.  We use a bubble
873                  * sort since the list is probably quite small.
874                  */
875
876                 bubble( accset, nacc );
877
878                 dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 );
879
880                 /* Save the accepting set for later */
881                 for ( i = 1; i <= nacc; ++i )
882                         {
883                         dfaacc[newds].dfaacc_set[i] = accset[i];
884
885                         if ( accset[i] <= num_rules )
886                                 /* Who knows, perhaps a REJECT can yield
887                                  * this rule.
888                                  */
889                                 rule_useful[accset[i]] = true;
890                         }
891
892                 accsiz[newds] = nacc;
893                 }
894
895         else
896                 {
897                 /* Find lowest numbered rule so the disambiguating rule
898                  * will work.
899                  */
900                 j = num_rules + 1;
901
902                 for ( i = 1; i <= nacc; ++i )
903                         if ( accset[i] < j )
904                                 j = accset[i];
905
906                 dfaacc[newds].dfaacc_state = j;
907
908                 if ( j <= num_rules )
909                         rule_useful[j] = true;
910                 }
911
912         *newds_addr = newds;
913
914         return 1;
915         }
916
917
918 /* symfollowset - follow the symbol transitions one step
919  *
920  * synopsis
921  *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
922  *                              int transsym, int nset[current_max_dfa_size] );
923  */
924
925 int symfollowset( ds, dsize, transsym, nset )
926 int ds[], dsize, transsym, nset[];
927         {
928         int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
929
930         numstates = 0;
931
932         for ( i = 1; i <= dsize; ++i )
933                 { /* for each nfa state ns in the state set of ds */
934                 ns = ds[i];
935                 sym = transchar[ns];
936                 tsp = trans1[ns];
937
938                 if ( sym < 0 )
939                         { /* it's a character class */
940                         sym = -sym;
941                         ccllist = cclmap[sym];
942                         lenccl = ccllen[sym];
943
944                         if ( cclng[sym] )
945                                 {
946                                 for ( j = 0; j < lenccl; ++j )
947                                         {
948                                         /* Loop through negated character
949                                          * class.
950                                          */
951                                         ch = ccltbl[ccllist + j];
952
953                                         if ( ch == 0 )
954                                                 ch = NUL_ec;
955
956                                         if ( ch > transsym )
957                                                 /* Transsym isn't in negated
958                                                  * ccl.
959                                                  */
960                                                 break;
961
962                                         else if ( ch == transsym )
963                                                 /* next 2 */ goto bottom;
964                                         }
965
966                                 /* Didn't find transsym in ccl. */
967                                 nset[++numstates] = tsp;
968                                 }
969
970                         else
971                                 for ( j = 0; j < lenccl; ++j )
972                                         {
973                                         ch = ccltbl[ccllist + j];
974
975                                         if ( ch == 0 )
976                                                 ch = NUL_ec;
977
978                                         if ( ch > transsym )
979                                                 break;
980                                         else if ( ch == transsym )
981                                                 {
982                                                 nset[++numstates] = tsp;
983                                                 break;
984                                                 }
985                                         }
986                         }
987
988                 else if ( sym >= 'A' && sym <= 'Z' && caseins )
989                         flexfatal(
990                         _( "consistency check failed in symfollowset" ) );
991
992                 else if ( sym == SYM_EPSILON )
993                         { /* do nothing */
994                         }
995
996                 else if ( ABS( ecgroup[sym] ) == transsym )
997                         nset[++numstates] = tsp;
998
999                 bottom: ;
1000                 }
1001
1002         return numstates;
1003         }
1004
1005
1006 /* sympartition - partition characters with same out-transitions
1007  *
1008  * synopsis
1009  *    sympartition( int ds[current_max_dfa_size], int numstates,
1010  *                      int symlist[numecs], int duplist[numecs] );
1011  */
1012
1013 void sympartition( ds, numstates, symlist, duplist )
1014 int ds[], numstates;
1015 int symlist[], duplist[];
1016         {
1017         int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1018
1019         /* Partitioning is done by creating equivalence classes for those
1020          * characters which have out-transitions from the given state.  Thus
1021          * we are really creating equivalence classes of equivalence classes.
1022          */
1023
1024         for ( i = 1; i <= numecs; ++i )
1025                 { /* initialize equivalence class list */
1026                 duplist[i] = i - 1;
1027                 dupfwd[i] = i + 1;
1028                 }
1029
1030         duplist[1] = NIL;
1031         dupfwd[numecs] = NIL;
1032
1033         for ( i = 1; i <= numstates; ++i )
1034                 {
1035                 ns = ds[i];
1036                 tch = transchar[ns];
1037
1038                 if ( tch != SYM_EPSILON )
1039                         {
1040                         if ( tch < -lastccl || tch >= csize )
1041                                 {
1042                                 flexfatal(
1043                 _( "bad transition character detected in sympartition()" ) );
1044                                 }
1045
1046                         if ( tch >= 0 )
1047                                 { /* character transition */
1048                                 int ec = ecgroup[tch];
1049
1050                                 mkechar( ec, dupfwd, duplist );
1051                                 symlist[ec] = 1;
1052                                 }
1053
1054                         else
1055                                 { /* character class */
1056                                 tch = -tch;
1057
1058                                 lenccl = ccllen[tch];
1059                                 cclp = cclmap[tch];
1060                                 mkeccl( ccltbl + cclp, lenccl, dupfwd,
1061                                         duplist, numecs, NUL_ec );
1062
1063                                 if ( cclng[tch] )
1064                                         {
1065                                         j = 0;
1066
1067                                         for ( k = 0; k < lenccl; ++k )
1068                                                 {
1069                                                 ich = ccltbl[cclp + k];
1070
1071                                                 if ( ich == 0 )
1072                                                         ich = NUL_ec;
1073
1074                                                 for ( ++j; j < ich; ++j )
1075                                                         symlist[j] = 1;
1076                                                 }
1077
1078                                         for ( ++j; j <= numecs; ++j )
1079                                                 symlist[j] = 1;
1080                                         }
1081
1082                                 else
1083                                         for ( k = 0; k < lenccl; ++k )
1084                                                 {
1085                                                 ich = ccltbl[cclp + k];
1086
1087                                                 if ( ich == 0 )
1088                                                         ich = NUL_ec;
1089
1090                                                 symlist[ich] = 1;
1091                                                 }
1092                                 }
1093                         }
1094                 }
1095         }