6 /********************************
8 ** BYTE NATIVE MODE BENCHMARKS **
11 ** Included in this source **
13 ** Numeric Heapsort **
16 ** Floating point emulation **
17 ** Fourier coefficients **
18 ** Assignment algorithm **
20 ** Huffman compression **
21 ** Back prop. neural net **
22 ** LU Decomposition **
23 ** (linear equations) **
25 ** Rick Grehan, BYTE Magazine **
26 *********************************
29 ** BYTE's Native Mode Benchmarks
30 ** Rick Grehan, BYTE Magazine
33 ** Revision: 3/95;10/95
34 ** 10/95 - Removed allocation that was taking place inside
35 ** the LU Decomposition benchmark. Though it didn't seem to
36 ** make a difference on systems we ran it on, it nonetheless
37 ** removes an operating system dependency that probably should
38 ** not have been there.
41 ** The source, executable, and documentation files that comprise
42 ** the BYTEmark benchmarks are made available on an "as is" basis.
43 ** This means that we at BYTE Magazine have made every reasonable
44 ** effort to verify that the there are no errors in the source and
45 ** executable code. We cannot, however, guarantee that the programs
46 ** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
47 ** no claims in regard to the fitness of the source code, executable
48 ** code, and documentation of the BYTEmark.
49 ** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
50 ** of McGraw-Hill cannot be held responsible for any damages resulting
51 ** from the use of this code or the results obtained from using
68 static int numsort_status=0;
69 static int stringsort_status=0;
72 /*********************
73 ** NUMERIC HEAPSORT **
74 **********************
75 ** This test implements a heapsort algorithm, performed on an
82 ** This routine performs the CPU numeric sort test.
83 ** NOTE: Last version incorrectly stated that the routine
84 ** returned result in # of longword sorted per second.
85 ** Not so; the routine returns # of iterations per sec.
90 SortStruct *numsortstruct; /* Local pointer to global struct */
91 farlong *arraybase; /* Base pointers of array */
92 long accumtime; /* Accumulated time */
93 double iterations; /* Iteration counter */
94 char *errorcontext; /* Error context string pointer */
95 int systemerror; /* For holding error codes */
98 ** Link to global structure
100 numsortstruct=&global_numsortstruct;
103 ** Set the error context string.
105 errorcontext="CPU:Numeric Sort";
108 ** See if we need to do self adjustment code.
110 if(numsortstruct->adjust==0)
113 ** Self-adjustment code. The system begins by sorting 1
114 ** array. If it does that in no time, then two arrays
115 ** are built and sorted. This process continues until
116 ** enough arrays are built to handle the tolerance.
118 numsortstruct->numarrays=1;
122 ** Allocate space for arrays
124 arraybase=(farlong *)AllocateMemory(sizeof(long) *
125 numsortstruct->numarrays * numsortstruct->arraysize,
128 { ReportError(errorcontext,systemerror);
129 FreeMemory((farvoid *)arraybase,
135 ** Do an iteration of the numeric sort. If the
136 ** elapsed time is less than or equal to the permitted
137 ** minimum, then allocate for more arrays and
140 if(DoNumSortIteration(arraybase,
141 numsortstruct->arraysize,
142 numsortstruct->numarrays)>global_min_ticks)
143 break; /* We're ok...exit */
145 FreeMemory((farvoid *)arraybase,&systemerror);
146 if(numsortstruct->numarrays++>NUMNUMARRAYS)
147 { printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
154 ** Allocate space for arrays
156 arraybase=(farlong *)AllocateMemory(sizeof(long) *
157 numsortstruct->numarrays * numsortstruct->arraysize,
160 { ReportError(errorcontext,systemerror);
161 FreeMemory((farvoid *)arraybase,
168 ** All's well if we get here. Repeatedly perform sorts until the
169 ** accumulated elapsed time is greater than # of seconds requested.
172 iterations=(double)0.0;
175 accumtime+=DoNumSortIteration(arraybase,
176 numsortstruct->arraysize,
177 numsortstruct->numarrays);
178 iterations+=(double)1.0;
179 } while(TicksToSecs(accumtime)<numsortstruct->request_secs);
182 ** Clean up, calculate results, and go home. Be sure to
183 ** show that we don't have to rerun adjustment code.
185 FreeMemory((farvoid *)arraybase,&systemerror);
187 numsortstruct->sortspersec=iterations *
188 (double)numsortstruct->numarrays / TicksToFracSecs(accumtime);
190 if(numsortstruct->adjust==0)
191 numsortstruct->adjust=1;
194 if (numsort_status==0) printf("Numeric sort: OK\n");
200 /***********************
201 ** DoNumSortIteration **
202 ************************
203 ** This routine executes one iteration of the numeric
204 ** sort benchmark. It returns the number of ticks
205 ** elapsed for the iteration.
207 static ulong DoNumSortIteration(farlong *arraybase,
211 ulong elapsed; /* Elapsed ticks */
214 ** Load up the array with random numbers
216 LoadNumArrayWithRand(arraybase,arraysize,numarrays);
219 ** Start the stopwatch
221 elapsed=StartStopwatch();
224 ** Execute a heap of heapsorts
226 for(i=0;i<numarrays;i++)
227 NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L);
232 elapsed=StopStopwatch(elapsed);
235 for(i=0;i<arraysize-1;i++)
237 ** Compare to check for proper
240 if(arraybase[i+1]<arraybase[i])
241 { printf("Sort Error\n");
252 /*************************
253 ** LoadNumArrayWithRand **
254 **************************
255 ** Load up an array with random longs.
257 static void LoadNumArrayWithRand(farlong *array, /* Pointer to arrays */
259 uint numarrays) /* # of elements in array */
261 long i; /* Used for index */
262 farlong *darray; /* Destination array pointer */
264 ** Initialize the random number generator
270 ** Load up first array with randoms
272 for(i=0L;i<arraysize;i++)
273 /* array[i]=randnum(0L); */
274 array[i]=randnum((int32)0);
277 ** Now, if there's more than one array to load, copy the
278 ** first into each of the others.
283 for(i=0L;i<arraysize;i++)
293 ** Pass this routine a pointer to an array of long
294 ** integers. Also pass in minimum and maximum offsets.
295 ** This routine performs a heap sort on that array.
297 static void NumHeapSort(farlong *array,
298 ulong bottom, /* Lower bound */
299 ulong top) /* Upper bound */
301 ulong temp; /* Used to exchange elements */
302 ulong i; /* Loop index */
305 ** First, build a heap in the array
307 for(i=(top/2L); i>0; --i)
308 NumSift(array,i,top);
311 ** Repeatedly extract maximum from heap and place it at the
312 ** end of the array. When we get done, we'll have a sorted
316 { NumSift(array,bottom,i);
317 temp=*array; /* Perform exchange */
327 ** Peforms the sift operation on a numeric array,
328 ** constructing a heap in the array.
330 static void NumSift(farlong *array, /* Array of numbers */
331 ulong i, /* Minimum of array */
332 ulong j) /* Maximum of array */
335 long temp; /* Used for exchange */
341 if(array[k]<array[k+1L])
343 if(array[i]<array[k])
356 /********************
357 ** STRING HEAPSORT **
358 ********************/
363 ** This routine performs the CPU string sort test.
365 ** requested_secs = # of seconds to execute test
366 ** stringspersec = # of strings per second sorted (RETURNED)
368 void DoStringSort(void)
371 SortStruct *strsortstruct; /* Local for sort structure */
372 faruchar *arraybase; /* Base pointer of char array */
373 long accumtime; /* Accumulated time */
374 double iterations; /* # of iterations */
375 char *errorcontext; /* Error context string pointer */
376 int systemerror; /* For holding error code */
379 ** Link to global structure
381 strsortstruct=&global_strsortstruct;
384 ** Set the error context
386 errorcontext="CPU:String Sort";
389 ** See if we have to perform self-adjustment code
391 if(strsortstruct->adjust==0)
394 ** Initialize the number of arrays.
396 strsortstruct->numarrays=1;
400 ** Allocate space for array. We'll add an extra 100
401 ** bytes to protect memory as strings move around
402 ** (this can happen during string adjustment)
404 arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
405 (long)strsortstruct->numarrays,&systemerror);
407 { ReportError(errorcontext,systemerror);
412 ** Do an iteration of the string sort. If the
413 ** elapsed time is less than or equal to the permitted
414 ** minimum, then de-allocate the array, reallocate a
415 ** an additional array, and try again.
417 if(DoStringSortIteration(arraybase,
418 strsortstruct->numarrays,
419 strsortstruct->arraysize)>global_min_ticks)
420 break; /* We're ok...exit */
422 FreeMemory((farvoid *)arraybase,&systemerror);
423 strsortstruct->numarrays+=1;
429 ** We don't have to perform self adjustment code.
430 ** Simply allocate the space for the array.
432 arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
433 (long)strsortstruct->numarrays,&systemerror);
435 { ReportError(errorcontext,systemerror);
440 ** All's well if we get here. Repeatedly perform sorts until the
441 ** accumulated elapsed time is greater than # of seconds requested.
444 iterations=(double)0.0;
447 accumtime+=DoStringSortIteration(arraybase,
448 strsortstruct->numarrays,
449 strsortstruct->arraysize);
450 iterations+=(double)strsortstruct->numarrays;
451 } while(TicksToSecs(accumtime)<strsortstruct->request_secs);
454 ** Clean up, calculate results, and go home.
455 ** Set flag to show we don't need to rerun adjustment code.
457 FreeMemory((farvoid *)arraybase,&systemerror);
458 strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
459 if(strsortstruct->adjust==0)
460 strsortstruct->adjust=1;
462 if (stringsort_status==0) printf("String sort: OK\n");
468 /**************************
469 ** DoStringSortIteration **
470 ***************************
471 ** This routine executes one iteration of the string
472 ** sort benchmark. It returns the number of ticks
473 ** Note that this routine also builds the offset pointer
476 static ulong DoStringSortIteration(faruchar *arraybase,
477 uint numarrays,ulong arraysize)
479 farulong *optrarray; /* Offset pointer array */
480 unsigned long elapsed; /* Elapsed ticks */
481 unsigned long nstrings; /* # of strings in array */
482 int syserror; /* System error code */
483 unsigned int i; /* Index */
484 farulong *tempobase; /* Temporary offset pointer base */
485 faruchar *tempsbase; /* Temporary string base pointer */
488 ** Load up the array(s) with random numbers
490 optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);
493 ** Set temp base pointers...they will be modified as the
494 ** benchmark proceeds.
500 ** Start the stopwatch
502 elapsed=StartStopwatch();
507 for(i=0;i<numarrays;i++)
508 { StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1);
509 tempobase+=nstrings; /* Advance base pointers */
510 tempsbase+=arraysize+100;
514 ** Record elapsed time
516 elapsed=StopStopwatch(elapsed);
521 for(i=0;i<nstrings-1;i++)
523 ** Compare strings to check for proper
526 if(str_is_less(optrarray,arraybase,nstrings,i+1,i))
527 { printf("Sort Error\n");
536 ** Release the offset pointer array built by
539 FreeMemory((farvoid *)optrarray,&syserror);
542 ** Return elapsed ticks.
547 /********************
548 ** LoadStringArray **
549 *********************
550 ** Initialize the string array with random strings of
552 ** Returns the pointer to the offset pointer array.
553 ** Note that since we're creating a number of arrays, this
554 ** routine builds one array, then copies it into the others.
556 static farulong *LoadStringArray(faruchar *strarray, /* String array */
557 uint numarrays, /* # of arrays */
558 ulong *nstrings, /* # of strings */
559 ulong arraysize) /* Size of array */
561 faruchar *tempsbase; /* Temporary string base pointer */
562 farulong *optrarray; /* Local for pointer */
563 farulong *tempobase; /* Temporary offset pointer base pointer */
564 unsigned long curroffset; /* Current offset */
565 int fullflag; /* Indicates full array */
566 unsigned char stringlength; /* Length of string */
567 unsigned char i; /* Index */
568 unsigned long j; /* Another index */
569 unsigned int k; /* Yet another index */
570 unsigned int l; /* Ans still one more index */
571 int systemerror; /* For holding error code */
574 ** Initialize random number generator.
580 ** Start with no strings. Initialize our current offset pointer
590 ** Allocate a string with a random length no
591 ** shorter than 4 bytes and no longer than
592 ** 80 bytes. Note we have to also make sure
593 ** there's room in the array.
595 /* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/
596 stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL);
597 if((unsigned long)stringlength+curroffset+1L>=arraysize)
598 { stringlength=(unsigned char)((arraysize-curroffset-1L) &
600 fullflag=1; /* Indicates a full */
604 ** Store length at curroffset and advance current offset.
606 *(strarray+curroffset)=stringlength;
610 ** Fill up the rest of the string with random bytes.
612 for(i=0;i<stringlength;i++)
613 { *(strarray+curroffset)=
614 /* (unsigned char)(abs_randwc((long)0xFE)); */
615 (unsigned char)(abs_randwc((int32)0xFE));
620 ** Increment the # of strings counter.
624 } while(fullflag==0);
627 ** We now have initialized a single full array. If there
628 ** is more than one array, copy the original into the
634 { tempsbase+=arraysize+100; /* Set base */
635 for(l=0;l<arraysize;l++)
636 tempsbase[l]=strarray[l];
641 ** Now the array is full, allocate enough space for an
642 ** offset pointer array.
644 optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) *
648 { ReportError("CPU:Stringsort",systemerror);
649 FreeMemory((void *)strarray,&systemerror);
654 ** Go through the newly-built string array, building
655 ** offsets and putting them into the offset pointer
659 for(j=0;j<*nstrings;j++)
660 { *(optrarray+j)=curroffset;
661 curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
665 ** As above, we've made one copy of the offset pointers,
666 ** so duplicate this array in the remaining ones.
671 { tempobase+=*nstrings;
672 for(l=0;l<*nstrings;l++)
673 tempobase[l]=optrarray[l];
678 ** All done...go home. Pass local pointer back.
686 ** Used by the string heap sort. Call this routine to adjust the
687 ** string at offset i to length l. The members of the string array
688 ** are moved accordingly and the length of the string at offset i
691 static void stradjust(farulong *optrarray, /* Offset pointer array */
692 faruchar *strarray, /* String array */
693 ulong nstrings, /* # of strings */
694 ulong i, /* Offset to adjust */
695 uchar l) /* New length */
697 unsigned long nbytes; /* # of bytes to move */
698 unsigned long j; /* Index */
699 int direction; /* Direction indicator */
700 unsigned char adjamount; /* Adjustment amount */
703 ** If new length is less than old length, the direction is
704 ** down. If new length is greater than old length, the
707 direction=(int)l - (int)*(strarray+*(optrarray+i));
708 adjamount=(unsigned char)abs(direction);
711 ** See if the adjustment is being made to the last
712 ** string in the string array. If so, we don't have to
713 ** do anything more than adjust the length field.
716 { *(strarray+*(optrarray+i))=l;
721 ** Calculate the total # of bytes in string array from
722 ** location i+1 to end of array. Whether we're moving "up" or
723 ** down, this is how many bytes we'll have to move.
725 nbytes=*(optrarray+nstrings-1L) +
726 (unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L -
730 ** Calculate the source and the destination. Source is
731 ** string position i+1. Destination is string position i+l
732 ** (i+"ell"...don't confuse 1 and l).
733 ** Hand this straight to memmove and let it handle the
734 ** "overlap" problem.
736 MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1),
737 (farvoid *)(strarray+*(optrarray+i+1)),
738 (unsigned long)nbytes);
741 ** We have to adjust the offset pointer array.
742 ** This covers string i+1 to numstrings-1.
744 for(j=i+1;j<nstrings;j++)
746 *(optrarray+j)=*(optrarray+j)-adjamount;
748 *(optrarray+j)=*(optrarray+j)+adjamount;
751 ** Store the new length and go home.
753 *(strarray+*(optrarray+i))=l;
760 ** Pass this routine a pointer to an array of unsigned char.
761 ** The array is presumed to hold strings occupying at most
762 ** 80 bytes (counts a byte count).
763 ** This routine also needs a pointer to an array of offsets
764 ** which represent string locations in the array, and
765 ** an unsigned long indicating the number of strings
768 static void StrHeapSort(farulong *optrarray, /* Offset pointers */
769 faruchar *strarray, /* Strings array */
770 ulong numstrings, /* # of strings in array */
771 ulong bottom, /* Region to sort...bottom */
772 ulong top) /* Region to sort...top */
774 unsigned char temp[80]; /* Used to exchange elements */
775 unsigned char tlen; /* Temp to hold length */
776 unsigned long i; /* Loop index */
780 ** Build a heap in the array
782 for(i=(top/2L); i>0; --i)
783 strsift(optrarray,strarray,numstrings,i,top);
786 ** Repeatedly extract maximum from heap and place it at the
787 ** end of the array. When we get done, we'll have a sorted
792 strsift(optrarray,strarray,numstrings,0,i);
794 /* temp = string[0] */
796 MoveMemory((farvoid *)&temp[0], /* Perform exchange */
798 (unsigned long)(tlen+1));
801 /* string[0]=string[i] */
802 tlen=*(strarray+*(optrarray+i));
803 stradjust(optrarray,strarray,numstrings,0,tlen);
804 MoveMemory((farvoid *)strarray,
805 (farvoid *)(strarray+*(optrarray+i)),
806 (unsigned long)(tlen+1));
810 stradjust(optrarray,strarray,numstrings,i,tlen);
811 MoveMemory((farvoid *)(strarray+*(optrarray+i)),
813 (unsigned long)(tlen+1));
822 ** Pass this function:
823 ** 1) A pointer to an array of offset pointers
824 ** 2) A pointer to a string array
825 ** 3) The number of elements in the string array
826 ** 4) Offsets to two strings (a & b)
827 ** This function returns TRUE if string a is < string b.
829 static int str_is_less(farulong *optrarray, /* Offset pointers */
830 faruchar *strarray, /* String array */
831 ulong numstrings, /* # of strings */
832 ulong a, ulong b) /* Offsets */
834 int slen; /* String length */
837 ** Determine which string has the minimum length. Use that
838 ** to call strncmp(). If they match up to that point, the
839 ** string with the longer length wins.
841 slen=(int)*(strarray+*(optrarray+a));
842 if(slen > (int)*(strarray+*(optrarray+b)))
843 slen=(int)*(strarray+*(optrarray+b));
845 slen=strncmp((char *)(strarray+*(optrarray+a)),
846 (char *)(strarray+*(optrarray+b)),slen);
851 ** They match. Return true if the length of a
852 ** is greater than the length of b.
854 if(*(strarray+*(optrarray+a)) >
855 *(strarray+*(optrarray+b)))
860 if(slen<0) return(TRUE); /* a is strictly less than b */
862 return(FALSE); /* Only other possibility */
868 ** Pass this function:
869 ** 1) A pointer to an array of offset pointers
870 ** 2) A pointer to a string array
871 ** 3) The number of elements in the string array
872 ** 4) Offset within which to sort.
873 ** Sift the array within the bounds of those offsets (thus
876 static void strsift(farulong *optrarray, /* Offset pointers */
877 faruchar *strarray, /* String array */
878 ulong numstrings, /* # of strings */
879 ulong i, ulong j) /* Offsets */
881 unsigned long k; /* Temporaries */
882 unsigned char temp[80];
883 unsigned char tlen; /* For string lengths */
890 if(str_is_less(optrarray,strarray,numstrings,k,k+1L))
892 if(str_is_less(optrarray,strarray,numstrings,i,k))
895 tlen=*(strarray+*(optrarray+k));
896 MoveMemory((farvoid *)&temp[0],
897 (farvoid *)(strarray+*(optrarray+k)),
898 (unsigned long)(tlen+1));
900 /* string[k]=string[i] */
901 tlen=*(strarray+*(optrarray+i));
902 stradjust(optrarray,strarray,numstrings,k,tlen);
903 MoveMemory((farvoid *)(strarray+*(optrarray+k)),
904 (farvoid *)(strarray+*(optrarray+i)),
905 (unsigned long)(tlen+1));
909 stradjust(optrarray,strarray,numstrings,i,tlen);
910 MoveMemory((farvoid *)(strarray+*(optrarray+i)),
912 (unsigned long)(tlen+1));
921 /************************
922 ** BITFIELD OPERATIONS **
923 *************************/
928 ** Perform the bit operations test portion of the CPU
929 ** benchmark. Returns the iterations per second.
933 BitOpStruct *locbitopstruct; /* Local bitop structure */
934 farulong *bitarraybase; /* Base of bitmap array */
935 farulong *bitoparraybase; /* Base of bitmap operations array */
936 ulong nbitops; /* # of bitfield operations */
937 ulong accumtime; /* Accumulated time in ticks */
938 double iterations; /* # of iterations */
939 char *errorcontext; /* Error context string */
940 int systemerror; /* For holding error codes */
944 ** Link to global structure.
946 locbitopstruct=&global_bitopstruct;
949 ** Set the error context.
951 errorcontext="CPU:Bitfields";
954 ** See if we need to run adjustment code.
956 if(locbitopstruct->adjust==0)
958 bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
959 sizeof(ulong),&systemerror);
961 { ReportError(errorcontext,systemerror);
966 ** Initialize bitfield operations array to [2,30] elements
968 locbitopstruct->bitoparraysize=30L;
973 ** Allocate space for operations array
975 bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
979 { ReportError(errorcontext,systemerror);
980 FreeMemory((farvoid *)bitarraybase,&systemerror);
984 ** Do an iteration of the bitmap test. If the
985 ** elapsed time is less than or equal to the permitted
986 ** minimum, then de-allocate the array, reallocate a
987 ** larger version, and try again.
989 ticks=DoBitfieldIteration(bitarraybase,
991 locbitopstruct->bitoparraysize,
995 if (locbitopstruct->bitoparraysize==30L){
996 /* this is the first loop, write a debug file */
998 unsigned long *running_base; /* same as farulong */
1000 file=fopen("debugbit.dat","w");
1001 running_base=bitarraybase;
1002 for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){
1004 fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
1005 fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
1006 if ((counter+1)%4==0) fprintf(file,"\n");
1008 fprintf(file,"%08lX",*running_base);
1009 if ((counter+1)%8==0) fprintf(file,"\n");
1011 running_base=running_base+1;
1014 printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
1019 if (ticks>global_min_ticks) break; /* We're ok...exit */
1021 FreeMemory((farvoid *)bitoparraybase,&systemerror);
1022 locbitopstruct->bitoparraysize+=100L;
1028 ** Don't need to do self adjustment, just allocate
1031 bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
1032 sizeof(ulong),&systemerror);
1034 { ReportError(errorcontext,systemerror);
1037 bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
1041 { ReportError(errorcontext,systemerror);
1042 FreeMemory((farvoid *)bitarraybase,&systemerror);
1048 ** All's well if we get here. Repeatedly perform bitops until the
1049 ** accumulated elapsed time is greater than # of seconds requested.
1052 iterations=(double)0.0;
1054 accumtime+=DoBitfieldIteration(bitarraybase,
1056 locbitopstruct->bitoparraysize,&nbitops);
1057 iterations+=(double)nbitops;
1058 } while(TicksToSecs(accumtime)<locbitopstruct->request_secs);
1061 ** Clean up, calculate results, and go home.
1062 ** Also, set adjustment flag to show that we don't have
1063 ** to do self adjusting in the future.
1065 FreeMemory((farvoid *)bitarraybase,&systemerror);
1066 FreeMemory((farvoid *)bitoparraybase,&systemerror);
1067 locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
1068 if(locbitopstruct->adjust==0)
1069 locbitopstruct->adjust=1;
1074 /************************
1075 ** DoBitfieldIteration **
1076 *************************
1077 ** Perform a single iteration of the bitfield benchmark.
1078 ** Return the # of ticks accumulated by the operation.
1080 static ulong DoBitfieldIteration(farulong *bitarraybase,
1081 farulong *bitoparraybase,
1082 long bitoparraysize,
1086 ulong bitoffset; /* Offset into bitmap */
1087 ulong elapsed; /* Time to execute */
1089 ** Clear # bitops counter
1094 ** Construct a set of bitmap offsets and run lengths.
1095 ** The offset can be any random number from 0 to the
1096 ** size of the bitmap (in bits). The run length can
1097 ** be any random number from 1 to the number of bits
1098 ** between the offset and the end of the bitmap.
1099 ** Note that the bitmap has 8192 * 32 bits in it.
1103 ** Reset random number generator so things repeat.
1104 ** Also reset the bit array we work on.
1105 ** added by Uwe F. Mayer
1108 for (i=0;i<global_bitopstruct.bitfieldarraysize;i++)
1111 *(bitarraybase+i)=(ulong)0x5555555555555555;
1113 *(bitarraybase+i)=(ulong)0x55555555;
1117 /* end of addition of code */
1119 for (i=0;i<bitoparraysize;i++)
1121 /* First item is offset */
1122 /* *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); */
1123 *(bitoparraybase+i+i)=bitoffset=abs_randwc((int32)262140);
1125 /* Next item is run length */
1126 /* *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset);*/
1127 *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc((int32)262140-bitoffset);
1131 ** Array of offset and lengths built...do an iteration of
1133 ** Start the stopwatch.
1135 elapsed=StartStopwatch();
1138 ** Loop through array off offset/run length pairs.
1139 ** Execute operation based on modulus of index.
1141 for(i=0;i<bitoparraysize;i++)
1146 case 0: /* Set run of bits */
1147 ToggleBitRun(bitarraybase,
1148 *(bitoparraybase+i+i),
1149 *(bitoparraybase+i+i+1),
1153 case 1: /* Clear run of bits */
1154 ToggleBitRun(bitarraybase,
1155 *(bitoparraybase+i+i),
1156 *(bitoparraybase+i+i+1),
1160 case 2: /* Complement run of bits */
1161 FlipBitRun(bitarraybase,
1162 *(bitoparraybase+i+i),
1163 *(bitoparraybase+i+i+1));
1169 ** Return elapsed time
1171 return(StopStopwatch(elapsed));
1175 /*****************************
1177 ******************************
1178 ** Set or clear a run of nbits starting at
1179 ** bit_addr in bitmap.
1181 static void ToggleBitRun(farulong *bitmap, /* Bitmap */
1182 ulong bit_addr, /* Address of bits to set */
1183 ulong nbits, /* # of bits to set/clr */
1184 uint val) /* 1 or 0 */
1186 unsigned long bindex; /* Index into array */
1187 unsigned long bitnumb; /* Bit number */
1192 bindex=bit_addr>>6; /* Index is number /64 */
1193 bitnumb=bit_addr % 64; /* Bit number in word */
1195 bindex=bit_addr>>5; /* Index is number /32 */
1196 bitnumb=bit_addr % 32; /* bit number in word */
1199 bitmap[bindex]|=(1L<<bitnumb);
1201 bitmap[bindex]&=~(1L<<bitnumb);
1210 ** Complements a run of bits.
1212 static void FlipBitRun(farulong *bitmap, /* Bit map */
1213 ulong bit_addr, /* Bit address */
1214 ulong nbits) /* # of bits to flip */
1216 unsigned long bindex; /* Index into array */
1217 unsigned long bitnumb; /* Bit number */
1222 bindex=bit_addr>>6; /* Index is number /64 */
1223 bitnumb=bit_addr % 64; /* Bit number in longword */
1225 bindex=bit_addr>>5; /* Index is number /32 */
1226 bitnumb=bit_addr % 32; /* Bit number in longword */
1228 bitmap[bindex]^=(1L<<bitnumb);
1235 /*****************************
1236 ** FLOATING-POINT EMULATION **
1237 *****************************/
1242 ** Perform the floating-point emulation routines portion of the
1243 ** CPU benchmark. Returns the operations per second.
1245 void DoEmFloat(void)
1247 EmFloatStruct *locemfloatstruct; /* Local structure */
1248 InternalFPF *abase; /* Base of A array */
1249 InternalFPF *bbase; /* Base of B array */
1250 InternalFPF *cbase; /* Base of C array */
1251 ulong accumtime; /* Accumulated time in ticks */
1252 double iterations; /* # of iterations */
1253 ulong tickcount; /* # of ticks */
1254 char *errorcontext; /* Error context string pointer */
1255 int systemerror; /* For holding error code */
1256 ulong loops; /* # of loops */
1259 ** Link to global structure
1261 locemfloatstruct=&global_emfloatstruct;
1264 ** Set the error context
1266 errorcontext="CPU:Floating Emulation";
1270 ** Test the emulation routines.
1275 abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
1278 { ReportError(errorcontext,systemerror);
1282 bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
1285 { ReportError(errorcontext,systemerror);
1286 FreeMemory((farvoid *)abase,&systemerror);
1290 cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
1293 { ReportError(errorcontext,systemerror);
1294 FreeMemory((farvoid *)abase,&systemerror);
1295 FreeMemory((farvoid *)bbase,&systemerror);
1300 ** Set up the arrays
1302 SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);
1305 ** See if we need to do self-adjusting code.
1307 if(locemfloatstruct->adjust==0)
1309 locemfloatstruct->loops=0;
1312 ** Do an iteration of the tests. If the elapsed time is
1313 ** less than minimum, increase the loop count and try
1316 for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
1317 { tickcount=DoEmFloatIteration(abase,bbase,cbase,
1318 locemfloatstruct->arraysize,
1320 if(tickcount>global_min_ticks)
1321 { locemfloatstruct->loops=loops;
1328 ** Verify that selft adjustment code worked.
1330 if(locemfloatstruct->loops==0)
1331 { printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n");
1332 FreeMemory((farvoid *)abase,&systemerror);
1333 FreeMemory((farvoid *)bbase,&systemerror);
1334 FreeMemory((farvoid *)cbase,&systemerror);
1339 ** All's well if we get here. Repeatedly perform floating
1340 ** tests until the accumulated time is greater than the
1341 ** # of seconds requested.
1342 ** Each iteration performs arraysize * 3 operations.
1345 iterations=(double)0.0;
1347 accumtime+=DoEmFloatIteration(abase,bbase,cbase,
1348 locemfloatstruct->arraysize,
1349 locemfloatstruct->loops);
1350 iterations+=(double)1.0;
1351 } while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);
1355 ** Clean up, calculate results, and go home.
1356 ** Also, indicate that adjustment is done.
1358 FreeMemory((farvoid *)abase,&systemerror);
1359 FreeMemory((farvoid *)bbase,&systemerror);
1360 FreeMemory((farvoid *)cbase,&systemerror);
1362 locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
1363 (double)TicksToFracSecs(accumtime);
1364 if(locemfloatstruct->adjust==0)
1365 locemfloatstruct->adjust=1;
1368 printf("----------------------------------------------------------------------------\n");
1373 /*************************
1374 ** FOURIER COEFFICIENTS **
1375 *************************/
1380 ** Perform the transcendental/trigonometric portion of the
1381 ** benchmark. This benchmark calculates the first n
1382 ** fourier coefficients of the function (x+1)^x defined
1383 ** on the interval 0,2.
1385 void DoFourier(void)
1387 FourierStruct *locfourierstruct; /* Local fourier struct */
1388 fardouble *abase; /* Base of A[] coefficients array */
1389 fardouble *bbase; /* Base of B[] coefficients array */
1390 unsigned long accumtime; /* Accumulated time in ticks */
1391 double iterations; /* # of iterations */
1392 char *errorcontext; /* Error context string pointer */
1393 int systemerror; /* For error code */
1396 ** Link to global structure
1398 locfourierstruct=&global_fourierstruct;
1401 ** Set error context string
1403 errorcontext="FPU:Transcendental";
1406 ** See if we need to do self-adjustment code.
1408 if(locfourierstruct->adjust==0)
1410 locfourierstruct->arraysize=100L; /* Start at 100 elements */
1414 abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
1417 { ReportError(errorcontext,systemerror);
1421 bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
1424 { ReportError(errorcontext,systemerror);
1425 FreeMemory((void *)abase,&systemerror);
1429 ** Do an iteration of the tests. If the elapsed time is
1430 ** less than or equal to the permitted minimum, re-allocate
1431 ** larger arrays and try again.
1433 if(DoFPUTransIteration(abase,bbase,
1434 locfourierstruct->arraysize)>global_min_ticks)
1435 break; /* We're ok...exit */
1438 ** Make bigger arrays and try again.
1440 FreeMemory((farvoid *)abase,&systemerror);
1441 FreeMemory((farvoid *)bbase,&systemerror);
1442 locfourierstruct->arraysize+=50L;
1447 ** Don't need self-adjustment. Just allocate the
1450 abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
1453 { ReportError(errorcontext,systemerror);
1457 bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
1460 { ReportError(errorcontext,systemerror);
1461 FreeMemory((void *)abase,&systemerror);
1466 ** All's well if we get here. Repeatedly perform integration
1467 ** tests until the accumulated time is greater than the
1468 ** # of seconds requested.
1471 iterations=(double)0.0;
1473 accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize);
1474 iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0;
1475 } while(TicksToSecs(accumtime)<locfourierstruct->request_secs);
1479 ** Clean up, calculate results, and go home.
1480 ** Also set adjustment flag to indicate no adjust code needed.
1482 FreeMemory((farvoid *)abase,&systemerror);
1483 FreeMemory((farvoid *)bbase,&systemerror);
1485 locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime);
1487 if(locfourierstruct->adjust==0)
1488 locfourierstruct->adjust=1;
1493 /************************
1494 ** DoFPUTransIteration **
1495 *************************
1496 ** Perform an iteration of the FPU Transcendental/trigonometric
1497 ** benchmark. Here, an iteration consists of calculating the
1498 ** first n fourier coefficients of the function (x+1)^x on
1499 ** the interval 0,2. n is given by arraysize.
1500 ** NOTE: The # of integration steps is fixed at
1503 static ulong DoFPUTransIteration(fardouble *abase, /* A coeffs. */
1504 fardouble *bbase, /* B coeffs. */
1505 ulong arraysize) /* # of coeffs */
1507 double omega; /* Fundamental frequency */
1508 unsigned long i; /* Index */
1509 unsigned long elapsed; /* Elapsed time */
1512 ** Start the stopwatch
1514 elapsed=StartStopwatch();
1517 ** Calculate the fourier series. Begin by
1518 ** calculating A[0].
1521 *abase=TrapezoidIntegrate((double)0.0,
1524 (double)0.0, /* No omega * n needed */
1528 ** Calculate the fundamental frequency.
1529 ** ( 2 * pi ) / period...and since the period
1530 ** is 2, omega is simply pi.
1532 omega=(double)3.1415926535897932;
1534 for(i=1;i<arraysize;i++)
1538 ** Calculate A[i] terms. Note, once again, that we
1539 ** can ignore the 2/period term outside the integral
1540 ** since the period is 2 and the term cancels itself
1543 *(abase+i)=TrapezoidIntegrate((double)0.0,
1550 ** Calculate the B[i] terms.
1552 *(bbase+i)=TrapezoidIntegrate((double)0.0,
1562 printf("\nA[i]=\n");
1563 for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
1564 printf("\nB[i]=\n(undefined) ");
1565 for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
1569 ** All done, stop the stopwatch
1571 return(StopStopwatch(elapsed));
1574 /***********************
1575 ** TrapezoidIntegrate **
1576 ************************
1577 ** Perform a simple trapezoid integration on the
1578 ** function (x+1)**x.
1579 ** x0,x1 set the lower and upper bounds of the
1581 ** nsteps indicates # of trapezoidal sections
1582 ** omegan is the fundamental frequency times
1583 ** the series member #
1584 ** select = 0 for the A[0] term, 1 for cosine terms, and
1585 ** 2 for sine terms.
1586 ** Returns the value.
1588 static double TrapezoidIntegrate( double x0, /* Lower bound */
1589 double x1, /* Upper bound */
1590 int nsteps, /* # of steps */
1591 double omegan, /* omega * n */
1594 double x; /* Independent variable */
1595 double dx; /* Stepsize */
1596 double rvalue; /* Return value */
1600 ** Initialize independent variable
1605 ** Calculate stepsize
1607 dx=(x1 - x0) / (double)nsteps;
1610 ** Initialize the return value.
1612 rvalue=thefunction(x0,omegan,select)/(double)2.0;
1615 ** Compute the other terms of the integral.
1618 { --nsteps; /* Already done 1 step */
1622 rvalue+=thefunction(x,omegan,select);
1626 ** Finish computation
1628 rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;
1636 ** This routine selects the function to be used
1637 ** in the Trapezoid integration.
1638 ** x is the independent variable
1639 ** omegan is omega * n
1640 ** select chooses which of the sine/cosine functions
1641 ** are used. note the special case for select=0.
1643 static double thefunction(double x, /* Independent variable */
1644 double omegan, /* Omega * term */
1645 int select) /* Choose term */
1649 ** Use select to pick which function we call.
1653 case 0: return(pow(x+(double)1.0,x));
1655 case 1: return(pow(x+(double)1.0,x) * cos(omegan * x));
1657 case 2: return(pow(x+(double)1.0,x) * sin(omegan * x));
1661 ** We should never reach this point, but the following
1662 ** keeps compilers from issuing a warning message.
1667 /*************************
1668 ** ASSIGNMENT ALGORITHM **
1669 *************************/
1674 ** Perform an assignment algorithm.
1675 ** The algorithm was adapted from the step by step guide found
1676 ** in "Quantitative Decision Making for Business" (Gordon,
1677 ** Pressman, and Cohn; Prentice-Hall)
1681 ** 1. Even though the algorithm distinguishes between
1682 ** ASSIGNROWS and ASSIGNCOLS, as though the two might
1683 ** be different, it does presume a square matrix.
1684 ** I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
1685 ** This makes for some algorithmically-correct but
1686 ** probably non-optimal constructs.
1691 AssignStruct *locassignstruct; /* Local structure ptr */
1699 ** Link to global structure
1701 locassignstruct=&global_assignstruct;
1704 ** Set the error context string.
1706 errorcontext="CPU:Assignment";
1709 ** See if we need to do self adjustment code.
1711 if(locassignstruct->adjust==0)
1714 ** Self-adjustment code. The system begins by working on 1
1715 ** array. If it does that in no time, then two arrays
1716 ** are built. This process continues until
1717 ** enough arrays are built to handle the tolerance.
1719 locassignstruct->numarrays=1;
1723 ** Allocate space for arrays
1725 arraybase=(farlong *) AllocateMemory(sizeof(long)*
1726 ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
1729 { ReportError(errorcontext,systemerror);
1730 FreeMemory((farvoid *)arraybase,
1736 ** Do an iteration of the assignment alg. If the
1737 ** elapsed time is less than or equal to the permitted
1738 ** minimum, then allocate for more arrays and
1741 if(DoAssignIteration(arraybase,
1742 locassignstruct->numarrays)>global_min_ticks)
1743 break; /* We're ok...exit */
1745 FreeMemory((farvoid *)arraybase, &systemerror);
1746 locassignstruct->numarrays++;
1751 ** Allocate space for arrays
1753 arraybase=(farlong *)AllocateMemory(sizeof(long)*
1754 ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
1757 { ReportError(errorcontext,systemerror);
1758 FreeMemory((farvoid *)arraybase,
1765 ** All's well if we get here. Do the tests.
1768 iterations=(double)0.0;
1771 accumtime+=DoAssignIteration(arraybase,
1772 locassignstruct->numarrays);
1773 iterations+=(double)1.0;
1774 } while(TicksToSecs(accumtime)<locassignstruct->request_secs);
1777 ** Clean up, calculate results, and go home. Be sure to
1778 ** show that we don't have to rerun adjustment code.
1780 FreeMemory((farvoid *)arraybase,&systemerror);
1782 locassignstruct->iterspersec=iterations *
1783 (double)locassignstruct->numarrays / TicksToFracSecs(accumtime);
1785 if(locassignstruct->adjust==0)
1786 locassignstruct->adjust=1;
1792 /**********************
1793 ** DoAssignIteration **
1794 ***********************
1795 ** This routine executes one iteration of the assignment test.
1796 ** It returns the number of ticks elapsed in the iteration.
1798 static ulong DoAssignIteration(farlong *arraybase,
1801 longptr abase; /* local pointer */
1802 ulong elapsed; /* Elapsed ticks */
1806 ** Set up local pointer
1808 abase.ptrs.p=arraybase;
1811 ** Load up the arrays with a random table.
1813 LoadAssignArrayWithRand(arraybase,numarrays);
1816 ** Start the stopwatch
1818 elapsed=StartStopwatch();
1821 ** Execute assignment algorithms
1823 for(i=0;i<numarrays;i++)
1824 { /* abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
1825 /* Fixed by Eike Dierks */
1826 Assignment(*abase.ptrs.ap);
1827 abase.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
1833 return(StopStopwatch(elapsed));
1836 /****************************
1837 ** LoadAssignArrayWithRand **
1838 *****************************
1839 ** Load the assignment arrays with random numbers. All positive.
1840 ** These numbers represent costs.
1842 static void LoadAssignArrayWithRand(farlong *arraybase,
1845 longptr abase,abase1; /* Local for array pointer */
1849 ** Set local array pointer
1851 abase.ptrs.p=arraybase;
1852 abase1.ptrs.p=arraybase;
1855 ** Set up the first array. Then just copy it into the
1858 LoadAssign(*(abase.ptrs.ap));
1860 for(i=1;i<numarrays;i++)
1861 { /* abase1.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
1862 /* Fixed by Eike Dierks */
1863 abase1.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
1864 CopyToAssign(*(abase.ptrs.ap),*(abase1.ptrs.ap));
1873 ** The array given by arraybase is loaded with positive random
1874 ** numbers. Elements in the array are capped at 5,000,000.
1876 static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
1881 ** Reset random number generator so things repeat.
1886 for(i=0;i<ASSIGNROWS;i++)
1887 for(j=0;j<ASSIGNROWS;j++){
1888 /* arraybase[i][j]=abs_randwc(5000000L);*/
1889 arraybase[i][j]=abs_randwc((int32)5000000);
1898 ** Copy the contents of one array to another. This is called by
1899 ** the routine that builds the initial array, and is used to copy
1900 ** the contents of the intial array into all following arrays.
1902 static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
1903 farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
1907 for(i=0;i<ASSIGNROWS;i++)
1908 for(j=0;j<ASSIGNCOLS;j++)
1909 arrayto[i][j]=arrayfrom[i][j];
1917 static void Assignment(farlong arraybase[][ASSIGNCOLS])
1919 short assignedtableau[ASSIGNROWS][ASSIGNCOLS];
1922 ** First, calculate minimum costs
1924 calc_minimum_costs(arraybase);
1927 ** Repeat following until the number of rows selected
1928 ** equals the number of rows in the tableau.
1930 while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS)
1931 { second_assignments(arraybase,assignedtableau);
1937 printf("\nColumn choices for each row\n");
1938 for(i=0;i<ASSIGNROWS;i++)
1940 printf("R%03d: ",i);
1941 for(j=0;j<ASSIGNCOLS;j++)
1942 if(assignedtableau[i][j]==1)
1951 /***********************
1952 ** calc_minimum_costs **
1953 ************************
1954 ** Revise the tableau by calculating the minimum costs on a
1955 ** row and column basis. These minima are subtracted from
1956 ** their rows and columns, creating a new tableau.
1958 static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
1960 ushort i,j; /* Index variables */
1961 long currentmin; /* Current minimum */
1963 ** Determine minimum costs on row basis. This is done by
1964 ** subtracting -- on a row-per-row basis -- the minum value
1967 for(i=0;i<ASSIGNROWS;i++)
1969 currentmin=MAXPOSLONG; /* Initialize minimum */
1970 for(j=0;j<ASSIGNCOLS;j++)
1971 if(tableau[i][j]<currentmin)
1972 currentmin=tableau[i][j];
1974 for(j=0;j<ASSIGNCOLS;j++)
1975 tableau[i][j]-=currentmin;
1979 ** Determine minimum cost on a column basis. This works
1980 ** just as above, only now we step through the array
1983 for(j=0;j<ASSIGNCOLS;j++)
1985 currentmin=MAXPOSLONG; /* Initialize minimum */
1986 for(i=0;i<ASSIGNROWS;i++)
1987 if(tableau[i][j]<currentmin)
1988 currentmin=tableau[i][j];
1991 ** Here, we'll take the trouble to see if the current
1992 ** minimum is zero. This is likely worth it, since the
1993 ** preceding loop will have created at least one zero in
1994 ** each row. We can save ourselves a few iterations.
1997 for(i=0;i<ASSIGNROWS;i++)
1998 tableau[i][j]-=currentmin;
2004 /**********************
2005 ** first_assignments **
2006 ***********************
2007 ** Do first assignments.
2008 ** The assignedtableau[] array holds a set of values that
2009 ** indicate the assignment of a value, or its elimination.
2011 ** 0 = Item is neither assigned nor eliminated.
2012 ** 1 = Item is assigned
2013 ** 2 = Item is eliminated
2014 ** Returns the number of selections made. If this equals
2015 ** the number of rows, then an optimum has been determined.
2017 static int first_assignments(long tableau[][ASSIGNCOLS],
2018 short assignedtableau[][ASSIGNCOLS])
2020 ushort i,j,k; /* Index variables */
2021 ushort numassigns; /* # of assignments */
2022 ushort totnumassigns; /* Total # of assignments */
2023 ushort numzeros; /* # of zeros in row */
2024 int selected=0; /* Flag used to indicate selection */
2027 ** Clear the assignedtableau, setting all members to show that
2028 ** no one is yet assigned, eliminated, or anything.
2030 for(i=0;i<ASSIGNROWS;i++)
2031 for(j=0;j<ASSIGNCOLS;j++)
2032 assignedtableau[i][j]=0;
2038 ** Step through rows. For each one that is not currently
2039 ** assigned, see if the row has only one zero in it. If so,
2040 ** mark that as an assigned row/col. Eliminate other zeros
2041 ** in the same column.
2043 for(i=0;i<ASSIGNROWS;i++)
2045 for(j=0;j<ASSIGNCOLS;j++)
2046 if(tableau[i][j]==0L)
2047 if(assignedtableau[i][j]==0)
2054 assignedtableau[i][selected]=1;
2055 for(k=0;k<ASSIGNROWS;k++)
2057 (tableau[k][selected]==0))
2058 assignedtableau[k][selected]=2;
2062 ** Step through columns, doing same as above. Now, be careful
2063 ** of items in the other rows of a selected column.
2065 for(j=0;j<ASSIGNCOLS;j++)
2067 for(i=0;i<ASSIGNROWS;i++)
2068 if(tableau[i][j]==0L)
2069 if(assignedtableau[i][j]==0)
2076 assignedtableau[selected][j]=1;
2077 for(k=0;k<ASSIGNCOLS;k++)
2079 (tableau[selected][k]==0))
2080 assignedtableau[selected][k]=2;
2084 ** Repeat until no more assignments to be made.
2086 } while(numassigns!=0);
2089 ** See if we can leave at this point.
2091 if(totnumassigns==ASSIGNROWS) return(totnumassigns);
2094 ** Now step through the array by row. If you find any unassigned
2095 ** zeros, pick the first in the row. Eliminate all zeros from
2096 ** that same row & column. This occurs if there are multiple optima...
2099 for(i=0;i<ASSIGNROWS;i++)
2101 for(j=0;j<ASSIGNCOLS;j++)
2102 if((tableau[i][j]==0L) &&
2103 (assignedtableau[i][j]==0))
2108 { assignedtableau[i][selected]=1;
2110 for(k=0;k<ASSIGNCOLS;k++)
2112 (tableau[i][k]==0L))
2113 assignedtableau[i][k]=2;
2114 for(k=0;k<ASSIGNROWS;k++)
2116 (tableau[k][selected]==0L))
2117 assignedtableau[k][selected]=2;
2121 return(totnumassigns);
2124 /***********************
2125 ** second_assignments **
2126 ************************
2127 ** This section of the algorithm creates the revised
2128 ** tableau, and is difficult to explain. I suggest you
2129 ** refer to the algorithm's source, mentioned in comments
2130 ** toward the beginning of the program.
2132 static void second_assignments(long tableau[][ASSIGNCOLS],
2133 short assignedtableau[][ASSIGNCOLS])
2135 int i,j; /* Indexes */
2136 short linesrow[ASSIGNROWS];
2137 short linescol[ASSIGNCOLS];
2138 long smallest; /* Holds smallest value */
2139 ushort numassigns; /* Number of assignments */
2140 ushort newrows; /* New rows to be considered */
2142 ** Clear the linesrow and linescol arrays.
2144 for(i=0;i<ASSIGNROWS;i++)
2146 for(i=0;i<ASSIGNCOLS;i++)
2150 ** Scan rows, flag each row that has no assignment in it.
2152 for(i=0;i<ASSIGNROWS;i++)
2154 for(j=0;j<ASSIGNCOLS;j++)
2155 if(assignedtableau[i][j]==1)
2159 if(numassigns==0) linesrow[i]=1;
2166 ** For each row checked above, scan for any zeros. If found,
2167 ** check the associated column.
2169 for(i=0;i<ASSIGNROWS;i++)
2170 { if(linesrow[i]==1)
2171 for(j=0;j<ASSIGNCOLS;j++)
2172 if(tableau[i][j]==0)
2177 ** Now scan checked columns. If any contain assigned zeros, check
2178 ** the associated row.
2180 for(j=0;j<ASSIGNCOLS;j++)
2182 for(i=0;i<ASSIGNROWS;i++)
2183 if((assignedtableau[i][j]==1) &&
2189 } while(newrows!=0);
2192 ** linesrow[n]==0 indicate rows covered by imaginary line
2193 ** linescol[n]==1 indicate cols covered by imaginary line
2194 ** For all cells not covered by imaginary lines, determine smallest
2197 smallest=MAXPOSLONG;
2198 for(i=0;i<ASSIGNROWS;i++)
2200 for(j=0;j<ASSIGNCOLS;j++)
2202 if(tableau[i][j]<smallest)
2203 smallest=tableau[i][j];
2206 ** Subtract smallest from all cells in the above set.
2208 for(i=0;i<ASSIGNROWS;i++)
2210 for(j=0;j<ASSIGNCOLS;j++)
2212 tableau[i][j]-=smallest;
2215 ** Add smallest to all cells covered by two lines.
2217 for(i=0;i<ASSIGNROWS;i++)
2219 for(j=0;j<ASSIGNCOLS;j++)
2221 tableau[i][j]+=smallest;
2226 /********************
2227 ** IDEA Encryption **
2228 *********************
2229 ** IDEA - International Data Encryption Algorithm.
2230 ** Based on code presented in Applied Cryptography by Bruce Schneier.
2231 ** Which was based on code developed by Xuejia Lai and James L. Massey.
2232 ** Other modifications made by Colin Plumb.
2239 ** Perform IDEA encryption. Note that we time encryption & decryption
2240 ** time as being a single loop.
2244 IDEAStruct *locideastruct; /* Loc pointer to global structure */
2252 faruchar *plain1; /* First plaintext buffer */
2253 faruchar *crypt1; /* Encryption buffer */
2254 faruchar *plain2; /* Second plaintext buffer */
2257 ** Link to global data
2259 locideastruct=&global_ideastruct;
2262 ** Set error context
2264 errorcontext="CPU:IDEA";
2267 ** Re-init random-number generator.
2273 ** Build an encryption/decryption key
2276 /* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */
2277 userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
2278 for(i=0;i<KEYLEN;i++)
2282 ** Compute encryption/decryption subkeys
2284 en_key_idea(userkey,Z);
2288 ** Allocate memory for buffers. We'll make 3, called plain1,
2289 ** crypt1, and plain2. It works like this:
2290 ** plain1 >>encrypt>> crypt1 >>decrypt>> plain2.
2291 ** So, plain1 and plain2 should match.
2292 ** Also, fill up plain1 with sample text.
2294 plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
2297 ReportError(errorcontext,systemerror);
2301 crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
2304 ReportError(errorcontext,systemerror);
2305 FreeMemory((farvoid *)plain1,&systemerror);
2309 plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
2312 ReportError(errorcontext,systemerror);
2313 FreeMemory((farvoid *)plain1,&systemerror);
2314 FreeMemory((farvoid *)crypt1,&systemerror);
2318 ** Note that we build the "plaintext" by simply loading
2319 ** the array up with random numbers.
2321 for(i=0;i<locideastruct->arraysize;i++)
2322 plain1[i]=(uchar)(abs_randwc(255) & 0xFF);
2325 ** See if we need to perform self adjustment loop.
2327 if(locideastruct->adjust==0)
2330 ** Do self-adjustment. This involves initializing the
2331 ** # of loops and increasing the loop count until we
2332 ** get a number of loops that we can use.
2334 for(locideastruct->loops=100L;
2335 locideastruct->loops<MAXIDEALOOPS;
2336 locideastruct->loops+=10L)
2337 if(DoIDEAIteration(plain1,crypt1,plain2,
2338 locideastruct->arraysize,
2339 locideastruct->loops,
2340 Z,DK)>global_min_ticks) break;
2344 ** All's well if we get here. Do the test.
2347 iterations=(double)0.0;
2350 accumtime+=DoIDEAIteration(plain1,crypt1,plain2,
2351 locideastruct->arraysize,
2352 locideastruct->loops,Z,DK);
2353 iterations+=(double)locideastruct->loops;
2354 } while(TicksToSecs(accumtime)<locideastruct->request_secs);
2357 ** Clean up, calculate results, and go home. Be sure to
2358 ** show that we don't have to rerun adjustment code.
2360 FreeMemory((farvoid *)plain1,&systemerror);
2361 FreeMemory((farvoid *)crypt1,&systemerror);
2362 FreeMemory((farvoid *)plain2,&systemerror);
2363 locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime);
2365 if(locideastruct->adjust==0)
2366 locideastruct->adjust=1;
2372 /********************
2373 ** DoIDEAIteration **
2374 *********************
2375 ** Execute a single iteration of the IDEA encryption algorithm.
2376 ** Actually, a single iteration is one encryption and one
2379 static ulong DoIDEAIteration(faruchar *plain1,
2395 ** Start the stopwatch.
2397 elapsed=StartStopwatch();
2400 ** Do everything for nloops.
2402 for(i=0;i<nloops;i++)
2404 for(j=0;j<arraysize;j+=(sizeof(u16)*4))
2405 cipher_idea((u16 *)(plain1+j),(u16 *)(crypt1+j),Z); /* Encrypt */
2407 for(j=0;j<arraysize;j+=(sizeof(u16)*4))
2408 cipher_idea((u16 *)(crypt1+j),(u16 *)(plain2+j),DK); /* Decrypt */
2412 for(j=0;j<arraysize;j++)
2413 if(*(plain1+j)!=*(plain2+j)){
2414 printf("IDEA Error! \n");
2417 if (status==0) printf("IDEA: OK\n");
2421 ** Get elapsed time.
2423 return(StopStopwatch(elapsed));
2429 ** Performs multiplication, modulo (2**16)+1. This code is structured
2430 ** on the assumption that untaken branches are cheaper than taken
2431 ** branches, and that the compiler doesn't schedule branches.
2433 static u16 mul(register u16 a, register u16 b)
2453 ** Compute multiplicative inverse of x, modulo (2**16)+1
2454 ** using Euclid's GCD algorithm. It is unrolled twice
2455 ** to avoid swapping the meaning of the registers. And
2456 ** some subtracts are changed to adds.
2458 static u16 inv(u16 x)
2464 return(x); /* 0 and 1 are self-inverse */
2468 return(low16(1-t1));
2474 if(x==1) return(t0);
2479 return(low16(1-t1));
2485 ** Compute IDEA encryption subkeys Z
2487 static void en_key_idea(u16 *userkey, u16 *Z)
2496 for(i=0;j<KEYLEN;j++)
2498 Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);
2508 ** Compute IDEA decryption subkeys DK from encryption
2511 static void de_key_idea(IDEAkey Z, IDEAkey DK)
2517 p=(u16 *)(TT+KEYLEN);
2527 for(j=1;j<ROUNDS;j++)
2550 ** Copy and destroy temp copy
2552 for(j=0,p=TT;j<KEYLEN;j++)
2562 ** This #define creates a macro that computes x=x*y modulo 0x10001.
2563 ** Requires temps t16 and t32. Also requires y to be strictly 16
2564 ** bits. Here, I am using the simplest form. May not be the
2567 /* #define MUL(x,y) (x=mul(low16(x),y)) */
2572 ** IDEA encryption/decryption algorithm.
2574 static void cipher_idea(u16 in[4],
2578 register u16 x1, x2, x3, x4, t1, t2;
2579 /* register u16 t16;
2580 register u16 t32; */
2616 /************************
2617 ** HUFFMAN COMPRESSION **
2618 ************************/
2623 ** Execute a huffman compression on a block of plaintext.
2624 ** Note that (as with IDEA encryption) an iteration of the
2625 ** Huffman test includes a compression AND a decompression.
2626 ** Also, the compression cycle includes building the
2629 void DoHuffman(void)
2631 HuffStruct *lochuffstruct; /* Loc pointer to global data */
2637 farchar *decomparray;
2641 ** Link to global data
2643 lochuffstruct=&global_huffstruct;
2646 ** Set error context.
2648 errorcontext="CPU:Huffman";
2651 ** Allocate memory for the plaintext and the compressed text.
2652 ** We'll be really pessimistic here, and allocate equal amounts
2653 ** for both (though we know...well, we PRESUME) the compressed
2654 ** stuff will take less than the plain stuff.
2655 ** Also note that we'll build a 3rd buffer to decompress
2656 ** into, and we preallocate space for the huffman tree.
2657 ** (We presume that the Huffman tree will grow no larger
2658 ** than 512 bytes. This is actually a super-conservative
2659 ** estimate...but, who cares?)
2661 plaintext=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
2663 { ReportError(errorcontext,systemerror);
2666 comparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
2668 { ReportError(errorcontext,systemerror);
2669 FreeMemory(plaintext,&systemerror);
2672 decomparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
2674 { ReportError(errorcontext,systemerror);
2675 FreeMemory(plaintext,&systemerror);
2676 FreeMemory(comparray,&systemerror);
2680 hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
2683 { ReportError(errorcontext,systemerror);
2684 FreeMemory(plaintext,&systemerror);
2685 FreeMemory(comparray,&systemerror);
2686 FreeMemory(decomparray,&systemerror);
2691 ** Build the plaintext buffer. Since we want this to
2692 ** actually be able to compress, we'll use the
2693 ** wordcatalog to build the plaintext stuff.
2696 ** Reset random number generator so things repeat.
2697 ** added by Uwe F. Mayer
2700 create_text_block(plaintext,lochuffstruct->arraysize-1,(ushort)500);
2701 plaintext[lochuffstruct->arraysize-1L]='\0';
2702 plaintextlen=lochuffstruct->arraysize;
2705 ** See if we need to perform self adjustment loop.
2707 if(lochuffstruct->adjust==0)
2710 ** Do self-adjustment. This involves initializing the
2711 ** # of loops and increasing the loop count until we
2712 ** get a number of loops that we can use.
2714 for(lochuffstruct->loops=100L;
2715 lochuffstruct->loops<MAXHUFFLOOPS;
2716 lochuffstruct->loops+=10L)
2717 if(DoHuffIteration(plaintext,
2720 lochuffstruct->arraysize,
2721 lochuffstruct->loops,
2722 hufftree)>global_min_ticks) break;
2726 ** All's well if we get here. Do the test.
2729 iterations=(double)0.0;
2732 accumtime+=DoHuffIteration(plaintext,
2735 lochuffstruct->arraysize,
2736 lochuffstruct->loops,
2738 iterations+=(double)lochuffstruct->loops;
2739 } while(TicksToSecs(accumtime)<lochuffstruct->request_secs);
2742 ** Clean up, calculate results, and go home. Be sure to
2743 ** show that we don't have to rerun adjustment code.
2745 FreeMemory((farvoid *)plaintext,&systemerror);
2746 FreeMemory((farvoid *)comparray,&systemerror);
2747 FreeMemory((farvoid *)decomparray,&systemerror);
2748 FreeMemory((farvoid *)hufftree,&systemerror);
2749 lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
2751 if(lochuffstruct->adjust==0)
2752 lochuffstruct->adjust=1;
2756 /*********************
2757 ** create_text_line **
2758 **********************
2759 ** Create a random line of text, stored at *dt. The line may be
2760 ** no more than nchars long.
2762 static void create_text_line(farchar *dt,
2765 long charssofar; /* # of characters so far */
2766 long tomove; /* # of characters to move */
2767 char myword[40]; /* Local buffer for words */
2768 farchar *wordptr; /* Pointer to word from catalog */
2774 ** Grab a random word from the wordcatalog
2776 /* wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)];*/
2777 wordptr=wordcatarray[abs_randwc((int32)WORDCATSIZE)];
2778 MoveMemory((farvoid *)myword,
2780 (unsigned long)strlen(wordptr)+1);
2785 tomove=strlen(myword)+1;
2786 myword[tomove-1]=' ';
2789 ** See how long it is. If its length+charssofar > nchars, we have
2792 if((tomove+charssofar)>nchars)
2793 tomove=nchars-charssofar;
2795 ** Attach the word to the current line. Increment counter.
2797 MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)tomove);
2802 ** If we're done, bail out. Otherwise, go get another word.
2804 } while(charssofar<nchars);
2809 /**********************
2810 ** create_text_block **
2811 ***********************
2812 ** Build a block of text randomly loaded with words. The words
2813 ** come from the wordcatalog (which must be loaded before you
2815 ** *tb points to the memory where the text is to be built.
2816 ** tblen is the # of bytes to put into the text block
2817 ** maxlinlen is the maximum length of any line (line end indicated
2818 ** by a carriage return).
2820 static void create_text_block(farchar *tb,
2824 ulong bytessofar; /* # of bytes so far */
2825 ulong linelen; /* Line length */
2831 ** Pick a random length for a line and fill the line.
2832 ** Make sure the line can fit (haven't exceeded tablen) and also
2833 ** make sure you leave room to append a carriage return.
2835 linelen=abs_randwc(maxlinlen-6)+6;
2836 if((linelen+bytessofar)>tblen)
2837 linelen=tblen-bytessofar;
2841 create_text_line(tb,linelen);
2843 tb+=linelen-1; /* Add the carriage return */
2846 bytessofar+=linelen;
2848 } while(bytessofar<tblen);
2852 /********************
2853 ** DoHuffIteration **
2854 *********************
2855 ** Perform the huffman benchmark. This routine
2856 ** (a) Builds the huffman tree
2857 ** (b) Compresses the text
2858 ** (c) Decompresses the text and verifies correct decompression
2860 static ulong DoHuffIteration(farchar *plaintext,
2862 farchar *decomparray,
2865 huff_node *hufftree)
2868 long j; /* Bigger index */
2869 int root; /* Pointer to huffman tree root */
2870 float lowfreq1, lowfreq2; /* Low frequency counters */
2871 int lowidx1, lowidx2; /* Indexes of low freq. elements */
2872 long bitoffset; /* Bit offset into text */
2873 long textoffset; /* Char offset into text */
2874 long maxbitoffset; /* Holds limit of bit offset */
2875 long bitstringlen; /* Length of bitstring */
2876 int c; /* Character from plaintext */
2877 char bitstring[30]; /* Holds bitstring */
2878 ulong elapsed; /* For stopwatch */
2884 ** Start the stopwatch
2886 elapsed=StartStopwatch();
2889 ** Do everything for nloops
2895 ** Calculate the frequency of each byte value. Store the
2896 ** results in what will become the "leaves" of the
2897 ** Huffman tree. Interior nodes will be built in those
2898 ** nodes greater than node #255.
2902 hufftree[i].freq=(float)0.0;
2903 hufftree[i].c=(unsigned char)i;
2906 for(j=0;j<arraysize;j++)
2907 hufftree[(int)plaintext[j]].freq+=(float)1.0;
2910 if(hufftree[i].freq != (float)0.0)
2911 hufftree[i].freq/=(float)arraysize;
2913 /* Reset the second half of the tree. Otherwise the loop below that
2914 ** compares the frequencies up to index 512 makes no sense. Some
2915 ** systems automatically zero out memory upon allocation, others (like
2916 ** for example DEC Unix) do not. Depending on this the loop below gets
2917 ** different data and different run times. On our alpha the data that
2918 ** was arbitrarily assigned led to an underflow error at runtime. We
2919 ** use that zeroed-out bits are in fact 0 as a float.
2921 bzero((char *)&(hufftree[256]),sizeof(huff_node)*256);
2923 ** Build the huffman tree. First clear all the parent
2924 ** pointers and left/right pointers. Also, discard all
2925 ** nodes that have a frequency of true 0. */
2927 { if(hufftree[i].freq==(float)0.0)
2928 hufftree[i].parent=EXCLUDED;
2930 hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1;
2934 ** Go through the tree. Finding nodes of really low
2937 root=255; /* Starting root node-1 */
2940 lowfreq1=(float)2.0; lowfreq2=(float)2.0;
2941 lowidx1=-1; lowidx2=-1;
2943 ** Find first lowest frequency.
2945 for(i=0;i<=root;i++)
2946 if(hufftree[i].parent<0)
2947 if(hufftree[i].freq<lowfreq1)
2948 { lowfreq1=hufftree[i].freq;
2953 ** Did we find a lowest value? If not, the
2956 if(lowidx1==-1) break;
2959 ** Find next lowest frequency
2961 for(i=0;i<=root;i++)
2962 if((hufftree[i].parent<0) && (i!=lowidx1))
2963 if(hufftree[i].freq<lowfreq2)
2964 { lowfreq2=hufftree[i].freq;
2969 ** If we could only find one item, then that
2970 ** item is surely the root, and (as above) the
2973 if(lowidx2==-1) break;
2976 ** Attach the two new nodes to the current root, and
2977 ** advance the current root.
2979 root++; /* New root */
2980 hufftree[lowidx1].parent=root;
2981 hufftree[lowidx2].parent=root;
2982 hufftree[root].freq=lowfreq1+lowfreq2;
2983 hufftree[root].left=lowidx1;
2984 hufftree[root].right=lowidx2;
2985 hufftree[root].parent=-2; /* Show root */
2989 ** Huffman tree built...compress the plaintext
2991 bitoffset=0L; /* Initialize bit offset */
2992 for(i=0;i<arraysize;i++)
2994 c=(int)plaintext[i]; /* Fetch character */
2996 ** Build a bit string for byte c
2999 while(hufftree[c].parent!=-2)
3000 { if(hufftree[hufftree[c].parent].left==c)
3001 bitstring[bitstringlen]='0';
3003 bitstring[bitstringlen]='1';
3004 c=hufftree[c].parent;
3009 ** Step backwards through the bit string, setting
3010 ** bits in the compressed array as you go.
3012 while(bitstringlen--)
3013 { SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]);
3019 ** Compression done. Perform de-compression.
3021 maxbitoffset=bitoffset;
3026 while(hufftree[i].left!=-1)
3027 { if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0)
3030 i=hufftree[i].right;
3033 decomparray[textoffset]=hufftree[i].c;
3036 if(hufftree[i].c != plaintext[textoffset])
3039 printf("Error at textoffset %ld\n",textoffset);
3044 } while(bitoffset<maxbitoffset);
3046 } /* End the big while(nloops--) from above */
3052 if (status==0) printf("Huffman: OK\n");
3054 return(StopStopwatch(elapsed));
3060 ** Set a bit in the compression array. The value of the
3061 ** bit is set according to char bitchar.
3063 static void SetCompBit(u8 *comparray,
3071 ** First calculate which element in the comparray to
3072 ** alter. and the bitnumber.
3074 byteoffset=bitoffset>>3;
3075 bitnumb=bitoffset % 8;
3081 comparray[byteoffset]|=(1<<bitnumb);
3083 comparray[byteoffset]&=~(1<<bitnumb);
3091 ** Return the bit value of a bit in the comparession array.
3092 ** Returns 0 if the bit is clear, nonzero otherwise.
3094 static int GetCompBit(u8 *comparray,
3101 ** Calculate byte offset and bit number.
3103 byteoffset=bitoffset>>3;
3104 bitnumb=bitoffset % 8;
3109 return((1<<bitnumb) & comparray[byteoffset] );
3112 /********************************
3113 ** BACK PROPAGATION NEURAL NET **
3114 *********************************
3115 ** This code is a modified version of the code
3116 ** that was submitted to BYTE Magazine by
3117 ** Maureen Caudill. It accomanied an article
3118 ** that I CANNOT NOW RECALL.
3119 ** The author's original heading/comment was
3122 ** Backpropagation Network
3123 ** Written by Maureen Caudill
3124 ** in Think C 4.0 on a Macintosh
3126 ** (c) Maureen Caudill 1988-1991
3127 ** This network will accept 5x7 input patterns
3128 ** and produce 8 bit output patterns.
3129 ** The source code may be copied or modified without restriction,
3130 ** but no fee may be charged for its use.
3133 ** I have modified the code so that it will work
3134 ** on systems other than a Macintosh -- RG
3140 ** Perform the neural net benchmark.
3141 ** Note that this benchmark is one of the few that
3142 ** requires an input file. That file is "NNET.DAT" and
3143 ** should be on the local directory (from which the
3144 ** benchmark program in launched).
3148 NNetStruct *locnnetstruct; /* Local ptr to global data */
3154 ** Link to global data
3156 locnnetstruct=&global_nnetstruct;
3159 ** Set error context
3161 errorcontext="CPU:NNET";
3164 ** Init random number generator.
3165 ** NOTE: It is important that the random number generator
3166 ** be re-initialized for every pass through this test.
3167 ** The NNET algorithm uses the random number generator
3168 ** to initialize the net. Results are sensitive to
3169 ** the initial neural net state.
3175 ** Read in the input and output patterns. We'll do this
3176 ** only once here at the beginning. These values don't
3177 ** change once loaded.
3179 if(read_data_file()!=0)
3184 ** See if we need to perform self adjustment loop.
3186 if(locnnetstruct->adjust==0)
3189 ** Do self-adjustment. This involves initializing the
3190 ** # of loops and increasing the loop count until we
3191 ** get a number of loops that we can use.
3193 for(locnnetstruct->loops=1L;
3194 locnnetstruct->loops<MAXNNETLOOPS;
3195 locnnetstruct->loops++)
3198 if(DoNNetIteration(locnnetstruct->loops)
3199 >global_min_ticks) break;
3204 ** All's well if we get here. Do the test.
3207 iterations=(double)0.0;
3210 /* randnum(3L); */ /* Gotta do this for Neural Net */
3211 randnum((int32)3); /* Gotta do this for Neural Net */
3212 accumtime+=DoNNetIteration(locnnetstruct->loops);
3213 iterations+=(double)locnnetstruct->loops;
3214 } while(TicksToSecs(accumtime)<locnnetstruct->request_secs);
3217 ** Clean up, calculate results, and go home. Be sure to
3218 ** show that we don't have to rerun adjustment code.
3220 locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
3222 if(locnnetstruct->adjust==0)
3223 locnnetstruct->adjust=1;
3229 /********************
3230 ** DoNNetIteration **
3231 *********************
3232 ** Do a single iteration of the neural net benchmark.
3233 ** By iteration, we mean a "learning" pass.
3235 static ulong DoNNetIteration(ulong nloops)
3237 ulong elapsed; /* Elapsed time */
3241 ** Run nloops learning cycles. Notice that, counted with
3242 ** the learning cycle is the weight randomization and
3243 ** zeroing of changes. This should reduce clock jitter,
3244 ** since we don't have to stop and start the clock for
3247 elapsed=StartStopwatch();
3255 while (learned == F)
3257 for (patt=0; patt<numpats; patt++)
3259 worst_error = 0.0; /* reset this every pass through data */
3260 move_wt_changes(); /* move last pass's wt changes to momentum array */
3261 do_forward_pass(patt);
3266 learned = check_out_error();
3269 printf("Learned in %d passes\n",numpasses);
3272 return(StopStopwatch(elapsed));
3275 /*************************
3276 ** do_mid_forward(patt) **
3277 **************************
3278 ** Process the middle layer's forward pass
3279 ** The activation of middle layer's neurode is the weighted
3280 ** sum of the inputs from the input pattern, with sigmoid
3281 ** function applied to the inputs.
3283 static void do_mid_forward(int patt)
3288 for (neurode=0;neurode<MID_SIZE; neurode++)
3291 for (i=0; i<IN_SIZE; i++)
3292 { /* compute weighted sum of input signals */
3293 sum += mid_wts[neurode][i]*in_pats[patt][i];
3296 ** apply sigmoid function f(x) = 1/(1+exp(-x)) to weighted sum
3298 sum = 1.0/(1.0+exp(-sum));
3299 mid_out[neurode] = sum;
3304 /*********************
3305 ** do_out_forward() **
3306 **********************
3307 ** process the forward pass through the output layer
3308 ** The activation of the output layer is the weighted sum of
3309 ** the inputs (outputs from middle layer), modified by the
3310 ** sigmoid function.
3312 static void do_out_forward()
3317 for (neurode=0; neurode<OUT_SIZE; neurode++)
3320 for (i=0; i<MID_SIZE; i++)
3322 ** compute weighted sum of input signals
3323 ** from middle layer
3325 sum += out_wts[neurode][i]*mid_out[i];
3328 ** Apply f(x) = 1/(1+exp(-x)) to weighted input
3330 sum = 1.0/(1.0+exp(-sum));
3331 out_out[neurode] = sum;
3336 /*************************
3337 ** display_output(patt) **
3338 **************************
3339 ** Display the actual output vs. the desired output of the
3341 ** Once the training is complete, and the "learned" flag set
3342 ** to TRUE, then display_output sends its output to both
3343 ** the screen and to a text output file.
3345 ** NOTE: This routine has been disabled in the benchmark
3349 void display_output(int patt)
3353 fprintf(outfile,"\n Iteration # %d",iteration_count);
3354 fprintf(outfile,"\n Desired Output: ");
3356 for (i=0; i<OUT_SIZE; i++)
3358 fprintf(outfile,"%6.3f ",out_pats[patt][i]);
3360 fprintf(outfile,"\n Actual Output: ");
3362 for (i=0; i<OUT_SIZE; i++)
3364 fprintf(outfile,"%6.3f ",out_out[i]);
3366 fprintf(outfile,"\n");
3371 /**********************
3372 ** do_forward_pass() **
3373 ***********************
3374 ** control function for the forward pass through the network
3375 ** NOTE: I have disabled the call to display_output() in
3376 ** the benchmark version -- RG.
3378 static void do_forward_pass(int patt)
3380 do_mid_forward(patt); /* process forward pass, middle layer */
3381 do_out_forward(); /* process forward pass, output layer */
3382 /* display_output(patt); ** display results of forward pass */
3386 /***********************
3387 ** do_out_error(patt) **
3388 ************************
3389 ** Compute the error for the output layer neurodes.
3390 ** This is simply Desired - Actual.
3392 static void do_out_error(int patt)
3395 double error,tot_error, sum;
3399 for (neurode=0; neurode<OUT_SIZE; neurode++)
3401 out_error[neurode] = out_pats[patt][neurode] - out_out[neurode];
3403 ** while we're here, also compute magnitude
3404 ** of total error and worst error in this pass.
3405 ** We use these to decide if we are done yet.
3407 error = out_error[neurode];
3411 if (-error > tot_error)
3412 tot_error = -error; /* worst error this pattern */
3417 if (error > tot_error)
3418 tot_error = error; /* worst error this pattern */
3421 avg_out_error[patt] = sum/OUT_SIZE;
3422 tot_out_error[patt] = tot_error;
3426 /***********************
3427 ** worst_pass_error() **
3428 ************************
3429 ** Find the worst and average error in the pass and save it
3431 static void worst_pass_error()
3439 for (i=0; i<numpats; i++)
3441 if (tot_out_error[i] > error) error = tot_out_error[i];
3442 sum += avg_out_error[i];
3444 worst_error = error;
3445 average_error = sum/numpats;
3449 /*******************
3450 ** do_mid_error() **
3451 ********************
3452 ** Compute the error for the middle layer neurodes
3453 ** This is based on the output errors computed above.
3454 ** Note that the derivative of the sigmoid f(x) is
3455 ** f'(x) = f(x)(1 - f(x))
3456 ** Recall that f(x) is merely the output of the middle
3457 ** layer neurode on the forward pass.
3459 static void do_mid_error()
3464 for (neurode=0; neurode<MID_SIZE; neurode++)
3467 for (i=0; i<OUT_SIZE; i++)
3468 sum += out_wts[i][neurode]*out_error[i];
3471 ** apply the derivative of the sigmoid here
3472 ** Because of the choice of sigmoid f(I), the derivative
3473 ** of the sigmoid is f'(I) = f(I)(1 - f(I))
3475 mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum;
3480 /*********************
3481 ** adjust_out_wts() **
3482 **********************
3483 ** Adjust the weights of the output layer. The error for
3484 ** the output layer has been previously propagated back to
3485 ** the middle layer.
3486 ** Use the Delta Rule with momentum term to adjust the weights.
3488 static void adjust_out_wts()
3490 int weight, neurode;
3491 double learn,delta,alph;
3495 for (neurode=0; neurode<OUT_SIZE; neurode++)
3497 for (weight=0; weight<MID_SIZE; weight++)
3499 /* standard delta rule */
3500 delta = learn * out_error[neurode] * mid_out[weight];
3502 /* now the momentum term */
3503 delta += alph * out_wt_change[neurode][weight];
3504 out_wts[neurode][weight] += delta;
3506 /* keep track of this pass's cum wt changes for next pass's momentum */
3507 out_wt_cum_change[neurode][weight] += delta;
3513 /*************************
3514 ** adjust_mid_wts(patt) **
3515 **************************
3516 ** Adjust the middle layer weights using the previously computed
3518 ** We use the Generalized Delta Rule with momentum term
3520 static void adjust_mid_wts(int patt)
3522 int weight, neurode;
3523 double learn,alph,delta;
3527 for (neurode=0; neurode<MID_SIZE; neurode++)
3529 for (weight=0; weight<IN_SIZE; weight++)
3531 /* first the basic delta rule */
3532 delta = learn * mid_error[neurode] * in_pats[patt][weight];
3534 /* with the momentum term */
3535 delta += alph * mid_wt_change[neurode][weight];
3536 mid_wts[neurode][weight] += delta;
3538 /* keep track of this pass's cum wt changes for next pass's momentum */
3539 mid_wt_cum_change[neurode][weight] += delta;
3545 /*******************
3546 ** do_back_pass() **
3547 ********************
3548 ** Process the backward propagation of error through network.
3550 void do_back_pass(int patt)
3556 adjust_mid_wts(patt);
3562 /**********************
3563 ** move_wt_changes() **
3564 ***********************
3565 ** Move the weight changes accumulated last pass into the wt-change
3566 ** array for use by the momentum term in this pass. Also zero out
3567 ** the accumulating arrays after the move.
3569 static void move_wt_changes()
3573 for (i = 0; i<MID_SIZE; i++)
3574 for (j = 0; j<IN_SIZE; j++)
3576 mid_wt_change[i][j] = mid_wt_cum_change[i][j];
3578 ** Zero it out for next pass accumulation.
3580 mid_wt_cum_change[i][j] = 0.0;
3583 for (i = 0; i<OUT_SIZE; i++)
3584 for (j=0; j<MID_SIZE; j++)
3586 out_wt_change[i][j] = out_wt_cum_change[i][j];
3587 out_wt_cum_change[i][j] = 0.0;
3593 /**********************
3594 ** check_out_error() **
3595 ***********************
3596 ** Check to see if the error in the output layer is below
3597 ** MARGIN*OUT_SIZE for all output patterns. If so, then
3598 ** assume the network has learned acceptably well. This
3599 ** is simply an arbitrary measure of how well the network
3600 ** has learned -- many other standards are possible.
3602 static int check_out_error()
3608 worst_pass_error(); /* identify the worst error in this pass */
3612 printf("\n Iteration # %d",iteration_count);
3615 for (i=0; i<numpats; i++)
3617 /* printf("\n Error pattern %d: Worst: %8.3f; Average: %8.3f",
3618 i+1,tot_out_error[i], avg_out_error[i]);
3620 "\n Error pattern %d: Worst: %8.3f; Average: %8.3f",
3621 i+1,tot_out_error[i]);
3624 if (worst_error >= STOP) result = F;
3625 if (tot_out_error[i] >= 16.0) error = T;
3628 if (error == T) result = ERR;
3632 /* printf("\n Error this pass thru data: Worst: %8.3f; Average: %8.3f",
3633 worst_error,average_error);
3636 "\n Error this pass thru data: Worst: %8.3f; Average: %8.3f",
3637 worst_error, average_error); */
3644 /*******************
3645 ** zero_changes() **
3646 ********************
3647 ** Zero out all the wt change arrays
3649 static void zero_changes()
3653 for (i = 0; i<MID_SIZE; i++)
3655 for (j=0; j<IN_SIZE; j++)
3657 mid_wt_change[i][j] = 0.0;
3658 mid_wt_cum_change[i][j] = 0.0;
3662 for (i = 0; i< OUT_SIZE; i++)
3664 for (j=0; j<MID_SIZE; j++)
3666 out_wt_change[i][j] = 0.0;
3667 out_wt_cum_change[i][j] = 0.0;
3674 /********************
3675 ** randomize_wts() **
3676 *********************
3677 ** Intialize the weights in the middle and output layers to
3678 ** random values between -0.25..+0.25
3679 ** Function rand() returns a value between 0 and 32767.
3681 ** NOTE: Had to make alterations to how the random numbers were
3684 static void randomize_wts()
3690 ** Following not used int benchmark version -- RG
3692 ** printf("\n Please enter a random number seed (1..32767): ");
3697 for (neurode = 0; neurode<MID_SIZE; neurode++)
3699 for(i=0; i<IN_SIZE; i++)
3701 /* value=(double)abs_randwc(100000L); */
3702 value=(double)abs_randwc((int32)100000);
3703 value=value/(double)100000.0 - (double) 0.5;
3704 mid_wts[neurode][i] = value/2;
3707 for (neurode=0; neurode<OUT_SIZE; neurode++)
3709 for(i=0; i<MID_SIZE; i++)
3711 /* value=(double)abs_randwc(100000L); */
3712 value=(double)abs_randwc((int32)100000);
3713 value=value/(double)10000.0 - (double) 0.5;
3714 out_wts[neurode][i] = value/2;
3722 /*********************
3723 ** read_data_file() **
3724 **********************
3725 ** Read in the input data file and store the patterns in
3726 ** in_pats and out_pats.
3727 ** The format for the data file is as follows:
3729 ** line# data expected
3730 ** ----- ------------------------------
3731 ** 1 In-X-size,in-y-size,out-size
3732 ** 2 number of patterns in file
3733 ** 3 1st X row of 1st input pattern
3734 ** 4.. following rows of 1st input pattern pattern
3735 ** in-x+2 y-out pattern
3736 ** 1st X row of 2nd pattern
3739 ** Each row of data is separated by commas or spaces.
3740 ** The data is expected to be ascii text corresponding to
3741 ** either a +1 or a 0.
3743 ** Sample input for a 1-pattern file (The comments to the
3744 ** right may NOT be in the file unless more sophisticated
3745 ** parsing of the input is done.):
3747 ** 5,7,8 input is 5x7 grid, output is 8 bits
3748 ** 1 one pattern in file
3749 ** 0,1,1,1,0 beginning of pattern for "O"
3756 ** 0,1,0,0,1,1,1,1 ASCII code for "O" -- 0100 1111
3758 ** Clearly, this simple scheme can be expanded or enhanced
3759 ** any way you like.
3761 ** Returns -1 if any file error occurred, otherwise 0.
3763 static int read_data_file()
3767 int xinsize,yinsize,youtsize;
3768 int patt, element, i, row;
3770 int val1,val2,val3,val4,val5,val6,val7,val8;
3772 /* printf("\n Opening and retrieving data from file."); */
3774 infile = fopen(inpath, "r");
3777 printf("\n CPU:NNET--error in opening file!");
3780 vals_read =fscanf(infile,"%d %d %d",&xinsize,&yinsize,&youtsize);
3783 printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
3786 vals_read=fscanf(infile,"%d",&numpats);
3789 printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
3792 if (numpats > MAXPATS)
3795 for (patt=0; patt<numpats; patt++)
3798 for (row = 0; row<yinsize; row++)
3800 vals_read = fscanf(infile,"%d %d %d %d %d",
3801 &val1, &val2, &val3, &val4, &val5);
3804 printf ("\n CPU:NNET -- failure in reading input!");
3807 element=row*xinsize;
3809 in_pats[patt][element] = (double) val1; element++;
3810 in_pats[patt][element] = (double) val2; element++;
3811 in_pats[patt][element] = (double) val3; element++;
3812 in_pats[patt][element] = (double) val4; element++;
3813 in_pats[patt][element] = (double) val5; element++;
3815 for (i=0;i<IN_SIZE; i++)
3817 if (in_pats[patt][i] >= 0.9)
3818 in_pats[patt][i] = 0.9;
3819 if (in_pats[patt][i] <= 0.1)
3820 in_pats[patt][i] = 0.1;
3823 vals_read = fscanf(infile,"%d %d %d %d %d %d %d %d",
3824 &val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8);
3826 out_pats[patt][element] = (double) val1; element++;
3827 out_pats[patt][element] = (double) val2; element++;
3828 out_pats[patt][element] = (double) val3; element++;
3829 out_pats[patt][element] = (double) val4; element++;
3830 out_pats[patt][element] = (double) val5; element++;
3831 out_pats[patt][element] = (double) val6; element++;
3832 out_pats[patt][element] = (double) val7; element++;
3833 out_pats[patt][element] = (double) val8; element++;
3836 /* printf("\n Closing the input file now. "); */
3842 /*********************
3843 ** initialize_net() **
3844 **********************
3845 ** Do all the initialization stuff before beginning
3848 static int initialize_net()
3854 err_code = read_data_file();
3855 iteration_count = 1;
3860 /**********************
3861 ** display_mid_wts() **
3862 ***********************
3863 ** Display the weights on the middle layer neurodes
3864 ** NOTE: This routine is not used in the benchmark
3867 /* static void display_mid_wts()
3869 int neurode, weight, row, col;
3871 fprintf(outfile,"\n Weights of Middle Layer neurodes:");
3873 for (neurode=0; neurode<MID_SIZE; neurode++)
3875 fprintf(outfile,"\n Mid Neurode # %d",neurode);
3876 for (row=0; row<IN_Y_SIZE; row++)
3878 fprintf(outfile,"\n ");
3879 for (col=0; col<IN_X_SIZE; col++)
3881 weight = IN_X_SIZE * row + col;
3882 fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
3889 /**********************
3890 ** display_out_wts() **
3891 ***********************
3892 ** Display the weights on the output layer neurodes
3893 ** NOTE: This code is not used in the benchmark
3896 /* void display_out_wts()
3898 int neurode, weight;
3900 fprintf(outfile,"\n Weights of Output Layer neurodes:");
3902 for (neurode=0; neurode<OUT_SIZE; neurode++)
3904 fprintf(outfile,"\n Out Neurode # %d \n",neurode);
3905 for (weight=0; weight<MID_SIZE; weight++)
3907 fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
3914 /***********************
3915 ** LU DECOMPOSITION **
3916 ** (Linear Equations) **
3917 ************************
3918 ** These routines come from "Numerical Recipes in Pascal".
3919 ** Note that, as in the assignment algorithm, though we
3920 ** separately define LUARRAYROWS and LUARRAYCOLS, the two
3921 ** must be the same value (this routine depends on a square
3928 ** Perform the LU decomposition benchmark.
3932 LUStruct *loclustruct; /* Local pointer to global data */
3946 ** Link to global data
3948 loclustruct=&global_lustruct;
3951 ** Set error context.
3953 errorcontext="FPU:LU";
3956 ** Our first step is to build a "solvable" problem. This
3957 ** will become the "seed" set that all others will be
3958 ** derived from. (I.E., we'll simply copy these arrays
3961 a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
3963 b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
3968 ** We need to allocate a temp vector that is used by the LU
3969 ** algorithm. This removes the allocation routine from the
3972 LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
3976 ** Build a problem to be solved.
3978 ptra.ptrs.p=a; /* Gotta coerce linear array to 2D array */
3979 build_problem(*ptra.ptrs.ap,n,b);
3982 ** Now that we have a problem built, see if we need to do
3983 ** auto-adjust. If so, repeatedly call the DoLUIteration routine,
3984 ** increasing the number of solutions per iteration as you go.
3986 if(loclustruct->adjust==0)
3988 loclustruct->numarrays=0;
3989 for(i=1;i<=MAXLUARRAYS;i++)
3991 abase=(fardouble *)AllocateMemory(sizeof(double) *
3992 LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
3994 { ReportError(errorcontext,systemerror);
3995 LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
3998 bbase=(fardouble *)AllocateMemory(sizeof(double) *
3999 LUARRAYROWS*(i+1),&systemerror);
4001 { ReportError(errorcontext,systemerror);
4002 LUFreeMem(a,b,abase,(fardouble *)NULL);
4005 if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
4006 { loclustruct->numarrays=i;
4010 ** Not enough arrays...free them all and try again
4012 FreeMemory((farvoid *)abase,&systemerror);
4013 FreeMemory((farvoid *)bbase,&systemerror);
4016 ** Were we able to do it?
4018 if(loclustruct->numarrays==0)
4019 { printf("FPU:LU -- Array limit reached\n");
4020 LUFreeMem(a,b,abase,bbase);
4026 ** Don't need to adjust -- just allocate the proper
4027 ** number of arrays and proceed.
4029 abase=(fardouble *)AllocateMemory(sizeof(double) *
4030 LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
4033 { ReportError(errorcontext,systemerror);
4034 LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
4037 bbase=(fardouble *)AllocateMemory(sizeof(double) *
4038 LUARRAYROWS*loclustruct->numarrays,&systemerror);
4041 ReportError(errorcontext,systemerror);
4042 LUFreeMem(a,b,abase,(fardouble *)NULL);
4047 ** All's well if we get here. Do the test.
4050 iterations=(double)0.0;
4053 accumtime+=DoLUIteration(a,b,abase,bbase,
4054 loclustruct->numarrays);
4055 iterations+=(double)loclustruct->numarrays;
4056 } while(TicksToSecs(accumtime)<loclustruct->request_secs);
4059 ** Clean up, calculate results, and go home. Be sure to
4060 ** show that we don't have to rerun adjustment code.
4062 loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);
4064 if(loclustruct->adjust==0)
4065 loclustruct->adjust=1;
4067 LUFreeMem(a,b,abase,bbase);
4074 ** Release memory associated with LU benchmark.
4076 static void LUFreeMem(fardouble *a, fardouble *b,
4077 fardouble *abase,fardouble *bbase)
4081 FreeMemory((farvoid *)a,&systemerror);
4082 FreeMemory((farvoid *)b,&systemerror);
4083 FreeMemory((farvoid *)LUtempvv,&systemerror);
4085 if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror);
4086 if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror);
4093 ** Perform an iteration of the LU decomposition benchmark.
4094 ** An iteration refers to the repeated solution of several
4095 ** identical matrices.
4097 static ulong DoLUIteration(fardouble *a,fardouble *b,
4098 fardouble *abase, fardouble *bbase,
4101 fardouble *locabase;
4102 fardouble *locbbase;
4103 LUdblptr ptra; /* For converting ptr to 2D array */
4105 ulong j,i; /* Indexes */
4109 ** Move the seed arrays (a & b) into the destination
4112 for(j=0;j<numarrays;j++)
4113 { locabase=abase+j*LUARRAYROWS*LUARRAYCOLS;
4114 locbbase=bbase+j*LUARRAYROWS;
4115 for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++)
4116 *(locabase+i)=*(a+i);
4117 for(i=0;i<LUARRAYROWS;i++)
4118 *(locbbase+i)=*(b+i);
4122 ** Do test...begin timing.
4124 elapsed=StartStopwatch();
4125 for(i=0;i<numarrays;i++)
4126 { locabase=abase+i*LUARRAYROWS*LUARRAYCOLS;
4127 locbbase=bbase+i*LUARRAYROWS;
4128 ptra.ptrs.p=locabase;
4129 lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase);
4132 return(StopStopwatch(elapsed));
4138 ** Constructs a solvable set of linear equations. It does this by
4139 ** creating an identity matrix, then loading the solution vector
4140 ** with random numbers. After that, the identity matrix and
4141 ** solution vector are randomly "scrambled". Scrambling is
4142 ** done by (a) randomly selecting a row and multiplying that
4143 ** row by a random number and (b) adding one randomly-selected
4146 static void build_problem(double a[][LUARRAYCOLS],
4148 double b[LUARRAYROWS])
4150 long i,j,k,k1; /* Indexes */
4151 double rcon; /* Random constant */
4154 ** Reset random number generator
4160 ** Build an identity matrix.
4161 ** We'll also use this as a chance to load the solution
4165 { /* b[i]=(double)(abs_randwc(100L)+1L); */
4166 b[i]=(double)(abs_randwc((int32)100)+(int32)1);
4169 /* a[i][j]=(double)(abs_randwc(1000L)+1L); */
4170 a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1);
4172 a[i][j]=(double)0.0;
4176 printf("Problem:\n");
4181 printf("%6.2f ",a[i][j]);
4183 printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]);
4191 ** Scramble. Do this 8n times. See comment above for
4192 ** a description of the scrambling process.
4198 ** Pick a row and a random constant. Multiply
4199 ** all elements in the row by the constant.
4201 /* k=abs_randwc((long)n);
4202 rcon=(double)(abs_randwc(20L)+1L);
4204 a[k][j]=a[k][j]*rcon;
4208 ** Pick two random rows and add second to
4209 ** first. Note that we also occasionally multiply
4210 ** by minus 1 so that we get a subtraction operation.
4212 /* k=abs_randwc((long)n); */
4213 /* k1=abs_randwc((long)n); */
4214 k=abs_randwc((int32)n);
4215 k1=abs_randwc((int32)n);
4218 if(k<k1) rcon=(double)1.0;
4219 else rcon=(double)-1.0;
4221 a[k][j]+=a[k1][j]*rcon;;
4233 ** From the procedure of the same name in "Numerical Recipes in Pascal",
4234 ** by Press, Flannery, Tukolsky, and Vetterling.
4235 ** Given an nxn matrix a[], this routine replaces it by the LU
4236 ** decomposition of a rowwise permutation of itself. a[] and n
4237 ** are input. a[] is output, modified as follows:
4239 ** | b(1,1) b(1,2) b(1,3)... |
4240 ** | a(2,1) b(2,2) b(2,3)... |
4241 ** | a(3,1) a(3,2) b(3,3)... |
4242 ** | a(4,1) a(4,2) a(4,3)... |
4246 ** Where the b(i,j) elements form the upper triangular matrix of the
4247 ** LU decomposition, and the a(i,j) elements form the lower triangular
4248 ** elements. The LU decomposition is calculated so that we don't
4249 ** need to store the a(i,i) elements (which would have laid along the
4250 ** diagonal and would have all been 1).
4252 ** indx[] is an output vector that records the row permutation
4253 ** effected by the partial pivoting; d is output as +/-1 depending
4254 ** on whether the number of row interchanges was even or odd,
4256 ** Returns 0 if matrix singular, else returns 1.
4258 static int ludcmp(double a[][LUARRAYCOLS],
4264 double big; /* Holds largest element value */
4266 double dum; /* Holds dummy value */
4267 int i,j,k; /* Indexes */
4268 int imax=0; /* Holds max index value */
4269 double tiny; /* A really small number */
4271 tiny=(double)1.0e-20;
4273 *d=1; /* No interchanges yet */
4278 if((double)fabs(a[i][j]) > big)
4280 /* Bail out on singular matrix */
4281 if(big==(double)0.0) return(0);
4282 LUtempvv[i]=1.0/big;
4286 ** Crout's algorithm...loop over columns.
4294 sum-=(a[i][k]*a[k][j]);
4302 sum-=a[i][k]*a[k][j];
4304 dum=LUtempvv[i]*fabs(sum);
4310 if(j!=imax) /* Interchange rows if necessary */
4316 *d=-*d; /* Change parity of d */
4318 LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */
4323 ** If the pivot element is zero, the matrix is singular
4324 ** (at least as far as the precision of the machine
4325 ** is concerned.) We'll take the original author's
4326 ** recommendation and replace 0.0 with "tiny".
4328 if(a[j][j]==(double)0.0)
4334 a[i][j]=a[i][j]*dum;
4344 ** Also from "Numerical Recipes in Pascal".
4345 ** This routine solves the set of n linear equations A X = B.
4346 ** Here, a[][] is input, not as the matrix A, but as its
4347 ** LU decomposition, created by the routine ludcmp().
4348 ** Indx[] is input as the permutation vector returned by ludcmp().
4349 ** b[] is input as the right-hand side an returns the
4350 ** solution vector X.
4351 ** a[], n, and indx are not modified by this routine and
4352 ** can be left in place for different values of b[].
4353 ** This routine takes into account the possibility that b will
4354 ** begin with many zero elements, so it is efficient for use in
4355 ** matrix inversion.
4357 static void lubksb( double a[][LUARRAYCOLS],
4359 int indx[LUARRAYROWS],
4360 double b[LUARRAYROWS])
4363 int i,j; /* Indexes */
4364 int ip; /* "pointer" into indx */
4369 ** When ii is set to a positive value, it will become
4370 ** the index of the first nonvanishing element of b[].
4371 ** We now do the forward substitution. The only wrinkle
4372 ** is to unscramble the permutation as we go.
4381 sum=sum-a[i][j]*b[j];
4384 ** If a nonzero element is encountered, we have
4385 ** to do the sums in the loop above.
4387 if(sum!=(double)0.0)
4392 ** Do backsubstitution
4394 for(i=(n-1);i>=0;i--)
4398 for(j=(i+1);j<n;j++)
4399 sum=sum-a[i][j]*b[j];
4408 ** Solve a linear set of equations: A x = b
4409 ** Original matrix A will be destroyed by this operation.
4410 ** Returns 0 if matrix is singular, 1 otherwise.
4412 static int lusolve(double a[][LUARRAYCOLS],
4414 double b[LUARRAYROWS])
4416 int indx[LUARRAYROWS];
4422 if(ludcmp(a,n,indx,&d)==0) return(0);
4424 /* Matrix not singular -- proceed */
4428 printf("Solution:\n");
4433 printf("%6.2f ",a[i][j]);
4436 printf("%6.2f\t",b[i]);