6 * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
7 * Copyright (C) 2009, 2010, MinGW Project
10 * Hashing functions, used to generate CRC hashes from path names,
11 * and to derive signature file names from hashed path names.
14 * This is free software. Permission is granted to copy, modify and
15 * redistribute this software, under the provisions of the GNU General
16 * Public License, Version 3, (or, at your option, any later version),
17 * as published by the Free Software Foundation; see the file COPYING
18 * for licensing details.
20 * Note, in particular, that this software is provided "as is", in the
21 * hope that it may prove useful, but WITHOUT WARRANTY OF ANY KIND; not
22 * even an implied WARRANTY OF MERCHANTABILITY, nor of FITNESS FOR ANY
23 * PARTICULAR PURPOSE. Under no circumstances will the author, or the
24 * MinGW Project, accept liability for any damages, however caused,
25 * arising from the use of this software.
32 #if defined( __MS_DOS__ ) || defined( _WIN32 ) || defined( __CYGWIN__ )
35 * When generating path name hashes for Microsoft file systems, we want
36 * the same result irrespective of case distinctions in the input name,
37 * or of choice of `/' or `\' as the directory name separator; thus we
38 * use this input mapping function which passes each input character to
39 * the hashing algorithm as its lower case equivalent, and `\' as if it
40 * had been specified as `/'.
42 static __inline__ __attribute__((__always_inline__))
43 unsigned long input_as_ulong( const char value )
46 return (unsigned long)('/');
48 return (unsigned long)(tolower(value));
52 * Alternatively, for non-Microsoft file systems, we simply cast each
53 * input character to its literal unsigned long equivalent value.
55 # define input_as_ulong( value ) (unsigned long)(value)
58 static __inline__ unsigned long generic_crc_update
59 ( unsigned bits, unsigned long poly, unsigned long input, unsigned long hash )
61 /* Helper function; it incorporates the effect of the next input byte
62 * into the hash, as already computed from all preceding input bytes,
63 * using the specified generator polynomial and bit length.
65 int bitcount = 8; /* bits in input byte */
66 unsigned long mask = 1UL << (bits - 1); /* most significant bit */
68 /* The input byte is passed as the least significant 8-bits of the
69 * associated unsigned long argument; we begin by aligning its most
70 * significant bit with the most significant bit of the hash...
72 input <<= bits - bitcount;
73 while( bitcount-- > 0 )
75 /* ...then for all eight input bits, from most significant to
76 * least significant...
78 if( (hash ^ input) & mask )
80 * ...a 'one' bit here indicates that appending the current
81 * input bit to the current interim CRC residual makes that
82 * residual modulo-2 divisible by the generator polynomial,
83 * so align and modulo-2 subtract (i.e. XOR) it.
85 hash = (hash << 1) ^ poly;
88 /* ...otherwise, we simply shift out the most significant bit
89 * of the original hash value, (effectively appending a 'zero'
90 * bit to the CRC quotient, and adjusting the residual hash),
91 * in preparation for processing the next input bit...
95 /* ...and, in either case, progress to the next input bit.
100 /* Ultimately, we return the new value of the hash...
101 * Note that we don't discard superfluous carry bits here;
102 * the caller *must* mask the return value, to extract the
103 * appropriate number of least significant bits from the
104 * returned value, to obtain the specified CRC hash.
109 unsigned long generic_crc
110 ( unsigned bits, unsigned long poly, const char *input, size_t length )
112 /* Compute a CRC hash of specified bit length, using the specified
113 * generator polynomial, for the given input byte stream buffer of
116 unsigned long hash = 0UL; /* Initial value */
117 while( length-- > 0 )
119 * Update the hash value, processing each byte of the input buffer
120 * in turn, until all specified bytes have been processed.
122 hash = generic_crc_update( bits, poly, input_as_ulong( *input++ ), hash );
124 /* Note that, for CRCs of fewer than the number of bits in an unsigned
125 * long, the accumulated hash may include unwanted noise, (carried out),
126 * in the more significant bits. The required hash value is to be found
127 * in the specified number of less significant bits; mask out the noise,
128 * and return the required hash value.
130 return hash & ((1UL << bits) - 1L);
133 char *hashed_name( int retry, const char *tag, const char *refname )
135 /* Generate a hashed name, comprising the specified 'tag' prefix,
136 * followed by the collision retry index, the length and a pair of
137 * distinct CRC hashes, which is representative of the specified
138 * 'refname' string. The result is returned in a buffer allocated
139 * dynamically on the heap; this should be freed by the caller,
140 * when no longer required.
143 size_t len = strlen( tag );
145 /* While hash collision may be improbable, it is not impossible;
146 * to mitigate, we provide a collection of generator polynomials,
147 * selected in pairs indexed by the 'retry' parameter, offering
148 * eight hash possibilities for each input 'refname'.
150 static unsigned long p16[] = /* 16-bit CRC generator polynomials */
152 0x1021, /* CCITT standard */
153 0x8408, /* CCITT reversed */
154 0x8005, /* CRC-16 standard */
155 0xA001 /* CRC-16 reversed */
158 static unsigned long p24[] = /* 24-bit CRC generator polynomials */
160 0x5d6dcb, /* CRC-24 (FlexRay) */
161 0x864cfb /* CRC-24 (OpenPGP) */
164 if( (retname = malloc( 19 + len )) != NULL )
166 /* Return buffer successfully allocated; populate it...
169 len = strlen( refname );
170 sprintf( retname, "%s-%u-%03u-%04x-%06x", tag, retry, len,
171 (unsigned)(generic_crc( 16, p16[retry>>1], refname, len )), /* 16-bit CRC */
172 (unsigned)(generic_crc( 24, p24[retry&1], refname, len )) /* 24-bit CRC */
176 /* Return value is either a pointer to the populated buffer,
177 * or NULL on allocation failure.
182 /* $RCSfile$: end of file */