From: Jean-Pierre André Date: Tue, 2 Sep 2014 07:45:16 +0000 (+0200) Subject: Reengineered the compression algorithm X-Git-Tag: android-x86-6.0-r1~5^2~43 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=b6152757c505a6fc6479bf6d501bf199807068c6;p=android-x86%2Fexternal-ntfs-3g.git Reengineered the compression algorithm This patch changes the algorithm to use hash chains instead of binary trees, with much stronger hashing. It also introduces useful (for performance) parameters, such as the "nice match length" and "maximum search depth", that are similar to those used in other commonly used compression algorithms such as zlib's DEFLATE implementation. The speed improvement is very significant, with some loss of compression rate. The compression rate is still better than the Windows one. Contributed by Eric Biggers --- diff --git a/libntfs-3g/compress.c b/libntfs-3g/compress.c index 73ad2838..f1070aa2 100644 --- a/libntfs-3g/compress.c +++ b/libntfs-3g/compress.c @@ -6,6 +6,7 @@ * Copyright (c) 2004-2006 Szabolcs Szakacsits * Copyright (c) 2005 Yura Pakhuchiy * Copyright (c) 2009-2014 Jean-Pierre Andre + * Copyright (c) 2014 Eric Biggers * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -21,17 +22,6 @@ * along with this program (in the main directory of the NTFS-3G * distribution in the file COPYING); if not, write to the Free Software * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * A part of the compression algorithm is based on lzhuf.c whose header - * describes the roles of the original authors (with no apparent copyright - * notice, and according to http://home.earthlink.net/~neilbawd/pall.html - * this was put into public domain in 1988 by Haruhiko OKUMURA). 
- * - * LZHUF.C English version 1.0 - * Based on Japanese version 29-NOV-1988 - * LZSS coded by Haruhiko OKUMURA - * Adaptive Huffman Coding coded by Haruyasu YOSHIZAKI - * Edited and translated to English by Kenji RIKITAKE */ #ifdef HAVE_CONFIG_H @@ -81,96 +71,191 @@ typedef enum { NTFS_SB_IS_COMPRESSED = 0x8000, } ntfs_compression_constants; +/* Match length at or above which ntfs_best_match() will stop searching for + * longer matches. */ +#define NICE_MATCH_LEN 18 + +/* Maximum number of potential matches that ntfs_best_match() will consider at + * each position. */ +#define MAX_SEARCH_DEPTH 24 + +/* log base 2 of the number of entries in the hash table for match-finding. */ +#define HASH_SHIFT 14 + +/* Constant for the multiplicative hash function. */ +#define HASH_MULTIPLIER 0x1E35A7BD + struct COMPRESS_CONTEXT { const unsigned char *inbuf; int bufsize; int size; int rel; int mxsz; - s16 head[256]; - s16 lson[NTFS_SB_SIZE]; - s16 rson[NTFS_SB_SIZE]; + s16 head[1 << HASH_SHIFT]; + s16 prev[NTFS_SB_SIZE]; } ; /* + * Hash the next 3-byte sequence in the input buffer + */ +static inline unsigned int ntfs_hash(const u8 *p) +{ + u32 str; + u32 hash; + +#if defined(__i386__) || defined(__x86_64__) + /* Unaligned access allowed, and little endian CPU. + * Callers ensure that at least 4 (not 3) bytes are remaining. */ + str = *(const u32 *)p & 0xFFFFFF; +#else + str = ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16); +#endif + + hash = str * HASH_MULTIPLIER; + + /* High bits are more random than the low bits. */ + return hash >> (32 - HASH_SHIFT); +} + +/* * Search for the longest sequence matching current position * - * A binary tree is maintained to locate all previously met sequences, - * and this function has to be called for all of them. + * A hash table, each entry of which points to a chain of sequence + * positions sharing the corresponding hash code, is maintained to speed up + * searching for matches. 
To maintain the hash table, either + * ntfs_best_match() or ntfs_skip_position() has to be called for each + * consecutive position. + * + * This function is heavily used; it has to be optimized carefully. + * + * This function sets pctx->size and pctx->rel to the length and offset, + * respectively, of the longest match found. + * + * The minimum match length is assumed to be 3, and the maximum match + * length is assumed to be pctx->mxsz. If this function produces + * pctx->size < 3, then no match was found. * - * This function is heavily used, it has to be optimized carefully + * Note: for the following reasons, this function is not guaranteed to find + * *the* longest match up to pctx->mxsz: * - * Returns the size of the longest match, - * zero if no match is found. + * (1) If this function finds a match of NICE_MATCH_LEN bytes or greater, + * it ends early because a match this long is good enough and it's not + * worth spending more time searching. + * + * (2) If this function considers MAX_SEARCH_DEPTH matches with a single + * position, it ends early and returns the longest match found so far. + * This saves a lot of time on degenerate inputs. 
*/ - -static int ntfs_best_match(struct COMPRESS_CONTEXT *pctx, int i) +static void ntfs_best_match(struct COMPRESS_CONTEXT *pctx, const int i, + int best_len) { - s16 *prev; - int node; - register long j; - long maxpos; - long startj; - long bestj; - int bufsize; - int bestnode; - register const unsigned char *p1,*p2; - - p1 = pctx->inbuf; - node = pctx->head[p1[i] & 255]; - if (node >= 0) { - /* search the best match at current position */ - bestnode = node; - bufsize = pctx->bufsize; - /* restrict matches to the longest allowed sequence */ - maxpos = bufsize; - if ((i + pctx->mxsz) < maxpos) - maxpos = i + pctx->mxsz; - startj = i + 1 - maxpos; - bestj = startj; - /* make indexes relative to end of allowed position */ - p1 = &p1[maxpos]; - if (startj < 0) { - do { - /* indexes are negative */ - p2 = &p1[node - i]; - /* no need to compare the first byte */ - j = startj; - /* the second byte cannot lead to useful compression */ - if (p1[j] == p2[j]) { - j++; - if (j < 0) { - do { - } while ((p1[j] == p2[j]) - && (++j < 0)); - } - /* remember the match, if better */ - if (j > bestj) { - bestj = j; - bestnode = node; - } + const u8 * const inbuf = pctx->inbuf; + const u8 * const strptr = &inbuf[i]; /* String we're matching against */ + s16 * const prev = pctx->prev; + const int max_len = min(pctx->bufsize - i, pctx->mxsz); + const int nice_len = min(NICE_MATCH_LEN, max_len); + int depth_remaining = MAX_SEARCH_DEPTH; + const u8 *best_matchptr = strptr; + unsigned int hash; + s16 cur_match; + const u8 *matchptr; + int len; + + if (max_len < 4) + goto out; + + /* Insert the current sequence into the appropriate hash chain. */ + hash = ntfs_hash(strptr); + cur_match = pctx->head[hash]; + prev[i] = cur_match; + pctx->head[hash] = i; + + if (best_len >= max_len) { + /* Lazy match is being attempted, but there aren't enough length + * bits remaining to code a longer match. */ + goto out; + } + + /* Search the appropriate hash chain for matches. 
*/ + + for (; cur_match >= 0 && depth_remaining--; + cur_match = prev[cur_match]) + { + + matchptr = &inbuf[cur_match]; + + /* Considering the potential match at 'matchptr': is it longer + * than 'best_len'? + * + * The bytes at index 'best_len' are the most likely to differ, + * so check them first. + * + * The bytes at indices 'best_len - 1' and '0' are less + * important to check separately. But doing so still gives a + * slight performance improvement, at least on x86_64, probably + * because they create separate branches for the CPU to predict + * independently of the branches in the main comparison loops. + */ + if (matchptr[best_len] != strptr[best_len] || + matchptr[best_len - 1] != strptr[best_len - 1] || + matchptr[0] != strptr[0]) + goto next_match; + + for (len = 1; len < best_len - 1; len++) + if (matchptr[len] != strptr[len]) + goto next_match; + + /* The match is the longest found so far --- + * at least 'best_len' + 1 bytes. Continue extending it. */ + + best_matchptr = matchptr; + + do { + if (++best_len >= nice_len) { + /* 'nice_len' reached; don't waste time + * searching for longer matches. Extend the + * match as far as possible and terminate the + * search. */ + while (best_len < max_len && + (best_matchptr[best_len] == + strptr[best_len])) + { + best_len++; } - /* walk in the tree in the right direction */ - if ((j < 0) && (p1[j] < p2[j])) - prev = &pctx->lson[node]; - else - prev = &pctx->rson[node]; - node = *prev; - /* stop if reaching a leaf or maximum length */ - } while ((node >= 0) && (j < 0)); - /* put the node into the tree if we reached a leaf */ - if (node < 0) - *prev = i; - } - /* done, return the best match */ - pctx->size = bestj + maxpos - i; - pctx->rel = bestnode - i; - } else { - pctx->head[p1[i] & 255] = i; - pctx->size = 0; - pctx->rel = 0; + goto out; + } + } while (best_matchptr[best_len] == strptr[best_len]); + + /* Found a longer match, but 'nice_len' not yet reached. 
*/ + + next_match: + /* Continue to next match in the chain. */ + ; } - return (pctx->size); + + /* Reached end of chain, or ended early due to reaching the maximum + * search depth. */ + +out: + /* Return the longest match we were able to find. */ + pctx->size = best_len; + pctx->rel = best_matchptr - strptr; /* given as a negative number! */ +} + +/* + * Advance the match-finder, but don't search for matches. + */ +static void ntfs_skip_position(struct COMPRESS_CONTEXT *pctx, const int i) +{ + unsigned int hash; + + if (pctx->bufsize - i < 4) + return; + + /* Insert the current sequence into the appropriate hash chain. */ + hash = ntfs_hash(pctx->inbuf + i); + pctx->prev[i] = pctx->head[hash]; + pctx->head[hash] = i; } /* @@ -188,7 +273,7 @@ static int ntfs_best_match(struct COMPRESS_CONTEXT *pctx, int i) * 0 if an error has been met. */ -static unsigned int ntfs_compress_block(const char *inbuf, int bufsize, +static unsigned int ntfs_compress_block(const char *inbuf, const int bufsize, char *outbuf) { struct COMPRESS_CONTEXT *pctx; @@ -196,109 +281,168 @@ static unsigned int ntfs_compress_block(const char *inbuf, int bufsize, int j; /* end of best match from current position */ int k; /* end of best match from next position */ int offs; /* offset to best match */ - int n; int bp; /* bits to store offset */ + int bp_cur; /* saved bits to store offset at current position */ int mxoff; /* max match offset : 1 << bp */ - int mxsz2; unsigned int xout; unsigned int q; /* aggregated offset and size */ - int done; + int have_match; /* do we have a match at the current position? 
*/ char *ptag; /* location reserved for a tag */ int tag; /* current value of tag */ int ntag; /* count of bits still undefined in tag */ - pctx = (struct COMPRESS_CONTEXT*)ntfs_malloc(sizeof(struct COMPRESS_CONTEXT)); - if (pctx) { - for (n=0; nlson[n] = pctx->rson[n] = -1; - for (n=0; n<256; n++) - pctx->head[n] = -1; - pctx->inbuf = (const unsigned char*)inbuf; - pctx->bufsize = bufsize; - xout = 2; - n = 0; - i = 0; - bp = 4; - mxoff = 1 << bp; - pctx->mxsz = (1 << (16 - bp)) + 2; - tag = 0; - done = -1; - ntag = 8; - ptag = &outbuf[xout++]; - while ((i < bufsize) && (xout < (NTFS_SB_SIZE + 2))) { - /* adjust the longest match we can output */ + pctx = ntfs_malloc(sizeof(struct COMPRESS_CONTEXT)); + if (!pctx) { + errno = ENOMEM; + return 0; + } + + /* All hash chains start as empty. The special value '-1' indicates the + * end of each hash chain. */ + memset(pctx->head, 0xFF, sizeof(pctx->head)); + + pctx->inbuf = (const unsigned char*)inbuf; + pctx->bufsize = bufsize; + xout = 2; + i = 0; + bp = 4; + mxoff = 1 << bp; + pctx->mxsz = (1 << (16 - bp)) + 2; + have_match = 0; + tag = 0; + ntag = 8; + ptag = &outbuf[xout++]; + + while ((i < bufsize) && (xout < (NTFS_SB_SIZE + 2))) { + + /* This implementation uses "lazy" parsing: it always chooses + * the longest match, unless the match at the next position is + * longer. This is the same strategy used by the high + * compression modes of zlib. */ + + if (!have_match) { + /* Find the longest match at the current position. But + * first adjust the maximum match length if needed. + * (This loop might need to run more than one time in + * the case that we just output a long match.) */ while (mxoff < i) { bp++; mxoff <<= 1; pctx->mxsz = (pctx->mxsz + 2) >> 1; } - /* search the best match at current position */ - if (done < i) - do { - ntfs_best_match(pctx,++done); - } while (done < i); + ntfs_best_match(pctx, i, 2); + } + + if (pctx->size >= 3) { + + /* Found a match at the current position. 
*/ + j = i + pctx->size; - if ((j - i) > pctx->mxsz) - j = i + pctx->mxsz; - - if ((j - i) > 2) { - offs = pctx->rel; - /* check whether there is a better run at i+1 */ - ntfs_best_match(pctx,i+1); - done = i+1; + bp_cur = bp; + offs = pctx->rel; + + if (pctx->size >= NICE_MATCH_LEN) { + + /* Choose long matches immediately. */ + + q = (~offs << (16 - bp_cur)) + (j - i - 3); + outbuf[xout++] = q & 255; + outbuf[xout++] = (q >> 8) & 255; + tag |= (1 << (8 - ntag)); + + if (j == bufsize) { + /* Shortcut if the match extends to the + * end of the buffer. */ + i = j; + --ntag; + break; + } + i += 1; + do { + ntfs_skip_position(pctx, i); + } while (++i != j); + have_match = 0; + } else { + /* Check for a longer match at the next + * position. */ + + /* Doesn't need to be while() since we just + * adjusted the maximum match length at the + * previous position. */ + if (mxoff < i + 1) { + bp++; + mxoff <<= 1; + pctx->mxsz = (pctx->mxsz + 2) >> 1; + } + ntfs_best_match(pctx, i + 1, pctx->size); k = i + 1 + pctx->size; - mxsz2 = pctx->mxsz; - if (mxoff <= i) - mxsz2 = (pctx->mxsz + 2) >> 1; - if ((k - i) > mxsz2) - k = i + mxsz2; + if (k > (j + 1)) { - /* issue a single byte */ - outbuf[xout++] = inbuf[i]; - i++; + /* Next match is longer. + * Output a literal. */ + outbuf[xout++] = inbuf[i++]; + have_match = 1; } else { - q = (~offs << (16 - bp)) - + (j - i - 3); + /* Next match isn't longer. + * Output the current match. */ + q = (~offs << (16 - bp_cur)) + + (j - i - 3); outbuf[xout++] = q & 255; outbuf[xout++] = (q >> 8) & 255; tag |= (1 << (8 - ntag)); - i = j; + + /* The minimum match length is 3, and + * we've run two bytes through the + * matchfinder already. So the minimum + * number of positions we need to skip + * is 1. 
*/ + i += 2; + do { + ntfs_skip_position(pctx, i); + } while (++i != j); + have_match = 0; } - } else { - outbuf[xout++] = inbuf[i]; - i++; - } - /* store the tag if fully used */ - if (!--ntag) { - *ptag = tag; - ntag = 8; - ptag = &outbuf[xout++]; - tag = 0; } + } else { + /* No match at current position. Output a literal. */ + outbuf[xout++] = inbuf[i++]; + have_match = 0; } - /* store the last tag, if partially used */ - if (ntag == 8) - xout--; - else + + /* Store the tag if fully used. */ + if (!--ntag) { *ptag = tag; - /* uncompressed must be full size, accept if better */ - if ((i >= bufsize) && (xout < (NTFS_SB_SIZE + 2))) { - outbuf[0] = (xout - 3) & 255; - outbuf[1] = 0xb0 + (((xout - 3) >> 8) & 15); - } else { - memcpy(&outbuf[2],inbuf,bufsize); - if (bufsize < NTFS_SB_SIZE) - memset(&outbuf[bufsize+2], 0, - NTFS_SB_SIZE - bufsize); - outbuf[0] = 0xff; - outbuf[1] = 0x3f; - xout = NTFS_SB_SIZE + 2; + ntag = 8; + ptag = &outbuf[xout++]; + tag = 0; } - free(pctx); + } + + /* Store the last tag if partially used. */ + if (ntag == 8) + xout--; + else + *ptag = tag; + + /* Determine whether to store the data compressed or uncompressed. */ + + if ((i >= bufsize) && (xout < (NTFS_SB_SIZE + 2))) { + /* Compressed. */ + outbuf[0] = (xout - 3) & 255; + outbuf[1] = 0xb0 + (((xout - 3) >> 8) & 15); } else { - xout = 0; - errno = ENOMEM; + /* Uncompressed. */ + memcpy(&outbuf[2], inbuf, bufsize); + if (bufsize < NTFS_SB_SIZE) + memset(&outbuf[bufsize + 2], 0, NTFS_SB_SIZE - bufsize); + outbuf[0] = 0xff; + outbuf[1] = 0x3f; + xout = NTFS_SB_SIZE + 2; } + + /* Free the compression context and return the total number of bytes + * written to 'outbuf'. */ + free(pctx); return (xout); }