From 4264f19acb945b1269ea2d6503c19083a6889d65 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jean-Pierre=20Andr=C3=A9?= Date: Thu, 28 Jul 2016 16:10:14 +0200 Subject: [PATCH] Cleaned up file name collation code - Update documentation for COLLATION_RULES - Document how ntfs_names_full_collate() compares names - Update comments and DEBUG code to reflect that ntfs_names_full_collate() always accesses 'upcase', even in CASE_SENSITIVE mode - Remove unneeded assignments to 'c1' and 'c2' in IGNORE_CASE mode Signed-off-by: Eric Biggers --- include/ntfs-3g/layout.h | 33 ++++++++++++--------------------- libntfs-3g/unistr.c | 28 +++++++++++++++++++--------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/include/ntfs-3g/layout.h b/include/ntfs-3g/layout.h index e23fa104..ddd29c11 100644 --- a/include/ntfs-3g/layout.h +++ b/include/ntfs-3g/layout.h @@ -515,16 +515,15 @@ typedef enum { * enum COLLATION_RULES - The collation rules for sorting views/indexes/etc * (32-bit). * - * COLLATION_UNICODE_STRING - Collate Unicode strings by comparing their binary - * Unicode values, except that when a character can be uppercased, the - * upper case value collates before the lower case one. - * COLLATION_FILE_NAME - Collate file names as Unicode strings. The collation - * is done very much like COLLATION_UNICODE_STRING. In fact I have no idea - * what the difference is. Perhaps the difference is that file names - * would treat some special characters in an odd way (see - * unistr.c::ntfs_collate_names() and unistr.c::legal_ansi_char_array[] - * for what I mean but COLLATION_UNICODE_STRING would not give any special - * treatment to any characters at all, but this is speculation. + * COLLATION_BINARY - Collate by binary compare where the first byte is most + * significant. 
+ * COLLATION_FILE_NAME - Collate Unicode strings by comparing their 16-bit + * coding units, primarily ignoring case using the volume's $UpCase table, + * but falling back to a case-sensitive comparison if the names are equal + * ignoring case. + * COLLATION_UNICODE_STRING - TODO: this is not yet implemented and still needs + * to be properly documented --- is it really the same as + * COLLATION_FILE_NAME? * COLLATION_NTOFS_ULONG - Sorting is done according to ascending le32 key * values. E.g. used for $SII index in FILE_Secure, which sorts by * security_id (le32). @@ -549,17 +548,9 @@ typedef enum { * equal then the second le32 values would be compared, etc. */ typedef enum { - COLLATION_BINARY = const_cpu_to_le32(0), /* Collate by binary - compare where the first byte is most - significant. */ - COLLATION_FILE_NAME = const_cpu_to_le32(1), /* Collate file names - as Unicode strings. */ - COLLATION_UNICODE_STRING = const_cpu_to_le32(2), /* Collate Unicode - strings by comparing their binary - Unicode values, except that when a - character can be uppercased, the upper - case value collates before the lower - case one. 
*/ + COLLATION_BINARY = const_cpu_to_le32(0), + COLLATION_FILE_NAME = const_cpu_to_le32(1), + COLLATION_UNICODE_STRING = const_cpu_to_le32(2), COLLATION_NTOFS_ULONG = const_cpu_to_le32(16), COLLATION_NTOFS_SID = const_cpu_to_le32(17), COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(18), diff --git a/libntfs-3g/unistr.c b/libntfs-3g/unistr.c index 54cfd469..4d33bb44 100644 --- a/libntfs-3g/unistr.c +++ b/libntfs-3g/unistr.c @@ -143,14 +143,24 @@ BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len, * @name1_len: length of first Unicode name to compare * @name2: second Unicode name to compare * @name2_len: length of second Unicode name to compare - * @ic: either CASE_SENSITIVE or IGNORE_CASE - * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE) - * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE) + * @ic: either CASE_SENSITIVE or IGNORE_CASE (see below) + * @upcase: upcase table + * @upcase_len: upcase table size * - * -1 if the first name collates before the second one, - * 0 if the names match, - * 1 if the second name collates before the first one, or + * If @ic is CASE_SENSITIVE, then the names are compared primarily ignoring + * case, but if the names are equal ignoring case, then they are compared + * case-sensitively. As an example, "abc" would collate before "BCD" (since + * "abc" and "BCD" differ ignoring case and 'A' < 'B') but after "ABC" (since + * "ABC" and "abc" are equal ignoring case and 'A' < 'a'). This matches the + * collation order of filenames as indexed in NTFS directories. + * + * If @ic is IGNORE_CASE, then the names are only compared case-insensitively + * and are considered to match if and only if they are equal ignoring case. 
* + * Returns: + * -1 if the first name collates before the second one, + * 0 if the names match, or + * 1 if the second name collates before the first one */ int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len, const ntfschar *name2, const u32 name2_len, @@ -162,7 +172,7 @@ int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len, u16 u1, u2; #ifdef DEBUG - if (!name1 || !name2 || (ic && (!upcase || !upcase_len))) { + if (!name1 || !name2 || !upcase || !upcase_len) { ntfs_log_debug("ntfs_names_collate received NULL pointer!\n"); exit(1); } @@ -205,9 +215,9 @@ int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len, return 1; } else { do { - u1 = c1 = le16_to_cpu(*name1); + u1 = le16_to_cpu(*name1); name1++; - u2 = c2 = le16_to_cpu(*name2); + u2 = le16_to_cpu(*name2); name2++; if (u1 < upcase_len) u1 = le16_to_cpu(upcase[u1]); -- 2.11.0