OSDN Git Service

* src/header.c (convert_filename): fixed bug on utf8 conversion,
[lha/lha.git] / src / header.c
index c063e4b..c327601 100644 (file)
 /* ------------------------------------------------------------------------ */
 #include "lha.h"
 
+#ifdef __APPLE__
+/* Forward declarations of the Apple-only converters defined later in this file. */
+static int ConvertEncodingToUTF8(const char* inCStr,
+                                 char* outUTF8Buffer,
+                                 int outUTF8BufferLength,
+                                 unsigned long scriptEncoding,
+                                 unsigned long flags);
+static int ConvertUTF8ToEncoding(const char* inUTF8Buf,
+                                 int inUTF8BufLength,
+                                 char* outCStrBuffer,
+                                 int outCStrBufferLength,
+                                 unsigned long scriptEncoding,
+                                 unsigned long flags);
+#endif /* __APPLE__ */
+
 /* ------------------------------------------------------------------------ */
 static char    *get_ptr;
 
@@ -22,12 +27,12 @@ char *optional_archive_delim = NULL;
 char *optional_system_delim = NULL;
 int optional_filename_case = NONE;
 
-#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__hpux)
-/* Cygwin, HP-UX and other UNIX are able to use SJIS as native code. */
-int default_system_kanji_code = CODE_SJIS;
+#ifdef MULTIBYTE_FILENAME
+int default_system_kanji_code = MULTIBYTE_FILENAME; /* value comes from the build-time macro; presumably a CODE_* constant -- confirm */
 #else
-int default_system_kanji_code = CODE_EUC;
+int default_system_kanji_code = NONE; /* used when MULTIBYTE_FILENAME is not defined */
 #endif
+
 /* ------------------------------------------------------------------------ */
 int
 calc_sum(p, len)
@@ -94,7 +99,7 @@ msdos_to_unix_filename(name, len)
 {
        register int    i;
 
-#ifdef MULTIBYTE_CHAR
+#ifdef MULTIBYTE_FILENAME
        for (i = 0; i < len; i++) {
                if (MULTIBYTE_FIRST_P(name[i]) &&
                    MULTIBYTE_SECOND_P(name[i + 1]))
@@ -123,7 +128,7 @@ generic_to_unix_filename(name, len)
        register int    i;
        boolean         lower_case_used = FALSE;
 
-#ifdef MULTIBYTE_CHAR
+#ifdef MULTIBYTE_FILENAME
        for (i = 0; i < len; i++) {
                if (MULTIBYTE_FIRST_P(name[i]) &&
                    MULTIBYTE_SECOND_P(name[i + 1]))
@@ -191,10 +196,10 @@ unix_to_generic_filename(name, len)
 
 /* added by Koji Arai */
 static void
-filename_conv(name, len, size,
-              from_code, to_code,
-              from_delim, to_delim,
-              case_to)
+convert_filename(name, len, size,
+                 from_code, to_code,
+                 from_delim, to_delim,
+                 case_to)
        register char  *name;
        register int    len;
        register int    size;
@@ -203,9 +208,33 @@ filename_conv(name, len, size,
 
 {
        register int    i;
+#ifdef MULTIBYTE_FILENAME
+    char tmp[256];              /* 256 is sizeof(LzHeader.name) */
+
+    if (from_code == CODE_SJIS && to_code == CODE_UTF8) {
+        for (i = 0; i < len; i++)
+            if (name[i] == '\xff')  name[i] = '/';
+        sjis_to_utf8(tmp, name, sizeof(tmp));
+        strncpy(name, tmp, size);
+        name[size-1] = 0;
+        len = strlen(name);
+        for (i = 0; i < len; i++)
+            if (name[i] == '/')  name[i] = '\xff';
+    }
+    else if (from_code == CODE_UTF8 && to_code == CODE_SJIS) {
+        for (i = 0; i < len; i++)
+            if (name[i] == '\xff')  name[i] = '/';
+        utf8_to_sjis(tmp, name, sizeof(tmp));
+        strncpy(name, tmp, size);
+        name[size-1] = 0;
+        len = strlen(name);
+        for (i = 0; i < len; i++)
+            if (name[i] == '/')  name[i] = '\xff';
+    }
+#endif
 
        for (i = 0; i < len; i ++) {
-#ifdef MULTIBYTE_CHAR
+#ifdef MULTIBYTE_FILENAME
         if (from_code == CODE_EUC &&
             (unsigned char)name[i] == 0x8e) {
             if (to_code != CODE_SJIS) {
@@ -264,7 +293,7 @@ filename_conv(name, len, size,
                        i++;
             continue;
         }
-#endif /* MULTIBYTE_CHAR */
+#endif /* MULTIBYTE_FILENAME */
         {
             char *ptr;
 
@@ -527,7 +556,7 @@ get_header(fp, hdr)
     char *system_delim = "";
     int filename_case = NONE;
 
-       bzero(hdr, sizeof(LzHeader));
+       memset(hdr, 0, sizeof(LzHeader));
 
        if (((header_size = getc(fp)) == EOF) || (header_size == 0)) {
                return FALSE;   /* finish */
@@ -547,7 +576,7 @@ get_header(fp, hdr)
        }
 
        if (hdr->header_level >= 3) {
-               fatal_error("Unknown level header");
+               fatal_error("Unknown level header (level %d)", hdr->header_level);
                return FALSE;
        }
 
@@ -559,7 +588,7 @@ get_header(fp, hdr)
        } else {
                hdr->header_size = header_size;
        }
-       bcopy(data + I_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+       memcpy(hdr->method, data + I_METHOD, METHOD_TYPE_STRAGE);
        setup_get(data + I_PACKED_SIZE);
        hdr->packed_size = get_longword();
        hdr->original_size = get_longword();
@@ -568,7 +597,7 @@ get_header(fp, hdr)
 
        if ((hdr->header_level = get_byte()) != 2) {
                if (calc_sum(data + I_METHOD, header_size) != checksum)
-                       warning("Checksum error (LHarc file?)", "");
+                       warning("Checksum error (LHarc file?)");
                name_length = get_byte();
                for (i = 0; i < name_length; i++)
                        hdr->name[i] = (char) get_byte();
@@ -591,8 +620,8 @@ get_header(fp, hdr)
                                hdr->extend_type = EXTEND_GENERIC;
                                hdr->has_crc = FALSE;
                        } else {
-                               fatal_error("Unkonwn header (lha file?)");
-                               return FALSE;
+                               error("Unkonwn header (lha file?)");
+                exit(1);
                        }
                } else {
                        hdr->has_crc = TRUE;
@@ -658,7 +687,7 @@ get_header(fp, hdr)
                                for (i = 0; i < header_size - 3; i++)
                                        hdr->name[i] = (char) get_byte();
                                hdr->name[header_size - 3] = '\0';
-                               name_length = header_size - 3; /* modified by Koji Arai */
+                               name_length = header_size - 3;
                                break;
                        case 2:
                                /*
@@ -698,13 +727,17 @@ get_header(fp, hdr)
                                /*
                                 * UNIX group name
                                 */
-                               setup_get(get_ptr + header_size - 3);
+                for (i = 0; i < header_size - 3; i++)
+                    hdr->group[i] = get_byte();
+                hdr->group[i] = '\0';
                                break;
                        case 0x53:
                                /*
                                 * UNIX user name
                                 */
-                               setup_get(get_ptr + header_size - 3);
+                for (i = 0; i < header_size - 3; i++)
+                    hdr->user[i] = get_byte();
+                hdr->user[i] = '\0';
                                break;
                        case 0x54:
                                /*
@@ -766,7 +799,7 @@ get_header(fp, hdr)
         archive_delim = "\xff\\";
         system_delim = "//";
         filename_case = noconvertcase ? NONE : TO_LOWER;
-        /* pending: if small letter is included in filename,
+        /* FIXME: if small letter is included in filename,
            the generic_to_unix_filename() do not case conversion,
            but this code does not consider it. */
 
@@ -795,10 +828,10 @@ get_header(fp, hdr)
                name_length += dir_length;
        }
 
-    filename_conv(hdr->name, name_length, sizeof(hdr->name),
-                  archive_kanji_code,
-                  system_kanji_code,
-                  archive_delim, system_delim, filename_case);
+    convert_filename(hdr->name, name_length, sizeof(hdr->name),
+                     archive_kanji_code,
+                     system_kanji_code,
+                     archive_delim, system_delim, filename_case);
 
        return TRUE;
 }
@@ -817,15 +850,17 @@ init_header(name, v_stat, hdr)
     char *system_delim = "";
     int filename_case = NONE;
 
+    memset(hdr, 0, sizeof(LzHeader));
+
     if (optional_system_kanji_code)
         system_kanji_code = optional_system_kanji_code;
 
        if (compress_method == LZHUFF5_METHOD_NUM)  /* Changed N.Watazaki */
-               bcopy(LZHUFF5_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHUFF5_METHOD, METHOD_TYPE_STRAGE);
        else if (compress_method)
-               bcopy(LZHUFF1_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHUFF1_METHOD, METHOD_TYPE_STRAGE);
        else
-               bcopy(LZHUFF0_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHUFF0_METHOD, METHOD_TYPE_STRAGE);
 
        hdr->packed_size = 0;
        hdr->original_size = v_stat->st_size;
@@ -847,8 +882,32 @@ init_header(name, v_stat, hdr)
        hdr->unix_uid = v_stat->st_uid;
        hdr->unix_gid = v_stat->st_gid;
 
+#if INCLUDE_OWNER_NAME_IN_HEADER
+#if HAVE_GETPWUID
+    {
+        struct passwd *ent = getpwuid(hdr->unix_uid);
+
+        if (ent) {
+            strncpy(hdr->user, ent->pw_name, sizeof(hdr->user));
+            if (hdr->user[sizeof(hdr->user)-1])
+                hdr->user[sizeof(hdr->user)-1] = 0;
+        }
+    }
+#endif
+#if HAVE_GETGRGID
+    {
+        struct group *ent = getgrgid(hdr->unix_gid);
+
+        if (ent) {
+            strncpy(hdr->group, ent->gr_name, sizeof(hdr->group));
+            if (hdr->group[sizeof(hdr->group)-1])
+                hdr->group[sizeof(hdr->group)-1] = 0;
+        }
+    }
+#endif
+#endif /* INCLUDE_OWNER_NAME_IN_HEADER */
        if (is_directory(v_stat)) {
-               bcopy(LZHDIRS_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHDIRS_METHOD, METHOD_TYPE_STRAGE);
                hdr->attribute = GENERIC_DIRECTORY_ATTRIBUTE;
                hdr->original_size = 0;
                if (len > 0 && hdr->name[len - 1] != '/')
@@ -857,14 +916,15 @@ init_header(name, v_stat, hdr)
 
 #ifdef S_IFLNK 
        if (is_symlink(v_stat)) {
-               char    lkname[257];
+               char    lkname[256];    /* FIXME: no enough space */
                int             len;    
-               bcopy(LZHDIRS_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHDIRS_METHOD, METHOD_TYPE_STRAGE);
                hdr->attribute = GENERIC_DIRECTORY_ATTRIBUTE;
                hdr->original_size = 0;
-               len = readlink(name, lkname, 256);
-               lkname[len] = (char)'\0';
-               sprintf(hdr->name, "%s|%s", hdr->name, lkname);
+               len = readlink(name, lkname, sizeof(lkname));
+               if (xsnprintf(hdr->name, sizeof(hdr->name),
+                      "%s|%.*s", hdr->name, len, lkname) == -1)
+            error("file name is too long (%s -> %.*s)", hdr->name, len, lkname);
        }
 #endif
 
@@ -873,10 +933,10 @@ init_header(name, v_stat, hdr)
         archive_delim = "\\";
     }
 
-    filename_conv(hdr->name, len, sizeof(hdr->name),
-                  system_kanji_code,
-                  system_kanji_code, /* no change code */
-                  system_delim, archive_delim, filename_case);
+    convert_filename(hdr->name, len, sizeof(hdr->name),
+                     system_kanji_code,
+                     system_kanji_code, /* no change code */
+                     system_delim, archive_delim, filename_case);
 }
 
 /* ------------------------------------------------------------------------ */
@@ -893,14 +953,15 @@ write_header(nafp, hdr)
        char           *headercrc_ptr;
     int archive_kanji_code = CODE_SJIS;
     int system_kanji_code = default_system_kanji_code;
+       char            lzname[256];
 
     if (optional_archive_kanji_code)
         archive_kanji_code = optional_archive_kanji_code;
     if (optional_system_kanji_code)
         system_kanji_code = optional_system_kanji_code;
 
-       bzero(data, LZHEADER_STRAGE);
-       bcopy(hdr->method, data + I_METHOD, METHOD_TYPE_STRAGE);
+       memset(data, 0, LZHEADER_STRAGE);
+       memcpy(data + I_METHOD, hdr->method, METHOD_TYPE_STRAGE);
        setup_put(data + I_PACKED_SIZE);
        put_longword(hdr->packed_size);
        put_longword(hdr->original_size);
@@ -922,18 +983,19 @@ write_header(nafp, hdr)
 
        put_byte(hdr->header_level);
 
-    filename_conv(hdr->name, strlen(hdr->name), sizeof(hdr->name),
-                  system_kanji_code,
-                  archive_kanji_code, /* no change code */
-                  "\xff\\/", "\xff\xff\xff", NONE);
+    strncpy(lzname, hdr->name, sizeof(lzname));
+    convert_filename(lzname, strlen(lzname), sizeof(lzname),
+                     system_kanji_code,
+                     archive_kanji_code,
+                     "\xff\\/", "\xff\xff\xff", NONE);
 
        if (hdr->header_level != HEADER_LEVEL2) {
-               if (p = (char *) strrchr(hdr->name, DELIM2))
+               if (p = (char *) strrchr(lzname, DELIM2))
                        name_length = strlen(++p);
                else
-                       name_length = strlen(hdr->name);
+                       name_length = strlen(lzname);
                put_byte(name_length);
-               bcopy(p ? p : hdr->name, data + I_NAME, name_length);
+               memcpy(data + I_NAME, p ? p : lzname, name_length);
                setup_put(data + I_NAME + name_length);
        }
 
@@ -986,14 +1048,28 @@ write_header(nafp, hdr)
                        put_word(hdr->unix_gid);
                        put_word(hdr->unix_uid);
 
-                       if (p = (char *) strrchr(hdr->name, DELIM2)) {
+            {
+                int i, len = strlen(hdr->group);
+                put_word(len + 3);
+                put_byte(0x52);        /* group name */
+                for (i = 0; i < len; i++)
+                    put_byte(hdr->group[i]);
+
+                len = strlen(hdr->user);
+                put_word(len + 3);
+                put_byte(0x53);        /* user name */
+                for (i = 0; i < len; i++)
+                    put_byte(hdr->user[i]);
+            }
+
+                       if (p = (char *) strrchr(lzname, DELIM2)) {
                                int             i;
 
-                               name_length = p - hdr->name + 1;
+                               name_length = p - lzname + 1;
                                put_word(name_length + 3);
                                put_byte(2);    /* dirname */
                                for (i = 0; i < name_length; i++)
-                                       put_byte(hdr->name[i]);
+                                       put_byte(lzname[i]);
                        }
                }               /* if generic .. */
 
@@ -1012,11 +1088,11 @@ write_header(nafp, hdr)
                        data[I_HEADER_CHECKSUM] = calc_sum(data + I_METHOD, header_size);
                } else {                /* header level 2 */
                        int             i;
-                       if (p = (char *) strrchr(hdr->name, DELIM2))
+                       if (p = (char *) strrchr(lzname, DELIM2))
                                name_length = strlen(++p);
                        else {
-                               p = hdr->name;
-                               name_length = strlen(hdr->name);
+                               p = lzname;
+                               name_length = strlen(lzname);
                        }
                        put_word(name_length + 3);
                        put_byte(1);    /* filename */
@@ -1036,13 +1112,130 @@ write_header(nafp, hdr)
                put_word(hcrc);
        }
 
-       if (fwrite(data, sizeof(char), header_size + 2, nafp) == 0)
+       if (fwrite(data, header_size + 2, 1, nafp) == 0)
                fatal_error("Cannot write to temporary file");
+}
 
-    filename_conv(hdr->name, strlen(hdr->name), sizeof(hdr->name),
-                  archive_kanji_code,
-                  system_kanji_code,
-                  "\xff\\/", "///", NONE);
+#ifdef __APPLE__
+/*
+ * Conversion flag bits passed as `flags' to the converters below.
+ * These definitions are not needed on Mac OS X v10.2 and later.
+ */
+enum {
+  kCFStringEncodingAllowLossyConversion      = 0x0001,
+  kCFStringEncodingBasicDirectionLeftToRight = 0x0002,
+  kCFStringEncodingBasicDirectionRightToLeft = 0x0004,
+  kCFStringEncodingSubstituteCombinings      = 0x0008,
+  kCFStringEncodingComposeCombinings         = 0x0010,
+  kCFStringEncodingIgnoreCombinings          = 0x0020,
+  kCFStringEncodingUseCanonical              = 0x0040,
+  kCFStringEncodingUseHFSPlusCanonical       = 0x0080,
+  kCFStringEncodingPrependBOM                = 0x0100,
+  kCFStringEncodingDisableCorporateArea      = 0x0200,
+  kCFStringEncodingASCIICompatibleConversion = 0x0400
+};
+
+/*
+ * Convert the NUL-terminated string `inCStr', encoded as `scriptEncoding',
+ * into UTF-8 in `outUTF8Buffer' (capacity `outUTF8BufferLength' bytes,
+ * one of which is reserved for the terminating NUL).
+ *
+ * The text is first turned into an intermediate UniChar array of at most
+ * 512 elements and then encoded as UTF-8.  The output buffer is only
+ * written (and NUL-terminated) when the first step reports 0.
+ *
+ * Returns the result of the last CoreFoundation call that was made;
+ * this code treats 0 as success.
+ */
+static int
+ConvertEncodingToUTF8(const char* inCStr,
+                      char* outUTF8Buffer,
+                      int outUTF8BufferLength,
+                      unsigned long scriptEncoding,
+                      unsigned long flags)
+{
+    UniChar utf16[512];
+    unsigned long utf16Count;
+    unsigned long consumed;
+    unsigned long written = 0;
+    unsigned long status;
+
+    /* step 1: source encoding -> UniChar array */
+    status = CFStringEncodingBytesToUnicode(scriptEncoding,
+                                            flags,
+                                            (char *)inCStr,
+                                            strlen(inCStr),
+                                            &consumed,
+                                            utf16,
+                                            512,
+                                            &utf16Count);
+    if (status != 0)
+        return status;
+
+    /* step 2: UniChar array -> UTF-8 bytes, keeping room for the NUL */
+    status = CFStringEncodingUnicodeToBytes(kCFStringEncodingUTF8,
+                                            flags,
+                                            utf16,
+                                            utf16Count,
+                                            &consumed,
+                                            (char*)outUTF8Buffer,
+                                            outUTF8BufferLength - 1,
+                                            &written);
+    outUTF8Buffer[written] = '\0';
+
+    return status;
+}
+
+/*
+ * Convert `inUTF8BufLength' bytes of UTF-8 text at `inUTF8Buf' into the
+ * encoding `scriptEncoding', storing the result in `outCStrBuffer'
+ * (capacity `outCStrBufferLength' bytes, one of which is reserved for
+ * the terminating NUL).
+ *
+ * The text is first turned into an intermediate UniChar array (at most
+ * 255 elements are requested) and then encoded.  The output buffer is
+ * only written (and NUL-terminated) when the first step reports 0.
+ *
+ * Returns the result of the last CoreFoundation call that was made;
+ * this code treats 0 as success.
+ */
+static int
+ConvertUTF8ToEncoding(const char* inUTF8Buf,
+                      int inUTF8BufLength,
+                      char* outCStrBuffer,
+                      int outCStrBufferLength,
+                      unsigned long scriptEncoding,
+                      unsigned long flags)
+{
+    UniChar utf16[256];
+    unsigned long utf16Count;
+    unsigned long consumed;
+    unsigned long written = 0;
+    unsigned long status;
+
+    /* step 1: UTF-8 bytes -> UniChar array */
+    status = CFStringEncodingBytesToUnicode(kCFStringEncodingUTF8,
+                                            flags,
+                                            (char*)inUTF8Buf,
+                                            inUTF8BufLength,
+                                            &consumed,
+                                            utf16,
+                                            255,
+                                            &utf16Count);
+    if (status != 0)
+        return status;
+
+    /* step 2: UniChar array -> target encoding, keeping room for the NUL */
+    status = CFStringEncodingUnicodeToBytes(scriptEncoding,
+                                            flags,
+                                            utf16,
+                                            utf16Count,
+                                            &consumed,
+                                            (char*)outCStrBuffer,
+                                            outCStrBufferLength - 1,
+                                            &written);
+    outCStrBuffer[written] = '\0';
+
+    return status;
+}
+#endif /* __APPLE__ */
+
+/*
+ * Convert the NUL-terminated Shift_JIS string `src' to UTF-8, storing a
+ * NUL-terminated result of at most `dstsize' bytes in `dst'.
+ *
+ * On __APPLE__ the conversion is delegated to ConvertEncodingToUTF8();
+ * `dst' is pre-set to the empty string so it is a valid string even if
+ * that call writes nothing.
+ *
+ * On every other platform no conversion is available.  The bytes of
+ * `src' are then copied through unchanged (truncated to fit), so that
+ * callers which read `dst' afterwards never see an uninitialized buffer.
+ *
+ * Returns `dst'.  When `dstsize' is 0 nothing is written.
+ */
+char *
+sjis_to_utf8(char *dst, const char *src, size_t dstsize)
+{
+#ifndef __APPLE__
+  size_t n;
+#endif
+
+  if (dstsize == 0)
+    return dst;               /* no room even for the terminator */
+
+#ifdef __APPLE__
+  dst[0] = '\0';
+  ConvertEncodingToUTF8(src, dst, dstsize,
+                        kCFStringEncodingDOSJapanese,
+                        kCFStringEncodingUseHFSPlusCanonical);
+
+#else
+  /* not supported: pass the name through unconverted */
+  n = strlen(src);
+  if (n > dstsize - 1)
+    n = dstsize - 1;
+  memcpy(dst, src, n);
+  dst[n] = '\0';
+#endif
+  return dst;
+}
+
+/*
+ * Convert the NUL-terminated UTF-8 string `src' to Shift_JIS, storing a
+ * NUL-terminated result of at most `dstsize' bytes in `dst'.
+ *
+ * On __APPLE__ the conversion is delegated to ConvertUTF8ToEncoding();
+ * `dst' is pre-set to the empty string so it is a valid string even if
+ * that call writes nothing.
+ *
+ * On every other platform no conversion is available.  The bytes of
+ * `src' are then copied through unchanged (truncated to fit), so that
+ * callers which read `dst' afterwards never see an uninitialized buffer.
+ *
+ * Returns `dst'.  When `dstsize' is 0 nothing is written.
+ */
+char *
+utf8_to_sjis(char *dst, const char *src, size_t dstsize)
+{
+#ifdef __APPLE__
+  int srclen;
+#else
+  size_t n;
+#endif
+
+  if (dstsize == 0)
+    return dst;               /* no room even for the terminator */
+
+#ifdef __APPLE__
+  dst[0] = '\0';
+  srclen = strlen(src);
+  ConvertUTF8ToEncoding(src, srclen, dst, dstsize,
+                        kCFStringEncodingDOSJapanese,
+                        kCFStringEncodingUseHFSPlusCanonical);
+#else
+  /* not supported: pass the name through unconverted */
+  n = strlen(src);
+  if (n > dstsize - 1)
+    n = dstsize - 1;
+  memcpy(dst, src, n);
+  dst[n] = '\0';
+#endif
+  return dst;
 }
 
 /*