OSDN Git Service

* src/header.c (ConvertEncodingByIconv): newly added.
[lha/lha.git] / src / header.c
index a8973e6..e71ed01 100644 (file)
 /* ------------------------------------------------------------------------ */
 #include "lha.h"
 
+#if !STRCHR_8BIT_CLEAN
+/*
+ * STRCHR_8BIT_CLEAN is zero or undefined here, so every strchr() and
+ * strrchr() use in this file is redirected to xstrchr()/xstrrchr(),
+ * which should be the 8 bit clean versions.
+ */
+#undef strchr
+#undef strrchr
+#define strchr  xstrchr
+#define strrchr  xstrrchr
+#endif
+
 /* ------------------------------------------------------------------------ */
 static char    *get_ptr;
 
@@ -86,6 +94,7 @@ put_longword(v)
        put_byte(v >> 24);
 }
 
+#if 0   /* no use */
 /* ------------------------------------------------------------------------ */
 static void
 msdos_to_unix_filename(name, len)
@@ -188,13 +197,14 @@ unix_to_generic_filename(name, len)
                        name[i] = toupper(name[i]);
        }
 }
+#endif /* 0 */
 
 /* added by Koji Arai */
 static void
-filename_conv(name, len, size,
-              from_code, to_code,
-              from_delim, to_delim,
-              case_to)
+convert_filename(name, len, size,
+                 from_code, to_code,
+                 from_delim, to_delim,
+                 case_to)
        register char  *name;
        register int    len;
        register int    size;
@@ -203,6 +213,32 @@ filename_conv(name, len, size,
 
 {
        register int    i;
+#ifdef MULTIBYTE_FILENAME
+    char tmp[256];              /* 256 is sizeof(LzHeader.name) */
+
+    if (from_code == CODE_SJIS && to_code == CODE_UTF8) {
+        for (i = 0; i < len; i++)
+            if ((unsigned char)name[i] == LHA_PATHSEP)  name[i] = '/';
+        sjis_to_utf8(tmp, name, sizeof(tmp));
+        strncpy(name, tmp, size);
+        name[size-1] = 0;
+        len = strlen(name);
+        for (i = 0; i < len; i++)
+            if (name[i] == '/')  name[i] = LHA_PATHSEP;
+        from_code = CODE_UTF8;
+    }
+    else if (from_code == CODE_UTF8 && to_code == CODE_SJIS) {
+        for (i = 0; i < len; i++)
+            if ((unsigned char)name[i] == LHA_PATHSEP)  name[i] = '/';
+        utf8_to_sjis(tmp, name, sizeof(tmp));
+        strncpy(name, tmp, size);
+        name[size-1] = 0;
+        len = strlen(name);
+        for (i = 0; i < len; i++)
+            if (name[i] == '/')  name[i] = LHA_PATHSEP;
+        from_code = CODE_SJIS;
+    }
+#endif
 
        for (i = 0; i < len; i ++) {
 #ifdef MULTIBYTE_FILENAME
@@ -392,7 +428,7 @@ gettz()
 }
 #endif
 #endif                         /* defined(HAVE_FTIME) || defined(HAVE_GETTIMEOFDAY) ||
-                                * defined(HAVE_TZSET) */
+                     * defined(HAVE_TZSET) */
 
 /* ------------------------------------------------------------------------ */
 #ifdef NOT_USED
@@ -527,7 +563,7 @@ get_header(fp, hdr)
     char *system_delim = "";
     int filename_case = NONE;
 
-       bzero(hdr, sizeof(LzHeader));
+       memset(hdr, 0, sizeof(LzHeader));
 
        if (((header_size = getc(fp)) == EOF) || (header_size == 0)) {
                return FALSE;   /* finish */
@@ -559,7 +595,7 @@ get_header(fp, hdr)
        } else {
                hdr->header_size = header_size;
        }
-       bcopy(data + I_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+       memcpy(hdr->method, data + I_METHOD, METHOD_TYPE_STRAGE);
        setup_get(data + I_PACKED_SIZE);
        hdr->packed_size = get_longword();
        hdr->original_size = get_longword();
@@ -617,8 +653,6 @@ get_header(fp, hdr)
                }
                while (extend_size-- > 0)
                        dmy = get_byte();
-               if (hdr->extend_type == EXTEND_UNIX)
-                       return TRUE;
        } else if (hdr->header_level == 1) {
                hdr->has_crc = TRUE;
                extend_size = header_size - name_length-25;
@@ -733,7 +767,8 @@ get_header(fp, hdr)
 
        switch (hdr->extend_type) {
        case EXTEND_MSDOS:
-        archive_delim = "\xff\\";
+        archive_delim = "\377\\";
+                          /* `\' is for level 0 header and broken archive. */
         system_delim = "//";
         filename_case = noconvertcase ? NONE : TO_LOWER;
 
@@ -751,15 +786,17 @@ get_header(fp, hdr)
        case EXTEND_XOSK:
 #endif
        case EXTEND_UNIX:
-        archive_delim = "\xff";
+        archive_delim = "\377\\";
+                          /* `\' is for level 0 header and broken archive. */
         system_delim = "//";
         filename_case = NONE;
 
                break;
 
        case EXTEND_MACOS:
-        archive_delim = "\xff/:";
-        system_delim = "/:/";
+        archive_delim = "\377/:\\";
+                          /* `\' is for level 0 header and broken archive. */
+        system_delim = "/://";
         filename_case = NONE;
 
                hdr->unix_last_modified_stamp =
@@ -767,10 +804,11 @@ get_header(fp, hdr)
                break;
 
        default:
-        archive_delim = "\xff\\";
+        archive_delim = "\377\\";
+                          /* `\' is for level 0 header and broken archive. */
         system_delim = "//";
         filename_case = noconvertcase ? NONE : TO_LOWER;
-        /* pending: if small letter is included in filename,
+        /* FIXME: if small letter is included in filename,
            the generic_to_unix_filename() do not case conversion,
            but this code does not consider it. */
 
@@ -799,10 +837,10 @@ get_header(fp, hdr)
                name_length += dir_length;
        }
 
-    filename_conv(hdr->name, name_length, sizeof(hdr->name),
-                  archive_kanji_code,
-                  system_kanji_code,
-                  archive_delim, system_delim, filename_case);
+    convert_filename(hdr->name, name_length, sizeof(hdr->name),
+                     archive_kanji_code,
+                     system_kanji_code,
+                     archive_delim, system_delim, filename_case);
 
        return TRUE;
 }
@@ -816,22 +854,14 @@ init_header(name, v_stat, hdr)
 {
        int             len;
 
-    int system_kanji_code = default_system_kanji_code;
-    char *archive_delim = "";
-    char *system_delim = "";
-    int filename_case = NONE;
-
     memset(hdr, 0, sizeof(LzHeader));
 
-    if (optional_system_kanji_code)
-        system_kanji_code = optional_system_kanji_code;
-
        if (compress_method == LZHUFF5_METHOD_NUM)  /* Changed N.Watazaki */
-               bcopy(LZHUFF5_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHUFF5_METHOD, METHOD_TYPE_STRAGE);
        else if (compress_method)
-               bcopy(LZHUFF1_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHUFF1_METHOD, METHOD_TYPE_STRAGE);
        else
-               bcopy(LZHUFF0_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHUFF0_METHOD, METHOD_TYPE_STRAGE);
 
        hdr->packed_size = 0;
        hdr->original_size = v_stat->st_size;
@@ -878,7 +908,7 @@ init_header(name, v_stat, hdr)
 #endif
 #endif /* INCLUDE_OWNER_NAME_IN_HEADER */
        if (is_directory(v_stat)) {
-               bcopy(LZHDIRS_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHDIRS_METHOD, METHOD_TYPE_STRAGE);
                hdr->attribute = GENERIC_DIRECTORY_ATTRIBUTE;
                hdr->original_size = 0;
                if (len > 0 && hdr->name[len - 1] != '/')
@@ -889,7 +919,7 @@ init_header(name, v_stat, hdr)
        if (is_symlink(v_stat)) {
                char    lkname[256];    /* FIXME: no enough space */
                int             len;    
-               bcopy(LZHDIRS_METHOD, hdr->method, METHOD_TYPE_STRAGE);
+               memcpy(hdr->method, LZHDIRS_METHOD, METHOD_TYPE_STRAGE);
                hdr->attribute = GENERIC_DIRECTORY_ATTRIBUTE;
                hdr->original_size = 0;
                len = readlink(name, lkname, sizeof(lkname));
@@ -898,16 +928,6 @@ init_header(name, v_stat, hdr)
             error("file name is too long (%s -> %.*s)", hdr->name, len, lkname);
        }
 #endif
-
-       if (generic_format) {
-        filename_case = TO_UPPER;
-        archive_delim = "\\";
-    }
-
-    filename_conv(hdr->name, len, sizeof(hdr->name),
-                  system_kanji_code,
-                  system_kanji_code, /* no change code */
-                  system_delim, archive_delim, filename_case);
 }
 
 /* ------------------------------------------------------------------------ */
@@ -922,16 +942,21 @@ write_header(nafp, hdr)
        char            data[LZHEADER_STRAGE];
        char           *p;
        char           *headercrc_ptr;
+
     int archive_kanji_code = CODE_SJIS;
     int system_kanji_code = default_system_kanji_code;
+    char *archive_delim = "\377";
+    char *system_delim = "/";
+    int filename_case = NONE;
+       char lzname[256];
 
     if (optional_archive_kanji_code)
         archive_kanji_code = optional_archive_kanji_code;
     if (optional_system_kanji_code)
         system_kanji_code = optional_system_kanji_code;
 
-       bzero(data, LZHEADER_STRAGE);
-       bcopy(hdr->method, data + I_METHOD, METHOD_TYPE_STRAGE);
+       memset(data, 0, LZHEADER_STRAGE);
+       memcpy(data + I_METHOD, hdr->method, METHOD_TYPE_STRAGE);
        setup_put(data + I_PACKED_SIZE);
        put_longword(hdr->packed_size);
        put_longword(hdr->original_size);
@@ -953,18 +978,30 @@ write_header(nafp, hdr)
 
        put_byte(hdr->header_level);
 
-    filename_conv(hdr->name, strlen(hdr->name), sizeof(hdr->name),
-                  system_kanji_code,
-                  archive_kanji_code, /* no change code */
-                  "\xff\\/", "\xff\xff\xff", NONE);
+    if (generic_format)
+        filename_case = TO_UPPER;
+
+       if (hdr->header_level == HEADER_LEVEL0) {
+        archive_delim = "\\";
+    }
+
+    strncpy(lzname, hdr->name, sizeof(lzname));
+    convert_filename(lzname, strlen(lzname), sizeof(lzname),
+                     system_kanji_code,
+                     archive_kanji_code,
+                     system_delim, archive_delim, filename_case);
 
        if (hdr->header_level != HEADER_LEVEL2) {
-               if (p = (char *) strrchr(hdr->name, DELIM2))
-                       name_length = strlen(++p);
-               else
-                       name_length = strlen(hdr->name);
+        if (hdr->header_level == HEADER_LEVEL0 ||
+            (p = strchr(lzname, LHA_PATHSEP)) == 0)
+            p = lzname;
+        else
+            ++p;
+        /* level 0 header: write pathname (contain the directory part) */
+        /* level 1 header: write filename (basename only) */
+        name_length = strlen(p);
                put_byte(name_length);
-               bcopy(p ? p : hdr->name, data + I_NAME, name_length);
+               memcpy(data + I_NAME, p, name_length);
                setup_put(data + I_NAME + name_length);
        }
 
@@ -1031,14 +1068,14 @@ write_header(nafp, hdr)
                     put_byte(hdr->user[i]);
             }
 
-                       if (p = (char *) strrchr(hdr->name, DELIM2)) {
+                       if (p = strrchr(lzname, LHA_PATHSEP)) {
                                int             i;
 
-                               name_length = p - hdr->name + 1;
+                               name_length = p - lzname + 1;
                                put_word(name_length + 3);
                                put_byte(2);    /* dirname */
                                for (i = 0; i < name_length; i++)
-                                       put_byte(hdr->name[i]);
+                                       put_byte(lzname[i]);
                        }
                }               /* if generic .. */
 
@@ -1057,11 +1094,11 @@ write_header(nafp, hdr)
                        data[I_HEADER_CHECKSUM] = calc_sum(data + I_METHOD, header_size);
                } else {                /* header level 2 */
                        int             i;
-                       if (p = (char *) strrchr(hdr->name, DELIM2))
+                       if (p = strrchr(lzname, LHA_PATHSEP))
                                name_length = strlen(++p);
                        else {
-                               p = hdr->name;
-                               name_length = strlen(hdr->name);
+                               p = lzname;
+                               name_length = strlen(lzname);
                        }
                        put_word(name_length + 3);
                        put_byte(1);    /* filename */
@@ -1083,11 +1120,189 @@ write_header(nafp, hdr)
 
        if (fwrite(data, header_size + 2, 1, nafp) == 0)
                fatal_error("Cannot write to temporary file");
+}
 
-    filename_conv(hdr->name, strlen(hdr->name), sizeof(hdr->name),
-                  archive_kanji_code,
-                  system_kanji_code,
-                  "\xff\\/", "///", NONE);
+#if MULTIBYTE_FILENAME
+
+#if defined(__APPLE__)
+
+#include <CoreFoundation/CFString.h>
+#include <CoreFoundation/CFStringEncodingExt.h>
+
+/*
+ * Bit flags for the `flags' argument of the CFStringEncoding converters.
+ * This local declaration is not needed on Mac OS X v10.2 or later.
+ * NOTE(review): presumably the system headers already provide these on
+ * newer systems -- confirm before removing.
+ */
+enum {
+  kCFStringEncodingAllowLossyConversion = 1,
+  kCFStringEncodingBasicDirectionLeftToRight = (1 << 1),
+  kCFStringEncodingBasicDirectionRightToLeft = (1 << 2),
+  kCFStringEncodingSubstituteCombinings = (1 << 3),
+  kCFStringEncodingComposeCombinings = (1 << 4),
+  kCFStringEncodingIgnoreCombinings = (1 << 5),
+  kCFStringEncodingUseCanonical = (1 << 6),
+  kCFStringEncodingUseHFSPlusCanonical = (1 << 7),
+  kCFStringEncodingPrependBOM = (1 << 8),
+  kCFStringEncodingDisableCorporateArea = (1 << 9),
+  kCFStringEncodingASCIICompatibleConversion = (1 << 10),
+};
+
+/*
+ * Convert the NUL-terminated string `inCStr', encoded as `scriptEncoding',
+ * into UTF-8 stored in `outUTF8Buffer'.
+ *
+ * The text is first expanded into a local buffer of 512 UniChar units and
+ * then re-encoded.  At most outUTF8BufferLength-1 bytes are requested from
+ * the encoder and the output is NUL-terminated after the second stage.
+ * When the first stage fails the output buffer is left untouched.
+ *
+ * Returns the result code of the last CFStringEncoding call that ran;
+ * callers in this file treat 0 as success.
+ */
+static int
+ConvertEncodingToUTF8(const char* inCStr,
+                      char* outUTF8Buffer,
+                      int outUTF8BufferLength,
+                      unsigned long scriptEncoding,
+                      unsigned long flags)
+{
+    UniChar wide[512];
+    unsigned long wideCount;
+    unsigned long consumed;
+    unsigned long written = 0;
+    unsigned long status;
+
+    /* stage 1: source encoding -> UniChar */
+    status = CFStringEncodingBytesToUnicode(scriptEncoding, flags,
+                                            (char *)inCStr, strlen(inCStr),
+                                            &consumed,
+                                            wide, 512,
+                                            &wideCount);
+    if (status != 0)
+        return status;
+
+    /* stage 2: UniChar -> UTF-8, keeping one byte for the terminator */
+    status = CFStringEncodingUnicodeToBytes(kCFStringEncodingUTF8, flags,
+                                            wide, wideCount,
+                                            &consumed,
+                                            (char*)outUTF8Buffer,
+                                            outUTF8BufferLength - 1,
+                                            &written);
+    outUTF8Buffer[written] = '\0';
+
+    return status;
+}
+
+/*
+ * Convert `inUTF8BufLength' bytes of UTF-8 text at `inUTF8Buf' into
+ * `scriptEncoding', stored in `outCStrBuffer'.
+ *
+ * The text is first expanded into a local UniChar buffer (256 units, of
+ * which 255 are offered to the decoder) and then re-encoded.  At most
+ * outCStrBufferLength-1 bytes are requested from the encoder and the
+ * output is NUL-terminated after the second stage.  When the first stage
+ * fails the output buffer is left untouched.
+ *
+ * Returns the result code of the last CFStringEncoding call that ran;
+ * callers in this file treat 0 as success.
+ */
+static int
+ConvertUTF8ToEncoding(const char* inUTF8Buf,
+                      int inUTF8BufLength,
+                      char* outCStrBuffer,
+                      int outCStrBufferLength,
+                      unsigned long scriptEncoding,
+                      unsigned long flags)
+{
+    UniChar wide[256];
+    unsigned long wideCount;
+    unsigned long consumed;
+    unsigned long written = 0;
+    unsigned long status;
+
+    /* stage 1: UTF-8 -> UniChar */
+    status = CFStringEncodingBytesToUnicode(kCFStringEncodingUTF8, flags,
+                                            (char*)inUTF8Buf, inUTF8BufLength,
+                                            &consumed,
+                                            wide, 255,
+                                            &wideCount);
+    if (status != 0)
+        return status;
+
+    /* stage 2: UniChar -> target encoding, keeping room for the NUL */
+    status = CFStringEncodingUnicodeToBytes(scriptEncoding, flags,
+                                            wide, wideCount,
+                                            &consumed,
+                                            (char*)outCStrBuffer,
+                                            outCStrBufferLength - 1,
+                                            &written);
+    outCStrBuffer[written] = '\0';
+
+    return status;
+}
+
+#elif HAVE_ICONV
+#include <iconv.h>
+
+/*
+ * Convert the NUL-terminated string `src' from encoding `srcEnc' to
+ * encoding `dstEnc' with iconv(3), storing the result in `dst', which
+ * holds `dstsize' bytes.
+ *
+ * The conversion is done into a static 2048 byte scratch buffer and then
+ * copied out.  `dst' is zero-filled first, so it is an empty string when
+ * the conversion fails, and it is always NUL-terminated on return; a
+ * result longer than dstsize-1 bytes is truncated.
+ *
+ * Returns 0 on success.  Returns -1 when `dstsize' is not positive, or,
+ * after reporting through error(), when iconv_open() or iconv() fails.
+ */
+static int
+ConvertEncodingByIconv(const char *src, char *dst, int dstsize,
+                       const char *srcEnc, const char *dstEnc)
+{
+    iconv_t ic;
+    static char szTmpBuf[2048];
+    char *src_p;
+    char *dst_p;
+    size_t sLen;
+    size_t iLen;
+
+    /* A non-positive size would become a huge size_t in memset()/strncpy()
+       below, and leaves no room for the terminator anyway. */
+    if (dstsize <= 0)
+        return -1;
+
+    dst_p = &szTmpBuf[0];
+    iLen = (size_t)sizeof(szTmpBuf)-1;
+    src_p = (char *)src;
+    sLen = (size_t)strlen(src);
+    memset(szTmpBuf, 0, sizeof(szTmpBuf));
+    memset(dst, 0, dstsize);
+
+    ic = iconv_open(dstEnc, srcEnc);
+    if (ic == (iconv_t)-1) {
+        error("iconv_open() failure");
+        return -1;
+    }
+
+    if (iconv(ic, &src_p, &sLen, &dst_p, &iLen) == (size_t)-1) {
+        error("iconv() failure");
+        iconv_close(ic);
+        return -1;
+    }
+
+    /* dst is already zero-filled, so copying at most dstsize-1 bytes keeps
+       the final byte as the NUL terminator even when the text is cut. */
+    strncpy(dst, szTmpBuf, dstsize - 1);
+
+    iconv_close(ic);
+
+    return 0;
+}
+#endif /* defined(__APPLE__) */
+
+/*
+ * Convert the Shift_JIS string `src' to UTF-8 in `dst' (`dstsize' bytes)
+ * and return `dst'.
+ *
+ * The converter is chosen at compile time: CoreFoundation on __APPLE__,
+ * iconv when HAVE_ICONV is set, otherwise none (error() is called).
+ * When no converter is available or the conversion fails, `src' is copied
+ * to `dst' unconverted, truncated to dstsize-1 bytes and NUL-terminated.
+ */
+char *
+sjis_to_utf8(char *dst, const char *src, size_t dstsize)
+{
+#if defined(__APPLE__)
+  dst[0] = '\0';
+  if (!ConvertEncodingToUTF8(src, dst, dstsize,
+                             kCFStringEncodingDOSJapanese,
+                             kCFStringEncodingUseHFSPlusCanonical))
+      return dst;
+#elif HAVE_ICONV
+  if (ConvertEncodingByIconv(src, dst, dstsize, "SJIS", "UTF-8") != -1)
+      return dst;
+#else
+  error("not support utf-8 conversion");
+#endif
+
+  /* no usable conversion: fall back to a bounded raw copy */
+  if (dstsize >= 1) {
+      dst[dstsize-1] = 0;
+      strncpy(dst, src, dstsize-1);
+  }
+  return dst;
+}
+
+/*
+ * Convert the UTF-8 string `src' to Shift_JIS in `dst' (`dstsize' bytes)
+ * and return `dst'.
+ *
+ * The converter is chosen at compile time: CoreFoundation on __APPLE__,
+ * iconv when HAVE_ICONV is set, otherwise none (error() is called).
+ * When no converter is available or the conversion fails, `src' is copied
+ * to `dst' unconverted, truncated to dstsize-1 bytes and NUL-terminated.
+ */
+char *
+utf8_to_sjis(char *dst, const char *src, size_t dstsize)
+{
+#if defined(__APPLE__)
+  dst[0] = '\0';
+  if (!ConvertUTF8ToEncoding(src, strlen(src), dst, dstsize,
+                             kCFStringEncodingDOSJapanese,
+                             kCFStringEncodingUseHFSPlusCanonical))
+      return dst;
+#elif HAVE_ICONV
+  if (ConvertEncodingByIconv(src, dst, dstsize, "UTF-8", "SJIS") != -1)
+      return dst;
+#else
+  error("not support utf-8 conversion");
+#endif
+
+  /* no usable conversion: fall back to a bounded raw copy */
+  if (dstsize >= 1) {
+      dst[dstsize-1] = 0;
+      strncpy(dst, src, dstsize-1);
+  }
+  return dst;
 }
 
 /*
@@ -1120,6 +1335,7 @@ sjis2euc(int *p1, int *p2)
     *p1 |= 0x80;
     *p2 |= 0x80;
 }
+#endif /* MULTIBYTE_FILENAME */
 
 /* Local Variables: */
 /* mode:c */