From b22c4ae3986e7ffd2959f78cb409fe413a500662 Mon Sep 17 00:00:00 2001 From: hikarupsp Date: Mon, 25 Feb 2013 18:51:44 +0900 Subject: [PATCH] =?utf8?q?libtest:=E3=83=A9=E3=82=A4=E3=83=96=E3=83=A9?= =?utf8?q?=E3=83=AA=E3=83=86=E3=82=B9=E3=83=88=E7=94=A8=E3=82=BD=E3=83=BC?= =?utf8?q?=E3=82=B9=E3=82=92=E8=BF=BD=E5=8A=A0=E3=80=82=20chnlib:CHNLIB=5F?= =?utf8?q?UIPArray=5FGetSeparatedUTF8Character=E3=82=92=E8=BF=BD=E5=8A=A0?= =?utf8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- CHNOSProject/chn/chnlib.c | 2 +- CHNOSProject/chn/chnlib.h | 1 + CHNOSProject/chn/chnlib02.c | 52 +++++++++++++++++++++++++++++++++++++ CHNOSProject/chn/chnlib04.c | 15 ++++++++++- CHNOSProject/libtest/libtest/main.c | 25 ++++++++++++++++++ 5 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 CHNOSProject/libtest/libtest/main.c diff --git a/CHNOSProject/chn/chnlib.c b/CHNOSProject/chn/chnlib.c index 1b96da3..cd8729c 100755 --- a/CHNOSProject/chn/chnlib.c +++ b/CHNOSProject/chn/chnlib.c @@ -180,7 +180,7 @@ void CHNLIB_Debug_Internal_PrintStructureDataSub(void *structure, uint level) for(j = 0; j < i; j++){ p = CHNLIB_UIPArray_GetPointerByIndex(structure, j); CHNLIB_Debug_Internal_PrintIndent(level); - CHNLIB_Debug("%s[%p]:[%d]=(%d,[%p])", "", 0, "", CHNLIB_Debug_Internal_GetStructTypeNameByID(typeid), structure, j, CHNLIB_UIPArray_GetData32ByIndex(structure, j), p); + CHNLIB_Debug("%s[%p]:[%d]=(%u,[%p])", "", 0, "", CHNLIB_Debug_Internal_GetStructTypeNameByID(typeid), structure, j, CHNLIB_UIPArray_GetData32ByIndex(structure, j), p); if(CHNLIB_Debug_PrintStructureData_RecursiveCounter != 0){ CHNLIB_Debug_Internal_PrintStructureDataSub(p, level + 1); } diff --git a/CHNOSProject/chn/chnlib.h b/CHNOSProject/chn/chnlib.h index 3bca6ed..6d9c68d 100755 --- a/CHNOSProject/chn/chnlib.h +++ b/CHNOSProject/chn/chnlib.h @@ -113,6 +113,7 @@ uint CHNLIB_CString_GetCountOfContain(const char s[], const char search[]); //@chnlib02.c int 
CHNLIB_String_Search_UIPArrayStringLocation(const CHNLIB_String *s, int s_start, const CHNLIB_UIPArray *list, int *location); int CHNLIB_UIPArray_GetSeparatedStringByUIPArray(CHNLIB_UIPArray **separated, const CHNLIB_UIPArray *list, const CHNLIB_String *s); +int CHNLIB_UIPArray_GetSeparatedUTF8Character(CHNLIB_UIPArray **separated, const CHNLIB_String *s); CHNLIB_String *CHNLIB_ReadLine(FILE *fp); //@chnlib03.c diff --git a/CHNOSProject/chn/chnlib02.c b/CHNOSProject/chn/chnlib02.c index e37e969..160ecab 100644 --- a/CHNOSProject/chn/chnlib02.c +++ b/CHNOSProject/chn/chnlib02.c @@ -93,6 +93,58 @@ int CHNLIB_UIPArray_GetSeparatedStringByUIPArray(CHNLIB_UIPArray **separated, co return 0; } +int CHNLIB_UIPArray_GetSeparatedUTF8Character(CHNLIB_UIPArray **separated, const CHNLIB_String *s){ + //Splits string s into individual UTF-8 characters and appends one entry per character to separated: data32 holds the character's Unicode code point and pointer holds a String containing just that character. + //Incomplete UTF-8 sequences are ignored. Returns 1 if separated is NULL or s is not a String, otherwise 0. + const char *refstr; + int i, i_max, type; + int phase, start; + uint unicode; + + if(separated == NULL || CHNLIB_StructureHeader_GetTypeID(s) != CHNLIB_STRUCT_ID_String){ + return 1; + } + + refstr = CHNLIB_String_GetReferencePointerOfCString(s); + i_max = CHNLIB_String_GetLength(s); + + phase = 0; + unicode = 0; + for(i = 0; i < i_max; i++){ + type = CHNLIB_UTF8_GetCharacterType(refstr[i]); + switch (type) { + case 1: + CHNLIB_UIPArray_AppendLast(separated, refstr[i], CHNLIB_String_ExtractByLength(s, i, 1)); //a 1-byte character: extract exactly one byte starting at i + phase = 0; + unicode = 0; + break; + + case 0: + if(phase > 0){ + unicode <<= 6; + unicode |= (refstr[i] & 0x3f); + phase--; + if(phase == 0){ + //one character completed + CHNLIB_UIPArray_AppendLast(separated, unicode, CHNLIB_String_ExtractByLength(s, start, i - start + 1)); + } + } + break; + + case 2: + case 3: + case 4: + start = i; + unicode = (unsigned char)refstr[i] & (0xff >> (type + 1)); //keep only the payload bits of the lead byte; masking as unsigned avoids shifting a possibly negative char + phase = type - 1; + break; + } + } + + return 0; +} + + CHNLIB_String *CHNLIB_ReadLine(FILE *fp) { char s[CHNLIB_MAX_STRING_LENGTH]; diff --git a/CHNOSProject/chn/chnlib04.c 
b/CHNOSProject/chn/chnlib04.c index b4a4576..7b05572 100644 --- a/CHNOSProject/chn/chnlib04.c +++ b/CHNOSProject/chn/chnlib04.c @@ -19,22 +19,35 @@ int CHNLIB_UTF8_GetCharacterType(char c) { + //Returns the role that the single byte c plays within a UTF-8 string. if(((c >> 6) & 3) == 2){ //マルチバイト後続バイト + //10xxxxxx return 0; } else if(((c >> 7) & 1) == 0){ //1Byte + //7bit + //0xxxxxxx return 1; } else if(((c >> 5) & 7) == 6){ //2Byte + //11bit + //110xxxxx return 2; } else if(((c >> 4) & 15) == 14){ //3Byte + //16bit + //1110xxxx return 3; } else if(((c >> 3) & 31) == 30){ //4Byte + //21bit + //11110xxx return 4; } return 0; -} \ No newline at end of file +} + + + diff --git a/CHNOSProject/libtest/libtest/main.c b/CHNOSProject/libtest/libtest/main.c new file mode 100644 index 0000000..914c72d --- /dev/null +++ b/CHNOSProject/libtest/libtest/main.c @@ -0,0 +1,25 @@ +// +// main.c +// libtest +// +// Created by 西田 耀 on 13/02/25. +// Copyright (c) 2013年 Hikaru Nishida. All rights reserved. +// + +#include <stdio.h> +#include "chnlib.h" + +int main(int argc, const char * argv[]) +{ + CHNLIB_String *s; + CHNLIB_UIPArray *separated; + + separated = CHNLIB_UIPArray_Initialize(); + s = CHNLIB_String_Initialize("あいうえお漢字。"); + + CHNLIB_UIPArray_GetSeparatedUTF8Character(&separated, s); + CHNLIB_Debug_PrintStructureData(separated, 0); + + return 0; +} + -- 2.11.0