OSDN Git Service

track
[luz/luz.git] / src / com / lavans / luz2 / commons / JapaneseTextUtil.java
1 package com.lavans.luz2.commons;\r
2 \r
3 import java.io.UnsupportedEncodingException;\r
4 \r
/**
 * Static helpers for Japanese text normalization: width conversion between
 * full-width and half-width katakana / alphanumerics / symbols,
 * hiragana-katakana conversion, byte-length measurement in Windows-31J and
 * mapping of the code points that differ between the Cp932 and JIS Unicode
 * mapping tables.
 *
 * <p>All non-ASCII characters are written as Unicode escapes on purpose, so
 * the tables cannot be damaged when the file is re-encoded or
 * width-normalized by a tool. The parallel tables below must always have
 * the same length and ordering as their counterpart.
 */
public class JapaneseTextUtil {

    /** Charset used to measure byte lengths. */
    private static final String BYTE_LENGTH_CHARSET = "Windows-31J";

    /** Half-width voiced sound mark (U+FF9E). */
    private static final char HAN_DAKUTEN = '\uFF9E';
    /** Half-width semi-voiced sound mark (U+FF9F). */
    private static final char HAN_HANDAKUTEN = '\uFF9F';
    /** Full-width spacing voiced sound mark (U+309B). */
    private static final char ZEN_DAKUTEN = '\u309B';
    /** Full-width spacing semi-voiced sound mark (U+309C). */
    private static final char ZEN_HANDAKUTEN = '\u309C';

    /** Half-width katakana, index-aligned with {@link #KATAKANA}. */
    private static final String HANKANA =
            "\uFF71\uFF72\uFF73\uFF74\uFF75"            // a i u e o
            + "\uFF67\uFF68\uFF69\uFF6A\uFF6B"          // small a i u e o
            + "\uFF76\uFF77\uFF78\uFF79\uFF7A"          // ka ki ku ke ko
            + "\uFF7B\uFF7C\uFF7D\uFF7E\uFF7F"          // sa shi su se so
            + "\uFF80\uFF81\uFF82\uFF6F\uFF83\uFF84"    // ta chi tsu small-tsu te to
            + "\uFF85\uFF86\uFF87\uFF88\uFF89"          // na ni nu ne no
            + "\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E"          // ha hi fu he ho
            + "\uFF8F\uFF90\uFF91\uFF92\uFF93"          // ma mi mu me mo
            + "\uFF94\uFF95\uFF96"                      // ya yu yo
            + "\uFF6C\uFF6D\uFF6E"                      // small ya yu yo
            + "\uFF97\uFF98\uFF99\uFF9A\uFF9B"          // ra ri ru re ro
            + "\uFF9C\uFF66\uFF9D"                      // wa wo n
            + "\uFF70"                                  // prolonged sound mark
            + "\uFF9E\uFF9F";                           // voiced / semi-voiced marks

    /** Full-width katakana, index-aligned with {@link #HANKANA}. */
    private static final String KATAKANA =
            "\u30A2\u30A4\u30A6\u30A8\u30AA"            // a i u e o
            + "\u30A1\u30A3\u30A5\u30A7\u30A9"          // small a i u e o
            + "\u30AB\u30AD\u30AF\u30B1\u30B3"          // ka ki ku ke ko
            + "\u30B5\u30B7\u30B9\u30BB\u30BD"          // sa shi su se so
            + "\u30BF\u30C1\u30C4\u30C3\u30C6\u30C8"    // ta chi tsu small-tsu te to
            + "\u30CA\u30CB\u30CC\u30CD\u30CE"          // na ni nu ne no
            + "\u30CF\u30D2\u30D5\u30D8\u30DB"          // ha hi fu he ho
            + "\u30DE\u30DF\u30E0\u30E1\u30E2"          // ma mi mu me mo
            + "\u30E4\u30E6\u30E8"                      // ya yu yo
            + "\u30E3\u30E5\u30E7"                      // small ya yu yo
            + "\u30E9\u30EA\u30EB\u30EC\u30ED"          // ra ri ru re ro
            + "\u30EF\u30F2\u30F3"                      // wa wo n
            + "\u30FC"                                  // prolonged sound mark
            + "\u309B\u309C";                           // voiced / semi-voiced marks

    /** Full-width voiced katakana (vu, ga..go, za..zo, da..do, ba..bo). */
    private static final String KATAKANA_D =
            "\u30F4"
            + "\u30AC\u30AE\u30B0\u30B2\u30B4"
            + "\u30B6\u30B8\u30BA\u30BC\u30BE"
            + "\u30C0\u30C2\u30C5\u30C7\u30C9"
            + "\u30D0\u30D3\u30D6\u30D9\u30DC";

    /** Full-width unvoiced bases, index-aligned with {@link #KATAKANA_D}. */
    private static final String KATAKANA_D_BASE =
            "\u30A6"
            + "\u30AB\u30AD\u30AF\u30B1\u30B3"
            + "\u30B5\u30B7\u30B9\u30BB\u30BD"
            + "\u30BF\u30C1\u30C4\u30C6\u30C8"
            + "\u30CF\u30D2\u30D5\u30D8\u30DB";

    /** Half-width unvoiced bases, index-aligned with {@link #KATAKANA_D}. */
    private static final String HANKANA_D =
            "\uFF73"
            + "\uFF76\uFF77\uFF78\uFF79\uFF7A"
            + "\uFF7B\uFF7C\uFF7D\uFF7E\uFF7F"
            + "\uFF80\uFF81\uFF82\uFF83\uFF84"
            + "\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E";

    /** Full-width semi-voiced katakana (pa pi pu pe po). */
    private static final String KATAKANA_H = "\u30D1\u30D4\u30D7\u30DA\u30DD";
    /** Full-width unvoiced bases, index-aligned with {@link #KATAKANA_H}. */
    private static final String KATAKANA_H_BASE = "\u30CF\u30D2\u30D5\u30D8\u30DB";
    /** Half-width unvoiced bases, index-aligned with {@link #KATAKANA_H}. */
    private static final String HANKANA_H = "\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E";

    /** Half-width ideographic comma and full stop. */
    private static final String HANKANA_KUTOUTEN = "\uFF64\uFF61";
    /** Full-width ideographic comma and full stop. */
    private static final String KATAKANA_KUTOUTEN = "\u3001\u3002";

    /** Distance between a full-width ASCII variant (U+FF01..U+FF5E) and ASCII. */
    private static final int FULLWIDTH_OFFSET = 0xFEE0;
    /** Ideographic (full-width) space. */
    private static final char ZEN_SPACE = '\u3000';
    /** Full-width yen sign. */
    private static final char ZEN_YEN = '\uFFE5';
    /** Latin-1 yen sign. */
    private static final char HAN_YEN = '\u00A5';

    /** Full-width symbols, index-aligned with {@link #HAN_SYMBOLS}. */
    private static final String ZEN_SYMBOLS =
            "\uFF0B\uFF0D\u2015\u2010\uFF0A\uFF0F\uFF1D\uFF5C\uFF01\uFF1F"
            + "\u201D\uFF03\uFF20\uFF04\uFF05\uFF06\u2019\uFF40\uFF08\uFF09"
            + "\uFF3B\uFF3D\uFF0C\uFF0E\uFF1B\uFF1A\uFF3F\uFF1C\uFF1E\uFF3E"
            + "\uFF5B\uFF5D\u30FB";

    /**
     * Half-width symbols, index-aligned with {@link #ZEN_SYMBOLS}. Several
     * full-width dashes collapse onto '-'; the reverse lookup uses the first
     * match, so '-' becomes the full-width hyphen-minus.
     */
    private static final String HAN_SYMBOLS =
            "+---*/=|!?\"#@$%&'`()[],.;:_<>^{}\uFF65";

    /**
     * Returns the number of bytes {@code str} occupies when encoded as
     * Windows-31J.
     *
     * @param str text to measure; may be null
     * @return the encoded length in bytes, or 0 when {@code str} is null
     * @throws IllegalStateException if the runtime does not provide the
     *         Windows-31J charset (previously this silently returned 0)
     */
    public static int getByteLength(String str) {
        if (str == null) {
            return 0;
        }
        try {
            return str.getBytes(BYTE_LENGTH_CHARSET).length;
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(
                    "Charset not available: " + BYTE_LENGTH_CHARSET, e);
        }
    }

    /**
     * Converts full-width katakana to half-width katakana. Voiced and
     * semi-voiced characters become a base character followed by the
     * half-width sound mark. Punctuation and all other characters are copied
     * unchanged.
     *
     * @param kanaStr text to convert; may be null
     * @return the converted text, or null when {@code kanaStr} is null
     */
    public static String toHankana(String kanaStr) {
        return convertToHankana(kanaStr, false);
    }

    /**
     * Same as {@link #toHankana(String)} but additionally converts the
     * ideographic comma and full stop to their half-width forms.
     *
     * @param kanaStr text to convert; may be null
     * @return the converted text, or null when {@code kanaStr} is null
     */
    public static String toHankanaKutouten(String kanaStr) {
        return convertToHankana(kanaStr, true);
    }

    /** Shared full-width to half-width katakana loop for the two public variants. */
    private static String convertToHankana(String kanaStr, boolean withPunctuation) {
        if (kanaStr == null) {
            return null;
        }
        StringBuilder out = new StringBuilder(kanaStr.length());
        for (int i = 0; i < kanaStr.length(); i++) {
            char kana = kanaStr.charAt(i);
            int index;
            if ((index = KATAKANA.indexOf(kana)) >= 0) {
                out.append(HANKANA.charAt(index));
            } else if ((index = KATAKANA_D.indexOf(kana)) >= 0) {
                out.append(HANKANA_D.charAt(index)).append(HAN_DAKUTEN);
            } else if ((index = KATAKANA_H.indexOf(kana)) >= 0) {
                out.append(HANKANA_H.charAt(index)).append(HAN_HANDAKUTEN);
            } else if (withPunctuation
                    && (index = KATAKANA_KUTOUTEN.indexOf(kana)) >= 0) {
                out.append(HANKANA_KUTOUTEN.charAt(index));
            } else {
                out.append(kana);
            }
        }
        return out.toString();
    }

    /**
     * Converts a single half-width katakana character (including the
     * half-width sound marks) to its full-width form.
     *
     * @param kana character to convert
     * @return the full-width character, or {@code kana} itself when it is
     *         not a half-width katakana character from the table
     */
    public static char toKanaFull(char kana) {
        int index = HANKANA.indexOf(kana);
        return index >= 0 ? KATAKANA.charAt(index) : kana;
    }

    /**
     * Converts half-width katakana to full-width katakana and merges a
     * voiced / semi-voiced sound mark into the preceding character when that
     * character has a precomposed voiced form (for example half-width
     * "ka" + mark becomes full-width "ga"). Both the half-width marks and the
     * full-width spacing marks are merged. A mark that cannot be merged
     * (start of string, or a base that cannot be voiced) is kept as the
     * full-width spacing mark instead of corrupting the preceding character.
     *
     * @param str text to convert; may be null
     * @return the converted text, or null when {@code str} is null
     */
    public static String toKanaFull(java.lang.String str) {
        if (str == null) {
            return null;
        }
        StringBuilder out = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char full = toKanaFull(str.charAt(i));
            if (full == ZEN_DAKUTEN || full == ZEN_HANDAKUTEN) {
                boolean voiced = full == ZEN_DAKUTEN;
                String bases = voiced ? KATAKANA_D_BASE : KATAKANA_H_BASE;
                String merged = voiced ? KATAKANA_D : KATAKANA_H;
                int last = out.length() - 1;
                int index = last >= 0 ? bases.indexOf(out.charAt(last)) : -1;
                if (index >= 0) {
                    // Replace the base with its precomposed form; the mark is consumed.
                    out.setCharAt(last, merged.charAt(index));
                    continue;
                }
            }
            out.append(full);
        }
        return out.toString();
    }

    /**
     * Converts full-width and half-width katakana to hiragana. Half-width
     * input is first normalized with {@link #toKanaFull(String)}. Only
     * U+30A1..U+30F6 is shifted, because only that range has a hiragana
     * counterpart exactly 0x60 below it.
     *
     * @param str text to convert; may be null
     * @return the converted text, or null when {@code str} is null
     */
    public static String toHiragana(String str) {
        if (str == null) {
            return null;
        }
        String full = toKanaFull(str);
        StringBuilder out = new StringBuilder(full.length());
        for (int i = 0; i < full.length(); i++) {
            char ch = full.charAt(i);
            if (ch >= 0x30A1 && ch <= 0x30F6) {
                ch -= 0x60;
            }
            out.append(ch);
        }
        return out.toString();
    }

    /**
     * Converts hiragana to full-width katakana. Only U+3041..U+3096 is
     * shifted; combining sound marks and unassigned code points around the
     * hiragana letters are left untouched.
     *
     * @param str text to convert; may be null
     * @return the converted text, or null when {@code str} is null
     */
    public static String toKatakana(String str) {
        if (str == null) {
            return null;
        }
        StringBuilder out = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (ch >= 0x3041 && ch <= 0x3096) {
                ch += 0x60;
            }
            out.append(ch);
        }
        return out.toString();
    }

    /**
     * Normalizes full-width alphanumerics, the ideographic space and the
     * symbols in the symbol table to their half-width forms (useful for
     * postal codes, phone numbers, domain names). Not exhaustive.
     *
     * @param str text to convert; may be null
     * @return the converted text, or null when {@code str} is null
     */
    public static java.lang.String toHalf(java.lang.String str) {
        if (str == null) {
            return null;
        }
        StringBuilder out = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            int idx;
            if ((ch >= '\uFF41' && ch <= '\uFF5A')          // full-width a-z
                    || (ch >= '\uFF21' && ch <= '\uFF3A')   // full-width A-Z
                    || (ch >= '\uFF10' && ch <= '\uFF19')) { // full-width 0-9
                ch = (char) (ch - FULLWIDTH_OFFSET);
            } else if (ch == ZEN_SPACE) {
                ch = ' ';
            } else if ((idx = ZEN_SYMBOLS.indexOf(ch)) >= 0) {
                ch = HAN_SYMBOLS.charAt(idx);
            } else if (ch == ZEN_YEN || ch == HAN_YEN) {
                // Debatable: a yen sign is treated as the JIS 0x5C position.
                ch = '\\';
            }
            out.append(ch);
        }
        return out.toString();
    }

    /**
     * Normalizes ASCII alphanumerics, the space and the symbols in the
     * symbol table to their full-width forms.
     *
     * @param str text to convert; may be null
     * @return the converted text; null or empty input is returned as is
     */
    public static java.lang.String toFull(java.lang.String str) {
        if (str == null || str.length() == 0) {
            return str;
        }
        StringBuilder out = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            int idx;
            if ((ch >= 'a' && ch <= 'z')
                    || (ch >= 'A' && ch <= 'Z')
                    || (ch >= '0' && ch <= '9')) {
                ch = (char) (ch + FULLWIDTH_OFFSET);
            } else if (ch == ' ') {
                ch = ZEN_SPACE;
            } else if ((idx = HAN_SYMBOLS.indexOf(ch)) >= 0) {
                ch = ZEN_SYMBOLS.charAt(idx);
            } else if (ch == '\\') {
                // Debatable: a backslash is rendered as the full-width yen sign.
                ch = ZEN_YEN;
            }
            out.append(ch);
        }
        return out.toString();
    }

    /**
     * Converts katakana, alphanumerics and symbols from full-width to
     * half-width.
     *
     * @param string text to convert; may be null
     * @return the converted text; null or empty input is returned as is
     */
    public static String toHalfAll(String string) {
        if (string == null || string.length() == 0) {
            return string;
        }
        return toHalf(toHankana(string));
    }

    /**
     * Converts katakana, alphanumerics and symbols from half-width to
     * full-width.
     *
     * @param string text to convert; may be null
     * @return the converted text; null or empty input is returned as is
     */
    public static String toFullAll(String string) {
        if (string == null || string.length() == 0) {
            return string;
        }
        return toFull(toKanaFull(string));
    }

    /**
     * Replaces the code points produced by the Cp932 Unicode mapping with
     * the ones used by the JIS mapping.
     *
     * @param s text to convert; may be null
     * @return the converted text, or null when {@code s} is null
     */
    public static String toJIS(String s) {
        if (s == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case 0xff3c:    // FULLWIDTH REVERSE SOLIDUS ->
                    c = 0x005c; // REVERSE SOLIDUS
                    break;
                case 0xff5e:    // FULLWIDTH TILDE ->
                    c = 0x301c; // WAVE DASH
                    break;
                case 0x2225:    // PARALLEL TO ->
                    c = 0x2016; // DOUBLE VERTICAL LINE
                    break;
                case 0xff0d:    // FULLWIDTH HYPHEN-MINUS ->
                    c = 0x2212; // MINUS SIGN
                    break;
                case 0xffe0:    // FULLWIDTH CENT SIGN ->
                    c = 0x00a2; // CENT SIGN
                    break;
                case 0xffe1:    // FULLWIDTH POUND SIGN ->
                    c = 0x00a3; // POUND SIGN
                    break;
                case 0xffe2:    // FULLWIDTH NOT SIGN ->
                    c = 0x00ac; // NOT SIGN
                    break;
                case 0x2015:    // HORIZONTAL BAR ->
                    c = 0x2014; // EM DASH
                    break;
                default:
                    break;
            }
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Replaces the code points used by the JIS Unicode mapping with the ones
     * produced by the Cp932 mapping (inverse of {@link #toJIS(String)}).
     *
     * @param s text to convert; may be null
     * @return the converted text, or null when {@code s} is null
     */
    public static String toCp932(String s) {
        if (s == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case 0x005c:    // REVERSE SOLIDUS ->
                    c = 0xff3c; // FULLWIDTH REVERSE SOLIDUS
                    break;
                case 0x301c:    // WAVE DASH ->
                    c = 0xff5e; // FULLWIDTH TILDE
                    break;
                case 0x2016:    // DOUBLE VERTICAL LINE ->
                    c = 0x2225; // PARALLEL TO
                    break;
                case 0x2212:    // MINUS SIGN ->
                    c = 0xff0d; // FULLWIDTH HYPHEN-MINUS
                    break;
                case 0x00a2:    // CENT SIGN ->
                    c = 0xffe0; // FULLWIDTH CENT SIGN
                    break;
                case 0x00a3:    // POUND SIGN ->
                    c = 0xffe1; // FULLWIDTH POUND SIGN
                    break;
                case 0x00ac:    // NOT SIGN ->
                    c = 0xffe2; // FULLWIDTH NOT SIGN
                    break;
                case 0x2014:    // EM DASH ->
                    c = 0x2015; // HORIZONTAL BAR
                    break;
                default:
                    break;
            }
            sb.append(c);
        }
        return sb.toString();
    }
}