1 # _nvdajp_unicode.py
\r
2 # -*- coding: utf-8 -*-
\r
4 from __future__ import unicode_literals
\r
7 def unicode_normalize(s):
\r
8 s = s.replace('\ufffd', '') # Unicode REPLACEMENT CHARACTER
\r
9 s = s.replace('\u200e', '') # Unicode LEFT-TO-RIGHT MARK
\r
10 s = s.replace('\u200f', '') # Unicode RIGHT-TO-LEFT MARK
\r
11 # Mecab_text2mecab() で全角に変換され NFKC で戻せない文字
\r
12 s = s.replace('.', '.')
\r
13 s = unicodedata.normalize('NFKC', s)
\r
14 s = s.replace('\u2212', '-') # 0x2212 MINUS SIGN to 0x002D HYPHEN-MINUS
\r
15 s = s.replace('\u00a5', '\\') # 0x00A5 YEN SIGN
\r
16 s = s.replace('\u301c', '~') # 0x301C WAVE DASH
\r