1 # -*- coding: utf-8 -*-
3 # Author: mshio <mshio@users.osdn.me>
8 from fontparser import all_of_kanjis
def parse_source(filepath):
    """Read one character-list file and return its code points.

    Lines that start with '#' are skipped.  Every other line is decoded as
    UTF-8, its trailing CR/LF characters are removed, and each remaining
    character contributes its code point.

    Args:
        filepath: path of the file to read.

    Returns:
        A flat list of integer code points in file order; an empty list when
        the file contributes no characters.

    Raises:
        IOError / OSError: if the file cannot be opened.
        UnicodeDecodeError: if a non-comment line is not valid UTF-8.
    """
    codepoints = []
    # Read bytes and decode explicitly so the result does not depend on the
    # interpreter version or the platform's default text encoding.
    with open(filepath, 'rb') as fh:
        for raw_line in fh:
            # Decide once per line instead of once per character.
            if raw_line.startswith(b'#'):
                continue
            text = raw_line.rstrip(b'\r\n').decode('utf-8')
            codepoints.extend(ord(char) for char in text)
    return codepoints
def parse_sources(filepaths):
    """Concatenate the code points of several character-list files.

    Args:
        filepaths: iterable of paths, each handed to parse_source().

    Returns:
        One flat list holding every file's code points, in the order the
        paths were given; an empty list when no paths are given.
    """
    merged = []
    for path in filepaths:
        # extend() needs no reduce builtin and copes with zero files.
        merged.extend(parse_source(path))
    return merged
def get_kanjilist():
    """Build the code-point list for every named kanji set.

    Returns:
        A dict mapping each set name ('kyouikuKanji', 'level1' .. 'level4')
        to the list that parse_sources() returns for that set's data files.
    """
    # 'kyouikuKanji' is spread over data/grade-1.txt .. data/grade-6.txt.
    sources = {'kyouikuKanji': ['data/grade-%d.txt' % n for n in range(1, 7)]}
    # Each 'levelN' set (N = 1..4) comes from the single file
    # data/level-Ns.txt.
    sources.update({
        ('level%d' % n): ['data/level-%ds.txt' % n] for n in range(1, 5)
    })
    return {k: parse_sources(v) for k, v in sources.items()}
def parse_args():
    """Parse the command-line arguments.

    Returns:
        The argparse namespace; its ``font_file`` attribute holds the single
        positional argument.
    """
    # Imported locally so the function does not rely on a module-level import.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('font_file', help='path of a font file')
    return parser.parse_args()
if __name__ == '__main__':
    # Script entry point: for every kanji set, print how many of its code
    # points also appear in what all_of_kanjis() returns for the font file.
    args = parse_args()
    all_kanjis = all_of_kanjis(args.font_file)
    # Build the font-side set once instead of on every loop iteration.
    font_kanjis = set(all_kanjis)
    kanjilist = get_kanjilist()

    for k, v in kanjilist.items():
        count = len(font_kanjis.intersection(v))
        # The parenthesised form prints the same text on Python 2 and 3.
        print('{key}, {num}'.format(key=k, num=count))