4 * License : The MIT License
\r
5 * Copyright(c) 2009 olyutorskii
\r
8 package jp.sourceforge.jindolf.parser;
\r
10 import java.util.regex.Pattern;
\r
11 import jp.sourceforge.jindolf.corelib.PeriodType;
\r
12 import jp.sourceforge.jindolf.corelib.VillageState;
\r
15 * Parses the various XHTML strings of Werewolf BBS (人狼BBS) and notifies the handlers.
\r
17 public class HtmlParser extends AbstractParser{
\r
19 private BasicHandler basicHandler;
\r
20 private final TalkParser talkParser = new TalkParser(this);
\r
21 private final SysEventParser sysEventParser = new SysEventParser(this);
\r
23 private final SeqRange rangepool_1 = new SeqRange();
\r
24 private final SeqRange rangepool_2 = new SeqRange();
\r
29 public HtmlParser(){
\r
35 * Registers the {@link BasicHandler} handler.
\r
36 * @param basicHandler the handler
\r
38 public void setBasicHandler(BasicHandler basicHandler){
\r
39 this.basicHandler = basicHandler;
\r
44 * Registers the {@link TalkHandler} handler.
\r
45 * @param talkHandler the handler
\r
47 public void setTalkHandler(TalkHandler talkHandler){
\r
48 this.talkParser.setTalkHandler(talkHandler);
\r
53 * Registers the {@link SysEventHandler} handler.
\r
54 * @param handler the handler
\r
56 public void setSysEventHandler(SysEventHandler handler){
\r
57 this.sysEventParser.setSysEventHandler(handler);
\r
61 private static final Pattern XMLDECL_PATTERN =
\r
62 compile("<\\?xml\u0020");
\r
63 private static final Pattern O_HTML_PATTERN =
\r
64 compile("<html\u0020");
\r
65 private static final Pattern TITLE_PATTERN =
\r
66 compile("<title>([^<]*)</title>");
\r
67 private static final Pattern O_BODY_PATTERN =
\r
69 private static final Pattern O_DIVMAIN_PATTERN =
\r
70 compile("<div\u0020class=\"main\">");
\r
74 * @throws HtmlParseException パースエラー
\r
76 private void parseHead() throws HtmlParseException{
\r
77 setContextErrorMessage("lost head part");
\r
79 SeqRange titleRange = this.rangepool_1;
\r
81 lookingAtAffirm(XMLDECL_PATTERN);
\r
84 findAffirm(O_HTML_PATTERN);
\r
87 findAffirm(TITLE_PATTERN);
\r
88 titleRange.setLastMatchedGroupRange(getMatcher(), 1);
\r
91 this.basicHandler.pageTitle(getContent(), titleRange);
\r
93 findAffirm(O_BODY_PATTERN);
\r
96 findAffirm(O_DIVMAIN_PATTERN);
\r
102 private static final Pattern LOGINFORM_PATTERN =
\r
106 +"\u0020" + "action=\"index\\.rb\""
\r
107 +"\u0020" + "method=\"post\""
\r
108 +"\u0020" + "class=\"login_form\""
\r
112 +"\u0020" + "class=\"login_form\""
\r
116 private static final Pattern C_EDIV_PATTERN =
\r
119 + "<a\u0020href=\"[^\"]*\">[^<]*</a>"
\r
123 private static final Pattern USERID_PATTERN =
\r
127 + "value=\"([^\"]*)\""
\r
129 private static final Pattern C_FORM_PATTERN =
\r
130 compile("</form>");
\r
134 * Recognition up to the login name has been verified only for Land F (F国).
\r
135 * @throws HtmlParseException パースエラー
\r
137 private void parseLoginForm() throws HtmlParseException{
\r
138 setContextErrorMessage("lost login form");
\r
140 SeqRange accountRange = this.rangepool_1;
\r
142 boolean isLand_E_Form;
\r
143 findAffirm(LOGINFORM_PATTERN);
\r
144 if(isGroupMatched(1)){
\r
145 isLand_E_Form = false;
\r
146 }else{ // E国ログインフォーム検出
\r
147 isLand_E_Form = true;
\r
152 lookingAtAffirm(C_EDIV_PATTERN);
\r
156 findAffirm(USERID_PATTERN);
\r
157 accountRange.setLastMatchedGroupRange(getMatcher(), 1);
\r
160 if(accountRange.length() > 0){
\r
162 .loginName(getContent(), accountRange);
\r
165 findAffirm(C_FORM_PATTERN);
\r
172 private static final Pattern VILLAGEINFO_PATTERN =
\r
174 "([^<]+?)" +SP_I // 最短一致数量子
\r
181 +"(?:(?:(午前)|(午後))\u0020)?" // AMPM
\r
192 * @throws HtmlParseException パースエラー
\r
194 private void parseVillageInfo() throws HtmlParseException{
\r
195 setContextErrorMessage("lose village information");
\r
197 SeqRange villageRange = this.rangepool_1;
\r
201 lookingAtAffirm(VILLAGEINFO_PATTERN);
\r
202 villageRange.setLastMatchedGroupRange(getMatcher(), 1);
\r
204 int month = parseGroupedInt(2);
\r
205 int day = parseGroupedInt(3);
\r
206 int hour = parseGroupedInt(6);
\r
207 int minute = parseGroupedInt(7);
\r
208 if(isGroupMatched(5)){ // 午後指定
\r
209 hour = (hour + 12) % 24;
\r
213 this.basicHandler.villageName(getContent(), villageRange);
\r
214 this.basicHandler.commitTime(month, day, hour, minute);
\r
// Pattern built from the regex "<p>" (an opening paragraph tag).
// Required via findAffirm() in parsePeriodLink() and probed via
// lookingAtProbe() in parseAutomatic().
// NOTE(review): compile() is not defined in the visible source; presumably an
// inherited/static helper around Pattern.compile - confirm which flags it applies.
219 private static final Pattern O_PARAG_PATTERN = compile("<p>");
\r
220 private static final Pattern PERIODLINK_PATTERN =
\r
223 + "<span\u0020class=\"time\">"
\r
225 + "<a\u0020href=\"([^\"]*)\">"
\r
230 private static final Pattern PERIOD_PATTERN =
\r
// Pattern built from the regex "</span>" (a closing span tag).
// parsePeriodLink() selects it as the close pattern when group 1 of
// PERIODLINK_PATTERN matched, then requires it with lookingAtAffirm().
240 private static final Pattern C_SPAN_PATTERN = compile("</span>");
\r
// Pattern built from the regex "</a>" (a closing anchor tag).
// parsePeriodLink() selects it as the close pattern when group 2 of
// PERIODLINK_PATTERN matched, then requires it with lookingAtAffirm().
241 private static final Pattern C_ANCHOR_PATTERN = compile("</a>");
\r
245 * @throws HtmlParseException パースエラー
\r
247 private void parsePeriodLink() throws HtmlParseException{
\r
248 setContextErrorMessage("lost period link");
\r
250 SeqRange anchorRange = this.rangepool_1;
\r
252 findAffirm(O_PARAG_PATTERN);
\r
256 Pattern closePattern;
\r
257 anchorRange.setInvalid();
\r
260 lookingAtAffirm(PERIODLINK_PATTERN);
\r
261 if(isGroupMatched(1)){
\r
262 closePattern = C_SPAN_PATTERN;
\r
263 }else if(isGroupMatched(2)){
\r
264 closePattern = C_ANCHOR_PATTERN;
\r
265 anchorRange.setLastMatchedGroupRange(getMatcher(), 2);
\r
266 }else if(isGroupMatched(3)){
\r
271 throw buildParseException();
\r
276 PeriodType periodType = null;
\r
277 lookingAtAffirm(PERIOD_PATTERN);
\r
278 if(isGroupMatched(1)){
\r
279 periodType = PeriodType.PROLOGUE;
\r
280 }else if(isGroupMatched(2)){
\r
281 periodType = PeriodType.EPILOGUE;
\r
282 }else if(isGroupMatched(3)){
\r
284 }else if(isGroupMatched(4)){
\r
285 periodType = PeriodType.PROGRESS;
\r
286 day = parseGroupedInt(4);
\r
289 throw buildParseException();
\r
293 lookingAtAffirm(closePattern);
\r
296 this.basicHandler.periodLink(getContent(),
\r
304 private static final Pattern O_MESSAGE_PATTERN =
\r
305 compile("<div\u0020class=\"message(?:\u0020ch[0-9]+)?\">");
\r
306 private static final Pattern O_RELOAD_PATTERN =
\r
307 compile("<div\u0020id=\"reload\">");
\r
308 private static final Pattern O_MSGKIND_PATTERN =
\r
311 +"<div\u0020class=\"(?:(announce)|(order)|(extra))\">"
\r
314 +"(?:<a name=\"[^\"]*\">)?"
\r
316 +"<span\u0020class=\"mes_no\">"
\r
322 +"<a\u0020name=\"([^\"]*)\"(?:\u0020class=\"ch_name\")?>"
\r
// Pattern built from the regex "</div>" (a closing div tag).
// parseMessage() requires it both via findAffirm() and via lookingAtAffirm().
325 private static final Pattern C_DIV_PATTERN = compile("</div>");
\r
329 * @throws HtmlParseException パースエラー
\r
331 private void parseMessage() throws HtmlParseException{
\r
332 setContextErrorMessage("lost message");
\r
334 SeqRange nameRange = this.rangepool_1;
\r
336 boolean skipGarbage = true;
\r
343 skipGarbage = false;
\r
344 matched = findProbe(O_MESSAGE_PATTERN); // 最初の1回のみ
\r
346 matched = lookingAtProbe(O_MESSAGE_PATTERN);
\r
349 matched = lookingAtProbe(O_RELOAD_PATTERN);
\r
352 findAffirm(C_DIV_PATTERN);
\r
362 lookingAtAffirm(O_MSGKIND_PATTERN);
\r
363 if(isGroupMatched(1)){
\r
365 this.sysEventParser.parseAnnounce();
\r
366 }else if(isGroupMatched(2)){
\r
368 this.sysEventParser.parseOrder();
\r
369 }else if(isGroupMatched(3)){
\r
371 this.sysEventParser.parseExtra();
\r
372 }else if(isGroupMatched(5)){
\r
373 nameRange.setLastMatchedGroupRange(getMatcher(), 5);
\r
375 if(isGroupMatched(4)){
\r
376 talkNo = parseGroupedInt(4);
\r
379 this.talkParser.parseTalk(talkNo, nameRange);
\r
382 throw buildParseException();
\r
385 lookingAtAffirm(C_DIV_PATTERN);
\r
392 private static final Pattern O_LISTTABLE_PATTERN =
\r
393 compile("<table\u0020class=\"list\">");
\r
394 private static final Pattern ACTIVEVILLAGE =
\r
400 +"<a\u0020href=\"([^\"]*)\">([^<]*)</a>"
\r
401 +"\u0020<strong>\uff08"
\r
402 +"(?:(?:(午前)|(午後))\u0020)?" // AMPM
\r
421 * @throws HtmlParseException パースエラー
\r
423 private void parseTopList() throws HtmlParseException{
\r
424 setContextErrorMessage("lost village list");
\r
426 SeqRange anchorRange = this.rangepool_1;
\r
427 SeqRange villageRange = this.rangepool_2;
\r
429 if( ! findProbe(O_LISTTABLE_PATTERN) ) return;
\r
433 lookingAtAffirm(ACTIVEVILLAGE);
\r
434 if(isGroupMatched(1)) break;
\r
435 anchorRange .setLastMatchedGroupRange(getMatcher(), 2);
\r
436 villageRange.setLastMatchedGroupRange(getMatcher(), 3);
\r
437 int hour = parseGroupedInt(6);
\r
438 if(isGroupMatched(5)){
\r
439 hour = (hour + 12) % 24;
\r
441 int minute = parseGroupedInt(7);
\r
443 VillageState state;
\r
444 if(isGroupMatched(8)){
\r
445 state = VillageState.PROLOGUE;
\r
446 }else if(isGroupMatched(9)){
\r
447 state = VillageState.PROLOGUE;
\r
448 }else if(isGroupMatched(10)){
\r
449 state = VillageState.PROGRESS;
\r
450 }else if(isGroupMatched(11)){
\r
451 state = VillageState.EPILOGUE;
\r
452 }else if(isGroupMatched(12)){
\r
453 state = VillageState.GAMEOVER;
\r
456 throw buildParseException();
\r
463 this.basicHandler.villageRecord(getContent(),
\r
473 private static final Pattern O_LISTLOG_PATTERN =
\r
475 "<a\u0020href=\"(index[^\"]*(?:ready_0|000_ready))\">"
\r
482 * @throws HtmlParseException パースエラー
\r
484 private void parseLogList() throws HtmlParseException{
\r
485 setContextErrorMessage("lost village list");
\r
487 SeqRange anchorRange = this.rangepool_1;
\r
488 SeqRange villageRange = this.rangepool_2;
\r
490 boolean is1st = true;
\r
494 matched = findProbe(O_LISTLOG_PATTERN);
\r
497 matched = lookingAtProbe(O_LISTLOG_PATTERN);
\r
499 if( ! matched ) break;
\r
501 anchorRange .setLastMatchedGroupRange(getMatcher(), 1);
\r
502 villageRange.setLastMatchedGroupRange(getMatcher(), 2);
\r
506 this.basicHandler.villageRecord(getContent(),
\r
510 VillageState.GAMEOVER );
\r
516 private static final Pattern C_BODY_PATTERN =
\r
517 compile("</body>");
\r
518 private static final Pattern C_HTML_PATTERN =
\r
519 compile(SP_I+ "</html>" +SP_I);
\r
523 * @throws HtmlParseException パースエラー
\r
525 private void parseTail() throws HtmlParseException{
\r
526 setContextErrorMessage("lost last part");
\r
528 findAffirm(C_BODY_PATTERN);
\r
531 matchesAffirm(C_HTML_PATTERN);
\r
537 private static final Pattern LISTTITLE_PATTERN =
\r
538 compile("終了した村の記録");
\r
541 * Parses a Werewolf BBS (人狼BBS) page while automatically recognizing its page type.
\r
542 * @param content the string to parse
\r
543 * @throws HtmlParseException パースエラー
\r
545 public void parseAutomatic(DecodedContent content)
\r
546 throws HtmlParseException{
\r
547 setContent(content);
\r
549 this.basicHandler.startParse(getContent());
\r
555 if(lookingAtProbe(LISTTITLE_PATTERN)){
\r
557 this.basicHandler.pageType(PageType.VILLAGELIST_PAGE);
\r
562 if(lookingAtProbe(O_PARAG_PATTERN)){
\r
564 this.basicHandler.pageType(PageType.TOP_PAGE);
\r
567 this.basicHandler.pageType(PageType.PERIOD_PAGE);
\r
568 parseVillageInfo();
\r
576 this.basicHandler.endParse();
\r