OSDN Git Service

Initial contents of nt2chview
[nt2chview/stable.git] / NT2chCtrl45 / html / HtmlElement_2chparser.cs
1 using System;\r
2 using System.Collections.Generic;\r
3 using System.Text;\r
4 using NT2chObject;\r
5 \r
6 namespace NT2chCtrl.html\r
7 {\r
8     public partial class HtmlElement\r
9     {\r
/// <summary>
/// Splits a 2ch response body on the three tags this parser recognises
/// (<c>&lt;a href=...&gt;</c>, <c>&lt;/a&gt;</c> and <c>&lt;br...&gt;</c>) and hands
/// every run of text between them to <c>parse2chRes2</c>.
/// </summary>
/// <param name="source">Response body to parse. Must not be null.</param>
/// <param name="search">Search object forwarded unchanged to <c>parse2chRes2</c>; may be null.</param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// Anchor open/close tags are dropped (only the text around them is kept); a
/// <c>br</c> tag produces a closed "br" element created with this element as parent.
/// Any other '&lt;' is left in place and ends up inside the surrounding text run.
/// </remarks>
private bool parse2chRes(string source, IAmbiguousSearch search)
{
    int length = source.Length;
    int textStart = 0;

    for (int idx1 = 0; idx1 < length; idx1++)
    {
        int tagStart = source.IndexOf('<', idx1);

        // No more tags, or a '<' that is the very last character: stop scanning.
        if (tagStart < 0 || tagStart >= length - 1)
        {
            break;
        }

        // Continue right after this '<' unless a recognised tag moves us further.
        // (Stepping one character at a time would find the same '<' over and over.)
        idx1 = tagStart;

        int afterName = tagStart + 2;
        int tagEnd;

        switch (source[tagStart + 1])
        {
            case 'a':
            case 'A':
                // "<a href=": compare in place instead of searching the rest of the string.
                if (string.Compare(source, afterName, " href=", 0, 6, StringComparison.OrdinalIgnoreCase) == 0)
                {
                    tagEnd = source.IndexOf('>', afterName + 6);
                    if (tagEnd > 0)
                    {
                        parse2chRes2(this, source, textStart, tagStart, search);
                        textStart = tagEnd + 1;
                        idx1 = tagEnd;
                    }
                }
                break;

            case 'b':
            case 'B':
                // "<br": only the two letters are checked, anything up to '>' is skipped.
                if (string.Compare(source, afterName, "r", 0, 1, StringComparison.OrdinalIgnoreCase) == 0)
                {
                    tagEnd = source.IndexOf('>', afterName + 1);
                    if (tagEnd > 0)
                    {
                        parse2chRes2(this, source, textStart, tagStart, search);
                        HtmlElement lineBreak = new HtmlElement(this, "br");
                        lineBreak.setClosed(true);
                        textStart = tagEnd + 1;
                        idx1 = tagEnd;
                    }
                }
                break;

            case '/':
                // "</a>"
                if (string.Compare(source, afterName, "a>", 0, 2, StringComparison.OrdinalIgnoreCase) == 0)
                {
                    parse2chRes2(this, source, textStart, tagStart, search);
                    textStart = afterName + 2;
                    idx1 = textStart - 1;
                }
                break;

            default:
                break;
        }
    }

    // Trailing text after the last recognised tag.
    if (textStart < length)
    {
        parse2chRes2(this, source, textStart, length, search);
    }
    return true;
}
92 \r
/// <summary>
/// Finds the first character at or after <paramref name="start"/> that is not a
/// visible ASCII character, i.e. a character that is <c>&lt;= ' '</c> or <c>&gt;= 0x7f</c>.
/// </summary>
/// <param name="source">String to scan.</param>
/// <param name="start">Index of the first character to examine.</param>
/// <param name="end">Exclusive upper bound of the scan; clamped to the string length.</param>
/// <returns>
/// The index of the first such character, or the clamped upper bound when every
/// examined character is visible ASCII (this is also the result when
/// <paramref name="start"/> is already at or past the bound).
/// </returns>
private static int findNonDisplayAscii(string source, int start, int end)
{
    int limit = (end < source.Length) ? end : source.Length;

    int pos = start;
    while (pos < limit)
    {
        char ch = source[pos];
        bool visibleAscii = ch > ' ' && ch < 0x7f;
        if (!visibleAscii)
        {
            return pos;
        }
        pos++;
    }
    return limit;
}
106 \r
/// <summary>
/// Adds the text <c>source[start, end)</c> under <paramref name="pElem"/>, wrapping every
/// range reported by <paramref name="search"/> in a closed "span" element that carries an
/// "emphasis" attribute.
/// </summary>
/// <param name="pElem">Parent passed to every element created here.</param>
/// <param name="source">String containing the text.</param>
/// <param name="start">Index of the first character of the text.</param>
/// <param name="end">Index one past the last character of the text.</param>
/// <param name="search">
/// Matcher used to locate the ranges to emphasise. When null the whole text becomes a
/// single <c>StringElement</c>.
/// </param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// An empty range produces no elements at all (previously an empty string element was
/// created only when <paramref name="search"/> was null).
/// </remarks>
private bool parseEmphasisText(HtmlElement pElem, string source, int start, int end, IAmbiguousSearch search)
{
    int textLength = end - start;
    if (textLength == 0)
    {
        // Nothing to add; also avoids asking the matcher about an empty string.
        return true;
    }

    string text = source.Substring(start, textLength);

    if (search == null)
    {
        new StringElement(pElem, text);
        return true;
    }

    int idx = 0;
    while (idx < textLength)
    {
        int findIdx, endIdx;
        if (!search.match(text, idx, out findIdx, out endIdx))
        {
            break;
        }

        // Defensive: a hit that lies before the cursor, is empty, or runs past the
        // text would either loop forever or make Substring throw. Treat it as
        // "no further matches" and emit the rest as plain text.
        // NOTE(review): IAmbiguousSearch.match is declared elsewhere; confirm it is
        // meant to return findIdx >= idx and endIdx > findIdx on success.
        if (findIdx < idx || endIdx <= findIdx || endIdx > textLength)
        {
            break;
        }

        // Plain text between the previous hit and this one.
        if (idx < findIdx)
        {
            new StringElement(pElem, text.Substring(idx, findIdx - idx));
        }

        HtmlElement span = new HtmlElement(pElem, "span");
        span.addAttribute(new HtmlAttribute("emphasis"));
        new StringElement(span, text.Substring(findIdx, endIdx - findIdx));
        span.setClosed(true);

        idx = endIdx;
    }

    // Remainder after the last hit.
    if (idx < textLength)
    {
        new StringElement(pElem, text.Substring(idx, textLength - idx));
    }
    return true;
}
148 \r
/// <summary>
/// Parses one tag-free run of response text, <c>source[start, end)</c>, and creates child
/// elements for what it finds: response references such as "&gt;&gt;12" or "&gt;&gt;3-5"
/// (a "span" with a "res-link" attribute), http/https URLs including the "ttp://" spelling
/// (an "a" element, preceded by an "img" element when the URL ends in an image extension),
/// "sssp://" icon URLs (an "img" element with an extra "sssp" attribute), and plain text
/// (delegated to <c>parseEmphasisText</c>).
/// </summary>
/// <param name="pElem">Parent for the created elements; when null, this element is used.</param>
/// <param name="source">String containing the text run.</param>
/// <param name="start">Index of the first character of the run.</param>
/// <param name="end">Index one past the last character of the run.</param>
/// <param name="search">Forwarded to <c>parseEmphasisText</c>; may be null.</param>
/// <returns>Always <c>false</c> (unchanged from the original implementation).</returns>
/// <remarks>
/// Scanner states: 0 = plain text, 1 = inside the leading '&gt;' run, 2 = reading digits of a
/// reference, 3 = just read a '-' or ',' separator after digits.
/// NOTE(review): state 1 is only left by a digit, '-', ',', 'h' or 't'; other characters
/// leave it untouched, so "&gt; abc 1" is treated as one reference. Kept as in the original —
/// confirm whether that is intended.
/// </remarks>
private bool parse2chRes2(HtmlElement pElem, string source, int start, int end, IAmbiguousSearch search)
{
    HtmlElement parent = pElem ?? this;

    int textStart = start;   // first character not yet emitted
    int textEnd = 0;         // where the current reference candidate begins
    int state = 0;

    for (int i = start; i < end; i++)
    {
        char c = source[i];
        switch (c)
        {
            case '>':
            case '\uFF1E':
                // The second label was a duplicate '>' (compile error CS0152);
                // presumably it was the full-width form before the file was re-encoded.
                if (state == 0)
                {
                    textEnd = i;
                    state = 1;
                }
                else if (state == 1)
                {
                    // Keep only the last two '>' in the reference (">>>1" -> ">" + ">>1").
                    textEnd = i - 1;
                }
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                if (state != 0)
                {
                    state = 2;
                }
                break;

            case '-':
            case ',':
                state = (state == 2) ? 3 : 0;
                break;

            case 'h':
            case 'H':
            case 't':
            case 'T':
            {
                if (state == 2 || state == 3)
                {
                    textStart = append2chResLink(parent, source, textStart, textEnd, i, state, search);
                }
                state = 0;

                // "http://" is matched as 'h' + "ttp://"; a bare "ttp://" gets its 'h' restored.
                bool leadingH = (c == 'h' || c == 'H');
                int schemeAt = leadingH ? i + 1 : i;
                int schemeLength = 0;
                if (matchesTokenAt(source, schemeAt, "ttp://", end))
                {
                    schemeLength = 6;
                }
                else if (matchesTokenAt(source, schemeAt, "ttps://", end))
                {
                    schemeLength = 7;
                }

                if (schemeLength > 0)
                {
                    int linkEnd = findNonDisplayAscii(source, schemeAt + schemeLength, end);
                    string written = source.Substring(i, linkEnd - i);
                    string url = leadingH ? written : "h" + written;

                    // Text in front of the URL used to be dropped; emit it first.
                    if (textStart < i)
                    {
                        parseEmphasisText(parent, source, textStart, i, search);
                    }
                    append2chHyperlink(parent, url, written);

                    textStart = linkEnd;
                    i = linkEnd - 1;
                }
                break;
            }

            case 's':
                if (state == 2 || state == 3)
                {
                    textStart = append2chResLink(parent, source, textStart, textEnd, i, state, search);
                    state = 0;
                }
                if (matchesTokenAt(source, i, "sssp://", end))
                {
                    int iconEnd = findNonDisplayAscii(source, i + 7, end);
                    // sssp:// icons are fetched over http.
                    string iconUrl = "http" + source.Substring(i + 4, iconEnd - i - 4);
                    if (chkGraphicLink(iconUrl))
                    {
                        if (textStart < i)
                        {
                            parseEmphasisText(parent, source, textStart, i, search);
                        }

                        HtmlElement icon = new HtmlElement(parent, "img");
                        icon.addAttribute(new HtmlAttribute("src", iconUrl));
                        icon.addAttribute(new HtmlAttribute("sssp"));
                        icon.setClosed(true);

                        textStart = iconEnd;
                        i = iconEnd - 1;
                        // textEnd now lies before textStart; drop any pending '>' run.
                        state = 0;
                    }
                }
                break;

            default:
                if (state == 2 || state == 3)
                {
                    textStart = append2chResLink(parent, source, textStart, textEnd, i, state, search);
                    state = 0;
                }
                break;
        }
    }

    if (textStart < end)
    {
        if (state == 2 || state == 3)
        {
            // The run ends inside a reference.
            append2chResLink(parent, source, textStart, textEnd, end, state, search);
            if (state == 3)
            {
                // Trailing '-' or ',' is not part of the reference; keep it as text.
                new StringElement(parent, source.Substring(end - 1, 1));
            }
        }
        else
        {
            parseEmphasisText(parent, source, textStart, end, search);
        }
    }
    return false;
}

/// <summary>
/// Emits the plain text <c>source[textStart, linkStart)</c> followed by a closed "span"
/// element with a "res-link" attribute holding the reference text.
/// </summary>
/// <param name="stopIndex">Index of the first character after the digits/separator run.</param>
/// <param name="state">2 when the run ended on a digit, 3 when it ended on '-' or ','.</param>
/// <returns>Index of the first character that has not been emitted yet.</returns>
private int append2chResLink(HtmlElement parent, string source, int textStart, int linkStart, int stopIndex, int state, IAmbiguousSearch search)
{
    if (textStart < linkStart)
    {
        parseEmphasisText(parent, source, textStart, linkStart, search);
    }

    // In state 3 the character before stopIndex is a dangling separator and stays
    // outside the reference. The reference therefore ends at stopIndex (state 2) or
    // stopIndex - 1 (state 3), matching the end-of-range handling.
    int linkEnd = (state == 3) ? stopIndex - 1 : stopIndex;

    HtmlElement span = new HtmlElement(parent, "span");
    span.addAttribute(new HtmlAttribute("res-link"));
    span.setClosed(true);
    new StringElement(span, source.Substring(linkStart, linkEnd - linkStart));
    return linkEnd;
}

/// <summary>
/// Emits an "a" element for <paramref name="url"/> whose text is <paramref name="label"/>,
/// preceded by an "img" element when <c>chkGraphicLink</c> accepts the URL.
/// </summary>
private void append2chHyperlink(HtmlElement parent, string url, string label)
{
    if (chkGraphicLink(url))
    {
        HtmlElement image = new HtmlElement(parent, "img");
        image.addAttribute(new HtmlAttribute("src", url));
        image.setClosed(true);
    }

    HtmlElement anchor = new HtmlElement(parent, "a");
    anchor.addAttribute(new HtmlAttribute("href", url));
    anchor.setClosed(true);
    new StringElement(anchor, label);
}

/// <summary>
/// Tests, ignoring case, whether <paramref name="token"/> occurs in <paramref name="source"/>
/// exactly at <paramref name="index"/> and ends at or before <paramref name="limit"/>.
/// </summary>
private static bool matchesTokenAt(string source, int index, string token, int limit)
{
    if (index < 0 || index > source.Length || index + token.Length > limit)
    {
        return false;
    }
    return string.Compare(source, index, token, 0, token.Length, StringComparison.OrdinalIgnoreCase) == 0;
}
359 \r
/// <summary>
/// Tells whether a link looks like an image, judged only by the text after its last '.'.
/// </summary>
/// <param name="src">Link text; may be null.</param>
/// <returns>
/// <c>true</c> when the text after the last '.' is png, jpg, jpeg, bmp or gif (any letter
/// case); <c>false</c> for null, for strings shorter than four characters, for strings
/// without a '.', and for every other suffix.
/// </returns>
bool chkGraphicLink(string src)
{
    //if (!chkShowThumbnail())
    //    return false;

    if (src == null || src.Length < 4)
    {
        return false;
    }

    int idx = src.LastIndexOf('.');
    if (idx < 0)
    {
        return false;
    }

    // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to a dotless i
    // under Turkish cultures, so "GIF" would not match "gif".
    string suffix = src.Substring(idx + 1).ToLowerInvariant();
    switch (suffix)
    {
        case "png":
        case "jpg":
        case "jpeg":
        case "bmp":
        case "gif":
            return true;
        default:
            return false;
    }
}
384 \r
/// <summary>
/// Earlier, general-purpose tag parser: walks <paramref name="source"/> one character at a
/// time, creating a <c>StringElement</c> for each text run and an <c>HtmlElement</c> (with
/// attributes) for each tag, all with this element as parent.
/// </summary>
/// <param name="source">Markup to parse; null or empty is rejected.</param>
/// <returns>
/// Always <c>false</c>: both for rejected/malformed input and after a complete pass.
/// NOTE(review): the final return was probably meant to signal success — left unchanged
/// because callers are not visible here.
/// </returns>
/// <remarks>
/// States: 0 = text, 1 = just after '&lt;', 2 = reading tag name, 3 = inside tag between
/// attributes, 4 = reading attribute name, 5 = after attribute name, 6 = reading attribute
/// value, 7 = after '/' of a self-closing tag, 8 = just after "&lt;/", 9 = reading the name
/// of a closing tag.
/// </remarks>
private bool parse2chRes_old(string source)
{

    if (source == null || source.Length == 0)
        return false;

    int textStart = 0;
    int state = 0;
    string tagName, attrName = null, attrVal;
    HtmlAttribute hAttr;
    HtmlElement hElem = null;
    bool dquote = false;
    bool squote = false;


    int length = source.Length;
    for (int i = 0; i < length; i++)
    {
        switch (HtmlParser.getCharToken(source[i]))
        {
            case HtmlParser.CHAR_TOKEN.LT:
                // Flush the text run that precedes the tag.
                if (state == 0 && textStart < i)
                {
                    new StringElement(
                        this, source.Substring(textStart, i - textStart));
                }
                state = 1;
                textStart = length;
                break;
            case HtmlParser.CHAR_TOKEN.SLASH:
                if (state == 0)
                    break;
                else if (state == 6 && (dquote || squote))
                {
                    // '/' inside a quoted attribute value is ordinary data.
                    break;
                }
                else if (state == 1)
                {
                    state = 8;
                }
                else
                {
                    if (state == 2)
                    {
                        // "<br/>": the '/' ends the tag name. Create the element now,
                        // otherwise the following '>' has nothing to close.
                        tagName = source.Substring(textStart, i - textStart);
                        hElem = new HtmlElement(this, tagName);
                    }
                    state = 7;
                }
                break;
            case HtmlParser.CHAR_TOKEN.GT:
                if (state == 0)
                {
                    break;
                }
                if (state == 1 || state == 8)
                {
                    // "<>" or "</>"
                    return false;
                }
                else if (state == 2)
                {
                    tagName = source.Substring(textStart, i - textStart);
                    hElem = new HtmlElement(this, tagName);
                }
                else if (state == 7)
                {
                    // State 7 can also be reached without an open element
                    // (e.g. a stray '/' in a closing tag); do not dereference null.
                    if (hElem != null)
                    {
                        hElem.setClosed(true);
                    }
                }
                else if (state == 9)
                {
                    // Closing tag: find the nearest still-open child with the same name,
                    // close it and move every later sibling underneath it.
                    tagName = source.Substring(textStart, i - textStart);
                    int count = mChildren.Count;
                    for (int j = count - 1; j >= 0; j--)
                    {
                        HtmlElement child = mChildren[j];
                        if (child.Closed())
                            continue;

                        if (tagName.Equals(child.getTagName()))
                        {
                            child.setClosed(true);
                            if (j < (count - 1))
                            {
                                child.mChildren.AddRange(
                                    mChildren.GetRange(j + 1, count - 1 - j));
                                mChildren.RemoveRange(j + 1, count - 1 - j);
                            }
                            break;
                        }
                    }
                }
                hElem = null;
                state = 0;
                textStart = i + 1;
                break;
            case HtmlParser.CHAR_TOKEN.WHITESPACE:
            case HtmlParser.CHAR_TOKEN.NL:
                if (state == 1)
                    return false;
                else if (state == 2)
                {
                    tagName = source.Substring(textStart, i - textStart);
                    hElem = new HtmlElement(this, tagName);
                    state = 3;
                }
                else if (state == 4)
                {
                    attrName = source.Substring(textStart, i - textStart);
                    state = 5;
                }
                else if (state == 5)
                {
                    // Attribute without a value.
                    hAttr = new HtmlAttribute(attrName);
                    hElem.addAttribute(hAttr);
                    state = 3;
                    dquote = squote = false;
                }
                else if (state == 6)
                {
                    // NOTE(review): this branch does not check dquote/squote, so white space
                    // also ends a quoted value — confirm whether that is intended.
                    attrVal = source.Substring(textStart, i - textStart);
                    hAttr = new HtmlAttribute(attrName, attrVal);
                    hElem.addAttribute(hAttr);
                    state = 3;
                    dquote = squote = false;
                }
                break;
            case HtmlParser.CHAR_TOKEN.EQUAL:
                if (state != 0 && state != 4 && state != 5)
                    return false;
                if (state == 0)
                    break;
                if (state == 4)
                {
                    attrName = source.Substring(textStart, i - textStart);
                }
                textStart = i + 1;
                state = 6;
                break;
            case HtmlParser.CHAR_TOKEN.DQUOTE:
                if (state == 0)
                    break;
                if (state != 6)
                    return false;
                if (squote)
                    break;
                if (dquote)
                {
                    // Closing double quote: the value is complete.
                    attrVal = source.Substring(textStart, i - textStart);
                    hAttr = new HtmlAttribute(attrName, attrVal);
                    hElem.addAttribute(hAttr);
                    state = 3;
                    dquote = false;
                }
                else
                {
                    dquote = true;
                    textStart = i + 1;
                }
                break;
            case HtmlParser.CHAR_TOKEN.SQUOTE:
                if (state == 0)
                    break;
                if (state != 6)
                    return false;
                if (dquote)
                    break;
                if (squote)
                {
                    // Closing single quote: the value is complete.
                    attrVal = source.Substring(textStart, i - textStart);
                    hAttr = new HtmlAttribute(attrName, attrVal);
                    hElem.addAttribute(hAttr);
                    state = 3;
                    squote = false;
                }
                else
                {
                    squote = true;
                    textStart = i + 1;
                }
                break;
            case HtmlParser.CHAR_TOKEN.ALPHA:
                if (state == 0)
                    break;
                else if (state == 1)
                {
                    state = 2;
                    textStart = i;
                }
                else if (state == 3)
                {
                    textStart = i;
                    state = 4;
                }
                else if (state == 8)
                {
                    textStart = i;
                    state = 9;
                }
                else if (state == 7)
                {
                    // Only '>' may follow the '/' of a self-closing tag.
                    return false;
                }
                break;
            default:
                if (state == 7)
                {
                    return false;
                }
                break;
        }
    }
    // Trailing text after the last tag.
    if (state == 0 && textStart < length)
    {
        new StringElement(
            this, source.Substring(textStart, length - textStart));
    }
    return false;
}
596     }\r
597 }\r