OSDN Git Service

utf8_lexical.hpp 途中
author Myun2 <myun2@nwhite.info>
Tue, 3 Aug 2010 13:06:14 +0000 (22:06 +0900)
committer Myun2 <myun2@nwhite.info>
Tue, 3 Aug 2010 13:06:14 +0000 (22:06 +0900)
roast/include/roast/str/multibyte/utf8_lexical.hpp
roast/test/lexical_test/mbstring_test.cpp [new file with mode: 0644]

index c14c1c3..68d98e1 100644 (file)
@@ -6,13 +6,66 @@
 #ifndef __SFJP_ROAST__str__multi_byte__utf8_lexical_HPP__
 #define __SFJP_ROAST__str__multi_byte__utf8_lexical_HPP__
 
+#include "roast/lexical/string_structure.hpp"
+
 namespace roast
 {
        namespace multibyte
        {
                namespace utf8
                {
-                       typedef lexical_rule;
+                       using namespace ::roast::lexical::structure;
+                       
+                       /*
+                       (00-7f)
+                       (c0-df)(80-bf)
+                       (e0-ef)(80-bf)(80-bf)
+                       (f0-f7)(80-bf)(80-bf)(80-bf)
+                       (f8-fb)(80-bf)(80-bf)(80-bf)(80-bf)
+                       (fc-fd)(80-bf)(80-bf)(80-bf)(80-bf)(80-bf)
+                       
+                       U+0000 - U+007F 0xxxxxxx (00-7f) 07bit 
+                       U+0080 - U+07FF 110yyyyx 10xxxxxx (c0-df)(80-bf) 11bit 
+                       U+0800 - U+FFFF 1110yyyy 10yxxxxx 10xxxxxx (e0-ef)(80-bf)(80-bf) 16bit 
+                       U+10000 - U+1FFFFF 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx (f0-f7)(80-bf)(80-bf)(80-bf) 21bit 
+                       U+200000 - U+3FFFFFF 111110yy 10yyyxxx 10xxxxxx 10xxxxxx 10xxxxxx (f8-fb)(80-bf)(80-bf)(80-bf)(80-bf) 26bit 
+                       U+4000000 - U+7FFFFFFF 1111110y 10yyyyxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (fc-fd)(80-bf)(80-bf)(80-bf)(80-bf)(80-bf) 31bit 
+                       
+                       */
+                       // Lexical rule matching one UTF-8 encoded character in the 1- to
+                       // 6-byte scheme tabulated in the comment above: the lead byte selects
+                       // the sequence length and every following byte must lie in (80-bf).
+                       // NOTE(review): only byte ranges are checked, so overlong encodings and
+                       // surrogate code points are accepted, and the 5/6-byte forms predate
+                       // RFC 3629 -- tighten if strict validation is required.
+                       // NOTE(review): assumes or<> and seq<> accept up to six arguments --
+                       // confirm against roast/lexical/string_structure.hpp.
+                       typedef or<
+                               //      1byte (U+0000 - U+007F) -> (00-7f)
+                               int_range< 0x00, 0x7f >,
+
+                               //      2byte (U+0080 - U+07FF) -> (c0-df)(80-bf)
+                               seq<
+                                       int_range< 0xc0, 0xdf >,
+                                       int_range< 0x80, 0xbf >
+                               >,
+
+                               //      3byte (U+0800 - U+FFFF) -> (e0-ef)(80-bf)(80-bf)
+                               seq<
+                                       int_range< 0xe0, 0xef >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >
+                               >,
+
+                               //      4byte (U+10000 - U+1FFFFF) -> (f0-f7)(80-bf)(80-bf)(80-bf)
+                               seq<
+                                       int_range< 0xf0, 0xf7 >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >
+                               >,
+
+                               //      5byte (U+200000 - U+3FFFFFF) -> (f8-fb)(80-bf)(80-bf)(80-bf)(80-bf)
+                               seq<
+                                       int_range< 0xf8, 0xfb >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >
+                               >,
+
+                               //      6byte (U+4000000 - U+7FFFFFFF) -> (fc-fd)(80-bf)(80-bf)(80-bf)(80-bf)(80-bf)
+                               //      Last alternative: no trailing comma, which a template
+                               //      argument list does not permit.
+                               seq<
+                                       int_range< 0xfc, 0xfd >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >,
+                                       int_range< 0x80, 0xbf >
+                               >
+                       > lexical_rule;
                }
        }
 }
diff --git a/roast/test/lexical_test/mbstring_test.cpp b/roast/test/lexical_test/mbstring_test.cpp
new file mode 100644 (file)
index 0000000..362371d
--- /dev/null
@@ -0,0 +1,64 @@
+#//include "roast/xml/roast_dom_driver.hpp"
+#include "roast/xml/roast_xml/roast_xml_dom_parser.hpp"
+#include "roast/xml/roast_xml/roast_xml_sax_parser.hpp"
+#include <stdio.h>
+#include <time.h>
+
+using namespace roast;
+using namespace roast::lexical;
+
+#define BUFF_SIZE              200*1024*1024
+
+                               
+       // Event receiver that only tallies how many attributes, text nodes and
+       // start tags it is told about. Presumably meant as a callback type for
+       // roast_xml::sax_parser<> -- TODO confirm against the parser's interface.
+       class test
+       {
+       private:
+               int attr_count;         // number of attribute() notifications
+               int text_count;         // number of text() notifications
+               int element_count;      // number of start_element() notifications
+       public:
+               // Start every tally at zero; without this the increments below
+               // would read indeterminate values.
+               test() : attr_count(0), text_count(0), element_count(0)
+               {
+               }
+
+               // Counts one attribute; the name and value are not inspected.
+               void attribute( const sized_ccharbuf &attr_name, const sized_ccharbuf &attr_value )
+               {
+                       ++attr_count;
+               }
+
+               // Comments are deliberately ignored.
+               void comment( const sized_ccharbuf &s )
+               {
+               }
+
+               // Counts one text node; the content is not inspected.
+               void text( const sized_ccharbuf &s )
+               {
+                       ++text_count;
+               }
+
+               // Counts one opening tag; the tag name is not inspected.
+               void start_element( const sized_ccharbuf &s )
+               {
+                       ++element_count;
+               }
+
+               // Closing tags are deliberately ignored.
+               void end_element()
+               {
+               }
+       };
+
+
+// Timing harness: loads temp.xml into memory, parses it with
+// roast_xml::dom_parser and prints clock() before and after the parse.
+// Returns 0 on success, 1 when the input file cannot be opened.
+int main()
+{
+       FILE* fp = fopen("temp.xml","r");
+       //FILE* fp = fopen("MIDIデバイスマネージャ3.xml","r");
+       if( fp == NULL )
+       {
+               fprintf(stderr, "cannot open temp.xml\n");
+               return 1;
+       }
+
+       char *work = new char [BUFF_SIZE];
+
+       // Read at most BUFF_SIZE-1 bytes and terminate the text. The parser is
+       // handed only the pointer, so presumably it stops at a NUL -- TODO confirm.
+       // Reading with an element size of 1 makes fread report the byte count,
+       // which is where the terminator has to go.
+       size_t length = fread(work, 1, (BUFF_SIZE) - 1, fp);
+       work[length] = '\0';
+       fclose(fp);
+
+       {
+               // Inner scope: the parser and document are destroyed before the
+               // buffer they were built from is released below.
+               // clock_t is not guaranteed to be int, so convert explicitly
+               // instead of passing it to "%d".
+               printf("%ld\n", static_cast<long>(clock()));
+
+               roast_xml::dom_parser xml(work);
+
+               /*roast_xml::sax_parser<
+                       roast_xml::sax_callback_sample> xml(work);
+                       //test> xml(work);*/
+               roast_xml::document doc = xml.analyze();
+
+               roast_xml::element *e = xml.get_root_element();
+               printf("%ld\n", static_cast<long>(clock()));
+       }
+
+       delete [] work;
+       return 0;
+}