OSDN Git Service

Rails 2.3.11
[redminele/redminele.git] / ruby / lib / ruby / gems / 1.8 / gems / actionmailer-2.3.11 / lib / action_mailer / vendor / tmail-1.2.7 / tmail / vendor / rchardet-1.3 / lib / rchardet / universaldetector.rb
diff --git a/ruby/lib/ruby/gems/1.8/gems/actionmailer-2.3.11/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb b/ruby/lib/ruby/gems/1.8/gems/actionmailer-2.3.11/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb
new file mode 100644 (file)
index 0000000..c8990c6
--- /dev/null
@@ -0,0 +1,168 @@
+# encoding: us-ascii
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Jeff Hodges - port to Ruby
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+module CharDet
+  MINIMUM_THRESHOLD = 0.20
+  EPureAscii = 0
+  EEscAscii = 1
+  EHighbyte = 2
+
+  class UniversalDetector
+    attr_accessor :result
+    def initialize
+      @_highBitDetector = /[\x80-\xFF]/
+      @_escDetector = /(\033|\~\{)/
+      @_mEscCharSetProber = nil
+      @_mCharSetProbers = []
+      reset()
+    end
+
+    def reset
+      @result = {'encoding' => nil, 'confidence' => 0.0}
+      @done = false
+      @_mStart = true
+      @_mGotData = false
+      @_mInputState = EPureAscii
+      @_mLastChar = ''
+      if @_mEscCharSetProber
+        @_mEscCharSetProber.reset()
+      end
+      for prober in @_mCharSetProbers
+        prober.reset()
+      end
+    end
+
+    def feed(aBuf)
+      return if @done
+
+      aLen = aBuf.length
+      return if not aLen
+
+      if not @_mGotData
+        # If the data starts with BOM, we know it is UTF
+        if aBuf[0...3] == "\xEF\xBB\xBF"
+          # EF BB BF  UTF-8 with BOM
+          @result = {'encoding' => "UTF-8", 'confidence' => 1.0}
+        elsif aBuf[0...4] == "\xFF\xFE\x00\x00"
+          # FF FE 00 00  UTF-32, little-endian BOM
+          @result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
+        elsif aBuf[0...4] == "\x00\x00\xFE\xFF"
+          # 00 00 FE FF  UTF-32, big-endian BOM
+          @result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
+        elsif aBuf[0...4] == "\xFE\xFF\x00\x00"
+          # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
+          @result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
+        elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
+          # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
+          @result = {'encoding' =>  "X-ISO-10646-UCS-4-2143", 'confidence' =>  1.0}
+        elsif aBuf[0...2] == "\xFF\xFE"
+          # FF FE  UTF-16, little endian BOM
+          @result = {'encoding' =>  "UTF-16LE", 'confidence' =>  1.0}
+        elsif aBuf[0...2] == "\xFE\xFF"
+          # FE FF  UTF-16, big endian BOM
+          @result = {'encoding' =>  "UTF-16BE", 'confidence' =>  1.0}
+        end
+      end
+
+      @_mGotData = true
+      if @result['encoding'] and (@result['confidence'] > 0.0)
+        @done = true
+        return
+      end
+
+      if @_mInputState == EPureAscii
+        if @_highBitDetector =~ (aBuf)
+          @_mInputState = EHighbyte
+        elsif (@_mInputState == EPureAscii) and @_escDetector =~ (@_mLastChar + aBuf)
+          @_mInputState = EEscAscii
+        end
+      end
+
+      @_mLastChar = aBuf[-1..-1]
+      if @_mInputState == EEscAscii
+        if not @_mEscCharSetProber
+          @_mEscCharSetProber = EscCharSetProber.new()
+        end
+        if @_mEscCharSetProber.feed(aBuf) == EFoundIt
+          @result = {'encoding' =>  self._mEscCharSetProber.get_charset_name(),
+                     'confidence' =>  @_mEscCharSetProber.get_confidence()
+          }
+          @done = true
+        end
+      elsif @_mInputState == EHighbyte
+        if not @_mCharSetProbers or @_mCharSetProbers.empty?
+          @_mCharSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
+        end
+        for prober in @_mCharSetProbers
+          if prober.feed(aBuf) == EFoundIt
+            @result = {'encoding' =>  prober.get_charset_name(),
+                       'confidence' =>  prober.get_confidence()}
+            @done = true
+            break
+          end
+        end
+      end
+
+    end
+
+    def close
+      return if @done
+      if not @_mGotData
+        $stderr << "no data received!\n" if $debug
+        return
+      end
+      @done = true
+
+      if @_mInputState == EPureAscii
+        @result = {'encoding' => 'ascii', 'confidence' => 1.0}
+        return @result
+      end
+
+      if @_mInputState == EHighbyte
+        confidences = {}
+        @_mCharSetProbers.each{ |prober| confidences[prober] = prober.get_confidence }
+        maxProber = @_mCharSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
+        if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
+          @result = {'encoding' =>  maxProber.get_charset_name(),
+                     'confidence' =>  maxProber.get_confidence()}
+          return @result
+        end
+      end
+
+      if $debug
+        $stderr << "no probers hit minimum threshhold\n" if $debug
+        for prober in @_mCharSetProbers[0]._mProbers
+          next if not prober
+          $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
+        end
+      end
+    end
+  end
+end