OSDN Git Service

scm: replace invalid utf-8 sequences in comments instead of stripping on Ruby 1.8.
author Toshi MARUYAMA <marutosijp2@yahoo.co.jp>
Sat, 9 Apr 2011 09:31:14 +0000 (09:31 +0000)
committer Toshi MARUYAMA <marutosijp2@yahoo.co.jp>
Sat, 9 Apr 2011 09:31:14 +0000 (09:31 +0000)
git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@5373 e93f8b46-1217-0410-a6f0-8f06a7374b81

app/models/changeset.rb
test/unit/changeset_test.rb

index 9c0dc68..869e9ba 100644 (file)
@@ -255,8 +255,8 @@ class Changeset < ActiveRecord::Base
       str.force_encoding("UTF-8") if str.respond_to?(:force_encoding)
       return str
     end
+    enc = encoding.blank? ? "UTF-8" : encoding
     if str.respond_to?(:force_encoding)
-      enc = encoding.blank? ? "UTF-8" : encoding
       if enc != "UTF-8"
         str.force_encoding(enc)
         str = str.encode("UTF-8", :invalid => :replace,
@@ -269,19 +269,18 @@ class Changeset < ActiveRecord::Base
         end
       end
     else
-      unless encoding.blank? || encoding == 'UTF-8'
-        begin
-          str = Iconv.conv('UTF-8', encoding, str)
-        rescue Iconv::Failure
-          # do nothing here
-        end
-      end
-      # removes invalid UTF8 sequences
+      ic = Iconv.new('UTF-8', enc)
+      txtar = ""
       begin
-        str = Iconv.conv('UTF-8//IGNORE', 'UTF-8', str + '  ')[0..-3]
-      rescue Iconv::InvalidEncoding
-        # "UTF-8//IGNORE" is not supported on some OS
+        txtar += ic.iconv(str)
+      rescue Iconv::IllegalSequence
+        txtar += $!.success
+        str = '?' + $!.failed[1,$!.failed.length]
+        retry
+      rescue
+        txtar += $!.success
       end
+      str = txtar
     end
     str
   end
index 93027d8..8e98b05 100644 (file)
@@ -21,7 +21,8 @@ require File.expand_path('../../test_helper', __FILE__)
 
 class ChangesetTest < ActiveSupport::TestCase
   fixtures :projects, :repositories, :issues, :issue_statuses,
-           :changesets, :changes, :issue_categories, :enumerations, :custom_fields, :custom_values, :users, :members, :member_roles, :trackers
+           :changesets, :changes, :issue_categories, :enumerations,
+           :custom_fields, :custom_values, :users, :members, :member_roles, :trackers
 
   def setup
   end
@@ -250,29 +251,26 @@ class ChangesetTest < ActiveSupport::TestCase
       assert_equal str_utf8, c.comments
   end
 
-  def test_invalid_utf8_sequences_in_comments_should_be_stripped
+  def test_invalid_utf8_sequences_in_comments_should_be_replaced_latin1
       proj = Project.find(3)
       # str = File.read("#{RAILS_ROOT}/test/fixtures/encoding/iso-8859-1.txt")
       str = "Texte encod\xe9 en ISO-8859-1."
       str.force_encoding("ASCII-8BIT") if str.respond_to?(:force_encoding)
       r = Repository::Bazaar.create!(
-            :project => proj, :url => '/tmp/test/bazaar',
+            :project => proj,
+            :url => '/tmp/test/bazaar',
             :log_encoding => 'UTF-8' )
       assert r
-      c = Changeset.new(:repository => r,
+      c = Changeset.new(:repository   => r,
                         :committed_on => Time.now,
-                        :revision => '123',
-                        :scmid => '12345',
-                        :comments => str)
+                        :revision     => '123',
+                        :scmid        => '12345',
+                        :comments     => str)
       assert( c.save )
-      if str.respond_to?(:force_encoding)
-        assert_equal "Texte encod? en ISO-8859-1.", c.comments
-      else
-        assert_equal "Texte encod en ISO-8859-1.", c.comments
-      end
+      assert_equal "Texte encod? en ISO-8859-1.", c.comments
   end
 
-  def test_invalid_utf8_sequences_in_comments_should_be_stripped_ja_jis
+  def test_invalid_utf8_sequences_in_comments_should_be_replaced_ja_jis
       proj = Project.find(3)
       str = "test\xb5\xfetest\xb5\xfe"
       if str.respond_to?(:force_encoding)
@@ -280,7 +278,7 @@ class ChangesetTest < ActiveSupport::TestCase
       end
       r = Repository::Bazaar.create!(
             :project => proj,
-            :url => '/tmp/test/bazaar',
+            :url     => '/tmp/test/bazaar',
             :log_encoding => 'ISO-2022-JP' )
       assert r
       c = Changeset.new(:repository   => r,
@@ -289,11 +287,7 @@ class ChangesetTest < ActiveSupport::TestCase
                         :scmid        => '12345',
                         :comments     => str)
       assert( c.save )
-      if str.respond_to?(:force_encoding)
-        assert_equal "test??test??", c.comments
-      else
-        assert_equal "testtest", c.comments
-      end
+      assert_equal "test??test??", c.comments
   end
 
   def test_comments_should_be_converted_all_latin1_to_utf8