Merge "Implement (but @hide) java.text.Normalizer from Java 6."

author Elliott Hughes <enh@google.com>

Tue, 2 Mar 2010 04:07:07 +0000 (20:07 -0800)

committer Android (Google) Code Review <android-gerrit@google.com>

Tue, 2 Mar 2010 04:07:07 +0000 (20:07 -0800)
author Elliott Hughes <enh@google.com>
Tue, 2 Mar 2010 04:07:07 +0000 (20:07 -0800)
committer Android (Google) Code Review <android-gerrit@google.com>
Tue, 2 Mar 2010 04:07:07 +0000 (20:07 -0800)
diff --git a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CDATASectionImpl.java b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CDATASectionImpl.java

index b28c9da..7d122f2 100644 (file)
--- a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CDATASectionImpl.java
+++ b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CDATASectionImpl.java
@@ -29,7 +29,7 @@ import org.w3c.dom.Node;
   * the DOM implementation can easily access them while maintaining the DOM tree
   * structure.
   */
-public class CDATASectionImpl extends TextImpl implements CDATASection {
+public final class CDATASectionImpl extends TextImpl implements CDATASection {
  
      public CDATASectionImpl(DocumentImpl document, String data) {
          super(document, data);
@@ -45,4 +45,43 @@ public class CDATASectionImpl extends TextImpl implements CDATASection {
          return Node.CDATA_SECTION_NODE;
      }
  
+    /**
+     * Splits this CDATA node at every "]]>" (between "]]" and ">") so that no part contains it.
+     * New nodes are inserted before this node, which keeps the last part; no-op without "]]>".
+     */
+    public void split() {
+        if (!needsSplitting()) {
+            return;
+        }
+        // limit -1 keeps trailing empty parts, so data ending in (or equal to) "]]>" splits correctly
+        Node parent = getParentNode();
+        String[] parts = getData().split("\\]\\]>", -1);
+        parent.insertBefore(new CDATASectionImpl(document, parts[0] + "]]"), this);
+        for (int p = 1; p < parts.length - 1; p++) {
+            parent.insertBefore(new CDATASectionImpl(document, ">" + parts[p] + "]]"), this);
+        }
+        setData(">" + parts[parts.length - 1]);
+    }
+
+    /**
+     * Reports whether this CDATA section's data includes "]]>", the one
+     * sequence a CDATA section cannot hold. Nodes for which this is true must
+     * be {@link #split} before they are serialized.
+     */
+    public boolean needsSplitting() {
+        return buffer.indexOf("]]>") >= 0;
+    }
+
+    /**
+     * Swaps this CDATA section for a plain text node carrying the same data.
+     * The text node is inserted ahead of this node, which is then detached.
+     *
+     * @return the text node that took this node's place.
+     */
+    public TextImpl replaceWithText() {
+        TextImpl plainText = new TextImpl(document, getData());
+        parent.insertBefore(plainText, this); // insert first: this node is the reference child
+        parent.removeChild(this);
+        return plainText;
+    }
  }
diff --git a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CharacterDataImpl.java b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CharacterDataImpl.java

index 6354747..2d4c3b4 100644 (file)
--- a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CharacterDataImpl.java
+++ b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CharacterDataImpl.java
@@ -32,7 +32,7 @@ import org.w3c.dom.DOMException;
  public abstract class CharacterDataImpl extends LeafNodeImpl implements
          CharacterData {
  
-    private StringBuffer buffer;
+    protected StringBuffer buffer;
  
      CharacterDataImpl(DocumentImpl document, String data) {
          super(document);
diff --git a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CommentImpl.java b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CommentImpl.java

index 2d4a9c5..5f8a4e0 100644 (file)
--- a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CommentImpl.java
+++ b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/CommentImpl.java
@@ -29,7 +29,7 @@ import org.w3c.dom.Node;
   * the DOM implementation can easily access them while maintaining the DOM tree
   * structure.
   */
-public class CommentImpl extends CharacterDataImpl implements Comment {
+public final class CommentImpl extends CharacterDataImpl implements Comment {
  
      CommentImpl(DocumentImpl document, String data) {
          super(document, data);
@@ -45,4 +45,11 @@ public class CommentImpl extends CharacterDataImpl implements Comment {
          return Node.COMMENT_NODE;
      }
  
+    /**
+     * Reports whether this comment's data includes the character sequence
+     * "--", which is illegal there. Such nodes may not be serialized.
+     */
+    public boolean containsDashDash() {
+        return buffer.indexOf("--") >= 0;
+    }
  }
diff --git a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/DOMConfigurationImpl.java b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/DOMConfigurationImpl.java

index 2f57a4c..1a8acbc 100644 (file)
--- a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/DOMConfigurationImpl.java
+++ b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/DOMConfigurationImpl.java
@@ -16,10 +16,14 @@
  
  package org.apache.harmony.xml.dom;
  
+import org.apache.xml.serializer.dom3.DOMErrorImpl;
  import org.w3c.dom.DOMConfiguration;
+import org.w3c.dom.DOMError;
  import org.w3c.dom.DOMErrorHandler;
  import org.w3c.dom.DOMException;
  import org.w3c.dom.DOMStringList;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
  
  import java.util.Map;
  import java.util.TreeMap;
@@ -368,4 +372,120 @@ public final class DOMConfigurationImpl implements DOMConfiguration {
              }
          };
      }
+
+    public void normalize(Node node) {
+        /*
+         * Normalizes node and, for container nodes, its attributes and children, driven by the
+         * cdataSections, splitCdataSections, comments and wellFormed fields; problems go through
+         * report(). Since we don't validate, this code doesn't take into account the following
+         * "supported" parameters: datatype-normalization, entities, schema-location,
+         * schema-type, or validate. TODO: normalize namespaces
+         */
+
+        switch (node.getNodeType()) {
+            case Node.CDATA_SECTION_NODE:
+                CDATASectionImpl cdata = (CDATASectionImpl) node;
+                if (cdataSections) { // CDATA sections are kept as CDATA
+                    if (cdata.needsSplitting()) {
+                        if (splitCdataSections) {
+                            cdata.split();
+                            report(DOMError.SEVERITY_WARNING, "cdata-sections-splitted");
+                        } else {
+                            report(DOMError.SEVERITY_ERROR, "wf-invalid-character"); // "]]>" present, splitting off
+                        }
+                    }
+                    checkTextValidity(cdata.buffer); // after a split this is only the part left in the original node
+                    break;
+                }
+                node = cdata.replaceWithText(); // cdataSections is off: continue with the replacement text node
+                // fall through
+
+            case Node.TEXT_NODE:
+                TextImpl text = (TextImpl) node;
+                text = text.minimize(); // may remove an empty node (null) or merge into the previous text node
+                if (text != null) {
+                    checkTextValidity(text.buffer);
+                }
+                break;
+
+            case Node.COMMENT_NODE:
+                CommentImpl comment = (CommentImpl) node;
+                if (!comments) { // comments are not retained: detach the node and stop
+                    comment.getParentNode().removeChild(comment);
+                    break;
+                }
+                if (comment.containsDashDash()) {
+                    report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
+                }
+                checkTextValidity(comment.buffer);
+                break;
+
+            case Node.PROCESSING_INSTRUCTION_NODE:
+                checkTextValidity(((ProcessingInstructionImpl) node).getData()); // only the data is checked here
+                break;
+
+            case Node.ATTRIBUTE_NODE:
+                checkTextValidity(((AttrImpl) node).getValue());
+                break;
+
+            case Node.ELEMENT_NODE:
+                ElementImpl element = (ElementImpl) node;
+                NamedNodeMap attributes = element.getAttributes();
+                for (int i = 0; i < attributes.getLength(); i++) {
+                    normalize(attributes.item(i)); // handled by the ATTRIBUTE_NODE case
+                }
+                // fall through
+
+            case Node.DOCUMENT_NODE:
+            case Node.DOCUMENT_FRAGMENT_NODE:
+                Node next;
+                for (Node child = node.getFirstChild(); child != null; child = next) {
+                    // lookup next eagerly because normalize() may remove its subject
+                    next = child.getNextSibling();
+                    normalize(child);
+                }
+                break;
+
+            case Node.NOTATION_NODE:
+            case Node.DOCUMENT_TYPE_NODE:
+            case Node.ENTITY_NODE:
+            case Node.ENTITY_REFERENCE_NODE:
+                break; // left untouched
+
+            default:
+                throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
+                        "Unsupported node type " + node.getNodeType());
+        }
+    }
+
+    private void checkTextValidity(CharSequence s) { // reports "wf-invalid-character" for illegal text
+        if (wellFormed && !isValid(s)) { // characters are only checked while wellFormed is set
+            report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
+        }
+    }
+
+    /**
+     * Returns true if every code point in the text is a legal XML character
+     * per http://www.w3.org/TR/REC-xml/#charsets; unpaired surrogates are not.
+     */
+    private boolean isValid(CharSequence text) {
+        for (int i = 0; i < text.length(); ) {
+            int c = Character.codePointAt(text, i); // decodes a surrogate pair as one character
+            i += Character.charCount(c);
+            boolean valid = c == 0x9 || c == 0xA || c == 0xD
+                    || (c >= 0x20 && c <= 0xd7ff)
+                    || (c >= 0xe000 && c <= 0xfffd) || (c >= 0x10000 && c <= 0x10ffff);
+            if (!valid) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private void report(short severity, String type) {
+        if (errorHandler == null) {
+            return; // no handler registered: the error is dropped
+        }
+        errorHandler.handleError(new DOMErrorImpl(severity, type, type)); // TODO: abort if handleError returns false
+    }
  }
diff --git a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/DocumentImpl.java b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/DocumentImpl.java

index 035e1bb..b009128 100644 (file)
--- a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/DocumentImpl.java
+++ b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/DocumentImpl.java
@@ -44,10 +44,10 @@ import org.w3c.dom.Text;
   * the DOM implementation can easily access them while maintaining the DOM tree
   * structure.
   */
-public class DocumentImpl extends InnerNodeImpl implements Document {
+public final class DocumentImpl extends InnerNodeImpl implements Document {
  
      private DOMImplementation domImplementation;
-    private DOMConfiguration domConfiguration;
+    private DOMConfigurationImpl domConfiguration;
  
      /*
       * The default values of these fields are specified by the Document
@@ -369,7 +369,12 @@ public class DocumentImpl extends InnerNodeImpl implements Document {
      }
  
      public void normalizeDocument() {
-        throw new UnsupportedOperationException(); // TODO
+        Element root = getDocumentElement();
+        if (root == null) { // no document element: nothing to normalize
+            return;
+        }
+
+        ((DOMConfigurationImpl) getDomConfig()).normalize(root); // NOTE(review): only the root element's subtree is visited
      }
  
      public Node renameNode(Node n, String namespaceURI, String qualifiedName)
diff --git a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/InnerNodeImpl.java b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/InnerNodeImpl.java

index 9cee352..fa75e21 100644 (file)
--- a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/InnerNodeImpl.java
+++ b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/InnerNodeImpl.java
@@ -19,7 +19,6 @@ package org.apache.harmony.xml.dom;
  import org.w3c.dom.DOMException;
  import org.w3c.dom.Node;
  import org.w3c.dom.NodeList;
-import org.w3c.dom.Text;
  
  import java.util.ArrayList;
  import java.util.List;
@@ -154,29 +153,14 @@ public abstract class InnerNodeImpl extends LeafNodeImpl {
       */
      @Override
      public final void normalize() {
-        Text next = null; // null if next doesn't exist or is not a TEXT_NODE
-        for (int i = children.size() - 1; i >= 0; i--) {
-            Node node = children.get(i);
+        Node next;
+        for (Node node = getFirstChild(); node != null; node = next) {
+            next = node.getNextSibling();
              node.normalize();
  
-            if (node.getNodeType() != Node.TEXT_NODE) {
-                next = null;
-                continue;
+            if (node.getNodeType() == Node.TEXT_NODE) {
+                ((TextImpl) node).minimize();
              }
-
-            Text text = (Text) node;
-
-            if (text.getLength() == 0) {
-                removeChild(text);
-                continue;
-            }
-
-            if (next != null) {
-                text.appendData(next.getData());
-                removeChild(next);
-            }
-
-            next = text;
          }
      }
  
diff --git a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/ProcessingInstructionImpl.java b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/ProcessingInstructionImpl.java

index 179b33c..115245d 100644 (file)
--- a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/ProcessingInstructionImpl.java
+++ b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/ProcessingInstructionImpl.java
@@ -30,7 +30,7 @@ import org.w3c.dom.ProcessingInstruction;
   * the DOM implementation can easily access them while maintaining the DOM tree
   * structure.
   */
-public class ProcessingInstructionImpl extends LeafNodeImpl implements
+public final class ProcessingInstructionImpl extends LeafNodeImpl implements
          ProcessingInstruction {
  
      private String target;
@@ -39,7 +39,7 @@ public class ProcessingInstructionImpl extends LeafNodeImpl implements
  
      ProcessingInstructionImpl(DocumentImpl document, String target, String data) {
          super(document);
-        this.target = target;
+        this.target = target; // TODO: validate that target is well-formed
          this.data = data;
      }
  
diff --git a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/TextImpl.java b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/TextImpl.java

index 3840ef4..d39dff2 100644 (file)
--- a/libcore/xml/src/main/java/org/apache/harmony/xml/dom/TextImpl.java
+++ b/libcore/xml/src/main/java/org/apache/harmony/xml/dom/TextImpl.java
@@ -140,4 +140,33 @@ public class TextImpl extends CharacterDataImpl implements Text {
                  ? (TextImpl) nextSibling
                  : null;
      }
+
+    /**
+     * Attempts to eliminate this node by looking at it and its previous
+     * sibling. An empty node is simply removed and null is returned. If the
+     * previous sibling is a text node, this node's text is appended to that
+     * sibling and this node is removed.
+     *
+     * <p>The shape of the DOM tree may change, but the document's semantics
+     * do not.
+     *
+     * @return the node holding this node's text at the end of the operation,
+     *     or null if this node contained the empty string.
+     */
+    public final TextImpl minimize() {
+        if (getLength() == 0) {
+            parent.removeChild(this);
+            return null;
+        }
+
+        Node sibling = getPreviousSibling();
+        if (sibling != null && sibling.getNodeType() == Node.TEXT_NODE) {
+            TextImpl target = (TextImpl) sibling;
+            target.buffer.append(buffer);
+            parent.removeChild(this);
+            return target;
+        }
+
+        return this;
+    }
  }
diff --git a/libcore/xml/src/main/java/org/apache/xml/serializer/dom3/DOMErrorImpl.java b/libcore/xml/src/main/java/org/apache/xml/serializer/dom3/DOMErrorImpl.java

index 81eda73..3895a53 100644 (file)
--- a/libcore/xml/src/main/java/org/apache/xml/serializer/dom3/DOMErrorImpl.java
+++ b/libcore/xml/src/main/java/org/apache/xml/serializer/dom3/DOMErrorImpl.java
@@ -32,7 +32,7 @@ import org.w3c.dom.DOMLocator;
   * @xsl.usage internal \r
   */\r
  \r
-final class DOMErrorImpl implements DOMError {\r
+public final class DOMErrorImpl implements DOMError {\r
      \r
      /** private data members */\r
      \r
@@ -70,7 +70,7 @@ final class DOMErrorImpl implements DOMError {
       * @param message\r
       * @param type\r
       */\r
-    DOMErrorImpl(short severity, String message, String type) {\r
+    public DOMErrorImpl(short severity, String message, String type) {\r
          fSeverity = severity;\r
          fMessage = message;\r
          fType = type;\r
@@ -82,7 +82,7 @@ final class DOMErrorImpl implements DOMError {
       * @param type\r
       * @param exception\r
       */\r
-    DOMErrorImpl(short severity, String message, String type,\r
+    public DOMErrorImpl(short severity, String message, String type,\r
              Exception exception) {\r
          fSeverity = severity;\r
          fMessage = message;\r
@@ -98,7 +98,7 @@ final class DOMErrorImpl implements DOMError {
       * @param relatedData\r
       * @param location\r
       */\r
-    DOMErrorImpl(short severity, String message, String type,\r
+    public DOMErrorImpl(short severity, String message, String type,\r
              Exception exception, Object relatedData, DOMLocatorImpl location) {\r
          fSeverity = severity;\r
          fMessage = message;\r
diff --git a/libcore/xml/src/test/java/tests/xml/NormalizeTest.java b/libcore/xml/src/test/java/tests/xml/NormalizeTest.java

index b10ea9c..6fa6c97 100644 (file)
--- a/libcore/xml/src/test/java/tests/xml/NormalizeTest.java
+++ b/libcore/xml/src/test/java/tests/xml/NormalizeTest.java
@@ -17,6 +17,8 @@
  package tests.xml;
  
  import junit.framework.TestCase;
+import org.w3c.dom.CDATASection;
+import org.w3c.dom.Comment;
  import org.w3c.dom.DOMConfiguration;
  import org.w3c.dom.DOMError;
  import org.w3c.dom.DOMErrorHandler;
@@ -25,6 +27,7 @@ import org.w3c.dom.Document;
  import org.w3c.dom.Element;
  import org.w3c.dom.Node;
  import org.w3c.dom.NodeList;
+import org.w3c.dom.ProcessingInstruction;
  import org.w3c.dom.Text;
  import org.xml.sax.InputSource;
  
@@ -37,6 +40,7 @@ import java.io.StringReader;
  import java.io.StringWriter;
  import java.util.ArrayList;
  import java.util.Arrays;
+import java.util.Collections;
  import java.util.List;
  
  /**
@@ -61,26 +65,31 @@ public class NormalizeTest extends TestCase {
      }
  
      public void testCanonicalForm() {
+        assertEquals(false, domConfiguration.getParameter("canonical-form"));
          assertSupported("canonical-form", false);
          assertUnsupported("canonical-form", true);
      }
  
      public void testCdataSections() {
+        assertEquals(true, domConfiguration.getParameter("cdata-sections"));
          assertSupported("cdata-sections", false);
          assertSupported("cdata-sections", true);
      }
  
      public void testCheckCharacterNormalization() {
+        assertEquals(false, domConfiguration.getParameter("check-character-normalization"));
          assertSupported("check-character-normalization", false);
          assertUnsupported("check-character-normalization", true);
      }
  
      public void testComments() {
+        assertEquals(true, domConfiguration.getParameter("comments"));
          assertSupported("comments", false);
          assertSupported("comments", true);
      }
  
      public void testDatatypeNormalization() {
+        assertEquals(false, domConfiguration.getParameter("datatype-normalization"));
          assertSupported("datatype-normalization", false);
          assertSupported("datatype-normalization", true);
  
@@ -95,16 +104,19 @@ public class NormalizeTest extends TestCase {
      }
  
      public void testElementContentWhitespace() {
+        assertEquals(true, domConfiguration.getParameter("element-content-whitespace"));
          assertUnsupported("element-content-whitespace", false);
          assertSupported("element-content-whitespace", true);
      }
  
      public void testEntities() {
+        assertEquals(true, domConfiguration.getParameter("entities"));
          assertSupported("entities", false);
          assertSupported("entities", true);
      }
  
      public void testErrorHandler() {
+        assertEquals(null, domConfiguration.getParameter("error-handler"));
          assertSupported("error-handler", null);
          assertSupported("error-handler", new DOMErrorHandler() {
              public boolean handleError(DOMError error) {
@@ -114,6 +126,7 @@ public class NormalizeTest extends TestCase {
      }
  
      public void testInfoset() {
+        assertEquals(false, domConfiguration.getParameter("infoset"));
          assertSupported("infoset", false);
          assertSupported("infoset", true);
      }
@@ -162,21 +175,25 @@ public class NormalizeTest extends TestCase {
      }
  
      public void testNamespaces() {
+        assertEquals(true, domConfiguration.getParameter("namespaces"));
          assertSupported("namespaces", false);
          assertSupported("namespaces", true);
      }
  
      public void testNamespaceDeclarations() {
+        assertEquals(true, domConfiguration.getParameter("namespace-declarations"));
          assertUnsupported("namespace-declarations", false); // supported in RI 6
          assertSupported("namespace-declarations", true);
      }
  
      public void testNormalizeCharacters() {
+        assertEquals(false, domConfiguration.getParameter("normalize-characters"));
          assertSupported("normalize-characters", false);
          assertUnsupported("normalize-characters", true);
      }
  
      public void testSchemaLocation() {
+        assertEquals(null, domConfiguration.getParameter("schema-location"));
          assertSupported("schema-location", "http://foo");
          assertSupported("schema-location", null);
      }
@@ -190,26 +207,31 @@ public class NormalizeTest extends TestCase {
      }
  
      public void testSchemaTypeXmlSchema() {
+        assertEquals(null, domConfiguration.getParameter("schema-type"));
          assertSupported("schema-type", null);
          assertSupported("schema-type", "http://www.w3.org/2001/XMLSchema");
      }
  
      public void testSplitCdataSections() {
+        assertEquals(true, domConfiguration.getParameter("split-cdata-sections"));
          assertSupported("split-cdata-sections", false);
          assertSupported("split-cdata-sections", true);
      }
  
      public void testValidate() {
+        assertEquals(false, domConfiguration.getParameter("validate"));
          assertSupported("validate", false);
          assertSupported("validate", true);
      }
  
      public void testValidateIfSchema() {
+        assertEquals(false, domConfiguration.getParameter("validate-if-schema"));
          assertSupported("validate-if-schema", false);
          assertUnsupported("validate-if-schema", true);
      }
  
      public void testWellFormed() {
+        assertEquals(true, domConfiguration.getParameter("well-formed"));
          assertSupported("well-formed", false);
          assertSupported("well-formed", true);
      }
@@ -314,30 +336,26 @@ public class NormalizeTest extends TestCase {
  
      public void testCdataSectionsNotHonoredByNodeNormalize() throws Exception {
          String xml = "<foo>ABC<![CDATA[DEF]]>GHI</foo>";
-        document = DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                .parse(new InputSource(new StringReader(xml)));
-        document.getDomConfig().setParameter("cdata-sections", true);
+        parse(xml);
+        domConfiguration.setParameter("cdata-sections", true);
          document.getDocumentElement().normalize();
          assertEquals(xml, domToString(document));
  
-        document = DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                .parse(new InputSource(new StringReader(xml)));
-        document.getDomConfig().setParameter("cdata-sections", false);
+        parse(xml);
+        domConfiguration.setParameter("cdata-sections", false);
          document.getDocumentElement().normalize();
          assertEquals(xml, domToString(document));
      }
  
      public void testCdataSectionsHonoredByDocumentNormalize() throws Exception {
          String xml = "<foo>ABC<![CDATA[DEF]]>GHI</foo>";
-        document = DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                .parse(new InputSource(new StringReader(xml)));
-        document.getDomConfig().setParameter("cdata-sections", true);
+        parse(xml);
+        domConfiguration.setParameter("cdata-sections", true);
          document.normalizeDocument();
          assertEquals(xml, domToString(document));
  
-        document = DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                .parse(new InputSource(new StringReader(xml)));
-        document.getDomConfig().setParameter("cdata-sections", false);
+        parse(xml);
+        domConfiguration.setParameter("cdata-sections", false);
          document.normalizeDocument();
          String expected = xml.replace("<![CDATA[DEF]]>", "DEF");
          assertEquals(expected, domToString(document));
@@ -367,6 +385,170 @@ public class NormalizeTest extends TestCase {
          assertChildren(document.getDocumentElement(), "<br>", "<br>", "<br>");
      }
  
+    public void testRetainingComments() throws Exception {
+        String original = "<foo>ABC<!-- bar -->DEF<!-- baz -->GHI</foo>";
+        parse(original);
+        domConfiguration.setParameter("comments", true); // set after parse(), which swaps the configuration
+        document.normalizeDocument();
+        assertEquals(original, domToString(document)); // the document must round-trip unchanged
+    }
+
+    public void testCommentContainingDoubleDash() throws Exception {
+        ErrorRecorder recorder = new ErrorRecorder();
+        domConfiguration.setParameter("error-handler", recorder);
+        domConfiguration.setParameter("namespaces", false);
+        Element foo = document.createElement("foo");
+        document.appendChild(foo);
+        foo.appendChild(document.createComment("ABC -- DEF")); // "--" is illegal inside a comment
+        document.normalizeDocument();
+        recorder.assertAllErrors(DOMError.SEVERITY_ERROR, "wf-invalid-character");
+    }
+
+    public void testStrippingComments() throws Exception {
+        String original = "<foo>ABC<!-- bar -->DEF<!-- baz -->GHI</foo>";
+        parse(original);
+        domConfiguration.setParameter("comments", false); // ask normalization to drop comment nodes
+        document.normalizeDocument();
+        assertChildren(document.getDocumentElement(), "ABCDEFGHI"); // remaining text collapses into one node
+    }
+
+    public void testSplittingCdataSectionsSplit() throws Exception {
+        ErrorRecorder recorder = new ErrorRecorder();
+        domConfiguration.setParameter("split-cdata-sections", true);
+        domConfiguration.setParameter("error-handler", recorder);
+        domConfiguration.setParameter("namespaces", false);
+        Element foo = document.createElement("foo");
+        document.appendChild(foo);
+        foo.appendChild(document.createCDATASection("ABC]]>DEF]]>GHI")); // two "]]>" runs to split on
+        document.normalizeDocument();
+        recorder.assertAllErrors(DOMError.SEVERITY_WARNING, "cdata-sections-splitted");
+        assertChildren(foo, "<![CDATA[ABC]]]]>", "<![CDATA[>DEF]]]]>", "<![CDATA[>GHI]]>");
+    }
+
+    public void testSplittingCdataSectionsReportError() throws Exception {
+        ErrorRecorder recorder = new ErrorRecorder();
+        domConfiguration.setParameter("split-cdata-sections", false); // splitting disabled: expect an error instead
+        domConfiguration.setParameter("error-handler", recorder);
+        domConfiguration.setParameter("namespaces", false);
+        Element foo = document.createElement("foo");
+        document.appendChild(foo);
+        foo.appendChild(document.createCDATASection("ABC]]>DEF"));
+        document.normalizeDocument();
+        recorder.assertAllErrors(DOMError.SEVERITY_ERROR, "wf-invalid-character");
+    }
+
+    public void testInvalidCharactersCdata() throws Exception {
+        ErrorRecorder recorder = new ErrorRecorder();
+        domConfiguration.setParameter("cdata-sections", true);
+        domConfiguration.setParameter("error-handler", recorder);
+        domConfiguration.setParameter("namespaces", false);
+        Element foo = document.createElement("foo");
+        document.appendChild(foo);
+        CDATASection section = document.createCDATASection("");
+        foo.appendChild(section);
+        // Try every UTF-16 code unit as the last character of the section's data.
+        for (int code = 0; code <= Character.MAX_VALUE; code++) {
+            section.setData("AB" + (char) code);
+            document.normalizeDocument();
+            if (!isValid((char) code)) {
+                recorder.assertAllErrors("For character " + code,
+                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
+            } else {
+                assertEquals(Collections.<DOMError>emptyList(), recorder.errors);
+            }
+        }
+    }
+
+    public void testInvalidCharactersText() throws Exception {
+        ErrorRecorder recorder = new ErrorRecorder();
+        domConfiguration.setParameter("error-handler", recorder);
+        domConfiguration.setParameter("namespaces", false);
+        Element foo = document.createElement("foo");
+        document.appendChild(foo);
+        Text textNode = document.createTextNode("");
+        foo.appendChild(textNode);
+        // Try every UTF-16 code unit as the last character of the text node's data.
+        for (int code = 0; code <= Character.MAX_VALUE; code++) {
+            textNode.setData("AB" + (char) code);
+            document.normalizeDocument();
+            if (!isValid((char) code)) {
+                recorder.assertAllErrors("For character " + code,
+                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
+            } else {
+                assertEquals(Collections.<DOMError>emptyList(), recorder.errors);
+            }
+        }
+    }
+
+    public void testInvalidCharactersAttribute() throws Exception {
+        ErrorRecorder recorder = new ErrorRecorder();
+        domConfiguration.setParameter("error-handler", recorder);
+        domConfiguration.setParameter("namespaces", false);
+        Element foo = document.createElement("foo");
+        document.appendChild(foo);
+        // Try every UTF-16 code unit as the last character of the attribute's value.
+        for (int code = 0; code <= Character.MAX_VALUE; code++) {
+            foo.setAttribute("bar", "AB" + (char) code);
+            document.normalizeDocument();
+            if (!isValid((char) code)) {
+                recorder.assertAllErrors("For character " + code,
+                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
+            } else {
+                assertEquals(Collections.<DOMError>emptyList(), recorder.errors);
+            }
+        }
+    }
+
+    public void testInvalidCharactersComment() throws Exception {
+        ErrorRecorder recorder = new ErrorRecorder();
+        domConfiguration.setParameter("error-handler", recorder);
+        domConfiguration.setParameter("namespaces", false);
+        Element foo = document.createElement("foo");
+        document.appendChild(foo);
+        Comment remark = document.createComment("");
+        foo.appendChild(remark);
+        // Try every UTF-16 code unit as the last character of the comment's data.
+        for (int code = 0; code <= Character.MAX_VALUE; code++) {
+            remark.setData("AB" + (char) code);
+            document.normalizeDocument();
+            if (!isValid((char) code)) {
+                recorder.assertAllErrors("For character " + code,
+                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
+            } else {
+                assertEquals(Collections.<DOMError>emptyList(), recorder.errors);
+            }
+        }
+    }
+
+    public void testInvalidCharactersProcessingInstructionData() throws Exception {
+        ErrorRecorder recorder = new ErrorRecorder();
+        domConfiguration.setParameter("error-handler", recorder);
+        domConfiguration.setParameter("namespaces", false);
+        Element foo = document.createElement("foo");
+        document.appendChild(foo);
+        ProcessingInstruction instruction = document.createProcessingInstruction("foo", "");
+        foo.appendChild(instruction);
+        // Try every UTF-16 code unit as the last character of the instruction's data.
+        for (int code = 0; code <= Character.MAX_VALUE; code++) {
+            instruction.setData("AB" + (char) code);
+            document.normalizeDocument();
+            if (!isValid((char) code)) {
+                recorder.assertAllErrors("For character " + code,
+                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
+            } else {
+                assertEquals(Collections.<DOMError>emptyList(), recorder.errors);
+            }
+        }
+    }
+
+    // TODO: test for surrogates
+
+    private boolean isValid(char c) {
+        // Legal single-char XML characters, as defined by http://www.w3.org/TR/REC-xml/#charsets.
+        boolean whitespace = c == '\t' || c == '\n' || c == '\r';
+        return whitespace || (0x20 <= c && c <= 0xd7ff) || (0xe000 <= c && c <= 0xfffd);
+    }
+
      private Document createDocumentWithAdjacentTexts(String... texts) throws Exception {
          Document result = DocumentBuilderFactory.newInstance()
                  .newDocumentBuilder().newDocument();
@@ -387,13 +569,23 @@ public class NormalizeTest extends TestCase {
          NodeList nodes = element.getChildNodes();
          for (int i = 0; i < nodes.getLength(); i++) {
              Node node = nodes.item(i);
-            actual.add(node.getNodeType() == Node.TEXT_NODE
-                    ? ((Text) node).getData()
-                    : "<" + node.getNodeName() + ">");
+            if (node.getNodeType() == Node.TEXT_NODE) {
+                actual.add(((Text) node).getData());
+            } else if (node.getNodeType() == Node.CDATA_SECTION_NODE) {
+                actual.add("<![CDATA[" + ((CDATASection) node).getData() + "]]>");
+            } else {
+                actual.add("<" + node.getNodeName() + ">");
+            }
          }
          assertEquals(Arrays.asList(texts), actual);
      }
  
+    private void parse(String xml) throws Exception { // re-points document at a parsed copy of xml
+        document = DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                .parse(new InputSource(new StringReader(xml)));
+        domConfiguration = document.getDomConfig(); // keep the config in sync with the new document
+    }
+
      private String domToString(Document document) throws TransformerException {
          StringWriter writer = new StringWriter();
          TransformerFactory.newInstance().newTransformer()
@@ -401,4 +593,26 @@ public class NormalizeTest extends TestCase {
          String xml = writer.toString();
          return xml.replaceFirst("<\\?xml[^?]*\\?>", "");
      }
+
+    private class ErrorRecorder implements DOMErrorHandler {
+        private final List<DOMError> errors = new ArrayList<DOMError>();
+        // Records each reported error; true tells the DOM implementation it may keep processing.
+        public boolean handleError(DOMError error) {
+            errors.add(error);
+            return true;
+        }
+        // Convenience overload that supplies a default failure message built from the error type.
+        public void assertAllErrors(int severity, String type) {
+            assertAllErrors("Expected one or more " + type + " errors", severity, type);
+        }
+        // Fails unless at least one error was recorded and all of them match; then empties the list.
+        public void assertAllErrors(String message, int severity, String type) {
+            assertFalse(message, errors.isEmpty());
+            for (DOMError error : errors) {
+                assertEquals(message, severity, error.getSeverity());
+                assertEquals(message, type, error.getType());
+            }
+            errors.clear(); // reset so later assertions only see errors reported after this call
+        }
+    }
  }
diff --git a/vm/Init.c b/vm/Init.c

index 61afa7c..744d119 100644 (file)
--- a/vm/Init.c
+++ b/vm/Init.c
@@ -1246,6 +1246,16 @@ int dvmStartup(int argc, const char* const argv[], bool ignoreUnrecognized,
      if (!dvmPrepMainThread())
          goto fail;
  
+    /*
+     * Make sure we haven't accumulated any tracked references.  The main
+     * thread should be starting with a clean slate.
+     */
+    if (dvmReferenceTableEntries(&dvmThreadSelf()->internalLocalRefTable) != 0)
+    {
+        LOGW("Warning: tracked references remain post-initialization\n");
+        dvmDumpReferenceTable(&dvmThreadSelf()->internalLocalRefTable, "MAIN");
+    }
+
      /* general debugging setup */
      if (!dvmDebuggerStartup())
          goto fail;
diff --git a/vm/ReferenceTable.c b/vm/ReferenceTable.c

index b47d775..310669d 100644 (file)
--- a/vm/ReferenceTable.c
+++ b/vm/ReferenceTable.c
@@ -188,12 +188,15 @@ static void logObject(Object* obj, int size, int identical, int equiv)
          return;
      }
  
+    /* handle "raw" dvmMalloc case */
+    const char* descriptor =
+        (obj->clazz != NULL) ? obj->clazz->descriptor : "(raw)";
+
      if (identical + equiv != 0) {
          LOGW("%5d of %s %dB (%d unique)\n", identical + equiv +1,
-            obj->clazz->descriptor, size, equiv +1);
+            descriptor, size, equiv +1);
      } else {
-        LOGW("%5d of %s %dB\n", identical + equiv +1,
-            obj->clazz->descriptor, size);
+        LOGW("%5d of %s %dB\n", identical + equiv +1, descriptor, size);
      }
  }
  
@@ -236,6 +239,9 @@ void dvmDumpReferenceTable(const ReferenceTable* pRef, const char* descr)
              LOGW("%5d: %p cls=%s '%s' (%d bytes)\n", i, ref,
                  (refs[i] == NULL) ? "-" : ref->clazz->descriptor,
                  clazz->descriptor, size);
+        } else if (ref->clazz == NULL) {
+            /* should only be possible right after a plain dvmMalloc() */
+            LOGW("%5d: %p cls=(raw) (%d bytes)\n", i, ref, size);
          } else {
              LOGW("%5d: %p cls=%s (%d bytes)\n", i, ref,
                  (refs[i] == NULL) ? "-" : ref->clazz->descriptor, size);
@@ -282,7 +288,7 @@ void dvmDumpReferenceTable(const ReferenceTable* pRef, const char* descr)
      total += size;
      logObject(refs[count-1], size, identical, equiv);
  
-    LOGW("Memory held directly by native code is %d bytes\n", total);
+    LOGW("Memory held directly by tracked refs is %d bytes\n", total);
      free(tableCopy);
  }
  
diff --git a/vm/native/dalvik_system_Zygote.c b/vm/native/dalvik_system_Zygote.c

index c380e4b..f8e8250 100644 (file)
--- a/vm/native/dalvik_system_Zygote.c
+++ b/vm/native/dalvik_system_Zygote.c
@@ -299,6 +299,29 @@ static void enableDebugFeatures(u4 debugFlags)
              gDvm.executionMode = kExecutionModeInterpFast;
  #endif
      }
+
+#if HAVE_ANDROID_OS
+    if ((debugFlags & DEBUG_ENABLE_DEBUGGER) != 0) {
+        /* To let a non-privileged gdbserver attach to this
+         * process, we must set its dumpable bit flag. However
+         * we are not interested in generating a coredump in
+         * case of a crash, so also set the coredump size to 0
+         * to disable that
+         */
+        if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) {
+            LOGE("could not set dumpable bit flag for pid %d, errno=%d",
+                 getpid(), errno);
+        } else {
+            struct rlimit rl;
+            rl.rlim_cur = 0;
+            rl.rlim_max = RLIM_INFINITY;
+            if (setrlimit(RLIMIT_CORE, &rl) < 0) {
+                LOGE("could not disable core file generation "
+                     "for pid %d, errno=%d", getpid(), errno);
+            }
+        }
+    }
+#endif
  }
  
  /*
author	Elliott Hughes <enh@google.com>
	Tue, 2 Mar 2010 04:07:07 +0000 (20:07 -0800)
committer	Android (Google) Code Review <android-gerrit@google.com>
	Tue, 2 Mar 2010 04:07:07 +0000 (20:07 -0800)
libcore/xml/src/main/java/org/apache/harmony/xml/dom/CDATASectionImpl.java		patch \| blob \| history
libcore/xml/src/main/java/org/apache/harmony/xml/dom/CharacterDataImpl.java		patch \| blob \| history
libcore/xml/src/main/java/org/apache/harmony/xml/dom/CommentImpl.java		patch \| blob \| history
libcore/xml/src/main/java/org/apache/harmony/xml/dom/DOMConfigurationImpl.java		patch \| blob \| history
libcore/xml/src/main/java/org/apache/harmony/xml/dom/DocumentImpl.java		patch \| blob \| history
libcore/xml/src/main/java/org/apache/harmony/xml/dom/InnerNodeImpl.java		patch \| blob \| history
libcore/xml/src/main/java/org/apache/harmony/xml/dom/ProcessingInstructionImpl.java		patch \| blob \| history
libcore/xml/src/main/java/org/apache/harmony/xml/dom/TextImpl.java		patch \| blob \| history
libcore/xml/src/main/java/org/apache/xml/serializer/dom3/DOMErrorImpl.java		patch \| blob \| history
libcore/xml/src/test/java/tests/xml/NormalizeTest.java		patch \| blob \| history
vm/Init.c		patch \| blob \| history
vm/ReferenceTable.c		patch \| blob \| history
vm/native/dalvik_system_Zygote.c		patch \| blob \| history