OSDN Git Service

Factor bidi algorithm into separate method.
author Doug Felt <dougfelt@google.com>
Mon, 22 Feb 2010 21:39:01 +0000 (13:39 -0800)
committer Doug Felt <dougfelt@google.com>
Wed, 24 Feb 2010 19:21:46 +0000 (11:21 -0800)
Add rudimentary support for overriding default base line direction heuristic.
Add some simple tests of the bidi implementation.

core/java/android/text/Layout.java
core/java/android/text/StaticLayout.java
core/tests/coretests/src/android/text/StaticLayoutBidiTest.java [new file with mode: 0644]

index 1023036..38ac9b7 100644 (file)
@@ -1936,6 +1936,11 @@ public abstract class Layout {
 
     public static final int DIR_LEFT_TO_RIGHT = 1;
     public static final int DIR_RIGHT_TO_LEFT = -1;
+    // Base direction requests for StaticLayout.bidi(); DEFAULT_* only apply when no strong char is found.
+    /* package */ static final int DIR_REQUEST_LTR = 1;
+    /* package */ static final int DIR_REQUEST_RTL = -1;
+    /* package */ static final int DIR_REQUEST_DEFAULT_LTR = 2;
+    /* package */ static final int DIR_REQUEST_DEFAULT_RTL = -2;
 
     public enum Alignment {
         ALIGN_NORMAL,
index 6de9c65..600ec7e 100644 (file)
@@ -234,215 +234,9 @@ extends Layout
             }
 
             if (!easy) {
-                AndroidCharacter.getDirectionalities(chs, chdirs, end - start);
-
-                /*
-                 * Determine primary paragraph direction
-                 */
-
-                for (int j = start; j < end; j++) {
-                    int d = chdirs[j - start];
-
-                    if (d == Character.DIRECTIONALITY_LEFT_TO_RIGHT) {
-                        dir = DIR_LEFT_TO_RIGHT;
-                        break;
-                    }
-                    if (d == Character.DIRECTIONALITY_RIGHT_TO_LEFT) {
-                        dir = DIR_RIGHT_TO_LEFT;
-                        break;
-                    }
-                }
-
-                /*
-                 * XXX Explicit overrides should go here
-                 */
-
-                /*
-                 * Weak type resolution
-                 */
-
-                final byte SOR = dir == DIR_LEFT_TO_RIGHT ?
-                                    Character.DIRECTIONALITY_LEFT_TO_RIGHT :
-                                    Character.DIRECTIONALITY_RIGHT_TO_LEFT;
-
-                // dump(chdirs, n, "initial");
-
-                // W1 non spacing marks
-                for (int j = 0; j < n; j++) {
-                    if (chdirs[j] == Character.NON_SPACING_MARK) {
-                        if (j == 0)
-                            chdirs[j] = SOR;
-                        else
-                            chdirs[j] = chdirs[j - 1];
-                    }
-                }
-
-                // dump(chdirs, n, "W1");
-
-                // W2 european numbers
-                byte cur = SOR;
-                for (int j = 0; j < n; j++) {
-                    byte d = chdirs[j];
-
-                    if (d == Character.DIRECTIONALITY_LEFT_TO_RIGHT ||
-                        d == Character.DIRECTIONALITY_RIGHT_TO_LEFT ||
-                        d == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
-                        cur = d;
-                    else if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER) {
-                         if (cur ==
-                            Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
-                            chdirs[j] = Character.DIRECTIONALITY_ARABIC_NUMBER;
-                    }
-                }
-
-                // dump(chdirs, n, "W2");
-
-                // W3 arabic letters
-                for (int j = 0; j < n; j++) {
-                    if (chdirs[j] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
-                        chdirs[j] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
-                }
-
-                // dump(chdirs, n, "W3");
-
-                // W4 single separator between numbers
-                for (int j = 1; j < n - 1; j++) {
-                    byte d = chdirs[j];
-                    byte prev = chdirs[j - 1];
-                    byte next = chdirs[j + 1];
-
-                    if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR) {
-                        if (prev == Character.DIRECTIONALITY_EUROPEAN_NUMBER &&
-                            next == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
-                            chdirs[j] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
-                    } else if (d == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR) {
-                        if (prev == Character.DIRECTIONALITY_EUROPEAN_NUMBER &&
-                            next == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
-                            chdirs[j] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
-                        if (prev == Character.DIRECTIONALITY_ARABIC_NUMBER &&
-                            next == Character.DIRECTIONALITY_ARABIC_NUMBER)
-                            chdirs[j] = Character.DIRECTIONALITY_ARABIC_NUMBER;
-                    }
-                }
-
-                // dump(chdirs, n, "W4");
-
-                // W5 european number terminators
-                boolean adjacent = false;
-                for (int j = 0; j < n; j++) {
-                    byte d = chdirs[j];
-
-                    if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
-                        adjacent = true;
-                    else if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR && adjacent)
-                        chdirs[j] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
-                    else
-                        adjacent = false;
-                }
-
-                //dump(chdirs, n, "W5");
-
-                // W5 european number terminators part 2,
-                // W6 separators and terminators
-                adjacent = false;
-                for (int j = n - 1; j >= 0; j--) {
-                    byte d = chdirs[j];
-
-                    if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
-                        adjacent = true;
-                    else if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR) {
-                        if (adjacent)
-                            chdirs[j] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
-                        else
-                            chdirs[j] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
-                    }
-                    else {
-                        adjacent = false;
-
-                        if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR ||
-                            d == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR ||
-                            d == Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR ||
-                            d == Character.DIRECTIONALITY_SEGMENT_SEPARATOR)
-                            chdirs[j] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
-                    }
-                }
-
-                // dump(chdirs, n, "W6");
-
-                // W7 strong direction of european numbers
-                cur = SOR;
-                for (int j = 0; j < n; j++) {
-                    byte d = chdirs[j];
-
-                    if (d == SOR ||
-                        d == Character.DIRECTIONALITY_LEFT_TO_RIGHT ||
-                        d == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
-                        cur = d;
-
-                    if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
-                        chdirs[j] = cur;
-                }
-
-                // dump(chdirs, n, "W7");
-
-                // N1, N2 neutrals
-                cur = SOR;
-                for (int j = 0; j < n; j++) {
-                    byte d = chdirs[j];
-
-                    if (d == Character.DIRECTIONALITY_LEFT_TO_RIGHT ||
-                        d == Character.DIRECTIONALITY_RIGHT_TO_LEFT) {
-                        cur = d;
-                    } else if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER ||
-                               d == Character.DIRECTIONALITY_ARABIC_NUMBER) {
-                        cur = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
-                    } else {
-                        byte dd = SOR;
-                        int k;
-
-                        for (k = j + 1; k < n; k++) {
-                            dd = chdirs[k];
-
-                            if (dd == Character.DIRECTIONALITY_LEFT_TO_RIGHT ||
-                                dd == Character.DIRECTIONALITY_RIGHT_TO_LEFT) {
-                                break;
-                            }
-                            if (dd == Character.DIRECTIONALITY_EUROPEAN_NUMBER ||
-                                dd == Character.DIRECTIONALITY_ARABIC_NUMBER) {
-                                dd = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
-                                break;
-                            }
-                        }
-
-                        for (int y = j; y < k; y++) {
-                            if (dd == cur)
-                                chdirs[y] = cur;
-                            else
-                                chdirs[y] = SOR;
-                        }
-
-                        j = k - 1;
-                    }
-                }
-
-                // dump(chdirs, n, "final");
-
-                // extra: enforce that all tabs and surrogate characters go the
-                // primary direction
-                // TODO: actually do directions right for surrogates
-
-                for (int j = 0; j < n; j++) {
-                    char c = chs[j];
-
-                    if (c == '\t' || (c >= 0xD800 && c <= 0xDFFF)) {
-                        chdirs[j] = SOR;
-                    }
-                }
-
-                // extra: enforce that object replacements go to the
-                // primary direction
-                // and that none of the underlying characters are treated
-                // as viable breakpoints
+                // Ensure that none of the underlying characters are treated
+                // as viable breakpoints, and that the entire run gets the
+                // same bidi direction.
 
                 if (source instanceof Spanned) {
                     Spanned sp = (Spanned) source;
@@ -453,12 +247,14 @@ extends Layout
                         int b = sp.getSpanEnd(spans[y]);
 
                         for (int x = a; x < b; x++) {
-                            chdirs[x - start] = SOR;
                             chs[x - start] = '\uFFFC';
                         }
                     }
                 }
 
+                // XXX put override flags, etc. into chdirs
+                dir = bidi(dir, chs, chdirs, n, false);
+
                 // Do mirroring for right-to-left segments
 
                 for (int i = 0; i < n; i++) {
@@ -810,6 +606,239 @@ extends Layout
         }
     }
 
+    /**
+     * Runs the unicode bidi algorithm on the first n chars in chs, returning
+     * the char dirs in chInfo and the base line direction of the first
+     * paragraph.
+     * 
+     * XXX change result from dirs to levels
+     *  
+     * @param dir the direction flag, either DIR_REQUEST_LTR,
+     * DIR_REQUEST_RTL, DIR_REQUEST_DEFAULT_LTR, or DIR_REQUEST_DEFAULT_RTL.
+     * @param chs the text to examine
+     * @param chInfo on input, if hasInfo is true, override and other flags 
+     * representing out-of-band embedding information. On output, the generated 
+     * dirs of the text.
+     * @param n the length of the text/information in chs and chInfo
+     * @param hasInfo true if chInfo has input information, otherwise the
+     * input data in chInfo is ignored.
+     * @return the resolved direction level of the first paragraph, either
+     * DIR_LEFT_TO_RIGHT or DIR_RIGHT_TO_LEFT.
+     */
+    /* package */ static int bidi(int dir, char[] chs, byte[] chInfo, int n, 
+            boolean hasInfo) {
+        
+        AndroidCharacter.getDirectionalities(chs, chInfo, n);
+
+        /*
+         * Determine primary paragraph direction if not specified
+         */
+        if (dir != DIR_REQUEST_LTR && dir != DIR_REQUEST_RTL) {
+            // set up default
+            dir = dir >= 0 ? DIR_LEFT_TO_RIGHT : DIR_RIGHT_TO_LEFT;
+            for (int j = 0; j < n; j++) {
+                int d = chInfo[j];
+
+                if (d == Character.DIRECTIONALITY_LEFT_TO_RIGHT) {
+                    dir = DIR_LEFT_TO_RIGHT;
+                    break;
+                }
+                if (d == Character.DIRECTIONALITY_RIGHT_TO_LEFT) {
+                    dir = DIR_RIGHT_TO_LEFT;
+                    break;
+                }
+            }
+        }
+
+        final byte SOR = dir == DIR_LEFT_TO_RIGHT ?
+                Character.DIRECTIONALITY_LEFT_TO_RIGHT :
+                Character.DIRECTIONALITY_RIGHT_TO_LEFT;
+
+        /*
+         * XXX Explicit overrides should go here
+         */
+
+        /*
+         * Weak type resolution
+         */
+
+        // dump(chdirs, n, "initial");
+
+        // W1 non spacing marks
+        for (int j = 0; j < n; j++) {
+            if (chInfo[j] == Character.NON_SPACING_MARK) {
+                if (j == 0)
+                    chInfo[j] = SOR;
+                else
+                    chInfo[j] = chInfo[j - 1];
+            }
+        }
+
+        // dump(chdirs, n, "W1");
+
+        // W2 european numbers
+        byte cur = SOR;
+        for (int j = 0; j < n; j++) {
+            byte d = chInfo[j];
+
+            if (d == Character.DIRECTIONALITY_LEFT_TO_RIGHT ||
+                d == Character.DIRECTIONALITY_RIGHT_TO_LEFT ||
+                d == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
+                cur = d;
+            else if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER) {
+                 if (cur ==
+                    Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
+                    chInfo[j] = Character.DIRECTIONALITY_ARABIC_NUMBER;
+            }
+        }
+
+        // dump(chdirs, n, "W2");
+
+        // W3 arabic letters
+        for (int j = 0; j < n; j++) {
+            if (chInfo[j] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
+                chInfo[j] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
+        }
+
+        // dump(chdirs, n, "W3");
+
+        // W4 single separator between numbers
+        for (int j = 1; j < n - 1; j++) {
+            byte d = chInfo[j];
+            byte prev = chInfo[j - 1];
+            byte next = chInfo[j + 1];
+
+            if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR) {
+                if (prev == Character.DIRECTIONALITY_EUROPEAN_NUMBER &&
+                    next == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
+                    chInfo[j] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
+            } else if (d == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR) {
+                if (prev == Character.DIRECTIONALITY_EUROPEAN_NUMBER &&
+                    next == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
+                    chInfo[j] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
+                if (prev == Character.DIRECTIONALITY_ARABIC_NUMBER &&
+                    next == Character.DIRECTIONALITY_ARABIC_NUMBER)
+                    chInfo[j] = Character.DIRECTIONALITY_ARABIC_NUMBER;
+            }
+        }
+
+        // dump(chdirs, n, "W4");
+
+        // W5 european number terminators
+        boolean adjacent = false;
+        for (int j = 0; j < n; j++) {
+            byte d = chInfo[j];
+
+            if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
+                adjacent = true;
+            else if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR && adjacent)
+                chInfo[j] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
+            else
+                adjacent = false;
+        }
+
+        //dump(chdirs, n, "W5");
+
+        // W5 european number terminators part 2,
+        // W6 separators and terminators
+        adjacent = false;
+        for (int j = n - 1; j >= 0; j--) {
+            byte d = chInfo[j];
+
+            if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
+                adjacent = true;
+            else if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR) {
+                if (adjacent)
+                    chInfo[j] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
+                else
+                    chInfo[j] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
+            }
+            else {
+                adjacent = false;
+
+                if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR ||
+                    d == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR ||
+                    d == Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR ||
+                    d == Character.DIRECTIONALITY_SEGMENT_SEPARATOR)
+                    chInfo[j] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
+            }
+        }
+
+        // dump(chdirs, n, "W6");
+
+        // W7 strong direction of european numbers
+        cur = SOR;
+        for (int j = 0; j < n; j++) {
+            byte d = chInfo[j];
+
+            if (d == SOR ||
+                d == Character.DIRECTIONALITY_LEFT_TO_RIGHT ||
+                d == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
+                cur = d;
+
+            if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
+                chInfo[j] = cur;
+        }
+
+        // dump(chdirs, n, "W7");
+
+        // N1, N2 neutrals
+        cur = SOR;
+        for (int j = 0; j < n; j++) {
+            byte d = chInfo[j];
+
+            if (d == Character.DIRECTIONALITY_LEFT_TO_RIGHT ||
+                d == Character.DIRECTIONALITY_RIGHT_TO_LEFT) {
+                cur = d;
+            } else if (d == Character.DIRECTIONALITY_EUROPEAN_NUMBER ||
+                       d == Character.DIRECTIONALITY_ARABIC_NUMBER) {
+                cur = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
+            } else {
+                byte dd = SOR;
+                int k;
+
+                for (k = j + 1; k < n; k++) {
+                    dd = chInfo[k];
+
+                    if (dd == Character.DIRECTIONALITY_LEFT_TO_RIGHT ||
+                        dd == Character.DIRECTIONALITY_RIGHT_TO_LEFT) {
+                        break;
+                    }
+                    if (dd == Character.DIRECTIONALITY_EUROPEAN_NUMBER ||
+                        dd == Character.DIRECTIONALITY_ARABIC_NUMBER) {
+                        dd = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
+                        break;
+                    }
+                }
+
+                for (int y = j; y < k; y++) {
+                    if (dd == cur)
+                        chInfo[y] = cur;
+                    else
+                        chInfo[y] = SOR;
+                }
+
+                j = k - 1;
+            }
+        }
+
+        // dump(chdirs, n, "final");
+
+        // extra: enforce that all tabs and surrogate characters go the
+        // primary direction
+        // TODO: actually do directions right for surrogates
+
+        for (int j = 0; j < n; j++) {
+            char c = chs[j];
+
+            if (c == '\t' || (c >= 0xD800 && c <= 0xDFFF)) {
+                chInfo[j] = SOR;
+            }
+        }
+        
+        return dir;
+    }
+
     private static final char FIRST_CJK = '\u2E80';
     /**
      * Returns true if the specified character is one of those specified
diff --git a/core/tests/coretests/src/android/text/StaticLayoutBidiTest.java b/core/tests/coretests/src/android/text/StaticLayoutBidiTest.java
new file mode 100644 (file)
index 0000000..ccd0dae
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package android.text;
+
+import android.test.suitebuilder.annotation.SmallTest;
+import android.util.Log;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests StaticLayout bidi implementation.
+ */
+public class StaticLayoutBidiTest extends TestCase {
+    
+    public static final int REQ_DL = 2; // Layout.DIR_REQUEST_DEFAULT_LTR;
+    public static final int REQ_DR = -2; // Layout.DIR_REQUEST_DEFAULT_RTL;
+    public static final int REQ_L = 1; // Layout.DIR_REQUEST_LTR;
+    public static final int REQ_R = -1; // Layout.DIR_REQUEST_RTL;
+    public static final int L = Layout.DIR_LEFT_TO_RIGHT;
+    public static final int R = Layout.DIR_RIGHT_TO_LEFT;
+    
+    public static final String SP = " ";
+    public static final String ALEF = "\u05d0";
+    public static final String BET = "\u05d1";
+    public static final String GIMEL = "\u05d2";
+    public static final String DALET = "\u05d3";
+    
+    @SmallTest
+    public void testAllLtr() {
+        expectBidi(REQ_DL, "a test", "000000", L);
+    }
+    
+    @SmallTest
+    public void testLtrRtl() {
+        expectBidi(REQ_DL, "abc " + ALEF + BET + GIMEL, "0000111", L);
+    }
+    
+    @SmallTest
+    public void testAllRtl() {
+        expectBidi(REQ_DL, ALEF + SP + ALEF + BET + GIMEL + DALET, "111111", R);
+    }
+    
+    @SmallTest
+    public void testRtlLtr() {
+        expectBidi(REQ_DL,  ALEF + BET + GIMEL + " abc", "1111000", R);
+    }
+    
+    @SmallTest
+    public void testRAllLtr() {
+        expectBidi(REQ_R, "a test", "000000", R);
+    }
+    
+    @SmallTest
+    public void testRLtrRtl() {
+        expectBidi(REQ_R, "abc " + ALEF + BET + GIMEL, "0001111", R);
+    }
+    
+    @SmallTest
+    public void testLAllRtl() {
+        expectBidi(REQ_L, ALEF + SP + ALEF + BET + GIMEL + DALET, "111111", L);
+    }
+    
+    @SmallTest
+    public void testLRtlLtr() {
+        expectBidi(REQ_L,  ALEF + BET + GIMEL + " abc", "1110000", L);
+    }
+    
+    private void expectBidi(int dir, String text, 
+            String expectedLevels, int expectedDir) {
+        char[] chs = text.toCharArray();
+        int n = chs.length;
+        byte[] chInfo = new byte[n];
+        
+        int resultDir = StaticLayout.bidi(dir, chs, chInfo, n, false);
+        
+        {
+            StringBuilder sb = new StringBuilder("xdirs:");
+            for (int i = 0; i < n; ++i) {
+                sb.append(" ").append(String.valueOf(chInfo[i]));
+            }
+            Log.i("BIDI", sb.toString());
+        }
+        
+        char[] resultLevelChars = new char[n];
+        for (int i = 0; i < n; ++i) {
+            resultLevelChars[i] = (char)('0' + chInfo[i]);
+        }
+        String resultLevels = new String(resultLevelChars);
+        assertEquals("direction", expectedDir, resultDir);
+        assertEquals("levels", expectedLevels, resultLevels);
+    }
+}