The *->Unicode conversions were outputting Unicode that was not

well-formed. They still do, but they do it less often. Chris Fynn wrote this a while back: By normal Tibetan & Dzongkha spelling, writing, and input rules, Tibetan script stacks should be entered and written: 1 headline consonant (0F40->0F6A), any subjoined consonant(s) (0F90->0F9C), achung (0F71), shabkyu (0F74), any above-headline vowel(s) (0F72, 0F7A, 0F7B, 0F7C, 0F7D, and 0F80); any ngaro (0F7E, 0F82, and 0F83). Efforts are now made to ensure that the converters conform to the above rules.
2004-12-13 02:32:46 +00:00 · 2004-12-13 02:32:46 +00:00 · aa5d86a6e3
commit aa5d86a6e3
parent 3115f22484
3 changed files with 138 additions and 10 deletions
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtilsTest.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtilsTest.java
@@ -379,4 +379,32 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
        assertTrue(UnicodeUtils.isInTibetanRange('\u0FF0'));
        assertTrue(UnicodeUtils.isInTibetanRange('\u0FFF'));
    }
+
+    /**
+     * Tests the {@link UnicodeUtils#fixSomeOrderingErrorsInTibetanUnicode(StringBuffer)}
+     * method. */
+    public void testFixSomeOrderingErrorsInTibetanUnicode() {
+        // Test that "\u0f67\u0f72\u0f71" becomes "\u0f67\u0f71\u0f72", e.g:
+        String tt[][] = {
+            { "\u0f67\u0f72\u0f71", "\u0f67\u0f71\u0f72" },
+            { "\u0f7a\u0f72\u0f71", "\u0f71\u0f7a\u0f72" },
+            { "\u0f67\u0f7e\u0f71", "\u0f67\u0f71\u0f7e" },
+            { "\u0f67\u0f74\u0f71", "\u0f67\u0f71\u0f74" },
+            { "\u0f67\u0f7e\u0f72", "\u0f67\u0f72\u0f7e" },
+            { "\u0f67\u0f7e\u0f74", "\u0f67\u0f74\u0f7e" },
+        };
+        for (int i = 0; i < tt.length; i++) {
+            StringBuffer sb = new StringBuffer(tt[i][0]);
+            assertTrue(true == UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(sb));
+            assertTrue(sb.toString().equals(tt[i][1]));
+        }
+
+        // Test that "\u0f67\u0f71\u0f72" stays the same, e.g.:
+        String uu[] = { "\u0f67\u0f71\u0f72" };
+        for (int i = 0; i < uu.length; i++) {
+            StringBuffer sb = new StringBuffer(uu[i]);
+            assertTrue(false == UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(sb));
+            assertTrue(sb.toString().equals(uu[i]));
+        }
+    }
 }