Numerous EWTS->Unicode and especially EWTS->TMW improvements.

Fixed ordering of Unicode wowels; e.g., [ku+A] gives the correct Unicode now. EWTS->TMW looks better for some wacky wowels like, I'm guessing here, [ku+A]. EWTS->TMW should now give errors any time the full input isn't used. Previously, wacky wowels like [kai+-i] would lead to some droppage. EWTS->TMW->Unicode testing is now in effect. This found a ton of EWTS->TMW bugs, most or all of which are fixed now. TMW->Unicode is improved/fixed for { \u5350,\u534D,\u0F88+k,\u0F88+kh,U }. (Why U? "\u0f75" is discouraged in favor of "\u0f71\u0f74".) NOTE: TMW_RTF_TO_THDL_WYLIETest is still disabled for the nightly builds' sake, but I ran it in my sandbox and it passed.
2005-07-11 02:51:06 +00:00 · 2005-07-11 02:51:06 +00:00 · 6d419fe641
commit 6d419fe641
parent 36122778b4
19 changed files with 1014 additions and 547 deletions
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
@@ -298,7 +298,7 @@ public class UnicodeUtils implements UnicodeConstants {
        characters will appear as themselves. */
    public static String unicodeCodepointToString(char cp,
                                                  boolean shortenIfPossible) {
-        return unicodeCodepointToString(cp, shortenIfPossible, "\\u");
+        return unicodeCodepointToString(cp, shortenIfPossible, "\\u", false);
    }

    /** Like {@link #unicodeCodepointToString(char, boolean)} if you
@@ -307,7 +307,8 @@ public class UnicodeUtils implements UnicodeConstants {
        <code>0F55</code>. */
    public static String unicodeCodepointToString(char cp,
                                                  boolean shortenIfPossible,
-                                                  String prefix) {
+                                                  String prefix,
+                                                  boolean upperCase) {
        if (shortenIfPossible) {
            if ((cp >= 'a' && cp <= 'z')
                || (cp >= 'A' && cp <= 'Z')
@@ -348,14 +349,16 @@ public class UnicodeUtils implements UnicodeConstants {
                return "\\r";
        }

+        String suffix;
        if (cp < '\u0010')
-            return prefix + "000" + Integer.toHexString((int)cp);
+            suffix = "000" + Integer.toHexString((int)cp);
        else if (cp < '\u0100')
-            return prefix + "00" + Integer.toHexString((int)cp);
+            suffix = "00" + Integer.toHexString((int)cp);
        else if (cp < '\u1000')
-            return prefix + "0" + Integer.toHexString((int)cp);
+            suffix = "0" + Integer.toHexString((int)cp);
        else
-            return prefix + Integer.toHexString((int)cp);
+            suffix = Integer.toHexString((int)cp);
+        return prefix + (upperCase ? suffix.toUpperCase() : suffix);
    }

    /**