ACIP->Tibetan converters now warn, every time {%} is encountered, that U+0F14 might have been intended.

The Unicode for ACIP {o} is U+0F37.
2003-11-09 23:15:58 +00:00 · 2003-11-09 23:15:58 +00:00 · 2cb90bd231
commit 2cb90bd231
parent 084e12a02c
2 changed files with 10 additions and 3 deletions
--- a/source/org/thdl/tib/text/ttt/ACIPRules.java
+++ b/source/org/thdl/tib/text/ttt/ACIPRules.java
@@ -501,7 +501,8 @@ public class ACIPRules {
            superACIP2unicode.put("`", "\u0F08");
            superACIP2unicode.put("*", "\u0F04\u0F05");
            superACIP2unicode.put("#", "\u0F04\u0F05\u0F05");
-            superACIP2unicode.put("%", "\u0F35"); // FIXME: could be U+0F37 or U+0F35 according to RC if I understand correctly.
+            superACIP2unicode.put("%", "\u0F35"); // but might be U+0F14, so we warn.
+            superACIP2unicode.put("o", "\u0F37");
            superACIP2unicode.put(";", "\u0F11");
            superACIP2unicode.put("\r", "\r");
            superACIP2unicode.put("\t", "\t");
@@ -511,7 +512,7 @@ public class ACIPRules {
            superACIP2unicode.put("^", "\u0F38");

            // DLC FIXME: "^ GONG" is "^GONG", right?
-            // DLC FIXME: what's the Unicode for x? for o? RC said there is none in plain-text Unicode for x.  But what about in RTF Unicode?
+            // DLC FIXME: what's the Unicode for x? RC said there is none in plain-text Unicode for x.  But what about in RTF Unicode?
        }
        if (subscribed) {
            String u = (String)subACIP2unicode.get(acip);
--- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
@@ -160,7 +160,7 @@ public class ACIPTshegBarScanner {
                continue;
            }
            switch (ch) {
-            case '}':
+            case '}': // fall through...
            case ']':
                if (bracketTypeStack.empty()) {
                    // Error.
@@ -218,6 +218,8 @@ public class ACIPTshegBarScanner {
            case '{': // NOTE WELL: KX0016I.ACT, KD0095M.ACT, and a
                      // host of other ACIP files use {} brackets like
                      // [] brackets.  I treat both the same.
+                
+                // fall through...
            case '[':
                // This definitely indicates a new token.
                if (startOfString < i) {
@@ -824,6 +826,10 @@ public class ACIPTshegBarScanner {
                        }
                    }
                }
+                if ('%' == ch) {
+                    al.add(new TString("The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice",
+                                       TString.WARNING));
+                }
                startOfString = i+1;
                currentType = TString.ERROR;
                break; // end TIBETAN_PUNCTUATION case