Changed converters so that the Unicode non-breaking tsheg (U+0F0C) maps to the Unicode non-breaking space (U+00A0) in Wylie instead of "*".

2009-02-20 23:11:17 +00:00 · 2009-02-20 23:11:17 +00:00 · 835e74c0cd
commit 835e74c0cd
parent ffb32b3207
7 changed files with 56 additions and 23 deletions
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@@ -40,6 +40,7 @@ import org.thdl.util.ThdlDebug;
 import org.thdl.util.ThdlOptions;
 import org.thdl.util.Trie;
 import org.thdl.tib.scanner.BasicTibetanTranscriptionConverter;
+import org.thdl.tib.scanner.Manipulate;

 /**
 * Interfaces between Extended Wylie and the TibetanMachineWeb fonts.
@@ -221,7 +222,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
    /** comma-delimited list of supported punctuation and
        miscellaneous characters: */
    private static final String others
-            = "_, ,/,|,!,:,;,@,#,$,%,(,),H,M,&,@#,?,=,{,},*,~X,X"; // FIXME: not yet supporting all these...
+            = "_, ,/,|,!,:,;,@,#,$,%,(,),H,M,&,@#,?,=,{,},\u00A0,~X,X"; // FIXME: not yet supporting all these...

    /** comma-delimited list of supported vowels: */
    private static final String vowels
@@ -760,7 +761,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
                                            + DELIMITER
                                            + " which means that no Wylie is assigned.  That isn't supported.");
                        if (hashOn) {
-                            tibHash.put(wylie, duffCodes);
+                            tibHash.put(Manipulate.unescape(wylie), duffCodes);
                        }
                        if (isTibetan) {
                            // Delete the dashes:
@@ -783,7 +784,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
                                            + " has a line with wylie " + wylie + " but no TMW; that's not allowed");
                        int font = duffCodes[TMW].getFontNum();
                        int code = duffCodes[TMW].getCharNum()-32;
-                        toHashKey[font][code] = wylie;
+                        toHashKey[font][code] = Manipulate.unescape(wylie);
                    }
                }
            }
--- a/source/org/thdl/tib/text/tibwn.ini
+++ b/source/org/thdl/tib/text/tibwn.ini
@@ -113,7 +113,7 @@ __TILDE__M`~242,1~~8,95~~~~~~~0F82
 // dzud.rtags.me.long.can:
 \u0F13~94,5~~9,92~~~~~~~0F13
 // hard tsheg:
-*~205,1~~1,108~~~~~~~0F0C
+\u00A0~205,1~~1,108~~~~~~~0F0C


 <?Input:Tibetan?>
--- a/source/org/thdl/tib/text/tshegbar/UnicodeCodepointToThdlWylie.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeCodepointToThdlWylie.java
@@ -85,7 +85,7 @@ public class UnicodeCodepointToThdlWylie {
        case '\u0F09': return "\\u0F09";
        case '\u0F0A': return "\\u0F0A";
        case '\u0F0B': return " ";
-        case '\u0F0C': return "*"; // DLC NOW: Jskad does not support this!
+        case '\u0F0C': return "\\u00A0"; // AMP: Non-break space. Does Jskad support this?
        case '\u0F0D': return "/";
        case '\u0F0E': return "//"; // DLC FIXME: this is kind of a hack-- the Unicode standard says the spacing for this construct is different than the spacing for "\u0F0D\u0F0D"
        case '\u0F0F': return ";";
--- a/source/org/thdl/tib/text/ttt/EWTSTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/EWTSTshegBarScanner.java
@@ -115,7 +115,7 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
                   || (sb.charAt(i) >= '\u0fcf' && sb.charAt(i) <= '\u0fd1')
                   || (THDLWylieConstants.SAUVASTIKA == sb.charAt(i))
                   || (THDLWylieConstants.SWASTIKA == sb.charAt(i))
-                   || (" /;|!:=_@#$%<>(){}*&\r\n\t\u0f36\u0f38\u0f89\u0f8a\u0f8b".indexOf(sb.charAt(i))
+                   || (" /;|!:=_@#$%<>(){}*&\r\n\t\u0f36\u0f38\u0f89\u0f8a\u0f8b\u00a0".indexOf(sb.charAt(i))
                       >= 0)) {
          al.add(new TString("EWTS", sb.substring(i, i+1),
                             TString.TIBETAN_PUNCTUATION));