TMW->ACIP is much improved. V and W were confused, # and * were
confused, and many glyphs that should have yielded errors did not.

I've added a test case that transforms every TMW glyph (save the one
with no TM mapping) to ACIP.  I hand-checked that it was correct.

ACIP->TMW is fixed for # and *.  I never noticed it, but each needed an
extra swoosh (U+0F05).

Round-tripping would be good, as would testing real-world use of
TMW->ACIP.
This commit is contained in:
dchandler 2004-04-14 05:44:51 +00:00
parent 244a9d1370
commit 1bfd3772e6
10 changed files with 1110 additions and 85 deletions

View file

@ -628,9 +628,16 @@ public class ACIPConverter {
tdocLocation[0] += s.getText().length();
continue; // FIXME: this means the unicode above doesn't go into the output if null != writer && null != tdoc?
} else {
String wy = ACIPRules.getWylieForACIPOther(s.getText());
if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
if ("#".equals(s.getText())) { // hard-coded ACIP value
duff = new Object[] {
TibetanMachineWeb.getGlyph("@#"),
TibetanMachineWeb.getGlyph("#")
}; // hard-coded EWTS values
} else {
String wy = ACIPRules.getWylieForACIPOther(s.getText());
if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
}
}
}
}

View file

@ -157,6 +157,9 @@ public class ACIPRules {
getWylieForACIPOther(null);
getWylieForACIPVowel(null);
String ans = (String)wylieToACIP.get(EWTS);
boolean useCapitalW = false;
if (EWTS.startsWith("w"))
useCapitalW = true; // We want W+NA, not V+NA; we want WA, not VA.
if (null == ans) {
StringBuffer finalAns = new StringBuffer(EWTS.length());
StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true);
@ -182,9 +185,14 @@ public class ACIPRules {
if (null == part) return null;
finalAns.append(part);
}
if (useCapitalW)
finalAns.setCharAt(0, 'W');
return finalAns.toString();
}
return ans;
if (useCapitalW)
return "W" + ans.substring(1);
else
return ans;
}
/** Registers acip->wylie mappings in toWylie; registers
@ -193,6 +201,12 @@ public class ACIPRules {
toWylie.put(ACIP, EWTS);
if (null == wylieToACIP) {
wylieToACIP = new HashMap(75);
// We don't want to put "/" in toWylie:
wylieToACIP.put("(", "/");
wylieToACIP.put(")", "/");
wylieToACIP.put("?", "\\");
wylieToACIP.put("_", " "); // oddball.
wylieToACIP.put("o'i", "O'I"); // oddball for TMW9.61.
}
@ -307,14 +321,20 @@ public class ACIPRules {
if (acipOther2wylie == null) {
acipOther2wylie = new HashMap(20);
// don't use putMapping for this. We don't want TMW->ACIP
// to produce "." for a U+0F0C because ACIP doesn't say
// that "." means U+0F0C. It just seems to in practice
// for ACIP Release IV texts.
acipOther2wylie.put(".", "*");
putMapping(acipOther2wylie, "m", "M");
putMapping(acipOther2wylie, ":", "H");
putMapping(acipOther2wylie, ",", "/");
putMapping(acipOther2wylie, " ", " ");
putMapping(acipOther2wylie, ".", "*");
putMapping(acipOther2wylie, "|", "|");
putMapping(acipOther2wylie, ";", "|");
putMapping(acipOther2wylie, "`", "!");
putMapping(acipOther2wylie, ";", ";");
putMapping(acipOther2wylie, "*", "@");
putMapping(acipOther2wylie, "#", "@#");
putMapping(acipOther2wylie, "*", "@#");
// There is no glyph in TMW with the EWTS @##, so we don't do this: putMapping(acipOther2wylie, "#", "@##");
putMapping(acipOther2wylie, "%", "~X");
putMapping(acipOther2wylie, "o", "X");
putMapping(acipOther2wylie, "&", "&");

View file

@ -359,6 +359,7 @@ class TParseTree {
}
}
if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
// DLC FIXME: gives a false positive warning for Rsh
hasAmbiguousConsonant = true;
}
}