TMW->ACIP is much improved. V and W were confused, # and * were
confused; many glyphs that should have yielded errors did not. I've added a test case that converts every TMW glyph (save the one with no TM mapping) to ACIP, and I hand-checked that its output was correct. ACIP->TMW is fixed for # and *. I had never noticed it, but each needed an extra swoosh (U+0F05). Round-trip testing would be good, as would testing real-world use of TMW->ACIP.
This commit is contained in:
parent
244a9d1370
commit
1bfd3772e6
10 changed files with 1110 additions and 85 deletions
|
@ -628,9 +628,16 @@ public class ACIPConverter {
|
|||
tdocLocation[0] += s.getText().length();
|
||||
continue; // FIXME: this means the unicode above doesn't go into the output if null != writer && null != tdoc?
|
||||
} else {
|
||||
String wy = ACIPRules.getWylieForACIPOther(s.getText());
|
||||
if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
|
||||
duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
|
||||
if ("#".equals(s.getText())) { // hard-coded ACIP value
|
||||
duff = new Object[] {
|
||||
TibetanMachineWeb.getGlyph("@#"),
|
||||
TibetanMachineWeb.getGlyph("#")
|
||||
}; // hard-coded EWTS values
|
||||
} else {
|
||||
String wy = ACIPRules.getWylieForACIPOther(s.getText());
|
||||
if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
|
||||
duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -157,6 +157,9 @@ public class ACIPRules {
|
|||
getWylieForACIPOther(null);
|
||||
getWylieForACIPVowel(null);
|
||||
String ans = (String)wylieToACIP.get(EWTS);
|
||||
boolean useCapitalW = false;
|
||||
if (EWTS.startsWith("w"))
|
||||
useCapitalW = true; // We want W+NA, not V+NA; we want WA, not VA.
|
||||
if (null == ans) {
|
||||
StringBuffer finalAns = new StringBuffer(EWTS.length());
|
||||
StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true);
|
||||
|
@ -182,9 +185,14 @@ public class ACIPRules {
|
|||
if (null == part) return null;
|
||||
finalAns.append(part);
|
||||
}
|
||||
if (useCapitalW)
|
||||
finalAns.setCharAt(0, 'W');
|
||||
return finalAns.toString();
|
||||
}
|
||||
return ans;
|
||||
if (useCapitalW)
|
||||
return "W" + ans.substring(1);
|
||||
else
|
||||
return ans;
|
||||
}
|
||||
|
||||
/** Registers acip->wylie mappings in toWylie; registers
|
||||
|
@ -193,6 +201,12 @@ public class ACIPRules {
|
|||
toWylie.put(ACIP, EWTS);
|
||||
if (null == wylieToACIP) {
|
||||
wylieToACIP = new HashMap(75);
|
||||
|
||||
// We don't want to put "/" in toWylie:
|
||||
wylieToACIP.put("(", "/");
|
||||
wylieToACIP.put(")", "/");
|
||||
wylieToACIP.put("?", "\\");
|
||||
|
||||
wylieToACIP.put("_", " "); // oddball.
|
||||
wylieToACIP.put("o'i", "O'I"); // oddball for TMW9.61.
|
||||
}
|
||||
|
@ -307,14 +321,20 @@ public class ACIPRules {
|
|||
if (acipOther2wylie == null) {
|
||||
acipOther2wylie = new HashMap(20);
|
||||
|
||||
// don't use putMapping for this. We don't want TMW->ACIP
|
||||
// to produce "." for a U+0F0C because ACIP doesn't say
|
||||
// that "." means U+0F0C. It just seems to in practice
|
||||
// for ACIP Release IV texts.
|
||||
acipOther2wylie.put(".", "*");
|
||||
|
||||
putMapping(acipOther2wylie, "m", "M");
|
||||
putMapping(acipOther2wylie, ":", "H");
|
||||
putMapping(acipOther2wylie, ",", "/");
|
||||
putMapping(acipOther2wylie, " ", " ");
|
||||
putMapping(acipOther2wylie, ".", "*");
|
||||
putMapping(acipOther2wylie, "|", "|");
|
||||
putMapping(acipOther2wylie, ";", "|");
|
||||
putMapping(acipOther2wylie, "`", "!");
|
||||
putMapping(acipOther2wylie, ";", ";");
|
||||
putMapping(acipOther2wylie, "*", "@");
|
||||
putMapping(acipOther2wylie, "#", "@#");
|
||||
putMapping(acipOther2wylie, "*", "@#");
|
||||
// There is no glyph in TMW with the EWTS @##, so we don't do this: putMapping(acipOther2wylie, "#", "@##");
|
||||
putMapping(acipOther2wylie, "%", "~X");
|
||||
putMapping(acipOther2wylie, "o", "X");
|
||||
putMapping(acipOther2wylie, "&", "&");
|
||||
|
|
|
@ -359,6 +359,7 @@ class TParseTree {
|
|||
}
|
||||
}
|
||||
if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
|
||||
// DLC FIXME: gives a false positive warning for Rsh
|
||||
hasAmbiguousConsonant = true;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue