Fixed important EWTS->Tibetan conversion bugs. [g.yogs] (and maybe [hUM^]) are not yet converting correctly.

I have not yet committed the end-to-end test that I'm manually doing
to find these problems.  It will be another document for
TMW_RTF_TO_THDL_WYLIETest.java.  Note that thdl.debug=true is
essential to access the GUI for the EWTS->* converters.
This commit is contained in:
dchandler 2005-07-06 07:46:21 +00:00
parent 0b3a636f63
commit 63ff0fb0c9
7 changed files with 101 additions and 40 deletions

View file

@ -149,7 +149,7 @@ public class TibetanConverter implements FontConverterConstants {
out.println("TibetanConverter --colors yes|no"); out.println("TibetanConverter --colors yes|no");
out.println(" --warning-level None|Some|Most|All"); out.println(" --warning-level None|Some|Most|All");
out.println(" --acip-to-tibetan-warning-and-error-messages short|long"); out.println(" --acip-to-tibetan-warning-and-error-messages short|long"); // TODO(DLC)[EWTS->Tibetan]: misnomer, ewts and acip both are affected
out.println(" --find-all-non-tmw | --find-some-non-tmw"); out.println(" --find-all-non-tmw | --find-some-non-tmw");
out.println(" | --tmw-to-tmw-for-testing"); out.println(" | --tmw-to-tmw-for-testing");
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web"); out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
@ -166,6 +166,10 @@ public class TibetanConverter implements FontConverterConstants {
out.println(""); out.println("");
out.println(" -h | --help for this message"); out.println(" -h | --help for this message");
out.println(""); out.println("");
out.println(" --wylie-to-unicode to convert an EWTS text file to a Unicode");
out.println("");
out.println(" --wylie-to-tmw to convert an EWTS text file to TibetanMachineWeb");
out.println("");
out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine"); out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine");
out.println(""); out.println("");
out.println(" --to-unicode to convert TibetanMachineWeb to Unicode"); out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");

View file

@ -151,6 +151,12 @@ public class EWTSTest extends TestCase {
/** Tests that the EWTS->unicode converter isn't completely /** Tests that the EWTS->unicode converter isn't completely
braindead. */ braindead. */
public void testEwtsBasics() { public void testEwtsBasics() {
if (RUN_FAILING_TESTS) ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66"); // TODO(DLC)[EWTS->Tibetan]: DLC NOW!
if (RUN_FAILING_TESTS) ewts2uni_test("hUM^", "TODO(DLC)[EWTS->Tibetan]: DLC NOW");
ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51");
ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51");
ewts2uni_test("drwa", "\u0f51\u0fb2\u0fad");
ewts2uni_test("rwa", "\u0f62\u0fad");
ewts2uni_test("ug_pha ", "\u0f68\u0f74\u0f42\u00a0\u0f55\u0f0b"); ewts2uni_test("ug_pha ", "\u0f68\u0f74\u0f42\u00a0\u0f55\u0f0b");
ewts2uni_test("a ", "\u0f68\u0f0b"); ewts2uni_test("a ", "\u0f68\u0f0b");
ewts2uni_test("g.a ", "\u0f42\u0f68\u0f0b"); ewts2uni_test("g.a ", "\u0f42\u0f68\u0f0b");

View file

@ -211,7 +211,9 @@ public final class EWTSTraits implements TTraits {
if (wowel.indexOf('M') >= 0) { if (wowel.indexOf('M') >= 0) {
DuffCode last = null; DuffCode last = null;
if (duff.size() > 0) { if (!context_added[0]) {
last = preceding;
} else if (duff.size() > 0) {
last = (DuffCode)duff.get(duff.size() - 1); last = (DuffCode)duff.get(duff.size() - 1);
duff.remove(duff.size() - 1); // getBindu will add it back... duff.remove(duff.size() - 1); // getBindu will add it back...
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone??? // TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???

View file

@ -479,7 +479,7 @@ public class TConverter {
TStackList sl1 = ((null == pt1) TStackList sl1 = ((null == pt1)
? null : pt1.getBestParse()); ? null : pt1.getBestParse());
if (null == sl0 && null == sl1) { if (null == sl0 && null == sl1) {
// {A-DZU} causes this, for example. // ACIP {A-DZU} causes this, for example.
hasErrors = true; hasErrors = true;
String errorMessage = String errorMessage =
"[#ERROR " "[#ERROR "

View file

@ -190,17 +190,19 @@ class TPair {
} }
String getWylie() { String getWylie() {
return getWylie(false); return getWylie(false, false);
} }
/** Returns the EWTS Wylie that corresponds to this pair if /** Returns the EWTS Wylie that corresponds to this pair if
* justLeft is false, or the EWTS Wylie that corresponds to just * justLeft is false, or the EWTS Wylie that corresponds to just
* {@link #getLeft()} if justLeft is true. * {@link #getLeft()} if justLeft is true. If dropDisambiguator
* is true and the right component is a disambiguator, then the
* Wylie will not contain '.'.
* *
* <p>Returns "W" for ACIP "W", "r" for ACIP "R", y for ACIP "Y", * <p>Returns "W" for ACIP "W", "r" for ACIP "R", y for ACIP "Y",
* even though sometimes the EWTS for those is "w", "R", or "Y". * even though sometimes the EWTS for those is "w", "R", or "Y".
* Handle that in the caller. */ * Handle that in the caller. */
String getWylie(boolean justLeft) { String getWylie(boolean justLeft, boolean dropDisambiguator) {
String leftWylie = null; String leftWylie = null;
if (getLeft() != null) { if (getLeft() != null) {
leftWylie = traits.getEwtsForConsonant(getLeft()); leftWylie = traits.getEwtsForConsonant(getLeft());
@ -212,7 +214,7 @@ class TPair {
if (null == leftWylie) leftWylie = ""; if (null == leftWylie) leftWylie = "";
if (justLeft) return leftWylie; if (justLeft) return leftWylie;
String rightWylie = null; String rightWylie = null;
if (traits.disambiguator().equals(getRight())) if (!dropDisambiguator && traits.disambiguator().equals(getRight()))
rightWylie = "."; rightWylie = ".";
else if ("+".equals(getRight())) else if ("+".equals(getRight()))
rightWylie = "+"; rightWylie = "+";

View file

@ -701,10 +701,10 @@ class TPairList {
int previousSize = duffsAndErrors.size(); int previousSize = duffsAndErrors.size();
StringBuffer wylieForConsonant = new StringBuffer(); StringBuffer wylieForConsonant = new StringBuffer();
for (int x = 0; x + 1 < size(); x++) { for (int x = 0; x + 1 < size(); x++) {
wylieForConsonant.append(get(x).getWylie(false)); wylieForConsonant.append(get(x).getWylie(false, true));
} }
TPair lastPair = get(size() - 1); TPair lastPair = get(size() - 1);
wylieForConsonant.append(lastPair.getWylie(true)); wylieForConsonant.append(lastPair.getWylie(true, false));
String hashKey = wylieForConsonant.toString(); String hashKey = wylieForConsonant.toString();
// Because EWTS has special handling for full-formed // Because EWTS has special handling for full-formed
@ -763,6 +763,7 @@ class TPairList {
} }
} }
if (lastPair.getRight() == null if (lastPair.getRight() == null
|| lastPair.getRight().equals(traits.disambiguator())
|| lastPair.equals(traits.disambiguator())) { || lastPair.equals(traits.disambiguator())) {
duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey)); duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
} else { } else {
@ -771,7 +772,7 @@ class TPairList {
lastPair.getRight()); lastPair.getRight());
} }
if (previousSize == duffsAndErrors.size()) if (previousSize == duffsAndErrors.size())
throw new Error("TPairList with no duffs? " + toString()); // FIXME: change to assertion. throw new Error("TPairList with no duffs? " + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion.
} }
} }

View file

@ -256,7 +256,14 @@ class TPairListFactory {
/** EWTS helper function that transforms native stacks to include /** EWTS helper function that transforms native stacks to include
* pluses: [(ph . ) (y . ) (w . *)] -> [(ph . +) (y . +) (w * pluses: [(ph . ) (y . ) (w . *)] -> [(ph . +) (y . +) (w
* . *)], e.g. * . *)], e.g. The tricky case is something like [brgyad] or
* [brjod] because b+r is a native stack and so is r+g+y (and in
* fact r+g+y accepts a bao prefix). It's not quite safe to
* always grab the rightmost native stack from a stretch, as
* [drwa] proves. You must grab the longest, rightmost stack.
* In most cases, either way you did it it'd be illegal. In the
* rest, the only way it can be legal is if there's a prefix and
* the rightmost stack.
* @param traits must mesh with orig */ * @param traits must mesh with orig */
private static TPairList transformNativeStacks(TTraits traits, private static TPairList transformNativeStacks(TTraits traits,
TPairList orig) { TPairList orig) {
@ -277,8 +284,9 @@ class TPairListFactory {
// we see a native stack of size 2, we transform it. // we see a native stack of size 2, we transform it.
boolean found_something = false; boolean found_something = false;
TPair p[] = new TPair[maxNativeStackSize]; TPair p[]
for (int j = 0; j < maxNativeStackSize; j++) { = new TPair[maxNativeStackSize + 1]; // plus one for [brgyad]
for (int j = 0; j < maxNativeStackSize + 1; j++) {
if (i + j < orig.size()) if (i + j < orig.size())
p[j] = orig.get(i + j); p[j] = orig.get(i + j);
else else
@ -286,35 +294,32 @@ class TPairListFactory {
} }
// Now p[0] is current pair, p[1] is the one after that, etc. // Now p[0] is current pair, p[1] is the one after that, etc.
for (int nss = maxNativeStackSize; nss >= minNativeStackSize; if (null != p[0].getLeft()
nss--) { && null == p[0].getRight()) {
String hash_key = ""; // TODO(dchandler): The way I do this [drwa] case,
int good = 0; // does it rely on the fact that maxNativeStackSize ==
for (int k = 0; k < nss - 1; k++) { // 3? Let's have it not rely on that...
if (null != p[k] int h;
&& null != p[k].getLeft() if (0 == (h = helper(traits, 0, maxNativeStackSize, p, result))) { // [drwa]
&& null == p[k].getRight()) { // [brgyad] makes us go from right to left.
hash_key += p[k].getLeft() + "-"; // (TODO(dchandler): It's a shame we're doing this
++good; // stuff when we have the code to figure out, for
// ACIP, that [BRGYAD] is what it is.)
for (int offset = 1; offset >= 0; offset--) {
if (found_something) break;
for (int nss = maxNativeStackSize;
nss >= minNativeStackSize;
nss--) {
if (0 != (h = helper(traits, offset, nss, p, result))) {
found_something = true;
i += h;
break;
}
}
} }
} } else {
if (null != p[nss - 1] i += h;
&& null != p[nss - 1].getLeft()
&& !"+".equals(p[nss - 1].getRight())) {
hash_key += p[nss - 1].getLeft();
++good;
}
if (nss == good
&& TibetanMachineWeb.isKnownHashKey(hash_key)) {
found_something = true; found_something = true;
for (int n = 0; n < nss - 1; n++) {
++i;
result.append(new TPair(traits,
p[n].getLeft(), "+"));
}
++i;
result.append(p[nss - 1]);
break; // for ph-y-w etc.
} }
} }
if (!found_something) { if (!found_something) {
@ -328,6 +333,47 @@ class TPairListFactory {
return result; return result;
} }
/** Checks whether the nss pairs p[offset], p[offset + 1], ...,
 *  p[offset + nss - 1] form a native stack, and if so appends the
 *  transformed pairs to result.
 *
 *  <p>A candidate stretch qualifies when every pair but the last has
 *  a left side and no right side, the last pair has a left side and
 *  a right side other than "+", and the left sides joined with "-"
 *  form a hash key known to TibetanMachineWeb.
 *
 *  <p>On a match, result receives (in order) p[0] unchanged when
 *  offset is 1, then each non-final pair of the stretch rebuilt with
 *  "+" as its right side, then the final pair unchanged.
 *
 *  @param traits passed to the constructor of each rebuilt pair
 *  @param offset index in p at which the candidate stretch starts
 *  @param nss size of the native stack to look for
 *  @param p window of pairs; entries may be null
 *  @param result list that is appended to on a match, otherwise
 *  left untouched
 *  @return the number of pairs appended to result, or zero if the
 *  stretch is not a native stack */
private static int helper(TTraits traits, int offset, int nss, TPair p[],
                          TPairList result) {
    final int lastIndex = offset + nss - 1;
    StringBuffer key = new StringBuffer();
    int matched = 0;

    // All pairs before the last one must be bare consonants: a left
    // side and nothing on the right.
    for (int idx = offset; idx < lastIndex; idx++) {
        TPair candidate = p[idx];
        if (null == candidate
            || null == candidate.getLeft()
            || null != candidate.getRight()) {
            continue;
        }
        key.append(candidate.getLeft()).append("-");
        ++matched;
    }

    // The last pair may carry anything on the right except an
    // explicit "+".
    TPair tail = p[lastIndex];
    if (null != tail
        && null != tail.getLeft()
        && !"+".equals(tail.getRight())) {
        key.append(tail.getLeft());
        ++matched;
    }

    // Only consult the hash-key table when every pair qualified.
    if (matched != nss
        || !TibetanMachineWeb.isKnownHashKey(key.toString())) {
        return 0;
    }

    int consumed = 0;
    if (1 == offset) {
        // The pair in front of the stack is passed through as-is.
        result.append(p[0]);
        ++consumed;
    }
    for (int idx = offset; idx < lastIndex; idx++) {
        result.append(new TPair(traits, p[idx].getLeft(), "+"));
        ++consumed;
    }
    result.append(tail);
    return consumed + 1;
}
// TODO(DLC)[EWTS->Tibetan]: doc // TODO(DLC)[EWTS->Tibetan]: doc
private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) { private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {