Fixed important EWTS->Tibetan conversion bugs. [g.yogs] (and maybe
[hUM^]) are not yet converting correctly. I have not yet committed the end-to-end test that I am running manually to find these problems; it will be another document for TMW_RTF_TO_THDL_WYLIETest.java. Note that thdl.debug=true must be set in order to access the GUI for the EWTS->* converters.
This commit is contained in:
parent
0b3a636f63
commit
63ff0fb0c9
7 changed files with 101 additions and 40 deletions
|
@ -149,7 +149,7 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
|
||||
out.println("TibetanConverter --colors yes|no");
|
||||
out.println(" --warning-level None|Some|Most|All");
|
||||
out.println(" --acip-to-tibetan-warning-and-error-messages short|long");
|
||||
out.println(" --acip-to-tibetan-warning-and-error-messages short|long"); // TODO(DLC)[EWTS->Tibetan]: misnomer, ewts and acip both are affected
|
||||
out.println(" --find-all-non-tmw | --find-some-non-tmw");
|
||||
out.println(" | --tmw-to-tmw-for-testing");
|
||||
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
|
||||
|
@ -166,6 +166,10 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
out.println("");
|
||||
out.println(" -h | --help for this message");
|
||||
out.println("");
|
||||
out.println(" --wylie-to-unicode to convert an EWTS text file to a Unicode");
|
||||
out.println("");
|
||||
out.println(" --wylie-to-tmw to convert an EWTS text file to TibetanMachineWeb");
|
||||
out.println("");
|
||||
out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine");
|
||||
out.println("");
|
||||
out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
|
||||
|
|
|
@ -151,6 +151,12 @@ public class EWTSTest extends TestCase {
|
|||
/** Tests that the EWTS->unicode converter isn't completely
|
||||
braindead. */
|
||||
public void testEwtsBasics() {
|
||||
if (RUN_FAILING_TESTS) ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66"); // TODO(DLC)[EWTS->Tibetan]: DLC NOW!
|
||||
if (RUN_FAILING_TESTS) ewts2uni_test("hUM^", "TODO(DLC)[EWTS->Tibetan]: DLC NOW");
|
||||
ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51");
|
||||
ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51");
|
||||
ewts2uni_test("drwa", "\u0f51\u0fb2\u0fad");
|
||||
ewts2uni_test("rwa", "\u0f62\u0fad");
|
||||
ewts2uni_test("ug_pha ", "\u0f68\u0f74\u0f42\u00a0\u0f55\u0f0b");
|
||||
ewts2uni_test("a ", "\u0f68\u0f0b");
|
||||
ewts2uni_test("g.a ", "\u0f42\u0f68\u0f0b");
|
||||
|
|
|
@ -211,7 +211,9 @@ public final class EWTSTraits implements TTraits {
|
|||
|
||||
if (wowel.indexOf('M') >= 0) {
|
||||
DuffCode last = null;
|
||||
if (duff.size() > 0) {
|
||||
if (!context_added[0]) {
|
||||
last = preceding;
|
||||
} else if (duff.size() > 0) {
|
||||
last = (DuffCode)duff.get(duff.size() - 1);
|
||||
duff.remove(duff.size() - 1); // getBindu will add it back...
|
||||
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
|
||||
|
|
|
@ -479,7 +479,7 @@ public class TConverter {
|
|||
TStackList sl1 = ((null == pt1)
|
||||
? null : pt1.getBestParse());
|
||||
if (null == sl0 && null == sl1) {
|
||||
// {A-DZU} causes this, for example.
|
||||
// ACIP {A-DZU} causes this, for example.
|
||||
hasErrors = true;
|
||||
String errorMessage =
|
||||
"[#ERROR "
|
||||
|
|
|
@ -190,17 +190,19 @@ class TPair {
|
|||
}
|
||||
|
||||
/** Convenience overload that takes no arguments and delegates to
 * the parameterized getWylie, passing false for every flag (i.e.,
 * justLeft is false, and, in the two-argument form,
 * dropDisambiguator is false as well).
 *
 * <p>NOTE(review): two return statements appear here because this
 * view is a rendered diff; presumably the one-argument call is
 * the removed line and the two-argument call is its replacement
 * -- confirm against the real file. */
String getWylie() {
|
||||
return getWylie(false);
|
||||
return getWylie(false, false);
|
||||
}
|
||||
|
||||
/** Returns the EWTS Wylie that corresponds to this pair if
|
||||
* justLeft is false, or the EWTS Wylie that corresponds to just
|
||||
* {@link #getLeft()} if justLeft is true.
|
||||
* {@link #getLeft()} if justLeft is true. If dropDisambiguator
|
||||
* is true and the right component is a disambiguator, then the
|
||||
* Wylie will not contain '.'.
|
||||
*
|
||||
* <p>Returns "W" for ACIP "W", "r" for ACIP "R", y for ACIP "Y",
|
||||
* even though sometimes the EWTS for those is "w", "R", or "Y".
|
||||
* Handle that in the caller. */
|
||||
String getWylie(boolean justLeft) {
|
||||
String getWylie(boolean justLeft, boolean dropDisambiguator) {
|
||||
String leftWylie = null;
|
||||
if (getLeft() != null) {
|
||||
leftWylie = traits.getEwtsForConsonant(getLeft());
|
||||
|
@ -212,7 +214,7 @@ class TPair {
|
|||
if (null == leftWylie) leftWylie = "";
|
||||
if (justLeft) return leftWylie;
|
||||
String rightWylie = null;
|
||||
if (traits.disambiguator().equals(getRight()))
|
||||
if (!dropDisambiguator && traits.disambiguator().equals(getRight()))
|
||||
rightWylie = ".";
|
||||
else if ("+".equals(getRight()))
|
||||
rightWylie = "+";
|
||||
|
|
|
@ -701,10 +701,10 @@ class TPairList {
|
|||
int previousSize = duffsAndErrors.size();
|
||||
StringBuffer wylieForConsonant = new StringBuffer();
|
||||
for (int x = 0; x + 1 < size(); x++) {
|
||||
wylieForConsonant.append(get(x).getWylie(false));
|
||||
wylieForConsonant.append(get(x).getWylie(false, true));
|
||||
}
|
||||
TPair lastPair = get(size() - 1);
|
||||
wylieForConsonant.append(lastPair.getWylie(true));
|
||||
wylieForConsonant.append(lastPair.getWylie(true, false));
|
||||
String hashKey = wylieForConsonant.toString();
|
||||
|
||||
// Because EWTS has special handling for full-formed
|
||||
|
@ -763,6 +763,7 @@ class TPairList {
|
|||
}
|
||||
}
|
||||
if (lastPair.getRight() == null
|
||||
|| lastPair.getRight().equals(traits.disambiguator())
|
||||
|| lastPair.equals(traits.disambiguator())) {
|
||||
duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
|
||||
} else {
|
||||
|
@ -771,7 +772,7 @@ class TPairList {
|
|||
lastPair.getRight());
|
||||
}
|
||||
if (previousSize == duffsAndErrors.size())
|
||||
throw new Error("TPairList with no duffs? " + toString()); // FIXME: change to assertion.
|
||||
throw new Error("TPairList with no duffs? " + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion.
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -256,7 +256,14 @@ class TPairListFactory {
|
|||
|
||||
/** EWTS helper function that transforms native stacks to include
|
||||
* pluses: [(ph . ) (y . ) (w . *)] -> [(ph . +) (y . +) (w
|
||||
* . *)], e.g.
|
||||
* . *)], e.g. The tricky case is something like [brgyad] or
|
||||
* [brjod] because b+r is a native stack and so is r+g+y (and in
|
||||
* fact r+g+y accepts a bao prefix). It's not quite safe to
|
||||
* always grab the rightmost native stack from a stretch, as
|
||||
* [drwa] proves. You must grab the longest, rightmost stack.
|
||||
* In most cases, it would be illegal whichever way you did it. In the
|
||||
* rest, the only way it can be legal is if there's a prefix and
|
||||
* the rightmost stack.
|
||||
* @param traits must mesh with orig */
|
||||
private static TPairList transformNativeStacks(TTraits traits,
|
||||
TPairList orig) {
|
||||
|
@ -277,8 +284,9 @@ class TPairListFactory {
|
|||
// we see a native stack of size 2, we transform it.
|
||||
|
||||
boolean found_something = false;
|
||||
TPair p[] = new TPair[maxNativeStackSize];
|
||||
for (int j = 0; j < maxNativeStackSize; j++) {
|
||||
TPair p[]
|
||||
= new TPair[maxNativeStackSize + 1]; // plus one for [brgyad]
|
||||
for (int j = 0; j < maxNativeStackSize + 1; j++) {
|
||||
if (i + j < orig.size())
|
||||
p[j] = orig.get(i + j);
|
||||
else
|
||||
|
@ -286,35 +294,32 @@ class TPairListFactory {
|
|||
}
|
||||
// Now p[0] is current pair, p[1] is the one after that, etc.
|
||||
|
||||
for (int nss = maxNativeStackSize; nss >= minNativeStackSize;
|
||||
nss--) {
|
||||
String hash_key = "";
|
||||
int good = 0;
|
||||
for (int k = 0; k < nss - 1; k++) {
|
||||
if (null != p[k]
|
||||
&& null != p[k].getLeft()
|
||||
&& null == p[k].getRight()) {
|
||||
hash_key += p[k].getLeft() + "-";
|
||||
++good;
|
||||
if (null != p[0].getLeft()
|
||||
&& null == p[0].getRight()) {
|
||||
// TODO(dchandler): The way I do this [drwa] case,
|
||||
// does it rely on the fact that maxNativeStackSize ==
|
||||
// 3? Let's have it not rely on that...
|
||||
int h;
|
||||
if (0 == (h = helper(traits, 0, maxNativeStackSize, p, result))) { // [drwa]
|
||||
// [brgyad] makes us go from right to left.
|
||||
// (TODO(dchandler): It's a shame we're doing this
|
||||
// stuff when we have the code to figure out, for
|
||||
// ACIP, that [BRGYAD] is what it is.)
|
||||
for (int offset = 1; offset >= 0; offset--) {
|
||||
if (found_something) break;
|
||||
for (int nss = maxNativeStackSize;
|
||||
nss >= minNativeStackSize;
|
||||
nss--) {
|
||||
if (0 != (h = helper(traits, offset, nss, p, result))) {
|
||||
found_something = true;
|
||||
i += h;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (null != p[nss - 1]
|
||||
&& null != p[nss - 1].getLeft()
|
||||
&& !"+".equals(p[nss - 1].getRight())) {
|
||||
hash_key += p[nss - 1].getLeft();
|
||||
++good;
|
||||
}
|
||||
if (nss == good
|
||||
&& TibetanMachineWeb.isKnownHashKey(hash_key)) {
|
||||
} else {
|
||||
i += h;
|
||||
found_something = true;
|
||||
for (int n = 0; n < nss - 1; n++) {
|
||||
++i;
|
||||
result.append(new TPair(traits,
|
||||
p[n].getLeft(), "+"));
|
||||
}
|
||||
++i;
|
||||
result.append(p[nss - 1]);
|
||||
break; // for ph-y-w etc.
|
||||
}
|
||||
}
|
||||
if (!found_something) {
|
||||
|
@ -328,6 +333,47 @@ class TPairListFactory {
|
|||
return result;
|
||||
}
|
||||
|
||||
/** We mutate result and return the number of TPairs we scarfed if
|
||||
* we find a native stack of size nss at p[offset], p[offset +
|
||||
* 1], ..., p[offset + nss - 1]. */
|
||||
private static int helper(TTraits traits, int offset, int nss, TPair p[],
|
||||
TPairList result) {
|
||||
String hashKey = "";
|
||||
int good = 0;
|
||||
for (int k = 0; k < nss - 1; k++) {
|
||||
if (null != p[k + offset]
|
||||
&& null != p[k + offset].getLeft()
|
||||
&& null == p[k + offset].getRight()) {
|
||||
hashKey += p[k + offset].getLeft() + "-";
|
||||
++good;
|
||||
}
|
||||
}
|
||||
if (null != p[nss - 1 + offset]
|
||||
&& null != p[nss - 1 + offset].getLeft()
|
||||
&& !"+".equals(p[nss - 1 + offset].getRight())) {
|
||||
hashKey += p[nss - 1 + offset].getLeft();
|
||||
++good;
|
||||
}
|
||||
if (nss == good
|
||||
&& TibetanMachineWeb.isKnownHashKey(hashKey)) {
|
||||
int i = 0;
|
||||
if (1 == offset) {
|
||||
++i;
|
||||
result.append(p[0]);
|
||||
}
|
||||
for (int n = 0; n < nss - 1; n++) {
|
||||
++i;
|
||||
result.append(new TPair(traits,
|
||||
p[n + offset].getLeft(),
|
||||
"+"));
|
||||
}
|
||||
++i;
|
||||
result.append(p[nss - 1 + offset]);
|
||||
return i;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: doc
|
||||
private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {
|
||||
|
||||
|
|
Loading…
Reference in a new issue