Fixed important EWTS->Tibetan conversion bugs. [g.yogs] (and maybe
[hUM^]) are not yet converting correctly. I have not yet committed the end-to-end test that I am running manually to find these problems; it will become another document for TMW_RTF_TO_THDL_WYLIETest.java. Note that thdl.debug=true is required to access the GUI for the EWTS->* converters.
This commit is contained in:
parent
0b3a636f63
commit
63ff0fb0c9
7 changed files with 101 additions and 40 deletions
|
@ -149,7 +149,7 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
|
|
||||||
out.println("TibetanConverter --colors yes|no");
|
out.println("TibetanConverter --colors yes|no");
|
||||||
out.println(" --warning-level None|Some|Most|All");
|
out.println(" --warning-level None|Some|Most|All");
|
||||||
out.println(" --acip-to-tibetan-warning-and-error-messages short|long");
|
out.println(" --acip-to-tibetan-warning-and-error-messages short|long"); // TODO(DLC)[EWTS->Tibetan]: misnomer, ewts and acip both are affected
|
||||||
out.println(" --find-all-non-tmw | --find-some-non-tmw");
|
out.println(" --find-all-non-tmw | --find-some-non-tmw");
|
||||||
out.println(" | --tmw-to-tmw-for-testing");
|
out.println(" | --tmw-to-tmw-for-testing");
|
||||||
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
|
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
|
||||||
|
@ -166,6 +166,10 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
out.println("");
|
out.println("");
|
||||||
out.println(" -h | --help for this message");
|
out.println(" -h | --help for this message");
|
||||||
out.println("");
|
out.println("");
|
||||||
|
out.println(" --wylie-to-unicode to convert an EWTS text file to a Unicode");
|
||||||
|
out.println("");
|
||||||
|
out.println(" --wylie-to-tmw to convert an EWTS text file to TibetanMachineWeb");
|
||||||
|
out.println("");
|
||||||
out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine");
|
out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine");
|
||||||
out.println("");
|
out.println("");
|
||||||
out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
|
out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
|
||||||
|
|
|
@ -151,6 +151,12 @@ public class EWTSTest extends TestCase {
|
||||||
/** Tests that the EWTS->unicode converter isn't completely
|
/** Tests that the EWTS->unicode converter isn't completely
|
||||||
braindead. */
|
braindead. */
|
||||||
public void testEwtsBasics() {
|
public void testEwtsBasics() {
|
||||||
|
if (RUN_FAILING_TESTS) ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66"); // TODO(DLC)[EWTS->Tibetan]: DLC NOW!
|
||||||
|
if (RUN_FAILING_TESTS) ewts2uni_test("hUM^", "TODO(DLC)[EWTS->Tibetan]: DLC NOW");
|
||||||
|
ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51");
|
||||||
|
ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51");
|
||||||
|
ewts2uni_test("drwa", "\u0f51\u0fb2\u0fad");
|
||||||
|
ewts2uni_test("rwa", "\u0f62\u0fad");
|
||||||
ewts2uni_test("ug_pha ", "\u0f68\u0f74\u0f42\u00a0\u0f55\u0f0b");
|
ewts2uni_test("ug_pha ", "\u0f68\u0f74\u0f42\u00a0\u0f55\u0f0b");
|
||||||
ewts2uni_test("a ", "\u0f68\u0f0b");
|
ewts2uni_test("a ", "\u0f68\u0f0b");
|
||||||
ewts2uni_test("g.a ", "\u0f42\u0f68\u0f0b");
|
ewts2uni_test("g.a ", "\u0f42\u0f68\u0f0b");
|
||||||
|
|
|
@ -211,7 +211,9 @@ public final class EWTSTraits implements TTraits {
|
||||||
|
|
||||||
if (wowel.indexOf('M') >= 0) {
|
if (wowel.indexOf('M') >= 0) {
|
||||||
DuffCode last = null;
|
DuffCode last = null;
|
||||||
if (duff.size() > 0) {
|
if (!context_added[0]) {
|
||||||
|
last = preceding;
|
||||||
|
} else if (duff.size() > 0) {
|
||||||
last = (DuffCode)duff.get(duff.size() - 1);
|
last = (DuffCode)duff.get(duff.size() - 1);
|
||||||
duff.remove(duff.size() - 1); // getBindu will add it back...
|
duff.remove(duff.size() - 1); // getBindu will add it back...
|
||||||
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
|
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
|
||||||
|
|
|
@ -479,7 +479,7 @@ public class TConverter {
|
||||||
TStackList sl1 = ((null == pt1)
|
TStackList sl1 = ((null == pt1)
|
||||||
? null : pt1.getBestParse());
|
? null : pt1.getBestParse());
|
||||||
if (null == sl0 && null == sl1) {
|
if (null == sl0 && null == sl1) {
|
||||||
// {A-DZU} causes this, for example.
|
// ACIP {A-DZU} causes this, for example.
|
||||||
hasErrors = true;
|
hasErrors = true;
|
||||||
String errorMessage =
|
String errorMessage =
|
||||||
"[#ERROR "
|
"[#ERROR "
|
||||||
|
|
|
@ -190,17 +190,19 @@ class TPair {
|
||||||
}
|
}
|
||||||
|
|
||||||
String getWylie() {
|
String getWylie() {
|
||||||
return getWylie(false);
|
return getWylie(false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the EWTS Wylie that corresponds to this pair if
|
/** Returns the EWTS Wylie that corresponds to this pair if
|
||||||
* justLeft is false, or the EWTS Wylie that corresponds to just
|
* justLeft is false, or the EWTS Wylie that corresponds to just
|
||||||
* {@link #getLeft()} if justLeft is true.
|
* {@link #getLeft()} if justLeft is true. If dropDisambiguator
|
||||||
|
* is true and the right component is a disambiguator, then the
|
||||||
|
* Wylie will not contain '.'.
|
||||||
*
|
*
|
||||||
* <p>Returns "W" for ACIP "W", "r" for ACIP "R", y for ACIP "Y",
|
* <p>Returns "W" for ACIP "W", "r" for ACIP "R", y for ACIP "Y",
|
||||||
* even though sometimes the EWTS for those is "w", "R", or "Y".
|
* even though sometimes the EWTS for those is "w", "R", or "Y".
|
||||||
* Handle that in the caller. */
|
* Handle that in the caller. */
|
||||||
String getWylie(boolean justLeft) {
|
String getWylie(boolean justLeft, boolean dropDisambiguator) {
|
||||||
String leftWylie = null;
|
String leftWylie = null;
|
||||||
if (getLeft() != null) {
|
if (getLeft() != null) {
|
||||||
leftWylie = traits.getEwtsForConsonant(getLeft());
|
leftWylie = traits.getEwtsForConsonant(getLeft());
|
||||||
|
@ -212,7 +214,7 @@ class TPair {
|
||||||
if (null == leftWylie) leftWylie = "";
|
if (null == leftWylie) leftWylie = "";
|
||||||
if (justLeft) return leftWylie;
|
if (justLeft) return leftWylie;
|
||||||
String rightWylie = null;
|
String rightWylie = null;
|
||||||
if (traits.disambiguator().equals(getRight()))
|
if (!dropDisambiguator && traits.disambiguator().equals(getRight()))
|
||||||
rightWylie = ".";
|
rightWylie = ".";
|
||||||
else if ("+".equals(getRight()))
|
else if ("+".equals(getRight()))
|
||||||
rightWylie = "+";
|
rightWylie = "+";
|
||||||
|
|
|
@ -701,10 +701,10 @@ class TPairList {
|
||||||
int previousSize = duffsAndErrors.size();
|
int previousSize = duffsAndErrors.size();
|
||||||
StringBuffer wylieForConsonant = new StringBuffer();
|
StringBuffer wylieForConsonant = new StringBuffer();
|
||||||
for (int x = 0; x + 1 < size(); x++) {
|
for (int x = 0; x + 1 < size(); x++) {
|
||||||
wylieForConsonant.append(get(x).getWylie(false));
|
wylieForConsonant.append(get(x).getWylie(false, true));
|
||||||
}
|
}
|
||||||
TPair lastPair = get(size() - 1);
|
TPair lastPair = get(size() - 1);
|
||||||
wylieForConsonant.append(lastPair.getWylie(true));
|
wylieForConsonant.append(lastPair.getWylie(true, false));
|
||||||
String hashKey = wylieForConsonant.toString();
|
String hashKey = wylieForConsonant.toString();
|
||||||
|
|
||||||
// Because EWTS has special handling for full-formed
|
// Because EWTS has special handling for full-formed
|
||||||
|
@ -763,6 +763,7 @@ class TPairList {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (lastPair.getRight() == null
|
if (lastPair.getRight() == null
|
||||||
|
|| lastPair.getRight().equals(traits.disambiguator())
|
||||||
|| lastPair.equals(traits.disambiguator())) {
|
|| lastPair.equals(traits.disambiguator())) {
|
||||||
duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
|
duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
|
||||||
} else {
|
} else {
|
||||||
|
@ -771,7 +772,7 @@ class TPairList {
|
||||||
lastPair.getRight());
|
lastPair.getRight());
|
||||||
}
|
}
|
||||||
if (previousSize == duffsAndErrors.size())
|
if (previousSize == duffsAndErrors.size())
|
||||||
throw new Error("TPairList with no duffs? " + toString()); // FIXME: change to assertion.
|
throw new Error("TPairList with no duffs? " + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -256,7 +256,14 @@ class TPairListFactory {
|
||||||
|
|
||||||
/** EWTS helper function that transforms native stacks to include
|
/** EWTS helper function that transforms native stacks to include
|
||||||
* pluses: [(ph . ) (y . ) (w . *)] -> [(ph . +) (y . +) (w
|
* pluses: [(ph . ) (y . ) (w . *)] -> [(ph . +) (y . +) (w
|
||||||
* . *)], e.g.
|
* . *)], e.g. The tricky case is something like [brgyad] or
|
||||||
|
* [brjod] because b+r is a native stack and so is r+g+y (and in
|
||||||
|
* fact r+g+y accepts a bao prefix). It's not quite safe to
|
||||||
|
* always grab the rightmost native stack from a stretch, as
|
||||||
|
* [drwa] proves. You must grab the longest, rightmost stack.
|
||||||
|
* In most cases, either way you did it it'd be illegal. In the
|
||||||
|
* rest, the only way it can be legal is if there's a prefix and
|
||||||
|
* the rightmost stack.
|
||||||
* @param traits must mesh with orig */
|
* @param traits must mesh with orig */
|
||||||
private static TPairList transformNativeStacks(TTraits traits,
|
private static TPairList transformNativeStacks(TTraits traits,
|
||||||
TPairList orig) {
|
TPairList orig) {
|
||||||
|
@ -277,8 +284,9 @@ class TPairListFactory {
|
||||||
// we see a native stack of size 2, we transform it.
|
// we see a native stack of size 2, we transform it.
|
||||||
|
|
||||||
boolean found_something = false;
|
boolean found_something = false;
|
||||||
TPair p[] = new TPair[maxNativeStackSize];
|
TPair p[]
|
||||||
for (int j = 0; j < maxNativeStackSize; j++) {
|
= new TPair[maxNativeStackSize + 1]; // plus one for [brgyad]
|
||||||
|
for (int j = 0; j < maxNativeStackSize + 1; j++) {
|
||||||
if (i + j < orig.size())
|
if (i + j < orig.size())
|
||||||
p[j] = orig.get(i + j);
|
p[j] = orig.get(i + j);
|
||||||
else
|
else
|
||||||
|
@ -286,35 +294,32 @@ class TPairListFactory {
|
||||||
}
|
}
|
||||||
// Now p[0] is current pair, p[1] is the one after that, etc.
|
// Now p[0] is current pair, p[1] is the one after that, etc.
|
||||||
|
|
||||||
for (int nss = maxNativeStackSize; nss >= minNativeStackSize;
|
if (null != p[0].getLeft()
|
||||||
|
&& null == p[0].getRight()) {
|
||||||
|
// TODO(dchandler): The way I do this [drwa] case,
|
||||||
|
// does it rely on the fact that maxNativeStackSize ==
|
||||||
|
// 3? Let's have it not rely on that...
|
||||||
|
int h;
|
||||||
|
if (0 == (h = helper(traits, 0, maxNativeStackSize, p, result))) { // [drwa]
|
||||||
|
// [brgyad] makes us go from right to left.
|
||||||
|
// (TODO(dchandler): It's a shame we're doing this
|
||||||
|
// stuff when we have the code to figure out, for
|
||||||
|
// ACIP, that [BRGYAD] is what it is.)
|
||||||
|
for (int offset = 1; offset >= 0; offset--) {
|
||||||
|
if (found_something) break;
|
||||||
|
for (int nss = maxNativeStackSize;
|
||||||
|
nss >= minNativeStackSize;
|
||||||
nss--) {
|
nss--) {
|
||||||
String hash_key = "";
|
if (0 != (h = helper(traits, offset, nss, p, result))) {
|
||||||
int good = 0;
|
|
||||||
for (int k = 0; k < nss - 1; k++) {
|
|
||||||
if (null != p[k]
|
|
||||||
&& null != p[k].getLeft()
|
|
||||||
&& null == p[k].getRight()) {
|
|
||||||
hash_key += p[k].getLeft() + "-";
|
|
||||||
++good;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (null != p[nss - 1]
|
|
||||||
&& null != p[nss - 1].getLeft()
|
|
||||||
&& !"+".equals(p[nss - 1].getRight())) {
|
|
||||||
hash_key += p[nss - 1].getLeft();
|
|
||||||
++good;
|
|
||||||
}
|
|
||||||
if (nss == good
|
|
||||||
&& TibetanMachineWeb.isKnownHashKey(hash_key)) {
|
|
||||||
found_something = true;
|
found_something = true;
|
||||||
for (int n = 0; n < nss - 1; n++) {
|
i += h;
|
||||||
++i;
|
break;
|
||||||
result.append(new TPair(traits,
|
|
||||||
p[n].getLeft(), "+"));
|
|
||||||
}
|
}
|
||||||
++i;
|
}
|
||||||
result.append(p[nss - 1]);
|
}
|
||||||
break; // for ph-y-w etc.
|
} else {
|
||||||
|
i += h;
|
||||||
|
found_something = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!found_something) {
|
if (!found_something) {
|
||||||
|
@ -328,6 +333,47 @@ class TPairListFactory {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Looks for a native stack of size nss at p[offset], p[offset + 1], ..., p[offset + nss - 1].
|
||||||
|
* A match requires that each of the first nss - 1 pairs is non-null with a non-null left and a null right, that the last pair is non-null with a non-null left and a right other than "+", and that the lefts joined by "-" satisfy TibetanMachineWeb.isKnownHashKey. On a match we mutate result -- appending p[0] unchanged first when offset is 1, then the stack with "+" as the right of every pair but the last, which is appended as is -- and return the number of TPairs we scarfed (nss, plus one when offset is 1).
|
||||||
|
* Returns 0, without touching result, when no such stack is found. */
|
||||||
|
private static int helper(TTraits traits, int offset, int nss, TPair p[],
|
||||||
|
TPairList result) {
|
||||||
|
String hashKey = ""; // "-"-joined lefts of the candidate stack
|
||||||
|
int good = 0; // how many of the nss pairs have the required shape
|
||||||
|
for (int k = 0; k < nss - 1; k++) {
|
||||||
|
if (null != p[k + offset]
|
||||||
|
&& null != p[k + offset].getLeft()
|
||||||
|
&& null == p[k + offset].getRight()) {
|
||||||
|
hashKey += p[k + offset].getLeft() + "-";
|
||||||
|
++good;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (null != p[nss - 1 + offset]
|
||||||
|
&& null != p[nss - 1 + offset].getLeft()
|
||||||
|
&& !"+".equals(p[nss - 1 + offset].getRight())) { // last pair: any right except "+"
|
||||||
|
hashKey += p[nss - 1 + offset].getLeft();
|
||||||
|
++good;
|
||||||
|
}
|
||||||
|
if (nss == good
|
||||||
|
&& TibetanMachineWeb.isKnownHashKey(hashKey)) {
|
||||||
|
int i = 0; // number of pairs of p consumed
|
||||||
|
if (1 == offset) { // p[0] precedes the stack; pass it through unchanged
|
||||||
|
++i;
|
||||||
|
result.append(p[0]);
|
||||||
|
}
|
||||||
|
for (int n = 0; n < nss - 1; n++) {
|
||||||
|
++i;
|
||||||
|
result.append(new TPair(traits,
|
||||||
|
p[n + offset].getLeft(),
|
||||||
|
"+"));
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
result.append(p[nss - 1 + offset]);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
return 0; // no native stack here; result is untouched
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(DLC)[EWTS->Tibetan]: doc
|
// TODO(DLC)[EWTS->Tibetan]: doc
|
||||||
private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {
|
private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue