diff --git a/source/org/thdl/tib/input/TibetanConverter.java b/source/org/thdl/tib/input/TibetanConverter.java index ee932e0..29e762a 100644 --- a/source/org/thdl/tib/input/TibetanConverter.java +++ b/source/org/thdl/tib/input/TibetanConverter.java @@ -149,7 +149,7 @@ public class TibetanConverter implements FontConverterConstants { out.println("TibetanConverter --colors yes|no"); out.println(" --warning-level None|Some|Most|All"); - out.println(" --acip-to-tibetan-warning-and-error-messages short|long"); + out.println(" --acip-to-tibetan-warning-and-error-messages short|long"); // TODO(DLC)[EWTS->Tibetan]: misnomer, ewts and acip both are affected out.println(" --find-all-non-tmw | --find-some-non-tmw"); out.println(" | --tmw-to-tmw-for-testing"); out.println(" | --to-tibetan-machine | --to-tibetan-machine-web"); @@ -166,6 +166,10 @@ public class TibetanConverter implements FontConverterConstants { out.println(""); out.println(" -h | --help for this message"); out.println(""); + out.println(" --wylie-to-unicode to convert an EWTS text file to a Unicode"); + out.println(""); + out.println(" --wylie-to-tmw to convert an EWTS text file to TibetanMachineWeb"); + out.println(""); out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine"); out.println(""); out.println(" --to-unicode to convert TibetanMachineWeb to Unicode"); diff --git a/source/org/thdl/tib/text/ttt/EWTSTest.java b/source/org/thdl/tib/text/ttt/EWTSTest.java index 2471d40..cc3d412 100644 --- a/source/org/thdl/tib/text/ttt/EWTSTest.java +++ b/source/org/thdl/tib/text/ttt/EWTSTest.java @@ -151,6 +151,12 @@ public class EWTSTest extends TestCase { /** Tests that the EWTS->unicode converter isn't completely braindead. */ public void testEwtsBasics() { + if (RUN_FAILING_TESTS) ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66"); // TODO(DLC)[EWTS->Tibetan]: DLC NOW! 
+ if (RUN_FAILING_TESTS) ewts2uni_test("hUM^", "TODO(DLC)[EWTS->Tibetan]: DLC NOW"); + ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51"); + ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51"); + ewts2uni_test("drwa", "\u0f51\u0fb2\u0fad"); + ewts2uni_test("rwa", "\u0f62\u0fad"); ewts2uni_test("ug_pha ", "\u0f68\u0f74\u0f42\u00a0\u0f55\u0f0b"); ewts2uni_test("a ", "\u0f68\u0f0b"); ewts2uni_test("g.a ", "\u0f42\u0f68\u0f0b"); diff --git a/source/org/thdl/tib/text/ttt/EWTSTraits.java b/source/org/thdl/tib/text/ttt/EWTSTraits.java index a6cf6d0..274f1eb 100644 --- a/source/org/thdl/tib/text/ttt/EWTSTraits.java +++ b/source/org/thdl/tib/text/ttt/EWTSTraits.java @@ -211,7 +211,9 @@ public final class EWTSTraits implements TTraits { if (wowel.indexOf('M') >= 0) { DuffCode last = null; - if (duff.size() > 0) { + if (!context_added[0]) { + last = preceding; + } else if (duff.size() > 0) { last = (DuffCode)duff.get(duff.size() - 1); duff.remove(duff.size() - 1); // getBindu will add it back... // TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone??? diff --git a/source/org/thdl/tib/text/ttt/TConverter.java b/source/org/thdl/tib/text/ttt/TConverter.java index c1aaf8d..5d7d084 100644 --- a/source/org/thdl/tib/text/ttt/TConverter.java +++ b/source/org/thdl/tib/text/ttt/TConverter.java @@ -479,7 +479,7 @@ public class TConverter { TStackList sl1 = ((null == pt1) ? null : pt1.getBestParse()); if (null == sl0 && null == sl1) { - // {A-DZU} causes this, for example. + // ACIP {A-DZU} causes this, for example. 
hasErrors = true; String errorMessage = "[#ERROR " diff --git a/source/org/thdl/tib/text/ttt/TPair.java b/source/org/thdl/tib/text/ttt/TPair.java index fa63fc2..d5a12df 100644 --- a/source/org/thdl/tib/text/ttt/TPair.java +++ b/source/org/thdl/tib/text/ttt/TPair.java @@ -190,17 +190,19 @@ class TPair { } String getWylie() { - return getWylie(false); + return getWylie(false, false); } /** Returns the EWTS Wylie that corresponds to this pair if * justLeft is false, or the EWTS Wylie that corresponds to just - * {@link #getLeft()} if justLeft is true. + * {@link #getLeft()} if justLeft is true. If dropDisambiguator + * is true and the right component is a disambiguator, then the + * Wylie will not contain '.'. * *
Returns "W" for ACIP "W", "r" for ACIP "R", y for ACIP "Y", * even though sometimes the EWTS for those is "w", "R", or "Y". * Handle that in the caller. */ - String getWylie(boolean justLeft) { + String getWylie(boolean justLeft, boolean dropDisambiguator) { String leftWylie = null; if (getLeft() != null) { leftWylie = traits.getEwtsForConsonant(getLeft()); @@ -212,7 +214,7 @@ class TPair { if (null == leftWylie) leftWylie = ""; if (justLeft) return leftWylie; String rightWylie = null; - if (traits.disambiguator().equals(getRight())) + if (!dropDisambiguator && traits.disambiguator().equals(getRight())) rightWylie = "."; else if ("+".equals(getRight())) rightWylie = "+"; diff --git a/source/org/thdl/tib/text/ttt/TPairList.java b/source/org/thdl/tib/text/ttt/TPairList.java index 3185c6f..00fa151 100644 --- a/source/org/thdl/tib/text/ttt/TPairList.java +++ b/source/org/thdl/tib/text/ttt/TPairList.java @@ -701,10 +701,10 @@ class TPairList { int previousSize = duffsAndErrors.size(); StringBuffer wylieForConsonant = new StringBuffer(); for (int x = 0; x + 1 < size(); x++) { - wylieForConsonant.append(get(x).getWylie(false)); + wylieForConsonant.append(get(x).getWylie(false, true)); } TPair lastPair = get(size() - 1); - wylieForConsonant.append(lastPair.getWylie(true)); + wylieForConsonant.append(lastPair.getWylie(true, false)); String hashKey = wylieForConsonant.toString(); // Because EWTS has special handling for full-formed @@ -763,6 +763,7 @@ class TPairList { } } if (lastPair.getRight() == null + || lastPair.getRight().equals(traits.disambiguator()) || lastPair.equals(traits.disambiguator())) { duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey)); } else { @@ -771,7 +772,7 @@ class TPairList { lastPair.getRight()); } if (previousSize == duffsAndErrors.size()) - throw new Error("TPairList with no duffs? " + toString()); // FIXME: change to assertion. + throw new Error("TPairList with no duffs? 
" + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion. } } diff --git a/source/org/thdl/tib/text/ttt/TPairListFactory.java b/source/org/thdl/tib/text/ttt/TPairListFactory.java index b7418e2..7ebcfae 100644 --- a/source/org/thdl/tib/text/ttt/TPairListFactory.java +++ b/source/org/thdl/tib/text/ttt/TPairListFactory.java @@ -256,7 +256,14 @@ class TPairListFactory { /** EWTS helper function that transforms native stacks to include * pluses: [(ph . ) (y . ) (w . *)] -> [(ph . +) (y . +) (w - * . *)], e.g. + * . *)], e.g. The tricky case is something like [brgyad] or + * [brjod] because b+r is a native stack and so is r+g+y (and in + * fact r+g+y accepts a bao prefix). It's not quite safe to + * always grab the rightmost native stack from a stretch, as + * [drwa] proves. You must grab the longest, rightmost stack. + * In most cases, either way you did it it'd be illegal. In the + * rest, the only way it can be legal is if there's a prefix and + * the rightmost stack. * @param traits must mesh with orig */ private static TPairList transformNativeStacks(TTraits traits, TPairList orig) { @@ -277,8 +284,9 @@ class TPairListFactory { // we see a native stack of size 2, we transform it. boolean found_something = false; - TPair p[] = new TPair[maxNativeStackSize]; - for (int j = 0; j < maxNativeStackSize; j++) { + TPair p[] + = new TPair[maxNativeStackSize + 1]; // plus one for [brgyad] + for (int j = 0; j < maxNativeStackSize + 1; j++) { if (i + j < orig.size()) p[j] = orig.get(i + j); else @@ -286,35 +294,32 @@ class TPairListFactory { } // Now p[0] is current pair, p[1] is the one after that, etc. 
- for (int nss = maxNativeStackSize; nss >= minNativeStackSize; - nss--) { - String hash_key = ""; - int good = 0; - for (int k = 0; k < nss - 1; k++) { - if (null != p[k] - && null != p[k].getLeft() - && null == p[k].getRight()) { - hash_key += p[k].getLeft() + "-"; - ++good; + if (null != p[0].getLeft() + && null == p[0].getRight()) { + // TODO(dchandler): The way I do this [drwa] case, + // does it rely on the fact that maxNativeStackSize == + // 3? Let's have it not rely on that... + int h; + if (0 == (h = helper(traits, 0, maxNativeStackSize, p, result))) { // [drwa] + // [brgyad] makes us go from right to left. + // (TODO(dchandler): It's a shame we're doing this + // stuff when we have the code to figure out, for + // ACIP, that [BRGYAD] is what it is.) + for (int offset = 1; offset >= 0; offset--) { + if (found_something) break; + for (int nss = maxNativeStackSize; + nss >= minNativeStackSize; + nss--) { + if (0 != (h = helper(traits, offset, nss, p, result))) { + found_something = true; + i += h; + break; + } + } } - } - if (null != p[nss - 1] - && null != p[nss - 1].getLeft() - && !"+".equals(p[nss - 1].getRight())) { - hash_key += p[nss - 1].getLeft(); - ++good; - } - if (nss == good - && TibetanMachineWeb.isKnownHashKey(hash_key)) { + } else { + i += h; found_something = true; - for (int n = 0; n < nss - 1; n++) { - ++i; - result.append(new TPair(traits, - p[n].getLeft(), "+")); - } - ++i; - result.append(p[nss - 1]); - break; // for ph-y-w etc. } } if (!found_something) { @@ -328,6 +333,47 @@ class TPairListFactory { return result; } + /** We mutate result and return the number of TPairs we scarfed if + * we find a native stack of size nss at p[offset], p[offset + + * 1], ..., p[offset + nss - 1]. 
*/ + private static int helper(TTraits traits, int offset, int nss, TPair p[], + TPairList result) { /* Returns 0 and leaves result untouched when p[offset..offset+nss-1] is not a known stack; otherwise returns the number of pairs appended to result. */ + String hashKey = ""; + int good = 0; + for (int k = 0; k < nss - 1; k++) { /* every pair but the last must be non-null with a left part and no right part */ + if (null != p[k + offset] + && null != p[k + offset].getLeft() + && null == p[k + offset].getRight()) { + hashKey += p[k + offset].getLeft() + "-"; + ++good; + } + } + if (null != p[nss - 1 + offset] + && null != p[nss - 1 + offset].getLeft() + && !"+".equals(p[nss - 1 + offset].getRight())) { /* the last pair may have any right part other than "+" */ + hashKey += p[nss - 1 + offset].getLeft(); + ++good; + } + if (nss == good + && TibetanMachineWeb.isKnownHashKey(hashKey)) { /* all nss pairs qualified and TibetanMachineWeb knows the dash-joined key */ + int i = 0; + if (1 == offset) { /* p[0] sits before the stack: append it unchanged and count it */ + ++i; + result.append(p[0]); + } + for (int n = 0; n < nss - 1; n++) { /* non-final members are re-created with "+" as their right part */ + ++i; + result.append(new TPair(traits, + p[n + offset].getLeft(), + "+")); + } + ++i; + result.append(p[nss - 1 + offset]); /* the final member is appended as-is, keeping its original right part */ + return i; + } + return 0; + } + // TODO(DLC)[EWTS->Tibetan]: doc private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {