Bulletproofed EWTS->Tibetan against nasty pseudo-EWTS like [RAM].

Renamed the recoverACIP methods to recoverTranslit.
This commit is contained in:
dchandler 2005-07-07 02:54:36 +00:00
parent 982350371d
commit cddbbae9a1
7 changed files with 59 additions and 39 deletions

View file

@ -633,7 +633,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (vowel.equals(WYLIE_aVOWEL) if (vowel.equals(WYLIE_aVOWEL)
|| TibetanMachineWeb.isTopVowel(context_2)) { || TibetanMachineWeb.isTopVowel(context_2)) {
if (TibetanMachineWeb.isTopVowel(context_2)) if (TibetanMachineWeb.isTopVowel(context_2))
throw new IllegalArgumentException("dropping vowels is bad1"); throw new IllegalArgumentException("dropping vowels is bad1:" + vowel);
if (!context_added[0]) { if (!context_added[0]) {
context_added[0] = true; context_added[0] = true;
if (context_1 != null) if (context_1 != null)
@ -686,7 +686,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (!TibetanMachineWeb.isTopVowel(context_2)) { if (!TibetanMachineWeb.isTopVowel(context_2)) {
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL); DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
l.add(dc_v[TibetanMachineWeb.TMW]); l.add(dc_v[TibetanMachineWeb.TMW]);
} else throw new IllegalArgumentException("dropping vowels is bad2"); } else throw new IllegalArgumentException("dropping vowels is bad2:" + vowel);
return; return;
} }
@ -716,7 +716,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != dc_v) if (null != dc_v)
l.add(dc_v); l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad3"); else throw new IllegalArgumentException("dropping vowels is bad3:" + vowel);
return; return;
} }
@ -740,7 +740,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != dc_v) if (null != dc_v)
l.add(dc_v); l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad4"); else throw new IllegalArgumentException("dropping vowels is bad4:" + vowel);
return; return;
} }
@ -763,7 +763,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != dc_v) if (null != dc_v)
l.add(dc_v); l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad5"); else throw new IllegalArgumentException("dropping vowels is bad5:" + vowel);
return; return;
} }
@ -792,7 +792,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != dc_v) if (null != dc_v)
l.add(dc_v); l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad6"); else throw new IllegalArgumentException("dropping vowels is bad6:" + vowel);
return; return;
} }
@ -815,7 +815,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != dc_v) if (null != dc_v)
l.add(dc_v); l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad7"); else throw new IllegalArgumentException("dropping vowels is bad7:" + vowel);
return; return;
} }
@ -838,7 +838,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2)) if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
l.add(dc_v); l.add(dc_v);
else throw new IllegalArgumentException("dropping vowels is bad8"); else throw new IllegalArgumentException("dropping vowels is bad8:" + vowel);
return; return;
} }
@ -868,7 +868,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != dc_v_sub && null != dc_v_sup) { if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub); l.add(dc_v_sub);
l.add(dc_v_sup); l.add(dc_v_sup);
} else throw new IllegalArgumentException("dropping vowels is bad9"); } else throw new IllegalArgumentException("dropping vowels is bad9:" + vowel);
return; return;
} }
@ -894,7 +894,7 @@ public class TibTextUtils implements THDLWylieConstants {
if (null != dc_v_sub && null != dc_v_sup) { if (null != dc_v_sub && null != dc_v_sup) {
l.add(dc_v_sub); l.add(dc_v_sub);
l.add(dc_v_sup); l.add(dc_v_sup);
} else throw new IllegalArgumentException("dropping vowels is bad10"); } else throw new IllegalArgumentException("dropping vowels is bad10:" + vowel);
return; return;
} }

View file

@ -160,8 +160,9 @@ public final class EWTSTraits implements TTraits {
public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); } public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); }
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) { public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
throws IllegalArgumentException
{
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test. // TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work. // TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.

View file

@ -393,7 +393,7 @@ public class PackageTest extends TestCase {
} }
if (l.getACIPError(acip, false) != null) if (l.getACIPError(acip, false) != null)
System.out.println("ACIPError: " + l.getACIPError(acip, false)); System.out.println("ACIPError: " + l.getACIPError(acip, false));
if (!l.recoverACIP().equals(acip) if (!l.recoverTranslit().equals(acip)
&& (acip.indexOf("A+") < 1) // which becomes +, e.g. {NA+YA} && (acip.indexOf("A+") < 1) // which becomes +, e.g. {NA+YA}
&& (acip.indexOf('0') < 0) && (acip.indexOf('0') < 0)
&& (acip.indexOf('1') < 0) && (acip.indexOf('1') < 0)
@ -408,7 +408,7 @@ public class PackageTest extends TestCase {
&& pairListToUse == 1 && pairListToUse == 1
&& (acip.indexOf('\'') < 0)) { && (acip.indexOf('\'') < 0)) {
System.out.println("acip=" + acip System.out.println("acip=" + acip
+ "; recovery is " + l.recoverACIP()); + "; recovery is " + l.recoverTranslit());
assertTrue(false); assertTrue(false);
} }
if (pairListToUse >= 2) { if (pairListToUse >= 2) {
@ -450,7 +450,7 @@ public class PackageTest extends TestCase {
/** Tests {@link ACIPTraits#breakTshegBarIntoChunks(String, /** Tests {@link ACIPTraits#breakTshegBarIntoChunks(String,
* boolean)}, {@link TPairList#getACIPError(String, boolean)}, * boolean)}, {@link TPairList#getACIPError(String, boolean)},
* and {@link TPairList#recoverACIP()}. */ * and {@link TPairList#recoverTranslit()}. */
public void testBreakACIPIntoChunks() { public void testBreakACIPIntoChunks() {
tstHelper("GASN"); // ambiguous with regard to prefix rules tstHelper("GASN"); // ambiguous with regard to prefix rules
tstHelper("BARMA"); // ambiguous with regard to prefix rules tstHelper("BARMA"); // ambiguous with regard to prefix rules

View file

@ -128,10 +128,11 @@ class TPairList {
return b.toString(); return b.toString();
} }
/** Returns the ACIP corresponding to this TPairList. It will /** Returns the transliteration corresponding to this TPairList.
* be as ambiguous as the input. It may have more disambiguators * It will be as ambiguous as the input. It may have more
* than the original, such as in the case of the ACIP {1234}. */ * disambiguators than the original, such as in the case of the
String recoverACIP() { * ACIP {1234}. */
String recoverTranslit() {
StringBuffer original = new StringBuffer(); StringBuffer original = new StringBuffer();
int sz = size(); int sz = size();
for (int i = 0; i < sz; i++) { for (int i = 0; i < sz; i++) {
@ -174,7 +175,7 @@ class TPairList {
: ""), : ""),
traits); traits);
String translit String translit
= (null != originalACIP) ? originalACIP : recoverACIP(); = (null != originalACIP) ? originalACIP : recoverTranslit();
boolean mustBeEntirelyNumeric = get(0).isNumeric(); boolean mustBeEntirelyNumeric = get(0).isNumeric();
for (int i = 0; i < sz; i++) { for (int i = 0; i < sz; i++) {
TPair p = get(i); TPair p = get(i);
@ -759,7 +760,7 @@ class TPairList {
? 137 ? 137
: 511, : 511,
shortMessages, shortMessages,
recoverACIP(), recoverTranslit(),
traits)); traits));
return; return;
} }
@ -769,9 +770,23 @@ class TPairList {
|| lastPair.equals(traits.disambiguator())) { || lastPair.equals(traits.disambiguator())) {
duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey)); duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
} else { } else {
traits.getDuffForWowel(duffsAndErrors, try {
TibetanMachineWeb.getGlyph(hashKey), traits.getDuffForWowel(duffsAndErrors,
lastPair.getRight()); TibetanMachineWeb.getGlyph(hashKey),
lastPair.getRight());
} catch (IllegalArgumentException e) {
// TODO(dchandler): Error 137 isn't the perfect
// message. Try EWTS [RAM], e.g. to see why. It acts
// like we're trying to find a single glyph for (R
// . A+M) in that case.
duffsAndErrors.add(ErrorsAndWarnings.getMessage(noCorrespondingTMWGlyphIsError
? 137
: 511,
shortMessages,
recoverTranslit(),
traits));
return;
}
} }
if (previousSize == duffsAndErrors.size()) if (previousSize == duffsAndErrors.size())
throw new Error("TPairList with no duffs? " + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion. throw new Error("TPairList with no duffs? " + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion.

View file

@ -333,7 +333,7 @@ class TParseTree {
} }
} }
String translit = (null != originalACIP) ? originalACIP : recoverACIP(); String translit = (null != originalACIP) ? originalACIP : recoverTranslit();
TStackListList up = getUniqueParse(false); TStackListList up = getUniqueParse(false);
if (null == up || up.size() != 1) { if (null == up || up.size() != 1) {
boolean isLastStack[] = new boolean[1]; boolean isLastStack[] = new boolean[1];
@ -508,7 +508,7 @@ n+t+s
&& !pl.get(1).endsStack() && !pl.get(1).endsStack()
&& pl.get(2).endsStack() && pl.get(2).endsStack()
&& null != left && null != right) { && null != left && null != right) {
// TODO(DLC)[EWTS->Tibetan]: This is ACIP-specific. // TODO(DLC)[EWTS->Tibetan]: This function is ACIP-specific.
if (("D".equals(left) && "G".equals(middle) && "R".equals(right)) if (("D".equals(left) && "G".equals(middle) && "R".equals(right))
|| ("D".equals(left) && "G".equals(middle) && "Y".equals(right))) { || ("D".equals(left) && "G".equals(middle) && "Y".equals(right))) {
if (pl.size() == 3) { if (pl.size() == 3) {
@ -556,13 +556,14 @@ n+t+s
return null; return null;
} }
/** Returns something akin to the ACIP input (okay, maybe 1-2-3-4 /** Returns something akin to the transliteration that was input
* instead of 1234, and maybe AUTPA instead of AUT-PA) * (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA instead
* corresponding to this parse tree. */ * of AUT-PA [ACIP examples]) corresponding to this parse
public String recoverACIP() { // TODO(DLC)[EWTS->Tibetan]: acip-specific * tree. */
public String recoverTranslit() {
ParseIterator pi = getParseIterator(); ParseIterator pi = getParseIterator();
if (pi.hasNext()) { if (pi.hasNext()) {
return pi.next().recoverACIP(); return pi.next().recoverTranslit();
} }
return null; return null;
} }

View file

@ -70,9 +70,10 @@ class TStackList {
/** Returns true if and only if this list is empty. */ /** Returns true if and only if this list is empty. */
public boolean isEmpty() { return al.isEmpty(); } public boolean isEmpty() { return al.isEmpty(); }
/** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234) /** Returns something akin to the transliteration that was input
* corresponding to this stack list. */ * (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA instead
public String recoverACIP() { * of AUT-PA [ACIP examples]) corresponding to this stack list. */
public String recoverTranslit() {
return toStringHelper(false); return toStringHelper(false);
} }
@ -87,7 +88,7 @@ class TStackList {
StringBuffer b = new StringBuffer(); StringBuffer b = new StringBuffer();
for (int i = 0; i < sz; i++) { for (int i = 0; i < sz; i++) {
if (brackets) b.append('{'); if (brackets) b.append('{');
b.append(get(i).recoverACIP()); b.append(get(i).recoverTranslit());
if (brackets) b.append('}'); if (brackets) b.append('}');
} }
return b.toString(); return b.toString();

View file

@ -84,10 +84,12 @@ class TStackListList {
* happen. */ * happen. */
public ListIterator listIterator() { return al.listIterator(); } public ListIterator listIterator() { return al.listIterator(); }
/** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234) /** Returns something akin to the transliteration that was input
* corresponding to this stack list list. */ * (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA instead
public String recoverACIP() { * of AUT-PA [ACIP examples]) corresponding to this stack list
* list. */
public String recoverTranslit() {
if (isEmpty()) return null; if (isEmpty()) return null;
return get(0).recoverACIP(); return get(0).recoverTranslit();
} }
} }