Bulletproofed EWTS->Tibetan conversion against nasty pseudo-EWTS input like [RAM].
Renamed the recoverACIP methods to recoverTranslit.
This commit is contained in:
parent
982350371d
commit
cddbbae9a1
7 changed files with 59 additions and 39 deletions
|
@ -633,7 +633,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
if (vowel.equals(WYLIE_aVOWEL)
|
||||
|| TibetanMachineWeb.isTopVowel(context_2)) {
|
||||
if (TibetanMachineWeb.isTopVowel(context_2))
|
||||
throw new IllegalArgumentException("dropping vowels is bad1");
|
||||
throw new IllegalArgumentException("dropping vowels is bad1:" + vowel);
|
||||
if (!context_added[0]) {
|
||||
context_added[0] = true;
|
||||
if (context_1 != null)
|
||||
|
@ -686,7 +686,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
||||
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||
} else throw new IllegalArgumentException("dropping vowels is bad2");
|
||||
} else throw new IllegalArgumentException("dropping vowels is bad2:" + vowel);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -716,7 +716,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad3");
|
||||
else throw new IllegalArgumentException("dropping vowels is bad3:" + vowel);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -740,7 +740,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad4");
|
||||
else throw new IllegalArgumentException("dropping vowels is bad4:" + vowel);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -763,7 +763,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad5");
|
||||
else throw new IllegalArgumentException("dropping vowels is bad5:" + vowel);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -792,7 +792,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad6");
|
||||
else throw new IllegalArgumentException("dropping vowels is bad6:" + vowel);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -815,7 +815,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
if (null != dc_v)
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad7");
|
||||
else throw new IllegalArgumentException("dropping vowels is bad7:" + vowel);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -838,7 +838,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
|
||||
l.add(dc_v);
|
||||
else throw new IllegalArgumentException("dropping vowels is bad8");
|
||||
else throw new IllegalArgumentException("dropping vowels is bad8:" + vowel);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -868,7 +868,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
if (null != dc_v_sub && null != dc_v_sup) {
|
||||
l.add(dc_v_sub);
|
||||
l.add(dc_v_sup);
|
||||
} else throw new IllegalArgumentException("dropping vowels is bad9");
|
||||
} else throw new IllegalArgumentException("dropping vowels is bad9:" + vowel);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -894,7 +894,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
if (null != dc_v_sub && null != dc_v_sup) {
|
||||
l.add(dc_v_sub);
|
||||
l.add(dc_v_sup);
|
||||
} else throw new IllegalArgumentException("dropping vowels is bad10");
|
||||
} else throw new IllegalArgumentException("dropping vowels is bad10:" + vowel);
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -160,8 +160,9 @@ public final class EWTSTraits implements TTraits {
|
|||
|
||||
public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); }
|
||||
|
||||
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) {
|
||||
|
||||
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
|
||||
throws IllegalArgumentException
|
||||
{
|
||||
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
|
||||
|
|
|
@ -393,7 +393,7 @@ public class PackageTest extends TestCase {
|
|||
}
|
||||
if (l.getACIPError(acip, false) != null)
|
||||
System.out.println("ACIPError: " + l.getACIPError(acip, false));
|
||||
if (!l.recoverACIP().equals(acip)
|
||||
if (!l.recoverTranslit().equals(acip)
|
||||
&& (acip.indexOf("A+") < 1) // which becomes +, e.g. {NA+YA}
|
||||
&& (acip.indexOf('0') < 0)
|
||||
&& (acip.indexOf('1') < 0)
|
||||
|
@ -408,7 +408,7 @@ public class PackageTest extends TestCase {
|
|||
&& pairListToUse == 1
|
||||
&& (acip.indexOf('\'') < 0)) {
|
||||
System.out.println("acip=" + acip
|
||||
+ "; recovery is " + l.recoverACIP());
|
||||
+ "; recovery is " + l.recoverTranslit());
|
||||
assertTrue(false);
|
||||
}
|
||||
if (pairListToUse >= 2) {
|
||||
|
@ -450,7 +450,7 @@ public class PackageTest extends TestCase {
|
|||
|
||||
/** Tests {@link ACIPTraits#breakTshegBarIntoChunks(String,
|
||||
* boolean)}, {@link TPairList#getACIPError(String, boolean)},
|
||||
* and {@link TPairList#recoverACIP()}. */
|
||||
* and {@link TPairList#recoverTranslit()}. */
|
||||
public void testBreakACIPIntoChunks() {
|
||||
tstHelper("GASN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BARMA"); // ambiguous with regard to prefix rules
|
||||
|
|
|
@ -128,10 +128,11 @@ class TPairList {
|
|||
return b.toString();
|
||||
}
|
||||
|
||||
/** Returns the ACIP corresponding to this TPairList. It will
|
||||
* be as ambiguous as the input. It may have more disambiguators
|
||||
* than the original, such as in the case of the ACIP {1234}. */
|
||||
String recoverACIP() {
|
||||
/** Returns the transliteration corresponding to this TPairList.
|
||||
* It will be as ambiguous as the input. It may have more
|
||||
* disambiguators than the original, such as in the case of the
|
||||
* ACIP {1234}. */
|
||||
String recoverTranslit() {
|
||||
StringBuffer original = new StringBuffer();
|
||||
int sz = size();
|
||||
for (int i = 0; i < sz; i++) {
|
||||
|
@ -174,7 +175,7 @@ class TPairList {
|
|||
: ""),
|
||||
traits);
|
||||
String translit
|
||||
= (null != originalACIP) ? originalACIP : recoverACIP();
|
||||
= (null != originalACIP) ? originalACIP : recoverTranslit();
|
||||
boolean mustBeEntirelyNumeric = get(0).isNumeric();
|
||||
for (int i = 0; i < sz; i++) {
|
||||
TPair p = get(i);
|
||||
|
@ -759,7 +760,7 @@ class TPairList {
|
|||
? 137
|
||||
: 511,
|
||||
shortMessages,
|
||||
recoverACIP(),
|
||||
recoverTranslit(),
|
||||
traits));
|
||||
return;
|
||||
}
|
||||
|
@ -769,9 +770,23 @@ class TPairList {
|
|||
|| lastPair.equals(traits.disambiguator())) {
|
||||
duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
|
||||
} else {
|
||||
try {
|
||||
traits.getDuffForWowel(duffsAndErrors,
|
||||
TibetanMachineWeb.getGlyph(hashKey),
|
||||
lastPair.getRight());
|
||||
} catch (IllegalArgumentException e) {
|
||||
// TODO(dchandler): Error 137 isn't the perfect
|
||||
// message. Try EWTS [RAM], e.g. to see why. It acts
|
||||
// like we're trying to find a single glyph for (R
|
||||
// . A+M) in that case.
|
||||
duffsAndErrors.add(ErrorsAndWarnings.getMessage(noCorrespondingTMWGlyphIsError
|
||||
? 137
|
||||
: 511,
|
||||
shortMessages,
|
||||
recoverTranslit(),
|
||||
traits));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (previousSize == duffsAndErrors.size())
|
||||
throw new Error("TPairList with no duffs? " + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion.
|
||||
|
|
|
@ -333,7 +333,7 @@ class TParseTree {
|
|||
}
|
||||
}
|
||||
|
||||
String translit = (null != originalACIP) ? originalACIP : recoverACIP();
|
||||
String translit = (null != originalACIP) ? originalACIP : recoverTranslit();
|
||||
TStackListList up = getUniqueParse(false);
|
||||
if (null == up || up.size() != 1) {
|
||||
boolean isLastStack[] = new boolean[1];
|
||||
|
@ -508,7 +508,7 @@ n+t+s
|
|||
&& !pl.get(1).endsStack()
|
||||
&& pl.get(2).endsStack()
|
||||
&& null != left && null != right) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: This is ACIP-specific.
|
||||
// TODO(DLC)[EWTS->Tibetan]: This function is ACIP-specific.
|
||||
if (("D".equals(left) && "G".equals(middle) && "R".equals(right))
|
||||
|| ("D".equals(left) && "G".equals(middle) && "Y".equals(right))) {
|
||||
if (pl.size() == 3) {
|
||||
|
@ -556,13 +556,14 @@ n+t+s
|
|||
return null;
|
||||
}
|
||||
|
||||
/** Returns something akin to the ACIP input (okay, maybe 1-2-3-4
|
||||
* instead of 1234, and maybe AUTPA instead of AUT-PA)
|
||||
* corresponding to this parse tree. */
|
||||
public String recoverACIP() { // TODO(DLC)[EWTS->Tibetan]: acip-specific
|
||||
/** Returns something akin to the transliteration that was input
|
||||
* (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA instead
|
||||
* of AUT-PA [ACIP examples]) corresponding to this parse
|
||||
* tree. */
|
||||
public String recoverTranslit() {
|
||||
ParseIterator pi = getParseIterator();
|
||||
if (pi.hasNext()) {
|
||||
return pi.next().recoverACIP();
|
||||
return pi.next().recoverTranslit();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -70,9 +70,10 @@ class TStackList {
|
|||
/** Returns true if and only if this list is empty. */
|
||||
public boolean isEmpty() { return al.isEmpty(); }
|
||||
|
||||
/** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234)
|
||||
* corresponding to this stack list. */
|
||||
public String recoverACIP() {
|
||||
/** Returns something akin to the transliteration that was input
|
||||
* (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA instead
|
||||
* of AUT-PA [ACIP examples]) corresponding to this stack list. */
|
||||
public String recoverTranslit() {
|
||||
return toStringHelper(false);
|
||||
}
|
||||
|
||||
|
@ -87,7 +88,7 @@ class TStackList {
|
|||
StringBuffer b = new StringBuffer();
|
||||
for (int i = 0; i < sz; i++) {
|
||||
if (brackets) b.append('{');
|
||||
b.append(get(i).recoverACIP());
|
||||
b.append(get(i).recoverTranslit());
|
||||
if (brackets) b.append('}');
|
||||
}
|
||||
return b.toString();
|
||||
|
|
|
@ -84,10 +84,12 @@ class TStackListList {
|
|||
* happen. */
|
||||
public ListIterator listIterator() { return al.listIterator(); }
|
||||
|
||||
/** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234)
|
||||
* corresponding to this stack list list. */
|
||||
public String recoverACIP() {
|
||||
/** Returns something akin to the transliteration that was input
|
||||
* (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA instead
|
||||
* of AUT-PA [ACIP examples]) corresponding to this stack list
|
||||
* list. */
|
||||
public String recoverTranslit() {
|
||||
if (isEmpty()) return null;
|
||||
return get(0).recoverACIP();
|
||||
return get(0).recoverTranslit();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue