Bulletproofed EWTS->Tibetan against nasty pseudo-EWTS like [RAM].
Renamed recoverACIP methods to recoverTranslit.
This commit is contained in:
parent
982350371d
commit
cddbbae9a1
7 changed files with 59 additions and 39 deletions
|
@ -633,7 +633,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
if (vowel.equals(WYLIE_aVOWEL)
|
if (vowel.equals(WYLIE_aVOWEL)
|
||||||
|| TibetanMachineWeb.isTopVowel(context_2)) {
|
|| TibetanMachineWeb.isTopVowel(context_2)) {
|
||||||
if (TibetanMachineWeb.isTopVowel(context_2))
|
if (TibetanMachineWeb.isTopVowel(context_2))
|
||||||
throw new IllegalArgumentException("dropping vowels is bad1");
|
throw new IllegalArgumentException("dropping vowels is bad1:" + vowel);
|
||||||
if (!context_added[0]) {
|
if (!context_added[0]) {
|
||||||
context_added[0] = true;
|
context_added[0] = true;
|
||||||
if (context_1 != null)
|
if (context_1 != null)
|
||||||
|
@ -686,7 +686,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
||||||
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(reverse_i_VOWEL);
|
||||||
l.add(dc_v[TibetanMachineWeb.TMW]);
|
l.add(dc_v[TibetanMachineWeb.TMW]);
|
||||||
} else throw new IllegalArgumentException("dropping vowels is bad2");
|
} else throw new IllegalArgumentException("dropping vowels is bad2:" + vowel);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -716,7 +716,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
l.add(dc_v);
|
l.add(dc_v);
|
||||||
else throw new IllegalArgumentException("dropping vowels is bad3");
|
else throw new IllegalArgumentException("dropping vowels is bad3:" + vowel);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -740,7 +740,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
l.add(dc_v);
|
l.add(dc_v);
|
||||||
else throw new IllegalArgumentException("dropping vowels is bad4");
|
else throw new IllegalArgumentException("dropping vowels is bad4:" + vowel);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -763,7 +763,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
l.add(dc_v);
|
l.add(dc_v);
|
||||||
else throw new IllegalArgumentException("dropping vowels is bad5");
|
else throw new IllegalArgumentException("dropping vowels is bad5:" + vowel);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -792,7 +792,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
l.add(dc_v);
|
l.add(dc_v);
|
||||||
else throw new IllegalArgumentException("dropping vowels is bad6");
|
else throw new IllegalArgumentException("dropping vowels is bad6:" + vowel);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -815,7 +815,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
|
|
||||||
if (null != dc_v)
|
if (null != dc_v)
|
||||||
l.add(dc_v);
|
l.add(dc_v);
|
||||||
else throw new IllegalArgumentException("dropping vowels is bad7");
|
else throw new IllegalArgumentException("dropping vowels is bad7:" + vowel);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -838,7 +838,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
|
|
||||||
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
|
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
|
||||||
l.add(dc_v);
|
l.add(dc_v);
|
||||||
else throw new IllegalArgumentException("dropping vowels is bad8");
|
else throw new IllegalArgumentException("dropping vowels is bad8:" + vowel);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -868,7 +868,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
if (null != dc_v_sub && null != dc_v_sup) {
|
if (null != dc_v_sub && null != dc_v_sup) {
|
||||||
l.add(dc_v_sub);
|
l.add(dc_v_sub);
|
||||||
l.add(dc_v_sup);
|
l.add(dc_v_sup);
|
||||||
} else throw new IllegalArgumentException("dropping vowels is bad9");
|
} else throw new IllegalArgumentException("dropping vowels is bad9:" + vowel);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -894,7 +894,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
if (null != dc_v_sub && null != dc_v_sup) {
|
if (null != dc_v_sub && null != dc_v_sup) {
|
||||||
l.add(dc_v_sub);
|
l.add(dc_v_sub);
|
||||||
l.add(dc_v_sup);
|
l.add(dc_v_sup);
|
||||||
} else throw new IllegalArgumentException("dropping vowels is bad10");
|
} else throw new IllegalArgumentException("dropping vowels is bad10:" + vowel);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -160,8 +160,9 @@ public final class EWTSTraits implements TTraits {
|
||||||
|
|
||||||
public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); }
|
public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); }
|
||||||
|
|
||||||
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) {
|
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
|
||||||
|
throws IllegalArgumentException
|
||||||
|
{
|
||||||
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
|
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
|
||||||
|
|
||||||
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
|
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
|
||||||
|
|
|
@ -393,7 +393,7 @@ public class PackageTest extends TestCase {
|
||||||
}
|
}
|
||||||
if (l.getACIPError(acip, false) != null)
|
if (l.getACIPError(acip, false) != null)
|
||||||
System.out.println("ACIPError: " + l.getACIPError(acip, false));
|
System.out.println("ACIPError: " + l.getACIPError(acip, false));
|
||||||
if (!l.recoverACIP().equals(acip)
|
if (!l.recoverTranslit().equals(acip)
|
||||||
&& (acip.indexOf("A+") < 1) // which becomes +, e.g. {NA+YA}
|
&& (acip.indexOf("A+") < 1) // which becomes +, e.g. {NA+YA}
|
||||||
&& (acip.indexOf('0') < 0)
|
&& (acip.indexOf('0') < 0)
|
||||||
&& (acip.indexOf('1') < 0)
|
&& (acip.indexOf('1') < 0)
|
||||||
|
@ -408,7 +408,7 @@ public class PackageTest extends TestCase {
|
||||||
&& pairListToUse == 1
|
&& pairListToUse == 1
|
||||||
&& (acip.indexOf('\'') < 0)) {
|
&& (acip.indexOf('\'') < 0)) {
|
||||||
System.out.println("acip=" + acip
|
System.out.println("acip=" + acip
|
||||||
+ "; recovery is " + l.recoverACIP());
|
+ "; recovery is " + l.recoverTranslit());
|
||||||
assertTrue(false);
|
assertTrue(false);
|
||||||
}
|
}
|
||||||
if (pairListToUse >= 2) {
|
if (pairListToUse >= 2) {
|
||||||
|
@ -450,7 +450,7 @@ public class PackageTest extends TestCase {
|
||||||
|
|
||||||
/** Tests {@link ACIPTraits#breakTshegBarIntoChunks(String,
|
/** Tests {@link ACIPTraits#breakTshegBarIntoChunks(String,
|
||||||
* boolean)}, {@link TPairList#getACIPError(String, boolean)},
|
* boolean)}, {@link TPairList#getACIPError(String, boolean)},
|
||||||
* and {@link TPairList#recoverACIP()}. */
|
* and {@link TPairList#recoverTranslit()}. */
|
||||||
public void testBreakACIPIntoChunks() {
|
public void testBreakACIPIntoChunks() {
|
||||||
tstHelper("GASN"); // ambiguous with regard to prefix rules
|
tstHelper("GASN"); // ambiguous with regard to prefix rules
|
||||||
tstHelper("BARMA"); // ambiguous with regard to prefix rules
|
tstHelper("BARMA"); // ambiguous with regard to prefix rules
|
||||||
|
|
|
@ -128,10 +128,11 @@ class TPairList {
|
||||||
return b.toString();
|
return b.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the ACIP corresponding to this TPairList. It will
|
/** Returns the transliteration corresponding to this TPairList.
|
||||||
* be as ambiguous as the input. It may have more disambiguators
|
* It will be as ambiguous as the input. It may have more
|
||||||
* than the original, such as in the case of the ACIP {1234}. */
|
* disambiguators than the original, such as in the case of the
|
||||||
String recoverACIP() {
|
* ACIP {1234}. */
|
||||||
|
String recoverTranslit() {
|
||||||
StringBuffer original = new StringBuffer();
|
StringBuffer original = new StringBuffer();
|
||||||
int sz = size();
|
int sz = size();
|
||||||
for (int i = 0; i < sz; i++) {
|
for (int i = 0; i < sz; i++) {
|
||||||
|
@ -174,7 +175,7 @@ class TPairList {
|
||||||
: ""),
|
: ""),
|
||||||
traits);
|
traits);
|
||||||
String translit
|
String translit
|
||||||
= (null != originalACIP) ? originalACIP : recoverACIP();
|
= (null != originalACIP) ? originalACIP : recoverTranslit();
|
||||||
boolean mustBeEntirelyNumeric = get(0).isNumeric();
|
boolean mustBeEntirelyNumeric = get(0).isNumeric();
|
||||||
for (int i = 0; i < sz; i++) {
|
for (int i = 0; i < sz; i++) {
|
||||||
TPair p = get(i);
|
TPair p = get(i);
|
||||||
|
@ -759,7 +760,7 @@ class TPairList {
|
||||||
? 137
|
? 137
|
||||||
: 511,
|
: 511,
|
||||||
shortMessages,
|
shortMessages,
|
||||||
recoverACIP(),
|
recoverTranslit(),
|
||||||
traits));
|
traits));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -769,9 +770,23 @@ class TPairList {
|
||||||
|| lastPair.equals(traits.disambiguator())) {
|
|| lastPair.equals(traits.disambiguator())) {
|
||||||
duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
|
duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
|
||||||
} else {
|
} else {
|
||||||
|
try {
|
||||||
traits.getDuffForWowel(duffsAndErrors,
|
traits.getDuffForWowel(duffsAndErrors,
|
||||||
TibetanMachineWeb.getGlyph(hashKey),
|
TibetanMachineWeb.getGlyph(hashKey),
|
||||||
lastPair.getRight());
|
lastPair.getRight());
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
// TODO(dchandler): Error 137 isn't the perfect
|
||||||
|
// message. Try EWTS [RAM], e.g. to see why. It acts
|
||||||
|
// like we're trying to find a single glyph for (R
|
||||||
|
// . A+M) in that case.
|
||||||
|
duffsAndErrors.add(ErrorsAndWarnings.getMessage(noCorrespondingTMWGlyphIsError
|
||||||
|
? 137
|
||||||
|
: 511,
|
||||||
|
shortMessages,
|
||||||
|
recoverTranslit(),
|
||||||
|
traits));
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (previousSize == duffsAndErrors.size())
|
if (previousSize == duffsAndErrors.size())
|
||||||
throw new Error("TPairList with no duffs? " + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion.
|
throw new Error("TPairList with no duffs? " + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion.
|
||||||
|
|
|
@ -333,7 +333,7 @@ class TParseTree {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String translit = (null != originalACIP) ? originalACIP : recoverACIP();
|
String translit = (null != originalACIP) ? originalACIP : recoverTranslit();
|
||||||
TStackListList up = getUniqueParse(false);
|
TStackListList up = getUniqueParse(false);
|
||||||
if (null == up || up.size() != 1) {
|
if (null == up || up.size() != 1) {
|
||||||
boolean isLastStack[] = new boolean[1];
|
boolean isLastStack[] = new boolean[1];
|
||||||
|
@ -508,7 +508,7 @@ n+t+s
|
||||||
&& !pl.get(1).endsStack()
|
&& !pl.get(1).endsStack()
|
||||||
&& pl.get(2).endsStack()
|
&& pl.get(2).endsStack()
|
||||||
&& null != left && null != right) {
|
&& null != left && null != right) {
|
||||||
// TODO(DLC)[EWTS->Tibetan]: This is ACIP-specific.
|
// TODO(DLC)[EWTS->Tibetan]: This function is ACIP-specific.
|
||||||
if (("D".equals(left) && "G".equals(middle) && "R".equals(right))
|
if (("D".equals(left) && "G".equals(middle) && "R".equals(right))
|
||||||
|| ("D".equals(left) && "G".equals(middle) && "Y".equals(right))) {
|
|| ("D".equals(left) && "G".equals(middle) && "Y".equals(right))) {
|
||||||
if (pl.size() == 3) {
|
if (pl.size() == 3) {
|
||||||
|
@ -556,13 +556,14 @@ n+t+s
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns something akin to the ACIP input (okay, maybe 1-2-3-4
|
/** Returns something akin to the transliteration that was input
|
||||||
* instead of 1234, and maybe AUTPA instead of AUT-PA)
|
* (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA instead
|
||||||
* corresponding to this parse tree. */
|
* of AUT-PA [ACIP examples]) corresponding to this parse
|
||||||
public String recoverACIP() { // TODO(DLC)[EWTS->Tibetan]: acip-specific
|
* tree. */
|
||||||
|
public String recoverTranslit() {
|
||||||
ParseIterator pi = getParseIterator();
|
ParseIterator pi = getParseIterator();
|
||||||
if (pi.hasNext()) {
|
if (pi.hasNext()) {
|
||||||
return pi.next().recoverACIP();
|
return pi.next().recoverTranslit();
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,9 +70,10 @@ class TStackList {
|
||||||
/** Returns true if and only if this list is empty. */
|
/** Returns true if and only if this list is empty. */
|
||||||
public boolean isEmpty() { return al.isEmpty(); }
|
public boolean isEmpty() { return al.isEmpty(); }
|
||||||
|
|
||||||
/** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234)
|
/** Returns something akin to the transliteration that was input
|
||||||
* corresponding to this stack list. */
|
* (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA instead
|
||||||
public String recoverACIP() {
|
* of AUT-PA [ACIP examples]) corresponding to this stack list. */
|
||||||
|
public String recoverTranslit() {
|
||||||
return toStringHelper(false);
|
return toStringHelper(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -87,7 +88,7 @@ class TStackList {
|
||||||
StringBuffer b = new StringBuffer();
|
StringBuffer b = new StringBuffer();
|
||||||
for (int i = 0; i < sz; i++) {
|
for (int i = 0; i < sz; i++) {
|
||||||
if (brackets) b.append('{');
|
if (brackets) b.append('{');
|
||||||
b.append(get(i).recoverACIP());
|
b.append(get(i).recoverTranslit());
|
||||||
if (brackets) b.append('}');
|
if (brackets) b.append('}');
|
||||||
}
|
}
|
||||||
return b.toString();
|
return b.toString();
|
||||||
|
|
|
@ -84,10 +84,12 @@ class TStackListList {
|
||||||
* happen. */
|
* happen. */
|
||||||
public ListIterator listIterator() { return al.listIterator(); }
|
public ListIterator listIterator() { return al.listIterator(); }
|
||||||
|
|
||||||
/** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234)
|
/** Returns something akin to the transliteration that was input
|
||||||
* corresponding to this stack list list. */
|
* (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA instead
|
||||||
public String recoverACIP() {
|
* of AUT-PA [ACIP examples]) corresponding to this stack list
|
||||||
|
* list. */
|
||||||
|
public String recoverTranslit() {
|
||||||
if (isEmpty()) return null;
|
if (isEmpty()) return null;
|
||||||
return get(0).recoverACIP();
|
return get(0).recoverTranslit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue