TMW->EWTS, TMW->ACIP, and ACIP->Unicode/TMW now support more appendages. Personal correspondence with Robert Chilton led me to support, besides 'am, 'ang, 'o, 'i, and 'u, the following:
'e (used in foreign transliteration), 'ongs, 'is, 'os, 'ur, 'us, and 'ung.
This commit is contained in:
parent
5e18feb47d
commit
8c99adeb63
6 changed files with 116 additions and 38 deletions
|
@ -73,13 +73,23 @@ public class TGCPair {
|
|||
vowelWylie = null;
|
||||
}
|
||||
public String getWylie() {
|
||||
return getWylie(false);
|
||||
return getWylie(null);
|
||||
}
|
||||
public String getWylie(boolean appendaged) {
|
||||
/** Returns the EWTS for this pair, given that, if and only if
|
||||
this pair is part of an appendaged tsheg bar like ma'ongs or
|
||||
pa'am or spre'ur, previousTranslitIfAppendaged is non-null.
|
||||
If it is non-null, then it must be equal to the EWTS
|
||||
transliteration of the previous pair.
|
||||
@see #getACIP(String)
|
||||
*/
|
||||
public String getWylie(String previousTranslitIfAppendaged) {
|
||||
StringBuffer b = new StringBuffer();
|
||||
if (consonantWylie != null) {
|
||||
if (appendaged && !"'".equals(consonantWylie))
|
||||
b.append("a"); // pa'am... we want 'am, not 'm; 'ang, not 'ng.
|
||||
// Think of pa'am... we want 'am, not 'm; 'ang, not 'ng. But we want 'ur, not 'uar, 'is, not 'ias.
|
||||
if (null != previousTranslitIfAppendaged
|
||||
&& "'".equals(previousTranslitIfAppendaged)) {
|
||||
b.append("a");
|
||||
}
|
||||
|
||||
// we may have {p-y}, but the user wants to see {py}.
|
||||
for (int i = 0; i < consonantWylie.length(); i++) {
|
||||
|
@ -93,9 +103,10 @@ public class TGCPair {
|
|||
return b.toString();
|
||||
}
|
||||
public String getACIP() {
|
||||
return getACIP(false);
|
||||
return getACIP(null);
|
||||
}
|
||||
public String getACIP(boolean appendaged) {
|
||||
/** Like {@link #getWylie(String)}. */
|
||||
public String getACIP(String previousTranslitIfAppendaged) {
|
||||
// DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
|
||||
StringBuffer b = new StringBuffer();
|
||||
if (consonantWylie != null) {
|
||||
|
@ -104,8 +115,12 @@ public class TGCPair {
|
|||
if (null == consonantACIP) {
|
||||
return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie);
|
||||
} else {
|
||||
if (appendaged && !"'".equals(consonantWylie))
|
||||
b.append("A"); // PA'AM
|
||||
// Think of pa'am... we want 'am, not 'm; 'ang, not 'ng. But we want 'ur, not 'uar, 'is, not 'ias.
|
||||
if (null != previousTranslitIfAppendaged
|
||||
&& "'".equals(previousTranslitIfAppendaged)) {
|
||||
b.append("A");
|
||||
}
|
||||
|
||||
// we may have {P-Y}, but the user wants to see {PY}.
|
||||
for (int i = 0; i < consonantACIP.length(); i++) {
|
||||
char ch = consonantACIP.charAt(i);
|
||||
|
|
|
@ -65,6 +65,10 @@ public interface THDLWylieConstants {
|
|||
*/
|
||||
public static final String SA = "s";
|
||||
/**
|
||||
* the Wylie for the consonant ra:
|
||||
*/
|
||||
public static final String RA = "r";
|
||||
/**
|
||||
* the Wylie for the 16th of the 30 consonants, ma:
|
||||
*/
|
||||
public static final String MA = "m";
|
||||
|
|
|
@ -902,7 +902,11 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
/** True for and only for ma and nga because 'am and 'ang are
|
||||
appendages. */
|
||||
private static final boolean isAppendageNonVowelWylie(String wylie) {
|
||||
return (MA.equals(wylie) || NGA.equals(wylie));
|
||||
return (MA.equals(wylie) /* 'AM */
|
||||
|| NGA.equals(wylie) /* 'ANG, 'UNG */
|
||||
|| SA.equals(wylie) /* 'OS, 'US, maybe 'IS */
|
||||
|| RA.equals(wylie) /* 'UR */
|
||||
);
|
||||
}
|
||||
|
||||
// DLC FIXME: {H}, U+0F7F, is part of a grapheme cluster!
|
||||
|
@ -1209,7 +1213,12 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
break;
|
||||
}
|
||||
} else if (candidateType.startsWith("appendaged-")) {
|
||||
if (TibetanMachineWeb.isWylieAchungAppendage(wylie)) {
|
||||
if (TibetanMachineWeb.isWylieAchungAppendage(wylie)
|
||||
// 'ang:
|
||||
|| TibetanMachineWeb.isWylieAchungAppendage(lastPair.getWylie() + wylie)
|
||||
// 'ongs, as in ma'ongs:
|
||||
|| (i > 1
|
||||
&& TibetanMachineWeb.isWylieAchungAppendage(gcs.get(i-2).getWylie() + lastPair.getWylie() + wylie))) {
|
||||
// candidateType stays what it is.
|
||||
} else if (ACHUNG.equals(wylie)) {
|
||||
candidateType = ("maybe-" + candidateType).intern();
|
||||
|
@ -1495,11 +1504,21 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
}
|
||||
|
||||
// append the wylie/ACIP left over:
|
||||
String lastPairTranslit = null;
|
||||
if (appendaged && leftover >= 1) {
|
||||
TGCPair tp = (TGCPair)gcs.get(leftover-1);
|
||||
lastPairTranslit = (EWTSNotACIP
|
||||
? tp.getWylie(null)
|
||||
: tp.getACIP(null));
|
||||
}
|
||||
for (int i = leftover; i < sz; i++) {
|
||||
TGCPair tp = (TGCPair)gcs.get(i);
|
||||
String y;
|
||||
translitBuffer.append(EWTSNotACIP
|
||||
? tp.getWylie(appendaged)
|
||||
: tp.getACIP(appendaged));
|
||||
? (y = tp.getWylie(lastPairTranslit))
|
||||
: (y = tp.getACIP(lastPairTranslit)));
|
||||
if (appendaged)
|
||||
lastPairTranslit = y;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -910,10 +910,17 @@ public static boolean isWylieSanskritConsonantStack(String s) {
|
|||
or 'ang. The word le'u (chapter) contains such an appendage,
|
||||
e.g. */
|
||||
public static boolean isWylieAchungAppendage(String s) {
|
||||
return (s.equals("'e")
|
||||
return (s.equals("'e") // Appears in transcription of foreign words at the very least. The ACIP LEM'E is {LE}{M'E} if we don't think of 'E as "legal"... tricky, tricky (DLC FIXME: think again, verify it's doing what we want for both ACIP-> and EWTS-> and TMW->)
|
||||
|| s.equals("'i")
|
||||
|| s.equals("'o")
|
||||
|| s.equals("'u")
|
||||
|| s.equals("'us")
|
||||
|| s.equals("'ur")
|
||||
|| s.equals("'ong") // This isn't an appendage I know of, but 'ongs is, and we need this in here or 'ongs won't work. DLC FIXME: cludge
|
||||
|| s.equals("'ongs")
|
||||
|| s.equals("'os")
|
||||
|| s.equals("'is")
|
||||
|| s.equals("'ung")
|
||||
|| s.equals("'ang")
|
||||
|| s.equals("'am"));
|
||||
}
|
||||
|
|
|
@ -80,6 +80,13 @@ public class PackageTest extends TestCase {
|
|||
}
|
||||
|
||||
private static final boolean sdebug = false;
|
||||
/** Testing helper.
|
||||
@param pairListToUse is 0 for the usual lex, 1 for the "Treat
|
||||
' as a consonant, not a vowel" parse, 2 to use 1 but also
|
||||
verify that lex 1 is preferred by the converter over lex 0, 3
|
||||
to use 1 but also verify that lex 0 is preferred by the
|
||||
converter over lex 1, -1 to use 0 and ensure that there is no
|
||||
lex 1, only a lex 0. */
|
||||
private static void tstHelper2(String acip,
|
||||
String expectedPairs,
|
||||
boolean debug,
|
||||
|
@ -88,9 +95,11 @@ public class PackageTest extends TestCase {
|
|||
String expectedBestParse,
|
||||
int pairListToUse) {
|
||||
TPairList[] la = TPairListFactory.breakACIPIntoChunks(acip);
|
||||
TPairList l = la[pairListToUse];
|
||||
TPairList l = la[(pairListToUse == -1) ? 0 : ((pairListToUse >= 1) ? 1 : pairListToUse)];
|
||||
if (sdebug || debug)
|
||||
System.out.println("ACIP=" + acip + " and l'=" + l);
|
||||
if (pairListToUse == -1)
|
||||
assertTrue(la[1] == null);
|
||||
if (expectedPairs != null) {
|
||||
if (!l.equals(expectedPairs)) {
|
||||
System.out.println("acip=" + acip + "; chunks=" + l + "; expected chunks=" + expectedPairs);
|
||||
|
@ -220,6 +229,19 @@ public class PackageTest extends TestCase {
|
|||
+ "; recovery is " + l.recoverACIP());
|
||||
assertTrue(false);
|
||||
}
|
||||
if (pairListToUse >= 2) {
|
||||
TParseTree pt0 = la[0].getParseTree();
|
||||
TParseTree pt1 = la[1].getParseTree();
|
||||
TStackList sl0 = pt0.getBestParse();
|
||||
TStackList sl1 = pt1.getBestParse();
|
||||
BoolTriple sl0bt = sl0.isLegalTshegBar(false);
|
||||
BoolTriple sl1bt = sl1.isLegalTshegBar(false);
|
||||
if (pairListToUse == 2) {
|
||||
assertTrue(sl0bt.compareTo(sl1bt) < 0);
|
||||
} else {
|
||||
assertTrue(sl0bt.compareTo(sl1bt) >= 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DLC FIXME: warn if we have to use the "what stacks take a GA prefix?" rules to get a unique legal parse.
|
||||
|
@ -380,37 +402,48 @@ tstHelper("KA'", "[(K . A), (' . )]",
|
|||
"{G}{DA}{M}{S}{'O}",
|
||||
},
|
||||
new String[] { "{G}{DA}{M}{S}{'O}" },
|
||||
"{G}{DA}{M}{S}{'O}", 1);
|
||||
"{G}{DA}{M}{S}{'O}", 2);
|
||||
|
||||
tstHelper("SNYAMS'AM'ANG", "{S}{NYA}{M}{S-}{'A}{M-}{'A}{NG}", null, null, "{S+NYA}{M}{S}{'A}{M}{'A}{NG}", 1);
|
||||
tstHelper("SNYAMS'AM'ANG", "{S}{NYA}{M}{S-}{'A}{M-}{'A}{NG}", null, null, "{S+NYA}{M}{S}{'A}{M}{'A}{NG}", 2);
|
||||
tstHelper("SNYAMS'AM'ANG", "{S}{NYA}{M}{S'A}{M'A}{NG}", null, null, "{S+NYA}{M+S'A}{M'A}{NG}", 0);
|
||||
tstHelper("SNYAM'AM", null, null, null, "{S+NYA}{M}{'A}{M}", 1);
|
||||
tstHelper("SNYAMS'AM", null, null, null, "{S+NYA}{M}{S}{'A}{M}", 1);
|
||||
tstHelper("SNYAM'AM", null, null, null, "{S+NYA}{M}{'A}{M}", 2);
|
||||
tstHelper("SNYAMS'AM", null, null, null, "{S+NYA}{M}{S}{'A}{M}", 2);
|
||||
tstHelper("SNYAM-'A-M", null, null, null, "!null!", 1);
|
||||
tstHelper("SNYAM-'A-M", null, null, null, "{S+NYA}{M}{'A}{M}", 0);
|
||||
tstHelper("SNY-M-'-M", null, null, null, "{S+NY}{M}{'}{M}", 0);
|
||||
tstHelper("SNYAM-'A-M", null, null, null, "{S+NYA}{M}{'A}{M}", -1);
|
||||
tstHelper("SNY-M-'-M", null, null, null, "{S+NY}{M}{'}{M}", -1);
|
||||
tstHelper("SNY-M-'-M", null, null, null, "!null!", 1);
|
||||
tstHelper("SNYAMS'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 1);
|
||||
tstHelper("SNYAMS'I'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'I}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 1);
|
||||
tstHelper("SNYAM+S+'O", null, null, null, "{S+NYA}{M+S+'O}", 0);
|
||||
tstHelper("SNYAMS+'O", null, null, null, "{S+NYA}{M+S+'O}", 0);
|
||||
tstHelper("SNYAMS+'O", null, null, null, "{S+NYA}{M+S+'O}", 0);
|
||||
tstHelper("SAM'UR'US", null, null, null, "{SA}{M}{'U}{R}{'U}{S}", 1);
|
||||
tstHelper("SAM'US", null, null, null, "{SA}{M}{'U}{S}", 1);
|
||||
tstHelper("SAM'AM", null, null, null, "{SA}{M}{'A}{M}", 1);
|
||||
tstHelper("SAMS'ANG", null, null, null, "{SA}{M}{S}{'A}{NG}", 1);
|
||||
tstHelper("SNYANGD'O", null, null, null, "{S+NYA}{NG}{D}{'O}", 1);
|
||||
tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D}{'O}", 1); // T is no prefix, so NG+D, not NG-D
|
||||
tstHelper("SNYAMS'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 2);
|
||||
tstHelper("SNYAMS'I'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'I}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 2);
|
||||
tstHelper("SNYAM+S+'O", null, null, null, "{S+NYA}{M+S+'O}", -1);
|
||||
tstHelper("SNYAMS+'O", null, null, null, "{S+NYA}{M+S+'O}", -1);
|
||||
tstHelper("SNYAMS+'O", null, null, null, "{S+NYA}{M+S+'O}", -1);
|
||||
|
||||
tstHelper("SAM'US", null, null, null, "{SA}{M}{'U}{S}", 2);
|
||||
tstHelper("SAM'UR'US", null, null, null, "{SA}{M}{'U}{R}{'U}{S}", 2);
|
||||
tstHelper("LA'OS", null, null, null, "{LA}{'O}{S}", -1);
|
||||
tstHelper("LA'OS", null, null, null, "!null!", 1);
|
||||
tstHelper("NA'OS", null, null, null, "{NA}{'O}{S}", -1);
|
||||
tstHelper("NA'IS", null, null, null, "{NA}{'I}{S}", -1);
|
||||
tstHelper("LE'UNG", null, null, null, "{LE}{'U}{NG}", -1);
|
||||
tstHelper("LE'U'ANG", null, null, null, "{LE}{'U}{'A}{NG}", -1);
|
||||
tstHelper("LE'UM", null, null, null, "{LE}{'U}{M}", -1);
|
||||
tstHelper("LE'U'IS", null, null, null, "{LE}{'U}{'I}{S}", -1);
|
||||
|
||||
tstHelper("MA'ONGS", null, null, null, "{MA}{'O}{NG}{S}", -1);
|
||||
|
||||
tstHelper("SAM'AM", null, null, null, "{SA}{M}{'A}{M}", 2);
|
||||
tstHelper("SAMS'ANG", null, null, null, "{SA}{M}{S}{'A}{NG}", 2);
|
||||
tstHelper("SNYANGD'O", null, null, null, "{S+NYA}{NG}{D}{'O}", 2);
|
||||
tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D}{'O}", 3); // T is no prefix, so NG+D, not NG-D
|
||||
tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D'O}", 0);
|
||||
|
||||
tstHelper("SNYAM+S+'O", null, null, null, "{S+NYA}{M+S+'O}", 0);
|
||||
tstHelper("SNYAMS+'O", null, null, null, "{S+NYA}{M+S+'O}", 0);
|
||||
tstHelper("SNYAM+S+'O", null, null, null, "{S+NYA}{M+S+'O}", -1);
|
||||
tstHelper("SNYAMS+'O", null, null, null, "{S+NYA}{M+S+'O}", -1);
|
||||
|
||||
tstHelper("GDAMS", null, null, null, "{G}{DA}{M}{S}", 0);
|
||||
tstHelper("GDAM-S'O", null, null, null, "{G}{DA}{M}{S}{'O}", 1);
|
||||
tstHelper("GDAMS", null, null, null, "{G}{DA}{M}{S}", -1);
|
||||
tstHelper("GDAM-S'O", null, null, null, "{G}{DA}{M}{S}{'O}", 2);
|
||||
tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C'O}", 0);
|
||||
tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C}{'O}", 1);
|
||||
tstHelper("GDAMS", null, null, null, "{G}{DA}{M}{S}", 0);
|
||||
tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C}{'O}", 3);
|
||||
// DLC NOW: FIXME: tstHelper("DKHY", null, null, null, "{D}{KH+YA}", 0);
|
||||
// DLC DKHY'O should give parse tree {{D-KH+Y'O}, {D+KH+Y'O}}
|
||||
// DLC DKHYA'O should give parse tree {{D-KH+YA'O}, {D+KH+YA'O}}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue