TMW->Unicode conversions have changed: U+0F6A is now used for the stacks
whose EWTS transliteration begins with "R+". ACIP->* conversions and test baselines were updated to deal with the "r+..."=>"R+..." change.
This commit is contained in:
parent
aff34174ab
commit
7eca276a62
4 changed files with 100 additions and 20 deletions
|
@ -610,16 +610,35 @@ r+b+p~48,4~~7,56~1,109~8,121~1,123~1,125~8,107~8,114~f62,fa6,fa4
|
||||||
r+b+b~49,4~~7,57~1,109~8,121~1,123~1,125~8,107~8,114~f62,fa6,fa6
|
r+b+b~49,4~~7,57~1,109~8,121~1,123~1,125~8,107~8,114~f62,fa6,fa6
|
||||||
r+b+h~50,4~~7,58~1,110~8,124~1,123~1,125~8,110~8,117~f62,fa6,fb7
|
r+b+h~50,4~~7,58~1,110~8,124~1,123~1,125~8,110~8,117~f62,fa6,fb7
|
||||||
r+m+m~51,4~~7,59~1,110~8,121~1,123~1,125~8,107~8,114~f62,fa8,fa8
|
r+m+m~51,4~~7,59~1,110~8,121~1,123~1,125~8,107~8,114~f62,fa8,fa8
|
||||||
R+Y~52,4~~7,60~1,110~8,120~1,123~1,125~8,106~8,113~f62,fbb
|
|
||||||
R+W~196,4~~7,61~1,109~8,120~1,123~1,125~8,106~8,113~f62,fba
|
// Should we use U+0F62 or U+0F6A for "R+..."? That is the question.
|
||||||
R+sh~53,4~~7,62~1,109~8,120~1,123~1,125~8,106~8,113~f62,fb4
|
// The Unicode 4.0 standard says the following on page 254:
|
||||||
R+sh+y~54,4~~7,63~1,109~8,122~1,123~1,125~8,108~8,115~f62,fb4,fb1
|
//
|
||||||
R+Sh~55,4~~7,64~1,109~8,120~1,123~1,125~8,106~8,113~f62,fb5
|
// 'Some instances of "ra" in the head position require that the
|
||||||
R+Sh+N~56,4~~7,65~1,109~8,123~1,123~1,125~8,109~8,116~f62,fb5,f9e
|
// consonant be represented as a full-formed "ra" that never
|
||||||
R+Sh+N+y~57,4~~7,66~1,109~8,126~1,123~1,125~8,112~8,119~f62,fb5,f9e,fb1
|
// changes. This is not standard usage for the Tibetan language
|
||||||
R+Sh+m~58,4~~7,67~1,109~8,124~1,123~1,125~8,110~8,117~f62,fb5,fa8
|
// itself, but occurs in transliteration and transcription. Only in
|
||||||
R+Sh+y~59,4~~7,68~1,109~8,123~1,123~1,125~8,109~8,116~f62,fb5,fb1
|
// these cases should the character U+0F6A ... be used instead of
|
||||||
R+s~60,4~~7,69~1,109~8,120~1,123~1,125~8,106~8,113~f62,fb6
|
// U+0F62.... Note that the fixed-form "ra" should be used only in
|
||||||
|
// combinations where "ra" would normally transform into a short form
|
||||||
|
// but the user specifically wants to prevent that change.'
|
||||||
|
//
|
||||||
|
// Because "R+..." occurs only in non-standard stacks, i.e. stacks
|
||||||
|
// used for transliteration and transcription, we use U+0F6A.
|
||||||
|
//
|
||||||
|
// Note that TPairList.java's unicodeExceptionsMap must be updated if
|
||||||
|
// we change which stacks use U+0F6A.
|
||||||
|
R+Y~52,4~~7,60~1,110~8,120~1,123~1,125~8,106~8,113~f6a,fbb
|
||||||
|
// R+W is mentioned in ACIPRules.java:
|
||||||
|
R+W~196,4~~7,61~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fba
|
||||||
|
R+sh~53,4~~7,62~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fb4
|
||||||
|
R+sh+y~54,4~~7,63~1,109~8,122~1,123~1,125~8,108~8,115~f6a,fb4,fb1
|
||||||
|
R+Sh~55,4~~7,64~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fb5
|
||||||
|
R+Sh+N~56,4~~7,65~1,109~8,123~1,123~1,125~8,109~8,116~f6a,fb5,f9e
|
||||||
|
R+Sh+N+y~57,4~~7,66~1,109~8,126~1,123~1,125~8,112~8,119~f6a,fb5,f9e,fb1
|
||||||
|
R+Sh+m~58,4~~7,67~1,109~8,124~1,123~1,125~8,110~8,117~f6a,fb5,fa8
|
||||||
|
R+Sh+y~59,4~~7,68~1,109~8,123~1,123~1,125~8,109~8,116~f6a,fb5,fb1
|
||||||
|
R+s~60,4~~7,69~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fb6
|
||||||
r+h~61,4~~7,70~1,109~8,121~1,123~1,125~8,107~8,114~f62,fb7
|
r+h~61,4~~7,70~1,109~8,121~1,123~1,125~8,107~8,114~f62,fb7
|
||||||
r+k+Sh~62,4~~7,71~1,109~8,121~1,123~1,125~8,107~8,114~f62,f90,fb5
|
r+k+Sh~62,4~~7,71~1,109~8,121~1,123~1,125~8,107~8,114~f62,f90,fb5
|
||||||
l+g+w~63,4~~7,72~1,109~8,122~1,123~1,125~8,108~8,115~f63,f92,fad
|
l+g+w~63,4~~7,72~1,109~8,122~1,123~1,125~8,108~8,115~f63,f92,fad
|
||||||
|
@ -632,6 +651,7 @@ l+h+w~197,4~~7,78~1,109~8,121~1,123~1,125~8,106~8,113~f63,fb7,fad
|
||||||
w+y~69,4~~7,79~1,109~8,121~1,123~1,125~8,107~8,114~f5d,fb1
|
w+y~69,4~~7,79~1,109~8,121~1,123~1,125~8,107~8,114~f5d,fb1
|
||||||
w+r~70,4~~7,80~1,109~8,121~1,123~1,125~8,107~8,114~f5d,fb2
|
w+r~70,4~~7,80~1,109~8,121~1,123~1,125~8,107~8,114~f5d,fb2
|
||||||
w+n~195,4~~7,81~1,109~8,120~1,123~1,125~8,106~8,113~f5d,fa3
|
w+n~195,4~~7,81~1,109~8,120~1,123~1,125~8,106~8,113~f5d,fa3
|
||||||
|
// w+W is mentioned in ACIPRules.java:
|
||||||
w+W~194,4~~7,82~1,109~8,120~1,123~1,125~8,106~8,113~f5d,fba
|
w+W~194,4~~7,82~1,109~8,120~1,123~1,125~8,106~8,113~f5d,fba
|
||||||
sh+ts~71,4~~7,83~1,109~8,120~1,123~1,125~8,106~8,113~f64,fa9
|
sh+ts~71,4~~7,83~1,109~8,120~1,123~1,125~8,106~8,113~f64,fa9
|
||||||
sh+ts+y~72,4~~7,84~1,109~8,122~1,123~1,125~8,108~8,115~f64,fa9,fb1
|
sh+ts+y~72,4~~7,84~1,109~8,122~1,123~1,125~8,108~8,115~f64,fa9,fb1
|
||||||
|
@ -990,6 +1010,7 @@ dz+h~227,5~~10,98~1,110~~1,125~1,126~~~0FAC
|
||||||
zh~229,5~~10,100~1,109~~1,123~1,125~~~0FAE
|
zh~229,5~~10,100~1,109~~1,123~1,125~~~0FAE
|
||||||
z~230,5~~10,101~1,109~~1,123~1,125~~~0FAF
|
z~230,5~~10,101~1,109~~1,123~1,125~~~0FAF
|
||||||
'~231,5~~10,102~1,109~~1,123~1,125~~~0FB0
|
'~231,5~~10,102~1,109~~1,123~1,125~~~0FB0
|
||||||
|
// Hey emacs: fontify this: '
|
||||||
l~234,5~~10,105~1,109~~1,123~1,125~~~0FB3
|
l~234,5~~10,105~1,109~~1,123~1,125~~~0FB3
|
||||||
sh~235,5~~10,106~1,109~~1,123~1,125~~~0FB4
|
sh~235,5~~10,106~1,109~~1,123~1,125~~~0FB4
|
||||||
Sh~236,5~~10,107~1,109~~1,123~1,125~~~0FB5
|
Sh~236,5~~10,107~1,109~~1,123~1,125~~~0FB5
|
||||||
|
|
|
@ -167,10 +167,10 @@ public class ACIPRules {
|
||||||
else {
|
else {
|
||||||
if ("w".equals(tok)) {
|
if ("w".equals(tok)) {
|
||||||
// There are only two stacks in TMW that have
|
// There are only two stacks in TMW that have
|
||||||
// U+0FBA: r+wa and w+wa. TMW->ACIP fails for
|
// U+0FBA: R+Wa and w+Wa. TMW->ACIP fails for
|
||||||
// these unless we handle it here. (FIXME:
|
// these unless we handle it here. (FIXME:
|
||||||
// add an automated test for this).
|
// add an automated test for this).
|
||||||
if ("r+w".equals(EWTS) || "w+w".equals(EWTS)) {
|
if ("R+W".equals(EWTS) || "w+W".equals(EWTS)) {
|
||||||
part = "W";
|
part = "W";
|
||||||
} else {
|
} else {
|
||||||
part = "V";
|
part = "V";
|
||||||
|
|
|
@ -7387,8 +7387,11 @@ tstHelper("ZUR");
|
||||||
uhelp(acip, null);
|
uhelp(acip, null);
|
||||||
}
|
}
|
||||||
private static void uhelp(String acip, String expectedUnicode) {
|
private static void uhelp(String acip, String expectedUnicode) {
|
||||||
|
uhelp(acip, expectedUnicode, "Most");
|
||||||
|
}
|
||||||
|
private static void uhelp(String acip, String expectedUnicode, String warningLevel) {
|
||||||
StringBuffer errors = new StringBuffer();
|
StringBuffer errors = new StringBuffer();
|
||||||
String unicode = ACIPConverter.convertToUnicodeText(acip, errors, null, true, "Most");
|
String unicode = ACIPConverter.convertToUnicodeText(acip, errors, null, true, warningLevel);
|
||||||
if (null == unicode) {
|
if (null == unicode) {
|
||||||
if (null != expectedUnicode && "none" != expectedUnicode) {
|
if (null != expectedUnicode && "none" != expectedUnicode) {
|
||||||
System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
|
System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
|
||||||
|
@ -7511,10 +7514,40 @@ M+NA
|
||||||
uhelp("K'EE:", "\u0f40\u0f71\u0f7b\u0f7f");
|
uhelp("K'EE:", "\u0f40\u0f71\u0f7b\u0f7f");
|
||||||
|
|
||||||
uhelp("K'A:", "\u0f40\u0f71\u0f7f");
|
uhelp("K'A:", "\u0f40\u0f71\u0f7f");
|
||||||
|
|
||||||
|
uhelp("RYA", "\u0f6a\u0fbb");
|
||||||
|
uhelp("R+YA", "\u0f6a\u0fbb");
|
||||||
|
|
||||||
uhelp("RVA", "\u0f62\u0fad");
|
uhelp("RVA", "\u0f62\u0fad");
|
||||||
uhelp("R+VA", "\u0f62\u0fad");
|
uhelp("R+VA", "\u0f62\u0fad");
|
||||||
uhelp("RWA", "\u0f62\u0fba");
|
|
||||||
uhelp("R+WA", "\u0f62\u0fba");
|
uhelp("RWA", "\u0f6a\u0fba");
|
||||||
|
uhelp("R+WA", "\u0f6a\u0fba");
|
||||||
|
|
||||||
|
uhelp("RSHA", "\u0f6a\u0fb4", "None");
|
||||||
|
uhelp("R+SHA", "\u0f6a\u0fb4", "None");
|
||||||
|
|
||||||
|
uhelp("RSHYA", "\u0f6a\u0fb4\u0fb1", "None");
|
||||||
|
uhelp("R+SH+YA", "\u0f6a\u0fb4\u0fb1", "None");
|
||||||
|
|
||||||
|
uhelp("Rsh", "\u0f6a\u0fb5", "None");
|
||||||
|
uhelp("R+sh", "\u0f6a\u0fb5", "None");
|
||||||
|
|
||||||
|
uhelp("Rshn", "\u0f6a\u0fb5\u0f9e", "None");
|
||||||
|
uhelp("R+sh+n", "\u0f6a\u0fb5\u0f9e", "None");
|
||||||
|
|
||||||
|
uhelp("RshnY", "\u0f6a\u0fb5\u0f9e\u0fb1", "None");
|
||||||
|
uhelp("R+sh+n+Y", "\u0f6a\u0fb5\u0f9e\u0fb1", "None");
|
||||||
|
uhelp("R+shn+Y", "\u0f6a\u0fb5\u0f9e\u0fb1", "None");
|
||||||
|
|
||||||
|
uhelp("RshMA", "\u0f6a\u0fb5\u0fa8", "None");
|
||||||
|
uhelp("R+sh+M", "\u0f6a\u0fb5\u0fa8", "None");
|
||||||
|
|
||||||
|
uhelp("RshYA", "\u0f6a\u0fb5\u0fb1", "None");
|
||||||
|
uhelp("R+sh+Y", "\u0f6a\u0fb5\u0fb1", "None");
|
||||||
|
uhelp("RS", "\u0f6a\u0fb6", "None");
|
||||||
|
uhelp("R+S", "\u0f6a\u0fb6", "None");
|
||||||
|
|
||||||
|
|
||||||
uhelp("WWA", "\u0f5d\u0fba");
|
uhelp("WWA", "\u0f5d\u0fba");
|
||||||
uhelp("W+WA", "\u0f5d\u0fba");
|
uhelp("W+WA", "\u0f5d\u0fba");
|
||||||
|
@ -7595,13 +7628,13 @@ M+NA
|
||||||
// Full-form subjoined YA:
|
// Full-form subjoined YA:
|
||||||
uhelp("n+d+Y", "\u0f4e\u0f9c\u0fbb");
|
uhelp("n+d+Y", "\u0f4e\u0f9c\u0fbb");
|
||||||
uhelp("Y+Y", "\u0f61\u0fbb");
|
uhelp("Y+Y", "\u0f61\u0fbb");
|
||||||
uhelp("R+Y", "\u0f62\u0fbb");
|
uhelp("R+Y", "\u0f6a\u0fbb");
|
||||||
|
|
||||||
uhelp("RVA R+VEE RWA R+WEE YYA Y+YEE ndRYA n+d+R+YEE KshR K+sh+REE ndY n+d+YEE,",
|
uhelp("RVA R+VEE RWA R+WEE YYA Y+YEE ndRYA n+d+R+YEE KshR K+sh+REE ndY n+d+YEE,",
|
||||||
"\u0f62\u0fad\u0f0b" // RVA
|
"\u0f62\u0fad\u0f0b" // RVA
|
||||||
+ "\u0f62\u0fad\u0f7b\u0f0b" //R+VEE
|
+ "\u0f62\u0fad\u0f7b\u0f0b" //R+VEE
|
||||||
+ "\u0f62\u0fba\u0f0b" // RWA
|
+ "\u0f6a\u0fba\u0f0b" // RWA
|
||||||
+ "\u0f62\u0fba\u0f7b\u0f0b" // R+WEE
|
+ "\u0f6a\u0fba\u0f7b\u0f0b" // R+WEE
|
||||||
+ "\u0f61\u0fbb\u0f0b" // YYA
|
+ "\u0f61\u0fbb\u0f0b" // YYA
|
||||||
+ "\u0f61\u0fbb\u0f7b\u0f0b" // Y+YEE
|
+ "\u0f61\u0fbb\u0f7b\u0f0b" // Y+YEE
|
||||||
+ "\u0f4e\u0f9c\u0fbc\u0fb1\u0f0b" // ndRYA
|
+ "\u0f4e\u0f9c\u0fbc\u0fb1\u0f0b" // ndRYA
|
||||||
|
@ -9228,3 +9261,7 @@ tstHelper("shKA");
|
||||||
// FIXME : handle ^GONG, and "^ GONG". See Bug #838593
|
// FIXME : handle ^GONG, and "^ GONG". See Bug #838593
|
||||||
|
|
||||||
// FIXME: the file ACIP_SHRI should be made into an ACIP->TMW automated test case
|
// FIXME: the file ACIP_SHRI should be made into an ACIP->TMW automated test case
|
||||||
|
|
||||||
|
// FIXME: test that RY, RW, RSH, RSHY, Rsh, Rshn, RshnY, RshM, RshY,
|
||||||
|
// and RS have associated TMW glyphs.
|
||||||
|
|
||||||
|
|
|
@ -634,7 +634,16 @@ class TPairList {
|
||||||
unicodeExceptionsMap.put("\u0f4e\u0f9c\u0fb2\u0fb1", "\u0f4e\u0f9c\u0fbc\u0fb1"); // ndRY
|
unicodeExceptionsMap.put("\u0f4e\u0f9c\u0fb2\u0fb1", "\u0f4e\u0f9c\u0fbc\u0fb1"); // ndRY
|
||||||
unicodeExceptionsMap.put("\u0f4e\u0f9c\u0fb1", "\u0f4e\u0f9c\u0fbb"); // ndY
|
unicodeExceptionsMap.put("\u0f4e\u0f9c\u0fb1", "\u0f4e\u0f9c\u0fbb"); // ndY
|
||||||
unicodeExceptionsMap.put("\u0f61\u0fb1", "\u0f61\u0fbb"); // YY
|
unicodeExceptionsMap.put("\u0f61\u0fb1", "\u0f61\u0fbb"); // YY
|
||||||
unicodeExceptionsMap.put("\u0f62\u0fb1", "\u0f62\u0fbb"); // RY
|
unicodeExceptionsMap.put("\u0f62\u0fb1", "\u0f6a\u0fbb"); // RY
|
||||||
|
unicodeExceptionsMap.put("\u0f62\u0fba", "\u0f6a\u0fba"); // RW
|
||||||
|
unicodeExceptionsMap.put("\u0f62\u0fb4", "\u0f6a\u0fb4"); // RSHA
|
||||||
|
unicodeExceptionsMap.put("\u0f62\u0fb4\u0fb1", "\u0f6a\u0fb4\u0fb1"); // RSHYA
|
||||||
|
unicodeExceptionsMap.put("\u0f62\u0fb5", "\u0f6a\u0fb5"); // Rsh
|
||||||
|
unicodeExceptionsMap.put("\u0f62\u0fb5\u0f9e", "\u0f6a\u0fb5\u0f9e"); // Rshn
|
||||||
|
unicodeExceptionsMap.put("\u0f62\u0fb5\u0f9e\u0fb1", "\u0f6a\u0fb5\u0f9e\u0fb1"); // RshnY
|
||||||
|
unicodeExceptionsMap.put("\u0f62\u0fb5\u0fa8", "\u0f6a\u0fb5\u0fa8"); // RshM
|
||||||
|
unicodeExceptionsMap.put("\u0f62\u0fb5\u0fb1", "\u0f6a\u0fb5\u0fb1"); // RshY
|
||||||
|
unicodeExceptionsMap.put("\u0f62\u0fb6", "\u0f6a\u0fb6"); // RS
|
||||||
}
|
}
|
||||||
String mapEntry = (String)unicodeExceptionsMap.get(nonVowelSB.toString());
|
String mapEntry = (String)unicodeExceptionsMap.get(nonVowelSB.toString());
|
||||||
if (null != mapEntry)
|
if (null != mapEntry)
|
||||||
|
@ -689,7 +698,20 @@ class TPairList {
|
||||||
hashKey = "w+n";
|
hashKey = "w+n";
|
||||||
else if ("W+W".equals(hashKey))
|
else if ("W+W".equals(hashKey))
|
||||||
hashKey = "w+W";
|
hashKey = "w+W";
|
||||||
// We're NOT doing it for r+W etc., on purpose.
|
|
||||||
|
if ("r+Y".equals(hashKey)
|
||||||
|
|| "r+W".equals(hashKey)
|
||||||
|
|| "r+sh".equals(hashKey)
|
||||||
|
|| "r+sh+y".equals(hashKey)
|
||||||
|
|| "r+Sh".equals(hashKey)
|
||||||
|
|| "r+Sh+N".equals(hashKey)
|
||||||
|
|| "r+Sh+N+y".equals(hashKey)
|
||||||
|
|| "r+Sh+m".equals(hashKey)
|
||||||
|
|| "r+Sh+y".equals(hashKey)
|
||||||
|
|| "r+s".equals(hashKey)
|
||||||
|
) {
|
||||||
|
hashKey = "R" + hashKey.substring(1); // r+Y => R+Y, etc.
|
||||||
|
}
|
||||||
|
|
||||||
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
|
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
|
||||||
hashKey = hashKey.replace('+', '-');
|
hashKey = hashKey.replace('+', '-');
|
||||||
|
|
Loading…
Reference in a new issue