RFE 860190: ACIP->Unicode now gives a warning when it outputs something that can't be represented in TMW.
This commit is contained in:
parent
848349fd3a
commit
c1aa81e943
3 changed files with 42 additions and 19 deletions
|
@ -436,10 +436,7 @@ public class ACIPConverter {
|
||||||
// converter's life, parsing of
|
// converter's life, parsing of
|
||||||
// tsheg bars was handled
|
// tsheg bars was handled
|
||||||
// differently, but now, I think
|
// differently, but now, I think
|
||||||
// this is impossible. DLC FIXME:
|
// this is impossible.
|
||||||
// run with -Dthdl.debug=true on
|
|
||||||
// all ACIP Release V texts you
|
|
||||||
// can find.
|
|
||||||
ThdlDebug.noteIffyCode();
|
ThdlDebug.noteIffyCode();
|
||||||
hasErrors = true;
|
hasErrors = true;
|
||||||
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " has no legal parses.]";
|
String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") " + s.getText() + " has no legal parses.]";
|
||||||
|
@ -501,6 +498,28 @@ public class ACIPConverter {
|
||||||
if (null != writer) {
|
if (null != writer) {
|
||||||
unicode = sl.getUnicode();
|
unicode = sl.getUnicode();
|
||||||
if (null == unicode) throw new Error("FIXME: make this an assertion 4");
|
if (null == unicode) throw new Error("FIXME: make this an assertion 4");
|
||||||
|
// Warn if any of the stacks
|
||||||
|
// in this tsheg bar do not
|
||||||
|
// have corresponding glyphs
|
||||||
|
// in TMW. That means there
|
||||||
|
// was probably a typo in the
|
||||||
|
// input.
|
||||||
|
if ("None" != warningLevel) {
|
||||||
|
Object[] trialDuff = sl.getDuff();
|
||||||
|
for (int ii = 0; ii < trialDuff.length; ii++) {
|
||||||
|
if (trialDuff[ii] instanceof String) {
|
||||||
|
String bwarning
|
||||||
|
= "[#WARNING CONVERTING ACIP DOCUMENT: "
|
||||||
|
+ (String)trialDuff[ii] + "]";
|
||||||
|
unicode = bwarning + unicode;
|
||||||
|
if (null != hasWarnings) hasWarnings[0] = true;
|
||||||
|
if (null != warnings) {
|
||||||
|
warnings.append(bwarning);
|
||||||
|
warnings.append('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (null != tdoc) {
|
if (null != tdoc) {
|
||||||
duff = sl.getDuff();
|
duff = sl.getDuff();
|
||||||
|
@ -673,12 +692,14 @@ public class ACIPConverter {
|
||||||
color);
|
color);
|
||||||
else {
|
else {
|
||||||
hasErrors = true;
|
hasErrors = true;
|
||||||
|
String emsg
|
||||||
|
= "[ERROR: " + (String)duff[j] + "]";
|
||||||
if (null != errors)
|
if (null != errors)
|
||||||
errors.append((String)duff[j] + "\n");
|
errors.append(emsg + "\n");
|
||||||
tdoc.appendRoman(tdocLocation[0],
|
tdoc.appendRoman(tdocLocation[0],
|
||||||
(String)duff[j],
|
emsg,
|
||||||
Color.RED);
|
Color.RED);
|
||||||
tdocLocation[0] += ((String)duff[j]).length();
|
tdocLocation[0] += emsg.length();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -7448,13 +7448,13 @@ M+NA
|
||||||
uhelp("N+YA", "\u0f53\u0fb1");
|
uhelp("N+YA", "\u0f53\u0fb1");
|
||||||
uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
|
uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
|
||||||
uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") NE+YA has these errors: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
|
uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: The tsheg bar (\"syllable\") NE+YA has these errors: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
|
||||||
uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
|
uhelp("tRAStA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {t+RA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f4a\u0fb2\u0f66\u0f9a");
|
||||||
uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5b\u0fb7\u0fab\u0fb7"); // tricky because DZHDZA is not in TMW but DZHDZHA is
|
uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5b\u0fb7\u0fab\u0fb7"); // tricky because DZHDZA is not in TMW but DZHDZHA is
|
||||||
uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5b\u0fb7\u0fab");
|
uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {DZH+DZA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f5b\u0fb7\u0fab");
|
||||||
uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1");
|
uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1");
|
||||||
uhelp("P+S+NYA", "\u0f54\u0fb6\u0f99");
|
uhelp("P+S+NYA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99");
|
||||||
uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn!
|
uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {P+S+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn!
|
||||||
uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f53\u0f99");
|
uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {N+NYA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f53\u0f99");
|
||||||
uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f42\u0fb7\u0f99");
|
uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f42\u0fb7\u0f99");
|
||||||
|
|
||||||
// TS+NYA and T+S+N+YA are both legal, so what is TSNYA?
|
// TS+NYA and T+S+N+YA are both legal, so what is TSNYA?
|
||||||
|
@ -7525,7 +7525,7 @@ M+NA
|
||||||
uhelp("*#HUm: G+DHOO GRO`;.,",
|
uhelp("*#HUm: G+DHOO GRO`;.,",
|
||||||
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa1\u0fb7\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa1\u0fb7\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
||||||
uhelp("*#HUm: K+DHA GRO`;.,",
|
uhelp("*#HUm: K+DHA GRO`;.,",
|
||||||
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f40\u0fa1\u0fb7\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {K+DHA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f40\u0fa1\u0fb7\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
||||||
uhelp("HA,\nHA\n\nHA", "\u0f67\u0f0d \u0f67\u0f0b\n\n\u0f67");
|
uhelp("HA,\nHA\n\nHA", "\u0f67\u0f0d \u0f67\u0f0b\n\n\u0f67");
|
||||||
uhelp("NGA,", "\u0f44\u0f0c\u0f0d");
|
uhelp("NGA,", "\u0f44\u0f0c\u0f0d");
|
||||||
uhelp("NGA,\nHA\n\nHA", "\u0f44\u0f0c\u0f0d \u0f67\u0f0b\n\n\u0f67");
|
uhelp("NGA,\nHA\n\nHA", "\u0f44\u0f0c\u0f0d \u0f67\u0f0b\n\n\u0f67");
|
||||||
|
@ -7538,6 +7538,8 @@ M+NA
|
||||||
uhelp("GU, ,KHO", "\u0f42\u0f74\u0f0d \u0f0d\u0f41\u0f7c");
|
uhelp("GU, ,KHO", "\u0f42\u0f74\u0f0d \u0f0d\u0f41\u0f7c");
|
||||||
uhelp("GU ,KHO", "\u0f42\u0f74\u0f0b \u0f0d\u0f41\u0f7c"); // FIXME: missing a shad after GU, warn about that.
|
uhelp("GU ,KHO", "\u0f42\u0f74\u0f0b \u0f0d\u0f41\u0f7c"); // FIXME: missing a shad after GU, warn about that.
|
||||||
uhelp("GA HA", "\u0f42\u0f0b \u0f67");
|
uhelp("GA HA", "\u0f42\u0f0b \u0f67");
|
||||||
|
uhelp("BCWA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
|
||||||
|
uhelp("'KYO", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {'+K+YO}, but only because our knowledge of prefix rules says that {'}{K+YO} is not a legal Tibetan tsheg bar (\"syllable\")][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {'+K+YO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f60\u0f90\u0fb1\u0f7c");
|
||||||
uhelp("WA", "\u0f5d");
|
uhelp("WA", "\u0f5d");
|
||||||
uhelp("W", "\u0f5d");
|
uhelp("W", "\u0f5d");
|
||||||
uhelp("WO", "\u0f5d\u0f7c");
|
uhelp("WO", "\u0f5d\u0f7c");
|
||||||
|
@ -7555,21 +7557,21 @@ M+NA
|
||||||
uhelp("WRA", "\u0f5d\u0fb2");
|
uhelp("WRA", "\u0f5d\u0fb2");
|
||||||
uhelp("W+RA", "\u0f5d\u0fb2");
|
uhelp("W+RA", "\u0f5d\u0fb2");
|
||||||
uhelp("W+R", "\u0f5d\u0fb2");
|
uhelp("W+R", "\u0f5d\u0fb2");
|
||||||
uhelp("BCWA", "\u0f56\u0f95\u0fba");
|
uhelp("BCWA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
|
||||||
uhelp("BCW", "\u0f56\u0f95\u0fba");
|
uhelp("BCW", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+W} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba");
|
||||||
uhelp("BCWO", "\u0f56\u0f95\u0fba\u0f7c");
|
uhelp("BCWO", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+WO} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fba\u0f7c");
|
||||||
uhelp("BCVA", "\u0f56\u0f45\u0fad");
|
uhelp("BCVA", "\u0f56\u0f45\u0fad");
|
||||||
uhelp("BCV", "\u0f56\u0f45\u0fad");
|
uhelp("BCV", "\u0f56\u0f45\u0fad");
|
||||||
uhelp("BCV'O", "\u0f56\u0f45\u0fad\u0f71\u0f7c");
|
uhelp("BCV'O", "\u0f56\u0f45\u0fad\u0f71\u0f7c");
|
||||||
uhelp("BCV'A", "\u0f56\u0f45\u0fad\u0f71");
|
uhelp("BCV'A", "\u0f56\u0f45\u0fad\u0f71");
|
||||||
uhelp("BCV'", "\u0f56\u0f95\u0fad\u0fb0");
|
uhelp("BCV'", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP {B+C+V+'} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f56\u0f95\u0fad\u0fb0");
|
||||||
uhelp("GYA", "\u0f42\u0fb1");
|
uhelp("GYA", "\u0f42\u0fb1");
|
||||||
uhelp("GY", "\u0f42\u0fb1");
|
uhelp("GY", "\u0f42\u0fb1");
|
||||||
uhelp("G-YA", "\u0f42\u0f61");
|
uhelp("G-YA", "\u0f42\u0f61");
|
||||||
uhelp("GA-YA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YA.]\u0f42\u0f61");
|
uhelp("GA-YA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YA.]\u0f42\u0f61");
|
||||||
uhelp("GA-YO", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YO.]\u0f42\u0f61\u0F7c");
|
uhelp("GA-YO", "[#WARNING CONVERTING ACIP DOCUMENT: There is a useless disambiguator in GA-YO.]\u0f42\u0f61\u0F7c");
|
||||||
uhelp("RTZVA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZVA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f62\u0fa9\u0fad");
|
uhelp("RTZVA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZVA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f62\u0fa9\u0fad");
|
||||||
uhelp("RTZWA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZWA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f62\u0fa9\u0fba");
|
uhelp("RTZWA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP RTZWA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.][#WARNING CONVERTING ACIP DOCUMENT: The ACIP {R+TZ+WA} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]\u0f62\u0fa9\u0fba");
|
||||||
}
|
}
|
||||||
public void testFixedFormSubjoinedConsonants() {
|
public void testFixedFormSubjoinedConsonants() {
|
||||||
// Usual subjoined RA:
|
// Usual subjoined RA:
|
||||||
|
|
|
@ -694,7 +694,7 @@ class TPairList {
|
||||||
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
|
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
|
||||||
hashKey = hashKey.replace('+', '-');
|
hashKey = hashKey.replace('+', '-');
|
||||||
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
|
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
|
||||||
duffsAndErrors.add("[#ERROR The ACIP {" + recoverACIP() + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]");
|
duffsAndErrors.add("The ACIP {" + recoverACIP() + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue