Added three new kinds of warnings to ACIP->Tibetan conversions.

This commit is contained in:
dchandler 2003-10-21 02:00:49 +00:00
parent a47af2c165
commit 2f81a801ef
3 changed files with 60 additions and 12 deletions

View file

@ -1,6 +1,4 @@
// DLC NOW: 'US etc. -- do we handle them all? // DLC NOW WARN ON DBA
// DLC NOW WARN ON NNYA and DBA
// DLC NOW: implement Robert Chilton-supplied prefix rules
/* /*
The contents of this file are subject to the THDL Open Community License The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance Version 1.0 (the "License"); you may not use this file except in compliance
@ -45,10 +43,6 @@ public class ACIPConverter {
// DLC NOW: BAo isn't converting. // DLC NOW: BAo isn't converting.
// DLC NOW: tRAStA is not converted correctly to Unicode, and no
// warning is given when converting to TMW (Wait! isn't the "a
// stack occurs w/o a vowel" warning given?)
/** Command-line converter. Gives error messages on standard /** Command-line converter. Gives error messages on standard
* output about why we can't convert the document perfectly and * output about why we can't convert the document perfectly and
* exits with non-zero return code, or is silent otherwise and * exits with non-zero return code, or is silent otherwise and

View file

@ -7364,12 +7364,14 @@ tstHelper("ZUR");
} }
public void testACIPConversion() { public void testACIPConversion() {
uhelp("DZHDZHA", "\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
uhelp("DZHDZA", "\u0f5c\u0fab"); uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is
uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fab");
uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1"); uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1");
uhelp("PSNYA", "\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But, DLC, warn! uhelp("P+S+NYA", "\u0f54\u0fb6\u0f99");
uhelp("NNYA", "\u0f53\u0f99"); // DLC warn uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn!
uhelp("GHNYA", "\u0f43\u0f99"); uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f53\u0f99");
uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f43\u0f99");
// TS+NYA and T+S+N+YA are both legal, so what is TSNYA? // TS+NYA and T+S+N+YA are both legal, so what is TSNYA?
// Private correspondence with Robert Chilton says that it is // Private correspondence with Robert Chilton says that it is

View file

@ -322,6 +322,58 @@ class TParseTree {
} }
} }
} }
// Check for things like DZHDZ+H: stacks that have some pluses
// but not all pluses.
//
// Check for things like TSNYA: stacks that could be
// mistransliterations of T+S+N+YA
//
// Check for useless disambiguators.
{
    // Walks the pairs in pl alongside the stacks of bestParse and returns
    // the first warning message that applies.  Falling out of this block
    // means none of these three checks fired.

    // Index into pl of the next pair to examine.
    int plnum = 0;
    String swarn
        = "There is a stack of three or more consonants in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " that uses at least one '+' but does not use a '+' between each consonant.";
    String disamWarn
        = "There is a useless disambiguator in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + ".";

    // States for the pairs of a stack other than its last one.
    // NOTE(review): judging by the wording of swarn, a non-null
    // getRight() is the '+' marker -- confirm against TPair.
    final int UNDECIDED = 0;   // no non-final pair seen yet
    final int NO_PLUS = -1;    // non-final pairs seen so far have null getRight()
    final int WITH_PLUS = 1;   // non-final pairs seen so far have non-null getRight()

    // A disambiguator pair sitting before the first stack is reported
    // immediately.  (This was a while loop whose body returned on its
    // first iteration, so it never actually looped.)
    if (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
        return disamWarn;
    }

    for (int stackNum = 0; stackNum < bestParse.size(); stackNum++) {
        TPairList stack = bestParse.get(stackNum);
        int stackSize = stack.size();
        int type = UNDECIDED;
        boolean hasAmbiguousConsonant = false; // TS could be TSA or T+SA, so it's "ambiguous"

        for (int j = 0; j < stackSize; j++) {
            TPair tp = pl.get(plnum++);

            // Only the pairs before the last one in the stack decide the
            // state; mixing both states inside one stack is what swarn
            // reports.
            if (j + 1 < stackSize) {
                int seen = (null == tp.getRight()) ? NO_PLUS : WITH_PLUS;
                if (type == UNDECIDED) {
                    type = seen;
                } else if (type != seen) {
                    return swarn;
                }
            }

            // In a multi-pair stack, a left side longer than one character
            // marks the stack as ambiguous.
            if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
                hasAmbiguousConsonant = true;
            }
        }

        if (hasAmbiguousConsonant && NO_PLUS == type) {
            // Compare by value, not by reference: "==" on Strings only
            // matches when the caller happens to pass the very same
            // (interned) object, which silently drops this warning for any
            // equal but distinct String.
            if ("Most".equals(warningLevel) || "All".equals(warningLevel))
                return "There is a chance that the ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.";
        }

        // A disambiguator pair directly after this stack is reported too.
        if (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
            return disamWarn;
        }
    }
}
return null; return null;
} }