Added three new kinds of warnings to ACIP->Tibetan conversions.
This commit is contained in:
parent
a47af2c165
commit
2f81a801ef
3 changed files with 60 additions and 12 deletions
|
@ -1,6 +1,4 @@
|
|||
// DLC NOW: 'US etc. -- do we handle them all?
|
||||
// DLC NOW WARN ON NNYA and DBA
|
||||
// DLC NOW: implement Robert Chilton-supplied prefix rules
|
||||
// DLC NOW WARN ON DBA
|
||||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
|
@ -45,10 +43,6 @@ public class ACIPConverter {
|
|||
|
||||
// DLC NOW: BAo isn't converting.
|
||||
|
||||
// DLC NOW: tRAStA is not converted correctly to Unicode, and no
|
||||
// warning is given when converting to TMW (Wait! isn't the "a
|
||||
// stack occurs w/o a vowel" warning given?)
|
||||
|
||||
/** Command-line converter. Gives error messages on standard
|
||||
* output about why we can't convert the document perfectly and
|
||||
* exits with non-zero return code, or is silent otherwise and
|
||||
|
|
|
@ -7364,12 +7364,14 @@ tstHelper("ZUR");
|
|||
}
|
||||
|
||||
public void testACIPConversion() {
|
||||
uhelp("DZHDZHA", "\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is
|
||||
uhelp("DZHDZA", "\u0f5c\u0fab");
|
||||
uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
|
||||
uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is
|
||||
uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fab");
|
||||
uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1");
|
||||
uhelp("PSNYA", "\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But, DLC, warn!
|
||||
uhelp("NNYA", "\u0f53\u0f99"); // DLC warn
|
||||
uhelp("GHNYA", "\u0f43\u0f99");
|
||||
uhelp("P+S+NYA", "\u0f54\u0fb6\u0f99");
|
||||
uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn!
|
||||
uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f53\u0f99");
|
||||
uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f43\u0f99");
|
||||
|
||||
// TS+NYA and T+S+N+YA are both legal, so what is TSNYA?
|
||||
// Private correspondence with Robert Chilton says that it is
|
||||
|
|
|
@ -322,6 +322,58 @@ class TParseTree {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for things like DZHDZ+H: stacks that have some pluses
|
||||
// but not all pluses.
|
||||
//
|
||||
// Check for things like TSNYA: stacks that could be
|
||||
// mistransliterations of T+S+N+YA
|
||||
//
|
||||
// Check for useless disambiguators.
|
||||
{
|
||||
int plnum = 0;
|
||||
String swarn
|
||||
= "There is a stack of three or more consonants in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " that uses at least one '+' but does not use a '+' between each consonant.";
|
||||
String disamWarn
|
||||
= "There is a useless disambiguator in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + ".";
|
||||
while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
|
||||
++plnum;
|
||||
return disamWarn;
|
||||
}
|
||||
for (int stackNum = 0; stackNum < bestParse.size(); stackNum++) {
|
||||
TPairList stack = bestParse.get(stackNum);
|
||||
int type = 0;
|
||||
int stackSize = stack.size();
|
||||
boolean hasAmbiguousConsonant = false; // TS could be TSA or T+SA, so it's "ambiguous"
|
||||
for (int j = 0; j < stackSize; j++) {
|
||||
TPair tp = pl.get(plnum++);
|
||||
if (j + 1 < stack.size()) {
|
||||
if (null == tp.getRight()) {
|
||||
if (type == 0)
|
||||
type = -1;
|
||||
else if (type == 1)
|
||||
return swarn;
|
||||
} else {
|
||||
if (type == 0)
|
||||
type = 1;
|
||||
else if (type == -1)
|
||||
return swarn;
|
||||
}
|
||||
}
|
||||
if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
|
||||
hasAmbiguousConsonant = true;
|
||||
}
|
||||
}
|
||||
if (hasAmbiguousConsonant && -1 == type) {
|
||||
if ("Most" == warningLevel || "All" == warningLevel)
|
||||
return "There is a chance that the ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.";
|
||||
}
|
||||
while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
|
||||
++plnum;
|
||||
return disamWarn;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue