Added three new kinds of warnings to ACIP->Tibetan conversions.
This commit is contained in:
parent
a47af2c165
commit
2f81a801ef
3 changed files with 60 additions and 12 deletions
|
@ -1,6 +1,4 @@
|
||||||
// DLC NOW: 'US etc. -- do we handle them all?
|
// DLC NOW WARN ON DBA
|
||||||
// DLC NOW WARN ON NNYA and DBA
|
|
||||||
// DLC NOW: implement Robert Chilton-supplied prefix rules
|
|
||||||
/*
|
/*
|
||||||
The contents of this file are subject to the THDL Open Community License
|
The contents of this file are subject to the THDL Open Community License
|
||||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||||
|
@ -45,10 +43,6 @@ public class ACIPConverter {
|
||||||
|
|
||||||
// DLC NOW: BAo isn't converting.
|
// DLC NOW: BAo isn't converting.
|
||||||
|
|
||||||
// DLC NOW: tRAStA is not converted correctly to Unicode, and no
|
|
||||||
// warning is given when converting to TMW (Wait! isn't the "a
|
|
||||||
// stack occurs w/o a vowel" warning given?)
|
|
||||||
|
|
||||||
/** Command-line converter. Gives error messages on standard
|
/** Command-line converter. Gives error messages on standard
|
||||||
* output about why we can't convert the document perfectly and
|
* output about why we can't convert the document perfectly and
|
||||||
* exits with non-zero return code, or is silent otherwise and
|
* exits with non-zero return code, or is silent otherwise and
|
||||||
|
|
|
@ -7364,12 +7364,14 @@ tstHelper("ZUR");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testACIPConversion() {
|
public void testACIPConversion() {
|
||||||
uhelp("DZHDZHA", "\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is
|
uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
|
||||||
uhelp("DZHDZA", "\u0f5c\u0fab");
|
uhelp("DZHDZHA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZHA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is
|
||||||
|
uhelp("DZHDZA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP DZHDZA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f5c\u0fab");
|
||||||
uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1");
|
uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1");
|
||||||
uhelp("PSNYA", "\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But, DLC, warn!
|
uhelp("P+S+NYA", "\u0f54\u0fb6\u0f99");
|
||||||
uhelp("NNYA", "\u0f53\u0f99"); // DLC warn
|
uhelp("PSNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP PSNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But warn!
|
||||||
uhelp("GHNYA", "\u0f43\u0f99");
|
uhelp("NNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP NNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f53\u0f99");
|
||||||
|
uhelp("GHNYA", "[#WARNING CONVERTING ACIP DOCUMENT: There is a chance that the ACIP GHNYA was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.]\u0f43\u0f99");
|
||||||
|
|
||||||
// TS+NYA and T+S+N+YA are both legal, so what is TSNYA?
|
// TS+NYA and T+S+N+YA are both legal, so what is TSNYA?
|
||||||
// Private correspondence with Robert Chilton says that it is
|
// Private correspondence with Robert Chilton says that it is
|
||||||
|
|
|
@ -322,6 +322,58 @@ class TParseTree {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for things like DZHDZ+H: stacks that have some pluses
|
||||||
|
// but not all pluses.
|
||||||
|
//
|
||||||
|
// Check for things like TSNYA: stacks that could be
|
||||||
|
// mistransliterations of T+S+N+YA
|
||||||
|
//
|
||||||
|
// Check for useless disambiguators.
|
||||||
|
{
|
||||||
|
int plnum = 0;
|
||||||
|
String swarn
|
||||||
|
= "There is a stack of three or more consonants in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " that uses at least one '+' but does not use a '+' between each consonant.";
|
||||||
|
String disamWarn
|
||||||
|
= "There is a useless disambiguator in " + ((null != originalACIP) ? originalACIP : recoverACIP()) + ".";
|
||||||
|
while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
|
||||||
|
++plnum;
|
||||||
|
return disamWarn;
|
||||||
|
}
|
||||||
|
for (int stackNum = 0; stackNum < bestParse.size(); stackNum++) {
|
||||||
|
TPairList stack = bestParse.get(stackNum);
|
||||||
|
int type = 0;
|
||||||
|
int stackSize = stack.size();
|
||||||
|
boolean hasAmbiguousConsonant = false; // TS could be TSA or T+SA, so it's "ambiguous"
|
||||||
|
for (int j = 0; j < stackSize; j++) {
|
||||||
|
TPair tp = pl.get(plnum++);
|
||||||
|
if (j + 1 < stack.size()) {
|
||||||
|
if (null == tp.getRight()) {
|
||||||
|
if (type == 0)
|
||||||
|
type = -1;
|
||||||
|
else if (type == 1)
|
||||||
|
return swarn;
|
||||||
|
} else {
|
||||||
|
if (type == 0)
|
||||||
|
type = 1;
|
||||||
|
else if (type == -1)
|
||||||
|
return swarn;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
|
||||||
|
hasAmbiguousConsonant = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (hasAmbiguousConsonant && -1 == type) {
|
||||||
|
if ("Most" == warningLevel || "All" == warningLevel)
|
||||||
|
return "There is a chance that the ACIP " + ((null != originalACIP) ? originalACIP : recoverACIP()) + " was intended to represent more consonants than we parsed it as representing -- NNYA, e.g., means N+NYA, but you can imagine seeing N+N+YA and typing NNYA for it too.";
|
||||||
|
}
|
||||||
|
while (plnum < pl.size() && pl.get(plnum).isDisambiguator()) {
|
||||||
|
++plnum;
|
||||||
|
return disamWarn;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue