The ACIP "BNA" was being converted to B-NA instead of B+NA, even though NA cannot take a BA prefix. This happened because BNA was interpreted as root-suffix. In ACIP, BN is surely B+N unless N takes a B prefix, so root-suffix is out of the question.
Jskad now has two "Convert selected ACIP to Tibetan" conversions, one with warnings and one without, built into Jskad proper (that is, not into the converter).
This commit is contained in:
parent
d99ae50d8a
commit
6bda550157
3 changed files with 58 additions and 12 deletions
|
@ -463,6 +463,8 @@ tstHelper("KA'", "[(K . A), (' . )]",
|
|||
tstHelper("SAM'AM", null, null, null, "{SA}{M}{'A}{M}", 2);
|
||||
tstHelper("SAMS'ANG", null, null, null, "{SA}{M}{S}{'A}{NG}", 2);
|
||||
tstHelper("SNYANGD'O", null, null, null, "{S+NYA}{NG}{D}{'O}", 2);
|
||||
tstHelper("BNA", "{B}{NA}", new String[] { "{B+NA}", "{B}{NA}" },
|
||||
new String[] { "{B+NA}" }, "{B+NA}", -1);
|
||||
tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D}{'O}", 3); // T is no prefix, so NG+D, not NG-D
|
||||
tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D'O}", 0);
|
||||
|
||||
|
@ -7409,6 +7411,7 @@ G+NA
|
|||
MNA'
|
||||
M+NA
|
||||
*/
|
||||
uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
|
||||
uhelp("BGLA", "\u0f56\u0f42\u0fb3");
|
||||
uhelp("BLCAG", "\u0f56\u0f63\u0f95\u0f42");
|
||||
uhelp("DBA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DBA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f56");
|
||||
|
|
|
@ -207,7 +207,7 @@ class TParseTree {
|
|||
if (bt.isLegal) {
|
||||
if (bt.isLegalAndHasAVowelOnRoot)
|
||||
legalParsesWithVowelOnRoot.add(sl);
|
||||
if (!bt.isLegalButSanskrit)
|
||||
if (!bt.isLegalButSanskrit())
|
||||
allStrictlyLegalParses.add(sl);
|
||||
allNonillegalParses.add(sl);
|
||||
}
|
||||
|
|
|
@ -144,6 +144,14 @@ class TStackList {
|
|||
if (isLegal) {
|
||||
if (isClearlyIllegal())
|
||||
isLegal = false;
|
||||
TPairList firstStack = this.get(0);
|
||||
if (1 == firstStack.size()
|
||||
&& firstStack.get(0).isPrefix()
|
||||
&& null == firstStack.get(0).getRight() // because GAM is legal
|
||||
&& !(candidateType.startsWith("prefix")
|
||||
|| candidateType.startsWith("appendaged-prefix"))) {
|
||||
isLegal = false;
|
||||
}
|
||||
}
|
||||
|
||||
boolean isLegalAndHasAVowelOnRoot = false;
|
||||
|
@ -163,8 +171,8 @@ class TStackList {
|
|||
}
|
||||
}
|
||||
return new BoolTriple(isLegal,
|
||||
(candidateType == "single-sanskrit-gc"),
|
||||
isLegalAndHasAVowelOnRoot);
|
||||
isLegalAndHasAVowelOnRoot,
|
||||
candidateType);
|
||||
}
|
||||
|
||||
private static final boolean ddebug = false;
|
||||
|
@ -241,20 +249,51 @@ class TStackList {
|
|||
}
|
||||
}
|
||||
|
||||
/** Too simple to comment. */
|
||||
/** A BoolTriple is used to convey the legality of a particular tsheg
|
||||
* bar. (FIXME: This class is misnamed.)
|
||||
* @author David Chandler */
|
||||
class BoolTriple implements Comparable {
|
||||
|
||||
/** candidateType is a {@link
|
||||
org.thdl.tib.text.TibTextUtils#getClassificationOfTshegBar(TGCList,StringBuffer,boolean)}
|
||||
concept. You cannot derive isLegal() from it because {@link
|
||||
TStackList#isClearlyIllegal()} and more (think {BNA}) comes
|
||||
into play. */
|
||||
String candidateType;
|
||||
|
||||
|
||||
/** True if and only if the tsheg bar is a native Tibetan tsheg
|
||||
bar or is a single Sanskrit grapheme cluster.
|
||||
@see #isLegalButSanskrit() */
|
||||
boolean isLegal;
|
||||
boolean isLegalButSanskrit; // some subset are legal but legal Sanskrit -- the single sanskrit stacks are this way, such as B+DE.
|
||||
|
||||
|
||||
/** Some subset of tsheg bars are legal but legal Sanskrit -- the
|
||||
single sanskrit stacks are this way, such as B+DE. We treat
|
||||
such a thing as legal because B+DE is the perfect way to input
|
||||
such a thing. But then, we treat B+DEB+DE as illegal, even
|
||||
though it too is perfect. So we're inconsistent (LOW-PRIORITY
|
||||
FIXME), but you really have to watch what happens to
|
||||
coloration and warning messages if you change this. */
|
||||
boolean isLegalButSanskrit() {
|
||||
return (candidateType == "single-sanskrit-gc");
|
||||
}
|
||||
|
||||
/** True if and only if {@link #isLegal} is true and there may be
|
||||
an ACIP "A" vowel on the root stack. */
|
||||
boolean isLegalAndHasAVowelOnRoot;
|
||||
BoolTriple(boolean isLegal,
|
||||
boolean isLegalButSanskrit,
|
||||
boolean isLegalAndHasAVowelOnRoot) {
|
||||
if (!isLegal && (isLegalButSanskrit || isLegalAndHasAVowelOnRoot))
|
||||
throw new IllegalArgumentException();
|
||||
boolean isLegalAndHasAVowelOnRoot,
|
||||
String candidateType) {
|
||||
this.isLegal = isLegal;
|
||||
this.isLegalButSanskrit = isLegalButSanskrit;
|
||||
this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot;
|
||||
this.candidateType = candidateType;
|
||||
if (!isLegal && (isLegalButSanskrit() || isLegalAndHasAVowelOnRoot))
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
|
||||
/** The more legal and standard a tsheg bar is, the higher score
|
||||
it has. */
|
||||
private int score() {
|
||||
int score = 0;
|
||||
if (isLegalAndHasAVowelOnRoot) {
|
||||
|
@ -263,12 +302,16 @@ class BoolTriple implements Comparable {
|
|||
if (isLegal) {
|
||||
score += 5;
|
||||
}
|
||||
if (isLegalButSanskrit) {
|
||||
if (isLegalButSanskrit()) {
|
||||
score -= 3;
|
||||
}
|
||||
return score;
|
||||
}
|
||||
/** The most legal BoolTriple compares higher. */
|
||||
|
||||
|
||||
/** The "most legal" BoolTriple compares higher. Native Tibetan
|
||||
beats Sanskrit; native tibetan with a vowel on the root stack
|
||||
beats native Tibetan without. */
|
||||
public int compareTo(Object o) {
|
||||
BoolTriple b = (BoolTriple)o;
|
||||
return score() - b.score();
|
||||
|
|
Loading…
Reference in a new issue