The ACIP "BNA" was being converted to B-NA instead of B+NA, even though NA cannot take a BA prefix. This happened because BNA was interpreted as root-suffix. In ACIP, BN is surely B+N unless N takes a B prefix, so a root-suffix interpretation is out of the question.

Now Jskad has two "Convert selected ACIP to Tibetan" conversions, one with warnings and one without, built into Jskad proper (that is, not into the converter).
This commit is contained in:
dchandler 2003-10-26 00:32:55 +00:00
parent d99ae50d8a
commit 6bda550157
3 changed files with 58 additions and 12 deletions

View file

@ -463,6 +463,8 @@ tstHelper("KA'", "[(K . A), (' . )]",
tstHelper("SAM'AM", null, null, null, "{SA}{M}{'A}{M}", 2);
tstHelper("SAMS'ANG", null, null, null, "{SA}{M}{S}{'A}{NG}", 2);
tstHelper("SNYANGD'O", null, null, null, "{S+NYA}{NG}{D}{'O}", 2);
tstHelper("BNA", "{B}{NA}", new String[] { "{B+NA}", "{B}{NA}" },
new String[] { "{B+NA}" }, "{B+NA}", -1);
tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D}{'O}", 3); // T is no prefix, so NG+D, not NG-D
tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D'O}", 0);
@ -7409,6 +7411,7 @@ G+NA
MNA'
M+NA
*/
uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
uhelp("BGLA", "\u0f56\u0f42\u0fb3");
uhelp("BLCAG", "\u0f56\u0f63\u0f95\u0f42");
uhelp("DBA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DBA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f56");

View file

@ -207,7 +207,7 @@ class TParseTree {
if (bt.isLegal) {
if (bt.isLegalAndHasAVowelOnRoot)
legalParsesWithVowelOnRoot.add(sl);
if (!bt.isLegalButSanskrit)
if (!bt.isLegalButSanskrit())
allStrictlyLegalParses.add(sl);
allNonillegalParses.add(sl);
}

View file

@ -144,6 +144,14 @@ class TStackList {
if (isLegal) {
if (isClearlyIllegal())
isLegal = false;
TPairList firstStack = this.get(0);
if (1 == firstStack.size()
&& firstStack.get(0).isPrefix()
&& null == firstStack.get(0).getRight() // because GAM is legal
&& !(candidateType.startsWith("prefix")
|| candidateType.startsWith("appendaged-prefix"))) {
isLegal = false;
}
}
boolean isLegalAndHasAVowelOnRoot = false;
@ -163,8 +171,8 @@ class TStackList {
}
}
return new BoolTriple(isLegal,
(candidateType == "single-sanskrit-gc"),
isLegalAndHasAVowelOnRoot);
isLegalAndHasAVowelOnRoot,
candidateType);
}
private static final boolean ddebug = false;
@ -241,20 +249,51 @@ class TStackList {
}
}
/** Too simple to comment. */
/** A BoolTriple is used to convey the legality of a particular tsheg
* bar. (FIXME: This class is misnamed.)
* @author David Chandler */
class BoolTriple implements Comparable {
/** candidateType is a {@link
org.thdl.tib.text.TibTextUtils#getClassificationOfTshegBar(TGCList,StringBuffer,boolean)}
concept. You cannot derive isLegal() from it because {@link
TStackList#isClearlyIllegal()} and more (think {BNA}) comes
into play. */
String candidateType;
/** True if and only if the tsheg bar is a native Tibetan tsheg
bar or is a single Sanskrit grapheme cluster.
@see #isLegalButSanskrit() */
boolean isLegal;
boolean isLegalButSanskrit; // some subset are legal but legal Sanskrit -- the single sanskrit stacks are this way, such as B+DE.
/** Returns true if and only if {@link #candidateType} is
    "single-sanskrit-gc".
    <p>Some subset of tsheg bars are legal, but only as Sanskrit --
    the single Sanskrit stacks are this way, such as B+DE.  We treat
    such a thing as legal because B+DE is the perfect way to input
    such a thing.  But then, we treat B+DEB+DE as illegal, even
    though it too is perfect.  So we're inconsistent (LOW-PRIORITY
    FIXME), but you really have to watch what happens to
    coloration and warning messages if you change this.
    @return true if candidateType equals "single-sanskrit-gc";
    false otherwise, including when candidateType is null */
boolean isLegalButSanskrit() {
    // Compare by value, not by reference: == on Strings is only
    // true when both operands are the very same object, which holds
    // for interned literals but not for Strings built at runtime.
    // Putting the literal first keeps a null candidateType safe.
    return "single-sanskrit-gc".equals(candidateType);
}
/** True if and only if {@link #isLegal} is true and there may be
an ACIP "A" vowel on the root stack. */
boolean isLegalAndHasAVowelOnRoot;
BoolTriple(boolean isLegal,
boolean isLegalButSanskrit,
boolean isLegalAndHasAVowelOnRoot) {
if (!isLegal && (isLegalButSanskrit || isLegalAndHasAVowelOnRoot))
throw new IllegalArgumentException();
boolean isLegalAndHasAVowelOnRoot,
String candidateType) {
this.isLegal = isLegal;
this.isLegalButSanskrit = isLegalButSanskrit;
this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot;
this.candidateType = candidateType;
if (!isLegal && (isLegalButSanskrit() || isLegalAndHasAVowelOnRoot))
throw new IllegalArgumentException();
}
/** The more legal and standard a tsheg bar is, the higher score
it has. */
private int score() {
int score = 0;
if (isLegalAndHasAVowelOnRoot) {
@ -263,12 +302,16 @@ class BoolTriple implements Comparable {
if (isLegal) {
score += 5;
}
if (isLegalButSanskrit) {
if (isLegalButSanskrit()) {
score -= 3;
}
return score;
}
/** The most legal BoolTriple compares higher. */
/** The "most legal" BoolTriple compares higher. Native Tibetan
beats Sanskrit; native tibetan with a vowel on the root stack
beats native Tibetan without. */
public int compareTo(Object o) {
BoolTriple b = (BoolTriple)o;
return score() - b.score();