From 6bda5501576918320d2e983e955c2481a459bb3c Mon Sep 17 00:00:00 2001 From: dchandler Date: Sun, 26 Oct 2003 00:32:55 +0000 Subject: [PATCH] The ACIP "BNA" was converting to B-NA instead of B+NA, even though NA cannot take a BA prefix. This was because BNA was interpreted as root-suffix. In ACIP, BN is surely B+N unless N takes a B prefix, so root-suffix is out of the question. Now Jskad has two "Convert selected ACIP to Tibetan" conversions, one with and one without warnings, built in to Jskad proper (not the converter, that is). --- source/org/thdl/tib/text/ttt/PackageTest.java | 3 + source/org/thdl/tib/text/ttt/TParseTree.java | 2 +- source/org/thdl/tib/text/ttt/TStackList.java | 65 +++++++++++++++---- 3 files changed, 58 insertions(+), 12 deletions(-) diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index 79c79ec..268c935 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -463,6 +463,8 @@ tstHelper("KA'", "[(K . A), (' . )]", tstHelper("SAM'AM", null, null, null, "{SA}{M}{'A}{M}", 2); tstHelper("SAMS'ANG", null, null, null, "{SA}{M}{S}{'A}{NG}", 2); tstHelper("SNYANGD'O", null, null, null, "{S+NYA}{NG}{D}{'O}", 2); + tstHelper("BNA", "{B}{NA}", new String[] { "{B+NA}", "{B}{NA}" }, + new String[] { "{B+NA}" }, "{B+NA}", -1); tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D}{'O}", 3); // T is no prefix, so NG+D, not NG-D tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D'O}", 0); @@ -7409,6 +7411,7 @@ G+NA MNA' M+NA */ + uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3"); uhelp("BGLA", "\u0f56\u0f42\u0fb3"); uhelp("BLCAG", "\u0f56\u0f63\u0f95\u0f42"); uhelp("DBA", "[#WARNING CONVERTING ACIP DOCUMENT: The ACIP DBA has been interpreted as two stacks, not one, but you may wish to confirm that the original text had two stacks as it would be an easy mistake to make to see one stack and forget to input it with '+' characters.]\u0f51\u0f56"); diff --git a/source/org/thdl/tib/text/ttt/TParseTree.java b/source/org/thdl/tib/text/ttt/TParseTree.java index 178ff85..2b82e92 100644 --- a/source/org/thdl/tib/text/ttt/TParseTree.java +++ b/source/org/thdl/tib/text/ttt/TParseTree.java @@ -207,7 +207,7 @@ class TParseTree { if (bt.isLegal) { if (bt.isLegalAndHasAVowelOnRoot) legalParsesWithVowelOnRoot.add(sl); - if (!bt.isLegalButSanskrit) + if (!bt.isLegalButSanskrit()) allStrictlyLegalParses.add(sl); allNonillegalParses.add(sl); } diff --git a/source/org/thdl/tib/text/ttt/TStackList.java b/source/org/thdl/tib/text/ttt/TStackList.java index 0b34760..349ad97 100644 --- a/source/org/thdl/tib/text/ttt/TStackList.java +++ b/source/org/thdl/tib/text/ttt/TStackList.java @@ -144,6 +144,14 @@ class TStackList { if (isLegal) { if (isClearlyIllegal()) isLegal = false; + TPairList firstStack = this.get(0); + if (1 == firstStack.size() + && firstStack.get(0).isPrefix() + && null == firstStack.get(0).getRight() // because GAM is legal + && !(candidateType.startsWith("prefix") + || candidateType.startsWith("appendaged-prefix"))) { + isLegal = false; + } } boolean isLegalAndHasAVowelOnRoot = false; @@ -163,8 +171,8 @@ class TStackList { } } return new BoolTriple(isLegal, - (candidateType == "single-sanskrit-gc"), - isLegalAndHasAVowelOnRoot); + isLegalAndHasAVowelOnRoot, + candidateType); } private static final boolean ddebug = false; @@ -241,20 +249,51 @@ class TStackList { } } -/** Too simple to comment. */ +/** A BoolTriple is used to convey the legality of a particular tsheg + * bar. (FIXME: This class is misnamed.) + * @author David Chandler */ class BoolTriple implements Comparable { + + /** candidateType is a {@link + org.thdl.tib.text.TibTextUtils#getClassificationOfTshegBar(TGCList,StringBuffer,boolean)} + concept. You cannot derive isLegal() from it because {@link + TStackList#isClearlyIllegal()} and more (think {BNA}) comes + into play. */ + String candidateType; + + + /** True if and only if the tsheg bar is a native Tibetan tsheg + bar or is a single Sanskrit grapheme cluster. + @see #isLegalButSanskrit() */ boolean isLegal; - boolean isLegalButSanskrit; // some subset are legal but legal Sanskrit -- the single sanskrit stacks are this way, such as B+DE. + + + /** Some subset of tsheg bars are legal but legal Sanskrit -- the + single sanskrit stacks are this way, such as B+DE. We treat + such a thing as legal because B+DE is the perfect way to input + such a thing. But then, we treat B+DEB+DE as illegal, even + though it too is perfect. So we're inconsistent (LOW-PRIORITY + FIXME), but you really have to watch what happens to + coloration and warning messages if you change this. */ + boolean isLegalButSanskrit() { + return (candidateType == "single-sanskrit-gc"); + } + + /** True if and only if {@link #isLegal} is true and there may be + an ACIP "A" vowel on the root stack. */ boolean isLegalAndHasAVowelOnRoot; BoolTriple(boolean isLegal, - boolean isLegalButSanskrit, - boolean isLegalAndHasAVowelOnRoot) { - if (!isLegal && (isLegalButSanskrit || isLegalAndHasAVowelOnRoot)) - throw new IllegalArgumentException(); + boolean isLegalAndHasAVowelOnRoot, + String candidateType) { this.isLegal = isLegal; - this.isLegalButSanskrit = isLegalButSanskrit; this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot; + this.candidateType = candidateType; + if (!isLegal && (isLegalButSanskrit() || isLegalAndHasAVowelOnRoot)) + throw new IllegalArgumentException(); } + + /** The more legal and standard a tsheg bar is, the higher score + it has. */ private int score() { int score = 0; if (isLegalAndHasAVowelOnRoot) { @@ -263,12 +302,16 @@ class BoolTriple implements Comparable { if (isLegal) { score += 5; } - if (isLegalButSanskrit) { + if (isLegalButSanskrit()) { score -= 3; } return score; } - /** The most legal BoolTriple compares higher. */ + + + /** The "most legal" BoolTriple compares higher. Native Tibetan + beats Sanskrit; native tibetan with a vowel on the root stack + beats native Tibetan without. */ public int compareTo(Object o) { BoolTriple b = (BoolTriple)o; return score() - b.score();