I've got too many sandboxes, so I'm committing these changes,

half-done, from one sandbox so as to consolidate my sandboxes.
This commit is contained in:
dchandler 2003-04-12 20:56:20 +00:00
parent 6e05b60cff
commit daacf6ee3b
7 changed files with 1252 additions and 7 deletions

View file

@ -18,6 +18,8 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.tshegbar;
import java.util.Vector;
import org.thdl.tib.text.THDLWylieConstants;
import org.thdl.util.ThdlDebug;
@ -42,15 +44,15 @@ import org.thdl.util.ThdlDebug;
* exception is that 'i (i.e., the connective case marker), 'u, and
* 'o suffixes are permitted.</li>
*
* <li>It has at most one suffix, which is a single consonant or a
* string consisting of 'i, 'u, 'o, 'am, and 'ang.</li>
*
*
DLC FIXME: we must allow many suffixes. See Andres' e-mail below:
* <li>It has at most one suffix, which is a single consonant (the
* common case) or a string consisting of 'i, 'u, 'o, 'am, and
* 'ang.
<p>See Andres' e-mail below:</p>
<pre>
David,
It is a particle that means "or" as opposed to "dang" that means and.
['am] is a particle that means "or" as opposed to "dang" that means and.
"sgom pa'am" would mean "... or meditation"
@ -65,6 +67,7 @@ And also there are cases where they combine. For ex you can have
Andres
</pre>
</li>
*
*
* <li>It may contain a EWC_sa or EWC_da postsuffix iff there exists
@ -681,7 +684,7 @@ public final class LegalTshegBar
}
/** Like {@link
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}
* but geared for the common case where the suffix is simply a
* consonant. */
public static boolean formsLegalTshegBar(char prefix,
@ -1138,4 +1141,350 @@ public final class LegalTshegBar
/** Returns the string representation of this object, which is
 *  exactly what {@link #toConciseXML()} returns. */
public String toString() {
    final String conciseXml = toConciseXML();
    return conciseXml;
}
/** Convenience overload that considers every element of grcls:
 *  returns whatever {@link #formsLegalTshegBar(Vector,int,int)}
 *  returns for the index range [0, grcls.size()).
 *  @param grcls the grapheme clusters to test
 *  @return the result of the range-based check over the whole
 *  Vector */
private static boolean formsLegalTshegBar(Vector grcls) {
    final int numberOfClusters = grcls.size();
    return formsLegalTshegBar(grcls, 0, numberOfClusters);
}
/** Returns true iff the given UnicodeGraphemeClusters form a
 *  syntactically legal Tibetan syllable.  A null prefix, suffix, or
 *  postsuffix means that that component is not present; an absent
 *  prefix or postsuffix is handed on to the character-based check
 *  as EW_ABSENT.
 *
 *  <p>DLC FIXME: suffix handling is not yet implemented, so a
 *  non-null suffix currently causes an Error to be thrown.</p>
 *
 *  @param prefix the prefix, or null if not present
 *  @param root the root stack; must not be null
 *  @param suffix the suffix, or null if not present
 *  @param postsuffix the postsuffix, or null if not present
 *  @return false if root is not legal Tibetan; otherwise the result
 *  of the character-based formsLegalTshegBar check
 *  @exception IllegalArgumentException if root is null, or if
 *  postsuffix is non-null and suffix is null (these being clearly
 *  illegal)
 */
private static boolean formsLegalTshegBar(UnicodeGraphemeCluster prefix,
                                          UnicodeGraphemeCluster root,
                                          UnicodeGraphemeCluster suffix,
                                          UnicodeGraphemeCluster postsuffix)
    throws IllegalArgumentException
{
    // reality checks:
    if (null == root)
        throw new IllegalArgumentException("root letter is not present");
    if (null != postsuffix && null == suffix)
        throw new IllegalArgumentException("a postsuffix cannot occur without a suffix");

    // handle root:
    if (!root.isLegalTibetan())
        return false;
    char headLetter = root.getSuperscribedLetter();
    char rootLetter = root.getRootCP();
    char subjoinedLetter = root.getSoleNonWazurSubjoinedLetter();
    char vowel = root.getVowel();
    boolean hasAchung = root.hasAchung();
    boolean hasWazur = root.hasWazur();

    // handle prefix: null means "not present", so it must not be
    // dereferenced.
    char prefixLetter
        = (null == prefix) ? EW_ABSENT : prefix.getSoleTibetanUnicodeCP();

    // handle suffix:
    String suffixString = null;
    if (null != suffix) {
        // DLC FIXME suffixString = suffix.getUnicodeInUsualOrder();
        throw new Error("DLC FIXME");
    }

    // handle postsuffix: null means "not present", so it must not be
    // dereferenced.
    char postsuffixLetter
        = (null == postsuffix) ? EW_ABSENT : postsuffix.getSoleTibetanUnicodeCP();

    return formsLegalTshegBar(prefixLetter, headLetter, rootLetter,
                              subjoinedLetter, hasWazur, hasAchung,
                              suffixString, postsuffixLetter, vowel, null);
}

/** Returns true iff the UnicodeGraphemeClusters in grcls with
 *  indices in the range [start, end) form a syntactically legal
 *  syllable.  If start is as large as end, false is returned.
 *  Ranges of more than four grapheme clusters are also rejected.
 *
 *  <p>Each possible assignment of the clusters to (prefix, root,
 *  suffix, postsuffix) is tried in turn via the four-cluster
 *  overload, so that overload's exceptions (including its
 *  not-yet-implemented suffix handling) can surface here.</p>
 *
 *  @param grcls a Vector of UnicodeGraphemeClusters
 *  @param start index of the first cluster, inclusive
 *  @param end index just past the last cluster, exclusive
 *  @return true iff some assignment of the clusters forms a legal
 *  syllable */
private static boolean formsLegalTshegBar(Vector grcls,
                                          int start,
                                          int end)
{
    // The range is half-open, so its length is end - start.
    int numGrcls = end - start;
    if (numGrcls <= 0)
        return false;
    if (numGrcls > 4) {
        // the largest has 'i ... DLC FIXME rethink -- even the case
        // where numGrcls == 3 could be pa'am
        return false;
    }

    UnicodeGraphemeCluster first
        = (UnicodeGraphemeCluster)grcls.elementAt(start);
    if (numGrcls == 1) {
        // Option 1: (root)
        // else: return false;
        return formsLegalTshegBar(null, first, null, null);
    }

    UnicodeGraphemeCluster second
        = (UnicodeGraphemeCluster)grcls.elementAt(start + 1);
    if (numGrcls == 2) {
        // Option 1: (prefix, root)
        // Option 2: (root, suffix)
        // else: return false;
        return (formsLegalTshegBar(first, second, null, null)
                || formsLegalTshegBar(null, first, second, null));
    }

    UnicodeGraphemeCluster third
        = (UnicodeGraphemeCluster)grcls.elementAt(start + 2);
    if (numGrcls == 3) {
        // Option 1: (prefix, root, suffix)
        // Option 2: (root, suffix, postsuffix)
        // else: return false;
        return (formsLegalTshegBar(first, second, third, null)
                || formsLegalTshegBar(null, first, second, third));
    }

    // numGrcls == 4: (prefix, root, suffix, postsuffix)
    UnicodeGraphemeCluster fourth
        = (UnicodeGraphemeCluster)grcls.elementAt(start + 3);
    return formsLegalTshegBar(first, second, third, fourth);
}
/** Tells whether the given Tibetan consonant stack (i.e., the
 *  combination of superscribed, root, and subscribed letters) takes
 *  an EWC_ga prefix.  Only a bare root letter -- one with neither a
 *  superscribed nor a subjoined letter -- is ever accepted.
 *  @param head the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the superscribed letter, or EW_ABSENT if
 *  not present
 *  @param root the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the root letter
 *  @param sub the {@link isNominalRepresentationOfConsonant(char)
 *  nominal representation} of the subjoined letter, or EW_ABSENT
 *  if not present
 *  @return true iff head and sub are both EW_ABSENT and root is one
 *  of the root letters listed in the body */
static boolean takesGao(char head, char root, char sub) {
    // Any superscribed or subjoined letter rules the stack out.
    if (head != EW_ABSENT || sub != EW_ABSENT) {
        return false;
    }
    return (root == EWC_ca
            || root == EWC_ta
            || root == EWC_da
            || root == EWC_tsa
            || root == EWC_zha
            || root == EWC_za
            || root == EWC_ya
            || root == EWC_sha
            || root == EWC_sa
            || root == EWC_nya
            || root == EWC_na);
}
/** Returns true if the given Tibetan consonant stack (i.e., the
 *  combination of superscribed, root, and subscribed letters)
 *  takes an EWC_da prefix.  No stack with a superscribed letter is
 *  accepted.
 *  @param head the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the superscribed letter, or EW_ABSENT if
 *  not present
 *  @param root the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the root letter
 *  @param sub the {@link isNominalRepresentationOfConsonant(char)
 *  nominal representation} of the subjoined letter, or EW_ABSENT
 *  if not present
 *  @return true iff head is EW_ABSENT and the (root, sub)
 *  combination is one of those listed in the body */
static boolean takesDao(char head, char root, char sub) {
    if (EW_ABSENT == head) {
        if (EW_ABSENT == sub) {
            return (EWC_ka == root
                    || EWC_ga == root
                    || EWC_nga == root
                    || EWC_pa == root
                    || EWC_ba == root
                    || EWC_ma == root);
        } else {
            // ka + ya was missing from this table even though it is
            // a da-prefixed stack (dkya, as in "dkyil").
            return ((EWC_ka == root && EWC_ya == sub)
                    || (EWC_ga == root && EWC_ya == sub)
                    || (EWC_pa == root && EWC_ya == sub)
                    || (EWC_ba == root && EWC_ya == sub)
                    || (EWC_ma == root && EWC_ya == sub)

                    || (EWC_ka == root && EWC_ra == sub)
                    || (EWC_ga == root && EWC_ra == sub)
                    || (EWC_ba == root && EWC_ra == sub)
                    || (EWC_pa == root && EWC_ra == sub));
        }
    } else {
        return false;
    }
}
/** Tells whether the given Tibetan consonant stack (i.e., the
 *  combination of superscribed, root, and subscribed letters) takes
 *  an EWC_achung prefix.  No stack with a superscribed letter is
 *  accepted.
 *  @param head the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the superscribed letter, or EW_ABSENT if
 *  not present
 *  @param root the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the root letter
 *  @param sub the {@link isNominalRepresentationOfConsonant(char)
 *  nominal representation} of the subjoined letter, or EW_ABSENT
 *  if not present
 *  @return true iff head is EW_ABSENT and the (root, sub)
 *  combination is one of those listed in the body */
static boolean takesAchungPrefix(char head, char root, char sub) {
    // A superscribed letter rules the stack out.
    if (head != EW_ABSENT) {
        return false;
    }

    // Bare root letter, nothing subjoined.
    if (sub == EW_ABSENT) {
        return (root == EWC_ga
                || root == EWC_ja
                || root == EWC_da
                || root == EWC_ba
                || root == EWC_dza
                || root == EWC_kha
                || root == EWC_cha
                || root == EWC_tha
                || root == EWC_pha
                || root == EWC_tsha);
    }

    // Root letter with a subjoined letter: group the legal roots by
    // which letter is subjoined.
    boolean rootTakesYa = (root == EWC_pha
                           || root == EWC_ba
                           || root == EWC_kha
                           || root == EWC_ga);
    boolean rootTakesRa = (root == EWC_ba
                           || root == EWC_kha
                           || root == EWC_ga
                           || root == EWC_da
                           || root == EWC_pha);
    return ((sub == EWC_ya && rootTakesYa)
            || (sub == EWC_ra && rootTakesRa));
}
/** Tells whether the given Tibetan consonant stack (i.e., the
 *  combination of superscribed, root, and subscribed letters) takes
 *  an EWC_ma prefix.  No stack with a superscribed letter is
 *  accepted.
 *  @param head the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the superscribed letter, or EW_ABSENT if
 *  not present
 *  @param root the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the root letter
 *  @param sub the {@link isNominalRepresentationOfConsonant(char)
 *  nominal representation} of the subjoined letter, or EW_ABSENT
 *  if not present
 *  @return true iff head is EW_ABSENT and the (root, sub)
 *  combination is one of those listed in the body */
static boolean takesMao(char head, char root, char sub) {
    // A superscribed letter rules the stack out.
    if (head != EW_ABSENT) {
        return false;
    }

    // Bare root letter, nothing subjoined.
    if (sub == EW_ABSENT) {
        return (root == EWC_kha
                || root == EWC_ga
                || root == EWC_cha
                || root == EWC_ja
                || root == EWC_tha
                || root == EWC_tsha
                || root == EWC_da
                || root == EWC_dza
                || root == EWC_nga
                || root == EWC_nya
                || root == EWC_na);
    }

    // With a subjoined letter, the legal stacks are exactly the
    // roots kha and ga, each with either ya or ra subjoined.
    boolean rootIsKhaOrGa = (root == EWC_kha || root == EWC_ga);
    boolean subIsYaOrRa = (sub == EWC_ya || sub == EWC_ra);
    return rootIsKhaOrGa && subIsYaOrRa;
}
/** Returns true if the given Tibetan consonant stack (i.e., the
 *  combination of superscribed, root, and subscribed letters)
 *  takes an EWC_ba prefix.  Unlike the other prefix checks in this
 *  class, stacks with a superscribed letter can be accepted here.
 *  @param head the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the superscribed letter, or EW_ABSENT if
 *  not present
 *  @param root the {@link
 *  isNominalRepresentationOfConsonant(char) nominal
 *  representation} of the root letter
 *  @param sub the {@link isNominalRepresentationOfConsonant(char)
 *  nominal representation} of the subjoined letter, or EW_ABSENT
 *  if not present
 *  @return true iff the (head, root, sub) combination is one of
 *  those listed in the body */
static boolean takesBao(char head, char root, char sub) {
    // DLC ask Ten-lo la about Wazur.
    if (EW_ABSENT == head) {
        if (EW_ABSENT == sub) {
            // Bare root letter.  EWC_sa was missing from this table
            // even though a bare sa root takes the ba prefix (bsa,
            // as in "bsam"), and sa + ra and sa + la are accepted
            // below.
            return (EWC_ka == root
                    || EWC_ca == root
                    || EWC_ta == root
                    || EWC_tsa == root
                    || EWC_ga == root
                    || EWC_nga == root
                    || EWC_ja == root
                    || EWC_nya == root
                    || EWC_da == root
                    || EWC_na == root
                    || EWC_dza == root
                    || EWC_zha == root
                    || EWC_za == root
                    || EWC_ra == root
                    || EWC_la == root
                    || EWC_sha == root
                    || EWC_sa == root);
        } else {
            // kra, e.g.
            return ((EWC_ka == root && EWC_ya == sub)
                    || (EWC_ga == root && EWC_ya == sub)

                    || (EWC_ka == root && EWC_ra == sub)
                    || (EWC_ga == root && EWC_ra == sub)
                    || (EWC_sa == root && EWC_ra == sub)

                    || (EWC_ka == root && EWC_la == sub)
                    || (EWC_za == root && EWC_la == sub)
                    || (EWC_ra == root && EWC_la == sub)
                    || (EWC_sa == root && EWC_la == sub));
        }
    } else {
        if (EW_ABSENT == sub) {
            // ska, e.g.
            return ((EWC_sa == head && EWC_ka == root)
                    || (EWC_sa == head && EWC_ga == root)
                    || (EWC_sa == head && EWC_nga == root)
                    || (EWC_sa == head && EWC_nya == root)
                    || (EWC_sa == head && EWC_ta == root)
                    || (EWC_sa == head && EWC_da == root)
                    || (EWC_sa == head && EWC_na == root)
                    || (EWC_sa == head && EWC_tsa == root)

                    || (EWC_ra == head && EWC_ka == root)
                    || (EWC_ra == head && EWC_ga == root)
                    || (EWC_ra == head && EWC_nga == root)
                    || (EWC_ra == head && EWC_ja == root)
                    || (EWC_ra == head && EWC_nya == root)
                    || (EWC_ra == head && EWC_ta == root)
                    || (EWC_ra == head && EWC_da == root)
                    || (EWC_ra == head && EWC_na == root)
                    || (EWC_ra == head && EWC_tsa == root)
                    || (EWC_ra == head && EWC_dza == root)

                    || (EWC_la == head && EWC_ta == root)
                    || (EWC_la == head && EWC_da == root));
        } else {
            // Superscribed, root, and subjoined letters all present.
            return ((EWC_ra == head && EWC_ka == root && EWC_ya == sub)
                    || (EWC_ra == head && EWC_ga == root && EWC_ya == sub)
                    || (EWC_sa == head && EWC_ka == root && EWC_ya == sub)
                    || (EWC_sa == head && EWC_ga == root && EWC_ya == sub)
                    || (EWC_sa == head && EWC_ka == root && EWC_ra == sub)
                    || (EWC_sa == head && EWC_ga == root && EWC_ra == sub));
        }
    }
}
}