ant private-javadocs runs without warnings; cleaned up some as-yet-unused code.
This commit is contained in:
dchandler 2003-04-13 01:46:20 +00:00
parent 644c0d3801
commit 6636d03a41
9 changed files with 158 additions and 145 deletions

View file

@ -19,7 +19,7 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text; package org.thdl.tib.text;
/** This is where basic, static knowledge of THDL's Extended Wylie is housed. /** This is where basic, static knowledge of THDL's Extended Wylie is housed.
* @see org.thdl.tib.text#TibetanMachineWeb */ * @see TibetanMachineWeb */
public interface THDLWylieConstants { public interface THDLWylieConstants {
/** /**
* the Wylie for bindu/anusvara * the Wylie for bindu/anusvara

View file

@ -146,7 +146,7 @@ public final class LegalTshegBar
* must not be absent. To learn about the arguments, and to be * must not be absent. To learn about the arguments, and to be
* sure that your input won't cause an exception to be thrown, * sure that your input won't cause an exception to be thrown,
* see {@link * see {@link
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}. * #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}.
* *
* @exception IllegalArgumentException if the rootLetter is not * @exception IllegalArgumentException if the rootLetter is not
* one of the thirty consonants (and represented nominally, at * one of the thirty consonants (and represented nominally, at
@ -712,7 +712,7 @@ public final class LegalTshegBar
* @exception IllegalArgumentException if the syllable does not * @exception IllegalArgumentException if the syllable does not
* follow the rules of a Tibetan syllable. To learn about the * follow the rules of a Tibetan syllable. To learn about the
* arguments, see {@link * arguments, see {@link
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}. */ * #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}. */
private static void throwIfNotLegalTshegBar(char prefix, private static void throwIfNotLegalTshegBar(char prefix,
char headLetter, char headLetter,
char rootLetter, char rootLetter,
@ -745,7 +745,7 @@ public final class LegalTshegBar
/** If you get through this gauntlet without having an exception /** If you get through this gauntlet without having an exception
* thrown, then this combination makes a legal Tibetan syllable. * thrown, then this combination makes a legal Tibetan syllable.
* To learn about the arguments, see {@link * To learn about the arguments, see {@link
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}. * #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}.
* @param errorBuf if non-null, the reason this is illegal will * @param errorBuf if non-null, the reason this is illegal will
* be written here, if this is illegal * be written here, if this is illegal
* @return true if this syllable is legal, false if this syllable * @return true if this syllable is legal, false if this syllable
@ -1257,13 +1257,13 @@ public final class LegalTshegBar
* combination of superscribed, root, and subscribed letters) * combination of superscribed, root, and subscribed letters)
* takes an EWC_ga prefix. * takes an EWC_ga prefix.
* @param head the {@link * @param head the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the superscribed letter, or EW_ABSENT if * representation} of the superscribed letter, or EW_ABSENT if
* not present * not present
* @param root the {@link * @param root the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the root letter * representation} of the root letter
* @param sub the {@link isNominalRepresentationOfConsonant(char) * @param sub the {@link #isNominalRepresentationOfConsonant(char)
* nominal representation} of the subjoined letter, or EW_ABSENT * nominal representation} of the subjoined letter, or EW_ABSENT
* if not present */ * if not present */
static boolean takesGao(char head, char root, char sub) { static boolean takesGao(char head, char root, char sub) {
@ -1289,13 +1289,13 @@ public final class LegalTshegBar
* combination of superscribed, root, and subscribed letters) * combination of superscribed, root, and subscribed letters)
* takes an EWC_da prefix. * takes an EWC_da prefix.
* @param head the {@link * @param head the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the superscribed letter, or EW_ABSENT if * representation} of the superscribed letter, or EW_ABSENT if
* not present * not present
* @param root the {@link * @param root the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the root letter * representation} of the root letter
* @param sub the {@link isNominalRepresentationOfConsonant(char) * @param sub the {@link #isNominalRepresentationOfConsonant(char)
* nominal representation} of the subjoined letter, or EW_ABSENT * nominal representation} of the subjoined letter, or EW_ABSENT
* if not present */ * if not present */
static boolean takesDao(char head, char root, char sub) { static boolean takesDao(char head, char root, char sub) {
@ -1327,13 +1327,13 @@ public final class LegalTshegBar
* combination of superscribed, root, and subscribed letters) * combination of superscribed, root, and subscribed letters)
* takes an EWC_achung prefix. * takes an EWC_achung prefix.
* @param head the {@link * @param head the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the superscribed letter, or EW_ABSENT if * representation} of the superscribed letter, or EW_ABSENT if
* not present * not present
* @param root the {@link * @param root the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the root letter * representation} of the root letter
* @param sub the {@link isNominalRepresentationOfConsonant(char) * @param sub the {@link #isNominalRepresentationOfConsonant(char)
* nominal representation} of the subjoined letter, or EW_ABSENT * nominal representation} of the subjoined letter, or EW_ABSENT
* if not present */ * if not present */
static boolean takesAchungPrefix(char head, char root, char sub) { static boolean takesAchungPrefix(char head, char root, char sub) {
@ -1370,13 +1370,13 @@ public final class LegalTshegBar
* combination of superscribed, root, and subscribed letters) * combination of superscribed, root, and subscribed letters)
* takes an EWC_ma prefix. * takes an EWC_ma prefix.
* @param head the {@link * @param head the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the superscribed letter, or EW_ABSENT if * representation} of the superscribed letter, or EW_ABSENT if
* not present * not present
* @param root the {@link * @param root the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the root letter * representation} of the root letter
* @param sub the {@link isNominalRepresentationOfConsonant(char) * @param sub the {@link #isNominalRepresentationOfConsonant(char)
* nominal representation} of the subjoined letter, or EW_ABSENT * nominal representation} of the subjoined letter, or EW_ABSENT
* if not present */ * if not present */
static boolean takesMao(char head, char root, char sub) { static boolean takesMao(char head, char root, char sub) {
@ -1409,13 +1409,13 @@ public final class LegalTshegBar
* combination of superscribed, root, and subscribed letters) * combination of superscribed, root, and subscribed letters)
* takes an EWC_ba prefix. * takes an EWC_ba prefix.
* @param head the {@link * @param head the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the superscribed letter, or EW_ABSENT if * representation} of the superscribed letter, or EW_ABSENT if
* not present * not present
* @param root the {@link * @param root the {@link
* isNominalRepresentationOfConsonant(char) nominal * #isNominalRepresentationOfConsonant(char) nominal
* representation} of the root letter * representation} of the root letter
* @param sub the {@link isNominalRepresentationOfConsonant(char) * @param sub the {@link #isNominalRepresentationOfConsonant(char)
* nominal representation} of the subjoined letter, or EW_ABSENT * nominal representation} of the subjoined letter, or EW_ABSENT
* if not present */ * if not present */
static boolean takesBao(char head, char root, char sub) { static boolean takesBao(char head, char root, char sub) {

View file

@ -18,6 +18,8 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.tshegbar; package org.thdl.tib.text.tshegbar;
import java.util.Vector;
/** DLC FIXMEDOC: says "this isn't legal Tibetan", not "this isn't a valid sequence of Unicode" */ /** DLC FIXMEDOC: says "this isn't legal Tibetan", not "this isn't a valid sequence of Unicode" */
class TibetanSyntaxException extends Exception { class TibetanSyntaxException extends Exception {
/** This constructor creates an exception with a less than helpful /** This constructor creates an exception with a less than helpful
@ -46,6 +48,6 @@ class TibetanSyntaxException extends Exception {
GraphemeCluster in the syntactically incorrect stretch of GraphemeCluster in the syntactically incorrect stretch of
Tibetan. */ Tibetan. */
TibetanSyntaxException(Vector grcls, int start, int end) { TibetanSyntaxException(Vector grcls, int start, int end) {
DLC NOW; throw new Error("DLC NOW");
} }
} }

View file

@ -18,6 +18,8 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.tshegbar; package org.thdl.tib.text.tshegbar;
import org.thdl.util.ThdlDebug;
/** DLC FIXMEDOC */ /** DLC FIXMEDOC */
class TransitionInstruction implements UnicodeReadingStateMachineConstants { class TransitionInstruction implements UnicodeReadingStateMachineConstants {
@ -25,19 +27,19 @@ class TransitionInstruction implements UnicodeReadingStateMachineConstants {
TransitionInstruction(int nextState, int action) { TransitionInstruction(int nextState, int action) {
super(); super();
assert(action == ACTION_CONTINUES_GRAPHEME_CLUSTER ThdlDebug.verify(action == ACTION_CONTINUES_GRAPHEME_CLUSTER
|| action == ACTION_BEGINS_NEW_GRAPHEME_CLUSTER || action == ACTION_BEGINS_NEW_GRAPHEME_CLUSTER
|| action == ACTION_PREPEND_WITH_0F68); || action == ACTION_PREPEND_WITH_0F68); // DLC FIXME: assert this.
assert(nextState == STATE_START ThdlDebug.verify(nextState == STATE_START
|| nextState == STATE_READY || nextState == STATE_READY
|| nextState == STATE_DIGIT || nextState == STATE_DIGIT
|| nextState == STATE_STACKING || nextState == STATE_STACKING
|| nextState == STATE_STACKPLUSACHUNG || nextState == STATE_STACKPLUSACHUNG
|| nextState == STATE_PARTIALMARK); || nextState == STATE_PARTIALMARK); // DLC FIXME: assert this.
// we start in the start state, but we can never return to it. // we start in the start state, but we can never return to it.
assert(nextState != STATE_START); ThdlDebug.verify(nextState != STATE_START); // DLC FIXME: assert this.
this.nextState = nextState; this.nextState = nextState;
this.action = action; this.action = action;
@ -55,4 +57,60 @@ class TransitionInstruction implements UnicodeReadingStateMachineConstants {
int getAction() { return action; } int getAction() { return action; }
int getNextState() { return nextState; } int getNextState() { return nextState; }
/** Returns the codepoint class (one of the CC_ constants, such as
 *  {@link UnicodeReadingStateMachineConstants#CC_SJC}) for cp.
 *  Codepoints that match none of the explicit tests below are
 *  classified as CC_SIN.
 *  @param cp a Unicode codepoint, which MUST be nondecomposable
 *  if it is in the Tibetan range but can be from outside the
 *  Tibetan range of Unicode
 *  @return the CC_ constant naming the codepoint class of cp */
static int getCCForCP(char cp) {
    // Checks the precondition documented above.  NOTE(review):
    // presumably getNFTHDL returns null exactly when cp has no
    // decomposition -- confirm against its definition.
    ThdlDebug.verify(getNFTHDL(cp) == null); // DLC FIXME: assert this

    // Codepoints that form a class all by themselves are tested
    // first, so the range and list tests further down never see
    // them.
    if ('\u0F82' == cp) {
        return CC_0F82;
    } else if ('\u0F8A' == cp) {
        return CC_0F8A;
    } else if ('\u0F39' == cp) {
        return CC_0F39;
    } else if ('\u0F71' == cp) {
        return CC_SUBSCRIBED_ACHUNG;
    } else if ('\u0F40' <= cp && cp <= '\u0F6A') {
        // U+0F48 lies inside this range but must never be passed in.
        ThdlDebug.verify(cp != '\u0F48'); // DLC FIXME: assert this
        return CC_CON;
    } else if ('\u0F90' <= cp && cp <= '\u0FBC') {
        // U+0F98 lies inside this range but must never be passed in.
        ThdlDebug.verify(cp != '\u0F98'); // DLC FIXME: assert this
        return CC_SJC;
    } else if ('\u0F20' <= cp && cp <= '\u0F33') {
        return CC_DIGIT;
    } else if (/* DLC NOW do these combine ONLY with digits, or do CC_CM just NOT combine with digits? */
               '\u0F3E' == cp
               || '\u0F3F' == cp
               || '\u0F18' == cp
               || '\u0F19' == cp) {
        return CC_MCWD;
    } else if ('\u0FC6' == cp
               || '\u0F87' == cp
               || '\u0F86' == cp
               || '\u0F84' == cp
               || '\u0F83' == cp
               // U+0F82 is intentionally not listed here: it has
               // already returned CC_0F82 above, so a second test
               // for it in this chain could never be reached.
               || '\u0F7F' == cp
               || '\u0F7E' == cp
               || '\u0F37' == cp /* DLC NOW NORMALIZATION OF 0F10, 11 to 0F0F ??? */
               || '\u0F35' == cp) {
        return CC_CM;
    } else if ('\u0F72' == cp
               || '\u0F74' == cp
               || '\u0F7A' == cp
               || '\u0F7B' == cp
               || '\u0F7C' == cp
               || '\u0F7D' == cp
               || '\u0F80' == cp) {
        // DLC what about U+0F84 ??? CC_V or CC_CM ?
        // (As written, U+0F84 is classified as CC_CM by the test above.)
        return CC_V;
    } else {
        // Everything not matched above, inside or outside the
        // Tibetan range.
        return CC_SIN;
    }
}
} }

View file

@ -387,7 +387,7 @@ public class UnicodeGraphemeCluster
throw new Error("DLC FIXME"); throw new Error("DLC FIXME");
} }
// DLC NOW -- LegalSyllable doesn't handle digits w/ underlining, etc. // DLC NOW -- LegalTshegBar doesn't handle digits w/ underlining, etc.
/** If this is a Tibetan consonant stack, this returns the root /** If this is a Tibetan consonant stack, this returns the root
* letter. If this is a Tibetan digit (perhaps with other * letter. If this is a Tibetan digit (perhaps with other

View file

@ -32,6 +32,7 @@ import java.io.InputStream;
public class UnicodeReader { public class UnicodeReader {
/** You cannot instantiate this class. */ /** You cannot instantiate this class. */
private UnicodeReader() { } private UnicodeReader() { }
// DLC NOW
// public static TTBIR parsePerfectUnicode() { // public static TTBIR parsePerfectUnicode() {
// } // }

View file

@ -18,73 +18,22 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.tshegbar; package org.thdl.tib.text.tshegbar;
/** Constants and static routines (DLC still?) useful in writing state /** Constants useful in writing state machines for transforming
* machines for transforming Unicode input into other forms. * Unicode input into other forms.
*
* @see TransitionInstruction#getCCForCP(char)
* *
* @author David Chandler * @author David Chandler
*/ */
interface UnicodeReadingStateMachineConstants { interface UnicodeReadingStateMachineConstants {
/** Returns the codepoint class for cp, e.g. {@link #CC_SJC}. // Codepoint classes (CC_...) follow. These are mutually
* @param cp a Unicode codepoint, which MUST be nondecomposable
* if it is in the Tibetan range but can be from outside the
* Tibetan range of Unicode */
static int getCCForCP(char cp) {
assert(getNFTHDL(cp) == null);
if ('\u0F82' == cp) {
return CC_0F82;
} else if ('\u0F8A' == cp) {
return CC_0F8A;
} else if ('\u0F39' == cp) {
return CC_0F39;
} else if ('\u0F71' == cp) {
return CC_ACHUNG;
} else if ('\u0F40' <= cp && cp <= '\u0F6A') {
assert(cp != '\u0F48');
return CC_CON;
} else if ('\u0F90' <= cp && cp <= '\u0FBC') {
assert(cp != '\u0F98');
return CC_SJC;
} else if ('\u0F20' <= cp && cp <= '\u0F33') {
return CC_DIGIT;
} else if (/* DLC NOW do these combine ONLY with digits, or do CC_CM just NOT combine with digits? */
'\u0F3E' == cp
|| '\u0F3F' == cp
|| '\u0F18' == cp
|| '\u0F19' == cp) {
return CC_MCWD;
} else if ('\u0FC6' == cp
|| '\u0F87' == cp
|| '\u0F86' == cp
|| '\u0F84' == cp
|| '\u0F83' == cp
|| '\u0F82' == cp
|| '\u0F7F' == cp
|| '\u0F7E' == cp
|| '\u0F37' == cp /* DLC NOW NORMALIZATION OF 0F10, 11 to 0F0F ??? */
|| '\u0F35' == cp) {
return CC_CM;
} else if ('\u0F72' == cp
|| '\u0F74' == cp
|| '\u0F7A' == cp
|| '\u0F7B' == cp
|| '\u0F7C' == cp
|| '\u0F7D' == cp
|| '\u0F80' == cp) {
// DLC what about U+0F84 ??? CC_V or CC_CM ?
return CC_V;
} else {
return CC_SIN;
}
}
// codepoint classes (CC_...) follow. These are mutually
// exclusive, and their union is the whole of Unicode. // exclusive, and their union is the whole of Unicode.
/** for everything else, i.e. non-Tibetan characters like U+0E00 /** for everything else, that is to say non-Tibetan characters
* and also Tibetan characters like U+0FCF and U+0F05 (DLC rename * like U+0E00 and also Tibetan characters like U+0FCF and U+0F05
* SIN[GLETON] to OTHER as combining marks from outside the * (DLC rename SIN[GLETON] to OTHER as combining marks from
* Tibetan range count as this) but not U+0F8A */ * outside the Tibetan range count as this) but not U+0F8A */
static final int CC_SIN = 0; static final int CC_SIN = 0;
/** for combining marks in the Tibetan range of Unicode that /** for combining marks in the Tibetan range of Unicode that
@ -95,18 +44,18 @@ interface UnicodeReadingStateMachineConstants {
* CC_MCWD, U+0F82, and U+0F39 */ * CC_MCWD, U+0F82, and U+0F39 */
static final int CC_CM = 2; static final int CC_CM = 2;
/** for combining consonants, i.e. U+0F90-U+0FBC minus U+0F98 /** for combining consonants, that is to say U+0F90-U+0FBC minus
* minus the decomposable entries like U+0F93, U+0F9D, U+0FA2, * U+0F98 minus the decomposable entries like U+0F93, U+0F9D,
* etc. */ * U+0FA2, etc. */
static final int CC_SJC = 3; static final int CC_SJC = 3;
/** for noncombining consonants, i.e. U+0F40-U+0F6A minus U+0F48 /** for noncombining consonants, that is to say U+0F40-U+0F6A
* minus the decomposable entries like U+0F43, U+0F4D, U+0F52, * minus U+0F48 minus the decomposable entries like U+0F43,
* etc. */ * U+0F4D, U+0F52, etc. */
static final int CC_CON = 4; static final int CC_CON = 4;
/** for simple, nondecomposable vowels, i.e. U+0F72, U+0F74, /** for simple, nondecomposable vowels, that is to say U+0F72,
* U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */ * U+0F74, U+0F7A, U+0F7B, U+0F7C, U+0F7D, U+0F80 */
static final int CC_V = 5; static final int CC_V = 5;
/** for U+0F8A */ /** for U+0F8A */
@ -121,9 +70,9 @@ interface UnicodeReadingStateMachineConstants {
static final int CC_0F39 = 8; static final int CC_0F39 = 8;
/** for U+0F71 */ /** for U+0F71 */
static final int CC_ACHUNG = 9; static final int CC_SUBSCRIBED_ACHUNG = 9;
/** for digits, i.e. U+0F20-U+0F33 */ /** for digits, that is to say U+0F20-U+0F33 */
static final int CC_DIGIT = 10; static final int CC_DIGIT = 10;
@ -133,14 +82,14 @@ interface UnicodeReadingStateMachineConstants {
/** initial state */ /** initial state */
static final int STATE_START = 0; static final int STATE_START = 0;
/** ready state, i.e. the state in which some non-empty Unicode /** ready state, that is to say the state in which some non-empty
* String is in the holding area, <i>ready</i> to receive * Unicode String is in the holding area, <i>ready</i> to receive
* combining marks like U+0F35 */ * combining marks like U+0F35 */
static final int STATE_READY = 1; static final int STATE_READY = 1;
/** digit state, i.e. the state in which some non-empty Unicode /** digit state, that is to say the state in which some non-empty
* String consisting entirely of digits is in the holding area, * Unicode String consisting entirely of digits is in the holding
* ready to receive marks that combine only with digits */ * area, ready to receive marks that combine only with digits */
static final int STATE_DIGIT = 2; static final int STATE_DIGIT = 2;
/** state in which CC_SJC are welcomed and treated as consonants /** state in which CC_SJC are welcomed and treated as consonants

View file

@ -18,14 +18,17 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.tshegbar; package org.thdl.tib.text.tshegbar;
import java.util.Vector;
class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants { class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
/** Don't instantiate this class. */ /** Don't instantiate this class. */
private Foo() { super(); } private ValidatingUnicodeReader() { super(); }
/** This table tells how to transition from state a 6 states + error state */ /** This table tells how to transition from state to state upon
* encountering certain classes of Unicode codepoints. There are
* 6 legal states + an error state. */
private static final TransitionInstruction private static final TransitionInstruction
transitionTable[6 /* number of STATEs */] transitionTable[/* 6 is the number of STATEs */][/* 11 is the number of CC classes */]
[11 /* number of CC classes */]
= { = {
// STATE_START: // STATE_START:
{ {
@ -50,7 +53,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
null, null,
/* upon seeing CC_0F39 in this state: */ /* upon seeing CC_0F39 in this state: */
null, null,
/* upon seeing CC_ACHUNG in this state: */ /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
null, null,
/* upon seeing CC_DIGIT in this state: */ /* upon seeing CC_DIGIT in this state: */
new TransitionInstruction(STATE_DIGIT, new TransitionInstruction(STATE_DIGIT,
@ -73,7 +76,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
new TransitionInstruction(STATE_STACKING, new TransitionInstruction(STATE_STACKING,
ACTION_BEGINS_NEW_GRAPHEME_CLUSTER), ACTION_BEGINS_NEW_GRAPHEME_CLUSTER),
/* upon seeing CC_V in this state: */ /* upon seeing CC_V in this state: */
null null,
/* upon seeing CC_0F8A in this state: */ /* upon seeing CC_0F8A in this state: */
new TransitionInstruction(STATE_PARTIALMARK, new TransitionInstruction(STATE_PARTIALMARK,
ACTION_BEGINS_NEW_GRAPHEME_CLUSTER), ACTION_BEGINS_NEW_GRAPHEME_CLUSTER),
@ -82,7 +85,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
ACTION_CONTINUES_GRAPHEME_CLUSTER), ACTION_CONTINUES_GRAPHEME_CLUSTER),
/* upon seeing CC_0F39 in this state: */ /* upon seeing CC_0F39 in this state: */
null, null,
/* upon seeing CC_ACHUNG in this state: */ /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
null, // because 0F71 comes after SJCs, before Vs, and null, // because 0F71 comes after SJCs, before Vs, and
// before CMs. // before CMs.
/* upon seeing CC_DIGIT in this state: */ /* upon seeing CC_DIGIT in this state: */
@ -115,7 +118,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
ACTION_CONTINUES_GRAPHEME_CLUSTER), ACTION_CONTINUES_GRAPHEME_CLUSTER),
/* upon seeing CC_0F39 in this state: */ /* upon seeing CC_0F39 in this state: */
null, null,
/* upon seeing CC_ACHUNG in this state: */ /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
null, null,
/* upon seeing CC_DIGIT in this state: */ /* upon seeing CC_DIGIT in this state: */
new TransitionInstruction(STATE_DIGIT, new TransitionInstruction(STATE_DIGIT,
@ -149,7 +152,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
/* upon seeing CC_0F39 in this state: */ /* upon seeing CC_0F39 in this state: */
new TransitionInstruction(STATE_STACKING, new TransitionInstruction(STATE_STACKING,
ACTION_CONTINUES_GRAPHEME_CLUSTER), ACTION_CONTINUES_GRAPHEME_CLUSTER),
/* upon seeing CC_ACHUNG in this state: */ /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
new TransitionInstruction(STATE_STACKPLUSACHUNG, new TransitionInstruction(STATE_STACKPLUSACHUNG,
ACTION_CONTINUES_GRAPHEME_CLUSTER), ACTION_CONTINUES_GRAPHEME_CLUSTER),
/* upon seeing CC_DIGIT in this state: */ /* upon seeing CC_DIGIT in this state: */
@ -182,7 +185,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
ACTION_CONTINUES_GRAPHEME_CLUSTER), ACTION_CONTINUES_GRAPHEME_CLUSTER),
/* upon seeing CC_0F39 in this state: */ /* upon seeing CC_0F39 in this state: */
null, null,
/* upon seeing CC_ACHUNG in this state: */ /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
null, null,
/* upon seeing CC_DIGIT in this state: */ /* upon seeing CC_DIGIT in this state: */
new TransitionInstruction(STATE_DIGIT, new TransitionInstruction(STATE_DIGIT,
@ -209,48 +212,48 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
ACTION_CONTINUES_GRAPHEME_CLUSTER), ACTION_CONTINUES_GRAPHEME_CLUSTER),
/* upon seeing CC_0F39 in this state: */ /* upon seeing CC_0F39 in this state: */
null, null,
/* upon seeing CC_ACHUNG in this state: */ /* upon seeing CC_SUBSCRIBED_ACHUNG in this state: */
null, null,
/* upon seeing CC_DIGIT in this state: */ /* upon seeing CC_DIGIT in this state: */
null null
} }
}; };
DLC NOW -- clearly, we need LegalSyllable to be convertable to and from GraphemeClusters; /* DLC NOW FIXME -- clearly, we need LegalTshegBar to be convertable to and from UnicodeGraphemeClusters; */
/** Breaks a sequence of GraphemeClusters into LegalSyllables. /** Breaks a sequence of UnicodeGraphemeClusters into LegalTshegBars.
@param grcls a sequence of nonnull GraphemeClusters @param grcls a sequence of nonnull UnicodeGraphemeClusters
@return a sequence of nonnull LegalSyllables @return a sequence of nonnull LegalTshegBars
@exception TibetanSyntaxException if grcls does not consist @exception TibetanSyntaxException if grcls does not consist
entirely of legal Tibetan syllables entirely of legal Tibetan syllables
@see #GraphemeCluster @see UnicodeGraphemeCluster
@see #LegalSyllable @see LegalTshegBar
*/ */
private static Vector breakGraphemeClustersIntoOnlySyllables(Vector grcls) private static Vector breakGraphemeClustersIntoOnlyTshegBars(Vector grcls)
throws TibetanSyntaxException throws TibetanSyntaxException
{ {
return breakGraphemeClustersIntoSyllablesAndGraphemeClusters(grcls, return breakGraphemeClustersIntoTshegBarsAndGraphemeClusters(grcls,
true); true);
} }
private static Vector breakGraphemeClustersIntoOnlySyllables(Vector grcls) { private static Vector breakLegalGraphemeClustersIntoOnlyTshegBars(Vector grcls) {
try { try {
return breakGraphemeClustersIntoSyllablesAndGraphemeClusters(grcls, return breakGraphemeClustersIntoTshegBarsAndGraphemeClusters(grcls,
false); false);
} catch (TibetanSyntaxException) { } catch (TibetanSyntaxException ex) {
throw new Error("This can never happen, because the second parameter, validating, was false."); throw new Error("This can never happen, because the second parameter, validating, was false.");
} }
} }
/** /**
@param grcls a Vector consisting entirely of GraphemeClusters @param grcls a Vector consisting entirely of UnicodeGraphemeClusters
@param validate true iff you wish to have a @param validate true iff you wish to have a
TibetanSyntaxException thrown upon encountering a sequence of TibetanSyntaxException thrown upon encountering a sequence of
GraphemeClusters that is syntactically incorrect Tibetan UnicodeGraphemeClusters that is syntactically incorrect Tibetan
@return if validate is true, a Vector consisting entirely of @return if validate is true, a Vector consisting entirely of
LegalSyllables, else a vector of LegalSyllables and LegalTshegBars, else a vector of LegalTshegBars and
GraphemeClusters */ UnicodeGraphemeClusters */
private static Vector breakGraphemeClustersIntoSyllablesAndGraphemeClusters(Vector grcls, private static Vector breakGraphemeClustersIntoTshegBarsAndGraphemeClusters(Vector grcls,
boolean validate) boolean validate)
throws TibetanSyntaxException throws TibetanSyntaxException
{ {
@ -258,8 +261,8 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
int grcls_len = grcls.length(); int grcls_len = grcls.length();
int beginning_of_cluster = 0; int beginning_of_cluster = 0;
for (int i = 0; i < grcls_len; i++) { for (int i = 0; i < grcls_len; i++) {
GraphemeCluster current_grcl UnicodeGraphemeCluster current_grcl
= (GraphemeCluster)grcls.elementAt(i); = (UnicodeGraphemeCluster)grcls.elementAt(i);
if (current_grcl.isTshegLike()) { if (current_grcl.isTshegLike()) {
if (beginning_of_cluster < i) { if (beginning_of_cluster < i) {
// One or more non-tsheg-like grapheme clusters is // One or more non-tsheg-like grapheme clusters is
@ -269,7 +272,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
beginning_of_cluster, beginning_of_cluster,
i)) i))
{ {
syllables.add(new LegalSyllable(grcls, syllables.add(new LegalTshegBar(grcls,
beginning_of_cluster, beginning_of_cluster,
i, tsheg=current_grcl)); i, tsheg=current_grcl));
} }
@ -299,17 +302,17 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
} }
/** Breaks a string of perfectly-formed Unicode into /** Breaks a string of perfectly-formed Unicode into
GraphemeClusters. UnicodeGraphemeClusters.
@param nfthdl_unicode a String of NFTHDL-normalized Unicode @param nfthdl_unicode a String of NFTHDL-normalized Unicode
codepoints codepoints
@exception Exception if the input is not perfectly formed @exception Exception if the input is not perfectly formed
@return a vector of GraphemeClusters @return a vector of UnicodeGraphemeClusters
@see #GraphemeCluster @see UnicodeGraphemeCluster
*/ */
private static Vector nonErrorCorrectingReader(String nfthdl_unicode) private static Vector nonErrorCorrectingReader(String nfthdl_unicode)
throws Exception throws Exception
{ {
// a vector of GraphemeClusters that we build up little by // a vector of UnicodeGraphemeClusters that we build up little by
// little: // little:
Vector grcls = new Vector(); Vector grcls = new Vector();
int currentState = STATE_START; int currentState = STATE_START;
@ -326,7 +329,7 @@ class ValidatingUnicodeReader implements UnicodeReadingStateMachineConstants {
} else { } else {
switch (ti.getAction()) { switch (ti.getAction()) {
case ACTION_BEGINS_NEW_GRAPHEME_CLUSTER: case ACTION_BEGINS_NEW_GRAPHEME_CLUSTER:
grcls.add(new GraphemeCluster(holdingPen)); grcls.add(new UnicodeGraphemeCluster(holdingPen));
holdingPen = new StringBuffer(); holdingPen = new StringBuffer();
break; break;
case ACTION_CONTINUES_GRAPHEME_CLUSTER: case ACTION_CONTINUES_GRAPHEME_CLUSTER:

View file

@ -19,12 +19,12 @@ Contributor(s): ______________________________________.
package org.thdl.util; package org.thdl.util;
import java.io.*; import java.io.*;
/** Used by {@link LinkedList} to provide the implementation of a /** Used by {@link SimplifiedLinkedList} to provide the implementation of a
simple dynamic link list. simple dynamic link list.
@author Andr&eacute;s Montano Pellegrini @author Andr&eacute;s Montano Pellegrini
@see LinkedList @see SimplifiedLinkedList
@see ListIterator @see SimplifiedListIterator
*/ */
public class Link public class Link
@ -111,4 +111,4 @@ public class Link
else siguiente.insertSorted(link); else siguiente.insertSorted(link);
} }
} }